1da6c28aaSamw /* 2da6c28aaSamw * CDDL HEADER START 3da6c28aaSamw * 4da6c28aaSamw * The contents of this file are subject to the terms of the 5da6c28aaSamw * Common Development and Distribution License (the "License"). 6da6c28aaSamw * You may not use this file except in compliance with the License. 7da6c28aaSamw * 8da6c28aaSamw * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9da6c28aaSamw * or http://www.opensolaris.org/os/licensing. 10da6c28aaSamw * See the License for the specific language governing permissions 11da6c28aaSamw * and limitations under the License. 12da6c28aaSamw * 13da6c28aaSamw * When distributing Covered Code, include this CDDL HEADER in each 14da6c28aaSamw * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15da6c28aaSamw * If applicable, add the following below this CDDL HEADER, with the 16da6c28aaSamw * fields enclosed by brackets "[]" replaced with your own identifying 17da6c28aaSamw * information: Portions Copyright [yyyy] [name of copyright owner] 18da6c28aaSamw * 19da6c28aaSamw * CDDL HEADER END 20da6c28aaSamw */ 21da6c28aaSamw /* 22*bbf6f00cSJordan Brown * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23da6c28aaSamw * Use is subject to license terms. 24da6c28aaSamw */ 25da6c28aaSamw 26da6c28aaSamw #ifdef _KERNEL 27da6c28aaSamw #include <sys/types.h> 28da6c28aaSamw #include <sys/sunddi.h> 29da6c28aaSamw #else 30*bbf6f00cSJordan Brown #include <stdio.h> 31da6c28aaSamw #include <stdlib.h> 32da6c28aaSamw #include <string.h> 33da6c28aaSamw #include <strings.h> 34da6c28aaSamw #endif 35*bbf6f00cSJordan Brown #include <sys/u8_textprep.h> 36*bbf6f00cSJordan Brown #include <smbsrv/alloc.h> 37da6c28aaSamw #include <smbsrv/string.h> 38*bbf6f00cSJordan Brown #include <smbsrv/cp_usascii.h> 39*bbf6f00cSJordan Brown #include <smbsrv/cp_unicode.h> 40da6c28aaSamw 41*bbf6f00cSJordan Brown #define UNICODE_N_ENTRIES (sizeof (a_unicode) / sizeof (a_unicode[0])) 42*bbf6f00cSJordan Brown 43*bbf6f00cSJordan Brown /* 44*bbf6f00cSJordan Brown * Global pointer to the current codepage: defaults to ASCII, 45*bbf6f00cSJordan Brown * and a flag indicating whether the codepage is Unicode or ASCII. 46*bbf6f00cSJordan Brown */ 47*bbf6f00cSJordan Brown static smb_codepage_t *current_codepage = usascii_codepage; 48*bbf6f00cSJordan Brown static boolean_t is_unicode = B_FALSE; 49*bbf6f00cSJordan Brown 50*bbf6f00cSJordan Brown static smb_codepage_t *smb_unicode_init(void); 51da6c28aaSamw 52da6c28aaSamw /* 53da6c28aaSamw * strsubst 54da6c28aaSamw * 55da6c28aaSamw * Scan a string replacing all occurrences of orgchar with newchar. 56da6c28aaSamw * Returns a pointer to s, or null of s is null. 57da6c28aaSamw */ 58da6c28aaSamw char * 59da6c28aaSamw strsubst(char *s, char orgchar, char newchar) 60da6c28aaSamw { 61da6c28aaSamw char *p = s; 62da6c28aaSamw 63da6c28aaSamw if (p == 0) 64da6c28aaSamw return (0); 65da6c28aaSamw 66da6c28aaSamw while (*p) { 67da6c28aaSamw if (*p == orgchar) 68da6c28aaSamw *p = newchar; 69da6c28aaSamw ++p; 70da6c28aaSamw } 71da6c28aaSamw 72da6c28aaSamw return (s); 73da6c28aaSamw } 74da6c28aaSamw 75da6c28aaSamw /* 76da6c28aaSamw * strcanon 77da6c28aaSamw * 78da6c28aaSamw * Normalize a string by reducing all the repeated characters in 79da6c28aaSamw * buf as defined by class. For example; 80da6c28aaSamw * 81da6c28aaSamw * char *buf = strdup("/d1//d2//d3\\\\d4\\\\f1.txt"); 82da6c28aaSamw * strcanon(buf, "/\\"); 83da6c28aaSamw * 84da6c28aaSamw * Would result in buf containing the following string: 85da6c28aaSamw * 86da6c28aaSamw * /d1/d2/d3\d4\f1.txt 87da6c28aaSamw * 88da6c28aaSamw * This function modifies the contents of buf in place and returns 89da6c28aaSamw * a pointer to buf. 90da6c28aaSamw */ 91da6c28aaSamw char * 92da6c28aaSamw strcanon(char *buf, const char *class) 93da6c28aaSamw { 94da6c28aaSamw char *p = buf; 95da6c28aaSamw char *q = buf; 96da6c28aaSamw char *r; 97da6c28aaSamw 98da6c28aaSamw while (*p) { 99da6c28aaSamw *q++ = *p; 100da6c28aaSamw 101da6c28aaSamw if ((r = strchr(class, *p)) != 0) { 102da6c28aaSamw while (*p == *r) 103da6c28aaSamw ++p; 104da6c28aaSamw } else 105da6c28aaSamw ++p; 106da6c28aaSamw } 107da6c28aaSamw 108da6c28aaSamw *q = '\0'; 109da6c28aaSamw return (buf); 110da6c28aaSamw } 111*bbf6f00cSJordan Brown 112*bbf6f00cSJordan Brown void 113*bbf6f00cSJordan Brown smb_codepage_init(void) 114*bbf6f00cSJordan Brown { 115*bbf6f00cSJordan Brown smb_codepage_t *cp; 116*bbf6f00cSJordan Brown 117*bbf6f00cSJordan Brown if (is_unicode) 118*bbf6f00cSJordan Brown return; 119*bbf6f00cSJordan Brown 120*bbf6f00cSJordan Brown if ((cp = smb_unicode_init()) != NULL) { 121*bbf6f00cSJordan Brown current_codepage = cp; 122*bbf6f00cSJordan Brown is_unicode = B_TRUE; 123*bbf6f00cSJordan Brown } else { 124*bbf6f00cSJordan Brown current_codepage = usascii_codepage; 125*bbf6f00cSJordan Brown is_unicode = B_FALSE; 126*bbf6f00cSJordan Brown } 127*bbf6f00cSJordan Brown } 128*bbf6f00cSJordan Brown 129*bbf6f00cSJordan Brown /* 130*bbf6f00cSJordan Brown * Determine whether or not a character is an uppercase character. 131*bbf6f00cSJordan Brown * This function operates on the current codepage table. Returns 132*bbf6f00cSJordan Brown * non-zero if the character is uppercase. Otherwise returns zero. 133*bbf6f00cSJordan Brown */ 134*bbf6f00cSJordan Brown int 135*bbf6f00cSJordan Brown smb_isupper(int c) 136*bbf6f00cSJordan Brown { 137*bbf6f00cSJordan Brown uint16_t mask = is_unicode ? 0xffff : 0xff; 138*bbf6f00cSJordan Brown 139*bbf6f00cSJordan Brown return (current_codepage[c & mask].ctype & CODEPAGE_ISUPPER); 140*bbf6f00cSJordan Brown } 141*bbf6f00cSJordan Brown 142*bbf6f00cSJordan Brown /* 143*bbf6f00cSJordan Brown * Determine whether or not a character is an lowercase character. 144*bbf6f00cSJordan Brown * This function operates on the current codepage table. Returns 145*bbf6f00cSJordan Brown * non-zero if the character is lowercase. Otherwise returns zero. 146*bbf6f00cSJordan Brown */ 147*bbf6f00cSJordan Brown int 148*bbf6f00cSJordan Brown smb_islower(int c) 149*bbf6f00cSJordan Brown { 150*bbf6f00cSJordan Brown uint16_t mask = is_unicode ? 0xffff : 0xff; 151*bbf6f00cSJordan Brown 152*bbf6f00cSJordan Brown return (current_codepage[c & mask].ctype & CODEPAGE_ISLOWER); 153*bbf6f00cSJordan Brown } 154*bbf6f00cSJordan Brown 155*bbf6f00cSJordan Brown /* 156*bbf6f00cSJordan Brown * Convert individual characters to their uppercase equivalent value. 157*bbf6f00cSJordan Brown * If the specified character is lowercase, the uppercase value will 158*bbf6f00cSJordan Brown * be returned. Otherwise the original value will be returned. 159*bbf6f00cSJordan Brown */ 160*bbf6f00cSJordan Brown int 161*bbf6f00cSJordan Brown smb_toupper(int c) 162*bbf6f00cSJordan Brown { 163*bbf6f00cSJordan Brown uint16_t mask = is_unicode ? 0xffff : 0xff; 164*bbf6f00cSJordan Brown 165*bbf6f00cSJordan Brown return (current_codepage[c & mask].upper); 166*bbf6f00cSJordan Brown } 167*bbf6f00cSJordan Brown 168*bbf6f00cSJordan Brown /* 169*bbf6f00cSJordan Brown * Convert individual characters to their lowercase equivalent value. 170*bbf6f00cSJordan Brown * If the specified character is uppercase, the lowercase value will 171*bbf6f00cSJordan Brown * be returned. Otherwise the original value will be returned. 172*bbf6f00cSJordan Brown */ 173*bbf6f00cSJordan Brown int 174*bbf6f00cSJordan Brown smb_tolower(int c) 175*bbf6f00cSJordan Brown { 176*bbf6f00cSJordan Brown uint16_t mask = is_unicode ? 0xffff : 0xff; 177*bbf6f00cSJordan Brown 178*bbf6f00cSJordan Brown return (current_codepage[c & mask].lower); 179*bbf6f00cSJordan Brown } 180*bbf6f00cSJordan Brown 181*bbf6f00cSJordan Brown /* 182*bbf6f00cSJordan Brown * Convert a string to uppercase using the appropriate codepage. The 183*bbf6f00cSJordan Brown * string is converted in place. A pointer to the string is returned. 184*bbf6f00cSJordan Brown * There is an assumption here that uppercase and lowercase values 185*bbf6f00cSJordan Brown * always result encode to the same length. 186*bbf6f00cSJordan Brown */ 187*bbf6f00cSJordan Brown char * 188*bbf6f00cSJordan Brown smb_strupr(char *s) 189*bbf6f00cSJordan Brown { 190*bbf6f00cSJordan Brown smb_wchar_t c; 191*bbf6f00cSJordan Brown char *p = s; 192*bbf6f00cSJordan Brown 193*bbf6f00cSJordan Brown while (*p) { 194*bbf6f00cSJordan Brown if (smb_isascii(*p)) { 195*bbf6f00cSJordan Brown *p = smb_toupper(*p); 196*bbf6f00cSJordan Brown p++; 197*bbf6f00cSJordan Brown } else { 198*bbf6f00cSJordan Brown if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0) 199*bbf6f00cSJordan Brown return (0); 200*bbf6f00cSJordan Brown 201*bbf6f00cSJordan Brown if (c == 0) 202*bbf6f00cSJordan Brown break; 203*bbf6f00cSJordan Brown 204*bbf6f00cSJordan Brown c = smb_toupper(c); 205*bbf6f00cSJordan Brown p += smb_wctomb(p, c); 206*bbf6f00cSJordan Brown } 207*bbf6f00cSJordan Brown } 208*bbf6f00cSJordan Brown 209*bbf6f00cSJordan Brown return (s); 210*bbf6f00cSJordan Brown } 211*bbf6f00cSJordan Brown 212*bbf6f00cSJordan Brown /* 213*bbf6f00cSJordan Brown * Convert a string to lowercase using the appropriate codepage. The 214*bbf6f00cSJordan Brown * string is converted in place. A pointer to the string is returned. 215*bbf6f00cSJordan Brown * There is an assumption here that uppercase and lowercase values 216*bbf6f00cSJordan Brown * always result encode to the same length. 217*bbf6f00cSJordan Brown */ 218*bbf6f00cSJordan Brown char * 219*bbf6f00cSJordan Brown smb_strlwr(char *s) 220*bbf6f00cSJordan Brown { 221*bbf6f00cSJordan Brown smb_wchar_t c; 222*bbf6f00cSJordan Brown char *p = s; 223*bbf6f00cSJordan Brown 224*bbf6f00cSJordan Brown while (*p) { 225*bbf6f00cSJordan Brown if (smb_isascii(*p)) { 226*bbf6f00cSJordan Brown *p = smb_tolower(*p); 227*bbf6f00cSJordan Brown p++; 228*bbf6f00cSJordan Brown } else { 229*bbf6f00cSJordan Brown if (smb_mbtowc(&c, p, MTS_MB_CHAR_MAX) < 0) 230*bbf6f00cSJordan Brown return (0); 231*bbf6f00cSJordan Brown 232*bbf6f00cSJordan Brown if (c == 0) 233*bbf6f00cSJordan Brown break; 234*bbf6f00cSJordan Brown 235*bbf6f00cSJordan Brown c = smb_tolower(c); 236*bbf6f00cSJordan Brown p += smb_wctomb(p, c); 237*bbf6f00cSJordan Brown } 238*bbf6f00cSJordan Brown } 239*bbf6f00cSJordan Brown 240*bbf6f00cSJordan Brown return (s); 241*bbf6f00cSJordan Brown } 242*bbf6f00cSJordan Brown 243*bbf6f00cSJordan Brown /* 244*bbf6f00cSJordan Brown * Returns 1 if string contains NO uppercase chars 0 otherwise. However, 245*bbf6f00cSJordan Brown * -1 is returned if "s" is not a valid multi-byte string. 246*bbf6f00cSJordan Brown */ 247*bbf6f00cSJordan Brown int 248*bbf6f00cSJordan Brown smb_isstrlwr(const char *s) 249*bbf6f00cSJordan Brown { 250*bbf6f00cSJordan Brown smb_wchar_t c; 251*bbf6f00cSJordan Brown int n; 252*bbf6f00cSJordan Brown const char *p = s; 253*bbf6f00cSJordan Brown 254*bbf6f00cSJordan Brown while (*p) { 255*bbf6f00cSJordan Brown if (smb_isascii(*p) && smb_isupper(*p)) 256*bbf6f00cSJordan Brown return (0); 257*bbf6f00cSJordan Brown else { 258*bbf6f00cSJordan Brown if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0) 259*bbf6f00cSJordan Brown return (-1); 260*bbf6f00cSJordan Brown 261*bbf6f00cSJordan Brown if (c == 0) 262*bbf6f00cSJordan Brown break; 263*bbf6f00cSJordan Brown 264*bbf6f00cSJordan Brown if (smb_isupper(c)) 265*bbf6f00cSJordan Brown return (0); 266*bbf6f00cSJordan Brown 267*bbf6f00cSJordan Brown p += n; 268*bbf6f00cSJordan Brown } 269*bbf6f00cSJordan Brown } 270*bbf6f00cSJordan Brown 271*bbf6f00cSJordan Brown return (1); 272*bbf6f00cSJordan Brown } 273*bbf6f00cSJordan Brown 274*bbf6f00cSJordan Brown /* 275*bbf6f00cSJordan Brown * Returns 1 if string contains NO lowercase chars 0 otherwise. However, 276*bbf6f00cSJordan Brown * -1 is returned if "s" is not a valid multi-byte string. 277*bbf6f00cSJordan Brown */ 278*bbf6f00cSJordan Brown int 279*bbf6f00cSJordan Brown smb_isstrupr(const char *s) 280*bbf6f00cSJordan Brown { 281*bbf6f00cSJordan Brown smb_wchar_t c; 282*bbf6f00cSJordan Brown int n; 283*bbf6f00cSJordan Brown const char *p = s; 284*bbf6f00cSJordan Brown 285*bbf6f00cSJordan Brown while (*p) { 286*bbf6f00cSJordan Brown if (smb_isascii(*p) && smb_islower(*p)) 287*bbf6f00cSJordan Brown return (0); 288*bbf6f00cSJordan Brown else { 289*bbf6f00cSJordan Brown if ((n = smb_mbtowc(&c, p, MTS_MB_CHAR_MAX)) < 0) 290*bbf6f00cSJordan Brown return (-1); 291*bbf6f00cSJordan Brown 292*bbf6f00cSJordan Brown if (c == 0) 293*bbf6f00cSJordan Brown break; 294*bbf6f00cSJordan Brown 295*bbf6f00cSJordan Brown if (smb_islower(c)) 296*bbf6f00cSJordan Brown return (0); 297*bbf6f00cSJordan Brown 298*bbf6f00cSJordan Brown p += n; 299*bbf6f00cSJordan Brown } 300*bbf6f00cSJordan Brown } 301*bbf6f00cSJordan Brown 302*bbf6f00cSJordan Brown return (1); 303*bbf6f00cSJordan Brown } 304*bbf6f00cSJordan Brown 305*bbf6f00cSJordan Brown /* 306*bbf6f00cSJordan Brown * Compare the null-terminated strings s1 and s2 and return an integer 307*bbf6f00cSJordan Brown * greater than, equal to or less than 0 dependent on whether s1 is 308*bbf6f00cSJordan Brown * lexicographically greater than, equal to or less than s2 after 309*bbf6f00cSJordan Brown * translation of each character to lowercase. The original strings 310*bbf6f00cSJordan Brown * are not modified. 311*bbf6f00cSJordan Brown * 312*bbf6f00cSJordan Brown * If n is non-zero, at most n bytes are compared. Otherwise, the strings 313*bbf6f00cSJordan Brown * are compared until a null terminator is encountered. 314*bbf6f00cSJordan Brown * 315*bbf6f00cSJordan Brown * Out: 0 if strings are equal 316*bbf6f00cSJordan Brown * < 0 if first string < second string 317*bbf6f00cSJordan Brown * > 0 if first string > second string 318*bbf6f00cSJordan Brown */ 319*bbf6f00cSJordan Brown int 320*bbf6f00cSJordan Brown smb_strcasecmp(const char *s1, const char *s2, size_t n) 321*bbf6f00cSJordan Brown { 322*bbf6f00cSJordan Brown int err = 0; 323*bbf6f00cSJordan Brown int rc; 324*bbf6f00cSJordan Brown 325*bbf6f00cSJordan Brown rc = u8_strcmp(s1, s2, n, U8_STRCMP_CI_LOWER, U8_UNICODE_LATEST, &err); 326*bbf6f00cSJordan Brown if (err != 0) 327*bbf6f00cSJordan Brown return (-1); 328*bbf6f00cSJordan Brown return (rc); 329*bbf6f00cSJordan Brown } 330*bbf6f00cSJordan Brown 331*bbf6f00cSJordan Brown /* 332*bbf6f00cSJordan Brown * First build a codepage based on cp_unicode.h. Then build the unicode 333*bbf6f00cSJordan Brown * codepage from this interim codepage by copying the entries over while 334*bbf6f00cSJordan Brown * fixing them and filling in the gaps. 335*bbf6f00cSJordan Brown */ 336*bbf6f00cSJordan Brown static smb_codepage_t * 337*bbf6f00cSJordan Brown smb_unicode_init(void) 338*bbf6f00cSJordan Brown { 339*bbf6f00cSJordan Brown smb_codepage_t *unicode; 340*bbf6f00cSJordan Brown uint32_t a = 0; 341*bbf6f00cSJordan Brown uint32_t b = 0; 342*bbf6f00cSJordan Brown 343*bbf6f00cSJordan Brown unicode = MEM_ZALLOC("unicode", sizeof (smb_codepage_t) << 16); 344*bbf6f00cSJordan Brown if (unicode == NULL) 345*bbf6f00cSJordan Brown return (NULL); 346*bbf6f00cSJordan Brown 347*bbf6f00cSJordan Brown while (b != 0xffff) { 348*bbf6f00cSJordan Brown /* 349*bbf6f00cSJordan Brown * If there is a gap in the standard, 350*bbf6f00cSJordan Brown * fill in the gap with no-case entries. 351*bbf6f00cSJordan Brown */ 352*bbf6f00cSJordan Brown if (UNICODE_N_ENTRIES <= a || a_unicode[a].val > b) { 353*bbf6f00cSJordan Brown unicode[b].ctype = CODEPAGE_ISNONE; 354*bbf6f00cSJordan Brown unicode[b].upper = (smb_wchar_t)b; 355*bbf6f00cSJordan Brown unicode[b].lower = (smb_wchar_t)b; 356*bbf6f00cSJordan Brown b++; 357*bbf6f00cSJordan Brown continue; 358*bbf6f00cSJordan Brown } 359*bbf6f00cSJordan Brown 360*bbf6f00cSJordan Brown /* 361*bbf6f00cSJordan Brown * Copy the entry and fixup as required. 362*bbf6f00cSJordan Brown */ 363*bbf6f00cSJordan Brown switch (a_unicode[a].ctype) { 364*bbf6f00cSJordan Brown case CODEPAGE_ISNONE: 365*bbf6f00cSJordan Brown /* 366*bbf6f00cSJordan Brown * Replace 0xffff in upper/lower fields with its val. 367*bbf6f00cSJordan Brown */ 368*bbf6f00cSJordan Brown unicode[b].ctype = CODEPAGE_ISNONE; 369*bbf6f00cSJordan Brown unicode[b].upper = (smb_wchar_t)b; 370*bbf6f00cSJordan Brown unicode[b].lower = (smb_wchar_t)b; 371*bbf6f00cSJordan Brown break; 372*bbf6f00cSJordan Brown case CODEPAGE_ISUPPER: 373*bbf6f00cSJordan Brown /* 374*bbf6f00cSJordan Brown * Some characters may have case yet not have 375*bbf6f00cSJordan Brown * case conversion. Treat them as no-case. 376*bbf6f00cSJordan Brown */ 377*bbf6f00cSJordan Brown if (a_unicode[a].lower == 0xffff) { 378*bbf6f00cSJordan Brown unicode[b].ctype = CODEPAGE_ISNONE; 379*bbf6f00cSJordan Brown unicode[b].upper = (smb_wchar_t)b; 380*bbf6f00cSJordan Brown unicode[b].lower = (smb_wchar_t)b; 381*bbf6f00cSJordan Brown } else { 382*bbf6f00cSJordan Brown unicode[b].ctype = CODEPAGE_ISUPPER; 383*bbf6f00cSJordan Brown unicode[b].upper = (smb_wchar_t)b; 384*bbf6f00cSJordan Brown unicode[b].lower = a_unicode[a].lower; 385*bbf6f00cSJordan Brown } 386*bbf6f00cSJordan Brown break; 387*bbf6f00cSJordan Brown case CODEPAGE_ISLOWER: 388*bbf6f00cSJordan Brown /* 389*bbf6f00cSJordan Brown * Some characters may have case yet not have 390*bbf6f00cSJordan Brown * case conversion. Treat them as no-case. 391*bbf6f00cSJordan Brown */ 392*bbf6f00cSJordan Brown if (a_unicode[a].upper == 0xffff) { 393*bbf6f00cSJordan Brown unicode[b].ctype = CODEPAGE_ISNONE; 394*bbf6f00cSJordan Brown unicode[b].upper = (smb_wchar_t)b; 395*bbf6f00cSJordan Brown unicode[b].lower = (smb_wchar_t)b; 396*bbf6f00cSJordan Brown } else { 397*bbf6f00cSJordan Brown unicode[b].ctype = CODEPAGE_ISLOWER; 398*bbf6f00cSJordan Brown unicode[b].upper = a_unicode[a].upper; 399*bbf6f00cSJordan Brown unicode[b].lower = (smb_wchar_t)b; 400*bbf6f00cSJordan Brown } 401*bbf6f00cSJordan Brown break; 402*bbf6f00cSJordan Brown default: 403*bbf6f00cSJordan Brown MEM_FREE("unicode", unicode); 404*bbf6f00cSJordan Brown return (NULL); 405*bbf6f00cSJordan Brown } 406*bbf6f00cSJordan Brown 407*bbf6f00cSJordan Brown a++; 408*bbf6f00cSJordan Brown b++; 409*bbf6f00cSJordan Brown }; 410*bbf6f00cSJordan Brown 411*bbf6f00cSJordan Brown return (unicode); 412*bbf6f00cSJordan Brown } 413