/* -*- Mode: c; tab-width: 8; indent-tabs-mode: 1; c-basic-offset: 8; -*- */
/*
 * Copyright (c) 1993, 1994, 1995, 1996, 1997
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the Computer Systems
 *	Engineering Group at Lawrence Berkeley Laboratory.
 * 4. Neither the name of the University nor of the Laboratory may be used
 *    to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifdef _WIN32
#include <stdio.h>
#include <stdlib.h>	/* malloc(), free() */
#include <errno.h>

#include <pcap/pcap.h>	/* Needed for PCAP_ERRBUF_SIZE */

#include "charconv.h"

/*
 * Convert a NUL-terminated string in the specified code page to a newly
 * allocated, NUL-terminated UTF-16LE string.
 *
 * "flags" is handed unchanged to MultiByteToWideChar().
 *
 * Returns the converted string, which the caller must free(), or NULL
 * with errno set to EINVAL if the conversion failed or to ENOMEM if the
 * output buffer couldn't be allocated.
 */
wchar_t *
cp_to_utf_16le(UINT codepage, const char *cp_string, DWORD flags)
{
	int utf16le_len;
	wchar_t *utf16le_string;

	/*
	 * Sizing pass: with a zero-length output buffer,
	 * MultiByteToWideChar() returns the number of wide characters
	 * required; as the input length is -1, that count covers the
	 * terminating NUL as well.
	 */
	utf16le_len = MultiByteToWideChar(codepage, flags, cp_string, -1,
	    NULL, 0);
	if (utf16le_len == 0) {
		/*
		 * Error.  Fail with EINVAL.
		 */
		errno = EINVAL;
		return (NULL);
	}

	/*
	 * Allocate the buffer.  The count is known to be positive here,
	 * so do the multiplication in size_t rather than in int.
	 */
	utf16le_string = malloc((size_t)utf16le_len * sizeof *utf16le_string);
	if (utf16le_string == NULL) {
		/*
		 * Not enough memory.  Set errno ourselves rather than
		 * relying on the C library having done so.
		 */
		errno = ENOMEM;
		return (NULL);
	}

	/*
	 * Conversion pass.
	 */
	utf16le_len = MultiByteToWideChar(codepage, flags, cp_string, -1,
	    utf16le_string, utf16le_len);
	if (utf16le_len == 0) {
		/*
		 * Error.  Fail with EINVAL.
		 * XXX - should this ever happen, given that
		 * we already ran the string through
		 * MultiByteToWideChar() to find out how big
		 * a buffer we needed?
		 */
		free(utf16le_string);
		errno = EINVAL;
		return (NULL);
	}
	return (utf16le_string);
}

/*
 * Run WideCharToMultiByte() on a NUL-terminated UTF-16LE string,
 * asking for composite characters to be mapped to precomposed ones
 * where the target code page permits that.
 *
 * Microsoft documents that some code pages - UTF-8 and GB18030 among
 * them - reject WC_COMPOSITECHECK with ERROR_INVALID_FLAGS; for those
 * we retry with no flags, so that the conversion doesn't fail merely
 * because of the flag.
 *
 * Returns what WideCharToMultiByte() returns: the number of bytes
 * written (or, if dst_len is 0, required), or 0 on failure.
 */
static int
wide_to_cp(UINT codepage, const wchar_t *src, char *dst, int dst_len)
{
	int len;

	len = WideCharToMultiByte(codepage, WC_COMPOSITECHECK, src, -1,
	    dst, dst_len, NULL, NULL);
	if (len == 0 && GetLastError() == ERROR_INVALID_FLAGS) {
		len = WideCharToMultiByte(codepage, 0, src, -1,
		    dst, dst_len, NULL, NULL);
	}
	return (len);
}

/*
 * Convert a NUL-terminated UTF-16LE string to a newly allocated,
 * NUL-terminated string in the specified code page.
 *
 * Returns the converted string, which the caller must free(), or NULL
 * with errno set to EINVAL if the conversion failed or to ENOMEM if the
 * output buffer couldn't be allocated.
 */
char *
utf_16le_to_cp(UINT codepage, const wchar_t *utf16le_string)
{
	int cp_len;
	char *cp_string;

	/*
	 * Sizing pass: a zero-length output buffer makes the conversion
	 * routine report the number of bytes required, including the
	 * terminating NUL.
	 */
	cp_len = wide_to_cp(codepage, utf16le_string, NULL, 0);
	if (cp_len == 0) {
		/*
		 * Error.  Fail with EINVAL.
		 */
		errno = EINVAL;
		return (NULL);
	}

	/*
	 * Allocate the buffer.
	 */
	cp_string = malloc((size_t)cp_len * sizeof *cp_string);
	if (cp_string == NULL) {
		/*
		 * Not enough memory.  Set errno ourselves rather than
		 * relying on the C library having done so.
		 */
		errno = ENOMEM;
		return (NULL);
	}

	/*
	 * Conversion pass.
	 */
	cp_len = wide_to_cp(codepage, utf16le_string, cp_string, cp_len);
	if (cp_len == 0) {
		/*
		 * Error.  Fail with EINVAL.
		 * XXX - should this ever happen, given that
		 * we already ran the string through
		 * WideCharToMultiByte() to find out how big
		 * a buffer we needed?
		 */
		free(cp_string);
		errno = EINVAL;
		return (NULL);
	}
	return (cp_string);
}

/*
 * Convert an error message string, in place, from UTF-8 to the local
 * code page, as best we can.
 *
 * The buffer is assumed to be PCAP_ERRBUF_SIZE bytes long.
 *
 * Despite the name, the converted string is not truncated if it
 * doesn't fit: in that case, and in the case of any other conversion
 * failure, the contents of the buffer are replaced with a fixed
 * message describing the problem.
 */
void
utf_8_to_acp_truncated(char *errbuf)
{
	wchar_t *utf_16_errbuf;
	int retval;
	DWORD err;

	/*
	 * Do this by converting to UTF-16LE and then to the local
	 * code page.  That means we get to use Microsoft's
	 * conversion routines, rather than having to understand
	 * all the code pages ourselves, *and* that this routine
	 * can convert in place, as the intermediate UTF-16LE copy
	 * lives in its own buffer.
	 */

	/*
	 * Map from UTF-8 to UTF-16LE, with no conversion flags.
	 * Convert any invalid characters to REPLACEMENT CHARACTER.
	 */
	utf_16_errbuf = cp_to_utf_16le(CP_UTF8, errbuf, 0);
	if (utf_16_errbuf == NULL) {
		/*
		 * Error.  Give up.
		 */
		snprintf(errbuf, PCAP_ERRBUF_SIZE,
		    "Can't convert error string to the local code page");
		return;
	}

	/*
	 * Now, convert that to the local code page.
	 * Use the current thread's code page.  For unconvertible
	 * characters, let it pick the "best fit" character.
	 *
	 * XXX - we'd like some way to do what utf_16le_to_utf_8_truncated()
	 * does if the buffer isn't big enough, but we don't want to have
	 * to handle all local code pages ourselves; doing so requires
	 * knowledge of all those code pages, including knowledge of how
	 * characters are formed in those code pages so that we can avoid
	 * cutting a multi-byte character into pieces.
	 *
	 * Converting to an un-truncated string using Windows APIs, and
	 * then copying to the buffer, still requires knowledge of how
	 * characters are formed in the target code page.
	 */
	retval = WideCharToMultiByte(CP_THREAD_ACP, 0, utf_16_errbuf, -1,
	    errbuf, PCAP_ERRBUF_SIZE, NULL, NULL);

	/*
	 * Fetch the error code, if any, before calling anything else,
	 * so that it can't be overwritten; after that, the UTF-16LE
	 * copy is no longer needed on either path.
	 */
	err = (retval == 0) ? GetLastError() : ERROR_SUCCESS;
	free(utf_16_errbuf);

	if (retval == 0) {
		if (err == ERROR_INSUFFICIENT_BUFFER)
			snprintf(errbuf, PCAP_ERRBUF_SIZE,
			    "The error string, in the local code page, didn't fit in the buffer");
		else
			snprintf(errbuf, PCAP_ERRBUF_SIZE,
			    "Can't convert error string to the local code page");
	}
}
#endif /* _WIN32 */