1 /* 2 * Copyright (c) 1993, 1994, 1995, 1996, 1997, 1998 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the Computer Systems 16 * Engineering Group at Lawrence Berkeley Laboratory. 17 * 4. Neither the name of the University nor of the Laboratory may be used 18 * to endorse or promote products derived from this software without 19 * specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Utilities for message formatting used both by libpcap and rpcapd. 36 */ 37 38 #include <config.h> 39 40 #include "ftmacros.h" 41 42 #include <stddef.h> 43 #include <stdarg.h> 44 #include <stdio.h> 45 #include <string.h> 46 #include <errno.h> 47 48 #include "pcap-int.h" 49 50 #include "portability.h" 51 52 #include "fmtutils.h" 53 54 #ifdef _WIN32 55 #include "charconv.h" 56 #endif 57 58 /* 59 * Set the encoding. 60 */ 61 #ifdef _WIN32 62 /* 63 * True if we should use UTF-8. 64 */ 65 static int use_utf_8; 66 67 void 68 pcapint_fmt_set_encoding(unsigned int opts) 69 { 70 if (opts == PCAP_CHAR_ENC_UTF_8) 71 use_utf_8 = 1; 72 } 73 #else 74 void 75 pcapint_fmt_set_encoding(unsigned int opts _U_) 76 { 77 /* 78 * Nothing to do here. 79 */ 80 } 81 #endif 82 83 #ifdef _WIN32 84 /* 85 * Convert a null-terminated UTF-16LE string to UTF-8, putting it into 86 * a buffer starting at the specified location and stopping if we go 87 * past the specified size. This will only put out complete UTF-8 88 * sequences. 89 * 90 * We do this ourselves because Microsoft doesn't offer a "convert and 91 * stop at a UTF-8 character boundary if we run out of space" routine. 92 */ 93 #define IS_LEADING_SURROGATE(c) \ 94 ((c) >= 0xd800 && (c) < 0xdc00) 95 #define IS_TRAILING_SURROGATE(c) \ 96 ((c) >= 0xdc00 && (c) < 0xe000) 97 #define SURROGATE_VALUE(leading, trailing) \ 98 (((((leading) - 0xd800) << 10) | ((trailing) - 0xdc00)) + 0x10000) 99 #define REPLACEMENT_CHARACTER 0x0FFFD 100 101 static char * 102 utf_16le_to_utf_8_truncated(const wchar_t *utf_16, char *utf_8, 103 size_t utf_8_len) 104 { 105 wchar_t c, c2; 106 uint32_t uc; 107 108 if (utf_8_len == 0) { 109 /* 110 * Not even enough room for a trailing '\0'. 111 * Don't put anything into the buffer. 112 */ 113 return (utf_8); 114 } 115 116 while ((c = *utf_16++) != '\0') { 117 if (IS_LEADING_SURROGATE(c)) { 118 /* 119 * Leading surrogate. Must be followed by 120 * a trailing surrogate. 121 */ 122 c2 = *utf_16; 123 if (c2 == '\0') { 124 /* 125 * Oops, string ends with a lead 126 * surrogate. Try to drop in 127 * a REPLACEMENT CHARACTER, and 128 * don't move the string pointer, 129 * so on the next trip through 130 * the loop we grab the terminating 131 * '\0' and quit. 132 */ 133 uc = REPLACEMENT_CHARACTER; 134 } else { 135 /* 136 * OK, we can consume this 2-octet 137 * value. 138 */ 139 utf_16++; 140 if (IS_TRAILING_SURROGATE(c2)) { 141 /* 142 * Trailing surrogate. 143 * This calculation will, 144 * for c being a leading 145 * surrogate and c2 being 146 * a trailing surrogate, 147 * produce a value between 148 * 0x100000 and 0x10ffff, 149 * so it's always going to be 150 * a valid Unicode code point. 151 */ 152 uc = SURROGATE_VALUE(c, c2); 153 } else { 154 /* 155 * Not a trailing surrogate; 156 * try to drop in a 157 * REPLACEMENT CHARACTER. 158 */ 159 uc = REPLACEMENT_CHARACTER; 160 } 161 } 162 } else { 163 /* 164 * Not a leading surrogate. 165 */ 166 if (IS_TRAILING_SURROGATE(c)) { 167 /* 168 * Trailing surrogate without 169 * a preceding leading surrogate. 170 * Try to drop in a REPLACEMENT 171 * CHARACTER. 172 */ 173 uc = REPLACEMENT_CHARACTER; 174 } else { 175 /* 176 * This is a valid BMP character; 177 * drop it in. 178 */ 179 uc = c; 180 } 181 } 182 183 /* 184 * OK, uc is a valid Unicode character; how 185 * many bytes worth of UTF-8 does it require? 186 */ 187 if (uc < 0x0080) { 188 /* 1 byte. */ 189 if (utf_8_len < 2) { 190 /* 191 * Not enough room for that byte 192 * plus a trailing '\0'. 193 */ 194 break; 195 } 196 *utf_8++ = (char)uc; 197 utf_8_len--; 198 } else if (uc < 0x0800) { 199 /* 2 bytes. */ 200 if (utf_8_len < 3) { 201 /* 202 * Not enough room for those bytes 203 * plus a trailing '\0'. 204 */ 205 break; 206 } 207 *utf_8++ = ((uc >> 6) & 0x3F) | 0xC0; 208 *utf_8++ = ((uc >> 0) & 0x3F) | 0x80; 209 utf_8_len -= 2; 210 } else if (uc < 0x010000) { 211 /* 3 bytes. */ 212 if (utf_8_len < 4) { 213 /* 214 * Not enough room for those bytes 215 * plus a trailing '\0'. 216 */ 217 break; 218 } 219 *utf_8++ = ((uc >> 12) & 0x0F) | 0xE0; 220 *utf_8++ = ((uc >> 6) & 0x3F) | 0x80; 221 *utf_8++ = ((uc >> 0) & 0x3F) | 0x80; 222 utf_8_len -= 3; 223 } else { 224 /* 4 bytes. */ 225 if (utf_8_len < 5) { 226 /* 227 * Not enough room for those bytes 228 * plus a trailing '\0'. 229 */ 230 break; 231 } 232 *utf_8++ = ((uc >> 18) & 0x03) | 0xF0; 233 *utf_8++ = ((uc >> 12) & 0x3F) | 0x80; 234 *utf_8++ = ((uc >> 6) & 0x3F) | 0x80; 235 *utf_8++ = ((uc >> 0) & 0x3F) | 0x80; 236 utf_8_len -= 3; 237 } 238 } 239 240 /* 241 * OK, we have enough room for (at least) a trailing '\0'. 242 * (We started out with enough room, thanks to the test 243 * for a zero-length buffer at the beginning, and if 244 * there wasn't enough room for any character we wanted 245 * to put into the buffer *plus* a trailing '\0', 246 * we'd have quit before putting it into the buffer, 247 * and thus would have left enough room for the trailing 248 * '\0'.) 249 * 250 * Drop it in. 251 */ 252 *utf_8 = '\0'; 253 254 /* 255 * Return a pointer to the terminating '\0', in case we 256 * want to drop something in after that. 257 */ 258 return (utf_8); 259 } 260 #endif /* _WIN32 */ 261 262 /* 263 * Generate an error message based on a format, arguments, and an 264 * errno, with a message for the errno after the formatted output. 265 */ 266 void 267 pcapint_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum, 268 const char *fmt, ...) 269 { 270 va_list ap; 271 272 va_start(ap, fmt); 273 pcapint_vfmt_errmsg_for_errno(errbuf, errbuflen, errnum, fmt, ap); 274 va_end(ap); 275 } 276 277 void 278 pcapint_vfmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum, 279 const char *fmt, va_list ap) 280 { 281 size_t msglen; 282 char *p; 283 size_t errbuflen_remaining; 284 285 (void)vsnprintf(errbuf, errbuflen, fmt, ap); 286 msglen = strlen(errbuf); 287 288 /* 289 * Do we have enough space to append ": "? 290 * Including the terminating '\0', that's 3 bytes. 291 */ 292 if (msglen + 3 > errbuflen) { 293 /* No - just give them what we've produced. */ 294 return; 295 } 296 p = errbuf + msglen; 297 errbuflen_remaining = errbuflen - msglen; 298 *p++ = ':'; 299 *p++ = ' '; 300 *p = '\0'; 301 errbuflen_remaining -= 2; 302 303 /* 304 * Now append the string for the error code. 305 */ 306 #if defined(HAVE__WCSERROR_S) 307 /* 308 * We have a Windows-style _wcserror_s(). 309 * Generate a UTF-16LE error message. 310 */ 311 wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE]; 312 errno_t err = _wcserror_s(utf_16_errbuf, PCAP_ERRBUF_SIZE, errnum); 313 if (err != 0) { 314 /* 315 * It doesn't appear to be documented anywhere obvious 316 * what the error returns from _wcserror_s(). 317 */ 318 snprintf(p, errbuflen_remaining, "Error %d", errnum); 319 return; 320 } 321 322 /* 323 * Now convert it from UTF-16LE to UTF-8, dropping it in the 324 * remaining space in the buffer, and truncating it - cleanly, 325 * on a UTF-8 character boundary - if it doesn't fit. 326 */ 327 utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining); 328 329 /* 330 * Now, if we're not in UTF-8 mode, convert errbuf to the 331 * local code page. 332 */ 333 if (!use_utf_8) 334 utf_8_to_acp_truncated(errbuf); 335 #else 336 /* 337 * Either Windows without _wcserror_s() or not Windows. Let pcap_strerror() 338 * solve the non-UTF-16 part of this problem space. 339 */ 340 snprintf(p, errbuflen_remaining, "%s", pcap_strerror(errnum)); 341 #endif 342 } 343 344 #ifdef _WIN32 345 /* 346 * Generate an error message based on a format, arguments, and a 347 * Win32 error, with a message for the Win32 error after the formatted output. 348 */ 349 void 350 pcapint_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum, 351 const char *fmt, ...) 352 { 353 va_list ap; 354 355 va_start(ap, fmt); 356 pcapint_vfmt_errmsg_for_win32_err(errbuf, errbuflen, errnum, fmt, ap); 357 va_end(ap); 358 } 359 360 void 361 pcapint_vfmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum, 362 const char *fmt, va_list ap) 363 { 364 size_t msglen; 365 char *p; 366 size_t errbuflen_remaining; 367 DWORD retval; 368 wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE]; 369 size_t utf_8_len; 370 371 vsnprintf(errbuf, errbuflen, fmt, ap); 372 msglen = strlen(errbuf); 373 374 /* 375 * Do we have enough space to append ": "? 376 * Including the terminating '\0', that's 3 bytes. 377 */ 378 if (msglen + 3 > errbuflen) { 379 /* No - just give them what we've produced. */ 380 return; 381 } 382 p = errbuf + msglen; 383 errbuflen_remaining = errbuflen - msglen; 384 *p++ = ':'; 385 *p++ = ' '; 386 *p = '\0'; 387 msglen += 2; 388 errbuflen_remaining -= 2; 389 390 /* 391 * Now append the string for the error code. 392 * 393 * XXX - what language ID to use? 394 * 395 * For UN*Xes, pcap_strerror() may or may not return localized 396 * strings. 397 * 398 * We currently don't have localized messages for libpcap, but 399 * we might want to do so. On the other hand, if most of these 400 * messages are going to be read by libpcap developers and 401 * perhaps by developers of libpcap-based applications, English 402 * might be a better choice, so the developer doesn't have to 403 * get the message translated if it's in a language they don't 404 * happen to understand. 405 */ 406 retval = FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_MAX_WIDTH_MASK, 407 NULL, errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), 408 utf_16_errbuf, PCAP_ERRBUF_SIZE, NULL); 409 if (retval == 0) { 410 /* 411 * Failed. 412 */ 413 snprintf(p, errbuflen_remaining, 414 "Couldn't get error message for error (%lu)", errnum); 415 return; 416 } 417 418 /* 419 * Now convert it from UTF-16LE to UTF-8. 420 */ 421 p = utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining); 422 423 /* 424 * Now append the error number, if it fits. 425 */ 426 utf_8_len = p - errbuf; 427 errbuflen_remaining -= utf_8_len; 428 if (utf_8_len == 0) { 429 /* The message was empty. */ 430 snprintf(p, errbuflen_remaining, "(%lu)", errnum); 431 } else 432 snprintf(p, errbuflen_remaining, " (%lu)", errnum); 433 434 /* 435 * Now, if we're not in UTF-8 mode, convert errbuf to the 436 * local code page. 437 */ 438 if (!use_utf_8) 439 utf_8_to_acp_truncated(errbuf); 440 } 441 #endif 442