1 /* 2 * Copyright (c) 1993, 1994, 1995, 1996, 1997, 1998 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the Computer Systems 16 * Engineering Group at Lawrence Berkeley Laboratory. 17 * 4. Neither the name of the University nor of the Laboratory may be used 18 * to endorse or promote products derived from this software without 19 * specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 */ 33 34 /* 35 * Utilities for message formatting used both by libpcap and rpcapd. 36 */ 37 38 #ifdef HAVE_CONFIG_H 39 #include <config.h> 40 #endif 41 42 #include "ftmacros.h" 43 44 #include <stddef.h> 45 #include <stdarg.h> 46 #include <stdio.h> 47 #include <string.h> 48 #include <errno.h> 49 50 #include "pcap-int.h" 51 52 #include "portability.h" 53 54 #include "fmtutils.h" 55 56 #ifdef _WIN32 57 #include "charconv.h" 58 #endif 59 60 /* 61 * Set the encoding. 62 */ 63 #ifdef _WIN32 64 /* 65 * True if we shouold use UTF-8. 66 */ 67 static int use_utf_8; 68 69 void 70 pcap_fmt_set_encoding(unsigned int opts) 71 { 72 if (opts == PCAP_CHAR_ENC_UTF_8) 73 use_utf_8 = 1; 74 } 75 #else 76 void 77 pcap_fmt_set_encoding(unsigned int opts _U_) 78 { 79 /* 80 * Nothing to do here. 81 */ 82 } 83 #endif 84 85 #ifdef _WIN32 86 /* 87 * Convert a null-terminated UTF-16LE string to UTF-8, putting it into 88 * a buffer starting at the specified location and stopping if we go 89 * past the specified size. This will only put out complete UTF-8 90 * sequences. 91 * 92 * We do this ourselves because Microsoft doesn't offer a "convert and 93 * stop at a UTF-8 character boundary if we run out of space" routine. 94 */ 95 #define IS_LEADING_SURROGATE(c) \ 96 ((c) >= 0xd800 && (c) < 0xdc00) 97 #define IS_TRAILING_SURROGATE(c) \ 98 ((c) >= 0xdc00 && (c) < 0xe000) 99 #define SURROGATE_VALUE(leading, trailing) \ 100 (((((leading) - 0xd800) << 10) | ((trailing) - 0xdc00)) + 0x10000) 101 #define REPLACEMENT_CHARACTER 0x0FFFD 102 103 static char * 104 utf_16le_to_utf_8_truncated(const wchar_t *utf_16, char *utf_8, 105 size_t utf_8_len) 106 { 107 wchar_t c, c2; 108 uint32_t uc; 109 110 if (utf_8_len == 0) { 111 /* 112 * Not even enough room for a trailing '\0'. 113 * Don't put anything into the buffer. 114 */ 115 return (utf_8); 116 } 117 118 while ((c = *utf_16++) != '\0') { 119 if (IS_LEADING_SURROGATE(c)) { 120 /* 121 * Leading surrogate. Must be followed by 122 * a trailing surrogate. 123 */ 124 c2 = *utf_16; 125 if (c2 == '\0') { 126 /* 127 * Oops, string ends with a lead 128 * surrogate. Try to drop in 129 * a REPLACEMENT CHARACTER, and 130 * don't move the string pointer, 131 * so on the next trip through 132 * the loop we grab the terminating 133 * '\0' and quit. 134 */ 135 uc = REPLACEMENT_CHARACTER; 136 } else { 137 /* 138 * OK, we can consume this 2-octet 139 * value. 140 */ 141 utf_16++; 142 if (IS_TRAILING_SURROGATE(c2)) { 143 /* 144 * Trailing surrogate. 145 * This calculation will, 146 * for c being a leading 147 * surrogate and c2 being 148 * a trailing surrogate, 149 * produce a value between 150 * 0x100000 and 0x10ffff, 151 * so it's always going to be 152 * a valid Unicode code point. 153 */ 154 uc = SURROGATE_VALUE(c, c2); 155 } else { 156 /* 157 * Not a trailing surroage; 158 * try to drop in a 159 * REPLACEMENT CHARACTER. 160 */ 161 uc = REPLACEMENT_CHARACTER; 162 } 163 } 164 } else { 165 /* 166 * Not a leading surrogate. 167 */ 168 if (IS_TRAILING_SURROGATE(c)) { 169 /* 170 * Trailing surrogate without 171 * a preceding leading surrogate. 172 * Try to drop in a REPLACEMENT 173 * CHARACTER. 174 */ 175 uc = REPLACEMENT_CHARACTER; 176 } else { 177 /* 178 * This is a valid BMP character; 179 * drop it in. 180 */ 181 uc = c; 182 } 183 } 184 185 /* 186 * OK, uc is a valid Unicode character; how 187 * many bytes worth of UTF-8 does it require? 188 */ 189 if (uc < 0x0080) { 190 /* 1 byte. */ 191 if (utf_8_len < 2) { 192 /* 193 * Not enough room for that byte 194 * plus a trailing '\0'. 195 */ 196 break; 197 } 198 *utf_8++ = (char)uc; 199 utf_8_len--; 200 } else if (uc < 0x0800) { 201 /* 2 bytes. */ 202 if (utf_8_len < 3) { 203 /* 204 * Not enough room for those bytes 205 * plus a trailing '\0'. 206 */ 207 break; 208 } 209 *utf_8++ = ((uc >> 6) & 0x3F) | 0xC0; 210 *utf_8++ = ((uc >> 0) & 0x3F) | 0x80; 211 utf_8_len -= 2; 212 } else if (uc < 0x010000) { 213 /* 3 bytes. */ 214 if (utf_8_len < 4) { 215 /* 216 * Not enough room for those bytes 217 * plus a trailing '\0'. 218 */ 219 break; 220 } 221 *utf_8++ = ((uc >> 12) & 0x0F) | 0xE0; 222 *utf_8++ = ((uc >> 6) & 0x3F) | 0x80; 223 *utf_8++ = ((uc >> 0) & 0x3F) | 0x80; 224 utf_8_len -= 3; 225 } else { 226 /* 4 bytes. */ 227 if (utf_8_len < 5) { 228 /* 229 * Not enough room for those bytes 230 * plus a trailing '\0'. 231 */ 232 break; 233 } 234 *utf_8++ = ((uc >> 18) & 0x03) | 0xF0; 235 *utf_8++ = ((uc >> 12) & 0x3F) | 0x80; 236 *utf_8++ = ((uc >> 6) & 0x3F) | 0x80; 237 *utf_8++ = ((uc >> 0) & 0x3F) | 0x80; 238 utf_8_len -= 3; 239 } 240 } 241 242 /* 243 * OK, we have enough room for (at least) a trailing '\0'. 244 * (We started out with enough room, thanks to the test 245 * for a zero-length buffer at the beginning, and if 246 * there wasn't enough room for any character we wanted 247 * to put into the buffer *plus* a trailing '\0', 248 * we'd have quit before putting it into the buffer, 249 * and thus would have left enough room for the trailing 250 * '\0'.) 251 * 252 * Drop it in. 253 */ 254 *utf_8 = '\0'; 255 256 /* 257 * Return a pointer to the terminating '\0', in case we 258 * want to drop something in after that. 259 */ 260 return (utf_8); 261 } 262 #endif /* _WIN32 */ 263 264 /* 265 * Generate an error message based on a format, arguments, and an 266 * errno, with a message for the errno after the formatted output. 267 */ 268 void 269 pcap_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum, 270 const char *fmt, ...) 271 { 272 va_list ap; 273 274 va_start(ap, fmt); 275 pcap_vfmt_errmsg_for_errno(errbuf, errbuflen, errnum, fmt, ap); 276 va_end(ap); 277 } 278 279 void 280 pcap_vfmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum, 281 const char *fmt, va_list ap) 282 { 283 size_t msglen; 284 char *p; 285 size_t errbuflen_remaining; 286 287 (void)vsnprintf(errbuf, errbuflen, fmt, ap); 288 msglen = strlen(errbuf); 289 290 /* 291 * Do we have enough space to append ": "? 292 * Including the terminating '\0', that's 3 bytes. 293 */ 294 if (msglen + 3 > errbuflen) { 295 /* No - just give them what we've produced. */ 296 return; 297 } 298 p = errbuf + msglen; 299 errbuflen_remaining = errbuflen - msglen; 300 *p++ = ':'; 301 *p++ = ' '; 302 *p = '\0'; 303 errbuflen_remaining -= 2; 304 305 /* 306 * Now append the string for the error code. 307 */ 308 #if defined(HAVE__WCSERROR_S) 309 /* 310 * We have a Windows-style _wcserror_s(). 311 * Generate a UTF-16LE error message. 312 */ 313 wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE]; 314 errno_t err = _wcserror_s(utf_16_errbuf, PCAP_ERRBUF_SIZE, errnum); 315 if (err != 0) { 316 /* 317 * It doesn't appear to be documented anywhere obvious 318 * what the error returns from _wcserror_s(). 319 */ 320 snprintf(p, errbuflen_remaining, "Error %d", errnum); 321 return; 322 } 323 324 /* 325 * Now convert it from UTF-16LE to UTF-8, dropping it in the 326 * remaining space in the buffer, and truncating it - cleanly, 327 * on a UTF-8 character boundary - if it doesn't fit. 328 */ 329 utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining); 330 331 /* 332 * Now, if we're not in UTF-8 mode, convert errbuf to the 333 * local code page. 334 */ 335 if (!use_utf_8) 336 utf_8_to_acp_truncated(errbuf); 337 #elif defined(HAVE_GNU_STRERROR_R) 338 /* 339 * We have a GNU-style strerror_r(), which is *not* guaranteed to 340 * do anything to the buffer handed to it, and which returns a 341 * pointer to the error string, which may or may not be in 342 * the buffer. 343 * 344 * It is, however, guaranteed to succeed. 345 */ 346 char strerror_buf[PCAP_ERRBUF_SIZE]; 347 char *errstring = strerror_r(errnum, strerror_buf, PCAP_ERRBUF_SIZE); 348 snprintf(p, errbuflen_remaining, "%s", errstring); 349 #elif defined(HAVE_POSIX_STRERROR_R) 350 /* 351 * We have a POSIX-style strerror_r(), which is guaranteed to fill 352 * in the buffer, but is not guaranteed to succeed. 353 */ 354 int err = strerror_r(errnum, p, errbuflen_remaining); 355 if (err == EINVAL) { 356 /* 357 * UNIX 03 says this isn't guaranteed to produce a 358 * fallback error message. 359 */ 360 snprintf(p, errbuflen_remaining, "Unknown error: %d", 361 errnum); 362 } else if (err == ERANGE) { 363 /* 364 * UNIX 03 says this isn't guaranteed to produce a 365 * fallback error message. 366 */ 367 snprintf(p, errbuflen_remaining, 368 "Message for error %d is too long", errnum); 369 } 370 #else 371 /* 372 * We have neither _wcserror_s() nor strerror_r(), so we're 373 * stuck with using pcap_strerror(). 374 */ 375 snprintf(p, errbuflen_remaining, "%s", pcap_strerror(errnum)); 376 #endif 377 } 378 379 #ifdef _WIN32 380 /* 381 * Generate an error message based on a format, arguments, and a 382 * Win32 error, with a message for the Win32 error after the formatted output. 383 */ 384 void 385 pcap_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum, 386 const char *fmt, ...) 387 { 388 va_list ap; 389 390 va_start(ap, fmt); 391 pcap_vfmt_errmsg_for_win32_err(errbuf, errbuflen, errnum, fmt, ap); 392 va_end(ap); 393 } 394 395 void 396 pcap_vfmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum, 397 const char *fmt, va_list ap) 398 { 399 size_t msglen; 400 char *p; 401 size_t errbuflen_remaining; 402 DWORD retval; 403 wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE]; 404 size_t utf_8_len; 405 406 vsnprintf(errbuf, errbuflen, fmt, ap); 407 msglen = strlen(errbuf); 408 409 /* 410 * Do we have enough space to append ": "? 411 * Including the terminating '\0', that's 3 bytes. 412 */ 413 if (msglen + 3 > errbuflen) { 414 /* No - just give them what we've produced. */ 415 return; 416 } 417 p = errbuf + msglen; 418 errbuflen_remaining = errbuflen - msglen; 419 *p++ = ':'; 420 *p++ = ' '; 421 *p = '\0'; 422 msglen += 2; 423 errbuflen_remaining -= 2; 424 425 /* 426 * Now append the string for the error code. 427 * 428 * XXX - what language ID to use? 429 * 430 * For UN*Xes, pcap_strerror() may or may not return localized 431 * strings. 432 * 433 * We currently don't have localized messages for libpcap, but 434 * we might want to do so. On the other hand, if most of these 435 * messages are going to be read by libpcap developers and 436 * perhaps by developers of libpcap-based applications, English 437 * might be a better choice, so the developer doesn't have to 438 * get the message translated if it's in a language they don't 439 * happen to understand. 440 */ 441 retval = FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_MAX_WIDTH_MASK, 442 NULL, errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), 443 utf_16_errbuf, PCAP_ERRBUF_SIZE, NULL); 444 if (retval == 0) { 445 /* 446 * Failed. 447 */ 448 snprintf(p, errbuflen_remaining, 449 "Couldn't get error message for error (%lu)", errnum); 450 return; 451 } 452 453 /* 454 * Now convert it from UTF-16LE to UTF-8. 455 */ 456 p = utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining); 457 458 /* 459 * Now append the error number, if it fits. 460 */ 461 utf_8_len = p - errbuf; 462 errbuflen_remaining -= utf_8_len; 463 if (utf_8_len == 0) { 464 /* The message was empty. */ 465 snprintf(p, errbuflen_remaining, "(%lu)", errnum); 466 } else 467 snprintf(p, errbuflen_remaining, " (%lu)", errnum); 468 469 /* 470 * Now, if we're not in UTF-8 mode, convert errbuf to the 471 * local code page. 472 */ 473 if (!use_utf_8) 474 utf_8_to_acp_truncated(errbuf); 475 } 476 #endif 477