xref: /freebsd/contrib/libpcap/fmtutils.c (revision 02e9120893770924227138ba49df1edb3896112a)
1 /*
2  * Copyright (c) 1993, 1994, 1995, 1996, 1997, 1998
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the Computer Systems
16  *	Engineering Group at Lawrence Berkeley Laboratory.
17  * 4. Neither the name of the University nor of the Laboratory may be used
18  *    to endorse or promote products derived from this software without
19  *    specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 /*
35  * Utilities for message formatting used both by libpcap and rpcapd.
36  */
37 
38 #ifdef HAVE_CONFIG_H
39 #include <config.h>
40 #endif
41 
42 #include "ftmacros.h"
43 
44 #include <stddef.h>
45 #include <stdarg.h>
46 #include <stdio.h>
47 #include <string.h>
48 #include <errno.h>
49 
50 #include "pcap-int.h"
51 
52 #include "portability.h"
53 
54 #include "fmtutils.h"
55 
56 #ifdef _WIN32
57 #include "charconv.h"
58 #endif
59 
/*
 * Set the encoding used for the error messages generated below.
 *
 * Only the Windows build keeps any state for this; everywhere else
 * the request is accepted and ignored.
 */
#ifdef _WIN32
/*
 * True if we should use UTF-8.  It starts out false (static storage
 * is zero-initialized) and this file only ever sets it, never clears
 * it.
 */
static int use_utf_8;
#endif

void
pcap_fmt_set_encoding(unsigned int opts)
{
#ifdef _WIN32
	/*
	 * Switch to UTF-8 only when exactly that encoding was asked
	 * for; any other value leaves the current setting alone.
	 */
	if (opts == PCAP_CHAR_ENC_UTF_8) {
		use_utf_8 = 1;
	}
#else
	/*
	 * Nothing to do here.
	 */
	(void)opts;
#endif
}
84 
85 #ifdef _WIN32
/*
 * Convert a null-terminated UTF-16LE string to UTF-8, putting it into
 * a buffer starting at the specified location and stopping if we go
 * past the specified size.  This will only put out complete UTF-8
 * sequences.
 *
 * We do this ourselves because Microsoft doesn't offer a "convert and
 * stop at a UTF-8 character boundary if we run out of space" routine.
 *
 * utf_16 is the null-terminated input, utf_8 is the output buffer and
 * utf_8_len is the size of that buffer in bytes, including the room
 * needed for the terminating '\0'.
 *
 * Unpaired surrogates in the input are replaced by U+FFFD REPLACEMENT
 * CHARACTER.
 *
 * Returns a pointer to the terminating '\0' that was written, so that
 * the caller can append to the result.  If utf_8_len is 0 nothing is
 * written at all and utf_8 itself is returned.
 */
#define IS_LEADING_SURROGATE(c) \
	((c) >= 0xd800 && (c) < 0xdc00)
#define IS_TRAILING_SURROGATE(c) \
	((c) >= 0xdc00 && (c) < 0xe000)
#define SURROGATE_VALUE(leading, trailing) \
	(((((leading) - 0xd800) << 10) | ((trailing) - 0xdc00)) + 0x10000)
#define REPLACEMENT_CHARACTER	0x0FFFD

static char *
utf_16le_to_utf_8_truncated(const wchar_t *utf_16, char *utf_8,
    size_t utf_8_len)
{
	wchar_t c, c2;
	uint32_t uc;

	if (utf_8_len == 0) {
		/*
		 * Not even enough room for a trailing '\0'.
		 * Don't put anything into the buffer.
		 */
		return (utf_8);
	}

	while ((c = *utf_16++) != '\0') {
		if (IS_LEADING_SURROGATE(c)) {
			/*
			 * Leading surrogate.  Must be followed by
			 * a trailing surrogate.
			 */
			c2 = *utf_16;
			if (c2 == '\0') {
				/*
				 * Oops, string ends with a lead
				 * surrogate.  Try to drop in
				 * a REPLACEMENT CHARACTER, and
				 * don't move the string pointer,
				 * so on the next trip through
				 * the loop we grab the terminating
				 * '\0' and quit.
				 */
				uc = REPLACEMENT_CHARACTER;
			} else {
				/*
				 * OK, we can consume this 2-octet
				 * value.  Note that it is consumed
				 * even if it turns out not to be a
				 * trailing surrogate.
				 */
				utf_16++;
				if (IS_TRAILING_SURROGATE(c2)) {
					/*
					 * Trailing surrogate.
					 * This calculation will,
					 * for c being a leading
					 * surrogate and c2 being
					 * a trailing surrogate,
					 * produce a value between
					 * 0x10000 and 0x10ffff,
					 * so it's always going to be
					 * a valid Unicode code point.
					 */
					uc = SURROGATE_VALUE(c, c2);
				} else {
					/*
					 * Not a trailing surrogate;
					 * try to drop in a
					 * REPLACEMENT CHARACTER.
					 */
					uc = REPLACEMENT_CHARACTER;
				}
			}
		} else {
			/*
			 * Not a leading surrogate.
			 */
			if (IS_TRAILING_SURROGATE(c)) {
				/*
				 * Trailing surrogate without
				 * a preceding leading surrogate.
				 * Try to drop in a REPLACEMENT
				 * CHARACTER.
				 */
				uc = REPLACEMENT_CHARACTER;
			} else {
				/*
				 * This is a valid BMP character;
				 * drop it in.
				 */
				uc = c;
			}
		}

		/*
		 * OK, uc is a valid Unicode character; how
		 * many bytes worth of UTF-8 does it require?
		 *
		 * In every case below we insist on room for the
		 * whole sequence *plus* a trailing '\0' before
		 * writing anything, and we reduce utf_8_len by
		 * exactly the number of bytes written, so that
		 * utf_8_len always describes the space left at
		 * utf_8.
		 */
		if (uc < 0x0080) {
			/* 1 byte. */
			if (utf_8_len < 2) {
				/*
				 * Not enough room for that byte
				 * plus a trailing '\0'.
				 */
				break;
			}
			*utf_8++ = (char)uc;
			utf_8_len -= 1;
		} else if (uc < 0x0800) {
			/* 2 bytes: 110xxxxx 10xxxxxx. */
			if (utf_8_len < 3) {
				/*
				 * Not enough room for those bytes
				 * plus a trailing '\0'.
				 */
				break;
			}
			*utf_8++ = (char)(((uc >> 6) & 0x1F) | 0xC0);
			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
			utf_8_len -= 2;
		} else if (uc < 0x010000) {
			/* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx. */
			if (utf_8_len < 4) {
				/*
				 * Not enough room for those bytes
				 * plus a trailing '\0'.
				 */
				break;
			}
			*utf_8++ = (char)(((uc >> 12) & 0x0F) | 0xE0);
			*utf_8++ = (char)(((uc >> 6) & 0x3F) | 0x80);
			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
			utf_8_len -= 3;
		} else {
			/* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. */
			if (utf_8_len < 5) {
				/*
				 * Not enough room for those bytes
				 * plus a trailing '\0'.
				 */
				break;
			}
			/*
			 * The lead byte carries 3 bits of the code
			 * point (bits 18-20); bit 20 is set for
			 * everything in plane 16, so the mask has
			 * to be 0x07, not 0x03.
			 */
			*utf_8++ = (char)(((uc >> 18) & 0x07) | 0xF0);
			*utf_8++ = (char)(((uc >> 12) & 0x3F) | 0x80);
			*utf_8++ = (char)(((uc >> 6) & 0x3F) | 0x80);
			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
			utf_8_len -= 4;
		}
	}

	/*
	 * OK, we have enough room for (at least) a trailing '\0'.
	 * (We started out with enough room, thanks to the test
	 * for a zero-length buffer at the beginning, and if
	 * there wasn't enough room for any character we wanted
	 * to put into the buffer *plus* a trailing '\0',
	 * we'd have quit before putting it into the buffer,
	 * and thus would have left enough room for the trailing
	 * '\0'.)
	 *
	 * Drop it in.
	 */
	*utf_8 = '\0';

	/*
	 * Return a pointer to the terminating '\0', in case we
	 * want to drop something in after that.
	 */
	return (utf_8);
}
262 #endif /* _WIN32 */
263 
/*
 * Generate an error message based on a format, arguments, and an
 * errno, with a message for the errno after the formatted output.
 *
 * This is the variadic front end: it packages the arguments that
 * follow fmt into a va_list and hands everything, unchanged, to
 * pcap_vfmt_errmsg_for_errno(), which does the actual work.
 */
void
pcap_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
    const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	pcap_vfmt_errmsg_for_errno(errbuf, errbuflen, errnum, fmt, args);
	va_end(args);
}
278 
/*
 * va_list version of pcap_fmt_errmsg_for_errno().
 *
 * Formats fmt/ap into errbuf (at most errbuflen bytes, including the
 * terminating '\0'), then, if there is room for it, appends ": "
 * followed by a message describing the errno value errnum.  If there
 * is no room for the ": ", the formatted message is left as it is.
 *
 * If errbuflen is 0 the buffer is not touched at all.
 */
void
pcap_vfmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
    const char *fmt, va_list ap)
{
	size_t msglen;
	char *p;
	size_t errbuflen_remaining;

	if (errbuflen == 0) {
		/*
		 * No room for even a terminating '\0'.  vsnprintf()
		 * would write nothing into the buffer, so calling
		 * strlen() on it afterwards would read whatever
		 * happened to be there; just give up.
		 */
		return;
	}

	(void)vsnprintf(errbuf, errbuflen, fmt, ap);
	msglen = strlen(errbuf);

	/*
	 * Do we have enough space to append ": "?
	 * Including the terminating '\0', that's 3 bytes.
	 */
	if (msglen + 3 > errbuflen) {
		/* No - just give them what we've produced. */
		return;
	}
	p = errbuf + msglen;
	errbuflen_remaining = errbuflen - msglen;
	*p++ = ':';
	*p++ = ' ';
	*p = '\0';
	errbuflen_remaining -= 2;

	/*
	 * Now append the string for the error code.
	 */
#if defined(HAVE__WCSERROR_S)
	/*
	 * We have a Windows-style _wcserror_s().
	 * Generate a UTF-16LE error message.
	 */
	wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
	errno_t err = _wcserror_s(utf_16_errbuf, PCAP_ERRBUF_SIZE, errnum);
	if (err != 0) {
		/*
		 * It doesn't appear to be documented anywhere obvious
		 * what the error returns from _wcserror_s().
		 */
		snprintf(p, errbuflen_remaining, "Error %d", errnum);
		return;
	}

	/*
	 * Now convert it from UTF-16LE to UTF-8, dropping it in the
	 * remaining space in the buffer, and truncating it - cleanly,
	 * on a UTF-8 character boundary - if it doesn't fit.
	 */
	utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);

	/*
	 * Now, if we're not in UTF-8 mode, convert errbuf to the
	 * local code page.
	 */
	if (!use_utf_8)
		utf_8_to_acp_truncated(errbuf);
#elif defined(HAVE_GNU_STRERROR_R)
	/*
	 * We have a GNU-style strerror_r(), which is *not* guaranteed to
	 * do anything to the buffer handed to it, and which returns a
	 * pointer to the error string, which may or may not be in
	 * the buffer.
	 *
	 * It is, however, guaranteed to succeed.
	 */
	char strerror_buf[PCAP_ERRBUF_SIZE];
	char *errstring = strerror_r(errnum, strerror_buf, PCAP_ERRBUF_SIZE);
	snprintf(p, errbuflen_remaining, "%s", errstring);
#elif defined(HAVE_POSIX_STRERROR_R)
	/*
	 * We have a POSIX-style strerror_r(), which is guaranteed to fill
	 * in the buffer, but is not guaranteed to succeed.
	 */
	int err = strerror_r(errnum, p, errbuflen_remaining);
	if (err == EINVAL) {
		/*
		 * UNIX 03 says this isn't guaranteed to produce a
		 * fallback error message.
		 */
		snprintf(p, errbuflen_remaining, "Unknown error: %d",
		    errnum);
	} else if (err == ERANGE) {
		/*
		 * UNIX 03 says this isn't guaranteed to produce a
		 * fallback error message.
		 */
		snprintf(p, errbuflen_remaining,
		    "Message for error %d is too long", errnum);
	}
#else
	/*
	 * We have neither _wcserror_s() nor strerror_r(), so we're
	 * stuck with using pcap_strerror().
	 */
	snprintf(p, errbuflen_remaining, "%s", pcap_strerror(errnum));
#endif
}
378 
379 #ifdef _WIN32
380 /*
381  * Generate an error message based on a format, arguments, and a
382  * Win32 error, with a message for the Win32 error after the formatted output.
383  */
384 void
385 pcap_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
386     const char *fmt, ...)
387 {
388 	va_list ap;
389 
390 	va_start(ap, fmt);
391 	pcap_vfmt_errmsg_for_win32_err(errbuf, errbuflen, errnum, fmt, ap);
392 	va_end(ap);
393 }
394 
395 void
396 pcap_vfmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
397     const char *fmt, va_list ap)
398 {
399 	size_t msglen;
400 	char *p;
401 	size_t errbuflen_remaining;
402 	DWORD retval;
403 	wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
404 	size_t utf_8_len;
405 
406 	vsnprintf(errbuf, errbuflen, fmt, ap);
407 	msglen = strlen(errbuf);
408 
409 	/*
410 	 * Do we have enough space to append ": "?
411 	 * Including the terminating '\0', that's 3 bytes.
412 	 */
413 	if (msglen + 3 > errbuflen) {
414 		/* No - just give them what we've produced. */
415 		return;
416 	}
417 	p = errbuf + msglen;
418 	errbuflen_remaining = errbuflen - msglen;
419 	*p++ = ':';
420 	*p++ = ' ';
421 	*p = '\0';
422 	msglen += 2;
423 	errbuflen_remaining -= 2;
424 
425 	/*
426 	 * Now append the string for the error code.
427 	 *
428 	 * XXX - what language ID to use?
429 	 *
430 	 * For UN*Xes, pcap_strerror() may or may not return localized
431 	 * strings.
432 	 *
433 	 * We currently don't have localized messages for libpcap, but
434 	 * we might want to do so.  On the other hand, if most of these
435 	 * messages are going to be read by libpcap developers and
436 	 * perhaps by developers of libpcap-based applications, English
437 	 * might be a better choice, so the developer doesn't have to
438 	 * get the message translated if it's in a language they don't
439 	 * happen to understand.
440 	 */
441 	retval = FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_MAX_WIDTH_MASK,
442 	    NULL, errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
443 	    utf_16_errbuf, PCAP_ERRBUF_SIZE, NULL);
444 	if (retval == 0) {
445 		/*
446 		 * Failed.
447 		 */
448 		snprintf(p, errbuflen_remaining,
449 		    "Couldn't get error message for error (%lu)", errnum);
450 		return;
451 	}
452 
453 	/*
454 	 * Now convert it from UTF-16LE to UTF-8.
455 	 */
456 	p = utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);
457 
458 	/*
459 	 * Now append the error number, if it fits.
460 	 */
461 	utf_8_len = p - errbuf;
462 	errbuflen_remaining -= utf_8_len;
463 	if (utf_8_len == 0) {
464 		/* The message was empty. */
465 		snprintf(p, errbuflen_remaining, "(%lu)", errnum);
466 	} else
467 		snprintf(p, errbuflen_remaining, " (%lu)", errnum);
468 
469 	/*
470 	 * Now, if we're not in UTF-8 mode, convert errbuf to the
471 	 * local code page.
472 	 */
473 	if (!use_utf_8)
474 		utf_8_to_acp_truncated(errbuf);
475 }
476 #endif
477