1 /*
2 * Copyright (c) 1993, 1994, 1995, 1996, 1997, 1998
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the Computer Systems
16 * Engineering Group at Lawrence Berkeley Laboratory.
17 * 4. Neither the name of the University nor of the Laboratory may be used
18 * to endorse or promote products derived from this software without
19 * specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34 /*
35 * Utilities for message formatting used both by libpcap and rpcapd.
36 */
37
38 #include <config.h>
39
40 #include "ftmacros.h"
41
42 #include <stddef.h>
43 #include <stdarg.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <errno.h>
47
48 #include "pcap-int.h"
49
50 #include "portability.h"
51
52 #include "fmtutils.h"
53
54 #ifdef _WIN32
55 #include "charconv.h"
56 #endif
57
58 /*
59 * Set the encoding.
60 */
61 #ifdef _WIN32
62 /*
63 * True if we should use UTF-8.
64 */
65 static int use_utf_8;
66
67 void
pcapint_fmt_set_encoding(unsigned int opts)68 pcapint_fmt_set_encoding(unsigned int opts)
69 {
70 if (opts == PCAP_CHAR_ENC_UTF_8)
71 use_utf_8 = 1;
72 }
73 #else
74 void
pcapint_fmt_set_encoding(unsigned int opts _U_)75 pcapint_fmt_set_encoding(unsigned int opts _U_)
76 {
77 /*
78 * Nothing to do here.
79 */
80 }
81 #endif
82
83 #ifdef _WIN32
/*
 * Helpers for picking apart UTF-16 surrogate pairs.
 *
 * A leading (high) surrogate is in [0xd800, 0xdc00) and a trailing (low)
 * surrogate is in [0xdc00, 0xe000); a valid pair combines into a code
 * point in the range 0x10000 through 0x10ffff.
 */
#define IS_LEADING_SURROGATE(c) \
	((c) >= 0xd800 && (c) < 0xdc00)
#define IS_TRAILING_SURROGATE(c) \
	((c) >= 0xdc00 && (c) < 0xe000)
#define SURROGATE_VALUE(leading, trailing) \
	(((((leading) - 0xd800) << 10) | ((trailing) - 0xdc00)) + 0x10000)
#define REPLACEMENT_CHARACTER	0x0FFFD

/*
 * Convert a null-terminated UTF-16LE string to UTF-8, putting it into
 * a buffer starting at the specified location and stopping if we would
 * go past the specified size.  This will only put out complete UTF-8
 * sequences, and the output is always null-terminated unless the
 * buffer size is zero.
 *
 * We do this ourselves because Microsoft doesn't offer a "convert and
 * stop at a UTF-8 character boundary if we run out of space" routine.
 *
 * utf_16:    the null-terminated UTF-16 input.
 * utf_8:     where to start writing the UTF-8 output.
 * utf_8_len: the number of bytes available at utf_8, including the
 *            byte needed for the terminating '\0'.
 *
 * Returns a pointer to the terminating '\0' that was written, so that
 * the caller can append more text there; if utf_8_len is 0, nothing is
 * written and utf_8 itself is returned.
 *
 * Unpaired surrogates are replaced with U+FFFD REPLACEMENT CHARACTER.
 * A leading surrogate followed by a code unit that is neither a
 * trailing surrogate nor the terminator consumes both code units and
 * produces a single REPLACEMENT CHARACTER.
 */
static char *
utf_16le_to_utf_8_truncated(const wchar_t *utf_16, char *utf_8,
    size_t utf_8_len)
{
	wchar_t c, c2;
	uint32_t uc;
	size_t nbytes;

	if (utf_8_len == 0) {
		/*
		 * Not even enough room for a trailing '\0'.
		 * Don't put anything into the buffer.
		 */
		return (utf_8);
	}

	while ((c = *utf_16++) != '\0') {
		/*
		 * Work out which Unicode code point this code unit
		 * (or pair of code units) stands for.
		 */
		if (IS_LEADING_SURROGATE(c)) {
			/*
			 * Leading surrogate.  Must be followed by
			 * a trailing surrogate.
			 */
			c2 = *utf_16;
			if (c2 == '\0') {
				/*
				 * The string ends with a lone leading
				 * surrogate.  Substitute a REPLACEMENT
				 * CHARACTER and don't advance the input
				 * pointer, so that the next trip through
				 * the loop sees the terminating '\0'.
				 */
				uc = REPLACEMENT_CHARACTER;
			} else {
				/*
				 * Consume the second code unit whether
				 * or not it completes the pair.
				 */
				utf_16++;
				if (IS_TRAILING_SURROGATE(c2)) {
					/*
					 * A valid pair always yields a
					 * value between 0x10000 and
					 * 0x10ffff, which is a valid
					 * Unicode code point.
					 */
					uc = (uint32_t)SURROGATE_VALUE(c, c2);
				} else {
					/* Not a trailing surrogate. */
					uc = REPLACEMENT_CHARACTER;
				}
			}
		} else if (IS_TRAILING_SURROGATE(c)) {
			/*
			 * Trailing surrogate without a preceding
			 * leading surrogate.
			 */
			uc = REPLACEMENT_CHARACTER;
		} else {
			/* This is a valid BMP character. */
			uc = (uint32_t)c;
		}

		/*
		 * How many bytes of UTF-8 does that code point require?
		 */
		if (uc < 0x0080)
			nbytes = 1;
		else if (uc < 0x0800)
			nbytes = 2;
		else if (uc < 0x010000)
			nbytes = 3;
		else
			nbytes = 4;

		/*
		 * Stop, without emitting a partial sequence, if there
		 * isn't room for those bytes plus a trailing '\0'.
		 */
		if (utf_8_len < nbytes + 1)
			break;

		switch (nbytes) {

		case 1:
			*utf_8++ = (char)uc;
			break;

		case 2:
			*utf_8++ = (char)(((uc >> 6) & 0x1F) | 0xC0);
			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
			break;

		case 3:
			*utf_8++ = (char)(((uc >> 12) & 0x0F) | 0xE0);
			*utf_8++ = (char)(((uc >> 6) & 0x3F) | 0x80);
			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
			break;

		default:
			/*
			 * The lead byte of a 4-byte sequence carries
			 * three payload bits; code points at or above
			 * 0x100000 need the top one.
			 */
			*utf_8++ = (char)(((uc >> 18) & 0x07) | 0xF0);
			*utf_8++ = (char)(((uc >> 12) & 0x3F) | 0x80);
			*utf_8++ = (char)(((uc >> 6) & 0x3F) | 0x80);
			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
			break;
		}

		/*
		 * Account for exactly as many bytes as were written,
		 * so that the space check above stays accurate.
		 */
		utf_8_len -= nbytes;
	}

	/*
	 * We have enough room for (at least) a trailing '\0': the
	 * buffer wasn't empty to begin with, and no sequence is ever
	 * written unless it leaves one byte free after it.
	 */
	*utf_8 = '\0';

	/*
	 * Return a pointer to the terminating '\0', in case the caller
	 * wants to drop something in after that.
	 */
	return (utf_8);
}
260 #endif /* _WIN32 */
261
/*
 * Build an error message from a printf-style format and its arguments,
 * followed by a message for the given errno value.
 *
 * This is the variadic front end; it packages its trailing arguments
 * into a va_list and hands everything to pcapint_vfmt_errmsg_for_errno().
 */
void
pcapint_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
    const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	pcapint_vfmt_errmsg_for_errno(errbuf, errbuflen, errnum, fmt, args);
	va_end(args);
}
276
/*
 * va_list version of the errno message formatter.
 *
 * Formats fmt/ap into errbuf, then appends ": " and a message for
 * errnum, truncating so that at most errbuflen bytes, including the
 * terminating '\0', are used.  If the formatted text leaves no room
 * for ": ", only the formatted text is produced.  If errbuflen is 0,
 * errbuf is not touched at all.
 */
void
pcapint_vfmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
    const char *fmt, va_list ap)
{
	size_t msglen;
	char *p;
	size_t errbuflen_remaining;

	if (errbuflen == 0) {
		/*
		 * vsnprintf() stores nothing in a zero-length buffer,
		 * so there would be no string for strlen() to measure.
		 */
		return;
	}

	(void)vsnprintf(errbuf, errbuflen, fmt, ap);
	msglen = strlen(errbuf);

	/*
	 * Do we have enough space to append ": "?
	 * Including the terminating '\0', that's 3 bytes.
	 */
	if (msglen + 3 > errbuflen) {
		/* No - just give them what we've produced. */
		return;
	}
	p = errbuf + msglen;
	errbuflen_remaining = errbuflen - msglen;
	*p++ = ':';
	*p++ = ' ';
	*p = '\0';
	errbuflen_remaining -= 2;

	/*
	 * Now append the string for the error code.
	 */
#if defined(HAVE__WCSERROR_S)
	/*
	 * We have a Windows-style _wcserror_s().
	 * Generate a UTF-16LE error message.
	 */
	wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
	errno_t err = _wcserror_s(utf_16_errbuf, PCAP_ERRBUF_SIZE, errnum);
	if (err != 0) {
		/*
		 * It doesn't appear to be documented anywhere obvious
		 * what the error returns from _wcserror_s() are, so
		 * just report the raw error number.
		 */
		snprintf(p, errbuflen_remaining, "Error %d", errnum);
		return;
	}

	/*
	 * Now convert it from UTF-16LE to UTF-8, dropping it in the
	 * remaining space in the buffer, and truncating it - cleanly,
	 * on a UTF-8 character boundary - if it doesn't fit.
	 */
	utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);

	/*
	 * Now, if we're not in UTF-8 mode, convert errbuf to the
	 * local code page.
	 */
	if (!use_utf_8)
		utf_8_to_acp_truncated(errbuf);
#else
	/*
	 * Either Windows without _wcserror_s() or not Windows.  Let
	 * pcap_strerror() solve the non-UTF-16 part of this problem space.
	 */
	snprintf(p, errbuflen_remaining, "%s", pcap_strerror(errnum));
#endif
}
343
344 #ifdef _WIN32
345 /*
346 * Generate an error message based on a format, arguments, and a
347 * Win32 error, with a message for the Win32 error after the formatted output.
348 */
349 void
pcapint_fmt_errmsg_for_win32_err(char * errbuf,size_t errbuflen,DWORD errnum,const char * fmt,...)350 pcapint_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
351 const char *fmt, ...)
352 {
353 va_list ap;
354
355 va_start(ap, fmt);
356 pcapint_vfmt_errmsg_for_win32_err(errbuf, errbuflen, errnum, fmt, ap);
357 va_end(ap);
358 }
359
360 void
pcapint_vfmt_errmsg_for_win32_err(char * errbuf,size_t errbuflen,DWORD errnum,const char * fmt,va_list ap)361 pcapint_vfmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
362 const char *fmt, va_list ap)
363 {
364 size_t msglen;
365 char *p;
366 size_t errbuflen_remaining;
367 DWORD retval;
368 wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
369 size_t utf_8_len;
370
371 vsnprintf(errbuf, errbuflen, fmt, ap);
372 msglen = strlen(errbuf);
373
374 /*
375 * Do we have enough space to append ": "?
376 * Including the terminating '\0', that's 3 bytes.
377 */
378 if (msglen + 3 > errbuflen) {
379 /* No - just give them what we've produced. */
380 return;
381 }
382 p = errbuf + msglen;
383 errbuflen_remaining = errbuflen - msglen;
384 *p++ = ':';
385 *p++ = ' ';
386 *p = '\0';
387 msglen += 2;
388 errbuflen_remaining -= 2;
389
390 /*
391 * Now append the string for the error code.
392 *
393 * XXX - what language ID to use?
394 *
395 * For UN*Xes, pcap_strerror() may or may not return localized
396 * strings.
397 *
398 * We currently don't have localized messages for libpcap, but
399 * we might want to do so. On the other hand, if most of these
400 * messages are going to be read by libpcap developers and
401 * perhaps by developers of libpcap-based applications, English
402 * might be a better choice, so the developer doesn't have to
403 * get the message translated if it's in a language they don't
404 * happen to understand.
405 */
406 retval = FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_MAX_WIDTH_MASK,
407 NULL, errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
408 utf_16_errbuf, PCAP_ERRBUF_SIZE, NULL);
409 if (retval == 0) {
410 /*
411 * Failed.
412 */
413 snprintf(p, errbuflen_remaining,
414 "Couldn't get error message for error (%lu)", errnum);
415 return;
416 }
417
418 /*
419 * Now convert it from UTF-16LE to UTF-8.
420 */
421 p = utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);
422
423 /*
424 * Now append the error number, if it fits.
425 */
426 utf_8_len = p - errbuf;
427 errbuflen_remaining -= utf_8_len;
428 if (utf_8_len == 0) {
429 /* The message was empty. */
430 snprintf(p, errbuflen_remaining, "(%lu)", errnum);
431 } else
432 snprintf(p, errbuflen_remaining, " (%lu)", errnum);
433
434 /*
435 * Now, if we're not in UTF-8 mode, convert errbuf to the
436 * local code page.
437 */
438 if (!use_utf_8)
439 utf_8_to_acp_truncated(errbuf);
440 }
441 #endif
442