Home | History | Annotate | Line # | Download | only in dist
      1 /*
      2  * Copyright (c) 1993, 1994, 1995, 1996, 1997, 1998
      3  *	The Regents of the University of California.  All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  * 3. All advertising materials mentioning features or use of this software
     14  *    must display the following acknowledgement:
     15  *	This product includes software developed by the Computer Systems
     16  *	Engineering Group at Lawrence Berkeley Laboratory.
     17  * 4. Neither the name of the University nor of the Laboratory may be used
     18  *    to endorse or promote products derived from this software without
     19  *    specific prior written permission.
     20  *
     21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     31  * SUCH DAMAGE.
     32  */
     33 
     34 /*
     35  * Utilities for message formatting used both by libpcap and rpcapd.
     36  */
     37 
     38 #include <config.h>
     39 
     40 #include "ftmacros.h"
     41 
     42 #include <stddef.h>
     43 #include <stdarg.h>
     44 #include <stdio.h>
     45 #include <string.h>
     46 #include <errno.h>
     47 
     48 #include "pcap-int.h"
     49 
     50 #include "portability.h"
     51 
     52 #include "fmtutils.h"
     53 
     54 #ifdef _WIN32
     55 #include "charconv.h"
     56 #endif
     57 
/*
 * Record the character encoding that error messages should use.
 *
 * On Windows, a request for PCAP_CHAR_ENC_UTF_8 sets a flag (nothing
 * in this routine ever clears it again); the error-message formatters
 * in this file consult that flag to decide whether to leave their
 * output in UTF-8 or convert it to the local code page.  On every
 * other platform the argument is ignored.
 */
#ifdef _WIN32
/*
 * Non-zero once UTF-8 output has been requested.
 */
static int use_utf_8;
#endif

void
pcapint_fmt_set_encoding(unsigned int opts)
{
#ifdef _WIN32
	if (opts == PCAP_CHAR_ENC_UTF_8)
		use_utf_8 = 1;
#else
	/* There is no encoding state to record on this platform. */
	(void)opts;
#endif
}
     82 
     83 #ifdef _WIN32
/*
 * Convert a null-terminated UTF-16LE string to UTF-8, putting it into
 * a buffer starting at the specified location and stopping if we go
 * past the specified size.  This will only put out complete UTF-8
 * sequences, and the output is always null-terminated unless the
 * buffer size is zero.
 *
 * We do this ourselves because Microsoft doesn't offer a "convert and
 * stop at a UTF-8 character boundary if we run out of space" routine.
 *
 * Parameters:
 *   utf_16     null-terminated UTF-16LE input string.
 *   utf_8      start of the output buffer.
 *   utf_8_len  size of the output buffer in bytes, including the room
 *              needed for the terminating '\0'.
 *
 * Returns a pointer to the terminating '\0' that was stored, so that
 * the caller can append more text there.  If utf_8_len is zero,
 * nothing is stored and utf_8 itself is returned.
 *
 * Malformed input is not an error: a trailing surrogate with no
 * leading surrogate before it, a leading surrogate at the end of the
 * string, and a leading surrogate followed by something other than a
 * trailing surrogate (in which case both code units are consumed) are
 * each turned into a single U+FFFD REPLACEMENT CHARACTER.
 */
#define IS_LEADING_SURROGATE(c) \
	((c) >= 0xd800 && (c) < 0xdc00)
#define IS_TRAILING_SURROGATE(c) \
	((c) >= 0xdc00 && (c) < 0xe000)
#define SURROGATE_VALUE(leading, trailing) \
	(((((leading) - 0xd800) << 10) | ((trailing) - 0xdc00)) + 0x10000)
#define REPLACEMENT_CHARACTER	0x0FFFD

static char *
utf_16le_to_utf_8_truncated(const wchar_t *utf_16, char *utf_8,
    size_t utf_8_len)
{
	wchar_t c, c2;
	uint32_t uc;
	size_t nbytes;

	if (utf_8_len == 0) {
		/*
		 * Not even enough room for a trailing '\0'.
		 * Don't put anything into the buffer.
		 */
		return (utf_8);
	}

	while ((c = *utf_16++) != '\0') {
		/*
		 * Step 1: turn the next one or two UTF-16 code units
		 * into a single Unicode code point.
		 */
		if (IS_LEADING_SURROGATE(c)) {
			/*
			 * Leading surrogate.  Must be followed by
			 * a trailing surrogate.
			 */
			c2 = *utf_16;
			if (c2 == '\0') {
				/*
				 * The string ends with a leading
				 * surrogate.  Use a REPLACEMENT
				 * CHARACTER, and don't advance the
				 * input pointer, so that the next trip
				 * through the loop sees the
				 * terminating '\0' and quits.
				 */
				uc = REPLACEMENT_CHARACTER;
			} else {
				/*
				 * Consume the second code unit whether
				 * or not it completes the pair.
				 */
				utf_16++;
				if (IS_TRAILING_SURROGATE(c2)) {
					/*
					 * A proper surrogate pair.  The
					 * result is always between
					 * 0x10000 and 0x10ffff, so it is
					 * always a valid code point.
					 */
					uc = (uint32_t)SURROGATE_VALUE(c, c2);
				} else {
					/* Not a trailing surrogate. */
					uc = REPLACEMENT_CHARACTER;
				}
			}
		} else if (IS_TRAILING_SURROGATE(c)) {
			/*
			 * Trailing surrogate without a preceding
			 * leading surrogate.
			 */
			uc = REPLACEMENT_CHARACTER;
		} else {
			/* A valid BMP character; use it as is. */
			uc = (uint32_t)c;
		}

		/*
		 * Step 2: how many bytes of UTF-8 does it require?
		 */
		if (uc < 0x0080)
			nbytes = 1;
		else if (uc < 0x0800)
			nbytes = 2;
		else if (uc < 0x010000)
			nbytes = 3;
		else
			nbytes = 4;

		/*
		 * Step 3: stop, rather than emit a partial sequence, if
		 * there isn't room for those bytes plus a trailing '\0'.
		 */
		if (utf_8_len < nbytes + 1)
			break;

		/*
		 * Step 4: emit the sequence.
		 */
		switch (nbytes) {

		case 1:
			*utf_8++ = (char)uc;
			break;

		case 2:
			*utf_8++ = (char)(((uc >> 6) & 0x1F) | 0xC0);
			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
			break;

		case 3:
			*utf_8++ = (char)(((uc >> 12) & 0x0F) | 0xE0);
			*utf_8++ = (char)(((uc >> 6) & 0x3F) | 0x80);
			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
			break;

		default:
			/*
			 * 4 bytes.  The lead byte carries 3 payload
			 * bits; all three are needed for code points
			 * from 0x100000 up.
			 */
			*utf_8++ = (char)(((uc >> 18) & 0x07) | 0xF0);
			*utf_8++ = (char)(((uc >> 12) & 0x3F) | 0x80);
			*utf_8++ = (char)(((uc >> 6) & 0x3F) | 0x80);
			*utf_8++ = (char)(((uc >> 0) & 0x3F) | 0x80);
			break;
		}

		/*
		 * Charge the buffer for exactly what was written, in
		 * one place, so the count can't drift from the number
		 * of bytes stored.
		 */
		utf_8_len -= nbytes;
	}

	/*
	 * We have enough room for (at least) a trailing '\0': we
	 * started out with at least one byte, and every sequence was
	 * only stored after checking that a byte would be left over.
	 */
	*utf_8 = '\0';

	/*
	 * Return a pointer to the terminating '\0', in case we
	 * want to drop something in after that.
	 */
	return (utf_8);
}
    260 #endif /* _WIN32 */
    261 
/*
 * Build an error message from a printf-style format and its arguments,
 * followed by text describing an errno value.
 *
 * This is the variadic front end: it gathers the trailing arguments
 * into a va_list and hands everything, unchanged, to
 * pcapint_vfmt_errmsg_for_errno(), which does the work.
 */
void
pcapint_fmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
    const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	pcapint_vfmt_errmsg_for_errno(errbuf, errbuflen, errnum, fmt, args);
	va_end(args);
}
    276 
/*
 * va_list version of pcapint_fmt_errmsg_for_errno().
 *
 * Formats fmt/ap into errbuf and then, if there is room, appends ": "
 * followed by a description of errnum.  Output never extends past
 * errbuflen bytes.
 *
 * Parameters:
 *   errbuf     buffer that receives the message.
 *   errbuflen  size of errbuf in bytes; if zero, errbuf is not touched.
 *   errnum     errno value to describe.
 *   fmt, ap    printf-style format and its arguments.
 *
 * If the formatted text leaves fewer than 3 bytes free (": " plus the
 * terminating '\0'), only the formatted text is returned.
 */
void
pcapint_vfmt_errmsg_for_errno(char *errbuf, size_t errbuflen, int errnum,
    const char *fmt, va_list ap)
{
	size_t msglen;
	char *p;
	size_t errbuflen_remaining;

	/*
	 * With no buffer space at all vsnprintf() stores nothing, not
	 * even a '\0', so errbuf can't be treated as a string; bail
	 * out rather than running strlen() over it.
	 */
	if (errbuflen == 0)
		return;

	(void)vsnprintf(errbuf, errbuflen, fmt, ap);
	msglen = strlen(errbuf);

	/*
	 * Do we have enough space to append ": "?
	 * Including the terminating '\0', that's 3 bytes.
	 */
	if (msglen + 3 > errbuflen) {
		/* No - just give them what we've produced. */
		return;
	}
	p = errbuf + msglen;
	errbuflen_remaining = errbuflen - msglen;
	*p++ = ':';
	*p++ = ' ';
	*p = '\0';
	errbuflen_remaining -= 2;

	/*
	 * Now append the string for the error code.
	 */
#if defined(HAVE__WCSERROR_S)
	/*
	 * We have a Windows-style _wcserror_s().
	 * Generate a UTF-16LE error message.
	 */
	wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
	errno_t err = _wcserror_s(utf_16_errbuf, PCAP_ERRBUF_SIZE, errnum);
	if (err != 0) {
		/*
		 * It doesn't appear to be documented anywhere obvious
		 * what the error returns from _wcserror_s().
		 * Fall back on just reporting the number.
		 */
		snprintf(p, errbuflen_remaining, "Error %d", errnum);
		return;
	}

	/*
	 * Now convert it from UTF-16LE to UTF-8, dropping it in the
	 * remaining space in the buffer, and truncating it - cleanly,
	 * on a UTF-8 character boundary - if it doesn't fit.
	 */
	utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);

	/*
	 * Now, if we're not in UTF-8 mode, convert errbuf to the
	 * local code page.
	 */
	if (!use_utf_8)
		utf_8_to_acp_truncated(errbuf);
#else
	/*
	 * Either Windows without _wcserror_s() or not Windows.  Let pcap_strerror()
	 * solve the non-UTF-16 part of this problem space.
	 */
	snprintf(p, errbuflen_remaining, "%s", pcap_strerror(errnum));
#endif
}
    343 
    344 #ifdef _WIN32
    345 /*
    346  * Generate an error message based on a format, arguments, and a
    347  * Win32 error, with a message for the Win32 error after the formatted output.
    348  */
    349 void
    350 pcapint_fmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
    351     const char *fmt, ...)
    352 {
    353 	va_list ap;
    354 
    355 	va_start(ap, fmt);
    356 	pcapint_vfmt_errmsg_for_win32_err(errbuf, errbuflen, errnum, fmt, ap);
    357 	va_end(ap);
    358 }
    359 
    360 void
    361 pcapint_vfmt_errmsg_for_win32_err(char *errbuf, size_t errbuflen, DWORD errnum,
    362     const char *fmt, va_list ap)
    363 {
    364 	size_t msglen;
    365 	char *p;
    366 	size_t errbuflen_remaining;
    367 	DWORD retval;
    368 	wchar_t utf_16_errbuf[PCAP_ERRBUF_SIZE];
    369 	size_t utf_8_len;
    370 
    371 	vsnprintf(errbuf, errbuflen, fmt, ap);
    372 	msglen = strlen(errbuf);
    373 
    374 	/*
    375 	 * Do we have enough space to append ": "?
    376 	 * Including the terminating '\0', that's 3 bytes.
    377 	 */
    378 	if (msglen + 3 > errbuflen) {
    379 		/* No - just give them what we've produced. */
    380 		return;
    381 	}
    382 	p = errbuf + msglen;
    383 	errbuflen_remaining = errbuflen - msglen;
    384 	*p++ = ':';
    385 	*p++ = ' ';
    386 	*p = '\0';
    387 	msglen += 2;
    388 	errbuflen_remaining -= 2;
    389 
    390 	/*
    391 	 * Now append the string for the error code.
    392 	 *
    393 	 * XXX - what language ID to use?
    394 	 *
    395 	 * For UN*Xes, pcap_strerror() may or may not return localized
    396 	 * strings.
    397 	 *
    398 	 * We currently don't have localized messages for libpcap, but
    399 	 * we might want to do so.  On the other hand, if most of these
    400 	 * messages are going to be read by libpcap developers and
    401 	 * perhaps by developers of libpcap-based applications, English
    402 	 * might be a better choice, so the developer doesn't have to
    403 	 * get the message translated if it's in a language they don't
    404 	 * happen to understand.
    405 	 */
    406 	retval = FormatMessageW(FORMAT_MESSAGE_FROM_SYSTEM|FORMAT_MESSAGE_IGNORE_INSERTS|FORMAT_MESSAGE_MAX_WIDTH_MASK,
    407 	    NULL, errnum, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
    408 	    utf_16_errbuf, PCAP_ERRBUF_SIZE, NULL);
    409 	if (retval == 0) {
    410 		/*
    411 		 * Failed.
    412 		 */
    413 		snprintf(p, errbuflen_remaining,
    414 		    "Couldn't get error message for error (%lu)", errnum);
    415 		return;
    416 	}
    417 
    418 	/*
    419 	 * Now convert it from UTF-16LE to UTF-8.
    420 	 */
    421 	p = utf_16le_to_utf_8_truncated(utf_16_errbuf, p, errbuflen_remaining);
    422 
    423 	/*
    424 	 * Now append the error number, if it fits.
    425 	 */
    426 	utf_8_len = p - errbuf;
    427 	errbuflen_remaining -= utf_8_len;
    428 	if (utf_8_len == 0) {
    429 		/* The message was empty. */
    430 		snprintf(p, errbuflen_remaining, "(%lu)", errnum);
    431 	} else
    432 		snprintf(p, errbuflen_remaining, " (%lu)", errnum);
    433 
    434 	/*
    435 	 * Now, if we're not in UTF-8 mode, convert errbuf to the
    436 	 * local code page.
    437 	 */
    438 	if (!use_utf_8)
    439 		utf_8_to_acp_truncated(errbuf);
    440 }
    441 #endif
    442