Home | History | Annotate | Line # | Download | only in gen
vis.c revision 1.53
      1 /*	$NetBSD: vis.c,v 1.53 2013/02/15 00:28:10 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1989, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the University nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 /*-
     33  * Copyright (c) 1999, 2005 The NetBSD Foundation, Inc.
     34  * All rights reserved.
     35  *
     36  * Redistribution and use in source and binary forms, with or without
     37  * modification, are permitted provided that the following conditions
     38  * are met:
     39  * 1. Redistributions of source code must retain the above copyright
     40  *    notice, this list of conditions and the following disclaimer.
     41  * 2. Redistributions in binary form must reproduce the above copyright
     42  *    notice, this list of conditions and the following disclaimer in the
     43  *    documentation and/or other materials provided with the distribution.
     44  *
     45  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     46  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     47  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     48  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     49  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     50  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     51  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     52  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     53  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     54  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     55  * POSSIBILITY OF SUCH DAMAGE.
     56  */
     57 
     58 #include <sys/cdefs.h>
     59 #if defined(LIBC_SCCS) && !defined(lint)
     60 __RCSID("$NetBSD: vis.c,v 1.53 2013/02/15 00:28:10 christos Exp $");
     61 #endif /* LIBC_SCCS and not lint */
     62 #ifdef __FBSDID
     63 __FBSDID("$FreeBSD$");
     64 #define	_DIAGASSERT(x)	assert(x)
     65 #endif
     66 
     67 #include "namespace.h"
     68 #include <sys/types.h>
     69 
     70 #include <assert.h>
     71 #include <vis.h>
     72 #include <errno.h>
     73 #include <stdlib.h>
     74 #include <wchar.h>
     75 #include <wctype.h>
     76 
     77 #ifdef __weak_alias
     78 __weak_alias(strvisx,_strvisx)
     79 #endif
     80 
     81 #if !HAVE_VIS || !HAVE_SVIS
     82 #include <ctype.h>
     83 #include <limits.h>
     84 #include <stdio.h>
     85 #include <string.h>
     86 
/*
 * The reason for going through the trouble of dealing with character
 * encodings in vis(3) is that we use it to safely encode the output of
 * commands.  What counts as safe varies with the character set.  For
 * example, if we display ps output in French, we don't want French
 * characters to be displayed as M-foo.
 */
     94 
     95 static wchar_t *do_svis(wchar_t *, wint_t, int, wint_t, const wchar_t *);
     96 
#undef BELL
#define BELL L'\a'

/*
 * Character-class and hex-digit helpers used by the encoders below.
 * Every argument is parenthesized so that an expression argument (for
 * example a conditional expression) is classified as a whole.
 *
 * iswoctal() compares the full wide value; narrowing it to a byte first
 * would make unrelated wide characters whose low byte happens to fall in
 * '0'..'7' look like octal digits.
 */
#define iswoctal(c)	((c) >= L'0' && (c) <= L'7')
#define iswwhite(c)	((c) == L' ' || (c) == L'\t' || (c) == L'\n')
#define iswsafe(c)	((c) == L'\b' || (c) == BELL || (c) == L'\r')
/* Map a value in 0..15 to its lower-case / upper-case hex digit. */
#define xtoa(c)		(L"0123456789abcdef"[(c)])
#define XTOA(c)		(L"0123456789ABCDEF"[(c)])
    105 
/*
 * Slack reserved by MAKEEXTRALIST() beyond the caller's list; the macro
 * appends at most eight characters ('*', '?', '[', '#', ' ', '\t', '\n'
 * and '\\').
 */
#define MAXEXTRAS	9

/*
 * MAKEEXTRALIST(flag, extra, orig_str)
 *	Allocate a new wide string in 'extra' holding a copy of 'orig_str'
 *	followed by the characters that 'flag' says must also be encoded:
 *	the glob characters for VIS_GLOB, space/tab/newline for VIS_SP,
 *	VIS_TAB and VIS_NL, and the backslash unless VIS_NOSLASH is set.
 *
 *	On allocation failure 'extra' is left NULL and nothing else is done.
 *	'flag' and 'orig_str' are evaluated exactly once; 'extra' must be an
 *	lvalue.  The macro's own locals carry a mel_ prefix so that they
 *	cannot capture identifiers used in the arguments.
 */
#define MAKEEXTRALIST(flag, extra, orig_str)				      \
do {									      \
	const wchar_t *mel_src_ = (orig_str);				      \
	const int mel_flag_ = (flag);					      \
	size_t mel_len_ = 0;						      \
	size_t mel_i_;							      \
	wchar_t *mel_end_;						      \
	while (mel_src_[mel_len_] != L'\0')				      \
		mel_len_++;						      \
	(extra) = calloc(mel_len_ + 1 + MAXEXTRAS, sizeof(*(extra)));	      \
	if (!(extra)) break;						      \
	for (mel_i_ = 0; mel_i_ < mel_len_; mel_i_++)			      \
		(extra)[mel_i_] = mel_src_[mel_i_];			      \
	mel_end_ = (extra) + mel_len_;					      \
	if (mel_flag_ & VIS_GLOB) {					      \
		*mel_end_++ = L'*';					      \
		*mel_end_++ = L'?';					      \
		*mel_end_++ = L'[';					      \
		*mel_end_++ = L'#';					      \
	}								      \
	if (mel_flag_ & VIS_SP) *mel_end_++ = L' ';			      \
	if (mel_flag_ & VIS_TAB) *mel_end_++ = L'\t';			      \
	if (mel_flag_ & VIS_NL) *mel_end_++ = L'\n';			      \
	if ((mel_flag_ & VIS_NOSLASH) == 0) *mel_end_++ = L'\\';	      \
	*mel_end_ = L'\0';						      \
} while (/*CONSTCOND*/0)
    132 
/*
 * do_hvis - encode one character HTTP style (RFC 1808).
 *
 * Alphanumerics and the punctuation listed below are handed on to
 * do_svis(); every other character is written as '%' followed by two
 * lower-case hex digits taken from the low eight bits of c.
 *
 * Returns the position just past the last wide character written.
 */
static wchar_t *
do_hvis(wchar_t *dst, wint_t c, int flag, wint_t nextc, const wchar_t *extra)
{
	int unreserved = iswalnum(c) != 0;

	if (!unreserved) {
		switch (c) {
		/* safe */
		case L'$':
		case L'-':
		case L'_':
		case L'.':
		case L'+':
		/* extra */
		case L'!':
		case L'*':
		case L'\'':
		case L'(':
		case L')':
		case L',':
			unreserved = 1;
			break;
		default:
			break;
		}
	}

	if (unreserved)
		return do_svis(dst, c, flag, nextc, extra);

	dst[0] = L'%';
	dst[1] = xtoa(((unsigned int)c >> 4) & 0xf);
	dst[2] = xtoa((unsigned int)c & 0xf);
	return dst + 3;
}
    154 
/*
 * do_mvis - encode one character as Quoted-Printable MIME (RFC 2045).
 * NB: long lines and CRLF are not handled.
 *
 * A character other than newline is written as '=' plus two upper-case
 * hex digits when it is whitespace directly before CR or LF, when it is
 * a non-space character outside 33..126 or equal to 61 ('='), or when it
 * is one of  # $ @ [ \ ] ^ ` { | } ~ .  Anything else goes to do_svis().
 *
 * Returns the position just past the last wide character written.
 */
static wchar_t *
do_mvis(wchar_t *dst, wint_t c, int flag, wint_t nextc, const wchar_t *extra)
{
	int quote = 0;

	if (c != L'\n') {
		if (iswspace(c)) {
			/* Space at the end of the line */
			quote = (nextc == L'\r' || nextc == L'\n');
		} else {
			/* Out of range */
			quote = (c < 33 || c == 61 || c > 126);
		}
		/* Specific char to be escaped */
		if (!quote)
			quote = (wcschr(L"#$@[\\]^`{|}~", c) != NULL);
	}

	if (!quote)
		return do_svis(dst, c, flag, nextc, extra);

	dst[0] = L'=';
	dst[1] = XTOA(((unsigned int)c >> 4) & 0xf);
	dst[2] = XTOA((unsigned int)c & 0xf);
	return dst + 3;
}
    176 
    177 /*
    178  * This is do_vis, the central code of vis.
    179  * dst:	      Pointer to the destination buffer
    180  * c:	      Character to encode
    181  * flag:      Flag word
    182  * nextc:     The character following 'c'
    183  * extra:     Pointer to the list of extra characters to be
    184  *	      backslash-protected.
    185  */
    186 static wchar_t *
    187 do_svis(wchar_t *dst, wint_t c, int flag, wint_t nextc, const wchar_t *extra)
    188 {
    189 	int iswextra;
    190 
    191 	iswextra = wcschr(extra, c) != NULL;
    192 	if (!iswextra && (iswgraph(c) || iswwhite(c) ||
    193 	    ((flag & VIS_SAFE) && iswsafe(c)))) {
    194 		*dst++ = c;
    195 		return dst;
    196 	}
    197 	if (flag & VIS_CSTYLE) {
    198 		switch (c) {
    199 		case L'\n':
    200 			*dst++ = L'\\'; *dst++ = L'n';
    201 			return dst;
    202 		case L'\r':
    203 			*dst++ = L'\\'; *dst++ = L'r';
    204 			return dst;
    205 		case L'\b':
    206 			*dst++ = L'\\'; *dst++ = L'b';
    207 			return dst;
    208 		case BELL:
    209 			*dst++ = L'\\'; *dst++ = L'a';
    210 			return dst;
    211 		case L'\v':
    212 			*dst++ = L'\\'; *dst++ = L'v';
    213 			return dst;
    214 		case L'\t':
    215 			*dst++ = L'\\'; *dst++ = L't';
    216 			return dst;
    217 		case L'\f':
    218 			*dst++ = L'\\'; *dst++ = L'f';
    219 			return dst;
    220 		case L' ':
    221 			*dst++ = L'\\'; *dst++ = L's';
    222 			return dst;
    223 		case L'\0':
    224 			*dst++ = L'\\'; *dst++ = L'0';
    225 			if (iswoctal(nextc)) {
    226 				*dst++ = L'0';
    227 				*dst++ = L'0';
    228 			}
    229 			return dst;
    230 		default:
    231 			if (iswgraph(c)) {
    232 				*dst++ = L'\\';
    233 				*dst++ = c;
    234 				return dst;
    235 			}
    236 		}
    237 	}
    238 	if (iswextra || ((c & 0177) == L' ') || (flag & VIS_OCTAL)) {
    239 		*dst++ = L'\\';
    240 		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + L'0';
    241 		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + L'0';
    242 		*dst++ =			     (c	      & 07) + L'0';
    243 	} else {
    244 		if ((flag & VIS_NOSLASH) == 0)
    245 			*dst++ = L'\\';
    246 
    247 		if (c & 0200) {
    248 			c &= 0177;
    249 			*dst++ = L'M';
    250 		}
    251 
    252 		if (iswcntrl(c)) {
    253 			*dst++ = L'^';
    254 			if (c == 0177)
    255 				*dst++ = L'?';
    256 			else
    257 				*dst++ = c + L'@';
    258 		} else {
    259 			*dst++ = L'-';
    260 			*dst++ = c;
    261 		}
    262 	}
    263 	return dst;
    264 }
    265 
    266 typedef wchar_t *(*visfun_t)(wchar_t *, wint_t, int, wint_t, const wchar_t *);
    267 
    268 /*
    269  * Return the appropriate encoding function depending on the flags given.
    270  */
    271 static visfun_t
    272 getvisfun(int flag)
    273 {
    274 	if (flag & VIS_HTTPSTYLE)
    275 		return do_hvis;
    276 	if (flag & VIS_MIMESTYLE)
    277 		return do_mvis;
    278 	return do_svis;
    279 }
    280 
    281 /*
    282  * istrsnvisx()
    283  * 	The main internal function.
    284  *	All user-visible functions call this one.
    285  */
    286 static int
    287 istrsnvisx(char *mbdst, size_t *dlen, const char *mbsrc, size_t mblength,
    288     int flag, const char *mbextra)
    289 {
    290 	wchar_t *dst, *src, *pdst, *psrc, *start, *extra, *nextra;
    291 	size_t len, olen;
    292 	wint_t c;
    293 	visfun_t f;
    294 	int clen, error = -1;
    295 	ssize_t mbslength;
    296 
    297 	_DIAGASSERT(mbdst != NULL);
    298 	_DIAGASSERT(mbsrc != NULL);
    299 	_DIAGASSERT(mbextra != NULL);
    300 
    301 	/*
    302 	 * Input (mbsrc) is a char string considered to be multibyte
    303 	 * characters.  The input loop will read this string pulling
    304 	 * one character, possibly multiple bytes, from mbsrc and
    305 	 * converting each to wchar_t in src.
    306 	 *
    307 	 * The vis conversion will be done using the wide char
    308 	 * wchar_t string.
    309 	 *
    310 	 * This will then be converted back to a multibyte string to
    311 	 * return to the caller.
    312 	 */
    313 
    314 	/* Allocate space for the wide char strings */
    315 	psrc = pdst = extra = nextra = NULL;
    316 	if (!mblength)
    317 		mblength = strlen(mbsrc);
    318 
    319 	if ((psrc = calloc(mblength + 1, sizeof(*psrc))) == NULL)
    320 		return -1;
    321 	if ((pdst = calloc((4 * mblength) + 1, sizeof(*pdst))) == NULL)
    322 		goto out;
    323 	if ((extra = calloc((strlen(mbextra) + 1), sizeof(*extra))) == NULL)
    324 		goto out;
    325 
    326 	dst = pdst;
    327 	src = psrc;
    328 
    329 	/*
    330 	 * Input loop.
    331 	 * Handle up to mblength characters (not bytes).  We do not
    332 	 * stop at NULs because we may be processing a block of data
    333 	 * that includes NULs.  We process one more than the character
    334 	 * count so that we also get the next character of input which
    335 	 * is needed under some circumstances as a look-ahead character.
    336 	 */
    337 	mbslength = (ssize_t)mblength;
    338 	/*
    339 	 * When inputing a single character, must also read in the
    340 	 * next character for nextc, the look-ahead character.
    341 	 */
    342 	if (mbslength == 1)
    343 		mbslength++;
    344 	while (mbslength > 0) {
    345 		/* Convert one multibyte character to wchar_t. */
    346 		clen = mbtowc(src, mbsrc, MB_LEN_MAX);
    347 		if (clen < 0) {
    348 			/* Conversion error, process as a byte instead. */
    349 			*src = (wint_t)*mbsrc;
    350 			clen = 1;
    351 		}
    352 		if (clen == 0)
    353 			/*
    354 			 * NUL in input gives 0 return value. process
    355 			 * as single NUL byte.
    356 			 */
    357 			clen = 1;
    358 		/* Advance output pointer if we still have input left. */
    359 		src++;
    360 		/* Advance input pointer by number of bytes read. */
    361 		mbsrc += clen;
    362 		/* Decrement input count */
    363 		mbslength -= clen;
    364 	}
    365 	len = src - psrc;
    366 	src = psrc;
    367 	/*
    368 	 * In the single character input case, we will have actually
    369 	 * processed two characters, c and nextc.  Reset len back to
    370 	 * just a single character.
    371 	 */
    372 	if (mblength < len)
    373 		len = mblength;
    374 
    375 	/* Convert extra argument to list of characters for this mode. */
    376 	mbstowcs(extra, mbextra, strlen(mbextra));
    377 	MAKEEXTRALIST(flag, nextra, extra);
    378 	if (!nextra) {
    379 		if (dlen && *dlen == 0) {
    380 			errno = ENOSPC;
    381 			goto out;
    382 		}
    383 		*mbdst = '\0';		/* can't create nextra, return "" */
    384 		error = 0;
    385 		goto out;
    386 	}
    387 
    388 	/* Look up which processing function to call. */
    389 	f = getvisfun(flag);
    390 
    391 	/*
    392 	 * Main processing loop.
    393 	 * Call do_Xvis processing function one character at a time
    394 	 * with next character available for look-ahead.
    395 	 */
    396 	for (start = dst; len > 0; len--) {
    397 		c = *src++;
    398 		dst = (*f)(dst, c, flag, len >= 1 ? *src : L'\0', nextra);
    399 		if (dst == NULL) {
    400 			errno = ENOSPC;
    401 			goto out;
    402 		}
    403 	}
    404 
    405 	/* Terminate the output string. */
    406 	*dst = L'\0';
    407 
    408 	/* Convert wchar_t string back to multibyte output string. */
    409 	len = dlen ? *dlen : ((wcslen(start) + 1) * MB_LEN_MAX);
    410 	olen = wcstombs(mbdst, start, len * sizeof(*mbdst));
    411 
    412 	free(nextra);
    413 	free(extra);
    414 	free(pdst);
    415 	free(psrc);
    416 
    417 	return (int)olen;
    418 out:
    419 	free(nextra);
    420 	free(extra);
    421 	free(pdst);
    422 	free(psrc);
    423 	return error;
    424 }
    425 #endif
    426 
    427 #if !HAVE_SVIS
    428 /*
    429  *	The "svis" variants all take an "extra" arg that is a pointer
    430  *	to a NUL-terminated list of characters to be encoded, too.
    431  *	These functions are useful e. g. to encode strings in such a
    432  *	way so that they are not interpreted by a shell.
    433  */
    434 
/*
 * svis - encode the single character c, with nextc as look-ahead,
 *	into mbdst; characters listed in mbextra are encoded as well.
 *
 *	Returns mbdst advanced by the byte count istrsnvisx() reported,
 *	or NULL if istrsnvisx() failed.
 */
char *
svis(char *mbdst, int c, int flag, int nextc, const char *mbextra)
{
	char pair[2];
	int n;

	/* The look-ahead character rides along right behind c. */
	pair[0] = c;
	pair[1] = nextc;

	n = istrsnvisx(mbdst, NULL, pair, 1, flag, mbextra);
	return (n < 0) ? NULL : mbdst + n;
}
    449 
/*
 * snvis - like svis(), but the size of mbdst (dlen bytes) is passed on
 *	to istrsnvisx().
 *
 *	Returns mbdst advanced by the byte count istrsnvisx() reported,
 *	or NULL if istrsnvisx() failed.
 */
char *
snvis(char *mbdst, size_t dlen, int c, int flag, int nextc, const char *mbextra)
{
	char pair[2];
	int n;

	/* The look-ahead character rides along right behind c. */
	pair[0] = c;
	pair[1] = nextc;

	n = istrsnvisx(mbdst, &dlen, pair, 1, flag, mbextra);
	return (n < 0) ? NULL : mbdst + n;
}
    464 
/*
 * strsvis - encode the NUL-terminated string mbsrc into mbdst, also
 *	encoding the characters in mbextra.  No destination size is given.
 *	Returns whatever istrsnvisx() returns.
 */
int
strsvis(char *mbdst, const char *mbsrc, int flag, const char *mbextra)
{
	/* A length of 0 makes istrsnvisx() use strlen(mbsrc). */
	const size_t whole_string = 0;

	return istrsnvisx(mbdst, NULL, mbsrc, whole_string, flag, mbextra);
}
    470 
/*
 * strsnvis - like strsvis(), but the size of mbdst (dlen bytes) is
 *	passed on to istrsnvisx().  Returns whatever istrsnvisx() returns.
 */
int
strsnvis(char *mbdst, size_t dlen, const char *mbsrc, int flag, const char *mbextra)
{
	/* A length of 0 makes istrsnvisx() use strlen(mbsrc). */
	const size_t whole_string = 0;

	return istrsnvisx(mbdst, &dlen, mbsrc, whole_string, flag, mbextra);
}
    476 
/*
 * strsvisx - encode len bytes of mbsrc into mbdst, also encoding the
 *	characters in mbextra.  No destination size is given.
 *	Returns whatever istrsnvisx() returns.
 */
int
strsvisx(char *mbdst, const char *mbsrc, size_t len, int flag, const char *mbextra)
{
	int encoded;

	encoded = istrsnvisx(mbdst, NULL, mbsrc, len, flag, mbextra);
	return encoded;
}
    482 
/*
 * strsnvisx - encode len bytes of mbsrc into mbdst, whose size is dlen
 *	bytes, also encoding the characters in mbextra.
 *	Returns whatever istrsnvisx() returns.
 */
int
strsnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flag,
    const char *mbextra)
{
	int encoded;

	encoded = istrsnvisx(mbdst, &dlen, mbsrc, len, flag, mbextra);
	return encoded;
}
    489 #endif
    490 
    491 #if !HAVE_VIS
/*
 * vis - visually encode the single character c, with nextc as
 *	look-ahead, into mbdst.  No extra characters are requested.
 *
 *	Returns mbdst advanced by the byte count istrsnvisx() reported,
 *	or NULL if istrsnvisx() failed.
 */
char *
vis(char *mbdst, int c, int flag, int nextc)
{
	char pair[2];
	int n;

	/* The look-ahead character rides along right behind c. */
	pair[0] = c;
	pair[1] = nextc;

	n = istrsnvisx(mbdst, NULL, pair, 1, flag, "");
	return (n < 0) ? NULL : mbdst + n;
}
    509 
/*
 * nvis - like vis(), but the size of mbdst (dlen bytes) is passed on to
 *	istrsnvisx().
 *
 *	Returns mbdst advanced by the byte count istrsnvisx() reported,
 *	or NULL if istrsnvisx() failed.
 */
char *
nvis(char *mbdst, size_t dlen, int c, int flag, int nextc)
{
	char pair[2];
	int n;

	/* The look-ahead character rides along right behind c. */
	pair[0] = c;
	pair[1] = nextc;

	n = istrsnvisx(mbdst, &dlen, pair, 1, flag, "");
	return (n < 0) ? NULL : mbdst + n;
}
    524 
/*
 * strvis - visually encode the NUL-terminated string mbsrc into mbdst
 *
 *	The caller must provide a dst four times the size of src to
 *	allow for expansion.  The return value is the length of dst,
 *	not counting the terminating NUL.
 */
int
strvis(char *mbdst, const char *mbsrc, int flag)
{
	/* A length of 0 makes istrsnvisx() use strlen(mbsrc). */
	const size_t whole_string = 0;

	return istrsnvisx(mbdst, NULL, mbsrc, whole_string, flag, "");
}
    538 
/*
 * strnvis - like strvis(), but the size of mbdst (dlen bytes) is passed
 *	on to istrsnvisx().  Returns whatever istrsnvisx() returns.
 */
int
strnvis(char *mbdst, size_t dlen, const char *mbsrc, int flag)
{
	/* A length of 0 makes istrsnvisx() use strlen(mbsrc). */
	const size_t whole_string = 0;

	return istrsnvisx(mbdst, &dlen, mbsrc, whole_string, flag, "");
}
    544 
/*
 * strvisx - visually encode a block of len bytes from mbsrc into mbdst
 *
 *	The caller must provide a dst four times the size of src to
 *	allow for expansion.  The return value is the length of dst,
 *	not counting the terminating NUL.
 *
 *	Because the length is explicit rather than found with strlen(),
 *	this is the variant to use for a block of data.
 */
int
strvisx(char *mbdst, const char *mbsrc, size_t len, int flag)
{
	int encoded;

	encoded = istrsnvisx(mbdst, NULL, mbsrc, len, flag, "");
	return encoded;
}
    561 
/*
 * strnvisx - like strvisx(), but the size of mbdst (dlen bytes) is
 *	passed on to istrsnvisx().  Returns whatever istrsnvisx() returns.
 */
int
strnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flag)
{
	int encoded;

	encoded = istrsnvisx(mbdst, &dlen, mbsrc, len, flag, "");
	return encoded;
}
    567 #endif
    568