Home | History | Annotate | Line # | Download | only in gen
vis.c revision 1.50
      1 /*	$NetBSD: vis.c,v 1.50 2013/02/13 22:15:43 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1989, 1993
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the University nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 /*-
     33  * Copyright (c) 1999, 2005 The NetBSD Foundation, Inc.
     34  * All rights reserved.
     35  *
     36  * Redistribution and use in source and binary forms, with or without
     37  * modification, are permitted provided that the following conditions
     38  * are met:
     39  * 1. Redistributions of source code must retain the above copyright
     40  *    notice, this list of conditions and the following disclaimer.
     41  * 2. Redistributions in binary form must reproduce the above copyright
     42  *    notice, this list of conditions and the following disclaimer in the
     43  *    documentation and/or other materials provided with the distribution.
     44  *
     45  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     46  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     47  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     48  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     49  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     50  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     51  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     52  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     53  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     54  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     55  * POSSIBILITY OF SUCH DAMAGE.
     56  */
     57 
     58 #include <sys/cdefs.h>
     59 #if defined(LIBC_SCCS) && !defined(lint)
     60 __RCSID("$NetBSD: vis.c,v 1.50 2013/02/13 22:15:43 christos Exp $");
     61 #endif /* LIBC_SCCS and not lint */
     62 #ifdef __FBSDID
     63 __FBSDID("$FreeBSD$");
     64 #define	_DIAGASSERT(x)	assert(x)
     65 #endif
     66 
     67 #include "namespace.h"
     68 #include <sys/types.h>
     69 
     70 #include <assert.h>
     71 #include <vis.h>
     72 #include <errno.h>
     73 #include <stdlib.h>
     74 #include <wchar.h>
     75 #include <wctype.h>
     76 
     77 #ifdef __weak_alias
     78 __weak_alias(strvisx,_strvisx)
     79 #endif
     80 
     81 #if !HAVE_VIS || !HAVE_SVIS
     82 #include <ctype.h>
     83 #include <limits.h>
     84 #include <stdio.h>
     85 #include <string.h>
     86 
     87 /*
     88  * The reason for going through the trouble to deal with character encodings
     89  * in vis(3) is that we use this to safely encode the output of commands. This
     90  * safe encoding varies depending on the character set. For example, if we
     91  * display ps output in French, we don't want to display French characters
     92  * as M-foo.
     93  */
     94 
     95 static wchar_t *do_svis(wchar_t *, wint_t, int, wint_t, const wchar_t *);
     96 
/*
 * Small wide-character classification and formatting helpers used by the
 * encoders below.  Arguments may be evaluated more than once, so do not
 * pass expressions with side effects.
 */
#undef BELL
#define BELL L'\a'

/*
 * True if c is an octal digit (L'0' .. L'7').  The value is compared at
 * full width: truncating it to a byte first would make unrelated wide
 * characters whose low byte is 0x30..0x37 look like digits.
 */
#define iswoctal(c)	((c) >= L'0' && (c) <= L'7')
/* True if c is a space, tab or newline. */
#define iswwhite(c)	((c) == L' ' || (c) == L'\t' || (c) == L'\n')
/* True if c is backspace, bell or carriage return (see VIS_SAFE). */
#define iswsafe(c)	((c) == L'\b' || (c) == BELL || (c) == L'\r')
/* Map a value 0..15 to its lower-case / upper-case hexadecimal digit. */
#define xtoa(c)		L"0123456789abcdef"[(c)]
#define XTOA(c)		L"0123456789ABCDEF"[(c)]
    105 
/*
 * Extra room reserved by MAKEEXTRALIST() beyond a copy of the caller's
 * list.  At most eight characters are appended (four glob characters,
 * space, tab, newline and backslash).
 */
#define MAXEXTRAS	9

/*
 * MAKEEXTRALIST(flag, extra, orig_str)
 *
 * Allocate with calloc() a new wide-character list, store it in `extra',
 * and fill it with a copy of `orig_str' followed by the characters that
 * `flag' asks to have encoded as well:
 *	VIS_GLOB	adds  * ? [ #
 *	VIS_SP		adds  space
 *	VIS_TAB		adds  tab
 *	VIS_NL		adds  newline
 *	(no VIS_NOSLASH) adds  backslash
 * On allocation failure `extra' is left NULL and nothing else is done.
 * The caller owns the result and must free() it.
 *
 * Every macro parameter is parenthesised so that compound expressions
 * (for example `a | b' as the flag word) expand correctly, and the
 * internal temporaries use a mel_ prefix to avoid capturing identifiers
 * that appear in the arguments.
 */
#define MAKEEXTRALIST(flag, extra, orig_str)				      \
do {									      \
	const wchar_t *mel_orig_ = (orig_str);				      \
	const wchar_t *mel_o_ = mel_orig_;				      \
	wchar_t *mel_e_;						      \
	while (*mel_o_++)						      \
		continue;						      \
	(extra) = calloc((size_t)((mel_o_ - mel_orig_) + MAXEXTRAS),	      \
	    sizeof(*(extra)));						      \
	if (!(extra)) break;						      \
	for (mel_o_ = mel_orig_, mel_e_ = (extra);			      \
	    (*mel_e_++ = *mel_o_++) != L'\0';)				      \
		continue;						      \
	mel_e_--;							      \
	if ((flag) & VIS_GLOB) {					      \
		*mel_e_++ = L'*';					      \
		*mel_e_++ = L'?';					      \
		*mel_e_++ = L'[';					      \
		*mel_e_++ = L'#';					      \
	}								      \
	if ((flag) & VIS_SP) *mel_e_++ = L' ';				      \
	if ((flag) & VIS_TAB) *mel_e_++ = L'\t';			      \
	if ((flag) & VIS_NL) *mel_e_++ = L'\n';				      \
	if (((flag) & VIS_NOSLASH) == 0) *mel_e_++ = L'\\';		      \
	*mel_e_ = L'\0';						      \
} while (/*CONSTCOND*/0)
    132 
/*
 * do_hvis - encode one character in HTTP style (RFC 1808).
 *
 * Alphanumerics and the punctuation characters  $ - _ . + ! * ' ( ) ,
 * are handed on to do_svis(); everything else is written as '%' followed
 * by two lower-case hexadecimal digits taken from the low eight bits of c.
 * Returns the position in dst just past what was written.
 */
static wchar_t *
do_hvis(wchar_t *dst, wint_t c, int flag, wint_t nextc, const wchar_t *extra)
{
	int passthrough;
	unsigned int bits;

	passthrough = iswalnum(c) != 0;
	if (!passthrough) {
		switch (c) {
		/* safe */
		case L'$':
		case L'-':
		case L'_':
		case L'.':
		case L'+':
		/* extra */
		case L'!':
		case L'*':
		case L'\'':
		case L'(':
		case L')':
		case L',':
			passthrough = 1;
			break;
		default:
			break;
		}
	}

	if (passthrough)
		return do_svis(dst, c, flag, nextc, extra);

	bits = (unsigned int)c;
	dst[0] = L'%';
	dst[1] = xtoa((bits >> 4) & 0xf);
	dst[2] = xtoa(bits & 0xf);
	return dst + 3;
}
    154 
/*
 * do_mvis - encode one character in Quoted-Printable MIME style (RFC 2045).
 * NB: No handling of long lines or CRLF.
 *
 * A newline is never hex-encoded here.  Any other character is written as
 * '=' plus two upper-case hexadecimal digits (low eight bits of c) when it
 * is whitespace directly followed by CR or LF, when it is a non-space
 * character below 33, equal to 61 or above 126, or when it is one of
 * # $ @ [ \ ] ^ ` { | } ~ .  All remaining characters go to do_svis().
 * Returns the position in dst just past what was written.
 */
static wchar_t *
do_mvis(wchar_t *dst, wint_t c, int flag, wint_t nextc, const wchar_t *extra)
{
	int hexify;
	unsigned int bits;

	if (c == L'\n') {
		hexify = 0;
	} else {
		if (iswspace(c))
			/* Space at the end of the line */
			hexify = (nextc == L'\r' || nextc == L'\n');
		else
			/* Out of range */
			hexify = (c < 33 || c == 61 || c > 126);
		/* Specific char to be escaped */
		if (!hexify)
			hexify = wcschr(L"#$@[\\]^`{|}~", c) != NULL;
	}

	if (!hexify)
		return do_svis(dst, c, flag, nextc, extra);

	bits = (unsigned int)c;
	dst[0] = L'=';
	dst[1] = XTOA((bits >> 4) & 0xf);
	dst[2] = XTOA(bits & 0xf);
	return dst + 3;
}
    176 
    177 /*
    178  * This is do_vis, the central code of vis.
    179  * dst:	      Pointer to the destination buffer
    180  * c:	      Character to encode
    181  * flag:      Flag word
    182  * nextc:     The character following 'c'
    183  * extra:     Pointer to the list of extra characters to be
    184  *	      backslash-protected.
    185  */
    186 static wchar_t *
    187 do_svis(wchar_t *dst, wint_t c, int flag, wint_t nextc, const wchar_t *extra)
    188 {
    189 	int iswextra;
    190 
    191 	iswextra = wcschr(extra, c) != NULL;
    192 	if (!iswextra && (iswgraph(c) || iswwhite(c) ||
    193 	    ((flag & VIS_SAFE) && iswsafe(c)))) {
    194 		*dst++ = c;
    195 		return dst;
    196 	}
    197 	if (flag & VIS_CSTYLE) {
    198 		switch (c) {
    199 		case L'\n':
    200 			*dst++ = L'\\'; *dst++ = L'n';
    201 			return dst;
    202 		case L'\r':
    203 			*dst++ = L'\\'; *dst++ = L'r';
    204 			return dst;
    205 		case L'\b':
    206 			*dst++ = L'\\'; *dst++ = L'b';
    207 			return dst;
    208 		case BELL:
    209 			*dst++ = L'\\'; *dst++ = L'a';
    210 			return dst;
    211 		case L'\v':
    212 			*dst++ = L'\\'; *dst++ = L'v';
    213 			return dst;
    214 		case L'\t':
    215 			*dst++ = L'\\'; *dst++ = L't';
    216 			return dst;
    217 		case L'\f':
    218 			*dst++ = L'\\'; *dst++ = L'f';
    219 			return dst;
    220 		case L' ':
    221 			*dst++ = L'\\'; *dst++ = L's';
    222 			return dst;
    223 		case L'\0':
    224 			*dst++ = L'\\'; *dst++ = L'0';
    225 			if (iswoctal(nextc)) {
    226 				*dst++ = L'0';
    227 				*dst++ = L'0';
    228 			}
    229 			return dst;
    230 		default:
    231 			if (iswgraph(c)) {
    232 				*dst++ = L'\\';
    233 				*dst++ = c;
    234 				return dst;
    235 			}
    236 		}
    237 	}
    238 	if (iswextra || ((c & 0177) == L' ') || (flag & VIS_OCTAL)) {
    239 		*dst++ = L'\\';
    240 		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + L'0';
    241 		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + L'0';
    242 		*dst++ =			     (c	      & 07) + L'0';
    243 	} else {
    244 		if ((flag & VIS_NOSLASH) == 0)
    245 			*dst++ = L'\\';
    246 
    247 		if (c & 0200) {
    248 			c &= 0177;
    249 			*dst++ = L'M';
    250 		}
    251 
    252 		if (iswcntrl(c)) {
    253 			*dst++ = L'^';
    254 			if (c == 0177)
    255 				*dst++ = L'?';
    256 			else
    257 				*dst++ = c + L'@';
    258 		} else {
    259 			*dst++ = L'-';
    260 			*dst++ = c;
    261 		}
    262 	}
    263 	return dst;
    264 }
    265 
    266 typedef wchar_t *(*visfun_t)(wchar_t *, wint_t, int, wint_t, const wchar_t *);
    267 
    268 /*
    269  * Return the appropriate encoding function depending on the flags given.
    270  */
    271 static visfun_t
    272 getvisfun(int flag)
    273 {
    274 	if (flag & VIS_HTTPSTYLE)
    275 		return do_hvis;
    276 	if (flag & VIS_MIMESTYLE)
    277 		return do_mvis;
    278 	return do_svis;
    279 }
    280 
    281 /*
    282  * istrsnvisx()
    283  * 	The main internal function.
    284  *	All user-visible functions call this one.
    285  */
    286 static int
    287 istrsnvisx(char *mbdst, size_t *dlen, const char *mbsrc, size_t mblength,
    288     int flag, const char *mbextra)
    289 {
    290 	wchar_t *dst, *src, *pdst, *psrc, *start, *extra, *nextra;
    291 	size_t len, olen;
    292 	wint_t c;
    293 	visfun_t f;
    294 	int clen, error = -1;
    295 	ssize_t mbslength;
    296 
    297 	_DIAGASSERT(mbdst != NULL);
    298 	_DIAGASSERT(mbsrc != NULL);
    299 	_DIAGASSERT(mbextra != NULL);
    300 
    301 	psrc = pdst = extra = nextra = NULL;
    302 	if (!mblength)
    303 		mblength = strlen(mbsrc);
    304 
    305 	if ((psrc = calloc(mblength + 1, sizeof(*psrc))) == NULL)
    306 		return -1;
    307 	if ((pdst = calloc((4 * mblength) + 1, sizeof(*pdst))) == NULL)
    308 		goto out;
    309 	if ((extra = calloc((strlen(mbextra) + 1), sizeof(*extra))) == NULL)
    310 		goto out;
    311 
    312 	dst = pdst;
    313 	src = psrc;
    314 
    315 	if (mblength < len)
    316 		len = mblength;
    317 
    318 	mbslength = (ssize_t)mblength;
    319 	while (mbslength > 0) {
    320 		clen = mbtowc(src, mbsrc, MB_LEN_MAX);
    321 		if (clen < 0) {
    322 			*src = (wint_t)(u_char)*mbsrc;
    323 			clen = 1;
    324 		}`
    325 		if (clen == 0)
    326 			clen = 1;
    327 		src++;
    328 		mbsrc += clen;
    329 		mbslength -= clen;
    330 	}
    331 	len = src - psrc;
    332 	src = psrc;
    333 
    334 	mbstowcs(extra, mbextra, strlen(mbextra));
    335 	MAKEEXTRALIST(flag, nextra, extra);
    336 	if (!nextra) {
    337 		if (dlen && *dlen == 0) {
    338 			errno = ENOSPC;
    339 			goto out;
    340 		}
    341 		*mbdst = '\0';		/* can't create nextra, return "" */
    342 		error = 0;
    343 		goto out;
    344 	}
    345 
    346 	f = getvisfun(flag);
    347 
    348 	for (start = dst; len > 0; len--) {
    349 		c = *src++;
    350 		dst = (*f)(dst, c, flag, len >= 1 ? *src : L'\0', nextra);
    351 		if (dst == NULL) {
    352 			errno = ENOSPC;
    353 			goto out;
    354 		}
    355 	}
    356 
    357 	*dst = L'\0';
    358 
    359 	len = dlen ? *dlen : ((wcslen(start) + 1) * MB_LEN_MAX);
    360 	olen = wcstombs(mbdst, start, len * sizeof(*mbdst));
    361 
    362 	free(nextra);
    363 	free(extra);
    364 	free(pdst);
    365 	free(psrc);
    366 
    367 	return (int)olen;
    368 out:
    369 	free(nextra);
    370 	free(extra);
    371 	free(pdst);
    372 	free(psrc);
    373 	return error;
    374 }
    375 #endif
    376 
    377 #if !HAVE_SVIS
    378 /*
    379  *	The "svis" variants all take an "extra" arg that is a pointer
    380  *	to a NUL-terminated list of characters to be encoded, too.
    381  *	These functions are useful e. g. to encode strings in such a
    382  *	way so that they are not interpreted by a shell.
    383  */
    384 
/*
 * svis - encode the single character c into mbdst, also encoding any
 * character listed in mbextra.  c and nextc are handed to istrsnvisx()
 * as a two-byte buffer with a source length of 1.
 * Returns a pointer just past the encoded output, or NULL on failure.
 */
char *
svis(char *mbdst, int c, int flag, int nextc, const char *mbextra)
{
	char pair[2];
	int n;

	pair[1] = nextc;
	pair[0] = c;

	n = istrsnvisx(mbdst, NULL, pair, 1, flag, mbextra);
	return (n < 0) ? NULL : mbdst + n;
}
    399 
/*
 * snvis - like svis(), but the destination is limited to dlen bytes.
 * Returns a pointer just past the encoded output, or NULL on failure.
 */
char *
snvis(char *mbdst, size_t dlen, int c, int flag, int nextc, const char *mbextra)
{
	char pair[2];
	int n;

	pair[1] = nextc;
	pair[0] = c;

	n = istrsnvisx(mbdst, &dlen, pair, 1, flag, mbextra);
	return (n < 0) ? NULL : mbdst + n;
}
    414 
/*
 * strsvis - encode the NUL-terminated string mbsrc into mbdst, also
 * encoding any character listed in mbextra.  The destination is not
 * bounded.  Returns whatever istrsnvisx() returns.
 */
int
strsvis(char *mbdst, const char *mbsrc, int flag, const char *mbextra)
{
	/* A source length of 0 makes istrsnvisx() use strlen(mbsrc). */
	const size_t whole_string = 0;

	return istrsnvisx(mbdst, NULL, mbsrc, whole_string, flag, mbextra);
}
    420 
/*
 * strsnvis - like strsvis(), but the destination is limited to dlen
 * bytes.  Returns whatever istrsnvisx() returns.
 */
int
strsnvis(char *mbdst, size_t dlen, const char *mbsrc, int flag, const char *mbextra)
{
	/* A source length of 0 makes istrsnvisx() use strlen(mbsrc). */
	const size_t whole_string = 0;

	return istrsnvisx(mbdst, &dlen, mbsrc, whole_string, flag, mbextra);
}
    426 
/*
 * strsvisx - encode len bytes of mbsrc into mbdst, also encoding any
 * character listed in mbextra.  The destination is not bounded.
 * Returns whatever istrsnvisx() returns.
 */
int
strsvisx(char *mbdst, const char *mbsrc, size_t len, int flag, const char *mbextra)
{
	size_t *const unbounded = NULL;

	return istrsnvisx(mbdst, unbounded, mbsrc, len, flag, mbextra);
}
    432 
/*
 * strsnvisx - encode len bytes of mbsrc into mbdst, which is limited to
 * dlen bytes, also encoding any character listed in mbextra.
 * Returns whatever istrsnvisx() returns.
 */
int
strsnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flag,
    const char *mbextra)
{
	size_t *const bound = &dlen;

	return istrsnvisx(mbdst, bound, mbsrc, len, flag, mbextra);
}
    439 #endif
    440 
    441 #if !HAVE_VIS
/*
 * vis - visually encode the single character c into mbdst, with an empty
 * extra-character list.  c and nextc are handed to istrsnvisx() as a
 * two-byte buffer with a source length of 1.
 * Returns a pointer just past the encoded output, or NULL on failure.
 */
char *
vis(char *mbdst, int c, int flag, int nextc)
{
	char pair[2];
	int n;

	pair[1] = nextc;
	pair[0] = c;

	n = istrsnvisx(mbdst, NULL, pair, 1, flag, "");
	return (n < 0) ? NULL : mbdst + n;
}
    459 
/*
 * nvis - like vis(), but the destination is limited to dlen bytes.
 * Returns a pointer just past the encoded output, or NULL on failure.
 */
char *
nvis(char *mbdst, size_t dlen, int c, int flag, int nextc)
{
	char pair[2];
	int n;

	pair[1] = nextc;
	pair[0] = c;

	n = istrsnvisx(mbdst, &dlen, pair, 1, flag, "");
	return (n < 0) ? NULL : mbdst + n;
}
    474 
/*
 * strvis - visually encode the NUL-terminated string mbsrc into mbdst
 *
 *	mbdst must be 4 times the size of mbsrc to account for possible
 *	expansion.  The length of the result, not including the trailing
 *	NUL, is returned.
 */

int
strvis(char *mbdst, const char *mbsrc, int flag)
{
	/* A source length of 0 makes istrsnvisx() use strlen(mbsrc). */
	const size_t whole_string = 0;

	return istrsnvisx(mbdst, NULL, mbsrc, whole_string, flag, "");
}
    488 
/*
 * strnvis - like strvis(), but the destination is limited to dlen bytes.
 * Returns whatever istrsnvisx() returns.
 */
int
strnvis(char *mbdst, size_t dlen, const char *mbsrc, int flag)
{
	/* A source length of 0 makes istrsnvisx() use strlen(mbsrc). */
	const size_t whole_string = 0;

	return istrsnvisx(mbdst, &dlen, mbsrc, whole_string, flag, "");
}
    494 
/*
 * strvisx - visually encode a block of bytes from mbsrc into mbdst
 *
 *	mbdst must be 4 times the size of the source to account for
 *	possible expansion.  The length of the result, not including the
 *	trailing NUL, is returned.
 *
 *	strvisx encodes exactly len bytes from mbsrc, which is useful for
 *	encoding a block of data.
 */

int
strvisx(char *mbdst, const char *mbsrc, size_t len, int flag)
{
	size_t *const unbounded = NULL;

	return istrsnvisx(mbdst, unbounded, mbsrc, len, flag, "");
}
    511 
/*
 * strnvisx - like strvisx(), but the destination is limited to dlen
 * bytes.  Returns whatever istrsnvisx() returns.
 */
int
strnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flag)
{
	size_t *const bound = &dlen;

	return istrsnvisx(mbdst, bound, mbsrc, len, flag, "");
}
    517 #endif
    518