Home | History | Annotate | Line # | Download | only in gen
      1  1.88    andvar /*	$NetBSD: vis.c,v 1.88 2024/03/17 21:48:02 andvar Exp $	*/
      2   1.6       cgd 
      3   1.1       cgd /*-
      4   1.6       cgd  * Copyright (c) 1989, 1993
      5  1.16  wennmach  *	The Regents of the University of California.  All rights reserved.
      6   1.1       cgd  *
      7   1.1       cgd  * Redistribution and use in source and binary forms, with or without
      8   1.1       cgd  * modification, are permitted provided that the following conditions
      9   1.1       cgd  * are met:
     10   1.1       cgd  * 1. Redistributions of source code must retain the above copyright
     11   1.1       cgd  *    notice, this list of conditions and the following disclaimer.
     12   1.1       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     13   1.1       cgd  *    notice, this list of conditions and the following disclaimer in the
     14   1.1       cgd  *    documentation and/or other materials provided with the distribution.
     15  1.29     lukem  * 3. Neither the name of the University nor the names of its contributors
     16   1.1       cgd  *    may be used to endorse or promote products derived from this software
     17   1.1       cgd  *    without specific prior written permission.
     18   1.1       cgd  *
     19   1.1       cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20   1.1       cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21   1.1       cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22   1.1       cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23   1.1       cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24   1.1       cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25   1.1       cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26   1.1       cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27   1.1       cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28   1.1       cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29   1.1       cgd  * SUCH DAMAGE.
     30   1.1       cgd  */
     31   1.1       cgd 
     32  1.26       agc /*-
     33  1.31     lukem  * Copyright (c) 1999, 2005 The NetBSD Foundation, Inc.
     34  1.30     lukem  * All rights reserved.
     35  1.26       agc  *
     36  1.26       agc  * Redistribution and use in source and binary forms, with or without
     37  1.26       agc  * modification, are permitted provided that the following conditions
     38  1.26       agc  * are met:
     39  1.26       agc  * 1. Redistributions of source code must retain the above copyright
     40  1.26       agc  *    notice, this list of conditions and the following disclaimer.
     41  1.26       agc  * 2. Redistributions in binary form must reproduce the above copyright
     42  1.26       agc  *    notice, this list of conditions and the following disclaimer in the
     43  1.26       agc  *    documentation and/or other materials provided with the distribution.
     44  1.26       agc  *
     45  1.30     lukem  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     46  1.30     lukem  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     47  1.30     lukem  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     48  1.30     lukem  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     49  1.30     lukem  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     50  1.30     lukem  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     51  1.30     lukem  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     52  1.30     lukem  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     53  1.30     lukem  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     54  1.30     lukem  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     55  1.30     lukem  * POSSIBILITY OF SUCH DAMAGE.
     56  1.26       agc  */
     57  1.26       agc 
     58   1.7  christos #include <sys/cdefs.h>
     59  1.21        tv #if defined(LIBC_SCCS) && !defined(lint)
     60  1.88    andvar __RCSID("$NetBSD: vis.c,v 1.88 2024/03/17 21:48:02 andvar Exp $");
     61  1.21        tv #endif /* LIBC_SCCS and not lint */
     62  1.46  christos #ifdef __FBSDID
     63  1.46  christos __FBSDID("$FreeBSD$");
     64  1.46  christos #define	_DIAGASSERT(x)	assert(x)
     65  1.46  christos #endif
     66   1.1       cgd 
     67   1.8       jtc #include "namespace.h"
     68  1.84  riastrad 
     69  1.86  riastrad #include <sys/param.h>
     70   1.1       cgd #include <sys/types.h>
     71  1.12     lukem 
     72  1.12     lukem #include <assert.h>
     73  1.44  christos #include <errno.h>
     74  1.85  riastrad #include <stdint.h>
     75  1.22  christos #include <stdlib.h>
     76  1.84  riastrad #include <vis.h>
     77  1.46  christos #include <wchar.h>
     78  1.46  christos #include <wctype.h>
     79   1.8       jtc 
     80   1.8       jtc #ifdef __weak_alias
     81  1.18   mycroft __weak_alias(strvisx,_strvisx)
     82  1.20        tv #endif
     83  1.20        tv 
     84  1.24     pooka #if !HAVE_VIS || !HAVE_SVIS
     85  1.20        tv #include <ctype.h>
     86  1.20        tv #include <limits.h>
     87  1.20        tv #include <stdio.h>
     88  1.20        tv #include <string.h>
     89   1.1       cgd 
     90  1.47  christos /*
     91  1.47  christos  * The reason for going to the trouble of dealing with character encodings
     92  1.47  christos  * in vis(3) is that we use it to safely encode the output of commands. This
     93  1.47  christos  * safe encoding varies depending on the character set. For example, if we
     94  1.47  christos  * display ps output in French, we don't want to display French characters
     95  1.47  christos  * as M-foo.
     96  1.47  christos  */
     97  1.47  christos 
/*
 * Forward declaration: do_hvis() and do_mvis() hand characters on to
 * do_svis(), which is defined after them.
 */
     98  1.48     pooka static wchar_t *do_svis(wchar_t *, wint_t, int, wint_t, const wchar_t *);
     99  1.37       dsl 
/* Wide-character alert (BEL); any BELL macro defined earlier is dropped first. */
    100  1.15  wennmach #undef BELL
    101  1.46  christos #define BELL L'\a'
    102  1.68  christos 
/*
 * iscgraph(c): graphic-character test used by ISGRAPH() when VIS_NOLOCALE
 * is set.  If LC_C_LOCALE is available it is handed to isgraph_l();
 * otherwise plain isgraph() is used.
 */
    103  1.70  christos #if defined(LC_C_LOCALE)
    104  1.70  christos #define iscgraph(c)      isgraph_l(c, LC_C_LOCALE)
    105  1.68  christos #else
    106  1.69  christos /* Keep it simple for now, no locale stuff */
    107  1.69  christos #define iscgraph(c)	isgraph(c)
/*
 * Disabled alternative: switch LC_CTYPE to "C" around the isgraph() call
 * and put the saved setting back afterwards.
 * NOTE(review): before enabling this, the iscgraph() macro above has to go,
 * since it would otherwise rewrite the function name below.  Also confirm
 * that the string returned by setlocale(LC_CTYPE, "C") really names the
 * previous locale rather than the one just installed.
 */
    108  1.69  christos #ifdef notyet
    109  1.69  christos #include <locale.h>
    110  1.68  christos static int
    111  1.68  christos iscgraph(int c) {
    112  1.68  christos 	int rv;
    113  1.68  christos 	char *ol;
    114  1.68  christos 
    115  1.68  christos 	ol = setlocale(LC_CTYPE, "C");
    116  1.68  christos 	rv = isgraph(c);
    117  1.68  christos 	if (ol)
    118  1.68  christos 		setlocale(LC_CTYPE, ol);
    119  1.68  christos 	return rv;
    120  1.68  christos }
    121  1.68  christos #endif
    122  1.69  christos #endif
    123  1.68  christos 
/*
 * Is c a graphic character?  With VIS_NOLOCALE the answer comes from
 * iscgraph(); otherwise iswgraph() decides.
 */
#define ISGRAPH(flags, c) \
    (((flags) & VIS_NOLOCALE) ? iscgraph(c) : iswgraph(c))

/*
 * Small character-class and hex-digit helpers.
 *
 * Every argument is parenthesised so that an expression argument such as
 * iswwhite(x & 0177) groups as intended.  Arguments may be evaluated more
 * than once, so do not pass expressions with side effects.
 *
 * iswoctal() looks only at the low byte of its argument (the value is
 * narrowed to unsigned char before the comparison).
 * xtoa()/XTOA() map a value in the range 0..15 to a lower/upper case hex
 * digit; the caller must mask the value to that range.
 */
#define iswoctal(c)	(((unsigned char)(c)) >= L'0' && \
			 ((unsigned char)(c)) <= L'7')
#define iswwhite(c)	((c) == L' ' || (c) == L'\t' || (c) == L'\n')
#define iswsafe(c)	((c) == L'\b' || (c) == BELL || (c) == L'\r')
#define xtoa(c)		(L"0123456789abcdef"[(c)])
#define XTOA(c)		(L"0123456789ABCDEF"[(c)])
    132  1.16  wennmach 
/*
 * Number of slots makeextralist() allocates on top of the caller's list,
 * to hold the flag-implied characters it appends and the terminator.
 */
    133  1.63  christos #define MAXEXTRAS	30
    134  1.63  christos 
/* Characters makeextralist() appends to the extra list when VIS_SHELL is set. */
    135  1.63  christos static const wchar_t char_shell[] = L"'`\";&<>()|{}]\\$!^~";
/* Characters makeextralist() appends to the extra list when VIS_GLOB is set. */
    136  1.63  christos static const wchar_t char_glob[] = L"*?[#";
    137  1.15  wennmach 
    138  1.60     joerg #if !HAVE_NBTOOL_CONFIG_H
    139  1.59  christos #ifndef __NetBSD__
    140  1.57  christos /*
    141  1.57  christos  * On NetBSD MB_LEN_MAX is currently 32, which does not fit in any
    142  1.57  christos  * integral type and is probably wrong, since currently the maximum
    143  1.57  christos  * number of bytes a character needs is 6. Until this is fixed, the
    144  1.57  christos  * loops below use sizeof(uint64_t) - 1 instead of MB_LEN_MAX, and
    145  1.57  christos  * the assertion below is skipped when building on NetBSD.
    146  1.57  christos  */
    147  1.59  christos #ifdef __FreeBSD__
    148  1.59  christos /*
    149  1.59  christos  * On FreeBSD including <sys/systm.h> for CTASSERT only works in kernel
    150  1.59  christos  * mode.
    151  1.59  christos  */
/*
 * Local fallback: __CTASSERT declares an array typedef whose size is -1
 * when x is false, which the compiler rejects.  The two outer layers paste
 * the expanded __LINE__ into the typedef name.
 */
    152  1.59  christos #ifndef CTASSERT
    153  1.59  christos #define CTASSERT(x)             _CTASSERT(x, __LINE__)
    154  1.59  christos #define _CTASSERT(x, y)         __CTASSERT(x, y)
    155  1.59  christos #define __CTASSERT(x, y)        typedef char __assert ## y[(x) ? 1 : -1]
    156  1.57  christos #endif
    157  1.59  christos #endif /* __FreeBSD__ */
/* The byte-splitting loops walk a uint64_t, so one character must fit in it. */
    158  1.59  christos CTASSERT(MB_LEN_MAX <= sizeof(uint64_t));
    159  1.59  christos #endif /* !__NetBSD__ */
    160  1.60     joerg #endif
    161  1.57  christos 
/*
 * do_hvis: HTTP style encoding (RFC 1808).
 *
 * Alphanumerics and the punctuation listed below are passed on to
 * do_svis(); every other character is written as '%' followed by two
 * lower-case hex digits taken from the low eight bits of c.
 *
 * Returns the advanced output pointer.
 */
static wchar_t *
do_hvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
{
	unsigned int bits;
	int passthru;

	switch (c) {
	/* safe */
	case L'$':
	case L'-':
	case L'_':
	case L'.':
	case L'+':
	/* extra */
	case L'!':
	case L'*':
	case L'\'':
	case L'(':
	case L')':
	case L',':
		passthru = 1;
		break;
	default:
		passthru = iswalnum(c) != 0;
		break;
	}

	if (passthru)
		return do_svis(dst, c, flags, nextc, extra);

	bits = (unsigned int)c;
	*dst++ = L'%';
	*dst++ = xtoa((bits >> 4) & 0xf);
	*dst++ = xtoa(bits & 0xf);
	return dst;
}
    183  1.27     enami 
/*
 * do_mvis: Quoted-Printable MIME encoding (RFC 2045).
 * Long lines and CRLF are not handled.
 *
 * A character other than newline is written as '=' plus two upper-case
 * hex digits (low eight bits of c) when it is
 *   - white space directly followed by CR or LF,
 *   - not white space and below 33, equal to 61 or above 126, or
 *   - one of  # $ @ [ \ ] ^ ` { | } ~
 * Anything else is passed on to do_svis().
 *
 * Returns the advanced output pointer.
 */
static wchar_t *
do_mvis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
{
	int quote = 0;

	if (c != L'\n') {
		if (iswspace(c)) {
			/* Space at the end of the line */
			if (nextc == L'\r' || nextc == L'\n')
				quote = 1;
		} else if (c < 33 || c == 61 || c > 126) {
			/* Out of range */
			quote = 1;
		}

		/* Specific char to be escaped */
		if (!quote && wcschr(L"#$@[\\]^`{|}~", c) != NULL)
			quote = 1;
	}

	if (!quote)
		return do_svis(dst, c, flags, nextc, extra);

	*dst++ = L'=';
	*dst++ = XTOA(((unsigned int)c >> 4) & 0xf);
	*dst++ = XTOA((unsigned int)c & 0xf);
	return dst;
}
    205  1.39  christos 
    206  1.39  christos /*
    207  1.54  christos  * Output single byte of multibyte character.
    208  1.15  wennmach  */
    209  1.46  christos static wchar_t *
    210  1.54  christos do_mbyte(wchar_t *dst, wint_t c, int flags, wint_t nextc, int iswextra)
    211  1.37       dsl {
    212  1.54  christos 	if (flags & VIS_CSTYLE) {
    213  1.37       dsl 		switch (c) {
    214  1.46  christos 		case L'\n':
    215  1.46  christos 			*dst++ = L'\\'; *dst++ = L'n';
    216  1.37       dsl 			return dst;
    217  1.46  christos 		case L'\r':
    218  1.46  christos 			*dst++ = L'\\'; *dst++ = L'r';
    219  1.37       dsl 			return dst;
    220  1.46  christos 		case L'\b':
    221  1.46  christos 			*dst++ = L'\\'; *dst++ = L'b';
    222  1.37       dsl 			return dst;
    223  1.37       dsl 		case BELL:
    224  1.46  christos 			*dst++ = L'\\'; *dst++ = L'a';
    225  1.37       dsl 			return dst;
    226  1.46  christos 		case L'\v':
    227  1.46  christos 			*dst++ = L'\\'; *dst++ = L'v';
    228  1.37       dsl 			return dst;
    229  1.46  christos 		case L'\t':
    230  1.46  christos 			*dst++ = L'\\'; *dst++ = L't';
    231  1.37       dsl 			return dst;
    232  1.46  christos 		case L'\f':
    233  1.46  christos 			*dst++ = L'\\'; *dst++ = L'f';
    234  1.37       dsl 			return dst;
    235  1.46  christos 		case L' ':
    236  1.46  christos 			*dst++ = L'\\'; *dst++ = L's';
    237  1.37       dsl 			return dst;
    238  1.46  christos 		case L'\0':
    239  1.46  christos 			*dst++ = L'\\'; *dst++ = L'0';
    240  1.46  christos 			if (iswoctal(nextc)) {
    241  1.46  christos 				*dst++ = L'0';
    242  1.46  christos 				*dst++ = L'0';
    243  1.37       dsl 			}
    244  1.37       dsl 			return dst;
    245  1.65       roy 		/* We cannot encode these characters in VIS_CSTYLE
    246  1.65       roy 		 * because they special meaning */
    247  1.65       roy 		case L'n':
    248  1.65       roy 		case L'r':
    249  1.65       roy 		case L'b':
    250  1.65       roy 		case L'a':
    251  1.65       roy 		case L'v':
    252  1.65       roy 		case L't':
    253  1.65       roy 		case L'f':
    254  1.65       roy 		case L's':
    255  1.65       roy 		case L'0':
    256  1.65       roy 		case L'M':
    257  1.65       roy 		case L'^':
    258  1.66       roy 		case L'$': /* vis(1) -l */
    259  1.65       roy 			break;
    260  1.37       dsl 		default:
    261  1.68  christos 			if (ISGRAPH(flags, c) && !iswoctal(c)) {
    262  1.46  christos 				*dst++ = L'\\';
    263  1.46  christos 				*dst++ = c;
    264  1.37       dsl 				return dst;
    265  1.37       dsl 			}
    266  1.37       dsl 		}
    267  1.37       dsl 	}
    268  1.54  christos 	if (iswextra || ((c & 0177) == L' ') || (flags & VIS_OCTAL)) {
    269  1.46  christos 		*dst++ = L'\\';
    270  1.46  christos 		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + L'0';
    271  1.46  christos 		*dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + L'0';
    272  1.46  christos 		*dst++ =			     (c	      & 07) + L'0';
    273  1.37       dsl 	} else {
    274  1.54  christos 		if ((flags & VIS_NOSLASH) == 0)
    275  1.46  christos 			*dst++ = L'\\';
    276  1.55  christos 
    277  1.37       dsl 		if (c & 0200) {
    278  1.46  christos 			c &= 0177;
    279  1.46  christos 			*dst++ = L'M';
    280  1.37       dsl 		}
    281  1.55  christos 
    282  1.46  christos 		if (iswcntrl(c)) {
    283  1.46  christos 			*dst++ = L'^';
    284  1.37       dsl 			if (c == 0177)
    285  1.46  christos 				*dst++ = L'?';
    286  1.37       dsl 			else
    287  1.46  christos 				*dst++ = c + L'@';
    288  1.37       dsl 		} else {
    289  1.46  christos 			*dst++ = L'-';
    290  1.46  christos 			*dst++ = c;
    291  1.37       dsl 		}
    292  1.37       dsl 	}
    293  1.54  christos 
    294  1.54  christos 	return dst;
    295  1.54  christos }
    296  1.54  christos 
    297  1.54  christos /*
    298  1.54  christos  * This is do_vis, the central code of vis.
    299  1.54  christos  * dst:	      Pointer to the destination buffer
    300  1.54  christos  * c:	      Character to encode
    301  1.59  christos  * flags:     Flags word
    302  1.54  christos  * nextc:     The character following 'c'
    303  1.54  christos  * extra:     Pointer to the list of extra characters to be
    304  1.54  christos  *	      backslash-protected.
    305  1.54  christos  */
    306  1.54  christos static wchar_t *
    307  1.54  christos do_svis(wchar_t *dst, wint_t c, int flags, wint_t nextc, const wchar_t *extra)
    308  1.54  christos {
    309  1.54  christos 	int iswextra, i, shft;
    310  1.57  christos 	uint64_t bmsk, wmsk;
    311  1.54  christos 
    312  1.54  christos 	iswextra = wcschr(extra, c) != NULL;
    313  1.68  christos 	if (!iswextra && (ISGRAPH(flags, c) || iswwhite(c) ||
    314  1.54  christos 	    ((flags & VIS_SAFE) && iswsafe(c)))) {
    315  1.54  christos 		*dst++ = c;
    316  1.54  christos 		return dst;
    317  1.54  christos 	}
    318  1.54  christos 
    319  1.54  christos 	/* See comment in istrsenvisx() output loop, below. */
    320  1.54  christos 	wmsk = 0;
    321  1.59  christos 	for (i = sizeof(wmsk) - 1; i >= 0; i--) {
    322  1.54  christos 		shft = i * NBBY;
    323  1.59  christos 		bmsk = (uint64_t)0xffLL << shft;
    324  1.54  christos 		wmsk |= bmsk;
    325  1.54  christos 		if ((c & wmsk) || i == 0)
    326  1.54  christos 			dst = do_mbyte(dst, (wint_t)(
    327  1.59  christos 			    (uint64_t)(c & bmsk) >> shft),
    328  1.54  christos 			    flags, nextc, iswextra);
    329  1.54  christos 	}
    330  1.54  christos 
    331  1.37       dsl 	return dst;
    332  1.37       dsl }
    333  1.15  wennmach 
    334  1.46  christos typedef wchar_t *(*visfun_t)(wchar_t *, wint_t, int, wint_t, const wchar_t *);
    335  1.39  christos 
    336  1.39  christos /*
    337  1.39  christos  * Return the appropriate encoding function depending on the flags given.
    338  1.39  christos  */
    339  1.39  christos static visfun_t
    340  1.54  christos getvisfun(int flags)
    341  1.39  christos {
    342  1.54  christos 	if (flags & VIS_HTTPSTYLE)
    343  1.39  christos 		return do_hvis;
    344  1.54  christos 	if (flags & VIS_MIMESTYLE)
    345  1.39  christos 		return do_mvis;
    346  1.39  christos 	return do_svis;
    347  1.39  christos }
    348  1.15  wennmach 
    349  1.15  wennmach /*
    350  1.54  christos  * Expand list of extra characters to not visually encode.
    351  1.54  christos  */
    352  1.54  christos static wchar_t *
    353  1.54  christos makeextralist(int flags, const char *src)
    354  1.54  christos {
    355  1.54  christos 	wchar_t *dst, *d;
    356  1.54  christos 	size_t len;
    357  1.64  christos 	const wchar_t *s;
    358  1.75  christos 	mbstate_t mbstate;
    359  1.54  christos 
    360  1.54  christos 	len = strlen(src);
    361  1.54  christos 	if ((dst = calloc(len + MAXEXTRAS, sizeof(*dst))) == NULL)
    362  1.54  christos 		return NULL;
    363  1.54  christos 
    364  1.75  christos 	memset(&mbstate, 0, sizeof(mbstate));
    365  1.75  christos 	if ((flags & VIS_NOLOCALE)
    366  1.75  christos 	    || mbsrtowcs(dst, &src, len, &mbstate) == (size_t)-1) {
    367  1.58      tron 		size_t i;
    368  1.58      tron 		for (i = 0; i < len; i++)
    369  1.63  christos 			dst[i] = (wchar_t)(u_char)src[i];
    370  1.54  christos 		d = dst + len;
    371  1.54  christos 	} else
    372  1.54  christos 		d = dst + wcslen(dst);
    373  1.54  christos 
    374  1.63  christos 	if (flags & VIS_GLOB)
    375  1.64  christos 		for (s = char_glob; *s; *d++ = *s++)
    376  1.63  christos 			continue;
    377  1.63  christos 
    378  1.63  christos 	if (flags & VIS_SHELL)
    379  1.64  christos 		for (s = char_shell; *s; *d++ = *s++)
    380  1.63  christos 			continue;
    381  1.54  christos 
    382  1.55  christos 	if (flags & VIS_SP) *d++ = L' ';
    383  1.55  christos 	if (flags & VIS_TAB) *d++ = L'\t';
    384  1.54  christos 	if (flags & VIS_NL) *d++ = L'\n';
    385  1.73  christos 	if (flags & VIS_DQ) *d++ = L'"';
    386  1.54  christos 	if ((flags & VIS_NOSLASH) == 0) *d++ = L'\\';
    387  1.54  christos 	*d = L'\0';
    388  1.54  christos 
    389  1.54  christos 	return dst;
    390  1.54  christos }
    391  1.54  christos 
    392  1.54  christos /*
    393  1.54  christos  * istrsenvisx()
    394  1.46  christos  * 	The main internal function.
    395  1.46  christos  *	All user-visible functions call this one.
    396  1.15  wennmach  */
    397  1.46  christos static int
    398  1.71  christos istrsenvisx(char **mbdstp, size_t *dlen, const char *mbsrc, size_t mblength,
    399  1.54  christos     int flags, const char *mbextra, int *cerr_ptr)
    400  1.15  wennmach {
    401  1.82  riastrad 	char mbbuf[MB_LEN_MAX];
    402  1.54  christos 	wchar_t *dst, *src, *pdst, *psrc, *start, *extra;
    403  1.50  christos 	size_t len, olen;
    404  1.57  christos 	uint64_t bmsk, wmsk;
    405  1.57  christos 	wint_t c;
    406  1.39  christos 	visfun_t f;
    407  1.87  christos 	int cerr, error = -1, i, shft;
    408  1.87  christos 	ssize_t clen = 0;
    409  1.82  riastrad 	char *mbdst, *mbwrite, *mdst;
    410  1.78  riastrad 	size_t mbslength;
    411  1.77  riastrad 	size_t maxolen;
    412  1.75  christos 	mbstate_t mbstate;
    413  1.46  christos 
    414  1.71  christos 	_DIAGASSERT(mbdstp != NULL);
    415  1.61  christos 	_DIAGASSERT(mbsrc != NULL || mblength == 0);
    416  1.46  christos 	_DIAGASSERT(mbextra != NULL);
    417  1.46  christos 
    418  1.78  riastrad 	mbslength = mblength;
    419  1.72  christos 	/*
    420  1.72  christos 	 * When inputting a single character, we must also read in the
    421  1.72  christos 	 * next character for nextc, the look-ahead character.
    422  1.72  christos 	 */
    423  1.72  christos 	if (mbslength == 1)
    424  1.72  christos 		mbslength++;
    425  1.72  christos 
    426  1.53  christos 	/*
    427  1.53  christos 	 * Input (mbsrc) is a char string considered to be multibyte
    428  1.53  christos 	 * characters.  The input loop will read this string pulling
    429  1.53  christos 	 * one character, possibly multiple bytes, from mbsrc and
    430  1.53  christos 	 * converting each to wchar_t in src.
    431  1.53  christos 	 *
    432  1.53  christos 	 * The vis conversion will be done using the wide char
    433  1.53  christos 	 * wchar_t string.
    434  1.53  christos 	 *
    435  1.53  christos 	 * This will then be converted back to a multibyte string to
    436  1.53  christos 	 * return to the caller.
    437  1.53  christos 	 */
    438  1.53  christos 
    439  1.79  riastrad 	/*
    440  1.79  riastrad 	 * Guarantee the arithmetic on input to calloc won't overflow.
    441  1.79  riastrad 	 */
    442  1.79  riastrad 	if (mbslength > (SIZE_MAX - 1)/16) {
    443  1.79  riastrad 		errno = ENOMEM;
    444  1.79  riastrad 		return -1;
    445  1.79  riastrad 	}
    446  1.79  riastrad 
    447  1.53  christos 	/* Allocate space for the wide char strings */
    448  1.54  christos 	psrc = pdst = extra = NULL;
    449  1.71  christos 	mdst = NULL;
    450  1.72  christos 	if ((psrc = calloc(mbslength + 1, sizeof(*psrc))) == NULL)
    451  1.46  christos 		return -1;
    452  1.74  christos 	if ((pdst = calloc((16 * mbslength) + 1, sizeof(*pdst))) == NULL)
    453  1.46  christos 		goto out;
    454  1.71  christos 	if (*mbdstp == NULL) {
    455  1.74  christos 		if ((mdst = calloc((16 * mbslength) + 1, sizeof(*mdst))) == NULL)
    456  1.71  christos 			goto out;
    457  1.71  christos 		*mbdstp = mdst;
    458  1.71  christos 	}
    459  1.71  christos 
    460  1.71  christos 	mbdst = *mbdstp;
    461  1.46  christos 	dst = pdst;
    462  1.46  christos 	src = psrc;
    463  1.46  christos 
    464  1.67  christos 	if (flags & VIS_NOLOCALE) {
    465  1.67  christos 		/* Do one byte at a time conversion */
    466  1.67  christos 		cerr = 1;
    467  1.67  christos 	} else {
    468  1.67  christos 		/* Use caller's multibyte conversion error flag. */
    469  1.67  christos 		cerr = cerr_ptr ? *cerr_ptr : 0;
    470  1.67  christos 	}
    471  1.54  christos 
    472  1.53  christos 	/*
    473  1.53  christos 	 * Input loop.
    474  1.53  christos 	 * Handle up to mblength characters (not bytes).  We do not
    475  1.53  christos 	 * stop at NULs because we may be processing a block of data
    476  1.54  christos 	 * that includes NULs.
    477  1.53  christos 	 */
    478  1.75  christos 	memset(&mbstate, 0, sizeof(mbstate));
    479  1.53  christos 	while (mbslength > 0) {
    480  1.53  christos 		/* Convert one multibyte character to wchar_t. */
    481  1.78  riastrad 		if (!cerr) {
    482  1.87  christos 			clen = (ssize_t)mbrtowc(src, mbsrc,
    483  1.76  riastrad 			    (mbslength < MB_LEN_MAX
    484  1.76  riastrad 				? mbslength
    485  1.76  riastrad 				: MB_LEN_MAX),
    486  1.75  christos 			    &mbstate);
    487  1.78  riastrad 			assert(clen < 0 || (size_t)clen <= mbslength);
    488  1.78  riastrad 			assert(clen <= MB_LEN_MAX);
    489  1.78  riastrad 		}
    490  1.54  christos 		if (cerr || clen < 0) {
    491  1.53  christos 			/* Conversion error, process as a byte instead. */
    492  1.54  christos 			*src = (wint_t)(u_char)*mbsrc;
    493  1.50  christos 			clen = 1;
    494  1.54  christos 			cerr = 1;
    495  1.51  christos 		}
    496  1.74  christos 		if (clen == 0) {
    497  1.53  christos 			/*
    498  1.53  christos 			 * NUL in input gives 0 return value. process
    499  1.54  christos 			 * as single NUL byte and keep going.
    500  1.53  christos 			 */
    501  1.50  christos 			clen = 1;
    502  1.74  christos 		}
    503  1.78  riastrad 		/*
    504  1.78  riastrad 		 * Let n := MIN(mbslength, MB_LEN_MAX).  We have:
    505  1.78  riastrad 		 *
    506  1.78  riastrad 		 *	mbslength >= 1
    507  1.78  riastrad 		 *	mbrtowc(..., n, &mbstate) <= n,
    508  1.78  riastrad 		 *		by the contract of mbrtowc
    509  1.78  riastrad 		 *
    510  1.78  riastrad 		 *  clen is either
    511  1.78  riastrad 		 *  (a) mbrtowc(..., n, &mbstate), in which case
    512  1.78  riastrad 		 *      clen <= n <= mbslength; or
    513  1.78  riastrad 		 *  (b) 1, in which case clen = 1 <= mbslength.
    514  1.78  riastrad 		 */
    515  1.78  riastrad 		assert(clen > 0);
    516  1.78  riastrad 		assert((size_t)clen <= mbslength);
    517  1.54  christos 		/* Advance buffer character pointer. */
    518  1.50  christos 		src++;
    519  1.53  christos 		/* Advance input pointer by number of bytes read. */
    520  1.50  christos 		mbsrc += clen;
    521  1.54  christos 		/* Decrement input byte count. */
    522  1.50  christos 		mbslength -= clen;
    523  1.46  christos 	}
    524  1.55  christos 	len = src - psrc;
    525  1.50  christos 	src = psrc;
    526  1.72  christos 
    527  1.53  christos 	/*
    528  1.53  christos 	 * In the single character input case, we will have actually
    529  1.53  christos 	 * processed two characters, c and nextc.  Reset len back to
    530  1.53  christos 	 * just a single character.
    531  1.53  christos 	 */
    532  1.53  christos 	if (mblength < len)
    533  1.53  christos 		len = mblength;
    534  1.46  christos 
    535  1.53  christos 	/* Convert extra argument to list of characters for this mode. */
    536  1.54  christos 	extra = makeextralist(flags, mbextra);
    537  1.54  christos 	if (!extra) {
    538  1.44  christos 		if (dlen && *dlen == 0) {
    539  1.44  christos 			errno = ENOSPC;
    540  1.46  christos 			goto out;
    541  1.44  christos 		}
    542  1.71  christos 		*mbdst = '\0';	/* can't create extra, return "" */
    543  1.46  christos 		error = 0;
    544  1.46  christos 		goto out;
    545  1.31     lukem 	}
    546  1.46  christos 
    547  1.53  christos 	/* Look up which processing function to call. */
    548  1.54  christos 	f = getvisfun(flags);
    549  1.46  christos 
    550  1.53  christos 	/*
    551  1.53  christos 	 * Main processing loop.
    552  1.53  christos 	 * Call do_Xvis processing function one character at a time
    553  1.53  christos 	 * with next character available for look-ahead.
    554  1.53  christos 	 */
    555  1.46  christos 	for (start = dst; len > 0; len--) {
    556  1.46  christos 		c = *src++;
    557  1.54  christos 		dst = (*f)(dst, c, flags, len >= 1 ? *src : L'\0', extra);
    558  1.46  christos 		if (dst == NULL) {
    559  1.46  christos 			errno = ENOSPC;
    560  1.46  christos 			goto out;
    561  1.46  christos 		}
    562  1.46  christos 	}
    563  1.46  christos 
    564  1.54  christos 	/* Terminate the string in the buffer. */
    565  1.54  christos 	*dst = L'\0';
    566  1.54  christos 
    567  1.54  christos 	/*
    568  1.54  christos 	 * Output loop.
    569  1.54  christos 	 * Convert wchar_t string back to multibyte output string.
    570  1.54  christos 	 * If we have hit a multi-byte conversion error on input,
    571  1.54  christos 	 * output byte-by-byte here.  Else use wctomb().
    572  1.54  christos 	 */
    573  1.54  christos 	len = wcslen(start);
    574  1.81  riastrad 	if (dlen) {
    575  1.81  riastrad 		maxolen = *dlen;
    576  1.83  riastrad 		if (maxolen == 0) {
    577  1.83  riastrad 			errno = ENOSPC;
    578  1.83  riastrad 			goto out;
    579  1.83  riastrad 		}
    580  1.81  riastrad 	} else {
    581  1.81  riastrad 		if (len > (SIZE_MAX - 1)/MB_LEN_MAX) {
    582  1.81  riastrad 			errno = ENOSPC;
    583  1.81  riastrad 			goto out;
    584  1.81  riastrad 		}
    585  1.81  riastrad 		maxolen = len*MB_LEN_MAX + 1;
    586  1.81  riastrad 	}
    587  1.54  christos 	olen = 0;
    588  1.75  christos 	memset(&mbstate, 0, sizeof(mbstate));
    589  1.54  christos 	for (dst = start; len > 0; len--) {
    590  1.82  riastrad 		if (!cerr) {
    591  1.82  riastrad 			/*
    592  1.82  riastrad 			 * If we have at least MB_CUR_MAX bytes in the buffer,
    593  1.82  riastrad 			 * we'll just do the conversion in-place into mbdst.  We
    594  1.82  riastrad 			 * need to be a little more conservative when we get to
    595  1.82  riastrad 			 * the end of the buffer, as we may not have MB_CUR_MAX
    596  1.82  riastrad 			 * bytes but we may not need it.
    597  1.82  riastrad 			 */
    598  1.82  riastrad 			if (maxolen - olen > MB_CUR_MAX)
    599  1.82  riastrad 				mbwrite = mbdst;
    600  1.82  riastrad 			else
    601  1.82  riastrad 				mbwrite = mbbuf;
    602  1.87  christos 			clen = (ssize_t)wcrtomb(mbwrite, *dst, &mbstate);
    603  1.82  riastrad 			if (clen > 0 && mbwrite != mbdst) {
    604  1.82  riastrad 				/*
    605  1.82  riastrad 				 * Don't break past our output limit, noting
    606  1.82  riastrad 				 * that maxolen includes the nul terminator so
    607  1.82  riastrad 				 * we can't write past maxolen - 1 here.
    608  1.82  riastrad 				 */
    609  1.82  riastrad 				if (olen + clen >= maxolen) {
    610  1.82  riastrad 					errno = ENOSPC;
    611  1.82  riastrad 					goto out;
    612  1.82  riastrad 				}
    613  1.82  riastrad 
    614  1.82  riastrad 				memcpy(mbdst, mbwrite, clen);
    615  1.82  riastrad 			}
    616  1.82  riastrad 		}
    617  1.54  christos 		if (cerr || clen < 0) {
    618  1.54  christos 			/*
    619  1.54  christos 			 * Conversion error, process as a byte(s) instead.
    620  1.54  christos 			 * Examine each byte and higher-order bytes for
    621  1.55  christos 			 * data.  E.g.,
    622  1.59  christos 			 *	0x000000000000a264 -> a2 64
    623  1.59  christos 			 *	0x000000001f00a264 -> 1f 00 a2 64
    624  1.54  christos 			 */
    625  1.54  christos 			clen = 0;
    626  1.54  christos 			wmsk = 0;
    627  1.59  christos 			for (i = sizeof(wmsk) - 1; i >= 0; i--) {
    628  1.54  christos 				shft = i * NBBY;
    629  1.59  christos 				bmsk = (uint64_t)0xffLL << shft;
    630  1.54  christos 				wmsk |= bmsk;
    631  1.82  riastrad 				if ((*dst & wmsk) || i == 0) {
    632  1.82  riastrad 					if (olen + clen + 1 >= maxolen) {
    633  1.82  riastrad 						errno = ENOSPC;
    634  1.82  riastrad 						goto out;
    635  1.82  riastrad 					}
    636  1.82  riastrad 
    637  1.59  christos 					mbdst[clen++] = (char)(
    638  1.59  christos 					    (uint64_t)(*dst & bmsk) >>
    639  1.59  christos 					    shft);
    640  1.82  riastrad 				}
    641  1.54  christos 			}
    642  1.54  christos 			cerr = 1;
    643  1.54  christos 		}
    644  1.82  riastrad 
    645  1.82  riastrad 		/*
    646  1.82  riastrad 		 * We'll be dereferencing mbdst[clen] after this to write the
    647  1.82  riastrad 		 * nul terminator; the above paths should have checked for a
    648  1.82  riastrad 		 * possible overflow already.
    649  1.82  riastrad 		 */
    650  1.82  riastrad 		assert(olen + clen < maxolen);
    651  1.82  riastrad 
    652  1.54  christos 		/* Advance output pointer by number of bytes written. */
    653  1.54  christos 		mbdst += clen;
    654  1.54  christos 		/* Advance buffer character pointer. */
    655  1.54  christos 		dst++;
    656  1.88    andvar 		/* Increment output character count. */
    657  1.54  christos 		olen += clen;
    658  1.54  christos 	}
    659  1.54  christos 
    660  1.53  christos 	/* Terminate the output string. */
    661  1.83  riastrad 	assert(olen < maxolen);
    662  1.54  christos 	*mbdst = '\0';
    663  1.46  christos 
    664  1.67  christos 	if (flags & VIS_NOLOCALE) {
    665  1.67  christos 		/* Pass conversion error flag out. */
    666  1.67  christos 		if (cerr_ptr)
    667  1.67  christos 			*cerr_ptr = cerr;
    668  1.67  christos 	}
    669  1.46  christos 
    670  1.46  christos 	free(extra);
    671  1.46  christos 	free(pdst);
    672  1.46  christos 	free(psrc);
    673  1.46  christos 
    674  1.46  christos 	return (int)olen;
    675  1.46  christos out:
    676  1.46  christos 	free(extra);
    677  1.46  christos 	free(pdst);
    678  1.46  christos 	free(psrc);
    679  1.71  christos 	free(mdst);
    680  1.46  christos 	return error;
    681  1.15  wennmach }
    682  1.62  christos 
/*
 * istrsenvisxl - front end to istrsenvisx() that measures the source itself.
 *
 *	The source length is taken with strlen(); a NULL mbsrc is treated
 *	as having length 0.  All other arguments are forwarded untouched
 *	and the result of istrsenvisx() is returned as is.
 */
static int
istrsenvisxl(char **mbdstp, size_t *dlen, const char *mbsrc,
    int flags, const char *mbextra, int *cerr_ptr)
{
	size_t mbslen = 0;

	/* strlen(NULL) is undefined, so only measure a real string. */
	if (mbsrc != NULL)
		mbslen = strlen(mbsrc);

	return istrsenvisx(mbdstp, dlen, mbsrc, mbslen, flags, mbextra,
	    cerr_ptr);
}
    690  1.62  christos 
    691  1.46  christos #endif
    692  1.46  christos 
    693  1.46  christos #if !HAVE_SVIS
    694  1.46  christos /*
    695  1.46  christos  *	The "svis" variants all take an "extra" arg that is a pointer
    696  1.46  christos  *	to a NUL-terminated list of characters to be encoded, too.
    697  1.46  christos  *	These functions are useful, e.g., to encode strings in such a
    698  1.46  christos  *	way that they are not interpreted by a shell.
    699  1.46  christos  */
    700  1.15  wennmach 
/*
 * svis - encode the single character c into mbdst.
 *
 *	c and nextc are packed into a two-byte buffer that is handed to
 *	istrsenvisx() with a source length of 1 and no destination length.
 *	mbextra is forwarded as the list of extra characters.
 *
 *	Returns mbdst advanced by the count istrsenvisx() reports, or NULL
 *	when that count is negative.
 */
char *
svis(char *mbdst, int c, int flags, int nextc, const char *mbextra)
{
	char pair[2] = { (char)c, (char)nextc };
	int nout;

	nout = istrsenvisx(&mbdst, NULL, pair, 1, flags, mbextra, NULL);
	return nout < 0 ? NULL : mbdst + nout;
}
    715  1.44  christos 
/*
 * snvis - encode the single character c into mbdst, passing dlen along.
 *
 *	Same as packing c and nextc into a two-byte buffer and calling
 *	istrsenvisx() with a source length of 1; the address of the local
 *	copy of dlen is supplied as the destination length.
 *
 *	Returns mbdst advanced by the count istrsenvisx() reports, or NULL
 *	when that count is negative.
 */
char *
snvis(char *mbdst, size_t dlen, int c, int flags, int nextc, const char *mbextra)
{
	char pair[2] = { (char)c, (char)nextc };
	int nout;

	nout = istrsenvisx(&mbdst, &dlen, pair, 1, flags, mbextra, NULL);
	if (nout >= 0)
		return mbdst + nout;
	return NULL;
}
    730  1.15  wennmach 
/*
 * strsvis - encode the NUL-terminated string mbsrc into mbdst.
 *
 *	Thin wrapper around istrsenvisxl(): no destination length and no
 *	conversion-error pointer are supplied; mbextra is forwarded.
 *	Returns the value produced by istrsenvisxl().
 */
int
strsvis(char *mbdst, const char *mbsrc, int flags, const char *mbextra)
{
	int nout;

	nout = istrsenvisxl(&mbdst, NULL, mbsrc, flags, mbextra, NULL);
	return nout;
}
    736  1.15  wennmach 
/*
 * strsnvis - encode the NUL-terminated string mbsrc into mbdst, passing
 * dlen along as the destination length.
 *
 *	Thin wrapper around istrsenvisxl(); mbextra is forwarded and no
 *	conversion-error pointer is supplied.  Returns the value produced
 *	by istrsenvisxl().
 */
int
strsnvis(char *mbdst, size_t dlen, const char *mbsrc, int flags, const char *mbextra)
{
	int nout;

	nout = istrsenvisxl(&mbdst, &dlen, mbsrc, flags, mbextra, NULL);
	return nout;
}
    742  1.44  christos 
/*
 * strsvisx - encode len bytes of mbsrc into mbdst.
 *
 *	Thin wrapper around istrsenvisx(): no destination length and no
 *	conversion-error pointer are supplied; mbextra is forwarded.
 *	Returns the value produced by istrsenvisx().
 */
int
strsvisx(char *mbdst, const char *mbsrc, size_t len, int flags, const char *mbextra)
{
	int nout;

	nout = istrsenvisx(&mbdst, NULL, mbsrc, len, flags, mbextra, NULL);
	return nout;
}
    748  1.44  christos 
/*
 * strsnvisx - encode len bytes of mbsrc into mbdst, passing dlen along
 * as the destination length.
 *
 *	Thin wrapper around istrsenvisx(); mbextra is forwarded and no
 *	conversion-error pointer is supplied.  Returns the value produced
 *	by istrsenvisx().
 */
int
strsnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags,
    const char *mbextra)
{
	int nout;

	nout = istrsenvisx(&mbdst, &dlen, mbsrc, len, flags, mbextra, NULL);
	return nout;
}
    755  1.54  christos 
/*
 * strsenvisx - encode len bytes of mbsrc into mbdst, passing dlen along
 * as the destination length and exposing the conversion-error flag.
 *
 *	Thin wrapper around istrsenvisx(); mbextra and cerr_ptr are both
 *	forwarded.  istrsenvisx() stores its conversion-error flag through
 *	cerr_ptr only when VIS_NOLOCALE is set in flags and cerr_ptr is
 *	not NULL.  Returns the value produced by istrsenvisx().
 */
int
strsenvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags,
    const char *mbextra, int *cerr_ptr)
{
	int nout;

	nout = istrsenvisx(&mbdst, &dlen, mbsrc, len, flags, mbextra,
	    cerr_ptr);
	return nout;
}
    762  1.24     pooka #endif
    763  1.15  wennmach 
    764  1.24     pooka #if !HAVE_VIS
/*
 * vis - visually encode characters
 *
 *	Encodes the single character c into mbdst.  c and nextc are packed
 *	into a two-byte buffer that is handed to istrsenvisx() with a
 *	source length of 1, no destination length, and an empty list of
 *	extra characters.
 *
 *	Returns mbdst advanced by the count istrsenvisx() reports, or NULL
 *	when that count is negative.
 */
char *
vis(char *mbdst, int c, int flags, int nextc)
{
	char pair[2] = { (char)c, (char)nextc };
	int nout;

	nout = istrsenvisx(&mbdst, NULL, pair, 1, flags, "", NULL);
	return nout < 0 ? NULL : mbdst + nout;
}
    782   1.1       cgd 
/*
 * nvis - encode the single character c into mbdst, passing dlen along.
 *
 *	c and nextc are packed into a two-byte buffer that is handed to
 *	istrsenvisx() with a source length of 1 and an empty list of extra
 *	characters; the address of the local copy of dlen is supplied as
 *	the destination length.
 *
 *	Returns mbdst advanced by the count istrsenvisx() reports, or NULL
 *	when that count is negative.
 */
char *
nvis(char *mbdst, size_t dlen, int c, int flags, int nextc)
{
	char pair[2] = { (char)c, (char)nextc };
	int nout;

	nout = istrsenvisx(&mbdst, &dlen, pair, 1, flags, "", NULL);
	if (nout >= 0)
		return mbdst + nout;
	return NULL;
}
    797  1.44  christos 
/*
 * strvis - visually encode characters from src into dst
 *
 *	Dst must be 4 times the size of src to account for possible
 *	expansion.  The length of dst, not including the trailing NUL,
 *	is returned.
 *
 *	Implemented as a call to istrsenvisxl() with no destination
 *	length, an empty list of extra characters and no conversion-error
 *	pointer.
 */

int
strvis(char *mbdst, const char *mbsrc, int flags)
{
	int nout;

	nout = istrsenvisxl(&mbdst, NULL, mbsrc, flags, "", NULL);
	return nout;
}
    811  1.15  wennmach 
/*
 * strnvis - encode the NUL-terminated string mbsrc into mbdst, passing
 * dlen along as the destination length.
 *
 *	Thin wrapper around istrsenvisxl() with an empty list of extra
 *	characters and no conversion-error pointer.  Returns the value
 *	produced by istrsenvisxl().
 */
int
strnvis(char *mbdst, size_t dlen, const char *mbsrc, int flags)
{
	int nout;

	nout = istrsenvisxl(&mbdst, &dlen, mbsrc, flags, "", NULL);
	return nout;
}
    817  1.71  christos 
/*
 * stravis - encode the NUL-terminated string mbsrc, with the destination
 * pointer reset to NULL first.
 *
 *	*mbdstp is cleared before istrsenvisxl() is called with no
 *	destination length, an empty list of extra characters and no
 *	conversion-error pointer.  Returns the value produced by
 *	istrsenvisxl().
 *
 *	NOTE(review): presumably a NULL *mbdstp makes istrsenvisx()
 *	allocate the output buffer for the caller to free -- confirm
 *	against istrsenvisx().
 */
int
stravis(char **mbdstp, const char *mbsrc, int flags)
{
	int nout;

	*mbdstp = NULL;
	nout = istrsenvisxl(mbdstp, NULL, mbsrc, flags, "", NULL);
	return nout;
}
    824  1.44  christos 
/*
 * strvisx - visually encode characters from src into dst
 *
 *	Dst must be 4 times the size of src to account for possible
 *	expansion.  The length of dst, not including the trailing NUL,
 *	is returned.
 *
 *	Strvisx encodes exactly len characters from src into dst.
 *	This is useful for encoding a block of data.
 *
 *	Implemented as a call to istrsenvisx() with no destination
 *	length, an empty list of extra characters and no conversion-error
 *	pointer.
 */

int
strvisx(char *mbdst, const char *mbsrc, size_t len, int flags)
{
	int nout;

	nout = istrsenvisx(&mbdst, NULL, mbsrc, len, flags, "", NULL);
	return nout;
}
    841  1.54  christos 
/*
 * strnvisx - encode len bytes of mbsrc into mbdst, passing dlen along as
 * the destination length.
 *
 *	Thin wrapper around istrsenvisx() with an empty list of extra
 *	characters and no conversion-error pointer.  Returns the value
 *	produced by istrsenvisx().
 */
int
strnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags)
{
	int nout;

	nout = istrsenvisx(&mbdst, &dlen, mbsrc, len, flags, "", NULL);
	return nout;
}
    847  1.44  christos 
/*
 * strenvisx - encode len bytes of mbsrc into mbdst, passing dlen along as
 * the destination length and exposing the conversion-error flag.
 *
 *	Thin wrapper around istrsenvisx() with an empty list of extra
 *	characters; cerr_ptr is forwarded.  istrsenvisx() stores its
 *	conversion-error flag through cerr_ptr only when VIS_NOLOCALE is
 *	set in flags and cerr_ptr is not NULL.  Returns the value produced
 *	by istrsenvisx().
 */
int
strenvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flags,
    int *cerr_ptr)
{
	int nout;

	nout = istrsenvisx(&mbdst, &dlen, mbsrc, len, flags, "", cerr_ptr);
	return nout;
}
    854  1.20        tv #endif
    855