vis.c revision 1.48 1 /* $NetBSD: vis.c,v 1.48 2013/02/13 12:15:09 pooka Exp $ */
2
3 /*-
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1999, 2005 The NetBSD Foundation, Inc.
34 * All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
46 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
47 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
49 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
50 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
51 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
52 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
53 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
54 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
55 * POSSIBILITY OF SUCH DAMAGE.
56 */
57
58 #include <sys/cdefs.h>
59 #if defined(LIBC_SCCS) && !defined(lint)
60 __RCSID("$NetBSD: vis.c,v 1.48 2013/02/13 12:15:09 pooka Exp $");
61 #endif /* LIBC_SCCS and not lint */
62 #ifdef __FBSDID
63 __FBSDID("$FreeBSD$");
64 #define _DIAGASSERT(x) assert(x)
65 #endif
66
67 #include "namespace.h"
68 #include <sys/types.h>
69
70 #include <assert.h>
71 #include <vis.h>
72 #include <errno.h>
73 #include <stdlib.h>
74 #include <wchar.h>
75 #include <wctype.h>
76
77 #ifdef __weak_alias
78 __weak_alias(strvisx,_strvisx)
79 #endif
80
81 #if !HAVE_VIS || !HAVE_SVIS
82 #include <ctype.h>
83 #include <limits.h>
84 #include <stdio.h>
85 #include <string.h>
86
/*
 * The reason for going through the trouble of dealing with character
 * encodings in vis(3) is that we use it to safely encode the output of
 * commands.  This safe encoding varies depending on the character set.
 * For example, if we display ps output in French, we don't want to
 * display French characters as M-foo.
 */
94
/* Forward declaration: do_hvis() and do_mvis() fall back to do_svis(). */
static wchar_t *do_svis(wchar_t *, wint_t, int, wint_t, const wchar_t *);

#undef BELL
#define BELL L'\a'

/*
 * Character-class and hex-digit helpers.  Every argument and every
 * expansion is parenthesized so that the macros stay correct when they
 * are handed an expression (e.g. "c & 0177") instead of a plain name.
 */
/* True when the low byte of c is one of the digits '0'..'7'. */
#define iswoctal(c)	(((u_char)(c)) >= L'0' && ((u_char)(c)) <= L'7')
/* True for space, tab and newline. */
#define iswwhite(c)	((c) == L' ' || (c) == L'\t' || (c) == L'\n')
/* True for backspace, bell and carriage return. */
#define iswsafe(c)	((c) == L'\b' || (c) == BELL || (c) == L'\r')
/* Map a value in 0..15 to its lowercase / uppercase hex digit. */
#define xtoa(c)		(L"0123456789abcdef"[(c)])
#define XTOA(c)		(L"0123456789ABCDEF"[(c)])

/*
 * Head-room reserved by MAKEEXTRALIST on top of the copied string; the
 * macro appends at most 8 characters (4 glob characters, space, tab,
 * newline and backslash).
 */
#define MAXEXTRAS	9

/*
 * MAKEEXTRALIST(flag, extra, orig_str)
 *	Allocate a new wide string in `extra' holding a copy of `orig_str'
 *	followed by the characters implied by `flag':
 *	  VIS_GLOB            -> '*', '?', '[', '#'
 *	  VIS_SP              -> ' '
 *	  VIS_TAB             -> '\t'
 *	  VIS_NL              -> '\n'
 *	  VIS_NOSLASH not set -> '\\'
 *	On allocation failure `extra' is left NULL and nothing else is done.
 *	The caller owns the result and must free() it.
 */
#define MAKEEXTRALIST(flag, extra, orig_str)				      \
do {									      \
	const wchar_t *orig = (orig_str);				      \
	const wchar_t *o = orig;					      \
	wchar_t *e;							      \
	while (*o++)							      \
		continue;						      \
	(extra) = calloc((size_t)((o - orig) + MAXEXTRAS),		      \
	    sizeof(*(extra)));						      \
	if (!(extra)) break;						      \
	for (o = orig, e = (extra); (*e++ = *o++) != L'\0';)		      \
		continue;						      \
	e--;								      \
	if ((flag) & VIS_GLOB) {					      \
		*e++ = L'*';						      \
		*e++ = L'?';						      \
		*e++ = L'[';						      \
		*e++ = L'#';						      \
	}								      \
	if ((flag) & VIS_SP) *e++ = L' ';				      \
	if ((flag) & VIS_TAB) *e++ = L'\t';				      \
	if ((flag) & VIS_NL) *e++ = L'\n';				      \
	if (((flag) & VIS_NOSLASH) == 0) *e++ = L'\\';			      \
	*e = L'\0';							      \
} while (/*CONSTCOND*/0)
132
/*
 * do_hvis: encode one character HTTP style (RFC 1808).
 *
 * Alphanumerics and the punctuation listed below are handed on to
 * do_svis(); any other character is written as '%' followed by the two
 * lowercase hex digits of its low byte.
 * Returns the advanced destination pointer.
 */
static wchar_t *
do_hvis(wchar_t *dst, wint_t c, int flag, wint_t nextc, const wchar_t *extra)
{
	const unsigned int code = (unsigned int)c;
	int passthru;

	passthru = (iswalnum(c) != 0);
	if (!passthru) {
		switch (c) {
		/* safe */
		case L'$':
		case L'-':
		case L'_':
		case L'.':
		case L'+':
		/* extra */
		case L'!':
		case L'*':
		case L'\'':
		case L'(':
		case L')':
		case L',':
			passthru = 1;
			break;
		default:
			break;
		}
	}

	if (passthru)
		return do_svis(dst, c, flag, nextc, extra);

	dst[0] = L'%';
	dst[1] = xtoa((code >> 4) & 0xf);
	dst[2] = xtoa(code & 0xf);
	return dst + 3;
}
154
/*
 * do_mvis: encode one character as Quoted-Printable MIME (RFC 2045).
 * NB: no handling of long lines or CRLF.
 *
 * A character other than newline is written as '=' plus two uppercase
 * hex digits of its low byte when it is
 *   - whitespace directly followed by CR or LF,
 *   - non-whitespace outside 33..126, or the value 61, or
 *   - one of the characters in the explicit escape list below.
 * Everything else is delegated to do_svis().
 * Returns the advanced destination pointer.
 */
static wchar_t *
do_mvis(wchar_t *dst, wint_t c, int flag, wint_t nextc, const wchar_t *extra)
{
	int quote = 0;

	if (c != L'\n') {
		if (iswspace(c)) {
			/* Space at the end of the line */
			quote = (nextc == L'\r' || nextc == L'\n');
		} else {
			/* Out of range */
			quote = (c < 33 || c == 61 || c > 126);
		}
		/* Specific char to be escaped */
		if (!quote)
			quote = (wcschr(L"#$@[\\]^`{|}~", c) != NULL);
	}

	if (!quote)
		return do_svis(dst, c, flag, nextc, extra);

	dst[0] = L'=';
	dst[1] = XTOA(((unsigned int)c >> 4) & 0xf);
	dst[2] = XTOA((unsigned int)c & 0xf);
	return dst + 3;
}
176
177 /*
178 * This is do_vis, the central code of vis.
179 * dst: Pointer to the destination buffer
180 * c: Character to encode
181 * flag: Flag word
182 * nextc: The character following 'c'
183 * extra: Pointer to the list of extra characters to be
184 * backslash-protected.
185 */
186 static wchar_t *
187 do_svis(wchar_t *dst, wint_t c, int flag, wint_t nextc, const wchar_t *extra)
188 {
189 int iswextra;
190
191 iswextra = wcschr(extra, c) != NULL;
192 if (!iswextra && (iswgraph(c) || iswwhite(c) ||
193 ((flag & VIS_SAFE) && iswsafe(c)))) {
194 *dst++ = c;
195 return dst;
196 }
197 if (flag & VIS_CSTYLE) {
198 switch (c) {
199 case L'\n':
200 *dst++ = L'\\'; *dst++ = L'n';
201 return dst;
202 case L'\r':
203 *dst++ = L'\\'; *dst++ = L'r';
204 return dst;
205 case L'\b':
206 *dst++ = L'\\'; *dst++ = L'b';
207 return dst;
208 case BELL:
209 *dst++ = L'\\'; *dst++ = L'a';
210 return dst;
211 case L'\v':
212 *dst++ = L'\\'; *dst++ = L'v';
213 return dst;
214 case L'\t':
215 *dst++ = L'\\'; *dst++ = L't';
216 return dst;
217 case L'\f':
218 *dst++ = L'\\'; *dst++ = L'f';
219 return dst;
220 case L' ':
221 *dst++ = L'\\'; *dst++ = L's';
222 return dst;
223 case L'\0':
224 *dst++ = L'\\'; *dst++ = L'0';
225 if (iswoctal(nextc)) {
226 *dst++ = L'0';
227 *dst++ = L'0';
228 }
229 return dst;
230 default:
231 if (iswgraph(c)) {
232 *dst++ = L'\\';
233 *dst++ = c;
234 return dst;
235 }
236 }
237 }
238 if (iswextra || ((c & 0177) == L' ') || (flag & VIS_OCTAL)) {
239 *dst++ = L'\\';
240 *dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + L'0';
241 *dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + L'0';
242 *dst++ = (c & 07) + L'0';
243 } else {
244 if ((flag & VIS_NOSLASH) == 0)
245 *dst++ = L'\\';
246
247 if (c & 0200) {
248 c &= 0177;
249 *dst++ = L'M';
250 }
251
252 if (iswcntrl(c)) {
253 *dst++ = L'^';
254 if (c == 0177)
255 *dst++ = L'?';
256 else
257 *dst++ = c + L'@';
258 } else {
259 *dst++ = L'-';
260 *dst++ = c;
261 }
262 }
263 return dst;
264 }
265
/*
 * Signature shared by the per-character encoders do_svis(), do_hvis() and
 * do_mvis(): (dst, c, flag, nextc, extra) -> advanced dst.
 */
typedef wchar_t *(*visfun_t)(wchar_t *, wint_t, int, wint_t, const wchar_t *);
267
268 /*
269 * Return the appropriate encoding function depending on the flags given.
270 */
271 static visfun_t
272 getvisfun(int flag)
273 {
274 if (flag & VIS_HTTPSTYLE)
275 return do_hvis;
276 if (flag & VIS_MIMESTYLE)
277 return do_mvis;
278 return do_svis;
279 }
280
281 /*
282 * istrsnvisx()
283 * The main internal function.
284 * All user-visible functions call this one.
285 */
286 static int
287 istrsnvisx(char *mbdst, size_t *dlen, const char *mbsrc, size_t mblength,
288 int flag, const char *mbextra)
289 {
290 wchar_t *dst, *src, *pdst, *psrc, *start, *extra, *nextra;
291 size_t len, olen, mbslength;
292 wint_t c;
293 visfun_t f;
294 int clen, error = -1;
295
296 _DIAGASSERT(mbdst != NULL);
297 _DIAGASSERT(mbsrc != NULL);
298 _DIAGASSERT(mbextra != NULL);
299
300 psrc = pdst = extra = nextra = NULL;
301 if (!mblength)
302 mblength = strlen(mbsrc);
303
304 if ((psrc = calloc(mblength + 1, sizeof(*psrc))) == NULL)
305 return -1;
306 if ((pdst = calloc((4 * mblength) + 1, sizeof(*pdst))) == NULL)
307 goto out;
308 if ((extra = calloc((strlen(mbextra) + 1), sizeof(*extra))) == NULL)
309 goto out;
310
311 dst = pdst;
312 src = psrc;
313
314 if (mblength > 1) {
315 mbslength = mblength;
316 while (mbslength) {
317 clen = mbtowc(src, mbsrc, mbslength);
318 if (clen < 0)
319 break;
320 if (clen == 0)
321 clen = 1;
322 src++;
323 mbsrc += clen;
324 mbslength -= clen;
325 }
326 len = src - psrc;
327 src = psrc;
328 } else {
329 len = mblength;
330 src[0] = (wint_t)(u_char)mbsrc[0];
331 src[1] = (wint_t)(u_char)mbsrc[1];
332 }
333 if (mblength < len)
334 len = mblength;
335
336 mbstowcs(extra, mbextra, strlen(mbextra));
337 MAKEEXTRALIST(flag, nextra, extra);
338 if (!nextra) {
339 if (dlen && *dlen == 0) {
340 errno = ENOSPC;
341 goto out;
342 }
343 *mbdst = '\0'; /* can't create nextra, return "" */
344 error = 0;
345 goto out;
346 }
347
348 f = getvisfun(flag);
349
350 for (start = dst; len > 0; len--) {
351 c = *src++;
352 dst = (*f)(dst, c, flag, len >= 1 ? *src : L'\0', nextra);
353 if (dst == NULL) {
354 errno = ENOSPC;
355 goto out;
356 }
357 }
358
359 *dst = L'\0';
360
361 len = dlen ? *dlen : ((wcslen(start) + 1) * MB_LEN_MAX);
362 olen = wcstombs(mbdst, start, len * sizeof(*mbdst));
363
364 free(nextra);
365 free(extra);
366 free(pdst);
367 free(psrc);
368
369 return (int)olen;
370 out:
371 free(nextra);
372 free(extra);
373 free(pdst);
374 free(psrc);
375 return error;
376 }
377 #endif
378
379 #if !HAVE_SVIS
380 /*
381 * The "svis" variants all take an "extra" arg that is a pointer
382 * to a NUL-terminated list of characters to be encoded, too.
383 * These functions are useful e. g. to encode strings in such a
384 * way so that they are not interpreted by a shell.
385 */
386
/*
 * svis - encode the single character c, with nextc as look-ahead, into
 * mbdst, additionally escaping the characters listed in mbextra.
 * Returns mbdst advanced by the count reported by istrsnvisx(), or NULL
 * when istrsnvisx() fails.
 */
char *
svis(char *mbdst, int c, int flag, int nextc, const char *mbextra)
{
	char pair[2];
	int written;

	pair[1] = nextc;
	pair[0] = c;

	written = istrsnvisx(mbdst, NULL, pair, 1, flag, mbextra);
	return (written < 0) ? NULL : mbdst + written;
}
401
/*
 * snvis - like svis(), but the size of mbdst (dlen bytes) is passed on to
 * istrsnvisx().
 * Returns mbdst advanced by the count reported by istrsnvisx(), or NULL
 * when istrsnvisx() fails.
 */
char *
snvis(char *mbdst, size_t dlen, int c, int flag, int nextc, const char *mbextra)
{
	char pair[2];
	int written;

	pair[1] = nextc;
	pair[0] = c;

	written = istrsnvisx(mbdst, &dlen, pair, 1, flag, mbextra);
	return (written < 0) ? NULL : mbdst + written;
}
416
/*
 * strsvis - encode the NUL-terminated string mbsrc into mbdst, also
 * escaping the characters in mbextra.  No destination size is given.
 * Returns whatever istrsnvisx() returns.
 */
int
strsvis(char *mbdst, const char *mbsrc, int flag, const char *mbextra)
{
	int rv;

	/* A source length of 0 makes istrsnvisx() use strlen(mbsrc). */
	rv = istrsnvisx(mbdst, NULL, mbsrc, 0, flag, mbextra);
	return rv;
}
422
/*
 * strsnvis - encode the NUL-terminated string mbsrc into the dlen-byte
 * buffer mbdst, also escaping the characters in mbextra.
 * Returns whatever istrsnvisx() returns.
 */
int
strsnvis(char *mbdst, size_t dlen, const char *mbsrc, int flag, const char *mbextra)
{
	int rv;

	/* A source length of 0 makes istrsnvisx() use strlen(mbsrc). */
	rv = istrsnvisx(mbdst, &dlen, mbsrc, 0, flag, mbextra);
	return rv;
}
428
/*
 * strsvisx - encode exactly len bytes of mbsrc into mbdst, also escaping
 * the characters in mbextra.  No destination size is given.
 * Returns whatever istrsnvisx() returns, or 0 for an empty block.
 */
int
strsvisx(char *mbdst, const char *mbsrc, size_t len, int flag, const char *mbextra)
{
	/*
	 * istrsnvisx() treats a length of 0 as "use strlen(mbsrc)", which
	 * would encode the whole string instead of nothing.  Handle the
	 * empty block here.
	 */
	if (len == 0) {
		*mbdst = '\0';
		return 0;
	}
	return istrsnvisx(mbdst, NULL, mbsrc, len, flag, mbextra);
}
434
/*
 * strsnvisx - encode exactly len bytes of mbsrc into the dlen-byte buffer
 * mbdst, also escaping the characters in mbextra.
 * Returns whatever istrsnvisx() returns; for an empty block returns 0, or
 * -1 with errno set to ENOSPC when dlen is 0.
 */
int
strsnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flag,
    const char *mbextra)
{
	/*
	 * istrsnvisx() treats a length of 0 as "use strlen(mbsrc)", which
	 * would encode the whole string instead of nothing.  Handle the
	 * empty block here.
	 */
	if (len == 0) {
		if (dlen == 0) {
			errno = ENOSPC;
			return -1;
		}
		*mbdst = '\0';
		return 0;
	}
	return istrsnvisx(mbdst, &dlen, mbsrc, len, flag, mbextra);
}
441 #endif
442
443 #if !HAVE_VIS
/*
 * vis - visually encode the single character c, with nextc as
 * look-ahead, into mbdst.  No extra characters are escaped.
 * Returns mbdst advanced by the count reported by istrsnvisx(), or NULL
 * when istrsnvisx() fails.
 */
char *
vis(char *mbdst, int c, int flag, int nextc)
{
	char pair[2];
	int written;

	pair[1] = nextc;
	pair[0] = c;

	written = istrsnvisx(mbdst, NULL, pair, 1, flag, "");
	return (written < 0) ? NULL : mbdst + written;
}
461
/*
 * nvis - like vis(), but the size of mbdst (dlen bytes) is passed on to
 * istrsnvisx().
 * Returns mbdst advanced by the count reported by istrsnvisx(), or NULL
 * when istrsnvisx() fails.
 */
char *
nvis(char *mbdst, size_t dlen, int c, int flag, int nextc)
{
	char pair[2];
	int written;

	pair[1] = nextc;
	pair[0] = c;

	written = istrsnvisx(mbdst, &dlen, pair, 1, flag, "");
	return (written < 0) ? NULL : mbdst + written;
}
476
/*
 * strvis - visually encode characters from src into dst
 *
 *	Dst must be 4 times the size of src to account for possible
 *	expansion.  The length of dst, not including the trailing NUL,
 *	is returned.
 */

int
strvis(char *mbdst, const char *mbsrc, int flag)
{
	int rv;

	/* A source length of 0 makes istrsnvisx() use strlen(mbsrc). */
	rv = istrsnvisx(mbdst, NULL, mbsrc, 0, flag, "");
	return rv;
}
490
/*
 * strnvis - visually encode the NUL-terminated string mbsrc into the
 * dlen-byte buffer mbdst.
 * Returns whatever istrsnvisx() returns.
 */
int
strnvis(char *mbdst, size_t dlen, const char *mbsrc, int flag)
{
	int rv;

	/* A source length of 0 makes istrsnvisx() use strlen(mbsrc). */
	rv = istrsnvisx(mbdst, &dlen, mbsrc, 0, flag, "");
	return rv;
}
496
/*
 * strvisx - visually encode characters from src into dst
 *
 *	Dst must be 4 times the size of src to account for possible
 *	expansion.  The length of dst, not including the trailing NUL,
 *	is returned.
 *
 *	Strvisx encodes exactly len characters from src into dst.
 *	This is useful for encoding a block of data.
 */

int
strvisx(char *mbdst, const char *mbsrc, size_t len, int flag)
{
	/*
	 * istrsnvisx() treats a length of 0 as "use strlen(mbsrc)", which
	 * would encode the whole string instead of nothing.  Handle the
	 * empty block here.
	 */
	if (len == 0) {
		*mbdst = '\0';
		return 0;
	}
	return istrsnvisx(mbdst, NULL, mbsrc, len, flag, "");
}
513
/*
 * strnvisx - visually encode exactly len bytes of mbsrc into the
 * dlen-byte buffer mbdst.
 * Returns whatever istrsnvisx() returns; for an empty block returns 0, or
 * -1 with errno set to ENOSPC when dlen is 0.
 */
int
strnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flag)
{
	/*
	 * istrsnvisx() treats a length of 0 as "use strlen(mbsrc)", which
	 * would encode the whole string instead of nothing.  Handle the
	 * empty block here.
	 */
	if (len == 0) {
		if (dlen == 0) {
			errno = ENOSPC;
			return -1;
		}
		*mbdst = '\0';
		return 0;
	}
	return istrsnvisx(mbdst, &dlen, mbsrc, len, flag, "");
}
519 #endif
520