vis.c revision 1.50 1 /* $NetBSD: vis.c,v 1.50 2013/02/13 22:15:43 christos Exp $ */
2
3 /*-
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 /*-
33 * Copyright (c) 1999, 2005 The NetBSD Foundation, Inc.
34 * All rights reserved.
35 *
36 * Redistribution and use in source and binary forms, with or without
37 * modification, are permitted provided that the following conditions
38 * are met:
39 * 1. Redistributions of source code must retain the above copyright
40 * notice, this list of conditions and the following disclaimer.
41 * 2. Redistributions in binary form must reproduce the above copyright
42 * notice, this list of conditions and the following disclaimer in the
43 * documentation and/or other materials provided with the distribution.
44 *
45 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
46 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
47 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
48 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
49 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
50 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
51 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
52 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
53 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
54 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
55 * POSSIBILITY OF SUCH DAMAGE.
56 */
57
58 #include <sys/cdefs.h>
59 #if defined(LIBC_SCCS) && !defined(lint)
60 __RCSID("$NetBSD: vis.c,v 1.50 2013/02/13 22:15:43 christos Exp $");
61 #endif /* LIBC_SCCS and not lint */
62 #ifdef __FBSDID
63 __FBSDID("$FreeBSD$");
64 #define _DIAGASSERT(x) assert(x)
65 #endif
66
67 #include "namespace.h"
68 #include <sys/types.h>
69
70 #include <assert.h>
71 #include <vis.h>
72 #include <errno.h>
73 #include <stdlib.h>
74 #include <wchar.h>
75 #include <wctype.h>
76
77 #ifdef __weak_alias
78 __weak_alias(strvisx,_strvisx)
79 #endif
80
81 #if !HAVE_VIS || !HAVE_SVIS
82 #include <ctype.h>
83 #include <limits.h>
84 #include <stdio.h>
85 #include <string.h>
86
/*
 * The reason for going through the trouble of dealing with character
 * encodings in vis(3) is that we use it to safely encode the output of
 * commands. This safe encoding varies depending on the character set.
 * For example, if we display ps output in French, we don't want to
 * display French characters as M-foo.
 */
94
/* Forward declaration: do_hvis() and do_mvis() delegate to do_svis(). */
static wchar_t *do_svis(wchar_t *, wint_t, int, wint_t, const wchar_t *);

#undef BELL
#define BELL L'\a'

/*
 * Wide-character classification helpers.  Arguments are fully
 * parenthesized; note that each macro may evaluate its argument more
 * than once, so do not pass expressions with side effects.
 */
/* True for the octal digits '0'..'7'; compares the full wide value. */
#define iswoctal(c)	((c) >= L'0' && (c) <= L'7')
/* True for space, tab and newline. */
#define iswwhite(c)	((c) == L' ' || (c) == L'\t' || (c) == L'\n')
/* True for the control characters allowed through by VIS_SAFE. */
#define iswsafe(c)	((c) == L'\b' || (c) == BELL || (c) == L'\r')
/* Map a nibble (0..15) to its lowercase / uppercase hex digit. */
#define xtoa(c)		L"0123456789abcdef"[(c)]
#define XTOA(c)		L"0123456789ABCDEF"[(c)]

/* Extra wide characters MAKEEXTRALIST reserves beyond the copied list. */
#define MAXEXTRAS	9
107
/*
 * MAKEEXTRALIST(flag, extra, orig_str)
 *	Allocate a new wide string in 'extra' holding a copy of 'orig_str'
 *	followed by the characters implied by 'flag':
 *	  VIS_GLOB		adds  * ? [ #
 *	  VIS_SP		adds  space
 *	  VIS_TAB		adds  tab
 *	  VIS_NL		adds  newline
 *	  no VIS_NOSLASH	adds  backslash
 *	'extra' is set to NULL when calloc() fails; otherwise the caller
 *	owns the buffer and must free() it.
 *
 *	'flag' and 'orig_str' are evaluated exactly once.  Internal names
 *	carry a mel_ prefix so they cannot capture the caller's variables.
 */
#define MAKEEXTRALIST(flag, extra, orig_str)				      \
do {									      \
	const wchar_t *mel_orig = (orig_str);				      \
	const int mel_flag = (flag);					      \
	size_t mel_len = 0;						      \
	size_t mel_i;							      \
	wchar_t *mel_e;							      \
	while (mel_orig[mel_len] != L'\0')				      \
		mel_len++;						      \
	/* copied text + its NUL + room for every optional extra */	      \
	(extra) = calloc(mel_len + 1 + MAXEXTRAS, sizeof(*(extra)));	      \
	if (!(extra))							      \
		break;							      \
	for (mel_i = 0; mel_i < mel_len; mel_i++)			      \
		(extra)[mel_i] = mel_orig[mel_i];			      \
	mel_e = (extra) + mel_len;					      \
	if (mel_flag & VIS_GLOB) {					      \
		*mel_e++ = L'*';					      \
		*mel_e++ = L'?';					      \
		*mel_e++ = L'[';					      \
		*mel_e++ = L'#';					      \
	}								      \
	if (mel_flag & VIS_SP)						      \
		*mel_e++ = L' ';					      \
	if (mel_flag & VIS_TAB)						      \
		*mel_e++ = L'\t';					      \
	if (mel_flag & VIS_NL)						      \
		*mel_e++ = L'\n';					      \
	if ((mel_flag & VIS_NOSLASH) == 0)				      \
		*mel_e++ = L'\\';					      \
	*mel_e = L'\0';							      \
} while (/*CONSTCOND*/0)
132
/*
 * do_hvis: HTTP-style encoder (RFC 1808).
 *
 * Characters accepted by iswalnum() and the punctuation listed below are
 * handed to do_svis() unchanged in meaning; anything else is written as
 * '%' followed by two lowercase hex digits taken from the low byte of c.
 * Returns the position just past the last wide character written.
 */
static wchar_t *
do_hvis(wchar_t *dst, wint_t c, int flag, wint_t nextc, const wchar_t *extra)
{
	/* RFC 1808 "safe" characters followed by its "extra" characters. */
	static const wchar_t passthru[] = L"$-_.+" L"!*'(),";
	unsigned int byte;

	/*
	 * wcschr() also matches the terminating NUL, so NUL has to be
	 * excluded explicitly to keep it on the %-escape path.
	 */
	if (iswalnum(c) || (c != L'\0' && wcschr(passthru, c) != NULL))
		return do_svis(dst, c, flag, nextc, extra);

	byte = (unsigned int)c;
	*dst++ = L'%';
	*dst++ = xtoa((byte >> 4) & 0xf);
	*dst++ = xtoa(byte & 0xf);
	return dst;
}
154
/*
 * do_mvis: Quoted-Printable MIME encoder (RFC 2045).
 * NB: long lines and CRLF are not handled.
 *
 * A character other than newline is written as '=' plus two uppercase hex
 * digits of its low byte when it is
 *   - whitespace immediately followed by CR or LF,
 *   - non-whitespace outside 33..126, or '=' itself, or
 *   - one of  # $ @ [ \ ] ^ ` { | } ~
 * Everything else goes through do_svis().
 * Returns the position just past the last wide character written.
 */
static wchar_t *
do_mvis(wchar_t *dst, wint_t c, int flag, wint_t nextc, const wchar_t *extra)
{
	const int blank = iswspace(c) != 0;
	int quote = 0;

	if (c != L'\n') {
		if (blank && (nextc == L'\r' || nextc == L'\n'))
			quote = 1;	/* space at the end of the line */
		else if (!blank && (c < 33 || c == L'=' || c > 126))
			quote = 1;	/* out of range */
		else if (wcschr(L"#$@[\\]^`{|}~", c) != NULL)
			quote = 1;	/* specific char to be escaped */
	}

	if (!quote)
		return do_svis(dst, c, flag, nextc, extra);

	*dst++ = L'=';
	*dst++ = XTOA(((unsigned int)c >> 4) & 0xf);
	*dst++ = XTOA((unsigned int)c & 0xf);
	return dst;
}
176
177 /*
178 * This is do_vis, the central code of vis.
179 * dst: Pointer to the destination buffer
180 * c: Character to encode
181 * flag: Flag word
182 * nextc: The character following 'c'
183 * extra: Pointer to the list of extra characters to be
184 * backslash-protected.
185 */
186 static wchar_t *
187 do_svis(wchar_t *dst, wint_t c, int flag, wint_t nextc, const wchar_t *extra)
188 {
189 int iswextra;
190
191 iswextra = wcschr(extra, c) != NULL;
192 if (!iswextra && (iswgraph(c) || iswwhite(c) ||
193 ((flag & VIS_SAFE) && iswsafe(c)))) {
194 *dst++ = c;
195 return dst;
196 }
197 if (flag & VIS_CSTYLE) {
198 switch (c) {
199 case L'\n':
200 *dst++ = L'\\'; *dst++ = L'n';
201 return dst;
202 case L'\r':
203 *dst++ = L'\\'; *dst++ = L'r';
204 return dst;
205 case L'\b':
206 *dst++ = L'\\'; *dst++ = L'b';
207 return dst;
208 case BELL:
209 *dst++ = L'\\'; *dst++ = L'a';
210 return dst;
211 case L'\v':
212 *dst++ = L'\\'; *dst++ = L'v';
213 return dst;
214 case L'\t':
215 *dst++ = L'\\'; *dst++ = L't';
216 return dst;
217 case L'\f':
218 *dst++ = L'\\'; *dst++ = L'f';
219 return dst;
220 case L' ':
221 *dst++ = L'\\'; *dst++ = L's';
222 return dst;
223 case L'\0':
224 *dst++ = L'\\'; *dst++ = L'0';
225 if (iswoctal(nextc)) {
226 *dst++ = L'0';
227 *dst++ = L'0';
228 }
229 return dst;
230 default:
231 if (iswgraph(c)) {
232 *dst++ = L'\\';
233 *dst++ = c;
234 return dst;
235 }
236 }
237 }
238 if (iswextra || ((c & 0177) == L' ') || (flag & VIS_OCTAL)) {
239 *dst++ = L'\\';
240 *dst++ = (u_char)(((u_int32_t)(u_char)c >> 6) & 03) + L'0';
241 *dst++ = (u_char)(((u_int32_t)(u_char)c >> 3) & 07) + L'0';
242 *dst++ = (c & 07) + L'0';
243 } else {
244 if ((flag & VIS_NOSLASH) == 0)
245 *dst++ = L'\\';
246
247 if (c & 0200) {
248 c &= 0177;
249 *dst++ = L'M';
250 }
251
252 if (iswcntrl(c)) {
253 *dst++ = L'^';
254 if (c == 0177)
255 *dst++ = L'?';
256 else
257 *dst++ = c + L'@';
258 } else {
259 *dst++ = L'-';
260 *dst++ = c;
261 }
262 }
263 return dst;
264 }
265
266 typedef wchar_t *(*visfun_t)(wchar_t *, wint_t, int, wint_t, const wchar_t *);
267
268 /*
269 * Return the appropriate encoding function depending on the flags given.
270 */
271 static visfun_t
272 getvisfun(int flag)
273 {
274 if (flag & VIS_HTTPSTYLE)
275 return do_hvis;
276 if (flag & VIS_MIMESTYLE)
277 return do_mvis;
278 return do_svis;
279 }
280
281 /*
282 * istrsnvisx()
283 * The main internal function.
284 * All user-visible functions call this one.
285 */
286 static int
287 istrsnvisx(char *mbdst, size_t *dlen, const char *mbsrc, size_t mblength,
288 int flag, const char *mbextra)
289 {
290 wchar_t *dst, *src, *pdst, *psrc, *start, *extra, *nextra;
291 size_t len, olen;
292 wint_t c;
293 visfun_t f;
294 int clen, error = -1;
295 ssize_t mbslength;
296
297 _DIAGASSERT(mbdst != NULL);
298 _DIAGASSERT(mbsrc != NULL);
299 _DIAGASSERT(mbextra != NULL);
300
301 psrc = pdst = extra = nextra = NULL;
302 if (!mblength)
303 mblength = strlen(mbsrc);
304
305 if ((psrc = calloc(mblength + 1, sizeof(*psrc))) == NULL)
306 return -1;
307 if ((pdst = calloc((4 * mblength) + 1, sizeof(*pdst))) == NULL)
308 goto out;
309 if ((extra = calloc((strlen(mbextra) + 1), sizeof(*extra))) == NULL)
310 goto out;
311
312 dst = pdst;
313 src = psrc;
314
315 if (mblength < len)
316 len = mblength;
317
318 mbslength = (ssize_t)mblength;
319 while (mbslength > 0) {
320 clen = mbtowc(src, mbsrc, MB_LEN_MAX);
321 if (clen < 0) {
322 *src = (wint_t)(u_char)*mbsrc;
323 clen = 1;
324 }`
325 if (clen == 0)
326 clen = 1;
327 src++;
328 mbsrc += clen;
329 mbslength -= clen;
330 }
331 len = src - psrc;
332 src = psrc;
333
334 mbstowcs(extra, mbextra, strlen(mbextra));
335 MAKEEXTRALIST(flag, nextra, extra);
336 if (!nextra) {
337 if (dlen && *dlen == 0) {
338 errno = ENOSPC;
339 goto out;
340 }
341 *mbdst = '\0'; /* can't create nextra, return "" */
342 error = 0;
343 goto out;
344 }
345
346 f = getvisfun(flag);
347
348 for (start = dst; len > 0; len--) {
349 c = *src++;
350 dst = (*f)(dst, c, flag, len >= 1 ? *src : L'\0', nextra);
351 if (dst == NULL) {
352 errno = ENOSPC;
353 goto out;
354 }
355 }
356
357 *dst = L'\0';
358
359 len = dlen ? *dlen : ((wcslen(start) + 1) * MB_LEN_MAX);
360 olen = wcstombs(mbdst, start, len * sizeof(*mbdst));
361
362 free(nextra);
363 free(extra);
364 free(pdst);
365 free(psrc);
366
367 return (int)olen;
368 out:
369 free(nextra);
370 free(extra);
371 free(pdst);
372 free(psrc);
373 return error;
374 }
375 #endif
376
377 #if !HAVE_SVIS
/*
 * The "svis" variants all take an "extra" arg that is a pointer
 * to a NUL-terminated list of characters to be encoded, too.
 * These functions are useful, e.g., to encode strings in such a
 * way that they are not interpreted by a shell.
 */
384
/*
 * svis - encode the single character c, also encoding the characters
 * listed in mbextra.  Returns a pointer just past the encoded output in
 * mbdst, or NULL if istrsnvisx() reports failure.
 */
char *
svis(char *mbdst, int c, int flag, int nextc, const char *mbextra)
{
	char pair[2] = { (char)c, (char)nextc };
	int n = istrsnvisx(mbdst, NULL, pair, 1, flag, mbextra);

	return (n < 0) ? NULL : mbdst + n;
}
399
/*
 * snvis - like svis, but passes the destination size dlen down to
 * istrsnvisx().  Returns a pointer just past the encoded output in
 * mbdst, or NULL if istrsnvisx() reports failure.
 */
char *
snvis(char *mbdst, size_t dlen, int c, int flag, int nextc, const char *mbextra)
{
	char pair[2] = { (char)c, (char)nextc };
	int n = istrsnvisx(mbdst, &dlen, pair, 1, flag, mbextra);

	return (n < 0) ? NULL : mbdst + n;
}
414
/*
 * strsvis - encode the NUL-terminated string mbsrc into mbdst with no
 * destination size given, also encoding the characters in mbextra.
 * Returns the result of istrsnvisx().
 */
int
strsvis(char *mbdst, const char *mbsrc, int flag, const char *mbextra)
{
	const size_t whole_string = 0;	/* 0: length is taken from strlen() */
	int rv;

	rv = istrsnvisx(mbdst, NULL, mbsrc, whole_string, flag, mbextra);
	return rv;
}
420
/*
 * strsnvis - encode the NUL-terminated string mbsrc into the dlen-byte
 * buffer mbdst, also encoding the characters in mbextra.
 * Returns the result of istrsnvisx().
 */
int
strsnvis(char *mbdst, size_t dlen, const char *mbsrc, int flag, const char *mbextra)
{
	const size_t whole_string = 0;	/* 0: length is taken from strlen() */
	int rv;

	rv = istrsnvisx(mbdst, &dlen, mbsrc, whole_string, flag, mbextra);
	return rv;
}
426
/*
 * strsvisx - encode len bytes of mbsrc into mbdst with no destination
 * size given, also encoding the characters in mbextra.
 * Returns the result of istrsnvisx().
 */
int
strsvisx(char *mbdst, const char *mbsrc, size_t len, int flag, const char *mbextra)
{
	int rv;

	rv = istrsnvisx(mbdst, NULL, mbsrc, len, flag, mbextra);
	return rv;
}
432
/*
 * strsnvisx - encode len bytes of mbsrc into the dlen-byte buffer mbdst,
 * also encoding the characters in mbextra.
 * Returns the result of istrsnvisx().
 */
int
strsnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flag,
    const char *mbextra)
{
	int rv;

	rv = istrsnvisx(mbdst, &dlen, mbsrc, len, flag, mbextra);
	return rv;
}
439 #endif
440
441 #if !HAVE_VIS
/*
 * vis - visually encode the single character c, with an empty list of
 * extra characters.  Returns a pointer just past the encoded output in
 * mbdst, or NULL if istrsnvisx() reports failure.
 */
char *
vis(char *mbdst, int c, int flag, int nextc)
{
	char pair[2] = { (char)c, (char)nextc };
	int n = istrsnvisx(mbdst, NULL, pair, 1, flag, "");

	return (n < 0) ? NULL : mbdst + n;
}
459
/*
 * nvis - like vis, but passes the destination size dlen down to
 * istrsnvisx().  Returns a pointer just past the encoded output in
 * mbdst, or NULL if istrsnvisx() reports failure.
 */
char *
nvis(char *mbdst, size_t dlen, int c, int flag, int nextc)
{
	char pair[2] = { (char)c, (char)nextc };
	int n = istrsnvisx(mbdst, &dlen, pair, 1, flag, "");

	return (n < 0) ? NULL : mbdst + n;
}
474
/*
 * strvis - visually encode the NUL-terminated string mbsrc into mbdst.
 *
 * No destination size is passed, so mbdst must be large enough for the
 * expansion: up to four output characters per input character, plus
 * the terminating NUL.  Returns the result of istrsnvisx(): the length
 * of the output, not counting the trailing NUL, or -1 on failure.
 */
int
strvis(char *mbdst, const char *mbsrc, int flag)
{
	const size_t whole_string = 0;	/* 0: length is taken from strlen() */
	int rv;

	rv = istrsnvisx(mbdst, NULL, mbsrc, whole_string, flag, "");
	return rv;
}
488
/*
 * strnvis - visually encode the NUL-terminated string mbsrc into the
 * dlen-byte buffer mbdst.  Returns the result of istrsnvisx().
 */
int
strnvis(char *mbdst, size_t dlen, const char *mbsrc, int flag)
{
	const size_t whole_string = 0;	/* 0: length is taken from strlen() */
	int rv;

	rv = istrsnvisx(mbdst, &dlen, mbsrc, whole_string, flag, "");
	return rv;
}
494
/*
 * strvisx - visually encode len bytes of mbsrc into mbdst.
 *
 * Unlike strvis, the input length is given explicitly, which is useful
 * for encoding a block of data.  No destination size is passed, so
 * mbdst must be large enough for the expansion: up to four output
 * characters per input character, plus the terminating NUL.
 * Returns the result of istrsnvisx(): the length of the output, not
 * counting the trailing NUL, or -1 on failure.
 *
 * NOTE(review): istrsnvisx() treats a len of 0 as "use strlen(mbsrc)".
 */
int
strvisx(char *mbdst, const char *mbsrc, size_t len, int flag)
{
	int rv;

	rv = istrsnvisx(mbdst, NULL, mbsrc, len, flag, "");
	return rv;
}
511
/*
 * strnvisx - visually encode len bytes of mbsrc into the dlen-byte
 * buffer mbdst.  Returns the result of istrsnvisx().
 */
int
strnvisx(char *mbdst, size_t dlen, const char *mbsrc, size_t len, int flag)
{
	int rv;

	rv = istrsnvisx(mbdst, &dlen, mbsrc, len, flag, "");
	return rv;
}
517 #endif
518