unvis.c revision 1.29 1 /* $NetBSD: unvis.c,v 1.29 2009/02/10 23:06:31 christos Exp $ */
2
3 /*-
4 * Copyright (c) 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #if defined(LIBC_SCCS) && !defined(lint)
34 #if 0
35 static char sccsid[] = "@(#)unvis.c 8.1 (Berkeley) 6/4/93";
36 #else
37 __RCSID("$NetBSD: unvis.c,v 1.29 2009/02/10 23:06:31 christos Exp $");
38 #endif
39 #endif /* LIBC_SCCS and not lint */
40
41 #include "namespace.h"
42 #include <sys/types.h>
43
44 #include <assert.h>
45 #include <ctype.h>
46 #include <stdio.h>
47 #include <vis.h>
48
49 #ifdef __weak_alias
50 __weak_alias(strunvis,_strunvis)
51 #endif
52
53 #if !HAVE_VIS
/*
 * Decoding is driven by a state machine.  The current state is stored in
 * the int the caller hands to unvis(); a fresh decoder starts in S_GROUND,
 * which is deliberately zero.
 */
enum {
	S_GROUND  = 0,	/* haven't seen escape char */
	S_START   = 1,	/* start decoding special sequence */
	S_META    = 2,	/* metachar started (M) */
	S_META1   = 3,	/* metachar more, regular char (-) */
	S_CTRL    = 4,	/* control char started (^) */
	S_OCTAL2  = 5,	/* octal digit 2 */
	S_OCTAL3  = 6,	/* octal digit 3 */
	S_HEX1    = 7,	/* http hex digit */
	S_HEX2    = 8,	/* http hex digit 2 */
	S_MIME1   = 9,	/* mime hex digit 1 */
	S_MIME2   = 10,	/* mime hex digit 2 */
	S_EATCRNL = 11	/* mime eating CRNL */
};
69
/*
 * Character helpers used by the decoder.
 *
 *	isoctal(c)	nonzero when c is one of '0'..'7'
 *	xtod(c)		numeric value of hex digit c, either letter case
 *	XTOD(c)		numeric value of hex digit c, upper-case letters only
 *
 * xtod() and XTOD() do not validate their argument; the caller is expected
 * to have checked it with isxdigit() first.
 *
 * The argument is fully parenthesized and converted to unsigned char before
 * it reaches <ctype.h>, whose functions are undefined for negative values
 * other than EOF.  Each macro still evaluates its argument more than once,
 * so pass only expressions without side effects.
 */
#define	isoctal(c) \
	(((unsigned char)(c)) >= '0' && ((unsigned char)(c)) <= '7')
#define	xtod(c) \
	(isdigit((unsigned char)(c)) ? ((unsigned char)(c) - '0') : \
	    ((tolower((unsigned char)(c)) - 'a') + 10))
#define	XTOD(c) \
	(isdigit((unsigned char)(c)) ? ((unsigned char)(c) - '0') : \
	    (((unsigned char)(c) - 'A') + 10))
73
74 /*
75 * unvis - decode characters previously encoded by vis
76 */
77 int
78 unvis(char *cp, int c, int *astate, int flag)
79 {
80 unsigned char uc = (unsigned char)c;
81
82 _DIAGASSERT(cp != NULL);
83 _DIAGASSERT(astate != NULL);
84
85 if (flag & UNVIS_END) {
86 if (*astate == S_OCTAL2 || *astate == S_OCTAL3
87 || *astate == S_HEX2) {
88 *astate = S_GROUND;
89 return UNVIS_VALID;
90 }
91 return (*astate == S_GROUND ? UNVIS_NOCHAR : UNVIS_SYNBAD);
92 }
93
94 switch (*astate) {
95
96 case S_GROUND:
97 *cp = 0;
98 if (c == '\\') {
99 *astate = S_START;
100 return UNVIS_NOCHAR;
101 }
102 if ((flag & VIS_HTTPSTYLE) && c == '%') {
103 *astate = S_HEX1;
104 return UNVIS_NOCHAR;
105 }
106 if ((flag & VIS_MIMESTYLE) && c == '=') {
107 *astate = S_MIME1;
108 return UNVIS_NOCHAR;
109 }
110 *cp = c;
111 return UNVIS_VALID;
112
113 case S_START:
114 switch(c) {
115 case '\\':
116 *cp = c;
117 *astate = S_GROUND;
118 return UNVIS_VALID;
119 case '0': case '1': case '2': case '3':
120 case '4': case '5': case '6': case '7':
121 *cp = (c - '0');
122 *astate = S_OCTAL2;
123 return UNVIS_NOCHAR;
124 case 'M':
125 *cp = (char)0200;
126 *astate = S_META;
127 return UNVIS_NOCHAR;
128 case '^':
129 *astate = S_CTRL;
130 return UNVIS_NOCHAR;
131 case 'n':
132 *cp = '\n';
133 *astate = S_GROUND;
134 return UNVIS_VALID;
135 case 'r':
136 *cp = '\r';
137 *astate = S_GROUND;
138 return UNVIS_VALID;
139 case 'b':
140 *cp = '\b';
141 *astate = S_GROUND;
142 return UNVIS_VALID;
143 case 'a':
144 *cp = '\007';
145 *astate = S_GROUND;
146 return UNVIS_VALID;
147 case 'v':
148 *cp = '\v';
149 *astate = S_GROUND;
150 return UNVIS_VALID;
151 case 't':
152 *cp = '\t';
153 *astate = S_GROUND;
154 return UNVIS_VALID;
155 case 'f':
156 *cp = '\f';
157 *astate = S_GROUND;
158 return UNVIS_VALID;
159 case 's':
160 *cp = ' ';
161 *astate = S_GROUND;
162 return UNVIS_VALID;
163 case 'E':
164 *cp = '\033';
165 *astate = S_GROUND;
166 return UNVIS_VALID;
167 case '\n':
168 /*
169 * hidden newline
170 */
171 *astate = S_GROUND;
172 return (UNVIS_NOCHAR);
173 case '$':
174 /*
175 * hidden marker
176 */
177 *astate = S_GROUND;
178 return (UNVIS_NOCHAR);
179 }
180 *astate = S_GROUND;
181 return (UNVIS_SYNBAD);
182
183 case S_META:
184 if (c == '-')
185 *astate = S_META1;
186 else if (c == '^')
187 *astate = S_CTRL;
188 else {
189 *astate = S_GROUND;
190 return (UNVIS_SYNBAD);
191 }
192 return UNVIS_NOCHAR;
193
194 case S_META1:
195 *astate = S_GROUND;
196 *cp |= c;
197 return UNVIS_VALID;
198
199 case S_CTRL:
200 if (c == '?')
201 *cp |= 0177;
202 else
203 *cp |= c & 037;
204 *astate = S_GROUND;
205 return UNVIS_VALID;
206
207 case S_OCTAL2: /* second possible octal digit */
208 if (isoctal(uc)) {
209 /*
210 * yes - and maybe a third
211 */
212 *cp = (*cp << 3) + (c - '0');
213 *astate = S_OCTAL3;
214 return UNVIS_NOCHAR;
215 }
216 /*
217 * no - done with current sequence, push back passed char
218 */
219 *astate = S_GROUND;
220 return UNVIS_VALIDPUSH;
221
222 case S_OCTAL3: /* third possible octal digit */
223 *astate = S_GROUND;
224 if (isoctal(uc)) {
225 *cp = (*cp << 3) + (c - '0');
226 return UNVIS_VALID;
227 }
228 /*
229 * we were done, push back passed char
230 */
231 return UNVIS_VALIDPUSH;
232
233 case S_HEX1:
234 if (isxdigit(uc)) {
235 *cp = xtod(uc);
236 *astate = S_HEX2;
237 return UNVIS_NOCHAR;
238 }
239 /*
240 * no - done with current sequence, push back passed char
241 */
242 *astate = S_GROUND;
243 return UNVIS_VALIDPUSH;
244
245 case S_HEX2:
246 *astate = S_GROUND;
247 if (isxdigit(uc)) {
248 *cp = xtod(uc) | (*cp << 4);
249 return UNVIS_VALID;
250 }
251 return UNVIS_VALIDPUSH;
252
253 case S_MIME1:
254 if (uc == '\n' || uc == '\r') {
255 *astate = S_EATCRNL;
256 return UNVIS_NOCHAR;
257 }
258 if (isxdigit(uc) && (isdigit(uc) || isupper(uc))) {
259 *cp = XTOD(uc);
260 *astate = S_MIME2;
261 return UNVIS_NOCHAR;
262 }
263 fprintf(stderr, "Bad hex digit1 %c\n", uc);
264 *astate = S_GROUND;
265 return UNVIS_SYNBAD;
266
267 case S_MIME2:
268 if (isxdigit(uc) && (isdigit(uc) || isupper(uc))) {
269 *astate = S_GROUND;
270 *cp = XTOD(uc) | (*cp << 4);
271 return UNVIS_VALID;
272 }
273 fprintf(stderr, "Bad hex digit2 %c\n", uc);
274 *astate = S_GROUND;
275 return UNVIS_SYNBAD;
276
277 case S_EATCRNL:
278 switch (uc) {
279 case '\r':
280 case '\n':
281 return UNVIS_NOCHAR;
282 case '=':
283 *astate = S_MIME1;
284 return UNVIS_NOCHAR;
285 default:
286 *cp = uc;
287 return UNVIS_VALID;
288 }
289
290 default:
291 /*
292 * decoder in unknown state - (probably uninitialized)
293 */
294 *astate = S_GROUND;
295 return UNVIS_SYNBAD;
296 }
297 }
298
299 /*
300 * strunvis - decode src into dst
301 *
302 * Number of chars decoded into dst is returned, -1 on error.
303 * Dst is null terminated.
304 */
305
306 int
307 strunvisx(dst, src, flag)
308 char *dst;
309 const char *src;
310 int flag;
311 {
312 char c;
313 char *start = dst;
314 int state = 0;
315
316 _DIAGASSERT(src != NULL);
317 _DIAGASSERT(dst != NULL);
318
319 while ((c = *src++) != '\0') {
320 again:
321 switch (unvis(dst, c, &state, flag)) {
322 case UNVIS_VALID:
323 dst++;
324 break;
325 case UNVIS_VALIDPUSH:
326 dst++;
327 goto again;
328 case 0:
329 case UNVIS_NOCHAR:
330 break;
331 default:
332 return (-1);
333 }
334 }
335 if (unvis(dst, c, &state, UNVIS_END) == UNVIS_VALID)
336 dst++;
337 *dst = '\0';
338 return (int)(dst - start);
339 }
340
/*
 * strunvis - decode src into dst using the default syntax
 *
 * Equivalent to strunvisx(dst, src, 0): no optional flag bits are passed,
 * and the return value is that of strunvisx().
 */
int
strunvis(char *dst, const char *src)
{
	return strunvisx(dst, src, 0);
}
348 #endif
349