mime_header.c revision 1.5 1 1.5 christos /* $NetBSD: mime_header.c,v 1.5 2008/04/24 01:27:07 christos Exp $ */
2 1.1 christos
3 1.1 christos /*-
4 1.1 christos * Copyright (c) 2006 The NetBSD Foundation, Inc.
5 1.1 christos * All rights reserved.
6 1.1 christos *
7 1.1 christos * This code is derived from software contributed to The NetBSD Foundation
8 1.1 christos * by Anon Ymous.
9 1.1 christos *
10 1.1 christos * Redistribution and use in source and binary forms, with or without
11 1.1 christos * modification, are permitted provided that the following conditions
12 1.1 christos * are met:
13 1.1 christos * 1. Redistributions of source code must retain the above copyright
14 1.1 christos * notice, this list of conditions and the following disclaimer.
15 1.1 christos * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 christos * notice, this list of conditions and the following disclaimer in the
17 1.1 christos * documentation and/or other materials provided with the distribution.
18 1.1 christos * 3. All advertising materials mentioning features or use of this software
19 1.1 christos * must display the following acknowledgement:
20 1.1 christos * This product includes software developed by the NetBSD
21 1.1 christos * Foundation, Inc. and its contributors.
22 1.1 christos * 4. Neither the name of The NetBSD Foundation nor the names of its
23 1.1 christos * contributors may be used to endorse or promote products derived
24 1.1 christos * from this software without specific prior written permission.
25 1.1 christos *
26 1.1 christos * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 1.1 christos * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 1.1 christos * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 1.1 christos * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 1.1 christos * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 1.1 christos * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 1.1 christos * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 1.1 christos * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 1.1 christos * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 1.1 christos * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 1.1 christos * POSSIBILITY OF SUCH DAMAGE.
37 1.1 christos */
38 1.1 christos
39 1.1 christos
40 1.1 christos /*
41 1.1 christos * This module contains the core MIME header decoding routines.
42 1.1 christos * Please refer to RFC 2047 and RFC 2822.
43 1.1 christos */
44 1.1 christos
45 1.1 christos #ifdef MIME_SUPPORT
46 1.1 christos
47 1.1 christos #include <sys/cdefs.h>
48 1.1 christos #ifndef __lint__
49 1.5 christos __RCSID("$NetBSD: mime_header.c,v 1.5 2008/04/24 01:27:07 christos Exp $");
50 1.1 christos #endif /* not __lint__ */
51 1.1 christos
52 1.1 christos #include <stdio.h>
53 1.1 christos #include <stdlib.h>
54 1.1 christos #include <string.h>
55 1.1 christos
56 1.1 christos #include "def.h"
57 1.1 christos #include "extern.h"
58 1.1 christos #include "mime.h"
59 1.1 christos #include "mime_header.h"
60 1.1 christos #include "mime_codecs.h"
61 1.1 christos
/*
 * Bounds-checked front end to mime_b64tobin().
 *
 * The decode is refused (-1) when 'outlen' is smaller than three
 * bytes for every group of four input characters ('inlen' rounded up
 * to a multiple of four).  Otherwise the call is handed to
 * mime_b64tobin() and its result is returned unchanged.
 *
 * XXX - This should move to mime_codecs.c.
 */
static ssize_t
mime_B64_decode(char *outbuf, size_t outlen, const char *inbuf, size_t inlen)
{
	const size_t needed = 3 * roundup(inlen, 4) / 4;

	if (needed > outlen)
		return -1;
	return mime_b64tobin(outbuf, inbuf, inlen);
}
75 1.1 christos
76 1.1 christos
/*
 * Value of a single hexadecimal digit (either case), or -1 if 'c' is
 * not a hexadecimal digit.
 */
static int
qph_hexval(int c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	return -1;
}

/*
 * Header specific "quoted-printable" decode!
 * Differences with body QP decoding (see rfc 2047, sec 4.2):
 * 1) '=' occurs _only_ when followed by two hex digits (FWS is not allowed).
 * 2) Spaces can be encoded as '_' in headers for readability.
 *
 * Decodes 'inlen' bytes from 'inbuf' into 'outbuf', which has room
 * for 'outlen' bytes.  The output is not NUL terminated.
 *
 * Returns the number of bytes written, or -1 if the output buffer is
 * too small or an '=' is not followed by exactly two hex digits.
 *
 * XXX - This should move to mime_codecs.c.
 */
static ssize_t
mime_QPh_decode(char *outbuf, size_t outlen, const char *inbuf, size_t inlen)
{
	const char *p, *inend;
	char *q, *outend;

	outend = outbuf + outlen;
	inend = inbuf + inlen;
	q = outbuf;
	for (p = inbuf; p < inend; p++) {
		if (q >= outend)
			return -1;
		if (*p == '=') {
			int hi, lo;

			/* need the '=' plus two more characters */
			if (inend - p < 3)
				return -1;
			/*
			 * Check the digits by hand: strtol(3) would also
			 * accept leading white space and a sign, neither
			 * of which is valid here.
			 */
			hi = qph_hexval((unsigned char)p[1]);
			lo = qph_hexval((unsigned char)p[2]);
			if (hi < 0 || lo < 0)
				return -1;
			*q++ = (char)((hi << 4) | lo);
			p += 2;		/* loop increment steps past the 2nd digit */
		}
		else if (*p == '_')	/* headers may encode ' ' as '_' */
			*q++ = ' ';
		else
			*q++ = *p;
	}
	return q - outbuf;
}
122 1.1 christos
/*
 * Copy the charset name of an encoded-word into 'from_cs'.
 *
 * 'p' points at the first character of the charset name.  Characters
 * are copied up to, but not including, the next '?', and the copy is
 * NUL terminated.
 *
 * Returns a pointer to the character following that '?', or NULL if
 * the string ends before a '?' is seen or the name (plus terminator)
 * does not fit in 'from_cs_len' bytes.
 */
static const char *
grab_charset(char *from_cs, size_t from_cs_len, const char *p)
{
	char *q, *qlast;

	if (from_cs_len == 0)
		return NULL;	/* no room even for the terminator */

	qlast = from_cs + from_cs_len - 1;	/* reserved for the NUL */
	for (q = from_cs; *p != '?'; p++) {
		if (*p == '\0' || q >= qlast)
			return NULL;
		*q++ = *p;
	}
	*q = '\0';
	return p + 1;	/* if here, then we got the '?' */
}
136 1.1 christos
137 1.1 christos /*
138 1.1 christos * An encoded word is a string of at most 75 non-white space
139 1.1 christos * characters of the following form:
140 1.1 christos *
141 1.1 christos * =?charset?X?encoding?=
142 1.1 christos *
143 1.1 christos * where:
144 1.1 christos * 'charset' is the original character set of the unencoded string.
145 1.1 christos *
146 1.1 christos * 'X' is the encoding type 'B' or 'Q' for "base64" or
147 1.1 christos * "quoted-printable", respectively,
148 1.1 christos * 'encoding' is the encoded string.
149 1.1 christos *
150 1.1 christos * Both 'charset' and 'X' are case independent and 'encoding' cannot
151 1.1 christos * contain any whitespace or '?' characters. The 'encoding' must also
152 1.1 christos * be fully contained within the encoded words, i.e., it cannot be
153 1.1 christos * split between encoded words.
154 1.1 christos *
155 1.1 christos * Note: the 'B' encoding is a slightly modified "quoted-printable"
156 1.1 christos * encoding. In particular, spaces (' ') may be encoded as '_' to
157 1.1 christos * improve undecoded readability.
158 1.1 christos */
159 1.1 christos static int
160 1.1 christos decode_word(const char **ibuf, char **obuf, char *oend, const char *to_cs)
161 1.1 christos {
162 1.1 christos ssize_t declen;
163 1.1 christos size_t enclen, dstlen;
164 1.1 christos char decword[LINESIZE];
165 1.1 christos char from_cs[LINESIZE];
166 1.1 christos const char *encword, *iend, *p;
167 1.1 christos char *dstend;
168 1.1 christos char enctype;
169 1.1 christos
170 1.1 christos p = *ibuf;
171 1.1 christos if (p[0] != '=' && p[1] != '?')
172 1.1 christos return -1;
173 1.1 christos if (strlen(p) < 2 + 1 + 3 + 1 + 2)
174 1.1 christos return -1;
175 1.1 christos p = grab_charset(from_cs, sizeof(from_cs), p + 2);
176 1.1 christos if (p == NULL)
177 1.1 christos return -1;
178 1.1 christos enctype = *p++;
179 1.1 christos if (*p++ != '?')
180 1.1 christos return -1;
181 1.1 christos encword = p;
182 1.1 christos p = strchr(p, '?');
183 1.1 christos if (p == NULL || p[1] != '=')
184 1.1 christos return -1;
185 1.1 christos enclen = p - encword; /* length of encoded substring */
186 1.1 christos iend = p + 2;
187 1.1 christos /* encoded words are at most 75 characters (RFC 2047, sec 2) */
188 1.1 christos if (iend > *ibuf + 75)
189 1.1 christos return -1;
190 1.1 christos
191 1.1 christos dstend = to_cs ? decword : *obuf;
192 1.1 christos dstlen = (to_cs ? sizeof(decword): oend - *obuf) - 1;
193 1.1 christos
194 1.1 christos if (enctype == 'B' || enctype == 'b')
195 1.1 christos declen = mime_B64_decode(dstend, dstlen, encword, enclen);
196 1.1 christos else if (enctype == 'Q' || enctype == 'q')
197 1.1 christos declen = mime_QPh_decode(dstend, dstlen, encword, enclen);
198 1.1 christos else
199 1.1 christos return -1;
200 1.1 christos
201 1.1 christos if (declen == -1)
202 1.1 christos return -1;
203 1.1 christos
204 1.1 christos dstend += declen;
205 1.1 christos #ifdef CHARSET_SUPPORT
206 1.1 christos if (to_cs != NULL) {
207 1.1 christos iconv_t cd;
208 1.1 christos const char *src;
209 1.1 christos size_t srclen;
210 1.1 christos size_t cnt;
211 1.1 christos
212 1.1 christos cd = iconv_open(to_cs, from_cs);
213 1.1 christos if (cd == (iconv_t)-1)
214 1.1 christos return -1;
215 1.1 christos
216 1.1 christos src = decword;
217 1.1 christos srclen = declen;
218 1.1 christos dstend = *obuf;
219 1.1 christos dstlen = oend - *obuf - 1;
220 1.1 christos cnt = mime_iconv(cd, &src, &srclen, &dstend, &dstlen);
221 1.4 christos
222 1.1 christos (void)iconv_close(cd);
223 1.1 christos if (cnt == (size_t)-1)
224 1.1 christos return -1;
225 1.1 christos }
226 1.1 christos #endif /* CHARSET_SUPPORT */
227 1.1 christos *dstend = '\0';
228 1.1 christos *ibuf = iend;
229 1.1 christos *obuf = dstend;
230 1.1 christos return 0;
231 1.1 christos }
232 1.1 christos
233 1.1 christos
/*
 * Folding White Space.  See RFC 2822.
 *
 * Note: RFC 2822 specifies that '\n' and '\r' only occur as CRLF
 * pairs (i.e., "\r\n") and never separately.  However, by the time
 * mail(1) sees the messages, all CRLF pairs have been converted to
 * '\n' characters.
 *
 * Returns 1 for a space, tab or newline, and 0 for anything else.
 *
 * XXX - pull is_FWS() and skip_FWS() up to def.h?
 */
static inline int
is_FWS(int c)
{
	switch (c) {
	case ' ':
	case '\t':
	case '\n':
		return 1;
	default:
		return 0;
	}
}
249 1.1 christos
/*
 * Return a pointer to the first character at or after 'p' that is
 * not folding white space.  The terminating NUL stops the scan.
 */
static inline const char *
skip_FWS(const char *p)
{
	for (/*EMPTY*/; is_FWS(*p); p++)
		continue;
	return p;
}
257 1.1 christos
/*
 * Flush white space that was skipped after an encoded word.
 *
 * If '*src' is not NULL, the bytes from '*src' up to (not including)
 * 'srcend' are copied to '*dst', stopping early if 'dstend' is
 * reached.  '*dst' is advanced past what was written and '*src' is
 * reset to NULL.  If '*src' is NULL nothing happens.
 */
static inline void
copy_skipped_FWS(char **dst, char *dstend, const char **src, const char *srcend)
{
	const char *from = *src;
	char *to;

	if (from == NULL)
		return;

	/* copy any skipped linear-white-space */
	for (to = *dst; from < srcend && to < dstend; from++, to++)
		*to = *from;

	*dst = to;
	*src = NULL;
}
276 1.1 christos
277 1.1 christos /*
278 1.1 christos * Decode an unstructured field.
279 1.1 christos *
280 1.1 christos * See RFC 2822 Sec 2.2.1 and 3.6.5.
281 1.1 christos * Encoded words may occur anywhere in unstructured fields provided
282 1.1 christos * they are separated from any other text or encoded words by at least
283 1.1 christos * one linear-white-space character. (See RFC 2047 sec 5.1.) If two
284 1.1 christos * encoded words occur sequentially (separated by only FWS) then the
285 1.1 christos * separating FWS is removed.
286 1.1 christos *
287 1.1 christos * NOTE: unstructured fields cannot contain 'quoted-pairs' (see
288 1.1 christos * RFC2822 sec 3.2.6 and RFC 2047), but that is no problem as a '\\'
289 1.1 christos * (or any non-whitespace character) immediately before an
290 1.1 christos * encoded-word will prevent it from being decoded.
291 1.1 christos *
292 1.1 christos * hstring should be a NULL terminated string.
293 1.1 christos * outbuf should be sufficiently large to hold the result.
294 1.1 christos */
295 1.1 christos static void
296 1.1 christos mime_decode_usfield(char *outbuf, size_t outsize, const char *hstring)
297 1.1 christos {
298 1.1 christos const char *p, *p0;
299 1.1 christos char *q, *qend;
300 1.1 christos int lastc;
301 1.1 christos const char *charset;
302 1.1 christos
303 1.1 christos charset = value(ENAME_MIME_CHARSET);
304 1.1 christos qend = outbuf + outsize - 1; /* Make sure there is room for the trailing NULL! */
305 1.1 christos q = outbuf;
306 1.1 christos p = hstring;
307 1.1 christos p0 = NULL;
308 1.1 christos lastc = (unsigned char)' ';
309 1.1 christos while (*p && q < qend) {
310 1.1 christos const char *p1;
311 1.1 christos char *q1;
312 1.1 christos if (is_FWS(lastc) && p[0] == '=' && p[1] == '?' &&
313 1.1 christos decode_word((p1 = p, &p1), (q1 = q, &q1), qend, charset) == 0 &&
314 1.4 christos (*p1 == '\0' || is_FWS(*p1))) {
315 1.1 christos p0 = p1; /* pointer to first character after encoded word */
316 1.1 christos q = q1;
317 1.1 christos p = skip_FWS(p1);
318 1.1 christos lastc = (unsigned char)*p0;
319 1.1 christos }
320 1.1 christos else {
321 1.1 christos copy_skipped_FWS(&q, qend, &p0, p);
322 1.1 christos lastc = (unsigned char)*p;
323 1.1 christos if (q < qend)
324 1.1 christos *q++ = *p++;
325 1.1 christos }
326 1.1 christos }
327 1.1 christos copy_skipped_FWS(&q, qend, &p0, p);
328 1.1 christos *q = '\0';
329 1.1 christos }
330 1.1 christos
/*
 * Decode a field comment.
 *
 * Comments only occur in structured fields, can be nested (rfc 2822,
 * sec 3.2.3), and can contain 'encoded-words' and 'quoted-pairs'.
 * Otherwise, they can be regarded as unstructured fields that are
 * bounded by '(' and ')' characters.
 *
 * On entry '*ibuf' points just past the opening '('.  On return
 * '*ibuf' and '*obuf' have been advanced past what was consumed and
 * produced, including the closing ')' if one was reached.
 * Always returns 0 unless a nested call reports -1.
 */
static int
decode_comment(char **obuf, char *oend, const char **ibuf, const char *iend, const char *charset)
{
	const char *src = *ibuf;
	const char *pending = NULL;	/* FWS skipped after an encoded word */
	char *dst = *obuf;
	int prev = ' ';

	while (src < iend && dst < oend) {
		if (is_FWS(prev) && src[0] == '=' && src[1] == '?') {
			const char *after = src;
			char *out = dst;

			if (decode_word(&after, &out, oend, charset) == 0 &&
			    (*after == ')' || is_FWS(*after))) {
				prev = (unsigned char)*after;
				pending = after;
				dst = out;
				src = skip_FWS(after);
				/*
				 * XXX - this check should be unnecessary as
				 * *iend should be '\0' which will stop
				 * skip_FWS()
				 */
				if (src > iend)
					src = iend;
				continue;
			}
		}

		copy_skipped_FWS(&dst, oend, &pending, src);
		if (dst >= oend)	/* XXX - dst > oend cannot happen */
			break;

		if (*src == ')') {
			*dst++ = *src++;	/* copy the closing ')' */
			break;			/* and get out of here! */
		}

		if (*src == '(') {
			*dst++ = *src++;	/* copy the opening '(' */
			if (decode_comment(&dst, oend, &src, iend, charset) == -1)
				return -1;	/* is this right or should we update? */
			prev = ')';
		}
		else if (*src == '\\' && src + 1 < iend) {	/* quoted-pair */
			/* keep the '\\' only where it is still needed */
			if (src[1] == '(' || src[1] == ')' || src[1] == '\\')
				*dst++ = *src;
			src++;
			prev = (unsigned char)*src;
			if (dst < oend)
				*dst++ = *src++;
		}
		else {
			prev = (unsigned char)*src;
			*dst++ = *src++;
		}
	}
	*ibuf = src;
	*obuf = dst;
	return 0;
}
404 1.1 christos
/*
 * Decode a quoted-string or no-fold-quote.
 *
 * These cannot contain encoded words.  They can contain quoted-pairs,
 * making '\\' special.  They have no other structure.  See RFC 2822
 * sec 3.2.5 and 3.6.4.
 *
 * On entry '*ibuf' points just past the opening '"'.  Text is copied
 * through the closing '"' (inclusive) or until either buffer is
 * exhausted.  The '\\' of a quoted-pair is kept only in front of '"'
 * and '\\'.  '*ibuf' and '*obuf' are advanced accordingly.
 */
static void
decode_quoted_string(char **obuf, char *oend, const char **ibuf, const char *iend)
{
	const char *src = *ibuf;
	char *dst = *obuf;

	while (src < iend && dst < oend) {
		if (*src == '"') {
			*dst++ = *src++;	/* copy the closing '"' */
			break;
		}
		if (*src == '\\' && src + 1 < iend) {	/* quoted-pair */
			const char quoted = src[1];

			if (quoted == '"' || quoted == '\\') {
				*dst++ = '\\';
				if (dst >= oend)
					break;
			}
			src++;
		}
		*dst++ = *src++;
	}
	*ibuf = src;
	*obuf = dst;
}
440 1.1 christos
/*
 * Decode a domain-literal or no-fold-literal.
 *
 * These cannot contain encoded words.  They can have quoted pairs and
 * are delimited by '[' and ']' making '\\', '[', and ']' special.
 * They have no other structure.  See RFC 2822 sec 3.4.1 and 3.6.4.
 *
 * On entry '*ibuf' points just past the opening '['.  Text is copied
 * through the closing ']' (inclusive) or until either buffer is
 * exhausted.  The '\\' of a quoted-pair is kept only in front of '[',
 * ']' and '\\'.  '*ibuf' and '*obuf' are advanced accordingly.
 */
static void
decode_domain_literal(char **obuf, char *oend, const char **ibuf, const char *iend)
{
	const char *src = *ibuf;
	char *dst = *obuf;

	while (src < iend && dst < oend) {
		if (*src == ']') {
			*dst++ = *src++;	/* copy the closing ']' */
			break;
		}
		if (*src == '\\' && src + 1 < iend) {	/* quoted-pair */
			const char quoted = src[1];

			if (quoted == '[' || quoted == ']' || quoted == '\\') {
				*dst++ = '\\';
				if (dst >= oend)
					break;
			}
			src++;
		}
		*dst++ = *src++;
	}
	*ibuf = src;
	*obuf = dst;
}
476 1.1 christos
/*
 * Specials: see RFC 2822 sec 3.2.1.
 *
 * Returns 1 if 'c' is one of
 *	( ) < > [ ] : ; @ \ , . "
 * and 0 for every other value, including values outside 0..127.
 */
static inline int
is_specials(int c)
{
	switch (c) {
	case '"':
	case '(':
	case ')':
	case ',':
	case '.':
	case ':':
	case ';':
	case '<':
	case '>':
	case '@':
	case '[':
	case '\\':
	case ']':
		return 1;
	default:
		return 0;
	}
}
496 1.1 christos
497 1.1 christos /*
498 1.1 christos * Decode a structured field.
499 1.1 christos *
500 1.1 christos * At the top level, structured fields can only contain encoded-words
501 1.1 christos * via 'phrases' and 'comments'. See RFC 2047 sec 5.
502 1.1 christos */
503 1.1 christos static void
504 1.1 christos mime_decode_sfield(char *linebuf, size_t bufsize, const char *hstring)
505 1.1 christos {
506 1.1 christos const char *p, *pend, *p0;
507 1.1 christos char *q, *qend;
508 1.1 christos const char *charset;
509 1.1 christos int lastc;
510 1.1 christos
511 1.1 christos charset = value(ENAME_MIME_CHARSET);
512 1.1 christos
513 1.1 christos p = hstring;
514 1.1 christos q = linebuf;
515 1.1 christos pend = hstring + strlen(hstring);
516 1.1 christos qend = linebuf + bufsize - 1; /* save room for the NULL terminator */
517 1.1 christos lastc = (unsigned char)' ';
518 1.1 christos p0 = NULL;
519 1.1 christos while (p < pend && q < qend) {
520 1.1 christos const char *p1;
521 1.1 christos char *q1;
522 1.1 christos
523 1.1 christos if (*p != '=') {
524 1.1 christos copy_skipped_FWS(&q, qend, &p0, p);
525 1.1 christos if (q >= qend)
526 1.1 christos break;
527 1.1 christos }
528 1.1 christos
529 1.1 christos switch (*p) {
530 1.1 christos case '(': /* start of comment */
531 1.1 christos *q++ = *p++; /* copy the opening '(' */
532 1.1 christos (void)decode_comment(&q, qend, &p, pend, charset);
533 1.1 christos lastc = (unsigned char)p[-1];
534 1.1 christos break;
535 1.1 christos
536 1.1 christos case '"': /* start of quoted-string or no-fold-quote */
537 1.1 christos *q++ = *p++; /* copy the opening '"' */
538 1.1 christos decode_quoted_string(&q, qend, &p, pend);
539 1.1 christos lastc = (unsigned char)p[-1];
540 1.1 christos break;
541 1.1 christos
542 1.1 christos case '[': /* start of domain-literal or no-fold-literal */
543 1.1 christos *q++ = *p++; /* copy the opening '[' */
544 1.1 christos decode_domain_literal(&q, qend, &p, pend);
545 1.1 christos lastc = (unsigned char)p[-1];
546 1.1 christos break;
547 1.1 christos
548 1.1 christos case '\\': /* start of quoted-pair */
549 1.1 christos if (p + 1 < pend) { /* quoted pair */
550 1.1 christos if (is_specials(p[1])) {
551 1.1 christos *q++ = *p;
552 1.1 christos if (q >= qend)
553 1.1 christos break;
554 1.1 christos }
555 1.1 christos p++; /* skip the '\\' */
556 1.1 christos }
557 1.1 christos goto copy_char;
558 1.4 christos
559 1.1 christos case '=':
560 1.1 christos /*
561 1.1 christos * At this level encoded words can appear via
562 1.1 christos * 'phrases' (possibly delimited by ',' as in
563 1.1 christos * 'keywords'). Thus we handle them as such.
564 1.1 christos * Hopefully this is sufficient.
565 1.1 christos */
566 1.1 christos if ((lastc == ',' || is_FWS(lastc)) && p[1] == '?' &&
567 1.1 christos decode_word((p1 = p, &p1), (q1 = q, &q1), qend, charset) == 0 &&
568 1.4 christos (*p1 == '\0' || *p1 == ',' || is_FWS(*p1))) {
569 1.1 christos lastc = (unsigned char)*p1;
570 1.1 christos p0 = p1;
571 1.1 christos q = q1;
572 1.1 christos p = skip_FWS(p1);
573 1.1 christos /*
574 1.1 christos * XXX - this check should be
575 1.1 christos * unnecessary as *pend should be '\0'
576 1.1 christos * which will stop skip_FWS()
577 1.1 christos */
578 1.1 christos if (p > pend)
579 1.1 christos p = pend;
580 1.1 christos break;
581 1.1 christos }
582 1.1 christos else {
583 1.1 christos copy_skipped_FWS(&q, qend, &p0, p);
584 1.1 christos if (q >= qend)
585 1.1 christos break;
586 1.1 christos goto copy_char;
587 1.1 christos }
588 1.1 christos
589 1.1 christos case '<': /* start of angle-addr, msg-id, or path. */
590 1.1 christos /*
591 1.1 christos * A msg-id cannot contain encoded-pairs or
592 1.1 christos * encoded-words, but angle-addr and path can.
593 1.1 christos * Distinguishing between them seems to be
594 1.1 christos * unnecessary, so let's be loose and just
595 1.1 christos * decode them as if they were all the same.
596 1.1 christos */
597 1.1 christos default:
598 1.1 christos copy_char:
599 1.1 christos lastc = (unsigned char)*p;
600 1.1 christos *q++ = *p++;
601 1.1 christos break;
602 1.1 christos }
603 1.1 christos }
604 1.1 christos copy_skipped_FWS(&q, qend, &p0, p);
605 1.1 christos *q = '\0'; /* null terminate the result! */
606 1.1 christos }
607 1.1 christos
608 1.1 christos /*
609 1.1 christos * Returns the correct hfield decoder, or NULL if none.
610 1.1 christos * Info extracted from RFC 2822.
611 1.5 christos *
612 1.5 christos * name - pointer to field name of header line (with colon).
613 1.1 christos */
614 1.1 christos PUBLIC hfield_decoder_t
615 1.5 christos mime_hfield_decoder(const char *name)
616 1.1 christos {
617 1.1 christos static const struct field_decoder_tbl_s {
618 1.1 christos const char *field_name;
619 1.5 christos size_t field_len;
620 1.1 christos hfield_decoder_t decoder;
621 1.1 christos } field_decoder_tbl[] = {
622 1.5 christos #define X(s) s, sizeof(s) - 1
623 1.5 christos { X("Received:"), NULL },
624 1.5 christos
625 1.5 christos { X("Content-Type:"), NULL },
626 1.5 christos { X("Content-Disposition:"), NULL },
627 1.5 christos { X("Content-Transfer-Encoding:"), NULL },
628 1.5 christos { X("Content-Description:"), mime_decode_sfield },
629 1.5 christos { X("Content-ID:"), mime_decode_sfield },
630 1.5 christos { X("MIME-Version:"), mime_decode_sfield },
631 1.5 christos
632 1.5 christos { X("Bcc:"), mime_decode_sfield },
633 1.5 christos { X("Cc:"), mime_decode_sfield },
634 1.5 christos { X("Date:"), mime_decode_sfield },
635 1.5 christos { X("From:"), mime_decode_sfield },
636 1.5 christos { X("In-Reply-To:"), mime_decode_sfield },
637 1.5 christos { X("Keywords:"), mime_decode_sfield },
638 1.5 christos { X("Message-ID:"), mime_decode_sfield },
639 1.5 christos { X("References:"), mime_decode_sfield },
640 1.5 christos { X("Reply-To:"), mime_decode_sfield },
641 1.5 christos { X("Return-Path:"), mime_decode_sfield },
642 1.5 christos { X("Sender:"), mime_decode_sfield },
643 1.5 christos { X("To:"), mime_decode_sfield },
644 1.5 christos { X("Subject:"), mime_decode_usfield },
645 1.5 christos { X("Comments:"), mime_decode_usfield },
646 1.5 christos { X("X-"), mime_decode_usfield },
647 1.5 christos { NULL, 0, mime_decode_usfield }, /* optional-fields */
648 1.5 christos #undef X
649 1.1 christos };
650 1.1 christos const struct field_decoder_tbl_s *fp;
651 1.1 christos
652 1.1 christos /* XXX - this begs for a hash table! */
653 1.1 christos for (fp = field_decoder_tbl; fp->field_name; fp++)
654 1.5 christos if (strncasecmp(name, fp->field_name, fp->field_len) == 0)
655 1.5 christos break;
656 1.1 christos return fp->decoder;
657 1.1 christos }
658 1.1 christos
659 1.1 christos #endif /* MIME_SUPPORT */
660