Home | History | Annotate | Line # | Download | only in mail
mime_header.c revision 1.5
      1  1.5  christos /*	$NetBSD: mime_header.c,v 1.5 2008/04/24 01:27:07 christos Exp $	*/
      2  1.1  christos 
      3  1.1  christos /*-
      4  1.1  christos  * Copyright (c) 2006 The NetBSD Foundation, Inc.
      5  1.1  christos  * All rights reserved.
      6  1.1  christos  *
      7  1.1  christos  * This code is derived from software contributed to The NetBSD Foundation
      8  1.1  christos  * by Anon Ymous.
      9  1.1  christos  *
     10  1.1  christos  * Redistribution and use in source and binary forms, with or without
     11  1.1  christos  * modification, are permitted provided that the following conditions
     12  1.1  christos  * are met:
     13  1.1  christos  * 1. Redistributions of source code must retain the above copyright
     14  1.1  christos  *    notice, this list of conditions and the following disclaimer.
     15  1.1  christos  * 2. Redistributions in binary form must reproduce the above copyright
     16  1.1  christos  *    notice, this list of conditions and the following disclaimer in the
     17  1.1  christos  *    documentation and/or other materials provided with the distribution.
     18  1.1  christos  * 3. All advertising materials mentioning features or use of this software
     19  1.1  christos  *    must display the following acknowledgement:
     20  1.1  christos  *        This product includes software developed by the NetBSD
     21  1.1  christos  *        Foundation, Inc. and its contributors.
     22  1.1  christos  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  1.1  christos  *    contributors may be used to endorse or promote products derived
     24  1.1  christos  *    from this software without specific prior written permission.
     25  1.1  christos  *
     26  1.1  christos  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  1.1  christos  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  1.1  christos  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  1.1  christos  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  1.1  christos  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  1.1  christos  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  1.1  christos  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  1.1  christos  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  1.1  christos  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  1.1  christos  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  1.1  christos  * POSSIBILITY OF SUCH DAMAGE.
     37  1.1  christos  */
     38  1.1  christos 
     39  1.1  christos 
     40  1.1  christos /*
     41  1.1  christos  * This module contains the core MIME header decoding routines.
     42  1.1  christos  * Please refer to RFC 2047 and RFC 2822.
     43  1.1  christos  */
     44  1.1  christos 
     45  1.1  christos #ifdef MIME_SUPPORT
     46  1.1  christos 
     47  1.1  christos #include <sys/cdefs.h>
     48  1.1  christos #ifndef __lint__
     49  1.5  christos __RCSID("$NetBSD: mime_header.c,v 1.5 2008/04/24 01:27:07 christos Exp $");
     50  1.1  christos #endif /* not __lint__ */
     51  1.1  christos 
     52  1.1  christos #include <stdio.h>
     53  1.1  christos #include <stdlib.h>
     54  1.1  christos #include <string.h>
     55  1.1  christos 
     56  1.1  christos #include "def.h"
     57  1.1  christos #include "extern.h"
     58  1.1  christos #include "mime.h"
     59  1.1  christos #include "mime_header.h"
     60  1.1  christos #include "mime_codecs.h"
     61  1.1  christos 
/*
 * Bounds-checked front end to mime_b64tobin().
 *
 * outbuf/outlen - destination buffer and its size in bytes.
 * inbuf/inlen   - base64 text and its length in bytes.
 *
 * Returns -1 without touching outbuf if outlen is smaller than the
 * worst-case decoded size; otherwise returns whatever
 * mime_b64tobin() returns.
 *
 * XXX - This should move to mime_codecs.c.
 */
static ssize_t
mime_B64_decode(char *outbuf, size_t outlen, const char *inbuf, size_t inlen)
{
	/* Each (possibly partial) group of 4 input chars yields up to 3 bytes. */
	const size_t worst_case = 3 * roundup(inlen, 4) / 4;

	if (worst_case <= outlen)
		return mime_b64tobin(outbuf, inbuf, inlen);

	return -1;
}
     75  1.1  christos 
     76  1.1  christos 
/*
 * Value of a single hexadecimal digit, or -1 if 'c' is not one.
 * Only the characters [0-9A-Fa-f] are accepted.
 */
static int
qph_hexval(int c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	return -1;
}

/*
 * Header specific "quoted-printable" decode!
 * Differences with body QP decoding (see rfc 2047, sec 4.2):
 * 1) '=' occurs _only_ when followed by two hex digits (FWS is not allowed).
 * 2) Spaces can be encoded as '_' in headers for readability.
 *
 * outbuf/outlen - destination buffer and its size in bytes.
 * inbuf/inlen   - encoded text and its length in bytes; it need not
 *                 be NUL-terminated.
 *
 * Returns the number of bytes stored in outbuf (no terminating NUL is
 * added), or -1 if the output does not fit or if an '=' is not
 * followed by exactly two hexadecimal digits.
 *
 * XXX - This should move to mime_codecs.c.
 */
static ssize_t
mime_QPh_decode(char *outbuf, size_t outlen, const char *inbuf, size_t inlen)
{
	const char *p, *inend;
	char *q, *outend;

	outend = outbuf + outlen;
	inend = inbuf + inlen;
	q = outbuf;
	for (p = inbuf; p < inend; p++) {
		if (q >= outend)
			return -1;
		if (*p == '=') {
			int hi, lo;

			/* both hex digits must lie inside the input */
			if (inend - p < 3)
				return -1;
			/*
			 * Check the digits by hand: strtol() would also
			 * accept leading white space and a sign, letting
			 * things like "=+1" or "= 1" slip through.
			 */
			hi = qph_hexval((unsigned char)p[1]);
			lo = qph_hexval((unsigned char)p[2]);
			if (hi < 0 || lo < 0)
				return -1;
			*q++ = (char)((hi << 4) | lo);
			p += 2;	/* the loop increment steps over the 2nd digit */
		}
		else if (*p == '_')  /* headers may encode ' ' as '_' */
			*q++ = ' ';
		else
			*q++ = *p;
	}
	return q - outbuf;
}
    122  1.1  christos 
/*
 * Copy the charset name of an encoded-word into from_cs.
 *
 * from_cs/from_cs_len - destination buffer and its size in bytes.
 * p                   - points at the first character of the charset,
 *                       i.e. just past the leading "=?".
 *
 * Characters are copied up to, but not including, the next '?' and
 * the result is NUL-terminated.
 *
 * Returns a pointer to the character following that '?', or NULL if
 * the string ends before a '?' is seen or the name (plus its NUL)
 * does not fit in from_cs.
 */
static const char *
grab_charset(char *from_cs, size_t from_cs_len, const char *p)
{
	char *q, *last;

	/* no room even for the NUL; also keeps 'last' inside the buffer */
	if (from_cs_len == 0)
		return NULL;

	last = from_cs + from_cs_len - 1;	/* slot reserved for the NUL */
	for (q = from_cs; *p != '?'; p++) {
		if (*p == '\0' || q >= last)
			return NULL;
		*q++ = *p;
	}
	*q = '\0';
	return p + 1;	/* if here, then we got the '?' */
}
    136  1.1  christos 
    137  1.1  christos /*
    138  1.1  christos  * An encoded word is a string of at most 75 non-white space
    139  1.1  christos  * characters of the following form:
    140  1.1  christos  *
    141  1.1  christos  *  =?charset?X?encoding?=
    142  1.1  christos  *
    143  1.1  christos  * where:
    144  1.1  christos  *   'charset'	is the original character set of the unencoded string.
    145  1.1  christos  *
    146  1.1  christos  *   'X'	is the encoding type 'B' or 'Q' for "base64" or
    147  1.1  christos  *              "quoted-printable", respectively,
    148  1.1  christos  *   'encoding'	is the encoded string.
    149  1.1  christos  *
    150  1.1  christos  * Both 'charset' and 'X' are case independent and 'encoding' cannot
    151  1.1  christos  * contain any whitespace or '?' characters.  The 'encoding' must also
    152  1.1  christos  * be fully contained within the encoded words, i.e., it cannot be
    153  1.1  christos  * split between encoded words.
    154  1.1  christos  *
    155  1.1  christos  * Note: the 'B' encoding is a slightly modified "quoted-printable"
    156  1.1  christos  * encoding.  In particular, spaces (' ') may be encoded as '_' to
    157  1.1  christos  * improve undecoded readability.
    158  1.1  christos  */
    159  1.1  christos static int
    160  1.1  christos decode_word(const char **ibuf, char **obuf, char *oend, const char *to_cs)
    161  1.1  christos {
    162  1.1  christos 	ssize_t declen;
    163  1.1  christos 	size_t enclen, dstlen;
    164  1.1  christos 	char decword[LINESIZE];
    165  1.1  christos 	char from_cs[LINESIZE];
    166  1.1  christos 	const char *encword, *iend, *p;
    167  1.1  christos 	char *dstend;
    168  1.1  christos 	char enctype;
    169  1.1  christos 
    170  1.1  christos 	p = *ibuf;
    171  1.1  christos 	if (p[0] != '=' && p[1] != '?')
    172  1.1  christos 		return -1;
    173  1.1  christos 	if (strlen(p) <  2 + 1 + 3 + 1 + 2)
    174  1.1  christos 		return -1;
    175  1.1  christos 	p = grab_charset(from_cs, sizeof(from_cs), p + 2);
    176  1.1  christos 	if (p == NULL)
    177  1.1  christos 		return -1;
    178  1.1  christos 	enctype = *p++;
    179  1.1  christos 	if (*p++ != '?')
    180  1.1  christos 		return -1;
    181  1.1  christos 	encword = p;
    182  1.1  christos 	p = strchr(p, '?');
    183  1.1  christos 	if (p == NULL || p[1] != '=')
    184  1.1  christos 		return -1;
    185  1.1  christos 	enclen = p - encword;	/* length of encoded substring */
    186  1.1  christos 	iend = p + 2;
    187  1.1  christos 	/* encoded words are at most 75 characters (RFC 2047, sec 2) */
    188  1.1  christos 	if (iend > *ibuf + 75)
    189  1.1  christos 		return -1;
    190  1.1  christos 
    191  1.1  christos 	dstend = to_cs ? decword : *obuf;
    192  1.1  christos 	dstlen = (to_cs ? sizeof(decword): oend - *obuf) - 1;
    193  1.1  christos 
    194  1.1  christos 	if (enctype == 'B' || enctype == 'b')
    195  1.1  christos 		declen = mime_B64_decode(dstend, dstlen, encword, enclen);
    196  1.1  christos 	else if (enctype == 'Q' || enctype == 'q')
    197  1.1  christos 		declen = mime_QPh_decode(dstend, dstlen, encword, enclen);
    198  1.1  christos 	else
    199  1.1  christos 		return -1;
    200  1.1  christos 
    201  1.1  christos 	if (declen == -1)
    202  1.1  christos 		return -1;
    203  1.1  christos 
    204  1.1  christos 	dstend += declen;
    205  1.1  christos #ifdef CHARSET_SUPPORT
    206  1.1  christos 	if (to_cs != NULL) {
    207  1.1  christos 		iconv_t cd;
    208  1.1  christos 		const char *src;
    209  1.1  christos 		size_t srclen;
    210  1.1  christos 		size_t cnt;
    211  1.1  christos 
    212  1.1  christos 		cd = iconv_open(to_cs, from_cs);
    213  1.1  christos 		if (cd == (iconv_t)-1)
    214  1.1  christos 			return -1;
    215  1.1  christos 
    216  1.1  christos 		src = decword;
    217  1.1  christos 		srclen = declen;
    218  1.1  christos 		dstend = *obuf;
    219  1.1  christos 		dstlen = oend - *obuf - 1;
    220  1.1  christos 		cnt = mime_iconv(cd, &src, &srclen, &dstend, &dstlen);
    221  1.4  christos 
    222  1.1  christos 		(void)iconv_close(cd);
    223  1.1  christos 		if (cnt == (size_t)-1)
    224  1.1  christos 			return -1;
    225  1.1  christos 	}
    226  1.1  christos #endif /* CHARSET_SUPPORT */
    227  1.1  christos 	*dstend = '\0';
    228  1.1  christos 	*ibuf = iend;
    229  1.1  christos 	*obuf = dstend;
    230  1.1  christos 	return 0;
    231  1.1  christos }
    232  1.1  christos 
    233  1.1  christos 
/*
 * Folding White Space.  See RFC 2822.
 *
 * Note: RFC 2822 specifies that '\n' and '\r' only occur as CRLF
 * pairs (i.e., "\r\n") and never separately.  However, by the time
 * mail(1) sees the messages, all CRLF pairs have been converted to
 * '\n' characters.
 *
 * Returns 1 if 'c' is a space, a tab or a newline, and 0 otherwise.
 *
 * XXX - pull is_FWS() and skip_FWS() up to def.h?
 */
static inline int
is_FWS(int c)
{
	switch (c) {
	case ' ':
	case '\t':
	case '\n':
		return 1;
	default:
		return 0;
	}
}
    249  1.1  christos 
/*
 * Return a pointer to the first character at or after 'p' that is not
 * folding white space.  A NUL is not FWS, so the scan stops at the
 * end of the string.
 */
static inline const char *
skip_FWS(const char *p)
{
	for (/*EMPTY*/; is_FWS(*p); p++)
		continue;
	return p;
}
    257  1.1  christos 
/*
 * Flush white space that was provisionally skipped after an
 * encoded-word.
 *
 * If *src is NULL there is nothing pending and nothing is changed.
 * Otherwise the bytes in [*src, srcend) are copied to *dst, stopping
 * early if dstend is reached; *dst is advanced past what was copied
 * and *src is reset to NULL.
 */
static inline void
copy_skipped_FWS(char **dst, char *dstend, const char **src, const char *srcend)
{
	const char *from = *src;
	char *to;

	if (from == NULL)
		return;

	for (to = *dst; from < srcend && to < dstend; from++, to++)
		*to = *from;

	*dst = to;
	*src = NULL;
}
    276  1.1  christos 
    277  1.1  christos /*
    278  1.1  christos  * Decode an unstructured field.
    279  1.1  christos  *
    280  1.1  christos  * See RFC 2822 Sec 2.2.1 and 3.6.5.
    281  1.1  christos  * Encoded words may occur anywhere in unstructured fields provided
    282  1.1  christos  * they are separated from any other text or encoded words by at least
    283  1.1  christos  * one linear-white-space character. (See RFC 2047 sec 5.1.)  If two
    284  1.1  christos  * encoded words occur sequentially (separated by only FWS) then the
    285  1.1  christos  * separating FWS is removed.
    286  1.1  christos  *
    287  1.1  christos  * NOTE: unstructured fields cannot contain 'quoted-pairs' (see
    288  1.1  christos  * RFC2822 sec 3.2.6 and RFC 2047), but that is no problem as a '\\'
    289  1.1  christos  * (or any non-whitespace character) immediately before an
    290  1.1  christos  * encoded-word will prevent it from being decoded.
    291  1.1  christos  *
    292  1.1  christos  * hstring should be a NULL terminated string.
    293  1.1  christos  * outbuf should be sufficiently large to hold the result.
    294  1.1  christos  */
    295  1.1  christos static void
    296  1.1  christos mime_decode_usfield(char *outbuf, size_t outsize, const char *hstring)
    297  1.1  christos {
    298  1.1  christos 	const char *p, *p0;
    299  1.1  christos 	char *q, *qend;
    300  1.1  christos 	int lastc;
    301  1.1  christos 	const char *charset;
    302  1.1  christos 
    303  1.1  christos 	charset = value(ENAME_MIME_CHARSET);
    304  1.1  christos 	qend = outbuf + outsize - 1; /* Make sure there is room for the trailing NULL! */
    305  1.1  christos 	q = outbuf;
    306  1.1  christos 	p = hstring;
    307  1.1  christos 	p0 = NULL;
    308  1.1  christos 	lastc = (unsigned char)' ';
    309  1.1  christos 	while (*p && q < qend) {
    310  1.1  christos 		const char *p1;
    311  1.1  christos 		char *q1;
    312  1.1  christos 		if (is_FWS(lastc) && p[0] == '=' && p[1] == '?' &&
    313  1.1  christos 		    decode_word((p1 = p, &p1), (q1 = q, &q1), qend, charset) == 0 &&
    314  1.4  christos 		    (*p1 == '\0' || is_FWS(*p1))) {
    315  1.1  christos 			p0 = p1;  /* pointer to first character after encoded word */
    316  1.1  christos 			q = q1;
    317  1.1  christos 			p = skip_FWS(p1);
    318  1.1  christos 			lastc = (unsigned char)*p0;
    319  1.1  christos 		}
    320  1.1  christos 		else {
    321  1.1  christos 			copy_skipped_FWS(&q, qend, &p0, p);
    322  1.1  christos 			lastc = (unsigned char)*p;
    323  1.1  christos 			if (q < qend)
    324  1.1  christos 				*q++ = *p++;
    325  1.1  christos 		}
    326  1.1  christos 	}
    327  1.1  christos 	copy_skipped_FWS(&q, qend, &p0, p);
    328  1.1  christos 	*q = '\0';
    329  1.1  christos }
    330  1.1  christos 
/*
 * Decode a field comment.
 *
 * Comments only occur in structured fields, can be nested (rfc 2822,
 * sec 3.2.3), and can contain 'encoded-words' and 'quoted-pairs'.
 * Otherwise, they can be regarded as unstructured fields that are
 * bounded by '(' and ')' characters.
 *
 * obuf    - in: where to store output; out: first unused output byte.
 * oend    - end of the output area; nothing is stored at or past it.
 * ibuf    - in: first character after the opening '('; out: first
 *           character not consumed (just past the matching ')' when
 *           one was found before input or output ran out).
 * iend    - end of the input.
 * charset - passed through to decode_word() as the target charset.
 *
 * No NUL terminator is appended to the output here.
 *
 * Returns 0.  The only -1 path propagates a failure from a nested
 * call, and nothing in this function originates one.
 */
static int
decode_comment(char **obuf, char *oend, const char **ibuf, const char *iend, const char *charset)
{
	const char *p, *pend, *p0;
	char *q, *qend;
	int lastc;

	p = *ibuf;
	q = *obuf;
	pend = iend;
	qend = oend;
	lastc = ' ';	/* the opening '(' counts as a word boundary */
	p0 = NULL;	/* start of FWS skipped after an encoded word, if any */
	while (p < pend && q < qend) {
		const char *p1;
		char *q1;

		/*
		 * Accept an encoded word only when it follows FWS and is
		 * followed by FWS or the closing ')'.  p1 and q1 are
		 * scratch copies, so p and q are untouched on failure.
		 */
		if (is_FWS(lastc) && p[0] == '=' && p[1] == '?' &&
		    decode_word((p1 = p, &p1), (q1 = q, &q1), qend, charset) == 0 &&
		    (*p1 == ')' || is_FWS(*p1))) {
			lastc = (unsigned char)*p1;
			p0 = p1;
			q = q1;
			/*
			 * Skip the following FWS for now; it is only copied
			 * out (via p0) if something other than another
			 * encoded word comes next.
			 */
			p = skip_FWS(p1);
			/*
			 * XXX - this check should be unnecessary as *pend should
			 * be '\0' which will stop skip_FWS()
			 */
			if (p > pend)
				p = pend;
		}
		else {
			/* emit any FWS that was held back, then this character */
			copy_skipped_FWS(&q, qend, &p0, p);
			if (q >= qend)	/* XXX - q > qend cannot happen */
				break;

			if (*p == ')') {
				*q++ = *p++;	/* copy the closing ')' */
				break;		/* and get out of here! */
			}

			if (*p == '(') {
				*q++ = *p++;	/* copy the opening '(' */
				/* nested comment: recurse, sharing the same bounds */
				if (decode_comment(&q, qend, &p, pend, charset) == -1)
					return -1;	/* is this right or should we update? */
				lastc = ')';
			}
			else if (*p == '\\' && p + 1 < pend) {	/* quoted-pair */
				/*
				 * Keep the backslash only where dropping it
				 * would change the comment's structure.
				 */
				if (p[1] == '(' || p[1] == ')' || p[1] == '\\') /* need quoted-pair*/
					*q++ = *p;
				p++;
				lastc = (unsigned char)*p;
				/* the backslash may have used the last free byte */
				if (q < qend)
					*q++ = *p++;
			}
			else {
				lastc = (unsigned char)*p;
				*q++ = *p++;
			}
		}
	}
	/*
	 * NOTE(review): FWS still pending in p0 when the loop ends (input
	 * or output exhausted right after an encoded word) is not copied.
	 */
	*ibuf = p;
	*obuf = q;
	return 0;
}
    404  1.1  christos 
/*
 * Decode a quoted-string or no-fold-quote.
 *
 * These cannot contain encoded words.  They can contain quoted-pairs,
 * making '\\' special.  They have no other structure.  See RFC 2822
 * sec 3.2.5 and 3.6.4.
 *
 * On entry *ibuf points just past the opening '"'.  Text is copied to
 * *obuf up to and including the closing '"', or until iend or oend is
 * reached.  The backslash of a quoted-pair is kept only in front of
 * '"' and '\\'.  On return *ibuf and *obuf point past what was
 * consumed and stored.
 */
static void
decode_quoted_string(char **obuf, char *oend, const char **ibuf, const char *iend)
{
	const char *in = *ibuf;
	char *out = *obuf;

	while (in < iend && out < oend) {
		const char ch = *in;

		if (ch == '"') {
			*out++ = *in++;	/* copy the closing '"' */
			break;
		}
		if (ch == '\\' && in + 1 < iend) { /* quoted-pair */
			const char escaped = in[1];

			if (escaped == '"' || escaped == '\\') {
				*out++ = ch;
				if (out >= oend)
					break;
			}
			in++;
		}
		*out++ = *in++;
	}
	*ibuf = in;
	*obuf = out;
}
    440  1.1  christos 
/*
 * Decode a domain-literal or no-fold-literal.
 *
 * These cannot contain encoded words.  They can have quoted pairs and
 * are delimited by '[' and ']' making '\\', '[', and ']' special.
 * They have no other structure.  See RFC 2822 sec 3.4.1 and 3.6.4.
 *
 * On entry *ibuf points just past the opening '['.  Text is copied to
 * *obuf up to and including the closing ']', or until iend or oend is
 * reached.  The backslash of a quoted-pair is kept only in front of
 * '[', ']' and '\\'.  On return *ibuf and *obuf point past what was
 * consumed and stored.
 */
static void
decode_domain_literal(char **obuf, char *oend, const char **ibuf, const char *iend)
{
	const char *in = *ibuf;
	char *out = *obuf;

	while (in < iend && out < oend) {
		const char ch = *in;

		if (ch == ']') {
			*out++ = *in++;	/* copy the closing ']' */
			break;
		}
		if (ch == '\\' && in + 1 < iend) { /* quoted-pair */
			const char escaped = in[1];

			if (escaped == '[' || escaped == ']' || escaped == '\\') {
				*out++ = ch;
				if (out >= oend)
					break;
			}
			in++;
		}
		*out++ = *in++;
	}
	*ibuf = in;
	*obuf = out;
}
    476  1.1  christos 
/*
 * Specials: see RFC 2822 sec 3.2.1.
 *
 * Returns 1 if 'c' is one of
 *	( ) < > [ ] : ; @ \ , . "
 * and 0 for every other value, including values outside 0..127.
 */
static inline int
is_specials(int c)
{
	switch (c) {
	case '"':
	case '(':
	case ')':
	case ',':
	case '.':
	case ':':
	case ';':
	case '<':
	case '>':
	case '@':
	case '[':
	case '\\':
	case ']':
		return 1;
	default:
		return 0;
	}
}
    496  1.1  christos 
/*
 * Decode a structured field.
 *
 * At the top level, structured fields can only contain encoded-words
 * via 'phrases' and 'comments'.  See RFC 2047 sec 5.
 *
 * linebuf - receives the decoded, NUL-terminated result.
 * bufsize - size of linebuf; output stops once bufsize - 1 bytes have
 *           been stored.
 * hstring - the NUL-terminated field body to decode.
 *
 * Comments, quoted-strings and domain-literals are handed to their
 * own decoders; everything else is copied through, except that
 * encoded words following FWS or ',' (and followed by FWS, ',' or the
 * end of the string) are decoded, and the backslash of a quoted-pair
 * is dropped unless the quoted character is one of the specials.
 */
static void
mime_decode_sfield(char *linebuf, size_t bufsize, const char *hstring)
{
	const char *p, *pend, *p0;
	char *q, *qend;
	const char *charset;
	int lastc;

	charset = value(ENAME_MIME_CHARSET);

	p = hstring;
	q = linebuf;
	pend = hstring + strlen(hstring);
	qend = linebuf + bufsize - 1;	/* save room for the NUL terminator */
	lastc = (unsigned char)' ';	/* start of field counts as a word boundary */
	p0 = NULL;	/* start of FWS skipped after an encoded word, if any */
	while (p < pend && q < qend) {
		const char *p1;
		char *q1;

		/*
		 * Anything but a possible encoded word ends a run of
		 * encoded words, so emit the FWS that was held back.  For
		 * '=' that decision is deferred to the case below.
		 */
		if (*p != '=') {
			copy_skipped_FWS(&q, qend, &p0, p);
			if (q >= qend)
				break;
		}

		switch (*p) {
		case '(':	/* start of comment */
			*q++ = *p++;	/* copy the opening '(' */
			(void)decode_comment(&q, qend, &p, pend, charset);
			lastc = (unsigned char)p[-1];
			break;

		case '"':	/* start of quoted-string or no-fold-quote */
			*q++ = *p++;	/* copy the opening '"' */
			decode_quoted_string(&q, qend, &p, pend);
			lastc = (unsigned char)p[-1];
			break;

		case '[':	/* start of domain-literal or no-fold-literal */
			*q++ = *p++;	/* copy the opening '[' */
			decode_domain_literal(&q, qend, &p, pend);
			lastc = (unsigned char)p[-1];
			break;

		case '\\':	/* start of quoted-pair */
			if (p + 1 < pend) {		/* quoted pair */
				/* keep the backslash only in front of a special */
				if (is_specials(p[1])) {
					*q++ = *p;
					/*
					 * Output is full: this break leaves the
					 * switch, and the loop test then ends
					 * the loop.
					 */
					if (q >= qend)
						break;
				}
				p++;	/* skip the '\\' */
			}
			goto copy_char;

		case '=':
			/*
			 * At this level encoded words can appear via
			 * 'phrases' (possibly delimited by ',' as in
			 * 'keywords').  Thus we handle them as such.
			 * Hopefully this is sufficient.
			 */
			/*
			 * p1 and q1 are scratch copies, so p and q are
			 * untouched unless the whole condition holds.
			 */
			if ((lastc == ',' || is_FWS(lastc)) && p[1] == '?' &&
			    decode_word((p1 = p, &p1), (q1 = q, &q1), qend, charset) == 0 &&
			    (*p1 == '\0' || *p1 == ',' || is_FWS(*p1))) {
				lastc = (unsigned char)*p1;
				p0 = p1;
				q = q1;
				/* hold back the following FWS; see p0 above */
				p = skip_FWS(p1);
				/*
				 * XXX - this check should be
				 * unnecessary as *pend should be '\0'
				 * which will stop skip_FWS()
				 */
				if (p > pend)
					p = pend;
				break;
			}
			else {
				/* a plain '=': emit held-back FWS, then copy it */
				copy_skipped_FWS(&q, qend, &p0, p);
				if (q >= qend)
					break;
				goto copy_char;
			}

		case '<':	/* start of angle-addr, msg-id, or path. */
			/*
			 * A msg-id cannot contain encoded-pairs or
			 * encoded-words, but angle-addr and path can.
			 * Distinguishing between them seems to be
			 * unnecessary, so let's be loose and just
			 * decode them as if they were all the same.
			 */
		default:
	copy_char:
			lastc = (unsigned char)*p;
			*q++ = *p++;
			break;
		}
	}
	/* flush FWS still held back after a trailing encoded word */
	copy_skipped_FWS(&q, qend, &p0, p);
	*q = '\0';	/* null terminate the result! */
}
    607  1.1  christos 
    608  1.1  christos /*
    609  1.1  christos  * Returns the correct hfield decoder, or NULL if none.
    610  1.1  christos  * Info extracted from RFC 2822.
    611  1.5  christos  *
    612  1.5  christos  * name - pointer to field name of header line (with colon).
    613  1.1  christos  */
    614  1.1  christos PUBLIC hfield_decoder_t
    615  1.5  christos mime_hfield_decoder(const char *name)
    616  1.1  christos {
    617  1.1  christos 	static const struct field_decoder_tbl_s {
    618  1.1  christos 		const char *field_name;
    619  1.5  christos 		size_t field_len;
    620  1.1  christos 		hfield_decoder_t decoder;
    621  1.1  christos 	} field_decoder_tbl[] = {
    622  1.5  christos #define X(s)	s, sizeof(s) - 1
    623  1.5  christos 		{ X("Received:"),			NULL },
    624  1.5  christos 
    625  1.5  christos 		{ X("Content-Type:"),			NULL },
    626  1.5  christos 		{ X("Content-Disposition:"),		NULL },
    627  1.5  christos 		{ X("Content-Transfer-Encoding:"),	NULL },
    628  1.5  christos 		{ X("Content-Description:"),		mime_decode_sfield },
    629  1.5  christos 		{ X("Content-ID:"),			mime_decode_sfield },
    630  1.5  christos 		{ X("MIME-Version:"),			mime_decode_sfield },
    631  1.5  christos 
    632  1.5  christos 		{ X("Bcc:"),				mime_decode_sfield },
    633  1.5  christos 		{ X("Cc:"),				mime_decode_sfield },
    634  1.5  christos 		{ X("Date:"),				mime_decode_sfield },
    635  1.5  christos 		{ X("From:"),				mime_decode_sfield },
    636  1.5  christos 		{ X("In-Reply-To:"),			mime_decode_sfield },
    637  1.5  christos 		{ X("Keywords:"),			mime_decode_sfield },
    638  1.5  christos 		{ X("Message-ID:"),			mime_decode_sfield },
    639  1.5  christos 		{ X("References:"),			mime_decode_sfield },
    640  1.5  christos 		{ X("Reply-To:"),			mime_decode_sfield },
    641  1.5  christos 		{ X("Return-Path:"),			mime_decode_sfield },
    642  1.5  christos 		{ X("Sender:"),				mime_decode_sfield },
    643  1.5  christos 		{ X("To:"),				mime_decode_sfield },
    644  1.5  christos 		{ X("Subject:"),			mime_decode_usfield },
    645  1.5  christos 		{ X("Comments:"),			mime_decode_usfield },
    646  1.5  christos 		{ X("X-"),				mime_decode_usfield },
    647  1.5  christos 		{ NULL, 0,				mime_decode_usfield },	/* optional-fields */
    648  1.5  christos #undef X
    649  1.1  christos 	};
    650  1.1  christos 	const struct field_decoder_tbl_s *fp;
    651  1.1  christos 
    652  1.1  christos 	/* XXX - this begs for a hash table! */
    653  1.1  christos 	for (fp = field_decoder_tbl; fp->field_name; fp++)
    654  1.5  christos 		if (strncasecmp(name, fp->field_name, fp->field_len) == 0)
    655  1.5  christos 			break;
    656  1.1  christos 	return fp->decoder;
    657  1.1  christos }
    658  1.1  christos 
    659  1.1  christos #endif /* MIME_SUPPORT */
    660