Home | History | Annotate | Line # | Download | only in global
      1 /*	$NetBSD: mime_state.c,v 1.4 2026/05/09 18:49:16 christos Exp $	*/
      2 
      3 /*++
      4 /* NAME
      5 /*	mime_state 3
      6 /* SUMMARY
      7 /*	MIME parser state machine
      8 /* SYNOPSIS
      9 /*	#include <mime_state.h>
     10 /*
     11 /*	MIME_STATE *mime_state_alloc(flags, head_out, head_end,
     12 /*					 body_out, body_end,
     13 /*					 err_print, context)
     14 /*	int	flags;
     15 /*	void	(*head_out)(void *ptr, int header_class,
     16 /*				const HEADER_OPTS *header_info,
     17 /*				VSTRING *buf, off_t offset);
     18 /*	void	(*head_end)(void *ptr);
     19 /*	void	(*body_out)(void *ptr, int rec_type,
     20 /*				const char *buf, ssize_t len,
     21 /*				off_t offset);
     22 /*	void	(*body_end)(void *ptr);
/*	void	(*err_print)(void *ptr, int err_flag, const char *text,
/*				ssize_t len);
     24 /*	void	*context;
     25 /*
     26 /*	int	mime_state_update(state, rec_type, buf, len)
     27 /*	MIME_STATE *state;
     28 /*	int	rec_type;
     29 /*	const char *buf;
     30 /*	ssize_t	len;
     31 /*
     32 /*	int	mime_state_status(state)
     33 /*	MIME_STATE *state;
     34 /*
     35 /*	MIME_STATE *mime_state_free(state)
     36 /*	MIME_STATE *state;
     37 /*
     38 /*	const char *mime_state_error(error_code)
     39 /*	int	error_code;
     40 /*
     41 /*	typedef struct {
     42 /* .in +4
     43 /*		const int code;		/* internal error code */
     44 /*		const char *dsn;	/* RFC 3463 */
     45 /*		const char *text;	/* descriptive text */
     46 /* .in -4
     47 /*	} MIME_STATE_DETAIL;
     48 /*
     49 /*	const MIME_STATE_DETAIL *mime_state_detail(error_code)
     50 /*	int	error_code;
     51 /* DESCRIPTION
     52 /*	This module implements a one-pass MIME processor with optional
     53 /*	8-bit to quoted-printable conversion.
     54 /*
     55 /*	In order to fend off denial of service attacks, message headers
     56 /*	are truncated at or above var_header_limit bytes, message boundary
     57 /*	strings are truncated at var_mime_bound_len bytes, and the multipart
     58 /*	nesting level is limited to var_mime_maxdepth levels.
     59 /*
     60 /*	mime_state_alloc() creates a MIME state machine. The machine
     61 /*	is delivered in its initial state, expecting content type
     62 /*	text/plain, 7-bit data.
     63 /*
     64 /*	mime_state_update() updates the MIME state machine according
     65 /*	to the input record type and the record content.
     66 /*	The result value is the bit-wise OR of zero or more of the following:
     67 /* .IP MIME_ERR_TRUNC_HEADER
     68 /*	A message header was longer than var_header_limit bytes.
     69 /* .IP MIME_ERR_NESTING
     70 /*	The MIME structure was nested more than var_mime_maxdepth levels.
     71 /* .IP MIME_ERR_8BIT_IN_HEADER
     72 /*	A message header contains 8-bit data. This is always illegal.
     73 /* .IP MIME_ERR_8BIT_IN_7BIT_BODY
     74 /*	A MIME header specifies (or defaults to) 7-bit content, but the
     75 /*	corresponding message body or body parts contain 8-bit content.
     76 /* .IP MIME_ERR_ENCODING_DOMAIN
     77 /*	An entity of type "message" or "multipart" specifies the wrong
     78 /*	content transfer encoding domain, or specifies a transformation
     79 /*	(quoted-printable, base64) instead of a domain (7bit, 8bit,
     80 /*	or binary).
     81 /* .IP MIME_ERR_NON_EMPTY_EOH
     82 /*	The primary message header was terminated with a non-empty line.
     83 /* .PP
     84 /*	mime_state_status() reports the same result as
     85 /*	mime_state_update(), but without changing state.
     86 /*
     87 /*	mime_state_free() releases storage for a MIME state machine,
     88 /*	and conveniently returns a null pointer.
     89 /*
     90 /*	mime_state_error() returns a string representation for the
     91 /*	specified error code. When multiple errors are specified it
     92 /*	reports what it deems the most serious one.
     93 /*
     94 /*	mime_state_detail() returns a table entry with error
     95 /*	information for the specified error code. When multiple
     96 /*	errors are specified it reports what it deems the most
     97 /*	serious one.
     98 /*
     99 /*	Arguments:
    100 /* .IP body_out
    101 /*	The output routine for body lines. It receives unmodified input
    102 /*	records, or the result of 8-bit -> 7-bit conversion.
    103 /* .IP body_end
    104 /*	A null pointer, or a pointer to a routine that is called after
    105 /*	the last input record is processed.
    106 /* .IP buf
    107 /*	Buffer with the content of a logical or physical message record.
    108 /* .IP context
    109 /*	Caller context that is passed on to the head_out and body_out
    110 /*	routines.
    111 /* .IP enc_type
    112 /*	The content encoding: MIME_ENC_7BIT or MIME_ENC_8BIT.
    113 /* .IP err_print
/*	Null pointer, or pointer to a function that is called with
/*	arguments: the application context, the error type, the
/*	offending input, and the length of that input. Only one
/*	instance per error type is reported.
    117 /* .IP flags
    118 /*	Special processing options. Specify the bit-wise OR of zero or
    119 /*	more of the following:
    120 /* .RS
    121 /* .IP MIME_OPT_DISABLE_MIME
    122 /*	Pay no attention to Content-* message headers, and switch to
    123 /*	message body state at the end of the primary message headers.
    124 /* .IP MIME_OPT_REPORT_TRUNC_HEADER
    125 /*	Report errors that set the MIME_ERR_TRUNC_HEADER error flag
    126 /*	(see above).
    127 /* .IP MIME_OPT_REPORT_8BIT_IN_HEADER
    128 /*	Report errors that set the MIME_ERR_8BIT_IN_HEADER error
    129 /*	flag (see above). This rarely stops legitimate mail.
    130 /* .IP MIME_OPT_REPORT_8BIT_IN_7BIT_BODY
    131 /*	Report errors that set the MIME_ERR_8BIT_IN_7BIT_BODY error
    132 /*	flag (see above). This currently breaks Majordomo mail that is
    133 /*	forwarded for approval, because Majordomo does not propagate
    134 /*	MIME type information from the enclosed message to the message
    135 /*	headers of the request for approval.
    136 /* .IP MIME_OPT_REPORT_ENCODING_DOMAIN
    137 /*	Report errors that set the MIME_ERR_ENCODING_DOMAIN error
    138 /*	flag (see above).
    139 /* .IP MIME_OPT_REPORT_NESTING
    140 /*	Report errors that set the MIME_ERR_NESTING error flag
    141 /*	(see above).
    142 /* .IP MIME_OPT_REPORT_NON_EMPTY_EOH
    143 /*	Report errors that terminate the primary message header with a
    144 /*	non-empty line.
    145 /* .IP MIME_OPT_DOWNGRADE
    146 /*	Transform content that claims to be 8-bit into quoted-printable.
    147 /*	Where appropriate, update Content-Transfer-Encoding: message
    148 /*	headers.
    149 /* .RE
    150 /* .sp
    151 /*	For convenience, MIME_OPT_NONE requests no special processing.
    152 /* .IP header_class
    153 /*	Specifies where a message header is located.
    154 /* .RS
    155 /* .IP MIME_HDR_PRIMARY
    156 /*	In the primary message header section.
    157 /* .IP MIME_HDR_MULTIPART
    158 /*	In the header section after a multipart boundary string.
    159 /* .IP MIME_HDR_NESTED
    160 /*	At the start of a nested (e.g., message/rfc822) message.
    161 /* .RE
    162 /* .sp
    163 /*	For convenience, the macros MIME_HDR_FIRST and MIME_HDR_LAST
    164 /*	specify the range of MIME_HDR_MUMBLE macros.
    165 /* .sp
    166 /*	To find out if something is a MIME header at the beginning
    167 /*	of an RFC 822 message or an attached message, look at the
    168 /*	header_info argument.
    169 /* .IP header_info
    170 /*	Null pointer or information about the message header, see
    171 /*	header_opts(3).
    172 /* .IP head_out
    173 /*	The output routine that is invoked for outputting a message header.
    174 /*	A multi-line header is passed as one chunk of text with embedded
    175 /*	newlines.
    176 /*	It is the responsibility of the output routine to break the text
    177 /*	at embedded newlines, and to break up long text between newlines
    178 /*	into multiple output records.
    179 /*	Note: an output routine is explicitly allowed to modify the text.
    180 /* .IP head_end
    181 /*	A null pointer, or a pointer to a routine that is called after
    182 /*	the last message header in the first header block is processed.
    183 /* .IP len
    184 /*	Length of non-VSTRING input buffer.
    185 /* .IP offset
    186 /*	The offset in bytes from the start of the current block of message
    187 /*	headers or body lines. Line boundaries are counted as one byte.
    188 /* .IP rec_type
    189 /*	The input record type as defined in rec_type(3h). State is
    190 /*	updated for text records (REC_TYPE_NORM or REC_TYPE_CONT).
    191 /*	Some input records are stored internally in order to reconstruct
    192 /*	multi-line input.  Upon receipt of any non-text record type, all
    193 /*	stored input is flushed and the state is set to "body".
    194 /* .IP state
    195 /*	MIME parser state created with mime_state_alloc().
    196 /* BUGS
    197 /*	NOTE: when the end of headers is reached, mime_state_update()
    198 /*	may execute up to three call-backs before returning to the
    199 /*	caller: head_out(), head_end(), and body_out() or body_end().
    200 /*	As long as call-backs return no result, it is up to the
    201 /*	call-back routines to check if a previous call-back experienced
    202 /*	an error.
    203 /*
    204 /*	Different mail user agents treat malformed message boundary
    205 /*	strings in different ways. The Postfix MIME processor cannot
    206 /*	be bug-compatible with everything.
    207 /*
    208 /*	This module will not glue together multipart boundary strings that
    209 /*	span multiple input records.
    210 /*
    211 /*	This module will not glue together RFC 2231 formatted (boundary)
    212 /*	parameter values. RFC 2231 claims compatibility with existing
    213 /*	MIME processors. Splitting boundary strings is not backwards
    214 /*	compatible.
    215 /*
    216 /*	The "8-bit data inside 7-bit body" test is myopic. It is not aware
    217 /*	of any enclosing (message or multipart) encoding information.
    218 /*
    219 /*	If the input ends in data other than a hard line break, this module
    220 /*	will add a hard line break of its own. No line break is added to
    221 /*	empty input.
    222 /*
    223 /*	This code recognizes the obsolete form "headername :" but will
    224 /*	normalize it to the canonical form "headername:". Leaving the
    225 /*	obsolete form alone would cause too much trouble with existing code
    226 /*	that expects only the normalized form.
    227 /* SEE ALSO
    228 /*	msg(3) diagnostics interface
    229 /*	header_opts(3) header information lookup
    230 /*	RFC 822 (ARPA Internet Text Messages)
    231 /*	RFC 2045 (MIME: Format of internet message bodies)
    232 /*	RFC 2046 (MIME: Media types)
    233 /* DIAGNOSTICS
    234 /*	Fatal errors: memory allocation problem.
    235 /* LICENSE
    236 /* .ad
    237 /* .fi
    238 /*	The Secure Mailer license must be distributed with this software.
    239 /* HISTORY
    240 /* .ad
    241 /* .fi
    242 /*	This code was implemented from scratch after reading the RFC
    243 /*	documents. This was a relatively straightforward effort with
    244 /*	few if any surprises. Victor Duchovni of Morgan Stanley shared
    245 /*	his experiences with ambiguities in real-life MIME implementations.
    246 /*	Liviu Daia of the Romanian Academy shared his insights in some
    247 /*	of the darker corners.
    248 /* AUTHOR(S)
    249 /*	Wietse Venema
    250 /*	IBM T.J. Watson Research
    251 /*	P.O. Box 704
    252 /*	Yorktown Heights, NY 10598, USA
    253 /*
    254 /*	Wietse Venema
    255 /*	Google, Inc.
    256 /*	111 8th Avenue
    257 /*	New York, NY 10011, USA
    258 /*--*/
    259 
    260 /* System library. */
    261 
    262 #include <sys_defs.h>
    263 #include <stdarg.h>
    264 #include <ctype.h>
    265 #include <string.h>
    266 
    267 #ifdef STRCASECMP_IN_STRINGS_H
    268 #include <strings.h>
    269 #endif
    270 
    271 /* Utility library. */
    272 
    273 #include <mymalloc.h>
    274 #include <msg.h>
    275 #include <vstring.h>
    276 
    277 /* Global library. */
    278 
    279 #include <rec_type.h>
    280 #include <is_header.h>
    281 #include <header_opts.h>
    282 #include <mail_params.h>
    283 #include <header_token.h>
    284 #include <lex_822.h>
    285 #include <mime_state.h>
    286 
    287 /* Application-specific. */
    288 
    289  /*
    290   * Mime parser stack element for multipart content.
    291   */
typedef struct MIME_STACK MIME_STACK;

 /*
  * One entry per open multipart boundary. Entries are created by
  * mime_state_push() and released by mime_state_pop(); the newest entry is
  * at the head of the list.
  */
struct MIME_STACK {
    int     def_ctype;			/* content type stored by the pusher */
    int     def_stype;			/* content subtype stored by the pusher */
    char   *boundary;			/* private copy of the boundary text */
    ssize_t bound_len;			/* number of boundary bytes kept */
    MIME_STACK *next;			/* next older entry, or null */
};
    299 
    300  /*
    301   * Mime parser state.
    302   */
 /*
  * MIME_MAX_TOKEN sizes the token[] array below and is the token limit that
  * the Content-Type parser hands to header_token().
  */
#define MIME_MAX_TOKEN		3	/* tokens per attribute */

struct MIME_STATE {

    /*
     * Volatile members. These change while input records are processed.
     */
    int     curr_state;			/* header/body state (MIME_STATE_XXX) */
    int     curr_ctype;			/* last or default content type */
    int     curr_stype;			/* last or default content subtype */
    int     curr_encoding;		/* last or default content encoding */
    int     curr_domain;		/* last or default encoding unit */
    VSTRING *output_buffer;		/* headers, quoted-printable body */
    int     prev_rec_type;		/* previous input record type, 0 at start */
    int     nesting_level;		/* safety: starts at -1, +1 per push, -1 per pop */
    MIME_STACK *stack;			/* for composite types; null when empty */
    HEADER_TOKEN token[MIME_MAX_TOKEN];	/* header token array */
    VSTRING *token_buffer;		/* header parser scratch buffer */
    int     err_flags;			/* processing errors; each bit is reported once */
    off_t   head_offset;		/* offset in header block, advanced by HEAD_OUT */
    off_t   body_offset;		/* offset in body block, advanced by BODY_OUT */

    /*
     * Static members. These are copied from the mime_state_alloc() arguments
     * and are not modified by the code visible in this part of the file.
     */
    int     static_flags;		/* static processing options (MIME_OPT_XXX) */
    MIME_STATE_HEAD_OUT head_out;	/* header output routine */
    MIME_STATE_ANY_END head_end;	/* end of primary header routine */
    MIME_STATE_BODY_OUT body_out;	/* body output routine */
    MIME_STATE_ANY_END body_end;	/* end of body output routine */
    MIME_STATE_ERR_PRINT err_print;	/* error report, may be null */
    void   *app_context;		/* application context, passed to call-backs */
};
    336 
    337  /*
    338   * Content types and subtypes that we care about, either because we have to,
    339   * or because we want to filter out broken MIME messages.
    340   */
 /* Content types, stored in curr_ctype and in MIME_STACK.def_ctype. */
#define MIME_CTYPE_OTHER	0
#define MIME_CTYPE_TEXT		1
#define MIME_CTYPE_MESSAGE	2
#define MIME_CTYPE_MULTIPART	3

 /*
  * Content subtypes, stored in curr_stype and in MIME_STACK.def_stype.
  * mime_state_content_type() recognizes "plain" under text, and "rfc822",
  * "partial", "external-body" and "global" under message.
  */
#define MIME_STYPE_OTHER	0
#define MIME_STYPE_PLAIN	1
#define MIME_STYPE_RFC822	2
#define MIME_STYPE_PARTIAL	3
#define MIME_STYPE_EXTERN_BODY	4
#define MIME_STYPE_GLOBAL	5
    352 
    353  /*
    354   * MIME parser states. We steal from the public interface.
    355   */
 /*
  * The three header states reuse the public MIME_HDR_XXX header class codes,
  * so that curr_state can be passed unchanged as the header_class argument
  * of the head_out call-back. MIME_STATE_BODY is the next value after
  * MIME_HDR_NESTED.
  */
#define MIME_STATE_PRIMARY	MIME_HDR_PRIMARY	/* primary headers */
#define MIME_STATE_MULTIPART	MIME_HDR_MULTIPART	/* after --boundary */
#define MIME_STATE_NESTED	MIME_HDR_NESTED	/* message/rfc822 */
#define MIME_STATE_BODY		(MIME_HDR_NESTED + 1)

 /*
  * SET_MIME_STATE - enter a new state and replace the content type, subtype,
  * encoding and domain in one step. The offset counter of the block being
  * entered is reset: body_offset when entering MIME_STATE_BODY, head_offset
  * for any header state. The "state" argument is evaluated twice, so it must
  * not have side effects.
  */
#define SET_MIME_STATE(ptr, state, ctype, stype, encoding, domain) do { \
	(ptr)->curr_state = (state); \
	(ptr)->curr_ctype = (ctype); \
	(ptr)->curr_stype = (stype); \
	(ptr)->curr_encoding = (encoding); \
	(ptr)->curr_domain = (domain); \
	if ((state) == MIME_STATE_BODY) \
	    (ptr)->body_offset = 0; \
	else \
	    (ptr)->head_offset = 0; \
    } while (0)

 /*
  * SET_CURR_STATE - enter a new state and reset the matching offset counter,
  * leaving content type, subtype, encoding and domain untouched. The "state"
  * argument is evaluated twice.
  */
#define SET_CURR_STATE(ptr, state) do { \
	(ptr)->curr_state = (state); \
	if ((state) == MIME_STATE_BODY) \
	    (ptr)->body_offset = 0; \
	else \
	    (ptr)->head_offset = 0; \
    } while (0)
    380 
    381  /*
    382   * MIME encodings and domains. We intentionally use the same codes for
    383   * encodings and domains, so that we can easily find out whether a content
    384   * transfer encoding header specifies a domain or whether it specifies
    385   * domain+encoding, which is illegal for multipart/any and message/any.
    386   */
typedef struct MIME_ENCODING MIME_ENCODING;

struct MIME_ENCODING {
    const char *name;			/* name as written in the header */
    int     encoding;			/* MIME_ENC_XXX code for the encoding */
    int     domain;			/* MIME_ENC_XXX code for the domain */
};

 /* Transformations: these imply an encoding on top of a domain. */
#define MIME_ENC_QP		1	/* encoding + domain */
#define MIME_ENC_BASE64		2	/* encoding + domain */

 /* These are defined in mime_state.h as part of the external interface. */
#ifndef MIME_ENC_7BIT
#define MIME_ENC_7BIT		7	/* domain only */
#define MIME_ENC_8BIT		8	/* domain only */
#define MIME_ENC_BINARY		9	/* domain only */
#endif

 /*
  * Lookup table for Content-Transfer-Encoding values (RFC 2045). An entry
  * whose encoding equals its domain names a pure domain. The list ends with
  * an entry that has a null name.
  */
static const MIME_ENCODING mime_encoding_map[] = {
    {"7bit", MIME_ENC_7BIT, MIME_ENC_7BIT},
    {"8bit", MIME_ENC_8BIT, MIME_ENC_8BIT},
    {"binary", MIME_ENC_BINARY, MIME_ENC_BINARY},
    {"base64", MIME_ENC_BASE64, MIME_ENC_7BIT},
    {"quoted-printable", MIME_ENC_QP, MIME_ENC_7BIT},
    {0, 0, 0},
};
    410 
    411  /*
    412   * Silly Little Macros.
    413   */
    414 #define STR(x)		vstring_str(x)
    415 #define LEN(x)		VSTRING_LEN(x)
    416 #define END(x)		vstring_end(x)
    417 #define CU_CHAR_PTR(x)	((const unsigned char *) (x))
    418 
    419 #define REPORT_ERROR_LEN(state, err_type, text, len) do { \
    420 	if ((state->err_flags & err_type) == 0) { \
    421 	    if (state->err_print != 0) \
    422 		state->err_print(state->app_context, err_type, text, len); \
    423 	    state->err_flags |= err_type; \
    424 	} \
    425     } while (0)
    426 
    427 #define REPORT_ERROR(state, err_type, text) do { \
    428 	const char *_text = text; \
    429 	ssize_t _len = strlen(text); \
    430 	REPORT_ERROR_LEN(state, err_type, _text, _len); \
    431     } while (0)
    432 
    433 #define REPORT_ERROR_BUF(state, err_type, buf) \
    434     REPORT_ERROR_LEN(state, err_type, STR(buf), LEN(buf))
    435 
    436 
    437  /*
    438   * Outputs and state changes are interleaved, so we must maintain separate
    439   * offsets for header and body segments.
    440   */
 /*
  * HEAD_OUT - pass the header in output_buffer to the head_out call-back,
  * together with the current state as header class, the header info, and
  * the current header offset. The offset then advances by len + 1; the
  * extra byte accounts for the line boundary. Nothing happens, and the
  * offset does not move, when no head_out call-back was configured.
  */
#define HEAD_OUT(ptr, info, len) do { \
	if ((ptr)->head_out) { \
	    (ptr)->head_out((ptr)->app_context, (ptr)->curr_state, \
			    (info), (ptr)->output_buffer, (ptr)->head_offset); \
	    (ptr)->head_offset += (len) + 1; \
	} \
    } while(0)

 /*
  * BODY_OUT - pass one body record to the body_out call-back, together with
  * the current body offset, then advance that offset by len + 1. Nothing
  * happens when no body_out call-back was configured. The "len" argument is
  * evaluated twice.
  */
#define BODY_OUT(ptr, rec_type, text, len) do { \
	if ((ptr)->body_out) { \
	    (ptr)->body_out((ptr)->app_context, (rec_type), \
			    (text), (len), (ptr)->body_offset); \
	    (ptr)->body_offset += (len) + 1; \
	} \
    } while(0)
    456 
    457 /* mime_state_push - push boundary onto stack */
    458 
    459 static void mime_state_push(MIME_STATE *state, int def_ctype, int def_stype,
    460 			            const char *boundary)
    461 {
    462     MIME_STACK *stack;
    463 
    464     /*
    465      * RFC 2046 mandates that a boundary string be up to 70 characters long.
    466      * Some MTAs, including Postfix, include the fully-qualified MTA name
    467      * which can be longer, so we are willing to handle boundary strings that
    468      * exceed the RFC specification. We allow for message headers of up to
    469      * var_header_limit characters. In order to avoid denial of service, we
    470      * have to impose a configurable limit on the amount of text that we are
    471      * willing to store as a boundary string. Despite this truncation way we
    472      * will still correctly detect all intermediate boundaries and all the
    473      * message headers that follow those boundaries.
    474      */
    475     state->nesting_level += 1;
    476     stack = (MIME_STACK *) mymalloc(sizeof(*stack));
    477     stack->def_ctype = def_ctype;
    478     stack->def_stype = def_stype;
    479     if ((stack->bound_len = strlen(boundary)) > var_mime_bound_len)
    480 	stack->bound_len = var_mime_bound_len;
    481     stack->boundary = mystrndup(boundary, stack->bound_len);
    482     stack->next = state->stack;
    483     state->stack = stack;
    484     if (msg_verbose)
    485 	msg_info("PUSH boundary %s", stack->boundary);
    486 }
    487 
    488 /* mime_state_pop - pop boundary from stack */
    489 
    490 static void mime_state_pop(MIME_STATE *state)
    491 {
    492     MIME_STACK *stack;
    493 
    494     if ((stack = state->stack) == 0)
    495 	msg_panic("mime_state_pop: there is no stack");
    496     if (msg_verbose)
    497 	msg_info("POP boundary %s", stack->boundary);
    498     state->nesting_level -= 1;
    499     state->stack = stack->next;
    500     myfree(stack->boundary);
    501     myfree((void *) stack);
    502 }
    503 
    504 /* mime_state_alloc - create MIME state machine */
    505 
    506 MIME_STATE *mime_state_alloc(int flags,
    507 			             MIME_STATE_HEAD_OUT head_out,
    508 			             MIME_STATE_ANY_END head_end,
    509 			             MIME_STATE_BODY_OUT body_out,
    510 			             MIME_STATE_ANY_END body_end,
    511 			             MIME_STATE_ERR_PRINT err_print,
    512 			             void *context)
    513 {
    514     MIME_STATE *state;
    515 
    516     state = (MIME_STATE *) mymalloc(sizeof(*state));
    517 
    518     /* Volatile members. */
    519     state->err_flags = 0;
    520     state->body_offset = 0;			/* XXX */
    521     SET_MIME_STATE(state, MIME_STATE_PRIMARY,
    522 		   MIME_CTYPE_TEXT, MIME_STYPE_PLAIN,
    523 		   MIME_ENC_7BIT, MIME_ENC_7BIT);
    524     state->output_buffer = vstring_alloc(100);
    525     state->prev_rec_type = 0;
    526     state->stack = 0;
    527     state->token_buffer = vstring_alloc(1);
    528     state->nesting_level = -1;			/* BC Fix 20170512 */
    529 
    530     /* Static members. */
    531     state->static_flags = flags;
    532     state->head_out = head_out;
    533     state->head_end = head_end;
    534     state->body_out = body_out;
    535     state->body_end = body_end;
    536     state->err_print = err_print;
    537     state->app_context = context;
    538     return (state);
    539 }
    540 
    541 /* mime_state_free - destroy MIME state machine */
    542 
    543 MIME_STATE *mime_state_free(MIME_STATE *state)
    544 {
    545     vstring_free(state->output_buffer);
    546     while (state->stack)
    547 	mime_state_pop(state);
    548     if (state->token_buffer)
    549 	vstring_free(state->token_buffer);
    550     myfree((void *) state);
    551     return (0);
    552 }
    553 
    554 /* mime_state_content_type - process content-type header */
    555 
    556 static void mime_state_content_type(MIME_STATE *state,
    557 				            const HEADER_OPTS *header_info)
    558 {
    559     const char *cp;
    560     ssize_t tok_count;
    561     int     def_ctype;
    562     int     def_stype;
    563 
    564 #define TOKEN_MATCH(tok, text) \
    565     ((tok).type == HEADER_TOK_TOKEN && strcasecmp((tok).u.value, (text)) == 0)
    566 
    567 #define RFC2045_TSPECIALS	"()<>@,;:\\\"/[]?="
    568 
    569 #define PARSE_CONTENT_TYPE_HEADER(state, ptr) \
    570     header_token(state->token, MIME_MAX_TOKEN, \
    571 	state->token_buffer, ptr, RFC2045_TSPECIALS, ';')
    572 
    573     cp = STR(state->output_buffer) + strlen(header_info->name) + 1;
    574     if ((tok_count = PARSE_CONTENT_TYPE_HEADER(state, &cp)) > 0) {
    575 
    576 	/*
    577 	 * text/whatever. Right now we don't really care if it is plain or
    578 	 * not, but we may want to recognize subtypes later, and then this
    579 	 * code can serve as an example.
    580 	 */
    581 	if (TOKEN_MATCH(state->token[0], "text")) {
    582 	    state->curr_ctype = MIME_CTYPE_TEXT;
    583 	    if (tok_count >= 3
    584 		&& state->token[1].type == '/'
    585 		&& TOKEN_MATCH(state->token[2], "plain"))
    586 		state->curr_stype = MIME_STYPE_PLAIN;
    587 	    else
    588 		state->curr_stype = MIME_STYPE_OTHER;
    589 	    return;
    590 	}
    591 
    592 	/*
    593 	 * message/whatever body parts start with another block of message
    594 	 * headers that we may want to look at. The partial and external-body
    595 	 * subtypes cannot be subjected to 8-bit -> 7-bit conversion, so we
    596 	 * must properly recognize them.
    597 	 */
    598 	if (TOKEN_MATCH(state->token[0], "message")) {
    599 	    state->curr_ctype = MIME_CTYPE_MESSAGE;
    600 	    state->curr_stype = MIME_STYPE_OTHER;
    601 	    if (tok_count >= 3
    602 		&& state->token[1].type == '/') {
    603 		if (TOKEN_MATCH(state->token[2], "rfc822"))
    604 		    state->curr_stype = MIME_STYPE_RFC822;
    605 		else if (TOKEN_MATCH(state->token[2], "partial"))
    606 		    state->curr_stype = MIME_STYPE_PARTIAL;
    607 		else if (TOKEN_MATCH(state->token[2], "external-body"))
    608 		    state->curr_stype = MIME_STYPE_EXTERN_BODY;
    609 		else if (TOKEN_MATCH(state->token[2], "global"))
    610 		    state->curr_stype = MIME_STYPE_GLOBAL;
    611 	    }
    612 	    return;
    613 	}
    614 
    615 	/*
    616 	 * multipart/digest has default content type message/rfc822,
    617 	 * multipart/whatever has default content type text/plain.
    618 	 */
    619 	if (TOKEN_MATCH(state->token[0], "multipart")) {
    620 	    state->curr_ctype = MIME_CTYPE_MULTIPART;
    621 	    if (tok_count >= 3
    622 		&& state->token[1].type == '/'
    623 		&& TOKEN_MATCH(state->token[2], "digest")) {
    624 		def_ctype = MIME_CTYPE_MESSAGE;
    625 		def_stype = MIME_STYPE_RFC822;
    626 	    } else {
    627 		def_ctype = MIME_CTYPE_TEXT;
    628 		def_stype = MIME_STYPE_PLAIN;
    629 	    }
    630 
    631 	    /*
    632 	     * Yes, this is supposed to capture multiple boundary strings,
    633 	     * which are illegal and which could be used to hide content in
    634 	     * an implementation dependent manner. The code below allows us
    635 	     * to find embedded message headers as long as the sender uses
    636 	     * only one of these same-level boundary strings.
    637 	     *
    638 	     * Yes, this is supposed to ignore the boundary value type.
    639 	     */
    640 	    while ((tok_count = PARSE_CONTENT_TYPE_HEADER(state, &cp)) >= 0) {
    641 		if (tok_count >= 3
    642 		    && TOKEN_MATCH(state->token[0], "boundary")
    643 		    && state->token[1].type == '=') {
    644 		    if (state->nesting_level > var_mime_maxdepth) {
    645 			if (state->static_flags & MIME_OPT_REPORT_NESTING)
    646 			    REPORT_ERROR_BUF(state, MIME_ERR_NESTING,
    647 					     state->output_buffer);
    648 		    } else {
    649 			mime_state_push(state, def_ctype, def_stype,
    650 					state->token[2].u.value);
    651 		    }
    652 		}
    653 	    }
    654 	}
    655 	return;
    656     }
    657 
    658     /*
    659      * other/whatever.
    660      */
    661     else {
    662 	state->curr_ctype = MIME_CTYPE_OTHER;
    663 	return;
    664     }
    665 }
    666 
    667 /* mime_state_content_encoding - process content-transfer-encoding header */
    668 
    669 static void mime_state_content_encoding(MIME_STATE *state,
    670 				             const HEADER_OPTS *header_info)
    671 {
    672     const char *cp;
    673     const MIME_ENCODING *cmp;
    674 
    675 #define PARSE_CONTENT_ENCODING_HEADER(state, ptr) \
    676     header_token(state->token, 1, state->token_buffer, ptr, (char *) 0, 0)
    677 
    678     /*
    679      * Do content-transfer-encoding header. Never set the encoding domain to
    680      * something other than 7bit, 8bit or binary, even if we don't recognize
    681      * the input.
    682      */
    683     cp = STR(state->output_buffer) + strlen(header_info->name) + 1;
    684     if (PARSE_CONTENT_ENCODING_HEADER(state, &cp) > 0
    685 	&& state->token[0].type == HEADER_TOK_TOKEN) {
    686 	for (cmp = mime_encoding_map; cmp->name != 0; cmp++) {
    687 	    if (strcasecmp(state->token[0].u.value, cmp->name) == 0) {
    688 		state->curr_encoding = cmp->encoding;
    689 		state->curr_domain = cmp->domain;
    690 		break;
    691 	    }
    692 	}
    693     }
    694 }
    695 
    696 /* mime_state_enc_name - encoding to printable form */
    697 
    698 static const char *mime_state_enc_name(int encoding)
    699 {
    700     const MIME_ENCODING *cmp;
    701 
    702     for (cmp = mime_encoding_map; cmp->name != 0; cmp++)
    703 	if (encoding == cmp->encoding)
    704 	    return (cmp->name);
    705     return ("unknown");
    706 }
    707 
    708 /* mime_state_downgrade - convert 8-bit data to quoted-printable */
    709 
    710 static void mime_state_downgrade(MIME_STATE *state, int rec_type,
    711 				         const char *text, ssize_t len)
    712 {
    713     static char hexchars[] = "0123456789ABCDEF";
    714     const unsigned char *cp;
    715     int     ch;
    716 
    717 #define QP_ENCODE(buffer, ch) { \
    718 	VSTRING_ADDCH(buffer, '='); \
    719 	VSTRING_ADDCH(buffer, hexchars[(ch >> 4) & 0xff]); \
    720 	VSTRING_ADDCH(buffer, hexchars[ch & 0xf]); \
    721     }
    722 
    723     /*
    724      * Insert a soft line break when the output reaches a critical length
    725      * before we reach a hard line break.
    726      */
    727     for (cp = CU_CHAR_PTR(text); cp < CU_CHAR_PTR(text + len); cp++) {
    728 	/* Critical length before hard line break. */
    729 	if (LEN(state->output_buffer) > 72) {
    730 	    VSTRING_ADDCH(state->output_buffer, '=');
    731 	    VSTRING_TERMINATE(state->output_buffer);
    732 	    BODY_OUT(state, REC_TYPE_NORM,
    733 		     STR(state->output_buffer),
    734 		     LEN(state->output_buffer));
    735 	    VSTRING_RESET(state->output_buffer);
    736 	}
    737 	/* Append the next character. */
    738 	ch = *cp;
    739 	if ((ch < 32 && ch != '\t') || ch == '=' || ch > 126) {
    740 	    QP_ENCODE(state->output_buffer, ch);
    741 	} else {
    742 	    VSTRING_ADDCH(state->output_buffer, ch);
    743 	}
    744     }
    745 
    746     /*
    747      * Flush output after a hard line break (i.e. the end of a REC_TYPE_NORM
    748      * record). Fix trailing whitespace as per the RFC: in the worst case,
    749      * the output length will grow from 73 characters to 75 characters.
    750      */
    751     if (rec_type == REC_TYPE_NORM) {
    752 	if (LEN(state->output_buffer) > 0
    753 	    && ((ch = END(state->output_buffer)[-1]) == ' ' || ch == '\t')) {
    754 	    vstring_truncate(state->output_buffer,
    755 			     LEN(state->output_buffer) - 1);
    756 	    QP_ENCODE(state->output_buffer, ch);
    757 	}
    758 	VSTRING_TERMINATE(state->output_buffer);
    759 	BODY_OUT(state, REC_TYPE_NORM,
    760 		 STR(state->output_buffer),
    761 		 LEN(state->output_buffer));
    762 	VSTRING_RESET(state->output_buffer);
    763     }
    764 }
    765 
    766 /* mime_state_update - update MIME state machine */
    767 
         /*
          * Feed one input record to the MIME state machine.
          *
          * state: parser state, updated in place.
          *
          * rec_type: REC_TYPE_NORM and REC_TYPE_CONT are treated as text. Any
          * other value is treated as "not text": pending header output is
          * flushed, the state machine moves to the body state, and the
          * optional body_end callback is invoked.
          *
          * text, len: content of the record. The comments below state that the
          * content is not assumed to be null terminated.
          *
          * Every normal exit path stores rec_type in state->prev_rec_type and
          * returns state->err_flags. An unknown state->curr_state value ends in
          * a msg_panic() call.
          */
    768 int     mime_state_update(MIME_STATE *state, int rec_type,
    769 			          const char *text, ssize_t len)
    770 {
    771     int     input_is_text = (rec_type == REC_TYPE_NORM
    772 			     || rec_type == REC_TYPE_CONT);
    773     MIME_STACK *sp;
    774     const HEADER_OPTS *header_info;
    775     const unsigned char *cp;
    776 
    777 #define SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type) do { \
    778 	state->prev_rec_type = rec_type; \
    779 	return (state->err_flags); \
    780     } while (0)
    781 
    782     /*
    783      * Be sure to flush any partial output line that might still be buffered
    784      * up before taking any other "end of input" actions.
             *
             * The recursive call passes REC_TYPE_NORM, which counts as text, so
             * the condition below is false inside that call and the recursion is
             * one level deep.
    785      */
    786     if (!input_is_text && state->prev_rec_type == REC_TYPE_CONT)
    787 	mime_state_update(state, REC_TYPE_NORM, "", 0);
    788 
    789     /*
    790      * This message state machine is kept simple for the sake of robustness.
    791      * Standards evolve over time, and we want to be able to correctly
    792      * process messages that are not yet defined. This state machine knows
    793      * about headers and bodies, understands that multipart/whatever has
    794      * multiple body parts with a header and body, and that message/whatever
    795      * has message headers at the start of a body part.
    796      */
    797     switch (state->curr_state) {
    798 
    799 	/*
    800 	 * First, deal with header information that we have accumulated from
    801 	 * previous input records. Discard text that does not fit in a header
    802 	 * buffer. Our limit is quite generous; Sendmail will refuse mail
    803 	 * with only 32kbyte in all the message headers combined.
    804 	 */
    805     case MIME_STATE_PRIMARY:
    806     case MIME_STATE_MULTIPART:
    807     case MIME_STATE_NESTED:
    808 	if (LEN(state->output_buffer) > 0) {
    809 	    if (input_is_text) {
        		/*
        		 * The previous record was REC_TYPE_CONT, so this text
        		 * continues the same input line: append it without a line
        		 * break, or drop it when the saved header is at the limit.
        		 */
    810 		if (state->prev_rec_type == REC_TYPE_CONT) {
    811 		    if (LEN(state->output_buffer) < var_header_limit) {
    812 			vstring_strncat(state->output_buffer, text, len);
    813 		    } else {
    814 			if (state->static_flags & MIME_OPT_REPORT_TRUNC_HEADER)
    815 			    REPORT_ERROR_BUF(state, MIME_ERR_TRUNC_HEADER,
    816 					     state->output_buffer);
    817 		    }
    818 		    SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type);
    819 		}
        		/*
        		 * A line that starts with space or tab continues the saved
        		 * header: append it after a newline, subject to the same
        		 * length limit.
        		 *
        		 * NOTE(review): *text is read here without a len > 0 check;
        		 * confirm that callers always supply at least one readable
        		 * byte (for example a terminator) with an empty record.
        		 */
    820 		if (IS_SPACE_TAB(*text)) {
    821 		    if (LEN(state->output_buffer) < var_header_limit) {
    822 			vstring_strcat(state->output_buffer, "\n");
    823 			vstring_strncat(state->output_buffer, text, len);
    824 		    } else {
    825 			if (state->static_flags & MIME_OPT_REPORT_TRUNC_HEADER)
    826 			    REPORT_ERROR_BUF(state, MIME_ERR_TRUNC_HEADER,
    827 					     state->output_buffer);
    828 		    }
    829 		    SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type);
    830 		}
    831 	    }
    832 
    833 	    /*
    834 	     * The input is (the beginning of) another message header, or is
    835 	     * not a message header, or is not even a text record. With no
    836 	     * more input to append to this saved header, do output
    837 	     * processing and reset the saved header buffer. Hold on to the
    838 	     * content transfer encoding header if we have to do a 8->7
    839 	     * transformation, because the proper information depends on the
    840 	     * content type header: message and multipart require a domain,
    841 	     * leaf entities have either a transformation or a domain.
    842 	     */
    843 	    if (LEN(state->output_buffer) > 0) {
    844 		header_info = header_opts_find(STR(state->output_buffer));
    845 		if (!(state->static_flags & MIME_OPT_DISABLE_MIME)
    846 		    && header_info != 0) {
    847 		    if (header_info->type == HDR_CONTENT_TYPE)
    848 			mime_state_content_type(state, header_info);
    849 		    if (header_info->type == HDR_CONTENT_TRANSFER_ENCODING)
    850 			mime_state_content_encoding(state, header_info);
    851 		}
    852 		if ((state->static_flags & MIME_OPT_REPORT_8BIT_IN_HEADER) != 0
    853 		    && (state->err_flags & MIME_ERR_8BIT_IN_HEADER) == 0) {
    854 		    for (cp = CU_CHAR_PTR(STR(state->output_buffer));
    855 			 cp < CU_CHAR_PTR(END(state->output_buffer)); cp++)
    856 			if (*cp & 0200) {
    857 			    REPORT_ERROR_BUF(state, MIME_ERR_8BIT_IN_HEADER,
    858 					     state->output_buffer);
    859 			    break;
    860 			}
    861 		}
    862 		/* Output routine is explicitly allowed to change the data. */
        		/*
        		 * The saved header is not output when it is a
        		 * Content-Transfer-Encoding header, downgrading is enabled,
        		 * and the current domain is not 7bit; in that case a
        		 * replacement header is generated at the end of the header
        		 * block, further below.
        		 */
    863 		if (header_info == 0
    864 		    || header_info->type != HDR_CONTENT_TRANSFER_ENCODING
    865 		    || (state->static_flags & MIME_OPT_DOWNGRADE) == 0
    866 		    || state->curr_domain == MIME_ENC_7BIT)
    867 		    HEAD_OUT(state, header_info, len);
    868 		state->prev_rec_type = 0;
    869 		VSTRING_RESET(state->output_buffer);
    870 	    }
    871 	}
    872 
    873 	/*
    874 	 * With past header information moved out of the way, proceed with a
    875 	 * clean slate.
    876 	 */
    877 	if (input_is_text) {
    878 	    ssize_t header_len;
    879 
    880 	    /*
    881 	     * See if this input is (the beginning of) a message header.
    882 	     *
    883 	     * Normalize obsolete "name space colon" syntax to "name colon".
    884 	     * Things would be too confusing otherwise.
    885 	     *
    886 	     * Don't assume that the input is null terminated.
    887 	     */
    888 	    if ((header_len = is_header_buf(text, len)) > 0) {
    889 		vstring_strncpy(state->output_buffer, text, header_len);
    890 		for (text += header_len, len -= header_len;
    891 		     len > 0 && IS_SPACE_TAB(*text);
    892 		     text++, len--)
    893 		     /* void */ ;
    894 		vstring_strncat(state->output_buffer, text, len);
    895 		SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type);
    896 	    }
    897 	}
    898 
    899 	/*
    900 	 * This input terminates a block of message headers. When converting
    901 	 * 8-bit to 7-bit mail, this is the right place to emit the correct
    902 	 * content-transfer-encoding header. With message or multipart we
    903 	 * specify 7bit, with leaf entities we specify quoted-printable.
    904 	 *
    905 	 * We're not going to convert non-text data into base 64. If they send
    906 	 * arbitrary binary data as 8-bit text, then the data is already
    907 	 * broken beyond recovery, because the Postfix SMTP server sanitizes
    908 	 * record boundaries, treating broken record boundaries as CRLF.
    909 	 *
    910 	 * Clear the output buffer, we will need it for storage of the
    911 	 * conversion result.
    912 	 */
    913 	if ((state->static_flags & MIME_OPT_DOWNGRADE)
    914 	    && state->curr_domain != MIME_ENC_7BIT) {
    915 	    if ((state->curr_ctype == MIME_CTYPE_MESSAGE
    916 		 && state->curr_stype != MIME_STYPE_GLOBAL)
    917 		|| state->curr_ctype == MIME_CTYPE_MULTIPART)
    918 		cp = CU_CHAR_PTR("7bit");
    919 	    else
    920 		cp = CU_CHAR_PTR("quoted-printable");
    921 	    vstring_sprintf(state->output_buffer,
    922 			    "Content-Transfer-Encoding: %s", cp);
    923 	    HEAD_OUT(state, (HEADER_OPTS *) 0, len);
    924 	    VSTRING_RESET(state->output_buffer);
    925 	}
    926 
    927 	/*
    928 	 * This input terminates a block of message headers. Call the
    929 	 * optional header end routine at the end of the first header block.
    930 	 */
    931 	if (state->curr_state == MIME_STATE_PRIMARY) {
    932 	    if (len > 0
    933 		&& (state->static_flags & MIME_OPT_REPORT_NON_EMPTY_EOH))
    934 		REPORT_ERROR_LEN(state, MIME_ERR_NON_EMPTY_EOH, text, len);
    935 	    if (state->head_end)
    936 		state->head_end(state->app_context);
    937 	}
    938 
    939 	/*
    940 	 * This is the right place to check if the sender specified an
    941 	 * appropriate identity encoding (7bit, 8bit, binary) for multipart
    942 	 * and for message.
    943 	 */
    944 	if (state->static_flags & MIME_OPT_REPORT_ENCODING_DOMAIN) {
    945 	    if (state->curr_ctype == MIME_CTYPE_MESSAGE) {
    946 		if (state->curr_stype == MIME_STYPE_PARTIAL
    947 		    || state->curr_stype == MIME_STYPE_EXTERN_BODY) {
    948 		    if (state->curr_domain != MIME_ENC_7BIT)
    949 			REPORT_ERROR(state, MIME_ERR_ENCODING_DOMAIN,
    950 				 mime_state_enc_name(state->curr_encoding));
    951 		}
    952 		/* EAI: message/global allows non-identity encoding. */
    953 		else if (state->curr_stype == MIME_STYPE_RFC822) {
    954 		    if (state->curr_encoding != state->curr_domain)
    955 			REPORT_ERROR(state, MIME_ERR_ENCODING_DOMAIN,
    956 				 mime_state_enc_name(state->curr_encoding));
    957 		}
    958 	    } else if (state->curr_ctype == MIME_CTYPE_MULTIPART) {
    959 		if (state->curr_encoding != state->curr_domain)
    960 		    REPORT_ERROR(state, MIME_ERR_ENCODING_DOMAIN,
    961 				 mime_state_enc_name(state->curr_encoding));
    962 	    }
    963 	}
    964 
    965 	/*
    966 	 * Find out if the next body starts with its own message headers. In
    967 	 * aggressive mode, examine headers of partial and external-body
    968 	 * messages. Otherwise, treat such headers as part of the "body". Set
    969 	 * the proper encoding information for the multipart prolog.
    970 	 *
    971 	 * XXX We parse headers inside message/* content even when the encoding
    972 	 * is invalid (encoding != domain). With base64 we won't recognize
    973 	 * any headers, and with quoted-printable we won't recognize MIME
    974 	 * boundary strings, but the MIME processor will still resynchronize
    975 	 * when it runs into the higher-level boundary string at the end of
    976 	 * the message/* content. Although we will treat some headers as body
    977 	 * text, we will still do a better job than if we were treating the
    978 	 * entire message/* content as body text.
    979 	 *
    980 	 * XXX This changes state to MIME_STATE_NESTED and then outputs a body
    981 	 * line, so that the body offset is not properly reset.
    982 	 *
    983 	 * Don't assume that the input is null terminated.
    984 	 */
    985 	if (input_is_text) {
    986 	    if (len == 0) {
    987 		state->body_offset = 0;		/* XXX */
    988 		if (state->curr_ctype == MIME_CTYPE_MESSAGE) {
    989 		    if (state->curr_stype == MIME_STYPE_RFC822)
    990 			SET_MIME_STATE(state, MIME_STATE_NESTED,
    991 				       MIME_CTYPE_TEXT, MIME_STYPE_PLAIN,
    992 				       MIME_ENC_7BIT, MIME_ENC_7BIT);
    993 		    else if (state->curr_stype == MIME_STYPE_GLOBAL
    994 			 && ((state->static_flags & MIME_OPT_DOWNGRADE) == 0
    995 			     || state->curr_domain == MIME_ENC_7BIT))
    996 			/* XXX EAI: inspect encoded message/global. */
    997 			SET_MIME_STATE(state, MIME_STATE_NESTED,
    998 				       MIME_CTYPE_TEXT, MIME_STYPE_PLAIN,
    999 				       MIME_ENC_7BIT, MIME_ENC_7BIT);
   1000 		    else
   1001 			SET_CURR_STATE(state, MIME_STATE_BODY);
   1002 		} else if (state->curr_ctype == MIME_CTYPE_MULTIPART) {
   1003 		    SET_MIME_STATE(state, MIME_STATE_BODY,
   1004 				   MIME_CTYPE_OTHER, MIME_STYPE_OTHER,
   1005 				   MIME_ENC_7BIT, MIME_ENC_7BIT);
   1006 		} else {
   1007 		    SET_CURR_STATE(state, MIME_STATE_BODY);
   1008 		}
   1009 	    }
   1010 
   1011 	    /*
   1012 	     * Invalid input. Force output of one blank line and jump to the
   1013 	     * body state, leaving all other state alone.
   1014 	     *
   1015 	     * We don't break legitimate mail by inserting a blank line
   1016 	     * separator between primary headers and a non-empty body. Many
   1017 	     * MTA's don't even record the presence or absence of this
   1018 	     * separator, nor does the Milter protocol pass it on to Milter
   1019 	     * applications.
   1020 	     *
   1021 	     * XXX We don't insert a blank line separator into attachments, to
   1022 	     * avoid breaking digital signatures. Postfix shall not do a
   1023 	     * worse mail delivery job than MTAs that can't even parse MIME.
   1024 	     * We switch to body state anyway, to avoid treating body text as
   1025 	     * header text, and mis-interpreting or truncating it. The code
   1026 	     * below for initial From_ lines is for educational purposes.
   1027 	     *
   1028 	     * Sites concerned about MIME evasion can use a MIME normalizer.
   1029 	     * Postfix has a different mission.
   1030 	     */
   1031 	    else {
   1032 		if (msg_verbose)
   1033 		    msg_info("garbage in %s header",
   1034 		    state->curr_state == MIME_STATE_MULTIPART ? "multipart" :
   1035 		       state->curr_state == MIME_STATE_PRIMARY ? "primary" :
   1036 			 state->curr_state == MIME_STATE_NESTED ? "nested" :
   1037 			     "other");
   1038 		switch (state->curr_state) {
   1039 		case MIME_STATE_PRIMARY:
   1040 		    BODY_OUT(state, REC_TYPE_NORM, "", 0);
   1041 		    SET_CURR_STATE(state, MIME_STATE_BODY);
   1042 		    break;
   1043 #if 0
   1044 		case MIME_STATE_NESTED:
   1045 		    if (state->body_offset <= 1
   1046 			&& rec_type == REC_TYPE_NORM
   1047 			&& len > 7
   1048 			&& (strncmp(text + (*text == '>'), "From ", 5) == 0
   1049 			    || strncmp(text, "=46rom ", 7) == 0))
   1050 			break;
   1051 		    /* FALLTHROUGH */
   1052 #endif
   1053 		default:
   1054 		    SET_CURR_STATE(state, MIME_STATE_BODY);
   1055 		    break;
   1056 		}
   1057 	    }
   1058 	}
   1059 
   1060 	/*
   1061 	 * This input is not text. Go to body state, unconditionally.
   1062 	 */
   1063 	else {
   1064 	    SET_CURR_STATE(state, MIME_STATE_BODY);
   1065 	}
   1066 	/* FALLTHROUGH */
   1067 
   1068 	/*
   1069 	 * Body text. Look for message boundaries, and recover from missing
   1070 	 * boundary strings. Missing boundaries can happen in aggressive mode
   1071 	 * with text/rfc822-headers or with message/partial. Ignore non-space
   1072 	 * cruft after --boundary or --boundary--, because some MUAs do, and
   1073 	 * because only perverse software would take advantage of this to
   1074 	 * escape detection. We have to ignore trailing cruft anyway, because
   1075 	 * our saved copy of the boundary string may have been truncated for
   1076 	 * safety reasons.
   1077 	 *
   1078 	 * Optionally look for 8-bit data in content that was announced as, or
   1079 	 * that defaults to, 7-bit. Unfortunately, we cannot turn this on by
   1080 	 * default. Majordomo sends requests for approval that do not
   1081 	 * propagate the MIME information from the enclosed message to the
   1082 	 * message headers of the approval request.
   1083 	 *
   1084 	 * Set the proper state information after processing a message boundary
   1085 	 * string.
   1086 	 *
   1087 	 * Don't look for boundary strings at the start of a continued record.
   1088 	 *
   1089 	 * Don't assume that the input is null terminated.
   1090 	 */
   1091     case MIME_STATE_BODY:
   1092 	if (input_is_text) {
   1093 	    if ((state->static_flags & MIME_OPT_REPORT_8BIT_IN_7BIT_BODY) != 0
   1094 		&& state->curr_encoding == MIME_ENC_7BIT
   1095 		&& (state->err_flags & MIME_ERR_8BIT_IN_7BIT_BODY) == 0) {
   1096 		for (cp = CU_CHAR_PTR(text); cp < CU_CHAR_PTR(text + len); cp++)
   1097 		    if (*cp & 0200) {
   1098 			REPORT_ERROR_LEN(state, MIME_ERR_8BIT_IN_7BIT_BODY,
   1099 					 text, len);
   1100 			break;
   1101 		    }
   1102 	    }
   1103 	    if (state->stack && state->prev_rec_type != REC_TYPE_CONT
   1104 		&& len > 2 && text[0] == '-' && text[1] == '-') {
   1105 		for (sp = state->stack; sp != 0; sp = sp->next) {
   1106 		    if (len >= 2 + sp->bound_len &&
   1107 		      strncmp(text + 2, sp->boundary, sp->bound_len) == 0) {
        			/*
        			 * Discard stack entries until the matching entry is
        			 * on top. mime_state_pop() is defined outside this
        			 * excerpt; presumably it removes the top entry of
        			 * state->stack, which this loop needs to terminate.
        			 */
   1108 			while (sp != state->stack)
   1109 			    mime_state_pop(state);
        			/*
        			 * "--" directly after the boundary string: discard
        			 * the matching entry as well and continue in body
        			 * state. Otherwise a new part begins with its own
        			 * headers and the entry's default content type.
        			 */
   1110 			if (len >= 4 + sp->bound_len &&
   1111 			  strncmp(text + 2 + sp->bound_len, "--", 2) == 0) {
   1112 			    mime_state_pop(state);
   1113 			    SET_MIME_STATE(state, MIME_STATE_BODY,
   1114 					 MIME_CTYPE_OTHER, MIME_STYPE_OTHER,
   1115 					   MIME_ENC_7BIT, MIME_ENC_7BIT);
   1116 			} else {
   1117 			    SET_MIME_STATE(state, MIME_STATE_MULTIPART,
   1118 					   sp->def_ctype, sp->def_stype,
   1119 					   MIME_ENC_7BIT, MIME_ENC_7BIT);
   1120 			}
   1121 			break;
   1122 		    }
   1123 		}
   1124 	    }
   1125 	    /* Put last for consistency with header output routine. */
   1126 	    if ((state->static_flags & MIME_OPT_DOWNGRADE)
   1127 		&& state->curr_domain != MIME_ENC_7BIT)
   1128 		mime_state_downgrade(state, rec_type, text, len);
   1129 	    else
   1130 		BODY_OUT(state, rec_type, text, len);
   1131 	}
   1132 
   1133 	/*
   1134 	 * The input is not a text record. Inform the application that this
   1135 	 * is the last opportunity to send any pending output.
   1136 	 */
   1137 	else {
   1138 	    if (state->body_end)
   1139 		state->body_end(state->app_context);
   1140 	}
   1141 	SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type);
   1142 
   1143 	/*
   1144 	 * Oops. This can't happen.
   1145 	 */
   1146     default:
   1147 	msg_panic("mime_state_update: unknown state: %d", state->curr_state);
   1148     }
   1149 }
   1150 
   1151 /* mime_state_status - return mime_state_update() like result */
   1152 
         /*
          * Returns the error flags currently stored in the state (err_flags),
          * which is the same value that mime_state_update() returns. The state
          * is only read, not modified.
          */
   1153 int     mime_state_status(MIME_STATE *state)
   1154 {
   1155     return (state->err_flags);
   1156 }
   1157 
   1158  /*
   1159   * Mime error to (DSN, text) mapping. Order matters; more serious errors
   1160   * must precede less serious errors, because the error-to-text conversion
   1161   * can report only one error.
          *
          * Each table entry is written as three consecutive initializers without
          * inner braces: error code, DSN string, descriptive text. The final
          * entry has code 0; the lookup loops in mime_state_error() and
          * mime_state_detail() stop when they reach it.
   1162   */
   1163 static const MIME_STATE_DETAIL mime_err_detail[] = {
   1164     MIME_ERR_NESTING, "5.6.0", "MIME nesting exceeds safety limit",
   1165     MIME_ERR_TRUNC_HEADER, "5.6.0", "message header length exceeds safety limit",
   1166     MIME_ERR_8BIT_IN_HEADER, "5.6.0", "improper use of 8-bit data in message header",
   1167     MIME_ERR_8BIT_IN_7BIT_BODY, "5.6.0", "improper use of 8-bit data in message body",
   1168     MIME_ERR_ENCODING_DOMAIN, "5.6.0", "invalid message/* or multipart/* encoding domain",
   1169     MIME_ERR_NON_EMPTY_EOH, "5.6.0", "primary header was terminated with non-empty line",
   1170     0,
   1171 };
   1172 
   1173 /* mime_state_error - error code to string */
   1174 
         /*
          * error_code is a bit mask of MIME_ERR_* flags. Returns the text of the
          * first mime_err_detail[] entry whose code has a bit in common with
          * error_code; when several flags are set, table order decides which
          * one is reported.
          *
          * Calls msg_panic() when error_code is 0 or when no table entry
          * matches. There is no return statement after the final msg_panic()
          * call; presumably msg_panic() does not return (declared outside this
          * excerpt).
          */
   1175 const char *mime_state_error(int error_code)
   1176 {
   1177     const MIME_STATE_DETAIL *mp;
   1178 
   1179     if (error_code == 0)
   1180 	msg_panic("mime_state_error: there is no error");
   1181     for (mp = mime_err_detail; mp->code; mp++)
   1182 	if (mp->code & error_code)
   1183 	    return (mp->text);
   1184     msg_panic("mime_state_error: unknown error code %d", error_code);
   1185 }
   1186 
   1187 /* mime_state_detail - error code to table entry with assorted data */
   1188 
         /*
          * Same lookup as mime_state_error(), but returns a pointer to the
          * matching mime_err_detail[] entry (code, DSN and text) instead of
          * only the text. The result points into a static const table.
          *
          * Calls msg_panic() when error_code is 0 or when no table entry
          * matches.
          */
   1189 const MIME_STATE_DETAIL *mime_state_detail(int error_code)
   1190 {
   1191     const MIME_STATE_DETAIL *mp;
   1192 
   1193     if (error_code == 0)
   1194 	msg_panic("mime_state_detail: there is no error");
   1195     for (mp = mime_err_detail; mp->code; mp++)
   1196 	if (mp->code & error_code)
   1197 	    return (mp);
   1198     msg_panic("mime_state_detail: unknown error code %d", error_code);
   1199 }
   1200 
   1201 #ifdef TEST
   1202 
   1203 #include <stdlib.h>
   1204 #include <stringops.h>
   1205 #include <vstream.h>
   1206 #include <msg_vstream.h>
   1207 #include <rec_streamlf.h>
   1208 
   1209  /*
   1210   * Stress test the REC_TYPE_CONT/NORM handling, but don't break header
   1211   * labels.
   1212   */
   1213 /*#define REC_LEN	40*/
   1214 
   1215 #define REC_LEN	1024
   1216 
        /* head_out - test callback: print header class label, offset and text */
   1217 static void head_out(void *context, int class, const HEADER_OPTS *unused_info,
   1218 		             VSTRING *buf, off_t offset)
   1219 {
            /* The context is the output stream that main() passes at allocation. */
   1220     VSTREAM *stream = (VSTREAM *) context;
   1221 
            /* A class other than primary, multipart or nested prints as "ERROR". */
   1222     vstream_fprintf(stream, "%s %ld\t|%s\n",
   1223 		    class == MIME_HDR_PRIMARY ? "MAIN" :
   1224 		    class == MIME_HDR_MULTIPART ? "MULT" :
   1225 		    class == MIME_HDR_NESTED ? "NEST" :
   1226 		    "ERROR", (long) offset, STR(buf));
   1227 }
   1228 
        /* head_end - test callback: print a marker line at the end of headers */
   1229 static void head_end(void *context)
   1230 {
   1231     VSTREAM *stream = (VSTREAM *) context;
   1232 
   1233     vstream_fprintf(stream, "HEADER END\n");
   1234 }
   1235 
        /* body_out - test callback: print record type, offset and body content */
   1236 static void body_out(void *context, int rec_type, const char *buf, ssize_t len,
   1237 		             off_t offset)
   1238 {
   1239     VSTREAM *stream = (VSTREAM *) context;
   1240 
            /* The record type is printed as a single character. */
   1241     vstream_fprintf(stream, "BODY %c %ld\t|", rec_type, (long) offset);
            /* Write exactly len bytes; buf is not treated as a C string here. */
   1242     vstream_fwrite(stream, buf, len);
            /* Only REC_TYPE_NORM ends the output line; other types leave it open. */
   1243     if (rec_type == REC_TYPE_NORM)
   1244 	VSTREAM_PUTC('\n', stream);
   1245 }
   1246 
        /* body_end - test callback: print a marker line at the end of the body */
   1247 static void body_end(void *context)
   1248 {
   1249     VSTREAM *stream = (VSTREAM *) context;
   1250 
   1251     vstream_fprintf(stream, "BODY END\n");
   1252 }
   1253 
        /* err_print - test callback: log an error description and offending text */
   1254 static void err_print(void *unused_context, int err_flag,
   1255 		              const char *text, ssize_t len)
   1256 {
            /* Print at most the first 100 bytes of the text. */
   1257     msg_warn("%s: %.*s", mime_state_error(err_flag),
   1258 	     len < 100 ? (int) len : 100, text);
   1259 }
   1260 
         /*
          * Global variables defined here for the test program only (this code is
          * inside #ifdef TEST). var_header_limit is the header length limit that
          * mime_state_update() checks. The other three are presumably used by
          * code outside this excerpt - TODO confirm.
          */
   1261 int     var_header_limit = 2000;
   1262 int     var_mime_maxdepth = 20;
   1263 int     var_mime_bound_len = 2000;
   1264 char   *var_drop_hdrs = DEF_DROP_HDRS;
   1265 
         /*
          * main - test driver. Reads records from VSTREAM_IN, passes each one to
          * mime_state_update(), and lets the callbacks above print the result to
          * VSTREAM_OUT. After the input ends it logs one warning for each error
          * flag that the last mime_state_update() call returned, releases the
          * state and the buffer, and exits with status 0.
          *
          * NOTE: the variable "last" is assigned in the loop but never read in
          * this function.
          */
   1266 int     main(int unused_argc, char **argv)
   1267 {
   1268     int     rec_type;
   1269     int     last = 0;
   1270     VSTRING *buf;
   1271     MIME_STATE *state;
   1272     int     err;
   1273 
   1274     /*
   1275      * Initialize.
             *
             * The parser is created with the report options listed below plus
             * MIME_OPT_DOWNGRADE. Setting msg_verbose also enables the "garbage
             * in ... header" message in mime_state_update().
   1276      */
   1277 #define MIME_OPTIONS \
   1278 	    (MIME_OPT_REPORT_8BIT_IN_7BIT_BODY \
   1279 	    | MIME_OPT_REPORT_8BIT_IN_HEADER \
   1280 	    | MIME_OPT_REPORT_ENCODING_DOMAIN \
   1281 	    | MIME_OPT_REPORT_TRUNC_HEADER \
   1282 	    | MIME_OPT_REPORT_NESTING \
   1283 	    | MIME_OPT_REPORT_NON_EMPTY_EOH \
   1284 	    | MIME_OPT_DOWNGRADE)
   1285 
   1286     msg_vstream_init(basename(argv[0]), VSTREAM_OUT);
   1287     msg_verbose = 1;
   1288     buf = vstring_alloc(10);
   1289     state = mime_state_alloc(MIME_OPTIONS,
   1290 			     head_out, head_end,
   1291 			     body_out, body_end,
   1292 			     err_print,
   1293 			     (void *) VSTREAM_OUT);
   1294 
   1295     /*
   1296      * Main loop.
             *
             * Each record is read with a length limit of REC_LEN. The record that
             * ends the loop (rec_type <= 0) is still passed to mime_state_update()
             * before the loop condition is tested.
   1297      */
   1298     do {
   1299 	rec_type = rec_streamlf_get(VSTREAM_IN, buf, REC_LEN);
   1300 	VSTRING_TERMINATE(buf);
   1301 	err = mime_state_update(state, last = rec_type, STR(buf), LEN(buf));
   1302 	vstream_fflush(VSTREAM_OUT);
   1303     } while (rec_type > 0);
   1304 
   1305     /*
   1306      * Error reporting.
             *
             * err holds the flags returned by the last mime_state_update() call.
   1307      */
   1308     if (err & MIME_ERR_TRUNC_HEADER)
   1309 	msg_warn("message header length exceeds safety limit");
   1310     if (err & MIME_ERR_NESTING)
   1311 	msg_warn("MIME nesting exceeds safety limit");
   1312     if (err & MIME_ERR_8BIT_IN_HEADER)
   1313 	msg_warn("improper use of 8-bit data in message header");
   1314     if (err & MIME_ERR_8BIT_IN_7BIT_BODY)
   1315 	msg_warn("improper use of 8-bit data in message body");
   1316     if (err & MIME_ERR_ENCODING_DOMAIN)
   1317 	msg_warn("improper message/* or multipart/* encoding domain");
   1318     if (err & MIME_ERR_NON_EMPTY_EOH)
   1319 	msg_warn("non-empty end-of-header");
   1320 
   1321     /*
   1322      * Cleanup.
   1323      */
   1324     mime_state_free(state);
   1325     vstring_free(buf);
   1326     exit(0);
   1327 }
   1328 
   1329 #endif
   1330