/*	$NetBSD: mime_state.c,v 1.4 2026/05/09 18:49:16 christos Exp $	*/

/*++
/* NAME
/*	mime_state 3
/* SUMMARY
/*	MIME parser state machine
/* SYNOPSIS
/*	#include <mime_state.h>
/*
/*	MIME_STATE *mime_state_alloc(flags, head_out, head_end,
/*					body_out, body_end,
/*					err_print, context)
/*	int	flags;
/*	void	(*head_out)(void *ptr, int header_class,
/*				const HEADER_OPTS *header_info,
/*				VSTRING *buf, off_t offset);
/*	void	(*head_end)(void *ptr);
/*	void	(*body_out)(void *ptr, int rec_type,
/*				const char *buf, ssize_t len,
/*				off_t offset);
/*	void	(*body_end)(void *ptr);
/*	void	(*err_print)(void *ptr, int err_flag, const char *text,
/*				ssize_t len);
/*	void	*context;
/*
/*	int	mime_state_update(state, rec_type, buf, len)
/*	MIME_STATE *state;
/*	int	rec_type;
/*	const char *buf;
/*	ssize_t	len;
/*
/*	int	mime_state_status(state)
/*	MIME_STATE *state;
/*
/*	MIME_STATE *mime_state_free(state)
/*	MIME_STATE *state;
/*
/*	const char *mime_state_error(error_code)
/*	int	error_code;
/*
/*	typedef struct {
/* .in +4
/*		const int code;		/* internal error code */
/*		const char *dsn;	/* RFC 3463 */
/*		const char *text;	/* descriptive text */
/* .in -4
/*	} MIME_STATE_DETAIL;
/*
/*	const MIME_STATE_DETAIL *mime_state_detail(error_code)
/*	int	error_code;
/* DESCRIPTION
/*	This module implements a one-pass MIME processor with optional
/*	8-bit to quoted-printable conversion.
/*
/*	In order to fend off denial of service attacks, message headers
/*	are truncated at or above var_header_limit bytes, message boundary
/*	strings are truncated at var_mime_bound_len bytes, and the multipart
/*	nesting level is limited to var_mime_maxdepth levels.
/*
/*	mime_state_alloc() creates a MIME state machine. The machine
/*	is delivered in its initial state, expecting content type
/*	text/plain, 7-bit data.
63 /* 64 /* mime_state_update() updates the MIME state machine according 65 /* to the input record type and the record content. 66 /* The result value is the bit-wise OR of zero or more of the following: 67 /* .IP MIME_ERR_TRUNC_HEADER 68 /* A message header was longer than var_header_limit bytes. 69 /* .IP MIME_ERR_NESTING 70 /* The MIME structure was nested more than var_mime_maxdepth levels. 71 /* .IP MIME_ERR_8BIT_IN_HEADER 72 /* A message header contains 8-bit data. This is always illegal. 73 /* .IP MIME_ERR_8BIT_IN_7BIT_BODY 74 /* A MIME header specifies (or defaults to) 7-bit content, but the 75 /* corresponding message body or body parts contain 8-bit content. 76 /* .IP MIME_ERR_ENCODING_DOMAIN 77 /* An entity of type "message" or "multipart" specifies the wrong 78 /* content transfer encoding domain, or specifies a transformation 79 /* (quoted-printable, base64) instead of a domain (7bit, 8bit, 80 /* or binary). 81 /* .IP MIME_ERR_NON_EMPTY_EOH 82 /* The primary message header was terminated with a non-empty line. 83 /* .PP 84 /* mime_state_status() reports the same result as 85 /* mime_state_update(), but without changing state. 86 /* 87 /* mime_state_free() releases storage for a MIME state machine, 88 /* and conveniently returns a null pointer. 89 /* 90 /* mime_state_error() returns a string representation for the 91 /* specified error code. When multiple errors are specified it 92 /* reports what it deems the most serious one. 93 /* 94 /* mime_state_detail() returns a table entry with error 95 /* information for the specified error code. When multiple 96 /* errors are specified it reports what it deems the most 97 /* serious one. 98 /* 99 /* Arguments: 100 /* .IP body_out 101 /* The output routine for body lines. It receives unmodified input 102 /* records, or the result of 8-bit -> 7-bit conversion. 103 /* .IP body_end 104 /* A null pointer, or a pointer to a routine that is called after 105 /* the last input record is processed. 
106 /* .IP buf 107 /* Buffer with the content of a logical or physical message record. 108 /* .IP context 109 /* Caller context that is passed on to the head_out and body_out 110 /* routines. 111 /* .IP enc_type 112 /* The content encoding: MIME_ENC_7BIT or MIME_ENC_8BIT. 113 /* .IP err_print 114 /* Null pointer, or pointer to a function that is called with 115 /* arguments: the application context, the error type, and the 116 /* offending input. Only one instance per error type is reported. 117 /* .IP flags 118 /* Special processing options. Specify the bit-wise OR of zero or 119 /* more of the following: 120 /* .RS 121 /* .IP MIME_OPT_DISABLE_MIME 122 /* Pay no attention to Content-* message headers, and switch to 123 /* message body state at the end of the primary message headers. 124 /* .IP MIME_OPT_REPORT_TRUNC_HEADER 125 /* Report errors that set the MIME_ERR_TRUNC_HEADER error flag 126 /* (see above). 127 /* .IP MIME_OPT_REPORT_8BIT_IN_HEADER 128 /* Report errors that set the MIME_ERR_8BIT_IN_HEADER error 129 /* flag (see above). This rarely stops legitimate mail. 130 /* .IP MIME_OPT_REPORT_8BIT_IN_7BIT_BODY 131 /* Report errors that set the MIME_ERR_8BIT_IN_7BIT_BODY error 132 /* flag (see above). This currently breaks Majordomo mail that is 133 /* forwarded for approval, because Majordomo does not propagate 134 /* MIME type information from the enclosed message to the message 135 /* headers of the request for approval. 136 /* .IP MIME_OPT_REPORT_ENCODING_DOMAIN 137 /* Report errors that set the MIME_ERR_ENCODING_DOMAIN error 138 /* flag (see above). 139 /* .IP MIME_OPT_REPORT_NESTING 140 /* Report errors that set the MIME_ERR_NESTING error flag 141 /* (see above). 142 /* .IP MIME_OPT_REPORT_NON_EMPTY_EOH 143 /* Report errors that terminate the primary message header with a 144 /* non-empty line. 145 /* .IP MIME_OPT_DOWNGRADE 146 /* Transform content that claims to be 8-bit into quoted-printable. 
147 /* Where appropriate, update Content-Transfer-Encoding: message 148 /* headers. 149 /* .RE 150 /* .sp 151 /* For convenience, MIME_OPT_NONE requests no special processing. 152 /* .IP header_class 153 /* Specifies where a message header is located. 154 /* .RS 155 /* .IP MIME_HDR_PRIMARY 156 /* In the primary message header section. 157 /* .IP MIME_HDR_MULTIPART 158 /* In the header section after a multipart boundary string. 159 /* .IP MIME_HDR_NESTED 160 /* At the start of a nested (e.g., message/rfc822) message. 161 /* .RE 162 /* .sp 163 /* For convenience, the macros MIME_HDR_FIRST and MIME_HDR_LAST 164 /* specify the range of MIME_HDR_MUMBLE macros. 165 /* .sp 166 /* To find out if something is a MIME header at the beginning 167 /* of an RFC 822 message or an attached message, look at the 168 /* header_info argument. 169 /* .IP header_info 170 /* Null pointer or information about the message header, see 171 /* header_opts(3). 172 /* .IP head_out 173 /* The output routine that is invoked for outputting a message header. 174 /* A multi-line header is passed as one chunk of text with embedded 175 /* newlines. 176 /* It is the responsibility of the output routine to break the text 177 /* at embedded newlines, and to break up long text between newlines 178 /* into multiple output records. 179 /* Note: an output routine is explicitly allowed to modify the text. 180 /* .IP head_end 181 /* A null pointer, or a pointer to a routine that is called after 182 /* the last message header in the first header block is processed. 183 /* .IP len 184 /* Length of non-VSTRING input buffer. 185 /* .IP offset 186 /* The offset in bytes from the start of the current block of message 187 /* headers or body lines. Line boundaries are counted as one byte. 188 /* .IP rec_type 189 /* The input record type as defined in rec_type(3h). State is 190 /* updated for text records (REC_TYPE_NORM or REC_TYPE_CONT). 
191 /* Some input records are stored internally in order to reconstruct 192 /* multi-line input. Upon receipt of any non-text record type, all 193 /* stored input is flushed and the state is set to "body". 194 /* .IP state 195 /* MIME parser state created with mime_state_alloc(). 196 /* BUGS 197 /* NOTE: when the end of headers is reached, mime_state_update() 198 /* may execute up to three call-backs before returning to the 199 /* caller: head_out(), head_end(), and body_out() or body_end(). 200 /* As long as call-backs return no result, it is up to the 201 /* call-back routines to check if a previous call-back experienced 202 /* an error. 203 /* 204 /* Different mail user agents treat malformed message boundary 205 /* strings in different ways. The Postfix MIME processor cannot 206 /* be bug-compatible with everything. 207 /* 208 /* This module will not glue together multipart boundary strings that 209 /* span multiple input records. 210 /* 211 /* This module will not glue together RFC 2231 formatted (boundary) 212 /* parameter values. RFC 2231 claims compatibility with existing 213 /* MIME processors. Splitting boundary strings is not backwards 214 /* compatible. 215 /* 216 /* The "8-bit data inside 7-bit body" test is myopic. It is not aware 217 /* of any enclosing (message or multipart) encoding information. 218 /* 219 /* If the input ends in data other than a hard line break, this module 220 /* will add a hard line break of its own. No line break is added to 221 /* empty input. 222 /* 223 /* This code recognizes the obsolete form "headername :" but will 224 /* normalize it to the canonical form "headername:". Leaving the 225 /* obsolete form alone would cause too much trouble with existing code 226 /* that expects only the normalized form. 
227 /* SEE ALSO 228 /* msg(3) diagnostics interface 229 /* header_opts(3) header information lookup 230 /* RFC 822 (ARPA Internet Text Messages) 231 /* RFC 2045 (MIME: Format of internet message bodies) 232 /* RFC 2046 (MIME: Media types) 233 /* DIAGNOSTICS 234 /* Fatal errors: memory allocation problem. 235 /* LICENSE 236 /* .ad 237 /* .fi 238 /* The Secure Mailer license must be distributed with this software. 239 /* HISTORY 240 /* .ad 241 /* .fi 242 /* This code was implemented from scratch after reading the RFC 243 /* documents. This was a relatively straightforward effort with 244 /* few if any surprises. Victor Duchovni of Morgan Stanley shared 245 /* his experiences with ambiguities in real-life MIME implementations. 246 /* Liviu Daia of the Romanian Academy shared his insights in some 247 /* of the darker corners. 248 /* AUTHOR(S) 249 /* Wietse Venema 250 /* IBM T.J. Watson Research 251 /* P.O. Box 704 252 /* Yorktown Heights, NY 10598, USA 253 /* 254 /* Wietse Venema 255 /* Google, Inc. 256 /* 111 8th Avenue 257 /* New York, NY 10011, USA 258 /*--*/ 259 260 /* System library. */ 261 262 #include <sys_defs.h> 263 #include <stdarg.h> 264 #include <ctype.h> 265 #include <string.h> 266 267 #ifdef STRCASECMP_IN_STRINGS_H 268 #include <strings.h> 269 #endif 270 271 /* Utility library. */ 272 273 #include <mymalloc.h> 274 #include <msg.h> 275 #include <vstring.h> 276 277 /* Global library. */ 278 279 #include <rec_type.h> 280 #include <is_header.h> 281 #include <header_opts.h> 282 #include <mail_params.h> 283 #include <header_token.h> 284 #include <lex_822.h> 285 #include <mime_state.h> 286 287 /* Application-specific. */ 288 289 /* 290 * Mime parser stack element for multipart content. 
*/
 /*
  * One entry per multipart boundary string that is being tracked. Entries
  * are created by mime_state_push() and released by mime_state_pop(); the
  * default content type/subtype are whatever the caller of mime_state_push()
  * supplied together with the boundary.
  */
typedef struct MIME_STACK {
    int     def_ctype;                  /* default content type */
    int     def_stype;                  /* default content subtype */
    char   *boundary;                   /* boundary string; private copy that
                                         * mime_state_push() truncates to at
                                         * most var_mime_bound_len bytes */
    ssize_t bound_len;                  /* boundary length */
    struct MIME_STACK *next;            /* linkage */
} MIME_STACK;

 /*
  * Mime parser state.
  */
#define MIME_MAX_TOKEN          3       /* tokens per attribute */

struct MIME_STATE {

    /*
     * Volatile members. These are (re)initialized by mime_state_alloc() and
     * are updated while input records are processed.
     */
    int     curr_state;                 /* header/body state */
    int     curr_ctype;                 /* last or default content type */
    int     curr_stype;                 /* last or default content subtype */
    int     curr_encoding;              /* last or default content encoding */
    int     curr_domain;                /* last or default encoding unit */
    VSTRING *output_buffer;             /* headers, quoted-printable body */
    int     prev_rec_type;              /* previous input record type */
    int     nesting_level;              /* safety; starts at -1, +1 for each
                                         * mime_state_push(), -1 for each
                                         * mime_state_pop() */
    MIME_STACK *stack;                  /* for composite types */
    HEADER_TOKEN token[MIME_MAX_TOKEN]; /* header token array */
    VSTRING *token_buffer;              /* header parser scratch buffer */
    int     err_flags;                  /* processing errors */
    off_t   head_offset;                /* offset in header block */
    off_t   body_offset;                /* offset in body block */

    /*
     * Static members. These are copied from the mime_state_alloc()
     * arguments.
     */
    int     static_flags;               /* static processing options */
    MIME_STATE_HEAD_OUT head_out;       /* header output routine */
    MIME_STATE_ANY_END head_end;        /* end of primary header routine */
    MIME_STATE_BODY_OUT body_out;       /* body output routine */
    MIME_STATE_ANY_END body_end;        /* end of body output routine */
    MIME_STATE_ERR_PRINT err_print;     /* error report */
    void   *app_context;                /* application context */
};

 /*
  * Content types and subtypes that we care about, either because we have to,
  * or because we want to filter out broken MIME messages.
340 */ 341 #define MIME_CTYPE_OTHER 0 342 #define MIME_CTYPE_TEXT 1 343 #define MIME_CTYPE_MESSAGE 2 344 #define MIME_CTYPE_MULTIPART 3 345 346 #define MIME_STYPE_OTHER 0 347 #define MIME_STYPE_PLAIN 1 348 #define MIME_STYPE_RFC822 2 349 #define MIME_STYPE_PARTIAL 3 350 #define MIME_STYPE_EXTERN_BODY 4 351 #define MIME_STYPE_GLOBAL 5 352 353 /* 354 * MIME parser states. We steal from the public interface. 355 */ 356 #define MIME_STATE_PRIMARY MIME_HDR_PRIMARY /* primary headers */ 357 #define MIME_STATE_MULTIPART MIME_HDR_MULTIPART /* after --boundary */ 358 #define MIME_STATE_NESTED MIME_HDR_NESTED /* message/rfc822 */ 359 #define MIME_STATE_BODY (MIME_HDR_NESTED + 1) 360 361 #define SET_MIME_STATE(ptr, state, ctype, stype, encoding, domain) do { \ 362 (ptr)->curr_state = (state); \ 363 (ptr)->curr_ctype = (ctype); \ 364 (ptr)->curr_stype = (stype); \ 365 (ptr)->curr_encoding = (encoding); \ 366 (ptr)->curr_domain = (domain); \ 367 if ((state) == MIME_STATE_BODY) \ 368 (ptr)->body_offset = 0; \ 369 else \ 370 (ptr)->head_offset = 0; \ 371 } while (0) 372 373 #define SET_CURR_STATE(ptr, state) do { \ 374 (ptr)->curr_state = (state); \ 375 if ((state) == MIME_STATE_BODY) \ 376 (ptr)->body_offset = 0; \ 377 else \ 378 (ptr)->head_offset = 0; \ 379 } while (0) 380 381 /* 382 * MIME encodings and domains. We intentionally use the same codes for 383 * encodings and domains, so that we can easily find out whether a content 384 * transfer encoding header specifies a domain or whether it specifies 385 * domain+encoding, which is illegal for multipart/any and message/any. 386 */ 387 typedef struct MIME_ENCODING { 388 const char *name; /* external representation */ 389 int encoding; /* internal representation */ 390 int domain; /* subset of encoding */ 391 } MIME_ENCODING; 392 393 #define MIME_ENC_QP 1 /* encoding + domain */ 394 #define MIME_ENC_BASE64 2 /* encoding + domain */ 395 /* These are defined in mime_state.h as part of the external interface. 
*/ 396 #ifndef MIME_ENC_7BIT 397 #define MIME_ENC_7BIT 7 /* domain only */ 398 #define MIME_ENC_8BIT 8 /* domain only */ 399 #define MIME_ENC_BINARY 9 /* domain only */ 400 #endif 401 402 static const MIME_ENCODING mime_encoding_map[] = { /* RFC 2045 */ 403 "7bit", MIME_ENC_7BIT, MIME_ENC_7BIT, /* domain */ 404 "8bit", MIME_ENC_8BIT, MIME_ENC_8BIT, /* domain */ 405 "binary", MIME_ENC_BINARY, MIME_ENC_BINARY, /* domain */ 406 "base64", MIME_ENC_BASE64, MIME_ENC_7BIT, /* encoding */ 407 "quoted-printable", MIME_ENC_QP, MIME_ENC_7BIT, /* encoding */ 408 0, 409 }; 410 411 /* 412 * Silly Little Macros. 413 */ 414 #define STR(x) vstring_str(x) 415 #define LEN(x) VSTRING_LEN(x) 416 #define END(x) vstring_end(x) 417 #define CU_CHAR_PTR(x) ((const unsigned char *) (x)) 418 419 #define REPORT_ERROR_LEN(state, err_type, text, len) do { \ 420 if ((state->err_flags & err_type) == 0) { \ 421 if (state->err_print != 0) \ 422 state->err_print(state->app_context, err_type, text, len); \ 423 state->err_flags |= err_type; \ 424 } \ 425 } while (0) 426 427 #define REPORT_ERROR(state, err_type, text) do { \ 428 const char *_text = text; \ 429 ssize_t _len = strlen(text); \ 430 REPORT_ERROR_LEN(state, err_type, _text, _len); \ 431 } while (0) 432 433 #define REPORT_ERROR_BUF(state, err_type, buf) \ 434 REPORT_ERROR_LEN(state, err_type, STR(buf), LEN(buf)) 435 436 437 /* 438 * Outputs and state changes are interleaved, so we must maintain separate 439 * offsets for header and body segments. 
440 */ 441 #define HEAD_OUT(ptr, info, len) do { \ 442 if ((ptr)->head_out) { \ 443 (ptr)->head_out((ptr)->app_context, (ptr)->curr_state, \ 444 (info), (ptr)->output_buffer, (ptr)->head_offset); \ 445 (ptr)->head_offset += (len) + 1; \ 446 } \ 447 } while(0) 448 449 #define BODY_OUT(ptr, rec_type, text, len) do { \ 450 if ((ptr)->body_out) { \ 451 (ptr)->body_out((ptr)->app_context, (rec_type), \ 452 (text), (len), (ptr)->body_offset); \ 453 (ptr)->body_offset += (len) + 1; \ 454 } \ 455 } while(0) 456 457 /* mime_state_push - push boundary onto stack */ 458 459 static void mime_state_push(MIME_STATE *state, int def_ctype, int def_stype, 460 const char *boundary) 461 { 462 MIME_STACK *stack; 463 464 /* 465 * RFC 2046 mandates that a boundary string be up to 70 characters long. 466 * Some MTAs, including Postfix, include the fully-qualified MTA name 467 * which can be longer, so we are willing to handle boundary strings that 468 * exceed the RFC specification. We allow for message headers of up to 469 * var_header_limit characters. In order to avoid denial of service, we 470 * have to impose a configurable limit on the amount of text that we are 471 * willing to store as a boundary string. Despite this truncation way we 472 * will still correctly detect all intermediate boundaries and all the 473 * message headers that follow those boundaries. 
474 */ 475 state->nesting_level += 1; 476 stack = (MIME_STACK *) mymalloc(sizeof(*stack)); 477 stack->def_ctype = def_ctype; 478 stack->def_stype = def_stype; 479 if ((stack->bound_len = strlen(boundary)) > var_mime_bound_len) 480 stack->bound_len = var_mime_bound_len; 481 stack->boundary = mystrndup(boundary, stack->bound_len); 482 stack->next = state->stack; 483 state->stack = stack; 484 if (msg_verbose) 485 msg_info("PUSH boundary %s", stack->boundary); 486 } 487 488 /* mime_state_pop - pop boundary from stack */ 489 490 static void mime_state_pop(MIME_STATE *state) 491 { 492 MIME_STACK *stack; 493 494 if ((stack = state->stack) == 0) 495 msg_panic("mime_state_pop: there is no stack"); 496 if (msg_verbose) 497 msg_info("POP boundary %s", stack->boundary); 498 state->nesting_level -= 1; 499 state->stack = stack->next; 500 myfree(stack->boundary); 501 myfree((void *) stack); 502 } 503 504 /* mime_state_alloc - create MIME state machine */ 505 506 MIME_STATE *mime_state_alloc(int flags, 507 MIME_STATE_HEAD_OUT head_out, 508 MIME_STATE_ANY_END head_end, 509 MIME_STATE_BODY_OUT body_out, 510 MIME_STATE_ANY_END body_end, 511 MIME_STATE_ERR_PRINT err_print, 512 void *context) 513 { 514 MIME_STATE *state; 515 516 state = (MIME_STATE *) mymalloc(sizeof(*state)); 517 518 /* Volatile members. */ 519 state->err_flags = 0; 520 state->body_offset = 0; /* XXX */ 521 SET_MIME_STATE(state, MIME_STATE_PRIMARY, 522 MIME_CTYPE_TEXT, MIME_STYPE_PLAIN, 523 MIME_ENC_7BIT, MIME_ENC_7BIT); 524 state->output_buffer = vstring_alloc(100); 525 state->prev_rec_type = 0; 526 state->stack = 0; 527 state->token_buffer = vstring_alloc(1); 528 state->nesting_level = -1; /* BC Fix 20170512 */ 529 530 /* Static members. 
*/ 531 state->static_flags = flags; 532 state->head_out = head_out; 533 state->head_end = head_end; 534 state->body_out = body_out; 535 state->body_end = body_end; 536 state->err_print = err_print; 537 state->app_context = context; 538 return (state); 539 } 540 541 /* mime_state_free - destroy MIME state machine */ 542 543 MIME_STATE *mime_state_free(MIME_STATE *state) 544 { 545 vstring_free(state->output_buffer); 546 while (state->stack) 547 mime_state_pop(state); 548 if (state->token_buffer) 549 vstring_free(state->token_buffer); 550 myfree((void *) state); 551 return (0); 552 } 553 554 /* mime_state_content_type - process content-type header */ 555 556 static void mime_state_content_type(MIME_STATE *state, 557 const HEADER_OPTS *header_info) 558 { 559 const char *cp; 560 ssize_t tok_count; 561 int def_ctype; 562 int def_stype; 563 564 #define TOKEN_MATCH(tok, text) \ 565 ((tok).type == HEADER_TOK_TOKEN && strcasecmp((tok).u.value, (text)) == 0) 566 567 #define RFC2045_TSPECIALS "()<>@,;:\\\"/[]?=" 568 569 #define PARSE_CONTENT_TYPE_HEADER(state, ptr) \ 570 header_token(state->token, MIME_MAX_TOKEN, \ 571 state->token_buffer, ptr, RFC2045_TSPECIALS, ';') 572 573 cp = STR(state->output_buffer) + strlen(header_info->name) + 1; 574 if ((tok_count = PARSE_CONTENT_TYPE_HEADER(state, &cp)) > 0) { 575 576 /* 577 * text/whatever. Right now we don't really care if it is plain or 578 * not, but we may want to recognize subtypes later, and then this 579 * code can serve as an example. 580 */ 581 if (TOKEN_MATCH(state->token[0], "text")) { 582 state->curr_ctype = MIME_CTYPE_TEXT; 583 if (tok_count >= 3 584 && state->token[1].type == '/' 585 && TOKEN_MATCH(state->token[2], "plain")) 586 state->curr_stype = MIME_STYPE_PLAIN; 587 else 588 state->curr_stype = MIME_STYPE_OTHER; 589 return; 590 } 591 592 /* 593 * message/whatever body parts start with another block of message 594 * headers that we may want to look at. 
The partial and external-body 595 * subtypes cannot be subjected to 8-bit -> 7-bit conversion, so we 596 * must properly recognize them. 597 */ 598 if (TOKEN_MATCH(state->token[0], "message")) { 599 state->curr_ctype = MIME_CTYPE_MESSAGE; 600 state->curr_stype = MIME_STYPE_OTHER; 601 if (tok_count >= 3 602 && state->token[1].type == '/') { 603 if (TOKEN_MATCH(state->token[2], "rfc822")) 604 state->curr_stype = MIME_STYPE_RFC822; 605 else if (TOKEN_MATCH(state->token[2], "partial")) 606 state->curr_stype = MIME_STYPE_PARTIAL; 607 else if (TOKEN_MATCH(state->token[2], "external-body")) 608 state->curr_stype = MIME_STYPE_EXTERN_BODY; 609 else if (TOKEN_MATCH(state->token[2], "global")) 610 state->curr_stype = MIME_STYPE_GLOBAL; 611 } 612 return; 613 } 614 615 /* 616 * multipart/digest has default content type message/rfc822, 617 * multipart/whatever has default content type text/plain. 618 */ 619 if (TOKEN_MATCH(state->token[0], "multipart")) { 620 state->curr_ctype = MIME_CTYPE_MULTIPART; 621 if (tok_count >= 3 622 && state->token[1].type == '/' 623 && TOKEN_MATCH(state->token[2], "digest")) { 624 def_ctype = MIME_CTYPE_MESSAGE; 625 def_stype = MIME_STYPE_RFC822; 626 } else { 627 def_ctype = MIME_CTYPE_TEXT; 628 def_stype = MIME_STYPE_PLAIN; 629 } 630 631 /* 632 * Yes, this is supposed to capture multiple boundary strings, 633 * which are illegal and which could be used to hide content in 634 * an implementation dependent manner. The code below allows us 635 * to find embedded message headers as long as the sender uses 636 * only one of these same-level boundary strings. 637 * 638 * Yes, this is supposed to ignore the boundary value type. 
639 */ 640 while ((tok_count = PARSE_CONTENT_TYPE_HEADER(state, &cp)) >= 0) { 641 if (tok_count >= 3 642 && TOKEN_MATCH(state->token[0], "boundary") 643 && state->token[1].type == '=') { 644 if (state->nesting_level > var_mime_maxdepth) { 645 if (state->static_flags & MIME_OPT_REPORT_NESTING) 646 REPORT_ERROR_BUF(state, MIME_ERR_NESTING, 647 state->output_buffer); 648 } else { 649 mime_state_push(state, def_ctype, def_stype, 650 state->token[2].u.value); 651 } 652 } 653 } 654 } 655 return; 656 } 657 658 /* 659 * other/whatever. 660 */ 661 else { 662 state->curr_ctype = MIME_CTYPE_OTHER; 663 return; 664 } 665 } 666 667 /* mime_state_content_encoding - process content-transfer-encoding header */ 668 669 static void mime_state_content_encoding(MIME_STATE *state, 670 const HEADER_OPTS *header_info) 671 { 672 const char *cp; 673 const MIME_ENCODING *cmp; 674 675 #define PARSE_CONTENT_ENCODING_HEADER(state, ptr) \ 676 header_token(state->token, 1, state->token_buffer, ptr, (char *) 0, 0) 677 678 /* 679 * Do content-transfer-encoding header. Never set the encoding domain to 680 * something other than 7bit, 8bit or binary, even if we don't recognize 681 * the input. 
682 */ 683 cp = STR(state->output_buffer) + strlen(header_info->name) + 1; 684 if (PARSE_CONTENT_ENCODING_HEADER(state, &cp) > 0 685 && state->token[0].type == HEADER_TOK_TOKEN) { 686 for (cmp = mime_encoding_map; cmp->name != 0; cmp++) { 687 if (strcasecmp(state->token[0].u.value, cmp->name) == 0) { 688 state->curr_encoding = cmp->encoding; 689 state->curr_domain = cmp->domain; 690 break; 691 } 692 } 693 } 694 } 695 696 /* mime_state_enc_name - encoding to printable form */ 697 698 static const char *mime_state_enc_name(int encoding) 699 { 700 const MIME_ENCODING *cmp; 701 702 for (cmp = mime_encoding_map; cmp->name != 0; cmp++) 703 if (encoding == cmp->encoding) 704 return (cmp->name); 705 return ("unknown"); 706 } 707 708 /* mime_state_downgrade - convert 8-bit data to quoted-printable */ 709 710 static void mime_state_downgrade(MIME_STATE *state, int rec_type, 711 const char *text, ssize_t len) 712 { 713 static char hexchars[] = "0123456789ABCDEF"; 714 const unsigned char *cp; 715 int ch; 716 717 #define QP_ENCODE(buffer, ch) { \ 718 VSTRING_ADDCH(buffer, '='); \ 719 VSTRING_ADDCH(buffer, hexchars[(ch >> 4) & 0xff]); \ 720 VSTRING_ADDCH(buffer, hexchars[ch & 0xf]); \ 721 } 722 723 /* 724 * Insert a soft line break when the output reaches a critical length 725 * before we reach a hard line break. 726 */ 727 for (cp = CU_CHAR_PTR(text); cp < CU_CHAR_PTR(text + len); cp++) { 728 /* Critical length before hard line break. */ 729 if (LEN(state->output_buffer) > 72) { 730 VSTRING_ADDCH(state->output_buffer, '='); 731 VSTRING_TERMINATE(state->output_buffer); 732 BODY_OUT(state, REC_TYPE_NORM, 733 STR(state->output_buffer), 734 LEN(state->output_buffer)); 735 VSTRING_RESET(state->output_buffer); 736 } 737 /* Append the next character. 
*/ 738 ch = *cp; 739 if ((ch < 32 && ch != '\t') || ch == '=' || ch > 126) { 740 QP_ENCODE(state->output_buffer, ch); 741 } else { 742 VSTRING_ADDCH(state->output_buffer, ch); 743 } 744 } 745 746 /* 747 * Flush output after a hard line break (i.e. the end of a REC_TYPE_NORM 748 * record). Fix trailing whitespace as per the RFC: in the worst case, 749 * the output length will grow from 73 characters to 75 characters. 750 */ 751 if (rec_type == REC_TYPE_NORM) { 752 if (LEN(state->output_buffer) > 0 753 && ((ch = END(state->output_buffer)[-1]) == ' ' || ch == '\t')) { 754 vstring_truncate(state->output_buffer, 755 LEN(state->output_buffer) - 1); 756 QP_ENCODE(state->output_buffer, ch); 757 } 758 VSTRING_TERMINATE(state->output_buffer); 759 BODY_OUT(state, REC_TYPE_NORM, 760 STR(state->output_buffer), 761 LEN(state->output_buffer)); 762 VSTRING_RESET(state->output_buffer); 763 } 764 } 765 766 /* mime_state_update - update MIME state machine */ 767 768 int mime_state_update(MIME_STATE *state, int rec_type, 769 const char *text, ssize_t len) 770 { 771 int input_is_text = (rec_type == REC_TYPE_NORM 772 || rec_type == REC_TYPE_CONT); 773 MIME_STACK *sp; 774 const HEADER_OPTS *header_info; 775 const unsigned char *cp; 776 777 #define SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type) do { \ 778 state->prev_rec_type = rec_type; \ 779 return (state->err_flags); \ 780 } while (0) 781 782 /* 783 * Be sure to flush any partial output line that might still be buffered 784 * up before taking any other "end of input" actions. 785 */ 786 if (!input_is_text && state->prev_rec_type == REC_TYPE_CONT) 787 mime_state_update(state, REC_TYPE_NORM, "", 0); 788 789 /* 790 * This message state machine is kept simple for the sake of robustness. 791 * Standards evolve over time, and we want to be able to correctly 792 * process messages that are not yet defined. 
This state machine knows 793 * about headers and bodies, understands that multipart/whatever has 794 * multiple body parts with a header and body, and that message/whatever 795 * has message headers at the start of a body part. 796 */ 797 switch (state->curr_state) { 798 799 /* 800 * First, deal with header information that we have accumulated from 801 * previous input records. Discard text that does not fit in a header 802 * buffer. Our limit is quite generous; Sendmail will refuse mail 803 * with only 32kbyte in all the message headers combined. 804 */ 805 case MIME_STATE_PRIMARY: 806 case MIME_STATE_MULTIPART: 807 case MIME_STATE_NESTED: 808 if (LEN(state->output_buffer) > 0) { 809 if (input_is_text) { 810 if (state->prev_rec_type == REC_TYPE_CONT) { 811 if (LEN(state->output_buffer) < var_header_limit) { 812 vstring_strncat(state->output_buffer, text, len); 813 } else { 814 if (state->static_flags & MIME_OPT_REPORT_TRUNC_HEADER) 815 REPORT_ERROR_BUF(state, MIME_ERR_TRUNC_HEADER, 816 state->output_buffer); 817 } 818 SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type); 819 } 820 if (IS_SPACE_TAB(*text)) { 821 if (LEN(state->output_buffer) < var_header_limit) { 822 vstring_strcat(state->output_buffer, "\n"); 823 vstring_strncat(state->output_buffer, text, len); 824 } else { 825 if (state->static_flags & MIME_OPT_REPORT_TRUNC_HEADER) 826 REPORT_ERROR_BUF(state, MIME_ERR_TRUNC_HEADER, 827 state->output_buffer); 828 } 829 SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type); 830 } 831 } 832 833 /* 834 * The input is (the beginning of) another message header, or is 835 * not a message header, or is not even a text record. With no 836 * more input to append to this saved header, do output 837 * processing and reset the saved header buffer. 
Hold on to the 838 * content transfer encoding header if we have to do a 8->7 839 * transformation, because the proper information depends on the 840 * content type header: message and multipart require a domain, 841 * leaf entities have either a transformation or a domain. 842 */ 843 if (LEN(state->output_buffer) > 0) { 844 header_info = header_opts_find(STR(state->output_buffer)); 845 if (!(state->static_flags & MIME_OPT_DISABLE_MIME) 846 && header_info != 0) { 847 if (header_info->type == HDR_CONTENT_TYPE) 848 mime_state_content_type(state, header_info); 849 if (header_info->type == HDR_CONTENT_TRANSFER_ENCODING) 850 mime_state_content_encoding(state, header_info); 851 } 852 if ((state->static_flags & MIME_OPT_REPORT_8BIT_IN_HEADER) != 0 853 && (state->err_flags & MIME_ERR_8BIT_IN_HEADER) == 0) { 854 for (cp = CU_CHAR_PTR(STR(state->output_buffer)); 855 cp < CU_CHAR_PTR(END(state->output_buffer)); cp++) 856 if (*cp & 0200) { 857 REPORT_ERROR_BUF(state, MIME_ERR_8BIT_IN_HEADER, 858 state->output_buffer); 859 break; 860 } 861 } 862 /* Output routine is explicitly allowed to change the data. */ 863 if (header_info == 0 864 || header_info->type != HDR_CONTENT_TRANSFER_ENCODING 865 || (state->static_flags & MIME_OPT_DOWNGRADE) == 0 866 || state->curr_domain == MIME_ENC_7BIT) 867 HEAD_OUT(state, header_info, len); 868 state->prev_rec_type = 0; 869 VSTRING_RESET(state->output_buffer); 870 } 871 } 872 873 /* 874 * With past header information moved out of the way, proceed with a 875 * clean slate. 876 */ 877 if (input_is_text) { 878 ssize_t header_len; 879 880 /* 881 * See if this input is (the beginning of) a message header. 882 * 883 * Normalize obsolete "name space colon" syntax to "name colon". 884 * Things would be too confusing otherwise. 885 * 886 * Don't assume that the input is null terminated. 
887 */ 888 if ((header_len = is_header_buf(text, len)) > 0) { 889 vstring_strncpy(state->output_buffer, text, header_len); 890 for (text += header_len, len -= header_len; 891 len > 0 && IS_SPACE_TAB(*text); 892 text++, len--) 893 /* void */ ; 894 vstring_strncat(state->output_buffer, text, len); 895 SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type); 896 } 897 } 898 899 /* 900 * This input terminates a block of message headers. When converting 901 * 8-bit to 7-bit mail, this is the right place to emit the correct 902 * content-transfer-encoding header. With message or multipart we 903 * specify 7bit, with leaf entities we specify quoted-printable. 904 * 905 * We're not going to convert non-text data into base 64. If they send 906 * arbitrary binary data as 8-bit text, then the data is already 907 * broken beyond recovery, because the Postfix SMTP server sanitizes 908 * record boundaries, treating broken record boundaries as CRLF. 909 * 910 * Clear the output buffer, we will need it for storage of the 911 * conversion result. 912 */ 913 if ((state->static_flags & MIME_OPT_DOWNGRADE) 914 && state->curr_domain != MIME_ENC_7BIT) { 915 if ((state->curr_ctype == MIME_CTYPE_MESSAGE 916 && state->curr_stype != MIME_STYPE_GLOBAL) 917 || state->curr_ctype == MIME_CTYPE_MULTIPART) 918 cp = CU_CHAR_PTR("7bit"); 919 else 920 cp = CU_CHAR_PTR("quoted-printable"); 921 vstring_sprintf(state->output_buffer, 922 "Content-Transfer-Encoding: %s", cp); 923 HEAD_OUT(state, (HEADER_OPTS *) 0, len); 924 VSTRING_RESET(state->output_buffer); 925 } 926 927 /* 928 * This input terminates a block of message headers. Call the 929 * optional header end routine at the end of the first header block. 
930 */ 931 if (state->curr_state == MIME_STATE_PRIMARY) { 932 if (len > 0 933 && (state->static_flags & MIME_OPT_REPORT_NON_EMPTY_EOH)) 934 REPORT_ERROR_LEN(state, MIME_ERR_NON_EMPTY_EOH, text, len); 935 if (state->head_end) 936 state->head_end(state->app_context); 937 } 938 939 /* 940 * This is the right place to check if the sender specified an 941 * appropriate identity encoding (7bit, 8bit, binary) for multipart 942 * and for message. 943 */ 944 if (state->static_flags & MIME_OPT_REPORT_ENCODING_DOMAIN) { 945 if (state->curr_ctype == MIME_CTYPE_MESSAGE) { 946 if (state->curr_stype == MIME_STYPE_PARTIAL 947 || state->curr_stype == MIME_STYPE_EXTERN_BODY) { 948 if (state->curr_domain != MIME_ENC_7BIT) 949 REPORT_ERROR(state, MIME_ERR_ENCODING_DOMAIN, 950 mime_state_enc_name(state->curr_encoding)); 951 } 952 /* EAI: message/global allows non-identity encoding. */ 953 else if (state->curr_stype == MIME_STYPE_RFC822) { 954 if (state->curr_encoding != state->curr_domain) 955 REPORT_ERROR(state, MIME_ERR_ENCODING_DOMAIN, 956 mime_state_enc_name(state->curr_encoding)); 957 } 958 } else if (state->curr_ctype == MIME_CTYPE_MULTIPART) { 959 if (state->curr_encoding != state->curr_domain) 960 REPORT_ERROR(state, MIME_ERR_ENCODING_DOMAIN, 961 mime_state_enc_name(state->curr_encoding)); 962 } 963 } 964 965 /* 966 * Find out if the next body starts with its own message headers. In 967 * aggressive mode, examine headers of partial and external-body 968 * messages. Otherwise, treat such headers as part of the "body". Set 969 * the proper encoding information for the multipart prolog. 970 * 971 * XXX We parse headers inside message/* content even when the encoding 972 * is invalid (encoding != domain). With base64 we won't recognize 973 * any headers, and with quoted-printable we won't recognize MIME 974 * boundary strings, but the MIME processor will still resynchronize 975 * when it runs into the higher-level boundary string at the end of 976 * the message/* content. 
Although we will treat some headers as body 977 * text, we will still do a better job than if we were treating the 978 * entire message/* content as body text. 979 * 980 * XXX This changes state to MIME_STATE_NESTED and then outputs a body 981 * line, so that the body offset is not properly reset. 982 * 983 * Don't assume that the input is null terminated. 984 */ 985 if (input_is_text) { 986 if (len == 0) { 987 state->body_offset = 0; /* XXX */ 988 if (state->curr_ctype == MIME_CTYPE_MESSAGE) { 989 if (state->curr_stype == MIME_STYPE_RFC822) 990 SET_MIME_STATE(state, MIME_STATE_NESTED, 991 MIME_CTYPE_TEXT, MIME_STYPE_PLAIN, 992 MIME_ENC_7BIT, MIME_ENC_7BIT); 993 else if (state->curr_stype == MIME_STYPE_GLOBAL 994 && ((state->static_flags & MIME_OPT_DOWNGRADE) == 0 995 || state->curr_domain == MIME_ENC_7BIT)) 996 /* XXX EAI: inspect encoded message/global. */ 997 SET_MIME_STATE(state, MIME_STATE_NESTED, 998 MIME_CTYPE_TEXT, MIME_STYPE_PLAIN, 999 MIME_ENC_7BIT, MIME_ENC_7BIT); 1000 else 1001 SET_CURR_STATE(state, MIME_STATE_BODY); 1002 } else if (state->curr_ctype == MIME_CTYPE_MULTIPART) { 1003 SET_MIME_STATE(state, MIME_STATE_BODY, 1004 MIME_CTYPE_OTHER, MIME_STYPE_OTHER, 1005 MIME_ENC_7BIT, MIME_ENC_7BIT); 1006 } else { 1007 SET_CURR_STATE(state, MIME_STATE_BODY); 1008 } 1009 } 1010 1011 /* 1012 * Invalid input. Force output of one blank line and jump to the 1013 * body state, leaving all other state alone. 1014 * 1015 * We don't break legitimate mail by inserting a blank line 1016 * separator between primary headers and a non-empty body. Many 1017 * MTA's don't even record the presence or absence of this 1018 * separator, nor does the Milter protocol pass it on to Milter 1019 * applications. 1020 * 1021 * XXX We don't insert a blank line separator into attachments, to 1022 * avoid breaking digital signatures. Postfix shall not do a 1023 * worse mail delivery job than MTAs that can't even parse MIME. 
1024 * We switch to body state anyway, to avoid treating body text as 1025 * header text, and mis-interpreting or truncating it. The code 1026 * below for initial From_ lines is for educational purposes. 1027 * 1028 * Sites concerned about MIME evasion can use a MIME normalizer. 1029 * Postfix has a different mission. 1030 */ 1031 else { 1032 if (msg_verbose) 1033 msg_info("garbage in %s header", 1034 state->curr_state == MIME_STATE_MULTIPART ? "multipart" : 1035 state->curr_state == MIME_STATE_PRIMARY ? "primary" : 1036 state->curr_state == MIME_STATE_NESTED ? "nested" : 1037 "other"); 1038 switch (state->curr_state) { 1039 case MIME_STATE_PRIMARY: 1040 BODY_OUT(state, REC_TYPE_NORM, "", 0); 1041 SET_CURR_STATE(state, MIME_STATE_BODY); 1042 break; 1043 #if 0 1044 case MIME_STATE_NESTED: 1045 if (state->body_offset <= 1 1046 && rec_type == REC_TYPE_NORM 1047 && len > 7 1048 && (strncmp(text + (*text == '>'), "From ", 5) == 0 1049 || strncmp(text, "=46rom ", 7) == 0)) 1050 break; 1051 /* FALLTHROUGH */ 1052 #endif 1053 default: 1054 SET_CURR_STATE(state, MIME_STATE_BODY); 1055 break; 1056 } 1057 } 1058 } 1059 1060 /* 1061 * This input is not text. Go to body state, unconditionally. 1062 */ 1063 else { 1064 SET_CURR_STATE(state, MIME_STATE_BODY); 1065 } 1066 /* FALLTHROUGH */ 1067 1068 /* 1069 * Body text. Look for message boundaries, and recover from missing 1070 * boundary strings. Missing boundaries can happen in aggressive mode 1071 * with text/rfc822-headers or with message/partial. Ignore non-space 1072 * cruft after --boundary or --boundary--, because some MUAs do, and 1073 * because only perverse software would take advantage of this to 1074 * escape detection. We have to ignore trailing cruft anyway, because 1075 * our saved copy of the boundary string may have been truncated for 1076 * safety reasons. 1077 * 1078 * Optionally look for 8-bit data in content that was announced as, or 1079 * that defaults to, 7-bit. 
Unfortunately, we cannot turn this on by 1080 * default. Majordomo sends requests for approval that do not 1081 * propagate the MIME information from the enclosed message to the 1082 * message headers of the approval request. 1083 * 1084 * Set the proper state information after processing a message boundary 1085 * string. 1086 * 1087 * Don't look for boundary strings at the start of a continued record. 1088 * 1089 * Don't assume that the input is null terminated. 1090 */ 1091 case MIME_STATE_BODY: 1092 if (input_is_text) { 1093 if ((state->static_flags & MIME_OPT_REPORT_8BIT_IN_7BIT_BODY) != 0 1094 && state->curr_encoding == MIME_ENC_7BIT 1095 && (state->err_flags & MIME_ERR_8BIT_IN_7BIT_BODY) == 0) { 1096 for (cp = CU_CHAR_PTR(text); cp < CU_CHAR_PTR(text + len); cp++) 1097 if (*cp & 0200) { 1098 REPORT_ERROR_LEN(state, MIME_ERR_8BIT_IN_7BIT_BODY, 1099 text, len); 1100 break; 1101 } 1102 } 1103 if (state->stack && state->prev_rec_type != REC_TYPE_CONT 1104 && len > 2 && text[0] == '-' && text[1] == '-') { 1105 for (sp = state->stack; sp != 0; sp = sp->next) { 1106 if (len >= 2 + sp->bound_len && 1107 strncmp(text + 2, sp->boundary, sp->bound_len) == 0) { 1108 while (sp != state->stack) 1109 mime_state_pop(state); 1110 if (len >= 4 + sp->bound_len && 1111 strncmp(text + 2 + sp->bound_len, "--", 2) == 0) { 1112 mime_state_pop(state); 1113 SET_MIME_STATE(state, MIME_STATE_BODY, 1114 MIME_CTYPE_OTHER, MIME_STYPE_OTHER, 1115 MIME_ENC_7BIT, MIME_ENC_7BIT); 1116 } else { 1117 SET_MIME_STATE(state, MIME_STATE_MULTIPART, 1118 sp->def_ctype, sp->def_stype, 1119 MIME_ENC_7BIT, MIME_ENC_7BIT); 1120 } 1121 break; 1122 } 1123 } 1124 } 1125 /* Put last for consistency with header output routine. */ 1126 if ((state->static_flags & MIME_OPT_DOWNGRADE) 1127 && state->curr_domain != MIME_ENC_7BIT) 1128 mime_state_downgrade(state, rec_type, text, len); 1129 else 1130 BODY_OUT(state, rec_type, text, len); 1131 } 1132 1133 /* 1134 * The input is not a text record. 
Inform the application that this 1135 * is the last opportunity to send any pending output. 1136 */ 1137 else { 1138 if (state->body_end) 1139 state->body_end(state->app_context); 1140 } 1141 SAVE_PREV_REC_TYPE_AND_RETURN_ERR_FLAGS(state, rec_type); 1142 1143 /* 1144 * Oops. This can't happen. 1145 */ 1146 default: 1147 msg_panic("mime_state_update: unknown state: %d", state->curr_state); 1148 } 1149 } 1150 1151 /* mime_state_status - return mime_state_update() like result */ 1152 1153 int mime_state_status(MIME_STATE *state) 1154 { 1155 return (state->err_flags); 1156 } 1157 1158 /* 1159 * Mime error to (DSN, text) mapping. Order matters; more serious errors 1160 * must precede less serious errors, because the error-to-text conversion 1161 * can report only one error. 1162 */ 1163 static const MIME_STATE_DETAIL mime_err_detail[] = { 1164 MIME_ERR_NESTING, "5.6.0", "MIME nesting exceeds safety limit", 1165 MIME_ERR_TRUNC_HEADER, "5.6.0", "message header length exceeds safety limit", 1166 MIME_ERR_8BIT_IN_HEADER, "5.6.0", "improper use of 8-bit data in message header", 1167 MIME_ERR_8BIT_IN_7BIT_BODY, "5.6.0", "improper use of 8-bit data in message body", 1168 MIME_ERR_ENCODING_DOMAIN, "5.6.0", "invalid message/* or multipart/* encoding domain", 1169 MIME_ERR_NON_EMPTY_EOH, "5.6.0", "primary header was terminated with non-empty line", 1170 0, 1171 }; 1172 1173 /* mime_state_error - error code to string */ 1174 1175 const char *mime_state_error(int error_code) 1176 { 1177 const MIME_STATE_DETAIL *mp; 1178 1179 if (error_code == 0) 1180 msg_panic("mime_state_error: there is no error"); 1181 for (mp = mime_err_detail; mp->code; mp++) 1182 if (mp->code & error_code) 1183 return (mp->text); 1184 msg_panic("mime_state_error: unknown error code %d", error_code); 1185 } 1186 1187 /* mime_state_detail - error code to table entry with assorted data */ 1188 1189 const MIME_STATE_DETAIL *mime_state_detail(int error_code) 1190 { 1191 const MIME_STATE_DETAIL *mp; 1192 1193 if 
(error_code == 0) 1194 msg_panic("mime_state_detail: there is no error"); 1195 for (mp = mime_err_detail; mp->code; mp++) 1196 if (mp->code & error_code) 1197 return (mp); 1198 msg_panic("mime_state_detail: unknown error code %d", error_code); 1199 } 1200 1201 #ifdef TEST 1202 1203 #include <stdlib.h> 1204 #include <stringops.h> 1205 #include <vstream.h> 1206 #include <msg_vstream.h> 1207 #include <rec_streamlf.h> 1208 1209 /* 1210 * Stress test the REC_TYPE_CONT/NORM handling, but don't break header 1211 * labels. 1212 */ 1213 /*#define REC_LEN 40*/ 1214 1215 #define REC_LEN 1024 1216 1217 static void head_out(void *context, int class, const HEADER_OPTS *unused_info, 1218 VSTRING *buf, off_t offset) 1219 { 1220 VSTREAM *stream = (VSTREAM *) context; 1221 1222 vstream_fprintf(stream, "%s %ld\t|%s\n", 1223 class == MIME_HDR_PRIMARY ? "MAIN" : 1224 class == MIME_HDR_MULTIPART ? "MULT" : 1225 class == MIME_HDR_NESTED ? "NEST" : 1226 "ERROR", (long) offset, STR(buf)); 1227 } 1228 1229 static void head_end(void *context) 1230 { 1231 VSTREAM *stream = (VSTREAM *) context; 1232 1233 vstream_fprintf(stream, "HEADER END\n"); 1234 } 1235 1236 static void body_out(void *context, int rec_type, const char *buf, ssize_t len, 1237 off_t offset) 1238 { 1239 VSTREAM *stream = (VSTREAM *) context; 1240 1241 vstream_fprintf(stream, "BODY %c %ld\t|", rec_type, (long) offset); 1242 vstream_fwrite(stream, buf, len); 1243 if (rec_type == REC_TYPE_NORM) 1244 VSTREAM_PUTC('\n', stream); 1245 } 1246 1247 static void body_end(void *context) 1248 { 1249 VSTREAM *stream = (VSTREAM *) context; 1250 1251 vstream_fprintf(stream, "BODY END\n"); 1252 } 1253 1254 static void err_print(void *unused_context, int err_flag, 1255 const char *text, ssize_t len) 1256 { 1257 msg_warn("%s: %.*s", mime_state_error(err_flag), 1258 len < 100 ? 
(int) len : 100, text); 1259 } 1260 1261 int var_header_limit = 2000; 1262 int var_mime_maxdepth = 20; 1263 int var_mime_bound_len = 2000; 1264 char *var_drop_hdrs = DEF_DROP_HDRS; 1265 1266 int main(int unused_argc, char **argv) 1267 { 1268 int rec_type; 1269 int last = 0; 1270 VSTRING *buf; 1271 MIME_STATE *state; 1272 int err; 1273 1274 /* 1275 * Initialize. 1276 */ 1277 #define MIME_OPTIONS \ 1278 (MIME_OPT_REPORT_8BIT_IN_7BIT_BODY \ 1279 | MIME_OPT_REPORT_8BIT_IN_HEADER \ 1280 | MIME_OPT_REPORT_ENCODING_DOMAIN \ 1281 | MIME_OPT_REPORT_TRUNC_HEADER \ 1282 | MIME_OPT_REPORT_NESTING \ 1283 | MIME_OPT_REPORT_NON_EMPTY_EOH \ 1284 | MIME_OPT_DOWNGRADE) 1285 1286 msg_vstream_init(basename(argv[0]), VSTREAM_OUT); 1287 msg_verbose = 1; 1288 buf = vstring_alloc(10); 1289 state = mime_state_alloc(MIME_OPTIONS, 1290 head_out, head_end, 1291 body_out, body_end, 1292 err_print, 1293 (void *) VSTREAM_OUT); 1294 1295 /* 1296 * Main loop. 1297 */ 1298 do { 1299 rec_type = rec_streamlf_get(VSTREAM_IN, buf, REC_LEN); 1300 VSTRING_TERMINATE(buf); 1301 err = mime_state_update(state, last = rec_type, STR(buf), LEN(buf)); 1302 vstream_fflush(VSTREAM_OUT); 1303 } while (rec_type > 0); 1304 1305 /* 1306 * Error reporting. 1307 */ 1308 if (err & MIME_ERR_TRUNC_HEADER) 1309 msg_warn("message header length exceeds safety limit"); 1310 if (err & MIME_ERR_NESTING) 1311 msg_warn("MIME nesting exceeds safety limit"); 1312 if (err & MIME_ERR_8BIT_IN_HEADER) 1313 msg_warn("improper use of 8-bit data in message header"); 1314 if (err & MIME_ERR_8BIT_IN_7BIT_BODY) 1315 msg_warn("improper use of 8-bit data in message body"); 1316 if (err & MIME_ERR_ENCODING_DOMAIN) 1317 msg_warn("improper message/* or multipart/* encoding domain"); 1318 if (err & MIME_ERR_NON_EMPTY_EOH) 1319 msg_warn("non-empty end-of-header"); 1320 1321 /* 1322 * Cleanup. 1323 */ 1324 mime_state_free(state); 1325 vstring_free(buf); 1326 exit(0); 1327 } 1328 1329 #endif 1330