mime_codecs.c revision 1.9 1 /* $NetBSD: mime_codecs.c,v 1.9 2009/04/10 13:08:25 christos Exp $ */
2
3 /*-
4 * Copyright (c) 2006 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Anon Ymous.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 /*
33 * This module contains all mime related codecs. Typically there are
34 * two versions: one operating on buffers and one operating on files.
35 * All exported routines have a "mime_" prefix. The file oriented
36 * routines have a "mime_f" prefix replacing the "mime_" prefix of the
37 * equivalent buffer based version.
38 *
39 * The file based API should be:
40 *
41 * mime_f<name>_{encode,decode}(FILE *in, FILE *out, void *cookie)
42 *
 * XXX - currently this naming convention has not been adhered to.
44 *
45 * where the cookie is a generic way to pass arguments to the routine.
46 * This way these routines can be run by run_function() in mime.c.
47 *
48 * The buffer based API is not as rigid.
49 */
50
51 #ifdef MIME_SUPPORT
52
53 #include <sys/cdefs.h>
54 #ifndef __lint__
55 __RCSID("$NetBSD: mime_codecs.c,v 1.9 2009/04/10 13:08:25 christos Exp $");
56 #endif /* not __lint__ */
57
58 #include <assert.h>
59 #include <iconv.h>
60 #include <stdio.h>
61 #include <stdlib.h>
62 #include <util.h>
63
64 #include "def.h"
65 #include "extern.h"
66 #include "mime_codecs.h"
67
68
69 #ifdef CHARSET_SUPPORT
70 /************************************************************************
71 * Core character set conversion routines.
72 *
73 */
74
/*
 * Fault-tolerant iconv() function.
 *
 * This routine was borrowed from nail-11.25/mime.c and modified.  It
 * recovers from errno == EILSEQ by storing a '?' in the output in
 * place of the offending input byte and restarting the conversion at
 * the next input byte.  All other errors are left for the caller to
 * handle.
 *
 * The arguments are those of iconv(3) and are updated in the same
 * way.
 *
 * Return: the result of the last iconv(3) call.  If an invalid input
 * byte is found when there is no room left for its '?' replacement,
 * (size_t)-1 is returned with errno set to E2BIG, which is how
 * iconv(3) itself reports a full output buffer; the caller can drain
 * the buffer and call again.  Nothing is ever stored beyond the
 * *outbleft bytes available at *outb.
 */
PUBLIC size_t
mime_iconv(iconv_t cd, const char **inb, size_t *inbleft, char **outb, size_t *outbleft)
{
	size_t sz;

	for (;;) {
		sz = iconv(cd, inb, inbleft, outb, outbleft);
		if (sz != (size_t)-1 || errno != EILSEQ)
			break;

		/* Invalid input sequence: substitute a '?' for it. */
		if (*outbleft == 0) {
			errno = E2BIG;
			return (size_t)-1;
		}
		*(*outb)++ = '?';
		(*outbleft)--;

		if (*inbleft == 0) {
			/*
			 * Nothing left to skip.  Terminate the output
			 * (the NUL is not counted), but only if there
			 * is room for it.
			 */
			if (*outbleft > 0)
				**outb = '\0';
			break;
		}
		/* Step over the offending byte and try again. */
		(*inb)++;
		(*inbleft)--;
	}
	return sz;
}
107
108 /*
109 * This routine was mostly borrowed from src/usr.bin/iconv/iconv.c.
110 * We don't care about the invalid character count, so don't bother
111 * with __iconv(). We do care about robustness, so call iconv_ft()
112 * above to try to recover from errors.
113 */
114 #define INBUFSIZE 1024
115 #define OUTBUFSIZE (INBUFSIZE * 2)
116
117 PUBLIC void
118 mime_ficonv(FILE *fi, FILE *fo, void *cookie)
119 {
120 char inbuf[INBUFSIZE], outbuf[OUTBUFSIZE], *out;
121 const char *in;
122 size_t inbytes, outbytes, ret;
123 iconv_t cd;
124
125 /*
126 * NOTE: iconv_t is actually a pointer typedef, so this
127 * conversion is not what it appears to be!
128 */
129 cd = (iconv_t)cookie;
130
131 while ((inbytes = fread(inbuf, 1, INBUFSIZE, fi)) > 0) {
132 in = inbuf;
133 while (inbytes > 0) {
134 out = outbuf;
135 outbytes = OUTBUFSIZE;
136 ret = mime_iconv(cd, &in, &inbytes, &out, &outbytes);
137 if (ret == (size_t)-1 && errno != E2BIG) {
138 if (errno != EINVAL || in == inbuf) {
139 /* XXX - what is proper here?
140 * Just copy out the remains? */
141 (void)fprintf(fo,
142 "\n\t[ iconv truncated message: %s ]\n\n",
143 strerror(errno));
144 return;
145 }
146 /*
147 * If here: errno == EINVAL && in != inbuf
148 */
149 /* incomplete input character */
150 (void)memmove(inbuf, in, inbytes);
151 ret = fread(inbuf + inbytes, 1,
152 INBUFSIZE - inbytes, fi);
153 if (ret == 0) {
154 if (feof(fi)) {
155 (void)fprintf(fo,
156 "\n\t[ unexpected end of file; "
157 "the last character is "
158 "incomplete. ]\n\n");
159 return;
160 }
161 (void)fprintf(fo,
162 "\n\t[ fread(): %s ]\n\n",
163 strerror(errno));
164 return;
165 }
166 in = inbuf;
167 inbytes += ret;
168
169 }
170 if (outbytes < OUTBUFSIZE)
171 (void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo);
172 }
173 }
174 /* reset the shift state of the output buffer */
175 outbytes = OUTBUFSIZE;
176 out = outbuf;
177 ret = iconv(cd, NULL, NULL, &out, &outbytes);
178 if (ret == (size_t)-1) {
179 (void)fprintf(fo, "\n\t[ iconv(): %s ]\n\n",
180 strerror(errno));
181 return;
182 }
183 if (outbytes < OUTBUFSIZE)
184 (void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo);
185 }
186
187 #endif /* CHARSET_SUPPORT */
188
189
190
191 /************************************************************************
192 * Core base64 routines
193 *
194 * Defined in sec 6.8 of RFC 2045.
195 */
196
/*
 * Decode a base64 buffer.
 *
 * bin: buffer to hold the decoded (binary) result (see note 1).
 * b64: buffer holding the encoded (base64) source.
 * cnt: number of bytes in the b64 buffer to decode (see note 2).
 *
 * Return: the number of bytes written to the 'bin' buffer or -1 on
 *         error.  Decoding stops at the first quantum containing '='
 *         padding; anything after it is ignored.
 * NOTES:
 * 1) It is the callers responsibility to ensure that bin is large
 *    enough to hold the result (3 bytes for every 4 bytes of b64).
 * 2) The b64 buffer must contain a multiple of 4 bytes of data;
 *    anything else is rejected with -1 rather than read past the
 *    end of the buffer.
 * 3) Every quantum is validated before its bytes are stored: a
 *    character outside the base64 alphabet, or a '=' anywhere but
 *    in the last one or two positions, yields -1.
 */
PUBLIC ssize_t
mime_b64tobin(char *bin, const char *b64, size_t cnt)
{
	static const signed char b64index[] = {
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
		52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
		-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
		15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
		-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
		41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
	};
	unsigned char *p;
	const unsigned char *q, *end;

#define EQU	(unsigned)-2	/* the '=' pad character */
#define BAD	(unsigned)-1	/* not in the base64 alphabet */
#define uchar64(c)  ((c) >= sizeof(b64index) ? BAD : (unsigned)b64index[(c)])
#define B64_MAXVAL	63	/* largest real 6-bit value */

	/* A partial quantum would make the loop read beyond b64[cnt - 1]. */
	if (cnt % 4 != 0)
		return -1;

	p = (unsigned char *)bin;
	q = (const unsigned char *)b64;
	for (end = q + cnt; q < end; q += 4) {
		unsigned a = uchar64(q[0]);
		unsigned b = uchar64(q[1]);
		unsigned c = uchar64(q[2]);
		unsigned d = uchar64(q[3]);

		/* The first two characters must always be real data. */
		if (a > B64_MAXVAL || b > B64_MAXVAL)
			return -1;
		*p++ = (unsigned char)((a << 2) | ((b & 0x30) >> 4));

		if (c == EQU) {		/* got '=': must be "xx==" */
			if (d != EQU)
				return -1;
			break;
		}
		if (c == BAD)
			return -1;
		*p++ = (unsigned char)(((b & 0x0f) << 4) | ((c & 0x3c) >> 2));

		if (d == EQU)		/* got '=': "xxx=" */
			break;
		if (d == BAD)
			return -1;
		*p++ = (unsigned char)(((c & 0x03) << 6) | d);
	}

#undef B64_MAXVAL
#undef uchar64
#undef EQU
#undef BAD

	return p - (unsigned char *)bin;
}
262
/*
 * Encode a buffer as a base64 result.
 *
 * b64: buffer to hold the encoded (base64) result (see note).
 * bin: buffer holding the binary source.
 * cnt: number of bytes in the bin buffer to encode.
 *
 * Every 3 source bytes produce 4 output characters; a final group of
 * 1 or 2 bytes is padded with '=' to a full 4 characters.  The
 * result is not NUL terminated.  Only the first 'cnt' bytes of 'bin'
 * are ever read.
 *
 * NOTE: it is the callers responsibility to ensure that 'b64' is
 * large enough to hold the result.
 */
PUBLIC void
mime_bintob64(char *b64, const char *bin, size_t cnt)
{
	static const char b64table[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
	const unsigned char *p = (const unsigned char *)bin;

	/* All of the complete 3 byte groups. */
	for (/*EMPTY*/; cnt >= 3; cnt -= 3, p += 3, b64 += 4) {
		b64[0] = b64table[p[0] >> 2];
		b64[1] = b64table[((p[0] & 0x3) << 4) | ((p[1] & 0xf0) >> 4)];
		b64[2] = b64table[((p[1] & 0xf) << 2) | ((p[2] & 0xc0) >> 6)];
		b64[3] = b64table[p[2] & 0x3f];
	}

	/* The 1 or 2 byte tail, without touching bytes beyond it. */
	if (cnt == 2) {
		b64[0] = b64table[p[0] >> 2];
		b64[1] = b64table[((p[0] & 0x3) << 4) | ((p[1] & 0xf0) >> 4)];
		b64[2] = b64table[(p[1] & 0xf) << 2];
		b64[3] = '=';
	} else if (cnt == 1) {
		b64[0] = b64table[p[0] >> 2];
		b64[1] = b64table[(p[0] & 0x3) << 4];
		b64[2] = '=';
		b64[3] = '=';
	}
}
308
309
310 #define MIME_BASE64_LINE_MAX (4 * 19) /* max line length is 76: see RFC2045 sec 6.8 */
311
312 static void
313 mime_fB64_encode(FILE *fi, FILE *fo, void *cookie __unused)
314 {
315 static char b64[MIME_BASE64_LINE_MAX];
316 static char mem[3 * (MIME_BASE64_LINE_MAX / 4)];
317 size_t cnt;
318 char *cp;
319 size_t limit;
320 #ifdef __lint__
321 cookie = cookie;
322 #endif
323 limit = 0;
324 if ((cp = value(ENAME_MIME_B64_LINE_MAX)) != NULL)
325 limit = (size_t)atoi(cp);
326 if (limit == 0 || limit > sizeof(b64))
327 limit = sizeof(b64);
328
329 limit = 3 * roundup(limit, 4) / 4;
330 if (limit < 3)
331 limit = 3;
332
333 while ((cnt = fread(mem, sizeof(*mem), limit, fi)) > 0) {
334 mime_bintob64(b64, mem, (size_t)cnt);
335 (void)fwrite(b64, sizeof(*b64), (size_t)4 * roundup(cnt, 3) / 3, fo);
336 (void)putc('\n', fo);
337 }
338 }
339
340 static void
341 mime_fB64_decode(FILE *fi, FILE *fo, void *add_lf)
342 {
343 char *line;
344 size_t len;
345 char *buf;
346 size_t buflen;
347
348 buflen = 3 * (MIME_BASE64_LINE_MAX / 4);
349 buf = emalloc(buflen);
350
351 while ((line = fgetln(fi, &len)) != NULL) {
352 ssize_t binlen;
353 if (line[len-1] == '\n') /* forget the trailing newline */
354 len--;
355
356 /* trash trailing white space */
357 for (/*EMPTY*/; len > 0 && is_WSP(line[len-1]); len--)
358 continue;
359
360 /* skip leading white space */
361 for (/*EMPTY*/; len > 0 && is_WSP(line[0]); len--, line++)
362 continue;
363
364 if (len == 0)
365 break;
366
367 if (3 * len > 4 * buflen) {
368 buflen *= 2;
369 buf = erealloc(buf, buflen);
370 }
371
372 binlen = mime_b64tobin(buf, line, len);
373
374 if (binlen <= 0) {
375 (void)fprintf(fo, "WARN: invalid base64 encoding\n");
376 break;
377 }
378 (void)fwrite(buf, 1, (size_t)binlen, fo);
379 }
380
381 free(buf);
382
383 if (add_lf)
384 (void)fputc('\n', fo);
385 }
386
387
388 /************************************************************************
389 * Core quoted-printable routines.
390 *
 * Note: the header QP routines are slightly different and buried
392 * inside mime_header.c
393 */
394
/*
 * Decide whether the character at 'p' must be written in its quoted
 * ("=XX") form.
 *
 * p:   the character being examined.
 * end: one past the last character of the input; used to bound the
 *      look-ahead.
 * l:   length of the current output line so far; 0 means 'p' will be
 *      the first character on the output line.
 *
 * Return: non-zero if the character must be quoted, 0 if not.
 *
 * Control characters (other than TAB and NL), '=', DEL and anything
 * above 0x7f are always quoted.  A space or TAB is quoted only when
 * immediately followed by a newline.  At the start of an output line
 * an 'F' is quoted if followed by "rom" and at least one more
 * character, and a '.' is quoted if immediately followed by a
 * newline.
 */
static int
mustquote(unsigned char *p, unsigned char *end, size_t l)
{
#define N	0	/* do not quote */
#define Q	1	/* must quote */
#define SP	2	/* white space */
#define XF	3	/* special character 'F' - maybe quoted */
#define XD	4	/* special character '.' - maybe quoted */
#define EQ	Q	/* '=' must be quoted */
#define TB	SP	/* treat '\t' as a space */
#define NL	N	/* don't quote '\n' (NL) - XXX - quoting here breaks the line length algorithm */
#define CR	Q	/* always quote a '\r' (CR) - it occurs only in a CRLF combo */

	static const signed char quotetab[] = {
		 Q, Q, Q, Q,  Q, Q, Q, Q,  Q,TB,NL, Q,  Q,CR, Q, Q,
		 Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,
		SP, N, N, N,  N, N, N, N,  N, N, N, N,  N, N,XD, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N,EQ, N, N,

		 N, N, N, N,  N, N,XF, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, Q,
	};
	/* the table only covers 7-bit characters */
	int flag = *p > 0x7f ? Q : quotetab[*p];

	if (flag == N)
		return 0;
	if (flag == Q)
		return 1;
	if (flag == SP)
		return p + 1 < end && p[1] == '\n'; /* trailing white space */

	/* The remainder are special start-of-line cases. */
	if (l != 0)
		return 0;

	if (flag == XF)	/* line may start with "From" */
		return p + 4 < end && p[1] == 'r' && p[2] == 'o' && p[3] == 'm';

	if (flag == XD)	/* line may consist of a single dot */
		return p + 1 < end && p[1] == '\n';

	errx(EXIT_FAILURE,
	    "mustquote: invalid logic: *p=0x%x (%d) flag=%d, l=%zu\n",
	    *p, *p, flag, l);
	/* NOT REACHED */
	return 0;	/* appease GCC */

	/* Drop every table macro so that none leaks into the rest of the file. */
#undef N
#undef Q
#undef SP
#undef XF
#undef XD
#undef EQ
#undef TB
#undef NL
#undef CR
}
453
454
#define MIME_QUOTED_LINE_MAX	76	/* QP max length: see RFC2045 sec 6.7 */

/*
 * Write one input line to 'fo' in quoted-printable form.
 *
 * fo:    stream to write the encoded text to.
 * line:  the input text; it need not be NUL terminated.
 * len:   number of bytes in 'line'.
 * limit: maximum length of an encoded output line; it must not
 *        exceed MIME_QUOTED_LINE_MAX.  The only caller in this file
 *        never passes less than 4, which is what a quoted character
 *        plus a soft line break needs.
 *
 * A soft line break ("=\n") is inserted whenever the next item would
 * not leave room for one within 'limit'.  That includes the "=0A="
 * sequence written for the newline of a CRLF pair, so it cannot push
 * the line past the limit.
 */
static void
fput_quoted_line(FILE *fo, char *line, size_t len, size_t limit)
{
	size_t l;	/* length of current output line */
	unsigned char *beg;
	unsigned char *end;
	unsigned char *p;

	assert(limit <= MIME_QUOTED_LINE_MAX);

	beg = (unsigned char *)line;
	end = beg + len;
	l = 0;
	for (p = beg; p < end; p++) {
		if (mustquote(p, end, l)) {
			/* "=XX" plus room for a soft break */
			if (l + 4 > limit) {
				(void)fputs("=\n", fo);
				l = 0;
			}
			(void)fprintf(fo, "=%02X", *p);
			l += 3;
			continue;
		}

		if (*p == '\n') {
			if (p > beg && p[-1] == '\r') {
				/*
				 * The CR has already gone out quoted.
				 * Quote the NL as well and follow it
				 * with a soft break, first making sure
				 * that these 4 characters still fit.
				 */
				if (l + 4 > limit)
					(void)fputs("=\n", fo);
				(void)fputs("=0A=", fo);
			}
			(void)putc('\n', fo);
			l = 0;	/* a new output line starts here */
			continue;
		}

		/* plain character plus room for a soft break */
		if (l + 2 > limit) {
			(void)fputs("=\n", fo);
			l = 0;
		}
		(void)putc(*p, fo);
		l++;
	}
	/*
	 * Lines ending in a blank must escape the newline.
	 */
	if (len && is_WSP(end[-1]))
		(void)fputs("=\n", fo);
}
499
500 static void
501 mime_fQP_encode(FILE *fi, FILE *fo, void *cookie __unused)
502 {
503 char *line;
504 size_t len;
505 char *cp;
506 size_t limit;
507
508 #ifdef __lint__
509 cookie = cookie;
510 #endif
511 limit = 0;
512 if ((cp = value(ENAME_MIME_QP_LINE_MAX)) != NULL)
513 limit = (size_t)atoi(cp);
514 if (limit == 0 || limit > MIME_QUOTED_LINE_MAX)
515 limit = MIME_QUOTED_LINE_MAX;
516 if (limit < 4)
517 limit = 4;
518
519 while ((line = fgetln(fi, &len)) != NULL)
520 fput_quoted_line(fo, line, len, limit);
521 }
522
523 static void
524 mime_fQP_decode(FILE *fi, FILE *fo, void *cookie __unused)
525 {
526 char *line;
527 size_t len;
528
529 #ifdef __lint__
530 cookie = cookie;
531 #endif
532 while ((line = fgetln(fi, &len)) != NULL) {
533 char *p;
534 char *end;
535
536 end = line + len;
537 for (p = line; p < end; p++) {
538 if (*p == '=') {
539 p++;
540 while (p < end && is_WSP(*p))
541 p++;
542 if (*p != '\n' && p + 1 < end) {
543 int c;
544 char buf[3];
545
546 buf[0] = *p++;
547 buf[1] = *p;
548 buf[2] = '\0';
549 c = (int)strtol(buf, NULL, 16);
550 (void)fputc(c, fo);
551 }
552 }
553 else
554 (void)fputc(*p, fo);
555 }
556 }
557 }
558
559
560 /************************************************************************
561 * Routines to select the codec by name.
562 */
563
564 PUBLIC void
565 mime_fio_copy(FILE *fi, FILE *fo, void *cookie __unused)
566 {
567 int c;
568
569 #ifdef __lint__
570 cookie = cookie;
571 #endif
572 while ((c = getc(fi)) != EOF)
573 (void)putc(c, fo);
574
575 (void)fflush(fo);
576 if (ferror(fi)) {
577 warn("read");
578 rewind(fi);
579 return;
580 }
581 if (ferror(fo)) {
582 warn("write");
583 (void)Fclose(fo);
584 rewind(fi);
585 return;
586 }
587 }
588
589
590 static const struct transfer_encoding_s {
591 const char *name;
592 mime_codec_t enc;
593 mime_codec_t dec;
594 } transfer_encoding_tbl[] = {
595 { MIME_TRANSFER_7BIT, mime_fio_copy, mime_fio_copy },
596 { MIME_TRANSFER_8BIT, mime_fio_copy, mime_fio_copy },
597 { MIME_TRANSFER_BINARY, mime_fio_copy, mime_fio_copy },
598 { MIME_TRANSFER_QUOTED, mime_fQP_encode, mime_fQP_decode },
599 { MIME_TRANSFER_BASE64, mime_fB64_encode, mime_fB64_decode },
600 { NULL, NULL, NULL },
601 };
602
603
604 PUBLIC mime_codec_t
605 mime_fio_encoder(const char *ename)
606 {
607 const struct transfer_encoding_s *tep = NULL;
608
609 if (ename == NULL)
610 return NULL;
611
612 for (tep = transfer_encoding_tbl; tep->name; tep++)
613 if (strcasecmp(tep->name, ename) == 0)
614 break;
615 return tep->enc;
616 }
617
618 PUBLIC mime_codec_t
619 mime_fio_decoder(const char *ename)
620 {
621 const struct transfer_encoding_s *tep = NULL;
622
623 if (ename == NULL)
624 return NULL;
625
626 for (tep = transfer_encoding_tbl; tep->name; tep++)
627 if (strcasecmp(tep->name, ename) == 0)
628 break;
629 return tep->dec;
630 }
631
632 /*
633 * This is for use in complete.c and mime.c to get the list of
634 * encoding names without exposing the transfer_encoding_tbl[]. The
635 * first name is returned if called with a pointer to a NULL pointer.
636 * Subsequent calls with the same cookie give successive names. A
637 * NULL return indicates the end of the list.
638 */
639 PUBLIC const char *
640 mime_next_encoding_name(const void **cookie)
641 {
642 const struct transfer_encoding_s *tep;
643
644 tep = *cookie;
645 if (tep == NULL)
646 tep = transfer_encoding_tbl;
647
648 *cookie = tep->name ? &tep[1] : NULL;
649
650 return tep->name;
651 }
652
653 #endif /* MIME_SUPPORT */
654