mime_codecs.c revision 1.4 1 /* $NetBSD: mime_codecs.c,v 1.4 2006/10/24 19:57:05 christos Exp $ */
2
3 /*-
4 * Copyright (c) 2006 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Anon Ymous.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * This module contains all mime related codecs. Typically there are
41 * two versions: one operating on buffers and one operating on files.
42 * All exported routines have a "mime_" prefix. The file oriented
43 * routines have a "mime_f" prefix replacing the "mime_" prefix of the
44 * equivalent buffer based version.
45 *
46 * The file based API should be:
47 *
48 * mime_f<name>_{encode,decode}(FILE *in, FILE *out, void *cookie)
49 *
50 * XXX - currently this naming convention has not been adhered to.
51 *
52 * where the cookie is a generic way to pass arguments to the routine.
53 * This way these routines can be run by run_function() in mime.c.
54 *
55 * The buffer based API is not as rigid.
56 */
57
58 #ifdef MIME_SUPPORT
59
60 #include <sys/cdefs.h>
61 #ifndef __lint__
62 __RCSID("$NetBSD: mime_codecs.c,v 1.4 2006/10/24 19:57:05 christos Exp $");
63 #endif /* not __lint__ */
64
65 #include <assert.h>
66 #include <iconv.h>
67 #include <stdio.h>
68 #include <stdlib.h>
69 #include <util.h>
70
71 #include "def.h"
72 #include "extern.h"
73 #include "mime_codecs.h"
74
75
76 #ifdef CHARSET_SUPPORT
77 /************************************************************************
78 * Core character set conversion routines.
79 *
80 */
81
82 /*
83 * Fault-tolerant iconv() function.
84 *
85 * This routine was borrowed from nail-11.25/mime.c and modified. It
86 * tries to handle errno == EILSEQ by restarting at the next input
87 * byte (is this a good idea?). All other errors are handled by the
88 * caller.
89 */
90 PUBLIC size_t
91 mime_iconv(iconv_t cd, const char **inb, size_t *inbleft, char **outb, size_t *outbleft)
92 {
93 size_t sz = 0;
94
95 while ((sz = iconv(cd, inb, inbleft, outb, outbleft)) == (size_t)-1
96 && errno == EILSEQ) {
97 if (*outbleft > 0) {
98 *(*outb)++ = '?';
99 (*outbleft)--;
100 } else {
101 **outb = '\0';
102 return E2BIG;
103 }
104 if (*inbleft > 0) {
105 (*inb)++;
106 (*inbleft)--;
107 } else {
108 **outb = '\0';
109 break;
110 }
111 }
112 return sz;
113 }
114
115 /*
116 * This routine was mostly borrowed from src/usr.bin/iconv/iconv.c.
117 * We don't care about the invalid character count, so don't bother
118 * with __iconv(). We do care about robustness, so call mime_iconv()
119 * above to try to recover from errors.
120 */
121 #define INBUFSIZE 1024
122 #define OUTBUFSIZE (INBUFSIZE * 2)
123
124 PUBLIC void
125 mime_ficonv(FILE *fi, FILE *fo, void *cookie)
126 {
127 char inbuf[INBUFSIZE], outbuf[OUTBUFSIZE], *out;
128 const char *in;
129 size_t inbytes, outbytes, ret;
130 iconv_t cd;
131
132 /*
133 * NOTE: iconv_t is actually a pointer typedef, so this
134 * conversion is not what it appears to be!
135 */
136 cd = (iconv_t)cookie;
137
138 while ((inbytes = fread(inbuf, 1, INBUFSIZE, fi)) > 0) {
139 in = inbuf;
140 while (inbytes > 0) {
141 out = outbuf;
142 outbytes = OUTBUFSIZE;
143 ret = mime_iconv(cd, &in, &inbytes, &out, &outbytes);
144 if (ret == (size_t)-1 && errno != E2BIG) {
145 if (errno != EINVAL || in == inbuf) {
146 /* XXX - what is proper here?
147 * Just copy out the remains? */
148 (void)fprintf(fo,
149 "\n\t[ iconv truncated message: %s ]\n\n",
150 strerror(errno));
151 return;
152 }
153 /*
154 * If here: errno == EINVAL && in != inbuf
155 */
156 /* incomplete input character */
157 (void)memmove(inbuf, in, inbytes);
158 ret = fread(inbuf + inbytes, 1,
159 INBUFSIZE - inbytes, fi);
160 if (ret == 0) {
161 if (feof(fi)) {
162 (void)fprintf(fo,
163 "\n\t[ unexpected end of file; "
164 "the last character is "
165 "incomplete. ]\n\n");
166 return;
167 }
168 (void)fprintf(fo,
169 "\n\t[ fread(): %s ]\n\n",
170 strerror(errno));
171 return;
172 }
173 in = inbuf;
174 inbytes += ret;
175
176 }
177 if (outbytes < OUTBUFSIZE)
178 (void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo);
179 }
180 }
181 /* reset the shift state of the output buffer */
182 outbytes = OUTBUFSIZE;
183 out = outbuf;
184 ret = iconv(cd, NULL, NULL, &out, &outbytes);
185 if (ret == (size_t)-1) {
186 (void)fprintf(fo, "\n\t[ iconv(): %s ]\n\n",
187 strerror(errno));
188 return;
189 }
190 if (outbytes < OUTBUFSIZE)
191 (void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo);
192 }
193
194 #endif /* CHARSET_SUPPORT */
195
196
197
198 /************************************************************************
199 * Core base64 routines
200 *
201 * Defined in sec 6.8 of RFC 2045.
202 */
203
/*
 * Decode a base64 buffer.
 *
 * bin: buffer to hold the decoded (binary) result (see note 1).
 * b64: buffer holding the encoded (base64) source.
 * cnt: number of bytes in the b64 buffer to decode (see note 2).
 *
 * Return: the number of bytes written to the 'bin' buffer or -1 on
 *         error.  Decoding stops at the first group that contains
 *         '=' padding; anything after that group is ignored.
 * NOTES:
 * 1) It is the caller's responsibility to ensure that bin is large
 *    enough to hold the result (3 bytes for every 4 input bytes).
 * 2) The b64 buffer must contain a multiple of 4 bytes of data;
 *    any other count is rejected with -1 so that no byte beyond
 *    b64[cnt - 1] is ever read.
 * 3) Each 4-byte group is validated before any of its output is
 *    stored: characters outside the base64 alphabet, '=' in the
 *    first or second position, and '=' in the third position that is
 *    not followed by a second '=' are all errors.
 */
PUBLIC ssize_t
mime_b64tobin(char *bin, const char *b64, size_t cnt)
{
	/* Maps an ASCII code to its 6-bit value; -1 is invalid, -2 is '='. */
	static const signed char b64index[] = {
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
		52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
		-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
		15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
		-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
		41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
	};
	unsigned char *p;
	const unsigned char *q, *end;

#define EQU	(unsigned)-2
#define BAD	(unsigned)-1
#define uchar64(c)	(unsigned)((c) >= sizeof(b64index) ? BAD : b64index[(c)])

	/* A partial trailing group would make the loop read past 'end'. */
	if (cnt % 4 != 0)
		return -1;

	p = (unsigned char *)bin;
	q = (const unsigned char *)b64;
	for (end = q + cnt; q < end; q += 4) {
		unsigned a = uchar64(q[0]);
		unsigned b = uchar64(q[1]);
		unsigned c = uchar64(q[2]);
		unsigned d = uchar64(q[3]);

		/* Validate the whole group before emitting anything. */
		if (a == BAD || a == EQU || b == BAD || b == EQU ||
		    c == BAD || d == BAD)
			return -1;

		*p++ = ((a << 2) | ((b & 0x30) >> 4));
		if (c == EQU) {	/* got '=' */
			if (d != EQU)
				return -1;
			break;
		}
		*p++ = (((b & 0x0f) << 4) | ((c & 0x3c) >> 2));
		if (d == EQU) {	/* got '=' */
			break;
		}
		*p++ = (((c & 0x03) << 6) | d);
	}

#undef uchar64
#undef EQU
#undef BAD

	return p - (unsigned char *)bin;
}
269
/*
 * Encode a buffer as a base64 result.
 *
 * b64: buffer to hold the encoded (base64) result (see note).
 * bin: buffer holding the binary source.
 * cnt: number of bytes in the bin buffer to encode.
 *
 * Every group of up to 3 input bytes produces exactly 4 output
 * characters; a final group of 1 or 2 bytes is padded with '='.
 * The result is not NUL terminated.
 *
 * NOTE: it is the caller's responsibility to ensure that 'b64' is
 * large enough to hold the result (4 bytes for every 3 input bytes,
 * rounded up).
 */
PUBLIC void
mime_bintob64(char *b64, const char *bin, size_t cnt)
{
	static const char b64table[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
	const unsigned char *p = (const unsigned char *)bin;
	size_t left;	/* input bytes not yet encoded */

	for (left = cnt; left > 0; p += 3, b64 += 4) {
		/* Never touch input bytes beyond bin[cnt - 1]. */
		unsigned a = p[0];
		unsigned b = left > 1 ? p[1] : 0;
		unsigned c = left > 2 ? p[2] : 0;

		b64[0] = b64table[a >> 2];
		b64[1] = b64table[((a & 0x3) << 4) | ((b & 0xf0) >> 4)];
		b64[2] = left > 1 ?
		    b64table[((b & 0xf) << 2) | ((c & 0xc0) >> 6)] : '=';
		b64[3] = left > 2 ? b64table[c & 0x3f] : '=';

		left = left > 3 ? left - 3 : 0;
	}
}
315
316
317 #define MIME_BASE64_LINE_MAX (4 * 19) /* max line length is 76: see RFC2045 sec 6.8 */
318
319 static void
320 mime_fB64_encode(FILE *fi, FILE *fo, void *cookie __unused)
321 {
322 static char b64[MIME_BASE64_LINE_MAX];
323 static char mem[3 * (MIME_BASE64_LINE_MAX / 4)];
324 int cnt;
325 char *cp;
326 size_t limit;
327 #ifdef __lint__
328 cookie = cookie;
329 #endif
330 limit = 0;
331 if ((cp = value(ENAME_MIME_B64_LINE_MAX)) != NULL)
332 limit = (size_t)atoi(cp);
333 if (limit == 0 || limit > sizeof(b64))
334 limit = sizeof(b64);
335
336 limit = 3 * roundup(limit, 4) / 4;
337 if (limit < 3)
338 limit = 3;
339
340 while ((cnt = fread(mem, sizeof(*mem), limit, fi)) > 0) {
341 mime_bintob64(b64, mem, (size_t)cnt);
342 (void)fwrite(b64, sizeof(*b64), (size_t)4 * roundup(cnt, 3) / 3, fo);
343 (void)putc('\n', fo);
344 }
345 }
346
347 static void
348 mime_fB64_decode(FILE *fi, FILE *fo, void *add_lf)
349 {
350 char *line;
351 size_t len;
352 char *buf;
353 size_t buflen;
354
355 buflen = 3 * (MIME_BASE64_LINE_MAX / 4);
356 buf = emalloc(buflen);
357
358 while ((line = fgetln(fi, &len)) != NULL) {
359 ssize_t binlen;
360 if (line[len-1] == '\n') /* forget the trailing newline */
361 len--;
362
363 /* trash trailing white space */
364 for (/* EMPTY */; len > 0 && isblank((unsigned char)line[len-1]); len--)
365 continue;
366
367 /* skip leading white space */
368 for (/* EMPTY */; len > 0 && isblank((unsigned char)line[0]); len--, line++)
369 continue;
370
371 if (len == 0)
372 break;
373
374 if (3 * len > 4 * buflen) {
375 buflen *= 2;
376 buf = erealloc(buf, buflen);
377 }
378
379 binlen = mime_b64tobin(buf, line, len);
380
381 if (binlen <= 0) {
382 (void)fprintf(fo, "WARN: invalid base64 encoding\n");
383 break;
384 }
385 (void)fwrite(buf, 1, (size_t)binlen, fo);
386 }
387
388 free(buf);
389
390 if (add_lf)
391 (void)fputc('\n', fo);
392 }
393
394
395 /************************************************************************
396 * Core quoted-printable routines.
397 *
398 * Note: the header QP routines are slightly different and buried
399 * inside mime_header.c
400 */
401
/*
 * Decide whether the character at 'p' must be quoted-printable
 * encoded.
 *
 * p:   the character to examine.
 * end: one past the last character of the input line; used to look
 *      ahead without leaving the line.
 * l:   length of the current output line so far (0 means 'p' would be
 *      the first character on it).
 *
 * Return: non-zero if the character must be quoted, 0 otherwise.
 *
 * Characters above 0x7f, '=', CR, DEL and control characters other
 * than tab and newline are always quoted.  A space or tab is quoted
 * only when it is directly followed by a newline.  At the start of an
 * output line, an 'F' followed by "rom" and a '.' directly followed by
 * a newline are quoted as well.
 */
static int
mustquote(unsigned char *p, unsigned char *end, size_t l)
{
#define N	0	/* do not quote */
#define Q	1	/* must quote */
#define SP	2	/* white space */
#define XF	3	/* special character 'F' - maybe quoted */
#define XD	4	/* special character '.' - maybe quoted */
#define EQ	Q	/* '=' must be quoted */
#define TB	SP	/* treat '\t' as a space */
#define NL	N	/* don't quote '\n' (NL) - XXX - quoting here breaks the line length algorithm */
#define CR	Q	/* always quote a '\r' (CR) - it occurs only in a CRLF combo */

	static const signed char quotetab[] = {
		 Q, Q, Q, Q,  Q, Q, Q, Q,  Q,TB,NL, Q,  Q,CR, Q, Q,
		 Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,
		SP, N, N, N,  N, N, N, N,  N, N, N, N,  N, N,XD, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N,EQ, N, N,

		 N, N, N, N,  N, N,XF, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, Q,
	};
	/* The table only covers 7-bit codes; everything above is quoted. */
	int flag = *p > 0x7f ? Q : quotetab[*p];

	if (flag == N)
		return 0;
	if (flag == Q)
		return 1;
	if (flag == SP)
		return (p + 1 < end && p[1] == '\n'); /* trailing white space */

	/* The remainder are special start-of-line cases. */
	if (l != 0)
		return 0;

	if (flag == XF)	/* line may start with "From" */
		return (p + 4 < end && p[1] == 'r' && p[2] == 'o' && p[3] == 'm');

	if (flag == XD)	/* line may consist of a single dot */
		return (p + 1 < end && p[1] == '\n');

	errx(EXIT_FAILURE, "mustquote: invalid logic: *p=0x%x (%d) flag=%d, l=%zu\n",
	    *p, *p, flag, l);
	/* NOT REACHED */
	return 0;	/* appease GCC */

	/* Undefine every helper macro so none leaks into the rest of the file. */
#undef N
#undef Q
#undef SP
#undef XF
#undef XD
#undef EQ
#undef TB
#undef NL
#undef CR
}
459
460
461 #define MIME_QUOTED_LINE_MAX 76 /* QP max length: see RFC2045 sec 6.7 */
462
463 static void
464 fput_quoted_line(FILE *fo, char *line, size_t len, size_t limit)
465 {
466 size_t l; /* length of current output line */
467 unsigned char *beg;
468 unsigned char *end;
469 unsigned char *p;
470
471 assert(limit <= MIME_QUOTED_LINE_MAX);
472
473 beg = (unsigned char*)line;
474 end = beg + len;
475 l = 0;
476 for (p = (unsigned char*)line; p < end; p++) {
477 if (mustquote(p, end, l)) {
478 if (l + 4 > limit) {
479 (void)fputs("=\n", fo);
480 l = 0;
481 }
482 (void)fprintf(fo, "=%02X", *p);
483 l += 3;
484 }
485 else {
486 if (*p == '\n') {
487 if (p > beg && p[-1] == '\r')
488 (void)fputs("=0A=", fo);
489 l = (size_t)-1;
490 }
491 else if (l + 2 > limit) {
492 (void)fputs("=\n", fo);
493 l = 0;
494 }
495 (void)putc(*p, fo);
496 l++;
497 }
498 }
499 /*
500 * Lines ending in a blank must escape the newline.
501 */
502 if (len && isblank((unsigned char)p[-1]))
503 (void)fputs("=\n", fo);
504 }
505
506 static void
507 mime_fQP_encode(FILE *fi, FILE *fo, void *cookie __unused)
508 {
509 char *line;
510 size_t len;
511 char *cp;
512 size_t limit;
513
514 #ifdef __lint__
515 cookie = cookie;
516 #endif
517 limit = 0;
518 if ((cp = value(ENAME_MIME_QP_LINE_MAX)) != NULL)
519 limit = (size_t)atoi(cp);
520 if (limit == 0 || limit > MIME_QUOTED_LINE_MAX)
521 limit = MIME_QUOTED_LINE_MAX;
522 if (limit < 4)
523 limit = 4;
524
525 while ((line = fgetln(fi, &len)) != NULL)
526 fput_quoted_line(fo, line, len, limit);
527 }
528
529 static void
530 mime_fQP_decode(FILE *fi, FILE *fo, void *cookie __unused)
531 {
532 char *line;
533 size_t len;
534
535 #ifdef __lint__
536 cookie = cookie;
537 #endif
538 while ((line = fgetln(fi, &len)) != NULL) {
539 int c;
540 char *p;
541 char *end;
542 end = line + len;
543 for (p = line; p < end; p++) {
544 if (*p == '=') {
545 p++;
546 while (p < end && isblank((unsigned char)*p))
547 p++;
548 if (*p != '\n' && p + 1 < end) {
549 char buf[3];
550 buf[0] = *p++;
551 buf[1] = *p;
552 buf[2] = '\0';
553 c = strtol(buf, NULL, 16);
554 (void)fputc(c, fo);
555 }
556 }
557 else
558 (void)fputc(*p, fo);
559 }
560 }
561 }
562
563
564 /************************************************************************
565 * Routines to select the codec by name.
566 */
567
568 PUBLIC void
569 mime_fio_copy(FILE *fi, FILE *fo, void *cookie __unused)
570 {
571 int c;
572
573 #ifdef __lint__
574 cookie = cookie;
575 #endif
576 while ((c = getc(fi)) != EOF)
577 (void)putc(c, fo);
578
579 (void)fflush(fo);
580 if (ferror(fi)) {
581 warn("read");
582 rewind(fi);
583 return;
584 }
585 if (ferror(fo)) {
586 warn("write");
587 (void)Fclose(fo);
588 rewind(fi);
589 return;
590 }
591 }
592
593
594 static const struct transfer_encoding_s {
595 const char *name;
596 mime_codec_t enc;
597 mime_codec_t dec;
598 } transfer_encoding_tbl[] = {
599 { MIME_TRANSFER_7BIT, mime_fio_copy, mime_fio_copy },
600 { MIME_TRANSFER_8BIT, mime_fio_copy, mime_fio_copy },
601 { MIME_TRANSFER_BINARY, mime_fio_copy, mime_fio_copy },
602 { MIME_TRANSFER_QUOTED, mime_fQP_encode, mime_fQP_decode },
603 { MIME_TRANSFER_BASE64, mime_fB64_encode, mime_fB64_decode },
604 { NULL, NULL, NULL },
605 };
606
607
608 PUBLIC mime_codec_t
609 mime_fio_encoder(const char *ename)
610 {
611 const struct transfer_encoding_s *tep = NULL;
612
613 if (ename == NULL)
614 return NULL;
615
616 for (tep = transfer_encoding_tbl; tep->name; tep++)
617 if (strcasecmp(tep->name, ename) == 0)
618 break;
619 return tep->enc;
620 }
621
622 PUBLIC mime_codec_t
623 mime_fio_decoder(const char *ename)
624 {
625 const struct transfer_encoding_s *tep = NULL;
626
627 if (ename == NULL)
628 return NULL;
629
630 for (tep = transfer_encoding_tbl; tep->name; tep++)
631 if (strcasecmp(tep->name, ename) == 0)
632 break;
633 return tep->dec;
634 }
635
636 /*
637 * This is for use in complete.c and mime.c to get the list of
638 * encoding names without exposing the transfer_encoding_tbl[]. The
639 * first name is returned if called with a pointer to a NULL pointer.
640 * Subsequent calls with the same cookie give successive names. A
641 * NULL return indicates the end of the list.
642 */
643 PUBLIC const char *
644 mime_next_encoding_name(const void **cookie)
645 {
646 const struct transfer_encoding_s *tep;
647
648 tep = *cookie;
649 if (tep == NULL)
650 tep = transfer_encoding_tbl;
651
652 *cookie = tep->name ? &tep[1] : NULL;
653
654 return tep->name;
655 }
656
657
658 #endif /* MIME_SUPPORT */
659