mime_codecs.c revision 1.2 1 /* $NetBSD: mime_codecs.c,v 1.2 2006/10/22 08:29:36 mrg Exp $ */
2
3 /*-
4 * Copyright (c) 2006 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Anon Ymous.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
39 /*
40 * This module contains all mime related codecs. Typically there are
41 * two versions: one operating on buffers and one operating on files.
42 * All exported routines have a "mime_" prefix. The file oriented
43 * routines have a "mime_f" prefix replacing the "mime_" prefix of the
44 * equivalent buffer based version.
45 *
46 * The file based API should be:
47 *
48 * mime_f<name>_{encode,decode}(FILE *in, FILE *out, void *cookie)
49 *
 * XXX - currently this naming convention has not been adhered to.
51 *
52 * where the cookie is a generic way to pass arguments to the routine.
53 * This way these routines can be run by run_function() in mime.c.
54 *
55 * The buffer based API is not as rigid.
56 */
57
58 #ifdef MIME_SUPPORT
59
60 #include <sys/cdefs.h>
61 #ifndef __lint__
62 __RCSID("$NetBSD: mime_codecs.c,v 1.2 2006/10/22 08:29:36 mrg Exp $");
63 #endif /* not __lint__ */
64
65 #include <assert.h>
66 #include <iconv.h>
67 #include <stdio.h>
68 #include <stdlib.h>
69 #include <util.h>
70
71 #include "def.h"
72 #include "extern.h"
73 #include "mime_codecs.h"
74
75
76 #ifdef CHARSET_SUPPORT
77 /************************************************************************
78 * Core character set conversion routines.
79 *
80 */
81
82 /*
83 * Fault-tolerant iconv() function.
84 *
85 * This routine was borrowed from nail-11.25/mime.c and modified. It
86 * tries to handle errno == EILSEQ by restarting at the next input
87 * byte (is this a good idea?). All other errors are handled by the
88 * caller.
89 */
90 PUBLIC size_t
91 mime_iconv(iconv_t cd, const char **inb, size_t *inbleft, char **outb, size_t *outbleft)
92 {
93 size_t sz = 0;
94
95 while ((sz = iconv(cd, inb, inbleft, outb, outbleft)) == (size_t)-1
96 && errno == EILSEQ) {
97 if (*outbleft > 0) {
98 *(*outb)++ = '?';
99 (*outbleft)--;
100 } else {
101 **outb = '\0';
102 return E2BIG;
103 }
104 if (*inbleft > 0) {
105 (*inb)++;
106 (*inbleft)--;
107 } else {
108 **outb = '\0';
109 break;
110 }
111 }
112 return sz;
113 }
114
115 /*
116 * This routine was mostly borrowed from src/usr.bin/iconv/iconv.c.
117 * We don't care about the invalid character count, so don't bother
118 * with __iconv(). We do care about robustness, so call iconv_ft()
119 * above to try to recover from errors.
120 */
121 #define INBUFSIZE 1024
122 #define OUTBUFSIZE (INBUFSIZE * 2)
123
124 PUBLIC void
125 mime_ficonv(FILE *fi, FILE *fo, void *cookie)
126 {
127 char inbuf[INBUFSIZE], outbuf[OUTBUFSIZE], *out;
128 const char *in;
129 size_t inbytes, outbytes, ret;
130 iconv_t cd;
131
132 /*
133 * NOTE: iconv_t is actually a pointer typedef, so this
134 * conversion is not what it appears to be!
135 */
136 cd = (iconv_t)cookie;
137
138 while ((inbytes = fread(inbuf, 1, INBUFSIZE, fi)) > 0) {
139 in = inbuf;
140 while (inbytes > 0) {
141 out = outbuf;
142 outbytes = OUTBUFSIZE;
143 ret = mime_iconv(cd, &in, &inbytes, &out, &outbytes);
144 if (ret == (size_t)-1 && errno != E2BIG) {
145 if (errno != EINVAL || in == inbuf) {
146 /* XXX - what is proper here?
147 * Just copy out the remains? */
148 (void)fprintf(fo,
149 "\n\t[ iconv truncated message: %s ]\n\n",
150 strerror(errno));
151 return;
152 }
153 /*
154 * If here: errno == EINVAL && in != inbuf
155 */
156 /* incomplete input character */
157 (void)memmove(inbuf, in, inbytes);
158 ret = fread(inbuf + inbytes, 1,
159 INBUFSIZE - inbytes, fi);
160 if (ret == 0) {
161 if (feof(fi)) {
162 (void)fprintf(fo,
163 "\n\t[ unexpected end of file; "
164 "the last character is "
165 "incomplete. ]\n\n");
166 return;
167 }
168 (void)fprintf(fo,
169 "\n\t[ fread(): %s ]\n\n",
170 strerror(errno));
171 return;
172 }
173 in = inbuf;
174 inbytes += ret;
175
176 }
177 if (outbytes < OUTBUFSIZE)
178 (void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo);
179 }
180 }
181 /* reset the shift state of the output buffer */
182 outbytes = OUTBUFSIZE;
183 out = outbuf;
184 ret = iconv(cd, NULL, NULL, &out, &outbytes);
185 if (ret == (size_t)-1) {
186 (void)fprintf(fo, "\n\t[ iconv(): %s ]\n\n",
187 strerror(errno));
188 return;
189 }
190 if (outbytes < OUTBUFSIZE)
191 (void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo);
192 }
193
194 #endif /* CHARSET_SUPPORT */
195
196
197
198 /************************************************************************
199 * Core base64 routines
200 *
201 * Defined in sec 6.8 of RFC 2045.
202 */
203
/*
 * Decode a base64 buffer.
 *
 * bin: buffer to hold the decoded (binary) result (see note 1).
 * b64: buffer holding the encoded (base64) source.
 * cnt: number of bytes in the b64 buffer to decode (see note 2).
 *
 * Return: the number of bytes written to the 'bin' buffer or -1 on
 *         error.
 * NOTES:
 * 1) It is the callers responsibility to ensure that bin is large
 *    enough to hold the result (3 bytes for every 4 bytes of input).
 * 2) The b64 buffer must contain a multiple of 4 bytes of data.  If
 *    it does not, -1 is returned without looking at the data, so that
 *    no byte beyond 'cnt' is ever read.
 * 3) Decoding stops after the first group that ends in '=' padding;
 *    anything following it is ignored.
 * 4) Every group is validated before any of its bytes are stored.  On
 *    error 'bin' may still hold the output of earlier, valid groups.
 */
PUBLIC ssize_t
mime_b64tobin(char *bin, const char *b64, size_t cnt)
{
	/* Value of each 7-bit character: -1 is invalid, -2 is '='. */
	static const signed char b64index[] = {
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
		52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
		-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
		15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
		-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
		41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
	};
	enum { B64_BAD = -1, B64_PAD = -2 };
	unsigned char *p;
	const char *end;

	if (cnt % 4 != 0)
		return -1;

	p = (unsigned char *)bin;
	for (end = b64 + cnt; b64 < end; b64 += 4) {
		int v[4];
		int i;

		/*
		 * Look the characters up as unsigned values so that
		 * bytes >= 0x80 can never index outside of the table,
		 * whatever the signedness of plain char is.
		 */
		for (i = 0; i < 4; i++) {
			unsigned char uc = (unsigned char)b64[i];

			v[i] = uc < sizeof(b64index) ? b64index[uc] : B64_BAD;
		}

		/* The first two characters must always carry data. */
		if (v[0] < 0 || v[1] < 0)
			return -1;
		if (v[2] == B64_PAD) {		/* got "xx==" */
			if (v[3] != B64_PAD)
				return -1;
			*p++ = (unsigned char)((v[0] << 2) |
			    ((v[1] & 0x30) >> 4));
			break;
		}
		if (v[2] < 0)
			return -1;
		if (v[3] == B64_PAD) {		/* got "xxx=" */
			*p++ = (unsigned char)((v[0] << 2) |
			    ((v[1] & 0x30) >> 4));
			*p++ = (unsigned char)(((v[1] & 0x0f) << 4) |
			    ((v[2] & 0x3c) >> 2));
			break;
		}
		if (v[3] < 0)
			return -1;

		*p++ = (unsigned char)((v[0] << 2) | ((v[1] & 0x30) >> 4));
		*p++ = (unsigned char)(((v[1] & 0x0f) << 4) |
		    ((v[2] & 0x3c) >> 2));
		*p++ = (unsigned char)(((v[2] & 0x03) << 6) | v[3]);
	}

	return p - (unsigned char *)bin;
}
268
/*
 * Encode a buffer as a base64 result.
 *
 * b64: buffer to hold the encoded (base64) result (see note).
 * bin: buffer holding the binary source.
 * cnt: number of bytes in the bin buffer to encode.
 *
 * Every group of up to 3 input bytes yields exactly 4 output
 * characters; a short final group is padded with '='.  The result is
 * not NUL terminated.  Nothing beyond 'cnt' bytes of 'bin' is read.
 *
 * NOTE: it is the callers responsibility to ensure that 'b64' is
 * large enough to hold the result.
 */
PUBLIC void
mime_bintob64(char *b64, const char *bin, size_t cnt)
{
	static const char b64table[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
	const unsigned char *p = (const unsigned char *)bin;

	while (cnt > 0) {
		/* Number of real input bytes in this group. */
		size_t n = cnt < 3 ? cnt : 3;
		unsigned a = p[0];
		unsigned b = n > 1 ? p[1] : 0;	/* missing bytes count as 0 */
		unsigned c = n > 2 ? p[2] : 0;

		b64[0] = b64table[a >> 2];
		b64[1] = b64table[((a & 0x3) << 4) | ((b & 0xf0) >> 4)];
		b64[2] = n > 1 ?
		    b64table[((b & 0xf) << 2) | ((c & 0xc0) >> 6)] : '=';
		b64[3] = n > 2 ? b64table[c & 0x3f] : '=';

		p += n;
		b64 += 4;
		cnt -= n;
	}
}
314
315
316 #define MIME_BASE64_LINE_MAX (4 * 19) /* max line length is 76: see RFC2045 sec 6.8 */
317
318 static void
319 mime_fB64_encode(FILE *fi, FILE *fo, void *cookie __unused)
320 {
321 static char b64[MIME_BASE64_LINE_MAX];
322 static char mem[3 * (MIME_BASE64_LINE_MAX / 4)];
323 int cnt;
324 char *cp;
325 size_t limit;
326 #ifdef __lint__
327 cookie = cookie;
328 #endif
329 limit = 0;
330 if ((cp = value(ENAME_MIME_B64_LINE_MAX)) != NULL)
331 limit = (size_t)atoi(cp);
332 if (limit == 0 || limit > sizeof(b64))
333 limit = sizeof(b64);
334
335 limit = 3 * roundup(limit, 4) / 4;
336 if (limit < 3)
337 limit = 3;
338
339 while ((cnt = fread(mem, sizeof(*mem), limit, fi)) > 0) {
340 mime_bintob64(b64, mem, (size_t)cnt);
341 (void)fwrite(b64, sizeof(*b64), (size_t)4 * roundup(cnt, 3) / 3, fo);
342 (void)putc('\n', fo);
343 }
344 }
345
346 static void
347 mime_fB64_decode(FILE *fi, FILE *fo, void *cookie)
348 {
349 char *line;
350 size_t len;
351 char *buf;
352 size_t buflen;
353
354 buflen = 3 * (MIME_BASE64_LINE_MAX / 4);
355 buf = emalloc(buflen);
356
357 while ((line = fgetln(fi, &len)) != NULL) {
358 ssize_t binlen;
359 if (line[len-1] == '\n') /* forget the trailing newline */
360 len--;
361
362 /* trash trailing white space */
363 for (/* EMPTY */; len > 0 && isblank((unsigned char)line[len-1]); len--)
364 continue;
365
366 /* skip leading white space */
367 for (/* EMPTY */; len > 0 && isblank((unsigned char)line[0]); len--, line++)
368 continue;
369
370 if (len == 0)
371 break;
372
373 if (3 * len > 4 * buflen) {
374 buflen *= 2;
375 buf = erealloc(buf, buflen);
376 }
377
378 binlen = mime_b64tobin(buf, line, len);
379
380 if (binlen <= 0) {
381 (void)fprintf(fo, "WARN: invalid base64 encoding\n");
382 break;
383 }
384 (void)fwrite(buf, 1, (size_t)binlen, fo);
385 }
386
387 free(buf);
388
389 if (cookie)
390 (void)fputc('\n', fo);
391 }
392
393
394 /************************************************************************
395 * Core quoted-printable routines.
396 *
 * Note: the header QP routines are slightly different and buried
398 * inside mime_header.c
399 */
400
/*
 * Decide whether a character must be written in its quoted ("=XX")
 * form.
 *
 * p:   the character in question.
 * end: one past the last character of the input; used to look ahead.
 * l:   length of the current output line, i.e. 0 if the character
 *      would be the first one on its output line.
 *
 * Return: 1 if the character must be quoted, 0 if not.
 *
 * Control characters (other than tab and newline), '=', DEL and
 * everything above 0x7f are always quoted.  A space or tab is quoted
 * only if a newline follows it directly.  An 'F' at the start of a
 * line is quoted if it is followed by "rom" and at least one more
 * character, and a '.' at the start of a line is quoted if a newline
 * follows it directly.
 */
static int
mustquote(unsigned char *p, unsigned char *end, size_t l)
{
#define N	0	/* do not quote */
#define Q	1	/* must quote */
#define SP	2	/* white space */
#define XF	3	/* special character 'F' - maybe quoted */
#define XD	4	/* special character '.' - maybe quoted */
#define EQ	Q	/* '=' must be quoted */
#define TB	SP	/* treat '\t' as a space */
#define NL	N	/* don't quote '\n' (NL) - XXX - quoting here breaks the line length algorithm */
#define CR	Q	/* always quote a '\r' (CR) - it occurs only in a CRLF combo */

	static const signed char quotetab[] = {
		 Q, Q, Q, Q,  Q, Q, Q, Q,  Q,TB,NL, Q,  Q,CR, Q, Q,
		 Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,
		SP, N, N, N,  N, N, N, N,  N, N, N, N,  N, N,XD, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N,EQ, N, N,

		 N, N, N, N,  N, N,XF, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, Q,
	};
	/* The table only covers 7-bit values; 8-bit ones are always quoted. */
	int flag = *p > 0x7f ? Q : quotetab[*p];

	if (flag == N)
		return 0;
	if (flag == Q)
		return 1;
	if (flag == SP)
		return (p + 1 < end && p[1] == '\n'); /* trailing white space */

	/* The remainder are special start-of-line cases. */
	if (l != 0)
		return 0;

	if (flag == XF)	/* line may start with "From" */
		return (p + 4 < end && p[1] == 'r' && p[2] == 'o' && p[3] == 'm');

	if (flag == XD)	/* line may consist of a single dot */
		return (p + 1 < end && p[1] == '\n');

	errx(EXIT_FAILURE, "mustquote: invalid logic: *p=0x%x (%d) flag=%d, l=%zu\n",
	    *p, *p, flag, l);
	/* NOT REACHED */
	return 0;	/* appease GCC */

	/* Do not let any of the table shorthands escape this function. */
#undef N
#undef Q
#undef SP
#undef XF
#undef XD
#undef EQ
#undef TB
#undef NL
#undef CR
}
458
459
#define MIME_QUOTED_LINE_MAX	76  /* QP max length: see RFC2045 sec 6.7 */

/*
 * Write one input line in quoted-printable form.
 *
 * fo:    stream to write to.
 * line:  the input line; it need not be NUL terminated and usually
 *        ends in a newline.
 * len:   number of bytes in 'line'.
 * limit: maximum length of an output line, including the '=' of a
 *        soft line break.  Must not exceed MIME_QUOTED_LINE_MAX; the
 *        re-check after a soft break relies on it being at least 4.
 *
 * Output lines that would get too long are split with a soft line
 * break ("=\n").  A character that ends up first on its output line
 * because of such a break is checked against the start-of-line rules
 * of mustquote() again, so that no output line begins with an
 * unquoted "From" or consists of a single dot.
 */
static void
fput_quoted_line(FILE *fo, char *line, size_t len, size_t limit)
{
	size_t l;	/* length of current output line */
	unsigned char *beg;
	unsigned char *end;
	unsigned char *p;

	assert(limit <= MIME_QUOTED_LINE_MAX);

	beg = (unsigned char *)line;
	end = beg + len;
	l = 0;
	for (p = beg; p < end; p++) {
		int quote = mustquote(p, end, l);

		if (!quote && *p == '\n') {
			/*
			 * The CR of a CRLF pair has been quoted already;
			 * quote the LF as well and hide the real newline
			 * behind a soft line break.
			 */
			if (p > beg && p[-1] == '\r')
				(void)fputs("=0A=", fo);
			(void)putc('\n', fo);
			l = 0;
			continue;
		}

		/*
		 * Keep one column free for the '=' of a soft line
		 * break: a quoted character needs 3 + 1 columns, a
		 * plain one 1 + 1.
		 */
		if (l + (quote ? 4 : 2) > limit) {
			(void)fputs("=\n", fo);
			l = 0;
			/* Now first on its line: the special cases apply. */
			if (!quote)
				quote = mustquote(p, end, l);
		}

		if (quote) {
			(void)fprintf(fo, "=%02X", *p);
			l += 3;
		}
		else {
			(void)putc(*p, fo);
			l++;
		}
	}
	/*
	 * Lines ending in a blank must escape the newline.
	 */
	if (len && isblank((unsigned char)p[-1]))
		(void)fputs("=\n", fo);
}
504
505 static void
506 mime_fQP_encode(FILE *fi, FILE *fo, void *cookie __unused)
507 {
508 char *line;
509 size_t len;
510 char *cp;
511 size_t limit;
512
513 #ifdef __lint__
514 cookie = cookie;
515 #endif
516 limit = 0;
517 if ((cp = value(ENAME_MIME_QP_LINE_MAX)) != NULL)
518 limit = (size_t)atoi(cp);
519 if (limit == 0 || limit > MIME_QUOTED_LINE_MAX)
520 limit = MIME_QUOTED_LINE_MAX;
521 if (limit < 4)
522 limit = 4;
523
524 while ((line = fgetln(fi, &len)) != NULL)
525 fput_quoted_line(fo, line, len, limit);
526 }
527
528 static void
529 mime_fQP_decode(FILE *fi, FILE *fo, void *cookie __unused)
530 {
531 char *line;
532 size_t len;
533
534 #ifdef __lint__
535 cookie = cookie;
536 #endif
537 while ((line = fgetln(fi, &len)) != NULL) {
538 int c;
539 char *p;
540 char *end;
541 end = line + len;
542 for (p = line; p < end; p++) {
543 if (*p == '=') {
544 p++;
545 while (p < end && isblank((unsigned char)*p))
546 p++;
547 if (*p != '\n' && p + 1 < end) {
548 char buf[3];
549 buf[0] = *p++;
550 buf[1] = *p;
551 buf[2] = '\0';
552 c = strtol(buf, NULL, 16);
553 (void)fputc(c, fo);
554 }
555 }
556 else
557 (void)fputc(*p, fo);
558 }
559 }
560 }
561
562
563 /************************************************************************
564 * Routines to select the codec by name.
565 */
566
567 PUBLIC void
568 mime_fio_copy(FILE *fi, FILE *fo, void *cookie __unused)
569 {
570 int c;
571
572 #ifdef __lint__
573 cookie = cookie;
574 #endif
575 while ((c = getc(fi)) != EOF)
576 (void)putc(c, fo);
577
578 (void)fflush(fo);
579 if (ferror(fi)) {
580 warn("read");
581 rewind(fi);
582 return;
583 }
584 if (ferror(fo)) {
585 warn("write");
586 (void)Fclose(fo);
587 rewind(fi);
588 return;
589 }
590 }
591
592
593 static const struct transfer_encoding_s {
594 const char *name;
595 mime_codec_t enc;
596 mime_codec_t dec;
597 } transfer_encoding_tbl[] = {
598 { MIME_TRANSFER_7BIT, mime_fio_copy, mime_fio_copy },
599 { MIME_TRANSFER_8BIT, mime_fio_copy, mime_fio_copy },
600 { MIME_TRANSFER_BINARY, mime_fio_copy, mime_fio_copy },
601 { MIME_TRANSFER_QUOTED, mime_fQP_encode, mime_fQP_decode },
602 { MIME_TRANSFER_BASE64, mime_fB64_encode, mime_fB64_decode },
603 { NULL, NULL, NULL },
604 };
605
606
607 PUBLIC mime_codec_t
608 mime_fio_encoder(const char *ename)
609 {
610 const struct transfer_encoding_s *tep = NULL;
611
612 if (ename == NULL)
613 return NULL;
614
615 for (tep = transfer_encoding_tbl; tep->name; tep++)
616 if (strcasecmp(tep->name, ename) == 0)
617 break;
618 return tep->enc;
619 }
620
621 PUBLIC mime_codec_t
622 mime_fio_decoder(const char *ename)
623 {
624 const struct transfer_encoding_s *tep = NULL;
625
626 if (ename == NULL)
627 return NULL;
628
629 for (tep = transfer_encoding_tbl; tep->name; tep++)
630 if (strcasecmp(tep->name, ename) == 0)
631 break;
632 return tep->dec;
633 }
634
635 /*
636 * This is for use in complete.c and mime.c to get the list of
637 * encoding names without exposing the transfer_encoding_tbl[]. The
638 * first name is returned if called with a pointer to a NULL pointer.
639 * Subsequent calls with the same cookie give successive names. A
640 * NULL return indicates the end of the list.
641 */
642 PUBLIC const char *
643 mime_next_encoding_name(const void **cookie)
644 {
645 const struct transfer_encoding_s *tep;
646
647 tep = *cookie;
648 if (tep == NULL)
649 tep = transfer_encoding_tbl;
650
651 *cookie = tep->name ? &tep[1] : NULL;
652
653 return tep->name;
654 }
655
656
657 #endif /* MIME_SUPPORT */
658