mime_codecs.c revision 1.1 1 /* $NetBSD: mime_codecs.c,v 1.1 2006/10/21 21:37:21 christos Exp $ */
2
3 /*-
4 * Copyright (c) 2006 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Anon Ymous.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
37 */
38
/*
 * This module contains all mime related codecs.  Typically there are
 * two versions: one operating on buffers and one operating on files.
 * All exported routines have a "mime_" prefix.  The file oriented
 * routines have a "mime_f" prefix replacing the "mime_" prefix of the
 * equivalent buffer based version.
 *
 * The file based API should be:
 *
 *   mime_f<name>_{encode,decode}(FILE *in, FILE *out, void *cookie)
 *
 * XXX - currently this naming convention has not been adhered to.
 *
 * where the cookie is a generic way to pass arguments to the routine.
 * This way these routines can be run by run_function() in mime.c.
 *
 * The buffer based API is not as rigid.
 */
57
58 #ifdef MIME_SUPPORT
59
60 #include <sys/cdefs.h>
61 #ifndef __lint__
62 __RCSID("$NetBSD: mime_codecs.c,v 1.1 2006/10/21 21:37:21 christos Exp $");
63 #endif /* not __lint__ */
64
65 #include <assert.h>
66 #include <iconv.h>
67 #include <stdio.h>
68 #include <stdlib.h>
69 #include <util.h>
70
71 #include "def.h"
72 #include "extern.h"
73 #include "mime_codecs.h"
74
75
76 #ifdef CHARSET_SUPPORT
77 /************************************************************************
78 * Core character set conversion routines.
79 *
80 */
81
82 /*
83 * Fault-tolerant iconv() function.
84 *
85 * This routine was borrowed from nail-11.25/mime.c and modified. It
86 * tries to handle errno == EILSEQ by restarting at the next input
87 * byte (is this a good idea?). All other errors are handled by the
88 * caller.
89 */
90 PUBLIC size_t
91 mime_iconv(iconv_t cd, const char **inb, size_t *inbleft, char **outb, size_t *outbleft)
92 {
93 size_t sz = 0;
94
95 while ((sz = iconv(cd, inb, inbleft, outb, outbleft)) == (size_t)-1
96 && errno == EILSEQ) {
97 if (*outbleft > 0) {
98 *(*outb)++ = '?';
99 (*outbleft)--;
100 } else {
101 **outb = '\0';
102 return E2BIG;
103 }
104 if (*inbleft > 0) {
105 (*inb)++;
106 (*inbleft)--;
107 } else {
108 **outb = '\0';
109 break;
110 }
111 }
112 return sz;
113 }
114
/*
 * This routine was mostly borrowed from src/usr.bin/iconv/iconv.c.
 * We don't care about the invalid character count, so don't bother
 * with __iconv().  We do care about robustness, so call mime_iconv()
 * above to try to recover from errors.
 */
121 #define INBUFSIZE 1024
122 #define OUTBUFSIZE (INBUFSIZE * 2)
123
124 PUBLIC void
125 mime_ficonv(FILE *fi, FILE *fo, void *cookie)
126 {
127 char inbuf[INBUFSIZE], outbuf[OUTBUFSIZE], *out;
128 const char *in;
129 size_t inbytes, outbytes, ret;
130 iconv_t cd;
131
132 /*
133 * NOTE: iconv_t is actually a pointer typedef, so this
134 * conversion is not what it appears to be!
135 */
136 cd = (iconv_t)cookie;
137
138 while ((inbytes = fread(inbuf, 1, INBUFSIZE, fi)) > 0) {
139 in = inbuf;
140 while (inbytes > 0) {
141 out = outbuf;
142 outbytes = OUTBUFSIZE;
143 ret = mime_iconv(cd, &in, &inbytes, &out, &outbytes);
144 if (ret == (size_t)-1 && errno != E2BIG) {
145 if (errno != EINVAL || in == inbuf) {
146 /* XXX - what is proper here?
147 * Just copy out the remains? */
148 (void)fprintf(fo,
149 "\n\t[ iconv truncated message: %s ]\n\n",
150 strerror(errno));
151 return;
152 }
153 /*
154 * If here: errno == EINVAL && in != inbuf
155 */
156 /* incomplete input character */
157 (void)memmove(inbuf, in, inbytes);
158 ret = fread(inbuf + inbytes, 1,
159 INBUFSIZE - inbytes, fi);
160 if (ret == 0) {
161 if (feof(fi)) {
162 (void)fprintf(fo,
163 "\n\t[ unexpected end of file; "
164 "the last character is "
165 "incomplete. ]\n\n");
166 return;
167 }
168 (void)fprintf(fo,
169 "\n\t[ fread(): %s ]\n\n",
170 strerror(errno));
171 return;
172 }
173 in = inbuf;
174 inbytes += ret;
175
176 }
177 if (outbytes < OUTBUFSIZE)
178 (void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo);
179 }
180 }
181 /* reset the shift state of the output buffer */
182 outbytes = OUTBUFSIZE;
183 out = outbuf;
184 ret = iconv(cd, NULL, NULL, &out, &outbytes);
185 if (ret == (size_t)-1) {
186 (void)fprintf(fo, "\n\t[ iconv(): %s ]\n\n",
187 strerror(errno));
188 return;
189 }
190 if (outbytes < OUTBUFSIZE)
191 (void)fwrite(outbuf, 1, OUTBUFSIZE - outbytes, fo);
192 }
193
194 #endif /* CHARSET_SUPPORT */
195
196
197
198 /************************************************************************
199 * Core base64 routines
200 *
201 * Defined in sec 6.8 of RFC 2045.
202 */
203
/*
 * Decode a base64 buffer.
 *
 * bin: buffer to hold the decoded (binary) result (see note 1).
 * b64: buffer holding the encoded (base64) source.
 * cnt: number of bytes in the b64 buffer to decode (see note 2).
 *
 * Return: the number of bytes written to the 'bin' buffer or -1 on
 * error.  Decoding stops at the first group containing '=' padding.
 * NOTES:
 * 1) It is the callers responsibility to ensure that bin is large
 *    enough to hold the result (3 bytes per 4 bytes of input).
 * 2) The b64 buffer should always contain a multiple of 4 bytes of
 *    data!  A trailing partial group is never read; reaching one is
 *    reported as an error.
 */
PUBLIC ssize_t
mime_b64tobin(char *bin, const char *b64, size_t cnt)
{
	/* Sextet value of each ASCII character: -1 invalid, -2 is '='. */
	static const signed char b64index[] = {
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
		-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
		52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-2,-1,-1,
		-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
		15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
		-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
		41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
	};
	enum { B64_BAD = -1, B64_PAD = -2 };
	unsigned char *p = (unsigned char *)bin;
	const char *end = b64 + cnt;

	for (/* EMPTY */; end - b64 >= 4; b64 += 4) {
		int v[4];
		int i;

		for (i = 0; i < 4; i++) {
			/*
			 * Index through unsigned char so that bytes >= 0x80
			 * are rejected whatever the signedness of char.
			 */
			unsigned char ch = (unsigned char)b64[i];
			v[i] = ch < sizeof(b64index) ? b64index[ch] : B64_BAD;
		}

		/*
		 * The first two characters must be data; the last two may
		 * be data or padding.  Validate before writing anything.
		 */
		if (v[0] < 0 || v[1] < 0 || v[2] == B64_BAD || v[3] == B64_BAD)
			return -1;

		*p++ = (unsigned char)((v[0] << 2) | (v[1] >> 4));
		if (v[2] == B64_PAD) {		/* got "xx==" */
			if (v[3] != B64_PAD)
				return -1;
			return p - (unsigned char *)bin;
		}
		*p++ = (unsigned char)(((v[1] & 0x0f) << 4) | (v[2] >> 2));
		if (v[3] == B64_PAD)		/* got "xxx=" */
			return p - (unsigned char *)bin;
		*p++ = (unsigned char)(((v[2] & 0x03) << 6) | v[3]);
	}

	/* Fewer than 4 bytes left over: the input was not a whole group. */
	if (b64 != end)
		return -1;

	return p - (unsigned char *)bin;
}
268
/*
 * Encode a buffer as a base64 result.
 *
 * b64: buffer to hold the encoded (base64) result (see note).
 * bin: buffer holding the binary source.
 * cnt: number of bytes in the bin buffer to encode.
 *
 * Every group of up to 3 input bytes yields exactly 4 output
 * characters; a final short group is padded with '='.  The result is
 * not NUL terminated.
 *
 * NOTE: it is the callers responsibility to ensure that 'b64' is
 * large enough to hold the result.
 */
PUBLIC void
mime_bintob64(char *b64, const char *bin, size_t cnt)
{
	static const char b64table[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
	const unsigned char *p = (const unsigned char *)bin;

	while (cnt > 0) {
		/* Bytes available for this group: 1, 2 or 3. */
		size_t n = cnt < 3 ? cnt : 3;
		unsigned a = p[0];
		/* Never read past the end of 'bin' on a short last group. */
		unsigned b = n > 1 ? p[1] : 0;
		unsigned c = n > 2 ? p[2] : 0;

		b64[0] = b64table[a >> 2];
		b64[1] = b64table[((a & 0x3) << 4) | (b >> 4)];
		b64[2] = n > 1 ? b64table[((b & 0xf) << 2) | (c >> 6)] : '=';
		b64[3] = n > 2 ? b64table[c & 0x3f] : '=';

		p += n;
		b64 += 4;
		cnt -= n;
	}
}
314
315
316 #define MIME_BASE64_LINE_MAX (4 * 19) /* max line length is 76: see RFC2045 sec 6.8 */
317
318 static void
319 mime_fB64_encode(FILE *fi, FILE *fo, void *cookie __unused)
320 {
321 static char b64[MIME_BASE64_LINE_MAX];
322 static char mem[3 * (MIME_BASE64_LINE_MAX / 4)];
323 int cnt;
324 char *cp;
325 size_t limit;
326 #ifdef __lint__
327 cookie = cookie;
328 #endif
329 limit = 0;
330 if ((cp = value(ENAME_MIME_B64_LINE_MAX)) != NULL)
331 limit = (size_t)atoi(cp);
332 if (limit == 0 || limit > sizeof(b64))
333 limit = sizeof(b64);
334
335 limit = 3 * roundup(limit, 4) / 4;
336 if (limit < 3)
337 limit = 3;
338
339 while ((cnt = fread(mem, sizeof(*mem), limit, fi)) > 0) {
340 mime_bintob64(b64, mem, (size_t)cnt);
341 (void)fwrite(b64, sizeof(*b64), (size_t)4 * roundup(cnt, 3) / 3, fo);
342 (void)putc('\n', fo);
343 }
344 }
345
346 static void
347 mime_fB64_decode(FILE *fi, FILE *fo, void *cookie)
348 {
349 char *line;
350 size_t len;
351 char *buf;
352 size_t buflen;
353 int add_lf;
354 add_lf = (int)cookie;
355
356 buflen = 3 * (MIME_BASE64_LINE_MAX / 4);
357 buf = emalloc(buflen);
358
359 while ((line = fgetln(fi, &len)) != NULL) {
360 ssize_t binlen;
361 if (line[len-1] == '\n') /* forget the trailing newline */
362 len--;
363
364 /* trash trailing white space */
365 for (/* EMPTY */; len > 0 && isblank((unsigned char)line[len-1]); len--)
366 continue;
367
368 /* skip leading white space */
369 for (/* EMPTY */; len > 0 && isblank((unsigned char)line[0]); len--, line++)
370 continue;
371
372 if (len == 0)
373 break;
374
375 if (3 * len > 4 * buflen) {
376 buflen *= 2;
377 buf = erealloc(buf, buflen);
378 }
379
380 binlen = mime_b64tobin(buf, line, len);
381
382 if (binlen <= 0) {
383 (void)fprintf(fo, "WARN: invalid base64 encoding\n");
384 break;
385 }
386 (void)fwrite(buf, 1, (size_t)binlen, fo);
387 }
388
389 free(buf);
390
391 if (add_lf)
392 (void)fputc('\n', fo);
393 }
394
395
/************************************************************************
 * Core quoted-printable routines.
 *
 * Note: the header QP routines are slightly different and buried
 * inside mime_header.c
 */
402
/*
 * Decide whether the character at 'p' must be quoted-printable
 * encoded.
 *
 * p:   the character under consideration.
 * end: one past the last character of the input line.
 * l:   length of the current output line so far; 0 means 'p' will be
 *      the first character on its output line.
 *
 * Return: 1 if the character must be written as "=XX", 0 if it may be
 * written literally.
 */
static int
mustquote(unsigned char *p, unsigned char *end, size_t l)
{
#define N	0	/* do not quote */
#define Q	1	/* must quote */
#define SP	2	/* white space */
#define XF	3	/* special character 'F' - maybe quoted */
#define XD	4	/* special character '.' - maybe quoted */
#define EQ	Q	/* '=' must be quoted */
#define TB	SP	/* treat '\t' as a space */
#define NL	N	/* don't quote '\n' (NL) - XXX - quoting here breaks the line length algorithm */
#define CR	Q	/* always quote a '\r' (CR) - it occurs only in a CRLF combo */

	/* Classification of the 7-bit characters. */
	static const signed char quotetab[] = {
		 Q, Q, Q, Q,  Q, Q, Q, Q,  Q,TB,NL, Q,  Q,CR, Q, Q,
		 Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,  Q, Q, Q, Q,
		SP, N, N, N,  N, N, N, N,  N, N, N, N,  N, N,XD, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N,EQ, N, N,

		 N, N, N, N,  N, N,XF, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, N,
		 N, N, N, N,  N, N, N, N,  N, N, N, N,  N, N, N, Q,
	};
	/* Anything with the high bit set is always quoted. */
	int flag = *p > 0x7f ? Q : quotetab[*p];

	if (flag == N)
		return 0;
	if (flag == Q)
		return 1;
	if (flag == SP)
		return (p + 1 < end && p[1] == '\n'); /* trailing white space */

	/* The remainder are special start-of-line cases. */
	if (l != 0)
		return 0;

	if (flag == XF)	/* line may start with "From" */
		return (p + 4 < end && p[1] == 'r' && p[2] == 'o' && p[3] == 'm');

	if (flag == XD)	/* line may consist of a single dot */
		return (p + 1 < end && p[1] == '\n');

	/* 'l' is a size_t, so it must be printed with %zu. */
	errx(EXIT_FAILURE, "mustquote: invalid logic: *p=0x%x (%d) flag=%d, l=%zu\n",
	    *p, *p, flag, l);
	/* NOT REACHED */
	return 0;	/* appease GCC */

#undef N
#undef Q
#undef SP
#undef XF
#undef XD
#undef EQ
#undef TB
#undef NL
#undef CR
}
460
461
462 #define MIME_QUOTED_LINE_MAX 76 /* QP max length: see RFC2045 sec 6.7 */
463
464 static void
465 fput_quoted_line(FILE *fo, char *line, size_t len, size_t limit)
466 {
467 size_t l; /* length of current output line */
468 unsigned char *beg;
469 unsigned char *end;
470 unsigned char *p;
471
472 assert(limit <= MIME_QUOTED_LINE_MAX);
473
474 beg = (unsigned char*)line;
475 end = beg + len;
476 l = 0;
477 for (p = (unsigned char*)line; p < end; p++) {
478 if (mustquote(p, end, l)) {
479 if (l + 4 > limit) {
480 (void)fputs("=\n", fo);
481 l = 0;
482 }
483 (void)fprintf(fo, "=%02X", *p);
484 l += 3;
485 }
486 else {
487 if (*p == '\n') {
488 if (p > beg && p[-1] == '\r')
489 (void)fputs("=0A=", fo);
490 l = (size_t)-1;
491 }
492 else if (l + 2 > limit) {
493 (void)fputs("=\n", fo);
494 l = 0;
495 }
496 (void)putc(*p, fo);
497 l++;
498 }
499 }
500 /*
501 * Lines ending in a blank must escape the newline.
502 */
503 if (len && isblank((unsigned char)p[-1]))
504 (void)fputs("=\n", fo);
505 }
506
507 static void
508 mime_fQP_encode(FILE *fi, FILE *fo, void *cookie __unused)
509 {
510 char *line;
511 size_t len;
512 char *cp;
513 size_t limit;
514
515 #ifdef __lint__
516 cookie = cookie;
517 #endif
518 limit = 0;
519 if ((cp = value(ENAME_MIME_QP_LINE_MAX)) != NULL)
520 limit = (size_t)atoi(cp);
521 if (limit == 0 || limit > MIME_QUOTED_LINE_MAX)
522 limit = MIME_QUOTED_LINE_MAX;
523 if (limit < 4)
524 limit = 4;
525
526 while ((line = fgetln(fi, &len)) != NULL)
527 fput_quoted_line(fo, line, len, limit);
528 }
529
530 static void
531 mime_fQP_decode(FILE *fi, FILE *fo, void *cookie __unused)
532 {
533 char *line;
534 size_t len;
535
536 #ifdef __lint__
537 cookie = cookie;
538 #endif
539 while ((line = fgetln(fi, &len)) != NULL) {
540 int c;
541 char *p;
542 char *end;
543 end = line + len;
544 for (p = line; p < end; p++) {
545 if (*p == '=') {
546 p++;
547 while (p < end && isblank((unsigned char)*p))
548 p++;
549 if (*p != '\n' && p + 1 < end) {
550 char buf[3];
551 buf[0] = *p++;
552 buf[1] = *p;
553 buf[2] = '\0';
554 c = strtol(buf, NULL, 16);
555 (void)fputc(c, fo);
556 }
557 }
558 else
559 (void)fputc(*p, fo);
560 }
561 }
562 }
563
564
565 /************************************************************************
566 * Routines to select the codec by name.
567 */
568
569 PUBLIC void
570 mime_fio_copy(FILE *fi, FILE *fo, void *cookie __unused)
571 {
572 int c;
573
574 #ifdef __lint__
575 cookie = cookie;
576 #endif
577 while ((c = getc(fi)) != EOF)
578 (void)putc(c, fo);
579
580 (void)fflush(fo);
581 if (ferror(fi)) {
582 warn("read");
583 rewind(fi);
584 return;
585 }
586 if (ferror(fo)) {
587 warn("write");
588 (void)Fclose(fo);
589 rewind(fi);
590 return;
591 }
592 }
593
594
595 static const struct transfer_encoding_s {
596 const char *name;
597 mime_codec_t enc;
598 mime_codec_t dec;
599 } transfer_encoding_tbl[] = {
600 { MIME_TRANSFER_7BIT, mime_fio_copy, mime_fio_copy },
601 { MIME_TRANSFER_8BIT, mime_fio_copy, mime_fio_copy },
602 { MIME_TRANSFER_BINARY, mime_fio_copy, mime_fio_copy },
603 { MIME_TRANSFER_QUOTED, mime_fQP_encode, mime_fQP_decode },
604 { MIME_TRANSFER_BASE64, mime_fB64_encode, mime_fB64_decode },
605 { NULL, NULL, NULL },
606 };
607
608
609 PUBLIC mime_codec_t
610 mime_fio_encoder(const char *ename)
611 {
612 const struct transfer_encoding_s *tep = NULL;
613
614 if (ename == NULL)
615 return NULL;
616
617 for (tep = transfer_encoding_tbl; tep->name; tep++)
618 if (strcasecmp(tep->name, ename) == 0)
619 break;
620 return tep->enc;
621 }
622
623 PUBLIC mime_codec_t
624 mime_fio_decoder(const char *ename)
625 {
626 const struct transfer_encoding_s *tep = NULL;
627
628 if (ename == NULL)
629 return NULL;
630
631 for (tep = transfer_encoding_tbl; tep->name; tep++)
632 if (strcasecmp(tep->name, ename) == 0)
633 break;
634 return tep->dec;
635 }
636
637 /*
638 * This is for use in complete.c and mime.c to get the list of
639 * encoding names without exposing the transfer_encoding_tbl[]. The
640 * first name is returned if called with a pointer to a NULL pointer.
641 * Subsequent calls with the same cookie give successive names. A
642 * NULL return indicates the end of the list.
643 */
644 PUBLIC const char *
645 mime_next_encoding_name(const void **cookie)
646 {
647 const struct transfer_encoding_s *tep;
648
649 tep = *cookie;
650 if (tep == NULL)
651 tep = transfer_encoding_tbl;
652
653 *cookie = tep->name ? &tep[1] : NULL;
654
655 return tep->name;
656 }
657
658
659 #endif /* MIME_SUPPORT */
660