xref: /freebsd/contrib/sendmail/src/mime.c (revision 1b6c76a2fe091c74f08427e6c870851025a9cf67)
1 /*
2  * Copyright (c) 1998-2000 Sendmail, Inc. and its suppliers.
3  *	All rights reserved.
4  * Copyright (c) 1994, 1996-1997 Eric P. Allman.  All rights reserved.
5  * Copyright (c) 1994
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * By using this file, you agree to the terms and conditions set
9  * forth in the LICENSE file which can be found at the top level of
10  * the sendmail distribution.
11  *
12  */
13 
14 #include <sendmail.h>
15 #include <string.h>
16 
#ifndef lint
/* version identification string for this file; left out when linting */
static char id[] = "@(#)$Id: mime.c,v 8.94.16.3 2000/10/09 02:46:10 gshapiro Exp $";
#endif /* ! lint */

/*
**  Forward declarations of the file-local helpers defined further down.
**  __P() wraps the parameter lists so the prototypes can be suppressed
**  for compilers that do not understand them (NOTE(review): __P is
**  supplied by a header outside this file -- confirm).
*/

static int	isboundary __P((char *, char **));
static int	mimeboundary __P((char *, char **));
static int	mime_fromqp __P((u_char *, u_char **, int, int));
static int	mime_getchar __P((FILE *, char **, int *));
static int	mime_getchar_crlf __P((FILE *, char **, int *));
26 
27 /*
28 **  MIME support.
29 **
30 **	I am indebted to John Beck of Hewlett-Packard, who contributed
31 **	his code to me for inclusion.  As it turns out, I did not use
32 **	his code since he used a "minimum change" approach that used
33 **	several temp files, and I wanted a "minimum impact" approach
34 **	that would avoid copying.  However, looking over his code
35 **	helped me cement my understanding of the problem.
36 **
37 **	I also looked at, but did not directly use, Nathaniel
38 **	Borenstein's "code.c" module.  Again, it functioned as
39 **	a file-to-file translator, which did not fit within my
40 **	design bounds, but it was a useful base for understanding
41 **	the problem.
42 */
43 
44 #if MIME8TO7
45 
/*
**  Character sets for hex and base64 encoding.  Each array is indexed
**  by the numeric value of a digit (0-15 resp. 0-63) and yields the
**  character that represents that digit in the encoded output.
*/
static char	Base16Code[] =	"0123456789ABCDEF";
static char	Base64Code[] =	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
**  Types of MIME boundaries.  The numeric values double as indices
**  into MimeBoundaryNames[] below, so the two must be kept in step.
*/
# define MBT_SYNTAX	0	/* syntax error */
# define MBT_NOTSEP	1	/* not a boundary */
# define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
# define MBT_FINAL	3	/* final boundary (trailing -- included) */

/* printable names of the MBT_* codes, used only in debugging output */
static char	*MimeBoundaryNames[] =
{
	"SYNTAX",	"NOTSEP",	"INTERMED",	"FINAL"
};

/*
**  When TRUE, mime_getchar_crlf() expands each NL it reads into CR LF.
**  Set by mime8to7() for every body part it processes.
*/
static bool	MapNLtoCRLF;
62 
/*
**  MIME8TO7 -- output 8 bit body in 7 bit format
**
**	The header has already been output -- this has to do the
**	8 to 7 bit conversion.  It would be easy if we didn't have
**	to deal with nested formats (multipart/xxx and message/rfc822).
**
**	We won't be called if we don't have to do a conversion, and
**	appropriate MIME-Version: and Content-Type: fields have been
**	output.  Any Content-Transfer-Encoding: field has not been
**	output, and we can add it here.
**
**	Parameters:
**		mci -- mailer connection information.
**		header -- the header for this body part.
**		e -- envelope.
**		boundaries -- the currently pending message boundaries,
**			as a NULL-terminated array.  The array itself
**			must always be supplied; boundaries[0] is NULL
**			if we are processing the outer portion.
**		flags -- to tweak processing.
**
**	Returns:
**		An indicator of what terminated the message part:
**		  MBT_FINAL -- the final boundary
**		  MBT_INTERMED -- an intermediate boundary
**		  MBT_NOTSEP -- an end of file
*/
89 
/*
**  One "name=value" parameter taken from a Content-Type: header.
**  mime8to7() fills an array of these; both members point at tokens
**  produced by prescan(), they are not separately allocated copies.
*/

struct args
{
	char	*a_field;	/* name of field */
	char	*a_value;	/* value of that field */
};
95 
96 int
97 mime8to7(mci, header, e, boundaries, flags)
98 	register MCI *mci;
99 	HDR *header;
100 	register ENVELOPE *e;
101 	char **boundaries;
102 	int flags;
103 {
104 	register char *p;
105 	int linelen;
106 	int bt;
107 	off_t offset;
108 	size_t sectionsize, sectionhighbits;
109 	int i;
110 	char *type;
111 	char *subtype;
112 	char *cte;
113 	char **pvp;
114 	int argc = 0;
115 	char *bp;
116 	bool use_qp = FALSE;
117 	struct args argv[MAXMIMEARGS];
118 	char bbuf[128];
119 	char buf[MAXLINE];
120 	char pvpbuf[MAXLINE];
121 	extern u_char MimeTokenTab[256];
122 
123 	if (tTd(43, 1))
124 	{
125 		dprintf("mime8to7: flags = %x, boundaries =", flags);
126 		if (boundaries[0] == NULL)
127 			dprintf(" <none>");
128 		else
129 		{
130 			for (i = 0; boundaries[i] != NULL; i++)
131 				dprintf(" %s", boundaries[i]);
132 		}
133 		dprintf("\n");
134 	}
135 	MapNLtoCRLF = TRUE;
136 	p = hvalue("Content-Transfer-Encoding", header);
137 	if (p == NULL ||
138 	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
139 			   MimeTokenTab)) == NULL ||
140 	    pvp[0] == NULL)
141 	{
142 		cte = NULL;
143 	}
144 	else
145 	{
146 		cataddr(pvp, NULL, buf, sizeof buf, '\0');
147 		cte = newstr(buf);
148 	}
149 
150 	type = subtype = NULL;
151 	p = hvalue("Content-Type", header);
152 	if (p == NULL)
153 	{
154 		if (bitset(M87F_DIGEST, flags))
155 			p = "message/rfc822";
156 		else
157 			p = "text/plain";
158 	}
159 	if (p != NULL &&
160 	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
161 			   MimeTokenTab)) != NULL &&
162 	    pvp[0] != NULL)
163 	{
164 		if (tTd(43, 40))
165 		{
166 			for (i = 0; pvp[i] != NULL; i++)
167 				dprintf("pvp[%d] = \"%s\"\n", i, pvp[i]);
168 		}
169 		type = *pvp++;
170 		if (*pvp != NULL && strcmp(*pvp, "/") == 0 &&
171 		    *++pvp != NULL)
172 		{
173 			subtype = *pvp++;
174 		}
175 
176 		/* break out parameters */
177 		while (*pvp != NULL && argc < MAXMIMEARGS)
178 		{
179 			/* skip to semicolon separator */
180 			while (*pvp != NULL && strcmp(*pvp, ";") != 0)
181 				pvp++;
182 			if (*pvp++ == NULL || *pvp == NULL)
183 				break;
184 
185 			/* complain about empty values */
186 			if (strcmp(*pvp, ";") == 0)
187 			{
188 				usrerr("mime8to7: Empty parameter in Content-Type header");
189 
190 				/* avoid bounce loops */
191 				e->e_flags |= EF_DONT_MIME;
192 				continue;
193 			}
194 
195 			/* extract field name */
196 			argv[argc].a_field = *pvp++;
197 
198 			/* see if there is a value */
199 			if (*pvp != NULL && strcmp(*pvp, "=") == 0 &&
200 			    (*++pvp == NULL || strcmp(*pvp, ";") != 0))
201 			{
202 				argv[argc].a_value = *pvp;
203 				argc++;
204 			}
205 		}
206 	}
207 
208 	/* check for disaster cases */
209 	if (type == NULL)
210 		type = "-none-";
211 	if (subtype == NULL)
212 		subtype = "-none-";
213 
214 	/* don't propogate some flags more than one level into the message */
215 	flags &= ~M87F_DIGEST;
216 
217 	/*
218 	**  Check for cases that can not be encoded.
219 	**
220 	**	For example, you can't encode certain kinds of types
221 	**	or already-encoded messages.  If we find this case,
222 	**	just copy it through.
223 	*/
224 
225 	snprintf(buf, sizeof buf, "%.100s/%.100s", type, subtype);
226 	if (wordinclass(buf, 'n') || (cte != NULL && !wordinclass(cte, 'e')))
227 		flags |= M87F_NO8BIT;
228 
229 # ifdef USE_B_CLASS
230 	if (wordinclass(buf, 'b') || wordinclass(type, 'b'))
231 		MapNLtoCRLF = FALSE;
232 # endif /* USE_B_CLASS */
233 	if (wordinclass(buf, 'q') || wordinclass(type, 'q'))
234 		use_qp = TRUE;
235 
236 	/*
237 	**  Multipart requires special processing.
238 	**
239 	**	Do a recursive descent into the message.
240 	*/
241 
242 	if (strcasecmp(type, "multipart") == 0 &&
243 	    (!bitset(M87F_NO8BIT, flags) || bitset(M87F_NO8TO7, flags)))
244 	{
245 
246 		if (strcasecmp(subtype, "digest") == 0)
247 			flags |= M87F_DIGEST;
248 
249 		for (i = 0; i < argc; i++)
250 		{
251 			if (strcasecmp(argv[i].a_field, "boundary") == 0)
252 				break;
253 		}
254 		if (i >= argc || argv[i].a_value == NULL)
255 		{
256 			usrerr("mime8to7: Content-Type: \"%s\": %s boundary",
257 				i >= argc ? "missing" : "bogus", p);
258 			p = "---";
259 
260 			/* avoid bounce loops */
261 			e->e_flags |= EF_DONT_MIME;
262 		}
263 		else
264 		{
265 			p = argv[i].a_value;
266 			stripquotes(p);
267 		}
268 		if (strlcpy(bbuf, p, sizeof bbuf) >= sizeof bbuf)
269 		{
270 			usrerr("mime8to7: multipart boundary \"%s\" too long",
271 				p);
272 
273 			/* avoid bounce loops */
274 			e->e_flags |= EF_DONT_MIME;
275 		}
276 
277 		if (tTd(43, 1))
278 			dprintf("mime8to7: multipart boundary \"%s\"\n", bbuf);
279 		for (i = 0; i < MAXMIMENESTING; i++)
280 		{
281 			if (boundaries[i] == NULL)
282 				break;
283 		}
284 		if (i >= MAXMIMENESTING)
285 		{
286 			usrerr("mime8to7: multipart nesting boundary too deep");
287 
288 			/* avoid bounce loops */
289 			e->e_flags |= EF_DONT_MIME;
290 		}
291 		else
292 		{
293 			boundaries[i] = bbuf;
294 			boundaries[i + 1] = NULL;
295 		}
296 		mci->mci_flags |= MCIF_INMIME;
297 
298 		/* skip the early "comment" prologue */
299 		putline("", mci);
300 		mci->mci_flags &= ~MCIF_INHEADER;
301 		bt = MBT_FINAL;
302 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
303 		{
304 			bt = mimeboundary(buf, boundaries);
305 			if (bt != MBT_NOTSEP)
306 				break;
307 			putxline(buf, strlen(buf), mci, PXLF_MAPFROM|PXLF_STRIP8BIT);
308 			if (tTd(43, 99))
309 				dprintf("  ...%s", buf);
310 		}
311 		if (feof(e->e_dfp))
312 			bt = MBT_FINAL;
313 		while (bt != MBT_FINAL)
314 		{
315 			auto HDR *hdr = NULL;
316 
317 			snprintf(buf, sizeof buf, "--%s", bbuf);
318 			putline(buf, mci);
319 			if (tTd(43, 35))
320 				dprintf("  ...%s\n", buf);
321 			collect(e->e_dfp, FALSE, &hdr, e);
322 			if (tTd(43, 101))
323 				putline("+++after collect", mci);
324 			putheader(mci, hdr, e, flags);
325 			if (tTd(43, 101))
326 				putline("+++after putheader", mci);
327 			bt = mime8to7(mci, hdr, e, boundaries, flags);
328 		}
329 		snprintf(buf, sizeof buf, "--%s--", bbuf);
330 		putline(buf, mci);
331 		if (tTd(43, 35))
332 			dprintf("  ...%s\n", buf);
333 		boundaries[i] = NULL;
334 		mci->mci_flags &= ~MCIF_INMIME;
335 
336 		/* skip the late "comment" epilogue */
337 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
338 		{
339 			bt = mimeboundary(buf, boundaries);
340 			if (bt != MBT_NOTSEP)
341 				break;
342 			putxline(buf, strlen(buf), mci, PXLF_MAPFROM|PXLF_STRIP8BIT);
343 			if (tTd(43, 99))
344 				dprintf("  ...%s", buf);
345 		}
346 		if (feof(e->e_dfp))
347 			bt = MBT_FINAL;
348 		if (tTd(43, 3))
349 			dprintf("\t\t\tmime8to7=>%s (multipart)\n",
350 				MimeBoundaryNames[bt]);
351 		return bt;
352 	}
353 
354 	/*
355 	**  Message/xxx types -- recurse exactly once.
356 	**
357 	**	Class 's' is predefined to have "rfc822" only.
358 	*/
359 
360 	if (strcasecmp(type, "message") == 0)
361 	{
362 		if (!wordinclass(subtype, 's'))
363 		{
364 			flags |= M87F_NO8BIT;
365 		}
366 		else
367 		{
368 			auto HDR *hdr = NULL;
369 
370 			putline("", mci);
371 
372 			mci->mci_flags |= MCIF_INMIME;
373 			collect(e->e_dfp, FALSE, &hdr, e);
374 			if (tTd(43, 101))
375 				putline("+++after collect", mci);
376 			putheader(mci, hdr, e, flags);
377 			if (tTd(43, 101))
378 				putline("+++after putheader", mci);
379 			if (hvalue("MIME-Version", hdr) == NULL)
380 				putline("MIME-Version: 1.0", mci);
381 			bt = mime8to7(mci, hdr, e, boundaries, flags);
382 			mci->mci_flags &= ~MCIF_INMIME;
383 			return bt;
384 		}
385 	}
386 
387 	/*
388 	**  Non-compound body type
389 	**
390 	**	Compute the ratio of seven to eight bit characters;
391 	**	use that as a heuristic to decide how to do the
392 	**	encoding.
393 	*/
394 
395 	sectionsize = sectionhighbits = 0;
396 	if (!bitset(M87F_NO8BIT|M87F_NO8TO7, flags))
397 	{
398 		/* remember where we were */
399 		offset = ftell(e->e_dfp);
400 		if (offset == -1)
401 			syserr("mime8to7: cannot ftell on df%s", e->e_id);
402 
403 		/* do a scan of this body type to count character types */
404 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
405 		{
406 			if (mimeboundary(buf, boundaries) != MBT_NOTSEP)
407 				break;
408 			for (p = buf; *p != '\0'; p++)
409 			{
410 				/* count bytes with the high bit set */
411 				sectionsize++;
412 				if (bitset(0200, *p))
413 					sectionhighbits++;
414 			}
415 
416 			/*
417 			**  Heuristic: if 1/4 of the first 4K bytes are 8-bit,
418 			**  assume base64.  This heuristic avoids double-reading
419 			**  large graphics or video files.
420 			*/
421 
422 			if (sectionsize >= 4096 &&
423 			    sectionhighbits > sectionsize / 4)
424 				break;
425 		}
426 
427 		/* return to the original offset for processing */
428 		/* XXX use relative seeks to handle >31 bit file sizes? */
429 		if (fseek(e->e_dfp, offset, SEEK_SET) < 0)
430 			syserr("mime8to7: cannot fseek on df%s", e->e_id);
431 		else
432 			clearerr(e->e_dfp);
433 	}
434 
435 	/*
436 	**  Heuristically determine encoding method.
437 	**	If more than 1/8 of the total characters have the
438 	**	eighth bit set, use base64; else use quoted-printable.
439 	**	However, only encode binary encoded data as base64,
440 	**	since otherwise the NL=>CRLF mapping will be a problem.
441 	*/
442 
443 	if (tTd(43, 8))
444 	{
445 		dprintf("mime8to7: %ld high bit(s) in %ld byte(s), cte=%s, type=%s/%s\n",
446 			(long) sectionhighbits, (long) sectionsize,
447 			cte == NULL ? "[none]" : cte,
448 			type == NULL ? "[none]" : type,
449 			subtype == NULL ? "[none]" : subtype);
450 	}
451 	if (cte != NULL && strcasecmp(cte, "binary") == 0)
452 		sectionsize = sectionhighbits;
453 	linelen = 0;
454 	bp = buf;
455 	if (sectionhighbits == 0)
456 	{
457 		/* no encoding necessary */
458 		if (cte != NULL &&
459 		    bitset(MCIF_CVT8TO7|MCIF_CVT7TO8|MCIF_INMIME,
460 			   mci->mci_flags) &&
461 		    !bitset(M87F_NO8TO7, flags))
462 		{
463 			/*
464 			**  Skip _unless_ in MIME mode and potentially
465 			**  converting from 8 bit to 7 bit MIME.  See
466 			**  putheader() for the counterpart where the
467 			**  CTE header is skipped in the opposite
468 			**  situation.
469 			*/
470 
471 			snprintf(buf, sizeof buf,
472 				"Content-Transfer-Encoding: %.200s", cte);
473 			putline(buf, mci);
474 			if (tTd(43, 36))
475 				dprintf("  ...%s\n", buf);
476 		}
477 		putline("", mci);
478 		mci->mci_flags &= ~MCIF_INHEADER;
479 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
480 		{
481 			bt = mimeboundary(buf, boundaries);
482 			if (bt != MBT_NOTSEP)
483 				break;
484 			putline(buf, mci);
485 		}
486 		if (feof(e->e_dfp))
487 			bt = MBT_FINAL;
488 	}
489 	else if (!MapNLtoCRLF ||
490 		 (sectionsize / 8 < sectionhighbits && !use_qp))
491 	{
492 		/* use base64 encoding */
493 		int c1, c2;
494 
495 		if (tTd(43, 36))
496 			dprintf("  ...Content-Transfer-Encoding: base64\n");
497 		putline("Content-Transfer-Encoding: base64", mci);
498 		snprintf(buf, sizeof buf,
499 			"X-MIME-Autoconverted: from 8bit to base64 by %s id %s",
500 			MyHostName, e->e_id);
501 		putline(buf, mci);
502 		putline("", mci);
503 		mci->mci_flags &= ~MCIF_INHEADER;
504 		while ((c1 = mime_getchar_crlf(e->e_dfp, boundaries, &bt)) != EOF)
505 		{
506 			if (linelen > 71)
507 			{
508 				*bp = '\0';
509 				putline(buf, mci);
510 				linelen = 0;
511 				bp = buf;
512 			}
513 			linelen += 4;
514 			*bp++ = Base64Code[(c1 >> 2)];
515 			c1 = (c1 & 0x03) << 4;
516 			c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt);
517 			if (c2 == EOF)
518 			{
519 				*bp++ = Base64Code[c1];
520 				*bp++ = '=';
521 				*bp++ = '=';
522 				break;
523 			}
524 			c1 |= (c2 >> 4) & 0x0f;
525 			*bp++ = Base64Code[c1];
526 			c1 = (c2 & 0x0f) << 2;
527 			c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt);
528 			if (c2 == EOF)
529 			{
530 				*bp++ = Base64Code[c1];
531 				*bp++ = '=';
532 				break;
533 			}
534 			c1 |= (c2 >> 6) & 0x03;
535 			*bp++ = Base64Code[c1];
536 			*bp++ = Base64Code[c2 & 0x3f];
537 		}
538 		*bp = '\0';
539 		putline(buf, mci);
540 	}
541 	else
542 	{
543 		/* use quoted-printable encoding */
544 		int c1, c2;
545 		int fromstate;
546 		BITMAP256 badchars;
547 
548 		/* set up map of characters that must be mapped */
549 		clrbitmap(badchars);
550 		for (c1 = 0x00; c1 < 0x20; c1++)
551 			setbitn(c1, badchars);
552 		clrbitn('\t', badchars);
553 		for (c1 = 0x7f; c1 < 0x100; c1++)
554 			setbitn(c1, badchars);
555 		setbitn('=', badchars);
556 		if (bitnset(M_EBCDIC, mci->mci_mailer->m_flags))
557 			for (p = "!\"#$@[\\]^`{|}~"; *p != '\0'; p++)
558 				setbitn(*p, badchars);
559 
560 		if (tTd(43, 36))
561 			dprintf("  ...Content-Transfer-Encoding: quoted-printable\n");
562 		putline("Content-Transfer-Encoding: quoted-printable", mci);
563 		snprintf(buf, sizeof buf,
564 			"X-MIME-Autoconverted: from 8bit to quoted-printable by %s id %s",
565 			MyHostName, e->e_id);
566 		putline(buf, mci);
567 		putline("", mci);
568 		mci->mci_flags &= ~MCIF_INHEADER;
569 		fromstate = 0;
570 		c2 = '\n';
571 		while ((c1 = mime_getchar(e->e_dfp, boundaries, &bt)) != EOF)
572 		{
573 			if (c1 == '\n')
574 			{
575 				if (c2 == ' ' || c2 == '\t')
576 				{
577 					*bp++ = '=';
578 					*bp++ = Base16Code[(c2 >> 4) & 0x0f];
579 					*bp++ = Base16Code[c2 & 0x0f];
580 				}
581 				if (buf[0] == '.' && bp == &buf[1])
582 				{
583 					buf[0] = '=';
584 					*bp++ = Base16Code[('.' >> 4) & 0x0f];
585 					*bp++ = Base16Code['.' & 0x0f];
586 				}
587 				*bp = '\0';
588 				putline(buf, mci);
589 				linelen = fromstate = 0;
590 				bp = buf;
591 				c2 = c1;
592 				continue;
593 			}
594 			if (c2 == ' ' && linelen == 4 && fromstate == 4 &&
595 			    bitnset(M_ESCFROM, mci->mci_mailer->m_flags))
596 			{
597 				*bp++ = '=';
598 				*bp++ = '2';
599 				*bp++ = '0';
600 				linelen += 3;
601 			}
602 			else if (c2 == ' ' || c2 == '\t')
603 			{
604 				*bp++ = c2;
605 				linelen++;
606 			}
607 			if (linelen > 72 &&
608 			    (linelen > 75 || c1 != '.' ||
609 			     (linelen > 73 && c2 == '.')))
610 			{
611 				if (linelen > 73 && c2 == '.')
612 					bp--;
613 				else
614 					c2 = '\n';
615 				*bp++ = '=';
616 				*bp = '\0';
617 				putline(buf, mci);
618 				linelen = fromstate = 0;
619 				bp = buf;
620 				if (c2 == '.')
621 				{
622 					*bp++ = '.';
623 					linelen++;
624 				}
625 			}
626 			if (bitnset(bitidx(c1), badchars))
627 			{
628 				*bp++ = '=';
629 				*bp++ = Base16Code[(c1 >> 4) & 0x0f];
630 				*bp++ = Base16Code[c1 & 0x0f];
631 				linelen += 3;
632 			}
633 			else if (c1 != ' ' && c1 != '\t')
634 			{
635 				if (linelen < 4 && c1 == "From"[linelen])
636 					fromstate++;
637 				*bp++ = c1;
638 				linelen++;
639 			}
640 			c2 = c1;
641 		}
642 
643 		/* output any saved character */
644 		if (c2 == ' ' || c2 == '\t')
645 		{
646 			*bp++ = '=';
647 			*bp++ = Base16Code[(c2 >> 4) & 0x0f];
648 			*bp++ = Base16Code[c2 & 0x0f];
649 			linelen += 3;
650 		}
651 
652 		if (linelen > 0 || boundaries[0] != NULL)
653 		{
654 			*bp = '\0';
655 			putline(buf, mci);
656 		}
657 
658 	}
659 	if (tTd(43, 3))
660 		dprintf("\t\t\tmime8to7=>%s (basic)\n", MimeBoundaryNames[bt]);
661 	return bt;
662 }
663 /*
664 **  MIME_GETCHAR -- get a character for MIME processing
665 **
666 **	Treats boundaries as EOF.
667 **
668 **	Parameters:
669 **		fp -- the input file.
670 **		boundaries -- the current MIME boundaries.
671 **		btp -- if the return value is EOF, *btp is set to
672 **			the type of the boundary.
673 **
674 **	Returns:
675 **		The next character in the input stream.
676 */
677 
678 static int
679 mime_getchar(fp, boundaries, btp)
680 	register FILE *fp;
681 	char **boundaries;
682 	int *btp;
683 {
684 	int c;
685 	static u_char *bp = NULL;
686 	static int buflen = 0;
687 	static bool atbol = TRUE;	/* at beginning of line */
688 	static int bt = MBT_SYNTAX;	/* boundary type of next EOF */
689 	static u_char buf[128];		/* need not be a full line */
690 	int start = 0;			/* indicates position of - in buffer */
691 
692 	if (buflen == 1 && *bp == '\n')
693 	{
694 		/* last \n in buffer may be part of next MIME boundary */
695 		c = *bp;
696 	}
697 	else if (buflen > 0)
698 	{
699 		buflen--;
700 		return *bp++;
701 	}
702 	else
703 		c = getc(fp);
704 	bp = buf;
705 	buflen = 0;
706 	if (c == '\n')
707 	{
708 		/* might be part of a MIME boundary */
709 		*bp++ = c;
710 		atbol = TRUE;
711 		c = getc(fp);
712 		if (c == '\n')
713 		{
714 			(void) ungetc(c, fp);
715 			return c;
716 		}
717 		start = 1;
718 	}
719 	if (c != EOF)
720 		*bp++ = c;
721 	else
722 		bt = MBT_FINAL;
723 	if (atbol && c == '-')
724 	{
725 		/* check for a message boundary */
726 		c = getc(fp);
727 		if (c != '-')
728 		{
729 			if (c != EOF)
730 				*bp++ = c;
731 			else
732 				bt = MBT_FINAL;
733 			buflen = bp - buf - 1;
734 			bp = buf;
735 			return *bp++;
736 		}
737 
738 		/* got "--", now check for rest of separator */
739 		*bp++ = '-';
740 		while (bp < &buf[sizeof buf - 2] &&
741 		       (c = getc(fp)) != EOF && c != '\n')
742 		{
743 			*bp++ = c;
744 		}
745 		*bp = '\0';
746 		bt = mimeboundary((char *) &buf[start], boundaries);
747 		switch (bt)
748 		{
749 		  case MBT_FINAL:
750 		  case MBT_INTERMED:
751 			/* we have a message boundary */
752 			buflen = 0;
753 			*btp = bt;
754 			return EOF;
755 		}
756 
757 		atbol = c == '\n';
758 		if (c != EOF)
759 			*bp++ = c;
760 	}
761 
762 	buflen = bp - buf - 1;
763 	if (buflen < 0)
764 	{
765 		*btp = bt;
766 		return EOF;
767 	}
768 	bp = buf;
769 	return *bp++;
770 }
771 /*
772 **  MIME_GETCHAR_CRLF -- do mime_getchar, but translate NL => CRLF
773 **
774 **	Parameters:
775 **		fp -- the input file.
776 **		boundaries -- the current MIME boundaries.
777 **		btp -- if the return value is EOF, *btp is set to
778 **			the type of the boundary.
779 **
780 **	Returns:
781 **		The next character in the input stream.
782 */
783 
784 static int
785 mime_getchar_crlf(fp, boundaries, btp)
786 	register FILE *fp;
787 	char **boundaries;
788 	int *btp;
789 {
790 	static bool sendlf = FALSE;
791 	int c;
792 
793 	if (sendlf)
794 	{
795 		sendlf = FALSE;
796 		return '\n';
797 	}
798 	c = mime_getchar(fp, boundaries, btp);
799 	if (c == '\n' && MapNLtoCRLF)
800 	{
801 		sendlf = TRUE;
802 		return '\r';
803 	}
804 	return c;
805 }
806 /*
807 **  MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
808 **
809 **	Parameters:
810 **		line -- the input line.
811 **		boundaries -- the set of currently pending boundaries.
812 **
813 **	Returns:
814 **		MBT_NOTSEP -- if this is not a separator line
815 **		MBT_INTERMED -- if this is an intermediate separator
816 **		MBT_FINAL -- if this is a final boundary
817 **		MBT_SYNTAX -- if this is a boundary for the wrong
818 **			enclosure -- i.e., a syntax error.
819 */
820 
821 static int
822 mimeboundary(line, boundaries)
823 	register char *line;
824 	char **boundaries;
825 {
826 	int type = MBT_NOTSEP;
827 	int i;
828 	int savec;
829 
830 	if (line[0] != '-' || line[1] != '-' || boundaries == NULL)
831 		return MBT_NOTSEP;
832 	i = strlen(line);
833 	if (i > 0 && line[i - 1] == '\n')
834 		i--;
835 
836 	/* strip off trailing whitespace */
837 	while (i > 0 && (line[i - 1] == ' ' || line[i - 1] == '\t'))
838 		i--;
839 	savec = line[i];
840 	line[i] = '\0';
841 
842 	if (tTd(43, 5))
843 		dprintf("mimeboundary: line=\"%s\"... ", line);
844 
845 	/* check for this as an intermediate boundary */
846 	if (isboundary(&line[2], boundaries) >= 0)
847 		type = MBT_INTERMED;
848 	else if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0)
849 	{
850 		/* check for a final boundary */
851 		line[i - 2] = '\0';
852 		if (isboundary(&line[2], boundaries) >= 0)
853 			type = MBT_FINAL;
854 		line[i - 2] = '-';
855 	}
856 
857 	line[i] = savec;
858 	if (tTd(43, 5))
859 		dprintf("%s\n", MimeBoundaryNames[type]);
860 	return type;
861 }
862 /*
863 **  DEFCHARSET -- return default character set for message
864 **
865 **	The first choice for character set is for the mailer
866 **	corresponding to the envelope sender.  If neither that
867 **	nor the global configuration file has a default character
868 **	set defined, return "unknown-8bit" as recommended by
869 **	RFC 1428 section 3.
870 **
871 **	Parameters:
872 **		e -- the envelope for this message.
873 **
874 **	Returns:
875 **		The default character set for that mailer.
876 */
877 
878 char *
879 defcharset(e)
880 	register ENVELOPE *e;
881 {
882 	if (e != NULL && e->e_from.q_mailer != NULL &&
883 	    e->e_from.q_mailer->m_defcharset != NULL)
884 		return e->e_from.q_mailer->m_defcharset;
885 	if (DefaultCharSet != NULL)
886 		return DefaultCharSet;
887 	return "unknown-8bit";
888 }
889 /*
890 **  ISBOUNDARY -- is a given string a currently valid boundary?
891 **
892 **	Parameters:
893 **		line -- the current input line.
894 **		boundaries -- the list of valid boundaries.
895 **
896 **	Returns:
897 **		The index number in boundaries if the line is found.
898 **		-1 -- otherwise.
899 **
900 */
901 
902 static int
903 isboundary(line, boundaries)
904 	char *line;
905 	char **boundaries;
906 {
907 	register int i;
908 
909 	for (i = 0; i <= MAXMIMENESTING && boundaries[i] != NULL; i++)
910 	{
911 		if (strcmp(line, boundaries[i]) == 0)
912 			return i;
913 	}
914 	return -1;
915 }
916 #endif /* MIME8TO7 */
917 
918 #if MIME7TO8
919 
/*
**  MIME7TO8 -- output 7 bit encoded MIME body in 8 bit format
**
**  This is a hack. Supports translating the two 7-bit body-encodings
**  (quoted-printable and base64) to 8-bit coded bodies.
**
**  There is not much point in supporting multipart here, as the UA
**  will be able to deal with encoded MIME bodies if it can parse MIME
**  multipart messages.
**
**  Note also that we won't be called unless it is a text/plain MIME
**  message, encoded base64 or QP and mailer flag '9' has been defined
**  on mailer.
**
**  Contributed by Marius Olaffson <marius@rhi.hi.is>.
**
**	Parameters:
**		mci -- mailer connection information.
**		header -- the header for this body part.
**		e -- envelope.
**
**	Returns:
**		none.
*/
944 
/*
**  Value of each 7-bit character as a base64 digit (0-63), or -1 if
**  the character is not part of the base64 alphabet.
**
**  The element type has to be explicitly signed: whether plain
**  "char" is signed is up to the implementation, and where it is
**  unsigned the -1 entries would read back as 255 and never compare
**  equal to -1.
*/

static signed char index_64[128] =
{
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
	52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
	-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
	15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
	-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
	41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

/* base64 digit value of c; -1 if c is outside 0..127 or not a digit */
# define CHAR64(c)  (((c) < 0 || (c) > 127) ? -1 : index_64[(c)])
958 
959 void
960 mime7to8(mci, header, e)
961 	register MCI *mci;
962 	HDR *header;
963 	register ENVELOPE *e;
964 {
965 	register char *p;
966 	char *cte;
967 	char **pvp;
968 	u_char *fbufp;
969 	char buf[MAXLINE];
970 	u_char fbuf[MAXLINE + 1];
971 	char pvpbuf[MAXLINE];
972 	extern u_char MimeTokenTab[256];
973 
974 	p = hvalue("Content-Transfer-Encoding", header);
975 	if (p == NULL ||
976 	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
977 			   MimeTokenTab)) == NULL ||
978 	    pvp[0] == NULL)
979 	{
980 		/* "can't happen" -- upper level should have caught this */
981 		syserr("mime7to8: unparsable CTE %s", p == NULL ? "<NULL>" : p);
982 
983 		/* avoid bounce loops */
984 		e->e_flags |= EF_DONT_MIME;
985 
986 		/* cheap failsafe algorithm -- should work on text/plain */
987 		if (p != NULL)
988 		{
989 			snprintf(buf, sizeof buf,
990 				"Content-Transfer-Encoding: %s", p);
991 			putline(buf, mci);
992 		}
993 		putline("", mci);
994 		mci->mci_flags &= ~MCIF_INHEADER;
995 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
996 			putline(buf, mci);
997 		return;
998 	}
999 	cataddr(pvp, NULL, buf, sizeof buf, '\0');
1000 	cte = newstr(buf);
1001 
1002 	mci->mci_flags |= MCIF_INHEADER;
1003 	putline("Content-Transfer-Encoding: 8bit", mci);
1004 	snprintf(buf, sizeof buf,
1005 		"X-MIME-Autoconverted: from %.200s to 8bit by %s id %s",
1006 		cte, MyHostName, e->e_id);
1007 	putline(buf, mci);
1008 	putline("", mci);
1009 	mci->mci_flags &= ~MCIF_INHEADER;
1010 
1011 	/*
1012 	**  Translate body encoding to 8-bit.  Supports two types of
1013 	**  encodings; "base64" and "quoted-printable". Assume qp if
1014 	**  it is not base64.
1015 	*/
1016 
1017 	if (strcasecmp(cte, "base64") == 0)
1018 	{
1019 		int c1, c2, c3, c4;
1020 
1021 		fbufp = fbuf;
1022 		while ((c1 = fgetc(e->e_dfp)) != EOF)
1023 		{
1024 			if (isascii(c1) && isspace(c1))
1025 				continue;
1026 
1027 			do
1028 			{
1029 				c2 = fgetc(e->e_dfp);
1030 			} while (isascii(c2) && isspace(c2));
1031 			if (c2 == EOF)
1032 				break;
1033 
1034 			do
1035 			{
1036 				c3 = fgetc(e->e_dfp);
1037 			} while (isascii(c3) && isspace(c3));
1038 			if (c3 == EOF)
1039 				break;
1040 
1041 			do
1042 			{
1043 				c4 = fgetc(e->e_dfp);
1044 			} while (isascii(c4) && isspace(c4));
1045 			if (c4 == EOF)
1046 				break;
1047 
1048 			if (c1 == '=' || c2 == '=')
1049 				continue;
1050 			c1 = CHAR64(c1);
1051 			c2 = CHAR64(c2);
1052 
1053 			*fbufp = (c1 << 2) | ((c2 & 0x30) >> 4);
1054 			if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE])
1055 			{
1056 				if (*--fbufp != '\n' ||
1057 				    (fbufp > fbuf && *--fbufp != '\r'))
1058 					fbufp++;
1059 				putxline((char *) fbuf, fbufp - fbuf,
1060 					 mci, PXLF_MAPFROM);
1061 				fbufp = fbuf;
1062 			}
1063 			if (c3 == '=')
1064 				continue;
1065 			c3 = CHAR64(c3);
1066 			*fbufp = ((c2 & 0x0f) << 4) | ((c3 & 0x3c) >> 2);
1067 			if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE])
1068 			{
1069 				if (*--fbufp != '\n' ||
1070 				    (fbufp > fbuf && *--fbufp != '\r'))
1071 					fbufp++;
1072 				putxline((char *) fbuf, fbufp - fbuf,
1073 					 mci, PXLF_MAPFROM);
1074 				fbufp = fbuf;
1075 			}
1076 			if (c4 == '=')
1077 				continue;
1078 			c4 = CHAR64(c4);
1079 			*fbufp = ((c3 & 0x03) << 6) | c4;
1080 			if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE])
1081 			{
1082 				if (*--fbufp != '\n' ||
1083 				    (fbufp > fbuf && *--fbufp != '\r'))
1084 					fbufp++;
1085 				putxline((char *) fbuf, fbufp - fbuf,
1086 					 mci, PXLF_MAPFROM);
1087 				fbufp = fbuf;
1088 			}
1089 		}
1090 	}
1091 	else
1092 	{
1093 		/* quoted-printable */
1094 		fbufp = fbuf;
1095 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
1096 		{
1097 			if (mime_fromqp((u_char *) buf, &fbufp, 0,
1098 					&fbuf[MAXLINE] - fbufp) == 0)
1099 				continue;
1100 
1101 			if (fbufp - fbuf > 0)
1102 				putxline((char *) fbuf, fbufp - fbuf - 1, mci,
1103 					 PXLF_MAPFROM);
1104 			fbufp = fbuf;
1105 		}
1106 	}
1107 
1108 	/* force out partial last line */
1109 	if (fbufp > fbuf)
1110 	{
1111 		*fbufp = '\0';
1112 		putxline((char *) fbuf, fbufp - fbuf, mci, PXLF_MAPFROM);
1113 	}
1114 	if (tTd(43, 3))
1115 		dprintf("\t\t\tmime7to8 => %s to 8bit done\n", cte);
1116 }
/*
**  The following is based on Borenstein's "codes.c" module, with simplifying
**  changes as we do not deal with multipart, and to do the translation in-core,
**  with an attempt to prevent overrun of output buffers.
**
**  What is needed here are changes to defend this code better against
**  bad encodings. Questionable to always return 0xFF for bad mappings.
*/

/*
**  Value of each 7-bit character as a hexadecimal digit (0-15, upper
**  and lower case letters accepted), or -1 if it is not one.
**
**  The element type has to be explicitly signed: whether plain
**  "char" is signed is up to the implementation, and where it is
**  unsigned the -1 entries would read back as 255, so that the
**  "== -1" tests made on HEXCHAR() could never succeed.
*/

static signed char index_hex[128] =
{
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* hex digit value of c; -1 if c is outside 0..127 or not a hex digit */
# define HEXCHAR(c)  (((c) < 0 || (c) > 127) ? -1 : index_hex[(c)])
1139 
1140 static int
1141 mime_fromqp(infile, outfile, state, maxlen)
1142 	u_char *infile;
1143 	u_char **outfile;
1144 	int state;		/* Decoding body (0) or header (1) */
1145 	int maxlen;		/* Max # of chars allowed in outfile */
1146 {
1147 	int c1, c2;
1148 	int nchar = 0;
1149 
1150 	while ((c1 = *infile++) != '\0')
1151 	{
1152 		if (c1 == '=')
1153 		{
1154 			if ((c1 = *infile++) == 0)
1155 				break;
1156 
1157 			if (c1 == '\n' || (c1 = HEXCHAR(c1)) == -1)
1158 			{
1159 				/* ignore it */
1160 				if (state == 0)
1161 					return 0;
1162 			}
1163 			else
1164 			{
1165 				do
1166 				{
1167 					if ((c2 = *infile++) == '\0')
1168 					{
1169 						c2 = -1;
1170 						break;
1171 					}
1172 				} while ((c2 = HEXCHAR(c2)) == -1);
1173 
1174 				if (c2 == -1 || ++nchar > maxlen)
1175 					break;
1176 
1177 				*(*outfile)++ = c1 << 4 | c2;
1178 			}
1179 		}
1180 		else
1181 		{
1182 			if (state == 1 && c1 == '_')
1183 				c1 = ' ';
1184 
1185 			if (++nchar > maxlen)
1186 				break;
1187 
1188 			*(*outfile)++ = c1;
1189 
1190 			if (c1 == '\n')
1191 				break;
1192 		}
1193 	}
1194 	*(*outfile)++ = '\0';
1195 	return 1;
1196 }
1197 #endif /* MIME7TO8 */
1198