xref: /freebsd/contrib/sendmail/src/mime.c (revision 7660b554bc59a07be0431c17e0e33815818baa69)
1 /*
2  * Copyright (c) 1998-2002 Sendmail, Inc. and its suppliers.
3  *	All rights reserved.
4  * Copyright (c) 1994, 1996-1997 Eric P. Allman.  All rights reserved.
5  * Copyright (c) 1994
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * By using this file, you agree to the terms and conditions set
9  * forth in the LICENSE file which can be found at the top level of
10  * the sendmail distribution.
11  *
12  */
13 
14 #include <sendmail.h>
15 #include <string.h>
16 
17 SM_RCSID("@(#)$Id: mime.c,v 8.130.2.1 2003/04/15 01:05:59 ca Exp $")
18 
19 /*
20 **  MIME support.
21 **
22 **	I am indebted to John Beck of Hewlett-Packard, who contributed
23 **	his code to me for inclusion.  As it turns out, I did not use
24 **	his code since he used a "minimum change" approach that used
25 **	several temp files, and I wanted a "minimum impact" approach
26 **	that would avoid copying.  However, looking over his code
27 **	helped me cement my understanding of the problem.
28 **
29 **	I also looked at, but did not directly use, Nathaniel
30 **	Borenstein's "code.c" module.  Again, it functioned as
31 **	a file-to-file translator, which did not fit within my
32 **	design bounds, but it was a useful base for understanding
33 **	the problem.
34 */
35 
36 #if MIME8TO7
37 static int	isboundary __P((char *, char **));
38 static int	mimeboundary __P((char *, char **));
39 static int	mime_getchar __P((SM_FILE_T *, char **, int *));
40 static int	mime_getchar_crlf __P((SM_FILE_T *, char **, int *));
41 
42 /* character set for hex and base64 encoding */
43 static char	Base16Code[] =	"0123456789ABCDEF";
44 static char	Base64Code[] =	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
45 
46 /* types of MIME boundaries */
47 # define MBT_SYNTAX	0	/* syntax error */
48 # define MBT_NOTSEP	1	/* not a boundary */
49 # define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
50 # define MBT_FINAL	3	/* final boundary (trailing -- included) */
51 
52 static char	*MimeBoundaryNames[] =
53 {
54 	"SYNTAX",	"NOTSEP",	"INTERMED",	"FINAL"
55 };
56 
57 static bool	MapNLtoCRLF;
58 
59 /*
60 **  MIME8TO7 -- output 8 bit body in 7 bit format
61 **
62 **	The header has already been output -- this has to do the
63 **	8 to 7 bit conversion.  It would be easy if we didn't have
64 **	to deal with nested formats (multipart/xxx and message/rfc822).
65 **
66 **	We won't be called if we don't have to do a conversion, and
67 **	appropriate MIME-Version: and Content-Type: fields have been
68 **	output.  Any Content-Transfer-Encoding: field has not been
69 **	output, and we can add it here.
70 **
71 **	Parameters:
72 **		mci -- mailer connection information.
73 **		header -- the header for this body part.
74 **		e -- envelope.
75 **		boundaries -- NULL-terminated array of the currently pending
76 **			message boundaries (boundaries[0] is NULL for the outer portion).
77 **		flags -- to tweak processing.
78 **
79 **	Returns:
80 **		An indicator of what terminated the message part:
81 **		  MBT_FINAL -- the final boundary
82 **		  MBT_INTERMED -- an intermediate boundary
83 **		  MBT_NOTSEP -- an end of file
84 */
85 
86 struct args
87 {
88 	char	*a_field;	/* name of field */
89 	char	*a_value;	/* value of that field */
90 };
91 
92 int
93 mime8to7(mci, header, e, boundaries, flags)
94 	register MCI *mci;
95 	HDR *header;
96 	register ENVELOPE *e;
97 	char **boundaries;
98 	int flags;
99 {
100 	register char *p;
101 	int linelen;
102 	int bt;
103 	off_t offset;
104 	size_t sectionsize, sectionhighbits;
105 	int i;
106 	char *type;
107 	char *subtype;
108 	char *cte;
109 	char **pvp;
110 	int argc = 0;
111 	char *bp;
112 	bool use_qp = false;
113 	struct args argv[MAXMIMEARGS];
114 	char bbuf[128];
115 	char buf[MAXLINE];
116 	char pvpbuf[MAXLINE];
117 	extern unsigned char MimeTokenTab[256];
118 
119 	if (tTd(43, 1))
120 	{
121 		sm_dprintf("mime8to7: flags = %x, boundaries =", flags);
122 		if (boundaries[0] == NULL)
123 			sm_dprintf(" <none>");
124 		else
125 		{
126 			for (i = 0; boundaries[i] != NULL; i++)
127 				sm_dprintf(" %s", boundaries[i]);
128 		}
129 		sm_dprintf("\n");
130 	}
131 	MapNLtoCRLF = true;
132 	p = hvalue("Content-Transfer-Encoding", header);
133 	if (p == NULL ||
134 	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
135 			   MimeTokenTab)) == NULL ||
136 	    pvp[0] == NULL)
137 	{
138 		cte = NULL;
139 	}
140 	else
141 	{
142 		cataddr(pvp, NULL, buf, sizeof buf, '\0');
143 		cte = sm_rpool_strdup_x(e->e_rpool, buf);
144 	}
145 
146 	type = subtype = NULL;
147 	p = hvalue("Content-Type", header);
148 	if (p == NULL)
149 	{
150 		if (bitset(M87F_DIGEST, flags))
151 			p = "message/rfc822";
152 		else
153 			p = "text/plain";
154 	}
155 	if (p != NULL &&
156 	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
157 			   MimeTokenTab)) != NULL &&
158 	    pvp[0] != NULL)
159 	{
160 		if (tTd(43, 40))
161 		{
162 			for (i = 0; pvp[i] != NULL; i++)
163 				sm_dprintf("pvp[%d] = \"%s\"\n", i, pvp[i]);
164 		}
165 		type = *pvp++;
166 		if (*pvp != NULL && strcmp(*pvp, "/") == 0 &&
167 		    *++pvp != NULL)
168 		{
169 			subtype = *pvp++;
170 		}
171 
172 		/* break out parameters */
173 		while (*pvp != NULL && argc < MAXMIMEARGS)
174 		{
175 			/* skip to semicolon separator */
176 			while (*pvp != NULL && strcmp(*pvp, ";") != 0)
177 				pvp++;
178 			if (*pvp++ == NULL || *pvp == NULL)
179 				break;
180 
181 			/* complain about empty values */
182 			if (strcmp(*pvp, ";") == 0)
183 			{
184 				usrerr("mime8to7: Empty parameter in Content-Type header");
185 
186 				/* avoid bounce loops */
187 				e->e_flags |= EF_DONT_MIME;
188 				continue;
189 			}
190 
191 			/* extract field name */
192 			argv[argc].a_field = *pvp++;
193 
194 			/* see if there is a value */
195 			if (*pvp != NULL && strcmp(*pvp, "=") == 0 &&
196 			    (*++pvp == NULL || strcmp(*pvp, ";") != 0))
197 			{
198 				argv[argc].a_value = *pvp;
199 				argc++;
200 			}
201 		}
202 	}
203 
204 	/* check for disaster cases */
205 	if (type == NULL)
206 		type = "-none-";
207 	if (subtype == NULL)
208 		subtype = "-none-";
209 
210 	/* don't propogate some flags more than one level into the message */
211 	flags &= ~M87F_DIGEST;
212 
213 	/*
214 	**  Check for cases that can not be encoded.
215 	**
216 	**	For example, you can't encode certain kinds of types
217 	**	or already-encoded messages.  If we find this case,
218 	**	just copy it through.
219 	*/
220 
221 	(void) sm_snprintf(buf, sizeof buf, "%.100s/%.100s", type, subtype);
222 	if (wordinclass(buf, 'n') || (cte != NULL && !wordinclass(cte, 'e')))
223 		flags |= M87F_NO8BIT;
224 
225 # ifdef USE_B_CLASS
226 	if (wordinclass(buf, 'b') || wordinclass(type, 'b'))
227 		MapNLtoCRLF = false;
228 # endif /* USE_B_CLASS */
229 	if (wordinclass(buf, 'q') || wordinclass(type, 'q'))
230 		use_qp = true;
231 
232 	/*
233 	**  Multipart requires special processing.
234 	**
235 	**	Do a recursive descent into the message.
236 	*/
237 
238 	if (sm_strcasecmp(type, "multipart") == 0 &&
239 	    (!bitset(M87F_NO8BIT, flags) || bitset(M87F_NO8TO7, flags)))
240 	{
241 
242 		if (sm_strcasecmp(subtype, "digest") == 0)
243 			flags |= M87F_DIGEST;
244 
245 		for (i = 0; i < argc; i++)
246 		{
247 			if (sm_strcasecmp(argv[i].a_field, "boundary") == 0)
248 				break;
249 		}
250 		if (i >= argc || argv[i].a_value == NULL)
251 		{
252 			usrerr("mime8to7: Content-Type: \"%s\": %s boundary",
253 				i >= argc ? "missing" : "bogus", p);
254 			p = "---";
255 
256 			/* avoid bounce loops */
257 			e->e_flags |= EF_DONT_MIME;
258 		}
259 		else
260 		{
261 			p = argv[i].a_value;
262 			stripquotes(p);
263 		}
264 		if (sm_strlcpy(bbuf, p, sizeof bbuf) >= sizeof bbuf)
265 		{
266 			usrerr("mime8to7: multipart boundary \"%s\" too long",
267 				p);
268 
269 			/* avoid bounce loops */
270 			e->e_flags |= EF_DONT_MIME;
271 		}
272 
273 		if (tTd(43, 1))
274 			sm_dprintf("mime8to7: multipart boundary \"%s\"\n",
275 				bbuf);
276 		for (i = 0; i < MAXMIMENESTING; i++)
277 		{
278 			if (boundaries[i] == NULL)
279 				break;
280 		}
281 		if (i >= MAXMIMENESTING)
282 		{
283 			usrerr("mime8to7: multipart nesting boundary too deep");
284 
285 			/* avoid bounce loops */
286 			e->e_flags |= EF_DONT_MIME;
287 		}
288 		else
289 		{
290 			boundaries[i] = bbuf;
291 			boundaries[i + 1] = NULL;
292 		}
293 		mci->mci_flags |= MCIF_INMIME;
294 
295 		/* skip the early "comment" prologue */
296 		putline("", mci);
297 		mci->mci_flags &= ~MCIF_INHEADER;
298 		bt = MBT_FINAL;
299 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf)
300 			!= NULL)
301 		{
302 			bt = mimeboundary(buf, boundaries);
303 			if (bt != MBT_NOTSEP)
304 				break;
305 			putxline(buf, strlen(buf), mci,
306 				 PXLF_MAPFROM|PXLF_STRIP8BIT);
307 			if (tTd(43, 99))
308 				sm_dprintf("  ...%s", buf);
309 		}
310 		if (sm_io_eof(e->e_dfp))
311 			bt = MBT_FINAL;
312 		while (bt != MBT_FINAL)
313 		{
314 			auto HDR *hdr = NULL;
315 
316 			(void) sm_strlcpyn(buf, sizeof buf, 2, "--", bbuf);
317 			putline(buf, mci);
318 			if (tTd(43, 35))
319 				sm_dprintf("  ...%s\n", buf);
320 			collect(e->e_dfp, false, &hdr, e, false);
321 			if (tTd(43, 101))
322 				putline("+++after collect", mci);
323 			putheader(mci, hdr, e, flags);
324 			if (tTd(43, 101))
325 				putline("+++after putheader", mci);
326 			bt = mime8to7(mci, hdr, e, boundaries, flags);
327 		}
328 		(void) sm_strlcpyn(buf, sizeof buf, 3, "--", bbuf, "--");
329 		putline(buf, mci);
330 		if (tTd(43, 35))
331 			sm_dprintf("  ...%s\n", buf);
332 		boundaries[i] = NULL;
333 		mci->mci_flags &= ~MCIF_INMIME;
334 
335 		/* skip the late "comment" epilogue */
336 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf)
337 			!= NULL)
338 		{
339 			bt = mimeboundary(buf, boundaries);
340 			if (bt != MBT_NOTSEP)
341 				break;
342 			putxline(buf, strlen(buf), mci,
343 				 PXLF_MAPFROM|PXLF_STRIP8BIT);
344 			if (tTd(43, 99))
345 				sm_dprintf("  ...%s", buf);
346 		}
347 		if (sm_io_eof(e->e_dfp))
348 			bt = MBT_FINAL;
349 		if (tTd(43, 3))
350 			sm_dprintf("\t\t\tmime8to7=>%s (multipart)\n",
351 				MimeBoundaryNames[bt]);
352 		return bt;
353 	}
354 
355 	/*
356 	**  Message/xxx types -- recurse exactly once.
357 	**
358 	**	Class 's' is predefined to have "rfc822" only.
359 	*/
360 
361 	if (sm_strcasecmp(type, "message") == 0)
362 	{
363 		if (!wordinclass(subtype, 's'))
364 		{
365 			flags |= M87F_NO8BIT;
366 		}
367 		else
368 		{
369 			auto HDR *hdr = NULL;
370 
371 			putline("", mci);
372 
373 			mci->mci_flags |= MCIF_INMIME;
374 			collect(e->e_dfp, false, &hdr, e, false);
375 			if (tTd(43, 101))
376 				putline("+++after collect", mci);
377 			putheader(mci, hdr, e, flags);
378 			if (tTd(43, 101))
379 				putline("+++after putheader", mci);
380 			if (hvalue("MIME-Version", hdr) == NULL &&
381 			    !bitset(M87F_NO8TO7, flags))
382 				putline("MIME-Version: 1.0", mci);
383 			bt = mime8to7(mci, hdr, e, boundaries, flags);
384 			mci->mci_flags &= ~MCIF_INMIME;
385 			return bt;
386 		}
387 	}
388 
389 	/*
390 	**  Non-compound body type
391 	**
392 	**	Compute the ratio of seven to eight bit characters;
393 	**	use that as a heuristic to decide how to do the
394 	**	encoding.
395 	*/
396 
397 	sectionsize = sectionhighbits = 0;
398 	if (!bitset(M87F_NO8BIT|M87F_NO8TO7, flags))
399 	{
400 		/* remember where we were */
401 		offset = sm_io_tell(e->e_dfp, SM_TIME_DEFAULT);
402 		if (offset == -1)
403 			syserr("mime8to7: cannot sm_io_tell on %cf%s",
404 			       DATAFL_LETTER, e->e_id);
405 
406 		/* do a scan of this body type to count character types */
407 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf)
408 			!= NULL)
409 		{
410 			if (mimeboundary(buf, boundaries) != MBT_NOTSEP)
411 				break;
412 			for (p = buf; *p != '\0'; p++)
413 			{
414 				/* count bytes with the high bit set */
415 				sectionsize++;
416 				if (bitset(0200, *p))
417 					sectionhighbits++;
418 			}
419 
420 			/*
421 			**  Heuristic: if 1/4 of the first 4K bytes are 8-bit,
422 			**  assume base64.  This heuristic avoids double-reading
423 			**  large graphics or video files.
424 			*/
425 
426 			if (sectionsize >= 4096 &&
427 			    sectionhighbits > sectionsize / 4)
428 				break;
429 		}
430 
431 		/* return to the original offset for processing */
432 		/* XXX use relative seeks to handle >31 bit file sizes? */
433 		if (sm_io_seek(e->e_dfp, SM_TIME_DEFAULT, offset, SEEK_SET) < 0)
434 			syserr("mime8to7: cannot sm_io_fseek on %cf%s",
435 			       DATAFL_LETTER, e->e_id);
436 		else
437 			sm_io_clearerr(e->e_dfp);
438 	}
439 
440 	/*
441 	**  Heuristically determine encoding method.
442 	**	If more than 1/8 of the total characters have the
443 	**	eighth bit set, use base64; else use quoted-printable.
444 	**	However, only encode binary encoded data as base64,
445 	**	since otherwise the NL=>CRLF mapping will be a problem.
446 	*/
447 
448 	if (tTd(43, 8))
449 	{
450 		sm_dprintf("mime8to7: %ld high bit(s) in %ld byte(s), cte=%s, type=%s/%s\n",
451 			(long) sectionhighbits, (long) sectionsize,
452 			cte == NULL ? "[none]" : cte,
453 			type == NULL ? "[none]" : type,
454 			subtype == NULL ? "[none]" : subtype);
455 	}
456 	if (cte != NULL && sm_strcasecmp(cte, "binary") == 0)
457 		sectionsize = sectionhighbits;
458 	linelen = 0;
459 	bp = buf;
460 	if (sectionhighbits == 0)
461 	{
462 		/* no encoding necessary */
463 		if (cte != NULL &&
464 		    bitset(MCIF_CVT8TO7|MCIF_CVT7TO8|MCIF_INMIME,
465 			   mci->mci_flags) &&
466 		    !bitset(M87F_NO8TO7, flags))
467 		{
468 			/*
469 			**  Skip _unless_ in MIME mode and potentially
470 			**  converting from 8 bit to 7 bit MIME.  See
471 			**  putheader() for the counterpart where the
472 			**  CTE header is skipped in the opposite
473 			**  situation.
474 			*/
475 
476 			(void) sm_snprintf(buf, sizeof buf,
477 				"Content-Transfer-Encoding: %.200s", cte);
478 			putline(buf, mci);
479 			if (tTd(43, 36))
480 				sm_dprintf("  ...%s\n", buf);
481 		}
482 		putline("", mci);
483 		mci->mci_flags &= ~MCIF_INHEADER;
484 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf)
485 			!= NULL)
486 		{
487 			bt = mimeboundary(buf, boundaries);
488 			if (bt != MBT_NOTSEP)
489 				break;
490 			putline(buf, mci);
491 		}
492 		if (sm_io_eof(e->e_dfp))
493 			bt = MBT_FINAL;
494 	}
495 	else if (!MapNLtoCRLF ||
496 		 (sectionsize / 8 < sectionhighbits && !use_qp))
497 	{
498 		/* use base64 encoding */
499 		int c1, c2;
500 
501 		if (tTd(43, 36))
502 			sm_dprintf("  ...Content-Transfer-Encoding: base64\n");
503 		putline("Content-Transfer-Encoding: base64", mci);
504 		(void) sm_snprintf(buf, sizeof buf,
505 			"X-MIME-Autoconverted: from 8bit to base64 by %s id %s",
506 			MyHostName, e->e_id);
507 		putline(buf, mci);
508 		putline("", mci);
509 		mci->mci_flags &= ~MCIF_INHEADER;
510 		while ((c1 = mime_getchar_crlf(e->e_dfp, boundaries, &bt)) !=
511 			SM_IO_EOF)
512 		{
513 			if (linelen > 71)
514 			{
515 				*bp = '\0';
516 				putline(buf, mci);
517 				linelen = 0;
518 				bp = buf;
519 			}
520 			linelen += 4;
521 			*bp++ = Base64Code[(c1 >> 2)];
522 			c1 = (c1 & 0x03) << 4;
523 			c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt);
524 			if (c2 == SM_IO_EOF)
525 			{
526 				*bp++ = Base64Code[c1];
527 				*bp++ = '=';
528 				*bp++ = '=';
529 				break;
530 			}
531 			c1 |= (c2 >> 4) & 0x0f;
532 			*bp++ = Base64Code[c1];
533 			c1 = (c2 & 0x0f) << 2;
534 			c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt);
535 			if (c2 == SM_IO_EOF)
536 			{
537 				*bp++ = Base64Code[c1];
538 				*bp++ = '=';
539 				break;
540 			}
541 			c1 |= (c2 >> 6) & 0x03;
542 			*bp++ = Base64Code[c1];
543 			*bp++ = Base64Code[c2 & 0x3f];
544 		}
545 		*bp = '\0';
546 		putline(buf, mci);
547 	}
548 	else
549 	{
550 		/* use quoted-printable encoding */
551 		int c1, c2;
552 		int fromstate;
553 		BITMAP256 badchars;
554 
555 		/* set up map of characters that must be mapped */
556 		clrbitmap(badchars);
557 		for (c1 = 0x00; c1 < 0x20; c1++)
558 			setbitn(c1, badchars);
559 		clrbitn('\t', badchars);
560 		for (c1 = 0x7f; c1 < 0x100; c1++)
561 			setbitn(c1, badchars);
562 		setbitn('=', badchars);
563 		if (bitnset(M_EBCDIC, mci->mci_mailer->m_flags))
564 			for (p = "!\"#$@[\\]^`{|}~"; *p != '\0'; p++)
565 				setbitn(*p, badchars);
566 
567 		if (tTd(43, 36))
568 			sm_dprintf("  ...Content-Transfer-Encoding: quoted-printable\n");
569 		putline("Content-Transfer-Encoding: quoted-printable", mci);
570 		(void) sm_snprintf(buf, sizeof buf,
571 			"X-MIME-Autoconverted: from 8bit to quoted-printable by %s id %s",
572 			MyHostName, e->e_id);
573 		putline(buf, mci);
574 		putline("", mci);
575 		mci->mci_flags &= ~MCIF_INHEADER;
576 		fromstate = 0;
577 		c2 = '\n';
578 		while ((c1 = mime_getchar(e->e_dfp, boundaries, &bt)) !=
579 			SM_IO_EOF)
580 		{
581 			if (c1 == '\n')
582 			{
583 				if (c2 == ' ' || c2 == '\t')
584 				{
585 					*bp++ = '=';
586 					*bp++ = Base16Code[(c2 >> 4) & 0x0f];
587 					*bp++ = Base16Code[c2 & 0x0f];
588 				}
589 				if (buf[0] == '.' && bp == &buf[1])
590 				{
591 					buf[0] = '=';
592 					*bp++ = Base16Code[('.' >> 4) & 0x0f];
593 					*bp++ = Base16Code['.' & 0x0f];
594 				}
595 				*bp = '\0';
596 				putline(buf, mci);
597 				linelen = fromstate = 0;
598 				bp = buf;
599 				c2 = c1;
600 				continue;
601 			}
602 			if (c2 == ' ' && linelen == 4 && fromstate == 4 &&
603 			    bitnset(M_ESCFROM, mci->mci_mailer->m_flags))
604 			{
605 				*bp++ = '=';
606 				*bp++ = '2';
607 				*bp++ = '0';
608 				linelen += 3;
609 			}
610 			else if (c2 == ' ' || c2 == '\t')
611 			{
612 				*bp++ = c2;
613 				linelen++;
614 			}
615 			if (linelen > 72 &&
616 			    (linelen > 75 || c1 != '.' ||
617 			     (linelen > 73 && c2 == '.')))
618 			{
619 				if (linelen > 73 && c2 == '.')
620 					bp--;
621 				else
622 					c2 = '\n';
623 				*bp++ = '=';
624 				*bp = '\0';
625 				putline(buf, mci);
626 				linelen = fromstate = 0;
627 				bp = buf;
628 				if (c2 == '.')
629 				{
630 					*bp++ = '.';
631 					linelen++;
632 				}
633 			}
634 			if (bitnset(bitidx(c1), badchars))
635 			{
636 				*bp++ = '=';
637 				*bp++ = Base16Code[(c1 >> 4) & 0x0f];
638 				*bp++ = Base16Code[c1 & 0x0f];
639 				linelen += 3;
640 			}
641 			else if (c1 != ' ' && c1 != '\t')
642 			{
643 				if (linelen < 4 && c1 == "From"[linelen])
644 					fromstate++;
645 				*bp++ = c1;
646 				linelen++;
647 			}
648 			c2 = c1;
649 		}
650 
651 		/* output any saved character */
652 		if (c2 == ' ' || c2 == '\t')
653 		{
654 			*bp++ = '=';
655 			*bp++ = Base16Code[(c2 >> 4) & 0x0f];
656 			*bp++ = Base16Code[c2 & 0x0f];
657 			linelen += 3;
658 		}
659 
660 		if (linelen > 0 || boundaries[0] != NULL)
661 		{
662 			*bp = '\0';
663 			putline(buf, mci);
664 		}
665 
666 	}
667 	if (tTd(43, 3))
668 		sm_dprintf("\t\t\tmime8to7=>%s (basic)\n", MimeBoundaryNames[bt]);
669 	return bt;
670 }
671 /*
672 **  MIME_GETCHAR -- get a character for MIME processing
673 **
674 **	Treats boundaries as SM_IO_EOF.
675 **
676 **	Parameters:
677 **		fp -- the input file.
678 **		boundaries -- the current MIME boundaries.
679 **		btp -- if the return value is SM_IO_EOF, *btp is set to
680 **			the type of the boundary.
681 **
682 **	Returns:
683 **		The next character in the input stream.
684 */
685 
686 static int
687 mime_getchar(fp, boundaries, btp)
688 	register SM_FILE_T *fp;
689 	char **boundaries;
690 	int *btp;
691 {
692 	int c;
693 	static unsigned char *bp = NULL;
694 	static int buflen = 0;
695 	static bool atbol = true;	/* at beginning of line */
696 	static int bt = MBT_SYNTAX;	/* boundary type of next SM_IO_EOF */
697 	static unsigned char buf[128];	/* need not be a full line */
698 	int start = 0;			/* indicates position of - in buffer */
699 
700 	if (buflen == 1 && *bp == '\n')
701 	{
702 		/* last \n in buffer may be part of next MIME boundary */
703 		c = *bp;
704 	}
705 	else if (buflen > 0)
706 	{
707 		buflen--;
708 		return *bp++;
709 	}
710 	else
711 		c = sm_io_getc(fp, SM_TIME_DEFAULT);
712 	bp = buf;
713 	buflen = 0;
714 	if (c == '\n')
715 	{
716 		/* might be part of a MIME boundary */
717 		*bp++ = c;
718 		atbol = true;
719 		c = sm_io_getc(fp, SM_TIME_DEFAULT);
720 		if (c == '\n')
721 		{
722 			(void) sm_io_ungetc(fp, SM_TIME_DEFAULT, c);
723 			return c;
724 		}
725 		start = 1;
726 	}
727 	if (c != SM_IO_EOF)
728 		*bp++ = c;
729 	else
730 		bt = MBT_FINAL;
731 	if (atbol && c == '-')
732 	{
733 		/* check for a message boundary */
734 		c = sm_io_getc(fp, SM_TIME_DEFAULT);
735 		if (c != '-')
736 		{
737 			if (c != SM_IO_EOF)
738 				*bp++ = c;
739 			else
740 				bt = MBT_FINAL;
741 			buflen = bp - buf - 1;
742 			bp = buf;
743 			return *bp++;
744 		}
745 
746 		/* got "--", now check for rest of separator */
747 		*bp++ = '-';
748 		while (bp < &buf[sizeof buf - 2] &&
749 		       (c = sm_io_getc(fp, SM_TIME_DEFAULT)) != SM_IO_EOF &&
750 		       c != '\n')
751 		{
752 			*bp++ = c;
753 		}
754 		*bp = '\0';	/* XXX simply cut off? */
755 		bt = mimeboundary((char *) &buf[start], boundaries);
756 		switch (bt)
757 		{
758 		  case MBT_FINAL:
759 		  case MBT_INTERMED:
760 			/* we have a message boundary */
761 			buflen = 0;
762 			*btp = bt;
763 			return SM_IO_EOF;
764 		}
765 
766 		atbol = c == '\n';
767 		if (c != SM_IO_EOF)
768 			*bp++ = c;
769 	}
770 
771 	buflen = bp - buf - 1;
772 	if (buflen < 0)
773 	{
774 		*btp = bt;
775 		return SM_IO_EOF;
776 	}
777 	bp = buf;
778 	return *bp++;
779 }
780 /*
781 **  MIME_GETCHAR_CRLF -- do mime_getchar, but translate NL => CRLF
782 **
783 **	Parameters:
784 **		fp -- the input file.
785 **		boundaries -- the current MIME boundaries.
786 **		btp -- if the return value is SM_IO_EOF, *btp is set to
787 **			the type of the boundary.
788 **
789 **	Returns:
790 **		The next character in the input stream.
791 */
792 
793 static int
794 mime_getchar_crlf(fp, boundaries, btp)
795 	register SM_FILE_T *fp;
796 	char **boundaries;
797 	int *btp;
798 {
799 	static bool sendlf = false;
800 	int c;
801 
802 	if (sendlf)
803 	{
804 		sendlf = false;
805 		return '\n';
806 	}
807 	c = mime_getchar(fp, boundaries, btp);
808 	if (c == '\n' && MapNLtoCRLF)
809 	{
810 		sendlf = true;
811 		return '\r';
812 	}
813 	return c;
814 }
815 /*
816 **  MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
817 **
818 **	Parameters:
819 **		line -- the input line.
820 **		boundaries -- the set of currently pending boundaries.
821 **
822 **	Returns:
823 **		MBT_NOTSEP -- if this is not a separator line
824 **		MBT_INTERMED -- if this is an intermediate separator
825 **		MBT_FINAL -- if this is a final boundary
826 **		MBT_SYNTAX -- if this is a boundary for the wrong
827 **			enclosure -- i.e., a syntax error.
828 */
829 
830 static int
831 mimeboundary(line, boundaries)
832 	register char *line;
833 	char **boundaries;
834 {
835 	int type = MBT_NOTSEP;
836 	int i;
837 	int savec;
838 
839 	if (line[0] != '-' || line[1] != '-' || boundaries == NULL)
840 		return MBT_NOTSEP;
841 	i = strlen(line);
842 	if (i > 0 && line[i - 1] == '\n')
843 		i--;
844 
845 	/* strip off trailing whitespace */
846 	while (i > 0 && (line[i - 1] == ' ' || line[i - 1] == '\t'))
847 		i--;
848 	savec = line[i];
849 	line[i] = '\0';
850 
851 	if (tTd(43, 5))
852 		sm_dprintf("mimeboundary: line=\"%s\"... ", line);
853 
854 	/* check for this as an intermediate boundary */
855 	if (isboundary(&line[2], boundaries) >= 0)
856 		type = MBT_INTERMED;
857 	else if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0)
858 	{
859 		/* check for a final boundary */
860 		line[i - 2] = '\0';
861 		if (isboundary(&line[2], boundaries) >= 0)
862 			type = MBT_FINAL;
863 		line[i - 2] = '-';
864 	}
865 
866 	line[i] = savec;
867 	if (tTd(43, 5))
868 		sm_dprintf("%s\n", MimeBoundaryNames[type]);
869 	return type;
870 }
871 /*
872 **  DEFCHARSET -- return default character set for message
873 **
874 **	The first choice for character set is for the mailer
875 **	corresponding to the envelope sender.  If neither that
876 **	nor the global configuration file has a default character
877 **	set defined, return "unknown-8bit" as recommended by
878 **	RFC 1428 section 3.
879 **
880 **	Parameters:
881 **		e -- the envelope for this message.
882 **
883 **	Returns:
884 **		The default character set for that mailer.
885 */
886 
887 char *
888 defcharset(e)
889 	register ENVELOPE *e;
890 {
891 	if (e != NULL && e->e_from.q_mailer != NULL &&
892 	    e->e_from.q_mailer->m_defcharset != NULL)
893 		return e->e_from.q_mailer->m_defcharset;
894 	if (DefaultCharSet != NULL)
895 		return DefaultCharSet;
896 	return "unknown-8bit";
897 }
898 /*
899 **  ISBOUNDARY -- is a given string a currently valid boundary?
900 **
901 **	Parameters:
902 **		line -- the current input line.
903 **		boundaries -- the list of valid boundaries.
904 **
905 **	Returns:
906 **		The index number in boundaries if the line is found.
907 **		-1 -- otherwise.
908 **
909 */
910 
911 static int
912 isboundary(line, boundaries)
913 	char *line;
914 	char **boundaries;
915 {
916 	register int i;
917 
918 	for (i = 0; i <= MAXMIMENESTING && boundaries[i] != NULL; i++)
919 	{
920 		if (strcmp(line, boundaries[i]) == 0)
921 			return i;
922 	}
923 	return -1;
924 }
925 #endif /* MIME8TO7 */
926 
927 #if MIME7TO8
928 static int	mime_fromqp __P((unsigned char *, unsigned char **, int));
929 
930 /*
931 **  MIME7TO8 -- output 7 bit encoded MIME body in 8 bit format
932 **
933 **  This is a hack. Supports translating the two 7-bit body-encodings
934 **  (quoted-printable and base64) to 8-bit coded bodies.
935 **
936 **  There is not much point in supporting multipart here, as the UA
937 **  will be able to deal with encoded MIME bodies if it can parse MIME
938 **  multipart messages.
939 **
940 **  Note also that we won't be called unless it is a text/plain MIME
941 **  message, encoded base64 or QP and mailer flag '9' has been defined
942 **  on mailer.
943 **
944 **  Contributed by Marius Olaffson <marius@rhi.hi.is>.
945 **
946 **	Parameters:
947 **		mci -- mailer connection information.
948 **		header -- the header for this body part.
949 **		e -- envelope.
950 **
951 **	Returns:
952 **		none.
953 */
954 
955 static char index_64[128] =
956 {
957 	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
958 	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
959 	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
960 	52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
961 	-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
962 	15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
963 	-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
964 	41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
965 };
966 
967 # define CHAR64(c)  (((c) < 0 || (c) > 127) ? -1 : index_64[(c)])
968 
969 void
970 mime7to8(mci, header, e)
971 	register MCI *mci;
972 	HDR *header;
973 	register ENVELOPE *e;
974 {
975 	int pxflags;
976 	register char *p;
977 	char *cte;
978 	char **pvp;
979 	unsigned char *fbufp;
980 	char buf[MAXLINE];
981 	unsigned char fbuf[MAXLINE + 1];
982 	char pvpbuf[MAXLINE];
983 	extern unsigned char MimeTokenTab[256];
984 
985 	p = hvalue("Content-Transfer-Encoding", header);
986 	if (p == NULL ||
987 	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
988 			   MimeTokenTab)) == NULL ||
989 	    pvp[0] == NULL)
990 	{
991 		/* "can't happen" -- upper level should have caught this */
992 		syserr("mime7to8: unparsable CTE %s", p == NULL ? "<NULL>" : p);
993 
994 		/* avoid bounce loops */
995 		e->e_flags |= EF_DONT_MIME;
996 
997 		/* cheap failsafe algorithm -- should work on text/plain */
998 		if (p != NULL)
999 		{
1000 			(void) sm_snprintf(buf, sizeof buf,
1001 				"Content-Transfer-Encoding: %s", p);
1002 			putline(buf, mci);
1003 		}
1004 		putline("", mci);
1005 		mci->mci_flags &= ~MCIF_INHEADER;
1006 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf)
1007 			!= NULL)
1008 			putline(buf, mci);
1009 		return;
1010 	}
1011 	cataddr(pvp, NULL, buf, sizeof buf, '\0');
1012 	cte = sm_rpool_strdup_x(e->e_rpool, buf);
1013 
1014 	mci->mci_flags |= MCIF_INHEADER;
1015 	putline("Content-Transfer-Encoding: 8bit", mci);
1016 	(void) sm_snprintf(buf, sizeof buf,
1017 		"X-MIME-Autoconverted: from %.200s to 8bit by %s id %s",
1018 		cte, MyHostName, e->e_id);
1019 	putline(buf, mci);
1020 	putline("", mci);
1021 	mci->mci_flags &= ~MCIF_INHEADER;
1022 
1023 	/*
1024 	**  Translate body encoding to 8-bit.  Supports two types of
1025 	**  encodings; "base64" and "quoted-printable". Assume qp if
1026 	**  it is not base64.
1027 	*/
1028 
1029 	pxflags = PXLF_MAPFROM;
1030 	if (sm_strcasecmp(cte, "base64") == 0)
1031 	{
1032 		int c1, c2, c3, c4;
1033 
1034 		fbufp = fbuf;
1035 		while ((c1 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT)) !=
1036 			SM_IO_EOF)
1037 		{
1038 			if (isascii(c1) && isspace(c1))
1039 				continue;
1040 
1041 			do
1042 			{
1043 				c2 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
1044 			} while (isascii(c2) && isspace(c2));
1045 			if (c2 == SM_IO_EOF)
1046 				break;
1047 
1048 			do
1049 			{
1050 				c3 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
1051 			} while (isascii(c3) && isspace(c3));
1052 			if (c3 == SM_IO_EOF)
1053 				break;
1054 
1055 			do
1056 			{
1057 				c4 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
1058 			} while (isascii(c4) && isspace(c4));
1059 			if (c4 == SM_IO_EOF)
1060 				break;
1061 
1062 			if (c1 == '=' || c2 == '=')
1063 				continue;
1064 			c1 = CHAR64(c1);
1065 			c2 = CHAR64(c2);
1066 
1067 			*fbufp = (c1 << 2) | ((c2 & 0x30) >> 4);
1068 			if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE])
1069 			{
1070 				if (*--fbufp != '\n' ||
1071 				    (fbufp > fbuf && *--fbufp != '\r'))
1072 				{
1073 					pxflags |= PXLF_NOADDEOL;
1074 					fbufp++;
1075 				}
1076 				putxline((char *) fbuf, fbufp - fbuf,
1077 					 mci, pxflags);
1078 				pxflags &= ~PXLF_NOADDEOL;
1079 				fbufp = fbuf;
1080 			}
1081 			if (c3 == '=')
1082 				continue;
1083 			c3 = CHAR64(c3);
1084 			*fbufp = ((c2 & 0x0f) << 4) | ((c3 & 0x3c) >> 2);
1085 			if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE])
1086 			{
1087 				if (*--fbufp != '\n' ||
1088 				    (fbufp > fbuf && *--fbufp != '\r'))
1089 				{
1090 					pxflags |= PXLF_NOADDEOL;
1091 					fbufp++;
1092 				}
1093 				putxline((char *) fbuf, fbufp - fbuf,
1094 					 mci, pxflags);
1095 				pxflags &= ~PXLF_NOADDEOL;
1096 				fbufp = fbuf;
1097 			}
1098 			if (c4 == '=')
1099 				continue;
1100 			c4 = CHAR64(c4);
1101 			*fbufp = ((c3 & 0x03) << 6) | c4;
1102 			if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE])
1103 			{
1104 				if (*--fbufp != '\n' ||
1105 				    (fbufp > fbuf && *--fbufp != '\r'))
1106 				{
1107 					pxflags |= PXLF_NOADDEOL;
1108 					fbufp++;
1109 				}
1110 				putxline((char *) fbuf, fbufp - fbuf,
1111 					 mci, pxflags);
1112 				pxflags &= ~PXLF_NOADDEOL;
1113 				fbufp = fbuf;
1114 			}
1115 		}
1116 	}
1117 	else
1118 	{
1119 		int off;
1120 
1121 		/* quoted-printable */
1122 		pxflags |= PXLF_NOADDEOL;
1123 		fbufp = fbuf;
1124 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
1125 				   sizeof buf) != NULL)
1126 		{
1127 			off = mime_fromqp((unsigned char *) buf, &fbufp,
1128 					  &fbuf[MAXLINE] - fbufp);
1129 again:
1130 			if (off < -1)
1131 				continue;
1132 
1133 			if (fbufp - fbuf > 0)
1134 				putxline((char *) fbuf, fbufp - fbuf - 1, mci,
1135 					 pxflags);
1136 			fbufp = fbuf;
1137 			if (off >= 0 && buf[off] != '\0')
1138 			{
1139 				off = mime_fromqp((unsigned char *) (buf + off),
1140 						  &fbufp,
1141 						  &fbuf[MAXLINE] - fbufp);
1142 				goto again;
1143 			}
1144 		}
1145 	}
1146 
1147 	/* force out partial last line */
1148 	if (fbufp > fbuf)
1149 	{
1150 		*fbufp = '\0';
1151 		putxline((char *) fbuf, fbufp - fbuf, mci, pxflags);
1152 	}
1153 
1154 	/*
1155 	**  The decoded text may end without an EOL.  Since this function
1156 	**  is only called for text/plain MIME messages, it is safe to
1157 	**  add an extra one at the end just in case.  This is a hack,
1158 	**  but so is auto-converting MIME in the first place.
1159 	*/
1160 
1161 	putline("", mci);
1162 
1163 	if (tTd(43, 3))
1164 		sm_dprintf("\t\t\tmime7to8 => %s to 8bit done\n", cte);
1165 }
1166 /*
1167 **  The following is based on Borenstein's "codes.c" module, with simplifying
1168 **  changes as we do not deal with multipart, and to do the translation in-core,
1169 **  with an attempt to prevent overrun of output buffers.
1170 **
1171 **  What is needed here are changes to defend this code better against
1172 **  bad encodings. Questionable to always return 0xFF for bad mappings.
1173 */
1174 
/*
**  Value of each hex digit (upper or lower case), -1 for anything else.
**  Explicitly signed so that the -1 entries survive on platforms where
**  plain char is unsigned.
*/

static signed char index_hex[128] =
{
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

# define HEXCHAR(c)  (((c) < 0 || (c) > 127) ? -1 : index_hex[(c)])

/*
**  MIME_FROMQP -- decode quoted printable string
**
**	Decoding stops after a newline, at the end of infile, or
**	when maxlen - 1 characters have been produced.  Unless -2 is
**	returned (or there was no room at all), a '\0' is stored
**	after the decoded characters and *outfile points past it.
**
**	Parameters:
**		infile -- input (encoded) string
**		outfile -- output string; *outfile is advanced past
**			everything that has been stored
**		maxlen -- size of output buffer
**
**	Returns:
**		-2 if decoding failure or soft line break ("=" at the
**			end of the line); no '\0' is stored
**		-1 if infile completely decoded into outfile
**		>= 0 is the position in infile decoding
**			reached before maxlen was reached
*/

static int
mime_fromqp(infile, outfile, maxlen)
	unsigned char *infile;
	unsigned char **outfile;
	int maxlen;		/* Max # of chars allowed in outfile */
{
	int c1, c2;
	int nchar = 0;
	unsigned char *b;

	/* decrement by one for trailing '\0', at least one other char */
	if (--maxlen < 1)
		return 0;

	b = infile;
	while ((c1 = *infile++) != '\0' && nchar < maxlen)
	{
		if (c1 == '=')
		{
			if ((c1 = *infile++) == '\0')
				break;

			if (c1 == '\n' || (c1 = HEXCHAR(c1)) == -1)
			{
				/* ignore it and the rest of the buffer */
				return -2;
			}
			else
			{
				/* skip ahead to the second hex digit */
				do
				{
					if ((c2 = *infile++) == '\0')
					{
						c2 = -1;
						break;
					}
				} while ((c2 = HEXCHAR(c2)) == -1);

				if (c2 == -1)
					break;
				nchar++;
				*(*outfile)++ = c1 << 4 | c2;
			}
		}
		else
		{
			nchar++;
			*(*outfile)++ = c1;
			if (c1 == '\n')
			{
				/*
				**  The line is complete.  Say so even if
				**  the newline took the last free slot;
				**  returning its offset instead would
				**  make the caller decode it twice.
				*/

				*(*outfile)++ = '\0';
				return -1;
			}
		}
	}
	*(*outfile)++ = '\0';

	/* out of room: tell the caller where to resume in infile */
	if (nchar >= maxlen)
		return (infile - b - 1);
	return -1;
}
1261 #endif /* MIME7TO8 */
1262