xref: /freebsd/contrib/sendmail/src/mime.c (revision 6b3455a7665208c366849f0b2b3bc916fb97516e)
1 /*
2  * Copyright (c) 1998-2003 Sendmail, Inc. and its suppliers.
3  *	All rights reserved.
4  * Copyright (c) 1994, 1996-1997 Eric P. Allman.  All rights reserved.
5  * Copyright (c) 1994
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * By using this file, you agree to the terms and conditions set
9  * forth in the LICENSE file which can be found at the top level of
10  * the sendmail distribution.
11  *
12  */
13 
14 #include <sendmail.h>
15 #include <string.h>
16 
17 SM_RCSID("@(#)$Id: mime.c,v 8.136 2004/03/22 18:21:34 ca Exp $")
18 
19 /*
20 **  MIME support.
21 **
22 **	I am indebted to John Beck of Hewlett-Packard, who contributed
23 **	his code to me for inclusion.  As it turns out, I did not use
24 **	his code since he used a "minimum change" approach that used
25 **	several temp files, and I wanted a "minimum impact" approach
26 **	that would avoid copying.  However, looking over his code
27 **	helped me cement my understanding of the problem.
28 **
29 **	I also looked at, but did not directly use, Nathaniel
30 **	Borenstein's "code.c" module.  Again, it functioned as
31 **	a file-to-file translator, which did not fit within my
32 **	design bounds, but it was a useful base for understanding
33 **	the problem.
34 */
35 
/*
**  Use the "old" mime 7 to 8 algorithm by default.
**
**	A non-zero MIME7TO8_OLD selects the CHK_EOL variant in mime7to8()
**	that only trims the decoded line ending; the alternative variant
**	additionally sets PXLF_NOADDEOL when no line ending was found.
**	The build may predefine MIME7TO8_OLD to override this default.
*/
#ifndef MIME7TO8_OLD
# define MIME7TO8_OLD	1
#endif /* ! MIME7TO8_OLD */
40 
41 #if MIME8TO7
/* file-local helpers used by mime8to7(); all are defined later in this file */
static int	isboundary __P((char *, char **));
static int	mimeboundary __P((char *, char **));
static int	mime_getchar __P((SM_FILE_T *, char **, int *));
static int	mime_getchar_crlf __P((SM_FILE_T *, char **, int *));

/*
**  Character sets for hex and base64 encoding: the value to encode
**  (4 bits for hex, 6 bits for base64) is used directly as the index.
*/
static char	Base16Code[] =	"0123456789ABCDEF";
static char	Base64Code[] =	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/*
**  Types of MIME boundaries.  The values double as indices into
**  MimeBoundaryNames[] below, so the two must be kept in step.
*/
# define MBT_SYNTAX	0	/* syntax error */
# define MBT_NOTSEP	1	/* not a boundary */
# define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
# define MBT_FINAL	3	/* final boundary (trailing -- included) */

/* printable names of the MBT_* values, used only in debug output */
static char	*MimeBoundaryNames[] =
{
	"SYNTAX",	"NOTSEP",	"INTERMED",	"FINAL"
};

/*
**  Set to true at the start of every mime8to7() call (and cleared there
**  for class 'b' types when USE_B_CLASS is defined); when true,
**  mime_getchar_crlf() expands each NL into CR LF.
*/
static bool	MapNLtoCRLF;
63 
64 /*
65 **  MIME8TO7 -- output 8 bit body in 7 bit format
66 **
67 **	The header has already been output -- this has to do the
68 **	8 to 7 bit conversion.  It would be easy if we didn't have
69 **	to deal with nested formats (multipart/xxx and message/rfc822).
70 **
71 **	We won't be called if we don't have to do a conversion, and
72 **	appropriate MIME-Version: and Content-Type: fields have been
73 **	output.  Any Content-Transfer-Encoding: field has not been
74 **	output, and we can add it here.
75 **
76 **	Parameters:
77 **		mci -- mailer connection information.
78 **		header -- the header for this body part.
79 **		e -- envelope.
80 **		boundaries -- the currently pending message boundaries.
81 **			NULL if we are processing the outer portion.
82 **		flags -- to tweak processing.
83 **
84 **	Returns:
85 **		An indicator of what terminated the message part:
**		  MBT_FINAL -- the final boundary, or end of file
**		  MBT_INTERMED -- an intermediate boundary
**		  MBT_NOTSEP -- reading stopped without a boundary or
**			end of file (e.g., on a read error)
89 */
90 
/*
**  One "name=value" parameter broken out of a Content-Type: header
**  by mime8to7().
*/
struct args
{
	/* parameter name (the token that precedes the "=") */
	char	*a_field;

	/* parameter value (the token that follows the "="); may be NULL */
	char	*a_value;
};
96 
97 int
98 mime8to7(mci, header, e, boundaries, flags)
99 	register MCI *mci;
100 	HDR *header;
101 	register ENVELOPE *e;
102 	char **boundaries;
103 	int flags;
104 {
105 	register char *p;
106 	int linelen;
107 	int bt;
108 	off_t offset;
109 	size_t sectionsize, sectionhighbits;
110 	int i;
111 	char *type;
112 	char *subtype;
113 	char *cte;
114 	char **pvp;
115 	int argc = 0;
116 	char *bp;
117 	bool use_qp = false;
118 	struct args argv[MAXMIMEARGS];
119 	char bbuf[128];
120 	char buf[MAXLINE];
121 	char pvpbuf[MAXLINE];
122 	extern unsigned char MimeTokenTab[256];
123 
124 	if (tTd(43, 1))
125 	{
126 		sm_dprintf("mime8to7: flags = %x, boundaries =", flags);
127 		if (boundaries[0] == NULL)
128 			sm_dprintf(" <none>");
129 		else
130 		{
131 			for (i = 0; boundaries[i] != NULL; i++)
132 				sm_dprintf(" %s", boundaries[i]);
133 		}
134 		sm_dprintf("\n");
135 	}
136 	MapNLtoCRLF = true;
137 	p = hvalue("Content-Transfer-Encoding", header);
138 	if (p == NULL ||
139 	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
140 			   MimeTokenTab, false)) == NULL ||
141 	    pvp[0] == NULL)
142 	{
143 		cte = NULL;
144 	}
145 	else
146 	{
147 		cataddr(pvp, NULL, buf, sizeof buf, '\0');
148 		cte = sm_rpool_strdup_x(e->e_rpool, buf);
149 	}
150 
151 	type = subtype = NULL;
152 	p = hvalue("Content-Type", header);
153 	if (p == NULL)
154 	{
155 		if (bitset(M87F_DIGEST, flags))
156 			p = "message/rfc822";
157 		else
158 			p = "text/plain";
159 	}
160 	if (p != NULL &&
161 	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
162 			   MimeTokenTab, false)) != NULL &&
163 	    pvp[0] != NULL)
164 	{
165 		if (tTd(43, 40))
166 		{
167 			for (i = 0; pvp[i] != NULL; i++)
168 				sm_dprintf("pvp[%d] = \"%s\"\n", i, pvp[i]);
169 		}
170 		type = *pvp++;
171 		if (*pvp != NULL && strcmp(*pvp, "/") == 0 &&
172 		    *++pvp != NULL)
173 		{
174 			subtype = *pvp++;
175 		}
176 
177 		/* break out parameters */
178 		while (*pvp != NULL && argc < MAXMIMEARGS)
179 		{
180 			/* skip to semicolon separator */
181 			while (*pvp != NULL && strcmp(*pvp, ";") != 0)
182 				pvp++;
183 			if (*pvp++ == NULL || *pvp == NULL)
184 				break;
185 
186 			/* complain about empty values */
187 			if (strcmp(*pvp, ";") == 0)
188 			{
189 				usrerr("mime8to7: Empty parameter in Content-Type header");
190 
191 				/* avoid bounce loops */
192 				e->e_flags |= EF_DONT_MIME;
193 				continue;
194 			}
195 
196 			/* extract field name */
197 			argv[argc].a_field = *pvp++;
198 
199 			/* see if there is a value */
200 			if (*pvp != NULL && strcmp(*pvp, "=") == 0 &&
201 			    (*++pvp == NULL || strcmp(*pvp, ";") != 0))
202 			{
203 				argv[argc].a_value = *pvp;
204 				argc++;
205 			}
206 		}
207 	}
208 
209 	/* check for disaster cases */
210 	if (type == NULL)
211 		type = "-none-";
212 	if (subtype == NULL)
213 		subtype = "-none-";
214 
215 	/* don't propogate some flags more than one level into the message */
216 	flags &= ~M87F_DIGEST;
217 
218 	/*
219 	**  Check for cases that can not be encoded.
220 	**
221 	**	For example, you can't encode certain kinds of types
222 	**	or already-encoded messages.  If we find this case,
223 	**	just copy it through.
224 	*/
225 
226 	(void) sm_snprintf(buf, sizeof buf, "%.100s/%.100s", type, subtype);
227 	if (wordinclass(buf, 'n') || (cte != NULL && !wordinclass(cte, 'e')))
228 		flags |= M87F_NO8BIT;
229 
230 # ifdef USE_B_CLASS
231 	if (wordinclass(buf, 'b') || wordinclass(type, 'b'))
232 		MapNLtoCRLF = false;
233 # endif /* USE_B_CLASS */
234 	if (wordinclass(buf, 'q') || wordinclass(type, 'q'))
235 		use_qp = true;
236 
237 	/*
238 	**  Multipart requires special processing.
239 	**
240 	**	Do a recursive descent into the message.
241 	*/
242 
243 	if (sm_strcasecmp(type, "multipart") == 0 &&
244 	    (!bitset(M87F_NO8BIT, flags) || bitset(M87F_NO8TO7, flags)))
245 	{
246 
247 		if (sm_strcasecmp(subtype, "digest") == 0)
248 			flags |= M87F_DIGEST;
249 
250 		for (i = 0; i < argc; i++)
251 		{
252 			if (sm_strcasecmp(argv[i].a_field, "boundary") == 0)
253 				break;
254 		}
255 		if (i >= argc || argv[i].a_value == NULL)
256 		{
257 			usrerr("mime8to7: Content-Type: \"%s\": %s boundary",
258 				i >= argc ? "missing" : "bogus", p);
259 			p = "---";
260 
261 			/* avoid bounce loops */
262 			e->e_flags |= EF_DONT_MIME;
263 		}
264 		else
265 		{
266 			p = argv[i].a_value;
267 			stripquotes(p);
268 		}
269 		if (sm_strlcpy(bbuf, p, sizeof bbuf) >= sizeof bbuf)
270 		{
271 			usrerr("mime8to7: multipart boundary \"%s\" too long",
272 				p);
273 
274 			/* avoid bounce loops */
275 			e->e_flags |= EF_DONT_MIME;
276 		}
277 
278 		if (tTd(43, 1))
279 			sm_dprintf("mime8to7: multipart boundary \"%s\"\n",
280 				bbuf);
281 		for (i = 0; i < MAXMIMENESTING; i++)
282 		{
283 			if (boundaries[i] == NULL)
284 				break;
285 		}
286 		if (i >= MAXMIMENESTING)
287 		{
288 			usrerr("mime8to7: multipart nesting boundary too deep");
289 
290 			/* avoid bounce loops */
291 			e->e_flags |= EF_DONT_MIME;
292 		}
293 		else
294 		{
295 			boundaries[i] = bbuf;
296 			boundaries[i + 1] = NULL;
297 		}
298 		mci->mci_flags |= MCIF_INMIME;
299 
300 		/* skip the early "comment" prologue */
301 		putline("", mci);
302 		mci->mci_flags &= ~MCIF_INHEADER;
303 		bt = MBT_FINAL;
304 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf)
305 			!= NULL)
306 		{
307 			bt = mimeboundary(buf, boundaries);
308 			if (bt != MBT_NOTSEP)
309 				break;
310 			putxline(buf, strlen(buf), mci,
311 				 PXLF_MAPFROM|PXLF_STRIP8BIT);
312 			if (tTd(43, 99))
313 				sm_dprintf("  ...%s", buf);
314 		}
315 		if (sm_io_eof(e->e_dfp))
316 			bt = MBT_FINAL;
317 		while (bt != MBT_FINAL)
318 		{
319 			auto HDR *hdr = NULL;
320 
321 			(void) sm_strlcpyn(buf, sizeof buf, 2, "--", bbuf);
322 			putline(buf, mci);
323 			if (tTd(43, 35))
324 				sm_dprintf("  ...%s\n", buf);
325 			collect(e->e_dfp, false, &hdr, e, false);
326 			if (tTd(43, 101))
327 				putline("+++after collect", mci);
328 			putheader(mci, hdr, e, flags);
329 			if (tTd(43, 101))
330 				putline("+++after putheader", mci);
331 			bt = mime8to7(mci, hdr, e, boundaries, flags);
332 		}
333 		(void) sm_strlcpyn(buf, sizeof buf, 3, "--", bbuf, "--");
334 		putline(buf, mci);
335 		if (tTd(43, 35))
336 			sm_dprintf("  ...%s\n", buf);
337 		boundaries[i] = NULL;
338 		mci->mci_flags &= ~MCIF_INMIME;
339 
340 		/* skip the late "comment" epilogue */
341 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf)
342 			!= NULL)
343 		{
344 			bt = mimeboundary(buf, boundaries);
345 			if (bt != MBT_NOTSEP)
346 				break;
347 			putxline(buf, strlen(buf), mci,
348 				 PXLF_MAPFROM|PXLF_STRIP8BIT);
349 			if (tTd(43, 99))
350 				sm_dprintf("  ...%s", buf);
351 		}
352 		if (sm_io_eof(e->e_dfp))
353 			bt = MBT_FINAL;
354 		if (tTd(43, 3))
355 			sm_dprintf("\t\t\tmime8to7=>%s (multipart)\n",
356 				MimeBoundaryNames[bt]);
357 		return bt;
358 	}
359 
360 	/*
361 	**  Message/xxx types -- recurse exactly once.
362 	**
363 	**	Class 's' is predefined to have "rfc822" only.
364 	*/
365 
366 	if (sm_strcasecmp(type, "message") == 0)
367 	{
368 		if (!wordinclass(subtype, 's'))
369 		{
370 			flags |= M87F_NO8BIT;
371 		}
372 		else
373 		{
374 			auto HDR *hdr = NULL;
375 
376 			putline("", mci);
377 
378 			mci->mci_flags |= MCIF_INMIME;
379 			collect(e->e_dfp, false, &hdr, e, false);
380 			if (tTd(43, 101))
381 				putline("+++after collect", mci);
382 			putheader(mci, hdr, e, flags);
383 			if (tTd(43, 101))
384 				putline("+++after putheader", mci);
385 			if (hvalue("MIME-Version", hdr) == NULL &&
386 			    !bitset(M87F_NO8TO7, flags))
387 				putline("MIME-Version: 1.0", mci);
388 			bt = mime8to7(mci, hdr, e, boundaries, flags);
389 			mci->mci_flags &= ~MCIF_INMIME;
390 			return bt;
391 		}
392 	}
393 
394 	/*
395 	**  Non-compound body type
396 	**
397 	**	Compute the ratio of seven to eight bit characters;
398 	**	use that as a heuristic to decide how to do the
399 	**	encoding.
400 	*/
401 
402 	sectionsize = sectionhighbits = 0;
403 	if (!bitset(M87F_NO8BIT|M87F_NO8TO7, flags))
404 	{
405 		/* remember where we were */
406 		offset = sm_io_tell(e->e_dfp, SM_TIME_DEFAULT);
407 		if (offset == -1)
408 			syserr("mime8to7: cannot sm_io_tell on %cf%s",
409 			       DATAFL_LETTER, e->e_id);
410 
411 		/* do a scan of this body type to count character types */
412 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf)
413 			!= NULL)
414 		{
415 			if (mimeboundary(buf, boundaries) != MBT_NOTSEP)
416 				break;
417 			for (p = buf; *p != '\0'; p++)
418 			{
419 				/* count bytes with the high bit set */
420 				sectionsize++;
421 				if (bitset(0200, *p))
422 					sectionhighbits++;
423 			}
424 
425 			/*
426 			**  Heuristic: if 1/4 of the first 4K bytes are 8-bit,
427 			**  assume base64.  This heuristic avoids double-reading
428 			**  large graphics or video files.
429 			*/
430 
431 			if (sectionsize >= 4096 &&
432 			    sectionhighbits > sectionsize / 4)
433 				break;
434 		}
435 
436 		/* return to the original offset for processing */
437 		/* XXX use relative seeks to handle >31 bit file sizes? */
438 		if (sm_io_seek(e->e_dfp, SM_TIME_DEFAULT, offset, SEEK_SET) < 0)
439 			syserr("mime8to7: cannot sm_io_fseek on %cf%s",
440 			       DATAFL_LETTER, e->e_id);
441 		else
442 			sm_io_clearerr(e->e_dfp);
443 	}
444 
445 	/*
446 	**  Heuristically determine encoding method.
447 	**	If more than 1/8 of the total characters have the
448 	**	eighth bit set, use base64; else use quoted-printable.
449 	**	However, only encode binary encoded data as base64,
450 	**	since otherwise the NL=>CRLF mapping will be a problem.
451 	*/
452 
453 	if (tTd(43, 8))
454 	{
455 		sm_dprintf("mime8to7: %ld high bit(s) in %ld byte(s), cte=%s, type=%s/%s\n",
456 			(long) sectionhighbits, (long) sectionsize,
457 			cte == NULL ? "[none]" : cte,
458 			type == NULL ? "[none]" : type,
459 			subtype == NULL ? "[none]" : subtype);
460 	}
461 	if (cte != NULL && sm_strcasecmp(cte, "binary") == 0)
462 		sectionsize = sectionhighbits;
463 	linelen = 0;
464 	bp = buf;
465 	if (sectionhighbits == 0)
466 	{
467 		/* no encoding necessary */
468 		if (cte != NULL &&
469 		    bitset(MCIF_CVT8TO7|MCIF_CVT7TO8|MCIF_INMIME,
470 			   mci->mci_flags) &&
471 		    !bitset(M87F_NO8TO7, flags))
472 		{
473 			/*
474 			**  Skip _unless_ in MIME mode and potentially
475 			**  converting from 8 bit to 7 bit MIME.  See
476 			**  putheader() for the counterpart where the
477 			**  CTE header is skipped in the opposite
478 			**  situation.
479 			*/
480 
481 			(void) sm_snprintf(buf, sizeof buf,
482 				"Content-Transfer-Encoding: %.200s", cte);
483 			putline(buf, mci);
484 			if (tTd(43, 36))
485 				sm_dprintf("  ...%s\n", buf);
486 		}
487 		putline("", mci);
488 		mci->mci_flags &= ~MCIF_INHEADER;
489 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf)
490 			!= NULL)
491 		{
492 			bt = mimeboundary(buf, boundaries);
493 			if (bt != MBT_NOTSEP)
494 				break;
495 			putline(buf, mci);
496 		}
497 		if (sm_io_eof(e->e_dfp))
498 			bt = MBT_FINAL;
499 	}
500 	else if (!MapNLtoCRLF ||
501 		 (sectionsize / 8 < sectionhighbits && !use_qp))
502 	{
503 		/* use base64 encoding */
504 		int c1, c2;
505 
506 		if (tTd(43, 36))
507 			sm_dprintf("  ...Content-Transfer-Encoding: base64\n");
508 		putline("Content-Transfer-Encoding: base64", mci);
509 		(void) sm_snprintf(buf, sizeof buf,
510 			"X-MIME-Autoconverted: from 8bit to base64 by %s id %s",
511 			MyHostName, e->e_id);
512 		putline(buf, mci);
513 		putline("", mci);
514 		mci->mci_flags &= ~MCIF_INHEADER;
515 		while ((c1 = mime_getchar_crlf(e->e_dfp, boundaries, &bt)) !=
516 			SM_IO_EOF)
517 		{
518 			if (linelen > 71)
519 			{
520 				*bp = '\0';
521 				putline(buf, mci);
522 				linelen = 0;
523 				bp = buf;
524 			}
525 			linelen += 4;
526 			*bp++ = Base64Code[(c1 >> 2)];
527 			c1 = (c1 & 0x03) << 4;
528 			c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt);
529 			if (c2 == SM_IO_EOF)
530 			{
531 				*bp++ = Base64Code[c1];
532 				*bp++ = '=';
533 				*bp++ = '=';
534 				break;
535 			}
536 			c1 |= (c2 >> 4) & 0x0f;
537 			*bp++ = Base64Code[c1];
538 			c1 = (c2 & 0x0f) << 2;
539 			c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt);
540 			if (c2 == SM_IO_EOF)
541 			{
542 				*bp++ = Base64Code[c1];
543 				*bp++ = '=';
544 				break;
545 			}
546 			c1 |= (c2 >> 6) & 0x03;
547 			*bp++ = Base64Code[c1];
548 			*bp++ = Base64Code[c2 & 0x3f];
549 		}
550 		*bp = '\0';
551 		putline(buf, mci);
552 	}
553 	else
554 	{
555 		/* use quoted-printable encoding */
556 		int c1, c2;
557 		int fromstate;
558 		BITMAP256 badchars;
559 
560 		/* set up map of characters that must be mapped */
561 		clrbitmap(badchars);
562 		for (c1 = 0x00; c1 < 0x20; c1++)
563 			setbitn(c1, badchars);
564 		clrbitn('\t', badchars);
565 		for (c1 = 0x7f; c1 < 0x100; c1++)
566 			setbitn(c1, badchars);
567 		setbitn('=', badchars);
568 		if (bitnset(M_EBCDIC, mci->mci_mailer->m_flags))
569 			for (p = "!\"#$@[\\]^`{|}~"; *p != '\0'; p++)
570 				setbitn(*p, badchars);
571 
572 		if (tTd(43, 36))
573 			sm_dprintf("  ...Content-Transfer-Encoding: quoted-printable\n");
574 		putline("Content-Transfer-Encoding: quoted-printable", mci);
575 		(void) sm_snprintf(buf, sizeof buf,
576 			"X-MIME-Autoconverted: from 8bit to quoted-printable by %s id %s",
577 			MyHostName, e->e_id);
578 		putline(buf, mci);
579 		putline("", mci);
580 		mci->mci_flags &= ~MCIF_INHEADER;
581 		fromstate = 0;
582 		c2 = '\n';
583 		while ((c1 = mime_getchar(e->e_dfp, boundaries, &bt)) !=
584 			SM_IO_EOF)
585 		{
586 			if (c1 == '\n')
587 			{
588 				if (c2 == ' ' || c2 == '\t')
589 				{
590 					*bp++ = '=';
591 					*bp++ = Base16Code[(c2 >> 4) & 0x0f];
592 					*bp++ = Base16Code[c2 & 0x0f];
593 				}
594 				if (buf[0] == '.' && bp == &buf[1])
595 				{
596 					buf[0] = '=';
597 					*bp++ = Base16Code[('.' >> 4) & 0x0f];
598 					*bp++ = Base16Code['.' & 0x0f];
599 				}
600 				*bp = '\0';
601 				putline(buf, mci);
602 				linelen = fromstate = 0;
603 				bp = buf;
604 				c2 = c1;
605 				continue;
606 			}
607 			if (c2 == ' ' && linelen == 4 && fromstate == 4 &&
608 			    bitnset(M_ESCFROM, mci->mci_mailer->m_flags))
609 			{
610 				*bp++ = '=';
611 				*bp++ = '2';
612 				*bp++ = '0';
613 				linelen += 3;
614 			}
615 			else if (c2 == ' ' || c2 == '\t')
616 			{
617 				*bp++ = c2;
618 				linelen++;
619 			}
620 			if (linelen > 72 &&
621 			    (linelen > 75 || c1 != '.' ||
622 			     (linelen > 73 && c2 == '.')))
623 			{
624 				if (linelen > 73 && c2 == '.')
625 					bp--;
626 				else
627 					c2 = '\n';
628 				*bp++ = '=';
629 				*bp = '\0';
630 				putline(buf, mci);
631 				linelen = fromstate = 0;
632 				bp = buf;
633 				if (c2 == '.')
634 				{
635 					*bp++ = '.';
636 					linelen++;
637 				}
638 			}
639 			if (bitnset(bitidx(c1), badchars))
640 			{
641 				*bp++ = '=';
642 				*bp++ = Base16Code[(c1 >> 4) & 0x0f];
643 				*bp++ = Base16Code[c1 & 0x0f];
644 				linelen += 3;
645 			}
646 			else if (c1 != ' ' && c1 != '\t')
647 			{
648 				if (linelen < 4 && c1 == "From"[linelen])
649 					fromstate++;
650 				*bp++ = c1;
651 				linelen++;
652 			}
653 			c2 = c1;
654 		}
655 
656 		/* output any saved character */
657 		if (c2 == ' ' || c2 == '\t')
658 		{
659 			*bp++ = '=';
660 			*bp++ = Base16Code[(c2 >> 4) & 0x0f];
661 			*bp++ = Base16Code[c2 & 0x0f];
662 			linelen += 3;
663 		}
664 
665 		if (linelen > 0 || boundaries[0] != NULL)
666 		{
667 			*bp = '\0';
668 			putline(buf, mci);
669 		}
670 
671 	}
672 	if (tTd(43, 3))
673 		sm_dprintf("\t\t\tmime8to7=>%s (basic)\n", MimeBoundaryNames[bt]);
674 	return bt;
675 }
/*
**  MIME_GETCHAR -- get a character for MIME processing
**
**	Treats boundaries as SM_IO_EOF.
**
**	To recognize a boundary, input is read ahead into a small static
**	buffer; characters that turn out not to belong to a boundary are
**	handed out one per call on subsequent calls.  Because this state
**	is static, it is shared by all callers of this function.
**
**	Parameters:
**		fp -- the input file.
**		boundaries -- the current MIME boundaries.
**		btp -- if the return value is SM_IO_EOF, *btp is set to
**			the type of the boundary.
**
**	Returns:
**		The next character in the input stream, or SM_IO_EOF
**		when a pending boundary line or the end of the input
**		has been reached.
*/

static int
mime_getchar(fp, boundaries, btp)
	register SM_FILE_T *fp;
	char **boundaries;
	int *btp;
{
	int c;
	static unsigned char *bp = NULL;	/* next buffered character */
	static int buflen = 0;		/* # of buffered characters left */
	static bool atbol = true;	/* at beginning of line */
	static int bt = MBT_SYNTAX;	/* boundary type of next SM_IO_EOF */
	static unsigned char buf[128];	/* need not be a full line */
	int start = 0;			/* indicates position of - in buffer */

	if (buflen == 1 && *bp == '\n')
	{
		/* last \n in buffer may be part of next MIME boundary */
		c = *bp;
	}
	else if (buflen > 0)
	{
		/* hand out a character that was read ahead earlier */
		buflen--;
		return *bp++;
	}
	else
		c = sm_io_getc(fp, SM_TIME_DEFAULT);

	/* start refilling the lookahead buffer from scratch */
	bp = buf;
	buflen = 0;
	if (c == '\n')
	{
		/* might be part of a MIME boundary */
		*bp++ = c;
		atbol = true;
		c = sm_io_getc(fp, SM_TIME_DEFAULT);
		if (c == '\n')
		{
			/*
			**  Two newlines in a row: push the second one back
			**  and deliver the first right away.
			*/

			(void) sm_io_ungetc(fp, SM_TIME_DEFAULT, c);
			return c;
		}

		/* buf[0] holds the newline; a boundary would start at buf[1] */
		start = 1;
	}
	if (c != SM_IO_EOF)
		*bp++ = c;
	else
		bt = MBT_FINAL;
	if (atbol && c == '-')
	{
		/* check for a message boundary */
		c = sm_io_getc(fp, SM_TIME_DEFAULT);
		if (c != '-')
		{
			/* a single '-': not a boundary, release what was read */
			if (c != SM_IO_EOF)
				*bp++ = c;
			else
				bt = MBT_FINAL;
			buflen = bp - buf - 1;
			bp = buf;
			return *bp++;
		}

		/* got "--", now check for rest of separator */
		*bp++ = '-';
		while (bp < &buf[sizeof buf - 2] &&
		       (c = sm_io_getc(fp, SM_TIME_DEFAULT)) != SM_IO_EOF &&
		       c != '\n')
		{
			*bp++ = c;
		}
		*bp = '\0';	/* XXX simply cut off? */
		bt = mimeboundary((char *) &buf[start], boundaries);
		switch (bt)
		{
		  case MBT_FINAL:
		  case MBT_INTERMED:
			/*
			**  We have a message boundary.  Everything that was
			**  buffered (including a newline saved in buf[0])
			**  is discarded.
			*/

			buflen = 0;
			*btp = bt;
			return SM_IO_EOF;
		}

		/* not a boundary: keep the terminating character if it fits */
		if (bp < &buf[sizeof buf - 2] && c != SM_IO_EOF)
			*bp++ = c;
	}

	atbol = c == '\n';

	/* one character is returned now, the rest stays buffered */
	buflen = bp - buf - 1;
	if (buflen < 0)
	{
		/* nothing was buffered, i.e., the input is exhausted */
		*btp = bt;
		return SM_IO_EOF;
	}
	bp = buf;
	return *bp++;
}
785 /*
786 **  MIME_GETCHAR_CRLF -- do mime_getchar, but translate NL => CRLF
787 **
788 **	Parameters:
789 **		fp -- the input file.
790 **		boundaries -- the current MIME boundaries.
791 **		btp -- if the return value is SM_IO_EOF, *btp is set to
792 **			the type of the boundary.
793 **
794 **	Returns:
795 **		The next character in the input stream.
796 */
797 
798 static int
799 mime_getchar_crlf(fp, boundaries, btp)
800 	register SM_FILE_T *fp;
801 	char **boundaries;
802 	int *btp;
803 {
804 	static bool sendlf = false;
805 	int c;
806 
807 	if (sendlf)
808 	{
809 		sendlf = false;
810 		return '\n';
811 	}
812 	c = mime_getchar(fp, boundaries, btp);
813 	if (c == '\n' && MapNLtoCRLF)
814 	{
815 		sendlf = true;
816 		return '\r';
817 	}
818 	return c;
819 }
820 /*
821 **  MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
822 **
823 **	Parameters:
824 **		line -- the input line.
825 **		boundaries -- the set of currently pending boundaries.
826 **
827 **	Returns:
828 **		MBT_NOTSEP -- if this is not a separator line
829 **		MBT_INTERMED -- if this is an intermediate separator
830 **		MBT_FINAL -- if this is a final boundary
831 **		MBT_SYNTAX -- if this is a boundary for the wrong
832 **			enclosure -- i.e., a syntax error.
833 */
834 
835 static int
836 mimeboundary(line, boundaries)
837 	register char *line;
838 	char **boundaries;
839 {
840 	int type = MBT_NOTSEP;
841 	int i;
842 	int savec;
843 
844 	if (line[0] != '-' || line[1] != '-' || boundaries == NULL)
845 		return MBT_NOTSEP;
846 	i = strlen(line);
847 	if (i > 0 && line[i - 1] == '\n')
848 		i--;
849 
850 	/* strip off trailing whitespace */
851 	while (i > 0 && (line[i - 1] == ' ' || line[i - 1] == '\t'))
852 		i--;
853 	savec = line[i];
854 	line[i] = '\0';
855 
856 	if (tTd(43, 5))
857 		sm_dprintf("mimeboundary: line=\"%s\"... ", line);
858 
859 	/* check for this as an intermediate boundary */
860 	if (isboundary(&line[2], boundaries) >= 0)
861 		type = MBT_INTERMED;
862 	else if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0)
863 	{
864 		/* check for a final boundary */
865 		line[i - 2] = '\0';
866 		if (isboundary(&line[2], boundaries) >= 0)
867 			type = MBT_FINAL;
868 		line[i - 2] = '-';
869 	}
870 
871 	line[i] = savec;
872 	if (tTd(43, 5))
873 		sm_dprintf("%s\n", MimeBoundaryNames[type]);
874 	return type;
875 }
876 /*
877 **  DEFCHARSET -- return default character set for message
878 **
879 **	The first choice for character set is for the mailer
880 **	corresponding to the envelope sender.  If neither that
881 **	nor the global configuration file has a default character
882 **	set defined, return "unknown-8bit" as recommended by
883 **	RFC 1428 section 3.
884 **
885 **	Parameters:
886 **		e -- the envelope for this message.
887 **
888 **	Returns:
889 **		The default character set for that mailer.
890 */
891 
892 char *
893 defcharset(e)
894 	register ENVELOPE *e;
895 {
896 	if (e != NULL && e->e_from.q_mailer != NULL &&
897 	    e->e_from.q_mailer->m_defcharset != NULL)
898 		return e->e_from.q_mailer->m_defcharset;
899 	if (DefaultCharSet != NULL)
900 		return DefaultCharSet;
901 	return "unknown-8bit";
902 }
903 /*
904 **  ISBOUNDARY -- is a given string a currently valid boundary?
905 **
906 **	Parameters:
907 **		line -- the current input line.
908 **		boundaries -- the list of valid boundaries.
909 **
910 **	Returns:
911 **		The index number in boundaries if the line is found.
912 **		-1 -- otherwise.
913 **
914 */
915 
916 static int
917 isboundary(line, boundaries)
918 	char *line;
919 	char **boundaries;
920 {
921 	register int i;
922 
923 	for (i = 0; i <= MAXMIMENESTING && boundaries[i] != NULL; i++)
924 	{
925 		if (strcmp(line, boundaries[i]) == 0)
926 			return i;
927 	}
928 	return -1;
929 }
930 #endif /* MIME8TO7 */
931 
932 #if MIME7TO8
/* quoted-printable decoder used by mime7to8(); defined after mime7to8() */
static int	mime_fromqp __P((unsigned char *, unsigned char **, int));
934 
935 /*
936 **  MIME7TO8 -- output 7 bit encoded MIME body in 8 bit format
937 **
938 **  This is a hack. Supports translating the two 7-bit body-encodings
939 **  (quoted-printable and base64) to 8-bit coded bodies.
940 **
941 **  There is not much point in supporting multipart here, as the UA
942 **  will be able to deal with encoded MIME bodies if it can parse MIME
943 **  multipart messages.
944 **
945 **  Note also that we won't be called unless it is a text/plain MIME
946 **  message, encoded base64 or QP and mailer flag '9' has been defined
947 **  on mailer.
948 **
949 **  Contributed by Marius Olaffson <marius@rhi.hi.is>.
950 **
951 **	Parameters:
952 **		mci -- mailer connection information.
953 **		header -- the header for this body part.
954 **		e -- envelope.
955 **
956 **	Returns:
957 **		none.
958 */
959 
/*
**  Map an ASCII character to its 6 bit base64 value; -1 marks characters
**  that are not part of the base64 alphabet.
**
**	The element type is explicitly signed: whether plain "char" is
**	signed is implementation-defined, and with an unsigned "char"
**	the -1 entries would be read back as 255.
*/

static const signed char index_64[128] =
{
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
	52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
	-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
	15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
	-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
	41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

/* base64 value of c, or -1 if c is outside 0..127 or not a base64 digit */
# define CHAR64(c)  (((c) < 0 || (c) > 127) ? -1 : index_64[(c)])
973 
/*
**  Outline: parse the Content-Transfer-Encoding: header, emit the
**  replacement header lines, then decode the body from e->e_dfp either
**  as base64 (character by character) or as quoted-printable (line by
**  line via mime_fromqp()), writing the result with putxline().
*/
void
mime7to8(mci, header, e)
	register MCI *mci;
	HDR *header;
	register ENVELOPE *e;
{
	int pxflags;
	register char *p;
	char *cte;
	char **pvp;
	unsigned char *fbufp;		/* next free position in fbuf */
	char buf[MAXLINE];		/* encoded input line / scratch */
	unsigned char fbuf[MAXLINE + 1];	/* decoded output line */
	char pvpbuf[MAXLINE];
	extern unsigned char MimeTokenTab[256];

	p = hvalue("Content-Transfer-Encoding", header);
	if (p == NULL ||
	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
			   MimeTokenTab, false)) == NULL ||
	    pvp[0] == NULL)
	{
		/* "can't happen" -- upper level should have caught this */
		syserr("mime7to8: unparsable CTE %s", p == NULL ? "<NULL>" : p);

		/* avoid bounce loops */
		e->e_flags |= EF_DONT_MIME;

		/* cheap failsafe algorithm -- should work on text/plain */
		if (p != NULL)
		{
			(void) sm_snprintf(buf, sizeof buf,
				"Content-Transfer-Encoding: %s", p);
			putline(buf, mci);
		}
		putline("", mci);
		mci->mci_flags &= ~MCIF_INHEADER;

		/* copy the body through without decoding it */
		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf)
			!= NULL)
			putline(buf, mci);
		return;
	}
	cataddr(pvp, NULL, buf, sizeof buf, '\0');
	cte = sm_rpool_strdup_x(e->e_rpool, buf);

	/* announce the new encoding and finish the header */
	mci->mci_flags |= MCIF_INHEADER;
	putline("Content-Transfer-Encoding: 8bit", mci);
	(void) sm_snprintf(buf, sizeof buf,
		"X-MIME-Autoconverted: from %.200s to 8bit by %s id %s",
		cte, MyHostName, e->e_id);
	putline(buf, mci);
	putline("", mci);
	mci->mci_flags &= ~MCIF_INHEADER;

	/*
	**  Translate body encoding to 8-bit.  Supports two types of
	**  encodings; "base64" and "quoted-printable". Assume qp if
	**  it is not base64.
	*/

	pxflags = PXLF_MAPFROM;
	if (sm_strcasecmp(cte, "base64") == 0)
	{
		int c1, c2, c3, c4;

		/*
		**  Collect four non-space input characters per round and
		**  turn them into up to three output bytes.
		*/

		fbufp = fbuf;
		while ((c1 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT)) !=
			SM_IO_EOF)
		{
			if (isascii(c1) && isspace(c1))
				continue;

			do
			{
				c2 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
			} while (isascii(c2) && isspace(c2));
			if (c2 == SM_IO_EOF)
				break;

			do
			{
				c3 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
			} while (isascii(c3) && isspace(c3));
			if (c3 == SM_IO_EOF)
				break;

			do
			{
				c4 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
			} while (isascii(c4) && isspace(c4));
			if (c4 == SM_IO_EOF)
				break;

			/* padding in the first two positions: drop this group */
			if (c1 == '=' || c2 == '=')
				continue;
			c1 = CHAR64(c1);
			c2 = CHAR64(c2);

/*
**  CHK_EOL backs fbufp up over a trailing "\n" or "\r\n" so that the
**  line ending is not counted in the length passed to putxline().  The
**  non-"old" variant also sets PXLF_NOADDEOL when there is no trailing
**  newline.
*/
#if MIME7TO8_OLD
#define CHK_EOL if (*--fbufp != '\n' || (fbufp > fbuf && *--fbufp != '\r')) \
			++fbufp;
#else /* MIME7TO8_OLD */
#define CHK_EOL if (*--fbufp != '\n' || (fbufp > fbuf && *--fbufp != '\r')) \
		{					\
			++fbufp;			\
			pxflags |= PXLF_NOADDEOL;	\
		}
#endif /* MIME7TO8_OLD */

/*
**  PUTLINE64 accepts the byte just stored at *fbufp and writes fbuf out
**  when that byte was a newline or when MAXLINE bytes have accumulated.
*/
#define PUTLINE64	\
	do		\
	{		\
		if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE])	\
		{							\
			CHK_EOL;					\
			putxline((char *) fbuf, fbufp - fbuf, mci, pxflags); \
			pxflags &= ~PXLF_NOADDEOL;			\
			fbufp = fbuf;					\
		}	\
	} while (0)

			/* first output byte: 6 bits of c1, top 2 bits of c2 */
			*fbufp = (c1 << 2) | ((c2 & 0x30) >> 4);
			PUTLINE64;
			if (c3 == '=')
				continue;
			c3 = CHAR64(c3);

			/* second output byte: low 4 bits of c2, top 4 of c3 */
			*fbufp = ((c2 & 0x0f) << 4) | ((c3 & 0x3c) >> 2);
			PUTLINE64;
			if (c4 == '=')
				continue;
			c4 = CHAR64(c4);

			/* third output byte: low 2 bits of c3, all of c4 */
			*fbufp = ((c3 & 0x03) << 6) | c4;
			PUTLINE64;
		}
	}
	else
	{
		int off;

		/* quoted-printable */
		pxflags |= PXLF_NOADDEOL;
		fbufp = fbuf;
		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
				   sizeof buf) != NULL)
		{
			off = mime_fromqp((unsigned char *) buf, &fbufp,
					  &fbuf[MAXLINE] - fbufp);
again:
			/*
			**  -2: mime_fromqp() stopped early (e.g., at a soft
			**  line break); keep what is in fbuf and go on
			**  with the next input line.
			*/

			if (off < -1)
				continue;

			/* the "- 1" excludes the '\0' mime_fromqp() appended */
			if (fbufp - fbuf > 0)
				putxline((char *) fbuf, fbufp - fbuf - 1, mci,
					 pxflags);
			fbufp = fbuf;

			/* fbuf was too small: decode the rest of this line */
			if (off >= 0 && buf[off] != '\0')
			{
				off = mime_fromqp((unsigned char *) (buf + off),
						  &fbufp,
						  &fbuf[MAXLINE] - fbufp);
				goto again;
			}
		}
	}

	/* force out partial last line */
	if (fbufp > fbuf)
	{
		*fbufp = '\0';
		putxline((char *) fbuf, fbufp - fbuf, mci, pxflags);
	}

	/*
	**  The decoded text may end without an EOL.  Since this function
	**  is only called for text/plain MIME messages, it is safe to
	**  add an extra one at the end just in case.  This is a hack,
	**  but so is auto-converting MIME in the first place.
	*/

	putline("", mci);

	if (tTd(43, 3))
		sm_dprintf("\t\t\tmime7to8 => %s to 8bit done\n", cte);
}
1158 /*
1159 **  The following is based on Borenstein's "codes.c" module, with simplifying
1160 **  changes as we do not deal with multipart, and to do the translation in-core,
1161 **  with an attempt to prevent overrun of output buffers.
1162 **
1163 **  What is needed here are changes to defend this code better against
1164 **  bad encodings. Questionable to always return 0xFF for bad mappings.
1165 */
1166 
/*
**  Map an ASCII character to the value of the hex digit it represents;
**  -1 marks characters that are not hex digits.
**
**	The element type is explicitly signed: whether plain "char" is
**	signed is implementation-defined, and with an unsigned "char"
**	the -1 entries would be read back as 255, which defeats the
**	"== -1" tests in mime_fromqp().
*/

static const signed char index_hex[128] =
{
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* value of hex digit c, or -1 if c is outside 0..127 or not a hex digit */
# define HEXCHAR(c)  (((c) < 0 || (c) > 127) ? -1 : index_hex[(c)])

/*
**  MIME_FROMQP -- decode quoted printable string
**
**	Decodes up to and including the first '\n' of infile.  The
**	decoded bytes are stored through *outfile, which is advanced
**	past them.  Unless -2 or 0 (no room) is returned, a '\0' is
**	appended as well and *outfile is advanced past it too.
**
**	Parameters:
**		infile -- input (encoded) string
**		outfile -- output string
**		maxlen -- size of output buffer
**
**	Returns:
**		-2 if decoding failure (this includes "=" followed
**			by a newline, i.e., a soft line break)
**		-1 if infile completely decoded into outfile
**		>= 0 is the position in infile decoding
**			reached before maxlen was reached
*/

static int
mime_fromqp(infile, outfile, maxlen)
	unsigned char *infile;
	unsigned char **outfile;
	int maxlen;		/* Max # of chars allowed in outfile */
{
	int c1, c2;
	int nchar = 0;
	int eol = 0;		/* set once the terminating '\n' is stored */
	unsigned char *b;

	/* decrement by one for trailing '\0', at least one other char */
	if (--maxlen < 1)
		return 0;

	b = infile;
	while ((c1 = *infile++) != '\0' && nchar < maxlen)
	{
		if (c1 == '=')
		{
			if ((c1 = *infile++) == '\0')
				break;

			if (c1 == '\n' || (c1 = HEXCHAR(c1)) == -1)
			{
				/* ignore it and the rest of the buffer */
				return -2;
			}
			else
			{
				/* skip ahead to the next hex digit, if any */
				do
				{
					if ((c2 = *infile++) == '\0')
					{
						c2 = -1;
						break;
					}
				} while ((c2 = HEXCHAR(c2)) == -1);

				if (c2 == -1)
					break;
				nchar++;
				*(*outfile)++ = c1 << 4 | c2;
			}
		}
		else
		{
			nchar++;
			*(*outfile)++ = c1;
			if (c1 == '\n')
			{
				eol = 1;
				break;
			}
		}
	}
	*(*outfile)++ = '\0';

	/*
	**  Report a resume position only if the loop really stopped
	**  because the output was full.  When the line's own '\n' was
	**  the byte that filled it, the line is complete; returning the
	**  offset of that '\n' would make the caller decode it again
	**  and emit an extra empty line.
	*/

	if (nchar >= maxlen && !eol)
		return (infile - b - 1);
	return -1;
}
1253 #endif /* MIME7TO8 */
1254