xref: /freebsd/contrib/sendmail/src/mime.c (revision 357378bbdedf24ce2b90e9bd831af4a9db3ec70a)
1 /*
2  * Copyright (c) 1998-2003, 2006, 2013 Proofpoint, Inc. and its suppliers.
3  *	All rights reserved.
4  * Copyright (c) 1994, 1996-1997 Eric P. Allman.  All rights reserved.
5  * Copyright (c) 1994
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * By using this file, you agree to the terms and conditions set
9  * forth in the LICENSE file which can be found at the top level of
10  * the sendmail distribution.
11  *
12  */
13 
14 #include <sendmail.h>
15 #include <string.h>
16 
17 SM_RCSID("@(#)$Id: mime.c,v 8.149 2013-11-22 20:51:56 ca Exp $")
18 #include <sm/sendmail.h>
19 
20 /*
21 **  MIME support.
22 **
23 **	I am indebted to John Beck of Hewlett-Packard, who contributed
24 **	his code to me for inclusion.  As it turns out, I did not use
25 **	his code since he used a "minimum change" approach that used
26 **	several temp files, and I wanted a "minimum impact" approach
27 **	that would avoid copying.  However, looking over his code
28 **	helped me cement my understanding of the problem.
29 **
30 **	I also looked at, but did not directly use, Nathaniel
31 **	Borenstein's "code.c" module.  Again, it functioned as
32 **	a file-to-file translator, which did not fit within my
33 **	design bounds, but it was a useful base for understanding
34 **	the problem.
35 */
36 
37 /* use "old" mime 7 to 8 algorithm by default */
/* unless the build overrides it, select the "old" 7->8 bit algorithm */
#if !defined(MIME7TO8_OLD)
# define MIME7TO8_OLD	1
#endif /* !defined(MIME7TO8_OLD) */
41 
42 #if MIME8TO7
43 static int	isboundary __P((char *, char **));
44 static int	mimeboundary __P((char *, char **));
45 static int	mime_getchar __P((SM_FILE_T *, char **, int *));
46 static int	mime_getchar_crlf __P((SM_FILE_T *, char **, int *));
47 
48 /* character set for hex and base64 encoding */
49 static char	Base16Code[] =	"0123456789ABCDEF";
50 static char	Base64Code[] =	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
51 
52 /* types of MIME boundaries */
53 # define MBT_SYNTAX	0	/* syntax error */
54 # define MBT_NOTSEP	1	/* not a boundary */
55 # define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
56 # define MBT_FINAL	3	/* final boundary (trailing -- included) */
57 
58 static char	*MimeBoundaryNames[] =
59 {
60 	"SYNTAX",	"NOTSEP",	"INTERMED",	"FINAL"
61 };
62 
63 static bool	MapNLtoCRLF;
64 
65 /*
66 **  MIME8TO7 -- output 8 bit body in 7 bit format
67 **
68 **	The header has already been output -- this has to do the
69 **	8 to 7 bit conversion.  It would be easy if we didn't have
70 **	to deal with nested formats (multipart/xxx and message/rfc822).
71 **
72 **	We won't be called if we don't have to do a conversion, and
73 **	appropriate MIME-Version: and Content-Type: fields have been
74 **	output.  Any Content-Transfer-Encoding: field has not been
75 **	output, and we can add it here.
76 **
77 **	Parameters:
78 **		mci -- mailer connection information.
79 **		header -- the header for this body part.
80 **		e -- envelope.
81 **		boundaries -- the currently pending message boundaries.
82 **			NULL if we are processing the outer portion.
83 **		flags -- to tweak processing.
84 **		level -- recursion level.
85 **
86 **	Returns:
87 **		An indicator of what terminated the message part:
88 **		  MBT_FINAL -- the final boundary
89 **		  MBT_INTERMED -- an intermediate boundary
90 **		  MBT_NOTSEP -- an end of file
91 **		  SM_IO_EOF -- I/O error occurred
92 */
93 
/*
**  One "name=value" parameter taken from a Content-Type: header.
**  Both members point at tokens produced by prescan(); nothing is
**  copied.
*/

struct args
{
	char	*a_field;	/* parameter name (e.g., "boundary") */
	char	*a_value;	/* parameter value; may be NULL */
};
99 
100 int
101 mime8to7(mci, header, e, boundaries, flags, level)
102 	register MCI *mci;
103 	HDR *header;
104 	register ENVELOPE *e;
105 	char **boundaries;
106 	int flags;
107 	int level;
108 {
109 	register char *p;
110 	int linelen;
111 	int blen;
112 	int bt;
113 	off_t offset;
114 	size_t sectionsize, sectionhighbits;
115 	int i;
116 	char *type;
117 	char *subtype;
118 	char *cte;
119 	char **pvp;
120 	int argc = 0;
121 	char *bp;
122 	bool use_qp = false;
123 	struct args argv[MAXMIMEARGS];
124 	char bbuf[128];
125 	char buf[MAXLINE];
126 	char pvpbuf[MAXLINE];
127 	extern unsigned char MimeTokenTab[256];
128 
129 	if (level > MAXMIMENESTING)
130 	{
131 		if (!bitset(EF_TOODEEP, e->e_flags))
132 		{
133 			if (tTd(43, 4))
134 				sm_dprintf("mime8to7: too deep, level=%d\n",
135 					   level);
136 			usrerr("mime8to7: recursion level %d exceeded",
137 				level);
138 			e->e_flags |= EF_DONT_MIME|EF_TOODEEP;
139 		}
140 	}
141 	if (tTd(43, 1))
142 	{
143 		sm_dprintf("mime8to7: flags = %x, boundaries =", flags);
144 		if (boundaries[0] == NULL)
145 			sm_dprintf(" <none>");
146 		else
147 		{
148 			for (i = 0; boundaries[i] != NULL; i++)
149 				sm_dprintf(" %s", boundaries[i]);
150 		}
151 		sm_dprintf("\n");
152 	}
153 	MapNLtoCRLF = true;
154 	p = hvalue("Content-Transfer-Encoding", header);
155 	if (p == NULL ||
156 	    (pvp = prescan(p, '\0', pvpbuf, sizeof(pvpbuf), NULL,
157 			   MimeTokenTab, false)) == NULL ||
158 	    pvp[0] == NULL)
159 	{
160 		cte = NULL;
161 	}
162 	else
163 	{
164 		cataddr(pvp, NULL, buf, sizeof(buf), '\0', false);
165 		cte = sm_rpool_strdup_x(e->e_rpool, buf);
166 	}
167 
168 	type = subtype = NULL;
169 	p = hvalue("Content-Type", header);
170 	if (p == NULL)
171 	{
172 		if (bitset(M87F_DIGEST, flags))
173 			p = "message/rfc822";
174 		else
175 			p = "text/plain";
176 	}
177 	if (p != NULL &&
178 	    (pvp = prescan(p, '\0', pvpbuf, sizeof(pvpbuf), NULL,
179 			   MimeTokenTab, false)) != NULL &&
180 	    pvp[0] != NULL)
181 	{
182 		if (tTd(43, 40))
183 		{
184 			for (i = 0; pvp[i] != NULL; i++)
185 				sm_dprintf("pvp[%d] = \"%s\"\n", i, pvp[i]);
186 		}
187 		type = *pvp++;
188 		if (*pvp != NULL && strcmp(*pvp, "/") == 0 &&
189 		    *++pvp != NULL)
190 		{
191 			subtype = *pvp++;
192 		}
193 
194 		/* break out parameters */
195 		while (*pvp != NULL && argc < MAXMIMEARGS)
196 		{
197 			/* skip to semicolon separator */
198 			while (*pvp != NULL && strcmp(*pvp, ";") != 0)
199 				pvp++;
200 			if (*pvp++ == NULL || *pvp == NULL)
201 				break;
202 
203 			/* complain about empty values */
204 			if (strcmp(*pvp, ";") == 0)
205 			{
206 				usrerr("mime8to7: Empty parameter in Content-Type header");
207 
208 				/* avoid bounce loops */
209 				e->e_flags |= EF_DONT_MIME;
210 				continue;
211 			}
212 
213 			/* extract field name */
214 			argv[argc].a_field = *pvp++;
215 
216 			/* see if there is a value */
217 			if (*pvp != NULL && strcmp(*pvp, "=") == 0 &&
218 			    (*++pvp == NULL || strcmp(*pvp, ";") != 0))
219 			{
220 				argv[argc].a_value = *pvp;
221 				argc++;
222 			}
223 		}
224 	}
225 
226 	/* check for disaster cases */
227 	if (type == NULL)
228 		type = "-none-";
229 	if (subtype == NULL)
230 		subtype = "-none-";
231 
232 	/* don't propagate some flags more than one level into the message */
233 	flags &= ~M87F_DIGEST;
234 
235 	/*
236 	**  Check for cases that can not be encoded.
237 	**
238 	**	For example, you can't encode certain kinds of types
239 	**	or already-encoded messages.  If we find this case,
240 	**	just copy it through.
241 	*/
242 
243 	(void) sm_snprintf(buf, sizeof(buf), "%.100s/%.100s", type, subtype);
244 	if (wordinclass(buf, 'n') || (cte != NULL && !wordinclass(cte, 'e')))
245 		flags |= M87F_NO8BIT;
246 
247 # ifdef USE_B_CLASS
248 	if (wordinclass(buf, 'b') || wordinclass(type, 'b'))
249 		MapNLtoCRLF = false;
250 # endif
251 	if (wordinclass(buf, 'q') || wordinclass(type, 'q'))
252 		use_qp = true;
253 
254 	/*
255 	**  Multipart requires special processing.
256 	**
257 	**	Do a recursive descent into the message.
258 	*/
259 
260 	if (SM_STRCASEEQ(type, "multipart") &&
261 	    (!bitset(M87F_NO8BIT, flags) || bitset(M87F_NO8TO7, flags)) &&
262 	    !bitset(EF_TOODEEP, e->e_flags)
263 	   )
264 	{
265 
266 		if (SM_STRCASEEQ(subtype, "digest"))
267 			flags |= M87F_DIGEST;
268 
269 		for (i = 0; i < argc; i++)
270 		{
271 			if (SM_STRCASEEQ(argv[i].a_field, "boundary"))
272 				break;
273 		}
274 		if (i >= argc || argv[i].a_value == NULL)
275 		{
276 			usrerr("mime8to7: Content-Type: \"%s\": %s boundary",
277 				i >= argc ? "missing" : "bogus", p);
278 			p = "---";
279 
280 			/* avoid bounce loops */
281 			e->e_flags |= EF_DONT_MIME;
282 		}
283 		else
284 		{
285 			p = argv[i].a_value;
286 			unfoldstripquotes(p);
287 		}
288 		if (sm_strlcpy(bbuf, p, sizeof(bbuf)) >= sizeof(bbuf))
289 		{
290 			usrerr("mime8to7: multipart boundary \"%s\" too long",
291 				p);
292 
293 			/* avoid bounce loops */
294 			e->e_flags |= EF_DONT_MIME;
295 		}
296 
297 		if (tTd(43, 1))
298 			sm_dprintf("mime8to7: multipart boundary \"%s\"\n",
299 				bbuf);
300 		for (i = 0; i < MAXMIMENESTING; i++)
301 		{
302 			if (boundaries[i] == NULL)
303 				break;
304 		}
305 		if (i >= MAXMIMENESTING)
306 		{
307 			if (tTd(43, 4))
308 				sm_dprintf("mime8to7: too deep, i=%d\n", i);
309 			if (!bitset(EF_TOODEEP, e->e_flags))
310 				usrerr("mime8to7: multipart nesting boundary too deep");
311 
312 			/* avoid bounce loops */
313 			e->e_flags |= EF_DONT_MIME|EF_TOODEEP;
314 		}
315 		else
316 		{
317 			boundaries[i] = bbuf;
318 			boundaries[i + 1] = NULL;
319 		}
320 		mci->mci_flags |= MCIF_INMIME;
321 
322 		/* skip the early "comment" prologue */
323 		if (!putline("", mci))
324 			goto writeerr;
325 		mci->mci_flags &= ~MCIF_INHEADER;
326 		bt = MBT_FINAL;
327 		while ((blen = sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
328 					sizeof(buf))) >= 0)
329 		{
330 			bt = mimeboundary(buf, boundaries);
331 			if (bt != MBT_NOTSEP)
332 				break;
333 			if (!putxline(buf, blen, mci,
334 					PXLF_MAPFROM|PXLF_STRIP8BIT))
335 				goto writeerr;
336 			if (tTd(43, 99))
337 				sm_dprintf("  ...%s", buf);
338 		}
339 		if (sm_io_eof(e->e_dfp))
340 			bt = MBT_FINAL;
341 		while (bt != MBT_FINAL)
342 		{
343 			auto HDR *hdr = NULL;
344 
345 			(void) sm_strlcpyn(buf, sizeof(buf), 2, "--", bbuf);
346 			if (!putline(buf, mci))
347 				goto writeerr;
348 			if (tTd(43, 35))
349 				sm_dprintf("  ...%s\n", buf);
350 			collect(e->e_dfp, SMTPMODE_NO, &hdr, e, false);
351 			if (tTd(43, 101))
352 				putline("+++after collect", mci);
353 			if (!putheader(mci, hdr, e, flags))
354 				goto writeerr;
355 			if (tTd(43, 101))
356 				putline("+++after putheader", mci);
357 			bt = mime8to7(mci, hdr, e, boundaries, flags,
358 				      level + 1);
359 			if (bt == SM_IO_EOF)
360 				goto writeerr;
361 		}
362 		(void) sm_strlcpyn(buf, sizeof(buf), 3, "--", bbuf, "--");
363 		if (!putline(buf, mci))
364 			goto writeerr;
365 		if (tTd(43, 35))
366 			sm_dprintf("  ...%s\n", buf);
367 		boundaries[i] = NULL;
368 		mci->mci_flags &= ~MCIF_INMIME;
369 
370 		/* skip the late "comment" epilogue */
371 		while ((blen = sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
372 					sizeof(buf))) >= 0)
373 		{
374 			bt = mimeboundary(buf, boundaries);
375 			if (bt != MBT_NOTSEP)
376 				break;
377 			if (!putxline(buf, blen, mci,
378 					PXLF_MAPFROM|PXLF_STRIP8BIT))
379 				goto writeerr;
380 			if (tTd(43, 99))
381 				sm_dprintf("  ...%s", buf);
382 		}
383 		if (sm_io_eof(e->e_dfp))
384 			bt = MBT_FINAL;
385 		if (tTd(43, 3))
386 			sm_dprintf("\t\t\tmime8to7=>%s (multipart)\n",
387 				MimeBoundaryNames[bt]);
388 		return bt;
389 	}
390 
391 	/*
392 	**  Message/xxx types -- recurse exactly once.
393 	**
394 	**	Class 's' is predefined to have "rfc822" only.
395 	*/
396 
397 	if (SM_STRCASEEQ(type, "message"))
398 	{
399 		if (!wordinclass(subtype, 's') ||
400 		    bitset(EF_TOODEEP, e->e_flags))
401 		{
402 			flags |= M87F_NO8BIT;
403 		}
404 		else
405 		{
406 			auto HDR *hdr = NULL;
407 
408 			if (!putline("", mci))
409 				goto writeerr;
410 
411 			mci->mci_flags |= MCIF_INMIME;
412 			collect(e->e_dfp, SMTPMODE_NO, &hdr, e, false);
413 			if (tTd(43, 101))
414 				putline("+++after collect", mci);
415 			if (!putheader(mci, hdr, e, flags))
416 				goto writeerr;
417 			if (tTd(43, 101))
418 				putline("+++after putheader", mci);
419 			if (hvalue("MIME-Version", hdr) == NULL &&
420 			    !bitset(M87F_NO8TO7, flags) &&
421 			    !putline("MIME-Version: 1.0", mci))
422 				goto writeerr;
423 			bt = mime8to7(mci, hdr, e, boundaries, flags,
424 				      level + 1);
425 			mci->mci_flags &= ~MCIF_INMIME;
426 			return bt;
427 		}
428 	}
429 
430 	/*
431 	**  Non-compound body type
432 	**
433 	**	Compute the ratio of seven to eight bit characters;
434 	**	use that as a heuristic to decide how to do the
435 	**	encoding.
436 	*/
437 
438 	sectionsize = sectionhighbits = 0;
439 	if (!bitset(M87F_NO8BIT|M87F_NO8TO7, flags))
440 	{
441 		/* remember where we were */
442 		offset = sm_io_tell(e->e_dfp, SM_TIME_DEFAULT);
443 		if (offset == -1)
444 			syserr("mime8to7: cannot sm_io_tell on %cf%s",
445 			       DATAFL_LETTER, e->e_id);
446 
447 		/* do a scan of this body type to count character types */
448 		while ((blen = sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
449 					sizeof(buf))) >= 0)
450 		{
451 			if (mimeboundary(buf, boundaries) != MBT_NOTSEP)
452 				break;
453 			for (i = 0; i < blen; i++)
454 			{
455 				/* count bytes with the high bit set */
456 				sectionsize++;
457 				if (bitset(0200, buf[i]))
458 					sectionhighbits++;
459 			}
460 
461 			/*
462 			**  Heuristic: if 1/4 of the first 4K bytes are 8-bit,
463 			**  assume base64.  This heuristic avoids double-reading
464 			**  large graphics or video files.
465 			*/
466 
467 			if (sectionsize >= 4096 &&
468 			    sectionhighbits > sectionsize / 4)
469 				break;
470 		}
471 
472 		/* return to the original offset for processing */
473 		/* XXX use relative seeks to handle >31 bit file sizes? */
474 		if (sm_io_seek(e->e_dfp, SM_TIME_DEFAULT, offset, SEEK_SET) < 0)
475 			syserr("mime8to7: cannot sm_io_fseek on %cf%s",
476 			       DATAFL_LETTER, e->e_id);
477 		else
478 			sm_io_clearerr(e->e_dfp);
479 	}
480 
481 	/*
482 	**  Heuristically determine encoding method.
483 	**	If more than 1/8 of the total characters have the
484 	**	eighth bit set, use base64; else use quoted-printable.
485 	**	However, only encode binary encoded data as base64,
486 	**	since otherwise the LF=>CRLF mapping will be a problem.
487 	*/
488 
489 	if (tTd(43, 8))
490 	{
491 		sm_dprintf("mime8to7: %ld high bit(s) in %ld byte(s), cte=%s, type=%s/%s\n",
492 			(long) sectionhighbits, (long) sectionsize,
493 			cte == NULL ? "[none]" : cte,
494 			type == NULL ? "[none]" : type,
495 			subtype == NULL ? "[none]" : subtype);
496 	}
497 	if (cte != NULL && SM_STRCASEEQ(cte, "binary"))
498 		sectionsize = sectionhighbits;
499 	linelen = 0;
500 	bp = buf;
501 	if (sectionhighbits == 0)
502 	{
503 		/* no encoding necessary */
504 		if (cte != NULL &&
505 		    bitset(MCIF_CVT8TO7|MCIF_CVT7TO8|MCIF_INMIME,
506 			   mci->mci_flags) &&
507 		    !bitset(M87F_NO8TO7, flags))
508 		{
509 			/*
510 			**  Skip _unless_ in MIME mode and potentially
511 			**  converting from 8 bit to 7 bit MIME.  See
512 			**  putheader() for the counterpart where the
513 			**  CTE header is skipped in the opposite
514 			**  situation.
515 			*/
516 
517 			(void) sm_snprintf(buf, sizeof(buf),
518 				"Content-Transfer-Encoding: %.200s", cte);
519 			if (!putline(buf, mci))
520 				goto writeerr;
521 			if (tTd(43, 36))
522 				sm_dprintf("  ...%s\n", buf);
523 		}
524 		if (!putline("", mci))
525 			goto writeerr;
526 		mci->mci_flags &= ~MCIF_INHEADER;
527 		while ((blen = sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
528 					sizeof(buf))) >= 0)
529 		{
530 			if (!bitset(MCIF_INLONGLINE, mci->mci_flags))
531 			{
532 				bt = mimeboundary(buf, boundaries);
533 				if (bt != MBT_NOTSEP)
534 					break;
535 			}
536 			if (!putxline(buf, blen, mci,
537 				      PXLF_MAPFROM|PXLF_NOADDEOL))
538 				goto writeerr;
539 		}
540 		if (sm_io_eof(e->e_dfp))
541 			bt = MBT_FINAL;
542 	}
543 	else if (!MapNLtoCRLF ||
544 		 (sectionsize / 8 < sectionhighbits && !use_qp))
545 	{
546 		/* use base64 encoding */
547 		int c1, c2;
548 
549 		if (tTd(43, 36))
550 			sm_dprintf("  ...Content-Transfer-Encoding: base64\n");
551 		if (!putline("Content-Transfer-Encoding: base64", mci))
552 			goto writeerr;
553 		(void) sm_snprintf(buf, sizeof(buf),
554 			"X-MIME-Autoconverted: from 8bit to base64 by %s id %s",
555 			MyHostName, e->e_id);
556 		if (!putline(buf, mci) || !putline("", mci))
557 			goto writeerr;
558 		mci->mci_flags &= ~MCIF_INHEADER;
559 		while ((c1 = mime_getchar_crlf(e->e_dfp, boundaries, &bt)) !=
560 			SM_IO_EOF)
561 		{
562 			if (linelen > 71)
563 			{
564 				*bp = '\0';
565 				if (!putline(buf, mci))
566 					goto writeerr;
567 				linelen = 0;
568 				bp = buf;
569 			}
570 			linelen += 4;
571 			*bp++ = Base64Code[(c1 >> 2)];
572 			c1 = (c1 & 0x03) << 4;
573 			c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt);
574 			if (c2 == SM_IO_EOF)
575 			{
576 				*bp++ = Base64Code[c1];
577 				*bp++ = '=';
578 				*bp++ = '=';
579 				break;
580 			}
581 			c1 |= (c2 >> 4) & 0x0f;
582 			*bp++ = Base64Code[c1];
583 			c1 = (c2 & 0x0f) << 2;
584 			c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt);
585 			if (c2 == SM_IO_EOF)
586 			{
587 				*bp++ = Base64Code[c1];
588 				*bp++ = '=';
589 				break;
590 			}
591 			c1 |= (c2 >> 6) & 0x03;
592 			*bp++ = Base64Code[c1];
593 			*bp++ = Base64Code[c2 & 0x3f];
594 		}
595 		*bp = '\0';
596 		if (!putline(buf, mci))
597 			goto writeerr;
598 	}
599 	else
600 	{
601 		/* use quoted-printable encoding */
602 		int c1, c2;
603 		int fromstate;
604 		BITMAP256 badchars;
605 
606 		/* set up map of characters that must be mapped */
607 		clrbitmap(badchars);
608 		for (c1 = 0x00; c1 < 0x20; c1++)
609 			setbitn(c1, badchars);
610 		clrbitn('\t', badchars);
611 		for (c1 = 0x7f; c1 < 0x100; c1++)
612 			setbitn(c1, badchars);
613 		setbitn('=', badchars);
614 		if (bitnset(M_EBCDIC, mci->mci_mailer->m_flags))
615 			for (p = "!\"#$@[\\]^`{|}~"; *p != '\0'; p++)
616 				setbitn(*p, badchars);
617 
618 		if (tTd(43, 36))
619 			sm_dprintf("  ...Content-Transfer-Encoding: quoted-printable\n");
620 		if (!putline("Content-Transfer-Encoding: quoted-printable",
621 				mci))
622 			goto writeerr;
623 		(void) sm_snprintf(buf, sizeof(buf),
624 			"X-MIME-Autoconverted: from 8bit to quoted-printable by %s id %s",
625 			MyHostName, e->e_id);
626 		if (!putline(buf, mci) || !putline("", mci))
627 			goto writeerr;
628 		mci->mci_flags &= ~MCIF_INHEADER;
629 		fromstate = 0;
630 		c2 = '\n';
631 		while ((c1 = mime_getchar(e->e_dfp, boundaries, &bt)) !=
632 			SM_IO_EOF)
633 		{
634 			if (c1 == '\n')
635 			{
636 				if (c2 == ' ' || c2 == '\t')
637 				{
638 					*bp++ = '=';
639 					*bp++ = Base16Code[(c2 >> 4) & 0x0f];
640 					*bp++ = Base16Code[c2 & 0x0f];
641 				}
642 				if (buf[0] == '.' && bp == &buf[1])
643 				{
644 					buf[0] = '=';
645 					*bp++ = Base16Code[('.' >> 4) & 0x0f];
646 					*bp++ = Base16Code['.' & 0x0f];
647 				}
648 				*bp = '\0';
649 				if (!putline(buf, mci))
650 					goto writeerr;
651 				linelen = fromstate = 0;
652 				bp = buf;
653 				c2 = c1;
654 				continue;
655 			}
656 			if (c2 == ' ' && linelen == 4 && fromstate == 4 &&
657 			    bitnset(M_ESCFROM, mci->mci_mailer->m_flags))
658 			{
659 				*bp++ = '=';
660 				*bp++ = '2';
661 				*bp++ = '0';
662 				linelen += 3;
663 			}
664 			else if (c2 == ' ' || c2 == '\t')
665 			{
666 				*bp++ = c2;
667 				linelen++;
668 			}
669 			if (linelen > 72 &&
670 			    (linelen > 75 || c1 != '.' ||
671 			     (linelen > 73 && c2 == '.')))
672 			{
673 				if (linelen > 73 && c2 == '.')
674 					bp--;
675 				else
676 					c2 = '\n';
677 				*bp++ = '=';
678 				*bp = '\0';
679 				if (!putline(buf, mci))
680 					goto writeerr;
681 				linelen = fromstate = 0;
682 				bp = buf;
683 				if (c2 == '.')
684 				{
685 					*bp++ = '.';
686 					linelen++;
687 				}
688 			}
689 			if (bitnset(bitidx(c1), badchars))
690 			{
691 				*bp++ = '=';
692 				*bp++ = Base16Code[(c1 >> 4) & 0x0f];
693 				*bp++ = Base16Code[c1 & 0x0f];
694 				linelen += 3;
695 			}
696 			else if (c1 != ' ' && c1 != '\t')
697 			{
698 				if (linelen < 4 && c1 == "From"[linelen])
699 					fromstate++;
700 				*bp++ = c1;
701 				linelen++;
702 			}
703 			c2 = c1;
704 		}
705 
706 		/* output any saved character */
707 		if (c2 == ' ' || c2 == '\t')
708 		{
709 			*bp++ = '=';
710 			*bp++ = Base16Code[(c2 >> 4) & 0x0f];
711 			*bp++ = Base16Code[c2 & 0x0f];
712 			linelen += 3;
713 		}
714 
715 		if (linelen > 0 || boundaries[0] != NULL)
716 		{
717 			*bp = '\0';
718 			if (!putline(buf, mci))
719 				goto writeerr;
720 		}
721 
722 	}
723 	if (tTd(43, 3))
724 		sm_dprintf("\t\t\tmime8to7=>%s (basic)\n", MimeBoundaryNames[bt]);
725 	return bt;
726 
727   writeerr:
728 	return SM_IO_EOF;
729 }
730 /*
731 **  MIME_GETCHAR -- get a character for MIME processing
732 **
733 **	Treats boundaries as SM_IO_EOF.
734 **
735 **	Parameters:
736 **		fp -- the input file.
737 **		boundaries -- the current MIME boundaries.
738 **		btp -- if the return value is SM_IO_EOF, *btp is set to
739 **			the type of the boundary.
740 **
741 **	Returns:
742 **		The next character in the input stream.
743 */
744 
745 static int
746 mime_getchar(fp, boundaries, btp)
747 	register SM_FILE_T *fp;
748 	char **boundaries;
749 	int *btp;
750 {
751 	int c;
752 	static unsigned char *bp = NULL;
753 	static int buflen = 0;
754 	static bool atbol = true;	/* at beginning of line */
755 	static int bt = MBT_SYNTAX;	/* boundary type of next SM_IO_EOF */
756 	static unsigned char buf[128];	/* need not be a full line */
757 	int start = 0;			/* indicates position of - in buffer */
758 
759 	if (buflen == 1 && *bp == '\n')
760 	{
761 		/* last \n in buffer may be part of next MIME boundary */
762 		c = *bp;
763 	}
764 	else if (buflen > 0)
765 	{
766 		buflen--;
767 		return *bp++;
768 	}
769 	else
770 		c = sm_io_getc(fp, SM_TIME_DEFAULT);
771 	bp = buf;
772 	buflen = 0;
773 	if (c == '\n')
774 	{
775 		/* might be part of a MIME boundary */
776 		*bp++ = c;
777 		atbol = true;
778 		c = sm_io_getc(fp, SM_TIME_DEFAULT);
779 		if (c == '\n')
780 		{
781 			(void) sm_io_ungetc(fp, SM_TIME_DEFAULT, c);
782 			return c;
783 		}
784 		start = 1;
785 	}
786 	if (c != SM_IO_EOF)
787 		*bp++ = c;
788 	else
789 		bt = MBT_FINAL;
790 	if (atbol && c == '-')
791 	{
792 		/* check for a message boundary */
793 		c = sm_io_getc(fp, SM_TIME_DEFAULT);
794 		if (c != '-')
795 		{
796 			if (c != SM_IO_EOF)
797 				*bp++ = c;
798 			else
799 				bt = MBT_FINAL;
800 			buflen = bp - buf - 1;
801 			bp = buf;
802 			return *bp++;
803 		}
804 
805 		/* got "--", now check for rest of separator */
806 		*bp++ = '-';
807 		while (bp < &buf[sizeof(buf) - 2] &&
808 		       (c = sm_io_getc(fp, SM_TIME_DEFAULT)) != SM_IO_EOF &&
809 		       c != '\n')
810 		{
811 			*bp++ = c;
812 		}
813 		*bp = '\0';	/* XXX simply cut off? */
814 		bt = mimeboundary((char *) &buf[start], boundaries);
815 		switch (bt)
816 		{
817 		  case MBT_FINAL:
818 		  case MBT_INTERMED:
819 			/* we have a message boundary */
820 			buflen = 0;
821 			*btp = bt;
822 			return SM_IO_EOF;
823 		}
824 
825 		if (bp < &buf[sizeof(buf) - 2] && c != SM_IO_EOF)
826 			*bp++ = c;
827 	}
828 
829 	atbol = c == '\n';
830 	buflen = bp - buf - 1;
831 	if (buflen < 0)
832 	{
833 		*btp = bt;
834 		return SM_IO_EOF;
835 	}
836 	bp = buf;
837 	return *bp++;
838 }
839 /*
840 **  MIME_GETCHAR_CRLF -- do mime_getchar, but translate LF => CRLF
841 **
842 **	Parameters:
843 **		fp -- the input file.
844 **		boundaries -- the current MIME boundaries.
845 **		btp -- if the return value is SM_IO_EOF, *btp is set to
846 **			the type of the boundary.
847 **
848 **	Returns:
849 **		The next character in the input stream.
850 */
851 
852 static int
853 mime_getchar_crlf(fp, boundaries, btp)
854 	register SM_FILE_T *fp;
855 	char **boundaries;
856 	int *btp;
857 {
858 	static bool sendlf = false;
859 	int c;
860 
861 	if (sendlf)
862 	{
863 		sendlf = false;
864 		return '\n';
865 	}
866 	c = mime_getchar(fp, boundaries, btp);
867 	if (c == '\n' && MapNLtoCRLF)
868 	{
869 		sendlf = true;
870 		return '\r';
871 	}
872 	return c;
873 }
874 /*
875 **  MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
876 **
877 **	Parameters:
878 **		line -- the input line.
879 **		boundaries -- the set of currently pending boundaries.
880 **
881 **	Returns:
882 **		MBT_NOTSEP -- if this is not a separator line
883 **		MBT_INTERMED -- if this is an intermediate separator
884 **		MBT_FINAL -- if this is a final boundary
885 **		MBT_SYNTAX -- if this is a boundary for the wrong
886 **			enclosure -- i.e., a syntax error.
887 */
888 
889 static int
890 mimeboundary(line, boundaries)
891 	register char *line;
892 	char **boundaries;
893 {
894 	int type = MBT_NOTSEP;
895 	int i;
896 	int savec;
897 
898 	if (line[0] != '-' || line[1] != '-' || boundaries == NULL)
899 		return MBT_NOTSEP;
900 	i = strlen(line);
901 	if (i > 0 && line[i - 1] == '\n')
902 		i--;
903 
904 	/* strip off trailing whitespace */
905 	while (i > 0 && (line[i - 1] == ' ' || line[i - 1] == '\t'
906 # if _FFR_MIME_CR_OK
907 		|| line[i - 1] == '\r'
908 # endif
909 	       ))
910 		i--;
911 	savec = line[i];
912 	line[i] = '\0';
913 
914 	if (tTd(43, 5))
915 		sm_dprintf("mimeboundary: line=\"%s\"... ", line);
916 
917 	/* check for this as an intermediate boundary */
918 	if (isboundary(&line[2], boundaries) >= 0)
919 		type = MBT_INTERMED;
920 	else if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0)
921 	{
922 		/* check for a final boundary */
923 		line[i - 2] = '\0';
924 		if (isboundary(&line[2], boundaries) >= 0)
925 			type = MBT_FINAL;
926 		line[i - 2] = '-';
927 	}
928 
929 	line[i] = savec;
930 	if (tTd(43, 5))
931 		sm_dprintf("%s\n", MimeBoundaryNames[type]);
932 	return type;
933 }
934 /*
935 **  DEFCHARSET -- return default character set for message
936 **
937 **	The first choice for character set is for the mailer
938 **	corresponding to the envelope sender.  If neither that
939 **	nor the global configuration file has a default character
940 **	set defined, return "unknown-8bit" as recommended by
941 **	RFC 1428 section 3.
942 **
943 **	Parameters:
944 **		e -- the envelope for this message.
945 **
946 **	Returns:
947 **		The default character set for that mailer.
948 */
949 
950 char *
951 defcharset(e)
952 	register ENVELOPE *e;
953 {
954 	if (e != NULL && e->e_from.q_mailer != NULL &&
955 	    e->e_from.q_mailer->m_defcharset != NULL)
956 		return e->e_from.q_mailer->m_defcharset;
957 	if (DefaultCharSet != NULL)
958 		return DefaultCharSet;
959 	return "unknown-8bit";
960 }
961 /*
962 **  ISBOUNDARY -- is a given string a currently valid boundary?
963 **
964 **	Parameters:
965 **		line -- the current input line.
966 **		boundaries -- the list of valid boundaries.
967 **
968 **	Returns:
969 **		The index number in boundaries if the line is found.
970 **		-1 -- otherwise.
971 **
972 */
973 
974 static int
975 isboundary(line, boundaries)
976 	char *line;
977 	char **boundaries;
978 {
979 	register int i;
980 
981 	for (i = 0; i <= MAXMIMENESTING && boundaries[i] != NULL; i++)
982 	{
983 		if (strcmp(line, boundaries[i]) == 0)
984 			return i;
985 	}
986 	return -1;
987 }
988 #endif /* MIME8TO7 */
989 
990 #if MIME7TO8
991 static int	mime_fromqp __P((unsigned char *, unsigned char **, int));
992 
993 /*
994 **  MIME7TO8 -- output 7 bit encoded MIME body in 8 bit format
995 **
996 **  This is a hack. Supports translating the two 7-bit body-encodings
997 **  (quoted-printable and base64) to 8-bit coded bodies.
998 **
999 **  There is not much point in supporting multipart here, as the UA
1000 **  will be able to deal with encoded MIME bodies if it can parse MIME
1001 **  multipart messages.
1002 **
1003 **  Note also that we won't be called unless it is a text/plain MIME
1004 **  message, encoded base64 or QP and mailer flag '9' has been defined
1005 **  on mailer.
1006 **
1007 **  Contributed by Marius Olaffson <marius@rhi.hi.is>.
1008 **
1009 **	Parameters:
1010 **		mci -- mailer connection information.
1011 **		header -- the header for this body part.
1012 **		e -- envelope.
1013 **
1014 **	Returns:
1015 **		true iff body was written successfully
1016 */
1017 
/*
**  Reverse base64 table: maps a 7 bit character code to its 6 bit
**  value, or to -1 if the character is not in the base64 alphabet.
**
**  The element type is explicitly signed: whether plain "char" is
**  signed is implementation-defined, and with an unsigned "char"
**  the -1 entries would read back as 255 instead of -1.
*/

static const signed char index_64[128] =
{
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
	52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
	-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
	15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
	-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
	41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

/* value of base64 character c; -1 if c is outside 0..127 or not base64 */
# define CHAR64(c)  (((c) < 0 || (c) > 127) ? -1 : index_64[(c)])
1031 
1032 bool
1033 mime7to8(mci, header, e)
1034 	register MCI *mci;
1035 	HDR *header;
1036 	register ENVELOPE *e;
1037 {
1038 	int pxflags, blen;
1039 	register char *p;
1040 	char *cte;
1041 	char **pvp;
1042 	unsigned char *fbufp;
1043 	char buf[MAXLINE];
1044 	unsigned char fbuf[MAXLINE + 1];
1045 	char pvpbuf[MAXLINE];
1046 	extern unsigned char MimeTokenTab[256];
1047 
1048 	p = hvalue("Content-Transfer-Encoding", header);
1049 	if (p == NULL ||
1050 	    (pvp = prescan(p, '\0', pvpbuf, sizeof(pvpbuf), NULL,
1051 			   MimeTokenTab, false)) == NULL ||
1052 	    pvp[0] == NULL)
1053 	{
1054 		/* "can't happen" -- upper level should have caught this */
1055 		syserr("mime7to8: unparsable CTE %s", p == NULL ? "<NULL>" : p);
1056 
1057 		/* avoid bounce loops */
1058 		e->e_flags |= EF_DONT_MIME;
1059 
1060 		/* cheap failsafe algorithm -- should work on text/plain */
1061 		if (p != NULL)
1062 		{
1063 			(void) sm_snprintf(buf, sizeof(buf),
1064 				"Content-Transfer-Encoding: %s", p);
1065 			if (!putline(buf, mci))
1066 				goto writeerr;
1067 		}
1068 		if (!putline("", mci))
1069 			goto writeerr;
1070 		mci->mci_flags &= ~MCIF_INHEADER;
1071 		while ((blen = sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
1072 					sizeof(buf))) >= 0)
1073 		{
1074 			if (!putxline(buf, blen, mci, PXLF_MAPFROM))
1075 				goto writeerr;
1076 		}
1077 		return true;
1078 	}
1079 	cataddr(pvp, NULL, buf, sizeof(buf), '\0', false);
1080 	cte = sm_rpool_strdup_x(e->e_rpool, buf);
1081 
1082 	mci->mci_flags |= MCIF_INHEADER;
1083 	if (!putline("Content-Transfer-Encoding: 8bit", mci))
1084 		goto writeerr;
1085 	(void) sm_snprintf(buf, sizeof(buf),
1086 		"X-MIME-Autoconverted: from %.200s to 8bit by %s id %s",
1087 		cte, MyHostName, e->e_id);
1088 	if (!putline(buf, mci) || !putline("", mci))
1089 		goto writeerr;
1090 	mci->mci_flags &= ~MCIF_INHEADER;
1091 
1092 	/*
1093 	**  Translate body encoding to 8-bit.  Supports two types of
1094 	**  encodings; "base64" and "quoted-printable". Assume qp if
1095 	**  it is not base64.
1096 	*/
1097 
1098 	pxflags = PXLF_MAPFROM;
1099 	if (SM_STRCASEEQ(cte, "base64"))
1100 	{
1101 		int c1, c2, c3, c4;
1102 
1103 		fbufp = fbuf;
1104 		while ((c1 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT)) !=
1105 			SM_IO_EOF)
1106 		{
1107 			if (SM_ISSPACE(c1))
1108 				continue;
1109 
1110 			do
1111 			{
1112 				c2 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
1113 			} while (SM_ISSPACE(c2));
1114 			if (c2 == SM_IO_EOF)
1115 				break;
1116 
1117 			do
1118 			{
1119 				c3 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
1120 			} while (SM_ISSPACE(c3));
1121 			if (c3 == SM_IO_EOF)
1122 				break;
1123 
1124 			do
1125 			{
1126 				c4 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
1127 			} while (SM_ISSPACE(c4));
1128 			if (c4 == SM_IO_EOF)
1129 				break;
1130 
1131 			if (c1 == '=' || c2 == '=')
1132 				continue;
1133 			c1 = CHAR64(c1);
1134 			c2 = CHAR64(c2);
1135 
1136 # if MIME7TO8_OLD
1137 #  define CHK_EOL if (*--fbufp != '\n' || (fbufp > fbuf && *--fbufp != '\r')) \
1138 			++fbufp;
1139 # else /* MIME7TO8_OLD */
1140 #  define CHK_EOL if (*--fbufp != '\n' || (fbufp > fbuf && *--fbufp != '\r')) \
1141 		{					\
1142 			++fbufp;			\
1143 			pxflags |= PXLF_NOADDEOL;	\
1144 		}
1145 # endif /* MIME7TO8_OLD */
1146 
1147 #define PUTLINE64	\
1148 	do		\
1149 	{		\
1150 		if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE])	\
1151 		{							\
1152 			CHK_EOL;					\
1153 			if (!putxline((char *) fbuf, fbufp - fbuf, mci, pxflags)) \
1154 				goto writeerr;				\
1155 			pxflags &= ~PXLF_NOADDEOL;			\
1156 			fbufp = fbuf;					\
1157 		}	\
1158 	} while (0)
1159 
1160 			*fbufp = (c1 << 2) | ((c2 & 0x30) >> 4);
1161 			PUTLINE64;
1162 			if (c3 == '=')
1163 				continue;
1164 			c3 = CHAR64(c3);
1165 			*fbufp = ((c2 & 0x0f) << 4) | ((c3 & 0x3c) >> 2);
1166 			PUTLINE64;
1167 			if (c4 == '=')
1168 				continue;
1169 			c4 = CHAR64(c4);
1170 			*fbufp = ((c3 & 0x03) << 6) | c4;
1171 			PUTLINE64;
1172 		}
1173 	}
1174 	else
1175 	{
1176 		int off;
1177 
1178 		/* quoted-printable */
1179 		pxflags |= PXLF_NOADDEOL;
1180 		fbufp = fbuf;
1181 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
1182 				   sizeof(buf)) >= 0)
1183 		{
1184 			off = mime_fromqp((unsigned char *) buf, &fbufp,
1185 					  &fbuf[MAXLINE] - fbufp);
1186 again:
1187 			if (off < -1)
1188 				continue;
1189 
1190 			if (fbufp - fbuf > 0)
1191 			{
1192 				if (!putxline((char *) fbuf, fbufp - fbuf - 1,
1193 						mci, pxflags))
1194 					goto writeerr;
1195 			}
1196 			fbufp = fbuf;
1197 			if (off >= 0 && buf[off] != '\0')
1198 			{
1199 				off = mime_fromqp((unsigned char *) (buf + off),
1200 						  &fbufp,
1201 						  &fbuf[MAXLINE] - fbufp);
1202 				goto again;
1203 			}
1204 		}
1205 	}
1206 
1207 	/* force out partial last line */
1208 	if (fbufp > fbuf)
1209 	{
1210 		*fbufp = '\0';
1211 		if (!putxline((char *) fbuf, fbufp - fbuf, mci, pxflags))
1212 			goto writeerr;
1213 	}
1214 
1215 	/*
1216 	**  The decoded text may end without an EOL.  Since this function
1217 	**  is only called for text/plain MIME messages, it is safe to
1218 	**  add an extra one at the end just in case.  This is a hack,
1219 	**  but so is auto-converting MIME in the first place.
1220 	*/
1221 
1222 	if (!putline("", mci))
1223 		goto writeerr;
1224 
1225 	if (tTd(43, 3))
1226 		sm_dprintf("\t\t\tmime7to8 => %s to 8bit done\n", cte);
1227 	return true;
1228 
1229   writeerr:
1230 	return false;
1231 }
1232 /*
1233 **  The following is based on Borenstein's "codes.c" module, with simplifying
1234 **  changes as we do not deal with multipart, and to do the translation in-core,
1235 **  with an attempt to prevent overrun of output buffers.
1236 **
1237 **  What is needed here are changes to defend this code better against
1238 **  bad encodings. Questionable to always return 0xFF for bad mappings.
1239 */
1240 
/*
**  Hex digit lookup table, indexed by 7-bit character code: holds 0..15
**  for '0'-'9', 'A'-'F' and 'a'-'f', and -1 for every other code.
**
**  The element type is explicitly "signed char": plain "char" is
**  unsigned on some platforms, where the -1 entries would read back
**  as 255 and a comparison of HEXCHAR() against -1 could never succeed.
*/

static const signed char index_hex[128] =
{
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* value of hex digit c (0..15), or -1 if c is not a hex digit */
# define HEXCHAR(c)  (((c) < 0 || (c) > 127) ? -1 : index_hex[(c)])
1254 
/*
**  MIME_FROMQP -- decode quoted printable string
**
**	Decodes at most one line: decoding stops once a newline has been
**	copied, at the end of the input string, or when the output space
**	is used up.
**
**	Parameters:
**		infile -- input (encoded) string, '\0' terminated
**		outfile -- pointer to the output cursor; the cursor is
**			advanced past every byte stored, including the
**			terminating '\0' (when one is stored)
**		maxlen -- size of output buffer (room for the decoded
**			bytes plus the terminating '\0')
**
**	Returns:
**		-2 if decoding failure ('=' followed by a newline or
**			by a non-hex character); the rest of infile is
**			ignored, no '\0' is stored, and the output
**			cursor stays after the bytes decoded so far
**		-1 if infile completely decoded into outfile
**		>= 0 is the position in infile decoding
**			reached before maxlen was reached
**			(0, with nothing stored, if maxlen leaves no
**			room for at least one byte plus the '\0')
*/

static int
mime_fromqp(infile, outfile, maxlen)
	unsigned char *infile;
	unsigned char **outfile;
	int maxlen;		/* Max # of chars allowed in outfile */
{
	int c1, c2;
	int nchar = 0;
	int sawnl = 0;		/* decoding ended on a copied newline */
	unsigned char *b;

	/* decrement by one for trailing '\0', at least one other char */
	if (--maxlen < 1)
		return 0;

	b = infile;
	while ((c1 = *infile++) != '\0' && nchar < maxlen)
	{
		if (c1 == '=')
		{
			/* input ends right after '=': nothing to decode */
			if ((c1 = *infile++) == '\0')
				break;

			if (c1 == '\n' || (c1 = HEXCHAR(c1)) == -1)
			{
				/* ignore it and the rest of the buffer */
				return -2;
			}
			else
			{
				/*
				**  Find the second hex digit, skipping
				**  any non-hex characters in between;
				**  give up if the input ends first.
				*/

				do
				{
					if ((c2 = *infile++) == '\0')
					{
						c2 = -1;
						break;
					}
				} while ((c2 = HEXCHAR(c2)) == -1);

				if (c2 == -1)
					break;
				nchar++;
				*(*outfile)++ = c1 << 4 | c2;
			}
		}
		else
		{
			nchar++;
			*(*outfile)++ = c1;
			if (c1 == '\n')
			{
				sawnl = 1;
				break;
			}
		}
	}
	*(*outfile)++ = '\0';

	/*
	**  Report a resume position only if input is really left over.
	**  If the newline that ended decoding also happened to fill the
	**  output, that newline has already been consumed: returning its
	**  index would make the caller decode (and emit) it a second time.
	*/

	if (nchar >= maxlen && !sawnl)
		return (infile - b - 1);
	return -1;
}
1327 #endif /* MIME7TO8 */
1328