xref: /freebsd/contrib/sendmail/src/mime.c (revision 0b3105a37d7adcadcb720112fed4dc4e8040be99)
1 /*
2  * Copyright (c) 1998-2003, 2006, 2013 Proofpoint, Inc. and its suppliers.
3  *	All rights reserved.
4  * Copyright (c) 1994, 1996-1997 Eric P. Allman.  All rights reserved.
5  * Copyright (c) 1994
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * By using this file, you agree to the terms and conditions set
9  * forth in the LICENSE file which can be found at the top level of
10  * the sendmail distribution.
11  *
12  */
13 
14 #include <sendmail.h>
15 #include <string.h>
16 
17 SM_RCSID("@(#)$Id: mime.c,v 8.149 2013-11-22 20:51:56 ca Exp $")
18 
19 /*
20 **  MIME support.
21 **
22 **	I am indebted to John Beck of Hewlett-Packard, who contributed
23 **	his code to me for inclusion.  As it turns out, I did not use
24 **	his code since he used a "minimum change" approach that used
25 **	several temp files, and I wanted a "minimum impact" approach
26 **	that would avoid copying.  However, looking over his code
27 **	helped me cement my understanding of the problem.
28 **
29 **	I also looked at, but did not directly use, Nathaniel
30 **	Borenstein's "code.c" module.  Again, it functioned as
31 **	a file-to-file translator, which did not fit within my
32 **	design bounds, but it was a useful base for understanding
33 **	the problem.
34 */
35 
36 /* use "old" mime 7 to 8 algorithm by default */
37 #ifndef MIME7TO8_OLD
38 # define MIME7TO8_OLD	1
39 #endif /* ! MIME7TO8_OLD */
40 
41 #if MIME8TO7
42 static int	isboundary __P((char *, char **));
43 static int	mimeboundary __P((char *, char **));
44 static int	mime_getchar __P((SM_FILE_T *, char **, int *));
45 static int	mime_getchar_crlf __P((SM_FILE_T *, char **, int *));
46 
47 /* character set for hex and base64 encoding */
48 static char	Base16Code[] =	"0123456789ABCDEF";
49 static char	Base64Code[] =	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
50 
51 /* types of MIME boundaries */
52 # define MBT_SYNTAX	0	/* syntax error */
53 # define MBT_NOTSEP	1	/* not a boundary */
54 # define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
55 # define MBT_FINAL	3	/* final boundary (trailing -- included) */
56 
57 static char	*MimeBoundaryNames[] =
58 {
59 	"SYNTAX",	"NOTSEP",	"INTERMED",	"FINAL"
60 };
61 
62 static bool	MapNLtoCRLF;
63 
64 /*
65 **  MIME8TO7 -- output 8 bit body in 7 bit format
66 **
67 **	The header has already been output -- this has to do the
68 **	8 to 7 bit conversion.  It would be easy if we didn't have
69 **	to deal with nested formats (multipart/xxx and message/rfc822).
70 **
71 **	We won't be called if we don't have to do a conversion, and
72 **	appropriate MIME-Version: and Content-Type: fields have been
73 **	output.  Any Content-Transfer-Encoding: field has not been
74 **	output, and we can add it here.
75 **
76 **	Parameters:
77 **		mci -- mailer connection information.
78 **		header -- the header for this body part.
79 **		e -- envelope.
80 **		boundaries -- the currently pending message boundaries.
81 **			NULL if we are processing the outer portion.
82 **		flags -- to tweak processing.
83 **		level -- recursion level.
84 **
85 **	Returns:
86 **		An indicator of what terminated the message part:
87 **		  MBT_FINAL -- the final boundary
88 **		  MBT_INTERMED -- an intermediate boundary
89 **		  MBT_NOTSEP -- an end of file
90 **		  SM_IO_EOF -- I/O error occurred
91 */
92 
93 struct args
94 {
95 	char	*a_field;	/* name of field */
96 	char	*a_value;	/* value of that field */
97 };
98 
99 int
100 mime8to7(mci, header, e, boundaries, flags, level)
101 	register MCI *mci;
102 	HDR *header;
103 	register ENVELOPE *e;
104 	char **boundaries;
105 	int flags;
106 	int level;
107 {
108 	register char *p;
109 	int linelen;
110 	int blen;
111 	int bt;
112 	off_t offset;
113 	size_t sectionsize, sectionhighbits;
114 	int i;
115 	char *type;
116 	char *subtype;
117 	char *cte;
118 	char **pvp;
119 	int argc = 0;
120 	char *bp;
121 	bool use_qp = false;
122 	struct args argv[MAXMIMEARGS];
123 	char bbuf[128];
124 	char buf[MAXLINE];
125 	char pvpbuf[MAXLINE];
126 	extern unsigned char MimeTokenTab[256];
127 
128 	if (level > MAXMIMENESTING)
129 	{
130 		if (!bitset(EF_TOODEEP, e->e_flags))
131 		{
132 			if (tTd(43, 4))
133 				sm_dprintf("mime8to7: too deep, level=%d\n",
134 					   level);
135 			usrerr("mime8to7: recursion level %d exceeded",
136 				level);
137 			e->e_flags |= EF_DONT_MIME|EF_TOODEEP;
138 		}
139 	}
140 	if (tTd(43, 1))
141 	{
142 		sm_dprintf("mime8to7: flags = %x, boundaries =", flags);
143 		if (boundaries[0] == NULL)
144 			sm_dprintf(" <none>");
145 		else
146 		{
147 			for (i = 0; boundaries[i] != NULL; i++)
148 				sm_dprintf(" %s", boundaries[i]);
149 		}
150 		sm_dprintf("\n");
151 	}
152 	MapNLtoCRLF = true;
153 	p = hvalue("Content-Transfer-Encoding", header);
154 	if (p == NULL ||
155 	    (pvp = prescan(p, '\0', pvpbuf, sizeof(pvpbuf), NULL,
156 			   MimeTokenTab, false)) == NULL ||
157 	    pvp[0] == NULL)
158 	{
159 		cte = NULL;
160 	}
161 	else
162 	{
163 		cataddr(pvp, NULL, buf, sizeof(buf), '\0', false);
164 		cte = sm_rpool_strdup_x(e->e_rpool, buf);
165 	}
166 
167 	type = subtype = NULL;
168 	p = hvalue("Content-Type", header);
169 	if (p == NULL)
170 	{
171 		if (bitset(M87F_DIGEST, flags))
172 			p = "message/rfc822";
173 		else
174 			p = "text/plain";
175 	}
176 	if (p != NULL &&
177 	    (pvp = prescan(p, '\0', pvpbuf, sizeof(pvpbuf), NULL,
178 			   MimeTokenTab, false)) != NULL &&
179 	    pvp[0] != NULL)
180 	{
181 		if (tTd(43, 40))
182 		{
183 			for (i = 0; pvp[i] != NULL; i++)
184 				sm_dprintf("pvp[%d] = \"%s\"\n", i, pvp[i]);
185 		}
186 		type = *pvp++;
187 		if (*pvp != NULL && strcmp(*pvp, "/") == 0 &&
188 		    *++pvp != NULL)
189 		{
190 			subtype = *pvp++;
191 		}
192 
193 		/* break out parameters */
194 		while (*pvp != NULL && argc < MAXMIMEARGS)
195 		{
196 			/* skip to semicolon separator */
197 			while (*pvp != NULL && strcmp(*pvp, ";") != 0)
198 				pvp++;
199 			if (*pvp++ == NULL || *pvp == NULL)
200 				break;
201 
202 			/* complain about empty values */
203 			if (strcmp(*pvp, ";") == 0)
204 			{
205 				usrerr("mime8to7: Empty parameter in Content-Type header");
206 
207 				/* avoid bounce loops */
208 				e->e_flags |= EF_DONT_MIME;
209 				continue;
210 			}
211 
212 			/* extract field name */
213 			argv[argc].a_field = *pvp++;
214 
215 			/* see if there is a value */
216 			if (*pvp != NULL && strcmp(*pvp, "=") == 0 &&
217 			    (*++pvp == NULL || strcmp(*pvp, ";") != 0))
218 			{
219 				argv[argc].a_value = *pvp;
220 				argc++;
221 			}
222 		}
223 	}
224 
225 	/* check for disaster cases */
226 	if (type == NULL)
227 		type = "-none-";
228 	if (subtype == NULL)
229 		subtype = "-none-";
230 
231 	/* don't propagate some flags more than one level into the message */
232 	flags &= ~M87F_DIGEST;
233 
234 	/*
235 	**  Check for cases that can not be encoded.
236 	**
237 	**	For example, you can't encode certain kinds of types
238 	**	or already-encoded messages.  If we find this case,
239 	**	just copy it through.
240 	*/
241 
242 	(void) sm_snprintf(buf, sizeof(buf), "%.100s/%.100s", type, subtype);
243 	if (wordinclass(buf, 'n') || (cte != NULL && !wordinclass(cte, 'e')))
244 		flags |= M87F_NO8BIT;
245 
246 # ifdef USE_B_CLASS
247 	if (wordinclass(buf, 'b') || wordinclass(type, 'b'))
248 		MapNLtoCRLF = false;
249 # endif /* USE_B_CLASS */
250 	if (wordinclass(buf, 'q') || wordinclass(type, 'q'))
251 		use_qp = true;
252 
253 	/*
254 	**  Multipart requires special processing.
255 	**
256 	**	Do a recursive descent into the message.
257 	*/
258 
259 	if (sm_strcasecmp(type, "multipart") == 0 &&
260 	    (!bitset(M87F_NO8BIT, flags) || bitset(M87F_NO8TO7, flags)) &&
261 	    !bitset(EF_TOODEEP, e->e_flags)
262 	   )
263 	{
264 
265 		if (sm_strcasecmp(subtype, "digest") == 0)
266 			flags |= M87F_DIGEST;
267 
268 		for (i = 0; i < argc; i++)
269 		{
270 			if (sm_strcasecmp(argv[i].a_field, "boundary") == 0)
271 				break;
272 		}
273 		if (i >= argc || argv[i].a_value == NULL)
274 		{
275 			usrerr("mime8to7: Content-Type: \"%s\": %s boundary",
276 				i >= argc ? "missing" : "bogus", p);
277 			p = "---";
278 
279 			/* avoid bounce loops */
280 			e->e_flags |= EF_DONT_MIME;
281 		}
282 		else
283 		{
284 			p = argv[i].a_value;
285 			stripquotes(p);
286 		}
287 		if (sm_strlcpy(bbuf, p, sizeof(bbuf)) >= sizeof(bbuf))
288 		{
289 			usrerr("mime8to7: multipart boundary \"%s\" too long",
290 				p);
291 
292 			/* avoid bounce loops */
293 			e->e_flags |= EF_DONT_MIME;
294 		}
295 
296 		if (tTd(43, 1))
297 			sm_dprintf("mime8to7: multipart boundary \"%s\"\n",
298 				bbuf);
299 		for (i = 0; i < MAXMIMENESTING; i++)
300 		{
301 			if (boundaries[i] == NULL)
302 				break;
303 		}
304 		if (i >= MAXMIMENESTING)
305 		{
306 			if (tTd(43, 4))
307 				sm_dprintf("mime8to7: too deep, i=%d\n", i);
308 			if (!bitset(EF_TOODEEP, e->e_flags))
309 				usrerr("mime8to7: multipart nesting boundary too deep");
310 
311 			/* avoid bounce loops */
312 			e->e_flags |= EF_DONT_MIME|EF_TOODEEP;
313 		}
314 		else
315 		{
316 			boundaries[i] = bbuf;
317 			boundaries[i + 1] = NULL;
318 		}
319 		mci->mci_flags |= MCIF_INMIME;
320 
321 		/* skip the early "comment" prologue */
322 		if (!putline("", mci))
323 			goto writeerr;
324 		mci->mci_flags &= ~MCIF_INHEADER;
325 		bt = MBT_FINAL;
326 		while ((blen = sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
327 					sizeof(buf))) >= 0)
328 		{
329 			bt = mimeboundary(buf, boundaries);
330 			if (bt != MBT_NOTSEP)
331 				break;
332 			if (!putxline(buf, blen, mci,
333 					PXLF_MAPFROM|PXLF_STRIP8BIT))
334 				goto writeerr;
335 			if (tTd(43, 99))
336 				sm_dprintf("  ...%s", buf);
337 		}
338 		if (sm_io_eof(e->e_dfp))
339 			bt = MBT_FINAL;
340 		while (bt != MBT_FINAL)
341 		{
342 			auto HDR *hdr = NULL;
343 
344 			(void) sm_strlcpyn(buf, sizeof(buf), 2, "--", bbuf);
345 			if (!putline(buf, mci))
346 				goto writeerr;
347 			if (tTd(43, 35))
348 				sm_dprintf("  ...%s\n", buf);
349 			collect(e->e_dfp, false, &hdr, e, false);
350 			if (tTd(43, 101))
351 				putline("+++after collect", mci);
352 			if (!putheader(mci, hdr, e, flags))
353 				goto writeerr;
354 			if (tTd(43, 101))
355 				putline("+++after putheader", mci);
356 			bt = mime8to7(mci, hdr, e, boundaries, flags,
357 				      level + 1);
358 			if (bt == SM_IO_EOF)
359 				goto writeerr;
360 		}
361 		(void) sm_strlcpyn(buf, sizeof(buf), 3, "--", bbuf, "--");
362 		if (!putline(buf, mci))
363 			goto writeerr;
364 		if (tTd(43, 35))
365 			sm_dprintf("  ...%s\n", buf);
366 		boundaries[i] = NULL;
367 		mci->mci_flags &= ~MCIF_INMIME;
368 
369 		/* skip the late "comment" epilogue */
370 		while ((blen = sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
371 					sizeof(buf))) >= 0)
372 		{
373 			bt = mimeboundary(buf, boundaries);
374 			if (bt != MBT_NOTSEP)
375 				break;
376 			if (!putxline(buf, blen, mci,
377 					PXLF_MAPFROM|PXLF_STRIP8BIT))
378 				goto writeerr;
379 			if (tTd(43, 99))
380 				sm_dprintf("  ...%s", buf);
381 		}
382 		if (sm_io_eof(e->e_dfp))
383 			bt = MBT_FINAL;
384 		if (tTd(43, 3))
385 			sm_dprintf("\t\t\tmime8to7=>%s (multipart)\n",
386 				MimeBoundaryNames[bt]);
387 		return bt;
388 	}
389 
390 	/*
391 	**  Message/xxx types -- recurse exactly once.
392 	**
393 	**	Class 's' is predefined to have "rfc822" only.
394 	*/
395 
396 	if (sm_strcasecmp(type, "message") == 0)
397 	{
398 		if (!wordinclass(subtype, 's') ||
399 		    bitset(EF_TOODEEP, e->e_flags))
400 		{
401 			flags |= M87F_NO8BIT;
402 		}
403 		else
404 		{
405 			auto HDR *hdr = NULL;
406 
407 			if (!putline("", mci))
408 				goto writeerr;
409 
410 			mci->mci_flags |= MCIF_INMIME;
411 			collect(e->e_dfp, false, &hdr, e, false);
412 			if (tTd(43, 101))
413 				putline("+++after collect", mci);
414 			if (!putheader(mci, hdr, e, flags))
415 				goto writeerr;
416 			if (tTd(43, 101))
417 				putline("+++after putheader", mci);
418 			if (hvalue("MIME-Version", hdr) == NULL &&
419 			    !bitset(M87F_NO8TO7, flags) &&
420 			    !putline("MIME-Version: 1.0", mci))
421 				goto writeerr;
422 			bt = mime8to7(mci, hdr, e, boundaries, flags,
423 				      level + 1);
424 			mci->mci_flags &= ~MCIF_INMIME;
425 			return bt;
426 		}
427 	}
428 
429 	/*
430 	**  Non-compound body type
431 	**
432 	**	Compute the ratio of seven to eight bit characters;
433 	**	use that as a heuristic to decide how to do the
434 	**	encoding.
435 	*/
436 
437 	sectionsize = sectionhighbits = 0;
438 	if (!bitset(M87F_NO8BIT|M87F_NO8TO7, flags))
439 	{
440 		/* remember where we were */
441 		offset = sm_io_tell(e->e_dfp, SM_TIME_DEFAULT);
442 		if (offset == -1)
443 			syserr("mime8to7: cannot sm_io_tell on %cf%s",
444 			       DATAFL_LETTER, e->e_id);
445 
446 		/* do a scan of this body type to count character types */
447 		while ((blen = sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
448 					sizeof(buf))) >= 0)
449 		{
450 			if (mimeboundary(buf, boundaries) != MBT_NOTSEP)
451 				break;
452 			for (i = 0; i < blen; i++)
453 			{
454 				/* count bytes with the high bit set */
455 				sectionsize++;
456 				if (bitset(0200, buf[i]))
457 					sectionhighbits++;
458 			}
459 
460 			/*
461 			**  Heuristic: if 1/4 of the first 4K bytes are 8-bit,
462 			**  assume base64.  This heuristic avoids double-reading
463 			**  large graphics or video files.
464 			*/
465 
466 			if (sectionsize >= 4096 &&
467 			    sectionhighbits > sectionsize / 4)
468 				break;
469 		}
470 
471 		/* return to the original offset for processing */
472 		/* XXX use relative seeks to handle >31 bit file sizes? */
473 		if (sm_io_seek(e->e_dfp, SM_TIME_DEFAULT, offset, SEEK_SET) < 0)
474 			syserr("mime8to7: cannot sm_io_fseek on %cf%s",
475 			       DATAFL_LETTER, e->e_id);
476 		else
477 			sm_io_clearerr(e->e_dfp);
478 	}
479 
480 	/*
481 	**  Heuristically determine encoding method.
482 	**	If more than 1/8 of the total characters have the
483 	**	eighth bit set, use base64; else use quoted-printable.
484 	**	However, only encode binary encoded data as base64,
485 	**	since otherwise the NL=>CRLF mapping will be a problem.
486 	*/
487 
488 	if (tTd(43, 8))
489 	{
490 		sm_dprintf("mime8to7: %ld high bit(s) in %ld byte(s), cte=%s, type=%s/%s\n",
491 			(long) sectionhighbits, (long) sectionsize,
492 			cte == NULL ? "[none]" : cte,
493 			type == NULL ? "[none]" : type,
494 			subtype == NULL ? "[none]" : subtype);
495 	}
496 	if (cte != NULL && sm_strcasecmp(cte, "binary") == 0)
497 		sectionsize = sectionhighbits;
498 	linelen = 0;
499 	bp = buf;
500 	if (sectionhighbits == 0)
501 	{
502 		/* no encoding necessary */
503 		if (cte != NULL &&
504 		    bitset(MCIF_CVT8TO7|MCIF_CVT7TO8|MCIF_INMIME,
505 			   mci->mci_flags) &&
506 		    !bitset(M87F_NO8TO7, flags))
507 		{
508 			/*
509 			**  Skip _unless_ in MIME mode and potentially
510 			**  converting from 8 bit to 7 bit MIME.  See
511 			**  putheader() for the counterpart where the
512 			**  CTE header is skipped in the opposite
513 			**  situation.
514 			*/
515 
516 			(void) sm_snprintf(buf, sizeof(buf),
517 				"Content-Transfer-Encoding: %.200s", cte);
518 			if (!putline(buf, mci))
519 				goto writeerr;
520 			if (tTd(43, 36))
521 				sm_dprintf("  ...%s\n", buf);
522 		}
523 		if (!putline("", mci))
524 			goto writeerr;
525 		mci->mci_flags &= ~MCIF_INHEADER;
526 		while ((blen = sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
527 					sizeof(buf))) >= 0)
528 		{
529 			if (!bitset(MCIF_INLONGLINE, mci->mci_flags))
530 			{
531 				bt = mimeboundary(buf, boundaries);
532 				if (bt != MBT_NOTSEP)
533 					break;
534 			}
535 			if (!putxline(buf, blen, mci,
536 				      PXLF_MAPFROM|PXLF_NOADDEOL))
537 				goto writeerr;
538 		}
539 		if (sm_io_eof(e->e_dfp))
540 			bt = MBT_FINAL;
541 	}
542 	else if (!MapNLtoCRLF ||
543 		 (sectionsize / 8 < sectionhighbits && !use_qp))
544 	{
545 		/* use base64 encoding */
546 		int c1, c2;
547 
548 		if (tTd(43, 36))
549 			sm_dprintf("  ...Content-Transfer-Encoding: base64\n");
550 		if (!putline("Content-Transfer-Encoding: base64", mci))
551 			goto writeerr;
552 		(void) sm_snprintf(buf, sizeof(buf),
553 			"X-MIME-Autoconverted: from 8bit to base64 by %s id %s",
554 			MyHostName, e->e_id);
555 		if (!putline(buf, mci) || !putline("", mci))
556 			goto writeerr;
557 		mci->mci_flags &= ~MCIF_INHEADER;
558 		while ((c1 = mime_getchar_crlf(e->e_dfp, boundaries, &bt)) !=
559 			SM_IO_EOF)
560 		{
561 			if (linelen > 71)
562 			{
563 				*bp = '\0';
564 				if (!putline(buf, mci))
565 					goto writeerr;
566 				linelen = 0;
567 				bp = buf;
568 			}
569 			linelen += 4;
570 			*bp++ = Base64Code[(c1 >> 2)];
571 			c1 = (c1 & 0x03) << 4;
572 			c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt);
573 			if (c2 == SM_IO_EOF)
574 			{
575 				*bp++ = Base64Code[c1];
576 				*bp++ = '=';
577 				*bp++ = '=';
578 				break;
579 			}
580 			c1 |= (c2 >> 4) & 0x0f;
581 			*bp++ = Base64Code[c1];
582 			c1 = (c2 & 0x0f) << 2;
583 			c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt);
584 			if (c2 == SM_IO_EOF)
585 			{
586 				*bp++ = Base64Code[c1];
587 				*bp++ = '=';
588 				break;
589 			}
590 			c1 |= (c2 >> 6) & 0x03;
591 			*bp++ = Base64Code[c1];
592 			*bp++ = Base64Code[c2 & 0x3f];
593 		}
594 		*bp = '\0';
595 		if (!putline(buf, mci))
596 			goto writeerr;
597 	}
598 	else
599 	{
600 		/* use quoted-printable encoding */
601 		int c1, c2;
602 		int fromstate;
603 		BITMAP256 badchars;
604 
605 		/* set up map of characters that must be mapped */
606 		clrbitmap(badchars);
607 		for (c1 = 0x00; c1 < 0x20; c1++)
608 			setbitn(c1, badchars);
609 		clrbitn('\t', badchars);
610 		for (c1 = 0x7f; c1 < 0x100; c1++)
611 			setbitn(c1, badchars);
612 		setbitn('=', badchars);
613 		if (bitnset(M_EBCDIC, mci->mci_mailer->m_flags))
614 			for (p = "!\"#$@[\\]^`{|}~"; *p != '\0'; p++)
615 				setbitn(*p, badchars);
616 
617 		if (tTd(43, 36))
618 			sm_dprintf("  ...Content-Transfer-Encoding: quoted-printable\n");
619 		if (!putline("Content-Transfer-Encoding: quoted-printable",
620 				mci))
621 			goto writeerr;
622 		(void) sm_snprintf(buf, sizeof(buf),
623 			"X-MIME-Autoconverted: from 8bit to quoted-printable by %s id %s",
624 			MyHostName, e->e_id);
625 		if (!putline(buf, mci) || !putline("", mci))
626 			goto writeerr;
627 		mci->mci_flags &= ~MCIF_INHEADER;
628 		fromstate = 0;
629 		c2 = '\n';
630 		while ((c1 = mime_getchar(e->e_dfp, boundaries, &bt)) !=
631 			SM_IO_EOF)
632 		{
633 			if (c1 == '\n')
634 			{
635 				if (c2 == ' ' || c2 == '\t')
636 				{
637 					*bp++ = '=';
638 					*bp++ = Base16Code[(c2 >> 4) & 0x0f];
639 					*bp++ = Base16Code[c2 & 0x0f];
640 				}
641 				if (buf[0] == '.' && bp == &buf[1])
642 				{
643 					buf[0] = '=';
644 					*bp++ = Base16Code[('.' >> 4) & 0x0f];
645 					*bp++ = Base16Code['.' & 0x0f];
646 				}
647 				*bp = '\0';
648 				if (!putline(buf, mci))
649 					goto writeerr;
650 				linelen = fromstate = 0;
651 				bp = buf;
652 				c2 = c1;
653 				continue;
654 			}
655 			if (c2 == ' ' && linelen == 4 && fromstate == 4 &&
656 			    bitnset(M_ESCFROM, mci->mci_mailer->m_flags))
657 			{
658 				*bp++ = '=';
659 				*bp++ = '2';
660 				*bp++ = '0';
661 				linelen += 3;
662 			}
663 			else if (c2 == ' ' || c2 == '\t')
664 			{
665 				*bp++ = c2;
666 				linelen++;
667 			}
668 			if (linelen > 72 &&
669 			    (linelen > 75 || c1 != '.' ||
670 			     (linelen > 73 && c2 == '.')))
671 			{
672 				if (linelen > 73 && c2 == '.')
673 					bp--;
674 				else
675 					c2 = '\n';
676 				*bp++ = '=';
677 				*bp = '\0';
678 				if (!putline(buf, mci))
679 					goto writeerr;
680 				linelen = fromstate = 0;
681 				bp = buf;
682 				if (c2 == '.')
683 				{
684 					*bp++ = '.';
685 					linelen++;
686 				}
687 			}
688 			if (bitnset(bitidx(c1), badchars))
689 			{
690 				*bp++ = '=';
691 				*bp++ = Base16Code[(c1 >> 4) & 0x0f];
692 				*bp++ = Base16Code[c1 & 0x0f];
693 				linelen += 3;
694 			}
695 			else if (c1 != ' ' && c1 != '\t')
696 			{
697 				if (linelen < 4 && c1 == "From"[linelen])
698 					fromstate++;
699 				*bp++ = c1;
700 				linelen++;
701 			}
702 			c2 = c1;
703 		}
704 
705 		/* output any saved character */
706 		if (c2 == ' ' || c2 == '\t')
707 		{
708 			*bp++ = '=';
709 			*bp++ = Base16Code[(c2 >> 4) & 0x0f];
710 			*bp++ = Base16Code[c2 & 0x0f];
711 			linelen += 3;
712 		}
713 
714 		if (linelen > 0 || boundaries[0] != NULL)
715 		{
716 			*bp = '\0';
717 			if (!putline(buf, mci))
718 				goto writeerr;
719 		}
720 
721 	}
722 	if (tTd(43, 3))
723 		sm_dprintf("\t\t\tmime8to7=>%s (basic)\n", MimeBoundaryNames[bt]);
724 	return bt;
725 
726   writeerr:
727 	return SM_IO_EOF;
728 }
729 /*
730 **  MIME_GETCHAR -- get a character for MIME processing
731 **
732 **	Treats boundaries as SM_IO_EOF.
733 **
734 **	Parameters:
735 **		fp -- the input file.
736 **		boundaries -- the current MIME boundaries.
737 **		btp -- if the return value is SM_IO_EOF, *btp is set to
738 **			the type of the boundary.
739 **
740 **	Returns:
741 **		The next character in the input stream.
742 */
743 
744 static int
745 mime_getchar(fp, boundaries, btp)
746 	register SM_FILE_T *fp;
747 	char **boundaries;
748 	int *btp;
749 {
750 	int c;
751 	static unsigned char *bp = NULL;
752 	static int buflen = 0;
753 	static bool atbol = true;	/* at beginning of line */
754 	static int bt = MBT_SYNTAX;	/* boundary type of next SM_IO_EOF */
755 	static unsigned char buf[128];	/* need not be a full line */
756 	int start = 0;			/* indicates position of - in buffer */
757 
758 	if (buflen == 1 && *bp == '\n')
759 	{
760 		/* last \n in buffer may be part of next MIME boundary */
761 		c = *bp;
762 	}
763 	else if (buflen > 0)
764 	{
765 		buflen--;
766 		return *bp++;
767 	}
768 	else
769 		c = sm_io_getc(fp, SM_TIME_DEFAULT);
770 	bp = buf;
771 	buflen = 0;
772 	if (c == '\n')
773 	{
774 		/* might be part of a MIME boundary */
775 		*bp++ = c;
776 		atbol = true;
777 		c = sm_io_getc(fp, SM_TIME_DEFAULT);
778 		if (c == '\n')
779 		{
780 			(void) sm_io_ungetc(fp, SM_TIME_DEFAULT, c);
781 			return c;
782 		}
783 		start = 1;
784 	}
785 	if (c != SM_IO_EOF)
786 		*bp++ = c;
787 	else
788 		bt = MBT_FINAL;
789 	if (atbol && c == '-')
790 	{
791 		/* check for a message boundary */
792 		c = sm_io_getc(fp, SM_TIME_DEFAULT);
793 		if (c != '-')
794 		{
795 			if (c != SM_IO_EOF)
796 				*bp++ = c;
797 			else
798 				bt = MBT_FINAL;
799 			buflen = bp - buf - 1;
800 			bp = buf;
801 			return *bp++;
802 		}
803 
804 		/* got "--", now check for rest of separator */
805 		*bp++ = '-';
806 		while (bp < &buf[sizeof(buf) - 2] &&
807 		       (c = sm_io_getc(fp, SM_TIME_DEFAULT)) != SM_IO_EOF &&
808 		       c != '\n')
809 		{
810 			*bp++ = c;
811 		}
812 		*bp = '\0';	/* XXX simply cut off? */
813 		bt = mimeboundary((char *) &buf[start], boundaries);
814 		switch (bt)
815 		{
816 		  case MBT_FINAL:
817 		  case MBT_INTERMED:
818 			/* we have a message boundary */
819 			buflen = 0;
820 			*btp = bt;
821 			return SM_IO_EOF;
822 		}
823 
824 		if (bp < &buf[sizeof(buf) - 2] && c != SM_IO_EOF)
825 			*bp++ = c;
826 	}
827 
828 	atbol = c == '\n';
829 	buflen = bp - buf - 1;
830 	if (buflen < 0)
831 	{
832 		*btp = bt;
833 		return SM_IO_EOF;
834 	}
835 	bp = buf;
836 	return *bp++;
837 }
838 /*
839 **  MIME_GETCHAR_CRLF -- do mime_getchar, but translate NL => CRLF
840 **
841 **	Parameters:
842 **		fp -- the input file.
843 **		boundaries -- the current MIME boundaries.
844 **		btp -- if the return value is SM_IO_EOF, *btp is set to
845 **			the type of the boundary.
846 **
847 **	Returns:
848 **		The next character in the input stream.
849 */
850 
851 static int
852 mime_getchar_crlf(fp, boundaries, btp)
853 	register SM_FILE_T *fp;
854 	char **boundaries;
855 	int *btp;
856 {
857 	static bool sendlf = false;
858 	int c;
859 
860 	if (sendlf)
861 	{
862 		sendlf = false;
863 		return '\n';
864 	}
865 	c = mime_getchar(fp, boundaries, btp);
866 	if (c == '\n' && MapNLtoCRLF)
867 	{
868 		sendlf = true;
869 		return '\r';
870 	}
871 	return c;
872 }
873 /*
874 **  MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
875 **
876 **	Parameters:
877 **		line -- the input line.
878 **		boundaries -- the set of currently pending boundaries.
879 **
880 **	Returns:
881 **		MBT_NOTSEP -- if this is not a separator line
882 **		MBT_INTERMED -- if this is an intermediate separator
883 **		MBT_FINAL -- if this is a final boundary
884 **		MBT_SYNTAX -- if this is a boundary for the wrong
885 **			enclosure -- i.e., a syntax error.
886 */
887 
888 static int
889 mimeboundary(line, boundaries)
890 	register char *line;
891 	char **boundaries;
892 {
893 	int type = MBT_NOTSEP;
894 	int i;
895 	int savec;
896 
897 	if (line[0] != '-' || line[1] != '-' || boundaries == NULL)
898 		return MBT_NOTSEP;
899 	i = strlen(line);
900 	if (i > 0 && line[i - 1] == '\n')
901 		i--;
902 
903 	/* strip off trailing whitespace */
904 	while (i > 0 && (line[i - 1] == ' ' || line[i - 1] == '\t'
905 #if _FFR_MIME_CR_OK
906 		|| line[i - 1] == '\r'
907 #endif /* _FFR_MIME_CR_OK */
908 	       ))
909 		i--;
910 	savec = line[i];
911 	line[i] = '\0';
912 
913 	if (tTd(43, 5))
914 		sm_dprintf("mimeboundary: line=\"%s\"... ", line);
915 
916 	/* check for this as an intermediate boundary */
917 	if (isboundary(&line[2], boundaries) >= 0)
918 		type = MBT_INTERMED;
919 	else if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0)
920 	{
921 		/* check for a final boundary */
922 		line[i - 2] = '\0';
923 		if (isboundary(&line[2], boundaries) >= 0)
924 			type = MBT_FINAL;
925 		line[i - 2] = '-';
926 	}
927 
928 	line[i] = savec;
929 	if (tTd(43, 5))
930 		sm_dprintf("%s\n", MimeBoundaryNames[type]);
931 	return type;
932 }
933 /*
934 **  DEFCHARSET -- return default character set for message
935 **
936 **	The first choice for character set is for the mailer
937 **	corresponding to the envelope sender.  If neither that
938 **	nor the global configuration file has a default character
939 **	set defined, return "unknown-8bit" as recommended by
940 **	RFC 1428 section 3.
941 **
942 **	Parameters:
943 **		e -- the envelope for this message.
944 **
945 **	Returns:
946 **		The default character set for that mailer.
947 */
948 
949 char *
950 defcharset(e)
951 	register ENVELOPE *e;
952 {
953 	if (e != NULL && e->e_from.q_mailer != NULL &&
954 	    e->e_from.q_mailer->m_defcharset != NULL)
955 		return e->e_from.q_mailer->m_defcharset;
956 	if (DefaultCharSet != NULL)
957 		return DefaultCharSet;
958 	return "unknown-8bit";
959 }
960 /*
961 **  ISBOUNDARY -- is a given string a currently valid boundary?
962 **
963 **	Parameters:
964 **		line -- the current input line.
965 **		boundaries -- the list of valid boundaries.
966 **
967 **	Returns:
968 **		The index number in boundaries if the line is found.
969 **		-1 -- otherwise.
970 **
971 */
972 
973 static int
974 isboundary(line, boundaries)
975 	char *line;
976 	char **boundaries;
977 {
978 	register int i;
979 
980 	for (i = 0; i <= MAXMIMENESTING && boundaries[i] != NULL; i++)
981 	{
982 		if (strcmp(line, boundaries[i]) == 0)
983 			return i;
984 	}
985 	return -1;
986 }
987 #endif /* MIME8TO7 */
988 
989 #if MIME7TO8
990 static int	mime_fromqp __P((unsigned char *, unsigned char **, int));
991 
992 /*
993 **  MIME7TO8 -- output 7 bit encoded MIME body in 8 bit format
994 **
995 **  This is a hack. Supports translating the two 7-bit body-encodings
996 **  (quoted-printable and base64) to 8-bit coded bodies.
997 **
998 **  There is not much point in supporting multipart here, as the UA
999 **  will be able to deal with encoded MIME bodies if it can parse MIME
1000 **  multipart messages.
1001 **
1002 **  Note also that we won't be called unless it is a text/plain MIME
1003 **  message, encoded base64 or QP and mailer flag '9' has been defined
1004 **  on mailer.
1005 **
1006 **  Contributed by Marius Olaffson <marius@rhi.hi.is>.
1007 **
1008 **	Parameters:
1009 **		mci -- mailer connection information.
1010 **		header -- the header for this body part.
1011 **		e -- envelope.
1012 **
1013 **	Returns:
1014 **		true iff body was written successfully
1015 */
1016 
/*
**  Reverse base64 alphabet: index_64[c] is the 6 bit value encoded
**  by the ASCII character c, or -1 if c is not in the alphabet.
**
**  The element type is explicitly signed: whether plain char is
**  signed is implementation-defined, and with an unsigned char the
**  -1 entries would read back as 255.
*/

static const signed char index_64[128] =
{
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
	52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
	-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
	15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
	-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
	41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

/* value of base64 character c; -1 if c is outside 0..127 or not base64 */
# define CHAR64(c)  (((c) < 0 || (c) > 127) ? -1 : index_64[(c)])
1030 
1031 bool
1032 mime7to8(mci, header, e)
1033 	register MCI *mci;
1034 	HDR *header;
1035 	register ENVELOPE *e;
1036 {
1037 	int pxflags, blen;
1038 	register char *p;
1039 	char *cte;
1040 	char **pvp;
1041 	unsigned char *fbufp;
1042 	char buf[MAXLINE];
1043 	unsigned char fbuf[MAXLINE + 1];
1044 	char pvpbuf[MAXLINE];
1045 	extern unsigned char MimeTokenTab[256];
1046 
1047 	p = hvalue("Content-Transfer-Encoding", header);
1048 	if (p == NULL ||
1049 	    (pvp = prescan(p, '\0', pvpbuf, sizeof(pvpbuf), NULL,
1050 			   MimeTokenTab, false)) == NULL ||
1051 	    pvp[0] == NULL)
1052 	{
1053 		/* "can't happen" -- upper level should have caught this */
1054 		syserr("mime7to8: unparsable CTE %s", p == NULL ? "<NULL>" : p);
1055 
1056 		/* avoid bounce loops */
1057 		e->e_flags |= EF_DONT_MIME;
1058 
1059 		/* cheap failsafe algorithm -- should work on text/plain */
1060 		if (p != NULL)
1061 		{
1062 			(void) sm_snprintf(buf, sizeof(buf),
1063 				"Content-Transfer-Encoding: %s", p);
1064 			if (!putline(buf, mci))
1065 				goto writeerr;
1066 		}
1067 		if (!putline("", mci))
1068 			goto writeerr;
1069 		mci->mci_flags &= ~MCIF_INHEADER;
1070 		while ((blen = sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
1071 					sizeof(buf))) >= 0)
1072 		{
1073 			if (!putxline(buf, blen, mci, PXLF_MAPFROM))
1074 				goto writeerr;
1075 		}
1076 		return true;
1077 	}
1078 	cataddr(pvp, NULL, buf, sizeof(buf), '\0', false);
1079 	cte = sm_rpool_strdup_x(e->e_rpool, buf);
1080 
1081 	mci->mci_flags |= MCIF_INHEADER;
1082 	if (!putline("Content-Transfer-Encoding: 8bit", mci))
1083 		goto writeerr;
1084 	(void) sm_snprintf(buf, sizeof(buf),
1085 		"X-MIME-Autoconverted: from %.200s to 8bit by %s id %s",
1086 		cte, MyHostName, e->e_id);
1087 	if (!putline(buf, mci) || !putline("", mci))
1088 		goto writeerr;
1089 	mci->mci_flags &= ~MCIF_INHEADER;
1090 
1091 	/*
1092 	**  Translate body encoding to 8-bit.  Supports two types of
1093 	**  encodings; "base64" and "quoted-printable". Assume qp if
1094 	**  it is not base64.
1095 	*/
1096 
1097 	pxflags = PXLF_MAPFROM;
1098 	if (sm_strcasecmp(cte, "base64") == 0)
1099 	{
1100 		int c1, c2, c3, c4;
1101 
1102 		fbufp = fbuf;
1103 		while ((c1 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT)) !=
1104 			SM_IO_EOF)
1105 		{
1106 			if (isascii(c1) && isspace(c1))
1107 				continue;
1108 
1109 			do
1110 			{
1111 				c2 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
1112 			} while (isascii(c2) && isspace(c2));
1113 			if (c2 == SM_IO_EOF)
1114 				break;
1115 
1116 			do
1117 			{
1118 				c3 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
1119 			} while (isascii(c3) && isspace(c3));
1120 			if (c3 == SM_IO_EOF)
1121 				break;
1122 
1123 			do
1124 			{
1125 				c4 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
1126 			} while (isascii(c4) && isspace(c4));
1127 			if (c4 == SM_IO_EOF)
1128 				break;
1129 
1130 			if (c1 == '=' || c2 == '=')
1131 				continue;
1132 			c1 = CHAR64(c1);
1133 			c2 = CHAR64(c2);
1134 
1135 #if MIME7TO8_OLD
1136 #define CHK_EOL if (*--fbufp != '\n' || (fbufp > fbuf && *--fbufp != '\r')) \
1137 			++fbufp;
1138 #else /* MIME7TO8_OLD */
1139 #define CHK_EOL if (*--fbufp != '\n' || (fbufp > fbuf && *--fbufp != '\r')) \
1140 		{					\
1141 			++fbufp;			\
1142 			pxflags |= PXLF_NOADDEOL;	\
1143 		}
1144 #endif /* MIME7TO8_OLD */
1145 
1146 #define PUTLINE64	\
1147 	do		\
1148 	{		\
1149 		if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE])	\
1150 		{							\
1151 			CHK_EOL;					\
1152 			if (!putxline((char *) fbuf, fbufp - fbuf, mci, pxflags)) \
1153 				goto writeerr;				\
1154 			pxflags &= ~PXLF_NOADDEOL;			\
1155 			fbufp = fbuf;					\
1156 		}	\
1157 	} while (0)
1158 
1159 			*fbufp = (c1 << 2) | ((c2 & 0x30) >> 4);
1160 			PUTLINE64;
1161 			if (c3 == '=')
1162 				continue;
1163 			c3 = CHAR64(c3);
1164 			*fbufp = ((c2 & 0x0f) << 4) | ((c3 & 0x3c) >> 2);
1165 			PUTLINE64;
1166 			if (c4 == '=')
1167 				continue;
1168 			c4 = CHAR64(c4);
1169 			*fbufp = ((c3 & 0x03) << 6) | c4;
1170 			PUTLINE64;
1171 		}
1172 	}
1173 	else
1174 	{
1175 		int off;
1176 
1177 		/* quoted-printable */
1178 		pxflags |= PXLF_NOADDEOL;
1179 		fbufp = fbuf;
1180 		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
1181 				   sizeof(buf)) >= 0)
1182 		{
1183 			off = mime_fromqp((unsigned char *) buf, &fbufp,
1184 					  &fbuf[MAXLINE] - fbufp);
1185 again:
1186 			if (off < -1)
1187 				continue;
1188 
1189 			if (fbufp - fbuf > 0)
1190 			{
1191 				if (!putxline((char *) fbuf, fbufp - fbuf - 1,
1192 						mci, pxflags))
1193 					goto writeerr;
1194 			}
1195 			fbufp = fbuf;
1196 			if (off >= 0 && buf[off] != '\0')
1197 			{
1198 				off = mime_fromqp((unsigned char *) (buf + off),
1199 						  &fbufp,
1200 						  &fbuf[MAXLINE] - fbufp);
1201 				goto again;
1202 			}
1203 		}
1204 	}
1205 
1206 	/* force out partial last line */
1207 	if (fbufp > fbuf)
1208 	{
1209 		*fbufp = '\0';
1210 		if (!putxline((char *) fbuf, fbufp - fbuf, mci, pxflags))
1211 			goto writeerr;
1212 	}
1213 
1214 	/*
1215 	**  The decoded text may end without an EOL.  Since this function
1216 	**  is only called for text/plain MIME messages, it is safe to
1217 	**  add an extra one at the end just in case.  This is a hack,
1218 	**  but so is auto-converting MIME in the first place.
1219 	*/
1220 
1221 	if (!putline("", mci))
1222 		goto writeerr;
1223 
1224 	if (tTd(43, 3))
1225 		sm_dprintf("\t\t\tmime7to8 => %s to 8bit done\n", cte);
1226 	return true;
1227 
1228   writeerr:
1229 	return false;
1230 }
1231 /*
1232 **  The following is based on Borenstein's "codes.c" module, with simplifying
1233 **  changes as we do not deal with multipart, and to do the translation in-core,
1234 **  with an attempt to prevent overrun of output buffers.
1235 **
1236 **  What is needed here are changes to defend this code better against
1237 **  bad encodings. Questionable to always return 0xFF for bad mappings.
1238 */
1239 
/*
**  index_hex -- map an ASCII code (0..127) to its hexadecimal digit value
**
**	Entries are 0..15 for '0'-'9', 'A'-'F' and 'a'-'f', and -1 for
**	every other code.  The element type is explicitly signed: with a
**	plain "char" the -1 entries read back as 255 on platforms where
**	char is unsigned, and the "== -1" tests applied to HEXCHAR()
**	results would then never match.
*/

static signed char index_hex[128] =
{
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* hex digit value of c, or -1 if c is outside 0..127 or is not a hex digit */
# define HEXCHAR(c)  (((c) < 0 || (c) > 127) ? -1 : index_hex[(c)])

/*
**  MIME_FROMQP -- decode quoted printable string
**
**	Decodes at most one line per call: decoding stops after a
**	newline has been copied, at the end of infile, or when the
**	output space is used up.
**
**	Parameters:
**		infile -- input (encoded) string
**		outfile -- pointer to the current output position; it is
**			advanced past every byte stored, including the
**			terminating '\0' (which is not stored when the
**			function returns -2, or returns 0 because
**			maxlen is too small).
**		maxlen -- size of output buffer, including room for
**			the terminating '\0'
**
**	Returns:
**		-2 if decoding failure ('=' followed by a newline or
**			by a character that is not a hex digit); the
**			bytes decoded so far are left in outfile
**			without a terminating '\0'.
**		-1 if infile completely decoded into outfile
**		>= 0 is the position in infile decoding
**			reached before maxlen was reached
*/

static int
mime_fromqp(infile, outfile, maxlen)
	unsigned char *infile;
	unsigned char **outfile;
	int maxlen;		/* Max # of chars allowed in outfile */
{
	int c1, c2;
	int nchar = 0;
	unsigned char *b;

	/* decrement by one for trailing '\0', at least one other char */
	if (--maxlen < 1)
		return 0;

	b = infile;
	while ((c1 = *infile++) != '\0' && nchar < maxlen)
	{
		if (c1 == '=')
		{
			/* a lone '=' at the end of the input is dropped */
			if ((c1 = *infile++) == '\0')
				break;

			if (c1 == '\n' || (c1 = HEXCHAR(c1)) == -1)
			{
				/* ignore it and the rest of the buffer */
				return -2;
			}

			/* skip ahead to the next hex digit, if there is one */
			do
			{
				if ((c2 = *infile++) == '\0')
				{
					c2 = -1;
					break;
				}
			} while ((c2 = HEXCHAR(c2)) == -1);

			/* input ended before the second digit was found */
			if (c2 == -1)
				break;
			nchar++;
			*(*outfile)++ = c1 << 4 | c2;
		}
		else
		{
			nchar++;
			*(*outfile)++ = c1;
			if (c1 == '\n')
			{
				/*
				**  The newline has been consumed and copied.
				**  Finish here even if it used up the last
				**  free byte: returning its offset would make
				**  the caller decode the same newline again.
				*/

				*(*outfile)++ = '\0';
				return -1;
			}
		}
	}
	*(*outfile)++ = '\0';

	/*
	**  Out of space: infile is one past the first character that
	**  has not been decoded, so report that character's offset.
	*/

	if (nchar >= maxlen)
		return (int) (infile - b - 1);
	return -1;
}
1326 #endif /* MIME7TO8 */
1327