xref: /freebsd/contrib/sendmail/src/mime.c (revision 77a0943ded95b9e6438f7db70c4a28e4d93946d4)
1 /*
2  * Copyright (c) 1998, 1999 Sendmail, Inc. and its suppliers.
3  *	All rights reserved.
4  * Copyright (c) 1994, 1996-1997 Eric P. Allman.  All rights reserved.
5  * Copyright (c) 1994
6  *	The Regents of the University of California.  All rights reserved.
7  *
8  * By using this file, you agree to the terms and conditions set
9  * forth in the LICENSE file which can be found at the top level of
10  * the sendmail distribution.
11  *
12  */
13 
14 #include <sendmail.h>
15 #include <string.h>
16 
17 #ifndef lint
18 static char id[] = "@(#)$Id: mime.c,v 8.94 1999/10/17 17:35:58 ca Exp $";
19 #endif /* ! lint */
20 
21 static int	isboundary __P((char *, char **));
22 static int	mimeboundary __P((char *, char **));
23 static int	mime_fromqp __P((u_char *, u_char **, int, int));
24 static int	mime_getchar __P((FILE *, char **, int *));
25 static int	mime_getchar_crlf __P((FILE *, char **, int *));
26 
27 /*
28 **  MIME support.
29 **
30 **	I am indebted to John Beck of Hewlett-Packard, who contributed
31 **	his code to me for inclusion.  As it turns out, I did not use
32 **	his code since he used a "minimum change" approach that used
33 **	several temp files, and I wanted a "minimum impact" approach
34 **	that would avoid copying.  However, looking over his code
35 **	helped me cement my understanding of the problem.
36 **
37 **	I also looked at, but did not directly use, Nathaniel
38 **	Borenstein's "code.c" module.  Again, it functioned as
39 **	a file-to-file translator, which did not fit within my
40 **	design bounds, but it was a useful base for understanding
41 **	the problem.
42 */
43 
44 #if MIME8TO7
45 
/*
**  Digit alphabets used by the encoders: Base16Code maps a 4-bit value
**  to its upper-case hex digit (quoted-printable "=XX" escapes) and
**  Base64Code maps a 6-bit value to its base64 digit.  Both are
**  read-only lookup tables, hence const.
*/
static const char	Base16Code[] =	"0123456789ABCDEF";
static const char	Base64Code[] =	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
49 
/*
**  Types of MIME boundaries.
**
**	These are the classifications produced by mimeboundary() and
**	returned by mime8to7().  The numeric values double as indexes
**	into MimeBoundaryNames below, so the two must stay in step.
*/
# define MBT_SYNTAX	0	/* syntax error */
# define MBT_NOTSEP	1	/* not a boundary */
# define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
# define MBT_FINAL	3	/* final boundary (trailing -- included) */

/* printable names of the MBT_* codes, indexed by code; used in debug output */
static char	*MimeBoundaryNames[] =
{
	"SYNTAX",	"NOTSEP",	"INTERMED",	"FINAL"
};

/*
**  Whether mime_getchar_crlf() expands NL to CRLF.  mime8to7() sets this
**  on entry and (under USE_B_CLASS) clears it for types in class 'b'.
*/
static bool	MapNLtoCRLF;
62 
63 /*
64 **  MIME8TO7 -- output 8 bit body in 7 bit format
65 **
66 **	The header has already been output -- this has to do the
67 **	8 to 7 bit conversion.  It would be easy if we didn't have
68 **	to deal with nested formats (multipart/xxx and message/rfc822).
69 **
70 **	We won't be called if we don't have to do a conversion, and
71 **	appropriate MIME-Version: and Content-Type: fields have been
72 **	output.  Any Content-Transfer-Encoding: field has not been
73 **	output, and we can add it here.
74 **
75 **	Parameters:
76 **		mci -- mailer connection information.
77 **		header -- the header for this body part.
78 **		e -- envelope.
**		boundaries -- the currently pending message boundaries,
**			as a NULL-terminated array with room for
**			MAXMIMENESTING + 1 entries.  boundaries[0] is
**			NULL if we are processing the outer portion.
81 **		flags -- to tweak processing.
82 **
83 **	Returns:
84 **		An indicator of what terminated the message part:
**		  MBT_FINAL -- the final boundary, or an end of file
**		  MBT_INTERMED -- an intermediate boundary
**		  MBT_NOTSEP -- input stopped without a boundary and
**			without an end-of-file indication
88 */
89 
/*
**  One parameter of a Content-Type: header as broken out by mime8to7().
**  Both members point at tokens of the parsed header; they are not
**  separately allocated.
*/
struct args
{
	char	*a_field;	/* name of field */
	char	*a_value;	/* value of that field */
};
95 
96 int
97 mime8to7(mci, header, e, boundaries, flags)
98 	register MCI *mci;
99 	HDR *header;
100 	register ENVELOPE *e;
101 	char **boundaries;
102 	int flags;
103 {
104 	register char *p;
105 	int linelen;
106 	int bt;
107 	off_t offset;
108 	size_t sectionsize, sectionhighbits;
109 	int i;
110 	char *type;
111 	char *subtype;
112 	char *cte;
113 	char **pvp;
114 	int argc = 0;
115 	char *bp;
116 	bool use_qp = FALSE;
117 	struct args argv[MAXMIMEARGS];
118 	char bbuf[128];
119 	char buf[MAXLINE];
120 	char pvpbuf[MAXLINE];
121 	extern u_char MimeTokenTab[256];
122 
123 	if (tTd(43, 1))
124 	{
125 		dprintf("mime8to7: flags = %x, boundaries =", flags);
126 		if (boundaries[0] == NULL)
127 			dprintf(" <none>");
128 		else
129 		{
130 			for (i = 0; boundaries[i] != NULL; i++)
131 				dprintf(" %s", boundaries[i]);
132 		}
133 		dprintf("\n");
134 	}
135 	MapNLtoCRLF = TRUE;
136 	p = hvalue("Content-Transfer-Encoding", header);
137 	if (p == NULL ||
138 	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
139 			   MimeTokenTab)) == NULL ||
140 	    pvp[0] == NULL)
141 	{
142 		cte = NULL;
143 	}
144 	else
145 	{
146 		cataddr(pvp, NULL, buf, sizeof buf, '\0');
147 		cte = newstr(buf);
148 	}
149 
150 	type = subtype = NULL;
151 	p = hvalue("Content-Type", header);
152 	if (p == NULL)
153 	{
154 		if (bitset(M87F_DIGEST, flags))
155 			p = "message/rfc822";
156 		else
157 			p = "text/plain";
158 	}
159 	if (p != NULL &&
160 	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
161 			   MimeTokenTab)) != NULL &&
162 	    pvp[0] != NULL)
163 	{
164 		if (tTd(43, 40))
165 		{
166 			for (i = 0; pvp[i] != NULL; i++)
167 				dprintf("pvp[%d] = \"%s\"\n", i, pvp[i]);
168 		}
169 		type = *pvp++;
170 		if (*pvp != NULL && strcmp(*pvp, "/") == 0 &&
171 		    *++pvp != NULL)
172 		{
173 			subtype = *pvp++;
174 		}
175 
176 		/* break out parameters */
177 		while (*pvp != NULL && argc < MAXMIMEARGS)
178 		{
179 			/* skip to semicolon separator */
180 			while (*pvp != NULL && strcmp(*pvp, ";") != 0)
181 				pvp++;
182 			if (*pvp++ == NULL || *pvp == NULL)
183 				break;
184 
185 			/* complain about empty values */
186 			if (strcmp(*pvp, ";") == 0)
187 			{
188 				usrerr("mime8to7: Empty parameter in Content-Type header");
189 
190 				/* avoid bounce loops */
191 				e->e_flags |= EF_DONT_MIME;
192 				continue;
193 			}
194 
195 			/* extract field name */
196 			argv[argc].a_field = *pvp++;
197 
198 			/* see if there is a value */
199 			if (*pvp != NULL && strcmp(*pvp, "=") == 0 &&
200 			    (*++pvp == NULL || strcmp(*pvp, ";") != 0))
201 			{
202 				argv[argc].a_value = *pvp;
203 				argc++;
204 			}
205 		}
206 	}
207 
208 	/* check for disaster cases */
209 	if (type == NULL)
210 		type = "-none-";
211 	if (subtype == NULL)
212 		subtype = "-none-";
213 
214 	/* don't propogate some flags more than one level into the message */
215 	flags &= ~M87F_DIGEST;
216 
217 	/*
218 	**  Check for cases that can not be encoded.
219 	**
220 	**	For example, you can't encode certain kinds of types
221 	**	or already-encoded messages.  If we find this case,
222 	**	just copy it through.
223 	*/
224 
225 	snprintf(buf, sizeof buf, "%.100s/%.100s", type, subtype);
226 	if (wordinclass(buf, 'n') || (cte != NULL && !wordinclass(cte, 'e')))
227 		flags |= M87F_NO8BIT;
228 
229 # ifdef USE_B_CLASS
230 	if (wordinclass(buf, 'b') || wordinclass(type, 'b'))
231 		MapNLtoCRLF = FALSE;
232 # endif /* USE_B_CLASS */
233 	if (wordinclass(buf, 'q') || wordinclass(type, 'q'))
234 		use_qp = TRUE;
235 
236 	/*
237 	**  Multipart requires special processing.
238 	**
239 	**	Do a recursive descent into the message.
240 	*/
241 
242 	if (strcasecmp(type, "multipart") == 0 &&
243 	    (!bitset(M87F_NO8BIT, flags) || bitset(M87F_NO8TO7, flags)))
244 	{
245 
246 		if (strcasecmp(subtype, "digest") == 0)
247 			flags |= M87F_DIGEST;
248 
249 		for (i = 0; i < argc; i++)
250 		{
251 			if (strcasecmp(argv[i].a_field, "boundary") == 0)
252 				break;
253 		}
254 		if (i >= argc || argv[i].a_value == NULL)
255 		{
256 			usrerr("mime8to7: Content-Type: \"%s\": %s boundary",
257 				i >= argc ? "missing" : "bogus", p);
258 			p = "---";
259 
260 			/* avoid bounce loops */
261 			e->e_flags |= EF_DONT_MIME;
262 		}
263 		else
264 		{
265 			p = argv[i].a_value;
266 			stripquotes(p);
267 		}
268 		if (strlcpy(bbuf, p, sizeof bbuf) >= sizeof bbuf)
269 		{
270 			usrerr("mime8to7: multipart boundary \"%s\" too long",
271 				p);
272 
273 			/* avoid bounce loops */
274 			e->e_flags |= EF_DONT_MIME;
275 		}
276 
277 		if (tTd(43, 1))
278 			dprintf("mime8to7: multipart boundary \"%s\"\n", bbuf);
279 		for (i = 0; i < MAXMIMENESTING; i++)
280 			if (boundaries[i] == NULL)
281 				break;
282 		if (i >= MAXMIMENESTING)
283 		{
284 			usrerr("mime8to7: multipart nesting boundary too deep");
285 
286 			/* avoid bounce loops */
287 			e->e_flags |= EF_DONT_MIME;
288 		}
289 		else
290 		{
291 			boundaries[i] = bbuf;
292 			boundaries[i + 1] = NULL;
293 		}
294 		mci->mci_flags |= MCIF_INMIME;
295 
296 		/* skip the early "comment" prologue */
297 		putline("", mci);
298 		mci->mci_flags &= ~MCIF_INHEADER;
299 		bt = MBT_FINAL;
300 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
301 		{
302 			bt = mimeboundary(buf, boundaries);
303 			if (bt != MBT_NOTSEP)
304 				break;
305 			putxline(buf, strlen(buf), mci, PXLF_MAPFROM|PXLF_STRIP8BIT);
306 			if (tTd(43, 99))
307 				dprintf("  ...%s", buf);
308 		}
309 		if (feof(e->e_dfp))
310 			bt = MBT_FINAL;
311 		while (bt != MBT_FINAL)
312 		{
313 			auto HDR *hdr = NULL;
314 
315 			snprintf(buf, sizeof buf, "--%s", bbuf);
316 			putline(buf, mci);
317 			if (tTd(43, 35))
318 				dprintf("  ...%s\n", buf);
319 			collect(e->e_dfp, FALSE, &hdr, e);
320 			if (tTd(43, 101))
321 				putline("+++after collect", mci);
322 			putheader(mci, hdr, e, flags);
323 			if (tTd(43, 101))
324 				putline("+++after putheader", mci);
325 			bt = mime8to7(mci, hdr, e, boundaries, flags);
326 		}
327 		snprintf(buf, sizeof buf, "--%s--", bbuf);
328 		putline(buf, mci);
329 		if (tTd(43, 35))
330 			dprintf("  ...%s\n", buf);
331 		boundaries[i] = NULL;
332 		mci->mci_flags &= ~MCIF_INMIME;
333 
334 		/* skip the late "comment" epilogue */
335 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
336 		{
337 			bt = mimeboundary(buf, boundaries);
338 			if (bt != MBT_NOTSEP)
339 				break;
340 			putxline(buf, strlen(buf), mci, PXLF_MAPFROM|PXLF_STRIP8BIT);
341 			if (tTd(43, 99))
342 				dprintf("  ...%s", buf);
343 		}
344 		if (feof(e->e_dfp))
345 			bt = MBT_FINAL;
346 		if (tTd(43, 3))
347 			dprintf("\t\t\tmime8to7=>%s (multipart)\n",
348 				MimeBoundaryNames[bt]);
349 		return bt;
350 	}
351 
352 	/*
353 	**  Message/xxx types -- recurse exactly once.
354 	**
355 	**	Class 's' is predefined to have "rfc822" only.
356 	*/
357 
358 	if (strcasecmp(type, "message") == 0)
359 	{
360 		if (!wordinclass(subtype, 's'))
361 		{
362 			flags |= M87F_NO8BIT;
363 		}
364 		else
365 		{
366 			auto HDR *hdr = NULL;
367 
368 			putline("", mci);
369 
370 			mci->mci_flags |= MCIF_INMIME;
371 			collect(e->e_dfp, FALSE, &hdr, e);
372 			if (tTd(43, 101))
373 				putline("+++after collect", mci);
374 			putheader(mci, hdr, e, flags);
375 			if (tTd(43, 101))
376 				putline("+++after putheader", mci);
377 			if (hvalue("MIME-Version", hdr) == NULL)
378 				putline("MIME-Version: 1.0", mci);
379 			bt = mime8to7(mci, hdr, e, boundaries, flags);
380 			mci->mci_flags &= ~MCIF_INMIME;
381 			return bt;
382 		}
383 	}
384 
385 	/*
386 	**  Non-compound body type
387 	**
388 	**	Compute the ratio of seven to eight bit characters;
389 	**	use that as a heuristic to decide how to do the
390 	**	encoding.
391 	*/
392 
393 	sectionsize = sectionhighbits = 0;
394 	if (!bitset(M87F_NO8BIT|M87F_NO8TO7, flags))
395 	{
396 		/* remember where we were */
397 		offset = ftell(e->e_dfp);
398 		if (offset == -1)
399 			syserr("mime8to7: cannot ftell on df%s", e->e_id);
400 
401 		/* do a scan of this body type to count character types */
402 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
403 		{
404 			if (mimeboundary(buf, boundaries) != MBT_NOTSEP)
405 				break;
406 			for (p = buf; *p != '\0'; p++)
407 			{
408 				/* count bytes with the high bit set */
409 				sectionsize++;
410 				if (bitset(0200, *p))
411 					sectionhighbits++;
412 			}
413 
414 			/*
415 			**  Heuristic: if 1/4 of the first 4K bytes are 8-bit,
416 			**  assume base64.  This heuristic avoids double-reading
417 			**  large graphics or video files.
418 			*/
419 
420 			if (sectionsize >= 4096 &&
421 			    sectionhighbits > sectionsize / 4)
422 				break;
423 		}
424 
425 		/* return to the original offset for processing */
426 		/* XXX use relative seeks to handle >31 bit file sizes? */
427 		if (fseek(e->e_dfp, offset, SEEK_SET) < 0)
428 			syserr("mime8to7: cannot fseek on df%s", e->e_id);
429 		else
430 			clearerr(e->e_dfp);
431 	}
432 
433 	/*
434 	**  Heuristically determine encoding method.
435 	**	If more than 1/8 of the total characters have the
436 	**	eighth bit set, use base64; else use quoted-printable.
437 	**	However, only encode binary encoded data as base64,
438 	**	since otherwise the NL=>CRLF mapping will be a problem.
439 	*/
440 
441 	if (tTd(43, 8))
442 	{
443 		dprintf("mime8to7: %ld high bit(s) in %ld byte(s), cte=%s, type=%s/%s\n",
444 			(long) sectionhighbits, (long) sectionsize,
445 			cte == NULL ? "[none]" : cte,
446 			type == NULL ? "[none]" : type,
447 			subtype == NULL ? "[none]" : subtype);
448 	}
449 	if (cte != NULL && strcasecmp(cte, "binary") == 0)
450 		sectionsize = sectionhighbits;
451 	linelen = 0;
452 	bp = buf;
453 	if (sectionhighbits == 0)
454 	{
455 		/* no encoding necessary */
456 		if (cte != NULL &&
457 		    bitset(MCIF_CVT8TO7|MCIF_CVT7TO8|MCIF_INMIME,
458 			   mci->mci_flags) &&
459 		    !bitset(M87F_NO8TO7, flags))
460 		{
461 			/*
462 			**  Skip _unless_ in MIME mode and potentially
463 			**  converting from 8 bit to 7 bit MIME.  See
464 			**  putheader() for the counterpart where the
465 			**  CTE header is skipped in the opposite
466 			**  situation.
467 			*/
468 
469 			snprintf(buf, sizeof buf,
470 				"Content-Transfer-Encoding: %.200s", cte);
471 			putline(buf, mci);
472 			if (tTd(43, 36))
473 				dprintf("  ...%s\n", buf);
474 		}
475 		putline("", mci);
476 		mci->mci_flags &= ~MCIF_INHEADER;
477 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
478 		{
479 			bt = mimeboundary(buf, boundaries);
480 			if (bt != MBT_NOTSEP)
481 				break;
482 			putline(buf, mci);
483 		}
484 		if (feof(e->e_dfp))
485 			bt = MBT_FINAL;
486 	}
487 	else if (!MapNLtoCRLF ||
488 		 (sectionsize / 8 < sectionhighbits && !use_qp))
489 	{
490 		/* use base64 encoding */
491 		int c1, c2;
492 
493 		if (tTd(43, 36))
494 			dprintf("  ...Content-Transfer-Encoding: base64\n");
495 		putline("Content-Transfer-Encoding: base64", mci);
496 		snprintf(buf, sizeof buf,
497 			"X-MIME-Autoconverted: from 8bit to base64 by %s id %s",
498 			MyHostName, e->e_id);
499 		putline(buf, mci);
500 		putline("", mci);
501 		mci->mci_flags &= ~MCIF_INHEADER;
502 		while ((c1 = mime_getchar_crlf(e->e_dfp, boundaries, &bt)) != EOF)
503 		{
504 			if (linelen > 71)
505 			{
506 				*bp = '\0';
507 				putline(buf, mci);
508 				linelen = 0;
509 				bp = buf;
510 			}
511 			linelen += 4;
512 			*bp++ = Base64Code[(c1 >> 2)];
513 			c1 = (c1 & 0x03) << 4;
514 			c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt);
515 			if (c2 == EOF)
516 			{
517 				*bp++ = Base64Code[c1];
518 				*bp++ = '=';
519 				*bp++ = '=';
520 				break;
521 			}
522 			c1 |= (c2 >> 4) & 0x0f;
523 			*bp++ = Base64Code[c1];
524 			c1 = (c2 & 0x0f) << 2;
525 			c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt);
526 			if (c2 == EOF)
527 			{
528 				*bp++ = Base64Code[c1];
529 				*bp++ = '=';
530 				break;
531 			}
532 			c1 |= (c2 >> 6) & 0x03;
533 			*bp++ = Base64Code[c1];
534 			*bp++ = Base64Code[c2 & 0x3f];
535 		}
536 		*bp = '\0';
537 		putline(buf, mci);
538 	}
539 	else
540 	{
541 		/* use quoted-printable encoding */
542 		int c1, c2;
543 		int fromstate;
544 		BITMAP256 badchars;
545 
546 		/* set up map of characters that must be mapped */
547 		clrbitmap(badchars);
548 		for (c1 = 0x00; c1 < 0x20; c1++)
549 			setbitn(c1, badchars);
550 		clrbitn('\t', badchars);
551 		for (c1 = 0x7f; c1 < 0x100; c1++)
552 			setbitn(c1, badchars);
553 		setbitn('=', badchars);
554 		if (bitnset(M_EBCDIC, mci->mci_mailer->m_flags))
555 			for (p = "!\"#$@[\\]^`{|}~"; *p != '\0'; p++)
556 				setbitn(*p, badchars);
557 
558 		if (tTd(43, 36))
559 			dprintf("  ...Content-Transfer-Encoding: quoted-printable\n");
560 		putline("Content-Transfer-Encoding: quoted-printable", mci);
561 		snprintf(buf, sizeof buf,
562 			"X-MIME-Autoconverted: from 8bit to quoted-printable by %s id %s",
563 			MyHostName, e->e_id);
564 		putline(buf, mci);
565 		putline("", mci);
566 		mci->mci_flags &= ~MCIF_INHEADER;
567 		fromstate = 0;
568 		c2 = '\n';
569 		while ((c1 = mime_getchar(e->e_dfp, boundaries, &bt)) != EOF)
570 		{
571 			if (c1 == '\n')
572 			{
573 				if (c2 == ' ' || c2 == '\t')
574 				{
575 					*bp++ = '=';
576 					*bp++ = Base16Code[(c2 >> 4) & 0x0f];
577 					*bp++ = Base16Code[c2 & 0x0f];
578 				}
579 				if (buf[0] == '.' && bp == &buf[1])
580 				{
581 					buf[0] = '=';
582 					*bp++ = Base16Code[('.' >> 4) & 0x0f];
583 					*bp++ = Base16Code['.' & 0x0f];
584 				}
585 				*bp = '\0';
586 				putline(buf, mci);
587 				linelen = fromstate = 0;
588 				bp = buf;
589 				c2 = c1;
590 				continue;
591 			}
592 			if (c2 == ' ' && linelen == 4 && fromstate == 4 &&
593 			    bitnset(M_ESCFROM, mci->mci_mailer->m_flags))
594 			{
595 				*bp++ = '=';
596 				*bp++ = '2';
597 				*bp++ = '0';
598 				linelen += 3;
599 			}
600 			else if (c2 == ' ' || c2 == '\t')
601 			{
602 				*bp++ = c2;
603 				linelen++;
604 			}
605 			if (linelen > 72 &&
606 			    (linelen > 75 || c1 != '.' ||
607 			     (linelen > 73 && c2 == '.')))
608 			{
609 				if (linelen > 73 && c2 == '.')
610 					bp--;
611 				else
612 					c2 = '\n';
613 				*bp++ = '=';
614 				*bp = '\0';
615 				putline(buf, mci);
616 				linelen = fromstate = 0;
617 				bp = buf;
618 				if (c2 == '.')
619 				{
620 					*bp++ = '.';
621 					linelen++;
622 				}
623 			}
624 			if (bitnset(c1 & 0xff, badchars))
625 			{
626 				*bp++ = '=';
627 				*bp++ = Base16Code[(c1 >> 4) & 0x0f];
628 				*bp++ = Base16Code[c1 & 0x0f];
629 				linelen += 3;
630 			}
631 			else if (c1 != ' ' && c1 != '\t')
632 			{
633 				if (linelen < 4 && c1 == "From"[linelen])
634 					fromstate++;
635 				*bp++ = c1;
636 				linelen++;
637 			}
638 			c2 = c1;
639 		}
640 
641 		/* output any saved character */
642 		if (c2 == ' ' || c2 == '\t')
643 		{
644 			*bp++ = '=';
645 			*bp++ = Base16Code[(c2 >> 4) & 0x0f];
646 			*bp++ = Base16Code[c2 & 0x0f];
647 			linelen += 3;
648 		}
649 
650 		if (linelen > 0 || boundaries[0] != NULL)
651 		{
652 			*bp = '\0';
653 			putline(buf, mci);
654 		}
655 
656 	}
657 	if (tTd(43, 3))
658 		dprintf("\t\t\tmime8to7=>%s (basic)\n", MimeBoundaryNames[bt]);
659 	return bt;
660 }
661 /*
662 **  MIME_GETCHAR -- get a character for MIME processing
663 **
664 **	Treats boundaries as EOF.
665 **
666 **	Parameters:
667 **		fp -- the input file.
668 **		boundaries -- the current MIME boundaries.
669 **		btp -- if the return value is EOF, *btp is set to
670 **			the type of the boundary.
671 **
672 **	Returns:
673 **		The next character in the input stream.
674 */
675 
676 static int
677 mime_getchar(fp, boundaries, btp)
678 	register FILE *fp;
679 	char **boundaries;
680 	int *btp;
681 {
682 	int c;
683 	static u_char *bp = NULL;
684 	static int buflen = 0;
685 	static bool atbol = TRUE;	/* at beginning of line */
686 	static int bt = MBT_SYNTAX;	/* boundary type of next EOF */
687 	static u_char buf[128];		/* need not be a full line */
688 	int start = 0;			/* indicates position of - in buffer */
689 
690 	if (buflen == 1 && *bp == '\n')
691 	{
692 		/* last \n in buffer may be part of next MIME boundary */
693 		c = *bp;
694 	}
695 	else if (buflen > 0)
696 	{
697 		buflen--;
698 		return *bp++;
699 	}
700 	else
701 		c = getc(fp);
702 	bp = buf;
703 	buflen = 0;
704 	if (c == '\n')
705 	{
706 		/* might be part of a MIME boundary */
707 		*bp++ = c;
708 		atbol = TRUE;
709 		c = getc(fp);
710 		if (c == '\n')
711 		{
712 			(void) ungetc(c, fp);
713 			return c;
714 		}
715 		start = 1;
716 	}
717 	if (c != EOF)
718 		*bp++ = c;
719 	else
720 		bt = MBT_FINAL;
721 	if (atbol && c == '-')
722 	{
723 		/* check for a message boundary */
724 		c = getc(fp);
725 		if (c != '-')
726 		{
727 			if (c != EOF)
728 				*bp++ = c;
729 			else
730 				bt = MBT_FINAL;
731 			buflen = bp - buf - 1;
732 			bp = buf;
733 			return *bp++;
734 		}
735 
736 		/* got "--", now check for rest of separator */
737 		*bp++ = '-';
738 		while (bp < &buf[sizeof buf - 2] &&
739 		       (c = getc(fp)) != EOF && c != '\n')
740 		{
741 			*bp++ = c;
742 		}
743 		*bp = '\0';
744 		bt = mimeboundary((char *) &buf[start], boundaries);
745 		switch (bt)
746 		{
747 		  case MBT_FINAL:
748 		  case MBT_INTERMED:
749 			/* we have a message boundary */
750 			buflen = 0;
751 			*btp = bt;
752 			return EOF;
753 		}
754 
755 		atbol = c == '\n';
756 		if (c != EOF)
757 			*bp++ = c;
758 	}
759 
760 	buflen = bp - buf - 1;
761 	if (buflen < 0)
762 	{
763 		*btp = bt;
764 		return EOF;
765 	}
766 	bp = buf;
767 	return *bp++;
768 }
769 /*
770 **  MIME_GETCHAR_CRLF -- do mime_getchar, but translate NL => CRLF
771 **
772 **	Parameters:
773 **		fp -- the input file.
774 **		boundaries -- the current MIME boundaries.
775 **		btp -- if the return value is EOF, *btp is set to
776 **			the type of the boundary.
777 **
778 **	Returns:
779 **		The next character in the input stream.
780 */
781 
782 static int
783 mime_getchar_crlf(fp, boundaries, btp)
784 	register FILE *fp;
785 	char **boundaries;
786 	int *btp;
787 {
788 	static bool sendlf = FALSE;
789 	int c;
790 
791 	if (sendlf)
792 	{
793 		sendlf = FALSE;
794 		return '\n';
795 	}
796 	c = mime_getchar(fp, boundaries, btp);
797 	if (c == '\n' && MapNLtoCRLF)
798 	{
799 		sendlf = TRUE;
800 		return '\r';
801 	}
802 	return c;
803 }
804 /*
805 **  MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
806 **
807 **	Parameters:
808 **		line -- the input line.
809 **		boundaries -- the set of currently pending boundaries.
810 **
811 **	Returns:
812 **		MBT_NOTSEP -- if this is not a separator line
813 **		MBT_INTERMED -- if this is an intermediate separator
814 **		MBT_FINAL -- if this is a final boundary
815 **		MBT_SYNTAX -- if this is a boundary for the wrong
816 **			enclosure -- i.e., a syntax error.
817 */
818 
819 static int
820 mimeboundary(line, boundaries)
821 	register char *line;
822 	char **boundaries;
823 {
824 	int type = MBT_NOTSEP;
825 	int i;
826 	int savec;
827 
828 	if (line[0] != '-' || line[1] != '-' || boundaries == NULL)
829 		return MBT_NOTSEP;
830 	i = strlen(line);
831 	if (line[i - 1] == '\n')
832 		i--;
833 
834 	/* strip off trailing whitespace */
835 	while (line[i - 1] == ' ' || line[i - 1] == '\t')
836 		i--;
837 	savec = line[i];
838 	line[i] = '\0';
839 
840 	if (tTd(43, 5))
841 		dprintf("mimeboundary: line=\"%s\"... ", line);
842 
843 	/* check for this as an intermediate boundary */
844 	if (isboundary(&line[2], boundaries) >= 0)
845 		type = MBT_INTERMED;
846 	else if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0)
847 	{
848 		/* check for a final boundary */
849 		line[i - 2] = '\0';
850 		if (isboundary(&line[2], boundaries) >= 0)
851 			type = MBT_FINAL;
852 		line[i - 2] = '-';
853 	}
854 
855 	line[i] = savec;
856 	if (tTd(43, 5))
857 		dprintf("%s\n", MimeBoundaryNames[type]);
858 	return type;
859 }
860 /*
861 **  DEFCHARSET -- return default character set for message
862 **
863 **	The first choice for character set is for the mailer
864 **	corresponding to the envelope sender.  If neither that
865 **	nor the global configuration file has a default character
866 **	set defined, return "unknown-8bit" as recommended by
867 **	RFC 1428 section 3.
868 **
869 **	Parameters:
870 **		e -- the envelope for this message.
871 **
872 **	Returns:
873 **		The default character set for that mailer.
874 */
875 
876 char *
877 defcharset(e)
878 	register ENVELOPE *e;
879 {
880 	if (e != NULL && e->e_from.q_mailer != NULL &&
881 	    e->e_from.q_mailer->m_defcharset != NULL)
882 		return e->e_from.q_mailer->m_defcharset;
883 	if (DefaultCharSet != NULL)
884 		return DefaultCharSet;
885 	return "unknown-8bit";
886 }
887 /*
888 **  ISBOUNDARY -- is a given string a currently valid boundary?
889 **
890 **	Parameters:
891 **		line -- the current input line.
892 **		boundaries -- the list of valid boundaries.
893 **
894 **	Returns:
895 **		The index number in boundaries if the line is found.
896 **		-1 -- otherwise.
897 **
898 */
899 
static int
isboundary(line, boundaries)
	char *line;
	char **boundaries;
{
	register char **bpp;

	/* walk the NULL-terminated list looking for an exact match */
	for (bpp = boundaries; *bpp != NULL; bpp++)
	{
		if (strcmp(*bpp, line) == 0)
			return (int) (bpp - boundaries);
	}
	return -1;
}
914 #endif /* MIME8TO7 */
915 
916 #if MIME7TO8
917 
918 /*
919 **  MIME7TO8 -- output 7 bit encoded MIME body in 8 bit format
920 **
921 **  This is a hack. Supports translating the two 7-bit body-encodings
922 **  (quoted-printable and base64) to 8-bit coded bodies.
923 **
924 **  There is not much point in supporting multipart here, as the UA
925 **  will be able to deal with encoded MIME bodies if it can parse MIME
926 **  multipart messages.
927 **
**  Note also that we won't be called unless it is a text/plain MIME
929 **  message, encoded base64 or QP and mailer flag '9' has been defined
930 **  on mailer.
931 **
932 **  Contributed by Marius Olaffson <marius@rhi.hi.is>.
933 **
934 **	Parameters:
935 **		mci -- mailer connection information.
936 **		header -- the header for this body part.
937 **		e -- envelope.
938 **
939 **	Returns:
940 **		none.
941 */
942 
/*
**  Base64 decoding table: maps a 7-bit character code to its 6-bit
**  value, or -1 if the character is not a base64 digit.
**
**	The element type is explicitly signed: whether plain "char"
**	is signed is implementation-defined, and where it is unsigned
**	the -1 entries would read back as 255.
*/

static signed char index_64[128] =
{
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
	52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
	-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
	15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
	-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
	41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

/* value of base64 digit c, or -1; c is evaluated more than once */
# define CHAR64(c)  (((c) < 0 || (c) > 127) ? -1 : index_64[(c)])
956 
957 void
958 mime7to8(mci, header, e)
959 	register MCI *mci;
960 	HDR *header;
961 	register ENVELOPE *e;
962 {
963 	register char *p;
964 	char *cte;
965 	char **pvp;
966 	u_char *fbufp;
967 	char buf[MAXLINE];
968 	u_char fbuf[MAXLINE + 1];
969 	char pvpbuf[MAXLINE];
970 	extern u_char MimeTokenTab[256];
971 
972 	p = hvalue("Content-Transfer-Encoding", header);
973 	if (p == NULL ||
974 	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
975 			   MimeTokenTab)) == NULL ||
976 	    pvp[0] == NULL)
977 	{
978 		/* "can't happen" -- upper level should have caught this */
979 		syserr("mime7to8: unparsable CTE %s", p == NULL ? "<NULL>" : p);
980 
981 		/* avoid bounce loops */
982 		e->e_flags |= EF_DONT_MIME;
983 
984 		/* cheap failsafe algorithm -- should work on text/plain */
985 		if (p != NULL)
986 		{
987 			snprintf(buf, sizeof buf,
988 				"Content-Transfer-Encoding: %s", p);
989 			putline(buf, mci);
990 		}
991 		putline("", mci);
992 		mci->mci_flags &= ~MCIF_INHEADER;
993 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
994 			putline(buf, mci);
995 		return;
996 	}
997 	cataddr(pvp, NULL, buf, sizeof buf, '\0');
998 	cte = newstr(buf);
999 
1000 	mci->mci_flags |= MCIF_INHEADER;
1001 	putline("Content-Transfer-Encoding: 8bit", mci);
1002 	snprintf(buf, sizeof buf,
1003 		"X-MIME-Autoconverted: from %.200s to 8bit by %s id %s",
1004 		cte, MyHostName, e->e_id);
1005 	putline(buf, mci);
1006 	putline("", mci);
1007 	mci->mci_flags &= ~MCIF_INHEADER;
1008 
1009 	/*
1010 	**  Translate body encoding to 8-bit.  Supports two types of
1011 	**  encodings; "base64" and "quoted-printable". Assume qp if
1012 	**  it is not base64.
1013 	*/
1014 
1015 	if (strcasecmp(cte, "base64") == 0)
1016 	{
1017 		int c1, c2, c3, c4;
1018 
1019 		fbufp = fbuf;
1020 		while ((c1 = fgetc(e->e_dfp)) != EOF)
1021 		{
1022 			if (isascii(c1) && isspace(c1))
1023 				continue;
1024 
1025 			do
1026 			{
1027 				c2 = fgetc(e->e_dfp);
1028 			} while (isascii(c2) && isspace(c2));
1029 			if (c2 == EOF)
1030 				break;
1031 
1032 			do
1033 			{
1034 				c3 = fgetc(e->e_dfp);
1035 			} while (isascii(c3) && isspace(c3));
1036 			if (c3 == EOF)
1037 				break;
1038 
1039 			do
1040 			{
1041 				c4 = fgetc(e->e_dfp);
1042 			} while (isascii(c4) && isspace(c4));
1043 			if (c4 == EOF)
1044 				break;
1045 
1046 			if (c1 == '=' || c2 == '=')
1047 				continue;
1048 			c1 = CHAR64(c1);
1049 			c2 = CHAR64(c2);
1050 
1051 			*fbufp = (c1 << 2) | ((c2 & 0x30) >> 4);
1052 			if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE])
1053 			{
1054 				if (*--fbufp != '\n' ||
1055 				    (fbufp > fbuf && *--fbufp != '\r'))
1056 					fbufp++;
1057 				putxline((char *) fbuf, fbufp - fbuf,
1058 					 mci, PXLF_MAPFROM);
1059 				fbufp = fbuf;
1060 			}
1061 			if (c3 == '=')
1062 				continue;
1063 			c3 = CHAR64(c3);
1064 			*fbufp = ((c2 & 0x0f) << 4) | ((c3 & 0x3c) >> 2);
1065 			if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE])
1066 			{
1067 				if (*--fbufp != '\n' ||
1068 				    (fbufp > fbuf && *--fbufp != '\r'))
1069 					fbufp++;
1070 				putxline((char *) fbuf, fbufp - fbuf,
1071 					 mci, PXLF_MAPFROM);
1072 				fbufp = fbuf;
1073 			}
1074 			if (c4 == '=')
1075 				continue;
1076 			c4 = CHAR64(c4);
1077 			*fbufp = ((c3 & 0x03) << 6) | c4;
1078 			if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE])
1079 			{
1080 				if (*--fbufp != '\n' ||
1081 				    (fbufp > fbuf && *--fbufp != '\r'))
1082 					fbufp++;
1083 				putxline((char *) fbuf, fbufp - fbuf,
1084 					 mci, PXLF_MAPFROM);
1085 				fbufp = fbuf;
1086 			}
1087 		}
1088 	}
1089 	else
1090 	{
1091 		/* quoted-printable */
1092 		fbufp = fbuf;
1093 		while (fgets(buf, sizeof buf, e->e_dfp) != NULL)
1094 		{
1095 			if (mime_fromqp((u_char *) buf, &fbufp, 0,
1096 					&fbuf[MAXLINE] - fbufp) == 0)
1097 				continue;
1098 
1099 			if (fbufp - fbuf > 0)
1100 				putxline((char *) fbuf, fbufp - fbuf - 1, mci,
1101 					 PXLF_MAPFROM);
1102 			fbufp = fbuf;
1103 		}
1104 	}
1105 
1106 	/* force out partial last line */
1107 	if (fbufp > fbuf)
1108 	{
1109 		*fbufp = '\0';
1110 		putxline((char *) fbuf, fbufp - fbuf, mci, PXLF_MAPFROM);
1111 	}
1112 	if (tTd(43, 3))
1113 		dprintf("\t\t\tmime7to8 => %s to 8bit done\n", cte);
1114 }
1115 /*
1116 **  The following is based on Borenstein's "codes.c" module, with simplifying
1117 **  changes as we do not deal with multipart, and to do the translation in-core,
1118 **  with an attempt to prevent overrun of output buffers.
1119 **
**  What is needed here are changes to defend this code better against
**  bad encodings.  It is questionable to always return 0xFF for bad mappings.
1122 */
1123 
/*
**  Hex decoding table: maps a 7-bit character code to the value of
**  that hex digit (either case), or -1 if it is not a hex digit.
**
**	The element type is explicitly signed: whether plain "char"
**	is signed is implementation-defined, and where it is unsigned
**	the -1 entries would read back as 255, so the "== -1" tests in
**	mime_fromqp() would never detect an invalid digit.
*/

static signed char index_hex[128] =
{
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* value of hex digit c, or -1; c is evaluated more than once */
# define HEXCHAR(c)  (((c) < 0 || (c) > 127) ? -1 : index_hex[(c)])
1137 
1138 static int
1139 mime_fromqp(infile, outfile, state, maxlen)
1140 	u_char *infile;
1141 	u_char **outfile;
1142 	int state;		/* Decoding body (0) or header (1) */
1143 	int maxlen;		/* Max # of chars allowed in outfile */
1144 {
1145 	int c1, c2;
1146 	int nchar = 0;
1147 
1148 	while ((c1 = *infile++) != '\0')
1149 	{
1150 		if (c1 == '=')
1151 		{
1152 			if ((c1 = *infile++) == 0)
1153 				break;
1154 
1155 			if (c1 == '\n' || (c1 = HEXCHAR(c1)) == -1)
1156 			{
1157 				/* ignore it */
1158 				if (state == 0)
1159 					return 0;
1160 			}
1161 			else
1162 			{
1163 				do
1164 				{
1165 					if ((c2 = *infile++) == '\0')
1166 					{
1167 						c2 = -1;
1168 						break;
1169 					}
1170 				} while ((c2 = HEXCHAR(c2)) == -1);
1171 
1172 				if (c2 == -1 || ++nchar > maxlen)
1173 					break;
1174 
1175 				*(*outfile)++ = c1 << 4 | c2;
1176 			}
1177 		}
1178 		else
1179 		{
1180 			if (state == 1 && c1 == '_')
1181 				c1 = ' ';
1182 
1183 			if (++nchar > maxlen)
1184 				break;
1185 
1186 			*(*outfile)++ = c1;
1187 
1188 			if (c1 == '\n')
1189 				break;
1190 		}
1191 	}
1192 	*(*outfile)++ = '\0';
1193 	return 1;
1194 }
1195 #endif /* MIME7TO8 */
1196