1 /* 2 * Copyright (c) 1998-2003 Sendmail, Inc. and its suppliers. 3 * All rights reserved. 4 * Copyright (c) 1994, 1996-1997 Eric P. Allman. All rights reserved. 5 * Copyright (c) 1994 6 * The Regents of the University of California. All rights reserved. 7 * 8 * By using this file, you agree to the terms and conditions set 9 * forth in the LICENSE file which can be found at the top level of 10 * the sendmail distribution. 11 * 12 */ 13 14 #pragma ident "%Z%%M% %I% %E% SMI" 15 16 #include <sendmail.h> 17 #include <string.h> 18 19 SM_RCSID("@(#)$Id: mime.c,v 8.139 2006/03/01 18:07:45 ca Exp $") 20 21 /* 22 ** MIME support. 23 ** 24 ** I am indebted to John Beck of Hewlett-Packard, who contributed 25 ** his code to me for inclusion. As it turns out, I did not use 26 ** his code since he used a "minimum change" approach that used 27 ** several temp files, and I wanted a "minimum impact" approach 28 ** that would avoid copying. However, looking over his code 29 ** helped me cement my understanding of the problem. 30 ** 31 ** I also looked at, but did not directly use, Nathaniel 32 ** Borenstein's "code.c" module. Again, it functioned as 33 ** a file-to-file translator, which did not fit within my 34 ** design bounds, but it was a useful base for understanding 35 ** the problem. 36 */ 37 38 /* use "old" mime 7 to 8 algorithm by default */ 39 #ifndef MIME7TO8_OLD 40 # define MIME7TO8_OLD 1 41 #endif /* ! 
MIME7TO8_OLD */

#if MIME8TO7
/* forward declarations of the file-local helpers used by mime8to7() */
static int	isboundary __P((char *, char **));
static int	mimeboundary __P((char *, char **));
static int	mime_getchar __P((SM_FILE_T *, char **, int *));
static int	mime_getchar_crlf __P((SM_FILE_T *, char **, int *));

/* character set for hex and base64 encoding */
/* (indexed by a 4-bit resp. 6-bit value to obtain the output character) */
static char	Base16Code[] =	"0123456789ABCDEF";
static char	Base64Code[] =	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* types of MIME boundaries */
# define MBT_SYNTAX	0	/* syntax error */
# define MBT_NOTSEP	1	/* not a boundary */
# define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
# define MBT_FINAL	3	/* final boundary (trailing -- included) */

/* printable names for the MBT_* values above; indexed by MBT_* in debug output */
static char	*MimeBoundaryNames[] =
{
	"SYNTAX",	"NOTSEP",	"INTERMED",	"FINAL"
};

/*
**  When true, mime_getchar_crlf() hands out "\r\n" for every '\n' it reads.
**  mime8to7() sets it to true on entry and clears it (only when compiled
**  with USE_B_CLASS) for content types found in class 'b'.
*/

static bool	MapNLtoCRLF;

/*
**  MIME8TO7 -- output 8 bit body in 7 bit format
**
**	The header has already been output -- this has to do the
**	8 to 7 bit conversion.  It would be easy if we didn't have
**	to deal with nested formats (multipart/xxx and message/rfc822).
**
**	We won't be called if we don't have to do a conversion, and
**	appropriate MIME-Version: and Content-Type: fields have been
**	output.  Any Content-Transfer-Encoding: field has not been
**	output, and we can add it here.
**
**	Parameters:
**		mci -- mailer connection information.
**		header -- the header for this body part.
**		e -- envelope.
**		boundaries -- the currently pending message boundaries.
**			NULL if we are processing the outer portion.
**		flags -- to tweak processing.
85 ** 86 ** Returns: 87 ** An indicator of what terminated the message part: 88 ** MBT_FINAL -- the final boundary 89 ** MBT_INTERMED -- an intermediate boundary 90 ** MBT_NOTSEP -- an end of file 91 ** SM_IO_EOF -- I/O error occurred 92 */ 93 94 struct args 95 { 96 char *a_field; /* name of field */ 97 char *a_value; /* value of that field */ 98 }; 99 100 int 101 mime8to7(mci, header, e, boundaries, flags) 102 register MCI *mci; 103 HDR *header; 104 register ENVELOPE *e; 105 char **boundaries; 106 int flags; 107 { 108 register char *p; 109 int linelen; 110 int bt; 111 off_t offset; 112 size_t sectionsize, sectionhighbits; 113 int i; 114 char *type; 115 char *subtype; 116 char *cte; 117 char **pvp; 118 int argc = 0; 119 char *bp; 120 bool use_qp = false; 121 struct args argv[MAXMIMEARGS]; 122 char bbuf[128]; 123 char buf[MAXLINE]; 124 char pvpbuf[MAXLINE]; 125 extern unsigned char MimeTokenTab[256]; 126 127 if (tTd(43, 1)) 128 { 129 sm_dprintf("mime8to7: flags = %x, boundaries =", flags); 130 if (boundaries[0] == NULL) 131 sm_dprintf(" <none>"); 132 else 133 { 134 for (i = 0; boundaries[i] != NULL; i++) 135 sm_dprintf(" %s", boundaries[i]); 136 } 137 sm_dprintf("\n"); 138 } 139 MapNLtoCRLF = true; 140 p = hvalue("Content-Transfer-Encoding", header); 141 if (p == NULL || 142 (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL, 143 MimeTokenTab, false)) == NULL || 144 pvp[0] == NULL) 145 { 146 cte = NULL; 147 } 148 else 149 { 150 cataddr(pvp, NULL, buf, sizeof buf, '\0'); 151 cte = sm_rpool_strdup_x(e->e_rpool, buf); 152 } 153 154 type = subtype = NULL; 155 p = hvalue("Content-Type", header); 156 if (p == NULL) 157 { 158 if (bitset(M87F_DIGEST, flags)) 159 p = "message/rfc822"; 160 else 161 p = "text/plain"; 162 } 163 if (p != NULL && 164 (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL, 165 MimeTokenTab, false)) != NULL && 166 pvp[0] != NULL) 167 { 168 if (tTd(43, 40)) 169 { 170 for (i = 0; pvp[i] != NULL; i++) 171 sm_dprintf("pvp[%d] = \"%s\"\n", i, pvp[i]); 172 
} 173 type = *pvp++; 174 if (*pvp != NULL && strcmp(*pvp, "/") == 0 && 175 *++pvp != NULL) 176 { 177 subtype = *pvp++; 178 } 179 180 /* break out parameters */ 181 while (*pvp != NULL && argc < MAXMIMEARGS) 182 { 183 /* skip to semicolon separator */ 184 while (*pvp != NULL && strcmp(*pvp, ";") != 0) 185 pvp++; 186 if (*pvp++ == NULL || *pvp == NULL) 187 break; 188 189 /* complain about empty values */ 190 if (strcmp(*pvp, ";") == 0) 191 { 192 usrerr("mime8to7: Empty parameter in Content-Type header"); 193 194 /* avoid bounce loops */ 195 e->e_flags |= EF_DONT_MIME; 196 continue; 197 } 198 199 /* extract field name */ 200 argv[argc].a_field = *pvp++; 201 202 /* see if there is a value */ 203 if (*pvp != NULL && strcmp(*pvp, "=") == 0 && 204 (*++pvp == NULL || strcmp(*pvp, ";") != 0)) 205 { 206 argv[argc].a_value = *pvp; 207 argc++; 208 } 209 } 210 } 211 212 /* check for disaster cases */ 213 if (type == NULL) 214 type = "-none-"; 215 if (subtype == NULL) 216 subtype = "-none-"; 217 218 /* don't propogate some flags more than one level into the message */ 219 flags &= ~M87F_DIGEST; 220 221 /* 222 ** Check for cases that can not be encoded. 223 ** 224 ** For example, you can't encode certain kinds of types 225 ** or already-encoded messages. If we find this case, 226 ** just copy it through. 227 */ 228 229 (void) sm_snprintf(buf, sizeof buf, "%.100s/%.100s", type, subtype); 230 if (wordinclass(buf, 'n') || (cte != NULL && !wordinclass(cte, 'e'))) 231 flags |= M87F_NO8BIT; 232 233 # ifdef USE_B_CLASS 234 if (wordinclass(buf, 'b') || wordinclass(type, 'b')) 235 MapNLtoCRLF = false; 236 # endif /* USE_B_CLASS */ 237 if (wordinclass(buf, 'q') || wordinclass(type, 'q')) 238 use_qp = true; 239 240 /* 241 ** Multipart requires special processing. 242 ** 243 ** Do a recursive descent into the message. 
244 */ 245 246 if (sm_strcasecmp(type, "multipart") == 0 && 247 (!bitset(M87F_NO8BIT, flags) || bitset(M87F_NO8TO7, flags))) 248 { 249 250 if (sm_strcasecmp(subtype, "digest") == 0) 251 flags |= M87F_DIGEST; 252 253 for (i = 0; i < argc; i++) 254 { 255 if (sm_strcasecmp(argv[i].a_field, "boundary") == 0) 256 break; 257 } 258 if (i >= argc || argv[i].a_value == NULL) 259 { 260 usrerr("mime8to7: Content-Type: \"%s\": %s boundary", 261 i >= argc ? "missing" : "bogus", p); 262 p = "---"; 263 264 /* avoid bounce loops */ 265 e->e_flags |= EF_DONT_MIME; 266 } 267 else 268 { 269 p = argv[i].a_value; 270 stripquotes(p); 271 } 272 if (sm_strlcpy(bbuf, p, sizeof bbuf) >= sizeof bbuf) 273 { 274 usrerr("mime8to7: multipart boundary \"%s\" too long", 275 p); 276 277 /* avoid bounce loops */ 278 e->e_flags |= EF_DONT_MIME; 279 } 280 281 if (tTd(43, 1)) 282 sm_dprintf("mime8to7: multipart boundary \"%s\"\n", 283 bbuf); 284 for (i = 0; i < MAXMIMENESTING; i++) 285 { 286 if (boundaries[i] == NULL) 287 break; 288 } 289 if (i >= MAXMIMENESTING) 290 { 291 usrerr("mime8to7: multipart nesting boundary too deep"); 292 293 /* avoid bounce loops */ 294 e->e_flags |= EF_DONT_MIME; 295 } 296 else 297 { 298 boundaries[i] = bbuf; 299 boundaries[i + 1] = NULL; 300 } 301 mci->mci_flags |= MCIF_INMIME; 302 303 /* skip the early "comment" prologue */ 304 if (!putline("", mci)) 305 goto writeerr; 306 mci->mci_flags &= ~MCIF_INHEADER; 307 bt = MBT_FINAL; 308 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 309 != NULL) 310 { 311 bt = mimeboundary(buf, boundaries); 312 if (bt != MBT_NOTSEP) 313 break; 314 if (!putxline(buf, strlen(buf), mci, 315 PXLF_MAPFROM|PXLF_STRIP8BIT)) 316 goto writeerr; 317 if (tTd(43, 99)) 318 sm_dprintf(" ...%s", buf); 319 } 320 if (sm_io_eof(e->e_dfp)) 321 bt = MBT_FINAL; 322 while (bt != MBT_FINAL) 323 { 324 auto HDR *hdr = NULL; 325 326 (void) sm_strlcpyn(buf, sizeof buf, 2, "--", bbuf); 327 if (!putline(buf, mci)) 328 goto writeerr; 329 if (tTd(43, 35)) 330 
sm_dprintf(" ...%s\n", buf); 331 collect(e->e_dfp, false, &hdr, e, false); 332 if (tTd(43, 101)) 333 putline("+++after collect", mci); 334 if (!putheader(mci, hdr, e, flags)) 335 goto writeerr; 336 if (tTd(43, 101)) 337 putline("+++after putheader", mci); 338 bt = mime8to7(mci, hdr, e, boundaries, flags); 339 if (bt == SM_IO_EOF) 340 goto writeerr; 341 } 342 (void) sm_strlcpyn(buf, sizeof buf, 3, "--", bbuf, "--"); 343 if (!putline(buf, mci)) 344 goto writeerr; 345 if (tTd(43, 35)) 346 sm_dprintf(" ...%s\n", buf); 347 boundaries[i] = NULL; 348 mci->mci_flags &= ~MCIF_INMIME; 349 350 /* skip the late "comment" epilogue */ 351 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 352 != NULL) 353 { 354 bt = mimeboundary(buf, boundaries); 355 if (bt != MBT_NOTSEP) 356 break; 357 if (!putxline(buf, strlen(buf), mci, 358 PXLF_MAPFROM|PXLF_STRIP8BIT)) 359 goto writeerr; 360 if (tTd(43, 99)) 361 sm_dprintf(" ...%s", buf); 362 } 363 if (sm_io_eof(e->e_dfp)) 364 bt = MBT_FINAL; 365 if (tTd(43, 3)) 366 sm_dprintf("\t\t\tmime8to7=>%s (multipart)\n", 367 MimeBoundaryNames[bt]); 368 return bt; 369 } 370 371 /* 372 ** Message/xxx types -- recurse exactly once. 373 ** 374 ** Class 's' is predefined to have "rfc822" only. 
375 */ 376 377 if (sm_strcasecmp(type, "message") == 0) 378 { 379 if (!wordinclass(subtype, 's')) 380 { 381 flags |= M87F_NO8BIT; 382 } 383 else 384 { 385 auto HDR *hdr = NULL; 386 387 if (!putline("", mci)) 388 goto writeerr; 389 390 mci->mci_flags |= MCIF_INMIME; 391 collect(e->e_dfp, false, &hdr, e, false); 392 if (tTd(43, 101)) 393 putline("+++after collect", mci); 394 if (!putheader(mci, hdr, e, flags)) 395 goto writeerr; 396 if (tTd(43, 101)) 397 putline("+++after putheader", mci); 398 if (hvalue("MIME-Version", hdr) == NULL && 399 !bitset(M87F_NO8TO7, flags) && 400 !putline("MIME-Version: 1.0", mci)) 401 goto writeerr; 402 bt = mime8to7(mci, hdr, e, boundaries, flags); 403 mci->mci_flags &= ~MCIF_INMIME; 404 return bt; 405 } 406 } 407 408 /* 409 ** Non-compound body type 410 ** 411 ** Compute the ratio of seven to eight bit characters; 412 ** use that as a heuristic to decide how to do the 413 ** encoding. 414 */ 415 416 sectionsize = sectionhighbits = 0; 417 if (!bitset(M87F_NO8BIT|M87F_NO8TO7, flags)) 418 { 419 /* remember where we were */ 420 offset = sm_io_tell(e->e_dfp, SM_TIME_DEFAULT); 421 if (offset == -1) 422 syserr("mime8to7: cannot sm_io_tell on %cf%s", 423 DATAFL_LETTER, e->e_id); 424 425 /* do a scan of this body type to count character types */ 426 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 427 != NULL) 428 { 429 if (mimeboundary(buf, boundaries) != MBT_NOTSEP) 430 break; 431 for (p = buf; *p != '\0'; p++) 432 { 433 /* count bytes with the high bit set */ 434 sectionsize++; 435 if (bitset(0200, *p)) 436 sectionhighbits++; 437 } 438 439 /* 440 ** Heuristic: if 1/4 of the first 4K bytes are 8-bit, 441 ** assume base64. This heuristic avoids double-reading 442 ** large graphics or video files. 443 */ 444 445 if (sectionsize >= 4096 && 446 sectionhighbits > sectionsize / 4) 447 break; 448 } 449 450 /* return to the original offset for processing */ 451 /* XXX use relative seeks to handle >31 bit file sizes? 
*/ 452 if (sm_io_seek(e->e_dfp, SM_TIME_DEFAULT, offset, SEEK_SET) < 0) 453 syserr("mime8to7: cannot sm_io_fseek on %cf%s", 454 DATAFL_LETTER, e->e_id); 455 else 456 sm_io_clearerr(e->e_dfp); 457 } 458 459 /* 460 ** Heuristically determine encoding method. 461 ** If more than 1/8 of the total characters have the 462 ** eighth bit set, use base64; else use quoted-printable. 463 ** However, only encode binary encoded data as base64, 464 ** since otherwise the NL=>CRLF mapping will be a problem. 465 */ 466 467 if (tTd(43, 8)) 468 { 469 sm_dprintf("mime8to7: %ld high bit(s) in %ld byte(s), cte=%s, type=%s/%s\n", 470 (long) sectionhighbits, (long) sectionsize, 471 cte == NULL ? "[none]" : cte, 472 type == NULL ? "[none]" : type, 473 subtype == NULL ? "[none]" : subtype); 474 } 475 if (cte != NULL && sm_strcasecmp(cte, "binary") == 0) 476 sectionsize = sectionhighbits; 477 linelen = 0; 478 bp = buf; 479 if (sectionhighbits == 0) 480 { 481 /* no encoding necessary */ 482 if (cte != NULL && 483 bitset(MCIF_CVT8TO7|MCIF_CVT7TO8|MCIF_INMIME, 484 mci->mci_flags) && 485 !bitset(M87F_NO8TO7, flags)) 486 { 487 /* 488 ** Skip _unless_ in MIME mode and potentially 489 ** converting from 8 bit to 7 bit MIME. See 490 ** putheader() for the counterpart where the 491 ** CTE header is skipped in the opposite 492 ** situation. 
493 */ 494 495 (void) sm_snprintf(buf, sizeof buf, 496 "Content-Transfer-Encoding: %.200s", cte); 497 if (!putline(buf, mci)) 498 goto writeerr; 499 if (tTd(43, 36)) 500 sm_dprintf(" ...%s\n", buf); 501 } 502 if (!putline("", mci)) 503 goto writeerr; 504 mci->mci_flags &= ~MCIF_INHEADER; 505 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 506 != NULL) 507 { 508 bt = mimeboundary(buf, boundaries); 509 if (bt != MBT_NOTSEP) 510 break; 511 if (!putline(buf, mci)) 512 goto writeerr; 513 } 514 if (sm_io_eof(e->e_dfp)) 515 bt = MBT_FINAL; 516 } 517 else if (!MapNLtoCRLF || 518 (sectionsize / 8 < sectionhighbits && !use_qp)) 519 { 520 /* use base64 encoding */ 521 int c1, c2; 522 523 if (tTd(43, 36)) 524 sm_dprintf(" ...Content-Transfer-Encoding: base64\n"); 525 if (!putline("Content-Transfer-Encoding: base64", mci)) 526 goto writeerr; 527 (void) sm_snprintf(buf, sizeof buf, 528 "X-MIME-Autoconverted: from 8bit to base64 by %s id %s", 529 MyHostName, e->e_id); 530 if (!putline(buf, mci) || !putline("", mci)) 531 goto writeerr; 532 mci->mci_flags &= ~MCIF_INHEADER; 533 while ((c1 = mime_getchar_crlf(e->e_dfp, boundaries, &bt)) != 534 SM_IO_EOF) 535 { 536 if (linelen > 71) 537 { 538 *bp = '\0'; 539 if (!putline(buf, mci)) 540 goto writeerr; 541 linelen = 0; 542 bp = buf; 543 } 544 linelen += 4; 545 *bp++ = Base64Code[(c1 >> 2)]; 546 c1 = (c1 & 0x03) << 4; 547 c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt); 548 if (c2 == SM_IO_EOF) 549 { 550 *bp++ = Base64Code[c1]; 551 *bp++ = '='; 552 *bp++ = '='; 553 break; 554 } 555 c1 |= (c2 >> 4) & 0x0f; 556 *bp++ = Base64Code[c1]; 557 c1 = (c2 & 0x0f) << 2; 558 c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt); 559 if (c2 == SM_IO_EOF) 560 { 561 *bp++ = Base64Code[c1]; 562 *bp++ = '='; 563 break; 564 } 565 c1 |= (c2 >> 6) & 0x03; 566 *bp++ = Base64Code[c1]; 567 *bp++ = Base64Code[c2 & 0x3f]; 568 } 569 *bp = '\0'; 570 if (!putline(buf, mci)) 571 goto writeerr; 572 } 573 else 574 { 575 /* use quoted-printable encoding 
*/ 576 int c1, c2; 577 int fromstate; 578 BITMAP256 badchars; 579 580 /* set up map of characters that must be mapped */ 581 clrbitmap(badchars); 582 for (c1 = 0x00; c1 < 0x20; c1++) 583 setbitn(c1, badchars); 584 clrbitn('\t', badchars); 585 for (c1 = 0x7f; c1 < 0x100; c1++) 586 setbitn(c1, badchars); 587 setbitn('=', badchars); 588 if (bitnset(M_EBCDIC, mci->mci_mailer->m_flags)) 589 for (p = "!\"#$@[\\]^`{|}~"; *p != '\0'; p++) 590 setbitn(*p, badchars); 591 592 if (tTd(43, 36)) 593 sm_dprintf(" ...Content-Transfer-Encoding: quoted-printable\n"); 594 if (!putline("Content-Transfer-Encoding: quoted-printable", 595 mci)) 596 goto writeerr; 597 (void) sm_snprintf(buf, sizeof buf, 598 "X-MIME-Autoconverted: from 8bit to quoted-printable by %s id %s", 599 MyHostName, e->e_id); 600 if (!putline(buf, mci) || !putline("", mci)) 601 goto writeerr; 602 mci->mci_flags &= ~MCIF_INHEADER; 603 fromstate = 0; 604 c2 = '\n'; 605 while ((c1 = mime_getchar(e->e_dfp, boundaries, &bt)) != 606 SM_IO_EOF) 607 { 608 if (c1 == '\n') 609 { 610 if (c2 == ' ' || c2 == '\t') 611 { 612 *bp++ = '='; 613 *bp++ = Base16Code[(c2 >> 4) & 0x0f]; 614 *bp++ = Base16Code[c2 & 0x0f]; 615 } 616 if (buf[0] == '.' && bp == &buf[1]) 617 { 618 buf[0] = '='; 619 *bp++ = Base16Code[('.' >> 4) & 0x0f]; 620 *bp++ = Base16Code['.' & 0x0f]; 621 } 622 *bp = '\0'; 623 if (!putline(buf, mci)) 624 goto writeerr; 625 linelen = fromstate = 0; 626 bp = buf; 627 c2 = c1; 628 continue; 629 } 630 if (c2 == ' ' && linelen == 4 && fromstate == 4 && 631 bitnset(M_ESCFROM, mci->mci_mailer->m_flags)) 632 { 633 *bp++ = '='; 634 *bp++ = '2'; 635 *bp++ = '0'; 636 linelen += 3; 637 } 638 else if (c2 == ' ' || c2 == '\t') 639 { 640 *bp++ = c2; 641 linelen++; 642 } 643 if (linelen > 72 && 644 (linelen > 75 || c1 != '.' 
|| 645 (linelen > 73 && c2 == '.'))) 646 { 647 if (linelen > 73 && c2 == '.') 648 bp--; 649 else 650 c2 = '\n'; 651 *bp++ = '='; 652 *bp = '\0'; 653 if (!putline(buf, mci)) 654 goto writeerr; 655 linelen = fromstate = 0; 656 bp = buf; 657 if (c2 == '.') 658 { 659 *bp++ = '.'; 660 linelen++; 661 } 662 } 663 if (bitnset(bitidx(c1), badchars)) 664 { 665 *bp++ = '='; 666 *bp++ = Base16Code[(c1 >> 4) & 0x0f]; 667 *bp++ = Base16Code[c1 & 0x0f]; 668 linelen += 3; 669 } 670 else if (c1 != ' ' && c1 != '\t') 671 { 672 if (linelen < 4 && c1 == "From"[linelen]) 673 fromstate++; 674 *bp++ = c1; 675 linelen++; 676 } 677 c2 = c1; 678 } 679 680 /* output any saved character */ 681 if (c2 == ' ' || c2 == '\t') 682 { 683 *bp++ = '='; 684 *bp++ = Base16Code[(c2 >> 4) & 0x0f]; 685 *bp++ = Base16Code[c2 & 0x0f]; 686 linelen += 3; 687 } 688 689 if (linelen > 0 || boundaries[0] != NULL) 690 { 691 *bp = '\0'; 692 if (!putline(buf, mci)) 693 goto writeerr; 694 } 695 696 } 697 if (tTd(43, 3)) 698 sm_dprintf("\t\t\tmime8to7=>%s (basic)\n", MimeBoundaryNames[bt]); 699 return bt; 700 701 writeerr: 702 return SM_IO_EOF; 703 } 704 /* 705 ** MIME_GETCHAR -- get a character for MIME processing 706 ** 707 ** Treats boundaries as SM_IO_EOF. 708 ** 709 ** Parameters: 710 ** fp -- the input file. 711 ** boundaries -- the current MIME boundaries. 712 ** btp -- if the return value is SM_IO_EOF, *btp is set to 713 ** the type of the boundary. 714 ** 715 ** Returns: 716 ** The next character in the input stream. 
**
**	Side Effects:
**		Look-ahead is kept in function-static storage (buf, bp,
**		buflen, atbol, bt), so state carries over between calls.
*/

static int
mime_getchar(fp, boundaries, btp)
	register SM_FILE_T *fp;
	char **boundaries;
	int *btp;
{
	int c;
	static unsigned char *bp = NULL;
	static int buflen = 0;
	static bool atbol = true;		/* at beginning of line */
	static int bt = MBT_SYNTAX;		/* boundary type of next SM_IO_EOF */
	static unsigned char buf[128];		/* need not be a full line */
	int start = 0;			/* indicates position of - in buffer */

	if (buflen == 1 && *bp == '\n')
	{
		/* last \n in buffer may be part of next MIME boundary */
		c = *bp;
	}
	else if (buflen > 0)
	{
		/* hand out look-ahead buffered by an earlier call */
		buflen--;
		return *bp++;
	}
	else
		c = sm_io_getc(fp, SM_TIME_DEFAULT);

	/* start a fresh look-ahead buffer with c as its first candidate */
	bp = buf;
	buflen = 0;
	if (c == '\n')
	{
		/* might be part of a MIME boundary */
		*bp++ = c;
		atbol = true;
		c = sm_io_getc(fp, SM_TIME_DEFAULT);
		if (c == '\n')
		{
			/* two newlines in a row: push the second one back */
			(void) sm_io_ungetc(fp, SM_TIME_DEFAULT, c);
			return c;
		}

		/* buf[0] holds the newline; a boundary would start at buf[1] */
		start = 1;
	}
	if (c != SM_IO_EOF)
		*bp++ = c;
	else
		bt = MBT_FINAL;
	if (atbol && c == '-')
	{
		/* check for a message boundary */
		c = sm_io_getc(fp, SM_TIME_DEFAULT);
		if (c != '-')
		{
			/* single '-': return buf[0], keep the rest buffered */
			if (c != SM_IO_EOF)
				*bp++ = c;
			else
				bt = MBT_FINAL;
			buflen = bp - buf - 1;
			bp = buf;
			return *bp++;
		}

		/* got "--", now check for rest of separator */
		*bp++ = '-';
		while (bp < &buf[sizeof buf - 2] &&
		       (c = sm_io_getc(fp, SM_TIME_DEFAULT)) != SM_IO_EOF &&
		       c != '\n')
		{
			*bp++ = c;
		}
		*bp = '\0';	/* XXX simply cut off? */
		bt = mimeboundary((char *) &buf[start], boundaries);
		switch (bt)
		{
		  case MBT_FINAL:
		  case MBT_INTERMED:
			/* we have a message boundary */
			buflen = 0;
			*btp = bt;
			return SM_IO_EOF;
		}

		/* not a boundary: keep the terminating character too */
		if (bp < &buf[sizeof buf - 2] && c != SM_IO_EOF)
			*bp++ = c;
	}

	atbol = c == '\n';

	/* one character is returned now, the other buflen stay buffered */
	buflen = bp - buf - 1;
	if (buflen < 0)
	{
		/* nothing was stored in buf: report end of input */
		*btp = bt;
		return SM_IO_EOF;
	}
	bp = buf;
	return *bp++;
}
/*
**  MIME_GETCHAR_CRLF -- do mime_getchar, but translate NL => CRLF
**
**	Parameters:
**		fp -- the input file.
**		boundaries -- the current MIME boundaries.
**		btp -- if the return value is SM_IO_EOF, *btp is set to
**			the type of the boundary.
**
**	Returns:
**		The next character in the input stream.
**
**	Side Effects:
**		Remembers in a function-static flag that the '\n' half of
**		a CRLF pair is still owed to the caller.
*/

static int
mime_getchar_crlf(fp, boundaries, btp)
	register SM_FILE_T *fp;
	char **boundaries;
	int *btp;
{
	static bool sendlf = false;
	int c;

	if (sendlf)
	{
		/* second half of the CRLF started by the previous call */
		sendlf = false;
		return '\n';
	}
	c = mime_getchar(fp, boundaries, btp);
	if (c == '\n' && MapNLtoCRLF)
	{
		/* return '\r' now and '\n' on the next call */
		sendlf = true;
		return '\r';
	}
	return c;
}
/*
**  MIMEBOUNDARY -- determine if this line is a MIME boundary & its type
**
**	Parameters:
**		line -- the input line.
**		boundaries -- the set of currently pending boundaries.
**
**	Returns:
**		MBT_NOTSEP -- if this is not a separator line
**		MBT_INTERMED -- if this is an intermediate separator
**		MBT_FINAL -- if this is a final boundary
**		MBT_SYNTAX -- if this is a boundary for the wrong
**			enclosure -- i.e., a syntax error.
861 */ 862 863 static int 864 mimeboundary(line, boundaries) 865 register char *line; 866 char **boundaries; 867 { 868 int type = MBT_NOTSEP; 869 int i; 870 int savec; 871 872 if (line[0] != '-' || line[1] != '-' || boundaries == NULL) 873 return MBT_NOTSEP; 874 i = strlen(line); 875 if (i > 0 && line[i - 1] == '\n') 876 i--; 877 878 /* strip off trailing whitespace */ 879 while (i > 0 && (line[i - 1] == ' ' || line[i - 1] == '\t' 880 #if _FFR_MIME_CR_OK 881 || line[i - 1] == '\r' 882 #endif /* _FFR_MIME_CR_OK */ 883 )) 884 i--; 885 savec = line[i]; 886 line[i] = '\0'; 887 888 if (tTd(43, 5)) 889 sm_dprintf("mimeboundary: line=\"%s\"... ", line); 890 891 /* check for this as an intermediate boundary */ 892 if (isboundary(&line[2], boundaries) >= 0) 893 type = MBT_INTERMED; 894 else if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0) 895 { 896 /* check for a final boundary */ 897 line[i - 2] = '\0'; 898 if (isboundary(&line[2], boundaries) >= 0) 899 type = MBT_FINAL; 900 line[i - 2] = '-'; 901 } 902 903 line[i] = savec; 904 if (tTd(43, 5)) 905 sm_dprintf("%s\n", MimeBoundaryNames[type]); 906 return type; 907 } 908 /* 909 ** DEFCHARSET -- return default character set for message 910 ** 911 ** The first choice for character set is for the mailer 912 ** corresponding to the envelope sender. If neither that 913 ** nor the global configuration file has a default character 914 ** set defined, return "unknown-8bit" as recommended by 915 ** RFC 1428 section 3. 916 ** 917 ** Parameters: 918 ** e -- the envelope for this message. 919 ** 920 ** Returns: 921 ** The default character set for that mailer. 922 */ 923 924 char * 925 defcharset(e) 926 register ENVELOPE *e; 927 { 928 if (e != NULL && e->e_from.q_mailer != NULL && 929 e->e_from.q_mailer->m_defcharset != NULL) 930 return e->e_from.q_mailer->m_defcharset; 931 if (DefaultCharSet != NULL) 932 return DefaultCharSet; 933 return "unknown-8bit"; 934 } 935 /* 936 ** ISBOUNDARY -- is a given string a currently valid boundary? 
937 ** 938 ** Parameters: 939 ** line -- the current input line. 940 ** boundaries -- the list of valid boundaries. 941 ** 942 ** Returns: 943 ** The index number in boundaries if the line is found. 944 ** -1 -- otherwise. 945 ** 946 */ 947 948 static int 949 isboundary(line, boundaries) 950 char *line; 951 char **boundaries; 952 { 953 register int i; 954 955 for (i = 0; i <= MAXMIMENESTING && boundaries[i] != NULL; i++) 956 { 957 if (strcmp(line, boundaries[i]) == 0) 958 return i; 959 } 960 return -1; 961 } 962 #endif /* MIME8TO7 */ 963 964 #if MIME7TO8 965 static int mime_fromqp __P((unsigned char *, unsigned char **, int)); 966 967 /* 968 ** MIME7TO8 -- output 7 bit encoded MIME body in 8 bit format 969 ** 970 ** This is a hack. Supports translating the two 7-bit body-encodings 971 ** (quoted-printable and base64) to 8-bit coded bodies. 972 ** 973 ** There is not much point in supporting multipart here, as the UA 974 ** will be able to deal with encoded MIME bodies if it can parse MIME 975 ** multipart messages. 976 ** 977 ** Note also that we won't be called unless it is a text/plain MIME 978 ** message, encoded base64 or QP and mailer flag '9' has been defined 979 ** on mailer. 980 ** 981 ** Contributed by Marius Olaffson <marius@rhi.hi.is>. 982 ** 983 ** Parameters: 984 ** mci -- mailer connection information. 985 ** header -- the header for this body part. 986 ** e -- envelope. 
**
**	Returns:
**		true iff body was written successfully
*/

/*
**  Maps an ASCII code to its 6-bit base64 value ('A'-'Z' => 0-25,
**  'a'-'z' => 26-51, '0'-'9' => 52-61, '+' => 62, '/' => 63);
**  every other entry is -1.
**
**  NOTE(review): the element type is plain char, so the -1 entries read
**  back as 255 where char is unsigned.  mime7to8() never tests the result
**  of CHAR64() against -1, so characters outside the base64 alphabet flow
**  into the shifts below unchecked.
*/

static char index_64[128] =
{
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
	52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1,
	-1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
	15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
	-1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
	41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};

/* table lookup with a range check; -1 for codes outside 0..127 */
# define CHAR64(c)  (((c) < 0 || (c) > 127) ? -1 : index_64[(c)])

bool
mime7to8(mci, header, e)
	register MCI *mci;
	HDR *header;
	register ENVELOPE *e;
{
	int pxflags;
	register char *p;
	char *cte;
	char **pvp;
	unsigned char *fbufp;		/* next free byte in fbuf */
	char buf[MAXLINE];
	unsigned char fbuf[MAXLINE + 1];	/* decoded output line */
	char pvpbuf[MAXLINE];
	extern unsigned char MimeTokenTab[256];

	p = hvalue("Content-Transfer-Encoding", header);
	if (p == NULL ||
	    (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL,
			   MimeTokenTab, false)) == NULL ||
	    pvp[0] == NULL)
	{
		/* "can't happen" -- upper level should have caught this */
		syserr("mime7to8: unparsable CTE %s", p == NULL ? "<NULL>" : p);

		/* avoid bounce loops */
		e->e_flags |= EF_DONT_MIME;

		/* cheap failsafe algorithm -- should work on text/plain */
		/* (re-emit the CTE header as found, copy the body unchanged) */
		if (p != NULL)
		{
			(void) sm_snprintf(buf, sizeof buf,
				"Content-Transfer-Encoding: %s", p);
			if (!putline(buf, mci))
				goto writeerr;
		}
		if (!putline("", mci))
			goto writeerr;
		mci->mci_flags &= ~MCIF_INHEADER;
		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf)
			!= NULL)
		{
			if (!putline(buf, mci))
				goto writeerr;
		}
		return true;
	}
	cataddr(pvp, NULL, buf, sizeof buf, '\0');
	cte = sm_rpool_strdup_x(e->e_rpool, buf);

	/* finish the header: announce 8bit and record the conversion */
	mci->mci_flags |= MCIF_INHEADER;
	if (!putline("Content-Transfer-Encoding: 8bit", mci))
		goto writeerr;
	(void) sm_snprintf(buf, sizeof buf,
		"X-MIME-Autoconverted: from %.200s to 8bit by %s id %s",
		cte, MyHostName, e->e_id);
	if (!putline(buf, mci) || !putline("", mci))
		goto writeerr;
	mci->mci_flags &= ~MCIF_INHEADER;

	/*
	**  Translate body encoding to 8-bit.  Supports two types of
	**  encodings; "base64" and "quoted-printable". Assume qp if
	**  it is not base64.
	*/

	pxflags = PXLF_MAPFROM;
	if (sm_strcasecmp(cte, "base64") == 0)
	{
		int c1, c2, c3, c4;

		fbufp = fbuf;

		/*
		**  Each pass collects four non-blank input characters
		**  (c1..c4).  End of input in the middle of a group ends
		**  the loop and drops the incomplete group.
		*/

		while ((c1 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT)) !=
			SM_IO_EOF)
		{
			if (isascii(c1) && isspace(c1))
				continue;

			do
			{
				c2 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
			} while (isascii(c2) && isspace(c2));
			if (c2 == SM_IO_EOF)
				break;

			do
			{
				c3 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
			} while (isascii(c3) && isspace(c3));
			if (c3 == SM_IO_EOF)
				break;

			do
			{
				c4 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT);
			} while (isascii(c4) && isspace(c4));
			if (c4 == SM_IO_EOF)
				break;

			/* padding in the first two positions: skip the group */
			if (c1 == '=' || c2 == '=')
				continue;
			c1 = CHAR64(c1);
			c2 = CHAR64(c2);

/*
**  CHK_EOL backs fbufp up over a trailing "\n" or "\r\n" in fbuf.  If
**  the data does not end that way fbufp is put back, and in the
**  !MIME7TO8_OLD variant PXLF_NOADDEOL is added to pxflags as well.
**
**  PUTLINE64 accepts the byte just stored at *fbufp; once that byte is
**  a newline or fbuf holds MAXLINE bytes, the buffer is written with
**  putxline() and restarted.
*/

#if MIME7TO8_OLD
#define CHK_EOL if (*--fbufp != '\n' || (fbufp > fbuf && *--fbufp != '\r'))	\
			++fbufp;
#else /* MIME7TO8_OLD */
#define CHK_EOL if (*--fbufp != '\n' || (fbufp > fbuf && *--fbufp != '\r'))	\
		{					\
			++fbufp;			\
			pxflags |= PXLF_NOADDEOL;	\
		}
#endif /* MIME7TO8_OLD */

#define PUTLINE64	\
	do		\
	{		\
		if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE])	\
		{							\
			CHK_EOL;					\
			if (!putxline((char *) fbuf, fbufp - fbuf, mci, pxflags)) \
				goto writeerr;				\
			pxflags &= ~PXLF_NOADDEOL;			\
			fbufp = fbuf;					\
		}	\
	} while (0)

			/* first output byte: 6 bits of c1 + top 2 bits of c2 */
			*fbufp = (c1 << 2) | ((c2 & 0x30) >> 4);
			PUTLINE64;
			if (c3 == '=')
				continue;
			c3 = CHAR64(c3);

			/* second byte: low 4 bits of c2 + top 4 bits of c3 */
			*fbufp = ((c2 & 0x0f) << 4) | ((c3 & 0x3c) >> 2);
			PUTLINE64;
			if (c4 == '=')
				continue;
			c4 = CHAR64(c4);

			/* third byte: low 2 bits of c3 + all 6 bits of c4 */
			*fbufp = ((c3 & 0x03) << 6) | c4;
			PUTLINE64;
		}
	}
	else
	{
		int off;

		/* quoted-printable */
		pxflags |= PXLF_NOADDEOL;
		fbufp = fbuf;
		while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf,
				   sizeof buf) != NULL)
		{
			off = mime_fromqp((unsigned char *) buf, &fbufp,
					  &fbuf[MAXLINE] - fbufp);
again:
			/*
			**  -2: mime_fromqp() stopped at an '=' it could not
			**  decode (which includes a soft line break "=\n").
			**  What was decoded so far stays in fbuf and the
			**  next input line is appended to it.
			*/

			if (off < -1)
				continue;

			/* the "- 1" leaves out the '\0' added by mime_fromqp() */
			if (fbufp - fbuf > 0)
			{
				if (!putxline((char *) fbuf, fbufp - fbuf - 1,
						mci, pxflags))
					goto writeerr;
			}
			fbufp = fbuf;

			/*
			**  fbuf filled up before buf was used up: decode
			**  the rest of buf.
			**  NOTE(review): the offset returned by this second
			**  call is relative to buf + off but is used as an
			**  index into buf on the next pass -- confirm that
			**  the call can never return >= 0.
			*/

			if (off >= 0 && buf[off] != '\0')
			{
				off = mime_fromqp((unsigned char *) (buf + off),
						  &fbufp,
						  &fbuf[MAXLINE] - fbufp);
				goto again;
			}
		}
	}

	/* force out partial last line */
	if (fbufp > fbuf)
	{
		*fbufp = '\0';
		if (!putxline((char *) fbuf, fbufp - fbuf, mci, pxflags))
			goto writeerr;
	}

	/*
	**  The decoded text may end without an EOL.  Since this function
	**  is only called for text/plain MIME messages, it is safe to
	**  add an extra one at the end just in case.  This is a hack,
	**  but so is auto-converting MIME in the first place.
	*/

	if (!putline("", mci))
		goto writeerr;

	if (tTd(43, 3))
		sm_dprintf("\t\t\tmime7to8 => %s to 8bit done\n", cte);
	return true;

  writeerr:
	return false;
}
/*
**  The following is based on Borenstein's "codes.c" module, with simplifying
**  changes as we do not deal with multipart, and to do the translation in-core,
**  with an attempt to prevent overrun of output buffers.
**
**  What is needed here are changes to defend this code better against
**  bad encodings. Questionable to always return 0xFF for bad mappings.
/* End of the overview comment for the in-core quoted-printable decoder. */

/*
**  Maps an ASCII code to the value of the hexadecimal digit it denotes
**  ('0'-'9' => 0-9, 'A'-'F' and 'a'-'f' => 10-15); every other entry
**  is -1.
**
**  The element type is explicitly signed: with plain char the -1 entries
**  read back as 255 on platforms where char is unsigned, and the
**  "== -1" tests in mime_fromqp() would never match.
*/

static const signed char index_hex[128] =
{
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

/* table lookup with a range check; -1 for codes outside 0..127 */
# define HEXCHAR(c)  (((c) < 0 || (c) > 127) ? -1 : index_hex[(c)])

/*
**  MIME_FROMQP -- decode quoted printable string
**
**	Decoding stops after the first newline, at the end of infile,
**	or when the output space is used up.
**
**	Parameters:
**		infile -- input (encoded) string, '\0' terminated
**		outfile -- output string; *outfile is advanced past
**			every byte that is stored
**		maxlen -- size of output buffer
**
**	Returns:
**		-2 if decoding failure: an '=' is followed by a newline
**			(soft line break) or by a character that is not
**			a hexadecimal digit.  No '\0' is stored in this
**			case.
**		-1 if infile completely decoded into outfile
**		>= 0 is the position in infile decoding
**			reached before maxlen was reached, i.e. the index
**			of the first character that has not been
**			consumed.  0 is also returned, without storing
**			anything, if maxlen is less than 2.
**
**	Side Effects:
**		Except for the -2 and the "maxlen less than 2" cases a
**		'\0' is stored after the decoded bytes and *outfile is
**		advanced past it.
**
**	Notes:
**		After a valid first hexadecimal digit, characters that
**		are not hexadecimal digits are skipped until a second
**		digit or the end of infile is found.
*/

static int
mime_fromqp(infile, outfile, maxlen)
	unsigned char *infile;
	unsigned char **outfile;
	int maxlen;		/* Max # of chars allowed in outfile */
{
	int c1, c2;
	int nchar = 0;
	int ateol = 0;		/* set once a newline has been copied */
	unsigned char *b;

	/* decrement by one for trailing '\0', at least one other char */
	if (--maxlen < 1)
		return 0;

	b = infile;
	while ((c1 = *infile++) != '\0' && nchar < maxlen)
	{
		if (c1 == '=')
		{
			if ((c1 = *infile++) == '\0')
				break;

			if (c1 == '\n' || (c1 = HEXCHAR(c1)) == -1)
			{
				/* ignore it and the rest of the buffer */
				return -2;
			}
			else
			{
				/* look for the second hexadecimal digit */
				do
				{
					if ((c2 = *infile++) == '\0')
					{
						c2 = -1;
						break;
					}
				} while ((c2 = HEXCHAR(c2)) == -1);

				if (c2 == -1)
					break;
				nchar++;
				*(*outfile)++ = c1 << 4 | c2;
			}
		}
		else
		{
			nchar++;
			*(*outfile)++ = c1;
			if (c1 == '\n')
			{
				ateol = 1;
				break;
			}
		}
	}
	*(*outfile)++ = '\0';
	if (nchar >= maxlen)
	{
		/*
		**  infile is one past the last character fetched.  That
		**  character is still unconsumed when the loop ended on
		**  the space check, but it has already been copied when
		**  the loop ended on a newline; handing the newline back
		**  would make the caller output it a second time.
		*/

		if (ateol)
			return (int) (infile - b);
		return (int) (infile - b - 1);
	}
	return -1;
}
#endif /* MIME7TO8 */