/*
 * Copyright (c) 1998-2003 Sendmail, Inc. and its suppliers.
 *	All rights reserved.
 * Copyright (c) 1994, 1996-1997 Eric P. Allman. All rights reserved.
 * Copyright (c) 1994
 *	The Regents of the University of California. All rights reserved.
 *
 * By using this file, you agree to the terms and conditions set
 * forth in the LICENSE file which can be found at the top level of
 * the sendmail distribution.
 *
 */

#include <sendmail.h>
#include <string.h>

SM_RCSID("@(#)$Id: mime.c,v 8.139 2006/03/01 18:07:45 ca Exp $")

/*
**  MIME support.
**
**	I am indebted to John Beck of Hewlett-Packard, who contributed
**	his code to me for inclusion.  As it turns out, I did not use
**	his code since he used a "minimum change" approach that used
**	several temp files, and I wanted a "minimum impact" approach
**	that would avoid copying.  However, looking over his code
**	helped me cement my understanding of the problem.
**
**	I also looked at, but did not directly use, Nathaniel
**	Borenstein's "code.c" module.  Again, it functioned as
**	a file-to-file translator, which did not fit within my
**	design bounds, but it was a useful base for understanding
**	the problem.
*/

/* use "old" mime 7 to 8 algorithm by default */
#ifndef MIME7TO8_OLD
# define MIME7TO8_OLD	1
#endif /* ! MIME7TO8_OLD */

#if MIME8TO7
/* forward declarations of the file-local 8->7 helpers defined below */
static int	isboundary __P((char *, char **));
static int	mimeboundary __P((char *, char **));
static int	mime_getchar __P((SM_FILE_T *, char **, int *));
static int	mime_getchar_crlf __P((SM_FILE_T *, char **, int *));

/* character set for hex and base64 encoding */
static char	Base16Code[] =	"0123456789ABCDEF";
static char	Base64Code[] =	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* types of MIME boundaries */
# define MBT_SYNTAX	0	/* syntax error */
# define MBT_NOTSEP	1	/* not a boundary */
# define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
# define MBT_FINAL	3	/* final boundary (trailing -- included) */

/* printable names for the MBT_* codes above, indexed by code (debug output) */
static char	*MimeBoundaryNames[] =
{
	"SYNTAX",	"NOTSEP",	"INTERMED",	"FINAL"
};

/*
**  When true, mime_getchar_crlf() expands each NL it reads into CR LF.
**  mime8to7() sets it to true on entry and may clear it again when
**  USE_B_CLASS is defined and the content type is in class 'b'.
*/

static bool	MapNLtoCRLF;
83 ** 84 ** Returns: 85 ** An indicator of what terminated the message part: 86 ** MBT_FINAL -- the final boundary 87 ** MBT_INTERMED -- an intermediate boundary 88 ** MBT_NOTSEP -- an end of file 89 ** SM_IO_EOF -- I/O error occurred 90 */ 91 92 struct args 93 { 94 char *a_field; /* name of field */ 95 char *a_value; /* value of that field */ 96 }; 97 98 int 99 mime8to7(mci, header, e, boundaries, flags) 100 register MCI *mci; 101 HDR *header; 102 register ENVELOPE *e; 103 char **boundaries; 104 int flags; 105 { 106 register char *p; 107 int linelen; 108 int bt; 109 off_t offset; 110 size_t sectionsize, sectionhighbits; 111 int i; 112 char *type; 113 char *subtype; 114 char *cte; 115 char **pvp; 116 int argc = 0; 117 char *bp; 118 bool use_qp = false; 119 struct args argv[MAXMIMEARGS]; 120 char bbuf[128]; 121 char buf[MAXLINE]; 122 char pvpbuf[MAXLINE]; 123 extern unsigned char MimeTokenTab[256]; 124 125 if (tTd(43, 1)) 126 { 127 sm_dprintf("mime8to7: flags = %x, boundaries =", flags); 128 if (boundaries[0] == NULL) 129 sm_dprintf(" <none>"); 130 else 131 { 132 for (i = 0; boundaries[i] != NULL; i++) 133 sm_dprintf(" %s", boundaries[i]); 134 } 135 sm_dprintf("\n"); 136 } 137 MapNLtoCRLF = true; 138 p = hvalue("Content-Transfer-Encoding", header); 139 if (p == NULL || 140 (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL, 141 MimeTokenTab, false)) == NULL || 142 pvp[0] == NULL) 143 { 144 cte = NULL; 145 } 146 else 147 { 148 cataddr(pvp, NULL, buf, sizeof buf, '\0'); 149 cte = sm_rpool_strdup_x(e->e_rpool, buf); 150 } 151 152 type = subtype = NULL; 153 p = hvalue("Content-Type", header); 154 if (p == NULL) 155 { 156 if (bitset(M87F_DIGEST, flags)) 157 p = "message/rfc822"; 158 else 159 p = "text/plain"; 160 } 161 if (p != NULL && 162 (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL, 163 MimeTokenTab, false)) != NULL && 164 pvp[0] != NULL) 165 { 166 if (tTd(43, 40)) 167 { 168 for (i = 0; pvp[i] != NULL; i++) 169 sm_dprintf("pvp[%d] = \"%s\"\n", i, pvp[i]); 170 } 
171 type = *pvp++; 172 if (*pvp != NULL && strcmp(*pvp, "/") == 0 && 173 *++pvp != NULL) 174 { 175 subtype = *pvp++; 176 } 177 178 /* break out parameters */ 179 while (*pvp != NULL && argc < MAXMIMEARGS) 180 { 181 /* skip to semicolon separator */ 182 while (*pvp != NULL && strcmp(*pvp, ";") != 0) 183 pvp++; 184 if (*pvp++ == NULL || *pvp == NULL) 185 break; 186 187 /* complain about empty values */ 188 if (strcmp(*pvp, ";") == 0) 189 { 190 usrerr("mime8to7: Empty parameter in Content-Type header"); 191 192 /* avoid bounce loops */ 193 e->e_flags |= EF_DONT_MIME; 194 continue; 195 } 196 197 /* extract field name */ 198 argv[argc].a_field = *pvp++; 199 200 /* see if there is a value */ 201 if (*pvp != NULL && strcmp(*pvp, "=") == 0 && 202 (*++pvp == NULL || strcmp(*pvp, ";") != 0)) 203 { 204 argv[argc].a_value = *pvp; 205 argc++; 206 } 207 } 208 } 209 210 /* check for disaster cases */ 211 if (type == NULL) 212 type = "-none-"; 213 if (subtype == NULL) 214 subtype = "-none-"; 215 216 /* don't propogate some flags more than one level into the message */ 217 flags &= ~M87F_DIGEST; 218 219 /* 220 ** Check for cases that can not be encoded. 221 ** 222 ** For example, you can't encode certain kinds of types 223 ** or already-encoded messages. If we find this case, 224 ** just copy it through. 225 */ 226 227 (void) sm_snprintf(buf, sizeof buf, "%.100s/%.100s", type, subtype); 228 if (wordinclass(buf, 'n') || (cte != NULL && !wordinclass(cte, 'e'))) 229 flags |= M87F_NO8BIT; 230 231 # ifdef USE_B_CLASS 232 if (wordinclass(buf, 'b') || wordinclass(type, 'b')) 233 MapNLtoCRLF = false; 234 # endif /* USE_B_CLASS */ 235 if (wordinclass(buf, 'q') || wordinclass(type, 'q')) 236 use_qp = true; 237 238 /* 239 ** Multipart requires special processing. 240 ** 241 ** Do a recursive descent into the message. 
242 */ 243 244 if (sm_strcasecmp(type, "multipart") == 0 && 245 (!bitset(M87F_NO8BIT, flags) || bitset(M87F_NO8TO7, flags))) 246 { 247 248 if (sm_strcasecmp(subtype, "digest") == 0) 249 flags |= M87F_DIGEST; 250 251 for (i = 0; i < argc; i++) 252 { 253 if (sm_strcasecmp(argv[i].a_field, "boundary") == 0) 254 break; 255 } 256 if (i >= argc || argv[i].a_value == NULL) 257 { 258 usrerr("mime8to7: Content-Type: \"%s\": %s boundary", 259 i >= argc ? "missing" : "bogus", p); 260 p = "---"; 261 262 /* avoid bounce loops */ 263 e->e_flags |= EF_DONT_MIME; 264 } 265 else 266 { 267 p = argv[i].a_value; 268 stripquotes(p); 269 } 270 if (sm_strlcpy(bbuf, p, sizeof bbuf) >= sizeof bbuf) 271 { 272 usrerr("mime8to7: multipart boundary \"%s\" too long", 273 p); 274 275 /* avoid bounce loops */ 276 e->e_flags |= EF_DONT_MIME; 277 } 278 279 if (tTd(43, 1)) 280 sm_dprintf("mime8to7: multipart boundary \"%s\"\n", 281 bbuf); 282 for (i = 0; i < MAXMIMENESTING; i++) 283 { 284 if (boundaries[i] == NULL) 285 break; 286 } 287 if (i >= MAXMIMENESTING) 288 { 289 usrerr("mime8to7: multipart nesting boundary too deep"); 290 291 /* avoid bounce loops */ 292 e->e_flags |= EF_DONT_MIME; 293 } 294 else 295 { 296 boundaries[i] = bbuf; 297 boundaries[i + 1] = NULL; 298 } 299 mci->mci_flags |= MCIF_INMIME; 300 301 /* skip the early "comment" prologue */ 302 if (!putline("", mci)) 303 goto writeerr; 304 mci->mci_flags &= ~MCIF_INHEADER; 305 bt = MBT_FINAL; 306 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 307 != NULL) 308 { 309 bt = mimeboundary(buf, boundaries); 310 if (bt != MBT_NOTSEP) 311 break; 312 if (!putxline(buf, strlen(buf), mci, 313 PXLF_MAPFROM|PXLF_STRIP8BIT)) 314 goto writeerr; 315 if (tTd(43, 99)) 316 sm_dprintf(" ...%s", buf); 317 } 318 if (sm_io_eof(e->e_dfp)) 319 bt = MBT_FINAL; 320 while (bt != MBT_FINAL) 321 { 322 auto HDR *hdr = NULL; 323 324 (void) sm_strlcpyn(buf, sizeof buf, 2, "--", bbuf); 325 if (!putline(buf, mci)) 326 goto writeerr; 327 if (tTd(43, 35)) 328 
sm_dprintf(" ...%s\n", buf); 329 collect(e->e_dfp, false, &hdr, e, false); 330 if (tTd(43, 101)) 331 putline("+++after collect", mci); 332 if (!putheader(mci, hdr, e, flags)) 333 goto writeerr; 334 if (tTd(43, 101)) 335 putline("+++after putheader", mci); 336 bt = mime8to7(mci, hdr, e, boundaries, flags); 337 if (bt == SM_IO_EOF) 338 goto writeerr; 339 } 340 (void) sm_strlcpyn(buf, sizeof buf, 3, "--", bbuf, "--"); 341 if (!putline(buf, mci)) 342 goto writeerr; 343 if (tTd(43, 35)) 344 sm_dprintf(" ...%s\n", buf); 345 boundaries[i] = NULL; 346 mci->mci_flags &= ~MCIF_INMIME; 347 348 /* skip the late "comment" epilogue */ 349 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 350 != NULL) 351 { 352 bt = mimeboundary(buf, boundaries); 353 if (bt != MBT_NOTSEP) 354 break; 355 if (!putxline(buf, strlen(buf), mci, 356 PXLF_MAPFROM|PXLF_STRIP8BIT)) 357 goto writeerr; 358 if (tTd(43, 99)) 359 sm_dprintf(" ...%s", buf); 360 } 361 if (sm_io_eof(e->e_dfp)) 362 bt = MBT_FINAL; 363 if (tTd(43, 3)) 364 sm_dprintf("\t\t\tmime8to7=>%s (multipart)\n", 365 MimeBoundaryNames[bt]); 366 return bt; 367 } 368 369 /* 370 ** Message/xxx types -- recurse exactly once. 371 ** 372 ** Class 's' is predefined to have "rfc822" only. 
373 */ 374 375 if (sm_strcasecmp(type, "message") == 0) 376 { 377 if (!wordinclass(subtype, 's')) 378 { 379 flags |= M87F_NO8BIT; 380 } 381 else 382 { 383 auto HDR *hdr = NULL; 384 385 if (!putline("", mci)) 386 goto writeerr; 387 388 mci->mci_flags |= MCIF_INMIME; 389 collect(e->e_dfp, false, &hdr, e, false); 390 if (tTd(43, 101)) 391 putline("+++after collect", mci); 392 if (!putheader(mci, hdr, e, flags)) 393 goto writeerr; 394 if (tTd(43, 101)) 395 putline("+++after putheader", mci); 396 if (hvalue("MIME-Version", hdr) == NULL && 397 !bitset(M87F_NO8TO7, flags) && 398 !putline("MIME-Version: 1.0", mci)) 399 goto writeerr; 400 bt = mime8to7(mci, hdr, e, boundaries, flags); 401 mci->mci_flags &= ~MCIF_INMIME; 402 return bt; 403 } 404 } 405 406 /* 407 ** Non-compound body type 408 ** 409 ** Compute the ratio of seven to eight bit characters; 410 ** use that as a heuristic to decide how to do the 411 ** encoding. 412 */ 413 414 sectionsize = sectionhighbits = 0; 415 if (!bitset(M87F_NO8BIT|M87F_NO8TO7, flags)) 416 { 417 /* remember where we were */ 418 offset = sm_io_tell(e->e_dfp, SM_TIME_DEFAULT); 419 if (offset == -1) 420 syserr("mime8to7: cannot sm_io_tell on %cf%s", 421 DATAFL_LETTER, e->e_id); 422 423 /* do a scan of this body type to count character types */ 424 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 425 != NULL) 426 { 427 if (mimeboundary(buf, boundaries) != MBT_NOTSEP) 428 break; 429 for (p = buf; *p != '\0'; p++) 430 { 431 /* count bytes with the high bit set */ 432 sectionsize++; 433 if (bitset(0200, *p)) 434 sectionhighbits++; 435 } 436 437 /* 438 ** Heuristic: if 1/4 of the first 4K bytes are 8-bit, 439 ** assume base64. This heuristic avoids double-reading 440 ** large graphics or video files. 441 */ 442 443 if (sectionsize >= 4096 && 444 sectionhighbits > sectionsize / 4) 445 break; 446 } 447 448 /* return to the original offset for processing */ 449 /* XXX use relative seeks to handle >31 bit file sizes? 
*/ 450 if (sm_io_seek(e->e_dfp, SM_TIME_DEFAULT, offset, SEEK_SET) < 0) 451 syserr("mime8to7: cannot sm_io_fseek on %cf%s", 452 DATAFL_LETTER, e->e_id); 453 else 454 sm_io_clearerr(e->e_dfp); 455 } 456 457 /* 458 ** Heuristically determine encoding method. 459 ** If more than 1/8 of the total characters have the 460 ** eighth bit set, use base64; else use quoted-printable. 461 ** However, only encode binary encoded data as base64, 462 ** since otherwise the NL=>CRLF mapping will be a problem. 463 */ 464 465 if (tTd(43, 8)) 466 { 467 sm_dprintf("mime8to7: %ld high bit(s) in %ld byte(s), cte=%s, type=%s/%s\n", 468 (long) sectionhighbits, (long) sectionsize, 469 cte == NULL ? "[none]" : cte, 470 type == NULL ? "[none]" : type, 471 subtype == NULL ? "[none]" : subtype); 472 } 473 if (cte != NULL && sm_strcasecmp(cte, "binary") == 0) 474 sectionsize = sectionhighbits; 475 linelen = 0; 476 bp = buf; 477 if (sectionhighbits == 0) 478 { 479 /* no encoding necessary */ 480 if (cte != NULL && 481 bitset(MCIF_CVT8TO7|MCIF_CVT7TO8|MCIF_INMIME, 482 mci->mci_flags) && 483 !bitset(M87F_NO8TO7, flags)) 484 { 485 /* 486 ** Skip _unless_ in MIME mode and potentially 487 ** converting from 8 bit to 7 bit MIME. See 488 ** putheader() for the counterpart where the 489 ** CTE header is skipped in the opposite 490 ** situation. 
491 */ 492 493 (void) sm_snprintf(buf, sizeof buf, 494 "Content-Transfer-Encoding: %.200s", cte); 495 if (!putline(buf, mci)) 496 goto writeerr; 497 if (tTd(43, 36)) 498 sm_dprintf(" ...%s\n", buf); 499 } 500 if (!putline("", mci)) 501 goto writeerr; 502 mci->mci_flags &= ~MCIF_INHEADER; 503 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 504 != NULL) 505 { 506 bt = mimeboundary(buf, boundaries); 507 if (bt != MBT_NOTSEP) 508 break; 509 if (!putline(buf, mci)) 510 goto writeerr; 511 } 512 if (sm_io_eof(e->e_dfp)) 513 bt = MBT_FINAL; 514 } 515 else if (!MapNLtoCRLF || 516 (sectionsize / 8 < sectionhighbits && !use_qp)) 517 { 518 /* use base64 encoding */ 519 int c1, c2; 520 521 if (tTd(43, 36)) 522 sm_dprintf(" ...Content-Transfer-Encoding: base64\n"); 523 if (!putline("Content-Transfer-Encoding: base64", mci)) 524 goto writeerr; 525 (void) sm_snprintf(buf, sizeof buf, 526 "X-MIME-Autoconverted: from 8bit to base64 by %s id %s", 527 MyHostName, e->e_id); 528 if (!putline(buf, mci) || !putline("", mci)) 529 goto writeerr; 530 mci->mci_flags &= ~MCIF_INHEADER; 531 while ((c1 = mime_getchar_crlf(e->e_dfp, boundaries, &bt)) != 532 SM_IO_EOF) 533 { 534 if (linelen > 71) 535 { 536 *bp = '\0'; 537 if (!putline(buf, mci)) 538 goto writeerr; 539 linelen = 0; 540 bp = buf; 541 } 542 linelen += 4; 543 *bp++ = Base64Code[(c1 >> 2)]; 544 c1 = (c1 & 0x03) << 4; 545 c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt); 546 if (c2 == SM_IO_EOF) 547 { 548 *bp++ = Base64Code[c1]; 549 *bp++ = '='; 550 *bp++ = '='; 551 break; 552 } 553 c1 |= (c2 >> 4) & 0x0f; 554 *bp++ = Base64Code[c1]; 555 c1 = (c2 & 0x0f) << 2; 556 c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt); 557 if (c2 == SM_IO_EOF) 558 { 559 *bp++ = Base64Code[c1]; 560 *bp++ = '='; 561 break; 562 } 563 c1 |= (c2 >> 6) & 0x03; 564 *bp++ = Base64Code[c1]; 565 *bp++ = Base64Code[c2 & 0x3f]; 566 } 567 *bp = '\0'; 568 if (!putline(buf, mci)) 569 goto writeerr; 570 } 571 else 572 { 573 /* use quoted-printable encoding 
*/ 574 int c1, c2; 575 int fromstate; 576 BITMAP256 badchars; 577 578 /* set up map of characters that must be mapped */ 579 clrbitmap(badchars); 580 for (c1 = 0x00; c1 < 0x20; c1++) 581 setbitn(c1, badchars); 582 clrbitn('\t', badchars); 583 for (c1 = 0x7f; c1 < 0x100; c1++) 584 setbitn(c1, badchars); 585 setbitn('=', badchars); 586 if (bitnset(M_EBCDIC, mci->mci_mailer->m_flags)) 587 for (p = "!\"#$@[\\]^`{|}~"; *p != '\0'; p++) 588 setbitn(*p, badchars); 589 590 if (tTd(43, 36)) 591 sm_dprintf(" ...Content-Transfer-Encoding: quoted-printable\n"); 592 if (!putline("Content-Transfer-Encoding: quoted-printable", 593 mci)) 594 goto writeerr; 595 (void) sm_snprintf(buf, sizeof buf, 596 "X-MIME-Autoconverted: from 8bit to quoted-printable by %s id %s", 597 MyHostName, e->e_id); 598 if (!putline(buf, mci) || !putline("", mci)) 599 goto writeerr; 600 mci->mci_flags &= ~MCIF_INHEADER; 601 fromstate = 0; 602 c2 = '\n'; 603 while ((c1 = mime_getchar(e->e_dfp, boundaries, &bt)) != 604 SM_IO_EOF) 605 { 606 if (c1 == '\n') 607 { 608 if (c2 == ' ' || c2 == '\t') 609 { 610 *bp++ = '='; 611 *bp++ = Base16Code[(c2 >> 4) & 0x0f]; 612 *bp++ = Base16Code[c2 & 0x0f]; 613 } 614 if (buf[0] == '.' && bp == &buf[1]) 615 { 616 buf[0] = '='; 617 *bp++ = Base16Code[('.' >> 4) & 0x0f]; 618 *bp++ = Base16Code['.' & 0x0f]; 619 } 620 *bp = '\0'; 621 if (!putline(buf, mci)) 622 goto writeerr; 623 linelen = fromstate = 0; 624 bp = buf; 625 c2 = c1; 626 continue; 627 } 628 if (c2 == ' ' && linelen == 4 && fromstate == 4 && 629 bitnset(M_ESCFROM, mci->mci_mailer->m_flags)) 630 { 631 *bp++ = '='; 632 *bp++ = '2'; 633 *bp++ = '0'; 634 linelen += 3; 635 } 636 else if (c2 == ' ' || c2 == '\t') 637 { 638 *bp++ = c2; 639 linelen++; 640 } 641 if (linelen > 72 && 642 (linelen > 75 || c1 != '.' 
|| 643 (linelen > 73 && c2 == '.'))) 644 { 645 if (linelen > 73 && c2 == '.') 646 bp--; 647 else 648 c2 = '\n'; 649 *bp++ = '='; 650 *bp = '\0'; 651 if (!putline(buf, mci)) 652 goto writeerr; 653 linelen = fromstate = 0; 654 bp = buf; 655 if (c2 == '.') 656 { 657 *bp++ = '.'; 658 linelen++; 659 } 660 } 661 if (bitnset(bitidx(c1), badchars)) 662 { 663 *bp++ = '='; 664 *bp++ = Base16Code[(c1 >> 4) & 0x0f]; 665 *bp++ = Base16Code[c1 & 0x0f]; 666 linelen += 3; 667 } 668 else if (c1 != ' ' && c1 != '\t') 669 { 670 if (linelen < 4 && c1 == "From"[linelen]) 671 fromstate++; 672 *bp++ = c1; 673 linelen++; 674 } 675 c2 = c1; 676 } 677 678 /* output any saved character */ 679 if (c2 == ' ' || c2 == '\t') 680 { 681 *bp++ = '='; 682 *bp++ = Base16Code[(c2 >> 4) & 0x0f]; 683 *bp++ = Base16Code[c2 & 0x0f]; 684 linelen += 3; 685 } 686 687 if (linelen > 0 || boundaries[0] != NULL) 688 { 689 *bp = '\0'; 690 if (!putline(buf, mci)) 691 goto writeerr; 692 } 693 694 } 695 if (tTd(43, 3)) 696 sm_dprintf("\t\t\tmime8to7=>%s (basic)\n", MimeBoundaryNames[bt]); 697 return bt; 698 699 writeerr: 700 return SM_IO_EOF; 701 } 702 /* 703 ** MIME_GETCHAR -- get a character for MIME processing 704 ** 705 ** Treats boundaries as SM_IO_EOF. 706 ** 707 ** Parameters: 708 ** fp -- the input file. 709 ** boundaries -- the current MIME boundaries. 710 ** btp -- if the return value is SM_IO_EOF, *btp is set to 711 ** the type of the boundary. 712 ** 713 ** Returns: 714 ** The next character in the input stream. 
715 */ 716 717 static int 718 mime_getchar(fp, boundaries, btp) 719 register SM_FILE_T *fp; 720 char **boundaries; 721 int *btp; 722 { 723 int c; 724 static unsigned char *bp = NULL; 725 static int buflen = 0; 726 static bool atbol = true; /* at beginning of line */ 727 static int bt = MBT_SYNTAX; /* boundary type of next SM_IO_EOF */ 728 static unsigned char buf[128]; /* need not be a full line */ 729 int start = 0; /* indicates position of - in buffer */ 730 731 if (buflen == 1 && *bp == '\n') 732 { 733 /* last \n in buffer may be part of next MIME boundary */ 734 c = *bp; 735 } 736 else if (buflen > 0) 737 { 738 buflen--; 739 return *bp++; 740 } 741 else 742 c = sm_io_getc(fp, SM_TIME_DEFAULT); 743 bp = buf; 744 buflen = 0; 745 if (c == '\n') 746 { 747 /* might be part of a MIME boundary */ 748 *bp++ = c; 749 atbol = true; 750 c = sm_io_getc(fp, SM_TIME_DEFAULT); 751 if (c == '\n') 752 { 753 (void) sm_io_ungetc(fp, SM_TIME_DEFAULT, c); 754 return c; 755 } 756 start = 1; 757 } 758 if (c != SM_IO_EOF) 759 *bp++ = c; 760 else 761 bt = MBT_FINAL; 762 if (atbol && c == '-') 763 { 764 /* check for a message boundary */ 765 c = sm_io_getc(fp, SM_TIME_DEFAULT); 766 if (c != '-') 767 { 768 if (c != SM_IO_EOF) 769 *bp++ = c; 770 else 771 bt = MBT_FINAL; 772 buflen = bp - buf - 1; 773 bp = buf; 774 return *bp++; 775 } 776 777 /* got "--", now check for rest of separator */ 778 *bp++ = '-'; 779 while (bp < &buf[sizeof buf - 2] && 780 (c = sm_io_getc(fp, SM_TIME_DEFAULT)) != SM_IO_EOF && 781 c != '\n') 782 { 783 *bp++ = c; 784 } 785 *bp = '\0'; /* XXX simply cut off? 
*/ 786 bt = mimeboundary((char *) &buf[start], boundaries); 787 switch (bt) 788 { 789 case MBT_FINAL: 790 case MBT_INTERMED: 791 /* we have a message boundary */ 792 buflen = 0; 793 *btp = bt; 794 return SM_IO_EOF; 795 } 796 797 if (bp < &buf[sizeof buf - 2] && c != SM_IO_EOF) 798 *bp++ = c; 799 } 800 801 atbol = c == '\n'; 802 buflen = bp - buf - 1; 803 if (buflen < 0) 804 { 805 *btp = bt; 806 return SM_IO_EOF; 807 } 808 bp = buf; 809 return *bp++; 810 } 811 /* 812 ** MIME_GETCHAR_CRLF -- do mime_getchar, but translate NL => CRLF 813 ** 814 ** Parameters: 815 ** fp -- the input file. 816 ** boundaries -- the current MIME boundaries. 817 ** btp -- if the return value is SM_IO_EOF, *btp is set to 818 ** the type of the boundary. 819 ** 820 ** Returns: 821 ** The next character in the input stream. 822 */ 823 824 static int 825 mime_getchar_crlf(fp, boundaries, btp) 826 register SM_FILE_T *fp; 827 char **boundaries; 828 int *btp; 829 { 830 static bool sendlf = false; 831 int c; 832 833 if (sendlf) 834 { 835 sendlf = false; 836 return '\n'; 837 } 838 c = mime_getchar(fp, boundaries, btp); 839 if (c == '\n' && MapNLtoCRLF) 840 { 841 sendlf = true; 842 return '\r'; 843 } 844 return c; 845 } 846 /* 847 ** MIMEBOUNDARY -- determine if this line is a MIME boundary & its type 848 ** 849 ** Parameters: 850 ** line -- the input line. 851 ** boundaries -- the set of currently pending boundaries. 852 ** 853 ** Returns: 854 ** MBT_NOTSEP -- if this is not a separator line 855 ** MBT_INTERMED -- if this is an intermediate separator 856 ** MBT_FINAL -- if this is a final boundary 857 ** MBT_SYNTAX -- if this is a boundary for the wrong 858 ** enclosure -- i.e., a syntax error. 
859 */ 860 861 static int 862 mimeboundary(line, boundaries) 863 register char *line; 864 char **boundaries; 865 { 866 int type = MBT_NOTSEP; 867 int i; 868 int savec; 869 870 if (line[0] != '-' || line[1] != '-' || boundaries == NULL) 871 return MBT_NOTSEP; 872 i = strlen(line); 873 if (i > 0 && line[i - 1] == '\n') 874 i--; 875 876 /* strip off trailing whitespace */ 877 while (i > 0 && (line[i - 1] == ' ' || line[i - 1] == '\t' 878 #if _FFR_MIME_CR_OK 879 || line[i - 1] == '\r' 880 #endif /* _FFR_MIME_CR_OK */ 881 )) 882 i--; 883 savec = line[i]; 884 line[i] = '\0'; 885 886 if (tTd(43, 5)) 887 sm_dprintf("mimeboundary: line=\"%s\"... ", line); 888 889 /* check for this as an intermediate boundary */ 890 if (isboundary(&line[2], boundaries) >= 0) 891 type = MBT_INTERMED; 892 else if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0) 893 { 894 /* check for a final boundary */ 895 line[i - 2] = '\0'; 896 if (isboundary(&line[2], boundaries) >= 0) 897 type = MBT_FINAL; 898 line[i - 2] = '-'; 899 } 900 901 line[i] = savec; 902 if (tTd(43, 5)) 903 sm_dprintf("%s\n", MimeBoundaryNames[type]); 904 return type; 905 } 906 /* 907 ** DEFCHARSET -- return default character set for message 908 ** 909 ** The first choice for character set is for the mailer 910 ** corresponding to the envelope sender. If neither that 911 ** nor the global configuration file has a default character 912 ** set defined, return "unknown-8bit" as recommended by 913 ** RFC 1428 section 3. 914 ** 915 ** Parameters: 916 ** e -- the envelope for this message. 917 ** 918 ** Returns: 919 ** The default character set for that mailer. 920 */ 921 922 char * 923 defcharset(e) 924 register ENVELOPE *e; 925 { 926 if (e != NULL && e->e_from.q_mailer != NULL && 927 e->e_from.q_mailer->m_defcharset != NULL) 928 return e->e_from.q_mailer->m_defcharset; 929 if (DefaultCharSet != NULL) 930 return DefaultCharSet; 931 return "unknown-8bit"; 932 } 933 /* 934 ** ISBOUNDARY -- is a given string a currently valid boundary? 
935 ** 936 ** Parameters: 937 ** line -- the current input line. 938 ** boundaries -- the list of valid boundaries. 939 ** 940 ** Returns: 941 ** The index number in boundaries if the line is found. 942 ** -1 -- otherwise. 943 ** 944 */ 945 946 static int 947 isboundary(line, boundaries) 948 char *line; 949 char **boundaries; 950 { 951 register int i; 952 953 for (i = 0; i <= MAXMIMENESTING && boundaries[i] != NULL; i++) 954 { 955 if (strcmp(line, boundaries[i]) == 0) 956 return i; 957 } 958 return -1; 959 } 960 #endif /* MIME8TO7 */ 961 962 #if MIME7TO8 963 static int mime_fromqp __P((unsigned char *, unsigned char **, int)); 964 965 /* 966 ** MIME7TO8 -- output 7 bit encoded MIME body in 8 bit format 967 ** 968 ** This is a hack. Supports translating the two 7-bit body-encodings 969 ** (quoted-printable and base64) to 8-bit coded bodies. 970 ** 971 ** There is not much point in supporting multipart here, as the UA 972 ** will be able to deal with encoded MIME bodies if it can parse MIME 973 ** multipart messages. 974 ** 975 ** Note also that we won't be called unless it is a text/plain MIME 976 ** message, encoded base64 or QP and mailer flag '9' has been defined 977 ** on mailer. 978 ** 979 ** Contributed by Marius Olaffson <marius@rhi.hi.is>. 980 ** 981 ** Parameters: 982 ** mci -- mailer connection information. 983 ** header -- the header for this body part. 984 ** e -- envelope. 
985 ** 986 ** Returns: 987 ** true iff body was written successfully 988 */ 989 990 static char index_64[128] = 991 { 992 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 993 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 994 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63, 995 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1, 996 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14, 997 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1, 998 -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, 999 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1 1000 }; 1001 1002 # define CHAR64(c) (((c) < 0 || (c) > 127) ? -1 : index_64[(c)]) 1003 1004 bool 1005 mime7to8(mci, header, e) 1006 register MCI *mci; 1007 HDR *header; 1008 register ENVELOPE *e; 1009 { 1010 int pxflags; 1011 register char *p; 1012 char *cte; 1013 char **pvp; 1014 unsigned char *fbufp; 1015 char buf[MAXLINE]; 1016 unsigned char fbuf[MAXLINE + 1]; 1017 char pvpbuf[MAXLINE]; 1018 extern unsigned char MimeTokenTab[256]; 1019 1020 p = hvalue("Content-Transfer-Encoding", header); 1021 if (p == NULL || 1022 (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL, 1023 MimeTokenTab, false)) == NULL || 1024 pvp[0] == NULL) 1025 { 1026 /* "can't happen" -- upper level should have caught this */ 1027 syserr("mime7to8: unparsable CTE %s", p == NULL ? 
"<NULL>" : p); 1028 1029 /* avoid bounce loops */ 1030 e->e_flags |= EF_DONT_MIME; 1031 1032 /* cheap failsafe algorithm -- should work on text/plain */ 1033 if (p != NULL) 1034 { 1035 (void) sm_snprintf(buf, sizeof buf, 1036 "Content-Transfer-Encoding: %s", p); 1037 if (!putline(buf, mci)) 1038 goto writeerr; 1039 } 1040 if (!putline("", mci)) 1041 goto writeerr; 1042 mci->mci_flags &= ~MCIF_INHEADER; 1043 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 1044 != NULL) 1045 { 1046 if (!putline(buf, mci)) 1047 goto writeerr; 1048 } 1049 return true; 1050 } 1051 cataddr(pvp, NULL, buf, sizeof buf, '\0'); 1052 cte = sm_rpool_strdup_x(e->e_rpool, buf); 1053 1054 mci->mci_flags |= MCIF_INHEADER; 1055 if (!putline("Content-Transfer-Encoding: 8bit", mci)) 1056 goto writeerr; 1057 (void) sm_snprintf(buf, sizeof buf, 1058 "X-MIME-Autoconverted: from %.200s to 8bit by %s id %s", 1059 cte, MyHostName, e->e_id); 1060 if (!putline(buf, mci) || !putline("", mci)) 1061 goto writeerr; 1062 mci->mci_flags &= ~MCIF_INHEADER; 1063 1064 /* 1065 ** Translate body encoding to 8-bit. Supports two types of 1066 ** encodings; "base64" and "quoted-printable". Assume qp if 1067 ** it is not base64. 
1068 */ 1069 1070 pxflags = PXLF_MAPFROM; 1071 if (sm_strcasecmp(cte, "base64") == 0) 1072 { 1073 int c1, c2, c3, c4; 1074 1075 fbufp = fbuf; 1076 while ((c1 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT)) != 1077 SM_IO_EOF) 1078 { 1079 if (isascii(c1) && isspace(c1)) 1080 continue; 1081 1082 do 1083 { 1084 c2 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT); 1085 } while (isascii(c2) && isspace(c2)); 1086 if (c2 == SM_IO_EOF) 1087 break; 1088 1089 do 1090 { 1091 c3 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT); 1092 } while (isascii(c3) && isspace(c3)); 1093 if (c3 == SM_IO_EOF) 1094 break; 1095 1096 do 1097 { 1098 c4 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT); 1099 } while (isascii(c4) && isspace(c4)); 1100 if (c4 == SM_IO_EOF) 1101 break; 1102 1103 if (c1 == '=' || c2 == '=') 1104 continue; 1105 c1 = CHAR64(c1); 1106 c2 = CHAR64(c2); 1107 1108 #if MIME7TO8_OLD 1109 #define CHK_EOL if (*--fbufp != '\n' || (fbufp > fbuf && *--fbufp != '\r')) \ 1110 ++fbufp; 1111 #else /* MIME7TO8_OLD */ 1112 #define CHK_EOL if (*--fbufp != '\n' || (fbufp > fbuf && *--fbufp != '\r')) \ 1113 { \ 1114 ++fbufp; \ 1115 pxflags |= PXLF_NOADDEOL; \ 1116 } 1117 #endif /* MIME7TO8_OLD */ 1118 1119 #define PUTLINE64 \ 1120 do \ 1121 { \ 1122 if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE]) \ 1123 { \ 1124 CHK_EOL; \ 1125 if (!putxline((char *) fbuf, fbufp - fbuf, mci, pxflags)) \ 1126 goto writeerr; \ 1127 pxflags &= ~PXLF_NOADDEOL; \ 1128 fbufp = fbuf; \ 1129 } \ 1130 } while (0) 1131 1132 *fbufp = (c1 << 2) | ((c2 & 0x30) >> 4); 1133 PUTLINE64; 1134 if (c3 == '=') 1135 continue; 1136 c3 = CHAR64(c3); 1137 *fbufp = ((c2 & 0x0f) << 4) | ((c3 & 0x3c) >> 2); 1138 PUTLINE64; 1139 if (c4 == '=') 1140 continue; 1141 c4 = CHAR64(c4); 1142 *fbufp = ((c3 & 0x03) << 6) | c4; 1143 PUTLINE64; 1144 } 1145 } 1146 else 1147 { 1148 int off; 1149 1150 /* quoted-printable */ 1151 pxflags |= PXLF_NOADDEOL; 1152 fbufp = fbuf; 1153 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, 1154 sizeof buf) != NULL) 1155 { 1156 off = 
mime_fromqp((unsigned char *) buf, &fbufp, 1157 &fbuf[MAXLINE] - fbufp); 1158 again: 1159 if (off < -1) 1160 continue; 1161 1162 if (fbufp - fbuf > 0) 1163 { 1164 if (!putxline((char *) fbuf, fbufp - fbuf - 1, 1165 mci, pxflags)) 1166 goto writeerr; 1167 } 1168 fbufp = fbuf; 1169 if (off >= 0 && buf[off] != '\0') 1170 { 1171 off = mime_fromqp((unsigned char *) (buf + off), 1172 &fbufp, 1173 &fbuf[MAXLINE] - fbufp); 1174 goto again; 1175 } 1176 } 1177 } 1178 1179 /* force out partial last line */ 1180 if (fbufp > fbuf) 1181 { 1182 *fbufp = '\0'; 1183 if (!putxline((char *) fbuf, fbufp - fbuf, mci, pxflags)) 1184 goto writeerr; 1185 } 1186 1187 /* 1188 ** The decoded text may end without an EOL. Since this function 1189 ** is only called for text/plain MIME messages, it is safe to 1190 ** add an extra one at the end just in case. This is a hack, 1191 ** but so is auto-converting MIME in the first place. 1192 */ 1193 1194 if (!putline("", mci)) 1195 goto writeerr; 1196 1197 if (tTd(43, 3)) 1198 sm_dprintf("\t\t\tmime7to8 => %s to 8bit done\n", cte); 1199 return true; 1200 1201 writeerr: 1202 return false; 1203 } 1204 /* 1205 ** The following is based on Borenstein's "codes.c" module, with simplifying 1206 ** changes as we do not deal with multipart, and to do the translation in-core, 1207 ** with an attempt to prevent overrun of output buffers. 1208 ** 1209 ** What is needed here are changes to defend this code better against 1210 ** bad encodings. Questionable to always return 0xFF for bad mappings. 
/*
**  The following is based on Borenstein's "codes.c" module, with simplifying
**  changes as we do not deal with multipart, and to do the translation in-core,
**  with an attempt to prevent overrun of output buffers.
**
**  What is needed here are changes to defend this code better against
**  bad encodings. Questionable to always return 0xFF for bad mappings.
*/

/*
**  Hex digit lookup: value of each ASCII character as a hexadecimal
**  digit (upper or lower case), -1 if it is not one.  The element
**  type is explicitly signed so that the -1 entries compare equal to
**  -1 on platforms where plain char is unsigned.
*/

static const signed char index_hex[128] =
{
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
	-1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};

# define HEXCHAR(c)  (((c) < 0 || (c) > 127) ? -1 : index_hex[(c)])

/*
**  MIME_FROMQP -- decode quoted printable string
**
**	Decoding stops after a newline has been copied, at the end of
**	the input, or when the output budget is used up.  Except for
**	the two early returns (0 and -2) a NUL is stored behind the
**	decoded bytes and *outfile is left pointing past that NUL.
**
**	Parameters:
**		infile -- input (encoded) string
**		outfile -- output string
**		maxlen -- size of output buffer
**
**	Returns:
**		-2 if decoding failure
**			(this includes "=" directly followed by a
**			newline, i.e. a soft line break; nothing is
**			NUL-terminated in that case)
**		-1 if infile completely decoded into outfile
**		>= 0 is the position in infile decoding
**			reached before maxlen was reached
**			(0 is also returned, with nothing stored,
**			when maxlen leaves no room for data)
*/

static int
mime_fromqp(infile, outfile, maxlen)
	unsigned char *infile;
	unsigned char **outfile;
	int maxlen;		/* Max # of chars allowed in outfile */
{
	int c1, c2;
	int nchar = 0;
	unsigned char *b;

	/* decrement by one for trailing '\0', at least one other char */
	if (--maxlen < 1)
		return 0;

	b = infile;

	/*
	**  infile is only advanced once a character is actually being
	**  processed, so that after the loop it points at the first
	**  input character that has not been consumed.
	*/

	while (nchar < maxlen && (c1 = *infile) != '\0')
	{
		infile++;
		if (c1 == '=')
		{
			if ((c1 = *infile++) == '\0')
				break;

			if (c1 == '\n' || (c1 = HEXCHAR(c1)) == -1)
			{
				/* ignore it and the rest of the buffer */
				return -2;
			}
			else
			{
				/* look for the second hex digit */
				do
				{
					if ((c2 = *infile++) == '\0')
					{
						c2 = -1;
						break;
					}
				} while ((c2 = HEXCHAR(c2)) == -1);

				if (c2 == -1)
					break;
				nchar++;
				*(*outfile)++ = c1 << 4 | c2;
			}
		}
		else
		{
			nchar++;
			*(*outfile)++ = c1;
			if (c1 == '\n')
				break;
		}
	}
	*(*outfile)++ = '\0';

	/*
	**  Report a restart position only if the budget is used up AND
	**  input is left over.  A line whose final newline was the last
	**  character that fit has been decoded completely; returning
	**  the offset of that newline would make the caller decode it a
	**  second time.  (When nchar < maxlen, infile may point past
	**  the terminating NUL, so it must not be dereferenced then.)
	*/

	if (nchar >= maxlen && *infile != '\0')
		return (infile - b);
	return -1;
}
#endif /* MIME7TO8 */