1 /* 2 * Copyright (c) 1998-2001 Sendmail, Inc. and its suppliers. 3 * All rights reserved. 4 * Copyright (c) 1994, 1996-1997 Eric P. Allman. All rights reserved. 5 * Copyright (c) 1994 6 * The Regents of the University of California. All rights reserved. 7 * 8 * By using this file, you agree to the terms and conditions set 9 * forth in the LICENSE file which can be found at the top level of 10 * the sendmail distribution. 11 * 12 */ 13 14 #include <sendmail.h> 15 #include <string.h> 16 17 SM_RCSID("@(#)$Id: mime.c,v 8.125 2001/09/11 04:05:15 gshapiro Exp $") 18 19 /* 20 ** MIME support. 21 ** 22 ** I am indebted to John Beck of Hewlett-Packard, who contributed 23 ** his code to me for inclusion. As it turns out, I did not use 24 ** his code since he used a "minimum change" approach that used 25 ** several temp files, and I wanted a "minimum impact" approach 26 ** that would avoid copying. However, looking over his code 27 ** helped me cement my understanding of the problem. 28 ** 29 ** I also looked at, but did not directly use, Nathaniel 30 ** Borenstein's "code.c" module. Again, it functioned as 31 ** a file-to-file translator, which did not fit within my 32 ** design bounds, but it was a useful base for understanding 33 ** the problem. 34 */ 35 36 #if MIME8TO7 37 static int isboundary __P((char *, char **)); 38 static int mimeboundary __P((char *, char **)); 39 static int mime_getchar __P((SM_FILE_T *, char **, int *)); 40 static int mime_getchar_crlf __P((SM_FILE_T *, char **, int *)); 41 42 /* character set for hex and base64 encoding */ 43 static char Base16Code[] = "0123456789ABCDEF"; 44 static char Base64Code[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 45 46 /* types of MIME boundaries */ 47 # define MBT_SYNTAX 0 /* syntax error */ 48 # define MBT_NOTSEP 1 /* not a boundary */ 49 # define MBT_INTERMED 2 /* intermediate boundary (no trailing --) */ 50 # define MBT_FINAL 3 /* final boundary (trailing -- included) */ 51 52 static char *MimeBoundaryNames[] = 53 { 54 "SYNTAX", "NOTSEP", "INTERMED", "FINAL" 55 }; 56 57 static bool MapNLtoCRLF; 58 59 /* 60 ** MIME8TO7 -- output 8 bit body in 7 bit format 61 ** 62 ** The header has already been output -- this has to do the 63 ** 8 to 7 bit conversion. It would be easy if we didn't have 64 ** to deal with nested formats (multipart/xxx and message/rfc822). 65 ** 66 ** We won't be called if we don't have to do a conversion, and 67 ** appropriate MIME-Version: and Content-Type: fields have been 68 ** output. Any Content-Transfer-Encoding: field has not been 69 ** output, and we can add it here. 70 ** 71 ** Parameters: 72 ** mci -- mailer connection information. 73 ** header -- the header for this body part. 74 ** e -- envelope. 75 ** boundaries -- the currently pending message boundaries. 76 ** NULL if we are processing the outer portion. 77 ** flags -- to tweak processing. 78 ** 79 ** Returns: 80 ** An indicator of what terminated the message part: 81 ** MBT_FINAL -- the final boundary 82 ** MBT_INTERMED -- an intermediate boundary 83 ** MBT_NOTSEP -- an end of file 84 */ 85 86 struct args 87 { 88 char *a_field; /* name of field */ 89 char *a_value; /* value of that field */ 90 }; 91 92 int 93 mime8to7(mci, header, e, boundaries, flags) 94 register MCI *mci; 95 HDR *header; 96 register ENVELOPE *e; 97 char **boundaries; 98 int flags; 99 { 100 register char *p; 101 int linelen; 102 int bt; 103 off_t offset; 104 size_t sectionsize, sectionhighbits; 105 int i; 106 char *type; 107 char *subtype; 108 char *cte; 109 char **pvp; 110 int argc = 0; 111 char *bp; 112 bool use_qp = false; 113 struct args argv[MAXMIMEARGS]; 114 char bbuf[128]; 115 char buf[MAXLINE]; 116 char pvpbuf[MAXLINE]; 117 extern unsigned char MimeTokenTab[256]; 118 119 if (tTd(43, 1)) 120 { 121 sm_dprintf("mime8to7: flags = %x, boundaries =", flags); 122 if (boundaries[0] == NULL) 123 sm_dprintf(" <none>"); 124 else 125 { 126 for (i = 0; boundaries[i] != NULL; i++) 127 sm_dprintf(" %s", boundaries[i]); 128 } 129 sm_dprintf("\n"); 130 } 131 MapNLtoCRLF = true; 132 p = hvalue("Content-Transfer-Encoding", header); 133 if (p == NULL || 134 (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL, 135 MimeTokenTab)) == NULL || 136 pvp[0] == NULL) 137 { 138 cte = NULL; 139 } 140 else 141 { 142 cataddr(pvp, NULL, buf, sizeof buf, '\0'); 143 cte = sm_rpool_strdup_x(e->e_rpool, buf); 144 } 145 146 type = subtype = NULL; 147 p = hvalue("Content-Type", header); 148 if (p == NULL) 149 { 150 if (bitset(M87F_DIGEST, flags)) 151 p = "message/rfc822"; 152 else 153 p = "text/plain"; 154 } 155 if (p != NULL && 156 (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL, 157 MimeTokenTab)) != NULL && 158 pvp[0] != NULL) 159 { 160 if (tTd(43, 40)) 161 { 162 for (i = 0; pvp[i] != NULL; i++) 163 sm_dprintf("pvp[%d] = \"%s\"\n", i, pvp[i]); 164 } 165 type = *pvp++; 166 if (*pvp != NULL && strcmp(*pvp, "/") == 0 && 167 *++pvp != NULL) 168 { 169 subtype = *pvp++; 170 } 171 172 /* break out parameters */ 173 while (*pvp != NULL && argc < MAXMIMEARGS) 174 { 175 /* skip to semicolon separator */ 176 while (*pvp != NULL && strcmp(*pvp, ";") != 0) 177 pvp++; 178 if (*pvp++ == NULL || *pvp == NULL) 179 break; 180 181 /* complain about empty values */ 182 if (strcmp(*pvp, ";") == 0) 183 { 184 usrerr("mime8to7: Empty parameter in Content-Type header"); 185 186 /* avoid bounce loops */ 187 e->e_flags |= EF_DONT_MIME; 188 continue; 189 } 190 191 /* extract field name */ 192 argv[argc].a_field = *pvp++; 193 194 /* see if there is a value */ 195 if (*pvp != NULL && strcmp(*pvp, "=") == 0 && 196 (*++pvp == NULL || strcmp(*pvp, ";") != 0)) 197 { 198 argv[argc].a_value = *pvp; 199 argc++; 200 } 201 } 202 } 203 204 /* check for disaster cases */ 205 if (type == NULL) 206 type = "-none-"; 207 if (subtype == NULL) 208 subtype = "-none-"; 209 210 /* don't propogate some flags more than one level into the message */ 211 flags &= ~M87F_DIGEST; 212 213 /* 214 ** Check for cases that can not be encoded. 215 ** 216 ** For example, you can't encode certain kinds of types 217 ** or already-encoded messages. If we find this case, 218 ** just copy it through. 219 */ 220 221 (void) sm_snprintf(buf, sizeof buf, "%.100s/%.100s", type, subtype); 222 if (wordinclass(buf, 'n') || (cte != NULL && !wordinclass(cte, 'e'))) 223 flags |= M87F_NO8BIT; 224 225 # ifdef USE_B_CLASS 226 if (wordinclass(buf, 'b') || wordinclass(type, 'b')) 227 MapNLtoCRLF = false; 228 # endif /* USE_B_CLASS */ 229 if (wordinclass(buf, 'q') || wordinclass(type, 'q')) 230 use_qp = true; 231 232 /* 233 ** Multipart requires special processing. 234 ** 235 ** Do a recursive descent into the message. 236 */ 237 238 if (sm_strcasecmp(type, "multipart") == 0 && 239 (!bitset(M87F_NO8BIT, flags) || bitset(M87F_NO8TO7, flags))) 240 { 241 242 if (sm_strcasecmp(subtype, "digest") == 0) 243 flags |= M87F_DIGEST; 244 245 for (i = 0; i < argc; i++) 246 { 247 if (sm_strcasecmp(argv[i].a_field, "boundary") == 0) 248 break; 249 } 250 if (i >= argc || argv[i].a_value == NULL) 251 { 252 usrerr("mime8to7: Content-Type: \"%s\": %s boundary", 253 i >= argc ? "missing" : "bogus", p); 254 p = "---"; 255 256 /* avoid bounce loops */ 257 e->e_flags |= EF_DONT_MIME; 258 } 259 else 260 { 261 p = argv[i].a_value; 262 stripquotes(p); 263 } 264 if (sm_strlcpy(bbuf, p, sizeof bbuf) >= sizeof bbuf) 265 { 266 usrerr("mime8to7: multipart boundary \"%s\" too long", 267 p); 268 269 /* avoid bounce loops */ 270 e->e_flags |= EF_DONT_MIME; 271 } 272 273 if (tTd(43, 1)) 274 sm_dprintf("mime8to7: multipart boundary \"%s\"\n", 275 bbuf); 276 for (i = 0; i < MAXMIMENESTING; i++) 277 { 278 if (boundaries[i] == NULL) 279 break; 280 } 281 if (i >= MAXMIMENESTING) 282 { 283 usrerr("mime8to7: multipart nesting boundary too deep"); 284 285 /* avoid bounce loops */ 286 e->e_flags |= EF_DONT_MIME; 287 } 288 else 289 { 290 boundaries[i] = bbuf; 291 boundaries[i + 1] = NULL; 292 } 293 mci->mci_flags |= MCIF_INMIME; 294 295 /* skip the early "comment" prologue */ 296 putline("", mci); 297 mci->mci_flags &= ~MCIF_INHEADER; 298 bt = MBT_FINAL; 299 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 300 != NULL) 301 { 302 bt = mimeboundary(buf, boundaries); 303 if (bt != MBT_NOTSEP) 304 break; 305 putxline(buf, strlen(buf), mci, 306 PXLF_MAPFROM|PXLF_STRIP8BIT); 307 if (tTd(43, 99)) 308 sm_dprintf(" ...%s", buf); 309 } 310 if (sm_io_eof(e->e_dfp)) 311 bt = MBT_FINAL; 312 while (bt != MBT_FINAL) 313 { 314 auto HDR *hdr = NULL; 315 316 (void) sm_strlcpyn(buf, sizeof buf, 2, "--", bbuf); 317 putline(buf, mci); 318 if (tTd(43, 35)) 319 sm_dprintf(" ...%s\n", buf); 320 collect(e->e_dfp, false, &hdr, e); 321 if (tTd(43, 101)) 322 putline("+++after collect", mci); 323 putheader(mci, hdr, e, flags); 324 if (tTd(43, 101)) 325 putline("+++after putheader", mci); 326 bt = mime8to7(mci, hdr, e, boundaries, flags); 327 } 328 (void) sm_strlcpyn(buf, sizeof buf, 3, "--", bbuf, "--"); 329 putline(buf, mci); 330 if (tTd(43, 35)) 331 sm_dprintf(" ...%s\n", buf); 332 boundaries[i] = NULL; 333 mci->mci_flags &= ~MCIF_INMIME; 334 335 /* skip the late "comment" epilogue */ 336 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 337 != NULL) 338 { 339 bt = mimeboundary(buf, boundaries); 340 if (bt != MBT_NOTSEP) 341 break; 342 putxline(buf, strlen(buf), mci, 343 PXLF_MAPFROM|PXLF_STRIP8BIT); 344 if (tTd(43, 99)) 345 sm_dprintf(" ...%s", buf); 346 } 347 if (sm_io_eof(e->e_dfp)) 348 bt = MBT_FINAL; 349 if (tTd(43, 3)) 350 sm_dprintf("\t\t\tmime8to7=>%s (multipart)\n", 351 MimeBoundaryNames[bt]); 352 return bt; 353 } 354 355 /* 356 ** Message/xxx types -- recurse exactly once. 357 ** 358 ** Class 's' is predefined to have "rfc822" only. 359 */ 360 361 if (sm_strcasecmp(type, "message") == 0) 362 { 363 if (!wordinclass(subtype, 's')) 364 { 365 flags |= M87F_NO8BIT; 366 } 367 else 368 { 369 auto HDR *hdr = NULL; 370 371 putline("", mci); 372 373 mci->mci_flags |= MCIF_INMIME; 374 collect(e->e_dfp, false, &hdr, e); 375 if (tTd(43, 101)) 376 putline("+++after collect", mci); 377 putheader(mci, hdr, e, flags); 378 if (tTd(43, 101)) 379 putline("+++after putheader", mci); 380 if (hvalue("MIME-Version", hdr) == NULL) 381 putline("MIME-Version: 1.0", mci); 382 bt = mime8to7(mci, hdr, e, boundaries, flags); 383 mci->mci_flags &= ~MCIF_INMIME; 384 return bt; 385 } 386 } 387 388 /* 389 ** Non-compound body type 390 ** 391 ** Compute the ratio of seven to eight bit characters; 392 ** use that as a heuristic to decide how to do the 393 ** encoding. 394 */ 395 396 sectionsize = sectionhighbits = 0; 397 if (!bitset(M87F_NO8BIT|M87F_NO8TO7, flags)) 398 { 399 /* remember where we were */ 400 offset = sm_io_tell(e->e_dfp, SM_TIME_DEFAULT); 401 if (offset == -1) 402 syserr("mime8to7: cannot sm_io_tell on %cf%s", 403 DATAFL_LETTER, e->e_id); 404 405 /* do a scan of this body type to count character types */ 406 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 407 != NULL) 408 { 409 if (mimeboundary(buf, boundaries) != MBT_NOTSEP) 410 break; 411 for (p = buf; *p != '\0'; p++) 412 { 413 /* count bytes with the high bit set */ 414 sectionsize++; 415 if (bitset(0200, *p)) 416 sectionhighbits++; 417 } 418 419 /* 420 ** Heuristic: if 1/4 of the first 4K bytes are 8-bit, 421 ** assume base64. This heuristic avoids double-reading 422 ** large graphics or video files. 423 */ 424 425 if (sectionsize >= 4096 && 426 sectionhighbits > sectionsize / 4) 427 break; 428 } 429 430 /* return to the original offset for processing */ 431 /* XXX use relative seeks to handle >31 bit file sizes? */ 432 if (sm_io_seek(e->e_dfp, SM_TIME_DEFAULT, offset, SEEK_SET) < 0) 433 syserr("mime8to7: cannot sm_io_fseek on %cf%s", 434 DATAFL_LETTER, e->e_id); 435 else 436 sm_io_clearerr(e->e_dfp); 437 } 438 439 /* 440 ** Heuristically determine encoding method. 441 ** If more than 1/8 of the total characters have the 442 ** eighth bit set, use base64; else use quoted-printable. 443 ** However, only encode binary encoded data as base64, 444 ** since otherwise the NL=>CRLF mapping will be a problem. 445 */ 446 447 if (tTd(43, 8)) 448 { 449 sm_dprintf("mime8to7: %ld high bit(s) in %ld byte(s), cte=%s, type=%s/%s\n", 450 (long) sectionhighbits, (long) sectionsize, 451 cte == NULL ? "[none]" : cte, 452 type == NULL ? "[none]" : type, 453 subtype == NULL ? "[none]" : subtype); 454 } 455 if (cte != NULL && sm_strcasecmp(cte, "binary") == 0) 456 sectionsize = sectionhighbits; 457 linelen = 0; 458 bp = buf; 459 if (sectionhighbits == 0) 460 { 461 /* no encoding necessary */ 462 if (cte != NULL && 463 bitset(MCIF_CVT8TO7|MCIF_CVT7TO8|MCIF_INMIME, 464 mci->mci_flags) && 465 !bitset(M87F_NO8TO7, flags)) 466 { 467 /* 468 ** Skip _unless_ in MIME mode and potentially 469 ** converting from 8 bit to 7 bit MIME. See 470 ** putheader() for the counterpart where the 471 ** CTE header is skipped in the opposite 472 ** situation. 473 */ 474 475 (void) sm_snprintf(buf, sizeof buf, 476 "Content-Transfer-Encoding: %.200s", cte); 477 putline(buf, mci); 478 if (tTd(43, 36)) 479 sm_dprintf(" ...%s\n", buf); 480 } 481 putline("", mci); 482 mci->mci_flags &= ~MCIF_INHEADER; 483 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 484 != NULL) 485 { 486 bt = mimeboundary(buf, boundaries); 487 if (bt != MBT_NOTSEP) 488 break; 489 putline(buf, mci); 490 } 491 if (sm_io_eof(e->e_dfp)) 492 bt = MBT_FINAL; 493 } 494 else if (!MapNLtoCRLF || 495 (sectionsize / 8 < sectionhighbits && !use_qp)) 496 { 497 /* use base64 encoding */ 498 int c1, c2; 499 500 if (tTd(43, 36)) 501 sm_dprintf(" ...Content-Transfer-Encoding: base64\n"); 502 putline("Content-Transfer-Encoding: base64", mci); 503 (void) sm_snprintf(buf, sizeof buf, 504 "X-MIME-Autoconverted: from 8bit to base64 by %s id %s", 505 MyHostName, e->e_id); 506 putline(buf, mci); 507 putline("", mci); 508 mci->mci_flags &= ~MCIF_INHEADER; 509 while ((c1 = mime_getchar_crlf(e->e_dfp, boundaries, &bt)) != 510 SM_IO_EOF) 511 { 512 if (linelen > 71) 513 { 514 *bp = '\0'; 515 putline(buf, mci); 516 linelen = 0; 517 bp = buf; 518 } 519 linelen += 4; 520 *bp++ = Base64Code[(c1 >> 2)]; 521 c1 = (c1 & 0x03) << 4; 522 c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt); 523 if (c2 == SM_IO_EOF) 524 { 525 *bp++ = Base64Code[c1]; 526 *bp++ = '='; 527 *bp++ = '='; 528 break; 529 } 530 c1 |= (c2 >> 4) & 0x0f; 531 *bp++ = Base64Code[c1]; 532 c1 = (c2 & 0x0f) << 2; 533 c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt); 534 if (c2 == SM_IO_EOF) 535 { 536 *bp++ = Base64Code[c1]; 537 *bp++ = '='; 538 break; 539 } 540 c1 |= (c2 >> 6) & 0x03; 541 *bp++ = Base64Code[c1]; 542 *bp++ = Base64Code[c2 & 0x3f]; 543 } 544 *bp = '\0'; 545 putline(buf, mci); 546 } 547 else 548 { 549 /* use quoted-printable encoding */ 550 int c1, c2; 551 int fromstate; 552 BITMAP256 badchars; 553 554 /* set up map of characters that must be mapped */ 555 clrbitmap(badchars); 556 for (c1 = 0x00; c1 < 0x20; c1++) 557 setbitn(c1, badchars); 558 clrbitn('\t', badchars); 559 for (c1 = 0x7f; c1 < 0x100; c1++) 560 setbitn(c1, badchars); 561 setbitn('=', badchars); 562 if (bitnset(M_EBCDIC, mci->mci_mailer->m_flags)) 563 for (p = "!\"#$@[\\]^`{|}~"; *p != '\0'; p++) 564 setbitn(*p, badchars); 565 566 if (tTd(43, 36)) 567 sm_dprintf(" ...Content-Transfer-Encoding: quoted-printable\n"); 568 putline("Content-Transfer-Encoding: quoted-printable", mci); 569 (void) sm_snprintf(buf, sizeof buf, 570 "X-MIME-Autoconverted: from 8bit to quoted-printable by %s id %s", 571 MyHostName, e->e_id); 572 putline(buf, mci); 573 putline("", mci); 574 mci->mci_flags &= ~MCIF_INHEADER; 575 fromstate = 0; 576 c2 = '\n'; 577 while ((c1 = mime_getchar(e->e_dfp, boundaries, &bt)) != 578 SM_IO_EOF) 579 { 580 if (c1 == '\n') 581 { 582 if (c2 == ' ' || c2 == '\t') 583 { 584 *bp++ = '='; 585 *bp++ = Base16Code[(c2 >> 4) & 0x0f]; 586 *bp++ = Base16Code[c2 & 0x0f]; 587 } 588 if (buf[0] == '.' && bp == &buf[1]) 589 { 590 buf[0] = '='; 591 *bp++ = Base16Code[('.' >> 4) & 0x0f]; 592 *bp++ = Base16Code['.' & 0x0f]; 593 } 594 *bp = '\0'; 595 putline(buf, mci); 596 linelen = fromstate = 0; 597 bp = buf; 598 c2 = c1; 599 continue; 600 } 601 if (c2 == ' ' && linelen == 4 && fromstate == 4 && 602 bitnset(M_ESCFROM, mci->mci_mailer->m_flags)) 603 { 604 *bp++ = '='; 605 *bp++ = '2'; 606 *bp++ = '0'; 607 linelen += 3; 608 } 609 else if (c2 == ' ' || c2 == '\t') 610 { 611 *bp++ = c2; 612 linelen++; 613 } 614 if (linelen > 72 && 615 (linelen > 75 || c1 != '.' || 616 (linelen > 73 && c2 == '.'))) 617 { 618 if (linelen > 73 && c2 == '.') 619 bp--; 620 else 621 c2 = '\n'; 622 *bp++ = '='; 623 *bp = '\0'; 624 putline(buf, mci); 625 linelen = fromstate = 0; 626 bp = buf; 627 if (c2 == '.') 628 { 629 *bp++ = '.'; 630 linelen++; 631 } 632 } 633 if (bitnset(bitidx(c1), badchars)) 634 { 635 *bp++ = '='; 636 *bp++ = Base16Code[(c1 >> 4) & 0x0f]; 637 *bp++ = Base16Code[c1 & 0x0f]; 638 linelen += 3; 639 } 640 else if (c1 != ' ' && c1 != '\t') 641 { 642 if (linelen < 4 && c1 == "From"[linelen]) 643 fromstate++; 644 *bp++ = c1; 645 linelen++; 646 } 647 c2 = c1; 648 } 649 650 /* output any saved character */ 651 if (c2 == ' ' || c2 == '\t') 652 { 653 *bp++ = '='; 654 *bp++ = Base16Code[(c2 >> 4) & 0x0f]; 655 *bp++ = Base16Code[c2 & 0x0f]; 656 linelen += 3; 657 } 658 659 if (linelen > 0 || boundaries[0] != NULL) 660 { 661 *bp = '\0'; 662 putline(buf, mci); 663 } 664 665 } 666 if (tTd(43, 3)) 667 sm_dprintf("\t\t\tmime8to7=>%s (basic)\n", MimeBoundaryNames[bt]); 668 return bt; 669 } 670 /* 671 ** MIME_GETCHAR -- get a character for MIME processing 672 ** 673 ** Treats boundaries as SM_IO_EOF. 674 ** 675 ** Parameters: 676 ** fp -- the input file. 677 ** boundaries -- the current MIME boundaries. 678 ** btp -- if the return value is SM_IO_EOF, *btp is set to 679 ** the type of the boundary. 680 ** 681 ** Returns: 682 ** The next character in the input stream. 683 */ 684 685 static int 686 mime_getchar(fp, boundaries, btp) 687 register SM_FILE_T *fp; 688 char **boundaries; 689 int *btp; 690 { 691 int c; 692 static unsigned char *bp = NULL; 693 static int buflen = 0; 694 static bool atbol = true; /* at beginning of line */ 695 static int bt = MBT_SYNTAX; /* boundary type of next SM_IO_EOF */ 696 static unsigned char buf[128]; /* need not be a full line */ 697 int start = 0; /* indicates position of - in buffer */ 698 699 if (buflen == 1 && *bp == '\n') 700 { 701 /* last \n in buffer may be part of next MIME boundary */ 702 c = *bp; 703 } 704 else if (buflen > 0) 705 { 706 buflen--; 707 return *bp++; 708 } 709 else 710 c = sm_io_getc(fp, SM_TIME_DEFAULT); 711 bp = buf; 712 buflen = 0; 713 if (c == '\n') 714 { 715 /* might be part of a MIME boundary */ 716 *bp++ = c; 717 atbol = true; 718 c = sm_io_getc(fp, SM_TIME_DEFAULT); 719 if (c == '\n') 720 { 721 (void) sm_io_ungetc(fp, SM_TIME_DEFAULT, c); 722 return c; 723 } 724 start = 1; 725 } 726 if (c != SM_IO_EOF) 727 *bp++ = c; 728 else 729 bt = MBT_FINAL; 730 if (atbol && c == '-') 731 { 732 /* check for a message boundary */ 733 c = sm_io_getc(fp, SM_TIME_DEFAULT); 734 if (c != '-') 735 { 736 if (c != SM_IO_EOF) 737 *bp++ = c; 738 else 739 bt = MBT_FINAL; 740 buflen = bp - buf - 1; 741 bp = buf; 742 return *bp++; 743 } 744 745 /* got "--", now check for rest of separator */ 746 *bp++ = '-'; 747 while (bp < &buf[sizeof buf - 2] && 748 (c = sm_io_getc(fp, SM_TIME_DEFAULT)) != SM_IO_EOF && 749 c != '\n') 750 { 751 *bp++ = c; 752 } 753 *bp = '\0'; /* XXX simply cut off? */ 754 bt = mimeboundary((char *) &buf[start], boundaries); 755 switch (bt) 756 { 757 case MBT_FINAL: 758 case MBT_INTERMED: 759 /* we have a message boundary */ 760 buflen = 0; 761 *btp = bt; 762 return SM_IO_EOF; 763 } 764 765 atbol = c == '\n'; 766 if (c != SM_IO_EOF) 767 *bp++ = c; 768 } 769 770 buflen = bp - buf - 1; 771 if (buflen < 0) 772 { 773 *btp = bt; 774 return SM_IO_EOF; 775 } 776 bp = buf; 777 return *bp++; 778 } 779 /* 780 ** MIME_GETCHAR_CRLF -- do mime_getchar, but translate NL => CRLF 781 ** 782 ** Parameters: 783 ** fp -- the input file. 784 ** boundaries -- the current MIME boundaries. 785 ** btp -- if the return value is SM_IO_EOF, *btp is set to 786 ** the type of the boundary. 787 ** 788 ** Returns: 789 ** The next character in the input stream. 790 */ 791 792 static int 793 mime_getchar_crlf(fp, boundaries, btp) 794 register SM_FILE_T *fp; 795 char **boundaries; 796 int *btp; 797 { 798 static bool sendlf = false; 799 int c; 800 801 if (sendlf) 802 { 803 sendlf = false; 804 return '\n'; 805 } 806 c = mime_getchar(fp, boundaries, btp); 807 if (c == '\n' && MapNLtoCRLF) 808 { 809 sendlf = true; 810 return '\r'; 811 } 812 return c; 813 } 814 /* 815 ** MIMEBOUNDARY -- determine if this line is a MIME boundary & its type 816 ** 817 ** Parameters: 818 ** line -- the input line. 819 ** boundaries -- the set of currently pending boundaries. 820 ** 821 ** Returns: 822 ** MBT_NOTSEP -- if this is not a separator line 823 ** MBT_INTERMED -- if this is an intermediate separator 824 ** MBT_FINAL -- if this is a final boundary 825 ** MBT_SYNTAX -- if this is a boundary for the wrong 826 ** enclosure -- i.e., a syntax error. 827 */ 828 829 static int 830 mimeboundary(line, boundaries) 831 register char *line; 832 char **boundaries; 833 { 834 int type = MBT_NOTSEP; 835 int i; 836 int savec; 837 838 if (line[0] != '-' || line[1] != '-' || boundaries == NULL) 839 return MBT_NOTSEP; 840 i = strlen(line); 841 if (i > 0 && line[i - 1] == '\n') 842 i--; 843 844 /* strip off trailing whitespace */ 845 while (i > 0 && (line[i - 1] == ' ' || line[i - 1] == '\t')) 846 i--; 847 savec = line[i]; 848 line[i] = '\0'; 849 850 if (tTd(43, 5)) 851 sm_dprintf("mimeboundary: line=\"%s\"... ", line); 852 853 /* check for this as an intermediate boundary */ 854 if (isboundary(&line[2], boundaries) >= 0) 855 type = MBT_INTERMED; 856 else if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0) 857 { 858 /* check for a final boundary */ 859 line[i - 2] = '\0'; 860 if (isboundary(&line[2], boundaries) >= 0) 861 type = MBT_FINAL; 862 line[i - 2] = '-'; 863 } 864 865 line[i] = savec; 866 if (tTd(43, 5)) 867 sm_dprintf("%s\n", MimeBoundaryNames[type]); 868 return type; 869 } 870 /* 871 ** DEFCHARSET -- return default character set for message 872 ** 873 ** The first choice for character set is for the mailer 874 ** corresponding to the envelope sender. If neither that 875 ** nor the global configuration file has a default character 876 ** set defined, return "unknown-8bit" as recommended by 877 ** RFC 1428 section 3. 878 ** 879 ** Parameters: 880 ** e -- the envelope for this message. 881 ** 882 ** Returns: 883 ** The default character set for that mailer. 884 */ 885 886 char * 887 defcharset(e) 888 register ENVELOPE *e; 889 { 890 if (e != NULL && e->e_from.q_mailer != NULL && 891 e->e_from.q_mailer->m_defcharset != NULL) 892 return e->e_from.q_mailer->m_defcharset; 893 if (DefaultCharSet != NULL) 894 return DefaultCharSet; 895 return "unknown-8bit"; 896 } 897 /* 898 ** ISBOUNDARY -- is a given string a currently valid boundary? 899 ** 900 ** Parameters: 901 ** line -- the current input line. 902 ** boundaries -- the list of valid boundaries. 903 ** 904 ** Returns: 905 ** The index number in boundaries if the line is found. 906 ** -1 -- otherwise. 907 ** 908 */ 909 910 static int 911 isboundary(line, boundaries) 912 char *line; 913 char **boundaries; 914 { 915 register int i; 916 917 for (i = 0; i <= MAXMIMENESTING && boundaries[i] != NULL; i++) 918 { 919 if (strcmp(line, boundaries[i]) == 0) 920 return i; 921 } 922 return -1; 923 } 924 #endif /* MIME8TO7 */ 925 926 #if MIME7TO8 927 static int mime_fromqp __P((unsigned char *, unsigned char **, int, int)); 928 929 /* 930 ** MIME7TO8 -- output 7 bit encoded MIME body in 8 bit format 931 ** 932 ** This is a hack. Supports translating the two 7-bit body-encodings 933 ** (quoted-printable and base64) to 8-bit coded bodies. 934 ** 935 ** There is not much point in supporting multipart here, as the UA 936 ** will be able to deal with encoded MIME bodies if it can parse MIME 937 ** multipart messages. 938 ** 939 ** Note also that we wont be called unless it is a text/plain MIME 940 ** message, encoded base64 or QP and mailer flag '9' has been defined 941 ** on mailer. 942 ** 943 ** Contributed by Marius Olaffson <marius@rhi.hi.is>. 944 ** 945 ** Parameters: 946 ** mci -- mailer connection information. 947 ** header -- the header for this body part. 948 ** e -- envelope. 949 ** 950 ** Returns: 951 ** none. 952 */ 953 954 static char index_64[128] = 955 { 956 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 957 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 958 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63, 959 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1, 960 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14, 961 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1, 962 -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, 963 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1 964 }; 965 966 # define CHAR64(c) (((c) < 0 || (c) > 127) ? -1 : index_64[(c)]) 967 968 void 969 mime7to8(mci, header, e) 970 register MCI *mci; 971 HDR *header; 972 register ENVELOPE *e; 973 { 974 register char *p; 975 char *cte; 976 char **pvp; 977 unsigned char *fbufp; 978 char buf[MAXLINE]; 979 unsigned char fbuf[MAXLINE + 1]; 980 char pvpbuf[MAXLINE]; 981 extern unsigned char MimeTokenTab[256]; 982 983 p = hvalue("Content-Transfer-Encoding", header); 984 if (p == NULL || 985 (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL, 986 MimeTokenTab)) == NULL || 987 pvp[0] == NULL) 988 { 989 /* "can't happen" -- upper level should have caught this */ 990 syserr("mime7to8: unparsable CTE %s", p == NULL ? "<NULL>" : p); 991 992 /* avoid bounce loops */ 993 e->e_flags |= EF_DONT_MIME; 994 995 /* cheap failsafe algorithm -- should work on text/plain */ 996 if (p != NULL) 997 { 998 (void) sm_snprintf(buf, sizeof buf, 999 "Content-Transfer-Encoding: %s", p); 1000 putline(buf, mci); 1001 } 1002 putline("", mci); 1003 mci->mci_flags &= ~MCIF_INHEADER; 1004 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 1005 != NULL) 1006 putline(buf, mci); 1007 return; 1008 } 1009 cataddr(pvp, NULL, buf, sizeof buf, '\0'); 1010 cte = sm_rpool_strdup_x(e->e_rpool, buf); 1011 1012 mci->mci_flags |= MCIF_INHEADER; 1013 putline("Content-Transfer-Encoding: 8bit", mci); 1014 (void) sm_snprintf(buf, sizeof buf, 1015 "X-MIME-Autoconverted: from %.200s to 8bit by %s id %s", 1016 cte, MyHostName, e->e_id); 1017 putline(buf, mci); 1018 putline("", mci); 1019 mci->mci_flags &= ~MCIF_INHEADER; 1020 1021 /* 1022 ** Translate body encoding to 8-bit. Supports two types of 1023 ** encodings; "base64" and "quoted-printable". Assume qp if 1024 ** it is not base64. 1025 */ 1026 1027 if (sm_strcasecmp(cte, "base64") == 0) 1028 { 1029 int c1, c2, c3, c4; 1030 1031 fbufp = fbuf; 1032 while ((c1 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT)) != 1033 SM_IO_EOF) 1034 { 1035 if (isascii(c1) && isspace(c1)) 1036 continue; 1037 1038 do 1039 { 1040 c2 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT); 1041 } while (isascii(c2) && isspace(c2)); 1042 if (c2 == SM_IO_EOF) 1043 break; 1044 1045 do 1046 { 1047 c3 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT); 1048 } while (isascii(c3) && isspace(c3)); 1049 if (c3 == SM_IO_EOF) 1050 break; 1051 1052 do 1053 { 1054 c4 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT); 1055 } while (isascii(c4) && isspace(c4)); 1056 if (c4 == SM_IO_EOF) 1057 break; 1058 1059 if (c1 == '=' || c2 == '=') 1060 continue; 1061 c1 = CHAR64(c1); 1062 c2 = CHAR64(c2); 1063 1064 *fbufp = (c1 << 2) | ((c2 & 0x30) >> 4); 1065 if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE]) 1066 { 1067 if (*--fbufp != '\n' || 1068 (fbufp > fbuf && *--fbufp != '\r')) 1069 fbufp++; 1070 putxline((char *) fbuf, fbufp - fbuf, 1071 mci, PXLF_MAPFROM); 1072 fbufp = fbuf; 1073 } 1074 if (c3 == '=') 1075 continue; 1076 c3 = CHAR64(c3); 1077 *fbufp = ((c2 & 0x0f) << 4) | ((c3 & 0x3c) >> 2); 1078 if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE]) 1079 { 1080 if (*--fbufp != '\n' || 1081 (fbufp > fbuf && *--fbufp != '\r')) 1082 fbufp++; 1083 putxline((char *) fbuf, fbufp - fbuf, 1084 mci, PXLF_MAPFROM); 1085 fbufp = fbuf; 1086 } 1087 if (c4 == '=') 1088 continue; 1089 c4 = CHAR64(c4); 1090 *fbufp = ((c3 & 0x03) << 6) | c4; 1091 if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE]) 1092 { 1093 if (*--fbufp != '\n' || 1094 (fbufp > fbuf && *--fbufp != '\r')) 1095 fbufp++; 1096 putxline((char *) fbuf, fbufp - fbuf, 1097 mci, PXLF_MAPFROM); 1098 fbufp = fbuf; 1099 } 1100 } 1101 } 1102 else 1103 { 1104 /* quoted-printable */ 1105 fbufp = fbuf; 1106 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 1107 != NULL) 1108 { 1109 if (mime_fromqp((unsigned char *) buf, &fbufp, 0, 1110 &fbuf[MAXLINE] - fbufp) == 0) 1111 continue; 1112 1113 if (fbufp - fbuf > 0) 1114 putxline((char *) fbuf, fbufp - fbuf - 1, mci, 1115 PXLF_MAPFROM); 1116 fbufp = fbuf; 1117 } 1118 } 1119 1120 /* force out partial last line */ 1121 if (fbufp > fbuf) 1122 { 1123 *fbufp = '\0'; 1124 putxline((char *) fbuf, fbufp - fbuf, mci, PXLF_MAPFROM); 1125 } 1126 if (tTd(43, 3)) 1127 sm_dprintf("\t\t\tmime7to8 => %s to 8bit done\n", cte); 1128 } 1129 /* 1130 ** The following is based on Borenstein's "codes.c" module, with simplifying 1131 ** changes as we do not deal with multipart, and to do the translation in-core, 1132 ** with an attempt to prevent overrun of output buffers. 1133 ** 1134 ** What is needed here are changes to defend this code better against 1135 ** bad encodings. Questionable to always return 0xFF for bad mappings. 1136 */ 1137 1138 static char index_hex[128] = 1139 { 1140 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1141 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1142 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1143 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1, -1,-1,-1,-1, 1144 -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1145 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1146 -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1147 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1 1148 }; 1149 1150 # define HEXCHAR(c) (((c) < 0 || (c) > 127) ? -1 : index_hex[(c)]) 1151 1152 static int 1153 mime_fromqp(infile, outfile, state, maxlen) 1154 unsigned char *infile; 1155 unsigned char **outfile; 1156 int state; /* Decoding body (0) or header (1) */ 1157 int maxlen; /* Max # of chars allowed in outfile */ 1158 { 1159 int c1, c2; 1160 int nchar = 0; 1161 1162 if (maxlen < 0) 1163 return 0; 1164 1165 while ((c1 = *infile++) != '\0') 1166 { 1167 if (c1 == '=') 1168 { 1169 if ((c1 = *infile++) == 0) 1170 break; 1171 1172 if (c1 == '\n' || (c1 = HEXCHAR(c1)) == -1) 1173 { 1174 /* ignore it */ 1175 if (state == 0) 1176 return 0; 1177 } 1178 else 1179 { 1180 do 1181 { 1182 if ((c2 = *infile++) == '\0') 1183 { 1184 c2 = -1; 1185 break; 1186 } 1187 } while ((c2 = HEXCHAR(c2)) == -1); 1188 1189 if (c2 == -1 || ++nchar > maxlen) 1190 break; 1191 1192 *(*outfile)++ = c1 << 4 | c2; 1193 } 1194 } 1195 else 1196 { 1197 if (state == 1 && c1 == '_') 1198 c1 = ' '; 1199 1200 if (++nchar > maxlen) 1201 break; 1202 1203 *(*outfile)++ = c1; 1204 1205 if (c1 == '\n') 1206 break; 1207 } 1208 } 1209 *(*outfile)++ = '\0'; 1210 return 1; 1211 } 1212 #endif /* MIME7TO8 */ 1213