1 /* 2 * Copyright (c) 1998-2003 Sendmail, Inc. and its suppliers. 3 * All rights reserved. 4 * Copyright (c) 1994, 1996-1997 Eric P. Allman. All rights reserved. 5 * Copyright (c) 1994 6 * The Regents of the University of California. All rights reserved. 7 * 8 * By using this file, you agree to the terms and conditions set 9 * forth in the LICENSE file which can be found at the top level of 10 * the sendmail distribution. 11 * 12 */ 13 14 #include <sendmail.h> 15 #include <string.h> 16 17 SM_RCSID("@(#)$Id: mime.c,v 8.136 2004/03/22 18:21:34 ca Exp $") 18 19 /* 20 ** MIME support. 21 ** 22 ** I am indebted to John Beck of Hewlett-Packard, who contributed 23 ** his code to me for inclusion. As it turns out, I did not use 24 ** his code since he used a "minimum change" approach that used 25 ** several temp files, and I wanted a "minimum impact" approach 26 ** that would avoid copying. However, looking over his code 27 ** helped me cement my understanding of the problem. 28 ** 29 ** I also looked at, but did not directly use, Nathaniel 30 ** Borenstein's "code.c" module. Again, it functioned as 31 ** a file-to-file translator, which did not fit within my 32 ** design bounds, but it was a useful base for understanding 33 ** the problem. 34 */ 35 36 /* use "old" mime 7 to 8 algorithm by default */ 37 #ifndef MIME7TO8_OLD 38 # define MIME7TO8_OLD 1 39 #endif /* ! MIME7TO8_OLD */ 40 41 #if MIME8TO7 42 static int isboundary __P((char *, char **)); 43 static int mimeboundary __P((char *, char **)); 44 static int mime_getchar __P((SM_FILE_T *, char **, int *)); 45 static int mime_getchar_crlf __P((SM_FILE_T *, char **, int *)); 46 47 /* character set for hex and base64 encoding */ 48 static char Base16Code[] = "0123456789ABCDEF"; 49 static char Base64Code[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 50 51 /* types of MIME boundaries */ 52 # define MBT_SYNTAX 0 /* syntax error */ 53 # define MBT_NOTSEP 1 /* not a boundary */ 54 # define MBT_INTERMED 2 /* intermediate boundary (no trailing --) */ 55 # define MBT_FINAL 3 /* final boundary (trailing -- included) */ 56 57 static char *MimeBoundaryNames[] = 58 { 59 "SYNTAX", "NOTSEP", "INTERMED", "FINAL" 60 }; 61 62 static bool MapNLtoCRLF; 63 64 /* 65 ** MIME8TO7 -- output 8 bit body in 7 bit format 66 ** 67 ** The header has already been output -- this has to do the 68 ** 8 to 7 bit conversion. It would be easy if we didn't have 69 ** to deal with nested formats (multipart/xxx and message/rfc822). 70 ** 71 ** We won't be called if we don't have to do a conversion, and 72 ** appropriate MIME-Version: and Content-Type: fields have been 73 ** output. Any Content-Transfer-Encoding: field has not been 74 ** output, and we can add it here. 75 ** 76 ** Parameters: 77 ** mci -- mailer connection information. 78 ** header -- the header for this body part. 79 ** e -- envelope. 80 ** boundaries -- the currently pending message boundaries. 81 ** NULL if we are processing the outer portion. 82 ** flags -- to tweak processing. 83 ** 84 ** Returns: 85 ** An indicator of what terminated the message part: 86 ** MBT_FINAL -- the final boundary 87 ** MBT_INTERMED -- an intermediate boundary 88 ** MBT_NOTSEP -- an end of file 89 */ 90 91 struct args 92 { 93 char *a_field; /* name of field */ 94 char *a_value; /* value of that field */ 95 }; 96 97 int 98 mime8to7(mci, header, e, boundaries, flags) 99 register MCI *mci; 100 HDR *header; 101 register ENVELOPE *e; 102 char **boundaries; 103 int flags; 104 { 105 register char *p; 106 int linelen; 107 int bt; 108 off_t offset; 109 size_t sectionsize, sectionhighbits; 110 int i; 111 char *type; 112 char *subtype; 113 char *cte; 114 char **pvp; 115 int argc = 0; 116 char *bp; 117 bool use_qp = false; 118 struct args argv[MAXMIMEARGS]; 119 char bbuf[128]; 120 char buf[MAXLINE]; 121 char pvpbuf[MAXLINE]; 122 extern unsigned char MimeTokenTab[256]; 123 124 if (tTd(43, 1)) 125 { 126 sm_dprintf("mime8to7: flags = %x, boundaries =", flags); 127 if (boundaries[0] == NULL) 128 sm_dprintf(" <none>"); 129 else 130 { 131 for (i = 0; boundaries[i] != NULL; i++) 132 sm_dprintf(" %s", boundaries[i]); 133 } 134 sm_dprintf("\n"); 135 } 136 MapNLtoCRLF = true; 137 p = hvalue("Content-Transfer-Encoding", header); 138 if (p == NULL || 139 (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL, 140 MimeTokenTab, false)) == NULL || 141 pvp[0] == NULL) 142 { 143 cte = NULL; 144 } 145 else 146 { 147 cataddr(pvp, NULL, buf, sizeof buf, '\0'); 148 cte = sm_rpool_strdup_x(e->e_rpool, buf); 149 } 150 151 type = subtype = NULL; 152 p = hvalue("Content-Type", header); 153 if (p == NULL) 154 { 155 if (bitset(M87F_DIGEST, flags)) 156 p = "message/rfc822"; 157 else 158 p = "text/plain"; 159 } 160 if (p != NULL && 161 (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL, 162 MimeTokenTab, false)) != NULL && 163 pvp[0] != NULL) 164 { 165 if (tTd(43, 40)) 166 { 167 for (i = 0; pvp[i] != NULL; i++) 168 sm_dprintf("pvp[%d] = \"%s\"\n", i, pvp[i]); 169 } 170 type = *pvp++; 171 if (*pvp != NULL && strcmp(*pvp, "/") == 0 && 172 *++pvp != NULL) 173 { 174 subtype = *pvp++; 175 } 176 177 /* break out parameters */ 178 while (*pvp != NULL && argc < MAXMIMEARGS) 179 { 180 /* skip to semicolon separator */ 181 while (*pvp != NULL && strcmp(*pvp, ";") != 0) 182 pvp++; 183 if (*pvp++ == NULL || *pvp == NULL) 184 break; 185 186 /* complain about empty values */ 187 if (strcmp(*pvp, ";") == 0) 188 { 189 usrerr("mime8to7: Empty parameter in Content-Type header"); 190 191 /* avoid bounce loops */ 192 e->e_flags |= EF_DONT_MIME; 193 continue; 194 } 195 196 /* extract field name */ 197 argv[argc].a_field = *pvp++; 198 199 /* see if there is a value */ 200 if (*pvp != NULL && strcmp(*pvp, "=") == 0 && 201 (*++pvp == NULL || strcmp(*pvp, ";") != 0)) 202 { 203 argv[argc].a_value = *pvp; 204 argc++; 205 } 206 } 207 } 208 209 /* check for disaster cases */ 210 if (type == NULL) 211 type = "-none-"; 212 if (subtype == NULL) 213 subtype = "-none-"; 214 215 /* don't propogate some flags more than one level into the message */ 216 flags &= ~M87F_DIGEST; 217 218 /* 219 ** Check for cases that can not be encoded. 220 ** 221 ** For example, you can't encode certain kinds of types 222 ** or already-encoded messages. If we find this case, 223 ** just copy it through. 224 */ 225 226 (void) sm_snprintf(buf, sizeof buf, "%.100s/%.100s", type, subtype); 227 if (wordinclass(buf, 'n') || (cte != NULL && !wordinclass(cte, 'e'))) 228 flags |= M87F_NO8BIT; 229 230 # ifdef USE_B_CLASS 231 if (wordinclass(buf, 'b') || wordinclass(type, 'b')) 232 MapNLtoCRLF = false; 233 # endif /* USE_B_CLASS */ 234 if (wordinclass(buf, 'q') || wordinclass(type, 'q')) 235 use_qp = true; 236 237 /* 238 ** Multipart requires special processing. 239 ** 240 ** Do a recursive descent into the message. 241 */ 242 243 if (sm_strcasecmp(type, "multipart") == 0 && 244 (!bitset(M87F_NO8BIT, flags) || bitset(M87F_NO8TO7, flags))) 245 { 246 247 if (sm_strcasecmp(subtype, "digest") == 0) 248 flags |= M87F_DIGEST; 249 250 for (i = 0; i < argc; i++) 251 { 252 if (sm_strcasecmp(argv[i].a_field, "boundary") == 0) 253 break; 254 } 255 if (i >= argc || argv[i].a_value == NULL) 256 { 257 usrerr("mime8to7: Content-Type: \"%s\": %s boundary", 258 i >= argc ? "missing" : "bogus", p); 259 p = "---"; 260 261 /* avoid bounce loops */ 262 e->e_flags |= EF_DONT_MIME; 263 } 264 else 265 { 266 p = argv[i].a_value; 267 stripquotes(p); 268 } 269 if (sm_strlcpy(bbuf, p, sizeof bbuf) >= sizeof bbuf) 270 { 271 usrerr("mime8to7: multipart boundary \"%s\" too long", 272 p); 273 274 /* avoid bounce loops */ 275 e->e_flags |= EF_DONT_MIME; 276 } 277 278 if (tTd(43, 1)) 279 sm_dprintf("mime8to7: multipart boundary \"%s\"\n", 280 bbuf); 281 for (i = 0; i < MAXMIMENESTING; i++) 282 { 283 if (boundaries[i] == NULL) 284 break; 285 } 286 if (i >= MAXMIMENESTING) 287 { 288 usrerr("mime8to7: multipart nesting boundary too deep"); 289 290 /* avoid bounce loops */ 291 e->e_flags |= EF_DONT_MIME; 292 } 293 else 294 { 295 boundaries[i] = bbuf; 296 boundaries[i + 1] = NULL; 297 } 298 mci->mci_flags |= MCIF_INMIME; 299 300 /* skip the early "comment" prologue */ 301 putline("", mci); 302 mci->mci_flags &= ~MCIF_INHEADER; 303 bt = MBT_FINAL; 304 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 305 != NULL) 306 { 307 bt = mimeboundary(buf, boundaries); 308 if (bt != MBT_NOTSEP) 309 break; 310 putxline(buf, strlen(buf), mci, 311 PXLF_MAPFROM|PXLF_STRIP8BIT); 312 if (tTd(43, 99)) 313 sm_dprintf(" ...%s", buf); 314 } 315 if (sm_io_eof(e->e_dfp)) 316 bt = MBT_FINAL; 317 while (bt != MBT_FINAL) 318 { 319 auto HDR *hdr = NULL; 320 321 (void) sm_strlcpyn(buf, sizeof buf, 2, "--", bbuf); 322 putline(buf, mci); 323 if (tTd(43, 35)) 324 sm_dprintf(" ...%s\n", buf); 325 collect(e->e_dfp, false, &hdr, e, false); 326 if (tTd(43, 101)) 327 putline("+++after collect", mci); 328 putheader(mci, hdr, e, flags); 329 if (tTd(43, 101)) 330 putline("+++after putheader", mci); 331 bt = mime8to7(mci, hdr, e, boundaries, flags); 332 } 333 (void) sm_strlcpyn(buf, sizeof buf, 3, "--", bbuf, "--"); 334 putline(buf, mci); 335 if (tTd(43, 35)) 336 sm_dprintf(" ...%s\n", buf); 337 boundaries[i] = NULL; 338 mci->mci_flags &= ~MCIF_INMIME; 339 340 /* skip the late "comment" epilogue */ 341 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 342 != NULL) 343 { 344 bt = mimeboundary(buf, boundaries); 345 if (bt != MBT_NOTSEP) 346 break; 347 putxline(buf, strlen(buf), mci, 348 PXLF_MAPFROM|PXLF_STRIP8BIT); 349 if (tTd(43, 99)) 350 sm_dprintf(" ...%s", buf); 351 } 352 if (sm_io_eof(e->e_dfp)) 353 bt = MBT_FINAL; 354 if (tTd(43, 3)) 355 sm_dprintf("\t\t\tmime8to7=>%s (multipart)\n", 356 MimeBoundaryNames[bt]); 357 return bt; 358 } 359 360 /* 361 ** Message/xxx types -- recurse exactly once. 362 ** 363 ** Class 's' is predefined to have "rfc822" only. 364 */ 365 366 if (sm_strcasecmp(type, "message") == 0) 367 { 368 if (!wordinclass(subtype, 's')) 369 { 370 flags |= M87F_NO8BIT; 371 } 372 else 373 { 374 auto HDR *hdr = NULL; 375 376 putline("", mci); 377 378 mci->mci_flags |= MCIF_INMIME; 379 collect(e->e_dfp, false, &hdr, e, false); 380 if (tTd(43, 101)) 381 putline("+++after collect", mci); 382 putheader(mci, hdr, e, flags); 383 if (tTd(43, 101)) 384 putline("+++after putheader", mci); 385 if (hvalue("MIME-Version", hdr) == NULL && 386 !bitset(M87F_NO8TO7, flags)) 387 putline("MIME-Version: 1.0", mci); 388 bt = mime8to7(mci, hdr, e, boundaries, flags); 389 mci->mci_flags &= ~MCIF_INMIME; 390 return bt; 391 } 392 } 393 394 /* 395 ** Non-compound body type 396 ** 397 ** Compute the ratio of seven to eight bit characters; 398 ** use that as a heuristic to decide how to do the 399 ** encoding. 400 */ 401 402 sectionsize = sectionhighbits = 0; 403 if (!bitset(M87F_NO8BIT|M87F_NO8TO7, flags)) 404 { 405 /* remember where we were */ 406 offset = sm_io_tell(e->e_dfp, SM_TIME_DEFAULT); 407 if (offset == -1) 408 syserr("mime8to7: cannot sm_io_tell on %cf%s", 409 DATAFL_LETTER, e->e_id); 410 411 /* do a scan of this body type to count character types */ 412 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 413 != NULL) 414 { 415 if (mimeboundary(buf, boundaries) != MBT_NOTSEP) 416 break; 417 for (p = buf; *p != '\0'; p++) 418 { 419 /* count bytes with the high bit set */ 420 sectionsize++; 421 if (bitset(0200, *p)) 422 sectionhighbits++; 423 } 424 425 /* 426 ** Heuristic: if 1/4 of the first 4K bytes are 8-bit, 427 ** assume base64. This heuristic avoids double-reading 428 ** large graphics or video files. 429 */ 430 431 if (sectionsize >= 4096 && 432 sectionhighbits > sectionsize / 4) 433 break; 434 } 435 436 /* return to the original offset for processing */ 437 /* XXX use relative seeks to handle >31 bit file sizes? */ 438 if (sm_io_seek(e->e_dfp, SM_TIME_DEFAULT, offset, SEEK_SET) < 0) 439 syserr("mime8to7: cannot sm_io_fseek on %cf%s", 440 DATAFL_LETTER, e->e_id); 441 else 442 sm_io_clearerr(e->e_dfp); 443 } 444 445 /* 446 ** Heuristically determine encoding method. 447 ** If more than 1/8 of the total characters have the 448 ** eighth bit set, use base64; else use quoted-printable. 449 ** However, only encode binary encoded data as base64, 450 ** since otherwise the NL=>CRLF mapping will be a problem. 451 */ 452 453 if (tTd(43, 8)) 454 { 455 sm_dprintf("mime8to7: %ld high bit(s) in %ld byte(s), cte=%s, type=%s/%s\n", 456 (long) sectionhighbits, (long) sectionsize, 457 cte == NULL ? "[none]" : cte, 458 type == NULL ? "[none]" : type, 459 subtype == NULL ? "[none]" : subtype); 460 } 461 if (cte != NULL && sm_strcasecmp(cte, "binary") == 0) 462 sectionsize = sectionhighbits; 463 linelen = 0; 464 bp = buf; 465 if (sectionhighbits == 0) 466 { 467 /* no encoding necessary */ 468 if (cte != NULL && 469 bitset(MCIF_CVT8TO7|MCIF_CVT7TO8|MCIF_INMIME, 470 mci->mci_flags) && 471 !bitset(M87F_NO8TO7, flags)) 472 { 473 /* 474 ** Skip _unless_ in MIME mode and potentially 475 ** converting from 8 bit to 7 bit MIME. See 476 ** putheader() for the counterpart where the 477 ** CTE header is skipped in the opposite 478 ** situation. 479 */ 480 481 (void) sm_snprintf(buf, sizeof buf, 482 "Content-Transfer-Encoding: %.200s", cte); 483 putline(buf, mci); 484 if (tTd(43, 36)) 485 sm_dprintf(" ...%s\n", buf); 486 } 487 putline("", mci); 488 mci->mci_flags &= ~MCIF_INHEADER; 489 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 490 != NULL) 491 { 492 bt = mimeboundary(buf, boundaries); 493 if (bt != MBT_NOTSEP) 494 break; 495 putline(buf, mci); 496 } 497 if (sm_io_eof(e->e_dfp)) 498 bt = MBT_FINAL; 499 } 500 else if (!MapNLtoCRLF || 501 (sectionsize / 8 < sectionhighbits && !use_qp)) 502 { 503 /* use base64 encoding */ 504 int c1, c2; 505 506 if (tTd(43, 36)) 507 sm_dprintf(" ...Content-Transfer-Encoding: base64\n"); 508 putline("Content-Transfer-Encoding: base64", mci); 509 (void) sm_snprintf(buf, sizeof buf, 510 "X-MIME-Autoconverted: from 8bit to base64 by %s id %s", 511 MyHostName, e->e_id); 512 putline(buf, mci); 513 putline("", mci); 514 mci->mci_flags &= ~MCIF_INHEADER; 515 while ((c1 = mime_getchar_crlf(e->e_dfp, boundaries, &bt)) != 516 SM_IO_EOF) 517 { 518 if (linelen > 71) 519 { 520 *bp = '\0'; 521 putline(buf, mci); 522 linelen = 0; 523 bp = buf; 524 } 525 linelen += 4; 526 *bp++ = Base64Code[(c1 >> 2)]; 527 c1 = (c1 & 0x03) << 4; 528 c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt); 529 if (c2 == SM_IO_EOF) 530 { 531 *bp++ = Base64Code[c1]; 532 *bp++ = '='; 533 *bp++ = '='; 534 break; 535 } 536 c1 |= (c2 >> 4) & 0x0f; 537 *bp++ = Base64Code[c1]; 538 c1 = (c2 & 0x0f) << 2; 539 c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt); 540 if (c2 == SM_IO_EOF) 541 { 542 *bp++ = Base64Code[c1]; 543 *bp++ = '='; 544 break; 545 } 546 c1 |= (c2 >> 6) & 0x03; 547 *bp++ = Base64Code[c1]; 548 *bp++ = Base64Code[c2 & 0x3f]; 549 } 550 *bp = '\0'; 551 putline(buf, mci); 552 } 553 else 554 { 555 /* use quoted-printable encoding */ 556 int c1, c2; 557 int fromstate; 558 BITMAP256 badchars; 559 560 /* set up map of characters that must be mapped */ 561 clrbitmap(badchars); 562 for (c1 = 0x00; c1 < 0x20; c1++) 563 setbitn(c1, badchars); 564 clrbitn('\t', badchars); 565 for (c1 = 0x7f; c1 < 0x100; c1++) 566 setbitn(c1, badchars); 567 setbitn('=', badchars); 568 if (bitnset(M_EBCDIC, mci->mci_mailer->m_flags)) 569 for (p = "!\"#$@[\\]^`{|}~"; *p != '\0'; p++) 570 setbitn(*p, badchars); 571 572 if (tTd(43, 36)) 573 sm_dprintf(" ...Content-Transfer-Encoding: quoted-printable\n"); 574 putline("Content-Transfer-Encoding: quoted-printable", mci); 575 (void) sm_snprintf(buf, sizeof buf, 576 "X-MIME-Autoconverted: from 8bit to quoted-printable by %s id %s", 577 MyHostName, e->e_id); 578 putline(buf, mci); 579 putline("", mci); 580 mci->mci_flags &= ~MCIF_INHEADER; 581 fromstate = 0; 582 c2 = '\n'; 583 while ((c1 = mime_getchar(e->e_dfp, boundaries, &bt)) != 584 SM_IO_EOF) 585 { 586 if (c1 == '\n') 587 { 588 if (c2 == ' ' || c2 == '\t') 589 { 590 *bp++ = '='; 591 *bp++ = Base16Code[(c2 >> 4) & 0x0f]; 592 *bp++ = Base16Code[c2 & 0x0f]; 593 } 594 if (buf[0] == '.' && bp == &buf[1]) 595 { 596 buf[0] = '='; 597 *bp++ = Base16Code[('.' >> 4) & 0x0f]; 598 *bp++ = Base16Code['.' & 0x0f]; 599 } 600 *bp = '\0'; 601 putline(buf, mci); 602 linelen = fromstate = 0; 603 bp = buf; 604 c2 = c1; 605 continue; 606 } 607 if (c2 == ' ' && linelen == 4 && fromstate == 4 && 608 bitnset(M_ESCFROM, mci->mci_mailer->m_flags)) 609 { 610 *bp++ = '='; 611 *bp++ = '2'; 612 *bp++ = '0'; 613 linelen += 3; 614 } 615 else if (c2 == ' ' || c2 == '\t') 616 { 617 *bp++ = c2; 618 linelen++; 619 } 620 if (linelen > 72 && 621 (linelen > 75 || c1 != '.' || 622 (linelen > 73 && c2 == '.'))) 623 { 624 if (linelen > 73 && c2 == '.') 625 bp--; 626 else 627 c2 = '\n'; 628 *bp++ = '='; 629 *bp = '\0'; 630 putline(buf, mci); 631 linelen = fromstate = 0; 632 bp = buf; 633 if (c2 == '.') 634 { 635 *bp++ = '.'; 636 linelen++; 637 } 638 } 639 if (bitnset(bitidx(c1), badchars)) 640 { 641 *bp++ = '='; 642 *bp++ = Base16Code[(c1 >> 4) & 0x0f]; 643 *bp++ = Base16Code[c1 & 0x0f]; 644 linelen += 3; 645 } 646 else if (c1 != ' ' && c1 != '\t') 647 { 648 if (linelen < 4 && c1 == "From"[linelen]) 649 fromstate++; 650 *bp++ = c1; 651 linelen++; 652 } 653 c2 = c1; 654 } 655 656 /* output any saved character */ 657 if (c2 == ' ' || c2 == '\t') 658 { 659 *bp++ = '='; 660 *bp++ = Base16Code[(c2 >> 4) & 0x0f]; 661 *bp++ = Base16Code[c2 & 0x0f]; 662 linelen += 3; 663 } 664 665 if (linelen > 0 || boundaries[0] != NULL) 666 { 667 *bp = '\0'; 668 putline(buf, mci); 669 } 670 671 } 672 if (tTd(43, 3)) 673 sm_dprintf("\t\t\tmime8to7=>%s (basic)\n", MimeBoundaryNames[bt]); 674 return bt; 675 } 676 /* 677 ** MIME_GETCHAR -- get a character for MIME processing 678 ** 679 ** Treats boundaries as SM_IO_EOF. 680 ** 681 ** Parameters: 682 ** fp -- the input file. 683 ** boundaries -- the current MIME boundaries. 684 ** btp -- if the return value is SM_IO_EOF, *btp is set to 685 ** the type of the boundary. 686 ** 687 ** Returns: 688 ** The next character in the input stream. 689 */ 690 691 static int 692 mime_getchar(fp, boundaries, btp) 693 register SM_FILE_T *fp; 694 char **boundaries; 695 int *btp; 696 { 697 int c; 698 static unsigned char *bp = NULL; 699 static int buflen = 0; 700 static bool atbol = true; /* at beginning of line */ 701 static int bt = MBT_SYNTAX; /* boundary type of next SM_IO_EOF */ 702 static unsigned char buf[128]; /* need not be a full line */ 703 int start = 0; /* indicates position of - in buffer */ 704 705 if (buflen == 1 && *bp == '\n') 706 { 707 /* last \n in buffer may be part of next MIME boundary */ 708 c = *bp; 709 } 710 else if (buflen > 0) 711 { 712 buflen--; 713 return *bp++; 714 } 715 else 716 c = sm_io_getc(fp, SM_TIME_DEFAULT); 717 bp = buf; 718 buflen = 0; 719 if (c == '\n') 720 { 721 /* might be part of a MIME boundary */ 722 *bp++ = c; 723 atbol = true; 724 c = sm_io_getc(fp, SM_TIME_DEFAULT); 725 if (c == '\n') 726 { 727 (void) sm_io_ungetc(fp, SM_TIME_DEFAULT, c); 728 return c; 729 } 730 start = 1; 731 } 732 if (c != SM_IO_EOF) 733 *bp++ = c; 734 else 735 bt = MBT_FINAL; 736 if (atbol && c == '-') 737 { 738 /* check for a message boundary */ 739 c = sm_io_getc(fp, SM_TIME_DEFAULT); 740 if (c != '-') 741 { 742 if (c != SM_IO_EOF) 743 *bp++ = c; 744 else 745 bt = MBT_FINAL; 746 buflen = bp - buf - 1; 747 bp = buf; 748 return *bp++; 749 } 750 751 /* got "--", now check for rest of separator */ 752 *bp++ = '-'; 753 while (bp < &buf[sizeof buf - 2] && 754 (c = sm_io_getc(fp, SM_TIME_DEFAULT)) != SM_IO_EOF && 755 c != '\n') 756 { 757 *bp++ = c; 758 } 759 *bp = '\0'; /* XXX simply cut off? */ 760 bt = mimeboundary((char *) &buf[start], boundaries); 761 switch (bt) 762 { 763 case MBT_FINAL: 764 case MBT_INTERMED: 765 /* we have a message boundary */ 766 buflen = 0; 767 *btp = bt; 768 return SM_IO_EOF; 769 } 770 771 if (bp < &buf[sizeof buf - 2] && c != SM_IO_EOF) 772 *bp++ = c; 773 } 774 775 atbol = c == '\n'; 776 buflen = bp - buf - 1; 777 if (buflen < 0) 778 { 779 *btp = bt; 780 return SM_IO_EOF; 781 } 782 bp = buf; 783 return *bp++; 784 } 785 /* 786 ** MIME_GETCHAR_CRLF -- do mime_getchar, but translate NL => CRLF 787 ** 788 ** Parameters: 789 ** fp -- the input file. 790 ** boundaries -- the current MIME boundaries. 791 ** btp -- if the return value is SM_IO_EOF, *btp is set to 792 ** the type of the boundary. 793 ** 794 ** Returns: 795 ** The next character in the input stream. 796 */ 797 798 static int 799 mime_getchar_crlf(fp, boundaries, btp) 800 register SM_FILE_T *fp; 801 char **boundaries; 802 int *btp; 803 { 804 static bool sendlf = false; 805 int c; 806 807 if (sendlf) 808 { 809 sendlf = false; 810 return '\n'; 811 } 812 c = mime_getchar(fp, boundaries, btp); 813 if (c == '\n' && MapNLtoCRLF) 814 { 815 sendlf = true; 816 return '\r'; 817 } 818 return c; 819 } 820 /* 821 ** MIMEBOUNDARY -- determine if this line is a MIME boundary & its type 822 ** 823 ** Parameters: 824 ** line -- the input line. 825 ** boundaries -- the set of currently pending boundaries. 826 ** 827 ** Returns: 828 ** MBT_NOTSEP -- if this is not a separator line 829 ** MBT_INTERMED -- if this is an intermediate separator 830 ** MBT_FINAL -- if this is a final boundary 831 ** MBT_SYNTAX -- if this is a boundary for the wrong 832 ** enclosure -- i.e., a syntax error. 833 */ 834 835 static int 836 mimeboundary(line, boundaries) 837 register char *line; 838 char **boundaries; 839 { 840 int type = MBT_NOTSEP; 841 int i; 842 int savec; 843 844 if (line[0] != '-' || line[1] != '-' || boundaries == NULL) 845 return MBT_NOTSEP; 846 i = strlen(line); 847 if (i > 0 && line[i - 1] == '\n') 848 i--; 849 850 /* strip off trailing whitespace */ 851 while (i > 0 && (line[i - 1] == ' ' || line[i - 1] == '\t')) 852 i--; 853 savec = line[i]; 854 line[i] = '\0'; 855 856 if (tTd(43, 5)) 857 sm_dprintf("mimeboundary: line=\"%s\"... ", line); 858 859 /* check for this as an intermediate boundary */ 860 if (isboundary(&line[2], boundaries) >= 0) 861 type = MBT_INTERMED; 862 else if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0) 863 { 864 /* check for a final boundary */ 865 line[i - 2] = '\0'; 866 if (isboundary(&line[2], boundaries) >= 0) 867 type = MBT_FINAL; 868 line[i - 2] = '-'; 869 } 870 871 line[i] = savec; 872 if (tTd(43, 5)) 873 sm_dprintf("%s\n", MimeBoundaryNames[type]); 874 return type; 875 } 876 /* 877 ** DEFCHARSET -- return default character set for message 878 ** 879 ** The first choice for character set is for the mailer 880 ** corresponding to the envelope sender. If neither that 881 ** nor the global configuration file has a default character 882 ** set defined, return "unknown-8bit" as recommended by 883 ** RFC 1428 section 3. 884 ** 885 ** Parameters: 886 ** e -- the envelope for this message. 887 ** 888 ** Returns: 889 ** The default character set for that mailer. 890 */ 891 892 char * 893 defcharset(e) 894 register ENVELOPE *e; 895 { 896 if (e != NULL && e->e_from.q_mailer != NULL && 897 e->e_from.q_mailer->m_defcharset != NULL) 898 return e->e_from.q_mailer->m_defcharset; 899 if (DefaultCharSet != NULL) 900 return DefaultCharSet; 901 return "unknown-8bit"; 902 } 903 /* 904 ** ISBOUNDARY -- is a given string a currently valid boundary? 905 ** 906 ** Parameters: 907 ** line -- the current input line. 908 ** boundaries -- the list of valid boundaries. 909 ** 910 ** Returns: 911 ** The index number in boundaries if the line is found. 912 ** -1 -- otherwise. 913 ** 914 */ 915 916 static int 917 isboundary(line, boundaries) 918 char *line; 919 char **boundaries; 920 { 921 register int i; 922 923 for (i = 0; i <= MAXMIMENESTING && boundaries[i] != NULL; i++) 924 { 925 if (strcmp(line, boundaries[i]) == 0) 926 return i; 927 } 928 return -1; 929 } 930 #endif /* MIME8TO7 */ 931 932 #if MIME7TO8 933 static int mime_fromqp __P((unsigned char *, unsigned char **, int)); 934 935 /* 936 ** MIME7TO8 -- output 7 bit encoded MIME body in 8 bit format 937 ** 938 ** This is a hack. Supports translating the two 7-bit body-encodings 939 ** (quoted-printable and base64) to 8-bit coded bodies. 940 ** 941 ** There is not much point in supporting multipart here, as the UA 942 ** will be able to deal with encoded MIME bodies if it can parse MIME 943 ** multipart messages. 944 ** 945 ** Note also that we won't be called unless it is a text/plain MIME 946 ** message, encoded base64 or QP and mailer flag '9' has been defined 947 ** on mailer. 948 ** 949 ** Contributed by Marius Olaffson <marius@rhi.hi.is>. 950 ** 951 ** Parameters: 952 ** mci -- mailer connection information. 953 ** header -- the header for this body part. 954 ** e -- envelope. 955 ** 956 ** Returns: 957 ** none. 958 */ 959 960 static char index_64[128] = 961 { 962 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 963 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 964 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63, 965 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1, 966 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14, 967 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1, 968 -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, 969 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1 970 }; 971 972 # define CHAR64(c) (((c) < 0 || (c) > 127) ? -1 : index_64[(c)]) 973 974 void 975 mime7to8(mci, header, e) 976 register MCI *mci; 977 HDR *header; 978 register ENVELOPE *e; 979 { 980 int pxflags; 981 register char *p; 982 char *cte; 983 char **pvp; 984 unsigned char *fbufp; 985 char buf[MAXLINE]; 986 unsigned char fbuf[MAXLINE + 1]; 987 char pvpbuf[MAXLINE]; 988 extern unsigned char MimeTokenTab[256]; 989 990 p = hvalue("Content-Transfer-Encoding", header); 991 if (p == NULL || 992 (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL, 993 MimeTokenTab, false)) == NULL || 994 pvp[0] == NULL) 995 { 996 /* "can't happen" -- upper level should have caught this */ 997 syserr("mime7to8: unparsable CTE %s", p == NULL ? "<NULL>" : p); 998 999 /* avoid bounce loops */ 1000 e->e_flags |= EF_DONT_MIME; 1001 1002 /* cheap failsafe algorithm -- should work on text/plain */ 1003 if (p != NULL) 1004 { 1005 (void) sm_snprintf(buf, sizeof buf, 1006 "Content-Transfer-Encoding: %s", p); 1007 putline(buf, mci); 1008 } 1009 putline("", mci); 1010 mci->mci_flags &= ~MCIF_INHEADER; 1011 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 1012 != NULL) 1013 putline(buf, mci); 1014 return; 1015 } 1016 cataddr(pvp, NULL, buf, sizeof buf, '\0'); 1017 cte = sm_rpool_strdup_x(e->e_rpool, buf); 1018 1019 mci->mci_flags |= MCIF_INHEADER; 1020 putline("Content-Transfer-Encoding: 8bit", mci); 1021 (void) sm_snprintf(buf, sizeof buf, 1022 "X-MIME-Autoconverted: from %.200s to 8bit by %s id %s", 1023 cte, MyHostName, e->e_id); 1024 putline(buf, mci); 1025 putline("", mci); 1026 mci->mci_flags &= ~MCIF_INHEADER; 1027 1028 /* 1029 ** Translate body encoding to 8-bit. Supports two types of 1030 ** encodings; "base64" and "quoted-printable". Assume qp if 1031 ** it is not base64. 1032 */ 1033 1034 pxflags = PXLF_MAPFROM; 1035 if (sm_strcasecmp(cte, "base64") == 0) 1036 { 1037 int c1, c2, c3, c4; 1038 1039 fbufp = fbuf; 1040 while ((c1 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT)) != 1041 SM_IO_EOF) 1042 { 1043 if (isascii(c1) && isspace(c1)) 1044 continue; 1045 1046 do 1047 { 1048 c2 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT); 1049 } while (isascii(c2) && isspace(c2)); 1050 if (c2 == SM_IO_EOF) 1051 break; 1052 1053 do 1054 { 1055 c3 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT); 1056 } while (isascii(c3) && isspace(c3)); 1057 if (c3 == SM_IO_EOF) 1058 break; 1059 1060 do 1061 { 1062 c4 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT); 1063 } while (isascii(c4) && isspace(c4)); 1064 if (c4 == SM_IO_EOF) 1065 break; 1066 1067 if (c1 == '=' || c2 == '=') 1068 continue; 1069 c1 = CHAR64(c1); 1070 c2 = CHAR64(c2); 1071 1072 #if MIME7TO8_OLD 1073 #define CHK_EOL if (*--fbufp != '\n' || (fbufp > fbuf && *--fbufp != '\r')) \ 1074 ++fbufp; 1075 #else /* MIME7TO8_OLD */ 1076 #define CHK_EOL if (*--fbufp != '\n' || (fbufp > fbuf && *--fbufp != '\r')) \ 1077 { \ 1078 ++fbufp; \ 1079 pxflags |= PXLF_NOADDEOL; \ 1080 } 1081 #endif /* MIME7TO8_OLD */ 1082 1083 #define PUTLINE64 \ 1084 do \ 1085 { \ 1086 if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE]) \ 1087 { \ 1088 CHK_EOL; \ 1089 putxline((char *) fbuf, fbufp - fbuf, mci, pxflags); \ 1090 pxflags &= ~PXLF_NOADDEOL; \ 1091 fbufp = fbuf; \ 1092 } \ 1093 } while (0) 1094 1095 *fbufp = (c1 << 2) | ((c2 & 0x30) >> 4); 1096 PUTLINE64; 1097 if (c3 == '=') 1098 continue; 1099 c3 = CHAR64(c3); 1100 *fbufp = ((c2 & 0x0f) << 4) | ((c3 & 0x3c) >> 2); 1101 PUTLINE64; 1102 if (c4 == '=') 1103 continue; 1104 c4 = CHAR64(c4); 1105 *fbufp = ((c3 & 0x03) << 6) | c4; 1106 PUTLINE64; 1107 } 1108 } 1109 else 1110 { 1111 int off; 1112 1113 /* quoted-printable */ 1114 pxflags |= PXLF_NOADDEOL; 1115 fbufp = fbuf; 1116 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, 1117 sizeof buf) != NULL) 1118 { 1119 off = mime_fromqp((unsigned char *) buf, &fbufp, 1120 &fbuf[MAXLINE] - fbufp); 1121 again: 1122 if (off < -1) 1123 continue; 1124 1125 if (fbufp - fbuf > 0) 1126 putxline((char *) fbuf, fbufp - fbuf - 1, mci, 1127 pxflags); 1128 fbufp = fbuf; 1129 if (off >= 0 && buf[off] != '\0') 1130 { 1131 off = mime_fromqp((unsigned char *) (buf + off), 1132 &fbufp, 1133 &fbuf[MAXLINE] - fbufp); 1134 goto again; 1135 } 1136 } 1137 } 1138 1139 /* force out partial last line */ 1140 if (fbufp > fbuf) 1141 { 1142 *fbufp = '\0'; 1143 putxline((char *) fbuf, fbufp - fbuf, mci, pxflags); 1144 } 1145 1146 /* 1147 ** The decoded text may end without an EOL. Since this function 1148 ** is only called for text/plain MIME messages, it is safe to 1149 ** add an extra one at the end just in case. This is a hack, 1150 ** but so is auto-converting MIME in the first place. 1151 */ 1152 1153 putline("", mci); 1154 1155 if (tTd(43, 3)) 1156 sm_dprintf("\t\t\tmime7to8 => %s to 8bit done\n", cte); 1157 } 1158 /* 1159 ** The following is based on Borenstein's "codes.c" module, with simplifying 1160 ** changes as we do not deal with multipart, and to do the translation in-core, 1161 ** with an attempt to prevent overrun of output buffers. 1162 ** 1163 ** What is needed here are changes to defend this code better against 1164 ** bad encodings. Questionable to always return 0xFF for bad mappings. 1165 */ 1166 1167 static char index_hex[128] = 1168 { 1169 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1170 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1171 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1172 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1, -1,-1,-1,-1, 1173 -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1174 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1175 -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1176 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1 1177 }; 1178 1179 # define HEXCHAR(c) (((c) < 0 || (c) > 127) ? -1 : index_hex[(c)]) 1180 1181 /* 1182 ** MIME_FROMQP -- decode quoted printable string 1183 ** 1184 ** Parameters: 1185 ** infile -- input (encoded) string 1186 ** outfile -- output string 1187 ** maxlen -- size of output buffer 1188 ** 1189 ** Returns: 1190 ** -2 if decoding failure 1191 ** -1 if infile completely decoded into outfile 1192 ** >= 0 is the position in infile decoding 1193 ** reached before maxlen was reached 1194 */ 1195 1196 static int 1197 mime_fromqp(infile, outfile, maxlen) 1198 unsigned char *infile; 1199 unsigned char **outfile; 1200 int maxlen; /* Max # of chars allowed in outfile */ 1201 { 1202 int c1, c2; 1203 int nchar = 0; 1204 unsigned char *b; 1205 1206 /* decrement by one for trailing '\0', at least one other char */ 1207 if (--maxlen < 1) 1208 return 0; 1209 1210 b = infile; 1211 while ((c1 = *infile++) != '\0' && nchar < maxlen) 1212 { 1213 if (c1 == '=') 1214 { 1215 if ((c1 = *infile++) == '\0') 1216 break; 1217 1218 if (c1 == '\n' || (c1 = HEXCHAR(c1)) == -1) 1219 { 1220 /* ignore it and the rest of the buffer */ 1221 return -2; 1222 } 1223 else 1224 { 1225 do 1226 { 1227 if ((c2 = *infile++) == '\0') 1228 { 1229 c2 = -1; 1230 break; 1231 } 1232 } while ((c2 = HEXCHAR(c2)) == -1); 1233 1234 if (c2 == -1) 1235 break; 1236 nchar++; 1237 *(*outfile)++ = c1 << 4 | c2; 1238 } 1239 } 1240 else 1241 { 1242 nchar++; 1243 *(*outfile)++ = c1; 1244 if (c1 == '\n') 1245 break; 1246 } 1247 } 1248 *(*outfile)++ = '\0'; 1249 if (nchar >= maxlen) 1250 return (infile - b - 1); 1251 return -1; 1252 } 1253 #endif /* MIME7TO8 */ 1254