/*
 * Copyright (c) 1998-2002 Sendmail, Inc. and its suppliers.
 *	All rights reserved.
 * Copyright (c) 1994, 1996-1997  Eric P. Allman.  All rights reserved.
 * Copyright (c) 1994
 *	The Regents of the University of California.  All rights reserved.
 *
 * By using this file, you agree to the terms and conditions set
 * forth in the LICENSE file which can be found at the top level of
 * the sendmail distribution.
 *
 */

#include <sendmail.h>
#include <string.h>

SM_RCSID("@(#)$Id: mime.c,v 8.130.2.1 2003/04/15 01:05:59 ca Exp $")

/*
**  MIME support.
**
**	I am indebted to John Beck of Hewlett-Packard, who contributed
**	his code to me for inclusion.  As it turns out, I did not use
**	his code since he used a "minimum change" approach that used
**	several temp files, and I wanted a "minimum impact" approach
**	that would avoid copying.  However, looking over his code
**	helped me cement my understanding of the problem.
**
**	I also looked at, but did not directly use, Nathaniel
**	Borenstein's "code.c" module.  Again, it functioned as
**	a file-to-file translator, which did not fit within my
**	design bounds, but it was a useful base for understanding
**	the problem.
*/

#if MIME8TO7
/* file-local helpers of the 8->7 bit converter, defined further down */
static int	isboundary __P((char *, char **));
static int	mimeboundary __P((char *, char **));
static int	mime_getchar __P((SM_FILE_T *, char **, int *));
static int	mime_getchar_crlf __P((SM_FILE_T *, char **, int *));

/*
**  Character sets for hex and base64 encoding: the digit with value N
**  is the character at index N.
*/

static char	Base16Code[] =	"0123456789ABCDEF";
static char	Base64Code[] =	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* types of MIME boundaries; also used as index into MimeBoundaryNames */
# define MBT_SYNTAX	0	/* syntax error */
# define MBT_NOTSEP	1	/* not a boundary */
# define MBT_INTERMED	2	/* intermediate boundary (no trailing --) */
# define MBT_FINAL	3	/* final boundary (trailing -- included) */

/* printable names of the MBT_* values, for debugging output only */
static char	*MimeBoundaryNames[] =
{
	"SYNTAX",	"NOTSEP",	"INTERMED",	"FINAL"
};

/*
**  If set, mime_getchar_crlf() hands out each NL as the pair CR LF.
**  mime8to7() sets it on entry and clears it for types in class 'b'
**  when compiled with USE_B_CLASS.
*/

static bool	MapNLtoCRLF;

/*
**  MIME8TO7 -- output 8 bit body in 7 bit format
**
**	The header has already been output -- this has to do the
**	8 to 7 bit conversion.  It would be easy if we didn't have
**	to deal with nested formats (multipart/xxx and message/rfc822).
**
**	We won't be called if we don't have to do a conversion, and
**	appropriate MIME-Version: and Content-Type: fields have been
**	output.  Any Content-Transfer-Encoding: field has not been
**	output, and we can add it here.
**
**	Parameters:
**		mci -- mailer connection information.
**		header -- the header for this body part.
**		e -- envelope.
**		boundaries -- the currently pending message boundaries,
**			as a NULL terminated array of strings.
**			NOTE(review): the function indexes boundaries[0]
**			without a NULL test, so "no pending boundary"
**			appears to be an array whose first entry is NULL
**			rather than a NULL pointer -- confirm with callers.
**		flags -- to tweak processing.
78 ** 79 ** Returns: 80 ** An indicator of what terminated the message part: 81 ** MBT_FINAL -- the final boundary 82 ** MBT_INTERMED -- an intermediate boundary 83 ** MBT_NOTSEP -- an end of file 84 */ 85 86 struct args 87 { 88 char *a_field; /* name of field */ 89 char *a_value; /* value of that field */ 90 }; 91 92 int 93 mime8to7(mci, header, e, boundaries, flags) 94 register MCI *mci; 95 HDR *header; 96 register ENVELOPE *e; 97 char **boundaries; 98 int flags; 99 { 100 register char *p; 101 int linelen; 102 int bt; 103 off_t offset; 104 size_t sectionsize, sectionhighbits; 105 int i; 106 char *type; 107 char *subtype; 108 char *cte; 109 char **pvp; 110 int argc = 0; 111 char *bp; 112 bool use_qp = false; 113 struct args argv[MAXMIMEARGS]; 114 char bbuf[128]; 115 char buf[MAXLINE]; 116 char pvpbuf[MAXLINE]; 117 extern unsigned char MimeTokenTab[256]; 118 119 if (tTd(43, 1)) 120 { 121 sm_dprintf("mime8to7: flags = %x, boundaries =", flags); 122 if (boundaries[0] == NULL) 123 sm_dprintf(" <none>"); 124 else 125 { 126 for (i = 0; boundaries[i] != NULL; i++) 127 sm_dprintf(" %s", boundaries[i]); 128 } 129 sm_dprintf("\n"); 130 } 131 MapNLtoCRLF = true; 132 p = hvalue("Content-Transfer-Encoding", header); 133 if (p == NULL || 134 (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL, 135 MimeTokenTab)) == NULL || 136 pvp[0] == NULL) 137 { 138 cte = NULL; 139 } 140 else 141 { 142 cataddr(pvp, NULL, buf, sizeof buf, '\0'); 143 cte = sm_rpool_strdup_x(e->e_rpool, buf); 144 } 145 146 type = subtype = NULL; 147 p = hvalue("Content-Type", header); 148 if (p == NULL) 149 { 150 if (bitset(M87F_DIGEST, flags)) 151 p = "message/rfc822"; 152 else 153 p = "text/plain"; 154 } 155 if (p != NULL && 156 (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL, 157 MimeTokenTab)) != NULL && 158 pvp[0] != NULL) 159 { 160 if (tTd(43, 40)) 161 { 162 for (i = 0; pvp[i] != NULL; i++) 163 sm_dprintf("pvp[%d] = \"%s\"\n", i, pvp[i]); 164 } 165 type = *pvp++; 166 if (*pvp != NULL && strcmp(*pvp, 
"/") == 0 && 167 *++pvp != NULL) 168 { 169 subtype = *pvp++; 170 } 171 172 /* break out parameters */ 173 while (*pvp != NULL && argc < MAXMIMEARGS) 174 { 175 /* skip to semicolon separator */ 176 while (*pvp != NULL && strcmp(*pvp, ";") != 0) 177 pvp++; 178 if (*pvp++ == NULL || *pvp == NULL) 179 break; 180 181 /* complain about empty values */ 182 if (strcmp(*pvp, ";") == 0) 183 { 184 usrerr("mime8to7: Empty parameter in Content-Type header"); 185 186 /* avoid bounce loops */ 187 e->e_flags |= EF_DONT_MIME; 188 continue; 189 } 190 191 /* extract field name */ 192 argv[argc].a_field = *pvp++; 193 194 /* see if there is a value */ 195 if (*pvp != NULL && strcmp(*pvp, "=") == 0 && 196 (*++pvp == NULL || strcmp(*pvp, ";") != 0)) 197 { 198 argv[argc].a_value = *pvp; 199 argc++; 200 } 201 } 202 } 203 204 /* check for disaster cases */ 205 if (type == NULL) 206 type = "-none-"; 207 if (subtype == NULL) 208 subtype = "-none-"; 209 210 /* don't propogate some flags more than one level into the message */ 211 flags &= ~M87F_DIGEST; 212 213 /* 214 ** Check for cases that can not be encoded. 215 ** 216 ** For example, you can't encode certain kinds of types 217 ** or already-encoded messages. If we find this case, 218 ** just copy it through. 219 */ 220 221 (void) sm_snprintf(buf, sizeof buf, "%.100s/%.100s", type, subtype); 222 if (wordinclass(buf, 'n') || (cte != NULL && !wordinclass(cte, 'e'))) 223 flags |= M87F_NO8BIT; 224 225 # ifdef USE_B_CLASS 226 if (wordinclass(buf, 'b') || wordinclass(type, 'b')) 227 MapNLtoCRLF = false; 228 # endif /* USE_B_CLASS */ 229 if (wordinclass(buf, 'q') || wordinclass(type, 'q')) 230 use_qp = true; 231 232 /* 233 ** Multipart requires special processing. 234 ** 235 ** Do a recursive descent into the message. 
236 */ 237 238 if (sm_strcasecmp(type, "multipart") == 0 && 239 (!bitset(M87F_NO8BIT, flags) || bitset(M87F_NO8TO7, flags))) 240 { 241 242 if (sm_strcasecmp(subtype, "digest") == 0) 243 flags |= M87F_DIGEST; 244 245 for (i = 0; i < argc; i++) 246 { 247 if (sm_strcasecmp(argv[i].a_field, "boundary") == 0) 248 break; 249 } 250 if (i >= argc || argv[i].a_value == NULL) 251 { 252 usrerr("mime8to7: Content-Type: \"%s\": %s boundary", 253 i >= argc ? "missing" : "bogus", p); 254 p = "---"; 255 256 /* avoid bounce loops */ 257 e->e_flags |= EF_DONT_MIME; 258 } 259 else 260 { 261 p = argv[i].a_value; 262 stripquotes(p); 263 } 264 if (sm_strlcpy(bbuf, p, sizeof bbuf) >= sizeof bbuf) 265 { 266 usrerr("mime8to7: multipart boundary \"%s\" too long", 267 p); 268 269 /* avoid bounce loops */ 270 e->e_flags |= EF_DONT_MIME; 271 } 272 273 if (tTd(43, 1)) 274 sm_dprintf("mime8to7: multipart boundary \"%s\"\n", 275 bbuf); 276 for (i = 0; i < MAXMIMENESTING; i++) 277 { 278 if (boundaries[i] == NULL) 279 break; 280 } 281 if (i >= MAXMIMENESTING) 282 { 283 usrerr("mime8to7: multipart nesting boundary too deep"); 284 285 /* avoid bounce loops */ 286 e->e_flags |= EF_DONT_MIME; 287 } 288 else 289 { 290 boundaries[i] = bbuf; 291 boundaries[i + 1] = NULL; 292 } 293 mci->mci_flags |= MCIF_INMIME; 294 295 /* skip the early "comment" prologue */ 296 putline("", mci); 297 mci->mci_flags &= ~MCIF_INHEADER; 298 bt = MBT_FINAL; 299 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 300 != NULL) 301 { 302 bt = mimeboundary(buf, boundaries); 303 if (bt != MBT_NOTSEP) 304 break; 305 putxline(buf, strlen(buf), mci, 306 PXLF_MAPFROM|PXLF_STRIP8BIT); 307 if (tTd(43, 99)) 308 sm_dprintf(" ...%s", buf); 309 } 310 if (sm_io_eof(e->e_dfp)) 311 bt = MBT_FINAL; 312 while (bt != MBT_FINAL) 313 { 314 auto HDR *hdr = NULL; 315 316 (void) sm_strlcpyn(buf, sizeof buf, 2, "--", bbuf); 317 putline(buf, mci); 318 if (tTd(43, 35)) 319 sm_dprintf(" ...%s\n", buf); 320 collect(e->e_dfp, false, &hdr, e, 
false); 321 if (tTd(43, 101)) 322 putline("+++after collect", mci); 323 putheader(mci, hdr, e, flags); 324 if (tTd(43, 101)) 325 putline("+++after putheader", mci); 326 bt = mime8to7(mci, hdr, e, boundaries, flags); 327 } 328 (void) sm_strlcpyn(buf, sizeof buf, 3, "--", bbuf, "--"); 329 putline(buf, mci); 330 if (tTd(43, 35)) 331 sm_dprintf(" ...%s\n", buf); 332 boundaries[i] = NULL; 333 mci->mci_flags &= ~MCIF_INMIME; 334 335 /* skip the late "comment" epilogue */ 336 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 337 != NULL) 338 { 339 bt = mimeboundary(buf, boundaries); 340 if (bt != MBT_NOTSEP) 341 break; 342 putxline(buf, strlen(buf), mci, 343 PXLF_MAPFROM|PXLF_STRIP8BIT); 344 if (tTd(43, 99)) 345 sm_dprintf(" ...%s", buf); 346 } 347 if (sm_io_eof(e->e_dfp)) 348 bt = MBT_FINAL; 349 if (tTd(43, 3)) 350 sm_dprintf("\t\t\tmime8to7=>%s (multipart)\n", 351 MimeBoundaryNames[bt]); 352 return bt; 353 } 354 355 /* 356 ** Message/xxx types -- recurse exactly once. 357 ** 358 ** Class 's' is predefined to have "rfc822" only. 359 */ 360 361 if (sm_strcasecmp(type, "message") == 0) 362 { 363 if (!wordinclass(subtype, 's')) 364 { 365 flags |= M87F_NO8BIT; 366 } 367 else 368 { 369 auto HDR *hdr = NULL; 370 371 putline("", mci); 372 373 mci->mci_flags |= MCIF_INMIME; 374 collect(e->e_dfp, false, &hdr, e, false); 375 if (tTd(43, 101)) 376 putline("+++after collect", mci); 377 putheader(mci, hdr, e, flags); 378 if (tTd(43, 101)) 379 putline("+++after putheader", mci); 380 if (hvalue("MIME-Version", hdr) == NULL && 381 !bitset(M87F_NO8TO7, flags)) 382 putline("MIME-Version: 1.0", mci); 383 bt = mime8to7(mci, hdr, e, boundaries, flags); 384 mci->mci_flags &= ~MCIF_INMIME; 385 return bt; 386 } 387 } 388 389 /* 390 ** Non-compound body type 391 ** 392 ** Compute the ratio of seven to eight bit characters; 393 ** use that as a heuristic to decide how to do the 394 ** encoding. 
395 */ 396 397 sectionsize = sectionhighbits = 0; 398 if (!bitset(M87F_NO8BIT|M87F_NO8TO7, flags)) 399 { 400 /* remember where we were */ 401 offset = sm_io_tell(e->e_dfp, SM_TIME_DEFAULT); 402 if (offset == -1) 403 syserr("mime8to7: cannot sm_io_tell on %cf%s", 404 DATAFL_LETTER, e->e_id); 405 406 /* do a scan of this body type to count character types */ 407 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 408 != NULL) 409 { 410 if (mimeboundary(buf, boundaries) != MBT_NOTSEP) 411 break; 412 for (p = buf; *p != '\0'; p++) 413 { 414 /* count bytes with the high bit set */ 415 sectionsize++; 416 if (bitset(0200, *p)) 417 sectionhighbits++; 418 } 419 420 /* 421 ** Heuristic: if 1/4 of the first 4K bytes are 8-bit, 422 ** assume base64. This heuristic avoids double-reading 423 ** large graphics or video files. 424 */ 425 426 if (sectionsize >= 4096 && 427 sectionhighbits > sectionsize / 4) 428 break; 429 } 430 431 /* return to the original offset for processing */ 432 /* XXX use relative seeks to handle >31 bit file sizes? */ 433 if (sm_io_seek(e->e_dfp, SM_TIME_DEFAULT, offset, SEEK_SET) < 0) 434 syserr("mime8to7: cannot sm_io_fseek on %cf%s", 435 DATAFL_LETTER, e->e_id); 436 else 437 sm_io_clearerr(e->e_dfp); 438 } 439 440 /* 441 ** Heuristically determine encoding method. 442 ** If more than 1/8 of the total characters have the 443 ** eighth bit set, use base64; else use quoted-printable. 444 ** However, only encode binary encoded data as base64, 445 ** since otherwise the NL=>CRLF mapping will be a problem. 446 */ 447 448 if (tTd(43, 8)) 449 { 450 sm_dprintf("mime8to7: %ld high bit(s) in %ld byte(s), cte=%s, type=%s/%s\n", 451 (long) sectionhighbits, (long) sectionsize, 452 cte == NULL ? "[none]" : cte, 453 type == NULL ? "[none]" : type, 454 subtype == NULL ? 
"[none]" : subtype); 455 } 456 if (cte != NULL && sm_strcasecmp(cte, "binary") == 0) 457 sectionsize = sectionhighbits; 458 linelen = 0; 459 bp = buf; 460 if (sectionhighbits == 0) 461 { 462 /* no encoding necessary */ 463 if (cte != NULL && 464 bitset(MCIF_CVT8TO7|MCIF_CVT7TO8|MCIF_INMIME, 465 mci->mci_flags) && 466 !bitset(M87F_NO8TO7, flags)) 467 { 468 /* 469 ** Skip _unless_ in MIME mode and potentially 470 ** converting from 8 bit to 7 bit MIME. See 471 ** putheader() for the counterpart where the 472 ** CTE header is skipped in the opposite 473 ** situation. 474 */ 475 476 (void) sm_snprintf(buf, sizeof buf, 477 "Content-Transfer-Encoding: %.200s", cte); 478 putline(buf, mci); 479 if (tTd(43, 36)) 480 sm_dprintf(" ...%s\n", buf); 481 } 482 putline("", mci); 483 mci->mci_flags &= ~MCIF_INHEADER; 484 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 485 != NULL) 486 { 487 bt = mimeboundary(buf, boundaries); 488 if (bt != MBT_NOTSEP) 489 break; 490 putline(buf, mci); 491 } 492 if (sm_io_eof(e->e_dfp)) 493 bt = MBT_FINAL; 494 } 495 else if (!MapNLtoCRLF || 496 (sectionsize / 8 < sectionhighbits && !use_qp)) 497 { 498 /* use base64 encoding */ 499 int c1, c2; 500 501 if (tTd(43, 36)) 502 sm_dprintf(" ...Content-Transfer-Encoding: base64\n"); 503 putline("Content-Transfer-Encoding: base64", mci); 504 (void) sm_snprintf(buf, sizeof buf, 505 "X-MIME-Autoconverted: from 8bit to base64 by %s id %s", 506 MyHostName, e->e_id); 507 putline(buf, mci); 508 putline("", mci); 509 mci->mci_flags &= ~MCIF_INHEADER; 510 while ((c1 = mime_getchar_crlf(e->e_dfp, boundaries, &bt)) != 511 SM_IO_EOF) 512 { 513 if (linelen > 71) 514 { 515 *bp = '\0'; 516 putline(buf, mci); 517 linelen = 0; 518 bp = buf; 519 } 520 linelen += 4; 521 *bp++ = Base64Code[(c1 >> 2)]; 522 c1 = (c1 & 0x03) << 4; 523 c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt); 524 if (c2 == SM_IO_EOF) 525 { 526 *bp++ = Base64Code[c1]; 527 *bp++ = '='; 528 *bp++ = '='; 529 break; 530 } 531 c1 |= (c2 >> 4) & 
0x0f; 532 *bp++ = Base64Code[c1]; 533 c1 = (c2 & 0x0f) << 2; 534 c2 = mime_getchar_crlf(e->e_dfp, boundaries, &bt); 535 if (c2 == SM_IO_EOF) 536 { 537 *bp++ = Base64Code[c1]; 538 *bp++ = '='; 539 break; 540 } 541 c1 |= (c2 >> 6) & 0x03; 542 *bp++ = Base64Code[c1]; 543 *bp++ = Base64Code[c2 & 0x3f]; 544 } 545 *bp = '\0'; 546 putline(buf, mci); 547 } 548 else 549 { 550 /* use quoted-printable encoding */ 551 int c1, c2; 552 int fromstate; 553 BITMAP256 badchars; 554 555 /* set up map of characters that must be mapped */ 556 clrbitmap(badchars); 557 for (c1 = 0x00; c1 < 0x20; c1++) 558 setbitn(c1, badchars); 559 clrbitn('\t', badchars); 560 for (c1 = 0x7f; c1 < 0x100; c1++) 561 setbitn(c1, badchars); 562 setbitn('=', badchars); 563 if (bitnset(M_EBCDIC, mci->mci_mailer->m_flags)) 564 for (p = "!\"#$@[\\]^`{|}~"; *p != '\0'; p++) 565 setbitn(*p, badchars); 566 567 if (tTd(43, 36)) 568 sm_dprintf(" ...Content-Transfer-Encoding: quoted-printable\n"); 569 putline("Content-Transfer-Encoding: quoted-printable", mci); 570 (void) sm_snprintf(buf, sizeof buf, 571 "X-MIME-Autoconverted: from 8bit to quoted-printable by %s id %s", 572 MyHostName, e->e_id); 573 putline(buf, mci); 574 putline("", mci); 575 mci->mci_flags &= ~MCIF_INHEADER; 576 fromstate = 0; 577 c2 = '\n'; 578 while ((c1 = mime_getchar(e->e_dfp, boundaries, &bt)) != 579 SM_IO_EOF) 580 { 581 if (c1 == '\n') 582 { 583 if (c2 == ' ' || c2 == '\t') 584 { 585 *bp++ = '='; 586 *bp++ = Base16Code[(c2 >> 4) & 0x0f]; 587 *bp++ = Base16Code[c2 & 0x0f]; 588 } 589 if (buf[0] == '.' && bp == &buf[1]) 590 { 591 buf[0] = '='; 592 *bp++ = Base16Code[('.' >> 4) & 0x0f]; 593 *bp++ = Base16Code['.' 
& 0x0f]; 594 } 595 *bp = '\0'; 596 putline(buf, mci); 597 linelen = fromstate = 0; 598 bp = buf; 599 c2 = c1; 600 continue; 601 } 602 if (c2 == ' ' && linelen == 4 && fromstate == 4 && 603 bitnset(M_ESCFROM, mci->mci_mailer->m_flags)) 604 { 605 *bp++ = '='; 606 *bp++ = '2'; 607 *bp++ = '0'; 608 linelen += 3; 609 } 610 else if (c2 == ' ' || c2 == '\t') 611 { 612 *bp++ = c2; 613 linelen++; 614 } 615 if (linelen > 72 && 616 (linelen > 75 || c1 != '.' || 617 (linelen > 73 && c2 == '.'))) 618 { 619 if (linelen > 73 && c2 == '.') 620 bp--; 621 else 622 c2 = '\n'; 623 *bp++ = '='; 624 *bp = '\0'; 625 putline(buf, mci); 626 linelen = fromstate = 0; 627 bp = buf; 628 if (c2 == '.') 629 { 630 *bp++ = '.'; 631 linelen++; 632 } 633 } 634 if (bitnset(bitidx(c1), badchars)) 635 { 636 *bp++ = '='; 637 *bp++ = Base16Code[(c1 >> 4) & 0x0f]; 638 *bp++ = Base16Code[c1 & 0x0f]; 639 linelen += 3; 640 } 641 else if (c1 != ' ' && c1 != '\t') 642 { 643 if (linelen < 4 && c1 == "From"[linelen]) 644 fromstate++; 645 *bp++ = c1; 646 linelen++; 647 } 648 c2 = c1; 649 } 650 651 /* output any saved character */ 652 if (c2 == ' ' || c2 == '\t') 653 { 654 *bp++ = '='; 655 *bp++ = Base16Code[(c2 >> 4) & 0x0f]; 656 *bp++ = Base16Code[c2 & 0x0f]; 657 linelen += 3; 658 } 659 660 if (linelen > 0 || boundaries[0] != NULL) 661 { 662 *bp = '\0'; 663 putline(buf, mci); 664 } 665 666 } 667 if (tTd(43, 3)) 668 sm_dprintf("\t\t\tmime8to7=>%s (basic)\n", MimeBoundaryNames[bt]); 669 return bt; 670 } 671 /* 672 ** MIME_GETCHAR -- get a character for MIME processing 673 ** 674 ** Treats boundaries as SM_IO_EOF. 675 ** 676 ** Parameters: 677 ** fp -- the input file. 678 ** boundaries -- the current MIME boundaries. 679 ** btp -- if the return value is SM_IO_EOF, *btp is set to 680 ** the type of the boundary. 681 ** 682 ** Returns: 683 ** The next character in the input stream. 
684 */ 685 686 static int 687 mime_getchar(fp, boundaries, btp) 688 register SM_FILE_T *fp; 689 char **boundaries; 690 int *btp; 691 { 692 int c; 693 static unsigned char *bp = NULL; 694 static int buflen = 0; 695 static bool atbol = true; /* at beginning of line */ 696 static int bt = MBT_SYNTAX; /* boundary type of next SM_IO_EOF */ 697 static unsigned char buf[128]; /* need not be a full line */ 698 int start = 0; /* indicates position of - in buffer */ 699 700 if (buflen == 1 && *bp == '\n') 701 { 702 /* last \n in buffer may be part of next MIME boundary */ 703 c = *bp; 704 } 705 else if (buflen > 0) 706 { 707 buflen--; 708 return *bp++; 709 } 710 else 711 c = sm_io_getc(fp, SM_TIME_DEFAULT); 712 bp = buf; 713 buflen = 0; 714 if (c == '\n') 715 { 716 /* might be part of a MIME boundary */ 717 *bp++ = c; 718 atbol = true; 719 c = sm_io_getc(fp, SM_TIME_DEFAULT); 720 if (c == '\n') 721 { 722 (void) sm_io_ungetc(fp, SM_TIME_DEFAULT, c); 723 return c; 724 } 725 start = 1; 726 } 727 if (c != SM_IO_EOF) 728 *bp++ = c; 729 else 730 bt = MBT_FINAL; 731 if (atbol && c == '-') 732 { 733 /* check for a message boundary */ 734 c = sm_io_getc(fp, SM_TIME_DEFAULT); 735 if (c != '-') 736 { 737 if (c != SM_IO_EOF) 738 *bp++ = c; 739 else 740 bt = MBT_FINAL; 741 buflen = bp - buf - 1; 742 bp = buf; 743 return *bp++; 744 } 745 746 /* got "--", now check for rest of separator */ 747 *bp++ = '-'; 748 while (bp < &buf[sizeof buf - 2] && 749 (c = sm_io_getc(fp, SM_TIME_DEFAULT)) != SM_IO_EOF && 750 c != '\n') 751 { 752 *bp++ = c; 753 } 754 *bp = '\0'; /* XXX simply cut off? 
*/ 755 bt = mimeboundary((char *) &buf[start], boundaries); 756 switch (bt) 757 { 758 case MBT_FINAL: 759 case MBT_INTERMED: 760 /* we have a message boundary */ 761 buflen = 0; 762 *btp = bt; 763 return SM_IO_EOF; 764 } 765 766 atbol = c == '\n'; 767 if (c != SM_IO_EOF) 768 *bp++ = c; 769 } 770 771 buflen = bp - buf - 1; 772 if (buflen < 0) 773 { 774 *btp = bt; 775 return SM_IO_EOF; 776 } 777 bp = buf; 778 return *bp++; 779 } 780 /* 781 ** MIME_GETCHAR_CRLF -- do mime_getchar, but translate NL => CRLF 782 ** 783 ** Parameters: 784 ** fp -- the input file. 785 ** boundaries -- the current MIME boundaries. 786 ** btp -- if the return value is SM_IO_EOF, *btp is set to 787 ** the type of the boundary. 788 ** 789 ** Returns: 790 ** The next character in the input stream. 791 */ 792 793 static int 794 mime_getchar_crlf(fp, boundaries, btp) 795 register SM_FILE_T *fp; 796 char **boundaries; 797 int *btp; 798 { 799 static bool sendlf = false; 800 int c; 801 802 if (sendlf) 803 { 804 sendlf = false; 805 return '\n'; 806 } 807 c = mime_getchar(fp, boundaries, btp); 808 if (c == '\n' && MapNLtoCRLF) 809 { 810 sendlf = true; 811 return '\r'; 812 } 813 return c; 814 } 815 /* 816 ** MIMEBOUNDARY -- determine if this line is a MIME boundary & its type 817 ** 818 ** Parameters: 819 ** line -- the input line. 820 ** boundaries -- the set of currently pending boundaries. 821 ** 822 ** Returns: 823 ** MBT_NOTSEP -- if this is not a separator line 824 ** MBT_INTERMED -- if this is an intermediate separator 825 ** MBT_FINAL -- if this is a final boundary 826 ** MBT_SYNTAX -- if this is a boundary for the wrong 827 ** enclosure -- i.e., a syntax error. 
828 */ 829 830 static int 831 mimeboundary(line, boundaries) 832 register char *line; 833 char **boundaries; 834 { 835 int type = MBT_NOTSEP; 836 int i; 837 int savec; 838 839 if (line[0] != '-' || line[1] != '-' || boundaries == NULL) 840 return MBT_NOTSEP; 841 i = strlen(line); 842 if (i > 0 && line[i - 1] == '\n') 843 i--; 844 845 /* strip off trailing whitespace */ 846 while (i > 0 && (line[i - 1] == ' ' || line[i - 1] == '\t')) 847 i--; 848 savec = line[i]; 849 line[i] = '\0'; 850 851 if (tTd(43, 5)) 852 sm_dprintf("mimeboundary: line=\"%s\"... ", line); 853 854 /* check for this as an intermediate boundary */ 855 if (isboundary(&line[2], boundaries) >= 0) 856 type = MBT_INTERMED; 857 else if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0) 858 { 859 /* check for a final boundary */ 860 line[i - 2] = '\0'; 861 if (isboundary(&line[2], boundaries) >= 0) 862 type = MBT_FINAL; 863 line[i - 2] = '-'; 864 } 865 866 line[i] = savec; 867 if (tTd(43, 5)) 868 sm_dprintf("%s\n", MimeBoundaryNames[type]); 869 return type; 870 } 871 /* 872 ** DEFCHARSET -- return default character set for message 873 ** 874 ** The first choice for character set is for the mailer 875 ** corresponding to the envelope sender. If neither that 876 ** nor the global configuration file has a default character 877 ** set defined, return "unknown-8bit" as recommended by 878 ** RFC 1428 section 3. 879 ** 880 ** Parameters: 881 ** e -- the envelope for this message. 882 ** 883 ** Returns: 884 ** The default character set for that mailer. 885 */ 886 887 char * 888 defcharset(e) 889 register ENVELOPE *e; 890 { 891 if (e != NULL && e->e_from.q_mailer != NULL && 892 e->e_from.q_mailer->m_defcharset != NULL) 893 return e->e_from.q_mailer->m_defcharset; 894 if (DefaultCharSet != NULL) 895 return DefaultCharSet; 896 return "unknown-8bit"; 897 } 898 /* 899 ** ISBOUNDARY -- is a given string a currently valid boundary? 900 ** 901 ** Parameters: 902 ** line -- the current input line. 
903 ** boundaries -- the list of valid boundaries. 904 ** 905 ** Returns: 906 ** The index number in boundaries if the line is found. 907 ** -1 -- otherwise. 908 ** 909 */ 910 911 static int 912 isboundary(line, boundaries) 913 char *line; 914 char **boundaries; 915 { 916 register int i; 917 918 for (i = 0; i <= MAXMIMENESTING && boundaries[i] != NULL; i++) 919 { 920 if (strcmp(line, boundaries[i]) == 0) 921 return i; 922 } 923 return -1; 924 } 925 #endif /* MIME8TO7 */ 926 927 #if MIME7TO8 928 static int mime_fromqp __P((unsigned char *, unsigned char **, int)); 929 930 /* 931 ** MIME7TO8 -- output 7 bit encoded MIME body in 8 bit format 932 ** 933 ** This is a hack. Supports translating the two 7-bit body-encodings 934 ** (quoted-printable and base64) to 8-bit coded bodies. 935 ** 936 ** There is not much point in supporting multipart here, as the UA 937 ** will be able to deal with encoded MIME bodies if it can parse MIME 938 ** multipart messages. 939 ** 940 ** Note also that we won't be called unless it is a text/plain MIME 941 ** message, encoded base64 or QP and mailer flag '9' has been defined 942 ** on mailer. 943 ** 944 ** Contributed by Marius Olaffson <marius@rhi.hi.is>. 945 ** 946 ** Parameters: 947 ** mci -- mailer connection information. 948 ** header -- the header for this body part. 949 ** e -- envelope. 950 ** 951 ** Returns: 952 ** none. 953 */ 954 955 static char index_64[128] = 956 { 957 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 958 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 959 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63, 960 52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1,-1,-1,-1, 961 -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10, 11,12,13,14, 962 15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1, 963 -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40, 964 41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1 965 }; 966 967 # define CHAR64(c) (((c) < 0 || (c) > 127) ? 
-1 : index_64[(c)]) 968 969 void 970 mime7to8(mci, header, e) 971 register MCI *mci; 972 HDR *header; 973 register ENVELOPE *e; 974 { 975 int pxflags; 976 register char *p; 977 char *cte; 978 char **pvp; 979 unsigned char *fbufp; 980 char buf[MAXLINE]; 981 unsigned char fbuf[MAXLINE + 1]; 982 char pvpbuf[MAXLINE]; 983 extern unsigned char MimeTokenTab[256]; 984 985 p = hvalue("Content-Transfer-Encoding", header); 986 if (p == NULL || 987 (pvp = prescan(p, '\0', pvpbuf, sizeof pvpbuf, NULL, 988 MimeTokenTab)) == NULL || 989 pvp[0] == NULL) 990 { 991 /* "can't happen" -- upper level should have caught this */ 992 syserr("mime7to8: unparsable CTE %s", p == NULL ? "<NULL>" : p); 993 994 /* avoid bounce loops */ 995 e->e_flags |= EF_DONT_MIME; 996 997 /* cheap failsafe algorithm -- should work on text/plain */ 998 if (p != NULL) 999 { 1000 (void) sm_snprintf(buf, sizeof buf, 1001 "Content-Transfer-Encoding: %s", p); 1002 putline(buf, mci); 1003 } 1004 putline("", mci); 1005 mci->mci_flags &= ~MCIF_INHEADER; 1006 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, sizeof buf) 1007 != NULL) 1008 putline(buf, mci); 1009 return; 1010 } 1011 cataddr(pvp, NULL, buf, sizeof buf, '\0'); 1012 cte = sm_rpool_strdup_x(e->e_rpool, buf); 1013 1014 mci->mci_flags |= MCIF_INHEADER; 1015 putline("Content-Transfer-Encoding: 8bit", mci); 1016 (void) sm_snprintf(buf, sizeof buf, 1017 "X-MIME-Autoconverted: from %.200s to 8bit by %s id %s", 1018 cte, MyHostName, e->e_id); 1019 putline(buf, mci); 1020 putline("", mci); 1021 mci->mci_flags &= ~MCIF_INHEADER; 1022 1023 /* 1024 ** Translate body encoding to 8-bit. Supports two types of 1025 ** encodings; "base64" and "quoted-printable". Assume qp if 1026 ** it is not base64. 
1027 */ 1028 1029 pxflags = PXLF_MAPFROM; 1030 if (sm_strcasecmp(cte, "base64") == 0) 1031 { 1032 int c1, c2, c3, c4; 1033 1034 fbufp = fbuf; 1035 while ((c1 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT)) != 1036 SM_IO_EOF) 1037 { 1038 if (isascii(c1) && isspace(c1)) 1039 continue; 1040 1041 do 1042 { 1043 c2 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT); 1044 } while (isascii(c2) && isspace(c2)); 1045 if (c2 == SM_IO_EOF) 1046 break; 1047 1048 do 1049 { 1050 c3 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT); 1051 } while (isascii(c3) && isspace(c3)); 1052 if (c3 == SM_IO_EOF) 1053 break; 1054 1055 do 1056 { 1057 c4 = sm_io_getc(e->e_dfp, SM_TIME_DEFAULT); 1058 } while (isascii(c4) && isspace(c4)); 1059 if (c4 == SM_IO_EOF) 1060 break; 1061 1062 if (c1 == '=' || c2 == '=') 1063 continue; 1064 c1 = CHAR64(c1); 1065 c2 = CHAR64(c2); 1066 1067 *fbufp = (c1 << 2) | ((c2 & 0x30) >> 4); 1068 if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE]) 1069 { 1070 if (*--fbufp != '\n' || 1071 (fbufp > fbuf && *--fbufp != '\r')) 1072 { 1073 pxflags |= PXLF_NOADDEOL; 1074 fbufp++; 1075 } 1076 putxline((char *) fbuf, fbufp - fbuf, 1077 mci, pxflags); 1078 pxflags &= ~PXLF_NOADDEOL; 1079 fbufp = fbuf; 1080 } 1081 if (c3 == '=') 1082 continue; 1083 c3 = CHAR64(c3); 1084 *fbufp = ((c2 & 0x0f) << 4) | ((c3 & 0x3c) >> 2); 1085 if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE]) 1086 { 1087 if (*--fbufp != '\n' || 1088 (fbufp > fbuf && *--fbufp != '\r')) 1089 { 1090 pxflags |= PXLF_NOADDEOL; 1091 fbufp++; 1092 } 1093 putxline((char *) fbuf, fbufp - fbuf, 1094 mci, pxflags); 1095 pxflags &= ~PXLF_NOADDEOL; 1096 fbufp = fbuf; 1097 } 1098 if (c4 == '=') 1099 continue; 1100 c4 = CHAR64(c4); 1101 *fbufp = ((c3 & 0x03) << 6) | c4; 1102 if (*fbufp++ == '\n' || fbufp >= &fbuf[MAXLINE]) 1103 { 1104 if (*--fbufp != '\n' || 1105 (fbufp > fbuf && *--fbufp != '\r')) 1106 { 1107 pxflags |= PXLF_NOADDEOL; 1108 fbufp++; 1109 } 1110 putxline((char *) fbuf, fbufp - fbuf, 1111 mci, pxflags); 1112 pxflags &= ~PXLF_NOADDEOL; 1113 
fbufp = fbuf; 1114 } 1115 } 1116 } 1117 else 1118 { 1119 int off; 1120 1121 /* quoted-printable */ 1122 pxflags |= PXLF_NOADDEOL; 1123 fbufp = fbuf; 1124 while (sm_io_fgets(e->e_dfp, SM_TIME_DEFAULT, buf, 1125 sizeof buf) != NULL) 1126 { 1127 off = mime_fromqp((unsigned char *) buf, &fbufp, 1128 &fbuf[MAXLINE] - fbufp); 1129 again: 1130 if (off < -1) 1131 continue; 1132 1133 if (fbufp - fbuf > 0) 1134 putxline((char *) fbuf, fbufp - fbuf - 1, mci, 1135 pxflags); 1136 fbufp = fbuf; 1137 if (off >= 0 && buf[off] != '\0') 1138 { 1139 off = mime_fromqp((unsigned char *) (buf + off), 1140 &fbufp, 1141 &fbuf[MAXLINE] - fbufp); 1142 goto again; 1143 } 1144 } 1145 } 1146 1147 /* force out partial last line */ 1148 if (fbufp > fbuf) 1149 { 1150 *fbufp = '\0'; 1151 putxline((char *) fbuf, fbufp - fbuf, mci, pxflags); 1152 } 1153 1154 /* 1155 ** The decoded text may end without an EOL. Since this function 1156 ** is only called for text/plain MIME messages, it is safe to 1157 ** add an extra one at the end just in case. This is a hack, 1158 ** but so is auto-converting MIME in the first place. 1159 */ 1160 1161 putline("", mci); 1162 1163 if (tTd(43, 3)) 1164 sm_dprintf("\t\t\tmime7to8 => %s to 8bit done\n", cte); 1165 } 1166 /* 1167 ** The following is based on Borenstein's "codes.c" module, with simplifying 1168 ** changes as we do not deal with multipart, and to do the translation in-core, 1169 ** with an attempt to prevent overrun of output buffers. 1170 ** 1171 ** What is needed here are changes to defend this code better against 1172 ** bad encodings. Questionable to always return 0xFF for bad mappings. 
1173 */ 1174 1175 static char index_hex[128] = 1176 { 1177 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1178 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1179 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1180 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1, -1,-1,-1,-1, 1181 -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1182 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1183 -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1, 1184 -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1 1185 }; 1186 1187 # define HEXCHAR(c) (((c) < 0 || (c) > 127) ? -1 : index_hex[(c)]) 1188 1189 /* 1190 ** MIME_FROMQP -- decode quoted printable string 1191 ** 1192 ** Parameters: 1193 ** infile -- input (encoded) string 1194 ** outfile -- output string 1195 ** maxlen -- size of output buffer 1196 ** 1197 ** Returns: 1198 ** -2 if decoding failure 1199 ** -1 if infile completely decoded into outfile 1200 ** >= 0 is the position in infile decoding 1201 ** reached before maxlen was reached 1202 */ 1203 1204 static int 1205 mime_fromqp(infile, outfile, maxlen) 1206 unsigned char *infile; 1207 unsigned char **outfile; 1208 int maxlen; /* Max # of chars allowed in outfile */ 1209 { 1210 int c1, c2; 1211 int nchar = 0; 1212 unsigned char *b; 1213 1214 /* decrement by one for trailing '\0', at least one other char */ 1215 if (--maxlen < 1) 1216 return 0; 1217 1218 b = infile; 1219 while ((c1 = *infile++) != '\0' && nchar < maxlen) 1220 { 1221 if (c1 == '=') 1222 { 1223 if ((c1 = *infile++) == '\0') 1224 break; 1225 1226 if (c1 == '\n' || (c1 = HEXCHAR(c1)) == -1) 1227 { 1228 /* ignore it and the rest of the buffer */ 1229 return -2; 1230 } 1231 else 1232 { 1233 do 1234 { 1235 if ((c2 = *infile++) == '\0') 1236 { 1237 c2 = -1; 1238 break; 1239 } 1240 } while ((c2 = HEXCHAR(c2)) == -1); 1241 1242 if (c2 == -1) 1243 break; 1244 nchar++; 1245 *(*outfile)++ = c1 << 4 | c2; 1246 } 1247 } 1248 else 1249 { 1250 nchar++; 1251 *(*outfile)++ = c1; 1252 if (c1 == '\n') 1253 break; 1254 } 
1255 } 1256 *(*outfile)++ = '\0'; 1257 if (nchar >= maxlen) 1258 return (infile - b - 1); 1259 return -1; 1260 } 1261 #endif /* MIME7TO8 */ 1262