/*
 * rfc2047.c -- decode RFC-2047 header format
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#ifndef lint
static char sccsi2[] = "%W% (Sun) %G%";
#endif

/*
 * Copyright (c) 1997-1998 Richard Coleman
 * All rights reserved.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and to distribute modified versions of this software for any
 * purpose, provided that the above copyright notice and the following two
 * paragraphs appear in all copies of this software.
 *
 * In no event shall Richard Coleman be liable to any party for direct,
 * indirect, special, incidental, or consequential damages arising out of
 * the use of this software and its documentation, even if Richard Coleman
 * has been advised of the possibility of such damage.
 *
 * Richard Coleman specifically disclaims any warranties, including, but
 * not limited to, the implied warranties of merchantability and fitness
 * for a particular purpose.  The software provided hereunder is on an "as
 * is" basis, and Richard Coleman has no obligation to provide maintenance,
 * support, updates, enhancements, or modifications.
 */

/*
 * Parts of this code were derived from metamail, which is ...
 *
 * Copyright (c) 1991 Bell Communications Research, Inc. (Bellcore)
 *
 * Permission to use, copy, modify, and distribute this material
 * for any purpose and without fee is hereby granted, provided
 * that the above copyright notice and this permission notice
 * appear in all copies, and that the name of Bellcore not be
 * used in advertising or publicity pertaining to this
 * material without the specific, prior written permission
 * of an authorized representative of Bellcore.  BELLCORE
 * MAKES NO REPRESENTATIONS ABOUT THE ACCURACY OR SUITABILITY
 * OF THIS MATERIAL FOR ANY PURPOSE.  IT IS PROVIDED "AS IS",
 * WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES.
 */

/*
 * Copyright (c) 1998, by Sun Microsystems, Inc.
 * All rights reserved.
 */

#include <string.h>

typedef int bool;

#define	FALSE	0
#define	TRUE	1

/*
 * Value of each ASCII hexadecimal digit (either case), or -1 for any
 * other 7-bit character.  The table only covers codes 0..127.
 */
static const signed char hexindex[128] = {
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1,
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};

/*
 * Six-bit value of each base64 alphabet character, or -1 for any other
 * 7-bit character (including the '=' padding character).
 */
static const signed char index_64[128] = {
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63,
	52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
	-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
	15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
	-1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
	41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1
};

/* Base64 value of c, or -1 if c is not in the base64 alphabet. */
#define	char64(c)	(((unsigned char) (c) > 127) ? -1 : \
			index_64[(unsigned char) (c)])

/*
 * Combine two hexadecimal digit characters into the byte they encode.
 *
 * Returns the value 0..255, or -1 if either character is not a
 * hexadecimal digit.  Bytes above 127 are rejected explicitly because
 * hexindex[] has only 128 entries; indexing it with such a byte would
 * read past the end of the table.
 */
static int
unqp(unsigned char byte1, unsigned char byte2)
{
	if (byte1 > 127 || byte2 > 127)
		return (-1);
	if (hexindex[byte1] == -1 || hexindex[byte2] == -1)
		return (-1);
	return (hexindex[byte1] << 4 | hexindex[byte2]);
}

/* Check if character is linear whitespace */
#define	is_lws(c)	((c) == ' ' || (c) == '\t' || (c) == '\n')

/*
 * Decode the "Q" encoded text in [src, end) into out and return the
 * advanced output pointer.
 *
 * '_' becomes a space and "=XX" becomes the byte with hex value XX
 * (a decoded NUL byte is dropped so it cannot truncate the result).
 * A '=' that is not followed by two hex digits is skipped and the
 * characters after it are copied literally.
 *
 * The caller must guarantee that end[0] and end[1] are readable (they
 * are the terminating '?' and '=' of the encoded word), so looking two
 * characters ahead of any position before end stays inside the string.
 */
static char *
decode_q_text(const char *src, const char *end, char *out)
{
	const char *pp;
	int c;

	for (pp = src; pp < end; pp++) {
		if (*pp == '=') {
			c = unqp((unsigned char)pp[1], (unsigned char)pp[2]);
			if (c == -1)
				continue;
			if (c != 0)
				*out++ = (char)c;
			pp += 2;	/* skip the two hex digits */
		} else if (*pp == '_') {
			*out++ = ' ';
		} else {
			*out++ = *pp;
		}
	}
	return (out);
}

/*
 * Decode the "B" (base64) encoded text in [src, end) into out and
 * return the advanced output pointer.
 *
 * Characters outside the base64 alphabet, including '=' padding, are
 * skipped.  Every valid character contributes six bits; a byte is
 * emitted as soon as eight bits are available.  Left-over bits that do
 * not make up a whole byte are discarded.
 */
static char *
decode_b_text(const char *src, const char *end, char *out)
{
	const char *pp;
	unsigned int acc = 0;	/* pending bits, right-aligned */
	int nbits = 0;		/* number of pending bits in acc */
	int c;

	for (pp = src; pp < end; pp++) {
		c = char64(*pp);
		if (c == -1)
			continue;
		acc = (acc << 6) | (unsigned int)c;
		nbits += 6;
		if (nbits >= 8) {
			nbits -= 8;
			*out++ = (char)((acc >> nbits) & 0xFF);
			acc &= (1u << nbits) - 1;
		}
	}
	return (out);
}

/*
 * Decode the string as a RFC-2047 header field
 *
 * str     - NUL-terminated header text to decode.
 * dst     - output buffer.  The decoded text is never longer than the
 *           input, so a buffer of strlen(str) + 1 bytes is sufficient.
 * charset - receives the NUL-terminated charset name of the last
 *           encoded word that was decoded.  No length is passed in, so
 *           the caller must supply a buffer large enough for any
 *           charset name in str (strlen(str) bytes always suffices).
 *           May be NULL if the caller does not need the name.
 *
 * Encoded words have the form "=" "?" charset "?" encoding "?" text
 * "?" "=", where encoding is B (base64) or Q (quoted-printable like),
 * in either case.  Text outside encoded words is copied unchanged,
 * except that linear whitespace separating two adjacent encoded words
 * is removed.
 *
 * Returns TRUE if at least one encoded word was decoded.  Returns FALSE
 * otherwise; note that dst and charset are left untouched when str or
 * dst is NULL or when str contains no '=' character at all.
 */
bool
decode_rfc2047(char *str, char *dst, char *charset)
{
	char *p, *q, *pp;
	char *charset_start, *charset_end;
	char *startofmime, *endofmime;
	int quoted_printable;
	bool encoding_found = FALSE;	/* did we decode anything? */
	bool between_encodings = FALSE;	/* are we between two encodings? */
	bool equals_pending = FALSE;	/* is there a '=' pending? */
	int whitespace = 0;	/* how much whitespace between encodings? */

	if (str == NULL || dst == NULL)
		return (FALSE);

	/*
	 * Do a quick and dirty check for the '=' character.
	 * This should quickly eliminate many cases.
	 */
	if (!strchr(str, '='))
		return (FALSE);

	for (p = str, q = dst; *p; p++) {
		/*
		 * If we had an '=' character pending from
		 * last iteration, then add it first.
		 */
		if (equals_pending) {
			*q++ = '=';
			equals_pending = FALSE;
			between_encodings = FALSE; /* we added non-WS text */
		}

		if (*p != '=') {
			/* count linear whitespace while between encodings */
			if (between_encodings && is_lws(*p))
				whitespace++;
			else
				between_encodings = FALSE; /* non-WS added */
			*q++ = *p;
			continue;
		}

		/*
		 * We have a '=' pending.  It is emitted on the next
		 * iteration (or after the loop) unless it turns out to
		 * start a well-formed encoded word.
		 */
		equals_pending = TRUE;

		/* An encoded word starts with '=' and '?' plus more text */
		if (p[1] != '?' || p[2] == '\0')
			continue;

		/* The charset name runs up to the next '?' character */
		charset_start = p + 2;
		for (pp = charset_start; *pp && *pp != '?'; pp++)
			;
		if (!*pp)
			continue;
		charset_end = pp;

		startofmime = pp + 1;

		/* Check for valid encoding type */
		if (*startofmime != 'B' && *startofmime != 'b' &&
		    *startofmime != 'Q' && *startofmime != 'q')
			continue;

		/* Is encoding quoted printable or base64? */
		quoted_printable = (*startofmime == 'Q' ||
		    *startofmime == 'q');
		startofmime++;

		/* Check for next '?' character */
		if (*startofmime != '?')
			continue;
		startofmime++;

		/*
		 * Scan ahead for the ending '?' '=' pair.
		 *
		 * An encoded word may not contain linear whitespace, so
		 * give up if any is seen before the terminator.
		 */
		endofmime = NULL;
		for (pp = startofmime; *pp && *(pp + 1); pp++) {
			if (is_lws(*pp))
				break;
			if (*pp == '?' && pp[1] == '=') {
				endofmime = pp;
				break;
			}
		}
		if (endofmime == NULL)
			continue;

		/*
		 * We've found an encoded word, so we can drop
		 * the '=' that was pending
		 */
		equals_pending = FALSE;

		/*
		 * Record the charset only now that the whole word has
		 * been validated, so malformed input can neither clobber
		 * the caller's buffer nor replace the charset of an
		 * earlier, valid encoded word.
		 */
		if (charset != NULL) {
			size_t len = (size_t)(charset_end - charset_start);

			memcpy(charset, charset_start, len);
			charset[len] = '\0';
		}

		/*
		 * If we are between two encoded words separated only
		 * by linear whitespace, then we ignore the whitespace.
		 * We will roll back the buffer the number of whitespace
		 * characters we've seen since last encoded word.
		 */
		if (between_encodings)
			q -= whitespace;

		/* Now decode the text */
		if (quoted_printable)
			q = decode_q_text(startofmime, endofmime, q);
		else
			q = decode_b_text(startofmime, endofmime, q);

		/*
		 * Now that we are done decoding this particular
		 * encoded word, advance string to trailing '='.
		 * The loop increment then steps past it.
		 */
		p = endofmime + 1;

		encoding_found = TRUE;		/* found (>= 1) encoded word */
		between_encodings = TRUE;	/* just decoded something */
		whitespace = 0;	/* re-initialize amount of whitespace */
	}

	/* If an equals was pending at end of string, add it now. */
	if (equals_pending)
		*q++ = '=';
	*q = '\0';

	return (encoding_found);
}