1 /* 2 * Copyright (c) 2020 Proofpoint, Inc. and its suppliers. 3 * All rights reserved. 4 * 5 * By using this file, you agree to the terms and conditions set 6 * forth in the LICENSE file which can be found at the top level of 7 * the sendmail distribution. 8 * 9 */ 10 11 #include <sm/gen.h> 12 #include <sm/sendmail.h> 13 14 /* 15 ** based on 16 ** https://github.com/aox/encodings/utf.cpp 17 ** see license.txt included below. 18 */ 19 20 #if USE_EAI 21 #include <ctype.h> 22 #define SM_ISDIGIT(c) (isascii(c) && isdigit(c)) 23 24 #include <sm/assert.h> 25 26 /* for prototype */ 27 #include <sm/ixlen.h> 28 29 # if 0 30 /* 31 ** RFC 6533: 32 ** 33 ** In the ABNF below, all productions not defined in this document are 34 ** defined in Appendix B of [RFC5234], in Section 4 of [RFC3629], or in 35 ** [RFC3464]. 36 ** 37 ** utf-8-type-addr = "utf-8;" utf-8-enc-addr 38 ** utf-8-address = Mailbox ; Mailbox as defined in [RFC6531]. 39 ** utf-8-enc-addr = utf-8-addr-xtext / 40 ** utf-8-addr-unitext / 41 ** utf-8-address 42 ** utf-8-addr-xtext = 1*(QCHAR / EmbeddedUnicodeChar) 43 ** ; 7bit form of utf-8-addr-unitext. 44 ** ; Safe for use in the ORCPT [RFC3461] 45 ** ; parameter even when SMTPUTF8 SMTP 46 ** ; extension is not advertised. 47 ** utf-8-addr-unitext = 1*(QUCHAR / EmbeddedUnicodeChar) 48 ** ; MUST follow utf-8-address ABNF when 49 ** ; dequoted. 50 ** ; Safe for using in the ORCPT [RFC3461] 51 ** ; parameter when SMTPUTF8 SMTP extension 52 ** ; is also advertised. 53 ** QCHAR = %x21-2a / %x2c-3c / %x3e-5b / %x5d-7e 54 ** ; ASCII printable characters except 55 ** ; CTLs, SP, '\', '+', '='. 56 ** QUCHAR = QCHAR / UTF8-2 / UTF8-3 / UTF8-4 57 ** ; ASCII printable characters except 58 ** ; CTLs, SP, '\', '+' and '=', plus 59 ** ; other Unicode characters encoded in UTF-8 60 ** EmbeddedUnicodeChar = %x5C.78 "{" HEXPOINT "}" 61 ** ; starts with "\x" 62 ** HEXPOINT = ( ( "0"/"1" ) %x31-39 ) / "10" / "20" / 63 ** "2B" / "3D" / "7F" / ; all xtext-specials 64 ** "5C" / (HEXDIG8 HEXDIG) / ; 2-digit forms 65 ** ( NZHEXDIG 2(HEXDIG) ) / ; 3-digit forms 66 ** ( NZDHEXDIG 3(HEXDIG) ) / ; 4-digit forms excluding 67 ** ( "D" %x30-37 2(HEXDIG) ) / ; ... surrogate 68 ** ( NZHEXDIG 4(HEXDIG) ) / ; 5-digit forms 69 ** ( "10" 4*HEXDIG ) ; 6-digit forms 70 ** ; represents either "\" or a Unicode code point outside 71 ** ; the ASCII repertoire 72 ** HEXDIG8 = %x38-39 / "A" / "B" / "C" / "D" / "E" / "F" 73 ** ; HEXDIG excluding 0-7 74 ** NZHEXDIG = %x31-39 / "A" / "B" / "C" / "D" / "E" / "F" 75 ** ; HEXDIG excluding "0" 76 ** NZDHEXDIG = %x31-39 / "A" / "B" / "C" / "E" / "F" 77 ** ; HEXDIG excluding "0" and "D" 78 */ 79 # endif /* 0 */ 80 81 /* 82 ** UXTEXT_UNQUOTE -- "unquote" a utf-8-addr-unitext 83 ** 84 ** Parameters: 85 ** quoted -- original string [x] 86 ** unquoted -- "decoded" string [x] (buffer provided by caller) 87 ** if NULL this is basically a syntax check. 88 ** olen -- length of unquoted (must be > 0) 89 ** 90 ** Returns: 91 ** >0: length of "decoded" string 92 ** <0: error 93 */ 94 95 int 96 uxtext_unquote(quoted, unquoted, olen) 97 const char *quoted; 98 char *unquoted; 99 int olen; 100 { 101 const unsigned char *cp; 102 int ch, len; 103 104 #define APPCH(ch) do \ 105 { \ 106 if (len >= olen) \ 107 return 0 - olen; \ 108 if (NULL != unquoted) \ 109 unquoted[len] = (char) (ch); \ 110 len++; \ 111 } while (0) 112 113 SM_REQUIRE(olen > 0); 114 SM_REQUIRE(NULL != quoted); 115 len = 0; 116 for (cp = (const unsigned char *) quoted; (ch = *cp) != 0; cp++) 117 { 118 if (ch == '\\' && cp[1] == 'x' && cp[2] == '{') 119 { 120 int uc = 0; 121 122 cp += 2; 123 while ((ch = *++cp) != '}') 124 { 125 if (SM_ISDIGIT(ch)) 126 uc = (uc << 4) + (ch - '0'); 127 else if (ch >= 'a' && ch <= 'f') 128 uc = (uc << 4) + (ch - 'a' + 10); 129 else if (ch >= 'A' && ch <= 'F') 130 uc = (uc << 4) + (ch - 'A' + 10); 131 else 132 return 0 - len; 133 if (uc > 0x10ffff) 134 return 0 - len; 135 } 136 137 if (uc < 0x80) 138 APPCH(uc); 139 else if (uc < 0x800) 140 { 141 APPCH(0xc0 | ((char) (uc >> 6))); 142 APPCH(0x80 | ((char) (uc & 0x3f))); 143 } 144 else if (uc < 0x10000) 145 { 146 APPCH(0xe0 | ((char) (uc >> 12))); 147 APPCH(0x80 | ((char) (uc >> 6) & 0x3f)); 148 APPCH(0x80 | ((char) (uc & 0x3f))); 149 } 150 else if (uc < 0x200000) 151 { 152 APPCH(0xf0 | ((char) (uc >> 18))); 153 APPCH(0x80 | ((char) (uc >> 12) & 0x3f)); 154 APPCH(0x80 | ((char) (uc >> 6) & 0x3f)); 155 APPCH(0x80 | ((char) (uc & 0x3f))); 156 } 157 else if (uc < 0x4000000) 158 { 159 APPCH(0xf8 | ((char) (uc >> 24))); 160 APPCH(0x80 | ((char) (uc >> 18) & 0x3f)); 161 APPCH(0x80 | ((char) (uc >> 12) & 0x3f)); 162 APPCH(0x80 | ((char) (uc >> 6) & 0x3f)); 163 APPCH(0x80 | ((char) (uc & 0x3f))); 164 } 165 else 166 { 167 APPCH(0xfc | ((char) (uc >> 30))); 168 APPCH(0x80 | ((char) (uc >> 24) & 0x3f)); 169 APPCH(0x80 | ((char) (uc >> 18) & 0x3f)); 170 APPCH(0x80 | ((char) (uc >> 12) & 0x3f)); 171 APPCH(0x80 | ((char) (uc >> 6) & 0x3f)); 172 APPCH(0x80 | ((char) (uc & 0x3f))); 173 } 174 } 175 else 176 APPCH(ch); 177 } 178 APPCH('\0'); 179 return len; 180 } 181 182 # if 0 183 aox/doc/readme/license.txt 184 185 Copyright (c) 2003-2014, Archiveopteryx and its contributors. 186 187 Permission to use, copy, modify, and distribute this software and its 188 documentation for any purpose, without fee, and without a written 189 agreement is hereby granted, provided that the above copyright notice 190 and this paragraph and the following two paragraphs appear in all 191 copies. 192 193 IN NO EVENT SHALL ORYX BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, 194 SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, 195 ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF 196 ORYX HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 197 198 ORYX SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED 199 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 200 PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" 201 BASIS, AND ORYX HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, 202 UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 203 # endif /* 0 */ 204 #endif /* USE_EAI */ 205