1 /*
2 * Copyright (c) 2020 Proofpoint, Inc. and its suppliers.
3 * All rights reserved.
4 *
5 * By using this file, you agree to the terms and conditions set
6 * forth in the LICENSE file which can be found at the top level of
7 * the sendmail distribution.
8 *
9 */
10
11 #include <sm/gen.h>
12 #include <sm/sendmail.h>
13
14 /*
15 ** based on
16 ** https://github.com/aox/encodings/utf.cpp
17 ** see license.txt included below.
18 */
19
20 #if USE_EAI
21 #include <ctype.h>
22 #define SM_ISDIGIT(c) (isascii(c) && isdigit(c))
23
24 #include <sm/assert.h>
25
26 /* for prototype */
27 #include <sm/ixlen.h>
28
29 # if 0
30 /*
31 ** RFC 6533:
32 **
33 ** In the ABNF below, all productions not defined in this document are
34 ** defined in Appendix B of [RFC5234], in Section 4 of [RFC3629], or in
35 ** [RFC3464].
36 **
37 ** utf-8-type-addr = "utf-8;" utf-8-enc-addr
38 ** utf-8-address = Mailbox ; Mailbox as defined in [RFC6531].
39 ** utf-8-enc-addr = utf-8-addr-xtext /
40 ** utf-8-addr-unitext /
41 ** utf-8-address
42 ** utf-8-addr-xtext = 1*(QCHAR / EmbeddedUnicodeChar)
43 ** ; 7bit form of utf-8-addr-unitext.
44 ** ; Safe for use in the ORCPT [RFC3461]
45 ** ; parameter even when SMTPUTF8 SMTP
46 ** ; extension is not advertised.
47 ** utf-8-addr-unitext = 1*(QUCHAR / EmbeddedUnicodeChar)
48 ** ; MUST follow utf-8-address ABNF when
49 ** ; dequoted.
50 ** ; Safe for using in the ORCPT [RFC3461]
51 ** ; parameter when SMTPUTF8 SMTP extension
52 ** ; is also advertised.
53 ** QCHAR = %x21-2a / %x2c-3c / %x3e-5b / %x5d-7e
54 ** ; ASCII printable characters except
55 ** ; CTLs, SP, '\', '+', '='.
56 ** QUCHAR = QCHAR / UTF8-2 / UTF8-3 / UTF8-4
57 ** ; ASCII printable characters except
58 ** ; CTLs, SP, '\', '+' and '=', plus
59 ** ; other Unicode characters encoded in UTF-8
60 ** EmbeddedUnicodeChar = %x5C.78 "{" HEXPOINT "}"
61 ** ; starts with "\x"
62 ** HEXPOINT = ( ( "0"/"1" ) %x31-39 ) / "10" / "20" /
63 ** "2B" / "3D" / "7F" / ; all xtext-specials
64 ** "5C" / (HEXDIG8 HEXDIG) / ; 2-digit forms
65 ** ( NZHEXDIG 2(HEXDIG) ) / ; 3-digit forms
66 ** ( NZDHEXDIG 3(HEXDIG) ) / ; 4-digit forms excluding
67 ** ( "D" %x30-37 2(HEXDIG) ) / ; ... surrogate
68 ** ( NZHEXDIG 4(HEXDIG) ) / ; 5-digit forms
69 ** ( "10" 4*HEXDIG ) ; 6-digit forms
70 ** ; represents either "\" or a Unicode code point outside
71 ** ; the ASCII repertoire
72 ** HEXDIG8 = %x38-39 / "A" / "B" / "C" / "D" / "E" / "F"
73 ** ; HEXDIG excluding 0-7
74 ** NZHEXDIG = %x31-39 / "A" / "B" / "C" / "D" / "E" / "F"
75 ** ; HEXDIG excluding "0"
76 ** NZDHEXDIG = %x31-39 / "A" / "B" / "C" / "E" / "F"
77 ** ; HEXDIG excluding "0" and "D"
78 */
79 # endif /* 0 */
80
/*
**  UXTEXT_UNQUOTE -- "unquote" a utf-8-addr-unitext
**
**	Copies quoted to unquoted, replacing every "\x{HEX}" sequence
**	(EmbeddedUnicodeChar, RFC 6533) by the UTF-8 encoding of the
**	code point HEX.  All other bytes are copied unchanged.
**	Hex digits are accepted in upper and lower case.
**
**	Parameters:
**		quoted -- original string [x]
**		unquoted -- "decoded" string [x] (buffer provided by caller)
**			if NULL this is basically a syntax check.
**		olen -- length of unquoted (must be > 0)
**
**	Returns:
**		>0: length of "decoded" string (counting the trailing '\0')
**		<0: error
**			-olen: result (incl. trailing '\0') does not fit
**				into olen bytes.
**			otherwise: malformed "\x{...}" sequence: not
**				terminated by '}', non-hex character,
**				code point 0 (this includes "\x{}"),
**				surrogate (U+D800..U+DFFF), or code point
**				larger than U+10FFFF.  The value is the
**				negated number of bytes decoded before the
**				bad sequence, or -1 if there were none.
*/

int
uxtext_unquote(quoted, unquoted, olen)
	const char *quoted;
	char *unquoted;
	int olen;
{
	const unsigned char *cp;
	int ch, len;

	/* append one byte, bail out with -olen if the buffer is full */
#define APPCH(byte) do \
	{ \
		if (len >= olen) \
			return 0 - olen; \
		if (NULL != unquoted) \
			unquoted[len] = (char) (byte); \
		len++; \
	} while (0)

	/*
	**  Error code for malformed input: must be negative even if
	**  nothing has been decoded yet (0 - len would be 0 then).
	*/
#define UXT_BAD	((len > 0) ? (0 - len) : -1)

	SM_REQUIRE(olen > 0);
	SM_REQUIRE(NULL != quoted);
	len = 0;
	for (cp = (const unsigned char *) quoted; (ch = *cp) != 0; cp++)
	{
		int uc;

		/* anything but the start of "\x{" is copied verbatim */
		if (ch != '\\' || cp[1] != 'x' || cp[2] != '{')
		{
			APPCH(ch);
			continue;
		}

		/* collect the hex digits up to the closing '}' */
		uc = 0;
		cp += 2;
		while ((ch = *++cp) != '}')
		{
			if (SM_ISDIGIT(ch))
				uc = (uc << 4) + (ch - '0');
			else if (ch >= 'a' && ch <= 'f')
				uc = (uc << 4) + (ch - 'a' + 10);
			else if (ch >= 'A' && ch <= 'F')
				uc = (uc << 4) + (ch - 'A' + 10);
			else
			{
				/* includes '\0': sequence is not terminated */
				return UXT_BAD;
			}

			/* checked per digit, hence uc cannot overflow */
			if (uc > 0x10ffff)
				return UXT_BAD;
		}

		/*
		**  U+0000 (also the result of an empty "\x{}") would
		**  put a '\0' into the middle of the decoded string;
		**  surrogates have no valid UTF-8 encoding (RFC 3629).
		**  Neither is a valid HEXPOINT.
		*/

		if (uc == 0 || (uc >= 0xd800 && uc <= 0xdfff))
			return UXT_BAD;

		/* uc <= 0x10ffff: at most four bytes are needed */
		if (uc < 0x80)
			APPCH(uc);
		else if (uc < 0x800)
		{
			APPCH(0xc0 | (uc >> 6));
			APPCH(0x80 | (uc & 0x3f));
		}
		else if (uc < 0x10000)
		{
			APPCH(0xe0 | (uc >> 12));
			APPCH(0x80 | ((uc >> 6) & 0x3f));
			APPCH(0x80 | (uc & 0x3f));
		}
		else
		{
			APPCH(0xf0 | (uc >> 18));
			APPCH(0x80 | ((uc >> 12) & 0x3f));
			APPCH(0x80 | ((uc >> 6) & 0x3f));
			APPCH(0x80 | (uc & 0x3f));
		}
	}
	APPCH('\0');
	return len;
}
#undef UXT_BAD
#undef APPCH
181
182 # if 0
183 aox/doc/readme/license.txt
184
185 Copyright (c) 2003-2014, Archiveopteryx and its contributors.
186
187 Permission to use, copy, modify, and distribute this software and its
188 documentation for any purpose, without fee, and without a written
189 agreement is hereby granted, provided that the above copyright notice
190 and this paragraph and the following two paragraphs appear in all
191 copies.
192
193 IN NO EVENT SHALL ORYX BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
194 SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
195 ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
196 ORYX HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
197
198 ORYX SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED
199 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
200 PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
201 BASIS, AND ORYX HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT,
202 UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
203 # endif /* 0 */
204 #endif /* USE_EAI */
205