1*2fb4f839SGregory Neil Shapiro /*
2*2fb4f839SGregory Neil Shapiro * Copyright (c) 2020 Proofpoint, Inc. and its suppliers.
3*2fb4f839SGregory Neil Shapiro * All rights reserved.
4*2fb4f839SGregory Neil Shapiro *
5*2fb4f839SGregory Neil Shapiro * By using this file, you agree to the terms and conditions set
6*2fb4f839SGregory Neil Shapiro * forth in the LICENSE file which can be found at the top level of
7*2fb4f839SGregory Neil Shapiro * the sendmail distribution.
8*2fb4f839SGregory Neil Shapiro *
9*2fb4f839SGregory Neil Shapiro */
10*2fb4f839SGregory Neil Shapiro
11*2fb4f839SGregory Neil Shapiro #include <sm/gen.h>
12*2fb4f839SGregory Neil Shapiro #include <sm/sendmail.h>
13*2fb4f839SGregory Neil Shapiro
14*2fb4f839SGregory Neil Shapiro /*
15*2fb4f839SGregory Neil Shapiro ** based on
16*2fb4f839SGregory Neil Shapiro ** https://github.com/aox/encodings/utf.cpp
17*2fb4f839SGregory Neil Shapiro ** see license.txt included below.
18*2fb4f839SGregory Neil Shapiro */
19*2fb4f839SGregory Neil Shapiro
20*2fb4f839SGregory Neil Shapiro #if USE_EAI
21*2fb4f839SGregory Neil Shapiro #include <ctype.h>
22*2fb4f839SGregory Neil Shapiro #define SM_ISDIGIT(c) (isascii(c) && isdigit(c))
23*2fb4f839SGregory Neil Shapiro
24*2fb4f839SGregory Neil Shapiro #include <sm/assert.h>
25*2fb4f839SGregory Neil Shapiro
26*2fb4f839SGregory Neil Shapiro /* for prototype */
27*2fb4f839SGregory Neil Shapiro #include <sm/ixlen.h>
28*2fb4f839SGregory Neil Shapiro
29*2fb4f839SGregory Neil Shapiro # if 0
30*2fb4f839SGregory Neil Shapiro /*
31*2fb4f839SGregory Neil Shapiro ** RFC 6533:
32*2fb4f839SGregory Neil Shapiro **
33*2fb4f839SGregory Neil Shapiro ** In the ABNF below, all productions not defined in this document are
34*2fb4f839SGregory Neil Shapiro ** defined in Appendix B of [RFC5234], in Section 4 of [RFC3629], or in
35*2fb4f839SGregory Neil Shapiro ** [RFC3464].
36*2fb4f839SGregory Neil Shapiro **
37*2fb4f839SGregory Neil Shapiro ** utf-8-type-addr = "utf-8;" utf-8-enc-addr
38*2fb4f839SGregory Neil Shapiro ** utf-8-address = Mailbox ; Mailbox as defined in [RFC6531].
39*2fb4f839SGregory Neil Shapiro ** utf-8-enc-addr = utf-8-addr-xtext /
40*2fb4f839SGregory Neil Shapiro ** utf-8-addr-unitext /
41*2fb4f839SGregory Neil Shapiro ** utf-8-address
42*2fb4f839SGregory Neil Shapiro ** utf-8-addr-xtext = 1*(QCHAR / EmbeddedUnicodeChar)
43*2fb4f839SGregory Neil Shapiro ** ; 7bit form of utf-8-addr-unitext.
44*2fb4f839SGregory Neil Shapiro ** ; Safe for use in the ORCPT [RFC3461]
45*2fb4f839SGregory Neil Shapiro ** ; parameter even when SMTPUTF8 SMTP
46*2fb4f839SGregory Neil Shapiro ** ; extension is not advertised.
47*2fb4f839SGregory Neil Shapiro ** utf-8-addr-unitext = 1*(QUCHAR / EmbeddedUnicodeChar)
48*2fb4f839SGregory Neil Shapiro ** ; MUST follow utf-8-address ABNF when
49*2fb4f839SGregory Neil Shapiro ** ; dequoted.
50*2fb4f839SGregory Neil Shapiro ** ; Safe for using in the ORCPT [RFC3461]
51*2fb4f839SGregory Neil Shapiro ** ; parameter when SMTPUTF8 SMTP extension
52*2fb4f839SGregory Neil Shapiro ** ; is also advertised.
53*2fb4f839SGregory Neil Shapiro ** QCHAR = %x21-2a / %x2c-3c / %x3e-5b / %x5d-7e
54*2fb4f839SGregory Neil Shapiro ** ; ASCII printable characters except
55*2fb4f839SGregory Neil Shapiro ** ; CTLs, SP, '\', '+', '='.
56*2fb4f839SGregory Neil Shapiro ** QUCHAR = QCHAR / UTF8-2 / UTF8-3 / UTF8-4
57*2fb4f839SGregory Neil Shapiro ** ; ASCII printable characters except
58*2fb4f839SGregory Neil Shapiro ** ; CTLs, SP, '\', '+' and '=', plus
59*2fb4f839SGregory Neil Shapiro ** ; other Unicode characters encoded in UTF-8
60*2fb4f839SGregory Neil Shapiro ** EmbeddedUnicodeChar = %x5C.78 "{" HEXPOINT "}"
61*2fb4f839SGregory Neil Shapiro ** ; starts with "\x"
62*2fb4f839SGregory Neil Shapiro ** HEXPOINT = ( ( "0"/"1" ) %x31-39 ) / "10" / "20" /
63*2fb4f839SGregory Neil Shapiro ** "2B" / "3D" / "7F" / ; all xtext-specials
64*2fb4f839SGregory Neil Shapiro ** "5C" / (HEXDIG8 HEXDIG) / ; 2-digit forms
65*2fb4f839SGregory Neil Shapiro ** ( NZHEXDIG 2(HEXDIG) ) / ; 3-digit forms
66*2fb4f839SGregory Neil Shapiro ** ( NZDHEXDIG 3(HEXDIG) ) / ; 4-digit forms excluding
67*2fb4f839SGregory Neil Shapiro ** ( "D" %x30-37 2(HEXDIG) ) / ; ... surrogate
68*2fb4f839SGregory Neil Shapiro ** ( NZHEXDIG 4(HEXDIG) ) / ; 5-digit forms
69*2fb4f839SGregory Neil Shapiro ** ( "10" 4*HEXDIG ) ; 6-digit forms
70*2fb4f839SGregory Neil Shapiro ** ; represents either "\" or a Unicode code point outside
71*2fb4f839SGregory Neil Shapiro ** ; the ASCII repertoire
72*2fb4f839SGregory Neil Shapiro ** HEXDIG8 = %x38-39 / "A" / "B" / "C" / "D" / "E" / "F"
73*2fb4f839SGregory Neil Shapiro ** ; HEXDIG excluding 0-7
74*2fb4f839SGregory Neil Shapiro ** NZHEXDIG = %x31-39 / "A" / "B" / "C" / "D" / "E" / "F"
75*2fb4f839SGregory Neil Shapiro ** ; HEXDIG excluding "0"
76*2fb4f839SGregory Neil Shapiro ** NZDHEXDIG = %x31-39 / "A" / "B" / "C" / "E" / "F"
77*2fb4f839SGregory Neil Shapiro ** ; HEXDIG excluding "0" and "D"
78*2fb4f839SGregory Neil Shapiro */
79*2fb4f839SGregory Neil Shapiro # endif /* 0 */
80*2fb4f839SGregory Neil Shapiro
/*
**  UXTEXT_HEXVAL -- convert a single hexadecimal digit to its value
**
**	Parameters:
**		ch -- character to convert
**
**	Returns:
**		0..15: value of the hexadecimal digit ch
**		-1: ch is not a hexadecimal digit (this includes '\0')
*/

static int
uxtext_hexval(ch)
	int ch;
{
	if (ch >= '0' && ch <= '9')
		return ch - '0';
	if (ch >= 'a' && ch <= 'f')
		return ch - 'a' + 10;
	if (ch >= 'A' && ch <= 'F')
		return ch - 'A' + 10;
	return -1;
}

/*
**  UXTEXT_UNQUOTE -- "unquote" a utf-8-addr-unitext
**
**	Every EmbeddedUnicodeChar ("\x{HEX}") in the input is replaced by
**	the UTF-8 encoding of the code point HEX; all other bytes are
**	copied unchanged.
**
**	Parameters:
**		quoted -- original string [x]
**		unquoted -- "decoded" string [x] (buffer provided by caller)
**			if NULL this is basically a syntax check.
**		olen -- length of unquoted (must be > 0)
**
**	Returns:
**		>0: length of "decoded" string, including the terminating
**			'\0' (i.e., number of bytes used in unquoted)
**		<0: error:
**			-olen: result (including '\0') does not fit into
**				olen bytes
**			otherwise: syntax error or invalid code point in
**				"\x{...}": missing '}', non-hex digit,
**				value > 0x10ffff, value 0 (would embed
**				a '\0'), or UTF-16 surrogate
**				(0xd800-0xdfff); the value is the negated
**				number of bytes decoded before the error
**				(-1 if there were none).
*/

int
uxtext_unquote(quoted, unquoted, olen)
	const char *quoted;
	char *unquoted;
	int olen;
{
	const unsigned char *cp;
	int ch, len;

/* error return value: must be negative even if nothing was decoded yet */
#define UXERR(len)	((len) > 0 ? 0 - (len) : -1)

/* append one byte (if there is a buffer), fail if it does not fit */
#define APPCH(ch) do	\
	{		\
		if (len >= olen)	\
			return 0 - olen;	\
		if (NULL != unquoted)	\
			unquoted[len] = (char) (ch);	\
		len++;	\
	} while (0)

	SM_REQUIRE(olen > 0);
	SM_REQUIRE(NULL != quoted);
	len = 0;
	for (cp = (const unsigned char *) quoted; (ch = *cp) != 0; cp++)
	{
		int uc, hv;

		/*
		**  Not the start of "\x{": copy the byte as is.
		**  The tests short-circuit, hence cp[2] is only read
		**  if cp[1] is not the end of the string.
		*/

		if (ch != '\\' || cp[1] != 'x' || cp[2] != '{')
		{
			APPCH(ch);
			continue;
		}

		/* collect the hex digits between '{' and '}' */
		uc = 0;
		cp += 2;	/* cp points to '{' */
		while ((ch = *++cp) != '}')
		{
			/* also catches '\0', i.e., a missing '}' */
			hv = uxtext_hexval(ch);
			if (hv < 0)
				return UXERR(len);
			uc = (uc << 4) + hv;

			/* checked per digit, hence uc << 4 cannot overflow */
			if (uc > 0x10ffff)
				return UXERR(len);
		}

		/*
		**  Code point 0 (this includes "\x{}") would place a '\0'
		**  in the middle of the result; it is not a HEXPOINT.
		*/

		if (uc == 0)
			return UXERR(len);

		/* surrogates must not be encoded in UTF-8 (RFC 3629) */
		if (uc >= 0xd800 && uc <= 0xdfff)
			return UXERR(len);

		/* uc <= 0x10ffff: at most 4 bytes are needed */
		if (uc < 0x80)
			APPCH(uc);
		else if (uc < 0x800)
		{
			APPCH(0xc0 | (uc >> 6));
			APPCH(0x80 | (uc & 0x3f));
		}
		else if (uc < 0x10000)
		{
			APPCH(0xe0 | (uc >> 12));
			APPCH(0x80 | ((uc >> 6) & 0x3f));
			APPCH(0x80 | (uc & 0x3f));
		}
		else
		{
			APPCH(0xf0 | (uc >> 18));
			APPCH(0x80 | ((uc >> 12) & 0x3f));
			APPCH(0x80 | ((uc >> 6) & 0x3f));
			APPCH(0x80 | (uc & 0x3f));
		}
	}

	/* the terminating '\0' is counted in the returned length */
	APPCH('\0');
	return len;
}
#undef APPCH
#undef UXERR
181*2fb4f839SGregory Neil Shapiro
182*2fb4f839SGregory Neil Shapiro # if 0
183*2fb4f839SGregory Neil Shapiro aox/doc/readme/license.txt
184*2fb4f839SGregory Neil Shapiro
185*2fb4f839SGregory Neil Shapiro Copyright (c) 2003-2014, Archiveopteryx and its contributors.
186*2fb4f839SGregory Neil Shapiro
187*2fb4f839SGregory Neil Shapiro Permission to use, copy, modify, and distribute this software and its
188*2fb4f839SGregory Neil Shapiro documentation for any purpose, without fee, and without a written
189*2fb4f839SGregory Neil Shapiro agreement is hereby granted, provided that the above copyright notice
190*2fb4f839SGregory Neil Shapiro and this paragraph and the following two paragraphs appear in all
191*2fb4f839SGregory Neil Shapiro copies.
192*2fb4f839SGregory Neil Shapiro
193*2fb4f839SGregory Neil Shapiro IN NO EVENT SHALL ORYX BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
194*2fb4f839SGregory Neil Shapiro SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
195*2fb4f839SGregory Neil Shapiro ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
196*2fb4f839SGregory Neil Shapiro ORYX HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
197*2fb4f839SGregory Neil Shapiro
198*2fb4f839SGregory Neil Shapiro ORYX SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED
199*2fb4f839SGregory Neil Shapiro TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
200*2fb4f839SGregory Neil Shapiro PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
201*2fb4f839SGregory Neil Shapiro BASIS, AND ORYX HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT,
202*2fb4f839SGregory Neil Shapiro UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
203*2fb4f839SGregory Neil Shapiro # endif /* 0 */
204*2fb4f839SGregory Neil Shapiro #endif /* USE_EAI */
205