xref: /freebsd/contrib/sendmail/libsm/uxtext_unquote.c (revision 2fb4f839f3fc72ce2bab12f9ba4760f97f73e97f)
1*2fb4f839SGregory Neil Shapiro /*
2*2fb4f839SGregory Neil Shapiro  * Copyright (c) 2020 Proofpoint, Inc. and its suppliers.
3*2fb4f839SGregory Neil Shapiro  *	All rights reserved.
4*2fb4f839SGregory Neil Shapiro  *
5*2fb4f839SGregory Neil Shapiro  * By using this file, you agree to the terms and conditions set
6*2fb4f839SGregory Neil Shapiro  * forth in the LICENSE file which can be found at the top level of
7*2fb4f839SGregory Neil Shapiro  * the sendmail distribution.
8*2fb4f839SGregory Neil Shapiro  *
9*2fb4f839SGregory Neil Shapiro  */
10*2fb4f839SGregory Neil Shapiro 
11*2fb4f839SGregory Neil Shapiro #include <sm/gen.h>
12*2fb4f839SGregory Neil Shapiro #include <sm/sendmail.h>
13*2fb4f839SGregory Neil Shapiro 
14*2fb4f839SGregory Neil Shapiro /*
15*2fb4f839SGregory Neil Shapiro **  based on
16*2fb4f839SGregory Neil Shapiro **  https://github.com/aox/encodings/utf.cpp
17*2fb4f839SGregory Neil Shapiro **  see license.txt included below.
18*2fb4f839SGregory Neil Shapiro */
19*2fb4f839SGregory Neil Shapiro 
20*2fb4f839SGregory Neil Shapiro #if USE_EAI
21*2fb4f839SGregory Neil Shapiro #include <ctype.h>
22*2fb4f839SGregory Neil Shapiro #define SM_ISDIGIT(c)	(isascii(c) && isdigit(c))
23*2fb4f839SGregory Neil Shapiro 
24*2fb4f839SGregory Neil Shapiro #include <sm/assert.h>
25*2fb4f839SGregory Neil Shapiro 
26*2fb4f839SGregory Neil Shapiro /* for prototype */
27*2fb4f839SGregory Neil Shapiro #include <sm/ixlen.h>
28*2fb4f839SGregory Neil Shapiro 
29*2fb4f839SGregory Neil Shapiro # if 0
30*2fb4f839SGregory Neil Shapiro /*
31*2fb4f839SGregory Neil Shapiro **   RFC 6533:
32*2fb4f839SGregory Neil Shapiro **
33*2fb4f839SGregory Neil Shapiro **   In the ABNF below, all productions not defined in this document are
34*2fb4f839SGregory Neil Shapiro **   defined in Appendix B of [RFC5234], in Section 4 of [RFC3629], or in
35*2fb4f839SGregory Neil Shapiro **   [RFC3464].
36*2fb4f839SGregory Neil Shapiro **
37*2fb4f839SGregory Neil Shapiro **   utf-8-type-addr     = "utf-8;" utf-8-enc-addr
38*2fb4f839SGregory Neil Shapiro **   utf-8-address       = Mailbox ; Mailbox as defined in [RFC6531].
39*2fb4f839SGregory Neil Shapiro **   utf-8-enc-addr      = utf-8-addr-xtext /
40*2fb4f839SGregory Neil Shapiro **                         utf-8-addr-unitext /
41*2fb4f839SGregory Neil Shapiro **                         utf-8-address
42*2fb4f839SGregory Neil Shapiro **   utf-8-addr-xtext    = 1*(QCHAR / EmbeddedUnicodeChar)
43*2fb4f839SGregory Neil Shapiro **                         ; 7bit form of utf-8-addr-unitext.
44*2fb4f839SGregory Neil Shapiro **                         ; Safe for use in the ORCPT [RFC3461]
45*2fb4f839SGregory Neil Shapiro **                         ; parameter even when SMTPUTF8 SMTP
46*2fb4f839SGregory Neil Shapiro **                         ; extension is not advertised.
47*2fb4f839SGregory Neil Shapiro **   utf-8-addr-unitext  = 1*(QUCHAR / EmbeddedUnicodeChar)
48*2fb4f839SGregory Neil Shapiro **                       ; MUST follow utf-8-address ABNF when
49*2fb4f839SGregory Neil Shapiro **                       ; dequoted.
50*2fb4f839SGregory Neil Shapiro **                       ; Safe for using in the ORCPT [RFC3461]
51*2fb4f839SGregory Neil Shapiro **                       ; parameter when SMTPUTF8 SMTP extension
52*2fb4f839SGregory Neil Shapiro **                       ; is also advertised.
53*2fb4f839SGregory Neil Shapiro **   QCHAR              = %x21-2a / %x2c-3c / %x3e-5b / %x5d-7e
54*2fb4f839SGregory Neil Shapiro **                       ; ASCII printable characters except
55*2fb4f839SGregory Neil Shapiro **                       ; CTLs, SP, '\', '+', '='.
56*2fb4f839SGregory Neil Shapiro **   QUCHAR              = QCHAR / UTF8-2 / UTF8-3 / UTF8-4
57*2fb4f839SGregory Neil Shapiro **                       ; ASCII printable characters except
58*2fb4f839SGregory Neil Shapiro **                       ; CTLs, SP, '\', '+' and '=', plus
59*2fb4f839SGregory Neil Shapiro **                       ; other Unicode characters encoded in UTF-8
60*2fb4f839SGregory Neil Shapiro **   EmbeddedUnicodeChar =   %x5C.78 "{" HEXPOINT "}"
61*2fb4f839SGregory Neil Shapiro **                       ; starts with "\x"
62*2fb4f839SGregory Neil Shapiro **   HEXPOINT = ( ( "0"/"1" ) %x31-39 ) / "10" / "20" /
63*2fb4f839SGregory Neil Shapiro **              "2B" / "3D" / "7F" /         ; all xtext-specials
64*2fb4f839SGregory Neil Shapiro **              "5C" / (HEXDIG8 HEXDIG) /    ; 2-digit forms
65*2fb4f839SGregory Neil Shapiro **              ( NZHEXDIG 2(HEXDIG) ) /     ; 3-digit forms
66*2fb4f839SGregory Neil Shapiro **              ( NZDHEXDIG 3(HEXDIG) ) /    ; 4-digit forms excluding
67*2fb4f839SGregory Neil Shapiro **              ( "D" %x30-37 2(HEXDIG) ) /  ; ... surrogate
68*2fb4f839SGregory Neil Shapiro **              ( NZHEXDIG 4(HEXDIG) ) /     ; 5-digit forms
69*2fb4f839SGregory Neil Shapiro **              ( "10" 4*HEXDIG )            ; 6-digit forms
70*2fb4f839SGregory Neil Shapiro **              ; represents either "\" or a Unicode code point outside
71*2fb4f839SGregory Neil Shapiro **              ; the ASCII repertoire
72*2fb4f839SGregory Neil Shapiro **   HEXDIG8             = %x38-39 / "A" / "B" / "C" / "D" / "E" / "F"
73*2fb4f839SGregory Neil Shapiro **                       ; HEXDIG excluding 0-7
74*2fb4f839SGregory Neil Shapiro **   NZHEXDIG            = %x31-39 / "A" / "B" / "C" / "D" / "E" / "F"
75*2fb4f839SGregory Neil Shapiro **                       ; HEXDIG excluding "0"
76*2fb4f839SGregory Neil Shapiro **   NZDHEXDIG           = %x31-39 / "A" / "B" / "C" / "E" / "F"
77*2fb4f839SGregory Neil Shapiro **                       ; HEXDIG excluding "0" and "D"
78*2fb4f839SGregory Neil Shapiro */
79*2fb4f839SGregory Neil Shapiro # endif /* 0 */
80*2fb4f839SGregory Neil Shapiro 
/*
**  UXTEXT_UNQUOTE -- "unquote" a utf-8-addr-unitext
**
**	Copies quoted to unquoted, replacing every embedded
**	"\x{HEXPOINT}" sequence (RFC 6533) by the UTF-8 encoding of the
**	code point given in hex.  All other bytes are copied unchanged.
**
**	Parameters:
**		quoted -- original string [x]
**		unquoted -- "decoded" string [x] (buffer provided by caller)
**			if NULL this is basically a syntax check.
**		olen -- length of unquoted (must be > 0)
**			This limit (which includes the terminating '\0')
**			is enforced even if unquoted is NULL.
**
**	Returns:
**		>0: length of "decoded" string, including the
**			terminating '\0'
**		<0: error
**			-olen: decoded string does not fit into olen bytes.
**			Otherwise a "\x{...}" sequence is invalid
**			(non-hex character, missing '}', value 0,
**			surrogate, or value > 0x10ffff); the result is the
**			negated number of bytes decoded before it, or -1
**			if nothing had been decoded yet.
*/

int
uxtext_unquote(quoted, unquoted, olen)
	const char *quoted;
	char *unquoted;
	int olen;
{
	const unsigned char *cp;
	int ch, len;

	/* append one byte; only count it if there is no output buffer */
#define APPCH(c) do	\
	{		\
		if (len >= olen)	\
			return 0 - olen;	\
		if (NULL != unquoted)	\
			unquoted[len] = (char) (c);	\
		len++;	\
	} while (0)

	SM_REQUIRE(olen > 0);
	SM_REQUIRE(NULL != quoted);
	len = 0;
	for (cp = (const unsigned char *) quoted; (ch = *cp) != 0; cp++)
	{
		int uc;

		/* anything but the start of "\x{" is copied verbatim */
		if (ch != '\\' || cp[1] != 'x' || cp[2] != '{')
		{
			APPCH(ch);
			continue;
		}

		/* skip "\x", the loop below steps over '{' first */
		cp += 2;
		uc = 0;
		while ((ch = *++cp) != '}')
		{
			int digit;

			if (SM_ISDIGIT(ch))
				digit = ch - '0';
			else if (ch >= 'a' && ch <= 'f')
				digit = ch - 'a' + 10;
			else if (ch >= 'A' && ch <= 'F')
				digit = ch - 'A' + 10;
			else
			{
				/* also catches '\0': sequence not closed */
				goto bad;
			}

			/* checked per digit, hence uc << 4 cannot overflow */
			uc = (uc << 4) + digit;
			if (uc > 0x10ffff)
				goto bad;
		}

		/*
		**  "\x{}" and "\x{0}" would put a '\0' into the middle
		**  of the result and hence truncate it silently.
		*/

		if (uc == 0)
			goto bad;

		/* surrogates are not valid in UTF-8 (RFC 3629) */
		if (uc >= 0xd800 && uc <= 0xdfff)
			goto bad;

		if (uc < 0x80)
			APPCH(uc);
		else if (uc < 0x800)
		{
			APPCH(0xc0 | (uc >> 6));
			APPCH(0x80 | (uc & 0x3f));
		}
		else if (uc < 0x10000)
		{
			APPCH(0xe0 | (uc >> 12));
			APPCH(0x80 | ((uc >> 6) & 0x3f));
			APPCH(0x80 | (uc & 0x3f));
		}
		else
		{
			/* uc <= 0x10ffff: at most four bytes are needed */
			APPCH(0xf0 | (uc >> 18));
			APPCH(0x80 | ((uc >> 12) & 0x3f));
			APPCH(0x80 | ((uc >> 6) & 0x3f));
			APPCH(0x80 | (uc & 0x3f));
		}
	}
	APPCH('\0');
	return len;

  bad:
	/* must be < 0 even if the error is at the very beginning */
	return (len > 0) ? (0 - len) : -1;
}
#undef APPCH
181*2fb4f839SGregory Neil Shapiro 
182*2fb4f839SGregory Neil Shapiro # if 0
183*2fb4f839SGregory Neil Shapiro aox/doc/readme/license.txt
184*2fb4f839SGregory Neil Shapiro 
185*2fb4f839SGregory Neil Shapiro Copyright (c) 2003-2014, Archiveopteryx and its contributors.
186*2fb4f839SGregory Neil Shapiro 
187*2fb4f839SGregory Neil Shapiro Permission to use, copy, modify, and distribute this software and its
188*2fb4f839SGregory Neil Shapiro documentation for any purpose, without fee, and without a written
189*2fb4f839SGregory Neil Shapiro agreement is hereby granted, provided that the above copyright notice
190*2fb4f839SGregory Neil Shapiro and this paragraph and the following two paragraphs appear in all
191*2fb4f839SGregory Neil Shapiro copies.
192*2fb4f839SGregory Neil Shapiro 
193*2fb4f839SGregory Neil Shapiro IN NO EVENT SHALL ORYX BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
194*2fb4f839SGregory Neil Shapiro SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
195*2fb4f839SGregory Neil Shapiro ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
196*2fb4f839SGregory Neil Shapiro ORYX HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
197*2fb4f839SGregory Neil Shapiro 
198*2fb4f839SGregory Neil Shapiro ORYX SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED
199*2fb4f839SGregory Neil Shapiro TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
200*2fb4f839SGregory Neil Shapiro PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
201*2fb4f839SGregory Neil Shapiro BASIS, AND ORYX HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT,
202*2fb4f839SGregory Neil Shapiro UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
203*2fb4f839SGregory Neil Shapiro # endif /* 0 */
204*2fb4f839SGregory Neil Shapiro #endif /* USE_EAI */
205