xref: /freebsd/contrib/sendmail/libsm/uxtext_unquote.c (revision 05427f4639bcf2703329a9be9d25ec09bb782742)
1 /*
2  * Copyright (c) 2020 Proofpoint, Inc. and its suppliers.
3  *	All rights reserved.
4  *
5  * By using this file, you agree to the terms and conditions set
6  * forth in the LICENSE file which can be found at the top level of
7  * the sendmail distribution.
8  *
9  */
10 
11 #include <sm/gen.h>
12 #include <sm/sendmail.h>
13 
14 /*
15 **  based on
16 **  https://github.com/aox/encodings/utf.cpp
17 **  see license.txt included below.
18 */
19 
20 #if USE_EAI
21 #include <ctype.h>
22 #define SM_ISDIGIT(c)	(isascii(c) && isdigit(c))
23 
24 #include <sm/assert.h>
25 
26 /* for prototype */
27 #include <sm/ixlen.h>
28 
29 # if 0
30 /*
31 **   RFC 6533:
32 **
33 **   In the ABNF below, all productions not defined in this document are
34 **   defined in Appendix B of [RFC5234], in Section 4 of [RFC3629], or in
35 **   [RFC3464].
36 **
37 **   utf-8-type-addr     = "utf-8;" utf-8-enc-addr
38 **   utf-8-address       = Mailbox ; Mailbox as defined in [RFC6531].
39 **   utf-8-enc-addr      = utf-8-addr-xtext /
40 **                         utf-8-addr-unitext /
41 **                         utf-8-address
42 **   utf-8-addr-xtext    = 1*(QCHAR / EmbeddedUnicodeChar)
43 **                         ; 7bit form of utf-8-addr-unitext.
44 **                         ; Safe for use in the ORCPT [RFC3461]
45 **                         ; parameter even when SMTPUTF8 SMTP
46 **                         ; extension is not advertised.
47 **   utf-8-addr-unitext  = 1*(QUCHAR / EmbeddedUnicodeChar)
48 **                       ; MUST follow utf-8-address ABNF when
49 **                       ; dequoted.
50 **                       ; Safe for using in the ORCPT [RFC3461]
51 **                       ; parameter when SMTPUTF8 SMTP extension
52 **                       ; is also advertised.
53 **   QCHAR              = %x21-2a / %x2c-3c / %x3e-5b / %x5d-7e
54 **                       ; ASCII printable characters except
55 **                       ; CTLs, SP, '\', '+', '='.
56 **   QUCHAR              = QCHAR / UTF8-2 / UTF8-3 / UTF8-4
57 **                       ; ASCII printable characters except
58 **                       ; CTLs, SP, '\', '+' and '=', plus
59 **                       ; other Unicode characters encoded in UTF-8
60 **   EmbeddedUnicodeChar =   %x5C.78 "{" HEXPOINT "}"
61 **                       ; starts with "\x"
62 **   HEXPOINT = ( ( "0"/"1" ) %x31-39 ) / "10" / "20" /
63 **              "2B" / "3D" / "7F" /         ; all xtext-specials
64 **              "5C" / (HEXDIG8 HEXDIG) /    ; 2-digit forms
65 **              ( NZHEXDIG 2(HEXDIG) ) /     ; 3-digit forms
66 **              ( NZDHEXDIG 3(HEXDIG) ) /    ; 4-digit forms excluding
67 **              ( "D" %x30-37 2(HEXDIG) ) /  ; ... surrogate
68 **              ( NZHEXDIG 4(HEXDIG) ) /     ; 5-digit forms
69 **              ( "10" 4*HEXDIG )            ; 6-digit forms
70 **              ; represents either "\" or a Unicode code point outside
71 **              ; the ASCII repertoire
72 **   HEXDIG8             = %x38-39 / "A" / "B" / "C" / "D" / "E" / "F"
73 **                       ; HEXDIG excluding 0-7
74 **   NZHEXDIG            = %x31-39 / "A" / "B" / "C" / "D" / "E" / "F"
75 **                       ; HEXDIG excluding "0"
76 **   NZDHEXDIG           = %x31-39 / "A" / "B" / "C" / "E" / "F"
77 **                       ; HEXDIG excluding "0" and "D"
78 */
79 # endif /* 0 */
80 
/*
**  UXTEXT_UNQUOTE -- "unquote" a utf-8-addr-unitext
**
**	Copies quoted to unquoted; every EmbeddedUnicodeChar of the form
**	\x{HEX} is replaced by the UTF-8 encoding of that code point,
**	all other bytes are copied unchanged.
**
**	Parameters:
**		quoted -- original string [x]
**		unquoted -- "decoded" string [x] (buffer provided by caller)
**			if NULL this is basically a syntax check.
**		olen -- length of unquoted (must be > 0)
**
**	Returns:
**		>0: length of "decoded" string,
**			including the terminating NUL
**		<0: error:
**			-olen if the result does not fit into olen bytes,
**			otherwise the negated number of bytes produced
**			before the malformed \x{...} sequence
**			(-1 if nothing had been produced yet).
**
**	A \x{...} sequence is malformed if it is not terminated by '}',
**	contains a non-hex character, denotes code point 0 (this includes
**	the empty form \x{}), a surrogate (U+D800 - U+DFFF), or a value
**	above U+10FFFF.
*/

int
uxtext_unquote(quoted, unquoted, olen)
	const char *quoted;
	char *unquoted;
	int olen;
{
	const unsigned char *cp;
	int ch, len;

	/* append one byte to the output; fail if the buffer is full */
#define APPCH(c) do	\
	{		\
		if (len >= olen)	\
			return 0 - olen;	\
		if (NULL != unquoted)	\
			unquoted[len] = (char) (c);	\
		len++;	\
	} while (0)

	SM_REQUIRE(olen > 0);
	SM_REQUIRE(NULL != quoted);
	len = 0;
	for (cp = (const unsigned char *) quoted; (ch = *cp) != 0; cp++)
	{
		int uc;

		/* anything but the start of \x{ is copied literally */
		if (ch != '\\' || cp[1] != 'x' || cp[2] != '{')
		{
			APPCH(ch);
			continue;
		}

		/* skip "\x"; cp is now at '{' and advanced inside the loop */
		cp += 2;
		uc = 0;
		while ((ch = *++cp) != '}')
		{
			if (SM_ISDIGIT(ch))
				uc = (uc << 4) + (ch - '0');
			else if (ch >= 'a' && ch <= 'f')
				uc = (uc << 4) + (ch - 'a' + 10);
			else if (ch >= 'A' && ch <= 'F')
				uc = (uc << 4) + (ch - 'A' + 10);
			else
			{
				/* also catches end of string before '}' */
				goto syntax;
			}

			/* checked per digit, hence uc cannot overflow */
			if (uc > 0x10ffff)
				goto syntax;
		}

		/*
		**  Code point 0 would put a NUL into the middle of the
		**  result (silently truncating it), and surrogates
		**  must not be encoded in UTF-8.
		*/

		if (uc == 0 || (uc >= 0xd800 && uc <= 0xdfff))
			goto syntax;

		/* uc <= 0x10ffff here: at most 4 bytes are needed */
		if (uc < 0x80)
			APPCH(uc);
		else if (uc < 0x800)
		{
			APPCH(0xc0 | (uc >> 6));
			APPCH(0x80 | (uc & 0x3f));
		}
		else if (uc < 0x10000)
		{
			APPCH(0xe0 | (uc >> 12));
			APPCH(0x80 | ((uc >> 6) & 0x3f));
			APPCH(0x80 | (uc & 0x3f));
		}
		else
		{
			APPCH(0xf0 | (uc >> 18));
			APPCH(0x80 | ((uc >> 12) & 0x3f));
			APPCH(0x80 | ((uc >> 6) & 0x3f));
			APPCH(0x80 | (uc & 0x3f));
		}
	}
	APPCH('\0');
	return len;

  syntax:
	/* always negative, even if no output has been produced yet */
	return (len > 0) ? 0 - len : -1;
}
#undef APPCH
181 
182 # if 0
183 aox/doc/readme/license.txt
184 
185 Copyright (c) 2003-2014, Archiveopteryx and its contributors.
186 
187 Permission to use, copy, modify, and distribute this software and its
188 documentation for any purpose, without fee, and without a written
189 agreement is hereby granted, provided that the above copyright notice
190 and this paragraph and the following two paragraphs appear in all
191 copies.
192 
193 IN NO EVENT SHALL ORYX BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
194 SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS,
195 ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF
196 ORYX HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
197 
198 ORYX SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED
199 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
200 PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
201 BASIS, AND ORYX HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT,
202 UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
203 # endif /* 0 */
204 #endif /* USE_EAI */
205