xref: /freebsd/crypto/openssl/test/punycode_test.c (revision a689bfa4e25af8307709dc12f75b0e02a65abf18)
1 /*
2  * Copyright 2022-2023 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the Apache License 2.0 (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 
10 #include <openssl/crypto.h>
11 #include <string.h>
12 
13 #include "crypto/punycode.h"
14 #include "internal/nelem.h"
15 #include "internal/packet.h"
16 #include "testutil.h"
17 
/*
 * Decoder test vectors.
 *
 * raw:     the expected code points.  Unused trailing slots are implicitly
 *          zero, and test_punycode() treats the first zero as the end of the
 *          expected sequence.
 * encoded: the punycode text handed to the decoder (no "xn--" prefix).
 */
static const struct puny_test {
    unsigned int raw[50];
    const char *encoded;
} puny_cases[] = {
    /* Test of 4 byte codepoint using smileyface emoji */
    {
        { 0x1F600 },
        "e28h"
    },

    /* Test cases from RFC 3492 */

    /* Arabic (Egyptian) */
    {
        { 0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643,
          0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A,
          0x061F },
        "egbpdaj6bu4bxfgehfvwxn"
    },
    /* Chinese (simplified) */
    {
        { 0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D,
          0x6587 },
        "ihqwcrb4cv8a8dqg056pqjye"
    },
    /* Chinese (traditional) */
    {
        { 0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D,
          0x6587 },
        "ihqwctvzc91f659drss3x8bo0yb"
    },
    /* Czech: Pro<ccaron>prost<ecaron>nemluv<iacute><ccaron>esky */
    {
        { 0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073,
          0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076,
          0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079 },
        "Proprostnemluvesky-uyb24dma41a"
    },
    /* Hebrew */
    {
        { 0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5,
          0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9,
          0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA },
        "4dbcagdahymbxekheh6e0a7fei0b"
    },
    /* Hindi (Devanagari) */
    {
        { 0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928,
          0x094D, 0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902,
          0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938,
          0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902 },
        "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"
    },
    /* Japanese (kanji and hiragana) */
    {
        { 0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E,
          0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044,
          0x306E, 0x304B },
        "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"
    },
    /* Korean (Hangul syllables) */
    {
        { 0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4,
          0xC774, 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C,
          0xB2E4, 0xBA74, 0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C },
        "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c"
    },
    /* Russian (Cyrillic) */
    {
        { 0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435,
          0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432,
          0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443,
          0x0441, 0x0441, 0x043A, 0x0438 },
        "b1abfaaepdrnnbgefbaDotcwatmq2g4l"
    },
    /* Spanish */
    {
        { 0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F,
          0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069,
          0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074,
          0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065,
          0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F, 0x006C },
        "PorqunopuedensimplementehablarenEspaol-fmd56a"
    },
    /* Vietnamese */
    {
        { 0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD,
          0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3,
          0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069,
          0x1EBF, 0x006E, 0x0067, 0x0056, 0x0069, 0x1EC7, 0x0074 },
        "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"
    },
    /* Japanese: 3<nen>B<gumi><kinpachi><sensei> */
    {
        { 0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F },
        "3B-ww4c5e180e575a65lsy2b"
    },
    /* Japanese: <amuro><namie>-with-SUPER-MONKEYS */
    {
        { 0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069,
          0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052,
          0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053 },
        "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"
    },
    /* Japanese: Hello-Another-Way-<sorezore><no><basho> */
    {
        { 0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E,
          0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061,
          0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834,
          0x6240 },
        "Hello-Another-Way--fc4qua05auwb3674vfr0b"
    },
    /* Japanese: <hitotsu><yane><no><shita>2 */
    {
        { 0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032 },
        "2-u9tlzr9756bt3uc0v"
    },
    /* Japanese: Maji<de>Koi<suru>5<byou><mae> */
    {
        { 0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069,
          0x3059, 0x308B, 0x0035, 0x79D2, 0x524D },
        "MajiKoi5-783gue6qz075azm5e"
    },
    /* Japanese: <pafii>de<runba> */
    {
        { 0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3,
          0x30D0 },
        "de-jg4avhby1noc0d"
    },
    /* Japanese: <sono><supiido><de> */
    {
        { 0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067 },
        "d9juau41awczczp"
    },
    /* -> $1.00 <- */
    {
        { 0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030,
          0x0020, 0x003C, 0x002D },
        "-> $1.00 <--"
    }
};
111 
112 static int test_punycode(int n)
113 {
114     const struct puny_test *tc = puny_cases + n;
115     unsigned int buffer[50];
116     unsigned int bsize = OSSL_NELEM(buffer);
117     size_t i;
118 
119     if (!TEST_true(ossl_punycode_decode(tc->encoded, strlen(tc->encoded),
120             buffer, &bsize)))
121         return 0;
122     for (i = 0; i < OSSL_NELEM(tc->raw); i++)
123         if (tc->raw[i] == 0)
124             break;
125     if (!TEST_mem_eq(buffer, bsize * sizeof(*buffer),
126             tc->raw, i * sizeof(*tc->raw)))
127         return 0;
128     return 1;
129 }
130 
/*
 * Inputs that ossl_a2ulabel() is expected to reject.
 *
 * outlen: the output size passed to ossl_a2ulabel(); the caller's output
 *         buffer is 20 bytes, so this must not exceed 20.
 * input:  the NUL terminated label to decode.
 */
static const struct bad_decode_test {
    size_t outlen;
    const char input[20];
} bad_decode_tests[] = {
    /* bad digit '*' */
    { 20, "xn--e-*" },
    /* loop > enc_len */
    { 10, "xn--e-999" },
    /* Too big */
    { 20, "xn--e-999999999" },
    /* Not basic */
    { 20, { 'x', 'n', '-', '-', (char)0x80, '-' } },
    /* codepoint > 0x10FFFF */
    { 20, "xn--e-Oy65t" },
};
141 
142 static int test_a2ulabel_bad_decode(int tst)
143 {
144     char out[20];
145 
146     return TEST_int_eq(ossl_a2ulabel(bad_decode_tests[tst].input, out, bad_decode_tests[tst].outlen), -1);
147 }
148 
/*
 * Exercise ossl_a2ulabel() on whole dotted names: output buffer sizing,
 * a code point that needs four UTF-8 bytes, and an over-long label.
 *
 * Returns 1 if every check passes, 0 on the first failure.
 */
static int test_a2ulabel(void)
{
    char out[50];
    char in[530] = { 0 };

    /*
     * The punycode being passed in and parsed is malformed but we're not
     * verifying that behaviour here.
     */
    if (!TEST_int_eq(ossl_a2ulabel("xn--a.b.c", out, 1), 0)
        || !TEST_int_eq(ossl_a2ulabel("xn--a.b.c", out, 7), 1))
        return 0;
    /*
     * Test for an off by one on the buffer size: the expected result is
     * 6 bytes plus the terminating NUL, so 6 must be rejected and 7 accepted.
     */
    if (!TEST_int_eq(ossl_a2ulabel("xn--a.b.c", out, 6), 0)
        || !TEST_int_eq(ossl_a2ulabel("xn--a.b.c", out, 7), 1)
        || !TEST_str_eq(out, "\xc2\x80.b.c"))
        return 0;

    /*
     * Test 4 byte smiley face.  Check the bytes written as well as the
     * return code: U+1F600 is F0 9F 98 80 in UTF-8.
     */
    if (!TEST_int_eq(ossl_a2ulabel("xn--e28h.com", out, 10), 1)
        || !TEST_str_eq(out, "\xf0\x9f\x98\x80.com"))
        return 0;

    /*
     * Test that we don't overflow the fixed internal buffer of 512 bytes
     * when the starting bytes are copied.  The input is "xn--", 513 'e'
     * characters and "-3ya"; the zero initialisation of in[] supplies the
     * terminating NUL.
     */
    strcpy(in, "xn--");
    memset(in + 4, 'e', 513);
    memcpy(in + 517, "-3ya", 4);
    if (!TEST_int_eq(ossl_a2ulabel(in, out, 50), -1))
        return 0;

    return 1;
}
180 
/*
 * Check that ossl_punycode_decode() refuses to decode into a buffer that is
 * advertised as one element too small for the result.
 *
 * Returns 1 if the decode failed as required, 0 otherwise.
 */
static int test_puny_overrun(void)
{
    static const unsigned int out[] = {
        0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F
    };
    static const char *in = "3B-ww4c5e180e575a65lsy2b";
    unsigned int buf[OSSL_NELEM(out)];
    /* Claim one slot fewer than the decoded string needs */
    unsigned int bsize = OSSL_NELEM(buf) - 1;

    if (TEST_false(ossl_punycode_decode(in, strlen(in), buf, &bsize)))
        return 1;

    /*
     * The decode wrongly succeeded.  If the complete expected output is
     * present then the decoder wrote beyond the size it was given.
     */
    if (TEST_mem_eq(buf, bsize * sizeof(*buf), out, sizeof(out)))
        TEST_error("CRITICAL: buffer overrun detected!");
    return 0;
}
197 
198 static int test_dotted_overflow(void)
199 {
200     static const char string[] = "a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a.a";
201     const size_t num_reps = OSSL_NELEM(string) / 2;
202     WPACKET p;
203     BUF_MEM *in;
204     char *out = NULL;
205     size_t i;
206     int res = 0;
207 
208     /* Create out input punycode string */
209     if (!TEST_ptr(in = BUF_MEM_new()))
210         return 0;
211     if (!TEST_true(WPACKET_init_len(&p, in, 0))) {
212         BUF_MEM_free(in);
213         return 0;
214     }
215     for (i = 0; i < num_reps; i++) {
216         if (i > 1 && !TEST_true(WPACKET_put_bytes_u8(&p, '.')))
217             goto err;
218         if (!TEST_true(WPACKET_memcpy(&p, "xn--a", sizeof("xn--a") - 1)))
219             goto err;
220     }
221     if (!TEST_true(WPACKET_put_bytes_u8(&p, '\0')))
222         goto err;
223     if (!TEST_ptr(out = OPENSSL_malloc(in->length)))
224         goto err;
225 
226     /* Test the decode into an undersized buffer */
227     memset(out, 0x7f, in->length - 1);
228     if (!TEST_int_le(ossl_a2ulabel(in->data, out, num_reps), 0)
229         || !TEST_int_eq(out[num_reps], 0x7f))
230         goto err;
231 
232     /* Test the decode works into a full size buffer */
233     if (!TEST_int_gt(ossl_a2ulabel(in->data, out, in->length), 0)
234         || !TEST_size_t_eq(strlen(out), num_reps * 3))
235         goto err;
236 
237     res = 1;
238 err:
239     WPACKET_cleanup(&p);
240     BUF_MEM_free(in);
241     OPENSSL_free(out);
242     return res;
243 }
244 
245 int setup_tests(void)
246 {
247     ADD_ALL_TESTS(test_punycode, OSSL_NELEM(puny_cases));
248     ADD_TEST(test_dotted_overflow);
249     ADD_TEST(test_a2ulabel);
250     ADD_TEST(test_puny_overrun);
251     ADD_ALL_TESTS(test_a2ulabel_bad_decode, OSSL_NELEM(bad_decode_tests));
252     return 1;
253 }
254