xref: /freebsd/contrib/libcbor/test/string_test.c (revision 5b56413d04e608379c9a306373554a8e4d321bc0)
1 /*
2  * Copyright (c) 2014-2020 Pavel Kalvoda <me@pavelkalvoda.com>
3  *
4  * libcbor is free software; you can redistribute it and/or modify
5  * it under the terms of the MIT license. See LICENSE for details.
6  */
7 
8 #include <string.h>
9 #include "assertions.h"
10 #include "cbor.h"
11 #include "test_allocator.h"
12 
13 cbor_item_t *string;
14 struct cbor_load_result res;
15 
16 unsigned char empty_string_data[] = {0x60};
17 
18 static void test_empty_string(void **_CBOR_UNUSED(_state)) {
19   string = cbor_load(empty_string_data, 1, &res);
20   assert_non_null(string);
21   assert_true(cbor_typeof(string) == CBOR_TYPE_STRING);
22   assert_true(cbor_isa_string(string));
23   assert_size_equal(cbor_string_length(string), 0);
24   assert_size_equal(cbor_string_codepoint_count(string), 0);
25   assert_true(res.read == 1);
26   cbor_decref(&string);
27   assert_null(string);
28 }
29 
30 unsigned char short_string_data[] = {0x6C, 0x48, 0x65, 0x6C, 0x6C, 0x6F, 0x20,
31                                      0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21};
32 
33 /*                              0x60 + 12 | Hello world! */
34 static void test_short_string(void **_CBOR_UNUSED(_state)) {
35   string = cbor_load(short_string_data, 13, &res);
36   assert_non_null(string);
37   assert_true(cbor_typeof(string) == CBOR_TYPE_STRING);
38   assert_true(cbor_isa_string(string));
39   assert_size_equal(cbor_string_length(string), 12);
40   assert_size_equal(cbor_string_codepoint_count(string), 12);
41   assert_memory_equal(&"Hello world!", cbor_string_handle(string), 12);
42   assert_true(res.read == 13);
43   cbor_decref(&string);
44   assert_null(string);
45 }
46 
47 unsigned char short_multibyte_string_data[] = {
48     0x6F, 0xC4, 0x8C, 0x61, 0x75, 0x65, 0x73, 0x20,
49     0xC3, 0x9F, 0x76, 0xC4, 0x9B, 0x74, 0x65, 0x21};
50 
51 /*                              0x60 + 15 | Čaues ßvěte! */
52 static void test_short_multibyte_string(void **_CBOR_UNUSED(_state)) {
53   string = cbor_load(short_multibyte_string_data, 16, &res);
54   assert_non_null(string);
55   assert_true(cbor_typeof(string) == CBOR_TYPE_STRING);
56   assert_true(cbor_isa_string(string));
57   assert_size_equal(cbor_string_length(string), 15);
58   assert_size_equal(cbor_string_codepoint_count(string), 12);
59   assert_memory_equal(&"Čaues ßvěte!", cbor_string_handle(string), 15);
60   assert_true(res.read == 16);
61   cbor_decref(&string);
62   assert_null(string);
63 }
64 
65 unsigned char int8_string_data[] = {
66     0x78, 0x96, 0x4C, 0x6F, 0x72, 0x65, 0x6D, 0x20, 0x69, 0x70, 0x73, 0x75,
67     0x6D, 0x20, 0x64, 0x6F, 0x6C, 0x6F, 0x72, 0x20, 0x73, 0x69, 0x74, 0x20,
68     0x61, 0x6D, 0x65, 0x74, 0x2C, 0x20, 0x63, 0x6F, 0x6E, 0x73, 0x65, 0x63,
69     0x74, 0x65, 0x74, 0x75, 0x72, 0x20, 0x61, 0x64, 0x69, 0x70, 0x69, 0x73,
70     0x63, 0x69, 0x6E, 0x67, 0x20, 0x65, 0x6C, 0x69, 0x74, 0x2E, 0x20, 0x44,
71     0x6F, 0x6E, 0x65, 0x63, 0x20, 0x6D, 0x69, 0x20, 0x74, 0x65, 0x6C, 0x6C,
72     0x75, 0x73, 0x2C, 0x20, 0x69, 0x61, 0x63, 0x75, 0x6C, 0x69, 0x73, 0x20,
73     0x6E, 0x65, 0x63, 0x20, 0x76, 0x65, 0x73, 0x74, 0x69, 0x62, 0x75, 0x6C,
74     0x75, 0x6D, 0x20, 0x71, 0x75, 0x69, 0x73, 0x2C, 0x20, 0x66, 0x65, 0x72,
75     0x6D, 0x65, 0x6E, 0x74, 0x75, 0x6D, 0x20, 0x6E, 0x6F, 0x6E, 0x20, 0x66,
76     0x65, 0x6C, 0x69, 0x73, 0x2E, 0x20, 0x4D, 0x61, 0x65, 0x63, 0x65, 0x6E,
77     0x61, 0x73, 0x20, 0x75, 0x74, 0x20, 0x6A, 0x75, 0x73, 0x74, 0x6F, 0x20,
78     0x70, 0x6F, 0x73, 0x75, 0x65, 0x72, 0x65, 0x2E};
79 
80 /*                                          150 | Lorem ....*/
81 static void test_int8_string(void **_CBOR_UNUSED(_state)) {
82   string = cbor_load(int8_string_data, 152, &res);
83   assert_non_null(string);
84   assert_true(cbor_typeof(string) == CBOR_TYPE_STRING);
85   assert_true(cbor_isa_string(string));
86   assert_size_equal(cbor_string_length(string), 150);
87   assert_size_equal(cbor_string_codepoint_count(string), 150);
88   assert_memory_equal(
89 		&"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec mi tellus, iaculis nec vestibulum quis, fermentum non felis. Maecenas ut justo posuere.",
90 		cbor_string_handle(string),
91 		150
92 	);
93   assert_true(res.read == 152);
94   cbor_decref(&string);
95   assert_null(string);
96 }
97 
98 unsigned char int16_string_data[] = {
99     0x79, 0x00, 0x96, 0x4C, 0x6F, 0x72, 0x65, 0x6D, 0x20, 0x69, 0x70, 0x73,
100     0x75, 0x6D, 0x20, 0x64, 0x6F, 0x6C, 0x6F, 0x72, 0x20, 0x73, 0x69, 0x74,
101     0x20, 0x61, 0x6D, 0x65, 0x74, 0x2C, 0x20, 0x63, 0x6F, 0x6E, 0x73, 0x65,
102     0x63, 0x74, 0x65, 0x74, 0x75, 0x72, 0x20, 0x61, 0x64, 0x69, 0x70, 0x69,
103     0x73, 0x63, 0x69, 0x6E, 0x67, 0x20, 0x65, 0x6C, 0x69, 0x74, 0x2E, 0x20,
104     0x44, 0x6F, 0x6E, 0x65, 0x63, 0x20, 0x6D, 0x69, 0x20, 0x74, 0x65, 0x6C,
105     0x6C, 0x75, 0x73, 0x2C, 0x20, 0x69, 0x61, 0x63, 0x75, 0x6C, 0x69, 0x73,
106     0x20, 0x6E, 0x65, 0x63, 0x20, 0x76, 0x65, 0x73, 0x74, 0x69, 0x62, 0x75,
107     0x6C, 0x75, 0x6D, 0x20, 0x71, 0x75, 0x69, 0x73, 0x2C, 0x20, 0x66, 0x65,
108     0x72, 0x6D, 0x65, 0x6E, 0x74, 0x75, 0x6D, 0x20, 0x6E, 0x6F, 0x6E, 0x20,
109     0x66, 0x65, 0x6C, 0x69, 0x73, 0x2E, 0x20, 0x4D, 0x61, 0x65, 0x63, 0x65,
110     0x6E, 0x61, 0x73, 0x20, 0x75, 0x74, 0x20, 0x6A, 0x75, 0x73, 0x74, 0x6F,
111     0x20, 0x70, 0x6F, 0x73, 0x75, 0x65, 0x72, 0x65, 0x2E};
112 /*                                          150 | Lorem ....*/
113 /* This valid but not realistic - length 150 could be encoded in a single
114  * uint8_t (but we need to keep the test files reasonably compact) */
115 static void test_int16_string(void **_CBOR_UNUSED(_state)) {
116   string = cbor_load(int16_string_data, 153, &res);
117   assert_non_null(string);
118   assert_true(cbor_typeof(string) == CBOR_TYPE_STRING);
119   assert_true(cbor_isa_string(string));
120   assert_size_equal(cbor_string_length(string), 150);
121   assert_size_equal(cbor_string_codepoint_count(string), 150);
122   assert_memory_equal(
123 		&"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec mi tellus, iaculis nec vestibulum quis, fermentum non felis. Maecenas ut justo posuere.",
124 		cbor_string_handle(string),
125 		150
126 	);
127   assert_true(res.read == 153);
128   cbor_decref(&string);
129   assert_null(string);
130 }
131 
132 unsigned char int32_string_data[] = {
133     0x7A, 0x00, 0x00, 0x00, 0x96, 0x4C, 0x6F, 0x72, 0x65, 0x6D, 0x20, 0x69,
134     0x70, 0x73, 0x75, 0x6D, 0x20, 0x64, 0x6F, 0x6C, 0x6F, 0x72, 0x20, 0x73,
135     0x69, 0x74, 0x20, 0x61, 0x6D, 0x65, 0x74, 0x2C, 0x20, 0x63, 0x6F, 0x6E,
136     0x73, 0x65, 0x63, 0x74, 0x65, 0x74, 0x75, 0x72, 0x20, 0x61, 0x64, 0x69,
137     0x70, 0x69, 0x73, 0x63, 0x69, 0x6E, 0x67, 0x20, 0x65, 0x6C, 0x69, 0x74,
138     0x2E, 0x20, 0x44, 0x6F, 0x6E, 0x65, 0x63, 0x20, 0x6D, 0x69, 0x20, 0x74,
139     0x65, 0x6C, 0x6C, 0x75, 0x73, 0x2C, 0x20, 0x69, 0x61, 0x63, 0x75, 0x6C,
140     0x69, 0x73, 0x20, 0x6E, 0x65, 0x63, 0x20, 0x76, 0x65, 0x73, 0x74, 0x69,
141     0x62, 0x75, 0x6C, 0x75, 0x6D, 0x20, 0x71, 0x75, 0x69, 0x73, 0x2C, 0x20,
142     0x66, 0x65, 0x72, 0x6D, 0x65, 0x6E, 0x74, 0x75, 0x6D, 0x20, 0x6E, 0x6F,
143     0x6E, 0x20, 0x66, 0x65, 0x6C, 0x69, 0x73, 0x2E, 0x20, 0x4D, 0x61, 0x65,
144     0x63, 0x65, 0x6E, 0x61, 0x73, 0x20, 0x75, 0x74, 0x20, 0x6A, 0x75, 0x73,
145     0x74, 0x6F, 0x20, 0x70, 0x6F, 0x73, 0x75, 0x65, 0x72, 0x65, 0x2E};
146 
147 /*                                          150 | Lorem ....*/
148 static void test_int32_string(void **_CBOR_UNUSED(_state)) {
149   string = cbor_load(int32_string_data, 155, &res);
150   assert_non_null(string);
151   assert_true(cbor_typeof(string) == CBOR_TYPE_STRING);
152   assert_true(cbor_isa_string(string));
153   assert_size_equal(cbor_string_length(string), 150);
154   assert_size_equal(cbor_string_codepoint_count(string), 150);
155   assert_memory_equal(
156 		&"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec mi tellus, iaculis nec vestibulum quis, fermentum non felis. Maecenas ut justo posuere.",
157 		cbor_string_handle(string),
158 		150
159 	);
160   assert_true(res.read == 155);
161   cbor_decref(&string);
162   assert_null(string);
163 }
164 
165 unsigned char int64_string_data[] = {
166     0x7B, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x96, 0x4C, 0x6F, 0x72,
167     0x65, 0x6D, 0x20, 0x69, 0x70, 0x73, 0x75, 0x6D, 0x20, 0x64, 0x6F, 0x6C,
168     0x6F, 0x72, 0x20, 0x73, 0x69, 0x74, 0x20, 0x61, 0x6D, 0x65, 0x74, 0x2C,
169     0x20, 0x63, 0x6F, 0x6E, 0x73, 0x65, 0x63, 0x74, 0x65, 0x74, 0x75, 0x72,
170     0x20, 0x61, 0x64, 0x69, 0x70, 0x69, 0x73, 0x63, 0x69, 0x6E, 0x67, 0x20,
171     0x65, 0x6C, 0x69, 0x74, 0x2E, 0x20, 0x44, 0x6F, 0x6E, 0x65, 0x63, 0x20,
172     0x6D, 0x69, 0x20, 0x74, 0x65, 0x6C, 0x6C, 0x75, 0x73, 0x2C, 0x20, 0x69,
173     0x61, 0x63, 0x75, 0x6C, 0x69, 0x73, 0x20, 0x6E, 0x65, 0x63, 0x20, 0x76,
174     0x65, 0x73, 0x74, 0x69, 0x62, 0x75, 0x6C, 0x75, 0x6D, 0x20, 0x71, 0x75,
175     0x69, 0x73, 0x2C, 0x20, 0x66, 0x65, 0x72, 0x6D, 0x65, 0x6E, 0x74, 0x75,
176     0x6D, 0x20, 0x6E, 0x6F, 0x6E, 0x20, 0x66, 0x65, 0x6C, 0x69, 0x73, 0x2E,
177     0x20, 0x4D, 0x61, 0x65, 0x63, 0x65, 0x6E, 0x61, 0x73, 0x20, 0x75, 0x74,
178     0x20, 0x6A, 0x75, 0x73, 0x74, 0x6F, 0x20, 0x70, 0x6F, 0x73, 0x75, 0x65,
179     0x72, 0x65, 0x2E};
180 
181 /*                                          150 | Lorem ....*/
182 static void test_int64_string(void **_CBOR_UNUSED(_state)) {
183   string = cbor_load(int64_string_data, 159, &res);
184   assert_non_null(string);
185   assert_true(cbor_typeof(string) == CBOR_TYPE_STRING);
186   assert_true(cbor_isa_string(string));
187   assert_size_equal(cbor_string_length(string), 150);
188   assert_size_equal(cbor_string_codepoint_count(string), 150);
189   assert_memory_equal(
190 		&"Lorem ipsum dolor sit amet, consectetur adipiscing elit. Donec mi tellus, iaculis nec vestibulum quis, fermentum non felis. Maecenas ut justo posuere.",
191 		cbor_string_handle(string),
192 		150
193 	);
194   assert_true(res.read == 159);
195   cbor_decref(&string);
196   assert_null(string);
197 }
198 
199 unsigned char short_indef_string_data[] = {0x7F, 0x78, 0x01, 0x65, 0xFF, 0xFF};
200 
201 /*                                         start |   string      | break| extra
202  */
203 
204 static void test_short_indef_string(void **_CBOR_UNUSED(_state)) {
205   string = cbor_load(short_indef_string_data, 6, &res);
206   assert_non_null(string);
207   assert_true(cbor_typeof(string) == CBOR_TYPE_STRING);
208   assert_true(cbor_isa_string(string));
209   assert_true(cbor_string_length(string) == 0);
210   assert_true(cbor_string_is_indefinite(string));
211   assert_true(cbor_string_chunk_count(string) == 1);
212   assert_true(res.read == 5);
213   assert_true(cbor_isa_string(cbor_string_chunks_handle(string)[0]));
214   assert_true(cbor_string_length(cbor_string_chunks_handle(string)[0]) == 1);
215   assert_true(*cbor_string_handle(cbor_string_chunks_handle(string)[0]) == 'e');
216   cbor_decref(&string);
217   assert_null(string);
218 }
219 
220 static void test_invalid_utf(void **_CBOR_UNUSED(_state)) {
221   /* 0x60 + 1 | 0xC5 (invalid unfinished 2B codepoint) */
222   unsigned char string_data[] = {0x61, 0xC5};
223   string = cbor_load(string_data, 2, &res);
224 
225   assert_non_null(string);
226   assert_true(cbor_typeof(string) == CBOR_TYPE_STRING);
227   assert_true(cbor_isa_string(string));
228   assert_size_equal(cbor_string_length(string), 1);
229   assert_size_equal(cbor_string_codepoint_count(string), 0);
230   assert_true(cbor_string_is_definite(string));
231   assert_true(res.read == 2);
232 
233   cbor_decref(&string);
234 }
235 
236 static void test_inline_creation(void **_CBOR_UNUSED(_state)) {
237   string = cbor_build_string("Hello!");
238   assert_memory_equal(cbor_string_handle(string), "Hello!", strlen("Hello!"));
239   cbor_decref(&string);
240 }
241 
242 static void test_string_creation(void **_CBOR_UNUSED(_state)) {
243   WITH_FAILING_MALLOC({ assert_null(cbor_new_definite_string()); });
244 
245   WITH_FAILING_MALLOC({ assert_null(cbor_new_indefinite_string()); });
246   WITH_MOCK_MALLOC({ assert_null(cbor_new_indefinite_string()); }, 2, MALLOC,
247                    MALLOC_FAIL);
248 
249   WITH_FAILING_MALLOC({ assert_null(cbor_build_string("Test")); });
250   WITH_MOCK_MALLOC({ assert_null(cbor_build_string("Test")); }, 2, MALLOC,
251                    MALLOC_FAIL);
252 
253   WITH_FAILING_MALLOC({ assert_null(cbor_build_stringn("Test", 4)); });
254   WITH_MOCK_MALLOC({ assert_null(cbor_build_stringn("Test", 4)); }, 2, MALLOC,
255                    MALLOC_FAIL);
256 }
257 
258 static void test_string_add_chunk(void **_CBOR_UNUSED(_state)) {
259   WITH_MOCK_MALLOC(
260       {
261         cbor_item_t *string = cbor_new_indefinite_string();
262         cbor_item_t *chunk = cbor_build_string("Hello!");
263 
264         assert_false(cbor_string_add_chunk(string, chunk));
265         assert_size_equal(cbor_string_chunk_count(string), 0);
266         assert_size_equal(((struct cbor_indefinite_string_data *)string->data)
267                               ->chunk_capacity,
268                           0);
269 
270         cbor_decref(&chunk);
271         cbor_decref(&string);
272       },
273       5, MALLOC, MALLOC, MALLOC, MALLOC, REALLOC_FAIL);
274 }
275 
276 static void test_add_chunk_reallocation_overflow(void **_CBOR_UNUSED(_state)) {
277   string = cbor_new_indefinite_string();
278   cbor_item_t *chunk = cbor_build_string("Hello!");
279   struct cbor_indefinite_string_data *metadata =
280       (struct cbor_indefinite_string_data *)string->data;
281   // Pretend we already have many chunks allocated
282   metadata->chunk_count = SIZE_MAX;
283   metadata->chunk_capacity = SIZE_MAX;
284 
285   assert_false(cbor_string_add_chunk(string, chunk));
286   assert_size_equal(cbor_refcount(chunk), 1);
287 
288   metadata->chunk_count = 0;
289   metadata->chunk_capacity = 0;
290   cbor_decref(&chunk);
291   cbor_decref(&string);
292 }
293 
294 static void test_set_handle(void **_CBOR_UNUSED(_state)) {
295   string = cbor_new_definite_string();
296   char *test_string = "Hello";
297   unsigned char *string_data = malloc(strlen(test_string));
298   memcpy(string_data, test_string, strlen(test_string));
299   assert_ptr_not_equal(string_data, NULL);
300   cbor_string_set_handle(string, string_data, strlen(test_string));
301 
302   assert_ptr_equal(cbor_string_handle(string), string_data);
303   assert_size_equal(cbor_string_length(string), 5);
304   assert_size_equal(cbor_string_codepoint_count(string), 5);
305 
306   cbor_decref(&string);
307 }
308 
309 static void test_set_handle_multibyte_codepoint(void **_CBOR_UNUSED(_state)) {
310   string = cbor_new_definite_string();
311   // "Štěstíčko" in UTF-8
312   char *test_string = "\xc5\xa0t\xc4\x9bst\xc3\xad\xc4\x8dko";
313   unsigned char *string_data = malloc(strlen(test_string));
314   memcpy(string_data, test_string, strlen(test_string));
315   assert_ptr_not_equal(string_data, NULL);
316   cbor_string_set_handle(string, string_data, strlen(test_string));
317 
318   assert_ptr_equal(cbor_string_handle(string), string_data);
319   assert_size_equal(cbor_string_length(string), 13);
320   assert_size_equal(cbor_string_codepoint_count(string), 9);
321 
322   cbor_decref(&string);
323 }
324 
325 static void test_set_handle_invalid_utf(void **_CBOR_UNUSED(_state)) {
326   string = cbor_new_definite_string();
327   // Invalid multi-byte character (missing the second byte).
328   char *test_string = "Test: \xc5";
329   unsigned char *string_data = malloc(strlen(test_string));
330   memcpy(string_data, test_string, strlen(test_string));
331   assert_ptr_not_equal(string_data, NULL);
332   cbor_string_set_handle(string, string_data, strlen(test_string));
333 
334   assert_ptr_equal(cbor_string_handle(string), string_data);
335   assert_size_equal(cbor_string_length(string), 7);
336   assert_size_equal(cbor_string_codepoint_count(string), 0);
337 
338   cbor_decref(&string);
339 }
340 
341 int main(void) {
342   const struct CMUnitTest tests[] = {
343       cmocka_unit_test(test_empty_string),
344       cmocka_unit_test(test_short_string),
345       cmocka_unit_test(test_short_multibyte_string),
346       cmocka_unit_test(test_int8_string),
347       cmocka_unit_test(test_int16_string),
348       cmocka_unit_test(test_int32_string),
349       cmocka_unit_test(test_int64_string),
350       cmocka_unit_test(test_short_indef_string),
351       cmocka_unit_test(test_invalid_utf),
352       cmocka_unit_test(test_inline_creation),
353       cmocka_unit_test(test_string_creation),
354       cmocka_unit_test(test_string_add_chunk),
355       cmocka_unit_test(test_add_chunk_reallocation_overflow),
356       cmocka_unit_test(test_set_handle),
357       cmocka_unit_test(test_set_handle_multibyte_codepoint),
358       cmocka_unit_test(test_set_handle_invalid_utf),
359   };
360   return cmocka_run_group_tests(tests, NULL, NULL);
361 }
362