xref: /freebsd/contrib/expat/tests/runtests.c (revision 076ad2f836d5f49dc1375f1677335a48fe0d4b82)
1 /* Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
2    See the file COPYING for copying permission.
3 
4    runtest.c : run the Expat test suite
5 */
6 
7 #ifdef HAVE_EXPAT_CONFIG_H
8 #include <expat_config.h>
9 #endif
10 
11 #include <assert.h>
12 #include <stdlib.h>
13 #include <stdio.h>
14 #include <string.h>
15 #include <stdint.h>
16 #include <stddef.h>  /* ptrdiff_t */
17 #ifndef __cplusplus
18 # include <stdbool.h>
19 #endif
20 
21 #include "expat.h"
22 #include "chardata.h"
23 #include "internal.h"  /* for UNUSED_P only */
24 #include "minicheck.h"
25 
26 #if defined(__amigaos__) && defined(__USE_INLINE__)
27 #include <proto/expat.h>
28 #endif
29 
30 #ifdef XML_LARGE_SIZE
31 #define XML_FMT_INT_MOD "ll"
32 #else
33 #define XML_FMT_INT_MOD "l"
34 #endif
35 
36 static XML_Parser parser;
37 
38 
39 static void
40 basic_setup(void)
41 {
42     parser = XML_ParserCreate(NULL);
43     if (parser == NULL)
44         fail("Parser not created.");
45 }
46 
47 static void
48 basic_teardown(void)
49 {
50     if (parser != NULL)
51         XML_ParserFree(parser);
52 }
53 
54 /* Generate a failure using the parser state to create an error message;
55    this should be used when the parser reports an error we weren't
56    expecting.
57 */
58 static void
59 _xml_failure(XML_Parser parser, const char *file, int line)
60 {
61     char buffer[1024];
62     enum XML_Error err = XML_GetErrorCode(parser);
63     sprintf(buffer,
64             "    %d: %s (line %" XML_FMT_INT_MOD "u, offset %"\
65                 XML_FMT_INT_MOD "u)\n    reported from %s, line %d\n",
66             err,
67             XML_ErrorString(err),
68             XML_GetCurrentLineNumber(parser),
69             XML_GetCurrentColumnNumber(parser),
70             file, line);
71     _fail_unless(0, file, line, buffer);
72 }
73 
74 static enum XML_Status
75 _XML_Parse_SINGLE_BYTES(XML_Parser parser, const char *s, int len, int isFinal)
76 {
77     enum XML_Status res = XML_STATUS_ERROR;
78     int offset = 0;
79 
80     if (len == 0) {
81         return XML_Parse(parser, s, len, isFinal);
82     }
83 
84     for (; offset < len; offset++) {
85         const int innerIsFinal = (offset == len - 1) && isFinal;
86         const char c = s[offset]; /* to help out-of-bounds detection */
87         res = XML_Parse(parser, &c, sizeof(char), innerIsFinal);
88         if (res != XML_STATUS_OK) {
89             return res;
90         }
91     }
92     return res;
93 }
94 
/* Report an unexpected parser error, tagged with the location of the
   macro invocation. */
#define xml_failure(parser) \
        _xml_failure((parser), __FILE__, __LINE__)
96 
97 static void
98 _expect_failure(const char *text, enum XML_Error errorCode, const char *errorMessage,
99                 const char *file, int lineno)
100 {
101     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK)
102         /* Hackish use of _fail_unless() macro, but let's us report
103            the right filename and line number. */
104         _fail_unless(0, file, lineno, errorMessage);
105     if (XML_GetErrorCode(parser) != errorCode)
106         _xml_failure(parser, file, lineno);
107 }
108 
/* Convenience wrapper around _expect_failure() that supplies the
   location of the macro invocation. */
#define expect_failure(text, errorCode, errorMessage)   \
        _expect_failure((text),                         \
                        (errorCode),                    \
                        (errorMessage),                 \
                        __FILE__, __LINE__)
112 
113 /* Dummy handlers for when we need to set a handler to tickle a bug,
114    but it doesn't need to do anything.
115 */
116 
117 static void XMLCALL
118 dummy_start_doctype_handler(void           *UNUSED_P(userData),
119                             const XML_Char *UNUSED_P(doctypeName),
120                             const XML_Char *UNUSED_P(sysid),
121                             const XML_Char *UNUSED_P(pubid),
122                             int            UNUSED_P(has_internal_subset))
123 {}
124 
125 static void XMLCALL
126 dummy_end_doctype_handler(void *UNUSED_P(userData))
127 {}
128 
129 static void XMLCALL
130 dummy_entity_decl_handler(void           *UNUSED_P(userData),
131                           const XML_Char *UNUSED_P(entityName),
132                           int            UNUSED_P(is_parameter_entity),
133                           const XML_Char *UNUSED_P(value),
134                           int            UNUSED_P(value_length),
135                           const XML_Char *UNUSED_P(base),
136                           const XML_Char *UNUSED_P(systemId),
137                           const XML_Char *UNUSED_P(publicId),
138                           const XML_Char *UNUSED_P(notationName))
139 {}
140 
141 static void XMLCALL
142 dummy_notation_decl_handler(void *UNUSED_P(userData),
143                             const XML_Char *UNUSED_P(notationName),
144                             const XML_Char *UNUSED_P(base),
145                             const XML_Char *UNUSED_P(systemId),
146                             const XML_Char *UNUSED_P(publicId))
147 {}
148 
149 static void XMLCALL
150 dummy_element_decl_handler(void *UNUSED_P(userData),
151                            const XML_Char *UNUSED_P(name),
152                            XML_Content *UNUSED_P(model))
153 {}
154 
155 static void XMLCALL
156 dummy_attlist_decl_handler(void           *UNUSED_P(userData),
157                            const XML_Char *UNUSED_P(elname),
158                            const XML_Char *UNUSED_P(attname),
159                            const XML_Char *UNUSED_P(att_type),
160                            const XML_Char *UNUSED_P(dflt),
161                            int            UNUSED_P(isrequired))
162 {}
163 
164 static void XMLCALL
165 dummy_comment_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(data))
166 {}
167 
168 static void XMLCALL
169 dummy_pi_handler(void *UNUSED_P(userData), const XML_Char *UNUSED_P(target), const XML_Char *UNUSED_P(data))
170 {}
171 
172 static void XMLCALL
173 dummy_start_element(void *UNUSED_P(userData),
174                     const XML_Char *UNUSED_P(name), const XML_Char **UNUSED_P(atts))
175 {}
176 
177 
178 /*
179  * Character & encoding tests.
180  */
181 
182 START_TEST(test_nul_byte)
183 {
184     char text[] = "<doc>\0</doc>";
185 
186     /* test that a NUL byte (in US-ASCII data) is an error */
187     if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_OK)
188         fail("Parser did not report error on NUL-byte.");
189     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
190         xml_failure(parser);
191 }
192 END_TEST
193 
194 
195 START_TEST(test_u0000_char)
196 {
197     /* test that a NUL byte (in US-ASCII data) is an error */
198     expect_failure("<doc>&#0;</doc>",
199                    XML_ERROR_BAD_CHAR_REF,
200                    "Parser did not report error on NUL-byte.");
201 }
202 END_TEST
203 
204 START_TEST(test_bom_utf8)
205 {
206     /* This test is really just making sure we don't core on a UTF-8 BOM. */
207     const char *text = "\357\273\277<e/>";
208 
209     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
210         xml_failure(parser);
211 }
212 END_TEST
213 
214 START_TEST(test_bom_utf16_be)
215 {
216     char text[] = "\376\377\0<\0e\0/\0>";
217 
218     if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
219         xml_failure(parser);
220 }
221 END_TEST
222 
223 START_TEST(test_bom_utf16_le)
224 {
225     char text[] = "\377\376<\0e\0/\0>\0";
226 
227     if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
228         xml_failure(parser);
229 }
230 END_TEST
231 
232 static void XMLCALL
233 accumulate_characters(void *userData, const XML_Char *s, int len)
234 {
235     CharData_AppendXMLChars((CharData *)userData, s, len);
236 }
237 
238 static void XMLCALL
239 accumulate_attribute(void *userData, const XML_Char *UNUSED_P(name),
240                      const XML_Char **atts)
241 {
242     CharData *storage = (CharData *)userData;
243     if (storage->count < 0 && atts != NULL && atts[0] != NULL) {
244         /* "accumulate" the value of the first attribute we see */
245         CharData_AppendXMLChars(storage, atts[1], -1);
246     }
247 }
248 
249 
250 static void
251 _run_character_check(const XML_Char *text, const XML_Char *expected,
252                      const char *file, int line)
253 {
254     CharData storage;
255 
256     CharData_Init(&storage);
257     XML_SetUserData(parser, &storage);
258     XML_SetCharacterDataHandler(parser, accumulate_characters);
259     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
260         _xml_failure(parser, file, line);
261     CharData_CheckXMLChars(&storage, expected);
262 }
263 
/* Wrapper around _run_character_check() that supplies the location of
   the macro invocation. */
#define run_character_check(text, expected)             \
        _run_character_check((text), (expected),        \
                             __FILE__, __LINE__)
266 
267 static void
268 _run_attribute_check(const XML_Char *text, const XML_Char *expected,
269                      const char *file, int line)
270 {
271     CharData storage;
272 
273     CharData_Init(&storage);
274     XML_SetUserData(parser, &storage);
275     XML_SetStartElementHandler(parser, accumulate_attribute);
276     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
277         _xml_failure(parser, file, line);
278     CharData_CheckXMLChars(&storage, expected);
279 }
280 
/* Wrapper around _run_attribute_check() that supplies the location of
   the macro invocation. */
#define run_attribute_check(text, expected)             \
        _run_attribute_check((text), (expected),        \
                             __FILE__, __LINE__)
283 
284 /* Regression test for SF bug #491986. */
285 START_TEST(test_danish_latin1)
286 {
287     const char *text =
288         "<?xml version='1.0' encoding='iso-8859-1'?>\n"
289         "<e>J\xF8rgen \xE6\xF8\xE5\xC6\xD8\xC5</e>";
290     run_character_check(text,
291              "J\xC3\xB8rgen \xC3\xA6\xC3\xB8\xC3\xA5\xC3\x86\xC3\x98\xC3\x85");
292 }
293 END_TEST
294 
295 
296 /* Regression test for SF bug #514281. */
297 START_TEST(test_french_charref_hexidecimal)
298 {
299     const char *text =
300         "<?xml version='1.0' encoding='iso-8859-1'?>\n"
301         "<doc>&#xE9;&#xE8;&#xE0;&#xE7;&#xEA;&#xC8;</doc>";
302     run_character_check(text,
303                         "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
304 }
305 END_TEST
306 
307 START_TEST(test_french_charref_decimal)
308 {
309     const char *text =
310         "<?xml version='1.0' encoding='iso-8859-1'?>\n"
311         "<doc>&#233;&#232;&#224;&#231;&#234;&#200;</doc>";
312     run_character_check(text,
313                         "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
314 }
315 END_TEST
316 
317 START_TEST(test_french_latin1)
318 {
319     const char *text =
320         "<?xml version='1.0' encoding='iso-8859-1'?>\n"
321         "<doc>\xE9\xE8\xE0\xE7\xEa\xC8</doc>";
322     run_character_check(text,
323                         "\xC3\xA9\xC3\xA8\xC3\xA0\xC3\xA7\xC3\xAA\xC3\x88");
324 }
325 END_TEST
326 
327 START_TEST(test_french_utf8)
328 {
329     const char *text =
330         "<?xml version='1.0' encoding='utf-8'?>\n"
331         "<doc>\xC3\xA9</doc>";
332     run_character_check(text, "\xC3\xA9");
333 }
334 END_TEST
335 
336 /* Regression test for SF bug #600479.
337    XXX There should be a test that exercises all legal XML Unicode
338    characters as PCDATA and attribute value content, and XML Name
339    characters as part of element and attribute names.
340 */
341 START_TEST(test_utf8_false_rejection)
342 {
343     const char *text = "<doc>\xEF\xBA\xBF</doc>";
344     run_character_check(text, "\xEF\xBA\xBF");
345 }
346 END_TEST
347 
348 /* Regression test for SF bug #477667.
349    This test assures that any 8-bit character followed by a 7-bit
350    character will not be mistakenly interpreted as a valid UTF-8
351    sequence.
352 */
353 START_TEST(test_illegal_utf8)
354 {
355     char text[100];
356     int i;
357 
358     for (i = 128; i <= 255; ++i) {
359         sprintf(text, "<e>%ccd</e>", i);
360         if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_OK) {
361             sprintf(text,
362                     "expected token error for '%c' (ordinal %d) in UTF-8 text",
363                     i, i);
364             fail(text);
365         }
366         else if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
367             xml_failure(parser);
368         /* Reset the parser since we use the same parser repeatedly. */
369         XML_ParserReset(parser, NULL);
370     }
371 }
372 END_TEST
373 
374 
/* Sample UTF-8 bytes used by test_utf8_auto_align.  These are example
   values, not bit masks; the bit pattern of each is shown alongside. */
#define UTF8_LEAD_1  "\x7f"  /* 0b01111111: single-byte character       */
#define UTF8_LEAD_2  "\xdf"  /* 0b11011111: lead of a 2-byte sequence   */
#define UTF8_LEAD_3  "\xef"  /* 0b11101111: lead of a 3-byte sequence   */
#define UTF8_LEAD_4  "\xf7"  /* 0b11110111: lead of a 4-byte sequence   */
#define UTF8_FOLLOW  "\xbf"  /* 0b10111111: continuation byte           */
381 
382 START_TEST(test_utf8_auto_align)
383 {
384     struct TestCase {
385         ptrdiff_t expectedMovementInChars;
386         const char * input;
387     };
388 
389     struct TestCase cases[] = {
390         {00, ""},
391 
392         {00, UTF8_LEAD_1},
393 
394         {-1, UTF8_LEAD_2},
395         {00, UTF8_LEAD_2 UTF8_FOLLOW},
396 
397         {-1, UTF8_LEAD_3},
398         {-2, UTF8_LEAD_3 UTF8_FOLLOW},
399         {00, UTF8_LEAD_3 UTF8_FOLLOW UTF8_FOLLOW},
400 
401         {-1, UTF8_LEAD_4},
402         {-2, UTF8_LEAD_4 UTF8_FOLLOW},
403         {-3, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW},
404         {00, UTF8_LEAD_4 UTF8_FOLLOW UTF8_FOLLOW UTF8_FOLLOW},
405     };
406 
407     size_t i = 0;
408     bool success = true;
409     for (; i < sizeof(cases) / sizeof(*cases); i++) {
410         const char * fromLim = cases[i].input + strlen(cases[i].input);
411         const char * const fromLimInitially = fromLim;
412         ptrdiff_t actualMovementInChars;
413 
414         align_limit_to_full_utf8_characters(cases[i].input, &fromLim);
415 
416         actualMovementInChars = (fromLim - fromLimInitially);
417         if (actualMovementInChars != cases[i].expectedMovementInChars) {
418             size_t j = 0;
419             success = false;
420             printf("[-] UTF-8 case %2lu: Expected movement by %2ld chars"
421                     ", actually moved by %2ld chars: \"",
422                     i + 1, cases[i].expectedMovementInChars, actualMovementInChars);
423             for (; j < strlen(cases[i].input); j++) {
424                 printf("\\x%02x", (unsigned char)cases[i].input[j]);
425             }
426             printf("\"\n");
427         }
428     }
429 
430     if (! success) {
431         fail("UTF-8 auto-alignment is not bullet-proof\n");
432     }
433 }
434 END_TEST
435 
436 START_TEST(test_utf16)
437 {
438     /* <?xml version="1.0" encoding="UTF-16"?>
439        <doc a='123'>some text</doc>
440     */
441     char text[] =
442         "\000<\000?\000x\000m\000\154\000 \000v\000e\000r\000s\000i\000o"
443         "\000n\000=\000'\0001\000.\000\060\000'\000 \000e\000n\000c\000o"
444         "\000d\000i\000n\000g\000=\000'\000U\000T\000F\000-\0001\000\066"
445         "\000'\000?\000>\000\n"
446         "\000<\000d\000o\000c\000 \000a\000=\000'\0001\0002\0003\000'"
447         "\000>\000s\000o\000m\000e\000 \000t\000e\000x\000t\000<\000/"
448         "\000d\000o\000c\000>";
449     if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text)-1, XML_TRUE) == XML_STATUS_ERROR)
450         xml_failure(parser);
451 }
452 END_TEST
453 
454 START_TEST(test_utf16_le_epilog_newline)
455 {
456     unsigned int first_chunk_bytes = 17;
457     char text[] =
458         "\xFF\xFE"                      /* BOM */
459         "<\000e\000/\000>\000"          /* document element */
460         "\r\000\n\000\r\000\n\000";     /* epilog */
461 
462     if (first_chunk_bytes >= sizeof(text) - 1)
463         fail("bad value of first_chunk_bytes");
464     if (  _XML_Parse_SINGLE_BYTES(parser, text, first_chunk_bytes, XML_FALSE)
465           == XML_STATUS_ERROR)
466         xml_failure(parser);
467     else {
468         enum XML_Status rc;
469         rc = _XML_Parse_SINGLE_BYTES(parser, text + first_chunk_bytes,
470                        sizeof(text) - first_chunk_bytes - 1, XML_TRUE);
471         if (rc == XML_STATUS_ERROR)
472             xml_failure(parser);
473     }
474 }
475 END_TEST
476 
477 /* Regression test for SF bug #481609, #774028. */
478 START_TEST(test_latin1_umlauts)
479 {
480     const char *text =
481         "<?xml version='1.0' encoding='iso-8859-1'?>\n"
482         "<e a='\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; >'\n"
483         "  >\xE4 \xF6 \xFC &#228; &#246; &#252; &#x00E4; &#x0F6; &#xFC; ></e>";
484     const char *utf8 =
485         "\xC3\xA4 \xC3\xB6 \xC3\xBC "
486         "\xC3\xA4 \xC3\xB6 \xC3\xBC "
487         "\xC3\xA4 \xC3\xB6 \xC3\xBC >";
488     run_character_check(text, utf8);
489     XML_ParserReset(parser, NULL);
490     run_attribute_check(text, utf8);
491 }
492 END_TEST
493 
494 /* Regression test #1 for SF bug #653180. */
495 START_TEST(test_line_number_after_parse)
496 {
497     const char *text =
498         "<tag>\n"
499         "\n"
500         "\n</tag>";
501     XML_Size lineno;
502 
503     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR)
504         xml_failure(parser);
505     lineno = XML_GetCurrentLineNumber(parser);
506     if (lineno != 4) {
507         char buffer[100];
508         sprintf(buffer,
509             "expected 4 lines, saw %" XML_FMT_INT_MOD "u", lineno);
510         fail(buffer);
511     }
512 }
513 END_TEST
514 
515 /* Regression test #2 for SF bug #653180. */
516 START_TEST(test_column_number_after_parse)
517 {
518     const char *text = "<tag></tag>";
519     XML_Size colno;
520 
521     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) == XML_STATUS_ERROR)
522         xml_failure(parser);
523     colno = XML_GetCurrentColumnNumber(parser);
524     if (colno != 11) {
525         char buffer[100];
526         sprintf(buffer,
527             "expected 11 columns, saw %" XML_FMT_INT_MOD "u", colno);
528         fail(buffer);
529     }
530 }
531 END_TEST
532 
533 static void XMLCALL
534 start_element_event_handler2(void *userData, const XML_Char *name,
535 			     const XML_Char **UNUSED_P(attr))
536 {
537     CharData *storage = (CharData *) userData;
538     char buffer[100];
539 
540     sprintf(buffer,
541         "<%s> at col:%" XML_FMT_INT_MOD "u line:%"\
542             XML_FMT_INT_MOD "u\n", name,
543 	    XML_GetCurrentColumnNumber(parser),
544 	    XML_GetCurrentLineNumber(parser));
545     CharData_AppendString(storage, buffer);
546 }
547 
548 static void XMLCALL
549 end_element_event_handler2(void *userData, const XML_Char *name)
550 {
551     CharData *storage = (CharData *) userData;
552     char buffer[100];
553 
554     sprintf(buffer,
555         "</%s> at col:%" XML_FMT_INT_MOD "u line:%"\
556             XML_FMT_INT_MOD "u\n", name,
557 	    XML_GetCurrentColumnNumber(parser),
558 	    XML_GetCurrentLineNumber(parser));
559     CharData_AppendString(storage, buffer);
560 }
561 
562 /* Regression test #3 for SF bug #653180. */
563 START_TEST(test_line_and_column_numbers_inside_handlers)
564 {
565     const char *text =
566         "<a>\n"        /* Unix end-of-line */
567         "  <b>\r\n"    /* Windows end-of-line */
568         "    <c/>\r"   /* Mac OS end-of-line */
569         "  </b>\n"
570         "  <d>\n"
571         "    <f/>\n"
572         "  </d>\n"
573         "</a>";
574     const char *expected =
575         "<a> at col:0 line:1\n"
576         "<b> at col:2 line:2\n"
577         "<c> at col:4 line:3\n"
578         "</c> at col:8 line:3\n"
579         "</b> at col:2 line:4\n"
580         "<d> at col:2 line:5\n"
581         "<f> at col:4 line:6\n"
582         "</f> at col:8 line:6\n"
583         "</d> at col:2 line:7\n"
584         "</a> at col:0 line:8\n";
585     CharData storage;
586 
587     CharData_Init(&storage);
588     XML_SetUserData(parser, &storage);
589     XML_SetStartElementHandler(parser, start_element_event_handler2);
590     XML_SetEndElementHandler(parser, end_element_event_handler2);
591     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
592         xml_failure(parser);
593 
594     CharData_CheckString(&storage, expected);
595 }
596 END_TEST
597 
598 /* Regression test #4 for SF bug #653180. */
599 START_TEST(test_line_number_after_error)
600 {
601     const char *text =
602         "<a>\n"
603         "  <b>\n"
604         "  </a>";  /* missing </b> */
605     XML_Size lineno;
606     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) != XML_STATUS_ERROR)
607         fail("Expected a parse error");
608 
609     lineno = XML_GetCurrentLineNumber(parser);
610     if (lineno != 3) {
611         char buffer[100];
612         sprintf(buffer, "expected 3 lines, saw %" XML_FMT_INT_MOD "u", lineno);
613         fail(buffer);
614     }
615 }
616 END_TEST
617 
618 /* Regression test #5 for SF bug #653180. */
619 START_TEST(test_column_number_after_error)
620 {
621     const char *text =
622         "<a>\n"
623         "  <b>\n"
624         "  </a>";  /* missing </b> */
625     XML_Size colno;
626     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_FALSE) != XML_STATUS_ERROR)
627         fail("Expected a parse error");
628 
629     colno = XML_GetCurrentColumnNumber(parser);
630     if (colno != 4) {
631         char buffer[100];
632         sprintf(buffer,
633             "expected 4 columns, saw %" XML_FMT_INT_MOD "u", colno);
634         fail(buffer);
635     }
636 }
637 END_TEST
638 
639 /* Regression test for SF bug #478332. */
640 START_TEST(test_really_long_lines)
641 {
642     /* This parses an input line longer than INIT_DATA_BUF_SIZE
643        characters long (defined to be 1024 in xmlparse.c).  We take a
644        really cheesy approach to building the input buffer, because
645        this avoids writing bugs in buffer-filling code.
646     */
647     const char *text =
648         "<e>"
649         /* 64 chars */
650         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
651         /* until we have at least 1024 characters on the line: */
652         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
653         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
654         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
655         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
656         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
657         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
658         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
659         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
660         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
661         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
662         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
663         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
664         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
665         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
666         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
667         "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-+"
668         "</e>";
669     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
670         xml_failure(parser);
671 }
672 END_TEST
673 
674 
675 /*
676  * Element event tests.
677  */
678 
679 static void XMLCALL
680 end_element_event_handler(void *userData, const XML_Char *name)
681 {
682     CharData *storage = (CharData *) userData;
683     CharData_AppendString(storage, "/");
684     CharData_AppendXMLChars(storage, name, -1);
685 }
686 
687 START_TEST(test_end_element_events)
688 {
689     const char *text = "<a><b><c/></b><d><f/></d></a>";
690     const char *expected = "/c/b/f/d/a";
691     CharData storage;
692 
693     CharData_Init(&storage);
694     XML_SetUserData(parser, &storage);
695     XML_SetEndElementHandler(parser, end_element_event_handler);
696     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
697         xml_failure(parser);
698     CharData_CheckString(&storage, expected);
699 }
700 END_TEST
701 
702 
703 /*
704  * Attribute tests.
705  */
706 
707 /* Helpers used by the following test; this checks any "attr" and "refs"
708    attributes to make sure whitespace has been normalized.
709 
710    Return true if whitespace has been normalized in a string, using
711    the rules for attribute value normalization.  The 'is_cdata' flag
712    is needed since CDATA attributes don't need to have multiple
713    whitespace characters collapsed to a single space, while other
714    attribute data types do.  (Section 3.3.3 of the recommendation.)
715 */
716 static int
717 is_whitespace_normalized(const XML_Char *s, int is_cdata)
718 {
719     int blanks = 0;
720     int at_start = 1;
721     while (*s) {
722         if (*s == ' ')
723             ++blanks;
724         else if (*s == '\t' || *s == '\n' || *s == '\r')
725             return 0;
726         else {
727             if (at_start) {
728                 at_start = 0;
729                 if (blanks && !is_cdata)
730                     /* illegal leading blanks */
731                     return 0;
732             }
733             else if (blanks > 1 && !is_cdata)
734                 return 0;
735             blanks = 0;
736         }
737         ++s;
738     }
739     if (blanks && !is_cdata)
740         return 0;
741     return 1;
742 }
743 
/* Self-check for is_whitespace_normalized(): each input is tried both
   as non-CDATA (second argument 0) and as CDATA (second argument 1). */
static void
testhelper_is_whitespace_normalized(void)
{
    /* Already normalized: accepted either way. */
    assert(is_whitespace_normalized("abc", 0));
    assert(is_whitespace_normalized("abc", 1));
    assert(is_whitespace_normalized("abc def ghi", 0));
    assert(is_whitespace_normalized("abc def ghi", 1));

    /* Leading, doubled, trailing or lone blanks: only CDATA accepts. */
    assert(!is_whitespace_normalized(" abc def ghi", 0));
    assert(is_whitespace_normalized(" abc def ghi", 1));
    assert(!is_whitespace_normalized("abc  def ghi", 0));
    assert(is_whitespace_normalized("abc  def ghi", 1));
    assert(!is_whitespace_normalized("abc def ghi ", 0));
    assert(is_whitespace_normalized("abc def ghi ", 1));
    assert(!is_whitespace_normalized(" ", 0));
    assert(is_whitespace_normalized(" ", 1));

    /* Tab, newline and carriage return: rejected either way. */
    assert(!is_whitespace_normalized("\t", 0));
    assert(!is_whitespace_normalized("\t", 1));
    assert(!is_whitespace_normalized("\n", 0));
    assert(!is_whitespace_normalized("\n", 1));
    assert(!is_whitespace_normalized("\r", 0));
    assert(!is_whitespace_normalized("\r", 1));
    assert(!is_whitespace_normalized("abc\t def", 1));
}
768 
769 static void XMLCALL
770 check_attr_contains_normalized_whitespace(void *UNUSED_P(userData),
771                                           const XML_Char *UNUSED_P(name),
772                                           const XML_Char **atts)
773 {
774     int i;
775     for (i = 0; atts[i] != NULL; i += 2) {
776         const XML_Char *attrname = atts[i];
777         const XML_Char *value = atts[i + 1];
778         if (strcmp("attr", attrname) == 0
779             || strcmp("ents", attrname) == 0
780             || strcmp("refs", attrname) == 0) {
781             if (!is_whitespace_normalized(value, 0)) {
782                 char buffer[256];
783                 sprintf(buffer, "attribute value not normalized: %s='%s'",
784                         attrname, value);
785                 fail(buffer);
786             }
787         }
788     }
789 }
790 
791 START_TEST(test_attr_whitespace_normalization)
792 {
793     const char *text =
794         "<!DOCTYPE doc [\n"
795         "  <!ATTLIST doc\n"
796         "            attr NMTOKENS #REQUIRED\n"
797         "            ents ENTITIES #REQUIRED\n"
798         "            refs IDREFS   #REQUIRED>\n"
799         "]>\n"
800         "<doc attr='    a  b c\t\td\te\t' refs=' id-1   \t  id-2\t\t'  \n"
801         "     ents=' ent-1   \t\r\n"
802         "            ent-2  ' >\n"
803         "  <e id='id-1'/>\n"
804         "  <e id='id-2'/>\n"
805         "</doc>";
806 
807     XML_SetStartElementHandler(parser,
808                                check_attr_contains_normalized_whitespace);
809     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
810         xml_failure(parser);
811 }
812 END_TEST
813 
814 
815 /*
816  * XML declaration tests.
817  */
818 
819 START_TEST(test_xmldecl_misplaced)
820 {
821     expect_failure("\n"
822                    "<?xml version='1.0'?>\n"
823                    "<a/>",
824                    XML_ERROR_MISPLACED_XML_PI,
825                    "failed to report misplaced XML declaration");
826 }
827 END_TEST
828 
829 /* Regression test for SF bug #584832. */
830 static int XMLCALL
831 UnknownEncodingHandler(void *UNUSED_P(data),const XML_Char *encoding,XML_Encoding *info)
832 {
833     if (strcmp(encoding,"unsupported-encoding") == 0) {
834         int i;
835         for (i = 0; i < 256; ++i)
836             info->map[i] = i;
837         info->data = NULL;
838         info->convert = NULL;
839         info->release = NULL;
840         return XML_STATUS_OK;
841     }
842     return XML_STATUS_ERROR;
843 }
844 
845 START_TEST(test_unknown_encoding_internal_entity)
846 {
847     const char *text =
848         "<?xml version='1.0' encoding='unsupported-encoding'?>\n"
849         "<!DOCTYPE test [<!ENTITY foo 'bar'>]>\n"
850         "<test a='&foo;'/>";
851 
852     XML_SetUnknownEncodingHandler(parser, UnknownEncodingHandler, NULL);
853     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
854         xml_failure(parser);
855 }
856 END_TEST
857 
858 /* Regression test for SF bug #620106. */
859 static int XMLCALL
860 external_entity_loader_set_encoding(XML_Parser parser,
861                                     const XML_Char *context,
862                                     const XML_Char *UNUSED_P(base),
863                                     const XML_Char *UNUSED_P(systemId),
864                                     const XML_Char *UNUSED_P(publicId))
865 {
866     /* This text says it's an unsupported encoding, but it's really
867        UTF-8, which we tell Expat using XML_SetEncoding().
868     */
869     const char *text =
870         "<?xml encoding='iso-8859-3'?>"
871         "\xC3\xA9";
872     XML_Parser extparser;
873 
874     extparser = XML_ExternalEntityParserCreate(parser, context, NULL);
875     if (extparser == NULL)
876         fail("Could not create external entity parser.");
877     if (!XML_SetEncoding(extparser, "utf-8"))
878         fail("XML_SetEncoding() ignored for external entity");
879     if (  _XML_Parse_SINGLE_BYTES(extparser, text, strlen(text), XML_TRUE)
880           == XML_STATUS_ERROR) {
881         xml_failure(parser);
882         return 0;
883     }
884     return 1;
885 }
886 
887 START_TEST(test_ext_entity_set_encoding)
888 {
889     const char *text =
890         "<!DOCTYPE doc [\n"
891         "  <!ENTITY en SYSTEM 'http://xml.libexpat.org/dummy.ent'>\n"
892         "]>\n"
893         "<doc>&en;</doc>";
894 
895     XML_SetExternalEntityRefHandler(parser,
896                                     external_entity_loader_set_encoding);
897     run_character_check(text, "\xC3\xA9");
898 }
899 END_TEST
900 
901 /* Test that no error is reported for unknown entities if we don't
902    read an external subset.  This was fixed in Expat 1.95.5.
903 */
904 START_TEST(test_wfc_undeclared_entity_unread_external_subset) {
905     const char *text =
906         "<!DOCTYPE doc SYSTEM 'foo'>\n"
907         "<doc>&entity;</doc>";
908 
909     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
910         xml_failure(parser);
911 }
912 END_TEST
913 
914 /* Test that an error is reported for unknown entities if we don't
915    have an external subset.
916 */
917 START_TEST(test_wfc_undeclared_entity_no_external_subset) {
918     expect_failure("<doc>&entity;</doc>",
919                    XML_ERROR_UNDEFINED_ENTITY,
920                    "Parser did not report undefined entity w/out a DTD.");
921 }
922 END_TEST
923 
924 /* Test that an error is reported for unknown entities if we don't
925    read an external subset, but have been declared standalone.
926 */
927 START_TEST(test_wfc_undeclared_entity_standalone) {
928     const char *text =
929         "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
930         "<!DOCTYPE doc SYSTEM 'foo'>\n"
931         "<doc>&entity;</doc>";
932 
933     expect_failure(text,
934                    XML_ERROR_UNDEFINED_ENTITY,
935                    "Parser did not report undefined entity (standalone).");
936 }
937 END_TEST
938 
939 static int XMLCALL
940 external_entity_loader(XML_Parser parser,
941                        const XML_Char *context,
942                        const XML_Char *UNUSED_P(base),
943                        const XML_Char *UNUSED_P(systemId),
944                        const XML_Char *UNUSED_P(publicId))
945 {
946     char *text = (char *)XML_GetUserData(parser);
947     XML_Parser extparser;
948 
949     extparser = XML_ExternalEntityParserCreate(parser, context, NULL);
950     if (extparser == NULL)
951         fail("Could not create external entity parser.");
952     if (  _XML_Parse_SINGLE_BYTES(extparser, text, strlen(text), XML_TRUE)
953           == XML_STATUS_ERROR) {
954         xml_failure(parser);
955         return XML_STATUS_ERROR;
956     }
957     return XML_STATUS_OK;
958 }
959 
960 /* Test that an error is reported for unknown entities if we have read
961    an external subset, and standalone is true.
962 */
963 START_TEST(test_wfc_undeclared_entity_with_external_subset_standalone) {
964     const char *text =
965         "<?xml version='1.0' encoding='us-ascii' standalone='yes'?>\n"
966         "<!DOCTYPE doc SYSTEM 'foo'>\n"
967         "<doc>&entity;</doc>";
968     char foo_text[] =
969         "<!ELEMENT doc (#PCDATA)*>";
970 
971     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
972     XML_SetUserData(parser, foo_text);
973     XML_SetExternalEntityRefHandler(parser, external_entity_loader);
974     expect_failure(text,
975                    XML_ERROR_UNDEFINED_ENTITY,
976                    "Parser did not report undefined entity (external DTD).");
977 }
978 END_TEST
979 
980 /* Test that no error is reported for unknown entities if we have read
981    an external subset, and standalone is false.
982 */
983 START_TEST(test_wfc_undeclared_entity_with_external_subset) {
984     const char *text =
985         "<?xml version='1.0' encoding='us-ascii'?>\n"
986         "<!DOCTYPE doc SYSTEM 'foo'>\n"
987         "<doc>&entity;</doc>";
988     char foo_text[] =
989         "<!ELEMENT doc (#PCDATA)*>";
990 
991     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
992     XML_SetUserData(parser, foo_text);
993     XML_SetExternalEntityRefHandler(parser, external_entity_loader);
994     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
995         xml_failure(parser);
996 }
997 END_TEST
998 
999 START_TEST(test_wfc_no_recursive_entity_refs)
1000 {
1001     const char *text =
1002         "<!DOCTYPE doc [\n"
1003         "  <!ENTITY entity '&#38;entity;'>\n"
1004         "]>\n"
1005         "<doc>&entity;</doc>";
1006 
1007     expect_failure(text,
1008                    XML_ERROR_RECURSIVE_ENTITY_REF,
1009                    "Parser did not report recursive entity reference.");
1010 }
1011 END_TEST
1012 
1013 /* Regression test for SF bug #483514. */
1014 START_TEST(test_dtd_default_handling)
1015 {
1016     const char *text =
1017         "<!DOCTYPE doc [\n"
1018         "<!ENTITY e SYSTEM 'http://xml.libexpat.org/e'>\n"
1019         "<!NOTATION n SYSTEM 'http://xml.libexpat.org/n'>\n"
1020         "<!ELEMENT doc EMPTY>\n"
1021         "<!ATTLIST doc a CDATA #IMPLIED>\n"
1022         "<?pi in dtd?>\n"
1023         "<!--comment in dtd-->\n"
1024         "]><doc/>";
1025 
1026     XML_SetDefaultHandler(parser, accumulate_characters);
1027     XML_SetDoctypeDeclHandler(parser,
1028                               dummy_start_doctype_handler,
1029                               dummy_end_doctype_handler);
1030     XML_SetEntityDeclHandler(parser, dummy_entity_decl_handler);
1031     XML_SetNotationDeclHandler(parser, dummy_notation_decl_handler);
1032     XML_SetElementDeclHandler(parser, dummy_element_decl_handler);
1033     XML_SetAttlistDeclHandler(parser, dummy_attlist_decl_handler);
1034     XML_SetProcessingInstructionHandler(parser, dummy_pi_handler);
1035     XML_SetCommentHandler(parser, dummy_comment_handler);
1036     run_character_check(text, "\n\n\n\n\n\n\n<doc/>");
1037 }
1038 END_TEST
1039 
1040 /* See related SF bug #673791.
1041    When namespace processing is enabled, setting the namespace URI for
1042    a prefix is not allowed; this test ensures that it *is* allowed
1043    when namespace processing is not enabled.
1044    (See Namespaces in XML, section 2.)
1045 */
1046 START_TEST(test_empty_ns_without_namespaces)
1047 {
1048     const char *text =
1049         "<doc xmlns:prefix='http://www.example.com/'>\n"
1050         "  <e xmlns:prefix=''/>\n"
1051         "</doc>";
1052 
1053     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1054         xml_failure(parser);
1055 }
1056 END_TEST
1057 
1058 /* Regression test for SF bug #824420.
1059    Checks that an xmlns:prefix attribute set in an attribute's default
1060    value isn't misinterpreted.
1061 */
1062 START_TEST(test_ns_in_attribute_default_without_namespaces)
1063 {
1064     const char *text =
1065         "<!DOCTYPE e:element [\n"
1066         "  <!ATTLIST e:element\n"
1067         "    xmlns:e CDATA 'http://example.com/'>\n"
1068         "      ]>\n"
1069         "<e:element/>";
1070 
1071     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1072         xml_failure(parser);
1073 }
1074 END_TEST
1075 
/* Document with a single <s> element whose character data is 20 rows
   of 60 digits (1200 characters).  Used as input by the stop/suspend
   parser tests. */
static const char *long_character_data_text =
    "<?xml version='1.0' encoding='iso-8859-1'?><s>"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "012345678901234567890123456789012345678901234567890123456789"
    "</s>";
1099 
1100 static XML_Bool resumable = XML_FALSE;
1101 
1102 static void
1103 clearing_aborting_character_handler(void *UNUSED_P(userData),
1104                                     const XML_Char *UNUSED_P(s), int UNUSED_P(len))
1105 {
1106     XML_StopParser(parser, resumable);
1107     XML_SetCharacterDataHandler(parser, NULL);
1108 }
1109 
1110 /* Regression test for SF bug #1515266: missing check of stopped
1111    parser in doContext() 'for' loop. */
1112 START_TEST(test_stop_parser_between_char_data_calls)
1113 {
1114     /* The sample data must be big enough that there are two calls to
1115        the character data handler from within the inner "for" loop of
1116        the XML_TOK_DATA_CHARS case in doContent(), and the character
1117        handler must stop the parser and clear the character data
1118        handler.
1119     */
1120     const char *text = long_character_data_text;
1121 
1122     XML_SetCharacterDataHandler(parser, clearing_aborting_character_handler);
1123     resumable = XML_FALSE;
1124     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_ERROR)
1125         xml_failure(parser);
1126     if (XML_GetErrorCode(parser) != XML_ERROR_ABORTED)
1127         xml_failure(parser);
1128 }
1129 END_TEST
1130 
1131 /* Regression test for SF bug #1515266: missing check of stopped
1132    parser in doContext() 'for' loop. */
1133 START_TEST(test_suspend_parser_between_char_data_calls)
1134 {
1135     /* The sample data must be big enough that there are two calls to
1136        the character data handler from within the inner "for" loop of
1137        the XML_TOK_DATA_CHARS case in doContent(), and the character
1138        handler must stop the parser and clear the character data
1139        handler.
1140     */
1141     const char *text = long_character_data_text;
1142 
1143     XML_SetCharacterDataHandler(parser, clearing_aborting_character_handler);
1144     resumable = XML_TRUE;
1145     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) != XML_STATUS_SUSPENDED)
1146         xml_failure(parser);
1147     if (XML_GetErrorCode(parser) != XML_ERROR_NONE)
1148         xml_failure(parser);
1149 }
1150 END_TEST
1151 
1152 START_TEST(test_good_cdata_ascii)
1153 {
1154     const char *text = "<a><![CDATA[<greeting>Hello, world!</greeting>]]></a>";
1155     const char *expected = "<greeting>Hello, world!</greeting>";
1156 
1157     CharData storage;
1158     CharData_Init(&storage);
1159     XML_SetUserData(parser, &storage);
1160     XML_SetCharacterDataHandler(parser, accumulate_characters);
1161 
1162     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1163         xml_failure(parser);
1164     CharData_CheckXMLChars(&storage, expected);
1165 }
1166 END_TEST
1167 
1168 START_TEST(test_good_cdata_utf16)
1169 {
1170     /* Test data is:
1171      *   <?xml version='1.0' encoding='utf-16'?>
1172      *   <a><![CDATA[hello]]></a>
1173      */
1174     const char text[] =
1175             "\0<\0?\0x\0m\0l\0"
1176                 " \0v\0e\0r\0s\0i\0o\0n\0=\0'\0\x31\0.\0\x30\0'\0"
1177                 " \0e\0n\0c\0o\0d\0i\0n\0g\0=\0'\0u\0t\0f\0-\0""1\0""6\0'"
1178                 "\0?\0>\0\n"
1179             "\0<\0a\0>\0<\0!\0[\0C\0D\0A\0T\0A\0[\0h\0e\0l\0l\0o\0]\0]\0>\0<\0/\0a\0>";
1180     const char *expected = "hello";
1181 
1182     CharData storage;
1183     CharData_Init(&storage);
1184     XML_SetUserData(parser, &storage);
1185     XML_SetCharacterDataHandler(parser, accumulate_characters);
1186 
1187     if (_XML_Parse_SINGLE_BYTES(parser, text, sizeof(text) - 1, XML_TRUE) == XML_STATUS_ERROR)
1188         xml_failure(parser);
1189     CharData_CheckXMLChars(&storage, expected);
1190 }
1191 END_TEST
1192 
1193 START_TEST(test_bad_cdata)
1194 {
1195     struct CaseData {
1196         const char *text;
1197         enum XML_Error expectedError;
1198     };
1199 
1200     struct CaseData cases[] = {
1201         {"<a><", XML_ERROR_UNCLOSED_TOKEN},
1202         {"<a><!", XML_ERROR_UNCLOSED_TOKEN},
1203         {"<a><![", XML_ERROR_UNCLOSED_TOKEN},
1204         {"<a><![C", XML_ERROR_UNCLOSED_TOKEN},
1205         {"<a><![CD", XML_ERROR_UNCLOSED_TOKEN},
1206         {"<a><![CDA", XML_ERROR_UNCLOSED_TOKEN},
1207         {"<a><![CDAT", XML_ERROR_UNCLOSED_TOKEN},
1208         {"<a><![CDATA", XML_ERROR_UNCLOSED_TOKEN},
1209 
1210         {"<a><![CDATA[", XML_ERROR_UNCLOSED_CDATA_SECTION},
1211         {"<a><![CDATA[]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1212         {"<a><![CDATA[]]", XML_ERROR_UNCLOSED_CDATA_SECTION},
1213 
1214         {"<a><!<a/>", XML_ERROR_INVALID_TOKEN},
1215         {"<a><![<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1216         {"<a><![C<a/>", XML_ERROR_UNCLOSED_TOKEN}, /* ?! */
1217         {"<a><![CD<a/>", XML_ERROR_INVALID_TOKEN},
1218         {"<a><![CDA<a/>", XML_ERROR_INVALID_TOKEN},
1219         {"<a><![CDAT<a/>", XML_ERROR_INVALID_TOKEN},
1220         {"<a><![CDATA<a/>", XML_ERROR_INVALID_TOKEN},
1221 
1222         {"<a><![CDATA[<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1223         {"<a><![CDATA[]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION},
1224         {"<a><![CDATA[]]<a/>", XML_ERROR_UNCLOSED_CDATA_SECTION}
1225     };
1226 
1227     size_t i = 0;
1228     for (; i < sizeof(cases) / sizeof(struct CaseData); i++) {
1229         const enum XML_Status actualStatus = _XML_Parse_SINGLE_BYTES(
1230                 parser, cases[i].text, strlen(cases[i].text), XML_TRUE);
1231         const enum XML_Error actualError = XML_GetErrorCode(parser);
1232 
1233         assert(actualStatus == XML_STATUS_ERROR);
1234 
1235         if (actualError != cases[i].expectedError) {
1236             char message[100];
1237             sprintf(message, "Expected error %d but got error %d for case %u: \"%s\"\n",
1238                     cases[i].expectedError, actualError, (unsigned int)i + 1, cases[i].text);
1239             fail(message);
1240         }
1241 
1242         XML_ParserReset(parser, NULL);
1243     }
1244 }
1245 END_TEST
1246 
1247 
1248 /*
1249  * Namespaces tests.
1250  */
1251 
1252 static void
1253 namespace_setup(void)
1254 {
1255     parser = XML_ParserCreateNS(NULL, ' ');
1256     if (parser == NULL)
1257         fail("Parser not created.");
1258 }
1259 
/* Fixture teardown for the namespace tests; the work is delegated to
   basic_teardown(). */
static void
namespace_teardown(void)
{
    basic_teardown();
}
1265 
1266 /* Check that an element name and attribute name match the expected values.
1267    The expected values are passed as an array reference of string pointers
1268    provided as the userData argument; the first is the expected
1269    element name, and the second is the expected attribute name.
1270 */
1271 static void XMLCALL
1272 triplet_start_checker(void *userData, const XML_Char *name,
1273                       const XML_Char **atts)
1274 {
1275     char **elemstr = (char **)userData;
1276     char buffer[1024];
1277     if (strcmp(elemstr[0], name) != 0) {
1278         sprintf(buffer, "unexpected start string: '%s'", name);
1279         fail(buffer);
1280     }
1281     if (strcmp(elemstr[1], atts[0]) != 0) {
1282         sprintf(buffer, "unexpected attribute string: '%s'", atts[0]);
1283         fail(buffer);
1284     }
1285 }
1286 
1287 /* Check that the element name passed to the end-element handler matches
1288    the expected value.  The expected value is passed as the first element
1289    in an array of strings passed as the userData argument.
1290 */
1291 static void XMLCALL
1292 triplet_end_checker(void *userData, const XML_Char *name)
1293 {
1294     char **elemstr = (char **)userData;
1295     if (strcmp(elemstr[0], name) != 0) {
1296         char buffer[1024];
1297         sprintf(buffer, "unexpected end string: '%s'", name);
1298         fail(buffer);
1299     }
1300 }
1301 
1302 START_TEST(test_return_ns_triplet)
1303 {
1304     const char *text =
1305         "<foo:e xmlns:foo='http://expat.sf.net/' bar:a='12'\n"
1306         "       xmlns:bar='http://expat.sf.net/'></foo:e>";
1307     const char *elemstr[] = {
1308         "http://expat.sf.net/ e foo",
1309         "http://expat.sf.net/ a bar"
1310     };
1311     XML_SetReturnNSTriplet(parser, XML_TRUE);
1312     XML_SetUserData(parser, elemstr);
1313     XML_SetElementHandler(parser, triplet_start_checker, triplet_end_checker);
1314     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1315         xml_failure(parser);
1316 }
1317 END_TEST
1318 
1319 static void XMLCALL
1320 overwrite_start_checker(void *userData, const XML_Char *name,
1321                         const XML_Char **atts)
1322 {
1323     CharData *storage = (CharData *) userData;
1324     CharData_AppendString(storage, "start ");
1325     CharData_AppendXMLChars(storage, name, -1);
1326     while (*atts != NULL) {
1327         CharData_AppendString(storage, "\nattribute ");
1328         CharData_AppendXMLChars(storage, *atts, -1);
1329         atts += 2;
1330     }
1331     CharData_AppendString(storage, "\n");
1332 }
1333 
1334 static void XMLCALL
1335 overwrite_end_checker(void *userData, const XML_Char *name)
1336 {
1337     CharData *storage = (CharData *) userData;
1338     CharData_AppendString(storage, "end ");
1339     CharData_AppendXMLChars(storage, name, -1);
1340     CharData_AppendString(storage, "\n");
1341 }
1342 
1343 static void
1344 run_ns_tagname_overwrite_test(const char *text, const char *result)
1345 {
1346     CharData storage;
1347     CharData_Init(&storage);
1348     XML_SetUserData(parser, &storage);
1349     XML_SetElementHandler(parser,
1350                           overwrite_start_checker, overwrite_end_checker);
1351     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1352         xml_failure(parser);
1353     CharData_CheckString(&storage, result);
1354 }
1355 
1356 /* Regression test for SF bug #566334. */
1357 START_TEST(test_ns_tagname_overwrite)
1358 {
1359     const char *text =
1360         "<n:e xmlns:n='http://xml.libexpat.org/'>\n"
1361         "  <n:f n:attr='foo'/>\n"
1362         "  <n:g n:attr2='bar'/>\n"
1363         "</n:e>";
1364     const char *result =
1365         "start http://xml.libexpat.org/ e\n"
1366         "start http://xml.libexpat.org/ f\n"
1367         "attribute http://xml.libexpat.org/ attr\n"
1368         "end http://xml.libexpat.org/ f\n"
1369         "start http://xml.libexpat.org/ g\n"
1370         "attribute http://xml.libexpat.org/ attr2\n"
1371         "end http://xml.libexpat.org/ g\n"
1372         "end http://xml.libexpat.org/ e\n";
1373     run_ns_tagname_overwrite_test(text, result);
1374 }
1375 END_TEST
1376 
1377 /* Regression test for SF bug #566334. */
1378 START_TEST(test_ns_tagname_overwrite_triplet)
1379 {
1380     const char *text =
1381         "<n:e xmlns:n='http://xml.libexpat.org/'>\n"
1382         "  <n:f n:attr='foo'/>\n"
1383         "  <n:g n:attr2='bar'/>\n"
1384         "</n:e>";
1385     const char *result =
1386         "start http://xml.libexpat.org/ e n\n"
1387         "start http://xml.libexpat.org/ f n\n"
1388         "attribute http://xml.libexpat.org/ attr n\n"
1389         "end http://xml.libexpat.org/ f n\n"
1390         "start http://xml.libexpat.org/ g n\n"
1391         "attribute http://xml.libexpat.org/ attr2 n\n"
1392         "end http://xml.libexpat.org/ g n\n"
1393         "end http://xml.libexpat.org/ e n\n";
1394     XML_SetReturnNSTriplet(parser, XML_TRUE);
1395     run_ns_tagname_overwrite_test(text, result);
1396 }
1397 END_TEST
1398 
1399 
1400 /* Regression test for SF bug #620343. */
1401 static void XMLCALL
1402 start_element_fail(void *UNUSED_P(userData),
1403                    const XML_Char *UNUSED_P(name), const XML_Char **UNUSED_P(atts))
1404 {
1405     /* We should never get here. */
1406     fail("should never reach start_element_fail()");
1407 }
1408 
1409 static void XMLCALL
1410 start_ns_clearing_start_element(void *userData,
1411                                 const XML_Char *UNUSED_P(prefix),
1412                                 const XML_Char *UNUSED_P(uri))
1413 {
1414     XML_SetStartElementHandler((XML_Parser) userData, NULL);
1415 }
1416 
1417 START_TEST(test_start_ns_clears_start_element)
1418 {
1419     /* This needs to use separate start/end tags; using the empty tag
1420        syntax doesn't cause the problematic path through Expat to be
1421        taken.
1422     */
1423     const char *text = "<e xmlns='http://xml.libexpat.org/'></e>";
1424 
1425     XML_SetStartElementHandler(parser, start_element_fail);
1426     XML_SetStartNamespaceDeclHandler(parser, start_ns_clearing_start_element);
1427     XML_UseParserAsHandlerArg(parser);
1428     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1429         xml_failure(parser);
1430 }
1431 END_TEST
1432 
1433 /* Regression test for SF bug #616863. */
1434 static int XMLCALL
1435 external_entity_handler(XML_Parser parser,
1436                         const XML_Char *context,
1437                         const XML_Char *UNUSED_P(base),
1438                         const XML_Char *UNUSED_P(systemId),
1439                         const XML_Char *UNUSED_P(publicId))
1440 {
1441     intptr_t callno = 1 + (intptr_t)XML_GetUserData(parser);
1442     const char *text;
1443     XML_Parser p2;
1444 
1445     if (callno == 1)
1446         text = ("<!ELEMENT doc (e+)>\n"
1447                 "<!ATTLIST doc xmlns CDATA #IMPLIED>\n"
1448                 "<!ELEMENT e EMPTY>\n");
1449     else
1450         text = ("<?xml version='1.0' encoding='us-ascii'?>"
1451                 "<e/>");
1452 
1453     XML_SetUserData(parser, (void *) callno);
1454     p2 = XML_ExternalEntityParserCreate(parser, context, NULL);
1455     if (_XML_Parse_SINGLE_BYTES(p2, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR) {
1456         xml_failure(p2);
1457         return 0;
1458     }
1459     XML_ParserFree(p2);
1460     return 1;
1461 }
1462 
1463 START_TEST(test_default_ns_from_ext_subset_and_ext_ge)
1464 {
1465     const char *text =
1466         "<?xml version='1.0'?>\n"
1467         "<!DOCTYPE doc SYSTEM 'http://xml.libexpat.org/doc.dtd' [\n"
1468         "  <!ENTITY en SYSTEM 'http://xml.libexpat.org/entity.ent'>\n"
1469         "]>\n"
1470         "<doc xmlns='http://xml.libexpat.org/ns1'>\n"
1471         "&en;\n"
1472         "</doc>";
1473 
1474     XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
1475     XML_SetExternalEntityRefHandler(parser, external_entity_handler);
1476     /* We actually need to set this handler to tickle this bug. */
1477     XML_SetStartElementHandler(parser, dummy_start_element);
1478     XML_SetUserData(parser, NULL);
1479     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1480         xml_failure(parser);
1481 }
1482 END_TEST
1483 
1484 /* Regression test #1 for SF bug #673791. */
1485 START_TEST(test_ns_prefix_with_empty_uri_1)
1486 {
1487     const char *text =
1488         "<doc xmlns:prefix='http://xml.libexpat.org/'>\n"
1489         "  <e xmlns:prefix=''/>\n"
1490         "</doc>";
1491 
1492     expect_failure(text,
1493                    XML_ERROR_UNDECLARING_PREFIX,
1494                    "Did not report re-setting namespace"
1495                    " URI with prefix to ''.");
1496 }
1497 END_TEST
1498 
1499 /* Regression test #2 for SF bug #673791. */
1500 START_TEST(test_ns_prefix_with_empty_uri_2)
1501 {
1502     const char *text =
1503         "<?xml version='1.0'?>\n"
1504         "<docelem xmlns:pre=''/>";
1505 
1506     expect_failure(text,
1507                    XML_ERROR_UNDECLARING_PREFIX,
1508                    "Did not report setting namespace URI with prefix to ''.");
1509 }
1510 END_TEST
1511 
1512 /* Regression test #3 for SF bug #673791. */
1513 START_TEST(test_ns_prefix_with_empty_uri_3)
1514 {
1515     const char *text =
1516         "<!DOCTYPE doc [\n"
1517         "  <!ELEMENT doc EMPTY>\n"
1518         "  <!ATTLIST doc\n"
1519         "    xmlns:prefix CDATA ''>\n"
1520         "]>\n"
1521         "<doc/>";
1522 
1523     expect_failure(text,
1524                    XML_ERROR_UNDECLARING_PREFIX,
1525                    "Didn't report attr default setting NS w/ prefix to ''.");
1526 }
1527 END_TEST
1528 
1529 /* Regression test #4 for SF bug #673791. */
1530 START_TEST(test_ns_prefix_with_empty_uri_4)
1531 {
1532     const char *text =
1533         "<!DOCTYPE doc [\n"
1534         "  <!ELEMENT prefix:doc EMPTY>\n"
1535         "  <!ATTLIST prefix:doc\n"
1536         "    xmlns:prefix CDATA 'http://xml.libexpat.org/'>\n"
1537         "]>\n"
1538         "<prefix:doc/>";
1539     /* Packaged info expected by the end element handler;
1540        the weird structuring lets us re-use the triplet_end_checker()
1541        function also used for another test. */
1542     const char *elemstr[] = {
1543         "http://xml.libexpat.org/ doc prefix"
1544     };
1545     XML_SetReturnNSTriplet(parser, XML_TRUE);
1546     XML_SetUserData(parser, elemstr);
1547     XML_SetEndElementHandler(parser, triplet_end_checker);
1548     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1549         xml_failure(parser);
1550 }
1551 END_TEST
1552 
1553 START_TEST(test_ns_default_with_empty_uri)
1554 {
1555     const char *text =
1556         "<doc xmlns='http://xml.libexpat.org/'>\n"
1557         "  <e xmlns=''/>\n"
1558         "</doc>";
1559     if (_XML_Parse_SINGLE_BYTES(parser, text, strlen(text), XML_TRUE) == XML_STATUS_ERROR)
1560         xml_failure(parser);
1561 }
1562 END_TEST
1563 
1564 /* Regression test for SF bug #692964: two prefixes for one namespace. */
1565 START_TEST(test_ns_duplicate_attrs_diff_prefixes)
1566 {
1567     const char *text =
1568         "<doc xmlns:a='http://xml.libexpat.org/a'\n"
1569         "     xmlns:b='http://xml.libexpat.org/a'\n"
1570         "     a:a='v' b:a='v' />";
1571     expect_failure(text,
1572                    XML_ERROR_DUPLICATE_ATTRIBUTE,
1573                    "did not report multiple attributes with same URI+name");
1574 }
1575 END_TEST
1576 
1577 /* Regression test for SF bug #695401: unbound prefix. */
1578 START_TEST(test_ns_unbound_prefix_on_attribute)
1579 {
1580     const char *text = "<doc a:attr=''/>";
1581     expect_failure(text,
1582                    XML_ERROR_UNBOUND_PREFIX,
1583                    "did not report unbound prefix on attribute");
1584 }
1585 END_TEST
1586 
1587 /* Regression test for SF bug #695401: unbound prefix. */
1588 START_TEST(test_ns_unbound_prefix_on_element)
1589 {
1590     const char *text = "<a:doc/>";
1591     expect_failure(text,
1592                    XML_ERROR_UNBOUND_PREFIX,
1593                    "did not report unbound prefix on element");
1594 }
1595 END_TEST
1596 
/* Build the test suite: a "basic" suite holding two test cases, one
   for the basic tests and one for the XML namespace tests.  Each test
   case gets its own setup/teardown fixture pair and its list of test
   functions.  Returns the populated suite.
*/
static Suite *
make_suite(void)
{
    Suite *s = suite_create("basic");
    TCase *tc_basic = tcase_create("basic tests");
    TCase *tc_namespace = tcase_create("XML namespaces");

    /* Basic tests, run with the basic_setup/basic_teardown fixture. */
    suite_add_tcase(s, tc_basic);
    tcase_add_checked_fixture(tc_basic, basic_setup, basic_teardown);
    tcase_add_test(tc_basic, test_nul_byte);
    tcase_add_test(tc_basic, test_u0000_char);
    tcase_add_test(tc_basic, test_bom_utf8);
    tcase_add_test(tc_basic, test_bom_utf16_be);
    tcase_add_test(tc_basic, test_bom_utf16_le);
    tcase_add_test(tc_basic, test_illegal_utf8);
    tcase_add_test(tc_basic, test_utf8_auto_align);
    tcase_add_test(tc_basic, test_utf16);
    tcase_add_test(tc_basic, test_utf16_le_epilog_newline);
    tcase_add_test(tc_basic, test_latin1_umlauts);
    /* Regression test for SF bug #491986. */
    tcase_add_test(tc_basic, test_danish_latin1);
    /* Regression test for SF bug #514281. */
    tcase_add_test(tc_basic, test_french_charref_hexidecimal);
    tcase_add_test(tc_basic, test_french_charref_decimal);
    tcase_add_test(tc_basic, test_french_latin1);
    tcase_add_test(tc_basic, test_french_utf8);
    tcase_add_test(tc_basic, test_utf8_false_rejection);
    tcase_add_test(tc_basic, test_line_number_after_parse);
    tcase_add_test(tc_basic, test_column_number_after_parse);
    tcase_add_test(tc_basic, test_line_and_column_numbers_inside_handlers);
    tcase_add_test(tc_basic, test_line_number_after_error);
    tcase_add_test(tc_basic, test_column_number_after_error);
    tcase_add_test(tc_basic, test_really_long_lines);
    tcase_add_test(tc_basic, test_end_element_events);
    tcase_add_test(tc_basic, test_attr_whitespace_normalization);
    tcase_add_test(tc_basic, test_xmldecl_misplaced);
    tcase_add_test(tc_basic, test_unknown_encoding_internal_entity);
    tcase_add_test(tc_basic,
                   test_wfc_undeclared_entity_unread_external_subset);
    tcase_add_test(tc_basic, test_wfc_undeclared_entity_no_external_subset);
    tcase_add_test(tc_basic, test_wfc_undeclared_entity_standalone);
    tcase_add_test(tc_basic, test_wfc_undeclared_entity_with_external_subset);
    tcase_add_test(tc_basic,
                   test_wfc_undeclared_entity_with_external_subset_standalone);
    tcase_add_test(tc_basic, test_wfc_no_recursive_entity_refs);
    tcase_add_test(tc_basic, test_ext_entity_set_encoding);
    tcase_add_test(tc_basic, test_dtd_default_handling);
    tcase_add_test(tc_basic, test_empty_ns_without_namespaces);
    tcase_add_test(tc_basic, test_ns_in_attribute_default_without_namespaces);
    tcase_add_test(tc_basic, test_stop_parser_between_char_data_calls);
    tcase_add_test(tc_basic, test_suspend_parser_between_char_data_calls);
    tcase_add_test(tc_basic, test_good_cdata_ascii);
    tcase_add_test(tc_basic, test_good_cdata_utf16);
    tcase_add_test(tc_basic, test_bad_cdata);

    /* Namespace tests, run with the namespace_setup/namespace_teardown
       fixture. */
    suite_add_tcase(s, tc_namespace);
    tcase_add_checked_fixture(tc_namespace,
                              namespace_setup, namespace_teardown);
    tcase_add_test(tc_namespace, test_return_ns_triplet);
    tcase_add_test(tc_namespace, test_ns_tagname_overwrite);
    tcase_add_test(tc_namespace, test_ns_tagname_overwrite_triplet);
    tcase_add_test(tc_namespace, test_start_ns_clears_start_element);
    tcase_add_test(tc_namespace, test_default_ns_from_ext_subset_and_ext_ge);
    tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_1);
    tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_2);
    tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_3);
    tcase_add_test(tc_namespace, test_ns_prefix_with_empty_uri_4);
    tcase_add_test(tc_namespace, test_ns_default_with_empty_uri);
    tcase_add_test(tc_namespace, test_ns_duplicate_attrs_diff_prefixes);
    tcase_add_test(tc_namespace, test_ns_unbound_prefix_on_attribute);
    tcase_add_test(tc_namespace, test_ns_unbound_prefix_on_element);

    return s;
}
1671 
1672 
1673 int
1674 main(int argc, char *argv[])
1675 {
1676     int i, nf;
1677     int verbosity = CK_NORMAL;
1678     Suite *s = make_suite();
1679     SRunner *sr = srunner_create(s);
1680 
1681     /* run the tests for internal helper functions */
1682     testhelper_is_whitespace_normalized();
1683 
1684     for (i = 1; i < argc; ++i) {
1685         char *opt = argv[i];
1686         if (strcmp(opt, "-v") == 0 || strcmp(opt, "--verbose") == 0)
1687             verbosity = CK_VERBOSE;
1688         else if (strcmp(opt, "-q") == 0 || strcmp(opt, "--quiet") == 0)
1689             verbosity = CK_SILENT;
1690         else {
1691             fprintf(stderr, "runtests: unknown option '%s'\n", opt);
1692             return 2;
1693         }
1694     }
1695     if (verbosity != CK_SILENT)
1696         printf("Expat version: %s\n", XML_ExpatVersion());
1697     srunner_run_all(sr, verbosity);
1698     nf = srunner_ntests_failed(sr);
1699     srunner_free(sr);
1700 
1701     return (nf == 0) ? EXIT_SUCCESS : EXIT_FAILURE;
1702 }
1703