1 /* Read an XML document from standard input and print
2 element declarations (if any) to standard output.
3 It must be used with Expat compiled for UTF-8 output.
4 __ __ _
5 ___\ \/ /_ __ __ _| |_
6 / _ \\ /| '_ \ / _` | __|
7 | __// \| |_) | (_| | |_
8 \___/_/\_\ .__/ \__,_|\__|
9 |_| XML parser
10
11 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
12 Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
13 Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net>
14 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
15 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
16 Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
17 Copyright (c) 2019 Zhongyuan Zhou <zhouzhongyuan@huawei.com>
18 Copyright (c) 2024 Hanno Böck <hanno@gentoo.org>
19 Copyright (c) 2026 Matthew Fernandez <matthew.fernandez@gmail.com>
20 Licensed under the MIT license:
21
22 Permission is hereby granted, free of charge, to any person obtaining
23 a copy of this software and associated documentation files (the
24 "Software"), to deal in the Software without restriction, including
25 without limitation the rights to use, copy, modify, merge, publish,
26 distribute, sublicense, and/or sell copies of the Software, and to permit
27 persons to whom the Software is furnished to do so, subject to the
28 following conditions:
29
30 The above copyright notice and this permission notice shall be included
31 in all copies or substantial portions of the Software.
32
33 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
34 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
35 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
36 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
37 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
38 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
39 USE OR OTHER DEALINGS IN THE SOFTWARE.
40 */
41
42 #include <stdbool.h>
43 #include <stdio.h>
44 #include <stdlib.h>
45 #include <expat.h>
46
// printf length modifier spliced into format strings: "ll" when
// XML_LARGE_SIZE is defined, "l" otherwise.  main() pairs it with "u" to
// print the value returned by XML_GetCurrentLineNumber().
#ifndef XML_LARGE_SIZE
#  define XML_FMT_INT_MOD "l"
#else
#  define XML_FMT_INT_MOD "ll"
#endif
52
// printf conversion specifier used in this file for the strings received
// from Expat (element names, error messages): "ls" when
// XML_UNICODE_WCHAR_T is defined, plain "s" otherwise.
#if defined(XML_UNICODE_WCHAR_T)
#  define XML_FMT_STR "ls"
#else
#  define XML_FMT_STR "s"
#endif
58
59 // While traversing the XML_Content tree, we avoid recursion
60 // to not be vulnerable to a denial of service attack.
61 typedef struct StackStruct {
62 const XML_Content *model;
63 unsigned level;
64 struct StackStruct *prev;
65 } Stack;
66
67 static Stack *
stackPushMalloc(Stack * stackTop,const XML_Content * model,unsigned level)68 stackPushMalloc(Stack *stackTop, const XML_Content *model, unsigned level) {
69 Stack *const newStackTop = malloc(sizeof(Stack));
70 if (! newStackTop) {
71 return NULL;
72 }
73 newStackTop->model = model;
74 newStackTop->level = level;
75 newStackTop->prev = stackTop;
76 return newStackTop;
77 }
78
79 static Stack *
stackPopFree(Stack * stackTop)80 stackPopFree(Stack *stackTop) {
81 Stack *const newStackTop = stackTop->prev;
82 free(stackTop);
83 return newStackTop;
84 }
85
86 static const char *
contentTypeName(enum XML_Content_Type contentType)87 contentTypeName(enum XML_Content_Type contentType) {
88 switch (contentType) {
89 case XML_CTYPE_EMPTY:
90 return "EMPTY";
91 case XML_CTYPE_ANY:
92 return "ANY";
93 case XML_CTYPE_MIXED:
94 return "MIXED";
95 case XML_CTYPE_NAME:
96 return "NAME";
97 case XML_CTYPE_CHOICE:
98 return "CHOICE";
99 case XML_CTYPE_SEQ:
100 return "SEQ";
101 default:
102 return "???";
103 }
104 }
105
106 static const char *
contentQuantName(enum XML_Content_Quant contentQuant)107 contentQuantName(enum XML_Content_Quant contentQuant) {
108 switch (contentQuant) {
109 case XML_CQUANT_NONE:
110 return "NONE";
111 case XML_CQUANT_OPT:
112 return "OPT";
113 case XML_CQUANT_REP:
114 return "REP";
115 case XML_CQUANT_PLUS:
116 return "PLUS";
117 default:
118 return "???";
119 }
120 }
121
122 static void
dumpContentModelElement(const XML_Content * model,unsigned level,const XML_Content * root)123 dumpContentModelElement(const XML_Content *model, unsigned level,
124 const XML_Content *root) {
125 // Indent
126 unsigned u = 0;
127 for (; u < level; u++) {
128 printf(" ");
129 }
130
131 // Node
132 printf("[%u] type=%s(%u), quant=%s(%u)", (unsigned)(model - root),
133 contentTypeName(model->type), (unsigned int)model->type,
134 contentQuantName(model->quant), (unsigned int)model->quant);
135 if (model->name) {
136 printf(", name=\"%" XML_FMT_STR "\"", model->name);
137 } else {
138 printf(", name=NULL");
139 }
140 printf(", numchildren=%u", model->numchildren);
141 printf("\n");
142 }
143
144 static bool
dumpContentModel(const XML_Char * name,const XML_Content * root)145 dumpContentModel(const XML_Char *name, const XML_Content *root) {
146 printf("Element \"%" XML_FMT_STR "\":\n", name);
147 Stack *stackTop = stackPushMalloc(NULL, root, 1);
148 if (! stackTop) {
149 return false;
150 }
151
152 while (stackTop) {
153 const XML_Content *const model = stackTop->model;
154 const unsigned level = stackTop->level;
155
156 dumpContentModelElement(model, level, root);
157
158 stackTop = stackPopFree(stackTop);
159
160 for (size_t u = model->numchildren; u >= 1; u--) {
161 Stack *const newStackTop
162 = stackPushMalloc(stackTop, model->children + (u - 1), level + 1);
163 if (! newStackTop) {
164 // We ran out of memory, so let's free all memory allocated
165 // earlier in this function, to be leak-clean:
166 while (stackTop != NULL) {
167 stackTop = stackPopFree(stackTop);
168 }
169 return false;
170 }
171 stackTop = newStackTop;
172 }
173 }
174
175 printf("\n");
176 return true;
177 }
178
179 static void XMLCALL
handleElementDeclaration(void * userData,const XML_Char * name,XML_Content * model)180 handleElementDeclaration(void *userData, const XML_Char *name,
181 XML_Content *model) {
182 XML_Parser parser = (XML_Parser)userData;
183 const bool success = dumpContentModel(name, model);
184 XML_FreeContentModel(parser, model);
185 if (! success) {
186 XML_StopParser(parser, /* resumable= */ XML_FALSE);
187 }
188 }
189
190 int
main(void)191 main(void) {
192 XML_Parser parser = XML_ParserCreate(NULL);
193 int done;
194
195 if (! parser) {
196 fprintf(stderr, "Couldn't allocate memory for parser\n");
197 return 1;
198 }
199
200 XML_SetUserData(parser, parser);
201 XML_SetElementDeclHandler(parser, handleElementDeclaration);
202
203 do {
204 void *const buf = XML_GetBuffer(parser, BUFSIZ);
205 if (! buf) {
206 fprintf(stderr, "Couldn't allocate memory for buffer\n");
207 XML_ParserFree(parser);
208 return 1;
209 }
210
211 const size_t len = fread(buf, 1, BUFSIZ, stdin);
212
213 if (ferror(stdin)) {
214 fprintf(stderr, "Read error\n");
215 XML_ParserFree(parser);
216 return 1;
217 }
218
219 done = feof(stdin);
220
221 if (XML_ParseBuffer(parser, (int)len, done) == XML_STATUS_ERROR) {
222 enum XML_Error errorCode = XML_GetErrorCode(parser);
223 if (errorCode == XML_ERROR_ABORTED) {
224 errorCode = XML_ERROR_NO_MEMORY;
225 }
226 fprintf(stderr,
227 "Parse error at line %" XML_FMT_INT_MOD "u:\n%" XML_FMT_STR "\n",
228 XML_GetCurrentLineNumber(parser), XML_ErrorString(errorCode));
229 XML_ParserFree(parser);
230 return 1;
231 }
232 } while (! done);
233
234 XML_ParserFree(parser);
235 return 0;
236 }
237