1 /* Read an XML document from standard input and print
2 element declarations (if any) to standard output.
3 It must be used with Expat compiled for UTF-8 output.
4 __ __ _
5 ___\ \/ /_ __ __ _| |_
6 / _ \\ /| '_ \ / _` | __|
7 | __// \| |_) | (_| | |_
8 \___/_/\_\ .__/ \__,_|\__|
9 |_| XML parser
10
11 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
12 Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
13 Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net>
14 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
15 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
16 Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk>
17 Copyright (c) 2019 Zhongyuan Zhou <zhouzhongyuan@huawei.com>
18 Licensed under the MIT license:
19
20 Permission is hereby granted, free of charge, to any person obtaining
21 a copy of this software and associated documentation files (the
22 "Software"), to deal in the Software without restriction, including
23 without limitation the rights to use, copy, modify, merge, publish,
24 distribute, sublicense, and/or sell copies of the Software, and to permit
25 persons to whom the Software is furnished to do so, subject to the
26 following conditions:
27
28 The above copyright notice and this permission notice shall be included
29 in all copies or substantial portions of the Software.
30
31 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
34 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
35 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
36 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
37 USE OR OTHER DEALINGS IN THE SOFTWARE.
38 */
39
40 #include <stdbool.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <expat.h>
44
// printf length modifier that is combined with "u" when this file prints
// the line number returned by XML_GetCurrentLineNumber().
#if defined(XML_LARGE_SIZE)
#  define XML_FMT_INT_MOD "ll"
#else
#  define XML_FMT_INT_MOD "l"
#endif
50
// printf conversion (without the leading '%') that this file uses to print
// strings handed out by Expat: wide strings when XML_UNICODE_WCHAR_T is
// defined, plain char strings otherwise.
#if defined(XML_UNICODE_WCHAR_T)
#  define XML_FMT_STR "ls"
#else
#  define XML_FMT_STR "s"
#endif
56
57 // While traversing the XML_Content tree, we avoid recursion
58 // to not be vulnerable to a denial of service attack.
59 typedef struct StackStruct {
60 const XML_Content *model;
61 unsigned level;
62 struct StackStruct *prev;
63 } Stack;
64
65 static Stack *
stackPushMalloc(Stack * stackTop,const XML_Content * model,unsigned level)66 stackPushMalloc(Stack *stackTop, const XML_Content *model, unsigned level) {
67 Stack *const newStackTop = malloc(sizeof(Stack));
68 if (! newStackTop) {
69 return NULL;
70 }
71 newStackTop->model = model;
72 newStackTop->level = level;
73 newStackTop->prev = stackTop;
74 return newStackTop;
75 }
76
77 static Stack *
stackPopFree(Stack * stackTop)78 stackPopFree(Stack *stackTop) {
79 Stack *const newStackTop = stackTop->prev;
80 free(stackTop);
81 return newStackTop;
82 }
83
84 static char *
contentTypeName(enum XML_Content_Type contentType)85 contentTypeName(enum XML_Content_Type contentType) {
86 switch (contentType) {
87 case XML_CTYPE_EMPTY:
88 return "EMPTY";
89 case XML_CTYPE_ANY:
90 return "ANY";
91 case XML_CTYPE_MIXED:
92 return "MIXED";
93 case XML_CTYPE_NAME:
94 return "NAME";
95 case XML_CTYPE_CHOICE:
96 return "CHOICE";
97 case XML_CTYPE_SEQ:
98 return "SEQ";
99 default:
100 return "???";
101 }
102 }
103
104 static char *
contentQuantName(enum XML_Content_Quant contentQuant)105 contentQuantName(enum XML_Content_Quant contentQuant) {
106 switch (contentQuant) {
107 case XML_CQUANT_NONE:
108 return "NONE";
109 case XML_CQUANT_OPT:
110 return "OPT";
111 case XML_CQUANT_REP:
112 return "REP";
113 case XML_CQUANT_PLUS:
114 return "PLUS";
115 default:
116 return "???";
117 }
118 }
119
120 static void
dumpContentModelElement(const XML_Content * model,unsigned level,const XML_Content * root)121 dumpContentModelElement(const XML_Content *model, unsigned level,
122 const XML_Content *root) {
123 // Indent
124 unsigned u = 0;
125 for (; u < level; u++) {
126 printf(" ");
127 }
128
129 // Node
130 printf("[%u] type=%s(%d), quant=%s(%d)", (unsigned)(model - root),
131 contentTypeName(model->type), model->type,
132 contentQuantName(model->quant), model->quant);
133 if (model->name) {
134 printf(", name=\"%" XML_FMT_STR "\"", model->name);
135 } else {
136 printf(", name=NULL");
137 }
138 printf(", numchildren=%d", model->numchildren);
139 printf("\n");
140 }
141
142 static bool
dumpContentModel(const XML_Char * name,const XML_Content * root)143 dumpContentModel(const XML_Char *name, const XML_Content *root) {
144 printf("Element \"%" XML_FMT_STR "\":\n", name);
145 Stack *stackTop = stackPushMalloc(NULL, root, 1);
146 if (! stackTop) {
147 return false;
148 }
149
150 while (stackTop) {
151 const XML_Content *const model = stackTop->model;
152 const unsigned level = stackTop->level;
153
154 dumpContentModelElement(model, level, root);
155
156 stackTop = stackPopFree(stackTop);
157
158 for (size_t u = model->numchildren; u >= 1; u--) {
159 Stack *const newStackTop
160 = stackPushMalloc(stackTop, model->children + (u - 1), level + 1);
161 if (! newStackTop) {
162 // We ran out of memory, so let's free all memory allocated
163 // earlier in this function, to be leak-clean:
164 while (stackTop != NULL) {
165 stackTop = stackPopFree(stackTop);
166 }
167 return false;
168 }
169 stackTop = newStackTop;
170 }
171 }
172
173 printf("\n");
174 return true;
175 }
176
177 static void XMLCALL
handleElementDeclaration(void * userData,const XML_Char * name,XML_Content * model)178 handleElementDeclaration(void *userData, const XML_Char *name,
179 XML_Content *model) {
180 XML_Parser parser = (XML_Parser)userData;
181 const bool success = dumpContentModel(name, model);
182 XML_FreeContentModel(parser, model);
183 if (! success) {
184 XML_StopParser(parser, /* resumable= */ XML_FALSE);
185 }
186 }
187
188 int
main(void)189 main(void) {
190 XML_Parser parser = XML_ParserCreate(NULL);
191 int done;
192
193 if (! parser) {
194 fprintf(stderr, "Couldn't allocate memory for parser\n");
195 return 1;
196 }
197
198 XML_SetUserData(parser, parser);
199 XML_SetElementDeclHandler(parser, handleElementDeclaration);
200
201 do {
202 void *const buf = XML_GetBuffer(parser, BUFSIZ);
203 if (! buf) {
204 fprintf(stderr, "Couldn't allocate memory for buffer\n");
205 XML_ParserFree(parser);
206 return 1;
207 }
208
209 const size_t len = fread(buf, 1, BUFSIZ, stdin);
210
211 if (ferror(stdin)) {
212 fprintf(stderr, "Read error\n");
213 XML_ParserFree(parser);
214 return 1;
215 }
216
217 done = feof(stdin);
218
219 if (XML_ParseBuffer(parser, (int)len, done) == XML_STATUS_ERROR) {
220 enum XML_Error errorCode = XML_GetErrorCode(parser);
221 if (errorCode == XML_ERROR_ABORTED) {
222 errorCode = XML_ERROR_NO_MEMORY;
223 }
224 fprintf(stderr,
225 "Parse error at line %" XML_FMT_INT_MOD "u:\n%" XML_FMT_STR "\n",
226 XML_GetCurrentLineNumber(parser), XML_ErrorString(errorCode));
227 XML_ParserFree(parser);
228 return 1;
229 }
230 } while (! done);
231
232 XML_ParserFree(parser);
233 return 0;
234 }
235