1 /* Read an XML document from standard input and print 2 element declarations (if any) to standard output. 3 It must be used with Expat compiled for UTF-8 output. 4 __ __ _ 5 ___\ \/ /_ __ __ _| |_ 6 / _ \\ /| '_ \ / _` | __| 7 | __// \| |_) | (_| | |_ 8 \___/_/\_\ .__/ \__,_|\__| 9 |_| XML parser 10 11 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 12 Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 13 Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net> 14 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> 15 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org> 16 Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> 17 Copyright (c) 2019 Zhongyuan Zhou <zhouzhongyuan@huawei.com> 18 Copyright (c) 2024 Hanno Böck <hanno@gentoo.org> 19 Copyright (c) 2026 Matthew Fernandez <matthew.fernandez@gmail.com> 20 Licensed under the MIT license: 21 22 Permission is hereby granted, free of charge, to any person obtaining 23 a copy of this software and associated documentation files (the 24 "Software"), to deal in the Software without restriction, including 25 without limitation the rights to use, copy, modify, merge, publish, 26 distribute, sublicense, and/or sell copies of the Software, and to permit 27 persons to whom the Software is furnished to do so, subject to the 28 following conditions: 29 30 The above copyright notice and this permission notice shall be included 31 in all copies or substantial portions of the Software. 32 33 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 34 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 35 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 36 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 37 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 38 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 39 USE OR OTHER DEALINGS IN THE SOFTWARE. 40 */ 41 42 #include <stdbool.h> 43 #include <stdio.h> 44 #include <stdlib.h> 45 #include <expat.h> 46 47 #ifdef XML_LARGE_SIZE 48 # define XML_FMT_INT_MOD "ll" 49 #else 50 # define XML_FMT_INT_MOD "l" 51 #endif 52 53 #ifdef XML_UNICODE_WCHAR_T 54 # define XML_FMT_STR "ls" 55 #else 56 # define XML_FMT_STR "s" 57 #endif 58 59 // While traversing the XML_Content tree, we avoid recursion 60 // to not be vulnerable to a denial of service attack. 61 typedef struct StackStruct { 62 const XML_Content *model; 63 unsigned level; 64 struct StackStruct *prev; 65 } Stack; 66 67 static Stack * 68 stackPushMalloc(Stack *stackTop, const XML_Content *model, unsigned level) { 69 Stack *const newStackTop = malloc(sizeof(Stack)); 70 if (! newStackTop) { 71 return NULL; 72 } 73 newStackTop->model = model; 74 newStackTop->level = level; 75 newStackTop->prev = stackTop; 76 return newStackTop; 77 } 78 79 static Stack * 80 stackPopFree(Stack *stackTop) { 81 Stack *const newStackTop = stackTop->prev; 82 free(stackTop); 83 return newStackTop; 84 } 85 86 static const char * 87 contentTypeName(enum XML_Content_Type contentType) { 88 switch (contentType) { 89 case XML_CTYPE_EMPTY: 90 return "EMPTY"; 91 case XML_CTYPE_ANY: 92 return "ANY"; 93 case XML_CTYPE_MIXED: 94 return "MIXED"; 95 case XML_CTYPE_NAME: 96 return "NAME"; 97 case XML_CTYPE_CHOICE: 98 return "CHOICE"; 99 case XML_CTYPE_SEQ: 100 return "SEQ"; 101 default: 102 return "???"; 103 } 104 } 105 106 static const char * 107 contentQuantName(enum XML_Content_Quant contentQuant) { 108 switch (contentQuant) { 109 case XML_CQUANT_NONE: 110 return "NONE"; 111 case XML_CQUANT_OPT: 112 return "OPT"; 113 case XML_CQUANT_REP: 114 return "REP"; 115 case XML_CQUANT_PLUS: 116 return "PLUS"; 117 default: 118 return "???"; 119 } 120 } 121 122 static void 123 dumpContentModelElement(const XML_Content *model, unsigned level, 124 const XML_Content *root) { 125 // Indent 126 unsigned u = 0; 127 for (; u < level; u++) { 128 printf(" "); 129 } 130 131 // Node 132 printf("[%u] type=%s(%u), quant=%s(%u)", (unsigned)(model - root), 133 contentTypeName(model->type), (unsigned int)model->type, 134 contentQuantName(model->quant), (unsigned int)model->quant); 135 if (model->name) { 136 printf(", name=\"%" XML_FMT_STR "\"", model->name); 137 } else { 138 printf(", name=NULL"); 139 } 140 printf(", numchildren=%u", model->numchildren); 141 printf("\n"); 142 } 143 144 static bool 145 dumpContentModel(const XML_Char *name, const XML_Content *root) { 146 printf("Element \"%" XML_FMT_STR "\":\n", name); 147 Stack *stackTop = stackPushMalloc(NULL, root, 1); 148 if (! stackTop) { 149 return false; 150 } 151 152 while (stackTop) { 153 const XML_Content *const model = stackTop->model; 154 const unsigned level = stackTop->level; 155 156 dumpContentModelElement(model, level, root); 157 158 stackTop = stackPopFree(stackTop); 159 160 for (size_t u = model->numchildren; u >= 1; u--) { 161 Stack *const newStackTop 162 = stackPushMalloc(stackTop, model->children + (u - 1), level + 1); 163 if (! newStackTop) { 164 // We ran out of memory, so let's free all memory allocated 165 // earlier in this function, to be leak-clean: 166 while (stackTop != NULL) { 167 stackTop = stackPopFree(stackTop); 168 } 169 return false; 170 } 171 stackTop = newStackTop; 172 } 173 } 174 175 printf("\n"); 176 return true; 177 } 178 179 static void XMLCALL 180 handleElementDeclaration(void *userData, const XML_Char *name, 181 XML_Content *model) { 182 XML_Parser parser = (XML_Parser)userData; 183 const bool success = dumpContentModel(name, model); 184 XML_FreeContentModel(parser, model); 185 if (! success) { 186 XML_StopParser(parser, /* resumable= */ XML_FALSE); 187 } 188 } 189 190 int 191 main(void) { 192 XML_Parser parser = XML_ParserCreate(NULL); 193 int done; 194 195 if (! parser) { 196 fprintf(stderr, "Couldn't allocate memory for parser\n"); 197 return 1; 198 } 199 200 XML_SetUserData(parser, parser); 201 XML_SetElementDeclHandler(parser, handleElementDeclaration); 202 203 do { 204 void *const buf = XML_GetBuffer(parser, BUFSIZ); 205 if (! buf) { 206 fprintf(stderr, "Couldn't allocate memory for buffer\n"); 207 XML_ParserFree(parser); 208 return 1; 209 } 210 211 const size_t len = fread(buf, 1, BUFSIZ, stdin); 212 213 if (ferror(stdin)) { 214 fprintf(stderr, "Read error\n"); 215 XML_ParserFree(parser); 216 return 1; 217 } 218 219 done = feof(stdin); 220 221 if (XML_ParseBuffer(parser, (int)len, done) == XML_STATUS_ERROR) { 222 enum XML_Error errorCode = XML_GetErrorCode(parser); 223 if (errorCode == XML_ERROR_ABORTED) { 224 errorCode = XML_ERROR_NO_MEMORY; 225 } 226 fprintf(stderr, 227 "Parse error at line %" XML_FMT_INT_MOD "u:\n%" XML_FMT_STR "\n", 228 XML_GetCurrentLineNumber(parser), XML_ErrorString(errorCode)); 229 XML_ParserFree(parser); 230 return 1; 231 } 232 } while (! done); 233 234 XML_ParserFree(parser); 235 return 0; 236 } 237