1 /* Read an XML document from standard input and print 2 element declarations (if any) to standard output. 3 It must be used with Expat compiled for UTF-8 output. 4 __ __ _ 5 ___\ \/ /_ __ __ _| |_ 6 / _ \\ /| '_ \ / _` | __| 7 | __// \| |_) | (_| | |_ 8 \___/_/\_\ .__/ \__,_|\__| 9 |_| XML parser 10 11 Copyright (c) 1997-2000 Thai Open Source Software Center Ltd 12 Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net> 13 Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net> 14 Copyright (c) 2005-2007 Steven Solie <steven@solie.ca> 15 Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org> 16 Copyright (c) 2017 Rhodri James <rhodri@wildebeest.org.uk> 17 Copyright (c) 2019 Zhongyuan Zhou <zhouzhongyuan@huawei.com> 18 Copyright (c) 2024 Hanno Böck <hanno@gentoo.org> 19 Licensed under the MIT license: 20 21 Permission is hereby granted, free of charge, to any person obtaining 22 a copy of this software and associated documentation files (the 23 "Software"), to deal in the Software without restriction, including 24 without limitation the rights to use, copy, modify, merge, publish, 25 distribute, sublicense, and/or sell copies of the Software, and to permit 26 persons to whom the Software is furnished to do so, subject to the 27 following conditions: 28 29 The above copyright notice and this permission notice shall be included 30 in all copies or substantial portions of the Software. 31 32 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 33 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 34 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 35 NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 36 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 37 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 38 USE OR OTHER DEALINGS IN THE SOFTWARE. 39 */ 40 41 #include <stdbool.h> 42 #include <stdio.h> 43 #include <stdlib.h> 44 #include <expat.h> 45 46 #ifdef XML_LARGE_SIZE 47 # define XML_FMT_INT_MOD "ll" 48 #else 49 # define XML_FMT_INT_MOD "l" 50 #endif 51 52 #ifdef XML_UNICODE_WCHAR_T 53 # define XML_FMT_STR "ls" 54 #else 55 # define XML_FMT_STR "s" 56 #endif 57 58 // While traversing the XML_Content tree, we avoid recursion 59 // to not be vulnerable to a denial of service attack. 60 typedef struct StackStruct { 61 const XML_Content *model; 62 unsigned level; 63 struct StackStruct *prev; 64 } Stack; 65 66 static Stack * 67 stackPushMalloc(Stack *stackTop, const XML_Content *model, unsigned level) { 68 Stack *const newStackTop = malloc(sizeof(Stack)); 69 if (! newStackTop) { 70 return NULL; 71 } 72 newStackTop->model = model; 73 newStackTop->level = level; 74 newStackTop->prev = stackTop; 75 return newStackTop; 76 } 77 78 static Stack * 79 stackPopFree(Stack *stackTop) { 80 Stack *const newStackTop = stackTop->prev; 81 free(stackTop); 82 return newStackTop; 83 } 84 85 static char * 86 contentTypeName(enum XML_Content_Type contentType) { 87 switch (contentType) { 88 case XML_CTYPE_EMPTY: 89 return "EMPTY"; 90 case XML_CTYPE_ANY: 91 return "ANY"; 92 case XML_CTYPE_MIXED: 93 return "MIXED"; 94 case XML_CTYPE_NAME: 95 return "NAME"; 96 case XML_CTYPE_CHOICE: 97 return "CHOICE"; 98 case XML_CTYPE_SEQ: 99 return "SEQ"; 100 default: 101 return "???"; 102 } 103 } 104 105 static char * 106 contentQuantName(enum XML_Content_Quant contentQuant) { 107 switch (contentQuant) { 108 case XML_CQUANT_NONE: 109 return "NONE"; 110 case XML_CQUANT_OPT: 111 return "OPT"; 112 case XML_CQUANT_REP: 113 return "REP"; 114 case XML_CQUANT_PLUS: 115 return "PLUS"; 116 default: 117 return "???"; 118 } 119 } 120 121 static void 122 dumpContentModelElement(const XML_Content *model, unsigned level, 123 const XML_Content *root) { 124 // Indent 125 unsigned u = 0; 126 for (; u < level; u++) { 127 printf(" "); 128 } 129 130 // Node 131 printf("[%u] type=%s(%u), quant=%s(%u)", (unsigned)(model - root), 132 contentTypeName(model->type), (unsigned int)model->type, 133 contentQuantName(model->quant), (unsigned int)model->quant); 134 if (model->name) { 135 printf(", name=\"%" XML_FMT_STR "\"", model->name); 136 } else { 137 printf(", name=NULL"); 138 } 139 printf(", numchildren=%u", model->numchildren); 140 printf("\n"); 141 } 142 143 static bool 144 dumpContentModel(const XML_Char *name, const XML_Content *root) { 145 printf("Element \"%" XML_FMT_STR "\":\n", name); 146 Stack *stackTop = stackPushMalloc(NULL, root, 1); 147 if (! stackTop) { 148 return false; 149 } 150 151 while (stackTop) { 152 const XML_Content *const model = stackTop->model; 153 const unsigned level = stackTop->level; 154 155 dumpContentModelElement(model, level, root); 156 157 stackTop = stackPopFree(stackTop); 158 159 for (size_t u = model->numchildren; u >= 1; u--) { 160 Stack *const newStackTop 161 = stackPushMalloc(stackTop, model->children + (u - 1), level + 1); 162 if (! newStackTop) { 163 // We ran out of memory, so let's free all memory allocated 164 // earlier in this function, to be leak-clean: 165 while (stackTop != NULL) { 166 stackTop = stackPopFree(stackTop); 167 } 168 return false; 169 } 170 stackTop = newStackTop; 171 } 172 } 173 174 printf("\n"); 175 return true; 176 } 177 178 static void XMLCALL 179 handleElementDeclaration(void *userData, const XML_Char *name, 180 XML_Content *model) { 181 XML_Parser parser = (XML_Parser)userData; 182 const bool success = dumpContentModel(name, model); 183 XML_FreeContentModel(parser, model); 184 if (! success) { 185 XML_StopParser(parser, /* resumable= */ XML_FALSE); 186 } 187 } 188 189 int 190 main(void) { 191 XML_Parser parser = XML_ParserCreate(NULL); 192 int done; 193 194 if (! parser) { 195 fprintf(stderr, "Couldn't allocate memory for parser\n"); 196 return 1; 197 } 198 199 XML_SetUserData(parser, parser); 200 XML_SetElementDeclHandler(parser, handleElementDeclaration); 201 202 do { 203 void *const buf = XML_GetBuffer(parser, BUFSIZ); 204 if (! buf) { 205 fprintf(stderr, "Couldn't allocate memory for buffer\n"); 206 XML_ParserFree(parser); 207 return 1; 208 } 209 210 const size_t len = fread(buf, 1, BUFSIZ, stdin); 211 212 if (ferror(stdin)) { 213 fprintf(stderr, "Read error\n"); 214 XML_ParserFree(parser); 215 return 1; 216 } 217 218 done = feof(stdin); 219 220 if (XML_ParseBuffer(parser, (int)len, done) == XML_STATUS_ERROR) { 221 enum XML_Error errorCode = XML_GetErrorCode(parser); 222 if (errorCode == XML_ERROR_ABORTED) { 223 errorCode = XML_ERROR_NO_MEMORY; 224 } 225 fprintf(stderr, 226 "Parse error at line %" XML_FMT_INT_MOD "u:\n%" XML_FMT_STR "\n", 227 XML_GetCurrentLineNumber(parser), XML_ErrorString(errorCode)); 228 XML_ParserFree(parser); 229 return 1; 230 } 231 } while (! done); 232 233 XML_ParserFree(parser); 234 return 0; 235 } 236