xref: /freebsd/contrib/expat/examples/element_declarations.c (revision 908f215e80fa482aa953c39afa6bb516f561fc00)
14543ef51SXin LI /* Read an XML document from standard input and print
24543ef51SXin LI    element declarations (if any) to standard output.
34543ef51SXin LI    It must be used with Expat compiled for UTF-8 output.
44543ef51SXin LI                             __  __            _
54543ef51SXin LI                          ___\ \/ /_ __   __ _| |_
64543ef51SXin LI                         / _ \\  /| '_ \ / _` | __|
74543ef51SXin LI                        |  __//  \| |_) | (_| | |_
84543ef51SXin LI                         \___/_/\_\ .__/ \__,_|\__|
94543ef51SXin LI                                  |_| XML parser
104543ef51SXin LI 
114543ef51SXin LI    Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
124543ef51SXin LI    Copyright (c) 2001-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
134543ef51SXin LI    Copyright (c) 2004-2006 Karl Waclawek <karl@waclawek.net>
144543ef51SXin LI    Copyright (c) 2005-2007 Steven Solie <steven@solie.ca>
154543ef51SXin LI    Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
164543ef51SXin LI    Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
174543ef51SXin LI    Copyright (c) 2019      Zhongyuan Zhou <zhouzhongyuan@huawei.com>
18*908f215eSXin LI    Copyright (c) 2024      Hanno Böck <hanno@gentoo.org>
194543ef51SXin LI    Licensed under the MIT license:
204543ef51SXin LI 
214543ef51SXin LI    Permission is  hereby granted,  free of charge,  to any  person obtaining
224543ef51SXin LI    a  copy  of  this  software   and  associated  documentation  files  (the
234543ef51SXin LI    "Software"),  to  deal in  the  Software  without restriction,  including
244543ef51SXin LI    without  limitation the  rights  to use,  copy,  modify, merge,  publish,
254543ef51SXin LI    distribute, sublicense, and/or sell copies of the Software, and to permit
264543ef51SXin LI    persons  to whom  the Software  is  furnished to  do so,  subject to  the
274543ef51SXin LI    following conditions:
284543ef51SXin LI 
294543ef51SXin LI    The above copyright  notice and this permission notice  shall be included
304543ef51SXin LI    in all copies or substantial portions of the Software.
314543ef51SXin LI 
324543ef51SXin LI    THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
334543ef51SXin LI    EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
344543ef51SXin LI    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
354543ef51SXin LI    NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
364543ef51SXin LI    DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
374543ef51SXin LI    OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
384543ef51SXin LI    USE OR OTHER DEALINGS IN THE SOFTWARE.
394543ef51SXin LI */
404543ef51SXin LI 
414543ef51SXin LI #include <stdbool.h>
424543ef51SXin LI #include <stdio.h>
434543ef51SXin LI #include <stdlib.h>
444543ef51SXin LI #include <expat.h>
454543ef51SXin LI 
// printf length modifier used in this file when printing the value
// returned by XML_GetCurrentLineNumber; it follows XML_LARGE_SIZE.
#if defined(XML_LARGE_SIZE)
#  define XML_FMT_INT_MOD "ll"
#else
#  define XML_FMT_INT_MOD "l"
#endif

// printf conversion used in this file for strings handed out by Expat
// (element names, error strings); it follows XML_UNICODE_WCHAR_T.
#if defined(XML_UNICODE_WCHAR_T)
#  define XML_FMT_STR "ls"
#else
#  define XML_FMT_STR "s"
#endif
574543ef51SXin LI 
584543ef51SXin LI // While traversing the XML_Content tree, we avoid recursion
594543ef51SXin LI // to not be vulnerable to a denial of service attack.
604543ef51SXin LI typedef struct StackStruct {
614543ef51SXin LI   const XML_Content *model;
624543ef51SXin LI   unsigned level;
634543ef51SXin LI   struct StackStruct *prev;
644543ef51SXin LI } Stack;
654543ef51SXin LI 
664543ef51SXin LI static Stack *
674543ef51SXin LI stackPushMalloc(Stack *stackTop, const XML_Content *model, unsigned level) {
684543ef51SXin LI   Stack *const newStackTop = malloc(sizeof(Stack));
694543ef51SXin LI   if (! newStackTop) {
704543ef51SXin LI     return NULL;
714543ef51SXin LI   }
724543ef51SXin LI   newStackTop->model = model;
734543ef51SXin LI   newStackTop->level = level;
744543ef51SXin LI   newStackTop->prev = stackTop;
754543ef51SXin LI   return newStackTop;
764543ef51SXin LI }
774543ef51SXin LI 
784543ef51SXin LI static Stack *
794543ef51SXin LI stackPopFree(Stack *stackTop) {
804543ef51SXin LI   Stack *const newStackTop = stackTop->prev;
814543ef51SXin LI   free(stackTop);
824543ef51SXin LI   return newStackTop;
834543ef51SXin LI }
844543ef51SXin LI 
854543ef51SXin LI static char *
864543ef51SXin LI contentTypeName(enum XML_Content_Type contentType) {
874543ef51SXin LI   switch (contentType) {
884543ef51SXin LI   case XML_CTYPE_EMPTY:
894543ef51SXin LI     return "EMPTY";
904543ef51SXin LI   case XML_CTYPE_ANY:
914543ef51SXin LI     return "ANY";
924543ef51SXin LI   case XML_CTYPE_MIXED:
934543ef51SXin LI     return "MIXED";
944543ef51SXin LI   case XML_CTYPE_NAME:
954543ef51SXin LI     return "NAME";
964543ef51SXin LI   case XML_CTYPE_CHOICE:
974543ef51SXin LI     return "CHOICE";
984543ef51SXin LI   case XML_CTYPE_SEQ:
994543ef51SXin LI     return "SEQ";
1004543ef51SXin LI   default:
1014543ef51SXin LI     return "???";
1024543ef51SXin LI   }
1034543ef51SXin LI }
1044543ef51SXin LI 
1054543ef51SXin LI static char *
1064543ef51SXin LI contentQuantName(enum XML_Content_Quant contentQuant) {
1074543ef51SXin LI   switch (contentQuant) {
1084543ef51SXin LI   case XML_CQUANT_NONE:
1094543ef51SXin LI     return "NONE";
1104543ef51SXin LI   case XML_CQUANT_OPT:
1114543ef51SXin LI     return "OPT";
1124543ef51SXin LI   case XML_CQUANT_REP:
1134543ef51SXin LI     return "REP";
1144543ef51SXin LI   case XML_CQUANT_PLUS:
1154543ef51SXin LI     return "PLUS";
1164543ef51SXin LI   default:
1174543ef51SXin LI     return "???";
1184543ef51SXin LI   }
1194543ef51SXin LI }
1204543ef51SXin LI 
1214543ef51SXin LI static void
1224543ef51SXin LI dumpContentModelElement(const XML_Content *model, unsigned level,
1234543ef51SXin LI                         const XML_Content *root) {
1244543ef51SXin LI   // Indent
1254543ef51SXin LI   unsigned u = 0;
1264543ef51SXin LI   for (; u < level; u++) {
1274543ef51SXin LI     printf("  ");
1284543ef51SXin LI   }
1294543ef51SXin LI 
1304543ef51SXin LI   // Node
131*908f215eSXin LI   printf("[%u] type=%s(%u), quant=%s(%u)", (unsigned)(model - root),
132*908f215eSXin LI          contentTypeName(model->type), (unsigned int)model->type,
133*908f215eSXin LI          contentQuantName(model->quant), (unsigned int)model->quant);
1344543ef51SXin LI   if (model->name) {
1354543ef51SXin LI     printf(", name=\"%" XML_FMT_STR "\"", model->name);
1364543ef51SXin LI   } else {
1374543ef51SXin LI     printf(", name=NULL");
1384543ef51SXin LI   }
139*908f215eSXin LI   printf(", numchildren=%u", model->numchildren);
1404543ef51SXin LI   printf("\n");
1414543ef51SXin LI }
1424543ef51SXin LI 
1434543ef51SXin LI static bool
1444543ef51SXin LI dumpContentModel(const XML_Char *name, const XML_Content *root) {
1454543ef51SXin LI   printf("Element \"%" XML_FMT_STR "\":\n", name);
1464543ef51SXin LI   Stack *stackTop = stackPushMalloc(NULL, root, 1);
1474543ef51SXin LI   if (! stackTop) {
1484543ef51SXin LI     return false;
1494543ef51SXin LI   }
1504543ef51SXin LI 
1514543ef51SXin LI   while (stackTop) {
1524543ef51SXin LI     const XML_Content *const model = stackTop->model;
1534543ef51SXin LI     const unsigned level = stackTop->level;
1544543ef51SXin LI 
1554543ef51SXin LI     dumpContentModelElement(model, level, root);
1564543ef51SXin LI 
1574543ef51SXin LI     stackTop = stackPopFree(stackTop);
1584543ef51SXin LI 
1594543ef51SXin LI     for (size_t u = model->numchildren; u >= 1; u--) {
1604543ef51SXin LI       Stack *const newStackTop
1614543ef51SXin LI           = stackPushMalloc(stackTop, model->children + (u - 1), level + 1);
1624543ef51SXin LI       if (! newStackTop) {
1634543ef51SXin LI         // We ran out of memory, so let's free all memory allocated
1644543ef51SXin LI         // earlier in this function, to be leak-clean:
1654543ef51SXin LI         while (stackTop != NULL) {
1664543ef51SXin LI           stackTop = stackPopFree(stackTop);
1674543ef51SXin LI         }
1684543ef51SXin LI         return false;
1694543ef51SXin LI       }
1704543ef51SXin LI       stackTop = newStackTop;
1714543ef51SXin LI     }
1724543ef51SXin LI   }
1734543ef51SXin LI 
1744543ef51SXin LI   printf("\n");
1754543ef51SXin LI   return true;
1764543ef51SXin LI }
1774543ef51SXin LI 
1784543ef51SXin LI static void XMLCALL
1794543ef51SXin LI handleElementDeclaration(void *userData, const XML_Char *name,
1804543ef51SXin LI                          XML_Content *model) {
1814543ef51SXin LI   XML_Parser parser = (XML_Parser)userData;
1824543ef51SXin LI   const bool success = dumpContentModel(name, model);
1834543ef51SXin LI   XML_FreeContentModel(parser, model);
1844543ef51SXin LI   if (! success) {
1854543ef51SXin LI     XML_StopParser(parser, /* resumable= */ XML_FALSE);
1864543ef51SXin LI   }
1874543ef51SXin LI }
1884543ef51SXin LI 
1894543ef51SXin LI int
1904543ef51SXin LI main(void) {
1914543ef51SXin LI   XML_Parser parser = XML_ParserCreate(NULL);
1924543ef51SXin LI   int done;
1934543ef51SXin LI 
1944543ef51SXin LI   if (! parser) {
1954543ef51SXin LI     fprintf(stderr, "Couldn't allocate memory for parser\n");
1964543ef51SXin LI     return 1;
1974543ef51SXin LI   }
1984543ef51SXin LI 
1994543ef51SXin LI   XML_SetUserData(parser, parser);
2004543ef51SXin LI   XML_SetElementDeclHandler(parser, handleElementDeclaration);
2014543ef51SXin LI 
2024543ef51SXin LI   do {
2034543ef51SXin LI     void *const buf = XML_GetBuffer(parser, BUFSIZ);
2044543ef51SXin LI     if (! buf) {
2054543ef51SXin LI       fprintf(stderr, "Couldn't allocate memory for buffer\n");
2064543ef51SXin LI       XML_ParserFree(parser);
2074543ef51SXin LI       return 1;
2084543ef51SXin LI     }
2094543ef51SXin LI 
2104543ef51SXin LI     const size_t len = fread(buf, 1, BUFSIZ, stdin);
2114543ef51SXin LI 
2124543ef51SXin LI     if (ferror(stdin)) {
2134543ef51SXin LI       fprintf(stderr, "Read error\n");
2144543ef51SXin LI       XML_ParserFree(parser);
2154543ef51SXin LI       return 1;
2164543ef51SXin LI     }
2174543ef51SXin LI 
2184543ef51SXin LI     done = feof(stdin);
2194543ef51SXin LI 
2204543ef51SXin LI     if (XML_ParseBuffer(parser, (int)len, done) == XML_STATUS_ERROR) {
2214543ef51SXin LI       enum XML_Error errorCode = XML_GetErrorCode(parser);
2224543ef51SXin LI       if (errorCode == XML_ERROR_ABORTED) {
2234543ef51SXin LI         errorCode = XML_ERROR_NO_MEMORY;
2244543ef51SXin LI       }
2254543ef51SXin LI       fprintf(stderr,
2264543ef51SXin LI               "Parse error at line %" XML_FMT_INT_MOD "u:\n%" XML_FMT_STR "\n",
2274543ef51SXin LI               XML_GetCurrentLineNumber(parser), XML_ErrorString(errorCode));
2284543ef51SXin LI       XML_ParserFree(parser);
2294543ef51SXin LI       return 1;
2304543ef51SXin LI     }
2314543ef51SXin LI   } while (! done);
2324543ef51SXin LI 
2334543ef51SXin LI   XML_ParserFree(parser);
2344543ef51SXin LI   return 0;
2354543ef51SXin LI }
236