/*
                            __  __            _
                         ___\ \/ /_ __   __ _| |_
                        / _ \\  /| '_ \ / _` | __|
                       |  __//  \| |_) | (_| | |_
                        \___/_/\_\ .__/ \__,_|\__|
                                 |_| XML parser

   Copyright (c) 1997-2000 Thai Open Source Software Center Ltd
   Copyright (c) 2000      Clark Cooper <coopercc@users.sourceforge.net>
   Copyright (c) 2001-2004 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
   Copyright (c) 2002-2009 Karl Waclawek <karl@waclawek.net>
   Copyright (c) 2016-2017 Sebastian Pipping <sebastian@pipping.org>
   Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
   Copyright (c) 2017      Franek Korta <fkorta@gmail.com>
   Copyright (c) 2022      Sean McBride <sean@rogue-research.com>
   Copyright (c) 2025      Hanno Böck <hanno@gentoo.org>
   Licensed under the MIT license:

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to permit
   persons to whom the Software is furnished to do so, subject to the
   following conditions:

   The above copyright notice and this permission notice shall be included
   in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
   NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
   DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
   USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
395bb6a25fSPoul-Henning Kamp
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
#include <stdio.h>

/* Functions close(2) and read(2) */
#if ! defined(_WIN32) && ! defined(_WIN64)
#  include <unistd.h>
#endif

/* Function "read":
   EXPAT_read names the platform's read function, EXPAT_read_count_t is the
   type of the value it returns and EXPAT_read_req_t is the type of its
   byte-count argument.  Callers cast to these types so that the same code
   compiles against both the MSVC and the POSIX prototypes. */
#if defined(_MSC_VER)
#  include <io.h>
/* https://msdn.microsoft.com/en-us/library/wyssk1bs(v=vs.100).aspx */
#  define EXPAT_read _read
#  define EXPAT_read_count_t int
#  define EXPAT_read_req_t unsigned int
#else /* POSIX */
/* https://pubs.opengroup.org/onlinepubs/009695399/functions/read.html */
#  define EXPAT_read read
#  define EXPAT_read_count_t ssize_t
#  define EXPAT_read_req_t size_t
#endif
64220ed979SColeman Kane
/* Fallback for platforms whose <sys/stat.h> lacks S_ISREG: build it from
   the file-type mask and the regular-file bit, taking the underscore-prefixed
   spellings where the plain ones are not defined either. */
#ifndef S_ISREG
#  ifndef S_IFREG
#    define S_IFREG _S_IFREG
#  endif
#  ifndef S_IFMT
#    define S_IFMT _S_IFMT
#  endif
#  define S_ISREG(m) (((m) & S_IFMT) == S_IFREG)
#endif /* not S_ISREG */
745bb6a25fSPoul-Henning Kamp
/* Make O_BINARY usable everywhere: map it to _O_BINARY where only that
   spelling exists, and to 0 (no effect when OR-ed into the open flags)
   where neither is defined. */
#ifndef O_BINARY
#  ifdef _O_BINARY
#    define O_BINARY _O_BINARY
#  else
#    define O_BINARY 0
#  endif
#endif
825bb6a25fSPoul-Henning Kamp
#include "xmltchar.h"
#include "filemap.h"
855bb6a25fSPoul-Henning Kamp
865bb6a25fSPoul-Henning Kamp int
filemap(const tchar * name,void (* processor)(const void *,size_t,const tchar *,void * arg),void * arg)870a48773fSEric van Gyzen filemap(const tchar *name,
880a48773fSEric van Gyzen void (*processor)(const void *, size_t, const tchar *, void *arg),
896b2c1e49SXin LI void *arg) {
905bb6a25fSPoul-Henning Kamp size_t nbytes;
915bb6a25fSPoul-Henning Kamp int fd;
924543ef51SXin LI EXPAT_read_count_t n;
935bb6a25fSPoul-Henning Kamp struct stat sb;
945bb6a25fSPoul-Henning Kamp void *p;
955bb6a25fSPoul-Henning Kamp
960a48773fSEric van Gyzen fd = topen(name, O_RDONLY | O_BINARY);
975bb6a25fSPoul-Henning Kamp if (fd < 0) {
980a48773fSEric van Gyzen tperror(name);
995bb6a25fSPoul-Henning Kamp return 0;
1005bb6a25fSPoul-Henning Kamp }
1015bb6a25fSPoul-Henning Kamp if (fstat(fd, &sb) < 0) {
1020a48773fSEric van Gyzen tperror(name);
103e3466a89SXin LI close(fd);
1045bb6a25fSPoul-Henning Kamp return 0;
1055bb6a25fSPoul-Henning Kamp }
1065bb6a25fSPoul-Henning Kamp if (! S_ISREG(sb.st_mode)) {
1070a48773fSEric van Gyzen ftprintf(stderr, T("%s: not a regular file\n"), name);
108e3466a89SXin LI close(fd);
1095bb6a25fSPoul-Henning Kamp return 0;
1105bb6a25fSPoul-Henning Kamp }
1110a48773fSEric van Gyzen if (sb.st_size > XML_MAX_CHUNK_LEN) {
1120a48773fSEric van Gyzen close(fd);
1130a48773fSEric van Gyzen return 2; /* Cannot be passed to XML_Parse in one go */
1140a48773fSEric van Gyzen }
1150a48773fSEric van Gyzen
1165bb6a25fSPoul-Henning Kamp nbytes = sb.st_size;
117220ed979SColeman Kane /* malloc will return NULL with nbytes == 0, handle files with size 0 */
118220ed979SColeman Kane if (nbytes == 0) {
119220ed979SColeman Kane static const char c = '\0';
120220ed979SColeman Kane processor(&c, 0, name, arg);
121220ed979SColeman Kane close(fd);
122220ed979SColeman Kane return 1;
123220ed979SColeman Kane }
1245bb6a25fSPoul-Henning Kamp p = malloc(nbytes);
1255bb6a25fSPoul-Henning Kamp if (! p) {
1260a48773fSEric van Gyzen ftprintf(stderr, T("%s: out of memory\n"), name);
127220ed979SColeman Kane close(fd);
1285bb6a25fSPoul-Henning Kamp return 0;
1295bb6a25fSPoul-Henning Kamp }
1304543ef51SXin LI n = EXPAT_read(fd, p, (EXPAT_read_req_t)nbytes);
1315bb6a25fSPoul-Henning Kamp if (n < 0) {
1320a48773fSEric van Gyzen tperror(name);
133220ed979SColeman Kane free(p);
1345bb6a25fSPoul-Henning Kamp close(fd);
1355bb6a25fSPoul-Henning Kamp return 0;
1365bb6a25fSPoul-Henning Kamp }
1374543ef51SXin LI if (n != (EXPAT_read_count_t)nbytes) {
1380a48773fSEric van Gyzen ftprintf(stderr, T("%s: read unexpected number of bytes\n"), name);
139220ed979SColeman Kane free(p);
1405bb6a25fSPoul-Henning Kamp close(fd);
1415bb6a25fSPoul-Henning Kamp return 0;
1425bb6a25fSPoul-Henning Kamp }
1435bb6a25fSPoul-Henning Kamp processor(p, nbytes, name, arg);
1445bb6a25fSPoul-Henning Kamp free(p);
1455bb6a25fSPoul-Henning Kamp close(fd);
1465bb6a25fSPoul-Henning Kamp return 1;
1475bb6a25fSPoul-Henning Kamp }
148