xref: /freebsd/contrib/expat/doc/xmlwf.xml (revision 908f215e80fa482aa953c39afa6bb516f561fc00)
1cc68614dSXin LI<!--
2cc68614dSXin LI                            __  __            _
3cc68614dSXin LI                         ___\ \/ /_ __   __ _| |_
4cc68614dSXin LI                        / _ \\  /| '_ \ / _` | __|
5cc68614dSXin LI                       |  __//  \| |_) | (_| | |_
6cc68614dSXin LI                        \___/_/\_\ .__/ \__,_|\__|
7cc68614dSXin LI                                 |_| XML parser
8cc68614dSXin LI
9cc68614dSXin LI   Copyright (c) 2001      Scott Bronson <bronson@rinspin.com>
10cc68614dSXin LI   Copyright (c) 2002-2003 Fred L. Drake, Jr. <fdrake@users.sourceforge.net>
11cc68614dSXin LI   Copyright (c) 2009      Karl Waclawek <karl@waclawek.net>
124543ef51SXin LI   Copyright (c) 2016-2024 Sebastian Pipping <sebastian@pipping.org>
13cc68614dSXin LI   Copyright (c) 2016      Ardo van Rangelrooij <ardo@debian.org>
14cc68614dSXin LI   Copyright (c) 2017      Rhodri James <rhodri@wildebeest.org.uk>
15cc68614dSXin LI   Copyright (c) 2020      Joe Orton <jorton@redhat.com>
16cc68614dSXin LI   Copyright (c) 2021      Tim Bray <tbray@textuality.com>
17cc68614dSXin LI   Unlike most of Expat,
18cc68614dSXin LI   this file is copyrighted under the GNU Free Documentation License 1.1.
19cc68614dSXin LI-->
20cc68614dSXin LI<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
21cc68614dSXin LI          "http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd" [
22be8aff81SXin LI  <!ENTITY dhfirstname "<firstname>Scott</firstname>">
23be8aff81SXin LI  <!ENTITY dhsurname   "<surname>Bronson</surname>">
24*908f215eSXin LI  <!ENTITY dhdate      "<date>November 6, 2024</date>">
25ac69e5d4SEric van Gyzen  <!-- Please adjust this^^ date whenever cutting a new release. -->
26be8aff81SXin LI  <!ENTITY dhsection   "<manvolnum>1</manvolnum>">
27be8aff81SXin LI  <!ENTITY dhemail     "<email>bronson@rinspin.com</email>">
28be8aff81SXin LI  <!ENTITY dhusername  "Scott Bronson">
29be8aff81SXin LI  <!ENTITY dhucpackage "<refentrytitle>XMLWF</refentrytitle>">
30be8aff81SXin LI  <!ENTITY dhpackage   "xmlwf">
31be8aff81SXin LI
32be8aff81SXin LI  <!ENTITY debian      "<productname>Debian GNU/Linux</productname>">
33be8aff81SXin LI  <!ENTITY gnu         "<acronym>GNU</acronym>">
34be8aff81SXin LI]>
35be8aff81SXin LI
36be8aff81SXin LI<refentry>
37be8aff81SXin LI  <refentryinfo>
38be8aff81SXin LI    <address>
39be8aff81SXin LI      &dhemail;
40be8aff81SXin LI    </address>
41be8aff81SXin LI    <author>
42be8aff81SXin LI      &dhfirstname;
43be8aff81SXin LI      &dhsurname;
44be8aff81SXin LI    </author>
45be8aff81SXin LI    <copyright>
46be8aff81SXin LI      <year>2001</year>
47be8aff81SXin LI      <holder>&dhusername;</holder>
48be8aff81SXin LI    </copyright>
49be8aff81SXin LI    &dhdate;
50be8aff81SXin LI  </refentryinfo>
51be8aff81SXin LI  <refmeta>
52be8aff81SXin LI    &dhucpackage;
53be8aff81SXin LI
54be8aff81SXin LI    &dhsection;
55be8aff81SXin LI  </refmeta>
56be8aff81SXin LI  <refnamediv>
57be8aff81SXin LI    <refname>&dhpackage;</refname>
58be8aff81SXin LI
59be8aff81SXin LI    <refpurpose>Determines if an XML document is well-formed</refpurpose>
60be8aff81SXin LI  </refnamediv>
61be8aff81SXin LI  <refsynopsisdiv>
62be8aff81SXin LI    <cmdsynopsis>
63be8aff81SXin LI      <command>&dhpackage;</command>
64cc68614dSXin LI      <arg><replaceable>OPTIONS</replaceable></arg>
65cc68614dSXin LI      <arg><replaceable>FILE</replaceable> ...</arg>
66cc68614dSXin LI    </cmdsynopsis>
67cc68614dSXin LI    <cmdsynopsis>
68cc68614dSXin LI      <command>&dhpackage;</command>
694543ef51SXin LI      <group choice="plain">
704543ef51SXin LI        <arg><option>-h</option></arg>
714543ef51SXin LI        <arg><option>--help</option></arg>
724543ef51SXin LI      </group>
73cc68614dSXin LI    </cmdsynopsis>
74cc68614dSXin LI    <cmdsynopsis>
75cc68614dSXin LI      <command>&dhpackage;</command>
764543ef51SXin LI      <group choice="plain">
774543ef51SXin LI        <arg><option>-v</option></arg>
784543ef51SXin LI        <arg><option>--version</option></arg>
794543ef51SXin LI      </group>
80be8aff81SXin LI    </cmdsynopsis>
81be8aff81SXin LI  </refsynopsisdiv>
82be8aff81SXin LI
83be8aff81SXin LI  <refsect1>
84be8aff81SXin LI    <title>DESCRIPTION</title>
85be8aff81SXin LI
86be8aff81SXin LI    <para>
87be8aff81SXin LI	<command>&dhpackage;</command> uses the Expat library to
88be8aff81SXin LI	determine if an XML document is well-formed.  It is
89be8aff81SXin LI	non-validating.
90be8aff81SXin LI	</para>
91be8aff81SXin LI
92be8aff81SXin LI	<para>
93be8aff81SXin LI	If you do not specify any files on the command-line, and you
94be8aff81SXin LI	have a recent version of <command>&dhpackage;</command>, the
95be8aff81SXin LI	input file will be read from standard input.
96be8aff81SXin LI	</para>
97be8aff81SXin LI
98be8aff81SXin LI  </refsect1>
99be8aff81SXin LI
100be8aff81SXin LI  <refsect1>
101be8aff81SXin LI    <title>WELL-FORMED DOCUMENTS</title>
102be8aff81SXin LI
103be8aff81SXin LI	<para>
104be8aff81SXin LI	  A well-formed document must adhere to the
105be8aff81SXin LI	  following rules:
106be8aff81SXin LI	</para>
107be8aff81SXin LI
108be8aff81SXin LI	<itemizedlist>
109be8aff81SXin LI      <listitem><para>
110be8aff81SXin LI	    The file begins with an XML declaration.  For instance,
111be8aff81SXin LI		<literal>&lt;?xml version="1.0" standalone="yes"?&gt;</literal>.
112cc68614dSXin LI		<emphasis>NOTE</emphasis>:
113be8aff81SXin LI		<command>&dhpackage;</command> does not currently
114be8aff81SXin LI		check for a valid XML declaration.
115be8aff81SXin LI      </para></listitem>
116be8aff81SXin LI      <listitem><para>
117be8aff81SXin LI		Every start tag is either empty (&lt;tag/&gt;)
118be8aff81SXin LI		or has a corresponding end tag.
119be8aff81SXin LI      </para></listitem>
120be8aff81SXin LI      <listitem><para>
121be8aff81SXin LI	    There is exactly one root element.  This element must contain
122be8aff81SXin LI		all other elements in the document.  Only comments, white
123be8aff81SXin LI		space, and processing instructions may come after the close
124be8aff81SXin LI		of the root element.
125be8aff81SXin LI      </para></listitem>
126be8aff81SXin LI      <listitem><para>
127be8aff81SXin LI		All elements nest properly.
128be8aff81SXin LI      </para></listitem>
129be8aff81SXin LI      <listitem><para>
130be8aff81SXin LI		All attribute values are enclosed in quotes (either single
131be8aff81SXin LI		or double).
132be8aff81SXin LI      </para></listitem>
133be8aff81SXin LI    </itemizedlist>
134be8aff81SXin LI
135be8aff81SXin LI	<para>
136be8aff81SXin LI	  If the document has a DTD, and it strictly complies with that
137be8aff81SXin LI	  DTD, then the document is also considered <emphasis>valid</emphasis>.
138be8aff81SXin LI	  <command>&dhpackage;</command> is a non-validating parser --
139be8aff81SXin LI	  it does not check the DTD.  However, it does support
140be8aff81SXin LI	  external entities (see the <option>-x</option> option).
141be8aff81SXin LI	</para>
142be8aff81SXin LI  </refsect1>
143be8aff81SXin LI
144be8aff81SXin LI  <refsect1>
145be8aff81SXin LI    <title>OPTIONS</title>
146be8aff81SXin LI
147be8aff81SXin LI<para>
148be8aff81SXin LIWhen an option includes an argument, you may specify the argument either
149cc68614dSXin LIseparately ("<option>-d</option> <replaceable>output</replaceable>") or concatenated with the
150cc68614dSXin LIoption ("<option>-d</option><replaceable>output</replaceable>").  <command>&dhpackage;</command>
151be8aff81SXin LIsupports both.
152be8aff81SXin LI</para>
153be8aff81SXin LI
154be8aff81SXin LI    <variablelist>
155be8aff81SXin LI
156be8aff81SXin LI      <varlistentry>
157cc68614dSXin LI        <term><option>-a</option> <replaceable>factor</replaceable></term>
158cc68614dSXin LI        <listitem>
159cc68614dSXin LI          <para>
160cc68614dSXin LI            Sets the maximum tolerated amplification factor
161cc68614dSXin LI            for protection against billion laughs attacks (default: 100.0).
162cc68614dSXin LI            The amplification factor is calculated as ..
163cc68614dSXin LI          </para>
164cc68614dSXin LI          <literallayout>
165cc68614dSXin LI            amplification := (direct + indirect) / direct
166cc68614dSXin LI          </literallayout>
167cc68614dSXin LI          <para>
168cc68614dSXin LI            .. while parsing, where
169cc68614dSXin LI            &lt;direct&gt; is the number of bytes read
170cc68614dSXin LI              from the primary document in parsing and
171cc68614dSXin LI            &lt;indirect&gt; is the number of bytes
172cc68614dSXin LI              added by expanding entities and reading of external DTD files,
173cc68614dSXin LI              combined.
174cc68614dSXin LI          </para>
175cc68614dSXin LI          <para>
176cc68614dSXin LI            <emphasis>NOTE</emphasis>:
177cc68614dSXin LI            If you ever need to increase this value for non-attack payload,
178cc68614dSXin LI            please file a bug report.
179cc68614dSXin LI          </para>
180cc68614dSXin LI        </listitem>
181cc68614dSXin LI      </varlistentry>
182cc68614dSXin LI
183cc68614dSXin LI      <varlistentry>
184cc68614dSXin LI        <term><option>-b</option> <replaceable>bytes</replaceable></term>
185cc68614dSXin LI        <listitem>
186cc68614dSXin LI          <para>
187cc68614dSXin LI            Sets the number of output bytes (including amplification)
188cc68614dSXin LI            needed to activate protection against billion laughs attacks
189cc68614dSXin LI            (default: 8 MiB).
190cc68614dSXin LI            This can be thought of as an &quot;activation threshold&quot;.
191cc68614dSXin LI          </para>
192cc68614dSXin LI          <para>
193cc68614dSXin LI            <emphasis>NOTE</emphasis>:
194cc68614dSXin LI            If you ever need to increase this value for non-attack payload,
195cc68614dSXin LI            please file a bug report.
196cc68614dSXin LI          </para>
197cc68614dSXin LI        </listitem>
198cc68614dSXin LI      </varlistentry>
199cc68614dSXin LI
200cc68614dSXin LI      <varlistentry>
201be8aff81SXin LI        <term><option>-c</option></term>
202be8aff81SXin LI        <listitem>
203be8aff81SXin LI		<para>
204be8aff81SXin LI  If the input file is well-formed and <command>&dhpackage;</command>
205be8aff81SXin LI  doesn't encounter any errors, the input file is simply copied to
206be8aff81SXin LI  the output directory unchanged.
207be8aff81SXin LI  This implies no namespaces (turns off <option>-n</option>) and
2080a48773fSEric van Gyzen  requires <option>-d</option> to specify an output directory.
209be8aff81SXin LI  		</para>
210be8aff81SXin LI        </listitem>
211be8aff81SXin LI      </varlistentry>
212be8aff81SXin LI
213be8aff81SXin LI      <varlistentry>
214cc68614dSXin LI        <term><option>-d</option> <replaceable>output-dir</replaceable></term>
215be8aff81SXin LI        <listitem>
216be8aff81SXin LI		<para>
217be8aff81SXin LI  Specifies a directory to contain transformed
218be8aff81SXin LI  representations of the input files.
219be8aff81SXin LI  By default, <option>-d</option> outputs a canonical representation
220be8aff81SXin LI  (described below).
2210a48773fSEric van Gyzen  You can select different output formats using <option>-c</option>,
2220a48773fSEric van Gyzen  <option>-m</option> and <option>-N</option>.
223be8aff81SXin LI	  </para>
224be8aff81SXin LI	  <para>
225be8aff81SXin LI  The output filenames will
226be8aff81SXin LI  be exactly the same as the input filenames or "STDIN" if the input is
227be8aff81SXin LI  coming from standard input.  Therefore, you must be careful that the
228be8aff81SXin LI  output file does not go into the same directory as the input
229be8aff81SXin LI  file.  Otherwise, <command>&dhpackage;</command> will delete the
230be8aff81SXin LI  input file before it generates the output file (just like running
231be8aff81SXin LI  <literal>cat &lt; file &gt; file</literal> in most shells).
232be8aff81SXin LI	  </para>
233be8aff81SXin LI	  <para>
234be8aff81SXin LI  Two structurally equivalent XML documents have a byte-for-byte
235be8aff81SXin LI  identical canonical XML representation.
236be8aff81SXin LI  Note that ignorable white space is considered significant and
237be8aff81SXin LI  is treated equivalently to data.
238be8aff81SXin LI  More on canonical XML can be found at
239be8aff81SXin LI  http://www.jclark.com/xml/canonxml.html .
240be8aff81SXin LI	  </para>
241be8aff81SXin LI        </listitem>
242be8aff81SXin LI      </varlistentry>
243be8aff81SXin LI
244be8aff81SXin LI      <varlistentry>
245cc68614dSXin LI        <term><option>-e</option> <replaceable>encoding</replaceable></term>
246be8aff81SXin LI        <listitem>
247be8aff81SXin LI		<para>
248be8aff81SXin LI   Specifies the character encoding for the document, overriding
249be8aff81SXin LI   any document encoding declaration.  <command>&dhpackage;</command>
250be8aff81SXin LI   supports four built-in encodings:
251be8aff81SXin LI   	<literal>US-ASCII</literal>,
252be8aff81SXin LI	<literal>UTF-8</literal>,
253be8aff81SXin LI	<literal>UTF-16</literal>, and
254be8aff81SXin LI	<literal>ISO-8859-1</literal>.
255be8aff81SXin LI   Also see the <option>-w</option> option.
256be8aff81SXin LI	   </para>
257be8aff81SXin LI        </listitem>
258be8aff81SXin LI      </varlistentry>
259be8aff81SXin LI
260be8aff81SXin LI      <varlistentry>
2614543ef51SXin LI        <term><option>-g</option> <replaceable>bytes</replaceable></term>
2624543ef51SXin LI        <listitem>
2634543ef51SXin LI          <para>
2644543ef51SXin LI            Sets the buffer size to request per call pair to
2654543ef51SXin LI            <function>XML_GetBuffer</function> and <function>read</function>
2664543ef51SXin LI            (default: 8 KiB).
2674543ef51SXin LI          </para>
2684543ef51SXin LI        </listitem>
2694543ef51SXin LI      </varlistentry>
2704543ef51SXin LI
2714543ef51SXin LI      <varlistentry>
2724543ef51SXin LI        <term><option>-h</option></term>
2734543ef51SXin LI        <term><option>--help</option></term>
2744543ef51SXin LI        <listitem>
2754543ef51SXin LI          <para>
2764543ef51SXin LI            Prints short usage information on command <command>&dhpackage;</command>,
2774543ef51SXin LI            and then exits.
2784543ef51SXin LI            Similar to this man page but more concise.
2794543ef51SXin LI          </para>
2804543ef51SXin LI        </listitem>
2814543ef51SXin LI      </varlistentry>
2824543ef51SXin LI
2834543ef51SXin LI      <varlistentry>
284cc68614dSXin LI        <term><option>-k</option></term>
285cc68614dSXin LI        <listitem>
286cc68614dSXin LI          <para>
287cc68614dSXin LI            When processing multiple files, <command>&dhpackage;</command>
288cc68614dSXin LI            by default halts after the first file with an error.
289cc68614dSXin LI            This tells <command>&dhpackage;</command> to report the error
290cc68614dSXin LI            but to keep processing.
291cc68614dSXin LI            This can be useful, for example, when testing a filter that converts
292cc68614dSXin LI            many files to XML and you want to quickly find out which conversions
293cc68614dSXin LI            failed.
294cc68614dSXin LI          </para>
295cc68614dSXin LI        </listitem>
296cc68614dSXin LI      </varlistentry>
297cc68614dSXin LI
298cc68614dSXin LI      <varlistentry>
299be8aff81SXin LI        <term><option>-m</option></term>
300be8aff81SXin LI        <listitem>
301be8aff81SXin LI		<para>
302be8aff81SXin LI  Outputs some strange sort of XML file that completely
303be8aff81SXin LI  describes the input file, including character positions.
304be8aff81SXin LI  Requires <option>-d</option> to specify an output directory.
305be8aff81SXin LI	   </para>
306be8aff81SXin LI        </listitem>
307be8aff81SXin LI      </varlistentry>
308be8aff81SXin LI
309be8aff81SXin LI      <varlistentry>
310be8aff81SXin LI        <term><option>-n</option></term>
311be8aff81SXin LI        <listitem>
312be8aff81SXin LI		<para>
313be8aff81SXin LI  Turns on namespace processing.  (describe namespaces)
314be8aff81SXin LI  <option>-c</option> disables namespaces.
315be8aff81SXin LI	   </para>
316be8aff81SXin LI        </listitem>
317be8aff81SXin LI      </varlistentry>
318be8aff81SXin LI
319be8aff81SXin LI      <varlistentry>
3200a48773fSEric van Gyzen        <term><option>-N</option></term>
3210a48773fSEric van Gyzen        <listitem>
3220a48773fSEric van Gyzen          <para>
3230a48773fSEric van Gyzen  Adds a doctype and notation declarations to canonical XML output.
3240a48773fSEric van Gyzen  This matches the example output used by the formal XML test cases.
3250a48773fSEric van Gyzen  Requires <option>-d</option> to specify an output directory.
3260a48773fSEric van Gyzen          </para>
3270a48773fSEric van Gyzen        </listitem>
3280a48773fSEric van Gyzen      </varlistentry>
3290a48773fSEric van Gyzen
3300a48773fSEric van Gyzen      <varlistentry>
331be8aff81SXin LI        <term><option>-p</option></term>
332be8aff81SXin LI        <listitem>
333be8aff81SXin LI		<para>
334cc68614dSXin LI    Tells <command>&dhpackage;</command> to process external DTDs and parameter
335be8aff81SXin LI    entities.
336be8aff81SXin LI	 </para>
337be8aff81SXin LI	 <para>
338be8aff81SXin LI   Normally <command>&dhpackage;</command> never parses parameter
339be8aff81SXin LI   entities.  <option>-p</option> tells it to always parse them.
340be8aff81SXin LI   <option>-p</option> implies <option>-x</option>.
341be8aff81SXin LI	   </para>
342be8aff81SXin LI        </listitem>
343be8aff81SXin LI      </varlistentry>
344be8aff81SXin LI
345be8aff81SXin LI      <varlistentry>
3464543ef51SXin LI        <term><option>-q</option></term>
3474543ef51SXin LI        <listitem>
3484543ef51SXin LI          <para>
3494543ef51SXin LI            Disables reparse deferral, and allows quadratic parse runtime
3504543ef51SXin LI            on large tokens (default: reparse deferral enabled).
3514543ef51SXin LI          </para>
3524543ef51SXin LI        </listitem>
3534543ef51SXin LI      </varlistentry>
3544543ef51SXin LI
3554543ef51SXin LI      <varlistentry>
356be8aff81SXin LI        <term><option>-r</option></term>
357be8aff81SXin LI        <listitem>
358be8aff81SXin LI		<para>
359be8aff81SXin LI   Normally <command>&dhpackage;</command> memory-maps the XML file
360be8aff81SXin LI   before parsing; this can result in faster parsing on many
361be8aff81SXin LI   platforms.
362be8aff81SXin LI   <option>-r</option> turns off memory-mapping and uses normal file
363be8aff81SXin LI   IO calls instead.
364be8aff81SXin LI   Of course, memory-mapping is automatically turned off
365be8aff81SXin LI   when reading from standard input.
366be8aff81SXin LI	   </para>
367be8aff81SXin LI		<para>
368be8aff81SXin LI   Use of memory-mapping can cause some platforms to report
369be8aff81SXin LI   substantially higher memory usage for
370be8aff81SXin LI   <command>&dhpackage;</command>, but this appears to be a matter of
371be8aff81SXin LI   the operating system reporting memory in a strange way; there is
372be8aff81SXin LI   not a leak in <command>&dhpackage;</command>.
373be8aff81SXin LI           </para>
374be8aff81SXin LI        </listitem>
375be8aff81SXin LI      </varlistentry>
376be8aff81SXin LI
377be8aff81SXin LI      <varlistentry>
378be8aff81SXin LI        <term><option>-s</option></term>
379be8aff81SXin LI        <listitem>
380be8aff81SXin LI		<para>
381be8aff81SXin LI  Prints an error if the document is not standalone.
382be8aff81SXin LI  A document is standalone if it has no external subset and no
383be8aff81SXin LI  references to parameter entities.
384be8aff81SXin LI	   </para>
385be8aff81SXin LI        </listitem>
386be8aff81SXin LI      </varlistentry>
387be8aff81SXin LI
388be8aff81SXin LI      <varlistentry>
389be8aff81SXin LI        <term><option>-t</option></term>
390be8aff81SXin LI        <listitem>
391be8aff81SXin LI		<para>
392be8aff81SXin LI  Turns on timings.  This tells Expat to parse the entire file,
393be8aff81SXin LI  but not perform any processing.
394be8aff81SXin LI  This gives a fairly accurate idea of the raw speed of Expat itself
395be8aff81SXin LI  without client overhead.
396be8aff81SXin LI  <option>-t</option> turns off most of the output options
397be8aff81SXin LI  (<option>-d</option>, <option>-m</option>, <option>-c</option>, ...).
398be8aff81SXin LI	   </para>
399be8aff81SXin LI        </listitem>
400be8aff81SXin LI      </varlistentry>
401be8aff81SXin LI
402be8aff81SXin LI      <varlistentry>
403be8aff81SXin LI        <term><option>-v</option></term>
4044543ef51SXin LI        <term><option>--version</option></term>
405be8aff81SXin LI        <listitem>
406be8aff81SXin LI		<para>
407be8aff81SXin LI  Prints the version of the Expat library being used, including some
408be8aff81SXin LI  information on the compile-time configuration of the library, and
409be8aff81SXin LI  then exits.
410be8aff81SXin LI	   </para>
411be8aff81SXin LI        </listitem>
412be8aff81SXin LI      </varlistentry>
413be8aff81SXin LI
414be8aff81SXin LI      <varlistentry>
415be8aff81SXin LI        <term><option>-w</option></term>
416be8aff81SXin LI        <listitem>
417be8aff81SXin LI		<para>
418be8aff81SXin LI  Enables support for Windows code pages.
419be8aff81SXin LI  Normally, <command>&dhpackage;</command> will throw an error if it
420be8aff81SXin LI  runs across an encoding that it is not equipped to handle itself.  With
421cc68614dSXin LI  <option>-w</option>, <command>&dhpackage;</command> will try to use a Windows code
422be8aff81SXin LI  page.  See also <option>-e</option>.
423be8aff81SXin LI	   </para>
424be8aff81SXin LI        </listitem>
425be8aff81SXin LI      </varlistentry>
426be8aff81SXin LI
427be8aff81SXin LI      <varlistentry>
428be8aff81SXin LI        <term><option>-x</option></term>
429be8aff81SXin LI        <listitem>
430be8aff81SXin LI		<para>
431be8aff81SXin LI  Turns on parsing external entities.
432be8aff81SXin LI  </para>
433be8aff81SXin LI<para>
434be8aff81SXin LI  Non-validating parsers are not required to resolve external
435be8aff81SXin LI  entities, or even expand entities at all.
436be8aff81SXin LI  Expat always expands internal entities (?),
437be8aff81SXin LI  but external entity parsing must be enabled explicitly.
438be8aff81SXin LI  </para>
439be8aff81SXin LI  <para>
440be8aff81SXin LI  External entities are simply entities that obtain their
441be8aff81SXin LI  data from outside the XML file currently being parsed.
442be8aff81SXin LI  </para>
443be8aff81SXin LI  <para>
444be8aff81SXin LI  This is an example of an internal entity:
445be8aff81SXin LI<literallayout>
446be8aff81SXin LI&lt;!ENTITY vers '1.0.2'&gt;
447be8aff81SXin LI</literallayout>
448be8aff81SXin LI  </para>
449be8aff81SXin LI  <para>
450be8aff81SXin LI  And here are some examples of external entities:
451be8aff81SXin LI
452be8aff81SXin LI<literallayout>
453be8aff81SXin LI&lt;!ENTITY header SYSTEM "header-&amp;vers;.xml"&gt;  (parsed)
454be8aff81SXin LI&lt;!ENTITY logo SYSTEM "logo.png" NDATA PNG&gt;   (unparsed)
455be8aff81SXin LI</literallayout>
456be8aff81SXin LI
457be8aff81SXin LI	   </para>
458be8aff81SXin LI        </listitem>
459be8aff81SXin LI      </varlistentry>
460be8aff81SXin LI
461be8aff81SXin LI      <varlistentry>
462be8aff81SXin LI        <term><option>--</option></term>
463be8aff81SXin LI        <listitem>
464be8aff81SXin LI		<para>
465be8aff81SXin LI    (Two hyphens.)
466be8aff81SXin LI    Terminates the list of options.  This is only needed if a filename
467be8aff81SXin LI    starts with a hyphen.  For example:
468be8aff81SXin LI	   </para>
469be8aff81SXin LI<literallayout>
470be8aff81SXin LI&dhpackage; -- -myfile.xml
471be8aff81SXin LI</literallayout>
472be8aff81SXin LI		<para>
473be8aff81SXin LI    will run <command>&dhpackage;</command> on the file
474be8aff81SXin LI    <filename>-myfile.xml</filename>.
475be8aff81SXin LI	   </para>
476be8aff81SXin LI        </listitem>
477be8aff81SXin LI      </varlistentry>
478be8aff81SXin LI    </variablelist>
479be8aff81SXin LI
480be8aff81SXin LI	<para>
481be8aff81SXin LI    Older versions of <command>&dhpackage;</command> do not support
482be8aff81SXin LI    reading from standard input.
483be8aff81SXin LI	</para>
484be8aff81SXin LI  </refsect1>
485be8aff81SXin LI
486be8aff81SXin LI  <refsect1>
487be8aff81SXin LI  <title>OUTPUT</title>
488be8aff81SXin LI    <para>
489cc68614dSXin LI	<command>&dhpackage;</command> outputs nothing for files which are problem-free.
490cc68614dSXin LI        If any input file is not well-formed, or if the output for any
491cc68614dSXin LI	input file cannot be opened, <command>&dhpackage;</command> prints a single
492cc68614dSXin LI	line describing	the problem to standard output.
493cc68614dSXin LI    </para>
494cc68614dSXin LI    <para>
495cc68614dSXin LI        If the <option>-k</option> option is not provided, <command>&dhpackage;</command>
496cc68614dSXin LI	halts upon encountering a well-formedness or output-file error.
497cc68614dSXin LI	If <option>-k</option> is provided, <command>&dhpackage;</command> continues
498cc68614dSXin LI	processing the remaining input files, describing problems found with any of them.
499be8aff81SXin LI    </para>
500be8aff81SXin LI  </refsect1>
501be8aff81SXin LI
502be8aff81SXin LI  <refsect1>
503cc68614dSXin LI  <title>EXIT STATUS</title>
5044543ef51SXin LI    <para>For options <option>-v</option>|<option>--version</option> or <option>-h</option>|<option>--help</option>, <command>&dhpackage;</command> always exits with status code 0.  For other cases, the following exit status codes are returned:
505cc68614dSXin LI    <variablelist>
506cc68614dSXin LI      <varlistentry>
507cc68614dSXin LI        <term><option>0</option></term>
508cc68614dSXin LI        <listitem><para>The input files are well-formed and the output (if requested) was written successfully.</para>
509cc68614dSXin LI        </listitem>
510cc68614dSXin LI      </varlistentry>
511cc68614dSXin LI      <varlistentry>
512cc68614dSXin LI        <term><option>1</option></term>
513cc68614dSXin LI        <listitem><para>An internal error occurred.</para>
514cc68614dSXin LI        </listitem>
515cc68614dSXin LI      </varlistentry>
516cc68614dSXin LI      <varlistentry>
517cc68614dSXin LI        <term><option>2</option></term>
518cc68614dSXin LI        <listitem><para>One or more input files were not well-formed or could not be parsed.</para>
519cc68614dSXin LI        </listitem>
520cc68614dSXin LI      </varlistentry>
521cc68614dSXin LI      <varlistentry>
522cc68614dSXin LI        <term><option>3</option></term>
523cc68614dSXin LI        <listitem><para>If using the <option>-d</option> option, an error occurred opening an output file.</para>
524cc68614dSXin LI        </listitem>
525cc68614dSXin LI      </varlistentry>
526cc68614dSXin LI      <varlistentry>
527cc68614dSXin LI        <term><option>4</option></term>
528cc68614dSXin LI        <listitem><para>There was a command-line argument error in how <command>&dhpackage;</command> was invoked.</para>
529cc68614dSXin LI        </listitem>
530cc68614dSXin LI      </varlistentry>
531cc68614dSXin LI    </variablelist>
532be8aff81SXin LI	</para>
533cc68614dSXin LI  </refsect1>
534cc68614dSXin LI
535cc68614dSXin LI
536cc68614dSXin LI  <refsect1>
537cc68614dSXin LI    <title>BUGS</title>
538be8aff81SXin LI	<para>
539be8aff81SXin LI	The errors should go to standard error, not standard output.
540be8aff81SXin LI	</para>
541be8aff81SXin LI	<para>
542be8aff81SXin LI	There should be a way to get <option>-d</option> to send its
543be8aff81SXin LI	output to standard output rather than forcing the user to send
544be8aff81SXin LI	it to a file.
545be8aff81SXin LI	</para>
546be8aff81SXin LI	<para>
547be8aff81SXin LI	I have no idea why anyone would want to use the
548be8aff81SXin LI	<option>-d</option>, <option>-c</option>, and
549be8aff81SXin LI	<option>-m</option> options.  If someone could explain it to
550be8aff81SXin LI	me, I'd like to add this information to this manpage.
551be8aff81SXin LI	</para>
552be8aff81SXin LI  </refsect1>
553be8aff81SXin LI
554be8aff81SXin LI  <refsect1>
555be8aff81SXin LI    <title>SEE ALSO</title>
556be8aff81SXin LI	<para>
557be8aff81SXin LI
558be8aff81SXin LI<literallayout>
559cc68614dSXin LIThe Expat home page:                            https://libexpat.github.io/
560cc68614dSXin LIThe W3 XML 1.0 specification (fourth edition):  https://www.w3.org/TR/2006/REC-xml-20060816/
561cc68614dSXin LIBillion laughs attack:                          https://en.wikipedia.org/wiki/Billion_laughs_attack
562be8aff81SXin LI</literallayout>
563be8aff81SXin LI
564be8aff81SXin LI	</para>
565be8aff81SXin LI  </refsect1>
566be8aff81SXin LI
567be8aff81SXin LI  <refsect1>
568be8aff81SXin LI    <title>AUTHOR</title>
569be8aff81SXin LI    <para>
570cc68614dSXin LI      This manual page was originally written by &dhusername; &dhemail;
571cc68614dSXin LI      in December 2001 for
572be8aff81SXin LI      the &debian; system (but may be used by others).  Permission is
573be8aff81SXin LI      granted to copy, distribute and/or modify this document under
574be8aff81SXin LI      the terms of the <acronym>GNU</acronym> Free Documentation
575be8aff81SXin LI      License, Version 1.1.
576be8aff81SXin LI	</para>
577be8aff81SXin LI  </refsect1>
578be8aff81SXin LI</refentry>
579