xref: /freebsd/contrib/expat/doc/xmlwf.xml (revision eb69d1f144a6fcc765d1b9d44a5ae8082353e70b)
1<!DOCTYPE refentry [
2  <!-- Fill in your name for FIRSTNAME and SURNAME. -->
3  <!ENTITY dhfirstname "<firstname>Scott</firstname>">
4  <!ENTITY dhsurname   "<surname>Bronson</surname>">
5  <!-- Please adjust the date whenever revising the manpage. -->
6  <!ENTITY dhdate      "<date>March 11, 2016</date>">
7  <!-- SECTION should be 1-8, maybe w/ subsection; other parameters are
8       allowed: see man(7), man(1). -->
9  <!ENTITY dhsection   "<manvolnum>1</manvolnum>">
10  <!ENTITY dhemail     "<email>bronson@rinspin.com</email>">
11  <!ENTITY dhusername  "Scott Bronson">
12  <!ENTITY dhucpackage "<refentrytitle>XMLWF</refentrytitle>">
13  <!ENTITY dhpackage   "xmlwf">
14
15  <!ENTITY debian      "<productname>Debian GNU/Linux</productname>">
16  <!ENTITY gnu         "<acronym>GNU</acronym>">
17]>
18
19<refentry>
20  <refentryinfo>
21    <address>
22      &dhemail;
23    </address>
24    <author>
25      &dhfirstname;
26      &dhsurname;
27    </author>
28    <copyright>
29      <year>2001</year>
30      <holder>&dhusername;</holder>
31    </copyright>
32    &dhdate;
33  </refentryinfo>
34  <refmeta>
35    &dhucpackage;
36
37    &dhsection;
38  </refmeta>
39  <refnamediv>
40    <refname>&dhpackage;</refname>
41
42    <refpurpose>Determines if an XML document is well-formed</refpurpose>
43  </refnamediv>
44  <refsynopsisdiv>
45    <cmdsynopsis>
46      <command>&dhpackage;</command>
47	  <arg><option>-s</option></arg>
48	  <arg><option>-n</option></arg>
49	  <arg><option>-p</option></arg>
50	  <arg><option>-x</option></arg>
51
52	  <arg><option>-e <replaceable>encoding</replaceable></option></arg>
53	  <arg><option>-w</option></arg>
54
55	  <arg><option>-d <replaceable>output-dir</replaceable></option></arg>
56	  <arg><option>-c</option></arg>
57	  <arg><option>-m</option></arg>
58
59	  <arg><option>-r</option></arg>
60	  <arg><option>-t</option></arg>
61
62	  <arg><option>-v</option></arg>
63
64	  <arg>file ...</arg>
65    </cmdsynopsis>
66  </refsynopsisdiv>
67
68  <refsect1>
69    <title>DESCRIPTION</title>
70
71    <para>
72	<command>&dhpackage;</command> uses the Expat library to
73	determine if an XML document is well-formed.  It is
74	non-validating.
75	</para>
76
77	<para>
78	If you do not specify any files on the command line, and you
79	have a recent version of <command>&dhpackage;</command>, the
80	input file will be read from standard input.
81	</para>
82
83  </refsect1>
84
85  <refsect1>
86    <title>WELL-FORMED DOCUMENTS</title>
87
88	<para>
89	  A well-formed document must adhere to the
90	  following rules:
91	</para>
92
93	<itemizedlist>
94      <listitem><para>
95	    The file begins with an XML declaration.  For instance,
96		<literal>&lt;?xml version="1.0" standalone="yes"?&gt;</literal>.
97		<emphasis>NOTE:</emphasis>
98		<command>&dhpackage;</command> does not currently
99		check for a valid XML declaration.
100      </para></listitem>
101      <listitem><para>
102		Every start tag is either empty (&lt;tag/&gt;)
103		or has a corresponding end tag.
104      </para></listitem>
105      <listitem><para>
106	    There is exactly one root element.  This element must contain
107		all other elements in the document.  Only comments, white
108		space, and processing instructions may come after the close
109		of the root element.
110      </para></listitem>
111      <listitem><para>
112		All elements nest properly.
113      </para></listitem>
114      <listitem><para>
115		All attribute values are enclosed in quotes (either single
116		or double).
117      </para></listitem>
118    </itemizedlist>
119
120	<para>
121	  If the document has a DTD, and it strictly complies with that
122	  DTD, then the document is also considered <emphasis>valid</emphasis>.
123	  <command>&dhpackage;</command> is a non-validating parser --
124	  it does not check the DTD.  However, it does support
125	  external entities (see the <option>-x</option> option).
126	</para>
127  </refsect1>
128
129  <refsect1>
130    <title>OPTIONS</title>
131
132<para>
133When an option includes an argument, you may specify the argument either
134separately ("<option>-d</option> output") or concatenated with the
135option ("<option>-d</option>output").  <command>&dhpackage;</command>
136supports both.
137</para>
138
139    <variablelist>
140
141      <varlistentry>
142        <term><option>-c</option></term>
143        <listitem>
144		<para>
145  If the input file is well-formed and <command>&dhpackage;</command>
146  doesn't encounter any errors, the input file is simply copied to
147  the output directory unchanged.
148  This implies no namespaces (turns off <option>-n</option>) and
149  requires <option>-d</option> to specify an output directory.
150  		</para>
151        </listitem>
152      </varlistentry>
153
154      <varlistentry>
155        <term><option>-d output-dir</option></term>
156        <listitem>
157		<para>
158  Specifies a directory to contain transformed
159  representations of the input files.
160  By default, <option>-d</option> outputs a canonical representation
161  (described below).
162  You can select different output formats using <option>-c</option>
163  and <option>-m</option>.
164	  </para>
165	  <para>
166  The output filenames will
167  be exactly the same as the input filenames or "STDIN" if the input is
168  coming from standard input.  Therefore, you must be careful that the
169  output file does not go into the same directory as the input
170  file.  Otherwise, <command>&dhpackage;</command> will delete the
171  input file before it generates the output file (just like running
172  <literal>cat &lt; file &gt; file</literal> in most shells).
173	  </para>
174	  <para>
175  Two structurally equivalent XML documents have a byte-for-byte
176  identical canonical XML representation.
177  Note that ignorable white space is considered significant and
178  is treated equivalently to data.
179  More on canonical XML can be found at
180  http://www.jclark.com/xml/canonxml.html .
181	  </para>
182        </listitem>
183      </varlistentry>
184
185      <varlistentry>
186        <term><option>-e encoding</option></term>
187        <listitem>
188		<para>
189   Specifies the character encoding for the document, overriding
190   any document encoding declaration.  <command>&dhpackage;</command>
191   supports four built-in encodings:
192   	<literal>US-ASCII</literal>,
193	<literal>UTF-8</literal>,
194	<literal>UTF-16</literal>, and
195	<literal>ISO-8859-1</literal>.
196   Also see the <option>-w</option> option.
197	   </para>
198        </listitem>
199      </varlistentry>
200
201      <varlistentry>
202        <term><option>-m</option></term>
203        <listitem>
204		<para>
205  Outputs some strange sort of XML file that completely
206  describes the input file, including character positions.
207  Requires <option>-d</option> to specify an output directory.
208	   </para>
209        </listitem>
210      </varlistentry>
211
212      <varlistentry>
213        <term><option>-n</option></term>
214        <listitem>
215		<para>
216  Turns on namespace processing.  (describe namespaces)
217  <option>-c</option> disables namespaces.
218	   </para>
219        </listitem>
220      </varlistentry>
221
222      <varlistentry>
223        <term><option>-p</option></term>
224        <listitem>
225		<para>
226    Tells <command>&dhpackage;</command> to process external DTDs and parameter
227    entities.
228	 </para>
229	 <para>
230   Normally <command>&dhpackage;</command> never parses parameter
231   entities.  <option>-p</option> tells it to always parse them.
232   <option>-p</option> implies <option>-x</option>.
233	   </para>
234        </listitem>
235      </varlistentry>
236
237      <varlistentry>
238        <term><option>-r</option></term>
239        <listitem>
240		<para>
241   Normally <command>&dhpackage;</command> memory-maps the XML file
242   before parsing; this can result in faster parsing on many
243   platforms.
244   <option>-r</option> turns off memory-mapping and uses normal file
245   IO calls instead.
246   Of course, memory-mapping is automatically turned off
247   when reading from standard input.
248	   </para>
249		<para>
250   Use of memory-mapping can cause some platforms to report
251   substantially higher memory usage for
252   <command>&dhpackage;</command>, but this appears to be a matter of
253   the operating system reporting memory in a strange way; there is
254   not a leak in <command>&dhpackage;</command>.
255           </para>
256        </listitem>
257      </varlistentry>
258
259      <varlistentry>
260        <term><option>-s</option></term>
261        <listitem>
262		<para>
263  Prints an error if the document is not standalone.
264  A document is standalone if it has no external subset and no
265  references to parameter entities.
266	   </para>
267        </listitem>
268      </varlistentry>
269
270      <varlistentry>
271        <term><option>-t</option></term>
272        <listitem>
273		<para>
274  Turns on timings.  This tells Expat to parse the entire file,
275  but not perform any processing.
276  This gives a fairly accurate idea of the raw speed of Expat itself
277  without client overhead.
278  <option>-t</option> turns off most of the output options
279  (<option>-d</option>, <option>-m</option>, <option>-c</option>, ...).
280	   </para>
281        </listitem>
282      </varlistentry>
283
284      <varlistentry>
285        <term><option>-v</option></term>
286        <listitem>
287		<para>
288  Prints the version of the Expat library being used, including some
289  information on the compile-time configuration of the library, and
290  then exits.
291	   </para>
292        </listitem>
293      </varlistentry>
294
295      <varlistentry>
296        <term><option>-w</option></term>
297        <listitem>
298		<para>
299  Enables support for Windows code pages.
300  Normally, <command>&dhpackage;</command> will throw an error if it
301  runs across an encoding that it is not equipped to handle itself.  With
302  <option>-w</option>, <command>&dhpackage;</command> will try to use a Windows code
303  page.  See also <option>-e</option>.
304	   </para>
305        </listitem>
306      </varlistentry>
307
308      <varlistentry>
309        <term><option>-x</option></term>
310        <listitem>
311		<para>
312  Turns on parsing external entities.
313  </para>
314<para>
315  Non-validating parsers are not required to resolve external
316  entities, or even expand entities at all.
317  Expat always expands internal entities (?),
318  but external entity parsing must be enabled explicitly.
319  </para>
320  <para>
321  External entities are simply entities that obtain their
322  data from outside the XML file currently being parsed.
323  </para>
324  <para>
325  This is an example of an internal entity:
326<literallayout>
327&lt;!ENTITY vers '1.0.2'&gt;
328</literallayout>
329  </para>
330  <para>
331  And here are some examples of external entities:
332
333<literallayout>
334&lt;!ENTITY header SYSTEM "header-&amp;vers;.xml"&gt;  (parsed)
335&lt;!ENTITY logo SYSTEM "logo.png" PNG&gt;         (unparsed)
336</literallayout>
337
338	   </para>
339        </listitem>
340      </varlistentry>
341
342      <varlistentry>
343        <term><option>--</option></term>
344        <listitem>
345		<para>
346    (Two hyphens.)
347    Terminates the list of options.  This is only needed if a filename
348    starts with a hyphen.  For example:
349	   </para>
350<literallayout>
351&dhpackage; -- -myfile.xml
352</literallayout>
353		<para>
354    will run <command>&dhpackage;</command> on the file
355    <filename>-myfile.xml</filename>.
356	   </para>
357        </listitem>
358      </varlistentry>
359    </variablelist>
360
361	<para>
362    Older versions of <command>&dhpackage;</command> do not support
363    reading from standard input.
364	</para>
365  </refsect1>
366
367  <refsect1>
368  <title>OUTPUT</title>
369    <para>
370	If an input file is not well-formed,
371	<command>&dhpackage;</command> prints a single line describing
372	the problem to standard output.  If a file is well-formed,
373	<command>&dhpackage;</command> outputs nothing.
374	Note that the result code is <emphasis>not</emphasis> set.
375	</para>
376  </refsect1>
377
378  <refsect1>
379    <title>BUGS</title>
380	<para>
381	<command>&dhpackage;</command> returns a result code of 0 (no error),
382	even if the file is not well-formed.  There is no good way for
383	a program to use <command>&dhpackage;</command> to quickly
384	check a file -- it must parse <command>&dhpackage;</command>'s
385	standard output.
386	</para>
387	<para>
388	The errors should go to standard error, not standard output.
389	</para>
390	<para>
391	There should be a way to get <option>-d</option> to send its
392	output to standard output rather than forcing the user to send
393	it to a file.
394	</para>
395	<para>
396	I have no idea why anyone would want to use the
397	<option>-d</option>, <option>-c</option>, and
398	<option>-m</option> options.  If someone could explain it to
399	me, I'd like to add this information to this manpage.
400	</para>
401  </refsect1>
402
403  <refsect1>
404    <title>ALTERNATIVES</title>
405	<para>
406	  Here are some XML validators on the web:
407
408<literallayout>
409http://www.hcrc.ed.ac.uk/~richard/xml-check.html
410http://www.stg.brown.edu/service/xmlvalid/
411http://www.scripting.com/frontier5/xml/code/xmlValidator.html
412http://www.xml.com/pub/a/tools/ruwf/check.html
413</literallayout>
414
415		 </para>
416  </refsect1>
417
418  <refsect1>
419    <title>SEE ALSO</title>
420	<para>
421
422<literallayout>
423The Expat home page:        http://www.libexpat.org/
424The W3 XML specification:   http://www.w3.org/TR/REC-xml
425</literallayout>
426
427	</para>
428  </refsect1>
429
430  <refsect1>
431    <title>AUTHOR</title>
432    <para>
433	  This manual page was written by &dhusername; &dhemail; for
434      the &debian; system (but may be used by others).  Permission is
435      granted to copy, distribute and/or modify this document under
436      the terms of the <acronym>GNU</acronym> Free Documentation
437      License, Version 1.1.
438	</para>
439  </refsect1>
440</refentry>
441