xref: /freebsd/contrib/mandoc/mandoc.3 (revision 7648bc9fee8dec6cb3c4941e0165a930fbe8dcb0)
1*7295610fSBaptiste Daroussin.\"	$Id: mandoc.3,v 1.44 2018/12/30 00:49:55 schwarze Exp $
261d06d6bSBaptiste Daroussin.\"
361d06d6bSBaptiste Daroussin.\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
461d06d6bSBaptiste Daroussin.\" Copyright (c) 2010-2017 Ingo Schwarze <schwarze@openbsd.org>
561d06d6bSBaptiste Daroussin.\"
661d06d6bSBaptiste Daroussin.\" Permission to use, copy, modify, and distribute this software for any
761d06d6bSBaptiste Daroussin.\" purpose with or without fee is hereby granted, provided that the above
861d06d6bSBaptiste Daroussin.\" copyright notice and this permission notice appear in all copies.
961d06d6bSBaptiste Daroussin.\"
1061d06d6bSBaptiste Daroussin.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1161d06d6bSBaptiste Daroussin.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1261d06d6bSBaptiste Daroussin.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1361d06d6bSBaptiste Daroussin.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1461d06d6bSBaptiste Daroussin.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1561d06d6bSBaptiste Daroussin.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1661d06d6bSBaptiste Daroussin.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1761d06d6bSBaptiste Daroussin.\"
18*7295610fSBaptiste Daroussin.Dd $Mdocdate: December 30 2018 $
1961d06d6bSBaptiste Daroussin.Dt MANDOC 3
2061d06d6bSBaptiste Daroussin.Os
2161d06d6bSBaptiste Daroussin.Sh NAME
2261d06d6bSBaptiste Daroussin.Nm mandoc ,
2361d06d6bSBaptiste Daroussin.Nm deroff ,
2461d06d6bSBaptiste Daroussin.Nm mparse_alloc ,
25*7295610fSBaptiste Daroussin.Nm mparse_copy ,
2661d06d6bSBaptiste Daroussin.Nm mparse_free ,
2761d06d6bSBaptiste Daroussin.Nm mparse_open ,
2861d06d6bSBaptiste Daroussin.Nm mparse_readfd ,
2961d06d6bSBaptiste Daroussin.Nm mparse_reset ,
30*7295610fSBaptiste Daroussin.Nm mparse_result
3161d06d6bSBaptiste Daroussin.Nd mandoc macro compiler library
3261d06d6bSBaptiste Daroussin.Sh SYNOPSIS
3361d06d6bSBaptiste Daroussin.In sys/types.h
34*7295610fSBaptiste Daroussin.In stdio.h
3561d06d6bSBaptiste Daroussin.In mandoc.h
3661d06d6bSBaptiste Daroussin.Pp
3761d06d6bSBaptiste Daroussin.Fd "#define ASCII_NBRSP"
3861d06d6bSBaptiste Daroussin.Fd "#define ASCII_HYPH"
3961d06d6bSBaptiste Daroussin.Fd "#define ASCII_BREAK"
4061d06d6bSBaptiste Daroussin.Ft struct mparse *
4161d06d6bSBaptiste Daroussin.Fo mparse_alloc
4261d06d6bSBaptiste Daroussin.Fa "int options"
4361d06d6bSBaptiste Daroussin.Fa "enum mandoc_os os_e"
4461d06d6bSBaptiste Daroussin.Fa "char *os_s"
4561d06d6bSBaptiste Daroussin.Fc
4661d06d6bSBaptiste Daroussin.Ft void
4761d06d6bSBaptiste Daroussin.Fo mparse_free
4861d06d6bSBaptiste Daroussin.Fa "struct mparse *parse"
4961d06d6bSBaptiste Daroussin.Fc
5061d06d6bSBaptiste Daroussin.Ft void
51*7295610fSBaptiste Daroussin.Fo mparse_copy
52*7295610fSBaptiste Daroussin.Fa "const struct mparse *parse"
5361d06d6bSBaptiste Daroussin.Fc
5461d06d6bSBaptiste Daroussin.Ft int
5561d06d6bSBaptiste Daroussin.Fo mparse_open
5661d06d6bSBaptiste Daroussin.Fa "struct mparse *parse"
5761d06d6bSBaptiste Daroussin.Fa "const char *fname"
5861d06d6bSBaptiste Daroussin.Fc
59*7295610fSBaptiste Daroussin.Ft void
6061d06d6bSBaptiste Daroussin.Fo mparse_readfd
6161d06d6bSBaptiste Daroussin.Fa "struct mparse *parse"
6261d06d6bSBaptiste Daroussin.Fa "int fd"
6361d06d6bSBaptiste Daroussin.Fa "const char *fname"
6461d06d6bSBaptiste Daroussin.Fc
6561d06d6bSBaptiste Daroussin.Ft void
6661d06d6bSBaptiste Daroussin.Fo mparse_reset
6761d06d6bSBaptiste Daroussin.Fa "struct mparse *parse"
6861d06d6bSBaptiste Daroussin.Fc
69*7295610fSBaptiste Daroussin.Ft struct roff_meta *
7061d06d6bSBaptiste Daroussin.Fo mparse_result
7161d06d6bSBaptiste Daroussin.Fa "struct mparse *parse"
7261d06d6bSBaptiste Daroussin.Fc
7361d06d6bSBaptiste Daroussin.In roff.h
7461d06d6bSBaptiste Daroussin.Ft void
7561d06d6bSBaptiste Daroussin.Fo deroff
7661d06d6bSBaptiste Daroussin.Fa "char **dest"
7761d06d6bSBaptiste Daroussin.Fa "const struct roff_node *node"
7861d06d6bSBaptiste Daroussin.Fc
7961d06d6bSBaptiste Daroussin.In sys/types.h
8061d06d6bSBaptiste Daroussin.In mandoc.h
8161d06d6bSBaptiste Daroussin.In mdoc.h
8261d06d6bSBaptiste Daroussin.Vt extern const char * const * mdoc_argnames;
8361d06d6bSBaptiste Daroussin.Vt extern const char * const * mdoc_macronames;
8461d06d6bSBaptiste Daroussin.In sys/types.h
8561d06d6bSBaptiste Daroussin.In mandoc.h
8661d06d6bSBaptiste Daroussin.In man.h
8761d06d6bSBaptiste Daroussin.Vt extern const char * const * man_macronames;
8861d06d6bSBaptiste Daroussin.Sh DESCRIPTION
8961d06d6bSBaptiste DaroussinThe
9061d06d6bSBaptiste Daroussin.Nm mandoc
9161d06d6bSBaptiste Daroussinlibrary parses a
9261d06d6bSBaptiste Daroussin.Ux
9361d06d6bSBaptiste Daroussinmanual into an abstract syntax tree (AST).
9461d06d6bSBaptiste Daroussin.Ux
9561d06d6bSBaptiste Daroussinmanuals are composed of
9661d06d6bSBaptiste Daroussin.Xr mdoc 7
9761d06d6bSBaptiste Daroussinor
9861d06d6bSBaptiste Daroussin.Xr man 7 ,
9961d06d6bSBaptiste Daroussinand may be mixed with
10061d06d6bSBaptiste Daroussin.Xr roff 7 ,
10161d06d6bSBaptiste Daroussin.Xr tbl 7 ,
10261d06d6bSBaptiste Daroussinand
10361d06d6bSBaptiste Daroussin.Xr eqn 7
10461d06d6bSBaptiste Daroussininvocations.
10561d06d6bSBaptiste Daroussin.Pp
10661d06d6bSBaptiste DaroussinThe following describes a general parse sequence:
10761d06d6bSBaptiste Daroussin.Bl -enum
10861d06d6bSBaptiste Daroussin.It
10961d06d6bSBaptiste Daroussininitiate a parsing sequence with
11061d06d6bSBaptiste Daroussin.Xr mchars_alloc 3
11161d06d6bSBaptiste Daroussinand
11261d06d6bSBaptiste Daroussin.Fn mparse_alloc ;
11361d06d6bSBaptiste Daroussin.It
11461d06d6bSBaptiste Daroussinopen a file with
11561d06d6bSBaptiste Daroussin.Xr open 2
11661d06d6bSBaptiste Daroussinor
11761d06d6bSBaptiste Daroussin.Fn mparse_open ;
11861d06d6bSBaptiste Daroussin.It
11961d06d6bSBaptiste Daroussinparse it with
12061d06d6bSBaptiste Daroussin.Fn mparse_readfd ;
12161d06d6bSBaptiste Daroussin.It
12261d06d6bSBaptiste Daroussinclose it with
12361d06d6bSBaptiste Daroussin.Xr close 2 ;
12461d06d6bSBaptiste Daroussin.It
12561d06d6bSBaptiste Daroussinretrieve the syntax tree with
12661d06d6bSBaptiste Daroussin.Fn mparse_result ;
12761d06d6bSBaptiste Daroussin.It
12861d06d6bSBaptiste Daroussinif information about the validity of the input is needed, fetch it with
12961d06d6bSBaptiste Daroussin.Fn mparse_updaterc ;
13061d06d6bSBaptiste Daroussin.It
13161d06d6bSBaptiste Daroussiniterate over parse nodes starting from the
13261d06d6bSBaptiste Daroussin.Fa first
13361d06d6bSBaptiste Daroussinmember of the returned
134*7295610fSBaptiste Daroussin.Vt struct roff_meta ;
13561d06d6bSBaptiste Daroussin.It
13661d06d6bSBaptiste Daroussinfree all allocated memory with
13761d06d6bSBaptiste Daroussin.Fn mparse_free
13861d06d6bSBaptiste Daroussinand
13961d06d6bSBaptiste Daroussin.Xr mchars_free 3 ,
14061d06d6bSBaptiste Daroussinor invoke
14161d06d6bSBaptiste Daroussin.Fn mparse_reset
14261d06d6bSBaptiste Daroussinand go back to step 2 to parse new files.
14361d06d6bSBaptiste Daroussin.El
14461d06d6bSBaptiste Daroussin.Sh REFERENCE
14561d06d6bSBaptiste DaroussinThis section documents the functions, types, and variables available
14661d06d6bSBaptiste Daroussinvia
14761d06d6bSBaptiste Daroussin.In mandoc.h ,
14861d06d6bSBaptiste Daroussinwith the exception of those documented in
14961d06d6bSBaptiste Daroussin.Xr mandoc_escape 3
15061d06d6bSBaptiste Daroussinand
15161d06d6bSBaptiste Daroussin.Xr mchars_alloc 3 .
15261d06d6bSBaptiste Daroussin.Ss Types
15361d06d6bSBaptiste Daroussin.Bl -ohang
15461d06d6bSBaptiste Daroussin.It Vt "enum mandocerr"
15561d06d6bSBaptiste DaroussinAn error or warning message during parsing.
15661d06d6bSBaptiste Daroussin.It Vt "enum mandoclevel"
15761d06d6bSBaptiste DaroussinA classification of an
15861d06d6bSBaptiste Daroussin.Vt "enum mandocerr"
15961d06d6bSBaptiste Daroussinas regards system operation.
16061d06d6bSBaptiste DaroussinSee the DIAGNOSTICS section in
16161d06d6bSBaptiste Daroussin.Xr mandoc 1
16261d06d6bSBaptiste Daroussinregarding the meanings of the levels.
16361d06d6bSBaptiste Daroussin.It Vt "struct mparse"
16461d06d6bSBaptiste DaroussinAn opaque pointer to a running parse sequence.
16561d06d6bSBaptiste DaroussinCreated with
16661d06d6bSBaptiste Daroussin.Fn mparse_alloc
16761d06d6bSBaptiste Daroussinand freed with
16861d06d6bSBaptiste Daroussin.Fn mparse_free .
16961d06d6bSBaptiste DaroussinThis may be used across parsed input if
17061d06d6bSBaptiste Daroussin.Fn mparse_reset
17161d06d6bSBaptiste Daroussinis called between parses.
17261d06d6bSBaptiste Daroussin.El
17361d06d6bSBaptiste Daroussin.Ss Functions
17461d06d6bSBaptiste Daroussin.Bl -ohang
17561d06d6bSBaptiste Daroussin.It Fn deroff
17661d06d6bSBaptiste DaroussinObtain a text-only representation of a
17761d06d6bSBaptiste Daroussin.Vt struct roff_node ,
17861d06d6bSBaptiste Daroussinincluding text contained in its child nodes.
17961d06d6bSBaptiste DaroussinTo be used on children of the
18061d06d6bSBaptiste Daroussin.Fa first
18161d06d6bSBaptiste Daroussinmember of
182*7295610fSBaptiste Daroussin.Vt struct roff_meta .
18361d06d6bSBaptiste DaroussinWhen it is no longer needed, the pointer returned from
18461d06d6bSBaptiste Daroussin.Fn deroff
18561d06d6bSBaptiste Daroussincan be passed to
18661d06d6bSBaptiste Daroussin.Xr free 3 .
18761d06d6bSBaptiste Daroussin.It Fn mparse_alloc
18861d06d6bSBaptiste DaroussinAllocate a parser.
18961d06d6bSBaptiste DaroussinThe arguments have the following effect:
19061d06d6bSBaptiste Daroussin.Bl -tag -offset 5n -width inttype
19161d06d6bSBaptiste Daroussin.It Ar options
19261d06d6bSBaptiste DaroussinWhen the
19361d06d6bSBaptiste Daroussin.Dv MPARSE_MDOC
19461d06d6bSBaptiste Daroussinor
19561d06d6bSBaptiste Daroussin.Dv MPARSE_MAN
19661d06d6bSBaptiste Daroussinbit is set, only that parser is used.
19761d06d6bSBaptiste DaroussinOtherwise, the document type is automatically detected.
19861d06d6bSBaptiste Daroussin.Pp
19961d06d6bSBaptiste DaroussinWhen the
20061d06d6bSBaptiste Daroussin.Dv MPARSE_SO
20161d06d6bSBaptiste Daroussinbit is set,
20261d06d6bSBaptiste Daroussin.Xr roff 7
20361d06d6bSBaptiste Daroussin.Ic \&so
20461d06d6bSBaptiste Daroussinfile inclusion requests are always honoured.
20561d06d6bSBaptiste DaroussinOtherwise, if the request is the only content in an input file,
20661d06d6bSBaptiste Daroussinonly the file name is remembered, to be returned in the
20761d06d6bSBaptiste Daroussin.Fa sodest
208*7295610fSBaptiste Daroussinfield of
209*7295610fSBaptiste Daroussin.Vt struct roff_meta .
21061d06d6bSBaptiste Daroussin.Pp
21161d06d6bSBaptiste DaroussinWhen the
21261d06d6bSBaptiste Daroussin.Dv MPARSE_QUICK
21361d06d6bSBaptiste Daroussinbit is set, parsing is aborted after the NAME section.
21461d06d6bSBaptiste DaroussinThis is for example useful in
21561d06d6bSBaptiste Daroussin.Xr makewhatis 8
21661d06d6bSBaptiste Daroussin.Fl Q
21761d06d6bSBaptiste Daroussinto quickly build minimal databases.
218*7295610fSBaptiste Daroussin.Pp
219*7295610fSBaptiste DaroussinWhen the
220*7295610fSBaptiste Daroussin.Dv MPARSE_VALIDATE
221*7295610fSBaptiste Daroussinbit is set,
222*7295610fSBaptiste Daroussin.Fn mparse_result
223*7295610fSBaptiste Daroussinruns the validation functions before returning the syntax tree.
224*7295610fSBaptiste DaroussinThis is almost always required, except in certain debugging scenarios,
225*7295610fSBaptiste Daroussinfor example to dump unvalidated syntax trees.
22661d06d6bSBaptiste Daroussin.It Ar os_e
22761d06d6bSBaptiste DaroussinOperating system to check base system conventions for.
22861d06d6bSBaptiste DaroussinIf
22961d06d6bSBaptiste Daroussin.Dv MANDOC_OS_OTHER ,
23061d06d6bSBaptiste Daroussinthe system is automatically detected from
23161d06d6bSBaptiste Daroussin.Ic \&Os ,
23261d06d6bSBaptiste Daroussin.Fl Ios ,
23361d06d6bSBaptiste Daroussinor
23461d06d6bSBaptiste Daroussin.Xr uname 3 .
23561d06d6bSBaptiste Daroussin.It Ar os_s
23661d06d6bSBaptiste DaroussinA default string for the
23761d06d6bSBaptiste Daroussin.Xr mdoc 7
23861d06d6bSBaptiste Daroussin.Ic \&Os
23961d06d6bSBaptiste Daroussinmacro, overriding the
24061d06d6bSBaptiste Daroussin.Dv OSNAME
24161d06d6bSBaptiste Daroussinpreprocessor definition and the results of
24261d06d6bSBaptiste Daroussin.Xr uname 3 .
24361d06d6bSBaptiste DaroussinPassing
24461d06d6bSBaptiste Daroussin.Dv NULL
24561d06d6bSBaptiste Daroussinsets no default.
24661d06d6bSBaptiste Daroussin.El
24761d06d6bSBaptiste Daroussin.Pp
24861d06d6bSBaptiste DaroussinThe same parser may be used for multiple files so long as
24961d06d6bSBaptiste Daroussin.Fn mparse_reset
25061d06d6bSBaptiste Daroussinis called between parses.
25161d06d6bSBaptiste Daroussin.Fn mparse_free
25261d06d6bSBaptiste Daroussinmust be called to free the memory allocated by this function.
25361d06d6bSBaptiste DaroussinDeclared in
25461d06d6bSBaptiste Daroussin.In mandoc.h ,
25561d06d6bSBaptiste Daroussinimplemented in
25661d06d6bSBaptiste Daroussin.Pa read.c .
25761d06d6bSBaptiste Daroussin.It Fn mparse_free
25861d06d6bSBaptiste DaroussinFree all memory allocated by
25961d06d6bSBaptiste Daroussin.Fn mparse_alloc .
26061d06d6bSBaptiste DaroussinDeclared in
26161d06d6bSBaptiste Daroussin.In mandoc.h ,
26261d06d6bSBaptiste Daroussinimplemented in
26361d06d6bSBaptiste Daroussin.Pa read.c .
264*7295610fSBaptiste Daroussin.It Fn mparse_copy
265*7295610fSBaptiste DaroussinDump a copy of the input to the standard output; used for
266*7295610fSBaptiste Daroussin.Fl man T Ns Cm man .
26761d06d6bSBaptiste DaroussinDeclared in
26861d06d6bSBaptiste Daroussin.In mandoc.h ,
26961d06d6bSBaptiste Daroussinimplemented in
27061d06d6bSBaptiste Daroussin.Pa read.c .
27161d06d6bSBaptiste Daroussin.It Fn mparse_open
27261d06d6bSBaptiste DaroussinOpen the file for reading.
27361d06d6bSBaptiste DaroussinIf that fails and
27461d06d6bSBaptiste Daroussin.Fa fname
27561d06d6bSBaptiste Daroussindoes not already end in
27661d06d6bSBaptiste Daroussin.Ql .gz ,
27761d06d6bSBaptiste Daroussintry again after appending
27861d06d6bSBaptiste Daroussin.Ql .gz .
27961d06d6bSBaptiste DaroussinSave the information whether the file is zipped or not.
28061d06d6bSBaptiste DaroussinReturn a file descriptor open for reading or -1 on failure.
28161d06d6bSBaptiste DaroussinIt can be passed to
28261d06d6bSBaptiste Daroussin.Fn mparse_readfd
28361d06d6bSBaptiste Daroussinor used directly.
28461d06d6bSBaptiste DaroussinDeclared in
28561d06d6bSBaptiste Daroussin.In mandoc.h ,
28661d06d6bSBaptiste Daroussinimplemented in
28761d06d6bSBaptiste Daroussin.Pa read.c .
28861d06d6bSBaptiste Daroussin.It Fn mparse_readfd
28961d06d6bSBaptiste DaroussinParse a file descriptor opened with
29061d06d6bSBaptiste Daroussin.Xr open 2
29161d06d6bSBaptiste Daroussinor
29261d06d6bSBaptiste Daroussin.Fn mparse_open .
29361d06d6bSBaptiste DaroussinPass the associated filename in
29461d06d6bSBaptiste Daroussin.Va fname .
29561d06d6bSBaptiste DaroussinThis function may be called multiple times with different parameters; however,
29661d06d6bSBaptiste Daroussin.Xr close 2
29761d06d6bSBaptiste Daroussinand
29861d06d6bSBaptiste Daroussin.Fn mparse_reset
29961d06d6bSBaptiste Daroussinshould be invoked between parses.
30061d06d6bSBaptiste DaroussinDeclared in
30161d06d6bSBaptiste Daroussin.In mandoc.h ,
30261d06d6bSBaptiste Daroussinimplemented in
30361d06d6bSBaptiste Daroussin.Pa read.c .
30461d06d6bSBaptiste Daroussin.It Fn mparse_reset
30561d06d6bSBaptiste DaroussinReset a parser so that
30661d06d6bSBaptiste Daroussin.Fn mparse_readfd
30761d06d6bSBaptiste Daroussinmay be used again.
30861d06d6bSBaptiste DaroussinDeclared in
30961d06d6bSBaptiste Daroussin.In mandoc.h ,
31061d06d6bSBaptiste Daroussinimplemented in
31161d06d6bSBaptiste Daroussin.Pa read.c .
31261d06d6bSBaptiste Daroussin.It Fn mparse_result
31361d06d6bSBaptiste DaroussinObtain the result of a parse.
31461d06d6bSBaptiste DaroussinDeclared in
31561d06d6bSBaptiste Daroussin.In mandoc.h ,
31661d06d6bSBaptiste Daroussinimplemented in
31761d06d6bSBaptiste Daroussin.Pa read.c .
31861d06d6bSBaptiste Daroussin.El
31961d06d6bSBaptiste Daroussin.Ss Variables
32061d06d6bSBaptiste Daroussin.Bl -ohang
32161d06d6bSBaptiste Daroussin.It Va man_macronames
32261d06d6bSBaptiste DaroussinThe string representation of a
32361d06d6bSBaptiste Daroussin.Xr man 7
32461d06d6bSBaptiste Daroussinmacro as indexed by
32561d06d6bSBaptiste Daroussin.Vt "enum mant" .
32661d06d6bSBaptiste Daroussin.It Va mdoc_argnames
32761d06d6bSBaptiste DaroussinThe string representation of an
32861d06d6bSBaptiste Daroussin.Xr mdoc 7
32961d06d6bSBaptiste Daroussinmacro argument as indexed by
33061d06d6bSBaptiste Daroussin.Vt "enum mdocargt" .
33161d06d6bSBaptiste Daroussin.It Va mdoc_macronames
33261d06d6bSBaptiste DaroussinThe string representation of an
33361d06d6bSBaptiste Daroussin.Xr mdoc 7
33461d06d6bSBaptiste Daroussinmacro as indexed by
33561d06d6bSBaptiste Daroussin.Vt "enum mdoct" .
33661d06d6bSBaptiste Daroussin.El
33761d06d6bSBaptiste Daroussin.Sh IMPLEMENTATION NOTES
33861d06d6bSBaptiste DaroussinThis section consists of structural documentation for
33961d06d6bSBaptiste Daroussin.Xr mdoc 7
34061d06d6bSBaptiste Daroussinand
34161d06d6bSBaptiste Daroussin.Xr man 7
34261d06d6bSBaptiste Daroussinsyntax trees and strings.
34361d06d6bSBaptiste Daroussin.Ss Man and Mdoc Strings
34461d06d6bSBaptiste DaroussinStrings may be extracted from mdoc and man meta-data, or from text
34561d06d6bSBaptiste Daroussinnodes (MDOC_TEXT and MAN_TEXT, respectively).
34661d06d6bSBaptiste DaroussinThese strings have special non-printing formatting cues embedded in the
34761d06d6bSBaptiste Daroussintext itself, as well as
34861d06d6bSBaptiste Daroussin.Xr roff 7
34961d06d6bSBaptiste Daroussinescapes preserved from input.
35061d06d6bSBaptiste DaroussinImplementing systems will need to handle both situations to produce
35161d06d6bSBaptiste Daroussinhuman-readable text.
35261d06d6bSBaptiste DaroussinIn general, strings may be assumed to consist of 7-bit ASCII characters.
35361d06d6bSBaptiste Daroussin.Pp
35461d06d6bSBaptiste DaroussinThe following non-printing characters may be embedded in text strings:
35561d06d6bSBaptiste Daroussin.Bl -tag -width Ds
35661d06d6bSBaptiste Daroussin.It Dv ASCII_NBRSP
35761d06d6bSBaptiste DaroussinA non-breaking space character.
35861d06d6bSBaptiste Daroussin.It Dv ASCII_HYPH
35961d06d6bSBaptiste DaroussinA soft hyphen.
36061d06d6bSBaptiste Daroussin.It Dv ASCII_BREAK
36161d06d6bSBaptiste DaroussinA breakable zero-width space.
36261d06d6bSBaptiste Daroussin.El
36361d06d6bSBaptiste Daroussin.Pp
36461d06d6bSBaptiste DaroussinEscape characters are also passed verbatim into text strings.
36561d06d6bSBaptiste DaroussinAn escape character is a sequence of characters beginning with the
36661d06d6bSBaptiste Daroussinbackslash
36761d06d6bSBaptiste Daroussin.Pq Sq \e .
36861d06d6bSBaptiste DaroussinTo construct human-readable text, these should be intercepted with
36961d06d6bSBaptiste Daroussin.Xr mandoc_escape 3
37061d06d6bSBaptiste Daroussinand converted with one of the functions described in
37161d06d6bSBaptiste Daroussin.Xr mchars_alloc 3 .
37261d06d6bSBaptiste Daroussin.Ss Man Abstract Syntax Tree
37361d06d6bSBaptiste DaroussinThis AST is governed by the ontological rules dictated in
37461d06d6bSBaptiste Daroussin.Xr man 7
37561d06d6bSBaptiste Daroussinand derives its terminology accordingly.
37661d06d6bSBaptiste Daroussin.Pp
37761d06d6bSBaptiste DaroussinThe AST is composed of
37861d06d6bSBaptiste Daroussin.Vt struct roff_node
37961d06d6bSBaptiste Daroussinnodes with element, root and text types as declared by the
38061d06d6bSBaptiste Daroussin.Va type
38161d06d6bSBaptiste Daroussinfield.
38261d06d6bSBaptiste DaroussinEach node also provides its parse point (the
38361d06d6bSBaptiste Daroussin.Va line ,
38461d06d6bSBaptiste Daroussin.Va pos ,
38561d06d6bSBaptiste Daroussinand
38661d06d6bSBaptiste Daroussin.Va sec
38761d06d6bSBaptiste Daroussinfields), its position in the tree (the
38861d06d6bSBaptiste Daroussin.Va parent ,
38961d06d6bSBaptiste Daroussin.Va child ,
39061d06d6bSBaptiste Daroussin.Va next
39161d06d6bSBaptiste Daroussinand
39261d06d6bSBaptiste Daroussin.Va prev
39361d06d6bSBaptiste Daroussinfields) and some type-specific data.
39461d06d6bSBaptiste Daroussin.Pp
39561d06d6bSBaptiste DaroussinThe tree itself is arranged according to the following normal form,
39661d06d6bSBaptiste Daroussinwhere capitalised non-terminals represent nodes.
39761d06d6bSBaptiste Daroussin.Pp
39861d06d6bSBaptiste Daroussin.Bl -tag -width "ELEMENTXX" -compact
39961d06d6bSBaptiste Daroussin.It ROOT
40061d06d6bSBaptiste Daroussin\(<- mnode+
40161d06d6bSBaptiste Daroussin.It mnode
40261d06d6bSBaptiste Daroussin\(<- ELEMENT | TEXT | BLOCK
40361d06d6bSBaptiste Daroussin.It BLOCK
40461d06d6bSBaptiste Daroussin\(<- HEAD BODY
40561d06d6bSBaptiste Daroussin.It HEAD
40661d06d6bSBaptiste Daroussin\(<- mnode*
40761d06d6bSBaptiste Daroussin.It BODY
40861d06d6bSBaptiste Daroussin\(<- mnode*
40961d06d6bSBaptiste Daroussin.It ELEMENT
41061d06d6bSBaptiste Daroussin\(<- ELEMENT | TEXT*
41161d06d6bSBaptiste Daroussin.It TEXT
41261d06d6bSBaptiste Daroussin\(<- [[:ascii:]]*
41361d06d6bSBaptiste Daroussin.El
41461d06d6bSBaptiste Daroussin.Pp
41561d06d6bSBaptiste DaroussinThe only elements capable of nesting other elements are those with
41661d06d6bSBaptiste Daroussinnext-line scope as documented in
41761d06d6bSBaptiste Daroussin.Xr man 7 .
41861d06d6bSBaptiste Daroussin.Ss Mdoc Abstract Syntax Tree
41961d06d6bSBaptiste DaroussinThis AST is governed by the ontological
42061d06d6bSBaptiste Daroussinrules dictated in
42161d06d6bSBaptiste Daroussin.Xr mdoc 7
42261d06d6bSBaptiste Daroussinand derives its terminology accordingly.
42361d06d6bSBaptiste Daroussin.Qq In-line
42461d06d6bSBaptiste Daroussinelements described in
42561d06d6bSBaptiste Daroussin.Xr mdoc 7
42661d06d6bSBaptiste Daroussinare described simply as
42761d06d6bSBaptiste Daroussin.Qq elements .
42861d06d6bSBaptiste Daroussin.Pp
42961d06d6bSBaptiste DaroussinThe AST is composed of
43061d06d6bSBaptiste Daroussin.Vt struct roff_node
43161d06d6bSBaptiste Daroussinnodes with block, head, body, element, root and text types as declared
43261d06d6bSBaptiste Daroussinby the
43361d06d6bSBaptiste Daroussin.Va type
43461d06d6bSBaptiste Daroussinfield.
43561d06d6bSBaptiste DaroussinEach node also provides its parse point (the
43661d06d6bSBaptiste Daroussin.Va line ,
43761d06d6bSBaptiste Daroussin.Va pos ,
43861d06d6bSBaptiste Daroussinand
43961d06d6bSBaptiste Daroussin.Va sec
44061d06d6bSBaptiste Daroussinfields), its position in the tree (the
44161d06d6bSBaptiste Daroussin.Va parent ,
44261d06d6bSBaptiste Daroussin.Va child ,
44361d06d6bSBaptiste Daroussin.Va last ,
44461d06d6bSBaptiste Daroussin.Va next
44561d06d6bSBaptiste Daroussinand
44661d06d6bSBaptiste Daroussin.Va prev
44761d06d6bSBaptiste Daroussinfields) and some type-specific data, in particular, for nodes generated
44861d06d6bSBaptiste Daroussinfrom macros, the generating macro in the
44961d06d6bSBaptiste Daroussin.Va tok
45061d06d6bSBaptiste Daroussinfield.
45161d06d6bSBaptiste Daroussin.Pp
45261d06d6bSBaptiste DaroussinThe tree itself is arranged according to the following normal form,
45361d06d6bSBaptiste Daroussinwhere capitalised non-terminals represent nodes.
45461d06d6bSBaptiste Daroussin.Pp
45561d06d6bSBaptiste Daroussin.Bl -tag -width "ELEMENTXX" -compact
45661d06d6bSBaptiste Daroussin.It ROOT
45761d06d6bSBaptiste Daroussin\(<- mnode+
45861d06d6bSBaptiste Daroussin.It mnode
45961d06d6bSBaptiste Daroussin\(<- BLOCK | ELEMENT | TEXT
46061d06d6bSBaptiste Daroussin.It BLOCK
46161d06d6bSBaptiste Daroussin\(<- HEAD [TEXT] (BODY [TEXT])+ [TAIL [TEXT]]
46261d06d6bSBaptiste Daroussin.It ELEMENT
46361d06d6bSBaptiste Daroussin\(<- TEXT*
46461d06d6bSBaptiste Daroussin.It HEAD
46561d06d6bSBaptiste Daroussin\(<- mnode*
46661d06d6bSBaptiste Daroussin.It BODY
46761d06d6bSBaptiste Daroussin\(<- mnode* [ENDBODY mnode*]
46861d06d6bSBaptiste Daroussin.It TAIL
46961d06d6bSBaptiste Daroussin\(<- mnode*
47061d06d6bSBaptiste Daroussin.It TEXT
47161d06d6bSBaptiste Daroussin\(<- [[:ascii:]]*
47261d06d6bSBaptiste Daroussin.El
47361d06d6bSBaptiste Daroussin.Pp
47461d06d6bSBaptiste DaroussinOf note are the TEXT nodes following the HEAD, BODY and TAIL nodes of
47561d06d6bSBaptiste Daroussinthe BLOCK production: these refer to punctuation marks.
47661d06d6bSBaptiste DaroussinFurthermore, although a TEXT node will generally have a non-zero-length
47761d06d6bSBaptiste Daroussinstring, in the specific case of
47861d06d6bSBaptiste Daroussin.Sq \&.Bd \-literal ,
47961d06d6bSBaptiste Daroussinan empty line will produce a zero-length string.
48061d06d6bSBaptiste DaroussinMultiple body parts are only found in invocations of
48161d06d6bSBaptiste Daroussin.Sq \&Bl \-column ,
48261d06d6bSBaptiste Daroussinwhere a new body introduces a new phrase.
48361d06d6bSBaptiste Daroussin.Pp
48461d06d6bSBaptiste DaroussinThe
48561d06d6bSBaptiste Daroussin.Xr mdoc 7
48661d06d6bSBaptiste Daroussinsyntax tree accommodates broken block structures as well.
48761d06d6bSBaptiste DaroussinThe ENDBODY node is available to end the formatting associated
48861d06d6bSBaptiste Daroussinwith a given block before the physical end of that block.
48961d06d6bSBaptiste DaroussinIt has a non-null
49061d06d6bSBaptiste Daroussin.Va end
49161d06d6bSBaptiste Daroussinfield, is of the BODY
49261d06d6bSBaptiste Daroussin.Va type ,
49361d06d6bSBaptiste Daroussinhas the same
49461d06d6bSBaptiste Daroussin.Va tok
49561d06d6bSBaptiste Daroussinas the BLOCK it is ending, and has a
49661d06d6bSBaptiste Daroussin.Va pending
49761d06d6bSBaptiste Daroussinfield pointing to that BLOCK's BODY node.
49861d06d6bSBaptiste DaroussinIt is an indirect child of that BODY node
49961d06d6bSBaptiste Daroussinand has no children of its own.
50061d06d6bSBaptiste Daroussin.Pp
50161d06d6bSBaptiste DaroussinAn ENDBODY node is generated when a block ends while one of its child
50261d06d6bSBaptiste Daroussinblocks is still open, like in the following example:
50361d06d6bSBaptiste Daroussin.Bd -literal -offset indent
50461d06d6bSBaptiste Daroussin\&.Ao ao
50561d06d6bSBaptiste Daroussin\&.Bo bo ac
50661d06d6bSBaptiste Daroussin\&.Ac bc
50761d06d6bSBaptiste Daroussin\&.Bc end
50861d06d6bSBaptiste Daroussin.Ed
50961d06d6bSBaptiste Daroussin.Pp
51061d06d6bSBaptiste DaroussinThis example results in the following block structure:
51161d06d6bSBaptiste Daroussin.Bd -literal -offset indent
51261d06d6bSBaptiste DaroussinBLOCK Ao
51361d06d6bSBaptiste Daroussin    HEAD Ao
51461d06d6bSBaptiste Daroussin    BODY Ao
51561d06d6bSBaptiste Daroussin        TEXT ao
51661d06d6bSBaptiste Daroussin        BLOCK Bo, pending -> Ao
51761d06d6bSBaptiste Daroussin            HEAD Bo
51861d06d6bSBaptiste Daroussin            BODY Bo
51961d06d6bSBaptiste Daroussin                TEXT bo
52061d06d6bSBaptiste Daroussin                TEXT ac
52161d06d6bSBaptiste Daroussin                ENDBODY Ao, pending -> Ao
52261d06d6bSBaptiste Daroussin                TEXT bc
52361d06d6bSBaptiste DaroussinTEXT end
52461d06d6bSBaptiste Daroussin.Ed
52561d06d6bSBaptiste Daroussin.Pp
52661d06d6bSBaptiste DaroussinHere, the formatting of the
52761d06d6bSBaptiste Daroussin.Ic \&Ao
52861d06d6bSBaptiste Daroussinblock extends from TEXT ao to TEXT ac,
52961d06d6bSBaptiste Daroussinwhile the formatting of the
53061d06d6bSBaptiste Daroussin.Ic \&Bo
53161d06d6bSBaptiste Daroussinblock extends from TEXT bo to TEXT bc.
53261d06d6bSBaptiste DaroussinIt renders as follows in
53361d06d6bSBaptiste Daroussin.Fl T Ns Cm ascii
53461d06d6bSBaptiste Daroussinmode:
53561d06d6bSBaptiste Daroussin.Pp
53661d06d6bSBaptiste Daroussin.Dl <ao [bo ac> bc] end
53761d06d6bSBaptiste Daroussin.Pp
53861d06d6bSBaptiste DaroussinSupport for badly-nested blocks is only provided for backward
53961d06d6bSBaptiste Daroussincompatibility with some older
54061d06d6bSBaptiste Daroussin.Xr mdoc 7
54161d06d6bSBaptiste Daroussinimplementations.
54261d06d6bSBaptiste DaroussinUsing badly-nested blocks is
54361d06d6bSBaptiste Daroussin.Em strongly discouraged ;
54461d06d6bSBaptiste Daroussinfor example, the
54561d06d6bSBaptiste Daroussin.Fl T Ns Cm html
54661d06d6bSBaptiste Daroussinfront-end to
54761d06d6bSBaptiste Daroussin.Xr mandoc 1
54861d06d6bSBaptiste Daroussinis unable to render them in any meaningful way.
54961d06d6bSBaptiste DaroussinFurthermore, behaviour when encountering badly-nested blocks is not
55061d06d6bSBaptiste Daroussinconsistent across troff implementations, especially when using multiple
55161d06d6bSBaptiste Daroussinlevels of badly-nested blocks.
55261d06d6bSBaptiste Daroussin.Sh SEE ALSO
55361d06d6bSBaptiste Daroussin.Xr mandoc 1 ,
55461d06d6bSBaptiste Daroussin.Xr man.cgi 3 ,
55561d06d6bSBaptiste Daroussin.Xr mandoc_escape 3 ,
55661d06d6bSBaptiste Daroussin.Xr mandoc_headers 3 ,
55761d06d6bSBaptiste Daroussin.Xr mandoc_malloc 3 ,
55861d06d6bSBaptiste Daroussin.Xr mansearch 3 ,
55961d06d6bSBaptiste Daroussin.Xr mchars_alloc 3 ,
56061d06d6bSBaptiste Daroussin.Xr tbl 3 ,
56161d06d6bSBaptiste Daroussin.Xr eqn 7 ,
56261d06d6bSBaptiste Daroussin.Xr man 7 ,
56361d06d6bSBaptiste Daroussin.Xr mandoc_char 7 ,
56461d06d6bSBaptiste Daroussin.Xr mdoc 7 ,
56561d06d6bSBaptiste Daroussin.Xr roff 7 ,
56661d06d6bSBaptiste Daroussin.Xr tbl 7
56761d06d6bSBaptiste Daroussin.Sh AUTHORS
56861d06d6bSBaptiste Daroussin.An -nosplit
56961d06d6bSBaptiste DaroussinThe
57061d06d6bSBaptiste Daroussin.Nm
57161d06d6bSBaptiste Daroussinlibrary was written by
57261d06d6bSBaptiste Daroussin.An Kristaps Dzonsons Aq Mt kristaps@bsd.lv
57361d06d6bSBaptiste Daroussinand is maintained by
57461d06d6bSBaptiste Daroussin.An Ingo Schwarze Aq Mt schwarze@openbsd.org .
575