1*6d38604fSBaptiste Daroussin /* $Id: mdoc.c,v 1.275 2020/04/06 10:16:17 schwarze Exp $ */
261d06d6bSBaptiste Daroussin /*
3*6d38604fSBaptiste Daroussin * Copyright (c) 2010, 2012-2018, 2020 Ingo Schwarze <schwarze@openbsd.org>
461d06d6bSBaptiste Daroussin * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
561d06d6bSBaptiste Daroussin *
661d06d6bSBaptiste Daroussin * Permission to use, copy, modify, and distribute this software for any
761d06d6bSBaptiste Daroussin * purpose with or without fee is hereby granted, provided that the above
861d06d6bSBaptiste Daroussin * copyright notice and this permission notice appear in all copies.
961d06d6bSBaptiste Daroussin *
1061d06d6bSBaptiste Daroussin * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
1161d06d6bSBaptiste Daroussin * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1261d06d6bSBaptiste Daroussin * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
1361d06d6bSBaptiste Daroussin * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1461d06d6bSBaptiste Daroussin * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1561d06d6bSBaptiste Daroussin * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1661d06d6bSBaptiste Daroussin * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17*6d38604fSBaptiste Daroussin *
18*6d38604fSBaptiste Daroussin * Top level and utility functions of the mdoc(7) parser for mandoc(1).
1961d06d6bSBaptiste Daroussin */
2061d06d6bSBaptiste Daroussin #include "config.h"
2161d06d6bSBaptiste Daroussin
2261d06d6bSBaptiste Daroussin #include <sys/types.h>
2361d06d6bSBaptiste Daroussin
2461d06d6bSBaptiste Daroussin #include <assert.h>
2561d06d6bSBaptiste Daroussin #include <ctype.h>
2661d06d6bSBaptiste Daroussin #include <stdarg.h>
2761d06d6bSBaptiste Daroussin #include <stdio.h>
2861d06d6bSBaptiste Daroussin #include <stdlib.h>
2961d06d6bSBaptiste Daroussin #include <string.h>
3061d06d6bSBaptiste Daroussin #include <time.h>
3161d06d6bSBaptiste Daroussin
3261d06d6bSBaptiste Daroussin #include "mandoc_aux.h"
3361d06d6bSBaptiste Daroussin #include "mandoc.h"
3461d06d6bSBaptiste Daroussin #include "roff.h"
3561d06d6bSBaptiste Daroussin #include "mdoc.h"
3661d06d6bSBaptiste Daroussin #include "libmandoc.h"
3761d06d6bSBaptiste Daroussin #include "roff_int.h"
3861d06d6bSBaptiste Daroussin #include "libmdoc.h"
3961d06d6bSBaptiste Daroussin
4061d06d6bSBaptiste Daroussin const char *const __mdoc_argnames[MDOC_ARG_MAX] = {
4161d06d6bSBaptiste Daroussin "split", "nosplit", "ragged",
4261d06d6bSBaptiste Daroussin "unfilled", "literal", "file",
4361d06d6bSBaptiste Daroussin "offset", "bullet", "dash",
4461d06d6bSBaptiste Daroussin "hyphen", "item", "enum",
4561d06d6bSBaptiste Daroussin "tag", "diag", "hang",
4661d06d6bSBaptiste Daroussin "ohang", "inset", "column",
4761d06d6bSBaptiste Daroussin "width", "compact", "std",
4861d06d6bSBaptiste Daroussin "filled", "words", "emphasis",
4961d06d6bSBaptiste Daroussin "symbolic", "nested", "centered"
5061d06d6bSBaptiste Daroussin };
5161d06d6bSBaptiste Daroussin const char * const *mdoc_argnames = __mdoc_argnames;
5261d06d6bSBaptiste Daroussin
5361d06d6bSBaptiste Daroussin static int mdoc_ptext(struct roff_man *, int, char *, int);
5461d06d6bSBaptiste Daroussin static int mdoc_pmacro(struct roff_man *, int, char *, int);
5561d06d6bSBaptiste Daroussin
5661d06d6bSBaptiste Daroussin
5761d06d6bSBaptiste Daroussin /*
5861d06d6bSBaptiste Daroussin * Main parse routine. Parses a single line -- really just hands off to
5961d06d6bSBaptiste Daroussin * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
6061d06d6bSBaptiste Daroussin */
6161d06d6bSBaptiste Daroussin int
mdoc_parseln(struct roff_man * mdoc,int ln,char * buf,int offs)6261d06d6bSBaptiste Daroussin mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs)
6361d06d6bSBaptiste Daroussin {
6461d06d6bSBaptiste Daroussin
6561d06d6bSBaptiste Daroussin if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line)
6661d06d6bSBaptiste Daroussin mdoc->flags |= MDOC_NEWLINE;
6761d06d6bSBaptiste Daroussin
6861d06d6bSBaptiste Daroussin /*
6961d06d6bSBaptiste Daroussin * Let the roff nS register switch SYNOPSIS mode early,
7061d06d6bSBaptiste Daroussin * such that the parser knows at all times
7161d06d6bSBaptiste Daroussin * whether this mode is on or off.
7261d06d6bSBaptiste Daroussin * Note that this mode is also switched by the Sh macro.
7361d06d6bSBaptiste Daroussin */
7461d06d6bSBaptiste Daroussin if (roff_getreg(mdoc->roff, "nS"))
7561d06d6bSBaptiste Daroussin mdoc->flags |= MDOC_SYNOPSIS;
7661d06d6bSBaptiste Daroussin else
7761d06d6bSBaptiste Daroussin mdoc->flags &= ~MDOC_SYNOPSIS;
7861d06d6bSBaptiste Daroussin
7961d06d6bSBaptiste Daroussin return roff_getcontrol(mdoc->roff, buf, &offs) ?
8061d06d6bSBaptiste Daroussin mdoc_pmacro(mdoc, ln, buf, offs) :
8161d06d6bSBaptiste Daroussin mdoc_ptext(mdoc, ln, buf, offs);
8261d06d6bSBaptiste Daroussin }
8361d06d6bSBaptiste Daroussin
8461d06d6bSBaptiste Daroussin void
mdoc_tail_alloc(struct roff_man * mdoc,int line,int pos,enum roff_tok tok)8561d06d6bSBaptiste Daroussin mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, enum roff_tok tok)
8661d06d6bSBaptiste Daroussin {
8761d06d6bSBaptiste Daroussin struct roff_node *p;
8861d06d6bSBaptiste Daroussin
8961d06d6bSBaptiste Daroussin p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok);
9061d06d6bSBaptiste Daroussin roff_node_append(mdoc, p);
9161d06d6bSBaptiste Daroussin mdoc->next = ROFF_NEXT_CHILD;
9261d06d6bSBaptiste Daroussin }
9361d06d6bSBaptiste Daroussin
9461d06d6bSBaptiste Daroussin struct roff_node *
mdoc_endbody_alloc(struct roff_man * mdoc,int line,int pos,enum roff_tok tok,struct roff_node * body)9561d06d6bSBaptiste Daroussin mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos,
9661d06d6bSBaptiste Daroussin enum roff_tok tok, struct roff_node *body)
9761d06d6bSBaptiste Daroussin {
9861d06d6bSBaptiste Daroussin struct roff_node *p;
9961d06d6bSBaptiste Daroussin
10061d06d6bSBaptiste Daroussin body->flags |= NODE_ENDED;
10161d06d6bSBaptiste Daroussin body->parent->flags |= NODE_ENDED;
10261d06d6bSBaptiste Daroussin p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok);
10361d06d6bSBaptiste Daroussin p->body = body;
10461d06d6bSBaptiste Daroussin p->norm = body->norm;
10561d06d6bSBaptiste Daroussin p->end = ENDBODY_SPACE;
10661d06d6bSBaptiste Daroussin roff_node_append(mdoc, p);
10761d06d6bSBaptiste Daroussin mdoc->next = ROFF_NEXT_SIBLING;
10861d06d6bSBaptiste Daroussin return p;
10961d06d6bSBaptiste Daroussin }
11061d06d6bSBaptiste Daroussin
11161d06d6bSBaptiste Daroussin struct roff_node *
mdoc_block_alloc(struct roff_man * mdoc,int line,int pos,enum roff_tok tok,struct mdoc_arg * args)11261d06d6bSBaptiste Daroussin mdoc_block_alloc(struct roff_man *mdoc, int line, int pos,
11361d06d6bSBaptiste Daroussin enum roff_tok tok, struct mdoc_arg *args)
11461d06d6bSBaptiste Daroussin {
11561d06d6bSBaptiste Daroussin struct roff_node *p;
11661d06d6bSBaptiste Daroussin
11761d06d6bSBaptiste Daroussin p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok);
11861d06d6bSBaptiste Daroussin p->args = args;
11961d06d6bSBaptiste Daroussin if (p->args)
12061d06d6bSBaptiste Daroussin (args->refcnt)++;
12161d06d6bSBaptiste Daroussin
12261d06d6bSBaptiste Daroussin switch (tok) {
12361d06d6bSBaptiste Daroussin case MDOC_Bd:
12461d06d6bSBaptiste Daroussin case MDOC_Bf:
12561d06d6bSBaptiste Daroussin case MDOC_Bl:
12661d06d6bSBaptiste Daroussin case MDOC_En:
12761d06d6bSBaptiste Daroussin case MDOC_Rs:
12861d06d6bSBaptiste Daroussin p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
12961d06d6bSBaptiste Daroussin break;
13061d06d6bSBaptiste Daroussin default:
13161d06d6bSBaptiste Daroussin break;
13261d06d6bSBaptiste Daroussin }
13361d06d6bSBaptiste Daroussin roff_node_append(mdoc, p);
13461d06d6bSBaptiste Daroussin mdoc->next = ROFF_NEXT_CHILD;
13561d06d6bSBaptiste Daroussin return p;
13661d06d6bSBaptiste Daroussin }
13761d06d6bSBaptiste Daroussin
13861d06d6bSBaptiste Daroussin void
mdoc_elem_alloc(struct roff_man * mdoc,int line,int pos,enum roff_tok tok,struct mdoc_arg * args)13961d06d6bSBaptiste Daroussin mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos,
14061d06d6bSBaptiste Daroussin enum roff_tok tok, struct mdoc_arg *args)
14161d06d6bSBaptiste Daroussin {
14261d06d6bSBaptiste Daroussin struct roff_node *p;
14361d06d6bSBaptiste Daroussin
14461d06d6bSBaptiste Daroussin p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok);
14561d06d6bSBaptiste Daroussin p->args = args;
14661d06d6bSBaptiste Daroussin if (p->args)
14761d06d6bSBaptiste Daroussin (args->refcnt)++;
14861d06d6bSBaptiste Daroussin
14961d06d6bSBaptiste Daroussin switch (tok) {
15061d06d6bSBaptiste Daroussin case MDOC_An:
15161d06d6bSBaptiste Daroussin p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
15261d06d6bSBaptiste Daroussin break;
15361d06d6bSBaptiste Daroussin default:
15461d06d6bSBaptiste Daroussin break;
15561d06d6bSBaptiste Daroussin }
15661d06d6bSBaptiste Daroussin roff_node_append(mdoc, p);
15761d06d6bSBaptiste Daroussin mdoc->next = ROFF_NEXT_CHILD;
15861d06d6bSBaptiste Daroussin }
15961d06d6bSBaptiste Daroussin
16061d06d6bSBaptiste Daroussin /*
16161d06d6bSBaptiste Daroussin * Parse free-form text, that is, a line that does not begin with the
16261d06d6bSBaptiste Daroussin * control character.
16361d06d6bSBaptiste Daroussin */
16461d06d6bSBaptiste Daroussin static int
mdoc_ptext(struct roff_man * mdoc,int line,char * buf,int offs)16561d06d6bSBaptiste Daroussin mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs)
16661d06d6bSBaptiste Daroussin {
16761d06d6bSBaptiste Daroussin struct roff_node *n;
16861d06d6bSBaptiste Daroussin const char *cp, *sp;
16961d06d6bSBaptiste Daroussin char *c, *ws, *end;
17061d06d6bSBaptiste Daroussin
17161d06d6bSBaptiste Daroussin n = mdoc->last;
17261d06d6bSBaptiste Daroussin
17361d06d6bSBaptiste Daroussin /*
17461d06d6bSBaptiste Daroussin * If a column list contains plain text, assume an implicit item
17561d06d6bSBaptiste Daroussin * macro. This can happen one or more times at the beginning
17661d06d6bSBaptiste Daroussin * of such a list, intermixed with non-It mdoc macros and with
17761d06d6bSBaptiste Daroussin * nodes generated on the roff level, for example by tbl.
17861d06d6bSBaptiste Daroussin */
17961d06d6bSBaptiste Daroussin
18061d06d6bSBaptiste Daroussin if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
18161d06d6bSBaptiste Daroussin n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) ||
18261d06d6bSBaptiste Daroussin (n->parent != NULL && n->parent->tok == MDOC_Bl &&
18361d06d6bSBaptiste Daroussin n->parent->norm->Bl.type == LIST_column)) {
18461d06d6bSBaptiste Daroussin mdoc->flags |= MDOC_FREECOL;
1857295610fSBaptiste Daroussin (*mdoc_macro(MDOC_It)->fp)(mdoc, MDOC_It,
1867295610fSBaptiste Daroussin line, offs, &offs, buf);
18761d06d6bSBaptiste Daroussin return 1;
18861d06d6bSBaptiste Daroussin }
18961d06d6bSBaptiste Daroussin
19061d06d6bSBaptiste Daroussin /*
19161d06d6bSBaptiste Daroussin * Search for the beginning of unescaped trailing whitespace (ws)
19261d06d6bSBaptiste Daroussin * and for the first character not to be output (end).
19361d06d6bSBaptiste Daroussin */
19461d06d6bSBaptiste Daroussin
19561d06d6bSBaptiste Daroussin /* FIXME: replace with strcspn(). */
19661d06d6bSBaptiste Daroussin ws = NULL;
19761d06d6bSBaptiste Daroussin for (c = end = buf + offs; *c; c++) {
19861d06d6bSBaptiste Daroussin switch (*c) {
19961d06d6bSBaptiste Daroussin case ' ':
20061d06d6bSBaptiste Daroussin if (NULL == ws)
20161d06d6bSBaptiste Daroussin ws = c;
20261d06d6bSBaptiste Daroussin continue;
20361d06d6bSBaptiste Daroussin case '\t':
20461d06d6bSBaptiste Daroussin /*
20561d06d6bSBaptiste Daroussin * Always warn about trailing tabs,
20661d06d6bSBaptiste Daroussin * even outside literal context,
20761d06d6bSBaptiste Daroussin * where they should be put on the next line.
20861d06d6bSBaptiste Daroussin */
20961d06d6bSBaptiste Daroussin if (NULL == ws)
21061d06d6bSBaptiste Daroussin ws = c;
21161d06d6bSBaptiste Daroussin /*
21261d06d6bSBaptiste Daroussin * Strip trailing tabs in literal context only;
21361d06d6bSBaptiste Daroussin * outside, they affect the next line.
21461d06d6bSBaptiste Daroussin */
2157295610fSBaptiste Daroussin if (mdoc->flags & ROFF_NOFILL)
21661d06d6bSBaptiste Daroussin continue;
21761d06d6bSBaptiste Daroussin break;
21861d06d6bSBaptiste Daroussin case '\\':
21961d06d6bSBaptiste Daroussin /* Skip the escaped character, too, if any. */
22061d06d6bSBaptiste Daroussin if (c[1])
22161d06d6bSBaptiste Daroussin c++;
22261d06d6bSBaptiste Daroussin /* FALLTHROUGH */
22361d06d6bSBaptiste Daroussin default:
22461d06d6bSBaptiste Daroussin ws = NULL;
22561d06d6bSBaptiste Daroussin break;
22661d06d6bSBaptiste Daroussin }
22761d06d6bSBaptiste Daroussin end = c + 1;
22861d06d6bSBaptiste Daroussin }
22961d06d6bSBaptiste Daroussin *end = '\0';
23061d06d6bSBaptiste Daroussin
23161d06d6bSBaptiste Daroussin if (ws)
2327295610fSBaptiste Daroussin mandoc_msg(MANDOCERR_SPACE_EOL, line, (int)(ws - buf), NULL);
23361d06d6bSBaptiste Daroussin
23461d06d6bSBaptiste Daroussin /*
23561d06d6bSBaptiste Daroussin * Blank lines are allowed in no-fill mode
23661d06d6bSBaptiste Daroussin * and cancel preceding \c,
23761d06d6bSBaptiste Daroussin * but add a single vertical space elsewhere.
23861d06d6bSBaptiste Daroussin */
23961d06d6bSBaptiste Daroussin
2407295610fSBaptiste Daroussin if (buf[offs] == '\0' && (mdoc->flags & ROFF_NOFILL) == 0) {
24161d06d6bSBaptiste Daroussin switch (mdoc->last->type) {
24261d06d6bSBaptiste Daroussin case ROFFT_TEXT:
24361d06d6bSBaptiste Daroussin sp = mdoc->last->string;
24461d06d6bSBaptiste Daroussin cp = end = strchr(sp, '\0') - 2;
24561d06d6bSBaptiste Daroussin if (cp < sp || cp[0] != '\\' || cp[1] != 'c')
24661d06d6bSBaptiste Daroussin break;
24761d06d6bSBaptiste Daroussin while (cp > sp && cp[-1] == '\\')
24861d06d6bSBaptiste Daroussin cp--;
24961d06d6bSBaptiste Daroussin if ((end - cp) % 2)
25061d06d6bSBaptiste Daroussin break;
25161d06d6bSBaptiste Daroussin *end = '\0';
25261d06d6bSBaptiste Daroussin return 1;
25361d06d6bSBaptiste Daroussin default:
25461d06d6bSBaptiste Daroussin break;
25561d06d6bSBaptiste Daroussin }
2567295610fSBaptiste Daroussin mandoc_msg(MANDOCERR_FI_BLANK, line, (int)(c - buf), NULL);
25761d06d6bSBaptiste Daroussin roff_elem_alloc(mdoc, line, offs, ROFF_sp);
25861d06d6bSBaptiste Daroussin mdoc->last->flags |= NODE_VALID | NODE_ENDED;
25961d06d6bSBaptiste Daroussin mdoc->next = ROFF_NEXT_SIBLING;
26061d06d6bSBaptiste Daroussin return 1;
26161d06d6bSBaptiste Daroussin }
26261d06d6bSBaptiste Daroussin
26361d06d6bSBaptiste Daroussin roff_word_alloc(mdoc, line, offs, buf+offs);
26461d06d6bSBaptiste Daroussin
2657295610fSBaptiste Daroussin if (mdoc->flags & ROFF_NOFILL)
26661d06d6bSBaptiste Daroussin return 1;
26761d06d6bSBaptiste Daroussin
26861d06d6bSBaptiste Daroussin /*
26961d06d6bSBaptiste Daroussin * End-of-sentence check. If the last character is an unescaped
27061d06d6bSBaptiste Daroussin * EOS character, then flag the node as being the end of a
27161d06d6bSBaptiste Daroussin * sentence. The front-end will know how to interpret this.
27261d06d6bSBaptiste Daroussin */
27361d06d6bSBaptiste Daroussin
27461d06d6bSBaptiste Daroussin assert(buf < end);
27561d06d6bSBaptiste Daroussin
27661d06d6bSBaptiste Daroussin if (mandoc_eos(buf+offs, (size_t)(end-buf-offs)))
27761d06d6bSBaptiste Daroussin mdoc->last->flags |= NODE_EOS;
27861d06d6bSBaptiste Daroussin
27961d06d6bSBaptiste Daroussin for (c = buf + offs; c != NULL; c = strchr(c + 1, '.')) {
28061d06d6bSBaptiste Daroussin if (c - buf < offs + 2)
28161d06d6bSBaptiste Daroussin continue;
28261d06d6bSBaptiste Daroussin if (end - c < 3)
28361d06d6bSBaptiste Daroussin break;
28461d06d6bSBaptiste Daroussin if (c[1] != ' ' ||
28561d06d6bSBaptiste Daroussin isalnum((unsigned char)c[-2]) == 0 ||
28661d06d6bSBaptiste Daroussin isalnum((unsigned char)c[-1]) == 0 ||
28761d06d6bSBaptiste Daroussin (c[-2] == 'n' && c[-1] == 'c') ||
28861d06d6bSBaptiste Daroussin (c[-2] == 'v' && c[-1] == 's'))
28961d06d6bSBaptiste Daroussin continue;
29061d06d6bSBaptiste Daroussin c += 2;
29161d06d6bSBaptiste Daroussin if (*c == ' ')
29261d06d6bSBaptiste Daroussin c++;
29361d06d6bSBaptiste Daroussin if (*c == ' ')
29461d06d6bSBaptiste Daroussin c++;
29561d06d6bSBaptiste Daroussin if (isupper((unsigned char)(*c)))
2967295610fSBaptiste Daroussin mandoc_msg(MANDOCERR_EOS, line, (int)(c - buf), NULL);
29761d06d6bSBaptiste Daroussin }
29861d06d6bSBaptiste Daroussin
29961d06d6bSBaptiste Daroussin return 1;
30061d06d6bSBaptiste Daroussin }
30161d06d6bSBaptiste Daroussin
30261d06d6bSBaptiste Daroussin /*
30361d06d6bSBaptiste Daroussin * Parse a macro line, that is, a line beginning with the control
30461d06d6bSBaptiste Daroussin * character.
30561d06d6bSBaptiste Daroussin */
30661d06d6bSBaptiste Daroussin static int
mdoc_pmacro(struct roff_man * mdoc,int ln,char * buf,int offs)30761d06d6bSBaptiste Daroussin mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs)
30861d06d6bSBaptiste Daroussin {
30961d06d6bSBaptiste Daroussin struct roff_node *n;
31061d06d6bSBaptiste Daroussin const char *cp;
31161d06d6bSBaptiste Daroussin size_t sz;
31261d06d6bSBaptiste Daroussin enum roff_tok tok;
31361d06d6bSBaptiste Daroussin int sv;
31461d06d6bSBaptiste Daroussin
31561d06d6bSBaptiste Daroussin /* Determine the line macro. */
31661d06d6bSBaptiste Daroussin
31761d06d6bSBaptiste Daroussin sv = offs;
31861d06d6bSBaptiste Daroussin tok = TOKEN_NONE;
31961d06d6bSBaptiste Daroussin for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++)
32061d06d6bSBaptiste Daroussin offs++;
32161d06d6bSBaptiste Daroussin if (sz == 2 || sz == 3)
32261d06d6bSBaptiste Daroussin tok = roffhash_find(mdoc->mdocmac, buf + sv, sz);
32361d06d6bSBaptiste Daroussin if (tok == TOKEN_NONE) {
3247295610fSBaptiste Daroussin mandoc_msg(MANDOCERR_MACRO, ln, sv, "%s", buf + sv - 1);
32561d06d6bSBaptiste Daroussin return 1;
32661d06d6bSBaptiste Daroussin }
32761d06d6bSBaptiste Daroussin
32861d06d6bSBaptiste Daroussin /* Skip a leading escape sequence or tab. */
32961d06d6bSBaptiste Daroussin
33061d06d6bSBaptiste Daroussin switch (buf[offs]) {
33161d06d6bSBaptiste Daroussin case '\\':
33261d06d6bSBaptiste Daroussin cp = buf + offs + 1;
33361d06d6bSBaptiste Daroussin mandoc_escape(&cp, NULL, NULL);
33461d06d6bSBaptiste Daroussin offs = cp - buf;
33561d06d6bSBaptiste Daroussin break;
33661d06d6bSBaptiste Daroussin case '\t':
33761d06d6bSBaptiste Daroussin offs++;
33861d06d6bSBaptiste Daroussin break;
33961d06d6bSBaptiste Daroussin default:
34061d06d6bSBaptiste Daroussin break;
34161d06d6bSBaptiste Daroussin }
34261d06d6bSBaptiste Daroussin
34361d06d6bSBaptiste Daroussin /* Jump to the next non-whitespace word. */
34461d06d6bSBaptiste Daroussin
34561d06d6bSBaptiste Daroussin while (buf[offs] == ' ')
34661d06d6bSBaptiste Daroussin offs++;
34761d06d6bSBaptiste Daroussin
34861d06d6bSBaptiste Daroussin /*
34961d06d6bSBaptiste Daroussin * Trailing whitespace. Note that tabs are allowed to be passed
35061d06d6bSBaptiste Daroussin * into the parser as "text", so we only warn about spaces here.
35161d06d6bSBaptiste Daroussin */
35261d06d6bSBaptiste Daroussin
35361d06d6bSBaptiste Daroussin if ('\0' == buf[offs] && ' ' == buf[offs - 1])
3547295610fSBaptiste Daroussin mandoc_msg(MANDOCERR_SPACE_EOL, ln, offs - 1, NULL);
35561d06d6bSBaptiste Daroussin
35661d06d6bSBaptiste Daroussin /*
357*6d38604fSBaptiste Daroussin * If an initial or transparent macro or a list invocation,
358*6d38604fSBaptiste Daroussin * divert directly into macro processing.
35961d06d6bSBaptiste Daroussin */
36061d06d6bSBaptiste Daroussin
36161d06d6bSBaptiste Daroussin n = mdoc->last;
362*6d38604fSBaptiste Daroussin if (n == NULL || tok == MDOC_It || tok == MDOC_El ||
363*6d38604fSBaptiste Daroussin roff_tok_transparent(tok)) {
3647295610fSBaptiste Daroussin (*mdoc_macro(tok)->fp)(mdoc, tok, ln, sv, &offs, buf);
36561d06d6bSBaptiste Daroussin return 1;
36661d06d6bSBaptiste Daroussin }
36761d06d6bSBaptiste Daroussin
36861d06d6bSBaptiste Daroussin /*
36961d06d6bSBaptiste Daroussin * If a column list contains a non-It macro, assume an implicit
37061d06d6bSBaptiste Daroussin * item macro. This can happen one or more times at the
37161d06d6bSBaptiste Daroussin * beginning of such a list, intermixed with text lines and
37261d06d6bSBaptiste Daroussin * with nodes generated on the roff level, for example by tbl.
37361d06d6bSBaptiste Daroussin */
37461d06d6bSBaptiste Daroussin
37561d06d6bSBaptiste Daroussin if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY &&
37661d06d6bSBaptiste Daroussin n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) ||
37761d06d6bSBaptiste Daroussin (n->parent != NULL && n->parent->tok == MDOC_Bl &&
37861d06d6bSBaptiste Daroussin n->parent->norm->Bl.type == LIST_column)) {
37961d06d6bSBaptiste Daroussin mdoc->flags |= MDOC_FREECOL;
3807295610fSBaptiste Daroussin (*mdoc_macro(MDOC_It)->fp)(mdoc, MDOC_It, ln, sv, &sv, buf);
38161d06d6bSBaptiste Daroussin return 1;
38261d06d6bSBaptiste Daroussin }
38361d06d6bSBaptiste Daroussin
38461d06d6bSBaptiste Daroussin /* Normal processing of a macro. */
38561d06d6bSBaptiste Daroussin
3867295610fSBaptiste Daroussin (*mdoc_macro(tok)->fp)(mdoc, tok, ln, sv, &offs, buf);
38761d06d6bSBaptiste Daroussin
38861d06d6bSBaptiste Daroussin /* In quick mode (for mandocdb), abort after the NAME section. */
38961d06d6bSBaptiste Daroussin
39061d06d6bSBaptiste Daroussin if (mdoc->quick && MDOC_Sh == tok &&
39161d06d6bSBaptiste Daroussin SEC_NAME != mdoc->last->sec)
39261d06d6bSBaptiste Daroussin return 2;
39361d06d6bSBaptiste Daroussin
39461d06d6bSBaptiste Daroussin return 1;
39561d06d6bSBaptiste Daroussin }
39661d06d6bSBaptiste Daroussin
39761d06d6bSBaptiste Daroussin enum mdelim
mdoc_isdelim(const char * p)39861d06d6bSBaptiste Daroussin mdoc_isdelim(const char *p)
39961d06d6bSBaptiste Daroussin {
40061d06d6bSBaptiste Daroussin
40161d06d6bSBaptiste Daroussin if ('\0' == p[0])
40261d06d6bSBaptiste Daroussin return DELIM_NONE;
40361d06d6bSBaptiste Daroussin
40461d06d6bSBaptiste Daroussin if ('\0' == p[1])
40561d06d6bSBaptiste Daroussin switch (p[0]) {
40661d06d6bSBaptiste Daroussin case '(':
40761d06d6bSBaptiste Daroussin case '[':
40861d06d6bSBaptiste Daroussin return DELIM_OPEN;
40961d06d6bSBaptiste Daroussin case '|':
41061d06d6bSBaptiste Daroussin return DELIM_MIDDLE;
41161d06d6bSBaptiste Daroussin case '.':
41261d06d6bSBaptiste Daroussin case ',':
41361d06d6bSBaptiste Daroussin case ';':
41461d06d6bSBaptiste Daroussin case ':':
41561d06d6bSBaptiste Daroussin case '?':
41661d06d6bSBaptiste Daroussin case '!':
41761d06d6bSBaptiste Daroussin case ')':
41861d06d6bSBaptiste Daroussin case ']':
41961d06d6bSBaptiste Daroussin return DELIM_CLOSE;
42061d06d6bSBaptiste Daroussin default:
42161d06d6bSBaptiste Daroussin return DELIM_NONE;
42261d06d6bSBaptiste Daroussin }
42361d06d6bSBaptiste Daroussin
42461d06d6bSBaptiste Daroussin if ('\\' != p[0])
42561d06d6bSBaptiste Daroussin return DELIM_NONE;
42661d06d6bSBaptiste Daroussin
42761d06d6bSBaptiste Daroussin if (0 == strcmp(p + 1, "."))
42861d06d6bSBaptiste Daroussin return DELIM_CLOSE;
42961d06d6bSBaptiste Daroussin if (0 == strcmp(p + 1, "fR|\\fP"))
43061d06d6bSBaptiste Daroussin return DELIM_MIDDLE;
43161d06d6bSBaptiste Daroussin
43261d06d6bSBaptiste Daroussin return DELIM_NONE;
43361d06d6bSBaptiste Daroussin }
434