1*61d06d6bSBaptiste Daroussin /* $Id: mdoc.c,v 1.268 2017/08/11 16:56:21 schwarze Exp $ */ 2*61d06d6bSBaptiste Daroussin /* 3*61d06d6bSBaptiste Daroussin * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4*61d06d6bSBaptiste Daroussin * Copyright (c) 2010, 2012-2017 Ingo Schwarze <schwarze@openbsd.org> 5*61d06d6bSBaptiste Daroussin * 6*61d06d6bSBaptiste Daroussin * Permission to use, copy, modify, and distribute this software for any 7*61d06d6bSBaptiste Daroussin * purpose with or without fee is hereby granted, provided that the above 8*61d06d6bSBaptiste Daroussin * copyright notice and this permission notice appear in all copies. 9*61d06d6bSBaptiste Daroussin * 10*61d06d6bSBaptiste Daroussin * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11*61d06d6bSBaptiste Daroussin * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12*61d06d6bSBaptiste Daroussin * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13*61d06d6bSBaptiste Daroussin * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14*61d06d6bSBaptiste Daroussin * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15*61d06d6bSBaptiste Daroussin * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16*61d06d6bSBaptiste Daroussin * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17*61d06d6bSBaptiste Daroussin */ 18*61d06d6bSBaptiste Daroussin #include "config.h" 19*61d06d6bSBaptiste Daroussin 20*61d06d6bSBaptiste Daroussin #include <sys/types.h> 21*61d06d6bSBaptiste Daroussin 22*61d06d6bSBaptiste Daroussin #include <assert.h> 23*61d06d6bSBaptiste Daroussin #include <ctype.h> 24*61d06d6bSBaptiste Daroussin #include <stdarg.h> 25*61d06d6bSBaptiste Daroussin #include <stdio.h> 26*61d06d6bSBaptiste Daroussin #include <stdlib.h> 27*61d06d6bSBaptiste Daroussin #include <string.h> 28*61d06d6bSBaptiste Daroussin #include <time.h> 29*61d06d6bSBaptiste Daroussin 30*61d06d6bSBaptiste Daroussin #include "mandoc_aux.h" 31*61d06d6bSBaptiste Daroussin #include "mandoc.h" 32*61d06d6bSBaptiste Daroussin #include "roff.h" 33*61d06d6bSBaptiste Daroussin #include "mdoc.h" 34*61d06d6bSBaptiste Daroussin #include "libmandoc.h" 35*61d06d6bSBaptiste Daroussin #include "roff_int.h" 36*61d06d6bSBaptiste Daroussin #include "libmdoc.h" 37*61d06d6bSBaptiste Daroussin 38*61d06d6bSBaptiste Daroussin const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 39*61d06d6bSBaptiste Daroussin "split", "nosplit", "ragged", 40*61d06d6bSBaptiste Daroussin "unfilled", "literal", "file", 41*61d06d6bSBaptiste Daroussin "offset", "bullet", "dash", 42*61d06d6bSBaptiste Daroussin "hyphen", "item", "enum", 43*61d06d6bSBaptiste Daroussin "tag", "diag", "hang", 44*61d06d6bSBaptiste Daroussin "ohang", "inset", "column", 45*61d06d6bSBaptiste Daroussin "width", "compact", "std", 46*61d06d6bSBaptiste Daroussin "filled", "words", "emphasis", 47*61d06d6bSBaptiste Daroussin "symbolic", "nested", "centered" 48*61d06d6bSBaptiste Daroussin }; 49*61d06d6bSBaptiste Daroussin const char * const *mdoc_argnames = __mdoc_argnames; 50*61d06d6bSBaptiste Daroussin 51*61d06d6bSBaptiste Daroussin static int mdoc_ptext(struct roff_man *, int, char *, int); 52*61d06d6bSBaptiste Daroussin static int mdoc_pmacro(struct roff_man *, int, char *, int); 53*61d06d6bSBaptiste Daroussin 54*61d06d6bSBaptiste Daroussin 55*61d06d6bSBaptiste Daroussin /* 56*61d06d6bSBaptiste Daroussin * Main parse routine. Parses a single line -- really just hands off to 57*61d06d6bSBaptiste Daroussin * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 58*61d06d6bSBaptiste Daroussin */ 59*61d06d6bSBaptiste Daroussin int 60*61d06d6bSBaptiste Daroussin mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs) 61*61d06d6bSBaptiste Daroussin { 62*61d06d6bSBaptiste Daroussin 63*61d06d6bSBaptiste Daroussin if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line) 64*61d06d6bSBaptiste Daroussin mdoc->flags |= MDOC_NEWLINE; 65*61d06d6bSBaptiste Daroussin 66*61d06d6bSBaptiste Daroussin /* 67*61d06d6bSBaptiste Daroussin * Let the roff nS register switch SYNOPSIS mode early, 68*61d06d6bSBaptiste Daroussin * such that the parser knows at all times 69*61d06d6bSBaptiste Daroussin * whether this mode is on or off. 70*61d06d6bSBaptiste Daroussin * Note that this mode is also switched by the Sh macro. 71*61d06d6bSBaptiste Daroussin */ 72*61d06d6bSBaptiste Daroussin if (roff_getreg(mdoc->roff, "nS")) 73*61d06d6bSBaptiste Daroussin mdoc->flags |= MDOC_SYNOPSIS; 74*61d06d6bSBaptiste Daroussin else 75*61d06d6bSBaptiste Daroussin mdoc->flags &= ~MDOC_SYNOPSIS; 76*61d06d6bSBaptiste Daroussin 77*61d06d6bSBaptiste Daroussin return roff_getcontrol(mdoc->roff, buf, &offs) ? 78*61d06d6bSBaptiste Daroussin mdoc_pmacro(mdoc, ln, buf, offs) : 79*61d06d6bSBaptiste Daroussin mdoc_ptext(mdoc, ln, buf, offs); 80*61d06d6bSBaptiste Daroussin } 81*61d06d6bSBaptiste Daroussin 82*61d06d6bSBaptiste Daroussin void 83*61d06d6bSBaptiste Daroussin mdoc_macro(MACRO_PROT_ARGS) 84*61d06d6bSBaptiste Daroussin { 85*61d06d6bSBaptiste Daroussin assert(tok >= MDOC_Dd && tok < MDOC_MAX); 86*61d06d6bSBaptiste Daroussin (*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf); 87*61d06d6bSBaptiste Daroussin } 88*61d06d6bSBaptiste Daroussin 89*61d06d6bSBaptiste Daroussin void 90*61d06d6bSBaptiste Daroussin mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, enum roff_tok tok) 91*61d06d6bSBaptiste Daroussin { 92*61d06d6bSBaptiste Daroussin struct roff_node *p; 93*61d06d6bSBaptiste Daroussin 94*61d06d6bSBaptiste Daroussin p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok); 95*61d06d6bSBaptiste Daroussin roff_node_append(mdoc, p); 96*61d06d6bSBaptiste Daroussin mdoc->next = ROFF_NEXT_CHILD; 97*61d06d6bSBaptiste Daroussin } 98*61d06d6bSBaptiste Daroussin 99*61d06d6bSBaptiste Daroussin struct roff_node * 100*61d06d6bSBaptiste Daroussin mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos, 101*61d06d6bSBaptiste Daroussin enum roff_tok tok, struct roff_node *body) 102*61d06d6bSBaptiste Daroussin { 103*61d06d6bSBaptiste Daroussin struct roff_node *p; 104*61d06d6bSBaptiste Daroussin 105*61d06d6bSBaptiste Daroussin body->flags |= NODE_ENDED; 106*61d06d6bSBaptiste Daroussin body->parent->flags |= NODE_ENDED; 107*61d06d6bSBaptiste Daroussin p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok); 108*61d06d6bSBaptiste Daroussin p->body = body; 109*61d06d6bSBaptiste Daroussin p->norm = body->norm; 110*61d06d6bSBaptiste Daroussin p->end = ENDBODY_SPACE; 111*61d06d6bSBaptiste Daroussin roff_node_append(mdoc, p); 112*61d06d6bSBaptiste Daroussin mdoc->next = ROFF_NEXT_SIBLING; 113*61d06d6bSBaptiste Daroussin return p; 114*61d06d6bSBaptiste Daroussin } 115*61d06d6bSBaptiste Daroussin 116*61d06d6bSBaptiste Daroussin struct roff_node * 117*61d06d6bSBaptiste Daroussin mdoc_block_alloc(struct roff_man *mdoc, int line, int pos, 118*61d06d6bSBaptiste Daroussin enum roff_tok tok, struct mdoc_arg *args) 119*61d06d6bSBaptiste Daroussin { 120*61d06d6bSBaptiste Daroussin struct roff_node *p; 121*61d06d6bSBaptiste Daroussin 122*61d06d6bSBaptiste Daroussin p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok); 123*61d06d6bSBaptiste Daroussin p->args = args; 124*61d06d6bSBaptiste Daroussin if (p->args) 125*61d06d6bSBaptiste Daroussin (args->refcnt)++; 126*61d06d6bSBaptiste Daroussin 127*61d06d6bSBaptiste Daroussin switch (tok) { 128*61d06d6bSBaptiste Daroussin case MDOC_Bd: 129*61d06d6bSBaptiste Daroussin case MDOC_Bf: 130*61d06d6bSBaptiste Daroussin case MDOC_Bl: 131*61d06d6bSBaptiste Daroussin case MDOC_En: 132*61d06d6bSBaptiste Daroussin case MDOC_Rs: 133*61d06d6bSBaptiste Daroussin p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 134*61d06d6bSBaptiste Daroussin break; 135*61d06d6bSBaptiste Daroussin default: 136*61d06d6bSBaptiste Daroussin break; 137*61d06d6bSBaptiste Daroussin } 138*61d06d6bSBaptiste Daroussin roff_node_append(mdoc, p); 139*61d06d6bSBaptiste Daroussin mdoc->next = ROFF_NEXT_CHILD; 140*61d06d6bSBaptiste Daroussin return p; 141*61d06d6bSBaptiste Daroussin } 142*61d06d6bSBaptiste Daroussin 143*61d06d6bSBaptiste Daroussin void 144*61d06d6bSBaptiste Daroussin mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos, 145*61d06d6bSBaptiste Daroussin enum roff_tok tok, struct mdoc_arg *args) 146*61d06d6bSBaptiste Daroussin { 147*61d06d6bSBaptiste Daroussin struct roff_node *p; 148*61d06d6bSBaptiste Daroussin 149*61d06d6bSBaptiste Daroussin p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok); 150*61d06d6bSBaptiste Daroussin p->args = args; 151*61d06d6bSBaptiste Daroussin if (p->args) 152*61d06d6bSBaptiste Daroussin (args->refcnt)++; 153*61d06d6bSBaptiste Daroussin 154*61d06d6bSBaptiste Daroussin switch (tok) { 155*61d06d6bSBaptiste Daroussin case MDOC_An: 156*61d06d6bSBaptiste Daroussin p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 157*61d06d6bSBaptiste Daroussin break; 158*61d06d6bSBaptiste Daroussin default: 159*61d06d6bSBaptiste Daroussin break; 160*61d06d6bSBaptiste Daroussin } 161*61d06d6bSBaptiste Daroussin roff_node_append(mdoc, p); 162*61d06d6bSBaptiste Daroussin mdoc->next = ROFF_NEXT_CHILD; 163*61d06d6bSBaptiste Daroussin } 164*61d06d6bSBaptiste Daroussin 165*61d06d6bSBaptiste Daroussin void 166*61d06d6bSBaptiste Daroussin mdoc_node_relink(struct roff_man *mdoc, struct roff_node *p) 167*61d06d6bSBaptiste Daroussin { 168*61d06d6bSBaptiste Daroussin 169*61d06d6bSBaptiste Daroussin roff_node_unlink(mdoc, p); 170*61d06d6bSBaptiste Daroussin p->prev = p->next = NULL; 171*61d06d6bSBaptiste Daroussin roff_node_append(mdoc, p); 172*61d06d6bSBaptiste Daroussin } 173*61d06d6bSBaptiste Daroussin 174*61d06d6bSBaptiste Daroussin /* 175*61d06d6bSBaptiste Daroussin * Parse free-form text, that is, a line that does not begin with the 176*61d06d6bSBaptiste Daroussin * control character. 177*61d06d6bSBaptiste Daroussin */ 178*61d06d6bSBaptiste Daroussin static int 179*61d06d6bSBaptiste Daroussin mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs) 180*61d06d6bSBaptiste Daroussin { 181*61d06d6bSBaptiste Daroussin struct roff_node *n; 182*61d06d6bSBaptiste Daroussin const char *cp, *sp; 183*61d06d6bSBaptiste Daroussin char *c, *ws, *end; 184*61d06d6bSBaptiste Daroussin 185*61d06d6bSBaptiste Daroussin n = mdoc->last; 186*61d06d6bSBaptiste Daroussin 187*61d06d6bSBaptiste Daroussin /* 188*61d06d6bSBaptiste Daroussin * If a column list contains plain text, assume an implicit item 189*61d06d6bSBaptiste Daroussin * macro. This can happen one or more times at the beginning 190*61d06d6bSBaptiste Daroussin * of such a list, intermixed with non-It mdoc macros and with 191*61d06d6bSBaptiste Daroussin * nodes generated on the roff level, for example by tbl. 192*61d06d6bSBaptiste Daroussin */ 193*61d06d6bSBaptiste Daroussin 194*61d06d6bSBaptiste Daroussin if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY && 195*61d06d6bSBaptiste Daroussin n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) || 196*61d06d6bSBaptiste Daroussin (n->parent != NULL && n->parent->tok == MDOC_Bl && 197*61d06d6bSBaptiste Daroussin n->parent->norm->Bl.type == LIST_column)) { 198*61d06d6bSBaptiste Daroussin mdoc->flags |= MDOC_FREECOL; 199*61d06d6bSBaptiste Daroussin mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf); 200*61d06d6bSBaptiste Daroussin return 1; 201*61d06d6bSBaptiste Daroussin } 202*61d06d6bSBaptiste Daroussin 203*61d06d6bSBaptiste Daroussin /* 204*61d06d6bSBaptiste Daroussin * Search for the beginning of unescaped trailing whitespace (ws) 205*61d06d6bSBaptiste Daroussin * and for the first character not to be output (end). 206*61d06d6bSBaptiste Daroussin */ 207*61d06d6bSBaptiste Daroussin 208*61d06d6bSBaptiste Daroussin /* FIXME: replace with strcspn(). */ 209*61d06d6bSBaptiste Daroussin ws = NULL; 210*61d06d6bSBaptiste Daroussin for (c = end = buf + offs; *c; c++) { 211*61d06d6bSBaptiste Daroussin switch (*c) { 212*61d06d6bSBaptiste Daroussin case ' ': 213*61d06d6bSBaptiste Daroussin if (NULL == ws) 214*61d06d6bSBaptiste Daroussin ws = c; 215*61d06d6bSBaptiste Daroussin continue; 216*61d06d6bSBaptiste Daroussin case '\t': 217*61d06d6bSBaptiste Daroussin /* 218*61d06d6bSBaptiste Daroussin * Always warn about trailing tabs, 219*61d06d6bSBaptiste Daroussin * even outside literal context, 220*61d06d6bSBaptiste Daroussin * where they should be put on the next line. 221*61d06d6bSBaptiste Daroussin */ 222*61d06d6bSBaptiste Daroussin if (NULL == ws) 223*61d06d6bSBaptiste Daroussin ws = c; 224*61d06d6bSBaptiste Daroussin /* 225*61d06d6bSBaptiste Daroussin * Strip trailing tabs in literal context only; 226*61d06d6bSBaptiste Daroussin * outside, they affect the next line. 227*61d06d6bSBaptiste Daroussin */ 228*61d06d6bSBaptiste Daroussin if (MDOC_LITERAL & mdoc->flags) 229*61d06d6bSBaptiste Daroussin continue; 230*61d06d6bSBaptiste Daroussin break; 231*61d06d6bSBaptiste Daroussin case '\\': 232*61d06d6bSBaptiste Daroussin /* Skip the escaped character, too, if any. */ 233*61d06d6bSBaptiste Daroussin if (c[1]) 234*61d06d6bSBaptiste Daroussin c++; 235*61d06d6bSBaptiste Daroussin /* FALLTHROUGH */ 236*61d06d6bSBaptiste Daroussin default: 237*61d06d6bSBaptiste Daroussin ws = NULL; 238*61d06d6bSBaptiste Daroussin break; 239*61d06d6bSBaptiste Daroussin } 240*61d06d6bSBaptiste Daroussin end = c + 1; 241*61d06d6bSBaptiste Daroussin } 242*61d06d6bSBaptiste Daroussin *end = '\0'; 243*61d06d6bSBaptiste Daroussin 244*61d06d6bSBaptiste Daroussin if (ws) 245*61d06d6bSBaptiste Daroussin mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 246*61d06d6bSBaptiste Daroussin line, (int)(ws-buf), NULL); 247*61d06d6bSBaptiste Daroussin 248*61d06d6bSBaptiste Daroussin /* 249*61d06d6bSBaptiste Daroussin * Blank lines are allowed in no-fill mode 250*61d06d6bSBaptiste Daroussin * and cancel preceding \c, 251*61d06d6bSBaptiste Daroussin * but add a single vertical space elsewhere. 252*61d06d6bSBaptiste Daroussin */ 253*61d06d6bSBaptiste Daroussin 254*61d06d6bSBaptiste Daroussin if (buf[offs] == '\0' && ! (mdoc->flags & MDOC_LITERAL)) { 255*61d06d6bSBaptiste Daroussin switch (mdoc->last->type) { 256*61d06d6bSBaptiste Daroussin case ROFFT_TEXT: 257*61d06d6bSBaptiste Daroussin sp = mdoc->last->string; 258*61d06d6bSBaptiste Daroussin cp = end = strchr(sp, '\0') - 2; 259*61d06d6bSBaptiste Daroussin if (cp < sp || cp[0] != '\\' || cp[1] != 'c') 260*61d06d6bSBaptiste Daroussin break; 261*61d06d6bSBaptiste Daroussin while (cp > sp && cp[-1] == '\\') 262*61d06d6bSBaptiste Daroussin cp--; 263*61d06d6bSBaptiste Daroussin if ((end - cp) % 2) 264*61d06d6bSBaptiste Daroussin break; 265*61d06d6bSBaptiste Daroussin *end = '\0'; 266*61d06d6bSBaptiste Daroussin return 1; 267*61d06d6bSBaptiste Daroussin default: 268*61d06d6bSBaptiste Daroussin break; 269*61d06d6bSBaptiste Daroussin } 270*61d06d6bSBaptiste Daroussin mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse, 271*61d06d6bSBaptiste Daroussin line, (int)(c - buf), NULL); 272*61d06d6bSBaptiste Daroussin roff_elem_alloc(mdoc, line, offs, ROFF_sp); 273*61d06d6bSBaptiste Daroussin mdoc->last->flags |= NODE_VALID | NODE_ENDED; 274*61d06d6bSBaptiste Daroussin mdoc->next = ROFF_NEXT_SIBLING; 275*61d06d6bSBaptiste Daroussin return 1; 276*61d06d6bSBaptiste Daroussin } 277*61d06d6bSBaptiste Daroussin 278*61d06d6bSBaptiste Daroussin roff_word_alloc(mdoc, line, offs, buf+offs); 279*61d06d6bSBaptiste Daroussin 280*61d06d6bSBaptiste Daroussin if (mdoc->flags & MDOC_LITERAL) 281*61d06d6bSBaptiste Daroussin return 1; 282*61d06d6bSBaptiste Daroussin 283*61d06d6bSBaptiste Daroussin /* 284*61d06d6bSBaptiste Daroussin * End-of-sentence check. If the last character is an unescaped 285*61d06d6bSBaptiste Daroussin * EOS character, then flag the node as being the end of a 286*61d06d6bSBaptiste Daroussin * sentence. The front-end will know how to interpret this. 287*61d06d6bSBaptiste Daroussin */ 288*61d06d6bSBaptiste Daroussin 289*61d06d6bSBaptiste Daroussin assert(buf < end); 290*61d06d6bSBaptiste Daroussin 291*61d06d6bSBaptiste Daroussin if (mandoc_eos(buf+offs, (size_t)(end-buf-offs))) 292*61d06d6bSBaptiste Daroussin mdoc->last->flags |= NODE_EOS; 293*61d06d6bSBaptiste Daroussin 294*61d06d6bSBaptiste Daroussin for (c = buf + offs; c != NULL; c = strchr(c + 1, '.')) { 295*61d06d6bSBaptiste Daroussin if (c - buf < offs + 2) 296*61d06d6bSBaptiste Daroussin continue; 297*61d06d6bSBaptiste Daroussin if (end - c < 3) 298*61d06d6bSBaptiste Daroussin break; 299*61d06d6bSBaptiste Daroussin if (c[1] != ' ' || 300*61d06d6bSBaptiste Daroussin isalnum((unsigned char)c[-2]) == 0 || 301*61d06d6bSBaptiste Daroussin isalnum((unsigned char)c[-1]) == 0 || 302*61d06d6bSBaptiste Daroussin (c[-2] == 'n' && c[-1] == 'c') || 303*61d06d6bSBaptiste Daroussin (c[-2] == 'v' && c[-1] == 's')) 304*61d06d6bSBaptiste Daroussin continue; 305*61d06d6bSBaptiste Daroussin c += 2; 306*61d06d6bSBaptiste Daroussin if (*c == ' ') 307*61d06d6bSBaptiste Daroussin c++; 308*61d06d6bSBaptiste Daroussin if (*c == ' ') 309*61d06d6bSBaptiste Daroussin c++; 310*61d06d6bSBaptiste Daroussin if (isupper((unsigned char)(*c))) 311*61d06d6bSBaptiste Daroussin mandoc_msg(MANDOCERR_EOS, mdoc->parse, 312*61d06d6bSBaptiste Daroussin line, (int)(c - buf), NULL); 313*61d06d6bSBaptiste Daroussin } 314*61d06d6bSBaptiste Daroussin 315*61d06d6bSBaptiste Daroussin return 1; 316*61d06d6bSBaptiste Daroussin } 317*61d06d6bSBaptiste Daroussin 318*61d06d6bSBaptiste Daroussin /* 319*61d06d6bSBaptiste Daroussin * Parse a macro line, that is, a line beginning with the control 320*61d06d6bSBaptiste Daroussin * character. 321*61d06d6bSBaptiste Daroussin */ 322*61d06d6bSBaptiste Daroussin static int 323*61d06d6bSBaptiste Daroussin mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs) 324*61d06d6bSBaptiste Daroussin { 325*61d06d6bSBaptiste Daroussin struct roff_node *n; 326*61d06d6bSBaptiste Daroussin const char *cp; 327*61d06d6bSBaptiste Daroussin size_t sz; 328*61d06d6bSBaptiste Daroussin enum roff_tok tok; 329*61d06d6bSBaptiste Daroussin int sv; 330*61d06d6bSBaptiste Daroussin 331*61d06d6bSBaptiste Daroussin /* Determine the line macro. */ 332*61d06d6bSBaptiste Daroussin 333*61d06d6bSBaptiste Daroussin sv = offs; 334*61d06d6bSBaptiste Daroussin tok = TOKEN_NONE; 335*61d06d6bSBaptiste Daroussin for (sz = 0; sz < 4 && strchr(" \t\\", buf[offs]) == NULL; sz++) 336*61d06d6bSBaptiste Daroussin offs++; 337*61d06d6bSBaptiste Daroussin if (sz == 2 || sz == 3) 338*61d06d6bSBaptiste Daroussin tok = roffhash_find(mdoc->mdocmac, buf + sv, sz); 339*61d06d6bSBaptiste Daroussin if (tok == TOKEN_NONE) { 340*61d06d6bSBaptiste Daroussin mandoc_msg(MANDOCERR_MACRO, mdoc->parse, 341*61d06d6bSBaptiste Daroussin ln, sv, buf + sv - 1); 342*61d06d6bSBaptiste Daroussin return 1; 343*61d06d6bSBaptiste Daroussin } 344*61d06d6bSBaptiste Daroussin 345*61d06d6bSBaptiste Daroussin /* Skip a leading escape sequence or tab. */ 346*61d06d6bSBaptiste Daroussin 347*61d06d6bSBaptiste Daroussin switch (buf[offs]) { 348*61d06d6bSBaptiste Daroussin case '\\': 349*61d06d6bSBaptiste Daroussin cp = buf + offs + 1; 350*61d06d6bSBaptiste Daroussin mandoc_escape(&cp, NULL, NULL); 351*61d06d6bSBaptiste Daroussin offs = cp - buf; 352*61d06d6bSBaptiste Daroussin break; 353*61d06d6bSBaptiste Daroussin case '\t': 354*61d06d6bSBaptiste Daroussin offs++; 355*61d06d6bSBaptiste Daroussin break; 356*61d06d6bSBaptiste Daroussin default: 357*61d06d6bSBaptiste Daroussin break; 358*61d06d6bSBaptiste Daroussin } 359*61d06d6bSBaptiste Daroussin 360*61d06d6bSBaptiste Daroussin /* Jump to the next non-whitespace word. */ 361*61d06d6bSBaptiste Daroussin 362*61d06d6bSBaptiste Daroussin while (buf[offs] == ' ') 363*61d06d6bSBaptiste Daroussin offs++; 364*61d06d6bSBaptiste Daroussin 365*61d06d6bSBaptiste Daroussin /* 366*61d06d6bSBaptiste Daroussin * Trailing whitespace. Note that tabs are allowed to be passed 367*61d06d6bSBaptiste Daroussin * into the parser as "text", so we only warn about spaces here. 368*61d06d6bSBaptiste Daroussin */ 369*61d06d6bSBaptiste Daroussin 370*61d06d6bSBaptiste Daroussin if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 371*61d06d6bSBaptiste Daroussin mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 372*61d06d6bSBaptiste Daroussin ln, offs - 1, NULL); 373*61d06d6bSBaptiste Daroussin 374*61d06d6bSBaptiste Daroussin /* 375*61d06d6bSBaptiste Daroussin * If an initial macro or a list invocation, divert directly 376*61d06d6bSBaptiste Daroussin * into macro processing. 377*61d06d6bSBaptiste Daroussin */ 378*61d06d6bSBaptiste Daroussin 379*61d06d6bSBaptiste Daroussin n = mdoc->last; 380*61d06d6bSBaptiste Daroussin if (n == NULL || tok == MDOC_It || tok == MDOC_El) { 381*61d06d6bSBaptiste Daroussin mdoc_macro(mdoc, tok, ln, sv, &offs, buf); 382*61d06d6bSBaptiste Daroussin return 1; 383*61d06d6bSBaptiste Daroussin } 384*61d06d6bSBaptiste Daroussin 385*61d06d6bSBaptiste Daroussin /* 386*61d06d6bSBaptiste Daroussin * If a column list contains a non-It macro, assume an implicit 387*61d06d6bSBaptiste Daroussin * item macro. This can happen one or more times at the 388*61d06d6bSBaptiste Daroussin * beginning of such a list, intermixed with text lines and 389*61d06d6bSBaptiste Daroussin * with nodes generated on the roff level, for example by tbl. 390*61d06d6bSBaptiste Daroussin */ 391*61d06d6bSBaptiste Daroussin 392*61d06d6bSBaptiste Daroussin if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY && 393*61d06d6bSBaptiste Daroussin n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) || 394*61d06d6bSBaptiste Daroussin (n->parent != NULL && n->parent->tok == MDOC_Bl && 395*61d06d6bSBaptiste Daroussin n->parent->norm->Bl.type == LIST_column)) { 396*61d06d6bSBaptiste Daroussin mdoc->flags |= MDOC_FREECOL; 397*61d06d6bSBaptiste Daroussin mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf); 398*61d06d6bSBaptiste Daroussin return 1; 399*61d06d6bSBaptiste Daroussin } 400*61d06d6bSBaptiste Daroussin 401*61d06d6bSBaptiste Daroussin /* Normal processing of a macro. */ 402*61d06d6bSBaptiste Daroussin 403*61d06d6bSBaptiste Daroussin mdoc_macro(mdoc, tok, ln, sv, &offs, buf); 404*61d06d6bSBaptiste Daroussin 405*61d06d6bSBaptiste Daroussin /* In quick mode (for mandocdb), abort after the NAME section. */ 406*61d06d6bSBaptiste Daroussin 407*61d06d6bSBaptiste Daroussin if (mdoc->quick && MDOC_Sh == tok && 408*61d06d6bSBaptiste Daroussin SEC_NAME != mdoc->last->sec) 409*61d06d6bSBaptiste Daroussin return 2; 410*61d06d6bSBaptiste Daroussin 411*61d06d6bSBaptiste Daroussin return 1; 412*61d06d6bSBaptiste Daroussin } 413*61d06d6bSBaptiste Daroussin 414*61d06d6bSBaptiste Daroussin enum mdelim 415*61d06d6bSBaptiste Daroussin mdoc_isdelim(const char *p) 416*61d06d6bSBaptiste Daroussin { 417*61d06d6bSBaptiste Daroussin 418*61d06d6bSBaptiste Daroussin if ('\0' == p[0]) 419*61d06d6bSBaptiste Daroussin return DELIM_NONE; 420*61d06d6bSBaptiste Daroussin 421*61d06d6bSBaptiste Daroussin if ('\0' == p[1]) 422*61d06d6bSBaptiste Daroussin switch (p[0]) { 423*61d06d6bSBaptiste Daroussin case '(': 424*61d06d6bSBaptiste Daroussin case '[': 425*61d06d6bSBaptiste Daroussin return DELIM_OPEN; 426*61d06d6bSBaptiste Daroussin case '|': 427*61d06d6bSBaptiste Daroussin return DELIM_MIDDLE; 428*61d06d6bSBaptiste Daroussin case '.': 429*61d06d6bSBaptiste Daroussin case ',': 430*61d06d6bSBaptiste Daroussin case ';': 431*61d06d6bSBaptiste Daroussin case ':': 432*61d06d6bSBaptiste Daroussin case '?': 433*61d06d6bSBaptiste Daroussin case '!': 434*61d06d6bSBaptiste Daroussin case ')': 435*61d06d6bSBaptiste Daroussin case ']': 436*61d06d6bSBaptiste Daroussin return DELIM_CLOSE; 437*61d06d6bSBaptiste Daroussin default: 438*61d06d6bSBaptiste Daroussin return DELIM_NONE; 439*61d06d6bSBaptiste Daroussin } 440*61d06d6bSBaptiste Daroussin 441*61d06d6bSBaptiste Daroussin if ('\\' != p[0]) 442*61d06d6bSBaptiste Daroussin return DELIM_NONE; 443*61d06d6bSBaptiste Daroussin 444*61d06d6bSBaptiste Daroussin if (0 == strcmp(p + 1, ".")) 445*61d06d6bSBaptiste Daroussin return DELIM_CLOSE; 446*61d06d6bSBaptiste Daroussin if (0 == strcmp(p + 1, "fR|\\fP")) 447*61d06d6bSBaptiste Daroussin return DELIM_MIDDLE; 448*61d06d6bSBaptiste Daroussin 449*61d06d6bSBaptiste Daroussin return DELIM_NONE; 450*61d06d6bSBaptiste Daroussin } 451*61d06d6bSBaptiste Daroussin 452*61d06d6bSBaptiste Daroussin void 453*61d06d6bSBaptiste Daroussin mdoc_validate(struct roff_man *mdoc) 454*61d06d6bSBaptiste Daroussin { 455*61d06d6bSBaptiste Daroussin 456*61d06d6bSBaptiste Daroussin mdoc->last = mdoc->first; 457*61d06d6bSBaptiste Daroussin mdoc_node_validate(mdoc); 458*61d06d6bSBaptiste Daroussin mdoc_state_reset(mdoc); 459*61d06d6bSBaptiste Daroussin } 460