1*260e9a87SYuri Pankov /* $Id: man.c,v 1.149 2015/01/30 21:28:46 schwarze Exp $ */ 295c635efSGarrett D'Amore /* 395c635efSGarrett D'Amore * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4*260e9a87SYuri Pankov * Copyright (c) 2013, 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> 5*260e9a87SYuri Pankov * Copyright (c) 2011 Joerg Sonnenberger <joerg@netbsd.org> 695c635efSGarrett D'Amore * 795c635efSGarrett D'Amore * Permission to use, copy, modify, and distribute this software for any 895c635efSGarrett D'Amore * purpose with or without fee is hereby granted, provided that the above 995c635efSGarrett D'Amore * copyright notice and this permission notice appear in all copies. 1095c635efSGarrett D'Amore * 1195c635efSGarrett D'Amore * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 1295c635efSGarrett D'Amore * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 1395c635efSGarrett D'Amore * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 1495c635efSGarrett D'Amore * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 1595c635efSGarrett D'Amore * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 1695c635efSGarrett D'Amore * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 1795c635efSGarrett D'Amore * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 1895c635efSGarrett D'Amore */ 1995c635efSGarrett D'Amore #include "config.h" 2095c635efSGarrett D'Amore 2195c635efSGarrett D'Amore #include <sys/types.h> 2295c635efSGarrett D'Amore 2395c635efSGarrett D'Amore #include <assert.h> 24*260e9a87SYuri Pankov #include <ctype.h> 2595c635efSGarrett D'Amore #include <stdarg.h> 2695c635efSGarrett D'Amore #include <stdlib.h> 2795c635efSGarrett D'Amore #include <stdio.h> 2895c635efSGarrett D'Amore #include <string.h> 2995c635efSGarrett D'Amore 3095c635efSGarrett D'Amore #include "man.h" 3195c635efSGarrett D'Amore #include "mandoc.h" 32*260e9a87SYuri Pankov #include "mandoc_aux.h" 3395c635efSGarrett D'Amore #include "libman.h" 3495c635efSGarrett D'Amore #include "libmandoc.h" 3595c635efSGarrett D'Amore 3695c635efSGarrett D'Amore const char *const __man_macronames[MAN_MAX] = { 3795c635efSGarrett D'Amore "br", "TH", "SH", "SS", 3895c635efSGarrett D'Amore "TP", "LP", "PP", "P", 3995c635efSGarrett D'Amore "IP", "HP", "SM", "SB", 4095c635efSGarrett D'Amore "BI", "IB", "BR", "RB", 4195c635efSGarrett D'Amore "R", "B", "I", "IR", 42*260e9a87SYuri Pankov "RI", "sp", "nf", 4395c635efSGarrett D'Amore "fi", "RE", "RS", "DT", 4495c635efSGarrett D'Amore "UC", "PD", "AT", "in", 45698f87a4SGarrett D'Amore "ft", "OP", "EX", "EE", 46*260e9a87SYuri Pankov "UR", "UE", "ll" 4795c635efSGarrett D'Amore }; 4895c635efSGarrett D'Amore 4995c635efSGarrett D'Amore const char * const *man_macronames = __man_macronames; 5095c635efSGarrett D'Amore 51*260e9a87SYuri Pankov static void man_alloc1(struct man *); 52*260e9a87SYuri Pankov static void man_breakscope(struct man *, enum mant); 53*260e9a87SYuri Pankov static void man_descope(struct man *, int, int); 54*260e9a87SYuri Pankov static void man_free1(struct man *); 5595c635efSGarrett D'Amore static struct man_node *man_node_alloc(struct man *, int, int, 5695c635efSGarrett D'Amore enum man_type, enum mant); 57*260e9a87SYuri Pankov static void man_node_append(struct man *, struct man_node *); 5895c635efSGarrett D'Amore static void man_node_free(struct man_node *); 5995c635efSGarrett D'Amore static void man_node_unlink(struct man *, 6095c635efSGarrett D'Amore struct man_node *); 6195c635efSGarrett D'Amore static int man_ptext(struct man *, int, char *, int); 6295c635efSGarrett D'Amore static int man_pmacro(struct man *, int, char *, int); 6395c635efSGarrett D'Amore 6495c635efSGarrett D'Amore 6595c635efSGarrett D'Amore const struct man_node * 66698f87a4SGarrett D'Amore man_node(const struct man *man) 6795c635efSGarrett D'Amore { 6895c635efSGarrett D'Amore 69698f87a4SGarrett D'Amore return(man->first); 7095c635efSGarrett D'Amore } 7195c635efSGarrett D'Amore 7295c635efSGarrett D'Amore const struct man_meta * 73698f87a4SGarrett D'Amore man_meta(const struct man *man) 7495c635efSGarrett D'Amore { 7595c635efSGarrett D'Amore 76698f87a4SGarrett D'Amore return(&man->meta); 7795c635efSGarrett D'Amore } 7895c635efSGarrett D'Amore 7995c635efSGarrett D'Amore void 8095c635efSGarrett D'Amore man_reset(struct man *man) 8195c635efSGarrett D'Amore { 8295c635efSGarrett D'Amore 8395c635efSGarrett D'Amore man_free1(man); 8495c635efSGarrett D'Amore man_alloc1(man); 8595c635efSGarrett D'Amore } 8695c635efSGarrett D'Amore 8795c635efSGarrett D'Amore void 8895c635efSGarrett D'Amore man_free(struct man *man) 8995c635efSGarrett D'Amore { 9095c635efSGarrett D'Amore 9195c635efSGarrett D'Amore man_free1(man); 9295c635efSGarrett D'Amore free(man); 9395c635efSGarrett D'Amore } 9495c635efSGarrett D'Amore 9595c635efSGarrett D'Amore struct man * 96*260e9a87SYuri Pankov man_alloc(struct roff *roff, struct mparse *parse, 97*260e9a87SYuri Pankov const char *defos, int quick) 9895c635efSGarrett D'Amore { 9995c635efSGarrett D'Amore struct man *p; 10095c635efSGarrett D'Amore 10195c635efSGarrett D'Amore p = mandoc_calloc(1, sizeof(struct man)); 10295c635efSGarrett D'Amore 10395c635efSGarrett D'Amore man_hash_init(); 10495c635efSGarrett D'Amore p->parse = parse; 105*260e9a87SYuri Pankov p->defos = defos; 106*260e9a87SYuri Pankov p->quick = quick; 10795c635efSGarrett D'Amore p->roff = roff; 10895c635efSGarrett D'Amore 10995c635efSGarrett D'Amore man_alloc1(p); 11095c635efSGarrett D'Amore return(p); 11195c635efSGarrett D'Amore } 11295c635efSGarrett D'Amore 113*260e9a87SYuri Pankov void 114698f87a4SGarrett D'Amore man_endparse(struct man *man) 11595c635efSGarrett D'Amore { 11695c635efSGarrett D'Amore 117*260e9a87SYuri Pankov man_macroend(man); 11895c635efSGarrett D'Amore } 11995c635efSGarrett D'Amore 12095c635efSGarrett D'Amore int 121698f87a4SGarrett D'Amore man_parseln(struct man *man, int ln, char *buf, int offs) 12295c635efSGarrett D'Amore { 12395c635efSGarrett D'Amore 124*260e9a87SYuri Pankov if (man->last->type != MAN_EQN || ln > man->last->line) 125698f87a4SGarrett D'Amore man->flags |= MAN_NEWLINE; 12695c635efSGarrett D'Amore 127698f87a4SGarrett D'Amore return (roff_getcontrol(man->roff, buf, &offs) ? 128698f87a4SGarrett D'Amore man_pmacro(man, ln, buf, offs) : 129698f87a4SGarrett D'Amore man_ptext(man, ln, buf, offs)); 13095c635efSGarrett D'Amore } 13195c635efSGarrett D'Amore 13295c635efSGarrett D'Amore static void 13395c635efSGarrett D'Amore man_free1(struct man *man) 13495c635efSGarrett D'Amore { 13595c635efSGarrett D'Amore 13695c635efSGarrett D'Amore if (man->first) 13795c635efSGarrett D'Amore man_node_delete(man, man->first); 13895c635efSGarrett D'Amore free(man->meta.title); 13995c635efSGarrett D'Amore free(man->meta.source); 14095c635efSGarrett D'Amore free(man->meta.date); 14195c635efSGarrett D'Amore free(man->meta.vol); 14295c635efSGarrett D'Amore free(man->meta.msec); 14395c635efSGarrett D'Amore } 14495c635efSGarrett D'Amore 14595c635efSGarrett D'Amore static void 146698f87a4SGarrett D'Amore man_alloc1(struct man *man) 14795c635efSGarrett D'Amore { 14895c635efSGarrett D'Amore 149698f87a4SGarrett D'Amore memset(&man->meta, 0, sizeof(struct man_meta)); 150698f87a4SGarrett D'Amore man->flags = 0; 151698f87a4SGarrett D'Amore man->last = mandoc_calloc(1, sizeof(struct man_node)); 152698f87a4SGarrett D'Amore man->first = man->last; 153698f87a4SGarrett D'Amore man->last->type = MAN_ROOT; 154698f87a4SGarrett D'Amore man->last->tok = MAN_MAX; 155698f87a4SGarrett D'Amore man->next = MAN_NEXT_CHILD; 15695c635efSGarrett D'Amore } 15795c635efSGarrett D'Amore 15895c635efSGarrett D'Amore 159*260e9a87SYuri Pankov static void 16095c635efSGarrett D'Amore man_node_append(struct man *man, struct man_node *p) 16195c635efSGarrett D'Amore { 16295c635efSGarrett D'Amore 16395c635efSGarrett D'Amore assert(man->last); 16495c635efSGarrett D'Amore assert(man->first); 165*260e9a87SYuri Pankov assert(p->type != MAN_ROOT); 16695c635efSGarrett D'Amore 16795c635efSGarrett D'Amore switch (man->next) { 168*260e9a87SYuri Pankov case MAN_NEXT_SIBLING: 16995c635efSGarrett D'Amore man->last->next = p; 17095c635efSGarrett D'Amore p->prev = man->last; 17195c635efSGarrett D'Amore p->parent = man->last->parent; 17295c635efSGarrett D'Amore break; 173*260e9a87SYuri Pankov case MAN_NEXT_CHILD: 17495c635efSGarrett D'Amore man->last->child = p; 17595c635efSGarrett D'Amore p->parent = man->last; 17695c635efSGarrett D'Amore break; 17795c635efSGarrett D'Amore default: 17895c635efSGarrett D'Amore abort(); 17995c635efSGarrett D'Amore /* NOTREACHED */ 18095c635efSGarrett D'Amore } 18195c635efSGarrett D'Amore 18295c635efSGarrett D'Amore assert(p->parent); 18395c635efSGarrett D'Amore p->parent->nchild++; 18495c635efSGarrett D'Amore 18595c635efSGarrett D'Amore switch (p->type) { 186*260e9a87SYuri Pankov case MAN_BLOCK: 187*260e9a87SYuri Pankov if (p->tok == MAN_SH || p->tok == MAN_SS) 188*260e9a87SYuri Pankov man->flags &= ~MAN_LITERAL; 189*260e9a87SYuri Pankov break; 190*260e9a87SYuri Pankov case MAN_HEAD: 191*260e9a87SYuri Pankov assert(p->parent->type == MAN_BLOCK); 19295c635efSGarrett D'Amore p->parent->head = p; 19395c635efSGarrett D'Amore break; 194*260e9a87SYuri Pankov case MAN_BODY: 195*260e9a87SYuri Pankov assert(p->parent->type == MAN_BLOCK); 19695c635efSGarrett D'Amore p->parent->body = p; 19795c635efSGarrett D'Amore break; 19895c635efSGarrett D'Amore default: 19995c635efSGarrett D'Amore break; 20095c635efSGarrett D'Amore } 20195c635efSGarrett D'Amore 20295c635efSGarrett D'Amore man->last = p; 20395c635efSGarrett D'Amore 20495c635efSGarrett D'Amore switch (p->type) { 205*260e9a87SYuri Pankov case MAN_TBL: 20695c635efSGarrett D'Amore /* FALLTHROUGH */ 207*260e9a87SYuri Pankov case MAN_TEXT: 208*260e9a87SYuri Pankov man_valid_post(man); 20995c635efSGarrett D'Amore break; 21095c635efSGarrett D'Amore default: 21195c635efSGarrett D'Amore break; 21295c635efSGarrett D'Amore } 21395c635efSGarrett D'Amore } 21495c635efSGarrett D'Amore 21595c635efSGarrett D'Amore static struct man_node * 216698f87a4SGarrett D'Amore man_node_alloc(struct man *man, int line, int pos, 21795c635efSGarrett D'Amore enum man_type type, enum mant tok) 21895c635efSGarrett D'Amore { 21995c635efSGarrett D'Amore struct man_node *p; 22095c635efSGarrett D'Amore 22195c635efSGarrett D'Amore p = mandoc_calloc(1, sizeof(struct man_node)); 22295c635efSGarrett D'Amore p->line = line; 22395c635efSGarrett D'Amore p->pos = pos; 22495c635efSGarrett D'Amore p->type = type; 22595c635efSGarrett D'Amore p->tok = tok; 22695c635efSGarrett D'Amore 227*260e9a87SYuri Pankov if (man->flags & MAN_NEWLINE) 22895c635efSGarrett D'Amore p->flags |= MAN_LINE; 229698f87a4SGarrett D'Amore man->flags &= ~MAN_NEWLINE; 23095c635efSGarrett D'Amore return(p); 23195c635efSGarrett D'Amore } 23295c635efSGarrett D'Amore 233*260e9a87SYuri Pankov void 234698f87a4SGarrett D'Amore man_elem_alloc(struct man *man, int line, int pos, enum mant tok) 23595c635efSGarrett D'Amore { 23695c635efSGarrett D'Amore struct man_node *p; 23795c635efSGarrett D'Amore 238698f87a4SGarrett D'Amore p = man_node_alloc(man, line, pos, MAN_ELEM, tok); 239*260e9a87SYuri Pankov man_node_append(man, p); 240698f87a4SGarrett D'Amore man->next = MAN_NEXT_CHILD; 24195c635efSGarrett D'Amore } 24295c635efSGarrett D'Amore 243*260e9a87SYuri Pankov void 244698f87a4SGarrett D'Amore man_head_alloc(struct man *man, int line, int pos, enum mant tok) 24595c635efSGarrett D'Amore { 24695c635efSGarrett D'Amore struct man_node *p; 24795c635efSGarrett D'Amore 248698f87a4SGarrett D'Amore p = man_node_alloc(man, line, pos, MAN_HEAD, tok); 249*260e9a87SYuri Pankov man_node_append(man, p); 250698f87a4SGarrett D'Amore man->next = MAN_NEXT_CHILD; 25195c635efSGarrett D'Amore } 25295c635efSGarrett D'Amore 253*260e9a87SYuri Pankov void 254698f87a4SGarrett D'Amore man_body_alloc(struct man *man, int line, int pos, enum mant tok) 25595c635efSGarrett D'Amore { 25695c635efSGarrett D'Amore struct man_node *p; 25795c635efSGarrett D'Amore 258698f87a4SGarrett D'Amore p = man_node_alloc(man, line, pos, MAN_BODY, tok); 259*260e9a87SYuri Pankov man_node_append(man, p); 260698f87a4SGarrett D'Amore man->next = MAN_NEXT_CHILD; 26195c635efSGarrett D'Amore } 26295c635efSGarrett D'Amore 263*260e9a87SYuri Pankov void 264698f87a4SGarrett D'Amore man_block_alloc(struct man *man, int line, int pos, enum mant tok) 26595c635efSGarrett D'Amore { 26695c635efSGarrett D'Amore struct man_node *p; 26795c635efSGarrett D'Amore 268698f87a4SGarrett D'Amore p = man_node_alloc(man, line, pos, MAN_BLOCK, tok); 269*260e9a87SYuri Pankov man_node_append(man, p); 270698f87a4SGarrett D'Amore man->next = MAN_NEXT_CHILD; 27195c635efSGarrett D'Amore } 27295c635efSGarrett D'Amore 273*260e9a87SYuri Pankov void 274698f87a4SGarrett D'Amore man_word_alloc(struct man *man, int line, int pos, const char *word) 27595c635efSGarrett D'Amore { 27695c635efSGarrett D'Amore struct man_node *n; 27795c635efSGarrett D'Amore 278698f87a4SGarrett D'Amore n = man_node_alloc(man, line, pos, MAN_TEXT, MAN_MAX); 279698f87a4SGarrett D'Amore n->string = roff_strdup(man->roff, word); 280*260e9a87SYuri Pankov man_node_append(man, n); 281698f87a4SGarrett D'Amore man->next = MAN_NEXT_SIBLING; 28295c635efSGarrett D'Amore } 28395c635efSGarrett D'Amore 284*260e9a87SYuri Pankov void 285*260e9a87SYuri Pankov man_word_append(struct man *man, const char *word) 286*260e9a87SYuri Pankov { 287*260e9a87SYuri Pankov struct man_node *n; 288*260e9a87SYuri Pankov char *addstr, *newstr; 289*260e9a87SYuri Pankov 290*260e9a87SYuri Pankov n = man->last; 291*260e9a87SYuri Pankov addstr = roff_strdup(man->roff, word); 292*260e9a87SYuri Pankov mandoc_asprintf(&newstr, "%s %s", n->string, addstr); 293*260e9a87SYuri Pankov free(addstr); 294*260e9a87SYuri Pankov free(n->string); 295*260e9a87SYuri Pankov n->string = newstr; 296*260e9a87SYuri Pankov man->next = MAN_NEXT_SIBLING; 297*260e9a87SYuri Pankov } 29895c635efSGarrett D'Amore 29995c635efSGarrett D'Amore /* 30095c635efSGarrett D'Amore * Free all of the resources held by a node. This does NOT unlink a 30195c635efSGarrett D'Amore * node from its context; for that, see man_node_unlink(). 30295c635efSGarrett D'Amore */ 30395c635efSGarrett D'Amore static void 30495c635efSGarrett D'Amore man_node_free(struct man_node *p) 30595c635efSGarrett D'Amore { 30695c635efSGarrett D'Amore 30795c635efSGarrett D'Amore free(p->string); 30895c635efSGarrett D'Amore free(p); 30995c635efSGarrett D'Amore } 31095c635efSGarrett D'Amore 31195c635efSGarrett D'Amore void 312698f87a4SGarrett D'Amore man_node_delete(struct man *man, struct man_node *p) 31395c635efSGarrett D'Amore { 31495c635efSGarrett D'Amore 31595c635efSGarrett D'Amore while (p->child) 316698f87a4SGarrett D'Amore man_node_delete(man, p->child); 31795c635efSGarrett D'Amore 318698f87a4SGarrett D'Amore man_node_unlink(man, p); 31995c635efSGarrett D'Amore man_node_free(p); 32095c635efSGarrett D'Amore } 32195c635efSGarrett D'Amore 322*260e9a87SYuri Pankov void 323698f87a4SGarrett D'Amore man_addeqn(struct man *man, const struct eqn *ep) 32495c635efSGarrett D'Amore { 32595c635efSGarrett D'Amore struct man_node *n; 32695c635efSGarrett D'Amore 327698f87a4SGarrett D'Amore n = man_node_alloc(man, ep->ln, ep->pos, MAN_EQN, MAN_MAX); 32895c635efSGarrett D'Amore n->eqn = ep; 329*260e9a87SYuri Pankov if (ep->ln > man->last->line) 330*260e9a87SYuri Pankov n->flags |= MAN_LINE; 331*260e9a87SYuri Pankov man_node_append(man, n); 332698f87a4SGarrett D'Amore man->next = MAN_NEXT_SIBLING; 333*260e9a87SYuri Pankov man_descope(man, ep->ln, ep->pos); 33495c635efSGarrett D'Amore } 33595c635efSGarrett D'Amore 336*260e9a87SYuri Pankov void 337698f87a4SGarrett D'Amore man_addspan(struct man *man, const struct tbl_span *sp) 33895c635efSGarrett D'Amore { 33995c635efSGarrett D'Amore struct man_node *n; 34095c635efSGarrett D'Amore 341*260e9a87SYuri Pankov man_breakscope(man, MAN_MAX); 342698f87a4SGarrett D'Amore n = man_node_alloc(man, sp->line, 0, MAN_TBL, MAN_MAX); 34395c635efSGarrett D'Amore n->span = sp; 344*260e9a87SYuri Pankov man_node_append(man, n); 345698f87a4SGarrett D'Amore man->next = MAN_NEXT_SIBLING; 346*260e9a87SYuri Pankov man_descope(man, sp->line, 0); 34795c635efSGarrett D'Amore } 34895c635efSGarrett D'Amore 349*260e9a87SYuri Pankov static void 350698f87a4SGarrett D'Amore man_descope(struct man *man, int line, int offs) 35195c635efSGarrett D'Amore { 35295c635efSGarrett D'Amore /* 35395c635efSGarrett D'Amore * Co-ordinate what happens with having a next-line scope open: 35495c635efSGarrett D'Amore * first close out the element scope (if applicable), then close 35595c635efSGarrett D'Amore * out the block scope (also if applicable). 35695c635efSGarrett D'Amore */ 35795c635efSGarrett D'Amore 358*260e9a87SYuri Pankov if (man->flags & MAN_ELINE) { 359698f87a4SGarrett D'Amore man->flags &= ~MAN_ELINE; 360*260e9a87SYuri Pankov man_unscope(man, man->last->parent); 36195c635efSGarrett D'Amore } 362*260e9a87SYuri Pankov if ( ! (man->flags & MAN_BLINE)) 363*260e9a87SYuri Pankov return; 364698f87a4SGarrett D'Amore man->flags &= ~MAN_BLINE; 365*260e9a87SYuri Pankov man_unscope(man, man->last->parent); 366*260e9a87SYuri Pankov man_body_alloc(man, line, offs, man->last->tok); 36795c635efSGarrett D'Amore } 36895c635efSGarrett D'Amore 36995c635efSGarrett D'Amore static int 370698f87a4SGarrett D'Amore man_ptext(struct man *man, int line, char *buf, int offs) 37195c635efSGarrett D'Amore { 37295c635efSGarrett D'Amore int i; 37395c635efSGarrett D'Amore 37495c635efSGarrett D'Amore /* Literal free-form text whitespace is preserved. */ 37595c635efSGarrett D'Amore 376*260e9a87SYuri Pankov if (man->flags & MAN_LITERAL) { 377*260e9a87SYuri Pankov man_word_alloc(man, line, offs, buf + offs); 378*260e9a87SYuri Pankov man_descope(man, line, offs); 379*260e9a87SYuri Pankov return(1); 38095c635efSGarrett D'Amore } 38195c635efSGarrett D'Amore 382*260e9a87SYuri Pankov for (i = offs; buf[i] == ' '; i++) 38395c635efSGarrett D'Amore /* Skip leading whitespace. */ ; 38495c635efSGarrett D'Amore 385698f87a4SGarrett D'Amore /* 386698f87a4SGarrett D'Amore * Blank lines are ignored right after headings 387698f87a4SGarrett D'Amore * but add a single vertical space elsewhere. 388698f87a4SGarrett D'Amore */ 389698f87a4SGarrett D'Amore 390*260e9a87SYuri Pankov if (buf[i] == '\0') { 39195c635efSGarrett D'Amore /* Allocate a blank entry. */ 392*260e9a87SYuri Pankov if (man->last->tok != MAN_SH && 393*260e9a87SYuri Pankov man->last->tok != MAN_SS) { 394*260e9a87SYuri Pankov man_elem_alloc(man, line, offs, MAN_sp); 395698f87a4SGarrett D'Amore man->next = MAN_NEXT_SIBLING; 396698f87a4SGarrett D'Amore } 397698f87a4SGarrett D'Amore return(1); 39895c635efSGarrett D'Amore } 39995c635efSGarrett D'Amore 40095c635efSGarrett D'Amore /* 40195c635efSGarrett D'Amore * Warn if the last un-escaped character is whitespace. Then 40295c635efSGarrett D'Amore * strip away the remaining spaces (tabs stay!). 40395c635efSGarrett D'Amore */ 40495c635efSGarrett D'Amore 40595c635efSGarrett D'Amore i = (int)strlen(buf); 40695c635efSGarrett D'Amore assert(i); 40795c635efSGarrett D'Amore 40895c635efSGarrett D'Amore if (' ' == buf[i - 1] || '\t' == buf[i - 1]) { 40995c635efSGarrett D'Amore if (i > 1 && '\\' != buf[i - 2]) 410*260e9a87SYuri Pankov mandoc_msg(MANDOCERR_SPACE_EOL, man->parse, 411*260e9a87SYuri Pankov line, i - 1, NULL); 41295c635efSGarrett D'Amore 41395c635efSGarrett D'Amore for (--i; i && ' ' == buf[i]; i--) 41495c635efSGarrett D'Amore /* Spin back to non-space. */ ; 41595c635efSGarrett D'Amore 41695c635efSGarrett D'Amore /* Jump ahead of escaped whitespace. */ 41795c635efSGarrett D'Amore i += '\\' == buf[i] ? 2 : 1; 41895c635efSGarrett D'Amore 41995c635efSGarrett D'Amore buf[i] = '\0'; 42095c635efSGarrett D'Amore } 421*260e9a87SYuri Pankov man_word_alloc(man, line, offs, buf + offs); 42295c635efSGarrett D'Amore 42395c635efSGarrett D'Amore /* 42495c635efSGarrett D'Amore * End-of-sentence check. If the last character is an unescaped 42595c635efSGarrett D'Amore * EOS character, then flag the node as being the end of a 42695c635efSGarrett D'Amore * sentence. The front-end will know how to interpret this. 42795c635efSGarrett D'Amore */ 42895c635efSGarrett D'Amore 42995c635efSGarrett D'Amore assert(i); 430*260e9a87SYuri Pankov if (mandoc_eos(buf, (size_t)i)) 431698f87a4SGarrett D'Amore man->last->flags |= MAN_EOS; 43295c635efSGarrett D'Amore 433*260e9a87SYuri Pankov man_descope(man, line, offs); 434*260e9a87SYuri Pankov return(1); 43595c635efSGarrett D'Amore } 43695c635efSGarrett D'Amore 43795c635efSGarrett D'Amore static int 438698f87a4SGarrett D'Amore man_pmacro(struct man *man, int ln, char *buf, int offs) 43995c635efSGarrett D'Amore { 44095c635efSGarrett D'Amore struct man_node *n; 441*260e9a87SYuri Pankov const char *cp; 442*260e9a87SYuri Pankov enum mant tok; 443*260e9a87SYuri Pankov int i, ppos; 444*260e9a87SYuri Pankov int bline; 445*260e9a87SYuri Pankov char mac[5]; 44695c635efSGarrett D'Amore 44795c635efSGarrett D'Amore ppos = offs; 44895c635efSGarrett D'Amore 44995c635efSGarrett D'Amore /* 45095c635efSGarrett D'Amore * Copy the first word into a nil-terminated buffer. 451*260e9a87SYuri Pankov * Stop when a space, tab, escape, or eoln is encountered. 45295c635efSGarrett D'Amore */ 45395c635efSGarrett D'Amore 45495c635efSGarrett D'Amore i = 0; 455*260e9a87SYuri Pankov while (i < 4 && strchr(" \t\\", buf[offs]) == NULL) 45695c635efSGarrett D'Amore mac[i++] = buf[offs++]; 45795c635efSGarrett D'Amore 45895c635efSGarrett D'Amore mac[i] = '\0'; 45995c635efSGarrett D'Amore 46095c635efSGarrett D'Amore tok = (i > 0 && i < 4) ? man_hash_find(mac) : MAN_MAX; 46195c635efSGarrett D'Amore 462*260e9a87SYuri Pankov if (tok == MAN_MAX) { 463*260e9a87SYuri Pankov mandoc_msg(MANDOCERR_MACRO, man->parse, 464*260e9a87SYuri Pankov ln, ppos, buf + ppos - 1); 46595c635efSGarrett D'Amore return(1); 46695c635efSGarrett D'Amore } 46795c635efSGarrett D'Amore 468*260e9a87SYuri Pankov /* Skip a leading escape sequence or tab. */ 46995c635efSGarrett D'Amore 470*260e9a87SYuri Pankov switch (buf[offs]) { 471*260e9a87SYuri Pankov case '\\': 472*260e9a87SYuri Pankov cp = buf + offs + 1; 473*260e9a87SYuri Pankov mandoc_escape(&cp, NULL, NULL); 474*260e9a87SYuri Pankov offs = cp - buf; 475*260e9a87SYuri Pankov break; 476*260e9a87SYuri Pankov case '\t': 477*260e9a87SYuri Pankov offs++; 478*260e9a87SYuri Pankov break; 479*260e9a87SYuri Pankov default: 480*260e9a87SYuri Pankov break; 481*260e9a87SYuri Pankov } 482*260e9a87SYuri Pankov 483*260e9a87SYuri Pankov /* Jump to the next non-whitespace word. */ 484*260e9a87SYuri Pankov 485*260e9a87SYuri Pankov while (buf[offs] && buf[offs] == ' ') 48695c635efSGarrett D'Amore offs++; 48795c635efSGarrett D'Amore 48895c635efSGarrett D'Amore /* 48995c635efSGarrett D'Amore * Trailing whitespace. Note that tabs are allowed to be passed 49095c635efSGarrett D'Amore * into the parser as "text", so we only warn about spaces here. 49195c635efSGarrett D'Amore */ 49295c635efSGarrett D'Amore 493*260e9a87SYuri Pankov if (buf[offs] == '\0' && buf[offs - 1] == ' ') 494*260e9a87SYuri Pankov mandoc_msg(MANDOCERR_SPACE_EOL, man->parse, 495*260e9a87SYuri Pankov ln, offs - 1, NULL); 49695c635efSGarrett D'Amore 49795c635efSGarrett D'Amore /* 498*260e9a87SYuri Pankov * Some macros break next-line scopes; otherwise, remember 499*260e9a87SYuri Pankov * whether we are in next-line scope for a block head. 50095c635efSGarrett D'Amore */ 50195c635efSGarrett D'Amore 502*260e9a87SYuri Pankov man_breakscope(man, tok); 503*260e9a87SYuri Pankov bline = man->flags & MAN_BLINE; 504*260e9a87SYuri Pankov 505*260e9a87SYuri Pankov /* Call to handler... */ 506*260e9a87SYuri Pankov 507*260e9a87SYuri Pankov assert(man_macros[tok].fp); 508*260e9a87SYuri Pankov (*man_macros[tok].fp)(man, tok, ln, ppos, &offs, buf); 509*260e9a87SYuri Pankov 510*260e9a87SYuri Pankov /* In quick mode (for mandocdb), abort after the NAME section. */ 511*260e9a87SYuri Pankov 512*260e9a87SYuri Pankov if (man->quick && tok == MAN_SH) { 513698f87a4SGarrett D'Amore n = man->last; 514*260e9a87SYuri Pankov if (n->type == MAN_BODY && 515*260e9a87SYuri Pankov strcmp(n->prev->child->string, "NAME")) 516*260e9a87SYuri Pankov return(2); 517*260e9a87SYuri Pankov } 51895c635efSGarrett D'Amore 519*260e9a87SYuri Pankov /* 520*260e9a87SYuri Pankov * If we are in a next-line scope for a block head, 521*260e9a87SYuri Pankov * close it out now and switch to the body, 522*260e9a87SYuri Pankov * unless the next-line scope is allowed to continue. 523*260e9a87SYuri Pankov */ 52495c635efSGarrett D'Amore 525*260e9a87SYuri Pankov if ( ! bline || man->flags & MAN_ELINE || 526*260e9a87SYuri Pankov man_macros[tok].flags & MAN_NSCOPED) 527*260e9a87SYuri Pankov return(1); 528*260e9a87SYuri Pankov 529*260e9a87SYuri Pankov assert(man->flags & MAN_BLINE); 530*260e9a87SYuri Pankov man->flags &= ~MAN_BLINE; 531*260e9a87SYuri Pankov 532*260e9a87SYuri Pankov man_unscope(man, man->last->parent); 533*260e9a87SYuri Pankov man_body_alloc(man, ln, ppos, man->last->tok); 534*260e9a87SYuri Pankov return(1); 535*260e9a87SYuri Pankov } 536*260e9a87SYuri Pankov 537*260e9a87SYuri Pankov void 538*260e9a87SYuri Pankov man_breakscope(struct man *man, enum mant tok) 539*260e9a87SYuri Pankov { 540*260e9a87SYuri Pankov struct man_node *n; 541*260e9a87SYuri Pankov 542*260e9a87SYuri Pankov /* 543*260e9a87SYuri Pankov * An element next line scope is open, 544*260e9a87SYuri Pankov * and the new macro is not allowed inside elements. 545*260e9a87SYuri Pankov * Delete the element that is being broken. 546*260e9a87SYuri Pankov */ 547*260e9a87SYuri Pankov 548*260e9a87SYuri Pankov if (man->flags & MAN_ELINE && (tok == MAN_MAX || 549*260e9a87SYuri Pankov ! (man_macros[tok].flags & MAN_NSCOPED))) { 550*260e9a87SYuri Pankov n = man->last; 551*260e9a87SYuri Pankov assert(n->type != MAN_TEXT); 552*260e9a87SYuri Pankov if (man_macros[n->tok].flags & MAN_NSCOPED) 55395c635efSGarrett D'Amore n = n->parent; 55495c635efSGarrett D'Amore 555*260e9a87SYuri Pankov mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse, 556*260e9a87SYuri Pankov n->line, n->pos, "%s breaks %s", 557*260e9a87SYuri Pankov tok == MAN_MAX ? "TS" : man_macronames[tok], 55895c635efSGarrett D'Amore man_macronames[n->tok]); 55995c635efSGarrett D'Amore 560698f87a4SGarrett D'Amore man_node_delete(man, n); 561698f87a4SGarrett D'Amore man->flags &= ~MAN_ELINE; 56295c635efSGarrett D'Amore } 56395c635efSGarrett D'Amore 56495c635efSGarrett D'Amore /* 565*260e9a87SYuri Pankov * A block header next line scope is open, 566*260e9a87SYuri Pankov * and the new macro is not allowed inside block headers. 567*260e9a87SYuri Pankov * Delete the block that is being broken. 56895c635efSGarrett D'Amore */ 569*260e9a87SYuri Pankov 570*260e9a87SYuri Pankov if (man->flags & MAN_BLINE && (tok == MAN_MAX || 571*260e9a87SYuri Pankov man_macros[tok].flags & MAN_BSCOPE)) { 572698f87a4SGarrett D'Amore n = man->last; 573*260e9a87SYuri Pankov if (n->type == MAN_TEXT) 574*260e9a87SYuri Pankov n = n->parent; 575*260e9a87SYuri Pankov if ( ! (man_macros[n->tok].flags & MAN_BSCOPE)) 57695c635efSGarrett D'Amore n = n->parent; 57795c635efSGarrett D'Amore 578*260e9a87SYuri Pankov assert(n->type == MAN_HEAD); 57995c635efSGarrett D'Amore n = n->parent; 580*260e9a87SYuri Pankov assert(n->type == MAN_BLOCK); 581*260e9a87SYuri Pankov assert(man_macros[n->tok].flags & MAN_SCOPED); 58295c635efSGarrett D'Amore 583*260e9a87SYuri Pankov mandoc_vmsg(MANDOCERR_BLK_LINE, man->parse, 584*260e9a87SYuri Pankov n->line, n->pos, "%s breaks %s", 585*260e9a87SYuri Pankov tok == MAN_MAX ? "TS" : man_macronames[tok], 58695c635efSGarrett D'Amore man_macronames[n->tok]); 58795c635efSGarrett D'Amore 588698f87a4SGarrett D'Amore man_node_delete(man, n); 589698f87a4SGarrett D'Amore man->flags &= ~MAN_BLINE; 59095c635efSGarrett D'Amore } 59195c635efSGarrett D'Amore } 59295c635efSGarrett D'Amore 59395c635efSGarrett D'Amore /* 594698f87a4SGarrett D'Amore * Unlink a node from its context. If "man" is provided, the last parse 59595c635efSGarrett D'Amore * point will also be adjusted accordingly. 59695c635efSGarrett D'Amore */ 59795c635efSGarrett D'Amore static void 598698f87a4SGarrett D'Amore man_node_unlink(struct man *man, struct man_node *n) 59995c635efSGarrett D'Amore { 60095c635efSGarrett D'Amore 60195c635efSGarrett D'Amore /* Adjust siblings. */ 60295c635efSGarrett D'Amore 60395c635efSGarrett D'Amore if (n->prev) 60495c635efSGarrett D'Amore n->prev->next = n->next; 60595c635efSGarrett D'Amore if (n->next) 60695c635efSGarrett D'Amore n->next->prev = n->prev; 60795c635efSGarrett D'Amore 60895c635efSGarrett D'Amore /* Adjust parent. */ 60995c635efSGarrett D'Amore 61095c635efSGarrett D'Amore if (n->parent) { 61195c635efSGarrett D'Amore n->parent->nchild--; 61295c635efSGarrett D'Amore if (n->parent->child == n) 61395c635efSGarrett D'Amore n->parent->child = n->prev ? n->prev : n->next; 61495c635efSGarrett D'Amore } 61595c635efSGarrett D'Amore 61695c635efSGarrett D'Amore /* Adjust parse point, if applicable. */ 61795c635efSGarrett D'Amore 618698f87a4SGarrett D'Amore if (man && man->last == n) { 61995c635efSGarrett D'Amore /*XXX: this can occur when bailing from validation. */ 62095c635efSGarrett D'Amore /*assert(NULL == n->next);*/ 62195c635efSGarrett D'Amore if (n->prev) { 622698f87a4SGarrett D'Amore man->last = n->prev; 623698f87a4SGarrett D'Amore man->next = MAN_NEXT_SIBLING; 62495c635efSGarrett D'Amore } else { 625698f87a4SGarrett D'Amore man->last = n->parent; 626698f87a4SGarrett D'Amore man->next = MAN_NEXT_CHILD; 62795c635efSGarrett D'Amore } 62895c635efSGarrett D'Amore } 62995c635efSGarrett D'Amore 630698f87a4SGarrett D'Amore if (man && man->first == n) 631698f87a4SGarrett D'Amore man->first = NULL; 63295c635efSGarrett D'Amore } 63395c635efSGarrett D'Amore 63495c635efSGarrett D'Amore const struct mparse * 635698f87a4SGarrett D'Amore man_mparse(const struct man *man) 63695c635efSGarrett D'Amore { 63795c635efSGarrett D'Amore 638698f87a4SGarrett D'Amore assert(man && man->parse); 639698f87a4SGarrett D'Amore return(man->parse); 64095c635efSGarrett D'Amore } 641*260e9a87SYuri Pankov 642*260e9a87SYuri Pankov void 643*260e9a87SYuri Pankov man_deroff(char **dest, const struct man_node *n) 644*260e9a87SYuri Pankov { 645*260e9a87SYuri Pankov char *cp; 646*260e9a87SYuri Pankov size_t sz; 647*260e9a87SYuri Pankov 648*260e9a87SYuri Pankov if (n->type != MAN_TEXT) { 649*260e9a87SYuri Pankov for (n = n->child; n; n = n->next) 650*260e9a87SYuri Pankov man_deroff(dest, n); 651*260e9a87SYuri Pankov return; 652*260e9a87SYuri Pankov } 653*260e9a87SYuri Pankov 654*260e9a87SYuri Pankov /* Skip leading whitespace and escape sequences. */ 655*260e9a87SYuri Pankov 656*260e9a87SYuri Pankov cp = n->string; 657*260e9a87SYuri Pankov while ('\0' != *cp) { 658*260e9a87SYuri Pankov if ('\\' == *cp) { 659*260e9a87SYuri Pankov cp++; 660*260e9a87SYuri Pankov mandoc_escape((const char **)&cp, NULL, NULL); 661*260e9a87SYuri Pankov } else if (isspace((unsigned char)*cp)) 662*260e9a87SYuri Pankov cp++; 663*260e9a87SYuri Pankov else 664*260e9a87SYuri Pankov break; 665*260e9a87SYuri Pankov } 666*260e9a87SYuri Pankov 667*260e9a87SYuri Pankov /* Skip trailing whitespace. */ 668*260e9a87SYuri Pankov 669*260e9a87SYuri Pankov for (sz = strlen(cp); sz; sz--) 670*260e9a87SYuri Pankov if (0 == isspace((unsigned char)cp[sz-1])) 671*260e9a87SYuri Pankov break; 672*260e9a87SYuri Pankov 673*260e9a87SYuri Pankov /* Skip empty strings. */ 674*260e9a87SYuri Pankov 675*260e9a87SYuri Pankov if (0 == sz) 676*260e9a87SYuri Pankov return; 677*260e9a87SYuri Pankov 678*260e9a87SYuri Pankov if (NULL == *dest) { 679*260e9a87SYuri Pankov *dest = mandoc_strndup(cp, sz); 680*260e9a87SYuri Pankov return; 681*260e9a87SYuri Pankov } 682*260e9a87SYuri Pankov 683*260e9a87SYuri Pankov mandoc_asprintf(&cp, "%s %*s", *dest, (int)sz, cp); 684*260e9a87SYuri Pankov free(*dest); 685*260e9a87SYuri Pankov *dest = cp; 686*260e9a87SYuri Pankov } 687