xref: /titanic_44/usr/src/cmd/mandoc/mdoc.c (revision 698f87a48e2e945bfe5493ce168e0d0ae1cedd5c)
1 /*	$Id: mdoc.c,v 1.206 2013/12/24 19:11:46 schwarze Exp $ */
2 /*
3  * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2010, 2012, 2013 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include <sys/types.h>
23 
24 #include <assert.h>
25 #include <stdarg.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <time.h>
30 
31 #include "mdoc.h"
32 #include "mandoc.h"
33 #include "libmdoc.h"
34 #include "libmandoc.h"
35 
36 const	char *const __mdoc_macronames[MDOC_MAX] = {
37 	"Ap",		"Dd",		"Dt",		"Os",
38 	"Sh",		"Ss",		"Pp",		"D1",
39 	"Dl",		"Bd",		"Ed",		"Bl",
40 	"El",		"It",		"Ad",		"An",
41 	"Ar",		"Cd",		"Cm",		"Dv",
42 	"Er",		"Ev",		"Ex",		"Fa",
43 	"Fd",		"Fl",		"Fn",		"Ft",
44 	"Ic",		"In",		"Li",		"Nd",
45 	"Nm",		"Op",		"Ot",		"Pa",
46 	"Rv",		"St",		"Va",		"Vt",
47 	/* LINTED */
48 	"Xr",		"%A",		"%B",		"%D",
49 	/* LINTED */
50 	"%I",		"%J",		"%N",		"%O",
51 	/* LINTED */
52 	"%P",		"%R",		"%T",		"%V",
53 	"Ac",		"Ao",		"Aq",		"At",
54 	"Bc",		"Bf",		"Bo",		"Bq",
55 	"Bsx",		"Bx",		"Db",		"Dc",
56 	"Do",		"Dq",		"Ec",		"Ef",
57 	"Em",		"Eo",		"Fx",		"Ms",
58 	"No",		"Ns",		"Nx",		"Ox",
59 	"Pc",		"Pf",		"Po",		"Pq",
60 	"Qc",		"Ql",		"Qo",		"Qq",
61 	"Re",		"Rs",		"Sc",		"So",
62 	"Sq",		"Sm",		"Sx",		"Sy",
63 	"Tn",		"Ux",		"Xc",		"Xo",
64 	"Fo",		"Fc",		"Oo",		"Oc",
65 	"Bk",		"Ek",		"Bt",		"Hf",
66 	"Fr",		"Ud",		"Lb",		"Lp",
67 	"Lk",		"Mt",		"Brq",		"Bro",
68 	/* LINTED */
69 	"Brc",		"%C",		"Es",		"En",
70 	/* LINTED */
71 	"Dx",		"%Q",		"br",		"sp",
72 	/* LINTED */
73 	"%U",		"Ta"
74 	};
75 
76 const	char *const __mdoc_argnames[MDOC_ARG_MAX] = {
77 	"split",		"nosplit",		"ragged",
78 	"unfilled",		"literal",		"file",
79 	"offset",		"bullet",		"dash",
80 	"hyphen",		"item",			"enum",
81 	"tag",			"diag",			"hang",
82 	"ohang",		"inset",		"column",
83 	"width",		"compact",		"std",
84 	"filled",		"words",		"emphasis",
85 	"symbolic",		"nested",		"centered"
86 	};
87 
88 const	char * const *mdoc_macronames = __mdoc_macronames;
89 const	char * const *mdoc_argnames = __mdoc_argnames;
90 
/* Node disposal. */
static	void	 mdoc_node_free(struct mdoc_node *p);
static	void	 mdoc_node_unlink(struct mdoc *mdoc, struct mdoc_node *n);

/* Volatile parser state. */
static	void	 mdoc_free1(struct mdoc *mdoc);
static	void	 mdoc_alloc1(struct mdoc *mdoc);

/* Node construction. */
static	struct mdoc_node *node_alloc(struct mdoc *mdoc, int line, int pos,
			enum mdoct tok, enum mdoc_type type);
static	int	 node_append(struct mdoc *mdoc, struct mdoc_node *p);

/* Line parsers. */
#if 0
static	int	 mdoc_preptext(struct mdoc *mdoc, int line,
			char *buf, int offs);
#endif
static	int	 mdoc_ptext(struct mdoc *mdoc, int line, char *buf, int offs);
static	int	 mdoc_pmacro(struct mdoc *mdoc, int ln, char *buf, int offs);
105 
106 const struct mdoc_node *
mdoc_node(const struct mdoc * mdoc)107 mdoc_node(const struct mdoc *mdoc)
108 {
109 
110 	assert( ! (MDOC_HALT & mdoc->flags));
111 	return(mdoc->first);
112 }
113 
114 
115 const struct mdoc_meta *
mdoc_meta(const struct mdoc * mdoc)116 mdoc_meta(const struct mdoc *mdoc)
117 {
118 
119 	assert( ! (MDOC_HALT & mdoc->flags));
120 	return(&mdoc->meta);
121 }
122 
123 
124 /*
125  * Frees volatile resources (parse tree, meta-data, fields).
126  */
127 static void
mdoc_free1(struct mdoc * mdoc)128 mdoc_free1(struct mdoc *mdoc)
129 {
130 
131 	if (mdoc->first)
132 		mdoc_node_delete(mdoc, mdoc->first);
133 	if (mdoc->meta.title)
134 		free(mdoc->meta.title);
135 	if (mdoc->meta.os)
136 		free(mdoc->meta.os);
137 	if (mdoc->meta.name)
138 		free(mdoc->meta.name);
139 	if (mdoc->meta.arch)
140 		free(mdoc->meta.arch);
141 	if (mdoc->meta.vol)
142 		free(mdoc->meta.vol);
143 	if (mdoc->meta.msec)
144 		free(mdoc->meta.msec);
145 	if (mdoc->meta.date)
146 		free(mdoc->meta.date);
147 }
148 
149 
150 /*
151  * Allocate all volatile resources (parse tree, meta-data, fields).
152  */
153 static void
mdoc_alloc1(struct mdoc * mdoc)154 mdoc_alloc1(struct mdoc *mdoc)
155 {
156 
157 	memset(&mdoc->meta, 0, sizeof(struct mdoc_meta));
158 	mdoc->flags = 0;
159 	mdoc->lastnamed = mdoc->lastsec = SEC_NONE;
160 	mdoc->last = mandoc_calloc(1, sizeof(struct mdoc_node));
161 	mdoc->first = mdoc->last;
162 	mdoc->last->type = MDOC_ROOT;
163 	mdoc->last->tok = MDOC_MAX;
164 	mdoc->next = MDOC_NEXT_CHILD;
165 }
166 
167 
/*
 * Reset the parser for another document: drop the volatile resources
 * with mdoc_free1(), then rebuild them with mdoc_alloc1().
 * Fields not touched by those two helpers (parse, defos, roff) are
 * kept.
 */
void
mdoc_reset(struct mdoc *mdoc)
{

	mdoc_free1(mdoc);	/* old tree and meta-data strings */
	mdoc_alloc1(mdoc);	/* fresh root node, zeroed meta-data */
}
181 
182 
/*
 * Destroy a parser: release its volatile resources and then the
 * structure itself.  The pointer must not be used afterwards.
 */
void
mdoc_free(struct mdoc *mdoc)
{

	mdoc_free1(mdoc);	/* tree and meta-data first */
	free(mdoc);
}
194 
195 
196 /*
197  * Allocate volatile and non-volatile parse resources.
198  */
199 struct mdoc *
mdoc_alloc(struct roff * roff,struct mparse * parse,char * defos)200 mdoc_alloc(struct roff *roff, struct mparse *parse, char *defos)
201 {
202 	struct mdoc	*p;
203 
204 	p = mandoc_calloc(1, sizeof(struct mdoc));
205 
206 	p->parse = parse;
207 	p->defos = defos;
208 	p->roff = roff;
209 
210 	mdoc_hash_init();
211 	mdoc_alloc1(p);
212 	return(p);
213 }
214 
215 
216 /*
217  * Climb back up the parse tree, validating open scopes.  Mostly calls
218  * through to macro_end() in macro.c.
219  */
220 int
mdoc_endparse(struct mdoc * mdoc)221 mdoc_endparse(struct mdoc *mdoc)
222 {
223 
224 	assert( ! (MDOC_HALT & mdoc->flags));
225 	if (mdoc_macroend(mdoc))
226 		return(1);
227 	mdoc->flags |= MDOC_HALT;
228 	return(0);
229 }
230 
231 int
mdoc_addeqn(struct mdoc * mdoc,const struct eqn * ep)232 mdoc_addeqn(struct mdoc *mdoc, const struct eqn *ep)
233 {
234 	struct mdoc_node *n;
235 
236 	assert( ! (MDOC_HALT & mdoc->flags));
237 
238 	/* No text before an initial macro. */
239 
240 	if (SEC_NONE == mdoc->lastnamed) {
241 		mdoc_pmsg(mdoc, ep->ln, ep->pos, MANDOCERR_NOTEXT);
242 		return(1);
243 	}
244 
245 	n = node_alloc(mdoc, ep->ln, ep->pos, MDOC_MAX, MDOC_EQN);
246 	n->eqn = ep;
247 
248 	if ( ! node_append(mdoc, n))
249 		return(0);
250 
251 	mdoc->next = MDOC_NEXT_SIBLING;
252 	return(1);
253 }
254 
255 int
mdoc_addspan(struct mdoc * mdoc,const struct tbl_span * sp)256 mdoc_addspan(struct mdoc *mdoc, const struct tbl_span *sp)
257 {
258 	struct mdoc_node *n;
259 
260 	assert( ! (MDOC_HALT & mdoc->flags));
261 
262 	/* No text before an initial macro. */
263 
264 	if (SEC_NONE == mdoc->lastnamed) {
265 		mdoc_pmsg(mdoc, sp->line, 0, MANDOCERR_NOTEXT);
266 		return(1);
267 	}
268 
269 	n = node_alloc(mdoc, sp->line, 0, MDOC_MAX, MDOC_TBL);
270 	n->span = sp;
271 
272 	if ( ! node_append(mdoc, n))
273 		return(0);
274 
275 	mdoc->next = MDOC_NEXT_SIBLING;
276 	return(1);
277 }
278 
279 
280 /*
281  * Main parse routine.  Parses a single line -- really just hands off to
282  * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()).
283  */
284 int
mdoc_parseln(struct mdoc * mdoc,int ln,char * buf,int offs)285 mdoc_parseln(struct mdoc *mdoc, int ln, char *buf, int offs)
286 {
287 
288 	assert( ! (MDOC_HALT & mdoc->flags));
289 
290 	mdoc->flags |= MDOC_NEWLINE;
291 
292 	/*
293 	 * Let the roff nS register switch SYNOPSIS mode early,
294 	 * such that the parser knows at all times
295 	 * whether this mode is on or off.
296 	 * Note that this mode is also switched by the Sh macro.
297 	 */
298 	if (roff_getreg(mdoc->roff, "nS"))
299 		mdoc->flags |= MDOC_SYNOPSIS;
300 	else
301 		mdoc->flags &= ~MDOC_SYNOPSIS;
302 
303 	return(roff_getcontrol(mdoc->roff, buf, &offs) ?
304 			mdoc_pmacro(mdoc, ln, buf, offs) :
305 			mdoc_ptext(mdoc, ln, buf, offs));
306 }
307 
308 int
mdoc_macro(MACRO_PROT_ARGS)309 mdoc_macro(MACRO_PROT_ARGS)
310 {
311 	assert(tok < MDOC_MAX);
312 
313 	/* If we're in the body, deny prologue calls. */
314 
315 	if (MDOC_PROLOGUE & mdoc_macros[tok].flags &&
316 			MDOC_PBODY & mdoc->flags) {
317 		mdoc_pmsg(mdoc, line, ppos, MANDOCERR_BADBODY);
318 		return(1);
319 	}
320 
321 	/* If we're in the prologue, deny "body" macros.  */
322 
323 	if ( ! (MDOC_PROLOGUE & mdoc_macros[tok].flags) &&
324 			! (MDOC_PBODY & mdoc->flags)) {
325 		mdoc_pmsg(mdoc, line, ppos, MANDOCERR_BADPROLOG);
326 		if (NULL == mdoc->meta.msec)
327 			mdoc->meta.msec = mandoc_strdup("1");
328 		if (NULL == mdoc->meta.title)
329 			mdoc->meta.title = mandoc_strdup("UNKNOWN");
330 		if (NULL == mdoc->meta.vol)
331 			mdoc->meta.vol = mandoc_strdup("LOCAL");
332 		if (NULL == mdoc->meta.os)
333 			mdoc->meta.os = mandoc_strdup("LOCAL");
334 		if (NULL == mdoc->meta.date)
335 			mdoc->meta.date = mandoc_normdate
336 				(mdoc->parse, NULL, line, ppos);
337 		mdoc->flags |= MDOC_PBODY;
338 	}
339 
340 	return((*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf));
341 }
342 
343 
344 static int
node_append(struct mdoc * mdoc,struct mdoc_node * p)345 node_append(struct mdoc *mdoc, struct mdoc_node *p)
346 {
347 
348 	assert(mdoc->last);
349 	assert(mdoc->first);
350 	assert(MDOC_ROOT != p->type);
351 
352 	switch (mdoc->next) {
353 	case (MDOC_NEXT_SIBLING):
354 		mdoc->last->next = p;
355 		p->prev = mdoc->last;
356 		p->parent = mdoc->last->parent;
357 		break;
358 	case (MDOC_NEXT_CHILD):
359 		mdoc->last->child = p;
360 		p->parent = mdoc->last;
361 		break;
362 	default:
363 		abort();
364 		/* NOTREACHED */
365 	}
366 
367 	p->parent->nchild++;
368 
369 	/*
370 	 * Copy over the normalised-data pointer of our parent.  Not
371 	 * everybody has one, but copying a null pointer is fine.
372 	 */
373 
374 	switch (p->type) {
375 	case (MDOC_BODY):
376 		if (ENDBODY_NOT != p->end)
377 			break;
378 		/* FALLTHROUGH */
379 	case (MDOC_TAIL):
380 		/* FALLTHROUGH */
381 	case (MDOC_HEAD):
382 		p->norm = p->parent->norm;
383 		break;
384 	default:
385 		break;
386 	}
387 
388 	if ( ! mdoc_valid_pre(mdoc, p))
389 		return(0);
390 
391 	switch (p->type) {
392 	case (MDOC_HEAD):
393 		assert(MDOC_BLOCK == p->parent->type);
394 		p->parent->head = p;
395 		break;
396 	case (MDOC_TAIL):
397 		assert(MDOC_BLOCK == p->parent->type);
398 		p->parent->tail = p;
399 		break;
400 	case (MDOC_BODY):
401 		if (p->end)
402 			break;
403 		assert(MDOC_BLOCK == p->parent->type);
404 		p->parent->body = p;
405 		break;
406 	default:
407 		break;
408 	}
409 
410 	mdoc->last = p;
411 
412 	switch (p->type) {
413 	case (MDOC_TBL):
414 		/* FALLTHROUGH */
415 	case (MDOC_TEXT):
416 		if ( ! mdoc_valid_post(mdoc))
417 			return(0);
418 		break;
419 	default:
420 		break;
421 	}
422 
423 	return(1);
424 }
425 
426 
427 static struct mdoc_node *
node_alloc(struct mdoc * mdoc,int line,int pos,enum mdoct tok,enum mdoc_type type)428 node_alloc(struct mdoc *mdoc, int line, int pos,
429 		enum mdoct tok, enum mdoc_type type)
430 {
431 	struct mdoc_node *p;
432 
433 	p = mandoc_calloc(1, sizeof(struct mdoc_node));
434 	p->sec = mdoc->lastsec;
435 	p->line = line;
436 	p->pos = pos;
437 	p->lastline = line;
438 	p->tok = tok;
439 	p->type = type;
440 
441 	/* Flag analysis. */
442 
443 	if (MDOC_SYNOPSIS & mdoc->flags)
444 		p->flags |= MDOC_SYNPRETTY;
445 	else
446 		p->flags &= ~MDOC_SYNPRETTY;
447 	if (MDOC_NEWLINE & mdoc->flags)
448 		p->flags |= MDOC_LINE;
449 	mdoc->flags &= ~MDOC_NEWLINE;
450 
451 	return(p);
452 }
453 
454 
455 int
mdoc_tail_alloc(struct mdoc * mdoc,int line,int pos,enum mdoct tok)456 mdoc_tail_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok)
457 {
458 	struct mdoc_node *p;
459 
460 	p = node_alloc(mdoc, line, pos, tok, MDOC_TAIL);
461 	if ( ! node_append(mdoc, p))
462 		return(0);
463 	mdoc->next = MDOC_NEXT_CHILD;
464 	return(1);
465 }
466 
467 
468 int
mdoc_head_alloc(struct mdoc * mdoc,int line,int pos,enum mdoct tok)469 mdoc_head_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok)
470 {
471 	struct mdoc_node *p;
472 
473 	assert(mdoc->first);
474 	assert(mdoc->last);
475 
476 	p = node_alloc(mdoc, line, pos, tok, MDOC_HEAD);
477 	if ( ! node_append(mdoc, p))
478 		return(0);
479 	mdoc->next = MDOC_NEXT_CHILD;
480 	return(1);
481 }
482 
483 
484 int
mdoc_body_alloc(struct mdoc * mdoc,int line,int pos,enum mdoct tok)485 mdoc_body_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok)
486 {
487 	struct mdoc_node *p;
488 
489 	p = node_alloc(mdoc, line, pos, tok, MDOC_BODY);
490 	if ( ! node_append(mdoc, p))
491 		return(0);
492 	mdoc->next = MDOC_NEXT_CHILD;
493 	return(1);
494 }
495 
496 
497 int
mdoc_endbody_alloc(struct mdoc * mdoc,int line,int pos,enum mdoct tok,struct mdoc_node * body,enum mdoc_endbody end)498 mdoc_endbody_alloc(struct mdoc *mdoc, int line, int pos, enum mdoct tok,
499 		struct mdoc_node *body, enum mdoc_endbody end)
500 {
501 	struct mdoc_node *p;
502 
503 	p = node_alloc(mdoc, line, pos, tok, MDOC_BODY);
504 	p->pending = body;
505 	p->norm = body->norm;
506 	p->end = end;
507 	if ( ! node_append(mdoc, p))
508 		return(0);
509 	mdoc->next = MDOC_NEXT_SIBLING;
510 	return(1);
511 }
512 
513 
514 int
mdoc_block_alloc(struct mdoc * mdoc,int line,int pos,enum mdoct tok,struct mdoc_arg * args)515 mdoc_block_alloc(struct mdoc *mdoc, int line, int pos,
516 		enum mdoct tok, struct mdoc_arg *args)
517 {
518 	struct mdoc_node *p;
519 
520 	p = node_alloc(mdoc, line, pos, tok, MDOC_BLOCK);
521 	p->args = args;
522 	if (p->args)
523 		(args->refcnt)++;
524 
525 	switch (tok) {
526 	case (MDOC_Bd):
527 		/* FALLTHROUGH */
528 	case (MDOC_Bf):
529 		/* FALLTHROUGH */
530 	case (MDOC_Bl):
531 		/* FALLTHROUGH */
532 	case (MDOC_Rs):
533 		p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
534 		break;
535 	default:
536 		break;
537 	}
538 
539 	if ( ! node_append(mdoc, p))
540 		return(0);
541 	mdoc->next = MDOC_NEXT_CHILD;
542 	return(1);
543 }
544 
545 
546 int
mdoc_elem_alloc(struct mdoc * mdoc,int line,int pos,enum mdoct tok,struct mdoc_arg * args)547 mdoc_elem_alloc(struct mdoc *mdoc, int line, int pos,
548 		enum mdoct tok, struct mdoc_arg *args)
549 {
550 	struct mdoc_node *p;
551 
552 	p = node_alloc(mdoc, line, pos, tok, MDOC_ELEM);
553 	p->args = args;
554 	if (p->args)
555 		(args->refcnt)++;
556 
557 	switch (tok) {
558 	case (MDOC_An):
559 		p->norm = mandoc_calloc(1, sizeof(union mdoc_data));
560 		break;
561 	default:
562 		break;
563 	}
564 
565 	if ( ! node_append(mdoc, p))
566 		return(0);
567 	mdoc->next = MDOC_NEXT_CHILD;
568 	return(1);
569 }
570 
571 int
mdoc_word_alloc(struct mdoc * mdoc,int line,int pos,const char * p)572 mdoc_word_alloc(struct mdoc *mdoc, int line, int pos, const char *p)
573 {
574 	struct mdoc_node *n;
575 
576 	n = node_alloc(mdoc, line, pos, MDOC_MAX, MDOC_TEXT);
577 	n->string = roff_strdup(mdoc->roff, p);
578 
579 	if ( ! node_append(mdoc, n))
580 		return(0);
581 
582 	mdoc->next = MDOC_NEXT_SIBLING;
583 	return(1);
584 }
585 
586 void
mdoc_word_append(struct mdoc * mdoc,const char * p)587 mdoc_word_append(struct mdoc *mdoc, const char *p)
588 {
589 	struct mdoc_node	*n;
590 	char			*addstr, *newstr;
591 
592 	n = mdoc->last;
593 	addstr = roff_strdup(mdoc->roff, p);
594 	if (-1 == asprintf(&newstr, "%s %s", n->string, addstr)) {
595 		perror(NULL);
596 		exit((int)MANDOCLEVEL_SYSERR);
597 	}
598 	free(addstr);
599 	free(n->string);
600 	n->string = newstr;
601 	mdoc->next = MDOC_NEXT_SIBLING;
602 }
603 
604 static void
mdoc_node_free(struct mdoc_node * p)605 mdoc_node_free(struct mdoc_node *p)
606 {
607 
608 	if (MDOC_BLOCK == p->type || MDOC_ELEM == p->type)
609 		free(p->norm);
610 	if (p->string)
611 		free(p->string);
612 	if (p->args)
613 		mdoc_argv_free(p->args);
614 	free(p);
615 }
616 
617 
618 static void
mdoc_node_unlink(struct mdoc * mdoc,struct mdoc_node * n)619 mdoc_node_unlink(struct mdoc *mdoc, struct mdoc_node *n)
620 {
621 
622 	/* Adjust siblings. */
623 
624 	if (n->prev)
625 		n->prev->next = n->next;
626 	if (n->next)
627 		n->next->prev = n->prev;
628 
629 	/* Adjust parent. */
630 
631 	if (n->parent) {
632 		n->parent->nchild--;
633 		if (n->parent->child == n)
634 			n->parent->child = n->prev ? n->prev : n->next;
635 		if (n->parent->last == n)
636 			n->parent->last = n->prev ? n->prev : NULL;
637 	}
638 
639 	/* Adjust parse point, if applicable. */
640 
641 	if (mdoc && mdoc->last == n) {
642 		if (n->prev) {
643 			mdoc->last = n->prev;
644 			mdoc->next = MDOC_NEXT_SIBLING;
645 		} else {
646 			mdoc->last = n->parent;
647 			mdoc->next = MDOC_NEXT_CHILD;
648 		}
649 	}
650 
651 	if (mdoc && mdoc->first == n)
652 		mdoc->first = NULL;
653 }
654 
655 
656 void
mdoc_node_delete(struct mdoc * mdoc,struct mdoc_node * p)657 mdoc_node_delete(struct mdoc *mdoc, struct mdoc_node *p)
658 {
659 
660 	while (p->child) {
661 		assert(p->nchild);
662 		mdoc_node_delete(mdoc, p->child);
663 	}
664 	assert(0 == p->nchild);
665 
666 	mdoc_node_unlink(mdoc, p);
667 	mdoc_node_free(p);
668 }
669 
670 int
mdoc_node_relink(struct mdoc * mdoc,struct mdoc_node * p)671 mdoc_node_relink(struct mdoc *mdoc, struct mdoc_node *p)
672 {
673 
674 	mdoc_node_unlink(mdoc, p);
675 	return(node_append(mdoc, p));
676 }
677 
#if 0
/*
 * Pre-treat a text line (currently compiled out).
 * A text line may contain equations, which have to be handled apart
 * from the regular text.  This function walks the line, hands plain
 * text to mdoc_ptext() and equations to the roff equation parser.
 * It has to cope with several equations on one line and with
 * equations that do not end at the end of the line, i.e. that are
 * re-entered in the next roff parse.
 *
 * NOTE(review): when no closing delimiter is found, eq_end is NULL
 * and the final offset adjustment computes with a null pointer --
 * fix before re-enabling.
 */
static int
mdoc_preptext(struct mdoc *mdoc, int line, char *buf, int offs)
{
	char		*eq_start, *eq_end;
	char		 delim;

	while ('\0' != buf[offs]) {
		/* Mark starting position if eqn is set. */
		eq_start = NULL;
		delim = roff_eqndelim(mdoc->roff);
		if ('\0' != delim) {
			eq_start = strchr(buf + offs, delim);
			if (NULL != eq_start)
				*eq_start++ = '\0';
		}

		/* Parse text as normal. */
		if ( ! mdoc_ptext(mdoc, line, buf, offs))
			return(0);

		/* Continue only if an equation exists. */
		if (NULL == eq_start)
			break;

		/* Read past the end of the equation. */
		offs += eq_start - (buf + offs);
		assert(eq_start == &buf[offs]);
		eq_end = strchr(buf + offs, delim);
		if (NULL != eq_end) {
			*eq_end++ = '\0';
			while (' ' == *eq_end)
				eq_end++;
		}

		/* Parse the equation itself. */
		roff_openeqn(mdoc->roff, NULL, line, offs, buf);

		/* Process a finished equation? */
		if (roff_closeeqn(mdoc->roff) &&
		    ! mdoc_addeqn(mdoc, roff_eqn(mdoc->roff)))
			return(0);

		offs += (eq_end - (buf + offs));
	}

	return(1);
}
#endif
731 
732 /*
733  * Parse free-form text, that is, a line that does not begin with the
734  * control character.
735  */
736 static int
mdoc_ptext(struct mdoc * mdoc,int line,char * buf,int offs)737 mdoc_ptext(struct mdoc *mdoc, int line, char *buf, int offs)
738 {
739 	char		 *c, *ws, *end;
740 	struct mdoc_node *n;
741 
742 	/* No text before an initial macro. */
743 
744 	if (SEC_NONE == mdoc->lastnamed) {
745 		mdoc_pmsg(mdoc, line, offs, MANDOCERR_NOTEXT);
746 		return(1);
747 	}
748 
749 	assert(mdoc->last);
750 	n = mdoc->last;
751 
752 	/*
753 	 * Divert directly to list processing if we're encountering a
754 	 * columnar MDOC_BLOCK with or without a prior MDOC_BLOCK entry
755 	 * (a MDOC_BODY means it's already open, in which case we should
756 	 * process within its context in the normal way).
757 	 */
758 
759 	if (MDOC_Bl == n->tok && MDOC_BODY == n->type &&
760 			LIST_column == n->norm->Bl.type) {
761 		/* `Bl' is open without any children. */
762 		mdoc->flags |= MDOC_FREECOL;
763 		return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf));
764 	}
765 
766 	if (MDOC_It == n->tok && MDOC_BLOCK == n->type &&
767 			NULL != n->parent &&
768 			MDOC_Bl == n->parent->tok &&
769 			LIST_column == n->parent->norm->Bl.type) {
770 		/* `Bl' has block-level `It' children. */
771 		mdoc->flags |= MDOC_FREECOL;
772 		return(mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf));
773 	}
774 
775 	/*
776 	 * Search for the beginning of unescaped trailing whitespace (ws)
777 	 * and for the first character not to be output (end).
778 	 */
779 
780 	/* FIXME: replace with strcspn(). */
781 	ws = NULL;
782 	for (c = end = buf + offs; *c; c++) {
783 		switch (*c) {
784 		case ' ':
785 			if (NULL == ws)
786 				ws = c;
787 			continue;
788 		case '\t':
789 			/*
790 			 * Always warn about trailing tabs,
791 			 * even outside literal context,
792 			 * where they should be put on the next line.
793 			 */
794 			if (NULL == ws)
795 				ws = c;
796 			/*
797 			 * Strip trailing tabs in literal context only;
798 			 * outside, they affect the next line.
799 			 */
800 			if (MDOC_LITERAL & mdoc->flags)
801 				continue;
802 			break;
803 		case '\\':
804 			/* Skip the escaped character, too, if any. */
805 			if (c[1])
806 				c++;
807 			/* FALLTHROUGH */
808 		default:
809 			ws = NULL;
810 			break;
811 		}
812 		end = c + 1;
813 	}
814 	*end = '\0';
815 
816 	if (ws)
817 		mdoc_pmsg(mdoc, line, (int)(ws-buf), MANDOCERR_EOLNSPACE);
818 
819 	if ('\0' == buf[offs] && ! (MDOC_LITERAL & mdoc->flags)) {
820 		mdoc_pmsg(mdoc, line, (int)(c-buf), MANDOCERR_NOBLANKLN);
821 
822 		/*
823 		 * Insert a `sp' in the case of a blank line.  Technically,
824 		 * blank lines aren't allowed, but enough manuals assume this
825 		 * behaviour that we want to work around it.
826 		 */
827 		if ( ! mdoc_elem_alloc(mdoc, line, offs, MDOC_sp, NULL))
828 			return(0);
829 
830 		mdoc->next = MDOC_NEXT_SIBLING;
831 
832 		return(mdoc_valid_post(mdoc));
833 	}
834 
835 	if ( ! mdoc_word_alloc(mdoc, line, offs, buf+offs))
836 		return(0);
837 
838 	if (MDOC_LITERAL & mdoc->flags)
839 		return(1);
840 
841 	/*
842 	 * End-of-sentence check.  If the last character is an unescaped
843 	 * EOS character, then flag the node as being the end of a
844 	 * sentence.  The front-end will know how to interpret this.
845 	 */
846 
847 	assert(buf < end);
848 
849 	if (mandoc_eos(buf+offs, (size_t)(end-buf-offs), 0))
850 		mdoc->last->flags |= MDOC_EOS;
851 
852 	return(1);
853 }
854 
855 
856 /*
857  * Parse a macro line, that is, a line beginning with the control
858  * character.
859  */
860 static int
mdoc_pmacro(struct mdoc * mdoc,int ln,char * buf,int offs)861 mdoc_pmacro(struct mdoc *mdoc, int ln, char *buf, int offs)
862 {
863 	enum mdoct	  tok;
864 	int		  i, sv;
865 	char		  mac[5];
866 	struct mdoc_node *n;
867 
868 	/* Empty post-control lines are ignored. */
869 
870 	if ('"' == buf[offs]) {
871 		mdoc_pmsg(mdoc, ln, offs, MANDOCERR_BADCOMMENT);
872 		return(1);
873 	} else if ('\0' == buf[offs])
874 		return(1);
875 
876 	sv = offs;
877 
878 	/*
879 	 * Copy the first word into a nil-terminated buffer.
880 	 * Stop copying when a tab, space, or eoln is encountered.
881 	 */
882 
883 	i = 0;
884 	while (i < 4 && '\0' != buf[offs] &&
885 			' ' != buf[offs] && '\t' != buf[offs])
886 		mac[i++] = buf[offs++];
887 
888 	mac[i] = '\0';
889 
890 	tok = (i > 1 || i < 4) ? mdoc_hash_find(mac) : MDOC_MAX;
891 
892 	if (MDOC_MAX == tok) {
893 		mandoc_vmsg(MANDOCERR_MACRO, mdoc->parse,
894 				ln, sv, "%s", buf + sv - 1);
895 		return(1);
896 	}
897 
898 	/* Disregard the first trailing tab, if applicable. */
899 
900 	if ('\t' == buf[offs])
901 		offs++;
902 
903 	/* Jump to the next non-whitespace word. */
904 
905 	while (buf[offs] && ' ' == buf[offs])
906 		offs++;
907 
908 	/*
909 	 * Trailing whitespace.  Note that tabs are allowed to be passed
910 	 * into the parser as "text", so we only warn about spaces here.
911 	 */
912 
913 	if ('\0' == buf[offs] && ' ' == buf[offs - 1])
914 		mdoc_pmsg(mdoc, ln, offs - 1, MANDOCERR_EOLNSPACE);
915 
916 	/*
917 	 * If an initial macro or a list invocation, divert directly
918 	 * into macro processing.
919 	 */
920 
921 	if (NULL == mdoc->last || MDOC_It == tok || MDOC_El == tok) {
922 		if ( ! mdoc_macro(mdoc, tok, ln, sv, &offs, buf))
923 			goto err;
924 		return(1);
925 	}
926 
927 	n = mdoc->last;
928 	assert(mdoc->last);
929 
930 	/*
931 	 * If the first macro of a `Bl -column', open an `It' block
932 	 * context around the parsed macro.
933 	 */
934 
935 	if (MDOC_Bl == n->tok && MDOC_BODY == n->type &&
936 			LIST_column == n->norm->Bl.type) {
937 		mdoc->flags |= MDOC_FREECOL;
938 		if ( ! mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf))
939 			goto err;
940 		return(1);
941 	}
942 
943 	/*
944 	 * If we're following a block-level `It' within a `Bl -column'
945 	 * context (perhaps opened in the above block or in ptext()),
946 	 * then open an `It' block context around the parsed macro.
947 	 */
948 
949 	if (MDOC_It == n->tok && MDOC_BLOCK == n->type &&
950 			NULL != n->parent &&
951 			MDOC_Bl == n->parent->tok &&
952 			LIST_column == n->parent->norm->Bl.type) {
953 		mdoc->flags |= MDOC_FREECOL;
954 		if ( ! mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf))
955 			goto err;
956 		return(1);
957 	}
958 
959 	/* Normal processing of a macro. */
960 
961 	if ( ! mdoc_macro(mdoc, tok, ln, sv, &offs, buf))
962 		goto err;
963 
964 	return(1);
965 
966 err:	/* Error out. */
967 
968 	mdoc->flags |= MDOC_HALT;
969 	return(0);
970 }
971 
972 enum mdelim
mdoc_isdelim(const char * p)973 mdoc_isdelim(const char *p)
974 {
975 
976 	if ('\0' == p[0])
977 		return(DELIM_NONE);
978 
979 	if ('\0' == p[1])
980 		switch (p[0]) {
981 		case('('):
982 			/* FALLTHROUGH */
983 		case('['):
984 			return(DELIM_OPEN);
985 		case('|'):
986 			return(DELIM_MIDDLE);
987 		case('.'):
988 			/* FALLTHROUGH */
989 		case(','):
990 			/* FALLTHROUGH */
991 		case(';'):
992 			/* FALLTHROUGH */
993 		case(':'):
994 			/* FALLTHROUGH */
995 		case('?'):
996 			/* FALLTHROUGH */
997 		case('!'):
998 			/* FALLTHROUGH */
999 		case(')'):
1000 			/* FALLTHROUGH */
1001 		case(']'):
1002 			return(DELIM_CLOSE);
1003 		default:
1004 			return(DELIM_NONE);
1005 		}
1006 
1007 	if ('\\' != p[0])
1008 		return(DELIM_NONE);
1009 
1010 	if (0 == strcmp(p + 1, "."))
1011 		return(DELIM_CLOSE);
1012 	if (0 == strcmp(p + 1, "fR|\\fP"))
1013 		return(DELIM_MIDDLE);
1014 
1015 	return(DELIM_NONE);
1016 }
1017