xref: /freebsd/contrib/mandoc/tbl_data.c (revision 7295610f5da64ab1818458ce007d9eb924496330)
1*7295610fSBaptiste Daroussin /*	$Id: tbl_data.c,v 1.52 2019/02/09 16:00:39 schwarze Exp $ */
261d06d6bSBaptiste Daroussin /*
361d06d6bSBaptiste Daroussin  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4*7295610fSBaptiste Daroussin  * Copyright (c) 2011,2015,2017,2018,2019 Ingo Schwarze <schwarze@openbsd.org>
561d06d6bSBaptiste Daroussin  *
661d06d6bSBaptiste Daroussin  * Permission to use, copy, modify, and distribute this software for any
761d06d6bSBaptiste Daroussin  * purpose with or without fee is hereby granted, provided that the above
861d06d6bSBaptiste Daroussin  * copyright notice and this permission notice appear in all copies.
961d06d6bSBaptiste Daroussin  *
1061d06d6bSBaptiste Daroussin  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1161d06d6bSBaptiste Daroussin  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1261d06d6bSBaptiste Daroussin  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1361d06d6bSBaptiste Daroussin  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1461d06d6bSBaptiste Daroussin  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1561d06d6bSBaptiste Daroussin  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1661d06d6bSBaptiste Daroussin  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1761d06d6bSBaptiste Daroussin  */
1861d06d6bSBaptiste Daroussin #include "config.h"
1961d06d6bSBaptiste Daroussin 
2061d06d6bSBaptiste Daroussin #include <sys/types.h>
2161d06d6bSBaptiste Daroussin 
2261d06d6bSBaptiste Daroussin #include <assert.h>
2361d06d6bSBaptiste Daroussin #include <ctype.h>
24*7295610fSBaptiste Daroussin #include <stdio.h>
2561d06d6bSBaptiste Daroussin #include <stdlib.h>
2661d06d6bSBaptiste Daroussin #include <string.h>
2761d06d6bSBaptiste Daroussin #include <time.h>
2861d06d6bSBaptiste Daroussin 
2961d06d6bSBaptiste Daroussin #include "mandoc_aux.h"
30*7295610fSBaptiste Daroussin #include "mandoc.h"
31*7295610fSBaptiste Daroussin #include "tbl.h"
3261d06d6bSBaptiste Daroussin #include "libmandoc.h"
33*7295610fSBaptiste Daroussin #include "tbl_int.h"
3461d06d6bSBaptiste Daroussin 
3561d06d6bSBaptiste Daroussin static	void		 getdata(struct tbl_node *, struct tbl_span *,
3661d06d6bSBaptiste Daroussin 				int, const char *, int *);
3761d06d6bSBaptiste Daroussin static	struct tbl_span	*newspan(struct tbl_node *, int,
3861d06d6bSBaptiste Daroussin 				struct tbl_row *);
3961d06d6bSBaptiste Daroussin 
4061d06d6bSBaptiste Daroussin 
4161d06d6bSBaptiste Daroussin static void
4261d06d6bSBaptiste Daroussin getdata(struct tbl_node *tbl, struct tbl_span *dp,
4361d06d6bSBaptiste Daroussin 		int ln, const char *p, int *pos)
4461d06d6bSBaptiste Daroussin {
45*7295610fSBaptiste Daroussin 	struct tbl_dat	*dat, *pdat;
4661d06d6bSBaptiste Daroussin 	struct tbl_cell	*cp;
47*7295610fSBaptiste Daroussin 	struct tbl_span	*pdp;
4861d06d6bSBaptiste Daroussin 	int		 sv;
4961d06d6bSBaptiste Daroussin 
50*7295610fSBaptiste Daroussin 	/*
51*7295610fSBaptiste Daroussin 	 * Determine the length of the string in the cell
52*7295610fSBaptiste Daroussin 	 * and advance the parse point to the end of the cell.
53*7295610fSBaptiste Daroussin 	 */
54*7295610fSBaptiste Daroussin 
55*7295610fSBaptiste Daroussin 	sv = *pos;
56*7295610fSBaptiste Daroussin 	while (p[*pos] != '\0' && p[*pos] != tbl->opts.tab)
57*7295610fSBaptiste Daroussin 		(*pos)++;
58*7295610fSBaptiste Daroussin 
5961d06d6bSBaptiste Daroussin 	/* Advance to the next layout cell, skipping spanners. */
6061d06d6bSBaptiste Daroussin 
6161d06d6bSBaptiste Daroussin 	cp = dp->last == NULL ? dp->layout->first : dp->last->layout->next;
6261d06d6bSBaptiste Daroussin 	while (cp != NULL && cp->pos == TBL_CELL_SPAN)
6361d06d6bSBaptiste Daroussin 		cp = cp->next;
6461d06d6bSBaptiste Daroussin 
6561d06d6bSBaptiste Daroussin 	/*
6661d06d6bSBaptiste Daroussin 	 * If the current layout row is out of cells, allocate
6761d06d6bSBaptiste Daroussin 	 * a new cell if another row of the table has at least
6861d06d6bSBaptiste Daroussin 	 * this number of columns, or discard the input if we
6961d06d6bSBaptiste Daroussin 	 * are beyond the last column of the table as a whole.
7061d06d6bSBaptiste Daroussin 	 */
7161d06d6bSBaptiste Daroussin 
7261d06d6bSBaptiste Daroussin 	if (cp == NULL) {
7361d06d6bSBaptiste Daroussin 		if (dp->layout->last->col + 1 < dp->opts->cols) {
7461d06d6bSBaptiste Daroussin 			cp = mandoc_calloc(1, sizeof(*cp));
7561d06d6bSBaptiste Daroussin 			cp->pos = TBL_CELL_LEFT;
7661d06d6bSBaptiste Daroussin 			dp->layout->last->next = cp;
7761d06d6bSBaptiste Daroussin 			cp->col = dp->layout->last->col + 1;
7861d06d6bSBaptiste Daroussin 			dp->layout->last = cp;
7961d06d6bSBaptiste Daroussin 		} else {
80*7295610fSBaptiste Daroussin 			mandoc_msg(MANDOCERR_TBLDATA_EXTRA,
81*7295610fSBaptiste Daroussin 			    ln, sv, "%s", p + sv);
82*7295610fSBaptiste Daroussin 			while (p[*pos] != '\0')
8361d06d6bSBaptiste Daroussin 				(*pos)++;
8461d06d6bSBaptiste Daroussin 			return;
8561d06d6bSBaptiste Daroussin 		}
8661d06d6bSBaptiste Daroussin 	}
8761d06d6bSBaptiste Daroussin 
88*7295610fSBaptiste Daroussin 	dat = mandoc_malloc(sizeof(*dat));
8961d06d6bSBaptiste Daroussin 	dat->layout = cp;
90*7295610fSBaptiste Daroussin 	dat->next = NULL;
91*7295610fSBaptiste Daroussin 	dat->string = NULL;
92*7295610fSBaptiste Daroussin 	dat->hspans = 0;
93*7295610fSBaptiste Daroussin 	dat->vspans = 0;
94*7295610fSBaptiste Daroussin 	dat->block = 0;
9561d06d6bSBaptiste Daroussin 	dat->pos = TBL_DATA_NONE;
96*7295610fSBaptiste Daroussin 
97*7295610fSBaptiste Daroussin 	/*
98*7295610fSBaptiste Daroussin 	 * Increment the number of vertical spans in a data cell above,
99*7295610fSBaptiste Daroussin 	 * if this cell vertically extends one or more cells above.
100*7295610fSBaptiste Daroussin 	 * The iteration must be done over data rows,
101*7295610fSBaptiste Daroussin 	 * not over layout rows, because one layout row
102*7295610fSBaptiste Daroussin 	 * can be reused for more than one data row.
103*7295610fSBaptiste Daroussin 	 */
104*7295610fSBaptiste Daroussin 
105*7295610fSBaptiste Daroussin 	if (cp->pos == TBL_CELL_DOWN ||
106*7295610fSBaptiste Daroussin 	    (*pos - sv == 2 && p[sv] == '\\' && p[sv + 1] == '^')) {
107*7295610fSBaptiste Daroussin 		pdp = dp;
108*7295610fSBaptiste Daroussin 		while ((pdp = pdp->prev) != NULL) {
109*7295610fSBaptiste Daroussin 			pdat = pdp->first;
110*7295610fSBaptiste Daroussin 			while (pdat != NULL &&
111*7295610fSBaptiste Daroussin 			    pdat->layout->col < dat->layout->col)
112*7295610fSBaptiste Daroussin 				pdat = pdat->next;
113*7295610fSBaptiste Daroussin 			if (pdat == NULL)
114*7295610fSBaptiste Daroussin 				break;
115*7295610fSBaptiste Daroussin 			if (pdat->layout->pos != TBL_CELL_DOWN &&
116*7295610fSBaptiste Daroussin 			    strcmp(pdat->string, "\\^") != 0) {
117*7295610fSBaptiste Daroussin 				pdat->vspans++;
118*7295610fSBaptiste Daroussin 				break;
119*7295610fSBaptiste Daroussin 			}
120*7295610fSBaptiste Daroussin 		}
121*7295610fSBaptiste Daroussin 	}
122*7295610fSBaptiste Daroussin 
123*7295610fSBaptiste Daroussin 	/*
124*7295610fSBaptiste Daroussin 	 * Count the number of horizontal spans to the right of this cell.
125*7295610fSBaptiste Daroussin 	 * This is purely a matter of the layout, independent of the data.
126*7295610fSBaptiste Daroussin 	 */
127*7295610fSBaptiste Daroussin 
12861d06d6bSBaptiste Daroussin 	for (cp = cp->next; cp != NULL; cp = cp->next)
12961d06d6bSBaptiste Daroussin 		if (cp->pos == TBL_CELL_SPAN)
130*7295610fSBaptiste Daroussin 			dat->hspans++;
13161d06d6bSBaptiste Daroussin 		else
13261d06d6bSBaptiste Daroussin 			break;
13361d06d6bSBaptiste Daroussin 
134*7295610fSBaptiste Daroussin 	/* Append the new data cell to the data row. */
135*7295610fSBaptiste Daroussin 
13661d06d6bSBaptiste Daroussin 	if (dp->last == NULL)
13761d06d6bSBaptiste Daroussin 		dp->first = dat;
13861d06d6bSBaptiste Daroussin 	else
13961d06d6bSBaptiste Daroussin 		dp->last->next = dat;
14061d06d6bSBaptiste Daroussin 	dp->last = dat;
14161d06d6bSBaptiste Daroussin 
14261d06d6bSBaptiste Daroussin 	/*
14361d06d6bSBaptiste Daroussin 	 * Check for a continued-data scope opening.  This consists of a
14461d06d6bSBaptiste Daroussin 	 * trailing `T{' at the end of the line.  Subsequent lines,
14561d06d6bSBaptiste Daroussin 	 * until a standalone `T}', are included in our cell.
14661d06d6bSBaptiste Daroussin 	 */
14761d06d6bSBaptiste Daroussin 
14861d06d6bSBaptiste Daroussin 	if (*pos - sv == 2 && p[sv] == 'T' && p[sv + 1] == '{') {
14961d06d6bSBaptiste Daroussin 		tbl->part = TBL_PART_CDATA;
15061d06d6bSBaptiste Daroussin 		return;
15161d06d6bSBaptiste Daroussin 	}
15261d06d6bSBaptiste Daroussin 
15361d06d6bSBaptiste Daroussin 	dat->string = mandoc_strndup(p + sv, *pos - sv);
15461d06d6bSBaptiste Daroussin 
155*7295610fSBaptiste Daroussin 	if (p[*pos] != '\0')
15661d06d6bSBaptiste Daroussin 		(*pos)++;
15761d06d6bSBaptiste Daroussin 
15861d06d6bSBaptiste Daroussin 	if ( ! strcmp(dat->string, "_"))
15961d06d6bSBaptiste Daroussin 		dat->pos = TBL_DATA_HORIZ;
16061d06d6bSBaptiste Daroussin 	else if ( ! strcmp(dat->string, "="))
16161d06d6bSBaptiste Daroussin 		dat->pos = TBL_DATA_DHORIZ;
16261d06d6bSBaptiste Daroussin 	else if ( ! strcmp(dat->string, "\\_"))
16361d06d6bSBaptiste Daroussin 		dat->pos = TBL_DATA_NHORIZ;
16461d06d6bSBaptiste Daroussin 	else if ( ! strcmp(dat->string, "\\="))
16561d06d6bSBaptiste Daroussin 		dat->pos = TBL_DATA_NDHORIZ;
16661d06d6bSBaptiste Daroussin 	else
16761d06d6bSBaptiste Daroussin 		dat->pos = TBL_DATA_DATA;
16861d06d6bSBaptiste Daroussin 
16961d06d6bSBaptiste Daroussin 	if ((dat->layout->pos == TBL_CELL_HORIZ ||
17061d06d6bSBaptiste Daroussin 	    dat->layout->pos == TBL_CELL_DHORIZ ||
17161d06d6bSBaptiste Daroussin 	    dat->layout->pos == TBL_CELL_DOWN) &&
17261d06d6bSBaptiste Daroussin 	    dat->pos == TBL_DATA_DATA && *dat->string != '\0')
17361d06d6bSBaptiste Daroussin 		mandoc_msg(MANDOCERR_TBLDATA_SPAN,
174*7295610fSBaptiste Daroussin 		    ln, sv, "%s", dat->string);
17561d06d6bSBaptiste Daroussin }
17661d06d6bSBaptiste Daroussin 
17761d06d6bSBaptiste Daroussin void
17861d06d6bSBaptiste Daroussin tbl_cdata(struct tbl_node *tbl, int ln, const char *p, int pos)
17961d06d6bSBaptiste Daroussin {
18061d06d6bSBaptiste Daroussin 	struct tbl_dat	*dat;
18161d06d6bSBaptiste Daroussin 	size_t		 sz;
18261d06d6bSBaptiste Daroussin 
18361d06d6bSBaptiste Daroussin 	dat = tbl->last_span->last;
18461d06d6bSBaptiste Daroussin 
18561d06d6bSBaptiste Daroussin 	if (p[pos] == 'T' && p[pos + 1] == '}') {
18661d06d6bSBaptiste Daroussin 		pos += 2;
18761d06d6bSBaptiste Daroussin 		if (p[pos] == tbl->opts.tab) {
18861d06d6bSBaptiste Daroussin 			tbl->part = TBL_PART_DATA;
18961d06d6bSBaptiste Daroussin 			pos++;
19061d06d6bSBaptiste Daroussin 			while (p[pos] != '\0')
19161d06d6bSBaptiste Daroussin 				getdata(tbl, tbl->last_span, ln, p, &pos);
19261d06d6bSBaptiste Daroussin 			return;
19361d06d6bSBaptiste Daroussin 		} else if (p[pos] == '\0') {
19461d06d6bSBaptiste Daroussin 			tbl->part = TBL_PART_DATA;
19561d06d6bSBaptiste Daroussin 			return;
19661d06d6bSBaptiste Daroussin 		}
19761d06d6bSBaptiste Daroussin 
19861d06d6bSBaptiste Daroussin 		/* Fallthrough: T} is part of a word. */
19961d06d6bSBaptiste Daroussin 	}
20061d06d6bSBaptiste Daroussin 
20161d06d6bSBaptiste Daroussin 	dat->pos = TBL_DATA_DATA;
20261d06d6bSBaptiste Daroussin 	dat->block = 1;
20361d06d6bSBaptiste Daroussin 
20461d06d6bSBaptiste Daroussin 	if (dat->string != NULL) {
20561d06d6bSBaptiste Daroussin 		sz = strlen(p + pos) + strlen(dat->string) + 2;
20661d06d6bSBaptiste Daroussin 		dat->string = mandoc_realloc(dat->string, sz);
20761d06d6bSBaptiste Daroussin 		(void)strlcat(dat->string, " ", sz);
20861d06d6bSBaptiste Daroussin 		(void)strlcat(dat->string, p + pos, sz);
20961d06d6bSBaptiste Daroussin 	} else
21061d06d6bSBaptiste Daroussin 		dat->string = mandoc_strdup(p + pos);
21161d06d6bSBaptiste Daroussin 
21261d06d6bSBaptiste Daroussin 	if (dat->layout->pos == TBL_CELL_DOWN)
213*7295610fSBaptiste Daroussin 		mandoc_msg(MANDOCERR_TBLDATA_SPAN,
214*7295610fSBaptiste Daroussin 		    ln, pos, "%s", dat->string);
21561d06d6bSBaptiste Daroussin }
21661d06d6bSBaptiste Daroussin 
21761d06d6bSBaptiste Daroussin static struct tbl_span *
21861d06d6bSBaptiste Daroussin newspan(struct tbl_node *tbl, int line, struct tbl_row *rp)
21961d06d6bSBaptiste Daroussin {
22061d06d6bSBaptiste Daroussin 	struct tbl_span	*dp;
22161d06d6bSBaptiste Daroussin 
22261d06d6bSBaptiste Daroussin 	dp = mandoc_calloc(1, sizeof(*dp));
22361d06d6bSBaptiste Daroussin 	dp->line = line;
22461d06d6bSBaptiste Daroussin 	dp->opts = &tbl->opts;
22561d06d6bSBaptiste Daroussin 	dp->layout = rp;
22661d06d6bSBaptiste Daroussin 	dp->prev = tbl->last_span;
22761d06d6bSBaptiste Daroussin 
22861d06d6bSBaptiste Daroussin 	if (dp->prev == NULL) {
22961d06d6bSBaptiste Daroussin 		tbl->first_span = dp;
23061d06d6bSBaptiste Daroussin 		tbl->current_span = NULL;
23161d06d6bSBaptiste Daroussin 	} else
23261d06d6bSBaptiste Daroussin 		dp->prev->next = dp;
23361d06d6bSBaptiste Daroussin 	tbl->last_span = dp;
23461d06d6bSBaptiste Daroussin 
23561d06d6bSBaptiste Daroussin 	return dp;
23661d06d6bSBaptiste Daroussin }
23761d06d6bSBaptiste Daroussin 
23861d06d6bSBaptiste Daroussin void
23961d06d6bSBaptiste Daroussin tbl_data(struct tbl_node *tbl, int ln, const char *p, int pos)
24061d06d6bSBaptiste Daroussin {
24161d06d6bSBaptiste Daroussin 	struct tbl_row	*rp;
24261d06d6bSBaptiste Daroussin 	struct tbl_cell	*cp;
24361d06d6bSBaptiste Daroussin 	struct tbl_span	*sp;
24461d06d6bSBaptiste Daroussin 
24561d06d6bSBaptiste Daroussin 	rp = (sp = tbl->last_span) == NULL ? tbl->first_row :
24661d06d6bSBaptiste Daroussin 	    sp->pos == TBL_SPAN_DATA && sp->layout->next != NULL ?
24761d06d6bSBaptiste Daroussin 	    sp->layout->next : sp->layout;
24861d06d6bSBaptiste Daroussin 
24961d06d6bSBaptiste Daroussin 	assert(rp != NULL);
25061d06d6bSBaptiste Daroussin 
251*7295610fSBaptiste Daroussin 	if (p[1] == '\0') {
252*7295610fSBaptiste Daroussin 		switch (p[0]) {
253*7295610fSBaptiste Daroussin 		case '.':
254*7295610fSBaptiste Daroussin 			/*
255*7295610fSBaptiste Daroussin 			 * Empty request lines must be handled here
256*7295610fSBaptiste Daroussin 			 * and cannot be discarded in roff_parseln()
257*7295610fSBaptiste Daroussin 			 * because in the layout section, they
258*7295610fSBaptiste Daroussin 			 * are significant and end the layout.
259*7295610fSBaptiste Daroussin 			 */
260*7295610fSBaptiste Daroussin 			return;
261*7295610fSBaptiste Daroussin 		case '_':
26261d06d6bSBaptiste Daroussin 			sp = newspan(tbl, ln, rp);
26361d06d6bSBaptiste Daroussin 			sp->pos = TBL_SPAN_HORIZ;
26461d06d6bSBaptiste Daroussin 			return;
265*7295610fSBaptiste Daroussin 		case '=':
26661d06d6bSBaptiste Daroussin 			sp = newspan(tbl, ln, rp);
26761d06d6bSBaptiste Daroussin 			sp->pos = TBL_SPAN_DHORIZ;
26861d06d6bSBaptiste Daroussin 			return;
269*7295610fSBaptiste Daroussin 		default:
270*7295610fSBaptiste Daroussin 			break;
271*7295610fSBaptiste Daroussin 		}
27261d06d6bSBaptiste Daroussin 	}
27361d06d6bSBaptiste Daroussin 
27461d06d6bSBaptiste Daroussin 	/*
27561d06d6bSBaptiste Daroussin 	 * If the layout row contains nothing but horizontal lines,
27661d06d6bSBaptiste Daroussin 	 * allocate an empty span for it and assign the current span
27761d06d6bSBaptiste Daroussin 	 * to the next layout row accepting data.
27861d06d6bSBaptiste Daroussin 	 */
27961d06d6bSBaptiste Daroussin 
28061d06d6bSBaptiste Daroussin 	while (rp->next != NULL) {
28161d06d6bSBaptiste Daroussin 		if (rp->last->col + 1 < tbl->opts.cols)
28261d06d6bSBaptiste Daroussin 			break;
28361d06d6bSBaptiste Daroussin 		for (cp = rp->first; cp != NULL; cp = cp->next)
28461d06d6bSBaptiste Daroussin 			if (cp->pos != TBL_CELL_HORIZ &&
28561d06d6bSBaptiste Daroussin 			    cp->pos != TBL_CELL_DHORIZ)
28661d06d6bSBaptiste Daroussin 				break;
28761d06d6bSBaptiste Daroussin 		if (cp != NULL)
28861d06d6bSBaptiste Daroussin 			break;
28961d06d6bSBaptiste Daroussin 		sp = newspan(tbl, ln, rp);
29061d06d6bSBaptiste Daroussin 		sp->pos = TBL_SPAN_DATA;
29161d06d6bSBaptiste Daroussin 		rp = rp->next;
29261d06d6bSBaptiste Daroussin 	}
29361d06d6bSBaptiste Daroussin 
29461d06d6bSBaptiste Daroussin 	/* Process a real data row. */
29561d06d6bSBaptiste Daroussin 
29661d06d6bSBaptiste Daroussin 	sp = newspan(tbl, ln, rp);
29761d06d6bSBaptiste Daroussin 	sp->pos = TBL_SPAN_DATA;
29861d06d6bSBaptiste Daroussin 	while (p[pos] != '\0')
29961d06d6bSBaptiste Daroussin 		getdata(tbl, sp, ln, p, &pos);
30061d06d6bSBaptiste Daroussin }
301