1*6d38604fSBaptiste Daroussin /* $Id: tbl_data.c,v 1.59 2021/09/10 13:24:38 schwarze Exp $ */
261d06d6bSBaptiste Daroussin /*
361d06d6bSBaptiste Daroussin * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4*6d38604fSBaptiste Daroussin * Copyright (c) 2011,2015,2017-2019,2021 Ingo Schwarze <schwarze@openbsd.org>
561d06d6bSBaptiste Daroussin *
661d06d6bSBaptiste Daroussin * Permission to use, copy, modify, and distribute this software for any
761d06d6bSBaptiste Daroussin * purpose with or without fee is hereby granted, provided that the above
861d06d6bSBaptiste Daroussin * copyright notice and this permission notice appear in all copies.
961d06d6bSBaptiste Daroussin *
1061d06d6bSBaptiste Daroussin * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1161d06d6bSBaptiste Daroussin * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1261d06d6bSBaptiste Daroussin * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1361d06d6bSBaptiste Daroussin * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1461d06d6bSBaptiste Daroussin * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1561d06d6bSBaptiste Daroussin * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1661d06d6bSBaptiste Daroussin * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1761d06d6bSBaptiste Daroussin */
1861d06d6bSBaptiste Daroussin #include "config.h"
1961d06d6bSBaptiste Daroussin
2061d06d6bSBaptiste Daroussin #include <sys/types.h>
2161d06d6bSBaptiste Daroussin
2261d06d6bSBaptiste Daroussin #include <assert.h>
2361d06d6bSBaptiste Daroussin #include <ctype.h>
24*6d38604fSBaptiste Daroussin #include <stdint.h>
257295610fSBaptiste Daroussin #include <stdio.h>
2661d06d6bSBaptiste Daroussin #include <stdlib.h>
2761d06d6bSBaptiste Daroussin #include <string.h>
2861d06d6bSBaptiste Daroussin #include <time.h>
2961d06d6bSBaptiste Daroussin
3061d06d6bSBaptiste Daroussin #include "mandoc_aux.h"
317295610fSBaptiste Daroussin #include "mandoc.h"
327295610fSBaptiste Daroussin #include "tbl.h"
3361d06d6bSBaptiste Daroussin #include "libmandoc.h"
347295610fSBaptiste Daroussin #include "tbl_int.h"
3561d06d6bSBaptiste Daroussin
3661d06d6bSBaptiste Daroussin static void getdata(struct tbl_node *, struct tbl_span *,
3761d06d6bSBaptiste Daroussin int, const char *, int *);
3861d06d6bSBaptiste Daroussin static struct tbl_span *newspan(struct tbl_node *, int,
3961d06d6bSBaptiste Daroussin struct tbl_row *);
4061d06d6bSBaptiste Daroussin
4161d06d6bSBaptiste Daroussin
4261d06d6bSBaptiste Daroussin static void
getdata(struct tbl_node * tbl,struct tbl_span * dp,int ln,const char * p,int * pos)4361d06d6bSBaptiste Daroussin getdata(struct tbl_node *tbl, struct tbl_span *dp,
4461d06d6bSBaptiste Daroussin int ln, const char *p, int *pos)
4561d06d6bSBaptiste Daroussin {
467295610fSBaptiste Daroussin struct tbl_dat *dat, *pdat;
4761d06d6bSBaptiste Daroussin struct tbl_cell *cp;
487295610fSBaptiste Daroussin struct tbl_span *pdp;
49*6d38604fSBaptiste Daroussin const char *ccp;
50*6d38604fSBaptiste Daroussin int startpos, endpos;
5161d06d6bSBaptiste Daroussin
527295610fSBaptiste Daroussin /*
537295610fSBaptiste Daroussin * Determine the length of the string in the cell
547295610fSBaptiste Daroussin * and advance the parse point to the end of the cell.
557295610fSBaptiste Daroussin */
567295610fSBaptiste Daroussin
57*6d38604fSBaptiste Daroussin startpos = *pos;
58*6d38604fSBaptiste Daroussin ccp = p + startpos;
59*6d38604fSBaptiste Daroussin while (*ccp != '\0' && *ccp != tbl->opts.tab)
60*6d38604fSBaptiste Daroussin if (*ccp++ == '\\')
61*6d38604fSBaptiste Daroussin mandoc_escape(&ccp, NULL, NULL);
62*6d38604fSBaptiste Daroussin *pos = ccp - p;
637295610fSBaptiste Daroussin
6461d06d6bSBaptiste Daroussin /* Advance to the next layout cell, skipping spanners. */
6561d06d6bSBaptiste Daroussin
6661d06d6bSBaptiste Daroussin cp = dp->last == NULL ? dp->layout->first : dp->last->layout->next;
6761d06d6bSBaptiste Daroussin while (cp != NULL && cp->pos == TBL_CELL_SPAN)
6861d06d6bSBaptiste Daroussin cp = cp->next;
6961d06d6bSBaptiste Daroussin
7061d06d6bSBaptiste Daroussin /*
7161d06d6bSBaptiste Daroussin * If the current layout row is out of cells, allocate
7261d06d6bSBaptiste Daroussin * a new cell if another row of the table has at least
7361d06d6bSBaptiste Daroussin * this number of columns, or discard the input if we
7461d06d6bSBaptiste Daroussin * are beyond the last column of the table as a whole.
7561d06d6bSBaptiste Daroussin */
7661d06d6bSBaptiste Daroussin
7761d06d6bSBaptiste Daroussin if (cp == NULL) {
7861d06d6bSBaptiste Daroussin if (dp->layout->last->col + 1 < dp->opts->cols) {
7961d06d6bSBaptiste Daroussin cp = mandoc_calloc(1, sizeof(*cp));
8061d06d6bSBaptiste Daroussin cp->pos = TBL_CELL_LEFT;
81*6d38604fSBaptiste Daroussin cp->font = ESCAPE_FONTROMAN;
82*6d38604fSBaptiste Daroussin cp->spacing = SIZE_MAX;
8361d06d6bSBaptiste Daroussin dp->layout->last->next = cp;
8461d06d6bSBaptiste Daroussin cp->col = dp->layout->last->col + 1;
8561d06d6bSBaptiste Daroussin dp->layout->last = cp;
8661d06d6bSBaptiste Daroussin } else {
877295610fSBaptiste Daroussin mandoc_msg(MANDOCERR_TBLDATA_EXTRA,
88*6d38604fSBaptiste Daroussin ln, startpos, "%s", p + startpos);
897295610fSBaptiste Daroussin while (p[*pos] != '\0')
9061d06d6bSBaptiste Daroussin (*pos)++;
9161d06d6bSBaptiste Daroussin return;
9261d06d6bSBaptiste Daroussin }
9361d06d6bSBaptiste Daroussin }
9461d06d6bSBaptiste Daroussin
957295610fSBaptiste Daroussin dat = mandoc_malloc(sizeof(*dat));
9661d06d6bSBaptiste Daroussin dat->layout = cp;
977295610fSBaptiste Daroussin dat->next = NULL;
987295610fSBaptiste Daroussin dat->string = NULL;
997295610fSBaptiste Daroussin dat->hspans = 0;
1007295610fSBaptiste Daroussin dat->vspans = 0;
1017295610fSBaptiste Daroussin dat->block = 0;
10261d06d6bSBaptiste Daroussin dat->pos = TBL_DATA_NONE;
1037295610fSBaptiste Daroussin
1047295610fSBaptiste Daroussin /*
1057295610fSBaptiste Daroussin * Increment the number of vertical spans in a data cell above,
1067295610fSBaptiste Daroussin * if this cell vertically extends one or more cells above.
1077295610fSBaptiste Daroussin * The iteration must be done over data rows,
1087295610fSBaptiste Daroussin * not over layout rows, because one layout row
1097295610fSBaptiste Daroussin * can be reused for more than one data row.
1107295610fSBaptiste Daroussin */
1117295610fSBaptiste Daroussin
1127295610fSBaptiste Daroussin if (cp->pos == TBL_CELL_DOWN ||
113*6d38604fSBaptiste Daroussin (*pos - startpos == 2 &&
114*6d38604fSBaptiste Daroussin p[startpos] == '\\' && p[startpos + 1] == '^')) {
1157295610fSBaptiste Daroussin pdp = dp;
1167295610fSBaptiste Daroussin while ((pdp = pdp->prev) != NULL) {
1177295610fSBaptiste Daroussin pdat = pdp->first;
1187295610fSBaptiste Daroussin while (pdat != NULL &&
1197295610fSBaptiste Daroussin pdat->layout->col < dat->layout->col)
1207295610fSBaptiste Daroussin pdat = pdat->next;
1217295610fSBaptiste Daroussin if (pdat == NULL)
1227295610fSBaptiste Daroussin break;
1237295610fSBaptiste Daroussin if (pdat->layout->pos != TBL_CELL_DOWN &&
1247295610fSBaptiste Daroussin strcmp(pdat->string, "\\^") != 0) {
1257295610fSBaptiste Daroussin pdat->vspans++;
1267295610fSBaptiste Daroussin break;
1277295610fSBaptiste Daroussin }
1287295610fSBaptiste Daroussin }
1297295610fSBaptiste Daroussin }
1307295610fSBaptiste Daroussin
1317295610fSBaptiste Daroussin /*
1327295610fSBaptiste Daroussin * Count the number of horizontal spans to the right of this cell.
1337295610fSBaptiste Daroussin * This is purely a matter of the layout, independent of the data.
1347295610fSBaptiste Daroussin */
1357295610fSBaptiste Daroussin
13661d06d6bSBaptiste Daroussin for (cp = cp->next; cp != NULL; cp = cp->next)
13761d06d6bSBaptiste Daroussin if (cp->pos == TBL_CELL_SPAN)
1387295610fSBaptiste Daroussin dat->hspans++;
13961d06d6bSBaptiste Daroussin else
14061d06d6bSBaptiste Daroussin break;
14161d06d6bSBaptiste Daroussin
1427295610fSBaptiste Daroussin /* Append the new data cell to the data row. */
1437295610fSBaptiste Daroussin
14461d06d6bSBaptiste Daroussin if (dp->last == NULL)
14561d06d6bSBaptiste Daroussin dp->first = dat;
14661d06d6bSBaptiste Daroussin else
14761d06d6bSBaptiste Daroussin dp->last->next = dat;
14861d06d6bSBaptiste Daroussin dp->last = dat;
14961d06d6bSBaptiste Daroussin
150*6d38604fSBaptiste Daroussin /* Strip leading and trailing spaces, if requested. */
151*6d38604fSBaptiste Daroussin
152*6d38604fSBaptiste Daroussin endpos = *pos;
153*6d38604fSBaptiste Daroussin if (dp->opts->opts & TBL_OPT_NOSPACE) {
154*6d38604fSBaptiste Daroussin while (p[startpos] == ' ')
155*6d38604fSBaptiste Daroussin startpos++;
156*6d38604fSBaptiste Daroussin while (endpos > startpos && p[endpos - 1] == ' ')
157*6d38604fSBaptiste Daroussin endpos--;
158*6d38604fSBaptiste Daroussin }
159*6d38604fSBaptiste Daroussin
16061d06d6bSBaptiste Daroussin /*
16161d06d6bSBaptiste Daroussin * Check for a continued-data scope opening. This consists of a
16261d06d6bSBaptiste Daroussin * trailing `T{' at the end of the line. Subsequent lines,
16361d06d6bSBaptiste Daroussin * until a standalone `T}', are included in our cell.
16461d06d6bSBaptiste Daroussin */
16561d06d6bSBaptiste Daroussin
166*6d38604fSBaptiste Daroussin if (endpos - startpos == 2 &&
167*6d38604fSBaptiste Daroussin p[startpos] == 'T' && p[startpos + 1] == '{') {
16861d06d6bSBaptiste Daroussin tbl->part = TBL_PART_CDATA;
16961d06d6bSBaptiste Daroussin return;
17061d06d6bSBaptiste Daroussin }
17161d06d6bSBaptiste Daroussin
172*6d38604fSBaptiste Daroussin dat->string = mandoc_strndup(p + startpos, endpos - startpos);
17361d06d6bSBaptiste Daroussin
1747295610fSBaptiste Daroussin if (p[*pos] != '\0')
17561d06d6bSBaptiste Daroussin (*pos)++;
17661d06d6bSBaptiste Daroussin
17761d06d6bSBaptiste Daroussin if ( ! strcmp(dat->string, "_"))
17861d06d6bSBaptiste Daroussin dat->pos = TBL_DATA_HORIZ;
17961d06d6bSBaptiste Daroussin else if ( ! strcmp(dat->string, "="))
18061d06d6bSBaptiste Daroussin dat->pos = TBL_DATA_DHORIZ;
18161d06d6bSBaptiste Daroussin else if ( ! strcmp(dat->string, "\\_"))
18261d06d6bSBaptiste Daroussin dat->pos = TBL_DATA_NHORIZ;
18361d06d6bSBaptiste Daroussin else if ( ! strcmp(dat->string, "\\="))
18461d06d6bSBaptiste Daroussin dat->pos = TBL_DATA_NDHORIZ;
18561d06d6bSBaptiste Daroussin else
18661d06d6bSBaptiste Daroussin dat->pos = TBL_DATA_DATA;
18761d06d6bSBaptiste Daroussin
18861d06d6bSBaptiste Daroussin if ((dat->layout->pos == TBL_CELL_HORIZ ||
18961d06d6bSBaptiste Daroussin dat->layout->pos == TBL_CELL_DHORIZ ||
19061d06d6bSBaptiste Daroussin dat->layout->pos == TBL_CELL_DOWN) &&
19161d06d6bSBaptiste Daroussin dat->pos == TBL_DATA_DATA && *dat->string != '\0')
19261d06d6bSBaptiste Daroussin mandoc_msg(MANDOCERR_TBLDATA_SPAN,
193*6d38604fSBaptiste Daroussin ln, startpos, "%s", dat->string);
19461d06d6bSBaptiste Daroussin }
19561d06d6bSBaptiste Daroussin
19661d06d6bSBaptiste Daroussin void
tbl_cdata(struct tbl_node * tbl,int ln,const char * p,int pos)19761d06d6bSBaptiste Daroussin tbl_cdata(struct tbl_node *tbl, int ln, const char *p, int pos)
19861d06d6bSBaptiste Daroussin {
19961d06d6bSBaptiste Daroussin struct tbl_dat *dat;
20061d06d6bSBaptiste Daroussin size_t sz;
20161d06d6bSBaptiste Daroussin
20261d06d6bSBaptiste Daroussin dat = tbl->last_span->last;
20361d06d6bSBaptiste Daroussin
20461d06d6bSBaptiste Daroussin if (p[pos] == 'T' && p[pos + 1] == '}') {
20561d06d6bSBaptiste Daroussin pos += 2;
206*6d38604fSBaptiste Daroussin if (tbl->opts.opts & TBL_OPT_NOSPACE)
207*6d38604fSBaptiste Daroussin while (p[pos] == ' ')
208*6d38604fSBaptiste Daroussin pos++;
20961d06d6bSBaptiste Daroussin if (p[pos] == tbl->opts.tab) {
21061d06d6bSBaptiste Daroussin tbl->part = TBL_PART_DATA;
21161d06d6bSBaptiste Daroussin pos++;
21261d06d6bSBaptiste Daroussin while (p[pos] != '\0')
21361d06d6bSBaptiste Daroussin getdata(tbl, tbl->last_span, ln, p, &pos);
21461d06d6bSBaptiste Daroussin return;
21561d06d6bSBaptiste Daroussin } else if (p[pos] == '\0') {
21661d06d6bSBaptiste Daroussin tbl->part = TBL_PART_DATA;
21761d06d6bSBaptiste Daroussin return;
21861d06d6bSBaptiste Daroussin }
21961d06d6bSBaptiste Daroussin
22061d06d6bSBaptiste Daroussin /* Fallthrough: T} is part of a word. */
22161d06d6bSBaptiste Daroussin }
22261d06d6bSBaptiste Daroussin
22361d06d6bSBaptiste Daroussin dat->pos = TBL_DATA_DATA;
22461d06d6bSBaptiste Daroussin dat->block = 1;
22561d06d6bSBaptiste Daroussin
22661d06d6bSBaptiste Daroussin if (dat->string != NULL) {
22761d06d6bSBaptiste Daroussin sz = strlen(p + pos) + strlen(dat->string) + 2;
22861d06d6bSBaptiste Daroussin dat->string = mandoc_realloc(dat->string, sz);
22961d06d6bSBaptiste Daroussin (void)strlcat(dat->string, " ", sz);
23061d06d6bSBaptiste Daroussin (void)strlcat(dat->string, p + pos, sz);
23161d06d6bSBaptiste Daroussin } else
23261d06d6bSBaptiste Daroussin dat->string = mandoc_strdup(p + pos);
23361d06d6bSBaptiste Daroussin
23461d06d6bSBaptiste Daroussin if (dat->layout->pos == TBL_CELL_DOWN)
2357295610fSBaptiste Daroussin mandoc_msg(MANDOCERR_TBLDATA_SPAN,
2367295610fSBaptiste Daroussin ln, pos, "%s", dat->string);
23761d06d6bSBaptiste Daroussin }
23861d06d6bSBaptiste Daroussin
23961d06d6bSBaptiste Daroussin static struct tbl_span *
newspan(struct tbl_node * tbl,int line,struct tbl_row * rp)24061d06d6bSBaptiste Daroussin newspan(struct tbl_node *tbl, int line, struct tbl_row *rp)
24161d06d6bSBaptiste Daroussin {
24261d06d6bSBaptiste Daroussin struct tbl_span *dp;
24361d06d6bSBaptiste Daroussin
24461d06d6bSBaptiste Daroussin dp = mandoc_calloc(1, sizeof(*dp));
24561d06d6bSBaptiste Daroussin dp->line = line;
24661d06d6bSBaptiste Daroussin dp->opts = &tbl->opts;
24761d06d6bSBaptiste Daroussin dp->layout = rp;
24861d06d6bSBaptiste Daroussin dp->prev = tbl->last_span;
24961d06d6bSBaptiste Daroussin
25061d06d6bSBaptiste Daroussin if (dp->prev == NULL) {
25161d06d6bSBaptiste Daroussin tbl->first_span = dp;
25261d06d6bSBaptiste Daroussin tbl->current_span = NULL;
25361d06d6bSBaptiste Daroussin } else
25461d06d6bSBaptiste Daroussin dp->prev->next = dp;
25561d06d6bSBaptiste Daroussin tbl->last_span = dp;
25661d06d6bSBaptiste Daroussin
25761d06d6bSBaptiste Daroussin return dp;
25861d06d6bSBaptiste Daroussin }
25961d06d6bSBaptiste Daroussin
26061d06d6bSBaptiste Daroussin void
tbl_data(struct tbl_node * tbl,int ln,const char * p,int pos)26161d06d6bSBaptiste Daroussin tbl_data(struct tbl_node *tbl, int ln, const char *p, int pos)
26261d06d6bSBaptiste Daroussin {
26361d06d6bSBaptiste Daroussin struct tbl_row *rp;
26461d06d6bSBaptiste Daroussin struct tbl_cell *cp;
26561d06d6bSBaptiste Daroussin struct tbl_span *sp;
26661d06d6bSBaptiste Daroussin
267*6d38604fSBaptiste Daroussin for (sp = tbl->last_span; sp != NULL; sp = sp->prev)
268*6d38604fSBaptiste Daroussin if (sp->pos == TBL_SPAN_DATA)
269*6d38604fSBaptiste Daroussin break;
270*6d38604fSBaptiste Daroussin rp = sp == NULL ? tbl->first_row :
271*6d38604fSBaptiste Daroussin sp->layout->next == NULL ? sp->layout : sp->layout->next;
27261d06d6bSBaptiste Daroussin assert(rp != NULL);
27361d06d6bSBaptiste Daroussin
2747295610fSBaptiste Daroussin if (p[1] == '\0') {
2757295610fSBaptiste Daroussin switch (p[0]) {
2767295610fSBaptiste Daroussin case '.':
2777295610fSBaptiste Daroussin /*
2787295610fSBaptiste Daroussin * Empty request lines must be handled here
2797295610fSBaptiste Daroussin * and cannot be discarded in roff_parseln()
2807295610fSBaptiste Daroussin * because in the layout section, they
2817295610fSBaptiste Daroussin * are significant and end the layout.
2827295610fSBaptiste Daroussin */
2837295610fSBaptiste Daroussin return;
2847295610fSBaptiste Daroussin case '_':
28561d06d6bSBaptiste Daroussin sp = newspan(tbl, ln, rp);
28661d06d6bSBaptiste Daroussin sp->pos = TBL_SPAN_HORIZ;
28761d06d6bSBaptiste Daroussin return;
2887295610fSBaptiste Daroussin case '=':
28961d06d6bSBaptiste Daroussin sp = newspan(tbl, ln, rp);
29061d06d6bSBaptiste Daroussin sp->pos = TBL_SPAN_DHORIZ;
29161d06d6bSBaptiste Daroussin return;
2927295610fSBaptiste Daroussin default:
2937295610fSBaptiste Daroussin break;
2947295610fSBaptiste Daroussin }
29561d06d6bSBaptiste Daroussin }
29661d06d6bSBaptiste Daroussin
29761d06d6bSBaptiste Daroussin /*
29861d06d6bSBaptiste Daroussin * If the layout row contains nothing but horizontal lines,
29961d06d6bSBaptiste Daroussin * allocate an empty span for it and assign the current span
30061d06d6bSBaptiste Daroussin * to the next layout row accepting data.
30161d06d6bSBaptiste Daroussin */
30261d06d6bSBaptiste Daroussin
30361d06d6bSBaptiste Daroussin while (rp->next != NULL) {
30461d06d6bSBaptiste Daroussin if (rp->last->col + 1 < tbl->opts.cols)
30561d06d6bSBaptiste Daroussin break;
30661d06d6bSBaptiste Daroussin for (cp = rp->first; cp != NULL; cp = cp->next)
30761d06d6bSBaptiste Daroussin if (cp->pos != TBL_CELL_HORIZ &&
30861d06d6bSBaptiste Daroussin cp->pos != TBL_CELL_DHORIZ)
30961d06d6bSBaptiste Daroussin break;
31061d06d6bSBaptiste Daroussin if (cp != NULL)
31161d06d6bSBaptiste Daroussin break;
31261d06d6bSBaptiste Daroussin sp = newspan(tbl, ln, rp);
31361d06d6bSBaptiste Daroussin sp->pos = TBL_SPAN_DATA;
31461d06d6bSBaptiste Daroussin rp = rp->next;
31561d06d6bSBaptiste Daroussin }
31661d06d6bSBaptiste Daroussin
31761d06d6bSBaptiste Daroussin /* Process a real data row. */
31861d06d6bSBaptiste Daroussin
31961d06d6bSBaptiste Daroussin sp = newspan(tbl, ln, rp);
32061d06d6bSBaptiste Daroussin sp->pos = TBL_SPAN_DATA;
32161d06d6bSBaptiste Daroussin while (p[pos] != '\0')
32261d06d6bSBaptiste Daroussin getdata(tbl, sp, ln, p, &pos);
32361d06d6bSBaptiste Daroussin }
324