1*ffb8ebfaSGarrett D'Amore /* $Id: tbl_data.c,v 1.27 2013/06/01 04:56:50 schwarze Exp $ */
232a712daSGarrett D'Amore /*
332a712daSGarrett D'Amore * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
432a712daSGarrett D'Amore * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
532a712daSGarrett D'Amore *
632a712daSGarrett D'Amore * Permission to use, copy, modify, and distribute this software for any
732a712daSGarrett D'Amore * purpose with or without fee is hereby granted, provided that the above
832a712daSGarrett D'Amore * copyright notice and this permission notice appear in all copies.
932a712daSGarrett D'Amore *
1032a712daSGarrett D'Amore * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1132a712daSGarrett D'Amore * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1232a712daSGarrett D'Amore * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1332a712daSGarrett D'Amore * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1432a712daSGarrett D'Amore * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1532a712daSGarrett D'Amore * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1632a712daSGarrett D'Amore * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1732a712daSGarrett D'Amore */
1832a712daSGarrett D'Amore #ifdef HAVE_CONFIG_H
1932a712daSGarrett D'Amore #include "config.h"
2032a712daSGarrett D'Amore #endif
2132a712daSGarrett D'Amore
2232a712daSGarrett D'Amore #include <assert.h>
2332a712daSGarrett D'Amore #include <ctype.h>
2432a712daSGarrett D'Amore #include <stdlib.h>
2532a712daSGarrett D'Amore #include <string.h>
2632a712daSGarrett D'Amore #include <time.h>
2732a712daSGarrett D'Amore
2832a712daSGarrett D'Amore #include "mandoc.h"
2932a712daSGarrett D'Amore #include "libmandoc.h"
3032a712daSGarrett D'Amore #include "libroff.h"
3132a712daSGarrett D'Amore
3232a712daSGarrett D'Amore static int data(struct tbl_node *, struct tbl_span *,
3332a712daSGarrett D'Amore int, const char *, int *);
3432a712daSGarrett D'Amore static struct tbl_span *newspan(struct tbl_node *, int,
3532a712daSGarrett D'Amore struct tbl_row *);
3632a712daSGarrett D'Amore
3732a712daSGarrett D'Amore static int
data(struct tbl_node * tbl,struct tbl_span * dp,int ln,const char * p,int * pos)3832a712daSGarrett D'Amore data(struct tbl_node *tbl, struct tbl_span *dp,
3932a712daSGarrett D'Amore int ln, const char *p, int *pos)
4032a712daSGarrett D'Amore {
4132a712daSGarrett D'Amore struct tbl_dat *dat;
4232a712daSGarrett D'Amore struct tbl_cell *cp;
4332a712daSGarrett D'Amore int sv, spans;
4432a712daSGarrett D'Amore
4532a712daSGarrett D'Amore cp = NULL;
4632a712daSGarrett D'Amore if (dp->last && dp->last->layout)
4732a712daSGarrett D'Amore cp = dp->last->layout->next;
4832a712daSGarrett D'Amore else if (NULL == dp->last)
4932a712daSGarrett D'Amore cp = dp->layout->first;
5032a712daSGarrett D'Amore
5132a712daSGarrett D'Amore /*
52*ffb8ebfaSGarrett D'Amore * Skip over spanners, since
5332a712daSGarrett D'Amore * we want to match data with data layout cells in the header.
5432a712daSGarrett D'Amore */
5532a712daSGarrett D'Amore
56*ffb8ebfaSGarrett D'Amore while (cp && TBL_CELL_SPAN == cp->pos)
5732a712daSGarrett D'Amore cp = cp->next;
5832a712daSGarrett D'Amore
5932a712daSGarrett D'Amore /*
6032a712daSGarrett D'Amore * Stop processing when we reach the end of the available layout
6132a712daSGarrett D'Amore * cells. This means that we have extra input.
6232a712daSGarrett D'Amore */
6332a712daSGarrett D'Amore
6432a712daSGarrett D'Amore if (NULL == cp) {
6532a712daSGarrett D'Amore mandoc_msg(MANDOCERR_TBLEXTRADAT,
6632a712daSGarrett D'Amore tbl->parse, ln, *pos, NULL);
6732a712daSGarrett D'Amore /* Skip to the end... */
6832a712daSGarrett D'Amore while (p[*pos])
6932a712daSGarrett D'Amore (*pos)++;
7032a712daSGarrett D'Amore return(1);
7132a712daSGarrett D'Amore }
7232a712daSGarrett D'Amore
7332a712daSGarrett D'Amore dat = mandoc_calloc(1, sizeof(struct tbl_dat));
7432a712daSGarrett D'Amore dat->layout = cp;
7532a712daSGarrett D'Amore dat->pos = TBL_DATA_NONE;
7632a712daSGarrett D'Amore
7732a712daSGarrett D'Amore assert(TBL_CELL_SPAN != cp->pos);
7832a712daSGarrett D'Amore
7932a712daSGarrett D'Amore for (spans = 0, cp = cp->next; cp; cp = cp->next)
8032a712daSGarrett D'Amore if (TBL_CELL_SPAN == cp->pos)
8132a712daSGarrett D'Amore spans++;
8232a712daSGarrett D'Amore else
8332a712daSGarrett D'Amore break;
8432a712daSGarrett D'Amore
8532a712daSGarrett D'Amore dat->spans = spans;
8632a712daSGarrett D'Amore
8732a712daSGarrett D'Amore if (dp->last) {
8832a712daSGarrett D'Amore dp->last->next = dat;
8932a712daSGarrett D'Amore dp->last = dat;
9032a712daSGarrett D'Amore } else
9132a712daSGarrett D'Amore dp->last = dp->first = dat;
9232a712daSGarrett D'Amore
9332a712daSGarrett D'Amore sv = *pos;
9432a712daSGarrett D'Amore while (p[*pos] && p[*pos] != tbl->opts.tab)
9532a712daSGarrett D'Amore (*pos)++;
9632a712daSGarrett D'Amore
9732a712daSGarrett D'Amore /*
9832a712daSGarrett D'Amore * Check for a continued-data scope opening. This consists of a
9932a712daSGarrett D'Amore * trailing `T{' at the end of the line. Subsequent lines,
10032a712daSGarrett D'Amore * until a standalone `T}', are included in our cell.
10132a712daSGarrett D'Amore */
10232a712daSGarrett D'Amore
10332a712daSGarrett D'Amore if (*pos - sv == 2 && 'T' == p[sv] && '{' == p[sv + 1]) {
10432a712daSGarrett D'Amore tbl->part = TBL_PART_CDATA;
105*ffb8ebfaSGarrett D'Amore return(1);
10632a712daSGarrett D'Amore }
10732a712daSGarrett D'Amore
10832a712daSGarrett D'Amore assert(*pos - sv >= 0);
10932a712daSGarrett D'Amore
11032a712daSGarrett D'Amore dat->string = mandoc_malloc((size_t)(*pos - sv + 1));
11132a712daSGarrett D'Amore memcpy(dat->string, &p[sv], (size_t)(*pos - sv));
11232a712daSGarrett D'Amore dat->string[*pos - sv] = '\0';
11332a712daSGarrett D'Amore
11432a712daSGarrett D'Amore if (p[*pos])
11532a712daSGarrett D'Amore (*pos)++;
11632a712daSGarrett D'Amore
11732a712daSGarrett D'Amore if ( ! strcmp(dat->string, "_"))
11832a712daSGarrett D'Amore dat->pos = TBL_DATA_HORIZ;
11932a712daSGarrett D'Amore else if ( ! strcmp(dat->string, "="))
12032a712daSGarrett D'Amore dat->pos = TBL_DATA_DHORIZ;
12132a712daSGarrett D'Amore else if ( ! strcmp(dat->string, "\\_"))
12232a712daSGarrett D'Amore dat->pos = TBL_DATA_NHORIZ;
12332a712daSGarrett D'Amore else if ( ! strcmp(dat->string, "\\="))
12432a712daSGarrett D'Amore dat->pos = TBL_DATA_NDHORIZ;
12532a712daSGarrett D'Amore else
12632a712daSGarrett D'Amore dat->pos = TBL_DATA_DATA;
12732a712daSGarrett D'Amore
12832a712daSGarrett D'Amore if (TBL_CELL_HORIZ == dat->layout->pos ||
12932a712daSGarrett D'Amore TBL_CELL_DHORIZ == dat->layout->pos ||
13032a712daSGarrett D'Amore TBL_CELL_DOWN == dat->layout->pos)
13132a712daSGarrett D'Amore if (TBL_DATA_DATA == dat->pos && '\0' != *dat->string)
13232a712daSGarrett D'Amore mandoc_msg(MANDOCERR_TBLIGNDATA,
13332a712daSGarrett D'Amore tbl->parse, ln, sv, NULL);
13432a712daSGarrett D'Amore
13532a712daSGarrett D'Amore return(1);
13632a712daSGarrett D'Amore }
13732a712daSGarrett D'Amore
13832a712daSGarrett D'Amore /* ARGSUSED */
13932a712daSGarrett D'Amore int
tbl_cdata(struct tbl_node * tbl,int ln,const char * p)14032a712daSGarrett D'Amore tbl_cdata(struct tbl_node *tbl, int ln, const char *p)
14132a712daSGarrett D'Amore {
14232a712daSGarrett D'Amore struct tbl_dat *dat;
14332a712daSGarrett D'Amore size_t sz;
14432a712daSGarrett D'Amore int pos;
14532a712daSGarrett D'Amore
14632a712daSGarrett D'Amore pos = 0;
14732a712daSGarrett D'Amore
14832a712daSGarrett D'Amore dat = tbl->last_span->last;
14932a712daSGarrett D'Amore
15032a712daSGarrett D'Amore if (p[pos] == 'T' && p[pos + 1] == '}') {
15132a712daSGarrett D'Amore pos += 2;
15232a712daSGarrett D'Amore if (p[pos] == tbl->opts.tab) {
15332a712daSGarrett D'Amore tbl->part = TBL_PART_DATA;
15432a712daSGarrett D'Amore pos++;
15532a712daSGarrett D'Amore return(data(tbl, tbl->last_span, ln, p, &pos));
15632a712daSGarrett D'Amore } else if ('\0' == p[pos]) {
15732a712daSGarrett D'Amore tbl->part = TBL_PART_DATA;
15832a712daSGarrett D'Amore return(1);
15932a712daSGarrett D'Amore }
16032a712daSGarrett D'Amore
16132a712daSGarrett D'Amore /* Fallthrough: T} is part of a word. */
16232a712daSGarrett D'Amore }
16332a712daSGarrett D'Amore
16432a712daSGarrett D'Amore dat->pos = TBL_DATA_DATA;
16532a712daSGarrett D'Amore
16632a712daSGarrett D'Amore if (dat->string) {
16732a712daSGarrett D'Amore sz = strlen(p) + strlen(dat->string) + 2;
16832a712daSGarrett D'Amore dat->string = mandoc_realloc(dat->string, sz);
16932a712daSGarrett D'Amore strlcat(dat->string, " ", sz);
17032a712daSGarrett D'Amore strlcat(dat->string, p, sz);
17132a712daSGarrett D'Amore } else
17232a712daSGarrett D'Amore dat->string = mandoc_strdup(p);
17332a712daSGarrett D'Amore
17432a712daSGarrett D'Amore if (TBL_CELL_DOWN == dat->layout->pos)
17532a712daSGarrett D'Amore mandoc_msg(MANDOCERR_TBLIGNDATA,
17632a712daSGarrett D'Amore tbl->parse, ln, pos, NULL);
17732a712daSGarrett D'Amore
17832a712daSGarrett D'Amore return(0);
17932a712daSGarrett D'Amore }
18032a712daSGarrett D'Amore
18132a712daSGarrett D'Amore static struct tbl_span *
newspan(struct tbl_node * tbl,int line,struct tbl_row * rp)18232a712daSGarrett D'Amore newspan(struct tbl_node *tbl, int line, struct tbl_row *rp)
18332a712daSGarrett D'Amore {
18432a712daSGarrett D'Amore struct tbl_span *dp;
18532a712daSGarrett D'Amore
18632a712daSGarrett D'Amore dp = mandoc_calloc(1, sizeof(struct tbl_span));
18732a712daSGarrett D'Amore dp->line = line;
188*ffb8ebfaSGarrett D'Amore dp->opts = &tbl->opts;
18932a712daSGarrett D'Amore dp->layout = rp;
19032a712daSGarrett D'Amore dp->head = tbl->first_head;
19132a712daSGarrett D'Amore
19232a712daSGarrett D'Amore if (tbl->last_span) {
19332a712daSGarrett D'Amore tbl->last_span->next = dp;
19432a712daSGarrett D'Amore tbl->last_span = dp;
19532a712daSGarrett D'Amore } else {
19632a712daSGarrett D'Amore tbl->last_span = tbl->first_span = dp;
19732a712daSGarrett D'Amore tbl->current_span = NULL;
19832a712daSGarrett D'Amore dp->flags |= TBL_SPAN_FIRST;
19932a712daSGarrett D'Amore }
20032a712daSGarrett D'Amore
20132a712daSGarrett D'Amore return(dp);
20232a712daSGarrett D'Amore }
20332a712daSGarrett D'Amore
20432a712daSGarrett D'Amore int
tbl_data(struct tbl_node * tbl,int ln,const char * p)20532a712daSGarrett D'Amore tbl_data(struct tbl_node *tbl, int ln, const char *p)
20632a712daSGarrett D'Amore {
20732a712daSGarrett D'Amore struct tbl_span *dp;
20832a712daSGarrett D'Amore struct tbl_row *rp;
20932a712daSGarrett D'Amore int pos;
21032a712daSGarrett D'Amore
21132a712daSGarrett D'Amore pos = 0;
21232a712daSGarrett D'Amore
21332a712daSGarrett D'Amore if ('\0' == p[pos]) {
21432a712daSGarrett D'Amore mandoc_msg(MANDOCERR_TBL, tbl->parse, ln, pos, NULL);
21532a712daSGarrett D'Amore return(0);
21632a712daSGarrett D'Amore }
21732a712daSGarrett D'Amore
21832a712daSGarrett D'Amore /*
21932a712daSGarrett D'Amore * Choose a layout row: take the one following the last parsed
22032a712daSGarrett D'Amore * span's. If that doesn't exist, use the last parsed span's.
22132a712daSGarrett D'Amore * If there's no last parsed span, use the first row. Lastly,
22232a712daSGarrett D'Amore * if the last span was a horizontal line, use the same layout
22332a712daSGarrett D'Amore * (it doesn't "consume" the layout).
22432a712daSGarrett D'Amore */
22532a712daSGarrett D'Amore
22632a712daSGarrett D'Amore if (tbl->last_span) {
22732a712daSGarrett D'Amore assert(tbl->last_span->layout);
22832a712daSGarrett D'Amore if (tbl->last_span->pos == TBL_SPAN_DATA) {
22932a712daSGarrett D'Amore for (rp = tbl->last_span->layout->next;
23032a712daSGarrett D'Amore rp && rp->first; rp = rp->next) {
23132a712daSGarrett D'Amore switch (rp->first->pos) {
23232a712daSGarrett D'Amore case (TBL_CELL_HORIZ):
23332a712daSGarrett D'Amore dp = newspan(tbl, ln, rp);
23432a712daSGarrett D'Amore dp->pos = TBL_SPAN_HORIZ;
23532a712daSGarrett D'Amore continue;
23632a712daSGarrett D'Amore case (TBL_CELL_DHORIZ):
23732a712daSGarrett D'Amore dp = newspan(tbl, ln, rp);
23832a712daSGarrett D'Amore dp->pos = TBL_SPAN_DHORIZ;
23932a712daSGarrett D'Amore continue;
24032a712daSGarrett D'Amore default:
24132a712daSGarrett D'Amore break;
24232a712daSGarrett D'Amore }
24332a712daSGarrett D'Amore break;
24432a712daSGarrett D'Amore }
24532a712daSGarrett D'Amore } else
24632a712daSGarrett D'Amore rp = tbl->last_span->layout;
24732a712daSGarrett D'Amore
24832a712daSGarrett D'Amore if (NULL == rp)
24932a712daSGarrett D'Amore rp = tbl->last_span->layout;
25032a712daSGarrett D'Amore } else
25132a712daSGarrett D'Amore rp = tbl->first_row;
25232a712daSGarrett D'Amore
25332a712daSGarrett D'Amore assert(rp);
25432a712daSGarrett D'Amore
25532a712daSGarrett D'Amore dp = newspan(tbl, ln, rp);
25632a712daSGarrett D'Amore
25732a712daSGarrett D'Amore if ( ! strcmp(p, "_")) {
25832a712daSGarrett D'Amore dp->pos = TBL_SPAN_HORIZ;
25932a712daSGarrett D'Amore return(1);
26032a712daSGarrett D'Amore } else if ( ! strcmp(p, "=")) {
26132a712daSGarrett D'Amore dp->pos = TBL_SPAN_DHORIZ;
26232a712daSGarrett D'Amore return(1);
26332a712daSGarrett D'Amore }
26432a712daSGarrett D'Amore
26532a712daSGarrett D'Amore dp->pos = TBL_SPAN_DATA;
26632a712daSGarrett D'Amore
26732a712daSGarrett D'Amore /* This returns 0 when TBL_PART_CDATA is entered. */
26832a712daSGarrett D'Amore
26932a712daSGarrett D'Amore while ('\0' != p[pos])
27032a712daSGarrett D'Amore if ( ! data(tbl, dp, ln, p, &pos))
27132a712daSGarrett D'Amore return(0);
27232a712daSGarrett D'Amore
27332a712daSGarrett D'Amore return(1);
27432a712daSGarrett D'Amore }
275