1*698f87a4SGarrett D'Amore /* $Id: tbl_data.c,v 1.27 2013/06/01 04:56:50 schwarze Exp $ */
295c635efSGarrett D'Amore /*
395c635efSGarrett D'Amore * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
495c635efSGarrett D'Amore * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
595c635efSGarrett D'Amore *
695c635efSGarrett D'Amore * Permission to use, copy, modify, and distribute this software for any
795c635efSGarrett D'Amore * purpose with or without fee is hereby granted, provided that the above
895c635efSGarrett D'Amore * copyright notice and this permission notice appear in all copies.
995c635efSGarrett D'Amore *
1095c635efSGarrett D'Amore * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1195c635efSGarrett D'Amore * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1295c635efSGarrett D'Amore * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1395c635efSGarrett D'Amore * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1495c635efSGarrett D'Amore * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1595c635efSGarrett D'Amore * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1695c635efSGarrett D'Amore * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1795c635efSGarrett D'Amore */
1895c635efSGarrett D'Amore #ifdef HAVE_CONFIG_H
1995c635efSGarrett D'Amore #include "config.h"
2095c635efSGarrett D'Amore #endif
2195c635efSGarrett D'Amore
2295c635efSGarrett D'Amore #include <assert.h>
2395c635efSGarrett D'Amore #include <ctype.h>
2495c635efSGarrett D'Amore #include <stdlib.h>
2595c635efSGarrett D'Amore #include <string.h>
2695c635efSGarrett D'Amore #include <time.h>
2795c635efSGarrett D'Amore
2895c635efSGarrett D'Amore #include "mandoc.h"
2995c635efSGarrett D'Amore #include "libmandoc.h"
3095c635efSGarrett D'Amore #include "libroff.h"
3195c635efSGarrett D'Amore
3295c635efSGarrett D'Amore static int data(struct tbl_node *, struct tbl_span *,
3395c635efSGarrett D'Amore int, const char *, int *);
3495c635efSGarrett D'Amore static struct tbl_span *newspan(struct tbl_node *, int,
3595c635efSGarrett D'Amore struct tbl_row *);
3695c635efSGarrett D'Amore
3795c635efSGarrett D'Amore static int
data(struct tbl_node * tbl,struct tbl_span * dp,int ln,const char * p,int * pos)3895c635efSGarrett D'Amore data(struct tbl_node *tbl, struct tbl_span *dp,
3995c635efSGarrett D'Amore int ln, const char *p, int *pos)
4095c635efSGarrett D'Amore {
4195c635efSGarrett D'Amore struct tbl_dat *dat;
4295c635efSGarrett D'Amore struct tbl_cell *cp;
4395c635efSGarrett D'Amore int sv, spans;
4495c635efSGarrett D'Amore
4595c635efSGarrett D'Amore cp = NULL;
4695c635efSGarrett D'Amore if (dp->last && dp->last->layout)
4795c635efSGarrett D'Amore cp = dp->last->layout->next;
4895c635efSGarrett D'Amore else if (NULL == dp->last)
4995c635efSGarrett D'Amore cp = dp->layout->first;
5095c635efSGarrett D'Amore
5195c635efSGarrett D'Amore /*
52*698f87a4SGarrett D'Amore * Skip over spanners, since
5395c635efSGarrett D'Amore * we want to match data with data layout cells in the header.
5495c635efSGarrett D'Amore */
5595c635efSGarrett D'Amore
56*698f87a4SGarrett D'Amore while (cp && TBL_CELL_SPAN == cp->pos)
5795c635efSGarrett D'Amore cp = cp->next;
5895c635efSGarrett D'Amore
5995c635efSGarrett D'Amore /*
6095c635efSGarrett D'Amore * Stop processing when we reach the end of the available layout
6195c635efSGarrett D'Amore * cells. This means that we have extra input.
6295c635efSGarrett D'Amore */
6395c635efSGarrett D'Amore
6495c635efSGarrett D'Amore if (NULL == cp) {
6595c635efSGarrett D'Amore mandoc_msg(MANDOCERR_TBLEXTRADAT,
6695c635efSGarrett D'Amore tbl->parse, ln, *pos, NULL);
6795c635efSGarrett D'Amore /* Skip to the end... */
6895c635efSGarrett D'Amore while (p[*pos])
6995c635efSGarrett D'Amore (*pos)++;
7095c635efSGarrett D'Amore return(1);
7195c635efSGarrett D'Amore }
7295c635efSGarrett D'Amore
7395c635efSGarrett D'Amore dat = mandoc_calloc(1, sizeof(struct tbl_dat));
7495c635efSGarrett D'Amore dat->layout = cp;
7595c635efSGarrett D'Amore dat->pos = TBL_DATA_NONE;
7695c635efSGarrett D'Amore
7795c635efSGarrett D'Amore assert(TBL_CELL_SPAN != cp->pos);
7895c635efSGarrett D'Amore
7995c635efSGarrett D'Amore for (spans = 0, cp = cp->next; cp; cp = cp->next)
8095c635efSGarrett D'Amore if (TBL_CELL_SPAN == cp->pos)
8195c635efSGarrett D'Amore spans++;
8295c635efSGarrett D'Amore else
8395c635efSGarrett D'Amore break;
8495c635efSGarrett D'Amore
8595c635efSGarrett D'Amore dat->spans = spans;
8695c635efSGarrett D'Amore
8795c635efSGarrett D'Amore if (dp->last) {
8895c635efSGarrett D'Amore dp->last->next = dat;
8995c635efSGarrett D'Amore dp->last = dat;
9095c635efSGarrett D'Amore } else
9195c635efSGarrett D'Amore dp->last = dp->first = dat;
9295c635efSGarrett D'Amore
9395c635efSGarrett D'Amore sv = *pos;
9495c635efSGarrett D'Amore while (p[*pos] && p[*pos] != tbl->opts.tab)
9595c635efSGarrett D'Amore (*pos)++;
9695c635efSGarrett D'Amore
9795c635efSGarrett D'Amore /*
9895c635efSGarrett D'Amore * Check for a continued-data scope opening. This consists of a
9995c635efSGarrett D'Amore * trailing `T{' at the end of the line. Subsequent lines,
10095c635efSGarrett D'Amore * until a standalone `T}', are included in our cell.
10195c635efSGarrett D'Amore */
10295c635efSGarrett D'Amore
10395c635efSGarrett D'Amore if (*pos - sv == 2 && 'T' == p[sv] && '{' == p[sv + 1]) {
10495c635efSGarrett D'Amore tbl->part = TBL_PART_CDATA;
105*698f87a4SGarrett D'Amore return(1);
10695c635efSGarrett D'Amore }
10795c635efSGarrett D'Amore
10895c635efSGarrett D'Amore assert(*pos - sv >= 0);
10995c635efSGarrett D'Amore
11095c635efSGarrett D'Amore dat->string = mandoc_malloc((size_t)(*pos - sv + 1));
11195c635efSGarrett D'Amore memcpy(dat->string, &p[sv], (size_t)(*pos - sv));
11295c635efSGarrett D'Amore dat->string[*pos - sv] = '\0';
11395c635efSGarrett D'Amore
11495c635efSGarrett D'Amore if (p[*pos])
11595c635efSGarrett D'Amore (*pos)++;
11695c635efSGarrett D'Amore
11795c635efSGarrett D'Amore if ( ! strcmp(dat->string, "_"))
11895c635efSGarrett D'Amore dat->pos = TBL_DATA_HORIZ;
11995c635efSGarrett D'Amore else if ( ! strcmp(dat->string, "="))
12095c635efSGarrett D'Amore dat->pos = TBL_DATA_DHORIZ;
12195c635efSGarrett D'Amore else if ( ! strcmp(dat->string, "\\_"))
12295c635efSGarrett D'Amore dat->pos = TBL_DATA_NHORIZ;
12395c635efSGarrett D'Amore else if ( ! strcmp(dat->string, "\\="))
12495c635efSGarrett D'Amore dat->pos = TBL_DATA_NDHORIZ;
12595c635efSGarrett D'Amore else
12695c635efSGarrett D'Amore dat->pos = TBL_DATA_DATA;
12795c635efSGarrett D'Amore
12895c635efSGarrett D'Amore if (TBL_CELL_HORIZ == dat->layout->pos ||
12995c635efSGarrett D'Amore TBL_CELL_DHORIZ == dat->layout->pos ||
13095c635efSGarrett D'Amore TBL_CELL_DOWN == dat->layout->pos)
13195c635efSGarrett D'Amore if (TBL_DATA_DATA == dat->pos && '\0' != *dat->string)
13295c635efSGarrett D'Amore mandoc_msg(MANDOCERR_TBLIGNDATA,
13395c635efSGarrett D'Amore tbl->parse, ln, sv, NULL);
13495c635efSGarrett D'Amore
13595c635efSGarrett D'Amore return(1);
13695c635efSGarrett D'Amore }
13795c635efSGarrett D'Amore
13895c635efSGarrett D'Amore /* ARGSUSED */
13995c635efSGarrett D'Amore int
tbl_cdata(struct tbl_node * tbl,int ln,const char * p)14095c635efSGarrett D'Amore tbl_cdata(struct tbl_node *tbl, int ln, const char *p)
14195c635efSGarrett D'Amore {
14295c635efSGarrett D'Amore struct tbl_dat *dat;
14395c635efSGarrett D'Amore size_t sz;
14495c635efSGarrett D'Amore int pos;
14595c635efSGarrett D'Amore
14695c635efSGarrett D'Amore pos = 0;
14795c635efSGarrett D'Amore
14895c635efSGarrett D'Amore dat = tbl->last_span->last;
14995c635efSGarrett D'Amore
15095c635efSGarrett D'Amore if (p[pos] == 'T' && p[pos + 1] == '}') {
15195c635efSGarrett D'Amore pos += 2;
15295c635efSGarrett D'Amore if (p[pos] == tbl->opts.tab) {
15395c635efSGarrett D'Amore tbl->part = TBL_PART_DATA;
15495c635efSGarrett D'Amore pos++;
15595c635efSGarrett D'Amore return(data(tbl, tbl->last_span, ln, p, &pos));
15695c635efSGarrett D'Amore } else if ('\0' == p[pos]) {
15795c635efSGarrett D'Amore tbl->part = TBL_PART_DATA;
15895c635efSGarrett D'Amore return(1);
15995c635efSGarrett D'Amore }
16095c635efSGarrett D'Amore
16195c635efSGarrett D'Amore /* Fallthrough: T} is part of a word. */
16295c635efSGarrett D'Amore }
16395c635efSGarrett D'Amore
16495c635efSGarrett D'Amore dat->pos = TBL_DATA_DATA;
16595c635efSGarrett D'Amore
16695c635efSGarrett D'Amore if (dat->string) {
16795c635efSGarrett D'Amore sz = strlen(p) + strlen(dat->string) + 2;
16895c635efSGarrett D'Amore dat->string = mandoc_realloc(dat->string, sz);
16995c635efSGarrett D'Amore strlcat(dat->string, " ", sz);
17095c635efSGarrett D'Amore strlcat(dat->string, p, sz);
17195c635efSGarrett D'Amore } else
17295c635efSGarrett D'Amore dat->string = mandoc_strdup(p);
17395c635efSGarrett D'Amore
17495c635efSGarrett D'Amore if (TBL_CELL_DOWN == dat->layout->pos)
17595c635efSGarrett D'Amore mandoc_msg(MANDOCERR_TBLIGNDATA,
17695c635efSGarrett D'Amore tbl->parse, ln, pos, NULL);
17795c635efSGarrett D'Amore
17895c635efSGarrett D'Amore return(0);
17995c635efSGarrett D'Amore }
18095c635efSGarrett D'Amore
18195c635efSGarrett D'Amore static struct tbl_span *
newspan(struct tbl_node * tbl,int line,struct tbl_row * rp)18295c635efSGarrett D'Amore newspan(struct tbl_node *tbl, int line, struct tbl_row *rp)
18395c635efSGarrett D'Amore {
18495c635efSGarrett D'Amore struct tbl_span *dp;
18595c635efSGarrett D'Amore
18695c635efSGarrett D'Amore dp = mandoc_calloc(1, sizeof(struct tbl_span));
18795c635efSGarrett D'Amore dp->line = line;
188*698f87a4SGarrett D'Amore dp->opts = &tbl->opts;
18995c635efSGarrett D'Amore dp->layout = rp;
19095c635efSGarrett D'Amore dp->head = tbl->first_head;
19195c635efSGarrett D'Amore
19295c635efSGarrett D'Amore if (tbl->last_span) {
19395c635efSGarrett D'Amore tbl->last_span->next = dp;
19495c635efSGarrett D'Amore tbl->last_span = dp;
19595c635efSGarrett D'Amore } else {
19695c635efSGarrett D'Amore tbl->last_span = tbl->first_span = dp;
19795c635efSGarrett D'Amore tbl->current_span = NULL;
19895c635efSGarrett D'Amore dp->flags |= TBL_SPAN_FIRST;
19995c635efSGarrett D'Amore }
20095c635efSGarrett D'Amore
20195c635efSGarrett D'Amore return(dp);
20295c635efSGarrett D'Amore }
20395c635efSGarrett D'Amore
20495c635efSGarrett D'Amore int
tbl_data(struct tbl_node * tbl,int ln,const char * p)20595c635efSGarrett D'Amore tbl_data(struct tbl_node *tbl, int ln, const char *p)
20695c635efSGarrett D'Amore {
20795c635efSGarrett D'Amore struct tbl_span *dp;
20895c635efSGarrett D'Amore struct tbl_row *rp;
20995c635efSGarrett D'Amore int pos;
21095c635efSGarrett D'Amore
21195c635efSGarrett D'Amore pos = 0;
21295c635efSGarrett D'Amore
21395c635efSGarrett D'Amore if ('\0' == p[pos]) {
21495c635efSGarrett D'Amore mandoc_msg(MANDOCERR_TBL, tbl->parse, ln, pos, NULL);
21595c635efSGarrett D'Amore return(0);
21695c635efSGarrett D'Amore }
21795c635efSGarrett D'Amore
21895c635efSGarrett D'Amore /*
21995c635efSGarrett D'Amore * Choose a layout row: take the one following the last parsed
22095c635efSGarrett D'Amore * span's. If that doesn't exist, use the last parsed span's.
22195c635efSGarrett D'Amore * If there's no last parsed span, use the first row. Lastly,
22295c635efSGarrett D'Amore * if the last span was a horizontal line, use the same layout
22395c635efSGarrett D'Amore * (it doesn't "consume" the layout).
22495c635efSGarrett D'Amore */
22595c635efSGarrett D'Amore
22695c635efSGarrett D'Amore if (tbl->last_span) {
22795c635efSGarrett D'Amore assert(tbl->last_span->layout);
22895c635efSGarrett D'Amore if (tbl->last_span->pos == TBL_SPAN_DATA) {
22995c635efSGarrett D'Amore for (rp = tbl->last_span->layout->next;
23095c635efSGarrett D'Amore rp && rp->first; rp = rp->next) {
23195c635efSGarrett D'Amore switch (rp->first->pos) {
23295c635efSGarrett D'Amore case (TBL_CELL_HORIZ):
23395c635efSGarrett D'Amore dp = newspan(tbl, ln, rp);
23495c635efSGarrett D'Amore dp->pos = TBL_SPAN_HORIZ;
23595c635efSGarrett D'Amore continue;
23695c635efSGarrett D'Amore case (TBL_CELL_DHORIZ):
23795c635efSGarrett D'Amore dp = newspan(tbl, ln, rp);
23895c635efSGarrett D'Amore dp->pos = TBL_SPAN_DHORIZ;
23995c635efSGarrett D'Amore continue;
24095c635efSGarrett D'Amore default:
24195c635efSGarrett D'Amore break;
24295c635efSGarrett D'Amore }
24395c635efSGarrett D'Amore break;
24495c635efSGarrett D'Amore }
24595c635efSGarrett D'Amore } else
24695c635efSGarrett D'Amore rp = tbl->last_span->layout;
24795c635efSGarrett D'Amore
24895c635efSGarrett D'Amore if (NULL == rp)
24995c635efSGarrett D'Amore rp = tbl->last_span->layout;
25095c635efSGarrett D'Amore } else
25195c635efSGarrett D'Amore rp = tbl->first_row;
25295c635efSGarrett D'Amore
25395c635efSGarrett D'Amore assert(rp);
25495c635efSGarrett D'Amore
25595c635efSGarrett D'Amore dp = newspan(tbl, ln, rp);
25695c635efSGarrett D'Amore
25795c635efSGarrett D'Amore if ( ! strcmp(p, "_")) {
25895c635efSGarrett D'Amore dp->pos = TBL_SPAN_HORIZ;
25995c635efSGarrett D'Amore return(1);
26095c635efSGarrett D'Amore } else if ( ! strcmp(p, "=")) {
26195c635efSGarrett D'Amore dp->pos = TBL_SPAN_DHORIZ;
26295c635efSGarrett D'Amore return(1);
26395c635efSGarrett D'Amore }
26495c635efSGarrett D'Amore
26595c635efSGarrett D'Amore dp->pos = TBL_SPAN_DATA;
26695c635efSGarrett D'Amore
26795c635efSGarrett D'Amore /* This returns 0 when TBL_PART_CDATA is entered. */
26895c635efSGarrett D'Amore
26995c635efSGarrett D'Amore while ('\0' != p[pos])
27095c635efSGarrett D'Amore if ( ! data(tbl, dp, ln, p, &pos))
27195c635efSGarrett D'Amore return(0);
27295c635efSGarrett D'Amore
27395c635efSGarrett D'Amore return(1);
27495c635efSGarrett D'Amore }
275