/*	$Id: tbl_data.c,v 1.39 2015/01/30 17:32:16 schwarze Exp $ */
/*
 * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2011, 2015 Ingo Schwarze <schwarze@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */
1895c635efSGarrett D'Amore #include "config.h"
19*260e9a87SYuri Pankov
20*260e9a87SYuri Pankov #include <sys/types.h>
2195c635efSGarrett D'Amore
2295c635efSGarrett D'Amore #include <assert.h>
2395c635efSGarrett D'Amore #include <ctype.h>
2495c635efSGarrett D'Amore #include <stdlib.h>
2595c635efSGarrett D'Amore #include <string.h>
2695c635efSGarrett D'Amore #include <time.h>
2795c635efSGarrett D'Amore
2895c635efSGarrett D'Amore #include "mandoc.h"
29*260e9a87SYuri Pankov #include "mandoc_aux.h"
3095c635efSGarrett D'Amore #include "libmandoc.h"
3195c635efSGarrett D'Amore #include "libroff.h"
3295c635efSGarrett D'Amore
/* Parse one data cell from an input line and append it to a span. */
static	void		 getdata(struct tbl_node *, struct tbl_span *,
				int, const char *, int *);
/* Allocate a span for a layout row and append it to the table. */
static	struct tbl_span	*newspan(struct tbl_node *, int,
				struct tbl_row *);
3795c635efSGarrett D'Amore
38*260e9a87SYuri Pankov
39*260e9a87SYuri Pankov static void
getdata(struct tbl_node * tbl,struct tbl_span * dp,int ln,const char * p,int * pos)40*260e9a87SYuri Pankov getdata(struct tbl_node *tbl, struct tbl_span *dp,
4195c635efSGarrett D'Amore int ln, const char *p, int *pos)
4295c635efSGarrett D'Amore {
4395c635efSGarrett D'Amore struct tbl_dat *dat;
4495c635efSGarrett D'Amore struct tbl_cell *cp;
45*260e9a87SYuri Pankov int sv;
4695c635efSGarrett D'Amore
47*260e9a87SYuri Pankov /* Advance to the next layout cell, skipping spanners. */
4895c635efSGarrett D'Amore
49*260e9a87SYuri Pankov cp = dp->last == NULL ? dp->layout->first : dp->last->layout->next;
50*260e9a87SYuri Pankov while (cp != NULL && cp->pos == TBL_CELL_SPAN)
5195c635efSGarrett D'Amore cp = cp->next;
5295c635efSGarrett D'Amore
5395c635efSGarrett D'Amore /*
5495c635efSGarrett D'Amore * Stop processing when we reach the end of the available layout
5595c635efSGarrett D'Amore * cells. This means that we have extra input.
5695c635efSGarrett D'Amore */
5795c635efSGarrett D'Amore
58*260e9a87SYuri Pankov if (cp == NULL) {
59*260e9a87SYuri Pankov mandoc_msg(MANDOCERR_TBLDATA_EXTRA, tbl->parse,
60*260e9a87SYuri Pankov ln, *pos, p + *pos);
6195c635efSGarrett D'Amore /* Skip to the end... */
6295c635efSGarrett D'Amore while (p[*pos])
6395c635efSGarrett D'Amore (*pos)++;
64*260e9a87SYuri Pankov return;
6595c635efSGarrett D'Amore }
6695c635efSGarrett D'Amore
67*260e9a87SYuri Pankov dat = mandoc_calloc(1, sizeof(*dat));
6895c635efSGarrett D'Amore dat->layout = cp;
6995c635efSGarrett D'Amore dat->pos = TBL_DATA_NONE;
70*260e9a87SYuri Pankov dat->spans = 0;
71*260e9a87SYuri Pankov for (cp = cp->next; cp != NULL; cp = cp->next)
72*260e9a87SYuri Pankov if (cp->pos == TBL_CELL_SPAN)
73*260e9a87SYuri Pankov dat->spans++;
7495c635efSGarrett D'Amore else
7595c635efSGarrett D'Amore break;
7695c635efSGarrett D'Amore
77*260e9a87SYuri Pankov if (dp->last == NULL)
78*260e9a87SYuri Pankov dp->first = dat;
79*260e9a87SYuri Pankov else
8095c635efSGarrett D'Amore dp->last->next = dat;
8195c635efSGarrett D'Amore dp->last = dat;
8295c635efSGarrett D'Amore
8395c635efSGarrett D'Amore sv = *pos;
8495c635efSGarrett D'Amore while (p[*pos] && p[*pos] != tbl->opts.tab)
8595c635efSGarrett D'Amore (*pos)++;
8695c635efSGarrett D'Amore
8795c635efSGarrett D'Amore /*
8895c635efSGarrett D'Amore * Check for a continued-data scope opening. This consists of a
8995c635efSGarrett D'Amore * trailing `T{' at the end of the line. Subsequent lines,
9095c635efSGarrett D'Amore * until a standalone `T}', are included in our cell.
9195c635efSGarrett D'Amore */
9295c635efSGarrett D'Amore
93*260e9a87SYuri Pankov if (*pos - sv == 2 && p[sv] == 'T' && p[sv + 1] == '{') {
9495c635efSGarrett D'Amore tbl->part = TBL_PART_CDATA;
95*260e9a87SYuri Pankov return;
9695c635efSGarrett D'Amore }
9795c635efSGarrett D'Amore
98*260e9a87SYuri Pankov dat->string = mandoc_strndup(p + sv, *pos - sv);
9995c635efSGarrett D'Amore
10095c635efSGarrett D'Amore if (p[*pos])
10195c635efSGarrett D'Amore (*pos)++;
10295c635efSGarrett D'Amore
10395c635efSGarrett D'Amore if ( ! strcmp(dat->string, "_"))
10495c635efSGarrett D'Amore dat->pos = TBL_DATA_HORIZ;
10595c635efSGarrett D'Amore else if ( ! strcmp(dat->string, "="))
10695c635efSGarrett D'Amore dat->pos = TBL_DATA_DHORIZ;
10795c635efSGarrett D'Amore else if ( ! strcmp(dat->string, "\\_"))
10895c635efSGarrett D'Amore dat->pos = TBL_DATA_NHORIZ;
10995c635efSGarrett D'Amore else if ( ! strcmp(dat->string, "\\="))
11095c635efSGarrett D'Amore dat->pos = TBL_DATA_NDHORIZ;
11195c635efSGarrett D'Amore else
11295c635efSGarrett D'Amore dat->pos = TBL_DATA_DATA;
11395c635efSGarrett D'Amore
114*260e9a87SYuri Pankov if ((dat->layout->pos == TBL_CELL_HORIZ ||
115*260e9a87SYuri Pankov dat->layout->pos == TBL_CELL_DHORIZ ||
116*260e9a87SYuri Pankov dat->layout->pos == TBL_CELL_DOWN) &&
117*260e9a87SYuri Pankov dat->pos == TBL_DATA_DATA && *dat->string != '\0')
118*260e9a87SYuri Pankov mandoc_msg(MANDOCERR_TBLDATA_SPAN,
119*260e9a87SYuri Pankov tbl->parse, ln, sv, dat->string);
12095c635efSGarrett D'Amore }
12195c635efSGarrett D'Amore
12295c635efSGarrett D'Amore int
tbl_cdata(struct tbl_node * tbl,int ln,const char * p,int pos)123*260e9a87SYuri Pankov tbl_cdata(struct tbl_node *tbl, int ln, const char *p, int pos)
12495c635efSGarrett D'Amore {
12595c635efSGarrett D'Amore struct tbl_dat *dat;
12695c635efSGarrett D'Amore size_t sz;
12795c635efSGarrett D'Amore
12895c635efSGarrett D'Amore dat = tbl->last_span->last;
12995c635efSGarrett D'Amore
13095c635efSGarrett D'Amore if (p[pos] == 'T' && p[pos + 1] == '}') {
13195c635efSGarrett D'Amore pos += 2;
13295c635efSGarrett D'Amore if (p[pos] == tbl->opts.tab) {
13395c635efSGarrett D'Amore tbl->part = TBL_PART_DATA;
13495c635efSGarrett D'Amore pos++;
135*260e9a87SYuri Pankov getdata(tbl, tbl->last_span, ln, p, &pos);
136*260e9a87SYuri Pankov return(1);
137*260e9a87SYuri Pankov } else if (p[pos] == '\0') {
13895c635efSGarrett D'Amore tbl->part = TBL_PART_DATA;
13995c635efSGarrett D'Amore return(1);
14095c635efSGarrett D'Amore }
14195c635efSGarrett D'Amore
14295c635efSGarrett D'Amore /* Fallthrough: T} is part of a word. */
14395c635efSGarrett D'Amore }
14495c635efSGarrett D'Amore
14595c635efSGarrett D'Amore dat->pos = TBL_DATA_DATA;
14695c635efSGarrett D'Amore
147*260e9a87SYuri Pankov if (dat->string != NULL) {
148*260e9a87SYuri Pankov sz = strlen(p + pos) + strlen(dat->string) + 2;
14995c635efSGarrett D'Amore dat->string = mandoc_realloc(dat->string, sz);
150*260e9a87SYuri Pankov (void)strlcat(dat->string, " ", sz);
151*260e9a87SYuri Pankov (void)strlcat(dat->string, p + pos, sz);
15295c635efSGarrett D'Amore } else
153*260e9a87SYuri Pankov dat->string = mandoc_strdup(p + pos);
15495c635efSGarrett D'Amore
155*260e9a87SYuri Pankov if (dat->layout->pos == TBL_CELL_DOWN)
156*260e9a87SYuri Pankov mandoc_msg(MANDOCERR_TBLDATA_SPAN, tbl->parse,
157*260e9a87SYuri Pankov ln, pos, dat->string);
15895c635efSGarrett D'Amore
15995c635efSGarrett D'Amore return(0);
16095c635efSGarrett D'Amore }
16195c635efSGarrett D'Amore
16295c635efSGarrett D'Amore static struct tbl_span *
newspan(struct tbl_node * tbl,int line,struct tbl_row * rp)16395c635efSGarrett D'Amore newspan(struct tbl_node *tbl, int line, struct tbl_row *rp)
16495c635efSGarrett D'Amore {
16595c635efSGarrett D'Amore struct tbl_span *dp;
16695c635efSGarrett D'Amore
167*260e9a87SYuri Pankov dp = mandoc_calloc(1, sizeof(*dp));
16895c635efSGarrett D'Amore dp->line = line;
169698f87a4SGarrett D'Amore dp->opts = &tbl->opts;
17095c635efSGarrett D'Amore dp->layout = rp;
171*260e9a87SYuri Pankov dp->prev = tbl->last_span;
17295c635efSGarrett D'Amore
173*260e9a87SYuri Pankov if (dp->prev == NULL) {
174*260e9a87SYuri Pankov tbl->first_span = dp;
17595c635efSGarrett D'Amore tbl->current_span = NULL;
176*260e9a87SYuri Pankov } else
177*260e9a87SYuri Pankov dp->prev->next = dp;
178*260e9a87SYuri Pankov tbl->last_span = dp;
17995c635efSGarrett D'Amore
18095c635efSGarrett D'Amore return(dp);
18195c635efSGarrett D'Amore }
18295c635efSGarrett D'Amore
183*260e9a87SYuri Pankov void
tbl_data(struct tbl_node * tbl,int ln,const char * p,int pos)184*260e9a87SYuri Pankov tbl_data(struct tbl_node *tbl, int ln, const char *p, int pos)
18595c635efSGarrett D'Amore {
18695c635efSGarrett D'Amore struct tbl_span *dp;
18795c635efSGarrett D'Amore struct tbl_row *rp;
18895c635efSGarrett D'Amore
18995c635efSGarrett D'Amore /*
19095c635efSGarrett D'Amore * Choose a layout row: take the one following the last parsed
19195c635efSGarrett D'Amore * span's. If that doesn't exist, use the last parsed span's.
19295c635efSGarrett D'Amore * If there's no last parsed span, use the first row. Lastly,
19395c635efSGarrett D'Amore * if the last span was a horizontal line, use the same layout
19495c635efSGarrett D'Amore * (it doesn't "consume" the layout).
19595c635efSGarrett D'Amore */
19695c635efSGarrett D'Amore
197*260e9a87SYuri Pankov if (tbl->last_span != NULL) {
19895c635efSGarrett D'Amore if (tbl->last_span->pos == TBL_SPAN_DATA) {
19995c635efSGarrett D'Amore for (rp = tbl->last_span->layout->next;
200*260e9a87SYuri Pankov rp != NULL && rp->first != NULL;
201*260e9a87SYuri Pankov rp = rp->next) {
20295c635efSGarrett D'Amore switch (rp->first->pos) {
203*260e9a87SYuri Pankov case TBL_CELL_HORIZ:
20495c635efSGarrett D'Amore dp = newspan(tbl, ln, rp);
20595c635efSGarrett D'Amore dp->pos = TBL_SPAN_HORIZ;
20695c635efSGarrett D'Amore continue;
207*260e9a87SYuri Pankov case TBL_CELL_DHORIZ:
20895c635efSGarrett D'Amore dp = newspan(tbl, ln, rp);
20995c635efSGarrett D'Amore dp->pos = TBL_SPAN_DHORIZ;
21095c635efSGarrett D'Amore continue;
21195c635efSGarrett D'Amore default:
21295c635efSGarrett D'Amore break;
21395c635efSGarrett D'Amore }
21495c635efSGarrett D'Amore break;
21595c635efSGarrett D'Amore }
21695c635efSGarrett D'Amore } else
21795c635efSGarrett D'Amore rp = tbl->last_span->layout;
21895c635efSGarrett D'Amore
219*260e9a87SYuri Pankov if (rp == NULL)
22095c635efSGarrett D'Amore rp = tbl->last_span->layout;
22195c635efSGarrett D'Amore } else
22295c635efSGarrett D'Amore rp = tbl->first_row;
22395c635efSGarrett D'Amore
22495c635efSGarrett D'Amore assert(rp);
22595c635efSGarrett D'Amore
22695c635efSGarrett D'Amore dp = newspan(tbl, ln, rp);
22795c635efSGarrett D'Amore
22895c635efSGarrett D'Amore if ( ! strcmp(p, "_")) {
22995c635efSGarrett D'Amore dp->pos = TBL_SPAN_HORIZ;
230*260e9a87SYuri Pankov return;
23195c635efSGarrett D'Amore } else if ( ! strcmp(p, "=")) {
23295c635efSGarrett D'Amore dp->pos = TBL_SPAN_DHORIZ;
233*260e9a87SYuri Pankov return;
23495c635efSGarrett D'Amore }
23595c635efSGarrett D'Amore
23695c635efSGarrett D'Amore dp->pos = TBL_SPAN_DATA;
23795c635efSGarrett D'Amore
238*260e9a87SYuri Pankov while (p[pos] != '\0')
239*260e9a87SYuri Pankov getdata(tbl, dp, ln, p, &pos);
24095c635efSGarrett D'Amore }
241