1*260e9a87SYuri Pankov /* $Id: tbl_data.c,v 1.39 2015/01/30 17:32:16 schwarze Exp $ */ 295c635efSGarrett D'Amore /* 395c635efSGarrett D'Amore * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4*260e9a87SYuri Pankov * Copyright (c) 2011, 2015 Ingo Schwarze <schwarze@openbsd.org> 595c635efSGarrett D'Amore * 695c635efSGarrett D'Amore * Permission to use, copy, modify, and distribute this software for any 795c635efSGarrett D'Amore * purpose with or without fee is hereby granted, provided that the above 895c635efSGarrett D'Amore * copyright notice and this permission notice appear in all copies. 995c635efSGarrett D'Amore * 1095c635efSGarrett D'Amore * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 1195c635efSGarrett D'Amore * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 1295c635efSGarrett D'Amore * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 1395c635efSGarrett D'Amore * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 1495c635efSGarrett D'Amore * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 1595c635efSGarrett D'Amore * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 1695c635efSGarrett D'Amore * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 1795c635efSGarrett D'Amore */ 1895c635efSGarrett D'Amore #include "config.h" 19*260e9a87SYuri Pankov 20*260e9a87SYuri Pankov #include <sys/types.h> 2195c635efSGarrett D'Amore 2295c635efSGarrett D'Amore #include <assert.h> 2395c635efSGarrett D'Amore #include <ctype.h> 2495c635efSGarrett D'Amore #include <stdlib.h> 2595c635efSGarrett D'Amore #include <string.h> 2695c635efSGarrett D'Amore #include <time.h> 2795c635efSGarrett D'Amore 2895c635efSGarrett D'Amore #include "mandoc.h" 29*260e9a87SYuri Pankov #include "mandoc_aux.h" 3095c635efSGarrett D'Amore #include "libmandoc.h" 3195c635efSGarrett D'Amore #include "libroff.h" 3295c635efSGarrett D'Amore 33*260e9a87SYuri Pankov static void getdata(struct tbl_node *, struct tbl_span *, 3495c635efSGarrett D'Amore int, const char *, int *); 3595c635efSGarrett D'Amore static struct tbl_span *newspan(struct tbl_node *, int, 3695c635efSGarrett D'Amore struct tbl_row *); 3795c635efSGarrett D'Amore 38*260e9a87SYuri Pankov 39*260e9a87SYuri Pankov static void 40*260e9a87SYuri Pankov getdata(struct tbl_node *tbl, struct tbl_span *dp, 4195c635efSGarrett D'Amore int ln, const char *p, int *pos) 4295c635efSGarrett D'Amore { 4395c635efSGarrett D'Amore struct tbl_dat *dat; 4495c635efSGarrett D'Amore struct tbl_cell *cp; 45*260e9a87SYuri Pankov int sv; 4695c635efSGarrett D'Amore 47*260e9a87SYuri Pankov /* Advance to the next layout cell, skipping spanners. */ 4895c635efSGarrett D'Amore 49*260e9a87SYuri Pankov cp = dp->last == NULL ? dp->layout->first : dp->last->layout->next; 50*260e9a87SYuri Pankov while (cp != NULL && cp->pos == TBL_CELL_SPAN) 5195c635efSGarrett D'Amore cp = cp->next; 5295c635efSGarrett D'Amore 5395c635efSGarrett D'Amore /* 5495c635efSGarrett D'Amore * Stop processing when we reach the end of the available layout 5595c635efSGarrett D'Amore * cells. This means that we have extra input. 5695c635efSGarrett D'Amore */ 5795c635efSGarrett D'Amore 58*260e9a87SYuri Pankov if (cp == NULL) { 59*260e9a87SYuri Pankov mandoc_msg(MANDOCERR_TBLDATA_EXTRA, tbl->parse, 60*260e9a87SYuri Pankov ln, *pos, p + *pos); 6195c635efSGarrett D'Amore /* Skip to the end... */ 6295c635efSGarrett D'Amore while (p[*pos]) 6395c635efSGarrett D'Amore (*pos)++; 64*260e9a87SYuri Pankov return; 6595c635efSGarrett D'Amore } 6695c635efSGarrett D'Amore 67*260e9a87SYuri Pankov dat = mandoc_calloc(1, sizeof(*dat)); 6895c635efSGarrett D'Amore dat->layout = cp; 6995c635efSGarrett D'Amore dat->pos = TBL_DATA_NONE; 70*260e9a87SYuri Pankov dat->spans = 0; 71*260e9a87SYuri Pankov for (cp = cp->next; cp != NULL; cp = cp->next) 72*260e9a87SYuri Pankov if (cp->pos == TBL_CELL_SPAN) 73*260e9a87SYuri Pankov dat->spans++; 7495c635efSGarrett D'Amore else 7595c635efSGarrett D'Amore break; 7695c635efSGarrett D'Amore 77*260e9a87SYuri Pankov if (dp->last == NULL) 78*260e9a87SYuri Pankov dp->first = dat; 79*260e9a87SYuri Pankov else 8095c635efSGarrett D'Amore dp->last->next = dat; 8195c635efSGarrett D'Amore dp->last = dat; 8295c635efSGarrett D'Amore 8395c635efSGarrett D'Amore sv = *pos; 8495c635efSGarrett D'Amore while (p[*pos] && p[*pos] != tbl->opts.tab) 8595c635efSGarrett D'Amore (*pos)++; 8695c635efSGarrett D'Amore 8795c635efSGarrett D'Amore /* 8895c635efSGarrett D'Amore * Check for a continued-data scope opening. This consists of a 8995c635efSGarrett D'Amore * trailing `T{' at the end of the line. Subsequent lines, 9095c635efSGarrett D'Amore * until a standalone `T}', are included in our cell. 9195c635efSGarrett D'Amore */ 9295c635efSGarrett D'Amore 93*260e9a87SYuri Pankov if (*pos - sv == 2 && p[sv] == 'T' && p[sv + 1] == '{') { 9495c635efSGarrett D'Amore tbl->part = TBL_PART_CDATA; 95*260e9a87SYuri Pankov return; 9695c635efSGarrett D'Amore } 9795c635efSGarrett D'Amore 98*260e9a87SYuri Pankov dat->string = mandoc_strndup(p + sv, *pos - sv); 9995c635efSGarrett D'Amore 10095c635efSGarrett D'Amore if (p[*pos]) 10195c635efSGarrett D'Amore (*pos)++; 10295c635efSGarrett D'Amore 10395c635efSGarrett D'Amore if ( ! strcmp(dat->string, "_")) 10495c635efSGarrett D'Amore dat->pos = TBL_DATA_HORIZ; 10595c635efSGarrett D'Amore else if ( ! strcmp(dat->string, "=")) 10695c635efSGarrett D'Amore dat->pos = TBL_DATA_DHORIZ; 10795c635efSGarrett D'Amore else if ( ! strcmp(dat->string, "\\_")) 10895c635efSGarrett D'Amore dat->pos = TBL_DATA_NHORIZ; 10995c635efSGarrett D'Amore else if ( ! strcmp(dat->string, "\\=")) 11095c635efSGarrett D'Amore dat->pos = TBL_DATA_NDHORIZ; 11195c635efSGarrett D'Amore else 11295c635efSGarrett D'Amore dat->pos = TBL_DATA_DATA; 11395c635efSGarrett D'Amore 114*260e9a87SYuri Pankov if ((dat->layout->pos == TBL_CELL_HORIZ || 115*260e9a87SYuri Pankov dat->layout->pos == TBL_CELL_DHORIZ || 116*260e9a87SYuri Pankov dat->layout->pos == TBL_CELL_DOWN) && 117*260e9a87SYuri Pankov dat->pos == TBL_DATA_DATA && *dat->string != '\0') 118*260e9a87SYuri Pankov mandoc_msg(MANDOCERR_TBLDATA_SPAN, 119*260e9a87SYuri Pankov tbl->parse, ln, sv, dat->string); 12095c635efSGarrett D'Amore } 12195c635efSGarrett D'Amore 12295c635efSGarrett D'Amore int 123*260e9a87SYuri Pankov tbl_cdata(struct tbl_node *tbl, int ln, const char *p, int pos) 12495c635efSGarrett D'Amore { 12595c635efSGarrett D'Amore struct tbl_dat *dat; 12695c635efSGarrett D'Amore size_t sz; 12795c635efSGarrett D'Amore 12895c635efSGarrett D'Amore dat = tbl->last_span->last; 12995c635efSGarrett D'Amore 13095c635efSGarrett D'Amore if (p[pos] == 'T' && p[pos + 1] == '}') { 13195c635efSGarrett D'Amore pos += 2; 13295c635efSGarrett D'Amore if (p[pos] == tbl->opts.tab) { 13395c635efSGarrett D'Amore tbl->part = TBL_PART_DATA; 13495c635efSGarrett D'Amore pos++; 135*260e9a87SYuri Pankov getdata(tbl, tbl->last_span, ln, p, &pos); 136*260e9a87SYuri Pankov return(1); 137*260e9a87SYuri Pankov } else if (p[pos] == '\0') { 13895c635efSGarrett D'Amore tbl->part = TBL_PART_DATA; 13995c635efSGarrett D'Amore return(1); 14095c635efSGarrett D'Amore } 14195c635efSGarrett D'Amore 14295c635efSGarrett D'Amore /* Fallthrough: T} is part of a word. */ 14395c635efSGarrett D'Amore } 14495c635efSGarrett D'Amore 14595c635efSGarrett D'Amore dat->pos = TBL_DATA_DATA; 14695c635efSGarrett D'Amore 147*260e9a87SYuri Pankov if (dat->string != NULL) { 148*260e9a87SYuri Pankov sz = strlen(p + pos) + strlen(dat->string) + 2; 14995c635efSGarrett D'Amore dat->string = mandoc_realloc(dat->string, sz); 150*260e9a87SYuri Pankov (void)strlcat(dat->string, " ", sz); 151*260e9a87SYuri Pankov (void)strlcat(dat->string, p + pos, sz); 15295c635efSGarrett D'Amore } else 153*260e9a87SYuri Pankov dat->string = mandoc_strdup(p + pos); 15495c635efSGarrett D'Amore 155*260e9a87SYuri Pankov if (dat->layout->pos == TBL_CELL_DOWN) 156*260e9a87SYuri Pankov mandoc_msg(MANDOCERR_TBLDATA_SPAN, tbl->parse, 157*260e9a87SYuri Pankov ln, pos, dat->string); 15895c635efSGarrett D'Amore 15995c635efSGarrett D'Amore return(0); 16095c635efSGarrett D'Amore } 16195c635efSGarrett D'Amore 16295c635efSGarrett D'Amore static struct tbl_span * 16395c635efSGarrett D'Amore newspan(struct tbl_node *tbl, int line, struct tbl_row *rp) 16495c635efSGarrett D'Amore { 16595c635efSGarrett D'Amore struct tbl_span *dp; 16695c635efSGarrett D'Amore 167*260e9a87SYuri Pankov dp = mandoc_calloc(1, sizeof(*dp)); 16895c635efSGarrett D'Amore dp->line = line; 169698f87a4SGarrett D'Amore dp->opts = &tbl->opts; 17095c635efSGarrett D'Amore dp->layout = rp; 171*260e9a87SYuri Pankov dp->prev = tbl->last_span; 17295c635efSGarrett D'Amore 173*260e9a87SYuri Pankov if (dp->prev == NULL) { 174*260e9a87SYuri Pankov tbl->first_span = dp; 17595c635efSGarrett D'Amore tbl->current_span = NULL; 176*260e9a87SYuri Pankov } else 177*260e9a87SYuri Pankov dp->prev->next = dp; 178*260e9a87SYuri Pankov tbl->last_span = dp; 17995c635efSGarrett D'Amore 18095c635efSGarrett D'Amore return(dp); 18195c635efSGarrett D'Amore } 18295c635efSGarrett D'Amore 183*260e9a87SYuri Pankov void 184*260e9a87SYuri Pankov tbl_data(struct tbl_node *tbl, int ln, const char *p, int pos) 18595c635efSGarrett D'Amore { 18695c635efSGarrett D'Amore struct tbl_span *dp; 18795c635efSGarrett D'Amore struct tbl_row *rp; 18895c635efSGarrett D'Amore 18995c635efSGarrett D'Amore /* 19095c635efSGarrett D'Amore * Choose a layout row: take the one following the last parsed 19195c635efSGarrett D'Amore * span's. If that doesn't exist, use the last parsed span's. 19295c635efSGarrett D'Amore * If there's no last parsed span, use the first row. Lastly, 19395c635efSGarrett D'Amore * if the last span was a horizontal line, use the same layout 19495c635efSGarrett D'Amore * (it doesn't "consume" the layout). 19595c635efSGarrett D'Amore */ 19695c635efSGarrett D'Amore 197*260e9a87SYuri Pankov if (tbl->last_span != NULL) { 19895c635efSGarrett D'Amore if (tbl->last_span->pos == TBL_SPAN_DATA) { 19995c635efSGarrett D'Amore for (rp = tbl->last_span->layout->next; 200*260e9a87SYuri Pankov rp != NULL && rp->first != NULL; 201*260e9a87SYuri Pankov rp = rp->next) { 20295c635efSGarrett D'Amore switch (rp->first->pos) { 203*260e9a87SYuri Pankov case TBL_CELL_HORIZ: 20495c635efSGarrett D'Amore dp = newspan(tbl, ln, rp); 20595c635efSGarrett D'Amore dp->pos = TBL_SPAN_HORIZ; 20695c635efSGarrett D'Amore continue; 207*260e9a87SYuri Pankov case TBL_CELL_DHORIZ: 20895c635efSGarrett D'Amore dp = newspan(tbl, ln, rp); 20995c635efSGarrett D'Amore dp->pos = TBL_SPAN_DHORIZ; 21095c635efSGarrett D'Amore continue; 21195c635efSGarrett D'Amore default: 21295c635efSGarrett D'Amore break; 21395c635efSGarrett D'Amore } 21495c635efSGarrett D'Amore break; 21595c635efSGarrett D'Amore } 21695c635efSGarrett D'Amore } else 21795c635efSGarrett D'Amore rp = tbl->last_span->layout; 21895c635efSGarrett D'Amore 219*260e9a87SYuri Pankov if (rp == NULL) 22095c635efSGarrett D'Amore rp = tbl->last_span->layout; 22195c635efSGarrett D'Amore } else 22295c635efSGarrett D'Amore rp = tbl->first_row; 22395c635efSGarrett D'Amore 22495c635efSGarrett D'Amore assert(rp); 22595c635efSGarrett D'Amore 22695c635efSGarrett D'Amore dp = newspan(tbl, ln, rp); 22795c635efSGarrett D'Amore 22895c635efSGarrett D'Amore if ( ! strcmp(p, "_")) { 22995c635efSGarrett D'Amore dp->pos = TBL_SPAN_HORIZ; 230*260e9a87SYuri Pankov return; 23195c635efSGarrett D'Amore } else if ( ! strcmp(p, "=")) { 23295c635efSGarrett D'Amore dp->pos = TBL_SPAN_DHORIZ; 233*260e9a87SYuri Pankov return; 23495c635efSGarrett D'Amore } 23595c635efSGarrett D'Amore 23695c635efSGarrett D'Amore dp->pos = TBL_SPAN_DATA; 23795c635efSGarrett D'Amore 238*260e9a87SYuri Pankov while (p[pos] != '\0') 239*260e9a87SYuri Pankov getdata(tbl, dp, ln, p, &pos); 24095c635efSGarrett D'Amore } 241