1*7295610fSBaptiste Daroussin /* $Id: tbl_data.c,v 1.52 2019/02/09 16:00:39 schwarze Exp $ */ 261d06d6bSBaptiste Daroussin /* 361d06d6bSBaptiste Daroussin * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4*7295610fSBaptiste Daroussin * Copyright (c) 2011,2015,2017,2018,2019 Ingo Schwarze <schwarze@openbsd.org> 561d06d6bSBaptiste Daroussin * 661d06d6bSBaptiste Daroussin * Permission to use, copy, modify, and distribute this software for any 761d06d6bSBaptiste Daroussin * purpose with or without fee is hereby granted, provided that the above 861d06d6bSBaptiste Daroussin * copyright notice and this permission notice appear in all copies. 961d06d6bSBaptiste Daroussin * 1061d06d6bSBaptiste Daroussin * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 1161d06d6bSBaptiste Daroussin * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 1261d06d6bSBaptiste Daroussin * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 1361d06d6bSBaptiste Daroussin * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 1461d06d6bSBaptiste Daroussin * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 1561d06d6bSBaptiste Daroussin * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 1661d06d6bSBaptiste Daroussin * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 1761d06d6bSBaptiste Daroussin */ 1861d06d6bSBaptiste Daroussin #include "config.h" 1961d06d6bSBaptiste Daroussin 2061d06d6bSBaptiste Daroussin #include <sys/types.h> 2161d06d6bSBaptiste Daroussin 2261d06d6bSBaptiste Daroussin #include <assert.h> 2361d06d6bSBaptiste Daroussin #include <ctype.h> 24*7295610fSBaptiste Daroussin #include <stdio.h> 2561d06d6bSBaptiste Daroussin #include <stdlib.h> 2661d06d6bSBaptiste Daroussin #include <string.h> 2761d06d6bSBaptiste Daroussin #include <time.h> 2861d06d6bSBaptiste Daroussin 2961d06d6bSBaptiste Daroussin #include "mandoc_aux.h" 30*7295610fSBaptiste Daroussin #include "mandoc.h" 31*7295610fSBaptiste Daroussin #include "tbl.h" 3261d06d6bSBaptiste Daroussin #include "libmandoc.h" 33*7295610fSBaptiste Daroussin #include "tbl_int.h" 3461d06d6bSBaptiste Daroussin 3561d06d6bSBaptiste Daroussin static void getdata(struct tbl_node *, struct tbl_span *, 3661d06d6bSBaptiste Daroussin int, const char *, int *); 3761d06d6bSBaptiste Daroussin static struct tbl_span *newspan(struct tbl_node *, int, 3861d06d6bSBaptiste Daroussin struct tbl_row *); 3961d06d6bSBaptiste Daroussin 4061d06d6bSBaptiste Daroussin 4161d06d6bSBaptiste Daroussin static void 4261d06d6bSBaptiste Daroussin getdata(struct tbl_node *tbl, struct tbl_span *dp, 4361d06d6bSBaptiste Daroussin int ln, const char *p, int *pos) 4461d06d6bSBaptiste Daroussin { 45*7295610fSBaptiste Daroussin struct tbl_dat *dat, *pdat; 4661d06d6bSBaptiste Daroussin struct tbl_cell *cp; 47*7295610fSBaptiste Daroussin struct tbl_span *pdp; 4861d06d6bSBaptiste Daroussin int sv; 4961d06d6bSBaptiste Daroussin 50*7295610fSBaptiste Daroussin /* 51*7295610fSBaptiste Daroussin * Determine the length of the string in the cell 52*7295610fSBaptiste Daroussin * and advance the parse point to the end of the cell. 53*7295610fSBaptiste Daroussin */ 54*7295610fSBaptiste Daroussin 55*7295610fSBaptiste Daroussin sv = *pos; 56*7295610fSBaptiste Daroussin while (p[*pos] != '\0' && p[*pos] != tbl->opts.tab) 57*7295610fSBaptiste Daroussin (*pos)++; 58*7295610fSBaptiste Daroussin 5961d06d6bSBaptiste Daroussin /* Advance to the next layout cell, skipping spanners. */ 6061d06d6bSBaptiste Daroussin 6161d06d6bSBaptiste Daroussin cp = dp->last == NULL ? dp->layout->first : dp->last->layout->next; 6261d06d6bSBaptiste Daroussin while (cp != NULL && cp->pos == TBL_CELL_SPAN) 6361d06d6bSBaptiste Daroussin cp = cp->next; 6461d06d6bSBaptiste Daroussin 6561d06d6bSBaptiste Daroussin /* 6661d06d6bSBaptiste Daroussin * If the current layout row is out of cells, allocate 6761d06d6bSBaptiste Daroussin * a new cell if another row of the table has at least 6861d06d6bSBaptiste Daroussin * this number of columns, or discard the input if we 6961d06d6bSBaptiste Daroussin * are beyond the last column of the table as a whole. 7061d06d6bSBaptiste Daroussin */ 7161d06d6bSBaptiste Daroussin 7261d06d6bSBaptiste Daroussin if (cp == NULL) { 7361d06d6bSBaptiste Daroussin if (dp->layout->last->col + 1 < dp->opts->cols) { 7461d06d6bSBaptiste Daroussin cp = mandoc_calloc(1, sizeof(*cp)); 7561d06d6bSBaptiste Daroussin cp->pos = TBL_CELL_LEFT; 7661d06d6bSBaptiste Daroussin dp->layout->last->next = cp; 7761d06d6bSBaptiste Daroussin cp->col = dp->layout->last->col + 1; 7861d06d6bSBaptiste Daroussin dp->layout->last = cp; 7961d06d6bSBaptiste Daroussin } else { 80*7295610fSBaptiste Daroussin mandoc_msg(MANDOCERR_TBLDATA_EXTRA, 81*7295610fSBaptiste Daroussin ln, sv, "%s", p + sv); 82*7295610fSBaptiste Daroussin while (p[*pos] != '\0') 8361d06d6bSBaptiste Daroussin (*pos)++; 8461d06d6bSBaptiste Daroussin return; 8561d06d6bSBaptiste Daroussin } 8661d06d6bSBaptiste Daroussin } 8761d06d6bSBaptiste Daroussin 88*7295610fSBaptiste Daroussin dat = mandoc_malloc(sizeof(*dat)); 8961d06d6bSBaptiste Daroussin dat->layout = cp; 90*7295610fSBaptiste Daroussin dat->next = NULL; 91*7295610fSBaptiste Daroussin dat->string = NULL; 92*7295610fSBaptiste Daroussin dat->hspans = 0; 93*7295610fSBaptiste Daroussin dat->vspans = 0; 94*7295610fSBaptiste Daroussin dat->block = 0; 9561d06d6bSBaptiste Daroussin dat->pos = TBL_DATA_NONE; 96*7295610fSBaptiste Daroussin 97*7295610fSBaptiste Daroussin /* 98*7295610fSBaptiste Daroussin * Increment the number of vertical spans in a data cell above, 99*7295610fSBaptiste Daroussin * if this cell vertically extends one or more cells above. 100*7295610fSBaptiste Daroussin * The iteration must be done over data rows, 101*7295610fSBaptiste Daroussin * not over layout rows, because one layout row 102*7295610fSBaptiste Daroussin * can be reused for more than one data row. 103*7295610fSBaptiste Daroussin */ 104*7295610fSBaptiste Daroussin 105*7295610fSBaptiste Daroussin if (cp->pos == TBL_CELL_DOWN || 106*7295610fSBaptiste Daroussin (*pos - sv == 2 && p[sv] == '\\' && p[sv + 1] == '^')) { 107*7295610fSBaptiste Daroussin pdp = dp; 108*7295610fSBaptiste Daroussin while ((pdp = pdp->prev) != NULL) { 109*7295610fSBaptiste Daroussin pdat = pdp->first; 110*7295610fSBaptiste Daroussin while (pdat != NULL && 111*7295610fSBaptiste Daroussin pdat->layout->col < dat->layout->col) 112*7295610fSBaptiste Daroussin pdat = pdat->next; 113*7295610fSBaptiste Daroussin if (pdat == NULL) 114*7295610fSBaptiste Daroussin break; 115*7295610fSBaptiste Daroussin if (pdat->layout->pos != TBL_CELL_DOWN && 116*7295610fSBaptiste Daroussin strcmp(pdat->string, "\\^") != 0) { 117*7295610fSBaptiste Daroussin pdat->vspans++; 118*7295610fSBaptiste Daroussin break; 119*7295610fSBaptiste Daroussin } 120*7295610fSBaptiste Daroussin } 121*7295610fSBaptiste Daroussin } 122*7295610fSBaptiste Daroussin 123*7295610fSBaptiste Daroussin /* 124*7295610fSBaptiste Daroussin * Count the number of horizontal spans to the right of this cell. 125*7295610fSBaptiste Daroussin * This is purely a matter of the layout, independent of the data. 126*7295610fSBaptiste Daroussin */ 127*7295610fSBaptiste Daroussin 12861d06d6bSBaptiste Daroussin for (cp = cp->next; cp != NULL; cp = cp->next) 12961d06d6bSBaptiste Daroussin if (cp->pos == TBL_CELL_SPAN) 130*7295610fSBaptiste Daroussin dat->hspans++; 13161d06d6bSBaptiste Daroussin else 13261d06d6bSBaptiste Daroussin break; 13361d06d6bSBaptiste Daroussin 134*7295610fSBaptiste Daroussin /* Append the new data cell to the data row. */ 135*7295610fSBaptiste Daroussin 13661d06d6bSBaptiste Daroussin if (dp->last == NULL) 13761d06d6bSBaptiste Daroussin dp->first = dat; 13861d06d6bSBaptiste Daroussin else 13961d06d6bSBaptiste Daroussin dp->last->next = dat; 14061d06d6bSBaptiste Daroussin dp->last = dat; 14161d06d6bSBaptiste Daroussin 14261d06d6bSBaptiste Daroussin /* 14361d06d6bSBaptiste Daroussin * Check for a continued-data scope opening. This consists of a 14461d06d6bSBaptiste Daroussin * trailing `T{' at the end of the line. Subsequent lines, 14561d06d6bSBaptiste Daroussin * until a standalone `T}', are included in our cell. 14661d06d6bSBaptiste Daroussin */ 14761d06d6bSBaptiste Daroussin 14861d06d6bSBaptiste Daroussin if (*pos - sv == 2 && p[sv] == 'T' && p[sv + 1] == '{') { 14961d06d6bSBaptiste Daroussin tbl->part = TBL_PART_CDATA; 15061d06d6bSBaptiste Daroussin return; 15161d06d6bSBaptiste Daroussin } 15261d06d6bSBaptiste Daroussin 15361d06d6bSBaptiste Daroussin dat->string = mandoc_strndup(p + sv, *pos - sv); 15461d06d6bSBaptiste Daroussin 155*7295610fSBaptiste Daroussin if (p[*pos] != '\0') 15661d06d6bSBaptiste Daroussin (*pos)++; 15761d06d6bSBaptiste Daroussin 15861d06d6bSBaptiste Daroussin if ( ! strcmp(dat->string, "_")) 15961d06d6bSBaptiste Daroussin dat->pos = TBL_DATA_HORIZ; 16061d06d6bSBaptiste Daroussin else if ( ! strcmp(dat->string, "=")) 16161d06d6bSBaptiste Daroussin dat->pos = TBL_DATA_DHORIZ; 16261d06d6bSBaptiste Daroussin else if ( ! strcmp(dat->string, "\\_")) 16361d06d6bSBaptiste Daroussin dat->pos = TBL_DATA_NHORIZ; 16461d06d6bSBaptiste Daroussin else if ( ! strcmp(dat->string, "\\=")) 16561d06d6bSBaptiste Daroussin dat->pos = TBL_DATA_NDHORIZ; 16661d06d6bSBaptiste Daroussin else 16761d06d6bSBaptiste Daroussin dat->pos = TBL_DATA_DATA; 16861d06d6bSBaptiste Daroussin 16961d06d6bSBaptiste Daroussin if ((dat->layout->pos == TBL_CELL_HORIZ || 17061d06d6bSBaptiste Daroussin dat->layout->pos == TBL_CELL_DHORIZ || 17161d06d6bSBaptiste Daroussin dat->layout->pos == TBL_CELL_DOWN) && 17261d06d6bSBaptiste Daroussin dat->pos == TBL_DATA_DATA && *dat->string != '\0') 17361d06d6bSBaptiste Daroussin mandoc_msg(MANDOCERR_TBLDATA_SPAN, 174*7295610fSBaptiste Daroussin ln, sv, "%s", dat->string); 17561d06d6bSBaptiste Daroussin } 17661d06d6bSBaptiste Daroussin 17761d06d6bSBaptiste Daroussin void 17861d06d6bSBaptiste Daroussin tbl_cdata(struct tbl_node *tbl, int ln, const char *p, int pos) 17961d06d6bSBaptiste Daroussin { 18061d06d6bSBaptiste Daroussin struct tbl_dat *dat; 18161d06d6bSBaptiste Daroussin size_t sz; 18261d06d6bSBaptiste Daroussin 18361d06d6bSBaptiste Daroussin dat = tbl->last_span->last; 18461d06d6bSBaptiste Daroussin 18561d06d6bSBaptiste Daroussin if (p[pos] == 'T' && p[pos + 1] == '}') { 18661d06d6bSBaptiste Daroussin pos += 2; 18761d06d6bSBaptiste Daroussin if (p[pos] == tbl->opts.tab) { 18861d06d6bSBaptiste Daroussin tbl->part = TBL_PART_DATA; 18961d06d6bSBaptiste Daroussin pos++; 19061d06d6bSBaptiste Daroussin while (p[pos] != '\0') 19161d06d6bSBaptiste Daroussin getdata(tbl, tbl->last_span, ln, p, &pos); 19261d06d6bSBaptiste Daroussin return; 19361d06d6bSBaptiste Daroussin } else if (p[pos] == '\0') { 19461d06d6bSBaptiste Daroussin tbl->part = TBL_PART_DATA; 19561d06d6bSBaptiste Daroussin return; 19661d06d6bSBaptiste Daroussin } 19761d06d6bSBaptiste Daroussin 19861d06d6bSBaptiste Daroussin /* Fallthrough: T} is part of a word. */ 19961d06d6bSBaptiste Daroussin } 20061d06d6bSBaptiste Daroussin 20161d06d6bSBaptiste Daroussin dat->pos = TBL_DATA_DATA; 20261d06d6bSBaptiste Daroussin dat->block = 1; 20361d06d6bSBaptiste Daroussin 20461d06d6bSBaptiste Daroussin if (dat->string != NULL) { 20561d06d6bSBaptiste Daroussin sz = strlen(p + pos) + strlen(dat->string) + 2; 20661d06d6bSBaptiste Daroussin dat->string = mandoc_realloc(dat->string, sz); 20761d06d6bSBaptiste Daroussin (void)strlcat(dat->string, " ", sz); 20861d06d6bSBaptiste Daroussin (void)strlcat(dat->string, p + pos, sz); 20961d06d6bSBaptiste Daroussin } else 21061d06d6bSBaptiste Daroussin dat->string = mandoc_strdup(p + pos); 21161d06d6bSBaptiste Daroussin 21261d06d6bSBaptiste Daroussin if (dat->layout->pos == TBL_CELL_DOWN) 213*7295610fSBaptiste Daroussin mandoc_msg(MANDOCERR_TBLDATA_SPAN, 214*7295610fSBaptiste Daroussin ln, pos, "%s", dat->string); 21561d06d6bSBaptiste Daroussin } 21661d06d6bSBaptiste Daroussin 21761d06d6bSBaptiste Daroussin static struct tbl_span * 21861d06d6bSBaptiste Daroussin newspan(struct tbl_node *tbl, int line, struct tbl_row *rp) 21961d06d6bSBaptiste Daroussin { 22061d06d6bSBaptiste Daroussin struct tbl_span *dp; 22161d06d6bSBaptiste Daroussin 22261d06d6bSBaptiste Daroussin dp = mandoc_calloc(1, sizeof(*dp)); 22361d06d6bSBaptiste Daroussin dp->line = line; 22461d06d6bSBaptiste Daroussin dp->opts = &tbl->opts; 22561d06d6bSBaptiste Daroussin dp->layout = rp; 22661d06d6bSBaptiste Daroussin dp->prev = tbl->last_span; 22761d06d6bSBaptiste Daroussin 22861d06d6bSBaptiste Daroussin if (dp->prev == NULL) { 22961d06d6bSBaptiste Daroussin tbl->first_span = dp; 23061d06d6bSBaptiste Daroussin tbl->current_span = NULL; 23161d06d6bSBaptiste Daroussin } else 23261d06d6bSBaptiste Daroussin dp->prev->next = dp; 23361d06d6bSBaptiste Daroussin tbl->last_span = dp; 23461d06d6bSBaptiste Daroussin 23561d06d6bSBaptiste Daroussin return dp; 23661d06d6bSBaptiste Daroussin } 23761d06d6bSBaptiste Daroussin 23861d06d6bSBaptiste Daroussin void 23961d06d6bSBaptiste Daroussin tbl_data(struct tbl_node *tbl, int ln, const char *p, int pos) 24061d06d6bSBaptiste Daroussin { 24161d06d6bSBaptiste Daroussin struct tbl_row *rp; 24261d06d6bSBaptiste Daroussin struct tbl_cell *cp; 24361d06d6bSBaptiste Daroussin struct tbl_span *sp; 24461d06d6bSBaptiste Daroussin 24561d06d6bSBaptiste Daroussin rp = (sp = tbl->last_span) == NULL ? tbl->first_row : 24661d06d6bSBaptiste Daroussin sp->pos == TBL_SPAN_DATA && sp->layout->next != NULL ? 24761d06d6bSBaptiste Daroussin sp->layout->next : sp->layout; 24861d06d6bSBaptiste Daroussin 24961d06d6bSBaptiste Daroussin assert(rp != NULL); 25061d06d6bSBaptiste Daroussin 251*7295610fSBaptiste Daroussin if (p[1] == '\0') { 252*7295610fSBaptiste Daroussin switch (p[0]) { 253*7295610fSBaptiste Daroussin case '.': 254*7295610fSBaptiste Daroussin /* 255*7295610fSBaptiste Daroussin * Empty request lines must be handled here 256*7295610fSBaptiste Daroussin * and cannot be discarded in roff_parseln() 257*7295610fSBaptiste Daroussin * because in the layout section, they 258*7295610fSBaptiste Daroussin * are significant and end the layout. 259*7295610fSBaptiste Daroussin */ 260*7295610fSBaptiste Daroussin return; 261*7295610fSBaptiste Daroussin case '_': 26261d06d6bSBaptiste Daroussin sp = newspan(tbl, ln, rp); 26361d06d6bSBaptiste Daroussin sp->pos = TBL_SPAN_HORIZ; 26461d06d6bSBaptiste Daroussin return; 265*7295610fSBaptiste Daroussin case '=': 26661d06d6bSBaptiste Daroussin sp = newspan(tbl, ln, rp); 26761d06d6bSBaptiste Daroussin sp->pos = TBL_SPAN_DHORIZ; 26861d06d6bSBaptiste Daroussin return; 269*7295610fSBaptiste Daroussin default: 270*7295610fSBaptiste Daroussin break; 271*7295610fSBaptiste Daroussin } 27261d06d6bSBaptiste Daroussin } 27361d06d6bSBaptiste Daroussin 27461d06d6bSBaptiste Daroussin /* 27561d06d6bSBaptiste Daroussin * If the layout row contains nothing but horizontal lines, 27661d06d6bSBaptiste Daroussin * allocate an empty span for it and assign the current span 27761d06d6bSBaptiste Daroussin * to the next layout row accepting data. 27861d06d6bSBaptiste Daroussin */ 27961d06d6bSBaptiste Daroussin 28061d06d6bSBaptiste Daroussin while (rp->next != NULL) { 28161d06d6bSBaptiste Daroussin if (rp->last->col + 1 < tbl->opts.cols) 28261d06d6bSBaptiste Daroussin break; 28361d06d6bSBaptiste Daroussin for (cp = rp->first; cp != NULL; cp = cp->next) 28461d06d6bSBaptiste Daroussin if (cp->pos != TBL_CELL_HORIZ && 28561d06d6bSBaptiste Daroussin cp->pos != TBL_CELL_DHORIZ) 28661d06d6bSBaptiste Daroussin break; 28761d06d6bSBaptiste Daroussin if (cp != NULL) 28861d06d6bSBaptiste Daroussin break; 28961d06d6bSBaptiste Daroussin sp = newspan(tbl, ln, rp); 29061d06d6bSBaptiste Daroussin sp->pos = TBL_SPAN_DATA; 29161d06d6bSBaptiste Daroussin rp = rp->next; 29261d06d6bSBaptiste Daroussin } 29361d06d6bSBaptiste Daroussin 29461d06d6bSBaptiste Daroussin /* Process a real data row. */ 29561d06d6bSBaptiste Daroussin 29661d06d6bSBaptiste Daroussin sp = newspan(tbl, ln, rp); 29761d06d6bSBaptiste Daroussin sp->pos = TBL_SPAN_DATA; 29861d06d6bSBaptiste Daroussin while (p[pos] != '\0') 29961d06d6bSBaptiste Daroussin getdata(tbl, sp, ln, p, &pos); 30061d06d6bSBaptiste Daroussin } 301