1*698f87a4SGarrett D'Amore /* $Id: tbl_data.c,v 1.27 2013/06/01 04:56:50 schwarze Exp $ */ 295c635efSGarrett D'Amore /* 395c635efSGarrett D'Amore * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 495c635efSGarrett D'Amore * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> 595c635efSGarrett D'Amore * 695c635efSGarrett D'Amore * Permission to use, copy, modify, and distribute this software for any 795c635efSGarrett D'Amore * purpose with or without fee is hereby granted, provided that the above 895c635efSGarrett D'Amore * copyright notice and this permission notice appear in all copies. 995c635efSGarrett D'Amore * 1095c635efSGarrett D'Amore * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 1195c635efSGarrett D'Amore * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 1295c635efSGarrett D'Amore * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 1395c635efSGarrett D'Amore * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 1495c635efSGarrett D'Amore * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 1595c635efSGarrett D'Amore * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 1695c635efSGarrett D'Amore * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 1795c635efSGarrett D'Amore */ 1895c635efSGarrett D'Amore #ifdef HAVE_CONFIG_H 1995c635efSGarrett D'Amore #include "config.h" 2095c635efSGarrett D'Amore #endif 2195c635efSGarrett D'Amore 2295c635efSGarrett D'Amore #include <assert.h> 2395c635efSGarrett D'Amore #include <ctype.h> 2495c635efSGarrett D'Amore #include <stdlib.h> 2595c635efSGarrett D'Amore #include <string.h> 2695c635efSGarrett D'Amore #include <time.h> 2795c635efSGarrett D'Amore 2895c635efSGarrett D'Amore #include "mandoc.h" 2995c635efSGarrett D'Amore #include "libmandoc.h" 3095c635efSGarrett D'Amore #include "libroff.h" 3195c635efSGarrett D'Amore 3295c635efSGarrett D'Amore static int data(struct tbl_node *, struct tbl_span *, 3395c635efSGarrett D'Amore int, const char *, int *); 3495c635efSGarrett D'Amore static struct tbl_span *newspan(struct tbl_node *, int, 3595c635efSGarrett D'Amore struct tbl_row *); 3695c635efSGarrett D'Amore 3795c635efSGarrett D'Amore static int 3895c635efSGarrett D'Amore data(struct tbl_node *tbl, struct tbl_span *dp, 3995c635efSGarrett D'Amore int ln, const char *p, int *pos) 4095c635efSGarrett D'Amore { 4195c635efSGarrett D'Amore struct tbl_dat *dat; 4295c635efSGarrett D'Amore struct tbl_cell *cp; 4395c635efSGarrett D'Amore int sv, spans; 4495c635efSGarrett D'Amore 4595c635efSGarrett D'Amore cp = NULL; 4695c635efSGarrett D'Amore if (dp->last && dp->last->layout) 4795c635efSGarrett D'Amore cp = dp->last->layout->next; 4895c635efSGarrett D'Amore else if (NULL == dp->last) 4995c635efSGarrett D'Amore cp = dp->layout->first; 5095c635efSGarrett D'Amore 5195c635efSGarrett D'Amore /* 52*698f87a4SGarrett D'Amore * Skip over spanners, since 5395c635efSGarrett D'Amore * we want to match data with data layout cells in the header. 5495c635efSGarrett D'Amore */ 5595c635efSGarrett D'Amore 56*698f87a4SGarrett D'Amore while (cp && TBL_CELL_SPAN == cp->pos) 5795c635efSGarrett D'Amore cp = cp->next; 5895c635efSGarrett D'Amore 5995c635efSGarrett D'Amore /* 6095c635efSGarrett D'Amore * Stop processing when we reach the end of the available layout 6195c635efSGarrett D'Amore * cells. This means that we have extra input. 6295c635efSGarrett D'Amore */ 6395c635efSGarrett D'Amore 6495c635efSGarrett D'Amore if (NULL == cp) { 6595c635efSGarrett D'Amore mandoc_msg(MANDOCERR_TBLEXTRADAT, 6695c635efSGarrett D'Amore tbl->parse, ln, *pos, NULL); 6795c635efSGarrett D'Amore /* Skip to the end... */ 6895c635efSGarrett D'Amore while (p[*pos]) 6995c635efSGarrett D'Amore (*pos)++; 7095c635efSGarrett D'Amore return(1); 7195c635efSGarrett D'Amore } 7295c635efSGarrett D'Amore 7395c635efSGarrett D'Amore dat = mandoc_calloc(1, sizeof(struct tbl_dat)); 7495c635efSGarrett D'Amore dat->layout = cp; 7595c635efSGarrett D'Amore dat->pos = TBL_DATA_NONE; 7695c635efSGarrett D'Amore 7795c635efSGarrett D'Amore assert(TBL_CELL_SPAN != cp->pos); 7895c635efSGarrett D'Amore 7995c635efSGarrett D'Amore for (spans = 0, cp = cp->next; cp; cp = cp->next) 8095c635efSGarrett D'Amore if (TBL_CELL_SPAN == cp->pos) 8195c635efSGarrett D'Amore spans++; 8295c635efSGarrett D'Amore else 8395c635efSGarrett D'Amore break; 8495c635efSGarrett D'Amore 8595c635efSGarrett D'Amore dat->spans = spans; 8695c635efSGarrett D'Amore 8795c635efSGarrett D'Amore if (dp->last) { 8895c635efSGarrett D'Amore dp->last->next = dat; 8995c635efSGarrett D'Amore dp->last = dat; 9095c635efSGarrett D'Amore } else 9195c635efSGarrett D'Amore dp->last = dp->first = dat; 9295c635efSGarrett D'Amore 9395c635efSGarrett D'Amore sv = *pos; 9495c635efSGarrett D'Amore while (p[*pos] && p[*pos] != tbl->opts.tab) 9595c635efSGarrett D'Amore (*pos)++; 9695c635efSGarrett D'Amore 9795c635efSGarrett D'Amore /* 9895c635efSGarrett D'Amore * Check for a continued-data scope opening. This consists of a 9995c635efSGarrett D'Amore * trailing `T{' at the end of the line. Subsequent lines, 10095c635efSGarrett D'Amore * until a standalone `T}', are included in our cell. 10195c635efSGarrett D'Amore */ 10295c635efSGarrett D'Amore 10395c635efSGarrett D'Amore if (*pos - sv == 2 && 'T' == p[sv] && '{' == p[sv + 1]) { 10495c635efSGarrett D'Amore tbl->part = TBL_PART_CDATA; 105*698f87a4SGarrett D'Amore return(1); 10695c635efSGarrett D'Amore } 10795c635efSGarrett D'Amore 10895c635efSGarrett D'Amore assert(*pos - sv >= 0); 10995c635efSGarrett D'Amore 11095c635efSGarrett D'Amore dat->string = mandoc_malloc((size_t)(*pos - sv + 1)); 11195c635efSGarrett D'Amore memcpy(dat->string, &p[sv], (size_t)(*pos - sv)); 11295c635efSGarrett D'Amore dat->string[*pos - sv] = '\0'; 11395c635efSGarrett D'Amore 11495c635efSGarrett D'Amore if (p[*pos]) 11595c635efSGarrett D'Amore (*pos)++; 11695c635efSGarrett D'Amore 11795c635efSGarrett D'Amore if ( ! strcmp(dat->string, "_")) 11895c635efSGarrett D'Amore dat->pos = TBL_DATA_HORIZ; 11995c635efSGarrett D'Amore else if ( ! strcmp(dat->string, "=")) 12095c635efSGarrett D'Amore dat->pos = TBL_DATA_DHORIZ; 12195c635efSGarrett D'Amore else if ( ! strcmp(dat->string, "\\_")) 12295c635efSGarrett D'Amore dat->pos = TBL_DATA_NHORIZ; 12395c635efSGarrett D'Amore else if ( ! strcmp(dat->string, "\\=")) 12495c635efSGarrett D'Amore dat->pos = TBL_DATA_NDHORIZ; 12595c635efSGarrett D'Amore else 12695c635efSGarrett D'Amore dat->pos = TBL_DATA_DATA; 12795c635efSGarrett D'Amore 12895c635efSGarrett D'Amore if (TBL_CELL_HORIZ == dat->layout->pos || 12995c635efSGarrett D'Amore TBL_CELL_DHORIZ == dat->layout->pos || 13095c635efSGarrett D'Amore TBL_CELL_DOWN == dat->layout->pos) 13195c635efSGarrett D'Amore if (TBL_DATA_DATA == dat->pos && '\0' != *dat->string) 13295c635efSGarrett D'Amore mandoc_msg(MANDOCERR_TBLIGNDATA, 13395c635efSGarrett D'Amore tbl->parse, ln, sv, NULL); 13495c635efSGarrett D'Amore 13595c635efSGarrett D'Amore return(1); 13695c635efSGarrett D'Amore } 13795c635efSGarrett D'Amore 13895c635efSGarrett D'Amore /* ARGSUSED */ 13995c635efSGarrett D'Amore int 14095c635efSGarrett D'Amore tbl_cdata(struct tbl_node *tbl, int ln, const char *p) 14195c635efSGarrett D'Amore { 14295c635efSGarrett D'Amore struct tbl_dat *dat; 14395c635efSGarrett D'Amore size_t sz; 14495c635efSGarrett D'Amore int pos; 14595c635efSGarrett D'Amore 14695c635efSGarrett D'Amore pos = 0; 14795c635efSGarrett D'Amore 14895c635efSGarrett D'Amore dat = tbl->last_span->last; 14995c635efSGarrett D'Amore 15095c635efSGarrett D'Amore if (p[pos] == 'T' && p[pos + 1] == '}') { 15195c635efSGarrett D'Amore pos += 2; 15295c635efSGarrett D'Amore if (p[pos] == tbl->opts.tab) { 15395c635efSGarrett D'Amore tbl->part = TBL_PART_DATA; 15495c635efSGarrett D'Amore pos++; 15595c635efSGarrett D'Amore return(data(tbl, tbl->last_span, ln, p, &pos)); 15695c635efSGarrett D'Amore } else if ('\0' == p[pos]) { 15795c635efSGarrett D'Amore tbl->part = TBL_PART_DATA; 15895c635efSGarrett D'Amore return(1); 15995c635efSGarrett D'Amore } 16095c635efSGarrett D'Amore 16195c635efSGarrett D'Amore /* Fallthrough: T} is part of a word. */ 16295c635efSGarrett D'Amore } 16395c635efSGarrett D'Amore 16495c635efSGarrett D'Amore dat->pos = TBL_DATA_DATA; 16595c635efSGarrett D'Amore 16695c635efSGarrett D'Amore if (dat->string) { 16795c635efSGarrett D'Amore sz = strlen(p) + strlen(dat->string) + 2; 16895c635efSGarrett D'Amore dat->string = mandoc_realloc(dat->string, sz); 16995c635efSGarrett D'Amore strlcat(dat->string, " ", sz); 17095c635efSGarrett D'Amore strlcat(dat->string, p, sz); 17195c635efSGarrett D'Amore } else 17295c635efSGarrett D'Amore dat->string = mandoc_strdup(p); 17395c635efSGarrett D'Amore 17495c635efSGarrett D'Amore if (TBL_CELL_DOWN == dat->layout->pos) 17595c635efSGarrett D'Amore mandoc_msg(MANDOCERR_TBLIGNDATA, 17695c635efSGarrett D'Amore tbl->parse, ln, pos, NULL); 17795c635efSGarrett D'Amore 17895c635efSGarrett D'Amore return(0); 17995c635efSGarrett D'Amore } 18095c635efSGarrett D'Amore 18195c635efSGarrett D'Amore static struct tbl_span * 18295c635efSGarrett D'Amore newspan(struct tbl_node *tbl, int line, struct tbl_row *rp) 18395c635efSGarrett D'Amore { 18495c635efSGarrett D'Amore struct tbl_span *dp; 18595c635efSGarrett D'Amore 18695c635efSGarrett D'Amore dp = mandoc_calloc(1, sizeof(struct tbl_span)); 18795c635efSGarrett D'Amore dp->line = line; 188*698f87a4SGarrett D'Amore dp->opts = &tbl->opts; 18995c635efSGarrett D'Amore dp->layout = rp; 19095c635efSGarrett D'Amore dp->head = tbl->first_head; 19195c635efSGarrett D'Amore 19295c635efSGarrett D'Amore if (tbl->last_span) { 19395c635efSGarrett D'Amore tbl->last_span->next = dp; 19495c635efSGarrett D'Amore tbl->last_span = dp; 19595c635efSGarrett D'Amore } else { 19695c635efSGarrett D'Amore tbl->last_span = tbl->first_span = dp; 19795c635efSGarrett D'Amore tbl->current_span = NULL; 19895c635efSGarrett D'Amore dp->flags |= TBL_SPAN_FIRST; 19995c635efSGarrett D'Amore } 20095c635efSGarrett D'Amore 20195c635efSGarrett D'Amore return(dp); 20295c635efSGarrett D'Amore } 20395c635efSGarrett D'Amore 20495c635efSGarrett D'Amore int 20595c635efSGarrett D'Amore tbl_data(struct tbl_node *tbl, int ln, const char *p) 20695c635efSGarrett D'Amore { 20795c635efSGarrett D'Amore struct tbl_span *dp; 20895c635efSGarrett D'Amore struct tbl_row *rp; 20995c635efSGarrett D'Amore int pos; 21095c635efSGarrett D'Amore 21195c635efSGarrett D'Amore pos = 0; 21295c635efSGarrett D'Amore 21395c635efSGarrett D'Amore if ('\0' == p[pos]) { 21495c635efSGarrett D'Amore mandoc_msg(MANDOCERR_TBL, tbl->parse, ln, pos, NULL); 21595c635efSGarrett D'Amore return(0); 21695c635efSGarrett D'Amore } 21795c635efSGarrett D'Amore 21895c635efSGarrett D'Amore /* 21995c635efSGarrett D'Amore * Choose a layout row: take the one following the last parsed 22095c635efSGarrett D'Amore * span's. If that doesn't exist, use the last parsed span's. 22195c635efSGarrett D'Amore * If there's no last parsed span, use the first row. Lastly, 22295c635efSGarrett D'Amore * if the last span was a horizontal line, use the same layout 22395c635efSGarrett D'Amore * (it doesn't "consume" the layout). 22495c635efSGarrett D'Amore */ 22595c635efSGarrett D'Amore 22695c635efSGarrett D'Amore if (tbl->last_span) { 22795c635efSGarrett D'Amore assert(tbl->last_span->layout); 22895c635efSGarrett D'Amore if (tbl->last_span->pos == TBL_SPAN_DATA) { 22995c635efSGarrett D'Amore for (rp = tbl->last_span->layout->next; 23095c635efSGarrett D'Amore rp && rp->first; rp = rp->next) { 23195c635efSGarrett D'Amore switch (rp->first->pos) { 23295c635efSGarrett D'Amore case (TBL_CELL_HORIZ): 23395c635efSGarrett D'Amore dp = newspan(tbl, ln, rp); 23495c635efSGarrett D'Amore dp->pos = TBL_SPAN_HORIZ; 23595c635efSGarrett D'Amore continue; 23695c635efSGarrett D'Amore case (TBL_CELL_DHORIZ): 23795c635efSGarrett D'Amore dp = newspan(tbl, ln, rp); 23895c635efSGarrett D'Amore dp->pos = TBL_SPAN_DHORIZ; 23995c635efSGarrett D'Amore continue; 24095c635efSGarrett D'Amore default: 24195c635efSGarrett D'Amore break; 24295c635efSGarrett D'Amore } 24395c635efSGarrett D'Amore break; 24495c635efSGarrett D'Amore } 24595c635efSGarrett D'Amore } else 24695c635efSGarrett D'Amore rp = tbl->last_span->layout; 24795c635efSGarrett D'Amore 24895c635efSGarrett D'Amore if (NULL == rp) 24995c635efSGarrett D'Amore rp = tbl->last_span->layout; 25095c635efSGarrett D'Amore } else 25195c635efSGarrett D'Amore rp = tbl->first_row; 25295c635efSGarrett D'Amore 25395c635efSGarrett D'Amore assert(rp); 25495c635efSGarrett D'Amore 25595c635efSGarrett D'Amore dp = newspan(tbl, ln, rp); 25695c635efSGarrett D'Amore 25795c635efSGarrett D'Amore if ( ! strcmp(p, "_")) { 25895c635efSGarrett D'Amore dp->pos = TBL_SPAN_HORIZ; 25995c635efSGarrett D'Amore return(1); 26095c635efSGarrett D'Amore } else if ( ! strcmp(p, "=")) { 26195c635efSGarrett D'Amore dp->pos = TBL_SPAN_DHORIZ; 26295c635efSGarrett D'Amore return(1); 26395c635efSGarrett D'Amore } 26495c635efSGarrett D'Amore 26595c635efSGarrett D'Amore dp->pos = TBL_SPAN_DATA; 26695c635efSGarrett D'Amore 26795c635efSGarrett D'Amore /* This returns 0 when TBL_PART_CDATA is entered. */ 26895c635efSGarrett D'Amore 26995c635efSGarrett D'Amore while ('\0' != p[pos]) 27095c635efSGarrett D'Amore if ( ! data(tbl, dp, ln, p, &pos)) 27195c635efSGarrett D'Amore return(0); 27295c635efSGarrett D'Amore 27395c635efSGarrett D'Amore return(1); 27495c635efSGarrett D'Amore } 275