1*95c635efSGarrett D'Amore /* $Id: tbl_data.c,v 1.24 2011/03/20 16:02:05 kristaps Exp $ */ 2*95c635efSGarrett D'Amore /* 3*95c635efSGarrett D'Amore * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4*95c635efSGarrett D'Amore * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org> 5*95c635efSGarrett D'Amore * 6*95c635efSGarrett D'Amore * Permission to use, copy, modify, and distribute this software for any 7*95c635efSGarrett D'Amore * purpose with or without fee is hereby granted, provided that the above 8*95c635efSGarrett D'Amore * copyright notice and this permission notice appear in all copies. 9*95c635efSGarrett D'Amore * 10*95c635efSGarrett D'Amore * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11*95c635efSGarrett D'Amore * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12*95c635efSGarrett D'Amore * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13*95c635efSGarrett D'Amore * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14*95c635efSGarrett D'Amore * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15*95c635efSGarrett D'Amore * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16*95c635efSGarrett D'Amore * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17*95c635efSGarrett D'Amore */ 18*95c635efSGarrett D'Amore #ifdef HAVE_CONFIG_H 19*95c635efSGarrett D'Amore #include "config.h" 20*95c635efSGarrett D'Amore #endif 21*95c635efSGarrett D'Amore 22*95c635efSGarrett D'Amore #include <assert.h> 23*95c635efSGarrett D'Amore #include <ctype.h> 24*95c635efSGarrett D'Amore #include <stdlib.h> 25*95c635efSGarrett D'Amore #include <string.h> 26*95c635efSGarrett D'Amore #include <time.h> 27*95c635efSGarrett D'Amore 28*95c635efSGarrett D'Amore #include "mandoc.h" 29*95c635efSGarrett D'Amore #include "libmandoc.h" 30*95c635efSGarrett D'Amore #include "libroff.h" 31*95c635efSGarrett D'Amore 32*95c635efSGarrett D'Amore static int data(struct tbl_node *, struct tbl_span *, 33*95c635efSGarrett D'Amore int, const char *, int *); 34*95c635efSGarrett D'Amore static struct tbl_span *newspan(struct tbl_node *, int, 35*95c635efSGarrett D'Amore struct tbl_row *); 36*95c635efSGarrett D'Amore 37*95c635efSGarrett D'Amore static int 38*95c635efSGarrett D'Amore data(struct tbl_node *tbl, struct tbl_span *dp, 39*95c635efSGarrett D'Amore int ln, const char *p, int *pos) 40*95c635efSGarrett D'Amore { 41*95c635efSGarrett D'Amore struct tbl_dat *dat; 42*95c635efSGarrett D'Amore struct tbl_cell *cp; 43*95c635efSGarrett D'Amore int sv, spans; 44*95c635efSGarrett D'Amore 45*95c635efSGarrett D'Amore cp = NULL; 46*95c635efSGarrett D'Amore if (dp->last && dp->last->layout) 47*95c635efSGarrett D'Amore cp = dp->last->layout->next; 48*95c635efSGarrett D'Amore else if (NULL == dp->last) 49*95c635efSGarrett D'Amore cp = dp->layout->first; 50*95c635efSGarrett D'Amore 51*95c635efSGarrett D'Amore /* 52*95c635efSGarrett D'Amore * Skip over spanners and vertical lines to data formats, since 53*95c635efSGarrett D'Amore * we want to match data with data layout cells in the header. 54*95c635efSGarrett D'Amore */ 55*95c635efSGarrett D'Amore 56*95c635efSGarrett D'Amore while (cp && (TBL_CELL_VERT == cp->pos || 57*95c635efSGarrett D'Amore TBL_CELL_DVERT == cp->pos || 58*95c635efSGarrett D'Amore TBL_CELL_SPAN == cp->pos)) 59*95c635efSGarrett D'Amore cp = cp->next; 60*95c635efSGarrett D'Amore 61*95c635efSGarrett D'Amore /* 62*95c635efSGarrett D'Amore * Stop processing when we reach the end of the available layout 63*95c635efSGarrett D'Amore * cells. This means that we have extra input. 64*95c635efSGarrett D'Amore */ 65*95c635efSGarrett D'Amore 66*95c635efSGarrett D'Amore if (NULL == cp) { 67*95c635efSGarrett D'Amore mandoc_msg(MANDOCERR_TBLEXTRADAT, 68*95c635efSGarrett D'Amore tbl->parse, ln, *pos, NULL); 69*95c635efSGarrett D'Amore /* Skip to the end... */ 70*95c635efSGarrett D'Amore while (p[*pos]) 71*95c635efSGarrett D'Amore (*pos)++; 72*95c635efSGarrett D'Amore return(1); 73*95c635efSGarrett D'Amore } 74*95c635efSGarrett D'Amore 75*95c635efSGarrett D'Amore dat = mandoc_calloc(1, sizeof(struct tbl_dat)); 76*95c635efSGarrett D'Amore dat->layout = cp; 77*95c635efSGarrett D'Amore dat->pos = TBL_DATA_NONE; 78*95c635efSGarrett D'Amore 79*95c635efSGarrett D'Amore assert(TBL_CELL_SPAN != cp->pos); 80*95c635efSGarrett D'Amore 81*95c635efSGarrett D'Amore for (spans = 0, cp = cp->next; cp; cp = cp->next) 82*95c635efSGarrett D'Amore if (TBL_CELL_SPAN == cp->pos) 83*95c635efSGarrett D'Amore spans++; 84*95c635efSGarrett D'Amore else 85*95c635efSGarrett D'Amore break; 86*95c635efSGarrett D'Amore 87*95c635efSGarrett D'Amore dat->spans = spans; 88*95c635efSGarrett D'Amore 89*95c635efSGarrett D'Amore if (dp->last) { 90*95c635efSGarrett D'Amore dp->last->next = dat; 91*95c635efSGarrett D'Amore dp->last = dat; 92*95c635efSGarrett D'Amore } else 93*95c635efSGarrett D'Amore dp->last = dp->first = dat; 94*95c635efSGarrett D'Amore 95*95c635efSGarrett D'Amore sv = *pos; 96*95c635efSGarrett D'Amore while (p[*pos] && p[*pos] != tbl->opts.tab) 97*95c635efSGarrett D'Amore (*pos)++; 98*95c635efSGarrett D'Amore 99*95c635efSGarrett D'Amore /* 100*95c635efSGarrett D'Amore * Check for a continued-data scope opening. This consists of a 101*95c635efSGarrett D'Amore * trailing `T{' at the end of the line. Subsequent lines, 102*95c635efSGarrett D'Amore * until a standalone `T}', are included in our cell. 103*95c635efSGarrett D'Amore */ 104*95c635efSGarrett D'Amore 105*95c635efSGarrett D'Amore if (*pos - sv == 2 && 'T' == p[sv] && '{' == p[sv + 1]) { 106*95c635efSGarrett D'Amore tbl->part = TBL_PART_CDATA; 107*95c635efSGarrett D'Amore return(0); 108*95c635efSGarrett D'Amore } 109*95c635efSGarrett D'Amore 110*95c635efSGarrett D'Amore assert(*pos - sv >= 0); 111*95c635efSGarrett D'Amore 112*95c635efSGarrett D'Amore dat->string = mandoc_malloc((size_t)(*pos - sv + 1)); 113*95c635efSGarrett D'Amore memcpy(dat->string, &p[sv], (size_t)(*pos - sv)); 114*95c635efSGarrett D'Amore dat->string[*pos - sv] = '\0'; 115*95c635efSGarrett D'Amore 116*95c635efSGarrett D'Amore if (p[*pos]) 117*95c635efSGarrett D'Amore (*pos)++; 118*95c635efSGarrett D'Amore 119*95c635efSGarrett D'Amore if ( ! strcmp(dat->string, "_")) 120*95c635efSGarrett D'Amore dat->pos = TBL_DATA_HORIZ; 121*95c635efSGarrett D'Amore else if ( ! strcmp(dat->string, "=")) 122*95c635efSGarrett D'Amore dat->pos = TBL_DATA_DHORIZ; 123*95c635efSGarrett D'Amore else if ( ! strcmp(dat->string, "\\_")) 124*95c635efSGarrett D'Amore dat->pos = TBL_DATA_NHORIZ; 125*95c635efSGarrett D'Amore else if ( ! strcmp(dat->string, "\\=")) 126*95c635efSGarrett D'Amore dat->pos = TBL_DATA_NDHORIZ; 127*95c635efSGarrett D'Amore else 128*95c635efSGarrett D'Amore dat->pos = TBL_DATA_DATA; 129*95c635efSGarrett D'Amore 130*95c635efSGarrett D'Amore if (TBL_CELL_HORIZ == dat->layout->pos || 131*95c635efSGarrett D'Amore TBL_CELL_DHORIZ == dat->layout->pos || 132*95c635efSGarrett D'Amore TBL_CELL_DOWN == dat->layout->pos) 133*95c635efSGarrett D'Amore if (TBL_DATA_DATA == dat->pos && '\0' != *dat->string) 134*95c635efSGarrett D'Amore mandoc_msg(MANDOCERR_TBLIGNDATA, 135*95c635efSGarrett D'Amore tbl->parse, ln, sv, NULL); 136*95c635efSGarrett D'Amore 137*95c635efSGarrett D'Amore return(1); 138*95c635efSGarrett D'Amore } 139*95c635efSGarrett D'Amore 140*95c635efSGarrett D'Amore /* ARGSUSED */ 141*95c635efSGarrett D'Amore int 142*95c635efSGarrett D'Amore tbl_cdata(struct tbl_node *tbl, int ln, const char *p) 143*95c635efSGarrett D'Amore { 144*95c635efSGarrett D'Amore struct tbl_dat *dat; 145*95c635efSGarrett D'Amore size_t sz; 146*95c635efSGarrett D'Amore int pos; 147*95c635efSGarrett D'Amore 148*95c635efSGarrett D'Amore pos = 0; 149*95c635efSGarrett D'Amore 150*95c635efSGarrett D'Amore dat = tbl->last_span->last; 151*95c635efSGarrett D'Amore 152*95c635efSGarrett D'Amore if (p[pos] == 'T' && p[pos + 1] == '}') { 153*95c635efSGarrett D'Amore pos += 2; 154*95c635efSGarrett D'Amore if (p[pos] == tbl->opts.tab) { 155*95c635efSGarrett D'Amore tbl->part = TBL_PART_DATA; 156*95c635efSGarrett D'Amore pos++; 157*95c635efSGarrett D'Amore return(data(tbl, tbl->last_span, ln, p, &pos)); 158*95c635efSGarrett D'Amore } else if ('\0' == p[pos]) { 159*95c635efSGarrett D'Amore tbl->part = TBL_PART_DATA; 160*95c635efSGarrett D'Amore return(1); 161*95c635efSGarrett D'Amore } 162*95c635efSGarrett D'Amore 163*95c635efSGarrett D'Amore /* Fallthrough: T} is part of a word. */ 164*95c635efSGarrett D'Amore } 165*95c635efSGarrett D'Amore 166*95c635efSGarrett D'Amore dat->pos = TBL_DATA_DATA; 167*95c635efSGarrett D'Amore 168*95c635efSGarrett D'Amore if (dat->string) { 169*95c635efSGarrett D'Amore sz = strlen(p) + strlen(dat->string) + 2; 170*95c635efSGarrett D'Amore dat->string = mandoc_realloc(dat->string, sz); 171*95c635efSGarrett D'Amore strlcat(dat->string, " ", sz); 172*95c635efSGarrett D'Amore strlcat(dat->string, p, sz); 173*95c635efSGarrett D'Amore } else 174*95c635efSGarrett D'Amore dat->string = mandoc_strdup(p); 175*95c635efSGarrett D'Amore 176*95c635efSGarrett D'Amore if (TBL_CELL_DOWN == dat->layout->pos) 177*95c635efSGarrett D'Amore mandoc_msg(MANDOCERR_TBLIGNDATA, 178*95c635efSGarrett D'Amore tbl->parse, ln, pos, NULL); 179*95c635efSGarrett D'Amore 180*95c635efSGarrett D'Amore return(0); 181*95c635efSGarrett D'Amore } 182*95c635efSGarrett D'Amore 183*95c635efSGarrett D'Amore static struct tbl_span * 184*95c635efSGarrett D'Amore newspan(struct tbl_node *tbl, int line, struct tbl_row *rp) 185*95c635efSGarrett D'Amore { 186*95c635efSGarrett D'Amore struct tbl_span *dp; 187*95c635efSGarrett D'Amore 188*95c635efSGarrett D'Amore dp = mandoc_calloc(1, sizeof(struct tbl_span)); 189*95c635efSGarrett D'Amore dp->line = line; 190*95c635efSGarrett D'Amore dp->tbl = &tbl->opts; 191*95c635efSGarrett D'Amore dp->layout = rp; 192*95c635efSGarrett D'Amore dp->head = tbl->first_head; 193*95c635efSGarrett D'Amore 194*95c635efSGarrett D'Amore if (tbl->last_span) { 195*95c635efSGarrett D'Amore tbl->last_span->next = dp; 196*95c635efSGarrett D'Amore tbl->last_span = dp; 197*95c635efSGarrett D'Amore } else { 198*95c635efSGarrett D'Amore tbl->last_span = tbl->first_span = dp; 199*95c635efSGarrett D'Amore tbl->current_span = NULL; 200*95c635efSGarrett D'Amore dp->flags |= TBL_SPAN_FIRST; 201*95c635efSGarrett D'Amore } 202*95c635efSGarrett D'Amore 203*95c635efSGarrett D'Amore return(dp); 204*95c635efSGarrett D'Amore } 205*95c635efSGarrett D'Amore 206*95c635efSGarrett D'Amore int 207*95c635efSGarrett D'Amore tbl_data(struct tbl_node *tbl, int ln, const char *p) 208*95c635efSGarrett D'Amore { 209*95c635efSGarrett D'Amore struct tbl_span *dp; 210*95c635efSGarrett D'Amore struct tbl_row *rp; 211*95c635efSGarrett D'Amore int pos; 212*95c635efSGarrett D'Amore 213*95c635efSGarrett D'Amore pos = 0; 214*95c635efSGarrett D'Amore 215*95c635efSGarrett D'Amore if ('\0' == p[pos]) { 216*95c635efSGarrett D'Amore mandoc_msg(MANDOCERR_TBL, tbl->parse, ln, pos, NULL); 217*95c635efSGarrett D'Amore return(0); 218*95c635efSGarrett D'Amore } 219*95c635efSGarrett D'Amore 220*95c635efSGarrett D'Amore /* 221*95c635efSGarrett D'Amore * Choose a layout row: take the one following the last parsed 222*95c635efSGarrett D'Amore * span's. If that doesn't exist, use the last parsed span's. 223*95c635efSGarrett D'Amore * If there's no last parsed span, use the first row. Lastly, 224*95c635efSGarrett D'Amore * if the last span was a horizontal line, use the same layout 225*95c635efSGarrett D'Amore * (it doesn't "consume" the layout). 226*95c635efSGarrett D'Amore */ 227*95c635efSGarrett D'Amore 228*95c635efSGarrett D'Amore if (tbl->last_span) { 229*95c635efSGarrett D'Amore assert(tbl->last_span->layout); 230*95c635efSGarrett D'Amore if (tbl->last_span->pos == TBL_SPAN_DATA) { 231*95c635efSGarrett D'Amore for (rp = tbl->last_span->layout->next; 232*95c635efSGarrett D'Amore rp && rp->first; rp = rp->next) { 233*95c635efSGarrett D'Amore switch (rp->first->pos) { 234*95c635efSGarrett D'Amore case (TBL_CELL_HORIZ): 235*95c635efSGarrett D'Amore dp = newspan(tbl, ln, rp); 236*95c635efSGarrett D'Amore dp->pos = TBL_SPAN_HORIZ; 237*95c635efSGarrett D'Amore continue; 238*95c635efSGarrett D'Amore case (TBL_CELL_DHORIZ): 239*95c635efSGarrett D'Amore dp = newspan(tbl, ln, rp); 240*95c635efSGarrett D'Amore dp->pos = TBL_SPAN_DHORIZ; 241*95c635efSGarrett D'Amore continue; 242*95c635efSGarrett D'Amore default: 243*95c635efSGarrett D'Amore break; 244*95c635efSGarrett D'Amore } 245*95c635efSGarrett D'Amore break; 246*95c635efSGarrett D'Amore } 247*95c635efSGarrett D'Amore } else 248*95c635efSGarrett D'Amore rp = tbl->last_span->layout; 249*95c635efSGarrett D'Amore 250*95c635efSGarrett D'Amore if (NULL == rp) 251*95c635efSGarrett D'Amore rp = tbl->last_span->layout; 252*95c635efSGarrett D'Amore } else 253*95c635efSGarrett D'Amore rp = tbl->first_row; 254*95c635efSGarrett D'Amore 255*95c635efSGarrett D'Amore assert(rp); 256*95c635efSGarrett D'Amore 257*95c635efSGarrett D'Amore dp = newspan(tbl, ln, rp); 258*95c635efSGarrett D'Amore 259*95c635efSGarrett D'Amore if ( ! strcmp(p, "_")) { 260*95c635efSGarrett D'Amore dp->pos = TBL_SPAN_HORIZ; 261*95c635efSGarrett D'Amore return(1); 262*95c635efSGarrett D'Amore } else if ( ! strcmp(p, "=")) { 263*95c635efSGarrett D'Amore dp->pos = TBL_SPAN_DHORIZ; 264*95c635efSGarrett D'Amore return(1); 265*95c635efSGarrett D'Amore } 266*95c635efSGarrett D'Amore 267*95c635efSGarrett D'Amore dp->pos = TBL_SPAN_DATA; 268*95c635efSGarrett D'Amore 269*95c635efSGarrett D'Amore /* This returns 0 when TBL_PART_CDATA is entered. */ 270*95c635efSGarrett D'Amore 271*95c635efSGarrett D'Amore while ('\0' != p[pos]) 272*95c635efSGarrett D'Amore if ( ! data(tbl, dp, ln, p, &pos)) 273*95c635efSGarrett D'Amore return(0); 274*95c635efSGarrett D'Amore 275*95c635efSGarrett D'Amore return(1); 276*95c635efSGarrett D'Amore } 277