xref: /illumos-gate/usr/src/cmd/mandoc/tbl_data.c (revision 95c635efb7c3b86efc493e0447eaec7aecca3f0f)
1 /*	$Id: tbl_data.c,v 1.24 2011/03/20 16:02:05 kristaps Exp $ */
2 /*
3  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2011 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <time.h>
27 
28 #include "mandoc.h"
29 #include "libmandoc.h"
30 #include "libroff.h"
31 
/* File-local helpers; both are defined further down in this file. */
static	int		 data(struct tbl_node *tbl, struct tbl_span *dp,
				int ln, const char *p, int *pos);
static	struct tbl_span	*newspan(struct tbl_node *tbl, int line,
				struct tbl_row *rp);
36 
37 static int
38 data(struct tbl_node *tbl, struct tbl_span *dp,
39 		int ln, const char *p, int *pos)
40 {
41 	struct tbl_dat	*dat;
42 	struct tbl_cell	*cp;
43 	int		 sv, spans;
44 
45 	cp = NULL;
46 	if (dp->last && dp->last->layout)
47 		cp = dp->last->layout->next;
48 	else if (NULL == dp->last)
49 		cp = dp->layout->first;
50 
51 	/*
52 	 * Skip over spanners and vertical lines to data formats, since
53 	 * we want to match data with data layout cells in the header.
54 	 */
55 
56 	while (cp && (TBL_CELL_VERT == cp->pos ||
57 				TBL_CELL_DVERT == cp->pos ||
58 				TBL_CELL_SPAN == cp->pos))
59 		cp = cp->next;
60 
61 	/*
62 	 * Stop processing when we reach the end of the available layout
63 	 * cells.  This means that we have extra input.
64 	 */
65 
66 	if (NULL == cp) {
67 		mandoc_msg(MANDOCERR_TBLEXTRADAT,
68 				tbl->parse, ln, *pos, NULL);
69 		/* Skip to the end... */
70 		while (p[*pos])
71 			(*pos)++;
72 		return(1);
73 	}
74 
75 	dat = mandoc_calloc(1, sizeof(struct tbl_dat));
76 	dat->layout = cp;
77 	dat->pos = TBL_DATA_NONE;
78 
79 	assert(TBL_CELL_SPAN != cp->pos);
80 
81 	for (spans = 0, cp = cp->next; cp; cp = cp->next)
82 		if (TBL_CELL_SPAN == cp->pos)
83 			spans++;
84 		else
85 			break;
86 
87 	dat->spans = spans;
88 
89 	if (dp->last) {
90 		dp->last->next = dat;
91 		dp->last = dat;
92 	} else
93 		dp->last = dp->first = dat;
94 
95 	sv = *pos;
96 	while (p[*pos] && p[*pos] != tbl->opts.tab)
97 		(*pos)++;
98 
99 	/*
100 	 * Check for a continued-data scope opening.  This consists of a
101 	 * trailing `T{' at the end of the line.  Subsequent lines,
102 	 * until a standalone `T}', are included in our cell.
103 	 */
104 
105 	if (*pos - sv == 2 && 'T' == p[sv] && '{' == p[sv + 1]) {
106 		tbl->part = TBL_PART_CDATA;
107 		return(0);
108 	}
109 
110 	assert(*pos - sv >= 0);
111 
112 	dat->string = mandoc_malloc((size_t)(*pos - sv + 1));
113 	memcpy(dat->string, &p[sv], (size_t)(*pos - sv));
114 	dat->string[*pos - sv] = '\0';
115 
116 	if (p[*pos])
117 		(*pos)++;
118 
119 	if ( ! strcmp(dat->string, "_"))
120 		dat->pos = TBL_DATA_HORIZ;
121 	else if ( ! strcmp(dat->string, "="))
122 		dat->pos = TBL_DATA_DHORIZ;
123 	else if ( ! strcmp(dat->string, "\\_"))
124 		dat->pos = TBL_DATA_NHORIZ;
125 	else if ( ! strcmp(dat->string, "\\="))
126 		dat->pos = TBL_DATA_NDHORIZ;
127 	else
128 		dat->pos = TBL_DATA_DATA;
129 
130 	if (TBL_CELL_HORIZ == dat->layout->pos ||
131 			TBL_CELL_DHORIZ == dat->layout->pos ||
132 			TBL_CELL_DOWN == dat->layout->pos)
133 		if (TBL_DATA_DATA == dat->pos && '\0' != *dat->string)
134 			mandoc_msg(MANDOCERR_TBLIGNDATA,
135 					tbl->parse, ln, sv, NULL);
136 
137 	return(1);
138 }
139 
140 /* ARGSUSED */
141 int
142 tbl_cdata(struct tbl_node *tbl, int ln, const char *p)
143 {
144 	struct tbl_dat	*dat;
145 	size_t	 	 sz;
146 	int		 pos;
147 
148 	pos = 0;
149 
150 	dat = tbl->last_span->last;
151 
152 	if (p[pos] == 'T' && p[pos + 1] == '}') {
153 		pos += 2;
154 		if (p[pos] == tbl->opts.tab) {
155 			tbl->part = TBL_PART_DATA;
156 			pos++;
157 			return(data(tbl, tbl->last_span, ln, p, &pos));
158 		} else if ('\0' == p[pos]) {
159 			tbl->part = TBL_PART_DATA;
160 			return(1);
161 		}
162 
163 		/* Fallthrough: T} is part of a word. */
164 	}
165 
166 	dat->pos = TBL_DATA_DATA;
167 
168 	if (dat->string) {
169 		sz = strlen(p) + strlen(dat->string) + 2;
170 		dat->string = mandoc_realloc(dat->string, sz);
171 		strlcat(dat->string, " ", sz);
172 		strlcat(dat->string, p, sz);
173 	} else
174 		dat->string = mandoc_strdup(p);
175 
176 	if (TBL_CELL_DOWN == dat->layout->pos)
177 		mandoc_msg(MANDOCERR_TBLIGNDATA,
178 				tbl->parse, ln, pos, NULL);
179 
180 	return(0);
181 }
182 
183 static struct tbl_span *
184 newspan(struct tbl_node *tbl, int line, struct tbl_row *rp)
185 {
186 	struct tbl_span	*dp;
187 
188 	dp = mandoc_calloc(1, sizeof(struct tbl_span));
189 	dp->line = line;
190 	dp->tbl = &tbl->opts;
191 	dp->layout = rp;
192 	dp->head = tbl->first_head;
193 
194 	if (tbl->last_span) {
195 		tbl->last_span->next = dp;
196 		tbl->last_span = dp;
197 	} else {
198 		tbl->last_span = tbl->first_span = dp;
199 		tbl->current_span = NULL;
200 		dp->flags |= TBL_SPAN_FIRST;
201 	}
202 
203 	return(dp);
204 }
205 
206 int
207 tbl_data(struct tbl_node *tbl, int ln, const char *p)
208 {
209 	struct tbl_span	*dp;
210 	struct tbl_row	*rp;
211 	int		 pos;
212 
213 	pos = 0;
214 
215 	if ('\0' == p[pos]) {
216 		mandoc_msg(MANDOCERR_TBL, tbl->parse, ln, pos, NULL);
217 		return(0);
218 	}
219 
220 	/*
221 	 * Choose a layout row: take the one following the last parsed
222 	 * span's.  If that doesn't exist, use the last parsed span's.
223 	 * If there's no last parsed span, use the first row.  Lastly,
224 	 * if the last span was a horizontal line, use the same layout
225 	 * (it doesn't "consume" the layout).
226 	 */
227 
228 	if (tbl->last_span) {
229 		assert(tbl->last_span->layout);
230 		if (tbl->last_span->pos == TBL_SPAN_DATA) {
231 			for (rp = tbl->last_span->layout->next;
232 					rp && rp->first; rp = rp->next) {
233 				switch (rp->first->pos) {
234 				case (TBL_CELL_HORIZ):
235 					dp = newspan(tbl, ln, rp);
236 					dp->pos = TBL_SPAN_HORIZ;
237 					continue;
238 				case (TBL_CELL_DHORIZ):
239 					dp = newspan(tbl, ln, rp);
240 					dp->pos = TBL_SPAN_DHORIZ;
241 					continue;
242 				default:
243 					break;
244 				}
245 				break;
246 			}
247 		} else
248 			rp = tbl->last_span->layout;
249 
250 		if (NULL == rp)
251 			rp = tbl->last_span->layout;
252 	} else
253 		rp = tbl->first_row;
254 
255 	assert(rp);
256 
257 	dp = newspan(tbl, ln, rp);
258 
259 	if ( ! strcmp(p, "_")) {
260 		dp->pos = TBL_SPAN_HORIZ;
261 		return(1);
262 	} else if ( ! strcmp(p, "=")) {
263 		dp->pos = TBL_SPAN_DHORIZ;
264 		return(1);
265 	}
266 
267 	dp->pos = TBL_SPAN_DATA;
268 
269 	/* This returns 0 when TBL_PART_CDATA is entered. */
270 
271 	while ('\0' != p[pos])
272 		if ( ! data(tbl, dp, ln, p, &pos))
273 			return(0);
274 
275 	return(1);
276 }
277