xref: /illumos-gate/usr/src/cmd/mandoc/tbl_layout.c (revision c124a83e09115de88ecccd4f689983f42a1d53bd)
1 /*	$Id: tbl_layout.c,v 1.23 2012/05/27 17:54:54 schwarze Exp $ */
2 /*
3  * Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2012 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <time.h>
27 
28 #include "mandoc.h"
29 #include "libmandoc.h"
30 #include "libroff.h"
31 
/*
 * Associates a single layout key character with the cell type it
 * selects.  The keys[] table below is initialised positionally, so the
 * member order matters.
 */
struct	tbl_phrase {
	char		 name;	/* key character as it appears in the layout */
	enum tbl_cellt	 key;	/* cell type selected by that character */
};
36 
37 /*
38  * FIXME: we can make this parse a lot nicer by, when an error is
39  * encountered in a layout key, bailing to the next key (i.e. to the
40  * next whitespace then continuing).
41  */
42 
43 #define	KEYS_MAX	 11
44 
45 static	const struct tbl_phrase keys[KEYS_MAX] = {
46 	{ 'c',		 TBL_CELL_CENTRE },
47 	{ 'r',		 TBL_CELL_RIGHT },
48 	{ 'l',		 TBL_CELL_LEFT },
49 	{ 'n',		 TBL_CELL_NUMBER },
50 	{ 's',		 TBL_CELL_SPAN },
51 	{ 'a',		 TBL_CELL_LONG },
52 	{ '^',		 TBL_CELL_DOWN },
53 	{ '-',		 TBL_CELL_HORIZ },
54 	{ '_',		 TBL_CELL_HORIZ },
55 	{ '=',		 TBL_CELL_DHORIZ }
56 };
57 
/*
 * Forward declarations of the file-local layout parsing helpers.
 * In each of mods(), cell() and row(), the int argument is the input
 * line number passed on to mandoc_msg(), the string is the layout line
 * and the int pointer is the current parse offset into that line.
 */
static	int		 mods(struct tbl_node *, struct tbl_cell *,
				int, const char *, int *);
static	int		 cell(struct tbl_node *, struct tbl_row *,
				int, const char *, int *);
static	void		 row(struct tbl_node *, int, const char *, int *);
static	struct tbl_cell *cell_alloc(struct tbl_node *, struct tbl_row *,
				enum tbl_cellt, int vert);
65 
66 static int
67 mods(struct tbl_node *tbl, struct tbl_cell *cp,
68 		int ln, const char *p, int *pos)
69 {
70 	char		 buf[5];
71 	int		 i;
72 
73 	/* Not all types accept modifiers. */
74 
75 	switch (cp->pos) {
76 	case (TBL_CELL_DOWN):
77 		/* FALLTHROUGH */
78 	case (TBL_CELL_HORIZ):
79 		/* FALLTHROUGH */
80 	case (TBL_CELL_DHORIZ):
81 		return(1);
82 	default:
83 		break;
84 	}
85 
86 mod:
87 	/*
88 	 * XXX: since, at least for now, modifiers are non-conflicting
89 	 * (are separable by value, regardless of position), we let
90 	 * modifiers come in any order.  The existing tbl doesn't let
91 	 * this happen.
92 	 */
93 	switch (p[*pos]) {
94 	case ('\0'):
95 		/* FALLTHROUGH */
96 	case (' '):
97 		/* FALLTHROUGH */
98 	case ('\t'):
99 		/* FALLTHROUGH */
100 	case (','):
101 		/* FALLTHROUGH */
102 	case ('.'):
103 		return(1);
104 	default:
105 		break;
106 	}
107 
108 	/* Throw away parenthesised expression. */
109 
110 	if ('(' == p[*pos]) {
111 		(*pos)++;
112 		while (p[*pos] && ')' != p[*pos])
113 			(*pos)++;
114 		if (')' == p[*pos]) {
115 			(*pos)++;
116 			goto mod;
117 		}
118 		mandoc_msg(MANDOCERR_TBLLAYOUT,
119 				tbl->parse, ln, *pos, NULL);
120 		return(0);
121 	}
122 
123 	/* Parse numerical spacing from modifier string. */
124 
125 	if (isdigit((unsigned char)p[*pos])) {
126 		for (i = 0; i < 4; i++) {
127 			if ( ! isdigit((unsigned char)p[*pos + i]))
128 				break;
129 			buf[i] = p[*pos + i];
130 		}
131 		buf[i] = '\0';
132 
133 		/* No greater than 4 digits. */
134 
135 		if (4 == i) {
136 			mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
137 					ln, *pos, NULL);
138 			return(0);
139 		}
140 
141 		*pos += i;
142 		cp->spacing = (size_t)atoi(buf);
143 
144 		goto mod;
145 		/* NOTREACHED */
146 	}
147 
148 	/* TODO: GNU has many more extensions. */
149 
150 	switch (tolower((unsigned char)p[(*pos)++])) {
151 	case ('z'):
152 		cp->flags |= TBL_CELL_WIGN;
153 		goto mod;
154 	case ('u'):
155 		cp->flags |= TBL_CELL_UP;
156 		goto mod;
157 	case ('e'):
158 		cp->flags |= TBL_CELL_EQUAL;
159 		goto mod;
160 	case ('t'):
161 		cp->flags |= TBL_CELL_TALIGN;
162 		goto mod;
163 	case ('d'):
164 		cp->flags |= TBL_CELL_BALIGN;
165 		goto mod;
166 	case ('w'):  /* XXX for now, ignore minimal column width */
167 		goto mod;
168 	case ('f'):
169 		break;
170 	case ('r'):
171 		/* FALLTHROUGH */
172 	case ('b'):
173 		/* FALLTHROUGH */
174 	case ('i'):
175 		(*pos)--;
176 		break;
177 	default:
178 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
179 				ln, *pos - 1, NULL);
180 		return(0);
181 	}
182 
183 	switch (tolower((unsigned char)p[(*pos)++])) {
184 	case ('3'):
185 		/* FALLTHROUGH */
186 	case ('b'):
187 		cp->flags |= TBL_CELL_BOLD;
188 		goto mod;
189 	case ('2'):
190 		/* FALLTHROUGH */
191 	case ('i'):
192 		cp->flags |= TBL_CELL_ITALIC;
193 		goto mod;
194 	case ('1'):
195 		/* FALLTHROUGH */
196 	case ('r'):
197 		goto mod;
198 	default:
199 		break;
200 	}
201 
202 	mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
203 			ln, *pos - 1, NULL);
204 	return(0);
205 }
206 
207 static int
208 cell(struct tbl_node *tbl, struct tbl_row *rp,
209 		int ln, const char *p, int *pos)
210 {
211 	int		 vert, i;
212 	enum tbl_cellt	 c;
213 
214 	/* Handle vertical lines. */
215 
216 	for (vert = 0; '|' == p[*pos]; ++*pos)
217 		vert++;
218 	while (' ' == p[*pos])
219 		(*pos)++;
220 
221 	/* Parse the column position (`c', `l', `r', ...). */
222 
223 	for (i = 0; i < KEYS_MAX; i++)
224 		if (tolower((unsigned char)p[*pos]) == keys[i].name)
225 			break;
226 
227 	if (KEYS_MAX == i) {
228 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
229 				ln, *pos, NULL);
230 		return(0);
231 	}
232 
233 	c = keys[i].key;
234 
235 	/*
236 	 * If a span cell is found first, raise a warning and abort the
237 	 * parse.  If a span cell is found and the last layout element
238 	 * isn't a "normal" layout, bail.
239 	 *
240 	 * FIXME: recover from this somehow?
241 	 */
242 
243 	if (TBL_CELL_SPAN == c) {
244 		if (NULL == rp->first) {
245 			mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
246 					ln, *pos, NULL);
247 			return(0);
248 		} else if (rp->last)
249 			switch (rp->last->pos) {
250 			case (TBL_CELL_HORIZ):
251 			case (TBL_CELL_DHORIZ):
252 				mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse,
253 						ln, *pos, NULL);
254 				return(0);
255 			default:
256 				break;
257 			}
258 	}
259 
260 	/*
261 	 * If a vertical spanner is found, we may not be in the first
262 	 * row.
263 	 */
264 
265 	if (TBL_CELL_DOWN == c && rp == tbl->first_row) {
266 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos, NULL);
267 		return(0);
268 	}
269 
270 	(*pos)++;
271 
272 	/* Disallow adjacent spacers. */
273 
274 	if (vert > 2) {
275 		mandoc_msg(MANDOCERR_TBLLAYOUT, tbl->parse, ln, *pos - 1, NULL);
276 		return(0);
277 	}
278 
279 	/* Allocate cell then parse its modifiers. */
280 
281 	return(mods(tbl, cell_alloc(tbl, rp, c, vert), ln, p, pos));
282 }
283 
284 
285 static void
286 row(struct tbl_node *tbl, int ln, const char *p, int *pos)
287 {
288 	struct tbl_row	*rp;
289 
290 row:	/*
291 	 * EBNF describing this section:
292 	 *
293 	 * row		::= row_list [:space:]* [.]?[\n]
294 	 * row_list	::= [:space:]* row_elem row_tail
295 	 * row_tail	::= [:space:]*[,] row_list |
296 	 *                  epsilon
297 	 * row_elem	::= [\t\ ]*[:alpha:]+
298 	 */
299 
300 	rp = mandoc_calloc(1, sizeof(struct tbl_row));
301 	if (tbl->last_row)
302 		tbl->last_row->next = rp;
303 	else
304 		tbl->first_row = rp;
305 	tbl->last_row = rp;
306 
307 cell:
308 	while (isspace((unsigned char)p[*pos]))
309 		(*pos)++;
310 
311 	/* Safely exit layout context. */
312 
313 	if ('.' == p[*pos]) {
314 		tbl->part = TBL_PART_DATA;
315 		if (NULL == tbl->first_row)
316 			mandoc_msg(MANDOCERR_TBLNOLAYOUT, tbl->parse,
317 					ln, *pos, NULL);
318 		(*pos)++;
319 		return;
320 	}
321 
322 	/* End (and possibly restart) a row. */
323 
324 	if (',' == p[*pos]) {
325 		(*pos)++;
326 		goto row;
327 	} else if ('\0' == p[*pos])
328 		return;
329 
330 	if ( ! cell(tbl, rp, ln, p, pos))
331 		return;
332 
333 	goto cell;
334 	/* NOTREACHED */
335 }
336 
/*
 * Parse the layout line p, found on input line ln, into the table tbl,
 * starting at the beginning of the string.  Problems are reported
 * through mandoc_msg() by the parsing helpers; the return value is
 * always 1.
 */
int
tbl_layout(struct tbl_node *tbl, int ln, const char *p)
{
	int		 offs = 0;

	row(tbl, ln, p, &offs);

	/* Always succeed. */
	return(1);
}
348 
349 static struct tbl_cell *
350 cell_alloc(struct tbl_node *tbl, struct tbl_row *rp, enum tbl_cellt pos,
351 		int vert)
352 {
353 	struct tbl_cell	*p, *pp;
354 	struct tbl_head	*h, *hp;
355 
356 	p = mandoc_calloc(1, sizeof(struct tbl_cell));
357 
358 	if (NULL != (pp = rp->last)) {
359 		pp->next = p;
360 		h = pp->head->next;
361 	} else {
362 		rp->first = p;
363 		h = tbl->first_head;
364 	}
365 	rp->last = p;
366 
367 	p->pos = pos;
368 	p->vert = vert;
369 
370 	/* Re-use header. */
371 
372 	if (h) {
373 		p->head = h;
374 		return(p);
375 	}
376 
377 	hp = mandoc_calloc(1, sizeof(struct tbl_head));
378 	hp->ident = tbl->opts.cols++;
379 	hp->vert = vert;
380 
381 	if (tbl->last_head) {
382 		hp->prev = tbl->last_head;
383 		tbl->last_head->next = hp;
384 	} else
385 		tbl->first_head = hp;
386 	tbl->last_head = hp;
387 
388 	p->head = hp;
389 	return(p);
390 }
391