xref: /illumos-gate/usr/src/cmd/mandoc/eqn.c (revision 93cf283ac1d0119d2ee3842f515f8d0e1bc7cbdb)
1 /*	$Id: eqn.c,v 1.58 2015/03/04 12:19:49 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <limits.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <time.h>
28 
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "libmandoc.h"
32 #include "libroff.h"
33 
34 #define	EQN_NEST_MAX	 128 /* maximum nesting of defines */
35 #define	STRNEQ(p1, sz1, p2, sz2) \
36 	((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1)))
37 
38 enum	eqn_tok {
39 	EQN_TOK_DYAD = 0,
40 	EQN_TOK_VEC,
41 	EQN_TOK_UNDER,
42 	EQN_TOK_BAR,
43 	EQN_TOK_TILDE,
44 	EQN_TOK_HAT,
45 	EQN_TOK_DOT,
46 	EQN_TOK_DOTDOT,
47 	EQN_TOK_FWD,
48 	EQN_TOK_BACK,
49 	EQN_TOK_DOWN,
50 	EQN_TOK_UP,
51 	EQN_TOK_FAT,
52 	EQN_TOK_ROMAN,
53 	EQN_TOK_ITALIC,
54 	EQN_TOK_BOLD,
55 	EQN_TOK_SIZE,
56 	EQN_TOK_SUB,
57 	EQN_TOK_SUP,
58 	EQN_TOK_SQRT,
59 	EQN_TOK_OVER,
60 	EQN_TOK_FROM,
61 	EQN_TOK_TO,
62 	EQN_TOK_BRACE_OPEN,
63 	EQN_TOK_BRACE_CLOSE,
64 	EQN_TOK_GSIZE,
65 	EQN_TOK_GFONT,
66 	EQN_TOK_MARK,
67 	EQN_TOK_LINEUP,
68 	EQN_TOK_LEFT,
69 	EQN_TOK_RIGHT,
70 	EQN_TOK_PILE,
71 	EQN_TOK_LPILE,
72 	EQN_TOK_RPILE,
73 	EQN_TOK_CPILE,
74 	EQN_TOK_MATRIX,
75 	EQN_TOK_CCOL,
76 	EQN_TOK_LCOL,
77 	EQN_TOK_RCOL,
78 	EQN_TOK_DELIM,
79 	EQN_TOK_DEFINE,
80 	EQN_TOK_TDEFINE,
81 	EQN_TOK_NDEFINE,
82 	EQN_TOK_UNDEF,
83 	EQN_TOK_EOF,
84 	EQN_TOK_ABOVE,
85 	EQN_TOK__MAX
86 };
87 
88 static	const char *eqn_toks[EQN_TOK__MAX] = {
89 	"dyad", /* EQN_TOK_DYAD */
90 	"vec", /* EQN_TOK_VEC */
91 	"under", /* EQN_TOK_UNDER */
92 	"bar", /* EQN_TOK_BAR */
93 	"tilde", /* EQN_TOK_TILDE */
94 	"hat", /* EQN_TOK_HAT */
95 	"dot", /* EQN_TOK_DOT */
96 	"dotdot", /* EQN_TOK_DOTDOT */
97 	"fwd", /* EQN_TOK_FWD * */
98 	"back", /* EQN_TOK_BACK */
99 	"down", /* EQN_TOK_DOWN */
100 	"up", /* EQN_TOK_UP */
101 	"fat", /* EQN_TOK_FAT */
102 	"roman", /* EQN_TOK_ROMAN */
103 	"italic", /* EQN_TOK_ITALIC */
104 	"bold", /* EQN_TOK_BOLD */
105 	"size", /* EQN_TOK_SIZE */
106 	"sub", /* EQN_TOK_SUB */
107 	"sup", /* EQN_TOK_SUP */
108 	"sqrt", /* EQN_TOK_SQRT */
109 	"over", /* EQN_TOK_OVER */
110 	"from", /* EQN_TOK_FROM */
111 	"to", /* EQN_TOK_TO */
112 	"{", /* EQN_TOK_BRACE_OPEN */
113 	"}", /* EQN_TOK_BRACE_CLOSE */
114 	"gsize", /* EQN_TOK_GSIZE */
115 	"gfont", /* EQN_TOK_GFONT */
116 	"mark", /* EQN_TOK_MARK */
117 	"lineup", /* EQN_TOK_LINEUP */
118 	"left", /* EQN_TOK_LEFT */
119 	"right", /* EQN_TOK_RIGHT */
120 	"pile", /* EQN_TOK_PILE */
121 	"lpile", /* EQN_TOK_LPILE */
122 	"rpile", /* EQN_TOK_RPILE */
123 	"cpile", /* EQN_TOK_CPILE */
124 	"matrix", /* EQN_TOK_MATRIX */
125 	"ccol", /* EQN_TOK_CCOL */
126 	"lcol", /* EQN_TOK_LCOL */
127 	"rcol", /* EQN_TOK_RCOL */
128 	"delim", /* EQN_TOK_DELIM */
129 	"define", /* EQN_TOK_DEFINE */
130 	"tdefine", /* EQN_TOK_TDEFINE */
131 	"ndefine", /* EQN_TOK_NDEFINE */
132 	"undef", /* EQN_TOK_UNDEF */
133 	NULL, /* EQN_TOK_EOF */
134 	"above", /* EQN_TOK_ABOVE */
135 };
136 
137 enum	eqn_symt {
138 	EQNSYM_alpha,
139 	EQNSYM_beta,
140 	EQNSYM_chi,
141 	EQNSYM_delta,
142 	EQNSYM_epsilon,
143 	EQNSYM_eta,
144 	EQNSYM_gamma,
145 	EQNSYM_iota,
146 	EQNSYM_kappa,
147 	EQNSYM_lambda,
148 	EQNSYM_mu,
149 	EQNSYM_nu,
150 	EQNSYM_omega,
151 	EQNSYM_omicron,
152 	EQNSYM_phi,
153 	EQNSYM_pi,
154 	EQNSYM_ps,
155 	EQNSYM_rho,
156 	EQNSYM_sigma,
157 	EQNSYM_tau,
158 	EQNSYM_theta,
159 	EQNSYM_upsilon,
160 	EQNSYM_xi,
161 	EQNSYM_zeta,
162 	EQNSYM_DELTA,
163 	EQNSYM_GAMMA,
164 	EQNSYM_LAMBDA,
165 	EQNSYM_OMEGA,
166 	EQNSYM_PHI,
167 	EQNSYM_PI,
168 	EQNSYM_PSI,
169 	EQNSYM_SIGMA,
170 	EQNSYM_THETA,
171 	EQNSYM_UPSILON,
172 	EQNSYM_XI,
173 	EQNSYM_inter,
174 	EQNSYM_union,
175 	EQNSYM_prod,
176 	EQNSYM_int,
177 	EQNSYM_sum,
178 	EQNSYM_grad,
179 	EQNSYM_del,
180 	EQNSYM_times,
181 	EQNSYM_cdot,
182 	EQNSYM_nothing,
183 	EQNSYM_approx,
184 	EQNSYM_prime,
185 	EQNSYM_half,
186 	EQNSYM_partial,
187 	EQNSYM_inf,
188 	EQNSYM_muchgreat,
189 	EQNSYM_muchless,
190 	EQNSYM_larrow,
191 	EQNSYM_rarrow,
192 	EQNSYM_pm,
193 	EQNSYM_nequal,
194 	EQNSYM_equiv,
195 	EQNSYM_lessequal,
196 	EQNSYM_moreequal,
197 	EQNSYM_minus,
198 	EQNSYM__MAX
199 };
200 
201 struct	eqnsym {
202 	const char	*str;
203 	const char	*sym;
204 };
205 
206 static	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
207 	{ "alpha", "*a" }, /* EQNSYM_alpha */
208 	{ "beta", "*b" }, /* EQNSYM_beta */
209 	{ "chi", "*x" }, /* EQNSYM_chi */
210 	{ "delta", "*d" }, /* EQNSYM_delta */
211 	{ "epsilon", "*e" }, /* EQNSYM_epsilon */
212 	{ "eta", "*y" }, /* EQNSYM_eta */
213 	{ "gamma", "*g" }, /* EQNSYM_gamma */
214 	{ "iota", "*i" }, /* EQNSYM_iota */
215 	{ "kappa", "*k" }, /* EQNSYM_kappa */
216 	{ "lambda", "*l" }, /* EQNSYM_lambda */
217 	{ "mu", "*m" }, /* EQNSYM_mu */
218 	{ "nu", "*n" }, /* EQNSYM_nu */
219 	{ "omega", "*w" }, /* EQNSYM_omega */
220 	{ "omicron", "*o" }, /* EQNSYM_omicron */
221 	{ "phi", "*f" }, /* EQNSYM_phi */
222 	{ "pi", "*p" }, /* EQNSYM_pi */
223 	{ "psi", "*q" }, /* EQNSYM_psi */
224 	{ "rho", "*r" }, /* EQNSYM_rho */
225 	{ "sigma", "*s" }, /* EQNSYM_sigma */
226 	{ "tau", "*t" }, /* EQNSYM_tau */
227 	{ "theta", "*h" }, /* EQNSYM_theta */
228 	{ "upsilon", "*u" }, /* EQNSYM_upsilon */
229 	{ "xi", "*c" }, /* EQNSYM_xi */
230 	{ "zeta", "*z" }, /* EQNSYM_zeta */
231 	{ "DELTA", "*D" }, /* EQNSYM_DELTA */
232 	{ "GAMMA", "*G" }, /* EQNSYM_GAMMA */
233 	{ "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
234 	{ "OMEGA", "*W" }, /* EQNSYM_OMEGA */
235 	{ "PHI", "*F" }, /* EQNSYM_PHI */
236 	{ "PI", "*P" }, /* EQNSYM_PI */
237 	{ "PSI", "*Q" }, /* EQNSYM_PSI */
238 	{ "SIGMA", "*S" }, /* EQNSYM_SIGMA */
239 	{ "THETA", "*H" }, /* EQNSYM_THETA */
240 	{ "UPSILON", "*U" }, /* EQNSYM_UPSILON */
241 	{ "XI", "*C" }, /* EQNSYM_XI */
242 	{ "inter", "ca" }, /* EQNSYM_inter */
243 	{ "union", "cu" }, /* EQNSYM_union */
244 	{ "prod", "product" }, /* EQNSYM_prod */
245 	{ "int", "integral" }, /* EQNSYM_int */
246 	{ "sum", "sum" }, /* EQNSYM_sum */
247 	{ "grad", "gr" }, /* EQNSYM_grad */
248 	{ "del", "gr" }, /* EQNSYM_del */
249 	{ "times", "mu" }, /* EQNSYM_times */
250 	{ "cdot", "pc" }, /* EQNSYM_cdot */
251 	{ "nothing", "&" }, /* EQNSYM_nothing */
252 	{ "approx", "~~" }, /* EQNSYM_approx */
253 	{ "prime", "fm" }, /* EQNSYM_prime */
254 	{ "half", "12" }, /* EQNSYM_half */
255 	{ "partial", "pd" }, /* EQNSYM_partial */
256 	{ "inf", "if" }, /* EQNSYM_inf */
257 	{ ">>", ">>" }, /* EQNSYM_muchgreat */
258 	{ "<<", "<<" }, /* EQNSYM_muchless */
259 	{ "<-", "<-" }, /* EQNSYM_larrow */
260 	{ "->", "->" }, /* EQNSYM_rarrow */
261 	{ "+-", "+-" }, /* EQNSYM_pm */
262 	{ "!=", "!=" }, /* EQNSYM_nequal */
263 	{ "==", "==" }, /* EQNSYM_equiv */
264 	{ "<=", "<=" }, /* EQNSYM_lessequal */
265 	{ ">=", ">=" }, /* EQNSYM_moreequal */
266 	{ "-", "mi" }, /* EQNSYM_minus */
267 };
268 
269 static	struct eqn_box	*eqn_box_alloc(struct eqn_node *, struct eqn_box *);
270 static	void		 eqn_box_free(struct eqn_box *);
271 static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *,
272 				enum eqn_post, struct eqn_box *);
273 static	void		 eqn_def(struct eqn_node *);
274 static	struct eqn_def	*eqn_def_find(struct eqn_node *, const char *, size_t);
275 static	void		 eqn_delim(struct eqn_node *);
276 static	const char	*eqn_next(struct eqn_node *, char, size_t *, int);
277 static	const char	*eqn_nextrawtok(struct eqn_node *, size_t *);
278 static	const char	*eqn_nexttok(struct eqn_node *, size_t *);
279 static	enum rofferr	 eqn_parse(struct eqn_node *, struct eqn_box *);
280 static	enum eqn_tok	 eqn_tok_parse(struct eqn_node *, char **);
281 static	void		 eqn_undef(struct eqn_node *);
282 
283 
284 enum rofferr
285 eqn_read(struct eqn_node **epp, int ln,
286 		const char *p, int pos, int *offs)
287 {
288 	size_t		 sz;
289 	struct eqn_node	*ep;
290 	enum rofferr	 er;
291 
292 	ep = *epp;
293 
294 	/*
295 	 * If we're the terminating mark, unset our equation status and
296 	 * validate the full equation.
297 	 */
298 
299 	if (0 == strncmp(p, ".EN", 3)) {
300 		er = eqn_end(epp);
301 		p += 3;
302 		while (' ' == *p || '\t' == *p)
303 			p++;
304 		if ('\0' == *p)
305 			return(er);
306 		mandoc_vmsg(MANDOCERR_ARG_SKIP, ep->parse,
307 		    ln, pos, "EN %s", p);
308 		return(er);
309 	}
310 
311 	/*
312 	 * Build up the full string, replacing all newlines with regular
313 	 * whitespace.
314 	 */
315 
316 	sz = strlen(p + pos) + 1;
317 	ep->data = mandoc_realloc(ep->data, ep->sz + sz + 1);
318 
319 	/* First invocation: nil terminate the string. */
320 
321 	if (0 == ep->sz)
322 		*ep->data = '\0';
323 
324 	ep->sz += sz;
325 	strlcat(ep->data, p + pos, ep->sz + 1);
326 	strlcat(ep->data, " ", ep->sz + 1);
327 	return(ROFF_IGN);
328 }
329 
330 struct eqn_node *
331 eqn_alloc(int pos, int line, struct mparse *parse)
332 {
333 	struct eqn_node	*p;
334 
335 	p = mandoc_calloc(1, sizeof(struct eqn_node));
336 
337 	p->parse = parse;
338 	p->eqn.ln = line;
339 	p->eqn.pos = pos;
340 	p->gsize = EQN_DEFSIZE;
341 
342 	return(p);
343 }
344 
345 /*
346  * Find the key "key" of the give size within our eqn-defined values.
347  */
348 static struct eqn_def *
349 eqn_def_find(struct eqn_node *ep, const char *key, size_t sz)
350 {
351 	int		 i;
352 
353 	for (i = 0; i < (int)ep->defsz; i++)
354 		if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
355 		    ep->defs[i].keysz, key, sz))
356 			return(&ep->defs[i]);
357 
358 	return(NULL);
359 }
360 
361 /*
362  * Get the next token from the input stream using the given quote
363  * character.
364  * Optionally make any replacements.
365  */
366 static const char *
367 eqn_next(struct eqn_node *ep, char quote, size_t *sz, int repl)
368 {
369 	char		*start, *next;
370 	int		 q, diff, lim;
371 	size_t		 ssz, dummy;
372 	struct eqn_def	*def;
373 
374 	if (NULL == sz)
375 		sz = &dummy;
376 
377 	lim = 0;
378 	ep->rew = ep->cur;
379 again:
380 	/* Prevent self-definitions. */
381 
382 	if (lim >= EQN_NEST_MAX) {
383 		mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse,
384 		    ep->eqn.ln, ep->eqn.pos, NULL);
385 		return(NULL);
386 	}
387 
388 	ep->cur = ep->rew;
389 	start = &ep->data[(int)ep->cur];
390 	q = 0;
391 
392 	if ('\0' == *start)
393 		return(NULL);
394 
395 	if (quote == *start) {
396 		ep->cur++;
397 		q = 1;
398 	}
399 
400 	start = &ep->data[(int)ep->cur];
401 
402 	if ( ! q) {
403 		if ('{' == *start || '}' == *start)
404 			ssz = 1;
405 		else
406 			ssz = strcspn(start + 1, " ^~\"{}\t") + 1;
407 		next = start + (int)ssz;
408 		if ('\0' == *next)
409 			next = NULL;
410 	} else
411 		next = strchr(start, quote);
412 
413 	if (NULL != next) {
414 		*sz = (size_t)(next - start);
415 		ep->cur += *sz;
416 		if (q)
417 			ep->cur++;
418 		while (' ' == ep->data[(int)ep->cur] ||
419 		    '\t' == ep->data[(int)ep->cur] ||
420 		    '^' == ep->data[(int)ep->cur] ||
421 		    '~' == ep->data[(int)ep->cur])
422 			ep->cur++;
423 	} else {
424 		if (q)
425 			mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse,
426 			    ep->eqn.ln, ep->eqn.pos, NULL);
427 		next = strchr(start, '\0');
428 		*sz = (size_t)(next - start);
429 		ep->cur += *sz;
430 	}
431 
432 	/* Quotes aren't expanded for values. */
433 
434 	if (q || ! repl)
435 		return(start);
436 
437 	if (NULL != (def = eqn_def_find(ep, start, *sz))) {
438 		diff = def->valsz - *sz;
439 
440 		if (def->valsz > *sz) {
441 			ep->sz += diff;
442 			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
443 			ep->data[ep->sz] = '\0';
444 			start = &ep->data[(int)ep->rew];
445 		}
446 
447 		diff = def->valsz - *sz;
448 		memmove(start + *sz + diff, start + *sz,
449 		    (strlen(start) - *sz) + 1);
450 		memcpy(start, def->val, def->valsz);
451 		goto again;
452 	}
453 
454 	return(start);
455 }
456 
457 /*
458  * Get the next delimited token using the default current quote
459  * character.
460  */
461 static const char *
462 eqn_nexttok(struct eqn_node *ep, size_t *sz)
463 {
464 
465 	return(eqn_next(ep, '"', sz, 1));
466 }
467 
468 /*
469  * Get next token without replacement.
470  */
471 static const char *
472 eqn_nextrawtok(struct eqn_node *ep, size_t *sz)
473 {
474 
475 	return(eqn_next(ep, '"', sz, 0));
476 }
477 
478 /*
479  * Parse a token from the stream of text.
480  * A token consists of one of the recognised eqn(7) strings.
481  * Strings are separated by delimiting marks.
482  * This returns EQN_TOK_EOF when there are no more tokens.
483  * If the token is an unrecognised string literal, then it returns
484  * EQN_TOK__MAX and sets the "p" pointer to an allocated, nil-terminated
485  * string.
486  * This must be later freed with free(3).
487  */
488 static enum eqn_tok
489 eqn_tok_parse(struct eqn_node *ep, char **p)
490 {
491 	const char	*start;
492 	size_t		 i, sz;
493 	int		 quoted;
494 
495 	if (NULL != p)
496 		*p = NULL;
497 
498 	quoted = ep->data[ep->cur] == '"';
499 
500 	if (NULL == (start = eqn_nexttok(ep, &sz)))
501 		return(EQN_TOK_EOF);
502 
503 	if (quoted) {
504 		if (p != NULL)
505 			*p = mandoc_strndup(start, sz);
506 		return(EQN_TOK__MAX);
507 	}
508 
509 	for (i = 0; i < EQN_TOK__MAX; i++) {
510 		if (NULL == eqn_toks[i])
511 			continue;
512 		if (STRNEQ(start, sz, eqn_toks[i], strlen(eqn_toks[i])))
513 			break;
514 	}
515 
516 	if (i == EQN_TOK__MAX && NULL != p)
517 		*p = mandoc_strndup(start, sz);
518 
519 	return(i);
520 }
521 
522 static void
523 eqn_box_free(struct eqn_box *bp)
524 {
525 
526 	if (bp->first)
527 		eqn_box_free(bp->first);
528 	if (bp->next)
529 		eqn_box_free(bp->next);
530 
531 	free(bp->text);
532 	free(bp->left);
533 	free(bp->right);
534 	free(bp->top);
535 	free(bp->bottom);
536 	free(bp);
537 }
538 
539 /*
540  * Allocate a box as the last child of the parent node.
541  */
542 static struct eqn_box *
543 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
544 {
545 	struct eqn_box	*bp;
546 
547 	bp = mandoc_calloc(1, sizeof(struct eqn_box));
548 	bp->parent = parent;
549 	bp->parent->args++;
550 	bp->expectargs = UINT_MAX;
551 	bp->size = ep->gsize;
552 
553 	if (NULL != parent->first) {
554 		parent->last->next = bp;
555 		bp->prev = parent->last;
556 	} else
557 		parent->first = bp;
558 
559 	parent->last = bp;
560 	return(bp);
561 }
562 
563 /*
564  * Reparent the current last node (of the current parent) under a new
565  * EQN_SUBEXPR as the first element.
566  * Then return the new parent.
567  * The new EQN_SUBEXPR will have a two-child limit.
568  */
569 static struct eqn_box *
570 eqn_box_makebinary(struct eqn_node *ep,
571 	enum eqn_post pos, struct eqn_box *parent)
572 {
573 	struct eqn_box	*b, *newb;
574 
575 	assert(NULL != parent->last);
576 	b = parent->last;
577 	if (parent->last == parent->first)
578 		parent->first = NULL;
579 	parent->args--;
580 	parent->last = b->prev;
581 	b->prev = NULL;
582 	newb = eqn_box_alloc(ep, parent);
583 	newb->pos = pos;
584 	newb->type = EQN_SUBEXPR;
585 	newb->expectargs = 2;
586 	newb->args = 1;
587 	newb->first = newb->last = b;
588 	newb->first->next = NULL;
589 	b->parent = newb;
590 	return(newb);
591 }
592 
593 /*
594  * Parse the "delim" control statement.
595  */
596 static void
597 eqn_delim(struct eqn_node *ep)
598 {
599 	const char	*start;
600 	size_t		 sz;
601 
602 	if ((start = eqn_nextrawtok(ep, &sz)) == NULL)
603 		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
604 		    ep->eqn.ln, ep->eqn.pos, "delim");
605 	else if (strncmp(start, "off", 3) == 0)
606 		ep->delim = 0;
607 	else if (strncmp(start, "on", 2) == 0) {
608 		if (ep->odelim && ep->cdelim)
609 			ep->delim = 1;
610 	} else if (start[1] != '\0') {
611 		ep->odelim = start[0];
612 		ep->cdelim = start[1];
613 		ep->delim = 1;
614 	}
615 }
616 
617 /*
618  * Undefine a previously-defined string.
619  */
620 static void
621 eqn_undef(struct eqn_node *ep)
622 {
623 	const char	*start;
624 	struct eqn_def	*def;
625 	size_t		 sz;
626 
627 	if ((start = eqn_nextrawtok(ep, &sz)) == NULL) {
628 		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
629 		    ep->eqn.ln, ep->eqn.pos, "undef");
630 		return;
631 	}
632 	if ((def = eqn_def_find(ep, start, sz)) == NULL)
633 		return;
634 	free(def->key);
635 	free(def->val);
636 	def->key = def->val = NULL;
637 	def->keysz = def->valsz = 0;
638 }
639 
640 static void
641 eqn_def(struct eqn_node *ep)
642 {
643 	const char	*start;
644 	size_t		 sz;
645 	struct eqn_def	*def;
646 	int		 i;
647 
648 	if ((start = eqn_nextrawtok(ep, &sz)) == NULL) {
649 		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
650 		    ep->eqn.ln, ep->eqn.pos, "define");
651 		return;
652 	}
653 
654 	/*
655 	 * Search for a key that already exists.
656 	 * Create a new key if none is found.
657 	 */
658 	if (NULL == (def = eqn_def_find(ep, start, sz))) {
659 		/* Find holes in string array. */
660 		for (i = 0; i < (int)ep->defsz; i++)
661 			if (0 == ep->defs[i].keysz)
662 				break;
663 
664 		if (i == (int)ep->defsz) {
665 			ep->defsz++;
666 			ep->defs = mandoc_reallocarray(ep->defs,
667 			    ep->defsz, sizeof(struct eqn_def));
668 			ep->defs[i].key = ep->defs[i].val = NULL;
669 		}
670 
671 		def = ep->defs + i;
672 		free(def->key);
673 		def->key = mandoc_strndup(start, sz);
674 		def->keysz = sz;
675 	}
676 
677 	start = eqn_next(ep, ep->data[(int)ep->cur], &sz, 0);
678 	if (start == NULL) {
679 		mandoc_vmsg(MANDOCERR_REQ_EMPTY, ep->parse,
680 		    ep->eqn.ln, ep->eqn.pos, "define %s", def->key);
681 		free(def->key);
682 		free(def->val);
683 		def->key = def->val = NULL;
684 		def->keysz = def->valsz = 0;
685 		return;
686 	}
687 	free(def->val);
688 	def->val = mandoc_strndup(start, sz);
689 	def->valsz = sz;
690 }
691 
692 /*
693  * Recursively parse an eqn(7) expression.
694  */
695 static enum rofferr
696 eqn_parse(struct eqn_node *ep, struct eqn_box *parent)
697 {
698 	char		 sym[64];
699 	struct eqn_box	*cur;
700 	const char	*start;
701 	char		*p;
702 	size_t		 i, sz;
703 	enum eqn_tok	 tok, subtok;
704 	enum eqn_post	 pos;
705 	int		 size;
706 
707 	assert(parent != NULL);
708 
709 	/*
710 	 * Empty equation.
711 	 * Do not add it to the high-level syntax tree.
712 	 */
713 
714 	if (ep->data == NULL)
715 		return(ROFF_IGN);
716 
717 next_tok:
718 	tok = eqn_tok_parse(ep, &p);
719 
720 this_tok:
721 	switch (tok) {
722 	case (EQN_TOK_UNDEF):
723 		eqn_undef(ep);
724 		break;
725 	case (EQN_TOK_NDEFINE):
726 	case (EQN_TOK_DEFINE):
727 		eqn_def(ep);
728 		break;
729 	case (EQN_TOK_TDEFINE):
730 		if (eqn_nextrawtok(ep, NULL) == NULL ||
731 		    eqn_next(ep, ep->data[(int)ep->cur], NULL, 0) == NULL)
732 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
733 			    ep->eqn.ln, ep->eqn.pos, "tdefine");
734 		break;
735 	case (EQN_TOK_DELIM):
736 		eqn_delim(ep);
737 		break;
738 	case (EQN_TOK_GFONT):
739 		if (eqn_nextrawtok(ep, NULL) == NULL)
740 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
741 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
742 		break;
743 	case (EQN_TOK_MARK):
744 	case (EQN_TOK_LINEUP):
745 		/* Ignore these. */
746 		break;
747 	case (EQN_TOK_DYAD):
748 	case (EQN_TOK_VEC):
749 	case (EQN_TOK_UNDER):
750 	case (EQN_TOK_BAR):
751 	case (EQN_TOK_TILDE):
752 	case (EQN_TOK_HAT):
753 	case (EQN_TOK_DOT):
754 	case (EQN_TOK_DOTDOT):
755 		if (parent->last == NULL) {
756 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
757 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
758 			cur = eqn_box_alloc(ep, parent);
759 			cur->type = EQN_TEXT;
760 			cur->text = mandoc_strdup("");
761 		}
762 		parent = eqn_box_makebinary(ep, EQNPOS_NONE, parent);
763 		parent->type = EQN_LISTONE;
764 		parent->expectargs = 1;
765 		switch (tok) {
766 		case (EQN_TOK_DOTDOT):
767 			strlcpy(sym, "\\[ad]", sizeof(sym));
768 			break;
769 		case (EQN_TOK_VEC):
770 			strlcpy(sym, "\\[->]", sizeof(sym));
771 			break;
772 		case (EQN_TOK_DYAD):
773 			strlcpy(sym, "\\[<>]", sizeof(sym));
774 			break;
775 		case (EQN_TOK_TILDE):
776 			strlcpy(sym, "\\[a~]", sizeof(sym));
777 			break;
778 		case (EQN_TOK_UNDER):
779 			strlcpy(sym, "\\[ul]", sizeof(sym));
780 			break;
781 		case (EQN_TOK_BAR):
782 			strlcpy(sym, "\\[rl]", sizeof(sym));
783 			break;
784 		case (EQN_TOK_DOT):
785 			strlcpy(sym, "\\[a.]", sizeof(sym));
786 			break;
787 		case (EQN_TOK_HAT):
788 			strlcpy(sym, "\\[ha]", sizeof(sym));
789 			break;
790 		default:
791 			abort();
792 		}
793 
794 		switch (tok) {
795 		case (EQN_TOK_DOTDOT):
796 		case (EQN_TOK_VEC):
797 		case (EQN_TOK_DYAD):
798 		case (EQN_TOK_TILDE):
799 		case (EQN_TOK_BAR):
800 		case (EQN_TOK_DOT):
801 		case (EQN_TOK_HAT):
802 			parent->top = mandoc_strdup(sym);
803 			break;
804 		case (EQN_TOK_UNDER):
805 			parent->bottom = mandoc_strdup(sym);
806 			break;
807 		default:
808 			abort();
809 		}
810 		parent = parent->parent;
811 		break;
812 	case (EQN_TOK_FWD):
813 	case (EQN_TOK_BACK):
814 	case (EQN_TOK_DOWN):
815 	case (EQN_TOK_UP):
816 		subtok = eqn_tok_parse(ep, NULL);
817 		if (subtok != EQN_TOK__MAX) {
818 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
819 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
820 			tok = subtok;
821 			goto this_tok;
822 		}
823 		break;
824 	case (EQN_TOK_FAT):
825 	case (EQN_TOK_ROMAN):
826 	case (EQN_TOK_ITALIC):
827 	case (EQN_TOK_BOLD):
828 		while (parent->args == parent->expectargs)
829 			parent = parent->parent;
830 		/*
831 		 * These values apply to the next word or sequence of
832 		 * words; thus, we mark that we'll have a child with
833 		 * exactly one of those.
834 		 */
835 		parent = eqn_box_alloc(ep, parent);
836 		parent->type = EQN_LISTONE;
837 		parent->expectargs = 1;
838 		switch (tok) {
839 		case (EQN_TOK_FAT):
840 			parent->font = EQNFONT_FAT;
841 			break;
842 		case (EQN_TOK_ROMAN):
843 			parent->font = EQNFONT_ROMAN;
844 			break;
845 		case (EQN_TOK_ITALIC):
846 			parent->font = EQNFONT_ITALIC;
847 			break;
848 		case (EQN_TOK_BOLD):
849 			parent->font = EQNFONT_BOLD;
850 			break;
851 		default:
852 			abort();
853 		}
854 		break;
855 	case (EQN_TOK_SIZE):
856 	case (EQN_TOK_GSIZE):
857 		/* Accept two values: integral size and a single. */
858 		if (NULL == (start = eqn_nexttok(ep, &sz))) {
859 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
860 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
861 			break;
862 		}
863 		size = mandoc_strntoi(start, sz, 10);
864 		if (-1 == size) {
865 			mandoc_msg(MANDOCERR_IT_NONUM, ep->parse,
866 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
867 			break;
868 		}
869 		if (EQN_TOK_GSIZE == tok) {
870 			ep->gsize = size;
871 			break;
872 		}
873 		parent = eqn_box_alloc(ep, parent);
874 		parent->type = EQN_LISTONE;
875 		parent->expectargs = 1;
876 		parent->size = size;
877 		break;
878 	case (EQN_TOK_FROM):
879 	case (EQN_TOK_TO):
880 	case (EQN_TOK_SUB):
881 	case (EQN_TOK_SUP):
882 		/*
883 		 * We have a left-right-associative expression.
884 		 * Repivot under a positional node, open a child scope
885 		 * and keep on reading.
886 		 */
887 		if (parent->last == NULL) {
888 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
889 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
890 			cur = eqn_box_alloc(ep, parent);
891 			cur->type = EQN_TEXT;
892 			cur->text = mandoc_strdup("");
893 		}
894 		/* Handle the "subsup" and "fromto" positions. */
895 		if (EQN_TOK_SUP == tok && parent->pos == EQNPOS_SUB) {
896 			parent->expectargs = 3;
897 			parent->pos = EQNPOS_SUBSUP;
898 			break;
899 		}
900 		if (EQN_TOK_TO == tok && parent->pos == EQNPOS_FROM) {
901 			parent->expectargs = 3;
902 			parent->pos = EQNPOS_FROMTO;
903 			break;
904 		}
905 		switch (tok) {
906 		case (EQN_TOK_FROM):
907 			pos = EQNPOS_FROM;
908 			break;
909 		case (EQN_TOK_TO):
910 			pos = EQNPOS_TO;
911 			break;
912 		case (EQN_TOK_SUP):
913 			pos = EQNPOS_SUP;
914 			break;
915 		case (EQN_TOK_SUB):
916 			pos = EQNPOS_SUB;
917 			break;
918 		default:
919 			abort();
920 		}
921 		parent = eqn_box_makebinary(ep, pos, parent);
922 		break;
923 	case (EQN_TOK_SQRT):
924 		while (parent->args == parent->expectargs)
925 			parent = parent->parent;
926 		/*
927 		 * Accept a left-right-associative set of arguments just
928 		 * like sub and sup and friends but without rebalancing
929 		 * under a pivot.
930 		 */
931 		parent = eqn_box_alloc(ep, parent);
932 		parent->type = EQN_SUBEXPR;
933 		parent->pos = EQNPOS_SQRT;
934 		parent->expectargs = 1;
935 		break;
936 	case (EQN_TOK_OVER):
937 		/*
938 		 * We have a right-left-associative fraction.
939 		 * Close out anything that's currently open, then
940 		 * rebalance and continue reading.
941 		 */
942 		if (parent->last == NULL) {
943 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
944 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
945 			cur = eqn_box_alloc(ep, parent);
946 			cur->type = EQN_TEXT;
947 			cur->text = mandoc_strdup("");
948 		}
949 		while (EQN_SUBEXPR == parent->type)
950 			parent = parent->parent;
951 		parent = eqn_box_makebinary(ep, EQNPOS_OVER, parent);
952 		break;
953 	case (EQN_TOK_RIGHT):
954 	case (EQN_TOK_BRACE_CLOSE):
955 		/*
956 		 * Close out the existing brace.
957 		 * FIXME: this is a shitty sentinel: we should really
958 		 * have a native EQN_BRACE type or whatnot.
959 		 */
960 		for (cur = parent; cur != NULL; cur = cur->parent)
961 			if (cur->type == EQN_LIST &&
962 			    (tok == EQN_TOK_BRACE_CLOSE ||
963 			     cur->left != NULL))
964 				break;
965 		if (cur == NULL) {
966 			mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse,
967 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
968 			break;
969 		}
970 		parent = cur;
971 		if (EQN_TOK_RIGHT == tok) {
972 			if (NULL == (start = eqn_nexttok(ep, &sz))) {
973 				mandoc_msg(MANDOCERR_REQ_EMPTY,
974 				    ep->parse, ep->eqn.ln,
975 				    ep->eqn.pos, eqn_toks[tok]);
976 				break;
977 			}
978 			/* Handling depends on right/left. */
979 			if (STRNEQ(start, sz, "ceiling", 7)) {
980 				strlcpy(sym, "\\[rc]", sizeof(sym));
981 				parent->right = mandoc_strdup(sym);
982 			} else if (STRNEQ(start, sz, "floor", 5)) {
983 				strlcpy(sym, "\\[rf]", sizeof(sym));
984 				parent->right = mandoc_strdup(sym);
985 			} else
986 				parent->right = mandoc_strndup(start, sz);
987 		}
988 		parent = parent->parent;
989 		if (EQN_TOK_BRACE_CLOSE == tok && parent &&
990 		    (parent->type == EQN_PILE ||
991 		     parent->type == EQN_MATRIX))
992 			parent = parent->parent;
993 		/* Close out any "singleton" lists. */
994 		while (parent->type == EQN_LISTONE &&
995 		    parent->args == parent->expectargs)
996 			parent = parent->parent;
997 		break;
998 	case (EQN_TOK_BRACE_OPEN):
999 	case (EQN_TOK_LEFT):
1000 		/*
1001 		 * If we already have something in the stack and we're
1002 		 * in an expression, then rewind til we're not any more
1003 		 * (just like with the text node).
1004 		 */
1005 		while (parent->args == parent->expectargs)
1006 			parent = parent->parent;
1007 		if (EQN_TOK_LEFT == tok &&
1008 		    (start = eqn_nexttok(ep, &sz)) == NULL) {
1009 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
1010 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
1011 			break;
1012 		}
1013 		parent = eqn_box_alloc(ep, parent);
1014 		parent->type = EQN_LIST;
1015 		if (EQN_TOK_LEFT == tok) {
1016 			if (STRNEQ(start, sz, "ceiling", 7)) {
1017 				strlcpy(sym, "\\[lc]", sizeof(sym));
1018 				parent->left = mandoc_strdup(sym);
1019 			} else if (STRNEQ(start, sz, "floor", 5)) {
1020 				strlcpy(sym, "\\[lf]", sizeof(sym));
1021 				parent->left = mandoc_strdup(sym);
1022 			} else
1023 				parent->left = mandoc_strndup(start, sz);
1024 		}
1025 		break;
1026 	case (EQN_TOK_PILE):
1027 	case (EQN_TOK_LPILE):
1028 	case (EQN_TOK_RPILE):
1029 	case (EQN_TOK_CPILE):
1030 	case (EQN_TOK_CCOL):
1031 	case (EQN_TOK_LCOL):
1032 	case (EQN_TOK_RCOL):
1033 		while (parent->args == parent->expectargs)
1034 			parent = parent->parent;
1035 		parent = eqn_box_alloc(ep, parent);
1036 		parent->type = EQN_PILE;
1037 		parent->expectargs = 1;
1038 		break;
1039 	case (EQN_TOK_ABOVE):
1040 		for (cur = parent; cur != NULL; cur = cur->parent)
1041 			if (cur->type == EQN_PILE)
1042 				break;
1043 		if (cur == NULL) {
1044 			mandoc_msg(MANDOCERR_IT_STRAY, ep->parse,
1045 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
1046 			break;
1047 		}
1048 		parent = eqn_box_alloc(ep, cur);
1049 		parent->type = EQN_LIST;
1050 		break;
1051 	case (EQN_TOK_MATRIX):
1052 		while (parent->args == parent->expectargs)
1053 			parent = parent->parent;
1054 		parent = eqn_box_alloc(ep, parent);
1055 		parent->type = EQN_MATRIX;
1056 		parent->expectargs = 1;
1057 		break;
1058 	case (EQN_TOK_EOF):
1059 		/*
1060 		 * End of file!
1061 		 * TODO: make sure we're not in an open subexpression.
1062 		 */
1063 		return(ROFF_EQN);
1064 	default:
1065 		assert(tok == EQN_TOK__MAX);
1066 		assert(NULL != p);
1067 		/*
1068 		 * If we already have something in the stack and we're
1069 		 * in an expression, then rewind til we're not any more.
1070 		 */
1071 		while (parent->args == parent->expectargs)
1072 			parent = parent->parent;
1073 		cur = eqn_box_alloc(ep, parent);
1074 		cur->type = EQN_TEXT;
1075 		for (i = 0; i < EQNSYM__MAX; i++)
1076 			if (0 == strcmp(eqnsyms[i].str, p)) {
1077 				(void)snprintf(sym, sizeof(sym),
1078 					"\\[%s]", eqnsyms[i].sym);
1079 				cur->text = mandoc_strdup(sym);
1080 				free(p);
1081 				break;
1082 			}
1083 
1084 		if (i == EQNSYM__MAX)
1085 			cur->text = p;
1086 		/*
1087 		 * Post-process list status.
1088 		 */
1089 		while (parent->type == EQN_LISTONE &&
1090 		    parent->args == parent->expectargs)
1091 			parent = parent->parent;
1092 		break;
1093 	}
1094 	goto next_tok;
1095 }
1096 
1097 enum rofferr
1098 eqn_end(struct eqn_node **epp)
1099 {
1100 	struct eqn_node	*ep;
1101 
1102 	ep = *epp;
1103 	*epp = NULL;
1104 
1105 	ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box));
1106 	ep->eqn.root->expectargs = UINT_MAX;
1107 	return(eqn_parse(ep, ep->eqn.root));
1108 }
1109 
1110 void
1111 eqn_free(struct eqn_node *p)
1112 {
1113 	int		 i;
1114 
1115 	eqn_box_free(p->eqn.root);
1116 
1117 	for (i = 0; i < (int)p->defsz; i++) {
1118 		free(p->defs[i].key);
1119 		free(p->defs[i].val);
1120 	}
1121 
1122 	free(p->data);
1123 	free(p->defs);
1124 	free(p);
1125 }
1126