xref: /illumos-gate/usr/src/cmd/mandoc/eqn.c (revision 0ae3c1c128212f440f395f5f1e70577eada7ca85)
1 /*	$Id: eqn.c,v 1.78 2017/07/15 16:26:17 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <time.h>
29 
30 #include "mandoc_aux.h"
31 #include "mandoc.h"
32 #include "roff.h"
33 #include "libmandoc.h"
34 #include "libroff.h"
35 
/* Upper bound on chained define substitutions for a single token. */
#define	EQN_NEST_MAX	 128
/*
 * True if two byte ranges have the same length and the same content.
 * Beware that sz1 is evaluated twice.
 */
#define	STRNEQ(p1, sz1, p2, sz2) \
	((sz2) == (sz1) && strncmp((p1), (p2), (sz1)) == 0)
39 
/*
 * Token types returned by eqn_next().
 * Every value below EQN_TOK__MAX is a keyword and doubles as the index
 * of its spelling in eqn_toks[], so the order here and the order of
 * that table must be kept in sync.
 */
enum	eqn_tok {
	EQN_TOK_DYAD = 0,
	EQN_TOK_VEC,
	EQN_TOK_UNDER,
	EQN_TOK_BAR,
	EQN_TOK_TILDE,
	EQN_TOK_HAT,
	EQN_TOK_DOT,
	EQN_TOK_DOTDOT,
	EQN_TOK_FWD,
	EQN_TOK_BACK,
	EQN_TOK_DOWN,
	EQN_TOK_UP,
	EQN_TOK_FAT,
	EQN_TOK_ROMAN,
	EQN_TOK_ITALIC,
	EQN_TOK_BOLD,
	EQN_TOK_SIZE,
	EQN_TOK_SUB,
	EQN_TOK_SUP,
	EQN_TOK_SQRT,
	EQN_TOK_OVER,
	EQN_TOK_FROM,
	EQN_TOK_TO,
	EQN_TOK_BRACE_OPEN,
	EQN_TOK_BRACE_CLOSE,
	EQN_TOK_GSIZE,
	EQN_TOK_GFONT,
	EQN_TOK_MARK,
	EQN_TOK_LINEUP,
	EQN_TOK_LEFT,
	EQN_TOK_RIGHT,
	EQN_TOK_PILE,
	EQN_TOK_LPILE,
	EQN_TOK_RPILE,
	EQN_TOK_CPILE,
	EQN_TOK_MATRIX,
	EQN_TOK_CCOL,
	EQN_TOK_LCOL,
	EQN_TOK_RCOL,
	EQN_TOK_DELIM,
	EQN_TOK_DEFINE,
	EQN_TOK_TDEFINE,
	EQN_TOK_NDEFINE,
	EQN_TOK_UNDEF,
	EQN_TOK_ABOVE,
	/* Number of keywords; also returned for a word that is no keyword. */
	EQN_TOK__MAX,
	/* The word matched an entry of eqn_func[]. */
	EQN_TOK_FUNC,
	/* The token was delimited by quotes. */
	EQN_TOK_QUOTED,
	/* The word matched an entry of eqnsyms[]. */
	EQN_TOK_SYM,
	/* The end of the input text was reached. */
	EQN_TOK_EOF
};
92 
93 static	const char *eqn_toks[EQN_TOK__MAX] = {
94 	"dyad", /* EQN_TOK_DYAD */
95 	"vec", /* EQN_TOK_VEC */
96 	"under", /* EQN_TOK_UNDER */
97 	"bar", /* EQN_TOK_BAR */
98 	"tilde", /* EQN_TOK_TILDE */
99 	"hat", /* EQN_TOK_HAT */
100 	"dot", /* EQN_TOK_DOT */
101 	"dotdot", /* EQN_TOK_DOTDOT */
102 	"fwd", /* EQN_TOK_FWD * */
103 	"back", /* EQN_TOK_BACK */
104 	"down", /* EQN_TOK_DOWN */
105 	"up", /* EQN_TOK_UP */
106 	"fat", /* EQN_TOK_FAT */
107 	"roman", /* EQN_TOK_ROMAN */
108 	"italic", /* EQN_TOK_ITALIC */
109 	"bold", /* EQN_TOK_BOLD */
110 	"size", /* EQN_TOK_SIZE */
111 	"sub", /* EQN_TOK_SUB */
112 	"sup", /* EQN_TOK_SUP */
113 	"sqrt", /* EQN_TOK_SQRT */
114 	"over", /* EQN_TOK_OVER */
115 	"from", /* EQN_TOK_FROM */
116 	"to", /* EQN_TOK_TO */
117 	"{", /* EQN_TOK_BRACE_OPEN */
118 	"}", /* EQN_TOK_BRACE_CLOSE */
119 	"gsize", /* EQN_TOK_GSIZE */
120 	"gfont", /* EQN_TOK_GFONT */
121 	"mark", /* EQN_TOK_MARK */
122 	"lineup", /* EQN_TOK_LINEUP */
123 	"left", /* EQN_TOK_LEFT */
124 	"right", /* EQN_TOK_RIGHT */
125 	"pile", /* EQN_TOK_PILE */
126 	"lpile", /* EQN_TOK_LPILE */
127 	"rpile", /* EQN_TOK_RPILE */
128 	"cpile", /* EQN_TOK_CPILE */
129 	"matrix", /* EQN_TOK_MATRIX */
130 	"ccol", /* EQN_TOK_CCOL */
131 	"lcol", /* EQN_TOK_LCOL */
132 	"rcol", /* EQN_TOK_RCOL */
133 	"delim", /* EQN_TOK_DELIM */
134 	"define", /* EQN_TOK_DEFINE */
135 	"tdefine", /* EQN_TOK_TDEFINE */
136 	"ndefine", /* EQN_TOK_NDEFINE */
137 	"undef", /* EQN_TOK_UNDEF */
138 	"above", /* EQN_TOK_ABOVE */
139 };
140 
/*
 * Words that eqn_next() reports as EQN_TOK_FUNC in MODE_TOK.
 * The table is searched linearly, so the order carries no meaning.
 */
static	const char *const eqn_func[] = {
	"acos", "acsc", "and", "arc", "asec", "asin", "atan", "cos", "cosh",
	"coth", "csc", "det", "exp", "for", "if", "lim", "ln", "log",
	"max", "min", "sec", "sin", "sinh", "tan", "tanh", "Im", "Re",
};
147 
/*
 * One constant per entry of eqnsyms[], listed in table order.
 * In this file only EQNSYM__MAX is referenced by name: it sizes the
 * table and bounds the lookup loop in eqn_next().
 */
enum	eqn_symt {
	EQNSYM_alpha = 0,
	EQNSYM_beta,
	EQNSYM_chi,
	EQNSYM_delta,
	EQNSYM_epsilon,
	EQNSYM_eta,
	EQNSYM_gamma,
	EQNSYM_iota,
	EQNSYM_kappa,
	EQNSYM_lambda,
	EQNSYM_mu,
	EQNSYM_nu,
	EQNSYM_omega,
	EQNSYM_omicron,
	EQNSYM_phi,
	EQNSYM_pi,
	EQNSYM_psi,	/* was misspelled EQNSYM_ps */
	EQNSYM_rho,
	EQNSYM_sigma,
	EQNSYM_tau,
	EQNSYM_theta,
	EQNSYM_upsilon,
	EQNSYM_xi,
	EQNSYM_zeta,
	EQNSYM_DELTA,
	EQNSYM_GAMMA,
	EQNSYM_LAMBDA,
	EQNSYM_OMEGA,
	EQNSYM_PHI,
	EQNSYM_PI,
	EQNSYM_PSI,
	EQNSYM_SIGMA,
	EQNSYM_THETA,
	EQNSYM_UPSILON,
	EQNSYM_XI,
	EQNSYM_inter,
	EQNSYM_union,
	EQNSYM_prod,
	EQNSYM_int,
	EQNSYM_sum,
	EQNSYM_grad,
	EQNSYM_del,
	EQNSYM_times,
	EQNSYM_cdot,
	EQNSYM_nothing,
	EQNSYM_approx,
	EQNSYM_prime,
	EQNSYM_half,
	EQNSYM_partial,
	EQNSYM_inf,
	EQNSYM_muchgreat,
	EQNSYM_muchless,
	EQNSYM_larrow,
	EQNSYM_rarrow,
	EQNSYM_pm,
	EQNSYM_nequal,
	EQNSYM_equiv,
	EQNSYM_lessequal,
	EQNSYM_moreequal,
	EQNSYM_minus,
	EQNSYM__MAX
};
211 
/*
 * One entry of the symbol table searched by eqn_next().
 */
struct	eqnsym {
	const char	*str;	/* the word as it appears in the eqn input */
	const char	*sym;	/* the name that eqn_next() wraps in \[...] */
};
216 
/*
 * Symbol table: input words and the character names they are replaced
 * with.  In MODE_TOK, eqn_next() compares each token against the str
 * members in table order and, on a match, produces the text "\[sym]".
 * The entries are listed in the order of enum eqn_symt.
 */
static	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
	{ "alpha", "*a" }, /* EQNSYM_alpha */
	{ "beta", "*b" }, /* EQNSYM_beta */
	{ "chi", "*x" }, /* EQNSYM_chi */
	{ "delta", "*d" }, /* EQNSYM_delta */
	{ "epsilon", "*e" }, /* EQNSYM_epsilon */
	{ "eta", "*y" }, /* EQNSYM_eta */
	{ "gamma", "*g" }, /* EQNSYM_gamma */
	{ "iota", "*i" }, /* EQNSYM_iota */
	{ "kappa", "*k" }, /* EQNSYM_kappa */
	{ "lambda", "*l" }, /* EQNSYM_lambda */
	{ "mu", "*m" }, /* EQNSYM_mu */
	{ "nu", "*n" }, /* EQNSYM_nu */
	{ "omega", "*w" }, /* EQNSYM_omega */
	{ "omicron", "*o" }, /* EQNSYM_omicron */
	{ "phi", "*f" }, /* EQNSYM_phi */
	{ "pi", "*p" }, /* EQNSYM_pi */
	{ "psi", "*q" }, /* EQNSYM_psi */
	{ "rho", "*r" }, /* EQNSYM_rho */
	{ "sigma", "*s" }, /* EQNSYM_sigma */
	{ "tau", "*t" }, /* EQNSYM_tau */
	{ "theta", "*h" }, /* EQNSYM_theta */
	{ "upsilon", "*u" }, /* EQNSYM_upsilon */
	{ "xi", "*c" }, /* EQNSYM_xi */
	{ "zeta", "*z" }, /* EQNSYM_zeta */
	{ "DELTA", "*D" }, /* EQNSYM_DELTA */
	{ "GAMMA", "*G" }, /* EQNSYM_GAMMA */
	{ "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
	{ "OMEGA", "*W" }, /* EQNSYM_OMEGA */
	{ "PHI", "*F" }, /* EQNSYM_PHI */
	{ "PI", "*P" }, /* EQNSYM_PI */
	{ "PSI", "*Q" }, /* EQNSYM_PSI */
	{ "SIGMA", "*S" }, /* EQNSYM_SIGMA */
	{ "THETA", "*H" }, /* EQNSYM_THETA */
	{ "UPSILON", "*U" }, /* EQNSYM_UPSILON */
	{ "XI", "*C" }, /* EQNSYM_XI */
	{ "inter", "ca" }, /* EQNSYM_inter */
	{ "union", "cu" }, /* EQNSYM_union */
	{ "prod", "product" }, /* EQNSYM_prod */
	{ "int", "integral" }, /* EQNSYM_int */
	{ "sum", "sum" }, /* EQNSYM_sum */
	{ "grad", "gr" }, /* EQNSYM_grad */
	{ "del", "gr" }, /* EQNSYM_del */
	{ "times", "mu" }, /* EQNSYM_times */
	{ "cdot", "pc" }, /* EQNSYM_cdot */
	{ "nothing", "&" }, /* EQNSYM_nothing */
	{ "approx", "~~" }, /* EQNSYM_approx */
	{ "prime", "fm" }, /* EQNSYM_prime */
	{ "half", "12" }, /* EQNSYM_half */
	{ "partial", "pd" }, /* EQNSYM_partial */
	{ "inf", "if" }, /* EQNSYM_inf */
	{ ">>", ">>" }, /* EQNSYM_muchgreat */
	{ "<<", "<<" }, /* EQNSYM_muchless */
	{ "<-", "<-" }, /* EQNSYM_larrow */
	{ "->", "->" }, /* EQNSYM_rarrow */
	{ "+-", "+-" }, /* EQNSYM_pm */
	{ "!=", "!=" }, /* EQNSYM_nequal */
	{ "==", "==" }, /* EQNSYM_equiv */
	{ "<=", "<=" }, /* EQNSYM_lessequal */
	{ ">=", ">=" }, /* EQNSYM_moreequal */
	{ "-", "mi" }, /* EQNSYM_minus */
};
279 
/*
 * How eqn_next() delimits and interprets the next token.
 */
enum	parse_mode {
	MODE_QUOTED,	/* first character is the delimiter; no interpretation */
	MODE_NOSUB,	/* plain word; no define substitution, no lookup */
	MODE_SUB,	/* plain word with define substitution */
	MODE_TOK	/* like MODE_SUB, plus keyword/symbol/function lookup */
};
286 
/* Forward declarations of the file-local helpers defined below. */
static	struct eqn_box	*eqn_box_alloc(struct eqn_node *, struct eqn_box *);
static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *,
				struct eqn_box *);
static	void		 eqn_def(struct eqn_node *);
static	struct eqn_def	*eqn_def_find(struct eqn_node *);
static	void		 eqn_delim(struct eqn_node *);
static	enum eqn_tok	 eqn_next(struct eqn_node *, enum parse_mode);
static	void		 eqn_undef(struct eqn_node *);
295 
296 
297 struct eqn_node *
298 eqn_alloc(struct mparse *parse)
299 {
300 	struct eqn_node *ep;
301 
302 	ep = mandoc_calloc(1, sizeof(*ep));
303 	ep->parse = parse;
304 	ep->gsize = EQN_DEFSIZE;
305 	return ep;
306 }
307 
308 void
309 eqn_reset(struct eqn_node *ep)
310 {
311 	free(ep->data);
312 	ep->data = ep->start = ep->end = NULL;
313 	ep->sz = ep->toksz = 0;
314 }
315 
316 void
317 eqn_read(struct eqn_node *ep, const char *p)
318 {
319 	char		*cp;
320 
321 	if (ep->data == NULL) {
322 		ep->sz = strlen(p);
323 		ep->data = mandoc_strdup(p);
324 	} else {
325 		ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p);
326 		free(ep->data);
327 		ep->data = cp;
328 	}
329 	ep->sz += 1;
330 }
331 
332 /*
333  * Find the key "key" of the give size within our eqn-defined values.
334  */
335 static struct eqn_def *
336 eqn_def_find(struct eqn_node *ep)
337 {
338 	int		 i;
339 
340 	for (i = 0; i < (int)ep->defsz; i++)
341 		if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
342 		    ep->defs[i].keysz, ep->start, ep->toksz))
343 			return &ep->defs[i];
344 
345 	return NULL;
346 }
347 
348 /*
349  * Parse a token from the input text.  The modes are:
350  * MODE_QUOTED: Use *ep->start as the delimiter; the token ends
351  *   before its next occurence.  Do not interpret the token in any
352  *   way and return EQN_TOK_QUOTED.  All other modes behave like
353  *   MODE_QUOTED when *ep->start is '"'.
354  * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it;
355  *   otherwise, it ends before the next whitespace or brace.
356  *   Do not interpret the token and return EQN_TOK__MAX.
357  * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an
358  *   alias created with define.  If it is an alias, replace it with
359  *   its string value and reparse.
360  * MODE_TOK: Like MODE_SUB, but also check the token against the list
361  *   of tokens, and if there is a match, return that token.  Otherwise,
362  *   if the token matches a symbol, return EQN_TOK_SYM; if it matches
363  *   a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX.  Except for
364  *   a token match, *ep->start is set to an allocated string that the
365  *   caller is expected to free.
366  * All modes skip whitespace following the end of the token.
367  */
368 static enum eqn_tok
369 eqn_next(struct eqn_node *ep, enum parse_mode mode)
370 {
371 	static int	 last_len, lim;
372 
373 	struct eqn_def	*def;
374 	size_t		 start;
375 	int		 diff, i, quoted;
376 	enum eqn_tok	 tok;
377 
378 	/*
379 	 * Reset the recursion counter after advancing
380 	 * beyond the end of the previous substitution.
381 	 */
382 	if (ep->end - ep->data >= last_len)
383 		lim = 0;
384 
385 	ep->start = ep->end;
386 	quoted = mode == MODE_QUOTED;
387 	for (;;) {
388 		switch (*ep->start) {
389 		case '\0':
390 			ep->toksz = 0;
391 			return EQN_TOK_EOF;
392 		case '"':
393 			quoted = 1;
394 			break;
395 		default:
396 			break;
397 		}
398 		if (quoted) {
399 			ep->end = strchr(ep->start + 1, *ep->start);
400 			ep->start++;  /* Skip opening quote. */
401 			if (ep->end == NULL) {
402 				mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse,
403 				    ep->node->line, ep->node->pos, NULL);
404 				ep->end = strchr(ep->start, '\0');
405 			}
406 		} else {
407 			ep->end = ep->start + 1;
408 			if (*ep->start != '{' && *ep->start != '}')
409 				ep->end += strcspn(ep->end, " ^~\"{}\t");
410 		}
411 		ep->toksz = ep->end - ep->start;
412 		if (quoted && *ep->end != '\0')
413 			ep->end++;  /* Skip closing quote. */
414 		while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL)
415 			ep->end++;
416 		if (quoted)  /* Cannot return, may have to strndup. */
417 			break;
418 		if (mode == MODE_NOSUB)
419 			return EQN_TOK__MAX;
420 		if ((def = eqn_def_find(ep)) == NULL)
421 			break;
422 		if (++lim > EQN_NEST_MAX) {
423 			mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse,
424 			    ep->node->line, ep->node->pos, NULL);
425 			return EQN_TOK_EOF;
426 		}
427 
428 		/* Replace a defined name with its string value. */
429 		if ((diff = def->valsz - ep->toksz) > 0) {
430 			start = ep->start - ep->data;
431 			ep->sz += diff;
432 			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
433 			ep->start = ep->data + start;
434 		}
435 		if (diff)
436 			memmove(ep->start + def->valsz, ep->start + ep->toksz,
437 			    strlen(ep->start + ep->toksz) + 1);
438 		memcpy(ep->start, def->val, def->valsz);
439 		last_len = ep->start - ep->data + def->valsz;
440 	}
441 	if (mode != MODE_TOK)
442 		return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX;
443 	if (quoted) {
444 		ep->start = mandoc_strndup(ep->start, ep->toksz);
445 		return EQN_TOK_QUOTED;
446 	}
447 	for (tok = 0; tok < EQN_TOK__MAX; tok++)
448 		if (STRNEQ(ep->start, ep->toksz,
449 		    eqn_toks[tok], strlen(eqn_toks[tok])))
450 			return tok;
451 
452 	for (i = 0; i < EQNSYM__MAX; i++) {
453 		if (STRNEQ(ep->start, ep->toksz,
454 		    eqnsyms[i].str, strlen(eqnsyms[i].str))) {
455 			mandoc_asprintf(&ep->start,
456 			    "\\[%s]", eqnsyms[i].sym);
457 			return EQN_TOK_SYM;
458 		}
459 	}
460 	ep->start = mandoc_strndup(ep->start, ep->toksz);
461 	for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++)
462 		if (STRNEQ(ep->start, ep->toksz,
463 		    eqn_func[i], strlen(eqn_func[i])))
464 			return EQN_TOK_FUNC;
465 	return EQN_TOK__MAX;
466 }
467 
468 void
469 eqn_box_free(struct eqn_box *bp)
470 {
471 
472 	if (bp->first)
473 		eqn_box_free(bp->first);
474 	if (bp->next)
475 		eqn_box_free(bp->next);
476 
477 	free(bp->text);
478 	free(bp->left);
479 	free(bp->right);
480 	free(bp->top);
481 	free(bp->bottom);
482 	free(bp);
483 }
484 
485 /*
486  * Allocate a box as the last child of the parent node.
487  */
488 static struct eqn_box *
489 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
490 {
491 	struct eqn_box	*bp;
492 
493 	bp = mandoc_calloc(1, sizeof(struct eqn_box));
494 	bp->parent = parent;
495 	bp->parent->args++;
496 	bp->expectargs = UINT_MAX;
497 	bp->font = bp->parent->font;
498 	bp->size = ep->gsize;
499 
500 	if (NULL != parent->first) {
501 		parent->last->next = bp;
502 		bp->prev = parent->last;
503 	} else
504 		parent->first = bp;
505 
506 	parent->last = bp;
507 	return bp;
508 }
509 
510 /*
511  * Reparent the current last node (of the current parent) under a new
512  * EQN_SUBEXPR as the first element.
513  * Then return the new parent.
514  * The new EQN_SUBEXPR will have a two-child limit.
515  */
516 static struct eqn_box *
517 eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent)
518 {
519 	struct eqn_box	*b, *newb;
520 
521 	assert(NULL != parent->last);
522 	b = parent->last;
523 	if (parent->last == parent->first)
524 		parent->first = NULL;
525 	parent->args--;
526 	parent->last = b->prev;
527 	b->prev = NULL;
528 	newb = eqn_box_alloc(ep, parent);
529 	newb->type = EQN_SUBEXPR;
530 	newb->expectargs = 2;
531 	newb->args = 1;
532 	newb->first = newb->last = b;
533 	newb->first->next = NULL;
534 	b->parent = newb;
535 	return newb;
536 }
537 
538 /*
539  * Parse the "delim" control statement.
540  */
541 static void
542 eqn_delim(struct eqn_node *ep)
543 {
544 	if (ep->end[0] == '\0' || ep->end[1] == '\0') {
545 		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
546 		    ep->node->line, ep->node->pos, "delim");
547 		if (ep->end[0] != '\0')
548 			ep->end++;
549 	} else if (strncmp(ep->end, "off", 3) == 0) {
550 		ep->delim = 0;
551 		ep->end += 3;
552 	} else if (strncmp(ep->end, "on", 2) == 0) {
553 		if (ep->odelim && ep->cdelim)
554 			ep->delim = 1;
555 		ep->end += 2;
556 	} else {
557 		ep->odelim = *ep->end++;
558 		ep->cdelim = *ep->end++;
559 		ep->delim = 1;
560 	}
561 }
562 
563 /*
564  * Undefine a previously-defined string.
565  */
566 static void
567 eqn_undef(struct eqn_node *ep)
568 {
569 	struct eqn_def	*def;
570 
571 	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
572 		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
573 		    ep->node->line, ep->node->pos, "undef");
574 		return;
575 	}
576 	if ((def = eqn_def_find(ep)) == NULL)
577 		return;
578 	free(def->key);
579 	free(def->val);
580 	def->key = def->val = NULL;
581 	def->keysz = def->valsz = 0;
582 }
583 
584 static void
585 eqn_def(struct eqn_node *ep)
586 {
587 	struct eqn_def	*def;
588 	int		 i;
589 
590 	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
591 		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
592 		    ep->node->line, ep->node->pos, "define");
593 		return;
594 	}
595 
596 	/*
597 	 * Search for a key that already exists.
598 	 * Create a new key if none is found.
599 	 */
600 	if ((def = eqn_def_find(ep)) == NULL) {
601 		/* Find holes in string array. */
602 		for (i = 0; i < (int)ep->defsz; i++)
603 			if (0 == ep->defs[i].keysz)
604 				break;
605 
606 		if (i == (int)ep->defsz) {
607 			ep->defsz++;
608 			ep->defs = mandoc_reallocarray(ep->defs,
609 			    ep->defsz, sizeof(struct eqn_def));
610 			ep->defs[i].key = ep->defs[i].val = NULL;
611 		}
612 
613 		def = ep->defs + i;
614 		free(def->key);
615 		def->key = mandoc_strndup(ep->start, ep->toksz);
616 		def->keysz = ep->toksz;
617 	}
618 
619 	if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) {
620 		mandoc_vmsg(MANDOCERR_REQ_EMPTY, ep->parse,
621 		    ep->node->line, ep->node->pos, "define %s", def->key);
622 		free(def->key);
623 		free(def->val);
624 		def->key = def->val = NULL;
625 		def->keysz = def->valsz = 0;
626 		return;
627 	}
628 	free(def->val);
629 	def->val = mandoc_strndup(ep->start, ep->toksz);
630 	def->valsz = ep->toksz;
631 }
632 
633 void
634 eqn_parse(struct eqn_node *ep)
635 {
636 	struct eqn_box	*cur, *nbox, *parent, *split;
637 	const char	*cp, *cpn;
638 	char		*p;
639 	enum eqn_tok	 tok;
640 	enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln;
641 	int		 size;
642 
643 	parent = ep->node->eqn;
644 	assert(parent != NULL);
645 
646 	/*
647 	 * Empty equation.
648 	 * Do not add it to the high-level syntax tree.
649 	 */
650 
651 	if (ep->data == NULL)
652 		return;
653 
654 	ep->start = ep->end = ep->data + strspn(ep->data, " ^~");
655 
656 next_tok:
657 	tok = eqn_next(ep, MODE_TOK);
658 	switch (tok) {
659 	case EQN_TOK_UNDEF:
660 		eqn_undef(ep);
661 		break;
662 	case EQN_TOK_NDEFINE:
663 	case EQN_TOK_DEFINE:
664 		eqn_def(ep);
665 		break;
666 	case EQN_TOK_TDEFINE:
667 		if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF ||
668 		    eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF)
669 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
670 			    ep->node->line, ep->node->pos, "tdefine");
671 		break;
672 	case EQN_TOK_DELIM:
673 		eqn_delim(ep);
674 		break;
675 	case EQN_TOK_GFONT:
676 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
677 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
678 			    ep->node->line, ep->node->pos, eqn_toks[tok]);
679 		break;
680 	case EQN_TOK_MARK:
681 	case EQN_TOK_LINEUP:
682 		/* Ignore these. */
683 		break;
684 	case EQN_TOK_DYAD:
685 	case EQN_TOK_VEC:
686 	case EQN_TOK_UNDER:
687 	case EQN_TOK_BAR:
688 	case EQN_TOK_TILDE:
689 	case EQN_TOK_HAT:
690 	case EQN_TOK_DOT:
691 	case EQN_TOK_DOTDOT:
692 		if (parent->last == NULL) {
693 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
694 			    ep->node->line, ep->node->pos, eqn_toks[tok]);
695 			cur = eqn_box_alloc(ep, parent);
696 			cur->type = EQN_TEXT;
697 			cur->text = mandoc_strdup("");
698 		}
699 		parent = eqn_box_makebinary(ep, parent);
700 		parent->type = EQN_LIST;
701 		parent->expectargs = 1;
702 		parent->font = EQNFONT_ROMAN;
703 		switch (tok) {
704 		case EQN_TOK_DOTDOT:
705 			parent->top = mandoc_strdup("\\[ad]");
706 			break;
707 		case EQN_TOK_VEC:
708 			parent->top = mandoc_strdup("\\[->]");
709 			break;
710 		case EQN_TOK_DYAD:
711 			parent->top = mandoc_strdup("\\[<>]");
712 			break;
713 		case EQN_TOK_TILDE:
714 			parent->top = mandoc_strdup("\\[a~]");
715 			break;
716 		case EQN_TOK_UNDER:
717 			parent->bottom = mandoc_strdup("\\[ul]");
718 			break;
719 		case EQN_TOK_BAR:
720 			parent->top = mandoc_strdup("\\[rn]");
721 			break;
722 		case EQN_TOK_DOT:
723 			parent->top = mandoc_strdup("\\[a.]");
724 			break;
725 		case EQN_TOK_HAT:
726 			parent->top = mandoc_strdup("\\[ha]");
727 			break;
728 		default:
729 			abort();
730 		}
731 		parent = parent->parent;
732 		break;
733 	case EQN_TOK_FWD:
734 	case EQN_TOK_BACK:
735 	case EQN_TOK_DOWN:
736 	case EQN_TOK_UP:
737 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
738 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
739 			    ep->node->line, ep->node->pos, eqn_toks[tok]);
740 		break;
741 	case EQN_TOK_FAT:
742 	case EQN_TOK_ROMAN:
743 	case EQN_TOK_ITALIC:
744 	case EQN_TOK_BOLD:
745 		while (parent->args == parent->expectargs)
746 			parent = parent->parent;
747 		/*
748 		 * These values apply to the next word or sequence of
749 		 * words; thus, we mark that we'll have a child with
750 		 * exactly one of those.
751 		 */
752 		parent = eqn_box_alloc(ep, parent);
753 		parent->type = EQN_LIST;
754 		parent->expectargs = 1;
755 		switch (tok) {
756 		case EQN_TOK_FAT:
757 			parent->font = EQNFONT_FAT;
758 			break;
759 		case EQN_TOK_ROMAN:
760 			parent->font = EQNFONT_ROMAN;
761 			break;
762 		case EQN_TOK_ITALIC:
763 			parent->font = EQNFONT_ITALIC;
764 			break;
765 		case EQN_TOK_BOLD:
766 			parent->font = EQNFONT_BOLD;
767 			break;
768 		default:
769 			abort();
770 		}
771 		break;
772 	case EQN_TOK_SIZE:
773 	case EQN_TOK_GSIZE:
774 		/* Accept two values: integral size and a single. */
775 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
776 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
777 			    ep->node->line, ep->node->pos, eqn_toks[tok]);
778 			break;
779 		}
780 		size = mandoc_strntoi(ep->start, ep->toksz, 10);
781 		if (-1 == size) {
782 			mandoc_msg(MANDOCERR_IT_NONUM, ep->parse,
783 			    ep->node->line, ep->node->pos, eqn_toks[tok]);
784 			break;
785 		}
786 		if (EQN_TOK_GSIZE == tok) {
787 			ep->gsize = size;
788 			break;
789 		}
790 		while (parent->args == parent->expectargs)
791 			parent = parent->parent;
792 		parent = eqn_box_alloc(ep, parent);
793 		parent->type = EQN_LIST;
794 		parent->expectargs = 1;
795 		parent->size = size;
796 		break;
797 	case EQN_TOK_FROM:
798 	case EQN_TOK_TO:
799 	case EQN_TOK_SUB:
800 	case EQN_TOK_SUP:
801 		/*
802 		 * We have a left-right-associative expression.
803 		 * Repivot under a positional node, open a child scope
804 		 * and keep on reading.
805 		 */
806 		if (parent->last == NULL) {
807 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
808 			    ep->node->line, ep->node->pos, eqn_toks[tok]);
809 			cur = eqn_box_alloc(ep, parent);
810 			cur->type = EQN_TEXT;
811 			cur->text = mandoc_strdup("");
812 		}
813 		while (parent->expectargs == 1 && parent->args == 1)
814 			parent = parent->parent;
815 		if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO)  {
816 			for (cur = parent; cur != NULL; cur = cur->parent)
817 				if (cur->pos == EQNPOS_SUB ||
818 				    cur->pos == EQNPOS_SUP ||
819 				    cur->pos == EQNPOS_SUBSUP ||
820 				    cur->pos == EQNPOS_SQRT ||
821 				    cur->pos == EQNPOS_OVER)
822 					break;
823 			if (cur != NULL)
824 				parent = cur->parent;
825 		}
826 		if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) {
827 			parent->expectargs = 3;
828 			parent->pos = EQNPOS_SUBSUP;
829 			break;
830 		}
831 		if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) {
832 			parent->expectargs = 3;
833 			parent->pos = EQNPOS_FROMTO;
834 			break;
835 		}
836 		parent = eqn_box_makebinary(ep, parent);
837 		switch (tok) {
838 		case EQN_TOK_FROM:
839 			parent->pos = EQNPOS_FROM;
840 			break;
841 		case EQN_TOK_TO:
842 			parent->pos = EQNPOS_TO;
843 			break;
844 		case EQN_TOK_SUP:
845 			parent->pos = EQNPOS_SUP;
846 			break;
847 		case EQN_TOK_SUB:
848 			parent->pos = EQNPOS_SUB;
849 			break;
850 		default:
851 			abort();
852 		}
853 		break;
854 	case EQN_TOK_SQRT:
855 		while (parent->args == parent->expectargs)
856 			parent = parent->parent;
857 		/*
858 		 * Accept a left-right-associative set of arguments just
859 		 * like sub and sup and friends but without rebalancing
860 		 * under a pivot.
861 		 */
862 		parent = eqn_box_alloc(ep, parent);
863 		parent->type = EQN_SUBEXPR;
864 		parent->pos = EQNPOS_SQRT;
865 		parent->expectargs = 1;
866 		break;
867 	case EQN_TOK_OVER:
868 		/*
869 		 * We have a right-left-associative fraction.
870 		 * Close out anything that's currently open, then
871 		 * rebalance and continue reading.
872 		 */
873 		if (parent->last == NULL) {
874 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
875 			    ep->node->line, ep->node->pos, eqn_toks[tok]);
876 			cur = eqn_box_alloc(ep, parent);
877 			cur->type = EQN_TEXT;
878 			cur->text = mandoc_strdup("");
879 		}
880 		while (parent->args == parent->expectargs)
881 			parent = parent->parent;
882 		while (EQN_SUBEXPR == parent->type)
883 			parent = parent->parent;
884 		parent = eqn_box_makebinary(ep, parent);
885 		parent->pos = EQNPOS_OVER;
886 		break;
887 	case EQN_TOK_RIGHT:
888 	case EQN_TOK_BRACE_CLOSE:
889 		/*
890 		 * Close out the existing brace.
891 		 * FIXME: this is a shitty sentinel: we should really
892 		 * have a native EQN_BRACE type or whatnot.
893 		 */
894 		for (cur = parent; cur != NULL; cur = cur->parent)
895 			if (cur->type == EQN_LIST &&
896 			    cur->expectargs > 1 &&
897 			    (tok == EQN_TOK_BRACE_CLOSE ||
898 			     cur->left != NULL))
899 				break;
900 		if (cur == NULL) {
901 			mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse,
902 			    ep->node->line, ep->node->pos, eqn_toks[tok]);
903 			break;
904 		}
905 		parent = cur;
906 		if (EQN_TOK_RIGHT == tok) {
907 			if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
908 				mandoc_msg(MANDOCERR_REQ_EMPTY,
909 				    ep->parse, ep->node->line,
910 				    ep->node->pos, eqn_toks[tok]);
911 				break;
912 			}
913 			/* Handling depends on right/left. */
914 			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
915 				parent->right = mandoc_strdup("\\[rc]");
916 			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
917 				parent->right = mandoc_strdup("\\[rf]");
918 			else
919 				parent->right =
920 				    mandoc_strndup(ep->start, ep->toksz);
921 		}
922 		parent = parent->parent;
923 		if (tok == EQN_TOK_BRACE_CLOSE &&
924 		    (parent->type == EQN_PILE ||
925 		     parent->type == EQN_MATRIX))
926 			parent = parent->parent;
927 		/* Close out any "singleton" lists. */
928 		while (parent->type == EQN_LIST &&
929 		    parent->expectargs == 1 &&
930 		    parent->args == 1)
931 			parent = parent->parent;
932 		break;
933 	case EQN_TOK_BRACE_OPEN:
934 	case EQN_TOK_LEFT:
935 		/*
936 		 * If we already have something in the stack and we're
937 		 * in an expression, then rewind til we're not any more
938 		 * (just like with the text node).
939 		 */
940 		while (parent->args == parent->expectargs)
941 			parent = parent->parent;
942 		if (EQN_TOK_LEFT == tok &&
943 		    eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
944 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
945 			    ep->node->line, ep->node->pos, eqn_toks[tok]);
946 			break;
947 		}
948 		parent = eqn_box_alloc(ep, parent);
949 		parent->type = EQN_LIST;
950 		if (EQN_TOK_LEFT == tok) {
951 			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
952 				parent->left = mandoc_strdup("\\[lc]");
953 			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
954 				parent->left = mandoc_strdup("\\[lf]");
955 			else
956 				parent->left =
957 				    mandoc_strndup(ep->start, ep->toksz);
958 		}
959 		break;
960 	case EQN_TOK_PILE:
961 	case EQN_TOK_LPILE:
962 	case EQN_TOK_RPILE:
963 	case EQN_TOK_CPILE:
964 	case EQN_TOK_CCOL:
965 	case EQN_TOK_LCOL:
966 	case EQN_TOK_RCOL:
967 		while (parent->args == parent->expectargs)
968 			parent = parent->parent;
969 		parent = eqn_box_alloc(ep, parent);
970 		parent->type = EQN_PILE;
971 		parent->expectargs = 1;
972 		break;
973 	case EQN_TOK_ABOVE:
974 		for (cur = parent; cur != NULL; cur = cur->parent)
975 			if (cur->type == EQN_PILE)
976 				break;
977 		if (cur == NULL) {
978 			mandoc_msg(MANDOCERR_IT_STRAY, ep->parse,
979 			    ep->node->line, ep->node->pos, eqn_toks[tok]);
980 			break;
981 		}
982 		parent = eqn_box_alloc(ep, cur);
983 		parent->type = EQN_LIST;
984 		break;
985 	case EQN_TOK_MATRIX:
986 		while (parent->args == parent->expectargs)
987 			parent = parent->parent;
988 		parent = eqn_box_alloc(ep, parent);
989 		parent->type = EQN_MATRIX;
990 		parent->expectargs = 1;
991 		break;
992 	case EQN_TOK_EOF:
993 		return;
994 	case EQN_TOK__MAX:
995 	case EQN_TOK_FUNC:
996 	case EQN_TOK_QUOTED:
997 	case EQN_TOK_SYM:
998 		p = ep->start;
999 		assert(p != NULL);
1000 		/*
1001 		 * If we already have something in the stack and we're
1002 		 * in an expression, then rewind til we're not any more.
1003 		 */
1004 		while (parent->args == parent->expectargs)
1005 			parent = parent->parent;
1006 		cur = eqn_box_alloc(ep, parent);
1007 		cur->type = EQN_TEXT;
1008 		cur->text = p;
1009 		switch (tok) {
1010 		case EQN_TOK_FUNC:
1011 			cur->font = EQNFONT_ROMAN;
1012 			break;
1013 		case EQN_TOK_QUOTED:
1014 			if (cur->font == EQNFONT_NONE)
1015 				cur->font = EQNFONT_ITALIC;
1016 			break;
1017 		case EQN_TOK_SYM:
1018 			break;
1019 		default:
1020 			if (cur->font != EQNFONT_NONE || *p == '\0')
1021 				break;
1022 			cpn = p - 1;
1023 			ccln = CCL_LET;
1024 			split = NULL;
1025 			for (;;) {
1026 				/* Advance to next character. */
1027 				cp = cpn++;
1028 				ccl = ccln;
1029 				ccln = isalpha((unsigned char)*cpn) ? CCL_LET :
1030 				    isdigit((unsigned char)*cpn) ||
1031 				    (*cpn == '.' && (ccl == CCL_DIG ||
1032 				     isdigit((unsigned char)cpn[1]))) ?
1033 				    CCL_DIG : CCL_PUN;
1034 				/* No boundary before first character. */
1035 				if (cp < p)
1036 					continue;
1037 				cur->font = ccl == CCL_LET ?
1038 				    EQNFONT_ITALIC : EQNFONT_ROMAN;
1039 				if (*cp == '\\')
1040 					mandoc_escape(&cpn, NULL, NULL);
1041 				/* No boundary after last character. */
1042 				if (*cpn == '\0')
1043 					break;
1044 				if (ccln == ccl && *cp != ',' && *cpn != ',')
1045 					continue;
1046 				/* Boundary found, split the text. */
1047 				if (parent->args == parent->expectargs) {
1048 					/* Remove the text from the tree. */
1049 					if (cur->prev == NULL)
1050 						parent->first = cur->next;
1051 					else
1052 						cur->prev->next = NULL;
1053 					parent->last = cur->prev;
1054 					parent->args--;
1055 					/* Set up a list instead. */
1056 					split = eqn_box_alloc(ep, parent);
1057 					split->type = EQN_LIST;
1058 					/* Insert the word into the list. */
1059 					split->first = split->last = cur;
1060 					cur->parent = split;
1061 					cur->prev = NULL;
1062 					parent = split;
1063 				}
1064 				/* Append a new text box. */
1065 				nbox = eqn_box_alloc(ep, parent);
1066 				nbox->type = EQN_TEXT;
1067 				nbox->text = mandoc_strdup(cpn);
1068 				/* Truncate the old box. */
1069 				p = mandoc_strndup(cur->text,
1070 				    cpn - cur->text);
1071 				free(cur->text);
1072 				cur->text = p;
1073 				/* Setup to process the new box. */
1074 				cur = nbox;
1075 				p = nbox->text;
1076 				cpn = p - 1;
1077 				ccln = CCL_LET;
1078 			}
1079 			if (split != NULL)
1080 				parent = split->parent;
1081 			break;
1082 		}
1083 		break;
1084 	default:
1085 		abort();
1086 	}
1087 	goto next_tok;
1088 }
1089 
1090 void
1091 eqn_free(struct eqn_node *p)
1092 {
1093 	int		 i;
1094 
1095 	for (i = 0; i < (int)p->defsz; i++) {
1096 		free(p->defs[i].key);
1097 		free(p->defs[i].val);
1098 	}
1099 
1100 	free(p->data);
1101 	free(p->defs);
1102 	free(p);
1103 }
1104