xref: /freebsd/contrib/mandoc/eqn.c (revision b2d2a78ad80ec68d4a17f5aef97d21686cb1e29b)
1 /* $Id: eqn.c,v 1.86 2023/04/28 19:11:03 schwarze Exp $ */
2 /*
3  * Copyright (c) 2014, 2015, 2017, 2018, 2020, 2022
4  *               Ingo Schwarze <schwarze@openbsd.org>
5  * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 #include "config.h"
20 
21 #include <sys/types.h>
22 
23 #include <assert.h>
24 #include <ctype.h>
25 #include <limits.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 #include <time.h>
30 
31 #include "mandoc_aux.h"
32 #include "mandoc.h"
33 #include "roff.h"
34 #include "eqn.h"
35 #include "libmandoc.h"
36 #include "eqn_parse.h"
37 
38 #define	EQN_NEST_MAX	 128 /* maximum nesting of defines */
39 #define	STRNEQ(p1, sz1, p2, sz2) \
40 	((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1)))
41 
42 enum	eqn_tok {
43 	EQN_TOK_DYAD = 0,
44 	EQN_TOK_VEC,
45 	EQN_TOK_UNDER,
46 	EQN_TOK_BAR,
47 	EQN_TOK_TILDE,
48 	EQN_TOK_HAT,
49 	EQN_TOK_DOT,
50 	EQN_TOK_DOTDOT,
51 	EQN_TOK_FWD,
52 	EQN_TOK_BACK,
53 	EQN_TOK_DOWN,
54 	EQN_TOK_UP,
55 	EQN_TOK_FAT,
56 	EQN_TOK_ROMAN,
57 	EQN_TOK_ITALIC,
58 	EQN_TOK_BOLD,
59 	EQN_TOK_SIZE,
60 	EQN_TOK_SUB,
61 	EQN_TOK_SUP,
62 	EQN_TOK_SQRT,
63 	EQN_TOK_OVER,
64 	EQN_TOK_FROM,
65 	EQN_TOK_TO,
66 	EQN_TOK_BRACE_OPEN,
67 	EQN_TOK_BRACE_CLOSE,
68 	EQN_TOK_GSIZE,
69 	EQN_TOK_GFONT,
70 	EQN_TOK_MARK,
71 	EQN_TOK_LINEUP,
72 	EQN_TOK_LEFT,
73 	EQN_TOK_RIGHT,
74 	EQN_TOK_PILE,
75 	EQN_TOK_LPILE,
76 	EQN_TOK_RPILE,
77 	EQN_TOK_CPILE,
78 	EQN_TOK_MATRIX,
79 	EQN_TOK_CCOL,
80 	EQN_TOK_LCOL,
81 	EQN_TOK_RCOL,
82 	EQN_TOK_DELIM,
83 	EQN_TOK_DEFINE,
84 	EQN_TOK_TDEFINE,
85 	EQN_TOK_NDEFINE,
86 	EQN_TOK_UNDEF,
87 	EQN_TOK_ABOVE,
88 	EQN_TOK__MAX,
89 	EQN_TOK_FUNC,
90 	EQN_TOK_QUOTED,
91 	EQN_TOK_SYM,
92 	EQN_TOK_EOF
93 };
94 
95 static	const char *eqn_toks[EQN_TOK__MAX] = {
96 	"dyad", /* EQN_TOK_DYAD */
97 	"vec", /* EQN_TOK_VEC */
98 	"under", /* EQN_TOK_UNDER */
99 	"bar", /* EQN_TOK_BAR */
100 	"tilde", /* EQN_TOK_TILDE */
101 	"hat", /* EQN_TOK_HAT */
102 	"dot", /* EQN_TOK_DOT */
103 	"dotdot", /* EQN_TOK_DOTDOT */
104 	"fwd", /* EQN_TOK_FWD * */
105 	"back", /* EQN_TOK_BACK */
106 	"down", /* EQN_TOK_DOWN */
107 	"up", /* EQN_TOK_UP */
108 	"fat", /* EQN_TOK_FAT */
109 	"roman", /* EQN_TOK_ROMAN */
110 	"italic", /* EQN_TOK_ITALIC */
111 	"bold", /* EQN_TOK_BOLD */
112 	"size", /* EQN_TOK_SIZE */
113 	"sub", /* EQN_TOK_SUB */
114 	"sup", /* EQN_TOK_SUP */
115 	"sqrt", /* EQN_TOK_SQRT */
116 	"over", /* EQN_TOK_OVER */
117 	"from", /* EQN_TOK_FROM */
118 	"to", /* EQN_TOK_TO */
119 	"{", /* EQN_TOK_BRACE_OPEN */
120 	"}", /* EQN_TOK_BRACE_CLOSE */
121 	"gsize", /* EQN_TOK_GSIZE */
122 	"gfont", /* EQN_TOK_GFONT */
123 	"mark", /* EQN_TOK_MARK */
124 	"lineup", /* EQN_TOK_LINEUP */
125 	"left", /* EQN_TOK_LEFT */
126 	"right", /* EQN_TOK_RIGHT */
127 	"pile", /* EQN_TOK_PILE */
128 	"lpile", /* EQN_TOK_LPILE */
129 	"rpile", /* EQN_TOK_RPILE */
130 	"cpile", /* EQN_TOK_CPILE */
131 	"matrix", /* EQN_TOK_MATRIX */
132 	"ccol", /* EQN_TOK_CCOL */
133 	"lcol", /* EQN_TOK_LCOL */
134 	"rcol", /* EQN_TOK_RCOL */
135 	"delim", /* EQN_TOK_DELIM */
136 	"define", /* EQN_TOK_DEFINE */
137 	"tdefine", /* EQN_TOK_TDEFINE */
138 	"ndefine", /* EQN_TOK_NDEFINE */
139 	"undef", /* EQN_TOK_UNDEF */
140 	"above", /* EQN_TOK_ABOVE */
141 };
142 
143 static	const char *const eqn_func[] = {
144 	"acos",	"acsc",	"and",	"arc",	"asec",	"asin", "atan",
145 	"cos",	"cosh", "coth",	"csc",	"det",	"exp",	"for",
146 	"if",	"lim",	"ln",	"log",	"max",	"min",
147 	"sec",	"sin",	"sinh",	"tan",	"tanh",	"Im",	"Re",
148 };
149 
150 enum	eqn_symt {
151 	EQNSYM_alpha = 0,
152 	EQNSYM_beta,
153 	EQNSYM_chi,
154 	EQNSYM_delta,
155 	EQNSYM_epsilon,
156 	EQNSYM_eta,
157 	EQNSYM_gamma,
158 	EQNSYM_iota,
159 	EQNSYM_kappa,
160 	EQNSYM_lambda,
161 	EQNSYM_mu,
162 	EQNSYM_nu,
163 	EQNSYM_omega,
164 	EQNSYM_omicron,
165 	EQNSYM_phi,
166 	EQNSYM_pi,
167 	EQNSYM_ps,
168 	EQNSYM_rho,
169 	EQNSYM_sigma,
170 	EQNSYM_tau,
171 	EQNSYM_theta,
172 	EQNSYM_upsilon,
173 	EQNSYM_xi,
174 	EQNSYM_zeta,
175 	EQNSYM_DELTA,
176 	EQNSYM_GAMMA,
177 	EQNSYM_LAMBDA,
178 	EQNSYM_OMEGA,
179 	EQNSYM_PHI,
180 	EQNSYM_PI,
181 	EQNSYM_PSI,
182 	EQNSYM_SIGMA,
183 	EQNSYM_THETA,
184 	EQNSYM_UPSILON,
185 	EQNSYM_XI,
186 	EQNSYM_inter,
187 	EQNSYM_union,
188 	EQNSYM_prod,
189 	EQNSYM_int,
190 	EQNSYM_sum,
191 	EQNSYM_grad,
192 	EQNSYM_del,
193 	EQNSYM_times,
194 	EQNSYM_cdot,
195 	EQNSYM_nothing,
196 	EQNSYM_approx,
197 	EQNSYM_prime,
198 	EQNSYM_half,
199 	EQNSYM_partial,
200 	EQNSYM_inf,
201 	EQNSYM_muchgreat,
202 	EQNSYM_muchless,
203 	EQNSYM_larrow,
204 	EQNSYM_rarrow,
205 	EQNSYM_pm,
206 	EQNSYM_nequal,
207 	EQNSYM_equiv,
208 	EQNSYM_lessequal,
209 	EQNSYM_moreequal,
210 	EQNSYM_minus,
211 	EQNSYM__MAX
212 };
213 
214 struct	eqnsym {
215 	const char	*str;
216 	const char	*sym;
217 };
218 
219 static	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
220 	{ "alpha", "*a" }, /* EQNSYM_alpha */
221 	{ "beta", "*b" }, /* EQNSYM_beta */
222 	{ "chi", "*x" }, /* EQNSYM_chi */
223 	{ "delta", "*d" }, /* EQNSYM_delta */
224 	{ "epsilon", "*e" }, /* EQNSYM_epsilon */
225 	{ "eta", "*y" }, /* EQNSYM_eta */
226 	{ "gamma", "*g" }, /* EQNSYM_gamma */
227 	{ "iota", "*i" }, /* EQNSYM_iota */
228 	{ "kappa", "*k" }, /* EQNSYM_kappa */
229 	{ "lambda", "*l" }, /* EQNSYM_lambda */
230 	{ "mu", "*m" }, /* EQNSYM_mu */
231 	{ "nu", "*n" }, /* EQNSYM_nu */
232 	{ "omega", "*w" }, /* EQNSYM_omega */
233 	{ "omicron", "*o" }, /* EQNSYM_omicron */
234 	{ "phi", "*f" }, /* EQNSYM_phi */
235 	{ "pi", "*p" }, /* EQNSYM_pi */
236 	{ "psi", "*q" }, /* EQNSYM_psi */
237 	{ "rho", "*r" }, /* EQNSYM_rho */
238 	{ "sigma", "*s" }, /* EQNSYM_sigma */
239 	{ "tau", "*t" }, /* EQNSYM_tau */
240 	{ "theta", "*h" }, /* EQNSYM_theta */
241 	{ "upsilon", "*u" }, /* EQNSYM_upsilon */
242 	{ "xi", "*c" }, /* EQNSYM_xi */
243 	{ "zeta", "*z" }, /* EQNSYM_zeta */
244 	{ "DELTA", "*D" }, /* EQNSYM_DELTA */
245 	{ "GAMMA", "*G" }, /* EQNSYM_GAMMA */
246 	{ "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
247 	{ "OMEGA", "*W" }, /* EQNSYM_OMEGA */
248 	{ "PHI", "*F" }, /* EQNSYM_PHI */
249 	{ "PI", "*P" }, /* EQNSYM_PI */
250 	{ "PSI", "*Q" }, /* EQNSYM_PSI */
251 	{ "SIGMA", "*S" }, /* EQNSYM_SIGMA */
252 	{ "THETA", "*H" }, /* EQNSYM_THETA */
253 	{ "UPSILON", "*U" }, /* EQNSYM_UPSILON */
254 	{ "XI", "*C" }, /* EQNSYM_XI */
255 	{ "inter", "ca" }, /* EQNSYM_inter */
256 	{ "union", "cu" }, /* EQNSYM_union */
257 	{ "prod", "product" }, /* EQNSYM_prod */
258 	{ "int", "integral" }, /* EQNSYM_int */
259 	{ "sum", "sum" }, /* EQNSYM_sum */
260 	{ "grad", "gr" }, /* EQNSYM_grad */
261 	{ "del", "gr" }, /* EQNSYM_del */
262 	{ "times", "mu" }, /* EQNSYM_times */
263 	{ "cdot", "pc" }, /* EQNSYM_cdot */
264 	{ "nothing", "&" }, /* EQNSYM_nothing */
265 	{ "approx", "~~" }, /* EQNSYM_approx */
266 	{ "prime", "fm" }, /* EQNSYM_prime */
267 	{ "half", "12" }, /* EQNSYM_half */
268 	{ "partial", "pd" }, /* EQNSYM_partial */
269 	{ "inf", "if" }, /* EQNSYM_inf */
270 	{ ">>", ">>" }, /* EQNSYM_muchgreat */
271 	{ "<<", "<<" }, /* EQNSYM_muchless */
272 	{ "<-", "<-" }, /* EQNSYM_larrow */
273 	{ "->", "->" }, /* EQNSYM_rarrow */
274 	{ "+-", "+-" }, /* EQNSYM_pm */
275 	{ "!=", "!=" }, /* EQNSYM_nequal */
276 	{ "==", "==" }, /* EQNSYM_equiv */
277 	{ "<=", "<=" }, /* EQNSYM_lessequal */
278 	{ ">=", ">=" }, /* EQNSYM_moreequal */
279 	{ "-", "mi" }, /* EQNSYM_minus */
280 };
281 
282 enum	parse_mode {
283 	MODE_QUOTED,
284 	MODE_NOSUB,
285 	MODE_SUB,
286 	MODE_TOK
287 };
288 
289 struct	eqn_def {
290 	char		 *key;
291 	size_t		  keysz;
292 	char		 *val;
293 	size_t		  valsz;
294 };
295 
296 static	struct eqn_box	*eqn_box_alloc(struct eqn_node *, struct eqn_box *);
297 static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *,
298 				struct eqn_box *);
299 static	void		 eqn_def(struct eqn_node *);
300 static	struct eqn_def	*eqn_def_find(struct eqn_node *);
301 static	void		 eqn_delim(struct eqn_node *);
302 static	enum eqn_tok	 eqn_next(struct eqn_node *, enum parse_mode);
303 static	void		 eqn_undef(struct eqn_node *);
304 
305 
306 struct eqn_node *
307 eqn_alloc(void)
308 {
309 	struct eqn_node *ep;
310 
311 	ep = mandoc_calloc(1, sizeof(*ep));
312 	ep->gsize = EQN_DEFSIZE;
313 	return ep;
314 }
315 
316 void
317 eqn_reset(struct eqn_node *ep)
318 {
319 	free(ep->data);
320 	ep->data = ep->start = ep->end = NULL;
321 	ep->sz = ep->toksz = 0;
322 }
323 
324 void
325 eqn_read(struct eqn_node *ep, const char *p)
326 {
327 	char		*cp;
328 
329 	if (ep->data == NULL) {
330 		ep->sz = strlen(p);
331 		ep->data = mandoc_strdup(p);
332 	} else {
333 		ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p);
334 		free(ep->data);
335 		ep->data = cp;
336 	}
337 	ep->sz += 1;
338 }
339 
340 /*
341  * Find the key "key" of the give size within our eqn-defined values.
342  */
343 static struct eqn_def *
344 eqn_def_find(struct eqn_node *ep)
345 {
346 	int		 i;
347 
348 	for (i = 0; i < (int)ep->defsz; i++)
349 		if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
350 		    ep->defs[i].keysz, ep->start, ep->toksz))
351 			return &ep->defs[i];
352 
353 	return NULL;
354 }
355 
356 /*
357  * Parse a token from the input text.  The modes are:
358  * MODE_QUOTED: Use *ep->start as the delimiter; the token ends
359  *   before its next occurrence.  Do not interpret the token in any
360  *   way and return EQN_TOK_QUOTED.  All other modes behave like
361  *   MODE_QUOTED when *ep->start is '"'.
362  * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it;
363  *   otherwise, it ends before the next whitespace or brace.
364  *   Do not interpret the token and return EQN_TOK__MAX.
365  * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an
366  *   alias created with define.  If it is an alias, replace it with
367  *   its string value and reparse.
368  * MODE_TOK: Like MODE_SUB, but also check the token against the list
369  *   of tokens, and if there is a match, return that token.  Otherwise,
370  *   if the token matches a symbol, return EQN_TOK_SYM; if it matches
371  *   a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX.  Except for
372  *   a token match, *ep->start is set to an allocated string that the
373  *   caller is expected to free.
374  * All modes skip whitespace following the end of the token.
375  */
376 static enum eqn_tok
377 eqn_next(struct eqn_node *ep, enum parse_mode mode)
378 {
379 	struct eqn_def	*def;
380 	size_t		 start;
381 	int		 diff, i, newlen, quoted;
382 	enum eqn_tok	 tok;
383 
384 	/*
385 	 * Reset the recursion counter after advancing
386 	 * beyond the end of the rightmost substitution.
387 	 */
388 	if (ep->end - ep->data >= ep->sublen)
389 		ep->subcnt = 0;
390 
391 	ep->start = ep->end;
392 	quoted = mode == MODE_QUOTED;
393 	for (;;) {
394 		switch (*ep->start) {
395 		case '\0':
396 			ep->toksz = 0;
397 			return EQN_TOK_EOF;
398 		case '"':
399 			quoted = 1;
400 			break;
401 		case ' ':
402 		case '\t':
403 		case '~':
404 		case '^':
405 			if (quoted)
406 				break;
407 			ep->start++;
408 			continue;
409 		default:
410 			break;
411 		}
412 		if (quoted) {
413 			ep->end = strchr(ep->start + 1, *ep->start);
414 			ep->start++;  /* Skip opening quote. */
415 			if (ep->end == NULL) {
416 				mandoc_msg(MANDOCERR_ARG_QUOTE,
417 				    ep->node->line, ep->node->pos, NULL);
418 				ep->end = strchr(ep->start, '\0');
419 			}
420 		} else {
421 			ep->end = ep->start + 1;
422 			if (*ep->start != '{' && *ep->start != '}')
423 				ep->end += strcspn(ep->end, " ^~\"{}\t");
424 		}
425 		ep->toksz = ep->end - ep->start;
426 		if (quoted && *ep->end != '\0')
427 			ep->end++;  /* Skip closing quote. */
428 		while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL)
429 			ep->end++;
430 		if (quoted)  /* Cannot return, may have to strndup. */
431 			break;
432 		if (mode == MODE_NOSUB)
433 			return EQN_TOK__MAX;
434 		if ((def = eqn_def_find(ep)) == NULL)
435 			break;
436 		if (++ep->subcnt > EQN_NEST_MAX) {
437 			mandoc_msg(MANDOCERR_ROFFLOOP,
438 			    ep->node->line, ep->node->pos, NULL);
439 			break;
440 		}
441 
442 		/* Replace a defined name with its string value. */
443 		if ((diff = def->valsz - ep->toksz) > 0) {
444 			start = ep->start - ep->data;
445 			ep->sz += diff;
446 			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
447 			ep->start = ep->data + start;
448 			ep->sublen += diff;
449 		}
450 		if (diff)
451 			memmove(ep->start + def->valsz, ep->start + ep->toksz,
452 			    strlen(ep->start + ep->toksz) + 1);
453 		memcpy(ep->start, def->val, def->valsz);
454 		newlen = ep->start - ep->data + def->valsz;
455 		if (ep->sublen < newlen)
456 			ep->sublen = newlen;
457 	}
458 	if (mode != MODE_TOK)
459 		return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX;
460 	if (quoted) {
461 		ep->start = mandoc_strndup(ep->start, ep->toksz);
462 		return EQN_TOK_QUOTED;
463 	}
464 	for (tok = 0; tok < EQN_TOK__MAX; tok++)
465 		if (STRNEQ(ep->start, ep->toksz,
466 		    eqn_toks[tok], strlen(eqn_toks[tok])))
467 			return tok;
468 
469 	for (i = 0; i < EQNSYM__MAX; i++) {
470 		if (STRNEQ(ep->start, ep->toksz,
471 		    eqnsyms[i].str, strlen(eqnsyms[i].str))) {
472 			mandoc_asprintf(&ep->start,
473 			    "\\[%s]", eqnsyms[i].sym);
474 			return EQN_TOK_SYM;
475 		}
476 	}
477 	ep->start = mandoc_strndup(ep->start, ep->toksz);
478 	for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++)
479 		if (STRNEQ(ep->start, ep->toksz,
480 		    eqn_func[i], strlen(eqn_func[i])))
481 			return EQN_TOK_FUNC;
482 	return EQN_TOK__MAX;
483 }
484 
485 void
486 eqn_box_free(struct eqn_box *bp)
487 {
488 	if (bp == NULL)
489 		return;
490 
491 	if (bp->first)
492 		eqn_box_free(bp->first);
493 	if (bp->next)
494 		eqn_box_free(bp->next);
495 
496 	free(bp->text);
497 	free(bp->left);
498 	free(bp->right);
499 	free(bp->top);
500 	free(bp->bottom);
501 	free(bp);
502 }
503 
504 struct eqn_box *
505 eqn_box_new(void)
506 {
507 	struct eqn_box	*bp;
508 
509 	bp = mandoc_calloc(1, sizeof(*bp));
510 	bp->expectargs = UINT_MAX;
511 	return bp;
512 }
513 
514 /*
515  * Allocate a box as the last child of the parent node.
516  */
517 static struct eqn_box *
518 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
519 {
520 	struct eqn_box	*bp;
521 
522 	bp = eqn_box_new();
523 	bp->parent = parent;
524 	bp->parent->args++;
525 	bp->font = bp->parent->font;
526 	bp->size = ep->gsize;
527 
528 	if (NULL != parent->first) {
529 		parent->last->next = bp;
530 		bp->prev = parent->last;
531 	} else
532 		parent->first = bp;
533 
534 	parent->last = bp;
535 	return bp;
536 }
537 
538 /*
539  * Reparent the current last node (of the current parent) under a new
540  * EQN_SUBEXPR as the first element.
541  * Then return the new parent.
542  * The new EQN_SUBEXPR will have a two-child limit.
543  */
544 static struct eqn_box *
545 eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent)
546 {
547 	struct eqn_box	*b, *newb;
548 
549 	assert(NULL != parent->last);
550 	b = parent->last;
551 	if (parent->last == parent->first)
552 		parent->first = NULL;
553 	parent->args--;
554 	parent->last = b->prev;
555 	b->prev = NULL;
556 	newb = eqn_box_alloc(ep, parent);
557 	newb->type = EQN_SUBEXPR;
558 	newb->expectargs = 2;
559 	newb->args = 1;
560 	newb->first = newb->last = b;
561 	newb->first->next = NULL;
562 	b->parent = newb;
563 	return newb;
564 }
565 
566 /*
567  * Parse the "delim" control statement.
568  */
569 static void
570 eqn_delim(struct eqn_node *ep)
571 {
572 	if (ep->end[0] == '\0' || ep->end[1] == '\0') {
573 		mandoc_msg(MANDOCERR_REQ_EMPTY,
574 		    ep->node->line, ep->node->pos, "delim");
575 		if (ep->end[0] != '\0')
576 			ep->end++;
577 	} else if (strncmp(ep->end, "off", 3) == 0) {
578 		ep->delim = 0;
579 		ep->end += 3;
580 	} else if (strncmp(ep->end, "on", 2) == 0) {
581 		if (ep->odelim && ep->cdelim)
582 			ep->delim = 1;
583 		ep->end += 2;
584 	} else {
585 		ep->odelim = *ep->end++;
586 		ep->cdelim = *ep->end++;
587 		ep->delim = 1;
588 	}
589 }
590 
591 /*
592  * Undefine a previously-defined string.
593  */
594 static void
595 eqn_undef(struct eqn_node *ep)
596 {
597 	struct eqn_def	*def;
598 
599 	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
600 		mandoc_msg(MANDOCERR_REQ_EMPTY,
601 		    ep->node->line, ep->node->pos, "undef");
602 		return;
603 	}
604 	if ((def = eqn_def_find(ep)) == NULL)
605 		return;
606 	free(def->key);
607 	free(def->val);
608 	def->key = def->val = NULL;
609 	def->keysz = def->valsz = 0;
610 }
611 
612 static void
613 eqn_def(struct eqn_node *ep)
614 {
615 	struct eqn_def	*def;
616 	int		 i;
617 
618 	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
619 		mandoc_msg(MANDOCERR_REQ_EMPTY,
620 		    ep->node->line, ep->node->pos, "define");
621 		return;
622 	}
623 
624 	/*
625 	 * Search for a key that already exists.
626 	 * Create a new key if none is found.
627 	 */
628 	if ((def = eqn_def_find(ep)) == NULL) {
629 		/* Find holes in string array. */
630 		for (i = 0; i < (int)ep->defsz; i++)
631 			if (0 == ep->defs[i].keysz)
632 				break;
633 
634 		if (i == (int)ep->defsz) {
635 			ep->defsz++;
636 			ep->defs = mandoc_reallocarray(ep->defs,
637 			    ep->defsz, sizeof(struct eqn_def));
638 			ep->defs[i].key = ep->defs[i].val = NULL;
639 		}
640 
641 		def = ep->defs + i;
642 		free(def->key);
643 		def->key = mandoc_strndup(ep->start, ep->toksz);
644 		def->keysz = ep->toksz;
645 	}
646 
647 	if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) {
648 		mandoc_msg(MANDOCERR_REQ_EMPTY,
649 		    ep->node->line, ep->node->pos, "define %s", def->key);
650 		free(def->key);
651 		free(def->val);
652 		def->key = def->val = NULL;
653 		def->keysz = def->valsz = 0;
654 		return;
655 	}
656 	free(def->val);
657 	def->val = mandoc_strndup(ep->start, ep->toksz);
658 	def->valsz = ep->toksz;
659 }
660 
661 void
662 eqn_parse(struct eqn_node *ep)
663 {
664 	struct eqn_box	*cur, *nbox, *parent, *split;
665 	const char	*cp, *cpn;
666 	char		*p;
667 	enum eqn_tok	 tok;
668 	enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln;
669 	int		 size;
670 
671 	parent = ep->node->eqn;
672 	assert(parent != NULL);
673 
674 	/*
675 	 * Empty equation.
676 	 * Do not add it to the high-level syntax tree.
677 	 */
678 
679 	if (ep->data == NULL)
680 		return;
681 
682 	ep->start = ep->end = ep->data;
683 	ep->sublen = 0;
684 	ep->subcnt = 0;
685 
686 next_tok:
687 	tok = eqn_next(ep, MODE_TOK);
688 	switch (tok) {
689 	case EQN_TOK_UNDEF:
690 		eqn_undef(ep);
691 		break;
692 	case EQN_TOK_NDEFINE:
693 	case EQN_TOK_DEFINE:
694 		eqn_def(ep);
695 		break;
696 	case EQN_TOK_TDEFINE:
697 		if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF ||
698 		    eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF)
699 			mandoc_msg(MANDOCERR_REQ_EMPTY,
700 			    ep->node->line, ep->node->pos, "tdefine");
701 		break;
702 	case EQN_TOK_DELIM:
703 		eqn_delim(ep);
704 		break;
705 	case EQN_TOK_GFONT:
706 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
707 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
708 			    ep->node->pos, "%s", eqn_toks[tok]);
709 		break;
710 	case EQN_TOK_MARK:
711 	case EQN_TOK_LINEUP:
712 		/* Ignore these. */
713 		break;
714 	case EQN_TOK_DYAD:
715 	case EQN_TOK_VEC:
716 	case EQN_TOK_UNDER:
717 	case EQN_TOK_BAR:
718 	case EQN_TOK_TILDE:
719 	case EQN_TOK_HAT:
720 	case EQN_TOK_DOT:
721 	case EQN_TOK_DOTDOT:
722 		if (parent->last == NULL) {
723 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
724 			    ep->node->pos, "%s", eqn_toks[tok]);
725 			cur = eqn_box_alloc(ep, parent);
726 			cur->type = EQN_TEXT;
727 			cur->text = mandoc_strdup("");
728 		}
729 		parent = eqn_box_makebinary(ep, parent);
730 		parent->type = EQN_LIST;
731 		parent->expectargs = 1;
732 		parent->font = EQNFONT_ROMAN;
733 		switch (tok) {
734 		case EQN_TOK_DOTDOT:
735 			parent->top = mandoc_strdup("\\[ad]");
736 			break;
737 		case EQN_TOK_VEC:
738 			parent->top = mandoc_strdup("\\[->]");
739 			break;
740 		case EQN_TOK_DYAD:
741 			parent->top = mandoc_strdup("\\[<>]");
742 			break;
743 		case EQN_TOK_TILDE:
744 			parent->top = mandoc_strdup("\\[a~]");
745 			break;
746 		case EQN_TOK_UNDER:
747 			parent->bottom = mandoc_strdup("\\[ul]");
748 			break;
749 		case EQN_TOK_BAR:
750 			parent->top = mandoc_strdup("\\[rn]");
751 			break;
752 		case EQN_TOK_DOT:
753 			parent->top = mandoc_strdup("\\[a.]");
754 			break;
755 		case EQN_TOK_HAT:
756 			parent->top = mandoc_strdup("\\[ha]");
757 			break;
758 		default:
759 			abort();
760 		}
761 		parent = parent->parent;
762 		break;
763 	case EQN_TOK_FWD:
764 	case EQN_TOK_BACK:
765 	case EQN_TOK_DOWN:
766 	case EQN_TOK_UP:
767 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
768 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
769 			    ep->node->pos, "%s", eqn_toks[tok]);
770 		break;
771 	case EQN_TOK_FAT:
772 	case EQN_TOK_ROMAN:
773 	case EQN_TOK_ITALIC:
774 	case EQN_TOK_BOLD:
775 		while (parent->args == parent->expectargs)
776 			parent = parent->parent;
777 		/*
778 		 * These values apply to the next word or sequence of
779 		 * words; thus, we mark that we'll have a child with
780 		 * exactly one of those.
781 		 */
782 		parent = eqn_box_alloc(ep, parent);
783 		parent->type = EQN_LIST;
784 		parent->expectargs = 1;
785 		switch (tok) {
786 		case EQN_TOK_FAT:
787 			parent->font = EQNFONT_FAT;
788 			break;
789 		case EQN_TOK_ROMAN:
790 			parent->font = EQNFONT_ROMAN;
791 			break;
792 		case EQN_TOK_ITALIC:
793 			parent->font = EQNFONT_ITALIC;
794 			break;
795 		case EQN_TOK_BOLD:
796 			parent->font = EQNFONT_BOLD;
797 			break;
798 		default:
799 			abort();
800 		}
801 		break;
802 	case EQN_TOK_SIZE:
803 	case EQN_TOK_GSIZE:
804 		/* Accept two values: integral size and a single. */
805 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
806 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
807 			    ep->node->pos, "%s", eqn_toks[tok]);
808 			break;
809 		}
810 		size = mandoc_strntoi(ep->start, ep->toksz, 10);
811 		if (-1 == size) {
812 			mandoc_msg(MANDOCERR_IT_NONUM, ep->node->line,
813 			    ep->node->pos, "%s", eqn_toks[tok]);
814 			break;
815 		}
816 		if (EQN_TOK_GSIZE == tok) {
817 			ep->gsize = size;
818 			break;
819 		}
820 		while (parent->args == parent->expectargs)
821 			parent = parent->parent;
822 		parent = eqn_box_alloc(ep, parent);
823 		parent->type = EQN_LIST;
824 		parent->expectargs = 1;
825 		parent->size = size;
826 		break;
827 	case EQN_TOK_FROM:
828 	case EQN_TOK_TO:
829 	case EQN_TOK_SUB:
830 	case EQN_TOK_SUP:
831 		/*
832 		 * We have a left-right-associative expression.
833 		 * Repivot under a positional node, open a child scope
834 		 * and keep on reading.
835 		 */
836 		if (parent->last == NULL) {
837 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
838 			    ep->node->pos, "%s", eqn_toks[tok]);
839 			cur = eqn_box_alloc(ep, parent);
840 			cur->type = EQN_TEXT;
841 			cur->text = mandoc_strdup("");
842 		}
843 		while (parent->expectargs == 1 && parent->args == 1)
844 			parent = parent->parent;
845 		if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO)  {
846 			for (cur = parent; cur != NULL; cur = cur->parent)
847 				if (cur->pos == EQNPOS_SUB ||
848 				    cur->pos == EQNPOS_SUP ||
849 				    cur->pos == EQNPOS_SUBSUP ||
850 				    cur->pos == EQNPOS_SQRT ||
851 				    cur->pos == EQNPOS_OVER)
852 					break;
853 			if (cur != NULL)
854 				parent = cur->parent;
855 		}
856 		if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) {
857 			parent->expectargs = 3;
858 			parent->pos = EQNPOS_SUBSUP;
859 			break;
860 		}
861 		if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) {
862 			parent->expectargs = 3;
863 			parent->pos = EQNPOS_FROMTO;
864 			break;
865 		}
866 		parent = eqn_box_makebinary(ep, parent);
867 		switch (tok) {
868 		case EQN_TOK_FROM:
869 			parent->pos = EQNPOS_FROM;
870 			break;
871 		case EQN_TOK_TO:
872 			parent->pos = EQNPOS_TO;
873 			break;
874 		case EQN_TOK_SUP:
875 			parent->pos = EQNPOS_SUP;
876 			break;
877 		case EQN_TOK_SUB:
878 			parent->pos = EQNPOS_SUB;
879 			break;
880 		default:
881 			abort();
882 		}
883 		break;
884 	case EQN_TOK_SQRT:
885 		while (parent->args == parent->expectargs)
886 			parent = parent->parent;
887 		/*
888 		 * Accept a left-right-associative set of arguments just
889 		 * like sub and sup and friends but without rebalancing
890 		 * under a pivot.
891 		 */
892 		parent = eqn_box_alloc(ep, parent);
893 		parent->type = EQN_SUBEXPR;
894 		parent->pos = EQNPOS_SQRT;
895 		parent->expectargs = 1;
896 		break;
897 	case EQN_TOK_OVER:
898 		/*
899 		 * We have a right-left-associative fraction.
900 		 * Close out anything that's currently open, then
901 		 * rebalance and continue reading.
902 		 */
903 		if (parent->last == NULL) {
904 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
905 			    ep->node->pos, "%s", eqn_toks[tok]);
906 			cur = eqn_box_alloc(ep, parent);
907 			cur->type = EQN_TEXT;
908 			cur->text = mandoc_strdup("");
909 		}
910 		while (parent->args == parent->expectargs)
911 			parent = parent->parent;
912 		while (EQN_SUBEXPR == parent->type)
913 			parent = parent->parent;
914 		parent = eqn_box_makebinary(ep, parent);
915 		parent->pos = EQNPOS_OVER;
916 		break;
917 	case EQN_TOK_RIGHT:
918 	case EQN_TOK_BRACE_CLOSE:
919 		/*
920 		 * Close out the existing brace.
921 		 * FIXME: this is a shitty sentinel: we should really
922 		 * have a native EQN_BRACE type or whatnot.
923 		 */
924 		for (cur = parent; cur != NULL; cur = cur->parent)
925 			if (cur->type == EQN_LIST &&
926 			    cur->expectargs > 1 &&
927 			    (tok == EQN_TOK_BRACE_CLOSE ||
928 			     cur->left != NULL))
929 				break;
930 		if (cur == NULL) {
931 			mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->node->line,
932 			    ep->node->pos, "%s", eqn_toks[tok]);
933 			break;
934 		}
935 		parent = cur;
936 		if (EQN_TOK_RIGHT == tok) {
937 			if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
938 				mandoc_msg(MANDOCERR_REQ_EMPTY,
939 				    ep->node->line, ep->node->pos,
940 				    "%s", eqn_toks[tok]);
941 				break;
942 			}
943 			/* Handling depends on right/left. */
944 			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
945 				parent->right = mandoc_strdup("\\[rc]");
946 			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
947 				parent->right = mandoc_strdup("\\[rf]");
948 			else
949 				parent->right =
950 				    mandoc_strndup(ep->start, ep->toksz);
951 		}
952 		parent = parent->parent;
953 		if (tok == EQN_TOK_BRACE_CLOSE &&
954 		    (parent->type == EQN_PILE ||
955 		     parent->type == EQN_MATRIX))
956 			parent = parent->parent;
957 		/* Close out any "singleton" lists. */
958 		while (parent->type == EQN_LIST &&
959 		    parent->expectargs == 1 &&
960 		    parent->args == 1)
961 			parent = parent->parent;
962 		break;
963 	case EQN_TOK_BRACE_OPEN:
964 	case EQN_TOK_LEFT:
965 		/*
966 		 * If we already have something in the stack and we're
967 		 * in an expression, then rewind til we're not any more
968 		 * (just like with the text node).
969 		 */
970 		while (parent->args == parent->expectargs)
971 			parent = parent->parent;
972 		if (EQN_TOK_LEFT == tok &&
973 		    eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
974 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
975 			    ep->node->pos, "%s", eqn_toks[tok]);
976 			break;
977 		}
978 		parent = eqn_box_alloc(ep, parent);
979 		parent->type = EQN_LIST;
980 		if (EQN_TOK_LEFT == tok) {
981 			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
982 				parent->left = mandoc_strdup("\\[lc]");
983 			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
984 				parent->left = mandoc_strdup("\\[lf]");
985 			else
986 				parent->left =
987 				    mandoc_strndup(ep->start, ep->toksz);
988 		}
989 		break;
990 	case EQN_TOK_PILE:
991 	case EQN_TOK_LPILE:
992 	case EQN_TOK_RPILE:
993 	case EQN_TOK_CPILE:
994 	case EQN_TOK_CCOL:
995 	case EQN_TOK_LCOL:
996 	case EQN_TOK_RCOL:
997 		while (parent->args == parent->expectargs)
998 			parent = parent->parent;
999 		parent = eqn_box_alloc(ep, parent);
1000 		parent->type = EQN_PILE;
1001 		parent->expectargs = 1;
1002 		break;
1003 	case EQN_TOK_ABOVE:
1004 		for (cur = parent; cur != NULL; cur = cur->parent)
1005 			if (cur->type == EQN_PILE)
1006 				break;
1007 		if (cur == NULL) {
1008 			mandoc_msg(MANDOCERR_IT_STRAY, ep->node->line,
1009 			    ep->node->pos, "%s", eqn_toks[tok]);
1010 			break;
1011 		}
1012 		parent = eqn_box_alloc(ep, cur);
1013 		parent->type = EQN_LIST;
1014 		break;
1015 	case EQN_TOK_MATRIX:
1016 		while (parent->args == parent->expectargs)
1017 			parent = parent->parent;
1018 		parent = eqn_box_alloc(ep, parent);
1019 		parent->type = EQN_MATRIX;
1020 		parent->expectargs = 1;
1021 		break;
1022 	case EQN_TOK_EOF:
1023 		return;
1024 	case EQN_TOK__MAX:
1025 	case EQN_TOK_FUNC:
1026 	case EQN_TOK_QUOTED:
1027 	case EQN_TOK_SYM:
1028 		p = ep->start;
1029 		assert(p != NULL);
1030 		/*
1031 		 * If we already have something in the stack and we're
1032 		 * in an expression, then rewind til we're not any more.
1033 		 */
1034 		while (parent->args == parent->expectargs)
1035 			parent = parent->parent;
1036 		cur = eqn_box_alloc(ep, parent);
1037 		cur->type = EQN_TEXT;
1038 		cur->text = p;
1039 		switch (tok) {
1040 		case EQN_TOK_FUNC:
1041 			cur->font = EQNFONT_ROMAN;
1042 			break;
1043 		case EQN_TOK_QUOTED:
1044 			if (cur->font == EQNFONT_NONE)
1045 				cur->font = EQNFONT_ITALIC;
1046 			break;
1047 		case EQN_TOK_SYM:
1048 			break;
1049 		default:
1050 			if (cur->font != EQNFONT_NONE || *p == '\0')
1051 				break;
1052 			cpn = p - 1;
1053 			ccln = CCL_LET;
1054 			split = NULL;
1055 			for (;;) {
1056 				/* Advance to next character. */
1057 				cp = cpn++;
1058 				ccl = ccln;
1059 				ccln = isalpha((unsigned char)*cpn) ? CCL_LET :
1060 				    isdigit((unsigned char)*cpn) ||
1061 				    (*cpn == '.' && (ccl == CCL_DIG ||
1062 				     isdigit((unsigned char)cpn[1]))) ?
1063 				    CCL_DIG : CCL_PUN;
1064 				/* No boundary before first character. */
1065 				if (cp < p)
1066 					continue;
1067 				cur->font = ccl == CCL_LET ?
1068 				    EQNFONT_ITALIC : EQNFONT_ROMAN;
1069 				if (*cp == '\\')
1070 					mandoc_escape(&cpn, NULL, NULL);
1071 				/* No boundary after last character. */
1072 				if (*cpn == '\0')
1073 					break;
1074 				if (ccln == ccl && *cp != ',' && *cpn != ',')
1075 					continue;
1076 				/* Boundary found, split the text. */
1077 				if (parent->args == parent->expectargs) {
1078 					/* Remove the text from the tree. */
1079 					if (cur->prev == NULL)
1080 						parent->first = cur->next;
1081 					else
1082 						cur->prev->next = NULL;
1083 					parent->last = cur->prev;
1084 					parent->args--;
1085 					/* Set up a list instead. */
1086 					split = eqn_box_alloc(ep, parent);
1087 					split->type = EQN_LIST;
1088 					/* Insert the word into the list. */
1089 					split->first = split->last = cur;
1090 					cur->parent = split;
1091 					cur->prev = NULL;
1092 					parent = split;
1093 				}
1094 				/* Append a new text box. */
1095 				nbox = eqn_box_alloc(ep, parent);
1096 				nbox->type = EQN_TEXT;
1097 				nbox->text = mandoc_strdup(cpn);
1098 				/* Truncate the old box. */
1099 				p = mandoc_strndup(cur->text,
1100 				    cpn - cur->text);
1101 				free(cur->text);
1102 				cur->text = p;
1103 				/* Setup to process the new box. */
1104 				cur = nbox;
1105 				p = nbox->text;
1106 				cpn = p - 1;
1107 				ccln = CCL_LET;
1108 			}
1109 			if (split != NULL)
1110 				parent = split->parent;
1111 			break;
1112 		}
1113 		break;
1114 	default:
1115 		abort();
1116 	}
1117 	goto next_tok;
1118 }
1119 
1120 void
1121 eqn_free(struct eqn_node *p)
1122 {
1123 	int		 i;
1124 
1125 	if (p == NULL)
1126 		return;
1127 
1128 	for (i = 0; i < (int)p->defsz; i++) {
1129 		free(p->defs[i].key);
1130 		free(p->defs[i].val);
1131 	}
1132 
1133 	free(p->data);
1134 	free(p->defs);
1135 	free(p);
1136 }
1137