xref: /freebsd/contrib/mandoc/eqn.c (revision e7437ae907c89bf85a99c5cbb7ddd194a1ff1354)
1 /*	$Id: eqn.c,v 1.84 2020/01/08 12:16:24 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014,2015,2017,2018,2020 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <time.h>
29 
30 #include "mandoc_aux.h"
31 #include "mandoc.h"
32 #include "roff.h"
33 #include "eqn.h"
34 #include "libmandoc.h"
35 #include "eqn_parse.h"
36 
37 #define	EQN_NEST_MAX	 128 /* maximum nesting of defines */
38 #define	STRNEQ(p1, sz1, p2, sz2) \
39 	((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1)))
40 
41 enum	eqn_tok {
42 	EQN_TOK_DYAD = 0,
43 	EQN_TOK_VEC,
44 	EQN_TOK_UNDER,
45 	EQN_TOK_BAR,
46 	EQN_TOK_TILDE,
47 	EQN_TOK_HAT,
48 	EQN_TOK_DOT,
49 	EQN_TOK_DOTDOT,
50 	EQN_TOK_FWD,
51 	EQN_TOK_BACK,
52 	EQN_TOK_DOWN,
53 	EQN_TOK_UP,
54 	EQN_TOK_FAT,
55 	EQN_TOK_ROMAN,
56 	EQN_TOK_ITALIC,
57 	EQN_TOK_BOLD,
58 	EQN_TOK_SIZE,
59 	EQN_TOK_SUB,
60 	EQN_TOK_SUP,
61 	EQN_TOK_SQRT,
62 	EQN_TOK_OVER,
63 	EQN_TOK_FROM,
64 	EQN_TOK_TO,
65 	EQN_TOK_BRACE_OPEN,
66 	EQN_TOK_BRACE_CLOSE,
67 	EQN_TOK_GSIZE,
68 	EQN_TOK_GFONT,
69 	EQN_TOK_MARK,
70 	EQN_TOK_LINEUP,
71 	EQN_TOK_LEFT,
72 	EQN_TOK_RIGHT,
73 	EQN_TOK_PILE,
74 	EQN_TOK_LPILE,
75 	EQN_TOK_RPILE,
76 	EQN_TOK_CPILE,
77 	EQN_TOK_MATRIX,
78 	EQN_TOK_CCOL,
79 	EQN_TOK_LCOL,
80 	EQN_TOK_RCOL,
81 	EQN_TOK_DELIM,
82 	EQN_TOK_DEFINE,
83 	EQN_TOK_TDEFINE,
84 	EQN_TOK_NDEFINE,
85 	EQN_TOK_UNDEF,
86 	EQN_TOK_ABOVE,
87 	EQN_TOK__MAX,
88 	EQN_TOK_FUNC,
89 	EQN_TOK_QUOTED,
90 	EQN_TOK_SYM,
91 	EQN_TOK_EOF
92 };
93 
94 static	const char *eqn_toks[EQN_TOK__MAX] = {
95 	"dyad", /* EQN_TOK_DYAD */
96 	"vec", /* EQN_TOK_VEC */
97 	"under", /* EQN_TOK_UNDER */
98 	"bar", /* EQN_TOK_BAR */
99 	"tilde", /* EQN_TOK_TILDE */
100 	"hat", /* EQN_TOK_HAT */
101 	"dot", /* EQN_TOK_DOT */
102 	"dotdot", /* EQN_TOK_DOTDOT */
103 	"fwd", /* EQN_TOK_FWD * */
104 	"back", /* EQN_TOK_BACK */
105 	"down", /* EQN_TOK_DOWN */
106 	"up", /* EQN_TOK_UP */
107 	"fat", /* EQN_TOK_FAT */
108 	"roman", /* EQN_TOK_ROMAN */
109 	"italic", /* EQN_TOK_ITALIC */
110 	"bold", /* EQN_TOK_BOLD */
111 	"size", /* EQN_TOK_SIZE */
112 	"sub", /* EQN_TOK_SUB */
113 	"sup", /* EQN_TOK_SUP */
114 	"sqrt", /* EQN_TOK_SQRT */
115 	"over", /* EQN_TOK_OVER */
116 	"from", /* EQN_TOK_FROM */
117 	"to", /* EQN_TOK_TO */
118 	"{", /* EQN_TOK_BRACE_OPEN */
119 	"}", /* EQN_TOK_BRACE_CLOSE */
120 	"gsize", /* EQN_TOK_GSIZE */
121 	"gfont", /* EQN_TOK_GFONT */
122 	"mark", /* EQN_TOK_MARK */
123 	"lineup", /* EQN_TOK_LINEUP */
124 	"left", /* EQN_TOK_LEFT */
125 	"right", /* EQN_TOK_RIGHT */
126 	"pile", /* EQN_TOK_PILE */
127 	"lpile", /* EQN_TOK_LPILE */
128 	"rpile", /* EQN_TOK_RPILE */
129 	"cpile", /* EQN_TOK_CPILE */
130 	"matrix", /* EQN_TOK_MATRIX */
131 	"ccol", /* EQN_TOK_CCOL */
132 	"lcol", /* EQN_TOK_LCOL */
133 	"rcol", /* EQN_TOK_RCOL */
134 	"delim", /* EQN_TOK_DELIM */
135 	"define", /* EQN_TOK_DEFINE */
136 	"tdefine", /* EQN_TOK_TDEFINE */
137 	"ndefine", /* EQN_TOK_NDEFINE */
138 	"undef", /* EQN_TOK_UNDEF */
139 	"above", /* EQN_TOK_ABOVE */
140 };
141 
142 static	const char *const eqn_func[] = {
143 	"acos",	"acsc",	"and",	"arc",	"asec",	"asin", "atan",
144 	"cos",	"cosh", "coth",	"csc",	"det",	"exp",	"for",
145 	"if",	"lim",	"ln",	"log",	"max",	"min",
146 	"sec",	"sin",	"sinh",	"tan",	"tanh",	"Im",	"Re",
147 };
148 
149 enum	eqn_symt {
150 	EQNSYM_alpha = 0,
151 	EQNSYM_beta,
152 	EQNSYM_chi,
153 	EQNSYM_delta,
154 	EQNSYM_epsilon,
155 	EQNSYM_eta,
156 	EQNSYM_gamma,
157 	EQNSYM_iota,
158 	EQNSYM_kappa,
159 	EQNSYM_lambda,
160 	EQNSYM_mu,
161 	EQNSYM_nu,
162 	EQNSYM_omega,
163 	EQNSYM_omicron,
164 	EQNSYM_phi,
165 	EQNSYM_pi,
166 	EQNSYM_ps,
167 	EQNSYM_rho,
168 	EQNSYM_sigma,
169 	EQNSYM_tau,
170 	EQNSYM_theta,
171 	EQNSYM_upsilon,
172 	EQNSYM_xi,
173 	EQNSYM_zeta,
174 	EQNSYM_DELTA,
175 	EQNSYM_GAMMA,
176 	EQNSYM_LAMBDA,
177 	EQNSYM_OMEGA,
178 	EQNSYM_PHI,
179 	EQNSYM_PI,
180 	EQNSYM_PSI,
181 	EQNSYM_SIGMA,
182 	EQNSYM_THETA,
183 	EQNSYM_UPSILON,
184 	EQNSYM_XI,
185 	EQNSYM_inter,
186 	EQNSYM_union,
187 	EQNSYM_prod,
188 	EQNSYM_int,
189 	EQNSYM_sum,
190 	EQNSYM_grad,
191 	EQNSYM_del,
192 	EQNSYM_times,
193 	EQNSYM_cdot,
194 	EQNSYM_nothing,
195 	EQNSYM_approx,
196 	EQNSYM_prime,
197 	EQNSYM_half,
198 	EQNSYM_partial,
199 	EQNSYM_inf,
200 	EQNSYM_muchgreat,
201 	EQNSYM_muchless,
202 	EQNSYM_larrow,
203 	EQNSYM_rarrow,
204 	EQNSYM_pm,
205 	EQNSYM_nequal,
206 	EQNSYM_equiv,
207 	EQNSYM_lessequal,
208 	EQNSYM_moreequal,
209 	EQNSYM_minus,
210 	EQNSYM__MAX
211 };
212 
213 struct	eqnsym {
214 	const char	*str;
215 	const char	*sym;
216 };
217 
218 static	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
219 	{ "alpha", "*a" }, /* EQNSYM_alpha */
220 	{ "beta", "*b" }, /* EQNSYM_beta */
221 	{ "chi", "*x" }, /* EQNSYM_chi */
222 	{ "delta", "*d" }, /* EQNSYM_delta */
223 	{ "epsilon", "*e" }, /* EQNSYM_epsilon */
224 	{ "eta", "*y" }, /* EQNSYM_eta */
225 	{ "gamma", "*g" }, /* EQNSYM_gamma */
226 	{ "iota", "*i" }, /* EQNSYM_iota */
227 	{ "kappa", "*k" }, /* EQNSYM_kappa */
228 	{ "lambda", "*l" }, /* EQNSYM_lambda */
229 	{ "mu", "*m" }, /* EQNSYM_mu */
230 	{ "nu", "*n" }, /* EQNSYM_nu */
231 	{ "omega", "*w" }, /* EQNSYM_omega */
232 	{ "omicron", "*o" }, /* EQNSYM_omicron */
233 	{ "phi", "*f" }, /* EQNSYM_phi */
234 	{ "pi", "*p" }, /* EQNSYM_pi */
235 	{ "psi", "*q" }, /* EQNSYM_psi */
236 	{ "rho", "*r" }, /* EQNSYM_rho */
237 	{ "sigma", "*s" }, /* EQNSYM_sigma */
238 	{ "tau", "*t" }, /* EQNSYM_tau */
239 	{ "theta", "*h" }, /* EQNSYM_theta */
240 	{ "upsilon", "*u" }, /* EQNSYM_upsilon */
241 	{ "xi", "*c" }, /* EQNSYM_xi */
242 	{ "zeta", "*z" }, /* EQNSYM_zeta */
243 	{ "DELTA", "*D" }, /* EQNSYM_DELTA */
244 	{ "GAMMA", "*G" }, /* EQNSYM_GAMMA */
245 	{ "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
246 	{ "OMEGA", "*W" }, /* EQNSYM_OMEGA */
247 	{ "PHI", "*F" }, /* EQNSYM_PHI */
248 	{ "PI", "*P" }, /* EQNSYM_PI */
249 	{ "PSI", "*Q" }, /* EQNSYM_PSI */
250 	{ "SIGMA", "*S" }, /* EQNSYM_SIGMA */
251 	{ "THETA", "*H" }, /* EQNSYM_THETA */
252 	{ "UPSILON", "*U" }, /* EQNSYM_UPSILON */
253 	{ "XI", "*C" }, /* EQNSYM_XI */
254 	{ "inter", "ca" }, /* EQNSYM_inter */
255 	{ "union", "cu" }, /* EQNSYM_union */
256 	{ "prod", "product" }, /* EQNSYM_prod */
257 	{ "int", "integral" }, /* EQNSYM_int */
258 	{ "sum", "sum" }, /* EQNSYM_sum */
259 	{ "grad", "gr" }, /* EQNSYM_grad */
260 	{ "del", "gr" }, /* EQNSYM_del */
261 	{ "times", "mu" }, /* EQNSYM_times */
262 	{ "cdot", "pc" }, /* EQNSYM_cdot */
263 	{ "nothing", "&" }, /* EQNSYM_nothing */
264 	{ "approx", "~~" }, /* EQNSYM_approx */
265 	{ "prime", "fm" }, /* EQNSYM_prime */
266 	{ "half", "12" }, /* EQNSYM_half */
267 	{ "partial", "pd" }, /* EQNSYM_partial */
268 	{ "inf", "if" }, /* EQNSYM_inf */
269 	{ ">>", ">>" }, /* EQNSYM_muchgreat */
270 	{ "<<", "<<" }, /* EQNSYM_muchless */
271 	{ "<-", "<-" }, /* EQNSYM_larrow */
272 	{ "->", "->" }, /* EQNSYM_rarrow */
273 	{ "+-", "+-" }, /* EQNSYM_pm */
274 	{ "!=", "!=" }, /* EQNSYM_nequal */
275 	{ "==", "==" }, /* EQNSYM_equiv */
276 	{ "<=", "<=" }, /* EQNSYM_lessequal */
277 	{ ">=", ">=" }, /* EQNSYM_moreequal */
278 	{ "-", "mi" }, /* EQNSYM_minus */
279 };
280 
281 enum	parse_mode {
282 	MODE_QUOTED,
283 	MODE_NOSUB,
284 	MODE_SUB,
285 	MODE_TOK
286 };
287 
288 struct	eqn_def {
289 	char		 *key;
290 	size_t		  keysz;
291 	char		 *val;
292 	size_t		  valsz;
293 };
294 
295 static	struct eqn_box	*eqn_box_alloc(struct eqn_node *, struct eqn_box *);
296 static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *,
297 				struct eqn_box *);
298 static	void		 eqn_def(struct eqn_node *);
299 static	struct eqn_def	*eqn_def_find(struct eqn_node *);
300 static	void		 eqn_delim(struct eqn_node *);
301 static	enum eqn_tok	 eqn_next(struct eqn_node *, enum parse_mode);
302 static	void		 eqn_undef(struct eqn_node *);
303 
304 
305 struct eqn_node *
306 eqn_alloc(void)
307 {
308 	struct eqn_node *ep;
309 
310 	ep = mandoc_calloc(1, sizeof(*ep));
311 	ep->gsize = EQN_DEFSIZE;
312 	return ep;
313 }
314 
315 void
316 eqn_reset(struct eqn_node *ep)
317 {
318 	free(ep->data);
319 	ep->data = ep->start = ep->end = NULL;
320 	ep->sz = ep->toksz = 0;
321 }
322 
323 void
324 eqn_read(struct eqn_node *ep, const char *p)
325 {
326 	char		*cp;
327 
328 	if (ep->data == NULL) {
329 		ep->sz = strlen(p);
330 		ep->data = mandoc_strdup(p);
331 	} else {
332 		ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p);
333 		free(ep->data);
334 		ep->data = cp;
335 	}
336 	ep->sz += 1;
337 }
338 
339 /*
340  * Find the key "key" of the give size within our eqn-defined values.
341  */
342 static struct eqn_def *
343 eqn_def_find(struct eqn_node *ep)
344 {
345 	int		 i;
346 
347 	for (i = 0; i < (int)ep->defsz; i++)
348 		if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
349 		    ep->defs[i].keysz, ep->start, ep->toksz))
350 			return &ep->defs[i];
351 
352 	return NULL;
353 }
354 
355 /*
356  * Parse a token from the input text.  The modes are:
357  * MODE_QUOTED: Use *ep->start as the delimiter; the token ends
358  *   before its next occurence.  Do not interpret the token in any
359  *   way and return EQN_TOK_QUOTED.  All other modes behave like
360  *   MODE_QUOTED when *ep->start is '"'.
361  * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it;
362  *   otherwise, it ends before the next whitespace or brace.
363  *   Do not interpret the token and return EQN_TOK__MAX.
364  * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an
365  *   alias created with define.  If it is an alias, replace it with
366  *   its string value and reparse.
367  * MODE_TOK: Like MODE_SUB, but also check the token against the list
368  *   of tokens, and if there is a match, return that token.  Otherwise,
369  *   if the token matches a symbol, return EQN_TOK_SYM; if it matches
370  *   a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX.  Except for
371  *   a token match, *ep->start is set to an allocated string that the
372  *   caller is expected to free.
373  * All modes skip whitespace following the end of the token.
374  */
375 static enum eqn_tok
376 eqn_next(struct eqn_node *ep, enum parse_mode mode)
377 {
378 	static int	 last_len, lim;
379 
380 	struct eqn_def	*def;
381 	size_t		 start;
382 	int		 diff, i, quoted;
383 	enum eqn_tok	 tok;
384 
385 	/*
386 	 * Reset the recursion counter after advancing
387 	 * beyond the end of the previous substitution.
388 	 */
389 	if (ep->end - ep->data >= last_len)
390 		lim = 0;
391 
392 	ep->start = ep->end;
393 	quoted = mode == MODE_QUOTED;
394 	for (;;) {
395 		switch (*ep->start) {
396 		case '\0':
397 			ep->toksz = 0;
398 			return EQN_TOK_EOF;
399 		case '"':
400 			quoted = 1;
401 			break;
402 		case ' ':
403 		case '\t':
404 		case '~':
405 		case '^':
406 			if (quoted)
407 				break;
408 			ep->start++;
409 			continue;
410 		default:
411 			break;
412 		}
413 		if (quoted) {
414 			ep->end = strchr(ep->start + 1, *ep->start);
415 			ep->start++;  /* Skip opening quote. */
416 			if (ep->end == NULL) {
417 				mandoc_msg(MANDOCERR_ARG_QUOTE,
418 				    ep->node->line, ep->node->pos, NULL);
419 				ep->end = strchr(ep->start, '\0');
420 			}
421 		} else {
422 			ep->end = ep->start + 1;
423 			if (*ep->start != '{' && *ep->start != '}')
424 				ep->end += strcspn(ep->end, " ^~\"{}\t");
425 		}
426 		ep->toksz = ep->end - ep->start;
427 		if (quoted && *ep->end != '\0')
428 			ep->end++;  /* Skip closing quote. */
429 		while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL)
430 			ep->end++;
431 		if (quoted)  /* Cannot return, may have to strndup. */
432 			break;
433 		if (mode == MODE_NOSUB)
434 			return EQN_TOK__MAX;
435 		if ((def = eqn_def_find(ep)) == NULL)
436 			break;
437 		if (++lim > EQN_NEST_MAX) {
438 			mandoc_msg(MANDOCERR_ROFFLOOP,
439 			    ep->node->line, ep->node->pos, NULL);
440 			return EQN_TOK_EOF;
441 		}
442 
443 		/* Replace a defined name with its string value. */
444 		if ((diff = def->valsz - ep->toksz) > 0) {
445 			start = ep->start - ep->data;
446 			ep->sz += diff;
447 			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
448 			ep->start = ep->data + start;
449 		}
450 		if (diff)
451 			memmove(ep->start + def->valsz, ep->start + ep->toksz,
452 			    strlen(ep->start + ep->toksz) + 1);
453 		memcpy(ep->start, def->val, def->valsz);
454 		last_len = ep->start - ep->data + def->valsz;
455 	}
456 	if (mode != MODE_TOK)
457 		return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX;
458 	if (quoted) {
459 		ep->start = mandoc_strndup(ep->start, ep->toksz);
460 		return EQN_TOK_QUOTED;
461 	}
462 	for (tok = 0; tok < EQN_TOK__MAX; tok++)
463 		if (STRNEQ(ep->start, ep->toksz,
464 		    eqn_toks[tok], strlen(eqn_toks[tok])))
465 			return tok;
466 
467 	for (i = 0; i < EQNSYM__MAX; i++) {
468 		if (STRNEQ(ep->start, ep->toksz,
469 		    eqnsyms[i].str, strlen(eqnsyms[i].str))) {
470 			mandoc_asprintf(&ep->start,
471 			    "\\[%s]", eqnsyms[i].sym);
472 			return EQN_TOK_SYM;
473 		}
474 	}
475 	ep->start = mandoc_strndup(ep->start, ep->toksz);
476 	for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++)
477 		if (STRNEQ(ep->start, ep->toksz,
478 		    eqn_func[i], strlen(eqn_func[i])))
479 			return EQN_TOK_FUNC;
480 	return EQN_TOK__MAX;
481 }
482 
483 void
484 eqn_box_free(struct eqn_box *bp)
485 {
486 	if (bp == NULL)
487 		return;
488 
489 	if (bp->first)
490 		eqn_box_free(bp->first);
491 	if (bp->next)
492 		eqn_box_free(bp->next);
493 
494 	free(bp->text);
495 	free(bp->left);
496 	free(bp->right);
497 	free(bp->top);
498 	free(bp->bottom);
499 	free(bp);
500 }
501 
502 struct eqn_box *
503 eqn_box_new(void)
504 {
505 	struct eqn_box	*bp;
506 
507 	bp = mandoc_calloc(1, sizeof(*bp));
508 	bp->expectargs = UINT_MAX;
509 	return bp;
510 }
511 
512 /*
513  * Allocate a box as the last child of the parent node.
514  */
515 static struct eqn_box *
516 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
517 {
518 	struct eqn_box	*bp;
519 
520 	bp = eqn_box_new();
521 	bp->parent = parent;
522 	bp->parent->args++;
523 	bp->font = bp->parent->font;
524 	bp->size = ep->gsize;
525 
526 	if (NULL != parent->first) {
527 		parent->last->next = bp;
528 		bp->prev = parent->last;
529 	} else
530 		parent->first = bp;
531 
532 	parent->last = bp;
533 	return bp;
534 }
535 
536 /*
537  * Reparent the current last node (of the current parent) under a new
538  * EQN_SUBEXPR as the first element.
539  * Then return the new parent.
540  * The new EQN_SUBEXPR will have a two-child limit.
541  */
542 static struct eqn_box *
543 eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent)
544 {
545 	struct eqn_box	*b, *newb;
546 
547 	assert(NULL != parent->last);
548 	b = parent->last;
549 	if (parent->last == parent->first)
550 		parent->first = NULL;
551 	parent->args--;
552 	parent->last = b->prev;
553 	b->prev = NULL;
554 	newb = eqn_box_alloc(ep, parent);
555 	newb->type = EQN_SUBEXPR;
556 	newb->expectargs = 2;
557 	newb->args = 1;
558 	newb->first = newb->last = b;
559 	newb->first->next = NULL;
560 	b->parent = newb;
561 	return newb;
562 }
563 
564 /*
565  * Parse the "delim" control statement.
566  */
567 static void
568 eqn_delim(struct eqn_node *ep)
569 {
570 	if (ep->end[0] == '\0' || ep->end[1] == '\0') {
571 		mandoc_msg(MANDOCERR_REQ_EMPTY,
572 		    ep->node->line, ep->node->pos, "delim");
573 		if (ep->end[0] != '\0')
574 			ep->end++;
575 	} else if (strncmp(ep->end, "off", 3) == 0) {
576 		ep->delim = 0;
577 		ep->end += 3;
578 	} else if (strncmp(ep->end, "on", 2) == 0) {
579 		if (ep->odelim && ep->cdelim)
580 			ep->delim = 1;
581 		ep->end += 2;
582 	} else {
583 		ep->odelim = *ep->end++;
584 		ep->cdelim = *ep->end++;
585 		ep->delim = 1;
586 	}
587 }
588 
589 /*
590  * Undefine a previously-defined string.
591  */
592 static void
593 eqn_undef(struct eqn_node *ep)
594 {
595 	struct eqn_def	*def;
596 
597 	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
598 		mandoc_msg(MANDOCERR_REQ_EMPTY,
599 		    ep->node->line, ep->node->pos, "undef");
600 		return;
601 	}
602 	if ((def = eqn_def_find(ep)) == NULL)
603 		return;
604 	free(def->key);
605 	free(def->val);
606 	def->key = def->val = NULL;
607 	def->keysz = def->valsz = 0;
608 }
609 
610 static void
611 eqn_def(struct eqn_node *ep)
612 {
613 	struct eqn_def	*def;
614 	int		 i;
615 
616 	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
617 		mandoc_msg(MANDOCERR_REQ_EMPTY,
618 		    ep->node->line, ep->node->pos, "define");
619 		return;
620 	}
621 
622 	/*
623 	 * Search for a key that already exists.
624 	 * Create a new key if none is found.
625 	 */
626 	if ((def = eqn_def_find(ep)) == NULL) {
627 		/* Find holes in string array. */
628 		for (i = 0; i < (int)ep->defsz; i++)
629 			if (0 == ep->defs[i].keysz)
630 				break;
631 
632 		if (i == (int)ep->defsz) {
633 			ep->defsz++;
634 			ep->defs = mandoc_reallocarray(ep->defs,
635 			    ep->defsz, sizeof(struct eqn_def));
636 			ep->defs[i].key = ep->defs[i].val = NULL;
637 		}
638 
639 		def = ep->defs + i;
640 		free(def->key);
641 		def->key = mandoc_strndup(ep->start, ep->toksz);
642 		def->keysz = ep->toksz;
643 	}
644 
645 	if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) {
646 		mandoc_msg(MANDOCERR_REQ_EMPTY,
647 		    ep->node->line, ep->node->pos, "define %s", def->key);
648 		free(def->key);
649 		free(def->val);
650 		def->key = def->val = NULL;
651 		def->keysz = def->valsz = 0;
652 		return;
653 	}
654 	free(def->val);
655 	def->val = mandoc_strndup(ep->start, ep->toksz);
656 	def->valsz = ep->toksz;
657 }
658 
659 void
660 eqn_parse(struct eqn_node *ep)
661 {
662 	struct eqn_box	*cur, *nbox, *parent, *split;
663 	const char	*cp, *cpn;
664 	char		*p;
665 	enum eqn_tok	 tok;
666 	enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln;
667 	int		 size;
668 
669 	parent = ep->node->eqn;
670 	assert(parent != NULL);
671 
672 	/*
673 	 * Empty equation.
674 	 * Do not add it to the high-level syntax tree.
675 	 */
676 
677 	if (ep->data == NULL)
678 		return;
679 
680 	ep->start = ep->end = ep->data;
681 
682 next_tok:
683 	tok = eqn_next(ep, MODE_TOK);
684 	switch (tok) {
685 	case EQN_TOK_UNDEF:
686 		eqn_undef(ep);
687 		break;
688 	case EQN_TOK_NDEFINE:
689 	case EQN_TOK_DEFINE:
690 		eqn_def(ep);
691 		break;
692 	case EQN_TOK_TDEFINE:
693 		if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF ||
694 		    eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF)
695 			mandoc_msg(MANDOCERR_REQ_EMPTY,
696 			    ep->node->line, ep->node->pos, "tdefine");
697 		break;
698 	case EQN_TOK_DELIM:
699 		eqn_delim(ep);
700 		break;
701 	case EQN_TOK_GFONT:
702 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
703 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
704 			    ep->node->pos, "%s", eqn_toks[tok]);
705 		break;
706 	case EQN_TOK_MARK:
707 	case EQN_TOK_LINEUP:
708 		/* Ignore these. */
709 		break;
710 	case EQN_TOK_DYAD:
711 	case EQN_TOK_VEC:
712 	case EQN_TOK_UNDER:
713 	case EQN_TOK_BAR:
714 	case EQN_TOK_TILDE:
715 	case EQN_TOK_HAT:
716 	case EQN_TOK_DOT:
717 	case EQN_TOK_DOTDOT:
718 		if (parent->last == NULL) {
719 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
720 			    ep->node->pos, "%s", eqn_toks[tok]);
721 			cur = eqn_box_alloc(ep, parent);
722 			cur->type = EQN_TEXT;
723 			cur->text = mandoc_strdup("");
724 		}
725 		parent = eqn_box_makebinary(ep, parent);
726 		parent->type = EQN_LIST;
727 		parent->expectargs = 1;
728 		parent->font = EQNFONT_ROMAN;
729 		switch (tok) {
730 		case EQN_TOK_DOTDOT:
731 			parent->top = mandoc_strdup("\\[ad]");
732 			break;
733 		case EQN_TOK_VEC:
734 			parent->top = mandoc_strdup("\\[->]");
735 			break;
736 		case EQN_TOK_DYAD:
737 			parent->top = mandoc_strdup("\\[<>]");
738 			break;
739 		case EQN_TOK_TILDE:
740 			parent->top = mandoc_strdup("\\[a~]");
741 			break;
742 		case EQN_TOK_UNDER:
743 			parent->bottom = mandoc_strdup("\\[ul]");
744 			break;
745 		case EQN_TOK_BAR:
746 			parent->top = mandoc_strdup("\\[rn]");
747 			break;
748 		case EQN_TOK_DOT:
749 			parent->top = mandoc_strdup("\\[a.]");
750 			break;
751 		case EQN_TOK_HAT:
752 			parent->top = mandoc_strdup("\\[ha]");
753 			break;
754 		default:
755 			abort();
756 		}
757 		parent = parent->parent;
758 		break;
759 	case EQN_TOK_FWD:
760 	case EQN_TOK_BACK:
761 	case EQN_TOK_DOWN:
762 	case EQN_TOK_UP:
763 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
764 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
765 			    ep->node->pos, "%s", eqn_toks[tok]);
766 		break;
767 	case EQN_TOK_FAT:
768 	case EQN_TOK_ROMAN:
769 	case EQN_TOK_ITALIC:
770 	case EQN_TOK_BOLD:
771 		while (parent->args == parent->expectargs)
772 			parent = parent->parent;
773 		/*
774 		 * These values apply to the next word or sequence of
775 		 * words; thus, we mark that we'll have a child with
776 		 * exactly one of those.
777 		 */
778 		parent = eqn_box_alloc(ep, parent);
779 		parent->type = EQN_LIST;
780 		parent->expectargs = 1;
781 		switch (tok) {
782 		case EQN_TOK_FAT:
783 			parent->font = EQNFONT_FAT;
784 			break;
785 		case EQN_TOK_ROMAN:
786 			parent->font = EQNFONT_ROMAN;
787 			break;
788 		case EQN_TOK_ITALIC:
789 			parent->font = EQNFONT_ITALIC;
790 			break;
791 		case EQN_TOK_BOLD:
792 			parent->font = EQNFONT_BOLD;
793 			break;
794 		default:
795 			abort();
796 		}
797 		break;
798 	case EQN_TOK_SIZE:
799 	case EQN_TOK_GSIZE:
800 		/* Accept two values: integral size and a single. */
801 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
802 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
803 			    ep->node->pos, "%s", eqn_toks[tok]);
804 			break;
805 		}
806 		size = mandoc_strntoi(ep->start, ep->toksz, 10);
807 		if (-1 == size) {
808 			mandoc_msg(MANDOCERR_IT_NONUM, ep->node->line,
809 			    ep->node->pos, "%s", eqn_toks[tok]);
810 			break;
811 		}
812 		if (EQN_TOK_GSIZE == tok) {
813 			ep->gsize = size;
814 			break;
815 		}
816 		while (parent->args == parent->expectargs)
817 			parent = parent->parent;
818 		parent = eqn_box_alloc(ep, parent);
819 		parent->type = EQN_LIST;
820 		parent->expectargs = 1;
821 		parent->size = size;
822 		break;
823 	case EQN_TOK_FROM:
824 	case EQN_TOK_TO:
825 	case EQN_TOK_SUB:
826 	case EQN_TOK_SUP:
827 		/*
828 		 * We have a left-right-associative expression.
829 		 * Repivot under a positional node, open a child scope
830 		 * and keep on reading.
831 		 */
832 		if (parent->last == NULL) {
833 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
834 			    ep->node->pos, "%s", eqn_toks[tok]);
835 			cur = eqn_box_alloc(ep, parent);
836 			cur->type = EQN_TEXT;
837 			cur->text = mandoc_strdup("");
838 		}
839 		while (parent->expectargs == 1 && parent->args == 1)
840 			parent = parent->parent;
841 		if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO)  {
842 			for (cur = parent; cur != NULL; cur = cur->parent)
843 				if (cur->pos == EQNPOS_SUB ||
844 				    cur->pos == EQNPOS_SUP ||
845 				    cur->pos == EQNPOS_SUBSUP ||
846 				    cur->pos == EQNPOS_SQRT ||
847 				    cur->pos == EQNPOS_OVER)
848 					break;
849 			if (cur != NULL)
850 				parent = cur->parent;
851 		}
852 		if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) {
853 			parent->expectargs = 3;
854 			parent->pos = EQNPOS_SUBSUP;
855 			break;
856 		}
857 		if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) {
858 			parent->expectargs = 3;
859 			parent->pos = EQNPOS_FROMTO;
860 			break;
861 		}
862 		parent = eqn_box_makebinary(ep, parent);
863 		switch (tok) {
864 		case EQN_TOK_FROM:
865 			parent->pos = EQNPOS_FROM;
866 			break;
867 		case EQN_TOK_TO:
868 			parent->pos = EQNPOS_TO;
869 			break;
870 		case EQN_TOK_SUP:
871 			parent->pos = EQNPOS_SUP;
872 			break;
873 		case EQN_TOK_SUB:
874 			parent->pos = EQNPOS_SUB;
875 			break;
876 		default:
877 			abort();
878 		}
879 		break;
880 	case EQN_TOK_SQRT:
881 		while (parent->args == parent->expectargs)
882 			parent = parent->parent;
883 		/*
884 		 * Accept a left-right-associative set of arguments just
885 		 * like sub and sup and friends but without rebalancing
886 		 * under a pivot.
887 		 */
888 		parent = eqn_box_alloc(ep, parent);
889 		parent->type = EQN_SUBEXPR;
890 		parent->pos = EQNPOS_SQRT;
891 		parent->expectargs = 1;
892 		break;
893 	case EQN_TOK_OVER:
894 		/*
895 		 * We have a right-left-associative fraction.
896 		 * Close out anything that's currently open, then
897 		 * rebalance and continue reading.
898 		 */
899 		if (parent->last == NULL) {
900 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
901 			    ep->node->pos, "%s", eqn_toks[tok]);
902 			cur = eqn_box_alloc(ep, parent);
903 			cur->type = EQN_TEXT;
904 			cur->text = mandoc_strdup("");
905 		}
906 		while (parent->args == parent->expectargs)
907 			parent = parent->parent;
908 		while (EQN_SUBEXPR == parent->type)
909 			parent = parent->parent;
910 		parent = eqn_box_makebinary(ep, parent);
911 		parent->pos = EQNPOS_OVER;
912 		break;
913 	case EQN_TOK_RIGHT:
914 	case EQN_TOK_BRACE_CLOSE:
915 		/*
916 		 * Close out the existing brace.
917 		 * FIXME: this is a shitty sentinel: we should really
918 		 * have a native EQN_BRACE type or whatnot.
919 		 */
920 		for (cur = parent; cur != NULL; cur = cur->parent)
921 			if (cur->type == EQN_LIST &&
922 			    cur->expectargs > 1 &&
923 			    (tok == EQN_TOK_BRACE_CLOSE ||
924 			     cur->left != NULL))
925 				break;
926 		if (cur == NULL) {
927 			mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->node->line,
928 			    ep->node->pos, "%s", eqn_toks[tok]);
929 			break;
930 		}
931 		parent = cur;
932 		if (EQN_TOK_RIGHT == tok) {
933 			if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
934 				mandoc_msg(MANDOCERR_REQ_EMPTY,
935 				    ep->node->line, ep->node->pos,
936 				    "%s", eqn_toks[tok]);
937 				break;
938 			}
939 			/* Handling depends on right/left. */
940 			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
941 				parent->right = mandoc_strdup("\\[rc]");
942 			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
943 				parent->right = mandoc_strdup("\\[rf]");
944 			else
945 				parent->right =
946 				    mandoc_strndup(ep->start, ep->toksz);
947 		}
948 		parent = parent->parent;
949 		if (tok == EQN_TOK_BRACE_CLOSE &&
950 		    (parent->type == EQN_PILE ||
951 		     parent->type == EQN_MATRIX))
952 			parent = parent->parent;
953 		/* Close out any "singleton" lists. */
954 		while (parent->type == EQN_LIST &&
955 		    parent->expectargs == 1 &&
956 		    parent->args == 1)
957 			parent = parent->parent;
958 		break;
959 	case EQN_TOK_BRACE_OPEN:
960 	case EQN_TOK_LEFT:
961 		/*
962 		 * If we already have something in the stack and we're
963 		 * in an expression, then rewind til we're not any more
964 		 * (just like with the text node).
965 		 */
966 		while (parent->args == parent->expectargs)
967 			parent = parent->parent;
968 		if (EQN_TOK_LEFT == tok &&
969 		    eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
970 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
971 			    ep->node->pos, "%s", eqn_toks[tok]);
972 			break;
973 		}
974 		parent = eqn_box_alloc(ep, parent);
975 		parent->type = EQN_LIST;
976 		if (EQN_TOK_LEFT == tok) {
977 			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
978 				parent->left = mandoc_strdup("\\[lc]");
979 			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
980 				parent->left = mandoc_strdup("\\[lf]");
981 			else
982 				parent->left =
983 				    mandoc_strndup(ep->start, ep->toksz);
984 		}
985 		break;
986 	case EQN_TOK_PILE:
987 	case EQN_TOK_LPILE:
988 	case EQN_TOK_RPILE:
989 	case EQN_TOK_CPILE:
990 	case EQN_TOK_CCOL:
991 	case EQN_TOK_LCOL:
992 	case EQN_TOK_RCOL:
993 		while (parent->args == parent->expectargs)
994 			parent = parent->parent;
995 		parent = eqn_box_alloc(ep, parent);
996 		parent->type = EQN_PILE;
997 		parent->expectargs = 1;
998 		break;
999 	case EQN_TOK_ABOVE:
1000 		for (cur = parent; cur != NULL; cur = cur->parent)
1001 			if (cur->type == EQN_PILE)
1002 				break;
1003 		if (cur == NULL) {
1004 			mandoc_msg(MANDOCERR_IT_STRAY, ep->node->line,
1005 			    ep->node->pos, "%s", eqn_toks[tok]);
1006 			break;
1007 		}
1008 		parent = eqn_box_alloc(ep, cur);
1009 		parent->type = EQN_LIST;
1010 		break;
1011 	case EQN_TOK_MATRIX:
1012 		while (parent->args == parent->expectargs)
1013 			parent = parent->parent;
1014 		parent = eqn_box_alloc(ep, parent);
1015 		parent->type = EQN_MATRIX;
1016 		parent->expectargs = 1;
1017 		break;
1018 	case EQN_TOK_EOF:
1019 		return;
1020 	case EQN_TOK__MAX:
1021 	case EQN_TOK_FUNC:
1022 	case EQN_TOK_QUOTED:
1023 	case EQN_TOK_SYM:
1024 		p = ep->start;
1025 		assert(p != NULL);
1026 		/*
1027 		 * If we already have something in the stack and we're
1028 		 * in an expression, then rewind til we're not any more.
1029 		 */
1030 		while (parent->args == parent->expectargs)
1031 			parent = parent->parent;
1032 		cur = eqn_box_alloc(ep, parent);
1033 		cur->type = EQN_TEXT;
1034 		cur->text = p;
1035 		switch (tok) {
1036 		case EQN_TOK_FUNC:
1037 			cur->font = EQNFONT_ROMAN;
1038 			break;
1039 		case EQN_TOK_QUOTED:
1040 			if (cur->font == EQNFONT_NONE)
1041 				cur->font = EQNFONT_ITALIC;
1042 			break;
1043 		case EQN_TOK_SYM:
1044 			break;
1045 		default:
1046 			if (cur->font != EQNFONT_NONE || *p == '\0')
1047 				break;
1048 			cpn = p - 1;
1049 			ccln = CCL_LET;
1050 			split = NULL;
1051 			for (;;) {
1052 				/* Advance to next character. */
1053 				cp = cpn++;
1054 				ccl = ccln;
1055 				ccln = isalpha((unsigned char)*cpn) ? CCL_LET :
1056 				    isdigit((unsigned char)*cpn) ||
1057 				    (*cpn == '.' && (ccl == CCL_DIG ||
1058 				     isdigit((unsigned char)cpn[1]))) ?
1059 				    CCL_DIG : CCL_PUN;
1060 				/* No boundary before first character. */
1061 				if (cp < p)
1062 					continue;
1063 				cur->font = ccl == CCL_LET ?
1064 				    EQNFONT_ITALIC : EQNFONT_ROMAN;
1065 				if (*cp == '\\')
1066 					mandoc_escape(&cpn, NULL, NULL);
1067 				/* No boundary after last character. */
1068 				if (*cpn == '\0')
1069 					break;
1070 				if (ccln == ccl && *cp != ',' && *cpn != ',')
1071 					continue;
1072 				/* Boundary found, split the text. */
1073 				if (parent->args == parent->expectargs) {
1074 					/* Remove the text from the tree. */
1075 					if (cur->prev == NULL)
1076 						parent->first = cur->next;
1077 					else
1078 						cur->prev->next = NULL;
1079 					parent->last = cur->prev;
1080 					parent->args--;
1081 					/* Set up a list instead. */
1082 					split = eqn_box_alloc(ep, parent);
1083 					split->type = EQN_LIST;
1084 					/* Insert the word into the list. */
1085 					split->first = split->last = cur;
1086 					cur->parent = split;
1087 					cur->prev = NULL;
1088 					parent = split;
1089 				}
1090 				/* Append a new text box. */
1091 				nbox = eqn_box_alloc(ep, parent);
1092 				nbox->type = EQN_TEXT;
1093 				nbox->text = mandoc_strdup(cpn);
1094 				/* Truncate the old box. */
1095 				p = mandoc_strndup(cur->text,
1096 				    cpn - cur->text);
1097 				free(cur->text);
1098 				cur->text = p;
1099 				/* Setup to process the new box. */
1100 				cur = nbox;
1101 				p = nbox->text;
1102 				cpn = p - 1;
1103 				ccln = CCL_LET;
1104 			}
1105 			if (split != NULL)
1106 				parent = split->parent;
1107 			break;
1108 		}
1109 		break;
1110 	default:
1111 		abort();
1112 	}
1113 	goto next_tok;
1114 }
1115 
1116 void
1117 eqn_free(struct eqn_node *p)
1118 {
1119 	int		 i;
1120 
1121 	if (p == NULL)
1122 		return;
1123 
1124 	for (i = 0; i < (int)p->defsz; i++) {
1125 		free(p->defs[i].key);
1126 		free(p->defs[i].val);
1127 	}
1128 
1129 	free(p->data);
1130 	free(p->defs);
1131 	free(p);
1132 }
1133