xref: /illumos-gate/usr/src/cmd/mandoc/eqn.c (revision eb6b10e69fa5ba733da194d3ad71a0e63338be29)
1 /*	$Id: eqn.c,v 1.83 2018/12/14 06:33:14 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015, 2017, 2018 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <ctype.h>
24 #include <limits.h>
25 #include <stdio.h>
26 #include <stdlib.h>
27 #include <string.h>
28 #include <time.h>
29 
30 #include "mandoc_aux.h"
31 #include "mandoc.h"
32 #include "roff.h"
33 #include "eqn.h"
34 #include "libmandoc.h"
35 #include "eqn_parse.h"
36 
37 #define	EQN_NEST_MAX	 128 /* maximum nesting of defines */
38 #define	STRNEQ(p1, sz1, p2, sz2) \
39 	((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1)))
40 
41 enum	eqn_tok {
42 	EQN_TOK_DYAD = 0,
43 	EQN_TOK_VEC,
44 	EQN_TOK_UNDER,
45 	EQN_TOK_BAR,
46 	EQN_TOK_TILDE,
47 	EQN_TOK_HAT,
48 	EQN_TOK_DOT,
49 	EQN_TOK_DOTDOT,
50 	EQN_TOK_FWD,
51 	EQN_TOK_BACK,
52 	EQN_TOK_DOWN,
53 	EQN_TOK_UP,
54 	EQN_TOK_FAT,
55 	EQN_TOK_ROMAN,
56 	EQN_TOK_ITALIC,
57 	EQN_TOK_BOLD,
58 	EQN_TOK_SIZE,
59 	EQN_TOK_SUB,
60 	EQN_TOK_SUP,
61 	EQN_TOK_SQRT,
62 	EQN_TOK_OVER,
63 	EQN_TOK_FROM,
64 	EQN_TOK_TO,
65 	EQN_TOK_BRACE_OPEN,
66 	EQN_TOK_BRACE_CLOSE,
67 	EQN_TOK_GSIZE,
68 	EQN_TOK_GFONT,
69 	EQN_TOK_MARK,
70 	EQN_TOK_LINEUP,
71 	EQN_TOK_LEFT,
72 	EQN_TOK_RIGHT,
73 	EQN_TOK_PILE,
74 	EQN_TOK_LPILE,
75 	EQN_TOK_RPILE,
76 	EQN_TOK_CPILE,
77 	EQN_TOK_MATRIX,
78 	EQN_TOK_CCOL,
79 	EQN_TOK_LCOL,
80 	EQN_TOK_RCOL,
81 	EQN_TOK_DELIM,
82 	EQN_TOK_DEFINE,
83 	EQN_TOK_TDEFINE,
84 	EQN_TOK_NDEFINE,
85 	EQN_TOK_UNDEF,
86 	EQN_TOK_ABOVE,
87 	EQN_TOK__MAX,
88 	EQN_TOK_FUNC,
89 	EQN_TOK_QUOTED,
90 	EQN_TOK_SYM,
91 	EQN_TOK_EOF
92 };
93 
94 static	const char *eqn_toks[EQN_TOK__MAX] = {
95 	"dyad", /* EQN_TOK_DYAD */
96 	"vec", /* EQN_TOK_VEC */
97 	"under", /* EQN_TOK_UNDER */
98 	"bar", /* EQN_TOK_BAR */
99 	"tilde", /* EQN_TOK_TILDE */
100 	"hat", /* EQN_TOK_HAT */
101 	"dot", /* EQN_TOK_DOT */
102 	"dotdot", /* EQN_TOK_DOTDOT */
103 	"fwd", /* EQN_TOK_FWD * */
104 	"back", /* EQN_TOK_BACK */
105 	"down", /* EQN_TOK_DOWN */
106 	"up", /* EQN_TOK_UP */
107 	"fat", /* EQN_TOK_FAT */
108 	"roman", /* EQN_TOK_ROMAN */
109 	"italic", /* EQN_TOK_ITALIC */
110 	"bold", /* EQN_TOK_BOLD */
111 	"size", /* EQN_TOK_SIZE */
112 	"sub", /* EQN_TOK_SUB */
113 	"sup", /* EQN_TOK_SUP */
114 	"sqrt", /* EQN_TOK_SQRT */
115 	"over", /* EQN_TOK_OVER */
116 	"from", /* EQN_TOK_FROM */
117 	"to", /* EQN_TOK_TO */
118 	"{", /* EQN_TOK_BRACE_OPEN */
119 	"}", /* EQN_TOK_BRACE_CLOSE */
120 	"gsize", /* EQN_TOK_GSIZE */
121 	"gfont", /* EQN_TOK_GFONT */
122 	"mark", /* EQN_TOK_MARK */
123 	"lineup", /* EQN_TOK_LINEUP */
124 	"left", /* EQN_TOK_LEFT */
125 	"right", /* EQN_TOK_RIGHT */
126 	"pile", /* EQN_TOK_PILE */
127 	"lpile", /* EQN_TOK_LPILE */
128 	"rpile", /* EQN_TOK_RPILE */
129 	"cpile", /* EQN_TOK_CPILE */
130 	"matrix", /* EQN_TOK_MATRIX */
131 	"ccol", /* EQN_TOK_CCOL */
132 	"lcol", /* EQN_TOK_LCOL */
133 	"rcol", /* EQN_TOK_RCOL */
134 	"delim", /* EQN_TOK_DELIM */
135 	"define", /* EQN_TOK_DEFINE */
136 	"tdefine", /* EQN_TOK_TDEFINE */
137 	"ndefine", /* EQN_TOK_NDEFINE */
138 	"undef", /* EQN_TOK_UNDEF */
139 	"above", /* EQN_TOK_ABOVE */
140 };
141 
142 static	const char *const eqn_func[] = {
143 	"acos",	"acsc",	"and",	"arc",	"asec",	"asin", "atan",
144 	"cos",	"cosh", "coth",	"csc",	"det",	"exp",	"for",
145 	"if",	"lim",	"ln",	"log",	"max",	"min",
146 	"sec",	"sin",	"sinh",	"tan",	"tanh",	"Im",	"Re",
147 };
148 
149 enum	eqn_symt {
150 	EQNSYM_alpha = 0,
151 	EQNSYM_beta,
152 	EQNSYM_chi,
153 	EQNSYM_delta,
154 	EQNSYM_epsilon,
155 	EQNSYM_eta,
156 	EQNSYM_gamma,
157 	EQNSYM_iota,
158 	EQNSYM_kappa,
159 	EQNSYM_lambda,
160 	EQNSYM_mu,
161 	EQNSYM_nu,
162 	EQNSYM_omega,
163 	EQNSYM_omicron,
164 	EQNSYM_phi,
165 	EQNSYM_pi,
166 	EQNSYM_ps,
167 	EQNSYM_rho,
168 	EQNSYM_sigma,
169 	EQNSYM_tau,
170 	EQNSYM_theta,
171 	EQNSYM_upsilon,
172 	EQNSYM_xi,
173 	EQNSYM_zeta,
174 	EQNSYM_DELTA,
175 	EQNSYM_GAMMA,
176 	EQNSYM_LAMBDA,
177 	EQNSYM_OMEGA,
178 	EQNSYM_PHI,
179 	EQNSYM_PI,
180 	EQNSYM_PSI,
181 	EQNSYM_SIGMA,
182 	EQNSYM_THETA,
183 	EQNSYM_UPSILON,
184 	EQNSYM_XI,
185 	EQNSYM_inter,
186 	EQNSYM_union,
187 	EQNSYM_prod,
188 	EQNSYM_int,
189 	EQNSYM_sum,
190 	EQNSYM_grad,
191 	EQNSYM_del,
192 	EQNSYM_times,
193 	EQNSYM_cdot,
194 	EQNSYM_nothing,
195 	EQNSYM_approx,
196 	EQNSYM_prime,
197 	EQNSYM_half,
198 	EQNSYM_partial,
199 	EQNSYM_inf,
200 	EQNSYM_muchgreat,
201 	EQNSYM_muchless,
202 	EQNSYM_larrow,
203 	EQNSYM_rarrow,
204 	EQNSYM_pm,
205 	EQNSYM_nequal,
206 	EQNSYM_equiv,
207 	EQNSYM_lessequal,
208 	EQNSYM_moreequal,
209 	EQNSYM_minus,
210 	EQNSYM__MAX
211 };
212 
213 struct	eqnsym {
214 	const char	*str;
215 	const char	*sym;
216 };
217 
218 static	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
219 	{ "alpha", "*a" }, /* EQNSYM_alpha */
220 	{ "beta", "*b" }, /* EQNSYM_beta */
221 	{ "chi", "*x" }, /* EQNSYM_chi */
222 	{ "delta", "*d" }, /* EQNSYM_delta */
223 	{ "epsilon", "*e" }, /* EQNSYM_epsilon */
224 	{ "eta", "*y" }, /* EQNSYM_eta */
225 	{ "gamma", "*g" }, /* EQNSYM_gamma */
226 	{ "iota", "*i" }, /* EQNSYM_iota */
227 	{ "kappa", "*k" }, /* EQNSYM_kappa */
228 	{ "lambda", "*l" }, /* EQNSYM_lambda */
229 	{ "mu", "*m" }, /* EQNSYM_mu */
230 	{ "nu", "*n" }, /* EQNSYM_nu */
231 	{ "omega", "*w" }, /* EQNSYM_omega */
232 	{ "omicron", "*o" }, /* EQNSYM_omicron */
233 	{ "phi", "*f" }, /* EQNSYM_phi */
234 	{ "pi", "*p" }, /* EQNSYM_pi */
235 	{ "psi", "*q" }, /* EQNSYM_psi */
236 	{ "rho", "*r" }, /* EQNSYM_rho */
237 	{ "sigma", "*s" }, /* EQNSYM_sigma */
238 	{ "tau", "*t" }, /* EQNSYM_tau */
239 	{ "theta", "*h" }, /* EQNSYM_theta */
240 	{ "upsilon", "*u" }, /* EQNSYM_upsilon */
241 	{ "xi", "*c" }, /* EQNSYM_xi */
242 	{ "zeta", "*z" }, /* EQNSYM_zeta */
243 	{ "DELTA", "*D" }, /* EQNSYM_DELTA */
244 	{ "GAMMA", "*G" }, /* EQNSYM_GAMMA */
245 	{ "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
246 	{ "OMEGA", "*W" }, /* EQNSYM_OMEGA */
247 	{ "PHI", "*F" }, /* EQNSYM_PHI */
248 	{ "PI", "*P" }, /* EQNSYM_PI */
249 	{ "PSI", "*Q" }, /* EQNSYM_PSI */
250 	{ "SIGMA", "*S" }, /* EQNSYM_SIGMA */
251 	{ "THETA", "*H" }, /* EQNSYM_THETA */
252 	{ "UPSILON", "*U" }, /* EQNSYM_UPSILON */
253 	{ "XI", "*C" }, /* EQNSYM_XI */
254 	{ "inter", "ca" }, /* EQNSYM_inter */
255 	{ "union", "cu" }, /* EQNSYM_union */
256 	{ "prod", "product" }, /* EQNSYM_prod */
257 	{ "int", "integral" }, /* EQNSYM_int */
258 	{ "sum", "sum" }, /* EQNSYM_sum */
259 	{ "grad", "gr" }, /* EQNSYM_grad */
260 	{ "del", "gr" }, /* EQNSYM_del */
261 	{ "times", "mu" }, /* EQNSYM_times */
262 	{ "cdot", "pc" }, /* EQNSYM_cdot */
263 	{ "nothing", "&" }, /* EQNSYM_nothing */
264 	{ "approx", "~~" }, /* EQNSYM_approx */
265 	{ "prime", "fm" }, /* EQNSYM_prime */
266 	{ "half", "12" }, /* EQNSYM_half */
267 	{ "partial", "pd" }, /* EQNSYM_partial */
268 	{ "inf", "if" }, /* EQNSYM_inf */
269 	{ ">>", ">>" }, /* EQNSYM_muchgreat */
270 	{ "<<", "<<" }, /* EQNSYM_muchless */
271 	{ "<-", "<-" }, /* EQNSYM_larrow */
272 	{ "->", "->" }, /* EQNSYM_rarrow */
273 	{ "+-", "+-" }, /* EQNSYM_pm */
274 	{ "!=", "!=" }, /* EQNSYM_nequal */
275 	{ "==", "==" }, /* EQNSYM_equiv */
276 	{ "<=", "<=" }, /* EQNSYM_lessequal */
277 	{ ">=", ">=" }, /* EQNSYM_moreequal */
278 	{ "-", "mi" }, /* EQNSYM_minus */
279 };
280 
281 enum	parse_mode {
282 	MODE_QUOTED,
283 	MODE_NOSUB,
284 	MODE_SUB,
285 	MODE_TOK
286 };
287 
288 struct	eqn_def {
289 	char		 *key;
290 	size_t		  keysz;
291 	char		 *val;
292 	size_t		  valsz;
293 };
294 
295 static	struct eqn_box	*eqn_box_alloc(struct eqn_node *, struct eqn_box *);
296 static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *,
297 				struct eqn_box *);
298 static	void		 eqn_def(struct eqn_node *);
299 static	struct eqn_def	*eqn_def_find(struct eqn_node *);
300 static	void		 eqn_delim(struct eqn_node *);
301 static	enum eqn_tok	 eqn_next(struct eqn_node *, enum parse_mode);
302 static	void		 eqn_undef(struct eqn_node *);
303 
304 
305 struct eqn_node *
306 eqn_alloc(void)
307 {
308 	struct eqn_node *ep;
309 
310 	ep = mandoc_calloc(1, sizeof(*ep));
311 	ep->gsize = EQN_DEFSIZE;
312 	return ep;
313 }
314 
315 void
316 eqn_reset(struct eqn_node *ep)
317 {
318 	free(ep->data);
319 	ep->data = ep->start = ep->end = NULL;
320 	ep->sz = ep->toksz = 0;
321 }
322 
323 void
324 eqn_read(struct eqn_node *ep, const char *p)
325 {
326 	char		*cp;
327 
328 	if (ep->data == NULL) {
329 		ep->sz = strlen(p);
330 		ep->data = mandoc_strdup(p);
331 	} else {
332 		ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p);
333 		free(ep->data);
334 		ep->data = cp;
335 	}
336 	ep->sz += 1;
337 }
338 
339 /*
340  * Find the key "key" of the give size within our eqn-defined values.
341  */
342 static struct eqn_def *
343 eqn_def_find(struct eqn_node *ep)
344 {
345 	int		 i;
346 
347 	for (i = 0; i < (int)ep->defsz; i++)
348 		if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
349 		    ep->defs[i].keysz, ep->start, ep->toksz))
350 			return &ep->defs[i];
351 
352 	return NULL;
353 }
354 
355 /*
356  * Parse a token from the input text.  The modes are:
357  * MODE_QUOTED: Use *ep->start as the delimiter; the token ends
358  *   before its next occurence.  Do not interpret the token in any
359  *   way and return EQN_TOK_QUOTED.  All other modes behave like
360  *   MODE_QUOTED when *ep->start is '"'.
361  * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it;
362  *   otherwise, it ends before the next whitespace or brace.
363  *   Do not interpret the token and return EQN_TOK__MAX.
364  * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an
365  *   alias created with define.  If it is an alias, replace it with
366  *   its string value and reparse.
367  * MODE_TOK: Like MODE_SUB, but also check the token against the list
368  *   of tokens, and if there is a match, return that token.  Otherwise,
369  *   if the token matches a symbol, return EQN_TOK_SYM; if it matches
370  *   a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX.  Except for
371  *   a token match, *ep->start is set to an allocated string that the
372  *   caller is expected to free.
373  * All modes skip whitespace following the end of the token.
374  */
375 static enum eqn_tok
376 eqn_next(struct eqn_node *ep, enum parse_mode mode)
377 {
378 	static int	 last_len, lim;
379 
380 	struct eqn_def	*def;
381 	size_t		 start;
382 	int		 diff, i, quoted;
383 	enum eqn_tok	 tok;
384 
385 	/*
386 	 * Reset the recursion counter after advancing
387 	 * beyond the end of the previous substitution.
388 	 */
389 	if (ep->end - ep->data >= last_len)
390 		lim = 0;
391 
392 	ep->start = ep->end;
393 	quoted = mode == MODE_QUOTED;
394 	for (;;) {
395 		switch (*ep->start) {
396 		case '\0':
397 			ep->toksz = 0;
398 			return EQN_TOK_EOF;
399 		case '"':
400 			quoted = 1;
401 			break;
402 		default:
403 			break;
404 		}
405 		if (quoted) {
406 			ep->end = strchr(ep->start + 1, *ep->start);
407 			ep->start++;  /* Skip opening quote. */
408 			if (ep->end == NULL) {
409 				mandoc_msg(MANDOCERR_ARG_QUOTE,
410 				    ep->node->line, ep->node->pos, NULL);
411 				ep->end = strchr(ep->start, '\0');
412 			}
413 		} else {
414 			ep->end = ep->start + 1;
415 			if (*ep->start != '{' && *ep->start != '}')
416 				ep->end += strcspn(ep->end, " ^~\"{}\t");
417 		}
418 		ep->toksz = ep->end - ep->start;
419 		if (quoted && *ep->end != '\0')
420 			ep->end++;  /* Skip closing quote. */
421 		while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL)
422 			ep->end++;
423 		if (quoted)  /* Cannot return, may have to strndup. */
424 			break;
425 		if (mode == MODE_NOSUB)
426 			return EQN_TOK__MAX;
427 		if ((def = eqn_def_find(ep)) == NULL)
428 			break;
429 		if (++lim > EQN_NEST_MAX) {
430 			mandoc_msg(MANDOCERR_ROFFLOOP,
431 			    ep->node->line, ep->node->pos, NULL);
432 			return EQN_TOK_EOF;
433 		}
434 
435 		/* Replace a defined name with its string value. */
436 		if ((diff = def->valsz - ep->toksz) > 0) {
437 			start = ep->start - ep->data;
438 			ep->sz += diff;
439 			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
440 			ep->start = ep->data + start;
441 		}
442 		if (diff)
443 			memmove(ep->start + def->valsz, ep->start + ep->toksz,
444 			    strlen(ep->start + ep->toksz) + 1);
445 		memcpy(ep->start, def->val, def->valsz);
446 		last_len = ep->start - ep->data + def->valsz;
447 	}
448 	if (mode != MODE_TOK)
449 		return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX;
450 	if (quoted) {
451 		ep->start = mandoc_strndup(ep->start, ep->toksz);
452 		return EQN_TOK_QUOTED;
453 	}
454 	for (tok = 0; tok < EQN_TOK__MAX; tok++)
455 		if (STRNEQ(ep->start, ep->toksz,
456 		    eqn_toks[tok], strlen(eqn_toks[tok])))
457 			return tok;
458 
459 	for (i = 0; i < EQNSYM__MAX; i++) {
460 		if (STRNEQ(ep->start, ep->toksz,
461 		    eqnsyms[i].str, strlen(eqnsyms[i].str))) {
462 			mandoc_asprintf(&ep->start,
463 			    "\\[%s]", eqnsyms[i].sym);
464 			return EQN_TOK_SYM;
465 		}
466 	}
467 	ep->start = mandoc_strndup(ep->start, ep->toksz);
468 	for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++)
469 		if (STRNEQ(ep->start, ep->toksz,
470 		    eqn_func[i], strlen(eqn_func[i])))
471 			return EQN_TOK_FUNC;
472 	return EQN_TOK__MAX;
473 }
474 
475 void
476 eqn_box_free(struct eqn_box *bp)
477 {
478 	if (bp == NULL)
479 		return;
480 
481 	if (bp->first)
482 		eqn_box_free(bp->first);
483 	if (bp->next)
484 		eqn_box_free(bp->next);
485 
486 	free(bp->text);
487 	free(bp->left);
488 	free(bp->right);
489 	free(bp->top);
490 	free(bp->bottom);
491 	free(bp);
492 }
493 
494 struct eqn_box *
495 eqn_box_new(void)
496 {
497 	struct eqn_box	*bp;
498 
499 	bp = mandoc_calloc(1, sizeof(*bp));
500 	bp->expectargs = UINT_MAX;
501 	return bp;
502 }
503 
504 /*
505  * Allocate a box as the last child of the parent node.
506  */
507 static struct eqn_box *
508 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
509 {
510 	struct eqn_box	*bp;
511 
512 	bp = eqn_box_new();
513 	bp->parent = parent;
514 	bp->parent->args++;
515 	bp->font = bp->parent->font;
516 	bp->size = ep->gsize;
517 
518 	if (NULL != parent->first) {
519 		parent->last->next = bp;
520 		bp->prev = parent->last;
521 	} else
522 		parent->first = bp;
523 
524 	parent->last = bp;
525 	return bp;
526 }
527 
528 /*
529  * Reparent the current last node (of the current parent) under a new
530  * EQN_SUBEXPR as the first element.
531  * Then return the new parent.
532  * The new EQN_SUBEXPR will have a two-child limit.
533  */
534 static struct eqn_box *
535 eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent)
536 {
537 	struct eqn_box	*b, *newb;
538 
539 	assert(NULL != parent->last);
540 	b = parent->last;
541 	if (parent->last == parent->first)
542 		parent->first = NULL;
543 	parent->args--;
544 	parent->last = b->prev;
545 	b->prev = NULL;
546 	newb = eqn_box_alloc(ep, parent);
547 	newb->type = EQN_SUBEXPR;
548 	newb->expectargs = 2;
549 	newb->args = 1;
550 	newb->first = newb->last = b;
551 	newb->first->next = NULL;
552 	b->parent = newb;
553 	return newb;
554 }
555 
556 /*
557  * Parse the "delim" control statement.
558  */
559 static void
560 eqn_delim(struct eqn_node *ep)
561 {
562 	if (ep->end[0] == '\0' || ep->end[1] == '\0') {
563 		mandoc_msg(MANDOCERR_REQ_EMPTY,
564 		    ep->node->line, ep->node->pos, "delim");
565 		if (ep->end[0] != '\0')
566 			ep->end++;
567 	} else if (strncmp(ep->end, "off", 3) == 0) {
568 		ep->delim = 0;
569 		ep->end += 3;
570 	} else if (strncmp(ep->end, "on", 2) == 0) {
571 		if (ep->odelim && ep->cdelim)
572 			ep->delim = 1;
573 		ep->end += 2;
574 	} else {
575 		ep->odelim = *ep->end++;
576 		ep->cdelim = *ep->end++;
577 		ep->delim = 1;
578 	}
579 }
580 
581 /*
582  * Undefine a previously-defined string.
583  */
584 static void
585 eqn_undef(struct eqn_node *ep)
586 {
587 	struct eqn_def	*def;
588 
589 	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
590 		mandoc_msg(MANDOCERR_REQ_EMPTY,
591 		    ep->node->line, ep->node->pos, "undef");
592 		return;
593 	}
594 	if ((def = eqn_def_find(ep)) == NULL)
595 		return;
596 	free(def->key);
597 	free(def->val);
598 	def->key = def->val = NULL;
599 	def->keysz = def->valsz = 0;
600 }
601 
602 static void
603 eqn_def(struct eqn_node *ep)
604 {
605 	struct eqn_def	*def;
606 	int		 i;
607 
608 	if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) {
609 		mandoc_msg(MANDOCERR_REQ_EMPTY,
610 		    ep->node->line, ep->node->pos, "define");
611 		return;
612 	}
613 
614 	/*
615 	 * Search for a key that already exists.
616 	 * Create a new key if none is found.
617 	 */
618 	if ((def = eqn_def_find(ep)) == NULL) {
619 		/* Find holes in string array. */
620 		for (i = 0; i < (int)ep->defsz; i++)
621 			if (0 == ep->defs[i].keysz)
622 				break;
623 
624 		if (i == (int)ep->defsz) {
625 			ep->defsz++;
626 			ep->defs = mandoc_reallocarray(ep->defs,
627 			    ep->defsz, sizeof(struct eqn_def));
628 			ep->defs[i].key = ep->defs[i].val = NULL;
629 		}
630 
631 		def = ep->defs + i;
632 		free(def->key);
633 		def->key = mandoc_strndup(ep->start, ep->toksz);
634 		def->keysz = ep->toksz;
635 	}
636 
637 	if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) {
638 		mandoc_msg(MANDOCERR_REQ_EMPTY,
639 		    ep->node->line, ep->node->pos, "define %s", def->key);
640 		free(def->key);
641 		free(def->val);
642 		def->key = def->val = NULL;
643 		def->keysz = def->valsz = 0;
644 		return;
645 	}
646 	free(def->val);
647 	def->val = mandoc_strndup(ep->start, ep->toksz);
648 	def->valsz = ep->toksz;
649 }
650 
651 void
652 eqn_parse(struct eqn_node *ep)
653 {
654 	struct eqn_box	*cur, *nbox, *parent, *split;
655 	const char	*cp, *cpn;
656 	char		*p;
657 	enum eqn_tok	 tok;
658 	enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln;
659 	int		 size;
660 
661 	parent = ep->node->eqn;
662 	assert(parent != NULL);
663 
664 	/*
665 	 * Empty equation.
666 	 * Do not add it to the high-level syntax tree.
667 	 */
668 
669 	if (ep->data == NULL)
670 		return;
671 
672 	ep->start = ep->end = ep->data + strspn(ep->data, " ^~");
673 
674 next_tok:
675 	tok = eqn_next(ep, MODE_TOK);
676 	switch (tok) {
677 	case EQN_TOK_UNDEF:
678 		eqn_undef(ep);
679 		break;
680 	case EQN_TOK_NDEFINE:
681 	case EQN_TOK_DEFINE:
682 		eqn_def(ep);
683 		break;
684 	case EQN_TOK_TDEFINE:
685 		if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF ||
686 		    eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF)
687 			mandoc_msg(MANDOCERR_REQ_EMPTY,
688 			    ep->node->line, ep->node->pos, "tdefine");
689 		break;
690 	case EQN_TOK_DELIM:
691 		eqn_delim(ep);
692 		break;
693 	case EQN_TOK_GFONT:
694 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
695 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
696 			    ep->node->pos, "%s", eqn_toks[tok]);
697 		break;
698 	case EQN_TOK_MARK:
699 	case EQN_TOK_LINEUP:
700 		/* Ignore these. */
701 		break;
702 	case EQN_TOK_DYAD:
703 	case EQN_TOK_VEC:
704 	case EQN_TOK_UNDER:
705 	case EQN_TOK_BAR:
706 	case EQN_TOK_TILDE:
707 	case EQN_TOK_HAT:
708 	case EQN_TOK_DOT:
709 	case EQN_TOK_DOTDOT:
710 		if (parent->last == NULL) {
711 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
712 			    ep->node->pos, "%s", eqn_toks[tok]);
713 			cur = eqn_box_alloc(ep, parent);
714 			cur->type = EQN_TEXT;
715 			cur->text = mandoc_strdup("");
716 		}
717 		parent = eqn_box_makebinary(ep, parent);
718 		parent->type = EQN_LIST;
719 		parent->expectargs = 1;
720 		parent->font = EQNFONT_ROMAN;
721 		switch (tok) {
722 		case EQN_TOK_DOTDOT:
723 			parent->top = mandoc_strdup("\\[ad]");
724 			break;
725 		case EQN_TOK_VEC:
726 			parent->top = mandoc_strdup("\\[->]");
727 			break;
728 		case EQN_TOK_DYAD:
729 			parent->top = mandoc_strdup("\\[<>]");
730 			break;
731 		case EQN_TOK_TILDE:
732 			parent->top = mandoc_strdup("\\[a~]");
733 			break;
734 		case EQN_TOK_UNDER:
735 			parent->bottom = mandoc_strdup("\\[ul]");
736 			break;
737 		case EQN_TOK_BAR:
738 			parent->top = mandoc_strdup("\\[rn]");
739 			break;
740 		case EQN_TOK_DOT:
741 			parent->top = mandoc_strdup("\\[a.]");
742 			break;
743 		case EQN_TOK_HAT:
744 			parent->top = mandoc_strdup("\\[ha]");
745 			break;
746 		default:
747 			abort();
748 		}
749 		parent = parent->parent;
750 		break;
751 	case EQN_TOK_FWD:
752 	case EQN_TOK_BACK:
753 	case EQN_TOK_DOWN:
754 	case EQN_TOK_UP:
755 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF)
756 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
757 			    ep->node->pos, "%s", eqn_toks[tok]);
758 		break;
759 	case EQN_TOK_FAT:
760 	case EQN_TOK_ROMAN:
761 	case EQN_TOK_ITALIC:
762 	case EQN_TOK_BOLD:
763 		while (parent->args == parent->expectargs)
764 			parent = parent->parent;
765 		/*
766 		 * These values apply to the next word or sequence of
767 		 * words; thus, we mark that we'll have a child with
768 		 * exactly one of those.
769 		 */
770 		parent = eqn_box_alloc(ep, parent);
771 		parent->type = EQN_LIST;
772 		parent->expectargs = 1;
773 		switch (tok) {
774 		case EQN_TOK_FAT:
775 			parent->font = EQNFONT_FAT;
776 			break;
777 		case EQN_TOK_ROMAN:
778 			parent->font = EQNFONT_ROMAN;
779 			break;
780 		case EQN_TOK_ITALIC:
781 			parent->font = EQNFONT_ITALIC;
782 			break;
783 		case EQN_TOK_BOLD:
784 			parent->font = EQNFONT_BOLD;
785 			break;
786 		default:
787 			abort();
788 		}
789 		break;
790 	case EQN_TOK_SIZE:
791 	case EQN_TOK_GSIZE:
792 		/* Accept two values: integral size and a single. */
793 		if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
794 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
795 			    ep->node->pos, "%s", eqn_toks[tok]);
796 			break;
797 		}
798 		size = mandoc_strntoi(ep->start, ep->toksz, 10);
799 		if (-1 == size) {
800 			mandoc_msg(MANDOCERR_IT_NONUM, ep->node->line,
801 			    ep->node->pos, "%s", eqn_toks[tok]);
802 			break;
803 		}
804 		if (EQN_TOK_GSIZE == tok) {
805 			ep->gsize = size;
806 			break;
807 		}
808 		while (parent->args == parent->expectargs)
809 			parent = parent->parent;
810 		parent = eqn_box_alloc(ep, parent);
811 		parent->type = EQN_LIST;
812 		parent->expectargs = 1;
813 		parent->size = size;
814 		break;
815 	case EQN_TOK_FROM:
816 	case EQN_TOK_TO:
817 	case EQN_TOK_SUB:
818 	case EQN_TOK_SUP:
819 		/*
820 		 * We have a left-right-associative expression.
821 		 * Repivot under a positional node, open a child scope
822 		 * and keep on reading.
823 		 */
824 		if (parent->last == NULL) {
825 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
826 			    ep->node->pos, "%s", eqn_toks[tok]);
827 			cur = eqn_box_alloc(ep, parent);
828 			cur->type = EQN_TEXT;
829 			cur->text = mandoc_strdup("");
830 		}
831 		while (parent->expectargs == 1 && parent->args == 1)
832 			parent = parent->parent;
833 		if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO)  {
834 			for (cur = parent; cur != NULL; cur = cur->parent)
835 				if (cur->pos == EQNPOS_SUB ||
836 				    cur->pos == EQNPOS_SUP ||
837 				    cur->pos == EQNPOS_SUBSUP ||
838 				    cur->pos == EQNPOS_SQRT ||
839 				    cur->pos == EQNPOS_OVER)
840 					break;
841 			if (cur != NULL)
842 				parent = cur->parent;
843 		}
844 		if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) {
845 			parent->expectargs = 3;
846 			parent->pos = EQNPOS_SUBSUP;
847 			break;
848 		}
849 		if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) {
850 			parent->expectargs = 3;
851 			parent->pos = EQNPOS_FROMTO;
852 			break;
853 		}
854 		parent = eqn_box_makebinary(ep, parent);
855 		switch (tok) {
856 		case EQN_TOK_FROM:
857 			parent->pos = EQNPOS_FROM;
858 			break;
859 		case EQN_TOK_TO:
860 			parent->pos = EQNPOS_TO;
861 			break;
862 		case EQN_TOK_SUP:
863 			parent->pos = EQNPOS_SUP;
864 			break;
865 		case EQN_TOK_SUB:
866 			parent->pos = EQNPOS_SUB;
867 			break;
868 		default:
869 			abort();
870 		}
871 		break;
872 	case EQN_TOK_SQRT:
873 		while (parent->args == parent->expectargs)
874 			parent = parent->parent;
875 		/*
876 		 * Accept a left-right-associative set of arguments just
877 		 * like sub and sup and friends but without rebalancing
878 		 * under a pivot.
879 		 */
880 		parent = eqn_box_alloc(ep, parent);
881 		parent->type = EQN_SUBEXPR;
882 		parent->pos = EQNPOS_SQRT;
883 		parent->expectargs = 1;
884 		break;
885 	case EQN_TOK_OVER:
886 		/*
887 		 * We have a right-left-associative fraction.
888 		 * Close out anything that's currently open, then
889 		 * rebalance and continue reading.
890 		 */
891 		if (parent->last == NULL) {
892 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line,
893 			    ep->node->pos, "%s", eqn_toks[tok]);
894 			cur = eqn_box_alloc(ep, parent);
895 			cur->type = EQN_TEXT;
896 			cur->text = mandoc_strdup("");
897 		}
898 		while (parent->args == parent->expectargs)
899 			parent = parent->parent;
900 		while (EQN_SUBEXPR == parent->type)
901 			parent = parent->parent;
902 		parent = eqn_box_makebinary(ep, parent);
903 		parent->pos = EQNPOS_OVER;
904 		break;
905 	case EQN_TOK_RIGHT:
906 	case EQN_TOK_BRACE_CLOSE:
907 		/*
908 		 * Close out the existing brace.
909 		 * FIXME: this is a shitty sentinel: we should really
910 		 * have a native EQN_BRACE type or whatnot.
911 		 */
912 		for (cur = parent; cur != NULL; cur = cur->parent)
913 			if (cur->type == EQN_LIST &&
914 			    cur->expectargs > 1 &&
915 			    (tok == EQN_TOK_BRACE_CLOSE ||
916 			     cur->left != NULL))
917 				break;
918 		if (cur == NULL) {
919 			mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->node->line,
920 			    ep->node->pos, "%s", eqn_toks[tok]);
921 			break;
922 		}
923 		parent = cur;
924 		if (EQN_TOK_RIGHT == tok) {
925 			if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
926 				mandoc_msg(MANDOCERR_REQ_EMPTY,
927 				    ep->node->line, ep->node->pos,
928 				    "%s", eqn_toks[tok]);
929 				break;
930 			}
931 			/* Handling depends on right/left. */
932 			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
933 				parent->right = mandoc_strdup("\\[rc]");
934 			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
935 				parent->right = mandoc_strdup("\\[rf]");
936 			else
937 				parent->right =
938 				    mandoc_strndup(ep->start, ep->toksz);
939 		}
940 		parent = parent->parent;
941 		if (tok == EQN_TOK_BRACE_CLOSE &&
942 		    (parent->type == EQN_PILE ||
943 		     parent->type == EQN_MATRIX))
944 			parent = parent->parent;
945 		/* Close out any "singleton" lists. */
946 		while (parent->type == EQN_LIST &&
947 		    parent->expectargs == 1 &&
948 		    parent->args == 1)
949 			parent = parent->parent;
950 		break;
951 	case EQN_TOK_BRACE_OPEN:
952 	case EQN_TOK_LEFT:
953 		/*
954 		 * If we already have something in the stack and we're
955 		 * in an expression, then rewind til we're not any more
956 		 * (just like with the text node).
957 		 */
958 		while (parent->args == parent->expectargs)
959 			parent = parent->parent;
960 		if (EQN_TOK_LEFT == tok &&
961 		    eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) {
962 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line,
963 			    ep->node->pos, "%s", eqn_toks[tok]);
964 			break;
965 		}
966 		parent = eqn_box_alloc(ep, parent);
967 		parent->type = EQN_LIST;
968 		if (EQN_TOK_LEFT == tok) {
969 			if (STRNEQ(ep->start, ep->toksz, "ceiling", 7))
970 				parent->left = mandoc_strdup("\\[lc]");
971 			else if (STRNEQ(ep->start, ep->toksz, "floor", 5))
972 				parent->left = mandoc_strdup("\\[lf]");
973 			else
974 				parent->left =
975 				    mandoc_strndup(ep->start, ep->toksz);
976 		}
977 		break;
978 	case EQN_TOK_PILE:
979 	case EQN_TOK_LPILE:
980 	case EQN_TOK_RPILE:
981 	case EQN_TOK_CPILE:
982 	case EQN_TOK_CCOL:
983 	case EQN_TOK_LCOL:
984 	case EQN_TOK_RCOL:
985 		while (parent->args == parent->expectargs)
986 			parent = parent->parent;
987 		parent = eqn_box_alloc(ep, parent);
988 		parent->type = EQN_PILE;
989 		parent->expectargs = 1;
990 		break;
991 	case EQN_TOK_ABOVE:
992 		for (cur = parent; cur != NULL; cur = cur->parent)
993 			if (cur->type == EQN_PILE)
994 				break;
995 		if (cur == NULL) {
996 			mandoc_msg(MANDOCERR_IT_STRAY, ep->node->line,
997 			    ep->node->pos, "%s", eqn_toks[tok]);
998 			break;
999 		}
1000 		parent = eqn_box_alloc(ep, cur);
1001 		parent->type = EQN_LIST;
1002 		break;
1003 	case EQN_TOK_MATRIX:
1004 		while (parent->args == parent->expectargs)
1005 			parent = parent->parent;
1006 		parent = eqn_box_alloc(ep, parent);
1007 		parent->type = EQN_MATRIX;
1008 		parent->expectargs = 1;
1009 		break;
1010 	case EQN_TOK_EOF:
1011 		return;
1012 	case EQN_TOK__MAX:
1013 	case EQN_TOK_FUNC:
1014 	case EQN_TOK_QUOTED:
1015 	case EQN_TOK_SYM:
1016 		p = ep->start;
1017 		assert(p != NULL);
1018 		/*
1019 		 * If we already have something in the stack and we're
1020 		 * in an expression, then rewind til we're not any more.
1021 		 */
1022 		while (parent->args == parent->expectargs)
1023 			parent = parent->parent;
1024 		cur = eqn_box_alloc(ep, parent);
1025 		cur->type = EQN_TEXT;
1026 		cur->text = p;
1027 		switch (tok) {
1028 		case EQN_TOK_FUNC:
1029 			cur->font = EQNFONT_ROMAN;
1030 			break;
1031 		case EQN_TOK_QUOTED:
1032 			if (cur->font == EQNFONT_NONE)
1033 				cur->font = EQNFONT_ITALIC;
1034 			break;
1035 		case EQN_TOK_SYM:
1036 			break;
1037 		default:
1038 			if (cur->font != EQNFONT_NONE || *p == '\0')
1039 				break;
1040 			cpn = p - 1;
1041 			ccln = CCL_LET;
1042 			split = NULL;
1043 			for (;;) {
1044 				/* Advance to next character. */
1045 				cp = cpn++;
1046 				ccl = ccln;
1047 				ccln = isalpha((unsigned char)*cpn) ? CCL_LET :
1048 				    isdigit((unsigned char)*cpn) ||
1049 				    (*cpn == '.' && (ccl == CCL_DIG ||
1050 				     isdigit((unsigned char)cpn[1]))) ?
1051 				    CCL_DIG : CCL_PUN;
1052 				/* No boundary before first character. */
1053 				if (cp < p)
1054 					continue;
1055 				cur->font = ccl == CCL_LET ?
1056 				    EQNFONT_ITALIC : EQNFONT_ROMAN;
1057 				if (*cp == '\\')
1058 					mandoc_escape(&cpn, NULL, NULL);
1059 				/* No boundary after last character. */
1060 				if (*cpn == '\0')
1061 					break;
1062 				if (ccln == ccl && *cp != ',' && *cpn != ',')
1063 					continue;
1064 				/* Boundary found, split the text. */
1065 				if (parent->args == parent->expectargs) {
1066 					/* Remove the text from the tree. */
1067 					if (cur->prev == NULL)
1068 						parent->first = cur->next;
1069 					else
1070 						cur->prev->next = NULL;
1071 					parent->last = cur->prev;
1072 					parent->args--;
1073 					/* Set up a list instead. */
1074 					split = eqn_box_alloc(ep, parent);
1075 					split->type = EQN_LIST;
1076 					/* Insert the word into the list. */
1077 					split->first = split->last = cur;
1078 					cur->parent = split;
1079 					cur->prev = NULL;
1080 					parent = split;
1081 				}
1082 				/* Append a new text box. */
1083 				nbox = eqn_box_alloc(ep, parent);
1084 				nbox->type = EQN_TEXT;
1085 				nbox->text = mandoc_strdup(cpn);
1086 				/* Truncate the old box. */
1087 				p = mandoc_strndup(cur->text,
1088 				    cpn - cur->text);
1089 				free(cur->text);
1090 				cur->text = p;
1091 				/* Setup to process the new box. */
1092 				cur = nbox;
1093 				p = nbox->text;
1094 				cpn = p - 1;
1095 				ccln = CCL_LET;
1096 			}
1097 			if (split != NULL)
1098 				parent = split->parent;
1099 			break;
1100 		}
1101 		break;
1102 	default:
1103 		abort();
1104 	}
1105 	goto next_tok;
1106 }
1107 
1108 void
1109 eqn_free(struct eqn_node *p)
1110 {
1111 	int		 i;
1112 
1113 	if (p == NULL)
1114 		return;
1115 
1116 	for (i = 0; i < (int)p->defsz; i++) {
1117 		free(p->defs[i].key);
1118 		free(p->defs[i].val);
1119 	}
1120 
1121 	free(p->data);
1122 	free(p->defs);
1123 	free(p);
1124 }
1125