xref: /illumos-gate/usr/src/cmd/mandoc/eqn.c (revision 311c88c1ed31408e0998a9aabb8e7d0d12ef83ad)
1 /*	$Id: eqn.c,v 1.61 2016/01/08 00:50:45 schwarze Exp $ */
2 /*
3  * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4  * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #include <sys/types.h>
21 
22 #include <assert.h>
23 #include <limits.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <time.h>
28 
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "libmandoc.h"
32 #include "libroff.h"
33 
/* Upper bound on chained define expansions while reading one token. */
#define	EQN_NEST_MAX	 128

/*
 * True if the two counted strings have the same length and the same
 * bytes.  The arguments are evaluated more than once, so pass
 * expressions without side effects only.
 */
#define	STRNEQ(p1, sz1, p2, sz2) \
	((sz1) == (sz2) && strncmp((p1), (p2), (sz1)) == 0)
37 
/*
 * Keywords recognised by the tokenizer.
 * The order must match the eqn_toks[] name table.
 * EQN_TOK__MAX doubles as the "not a keyword" result.
 */
enum	eqn_tok {
	/* Diacritic marks applied to the preceding box. */
	EQN_TOK_DYAD,
	EQN_TOK_VEC,
	EQN_TOK_UNDER,
	EQN_TOK_BAR,
	EQN_TOK_TILDE,
	EQN_TOK_HAT,
	EQN_TOK_DOT,
	EQN_TOK_DOTDOT,

	/* Local motions. */
	EQN_TOK_FWD,
	EQN_TOK_BACK,
	EQN_TOK_DOWN,
	EQN_TOK_UP,

	/* Font and size of the following box. */
	EQN_TOK_FAT,
	EQN_TOK_ROMAN,
	EQN_TOK_ITALIC,
	EQN_TOK_BOLD,
	EQN_TOK_SIZE,

	/* Positional operators. */
	EQN_TOK_SUB,
	EQN_TOK_SUP,
	EQN_TOK_SQRT,
	EQN_TOK_OVER,
	EQN_TOK_FROM,
	EQN_TOK_TO,

	/* Grouping. */
	EQN_TOK_BRACE_OPEN,
	EQN_TOK_BRACE_CLOSE,

	/* Global settings and alignment marks. */
	EQN_TOK_GSIZE,
	EQN_TOK_GFONT,
	EQN_TOK_MARK,
	EQN_TOK_LINEUP,

	/* Delimiters, piles and matrices. */
	EQN_TOK_LEFT,
	EQN_TOK_RIGHT,
	EQN_TOK_PILE,
	EQN_TOK_LPILE,
	EQN_TOK_RPILE,
	EQN_TOK_CPILE,
	EQN_TOK_MATRIX,
	EQN_TOK_CCOL,
	EQN_TOK_LCOL,
	EQN_TOK_RCOL,

	/* Control statements. */
	EQN_TOK_DELIM,
	EQN_TOK_DEFINE,
	EQN_TOK_TDEFINE,
	EQN_TOK_NDEFINE,
	EQN_TOK_UNDEF,

	/* End of input; has no keyword string. */
	EQN_TOK_EOF,

	EQN_TOK_ABOVE,
	EQN_TOK__MAX
};
87 
88 static	const char *eqn_toks[EQN_TOK__MAX] = {
89 	"dyad", /* EQN_TOK_DYAD */
90 	"vec", /* EQN_TOK_VEC */
91 	"under", /* EQN_TOK_UNDER */
92 	"bar", /* EQN_TOK_BAR */
93 	"tilde", /* EQN_TOK_TILDE */
94 	"hat", /* EQN_TOK_HAT */
95 	"dot", /* EQN_TOK_DOT */
96 	"dotdot", /* EQN_TOK_DOTDOT */
97 	"fwd", /* EQN_TOK_FWD * */
98 	"back", /* EQN_TOK_BACK */
99 	"down", /* EQN_TOK_DOWN */
100 	"up", /* EQN_TOK_UP */
101 	"fat", /* EQN_TOK_FAT */
102 	"roman", /* EQN_TOK_ROMAN */
103 	"italic", /* EQN_TOK_ITALIC */
104 	"bold", /* EQN_TOK_BOLD */
105 	"size", /* EQN_TOK_SIZE */
106 	"sub", /* EQN_TOK_SUB */
107 	"sup", /* EQN_TOK_SUP */
108 	"sqrt", /* EQN_TOK_SQRT */
109 	"over", /* EQN_TOK_OVER */
110 	"from", /* EQN_TOK_FROM */
111 	"to", /* EQN_TOK_TO */
112 	"{", /* EQN_TOK_BRACE_OPEN */
113 	"}", /* EQN_TOK_BRACE_CLOSE */
114 	"gsize", /* EQN_TOK_GSIZE */
115 	"gfont", /* EQN_TOK_GFONT */
116 	"mark", /* EQN_TOK_MARK */
117 	"lineup", /* EQN_TOK_LINEUP */
118 	"left", /* EQN_TOK_LEFT */
119 	"right", /* EQN_TOK_RIGHT */
120 	"pile", /* EQN_TOK_PILE */
121 	"lpile", /* EQN_TOK_LPILE */
122 	"rpile", /* EQN_TOK_RPILE */
123 	"cpile", /* EQN_TOK_CPILE */
124 	"matrix", /* EQN_TOK_MATRIX */
125 	"ccol", /* EQN_TOK_CCOL */
126 	"lcol", /* EQN_TOK_LCOL */
127 	"rcol", /* EQN_TOK_RCOL */
128 	"delim", /* EQN_TOK_DELIM */
129 	"define", /* EQN_TOK_DEFINE */
130 	"tdefine", /* EQN_TOK_TDEFINE */
131 	"ndefine", /* EQN_TOK_NDEFINE */
132 	"undef", /* EQN_TOK_UNDEF */
133 	NULL, /* EQN_TOK_EOF */
134 	"above", /* EQN_TOK_ABOVE */
135 };
136 
/*
 * Indices into the eqnsyms[] table of predefined symbol names.
 * The order must match that table.
 */
enum	eqn_symt {
	/* Lower-case Greek letters. */
	EQNSYM_alpha, EQNSYM_beta, EQNSYM_chi, EQNSYM_delta,
	EQNSYM_epsilon, EQNSYM_eta, EQNSYM_gamma, EQNSYM_iota,
	EQNSYM_kappa, EQNSYM_lambda, EQNSYM_mu, EQNSYM_nu,
	EQNSYM_omega, EQNSYM_omicron, EQNSYM_phi, EQNSYM_pi,
	EQNSYM_ps, EQNSYM_rho, EQNSYM_sigma, EQNSYM_tau,
	EQNSYM_theta, EQNSYM_upsilon, EQNSYM_xi, EQNSYM_zeta,

	/* Upper-case Greek letters. */
	EQNSYM_DELTA, EQNSYM_GAMMA, EQNSYM_LAMBDA, EQNSYM_OMEGA,
	EQNSYM_PHI, EQNSYM_PI, EQNSYM_PSI, EQNSYM_SIGMA,
	EQNSYM_THETA, EQNSYM_UPSILON, EQNSYM_XI,

	/* Named operators and miscellaneous symbols. */
	EQNSYM_inter, EQNSYM_union, EQNSYM_prod, EQNSYM_int,
	EQNSYM_sum, EQNSYM_grad, EQNSYM_del, EQNSYM_times,
	EQNSYM_cdot, EQNSYM_nothing, EQNSYM_approx, EQNSYM_prime,
	EQNSYM_half, EQNSYM_partial, EQNSYM_inf,

	/* Symbols spelled with punctuation. */
	EQNSYM_muchgreat, EQNSYM_muchless, EQNSYM_larrow,
	EQNSYM_rarrow, EQNSYM_pm, EQNSYM_nequal, EQNSYM_equiv,
	EQNSYM_lessequal, EQNSYM_moreequal, EQNSYM_minus,

	EQNSYM__MAX
};
200 
/* One predefined symbol: its spelling in the input and its output name. */
struct	eqnsym {
	const char	*str;	/* word as written in the equation */
	const char	*sym;	/* name placed between "\[" and "]" */
};
205 
206 static	const struct eqnsym eqnsyms[EQNSYM__MAX] = {
207 	{ "alpha", "*a" }, /* EQNSYM_alpha */
208 	{ "beta", "*b" }, /* EQNSYM_beta */
209 	{ "chi", "*x" }, /* EQNSYM_chi */
210 	{ "delta", "*d" }, /* EQNSYM_delta */
211 	{ "epsilon", "*e" }, /* EQNSYM_epsilon */
212 	{ "eta", "*y" }, /* EQNSYM_eta */
213 	{ "gamma", "*g" }, /* EQNSYM_gamma */
214 	{ "iota", "*i" }, /* EQNSYM_iota */
215 	{ "kappa", "*k" }, /* EQNSYM_kappa */
216 	{ "lambda", "*l" }, /* EQNSYM_lambda */
217 	{ "mu", "*m" }, /* EQNSYM_mu */
218 	{ "nu", "*n" }, /* EQNSYM_nu */
219 	{ "omega", "*w" }, /* EQNSYM_omega */
220 	{ "omicron", "*o" }, /* EQNSYM_omicron */
221 	{ "phi", "*f" }, /* EQNSYM_phi */
222 	{ "pi", "*p" }, /* EQNSYM_pi */
223 	{ "psi", "*q" }, /* EQNSYM_psi */
224 	{ "rho", "*r" }, /* EQNSYM_rho */
225 	{ "sigma", "*s" }, /* EQNSYM_sigma */
226 	{ "tau", "*t" }, /* EQNSYM_tau */
227 	{ "theta", "*h" }, /* EQNSYM_theta */
228 	{ "upsilon", "*u" }, /* EQNSYM_upsilon */
229 	{ "xi", "*c" }, /* EQNSYM_xi */
230 	{ "zeta", "*z" }, /* EQNSYM_zeta */
231 	{ "DELTA", "*D" }, /* EQNSYM_DELTA */
232 	{ "GAMMA", "*G" }, /* EQNSYM_GAMMA */
233 	{ "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
234 	{ "OMEGA", "*W" }, /* EQNSYM_OMEGA */
235 	{ "PHI", "*F" }, /* EQNSYM_PHI */
236 	{ "PI", "*P" }, /* EQNSYM_PI */
237 	{ "PSI", "*Q" }, /* EQNSYM_PSI */
238 	{ "SIGMA", "*S" }, /* EQNSYM_SIGMA */
239 	{ "THETA", "*H" }, /* EQNSYM_THETA */
240 	{ "UPSILON", "*U" }, /* EQNSYM_UPSILON */
241 	{ "XI", "*C" }, /* EQNSYM_XI */
242 	{ "inter", "ca" }, /* EQNSYM_inter */
243 	{ "union", "cu" }, /* EQNSYM_union */
244 	{ "prod", "product" }, /* EQNSYM_prod */
245 	{ "int", "integral" }, /* EQNSYM_int */
246 	{ "sum", "sum" }, /* EQNSYM_sum */
247 	{ "grad", "gr" }, /* EQNSYM_grad */
248 	{ "del", "gr" }, /* EQNSYM_del */
249 	{ "times", "mu" }, /* EQNSYM_times */
250 	{ "cdot", "pc" }, /* EQNSYM_cdot */
251 	{ "nothing", "&" }, /* EQNSYM_nothing */
252 	{ "approx", "~~" }, /* EQNSYM_approx */
253 	{ "prime", "fm" }, /* EQNSYM_prime */
254 	{ "half", "12" }, /* EQNSYM_half */
255 	{ "partial", "pd" }, /* EQNSYM_partial */
256 	{ "inf", "if" }, /* EQNSYM_inf */
257 	{ ">>", ">>" }, /* EQNSYM_muchgreat */
258 	{ "<<", "<<" }, /* EQNSYM_muchless */
259 	{ "<-", "<-" }, /* EQNSYM_larrow */
260 	{ "->", "->" }, /* EQNSYM_rarrow */
261 	{ "+-", "+-" }, /* EQNSYM_pm */
262 	{ "!=", "!=" }, /* EQNSYM_nequal */
263 	{ "==", "==" }, /* EQNSYM_equiv */
264 	{ "<=", "<=" }, /* EQNSYM_lessequal */
265 	{ ">=", ">=" }, /* EQNSYM_moreequal */
266 	{ "-", "mi" }, /* EQNSYM_minus */
267 };
268 
/* Box tree maintenance. */
static	struct eqn_box	*eqn_box_alloc(struct eqn_node *, struct eqn_box *);
static	void		 eqn_box_free(struct eqn_box *);
static	struct eqn_box	*eqn_box_makebinary(struct eqn_node *,
				enum eqn_post, struct eqn_box *);

/* Tokenizer. */
static	const char	*eqn_next(struct eqn_node *, char, size_t *, int);
static	const char	*eqn_nextrawtok(struct eqn_node *, size_t *);
static	const char	*eqn_nexttok(struct eqn_node *, size_t *);
static	enum eqn_tok	 eqn_tok_parse(struct eqn_node *, char **);

/* Control statements. */
static	void		 eqn_def(struct eqn_node *);
static	struct eqn_def	*eqn_def_find(struct eqn_node *, const char *, size_t);
static	void		 eqn_delim(struct eqn_node *);
static	void		 eqn_undef(struct eqn_node *);

/* Parser proper. */
static	enum rofferr	 eqn_parse(struct eqn_node *, struct eqn_box *);
282 
283 
284 enum rofferr
285 eqn_read(struct eqn_node **epp, int ln,
286 		const char *p, int pos, int *offs)
287 {
288 	size_t		 sz;
289 	struct eqn_node	*ep;
290 	enum rofferr	 er;
291 
292 	ep = *epp;
293 
294 	/*
295 	 * If we're the terminating mark, unset our equation status and
296 	 * validate the full equation.
297 	 */
298 
299 	if (0 == strncmp(p, ".EN", 3)) {
300 		er = eqn_end(epp);
301 		p += 3;
302 		while (' ' == *p || '\t' == *p)
303 			p++;
304 		if ('\0' == *p)
305 			return er;
306 		mandoc_vmsg(MANDOCERR_ARG_SKIP, ep->parse,
307 		    ln, pos, "EN %s", p);
308 		return er;
309 	}
310 
311 	/*
312 	 * Build up the full string, replacing all newlines with regular
313 	 * whitespace.
314 	 */
315 
316 	sz = strlen(p + pos) + 1;
317 	ep->data = mandoc_realloc(ep->data, ep->sz + sz + 1);
318 
319 	/* First invocation: nil terminate the string. */
320 
321 	if (0 == ep->sz)
322 		*ep->data = '\0';
323 
324 	ep->sz += sz;
325 	strlcat(ep->data, p + pos, ep->sz + 1);
326 	strlcat(ep->data, " ", ep->sz + 1);
327 	return ROFF_IGN;
328 }
329 
330 struct eqn_node *
331 eqn_alloc(int pos, int line, struct mparse *parse)
332 {
333 	struct eqn_node	*p;
334 
335 	p = mandoc_calloc(1, sizeof(struct eqn_node));
336 
337 	p->parse = parse;
338 	p->eqn.ln = line;
339 	p->eqn.pos = pos;
340 	p->gsize = EQN_DEFSIZE;
341 
342 	return p;
343 }
344 
345 /*
346  * Find the key "key" of the give size within our eqn-defined values.
347  */
348 static struct eqn_def *
349 eqn_def_find(struct eqn_node *ep, const char *key, size_t sz)
350 {
351 	int		 i;
352 
353 	for (i = 0; i < (int)ep->defsz; i++)
354 		if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
355 		    ep->defs[i].keysz, key, sz))
356 			return &ep->defs[i];
357 
358 	return NULL;
359 }
360 
361 /*
362  * Get the next token from the input stream using the given quote
363  * character.
364  * Optionally make any replacements.
365  */
366 static const char *
367 eqn_next(struct eqn_node *ep, char quote, size_t *sz, int repl)
368 {
369 	char		*start, *next;
370 	int		 q, diff, lim;
371 	size_t		 ssz, dummy;
372 	struct eqn_def	*def;
373 
374 	if (NULL == sz)
375 		sz = &dummy;
376 
377 	lim = 0;
378 	ep->rew = ep->cur;
379 again:
380 	/* Prevent self-definitions. */
381 
382 	if (lim >= EQN_NEST_MAX) {
383 		mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse,
384 		    ep->eqn.ln, ep->eqn.pos, NULL);
385 		return NULL;
386 	}
387 
388 	ep->cur = ep->rew;
389 	start = &ep->data[(int)ep->cur];
390 	q = 0;
391 
392 	if ('\0' == *start)
393 		return NULL;
394 
395 	if (quote == *start) {
396 		ep->cur++;
397 		q = 1;
398 	}
399 
400 	start = &ep->data[(int)ep->cur];
401 
402 	if ( ! q) {
403 		if ('{' == *start || '}' == *start)
404 			ssz = 1;
405 		else
406 			ssz = strcspn(start + 1, " ^~\"{}\t") + 1;
407 		next = start + (int)ssz;
408 		if ('\0' == *next)
409 			next = NULL;
410 	} else
411 		next = strchr(start, quote);
412 
413 	if (NULL != next) {
414 		*sz = (size_t)(next - start);
415 		ep->cur += *sz;
416 		if (q)
417 			ep->cur++;
418 		while (' ' == ep->data[(int)ep->cur] ||
419 		    '\t' == ep->data[(int)ep->cur] ||
420 		    '^' == ep->data[(int)ep->cur] ||
421 		    '~' == ep->data[(int)ep->cur])
422 			ep->cur++;
423 	} else {
424 		if (q)
425 			mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse,
426 			    ep->eqn.ln, ep->eqn.pos, NULL);
427 		next = strchr(start, '\0');
428 		*sz = (size_t)(next - start);
429 		ep->cur += *sz;
430 	}
431 
432 	/* Quotes aren't expanded for values. */
433 
434 	if (q || ! repl)
435 		return start;
436 
437 	if (NULL != (def = eqn_def_find(ep, start, *sz))) {
438 		diff = def->valsz - *sz;
439 
440 		if (def->valsz > *sz) {
441 			ep->sz += diff;
442 			ep->data = mandoc_realloc(ep->data, ep->sz + 1);
443 			ep->data[ep->sz] = '\0';
444 			start = &ep->data[(int)ep->rew];
445 		}
446 
447 		diff = def->valsz - *sz;
448 		memmove(start + *sz + diff, start + *sz,
449 		    (strlen(start) - *sz) + 1);
450 		memcpy(start, def->val, def->valsz);
451 		lim++;
452 		goto again;
453 	}
454 
455 	return start;
456 }
457 
/*
 * Fetch the next token, treating the double quote as the quoting
 * character and expanding definitions.
 */
static const char *
eqn_nexttok(struct eqn_node *ep, size_t *sz)
{
	const char	*tok;

	tok = eqn_next(ep, '"', sz, 1);
	return tok;
}
468 
/*
 * Fetch the next token, treating the double quote as the quoting
 * character but leaving definitions unexpanded.
 */
static const char *
eqn_nextrawtok(struct eqn_node *ep, size_t *sz)
{
	const char	*tok;

	tok = eqn_next(ep, '"', sz, 0);
	return tok;
}
478 
479 /*
480  * Parse a token from the stream of text.
481  * A token consists of one of the recognised eqn(7) strings.
482  * Strings are separated by delimiting marks.
483  * This returns EQN_TOK_EOF when there are no more tokens.
484  * If the token is an unrecognised string literal, then it returns
485  * EQN_TOK__MAX and sets the "p" pointer to an allocated, nil-terminated
486  * string.
487  * This must be later freed with free(3).
488  */
489 static enum eqn_tok
490 eqn_tok_parse(struct eqn_node *ep, char **p)
491 {
492 	const char	*start;
493 	size_t		 i, sz;
494 	int		 quoted;
495 
496 	if (NULL != p)
497 		*p = NULL;
498 
499 	quoted = ep->data[ep->cur] == '"';
500 
501 	if (NULL == (start = eqn_nexttok(ep, &sz)))
502 		return EQN_TOK_EOF;
503 
504 	if (quoted) {
505 		if (p != NULL)
506 			*p = mandoc_strndup(start, sz);
507 		return EQN_TOK__MAX;
508 	}
509 
510 	for (i = 0; i < EQN_TOK__MAX; i++) {
511 		if (NULL == eqn_toks[i])
512 			continue;
513 		if (STRNEQ(start, sz, eqn_toks[i], strlen(eqn_toks[i])))
514 			break;
515 	}
516 
517 	if (i == EQN_TOK__MAX && NULL != p)
518 		*p = mandoc_strndup(start, sz);
519 
520 	return i;
521 }
522 
523 static void
524 eqn_box_free(struct eqn_box *bp)
525 {
526 
527 	if (bp->first)
528 		eqn_box_free(bp->first);
529 	if (bp->next)
530 		eqn_box_free(bp->next);
531 
532 	free(bp->text);
533 	free(bp->left);
534 	free(bp->right);
535 	free(bp->top);
536 	free(bp->bottom);
537 	free(bp);
538 }
539 
540 /*
541  * Allocate a box as the last child of the parent node.
542  */
543 static struct eqn_box *
544 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
545 {
546 	struct eqn_box	*bp;
547 
548 	bp = mandoc_calloc(1, sizeof(struct eqn_box));
549 	bp->parent = parent;
550 	bp->parent->args++;
551 	bp->expectargs = UINT_MAX;
552 	bp->size = ep->gsize;
553 
554 	if (NULL != parent->first) {
555 		parent->last->next = bp;
556 		bp->prev = parent->last;
557 	} else
558 		parent->first = bp;
559 
560 	parent->last = bp;
561 	return bp;
562 }
563 
564 /*
565  * Reparent the current last node (of the current parent) under a new
566  * EQN_SUBEXPR as the first element.
567  * Then return the new parent.
568  * The new EQN_SUBEXPR will have a two-child limit.
569  */
570 static struct eqn_box *
571 eqn_box_makebinary(struct eqn_node *ep,
572 	enum eqn_post pos, struct eqn_box *parent)
573 {
574 	struct eqn_box	*b, *newb;
575 
576 	assert(NULL != parent->last);
577 	b = parent->last;
578 	if (parent->last == parent->first)
579 		parent->first = NULL;
580 	parent->args--;
581 	parent->last = b->prev;
582 	b->prev = NULL;
583 	newb = eqn_box_alloc(ep, parent);
584 	newb->pos = pos;
585 	newb->type = EQN_SUBEXPR;
586 	newb->expectargs = 2;
587 	newb->args = 1;
588 	newb->first = newb->last = b;
589 	newb->first->next = NULL;
590 	b->parent = newb;
591 	return newb;
592 }
593 
594 /*
595  * Parse the "delim" control statement.
596  */
597 static void
598 eqn_delim(struct eqn_node *ep)
599 {
600 	const char	*start;
601 	size_t		 sz;
602 
603 	if ((start = eqn_nextrawtok(ep, &sz)) == NULL)
604 		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
605 		    ep->eqn.ln, ep->eqn.pos, "delim");
606 	else if (strncmp(start, "off", 3) == 0)
607 		ep->delim = 0;
608 	else if (strncmp(start, "on", 2) == 0) {
609 		if (ep->odelim && ep->cdelim)
610 			ep->delim = 1;
611 	} else if (start[1] != '\0') {
612 		ep->odelim = start[0];
613 		ep->cdelim = start[1];
614 		ep->delim = 1;
615 	}
616 }
617 
618 /*
619  * Undefine a previously-defined string.
620  */
621 static void
622 eqn_undef(struct eqn_node *ep)
623 {
624 	const char	*start;
625 	struct eqn_def	*def;
626 	size_t		 sz;
627 
628 	if ((start = eqn_nextrawtok(ep, &sz)) == NULL) {
629 		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
630 		    ep->eqn.ln, ep->eqn.pos, "undef");
631 		return;
632 	}
633 	if ((def = eqn_def_find(ep, start, sz)) == NULL)
634 		return;
635 	free(def->key);
636 	free(def->val);
637 	def->key = def->val = NULL;
638 	def->keysz = def->valsz = 0;
639 }
640 
641 static void
642 eqn_def(struct eqn_node *ep)
643 {
644 	const char	*start;
645 	size_t		 sz;
646 	struct eqn_def	*def;
647 	int		 i;
648 
649 	if ((start = eqn_nextrawtok(ep, &sz)) == NULL) {
650 		mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
651 		    ep->eqn.ln, ep->eqn.pos, "define");
652 		return;
653 	}
654 
655 	/*
656 	 * Search for a key that already exists.
657 	 * Create a new key if none is found.
658 	 */
659 	if (NULL == (def = eqn_def_find(ep, start, sz))) {
660 		/* Find holes in string array. */
661 		for (i = 0; i < (int)ep->defsz; i++)
662 			if (0 == ep->defs[i].keysz)
663 				break;
664 
665 		if (i == (int)ep->defsz) {
666 			ep->defsz++;
667 			ep->defs = mandoc_reallocarray(ep->defs,
668 			    ep->defsz, sizeof(struct eqn_def));
669 			ep->defs[i].key = ep->defs[i].val = NULL;
670 		}
671 
672 		def = ep->defs + i;
673 		free(def->key);
674 		def->key = mandoc_strndup(start, sz);
675 		def->keysz = sz;
676 	}
677 
678 	start = eqn_next(ep, ep->data[(int)ep->cur], &sz, 0);
679 	if (start == NULL) {
680 		mandoc_vmsg(MANDOCERR_REQ_EMPTY, ep->parse,
681 		    ep->eqn.ln, ep->eqn.pos, "define %s", def->key);
682 		free(def->key);
683 		free(def->val);
684 		def->key = def->val = NULL;
685 		def->keysz = def->valsz = 0;
686 		return;
687 	}
688 	free(def->val);
689 	def->val = mandoc_strndup(start, sz);
690 	def->valsz = sz;
691 }
692 
693 /*
694  * Recursively parse an eqn(7) expression.
695  */
696 static enum rofferr
697 eqn_parse(struct eqn_node *ep, struct eqn_box *parent)
698 {
699 	char		 sym[64];
700 	struct eqn_box	*cur;
701 	const char	*start;
702 	char		*p;
703 	size_t		 i, sz;
704 	enum eqn_tok	 tok, subtok;
705 	enum eqn_post	 pos;
706 	int		 size;
707 
708 	assert(parent != NULL);
709 
710 	/*
711 	 * Empty equation.
712 	 * Do not add it to the high-level syntax tree.
713 	 */
714 
715 	if (ep->data == NULL)
716 		return ROFF_IGN;
717 
718 next_tok:
719 	tok = eqn_tok_parse(ep, &p);
720 
721 this_tok:
722 	switch (tok) {
723 	case (EQN_TOK_UNDEF):
724 		eqn_undef(ep);
725 		break;
726 	case (EQN_TOK_NDEFINE):
727 	case (EQN_TOK_DEFINE):
728 		eqn_def(ep);
729 		break;
730 	case (EQN_TOK_TDEFINE):
731 		if (eqn_nextrawtok(ep, NULL) == NULL ||
732 		    eqn_next(ep, ep->data[(int)ep->cur], NULL, 0) == NULL)
733 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
734 			    ep->eqn.ln, ep->eqn.pos, "tdefine");
735 		break;
736 	case (EQN_TOK_DELIM):
737 		eqn_delim(ep);
738 		break;
739 	case (EQN_TOK_GFONT):
740 		if (eqn_nextrawtok(ep, NULL) == NULL)
741 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
742 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
743 		break;
744 	case (EQN_TOK_MARK):
745 	case (EQN_TOK_LINEUP):
746 		/* Ignore these. */
747 		break;
748 	case (EQN_TOK_DYAD):
749 	case (EQN_TOK_VEC):
750 	case (EQN_TOK_UNDER):
751 	case (EQN_TOK_BAR):
752 	case (EQN_TOK_TILDE):
753 	case (EQN_TOK_HAT):
754 	case (EQN_TOK_DOT):
755 	case (EQN_TOK_DOTDOT):
756 		if (parent->last == NULL) {
757 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
758 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
759 			cur = eqn_box_alloc(ep, parent);
760 			cur->type = EQN_TEXT;
761 			cur->text = mandoc_strdup("");
762 		}
763 		parent = eqn_box_makebinary(ep, EQNPOS_NONE, parent);
764 		parent->type = EQN_LISTONE;
765 		parent->expectargs = 1;
766 		switch (tok) {
767 		case (EQN_TOK_DOTDOT):
768 			strlcpy(sym, "\\[ad]", sizeof(sym));
769 			break;
770 		case (EQN_TOK_VEC):
771 			strlcpy(sym, "\\[->]", sizeof(sym));
772 			break;
773 		case (EQN_TOK_DYAD):
774 			strlcpy(sym, "\\[<>]", sizeof(sym));
775 			break;
776 		case (EQN_TOK_TILDE):
777 			strlcpy(sym, "\\[a~]", sizeof(sym));
778 			break;
779 		case (EQN_TOK_UNDER):
780 			strlcpy(sym, "\\[ul]", sizeof(sym));
781 			break;
782 		case (EQN_TOK_BAR):
783 			strlcpy(sym, "\\[rl]", sizeof(sym));
784 			break;
785 		case (EQN_TOK_DOT):
786 			strlcpy(sym, "\\[a.]", sizeof(sym));
787 			break;
788 		case (EQN_TOK_HAT):
789 			strlcpy(sym, "\\[ha]", sizeof(sym));
790 			break;
791 		default:
792 			abort();
793 		}
794 
795 		switch (tok) {
796 		case (EQN_TOK_DOTDOT):
797 		case (EQN_TOK_VEC):
798 		case (EQN_TOK_DYAD):
799 		case (EQN_TOK_TILDE):
800 		case (EQN_TOK_BAR):
801 		case (EQN_TOK_DOT):
802 		case (EQN_TOK_HAT):
803 			parent->top = mandoc_strdup(sym);
804 			break;
805 		case (EQN_TOK_UNDER):
806 			parent->bottom = mandoc_strdup(sym);
807 			break;
808 		default:
809 			abort();
810 		}
811 		parent = parent->parent;
812 		break;
813 	case (EQN_TOK_FWD):
814 	case (EQN_TOK_BACK):
815 	case (EQN_TOK_DOWN):
816 	case (EQN_TOK_UP):
817 		subtok = eqn_tok_parse(ep, NULL);
818 		if (subtok != EQN_TOK__MAX) {
819 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
820 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
821 			tok = subtok;
822 			goto this_tok;
823 		}
824 		break;
825 	case (EQN_TOK_FAT):
826 	case (EQN_TOK_ROMAN):
827 	case (EQN_TOK_ITALIC):
828 	case (EQN_TOK_BOLD):
829 		while (parent->args == parent->expectargs)
830 			parent = parent->parent;
831 		/*
832 		 * These values apply to the next word or sequence of
833 		 * words; thus, we mark that we'll have a child with
834 		 * exactly one of those.
835 		 */
836 		parent = eqn_box_alloc(ep, parent);
837 		parent->type = EQN_LISTONE;
838 		parent->expectargs = 1;
839 		switch (tok) {
840 		case (EQN_TOK_FAT):
841 			parent->font = EQNFONT_FAT;
842 			break;
843 		case (EQN_TOK_ROMAN):
844 			parent->font = EQNFONT_ROMAN;
845 			break;
846 		case (EQN_TOK_ITALIC):
847 			parent->font = EQNFONT_ITALIC;
848 			break;
849 		case (EQN_TOK_BOLD):
850 			parent->font = EQNFONT_BOLD;
851 			break;
852 		default:
853 			abort();
854 		}
855 		break;
856 	case (EQN_TOK_SIZE):
857 	case (EQN_TOK_GSIZE):
858 		/* Accept two values: integral size and a single. */
859 		if (NULL == (start = eqn_nexttok(ep, &sz))) {
860 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
861 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
862 			break;
863 		}
864 		size = mandoc_strntoi(start, sz, 10);
865 		if (-1 == size) {
866 			mandoc_msg(MANDOCERR_IT_NONUM, ep->parse,
867 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
868 			break;
869 		}
870 		if (EQN_TOK_GSIZE == tok) {
871 			ep->gsize = size;
872 			break;
873 		}
874 		parent = eqn_box_alloc(ep, parent);
875 		parent->type = EQN_LISTONE;
876 		parent->expectargs = 1;
877 		parent->size = size;
878 		break;
879 	case (EQN_TOK_FROM):
880 	case (EQN_TOK_TO):
881 	case (EQN_TOK_SUB):
882 	case (EQN_TOK_SUP):
883 		/*
884 		 * We have a left-right-associative expression.
885 		 * Repivot under a positional node, open a child scope
886 		 * and keep on reading.
887 		 */
888 		if (parent->last == NULL) {
889 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
890 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
891 			cur = eqn_box_alloc(ep, parent);
892 			cur->type = EQN_TEXT;
893 			cur->text = mandoc_strdup("");
894 		}
895 		/* Handle the "subsup" and "fromto" positions. */
896 		if (EQN_TOK_SUP == tok && parent->pos == EQNPOS_SUB) {
897 			parent->expectargs = 3;
898 			parent->pos = EQNPOS_SUBSUP;
899 			break;
900 		}
901 		if (EQN_TOK_TO == tok && parent->pos == EQNPOS_FROM) {
902 			parent->expectargs = 3;
903 			parent->pos = EQNPOS_FROMTO;
904 			break;
905 		}
906 		switch (tok) {
907 		case (EQN_TOK_FROM):
908 			pos = EQNPOS_FROM;
909 			break;
910 		case (EQN_TOK_TO):
911 			pos = EQNPOS_TO;
912 			break;
913 		case (EQN_TOK_SUP):
914 			pos = EQNPOS_SUP;
915 			break;
916 		case (EQN_TOK_SUB):
917 			pos = EQNPOS_SUB;
918 			break;
919 		default:
920 			abort();
921 		}
922 		parent = eqn_box_makebinary(ep, pos, parent);
923 		break;
924 	case (EQN_TOK_SQRT):
925 		while (parent->args == parent->expectargs)
926 			parent = parent->parent;
927 		/*
928 		 * Accept a left-right-associative set of arguments just
929 		 * like sub and sup and friends but without rebalancing
930 		 * under a pivot.
931 		 */
932 		parent = eqn_box_alloc(ep, parent);
933 		parent->type = EQN_SUBEXPR;
934 		parent->pos = EQNPOS_SQRT;
935 		parent->expectargs = 1;
936 		break;
937 	case (EQN_TOK_OVER):
938 		/*
939 		 * We have a right-left-associative fraction.
940 		 * Close out anything that's currently open, then
941 		 * rebalance and continue reading.
942 		 */
943 		if (parent->last == NULL) {
944 			mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
945 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
946 			cur = eqn_box_alloc(ep, parent);
947 			cur->type = EQN_TEXT;
948 			cur->text = mandoc_strdup("");
949 		}
950 		while (EQN_SUBEXPR == parent->type)
951 			parent = parent->parent;
952 		parent = eqn_box_makebinary(ep, EQNPOS_OVER, parent);
953 		break;
954 	case (EQN_TOK_RIGHT):
955 	case (EQN_TOK_BRACE_CLOSE):
956 		/*
957 		 * Close out the existing brace.
958 		 * FIXME: this is a shitty sentinel: we should really
959 		 * have a native EQN_BRACE type or whatnot.
960 		 */
961 		for (cur = parent; cur != NULL; cur = cur->parent)
962 			if (cur->type == EQN_LIST &&
963 			    (tok == EQN_TOK_BRACE_CLOSE ||
964 			     cur->left != NULL))
965 				break;
966 		if (cur == NULL) {
967 			mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse,
968 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
969 			break;
970 		}
971 		parent = cur;
972 		if (EQN_TOK_RIGHT == tok) {
973 			if (NULL == (start = eqn_nexttok(ep, &sz))) {
974 				mandoc_msg(MANDOCERR_REQ_EMPTY,
975 				    ep->parse, ep->eqn.ln,
976 				    ep->eqn.pos, eqn_toks[tok]);
977 				break;
978 			}
979 			/* Handling depends on right/left. */
980 			if (STRNEQ(start, sz, "ceiling", 7)) {
981 				strlcpy(sym, "\\[rc]", sizeof(sym));
982 				parent->right = mandoc_strdup(sym);
983 			} else if (STRNEQ(start, sz, "floor", 5)) {
984 				strlcpy(sym, "\\[rf]", sizeof(sym));
985 				parent->right = mandoc_strdup(sym);
986 			} else
987 				parent->right = mandoc_strndup(start, sz);
988 		}
989 		parent = parent->parent;
990 		if (tok == EQN_TOK_BRACE_CLOSE &&
991 		    (parent->type == EQN_PILE ||
992 		     parent->type == EQN_MATRIX))
993 			parent = parent->parent;
994 		/* Close out any "singleton" lists. */
995 		while (parent->type == EQN_LISTONE &&
996 		    parent->args == parent->expectargs)
997 			parent = parent->parent;
998 		break;
999 	case (EQN_TOK_BRACE_OPEN):
1000 	case (EQN_TOK_LEFT):
1001 		/*
1002 		 * If we already have something in the stack and we're
1003 		 * in an expression, then rewind til we're not any more
1004 		 * (just like with the text node).
1005 		 */
1006 		while (parent->args == parent->expectargs)
1007 			parent = parent->parent;
1008 		if (EQN_TOK_LEFT == tok &&
1009 		    (start = eqn_nexttok(ep, &sz)) == NULL) {
1010 			mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
1011 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
1012 			break;
1013 		}
1014 		parent = eqn_box_alloc(ep, parent);
1015 		parent->type = EQN_LIST;
1016 		if (EQN_TOK_LEFT == tok) {
1017 			if (STRNEQ(start, sz, "ceiling", 7)) {
1018 				strlcpy(sym, "\\[lc]", sizeof(sym));
1019 				parent->left = mandoc_strdup(sym);
1020 			} else if (STRNEQ(start, sz, "floor", 5)) {
1021 				strlcpy(sym, "\\[lf]", sizeof(sym));
1022 				parent->left = mandoc_strdup(sym);
1023 			} else
1024 				parent->left = mandoc_strndup(start, sz);
1025 		}
1026 		break;
1027 	case (EQN_TOK_PILE):
1028 	case (EQN_TOK_LPILE):
1029 	case (EQN_TOK_RPILE):
1030 	case (EQN_TOK_CPILE):
1031 	case (EQN_TOK_CCOL):
1032 	case (EQN_TOK_LCOL):
1033 	case (EQN_TOK_RCOL):
1034 		while (parent->args == parent->expectargs)
1035 			parent = parent->parent;
1036 		parent = eqn_box_alloc(ep, parent);
1037 		parent->type = EQN_PILE;
1038 		parent->expectargs = 1;
1039 		break;
1040 	case (EQN_TOK_ABOVE):
1041 		for (cur = parent; cur != NULL; cur = cur->parent)
1042 			if (cur->type == EQN_PILE)
1043 				break;
1044 		if (cur == NULL) {
1045 			mandoc_msg(MANDOCERR_IT_STRAY, ep->parse,
1046 			    ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
1047 			break;
1048 		}
1049 		parent = eqn_box_alloc(ep, cur);
1050 		parent->type = EQN_LIST;
1051 		break;
1052 	case (EQN_TOK_MATRIX):
1053 		while (parent->args == parent->expectargs)
1054 			parent = parent->parent;
1055 		parent = eqn_box_alloc(ep, parent);
1056 		parent->type = EQN_MATRIX;
1057 		parent->expectargs = 1;
1058 		break;
1059 	case (EQN_TOK_EOF):
1060 		/*
1061 		 * End of file!
1062 		 * TODO: make sure we're not in an open subexpression.
1063 		 */
1064 		return ROFF_EQN;
1065 	default:
1066 		assert(tok == EQN_TOK__MAX);
1067 		assert(NULL != p);
1068 		/*
1069 		 * If we already have something in the stack and we're
1070 		 * in an expression, then rewind til we're not any more.
1071 		 */
1072 		while (parent->args == parent->expectargs)
1073 			parent = parent->parent;
1074 		cur = eqn_box_alloc(ep, parent);
1075 		cur->type = EQN_TEXT;
1076 		for (i = 0; i < EQNSYM__MAX; i++)
1077 			if (0 == strcmp(eqnsyms[i].str, p)) {
1078 				(void)snprintf(sym, sizeof(sym),
1079 					"\\[%s]", eqnsyms[i].sym);
1080 				cur->text = mandoc_strdup(sym);
1081 				free(p);
1082 				break;
1083 			}
1084 
1085 		if (i == EQNSYM__MAX)
1086 			cur->text = p;
1087 		/*
1088 		 * Post-process list status.
1089 		 */
1090 		while (parent->type == EQN_LISTONE &&
1091 		    parent->args == parent->expectargs)
1092 			parent = parent->parent;
1093 		break;
1094 	}
1095 	goto next_tok;
1096 }
1097 
1098 enum rofferr
1099 eqn_end(struct eqn_node **epp)
1100 {
1101 	struct eqn_node	*ep;
1102 
1103 	ep = *epp;
1104 	*epp = NULL;
1105 
1106 	ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box));
1107 	ep->eqn.root->expectargs = UINT_MAX;
1108 	return eqn_parse(ep, ep->eqn.root);
1109 }
1110 
1111 void
1112 eqn_free(struct eqn_node *p)
1113 {
1114 	int		 i;
1115 
1116 	eqn_box_free(p->eqn.root);
1117 
1118 	for (i = 0; i < (int)p->defsz; i++) {
1119 		free(p->defs[i].key);
1120 		free(p->defs[i].val);
1121 	}
1122 
1123 	free(p->data);
1124 	free(p->defs);
1125 	free(p);
1126 }
1127