1 /* $Id: eqn.c,v 1.58 2015/03/04 12:19:49 schwarze Exp $ */
2 /*
3 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org>
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 */
18 #include "config.h"
19
20 #include <sys/types.h>
21
22 #include <assert.h>
23 #include <limits.h>
24 #include <stdio.h>
25 #include <stdlib.h>
26 #include <string.h>
27 #include <time.h>
28
29 #include "mandoc.h"
30 #include "mandoc_aux.h"
31 #include "libmandoc.h"
32 #include "libroff.h"
33
34 #define EQN_NEST_MAX 128 /* maximum nesting of defines */
35 #define STRNEQ(p1, sz1, p2, sz2) \
36 ((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1)))
37
38 enum eqn_tok {
39 EQN_TOK_DYAD = 0,
40 EQN_TOK_VEC,
41 EQN_TOK_UNDER,
42 EQN_TOK_BAR,
43 EQN_TOK_TILDE,
44 EQN_TOK_HAT,
45 EQN_TOK_DOT,
46 EQN_TOK_DOTDOT,
47 EQN_TOK_FWD,
48 EQN_TOK_BACK,
49 EQN_TOK_DOWN,
50 EQN_TOK_UP,
51 EQN_TOK_FAT,
52 EQN_TOK_ROMAN,
53 EQN_TOK_ITALIC,
54 EQN_TOK_BOLD,
55 EQN_TOK_SIZE,
56 EQN_TOK_SUB,
57 EQN_TOK_SUP,
58 EQN_TOK_SQRT,
59 EQN_TOK_OVER,
60 EQN_TOK_FROM,
61 EQN_TOK_TO,
62 EQN_TOK_BRACE_OPEN,
63 EQN_TOK_BRACE_CLOSE,
64 EQN_TOK_GSIZE,
65 EQN_TOK_GFONT,
66 EQN_TOK_MARK,
67 EQN_TOK_LINEUP,
68 EQN_TOK_LEFT,
69 EQN_TOK_RIGHT,
70 EQN_TOK_PILE,
71 EQN_TOK_LPILE,
72 EQN_TOK_RPILE,
73 EQN_TOK_CPILE,
74 EQN_TOK_MATRIX,
75 EQN_TOK_CCOL,
76 EQN_TOK_LCOL,
77 EQN_TOK_RCOL,
78 EQN_TOK_DELIM,
79 EQN_TOK_DEFINE,
80 EQN_TOK_TDEFINE,
81 EQN_TOK_NDEFINE,
82 EQN_TOK_UNDEF,
83 EQN_TOK_EOF,
84 EQN_TOK_ABOVE,
85 EQN_TOK__MAX
86 };
87
88 static const char *eqn_toks[EQN_TOK__MAX] = {
89 "dyad", /* EQN_TOK_DYAD */
90 "vec", /* EQN_TOK_VEC */
91 "under", /* EQN_TOK_UNDER */
92 "bar", /* EQN_TOK_BAR */
93 "tilde", /* EQN_TOK_TILDE */
94 "hat", /* EQN_TOK_HAT */
95 "dot", /* EQN_TOK_DOT */
96 "dotdot", /* EQN_TOK_DOTDOT */
97 "fwd", /* EQN_TOK_FWD * */
98 "back", /* EQN_TOK_BACK */
99 "down", /* EQN_TOK_DOWN */
100 "up", /* EQN_TOK_UP */
101 "fat", /* EQN_TOK_FAT */
102 "roman", /* EQN_TOK_ROMAN */
103 "italic", /* EQN_TOK_ITALIC */
104 "bold", /* EQN_TOK_BOLD */
105 "size", /* EQN_TOK_SIZE */
106 "sub", /* EQN_TOK_SUB */
107 "sup", /* EQN_TOK_SUP */
108 "sqrt", /* EQN_TOK_SQRT */
109 "over", /* EQN_TOK_OVER */
110 "from", /* EQN_TOK_FROM */
111 "to", /* EQN_TOK_TO */
112 "{", /* EQN_TOK_BRACE_OPEN */
113 "}", /* EQN_TOK_BRACE_CLOSE */
114 "gsize", /* EQN_TOK_GSIZE */
115 "gfont", /* EQN_TOK_GFONT */
116 "mark", /* EQN_TOK_MARK */
117 "lineup", /* EQN_TOK_LINEUP */
118 "left", /* EQN_TOK_LEFT */
119 "right", /* EQN_TOK_RIGHT */
120 "pile", /* EQN_TOK_PILE */
121 "lpile", /* EQN_TOK_LPILE */
122 "rpile", /* EQN_TOK_RPILE */
123 "cpile", /* EQN_TOK_CPILE */
124 "matrix", /* EQN_TOK_MATRIX */
125 "ccol", /* EQN_TOK_CCOL */
126 "lcol", /* EQN_TOK_LCOL */
127 "rcol", /* EQN_TOK_RCOL */
128 "delim", /* EQN_TOK_DELIM */
129 "define", /* EQN_TOK_DEFINE */
130 "tdefine", /* EQN_TOK_TDEFINE */
131 "ndefine", /* EQN_TOK_NDEFINE */
132 "undef", /* EQN_TOK_UNDEF */
133 NULL, /* EQN_TOK_EOF */
134 "above", /* EQN_TOK_ABOVE */
135 };
136
137 enum eqn_symt {
138 EQNSYM_alpha,
139 EQNSYM_beta,
140 EQNSYM_chi,
141 EQNSYM_delta,
142 EQNSYM_epsilon,
143 EQNSYM_eta,
144 EQNSYM_gamma,
145 EQNSYM_iota,
146 EQNSYM_kappa,
147 EQNSYM_lambda,
148 EQNSYM_mu,
149 EQNSYM_nu,
150 EQNSYM_omega,
151 EQNSYM_omicron,
152 EQNSYM_phi,
153 EQNSYM_pi,
154 EQNSYM_ps,
155 EQNSYM_rho,
156 EQNSYM_sigma,
157 EQNSYM_tau,
158 EQNSYM_theta,
159 EQNSYM_upsilon,
160 EQNSYM_xi,
161 EQNSYM_zeta,
162 EQNSYM_DELTA,
163 EQNSYM_GAMMA,
164 EQNSYM_LAMBDA,
165 EQNSYM_OMEGA,
166 EQNSYM_PHI,
167 EQNSYM_PI,
168 EQNSYM_PSI,
169 EQNSYM_SIGMA,
170 EQNSYM_THETA,
171 EQNSYM_UPSILON,
172 EQNSYM_XI,
173 EQNSYM_inter,
174 EQNSYM_union,
175 EQNSYM_prod,
176 EQNSYM_int,
177 EQNSYM_sum,
178 EQNSYM_grad,
179 EQNSYM_del,
180 EQNSYM_times,
181 EQNSYM_cdot,
182 EQNSYM_nothing,
183 EQNSYM_approx,
184 EQNSYM_prime,
185 EQNSYM_half,
186 EQNSYM_partial,
187 EQNSYM_inf,
188 EQNSYM_muchgreat,
189 EQNSYM_muchless,
190 EQNSYM_larrow,
191 EQNSYM_rarrow,
192 EQNSYM_pm,
193 EQNSYM_nequal,
194 EQNSYM_equiv,
195 EQNSYM_lessequal,
196 EQNSYM_moreequal,
197 EQNSYM_minus,
198 EQNSYM__MAX
199 };
200
201 struct eqnsym {
202 const char *str;
203 const char *sym;
204 };
205
206 static const struct eqnsym eqnsyms[EQNSYM__MAX] = {
207 { "alpha", "*a" }, /* EQNSYM_alpha */
208 { "beta", "*b" }, /* EQNSYM_beta */
209 { "chi", "*x" }, /* EQNSYM_chi */
210 { "delta", "*d" }, /* EQNSYM_delta */
211 { "epsilon", "*e" }, /* EQNSYM_epsilon */
212 { "eta", "*y" }, /* EQNSYM_eta */
213 { "gamma", "*g" }, /* EQNSYM_gamma */
214 { "iota", "*i" }, /* EQNSYM_iota */
215 { "kappa", "*k" }, /* EQNSYM_kappa */
216 { "lambda", "*l" }, /* EQNSYM_lambda */
217 { "mu", "*m" }, /* EQNSYM_mu */
218 { "nu", "*n" }, /* EQNSYM_nu */
219 { "omega", "*w" }, /* EQNSYM_omega */
220 { "omicron", "*o" }, /* EQNSYM_omicron */
221 { "phi", "*f" }, /* EQNSYM_phi */
222 { "pi", "*p" }, /* EQNSYM_pi */
223 { "psi", "*q" }, /* EQNSYM_psi */
224 { "rho", "*r" }, /* EQNSYM_rho */
225 { "sigma", "*s" }, /* EQNSYM_sigma */
226 { "tau", "*t" }, /* EQNSYM_tau */
227 { "theta", "*h" }, /* EQNSYM_theta */
228 { "upsilon", "*u" }, /* EQNSYM_upsilon */
229 { "xi", "*c" }, /* EQNSYM_xi */
230 { "zeta", "*z" }, /* EQNSYM_zeta */
231 { "DELTA", "*D" }, /* EQNSYM_DELTA */
232 { "GAMMA", "*G" }, /* EQNSYM_GAMMA */
233 { "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */
234 { "OMEGA", "*W" }, /* EQNSYM_OMEGA */
235 { "PHI", "*F" }, /* EQNSYM_PHI */
236 { "PI", "*P" }, /* EQNSYM_PI */
237 { "PSI", "*Q" }, /* EQNSYM_PSI */
238 { "SIGMA", "*S" }, /* EQNSYM_SIGMA */
239 { "THETA", "*H" }, /* EQNSYM_THETA */
240 { "UPSILON", "*U" }, /* EQNSYM_UPSILON */
241 { "XI", "*C" }, /* EQNSYM_XI */
242 { "inter", "ca" }, /* EQNSYM_inter */
243 { "union", "cu" }, /* EQNSYM_union */
244 { "prod", "product" }, /* EQNSYM_prod */
245 { "int", "integral" }, /* EQNSYM_int */
246 { "sum", "sum" }, /* EQNSYM_sum */
247 { "grad", "gr" }, /* EQNSYM_grad */
248 { "del", "gr" }, /* EQNSYM_del */
249 { "times", "mu" }, /* EQNSYM_times */
250 { "cdot", "pc" }, /* EQNSYM_cdot */
251 { "nothing", "&" }, /* EQNSYM_nothing */
252 { "approx", "~~" }, /* EQNSYM_approx */
253 { "prime", "fm" }, /* EQNSYM_prime */
254 { "half", "12" }, /* EQNSYM_half */
255 { "partial", "pd" }, /* EQNSYM_partial */
256 { "inf", "if" }, /* EQNSYM_inf */
257 { ">>", ">>" }, /* EQNSYM_muchgreat */
258 { "<<", "<<" }, /* EQNSYM_muchless */
259 { "<-", "<-" }, /* EQNSYM_larrow */
260 { "->", "->" }, /* EQNSYM_rarrow */
261 { "+-", "+-" }, /* EQNSYM_pm */
262 { "!=", "!=" }, /* EQNSYM_nequal */
263 { "==", "==" }, /* EQNSYM_equiv */
264 { "<=", "<=" }, /* EQNSYM_lessequal */
265 { ">=", ">=" }, /* EQNSYM_moreequal */
266 { "-", "mi" }, /* EQNSYM_minus */
267 };
268
269 static struct eqn_box *eqn_box_alloc(struct eqn_node *, struct eqn_box *);
270 static void eqn_box_free(struct eqn_box *);
271 static struct eqn_box *eqn_box_makebinary(struct eqn_node *,
272 enum eqn_post, struct eqn_box *);
273 static void eqn_def(struct eqn_node *);
274 static struct eqn_def *eqn_def_find(struct eqn_node *, const char *, size_t);
275 static void eqn_delim(struct eqn_node *);
276 static const char *eqn_next(struct eqn_node *, char, size_t *, int);
277 static const char *eqn_nextrawtok(struct eqn_node *, size_t *);
278 static const char *eqn_nexttok(struct eqn_node *, size_t *);
279 static enum rofferr eqn_parse(struct eqn_node *, struct eqn_box *);
280 static enum eqn_tok eqn_tok_parse(struct eqn_node *, char **);
281 static void eqn_undef(struct eqn_node *);
282
283
284 enum rofferr
eqn_read(struct eqn_node ** epp,int ln,const char * p,int pos,int * offs)285 eqn_read(struct eqn_node **epp, int ln,
286 const char *p, int pos, int *offs)
287 {
288 size_t sz;
289 struct eqn_node *ep;
290 enum rofferr er;
291
292 ep = *epp;
293
294 /*
295 * If we're the terminating mark, unset our equation status and
296 * validate the full equation.
297 */
298
299 if (0 == strncmp(p, ".EN", 3)) {
300 er = eqn_end(epp);
301 p += 3;
302 while (' ' == *p || '\t' == *p)
303 p++;
304 if ('\0' == *p)
305 return(er);
306 mandoc_vmsg(MANDOCERR_ARG_SKIP, ep->parse,
307 ln, pos, "EN %s", p);
308 return(er);
309 }
310
311 /*
312 * Build up the full string, replacing all newlines with regular
313 * whitespace.
314 */
315
316 sz = strlen(p + pos) + 1;
317 ep->data = mandoc_realloc(ep->data, ep->sz + sz + 1);
318
319 /* First invocation: nil terminate the string. */
320
321 if (0 == ep->sz)
322 *ep->data = '\0';
323
324 ep->sz += sz;
325 strlcat(ep->data, p + pos, ep->sz + 1);
326 strlcat(ep->data, " ", ep->sz + 1);
327 return(ROFF_IGN);
328 }
329
330 struct eqn_node *
eqn_alloc(int pos,int line,struct mparse * parse)331 eqn_alloc(int pos, int line, struct mparse *parse)
332 {
333 struct eqn_node *p;
334
335 p = mandoc_calloc(1, sizeof(struct eqn_node));
336
337 p->parse = parse;
338 p->eqn.ln = line;
339 p->eqn.pos = pos;
340 p->gsize = EQN_DEFSIZE;
341
342 return(p);
343 }
344
345 /*
346 * Find the key "key" of the give size within our eqn-defined values.
347 */
348 static struct eqn_def *
eqn_def_find(struct eqn_node * ep,const char * key,size_t sz)349 eqn_def_find(struct eqn_node *ep, const char *key, size_t sz)
350 {
351 int i;
352
353 for (i = 0; i < (int)ep->defsz; i++)
354 if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key,
355 ep->defs[i].keysz, key, sz))
356 return(&ep->defs[i]);
357
358 return(NULL);
359 }
360
361 /*
362 * Get the next token from the input stream using the given quote
363 * character.
364 * Optionally make any replacements.
365 */
366 static const char *
eqn_next(struct eqn_node * ep,char quote,size_t * sz,int repl)367 eqn_next(struct eqn_node *ep, char quote, size_t *sz, int repl)
368 {
369 char *start, *next;
370 int q, diff, lim;
371 size_t ssz, dummy;
372 struct eqn_def *def;
373
374 if (NULL == sz)
375 sz = &dummy;
376
377 lim = 0;
378 ep->rew = ep->cur;
379 again:
380 /* Prevent self-definitions. */
381
382 if (lim >= EQN_NEST_MAX) {
383 mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse,
384 ep->eqn.ln, ep->eqn.pos, NULL);
385 return(NULL);
386 }
387
388 ep->cur = ep->rew;
389 start = &ep->data[(int)ep->cur];
390 q = 0;
391
392 if ('\0' == *start)
393 return(NULL);
394
395 if (quote == *start) {
396 ep->cur++;
397 q = 1;
398 }
399
400 start = &ep->data[(int)ep->cur];
401
402 if ( ! q) {
403 if ('{' == *start || '}' == *start)
404 ssz = 1;
405 else
406 ssz = strcspn(start + 1, " ^~\"{}\t") + 1;
407 next = start + (int)ssz;
408 if ('\0' == *next)
409 next = NULL;
410 } else
411 next = strchr(start, quote);
412
413 if (NULL != next) {
414 *sz = (size_t)(next - start);
415 ep->cur += *sz;
416 if (q)
417 ep->cur++;
418 while (' ' == ep->data[(int)ep->cur] ||
419 '\t' == ep->data[(int)ep->cur] ||
420 '^' == ep->data[(int)ep->cur] ||
421 '~' == ep->data[(int)ep->cur])
422 ep->cur++;
423 } else {
424 if (q)
425 mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse,
426 ep->eqn.ln, ep->eqn.pos, NULL);
427 next = strchr(start, '\0');
428 *sz = (size_t)(next - start);
429 ep->cur += *sz;
430 }
431
432 /* Quotes aren't expanded for values. */
433
434 if (q || ! repl)
435 return(start);
436
437 if (NULL != (def = eqn_def_find(ep, start, *sz))) {
438 diff = def->valsz - *sz;
439
440 if (def->valsz > *sz) {
441 ep->sz += diff;
442 ep->data = mandoc_realloc(ep->data, ep->sz + 1);
443 ep->data[ep->sz] = '\0';
444 start = &ep->data[(int)ep->rew];
445 }
446
447 diff = def->valsz - *sz;
448 memmove(start + *sz + diff, start + *sz,
449 (strlen(start) - *sz) + 1);
450 memcpy(start, def->val, def->valsz);
451 goto again;
452 }
453
454 return(start);
455 }
456
457 /*
458 * Get the next delimited token using the default current quote
459 * character.
460 */
461 static const char *
eqn_nexttok(struct eqn_node * ep,size_t * sz)462 eqn_nexttok(struct eqn_node *ep, size_t *sz)
463 {
464
465 return(eqn_next(ep, '"', sz, 1));
466 }
467
468 /*
469 * Get next token without replacement.
470 */
471 static const char *
eqn_nextrawtok(struct eqn_node * ep,size_t * sz)472 eqn_nextrawtok(struct eqn_node *ep, size_t *sz)
473 {
474
475 return(eqn_next(ep, '"', sz, 0));
476 }
477
478 /*
479 * Parse a token from the stream of text.
480 * A token consists of one of the recognised eqn(7) strings.
481 * Strings are separated by delimiting marks.
482 * This returns EQN_TOK_EOF when there are no more tokens.
483 * If the token is an unrecognised string literal, then it returns
484 * EQN_TOK__MAX and sets the "p" pointer to an allocated, nil-terminated
485 * string.
486 * This must be later freed with free(3).
487 */
488 static enum eqn_tok
eqn_tok_parse(struct eqn_node * ep,char ** p)489 eqn_tok_parse(struct eqn_node *ep, char **p)
490 {
491 const char *start;
492 size_t i, sz;
493 int quoted;
494
495 if (NULL != p)
496 *p = NULL;
497
498 quoted = ep->data[ep->cur] == '"';
499
500 if (NULL == (start = eqn_nexttok(ep, &sz)))
501 return(EQN_TOK_EOF);
502
503 if (quoted) {
504 if (p != NULL)
505 *p = mandoc_strndup(start, sz);
506 return(EQN_TOK__MAX);
507 }
508
509 for (i = 0; i < EQN_TOK__MAX; i++) {
510 if (NULL == eqn_toks[i])
511 continue;
512 if (STRNEQ(start, sz, eqn_toks[i], strlen(eqn_toks[i])))
513 break;
514 }
515
516 if (i == EQN_TOK__MAX && NULL != p)
517 *p = mandoc_strndup(start, sz);
518
519 return(i);
520 }
521
522 static void
eqn_box_free(struct eqn_box * bp)523 eqn_box_free(struct eqn_box *bp)
524 {
525
526 if (bp->first)
527 eqn_box_free(bp->first);
528 if (bp->next)
529 eqn_box_free(bp->next);
530
531 free(bp->text);
532 free(bp->left);
533 free(bp->right);
534 free(bp->top);
535 free(bp->bottom);
536 free(bp);
537 }
538
539 /*
540 * Allocate a box as the last child of the parent node.
541 */
542 static struct eqn_box *
eqn_box_alloc(struct eqn_node * ep,struct eqn_box * parent)543 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent)
544 {
545 struct eqn_box *bp;
546
547 bp = mandoc_calloc(1, sizeof(struct eqn_box));
548 bp->parent = parent;
549 bp->parent->args++;
550 bp->expectargs = UINT_MAX;
551 bp->size = ep->gsize;
552
553 if (NULL != parent->first) {
554 parent->last->next = bp;
555 bp->prev = parent->last;
556 } else
557 parent->first = bp;
558
559 parent->last = bp;
560 return(bp);
561 }
562
563 /*
564 * Reparent the current last node (of the current parent) under a new
565 * EQN_SUBEXPR as the first element.
566 * Then return the new parent.
567 * The new EQN_SUBEXPR will have a two-child limit.
568 */
569 static struct eqn_box *
eqn_box_makebinary(struct eqn_node * ep,enum eqn_post pos,struct eqn_box * parent)570 eqn_box_makebinary(struct eqn_node *ep,
571 enum eqn_post pos, struct eqn_box *parent)
572 {
573 struct eqn_box *b, *newb;
574
575 assert(NULL != parent->last);
576 b = parent->last;
577 if (parent->last == parent->first)
578 parent->first = NULL;
579 parent->args--;
580 parent->last = b->prev;
581 b->prev = NULL;
582 newb = eqn_box_alloc(ep, parent);
583 newb->pos = pos;
584 newb->type = EQN_SUBEXPR;
585 newb->expectargs = 2;
586 newb->args = 1;
587 newb->first = newb->last = b;
588 newb->first->next = NULL;
589 b->parent = newb;
590 return(newb);
591 }
592
593 /*
594 * Parse the "delim" control statement.
595 */
596 static void
eqn_delim(struct eqn_node * ep)597 eqn_delim(struct eqn_node *ep)
598 {
599 const char *start;
600 size_t sz;
601
602 if ((start = eqn_nextrawtok(ep, &sz)) == NULL)
603 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
604 ep->eqn.ln, ep->eqn.pos, "delim");
605 else if (strncmp(start, "off", 3) == 0)
606 ep->delim = 0;
607 else if (strncmp(start, "on", 2) == 0) {
608 if (ep->odelim && ep->cdelim)
609 ep->delim = 1;
610 } else if (start[1] != '\0') {
611 ep->odelim = start[0];
612 ep->cdelim = start[1];
613 ep->delim = 1;
614 }
615 }
616
617 /*
618 * Undefine a previously-defined string.
619 */
620 static void
eqn_undef(struct eqn_node * ep)621 eqn_undef(struct eqn_node *ep)
622 {
623 const char *start;
624 struct eqn_def *def;
625 size_t sz;
626
627 if ((start = eqn_nextrawtok(ep, &sz)) == NULL) {
628 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
629 ep->eqn.ln, ep->eqn.pos, "undef");
630 return;
631 }
632 if ((def = eqn_def_find(ep, start, sz)) == NULL)
633 return;
634 free(def->key);
635 free(def->val);
636 def->key = def->val = NULL;
637 def->keysz = def->valsz = 0;
638 }
639
640 static void
eqn_def(struct eqn_node * ep)641 eqn_def(struct eqn_node *ep)
642 {
643 const char *start;
644 size_t sz;
645 struct eqn_def *def;
646 int i;
647
648 if ((start = eqn_nextrawtok(ep, &sz)) == NULL) {
649 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
650 ep->eqn.ln, ep->eqn.pos, "define");
651 return;
652 }
653
654 /*
655 * Search for a key that already exists.
656 * Create a new key if none is found.
657 */
658 if (NULL == (def = eqn_def_find(ep, start, sz))) {
659 /* Find holes in string array. */
660 for (i = 0; i < (int)ep->defsz; i++)
661 if (0 == ep->defs[i].keysz)
662 break;
663
664 if (i == (int)ep->defsz) {
665 ep->defsz++;
666 ep->defs = mandoc_reallocarray(ep->defs,
667 ep->defsz, sizeof(struct eqn_def));
668 ep->defs[i].key = ep->defs[i].val = NULL;
669 }
670
671 def = ep->defs + i;
672 free(def->key);
673 def->key = mandoc_strndup(start, sz);
674 def->keysz = sz;
675 }
676
677 start = eqn_next(ep, ep->data[(int)ep->cur], &sz, 0);
678 if (start == NULL) {
679 mandoc_vmsg(MANDOCERR_REQ_EMPTY, ep->parse,
680 ep->eqn.ln, ep->eqn.pos, "define %s", def->key);
681 free(def->key);
682 free(def->val);
683 def->key = def->val = NULL;
684 def->keysz = def->valsz = 0;
685 return;
686 }
687 free(def->val);
688 def->val = mandoc_strndup(start, sz);
689 def->valsz = sz;
690 }
691
692 /*
693 * Recursively parse an eqn(7) expression.
694 */
695 static enum rofferr
eqn_parse(struct eqn_node * ep,struct eqn_box * parent)696 eqn_parse(struct eqn_node *ep, struct eqn_box *parent)
697 {
698 char sym[64];
699 struct eqn_box *cur;
700 const char *start;
701 char *p;
702 size_t i, sz;
703 enum eqn_tok tok, subtok;
704 enum eqn_post pos;
705 int size;
706
707 assert(parent != NULL);
708
709 /*
710 * Empty equation.
711 * Do not add it to the high-level syntax tree.
712 */
713
714 if (ep->data == NULL)
715 return(ROFF_IGN);
716
717 next_tok:
718 tok = eqn_tok_parse(ep, &p);
719
720 this_tok:
721 switch (tok) {
722 case (EQN_TOK_UNDEF):
723 eqn_undef(ep);
724 break;
725 case (EQN_TOK_NDEFINE):
726 case (EQN_TOK_DEFINE):
727 eqn_def(ep);
728 break;
729 case (EQN_TOK_TDEFINE):
730 if (eqn_nextrawtok(ep, NULL) == NULL ||
731 eqn_next(ep, ep->data[(int)ep->cur], NULL, 0) == NULL)
732 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
733 ep->eqn.ln, ep->eqn.pos, "tdefine");
734 break;
735 case (EQN_TOK_DELIM):
736 eqn_delim(ep);
737 break;
738 case (EQN_TOK_GFONT):
739 if (eqn_nextrawtok(ep, NULL) == NULL)
740 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
741 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
742 break;
743 case (EQN_TOK_MARK):
744 case (EQN_TOK_LINEUP):
745 /* Ignore these. */
746 break;
747 case (EQN_TOK_DYAD):
748 case (EQN_TOK_VEC):
749 case (EQN_TOK_UNDER):
750 case (EQN_TOK_BAR):
751 case (EQN_TOK_TILDE):
752 case (EQN_TOK_HAT):
753 case (EQN_TOK_DOT):
754 case (EQN_TOK_DOTDOT):
755 if (parent->last == NULL) {
756 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
757 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
758 cur = eqn_box_alloc(ep, parent);
759 cur->type = EQN_TEXT;
760 cur->text = mandoc_strdup("");
761 }
762 parent = eqn_box_makebinary(ep, EQNPOS_NONE, parent);
763 parent->type = EQN_LISTONE;
764 parent->expectargs = 1;
765 switch (tok) {
766 case (EQN_TOK_DOTDOT):
767 strlcpy(sym, "\\[ad]", sizeof(sym));
768 break;
769 case (EQN_TOK_VEC):
770 strlcpy(sym, "\\[->]", sizeof(sym));
771 break;
772 case (EQN_TOK_DYAD):
773 strlcpy(sym, "\\[<>]", sizeof(sym));
774 break;
775 case (EQN_TOK_TILDE):
776 strlcpy(sym, "\\[a~]", sizeof(sym));
777 break;
778 case (EQN_TOK_UNDER):
779 strlcpy(sym, "\\[ul]", sizeof(sym));
780 break;
781 case (EQN_TOK_BAR):
782 strlcpy(sym, "\\[rl]", sizeof(sym));
783 break;
784 case (EQN_TOK_DOT):
785 strlcpy(sym, "\\[a.]", sizeof(sym));
786 break;
787 case (EQN_TOK_HAT):
788 strlcpy(sym, "\\[ha]", sizeof(sym));
789 break;
790 default:
791 abort();
792 }
793
794 switch (tok) {
795 case (EQN_TOK_DOTDOT):
796 case (EQN_TOK_VEC):
797 case (EQN_TOK_DYAD):
798 case (EQN_TOK_TILDE):
799 case (EQN_TOK_BAR):
800 case (EQN_TOK_DOT):
801 case (EQN_TOK_HAT):
802 parent->top = mandoc_strdup(sym);
803 break;
804 case (EQN_TOK_UNDER):
805 parent->bottom = mandoc_strdup(sym);
806 break;
807 default:
808 abort();
809 }
810 parent = parent->parent;
811 break;
812 case (EQN_TOK_FWD):
813 case (EQN_TOK_BACK):
814 case (EQN_TOK_DOWN):
815 case (EQN_TOK_UP):
816 subtok = eqn_tok_parse(ep, NULL);
817 if (subtok != EQN_TOK__MAX) {
818 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
819 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
820 tok = subtok;
821 goto this_tok;
822 }
823 break;
824 case (EQN_TOK_FAT):
825 case (EQN_TOK_ROMAN):
826 case (EQN_TOK_ITALIC):
827 case (EQN_TOK_BOLD):
828 while (parent->args == parent->expectargs)
829 parent = parent->parent;
830 /*
831 * These values apply to the next word or sequence of
832 * words; thus, we mark that we'll have a child with
833 * exactly one of those.
834 */
835 parent = eqn_box_alloc(ep, parent);
836 parent->type = EQN_LISTONE;
837 parent->expectargs = 1;
838 switch (tok) {
839 case (EQN_TOK_FAT):
840 parent->font = EQNFONT_FAT;
841 break;
842 case (EQN_TOK_ROMAN):
843 parent->font = EQNFONT_ROMAN;
844 break;
845 case (EQN_TOK_ITALIC):
846 parent->font = EQNFONT_ITALIC;
847 break;
848 case (EQN_TOK_BOLD):
849 parent->font = EQNFONT_BOLD;
850 break;
851 default:
852 abort();
853 }
854 break;
855 case (EQN_TOK_SIZE):
856 case (EQN_TOK_GSIZE):
857 /* Accept two values: integral size and a single. */
858 if (NULL == (start = eqn_nexttok(ep, &sz))) {
859 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
860 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
861 break;
862 }
863 size = mandoc_strntoi(start, sz, 10);
864 if (-1 == size) {
865 mandoc_msg(MANDOCERR_IT_NONUM, ep->parse,
866 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
867 break;
868 }
869 if (EQN_TOK_GSIZE == tok) {
870 ep->gsize = size;
871 break;
872 }
873 parent = eqn_box_alloc(ep, parent);
874 parent->type = EQN_LISTONE;
875 parent->expectargs = 1;
876 parent->size = size;
877 break;
878 case (EQN_TOK_FROM):
879 case (EQN_TOK_TO):
880 case (EQN_TOK_SUB):
881 case (EQN_TOK_SUP):
882 /*
883 * We have a left-right-associative expression.
884 * Repivot under a positional node, open a child scope
885 * and keep on reading.
886 */
887 if (parent->last == NULL) {
888 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
889 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
890 cur = eqn_box_alloc(ep, parent);
891 cur->type = EQN_TEXT;
892 cur->text = mandoc_strdup("");
893 }
894 /* Handle the "subsup" and "fromto" positions. */
895 if (EQN_TOK_SUP == tok && parent->pos == EQNPOS_SUB) {
896 parent->expectargs = 3;
897 parent->pos = EQNPOS_SUBSUP;
898 break;
899 }
900 if (EQN_TOK_TO == tok && parent->pos == EQNPOS_FROM) {
901 parent->expectargs = 3;
902 parent->pos = EQNPOS_FROMTO;
903 break;
904 }
905 switch (tok) {
906 case (EQN_TOK_FROM):
907 pos = EQNPOS_FROM;
908 break;
909 case (EQN_TOK_TO):
910 pos = EQNPOS_TO;
911 break;
912 case (EQN_TOK_SUP):
913 pos = EQNPOS_SUP;
914 break;
915 case (EQN_TOK_SUB):
916 pos = EQNPOS_SUB;
917 break;
918 default:
919 abort();
920 }
921 parent = eqn_box_makebinary(ep, pos, parent);
922 break;
923 case (EQN_TOK_SQRT):
924 while (parent->args == parent->expectargs)
925 parent = parent->parent;
926 /*
927 * Accept a left-right-associative set of arguments just
928 * like sub and sup and friends but without rebalancing
929 * under a pivot.
930 */
931 parent = eqn_box_alloc(ep, parent);
932 parent->type = EQN_SUBEXPR;
933 parent->pos = EQNPOS_SQRT;
934 parent->expectargs = 1;
935 break;
936 case (EQN_TOK_OVER):
937 /*
938 * We have a right-left-associative fraction.
939 * Close out anything that's currently open, then
940 * rebalance and continue reading.
941 */
942 if (parent->last == NULL) {
943 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse,
944 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
945 cur = eqn_box_alloc(ep, parent);
946 cur->type = EQN_TEXT;
947 cur->text = mandoc_strdup("");
948 }
949 while (EQN_SUBEXPR == parent->type)
950 parent = parent->parent;
951 parent = eqn_box_makebinary(ep, EQNPOS_OVER, parent);
952 break;
953 case (EQN_TOK_RIGHT):
954 case (EQN_TOK_BRACE_CLOSE):
955 /*
956 * Close out the existing brace.
957 * FIXME: this is a shitty sentinel: we should really
958 * have a native EQN_BRACE type or whatnot.
959 */
960 for (cur = parent; cur != NULL; cur = cur->parent)
961 if (cur->type == EQN_LIST &&
962 (tok == EQN_TOK_BRACE_CLOSE ||
963 cur->left != NULL))
964 break;
965 if (cur == NULL) {
966 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse,
967 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
968 break;
969 }
970 parent = cur;
971 if (EQN_TOK_RIGHT == tok) {
972 if (NULL == (start = eqn_nexttok(ep, &sz))) {
973 mandoc_msg(MANDOCERR_REQ_EMPTY,
974 ep->parse, ep->eqn.ln,
975 ep->eqn.pos, eqn_toks[tok]);
976 break;
977 }
978 /* Handling depends on right/left. */
979 if (STRNEQ(start, sz, "ceiling", 7)) {
980 strlcpy(sym, "\\[rc]", sizeof(sym));
981 parent->right = mandoc_strdup(sym);
982 } else if (STRNEQ(start, sz, "floor", 5)) {
983 strlcpy(sym, "\\[rf]", sizeof(sym));
984 parent->right = mandoc_strdup(sym);
985 } else
986 parent->right = mandoc_strndup(start, sz);
987 }
988 parent = parent->parent;
989 if (EQN_TOK_BRACE_CLOSE == tok && parent &&
990 (parent->type == EQN_PILE ||
991 parent->type == EQN_MATRIX))
992 parent = parent->parent;
993 /* Close out any "singleton" lists. */
994 while (parent->type == EQN_LISTONE &&
995 parent->args == parent->expectargs)
996 parent = parent->parent;
997 break;
998 case (EQN_TOK_BRACE_OPEN):
999 case (EQN_TOK_LEFT):
1000 /*
1001 * If we already have something in the stack and we're
1002 * in an expression, then rewind til we're not any more
1003 * (just like with the text node).
1004 */
1005 while (parent->args == parent->expectargs)
1006 parent = parent->parent;
1007 if (EQN_TOK_LEFT == tok &&
1008 (start = eqn_nexttok(ep, &sz)) == NULL) {
1009 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse,
1010 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
1011 break;
1012 }
1013 parent = eqn_box_alloc(ep, parent);
1014 parent->type = EQN_LIST;
1015 if (EQN_TOK_LEFT == tok) {
1016 if (STRNEQ(start, sz, "ceiling", 7)) {
1017 strlcpy(sym, "\\[lc]", sizeof(sym));
1018 parent->left = mandoc_strdup(sym);
1019 } else if (STRNEQ(start, sz, "floor", 5)) {
1020 strlcpy(sym, "\\[lf]", sizeof(sym));
1021 parent->left = mandoc_strdup(sym);
1022 } else
1023 parent->left = mandoc_strndup(start, sz);
1024 }
1025 break;
1026 case (EQN_TOK_PILE):
1027 case (EQN_TOK_LPILE):
1028 case (EQN_TOK_RPILE):
1029 case (EQN_TOK_CPILE):
1030 case (EQN_TOK_CCOL):
1031 case (EQN_TOK_LCOL):
1032 case (EQN_TOK_RCOL):
1033 while (parent->args == parent->expectargs)
1034 parent = parent->parent;
1035 parent = eqn_box_alloc(ep, parent);
1036 parent->type = EQN_PILE;
1037 parent->expectargs = 1;
1038 break;
1039 case (EQN_TOK_ABOVE):
1040 for (cur = parent; cur != NULL; cur = cur->parent)
1041 if (cur->type == EQN_PILE)
1042 break;
1043 if (cur == NULL) {
1044 mandoc_msg(MANDOCERR_IT_STRAY, ep->parse,
1045 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]);
1046 break;
1047 }
1048 parent = eqn_box_alloc(ep, cur);
1049 parent->type = EQN_LIST;
1050 break;
1051 case (EQN_TOK_MATRIX):
1052 while (parent->args == parent->expectargs)
1053 parent = parent->parent;
1054 parent = eqn_box_alloc(ep, parent);
1055 parent->type = EQN_MATRIX;
1056 parent->expectargs = 1;
1057 break;
1058 case (EQN_TOK_EOF):
1059 /*
1060 * End of file!
1061 * TODO: make sure we're not in an open subexpression.
1062 */
1063 return(ROFF_EQN);
1064 default:
1065 assert(tok == EQN_TOK__MAX);
1066 assert(NULL != p);
1067 /*
1068 * If we already have something in the stack and we're
1069 * in an expression, then rewind til we're not any more.
1070 */
1071 while (parent->args == parent->expectargs)
1072 parent = parent->parent;
1073 cur = eqn_box_alloc(ep, parent);
1074 cur->type = EQN_TEXT;
1075 for (i = 0; i < EQNSYM__MAX; i++)
1076 if (0 == strcmp(eqnsyms[i].str, p)) {
1077 (void)snprintf(sym, sizeof(sym),
1078 "\\[%s]", eqnsyms[i].sym);
1079 cur->text = mandoc_strdup(sym);
1080 free(p);
1081 break;
1082 }
1083
1084 if (i == EQNSYM__MAX)
1085 cur->text = p;
1086 /*
1087 * Post-process list status.
1088 */
1089 while (parent->type == EQN_LISTONE &&
1090 parent->args == parent->expectargs)
1091 parent = parent->parent;
1092 break;
1093 }
1094 goto next_tok;
1095 }
1096
1097 enum rofferr
eqn_end(struct eqn_node ** epp)1098 eqn_end(struct eqn_node **epp)
1099 {
1100 struct eqn_node *ep;
1101
1102 ep = *epp;
1103 *epp = NULL;
1104
1105 ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box));
1106 ep->eqn.root->expectargs = UINT_MAX;
1107 return(eqn_parse(ep, ep->eqn.root));
1108 }
1109
1110 void
eqn_free(struct eqn_node * p)1111 eqn_free(struct eqn_node *p)
1112 {
1113 int i;
1114
1115 eqn_box_free(p->eqn.root);
1116
1117 for (i = 0; i < (int)p->defsz; i++) {
1118 free(p->defs[i].key);
1119 free(p->defs[i].val);
1120 }
1121
1122 free(p->data);
1123 free(p->defs);
1124 free(p);
1125 }
1126