1 /* $Id: eqn.c,v 1.61 2016/01/08 00:50:45 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include "config.h" 19 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <limits.h> 24 #include <stdio.h> 25 #include <stdlib.h> 26 #include <string.h> 27 #include <time.h> 28 29 #include "mandoc.h" 30 #include "mandoc_aux.h" 31 #include "libmandoc.h" 32 #include "libroff.h" 33 34 #define EQN_NEST_MAX 128 /* maximum nesting of defines */ 35 #define STRNEQ(p1, sz1, p2, sz2) \ 36 ((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1))) 37 38 enum eqn_tok { 39 EQN_TOK_DYAD = 0, 40 EQN_TOK_VEC, 41 EQN_TOK_UNDER, 42 EQN_TOK_BAR, 43 EQN_TOK_TILDE, 44 EQN_TOK_HAT, 45 EQN_TOK_DOT, 46 EQN_TOK_DOTDOT, 47 EQN_TOK_FWD, 48 EQN_TOK_BACK, 49 EQN_TOK_DOWN, 50 EQN_TOK_UP, 51 EQN_TOK_FAT, 52 EQN_TOK_ROMAN, 53 EQN_TOK_ITALIC, 54 EQN_TOK_BOLD, 55 EQN_TOK_SIZE, 56 EQN_TOK_SUB, 57 EQN_TOK_SUP, 58 EQN_TOK_SQRT, 59 EQN_TOK_OVER, 60 EQN_TOK_FROM, 61 EQN_TOK_TO, 62 EQN_TOK_BRACE_OPEN, 63 EQN_TOK_BRACE_CLOSE, 64 EQN_TOK_GSIZE, 65 EQN_TOK_GFONT, 66 EQN_TOK_MARK, 67 EQN_TOK_LINEUP, 68 EQN_TOK_LEFT, 69 EQN_TOK_RIGHT, 70 EQN_TOK_PILE, 71 EQN_TOK_LPILE, 72 EQN_TOK_RPILE, 73 EQN_TOK_CPILE, 74 EQN_TOK_MATRIX, 75 EQN_TOK_CCOL, 76 EQN_TOK_LCOL, 77 EQN_TOK_RCOL, 78 EQN_TOK_DELIM, 79 EQN_TOK_DEFINE, 80 EQN_TOK_TDEFINE, 81 EQN_TOK_NDEFINE, 82 EQN_TOK_UNDEF, 83 EQN_TOK_EOF, 84 EQN_TOK_ABOVE, 85 EQN_TOK__MAX 86 }; 87 88 static const char *eqn_toks[EQN_TOK__MAX] = { 89 "dyad", /* EQN_TOK_DYAD */ 90 "vec", /* EQN_TOK_VEC */ 91 "under", /* EQN_TOK_UNDER */ 92 "bar", /* EQN_TOK_BAR */ 93 "tilde", /* EQN_TOK_TILDE */ 94 "hat", /* EQN_TOK_HAT */ 95 "dot", /* EQN_TOK_DOT */ 96 "dotdot", /* EQN_TOK_DOTDOT */ 97 "fwd", /* EQN_TOK_FWD * */ 98 "back", /* EQN_TOK_BACK */ 99 "down", /* EQN_TOK_DOWN */ 100 "up", /* EQN_TOK_UP */ 101 "fat", /* EQN_TOK_FAT */ 102 "roman", /* EQN_TOK_ROMAN */ 103 "italic", /* EQN_TOK_ITALIC */ 104 "bold", /* EQN_TOK_BOLD */ 105 "size", /* EQN_TOK_SIZE */ 106 "sub", /* EQN_TOK_SUB */ 107 "sup", /* EQN_TOK_SUP */ 108 "sqrt", /* EQN_TOK_SQRT */ 109 "over", /* EQN_TOK_OVER */ 110 "from", /* EQN_TOK_FROM */ 111 "to", /* EQN_TOK_TO */ 112 "{", /* EQN_TOK_BRACE_OPEN */ 113 "}", /* EQN_TOK_BRACE_CLOSE */ 114 "gsize", /* EQN_TOK_GSIZE */ 115 "gfont", /* EQN_TOK_GFONT */ 116 "mark", /* EQN_TOK_MARK */ 117 "lineup", /* EQN_TOK_LINEUP */ 118 "left", /* EQN_TOK_LEFT */ 119 "right", /* EQN_TOK_RIGHT */ 120 "pile", /* EQN_TOK_PILE */ 121 "lpile", /* EQN_TOK_LPILE */ 122 "rpile", /* EQN_TOK_RPILE */ 123 "cpile", /* EQN_TOK_CPILE */ 124 "matrix", /* EQN_TOK_MATRIX */ 125 "ccol", /* EQN_TOK_CCOL */ 126 "lcol", /* EQN_TOK_LCOL */ 127 "rcol", /* EQN_TOK_RCOL */ 128 "delim", /* EQN_TOK_DELIM */ 129 "define", /* EQN_TOK_DEFINE */ 130 "tdefine", /* EQN_TOK_TDEFINE */ 131 "ndefine", /* EQN_TOK_NDEFINE */ 132 "undef", /* EQN_TOK_UNDEF */ 133 NULL, /* EQN_TOK_EOF */ 134 "above", /* EQN_TOK_ABOVE */ 135 }; 136 137 enum eqn_symt { 138 EQNSYM_alpha, 139 EQNSYM_beta, 140 EQNSYM_chi, 141 EQNSYM_delta, 142 EQNSYM_epsilon, 143 EQNSYM_eta, 144 EQNSYM_gamma, 145 EQNSYM_iota, 146 EQNSYM_kappa, 147 EQNSYM_lambda, 148 EQNSYM_mu, 149 EQNSYM_nu, 150 EQNSYM_omega, 151 EQNSYM_omicron, 152 EQNSYM_phi, 153 EQNSYM_pi, 154 EQNSYM_ps, 155 EQNSYM_rho, 156 EQNSYM_sigma, 157 EQNSYM_tau, 158 EQNSYM_theta, 159 EQNSYM_upsilon, 160 EQNSYM_xi, 161 EQNSYM_zeta, 162 EQNSYM_DELTA, 163 EQNSYM_GAMMA, 164 EQNSYM_LAMBDA, 165 EQNSYM_OMEGA, 166 EQNSYM_PHI, 167 EQNSYM_PI, 168 EQNSYM_PSI, 169 EQNSYM_SIGMA, 170 EQNSYM_THETA, 171 EQNSYM_UPSILON, 172 EQNSYM_XI, 173 EQNSYM_inter, 174 EQNSYM_union, 175 EQNSYM_prod, 176 EQNSYM_int, 177 EQNSYM_sum, 178 EQNSYM_grad, 179 EQNSYM_del, 180 EQNSYM_times, 181 EQNSYM_cdot, 182 EQNSYM_nothing, 183 EQNSYM_approx, 184 EQNSYM_prime, 185 EQNSYM_half, 186 EQNSYM_partial, 187 EQNSYM_inf, 188 EQNSYM_muchgreat, 189 EQNSYM_muchless, 190 EQNSYM_larrow, 191 EQNSYM_rarrow, 192 EQNSYM_pm, 193 EQNSYM_nequal, 194 EQNSYM_equiv, 195 EQNSYM_lessequal, 196 EQNSYM_moreequal, 197 EQNSYM_minus, 198 EQNSYM__MAX 199 }; 200 201 struct eqnsym { 202 const char *str; 203 const char *sym; 204 }; 205 206 static const struct eqnsym eqnsyms[EQNSYM__MAX] = { 207 { "alpha", "*a" }, /* EQNSYM_alpha */ 208 { "beta", "*b" }, /* EQNSYM_beta */ 209 { "chi", "*x" }, /* EQNSYM_chi */ 210 { "delta", "*d" }, /* EQNSYM_delta */ 211 { "epsilon", "*e" }, /* EQNSYM_epsilon */ 212 { "eta", "*y" }, /* EQNSYM_eta */ 213 { "gamma", "*g" }, /* EQNSYM_gamma */ 214 { "iota", "*i" }, /* EQNSYM_iota */ 215 { "kappa", "*k" }, /* EQNSYM_kappa */ 216 { "lambda", "*l" }, /* EQNSYM_lambda */ 217 { "mu", "*m" }, /* EQNSYM_mu */ 218 { "nu", "*n" }, /* EQNSYM_nu */ 219 { "omega", "*w" }, /* EQNSYM_omega */ 220 { "omicron", "*o" }, /* EQNSYM_omicron */ 221 { "phi", "*f" }, /* EQNSYM_phi */ 222 { "pi", "*p" }, /* EQNSYM_pi */ 223 { "psi", "*q" }, /* EQNSYM_psi */ 224 { "rho", "*r" }, /* EQNSYM_rho */ 225 { "sigma", "*s" }, /* EQNSYM_sigma */ 226 { "tau", "*t" }, /* EQNSYM_tau */ 227 { "theta", "*h" }, /* EQNSYM_theta */ 228 { "upsilon", "*u" }, /* EQNSYM_upsilon */ 229 { "xi", "*c" }, /* EQNSYM_xi */ 230 { "zeta", "*z" }, /* EQNSYM_zeta */ 231 { "DELTA", "*D" }, /* EQNSYM_DELTA */ 232 { "GAMMA", "*G" }, /* EQNSYM_GAMMA */ 233 { "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */ 234 { "OMEGA", "*W" }, /* EQNSYM_OMEGA */ 235 { "PHI", "*F" }, /* EQNSYM_PHI */ 236 { "PI", "*P" }, /* EQNSYM_PI */ 237 { "PSI", "*Q" }, /* EQNSYM_PSI */ 238 { "SIGMA", "*S" }, /* EQNSYM_SIGMA */ 239 { "THETA", "*H" }, /* EQNSYM_THETA */ 240 { "UPSILON", "*U" }, /* EQNSYM_UPSILON */ 241 { "XI", "*C" }, /* EQNSYM_XI */ 242 { "inter", "ca" }, /* EQNSYM_inter */ 243 { "union", "cu" }, /* EQNSYM_union */ 244 { "prod", "product" }, /* EQNSYM_prod */ 245 { "int", "integral" }, /* EQNSYM_int */ 246 { "sum", "sum" }, /* EQNSYM_sum */ 247 { "grad", "gr" }, /* EQNSYM_grad */ 248 { "del", "gr" }, /* EQNSYM_del */ 249 { "times", "mu" }, /* EQNSYM_times */ 250 { "cdot", "pc" }, /* EQNSYM_cdot */ 251 { "nothing", "&" }, /* EQNSYM_nothing */ 252 { "approx", "~~" }, /* EQNSYM_approx */ 253 { "prime", "fm" }, /* EQNSYM_prime */ 254 { "half", "12" }, /* EQNSYM_half */ 255 { "partial", "pd" }, /* EQNSYM_partial */ 256 { "inf", "if" }, /* EQNSYM_inf */ 257 { ">>", ">>" }, /* EQNSYM_muchgreat */ 258 { "<<", "<<" }, /* EQNSYM_muchless */ 259 { "<-", "<-" }, /* EQNSYM_larrow */ 260 { "->", "->" }, /* EQNSYM_rarrow */ 261 { "+-", "+-" }, /* EQNSYM_pm */ 262 { "!=", "!=" }, /* EQNSYM_nequal */ 263 { "==", "==" }, /* EQNSYM_equiv */ 264 { "<=", "<=" }, /* EQNSYM_lessequal */ 265 { ">=", ">=" }, /* EQNSYM_moreequal */ 266 { "-", "mi" }, /* EQNSYM_minus */ 267 }; 268 269 static struct eqn_box *eqn_box_alloc(struct eqn_node *, struct eqn_box *); 270 static void eqn_box_free(struct eqn_box *); 271 static struct eqn_box *eqn_box_makebinary(struct eqn_node *, 272 enum eqn_post, struct eqn_box *); 273 static void eqn_def(struct eqn_node *); 274 static struct eqn_def *eqn_def_find(struct eqn_node *, const char *, size_t); 275 static void eqn_delim(struct eqn_node *); 276 static const char *eqn_next(struct eqn_node *, char, size_t *, int); 277 static const char *eqn_nextrawtok(struct eqn_node *, size_t *); 278 static const char *eqn_nexttok(struct eqn_node *, size_t *); 279 static enum rofferr eqn_parse(struct eqn_node *, struct eqn_box *); 280 static enum eqn_tok eqn_tok_parse(struct eqn_node *, char **); 281 static void eqn_undef(struct eqn_node *); 282 283 284 enum rofferr 285 eqn_read(struct eqn_node **epp, int ln, 286 const char *p, int pos, int *offs) 287 { 288 size_t sz; 289 struct eqn_node *ep; 290 enum rofferr er; 291 292 ep = *epp; 293 294 /* 295 * If we're the terminating mark, unset our equation status and 296 * validate the full equation. 297 */ 298 299 if (0 == strncmp(p, ".EN", 3)) { 300 er = eqn_end(epp); 301 p += 3; 302 while (' ' == *p || '\t' == *p) 303 p++; 304 if ('\0' == *p) 305 return er; 306 mandoc_vmsg(MANDOCERR_ARG_SKIP, ep->parse, 307 ln, pos, "EN %s", p); 308 return er; 309 } 310 311 /* 312 * Build up the full string, replacing all newlines with regular 313 * whitespace. 314 */ 315 316 sz = strlen(p + pos) + 1; 317 ep->data = mandoc_realloc(ep->data, ep->sz + sz + 1); 318 319 /* First invocation: nil terminate the string. */ 320 321 if (0 == ep->sz) 322 *ep->data = '\0'; 323 324 ep->sz += sz; 325 strlcat(ep->data, p + pos, ep->sz + 1); 326 strlcat(ep->data, " ", ep->sz + 1); 327 return ROFF_IGN; 328 } 329 330 struct eqn_node * 331 eqn_alloc(int pos, int line, struct mparse *parse) 332 { 333 struct eqn_node *p; 334 335 p = mandoc_calloc(1, sizeof(struct eqn_node)); 336 337 p->parse = parse; 338 p->eqn.ln = line; 339 p->eqn.pos = pos; 340 p->gsize = EQN_DEFSIZE; 341 342 return p; 343 } 344 345 /* 346 * Find the key "key" of the give size within our eqn-defined values. 347 */ 348 static struct eqn_def * 349 eqn_def_find(struct eqn_node *ep, const char *key, size_t sz) 350 { 351 int i; 352 353 for (i = 0; i < (int)ep->defsz; i++) 354 if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key, 355 ep->defs[i].keysz, key, sz)) 356 return &ep->defs[i]; 357 358 return NULL; 359 } 360 361 /* 362 * Get the next token from the input stream using the given quote 363 * character. 364 * Optionally make any replacements. 365 */ 366 static const char * 367 eqn_next(struct eqn_node *ep, char quote, size_t *sz, int repl) 368 { 369 char *start, *next; 370 int q, diff, lim; 371 size_t ssz, dummy; 372 struct eqn_def *def; 373 374 if (NULL == sz) 375 sz = &dummy; 376 377 lim = 0; 378 ep->rew = ep->cur; 379 again: 380 /* Prevent self-definitions. */ 381 382 if (lim >= EQN_NEST_MAX) { 383 mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse, 384 ep->eqn.ln, ep->eqn.pos, NULL); 385 return NULL; 386 } 387 388 ep->cur = ep->rew; 389 start = &ep->data[(int)ep->cur]; 390 q = 0; 391 392 if ('\0' == *start) 393 return NULL; 394 395 if (quote == *start) { 396 ep->cur++; 397 q = 1; 398 } 399 400 start = &ep->data[(int)ep->cur]; 401 402 if ( ! q) { 403 if ('{' == *start || '}' == *start) 404 ssz = 1; 405 else 406 ssz = strcspn(start + 1, " ^~\"{}\t") + 1; 407 next = start + (int)ssz; 408 if ('\0' == *next) 409 next = NULL; 410 } else 411 next = strchr(start, quote); 412 413 if (NULL != next) { 414 *sz = (size_t)(next - start); 415 ep->cur += *sz; 416 if (q) 417 ep->cur++; 418 while (' ' == ep->data[(int)ep->cur] || 419 '\t' == ep->data[(int)ep->cur] || 420 '^' == ep->data[(int)ep->cur] || 421 '~' == ep->data[(int)ep->cur]) 422 ep->cur++; 423 } else { 424 if (q) 425 mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse, 426 ep->eqn.ln, ep->eqn.pos, NULL); 427 next = strchr(start, '\0'); 428 *sz = (size_t)(next - start); 429 ep->cur += *sz; 430 } 431 432 /* Quotes aren't expanded for values. */ 433 434 if (q || ! repl) 435 return start; 436 437 if (NULL != (def = eqn_def_find(ep, start, *sz))) { 438 diff = def->valsz - *sz; 439 440 if (def->valsz > *sz) { 441 ep->sz += diff; 442 ep->data = mandoc_realloc(ep->data, ep->sz + 1); 443 ep->data[ep->sz] = '\0'; 444 start = &ep->data[(int)ep->rew]; 445 } 446 447 diff = def->valsz - *sz; 448 memmove(start + *sz + diff, start + *sz, 449 (strlen(start) - *sz) + 1); 450 memcpy(start, def->val, def->valsz); 451 lim++; 452 goto again; 453 } 454 455 return start; 456 } 457 458 /* 459 * Get the next delimited token using the default current quote 460 * character. 461 */ 462 static const char * 463 eqn_nexttok(struct eqn_node *ep, size_t *sz) 464 { 465 466 return eqn_next(ep, '"', sz, 1); 467 } 468 469 /* 470 * Get next token without replacement. 471 */ 472 static const char * 473 eqn_nextrawtok(struct eqn_node *ep, size_t *sz) 474 { 475 476 return eqn_next(ep, '"', sz, 0); 477 } 478 479 /* 480 * Parse a token from the stream of text. 481 * A token consists of one of the recognised eqn(7) strings. 482 * Strings are separated by delimiting marks. 483 * This returns EQN_TOK_EOF when there are no more tokens. 484 * If the token is an unrecognised string literal, then it returns 485 * EQN_TOK__MAX and sets the "p" pointer to an allocated, nil-terminated 486 * string. 487 * This must be later freed with free(3). 488 */ 489 static enum eqn_tok 490 eqn_tok_parse(struct eqn_node *ep, char **p) 491 { 492 const char *start; 493 size_t i, sz; 494 int quoted; 495 496 if (NULL != p) 497 *p = NULL; 498 499 quoted = ep->data[ep->cur] == '"'; 500 501 if (NULL == (start = eqn_nexttok(ep, &sz))) 502 return EQN_TOK_EOF; 503 504 if (quoted) { 505 if (p != NULL) 506 *p = mandoc_strndup(start, sz); 507 return EQN_TOK__MAX; 508 } 509 510 for (i = 0; i < EQN_TOK__MAX; i++) { 511 if (NULL == eqn_toks[i]) 512 continue; 513 if (STRNEQ(start, sz, eqn_toks[i], strlen(eqn_toks[i]))) 514 break; 515 } 516 517 if (i == EQN_TOK__MAX && NULL != p) 518 *p = mandoc_strndup(start, sz); 519 520 return i; 521 } 522 523 static void 524 eqn_box_free(struct eqn_box *bp) 525 { 526 527 if (bp->first) 528 eqn_box_free(bp->first); 529 if (bp->next) 530 eqn_box_free(bp->next); 531 532 free(bp->text); 533 free(bp->left); 534 free(bp->right); 535 free(bp->top); 536 free(bp->bottom); 537 free(bp); 538 } 539 540 /* 541 * Allocate a box as the last child of the parent node. 542 */ 543 static struct eqn_box * 544 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent) 545 { 546 struct eqn_box *bp; 547 548 bp = mandoc_calloc(1, sizeof(struct eqn_box)); 549 bp->parent = parent; 550 bp->parent->args++; 551 bp->expectargs = UINT_MAX; 552 bp->size = ep->gsize; 553 554 if (NULL != parent->first) { 555 parent->last->next = bp; 556 bp->prev = parent->last; 557 } else 558 parent->first = bp; 559 560 parent->last = bp; 561 return bp; 562 } 563 564 /* 565 * Reparent the current last node (of the current parent) under a new 566 * EQN_SUBEXPR as the first element. 567 * Then return the new parent. 568 * The new EQN_SUBEXPR will have a two-child limit. 569 */ 570 static struct eqn_box * 571 eqn_box_makebinary(struct eqn_node *ep, 572 enum eqn_post pos, struct eqn_box *parent) 573 { 574 struct eqn_box *b, *newb; 575 576 assert(NULL != parent->last); 577 b = parent->last; 578 if (parent->last == parent->first) 579 parent->first = NULL; 580 parent->args--; 581 parent->last = b->prev; 582 b->prev = NULL; 583 newb = eqn_box_alloc(ep, parent); 584 newb->pos = pos; 585 newb->type = EQN_SUBEXPR; 586 newb->expectargs = 2; 587 newb->args = 1; 588 newb->first = newb->last = b; 589 newb->first->next = NULL; 590 b->parent = newb; 591 return newb; 592 } 593 594 /* 595 * Parse the "delim" control statement. 596 */ 597 static void 598 eqn_delim(struct eqn_node *ep) 599 { 600 const char *start; 601 size_t sz; 602 603 if ((start = eqn_nextrawtok(ep, &sz)) == NULL) 604 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 605 ep->eqn.ln, ep->eqn.pos, "delim"); 606 else if (strncmp(start, "off", 3) == 0) 607 ep->delim = 0; 608 else if (strncmp(start, "on", 2) == 0) { 609 if (ep->odelim && ep->cdelim) 610 ep->delim = 1; 611 } else if (start[1] != '\0') { 612 ep->odelim = start[0]; 613 ep->cdelim = start[1]; 614 ep->delim = 1; 615 } 616 } 617 618 /* 619 * Undefine a previously-defined string. 620 */ 621 static void 622 eqn_undef(struct eqn_node *ep) 623 { 624 const char *start; 625 struct eqn_def *def; 626 size_t sz; 627 628 if ((start = eqn_nextrawtok(ep, &sz)) == NULL) { 629 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 630 ep->eqn.ln, ep->eqn.pos, "undef"); 631 return; 632 } 633 if ((def = eqn_def_find(ep, start, sz)) == NULL) 634 return; 635 free(def->key); 636 free(def->val); 637 def->key = def->val = NULL; 638 def->keysz = def->valsz = 0; 639 } 640 641 static void 642 eqn_def(struct eqn_node *ep) 643 { 644 const char *start; 645 size_t sz; 646 struct eqn_def *def; 647 int i; 648 649 if ((start = eqn_nextrawtok(ep, &sz)) == NULL) { 650 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 651 ep->eqn.ln, ep->eqn.pos, "define"); 652 return; 653 } 654 655 /* 656 * Search for a key that already exists. 657 * Create a new key if none is found. 658 */ 659 if (NULL == (def = eqn_def_find(ep, start, sz))) { 660 /* Find holes in string array. */ 661 for (i = 0; i < (int)ep->defsz; i++) 662 if (0 == ep->defs[i].keysz) 663 break; 664 665 if (i == (int)ep->defsz) { 666 ep->defsz++; 667 ep->defs = mandoc_reallocarray(ep->defs, 668 ep->defsz, sizeof(struct eqn_def)); 669 ep->defs[i].key = ep->defs[i].val = NULL; 670 } 671 672 def = ep->defs + i; 673 free(def->key); 674 def->key = mandoc_strndup(start, sz); 675 def->keysz = sz; 676 } 677 678 start = eqn_next(ep, ep->data[(int)ep->cur], &sz, 0); 679 if (start == NULL) { 680 mandoc_vmsg(MANDOCERR_REQ_EMPTY, ep->parse, 681 ep->eqn.ln, ep->eqn.pos, "define %s", def->key); 682 free(def->key); 683 free(def->val); 684 def->key = def->val = NULL; 685 def->keysz = def->valsz = 0; 686 return; 687 } 688 free(def->val); 689 def->val = mandoc_strndup(start, sz); 690 def->valsz = sz; 691 } 692 693 /* 694 * Recursively parse an eqn(7) expression. 695 */ 696 static enum rofferr 697 eqn_parse(struct eqn_node *ep, struct eqn_box *parent) 698 { 699 char sym[64]; 700 struct eqn_box *cur; 701 const char *start; 702 char *p; 703 size_t i, sz; 704 enum eqn_tok tok, subtok; 705 enum eqn_post pos; 706 int size; 707 708 assert(parent != NULL); 709 710 /* 711 * Empty equation. 712 * Do not add it to the high-level syntax tree. 713 */ 714 715 if (ep->data == NULL) 716 return ROFF_IGN; 717 718 next_tok: 719 tok = eqn_tok_parse(ep, &p); 720 721 this_tok: 722 switch (tok) { 723 case (EQN_TOK_UNDEF): 724 eqn_undef(ep); 725 break; 726 case (EQN_TOK_NDEFINE): 727 case (EQN_TOK_DEFINE): 728 eqn_def(ep); 729 break; 730 case (EQN_TOK_TDEFINE): 731 if (eqn_nextrawtok(ep, NULL) == NULL || 732 eqn_next(ep, ep->data[(int)ep->cur], NULL, 0) == NULL) 733 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 734 ep->eqn.ln, ep->eqn.pos, "tdefine"); 735 break; 736 case (EQN_TOK_DELIM): 737 eqn_delim(ep); 738 break; 739 case (EQN_TOK_GFONT): 740 if (eqn_nextrawtok(ep, NULL) == NULL) 741 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 742 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 743 break; 744 case (EQN_TOK_MARK): 745 case (EQN_TOK_LINEUP): 746 /* Ignore these. */ 747 break; 748 case (EQN_TOK_DYAD): 749 case (EQN_TOK_VEC): 750 case (EQN_TOK_UNDER): 751 case (EQN_TOK_BAR): 752 case (EQN_TOK_TILDE): 753 case (EQN_TOK_HAT): 754 case (EQN_TOK_DOT): 755 case (EQN_TOK_DOTDOT): 756 if (parent->last == NULL) { 757 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, 758 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 759 cur = eqn_box_alloc(ep, parent); 760 cur->type = EQN_TEXT; 761 cur->text = mandoc_strdup(""); 762 } 763 parent = eqn_box_makebinary(ep, EQNPOS_NONE, parent); 764 parent->type = EQN_LISTONE; 765 parent->expectargs = 1; 766 switch (tok) { 767 case (EQN_TOK_DOTDOT): 768 strlcpy(sym, "\\[ad]", sizeof(sym)); 769 break; 770 case (EQN_TOK_VEC): 771 strlcpy(sym, "\\[->]", sizeof(sym)); 772 break; 773 case (EQN_TOK_DYAD): 774 strlcpy(sym, "\\[<>]", sizeof(sym)); 775 break; 776 case (EQN_TOK_TILDE): 777 strlcpy(sym, "\\[a~]", sizeof(sym)); 778 break; 779 case (EQN_TOK_UNDER): 780 strlcpy(sym, "\\[ul]", sizeof(sym)); 781 break; 782 case (EQN_TOK_BAR): 783 strlcpy(sym, "\\[rl]", sizeof(sym)); 784 break; 785 case (EQN_TOK_DOT): 786 strlcpy(sym, "\\[a.]", sizeof(sym)); 787 break; 788 case (EQN_TOK_HAT): 789 strlcpy(sym, "\\[ha]", sizeof(sym)); 790 break; 791 default: 792 abort(); 793 } 794 795 switch (tok) { 796 case (EQN_TOK_DOTDOT): 797 case (EQN_TOK_VEC): 798 case (EQN_TOK_DYAD): 799 case (EQN_TOK_TILDE): 800 case (EQN_TOK_BAR): 801 case (EQN_TOK_DOT): 802 case (EQN_TOK_HAT): 803 parent->top = mandoc_strdup(sym); 804 break; 805 case (EQN_TOK_UNDER): 806 parent->bottom = mandoc_strdup(sym); 807 break; 808 default: 809 abort(); 810 } 811 parent = parent->parent; 812 break; 813 case (EQN_TOK_FWD): 814 case (EQN_TOK_BACK): 815 case (EQN_TOK_DOWN): 816 case (EQN_TOK_UP): 817 subtok = eqn_tok_parse(ep, NULL); 818 if (subtok != EQN_TOK__MAX) { 819 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 820 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 821 tok = subtok; 822 goto this_tok; 823 } 824 break; 825 case (EQN_TOK_FAT): 826 case (EQN_TOK_ROMAN): 827 case (EQN_TOK_ITALIC): 828 case (EQN_TOK_BOLD): 829 while (parent->args == parent->expectargs) 830 parent = parent->parent; 831 /* 832 * These values apply to the next word or sequence of 833 * words; thus, we mark that we'll have a child with 834 * exactly one of those. 835 */ 836 parent = eqn_box_alloc(ep, parent); 837 parent->type = EQN_LISTONE; 838 parent->expectargs = 1; 839 switch (tok) { 840 case (EQN_TOK_FAT): 841 parent->font = EQNFONT_FAT; 842 break; 843 case (EQN_TOK_ROMAN): 844 parent->font = EQNFONT_ROMAN; 845 break; 846 case (EQN_TOK_ITALIC): 847 parent->font = EQNFONT_ITALIC; 848 break; 849 case (EQN_TOK_BOLD): 850 parent->font = EQNFONT_BOLD; 851 break; 852 default: 853 abort(); 854 } 855 break; 856 case (EQN_TOK_SIZE): 857 case (EQN_TOK_GSIZE): 858 /* Accept two values: integral size and a single. */ 859 if (NULL == (start = eqn_nexttok(ep, &sz))) { 860 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 861 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 862 break; 863 } 864 size = mandoc_strntoi(start, sz, 10); 865 if (-1 == size) { 866 mandoc_msg(MANDOCERR_IT_NONUM, ep->parse, 867 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 868 break; 869 } 870 if (EQN_TOK_GSIZE == tok) { 871 ep->gsize = size; 872 break; 873 } 874 parent = eqn_box_alloc(ep, parent); 875 parent->type = EQN_LISTONE; 876 parent->expectargs = 1; 877 parent->size = size; 878 break; 879 case (EQN_TOK_FROM): 880 case (EQN_TOK_TO): 881 case (EQN_TOK_SUB): 882 case (EQN_TOK_SUP): 883 /* 884 * We have a left-right-associative expression. 885 * Repivot under a positional node, open a child scope 886 * and keep on reading. 887 */ 888 if (parent->last == NULL) { 889 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, 890 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 891 cur = eqn_box_alloc(ep, parent); 892 cur->type = EQN_TEXT; 893 cur->text = mandoc_strdup(""); 894 } 895 /* Handle the "subsup" and "fromto" positions. */ 896 if (EQN_TOK_SUP == tok && parent->pos == EQNPOS_SUB) { 897 parent->expectargs = 3; 898 parent->pos = EQNPOS_SUBSUP; 899 break; 900 } 901 if (EQN_TOK_TO == tok && parent->pos == EQNPOS_FROM) { 902 parent->expectargs = 3; 903 parent->pos = EQNPOS_FROMTO; 904 break; 905 } 906 switch (tok) { 907 case (EQN_TOK_FROM): 908 pos = EQNPOS_FROM; 909 break; 910 case (EQN_TOK_TO): 911 pos = EQNPOS_TO; 912 break; 913 case (EQN_TOK_SUP): 914 pos = EQNPOS_SUP; 915 break; 916 case (EQN_TOK_SUB): 917 pos = EQNPOS_SUB; 918 break; 919 default: 920 abort(); 921 } 922 parent = eqn_box_makebinary(ep, pos, parent); 923 break; 924 case (EQN_TOK_SQRT): 925 while (parent->args == parent->expectargs) 926 parent = parent->parent; 927 /* 928 * Accept a left-right-associative set of arguments just 929 * like sub and sup and friends but without rebalancing 930 * under a pivot. 931 */ 932 parent = eqn_box_alloc(ep, parent); 933 parent->type = EQN_SUBEXPR; 934 parent->pos = EQNPOS_SQRT; 935 parent->expectargs = 1; 936 break; 937 case (EQN_TOK_OVER): 938 /* 939 * We have a right-left-associative fraction. 940 * Close out anything that's currently open, then 941 * rebalance and continue reading. 942 */ 943 if (parent->last == NULL) { 944 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, 945 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 946 cur = eqn_box_alloc(ep, parent); 947 cur->type = EQN_TEXT; 948 cur->text = mandoc_strdup(""); 949 } 950 while (EQN_SUBEXPR == parent->type) 951 parent = parent->parent; 952 parent = eqn_box_makebinary(ep, EQNPOS_OVER, parent); 953 break; 954 case (EQN_TOK_RIGHT): 955 case (EQN_TOK_BRACE_CLOSE): 956 /* 957 * Close out the existing brace. 958 * FIXME: this is a shitty sentinel: we should really 959 * have a native EQN_BRACE type or whatnot. 960 */ 961 for (cur = parent; cur != NULL; cur = cur->parent) 962 if (cur->type == EQN_LIST && 963 (tok == EQN_TOK_BRACE_CLOSE || 964 cur->left != NULL)) 965 break; 966 if (cur == NULL) { 967 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse, 968 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 969 break; 970 } 971 parent = cur; 972 if (EQN_TOK_RIGHT == tok) { 973 if (NULL == (start = eqn_nexttok(ep, &sz))) { 974 mandoc_msg(MANDOCERR_REQ_EMPTY, 975 ep->parse, ep->eqn.ln, 976 ep->eqn.pos, eqn_toks[tok]); 977 break; 978 } 979 /* Handling depends on right/left. */ 980 if (STRNEQ(start, sz, "ceiling", 7)) { 981 strlcpy(sym, "\\[rc]", sizeof(sym)); 982 parent->right = mandoc_strdup(sym); 983 } else if (STRNEQ(start, sz, "floor", 5)) { 984 strlcpy(sym, "\\[rf]", sizeof(sym)); 985 parent->right = mandoc_strdup(sym); 986 } else 987 parent->right = mandoc_strndup(start, sz); 988 } 989 parent = parent->parent; 990 if (tok == EQN_TOK_BRACE_CLOSE && 991 (parent->type == EQN_PILE || 992 parent->type == EQN_MATRIX)) 993 parent = parent->parent; 994 /* Close out any "singleton" lists. */ 995 while (parent->type == EQN_LISTONE && 996 parent->args == parent->expectargs) 997 parent = parent->parent; 998 break; 999 case (EQN_TOK_BRACE_OPEN): 1000 case (EQN_TOK_LEFT): 1001 /* 1002 * If we already have something in the stack and we're 1003 * in an expression, then rewind til we're not any more 1004 * (just like with the text node). 1005 */ 1006 while (parent->args == parent->expectargs) 1007 parent = parent->parent; 1008 if (EQN_TOK_LEFT == tok && 1009 (start = eqn_nexttok(ep, &sz)) == NULL) { 1010 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 1011 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 1012 break; 1013 } 1014 parent = eqn_box_alloc(ep, parent); 1015 parent->type = EQN_LIST; 1016 if (EQN_TOK_LEFT == tok) { 1017 if (STRNEQ(start, sz, "ceiling", 7)) { 1018 strlcpy(sym, "\\[lc]", sizeof(sym)); 1019 parent->left = mandoc_strdup(sym); 1020 } else if (STRNEQ(start, sz, "floor", 5)) { 1021 strlcpy(sym, "\\[lf]", sizeof(sym)); 1022 parent->left = mandoc_strdup(sym); 1023 } else 1024 parent->left = mandoc_strndup(start, sz); 1025 } 1026 break; 1027 case (EQN_TOK_PILE): 1028 case (EQN_TOK_LPILE): 1029 case (EQN_TOK_RPILE): 1030 case (EQN_TOK_CPILE): 1031 case (EQN_TOK_CCOL): 1032 case (EQN_TOK_LCOL): 1033 case (EQN_TOK_RCOL): 1034 while (parent->args == parent->expectargs) 1035 parent = parent->parent; 1036 parent = eqn_box_alloc(ep, parent); 1037 parent->type = EQN_PILE; 1038 parent->expectargs = 1; 1039 break; 1040 case (EQN_TOK_ABOVE): 1041 for (cur = parent; cur != NULL; cur = cur->parent) 1042 if (cur->type == EQN_PILE) 1043 break; 1044 if (cur == NULL) { 1045 mandoc_msg(MANDOCERR_IT_STRAY, ep->parse, 1046 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 1047 break; 1048 } 1049 parent = eqn_box_alloc(ep, cur); 1050 parent->type = EQN_LIST; 1051 break; 1052 case (EQN_TOK_MATRIX): 1053 while (parent->args == parent->expectargs) 1054 parent = parent->parent; 1055 parent = eqn_box_alloc(ep, parent); 1056 parent->type = EQN_MATRIX; 1057 parent->expectargs = 1; 1058 break; 1059 case (EQN_TOK_EOF): 1060 /* 1061 * End of file! 1062 * TODO: make sure we're not in an open subexpression. 1063 */ 1064 return ROFF_EQN; 1065 default: 1066 assert(tok == EQN_TOK__MAX); 1067 assert(NULL != p); 1068 /* 1069 * If we already have something in the stack and we're 1070 * in an expression, then rewind til we're not any more. 1071 */ 1072 while (parent->args == parent->expectargs) 1073 parent = parent->parent; 1074 cur = eqn_box_alloc(ep, parent); 1075 cur->type = EQN_TEXT; 1076 for (i = 0; i < EQNSYM__MAX; i++) 1077 if (0 == strcmp(eqnsyms[i].str, p)) { 1078 (void)snprintf(sym, sizeof(sym), 1079 "\\[%s]", eqnsyms[i].sym); 1080 cur->text = mandoc_strdup(sym); 1081 free(p); 1082 break; 1083 } 1084 1085 if (i == EQNSYM__MAX) 1086 cur->text = p; 1087 /* 1088 * Post-process list status. 1089 */ 1090 while (parent->type == EQN_LISTONE && 1091 parent->args == parent->expectargs) 1092 parent = parent->parent; 1093 break; 1094 } 1095 goto next_tok; 1096 } 1097 1098 enum rofferr 1099 eqn_end(struct eqn_node **epp) 1100 { 1101 struct eqn_node *ep; 1102 1103 ep = *epp; 1104 *epp = NULL; 1105 1106 ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box)); 1107 ep->eqn.root->expectargs = UINT_MAX; 1108 return eqn_parse(ep, ep->eqn.root); 1109 } 1110 1111 void 1112 eqn_free(struct eqn_node *p) 1113 { 1114 int i; 1115 1116 eqn_box_free(p->eqn.root); 1117 1118 for (i = 0; i < (int)p->defsz; i++) { 1119 free(p->defs[i].key); 1120 free(p->defs[i].val); 1121 } 1122 1123 free(p->data); 1124 free(p->defs); 1125 free(p); 1126 } 1127