1 /* $Id: eqn.c,v 1.58 2015/03/04 12:19:49 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014, 2015 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include "config.h" 19 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <limits.h> 24 #include <stdio.h> 25 #include <stdlib.h> 26 #include <string.h> 27 #include <time.h> 28 29 #include "mandoc.h" 30 #include "mandoc_aux.h" 31 #include "libmandoc.h" 32 #include "libroff.h" 33 34 #define EQN_NEST_MAX 128 /* maximum nesting of defines */ 35 #define STRNEQ(p1, sz1, p2, sz2) \ 36 ((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1))) 37 38 enum eqn_tok { 39 EQN_TOK_DYAD = 0, 40 EQN_TOK_VEC, 41 EQN_TOK_UNDER, 42 EQN_TOK_BAR, 43 EQN_TOK_TILDE, 44 EQN_TOK_HAT, 45 EQN_TOK_DOT, 46 EQN_TOK_DOTDOT, 47 EQN_TOK_FWD, 48 EQN_TOK_BACK, 49 EQN_TOK_DOWN, 50 EQN_TOK_UP, 51 EQN_TOK_FAT, 52 EQN_TOK_ROMAN, 53 EQN_TOK_ITALIC, 54 EQN_TOK_BOLD, 55 EQN_TOK_SIZE, 56 EQN_TOK_SUB, 57 EQN_TOK_SUP, 58 EQN_TOK_SQRT, 59 EQN_TOK_OVER, 60 EQN_TOK_FROM, 61 EQN_TOK_TO, 62 EQN_TOK_BRACE_OPEN, 63 EQN_TOK_BRACE_CLOSE, 64 EQN_TOK_GSIZE, 65 EQN_TOK_GFONT, 66 EQN_TOK_MARK, 67 EQN_TOK_LINEUP, 68 EQN_TOK_LEFT, 69 EQN_TOK_RIGHT, 70 EQN_TOK_PILE, 71 EQN_TOK_LPILE, 72 EQN_TOK_RPILE, 73 EQN_TOK_CPILE, 74 EQN_TOK_MATRIX, 75 EQN_TOK_CCOL, 76 EQN_TOK_LCOL, 77 EQN_TOK_RCOL, 78 EQN_TOK_DELIM, 79 EQN_TOK_DEFINE, 80 EQN_TOK_TDEFINE, 81 EQN_TOK_NDEFINE, 82 EQN_TOK_UNDEF, 83 EQN_TOK_EOF, 84 EQN_TOK_ABOVE, 85 EQN_TOK__MAX 86 }; 87 88 static const char *eqn_toks[EQN_TOK__MAX] = { 89 "dyad", /* EQN_TOK_DYAD */ 90 "vec", /* EQN_TOK_VEC */ 91 "under", /* EQN_TOK_UNDER */ 92 "bar", /* EQN_TOK_BAR */ 93 "tilde", /* EQN_TOK_TILDE */ 94 "hat", /* EQN_TOK_HAT */ 95 "dot", /* EQN_TOK_DOT */ 96 "dotdot", /* EQN_TOK_DOTDOT */ 97 "fwd", /* EQN_TOK_FWD * */ 98 "back", /* EQN_TOK_BACK */ 99 "down", /* EQN_TOK_DOWN */ 100 "up", /* EQN_TOK_UP */ 101 "fat", /* EQN_TOK_FAT */ 102 "roman", /* EQN_TOK_ROMAN */ 103 "italic", /* EQN_TOK_ITALIC */ 104 "bold", /* EQN_TOK_BOLD */ 105 "size", /* EQN_TOK_SIZE */ 106 "sub", /* EQN_TOK_SUB */ 107 "sup", /* EQN_TOK_SUP */ 108 "sqrt", /* EQN_TOK_SQRT */ 109 "over", /* EQN_TOK_OVER */ 110 "from", /* EQN_TOK_FROM */ 111 "to", /* EQN_TOK_TO */ 112 "{", /* EQN_TOK_BRACE_OPEN */ 113 "}", /* EQN_TOK_BRACE_CLOSE */ 114 "gsize", /* EQN_TOK_GSIZE */ 115 "gfont", /* EQN_TOK_GFONT */ 116 "mark", /* EQN_TOK_MARK */ 117 "lineup", /* EQN_TOK_LINEUP */ 118 "left", /* EQN_TOK_LEFT */ 119 "right", /* EQN_TOK_RIGHT */ 120 "pile", /* EQN_TOK_PILE */ 121 "lpile", /* EQN_TOK_LPILE */ 122 "rpile", /* EQN_TOK_RPILE */ 123 "cpile", /* EQN_TOK_CPILE */ 124 "matrix", /* EQN_TOK_MATRIX */ 125 "ccol", /* EQN_TOK_CCOL */ 126 "lcol", /* EQN_TOK_LCOL */ 127 "rcol", /* EQN_TOK_RCOL */ 128 "delim", /* EQN_TOK_DELIM */ 129 "define", /* EQN_TOK_DEFINE */ 130 "tdefine", /* EQN_TOK_TDEFINE */ 131 "ndefine", /* EQN_TOK_NDEFINE */ 132 "undef", /* EQN_TOK_UNDEF */ 133 NULL, /* EQN_TOK_EOF */ 134 "above", /* EQN_TOK_ABOVE */ 135 }; 136 137 enum eqn_symt { 138 EQNSYM_alpha, 139 EQNSYM_beta, 140 EQNSYM_chi, 141 EQNSYM_delta, 142 EQNSYM_epsilon, 143 EQNSYM_eta, 144 EQNSYM_gamma, 145 EQNSYM_iota, 146 EQNSYM_kappa, 147 EQNSYM_lambda, 148 EQNSYM_mu, 149 EQNSYM_nu, 150 EQNSYM_omega, 151 EQNSYM_omicron, 152 EQNSYM_phi, 153 EQNSYM_pi, 154 EQNSYM_ps, 155 EQNSYM_rho, 156 EQNSYM_sigma, 157 EQNSYM_tau, 158 EQNSYM_theta, 159 EQNSYM_upsilon, 160 EQNSYM_xi, 161 EQNSYM_zeta, 162 EQNSYM_DELTA, 163 EQNSYM_GAMMA, 164 EQNSYM_LAMBDA, 165 EQNSYM_OMEGA, 166 EQNSYM_PHI, 167 EQNSYM_PI, 168 EQNSYM_PSI, 169 EQNSYM_SIGMA, 170 EQNSYM_THETA, 171 EQNSYM_UPSILON, 172 EQNSYM_XI, 173 EQNSYM_inter, 174 EQNSYM_union, 175 EQNSYM_prod, 176 EQNSYM_int, 177 EQNSYM_sum, 178 EQNSYM_grad, 179 EQNSYM_del, 180 EQNSYM_times, 181 EQNSYM_cdot, 182 EQNSYM_nothing, 183 EQNSYM_approx, 184 EQNSYM_prime, 185 EQNSYM_half, 186 EQNSYM_partial, 187 EQNSYM_inf, 188 EQNSYM_muchgreat, 189 EQNSYM_muchless, 190 EQNSYM_larrow, 191 EQNSYM_rarrow, 192 EQNSYM_pm, 193 EQNSYM_nequal, 194 EQNSYM_equiv, 195 EQNSYM_lessequal, 196 EQNSYM_moreequal, 197 EQNSYM_minus, 198 EQNSYM__MAX 199 }; 200 201 struct eqnsym { 202 const char *str; 203 const char *sym; 204 }; 205 206 static const struct eqnsym eqnsyms[EQNSYM__MAX] = { 207 { "alpha", "*a" }, /* EQNSYM_alpha */ 208 { "beta", "*b" }, /* EQNSYM_beta */ 209 { "chi", "*x" }, /* EQNSYM_chi */ 210 { "delta", "*d" }, /* EQNSYM_delta */ 211 { "epsilon", "*e" }, /* EQNSYM_epsilon */ 212 { "eta", "*y" }, /* EQNSYM_eta */ 213 { "gamma", "*g" }, /* EQNSYM_gamma */ 214 { "iota", "*i" }, /* EQNSYM_iota */ 215 { "kappa", "*k" }, /* EQNSYM_kappa */ 216 { "lambda", "*l" }, /* EQNSYM_lambda */ 217 { "mu", "*m" }, /* EQNSYM_mu */ 218 { "nu", "*n" }, /* EQNSYM_nu */ 219 { "omega", "*w" }, /* EQNSYM_omega */ 220 { "omicron", "*o" }, /* EQNSYM_omicron */ 221 { "phi", "*f" }, /* EQNSYM_phi */ 222 { "pi", "*p" }, /* EQNSYM_pi */ 223 { "psi", "*q" }, /* EQNSYM_psi */ 224 { "rho", "*r" }, /* EQNSYM_rho */ 225 { "sigma", "*s" }, /* EQNSYM_sigma */ 226 { "tau", "*t" }, /* EQNSYM_tau */ 227 { "theta", "*h" }, /* EQNSYM_theta */ 228 { "upsilon", "*u" }, /* EQNSYM_upsilon */ 229 { "xi", "*c" }, /* EQNSYM_xi */ 230 { "zeta", "*z" }, /* EQNSYM_zeta */ 231 { "DELTA", "*D" }, /* EQNSYM_DELTA */ 232 { "GAMMA", "*G" }, /* EQNSYM_GAMMA */ 233 { "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */ 234 { "OMEGA", "*W" }, /* EQNSYM_OMEGA */ 235 { "PHI", "*F" }, /* EQNSYM_PHI */ 236 { "PI", "*P" }, /* EQNSYM_PI */ 237 { "PSI", "*Q" }, /* EQNSYM_PSI */ 238 { "SIGMA", "*S" }, /* EQNSYM_SIGMA */ 239 { "THETA", "*H" }, /* EQNSYM_THETA */ 240 { "UPSILON", "*U" }, /* EQNSYM_UPSILON */ 241 { "XI", "*C" }, /* EQNSYM_XI */ 242 { "inter", "ca" }, /* EQNSYM_inter */ 243 { "union", "cu" }, /* EQNSYM_union */ 244 { "prod", "product" }, /* EQNSYM_prod */ 245 { "int", "integral" }, /* EQNSYM_int */ 246 { "sum", "sum" }, /* EQNSYM_sum */ 247 { "grad", "gr" }, /* EQNSYM_grad */ 248 { "del", "gr" }, /* EQNSYM_del */ 249 { "times", "mu" }, /* EQNSYM_times */ 250 { "cdot", "pc" }, /* EQNSYM_cdot */ 251 { "nothing", "&" }, /* EQNSYM_nothing */ 252 { "approx", "~~" }, /* EQNSYM_approx */ 253 { "prime", "fm" }, /* EQNSYM_prime */ 254 { "half", "12" }, /* EQNSYM_half */ 255 { "partial", "pd" }, /* EQNSYM_partial */ 256 { "inf", "if" }, /* EQNSYM_inf */ 257 { ">>", ">>" }, /* EQNSYM_muchgreat */ 258 { "<<", "<<" }, /* EQNSYM_muchless */ 259 { "<-", "<-" }, /* EQNSYM_larrow */ 260 { "->", "->" }, /* EQNSYM_rarrow */ 261 { "+-", "+-" }, /* EQNSYM_pm */ 262 { "!=", "!=" }, /* EQNSYM_nequal */ 263 { "==", "==" }, /* EQNSYM_equiv */ 264 { "<=", "<=" }, /* EQNSYM_lessequal */ 265 { ">=", ">=" }, /* EQNSYM_moreequal */ 266 { "-", "mi" }, /* EQNSYM_minus */ 267 }; 268 269 static struct eqn_box *eqn_box_alloc(struct eqn_node *, struct eqn_box *); 270 static void eqn_box_free(struct eqn_box *); 271 static struct eqn_box *eqn_box_makebinary(struct eqn_node *, 272 enum eqn_post, struct eqn_box *); 273 static void eqn_def(struct eqn_node *); 274 static struct eqn_def *eqn_def_find(struct eqn_node *, const char *, size_t); 275 static void eqn_delim(struct eqn_node *); 276 static const char *eqn_next(struct eqn_node *, char, size_t *, int); 277 static const char *eqn_nextrawtok(struct eqn_node *, size_t *); 278 static const char *eqn_nexttok(struct eqn_node *, size_t *); 279 static enum rofferr eqn_parse(struct eqn_node *, struct eqn_box *); 280 static enum eqn_tok eqn_tok_parse(struct eqn_node *, char **); 281 static void eqn_undef(struct eqn_node *); 282 283 284 enum rofferr 285 eqn_read(struct eqn_node **epp, int ln, 286 const char *p, int pos, int *offs) 287 { 288 size_t sz; 289 struct eqn_node *ep; 290 enum rofferr er; 291 292 ep = *epp; 293 294 /* 295 * If we're the terminating mark, unset our equation status and 296 * validate the full equation. 297 */ 298 299 if (0 == strncmp(p, ".EN", 3)) { 300 er = eqn_end(epp); 301 p += 3; 302 while (' ' == *p || '\t' == *p) 303 p++; 304 if ('\0' == *p) 305 return(er); 306 mandoc_vmsg(MANDOCERR_ARG_SKIP, ep->parse, 307 ln, pos, "EN %s", p); 308 return(er); 309 } 310 311 /* 312 * Build up the full string, replacing all newlines with regular 313 * whitespace. 314 */ 315 316 sz = strlen(p + pos) + 1; 317 ep->data = mandoc_realloc(ep->data, ep->sz + sz + 1); 318 319 /* First invocation: nil terminate the string. */ 320 321 if (0 == ep->sz) 322 *ep->data = '\0'; 323 324 ep->sz += sz; 325 strlcat(ep->data, p + pos, ep->sz + 1); 326 strlcat(ep->data, " ", ep->sz + 1); 327 return(ROFF_IGN); 328 } 329 330 struct eqn_node * 331 eqn_alloc(int pos, int line, struct mparse *parse) 332 { 333 struct eqn_node *p; 334 335 p = mandoc_calloc(1, sizeof(struct eqn_node)); 336 337 p->parse = parse; 338 p->eqn.ln = line; 339 p->eqn.pos = pos; 340 p->gsize = EQN_DEFSIZE; 341 342 return(p); 343 } 344 345 /* 346 * Find the key "key" of the give size within our eqn-defined values. 347 */ 348 static struct eqn_def * 349 eqn_def_find(struct eqn_node *ep, const char *key, size_t sz) 350 { 351 int i; 352 353 for (i = 0; i < (int)ep->defsz; i++) 354 if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key, 355 ep->defs[i].keysz, key, sz)) 356 return(&ep->defs[i]); 357 358 return(NULL); 359 } 360 361 /* 362 * Get the next token from the input stream using the given quote 363 * character. 364 * Optionally make any replacements. 365 */ 366 static const char * 367 eqn_next(struct eqn_node *ep, char quote, size_t *sz, int repl) 368 { 369 char *start, *next; 370 int q, diff, lim; 371 size_t ssz, dummy; 372 struct eqn_def *def; 373 374 if (NULL == sz) 375 sz = &dummy; 376 377 lim = 0; 378 ep->rew = ep->cur; 379 again: 380 /* Prevent self-definitions. */ 381 382 if (lim >= EQN_NEST_MAX) { 383 mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse, 384 ep->eqn.ln, ep->eqn.pos, NULL); 385 return(NULL); 386 } 387 388 ep->cur = ep->rew; 389 start = &ep->data[(int)ep->cur]; 390 q = 0; 391 392 if ('\0' == *start) 393 return(NULL); 394 395 if (quote == *start) { 396 ep->cur++; 397 q = 1; 398 } 399 400 start = &ep->data[(int)ep->cur]; 401 402 if ( ! q) { 403 if ('{' == *start || '}' == *start) 404 ssz = 1; 405 else 406 ssz = strcspn(start + 1, " ^~\"{}\t") + 1; 407 next = start + (int)ssz; 408 if ('\0' == *next) 409 next = NULL; 410 } else 411 next = strchr(start, quote); 412 413 if (NULL != next) { 414 *sz = (size_t)(next - start); 415 ep->cur += *sz; 416 if (q) 417 ep->cur++; 418 while (' ' == ep->data[(int)ep->cur] || 419 '\t' == ep->data[(int)ep->cur] || 420 '^' == ep->data[(int)ep->cur] || 421 '~' == ep->data[(int)ep->cur]) 422 ep->cur++; 423 } else { 424 if (q) 425 mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse, 426 ep->eqn.ln, ep->eqn.pos, NULL); 427 next = strchr(start, '\0'); 428 *sz = (size_t)(next - start); 429 ep->cur += *sz; 430 } 431 432 /* Quotes aren't expanded for values. */ 433 434 if (q || ! repl) 435 return(start); 436 437 if (NULL != (def = eqn_def_find(ep, start, *sz))) { 438 diff = def->valsz - *sz; 439 440 if (def->valsz > *sz) { 441 ep->sz += diff; 442 ep->data = mandoc_realloc(ep->data, ep->sz + 1); 443 ep->data[ep->sz] = '\0'; 444 start = &ep->data[(int)ep->rew]; 445 } 446 447 diff = def->valsz - *sz; 448 memmove(start + *sz + diff, start + *sz, 449 (strlen(start) - *sz) + 1); 450 memcpy(start, def->val, def->valsz); 451 goto again; 452 } 453 454 return(start); 455 } 456 457 /* 458 * Get the next delimited token using the default current quote 459 * character. 460 */ 461 static const char * 462 eqn_nexttok(struct eqn_node *ep, size_t *sz) 463 { 464 465 return(eqn_next(ep, '"', sz, 1)); 466 } 467 468 /* 469 * Get next token without replacement. 470 */ 471 static const char * 472 eqn_nextrawtok(struct eqn_node *ep, size_t *sz) 473 { 474 475 return(eqn_next(ep, '"', sz, 0)); 476 } 477 478 /* 479 * Parse a token from the stream of text. 480 * A token consists of one of the recognised eqn(7) strings. 481 * Strings are separated by delimiting marks. 482 * This returns EQN_TOK_EOF when there are no more tokens. 483 * If the token is an unrecognised string literal, then it returns 484 * EQN_TOK__MAX and sets the "p" pointer to an allocated, nil-terminated 485 * string. 486 * This must be later freed with free(3). 487 */ 488 static enum eqn_tok 489 eqn_tok_parse(struct eqn_node *ep, char **p) 490 { 491 const char *start; 492 size_t i, sz; 493 int quoted; 494 495 if (NULL != p) 496 *p = NULL; 497 498 quoted = ep->data[ep->cur] == '"'; 499 500 if (NULL == (start = eqn_nexttok(ep, &sz))) 501 return(EQN_TOK_EOF); 502 503 if (quoted) { 504 if (p != NULL) 505 *p = mandoc_strndup(start, sz); 506 return(EQN_TOK__MAX); 507 } 508 509 for (i = 0; i < EQN_TOK__MAX; i++) { 510 if (NULL == eqn_toks[i]) 511 continue; 512 if (STRNEQ(start, sz, eqn_toks[i], strlen(eqn_toks[i]))) 513 break; 514 } 515 516 if (i == EQN_TOK__MAX && NULL != p) 517 *p = mandoc_strndup(start, sz); 518 519 return(i); 520 } 521 522 static void 523 eqn_box_free(struct eqn_box *bp) 524 { 525 526 if (bp->first) 527 eqn_box_free(bp->first); 528 if (bp->next) 529 eqn_box_free(bp->next); 530 531 free(bp->text); 532 free(bp->left); 533 free(bp->right); 534 free(bp->top); 535 free(bp->bottom); 536 free(bp); 537 } 538 539 /* 540 * Allocate a box as the last child of the parent node. 541 */ 542 static struct eqn_box * 543 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent) 544 { 545 struct eqn_box *bp; 546 547 bp = mandoc_calloc(1, sizeof(struct eqn_box)); 548 bp->parent = parent; 549 bp->parent->args++; 550 bp->expectargs = UINT_MAX; 551 bp->size = ep->gsize; 552 553 if (NULL != parent->first) { 554 parent->last->next = bp; 555 bp->prev = parent->last; 556 } else 557 parent->first = bp; 558 559 parent->last = bp; 560 return(bp); 561 } 562 563 /* 564 * Reparent the current last node (of the current parent) under a new 565 * EQN_SUBEXPR as the first element. 566 * Then return the new parent. 567 * The new EQN_SUBEXPR will have a two-child limit. 568 */ 569 static struct eqn_box * 570 eqn_box_makebinary(struct eqn_node *ep, 571 enum eqn_post pos, struct eqn_box *parent) 572 { 573 struct eqn_box *b, *newb; 574 575 assert(NULL != parent->last); 576 b = parent->last; 577 if (parent->last == parent->first) 578 parent->first = NULL; 579 parent->args--; 580 parent->last = b->prev; 581 b->prev = NULL; 582 newb = eqn_box_alloc(ep, parent); 583 newb->pos = pos; 584 newb->type = EQN_SUBEXPR; 585 newb->expectargs = 2; 586 newb->args = 1; 587 newb->first = newb->last = b; 588 newb->first->next = NULL; 589 b->parent = newb; 590 return(newb); 591 } 592 593 /* 594 * Parse the "delim" control statement. 595 */ 596 static void 597 eqn_delim(struct eqn_node *ep) 598 { 599 const char *start; 600 size_t sz; 601 602 if ((start = eqn_nextrawtok(ep, &sz)) == NULL) 603 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 604 ep->eqn.ln, ep->eqn.pos, "delim"); 605 else if (strncmp(start, "off", 3) == 0) 606 ep->delim = 0; 607 else if (strncmp(start, "on", 2) == 0) { 608 if (ep->odelim && ep->cdelim) 609 ep->delim = 1; 610 } else if (start[1] != '\0') { 611 ep->odelim = start[0]; 612 ep->cdelim = start[1]; 613 ep->delim = 1; 614 } 615 } 616 617 /* 618 * Undefine a previously-defined string. 619 */ 620 static void 621 eqn_undef(struct eqn_node *ep) 622 { 623 const char *start; 624 struct eqn_def *def; 625 size_t sz; 626 627 if ((start = eqn_nextrawtok(ep, &sz)) == NULL) { 628 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 629 ep->eqn.ln, ep->eqn.pos, "undef"); 630 return; 631 } 632 if ((def = eqn_def_find(ep, start, sz)) == NULL) 633 return; 634 free(def->key); 635 free(def->val); 636 def->key = def->val = NULL; 637 def->keysz = def->valsz = 0; 638 } 639 640 static void 641 eqn_def(struct eqn_node *ep) 642 { 643 const char *start; 644 size_t sz; 645 struct eqn_def *def; 646 int i; 647 648 if ((start = eqn_nextrawtok(ep, &sz)) == NULL) { 649 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 650 ep->eqn.ln, ep->eqn.pos, "define"); 651 return; 652 } 653 654 /* 655 * Search for a key that already exists. 656 * Create a new key if none is found. 657 */ 658 if (NULL == (def = eqn_def_find(ep, start, sz))) { 659 /* Find holes in string array. */ 660 for (i = 0; i < (int)ep->defsz; i++) 661 if (0 == ep->defs[i].keysz) 662 break; 663 664 if (i == (int)ep->defsz) { 665 ep->defsz++; 666 ep->defs = mandoc_reallocarray(ep->defs, 667 ep->defsz, sizeof(struct eqn_def)); 668 ep->defs[i].key = ep->defs[i].val = NULL; 669 } 670 671 def = ep->defs + i; 672 free(def->key); 673 def->key = mandoc_strndup(start, sz); 674 def->keysz = sz; 675 } 676 677 start = eqn_next(ep, ep->data[(int)ep->cur], &sz, 0); 678 if (start == NULL) { 679 mandoc_vmsg(MANDOCERR_REQ_EMPTY, ep->parse, 680 ep->eqn.ln, ep->eqn.pos, "define %s", def->key); 681 free(def->key); 682 free(def->val); 683 def->key = def->val = NULL; 684 def->keysz = def->valsz = 0; 685 return; 686 } 687 free(def->val); 688 def->val = mandoc_strndup(start, sz); 689 def->valsz = sz; 690 } 691 692 /* 693 * Recursively parse an eqn(7) expression. 694 */ 695 static enum rofferr 696 eqn_parse(struct eqn_node *ep, struct eqn_box *parent) 697 { 698 char sym[64]; 699 struct eqn_box *cur; 700 const char *start; 701 char *p; 702 size_t i, sz; 703 enum eqn_tok tok, subtok; 704 enum eqn_post pos; 705 int size; 706 707 assert(parent != NULL); 708 709 /* 710 * Empty equation. 711 * Do not add it to the high-level syntax tree. 712 */ 713 714 if (ep->data == NULL) 715 return(ROFF_IGN); 716 717 next_tok: 718 tok = eqn_tok_parse(ep, &p); 719 720 this_tok: 721 switch (tok) { 722 case (EQN_TOK_UNDEF): 723 eqn_undef(ep); 724 break; 725 case (EQN_TOK_NDEFINE): 726 case (EQN_TOK_DEFINE): 727 eqn_def(ep); 728 break; 729 case (EQN_TOK_TDEFINE): 730 if (eqn_nextrawtok(ep, NULL) == NULL || 731 eqn_next(ep, ep->data[(int)ep->cur], NULL, 0) == NULL) 732 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 733 ep->eqn.ln, ep->eqn.pos, "tdefine"); 734 break; 735 case (EQN_TOK_DELIM): 736 eqn_delim(ep); 737 break; 738 case (EQN_TOK_GFONT): 739 if (eqn_nextrawtok(ep, NULL) == NULL) 740 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 741 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 742 break; 743 case (EQN_TOK_MARK): 744 case (EQN_TOK_LINEUP): 745 /* Ignore these. */ 746 break; 747 case (EQN_TOK_DYAD): 748 case (EQN_TOK_VEC): 749 case (EQN_TOK_UNDER): 750 case (EQN_TOK_BAR): 751 case (EQN_TOK_TILDE): 752 case (EQN_TOK_HAT): 753 case (EQN_TOK_DOT): 754 case (EQN_TOK_DOTDOT): 755 if (parent->last == NULL) { 756 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, 757 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 758 cur = eqn_box_alloc(ep, parent); 759 cur->type = EQN_TEXT; 760 cur->text = mandoc_strdup(""); 761 } 762 parent = eqn_box_makebinary(ep, EQNPOS_NONE, parent); 763 parent->type = EQN_LISTONE; 764 parent->expectargs = 1; 765 switch (tok) { 766 case (EQN_TOK_DOTDOT): 767 strlcpy(sym, "\\[ad]", sizeof(sym)); 768 break; 769 case (EQN_TOK_VEC): 770 strlcpy(sym, "\\[->]", sizeof(sym)); 771 break; 772 case (EQN_TOK_DYAD): 773 strlcpy(sym, "\\[<>]", sizeof(sym)); 774 break; 775 case (EQN_TOK_TILDE): 776 strlcpy(sym, "\\[a~]", sizeof(sym)); 777 break; 778 case (EQN_TOK_UNDER): 779 strlcpy(sym, "\\[ul]", sizeof(sym)); 780 break; 781 case (EQN_TOK_BAR): 782 strlcpy(sym, "\\[rl]", sizeof(sym)); 783 break; 784 case (EQN_TOK_DOT): 785 strlcpy(sym, "\\[a.]", sizeof(sym)); 786 break; 787 case (EQN_TOK_HAT): 788 strlcpy(sym, "\\[ha]", sizeof(sym)); 789 break; 790 default: 791 abort(); 792 } 793 794 switch (tok) { 795 case (EQN_TOK_DOTDOT): 796 case (EQN_TOK_VEC): 797 case (EQN_TOK_DYAD): 798 case (EQN_TOK_TILDE): 799 case (EQN_TOK_BAR): 800 case (EQN_TOK_DOT): 801 case (EQN_TOK_HAT): 802 parent->top = mandoc_strdup(sym); 803 break; 804 case (EQN_TOK_UNDER): 805 parent->bottom = mandoc_strdup(sym); 806 break; 807 default: 808 abort(); 809 } 810 parent = parent->parent; 811 break; 812 case (EQN_TOK_FWD): 813 case (EQN_TOK_BACK): 814 case (EQN_TOK_DOWN): 815 case (EQN_TOK_UP): 816 subtok = eqn_tok_parse(ep, NULL); 817 if (subtok != EQN_TOK__MAX) { 818 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 819 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 820 tok = subtok; 821 goto this_tok; 822 } 823 break; 824 case (EQN_TOK_FAT): 825 case (EQN_TOK_ROMAN): 826 case (EQN_TOK_ITALIC): 827 case (EQN_TOK_BOLD): 828 while (parent->args == parent->expectargs) 829 parent = parent->parent; 830 /* 831 * These values apply to the next word or sequence of 832 * words; thus, we mark that we'll have a child with 833 * exactly one of those. 834 */ 835 parent = eqn_box_alloc(ep, parent); 836 parent->type = EQN_LISTONE; 837 parent->expectargs = 1; 838 switch (tok) { 839 case (EQN_TOK_FAT): 840 parent->font = EQNFONT_FAT; 841 break; 842 case (EQN_TOK_ROMAN): 843 parent->font = EQNFONT_ROMAN; 844 break; 845 case (EQN_TOK_ITALIC): 846 parent->font = EQNFONT_ITALIC; 847 break; 848 case (EQN_TOK_BOLD): 849 parent->font = EQNFONT_BOLD; 850 break; 851 default: 852 abort(); 853 } 854 break; 855 case (EQN_TOK_SIZE): 856 case (EQN_TOK_GSIZE): 857 /* Accept two values: integral size and a single. */ 858 if (NULL == (start = eqn_nexttok(ep, &sz))) { 859 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 860 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 861 break; 862 } 863 size = mandoc_strntoi(start, sz, 10); 864 if (-1 == size) { 865 mandoc_msg(MANDOCERR_IT_NONUM, ep->parse, 866 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 867 break; 868 } 869 if (EQN_TOK_GSIZE == tok) { 870 ep->gsize = size; 871 break; 872 } 873 parent = eqn_box_alloc(ep, parent); 874 parent->type = EQN_LISTONE; 875 parent->expectargs = 1; 876 parent->size = size; 877 break; 878 case (EQN_TOK_FROM): 879 case (EQN_TOK_TO): 880 case (EQN_TOK_SUB): 881 case (EQN_TOK_SUP): 882 /* 883 * We have a left-right-associative expression. 884 * Repivot under a positional node, open a child scope 885 * and keep on reading. 886 */ 887 if (parent->last == NULL) { 888 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, 889 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 890 cur = eqn_box_alloc(ep, parent); 891 cur->type = EQN_TEXT; 892 cur->text = mandoc_strdup(""); 893 } 894 /* Handle the "subsup" and "fromto" positions. */ 895 if (EQN_TOK_SUP == tok && parent->pos == EQNPOS_SUB) { 896 parent->expectargs = 3; 897 parent->pos = EQNPOS_SUBSUP; 898 break; 899 } 900 if (EQN_TOK_TO == tok && parent->pos == EQNPOS_FROM) { 901 parent->expectargs = 3; 902 parent->pos = EQNPOS_FROMTO; 903 break; 904 } 905 switch (tok) { 906 case (EQN_TOK_FROM): 907 pos = EQNPOS_FROM; 908 break; 909 case (EQN_TOK_TO): 910 pos = EQNPOS_TO; 911 break; 912 case (EQN_TOK_SUP): 913 pos = EQNPOS_SUP; 914 break; 915 case (EQN_TOK_SUB): 916 pos = EQNPOS_SUB; 917 break; 918 default: 919 abort(); 920 } 921 parent = eqn_box_makebinary(ep, pos, parent); 922 break; 923 case (EQN_TOK_SQRT): 924 while (parent->args == parent->expectargs) 925 parent = parent->parent; 926 /* 927 * Accept a left-right-associative set of arguments just 928 * like sub and sup and friends but without rebalancing 929 * under a pivot. 930 */ 931 parent = eqn_box_alloc(ep, parent); 932 parent->type = EQN_SUBEXPR; 933 parent->pos = EQNPOS_SQRT; 934 parent->expectargs = 1; 935 break; 936 case (EQN_TOK_OVER): 937 /* 938 * We have a right-left-associative fraction. 939 * Close out anything that's currently open, then 940 * rebalance and continue reading. 941 */ 942 if (parent->last == NULL) { 943 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, 944 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 945 cur = eqn_box_alloc(ep, parent); 946 cur->type = EQN_TEXT; 947 cur->text = mandoc_strdup(""); 948 } 949 while (EQN_SUBEXPR == parent->type) 950 parent = parent->parent; 951 parent = eqn_box_makebinary(ep, EQNPOS_OVER, parent); 952 break; 953 case (EQN_TOK_RIGHT): 954 case (EQN_TOK_BRACE_CLOSE): 955 /* 956 * Close out the existing brace. 957 * FIXME: this is a shitty sentinel: we should really 958 * have a native EQN_BRACE type or whatnot. 959 */ 960 for (cur = parent; cur != NULL; cur = cur->parent) 961 if (cur->type == EQN_LIST && 962 (tok == EQN_TOK_BRACE_CLOSE || 963 cur->left != NULL)) 964 break; 965 if (cur == NULL) { 966 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse, 967 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 968 break; 969 } 970 parent = cur; 971 if (EQN_TOK_RIGHT == tok) { 972 if (NULL == (start = eqn_nexttok(ep, &sz))) { 973 mandoc_msg(MANDOCERR_REQ_EMPTY, 974 ep->parse, ep->eqn.ln, 975 ep->eqn.pos, eqn_toks[tok]); 976 break; 977 } 978 /* Handling depends on right/left. */ 979 if (STRNEQ(start, sz, "ceiling", 7)) { 980 strlcpy(sym, "\\[rc]", sizeof(sym)); 981 parent->right = mandoc_strdup(sym); 982 } else if (STRNEQ(start, sz, "floor", 5)) { 983 strlcpy(sym, "\\[rf]", sizeof(sym)); 984 parent->right = mandoc_strdup(sym); 985 } else 986 parent->right = mandoc_strndup(start, sz); 987 } 988 parent = parent->parent; 989 if (EQN_TOK_BRACE_CLOSE == tok && parent && 990 (parent->type == EQN_PILE || 991 parent->type == EQN_MATRIX)) 992 parent = parent->parent; 993 /* Close out any "singleton" lists. */ 994 while (parent->type == EQN_LISTONE && 995 parent->args == parent->expectargs) 996 parent = parent->parent; 997 break; 998 case (EQN_TOK_BRACE_OPEN): 999 case (EQN_TOK_LEFT): 1000 /* 1001 * If we already have something in the stack and we're 1002 * in an expression, then rewind til we're not any more 1003 * (just like with the text node). 1004 */ 1005 while (parent->args == parent->expectargs) 1006 parent = parent->parent; 1007 if (EQN_TOK_LEFT == tok && 1008 (start = eqn_nexttok(ep, &sz)) == NULL) { 1009 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 1010 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 1011 break; 1012 } 1013 parent = eqn_box_alloc(ep, parent); 1014 parent->type = EQN_LIST; 1015 if (EQN_TOK_LEFT == tok) { 1016 if (STRNEQ(start, sz, "ceiling", 7)) { 1017 strlcpy(sym, "\\[lc]", sizeof(sym)); 1018 parent->left = mandoc_strdup(sym); 1019 } else if (STRNEQ(start, sz, "floor", 5)) { 1020 strlcpy(sym, "\\[lf]", sizeof(sym)); 1021 parent->left = mandoc_strdup(sym); 1022 } else 1023 parent->left = mandoc_strndup(start, sz); 1024 } 1025 break; 1026 case (EQN_TOK_PILE): 1027 case (EQN_TOK_LPILE): 1028 case (EQN_TOK_RPILE): 1029 case (EQN_TOK_CPILE): 1030 case (EQN_TOK_CCOL): 1031 case (EQN_TOK_LCOL): 1032 case (EQN_TOK_RCOL): 1033 while (parent->args == parent->expectargs) 1034 parent = parent->parent; 1035 parent = eqn_box_alloc(ep, parent); 1036 parent->type = EQN_PILE; 1037 parent->expectargs = 1; 1038 break; 1039 case (EQN_TOK_ABOVE): 1040 for (cur = parent; cur != NULL; cur = cur->parent) 1041 if (cur->type == EQN_PILE) 1042 break; 1043 if (cur == NULL) { 1044 mandoc_msg(MANDOCERR_IT_STRAY, ep->parse, 1045 ep->eqn.ln, ep->eqn.pos, eqn_toks[tok]); 1046 break; 1047 } 1048 parent = eqn_box_alloc(ep, cur); 1049 parent->type = EQN_LIST; 1050 break; 1051 case (EQN_TOK_MATRIX): 1052 while (parent->args == parent->expectargs) 1053 parent = parent->parent; 1054 parent = eqn_box_alloc(ep, parent); 1055 parent->type = EQN_MATRIX; 1056 parent->expectargs = 1; 1057 break; 1058 case (EQN_TOK_EOF): 1059 /* 1060 * End of file! 1061 * TODO: make sure we're not in an open subexpression. 1062 */ 1063 return(ROFF_EQN); 1064 default: 1065 assert(tok == EQN_TOK__MAX); 1066 assert(NULL != p); 1067 /* 1068 * If we already have something in the stack and we're 1069 * in an expression, then rewind til we're not any more. 1070 */ 1071 while (parent->args == parent->expectargs) 1072 parent = parent->parent; 1073 cur = eqn_box_alloc(ep, parent); 1074 cur->type = EQN_TEXT; 1075 for (i = 0; i < EQNSYM__MAX; i++) 1076 if (0 == strcmp(eqnsyms[i].str, p)) { 1077 (void)snprintf(sym, sizeof(sym), 1078 "\\[%s]", eqnsyms[i].sym); 1079 cur->text = mandoc_strdup(sym); 1080 free(p); 1081 break; 1082 } 1083 1084 if (i == EQNSYM__MAX) 1085 cur->text = p; 1086 /* 1087 * Post-process list status. 1088 */ 1089 while (parent->type == EQN_LISTONE && 1090 parent->args == parent->expectargs) 1091 parent = parent->parent; 1092 break; 1093 } 1094 goto next_tok; 1095 } 1096 1097 enum rofferr 1098 eqn_end(struct eqn_node **epp) 1099 { 1100 struct eqn_node *ep; 1101 1102 ep = *epp; 1103 *epp = NULL; 1104 1105 ep->eqn.root = mandoc_calloc(1, sizeof(struct eqn_box)); 1106 ep->eqn.root->expectargs = UINT_MAX; 1107 return(eqn_parse(ep, ep->eqn.root)); 1108 } 1109 1110 void 1111 eqn_free(struct eqn_node *p) 1112 { 1113 int i; 1114 1115 eqn_box_free(p->eqn.root); 1116 1117 for (i = 0; i < (int)p->defsz; i++) { 1118 free(p->defs[i].key); 1119 free(p->defs[i].val); 1120 } 1121 1122 free(p->data); 1123 free(p->defs); 1124 free(p); 1125 } 1126