1 /* $Id: eqn.c,v 1.84 2020/01/08 12:16:24 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014,2015,2017,2018,2020 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include "config.h" 19 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <limits.h> 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <string.h> 28 #include <time.h> 29 30 #include "mandoc_aux.h" 31 #include "mandoc.h" 32 #include "roff.h" 33 #include "eqn.h" 34 #include "libmandoc.h" 35 #include "eqn_parse.h" 36 37 #define EQN_NEST_MAX 128 /* maximum nesting of defines */ 38 #define STRNEQ(p1, sz1, p2, sz2) \ 39 ((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1))) 40 41 enum eqn_tok { 42 EQN_TOK_DYAD = 0, 43 EQN_TOK_VEC, 44 EQN_TOK_UNDER, 45 EQN_TOK_BAR, 46 EQN_TOK_TILDE, 47 EQN_TOK_HAT, 48 EQN_TOK_DOT, 49 EQN_TOK_DOTDOT, 50 EQN_TOK_FWD, 51 EQN_TOK_BACK, 52 EQN_TOK_DOWN, 53 EQN_TOK_UP, 54 EQN_TOK_FAT, 55 EQN_TOK_ROMAN, 56 EQN_TOK_ITALIC, 57 EQN_TOK_BOLD, 58 EQN_TOK_SIZE, 59 EQN_TOK_SUB, 60 EQN_TOK_SUP, 61 EQN_TOK_SQRT, 62 EQN_TOK_OVER, 63 EQN_TOK_FROM, 64 EQN_TOK_TO, 65 EQN_TOK_BRACE_OPEN, 66 EQN_TOK_BRACE_CLOSE, 67 EQN_TOK_GSIZE, 68 EQN_TOK_GFONT, 69 EQN_TOK_MARK, 70 EQN_TOK_LINEUP, 71 EQN_TOK_LEFT, 72 EQN_TOK_RIGHT, 73 EQN_TOK_PILE, 74 EQN_TOK_LPILE, 75 EQN_TOK_RPILE, 76 EQN_TOK_CPILE, 77 EQN_TOK_MATRIX, 78 EQN_TOK_CCOL, 79 EQN_TOK_LCOL, 80 EQN_TOK_RCOL, 81 EQN_TOK_DELIM, 82 EQN_TOK_DEFINE, 83 EQN_TOK_TDEFINE, 84 EQN_TOK_NDEFINE, 85 EQN_TOK_UNDEF, 86 EQN_TOK_ABOVE, 87 EQN_TOK__MAX, 88 EQN_TOK_FUNC, 89 EQN_TOK_QUOTED, 90 EQN_TOK_SYM, 91 EQN_TOK_EOF 92 }; 93 94 static const char *eqn_toks[EQN_TOK__MAX] = { 95 "dyad", /* EQN_TOK_DYAD */ 96 "vec", /* EQN_TOK_VEC */ 97 "under", /* EQN_TOK_UNDER */ 98 "bar", /* EQN_TOK_BAR */ 99 "tilde", /* EQN_TOK_TILDE */ 100 "hat", /* EQN_TOK_HAT */ 101 "dot", /* EQN_TOK_DOT */ 102 "dotdot", /* EQN_TOK_DOTDOT */ 103 "fwd", /* EQN_TOK_FWD * */ 104 "back", /* EQN_TOK_BACK */ 105 "down", /* EQN_TOK_DOWN */ 106 "up", /* EQN_TOK_UP */ 107 "fat", /* EQN_TOK_FAT */ 108 "roman", /* EQN_TOK_ROMAN */ 109 "italic", /* EQN_TOK_ITALIC */ 110 "bold", /* EQN_TOK_BOLD */ 111 "size", /* EQN_TOK_SIZE */ 112 "sub", /* EQN_TOK_SUB */ 113 "sup", /* EQN_TOK_SUP */ 114 "sqrt", /* EQN_TOK_SQRT */ 115 "over", /* EQN_TOK_OVER */ 116 "from", /* EQN_TOK_FROM */ 117 "to", /* EQN_TOK_TO */ 118 "{", /* EQN_TOK_BRACE_OPEN */ 119 "}", /* EQN_TOK_BRACE_CLOSE */ 120 "gsize", /* EQN_TOK_GSIZE */ 121 "gfont", /* EQN_TOK_GFONT */ 122 "mark", /* EQN_TOK_MARK */ 123 "lineup", /* EQN_TOK_LINEUP */ 124 "left", /* EQN_TOK_LEFT */ 125 "right", /* EQN_TOK_RIGHT */ 126 "pile", /* EQN_TOK_PILE */ 127 "lpile", /* EQN_TOK_LPILE */ 128 "rpile", /* EQN_TOK_RPILE */ 129 "cpile", /* EQN_TOK_CPILE */ 130 "matrix", /* EQN_TOK_MATRIX */ 131 "ccol", /* EQN_TOK_CCOL */ 132 "lcol", /* EQN_TOK_LCOL */ 133 "rcol", /* EQN_TOK_RCOL */ 134 "delim", /* EQN_TOK_DELIM */ 135 "define", /* EQN_TOK_DEFINE */ 136 "tdefine", /* EQN_TOK_TDEFINE */ 137 "ndefine", /* EQN_TOK_NDEFINE */ 138 "undef", /* EQN_TOK_UNDEF */ 139 "above", /* EQN_TOK_ABOVE */ 140 }; 141 142 static const char *const eqn_func[] = { 143 "acos", "acsc", "and", "arc", "asec", "asin", "atan", 144 "cos", "cosh", "coth", "csc", "det", "exp", "for", 145 "if", "lim", "ln", "log", "max", "min", 146 "sec", "sin", "sinh", "tan", "tanh", "Im", "Re", 147 }; 148 149 enum eqn_symt { 150 EQNSYM_alpha = 0, 151 EQNSYM_beta, 152 EQNSYM_chi, 153 EQNSYM_delta, 154 EQNSYM_epsilon, 155 EQNSYM_eta, 156 EQNSYM_gamma, 157 EQNSYM_iota, 158 EQNSYM_kappa, 159 EQNSYM_lambda, 160 EQNSYM_mu, 161 EQNSYM_nu, 162 EQNSYM_omega, 163 EQNSYM_omicron, 164 EQNSYM_phi, 165 EQNSYM_pi, 166 EQNSYM_ps, 167 EQNSYM_rho, 168 EQNSYM_sigma, 169 EQNSYM_tau, 170 EQNSYM_theta, 171 EQNSYM_upsilon, 172 EQNSYM_xi, 173 EQNSYM_zeta, 174 EQNSYM_DELTA, 175 EQNSYM_GAMMA, 176 EQNSYM_LAMBDA, 177 EQNSYM_OMEGA, 178 EQNSYM_PHI, 179 EQNSYM_PI, 180 EQNSYM_PSI, 181 EQNSYM_SIGMA, 182 EQNSYM_THETA, 183 EQNSYM_UPSILON, 184 EQNSYM_XI, 185 EQNSYM_inter, 186 EQNSYM_union, 187 EQNSYM_prod, 188 EQNSYM_int, 189 EQNSYM_sum, 190 EQNSYM_grad, 191 EQNSYM_del, 192 EQNSYM_times, 193 EQNSYM_cdot, 194 EQNSYM_nothing, 195 EQNSYM_approx, 196 EQNSYM_prime, 197 EQNSYM_half, 198 EQNSYM_partial, 199 EQNSYM_inf, 200 EQNSYM_muchgreat, 201 EQNSYM_muchless, 202 EQNSYM_larrow, 203 EQNSYM_rarrow, 204 EQNSYM_pm, 205 EQNSYM_nequal, 206 EQNSYM_equiv, 207 EQNSYM_lessequal, 208 EQNSYM_moreequal, 209 EQNSYM_minus, 210 EQNSYM__MAX 211 }; 212 213 struct eqnsym { 214 const char *str; 215 const char *sym; 216 }; 217 218 static const struct eqnsym eqnsyms[EQNSYM__MAX] = { 219 { "alpha", "*a" }, /* EQNSYM_alpha */ 220 { "beta", "*b" }, /* EQNSYM_beta */ 221 { "chi", "*x" }, /* EQNSYM_chi */ 222 { "delta", "*d" }, /* EQNSYM_delta */ 223 { "epsilon", "*e" }, /* EQNSYM_epsilon */ 224 { "eta", "*y" }, /* EQNSYM_eta */ 225 { "gamma", "*g" }, /* EQNSYM_gamma */ 226 { "iota", "*i" }, /* EQNSYM_iota */ 227 { "kappa", "*k" }, /* EQNSYM_kappa */ 228 { "lambda", "*l" }, /* EQNSYM_lambda */ 229 { "mu", "*m" }, /* EQNSYM_mu */ 230 { "nu", "*n" }, /* EQNSYM_nu */ 231 { "omega", "*w" }, /* EQNSYM_omega */ 232 { "omicron", "*o" }, /* EQNSYM_omicron */ 233 { "phi", "*f" }, /* EQNSYM_phi */ 234 { "pi", "*p" }, /* EQNSYM_pi */ 235 { "psi", "*q" }, /* EQNSYM_psi */ 236 { "rho", "*r" }, /* EQNSYM_rho */ 237 { "sigma", "*s" }, /* EQNSYM_sigma */ 238 { "tau", "*t" }, /* EQNSYM_tau */ 239 { "theta", "*h" }, /* EQNSYM_theta */ 240 { "upsilon", "*u" }, /* EQNSYM_upsilon */ 241 { "xi", "*c" }, /* EQNSYM_xi */ 242 { "zeta", "*z" }, /* EQNSYM_zeta */ 243 { "DELTA", "*D" }, /* EQNSYM_DELTA */ 244 { "GAMMA", "*G" }, /* EQNSYM_GAMMA */ 245 { "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */ 246 { "OMEGA", "*W" }, /* EQNSYM_OMEGA */ 247 { "PHI", "*F" }, /* EQNSYM_PHI */ 248 { "PI", "*P" }, /* EQNSYM_PI */ 249 { "PSI", "*Q" }, /* EQNSYM_PSI */ 250 { "SIGMA", "*S" }, /* EQNSYM_SIGMA */ 251 { "THETA", "*H" }, /* EQNSYM_THETA */ 252 { "UPSILON", "*U" }, /* EQNSYM_UPSILON */ 253 { "XI", "*C" }, /* EQNSYM_XI */ 254 { "inter", "ca" }, /* EQNSYM_inter */ 255 { "union", "cu" }, /* EQNSYM_union */ 256 { "prod", "product" }, /* EQNSYM_prod */ 257 { "int", "integral" }, /* EQNSYM_int */ 258 { "sum", "sum" }, /* EQNSYM_sum */ 259 { "grad", "gr" }, /* EQNSYM_grad */ 260 { "del", "gr" }, /* EQNSYM_del */ 261 { "times", "mu" }, /* EQNSYM_times */ 262 { "cdot", "pc" }, /* EQNSYM_cdot */ 263 { "nothing", "&" }, /* EQNSYM_nothing */ 264 { "approx", "~~" }, /* EQNSYM_approx */ 265 { "prime", "fm" }, /* EQNSYM_prime */ 266 { "half", "12" }, /* EQNSYM_half */ 267 { "partial", "pd" }, /* EQNSYM_partial */ 268 { "inf", "if" }, /* EQNSYM_inf */ 269 { ">>", ">>" }, /* EQNSYM_muchgreat */ 270 { "<<", "<<" }, /* EQNSYM_muchless */ 271 { "<-", "<-" }, /* EQNSYM_larrow */ 272 { "->", "->" }, /* EQNSYM_rarrow */ 273 { "+-", "+-" }, /* EQNSYM_pm */ 274 { "!=", "!=" }, /* EQNSYM_nequal */ 275 { "==", "==" }, /* EQNSYM_equiv */ 276 { "<=", "<=" }, /* EQNSYM_lessequal */ 277 { ">=", ">=" }, /* EQNSYM_moreequal */ 278 { "-", "mi" }, /* EQNSYM_minus */ 279 }; 280 281 enum parse_mode { 282 MODE_QUOTED, 283 MODE_NOSUB, 284 MODE_SUB, 285 MODE_TOK 286 }; 287 288 struct eqn_def { 289 char *key; 290 size_t keysz; 291 char *val; 292 size_t valsz; 293 }; 294 295 static struct eqn_box *eqn_box_alloc(struct eqn_node *, struct eqn_box *); 296 static struct eqn_box *eqn_box_makebinary(struct eqn_node *, 297 struct eqn_box *); 298 static void eqn_def(struct eqn_node *); 299 static struct eqn_def *eqn_def_find(struct eqn_node *); 300 static void eqn_delim(struct eqn_node *); 301 static enum eqn_tok eqn_next(struct eqn_node *, enum parse_mode); 302 static void eqn_undef(struct eqn_node *); 303 304 305 struct eqn_node * 306 eqn_alloc(void) 307 { 308 struct eqn_node *ep; 309 310 ep = mandoc_calloc(1, sizeof(*ep)); 311 ep->gsize = EQN_DEFSIZE; 312 return ep; 313 } 314 315 void 316 eqn_reset(struct eqn_node *ep) 317 { 318 free(ep->data); 319 ep->data = ep->start = ep->end = NULL; 320 ep->sz = ep->toksz = 0; 321 } 322 323 void 324 eqn_read(struct eqn_node *ep, const char *p) 325 { 326 char *cp; 327 328 if (ep->data == NULL) { 329 ep->sz = strlen(p); 330 ep->data = mandoc_strdup(p); 331 } else { 332 ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p); 333 free(ep->data); 334 ep->data = cp; 335 } 336 ep->sz += 1; 337 } 338 339 /* 340 * Find the key "key" of the give size within our eqn-defined values. 341 */ 342 static struct eqn_def * 343 eqn_def_find(struct eqn_node *ep) 344 { 345 int i; 346 347 for (i = 0; i < (int)ep->defsz; i++) 348 if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key, 349 ep->defs[i].keysz, ep->start, ep->toksz)) 350 return &ep->defs[i]; 351 352 return NULL; 353 } 354 355 /* 356 * Parse a token from the input text. The modes are: 357 * MODE_QUOTED: Use *ep->start as the delimiter; the token ends 358 * before its next occurence. Do not interpret the token in any 359 * way and return EQN_TOK_QUOTED. All other modes behave like 360 * MODE_QUOTED when *ep->start is '"'. 361 * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it; 362 * otherwise, it ends before the next whitespace or brace. 363 * Do not interpret the token and return EQN_TOK__MAX. 364 * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an 365 * alias created with define. If it is an alias, replace it with 366 * its string value and reparse. 367 * MODE_TOK: Like MODE_SUB, but also check the token against the list 368 * of tokens, and if there is a match, return that token. Otherwise, 369 * if the token matches a symbol, return EQN_TOK_SYM; if it matches 370 * a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX. Except for 371 * a token match, *ep->start is set to an allocated string that the 372 * caller is expected to free. 373 * All modes skip whitespace following the end of the token. 374 */ 375 static enum eqn_tok 376 eqn_next(struct eqn_node *ep, enum parse_mode mode) 377 { 378 static int last_len, lim; 379 380 struct eqn_def *def; 381 size_t start; 382 int diff, i, quoted; 383 enum eqn_tok tok; 384 385 /* 386 * Reset the recursion counter after advancing 387 * beyond the end of the previous substitution. 388 */ 389 if (ep->end - ep->data >= last_len) 390 lim = 0; 391 392 ep->start = ep->end; 393 quoted = mode == MODE_QUOTED; 394 for (;;) { 395 switch (*ep->start) { 396 case '\0': 397 ep->toksz = 0; 398 return EQN_TOK_EOF; 399 case '"': 400 quoted = 1; 401 break; 402 case ' ': 403 case '\t': 404 case '~': 405 case '^': 406 if (quoted) 407 break; 408 ep->start++; 409 continue; 410 default: 411 break; 412 } 413 if (quoted) { 414 ep->end = strchr(ep->start + 1, *ep->start); 415 ep->start++; /* Skip opening quote. */ 416 if (ep->end == NULL) { 417 mandoc_msg(MANDOCERR_ARG_QUOTE, 418 ep->node->line, ep->node->pos, NULL); 419 ep->end = strchr(ep->start, '\0'); 420 } 421 } else { 422 ep->end = ep->start + 1; 423 if (*ep->start != '{' && *ep->start != '}') 424 ep->end += strcspn(ep->end, " ^~\"{}\t"); 425 } 426 ep->toksz = ep->end - ep->start; 427 if (quoted && *ep->end != '\0') 428 ep->end++; /* Skip closing quote. */ 429 while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL) 430 ep->end++; 431 if (quoted) /* Cannot return, may have to strndup. */ 432 break; 433 if (mode == MODE_NOSUB) 434 return EQN_TOK__MAX; 435 if ((def = eqn_def_find(ep)) == NULL) 436 break; 437 if (++lim > EQN_NEST_MAX) { 438 mandoc_msg(MANDOCERR_ROFFLOOP, 439 ep->node->line, ep->node->pos, NULL); 440 return EQN_TOK_EOF; 441 } 442 443 /* Replace a defined name with its string value. */ 444 if ((diff = def->valsz - ep->toksz) > 0) { 445 start = ep->start - ep->data; 446 ep->sz += diff; 447 ep->data = mandoc_realloc(ep->data, ep->sz + 1); 448 ep->start = ep->data + start; 449 } 450 if (diff) 451 memmove(ep->start + def->valsz, ep->start + ep->toksz, 452 strlen(ep->start + ep->toksz) + 1); 453 memcpy(ep->start, def->val, def->valsz); 454 last_len = ep->start - ep->data + def->valsz; 455 } 456 if (mode != MODE_TOK) 457 return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX; 458 if (quoted) { 459 ep->start = mandoc_strndup(ep->start, ep->toksz); 460 return EQN_TOK_QUOTED; 461 } 462 for (tok = 0; tok < EQN_TOK__MAX; tok++) 463 if (STRNEQ(ep->start, ep->toksz, 464 eqn_toks[tok], strlen(eqn_toks[tok]))) 465 return tok; 466 467 for (i = 0; i < EQNSYM__MAX; i++) { 468 if (STRNEQ(ep->start, ep->toksz, 469 eqnsyms[i].str, strlen(eqnsyms[i].str))) { 470 mandoc_asprintf(&ep->start, 471 "\\[%s]", eqnsyms[i].sym); 472 return EQN_TOK_SYM; 473 } 474 } 475 ep->start = mandoc_strndup(ep->start, ep->toksz); 476 for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++) 477 if (STRNEQ(ep->start, ep->toksz, 478 eqn_func[i], strlen(eqn_func[i]))) 479 return EQN_TOK_FUNC; 480 return EQN_TOK__MAX; 481 } 482 483 void 484 eqn_box_free(struct eqn_box *bp) 485 { 486 if (bp == NULL) 487 return; 488 489 if (bp->first) 490 eqn_box_free(bp->first); 491 if (bp->next) 492 eqn_box_free(bp->next); 493 494 free(bp->text); 495 free(bp->left); 496 free(bp->right); 497 free(bp->top); 498 free(bp->bottom); 499 free(bp); 500 } 501 502 struct eqn_box * 503 eqn_box_new(void) 504 { 505 struct eqn_box *bp; 506 507 bp = mandoc_calloc(1, sizeof(*bp)); 508 bp->expectargs = UINT_MAX; 509 return bp; 510 } 511 512 /* 513 * Allocate a box as the last child of the parent node. 514 */ 515 static struct eqn_box * 516 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent) 517 { 518 struct eqn_box *bp; 519 520 bp = eqn_box_new(); 521 bp->parent = parent; 522 bp->parent->args++; 523 bp->font = bp->parent->font; 524 bp->size = ep->gsize; 525 526 if (NULL != parent->first) { 527 parent->last->next = bp; 528 bp->prev = parent->last; 529 } else 530 parent->first = bp; 531 532 parent->last = bp; 533 return bp; 534 } 535 536 /* 537 * Reparent the current last node (of the current parent) under a new 538 * EQN_SUBEXPR as the first element. 539 * Then return the new parent. 540 * The new EQN_SUBEXPR will have a two-child limit. 541 */ 542 static struct eqn_box * 543 eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent) 544 { 545 struct eqn_box *b, *newb; 546 547 assert(NULL != parent->last); 548 b = parent->last; 549 if (parent->last == parent->first) 550 parent->first = NULL; 551 parent->args--; 552 parent->last = b->prev; 553 b->prev = NULL; 554 newb = eqn_box_alloc(ep, parent); 555 newb->type = EQN_SUBEXPR; 556 newb->expectargs = 2; 557 newb->args = 1; 558 newb->first = newb->last = b; 559 newb->first->next = NULL; 560 b->parent = newb; 561 return newb; 562 } 563 564 /* 565 * Parse the "delim" control statement. 566 */ 567 static void 568 eqn_delim(struct eqn_node *ep) 569 { 570 if (ep->end[0] == '\0' || ep->end[1] == '\0') { 571 mandoc_msg(MANDOCERR_REQ_EMPTY, 572 ep->node->line, ep->node->pos, "delim"); 573 if (ep->end[0] != '\0') 574 ep->end++; 575 } else if (strncmp(ep->end, "off", 3) == 0) { 576 ep->delim = 0; 577 ep->end += 3; 578 } else if (strncmp(ep->end, "on", 2) == 0) { 579 if (ep->odelim && ep->cdelim) 580 ep->delim = 1; 581 ep->end += 2; 582 } else { 583 ep->odelim = *ep->end++; 584 ep->cdelim = *ep->end++; 585 ep->delim = 1; 586 } 587 } 588 589 /* 590 * Undefine a previously-defined string. 591 */ 592 static void 593 eqn_undef(struct eqn_node *ep) 594 { 595 struct eqn_def *def; 596 597 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) { 598 mandoc_msg(MANDOCERR_REQ_EMPTY, 599 ep->node->line, ep->node->pos, "undef"); 600 return; 601 } 602 if ((def = eqn_def_find(ep)) == NULL) 603 return; 604 free(def->key); 605 free(def->val); 606 def->key = def->val = NULL; 607 def->keysz = def->valsz = 0; 608 } 609 610 static void 611 eqn_def(struct eqn_node *ep) 612 { 613 struct eqn_def *def; 614 int i; 615 616 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) { 617 mandoc_msg(MANDOCERR_REQ_EMPTY, 618 ep->node->line, ep->node->pos, "define"); 619 return; 620 } 621 622 /* 623 * Search for a key that already exists. 624 * Create a new key if none is found. 625 */ 626 if ((def = eqn_def_find(ep)) == NULL) { 627 /* Find holes in string array. */ 628 for (i = 0; i < (int)ep->defsz; i++) 629 if (0 == ep->defs[i].keysz) 630 break; 631 632 if (i == (int)ep->defsz) { 633 ep->defsz++; 634 ep->defs = mandoc_reallocarray(ep->defs, 635 ep->defsz, sizeof(struct eqn_def)); 636 ep->defs[i].key = ep->defs[i].val = NULL; 637 } 638 639 def = ep->defs + i; 640 free(def->key); 641 def->key = mandoc_strndup(ep->start, ep->toksz); 642 def->keysz = ep->toksz; 643 } 644 645 if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) { 646 mandoc_msg(MANDOCERR_REQ_EMPTY, 647 ep->node->line, ep->node->pos, "define %s", def->key); 648 free(def->key); 649 free(def->val); 650 def->key = def->val = NULL; 651 def->keysz = def->valsz = 0; 652 return; 653 } 654 free(def->val); 655 def->val = mandoc_strndup(ep->start, ep->toksz); 656 def->valsz = ep->toksz; 657 } 658 659 void 660 eqn_parse(struct eqn_node *ep) 661 { 662 struct eqn_box *cur, *nbox, *parent, *split; 663 const char *cp, *cpn; 664 char *p; 665 enum eqn_tok tok; 666 enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln; 667 int size; 668 669 parent = ep->node->eqn; 670 assert(parent != NULL); 671 672 /* 673 * Empty equation. 674 * Do not add it to the high-level syntax tree. 675 */ 676 677 if (ep->data == NULL) 678 return; 679 680 ep->start = ep->end = ep->data; 681 682 next_tok: 683 tok = eqn_next(ep, MODE_TOK); 684 switch (tok) { 685 case EQN_TOK_UNDEF: 686 eqn_undef(ep); 687 break; 688 case EQN_TOK_NDEFINE: 689 case EQN_TOK_DEFINE: 690 eqn_def(ep); 691 break; 692 case EQN_TOK_TDEFINE: 693 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF || 694 eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) 695 mandoc_msg(MANDOCERR_REQ_EMPTY, 696 ep->node->line, ep->node->pos, "tdefine"); 697 break; 698 case EQN_TOK_DELIM: 699 eqn_delim(ep); 700 break; 701 case EQN_TOK_GFONT: 702 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) 703 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 704 ep->node->pos, "%s", eqn_toks[tok]); 705 break; 706 case EQN_TOK_MARK: 707 case EQN_TOK_LINEUP: 708 /* Ignore these. */ 709 break; 710 case EQN_TOK_DYAD: 711 case EQN_TOK_VEC: 712 case EQN_TOK_UNDER: 713 case EQN_TOK_BAR: 714 case EQN_TOK_TILDE: 715 case EQN_TOK_HAT: 716 case EQN_TOK_DOT: 717 case EQN_TOK_DOTDOT: 718 if (parent->last == NULL) { 719 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line, 720 ep->node->pos, "%s", eqn_toks[tok]); 721 cur = eqn_box_alloc(ep, parent); 722 cur->type = EQN_TEXT; 723 cur->text = mandoc_strdup(""); 724 } 725 parent = eqn_box_makebinary(ep, parent); 726 parent->type = EQN_LIST; 727 parent->expectargs = 1; 728 parent->font = EQNFONT_ROMAN; 729 switch (tok) { 730 case EQN_TOK_DOTDOT: 731 parent->top = mandoc_strdup("\\[ad]"); 732 break; 733 case EQN_TOK_VEC: 734 parent->top = mandoc_strdup("\\[->]"); 735 break; 736 case EQN_TOK_DYAD: 737 parent->top = mandoc_strdup("\\[<>]"); 738 break; 739 case EQN_TOK_TILDE: 740 parent->top = mandoc_strdup("\\[a~]"); 741 break; 742 case EQN_TOK_UNDER: 743 parent->bottom = mandoc_strdup("\\[ul]"); 744 break; 745 case EQN_TOK_BAR: 746 parent->top = mandoc_strdup("\\[rn]"); 747 break; 748 case EQN_TOK_DOT: 749 parent->top = mandoc_strdup("\\[a.]"); 750 break; 751 case EQN_TOK_HAT: 752 parent->top = mandoc_strdup("\\[ha]"); 753 break; 754 default: 755 abort(); 756 } 757 parent = parent->parent; 758 break; 759 case EQN_TOK_FWD: 760 case EQN_TOK_BACK: 761 case EQN_TOK_DOWN: 762 case EQN_TOK_UP: 763 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) 764 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 765 ep->node->pos, "%s", eqn_toks[tok]); 766 break; 767 case EQN_TOK_FAT: 768 case EQN_TOK_ROMAN: 769 case EQN_TOK_ITALIC: 770 case EQN_TOK_BOLD: 771 while (parent->args == parent->expectargs) 772 parent = parent->parent; 773 /* 774 * These values apply to the next word or sequence of 775 * words; thus, we mark that we'll have a child with 776 * exactly one of those. 777 */ 778 parent = eqn_box_alloc(ep, parent); 779 parent->type = EQN_LIST; 780 parent->expectargs = 1; 781 switch (tok) { 782 case EQN_TOK_FAT: 783 parent->font = EQNFONT_FAT; 784 break; 785 case EQN_TOK_ROMAN: 786 parent->font = EQNFONT_ROMAN; 787 break; 788 case EQN_TOK_ITALIC: 789 parent->font = EQNFONT_ITALIC; 790 break; 791 case EQN_TOK_BOLD: 792 parent->font = EQNFONT_BOLD; 793 break; 794 default: 795 abort(); 796 } 797 break; 798 case EQN_TOK_SIZE: 799 case EQN_TOK_GSIZE: 800 /* Accept two values: integral size and a single. */ 801 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 802 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 803 ep->node->pos, "%s", eqn_toks[tok]); 804 break; 805 } 806 size = mandoc_strntoi(ep->start, ep->toksz, 10); 807 if (-1 == size) { 808 mandoc_msg(MANDOCERR_IT_NONUM, ep->node->line, 809 ep->node->pos, "%s", eqn_toks[tok]); 810 break; 811 } 812 if (EQN_TOK_GSIZE == tok) { 813 ep->gsize = size; 814 break; 815 } 816 while (parent->args == parent->expectargs) 817 parent = parent->parent; 818 parent = eqn_box_alloc(ep, parent); 819 parent->type = EQN_LIST; 820 parent->expectargs = 1; 821 parent->size = size; 822 break; 823 case EQN_TOK_FROM: 824 case EQN_TOK_TO: 825 case EQN_TOK_SUB: 826 case EQN_TOK_SUP: 827 /* 828 * We have a left-right-associative expression. 829 * Repivot under a positional node, open a child scope 830 * and keep on reading. 831 */ 832 if (parent->last == NULL) { 833 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line, 834 ep->node->pos, "%s", eqn_toks[tok]); 835 cur = eqn_box_alloc(ep, parent); 836 cur->type = EQN_TEXT; 837 cur->text = mandoc_strdup(""); 838 } 839 while (parent->expectargs == 1 && parent->args == 1) 840 parent = parent->parent; 841 if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO) { 842 for (cur = parent; cur != NULL; cur = cur->parent) 843 if (cur->pos == EQNPOS_SUB || 844 cur->pos == EQNPOS_SUP || 845 cur->pos == EQNPOS_SUBSUP || 846 cur->pos == EQNPOS_SQRT || 847 cur->pos == EQNPOS_OVER) 848 break; 849 if (cur != NULL) 850 parent = cur->parent; 851 } 852 if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) { 853 parent->expectargs = 3; 854 parent->pos = EQNPOS_SUBSUP; 855 break; 856 } 857 if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) { 858 parent->expectargs = 3; 859 parent->pos = EQNPOS_FROMTO; 860 break; 861 } 862 parent = eqn_box_makebinary(ep, parent); 863 switch (tok) { 864 case EQN_TOK_FROM: 865 parent->pos = EQNPOS_FROM; 866 break; 867 case EQN_TOK_TO: 868 parent->pos = EQNPOS_TO; 869 break; 870 case EQN_TOK_SUP: 871 parent->pos = EQNPOS_SUP; 872 break; 873 case EQN_TOK_SUB: 874 parent->pos = EQNPOS_SUB; 875 break; 876 default: 877 abort(); 878 } 879 break; 880 case EQN_TOK_SQRT: 881 while (parent->args == parent->expectargs) 882 parent = parent->parent; 883 /* 884 * Accept a left-right-associative set of arguments just 885 * like sub and sup and friends but without rebalancing 886 * under a pivot. 887 */ 888 parent = eqn_box_alloc(ep, parent); 889 parent->type = EQN_SUBEXPR; 890 parent->pos = EQNPOS_SQRT; 891 parent->expectargs = 1; 892 break; 893 case EQN_TOK_OVER: 894 /* 895 * We have a right-left-associative fraction. 896 * Close out anything that's currently open, then 897 * rebalance and continue reading. 898 */ 899 if (parent->last == NULL) { 900 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line, 901 ep->node->pos, "%s", eqn_toks[tok]); 902 cur = eqn_box_alloc(ep, parent); 903 cur->type = EQN_TEXT; 904 cur->text = mandoc_strdup(""); 905 } 906 while (parent->args == parent->expectargs) 907 parent = parent->parent; 908 while (EQN_SUBEXPR == parent->type) 909 parent = parent->parent; 910 parent = eqn_box_makebinary(ep, parent); 911 parent->pos = EQNPOS_OVER; 912 break; 913 case EQN_TOK_RIGHT: 914 case EQN_TOK_BRACE_CLOSE: 915 /* 916 * Close out the existing brace. 917 * FIXME: this is a shitty sentinel: we should really 918 * have a native EQN_BRACE type or whatnot. 919 */ 920 for (cur = parent; cur != NULL; cur = cur->parent) 921 if (cur->type == EQN_LIST && 922 cur->expectargs > 1 && 923 (tok == EQN_TOK_BRACE_CLOSE || 924 cur->left != NULL)) 925 break; 926 if (cur == NULL) { 927 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->node->line, 928 ep->node->pos, "%s", eqn_toks[tok]); 929 break; 930 } 931 parent = cur; 932 if (EQN_TOK_RIGHT == tok) { 933 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 934 mandoc_msg(MANDOCERR_REQ_EMPTY, 935 ep->node->line, ep->node->pos, 936 "%s", eqn_toks[tok]); 937 break; 938 } 939 /* Handling depends on right/left. */ 940 if (STRNEQ(ep->start, ep->toksz, "ceiling", 7)) 941 parent->right = mandoc_strdup("\\[rc]"); 942 else if (STRNEQ(ep->start, ep->toksz, "floor", 5)) 943 parent->right = mandoc_strdup("\\[rf]"); 944 else 945 parent->right = 946 mandoc_strndup(ep->start, ep->toksz); 947 } 948 parent = parent->parent; 949 if (tok == EQN_TOK_BRACE_CLOSE && 950 (parent->type == EQN_PILE || 951 parent->type == EQN_MATRIX)) 952 parent = parent->parent; 953 /* Close out any "singleton" lists. */ 954 while (parent->type == EQN_LIST && 955 parent->expectargs == 1 && 956 parent->args == 1) 957 parent = parent->parent; 958 break; 959 case EQN_TOK_BRACE_OPEN: 960 case EQN_TOK_LEFT: 961 /* 962 * If we already have something in the stack and we're 963 * in an expression, then rewind til we're not any more 964 * (just like with the text node). 965 */ 966 while (parent->args == parent->expectargs) 967 parent = parent->parent; 968 if (EQN_TOK_LEFT == tok && 969 eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 970 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 971 ep->node->pos, "%s", eqn_toks[tok]); 972 break; 973 } 974 parent = eqn_box_alloc(ep, parent); 975 parent->type = EQN_LIST; 976 if (EQN_TOK_LEFT == tok) { 977 if (STRNEQ(ep->start, ep->toksz, "ceiling", 7)) 978 parent->left = mandoc_strdup("\\[lc]"); 979 else if (STRNEQ(ep->start, ep->toksz, "floor", 5)) 980 parent->left = mandoc_strdup("\\[lf]"); 981 else 982 parent->left = 983 mandoc_strndup(ep->start, ep->toksz); 984 } 985 break; 986 case EQN_TOK_PILE: 987 case EQN_TOK_LPILE: 988 case EQN_TOK_RPILE: 989 case EQN_TOK_CPILE: 990 case EQN_TOK_CCOL: 991 case EQN_TOK_LCOL: 992 case EQN_TOK_RCOL: 993 while (parent->args == parent->expectargs) 994 parent = parent->parent; 995 parent = eqn_box_alloc(ep, parent); 996 parent->type = EQN_PILE; 997 parent->expectargs = 1; 998 break; 999 case EQN_TOK_ABOVE: 1000 for (cur = parent; cur != NULL; cur = cur->parent) 1001 if (cur->type == EQN_PILE) 1002 break; 1003 if (cur == NULL) { 1004 mandoc_msg(MANDOCERR_IT_STRAY, ep->node->line, 1005 ep->node->pos, "%s", eqn_toks[tok]); 1006 break; 1007 } 1008 parent = eqn_box_alloc(ep, cur); 1009 parent->type = EQN_LIST; 1010 break; 1011 case EQN_TOK_MATRIX: 1012 while (parent->args == parent->expectargs) 1013 parent = parent->parent; 1014 parent = eqn_box_alloc(ep, parent); 1015 parent->type = EQN_MATRIX; 1016 parent->expectargs = 1; 1017 break; 1018 case EQN_TOK_EOF: 1019 return; 1020 case EQN_TOK__MAX: 1021 case EQN_TOK_FUNC: 1022 case EQN_TOK_QUOTED: 1023 case EQN_TOK_SYM: 1024 p = ep->start; 1025 assert(p != NULL); 1026 /* 1027 * If we already have something in the stack and we're 1028 * in an expression, then rewind til we're not any more. 1029 */ 1030 while (parent->args == parent->expectargs) 1031 parent = parent->parent; 1032 cur = eqn_box_alloc(ep, parent); 1033 cur->type = EQN_TEXT; 1034 cur->text = p; 1035 switch (tok) { 1036 case EQN_TOK_FUNC: 1037 cur->font = EQNFONT_ROMAN; 1038 break; 1039 case EQN_TOK_QUOTED: 1040 if (cur->font == EQNFONT_NONE) 1041 cur->font = EQNFONT_ITALIC; 1042 break; 1043 case EQN_TOK_SYM: 1044 break; 1045 default: 1046 if (cur->font != EQNFONT_NONE || *p == '\0') 1047 break; 1048 cpn = p - 1; 1049 ccln = CCL_LET; 1050 split = NULL; 1051 for (;;) { 1052 /* Advance to next character. */ 1053 cp = cpn++; 1054 ccl = ccln; 1055 ccln = isalpha((unsigned char)*cpn) ? CCL_LET : 1056 isdigit((unsigned char)*cpn) || 1057 (*cpn == '.' && (ccl == CCL_DIG || 1058 isdigit((unsigned char)cpn[1]))) ? 1059 CCL_DIG : CCL_PUN; 1060 /* No boundary before first character. */ 1061 if (cp < p) 1062 continue; 1063 cur->font = ccl == CCL_LET ? 1064 EQNFONT_ITALIC : EQNFONT_ROMAN; 1065 if (*cp == '\\') 1066 mandoc_escape(&cpn, NULL, NULL); 1067 /* No boundary after last character. */ 1068 if (*cpn == '\0') 1069 break; 1070 if (ccln == ccl && *cp != ',' && *cpn != ',') 1071 continue; 1072 /* Boundary found, split the text. */ 1073 if (parent->args == parent->expectargs) { 1074 /* Remove the text from the tree. */ 1075 if (cur->prev == NULL) 1076 parent->first = cur->next; 1077 else 1078 cur->prev->next = NULL; 1079 parent->last = cur->prev; 1080 parent->args--; 1081 /* Set up a list instead. */ 1082 split = eqn_box_alloc(ep, parent); 1083 split->type = EQN_LIST; 1084 /* Insert the word into the list. */ 1085 split->first = split->last = cur; 1086 cur->parent = split; 1087 cur->prev = NULL; 1088 parent = split; 1089 } 1090 /* Append a new text box. */ 1091 nbox = eqn_box_alloc(ep, parent); 1092 nbox->type = EQN_TEXT; 1093 nbox->text = mandoc_strdup(cpn); 1094 /* Truncate the old box. */ 1095 p = mandoc_strndup(cur->text, 1096 cpn - cur->text); 1097 free(cur->text); 1098 cur->text = p; 1099 /* Setup to process the new box. */ 1100 cur = nbox; 1101 p = nbox->text; 1102 cpn = p - 1; 1103 ccln = CCL_LET; 1104 } 1105 if (split != NULL) 1106 parent = split->parent; 1107 break; 1108 } 1109 break; 1110 default: 1111 abort(); 1112 } 1113 goto next_tok; 1114 } 1115 1116 void 1117 eqn_free(struct eqn_node *p) 1118 { 1119 int i; 1120 1121 if (p == NULL) 1122 return; 1123 1124 for (i = 0; i < (int)p->defsz; i++) { 1125 free(p->defs[i].key); 1126 free(p->defs[i].val); 1127 } 1128 1129 free(p->data); 1130 free(p->defs); 1131 free(p); 1132 } 1133