1 /* $Id: eqn.c,v 1.86 2023/04/28 19:11:03 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2014, 2015, 2017, 2018, 2020, 2022 4 * Ingo Schwarze <schwarze@openbsd.org> 5 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 6 * 7 * Permission to use, copy, modify, and distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 18 */ 19 #include "config.h" 20 21 #include <sys/types.h> 22 23 #include <assert.h> 24 #include <ctype.h> 25 #include <limits.h> 26 #include <stdio.h> 27 #include <stdlib.h> 28 #include <string.h> 29 #include <time.h> 30 31 #include "mandoc_aux.h" 32 #include "mandoc.h" 33 #include "roff.h" 34 #include "eqn.h" 35 #include "libmandoc.h" 36 #include "eqn_parse.h" 37 38 #define EQN_NEST_MAX 128 /* maximum nesting of defines */ 39 #define STRNEQ(p1, sz1, p2, sz2) \ 40 ((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1))) 41 42 enum eqn_tok { 43 EQN_TOK_DYAD = 0, 44 EQN_TOK_VEC, 45 EQN_TOK_UNDER, 46 EQN_TOK_BAR, 47 EQN_TOK_TILDE, 48 EQN_TOK_HAT, 49 EQN_TOK_DOT, 50 EQN_TOK_DOTDOT, 51 EQN_TOK_FWD, 52 EQN_TOK_BACK, 53 EQN_TOK_DOWN, 54 EQN_TOK_UP, 55 EQN_TOK_FAT, 56 EQN_TOK_ROMAN, 57 EQN_TOK_ITALIC, 58 EQN_TOK_BOLD, 59 EQN_TOK_SIZE, 60 EQN_TOK_SUB, 61 EQN_TOK_SUP, 62 EQN_TOK_SQRT, 63 EQN_TOK_OVER, 64 EQN_TOK_FROM, 65 EQN_TOK_TO, 66 EQN_TOK_BRACE_OPEN, 67 EQN_TOK_BRACE_CLOSE, 68 EQN_TOK_GSIZE, 69 EQN_TOK_GFONT, 70 EQN_TOK_MARK, 71 EQN_TOK_LINEUP, 72 EQN_TOK_LEFT, 73 EQN_TOK_RIGHT, 74 EQN_TOK_PILE, 75 EQN_TOK_LPILE, 76 EQN_TOK_RPILE, 77 EQN_TOK_CPILE, 78 EQN_TOK_MATRIX, 79 EQN_TOK_CCOL, 80 EQN_TOK_LCOL, 81 EQN_TOK_RCOL, 82 EQN_TOK_DELIM, 83 EQN_TOK_DEFINE, 84 EQN_TOK_TDEFINE, 85 EQN_TOK_NDEFINE, 86 EQN_TOK_UNDEF, 87 EQN_TOK_ABOVE, 88 EQN_TOK__MAX, 89 EQN_TOK_FUNC, 90 EQN_TOK_QUOTED, 91 EQN_TOK_SYM, 92 EQN_TOK_EOF 93 }; 94 95 static const char *eqn_toks[EQN_TOK__MAX] = { 96 "dyad", /* EQN_TOK_DYAD */ 97 "vec", /* EQN_TOK_VEC */ 98 "under", /* EQN_TOK_UNDER */ 99 "bar", /* EQN_TOK_BAR */ 100 "tilde", /* EQN_TOK_TILDE */ 101 "hat", /* EQN_TOK_HAT */ 102 "dot", /* EQN_TOK_DOT */ 103 "dotdot", /* EQN_TOK_DOTDOT */ 104 "fwd", /* EQN_TOK_FWD * */ 105 "back", /* EQN_TOK_BACK */ 106 "down", /* EQN_TOK_DOWN */ 107 "up", /* EQN_TOK_UP */ 108 "fat", /* EQN_TOK_FAT */ 109 "roman", /* EQN_TOK_ROMAN */ 110 "italic", /* EQN_TOK_ITALIC */ 111 "bold", /* EQN_TOK_BOLD */ 112 "size", /* EQN_TOK_SIZE */ 113 "sub", /* EQN_TOK_SUB */ 114 "sup", /* EQN_TOK_SUP */ 115 "sqrt", /* EQN_TOK_SQRT */ 116 "over", /* EQN_TOK_OVER */ 117 "from", /* EQN_TOK_FROM */ 118 "to", /* EQN_TOK_TO */ 119 "{", /* EQN_TOK_BRACE_OPEN */ 120 "}", /* EQN_TOK_BRACE_CLOSE */ 121 "gsize", /* EQN_TOK_GSIZE */ 122 "gfont", /* EQN_TOK_GFONT */ 123 "mark", /* EQN_TOK_MARK */ 124 "lineup", /* EQN_TOK_LINEUP */ 125 "left", /* EQN_TOK_LEFT */ 126 "right", /* EQN_TOK_RIGHT */ 127 "pile", /* EQN_TOK_PILE */ 128 "lpile", /* EQN_TOK_LPILE */ 129 "rpile", /* EQN_TOK_RPILE */ 130 "cpile", /* EQN_TOK_CPILE */ 131 "matrix", /* EQN_TOK_MATRIX */ 132 "ccol", /* EQN_TOK_CCOL */ 133 "lcol", /* EQN_TOK_LCOL */ 134 "rcol", /* EQN_TOK_RCOL */ 135 "delim", /* EQN_TOK_DELIM */ 136 "define", /* EQN_TOK_DEFINE */ 137 "tdefine", /* EQN_TOK_TDEFINE */ 138 "ndefine", /* EQN_TOK_NDEFINE */ 139 "undef", /* EQN_TOK_UNDEF */ 140 "above", /* EQN_TOK_ABOVE */ 141 }; 142 143 static const char *const eqn_func[] = { 144 "acos", "acsc", "and", "arc", "asec", "asin", "atan", 145 "cos", "cosh", "coth", "csc", "det", "exp", "for", 146 "if", "lim", "ln", "log", "max", "min", 147 "sec", "sin", "sinh", "tan", "tanh", "Im", "Re", 148 }; 149 150 enum eqn_symt { 151 EQNSYM_alpha = 0, 152 EQNSYM_beta, 153 EQNSYM_chi, 154 EQNSYM_delta, 155 EQNSYM_epsilon, 156 EQNSYM_eta, 157 EQNSYM_gamma, 158 EQNSYM_iota, 159 EQNSYM_kappa, 160 EQNSYM_lambda, 161 EQNSYM_mu, 162 EQNSYM_nu, 163 EQNSYM_omega, 164 EQNSYM_omicron, 165 EQNSYM_phi, 166 EQNSYM_pi, 167 EQNSYM_ps, 168 EQNSYM_rho, 169 EQNSYM_sigma, 170 EQNSYM_tau, 171 EQNSYM_theta, 172 EQNSYM_upsilon, 173 EQNSYM_xi, 174 EQNSYM_zeta, 175 EQNSYM_DELTA, 176 EQNSYM_GAMMA, 177 EQNSYM_LAMBDA, 178 EQNSYM_OMEGA, 179 EQNSYM_PHI, 180 EQNSYM_PI, 181 EQNSYM_PSI, 182 EQNSYM_SIGMA, 183 EQNSYM_THETA, 184 EQNSYM_UPSILON, 185 EQNSYM_XI, 186 EQNSYM_inter, 187 EQNSYM_union, 188 EQNSYM_prod, 189 EQNSYM_int, 190 EQNSYM_sum, 191 EQNSYM_grad, 192 EQNSYM_del, 193 EQNSYM_times, 194 EQNSYM_cdot, 195 EQNSYM_nothing, 196 EQNSYM_approx, 197 EQNSYM_prime, 198 EQNSYM_half, 199 EQNSYM_partial, 200 EQNSYM_inf, 201 EQNSYM_muchgreat, 202 EQNSYM_muchless, 203 EQNSYM_larrow, 204 EQNSYM_rarrow, 205 EQNSYM_pm, 206 EQNSYM_nequal, 207 EQNSYM_equiv, 208 EQNSYM_lessequal, 209 EQNSYM_moreequal, 210 EQNSYM_minus, 211 EQNSYM__MAX 212 }; 213 214 struct eqnsym { 215 const char *str; 216 const char *sym; 217 }; 218 219 static const struct eqnsym eqnsyms[EQNSYM__MAX] = { 220 { "alpha", "*a" }, /* EQNSYM_alpha */ 221 { "beta", "*b" }, /* EQNSYM_beta */ 222 { "chi", "*x" }, /* EQNSYM_chi */ 223 { "delta", "*d" }, /* EQNSYM_delta */ 224 { "epsilon", "*e" }, /* EQNSYM_epsilon */ 225 { "eta", "*y" }, /* EQNSYM_eta */ 226 { "gamma", "*g" }, /* EQNSYM_gamma */ 227 { "iota", "*i" }, /* EQNSYM_iota */ 228 { "kappa", "*k" }, /* EQNSYM_kappa */ 229 { "lambda", "*l" }, /* EQNSYM_lambda */ 230 { "mu", "*m" }, /* EQNSYM_mu */ 231 { "nu", "*n" }, /* EQNSYM_nu */ 232 { "omega", "*w" }, /* EQNSYM_omega */ 233 { "omicron", "*o" }, /* EQNSYM_omicron */ 234 { "phi", "*f" }, /* EQNSYM_phi */ 235 { "pi", "*p" }, /* EQNSYM_pi */ 236 { "psi", "*q" }, /* EQNSYM_psi */ 237 { "rho", "*r" }, /* EQNSYM_rho */ 238 { "sigma", "*s" }, /* EQNSYM_sigma */ 239 { "tau", "*t" }, /* EQNSYM_tau */ 240 { "theta", "*h" }, /* EQNSYM_theta */ 241 { "upsilon", "*u" }, /* EQNSYM_upsilon */ 242 { "xi", "*c" }, /* EQNSYM_xi */ 243 { "zeta", "*z" }, /* EQNSYM_zeta */ 244 { "DELTA", "*D" }, /* EQNSYM_DELTA */ 245 { "GAMMA", "*G" }, /* EQNSYM_GAMMA */ 246 { "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */ 247 { "OMEGA", "*W" }, /* EQNSYM_OMEGA */ 248 { "PHI", "*F" }, /* EQNSYM_PHI */ 249 { "PI", "*P" }, /* EQNSYM_PI */ 250 { "PSI", "*Q" }, /* EQNSYM_PSI */ 251 { "SIGMA", "*S" }, /* EQNSYM_SIGMA */ 252 { "THETA", "*H" }, /* EQNSYM_THETA */ 253 { "UPSILON", "*U" }, /* EQNSYM_UPSILON */ 254 { "XI", "*C" }, /* EQNSYM_XI */ 255 { "inter", "ca" }, /* EQNSYM_inter */ 256 { "union", "cu" }, /* EQNSYM_union */ 257 { "prod", "product" }, /* EQNSYM_prod */ 258 { "int", "integral" }, /* EQNSYM_int */ 259 { "sum", "sum" }, /* EQNSYM_sum */ 260 { "grad", "gr" }, /* EQNSYM_grad */ 261 { "del", "gr" }, /* EQNSYM_del */ 262 { "times", "mu" }, /* EQNSYM_times */ 263 { "cdot", "pc" }, /* EQNSYM_cdot */ 264 { "nothing", "&" }, /* EQNSYM_nothing */ 265 { "approx", "~~" }, /* EQNSYM_approx */ 266 { "prime", "fm" }, /* EQNSYM_prime */ 267 { "half", "12" }, /* EQNSYM_half */ 268 { "partial", "pd" }, /* EQNSYM_partial */ 269 { "inf", "if" }, /* EQNSYM_inf */ 270 { ">>", ">>" }, /* EQNSYM_muchgreat */ 271 { "<<", "<<" }, /* EQNSYM_muchless */ 272 { "<-", "<-" }, /* EQNSYM_larrow */ 273 { "->", "->" }, /* EQNSYM_rarrow */ 274 { "+-", "+-" }, /* EQNSYM_pm */ 275 { "!=", "!=" }, /* EQNSYM_nequal */ 276 { "==", "==" }, /* EQNSYM_equiv */ 277 { "<=", "<=" }, /* EQNSYM_lessequal */ 278 { ">=", ">=" }, /* EQNSYM_moreequal */ 279 { "-", "mi" }, /* EQNSYM_minus */ 280 }; 281 282 enum parse_mode { 283 MODE_QUOTED, 284 MODE_NOSUB, 285 MODE_SUB, 286 MODE_TOK 287 }; 288 289 struct eqn_def { 290 char *key; 291 size_t keysz; 292 char *val; 293 size_t valsz; 294 }; 295 296 static struct eqn_box *eqn_box_alloc(struct eqn_node *, struct eqn_box *); 297 static struct eqn_box *eqn_box_makebinary(struct eqn_node *, 298 struct eqn_box *); 299 static void eqn_def(struct eqn_node *); 300 static struct eqn_def *eqn_def_find(struct eqn_node *); 301 static void eqn_delim(struct eqn_node *); 302 static enum eqn_tok eqn_next(struct eqn_node *, enum parse_mode); 303 static void eqn_undef(struct eqn_node *); 304 305 306 struct eqn_node * 307 eqn_alloc(void) 308 { 309 struct eqn_node *ep; 310 311 ep = mandoc_calloc(1, sizeof(*ep)); 312 ep->gsize = EQN_DEFSIZE; 313 return ep; 314 } 315 316 void 317 eqn_reset(struct eqn_node *ep) 318 { 319 free(ep->data); 320 ep->data = ep->start = ep->end = NULL; 321 ep->sz = ep->toksz = 0; 322 } 323 324 void 325 eqn_read(struct eqn_node *ep, const char *p) 326 { 327 char *cp; 328 329 if (ep->data == NULL) { 330 ep->sz = strlen(p); 331 ep->data = mandoc_strdup(p); 332 } else { 333 ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p); 334 free(ep->data); 335 ep->data = cp; 336 } 337 ep->sz += 1; 338 } 339 340 /* 341 * Find the key "key" of the give size within our eqn-defined values. 342 */ 343 static struct eqn_def * 344 eqn_def_find(struct eqn_node *ep) 345 { 346 int i; 347 348 for (i = 0; i < (int)ep->defsz; i++) 349 if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key, 350 ep->defs[i].keysz, ep->start, ep->toksz)) 351 return &ep->defs[i]; 352 353 return NULL; 354 } 355 356 /* 357 * Parse a token from the input text. The modes are: 358 * MODE_QUOTED: Use *ep->start as the delimiter; the token ends 359 * before its next occurrence. Do not interpret the token in any 360 * way and return EQN_TOK_QUOTED. All other modes behave like 361 * MODE_QUOTED when *ep->start is '"'. 362 * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it; 363 * otherwise, it ends before the next whitespace or brace. 364 * Do not interpret the token and return EQN_TOK__MAX. 365 * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an 366 * alias created with define. If it is an alias, replace it with 367 * its string value and reparse. 368 * MODE_TOK: Like MODE_SUB, but also check the token against the list 369 * of tokens, and if there is a match, return that token. Otherwise, 370 * if the token matches a symbol, return EQN_TOK_SYM; if it matches 371 * a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX. Except for 372 * a token match, *ep->start is set to an allocated string that the 373 * caller is expected to free. 374 * All modes skip whitespace following the end of the token. 375 */ 376 static enum eqn_tok 377 eqn_next(struct eqn_node *ep, enum parse_mode mode) 378 { 379 struct eqn_def *def; 380 size_t start; 381 int diff, i, newlen, quoted; 382 enum eqn_tok tok; 383 384 /* 385 * Reset the recursion counter after advancing 386 * beyond the end of the rightmost substitution. 387 */ 388 if (ep->end - ep->data >= ep->sublen) 389 ep->subcnt = 0; 390 391 ep->start = ep->end; 392 quoted = mode == MODE_QUOTED; 393 for (;;) { 394 switch (*ep->start) { 395 case '\0': 396 ep->toksz = 0; 397 return EQN_TOK_EOF; 398 case '"': 399 quoted = 1; 400 break; 401 case ' ': 402 case '\t': 403 case '~': 404 case '^': 405 if (quoted) 406 break; 407 ep->start++; 408 continue; 409 default: 410 break; 411 } 412 if (quoted) { 413 ep->end = strchr(ep->start + 1, *ep->start); 414 ep->start++; /* Skip opening quote. */ 415 if (ep->end == NULL) { 416 mandoc_msg(MANDOCERR_ARG_QUOTE, 417 ep->node->line, ep->node->pos, NULL); 418 ep->end = strchr(ep->start, '\0'); 419 } 420 } else { 421 ep->end = ep->start + 1; 422 if (*ep->start != '{' && *ep->start != '}') 423 ep->end += strcspn(ep->end, " ^~\"{}\t"); 424 } 425 ep->toksz = ep->end - ep->start; 426 if (quoted && *ep->end != '\0') 427 ep->end++; /* Skip closing quote. */ 428 while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL) 429 ep->end++; 430 if (quoted) /* Cannot return, may have to strndup. */ 431 break; 432 if (mode == MODE_NOSUB) 433 return EQN_TOK__MAX; 434 if ((def = eqn_def_find(ep)) == NULL) 435 break; 436 if (++ep->subcnt > EQN_NEST_MAX) { 437 mandoc_msg(MANDOCERR_ROFFLOOP, 438 ep->node->line, ep->node->pos, NULL); 439 break; 440 } 441 442 /* Replace a defined name with its string value. */ 443 if ((diff = def->valsz - ep->toksz) > 0) { 444 start = ep->start - ep->data; 445 ep->sz += diff; 446 ep->data = mandoc_realloc(ep->data, ep->sz + 1); 447 ep->start = ep->data + start; 448 ep->sublen += diff; 449 } 450 if (diff) 451 memmove(ep->start + def->valsz, ep->start + ep->toksz, 452 strlen(ep->start + ep->toksz) + 1); 453 memcpy(ep->start, def->val, def->valsz); 454 newlen = ep->start - ep->data + def->valsz; 455 if (ep->sublen < newlen) 456 ep->sublen = newlen; 457 } 458 if (mode != MODE_TOK) 459 return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX; 460 if (quoted) { 461 ep->start = mandoc_strndup(ep->start, ep->toksz); 462 return EQN_TOK_QUOTED; 463 } 464 for (tok = 0; tok < EQN_TOK__MAX; tok++) 465 if (STRNEQ(ep->start, ep->toksz, 466 eqn_toks[tok], strlen(eqn_toks[tok]))) 467 return tok; 468 469 for (i = 0; i < EQNSYM__MAX; i++) { 470 if (STRNEQ(ep->start, ep->toksz, 471 eqnsyms[i].str, strlen(eqnsyms[i].str))) { 472 mandoc_asprintf(&ep->start, 473 "\\[%s]", eqnsyms[i].sym); 474 return EQN_TOK_SYM; 475 } 476 } 477 ep->start = mandoc_strndup(ep->start, ep->toksz); 478 for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++) 479 if (STRNEQ(ep->start, ep->toksz, 480 eqn_func[i], strlen(eqn_func[i]))) 481 return EQN_TOK_FUNC; 482 return EQN_TOK__MAX; 483 } 484 485 void 486 eqn_box_free(struct eqn_box *bp) 487 { 488 if (bp == NULL) 489 return; 490 491 if (bp->first) 492 eqn_box_free(bp->first); 493 if (bp->next) 494 eqn_box_free(bp->next); 495 496 free(bp->text); 497 free(bp->left); 498 free(bp->right); 499 free(bp->top); 500 free(bp->bottom); 501 free(bp); 502 } 503 504 struct eqn_box * 505 eqn_box_new(void) 506 { 507 struct eqn_box *bp; 508 509 bp = mandoc_calloc(1, sizeof(*bp)); 510 bp->expectargs = UINT_MAX; 511 return bp; 512 } 513 514 /* 515 * Allocate a box as the last child of the parent node. 516 */ 517 static struct eqn_box * 518 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent) 519 { 520 struct eqn_box *bp; 521 522 bp = eqn_box_new(); 523 bp->parent = parent; 524 bp->parent->args++; 525 bp->font = bp->parent->font; 526 bp->size = ep->gsize; 527 528 if (NULL != parent->first) { 529 parent->last->next = bp; 530 bp->prev = parent->last; 531 } else 532 parent->first = bp; 533 534 parent->last = bp; 535 return bp; 536 } 537 538 /* 539 * Reparent the current last node (of the current parent) under a new 540 * EQN_SUBEXPR as the first element. 541 * Then return the new parent. 542 * The new EQN_SUBEXPR will have a two-child limit. 543 */ 544 static struct eqn_box * 545 eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent) 546 { 547 struct eqn_box *b, *newb; 548 549 assert(NULL != parent->last); 550 b = parent->last; 551 if (parent->last == parent->first) 552 parent->first = NULL; 553 parent->args--; 554 parent->last = b->prev; 555 b->prev = NULL; 556 newb = eqn_box_alloc(ep, parent); 557 newb->type = EQN_SUBEXPR; 558 newb->expectargs = 2; 559 newb->args = 1; 560 newb->first = newb->last = b; 561 newb->first->next = NULL; 562 b->parent = newb; 563 return newb; 564 } 565 566 /* 567 * Parse the "delim" control statement. 568 */ 569 static void 570 eqn_delim(struct eqn_node *ep) 571 { 572 if (ep->end[0] == '\0' || ep->end[1] == '\0') { 573 mandoc_msg(MANDOCERR_REQ_EMPTY, 574 ep->node->line, ep->node->pos, "delim"); 575 if (ep->end[0] != '\0') 576 ep->end++; 577 } else if (strncmp(ep->end, "off", 3) == 0) { 578 ep->delim = 0; 579 ep->end += 3; 580 } else if (strncmp(ep->end, "on", 2) == 0) { 581 if (ep->odelim && ep->cdelim) 582 ep->delim = 1; 583 ep->end += 2; 584 } else { 585 ep->odelim = *ep->end++; 586 ep->cdelim = *ep->end++; 587 ep->delim = 1; 588 } 589 } 590 591 /* 592 * Undefine a previously-defined string. 593 */ 594 static void 595 eqn_undef(struct eqn_node *ep) 596 { 597 struct eqn_def *def; 598 599 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) { 600 mandoc_msg(MANDOCERR_REQ_EMPTY, 601 ep->node->line, ep->node->pos, "undef"); 602 return; 603 } 604 if ((def = eqn_def_find(ep)) == NULL) 605 return; 606 free(def->key); 607 free(def->val); 608 def->key = def->val = NULL; 609 def->keysz = def->valsz = 0; 610 } 611 612 static void 613 eqn_def(struct eqn_node *ep) 614 { 615 struct eqn_def *def; 616 int i; 617 618 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) { 619 mandoc_msg(MANDOCERR_REQ_EMPTY, 620 ep->node->line, ep->node->pos, "define"); 621 return; 622 } 623 624 /* 625 * Search for a key that already exists. 626 * Create a new key if none is found. 627 */ 628 if ((def = eqn_def_find(ep)) == NULL) { 629 /* Find holes in string array. */ 630 for (i = 0; i < (int)ep->defsz; i++) 631 if (0 == ep->defs[i].keysz) 632 break; 633 634 if (i == (int)ep->defsz) { 635 ep->defsz++; 636 ep->defs = mandoc_reallocarray(ep->defs, 637 ep->defsz, sizeof(struct eqn_def)); 638 ep->defs[i].key = ep->defs[i].val = NULL; 639 } 640 641 def = ep->defs + i; 642 free(def->key); 643 def->key = mandoc_strndup(ep->start, ep->toksz); 644 def->keysz = ep->toksz; 645 } 646 647 if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) { 648 mandoc_msg(MANDOCERR_REQ_EMPTY, 649 ep->node->line, ep->node->pos, "define %s", def->key); 650 free(def->key); 651 free(def->val); 652 def->key = def->val = NULL; 653 def->keysz = def->valsz = 0; 654 return; 655 } 656 free(def->val); 657 def->val = mandoc_strndup(ep->start, ep->toksz); 658 def->valsz = ep->toksz; 659 } 660 661 void 662 eqn_parse(struct eqn_node *ep) 663 { 664 struct eqn_box *cur, *nbox, *parent, *split; 665 const char *cp, *cpn; 666 char *p; 667 enum eqn_tok tok; 668 enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln; 669 int size; 670 671 parent = ep->node->eqn; 672 assert(parent != NULL); 673 674 /* 675 * Empty equation. 676 * Do not add it to the high-level syntax tree. 677 */ 678 679 if (ep->data == NULL) 680 return; 681 682 ep->start = ep->end = ep->data; 683 ep->sublen = 0; 684 ep->subcnt = 0; 685 686 next_tok: 687 tok = eqn_next(ep, MODE_TOK); 688 switch (tok) { 689 case EQN_TOK_UNDEF: 690 eqn_undef(ep); 691 break; 692 case EQN_TOK_NDEFINE: 693 case EQN_TOK_DEFINE: 694 eqn_def(ep); 695 break; 696 case EQN_TOK_TDEFINE: 697 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF || 698 eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) 699 mandoc_msg(MANDOCERR_REQ_EMPTY, 700 ep->node->line, ep->node->pos, "tdefine"); 701 break; 702 case EQN_TOK_DELIM: 703 eqn_delim(ep); 704 break; 705 case EQN_TOK_GFONT: 706 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) 707 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 708 ep->node->pos, "%s", eqn_toks[tok]); 709 break; 710 case EQN_TOK_MARK: 711 case EQN_TOK_LINEUP: 712 /* Ignore these. */ 713 break; 714 case EQN_TOK_DYAD: 715 case EQN_TOK_VEC: 716 case EQN_TOK_UNDER: 717 case EQN_TOK_BAR: 718 case EQN_TOK_TILDE: 719 case EQN_TOK_HAT: 720 case EQN_TOK_DOT: 721 case EQN_TOK_DOTDOT: 722 if (parent->last == NULL) { 723 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line, 724 ep->node->pos, "%s", eqn_toks[tok]); 725 cur = eqn_box_alloc(ep, parent); 726 cur->type = EQN_TEXT; 727 cur->text = mandoc_strdup(""); 728 } 729 parent = eqn_box_makebinary(ep, parent); 730 parent->type = EQN_LIST; 731 parent->expectargs = 1; 732 parent->font = EQNFONT_ROMAN; 733 switch (tok) { 734 case EQN_TOK_DOTDOT: 735 parent->top = mandoc_strdup("\\[ad]"); 736 break; 737 case EQN_TOK_VEC: 738 parent->top = mandoc_strdup("\\[->]"); 739 break; 740 case EQN_TOK_DYAD: 741 parent->top = mandoc_strdup("\\[<>]"); 742 break; 743 case EQN_TOK_TILDE: 744 parent->top = mandoc_strdup("\\[a~]"); 745 break; 746 case EQN_TOK_UNDER: 747 parent->bottom = mandoc_strdup("\\[ul]"); 748 break; 749 case EQN_TOK_BAR: 750 parent->top = mandoc_strdup("\\[rn]"); 751 break; 752 case EQN_TOK_DOT: 753 parent->top = mandoc_strdup("\\[a.]"); 754 break; 755 case EQN_TOK_HAT: 756 parent->top = mandoc_strdup("\\[ha]"); 757 break; 758 default: 759 abort(); 760 } 761 parent = parent->parent; 762 break; 763 case EQN_TOK_FWD: 764 case EQN_TOK_BACK: 765 case EQN_TOK_DOWN: 766 case EQN_TOK_UP: 767 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) 768 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 769 ep->node->pos, "%s", eqn_toks[tok]); 770 break; 771 case EQN_TOK_FAT: 772 case EQN_TOK_ROMAN: 773 case EQN_TOK_ITALIC: 774 case EQN_TOK_BOLD: 775 while (parent->args == parent->expectargs) 776 parent = parent->parent; 777 /* 778 * These values apply to the next word or sequence of 779 * words; thus, we mark that we'll have a child with 780 * exactly one of those. 781 */ 782 parent = eqn_box_alloc(ep, parent); 783 parent->type = EQN_LIST; 784 parent->expectargs = 1; 785 switch (tok) { 786 case EQN_TOK_FAT: 787 parent->font = EQNFONT_FAT; 788 break; 789 case EQN_TOK_ROMAN: 790 parent->font = EQNFONT_ROMAN; 791 break; 792 case EQN_TOK_ITALIC: 793 parent->font = EQNFONT_ITALIC; 794 break; 795 case EQN_TOK_BOLD: 796 parent->font = EQNFONT_BOLD; 797 break; 798 default: 799 abort(); 800 } 801 break; 802 case EQN_TOK_SIZE: 803 case EQN_TOK_GSIZE: 804 /* Accept two values: integral size and a single. */ 805 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 806 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 807 ep->node->pos, "%s", eqn_toks[tok]); 808 break; 809 } 810 size = mandoc_strntoi(ep->start, ep->toksz, 10); 811 if (-1 == size) { 812 mandoc_msg(MANDOCERR_IT_NONUM, ep->node->line, 813 ep->node->pos, "%s", eqn_toks[tok]); 814 break; 815 } 816 if (EQN_TOK_GSIZE == tok) { 817 ep->gsize = size; 818 break; 819 } 820 while (parent->args == parent->expectargs) 821 parent = parent->parent; 822 parent = eqn_box_alloc(ep, parent); 823 parent->type = EQN_LIST; 824 parent->expectargs = 1; 825 parent->size = size; 826 break; 827 case EQN_TOK_FROM: 828 case EQN_TOK_TO: 829 case EQN_TOK_SUB: 830 case EQN_TOK_SUP: 831 /* 832 * We have a left-right-associative expression. 833 * Repivot under a positional node, open a child scope 834 * and keep on reading. 835 */ 836 if (parent->last == NULL) { 837 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line, 838 ep->node->pos, "%s", eqn_toks[tok]); 839 cur = eqn_box_alloc(ep, parent); 840 cur->type = EQN_TEXT; 841 cur->text = mandoc_strdup(""); 842 } 843 while (parent->expectargs == 1 && parent->args == 1) 844 parent = parent->parent; 845 if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO) { 846 for (cur = parent; cur != NULL; cur = cur->parent) 847 if (cur->pos == EQNPOS_SUB || 848 cur->pos == EQNPOS_SUP || 849 cur->pos == EQNPOS_SUBSUP || 850 cur->pos == EQNPOS_SQRT || 851 cur->pos == EQNPOS_OVER) 852 break; 853 if (cur != NULL) 854 parent = cur->parent; 855 } 856 if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) { 857 parent->expectargs = 3; 858 parent->pos = EQNPOS_SUBSUP; 859 break; 860 } 861 if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) { 862 parent->expectargs = 3; 863 parent->pos = EQNPOS_FROMTO; 864 break; 865 } 866 parent = eqn_box_makebinary(ep, parent); 867 switch (tok) { 868 case EQN_TOK_FROM: 869 parent->pos = EQNPOS_FROM; 870 break; 871 case EQN_TOK_TO: 872 parent->pos = EQNPOS_TO; 873 break; 874 case EQN_TOK_SUP: 875 parent->pos = EQNPOS_SUP; 876 break; 877 case EQN_TOK_SUB: 878 parent->pos = EQNPOS_SUB; 879 break; 880 default: 881 abort(); 882 } 883 break; 884 case EQN_TOK_SQRT: 885 while (parent->args == parent->expectargs) 886 parent = parent->parent; 887 /* 888 * Accept a left-right-associative set of arguments just 889 * like sub and sup and friends but without rebalancing 890 * under a pivot. 891 */ 892 parent = eqn_box_alloc(ep, parent); 893 parent->type = EQN_SUBEXPR; 894 parent->pos = EQNPOS_SQRT; 895 parent->expectargs = 1; 896 break; 897 case EQN_TOK_OVER: 898 /* 899 * We have a right-left-associative fraction. 900 * Close out anything that's currently open, then 901 * rebalance and continue reading. 902 */ 903 if (parent->last == NULL) { 904 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->node->line, 905 ep->node->pos, "%s", eqn_toks[tok]); 906 cur = eqn_box_alloc(ep, parent); 907 cur->type = EQN_TEXT; 908 cur->text = mandoc_strdup(""); 909 } 910 while (parent->args == parent->expectargs) 911 parent = parent->parent; 912 while (EQN_SUBEXPR == parent->type) 913 parent = parent->parent; 914 parent = eqn_box_makebinary(ep, parent); 915 parent->pos = EQNPOS_OVER; 916 break; 917 case EQN_TOK_RIGHT: 918 case EQN_TOK_BRACE_CLOSE: 919 /* 920 * Close out the existing brace. 921 * FIXME: this is a shitty sentinel: we should really 922 * have a native EQN_BRACE type or whatnot. 923 */ 924 for (cur = parent; cur != NULL; cur = cur->parent) 925 if (cur->type == EQN_LIST && 926 cur->expectargs > 1 && 927 (tok == EQN_TOK_BRACE_CLOSE || 928 cur->left != NULL)) 929 break; 930 if (cur == NULL) { 931 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->node->line, 932 ep->node->pos, "%s", eqn_toks[tok]); 933 break; 934 } 935 parent = cur; 936 if (EQN_TOK_RIGHT == tok) { 937 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 938 mandoc_msg(MANDOCERR_REQ_EMPTY, 939 ep->node->line, ep->node->pos, 940 "%s", eqn_toks[tok]); 941 break; 942 } 943 /* Handling depends on right/left. */ 944 if (STRNEQ(ep->start, ep->toksz, "ceiling", 7)) 945 parent->right = mandoc_strdup("\\[rc]"); 946 else if (STRNEQ(ep->start, ep->toksz, "floor", 5)) 947 parent->right = mandoc_strdup("\\[rf]"); 948 else 949 parent->right = 950 mandoc_strndup(ep->start, ep->toksz); 951 } 952 parent = parent->parent; 953 if (tok == EQN_TOK_BRACE_CLOSE && 954 (parent->type == EQN_PILE || 955 parent->type == EQN_MATRIX)) 956 parent = parent->parent; 957 /* Close out any "singleton" lists. */ 958 while (parent->type == EQN_LIST && 959 parent->expectargs == 1 && 960 parent->args == 1) 961 parent = parent->parent; 962 break; 963 case EQN_TOK_BRACE_OPEN: 964 case EQN_TOK_LEFT: 965 /* 966 * If we already have something in the stack and we're 967 * in an expression, then rewind til we're not any more 968 * (just like with the text node). 969 */ 970 while (parent->args == parent->expectargs) 971 parent = parent->parent; 972 if (EQN_TOK_LEFT == tok && 973 eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 974 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->node->line, 975 ep->node->pos, "%s", eqn_toks[tok]); 976 break; 977 } 978 parent = eqn_box_alloc(ep, parent); 979 parent->type = EQN_LIST; 980 if (EQN_TOK_LEFT == tok) { 981 if (STRNEQ(ep->start, ep->toksz, "ceiling", 7)) 982 parent->left = mandoc_strdup("\\[lc]"); 983 else if (STRNEQ(ep->start, ep->toksz, "floor", 5)) 984 parent->left = mandoc_strdup("\\[lf]"); 985 else 986 parent->left = 987 mandoc_strndup(ep->start, ep->toksz); 988 } 989 break; 990 case EQN_TOK_PILE: 991 case EQN_TOK_LPILE: 992 case EQN_TOK_RPILE: 993 case EQN_TOK_CPILE: 994 case EQN_TOK_CCOL: 995 case EQN_TOK_LCOL: 996 case EQN_TOK_RCOL: 997 while (parent->args == parent->expectargs) 998 parent = parent->parent; 999 parent = eqn_box_alloc(ep, parent); 1000 parent->type = EQN_PILE; 1001 parent->expectargs = 1; 1002 break; 1003 case EQN_TOK_ABOVE: 1004 for (cur = parent; cur != NULL; cur = cur->parent) 1005 if (cur->type == EQN_PILE) 1006 break; 1007 if (cur == NULL) { 1008 mandoc_msg(MANDOCERR_IT_STRAY, ep->node->line, 1009 ep->node->pos, "%s", eqn_toks[tok]); 1010 break; 1011 } 1012 parent = eqn_box_alloc(ep, cur); 1013 parent->type = EQN_LIST; 1014 break; 1015 case EQN_TOK_MATRIX: 1016 while (parent->args == parent->expectargs) 1017 parent = parent->parent; 1018 parent = eqn_box_alloc(ep, parent); 1019 parent->type = EQN_MATRIX; 1020 parent->expectargs = 1; 1021 break; 1022 case EQN_TOK_EOF: 1023 return; 1024 case EQN_TOK__MAX: 1025 case EQN_TOK_FUNC: 1026 case EQN_TOK_QUOTED: 1027 case EQN_TOK_SYM: 1028 p = ep->start; 1029 assert(p != NULL); 1030 /* 1031 * If we already have something in the stack and we're 1032 * in an expression, then rewind til we're not any more. 1033 */ 1034 while (parent->args == parent->expectargs) 1035 parent = parent->parent; 1036 cur = eqn_box_alloc(ep, parent); 1037 cur->type = EQN_TEXT; 1038 cur->text = p; 1039 switch (tok) { 1040 case EQN_TOK_FUNC: 1041 cur->font = EQNFONT_ROMAN; 1042 break; 1043 case EQN_TOK_QUOTED: 1044 if (cur->font == EQNFONT_NONE) 1045 cur->font = EQNFONT_ITALIC; 1046 break; 1047 case EQN_TOK_SYM: 1048 break; 1049 default: 1050 if (cur->font != EQNFONT_NONE || *p == '\0') 1051 break; 1052 cpn = p - 1; 1053 ccln = CCL_LET; 1054 split = NULL; 1055 for (;;) { 1056 /* Advance to next character. */ 1057 cp = cpn++; 1058 ccl = ccln; 1059 ccln = isalpha((unsigned char)*cpn) ? CCL_LET : 1060 isdigit((unsigned char)*cpn) || 1061 (*cpn == '.' && (ccl == CCL_DIG || 1062 isdigit((unsigned char)cpn[1]))) ? 1063 CCL_DIG : CCL_PUN; 1064 /* No boundary before first character. */ 1065 if (cp < p) 1066 continue; 1067 cur->font = ccl == CCL_LET ? 1068 EQNFONT_ITALIC : EQNFONT_ROMAN; 1069 if (*cp == '\\') 1070 mandoc_escape(&cpn, NULL, NULL); 1071 /* No boundary after last character. */ 1072 if (*cpn == '\0') 1073 break; 1074 if (ccln == ccl && *cp != ',' && *cpn != ',') 1075 continue; 1076 /* Boundary found, split the text. */ 1077 if (parent->args == parent->expectargs) { 1078 /* Remove the text from the tree. */ 1079 if (cur->prev == NULL) 1080 parent->first = cur->next; 1081 else 1082 cur->prev->next = NULL; 1083 parent->last = cur->prev; 1084 parent->args--; 1085 /* Set up a list instead. */ 1086 split = eqn_box_alloc(ep, parent); 1087 split->type = EQN_LIST; 1088 /* Insert the word into the list. */ 1089 split->first = split->last = cur; 1090 cur->parent = split; 1091 cur->prev = NULL; 1092 parent = split; 1093 } 1094 /* Append a new text box. */ 1095 nbox = eqn_box_alloc(ep, parent); 1096 nbox->type = EQN_TEXT; 1097 nbox->text = mandoc_strdup(cpn); 1098 /* Truncate the old box. */ 1099 p = mandoc_strndup(cur->text, 1100 cpn - cur->text); 1101 free(cur->text); 1102 cur->text = p; 1103 /* Setup to process the new box. */ 1104 cur = nbox; 1105 p = nbox->text; 1106 cpn = p - 1; 1107 ccln = CCL_LET; 1108 } 1109 if (split != NULL) 1110 parent = split->parent; 1111 break; 1112 } 1113 break; 1114 default: 1115 abort(); 1116 } 1117 goto next_tok; 1118 } 1119 1120 void 1121 eqn_free(struct eqn_node *p) 1122 { 1123 int i; 1124 1125 if (p == NULL) 1126 return; 1127 1128 for (i = 0; i < (int)p->defsz; i++) { 1129 free(p->defs[i].key); 1130 free(p->defs[i].val); 1131 } 1132 1133 free(p->data); 1134 free(p->defs); 1135 free(p); 1136 } 1137