1 /* $Id: eqn.c,v 1.78 2017/07/15 16:26:17 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2011, 2014 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2014, 2015, 2017 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include "config.h" 19 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <limits.h> 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <string.h> 28 #include <time.h> 29 30 #include "mandoc_aux.h" 31 #include "mandoc.h" 32 #include "roff.h" 33 #include "libmandoc.h" 34 #include "libroff.h" 35 36 #define EQN_NEST_MAX 128 /* maximum nesting of defines */ 37 #define STRNEQ(p1, sz1, p2, sz2) \ 38 ((sz1) == (sz2) && 0 == strncmp((p1), (p2), (sz1))) 39 40 enum eqn_tok { 41 EQN_TOK_DYAD = 0, 42 EQN_TOK_VEC, 43 EQN_TOK_UNDER, 44 EQN_TOK_BAR, 45 EQN_TOK_TILDE, 46 EQN_TOK_HAT, 47 EQN_TOK_DOT, 48 EQN_TOK_DOTDOT, 49 EQN_TOK_FWD, 50 EQN_TOK_BACK, 51 EQN_TOK_DOWN, 52 EQN_TOK_UP, 53 EQN_TOK_FAT, 54 EQN_TOK_ROMAN, 55 EQN_TOK_ITALIC, 56 EQN_TOK_BOLD, 57 EQN_TOK_SIZE, 58 EQN_TOK_SUB, 59 EQN_TOK_SUP, 60 EQN_TOK_SQRT, 61 EQN_TOK_OVER, 62 EQN_TOK_FROM, 63 EQN_TOK_TO, 64 EQN_TOK_BRACE_OPEN, 65 EQN_TOK_BRACE_CLOSE, 66 EQN_TOK_GSIZE, 67 EQN_TOK_GFONT, 68 EQN_TOK_MARK, 69 EQN_TOK_LINEUP, 70 EQN_TOK_LEFT, 71 EQN_TOK_RIGHT, 72 EQN_TOK_PILE, 73 EQN_TOK_LPILE, 74 EQN_TOK_RPILE, 75 EQN_TOK_CPILE, 76 EQN_TOK_MATRIX, 77 EQN_TOK_CCOL, 78 EQN_TOK_LCOL, 79 EQN_TOK_RCOL, 80 EQN_TOK_DELIM, 81 EQN_TOK_DEFINE, 82 EQN_TOK_TDEFINE, 83 EQN_TOK_NDEFINE, 84 EQN_TOK_UNDEF, 85 EQN_TOK_ABOVE, 86 EQN_TOK__MAX, 87 EQN_TOK_FUNC, 88 EQN_TOK_QUOTED, 89 EQN_TOK_SYM, 90 EQN_TOK_EOF 91 }; 92 93 static const char *eqn_toks[EQN_TOK__MAX] = { 94 "dyad", /* EQN_TOK_DYAD */ 95 "vec", /* EQN_TOK_VEC */ 96 "under", /* EQN_TOK_UNDER */ 97 "bar", /* EQN_TOK_BAR */ 98 "tilde", /* EQN_TOK_TILDE */ 99 "hat", /* EQN_TOK_HAT */ 100 "dot", /* EQN_TOK_DOT */ 101 "dotdot", /* EQN_TOK_DOTDOT */ 102 "fwd", /* EQN_TOK_FWD * */ 103 "back", /* EQN_TOK_BACK */ 104 "down", /* EQN_TOK_DOWN */ 105 "up", /* EQN_TOK_UP */ 106 "fat", /* EQN_TOK_FAT */ 107 "roman", /* EQN_TOK_ROMAN */ 108 "italic", /* EQN_TOK_ITALIC */ 109 "bold", /* EQN_TOK_BOLD */ 110 "size", /* EQN_TOK_SIZE */ 111 "sub", /* EQN_TOK_SUB */ 112 "sup", /* EQN_TOK_SUP */ 113 "sqrt", /* EQN_TOK_SQRT */ 114 "over", /* EQN_TOK_OVER */ 115 "from", /* EQN_TOK_FROM */ 116 "to", /* EQN_TOK_TO */ 117 "{", /* EQN_TOK_BRACE_OPEN */ 118 "}", /* EQN_TOK_BRACE_CLOSE */ 119 "gsize", /* EQN_TOK_GSIZE */ 120 "gfont", /* EQN_TOK_GFONT */ 121 "mark", /* EQN_TOK_MARK */ 122 "lineup", /* EQN_TOK_LINEUP */ 123 "left", /* EQN_TOK_LEFT */ 124 "right", /* EQN_TOK_RIGHT */ 125 "pile", /* EQN_TOK_PILE */ 126 "lpile", /* EQN_TOK_LPILE */ 127 "rpile", /* EQN_TOK_RPILE */ 128 "cpile", /* EQN_TOK_CPILE */ 129 "matrix", /* EQN_TOK_MATRIX */ 130 "ccol", /* EQN_TOK_CCOL */ 131 "lcol", /* EQN_TOK_LCOL */ 132 "rcol", /* EQN_TOK_RCOL */ 133 "delim", /* EQN_TOK_DELIM */ 134 "define", /* EQN_TOK_DEFINE */ 135 "tdefine", /* EQN_TOK_TDEFINE */ 136 "ndefine", /* EQN_TOK_NDEFINE */ 137 "undef", /* EQN_TOK_UNDEF */ 138 "above", /* EQN_TOK_ABOVE */ 139 }; 140 141 static const char *const eqn_func[] = { 142 "acos", "acsc", "and", "arc", "asec", "asin", "atan", 143 "cos", "cosh", "coth", "csc", "det", "exp", "for", 144 "if", "lim", "ln", "log", "max", "min", 145 "sec", "sin", "sinh", "tan", "tanh", "Im", "Re", 146 }; 147 148 enum eqn_symt { 149 EQNSYM_alpha = 0, 150 EQNSYM_beta, 151 EQNSYM_chi, 152 EQNSYM_delta, 153 EQNSYM_epsilon, 154 EQNSYM_eta, 155 EQNSYM_gamma, 156 EQNSYM_iota, 157 EQNSYM_kappa, 158 EQNSYM_lambda, 159 EQNSYM_mu, 160 EQNSYM_nu, 161 EQNSYM_omega, 162 EQNSYM_omicron, 163 EQNSYM_phi, 164 EQNSYM_pi, 165 EQNSYM_ps, 166 EQNSYM_rho, 167 EQNSYM_sigma, 168 EQNSYM_tau, 169 EQNSYM_theta, 170 EQNSYM_upsilon, 171 EQNSYM_xi, 172 EQNSYM_zeta, 173 EQNSYM_DELTA, 174 EQNSYM_GAMMA, 175 EQNSYM_LAMBDA, 176 EQNSYM_OMEGA, 177 EQNSYM_PHI, 178 EQNSYM_PI, 179 EQNSYM_PSI, 180 EQNSYM_SIGMA, 181 EQNSYM_THETA, 182 EQNSYM_UPSILON, 183 EQNSYM_XI, 184 EQNSYM_inter, 185 EQNSYM_union, 186 EQNSYM_prod, 187 EQNSYM_int, 188 EQNSYM_sum, 189 EQNSYM_grad, 190 EQNSYM_del, 191 EQNSYM_times, 192 EQNSYM_cdot, 193 EQNSYM_nothing, 194 EQNSYM_approx, 195 EQNSYM_prime, 196 EQNSYM_half, 197 EQNSYM_partial, 198 EQNSYM_inf, 199 EQNSYM_muchgreat, 200 EQNSYM_muchless, 201 EQNSYM_larrow, 202 EQNSYM_rarrow, 203 EQNSYM_pm, 204 EQNSYM_nequal, 205 EQNSYM_equiv, 206 EQNSYM_lessequal, 207 EQNSYM_moreequal, 208 EQNSYM_minus, 209 EQNSYM__MAX 210 }; 211 212 struct eqnsym { 213 const char *str; 214 const char *sym; 215 }; 216 217 static const struct eqnsym eqnsyms[EQNSYM__MAX] = { 218 { "alpha", "*a" }, /* EQNSYM_alpha */ 219 { "beta", "*b" }, /* EQNSYM_beta */ 220 { "chi", "*x" }, /* EQNSYM_chi */ 221 { "delta", "*d" }, /* EQNSYM_delta */ 222 { "epsilon", "*e" }, /* EQNSYM_epsilon */ 223 { "eta", "*y" }, /* EQNSYM_eta */ 224 { "gamma", "*g" }, /* EQNSYM_gamma */ 225 { "iota", "*i" }, /* EQNSYM_iota */ 226 { "kappa", "*k" }, /* EQNSYM_kappa */ 227 { "lambda", "*l" }, /* EQNSYM_lambda */ 228 { "mu", "*m" }, /* EQNSYM_mu */ 229 { "nu", "*n" }, /* EQNSYM_nu */ 230 { "omega", "*w" }, /* EQNSYM_omega */ 231 { "omicron", "*o" }, /* EQNSYM_omicron */ 232 { "phi", "*f" }, /* EQNSYM_phi */ 233 { "pi", "*p" }, /* EQNSYM_pi */ 234 { "psi", "*q" }, /* EQNSYM_psi */ 235 { "rho", "*r" }, /* EQNSYM_rho */ 236 { "sigma", "*s" }, /* EQNSYM_sigma */ 237 { "tau", "*t" }, /* EQNSYM_tau */ 238 { "theta", "*h" }, /* EQNSYM_theta */ 239 { "upsilon", "*u" }, /* EQNSYM_upsilon */ 240 { "xi", "*c" }, /* EQNSYM_xi */ 241 { "zeta", "*z" }, /* EQNSYM_zeta */ 242 { "DELTA", "*D" }, /* EQNSYM_DELTA */ 243 { "GAMMA", "*G" }, /* EQNSYM_GAMMA */ 244 { "LAMBDA", "*L" }, /* EQNSYM_LAMBDA */ 245 { "OMEGA", "*W" }, /* EQNSYM_OMEGA */ 246 { "PHI", "*F" }, /* EQNSYM_PHI */ 247 { "PI", "*P" }, /* EQNSYM_PI */ 248 { "PSI", "*Q" }, /* EQNSYM_PSI */ 249 { "SIGMA", "*S" }, /* EQNSYM_SIGMA */ 250 { "THETA", "*H" }, /* EQNSYM_THETA */ 251 { "UPSILON", "*U" }, /* EQNSYM_UPSILON */ 252 { "XI", "*C" }, /* EQNSYM_XI */ 253 { "inter", "ca" }, /* EQNSYM_inter */ 254 { "union", "cu" }, /* EQNSYM_union */ 255 { "prod", "product" }, /* EQNSYM_prod */ 256 { "int", "integral" }, /* EQNSYM_int */ 257 { "sum", "sum" }, /* EQNSYM_sum */ 258 { "grad", "gr" }, /* EQNSYM_grad */ 259 { "del", "gr" }, /* EQNSYM_del */ 260 { "times", "mu" }, /* EQNSYM_times */ 261 { "cdot", "pc" }, /* EQNSYM_cdot */ 262 { "nothing", "&" }, /* EQNSYM_nothing */ 263 { "approx", "~~" }, /* EQNSYM_approx */ 264 { "prime", "fm" }, /* EQNSYM_prime */ 265 { "half", "12" }, /* EQNSYM_half */ 266 { "partial", "pd" }, /* EQNSYM_partial */ 267 { "inf", "if" }, /* EQNSYM_inf */ 268 { ">>", ">>" }, /* EQNSYM_muchgreat */ 269 { "<<", "<<" }, /* EQNSYM_muchless */ 270 { "<-", "<-" }, /* EQNSYM_larrow */ 271 { "->", "->" }, /* EQNSYM_rarrow */ 272 { "+-", "+-" }, /* EQNSYM_pm */ 273 { "!=", "!=" }, /* EQNSYM_nequal */ 274 { "==", "==" }, /* EQNSYM_equiv */ 275 { "<=", "<=" }, /* EQNSYM_lessequal */ 276 { ">=", ">=" }, /* EQNSYM_moreequal */ 277 { "-", "mi" }, /* EQNSYM_minus */ 278 }; 279 280 enum parse_mode { 281 MODE_QUOTED, 282 MODE_NOSUB, 283 MODE_SUB, 284 MODE_TOK 285 }; 286 287 static struct eqn_box *eqn_box_alloc(struct eqn_node *, struct eqn_box *); 288 static struct eqn_box *eqn_box_makebinary(struct eqn_node *, 289 struct eqn_box *); 290 static void eqn_def(struct eqn_node *); 291 static struct eqn_def *eqn_def_find(struct eqn_node *); 292 static void eqn_delim(struct eqn_node *); 293 static enum eqn_tok eqn_next(struct eqn_node *, enum parse_mode); 294 static void eqn_undef(struct eqn_node *); 295 296 297 struct eqn_node * 298 eqn_alloc(struct mparse *parse) 299 { 300 struct eqn_node *ep; 301 302 ep = mandoc_calloc(1, sizeof(*ep)); 303 ep->parse = parse; 304 ep->gsize = EQN_DEFSIZE; 305 return ep; 306 } 307 308 void 309 eqn_reset(struct eqn_node *ep) 310 { 311 free(ep->data); 312 ep->data = ep->start = ep->end = NULL; 313 ep->sz = ep->toksz = 0; 314 } 315 316 void 317 eqn_read(struct eqn_node *ep, const char *p) 318 { 319 char *cp; 320 321 if (ep->data == NULL) { 322 ep->sz = strlen(p); 323 ep->data = mandoc_strdup(p); 324 } else { 325 ep->sz = mandoc_asprintf(&cp, "%s %s", ep->data, p); 326 free(ep->data); 327 ep->data = cp; 328 } 329 ep->sz += 1; 330 } 331 332 /* 333 * Find the key "key" of the give size within our eqn-defined values. 334 */ 335 static struct eqn_def * 336 eqn_def_find(struct eqn_node *ep) 337 { 338 int i; 339 340 for (i = 0; i < (int)ep->defsz; i++) 341 if (ep->defs[i].keysz && STRNEQ(ep->defs[i].key, 342 ep->defs[i].keysz, ep->start, ep->toksz)) 343 return &ep->defs[i]; 344 345 return NULL; 346 } 347 348 /* 349 * Parse a token from the input text. The modes are: 350 * MODE_QUOTED: Use *ep->start as the delimiter; the token ends 351 * before its next occurence. Do not interpret the token in any 352 * way and return EQN_TOK_QUOTED. All other modes behave like 353 * MODE_QUOTED when *ep->start is '"'. 354 * MODE_NOSUB: If *ep->start is a curly brace, the token ends after it; 355 * otherwise, it ends before the next whitespace or brace. 356 * Do not interpret the token and return EQN_TOK__MAX. 357 * MODE_SUB: Like MODE_NOSUB, but try to interpret the token as an 358 * alias created with define. If it is an alias, replace it with 359 * its string value and reparse. 360 * MODE_TOK: Like MODE_SUB, but also check the token against the list 361 * of tokens, and if there is a match, return that token. Otherwise, 362 * if the token matches a symbol, return EQN_TOK_SYM; if it matches 363 * a function name, EQN_TOK_FUNC, or else EQN_TOK__MAX. Except for 364 * a token match, *ep->start is set to an allocated string that the 365 * caller is expected to free. 366 * All modes skip whitespace following the end of the token. 367 */ 368 static enum eqn_tok 369 eqn_next(struct eqn_node *ep, enum parse_mode mode) 370 { 371 static int last_len, lim; 372 373 struct eqn_def *def; 374 size_t start; 375 int diff, i, quoted; 376 enum eqn_tok tok; 377 378 /* 379 * Reset the recursion counter after advancing 380 * beyond the end of the previous substitution. 381 */ 382 if (ep->end - ep->data >= last_len) 383 lim = 0; 384 385 ep->start = ep->end; 386 quoted = mode == MODE_QUOTED; 387 for (;;) { 388 switch (*ep->start) { 389 case '\0': 390 ep->toksz = 0; 391 return EQN_TOK_EOF; 392 case '"': 393 quoted = 1; 394 break; 395 default: 396 break; 397 } 398 if (quoted) { 399 ep->end = strchr(ep->start + 1, *ep->start); 400 ep->start++; /* Skip opening quote. */ 401 if (ep->end == NULL) { 402 mandoc_msg(MANDOCERR_ARG_QUOTE, ep->parse, 403 ep->node->line, ep->node->pos, NULL); 404 ep->end = strchr(ep->start, '\0'); 405 } 406 } else { 407 ep->end = ep->start + 1; 408 if (*ep->start != '{' && *ep->start != '}') 409 ep->end += strcspn(ep->end, " ^~\"{}\t"); 410 } 411 ep->toksz = ep->end - ep->start; 412 if (quoted && *ep->end != '\0') 413 ep->end++; /* Skip closing quote. */ 414 while (*ep->end != '\0' && strchr(" \t^~", *ep->end) != NULL) 415 ep->end++; 416 if (quoted) /* Cannot return, may have to strndup. */ 417 break; 418 if (mode == MODE_NOSUB) 419 return EQN_TOK__MAX; 420 if ((def = eqn_def_find(ep)) == NULL) 421 break; 422 if (++lim > EQN_NEST_MAX) { 423 mandoc_msg(MANDOCERR_ROFFLOOP, ep->parse, 424 ep->node->line, ep->node->pos, NULL); 425 return EQN_TOK_EOF; 426 } 427 428 /* Replace a defined name with its string value. */ 429 if ((diff = def->valsz - ep->toksz) > 0) { 430 start = ep->start - ep->data; 431 ep->sz += diff; 432 ep->data = mandoc_realloc(ep->data, ep->sz + 1); 433 ep->start = ep->data + start; 434 } 435 if (diff) 436 memmove(ep->start + def->valsz, ep->start + ep->toksz, 437 strlen(ep->start + ep->toksz) + 1); 438 memcpy(ep->start, def->val, def->valsz); 439 last_len = ep->start - ep->data + def->valsz; 440 } 441 if (mode != MODE_TOK) 442 return quoted ? EQN_TOK_QUOTED : EQN_TOK__MAX; 443 if (quoted) { 444 ep->start = mandoc_strndup(ep->start, ep->toksz); 445 return EQN_TOK_QUOTED; 446 } 447 for (tok = 0; tok < EQN_TOK__MAX; tok++) 448 if (STRNEQ(ep->start, ep->toksz, 449 eqn_toks[tok], strlen(eqn_toks[tok]))) 450 return tok; 451 452 for (i = 0; i < EQNSYM__MAX; i++) { 453 if (STRNEQ(ep->start, ep->toksz, 454 eqnsyms[i].str, strlen(eqnsyms[i].str))) { 455 mandoc_asprintf(&ep->start, 456 "\\[%s]", eqnsyms[i].sym); 457 return EQN_TOK_SYM; 458 } 459 } 460 ep->start = mandoc_strndup(ep->start, ep->toksz); 461 for (i = 0; i < (int)(sizeof(eqn_func)/sizeof(*eqn_func)); i++) 462 if (STRNEQ(ep->start, ep->toksz, 463 eqn_func[i], strlen(eqn_func[i]))) 464 return EQN_TOK_FUNC; 465 return EQN_TOK__MAX; 466 } 467 468 void 469 eqn_box_free(struct eqn_box *bp) 470 { 471 472 if (bp->first) 473 eqn_box_free(bp->first); 474 if (bp->next) 475 eqn_box_free(bp->next); 476 477 free(bp->text); 478 free(bp->left); 479 free(bp->right); 480 free(bp->top); 481 free(bp->bottom); 482 free(bp); 483 } 484 485 /* 486 * Allocate a box as the last child of the parent node. 487 */ 488 static struct eqn_box * 489 eqn_box_alloc(struct eqn_node *ep, struct eqn_box *parent) 490 { 491 struct eqn_box *bp; 492 493 bp = mandoc_calloc(1, sizeof(struct eqn_box)); 494 bp->parent = parent; 495 bp->parent->args++; 496 bp->expectargs = UINT_MAX; 497 bp->font = bp->parent->font; 498 bp->size = ep->gsize; 499 500 if (NULL != parent->first) { 501 parent->last->next = bp; 502 bp->prev = parent->last; 503 } else 504 parent->first = bp; 505 506 parent->last = bp; 507 return bp; 508 } 509 510 /* 511 * Reparent the current last node (of the current parent) under a new 512 * EQN_SUBEXPR as the first element. 513 * Then return the new parent. 514 * The new EQN_SUBEXPR will have a two-child limit. 515 */ 516 static struct eqn_box * 517 eqn_box_makebinary(struct eqn_node *ep, struct eqn_box *parent) 518 { 519 struct eqn_box *b, *newb; 520 521 assert(NULL != parent->last); 522 b = parent->last; 523 if (parent->last == parent->first) 524 parent->first = NULL; 525 parent->args--; 526 parent->last = b->prev; 527 b->prev = NULL; 528 newb = eqn_box_alloc(ep, parent); 529 newb->type = EQN_SUBEXPR; 530 newb->expectargs = 2; 531 newb->args = 1; 532 newb->first = newb->last = b; 533 newb->first->next = NULL; 534 b->parent = newb; 535 return newb; 536 } 537 538 /* 539 * Parse the "delim" control statement. 540 */ 541 static void 542 eqn_delim(struct eqn_node *ep) 543 { 544 if (ep->end[0] == '\0' || ep->end[1] == '\0') { 545 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 546 ep->node->line, ep->node->pos, "delim"); 547 if (ep->end[0] != '\0') 548 ep->end++; 549 } else if (strncmp(ep->end, "off", 3) == 0) { 550 ep->delim = 0; 551 ep->end += 3; 552 } else if (strncmp(ep->end, "on", 2) == 0) { 553 if (ep->odelim && ep->cdelim) 554 ep->delim = 1; 555 ep->end += 2; 556 } else { 557 ep->odelim = *ep->end++; 558 ep->cdelim = *ep->end++; 559 ep->delim = 1; 560 } 561 } 562 563 /* 564 * Undefine a previously-defined string. 565 */ 566 static void 567 eqn_undef(struct eqn_node *ep) 568 { 569 struct eqn_def *def; 570 571 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) { 572 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 573 ep->node->line, ep->node->pos, "undef"); 574 return; 575 } 576 if ((def = eqn_def_find(ep)) == NULL) 577 return; 578 free(def->key); 579 free(def->val); 580 def->key = def->val = NULL; 581 def->keysz = def->valsz = 0; 582 } 583 584 static void 585 eqn_def(struct eqn_node *ep) 586 { 587 struct eqn_def *def; 588 int i; 589 590 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF) { 591 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 592 ep->node->line, ep->node->pos, "define"); 593 return; 594 } 595 596 /* 597 * Search for a key that already exists. 598 * Create a new key if none is found. 599 */ 600 if ((def = eqn_def_find(ep)) == NULL) { 601 /* Find holes in string array. */ 602 for (i = 0; i < (int)ep->defsz; i++) 603 if (0 == ep->defs[i].keysz) 604 break; 605 606 if (i == (int)ep->defsz) { 607 ep->defsz++; 608 ep->defs = mandoc_reallocarray(ep->defs, 609 ep->defsz, sizeof(struct eqn_def)); 610 ep->defs[i].key = ep->defs[i].val = NULL; 611 } 612 613 def = ep->defs + i; 614 free(def->key); 615 def->key = mandoc_strndup(ep->start, ep->toksz); 616 def->keysz = ep->toksz; 617 } 618 619 if (eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) { 620 mandoc_vmsg(MANDOCERR_REQ_EMPTY, ep->parse, 621 ep->node->line, ep->node->pos, "define %s", def->key); 622 free(def->key); 623 free(def->val); 624 def->key = def->val = NULL; 625 def->keysz = def->valsz = 0; 626 return; 627 } 628 free(def->val); 629 def->val = mandoc_strndup(ep->start, ep->toksz); 630 def->valsz = ep->toksz; 631 } 632 633 void 634 eqn_parse(struct eqn_node *ep) 635 { 636 struct eqn_box *cur, *nbox, *parent, *split; 637 const char *cp, *cpn; 638 char *p; 639 enum eqn_tok tok; 640 enum { CCL_LET, CCL_DIG, CCL_PUN } ccl, ccln; 641 int size; 642 643 parent = ep->node->eqn; 644 assert(parent != NULL); 645 646 /* 647 * Empty equation. 648 * Do not add it to the high-level syntax tree. 649 */ 650 651 if (ep->data == NULL) 652 return; 653 654 ep->start = ep->end = ep->data + strspn(ep->data, " ^~"); 655 656 next_tok: 657 tok = eqn_next(ep, MODE_TOK); 658 switch (tok) { 659 case EQN_TOK_UNDEF: 660 eqn_undef(ep); 661 break; 662 case EQN_TOK_NDEFINE: 663 case EQN_TOK_DEFINE: 664 eqn_def(ep); 665 break; 666 case EQN_TOK_TDEFINE: 667 if (eqn_next(ep, MODE_NOSUB) == EQN_TOK_EOF || 668 eqn_next(ep, MODE_QUOTED) == EQN_TOK_EOF) 669 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 670 ep->node->line, ep->node->pos, "tdefine"); 671 break; 672 case EQN_TOK_DELIM: 673 eqn_delim(ep); 674 break; 675 case EQN_TOK_GFONT: 676 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) 677 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 678 ep->node->line, ep->node->pos, eqn_toks[tok]); 679 break; 680 case EQN_TOK_MARK: 681 case EQN_TOK_LINEUP: 682 /* Ignore these. */ 683 break; 684 case EQN_TOK_DYAD: 685 case EQN_TOK_VEC: 686 case EQN_TOK_UNDER: 687 case EQN_TOK_BAR: 688 case EQN_TOK_TILDE: 689 case EQN_TOK_HAT: 690 case EQN_TOK_DOT: 691 case EQN_TOK_DOTDOT: 692 if (parent->last == NULL) { 693 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, 694 ep->node->line, ep->node->pos, eqn_toks[tok]); 695 cur = eqn_box_alloc(ep, parent); 696 cur->type = EQN_TEXT; 697 cur->text = mandoc_strdup(""); 698 } 699 parent = eqn_box_makebinary(ep, parent); 700 parent->type = EQN_LIST; 701 parent->expectargs = 1; 702 parent->font = EQNFONT_ROMAN; 703 switch (tok) { 704 case EQN_TOK_DOTDOT: 705 parent->top = mandoc_strdup("\\[ad]"); 706 break; 707 case EQN_TOK_VEC: 708 parent->top = mandoc_strdup("\\[->]"); 709 break; 710 case EQN_TOK_DYAD: 711 parent->top = mandoc_strdup("\\[<>]"); 712 break; 713 case EQN_TOK_TILDE: 714 parent->top = mandoc_strdup("\\[a~]"); 715 break; 716 case EQN_TOK_UNDER: 717 parent->bottom = mandoc_strdup("\\[ul]"); 718 break; 719 case EQN_TOK_BAR: 720 parent->top = mandoc_strdup("\\[rn]"); 721 break; 722 case EQN_TOK_DOT: 723 parent->top = mandoc_strdup("\\[a.]"); 724 break; 725 case EQN_TOK_HAT: 726 parent->top = mandoc_strdup("\\[ha]"); 727 break; 728 default: 729 abort(); 730 } 731 parent = parent->parent; 732 break; 733 case EQN_TOK_FWD: 734 case EQN_TOK_BACK: 735 case EQN_TOK_DOWN: 736 case EQN_TOK_UP: 737 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) 738 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 739 ep->node->line, ep->node->pos, eqn_toks[tok]); 740 break; 741 case EQN_TOK_FAT: 742 case EQN_TOK_ROMAN: 743 case EQN_TOK_ITALIC: 744 case EQN_TOK_BOLD: 745 while (parent->args == parent->expectargs) 746 parent = parent->parent; 747 /* 748 * These values apply to the next word or sequence of 749 * words; thus, we mark that we'll have a child with 750 * exactly one of those. 751 */ 752 parent = eqn_box_alloc(ep, parent); 753 parent->type = EQN_LIST; 754 parent->expectargs = 1; 755 switch (tok) { 756 case EQN_TOK_FAT: 757 parent->font = EQNFONT_FAT; 758 break; 759 case EQN_TOK_ROMAN: 760 parent->font = EQNFONT_ROMAN; 761 break; 762 case EQN_TOK_ITALIC: 763 parent->font = EQNFONT_ITALIC; 764 break; 765 case EQN_TOK_BOLD: 766 parent->font = EQNFONT_BOLD; 767 break; 768 default: 769 abort(); 770 } 771 break; 772 case EQN_TOK_SIZE: 773 case EQN_TOK_GSIZE: 774 /* Accept two values: integral size and a single. */ 775 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 776 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 777 ep->node->line, ep->node->pos, eqn_toks[tok]); 778 break; 779 } 780 size = mandoc_strntoi(ep->start, ep->toksz, 10); 781 if (-1 == size) { 782 mandoc_msg(MANDOCERR_IT_NONUM, ep->parse, 783 ep->node->line, ep->node->pos, eqn_toks[tok]); 784 break; 785 } 786 if (EQN_TOK_GSIZE == tok) { 787 ep->gsize = size; 788 break; 789 } 790 while (parent->args == parent->expectargs) 791 parent = parent->parent; 792 parent = eqn_box_alloc(ep, parent); 793 parent->type = EQN_LIST; 794 parent->expectargs = 1; 795 parent->size = size; 796 break; 797 case EQN_TOK_FROM: 798 case EQN_TOK_TO: 799 case EQN_TOK_SUB: 800 case EQN_TOK_SUP: 801 /* 802 * We have a left-right-associative expression. 803 * Repivot under a positional node, open a child scope 804 * and keep on reading. 805 */ 806 if (parent->last == NULL) { 807 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, 808 ep->node->line, ep->node->pos, eqn_toks[tok]); 809 cur = eqn_box_alloc(ep, parent); 810 cur->type = EQN_TEXT; 811 cur->text = mandoc_strdup(""); 812 } 813 while (parent->expectargs == 1 && parent->args == 1) 814 parent = parent->parent; 815 if (tok == EQN_TOK_FROM || tok == EQN_TOK_TO) { 816 for (cur = parent; cur != NULL; cur = cur->parent) 817 if (cur->pos == EQNPOS_SUB || 818 cur->pos == EQNPOS_SUP || 819 cur->pos == EQNPOS_SUBSUP || 820 cur->pos == EQNPOS_SQRT || 821 cur->pos == EQNPOS_OVER) 822 break; 823 if (cur != NULL) 824 parent = cur->parent; 825 } 826 if (tok == EQN_TOK_SUP && parent->pos == EQNPOS_SUB) { 827 parent->expectargs = 3; 828 parent->pos = EQNPOS_SUBSUP; 829 break; 830 } 831 if (tok == EQN_TOK_TO && parent->pos == EQNPOS_FROM) { 832 parent->expectargs = 3; 833 parent->pos = EQNPOS_FROMTO; 834 break; 835 } 836 parent = eqn_box_makebinary(ep, parent); 837 switch (tok) { 838 case EQN_TOK_FROM: 839 parent->pos = EQNPOS_FROM; 840 break; 841 case EQN_TOK_TO: 842 parent->pos = EQNPOS_TO; 843 break; 844 case EQN_TOK_SUP: 845 parent->pos = EQNPOS_SUP; 846 break; 847 case EQN_TOK_SUB: 848 parent->pos = EQNPOS_SUB; 849 break; 850 default: 851 abort(); 852 } 853 break; 854 case EQN_TOK_SQRT: 855 while (parent->args == parent->expectargs) 856 parent = parent->parent; 857 /* 858 * Accept a left-right-associative set of arguments just 859 * like sub and sup and friends but without rebalancing 860 * under a pivot. 861 */ 862 parent = eqn_box_alloc(ep, parent); 863 parent->type = EQN_SUBEXPR; 864 parent->pos = EQNPOS_SQRT; 865 parent->expectargs = 1; 866 break; 867 case EQN_TOK_OVER: 868 /* 869 * We have a right-left-associative fraction. 870 * Close out anything that's currently open, then 871 * rebalance and continue reading. 872 */ 873 if (parent->last == NULL) { 874 mandoc_msg(MANDOCERR_EQN_NOBOX, ep->parse, 875 ep->node->line, ep->node->pos, eqn_toks[tok]); 876 cur = eqn_box_alloc(ep, parent); 877 cur->type = EQN_TEXT; 878 cur->text = mandoc_strdup(""); 879 } 880 while (parent->args == parent->expectargs) 881 parent = parent->parent; 882 while (EQN_SUBEXPR == parent->type) 883 parent = parent->parent; 884 parent = eqn_box_makebinary(ep, parent); 885 parent->pos = EQNPOS_OVER; 886 break; 887 case EQN_TOK_RIGHT: 888 case EQN_TOK_BRACE_CLOSE: 889 /* 890 * Close out the existing brace. 891 * FIXME: this is a shitty sentinel: we should really 892 * have a native EQN_BRACE type or whatnot. 893 */ 894 for (cur = parent; cur != NULL; cur = cur->parent) 895 if (cur->type == EQN_LIST && 896 cur->expectargs > 1 && 897 (tok == EQN_TOK_BRACE_CLOSE || 898 cur->left != NULL)) 899 break; 900 if (cur == NULL) { 901 mandoc_msg(MANDOCERR_BLK_NOTOPEN, ep->parse, 902 ep->node->line, ep->node->pos, eqn_toks[tok]); 903 break; 904 } 905 parent = cur; 906 if (EQN_TOK_RIGHT == tok) { 907 if (eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 908 mandoc_msg(MANDOCERR_REQ_EMPTY, 909 ep->parse, ep->node->line, 910 ep->node->pos, eqn_toks[tok]); 911 break; 912 } 913 /* Handling depends on right/left. */ 914 if (STRNEQ(ep->start, ep->toksz, "ceiling", 7)) 915 parent->right = mandoc_strdup("\\[rc]"); 916 else if (STRNEQ(ep->start, ep->toksz, "floor", 5)) 917 parent->right = mandoc_strdup("\\[rf]"); 918 else 919 parent->right = 920 mandoc_strndup(ep->start, ep->toksz); 921 } 922 parent = parent->parent; 923 if (tok == EQN_TOK_BRACE_CLOSE && 924 (parent->type == EQN_PILE || 925 parent->type == EQN_MATRIX)) 926 parent = parent->parent; 927 /* Close out any "singleton" lists. */ 928 while (parent->type == EQN_LIST && 929 parent->expectargs == 1 && 930 parent->args == 1) 931 parent = parent->parent; 932 break; 933 case EQN_TOK_BRACE_OPEN: 934 case EQN_TOK_LEFT: 935 /* 936 * If we already have something in the stack and we're 937 * in an expression, then rewind til we're not any more 938 * (just like with the text node). 939 */ 940 while (parent->args == parent->expectargs) 941 parent = parent->parent; 942 if (EQN_TOK_LEFT == tok && 943 eqn_next(ep, MODE_SUB) == EQN_TOK_EOF) { 944 mandoc_msg(MANDOCERR_REQ_EMPTY, ep->parse, 945 ep->node->line, ep->node->pos, eqn_toks[tok]); 946 break; 947 } 948 parent = eqn_box_alloc(ep, parent); 949 parent->type = EQN_LIST; 950 if (EQN_TOK_LEFT == tok) { 951 if (STRNEQ(ep->start, ep->toksz, "ceiling", 7)) 952 parent->left = mandoc_strdup("\\[lc]"); 953 else if (STRNEQ(ep->start, ep->toksz, "floor", 5)) 954 parent->left = mandoc_strdup("\\[lf]"); 955 else 956 parent->left = 957 mandoc_strndup(ep->start, ep->toksz); 958 } 959 break; 960 case EQN_TOK_PILE: 961 case EQN_TOK_LPILE: 962 case EQN_TOK_RPILE: 963 case EQN_TOK_CPILE: 964 case EQN_TOK_CCOL: 965 case EQN_TOK_LCOL: 966 case EQN_TOK_RCOL: 967 while (parent->args == parent->expectargs) 968 parent = parent->parent; 969 parent = eqn_box_alloc(ep, parent); 970 parent->type = EQN_PILE; 971 parent->expectargs = 1; 972 break; 973 case EQN_TOK_ABOVE: 974 for (cur = parent; cur != NULL; cur = cur->parent) 975 if (cur->type == EQN_PILE) 976 break; 977 if (cur == NULL) { 978 mandoc_msg(MANDOCERR_IT_STRAY, ep->parse, 979 ep->node->line, ep->node->pos, eqn_toks[tok]); 980 break; 981 } 982 parent = eqn_box_alloc(ep, cur); 983 parent->type = EQN_LIST; 984 break; 985 case EQN_TOK_MATRIX: 986 while (parent->args == parent->expectargs) 987 parent = parent->parent; 988 parent = eqn_box_alloc(ep, parent); 989 parent->type = EQN_MATRIX; 990 parent->expectargs = 1; 991 break; 992 case EQN_TOK_EOF: 993 return; 994 case EQN_TOK__MAX: 995 case EQN_TOK_FUNC: 996 case EQN_TOK_QUOTED: 997 case EQN_TOK_SYM: 998 p = ep->start; 999 assert(p != NULL); 1000 /* 1001 * If we already have something in the stack and we're 1002 * in an expression, then rewind til we're not any more. 1003 */ 1004 while (parent->args == parent->expectargs) 1005 parent = parent->parent; 1006 cur = eqn_box_alloc(ep, parent); 1007 cur->type = EQN_TEXT; 1008 cur->text = p; 1009 switch (tok) { 1010 case EQN_TOK_FUNC: 1011 cur->font = EQNFONT_ROMAN; 1012 break; 1013 case EQN_TOK_QUOTED: 1014 if (cur->font == EQNFONT_NONE) 1015 cur->font = EQNFONT_ITALIC; 1016 break; 1017 case EQN_TOK_SYM: 1018 break; 1019 default: 1020 if (cur->font != EQNFONT_NONE || *p == '\0') 1021 break; 1022 cpn = p - 1; 1023 ccln = CCL_LET; 1024 split = NULL; 1025 for (;;) { 1026 /* Advance to next character. */ 1027 cp = cpn++; 1028 ccl = ccln; 1029 ccln = isalpha((unsigned char)*cpn) ? CCL_LET : 1030 isdigit((unsigned char)*cpn) || 1031 (*cpn == '.' && (ccl == CCL_DIG || 1032 isdigit((unsigned char)cpn[1]))) ? 1033 CCL_DIG : CCL_PUN; 1034 /* No boundary before first character. */ 1035 if (cp < p) 1036 continue; 1037 cur->font = ccl == CCL_LET ? 1038 EQNFONT_ITALIC : EQNFONT_ROMAN; 1039 if (*cp == '\\') 1040 mandoc_escape(&cpn, NULL, NULL); 1041 /* No boundary after last character. */ 1042 if (*cpn == '\0') 1043 break; 1044 if (ccln == ccl && *cp != ',' && *cpn != ',') 1045 continue; 1046 /* Boundary found, split the text. */ 1047 if (parent->args == parent->expectargs) { 1048 /* Remove the text from the tree. */ 1049 if (cur->prev == NULL) 1050 parent->first = cur->next; 1051 else 1052 cur->prev->next = NULL; 1053 parent->last = cur->prev; 1054 parent->args--; 1055 /* Set up a list instead. */ 1056 split = eqn_box_alloc(ep, parent); 1057 split->type = EQN_LIST; 1058 /* Insert the word into the list. */ 1059 split->first = split->last = cur; 1060 cur->parent = split; 1061 cur->prev = NULL; 1062 parent = split; 1063 } 1064 /* Append a new text box. */ 1065 nbox = eqn_box_alloc(ep, parent); 1066 nbox->type = EQN_TEXT; 1067 nbox->text = mandoc_strdup(cpn); 1068 /* Truncate the old box. */ 1069 p = mandoc_strndup(cur->text, 1070 cpn - cur->text); 1071 free(cur->text); 1072 cur->text = p; 1073 /* Setup to process the new box. */ 1074 cur = nbox; 1075 p = nbox->text; 1076 cpn = p - 1; 1077 ccln = CCL_LET; 1078 } 1079 if (split != NULL) 1080 parent = split->parent; 1081 break; 1082 } 1083 break; 1084 default: 1085 abort(); 1086 } 1087 goto next_tok; 1088 } 1089 1090 void 1091 eqn_free(struct eqn_node *p) 1092 { 1093 int i; 1094 1095 for (i = 0; i < (int)p->defsz; i++) { 1096 free(p->defs[i].key); 1097 free(p->defs[i].val); 1098 } 1099 1100 free(p->data); 1101 free(p->defs); 1102 free(p); 1103 } 1104