1 /* $Id: mdoc.c,v 1.256 2015/10/30 19:04:16 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012-2015 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include "config.h" 19 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <stdarg.h> 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <string.h> 28 #include <time.h> 29 30 #include "mandoc_aux.h" 31 #include "mandoc.h" 32 #include "roff.h" 33 #include "mdoc.h" 34 #include "libmandoc.h" 35 #include "roff_int.h" 36 #include "libmdoc.h" 37 38 const char *const __mdoc_macronames[MDOC_MAX + 1] = { 39 "text", 40 "Ap", "Dd", "Dt", "Os", 41 "Sh", "Ss", "Pp", "D1", 42 "Dl", "Bd", "Ed", "Bl", 43 "El", "It", "Ad", "An", 44 "Ar", "Cd", "Cm", "Dv", 45 "Er", "Ev", "Ex", "Fa", 46 "Fd", "Fl", "Fn", "Ft", 47 "Ic", "In", "Li", "Nd", 48 "Nm", "Op", "Ot", "Pa", 49 "Rv", "St", "Va", "Vt", 50 "Xr", "%A", "%B", "%D", 51 "%I", "%J", "%N", "%O", 52 "%P", "%R", "%T", "%V", 53 "Ac", "Ao", "Aq", "At", 54 "Bc", "Bf", "Bo", "Bq", 55 "Bsx", "Bx", "Db", "Dc", 56 "Do", "Dq", "Ec", "Ef", 57 "Em", "Eo", "Fx", "Ms", 58 "No", "Ns", "Nx", "Ox", 59 "Pc", "Pf", "Po", "Pq", 60 "Qc", "Ql", "Qo", "Qq", 61 "Re", "Rs", "Sc", "So", 62 "Sq", "Sm", "Sx", "Sy", 63 "Tn", "Ux", "Xc", "Xo", 64 "Fo", "Fc", "Oo", "Oc", 65 "Bk", "Ek", "Bt", "Hf", 66 "Fr", "Ud", "Lb", "Lp", 67 "Lk", "Mt", "Brq", "Bro", 68 "Brc", "%C", "Es", "En", 69 "Dx", "%Q", "br", "sp", 70 "%U", "Ta", "ll", 71 }; 72 73 const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 74 "split", "nosplit", "ragged", 75 "unfilled", "literal", "file", 76 "offset", "bullet", "dash", 77 "hyphen", "item", "enum", 78 "tag", "diag", "hang", 79 "ohang", "inset", "column", 80 "width", "compact", "std", 81 "filled", "words", "emphasis", 82 "symbolic", "nested", "centered" 83 }; 84 85 const char * const *mdoc_macronames = __mdoc_macronames + 1; 86 const char * const *mdoc_argnames = __mdoc_argnames; 87 88 static int mdoc_ptext(struct roff_man *, int, char *, int); 89 static int mdoc_pmacro(struct roff_man *, int, char *, int); 90 91 92 /* 93 * Main parse routine. Parses a single line -- really just hands off to 94 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 95 */ 96 int 97 mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs) 98 { 99 100 if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line) 101 mdoc->flags |= MDOC_NEWLINE; 102 103 /* 104 * Let the roff nS register switch SYNOPSIS mode early, 105 * such that the parser knows at all times 106 * whether this mode is on or off. 107 * Note that this mode is also switched by the Sh macro. 108 */ 109 if (roff_getreg(mdoc->roff, "nS")) 110 mdoc->flags |= MDOC_SYNOPSIS; 111 else 112 mdoc->flags &= ~MDOC_SYNOPSIS; 113 114 return roff_getcontrol(mdoc->roff, buf, &offs) ? 115 mdoc_pmacro(mdoc, ln, buf, offs) : 116 mdoc_ptext(mdoc, ln, buf, offs); 117 } 118 119 void 120 mdoc_macro(MACRO_PROT_ARGS) 121 { 122 assert(tok > TOKEN_NONE && tok < MDOC_MAX); 123 124 (*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf); 125 } 126 127 void 128 mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, int tok) 129 { 130 struct roff_node *p; 131 132 p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok); 133 roff_node_append(mdoc, p); 134 mdoc->next = ROFF_NEXT_CHILD; 135 } 136 137 struct roff_node * 138 mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos, int tok, 139 struct roff_node *body, enum mdoc_endbody end) 140 { 141 struct roff_node *p; 142 143 body->flags |= MDOC_ENDED; 144 body->parent->flags |= MDOC_ENDED; 145 p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok); 146 p->body = body; 147 p->norm = body->norm; 148 p->end = end; 149 roff_node_append(mdoc, p); 150 mdoc->next = ROFF_NEXT_SIBLING; 151 return p; 152 } 153 154 struct roff_node * 155 mdoc_block_alloc(struct roff_man *mdoc, int line, int pos, 156 int tok, struct mdoc_arg *args) 157 { 158 struct roff_node *p; 159 160 p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok); 161 p->args = args; 162 if (p->args) 163 (args->refcnt)++; 164 165 switch (tok) { 166 case MDOC_Bd: 167 case MDOC_Bf: 168 case MDOC_Bl: 169 case MDOC_En: 170 case MDOC_Rs: 171 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 172 break; 173 default: 174 break; 175 } 176 roff_node_append(mdoc, p); 177 mdoc->next = ROFF_NEXT_CHILD; 178 return p; 179 } 180 181 void 182 mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos, 183 int tok, struct mdoc_arg *args) 184 { 185 struct roff_node *p; 186 187 p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok); 188 p->args = args; 189 if (p->args) 190 (args->refcnt)++; 191 192 switch (tok) { 193 case MDOC_An: 194 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 195 break; 196 default: 197 break; 198 } 199 roff_node_append(mdoc, p); 200 mdoc->next = ROFF_NEXT_CHILD; 201 } 202 203 void 204 mdoc_node_relink(struct roff_man *mdoc, struct roff_node *p) 205 { 206 207 roff_node_unlink(mdoc, p); 208 p->prev = p->next = NULL; 209 roff_node_append(mdoc, p); 210 } 211 212 /* 213 * Parse free-form text, that is, a line that does not begin with the 214 * control character. 215 */ 216 static int 217 mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs) 218 { 219 struct roff_node *n; 220 char *c, *ws, *end; 221 222 assert(mdoc->last); 223 n = mdoc->last; 224 225 /* 226 * Divert directly to list processing if we're encountering a 227 * columnar ROFFT_BLOCK with or without a prior ROFFT_BLOCK entry 228 * (a ROFFT_BODY means it's already open, in which case we should 229 * process within its context in the normal way). 230 */ 231 232 if (n->tok == MDOC_Bl && n->type == ROFFT_BODY && 233 n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) { 234 /* `Bl' is open without any children. */ 235 mdoc->flags |= MDOC_FREECOL; 236 mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf); 237 return 1; 238 } 239 240 if (n->tok == MDOC_It && n->type == ROFFT_BLOCK && 241 NULL != n->parent && 242 MDOC_Bl == n->parent->tok && 243 LIST_column == n->parent->norm->Bl.type) { 244 /* `Bl' has block-level `It' children. */ 245 mdoc->flags |= MDOC_FREECOL; 246 mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf); 247 return 1; 248 } 249 250 /* 251 * Search for the beginning of unescaped trailing whitespace (ws) 252 * and for the first character not to be output (end). 253 */ 254 255 /* FIXME: replace with strcspn(). */ 256 ws = NULL; 257 for (c = end = buf + offs; *c; c++) { 258 switch (*c) { 259 case ' ': 260 if (NULL == ws) 261 ws = c; 262 continue; 263 case '\t': 264 /* 265 * Always warn about trailing tabs, 266 * even outside literal context, 267 * where they should be put on the next line. 268 */ 269 if (NULL == ws) 270 ws = c; 271 /* 272 * Strip trailing tabs in literal context only; 273 * outside, they affect the next line. 274 */ 275 if (MDOC_LITERAL & mdoc->flags) 276 continue; 277 break; 278 case '\\': 279 /* Skip the escaped character, too, if any. */ 280 if (c[1]) 281 c++; 282 /* FALLTHROUGH */ 283 default: 284 ws = NULL; 285 break; 286 } 287 end = c + 1; 288 } 289 *end = '\0'; 290 291 if (ws) 292 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 293 line, (int)(ws-buf), NULL); 294 295 if (buf[offs] == '\0' && ! (mdoc->flags & MDOC_LITERAL)) { 296 mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse, 297 line, (int)(c - buf), NULL); 298 299 /* 300 * Insert a `sp' in the case of a blank line. Technically, 301 * blank lines aren't allowed, but enough manuals assume this 302 * behaviour that we want to work around it. 303 */ 304 roff_elem_alloc(mdoc, line, offs, MDOC_sp); 305 mdoc->last->flags |= MDOC_VALID | MDOC_ENDED; 306 mdoc->next = ROFF_NEXT_SIBLING; 307 return 1; 308 } 309 310 roff_word_alloc(mdoc, line, offs, buf+offs); 311 312 if (mdoc->flags & MDOC_LITERAL) 313 return 1; 314 315 /* 316 * End-of-sentence check. If the last character is an unescaped 317 * EOS character, then flag the node as being the end of a 318 * sentence. The front-end will know how to interpret this. 319 */ 320 321 assert(buf < end); 322 323 if (mandoc_eos(buf+offs, (size_t)(end-buf-offs))) 324 mdoc->last->flags |= MDOC_EOS; 325 return 1; 326 } 327 328 /* 329 * Parse a macro line, that is, a line beginning with the control 330 * character. 331 */ 332 static int 333 mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs) 334 { 335 struct roff_node *n; 336 const char *cp; 337 int tok; 338 int i, sv; 339 char mac[5]; 340 341 sv = offs; 342 343 /* 344 * Copy the first word into a nil-terminated buffer. 345 * Stop when a space, tab, escape, or eoln is encountered. 346 */ 347 348 i = 0; 349 while (i < 4 && strchr(" \t\\", buf[offs]) == NULL) 350 mac[i++] = buf[offs++]; 351 352 mac[i] = '\0'; 353 354 tok = (i > 1 && i < 4) ? mdoc_hash_find(mac) : TOKEN_NONE; 355 356 if (tok == TOKEN_NONE) { 357 mandoc_msg(MANDOCERR_MACRO, mdoc->parse, 358 ln, sv, buf + sv - 1); 359 return 1; 360 } 361 362 /* Skip a leading escape sequence or tab. */ 363 364 switch (buf[offs]) { 365 case '\\': 366 cp = buf + offs + 1; 367 mandoc_escape(&cp, NULL, NULL); 368 offs = cp - buf; 369 break; 370 case '\t': 371 offs++; 372 break; 373 default: 374 break; 375 } 376 377 /* Jump to the next non-whitespace word. */ 378 379 while (buf[offs] && ' ' == buf[offs]) 380 offs++; 381 382 /* 383 * Trailing whitespace. Note that tabs are allowed to be passed 384 * into the parser as "text", so we only warn about spaces here. 385 */ 386 387 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 388 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 389 ln, offs - 1, NULL); 390 391 /* 392 * If an initial macro or a list invocation, divert directly 393 * into macro processing. 394 */ 395 396 if (NULL == mdoc->last || MDOC_It == tok || MDOC_El == tok) { 397 mdoc_macro(mdoc, tok, ln, sv, &offs, buf); 398 return 1; 399 } 400 401 n = mdoc->last; 402 assert(mdoc->last); 403 404 /* 405 * If the first macro of a `Bl -column', open an `It' block 406 * context around the parsed macro. 407 */ 408 409 if (n->tok == MDOC_Bl && n->type == ROFFT_BODY && 410 n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) { 411 mdoc->flags |= MDOC_FREECOL; 412 mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf); 413 return 1; 414 } 415 416 /* 417 * If we're following a block-level `It' within a `Bl -column' 418 * context (perhaps opened in the above block or in ptext()), 419 * then open an `It' block context around the parsed macro. 420 */ 421 422 if (n->tok == MDOC_It && n->type == ROFFT_BLOCK && 423 NULL != n->parent && 424 MDOC_Bl == n->parent->tok && 425 LIST_column == n->parent->norm->Bl.type) { 426 mdoc->flags |= MDOC_FREECOL; 427 mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf); 428 return 1; 429 } 430 431 /* Normal processing of a macro. */ 432 433 mdoc_macro(mdoc, tok, ln, sv, &offs, buf); 434 435 /* In quick mode (for mandocdb), abort after the NAME section. */ 436 437 if (mdoc->quick && MDOC_Sh == tok && 438 SEC_NAME != mdoc->last->sec) 439 return 2; 440 441 return 1; 442 } 443 444 enum mdelim 445 mdoc_isdelim(const char *p) 446 { 447 448 if ('\0' == p[0]) 449 return DELIM_NONE; 450 451 if ('\0' == p[1]) 452 switch (p[0]) { 453 case '(': 454 case '[': 455 return DELIM_OPEN; 456 case '|': 457 return DELIM_MIDDLE; 458 case '.': 459 case ',': 460 case ';': 461 case ':': 462 case '?': 463 case '!': 464 case ')': 465 case ']': 466 return DELIM_CLOSE; 467 default: 468 return DELIM_NONE; 469 } 470 471 if ('\\' != p[0]) 472 return DELIM_NONE; 473 474 if (0 == strcmp(p + 1, ".")) 475 return DELIM_CLOSE; 476 if (0 == strcmp(p + 1, "fR|\\fP")) 477 return DELIM_MIDDLE; 478 479 return DELIM_NONE; 480 } 481 482 void 483 mdoc_validate(struct roff_man *mdoc) 484 { 485 486 mdoc->last = mdoc->first; 487 mdoc_node_validate(mdoc); 488 mdoc_state_reset(mdoc); 489 } 490