1 /* $Id: mdoc.c,v 1.260 2017/02/16 03:00:23 schwarze Exp $ */ 2 /* 3 * Copyright (c) 2008, 2009, 2010, 2011 Kristaps Dzonsons <kristaps@bsd.lv> 4 * Copyright (c) 2010, 2012-2017 Ingo Schwarze <schwarze@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 #include "config.h" 19 20 #include <sys/types.h> 21 22 #include <assert.h> 23 #include <ctype.h> 24 #include <stdarg.h> 25 #include <stdio.h> 26 #include <stdlib.h> 27 #include <string.h> 28 #include <time.h> 29 30 #include "mandoc_aux.h" 31 #include "mandoc.h" 32 #include "roff.h" 33 #include "mdoc.h" 34 #include "libmandoc.h" 35 #include "roff_int.h" 36 #include "libmdoc.h" 37 38 const char *const __mdoc_macronames[MDOC_MAX + 1] = { 39 "text", 40 "Ap", "Dd", "Dt", "Os", 41 "Sh", "Ss", "Pp", "D1", 42 "Dl", "Bd", "Ed", "Bl", 43 "El", "It", "Ad", "An", 44 "Ar", "Cd", "Cm", "Dv", 45 "Er", "Ev", "Ex", "Fa", 46 "Fd", "Fl", "Fn", "Ft", 47 "Ic", "In", "Li", "Nd", 48 "Nm", "Op", "Ot", "Pa", 49 "Rv", "St", "Va", "Vt", 50 "Xr", "%A", "%B", "%D", 51 "%I", "%J", "%N", "%O", 52 "%P", "%R", "%T", "%V", 53 "Ac", "Ao", "Aq", "At", 54 "Bc", "Bf", "Bo", "Bq", 55 "Bsx", "Bx", "Db", "Dc", 56 "Do", "Dq", "Ec", "Ef", 57 "Em", "Eo", "Fx", "Ms", 58 "No", "Ns", "Nx", "Ox", 59 "Pc", "Pf", "Po", "Pq", 60 "Qc", "Ql", "Qo", "Qq", 61 "Re", "Rs", "Sc", "So", 62 "Sq", "Sm", "Sx", "Sy", 63 "Tn", "Ux", "Xc", "Xo", 64 "Fo", "Fc", "Oo", "Oc", 65 "Bk", "Ek", "Bt", "Hf", 66 "Fr", "Ud", "Lb", "Lp", 67 "Lk", "Mt", "Brq", "Bro", 68 "Brc", "%C", "Es", "En", 69 "Dx", "%Q", "br", "sp", 70 "%U", "Ta", "ll", 71 }; 72 73 const char *const __mdoc_argnames[MDOC_ARG_MAX] = { 74 "split", "nosplit", "ragged", 75 "unfilled", "literal", "file", 76 "offset", "bullet", "dash", 77 "hyphen", "item", "enum", 78 "tag", "diag", "hang", 79 "ohang", "inset", "column", 80 "width", "compact", "std", 81 "filled", "words", "emphasis", 82 "symbolic", "nested", "centered" 83 }; 84 85 const char * const *mdoc_macronames = __mdoc_macronames + 1; 86 const char * const *mdoc_argnames = __mdoc_argnames; 87 88 static int mdoc_ptext(struct roff_man *, int, char *, int); 89 static int mdoc_pmacro(struct roff_man *, int, char *, int); 90 91 92 /* 93 * Main parse routine. Parses a single line -- really just hands off to 94 * the macro (mdoc_pmacro()) or text parser (mdoc_ptext()). 95 */ 96 int 97 mdoc_parseln(struct roff_man *mdoc, int ln, char *buf, int offs) 98 { 99 100 if (mdoc->last->type != ROFFT_EQN || ln > mdoc->last->line) 101 mdoc->flags |= MDOC_NEWLINE; 102 103 /* 104 * Let the roff nS register switch SYNOPSIS mode early, 105 * such that the parser knows at all times 106 * whether this mode is on or off. 107 * Note that this mode is also switched by the Sh macro. 108 */ 109 if (roff_getreg(mdoc->roff, "nS")) 110 mdoc->flags |= MDOC_SYNOPSIS; 111 else 112 mdoc->flags &= ~MDOC_SYNOPSIS; 113 114 return roff_getcontrol(mdoc->roff, buf, &offs) ? 115 mdoc_pmacro(mdoc, ln, buf, offs) : 116 mdoc_ptext(mdoc, ln, buf, offs); 117 } 118 119 void 120 mdoc_macro(MACRO_PROT_ARGS) 121 { 122 assert(tok > TOKEN_NONE && tok < MDOC_MAX); 123 124 (*mdoc_macros[tok].fp)(mdoc, tok, line, ppos, pos, buf); 125 } 126 127 void 128 mdoc_tail_alloc(struct roff_man *mdoc, int line, int pos, int tok) 129 { 130 struct roff_node *p; 131 132 p = roff_node_alloc(mdoc, line, pos, ROFFT_TAIL, tok); 133 roff_node_append(mdoc, p); 134 mdoc->next = ROFF_NEXT_CHILD; 135 } 136 137 struct roff_node * 138 mdoc_endbody_alloc(struct roff_man *mdoc, int line, int pos, int tok, 139 struct roff_node *body) 140 { 141 struct roff_node *p; 142 143 body->flags |= NODE_ENDED; 144 body->parent->flags |= NODE_ENDED; 145 p = roff_node_alloc(mdoc, line, pos, ROFFT_BODY, tok); 146 p->body = body; 147 p->norm = body->norm; 148 p->end = ENDBODY_SPACE; 149 roff_node_append(mdoc, p); 150 mdoc->next = ROFF_NEXT_SIBLING; 151 return p; 152 } 153 154 struct roff_node * 155 mdoc_block_alloc(struct roff_man *mdoc, int line, int pos, 156 int tok, struct mdoc_arg *args) 157 { 158 struct roff_node *p; 159 160 p = roff_node_alloc(mdoc, line, pos, ROFFT_BLOCK, tok); 161 p->args = args; 162 if (p->args) 163 (args->refcnt)++; 164 165 switch (tok) { 166 case MDOC_Bd: 167 case MDOC_Bf: 168 case MDOC_Bl: 169 case MDOC_En: 170 case MDOC_Rs: 171 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 172 break; 173 default: 174 break; 175 } 176 roff_node_append(mdoc, p); 177 mdoc->next = ROFF_NEXT_CHILD; 178 return p; 179 } 180 181 void 182 mdoc_elem_alloc(struct roff_man *mdoc, int line, int pos, 183 int tok, struct mdoc_arg *args) 184 { 185 struct roff_node *p; 186 187 p = roff_node_alloc(mdoc, line, pos, ROFFT_ELEM, tok); 188 p->args = args; 189 if (p->args) 190 (args->refcnt)++; 191 192 switch (tok) { 193 case MDOC_An: 194 p->norm = mandoc_calloc(1, sizeof(union mdoc_data)); 195 break; 196 default: 197 break; 198 } 199 roff_node_append(mdoc, p); 200 mdoc->next = ROFF_NEXT_CHILD; 201 } 202 203 void 204 mdoc_node_relink(struct roff_man *mdoc, struct roff_node *p) 205 { 206 207 roff_node_unlink(mdoc, p); 208 p->prev = p->next = NULL; 209 roff_node_append(mdoc, p); 210 } 211 212 /* 213 * Parse free-form text, that is, a line that does not begin with the 214 * control character. 215 */ 216 static int 217 mdoc_ptext(struct roff_man *mdoc, int line, char *buf, int offs) 218 { 219 struct roff_node *n; 220 char *c, *ws, *end; 221 222 n = mdoc->last; 223 224 /* 225 * If a column list contains plain text, assume an implicit item 226 * macro. This can happen one or more times at the beginning 227 * of such a list, intermixed with non-It mdoc macros and with 228 * nodes generated on the roff level, for example by tbl. 229 */ 230 231 if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY && 232 n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) || 233 (n->parent != NULL && n->parent->tok == MDOC_Bl && 234 n->parent->norm->Bl.type == LIST_column)) { 235 mdoc->flags |= MDOC_FREECOL; 236 mdoc_macro(mdoc, MDOC_It, line, offs, &offs, buf); 237 return 1; 238 } 239 240 /* 241 * Search for the beginning of unescaped trailing whitespace (ws) 242 * and for the first character not to be output (end). 243 */ 244 245 /* FIXME: replace with strcspn(). */ 246 ws = NULL; 247 for (c = end = buf + offs; *c; c++) { 248 switch (*c) { 249 case ' ': 250 if (NULL == ws) 251 ws = c; 252 continue; 253 case '\t': 254 /* 255 * Always warn about trailing tabs, 256 * even outside literal context, 257 * where they should be put on the next line. 258 */ 259 if (NULL == ws) 260 ws = c; 261 /* 262 * Strip trailing tabs in literal context only; 263 * outside, they affect the next line. 264 */ 265 if (MDOC_LITERAL & mdoc->flags) 266 continue; 267 break; 268 case '\\': 269 /* Skip the escaped character, too, if any. */ 270 if (c[1]) 271 c++; 272 /* FALLTHROUGH */ 273 default: 274 ws = NULL; 275 break; 276 } 277 end = c + 1; 278 } 279 *end = '\0'; 280 281 if (ws) 282 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 283 line, (int)(ws-buf), NULL); 284 285 if (buf[offs] == '\0' && ! (mdoc->flags & MDOC_LITERAL)) { 286 mandoc_msg(MANDOCERR_FI_BLANK, mdoc->parse, 287 line, (int)(c - buf), NULL); 288 289 /* 290 * Insert a `sp' in the case of a blank line. Technically, 291 * blank lines aren't allowed, but enough manuals assume this 292 * behaviour that we want to work around it. 293 */ 294 roff_elem_alloc(mdoc, line, offs, MDOC_sp); 295 mdoc->last->flags |= NODE_VALID | NODE_ENDED; 296 mdoc->next = ROFF_NEXT_SIBLING; 297 return 1; 298 } 299 300 roff_word_alloc(mdoc, line, offs, buf+offs); 301 302 if (mdoc->flags & MDOC_LITERAL) 303 return 1; 304 305 /* 306 * End-of-sentence check. If the last character is an unescaped 307 * EOS character, then flag the node as being the end of a 308 * sentence. The front-end will know how to interpret this. 309 */ 310 311 assert(buf < end); 312 313 if (mandoc_eos(buf+offs, (size_t)(end-buf-offs))) 314 mdoc->last->flags |= NODE_EOS; 315 316 for (c = buf + offs; c != NULL; c = strchr(c + 1, '.')) { 317 if (c - buf < offs + 2) 318 continue; 319 if (end - c < 4) 320 break; 321 if (isalpha((unsigned char)c[-2]) && 322 isalpha((unsigned char)c[-1]) && 323 c[1] == ' ' && 324 isupper((unsigned char)(c[2] == ' ' ? c[3] : c[2])) && 325 (c[-2] != 'n' || c[-1] != 'c') && 326 (c[-2] != 'v' || c[-1] != 's')) 327 mandoc_msg(MANDOCERR_EOS, mdoc->parse, 328 line, (int)(c - buf), NULL); 329 } 330 331 return 1; 332 } 333 334 /* 335 * Parse a macro line, that is, a line beginning with the control 336 * character. 337 */ 338 static int 339 mdoc_pmacro(struct roff_man *mdoc, int ln, char *buf, int offs) 340 { 341 struct roff_node *n; 342 const char *cp; 343 int tok; 344 int i, sv; 345 char mac[5]; 346 347 sv = offs; 348 349 /* 350 * Copy the first word into a nil-terminated buffer. 351 * Stop when a space, tab, escape, or eoln is encountered. 352 */ 353 354 i = 0; 355 while (i < 4 && strchr(" \t\\", buf[offs]) == NULL) 356 mac[i++] = buf[offs++]; 357 358 mac[i] = '\0'; 359 360 tok = (i > 1 && i < 4) ? mdoc_hash_find(mac) : TOKEN_NONE; 361 362 if (tok == TOKEN_NONE) { 363 mandoc_msg(MANDOCERR_MACRO, mdoc->parse, 364 ln, sv, buf + sv - 1); 365 return 1; 366 } 367 368 /* Skip a leading escape sequence or tab. */ 369 370 switch (buf[offs]) { 371 case '\\': 372 cp = buf + offs + 1; 373 mandoc_escape(&cp, NULL, NULL); 374 offs = cp - buf; 375 break; 376 case '\t': 377 offs++; 378 break; 379 default: 380 break; 381 } 382 383 /* Jump to the next non-whitespace word. */ 384 385 while (buf[offs] && ' ' == buf[offs]) 386 offs++; 387 388 /* 389 * Trailing whitespace. Note that tabs are allowed to be passed 390 * into the parser as "text", so we only warn about spaces here. 391 */ 392 393 if ('\0' == buf[offs] && ' ' == buf[offs - 1]) 394 mandoc_msg(MANDOCERR_SPACE_EOL, mdoc->parse, 395 ln, offs - 1, NULL); 396 397 /* 398 * If an initial macro or a list invocation, divert directly 399 * into macro processing. 400 */ 401 402 n = mdoc->last; 403 if (n == NULL || tok == MDOC_It || tok == MDOC_El) { 404 mdoc_macro(mdoc, tok, ln, sv, &offs, buf); 405 return 1; 406 } 407 408 /* 409 * If a column list contains a non-It macro, assume an implicit 410 * item macro. This can happen one or more times at the 411 * beginning of such a list, intermixed with text lines and 412 * with nodes generated on the roff level, for example by tbl. 413 */ 414 415 if ((n->tok == MDOC_Bl && n->type == ROFFT_BODY && 416 n->end == ENDBODY_NOT && n->norm->Bl.type == LIST_column) || 417 (n->parent != NULL && n->parent->tok == MDOC_Bl && 418 n->parent->norm->Bl.type == LIST_column)) { 419 mdoc->flags |= MDOC_FREECOL; 420 mdoc_macro(mdoc, MDOC_It, ln, sv, &sv, buf); 421 return 1; 422 } 423 424 /* Normal processing of a macro. */ 425 426 mdoc_macro(mdoc, tok, ln, sv, &offs, buf); 427 428 /* In quick mode (for mandocdb), abort after the NAME section. */ 429 430 if (mdoc->quick && MDOC_Sh == tok && 431 SEC_NAME != mdoc->last->sec) 432 return 2; 433 434 return 1; 435 } 436 437 enum mdelim 438 mdoc_isdelim(const char *p) 439 { 440 441 if ('\0' == p[0]) 442 return DELIM_NONE; 443 444 if ('\0' == p[1]) 445 switch (p[0]) { 446 case '(': 447 case '[': 448 return DELIM_OPEN; 449 case '|': 450 return DELIM_MIDDLE; 451 case '.': 452 case ',': 453 case ';': 454 case ':': 455 case '?': 456 case '!': 457 case ')': 458 case ']': 459 return DELIM_CLOSE; 460 default: 461 return DELIM_NONE; 462 } 463 464 if ('\\' != p[0]) 465 return DELIM_NONE; 466 467 if (0 == strcmp(p + 1, ".")) 468 return DELIM_CLOSE; 469 if (0 == strcmp(p + 1, "fR|\\fP")) 470 return DELIM_MIDDLE; 471 472 return DELIM_NONE; 473 } 474 475 void 476 mdoc_validate(struct roff_man *mdoc) 477 { 478 479 mdoc->last = mdoc->first; 480 mdoc_node_validate(mdoc); 481 mdoc_state_reset(mdoc); 482 } 483