/*
 * Copyright (c) 2002 John Rochester
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer,
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
31 * Copyright 2014 Garrett D'Amore <garrett@damore.org> 32 * Copyright 2022 Oxide Computer Company 33 */ 34 35 #include <sys/types.h> 36 #include <sys/stat.h> 37 #include <sys/param.h> 38 39 #include <ctype.h> 40 #include <dirent.h> 41 #include <err.h> 42 #include <signal.h> 43 #include <stddef.h> 44 #include <stdio.h> 45 #include <stdlib.h> 46 #include <string.h> 47 #include <unistd.h> 48 49 #include "man.h" 50 #include "stringlist.h" 51 52 53 /* Information collected about each man page in a section */ 54 struct page_info { 55 char *filename; 56 char *name; 57 char *suffix; 58 ino_t inode; 59 }; 60 61 /* An expanding string */ 62 struct sbuf { 63 char *content; /* the start of the buffer */ 64 char *end; /* just past the end of the content */ 65 char *last; /* the last allocated character */ 66 }; 67 68 /* Remove the last amount characters from the sbuf */ 69 #define sbuf_retract(sbuf, amount) ((sbuf)->end -= (amount)) 70 /* Return the length of the sbuf content */ 71 #define sbuf_length(sbuf) ((sbuf)->end - (sbuf)->content) 72 73 typedef char *edited_copy(char *from, char *to, int length); 74 75 /* 76 * While the whatis line is being formed, it is stored in whatis_proto. 77 * When finished, it is reformatted into whatis_final and then appended 78 * to whatis_lines. 79 */ 80 static struct sbuf *whatis_proto; 81 static struct sbuf *whatis_final; 82 static stringlist *whatis_lines; /* collected output lines */ 83 84 static char tempfile[MAXPATHLEN]; /* path of temporary file, if any */ 85 86 #define MDOC_COMMANDS "ArDvErEvFlLiNmPa" 87 88 89 /* Free a struct page_info and its content */ 90 static void 91 free_page_info(struct page_info *info) 92 { 93 94 free(info->filename); 95 free(info->name); 96 free(info->suffix); 97 free(info); 98 } 99 100 /* 101 * Allocate and fill in a new struct page_info given the 102 * name of the man section directory and the dirent of the file. 103 * If the file is not a man page, return NULL. 
104 */ 105 static struct page_info * 106 new_page_info(char *dir, struct dirent *dirent) 107 { 108 struct page_info *info; 109 int basename_length; 110 char *suffix; 111 struct stat st; 112 113 if ((info = malloc(sizeof (struct page_info))) == NULL) 114 err(1, "malloc"); 115 basename_length = strlen(dirent->d_name); 116 suffix = &dirent->d_name[basename_length]; 117 if (asprintf(&info->filename, "%s/%s", dir, dirent->d_name) == -1) 118 err(1, "asprintf"); 119 for (;;) { 120 if (--suffix == dirent->d_name || !isalnum(*suffix)) { 121 if (*suffix == '.') 122 break; 123 free(info->filename); 124 free(info); 125 return (NULL); 126 } 127 } 128 *suffix++ = '\0'; 129 info->name = strdup(dirent->d_name); 130 info->suffix = strdup(suffix); 131 if (stat(info->filename, &st) < 0) { 132 warn("%s", info->filename); 133 free_page_info(info); 134 return (NULL); 135 } 136 if (!S_ISREG(st.st_mode)) { 137 free_page_info(info); 138 return (NULL); 139 } 140 info->inode = st.st_ino; 141 return (info); 142 } 143 144 /* 145 * Reset sbuf length to 0. 146 */ 147 static void 148 sbuf_clear(struct sbuf *sbuf) 149 { 150 151 sbuf->end = sbuf->content; 152 } 153 154 /* 155 * Allocate a new sbuf. 156 */ 157 static struct sbuf * 158 new_sbuf(void) 159 { 160 struct sbuf *sbuf; 161 162 if ((sbuf = malloc(sizeof (struct sbuf))) == NULL) 163 err(1, "malloc"); 164 if ((sbuf->content = (char *)malloc(LINE_ALLOC)) == NULL) 165 err(1, "malloc"); 166 sbuf->last = sbuf->content + LINE_ALLOC - 1; 167 sbuf_clear(sbuf); 168 169 return (sbuf); 170 } 171 172 /* 173 * Ensure that there is enough room in the sbuf 174 * for nchars more characters. 
175 */ 176 static void 177 sbuf_need(struct sbuf *sbuf, int nchars) 178 { 179 char *new_content; 180 size_t size, cntsize; 181 size_t grow = 128; 182 183 while (grow < nchars) { 184 grow += 128; /* we grow in chunks of 128 bytes */ 185 } 186 187 /* Grow if the buffer isn't big enough */ 188 if (sbuf->end + nchars > sbuf->last) { 189 size = sbuf->last + 1 - sbuf->content; 190 size += grow; 191 cntsize = sbuf->end - sbuf->content; 192 193 if ((new_content = realloc(sbuf->content, size)) == NULL) { 194 perror("realloc"); 195 if (tempfile[0] != '\0') 196 (void) unlink(tempfile); 197 exit(1); 198 } 199 sbuf->content = new_content; 200 sbuf->end = new_content + cntsize; 201 sbuf->last = new_content + size - 1; 202 } 203 } 204 205 /* 206 * Append a string of a given length to the sbuf. 207 */ 208 static void 209 sbuf_append(struct sbuf *sbuf, const char *text, int length) 210 { 211 if (length > 0) { 212 sbuf_need(sbuf, length); 213 (void) memcpy(sbuf->end, text, length); 214 sbuf->end += length; 215 } 216 } 217 218 /* 219 * Append a null-terminated string to the sbuf. 220 */ 221 static void 222 sbuf_append_str(struct sbuf *sbuf, char *text) 223 { 224 225 sbuf_append(sbuf, text, strlen(text)); 226 } 227 228 /* 229 * Append an edited null-terminated string to the sbuf. 230 */ 231 static void 232 sbuf_append_edited(struct sbuf *sbuf, char *text, edited_copy copy) 233 { 234 int length; 235 236 if ((length = strlen(text)) > 0) { 237 sbuf_need(sbuf, length); 238 sbuf->end = copy(text, sbuf->end, length); 239 } 240 } 241 242 /* 243 * Strip any of a set of chars from the end of the sbuf. 244 */ 245 static void 246 sbuf_strip(struct sbuf *sbuf, const char *set) 247 { 248 249 while (sbuf->end > sbuf->content && strchr(set, sbuf->end[-1]) != NULL) 250 sbuf->end--; 251 } 252 253 /* 254 * Return the null-terminated string built by the sbuf. 
255 */ 256 static char * 257 sbuf_content(struct sbuf *sbuf) 258 { 259 260 *sbuf->end = '\0'; 261 return (sbuf->content); 262 } 263 264 /* 265 * Return true if no man page exists in the directory with 266 * any of the names in the stringlist. 267 */ 268 static int 269 no_page_exists(char *dir, stringlist *names, char *suffix) 270 { 271 char path[MAXPATHLEN]; 272 char *suffixes[] = { "", ".gz", ".bz2", NULL }; 273 size_t i; 274 int j; 275 276 for (i = 0; i < names->sl_cur; i++) { 277 for (j = 0; suffixes[j] != NULL; j++) { 278 (void) snprintf(path, MAXPATHLEN, "%s/%s.%s%s", 279 dir, names->sl_str[i], suffix, suffixes[j]); 280 if (access(path, F_OK) == 0) { 281 return (0); 282 } 283 } 284 } 285 return (1); 286 } 287 288 /* ARGSUSED sig */ 289 static void 290 trap_signal(int sig) 291 { 292 293 if (tempfile[0] != '\0') 294 (void) unlink(tempfile); 295 296 exit(1); 297 } 298 299 /* 300 * Attempt to open an output file. 301 * Return NULL if unsuccessful. 302 */ 303 static FILE * 304 open_output(char *name) 305 { 306 FILE *output; 307 308 whatis_lines = sl_init(); 309 (void) snprintf(tempfile, MAXPATHLEN, "%s.tmp", name); 310 name = tempfile; 311 if ((output = fopen(name, "w")) == NULL) { 312 warn("%s", name); 313 return (NULL); 314 } 315 return (output); 316 } 317 318 static int 319 linesort(const void *a, const void *b) 320 { 321 322 return (strcmp((*(const char * const *)a), (*(const char * const *)b))); 323 } 324 325 /* 326 * Write the unique sorted lines to the output file. 
327 */ 328 static void 329 finish_output(FILE *output, char *name) 330 { 331 size_t i; 332 char *prev = NULL; 333 334 qsort(whatis_lines->sl_str, whatis_lines->sl_cur, sizeof (char *), 335 linesort); 336 for (i = 0; i < whatis_lines->sl_cur; i++) { 337 char *line = whatis_lines->sl_str[i]; 338 if (i > 0 && strcmp(line, prev) == 0) 339 continue; 340 prev = line; 341 (void) fputs(line, output); 342 (void) putc('\n', output); 343 } 344 (void) fclose(output); 345 sl_free(whatis_lines, 1); 346 (void) rename(tempfile, name); 347 (void) unlink(tempfile); 348 } 349 350 static FILE * 351 open_whatis(char *mandir) 352 { 353 char filename[MAXPATHLEN]; 354 355 (void) snprintf(filename, MAXPATHLEN, "%s/%s", mandir, WHATIS); 356 return (open_output(filename)); 357 } 358 359 static void 360 finish_whatis(FILE *output, char *mandir) 361 { 362 char filename[MAXPATHLEN]; 363 364 (void) snprintf(filename, MAXPATHLEN, "%s/%s", mandir, WHATIS); 365 finish_output(output, filename); 366 } 367 368 /* 369 * Remove trailing spaces from a string, returning a pointer to just 370 * beyond the new last character. 371 */ 372 static char * 373 trim_rhs(char *str) 374 { 375 char *rhs; 376 377 rhs = &str[strlen(str)]; 378 while (--rhs > str && isspace(*rhs)) 379 ; 380 *++rhs = '\0'; 381 return (rhs); 382 } 383 384 /* 385 * Return a pointer to the next non-space character in the string. 386 */ 387 static char * 388 skip_spaces(char *s) 389 { 390 391 while (*s != '\0' && isspace(*s)) 392 s++; 393 394 return (s); 395 } 396 397 /* 398 * Return whether the line is of one of the forms: 399 * .Sh NAME 400 * .Sh "NAME" 401 * etc. 402 * assuming that section_start is ".Sh". 
403 */ 404 static int 405 name_section_line(char *line, const char *section_start) 406 { 407 char *rhs; 408 409 if (strncmp(line, section_start, 3) != 0) 410 return (0); 411 line = skip_spaces(line + 3); 412 rhs = trim_rhs(line); 413 if (*line == '"') { 414 line++; 415 if (*--rhs == '"') 416 *rhs = '\0'; 417 } 418 if (strcmp(line, "NAME") == 0) 419 return (1); 420 421 return (0); 422 } 423 424 /* 425 * Copy characters while removing the most common nroff/troff markup: 426 * \(em, \(mi, \s[+-N], \& 427 * \fF, \f(fo, \f[font] 428 * \*s, \*(st, \*[stringvar] 429 */ 430 static char * 431 de_nroff_copy(char *from, char *to, int fromlen) 432 { 433 char *from_end = &from[fromlen]; 434 435 while (from < from_end) { 436 switch (*from) { 437 case '\\': 438 switch (*++from) { 439 case '(': 440 if (strncmp(&from[1], "em", 2) == 0 || 441 strncmp(&from[1], "mi", 2) == 0) { 442 from += 3; 443 continue; 444 } 445 break; 446 case 's': 447 if (*++from == '-') 448 from++; 449 while (isdigit(*from)) 450 from++; 451 continue; 452 case 'f': 453 case '*': 454 if (*++from == '(') { 455 from += 3; 456 } else if (*from == '[') { 457 while (*++from != ']' && 458 from < from_end) 459 ; 460 from++; 461 } else { 462 from++; 463 } 464 continue; 465 case '&': 466 from++; 467 continue; 468 } 469 break; 470 } 471 *to++ = *from++; 472 } 473 return (to); 474 } 475 476 /* 477 * Append a string with the nroff formatting removed. 478 */ 479 static void 480 add_nroff(char *text) 481 { 482 483 sbuf_append_edited(whatis_proto, text, de_nroff_copy); 484 } 485 486 /* 487 * Appends "name(suffix), " to whatis_final 488 */ 489 static void 490 add_whatis_name(char *name, char *suffix) 491 { 492 493 if (*name != '\0') { 494 sbuf_append_str(whatis_final, name); 495 sbuf_append(whatis_final, "(", 1); 496 sbuf_append_str(whatis_final, suffix); 497 sbuf_append(whatis_final, "), ", 3); 498 } 499 } 500 501 /* 502 * Processes an old-style man(7) line. This ignores commands with only 503 * a single number argument. 
504 */ 505 static void 506 process_man_line(char *line) 507 { 508 char *p; 509 510 if (*line == '.') { 511 while (isalpha(*++line)) 512 ; 513 p = line = skip_spaces(line); 514 while (*p != '\0') { 515 if (!isdigit(*p)) 516 break; 517 p++; 518 } 519 if (*p == '\0') 520 return; 521 } else 522 line = skip_spaces(line); 523 if (*line != '\0') { 524 add_nroff(line); 525 sbuf_append(whatis_proto, " ", 1); 526 } 527 } 528 529 /* 530 * Processes a new-style mdoc(7) line. 531 */ 532 static void 533 process_mdoc_line(char *line) 534 { 535 int xref; 536 int arg = 0; 537 char *line_end = &line[strlen(line)]; 538 int orig_length = sbuf_length(whatis_proto); 539 char *next; 540 541 if (*line == '\0') 542 return; 543 if (line[0] != '.' || !isupper(line[1]) || !islower(line[2])) { 544 add_nroff(skip_spaces(line)); 545 sbuf_append(whatis_proto, " ", 1); 546 return; 547 } 548 xref = strncmp(line, ".Xr", 3) == 0; 549 line += 3; 550 while ((line = skip_spaces(line)) < line_end) { 551 if (*line == '"') { 552 next = ++line; 553 for (;;) { 554 next = strchr(next, '"'); 555 if (next == NULL) 556 break; 557 (void) memmove(next, next + 1, strlen(next)); 558 line_end--; 559 if (*next != '"') 560 break; 561 next++; 562 } 563 } else { 564 next = strpbrk(line, " \t"); 565 } 566 if (next != NULL) 567 *next++ = '\0'; 568 else 569 next = line_end; 570 if (isupper(*line) && islower(line[1]) && line[2] == '\0') { 571 if (strcmp(line, "Ns") == 0) { 572 arg = 0; 573 line = next; 574 continue; 575 } 576 if (strstr(line, MDOC_COMMANDS) != NULL) { 577 line = next; 578 continue; 579 } 580 } 581 if (arg > 0 && strchr(",.:;?!)]", *line) == 0) { 582 if (xref) { 583 sbuf_append(whatis_proto, "(", 1); 584 add_nroff(line); 585 sbuf_append(whatis_proto, ")", 1); 586 xref = 0; 587 } else { 588 sbuf_append(whatis_proto, " ", 1); 589 } 590 } 591 add_nroff(line); 592 arg++; 593 line = next; 594 } 595 if (sbuf_length(whatis_proto) > orig_length) 596 sbuf_append(whatis_proto, " ", 1); 597 } 598 599 /* 600 * Collect a 
list of comma-separated names from the text. 601 */ 602 static void 603 collect_names(stringlist *names, char *text) 604 { 605 char *arg; 606 607 for (;;) { 608 arg = text; 609 text = strchr(text, ','); 610 if (text != NULL) 611 *text++ = '\0'; 612 (void) sl_add(names, arg); 613 if (text == NULL) 614 return; 615 if (*text == ' ') 616 text++; 617 } 618 } 619 620 enum { STATE_UNKNOWN, STATE_MANSTYLE, STATE_MDOCNAME, STATE_MDOCDESC }; 621 622 /* 623 * Process a man page source into a single whatis line and add it 624 * to whatis_lines. 625 */ 626 static void 627 process_page(struct page_info *page, char *section_dir) 628 { 629 FILE *fp; 630 stringlist *names; 631 char *descr; 632 int state = STATE_UNKNOWN; 633 size_t i; 634 char *line = NULL; 635 size_t linecap = 0; 636 637 sbuf_clear(whatis_proto); 638 if ((fp = fopen(page->filename, "r")) == NULL) { 639 warn("%s", page->filename); 640 return; 641 } 642 while (getline(&line, &linecap, fp) > 0) { 643 /* Skip comments */ 644 if (strncmp(line, ".\\\"", 3) == 0) 645 continue; 646 switch (state) { 647 /* Haven't reached the NAME section yet */ 648 case STATE_UNKNOWN: 649 if (name_section_line(line, ".SH")) 650 state = STATE_MANSTYLE; 651 else if (name_section_line(line, ".Sh")) 652 state = STATE_MDOCNAME; 653 continue; 654 /* Inside an old-style .SH NAME section */ 655 case STATE_MANSTYLE: { 656 char *altline; 657 658 if (strncmp(line, ".SH", 3) == 0 || 659 strncmp(line, ".SS", 3) == 0) 660 break; 661 (void) trim_rhs(line); 662 if (strcmp(line, ".") == 0) 663 continue; 664 altline = line; 665 if (strncmp(altline, ".IX", 3) == 0) { 666 altline += 3; 667 altline = skip_spaces(altline); 668 } 669 process_man_line(altline); 670 continue; 671 } 672 /* Inside a new-style .Sh NAME section (the .Nm part) */ 673 case STATE_MDOCNAME: 674 (void) trim_rhs(line); 675 if (strncmp(line, ".Nm", 3) == 0) { 676 process_mdoc_line(line); 677 continue; 678 } else { 679 if (strcmp(line, ".") == 0) 680 continue; 681 sbuf_append(whatis_proto, "- 
", 2); 682 state = STATE_MDOCDESC; 683 } 684 /* FALLTHROUGH */ 685 /* Inside a new-style .Sh NAME section (after the .Nm-s) */ 686 case STATE_MDOCDESC: 687 if (strncmp(line, ".Sh", 3) == 0) 688 break; 689 (void) trim_rhs(line); 690 if (strcmp(line, ".") == 0) 691 continue; 692 process_mdoc_line(line); 693 continue; 694 } 695 break; 696 } 697 (void) fclose(fp); 698 sbuf_strip(whatis_proto, " \t.-"); 699 line = sbuf_content(whatis_proto); 700 /* 701 * Line now contains the appropriate data, but without the 702 * proper indentation or the section appended to each name. 703 */ 704 descr = strstr(line, " - "); 705 if (descr == NULL) { 706 descr = strchr(line, ' '); 707 if (descr == NULL) 708 return; 709 *descr++ = '\0'; 710 } else { 711 *descr = '\0'; 712 descr += 3; 713 } 714 names = sl_init(); 715 collect_names(names, line); 716 sbuf_clear(whatis_final); 717 if (!sl_find(names, page->name) && 718 no_page_exists(section_dir, names, page->suffix)) { 719 /* 720 * Add the page name since that's the only 721 * thing that man(1) will find. 722 */ 723 add_whatis_name(page->name, page->suffix); 724 } 725 for (i = 0; i < names->sl_cur; i++) 726 add_whatis_name(names->sl_str[i], page->suffix); 727 sl_free(names, 0); 728 /* Remove last ", " */ 729 sbuf_retract(whatis_final, 2); 730 while (sbuf_length(whatis_final) < INDENT) 731 sbuf_append(whatis_final, " ", 1); 732 sbuf_append(whatis_final, " - ", 3); 733 sbuf_append_str(whatis_final, skip_spaces(descr)); 734 (void) sl_add(whatis_lines, strdup(sbuf_content(whatis_final))); 735 } 736 737 /* 738 * Sort pages first by inode number, then by name. 739 */ 740 static int 741 pagesort(const void *a, const void *b) 742 { 743 const struct page_info *p1 = *(struct page_info * const *) a; 744 const struct page_info *p2 = *(struct page_info * const *) b; 745 746 if (p1->inode == p2->inode) 747 return (strcmp(p1->name, p2->name)); 748 749 return (p1->inode - p2->inode); 750 } 751 752 /* 753 * Process a single man section. 
754 */ 755 static void 756 process_section(char *section_dir) 757 { 758 struct dirent **entries; 759 int nentries; 760 struct page_info **pages; 761 int npages = 0; 762 int i; 763 ino_t prev_inode = 0; 764 765 /* Scan the man section directory for pages */ 766 nentries = scandir(section_dir, &entries, NULL, alphasort); 767 768 /* Collect information about man pages */ 769 pages = (struct page_info **)calloc(nentries, 770 sizeof (struct page_info *)); 771 for (i = 0; i < nentries; i++) { 772 struct page_info *info = new_page_info(section_dir, entries[i]); 773 if (info != NULL) 774 pages[npages++] = info; 775 free(entries[i]); 776 } 777 free(entries); 778 qsort(pages, npages, sizeof (struct page_info *), pagesort); 779 780 /* Process each unique page */ 781 for (i = 0; i < npages; i++) { 782 struct page_info *page = pages[i]; 783 if (page->inode != prev_inode) { 784 prev_inode = page->inode; 785 process_page(page, section_dir); 786 } 787 free_page_info(page); 788 } 789 free(pages); 790 } 791 792 /* 793 * Return whether the directory entry is a man page section. 794 */ 795 static int 796 select_sections(const struct dirent *entry) 797 { 798 const char *p = &entry->d_name[3]; 799 800 if (strncmp(entry->d_name, "man", 3) != 0) 801 return (0); 802 while (*p != '\0') { 803 if (!isalnum(*p++)) 804 return (0); 805 } 806 return (1); 807 } 808 809 /* 810 * Process a single top-level man directory by finding all the 811 * sub-directories named man* and processing each one in turn. 
812 */ 813 void 814 mwpath(char *path) 815 { 816 FILE *fp = NULL; 817 struct dirent **entries; 818 int nsections; 819 int i; 820 821 (void) signal(SIGINT, trap_signal); 822 (void) signal(SIGHUP, trap_signal); 823 (void) signal(SIGQUIT, trap_signal); 824 (void) signal(SIGTERM, trap_signal); 825 826 whatis_proto = new_sbuf(); 827 whatis_final = new_sbuf(); 828 829 nsections = scandir(path, &entries, select_sections, alphasort); 830 if ((fp = open_whatis(path)) == NULL) 831 return; 832 for (i = 0; i < nsections; i++) { 833 char section_dir[MAXPATHLEN]; 834 835 (void) snprintf(section_dir, MAXPATHLEN, "%s/%s", 836 path, entries[i]->d_name); 837 process_section(section_dir); 838 free(entries[i]); 839 } 840 free(entries); 841 finish_whatis(fp, path); 842 } 843