mandocdb.c (cec8643b41ebefad6c677010fc784dc4bb0550f3) mandocdb.c (4d131170e62381276a07ffc0aeb1b62e527d940c)
1/* $Id: mandocdb.c,v 1.262 2018/12/30 00:49:55 schwarze Exp $ */
1/* $Id: mandocdb.c,v 1.269 2021/08/19 16:55:31 schwarze Exp $ */
2/*
2/*
3 * Copyright (c) 2011-2020 Ingo Schwarze <schwarze@openbsd.org>
3 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
4 * Copyright (c) 2011-2018 Ingo Schwarze <schwarze@openbsd.org>
5 * Copyright (c) 2016 Ed Maste <emaste@freebsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
5 * Copyright (c) 2016 Ed Maste <emaste@freebsd.org>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 *
19 * Implementation of the makewhatis(8) program.
18 */
19#include "config.h"
20
21#include <sys/types.h>
22#include <sys/mman.h>
23#include <sys/stat.h>
24
25#include <assert.h>

--- 87 unchanged lines hidden (view full) ---

113 uint64_t mask; /* set unless handler returns 0 */
114 int taboo; /* node flags that must not be set */
115};
116
117
118int mandocdb(int, char *[]);
119
120static void dbadd(struct dba *, struct mpage *);
20 */
21#include "config.h"
22
23#include <sys/types.h>
24#include <sys/mman.h>
25#include <sys/stat.h>
26
27#include <assert.h>

--- 87 unchanged lines hidden (view full) ---

115 uint64_t mask; /* set unless handler returns 0 */
116 int taboo; /* node flags that must not be set */
117};
118
119
120int mandocdb(int, char *[]);
121
122static void dbadd(struct dba *, struct mpage *);
121static void dbadd_mlink(const struct mlink *mlink);
123static void dbadd_mlink(const struct mlink *);
122static void dbprune(struct dba *);
123static void dbwrite(struct dba *);
124static void filescan(const char *);
125#if HAVE_FTS_COMPARE_CONST
126static int fts_compare(const FTSENT *const *, const FTSENT *const *);
127#else
128static int fts_compare(const FTSENT **, const FTSENT **);
129#endif

--- 28 unchanged lines hidden (view full) ---

158static int parse_mdoc_Va(struct mpage *, const struct roff_meta *,
159 const struct roff_node *);
160static int parse_mdoc_Xr(struct mpage *, const struct roff_meta *,
161 const struct roff_node *);
162static void putkey(const struct mpage *, char *, uint64_t);
163static void putkeys(const struct mpage *, char *, size_t, uint64_t);
164static void putmdockey(const struct mpage *,
165 const struct roff_node *, uint64_t, int);
124static void dbprune(struct dba *);
125static void dbwrite(struct dba *);
126static void filescan(const char *);
127#if HAVE_FTS_COMPARE_CONST
128static int fts_compare(const FTSENT *const *, const FTSENT *const *);
129#else
130static int fts_compare(const FTSENT **, const FTSENT **);
131#endif

--- 28 unchanged lines hidden (view full) ---

160static int parse_mdoc_Va(struct mpage *, const struct roff_meta *,
161 const struct roff_node *);
162static int parse_mdoc_Xr(struct mpage *, const struct roff_meta *,
163 const struct roff_node *);
164static void putkey(const struct mpage *, char *, uint64_t);
165static void putkeys(const struct mpage *, char *, size_t, uint64_t);
166static void putmdockey(const struct mpage *,
167 const struct roff_node *, uint64_t, int);
168#ifdef READ_ALLOWED_PATH
169static int read_allowed(const char *);
170#endif
166static int render_string(char **, size_t *);
167static void say(const char *, const char *, ...)
168 __attribute__((__format__ (__printf__, 2, 3)));
169static int set_basedir(const char *, int);
170static int treescan(void);
171static size_t utf8(unsigned int, char [7]);
172
173static int nodb; /* no database changes */
174static int mparse_options; /* abort the parse early */
175static int use_all; /* use all found files */
176static int debug; /* print what we're doing */
177static int warnings; /* warn about crap */
178static int write_utf8; /* write UTF-8 output; else ASCII */
179static int exitcode; /* to be returned by main */
180static enum op op; /* operational mode */
181static char basedir[PATH_MAX]; /* current base directory */
171static int render_string(char **, size_t *);
172static void say(const char *, const char *, ...)
173 __attribute__((__format__ (__printf__, 2, 3)));
174static int set_basedir(const char *, int);
175static int treescan(void);
176static size_t utf8(unsigned int, char [7]);
177
178static int nodb; /* no database changes */
179static int mparse_options; /* abort the parse early */
180static int use_all; /* use all found files */
181static int debug; /* print what we're doing */
182static int warnings; /* warn about crap */
183static int write_utf8; /* write UTF-8 output; else ASCII */
184static int exitcode; /* to be returned by main */
185static enum op op; /* operational mode */
186static char basedir[PATH_MAX]; /* current base directory */
187static size_t basedir_len; /* strlen(basedir) */
182static struct mpage *mpage_head; /* list of distinct manual pages */
183static struct ohash mpages; /* table of distinct manual pages */
184static struct ohash mlinks; /* table of directory entries */
185static struct ohash names; /* table of all names */
186static struct ohash strings; /* table of all strings */
187static uint64_t name_mask;
188
189static const struct mdoc_handler mdoc_handlers[MDOC_MAX - MDOC_Dd] = {

--- 147 unchanged lines hidden (view full) ---

337 memset(&conf, 0, sizeof(conf));
338
339 /*
340 * We accept a few different invocations.
341 * The CHECKOP macro makes sure that invocation styles don't
342 * clobber each other.
343 */
344#define CHECKOP(_op, _ch) do \
188static struct mpage *mpage_head; /* list of distinct manual pages */
189static struct ohash mpages; /* table of distinct manual pages */
190static struct ohash mlinks; /* table of directory entries */
191static struct ohash names; /* table of all names */
192static struct ohash strings; /* table of all strings */
193static uint64_t name_mask;
194
195static const struct mdoc_handler mdoc_handlers[MDOC_MAX - MDOC_Dd] = {

--- 147 unchanged lines hidden (view full) ---

343 memset(&conf, 0, sizeof(conf));
344
345 /*
346 * We accept a few different invocations.
347 * The CHECKOP macro makes sure that invocation styles don't
348 * clobber each other.
349 */
350#define CHECKOP(_op, _ch) do \
345 if (OP_DEFAULT != (_op)) { \
351 if ((_op) != OP_DEFAULT) { \
346 warnx("-%c: Conflicting option", (_ch)); \
347 goto usage; \
348 } while (/*CONSTCOND*/0)
349
350 mparse_options = MPARSE_VALIDATE;
351 path_arg = NULL;
352 op = OP_DEFAULT;
353
352 warnx("-%c: Conflicting option", (_ch)); \
353 goto usage; \
354 } while (/*CONSTCOND*/0)
355
356 mparse_options = MPARSE_VALIDATE;
357 path_arg = NULL;
358 op = OP_DEFAULT;
359
354 while (-1 != (ch = getopt(argc, argv, "aC:Dd:npQT:tu:v")))
360 while ((ch = getopt(argc, argv, "aC:Dd:npQT:tu:v")) != -1)
355 switch (ch) {
356 case 'a':
357 use_all = 1;
358 break;
359 case 'C':
360 CHECKOP(op, ch);
361 path_arg = optarg;
362 op = OP_CONFFILE;

--- 11 unchanged lines hidden (view full) ---

374 break;
375 case 'p':
376 warnings = 1;
377 break;
378 case 'Q':
379 mparse_options |= MPARSE_QUICK;
380 break;
381 case 'T':
361 switch (ch) {
362 case 'a':
363 use_all = 1;
364 break;
365 case 'C':
366 CHECKOP(op, ch);
367 path_arg = optarg;
368 op = OP_CONFFILE;

--- 11 unchanged lines hidden (view full) ---

380 break;
381 case 'p':
382 warnings = 1;
383 break;
384 case 'Q':
385 mparse_options |= MPARSE_QUICK;
386 break;
387 case 'T':
382 if (strcmp(optarg, "utf8")) {
388 if (strcmp(optarg, "utf8") != 0) {
383 warnx("-T%s: Unsupported output format",
384 optarg);
385 goto usage;
386 }
387 write_utf8 = 1;
388 break;
389 case 't':
390 CHECKOP(op, ch);

--- 20 unchanged lines hidden (view full) ---

411 if (nodb) {
412 if (pledge("stdio rpath", NULL) == -1) {
413 warn("pledge");
414 return (int)MANDOCLEVEL_SYSERR;
415 }
416 }
417#endif
418
389 warnx("-T%s: Unsupported output format",
390 optarg);
391 goto usage;
392 }
393 write_utf8 = 1;
394 break;
395 case 't':
396 CHECKOP(op, ch);

--- 20 unchanged lines hidden (view full) ---

417 if (nodb) {
418 if (pledge("stdio rpath", NULL) == -1) {
419 warn("pledge");
420 return (int)MANDOCLEVEL_SYSERR;
421 }
422 }
423#endif
424
419 if (OP_CONFFILE == op && argc > 0) {
425 if (op == OP_CONFFILE && argc > 0) {
420 warnx("-C: Too many arguments");
421 goto usage;
422 }
423
424 exitcode = (int)MANDOCLEVEL_OK;
425 mchars_alloc();
426 mp = mparse_alloc(mparse_options, MANDOC_OS_OTHER, NULL);
427 mandoc_ohash_init(&mpages, 6, offsetof(struct mpage, inodev));
428 mandoc_ohash_init(&mlinks, 6, offsetof(struct mlink, file));
429
426 warnx("-C: Too many arguments");
427 goto usage;
428 }
429
430 exitcode = (int)MANDOCLEVEL_OK;
431 mchars_alloc();
432 mp = mparse_alloc(mparse_options, MANDOC_OS_OTHER, NULL);
433 mandoc_ohash_init(&mpages, 6, offsetof(struct mpage, inodev));
434 mandoc_ohash_init(&mlinks, 6, offsetof(struct mlink, file));
435
430 if (OP_UPDATE == op || OP_DELETE == op || OP_TEST == op) {
436 if (op == OP_UPDATE || op == OP_DELETE || op == OP_TEST) {
431
432 /*
433 * Most of these deal with a specific directory.
434 * Jump into that directory first.
435 */
437
438 /*
439 * Most of these deal with a specific directory.
440 * Jump into that directory first.
441 */
436 if (OP_TEST != op && 0 == set_basedir(path_arg, 1))
442 if (op != OP_TEST && set_basedir(path_arg, 1) == 0)
437 goto out;
438
439 dba = nodb ? dba_new(128) : dba_read(MANDOC_DB);
440 if (dba != NULL) {
441 /*
442 * The existing database is usable. Process
443 * all files specified on the command-line.
444 */

--- 4 unchanged lines hidden (view full) ---

449 dbprune(dba);
450 } else {
451 /* Database missing or corrupt. */
452 if (op != OP_UPDATE || errno != ENOENT)
453 say(MANDOC_DB, "%s: Automatically recreating"
454 " from scratch", strerror(errno));
455 exitcode = (int)MANDOCLEVEL_OK;
456 op = OP_DEFAULT;
443 goto out;
444
445 dba = nodb ? dba_new(128) : dba_read(MANDOC_DB);
446 if (dba != NULL) {
447 /*
448 * The existing database is usable. Process
449 * all files specified on the command-line.
450 */

--- 4 unchanged lines hidden (view full) ---

455 dbprune(dba);
456 } else {
457 /* Database missing or corrupt. */
458 if (op != OP_UPDATE || errno != ENOENT)
459 say(MANDOC_DB, "%s: Automatically recreating"
460 " from scratch", strerror(errno));
461 exitcode = (int)MANDOCLEVEL_OK;
462 op = OP_DEFAULT;
457 if (0 == treescan())
463 if (treescan() == 0)
458 goto out;
459 dba = dba_new(128);
460 }
464 goto out;
465 dba = dba_new(128);
466 }
461 if (OP_DELETE != op)
467 if (op != OP_DELETE)
462 mpages_merge(dba, mp);
463 if (nodb == 0)
464 dbwrite(dba);
465 dba_free(dba);
466 } else {
467 /*
468 * If we have arguments, use them as our manpaths.
469 * If we don't, use man.conf(5).

--- 17 unchanged lines hidden (view full) ---

487 * build a new database and finally move it into place.
488 * Ignore zero-length directories and strip trailing
489 * slashes.
490 */
491 for (j = 0; j < conf.manpath.sz; j++) {
492 sz = strlen(conf.manpath.paths[j]);
493 if (sz && conf.manpath.paths[j][sz - 1] == '/')
494 conf.manpath.paths[j][--sz] = '\0';
468 mpages_merge(dba, mp);
469 if (nodb == 0)
470 dbwrite(dba);
471 dba_free(dba);
472 } else {
473 /*
474 * If we have arguments, use them as our manpaths.
475 * If we don't, use man.conf(5).

--- 17 unchanged lines hidden (view full) ---

493 * build a new database and finally move it into place.
494 * Ignore zero-length directories and strip trailing
495 * slashes.
496 */
497 for (j = 0; j < conf.manpath.sz; j++) {
498 sz = strlen(conf.manpath.paths[j]);
499 if (sz && conf.manpath.paths[j][sz - 1] == '/')
500 conf.manpath.paths[j][--sz] = '\0';
495 if (0 == sz)
501 if (sz == 0)
496 continue;
497
498 if (j) {
499 mandoc_ohash_init(&mpages, 6,
500 offsetof(struct mpage, inodev));
501 mandoc_ohash_init(&mlinks, 6,
502 offsetof(struct mlink, file));
503 }
504
502 continue;
503
504 if (j) {
505 mandoc_ohash_init(&mpages, 6,
506 offsetof(struct mpage, inodev));
507 mandoc_ohash_init(&mlinks, 6,
508 offsetof(struct mlink, file));
509 }
510
505 if ( ! set_basedir(conf.manpath.paths[j], argc > 0))
511 if (set_basedir(conf.manpath.paths[j], argc > 0) == 0)
506 continue;
512 continue;
507 if (0 == treescan())
513 if (treescan() == 0)
508 continue;
509 dba = dba_new(128);
510 mpages_merge(dba, mp);
511 if (nodb == 0)
512 dbwrite(dba);
513 dba_free(dba);
514
515 if (j + 1 < conf.manpath.sz) {

--- 87 unchanged lines hidden (view full) ---

603 * then get handled just like regular files.
604 */
605 case FTS_SL:
606 if (realpath(path, buf) == NULL) {
607 if (warnings)
608 say(path, "&realpath");
609 continue;
610 }
514 continue;
515 dba = dba_new(128);
516 mpages_merge(dba, mp);
517 if (nodb == 0)
518 dbwrite(dba);
519 dba_free(dba);
520
521 if (j + 1 < conf.manpath.sz) {

--- 87 unchanged lines hidden (view full) ---

609 * then get handled just like regular files.
610 */
611 case FTS_SL:
612 if (realpath(path, buf) == NULL) {
613 if (warnings)
614 say(path, "&realpath");
615 continue;
616 }
611 if (strstr(buf, basedir) != buf
612#ifdef HOMEBREWDIR
613 && strstr(buf, HOMEBREWDIR) != buf
617 if (strncmp(buf, basedir, basedir_len) != 0
618#ifdef READ_ALLOWED_PATH
619 && !read_allowed(buf)
614#endif
615 ) {
616 if (warnings) say("",
617 "%s: outside base directory", buf);
618 continue;
619 }
620 /* Use logical inode to avoid mpages dupe. */
621 if (stat(path, ff->fts_statp) == -1) {
622 if (warnings)
623 say(path, "&stat");
624 continue;
625 }
620#endif
621 ) {
622 if (warnings) say("",
623 "%s: outside base directory", buf);
624 continue;
625 }
626 /* Use logical inode to avoid mpages dupe. */
627 if (stat(path, ff->fts_statp) == -1) {
628 if (warnings)
629 say(path, "&stat");
630 continue;
631 }
632 if ((ff->fts_statp->st_mode & S_IFMT) != S_IFREG)
633 continue;
626 /* FALLTHROUGH */
627
628 /*
629 * If we're a regular file, add an mlink by using the
630 * stored directory data and handling the filename.
631 */
632 case FTS_F:
633 if ( ! strcmp(path, MANDOC_DB))

--- 138 unchanged lines hidden (view full) ---

772 *
773 * [./]man*[/<arch>]/<name>.<section>
774 * or
775 * [./]cat<section>[/<arch>]/<name>.0
776 *
777 * See treescan() for the fts(3) version of this.
778 */
779static void
634 /* FALLTHROUGH */
635
636 /*
637 * If we're a regular file, add an mlink by using the
638 * stored directory data and handling the filename.
639 */
640 case FTS_F:
641 if ( ! strcmp(path, MANDOC_DB))

--- 138 unchanged lines hidden (view full) ---

780 *
781 * [./]man*[/<arch>]/<name>.<section>
782 * or
783 * [./]cat<section>[/<arch>]/<name>.0
784 *
785 * See treescan() for the fts(3) version of this.
786 */
787static void
780filescan(const char *file)
788filescan(const char *infile)
781{
789{
782 char buf[PATH_MAX];
783 struct stat st;
784 struct mlink *mlink;
790 struct stat st;
791 struct mlink *mlink;
785 char *p, *start;
792 char *linkfile, *p, *realdir, *start, *usefile;
793 size_t realdir_len;
786
787 assert(use_all);
788
794
795 assert(use_all);
796
789 if (0 == strncmp(file, "./", 2))
790 file += 2;
797 if (strncmp(infile, "./", 2) == 0)
798 infile += 2;
791
792 /*
793 * We have to do lstat(2) before realpath(3) loses
794 * the information whether this is a symbolic link.
795 * We need to know that because for symbolic links,
796 * we want to use the original file name, while for
797 * regular files, we want to use the real path.
798 */
799
800 /*
801 * We have to do lstat(2) before realpath(3) loses
802 * the information whether this is a symbolic link.
803 * We need to know that because for symbolic links,
804 * we want to use the original file name, while for
805 * regular files, we want to use the real path.
806 */
799 if (-1 == lstat(file, &st)) {
807 if (lstat(infile, &st) == -1) {
800 exitcode = (int)MANDOCLEVEL_BADARG;
808 exitcode = (int)MANDOCLEVEL_BADARG;
801 say(file, "&lstat");
809 say(infile, "&lstat");
802 return;
810 return;
803 } else if (0 == ((S_IFREG | S_IFLNK) & st.st_mode)) {
811 } else if (S_ISREG(st.st_mode) == 0 && S_ISLNK(st.st_mode) == 0) {
804 exitcode = (int)MANDOCLEVEL_BADARG;
812 exitcode = (int)MANDOCLEVEL_BADARG;
805 say(file, "Not a regular file");
813 say(infile, "Not a regular file");
806 return;
807 }
808
809 /*
810 * We have to resolve the file name to the real path
811 * in any case for the base directory check.
812 */
814 return;
815 }
816
817 /*
818 * We have to resolve the file name to the real path
819 * in any case for the base directory check.
820 */
813 if (NULL == realpath(file, buf)) {
821 if ((usefile = realpath(infile, NULL)) == NULL) {
814 exitcode = (int)MANDOCLEVEL_BADARG;
822 exitcode = (int)MANDOCLEVEL_BADARG;
815 say(file, "&realpath");
823 say(infile, "&realpath");
816 return;
817 }
818
824 return;
825 }
826
819 if (OP_TEST == op)
820 start = buf;
821 else if (strstr(buf, basedir) == buf)
822 start = buf + strlen(basedir);
823#ifdef HOMEBREWDIR
824 else if (strstr(buf, HOMEBREWDIR) == buf)
825 start = buf;
827 if (op == OP_TEST)
828 start = usefile;
829 else if (strncmp(usefile, basedir, basedir_len) == 0)
830 start = usefile + basedir_len;
831#ifdef READ_ALLOWED_PATH
832 else if (read_allowed(usefile))
833 start = usefile;
826#endif
827 else {
828 exitcode = (int)MANDOCLEVEL_BADARG;
834#endif
835 else {
836 exitcode = (int)MANDOCLEVEL_BADARG;
829 say("", "%s: outside base directory", buf);
837 say("", "%s: outside base directory", infile);
838 free(usefile);
830 return;
831 }
832
833 /*
834 * Now we are sure the file is inside our tree.
835 * If it is a symbolic link, ignore the real path
836 * and use the original name.
839 return;
840 }
841
842 /*
843 * Now we are sure the file is inside our tree.
844 * If it is a symbolic link, ignore the real path
845 * and use the original name.
837 * This implies passing stuff like "cat1/../man1/foo.1"
838 * on the command line won't work. So don't do that.
839 * Note the stat(2) can still fail if the link target
840 * doesn't exist.
841 */
846 */
842 if (S_IFLNK & st.st_mode) {
843 if (-1 == stat(buf, &st)) {
847 do {
848 if (S_ISLNK(st.st_mode) == 0)
849 break;
850
851 /*
852 * Some implementations of realpath(3) may succeed
853 * even if the target of the link does not exist,
854 * so check again for extra safety.
855 */
856 if (stat(usefile, &st) == -1) {
844 exitcode = (int)MANDOCLEVEL_BADARG;
857 exitcode = (int)MANDOCLEVEL_BADARG;
845 say(file, "&stat");
858 say(infile, "&stat");
859 free(usefile);
846 return;
847 }
860 return;
861 }
848 if (strlcpy(buf, file, sizeof(buf)) >= sizeof(buf)) {
849 say(file, "Filename too long");
850 return;
862 linkfile = mandoc_strdup(infile);
863 if (op == OP_TEST) {
864 free(usefile);
865 start = usefile = linkfile;
866 break;
851 }
867 }
852 start = buf;
853 if (OP_TEST != op && strstr(buf, basedir) == buf)
854 start += strlen(basedir);
855 }
868 if (strncmp(infile, basedir, basedir_len) == 0) {
869 free(usefile);
870 usefile = linkfile;
871 start = usefile + basedir_len;
872 break;
873 }
856
874
875 /*
876 * This symbolic link points into the basedir
877 * from the outside. Let's see whether any of
878 * the parent directories resolve to the basedir.
879 */
880 p = strchr(linkfile, '\0');
881 do {
882 while (*--p != '/')
883 continue;
884 *p = '\0';
885 if ((realdir = realpath(linkfile, NULL)) == NULL) {
886 exitcode = (int)MANDOCLEVEL_BADARG;
887 say(infile, "&realpath");
888 free(linkfile);
889 free(usefile);
890 return;
891 }
892 realdir_len = strlen(realdir) + 1;
893 free(realdir);
894 *p = '/';
895 } while (realdir_len > basedir_len);
896
897 /*
898 * If one of the directories resolves to the basedir,
899 * use the rest of the original name.
900 * Otherwise, the best we can do
901 * is to use the filename pointed to.
902 */
903 if (realdir_len == basedir_len) {
904 free(usefile);
905 usefile = linkfile;
906 start = p + 1;
907 } else {
908 free(linkfile);
909 start = usefile + basedir_len;
910 }
911 } while (/* CONSTCOND */ 0);
912
857 mlink = mandoc_calloc(1, sizeof(struct mlink));
858 mlink->dform = FORM_NONE;
859 if (strlcpy(mlink->file, start, sizeof(mlink->file)) >=
860 sizeof(mlink->file)) {
861 say(start, "Filename too long");
862 free(mlink);
913 mlink = mandoc_calloc(1, sizeof(struct mlink));
914 mlink->dform = FORM_NONE;
915 if (strlcpy(mlink->file, start, sizeof(mlink->file)) >=
916 sizeof(mlink->file)) {
917 say(start, "Filename too long");
918 free(mlink);
919 free(usefile);
863 return;
864 }
865
866 /*
867 * In test mode or when the original name is absolute
868 * but outside our tree, guess the base directory.
869 */
870
920 return;
921 }
922
923 /*
924 * In test mode or when the original name is absolute
925 * but outside our tree, guess the base directory.
926 */
927
871 if (op == OP_TEST || (start == buf && *start == '/')) {
872 if (strncmp(buf, "man/", 4) == 0)
873 start = buf + 4;
874 else if ((start = strstr(buf, "/man/")) != NULL)
928 if (op == OP_TEST || (start == usefile && *start == '/')) {
929 if (strncmp(usefile, "man/", 4) == 0)
930 start = usefile + 4;
931 else if ((start = strstr(usefile, "/man/")) != NULL)
875 start += 5;
876 else
932 start += 5;
933 else
877 start = buf;
934 start = usefile;
878 }
879
880 /*
881 * First try to guess our directory structure.
882 * If we find a separator, try to look for man* or cat*.
883 * If we find one of these and what's underneath is a directory,
884 * assume it's an architecture.
885 */
935 }
936
937 /*
938 * First try to guess our directory structure.
939 * If we find a separator, try to look for man* or cat*.
940 * If we find one of these and what's underneath is a directory,
941 * assume it's an architecture.
942 */
886 if (NULL != (p = strchr(start, '/'))) {
943 if ((p = strchr(start, '/')) != NULL) {
887 *p++ = '\0';
944 *p++ = '\0';
888 if (0 == strncmp(start, "man", 3)) {
945 if (strncmp(start, "man", 3) == 0) {
889 mlink->dform = FORM_SRC;
890 mlink->dsec = start + 3;
946 mlink->dform = FORM_SRC;
947 mlink->dsec = start + 3;
891 } else if (0 == strncmp(start, "cat", 3)) {
948 } else if (strncmp(start, "cat", 3) == 0) {
892 mlink->dform = FORM_CAT;
893 mlink->dsec = start + 3;
894 }
895
896 start = p;
949 mlink->dform = FORM_CAT;
950 mlink->dsec = start + 3;
951 }
952
953 start = p;
897 if (NULL != mlink->dsec && NULL != (p = strchr(start, '/'))) {
954 if (mlink->dsec != NULL && (p = strchr(start, '/')) != NULL) {
898 *p++ = '\0';
899 mlink->arch = start;
900 start = p;
901 }
902 }
903
904 /*
905 * Now check the file suffix.
906 * Suffix of `.0' indicates a catpage, `.1-9' is a manpage.
907 */
908 p = strrchr(start, '\0');
955 *p++ = '\0';
956 mlink->arch = start;
957 start = p;
958 }
959 }
960
961 /*
962 * Now check the file suffix.
963 * Suffix of `.0' indicates a catpage, `.1-9' is a manpage.
964 */
965 p = strrchr(start, '\0');
909 while (p-- > start && '/' != *p && '.' != *p)
910 /* Loop. */ ;
966 while (p-- > start && *p != '/' && *p != '.')
967 continue;
911
968
912 if ('.' == *p) {
969 if (*p == '.') {
913 *p++ = '\0';
914 mlink->fsec = p;
915 }
916
917 /*
918 * Now try to parse the name.
919 * Use the filename portion of the path.
920 */
921 mlink->name = start;
970 *p++ = '\0';
971 mlink->fsec = p;
972 }
973
974 /*
975 * Now try to parse the name.
976 * Use the filename portion of the path.
977 */
978 mlink->name = start;
922 if (NULL != (p = strrchr(start, '/'))) {
979 if ((p = strrchr(start, '/')) != NULL) {
923 mlink->name = p + 1;
924 *p = '\0';
925 }
926 mlink_add(mlink, &st);
980 mlink->name = p + 1;
981 *p = '\0';
982 }
983 mlink_add(mlink, &st);
984 free(usefile);
927}
928
929static void
930mlink_add(struct mlink *mlink, const struct stat *st)
931{
932 struct inodev inodev;
933 struct mpage *mpage;
934 unsigned int slot;

--- 246 unchanged lines hidden (view full) ---

1181 mlink = mlink->next;
1182 }
1183
1184 /* Move all links to the target. */
1185
1186 mlink->next = mlink_dest->next;
1187 mlink_dest->next = mpage->mlinks;
1188 mpage->mlinks = NULL;
985}
986
987static void
988mlink_add(struct mlink *mlink, const struct stat *st)
989{
990 struct inodev inodev;
991 struct mpage *mpage;
992 unsigned int slot;

--- 246 unchanged lines hidden (view full) ---

1239 mlink = mlink->next;
1240 }
1241
1242 /* Move all links to the target. */
1243
1244 mlink->next = mlink_dest->next;
1245 mlink_dest->next = mpage->mlinks;
1246 mpage->mlinks = NULL;
1247 goto nextpage;
1189 }
1248 }
1190 goto nextpage;
1191 } else if (meta != NULL && meta->macroset == MACROSET_MDOC) {
1249 meta->macroset = MACROSET_NONE;
1250 }
1251 if (meta != NULL && meta->macroset == MACROSET_MDOC) {
1192 mpage->form = FORM_SRC;
1193 mpage->sec = meta->msec;
1194 mpage->sec = mandoc_strdup(
1195 mpage->sec == NULL ? "" : mpage->sec);
1196 mpage->arch = meta->arch;
1197 mpage->arch = mandoc_strdup(
1198 mpage->arch == NULL ? "" : mpage->arch);
1199 mpage->title = mandoc_strdup(meta->title);
1200 } else if (meta != NULL && meta->macroset == MACROSET_MAN) {
1201 if (*meta->msec != '\0' || *meta->title != '\0') {
1202 mpage->form = FORM_SRC;
1203 mpage->sec = mandoc_strdup(meta->msec);
1204 mpage->arch = mandoc_strdup(mlink->arch);
1205 mpage->title = mandoc_strdup(meta->title);
1206 } else
1207 meta = NULL;
1208 }
1209
1210 assert(mpage->desc == NULL);
1252 mpage->form = FORM_SRC;
1253 mpage->sec = meta->msec;
1254 mpage->sec = mandoc_strdup(
1255 mpage->sec == NULL ? "" : mpage->sec);
1256 mpage->arch = meta->arch;
1257 mpage->arch = mandoc_strdup(
1258 mpage->arch == NULL ? "" : mpage->arch);
1259 mpage->title = mandoc_strdup(meta->title);
1260 } else if (meta != NULL && meta->macroset == MACROSET_MAN) {
1261 if (*meta->msec != '\0' || *meta->title != '\0') {
1262 mpage->form = FORM_SRC;
1263 mpage->sec = mandoc_strdup(meta->msec);
1264 mpage->arch = mandoc_strdup(mlink->arch);
1265 mpage->title = mandoc_strdup(meta->title);
1266 } else
1267 meta = NULL;
1268 }
1269
1270 assert(mpage->desc == NULL);
1211 if (meta == NULL) {
1212 mpage->form = FORM_CAT;
1271 if (meta == NULL || meta->sodest != NULL) {
1213 mpage->sec = mandoc_strdup(mlink->dsec);
1214 mpage->arch = mandoc_strdup(mlink->arch);
1215 mpage->title = mandoc_strdup(mlink->name);
1272 mpage->sec = mandoc_strdup(mlink->dsec);
1273 mpage->arch = mandoc_strdup(mlink->arch);
1274 mpage->title = mandoc_strdup(mlink->name);
1216 parse_cat(mpage, fd);
1275 if (meta == NULL) {
1276 mpage->form = FORM_CAT;
1277 parse_cat(mpage, fd);
1278 } else
1279 mpage->form = FORM_SRC;
1217 } else if (meta->macroset == MACROSET_MDOC)
1218 parse_mdoc(mpage, meta, meta->first);
1219 else
1220 parse_man(mpage, meta, meta->first);
1221 if (mpage->desc == NULL) {
1222 mpage->desc = mandoc_strdup(mlink->name);
1223 if (warnings)
1224 say(mlink->file, "No one-line description, "

--- 1015 unchanged lines hidden (view full) ---

2240}
2241
2242static int
2243set_basedir(const char *targetdir, int report_baddir)
2244{
2245 static char startdir[PATH_MAX];
2246 static int getcwd_status; /* 1 = ok, 2 = failure */
2247 static int chdir_status; /* 1 = changed directory */
1280 } else if (meta->macroset == MACROSET_MDOC)
1281 parse_mdoc(mpage, meta, meta->first);
1282 else
1283 parse_man(mpage, meta, meta->first);
1284 if (mpage->desc == NULL) {
1285 mpage->desc = mandoc_strdup(mlink->name);
1286 if (warnings)
1287 say(mlink->file, "No one-line description, "

--- 1015 unchanged lines hidden (view full) ---

2303}
2304
2305static int
2306set_basedir(const char *targetdir, int report_baddir)
2307{
2308 static char startdir[PATH_MAX];
2309 static int getcwd_status; /* 1 = ok, 2 = failure */
2310 static int chdir_status; /* 1 = changed directory */
2248 char *cp;
2249
2250 /*
2251 * Remember the original working directory, if possible.
2252 * This will be needed if the second or a later directory
2253 * on the command line is given as a relative path.
2254 * Do not error out if the current directory is not
2255 * searchable: Maybe it won't be needed after all.
2256 */
2311
2312 /*
2313 * Remember the original working directory, if possible.
2314 * This will be needed if the second or a later directory
2315 * on the command line is given as a relative path.
2316 * Do not error out if the current directory is not
2317 * searchable: Maybe it won't be needed after all.
2318 */
2257 if (0 == getcwd_status) {
2258 if (NULL == getcwd(startdir, sizeof(startdir))) {
2319 if (getcwd_status == 0) {
2320 if (getcwd(startdir, sizeof(startdir)) == NULL) {
2259 getcwd_status = 2;
2260 (void)strlcpy(startdir, strerror(errno),
2261 sizeof(startdir));
2262 } else
2263 getcwd_status = 1;
2264 }
2265
2266 /*
2267 * We are leaving the old base directory.
2268 * Do not use it any longer, not even for messages.
2269 */
2270 *basedir = '\0';
2321 getcwd_status = 2;
2322 (void)strlcpy(startdir, strerror(errno),
2323 sizeof(startdir));
2324 } else
2325 getcwd_status = 1;
2326 }
2327
2328 /*
2329 * We are leaving the old base directory.
2330 * Do not use it any longer, not even for messages.
2331 */
2332 *basedir = '\0';
2333 basedir_len = 0;
2271
2272 /*
2273 * If and only if the directory was changed earlier and
2274 * the next directory to process is given as a relative path,
2275 * first go back, or bail out if that is impossible.
2276 */
2334
2335 /*
2336 * If and only if the directory was changed earlier and
2337 * the next directory to process is given as a relative path,
2338 * first go back, or bail out if that is impossible.
2339 */
2277 if (chdir_status && '/' != *targetdir) {
2278 if (2 == getcwd_status) {
2340 if (chdir_status && *targetdir != '/') {
2341 if (getcwd_status == 2) {
2279 exitcode = (int)MANDOCLEVEL_SYSERR;
2280 say("", "getcwd: %s", startdir);
2281 return 0;
2282 }
2342 exitcode = (int)MANDOCLEVEL_SYSERR;
2343 say("", "getcwd: %s", startdir);
2344 return 0;
2345 }
2283 if (-1 == chdir(startdir)) {
2346 if (chdir(startdir) == -1) {
2284 exitcode = (int)MANDOCLEVEL_SYSERR;
2285 say("", "&chdir %s", startdir);
2286 return 0;
2287 }
2288 }
2289
2290 /*
2291 * Always resolve basedir to the canonicalized absolute
2292 * pathname and append a trailing slash, such that
2293 * we can reliably check whether files are inside.
2294 */
2347 exitcode = (int)MANDOCLEVEL_SYSERR;
2348 say("", "&chdir %s", startdir);
2349 return 0;
2350 }
2351 }
2352
2353 /*
2354 * Always resolve basedir to the canonicalized absolute
2355 * pathname and append a trailing slash, such that
2356 * we can reliably check whether files are inside.
2357 */
2295 if (NULL == realpath(targetdir, basedir)) {
2358 if (realpath(targetdir, basedir) == NULL) {
2296 if (report_baddir || errno != ENOENT) {
2297 exitcode = (int)MANDOCLEVEL_BADARG;
2298 say("", "&%s: realpath", targetdir);
2299 }
2359 if (report_baddir || errno != ENOENT) {
2360 exitcode = (int)MANDOCLEVEL_BADARG;
2361 say("", "&%s: realpath", targetdir);
2362 }
2363 *basedir = '\0';
2300 return 0;
2364 return 0;
2301 } else if (-1 == chdir(basedir)) {
2365 } else if (chdir(basedir) == -1) {
2302 if (report_baddir || errno != ENOENT) {
2303 exitcode = (int)MANDOCLEVEL_BADARG;
2304 say("", "&chdir");
2305 }
2366 if (report_baddir || errno != ENOENT) {
2367 exitcode = (int)MANDOCLEVEL_BADARG;
2368 say("", "&chdir");
2369 }
2370 *basedir = '\0';
2306 return 0;
2307 }
2308 chdir_status = 1;
2371 return 0;
2372 }
2373 chdir_status = 1;
2309 cp = strchr(basedir, '\0');
2310 if ('/' != cp[-1]) {
2311 if (cp - basedir >= PATH_MAX - 1) {
2374 basedir_len = strlen(basedir);
2375 if (basedir[basedir_len - 1] != '/') {
2376 if (basedir_len >= PATH_MAX - 1) {
2312 exitcode = (int)MANDOCLEVEL_SYSERR;
2313 say("", "Filename too long");
2377 exitcode = (int)MANDOCLEVEL_SYSERR;
2378 say("", "Filename too long");
2379 *basedir = '\0';
2380 basedir_len = 0;
2314 return 0;
2315 }
2381 return 0;
2382 }
2316 *cp++ = '/';
2317 *cp = '\0';
2383 basedir[basedir_len++] = '/';
2384 basedir[basedir_len] = '\0';
2318 }
2319 return 1;
2320}
2321
2385 }
2386 return 1;
2387}
2388
2389#ifdef READ_ALLOWED_PATH
2390static int
2391read_allowed(const char *candidate)
2392{
2393 const char *cp;
2394 size_t len;
2395
2396 for (cp = READ_ALLOWED_PATH;; cp += len) {
2397 while (*cp == ':')
2398 cp++;
2399 if (*cp == '\0')
2400 return 0;
2401 len = strcspn(cp, ":");
2402 if (strncmp(candidate, cp, len) == 0)
2403 return 1;
2404 }
2405}
2406#endif
2407
2322static void
2323say(const char *file, const char *format, ...)
2324{
2325 va_list ap;
2326 int use_errno;
2327
2408static void
2409say(const char *file, const char *format, ...)
2410{
2411 va_list ap;
2412 int use_errno;
2413
2328 if ('\0' != *basedir)
2414 if (*basedir != '\0')
2329 fprintf(stderr, "%s", basedir);
2415 fprintf(stderr, "%s", basedir);
2330 if ('\0' != *basedir && '\0' != *file)
2416 if (*basedir != '\0' && *file != '\0')
2331 fputc('/', stderr);
2417 fputc('/', stderr);
2332 if ('\0' != *file)
2418 if (*file != '\0')
2333 fprintf(stderr, "%s", file);
2334
2335 use_errno = 1;
2419 fprintf(stderr, "%s", file);
2420
2421 use_errno = 1;
2336 if (NULL != format) {
2422 if (format != NULL) {
2337 switch (*format) {
2338 case '&':
2339 format++;
2340 break;
2341 case '\0':
2342 format = NULL;
2343 break;
2344 default:
2345 use_errno = 0;
2346 break;
2347 }
2348 }
2423 switch (*format) {
2424 case '&':
2425 format++;
2426 break;
2427 case '\0':
2428 format = NULL;
2429 break;
2430 default:
2431 use_errno = 0;
2432 break;
2433 }
2434 }
2349 if (NULL != format) {
2350 if ('\0' != *basedir || '\0' != *file)
2435 if (format != NULL) {
2436 if (*basedir != '\0' || *file != '\0')
2351 fputs(": ", stderr);
2352 va_start(ap, format);
2353 vfprintf(stderr, format, ap);
2354 va_end(ap);
2355 }
2356 if (use_errno) {
2437 fputs(": ", stderr);
2438 va_start(ap, format);
2439 vfprintf(stderr, format, ap);
2440 va_end(ap);
2441 }
2442 if (use_errno) {
2357 if ('\0' != *basedir || '\0' != *file || NULL != format)
2443 if (*basedir != '\0' || *file != '\0' || format != NULL)
2358 fputs(": ", stderr);
2359 perror(NULL);
2360 } else
2361 fputc('\n', stderr);
2362}
2444 fputs(": ", stderr);
2445 perror(NULL);
2446 } else
2447 fputc('\n', stderr);
2448}