xref: /freebsd/contrib/mandoc/catman.c (revision 06410c1b51637e5e1f392d553b5008948af58014)
1 /* $Id: catman.c,v 1.30 2025/07/13 14:15:26 schwarze Exp $ */
2 /*
3  * Copyright (c) 2017, 2025 Ingo Schwarze <schwarze@openbsd.org>
4  * Copyright (c) 2017 Michael Stapelberg <stapelberg@debian.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 #include "config.h"
19 
20 #if NEED_XPG4_2
21 #define _XPG4_2
22 #endif
23 
24 #include <sys/types.h>
25 #include <sys/socket.h>
26 #include <sys/stat.h>
27 
28 #include <assert.h>
29 #if HAVE_ERR
30 #include <err.h>
31 #endif
32 #include <errno.h>
33 #include <fcntl.h>
34 #if HAVE_FTS
35 #include <fts.h>
36 #else
37 #include "compat_fts.h"
38 #endif
39 #include <signal.h>
40 #include <stdint.h>
41 #include <stdio.h>
42 #include <stdlib.h>
43 #include <string.h>
44 #include <time.h>
45 #include <unistd.h>
46 
/* Verbosity level; main() increments it once per -v option. */
int		verbose_flag = 0;

/*
 * Number of the most recently caught signal, or 0 if none so far.
 * The object is stored to from signal_handler() and polled from the
 * main program, so it has to be volatile in addition to being of
 * type sig_atomic_t; otherwise the compiler would be entitled to
 * keep a stale copy around instead of reloading it.
 */
volatile sig_atomic_t	got_signal = 0;

int	 process_manpage(int, int, const char *);
int	 process_tree(int, int);
void	 run_mandocd(int, const char *, const char *)
		__attribute__((__noreturn__));
void	 signal_handler(int);
ssize_t	 sock_fd_write(int, int, int, int);
void	 usage(void) __attribute__((__noreturn__));
57 
58 
59 void
60 signal_handler(int signum)
61 {
62 	got_signal = signum;
63 }
64 
/*
 * Replace the current process image with mandocd(8).
 * The number of the socket descriptor is handed over as the final
 * command line argument, outtype follows -T, and defos follows -I
 * unless it is NULL.  This function never returns: when formatting
 * the descriptor number or the exec itself fails, it exits with
 * status 1 by way of err(3).
 */
void
run_mandocd(int sockfd, const char *outtype, const char* defos)
{
	char	 fdarg[10];
	int	 n;

	n = snprintf(fdarg, sizeof(fdarg), "%d", sockfd);
	if (n < 0)
		err(1, "snprintf");
	if (n >= (int)sizeof(fdarg)) {
		/* The number did not fit into the buffer. */
		errno = EOVERFLOW;
		err(1, "snprintf");
	}

	if (defos != NULL)
		execlp("mandocd", "mandocd", "-T", outtype,
		    "-I", defos, fdarg, (char *)NULL);
	else
		execlp("mandocd", "mandocd", "-T", outtype,
		    fdarg, (char *)NULL);

	/* Only reached when execlp(3) failed. */
	err(1, "exec(mandocd)");
}
86 
/*
 * Send one dummy byte over the socket fd, accompanied by a single
 * SCM_RIGHTS control message carrying the three file descriptors
 * fd0, fd1, and fd2.
 * Returns the result of sendmsg(2): the number of bytes sent on
 * success or -1 on failure.  On failure, a warning is printed and
 * errno still holds the error reported by sendmsg(2) on return.
 */
ssize_t
sock_fd_write(int fd, int fd0, int fd1, int fd2)
{
	const struct timespec timeout = { 0, 10000000 };  /* 0.01 s */
	struct msghdr	 msg;
	struct iovec	 iov;
	union {
		struct cmsghdr	 cmsghdr;
		char		 control[CMSG_SPACE(3 * sizeof(int))];
	} cmsgu;
	struct cmsghdr	*cmsg;
	int		 fds[3];
	int		 saved_errno;
	ssize_t		 sz;
	unsigned char	 dummy[1] = {'\0'};

	fds[0] = fd0;
	fds[1] = fd1;
	fds[2] = fd2;

	iov.iov_base = dummy;
	iov.iov_len = sizeof(dummy);

	/*
	 * Start from all zeros, such that neither msg_flags nor any
	 * padding inside the control buffer is handed to the kernel
	 * with indeterminate content.
	 */
	memset(&msg, 0, sizeof(msg));
	memset(&cmsgu, 0, sizeof(cmsgu));

	msg.msg_name = NULL;
	msg.msg_namelen = 0;
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	msg.msg_control = cmsgu.control;
	msg.msg_controllen = sizeof(cmsgu.control);

	cmsg = CMSG_FIRSTHDR(&msg);
	cmsg->cmsg_len = CMSG_LEN(3 * sizeof(int));
	cmsg->cmsg_level = SOL_SOCKET;
	cmsg->cmsg_type = SCM_RIGHTS;

	/*
	 * Copy the descriptors with memcpy(3) rather than storing
	 * through a casted pointer, because the alignment of
	 * CMSG_DATA() should not be relied upon.
	 */
	memcpy(CMSG_DATA(cmsg), fds, sizeof(fds));

	/*
	 * It appears that on some systems, sendmsg(3)
	 * may return EAGAIN even in blocking mode.
	 * Seen for example on Oracle Solaris 11.2.
	 * The sleeping time was chosen by experimentation,
	 * to neither cause more than a handful of retries
	 * in normal operation nor unnecessary delays.
	 */
	while ((sz = sendmsg(fd, &msg, 0)) == -1) {
		if (errno != EAGAIN) {
			/* Callers look at errno; warn(3) may change it. */
			saved_errno = errno;
			warn("FATAL: sendmsg");
			errno = saved_errno;
			break;
		}
		nanosleep(&timeout, NULL);
	}
	return sz;
}
140 
141 int
142 process_manpage(int srv_fd, int dstdir_fd, const char *path)
143 {
144 	int	 in_fd, out_fd;
145 	int	 irc;
146 
147 	if ((in_fd = open(path, O_RDONLY)) == -1) {
148 		warn("open %s for reading", path);
149 		fflush(stderr);
150 		return 0;
151 	}
152 
153 	if ((out_fd = openat(dstdir_fd, path,
154 	    O_WRONLY | O_NOFOLLOW | O_CREAT | O_TRUNC,
155 	    S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) == -1) {
156 		warn("openat %s for writing", path);
157 		fflush(stderr);
158 		close(in_fd);
159 		return 0;
160 	}
161 
162 	irc = sock_fd_write(srv_fd, in_fd, out_fd, STDERR_FILENO);
163 
164 	close(in_fd);
165 	close(out_fd);
166 
167 	return irc;
168 }
169 
170 int
171 process_tree(int srv_fd, int dstdir_fd)
172 {
173 	const struct timespec timeout = { 0, 10000000 };  /* 0.01 s */
174 	const int	 max_inflight = 16;
175 
176 	FTS		*ftsp;
177 	FTSENT		*entry;
178 	const char	*argv[2];
179 	const char	*path;
180 	int		 inflight, irc, decr, fatal;
181 	int		 gooddirs, baddirs, goodfiles, badfiles;
182 	char		 dummy[1];
183 
184 	argv[0] = ".";
185 	argv[1] = (char *)NULL;
186 
187 	if ((ftsp = fts_open((char * const *)argv,
188 	    FTS_PHYSICAL | FTS_NOCHDIR, NULL)) == NULL) {
189 		warn("fts_open");
190 		return -1;
191 	}
192 
193 	if (verbose_flag >= 2) {
194 		warnx("allowing up to %d files in flight", max_inflight);
195 		fflush(stderr);
196 	}
197 	inflight = fatal = gooddirs = baddirs = goodfiles = badfiles = 0;
198 	while (fatal == 0 && got_signal == 0 &&
199 	    (entry = fts_read(ftsp)) != NULL) {
200 		if (inflight >= max_inflight) {
201 			while (recv(srv_fd, dummy, sizeof(dummy), 0) == -1) {
202 				if (errno != EAGAIN) {
203 					warn("FATAL: recv");
204 					fatal = errno;
205 					break;
206 				}
207 				nanosleep(&timeout, NULL);
208 			}
209 			if (fatal != 0)
210 				break;
211 			decr = 1;
212 			while ((irc = recv(srv_fd, dummy, sizeof(dummy),
213 			    MSG_DONTWAIT)) > 0)
214 				decr++;
215 			assert(inflight >= decr);
216 			if (verbose_flag >= 2 && decr > 1) {
217 				warnx("files in flight: %d - %d = %d",
218 				    inflight, decr, inflight - decr);
219 				fflush(stderr);
220 			}
221 			inflight -= decr;
222 			if (irc == 0) {
223 				errno = ECONNRESET;
224 				inflight = -1;
225 			}
226 			if (errno != EAGAIN) {
227 				warn("FATAL: recv");
228 				fatal = errno;
229 				break;
230 			}
231 		}
232 		path = entry->fts_path + 2;
233 		switch (entry->fts_info) {
234 		case FTS_F:
235 			switch (process_manpage(srv_fd, dstdir_fd, path)) {
236 			case -1:
237 				fatal = errno;
238 				break;
239 			case 0:
240 				badfiles++;
241 				break;
242 			default:
243 				goodfiles++;
244 				inflight++;
245 				break;
246 			}
247 			break;
248 		case FTS_D:
249 			if (*path != '\0' &&
250 			    mkdirat(dstdir_fd, path, S_IRWXU | S_IRGRP |
251 			      S_IXGRP | S_IROTH | S_IXOTH) == -1 &&
252 			    errno != EEXIST) {
253 				warn("mkdirat %s", path);
254 				fflush(stderr);
255 				(void)fts_set(ftsp, entry, FTS_SKIP);
256 				baddirs++;
257 			} else
258 				gooddirs++;
259 			break;
260 		case FTS_DP:
261 			break;
262 		case FTS_DNR:
263 			warnx("directory %s unreadable: %s",
264 			    path, strerror(entry->fts_errno));
265 			fflush(stderr);
266 			baddirs++;
267 			break;
268 		case FTS_DC:
269 			warnx("directory %s causes cycle", path);
270 			fflush(stderr);
271 			baddirs++;
272 			break;
273 		case FTS_ERR:
274 		case FTS_NS:
275 			warnx("file %s: %s",
276 			    path, strerror(entry->fts_errno));
277 			fflush(stderr);
278 			badfiles++;
279 			break;
280 		default:
281 			warnx("file %s: not a regular file", path);
282 			fflush(stderr);
283 			badfiles++;
284 			break;
285 		}
286 	}
287 	if (got_signal != 0) {
288 		switch (got_signal) {
289 		case SIGCHLD:
290 			warnx("FATAL: mandocd child died: got SIGCHLD");
291 			break;
292 		case SIGPIPE:
293 			warnx("FATAL: mandocd child died: got SIGPIPE");
294 			break;
295 		default:
296 			warnx("FATAL: signal SIG%s", sys_signame[got_signal]);
297 			break;
298 		}
299 		inflight = -1;
300 		fatal = 1;
301 	} else if (fatal == 0 && (fatal = errno) != 0)
302 		warn("FATAL: fts_read");
303 
304 	fts_close(ftsp);
305 	if (verbose_flag >= 2 && inflight > 0) {
306 		warnx("waiting for %d files in flight", inflight);
307 		fflush(stderr);
308 	}
309 	while (inflight > 0) {
310 		irc = recv(srv_fd, dummy, sizeof(dummy), 0);
311 		if (irc > 0)
312 			inflight--;
313 		else if (irc == -1 && errno == EAGAIN)
314 			nanosleep(&timeout, NULL);
315 		else {
316 			if (irc == 0)
317 				errno = ECONNRESET;
318 			warn("recv");
319 			inflight = -1;
320 		}
321 	}
322 	if (verbose_flag)
323 		warnx("processed %d files in %d directories",
324 		    goodfiles, gooddirs);
325 	if (baddirs > 0)
326 		warnx("skipped %d %s due to errors", baddirs,
327 		    baddirs == 1 ? "directory" : "directories");
328 	if (badfiles > 0)
329 		warnx("skipped %d %s due to errors", badfiles,
330 		    badfiles == 1 ? "file" : "files");
331 	if (fatal != 0) {
332 		warnx("processing aborted due to fatal error, "
333 		    "results are probably incomplete");
334 		inflight = -1;
335 	}
336 	return inflight;
337 }
338 
339 int
340 main(int argc, char **argv)
341 {
342 	struct sigaction sa;
343 	const char	*defos, *outtype;
344 	int		 srv_fds[2];
345 	int		 dstdir_fd;
346 	int		 opt;
347 	pid_t		 pid;
348 
349 	defos = NULL;
350 	outtype = "ascii";
351 	while ((opt = getopt(argc, argv, "I:T:v")) != -1) {
352 		switch (opt) {
353 		case 'I':
354 			defos = optarg;
355 			break;
356 		case 'T':
357 			outtype = optarg;
358 			break;
359 		case 'v':
360 			verbose_flag += 1;
361 			break;
362 		default:
363 			usage();
364 		}
365 	}
366 
367 	if (argc > 0) {
368 		argc -= optind;
369 		argv += optind;
370 	}
371 	if (argc != 2) {
372 		switch (argc) {
373 		case 0:
374 			warnx("missing arguments: srcdir and dstdir");
375 			break;
376 		case 1:
377 			warnx("missing argument: dstdir");
378 			break;
379 		default:
380 			warnx("too many arguments: %s", argv[2]);
381 			break;
382 		}
383 		usage();
384 	}
385 
386 	memset(&sa, 0, sizeof(sa));
387 	sa.sa_handler = &signal_handler;
388 	sa.sa_flags = SA_NOCLDWAIT;
389 	if (sigfillset(&sa.sa_mask) == -1)
390 		err(1, "sigfillset");
391 	if (sigaction(SIGHUP, &sa, NULL) == -1)
392 		err(1, "sigaction(SIGHUP)");
393 	if (sigaction(SIGINT, &sa, NULL) == -1)
394 		err(1, "sigaction(SIGINT)");
395 	if (sigaction(SIGPIPE, &sa, NULL) == -1)
396 		err(1, "sigaction(SIGPIPE)");
397 	if (sigaction(SIGTERM, &sa, NULL) == -1)
398 		err(1, "sigaction(SIGTERM)");
399 	if (sigaction(SIGCHLD, &sa, NULL) == -1)
400 		err(1, "sigaction(SIGCHLD)");
401 
402 	if (socketpair(AF_LOCAL, SOCK_STREAM, AF_UNSPEC, srv_fds) == -1)
403 		err(1, "socketpair");
404 
405 	pid = fork();
406 	switch (pid) {
407 	case -1:
408 		err(1, "fork");
409 	case 0:
410 		close(srv_fds[0]);
411 		run_mandocd(srv_fds[1], outtype, defos);
412 	default:
413 		break;
414 	}
415 	close(srv_fds[1]);
416 
417 	if ((dstdir_fd = open(argv[1], O_RDONLY | O_DIRECTORY)) == -1) {
418 		if (errno != ENOENT)
419 			err(1, "open destination %s", argv[1]);
420 		if (mkdir(argv[1], S_IRWXU |
421 		    S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH) == -1)
422 			err(1, "mkdir destination %s", argv[1]);
423 		if ((dstdir_fd = open(argv[1], O_RDONLY | O_DIRECTORY)) == -1)
424 			err(1, "open destination %s", argv[1]);
425 	}
426 
427 	if (chdir(argv[0]) == -1)
428 		err(1, "chdir to source %s", argv[0]);
429 
430 	return process_tree(srv_fds[0], dstdir_fd) == -1 ? 1 : 0;
431 }
432 
433 void
434 usage(void)
435 {
436 	fprintf(stderr, "usage: %s [-I os=name] [-T output] "
437 	    "srcdir dstdir\n", BINM_CATMAN);
438 	exit(1);
439 }
440