xref: /freebsd/contrib/ntp/libntp/work_fork.c (revision 13ec1e3155c7e9bf037b12af186351b7fa9b9450)
1 /*
2  * work_fork.c - fork implementation for blocking worker child.
3  */
4 #include <config.h>
5 #include "ntp_workimpl.h"
6 
7 #ifdef WORK_FORK
8 #include <stdio.h>
9 #include <ctype.h>
10 #include <signal.h>
11 #include <sys/wait.h>
12 
13 #include "iosignal.h"
14 #include "ntp_stdlib.h"
15 #include "ntp_malloc.h"
16 #include "ntp_syslog.h"
17 #include "ntpd.h"
18 #include "ntp_io.h"
19 #include "ntp_assert.h"
20 #include "ntp_unixtime.h"
21 #include "ntp_worker.h"
22 
23 /* === variables === */
24 	int			worker_process;
25 	addremove_io_fd_func	addremove_io_fd;
26 static	volatile int		worker_sighup_received;
27 int	saved_argc = 0;
28 char	**saved_argv;
29 
30 /* === function prototypes === */
31 static	void		fork_blocking_child(blocking_child *);
32 static	RETSIGTYPE	worker_sighup(int);
33 static	void		send_worker_home_atexit(void);
34 static	void		cleanup_after_child(blocking_child *);
35 
36 /* === I/O helpers === */
37 /* Since we have signals enabled, there's a good chance that blocking IO
38  * via pipe suffers from EINTR -- and this goes for both directions.
39  * The next two wrappers will loop until either all the data is written
40  * or read, plus handling the EOF condition on read. They may return
41  * zero if no data was transferred at all, and effectively every return
42  * value that differs from the given transfer length signifies an error
43  * condition.
44  */
45 
/*
 * netread() - read up to 'l' octets from 'fd' into 'vb'.
 *
 * Keeps calling read() until the requested amount has arrived, the
 * peer signals EOF, or read() fails with anything other than EINTR.
 * Returns the number of octets actually stored, which is less than
 * 'l' (possibly zero) on EOF or error.
 */
static size_t
netread(
	int		fd,
	void *		vb,
	size_t		l
	)
{
	char * const	base = vb;
	size_t		done = 0;
	ssize_t		got;

	while (done < l) {
		got = read(fd, base + done, l - done);
		if (got > 0) {
			done += (size_t)got;
			continue;
		}
		/* EOF or a hard error ends the transfer; EINTR retries */
		if (0 == got || EINTR != errno)
			break;
	}

	return done;
}
67 
68 
/*
 * netwrite() - write 'l' octets from 'vb' to 'fd'.
 *
 * Keeps calling write() until everything has been sent or write()
 * stops making progress with errno other than EINTR.  Returns the
 * number of octets actually written; anything other than 'l' means
 * the transfer failed.
 */
static size_t
netwrite(
	int		fd,
	const void *	vb,
	size_t		l
	)
{
	const char * const	base = vb;
	size_t			done = 0;
	ssize_t			put;

	while (done < l) {
		put = write(fd, base + done, l - done);
		if (put > 0) {
			done += (size_t)put;
			continue;
		}
		/* only an interrupted call is worth retrying */
		if (EINTR != errno)
			break;
	}

	return done;
}
90 
91 
92 #if defined(HAVE_DROPROOT)
93 extern int set_user_group_ids(void);
94 #endif
95 
96 /* === functions === */
97 /*
98  * exit_worker()
99  *
100  * On some systems _exit() is preferred to exit() for forked children.
101  * For example, http://netbsd.gw.com/cgi-bin/man-cgi?fork++NetBSD-5.0
102  * recommends _exit() to avoid double-flushing C runtime stream buffers
103  * and also to avoid calling the parent's atexit() routines in the
104  * child.  On those systems WORKER_CHILD_EXIT is _exit.  Since _exit
105  * bypasses CRT cleanup, fflush() files we know might have output
106  * buffered.
107  */
108 void
109 exit_worker(
110 	int	exitcode
111 	)
112 {
113 	if (syslog_file != NULL)
114 		fflush(syslog_file);
115 	fflush(stdout);
116 	fflush(stderr);
117 	WORKER_CHILD_EXIT (exitcode);	/* space before ( required */
118 }
119 
120 
121 static RETSIGTYPE
122 worker_sighup(
123 	int sig
124 	)
125 {
126 	if (SIGHUP == sig)
127 		worker_sighup_received = 1;
128 }
129 
130 
131 int
132 worker_sleep(
133 	blocking_child *	c,
134 	time_t			seconds
135 	)
136 {
137 	u_int sleep_remain;
138 
139 	sleep_remain = (u_int)seconds;
140 	do {
141 		if (!worker_sighup_received)
142 			sleep_remain = sleep(sleep_remain);
143 		if (worker_sighup_received) {
144 			TRACE(1, ("worker SIGHUP with %us left to sleep",
145 				  sleep_remain));
146 			worker_sighup_received = 0;
147 			return -1;
148 		}
149 	} while (sleep_remain);
150 
151 	return 0;
152 }
153 
154 
155 void
156 interrupt_worker_sleep(void)
157 {
158 	u_int			idx;
159 	blocking_child *	c;
160 	int			rc;
161 
162 	for (idx = 0; idx < blocking_children_alloc; idx++) {
163 		c = blocking_children[idx];
164 
165 		if (NULL == c || c->reusable == TRUE)
166 			continue;
167 
168 		rc = kill(c->pid, SIGHUP);
169 		if (rc < 0)
170 			msyslog(LOG_ERR,
171 				"Unable to signal HUP to wake child pid %d: %m",
172 				c->pid);
173 	}
174 }
175 
176 
177 /*
178  * harvest_child_status() runs in the parent.
179  *
180  * Note the error handling -- this is an interaction with SIGCHLD.
181  * SIG_IGN on SIGCHLD on some OSes means do not wait but reap
182  * automatically. Since we're not really interested in the result code,
183  * we simply ignore the error.
184  */
185 static void
186 harvest_child_status(
187 	blocking_child *	c
188 	)
189 {
190 	if (c->pid) {
191 		/* Wait on the child so it can finish terminating */
192 		if (waitpid(c->pid, NULL, 0) == c->pid)
193 			TRACE(4, ("harvested child %d\n", c->pid));
194 		else if (errno != ECHILD)
195 			msyslog(LOG_ERR, "error waiting on child %d: %m", c->pid);
196 		c->pid = 0;
197 	}
198 }
199 
200 /*
201  * req_child_exit() runs in the parent.
202  */
203 int
204 req_child_exit(
205 	blocking_child *	c
206 	)
207 {
208 	if (-1 != c->req_write_pipe) {
209 		close(c->req_write_pipe);
210 		c->req_write_pipe = -1;
211 		return 0;
212 	}
213 	/* Closing the pipe forces the child to exit */
214 	harvest_child_status(c);
215 	return -1;
216 }
217 
218 
219 /*
220  * cleanup_after_child() runs in parent.
221  */
222 static void
223 cleanup_after_child(
224 	blocking_child *	c
225 	)
226 {
227 	harvest_child_status(c);
228 	if (-1 != c->resp_read_pipe) {
229 		(*addremove_io_fd)(c->resp_read_pipe, c->ispipe, TRUE);
230 		close(c->resp_read_pipe);
231 		c->resp_read_pipe = -1;
232 	}
233 	c->resp_read_ctx = NULL;
234 	DEBUG_INSIST(-1 == c->req_read_pipe);
235 	DEBUG_INSIST(-1 == c->resp_write_pipe);
236 	c->reusable = TRUE;
237 }
238 
239 
240 static void
241 send_worker_home_atexit(void)
242 {
243 	u_int			idx;
244 	blocking_child *	c;
245 
246 	if (worker_process)
247 		return;
248 
249 	for (idx = 0; idx < blocking_children_alloc; idx++) {
250 		c = blocking_children[idx];
251 		if (NULL == c)
252 			continue;
253 		req_child_exit(c);
254 	}
255 }
256 
257 
258 int
259 send_blocking_req_internal(
260 	blocking_child *	c,
261 	blocking_pipe_header *	hdr,
262 	void *			data
263 	)
264 {
265 	size_t	octets;
266 	size_t	rc;
267 
268 	DEBUG_REQUIRE(hdr != NULL);
269 	DEBUG_REQUIRE(data != NULL);
270 	DEBUG_REQUIRE(BLOCKING_REQ_MAGIC == hdr->magic_sig);
271 
272 	if (-1 == c->req_write_pipe) {
273 		fork_blocking_child(c);
274 		DEBUG_INSIST(-1 != c->req_write_pipe);
275 	}
276 
277 	octets = sizeof(*hdr);
278 	rc = netwrite(c->req_write_pipe, hdr, octets);
279 
280 	if (rc == octets) {
281 		octets = hdr->octets - sizeof(*hdr);
282 		rc = netwrite(c->req_write_pipe, data, octets);
283 		if (rc == octets)
284 			return 0;
285 	}
286 
287 	msyslog(LOG_ERR,
288 		"send_blocking_req_internal: short write (%zu of %zu), %m",
289 		rc, octets);
290 
291 	/* Fatal error.  Clean up the child process.  */
292 	req_child_exit(c);
293 	exit(1);	/* otherwise would be return -1 */
294 }
295 
296 
297 blocking_pipe_header *
298 receive_blocking_req_internal(
299 	blocking_child *	c
300 	)
301 {
302 	blocking_pipe_header	hdr;
303 	blocking_pipe_header *	req;
304 	size_t			rc;
305 	size_t			octets;
306 
307 	DEBUG_REQUIRE(-1 != c->req_read_pipe);
308 
309 	req = NULL;
310 	rc = netread(c->req_read_pipe, &hdr, sizeof(hdr));
311 
312 	if (0 == rc) {
313 		TRACE(4, ("parent closed request pipe, child %d terminating\n",
314 			  c->pid));
315 	} else if (rc != sizeof(hdr)) {
316 		msyslog(LOG_ERR,
317 			"receive_blocking_req_internal: short header read (%zu of %zu), %m",
318 			rc, sizeof(hdr));
319 	} else {
320 		INSIST(sizeof(hdr) < hdr.octets && hdr.octets < 4 * 1024);
321 		req = emalloc(hdr.octets);
322 		memcpy(req, &hdr, sizeof(*req));
323 		octets = hdr.octets - sizeof(hdr);
324 		rc = netread(c->req_read_pipe, (char *)(req + 1),
325 			     octets);
326 
327 		if (rc != octets)
328 			msyslog(LOG_ERR,
329 				"receive_blocking_req_internal: short read (%zu of %zu), %m",
330 				rc, octets);
331 		else if (BLOCKING_REQ_MAGIC != req->magic_sig)
332 			msyslog(LOG_ERR,
333 				"receive_blocking_req_internal: packet header mismatch (0x%x)",
334 				req->magic_sig);
335 		else
336 			return req;
337 	}
338 
339 	if (req != NULL)
340 		free(req);
341 
342 	return NULL;
343 }
344 
345 
346 int
347 send_blocking_resp_internal(
348 	blocking_child *	c,
349 	blocking_pipe_header *	resp
350 	)
351 {
352 	size_t	octets;
353 	size_t	rc;
354 
355 	DEBUG_REQUIRE(-1 != c->resp_write_pipe);
356 
357 	octets = resp->octets;
358 	rc = netwrite(c->resp_write_pipe, resp, octets);
359 	free(resp);
360 
361 	if (octets == rc)
362 		return 0;
363 
364 	TRACE(1, ("send_blocking_resp_internal: short write (%zu of %zu), %m\n",
365 		  rc, octets));
366 	return -1;
367 }
368 
369 
370 blocking_pipe_header *
371 receive_blocking_resp_internal(
372 	blocking_child *	c
373 	)
374 {
375 	blocking_pipe_header	hdr;
376 	blocking_pipe_header *	resp;
377 	size_t			rc;
378 	size_t			octets;
379 
380 	DEBUG_REQUIRE(c->resp_read_pipe != -1);
381 
382 	resp = NULL;
383 	rc = netread(c->resp_read_pipe, &hdr, sizeof(hdr));
384 
385 	if (0 == rc) {
386 		/* this is the normal child exited indication */
387 	} else if (rc != sizeof(hdr)) {
388 		TRACE(1, ("receive_blocking_resp_internal: short header read (%zu of %zu), %m\n",
389 			  rc, sizeof(hdr)));
390 	} else if (BLOCKING_RESP_MAGIC != hdr.magic_sig) {
391 		TRACE(1, ("receive_blocking_resp_internal: header mismatch (0x%x)\n",
392 			  hdr.magic_sig));
393 	} else {
394 		INSIST(sizeof(hdr) < hdr.octets &&
395 		       hdr.octets < 16 * 1024);
396 		resp = emalloc(hdr.octets);
397 		memcpy(resp, &hdr, sizeof(*resp));
398 		octets = hdr.octets - sizeof(hdr);
399 		rc = netread(c->resp_read_pipe, (char *)(resp + 1),
400 			     octets);
401 
402 		if (rc != octets)
403 			TRACE(1, ("receive_blocking_resp_internal: short read (%zu of %zu), %m\n",
404 				  rc, octets));
405 		else
406 			return resp;
407 	}
408 
409 	cleanup_after_child(c);
410 
411 	if (resp != NULL)
412 		free(resp);
413 
414 	return NULL;
415 }
416 
417 
#if defined(HAVE_DROPROOT) && defined(WORK_FORK)
/*
 * fork_deferred_worker() - start workers whose fork was postponed.
 *
 * Must only be called once root has been dropped.  Every child slot
 * that already has a request pipe but no process yet gets its worker
 * forked now.
 */
void
fork_deferred_worker(void)
{
	u_int			slot;
	blocking_child *	child;

	REQUIRE(droproot && root_dropped);

	for (slot = 0; slot < blocking_children_alloc; slot++) {
		child = blocking_children[slot];
		if (child != NULL &&
		    -1 != child->req_write_pipe &&
		    0 == child->pid)
			fork_blocking_child(child);
	}
}
#endif
436 
437 
438 static void
439 fork_blocking_child(
440 	blocking_child *	c
441 	)
442 {
443 	static int	atexit_installed;
444 	static int	blocking_pipes[4] = { -1, -1, -1, -1 };
445 	int		rc;
446 	int		was_pipe;
447 	int		is_pipe;
448 	int		saved_errno = 0;
449 	int		childpid;
450 	int		keep_fd;
451 	int		fd;
452 
453 	/*
454 	 * parent and child communicate via a pair of pipes.
455 	 *
456 	 * 0 child read request
457 	 * 1 parent write request
458 	 * 2 parent read response
459 	 * 3 child write response
460 	 */
461 	if (-1 == c->req_write_pipe) {
462 		rc = pipe_socketpair(&blocking_pipes[0], &was_pipe);
463 		if (0 != rc) {
464 			saved_errno = errno;
465 		} else {
466 			rc = pipe_socketpair(&blocking_pipes[2], &is_pipe);
467 			if (0 != rc) {
468 				saved_errno = errno;
469 				close(blocking_pipes[0]);
470 				close(blocking_pipes[1]);
471 			} else {
472 				INSIST(was_pipe == is_pipe);
473 			}
474 		}
475 		if (0 != rc) {
476 			errno = saved_errno;
477 			msyslog(LOG_ERR, "unable to create worker pipes: %m");
478 			exit(1);
479 		}
480 
481 		/*
482 		 * Move the descriptors the parent will keep open out of the
483 		 * low descriptors preferred by C runtime buffered FILE *.
484 		 */
485 		c->req_write_pipe = move_fd(blocking_pipes[1]);
486 		c->resp_read_pipe = move_fd(blocking_pipes[2]);
487 		/*
488 		 * wake any worker child on orderly shutdown of the
489 		 * daemon so that it can notice the broken pipes and
490 		 * go away promptly.
491 		 */
492 		if (!atexit_installed) {
493 			atexit(&send_worker_home_atexit);
494 			atexit_installed = TRUE;
495 		}
496 	}
497 
498 #if defined(HAVE_DROPROOT) && !defined(NEED_EARLY_FORK)
499 	/* defer the fork until after root is dropped */
500 	if (droproot && !root_dropped)
501 		return;
502 #endif
503 	if (syslog_file != NULL)
504 		fflush(syslog_file);
505 	fflush(stdout);
506 	fflush(stderr);
507 
508 	/* [BUG 3050] setting SIGCHLD to SIG_IGN likely causes unwanted
509 	 * or undefined effects. We don't do it and leave SIGCHLD alone.
510 	 */
511 	/* signal_no_reset(SIGCHLD, SIG_IGN); */
512 
513 	childpid = fork();
514 	if (-1 == childpid) {
515 		msyslog(LOG_ERR, "unable to fork worker: %m");
516 		exit(1);
517 	}
518 
519 	if (childpid) {
520 		/* this is the parent */
521 		TRACE(1, ("forked worker child (pid %d)\n", childpid));
522 		c->pid = childpid;
523 		c->ispipe = is_pipe;
524 
525 		/* close the child's pipe descriptors. */
526 		close(blocking_pipes[0]);
527 		close(blocking_pipes[3]);
528 
529 		memset(blocking_pipes, -1, sizeof(blocking_pipes));
530 
531 		/* wire into I/O loop */
532 		(*addremove_io_fd)(c->resp_read_pipe, is_pipe, FALSE);
533 
534 		return;		/* parent returns */
535 	}
536 
537 	/*
538 	 * The parent gets the child pid as the return value of fork().
539 	 * The child must work for it.
540 	 */
541 	c->pid = getpid();
542 	worker_process = TRUE;
543 
544 	/*
545 	 * Change the process name of the child to avoid confusion
546 	 * about ntpd trunning twice.
547 	 */
548 	if (saved_argc != 0) {
549 		int argcc;
550 		int argvlen = 0;
551 		/* Clear argv */
552 		for (argcc = 0; argcc < saved_argc; argcc++) {
553 			int l = strlen(saved_argv[argcc]);
554 			argvlen += l + 1;
555 			memset(saved_argv[argcc], 0, l);
556 		}
557 		strlcpy(saved_argv[0], "ntpd: asynchronous dns resolver", argvlen);
558 	}
559 
560 	/*
561 	 * In the child, close all files except stdin, stdout, stderr,
562 	 * and the two child ends of the pipes.
563 	 */
564 	DEBUG_INSIST(-1 == c->req_read_pipe);
565 	DEBUG_INSIST(-1 == c->resp_write_pipe);
566 	c->req_read_pipe = blocking_pipes[0];
567 	c->resp_write_pipe = blocking_pipes[3];
568 
569 	kill_asyncio(0);
570 	closelog();
571 	if (syslog_file != NULL) {
572 		fclose(syslog_file);
573 		syslog_file = NULL;
574 		syslogit = TRUE;
575 	}
576 	keep_fd = max(c->req_read_pipe, c->resp_write_pipe);
577 	for (fd = 3; fd < keep_fd; fd++)
578 		if (fd != c->req_read_pipe &&
579 		    fd != c->resp_write_pipe)
580 			close(fd);
581 	close_all_beyond(keep_fd);
582 	/*
583 	 * We get signals from refclock serial I/O on NetBSD in the
584 	 * worker if we do not reset SIGIO's handler to the default.
585 	 * It is not conditionalized for NetBSD alone because on
586 	 * systems where it is not needed, it is harmless, and that
587 	 * allows us to handle unknown others with NetBSD behavior.
588 	 * [Bug 1386]
589 	 */
590 #if defined(USE_SIGIO)
591 	signal_no_reset(SIGIO, SIG_DFL);
592 #elif defined(USE_SIGPOLL)
593 	signal_no_reset(SIGPOLL, SIG_DFL);
594 #endif
595 	signal_no_reset(SIGHUP, worker_sighup);
596 	init_logging("ntp_intres", 0, FALSE);
597 	setup_logfile(NULL);
598 
599 #ifdef HAVE_DROPROOT
600 	(void) set_user_group_ids();
601 #endif
602 
603 	/*
604 	 * And now back to the portable code
605 	 */
606 	exit_worker(blocking_child_common(c));
607 }
608 
609 
/*
 * worker_global_lock() - no-op in this implementation; the argument
 * is ignored.
 */
void
worker_global_lock(
	int	inOrOut
	)
{
	(void)inOrOut;
}
614 
615 #else	/* !WORK_FORK follows */
616 char work_fork_nonempty_compilation_unit;
617 #endif
618