xref: /freebsd/contrib/ntp/libntp/work_fork.c (revision 4928135658a9d0eaee37003df6137ab363fcb0b4)
1 /*
2  * work_fork.c - fork implementation for blocking worker child.
3  */
4 #include <config.h>
5 #include "ntp_workimpl.h"
6 
7 #ifdef WORK_FORK
8 #include <stdio.h>
9 #include <ctype.h>
10 #include <signal.h>
11 #include <sys/wait.h>
12 
13 #include "iosignal.h"
14 #include "ntp_stdlib.h"
15 #include "ntp_malloc.h"
16 #include "ntp_syslog.h"
17 #include "ntpd.h"
18 #include "ntp_io.h"
19 #include "ntp_assert.h"
20 #include "ntp_unixtime.h"
21 #include "ntp_worker.h"
22 
23 /* === variables === */
24 	int			worker_process;
25 	addremove_io_fd_func	addremove_io_fd;
26 static	volatile int		worker_sighup_received;
27 int	saved_argc = 0;
28 char	**saved_argv;
29 
30 /* === function prototypes === */
31 static	void		fork_blocking_child(blocking_child *);
32 static	RETSIGTYPE	worker_sighup(int);
33 static	void		send_worker_home_atexit(void);
34 static	void		cleanup_after_child(blocking_child *);
35 
36 /* === I/O helpers === */
37 /* Since we have signals enabled, there's a good chance that blocking IO
38  * via pipe suffers from EINTR -- and this goes for both directions.
39  * The next two wrappers will loop until either all the data is written
40  * or read, plus handling the EOF condition on read. They may return
41  * zero if no data was transferred at all, and effectively every return
42  * value that differs from the given transfer length signifies an error
43  * condition.
44  */
45 
/*
 * netread() - read exactly 'l' octets from 'fd' into 'vb'.
 *
 * Keeps calling read() until the request is satisfied, retrying when a
 * call is interrupted (EINTR).  End-of-file or any other error stops
 * the loop early.  Returns the number of octets actually stored, which
 * is less than 'l' (possibly zero) on EOF or error.
 */
static size_t
netread(
	int		fd,
	void *		vb,
	size_t		l
	)
{
	char * const	start = vb;
	size_t		done = 0;
	ssize_t		got;

	while (done < l) {
		got = read(fd, start + done, l - done);
		if (got > 0) {
			done += (size_t)got;
			continue;
		}
		/* EOF or a hard error ends the transfer, EINTR retries */
		if (0 == got || EINTR != errno)
			break;
	}
	return done;
}
67 
68 
/*
 * netwrite() - write exactly 'l' octets from 'vb' to 'fd'.
 *
 * Keeps calling write() until everything has been handed over,
 * retrying when a call is interrupted (EINTR).  Any other failure
 * stops the loop.  Returns the number of octets actually written,
 * which is less than 'l' (possibly zero) on error.
 */
static size_t
netwrite(
	int		fd,
	const void *	vb,
	size_t		l
	)
{
	const char * const	start = vb;
	size_t			done = 0;
	ssize_t			put;

	while (done < l) {
		put = write(fd, start + done, l - done);
		if (put > 0) {
			done += (size_t)put;
		} else if (EINTR != errno) {
			/* anything but an interrupted call is final */
			break;
		}
	}
	return done;
}
90 
91 
92 /* === functions === */
93 /*
94  * exit_worker()
95  *
96  * On some systems _exit() is preferred to exit() for forked children.
97  * For example, http://netbsd.gw.com/cgi-bin/man-cgi?fork++NetBSD-5.0
98  * recommends _exit() to avoid double-flushing C runtime stream buffers
99  * and also to avoid calling the parent's atexit() routines in the
100  * child.  On those systems WORKER_CHILD_EXIT is _exit.  Since _exit
101  * bypasses CRT cleanup, fflush() files we know might have output
102  * buffered.
103  */
104 void
105 exit_worker(
106 	int	exitcode
107 	)
108 {
109 	if (syslog_file != NULL)
110 		fflush(syslog_file);
111 	fflush(stdout);
112 	fflush(stderr);
113 	WORKER_CHILD_EXIT (exitcode);	/* space before ( required */
114 }
115 
116 
117 static RETSIGTYPE
118 worker_sighup(
119 	int sig
120 	)
121 {
122 	if (SIGHUP == sig)
123 		worker_sighup_received = 1;
124 }
125 
126 
127 int
128 worker_sleep(
129 	blocking_child *	c,
130 	time_t			seconds
131 	)
132 {
133 	u_int sleep_remain;
134 
135 	sleep_remain = (u_int)seconds;
136 	do {
137 		if (!worker_sighup_received)
138 			sleep_remain = sleep(sleep_remain);
139 		if (worker_sighup_received) {
140 			TRACE(1, ("worker SIGHUP with %us left to sleep",
141 				  sleep_remain));
142 			worker_sighup_received = 0;
143 			return -1;
144 		}
145 	} while (sleep_remain);
146 
147 	return 0;
148 }
149 
150 
151 void
152 interrupt_worker_sleep(void)
153 {
154 	u_int			idx;
155 	blocking_child *	c;
156 	int			rc;
157 
158 	for (idx = 0; idx < blocking_children_alloc; idx++) {
159 		c = blocking_children[idx];
160 
161 		if (NULL == c || c->reusable == TRUE)
162 			continue;
163 
164 		rc = kill(c->pid, SIGHUP);
165 		if (rc < 0)
166 			msyslog(LOG_ERR,
167 				"Unable to signal HUP to wake child pid %d: %m",
168 				c->pid);
169 	}
170 }
171 
172 
173 /*
174  * harvest_child_status() runs in the parent.
175  *
176  * Note the error handling -- this is an interaction with SIGCHLD.
177  * SIG_IGN on SIGCHLD on some OSes means do not wait but reap
178  * automatically. Since we're not really interested in the result code,
179  * we simply ignore the error.
180  */
181 static void
182 harvest_child_status(
183 	blocking_child *	c
184 	)
185 {
186 	if (c->pid) {
187 		/* Wait on the child so it can finish terminating */
188 		if (waitpid(c->pid, NULL, 0) == c->pid)
189 			TRACE(4, ("harvested child %d\n", c->pid));
190 		else if (errno != ECHILD)
191 			msyslog(LOG_ERR, "error waiting on child %d: %m", c->pid);
192 		c->pid = 0;
193 	}
194 }
195 
196 /*
197  * req_child_exit() runs in the parent.
198  */
199 int
200 req_child_exit(
201 	blocking_child *	c
202 	)
203 {
204 	if (-1 != c->req_write_pipe) {
205 		close(c->req_write_pipe);
206 		c->req_write_pipe = -1;
207 		return 0;
208 	}
209 	/* Closing the pipe forces the child to exit */
210 	harvest_child_status(c);
211 	return -1;
212 }
213 
214 
215 /*
216  * cleanup_after_child() runs in parent.
217  */
218 static void
219 cleanup_after_child(
220 	blocking_child *	c
221 	)
222 {
223 	harvest_child_status(c);
224 	if (-1 != c->resp_read_pipe) {
225 		(*addremove_io_fd)(c->resp_read_pipe, c->ispipe, TRUE);
226 		close(c->resp_read_pipe);
227 		c->resp_read_pipe = -1;
228 	}
229 	c->resp_read_ctx = NULL;
230 	DEBUG_INSIST(-1 == c->req_read_pipe);
231 	DEBUG_INSIST(-1 == c->resp_write_pipe);
232 	c->reusable = TRUE;
233 }
234 
235 
236 static void
237 send_worker_home_atexit(void)
238 {
239 	u_int			idx;
240 	blocking_child *	c;
241 
242 	if (worker_process)
243 		return;
244 
245 	for (idx = 0; idx < blocking_children_alloc; idx++) {
246 		c = blocking_children[idx];
247 		if (NULL == c)
248 			continue;
249 		req_child_exit(c);
250 	}
251 }
252 
253 
254 int
255 send_blocking_req_internal(
256 	blocking_child *	c,
257 	blocking_pipe_header *	hdr,
258 	void *			data
259 	)
260 {
261 	size_t	octets;
262 	size_t	rc;
263 
264 	DEBUG_REQUIRE(hdr != NULL);
265 	DEBUG_REQUIRE(data != NULL);
266 	DEBUG_REQUIRE(BLOCKING_REQ_MAGIC == hdr->magic_sig);
267 
268 	if (-1 == c->req_write_pipe) {
269 		fork_blocking_child(c);
270 		DEBUG_INSIST(-1 != c->req_write_pipe);
271 	}
272 
273 	octets = sizeof(*hdr);
274 	rc = netwrite(c->req_write_pipe, hdr, octets);
275 
276 	if (rc == octets) {
277 		octets = hdr->octets - sizeof(*hdr);
278 		rc = netwrite(c->req_write_pipe, data, octets);
279 		if (rc == octets)
280 			return 0;
281 	}
282 
283 	msyslog(LOG_ERR,
284 		"send_blocking_req_internal: short write (%zu of %zu), %m",
285 		rc, octets);
286 
287 	/* Fatal error.  Clean up the child process.  */
288 	req_child_exit(c);
289 	exit(1);	/* otherwise would be return -1 */
290 }
291 
292 
293 blocking_pipe_header *
294 receive_blocking_req_internal(
295 	blocking_child *	c
296 	)
297 {
298 	blocking_pipe_header	hdr;
299 	blocking_pipe_header *	req;
300 	size_t			rc;
301 	size_t			octets;
302 
303 	DEBUG_REQUIRE(-1 != c->req_read_pipe);
304 
305 	req = NULL;
306 	rc = netread(c->req_read_pipe, &hdr, sizeof(hdr));
307 
308 	if (0 == rc) {
309 		TRACE(4, ("parent closed request pipe, child %d terminating\n",
310 			  c->pid));
311 	} else if (rc != sizeof(hdr)) {
312 		msyslog(LOG_ERR,
313 			"receive_blocking_req_internal: short header read (%zu of %zu), %m",
314 			rc, sizeof(hdr));
315 	} else {
316 		INSIST(sizeof(hdr) < hdr.octets && hdr.octets < 4 * 1024);
317 		req = emalloc(hdr.octets);
318 		memcpy(req, &hdr, sizeof(*req));
319 		octets = hdr.octets - sizeof(hdr);
320 		rc = netread(c->req_read_pipe, (char *)(req + 1),
321 			     octets);
322 
323 		if (rc != octets)
324 			msyslog(LOG_ERR,
325 				"receive_blocking_req_internal: short read (%zu of %zu), %m",
326 				rc, octets);
327 		else if (BLOCKING_REQ_MAGIC != req->magic_sig)
328 			msyslog(LOG_ERR,
329 				"receive_blocking_req_internal: packet header mismatch (0x%x)",
330 				req->magic_sig);
331 		else
332 			return req;
333 	}
334 
335 	if (req != NULL)
336 		free(req);
337 
338 	return NULL;
339 }
340 
341 
342 int
343 send_blocking_resp_internal(
344 	blocking_child *	c,
345 	blocking_pipe_header *	resp
346 	)
347 {
348 	size_t	octets;
349 	size_t	rc;
350 
351 	DEBUG_REQUIRE(-1 != c->resp_write_pipe);
352 
353 	octets = resp->octets;
354 	rc = netwrite(c->resp_write_pipe, resp, octets);
355 	free(resp);
356 
357 	if (octets == rc)
358 		return 0;
359 
360 	TRACE(1, ("send_blocking_resp_internal: short write (%zu of %zu), %m\n",
361 		  rc, octets));
362 	return -1;
363 }
364 
365 
366 blocking_pipe_header *
367 receive_blocking_resp_internal(
368 	blocking_child *	c
369 	)
370 {
371 	blocking_pipe_header	hdr;
372 	blocking_pipe_header *	resp;
373 	size_t			rc;
374 	size_t			octets;
375 
376 	DEBUG_REQUIRE(c->resp_read_pipe != -1);
377 
378 	resp = NULL;
379 	rc = netread(c->resp_read_pipe, &hdr, sizeof(hdr));
380 
381 	if (0 == rc) {
382 		/* this is the normal child exited indication */
383 	} else if (rc != sizeof(hdr)) {
384 		TRACE(1, ("receive_blocking_resp_internal: short header read (%zu of %zu), %m\n",
385 			  rc, sizeof(hdr)));
386 	} else if (BLOCKING_RESP_MAGIC != hdr.magic_sig) {
387 		TRACE(1, ("receive_blocking_resp_internal: header mismatch (0x%x)\n",
388 			  hdr.magic_sig));
389 	} else {
390 		INSIST(sizeof(hdr) < hdr.octets &&
391 		       hdr.octets < 16 * 1024);
392 		resp = emalloc(hdr.octets);
393 		memcpy(resp, &hdr, sizeof(*resp));
394 		octets = hdr.octets - sizeof(hdr);
395 		rc = netread(c->resp_read_pipe, (char *)(resp + 1),
396 			     octets);
397 
398 		if (rc != octets)
399 			TRACE(1, ("receive_blocking_resp_internal: short read (%zu of %zu), %m\n",
400 				  rc, octets));
401 		else
402 			return resp;
403 	}
404 
405 	cleanup_after_child(c);
406 
407 	if (resp != NULL)
408 		free(resp);
409 
410 	return NULL;
411 }
412 
413 
#if defined(HAVE_DROPROOT) && defined(WORK_FORK)
/*
 * fork_deferred_worker() - start the workers whose fork was postponed.
 *
 * Must only be called once root privileges have been dropped.  Every
 * child slot that already owns a request pipe but has no process yet
 * (pid 0) gets its worker forked now.
 */
void
fork_deferred_worker(void)
{
	blocking_child *	child;
	u_int			slot;

	REQUIRE(droproot && root_dropped);

	for (slot = 0; slot < blocking_children_alloc; slot++) {
		child = blocking_children[slot];
		if (child != NULL
		    && child->req_write_pipe != -1
		    && 0 == child->pid)
			fork_blocking_child(child);
	}
}
#endif
432 
433 
434 static void
435 fork_blocking_child(
436 	blocking_child *	c
437 	)
438 {
439 	static int	atexit_installed;
440 	static int	blocking_pipes[4] = { -1, -1, -1, -1 };
441 	int		rc;
442 	int		was_pipe;
443 	int		is_pipe;
444 	int		saved_errno = 0;
445 	int		childpid;
446 	int		keep_fd;
447 	int		fd;
448 
449 	/*
450 	 * parent and child communicate via a pair of pipes.
451 	 *
452 	 * 0 child read request
453 	 * 1 parent write request
454 	 * 2 parent read response
455 	 * 3 child write response
456 	 */
457 	if (-1 == c->req_write_pipe) {
458 		rc = pipe_socketpair(&blocking_pipes[0], &was_pipe);
459 		if (0 != rc) {
460 			saved_errno = errno;
461 		} else {
462 			rc = pipe_socketpair(&blocking_pipes[2], &is_pipe);
463 			if (0 != rc) {
464 				saved_errno = errno;
465 				close(blocking_pipes[0]);
466 				close(blocking_pipes[1]);
467 			} else {
468 				INSIST(was_pipe == is_pipe);
469 			}
470 		}
471 		if (0 != rc) {
472 			errno = saved_errno;
473 			msyslog(LOG_ERR, "unable to create worker pipes: %m");
474 			exit(1);
475 		}
476 
477 		/*
478 		 * Move the descriptors the parent will keep open out of the
479 		 * low descriptors preferred by C runtime buffered FILE *.
480 		 */
481 		c->req_write_pipe = move_fd(blocking_pipes[1]);
482 		c->resp_read_pipe = move_fd(blocking_pipes[2]);
483 		/*
484 		 * wake any worker child on orderly shutdown of the
485 		 * daemon so that it can notice the broken pipes and
486 		 * go away promptly.
487 		 */
488 		if (!atexit_installed) {
489 			atexit(&send_worker_home_atexit);
490 			atexit_installed = TRUE;
491 		}
492 	}
493 
494 #if defined(HAVE_DROPROOT) && !defined(NEED_EARLY_FORK)
495 	/* defer the fork until after root is dropped */
496 	if (droproot && !root_dropped)
497 		return;
498 #endif
499 	if (syslog_file != NULL)
500 		fflush(syslog_file);
501 	fflush(stdout);
502 	fflush(stderr);
503 
504 	/* [BUG 3050] setting SIGCHLD to SIG_IGN likely causes unwanted
505 	 * or undefined effects. We don't do it and leave SIGCHLD alone.
506 	 */
507 	/* signal_no_reset(SIGCHLD, SIG_IGN); */
508 
509 	childpid = fork();
510 	if (-1 == childpid) {
511 		msyslog(LOG_ERR, "unable to fork worker: %m");
512 		exit(1);
513 	}
514 
515 	if (childpid) {
516 		/* this is the parent */
517 		TRACE(1, ("forked worker child (pid %d)\n", childpid));
518 		c->pid = childpid;
519 		c->ispipe = is_pipe;
520 
521 		/* close the child's pipe descriptors. */
522 		close(blocking_pipes[0]);
523 		close(blocking_pipes[3]);
524 
525 		memset(blocking_pipes, -1, sizeof(blocking_pipes));
526 
527 		/* wire into I/O loop */
528 		(*addremove_io_fd)(c->resp_read_pipe, is_pipe, FALSE);
529 
530 		return;		/* parent returns */
531 	}
532 
533 	/*
534 	 * The parent gets the child pid as the return value of fork().
535 	 * The child must work for it.
536 	 */
537 	c->pid = getpid();
538 	worker_process = TRUE;
539 
540 	/*
541 	 * Change the process name of the child to avoid confusion
542 	 * about ntpd trunning twice.
543 	 */
544 	if (saved_argc != 0) {
545 		int argcc;
546 		int argvlen = 0;
547 		/* Clear argv */
548 		for (argcc = 0; argcc < saved_argc; argcc++) {
549 			int l = strlen(saved_argv[argcc]);
550 			argvlen += l + 1;
551 			memset(saved_argv[argcc], 0, l);
552 		}
553 		strlcpy(saved_argv[0], "ntpd: asynchronous dns resolver", argvlen);
554 	}
555 
556 	/*
557 	 * In the child, close all files except stdin, stdout, stderr,
558 	 * and the two child ends of the pipes.
559 	 */
560 	DEBUG_INSIST(-1 == c->req_read_pipe);
561 	DEBUG_INSIST(-1 == c->resp_write_pipe);
562 	c->req_read_pipe = blocking_pipes[0];
563 	c->resp_write_pipe = blocking_pipes[3];
564 
565 	kill_asyncio(0);
566 	closelog();
567 	if (syslog_file != NULL) {
568 		fclose(syslog_file);
569 		syslog_file = NULL;
570 		syslogit = TRUE;
571 	}
572 	keep_fd = max(c->req_read_pipe, c->resp_write_pipe);
573 	for (fd = 3; fd < keep_fd; fd++)
574 		if (fd != c->req_read_pipe &&
575 		    fd != c->resp_write_pipe)
576 			close(fd);
577 	close_all_beyond(keep_fd);
578 	/*
579 	 * We get signals from refclock serial I/O on NetBSD in the
580 	 * worker if we do not reset SIGIO's handler to the default.
581 	 * It is not conditionalized for NetBSD alone because on
582 	 * systems where it is not needed, it is harmless, and that
583 	 * allows us to handle unknown others with NetBSD behavior.
584 	 * [Bug 1386]
585 	 */
586 #if defined(USE_SIGIO)
587 	signal_no_reset(SIGIO, SIG_DFL);
588 #elif defined(USE_SIGPOLL)
589 	signal_no_reset(SIGPOLL, SIG_DFL);
590 #endif
591 	signal_no_reset(SIGHUP, worker_sighup);
592 	init_logging("ntp_intres", 0, FALSE);
593 	setup_logfile(NULL);
594 
595 	/*
596 	 * And now back to the portable code
597 	 */
598 	exit_worker(blocking_child_common(c));
599 }
600 
601 
/*
 * worker_global_lock() - does nothing in the fork implementation; the
 * argument is ignored.
 */
void
worker_global_lock(
	int	inOrOut
	)
{
	(void)inOrOut;
}
606 
607 #else	/* !WORK_FORK follows */
608 char work_fork_nonempty_compilation_unit;
609 #endif
610