xref: /freebsd/contrib/ntp/libntp/work_fork.c (revision d66820f2c8c6cdc3d0359abf1004dac0a466a01d)
1 /*
2  * work_fork.c - fork implementation for blocking worker child.
3  */
4 #include <config.h>
5 #include "ntp_workimpl.h"
6 
7 #ifdef WORK_FORK
8 #include <stdio.h>
9 #include <ctype.h>
10 #include <signal.h>
11 #include <sys/wait.h>
12 
13 #include "iosignal.h"
14 #include "ntp_stdlib.h"
15 #include "ntp_malloc.h"
16 #include "ntp_syslog.h"
17 #include "ntpd.h"
18 #include "ntp_io.h"
19 #include "ntp_assert.h"
20 #include "ntp_unixtime.h"
21 #include "ntp_worker.h"
22 
23 /* === variables === */
	/* set to TRUE in the forked worker child right after fork();
	 * send_worker_home_atexit() does nothing when it is nonzero */
	int			worker_process;
	/* hook called in this file to add the parent's response pipe to
	 * the I/O loop after fork and to remove it again on cleanup */
	addremove_io_fd_func	addremove_io_fd;
/* set by worker_sighup(), tested and cleared by worker_sleep() */
static	volatile int		worker_sighup_received;
/* argument vector whose strings the child overwrites with its own
 * process title; nothing is rewritten while saved_argc is 0 */
int	saved_argc = 0;
char	**saved_argv;
29 
30 /* === function prototypes === */
31 static	void		fork_blocking_child(blocking_child *);
32 static	RETSIGTYPE	worker_sighup(int);
33 static	void		send_worker_home_atexit(void);
34 static	void		cleanup_after_child(blocking_child *);
35 
36 /* === I/O helpers === */
37 /* Since we have signals enabled, there's a good chance that blocking IO
38  * via pipe suffers from EINTR -- and this goes for both directions.
39  * The next two wrappers will loop until either all the data is written
40  * or read, plus handling the EOF condition on read. They may return
41  * zero if no data was transferred at all, and effectively every return
42  * value that differs from the given transfer length signifies an error
43  * condition.
44  */
45 
/*
 * netread() - read up to 'l' octets from 'fd' into 'vb', restarting
 * reads that were interrupted by a signal (EINTR).
 *
 * The transfer ends early on end-of-file or on any error other than
 * EINTR.  Returns the number of octets actually stored, which is
 * smaller than 'l' (possibly zero) when the transfer ended early.
 */
static size_t
netread(
	int		fd,
	void *		vb,
	size_t		l
	)
{
	char * const	start = vb;
	char *		cursor = start;
	size_t		remaining = l;
	ssize_t		got;

	while (remaining > 0) {
		got = read(fd, cursor, remaining);
		if (got > 0) {
			cursor += got;
			remaining -= (size_t)got;
			continue;
		}
		/* EOF or a hard error ends the transfer; EINTR retries */
		if (0 == got || EINTR != errno)
			break;
	}
	return (size_t)(cursor - start);
}
67 
68 
/*
 * netwrite() - write 'l' octets from 'vb' to 'fd', restarting writes
 * that were interrupted by a signal (EINTR).
 *
 * The transfer ends early on any error other than EINTR, or when
 * write() reports that it transferred nothing.  Returns the number of
 * octets actually written; any value other than 'l' means failure.
 */
static size_t
netwrite(
	int		fd,
	const void *	vb,
	size_t		l
	)
{
	const char *	b = vb;
	ssize_t		w;

	while (l) {
		w = write(fd, b, l);
		if (w > 0) {
			l -= w;
			b += w;
		} else if (w == 0 || errno != EINTR) {
			/*
			 * errno is only meaningful when write() returned
			 * -1.  A zero return must not be judged by a
			 * stale errno, or a leftover EINTR would make us
			 * spin here forever.
			 */
			l = 0;
		}
	}
	return (size_t)(b - (const char *)vb);
}
90 
91 
92 int set_user_group_ids(void);
93 
94 /* === functions === */
95 /*
96  * exit_worker()
97  *
98  * On some systems _exit() is preferred to exit() for forked children.
99  * For example, http://netbsd.gw.com/cgi-bin/man-cgi?fork++NetBSD-5.0
100  * recommends _exit() to avoid double-flushing C runtime stream buffers
101  * and also to avoid calling the parent's atexit() routines in the
102  * child.  On those systems WORKER_CHILD_EXIT is _exit.  Since _exit
103  * bypasses CRT cleanup, fflush() files we know might have output
104  * buffered.
105  */
106 void
107 exit_worker(
108 	int	exitcode
109 	)
110 {
111 	if (syslog_file != NULL)
112 		fflush(syslog_file);
113 	fflush(stdout);
114 	fflush(stderr);
115 	WORKER_CHILD_EXIT (exitcode);	/* space before ( required */
116 }
117 
118 
119 static RETSIGTYPE
120 worker_sighup(
121 	int sig
122 	)
123 {
124 	if (SIGHUP == sig)
125 		worker_sighup_received = 1;
126 }
127 
128 
129 int
130 worker_sleep(
131 	blocking_child *	c,
132 	time_t			seconds
133 	)
134 {
135 	u_int sleep_remain;
136 
137 	sleep_remain = (u_int)seconds;
138 	do {
139 		if (!worker_sighup_received)
140 			sleep_remain = sleep(sleep_remain);
141 		if (worker_sighup_received) {
142 			TRACE(1, ("worker SIGHUP with %us left to sleep",
143 				  sleep_remain));
144 			worker_sighup_received = 0;
145 			return -1;
146 		}
147 	} while (sleep_remain);
148 
149 	return 0;
150 }
151 
152 
153 void
154 interrupt_worker_sleep(void)
155 {
156 	u_int			idx;
157 	blocking_child *	c;
158 	int			rc;
159 
160 	for (idx = 0; idx < blocking_children_alloc; idx++) {
161 		c = blocking_children[idx];
162 
163 		if (NULL == c || c->reusable == TRUE)
164 			continue;
165 
166 		rc = kill(c->pid, SIGHUP);
167 		if (rc < 0)
168 			msyslog(LOG_ERR,
169 				"Unable to signal HUP to wake child pid %d: %m",
170 				c->pid);
171 	}
172 }
173 
174 
175 /*
176  * harvest_child_status() runs in the parent.
177  *
178  * Note the error handling -- this is an interaction with SIGCHLD.
179  * SIG_IGN on SIGCHLD on some OSes means do not wait but reap
180  * automatically. Since we're not really interested in the result code,
181  * we simply ignore the error.
182  */
183 static void
184 harvest_child_status(
185 	blocking_child *	c
186 	)
187 {
188 	if (c->pid) {
189 		/* Wait on the child so it can finish terminating */
190 		if (waitpid(c->pid, NULL, 0) == c->pid)
191 			TRACE(4, ("harvested child %d\n", c->pid));
192 		else if (errno != ECHILD)
193 			msyslog(LOG_ERR, "error waiting on child %d: %m", c->pid);
194 		c->pid = 0;
195 	}
196 }
197 
198 /*
199  * req_child_exit() runs in the parent.
200  */
201 int
202 req_child_exit(
203 	blocking_child *	c
204 	)
205 {
206 	if (-1 != c->req_write_pipe) {
207 		close(c->req_write_pipe);
208 		c->req_write_pipe = -1;
209 		return 0;
210 	}
211 	/* Closing the pipe forces the child to exit */
212 	harvest_child_status(c);
213 	return -1;
214 }
215 
216 
217 /*
218  * cleanup_after_child() runs in parent.
219  */
220 static void
221 cleanup_after_child(
222 	blocking_child *	c
223 	)
224 {
225 	harvest_child_status(c);
226 	if (-1 != c->resp_read_pipe) {
227 		(*addremove_io_fd)(c->resp_read_pipe, c->ispipe, TRUE);
228 		close(c->resp_read_pipe);
229 		c->resp_read_pipe = -1;
230 	}
231 	c->resp_read_ctx = NULL;
232 	DEBUG_INSIST(-1 == c->req_read_pipe);
233 	DEBUG_INSIST(-1 == c->resp_write_pipe);
234 	c->reusable = TRUE;
235 }
236 
237 
238 static void
239 send_worker_home_atexit(void)
240 {
241 	u_int			idx;
242 	blocking_child *	c;
243 
244 	if (worker_process)
245 		return;
246 
247 	for (idx = 0; idx < blocking_children_alloc; idx++) {
248 		c = blocking_children[idx];
249 		if (NULL == c)
250 			continue;
251 		req_child_exit(c);
252 	}
253 }
254 
255 
256 int
257 send_blocking_req_internal(
258 	blocking_child *	c,
259 	blocking_pipe_header *	hdr,
260 	void *			data
261 	)
262 {
263 	size_t	octets;
264 	size_t	rc;
265 
266 	DEBUG_REQUIRE(hdr != NULL);
267 	DEBUG_REQUIRE(data != NULL);
268 	DEBUG_REQUIRE(BLOCKING_REQ_MAGIC == hdr->magic_sig);
269 
270 	if (-1 == c->req_write_pipe) {
271 		fork_blocking_child(c);
272 		DEBUG_INSIST(-1 != c->req_write_pipe);
273 	}
274 
275 	octets = sizeof(*hdr);
276 	rc = netwrite(c->req_write_pipe, hdr, octets);
277 
278 	if (rc == octets) {
279 		octets = hdr->octets - sizeof(*hdr);
280 		rc = netwrite(c->req_write_pipe, data, octets);
281 		if (rc == octets)
282 			return 0;
283 	}
284 
285 	msyslog(LOG_ERR,
286 		"send_blocking_req_internal: short write (%zu of %zu), %m",
287 		rc, octets);
288 
289 	/* Fatal error.  Clean up the child process.  */
290 	req_child_exit(c);
291 	exit(1);	/* otherwise would be return -1 */
292 }
293 
294 
295 blocking_pipe_header *
296 receive_blocking_req_internal(
297 	blocking_child *	c
298 	)
299 {
300 	blocking_pipe_header	hdr;
301 	blocking_pipe_header *	req;
302 	size_t			rc;
303 	size_t			octets;
304 
305 	DEBUG_REQUIRE(-1 != c->req_read_pipe);
306 
307 	req = NULL;
308 	rc = netread(c->req_read_pipe, &hdr, sizeof(hdr));
309 
310 	if (0 == rc) {
311 		TRACE(4, ("parent closed request pipe, child %d terminating\n",
312 			  c->pid));
313 	} else if (rc != sizeof(hdr)) {
314 		msyslog(LOG_ERR,
315 			"receive_blocking_req_internal: short header read (%zu of %zu), %m",
316 			rc, sizeof(hdr));
317 	} else {
318 		INSIST(sizeof(hdr) < hdr.octets && hdr.octets < 4 * 1024);
319 		req = emalloc(hdr.octets);
320 		memcpy(req, &hdr, sizeof(*req));
321 		octets = hdr.octets - sizeof(hdr);
322 		rc = netread(c->req_read_pipe, (char *)(req + 1),
323 			     octets);
324 
325 		if (rc != octets)
326 			msyslog(LOG_ERR,
327 				"receive_blocking_req_internal: short read (%zu of %zu), %m",
328 				rc, octets);
329 		else if (BLOCKING_REQ_MAGIC != req->magic_sig)
330 			msyslog(LOG_ERR,
331 				"receive_blocking_req_internal: packet header mismatch (0x%x)",
332 				req->magic_sig);
333 		else
334 			return req;
335 	}
336 
337 	if (req != NULL)
338 		free(req);
339 
340 	return NULL;
341 }
342 
343 
344 int
345 send_blocking_resp_internal(
346 	blocking_child *	c,
347 	blocking_pipe_header *	resp
348 	)
349 {
350 	size_t	octets;
351 	size_t	rc;
352 
353 	DEBUG_REQUIRE(-1 != c->resp_write_pipe);
354 
355 	octets = resp->octets;
356 	rc = netwrite(c->resp_write_pipe, resp, octets);
357 	free(resp);
358 
359 	if (octets == rc)
360 		return 0;
361 
362 	TRACE(1, ("send_blocking_resp_internal: short write (%zu of %zu), %m\n",
363 		  rc, octets));
364 	return -1;
365 }
366 
367 
368 blocking_pipe_header *
369 receive_blocking_resp_internal(
370 	blocking_child *	c
371 	)
372 {
373 	blocking_pipe_header	hdr;
374 	blocking_pipe_header *	resp;
375 	size_t			rc;
376 	size_t			octets;
377 
378 	DEBUG_REQUIRE(c->resp_read_pipe != -1);
379 
380 	resp = NULL;
381 	rc = netread(c->resp_read_pipe, &hdr, sizeof(hdr));
382 
383 	if (0 == rc) {
384 		/* this is the normal child exited indication */
385 	} else if (rc != sizeof(hdr)) {
386 		TRACE(1, ("receive_blocking_resp_internal: short header read (%zu of %zu), %m\n",
387 			  rc, sizeof(hdr)));
388 	} else if (BLOCKING_RESP_MAGIC != hdr.magic_sig) {
389 		TRACE(1, ("receive_blocking_resp_internal: header mismatch (0x%x)\n",
390 			  hdr.magic_sig));
391 	} else {
392 		INSIST(sizeof(hdr) < hdr.octets &&
393 		       hdr.octets < 16 * 1024);
394 		resp = emalloc(hdr.octets);
395 		memcpy(resp, &hdr, sizeof(*resp));
396 		octets = hdr.octets - sizeof(hdr);
397 		rc = netread(c->resp_read_pipe, (char *)(resp + 1),
398 			     octets);
399 
400 		if (rc != octets)
401 			TRACE(1, ("receive_blocking_resp_internal: short read (%zu of %zu), %m\n",
402 				  rc, octets));
403 		else
404 			return resp;
405 	}
406 
407 	cleanup_after_child(c);
408 
409 	if (resp != NULL)
410 		free(resp);
411 
412 	return NULL;
413 }
414 
415 
#if defined(HAVE_DROPROOT) && defined(WORK_FORK)
/*
 * fork_deferred_worker() - fork the worker children whose fork was
 * put off until root privileges were dropped.  Must only be called
 * with droproot set and root already dropped.
 */
void
fork_deferred_worker(void)
{
	blocking_child *	c;
	u_int			idx;

	REQUIRE(droproot && root_dropped);

	for (idx = 0; idx < blocking_children_alloc; idx++) {
		c = blocking_children[idx];
		/* request pipe exists but no pid yet: fork was deferred */
		if (c != NULL && c->req_write_pipe != -1 && 0 == c->pid)
			fork_blocking_child(c);
	}
}
#endif
434 
435 
436 static void
437 fork_blocking_child(
438 	blocking_child *	c
439 	)
440 {
441 	static int	atexit_installed;
442 	static int	blocking_pipes[4] = { -1, -1, -1, -1 };
443 	int		rc;
444 	int		was_pipe;
445 	int		is_pipe;
446 	int		saved_errno = 0;
447 	int		childpid;
448 	int		keep_fd;
449 	int		fd;
450 
451 	/*
452 	 * parent and child communicate via a pair of pipes.
453 	 *
454 	 * 0 child read request
455 	 * 1 parent write request
456 	 * 2 parent read response
457 	 * 3 child write response
458 	 */
459 	if (-1 == c->req_write_pipe) {
460 		rc = pipe_socketpair(&blocking_pipes[0], &was_pipe);
461 		if (0 != rc) {
462 			saved_errno = errno;
463 		} else {
464 			rc = pipe_socketpair(&blocking_pipes[2], &is_pipe);
465 			if (0 != rc) {
466 				saved_errno = errno;
467 				close(blocking_pipes[0]);
468 				close(blocking_pipes[1]);
469 			} else {
470 				INSIST(was_pipe == is_pipe);
471 			}
472 		}
473 		if (0 != rc) {
474 			errno = saved_errno;
475 			msyslog(LOG_ERR, "unable to create worker pipes: %m");
476 			exit(1);
477 		}
478 
479 		/*
480 		 * Move the descriptors the parent will keep open out of the
481 		 * low descriptors preferred by C runtime buffered FILE *.
482 		 */
483 		c->req_write_pipe = move_fd(blocking_pipes[1]);
484 		c->resp_read_pipe = move_fd(blocking_pipes[2]);
485 		/*
486 		 * wake any worker child on orderly shutdown of the
487 		 * daemon so that it can notice the broken pipes and
488 		 * go away promptly.
489 		 */
490 		if (!atexit_installed) {
491 			atexit(&send_worker_home_atexit);
492 			atexit_installed = TRUE;
493 		}
494 	}
495 
496 #if defined(HAVE_DROPROOT) && !defined(NEED_EARLY_FORK)
497 	/* defer the fork until after root is dropped */
498 	if (droproot && !root_dropped)
499 		return;
500 #endif
501 	if (syslog_file != NULL)
502 		fflush(syslog_file);
503 	fflush(stdout);
504 	fflush(stderr);
505 
506 	/* [BUG 3050] setting SIGCHLD to SIG_IGN likely causes unwanted
507 	 * or undefined effects. We don't do it and leave SIGCHLD alone.
508 	 */
509 	/* signal_no_reset(SIGCHLD, SIG_IGN); */
510 
511 	childpid = fork();
512 	if (-1 == childpid) {
513 		msyslog(LOG_ERR, "unable to fork worker: %m");
514 		exit(1);
515 	}
516 
517 	if (childpid) {
518 		/* this is the parent */
519 		TRACE(1, ("forked worker child (pid %d)\n", childpid));
520 		c->pid = childpid;
521 		c->ispipe = is_pipe;
522 
523 		/* close the child's pipe descriptors. */
524 		close(blocking_pipes[0]);
525 		close(blocking_pipes[3]);
526 
527 		memset(blocking_pipes, -1, sizeof(blocking_pipes));
528 
529 		/* wire into I/O loop */
530 		(*addremove_io_fd)(c->resp_read_pipe, is_pipe, FALSE);
531 
532 		return;		/* parent returns */
533 	}
534 
535 	/*
536 	 * The parent gets the child pid as the return value of fork().
537 	 * The child must work for it.
538 	 */
539 	c->pid = getpid();
540 	worker_process = TRUE;
541 
542 	/*
543 	 * Change the process name of the child to avoid confusion
544 	 * about ntpd trunning twice.
545 	 */
546 	if (saved_argc != 0) {
547 		int argcc;
548 		int argvlen = 0;
549 		/* Clear argv */
550 		for (argcc = 0; argcc < saved_argc; argcc++) {
551 			int l = strlen(saved_argv[argcc]);
552 			argvlen += l + 1;
553 			memset(saved_argv[argcc], 0, l);
554 		}
555 		strlcpy(saved_argv[0], "ntpd: asynchronous dns resolver", argvlen);
556 	}
557 
558 	/*
559 	 * In the child, close all files except stdin, stdout, stderr,
560 	 * and the two child ends of the pipes.
561 	 */
562 	DEBUG_INSIST(-1 == c->req_read_pipe);
563 	DEBUG_INSIST(-1 == c->resp_write_pipe);
564 	c->req_read_pipe = blocking_pipes[0];
565 	c->resp_write_pipe = blocking_pipes[3];
566 
567 	kill_asyncio(0);
568 	closelog();
569 	if (syslog_file != NULL) {
570 		fclose(syslog_file);
571 		syslog_file = NULL;
572 		syslogit = TRUE;
573 	}
574 	keep_fd = max(c->req_read_pipe, c->resp_write_pipe);
575 	for (fd = 3; fd < keep_fd; fd++)
576 		if (fd != c->req_read_pipe &&
577 		    fd != c->resp_write_pipe)
578 			close(fd);
579 	close_all_beyond(keep_fd);
580 	/*
581 	 * We get signals from refclock serial I/O on NetBSD in the
582 	 * worker if we do not reset SIGIO's handler to the default.
583 	 * It is not conditionalized for NetBSD alone because on
584 	 * systems where it is not needed, it is harmless, and that
585 	 * allows us to handle unknown others with NetBSD behavior.
586 	 * [Bug 1386]
587 	 */
588 #if defined(USE_SIGIO)
589 	signal_no_reset(SIGIO, SIG_DFL);
590 #elif defined(USE_SIGPOLL)
591 	signal_no_reset(SIGPOLL, SIG_DFL);
592 #endif
593 	signal_no_reset(SIGHUP, worker_sighup);
594 	init_logging("ntp_intres", 0, FALSE);
595 	setup_logfile(NULL);
596 
597 	(void) set_user_group_ids();
598 
599 	/*
600 	 * And now back to the portable code
601 	 */
602 	exit_worker(blocking_child_common(c));
603 }
604 
605 
/*
 * worker_global_lock() - does nothing in the fork implementation;
 * the argument is accepted and ignored.
 */
void
worker_global_lock(
	int	inOrOut
	)
{
	(void)inOrOut;
}
610 
611 #else	/* !WORK_FORK follows */
612 char work_fork_nonempty_compilation_unit;
613 #endif
614