xref: /illumos-gate/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_main.c (revision 8c69cc8fbe729fa7b091e901c4b50508ccc6bb33)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2016 by Delphix. All rights reserved.
25  */
26 
27 /*
28  * The core of ilbd daemon is a single-threaded event loop using
29  * event completion framework; it receives requests from client using
30  * the libilb functions, handles timeouts, initiates health checks, and
31  * populates the kernel state.
32  *
33  * The daemon has the following privileges (in addition to the basic ones):
34  *
35  * 	PRIV_PROC_OWNER, PRIV_NET_ICMPACCESS,
36  *	PRIV_SYS_IP_CONFIG, PRIV_PROC_AUDIT
37  *
38  * The aforementioned  privileges will be specified in the SMF manifest.
39  *
40  * AF_UNIX socket is used for IPC between libilb and this daemon as
41  * both processes will run on the same machine.
42  *
43  * To do health check, the daemon will create a timer for every health
44  * check probe. Each of these timers will be  associated with the
45  * event port. When a timer goes off, the daemon will initiate a
46  * pipe to a separate process to execute the specific health check
47  * probe. This new process will run with the same user-id as that of
48  * ilbd daemon and will inherit all the privileges from the ilbd
49  * daemon parent process except the following:
50  *
51  * PRIV_PROC_OWNER, PRIV_PROC_AUDIT
52  *
 * All health checks will be implemented as external methods
 * (binary or script). The following arguments will be passed
 * to external methods:
 *
 *	$1	VIP (literal IPv4 or IPv6 address)
 *	$2	Server IP (literal IPv4 or IPv6 address)
 *	$3	Protocol (UDP, TCP as a string)
 *	$4	The load balance mode, "DSR", "NAT", "HALF_NAT"
 *	$5	Numeric port range
 *	$6	Maximum time (in seconds) the method should wait
 *		before returning failure. If the method runs for
 *		longer, it may be killed, and the test considered failed.
65  *
 * Upon success, a health check method should print the RTT it
 * finds to its STDOUT for ilbd to consume.  The implicit unit
 * is microseconds but only the number needs to be printed.  If it
 * cannot find the RTT, it should print 0.  If the method decides
 * that the server is dead, it should print -1 to its STDOUT.
71  *
 * By default, a user-supplied health check probe process will
 * also run with the same set of privileges as ILB's built-in
 * probes.  If the administrator has a user-supplied health check
 * program that requires a larger privilege set, they will have
 * to implement a setuid program.
77  *
78  * Each health check will have a timeout, such that if the health
79  * check process is hung, it will be killed after the timeout interval
80  * and the daemon will notify the kernel ILB engine of the server's
81  * unresponsiveness, so that load distribution can be appropriately
82  * adjusted.  If on the other hand the health check is successful
83  * the timeout timer is cancelled.
84  */
85 
86 #include <stdio.h>
87 #include <stdlib.h>
88 #include <strings.h>
89 #include <libgen.h>
90 #include <fcntl.h>
91 #include <stddef.h>
92 #include <signal.h>
93 #include <port.h>
94 #include <ctype.h>
95 #include <sys/types.h>
96 #include <sys/wait.h>
97 #include <sys/stat.h>
98 #include <sys/note.h>
99 #include <sys/resource.h>
100 #include <unistd.h>
101 #include <sys/socket.h>
102 #include <errno.h>
103 #include <ucred.h>
104 #include <priv_utils.h>
105 #include <net/if.h>
106 #include <libilb.h>
107 #include <assert.h>
108 #include <inet/ilb.h>
109 #include <libintl.h>
110 #include <fcntl.h>
111 #include <rpcsvc/daemon_utils.h>
112 #include "libilb_impl.h"
113 #include "ilbd.h"
114 
/*
 * NOTE: The following needs to be kept up to date.
 *
 * ILBD_VERSION is printed by print_version(); ILBD_COPYRIGHT is the
 * (translatable) copyright line printed right after it.
 */
#define	ILBD_VERSION	"1.0"
#define	ILBD_COPYRIGHT	\
	"Copyright (c) 2005, 2010, Oracle and/or its affiliates. " \
	"All rights reserved.\n"

/*
 * Global reply buffer to client request.  Note that ilbd is single threaded,
 * so a global buffer is OK.  If ilbd becomes multi-threaded, this needs to
 * be changed.
 *
 * The buffer is ILBD_MSG_SIZE bytes in total; it is declared as an array
 * of uint32_t rather than bytes (presumably so that reply structures built
 * in place are suitably aligned -- NOTE(review): confirm).
 */
static uint32_t reply_buf[ILBD_MSG_SIZE / sizeof (uint32_t)];
129 
130 static void
131 ilbd_free_cli(ilbd_client_t *cli)
132 {
133 	(void) close(cli->cli_sd);
134 	if (cli->cli_cmd == ILBD_SHOW_NAT)
135 		ilbd_show_nat_cleanup();
136 	if (cli->cli_cmd == ILBD_SHOW_PERSIST)
137 		ilbd_show_sticky_cleanup();
138 	if (cli->cli_saved_reply != NULL)
139 		free(cli->cli_saved_reply);
140 	if (cli->cli_peer_ucredp != NULL)
141 		ucred_free(cli->cli_peer_ucredp);
142 	free(cli->cli_pw_buf);
143 	free(cli);
144 }
145 
146 static void
147 ilbd_reset_kernel_state(void)
148 {
149 	ilb_status_t	rc;
150 	ilb_name_cmd_t	kcmd;
151 
152 	kcmd.cmd = ILB_DESTROY_RULE;
153 	kcmd.flags = ILB_RULE_ALLRULES;
154 	kcmd.name[0] = '\0';
155 
156 	rc = do_ioctl(&kcmd, 0);
157 	if (rc != ILB_STATUS_OK)
158 		logdebug("ilbd_reset_kernel_state: do_ioctl failed: %s",
159 		    strerror(errno));
160 }
161 
162 /* Signal handler to do clean up. */
163 /* ARGSUSED */
164 static void
165 ilbd_cleanup(int sig)
166 {
167 	(void) remove(SOCKET_PATH);
168 	ilbd_reset_kernel_state();
169 	exit(0);
170 }
171 
172 /*
173  * Create a socket and return it to caller.  If there is a failure, this
174  * function calls exit(2).  Hence it always returns a valid listener socket.
175  *
176  * Note that this function is called before ilbd becomes a daemon.  So
177  * we call perror(3C) to print out error message directly so that SMF can
178  * catch them.
179  */
180 static int
181 ilbd_create_client_socket(void)
182 {
183 	int			s;
184 	mode_t			omask;
185 	struct sockaddr_un	sa;
186 	int			sobufsz;
187 
188 	s = socket(PF_UNIX, SOCK_SEQPACKET, 0);
189 	if (s == -1) {
190 		perror("ilbd_create_client_socket: socket to"
191 		    " client failed");
192 		exit(errno);
193 	}
194 	if (fcntl(s, F_SETFD, FD_CLOEXEC) == -1) {
195 		perror("ilbd_create_client_socket: fcntl(FD_CLOEXEC)");
196 		exit(errno);
197 	}
198 
199 	sobufsz = ILBD_MSG_SIZE;
200 	if (setsockopt(s, SOL_SOCKET, SO_SNDBUF, &sobufsz,
201 	    sizeof (sobufsz)) != 0) {
202 		perror("ilbd_creat_client_socket: setsockopt(SO_SNDBUF) "
203 		    "failed");
204 		exit(errno);
205 	}
206 	if (setsockopt(s, SOL_SOCKET, SO_RCVBUF, &sobufsz,
207 	    sizeof (sobufsz)) != 0) {
208 		perror("ilbd_creat_client_socket: setsockopt(SO_RCVBUF) "
209 		    "failed");
210 		exit(errno);
211 	}
212 
213 	/*
214 	 * since everybody can talk to us, we need to open up permissions
215 	 * we check peer privileges on a per-operation basis.
216 	 * This is no security issue as long as we're single-threaded.
217 	 */
218 	omask = umask(0);
219 
220 	/* just in case we didn't clean up properly after last exit */
221 	(void) remove(SOCKET_PATH);
222 
223 	bzero(&sa, sizeof (sa));
224 	sa.sun_family = AF_UNIX;
225 	(void) strlcpy(sa.sun_path, SOCKET_PATH, sizeof (sa.sun_path));
226 
227 	if (bind(s, (struct sockaddr *)&sa, sizeof (sa)) != 0) {
228 		perror("ilbd_create_client_socket(): bind to client"
229 		    " socket failed");
230 		exit(errno);
231 	}
232 
233 	/* re-instate old umask */
234 	(void) umask(omask);
235 
236 #define	QLEN	16
237 
238 	if (listen(s, QLEN) != 0) {
239 		perror("ilbd_create_client_socket: listen to client"
240 		    " socket failed");
241 		exit(errno);
242 	}
243 
244 	(void) signal(SIGHUP, SIG_IGN);
245 	(void) signal(SIGPIPE, SIG_IGN);
246 	(void) signal(SIGSTOP, SIG_IGN);
247 	(void) signal(SIGTSTP, SIG_IGN);
248 	(void) signal(SIGTTIN, SIG_IGN);
249 	(void) signal(SIGTTOU, SIG_IGN);
250 
251 	(void) signal(SIGINT, ilbd_cleanup);
252 	(void) signal(SIGTERM, ilbd_cleanup);
253 	(void) signal(SIGQUIT, ilbd_cleanup);
254 
255 	return (s);
256 }
257 
258 /*
259  * Return the minimum size of a given request.  The returned size does not
260  * include the variable part of a request.
261  */
262 static size_t
263 ilbd_cmd_size(const ilb_comm_t *ic)
264 {
265 	size_t cmd_sz;
266 
267 	cmd_sz = sizeof (*ic);
268 	switch (ic->ic_cmd) {
269 	case ILBD_RETRIEVE_SG_NAMES:
270 	case ILBD_RETRIEVE_RULE_NAMES:
271 	case ILBD_RETRIEVE_HC_NAMES:
272 	case ILBD_CMD_OK:
273 		break;
274 	case ILBD_CMD_ERROR:
275 		cmd_sz += sizeof (ilb_status_t);
276 		break;
277 	case ILBD_RETRIEVE_SG_HOSTS:
278 	case ILBD_CREATE_SERVERGROUP:
279 	case ILBD_DESTROY_SERVERGROUP:
280 	case ILBD_DESTROY_RULE:
281 	case ILBD_ENABLE_RULE:
282 	case ILBD_DISABLE_RULE:
283 	case ILBD_RETRIEVE_RULE:
284 	case ILBD_DESTROY_HC:
285 	case ILBD_GET_HC_INFO:
286 	case ILBD_GET_HC_SRVS:
287 		cmd_sz += sizeof (ilbd_name_t);
288 		break;
289 	case ILBD_ENABLE_SERVER:
290 	case ILBD_DISABLE_SERVER:
291 	case ILBD_ADD_SERVER_TO_GROUP:
292 	case ILBD_REM_SERVER_FROM_GROUP:
293 		cmd_sz += sizeof (ilb_sg_info_t);
294 		break;
295 	case ILBD_SRV_ADDR2ID:
296 	case ILBD_SRV_ID2ADDR:
297 		cmd_sz += sizeof (ilb_sg_info_t) + sizeof (ilb_sg_srv_t);
298 		break;
299 	case ILBD_CREATE_RULE:
300 		cmd_sz += sizeof (ilb_rule_info_t);
301 		break;
302 	case ILBD_CREATE_HC:
303 		cmd_sz += sizeof (ilb_hc_info_t);
304 		break;
305 	case ILBD_SHOW_NAT:
306 	case ILBD_SHOW_PERSIST:
307 		cmd_sz += sizeof (ilb_show_info_t);
308 		break;
309 	}
310 
311 	return (cmd_sz);
312 }
313 
314 /*
315  * Given a request and its size, check that the size is big enough to
316  * contain the variable part of a request.
317  */
318 static ilb_status_t
319 ilbd_check_req_size(ilb_comm_t *ic, size_t ic_sz)
320 {
321 	ilb_status_t rc = ILB_STATUS_OK;
322 	ilb_sg_info_t *sg_info;
323 	ilbd_namelist_t *nlist;
324 
325 	switch (ic->ic_cmd) {
326 	case ILBD_CREATE_SERVERGROUP:
327 	case ILBD_ENABLE_SERVER:
328 	case ILBD_DISABLE_SERVER:
329 	case ILBD_ADD_SERVER_TO_GROUP:
330 	case ILBD_REM_SERVER_FROM_GROUP:
331 		sg_info = (ilb_sg_info_t *)&ic->ic_data;
332 
333 		if (ic_sz < ilbd_cmd_size(ic) + sg_info->sg_srvcount *
334 		    sizeof (ilb_sg_srv_t)) {
335 			rc = ILB_STATUS_EINVAL;
336 		}
337 		break;
338 	case ILBD_ENABLE_RULE:
339 	case ILBD_DISABLE_RULE:
340 	case ILBD_DESTROY_RULE:
341 		nlist = (ilbd_namelist_t *)&ic->ic_data;
342 
343 		if (ic_sz < ilbd_cmd_size(ic) + nlist->ilbl_count *
344 		    sizeof (ilbd_name_t)) {
345 			rc = ILB_STATUS_EINVAL;
346 		}
347 		break;
348 	}
349 	return (rc);
350 }
351 
352 /*
353  * this function *relies* on a complete message/data struct
354  * being passed in (currently via the SOCK_SEQPACKET socket type).
355  *
356  * Note that the size of ip is at most ILBD_MSG_SIZE.
357  */
358 static ilb_status_t
359 consume_common_struct(ilb_comm_t *ic, size_t ic_sz, ilbd_client_t *cli,
360     int ev_port)
361 {
362 	ilb_status_t	rc;
363 	struct passwd	*ps;
364 	size_t		rbufsz;
365 	ssize_t		ret;
366 	boolean_t	standard_reply = B_TRUE;
367 	ilbd_name_t	name;
368 
369 	/*
370 	 * cli_ev must be overridden during handling of individual commands,
371 	 * if there's a special need; otherwise, leave this for
372 	 * the "default" case
373 	 */
374 	cli->cli_ev = ILBD_EVENT_REQ;
375 
376 	ps = &cli->cli_pw;
377 	rbufsz = ILBD_MSG_SIZE;
378 
379 	/* Sanity check on the size of the static part of a request. */
380 	if (ic_sz < ilbd_cmd_size(ic)) {
381 		rc = ILB_STATUS_EINVAL;
382 		goto out;
383 	}
384 
385 	switch (ic->ic_cmd) {
386 	case ILBD_CREATE_SERVERGROUP: {
387 		ilb_sg_info_t sg_info;
388 
389 		/*
390 		 * ilbd_create_sg() only needs the sg_name field.  But it
391 		 * takes in a ilb_sg_info_t because it is used as a callback
392 		 * in ilbd_walk_sg_pgs().
393 		 */
394 		(void) strlcpy(sg_info.sg_name, (char *)&(ic->ic_data),
395 		    sizeof (sg_info.sg_name));
396 		rc = ilbd_create_sg(&sg_info, ev_port, ps,
397 		    cli->cli_peer_ucredp);
398 		break;
399 	}
400 
401 	case ILBD_DESTROY_SERVERGROUP:
402 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
403 		rc = ilbd_destroy_sg(name, ps, cli->cli_peer_ucredp);
404 		break;
405 
406 	case ILBD_ADD_SERVER_TO_GROUP:
407 		if ((rc = ilbd_check_req_size(ic, ic_sz)) != ILB_STATUS_OK)
408 			break;
409 		rc = ilbd_add_server_to_group((ilb_sg_info_t *)&ic->ic_data,
410 		    ev_port, ps, cli->cli_peer_ucredp);
411 		break;
412 
413 	case ILBD_REM_SERVER_FROM_GROUP:
414 		if ((rc = ilbd_check_req_size(ic, ic_sz)) != ILB_STATUS_OK)
415 			break;
416 		rc = ilbd_rem_server_from_group((ilb_sg_info_t *)&ic->ic_data,
417 		    ev_port, ps, cli->cli_peer_ucredp);
418 		break;
419 
420 	case ILBD_ENABLE_SERVER:
421 		if ((rc = ilbd_check_req_size(ic, ic_sz)) != ILB_STATUS_OK)
422 			break;
423 		rc = ilbd_enable_server((ilb_sg_info_t *)&ic->ic_data, ps,
424 		    cli->cli_peer_ucredp);
425 		break;
426 
427 	case ILBD_DISABLE_SERVER:
428 		if ((rc = ilbd_check_req_size(ic, ic_sz)) != ILB_STATUS_OK)
429 			break;
430 		rc = ilbd_disable_server((ilb_sg_info_t *)&ic->ic_data, ps,
431 		    cli->cli_peer_ucredp);
432 		break;
433 
434 	case ILBD_SRV_ADDR2ID:
435 		rc = ilbd_address_to_srvID((ilb_sg_info_t *)&ic->ic_data,
436 		    reply_buf, &rbufsz);
437 		if (rc == ILB_STATUS_OK)
438 			standard_reply = B_FALSE;
439 		break;
440 
441 	case ILBD_SRV_ID2ADDR:
442 		rc = ilbd_srvID_to_address((ilb_sg_info_t *)&ic->ic_data,
443 		    reply_buf, &rbufsz);
444 		if (rc == ILB_STATUS_OK)
445 			standard_reply = B_FALSE;
446 		break;
447 
448 	case ILBD_RETRIEVE_SG_HOSTS:
449 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
450 		rc = ilbd_retrieve_sg_hosts(name, reply_buf, &rbufsz);
451 		if (rc == ILB_STATUS_OK)
452 			standard_reply = B_FALSE;
453 		break;
454 
455 	case ILBD_RETRIEVE_SG_NAMES:
456 	case ILBD_RETRIEVE_RULE_NAMES:
457 	case ILBD_RETRIEVE_HC_NAMES:
458 		rc = ilbd_retrieve_names(ic->ic_cmd, reply_buf, &rbufsz);
459 		if (rc == ILB_STATUS_OK)
460 			standard_reply = B_FALSE;
461 		break;
462 
463 	case ILBD_CREATE_RULE:
464 		rc = ilbd_create_rule((ilb_rule_info_t *)&ic->ic_data, ev_port,
465 		    ps, cli->cli_peer_ucredp);
466 		break;
467 
468 	case ILBD_DESTROY_RULE:
469 		/* Copy the name to ensure that name is NULL terminated. */
470 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
471 		rc = ilbd_destroy_rule(name, ps, cli->cli_peer_ucredp);
472 		break;
473 
474 	case ILBD_ENABLE_RULE:
475 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
476 		rc = ilbd_enable_rule(name, ps, cli->cli_peer_ucredp);
477 		break;
478 
479 	case ILBD_DISABLE_RULE:
480 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
481 		rc = ilbd_disable_rule(name, ps, cli->cli_peer_ucredp);
482 		break;
483 
484 	case ILBD_RETRIEVE_RULE:
485 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
486 		rc = ilbd_retrieve_rule(name, reply_buf, &rbufsz);
487 		if (rc == ILB_STATUS_OK)
488 			standard_reply = B_FALSE;
489 		break;
490 
491 	case ILBD_CREATE_HC:
492 		rc = ilbd_create_hc((ilb_hc_info_t *)&ic->ic_data, ev_port, ps,
493 		    cli->cli_peer_ucredp);
494 		break;
495 
496 	case ILBD_DESTROY_HC:
497 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
498 		rc = ilbd_destroy_hc(name, ps, cli->cli_peer_ucredp);
499 		break;
500 
501 	case ILBD_GET_HC_INFO:
502 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
503 		rc = ilbd_get_hc_info(name, reply_buf, &rbufsz);
504 		if (rc == ILB_STATUS_OK)
505 			standard_reply = B_FALSE;
506 		break;
507 
508 	case ILBD_GET_HC_SRVS:
509 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
510 		rc = ilbd_get_hc_srvs(name, reply_buf, &rbufsz);
511 		if (rc == ILB_STATUS_OK)
512 			standard_reply = B_FALSE;
513 		break;
514 
515 	case ILBD_SHOW_NAT:
516 		rc = ilbd_show_nat(cli, ic, reply_buf, &rbufsz);
517 		if (rc == ILB_STATUS_OK)
518 			standard_reply = B_FALSE;
519 		break;
520 
521 	case ILBD_SHOW_PERSIST:
522 		rc = ilbd_show_sticky(cli, ic, reply_buf, &rbufsz);
523 		if (rc == ILB_STATUS_OK)
524 			standard_reply = B_FALSE;
525 		break;
526 
527 	default:
528 		logdebug("consume_common_struct: unknown command");
529 		rc = ILB_STATUS_INVAL_CMD;
530 		break;
531 	}
532 
533 out:
534 	/*
535 	 * The message exchange is always in pairs, request/response.  If
536 	 * a transaction requires multiple exchanges, the client will send
537 	 * in multiple requests to get multiple responses.  The show-nat and
538 	 * show-persist request are examples of this.  The end of transaction
539 	 * is marked with ic_flags set to ILB_COMM_END.
540 	 */
541 
542 	/* This is the standard reply. */
543 	if (standard_reply) {
544 		if (rc == ILB_STATUS_OK)
545 			ilbd_reply_ok(reply_buf, &rbufsz);
546 		else
547 			ilbd_reply_err(reply_buf, &rbufsz, rc);
548 	}
549 
550 	if ((ret = send(cli->cli_sd, reply_buf, rbufsz, 0)) != rbufsz) {
551 		if (ret == -1) {
552 			if (errno != EWOULDBLOCK) {
553 				logdebug("consume_common_struct: send: %s",
554 				    strerror(errno));
555 				rc = ILB_STATUS_SEND;
556 				goto err_out;
557 			}
558 			/*
559 			 * The reply is blocked, save the reply.  handle_req()
560 			 * will associate the event port for the re-send.
561 			 */
562 			assert(cli->cli_saved_reply == NULL);
563 			if ((cli->cli_saved_reply = malloc(rbufsz)) == NULL) {
564 				/*
565 				 * Set the error to ILB_STATUS_SEND so that
566 				 * handle_req() will free the client.
567 				 */
568 				logdebug("consume_common_struct: failure to "
569 				    "allocate memory to save reply");
570 				rc = ILB_STATUS_SEND;
571 				goto err_out;
572 			}
573 			bcopy(reply_buf, cli->cli_saved_reply, rbufsz);
574 			cli->cli_saved_size = rbufsz;
575 			return (ILB_STATUS_EWOULDBLOCK);
576 		}
577 	}
578 err_out:
579 	return (rc);
580 }
581 
582 /*
583  * Accept a new client request.  A struct ilbd_client_t is allocated to
584  * store the client info.  The accepted socket is port_associate() with
585  * the given port.  And the allocated ilbd_client_t struct is passed as
586  * the user pointer.
587  */
588 static void
589 new_req(int ev_port, int listener, void *ev_obj)
590 {
591 	struct sockaddr	sa;
592 	int		sa_len;
593 	int		new_sd;
594 	int		sflags;
595 	ilbd_client_t	*cli = NULL;
596 	int		res;
597 	uid_t		uid;
598 
599 	sa_len = sizeof (sa);
600 	if ((new_sd = accept(listener, &sa, &sa_len)) == -1) {
601 		/* don't log if we're out of file descriptors */
602 		if (errno != EINTR && errno != EMFILE)
603 			logperror("new_req: accept failed");
604 		goto done;
605 	}
606 
607 	/* Set the new socket to be non-blocking. */
608 	if ((sflags = fcntl(new_sd, F_GETFL, 0)) == -1) {
609 		logperror("new_req: fcntl(F_GETFL)");
610 		goto clean_up;
611 	}
612 	if (fcntl(new_sd, F_SETFL, sflags | O_NONBLOCK) == -1) {
613 		logperror("new_req: fcntl(F_SETFL)");
614 		goto clean_up;
615 	}
616 	if (fcntl(new_sd, F_SETFD, FD_CLOEXEC) == -1) {
617 		logperror("new_req: fcntl(FD_CLOEXEC)");
618 		goto clean_up;
619 	}
620 	if ((cli = calloc(1, sizeof (ilbd_client_t))) == NULL) {
621 		logerr("new_req: malloc(ilbd_client_t)");
622 		goto clean_up;
623 	}
624 	res = getpeerucred(new_sd, &cli->cli_peer_ucredp);
625 	if (res == -1) {
626 		logperror("new_req: getpeerucred failed");
627 		goto clean_up;
628 	}
629 	if ((uid = ucred_getruid(cli->cli_peer_ucredp)) == (uid_t)-1) {
630 		logperror("new_req: ucred_getruid failed");
631 		goto clean_up;
632 	}
633 	cli->cli_pw_bufsz = (size_t)sysconf(_SC_GETPW_R_SIZE_MAX);
634 	if ((cli->cli_pw_buf = malloc(cli->cli_pw_bufsz)) == NULL) {
635 		logerr("new_req: malloc(cli_pw_buf)");
636 		goto clean_up;
637 	}
638 	if (getpwuid_r(uid, &cli->cli_pw, cli->cli_pw_buf,
639 	    cli->cli_pw_bufsz) == NULL) {
640 		logperror("new_req: invalid user");
641 		goto clean_up;
642 	}
643 	cli->cli_ev = ILBD_EVENT_REQ;
644 	cli->cli_sd = new_sd;
645 	cli->cli_cmd = ILBD_BAD_CMD;
646 	cli->cli_saved_reply = NULL;
647 	cli->cli_saved_size = 0;
648 	if (port_associate(ev_port, PORT_SOURCE_FD, new_sd, POLLRDNORM,
649 	    cli) == -1) {
650 		logperror("new_req: port_associate(cli) failed");
651 clean_up:
652 		if (cli != NULL) {
653 			if (cli->cli_peer_ucredp != NULL)
654 				ucred_free(cli->cli_peer_ucredp);
655 			free(cli->cli_pw_buf);
656 			free(cli);
657 		}
658 		(void) close(new_sd);
659 	}
660 
661 done:
662 	/* Re-associate the listener with the event port. */
663 	if (port_associate(ev_port, PORT_SOURCE_FD, listener, POLLRDNORM,
664 	    ev_obj) == -1) {
665 		logperror("new_req: port_associate(listener) failed");
666 		exit(1);
667 	}
668 }
669 
670 static void
671 handle_req(int ev_port, ilbd_event_t event, ilbd_client_t *cli)
672 {
673 	/* All request should be smaller than ILBD_MSG_SIZE */
674 	union {
675 		ilb_comm_t	ic;
676 		uint32_t	buf[ILBD_MSG_SIZE / sizeof (uint32_t)];
677 	} ic_u;
678 	int	rc = ILB_STATUS_OK;
679 	ssize_t	r;
680 
681 	if (event == ILBD_EVENT_REQ) {
682 		/*
683 		 * Something is wrong with the client since there is a
684 		 * pending reply, the client should not send us another
685 		 * request.  Kill this client.
686 		 */
687 		if (cli->cli_saved_reply != NULL) {
688 			logerr("handle_req: misbehaving client, more than one "
689 			    "outstanding request");
690 			rc = ILB_STATUS_INTERNAL;
691 			goto err_out;
692 		}
693 
694 		/*
695 		 * Our socket is message based so we should be able
696 		 * to get the request in one single read.
697 		 */
698 		r = recv(cli->cli_sd, (void *)ic_u.buf, sizeof (ic_u.buf), 0);
699 		if (r < 0) {
700 			if (errno != EINTR) {
701 				logperror("handle_req: read failed");
702 				rc = ILB_STATUS_READ;
703 				goto err_out;
704 			}
705 			/*
706 			 * If interrupted, just re-associate the cli_sd
707 			 * with the port.
708 			 */
709 			goto done;
710 		}
711 		cli->cli_cmd = ic_u.ic.ic_cmd;
712 
713 		rc = consume_common_struct(&ic_u.ic, r, cli, ev_port);
714 		if (rc == ILB_STATUS_EWOULDBLOCK)
715 			goto blocked;
716 		/* Fatal error communicating with client, free it. */
717 		if (rc == ILB_STATUS_SEND)
718 			goto err_out;
719 	} else {
720 		assert(event == ILBD_EVENT_REP_OK);
721 		assert(cli->cli_saved_reply != NULL);
722 
723 		/*
724 		 * The reply to client was previously blocked, we will
725 		 * send again.
726 		 */
727 		if (send(cli->cli_sd, cli->cli_saved_reply,
728 		    cli->cli_saved_size, 0) != cli->cli_saved_size) {
729 			if (errno != EWOULDBLOCK) {
730 				logdebug("handle_req: send: %s",
731 				    strerror(errno));
732 				rc = ILB_STATUS_SEND;
733 				goto err_out;
734 			}
735 			goto blocked;
736 		}
737 		free(cli->cli_saved_reply);
738 		cli->cli_saved_reply = NULL;
739 		cli->cli_saved_size = 0;
740 	}
741 done:
742 	/* Re-associate with the event port for more requests. */
743 	cli->cli_ev = ILBD_EVENT_REQ;
744 	if (port_associate(ev_port, PORT_SOURCE_FD, cli->cli_sd,
745 	    POLLRDNORM, cli) == -1) {
746 		logperror("handle_req: port_associate(POLLRDNORM)");
747 		rc = ILB_STATUS_INTERNAL;
748 		goto err_out;
749 	}
750 	return;
751 
752 blocked:
753 	/* Re-associate with the event port. */
754 	cli->cli_ev = ILBD_EVENT_REP_OK;
755 	if (port_associate(ev_port, PORT_SOURCE_FD, cli->cli_sd, POLLWRNORM,
756 	    cli) == -1) {
757 		logperror("handle_req: port_associate(POLLWRNORM)");
758 		rc = ILB_STATUS_INTERNAL;
759 		goto err_out;
760 	}
761 	return;
762 
763 err_out:
764 	ilbd_free_cli(cli);
765 }
766 
767 static void
768 i_ilbd_read_config(int ev_port)
769 {
770 	logdebug("i_ilbd_read_config: port %d", ev_port);
771 	(void) ilbd_walk_sg_pgs(ilbd_create_sg, &ev_port, NULL);
772 	(void) ilbd_walk_hc_pgs(ilbd_create_hc, &ev_port, NULL);
773 	(void) ilbd_walk_rule_pgs(ilbd_create_rule, &ev_port, NULL);
774 }
775 
776 /*
777  * main event loop for ilbd
778  * asserts that argument 'listener' is a server socket ready to accept() on.
779  */
780 static void
781 main_loop(int listener)
782 {
783 	port_event_t		p_ev;
784 	int			ev_port, ev_port_obj;
785 	ilbd_event_obj_t	ev_obj;
786 	ilbd_timer_event_obj_t	timer_ev_obj;
787 
788 	ev_port = port_create();
789 	if (ev_port == -1) {
790 		logperror("main_loop: port_create failed");
791 		exit(-1);
792 	}
793 	ilbd_hc_timer_init(ev_port, &timer_ev_obj);
794 
795 	ev_obj.ev = ILBD_EVENT_NEW_REQ;
796 	if (port_associate(ev_port, PORT_SOURCE_FD, listener, POLLRDNORM,
797 	    &ev_obj) == -1) {
798 		logperror("main_loop: port_associate failed");
799 		exit(1);
800 	}
801 
802 	i_ilbd_read_config(ev_port);
803 	ilbd_hc_timer_update(&timer_ev_obj);
804 
805 	_NOTE(CONSTCOND)
806 	while (B_TRUE) {
807 		int r;
808 		ilbd_event_t event;
809 		ilbd_client_t *cli;
810 
811 		r = port_get(ev_port, &p_ev, NULL);
812 		if (r == -1) {
813 			if (errno == EINTR)
814 				continue;
815 			logperror("main_loop: port_get failed");
816 			break;
817 		}
818 
819 		ev_port_obj = p_ev.portev_object;
820 		event = ((ilbd_event_obj_t *)p_ev.portev_user)->ev;
821 
822 		switch (event) {
823 		case ILBD_EVENT_TIMER:
824 			ilbd_hc_timeout();
825 			break;
826 
827 		case ILBD_EVENT_PROBE:
828 			ilbd_hc_probe_return(ev_port, ev_port_obj,
829 			    p_ev.portev_events,
830 			    (ilbd_hc_probe_event_t *)p_ev.portev_user);
831 			break;
832 
833 		case ILBD_EVENT_NEW_REQ:
834 			assert(ev_port_obj == listener);
835 			/*
836 			 * An error happens in the listener.  Exit
837 			 * for now....
838 			 */
839 			if (p_ev.portev_events & (POLLHUP|POLLERR)) {
840 				logerr("main_loop: listener error");
841 				exit(1);
842 			}
843 			new_req(ev_port, ev_port_obj, &ev_obj);
844 			break;
845 
846 		case ILBD_EVENT_REP_OK:
847 		case ILBD_EVENT_REQ:
848 			cli = (ilbd_client_t *)p_ev.portev_user;
849 			assert(ev_port_obj == cli->cli_sd);
850 
851 			/*
852 			 * An error happens in the newly accepted
853 			 * client request.  Clean up the client.
854 			 * this also happens when client closes socket,
855 			 * so not necessarily a reason for alarm
856 			 */
857 			if (p_ev.portev_events & (POLLHUP|POLLERR)) {
858 				ilbd_free_cli(cli);
859 				break;
860 			}
861 
862 			handle_req(ev_port, event, cli);
863 			break;
864 
865 		default:
866 			logerr("main_loop: unknown event %d", event);
867 			exit(EXIT_FAILURE);
868 			break;
869 		}
870 
871 		ilbd_hc_timer_update(&timer_ev_obj);
872 	}
873 }
874 
/*
 * Run the set-up routines for the server group list, the rule list and
 * the health check list, in that order.
 */
static void
i_ilbd_setup_lists(void)
{
	i_setup_sg_hlist();		/* server groups */
	i_setup_rule_hlist();		/* rules */
	i_ilbd_setup_hc_list();		/* health checks */
}
882 
/*
 * Print the (localized) usage message for program `name' on stderr and
 * exit with status 1.  Call only during startup.
 */
static void
Usage(char *name)
{
	const char	*fmt = gettext("Usage: %s [-d|--debug]\n");

	(void) fprintf(stderr, fmt, name);
	exit(1);
}
893 
894 static void
895 print_version(char *name)
896 {
897 	(void) printf("%s %s\n", basename(name), ILBD_VERSION);
898 	(void) printf(gettext(ILBD_COPYRIGHT));
899 	exit(0);
900 }
901 
/*
 * Increase the file descriptor limit for handling a lot of health check
 * processes (each requires a pipe): the soft RLIMIT_NOFILE limit is
 * raised to the current hard limit.
 *
 * Note that this function is called before ilbd becomes a daemon.  So
 * we call perror(3C) to print out error message directly so that SMF
 * can catch them.  On failure the process exits with the errno of the
 * failed call; errno is saved before perror() runs because perror() may
 * itself modify it.
 */
static void
set_rlim(void)
{
	struct rlimit	lim;
	int		saved_errno;

	if (getrlimit(RLIMIT_NOFILE, &lim) == -1) {
		saved_errno = errno;
		perror("ilbd: getrlimit");
		exit(saved_errno);
	}

	lim.rlim_cur = lim.rlim_max;
	if (setrlimit(RLIMIT_NOFILE, &lim) == -1) {
		saved_errno = errno;
		perror("ilbd: setrlimit");
		exit(saved_errno);
	}
}
925 
926 int
927 main(int argc, char **argv)
928 {
929 	int	s;
930 	int	c;
931 
932 	(void) setlocale(LC_ALL, "");
933 #if !defined(TEXT_DOMAIN)
934 #define	TEXT_DOMAIN "SYS_TEST"
935 #endif
936 	static const char daemon_dir[] = DAEMON_DIR;
937 
938 	(void) textdomain(TEXT_DOMAIN);
939 
940 	while ((c = getopt(argc, argv, ":V?d(debug)")) != -1) {
941 		switch ((char)c) {
942 		case '?': Usage(argv[0]);
943 			/* not reached */
944 			break;
945 		case 'V': print_version(argv[0]);
946 			/* not reached */
947 			break;
948 		case 'd': ilbd_enable_debug();
949 			break;
950 		default: Usage(argv[0]);
951 			/* not reached */
952 			break;
953 		}
954 	}
955 
956 	/*
957 	 * Whenever the daemon starts, it needs to start with a clean
958 	 * slate in the kernel. We need sys_ip_config privilege for
959 	 * this.
960 	 */
961 	ilbd_reset_kernel_state();
962 
963 	/* Increase the limit on the number of file descriptors. */
964 	set_rlim();
965 
966 	/*
967 	 * ilbd daemon starts off as root, just so it can create
968 	 * /var/run/daemon if one does not exist. After that is done
969 	 * the daemon switches to "daemon" uid. This is similar to what
970 	 * rpcbind does.
971 	 */
972 	if (mkdir(daemon_dir, DAEMON_DIR_MODE) == 0 || errno == EEXIST) {
973 		(void) chmod(daemon_dir, DAEMON_DIR_MODE);
974 		(void) chown(daemon_dir, DAEMON_UID, DAEMON_GID);
975 	} else {
976 		perror("main: mkdir failed");
977 		exit(errno);
978 	}
979 	/*
980 	 * Now lets switch ilbd as uid = daemon, gid = daemon with a
981 	 * trimmed down privilege set
982 	 */
983 	if (__init_daemon_priv(PU_RESETGROUPS | PU_LIMITPRIVS | PU_INHERITPRIVS,
984 	    DAEMON_UID, DAEMON_GID, PRIV_PROC_OWNER, PRIV_PROC_AUDIT,
985 	    PRIV_NET_ICMPACCESS, PRIV_SYS_IP_CONFIG, NULL) == -1) {
986 		(void) fprintf(stderr, "Insufficient privileges\n");
987 		exit(EXIT_FAILURE);
988 	}
989 
990 	/*
991 	 * Opens a PF_UNIX socket to the client. No privilege needed
992 	 * for this.
993 	 */
994 	s = ilbd_create_client_socket();
995 
996 	/*
997 	 * Daemonify if ilbd is not running with -d option
998 	 * Need proc_fork privilege for this
999 	 */
1000 	if (!is_debugging_on()) {
1001 		logdebug("daemonizing...");
1002 		if (daemon(0, 0) != 0) {
1003 			logperror("daemon failed");
1004 			exit(EXIT_FAILURE);
1005 		}
1006 	}
1007 	(void) priv_set(PRIV_OFF, PRIV_INHERITABLE, PRIV_PROC_OWNER,
1008 	    PRIV_PROC_AUDIT, NULL);
1009 
1010 	/* if daemonified then set up syslog */
1011 	if (!is_debugging_on())
1012 		openlog("ilbd", LOG_PID, LOG_DAEMON);
1013 
1014 	i_ilbd_setup_lists();
1015 
1016 	main_loop(s);
1017 
1018 	/*
1019 	 * if we come here, then we experienced an error or a shutdown
1020 	 * indicator, so clean up after ourselves.
1021 	 */
1022 	logdebug("main(): terminating");
1023 
1024 	(void) remove(SOCKET_PATH);
1025 	ilbd_reset_kernel_state();
1026 
1027 	return (0);
1028 }
1029