xref: /titanic_44/usr/src/cmd/cmd-inet/usr.lib/ilbd/ilbd_main.c (revision 1d9cde1dcd9c3d71413dae0f9e9b3845a667cd9c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * The core of ilbd daemon is a single-threaded event loop using
28  * event completion framework; it receives requests from client using
29  * the libilb functions, handles timeouts, initiates health checks, and
30  * populates the kernel state.
31  *
32  * The daemon has the following privileges (in addition to the basic ones):
33  *
34  * 	PRIV_PROC_OWNER, PRIV_NET_ICMPACCESS,
35  *	PRIV_SYS_IP_CONFIG, PRIV_PROC_AUDIT
36  *
37  * The aforementioned  privileges will be specified in the SMF manifest.
38  *
39  * AF_UNIX socket is used for IPC between libilb and this daemon as
40  * both processes will run on the same machine.
41  *
42  * To do health check, the daemon will create a timer for every health
43  * check probe. Each of these timers will be  associated with the
44  * event port. When a timer goes off, the daemon will initiate a
45  * pipe to a separate process to execute the specific health check
46  * probe. This new process will run with the same user-id as that of
47  * ilbd daemon and will inherit all the privileges from the ilbd
48  * daemon parent process except the following:
49  *
50  * PRIV_PROC_OWNER, PRIV_PROC_AUDIT
51  *
52  * All health checks will be implemented as external methods
53  * (binary or script). The following arguments will be passed
54  * to external methods:
55  *
56  *	$1	VIP (literal IPv4 or IPv6 address)
57  *	$2	Server IP (literal IPv4 or IPv6 address)
58  *	$3	Protocol (UDP, TCP as a string)
59  *	$4	The load balance mode, "DSR", "NAT", "HALF_NAT"
60  *	$5	Numeric port range
61  *	$6	maximum time (in seconds) the method
62  * should wait before returning failure. If the method runs for
63  * longer, it may be killed, and the test considered failed.
64  *
65  * Upon success, a health check method should print the RTT it
66  * finds to its STDOUT for ilbd to consume.  The implicit unit
67  * is microseconds but only the number needs to be printed.  If it
68  * cannot find the RTT, it should print 0.  If the method decides
69  * that the server is dead, it should print -1 to its STDOUT.
70  *
71  * By default, a user-supplied health check probe process will
72  * also run with the same set of privileges as ILB's built-in
73  * probes.  If the administrator has a user-supplied health check
74  * program that requires a larger privilege set, it will have
75  * to be implemented as a setuid program.
76  *
77  * Each health check will have a timeout, such that if the health
78  * check process is hung, it will be killed after the timeout interval
79  * and the daemon will notify the kernel ILB engine of the server's
80  * unresponsiveness, so that load distribution can be appropriately
81  * adjusted.  If on the other hand the health check is successful
82  * the timeout timer is cancelled.
83  */
84 
85 #include <stdio.h>
86 #include <stdlib.h>
87 #include <strings.h>
88 #include <libgen.h>
89 #include <fcntl.h>
90 #include <stddef.h>
91 #include <signal.h>
92 #include <port.h>
93 #include <ctype.h>
94 #include <sys/types.h>
95 #include <sys/wait.h>
96 #include <sys/stat.h>
97 #include <sys/note.h>
98 #include <sys/resource.h>
99 #include <unistd.h>
100 #include <sys/socket.h>
101 #include <errno.h>
102 #include <ucred.h>
103 #include <priv_utils.h>
104 #include <net/if.h>
105 #include <libilb.h>
106 #include <assert.h>
107 #include <inet/ilb.h>
108 #include <libintl.h>
109 #include <fcntl.h>
110 #include <rpcsvc/daemon_utils.h>
111 #include "libilb_impl.h"
112 #include "ilbd.h"
113 
114 /*
115  * NOTE: keep the version and copyright strings below up to date.
116  */
117 #define	ILBD_VERSION	"1.0"
118 #define	ILBD_COPYRIGHT	\
119 	"Copyright (c) 2005, 2010, Oracle and/or its affiliates. " \
120 	"All rights reserved.\n"
121 
122 /*
123  * Global reply buffer, ILBD_MSG_SIZE bytes, used for replies to clients.
124  * A single global buffer is OK only because ilbd is single threaded.  If
125  * ilbd becomes multi-threaded, this needs to be changed.
126  */
127 static uint32_t reply_buf[ILBD_MSG_SIZE / sizeof (uint32_t)];
128 
129 static void
130 ilbd_free_cli(ilbd_client_t *cli)
131 {
132 	(void) close(cli->cli_sd);
133 	if (cli->cli_cmd == ILBD_SHOW_NAT)
134 		ilbd_show_nat_cleanup();
135 	if (cli->cli_cmd == ILBD_SHOW_PERSIST)
136 		ilbd_show_sticky_cleanup();
137 	if (cli->cli_saved_reply != NULL)
138 		free(cli->cli_saved_reply);
139 	if (cli->cli_peer_ucredp != NULL)
140 		ucred_free(cli->cli_peer_ucredp);
141 	free(cli->cli_pw_buf);
142 	free(cli);
143 }
144 
145 static void
146 ilbd_reset_kernel_state(void)
147 {
148 	ilb_status_t	rc;
149 	ilb_name_cmd_t	kcmd;
150 
151 	kcmd.cmd = ILB_DESTROY_RULE;
152 	kcmd.flags = ILB_RULE_ALLRULES;
153 	kcmd.name[0] = '\0';
154 
155 	rc = do_ioctl(&kcmd, 0);
156 	if (rc != ILB_STATUS_OK)
157 		logdebug("ilbd_reset_kernel_state: do_ioctl failed: %s",
158 		    strerror(errno));
159 }
160 
161 /* Signal handler to do clean up. */
162 /* ARGSUSED */
163 static void
164 ilbd_cleanup(int sig)
165 {
166 	(void) remove(SOCKET_PATH);
167 	ilbd_reset_kernel_state();
168 	exit(0);
169 }
170 
171 /*
172  * Create a socket and return it to caller.  If there is a failure, this
173  * function calls exit(2).  Hence it always returns a valid listener socket.
174  *
175  * Note that this function is called before ilbd becomes a daemon.  So
176  * we call perror(3C) to print out error message directly so that SMF can
177  * catch them.
178  */
179 static int
180 ilbd_create_client_socket(void)
181 {
182 	int			s;
183 	mode_t			omask;
184 	struct sockaddr_un	sa;
185 	int			sobufsz;
186 
187 	s = socket(PF_UNIX, SOCK_SEQPACKET, 0);
188 	if (s == -1) {
189 		perror("ilbd_create_client_socket: socket to"
190 		    " client failed");
191 		exit(errno);
192 	}
193 	if (fcntl(s, F_SETFD, FD_CLOEXEC) == -1) {
194 		perror("ilbd_create_client_socket: fcntl(FD_CLOEXEC)");
195 		exit(errno);
196 	}
197 
198 	sobufsz = ILBD_MSG_SIZE;
199 	if (setsockopt(s, SOL_SOCKET, SO_SNDBUF, &sobufsz,
200 	    sizeof (sobufsz)) != 0) {
201 		perror("ilbd_creat_client_socket: setsockopt(SO_SNDBUF) "
202 		    "failed");
203 		exit(errno);
204 	}
205 	if (setsockopt(s, SOL_SOCKET, SO_RCVBUF, &sobufsz,
206 	    sizeof (sobufsz)) != 0) {
207 		perror("ilbd_creat_client_socket: setsockopt(SO_RCVBUF) "
208 		    "failed");
209 		exit(errno);
210 	}
211 
212 	/*
213 	 * since everybody can talk to us, we need to open up permissions
214 	 * we check peer privileges on a per-operation basis.
215 	 * This is no security issue as long as we're single-threaded.
216 	 */
217 	omask = umask(0);
218 
219 	/* just in case we didn't clean up properly after last exit */
220 	(void) remove(SOCKET_PATH);
221 
222 	bzero(&sa, sizeof (sa));
223 	sa.sun_family = AF_UNIX;
224 	(void) strlcpy(sa.sun_path, SOCKET_PATH, sizeof (sa.sun_path));
225 
226 	if (bind(s, (struct sockaddr *)&sa, sizeof (sa)) != 0) {
227 		perror("ilbd_create_client_socket(): bind to client"
228 		    " socket failed");
229 		exit(errno);
230 	}
231 
232 	/* re-instate old umask */
233 	(void) umask(omask);
234 
235 #define	QLEN	16
236 
237 	if (listen(s, QLEN) != 0) {
238 		perror("ilbd_create_client_socket: listen to client"
239 		    " socket failed");
240 		exit(errno);
241 	}
242 
243 	(void) signal(SIGHUP, SIG_IGN);
244 	(void) signal(SIGPIPE, SIG_IGN);
245 	(void) signal(SIGSTOP, SIG_IGN);
246 	(void) signal(SIGTSTP, SIG_IGN);
247 	(void) signal(SIGTTIN, SIG_IGN);
248 	(void) signal(SIGTTOU, SIG_IGN);
249 
250 	(void) signal(SIGINT, ilbd_cleanup);
251 	(void) signal(SIGTERM, ilbd_cleanup);
252 	(void) signal(SIGQUIT, ilbd_cleanup);
253 
254 	return (s);
255 }
256 
257 /*
258  * Return the minimum size of a given request.  The returned size does not
259  * include the variable part of a request.
260  */
261 static size_t
262 ilbd_cmd_size(const ilb_comm_t *ic)
263 {
264 	size_t cmd_sz;
265 
266 	cmd_sz = sizeof (*ic);
267 	switch (ic->ic_cmd) {
268 	case ILBD_RETRIEVE_SG_NAMES:
269 	case ILBD_RETRIEVE_RULE_NAMES:
270 	case ILBD_RETRIEVE_HC_NAMES:
271 	case ILBD_CMD_OK:
272 		break;
273 	case ILBD_CMD_ERROR:
274 		cmd_sz += sizeof (ilb_status_t);
275 		break;
276 	case ILBD_RETRIEVE_SG_HOSTS:
277 	case ILBD_CREATE_SERVERGROUP:
278 	case ILBD_DESTROY_SERVERGROUP:
279 	case ILBD_DESTROY_RULE:
280 	case ILBD_ENABLE_RULE:
281 	case ILBD_DISABLE_RULE:
282 	case ILBD_RETRIEVE_RULE:
283 	case ILBD_DESTROY_HC:
284 	case ILBD_GET_HC_INFO:
285 	case ILBD_GET_HC_SRVS:
286 		cmd_sz += sizeof (ilbd_name_t);
287 		break;
288 	case ILBD_ENABLE_SERVER:
289 	case ILBD_DISABLE_SERVER:
290 	case ILBD_ADD_SERVER_TO_GROUP:
291 	case ILBD_REM_SERVER_FROM_GROUP:
292 		cmd_sz += sizeof (ilb_sg_info_t);
293 		break;
294 	case ILBD_SRV_ADDR2ID:
295 	case ILBD_SRV_ID2ADDR:
296 		cmd_sz += sizeof (ilb_sg_info_t) + sizeof (ilb_sg_srv_t);
297 		break;
298 	case ILBD_CREATE_RULE:
299 		cmd_sz += sizeof (ilb_rule_info_t);
300 		break;
301 	case ILBD_CREATE_HC:
302 		cmd_sz += sizeof (ilb_hc_info_t);
303 		break;
304 	case ILBD_SHOW_NAT:
305 	case ILBD_SHOW_PERSIST:
306 		cmd_sz += sizeof (ilb_show_info_t);
307 		break;
308 	}
309 
310 	return (cmd_sz);
311 }
312 
313 /*
314  * Given a request and its size, check that the size is big enough to
315  * contain the variable part of a request.
316  */
317 static ilb_status_t
318 ilbd_check_req_size(ilb_comm_t *ic, size_t ic_sz)
319 {
320 	ilb_status_t rc = ILB_STATUS_OK;
321 	ilb_sg_info_t *sg_info;
322 	ilbd_namelist_t *nlist;
323 
324 	switch (ic->ic_cmd) {
325 	case ILBD_CREATE_SERVERGROUP:
326 	case ILBD_ENABLE_SERVER:
327 	case ILBD_DISABLE_SERVER:
328 	case ILBD_ADD_SERVER_TO_GROUP:
329 	case ILBD_REM_SERVER_FROM_GROUP:
330 		sg_info = (ilb_sg_info_t *)&ic->ic_data;
331 
332 		if (ic_sz < ilbd_cmd_size(ic) + sg_info->sg_srvcount *
333 		    sizeof (ilb_sg_srv_t)) {
334 			rc = ILB_STATUS_EINVAL;
335 		}
336 		break;
337 	case ILBD_ENABLE_RULE:
338 	case ILBD_DISABLE_RULE:
339 	case ILBD_DESTROY_RULE:
340 		nlist = (ilbd_namelist_t *)&ic->ic_data;
341 
342 		if (ic_sz < ilbd_cmd_size(ic) + nlist->ilbl_count *
343 		    sizeof (ilbd_name_t)) {
344 			rc = ILB_STATUS_EINVAL;
345 		}
346 		break;
347 	}
348 	return (rc);
349 }
350 
351 /*
352  * this function *relies* on a complete message/data struct
353  * being passed in (currently via the SOCK_SEQPACKET socket type).
354  *
355  * Note that the size of ip is at most ILBD_MSG_SIZE.
356  */
357 static ilb_status_t
358 consume_common_struct(ilb_comm_t *ic, size_t ic_sz, ilbd_client_t *cli,
359     int ev_port)
360 {
361 	ilb_status_t	rc;
362 	struct passwd	*ps;
363 	size_t		rbufsz;
364 	ssize_t		ret;
365 	boolean_t	standard_reply = B_TRUE;
366 	ilbd_name_t	name;
367 
368 	/*
369 	 * cli_ev must be overridden during handling of individual commands,
370 	 * if there's a special need; otherwise, leave this for
371 	 * the "default" case
372 	 */
373 	cli->cli_ev = ILBD_EVENT_REQ;
374 
375 	ps = &cli->cli_pw;
376 	rbufsz = ILBD_MSG_SIZE;
377 
378 	/* Sanity check on the size of the static part of a request. */
379 	if (ic_sz < ilbd_cmd_size(ic)) {
380 		rc = ILB_STATUS_EINVAL;
381 		goto out;
382 	}
383 
384 	switch (ic->ic_cmd) {
385 	case ILBD_CREATE_SERVERGROUP: {
386 		ilb_sg_info_t sg_info;
387 
388 		/*
389 		 * ilbd_create_sg() only needs the sg_name field.  But it
390 		 * takes in a ilb_sg_info_t because it is used as a callback
391 		 * in ilbd_walk_sg_pgs().
392 		 */
393 		(void) strlcpy(sg_info.sg_name, (char *)&(ic->ic_data),
394 		    sizeof (sg_info.sg_name));
395 		rc = ilbd_create_sg(&sg_info, ev_port, ps,
396 		    cli->cli_peer_ucredp);
397 		break;
398 	}
399 
400 	case ILBD_DESTROY_SERVERGROUP:
401 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
402 		rc = ilbd_destroy_sg(name, ps, cli->cli_peer_ucredp);
403 		break;
404 
405 	case ILBD_ADD_SERVER_TO_GROUP:
406 		if ((rc = ilbd_check_req_size(ic, ic_sz)) != ILB_STATUS_OK)
407 			break;
408 		rc = ilbd_add_server_to_group((ilb_sg_info_t *)&ic->ic_data,
409 		    ev_port, ps, cli->cli_peer_ucredp);
410 		break;
411 
412 	case ILBD_REM_SERVER_FROM_GROUP:
413 		if ((rc = ilbd_check_req_size(ic, ic_sz)) != ILB_STATUS_OK)
414 			break;
415 		rc = ilbd_rem_server_from_group((ilb_sg_info_t *)&ic->ic_data,
416 		    ev_port, ps, cli->cli_peer_ucredp);
417 		break;
418 
419 	case ILBD_ENABLE_SERVER:
420 		if ((rc = ilbd_check_req_size(ic, ic_sz)) != ILB_STATUS_OK)
421 			break;
422 		rc = ilbd_enable_server((ilb_sg_info_t *)&ic->ic_data, ps,
423 		    cli->cli_peer_ucredp);
424 		break;
425 
426 	case ILBD_DISABLE_SERVER:
427 		if ((rc = ilbd_check_req_size(ic, ic_sz)) != ILB_STATUS_OK)
428 			break;
429 		rc = ilbd_disable_server((ilb_sg_info_t *)&ic->ic_data, ps,
430 		    cli->cli_peer_ucredp);
431 		break;
432 
433 	case ILBD_SRV_ADDR2ID:
434 		rc = ilbd_address_to_srvID((ilb_sg_info_t *)&ic->ic_data,
435 		    reply_buf, &rbufsz);
436 		if (rc == ILB_STATUS_OK)
437 			standard_reply = B_FALSE;
438 		break;
439 
440 	case ILBD_SRV_ID2ADDR:
441 		rc = ilbd_srvID_to_address((ilb_sg_info_t *)&ic->ic_data,
442 		    reply_buf, &rbufsz);
443 		if (rc == ILB_STATUS_OK)
444 			standard_reply = B_FALSE;
445 		break;
446 
447 	case ILBD_RETRIEVE_SG_HOSTS:
448 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
449 		rc = ilbd_retrieve_sg_hosts(name, reply_buf, &rbufsz);
450 		if (rc == ILB_STATUS_OK)
451 			standard_reply = B_FALSE;
452 		break;
453 
454 	case ILBD_RETRIEVE_SG_NAMES:
455 	case ILBD_RETRIEVE_RULE_NAMES:
456 	case ILBD_RETRIEVE_HC_NAMES:
457 		rc = ilbd_retrieve_names(ic->ic_cmd, reply_buf, &rbufsz);
458 		if (rc == ILB_STATUS_OK)
459 			standard_reply = B_FALSE;
460 		break;
461 
462 	case ILBD_CREATE_RULE:
463 		rc = ilbd_create_rule((ilb_rule_info_t *)&ic->ic_data, ev_port,
464 		    ps, cli->cli_peer_ucredp);
465 		break;
466 
467 	case ILBD_DESTROY_RULE:
468 		/* Copy the name to ensure that name is NULL terminated. */
469 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
470 		rc = ilbd_destroy_rule(name, ps, cli->cli_peer_ucredp);
471 		break;
472 
473 	case ILBD_ENABLE_RULE:
474 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
475 		rc = ilbd_enable_rule(name, ps, cli->cli_peer_ucredp);
476 		break;
477 
478 	case ILBD_DISABLE_RULE:
479 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
480 		rc = ilbd_disable_rule(name, ps, cli->cli_peer_ucredp);
481 		break;
482 
483 	case ILBD_RETRIEVE_RULE:
484 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
485 		rc = ilbd_retrieve_rule(name, reply_buf, &rbufsz);
486 		if (rc == ILB_STATUS_OK)
487 			standard_reply = B_FALSE;
488 		break;
489 
490 	case ILBD_CREATE_HC:
491 		rc = ilbd_create_hc((ilb_hc_info_t *)&ic->ic_data, ev_port, ps,
492 		    cli->cli_peer_ucredp);
493 		break;
494 
495 	case ILBD_DESTROY_HC:
496 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
497 		rc = ilbd_destroy_hc(name, ps, cli->cli_peer_ucredp);
498 		break;
499 
500 	case ILBD_GET_HC_INFO:
501 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
502 		rc = ilbd_get_hc_info(name, reply_buf, &rbufsz);
503 		if (rc == ILB_STATUS_OK)
504 			standard_reply = B_FALSE;
505 		break;
506 
507 	case ILBD_GET_HC_SRVS:
508 		(void) strlcpy(name, (char *)&(ic->ic_data), sizeof (name));
509 		rc = ilbd_get_hc_srvs(name, reply_buf, &rbufsz);
510 		if (rc == ILB_STATUS_OK)
511 			standard_reply = B_FALSE;
512 		break;
513 
514 	case ILBD_SHOW_NAT:
515 		rc = ilbd_show_nat(cli, ic, reply_buf, &rbufsz);
516 		if (rc == ILB_STATUS_OK)
517 			standard_reply = B_FALSE;
518 		break;
519 
520 	case ILBD_SHOW_PERSIST:
521 		rc = ilbd_show_sticky(cli, ic, reply_buf, &rbufsz);
522 		if (rc == ILB_STATUS_OK)
523 			standard_reply = B_FALSE;
524 		break;
525 
526 	default:
527 		logdebug("consume_common_struct: unknown command");
528 		rc = ILB_STATUS_INVAL_CMD;
529 		break;
530 	}
531 
532 out:
533 	/*
534 	 * The message exchange is always in pairs, request/response.  If
535 	 * a transaction requires multiple exchanges, the client will send
536 	 * in multiple requests to get multiple responses.  The show-nat and
537 	 * show-persist request are examples of this.  The end of transaction
538 	 * is marked with ic_flags set to ILB_COMM_END.
539 	 */
540 
541 	/* This is the standard reply. */
542 	if (standard_reply) {
543 		if (rc == ILB_STATUS_OK)
544 			ilbd_reply_ok(reply_buf, &rbufsz);
545 		else
546 			ilbd_reply_err(reply_buf, &rbufsz, rc);
547 	}
548 
549 	if ((ret = send(cli->cli_sd, reply_buf, rbufsz, 0)) != rbufsz) {
550 		if (ret == -1) {
551 			if (errno != EWOULDBLOCK) {
552 				logdebug("consume_common_struct: send: %s",
553 				    strerror(errno));
554 				rc = ILB_STATUS_SEND;
555 				goto err_out;
556 			}
557 			/*
558 			 * The reply is blocked, save the reply.  handle_req()
559 			 * will associate the event port for the re-send.
560 			 */
561 			assert(cli->cli_saved_reply == NULL);
562 			if ((cli->cli_saved_reply = malloc(rbufsz)) == NULL) {
563 				/*
564 				 * Set the error to ILB_STATUS_SEND so that
565 				 * handle_req() will free the client.
566 				 */
567 				logdebug("consume_common_struct: failure to "
568 				    "allocate memory to save reply");
569 				rc = ILB_STATUS_SEND;
570 				goto err_out;
571 			}
572 			bcopy(reply_buf, cli->cli_saved_reply, rbufsz);
573 			cli->cli_saved_size = rbufsz;
574 			return (ILB_STATUS_EWOULDBLOCK);
575 		}
576 	}
577 err_out:
578 	return (rc);
579 }
580 
581 /*
582  * Accept a new client request.  A struct ilbd_client_t is allocated to
583  * store the client info.  The accepted socket is port_associate() with
584  * the given port.  And the allocated ilbd_client_t struct is passed as
585  * the user pointer.
586  */
587 static void
588 new_req(int ev_port, int listener, void *ev_obj)
589 {
590 	struct sockaddr	sa;
591 	int		sa_len;
592 	int		new_sd;
593 	int		sflags;
594 	ilbd_client_t	*cli = NULL;
595 	int		res;
596 	uid_t		uid;
597 
598 	sa_len = sizeof (sa);
599 	if ((new_sd = accept(listener, &sa, &sa_len)) == -1) {
600 		/* don't log if we're out of file descriptors */
601 		if (errno != EINTR && errno != EMFILE)
602 			logperror("new_req: accept failed");
603 		goto done;
604 	}
605 
606 	/* Set the new socket to be non-blocking. */
607 	if ((sflags = fcntl(new_sd, F_GETFL, 0)) == -1) {
608 		logperror("new_req: fcntl(F_GETFL)");
609 		goto clean_up;
610 	}
611 	if (fcntl(new_sd, F_SETFL, sflags | O_NONBLOCK) == -1) {
612 		logperror("new_req: fcntl(F_SETFL)");
613 		goto clean_up;
614 	}
615 	if (fcntl(new_sd, F_SETFD, FD_CLOEXEC) == -1) {
616 		logperror("new_req: fcntl(FD_CLOEXEC)");
617 		goto clean_up;
618 	}
619 	if ((cli = calloc(1, sizeof (ilbd_client_t))) == NULL) {
620 		logerr("new_req: malloc(ilbd_client_t)");
621 		goto clean_up;
622 	}
623 	res = getpeerucred(new_sd, &cli->cli_peer_ucredp);
624 	if (res == -1) {
625 		logperror("new_req: getpeerucred failed");
626 		goto clean_up;
627 	}
628 	if ((uid = ucred_getruid(cli->cli_peer_ucredp)) == (uid_t)-1) {
629 		logperror("new_req: ucred_getruid failed");
630 		goto clean_up;
631 	}
632 	cli->cli_pw_bufsz = (size_t)sysconf(_SC_GETPW_R_SIZE_MAX);
633 	if ((cli->cli_pw_buf = malloc(cli->cli_pw_bufsz)) == NULL) {
634 		logerr("new_req: malloc(cli_pw_buf)");
635 		goto clean_up;
636 	}
637 	if (getpwuid_r(uid, &cli->cli_pw, cli->cli_pw_buf,
638 	    cli->cli_pw_bufsz) == NULL) {
639 		logperror("new_req: invalid user");
640 		goto clean_up;
641 	}
642 	cli->cli_ev = ILBD_EVENT_REQ;
643 	cli->cli_sd = new_sd;
644 	cli->cli_cmd = ILBD_BAD_CMD;
645 	cli->cli_saved_reply = NULL;
646 	cli->cli_saved_size = 0;
647 	if (port_associate(ev_port, PORT_SOURCE_FD, new_sd, POLLRDNORM,
648 	    cli) == -1) {
649 		logperror("new_req: port_associate(cli) failed");
650 clean_up:
651 		if (cli != NULL) {
652 			if (cli->cli_peer_ucredp != NULL)
653 				ucred_free(cli->cli_peer_ucredp);
654 			free(cli->cli_pw_buf);
655 			free(cli);
656 		}
657 		(void) close(new_sd);
658 	}
659 
660 done:
661 	/* Re-associate the listener with the event port. */
662 	if (port_associate(ev_port, PORT_SOURCE_FD, listener, POLLRDNORM,
663 	    ev_obj) == -1) {
664 		logperror("new_req: port_associate(listener) failed");
665 		exit(1);
666 	}
667 }
668 
669 static void
670 handle_req(int ev_port, ilbd_event_t event, ilbd_client_t *cli)
671 {
672 	/* All request should be smaller than ILBD_MSG_SIZE */
673 	union {
674 		ilb_comm_t	ic;
675 		uint32_t	buf[ILBD_MSG_SIZE / sizeof (uint32_t)];
676 	} ic_u;
677 	int	rc = ILB_STATUS_OK;
678 	ssize_t	r;
679 
680 	if (event == ILBD_EVENT_REQ) {
681 		/*
682 		 * Something is wrong with the client since there is a
683 		 * pending reply, the client should not send us another
684 		 * request.  Kill this client.
685 		 */
686 		if (cli->cli_saved_reply != NULL) {
687 			logerr("handle_req: misbehaving client, more than one "
688 			    "outstanding request");
689 			rc = ILB_STATUS_INTERNAL;
690 			goto err_out;
691 		}
692 
693 		/*
694 		 * Our socket is message based so we should be able
695 		 * to get the request in one single read.
696 		 */
697 		r = recv(cli->cli_sd, (void *)ic_u.buf, sizeof (ic_u.buf), 0);
698 		if (r < 0) {
699 			if (errno != EINTR) {
700 				logperror("handle_req: read failed");
701 				rc = ILB_STATUS_READ;
702 				goto err_out;
703 			}
704 			/*
705 			 * If interrupted, just re-associate the cli_sd
706 			 * with the port.
707 			 */
708 			goto done;
709 		}
710 		cli->cli_cmd = ic_u.ic.ic_cmd;
711 
712 		rc = consume_common_struct(&ic_u.ic, r, cli, ev_port);
713 		if (rc == ILB_STATUS_EWOULDBLOCK)
714 			goto blocked;
715 		/* Fatal error communicating with client, free it. */
716 		if (rc == ILB_STATUS_SEND)
717 			goto err_out;
718 	} else {
719 		assert(event == ILBD_EVENT_REP_OK);
720 		assert(cli->cli_saved_reply != NULL);
721 
722 		/*
723 		 * The reply to client was previously blocked, we will
724 		 * send again.
725 		 */
726 		if (send(cli->cli_sd, cli->cli_saved_reply,
727 		    cli->cli_saved_size, 0) != cli->cli_saved_size) {
728 			if (errno != EWOULDBLOCK) {
729 				logdebug("handle_req: send: %s",
730 				    strerror(errno));
731 				rc = ILB_STATUS_SEND;
732 				goto err_out;
733 			}
734 			goto blocked;
735 		}
736 		free(cli->cli_saved_reply);
737 		cli->cli_saved_reply = NULL;
738 		cli->cli_saved_size = 0;
739 	}
740 done:
741 	/* Re-associate with the event port for more requests. */
742 	cli->cli_ev = ILBD_EVENT_REQ;
743 	if (port_associate(ev_port, PORT_SOURCE_FD, cli->cli_sd,
744 	    POLLRDNORM, cli) == -1) {
745 		logperror("handle_req: port_associate(POLLRDNORM)");
746 		rc = ILB_STATUS_INTERNAL;
747 		goto err_out;
748 	}
749 	return;
750 
751 blocked:
752 	/* Re-associate with the event port. */
753 	cli->cli_ev = ILBD_EVENT_REP_OK;
754 	if (port_associate(ev_port, PORT_SOURCE_FD, cli->cli_sd, POLLWRNORM,
755 	    cli) == -1) {
756 		logperror("handle_req: port_associate(POLLWRNORM)");
757 		rc = ILB_STATUS_INTERNAL;
758 		goto err_out;
759 	}
760 	return;
761 
762 err_out:
763 	ilbd_free_cli(cli);
764 }
765 
766 static void
767 i_ilbd_read_config(int ev_port)
768 {
769 	logdebug("i_ilbd_read_config: port %d", ev_port);
770 	(void) ilbd_walk_sg_pgs(ilbd_create_sg, &ev_port, NULL);
771 	(void) ilbd_walk_hc_pgs(ilbd_create_hc, &ev_port, NULL);
772 	(void) ilbd_walk_rule_pgs(ilbd_create_rule, &ev_port, NULL);
773 }
774 
775 /*
776  * main event loop for ilbd
777  * asserts that argument 'listener' is a server socket ready to accept() on.
778  */
779 static void
780 main_loop(int listener)
781 {
782 	port_event_t		p_ev;
783 	int			ev_port, ev_port_obj;
784 	ilbd_event_obj_t	ev_obj;
785 	ilbd_timer_event_obj_t	timer_ev_obj;
786 
787 	ev_port = port_create();
788 	if (ev_port == -1) {
789 		logperror("main_loop: port_create failed");
790 		exit(-1);
791 	}
792 	ilbd_hc_timer_init(ev_port, &timer_ev_obj);
793 
794 	ev_obj.ev = ILBD_EVENT_NEW_REQ;
795 	if (port_associate(ev_port, PORT_SOURCE_FD, listener, POLLRDNORM,
796 	    &ev_obj) == -1) {
797 		logperror("main_loop: port_associate failed");
798 		exit(1);
799 	}
800 
801 	i_ilbd_read_config(ev_port);
802 	ilbd_hc_timer_update(&timer_ev_obj);
803 
804 	_NOTE(CONSTCOND)
805 	while (B_TRUE) {
806 		int r;
807 		ilbd_event_t event;
808 		ilbd_client_t *cli;
809 
810 		r = port_get(ev_port, &p_ev, NULL);
811 		if (r == -1) {
812 			if (errno == EINTR)
813 				continue;
814 			logperror("main_loop: port_get failed");
815 			break;
816 		}
817 
818 		ev_port_obj = p_ev.portev_object;
819 		event = ((ilbd_event_obj_t *)p_ev.portev_user)->ev;
820 
821 		switch (event) {
822 		case ILBD_EVENT_TIMER:
823 			ilbd_hc_timeout();
824 			break;
825 
826 		case ILBD_EVENT_PROBE:
827 			ilbd_hc_probe_return(ev_port, ev_port_obj,
828 			    p_ev.portev_events,
829 			    (ilbd_hc_probe_event_t *)p_ev.portev_user);
830 			break;
831 
832 		case ILBD_EVENT_NEW_REQ:
833 			assert(ev_port_obj == listener);
834 			/*
835 			 * An error happens in the listener.  Exit
836 			 * for now....
837 			 */
838 			if (p_ev.portev_events & (POLLHUP|POLLERR)) {
839 				logerr("main_loop: listener error");
840 				exit(1);
841 			}
842 			new_req(ev_port, ev_port_obj, &ev_obj);
843 			break;
844 
845 		case ILBD_EVENT_REP_OK:
846 		case ILBD_EVENT_REQ:
847 			cli = (ilbd_client_t *)p_ev.portev_user;
848 			assert(ev_port_obj == cli->cli_sd);
849 
850 			/*
851 			 * An error happens in the newly accepted
852 			 * client request.  Clean up the client.
853 			 * this also happens when client closes socket,
854 			 * so not necessarily a reason for alarm
855 			 */
856 			if (p_ev.portev_events & (POLLHUP|POLLERR)) {
857 				ilbd_free_cli(cli);
858 				break;
859 			}
860 
861 			handle_req(ev_port, event, cli);
862 			break;
863 
864 		default:
865 			logerr("main_loop: unknown event %d", event);
866 			exit(EXIT_FAILURE);
867 			break;
868 		}
869 
870 		ilbd_hc_timer_update(&timer_ev_obj);
871 	}
872 }
873 
/*
 * Run the set-up routines for the server group list, the rule list and
 * the health check list, in that order.
 */
static void
i_ilbd_setup_lists(void)
{
	i_setup_sg_hlist();
	i_setup_rule_hlist();
	i_ilbd_setup_hc_list();
}
881 
/*
 * Print the (translated) usage message for program 'name' on stderr and
 * exit with status 1.  Call only during startup.
 */
static void
Usage(char *name)
{
	const char	*fmt = gettext("Usage: %s [-d|--debug]\n");

	(void) fprintf(stderr, fmt, name);
	exit(1);
}
892 
893 static void
894 print_version(char *name)
895 {
896 	(void) printf("%s %s\n", basename(name), ILBD_VERSION);
897 	(void) printf(gettext(ILBD_COPYRIGHT));
898 	exit(0);
899 }
900 
/*
 * Increase the file descriptor limit for handling a lot of health check
 * processes (each requires a pipe): the soft RLIMIT_NOFILE limit is raised
 * to the hard limit.
 *
 * Note that this function is called before ilbd becomes a daemon.  So
 * we call perror(3C) to print out error message directly so that SMF
 * can catch them.  On failure the process exits with the errno of the
 * failed call; it is saved before perror() runs because perror() may
 * change errno itself.
 */
static void
set_rlim(void)
{
	struct rlimit	rlp;
	int		saved_errno;

	if (getrlimit(RLIMIT_NOFILE, &rlp) == -1) {
		saved_errno = errno;
		perror("ilbd: getrlimit");
		exit(saved_errno);
	}
	rlp.rlim_cur = rlp.rlim_max;
	if (setrlimit(RLIMIT_NOFILE, &rlp) == -1) {
		saved_errno = errno;
		perror("ilbd: setrlimit");
		exit(saved_errno);
	}
}
924 
925 int
926 main(int argc, char **argv)
927 {
928 	int	s;
929 	int	c;
930 
931 	(void) setlocale(LC_ALL, "");
932 #if !defined(TEXT_DOMAIN)
933 #define	TEXT_DOMAIN "SYS_TEST"
934 #endif
935 	static const char daemon_dir[] = DAEMON_DIR;
936 
937 	(void) textdomain(TEXT_DOMAIN);
938 
939 	while ((c = getopt(argc, argv, ":V?d(debug)")) != -1) {
940 		switch ((char)c) {
941 		case '?': Usage(argv[0]);
942 			/* not reached */
943 			break;
944 		case 'V': print_version(argv[0]);
945 			/* not reached */
946 			break;
947 		case 'd': ilbd_enable_debug();
948 			break;
949 		default: Usage(argv[0]);
950 			/* not reached */
951 			break;
952 		}
953 	}
954 
955 	/*
956 	 * Whenever the daemon starts, it needs to start with a clean
957 	 * slate in the kernel. We need sys_ip_config privilege for
958 	 * this.
959 	 */
960 	ilbd_reset_kernel_state();
961 
962 	/* Increase the limit on the number of file descriptors. */
963 	set_rlim();
964 
965 	/*
966 	 * ilbd daemon starts off as root, just so it can create
967 	 * /var/run/daemon if one does not exist. After that is done
968 	 * the daemon switches to "daemon" uid. This is similar to what
969 	 * rpcbind does.
970 	 */
971 	if (mkdir(daemon_dir, DAEMON_DIR_MODE) == 0 || errno == EEXIST) {
972 		(void) chmod(daemon_dir, DAEMON_DIR_MODE);
973 		(void) chown(daemon_dir, DAEMON_UID, DAEMON_GID);
974 	} else {
975 		perror("main: mkdir failed");
976 		exit(errno);
977 	}
978 	/*
979 	 * Now lets switch ilbd as uid = daemon, gid = daemon with a
980 	 * trimmed down privilege set
981 	 */
982 	if (__init_daemon_priv(PU_RESETGROUPS | PU_LIMITPRIVS | PU_INHERITPRIVS,
983 	    DAEMON_UID, DAEMON_GID, PRIV_PROC_OWNER, PRIV_PROC_AUDIT,
984 	    PRIV_NET_ICMPACCESS, PRIV_SYS_IP_CONFIG, NULL) == -1) {
985 		(void) fprintf(stderr, "Insufficient privileges\n");
986 		exit(EXIT_FAILURE);
987 	}
988 
989 	/*
990 	 * Opens a PF_UNIX socket to the client. No privilege needed
991 	 * for this.
992 	 */
993 	s = ilbd_create_client_socket();
994 
995 	/*
996 	 * Daemonify if ilbd is not running with -d option
997 	 * Need proc_fork privilege for this
998 	 */
999 	if (!is_debugging_on()) {
1000 		logdebug("daemonizing...");
1001 		if (daemon(0, 0) != 0) {
1002 			logperror("daemon failed");
1003 			exit(EXIT_FAILURE);
1004 		}
1005 	}
1006 	(void) priv_set(PRIV_OFF, PRIV_INHERITABLE, PRIV_PROC_OWNER,
1007 	    PRIV_PROC_AUDIT, NULL);
1008 
1009 	/* if daemonified then set up syslog */
1010 	if (!is_debugging_on())
1011 		openlog("ilbd", LOG_PID, LOG_DAEMON);
1012 
1013 	i_ilbd_setup_lists();
1014 
1015 	main_loop(s);
1016 
1017 	/*
1018 	 * if we come here, then we experienced an error or a shutdown
1019 	 * indicator, so clean up after ourselves.
1020 	 */
1021 	logdebug("main(): terminating");
1022 
1023 	(void) remove(SOCKET_PATH);
1024 	ilbd_reset_kernel_state();
1025 
1026 	return (0);
1027 }
1028