/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysevent/vrrp.h>
#include <sys/sysevent/eventdefs.h>
#include <sys/varargs.h>
#include <auth_attr.h>
#include <ctype.h>
#include <fcntl.h>
#include <stdlib.h>
#include <strings.h>
#include <errno.h>
#include <unistd.h>
#include <zone.h>
#include <libsysevent.h>
#include <limits.h>
#include <locale.h>
#include <arpa/inet.h>
#include <signal.h>
#include <assert.h>
#include <ucred.h>
#include <bsm/adt.h>
#include <bsm/adt_event.h>
#include <priv_utils.h>
#include <libdllink.h>
#include <libdlvnic.h>
#include <libipadm.h>
#include <pwd.h>
#include <libvrrpadm.h>
#include <net/route.h>
#include "vrrpd_impl.h"

/*
 * A VRRP router can be only start participating the VRRP protocol of a virtual
 * router when all the following conditions are met:
 *
 * - The VRRP router is enabled (vr->vvr_conf.vvc_enabled is _B_TRUE)
 * - The RX socket is successfully created over the physical interface to
 *   receive the VRRP multicast advertisement. Note that one RX socket can
 *   be shared by several VRRP routers configured over the same physical
 *   interface. (See vrrpd_init_rxsock())
 * - The TX socket is successfully created over the VNIC interface to send
 *   the VRRP advertisment. (See vrrpd_init_txsock())
 * - The primary IP address has been successfully selected over the physical
 *   interface. (See vrrpd_select_primary())
 *
 * If a VRRP router is enabled but the other conditions haven't be satisfied,
 * the router will be stay at the VRRP_STATE_INIT state. If all the above
 * conditions are met, the VRRP router will be transit to either
 * the VRRP_STATE_MASTER or the VRRP_STATE_BACKUP state, depends on the VRRP
 * protocol.
 */

#define	skip_whitespace(p)	while (isspace(*(p))) ++(p)

#define	BUFFSIZE	65536

#define	VRRPCONF	"/etc/inet/vrrp.conf"

typedef struct vrrpd_rtsock_s {
	int		vrt_af;		/* address family */
	int		vrt_fd;		/* socket for the PF_ROUTE msg */
	iu_event_id_t	vrt_eid;	/* event ID */
} vrrpd_rtsock_t;

static ipadm_handle_t	vrrp_ipadm_handle = NULL;	/* libipadm handle */
static int		vrrp_logflag = 0;
boolean_t		vrrp_debug_level = 0;
iu_eh_t			*vrrpd_eh = NULL;
iu_tq_t			*vrrpd_timerq = NULL;
static vrrp_handle_t	vrrpd_vh = NULL;
static int		vrrpd_cmdsock_fd = -1;	/* socket to communicate */
						/* between vrrpd/libvrrpadm */
static iu_event_id_t	vrrpd_cmdsock_eid = -1;
static int		vrrpd_ctlsock_fd = -1;	/* socket to bring up/down */
						/* the virtual IP addresses */
static int		vrrpd_ctlsock6_fd = -1;
static vrrpd_rtsock_t	vrrpd_rtsocks[2] = {
	{AF_INET, -1, -1},
	{AF_INET6, -1, -1}
};
static iu_timer_id_t	vrrp_scan_timer_id = -1;

TAILQ_HEAD(vrrp_vr_list_s, vrrp_vr_s);
TAILQ_HEAD(vrrp_intf_list_s, vrrp_intf_s);
static struct vrrp_vr_list_s	vrrp_vr_list;
static struct vrrp_intf_list_s	vrrp_intf_list;
static char		vrrpd_conffile[MAXPATHLEN];

/*
 * Multicast address of VRRP advertisement in network byte order
 */
static vrrp_addr_t	vrrp_muladdr4;
static vrrp_addr_t	vrrp_muladdr6;

static int		vrrpd_scan_interval = 20000;	/* ms */
static int		pfds[2];

/*
 * macros to calculate skew_time and master_down_timer
 *
 * Note that the input is in centisecs and output are in msecs
 */
#define	SKEW_TIME(pri, intv)	((intv) * (256 - (pri)) / 256)
#define	MASTER_DOWN_INTERVAL(pri, intv)	(3 * (intv) + SKEW_TIME((pri), (intv)))

#define	SKEW_TIME_VR(vr)	\
	SKEW_TIME((vr)->vvr_conf.vvc_pri, (vr)->vvr_master_adver_int)
#define	MASTER_DOWN_INTERVAL_VR(vr)	\
	MASTER_DOWN_INTERVAL((vr)->vvr_conf.vvc_pri, (vr)->vvr_master_adver_int)

#define	VRRP_CONF_UPDATE	0x01
#define	VRRP_CONF_DELETE	0x02

static char *af_str(int);

static iu_tq_callback_t vrrp_adv_timeout;
static iu_tq_callback_t vrrp_b2m_timeout;
static iu_eh_callback_t vrrpd_sock_handler;
static iu_eh_callback_t vrrpd_rtsock_handler;
static iu_eh_callback_t vrrpd_cmdsock_handler;

static int daemon_init();

static vrrp_err_t vrrpd_init();
static void vrrpd_fini();
static vrrp_err_t vrrpd_cmdsock_create();
static void vrrpd_cmdsock_destroy();
static vrrp_err_t vrrpd_rtsock_create();
static void vrrpd_rtsock_destroy();
static vrrp_err_t vrrpd_ctlsock_create();
static void vrrpd_ctlsock_destroy();

static void vrrpd_scan_timer(iu_tq_t *, void *);
static void vrrpd_scan(int);
static vrrp_err_t vrrpd_init_rxsock(vrrp_vr_t *);
static void vrrpd_fini_rxsock(vrrp_vr_t *);
static vrrp_err_t vrrpd_init_txsock(vrrp_vr_t *);
static vrrp_err_t vrrpd_init_txsock_v4(vrrp_vr_t *);
static vrrp_err_t vrrpd_init_txsock_v6(vrrp_vr_t *);
static void vrrpd_fini_txsock(vrrp_vr_t *);

static vrrp_err_t vrrpd_create_vr(vrrp_vr_conf_t *);
static vrrp_err_t vrrpd_enable_vr(vrrp_vr_t *);
static void vrrpd_disable_vr(vrrp_vr_t *, vrrp_intf_t *, boolean_t);
static void vrrpd_delete_vr(vrrp_vr_t *);

static vrrp_err_t vrrpd_create(vrrp_vr_conf_t *, boolean_t);
static vrrp_err_t vrrpd_delete(const char *);
static vrrp_err_t vrrpd_enable(const char *, boolean_t);
static vrrp_err_t vrrpd_disable(const char *);
static vrrp_err_t vrrpd_modify(vrrp_vr_conf_t *, uint32_t);
static void vrrpd_list(vrid_t, char *, int, vrrp_ret_list_t *, size_t *);
static void vrrpd_query(const char *, vrrp_ret_query_t *, size_t *);

static boolean_t vrrp_rd_prop_name(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_vrid(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_af(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_pri(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_adver_int(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_preempt(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_accept(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_ifname(vrrp_vr_conf_t *, const char *);
static boolean_t vrrp_rd_prop_enabled(vrrp_vr_conf_t *, const char *);
static int vrrp_wt_prop_name(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_vrid(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_af(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_pri(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_adver_int(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_preempt(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_accept(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_ifname(vrrp_vr_conf_t *, char *, size_t);
static int vrrp_wt_prop_enabled(vrrp_vr_conf_t *, char *, size_t);

static void vrrpd_cmd_create(void *, void *, size_t *);
static void vrrpd_cmd_delete(void *, void *, size_t *);
static void vrrpd_cmd_enable(void *, void *, size_t *);
static void vrrpd_cmd_disable(void *, void *, size_t *);
static void vrrpd_cmd_modify(void *, void *, size_t *);
static void vrrpd_cmd_list(void *, void *, size_t *);
static void vrrpd_cmd_query(void *, void *, size_t *);

static vrrp_vr_t *vrrpd_lookup_vr_by_vrid(char *, vrid_t vrid_t, int);
static vrrp_vr_t *vrrpd_lookup_vr_by_name(const char *);
static vrrp_intf_t *vrrpd_lookup_if(const char *, int);
static vrrp_err_t vrrpd_create_if(const char *, int, uint32_t, vrrp_intf_t **);
static void vrrpd_delete_if(vrrp_intf_t *, boolean_t);
static vrrp_err_t vrrpd_create_ip(vrrp_intf_t *, const char *, vrrp_addr_t *,
    uint64_t flags);
static void vrrpd_delete_ip(vrrp_intf_t *, vrrp_ip_t *);

static void vrrpd_init_ipcache(int);
static void vrrpd_update_ipcache(int);
static ipadm_status_t vrrpd_walk_addr_info(int);
static vrrp_err_t vrrpd_add_ipaddr(char *, int, vrrp_addr_t *,
    int, uint64_t);
static vrrp_ip_t *vrrpd_select_primary(vrrp_intf_t *);
static void vrrpd_reselect_primary(vrrp_intf_t *);
static void vrrpd_reenable_all_vr();
static void vrrpd_remove_if(vrrp_intf_t *, boolean_t);

static uint16_t in_cksum(int, uint16_t, void *);
static uint16_t vrrp_cksum4(struct in_addr *, struct in_addr *,
    uint16_t, vrrp_pkt_t *);
static uint16_t vrrp_cksum6(struct in6_addr *, struct in6_addr *,
    uint16_t, vrrp_pkt_t *);
static size_t vrrpd_build_vrrp(vrrp_vr_t *, uchar_t *, int, boolean_t);

static void vrrpd_process_adv(vrrp_vr_t *, vrrp_addr_t *, vrrp_pkt_t *);
static vrrp_err_t vrrpd_send_adv(vrrp_vr_t *, boolean_t);

/* state transition functions */
static vrrp_err_t vrrpd_state_i2m(vrrp_vr_t *);
static vrrp_err_t vrrpd_state_i2b(vrrp_vr_t *);
static void vrrpd_state_m2i(vrrp_vr_t *);
static void vrrpd_state_b2i(vrrp_vr_t *);
static vrrp_err_t vrrpd_state_b2m(vrrp_vr_t *);
static vrrp_err_t vrrpd_state_m2b(vrrp_vr_t *);
static void vrrpd_state_trans(vrrp_state_t, vrrp_state_t, vrrp_vr_t *);

static vrrp_err_t vrrpd_set_noaccept(vrrp_vr_t *, boolean_t);
static vrrp_err_t vrrpd_virtualip_update(vrrp_vr_t *, boolean_t);
static vrrp_err_t vrrpd_virtualip_updateone(vrrp_intf_t *, vrrp_ip_t *,
    boolean_t);
static int vrrpd_post_event(const char *, vrrp_state_t, vrrp_state_t);

static void vrrpd_initconf();
static vrrp_err_t vrrpd_updateconf(vrrp_vr_conf_t *, uint_t);
static vrrp_err_t vrrpd_write_vrconf(char *, size_t, vrrp_vr_conf_t *);
static vrrp_err_t vrrpd_read_vrconf(char *, vrrp_vr_conf_t *);
static vrrp_err_t vrrpd_readprop(const char *, vrrp_vr_conf_t *);
static void vrrpd_cleanup();

static void vrrp_log(int, char *, ...);
static int timeval_to_milli(struct timeval);
static struct timeval timeval_delta(struct timeval, struct timeval);

typedef struct vrrpd_prop_s {
	char		*vs_propname;
	boolean_t	(*vs_propread)(vrrp_vr_conf_t *, const char *);
	int		(*vs_propwrite)(vrrp_vr_conf_t *, char *, size_t);
} vrrp_prop_t;

/*
 * persistent VRRP properties array
 */
static vrrp_prop_t vrrp_prop_info_tbl[] = {
	{"name", vrrp_rd_prop_name, vrrp_wt_prop_name},
	{"vrid", vrrp_rd_prop_vrid, vrrp_wt_prop_vrid},
	{"priority", vrrp_rd_prop_pri, vrrp_wt_prop_pri},
	{"adv_intval", vrrp_rd_prop_adver_int, vrrp_wt_prop_adver_int},
	{"preempt_mode", vrrp_rd_prop_preempt, vrrp_wt_prop_preempt},
	{"accept_mode", vrrp_rd_prop_accept, vrrp_wt_prop_accept},
	{"interface", vrrp_rd_prop_ifname, vrrp_wt_prop_ifname},
	{"af", vrrp_rd_prop_af, vrrp_wt_prop_af},
	{"enabled", vrrp_rd_prop_enabled, vrrp_wt_prop_enabled}
};

#define	VRRP_PROP_INFO_TABSIZE	\
	(sizeof (vrrp_prop_info_tbl) / sizeof (vrrp_prop_t))

typedef void vrrp_cmd_func_t(void *, void *, size_t *);

typedef struct vrrp_cmd_info_s {
	vrrp_cmd_type_t	vi_cmd;
	size_t		vi_reqsize;
	size_t		vi_acksize;	/* 0 if the size is variable */
	boolean_t	vi_setop;	/* Set operation? Check credentials */
	vrrp_cmd_func_t	*vi_cmdfunc;
} vrrp_cmd_info_t;

static vrrp_cmd_info_t vrrp_cmd_info_tbl[] = {
	{VRRP_CMD_CREATE, sizeof (vrrp_cmd_create_t),
	    sizeof (vrrp_ret_create_t), _B_TRUE, vrrpd_cmd_create},
	{VRRP_CMD_DELETE, sizeof (vrrp_cmd_delete_t),
	    sizeof (vrrp_ret_delete_t), _B_TRUE, vrrpd_cmd_delete},
	{VRRP_CMD_ENABLE, sizeof (vrrp_cmd_enable_t),
	    sizeof (vrrp_ret_enable_t), _B_TRUE, vrrpd_cmd_enable},
	{VRRP_CMD_DISABLE, sizeof (vrrp_cmd_disable_t),
	    sizeof (vrrp_ret_disable_t), _B_TRUE, vrrpd_cmd_disable},
	{VRRP_CMD_MODIFY, sizeof (vrrp_cmd_modify_t),
	    sizeof (vrrp_ret_modify_t), _B_TRUE, vrrpd_cmd_modify},
	{VRRP_CMD_QUERY, sizeof (vrrp_cmd_query_t), 0,
	    _B_FALSE, vrrpd_cmd_query},
	{VRRP_CMD_LIST, sizeof (vrrp_cmd_list_t), 0,
	    _B_FALSE, vrrpd_cmd_list}
};

#define	VRRP_DOOR_INFO_TABLE_SIZE	\
	(sizeof (vrrp_cmd_info_tbl) / sizeof (vrrp_cmd_info_t))

static int
ipaddr_cmp(int af, vrrp_addr_t *addr1, vrrp_addr_t *addr2)
{
	if (af == AF_INET) {
		return (memcmp(&addr1->in4.sin_addr,
		    &addr2->in4.sin_addr, sizeof (struct in_addr)));
	} else {
		return (memcmp(&addr1->in6.sin6_addr,
		    &addr2->in6.sin6_addr, sizeof (struct in6_addr)));
	}
}

static vrrp_vr_t *
vrrpd_lookup_vr_by_vrid(char *ifname, vrid_t vrid, int af)
{
	vrrp_vr_t *vr;

	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
		if (strcmp(vr->vvr_conf.vvc_link, ifname) == 0 &&
		    vr->vvr_conf.vvc_vrid == vrid &&
		    vr->vvr_conf.vvc_af == af) {
			break;
		}
	}
	return (vr);
}

static vrrp_vr_t *
vrrpd_lookup_vr_by_name(const char *name)
{
	vrrp_vr_t *vr;

	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
		if (strcmp(vr->vvr_conf.vvc_name, name) == 0)
			break;
	}
	return (vr);
}

static vrrp_intf_t *
vrrpd_lookup_if(const char *ifname, int af)
{
	vrrp_intf_t	*intf;

	TAILQ_FOREACH(intf, &vrrp_intf_list, vvi_next) {
		if (strcmp(ifname, intf->vvi_ifname) == 0 &&
		    af == intf->vvi_af) {
			break;
		}
	}
	return (intf);
}

static vrrp_err_t
vrrpd_create_if(const char *ifname, int af, uint32_t ifindex,
    vrrp_intf_t **intfp)
{
	vrrp_intf_t	*intf;

	vrrp_log(VRRP_DBG0, "vrrpd_create_if(%s, %s, %d)",
	    ifname, af_str(af), ifindex);

	if (((*intfp) = malloc(sizeof (vrrp_intf_t))) == NULL) {
		vrrp_log(VRRP_ERR, "vrrpd_create_if(): failed to "
		    "allocate %s/%s interface", ifname, af_str(af));
		return (VRRP_ENOMEM);
	}

	intf = *intfp;
	TAILQ_INIT(&intf->vvi_iplist);
	(void) strlcpy(intf->vvi_ifname, ifname, sizeof (intf->vvi_ifname));
	intf->vvi_af = af;
	intf->vvi_sockfd = -1;
	intf->vvi_nvr = 0;
	intf->vvi_eid = -1;
	intf->vvi_pip = NULL;
	intf->vvi_ifindex = ifindex;
	intf->vvi_state = NODE_STATE_NEW;
	intf->vvi_vr_state = VRRP_STATE_INIT;
	TAILQ_INSERT_TAIL(&vrrp_intf_list, intf, vvi_next);
	return (VRRP_SUCCESS);
}

/*
 * An interface is deleted. If update_vr is true, the deletion of the interface
 * may cause the state transition of assoicated VRRP router (if this interface
 * is either the primary or the VNIC interface of the VRRP router); otherwise,
 * simply delete the interface without updating the VRRP router.
 */
static void
vrrpd_delete_if(vrrp_intf_t *intf, boolean_t update_vr)
{
	vrrp_ip_t	*ip;

	vrrp_log(VRRP_DBG0, "vrrpd_delete_if(%s, %s, %supdate_vr)",
	    intf->vvi_ifname, af_str(intf->vvi_af), update_vr ? "" : "no_");

	if (update_vr) {
		/*
		 * If a this interface is the physical interface or the VNIC
		 * of a VRRP router, the deletion of the interface (no IP
		 * address exists on this interface) may cause the state
		 * transition of the VRRP router. call vrrpd_remove_if()
		 * to find all corresponding VRRP router and update their
		 * states.
		 */
		vrrpd_remove_if(intf, _B_FALSE);
	}

	/*
	 * First remove and delete all the IP addresses on the interface
	 */
	while (!TAILQ_EMPTY(&intf->vvi_iplist)) {
		ip = TAILQ_FIRST(&intf->vvi_iplist);
		vrrpd_delete_ip(intf, ip);
	}

	/*
	 * Then remove and delete the interface
	 */
	TAILQ_REMOVE(&vrrp_intf_list, intf, vvi_next);
	(void) free(intf);
}

static vrrp_err_t
vrrpd_create_ip(vrrp_intf_t *intf, const char *lifname, vrrp_addr_t *addr,
    uint64_t flags)
{
	vrrp_ip_t	*ip;
	char		abuf[INET6_ADDRSTRLEN];

	/* LINTED E_CONSTANT_CONDITION */
	VRRPADDR2STR(intf->vvi_af, addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
	vrrp_log(VRRP_DBG0, "vrrpd_create_ip(%s, %s, %s, 0x%x)",
	    intf->vvi_ifname, lifname, abuf, flags);

	if ((ip = malloc(sizeof (vrrp_ip_t))) == NULL) {
		vrrp_log(VRRP_ERR, "vrrpd_create_ip(%s, %s):"
		    "failed to allocate IP", lifname, abuf);
		return (VRRP_ENOMEM);
	}

	(void) strncpy(ip->vip_lifname, lifname, sizeof (ip->vip_lifname));
	ip->vip_state = NODE_STATE_NEW;
	ip->vip_flags = flags;
	(void) memcpy(&ip->vip_addr, addr, sizeof (ip->vip_addr));

	/*
	 * Make sure link-local IPv6 IP addresses are at the head of the list
	 */
	if (intf->vvi_af == AF_INET6 &&
	    IN6_IS_ADDR_LINKLOCAL(&addr->in6.sin6_addr)) {
		TAILQ_INSERT_HEAD(&intf->vvi_iplist, ip, vip_next);
	} else {
		TAILQ_INSERT_TAIL(&intf->vvi_iplist, ip, vip_next);
	}
	return (VRRP_SUCCESS);
}

static void
vrrpd_delete_ip(vrrp_intf_t *intf, vrrp_ip_t *ip)
{
	char	abuf[INET6_ADDRSTRLEN];
	int	af = intf->vvi_af;

	/* LINTED E_CONSTANT_CONDITION */
	VRRPADDR2STR(af, &ip->vip_addr, abuf, sizeof (abuf), _B_FALSE);
	vrrp_log(VRRP_DBG0, "vrrpd_delete_ip(%s, %s, %s) is %sprimary",
	    intf->vvi_ifname, ip->vip_lifname, abuf,
	    intf->vvi_pip == ip ? "" : "not ");

	if (intf->vvi_pip == ip)
		intf->vvi_pip = NULL;

	TAILQ_REMOVE(&intf->vvi_iplist, ip, vip_next);
	(void) free(ip);
}

static char *
rtm_event2str(uchar_t event)
{
	switch (event) {
	case RTM_NEWADDR:
		return ("RTM_NEWADDR");
	case RTM_DELADDR:
		return ("RTM_DELADDR");
	case RTM_IFINFO:
		return ("RTM_IFINFO");
	case RTM_ADD:
		return ("RTM_ADD");
	case RTM_DELETE:
		return ("RTM_DELETE");
	case RTM_CHANGE:
		return ("RTM_CHANGE");
	case RTM_OLDADD:
		return ("RTM_OLDADD");
	case RTM_OLDDEL:
		return ("RTM_OLDDEL");
	case RTM_CHGADDR:
		return ("RTM_CHGADDR");
	case RTM_FREEADDR:
		return ("RTM_FREEADDR");
	default:
		return ("RTM_OTHER");
	}
}

/*
 * This is called by the child process to inform the parent process to
 * exit with the given return value. Note that the child process
 * (the daemon process) informs the parent process to exit when anything
 * goes wrong or when all the intialization is done.
 */
static int
vrrpd_inform_parent_exit(int rv)
{
	int err = 0;

	/*
	 * If vrrp_debug_level is none-zero, vrrpd is not running as
	 * a daemon. Return directly.
	 */
	if (vrrp_debug_level != 0)
		return (0);

	if (write(pfds[1], &rv, sizeof (int)) != sizeof (int)) {
		err = errno;
		(void) close(pfds[1]);
		return (err);
	}
	(void) close(pfds[1]);
	return (0);
}

int
main(int argc, char *argv[])
{
	int c, err;
	struct sigaction sa;
	sigset_t mask;
	struct rlimit rl;

	(void) setlocale(LC_ALL, "");
	(void) textdomain(TEXT_DOMAIN);

	/*
	 * We need PRIV_SYS_CONFIG to post VRRP sysevent, PRIV_NET_RAWACESS
	 * and PRIV_NET_ICMPACCESS to open  the raw socket, PRIV_SYS_IP_CONFIG
	 * to bring up/down the virtual IP addresses, and PRIV_SYS_RESOURCE to
	 * setrlimit().
	 *
	 * Note that sysevent is not supported in non-global zones.
	 */
	if (getzoneid() == GLOBAL_ZONEID) {
		err = __init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, 0, 0,
		    PRIV_SYS_CONFIG, PRIV_NET_RAWACCESS, PRIV_NET_ICMPACCESS,
		    PRIV_SYS_IP_CONFIG, PRIV_SYS_RESOURCE, NULL);
	} else {
		err = __init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, 0, 0,
		    PRIV_NET_RAWACCESS, PRIV_NET_ICMPACCESS,
		    PRIV_SYS_IP_CONFIG, PRIV_SYS_RESOURCE, NULL);
	}

	if (err == -1) {
		vrrp_log(VRRP_ERR, "main(): init_daemon_priv() failed");
		return (EXIT_FAILURE);
	}

	/*
	 * If vrrpd is started by other process, it will inherit the
	 * signal block mask. We unblock all signals to make sure the
	 * signal handling will work normally.
	 */
	(void) sigfillset(&mask);
	(void) thr_sigsetmask(SIG_UNBLOCK, &mask, NULL);
	sa.sa_handler = vrrpd_cleanup;
	sa.sa_flags = 0;
	(void) sigemptyset(&sa.sa_mask);
	(void) sigaction(SIGINT, &sa, NULL);
	(void) sigaction(SIGQUIT, &sa, NULL);
	(void) sigaction(SIGTERM, &sa, NULL);

	vrrp_debug_level = 0;
	(void) strlcpy(vrrpd_conffile, VRRPCONF, sizeof (vrrpd_conffile));
	while ((c = getopt(argc, argv, "d:f:")) != EOF) {
		switch (c) {
		case 'd':
			vrrp_debug_level = atoi(optarg);
			break;
		case 'f':
			(void) strlcpy(vrrpd_conffile, optarg,
			    sizeof (vrrpd_conffile));
			break;
		default:
			break;
		}
	}

	closefrom(3);
	if (vrrp_debug_level == 0 && (daemon_init() != 0)) {
		vrrp_log(VRRP_ERR, "main(): daemon_init() failed");
		return (EXIT_FAILURE);
	}

	rl.rlim_cur = RLIM_INFINITY;
	rl.rlim_max = RLIM_INFINITY;
	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
		vrrp_log(VRRP_ERR, "main(): setrlimit() failed");
		goto child_out;
	}

	if (vrrpd_init() != VRRP_SUCCESS) {
		vrrp_log(VRRP_ERR, "main(): vrrpd_init() failed");
		goto child_out;
	}

	/*
	 * Get rid of unneeded privileges.
	 */
	__fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION,
	    PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, PRIV_SYS_RESOURCE, NULL);

	/*
	 * Read the configuration and initialize the existing VRRP
	 * configuration
	 */
	vrrpd_initconf();

	/*
	 * Inform the parent process that it can successfully exit.
	 */
	if ((err = vrrpd_inform_parent_exit(EXIT_SUCCESS)) != 0) {
		vrrpd_cleanup();
		vrrp_log(VRRP_WARNING, "vrrpd_inform_parent_exit() failed: %s",
		    strerror(err));
		return (EXIT_FAILURE);
	}

	/*
	 * Start the loop to handle the timer and the IO events.
	 */
	switch (iu_handle_events(vrrpd_eh, vrrpd_timerq)) {
	case -1:
		vrrp_log(VRRP_ERR, "main(): iu_handle_events() failed "
		    "abnormally");
		break;
	default:
		break;
	}

	vrrpd_cleanup();
	return (EXIT_SUCCESS);

child_out:
	(void) vrrpd_inform_parent_exit(EXIT_FAILURE);
	return (EXIT_FAILURE);
}

static int
daemon_init()
{
	pid_t	pid;
	int	rv;

	vrrp_log(VRRP_DBG0, "daemon_init()");

	if (getenv("SMF_FMRI") == NULL) {
		vrrp_log(VRRP_ERR, "daemon_init(): vrrpd is an smf(5) managed "
		    "service and should not be run from the command line.");
		return (-1);
	}

	/*
	 * Create the pipe used for the child process to inform the parent
	 * process to exit after all initialization is done.
	 */
	if (pipe(pfds) < 0) {
		vrrp_log(VRRP_ERR, "daemon_init(): pipe() failed: %s",
		    strerror(errno));
		return (-1);
	}

	if ((pid = fork()) < 0) {
		vrrp_log(VRRP_ERR, "daemon_init(): fork() failed: %s",
		    strerror(errno));
		(void) close(pfds[0]);
		(void) close(pfds[1]);
		return (-1);
	}

	if (pid != 0) { /* Parent */
		(void) close(pfds[1]);

		/*
		 * Read the child process's return value from the pfds.
		 * If the child process exits unexpectedly, read() returns -1.
		 */
		if (read(pfds[0], &rv, sizeof (int)) != sizeof (int)) {
			vrrp_log(VRRP_ERR, "daemon_init(): child process "
			    "exited unexpectedly %s", strerror(errno));
			(void) kill(pid, SIGTERM);
			rv = EXIT_FAILURE;
		}
		(void) close(pfds[0]);
		exit(rv);
	}

	/*
	 * in child process, became a daemon, and return to main() to continue.
	 */
	(void) close(pfds[0]);
	(void) chdir("/");
	(void) setsid();
	(void) close(0);
	(void) close(1);
	(void) close(2);
	(void) open("/dev/null", O_RDWR, 0);
	(void) dup2(0, 1);
	(void) dup2(0, 2);
	openlog("vrrpd", LOG_PID, LOG_DAEMON);
	vrrp_logflag = 1;
	return (0);
}

static vrrp_err_t
vrrpd_init()
{
	vrrp_err_t	err = VRRP_ESYS;

	vrrp_log(VRRP_DBG0, "vrrpd_init()");

	TAILQ_INIT(&vrrp_vr_list);
	TAILQ_INIT(&vrrp_intf_list);

	if (vrrp_open(&vrrpd_vh) != VRRP_SUCCESS) {
		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrp_open() failed");
		goto fail;
	}

	if ((vrrpd_timerq = iu_tq_create()) == NULL) {
		vrrp_log(VRRP_ERR, "vrrpd_init(): iu_tq_create() failed");
		goto fail;
	}

	if ((vrrpd_eh = iu_eh_create()) == NULL) {
		vrrp_log(VRRP_ERR, "vrrpd_init(): iu_eh_create() failed");
		goto fail;
	}

	/*
	 * Create the AF_UNIX socket used to communicate with libvrrpadm.
	 *
	 * This socket is used to receive the administrative requests and
	 * send back the results.
	 */
	if (vrrpd_cmdsock_create() != VRRP_SUCCESS) {
		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_cmdsock_create() "
		    "failed");
		goto fail;
	}

	/*
	 * Create the VRRP control socket used to bring up/down the virtual
	 * IP addresses. It is also used to set the IFF_NOACCEPT flag of
	 * the virtual IP addresses.
	 */
	if (vrrpd_ctlsock_create() != VRRP_SUCCESS) {
		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_ctlsock_create() "
		    "failed");
		goto fail;
	}

	/*
	 * Create the PF_ROUTER socket used to listen to the routing socket
	 * messages and build the interface/IP address list.
	 */
	if (vrrpd_rtsock_create() != VRRP_SUCCESS) {
		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_rtsock_create() "
		    "failed");
		goto fail;
	}

	/* Open the libipadm handle */
	if (ipadm_open(&vrrp_ipadm_handle, 0) != IPADM_SUCCESS) {
		vrrp_log(VRRP_ERR, "vrrpd_init(): ipadm_open() failed");
		goto fail;
	}

	/*
	 * Build the list of interfaces and IP addresses. Also, start the time
	 * to scan the interfaces/IP addresses periodically.
	 */
	vrrpd_scan(AF_INET);
	vrrpd_scan(AF_INET6);
	if ((vrrp_scan_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
	    vrrpd_scan_interval, vrrpd_scan_timer, NULL)) == -1) {
		vrrp_log(VRRP_ERR, "vrrpd_init(): start scan_timer failed");
		goto fail;
	}

	/*
	 * Initialize the VRRP multicast address.
	 */
	bzero(&vrrp_muladdr4, sizeof (vrrp_addr_t));
	vrrp_muladdr4.in4.sin_family = AF_INET;
	(void) inet_pton(AF_INET, "224.0.0.18", &vrrp_muladdr4.in4.sin_addr);

	bzero(&vrrp_muladdr6, sizeof (vrrp_addr_t));
	vrrp_muladdr6.in6.sin6_family = AF_INET6;
	(void) inet_pton(AF_INET6, "ff02::12", &vrrp_muladdr6.in6.sin6_addr);

	return (VRRP_SUCCESS);

fail:
	vrrpd_fini();
	return (err);
}

static void
vrrpd_fini()
{
	vrrp_log(VRRP_DBG0, "vrrpd_fini()");

	(void) iu_cancel_timer(vrrpd_timerq, vrrp_scan_timer_id, NULL);
	vrrp_scan_timer_id = -1;

	vrrpd_rtsock_destroy();
	vrrpd_ctlsock_destroy();
	vrrpd_cmdsock_destroy();

	if (vrrpd_eh != NULL) {
		iu_eh_destroy(vrrpd_eh);
		vrrpd_eh = NULL;
	}

	if (vrrpd_timerq != NULL) {
		iu_tq_destroy(vrrpd_timerq);
		vrrpd_timerq = NULL;
	}

	vrrp_close(vrrpd_vh);
	vrrpd_vh = NULL;
	assert(TAILQ_EMPTY(&vrrp_vr_list));
	assert(TAILQ_EMPTY(&vrrp_intf_list));

	ipadm_close(vrrp_ipadm_handle);
}

static void
vrrpd_cleanup(void)
{
	vrrp_vr_t	*vr;
	vrrp_intf_t	*intf;

	vrrp_log(VRRP_DBG0, "vrrpd_cleanup()");

	while (!TAILQ_EMPTY(&vrrp_vr_list)) {
		vr = TAILQ_FIRST(&vrrp_vr_list);
		vrrpd_delete_vr(vr);
	}

	while (!TAILQ_EMPTY(&vrrp_intf_list)) {
		intf = TAILQ_FIRST(&vrrp_intf_list);
		vrrpd_delete_if(intf, _B_FALSE);
	}

	vrrpd_fini();
	closelog();
	exit(1);
}

/*
 * Read the configuration file and initialize all the existing VRRP routers.
 */
static void
vrrpd_initconf()
{
	FILE *fp;
	char line[LINE_MAX];
	int linenum = 0;
	vrrp_vr_conf_t conf;
	vrrp_err_t err;

	vrrp_log(VRRP_DBG0, "vrrpd_initconf()");

	if ((fp = fopen(vrrpd_conffile, "rF")) == NULL) {
		vrrp_log(VRRP_ERR, "failed to open the configuration file %s",
		    vrrpd_conffile);
		return;
	}

	while (fgets(line, sizeof (line), fp) != NULL) {
		linenum++;
		conf.vvc_vrid = VRRP_VRID_NONE;
		if ((err = vrrpd_read_vrconf(line, &conf)) != VRRP_SUCCESS) {
			vrrp_log(VRRP_ERR, "failed to parse %d line %s",
			    linenum, line);
			continue;
		}

		/*
		 * Blank or comment line
		 */
		if (conf.vvc_vrid == VRRP_VRID_NONE)
			continue;

		/*
		 * No need to update the configuration since the VRRP router
		 * created/enabled based on the existing configuration.
		 */
		if ((err = vrrpd_create(&conf, _B_FALSE)) != VRRP_SUCCESS) {
			vrrp_log(VRRP_ERR, "VRRP router %s creation failed: "
			    "%s", conf.vvc_name, vrrp_err2str(err));
			continue;
		}

		if (conf.vvc_enabled &&
		    ((err = vrrpd_enable(conf.vvc_name, _B_FALSE)) !=
		    VRRP_SUCCESS)) {
			vrrp_log(VRRP_ERR, "VRRP router %s enable failed: %s",
			    conf.vvc_name, vrrp_err2str(err));
		}
	}

	(void) fclose(fp);
}

/*
 * Create the AF_UNIX socket used to communicate with libvrrpadm.
 *
 * This socket is used to receive the administrative request and
 * send back the results.
 */
static vrrp_err_t
vrrpd_cmdsock_create()
{
	iu_event_id_t		eid;
	struct sockaddr_un	laddr;
	int			sock, flags;

	vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_create()");

	if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): socket(AF_UNIX) "
		    "failed: %s", strerror(errno));
		return (VRRP_ESYS);
	}

	/*
	 * Set it to be non-blocking.
	 */
	flags = fcntl(sock, F_GETFL, 0);
	(void) fcntl(sock, F_SETFL, (flags | O_NONBLOCK));

	/*
	 * Unlink first in case a previous daemon instance exited ungracefully.
	 */
	(void) unlink(VRRPD_SOCKET);

	bzero(&laddr, sizeof (laddr));
	laddr.sun_family = AF_UNIX;
	(void) strlcpy(laddr.sun_path, VRRPD_SOCKET, sizeof (laddr.sun_path));
	if (bind(sock, (struct sockaddr *)&laddr, sizeof (laddr)) < 0) {
		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): bind() failed: %s",
		    strerror(errno));
		(void) close(sock);
		return (VRRP_ESYS);
	}

	if (listen(sock, 30) < 0) {
		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): listen() "
		    "failed: %s", strerror(errno));
		(void) close(sock);
		return (VRRP_ESYS);
	}

	if ((eid = iu_register_event(vrrpd_eh, sock, POLLIN,
	    vrrpd_cmdsock_handler, NULL)) == -1) {
		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): iu_register_event()"
		    " failed");
		(void) close(sock);
		return (VRRP_ESYS);
	}

	vrrpd_cmdsock_fd = sock;
	vrrpd_cmdsock_eid = eid;
	return (VRRP_SUCCESS);
}

static void
vrrpd_cmdsock_destroy()
{
	vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_destroy()");

	(void) iu_unregister_event(vrrpd_eh, vrrpd_cmdsock_eid, NULL);
	(void) close(vrrpd_cmdsock_fd);
	vrrpd_cmdsock_fd = -1;
	vrrpd_cmdsock_eid = -1;
}

/*
 * Create the PF_ROUTER sockets used to listen to the routing socket
 * messages and build the interface/IP address list. Create one for
 * each address family (IPv4 and IPv6).
 */
static vrrp_err_t
vrrpd_rtsock_create()
{
	int		i, flags, sock;
	iu_event_id_t	eid;

	vrrp_log(VRRP_DBG0, "vrrpd_rtsock_create()");

	for (i = 0; i < 2; i++) {
		sock = socket(PF_ROUTE, SOCK_RAW, vrrpd_rtsocks[i].vrt_af);
		if (sock == -1) {
			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): socket() "
			    "failed: %s", strerror(errno));
			break;
		}

		/*
		 * Set it to be non-blocking.
		 */
		if ((flags = fcntl(sock, F_GETFL, 0)) < 0) {
			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): "
			    "fcntl(F_GETFL) failed: %s", strerror(errno));
			break;
		}

		if ((fcntl(sock, F_SETFL, flags | O_NONBLOCK)) < 0) {
			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): "
			    "fcntl(F_SETFL) failed: %s", strerror(errno));
			break;
		}

		if ((eid = iu_register_event(vrrpd_eh, sock, POLLIN,
		    vrrpd_rtsock_handler, &(vrrpd_rtsocks[i].vrt_af))) == -1) {
			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): register "
			    "rtsock %d(%s) failed", sock,
			    af_str(vrrpd_rtsocks[i].vrt_af));
			break;
		}

		vrrpd_rtsocks[i].vrt_fd = sock;
		vrrpd_rtsocks[i].vrt_eid = eid;
	}

	if (i != 2) {
		(void) close(sock);
		vrrpd_rtsock_destroy();
		return (VRRP_ESYS);
	}

	return (VRRP_SUCCESS);
}

static void
vrrpd_rtsock_destroy()
{
	int		i;

	vrrp_log(VRRP_DBG0, "vrrpd_rtsock_destroy()");
	for (i = 0; i < 2; i++) {
		(void) iu_unregister_event(vrrpd_eh, vrrpd_rtsocks[i].vrt_eid,
		    NULL);
		(void) close(vrrpd_rtsocks[i].vrt_fd);
		vrrpd_rtsocks[i].vrt_eid = -1;
		vrrpd_rtsocks[i].vrt_fd = -1;
	}
}

/*
 * Create the VRRP control socket used to bring up/down the virtual
 * IP addresses. It is also used to set the IFF_NOACCEPT flag of
 * the virtual IP addresses.
 */
static vrrp_err_t
vrrpd_ctlsock_create()
{
	int	s, s6;
	int	on = _B_TRUE;

	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): socket(INET) "
		    "failed: %s", strerror(errno));
		return (VRRP_ESYS);
	}
	if (setsockopt(s, SOL_SOCKET, SO_VRRP, &on, sizeof (on)) < 0) {
		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): "
		    "setsockopt(INET, SO_VRRP) failed: %s", strerror(errno));
		(void) close(s);
		return (VRRP_ESYS);
	}

	if ((s6 = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) {
		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): socket(INET6) "
		    "failed: %s", strerror(errno));
		(void) close(s);
		return (VRRP_ESYS);
	}
	if (setsockopt(s6, SOL_SOCKET, SO_VRRP, &on, sizeof (on)) < 0) {
		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): "
		    "setsockopt(INET6, SO_VRRP) failed: %s", strerror(errno));
		(void) close(s);
		(void) close(s6);
		return (VRRP_ESYS);
	}

	vrrpd_ctlsock_fd = s;
	vrrpd_ctlsock6_fd = s6;
	return (VRRP_SUCCESS);
}

static void
vrrpd_ctlsock_destroy()
{
	(void) close(vrrpd_ctlsock_fd);
	vrrpd_ctlsock_fd = -1;
	(void) close(vrrpd_ctlsock6_fd);
	vrrpd_ctlsock6_fd = -1;
}

/*ARGSUSED*/
static void
vrrpd_cmd_create(void *arg1, void *arg2, size_t *arg2_sz)
{
	vrrp_cmd_create_t	*cmd = (vrrp_cmd_create_t *)arg1;
	vrrp_ret_create_t	*ret = (vrrp_ret_create_t *)arg2;
	vrrp_err_t		err;

	err = vrrpd_create(&cmd->vcc_conf, _B_TRUE);
	if (err == VRRP_SUCCESS && cmd->vcc_conf.vvc_enabled) {
		/*
		 * No need to update the configuration since it is already
		 * done in the above vrrpd_create() call
		 */
		err = vrrpd_enable(cmd->vcc_conf.vvc_name, _B_FALSE);
		if (err != VRRP_SUCCESS)
			(void) vrrpd_delete(cmd->vcc_conf.vvc_name);
	}
	ret->vrc_err = err;
}

/*ARGSUSED*/
static void
vrrpd_cmd_delete(void *arg1, void *arg2, size_t *arg2_sz)
{
	vrrp_cmd_delete_t	*cmd = (vrrp_cmd_delete_t *)arg1;
	vrrp_ret_delete_t	*ret = (vrrp_ret_delete_t *)arg2;

	ret->vrd_err = vrrpd_delete(cmd->vcd_name);
}

/*ARGSUSED*/
static void
vrrpd_cmd_enable(void *arg1, void *arg2, size_t *arg2_sz)
{
	vrrp_cmd_enable_t	*cmd = (vrrp_cmd_enable_t *)arg1;
	vrrp_ret_enable_t	*ret = (vrrp_ret_enable_t *)arg2;

	ret->vrs_err = vrrpd_enable(cmd->vcs_name, _B_TRUE);
}

/*ARGSUSED*/
static void
vrrpd_cmd_disable(void *arg1, void *arg2, size_t *arg2_sz)
{
	vrrp_cmd_disable_t	*cmd = (vrrp_cmd_disable_t *)arg1;
	vrrp_ret_disable_t	*ret = (vrrp_ret_disable_t *)arg2;

	ret->vrx_err = vrrpd_disable(cmd->vcx_name);
}

/*ARGSUSED*/
static void
vrrpd_cmd_modify(void *arg1, void *arg2, size_t *arg2_sz)
{
	vrrp_cmd_modify_t	*cmd = (vrrp_cmd_modify_t *)arg1;
	vrrp_ret_modify_t	*ret = (vrrp_ret_modify_t *)arg2;

	ret->vrm_err = vrrpd_modify(&cmd->vcm_conf, cmd->vcm_mask);
}

static void
vrrpd_cmd_query(void *arg1, void *arg2, size_t *arg2_sz)
{
	vrrp_cmd_query_t	*cmd = (vrrp_cmd_query_t *)arg1;

	vrrpd_query(cmd->vcq_name, arg2, arg2_sz);
}

static void
vrrpd_cmd_list(void *arg1, void *arg2, size_t *arg2_sz)
{
	vrrp_cmd_list_t	*cmd = (vrrp_cmd_list_t *)arg1;

	vrrpd_list(cmd->vcl_vrid, cmd->vcl_ifname, cmd->vcl_af, arg2, arg2_sz);
}

/*
 * Write-type requeset must have the solaris.network.vrrp authorization.
 */
static boolean_t
vrrp_auth_check(int connfd, vrrp_cmd_info_t *cinfo)
{
	ucred_t		*cred = NULL;
	uid_t		uid;
	struct passwd	*pw;
	boolean_t	success = _B_FALSE;

	vrrp_log(VRRP_DBG0, "vrrp_auth_check()");

	if (!cinfo->vi_setop)
		return (_B_TRUE);

	/*
	 * Validate the credential
	 */
	if (getpeerucred(connfd, &cred) == (uid_t)-1) {
		vrrp_log(VRRP_ERR, "vrrp_auth_check(): getpeerucred() "
		    "failed: %s", strerror(errno));
		return (_B_FALSE);
	}

	if ((uid = ucred_getruid((const ucred_t *)cred)) == (uid_t)-1) {
		vrrp_log(VRRP_ERR, "vrrp_auth_check(): ucred_getruid() "
		    "failed: %s", strerror(errno));
		goto done;
	}

	if ((pw = getpwuid(uid)) == NULL) {
		vrrp_log(VRRP_ERR, "vrrp_auth_check(): getpwuid() failed");
		goto done;
	}

	success = (chkauthattr("solaris.network.vrrp", pw->pw_name) == 1);

done:
	ucred_free(cred);
	return (success);
}

/*
 * Process the administrative request from libvrrpadm
 */
/* ARGSUSED */
static void
vrrpd_cmdsock_handler(iu_eh_t *eh, int s, short events, iu_event_id_t id,
    void *arg)
{
	vrrp_cmd_info_t		*cinfo = NULL;
	vrrp_err_t		err = VRRP_SUCCESS;
	uchar_t			buf[BUFFSIZE], ackbuf[BUFFSIZE];
	size_t			cursize, acksize, len;
	uint32_t		cmd;
	int			connfd, i;
	struct sockaddr_in	from;
	socklen_t		fromlen;

	vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_handler()");

	fromlen = (socklen_t)sizeof (from);
	if ((connfd = accept(s, (struct sockaddr *)&from, &fromlen)) < 0) {
		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler() accept(): %s",
		    strerror(errno));
		return;
	}

	/*
	 * First get the type of the request
	 */
	cursize = 0;
	while (cursize < sizeof (uint32_t)) {
		len = read(connfd, buf + cursize,
		    sizeof (uint32_t) - cursize);
		if (len == (size_t)-1 && (errno == EAGAIN || errno == EINTR)) {
			continue;
		} else if (len > 0) {
			cursize += len;
			continue;
		}
		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid message "
		    "length");
		(void) close(connfd);
		return;
	}

	/* LINTED E_BAD_PTR_CAST_ALIGN */
	cmd = ((vrrp_cmd_t *)buf)->vc_cmd;
	for (i = 0; i < VRRP_DOOR_INFO_TABLE_SIZE; i++) {
		if (vrrp_cmd_info_tbl[i].vi_cmd == cmd) {
			cinfo = vrrp_cmd_info_tbl + i;
			break;
		}
	}

	if (cinfo == NULL) {
		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid request "
		    "type %d", cmd);
		err = VRRP_EINVAL;
		goto done;
	}

	/*
	 * Get the rest of the request.
	 */
	assert(cursize == sizeof (uint32_t));
	while (cursize < cinfo->vi_reqsize) {
		len = read(connfd, buf + cursize,
		    cinfo->vi_reqsize - cursize);
		if (len == (size_t)-1 && (errno == EAGAIN || errno == EINTR)) {
			continue;
		} else if (len > 0) {
			cursize += len;
			continue;
		}
		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid message "
		    "length");
		err = VRRP_EINVAL;
		goto done;
	}

	/*
	 * Validate the authorization
	 */
	if (!vrrp_auth_check(connfd, cinfo)) {
		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): "
		    "not sufficient authorization");
		err = VRRP_EPERM;
	}

done:
	/*
	 * Ack the request
	 */
	if (err != 0) {
		/* LINTED E_BAD_PTR_CAST_ALIGN */
		((vrrp_ret_t *)ackbuf)->vr_err = err;
		acksize = sizeof (vrrp_ret_t);
	} else {
		/*
		 * If the size of ack is varied, the cmdfunc callback
		 * will set the right size.
		 */
		if ((acksize = cinfo->vi_acksize) == 0)
			acksize = sizeof (ackbuf);

		/* LINTED E_BAD_PTR_CAST_ALIGN */
		cinfo->vi_cmdfunc((vrrp_cmd_t *)buf, ackbuf, &acksize);
	}

	/*
	 * Send the ack back.
	 */
	cursize = 0;
	while (cursize < acksize) {
		len = sendto(connfd, ackbuf + cursize, acksize - cursize,
		    0, (struct sockaddr *)&from, fromlen);
		if (len == (size_t)-1 && errno == EAGAIN) {
			continue;
		} else if (len > 0) {
			cursize += len;
			continue;
		} else {
			vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler() failed to "
			    "ack: %s", strerror(errno));
			break;
		}
	}

	(void) shutdown(connfd, SHUT_RDWR);
	(void) close(connfd);
}

/*
 * Process the routing socket messages and update the interfaces/IP addresses
 * list
 */
/* ARGSUSED */
static void
vrrpd_rtsock_handler(iu_eh_t *eh, int s, short events,
    iu_event_id_t id, void *arg)
{
	char			buf[BUFFSIZE];
	struct ifa_msghdr	*ifam;
	int			nbytes;
	int			af = *(int *)arg;
	boolean_t		scanif = _B_FALSE;

	for (;;) {
		nbytes = read(s, buf, sizeof (buf));
		if (nbytes <= 0) {
			/* No more messages */
			break;
		}

		/* LINTED E_BAD_PTR_CAST_ALIGN */
		ifam = (struct ifa_msghdr *)buf;
		if (ifam->ifam_version != RTM_VERSION) {
			vrrp_log(VRRP_ERR, "vrrpd_rtsock_handler(): version %d "
			    "not understood", ifam->ifam_version);
			break;
		}

		vrrp_log(VRRP_DBG0, "vrrpd_rtsock_handler(): recv %s event",
		    rtm_event2str(ifam->ifam_type));

		switch (ifam->ifam_type) {
		case RTM_FREEADDR:
		case RTM_CHGADDR:
		case RTM_NEWADDR:
		case RTM_DELADDR:
			/*
			 * An IP address has been created/updated/deleted or
			 * brought up/down, re-initilialize the interface/IP
			 * address list.
			 */
			scanif = _B_TRUE;
			break;
		default:
			/* Not interesting */
			break;
		}
	}

	if (scanif)
		vrrpd_scan(af);
}

/*
 * Periodically scan the interface/IP addresses on the system.
 */
/* ARGSUSED */
static void
vrrpd_scan_timer(iu_tq_t *tq, void *arg)
{
	vrrp_log(VRRP_DBG0, "vrrpd_scan_timer()");
	vrrpd_scan(AF_INET);
	vrrpd_scan(AF_INET6);
}

/*
 * Get the list of the interface/IP addresses of the specified address
 * family.
 */
static void
vrrpd_scan(int af)
{
	vrrp_log(VRRP_DBG0, "vrrpd_scan(%s)", af_str(af));

again:
	vrrpd_init_ipcache(af);

	/* If interface index changes, walk again. */
	if (vrrpd_walk_addr_info(af) != IPADM_SUCCESS)
		goto again;

	vrrpd_update_ipcache(af);
}

/*
 * First mark all IP addresses of the specific address family to be removed.
 * This flag will then be cleared when we walk up all the IP addresses.
 */
static void
vrrpd_init_ipcache(int af)
{
	vrrp_intf_t	*intf, *next_intf;
	vrrp_ip_t	*ip, *nextip;
	char		abuf[INET6_ADDRSTRLEN];

	vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(%s)", af_str(af));

	next_intf = TAILQ_FIRST(&vrrp_intf_list);
	while ((intf = next_intf) != NULL) {
		next_intf = TAILQ_NEXT(intf, vvi_next);
		if (intf->vvi_af != af)
			continue;

		/*
		 * If the interface is still marked as new, it means that this
		 * vrrpd_init_ipcache() call is a result of ifindex change,
		 * which causes the re-walk of all the interfaces (see
		 * vrrpd_add_ipaddr()), and some interfaces are still marked
		 * as new during the last walk. In this case, delete this
		 * interface with the "update_vr" argument to be _B_FALSE,
		 * since no VRRP router has been assoicated with this
		 * interface yet (the association is done in
		 * vrrpd_update_ipcache()).
		 *
		 * This interface will be re-added later if it still exists.
		 */
		if (intf->vvi_state == NODE_STATE_NEW) {
			vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(): remove %s "
			    "(%d), may be added later", intf->vvi_ifname,
			    intf->vvi_ifindex);
			vrrpd_delete_if(intf, _B_FALSE);
			continue;
		}

		for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
		    ip = nextip) {
			nextip = TAILQ_NEXT(ip, vip_next);
			/* LINTED E_CONSTANT_CONDITION */
			VRRPADDR2STR(af, &ip->vip_addr, abuf,
			    INET6_ADDRSTRLEN, _B_FALSE);

			if (ip->vip_state != NODE_STATE_NEW) {
				vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(%s/%d, "
				    "%s(%s/0x%x))", intf->vvi_ifname,
				    intf->vvi_ifindex, ip->vip_lifname,
				    abuf, ip->vip_flags);
				ip->vip_state = NODE_STATE_STALE;
				continue;
			}

			/*
			 * If the IP is still marked as new, it means that
			 * this vrrpd_init_ipcache() call is a result of
			 * ifindex change, which causes the re-walk of all
			 * the IP addresses (see vrrpd_add_ipaddr()).
			 * Delete this IP.
			 *
			 * This IP will be readded later if it still exists.
			 */
			vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(): remove "
			    "%s/%d , %s(%s)", intf->vvi_ifname,
			    intf->vvi_ifindex, ip->vip_lifname, abuf);
			vrrpd_delete_ip(intf, ip);
		}
	}
}

/*
 * Walk all the IP addresses of the given family and update its
 * addresses list. Return IPADM_FAILURE if it is required to walk
 * all the interfaces again (one of the interface index changes in between).
 */
static ipadm_status_t
vrrpd_walk_addr_info(int af)
{
	ipadm_addr_info_t	*ainfo, *ainfop;
	ipadm_status_t		ipstatus;
	char			*lifname;
	struct sockaddr_storage	stor;
	vrrp_addr_t		*addr;
	int			ifindex;
	uint64_t		flags;

	vrrp_log(VRRP_DBG0, "vrrpd_walk_addr_info(%s)", af_str(af));

	ipstatus = ipadm_addr_info(vrrp_ipadm_handle, NULL, &ainfo, 0, 0);
	if (ipstatus != IPADM_SUCCESS) {
		vrrp_log(VRRP_ERR, "vrrpd_walk_addr_info(%s): "
		    "ipadm_addr_info() failed: %s",
		    af_str(af), ipadm_status2str(ipstatus));
		return (IPADM_SUCCESS);
	}

	for (ainfop = ainfo; ainfop != NULL; ainfop = IA_NEXT(ainfop)) {
		if (ainfop->ia_ifa.ifa_addr->sa_family != af)
			continue;

		lifname = ainfop->ia_ifa.ifa_name;
		flags = ainfop->ia_ifa.ifa_flags;
		(void) memcpy(&stor, ainfop->ia_ifa.ifa_addr, sizeof (stor));
		addr = (vrrp_addr_t *)&stor;

		vrrp_log(VRRP_DBG0, "vrrpd_walk_addr_info(%s): %s",
		    af_str(af), lifname);

		/* Skip virtual/IPMP/P2P interfaces */
		if (flags & (IFF_VIRTUAL|IFF_IPMP|IFF_POINTOPOINT)) {
			vrrp_log(VRRP_DBG0, "vrrpd_walk_addr_info(%s): "
			    "skipped %s", af_str(af), lifname);
			continue;
		}

		/* Filter out the all-zero IP address */
		if (VRRPADDR_UNSPECIFIED(af, addr))
			continue;

		if ((ifindex = if_nametoindex(lifname)) == 0) {
			if (errno != ENXIO && errno != ENOENT) {
				vrrp_log(VRRP_ERR, "vrrpd_walk_addr_info(%s): "
				    "if_nametoindex() failed for %s: %s",
				    af_str(af), lifname, strerror(errno));
			}
			break;
		}

		/*
		 * The interface is unplumbed/replumbed during the walk.  Try
		 * to walk the IP addresses one more time.
		 */
		if (vrrpd_add_ipaddr(lifname, af, addr, ifindex, flags)
		    == VRRP_EAGAIN) {
			ipstatus = IPADM_FAILURE;
			break;
		}
	}

	ipadm_free_addr_info(ainfo);
	return (ipstatus);
}

/*
 * Given the information of each IP address, update the interface and
 * IP addresses list
 */
static vrrp_err_t
vrrpd_add_ipaddr(char *lifname, int af, vrrp_addr_t *addr, int ifindex,
    uint64_t flags)
{
	char		ifname[LIFNAMSIZ], *c;
	vrrp_intf_t	*intf;
	vrrp_ip_t	*ip;
	char		abuf[INET6_ADDRSTRLEN];
	vrrp_err_t	err;

	/* LINTED E_CONSTANT_CONDITION */
	VRRPADDR2STR(af, addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
	vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s, %d, 0x%x)", lifname,
	    abuf, ifindex, flags);

	/*
	 * Get the physical interface name from the logical interface name.
	 */
	(void) strlcpy(ifname, lifname, sizeof (ifname));
	if ((c = strchr(ifname, ':')) != NULL)
		*c = '\0';

	if ((intf = vrrpd_lookup_if(ifname, af)) == NULL) {
		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(): %s is new", ifname);
		err = vrrpd_create_if(ifname, af, ifindex, &intf);
		if (err != VRRP_SUCCESS)
			return (err);
	} else if (intf->vvi_ifindex != ifindex) {
		/*
		 * If index changes, it means that this interface is
		 * unplumbed/replumbed since we last checked. If this
		 * interface is not used by any VRRP router, just
		 * update its ifindex, and the IP addresses list will
		 * be updated later. Otherwise, return EAGAIN to rewalk
		 * all the IP addresses from the beginning.
		 */
		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s) ifindex changed ",
		    "from %d to %d", ifname, intf->vvi_ifindex, ifindex);
		if (!IS_PRIMARY_INTF(intf) && !IS_VIRTUAL_INTF(intf)) {
			intf->vvi_ifindex = ifindex;
		} else {
			/*
			 * delete this interface from the list if this
			 * interface has already been assoicated with
			 * any VRRP routers.
			 */
			vrrpd_delete_if(intf, _B_TRUE);
			return (VRRP_EAGAIN);
		}
	}

	/*
	 * Does this IP address already exist?
	 */
	TAILQ_FOREACH(ip, &intf->vvi_iplist, vip_next) {
		if (strcmp(ip->vip_lifname, lifname) == 0)
			break;
	}

	if (ip != NULL) {
		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s) IP exists",
		    lifname, abuf);
		ip->vip_state = NODE_STATE_NONE;
		ip->vip_flags = flags;
		if (ipaddr_cmp(af, addr, &ip->vip_addr) != 0) {
			/*
			 * Address has been changed, mark it as new
			 * If this address is already selected as the
			 * primary IP address, the new IP will be checked
			 * to see whether it is still qualified as the
			 * primary IP address. If not, the primary IP
			 * address will be reselected.
			 */
			(void) memcpy(&ip->vip_addr, addr,
			    sizeof (vrrp_addr_t));

			ip->vip_state = NODE_STATE_NEW;
		}
	} else {
		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s) IP is new",
		    lifname, abuf);

		err = vrrpd_create_ip(intf, lifname, addr, flags);
		if (err != VRRP_SUCCESS)
			return (err);
	}
	return (VRRP_SUCCESS);
}

/*
 * Update the interface and IP addresses list. Remove the ones that have been
 * staled since last time we walk the IP addresses and updated the ones that
 * have been changed.
 */
static void
vrrpd_update_ipcache(int af)
{
	vrrp_intf_t	*intf, *nextif;
	vrrp_ip_t	*ip, *nextip;
	char		abuf[INET6_ADDRSTRLEN];
	boolean_t	primary_selected;
	boolean_t	primary_now_selected;
	boolean_t	need_reenable = _B_FALSE;

	vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(%s)", af_str(af));

	nextif = TAILQ_FIRST(&vrrp_intf_list);
	while ((intf = nextif) != NULL) {
		nextif = TAILQ_NEXT(intf, vvi_next);
		if (intf->vvi_af != af)
			continue;

		/*
		 * Does the interface already select its primary IP address?
		 */
		primary_selected = (intf->vvi_pip != NULL);
		assert(!primary_selected || IS_PRIMARY_INTF(intf));

		/*
		 * Removed the IP addresses that have been unconfigured.
		 */
		for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
		    ip = nextip) {
			nextip = TAILQ_NEXT(ip, vip_next);
			if (ip->vip_state != NODE_STATE_STALE)
				continue;

			/* LINTED E_CONSTANT_CONDITION */
			VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN,
			    _B_FALSE);
			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): IP %s "
			    "is removed over %s", abuf, intf->vvi_ifname);
			vrrpd_delete_ip(intf, ip);
		}

		/*
		 * No IP addresses left, delete this interface.
		 */
		if (TAILQ_EMPTY(&intf->vvi_iplist)) {
			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
			    "no IP left over %s", intf->vvi_ifname);
			vrrpd_delete_if(intf, _B_TRUE);
			continue;
		}

		/*
		 * If this is selected ss the physical interface for any
		 * VRRP router, reselect the primary address if needed.
		 */
		if (IS_PRIMARY_INTF(intf)) {
			vrrpd_reselect_primary(intf);
			primary_now_selected = (intf->vvi_pip != NULL);

			/*
			 * Cannot find the new primary IP address.
			 */
			if (primary_selected && !primary_now_selected) {
				vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache() "
				    "reselect primary IP on %s failed",
				    intf->vvi_ifname);
				vrrpd_remove_if(intf, _B_TRUE);
			} else if (!primary_selected && primary_now_selected) {
				/*
				 * The primary IP address is successfully
				 * selected on the physical interfacew we
				 * need to walk through all the VRRP routers
				 * that is created on this physical interface
				 * and see whether they can now be enabled.
				 */
				need_reenable = _B_TRUE;
			}
		}

		/*
		 * For every new virtual IP address, bring up/down it based
		 * on the state of VRRP router.
		 *
		 * Note that it is fine to not update the IP's vip_flags field
		 * even if vrrpd_virtualip_updateone() changed the address's
		 * up/down state, since the vip_flags field is only used for
		 * select primary IP address over a physical interface, and
		 * vrrpd_virtualip_updateone() only affects the virtual IP
		 * address's status.
		 */
		for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
		    ip = nextip) {
			nextip = TAILQ_NEXT(ip, vip_next);
			/* LINTED E_CONSTANT_CONDITION */
			VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN,
			    _B_FALSE);
			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
			    "IP %s over %s%s", abuf, intf->vvi_ifname,
			    ip->vip_state == NODE_STATE_NEW ? " is new" : "");

			if (IS_VIRTUAL_INTF(intf)) {
				/*
				 * If this IP is new, update its up/down state
				 * based on the virtual interface's state
				 * (which is determined by the VRRP router's
				 * state). Otherwise, check only and prompt
				 * warnings if its up/down state has been
				 * changed.
				 */
				if (vrrpd_virtualip_updateone(intf, ip,
				    ip->vip_state == NODE_STATE_NONE) !=
				    VRRP_SUCCESS) {
					vrrp_log(VRRP_DBG0,
					    "vrrpd_update_ipcache(): "
					    "IP %s over %s update failed", abuf,
					    intf->vvi_ifname);
					vrrpd_delete_ip(intf, ip);
					continue;
				}
			}
			ip->vip_state = NODE_STATE_NONE;
		}

		/*
		 * The IP address is deleted when it is failed to be brought
		 * up. If no IP addresses are left, delete this interface.
		 */
		if (TAILQ_EMPTY(&intf->vvi_iplist)) {
			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
			    "no IP left over %s", intf->vvi_ifname);
			vrrpd_delete_if(intf, _B_TRUE);
			continue;
		}

		if (intf->vvi_state == NODE_STATE_NEW) {
			/*
			 * A new interface is found. This interface can be
			 * the primary interface or the virtual VNIC
			 * interface.  Again, we need to walk throught all
			 * the VRRP routers to see whether some of them can
			 * now be enabled because of the new primary IP
			 * address or the new virtual IP addresses.
			 */
			intf->vvi_state = NODE_STATE_NONE;
			need_reenable = _B_TRUE;
		}
	}

	if (need_reenable)
		vrrpd_reenable_all_vr();
}

/*
 * Reselect primary IP if:
 * - The existing primary IP is no longer qualified (removed or it is down or
 *   not a link-local IP for IPv6 VRRP router);
 * - This is a physical interface but no primary IP is chosen;
 */
static void
vrrpd_reselect_primary(vrrp_intf_t *intf)
{
	vrrp_ip_t	*ip;
	char		abuf[INET6_ADDRSTRLEN];

	assert(IS_PRIMARY_INTF(intf));

	/*
	 * If the interface's old primary IP address is still valid, return
	 */
	if (((ip = intf->vvi_pip) != NULL) && (QUALIFY_PRIMARY_ADDR(intf, ip)))
		return;

	if (ip != NULL) {
		/* LINTED E_CONSTANT_CONDITION */
		VRRPADDR2STR(intf->vvi_af, &ip->vip_addr, abuf,
		    sizeof (abuf), _B_FALSE);
		vrrp_log(VRRP_DBG0, "vrrpd_reselect_primary(%s): primary IP %s "
		    "is no longer qualified", intf->vvi_ifname, abuf);
	}

	ip = vrrpd_select_primary(intf);
	intf->vvi_pip = ip;

	if (ip != NULL) {
		/* LINTED E_CONSTANT_CONDITION */
		VRRPADDR2STR(intf->vvi_af, &ip->vip_addr, abuf,
		    sizeof (abuf), _B_FALSE);
		vrrp_log(VRRP_DBG0, "vrrpd_reselect_primary(%s): primary IP %s "
		    "is selected", intf->vvi_ifname, abuf);
	}
}

/*
 * Select the primary IP address. Since the link-local IP address is always
 * at the head of the IP address list, try to find the first UP IP address
 * and see whether it qualify.
 */
static vrrp_ip_t *
vrrpd_select_primary(vrrp_intf_t *pif)
{
	vrrp_ip_t	*pip;
	char		abuf[INET6_ADDRSTRLEN];

	vrrp_log(VRRP_DBG1, "vrrpd_select_primary(%s)", pif->vvi_ifname);

	TAILQ_FOREACH(pip, &pif->vvi_iplist, vip_next) {
		assert(pip->vip_state != NODE_STATE_STALE);

		/* LINTED E_CONSTANT_CONDITION */
		VRRPADDR2STR(pif->vvi_af, &pip->vip_addr, abuf,
		    INET6_ADDRSTRLEN, _B_FALSE);
		vrrp_log(VRRP_DBG0, "vrrpd_select_primary(%s): %s is %s",
		    pif->vvi_ifname, abuf,
		    (pip->vip_flags & IFF_UP) ? "up" : "down");

		if (pip->vip_flags & IFF_UP)
			break;
	}

	/*
	 * Is this valid primary IP address?
	 */
	if (pip == NULL || !QUALIFY_PRIMARY_ADDR(pif, pip)) {
		vrrp_log(VRRP_DBG0, "vrrpd_select_primary(%s/%s) failed",
		    pif->vvi_ifname, af_str(pif->vvi_af));
		return (NULL);
	}
	return (pip);
}

/*
 * This is a new interface. Check whether any VRRP router is waiting for it
 */
static void
vrrpd_reenable_all_vr()
{
	vrrp_vr_t *vr;

	vrrp_log(VRRP_DBG0, "vrrpd_reenable_all_vr()");

	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
		if (vr->vvr_conf.vvc_enabled)
			(void) vrrpd_enable_vr(vr);
	}
}

/*
 * If primary_addr_gone is _B_TRUE, it means that we failed to select
 * the primary IP address on this (physical) interface; otherwise,
 * it means the interface is no longer available.
 */
static void
vrrpd_remove_if(vrrp_intf_t *intf, boolean_t primary_addr_gone)
{
	vrrp_vr_t *vr;

	vrrp_log(VRRP_DBG0, "vrrpd_remove_if(%s): %s", intf->vvi_ifname,
	    primary_addr_gone ? "primary address gone" : "interface deleted");

	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
		if (vr->vvr_conf.vvc_enabled)
			vrrpd_disable_vr(vr, intf, primary_addr_gone);
	}
}

/*
 * Update the VRRP configuration file based on the given configuration.
 * op is either VRRP_CONF_UPDATE or VRRP_CONF_DELETE
 */
static vrrp_err_t
vrrpd_updateconf(vrrp_vr_conf_t *newconf, uint_t op)
{
	vrrp_vr_conf_t	conf;
	FILE		*fp, *nfp;
	int		nfd;
	char		line[LINE_MAX];
	char		newfile[MAXPATHLEN];
	boolean_t	found = _B_FALSE;
	vrrp_err_t	err = VRRP_SUCCESS;

	vrrp_log(VRRP_DBG0, "vrrpd_updateconf(%s, %s)", newconf->vvc_name,
	    op == VRRP_CONF_UPDATE ? "update" : "delete");

	if ((fp = fopen(vrrpd_conffile, "r+F")) == NULL) {
		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): open %s failed: %s",
		    vrrpd_conffile, strerror(errno));
		return (VRRP_EDB);
	}

	(void) snprintf(newfile, MAXPATHLEN, "%s.new", vrrpd_conffile);
	if ((nfd = open(newfile, O_WRONLY | O_CREAT | O_TRUNC,
	    S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0) {
		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): open %s failed: %s",
		    newfile, strerror(errno));
		(void) fclose(fp);
		return (VRRP_EDB);
	}

	if ((nfp = fdopen(nfd, "wF")) == NULL) {
		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): fdopen(%s) failed: %s",
		    newfile, strerror(errno));
		goto done;
	}

	while (fgets(line, sizeof (line), fp) != NULL) {
		conf.vvc_vrid = VRRP_VRID_NONE;
		if (!found && (err = vrrpd_read_vrconf(line, &conf)) !=
		    VRRP_SUCCESS) {
			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): invalid "
			    "configuration format: %s", line);
			goto done;
		}

		/*
		 * Write this line out if:
		 * - this is a comment line; or
		 * - we've done updating/deleting the the given VR; or
		 * - if the name of the VR read from this line does not match
		 *   the VR name that we are about to update/delete;
		 */
		if (found || conf.vvc_vrid == VRRP_VRID_NONE ||
		    strcmp(conf.vvc_name, newconf->vvc_name) != 0) {
			if (fputs(line, nfp) != EOF)
				continue;

			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
			    "write line %s", line);
			err = VRRP_EDB;
			goto done;
		}

		/*
		 * Otherwise, update/skip the line.
		 */
		found = _B_TRUE;
		if (op == VRRP_CONF_DELETE)
			continue;

		assert(op == VRRP_CONF_UPDATE);
		if ((err = vrrpd_write_vrconf(line, sizeof (line),
		    newconf)) != VRRP_SUCCESS) {
			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
			    "update configuration for %s", newconf->vvc_name);
			goto done;
		}
		if (fputs(line, nfp) == EOF) {
			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
			    "write line %s", line);
			err = VRRP_EDB;
			goto done;
		}
	}

	/*
	 * If we get to the end of the file and have not seen the router that
	 * we are about to update, write it out.
	 */
	if (!found && op == VRRP_CONF_UPDATE) {
		if ((err = vrrpd_write_vrconf(line, sizeof (line),
		    newconf)) == VRRP_SUCCESS && fputs(line, nfp) == EOF) {
			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
			    "write line %s", line);
			err = VRRP_EDB;
		}
	} else if (!found && op == VRRP_CONF_DELETE) {
		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to find "
		    "configuation for %s", newconf->vvc_name);
		err = VRRP_ENOTFOUND;
	}

	if (err != VRRP_SUCCESS)
		goto done;

	if (fflush(nfp) == EOF || rename(newfile, vrrpd_conffile) < 0) {
		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
		    "rename file %s", newfile);
		err = VRRP_EDB;
	}

done:
	(void) fclose(fp);
	(void) fclose(nfp);
	(void) unlink(newfile);
	return (err);
}

static vrrp_err_t
vrrpd_write_vrconf(char *line, size_t len, vrrp_vr_conf_t *conf)
{
	vrrp_prop_t	*prop;
	int		n, i;

	vrrp_log(VRRP_DBG0, "vrrpd_write_vrconf(%s)", conf->vvc_name);

	for (i = 0; i < VRRP_PROP_INFO_TABSIZE; i++) {
		prop = &vrrp_prop_info_tbl[i];
		n = snprintf(line, len, i == 0 ? "%s=" : " %s=",
		    prop->vs_propname);
		if (n < 0 || n >= len)
			break;
		len -= n;
		line += n;
		n = prop->vs_propwrite(conf, line, len);
		if (n < 0 || n >= len)
			break;
		len -= n;
		line += n;
	}
	if (i != VRRP_PROP_INFO_TABSIZE) {
		vrrp_log(VRRP_ERR, "vrrpd_write_vrconf(%s): buffer size too"
		    "small", conf->vvc_name);
		return (VRRP_EDB);
	}
	n = snprintf(line, len, "\n");
	if (n < 0 || n >= len) {
		vrrp_log(VRRP_ERR, "vrrpd_write_vrconf(%s): buffer size too"
		    "small", conf->vvc_name);
		return (VRRP_EDB);
	}
	return (VRRP_SUCCESS);
}

static vrrp_err_t
vrrpd_read_vrconf(char *line, vrrp_vr_conf_t *conf)
{
	char		*str, *token;
	char		*next;
	vrrp_err_t	err = VRRP_SUCCESS;
	char		tmpbuf[MAXLINELEN];

	str = tmpbuf;
	(void) strlcpy(tmpbuf, line, MAXLINELEN);

	/*
	 * Skip leading spaces, blank lines, and comments.
	 */
	skip_whitespace(str);
	if ((str - tmpbuf == strlen(tmpbuf)) || (*str == '#')) {
		conf->vvc_vrid = VRRP_VRID_NONE;
		return (VRRP_SUCCESS);
	}

	/*
	 * Read each VR properties.
	 */
	for (token = strtok_r(str, " \n\t", &next); token != NULL;
	    token = strtok_r(NULL, " \n\t", &next)) {
		if ((err = vrrpd_readprop(token, conf)) != VRRP_SUCCESS)
			break;
	}

	/* All properties read but no VRID defined */
	if (err == VRRP_SUCCESS && conf->vvc_vrid == VRRP_VRID_NONE)
		err = VRRP_EINVAL;

	return (err);
}

static vrrp_err_t
vrrpd_readprop(const char *str, vrrp_vr_conf_t *conf)
{
	vrrp_prop_t	*prop;
	char		*pstr;
	int		i;

	if ((pstr = strchr(str, '=')) == NULL) {
		vrrp_log(VRRP_ERR, "vrrpd_readprop(%s): invalid property", str);
		return (VRRP_EINVAL);
	}

	*pstr++ = '\0';
	for (i = 0; i < VRRP_PROP_INFO_TABSIZE; i++) {
		prop = &vrrp_prop_info_tbl[i];
		if (strcasecmp(str, prop->vs_propname) == 0) {
			if (prop->vs_propread(conf, pstr))
				break;
		}
	}

	if (i == VRRP_PROP_INFO_TABSIZE) {
		vrrp_log(VRRP_ERR, "vrrpd_readprop(%s): invalid property", str);
		return (VRRP_EINVAL);
	}

	return (VRRP_SUCCESS);
}

static boolean_t
vrrp_rd_prop_name(vrrp_vr_conf_t *conf, const char *str)
{
	size_t size = sizeof (conf->vvc_name);
	return (strlcpy(conf->vvc_name, str, size) < size);
}

static boolean_t
vrrp_rd_prop_vrid(vrrp_vr_conf_t *conf, const char *str)
{
	conf->vvc_vrid = strtol(str, NULL, 0);
	return (!(conf->vvc_vrid < VRRP_VRID_MIN ||
	    conf->vvc_vrid > VRRP_VRID_MAX ||
	    (conf->vvc_vrid == 0 && errno != 0)));
}

static boolean_t
vrrp_rd_prop_af(vrrp_vr_conf_t *conf, const char *str)
{
	if (strcasecmp(str, "AF_INET") == 0)
		conf->vvc_af = AF_INET;
	else if (strcasecmp(str, "AF_INET6") == 0)
		conf->vvc_af = AF_INET6;
	else
		return (_B_FALSE);
	return (_B_TRUE);
}

static boolean_t
vrrp_rd_prop_pri(vrrp_vr_conf_t *conf, const char *str)
{
	conf->vvc_pri = strtol(str, NULL, 0);
	return (!(conf->vvc_pri < VRRP_PRI_MIN ||
	    conf->vvc_pri > VRRP_PRI_OWNER ||
	    (conf->vvc_pri == 0 && errno != 0)));
}

static boolean_t
vrrp_rd_prop_adver_int(vrrp_vr_conf_t *conf, const char *str)
{
	conf->vvc_adver_int = strtol(str, NULL, 0);
	return (!(conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
	    conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX ||
	    (conf->vvc_adver_int == 0 && errno != 0)));
}

static boolean_t
vrrp_rd_prop_preempt(vrrp_vr_conf_t *conf, const char *str)
{
	if (strcasecmp(str, "true") == 0)
		conf->vvc_preempt = _B_TRUE;
	else if (strcasecmp(str, "false") == 0)
		conf->vvc_preempt = _B_FALSE;
	else
		return (_B_FALSE);
	return (_B_TRUE);
}

static boolean_t
vrrp_rd_prop_accept(vrrp_vr_conf_t *conf, const char *str)
{
	if (strcasecmp(str, "true") == 0)
		conf->vvc_accept = _B_TRUE;
	else if (strcasecmp(str, "false") == 0)
		conf->vvc_accept = _B_FALSE;
	else
		return (_B_FALSE);
	return (_B_TRUE);
}

static boolean_t
vrrp_rd_prop_enabled(vrrp_vr_conf_t *conf, const char *str)
{
	if (strcasecmp(str, "enabled") == 0)
		conf->vvc_enabled = _B_TRUE;
	else if (strcasecmp(str, "disabled") == 0)
		conf->vvc_enabled = _B_FALSE;
	else
		return (_B_FALSE);
	return (_B_TRUE);
}

static boolean_t
vrrp_rd_prop_ifname(vrrp_vr_conf_t *conf, const char *str)
{
	size_t size = sizeof (conf->vvc_link);
	return (strlcpy(conf->vvc_link, str, size) < size);
}

static int
vrrp_wt_prop_name(vrrp_vr_conf_t *conf, char *str, size_t size)
{
	return (snprintf(str, size, "%s", conf->vvc_name));
}

static int
vrrp_wt_prop_pri(vrrp_vr_conf_t *conf, char *str, size_t size)
{
	return (snprintf(str, size, "%d", conf->vvc_pri));
}

static int
vrrp_wt_prop_adver_int(vrrp_vr_conf_t *conf, char *str, size_t size)
{
	return (snprintf(str, size, "%d", conf->vvc_adver_int));
}

static int
vrrp_wt_prop_preempt(vrrp_vr_conf_t *conf, char *str, size_t size)
{
	return (snprintf(str, size, "%s",
	    conf->vvc_preempt ? "true" : "false"));
}

static int
vrrp_wt_prop_accept(vrrp_vr_conf_t *conf, char *str, size_t size)
{
	return (snprintf(str, size, "%s",
	    conf->vvc_accept ? "true" : "false"));
}

static int
vrrp_wt_prop_enabled(vrrp_vr_conf_t *conf, char *str, size_t size)
{
	return (snprintf(str, size, "%s",
	    conf->vvc_enabled ? "enabled" : "disabled"));
}

static int
vrrp_wt_prop_vrid(vrrp_vr_conf_t *conf, char *str, size_t size)
{
	return (snprintf(str, size, "%d", conf->vvc_vrid));
}

static int
vrrp_wt_prop_af(vrrp_vr_conf_t *conf, char *str, size_t size)
{
	return (snprintf(str, size, "%s",
	    conf->vvc_af == AF_INET ? "AF_INET" : "AF_INET6"));
}

static int
vrrp_wt_prop_ifname(vrrp_vr_conf_t *conf, char *str, size_t size)
{
	return (snprintf(str, size, "%s", conf->vvc_link));
}

static char *
af_str(int af)
{
	if (af == 4 || af == AF_INET)
		return ("AF_INET");
	else if (af == 6 || af == AF_INET6)
		return ("AF_INET6");
	else if (af == AF_UNSPEC)
		return ("AF_UNSPEC");
	else
		return ("AF_error");
}

static vrrp_err_t
vrrpd_create_vr(vrrp_vr_conf_t *conf)
{
	vrrp_vr_t	*vr;

	vrrp_log(VRRP_DBG0, "vrrpd_create_vr(%s)", conf->vvc_name);

	if ((vr = malloc(sizeof (vrrp_vr_t))) == NULL) {
		vrrp_log(VRRP_ERR, "vrrpd_create_vr(): memory allocation for %s"
		    " failed", conf->vvc_name);
		return (VRRP_ENOMEM);
	}

	bzero(vr, sizeof (vrrp_vr_t));
	vr->vvr_state = VRRP_STATE_NONE;
	vr->vvr_timer_id = -1;
	vrrpd_state_trans(VRRP_STATE_NONE, VRRP_STATE_INIT, vr);
	(void) memcpy(&vr->vvr_conf, conf, sizeof (vrrp_vr_conf_t));
	vr->vvr_conf.vvc_enabled = _B_FALSE;
	TAILQ_INSERT_HEAD(&vrrp_vr_list, vr, vvr_next);
	return (VRRP_SUCCESS);
}

static void
vrrpd_delete_vr(vrrp_vr_t *vr)
{
	vrrp_log(VRRP_DBG0, "vrrpd_delete_vr(%s)", vr->vvr_conf.vvc_name);
	if (vr->vvr_conf.vvc_enabled)
		vrrpd_disable_vr(vr, NULL, _B_FALSE);
	assert(vr->vvr_state == VRRP_STATE_INIT);
	vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_NONE, vr);
	TAILQ_REMOVE(&vrrp_vr_list, vr, vvr_next);
	(void) free(vr);
}

static vrrp_err_t
vrrpd_enable_vr(vrrp_vr_t *vr)
{
	vrrp_err_t	rx_err, tx_err, err = VRRP_EINVAL;

	vrrp_log(VRRP_DBG0, "vrrpd_enable_vr(%s)", vr->vvr_conf.vvc_name);

	assert(vr->vvr_conf.vvc_enabled);

	/*
	 * This VRRP router has been successfully enabled and start
	 * participating.
	 */
	if (vr->vvr_state != VRRP_STATE_INIT)
		return (VRRP_SUCCESS);

	if ((rx_err = vrrpd_init_rxsock(vr)) == VRRP_SUCCESS) {
		/*
		 * Select the primary IP address. Even if this time
		 * primary IP selection failed, we will reselect the
		 * primary IP address when new IP address comes up.
		 */
		vrrpd_reselect_primary(vr->vvr_pif);
		if (vr->vvr_pif->vvi_pip == NULL) {
			vrrp_log(VRRP_DBG0, "vrrpd_enable_vr(%s): "
			    "select_primary over %s failed",
			    vr->vvr_conf.vvc_name, vr->vvr_pif->vvi_ifname);
			rx_err = VRRP_ENOPRIM;
		}
	}

	/*
	 * Initialize the TX socket used for this vrrp_vr_t to send the
	 * multicast packets.
	 */
	tx_err = vrrpd_init_txsock(vr);

	/*
	 * Only start the state transition if sockets for both RX and TX are
	 * initialized correctly.
	 */
	if (rx_err != VRRP_SUCCESS || tx_err != VRRP_SUCCESS) {
		/*
		 * Record the error information for diagnose purpose.
		 */
		vr->vvr_err = (rx_err == VRRP_SUCCESS) ? tx_err : rx_err;
		return (err);
	}

	if (vr->vvr_conf.vvc_pri == 255)
		err = vrrpd_state_i2m(vr);
	else
		err = vrrpd_state_i2b(vr);

	if (err != VRRP_SUCCESS) {
		vr->vvr_err = err;
		vr->vvr_pif->vvi_pip = NULL;
		vrrpd_fini_txsock(vr);
		vrrpd_fini_rxsock(vr);
	}
	return (err);
}

/*
 * Given the removed interface, see whether the given VRRP router would
 * be affected and stop participating the VRRP protocol.
 *
 * If intf is NULL, VR disabling request is coming from the admin.
 */
static void
vrrpd_disable_vr(vrrp_vr_t *vr, vrrp_intf_t *intf, boolean_t primary_addr_gone)
{
	vrrp_log(VRRP_DBG0, "vrrpd_disable_vr(%s): %s%s", vr->vvr_conf.vvc_name,
	    intf == NULL ? "requested by admin" : intf->vvi_ifname,
	    intf == NULL ? "" : (primary_addr_gone ? "primary address gone" :
	    "interface deleted"));

	/*
	 * An interface is deleted, see whether this interface is the
	 * physical interface or the VNIC of the given VRRP router.
	 * If so, continue to disable the VRRP router.
	 */
	if (!primary_addr_gone && (intf != NULL) && (intf != vr->vvr_pif) &&
	    (intf != vr->vvr_vif)) {
		return;
	}

	/*
	 * If this is the case that the primary IP address is gone,
	 * and we failed to reselect another primary IP address,
	 * continue to disable the VRRP router.
	 */
	if (primary_addr_gone && intf != vr->vvr_pif)
		return;

	vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): disabling",
	    vr->vvr_conf.vvc_name);

	if (vr->vvr_state == VRRP_STATE_MASTER) {
		/*
		 * If this router is disabled by the administrator, send
		 * the zero-priority advertisement to indicate the Master
		 * stops participating VRRP.
		 */
		if (intf == NULL)
			(void) vrrpd_send_adv(vr, _B_TRUE);

		vrrpd_state_m2i(vr);
	} else  if (vr->vvr_state == VRRP_STATE_BACKUP) {
		vrrpd_state_b2i(vr);
	}

	/*
	 * If no primary IP address can be selected, the VRRP router
	 * stays at the INIT state and will become BACKUP and MASTER when
	 * a primary IP address is reselected.
	 */
	if (primary_addr_gone) {
		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): primary IP "
		    "is removed", vr->vvr_conf.vvc_name);
		vr->vvr_err = VRRP_ENOPRIM;
	} else if (intf == NULL) {
		/*
		 * The VRRP router is disable by the administrator
		 */
		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): disabled by admin",
		    vr->vvr_conf.vvc_name);
		vr->vvr_err = VRRP_SUCCESS;
		vrrpd_fini_txsock(vr);
		vrrpd_fini_rxsock(vr);
	} else if (intf == vr->vvr_pif) {
		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): physical interface "
		    "%s removed", vr->vvr_conf.vvc_name, intf->vvi_ifname);
		vr->vvr_err = VRRP_ENOPRIM;
		vrrpd_fini_rxsock(vr);
	} else if (intf == vr->vvr_vif) {
		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): VNIC interface %s"
		    " removed", vr->vvr_conf.vvc_name, intf->vvi_ifname);
		vr->vvr_err = VRRP_ENOVIRT;
		vrrpd_fini_txsock(vr);
	}
}

vrrp_err_t
vrrpd_create(vrrp_vr_conf_t *conf, boolean_t updateconf)
{
	vrrp_err_t	err = VRRP_SUCCESS;

	vrrp_log(VRRP_DBG0, "vrrpd_create(%s, %s, %d)", conf->vvc_name,
	    conf->vvc_link, conf->vvc_vrid);

	assert(conf != NULL);

	/*
	 * Sanity check
	 */
	if ((strlen(conf->vvc_name) == 0) ||
	    (strlen(conf->vvc_link) == 0) ||
	    (conf->vvc_vrid < VRRP_VRID_MIN ||
	    conf->vvc_vrid > VRRP_VRID_MAX) ||
	    (conf->vvc_pri < VRRP_PRI_MIN ||
	    conf->vvc_pri > VRRP_PRI_OWNER) ||
	    (conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
	    conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX) ||
	    (conf->vvc_af != AF_INET && conf->vvc_af != AF_INET6) ||
	    (conf->vvc_pri == VRRP_PRI_OWNER && !conf->vvc_accept)) {
		vrrp_log(VRRP_DBG1, "vrrpd_create(%s): invalid argument",
		    conf->vvc_name);
		return (VRRP_EINVAL);
	}

	if (!vrrp_valid_name(conf->vvc_name)) {
		vrrp_log(VRRP_DBG1, "vrrpd_create(): %s is not a valid router "
		    "name", conf->vvc_name);
		return (VRRP_EINVALVRNAME);
	}

	if (vrrpd_lookup_vr_by_name(conf->vvc_name) != NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_create(): %s already exists",
		    conf->vvc_name);
		return (VRRP_EINSTEXIST);
	}

	if (vrrpd_lookup_vr_by_vrid(conf->vvc_link, conf->vvc_vrid,
	    conf->vvc_af) != NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_create(): VRID %d/%s over %s "
		    "already exists", conf->vvc_vrid, af_str(conf->vvc_af),
		    conf->vvc_link);
		return (VRRP_EVREXIST);
	}

	if (updateconf && (err = vrrpd_updateconf(conf,
	    VRRP_CONF_UPDATE)) != VRRP_SUCCESS) {
		vrrp_log(VRRP_ERR, "vrrpd_create(): failed to update "
		    "configuration for %s", conf->vvc_name);
		return (err);
	}

	err = vrrpd_create_vr(conf);
	if (err != VRRP_SUCCESS && updateconf)
		(void) vrrpd_updateconf(conf, VRRP_CONF_DELETE);

	return (err);
}

static vrrp_err_t
vrrpd_delete(const char *vn)
{
	vrrp_vr_t	*vr;
	vrrp_err_t	err;

	vrrp_log(VRRP_DBG0, "vrrpd_delete(%s)", vn);

	if ((vr = vrrpd_lookup_vr_by_name(vn)) == NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_delete(): %s not exists", vn);
		return (VRRP_ENOTFOUND);
	}

	err = vrrpd_updateconf(&vr->vvr_conf, VRRP_CONF_DELETE);
	if (err != VRRP_SUCCESS) {
		vrrp_log(VRRP_ERR, "vrrpd_delete(): failed to delete "
		    "configuration for %s", vr->vvr_conf.vvc_name);
		return (err);
	}

	vrrpd_delete_vr(vr);
	return (VRRP_SUCCESS);
}

static vrrp_err_t
vrrpd_enable(const char *vn, boolean_t updateconf)
{
	vrrp_vr_t		*vr;
	vrrp_vr_conf_t		*conf;
	uint32_t		flags;
	datalink_class_t	class;
	vrrp_err_t		err = VRRP_SUCCESS;

	vrrp_log(VRRP_DBG0, "vrrpd_enable(%s)", vn);

	if ((vr = vrrpd_lookup_vr_by_name(vn)) == NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_enable(): %s does not exist", vn);
		return (VRRP_ENOTFOUND);
	}

	/*
	 * The VR is already enabled.
	 */
	conf = &vr->vvr_conf;
	if (conf->vvc_enabled) {
		vrrp_log(VRRP_DBG1, "vrrpd_enable(): %s is already "
		    "enabled", vn);
		return (VRRP_EALREADY);
	}

	/*
	 * Check whether the link exists.
	 */
	if ((strlen(conf->vvc_link) == 0) || dladm_name2info(vrrpd_vh->vh_dh,
	    conf->vvc_link, NULL, &flags, &class, NULL) != DLADM_STATUS_OK ||
	    !(flags & DLADM_OPT_ACTIVE) || ((class != DATALINK_CLASS_PHYS) &&
	    (class != DATALINK_CLASS_VLAN) && (class != DATALINK_CLASS_AGGR))) {
		vrrp_log(VRRP_DBG1, "vrrpd_enable(%s): invalid link %s",
		    vn, conf->vvc_link);
		return (VRRP_EINVALLINK);
	}

	/*
	 * Get the associated VNIC name by the given interface/vrid/
	 * address famitly.
	 */
	err = vrrp_get_vnicname(vrrpd_vh, conf->vvc_vrid,
	    conf->vvc_af, conf->vvc_link, NULL, NULL, vr->vvr_vnic,
	    sizeof (vr->vvr_vnic));
	if (err != VRRP_SUCCESS) {
		vrrp_log(VRRP_DBG1, "vrrpd_enable(%s): no VNIC for VRID %d/%s "
		    "over %s", vn, conf->vvc_vrid, af_str(conf->vvc_af),
		    conf->vvc_link);
		err = VRRP_ENOVNIC;
		goto fail;
	}

	/*
	 * Find the right VNIC, primary interface and get the list of the
	 * protected IP adressses and primary IP address. Note that if
	 * either interface is NULL (no IP addresses configured over the
	 * interface), we will still continue and mark this VRRP router
	 * as "enabled".
	 */
	vr->vvr_conf.vvc_enabled = _B_TRUE;
	if (updateconf && (err = vrrpd_updateconf(&vr->vvr_conf,
	    VRRP_CONF_UPDATE)) != VRRP_SUCCESS) {
		vrrp_log(VRRP_ERR, "vrrpd_enable(): failed to update "
		    "configuration for %s", vr->vvr_conf.vvc_name);
		goto fail;
	}

	/*
	 * If vrrpd_setup_vr() fails, it is possible that there is no IP
	 * addresses over ether the primary interface or the VNIC yet,
	 * return success in this case, the VRRP router will stay in
	 * the initialized state and start to work when the IP address is
	 * configured.
	 */
	(void) vrrpd_enable_vr(vr);
	return (VRRP_SUCCESS);

fail:
	vr->vvr_conf.vvc_enabled = _B_FALSE;
	vr->vvr_vnic[0] = '\0';
	return (err);
}

static vrrp_err_t
vrrpd_disable(const char *vn)
{
	vrrp_vr_t	*vr;
	vrrp_err_t	err;

	vrrp_log(VRRP_DBG0, "vrrpd_disable(%s)", vn);

	if ((vr = vrrpd_lookup_vr_by_name(vn)) == NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_disable(): %s does not exist", vn);
		return (VRRP_ENOTFOUND);
	}

	/*
	 * The VR is already disable.
	 */
	if (!vr->vvr_conf.vvc_enabled) {
		vrrp_log(VRRP_DBG1, "vrrpd_disable(): %s was not enabled", vn);
		return (VRRP_EALREADY);
	}

	vr->vvr_conf.vvc_enabled = _B_FALSE;
	err = vrrpd_updateconf(&vr->vvr_conf, VRRP_CONF_UPDATE);
	if (err != VRRP_SUCCESS) {
		vr->vvr_conf.vvc_enabled = _B_TRUE;
		vrrp_log(VRRP_ERR, "vrrpd_disable(): failed to update "
		    "configuration for %s", vr->vvr_conf.vvc_name);
		return (err);
	}

	vrrpd_disable_vr(vr, NULL, _B_FALSE);
	vr->vvr_vnic[0] = '\0';
	return (VRRP_SUCCESS);
}

static vrrp_err_t
vrrpd_modify(vrrp_vr_conf_t *conf, uint32_t mask)
{
	vrrp_vr_t	*vr;
	vrrp_vr_conf_t	savconf;
	int		pri;
	boolean_t	accept, set_accept = _B_FALSE;
	vrrp_err_t	err;

	vrrp_log(VRRP_DBG0, "vrrpd_modify(%s)", conf->vvc_name);

	if (mask == 0)
		return (VRRP_SUCCESS);

	if ((vr = vrrpd_lookup_vr_by_name(conf->vvc_name)) == NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_modify(): cannot find the given "
		    "VR instance: %s", conf->vvc_name);
		return (VRRP_ENOTFOUND);
	}

	if (mask & VRRP_CONF_INTERVAL) {
		if (conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
		    conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX) {
			vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): invalid "
			    "adver_interval %d", conf->vvc_name,
			    conf->vvc_adver_int);
			return (VRRP_EINVAL);
		}
	}

	pri = vr->vvr_conf.vvc_pri;
	if (mask & VRRP_CONF_PRIORITY) {
		if (conf->vvc_pri < VRRP_PRI_MIN ||
		    conf->vvc_pri > VRRP_PRI_OWNER) {
			vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): invalid "
			    "priority %d", conf->vvc_name, conf->vvc_pri);
			return (VRRP_EINVAL);
		}
		pri = conf->vvc_pri;
	}

	accept = vr->vvr_conf.vvc_accept;
	if (mask & VRRP_CONF_ACCEPT)
		accept = conf->vvc_accept;

	if (pri == VRRP_PRI_OWNER && !accept) {
		vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): accept mode must be "
		    "true for VRRP address owner", conf->vvc_name);
		return (VRRP_EINVAL);
	}

	if ((mask & VRRP_CONF_ACCEPT) && (vr->vvr_conf.vvc_accept != accept)) {
		err = vrrpd_set_noaccept(vr, !accept);
		if (err != VRRP_SUCCESS) {
			vrrp_log(VRRP_ERR, "vrrpd_modify(%s): access mode "
			    "updating failed: %s", conf->vvc_name,
			    vrrp_err2str(err));
			return (err);
		}
		set_accept = _B_TRUE;
	}

	/*
	 * Save the current configuration, so it can be restored if the
	 * following fails.
	 */
	(void) memcpy(&savconf, &vr->vvr_conf, sizeof (vrrp_vr_conf_t));
	if (mask & VRRP_CONF_PREEMPT)
		vr->vvr_conf.vvc_preempt = conf->vvc_preempt;

	if (mask & VRRP_CONF_ACCEPT)
		vr->vvr_conf.vvc_accept = accept;

	if (mask & VRRP_CONF_PRIORITY)
		vr->vvr_conf.vvc_pri = pri;

	if (mask & VRRP_CONF_INTERVAL)
		vr->vvr_conf.vvc_adver_int = conf->vvc_adver_int;

	err = vrrpd_updateconf(&vr->vvr_conf, VRRP_CONF_UPDATE);
	if (err != VRRP_SUCCESS) {
		vrrp_log(VRRP_ERR, "vrrpd_modify(%s): configuration update "
		    "failed: %s", conf->vvc_name, vrrp_err2str(err));
		if (set_accept)
			(void) vrrpd_set_noaccept(vr, accept);
		(void) memcpy(&vr->vvr_conf, &savconf, sizeof (vrrp_vr_conf_t));
		return (err);
	}

	if ((mask & VRRP_CONF_PRIORITY) && (vr->vvr_state == VRRP_STATE_BACKUP))
		vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);

	if ((mask & VRRP_CONF_INTERVAL) && (vr->vvr_state == VRRP_STATE_MASTER))
		vr->vvr_timeout = conf->vvc_adver_int;

	return (VRRP_SUCCESS);
}

static void
vrrpd_list(vrid_t vrid, char *ifname, int af, vrrp_ret_list_t *ret,
    size_t *sizep)
{
	vrrp_vr_t	*vr;
	char		*p = (char *)ret + sizeof (vrrp_ret_list_t);
	size_t		size = (*sizep) - sizeof (vrrp_ret_list_t);

	vrrp_log(VRRP_DBG0, "vrrpd_list(%d_%s_%s)", vrid, ifname, af_str(af));

	ret->vrl_cnt = 0;
	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
		if (vrid !=  VRRP_VRID_NONE && vr->vvr_conf.vvc_vrid != vrid)
			continue;

		if (strlen(ifname) != 0 && strcmp(ifname,
		    vr->vvr_conf.vvc_link) == 0) {
			continue;
		}

		if ((af == AF_INET || af == AF_INET6) &&
		    vr->vvr_conf.vvc_af != af)
			continue;

		if (size < VRRP_NAME_MAX) {
			vrrp_log(VRRP_DBG1, "vrrpd_list(): buffer size too "
			    "small to hold %d router names", ret->vrl_cnt);
			*sizep = sizeof (vrrp_ret_list_t);
			ret->vrl_err = VRRP_ETOOSMALL;
			return;
		}
		(void) strlcpy(p, vr->vvr_conf.vvc_name, VRRP_NAME_MAX);
		p += (strlen(vr->vvr_conf.vvc_name) + 1);
		ret->vrl_cnt++;
		size -= VRRP_NAME_MAX;
	}

	*sizep = sizeof (vrrp_ret_list_t) + ret->vrl_cnt * VRRP_NAME_MAX;
	vrrp_log(VRRP_DBG1, "vrrpd_list() return %d", ret->vrl_cnt);
	ret->vrl_err = VRRP_SUCCESS;
}

static void
vrrpd_query(const char *vn, vrrp_ret_query_t *ret, size_t *sizep)
{
	vrrp_queryinfo_t	*infop;
	vrrp_vr_t		*vr;
	vrrp_intf_t		*vif;
	vrrp_ip_t		*ip;
	struct timeval		now;
	uint32_t		vipcnt = 0;
	size_t			size = *sizep;

	vrrp_log(VRRP_DBG1, "vrrpd_query(%s)", vn);

	if ((vr = vrrpd_lookup_vr_by_name(vn)) == NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_query(): %s does not exist", vn);
		*sizep = sizeof (vrrp_ret_query_t);
		ret->vrq_err = VRRP_ENOTFOUND;
		return;
	}

	/*
	 * Get the virtual IP list if the router is not in the INIT state.
	 */
	if (vr->vvr_state != VRRP_STATE_INIT) {
		vif = vr->vvr_vif;
		TAILQ_FOREACH(ip, &vif->vvi_iplist, vip_next) {
			vipcnt++;
		}
	}

	*sizep = sizeof (vrrp_ret_query_t);
	*sizep += (vipcnt == 0) ? 0 : (vipcnt - 1) * sizeof (vrrp_addr_t);
	if (*sizep > size) {
		vrrp_log(VRRP_ERR, "vrrpd_query(): not enough space to hold "
		    "%d virtual IPs", vipcnt);
		*sizep = sizeof (vrrp_ret_query_t);
		ret->vrq_err = VRRP_ETOOSMALL;
		return;
	}

	(void) gettimeofday(&now, NULL);

	bzero(ret, *sizep);
	infop = &ret->vrq_qinfo;
	(void) memcpy(&infop->show_vi,
	    &(vr->vvr_conf), sizeof (vrrp_vr_conf_t));
	(void) memcpy(&infop->show_vs,
	    &(vr->vvr_sinfo), sizeof (vrrp_stateinfo_t));
	(void) strlcpy(infop->show_va.va_vnic, vr->vvr_vnic, MAXLINKNAMELEN);
	infop->show_vt.vt_since_last_tran = timeval_to_milli(
	    timeval_delta(now, vr->vvr_sinfo.vs_st_time));

	if (vr->vvr_state == VRRP_STATE_INIT) {
		ret->vrq_err = VRRP_SUCCESS;
		return;
	}

	vipcnt = 0;
	TAILQ_FOREACH(ip, &vif->vvi_iplist, vip_next) {
		(void) memcpy(&infop->show_va.va_vips[vipcnt++],
		    &ip->vip_addr, sizeof (vrrp_addr_t));
	}
	infop->show_va.va_vipcnt = vipcnt;

	(void) memcpy(&infop->show_va.va_primary,
	    &vr->vvr_pif->vvi_pip->vip_addr, sizeof (vrrp_addr_t));

	(void) memcpy(&infop->show_vp, &(vr->vvr_peer), sizeof (vrrp_peer_t));

	/*
	 * Check whether there is a peer.
	 */
	if (!VRRPADDR_UNSPECIFIED(vr->vvr_conf.vvc_af,
	    &(vr->vvr_peer.vp_addr))) {
		infop->show_vt.vt_since_last_adv = timeval_to_milli(
		    timeval_delta(now, vr->vvr_peer.vp_time));
	}

	if (vr->vvr_state == VRRP_STATE_BACKUP) {
		infop->show_vt.vt_master_down_intv =
		    MASTER_DOWN_INTERVAL_VR(vr);
	}

	ret->vrq_err = VRRP_SUCCESS;
}

/*
 * Build the VRRP packet (not including the IP header). Return the
 * payload length.
 *
 * If zero_pri is set to be B_TRUE, then this is the specical zero-priority
 * advertisement which is sent by the Master to indicate that it has been
 * stopped participating in VRRP.
 */
static size_t
vrrpd_build_vrrp(vrrp_vr_t *vr, uchar_t *buf, int buflen, boolean_t zero_pri)
{
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	vrrp_pkt_t	*vp = (vrrp_pkt_t *)buf;
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	struct in_addr	*a4 = (struct in_addr *)(vp + 1);
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	struct in6_addr *a6 = (struct in6_addr *)(vp + 1);
	vrrp_intf_t	*vif = vr->vvr_vif;
	vrrp_ip_t	*vip;
	int		af = vif->vvi_af;
	size_t		size = sizeof (vrrp_pkt_t);
	uint16_t	rsvd_adver_int;
	int		nip = 0;

	vrrp_log(VRRP_DBG1, "vrrpd_build_vrrp(%s, %s_priority): intv %d",
	    vr->vvr_conf.vvc_name, zero_pri ? "zero" : "non-zero",
	    vr->vvr_conf.vvc_adver_int);

	TAILQ_FOREACH(vip, &vif->vvi_iplist, vip_next) {
		if ((size += ((af == AF_INET) ? sizeof (struct in_addr) :
		    sizeof (struct in6_addr))) > buflen) {
			vrrp_log(VRRP_ERR, "vrrpd_build_vrrp(%s): buffer size "
			    "not big enough %d", vr->vvr_conf.vvc_name, size);
			return (0);
		}

		if (af == AF_INET)
			a4[nip++] = vip->vip_addr.in4.sin_addr;
		else
			a6[nip++] = vip->vip_addr.in6.sin6_addr;
	}

	if (nip == 0) {
		vrrp_log(VRRP_ERR, "vrrpd_build_vrrp(%s): no virtual IP "
		    "address", vr->vvr_conf.vvc_name);
		return (0);
	}

	vp->vp_vers_type = (VRRP_VERSION << 4) | VRRP_PKT_ADVERT;
	vp->vp_vrid = vr->vvr_conf.vvc_vrid;
	vp->vp_prio = zero_pri ? VRRP_PRIO_ZERO : vr->vvr_conf.vvc_pri;

	rsvd_adver_int = MSEC2CENTISEC(vr->vvr_conf.vvc_adver_int) & 0x0fff;
	vp->vp_rsvd_adver_int = htons(rsvd_adver_int);
	vp->vp_ipnum = nip;

	/*
	 * Set the checksum to 0 first, then caculate it.
	 */
	vp->vp_chksum = 0;
	if (af == AF_INET) {
		vp->vp_chksum = vrrp_cksum4(
		    &vr->vvr_pif->vvi_pip->vip_addr.in4.sin_addr,
		    &vrrp_muladdr4.in4.sin_addr, size, vp);
	} else {
		vp->vp_chksum = vrrp_cksum6(
		    &vr->vvr_pif->vvi_pip->vip_addr.in6.sin6_addr,
		    &vrrp_muladdr6.in6.sin6_addr, size, vp);
	}

	return (size);
}

/*
 * We need to build the IPv4 header on our own.
 */
static vrrp_err_t
vrrpd_send_adv_v4(vrrp_vr_t *vr, uchar_t *buf, size_t len, boolean_t zero_pri)
{
	/* LINTED E_BAD_PTR_CAST_ALIGN */
	struct ip *ip = (struct ip *)buf;
	size_t plen;

	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v4(%s)", vr->vvr_conf.vvc_name);

	if ((plen = vrrpd_build_vrrp(vr, buf + sizeof (struct ip),
	    len - sizeof (struct ip), zero_pri)) == 0) {
		return (VRRP_ETOOSMALL);
	}

	ip->ip_hl = sizeof (struct ip) >> 2;
	ip->ip_v = IPV4_VERSION;
	ip->ip_tos = 0;
	plen += sizeof (struct ip);
	ip->ip_len = htons(plen);
	ip->ip_off = 0;
	ip->ip_ttl = VRRP_IP_TTL;
	ip->ip_p = IPPROTO_VRRP;
	ip->ip_src = vr->vvr_pif->vvi_pip->vip_addr.in4.sin_addr;
	ip->ip_dst = vrrp_muladdr4.in4.sin_addr;

	/*
	 * The kernel will set the IP cksum and the IPv4 identification.
	 */
	ip->ip_id = 0;
	ip->ip_sum = 0;

	if ((len = sendto(vr->vvr_vif->vvi_sockfd, buf, plen, 0,
	    (const struct sockaddr *)&vrrp_muladdr4,
	    sizeof (struct sockaddr_in))) != plen) {
		vrrp_log(VRRP_ERR, "vrrpd_send_adv_v4(): sendto() on "
		    "(vrid:%d, %s, %s) failed: %s sent:%d expect:%d",
		    vr->vvr_conf.vvc_vrid, vr->vvr_vif->vvi_ifname,
		    af_str(vr->vvr_conf.vvc_af), strerror(errno), len, plen);
		return (VRRP_ESYS);
	}

	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v4(%s) succeed",
	    vr->vvr_conf.vvc_name);
	return (VRRP_SUCCESS);
}

static vrrp_err_t
vrrpd_send_adv_v6(vrrp_vr_t *vr, uchar_t *buf, size_t len, boolean_t zero_pri)
{
	struct msghdr msg6;
	size_t hoplimit_space = 0;
	size_t pktinfo_space = 0;
	size_t bufspace = 0;
	struct in6_pktinfo *pktinfop;
	struct cmsghdr *cmsgp;
	uchar_t *cmsg_datap;
	struct iovec iov;
	size_t plen;

	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v6(%s)", vr->vvr_conf.vvc_name);

	if ((plen = vrrpd_build_vrrp(vr, buf, len, zero_pri)) == 0)
		return (VRRP_ETOOSMALL);

	msg6.msg_control = NULL;
	msg6.msg_controllen = 0;

	hoplimit_space = sizeof (int);
	bufspace += sizeof (struct cmsghdr) + _MAX_ALIGNMENT +
	    hoplimit_space + _MAX_ALIGNMENT;

	pktinfo_space = sizeof (struct in6_pktinfo);
	bufspace += sizeof (struct cmsghdr) + _MAX_ALIGNMENT +
	    pktinfo_space + _MAX_ALIGNMENT;

	/*
	 * We need to temporarily set the msg6.msg_controllen to bufspace
	 * (we will later trim it to actual length used). This is needed because
	 * CMSG_NXTHDR() uses it to check we have not exceeded the bounds.
	 */
	bufspace += sizeof (struct cmsghdr);
	msg6.msg_controllen = bufspace;

	msg6.msg_control = (struct cmsghdr *)malloc(bufspace);
	if (msg6.msg_control == NULL) {
		vrrp_log(VRRP_ERR, "vrrpd_send_adv_v6(%s): memory allocation "
		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
		return (VRRP_ENOMEM);
	}

	cmsgp = CMSG_FIRSTHDR(&msg6);

	cmsgp->cmsg_level = IPPROTO_IPV6;
	cmsgp->cmsg_type = IPV6_HOPLIMIT;
	cmsg_datap = CMSG_DATA(cmsgp);
	/* LINTED */
	*(int *)cmsg_datap = VRRP_IP_TTL;
	cmsgp->cmsg_len = cmsg_datap + hoplimit_space - (uchar_t *)cmsgp;
	cmsgp = CMSG_NXTHDR(&msg6, cmsgp);

	cmsgp->cmsg_level = IPPROTO_IPV6;
	cmsgp->cmsg_type = IPV6_PKTINFO;
	cmsg_datap = CMSG_DATA(cmsgp);

	/* LINTED */
	pktinfop = (struct in6_pktinfo *)cmsg_datap;
	/*
	 * We don't know if pktinfop->ipi6_addr is aligned properly,
	 * therefore let's use bcopy, instead of assignment.
	 */
	(void) bcopy(&vr->vvr_pif->vvi_pip->vip_addr.in6.sin6_addr,
	    &pktinfop->ipi6_addr, sizeof (struct in6_addr));

	/*
	 *  We can assume pktinfop->ipi6_ifindex is 32 bit aligned.
	 */
	pktinfop->ipi6_ifindex = vr->vvr_vif->vvi_ifindex;
	cmsgp->cmsg_len = cmsg_datap + pktinfo_space - (uchar_t *)cmsgp;
	cmsgp = CMSG_NXTHDR(&msg6, cmsgp);
	msg6.msg_controllen = (char *)cmsgp - (char *)msg6.msg_control;

	msg6.msg_name = &vrrp_muladdr6;
	msg6.msg_namelen = sizeof (struct sockaddr_in6);

	iov.iov_base = buf;
	iov.iov_len = plen;
	msg6.msg_iov = &iov;
	msg6.msg_iovlen = 1;

	if ((len = sendmsg(vr->vvr_vif->vvi_sockfd,
	    (const struct msghdr *)&msg6, 0)) != plen) {
		vrrp_log(VRRP_ERR, "vrrpd_send_adv_v6(%s): sendmsg() failed: "
		    "%s expect %d sent %d", vr->vvr_conf.vvc_name,
		    strerror(errno), plen, len);
		(void) free(msg6.msg_control);
		return (VRRP_ESYS);
	}

	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v6(%s) succeed",
	    vr->vvr_conf.vvc_name);
	(void) free(msg6.msg_control);
	return (VRRP_SUCCESS);
}

/*
 * Send the VRRP advertisement packets.
 */
static vrrp_err_t
vrrpd_send_adv(vrrp_vr_t *vr, boolean_t zero_pri)
{
	uint64_t buf[(IP_MAXPACKET + 1)/8];

	vrrp_log(VRRP_DBG1, "vrrpd_send_adv(%s, %s_priority)",
	    vr->vvr_conf.vvc_name, zero_pri ? "zero" : "non_zero");

	assert(vr->vvr_pif->vvi_pip != NULL);

	if (vr->vvr_pif->vvi_pip == NULL) {
		vrrp_log(VRRP_DBG0, "vrrpd_send_adv(%s): no primary IP "
		    "address", vr->vvr_conf.vvc_name);
		return (VRRP_EINVAL);
	}

	if (vr->vvr_conf.vvc_af == AF_INET) {
		return (vrrpd_send_adv_v4(vr, (uchar_t *)buf,
		    sizeof (buf), zero_pri));
	} else {
		return (vrrpd_send_adv_v6(vr, (uchar_t *)buf,
		    sizeof (buf), zero_pri));
	}
}

static void
vrrpd_process_adv(vrrp_vr_t *vr, vrrp_addr_t *from, vrrp_pkt_t *vp)
{
	vrrp_vr_conf_t *conf = &vr->vvr_conf;
	char		peer[INET6_ADDRSTRLEN];
	char		local[INET6_ADDRSTRLEN];
	int		addr_cmp;
	uint16_t	peer_adver_int;

	/* LINTED E_CONSTANT_CONDITION */
	VRRPADDR2STR(vr->vvr_conf.vvc_af, from, peer, INET6_ADDRSTRLEN,
	    _B_FALSE);
	vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s) from %s", conf->vvc_name,
	    peer);

	if (vr->vvr_state <= VRRP_STATE_INIT) {
		vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): state: %s, not "
		    "ready", conf->vvc_name, vrrp_state2str(vr->vvr_state));
		return;
	}

	peer_adver_int = CENTISEC2MSEC(ntohs(vp->vp_rsvd_adver_int) & 0x0fff);

	/* LINTED E_CONSTANT_CONDITION */
	VRRPADDR2STR(vr->vvr_pif->vvi_af, &vr->vvr_pif->vvi_pip->vip_addr,
	    local, INET6_ADDRSTRLEN, _B_FALSE);
	vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): local/state/pri"
	    "(%s/%s/%d) peer/pri/intv(%s/%d/%d)", conf->vvc_name, local,
	    vrrp_state2str(vr->vvr_state), conf->vvc_pri, peer,
	    vp->vp_prio, peer_adver_int);

	addr_cmp = ipaddr_cmp(vr->vvr_pif->vvi_af, from,
	    &vr->vvr_pif->vvi_pip->vip_addr);
	if (addr_cmp == 0) {
		vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): local message",
		    conf->vvc_name);
		return;
	} else if (conf->vvc_pri == vp->vp_prio) {
		vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): peer IP %s is %s"
		    " than the local IP %s", conf->vvc_name, peer,
		    addr_cmp > 0 ? "greater" : "less", local);
	}

	if (conf->vvc_pri == 255) {
		vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): virtual address "
		    "owner received advertisement from %s", conf->vvc_name,
		    peer);
		return;
	}

	(void) gettimeofday(&vr->vvr_peer_time, NULL);
	(void) memcpy(&vr->vvr_peer_addr, from, sizeof (vrrp_addr_t));
	vr->vvr_peer_prio = vp->vp_prio;
	vr->vvr_peer_adver_int = peer_adver_int;

	if (vr->vvr_state == VRRP_STATE_BACKUP) {
		vr->vvr_master_adver_int = vr->vvr_peer_adver_int;
		if ((vp->vp_prio == VRRP_PRIO_ZERO) ||
		    (conf->vvc_preempt == _B_FALSE ||
		    vp->vp_prio >= conf->vvc_pri)) {
			(void) iu_cancel_timer(vrrpd_timerq,
			    vr->vvr_timer_id, NULL);
			if (vp->vp_prio == VRRP_PRIO_ZERO) {
				/* the master stops participating in VRRP */
				vr->vvr_timeout = SKEW_TIME_VR(vr);
			} else {
				vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
			}
			if ((vr->vvr_timer_id = iu_schedule_timer_ms(
			    vrrpd_timerq, vr->vvr_timeout, vrrp_b2m_timeout,
			    vr)) == -1) {
				vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): "
				    "start vrrp_b2m_timeout(%d) failed",
				    conf->vvc_name, vr->vvr_timeout);
			} else {
				vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): "
				    "start vrrp_b2m_timeout(%d)",
				    conf->vvc_name, vr->vvr_timeout);
			}
		}
	} else if (vr->vvr_state == VRRP_STATE_MASTER) {
		if (vp->vp_prio == VRRP_PRIO_ZERO) {
			(void) vrrpd_send_adv(vr, _B_FALSE);
			(void) iu_cancel_timer(vrrpd_timerq,
			    vr->vvr_timer_id, NULL);
			if ((vr->vvr_timer_id = iu_schedule_timer_ms(
			    vrrpd_timerq, vr->vvr_timeout, vrrp_adv_timeout,
			    vr)) == -1) {
				vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): "
				    "start vrrp_adv_timeout(%d) failed",
				    conf->vvc_name, vr->vvr_timeout);
			} else {
				vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): "
				    "start vrrp_adv_timeout(%d)",
				    conf->vvc_name, vr->vvr_timeout);
			}
		} else if (vp->vp_prio > conf->vvc_pri ||
		    (vp->vp_prio == conf->vvc_pri && addr_cmp > 0)) {
			(void) vrrpd_state_m2b(vr);
		}
	} else {
		assert(_B_FALSE);
	}
}

static vrrp_err_t
vrrpd_process_vrrp(vrrp_intf_t *pif, vrrp_pkt_t *vp, size_t len,
    vrrp_addr_t *from)
{
	vrrp_vr_t	*vr;
	uint8_t		vers_type;
	uint16_t	saved_cksum, cksum;
	char		peer[INET6_ADDRSTRLEN];

	/* LINTED E_CONSTANT_CONDITION */
	VRRPADDR2STR(pif->vvi_af, from, peer, INET6_ADDRSTRLEN, _B_FALSE);
	vrrp_log(VRRP_DBG0, "vrrpd_process_vrrp(%s) from %s", pif->vvi_ifname,
	    peer);

	if (len < sizeof (vrrp_pkt_t)) {
		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): invalid message "
		    "length %d", len);
		return (VRRP_EINVAL);
	}

	/*
	 * Verify: VRRP version number and packet type.
	 */
	vers_type = ((vp->vp_vers_type & VRRP_VER_MASK) >> 4);
	if (vers_type != VRRP_VERSION) {
		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s) unsupported "
		    "version %d", pif->vvi_ifname, vers_type);
		return (VRRP_EINVAL);
	}

	if (vp->vp_ipnum == 0) {
		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): zero IPvX count",
		    pif->vvi_ifname);
		return (VRRP_EINVAL);
	}

	if (len - sizeof (vrrp_pkt_t) !=
	    vp->vp_ipnum * (pif->vvi_af == AF_INET ? sizeof (struct in_addr) :
	    sizeof (struct in6_addr))) {
		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): invalid IPvX count"
		    " %d", pif->vvi_ifname, vp->vp_ipnum);
		return (VRRP_EINVAL);
	}

	vers_type = (vp->vp_vers_type & VRRP_TYPE_MASK);

	/*
	 * verify: VRRP checksum. Note that vrrp_cksum returns network byte
	 * order checksum value;
	 */
	saved_cksum = vp->vp_chksum;
	vp->vp_chksum = 0;
	if (pif->vvi_af == AF_INET) {
		cksum = vrrp_cksum4(&from->in4.sin_addr,
		    &vrrp_muladdr4.in4.sin_addr, len, vp);
	} else {
		cksum = vrrp_cksum6(&from->in6.sin6_addr,
		    &vrrp_muladdr6.in6.sin6_addr, len, vp);
	}

	if (cksum != saved_cksum) {
		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s) invalid "
		    "checksum: expected/real(0x%x/0x%x)", pif->vvi_ifname,
		    cksum, saved_cksum);
		return (VRRP_EINVAL);
	}

	if ((vr = vrrpd_lookup_vr_by_vrid(pif->vvi_ifname, vp->vp_vrid,
	    pif->vvi_af)) != NULL && vers_type == VRRP_PKT_ADVERT) {
		vrrpd_process_adv(vr, from, vp);
	} else {
		vrrp_log(VRRP_DBG1, "vrrpd_process_vrrp(%s) VRID(%d/%s) "
		    "not configured", pif->vvi_ifname, vp->vp_vrid,
		    af_str(pif->vvi_af));
	}
	return (VRRP_SUCCESS);
}

/*
 * IPv4 socket, the IPv4 header is included.
 */
static vrrp_err_t
vrrpd_process_adv_v4(vrrp_intf_t *pif, struct msghdr *msgp, size_t len)
{
	char		abuf[INET6_ADDRSTRLEN];
	struct ip	*ip;

	vrrp_log(VRRP_DBG0, "vrrpd_process_adv_v4(%s, %d)",
	    pif->vvi_ifname, len);

	ip = (struct ip *)msgp->msg_iov->iov_base;

	/* Sanity check */
	if (len < sizeof (struct ip) || len < ntohs(ip->ip_len)) {
		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid length "
		    "%d", pif->vvi_ifname, len);
		return (VRRP_EINVAL);
	}

	assert(ip->ip_v == IPV4_VERSION);
	assert(ip->ip_p == IPPROTO_VRRP);
	assert(msgp->msg_namelen == sizeof (struct sockaddr_in));

	if (vrrp_muladdr4.in4.sin_addr.s_addr != ip->ip_dst.s_addr) {
		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
		    "destination %s", pif->vvi_ifname,
		    inet_ntop(pif->vvi_af, &(ip->ip_dst), abuf, sizeof (abuf)));
		return (VRRP_EINVAL);
	}

	if (ip->ip_ttl != VRRP_IP_TTL) {
		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
		    "ttl %d", pif->vvi_ifname, ip->ip_ttl);
		return (VRRP_EINVAL);
	}

	/*
	 * Note that the ip_len contains only the IP payload length.
	 */
	return (vrrpd_process_vrrp(pif,
	    /* LINTED E_BAD_PTR_CAST_ALIGN */
	    (vrrp_pkt_t *)((char *)ip + ip->ip_hl * 4), ntohs(ip->ip_len),
	    (vrrp_addr_t *)msgp->msg_name));
}

/*
 * IPv6 socket, check the ancillary_data.
 */
static vrrp_err_t
vrrpd_process_adv_v6(vrrp_intf_t *pif, struct msghdr *msgp, size_t len)
{
	struct cmsghdr		*cmsgp;
	uchar_t			*cmsg_datap;
	struct in6_pktinfo	*pktinfop;
	char			abuf[INET6_ADDRSTRLEN];
	int			ttl;

	vrrp_log(VRRP_DBG1, "vrrpd_process_adv_v6(%s, %d)",
	    pif->vvi_ifname, len);

	/* Sanity check */
	if (len < sizeof (vrrp_pkt_t)) {
		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v6(%s): invalid length "
		    "%d", pif->vvi_ifname, len);
		return (VRRP_EINVAL);
	}

	assert(msgp->msg_namelen == sizeof (struct sockaddr_in6));

	for (cmsgp = CMSG_FIRSTHDR(msgp); cmsgp != NULL;
	    cmsgp = CMSG_NXTHDR(msgp, cmsgp)) {
		assert(cmsgp->cmsg_level == IPPROTO_IPV6);
		cmsg_datap = CMSG_DATA(cmsgp);

		switch (cmsgp->cmsg_type) {
		case IPV6_HOPLIMIT:
			/* LINTED E_BAD_PTR_CAST_ALIGN */
			if ((ttl = *(int *)cmsg_datap) == VRRP_IP_TTL)
				break;

			vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
			    "ttl %d", pif->vvi_ifname, ttl);
			return (VRRP_EINVAL);
		case IPV6_PKTINFO:
			/* LINTED E_BAD_PTR_CAST_ALIGN */
			pktinfop = (struct in6_pktinfo *)cmsg_datap;
			if (IN6_ARE_ADDR_EQUAL(&pktinfop->ipi6_addr,
			    &vrrp_muladdr6.in6.sin6_addr)) {
				break;
			}

			vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
			    "destination %s", pif->vvi_ifname,
			    inet_ntop(pif->vvi_af, &pktinfop->ipi6_addr, abuf,
			    sizeof (abuf)));
			return (VRRP_EINVAL);
		}
	}

	return (vrrpd_process_vrrp(pif, msgp->msg_iov->iov_base, len,
	    msgp->msg_name));
}

/* ARGSUSED */
static void
vrrpd_sock_handler(iu_eh_t *eh, int s, short events, iu_event_id_t id,
    void *arg)
{
	struct msghdr		msg;
	vrrp_addr_t		from;
	uint64_t		buf[(IP_MAXPACKET + 1)/8];
	uint64_t		ancillary_data[(IP_MAXPACKET + 1)/8];
	vrrp_intf_t		*pif = arg;
	int			af = pif->vvi_af;
	int			len;
	struct iovec		iov;

	vrrp_log(VRRP_DBG1, "vrrpd_sock_handler(%s)", pif->vvi_ifname);

	msg.msg_name = (struct sockaddr *)&from;
	msg.msg_namelen = (af == AF_INET) ? sizeof (struct sockaddr_in) :
	    sizeof (struct sockaddr_in6);
	iov.iov_base = (char *)buf;
	iov.iov_len = sizeof (buf);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;
	msg.msg_control = ancillary_data;
	msg.msg_controllen = sizeof (ancillary_data);

	if ((len = recvmsg(s, &msg, 0)) == -1) {
		vrrp_log(VRRP_ERR, "vrrpd_sock_handler() recvmsg(%s) "
		    "failed: %s", pif->vvi_ifname, strerror(errno));
		return;
	}

	/*
	 * Ignore packets whose control buffers that don't fit
	 */
	if (msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) {
		vrrp_log(VRRP_ERR, "vrrpd_sock_handler() %s buffer not "
		    "big enough", pif->vvi_ifname);
		return;
	}

	if (af == AF_INET)
		(void) vrrpd_process_adv_v4(pif, &msg, len);
	else
		(void) vrrpd_process_adv_v6(pif, &msg, len);
}

/*
 * Create the socket which is used to receive VRRP packets. Virtual routers
 * that configured on the same physical interface share the same socket.
 */
static vrrp_err_t
vrrpd_init_rxsock(vrrp_vr_t *vr)
{
	vrrp_intf_t *pif;	/* Physical interface used to recv packets */
	struct group_req greq;
	struct sockaddr_storage *muladdr;
	int af, proto;
	int on = 1;
	vrrp_err_t err = VRRP_SUCCESS;

	vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s)", vr->vvr_conf.vvc_name);

	/*
	 * The RX sockets may already been initialized.
	 */
	if ((pif = vr->vvr_pif) != NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s) already done on %s",
		    vr->vvr_conf.vvc_name, pif->vvi_ifname);
		assert(pif->vvi_sockfd != -1);
		return (VRRP_SUCCESS);
	}

	/*
	 * If no IP addresses configured on the primary interface,
	 * return failure.
	 */
	af = vr->vvr_conf.vvc_af;
	pif = vrrpd_lookup_if(vr->vvr_conf.vvc_link, af);
	if (pif == NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s): no IP address "
		    "over %s/%s", vr->vvr_conf.vvc_name,
		    vr->vvr_conf.vvc_link, af_str(af));
		return (VRRP_ENOPRIM);
	}

	proto = (af == AF_INET ? IPPROTO_IP : IPPROTO_IPV6);
	if (pif->vvi_nvr++ == 0) {
		assert(pif->vvi_sockfd < 0);
		pif->vvi_sockfd = socket(af, SOCK_RAW, IPPROTO_VRRP);
		if (pif->vvi_sockfd < 0) {
			vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): socket() "
			    "failed %s", vr->vvr_conf.vvc_name,
			    strerror(errno));
			err = VRRP_ESYS;
			goto done;
		}

		/*
		 * Join the multicast group to receive VRRP packets.
		 */
		if (af == AF_INET) {
			muladdr = (struct sockaddr_storage *)
			    (void *)&vrrp_muladdr4;
		} else {
			muladdr = (struct sockaddr_storage *)
			    (void *)&vrrp_muladdr6;
		}

		greq.gr_interface = pif->vvi_ifindex;
		(void) memcpy(&greq.gr_group, muladdr,
		    sizeof (struct sockaddr_storage));
		if (setsockopt(pif->vvi_sockfd, proto, MCAST_JOIN_GROUP, &greq,
		    sizeof (struct group_req)) < 0) {
			vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
			    "join_group(%d) failed: %s", vr->vvr_conf.vvc_name,
			    pif->vvi_ifindex, strerror(errno));
			err = VRRP_ESYS;
			goto done;
		} else {
			vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s): "
			    "join_group(%d) succeeded", vr->vvr_conf.vvc_name,
			    pif->vvi_ifindex);
		}

		/*
		 * Unlike IPv4, the IPv6 raw socket does not pass the IP header
		 * when a packet is received. Call setsockopt() to receive such
		 * information.
		 */
		if (af == AF_INET6) {
			/*
			 * Enable receipt of destination address info
			 */
			if (setsockopt(pif->vvi_sockfd, proto, IPV6_RECVPKTINFO,
			    (char *)&on, sizeof (on)) < 0) {
				vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
				    "enable recvpktinfo failed: %s",
				    vr->vvr_conf.vvc_name, strerror(errno));
				err = VRRP_ESYS;
				goto done;
			}

			/*
			 * Enable receipt of hoplimit info
			 */
			if (setsockopt(pif->vvi_sockfd, proto,
			    IPV6_RECVHOPLIMIT, (char *)&on, sizeof (on)) < 0) {
				vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
				    "enable recvhoplimit failed: %s",
				    vr->vvr_conf.vvc_name, strerror(errno));
				err = VRRP_ESYS;
				goto done;
			}
		}

		if ((pif->vvi_eid = iu_register_event(vrrpd_eh,
		    pif->vvi_sockfd, POLLIN, vrrpd_sock_handler, pif)) == -1) {
			vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
			    "iu_register_event() failed",
			    vr->vvr_conf.vvc_name);
			err = VRRP_ESYS;
			goto done;
		}
	} else {
		vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s) over %s already "
		    "done %d", vr->vvr_conf.vvc_name, pif->vvi_ifname,
		    pif->vvi_nvr);
		assert(IS_PRIMARY_INTF(pif));
	}

done:
	vr->vvr_pif = pif;
	if (err != VRRP_SUCCESS)
		vrrpd_fini_rxsock(vr);

	return (err);
}

/*
 * Delete the socket which is used to receive VRRP packets for the given
 * VRRP router. Since all virtual routers that configured on the same
 * physical interface share the same socket, the socket is only closed
 * when the last VRRP router share this socket is deleted.
 */
static void
vrrpd_fini_rxsock(vrrp_vr_t *vr)
{
	vrrp_intf_t	*pif = vr->vvr_pif;

	vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s)", vr->vvr_conf.vvc_name);

	if (pif == NULL)
		return;

	if (--pif->vvi_nvr == 0) {
		vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s) over %s",
		    vr->vvr_conf.vvc_name, pif->vvi_ifname);
		(void) iu_unregister_event(vrrpd_eh, pif->vvi_eid, NULL);
		(void) close(pif->vvi_sockfd);
		pif->vvi_pip = NULL;
		pif->vvi_sockfd = -1;
		pif->vvi_eid = -1;
	} else {
		vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s) over %s %d",
		    vr->vvr_conf.vvc_name, pif->vvi_ifname, pif->vvi_nvr);
	}
	vr->vvr_pif = NULL;
}

/*
 * Create the socket which is used to send VRRP packets. Further, set
 * the IFF_NOACCEPT flag based on the VRRP router's accept mode.
 */
static vrrp_err_t
vrrpd_init_txsock(vrrp_vr_t *vr)
{
	int		af;
	vrrp_intf_t	*vif;
	vrrp_err_t	err;

	vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s)", vr->vvr_conf.vvc_name);

	if (vr->vvr_vif != NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s) already done on %s",
		    vr->vvr_conf.vvc_name, vr->vvr_vif->vvi_ifname);
		return (VRRP_SUCCESS);
	}

	af = vr->vvr_conf.vvc_af;
	if ((vif = vrrpd_lookup_if(vr->vvr_vnic, af)) == NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s) no IP address over "
		    "%s/%s", vr->vvr_conf.vvc_name, vr->vvr_vnic, af_str(af));
		return (VRRP_ENOVIRT);
	}

	vr->vvr_vif = vif;
	if (vr->vvr_conf.vvc_af == AF_INET)
		err = vrrpd_init_txsock_v4(vr);
	else
		err = vrrpd_init_txsock_v6(vr);

	if (err != VRRP_SUCCESS)
		goto done;

	/*
	 * The interface should start with IFF_NOACCEPT flag not set, only
	 * call this function when the VRRP router requires IFF_NOACCEPT.
	 */
	if (!vr->vvr_conf.vvc_accept)
		err = vrrpd_set_noaccept(vr, _B_TRUE);

done:
	if (err != VRRP_SUCCESS) {
		(void) close(vif->vvi_sockfd);
		vif->vvi_sockfd = -1;
		vr->vvr_vif = NULL;
	}

	return (err);
}

/*
 * Create the IPv4 socket which is used to send VRRP packets. Note that
 * the destination MAC address of VRRP advertisement must be the virtual
 * MAC address, so we specify the output interface to be the specific VNIC.
 */
static vrrp_err_t
vrrpd_init_txsock_v4(vrrp_vr_t *vr)
{
	vrrp_intf_t *vif;	/* VNIC interface used to send packets */
	vrrp_ip_t *vip;		/* The first IP over the VNIC */
	int on = 1;
	char off = 0;
	vrrp_err_t err = VRRP_SUCCESS;
	char abuf[INET6_ADDRSTRLEN];

	vif = vr->vvr_vif;
	assert(vr->vvr_conf.vvc_af == AF_INET);
	assert(vif != NULL);

	vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v4(%s) over %s",
	    vr->vvr_conf.vvc_name, vif->vvi_ifname);

	if (vif->vvi_sockfd != -1) {
		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v4(%s) already done "
		    "over %s", vr->vvr_conf.vvc_name, vif->vvi_ifname);
		return (VRRP_SUCCESS);
	}

	vif->vvi_sockfd = socket(vif->vvi_af, SOCK_RAW, IPPROTO_VRRP);
	if (vif->vvi_sockfd < 0) {
		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): socket() "
		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
		err = VRRP_ESYS;
		goto done;
	}

	/*
	 * Include the IP header, so that we can specify the IP address/ttl.
	 */
	if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_HDRINCL, (char *)&on,
	    sizeof (on)) < 0) {
		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): ip_hdrincl "
		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
		err = VRRP_ESYS;
		goto done;
	}

	/*
	 * Disable multicast loopback.
	 */
	if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_MULTICAST_LOOP, &off,
	    sizeof (char)) == -1) {
		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): disable "
		    "multicast_loop failed: %s", vr->vvr_conf.vvc_name,
		    strerror(errno));
		err = VRRP_ESYS;
		goto done;
	}

	vip = TAILQ_FIRST(&vif->vvi_iplist);
	/* LINTED E_CONSTANT_CONDITION */
	VRRPADDR2STR(vif->vvi_af, &vip->vip_addr, abuf, INET6_ADDRSTRLEN,
	    _B_FALSE);

	/*
	 * Set the output interface to send the VRRP packet.
	 */
	if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_MULTICAST_IF,
	    &vip->vip_addr.in4.sin_addr, sizeof (struct in_addr)) < 0) {
		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): multcast_if(%s) "
		    "failed: %s", vr->vvr_conf.vvc_name, abuf, strerror(errno));
		err = VRRP_ESYS;
	} else {
		vrrp_log(VRRP_DBG0, "vrrpd_init_txsock_v4(%s): multcast_if(%s) "
		    "succeed", vr->vvr_conf.vvc_name, abuf);
	}

done:
	if (err != VRRP_SUCCESS) {
		(void) close(vif->vvi_sockfd);
		vif->vvi_sockfd = -1;
	}

	return (err);
}

/*
 * Create the IPv6 socket which is used to send VRRP packets. Note that
 * the destination must be the virtual MAC address, so we specify the output
 * interface to be the specific VNIC.
 */
static vrrp_err_t
vrrpd_init_txsock_v6(vrrp_vr_t *vr)
{
	vrrp_intf_t *vif;	/* VNIC interface used to send packets */
	int off = 0, ttl = VRRP_IP_TTL;
	vrrp_err_t err = VRRP_SUCCESS;

	vif = vr->vvr_vif;
	assert(vr->vvr_conf.vvc_af == AF_INET6);
	assert(vif != NULL);

	vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s) over %s",
	    vr->vvr_conf.vvc_name, vif->vvi_ifname);

	if (vif->vvi_sockfd != -1) {
		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s) already done "
		    "over %s", vr->vvr_conf.vvc_name, vif->vvi_ifname);
		return (VRRP_SUCCESS);
	}

	vif->vvi_sockfd = socket(vif->vvi_af, SOCK_RAW, IPPROTO_VRRP);
	if (vif->vvi_sockfd < 0) {
		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): socket() "
		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
		err = VRRP_ESYS;
		goto done;
	}

	/*
	 * Disable multicast loopback.
	 */
	if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
	    &off, sizeof (int)) == -1) {
		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): disable "
		    "multicast_loop failed: %s", vr->vvr_conf.vvc_name,
		    strerror(errno));
		err = VRRP_ESYS;
		goto done;
	}

	/*
	 * Set the multicast TTL.
	 */
	if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_HOPS,
	    &ttl, sizeof (int)) == -1) {
		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): enable "
		    "multicast_hops %d failed: %s", vr->vvr_conf.vvc_name,
		    ttl, strerror(errno));
		err = VRRP_ESYS;
		goto done;
	}

	/*
	 * Set the output interface to send the VRRP packet.
	 */
	if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_IF,
	    &vif->vvi_ifindex, sizeof (uint32_t)) < 0) {
		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): multicast_if(%d) "
		    "failed: %s", vr->vvr_conf.vvc_name, vif->vvi_ifindex,
		    strerror(errno));
		err = VRRP_ESYS;
	} else {
		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s): multicast_if(%d)"
		    " succeed", vr->vvr_conf.vvc_name, vif->vvi_ifindex);
	}

done:
	if (err != VRRP_SUCCESS) {
		(void) close(vif->vvi_sockfd);
		vif->vvi_sockfd = -1;
	}

	return (err);
}

/*
 * Delete the socket which is used to send VRRP packets. Further, clear
 * the IFF_NOACCEPT flag based on the VRRP router's accept mode.
 */
static void
vrrpd_fini_txsock(vrrp_vr_t *vr)
{
	vrrp_intf_t *vif = vr->vvr_vif;

	vrrp_log(VRRP_DBG1, "vrrpd_fini_txsock(%s)", vr->vvr_conf.vvc_name);

	if (vif != NULL) {
		if (!vr->vvr_conf.vvc_accept)
			(void) vrrpd_set_noaccept(vr, _B_FALSE);
		(void) close(vif->vvi_sockfd);
		vif->vvi_sockfd = -1;
		vr->vvr_vif = NULL;
	}
}

/*
 * Given the the pseudo header cksum value (sum), caculate the cksum with
 * the rest of VRRP packet.
 */
static uint16_t
in_cksum(int sum, uint16_t plen, void *p)
{
	int nleft;
	uint16_t *w;
	uint16_t answer;
	uint16_t odd_byte = 0;

	nleft = plen;
	w = (uint16_t *)p;
	while (nleft > 1) {
		sum += *w++;
		nleft -= 2;
	}

	/* mop up an odd byte, if necessary */
	if (nleft == 1) {
		*(uchar_t *)(&odd_byte) = *(uchar_t *)w;
		sum += odd_byte;
	}

	/*
	 * add back carry outs from top 16 bits to low 16 bits
	 */
	sum = (sum >> 16) + (sum & 0xffff);	/* add hi 16 to low 16 */
	sum += (sum >> 16);			/* add carry */
	answer = ~sum;				/* truncate to 16 bits */
	return (answer == 0 ? ~0 : answer);
}

/* Pseudo header for v4 */
struct pshv4 {
	struct in_addr	ph4_src;
	struct in_addr	ph4_dst;
	uint8_t		ph4_zero;	/* always zero */
	uint8_t		ph4_protocol;	/* protocol used, IPPROTO_VRRP */
	uint16_t	ph4_len;	/* VRRP payload len */
};

/*
 * Checksum routine for VRRP checksum. Note that plen is the upper-layer
 * packet length (in the host byte order), and both IP source and destination
 * addresses are in the network byte order.
 */
static uint16_t
vrrp_cksum4(struct in_addr *src, struct in_addr *dst, uint16_t plen,
    vrrp_pkt_t *vp)
{
	struct pshv4 ph4;
	int nleft;
	uint16_t *w;
	int sum = 0;

	ph4.ph4_src = *src;
	ph4.ph4_dst = *dst;
	ph4.ph4_zero = 0;
	ph4.ph4_protocol = IPPROTO_VRRP;
	ph4.ph4_len = htons(plen);

	/*
	 *  Our algorithm is simple, using a 32 bit accumulator (sum),
	 *  we add sequential 16 bit words to it, and at the end, fold
	 *  back all the carry bits from the top 16 bits into the lower
	 *  16 bits.
	 */
	nleft = sizeof (struct pshv4);
	w = (uint16_t *)&ph4;
	while (nleft > 0) {
		sum += *w++;
		nleft -= 2;
	}

	return (in_cksum(sum, plen, vp));
}

/* Pseudo header for v6 */
struct pshv6 {
	struct in6_addr	ph6_src;
	struct in6_addr	ph6_dst;
	uint32_t	ph6_len;	/* VRRP payload len */
	uint32_t	ph6_zero : 24,
			ph6_protocol : 8; /* protocol used, IPPROTO_VRRP */
};

/*
 * Checksum routine for VRRP checksum. Note that plen is the upper-layer
 * packet length (in the host byte order), and both IP source and destination
 * addresses are in the network byte order.
 */
static uint16_t
vrrp_cksum6(struct in6_addr *src, struct in6_addr *dst, uint16_t plen,
    vrrp_pkt_t *vp)
{
	struct pshv6 ph6;
	int nleft;
	uint16_t *w;
	int sum = 0;

	ph6.ph6_src = *src;
	ph6.ph6_dst = *dst;
	ph6.ph6_zero = 0;
	ph6.ph6_protocol = IPPROTO_VRRP;
	ph6.ph6_len = htonl((uint32_t)plen);

	/*
	 *  Our algorithm is simple, using a 32 bit accumulator (sum),
	 *  we add sequential 16 bit words to it, and at the end, fold
	 *  back all the carry bits from the top 16 bits into the lower
	 *  16 bits.
	 */
	nleft = sizeof (struct pshv6);
	w = (uint16_t *)&ph6;
	while (nleft > 0) {
		sum += *w++;
		nleft -= 2;
	}

	return (in_cksum(sum, plen, vp));
}

vrrp_err_t
vrrpd_state_i2m(vrrp_vr_t *vr)
{
	vrrp_err_t	err;

	vrrp_log(VRRP_DBG1, "vrrpd_state_i2m(%s)", vr->vvr_conf.vvc_name);

	vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_MASTER, vr);
	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
		return (err);

	(void) vrrpd_send_adv(vr, _B_FALSE);

	vr->vvr_err = VRRP_SUCCESS;
	vr->vvr_timeout = vr->vvr_conf.vvc_adver_int;
	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
	    vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) {
		vrrp_log(VRRP_ERR, "vrrpd_state_i2m(): unable to start timer");
		return (VRRP_ESYS);
	} else {
		vrrp_log(VRRP_DBG1, "vrrpd_state_i2m(%s): start "
		    "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name,
		    vr->vvr_timeout);
	}
	return (VRRP_SUCCESS);
}

vrrp_err_t
vrrpd_state_i2b(vrrp_vr_t *vr)
{
	vrrp_err_t	err;

	vrrp_log(VRRP_DBG1, "vrrpd_state_i2b(%s)", vr->vvr_conf.vvc_name);

	vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_BACKUP, vr);
	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
		return (err);

	/*
	 * Reinitialize the Master advertisement interval to be the configured
	 * value.
	 */
	vr->vvr_err = VRRP_SUCCESS;
	vr->vvr_master_adver_int = vr->vvr_conf.vvc_adver_int;
	vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
	    vr->vvr_timeout, vrrp_b2m_timeout, vr)) == -1) {
		vrrp_log(VRRP_ERR, "vrrpd_state_i2b(): unable to set timer");
		return (VRRP_ESYS);
	} else {
		vrrp_log(VRRP_DBG1, "vrrpd_state_i2b(%s): start "
		    "vrrp_b2m_timeout(%d)", vr->vvr_conf.vvc_name,
		    vr->vvr_timeout);
	}
	return (VRRP_SUCCESS);
}

void
vrrpd_state_m2i(vrrp_vr_t *vr)
{
	vrrp_log(VRRP_DBG1, "vrrpd_state_m2i(%s)", vr->vvr_conf.vvc_name);

	vrrpd_state_trans(VRRP_STATE_MASTER, VRRP_STATE_INIT, vr);
	(void) vrrpd_virtualip_update(vr, _B_TRUE);
	bzero(&vr->vvr_peer, sizeof (vrrp_peer_t));
	(void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
}

void
vrrpd_state_b2i(vrrp_vr_t *vr)
{
	vrrp_log(VRRP_DBG1, "vrrpd_state_b2i(%s)", vr->vvr_conf.vvc_name);

	bzero(&vr->vvr_peer, sizeof (vrrp_peer_t));
	(void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
	vrrpd_state_trans(VRRP_STATE_BACKUP, VRRP_STATE_INIT, vr);
	(void) vrrpd_virtualip_update(vr, _B_TRUE);
}

/* ARGSUSED */
static void
vrrp_b2m_timeout(iu_tq_t *tq, void *arg)
{
	vrrp_vr_t *vr = (vrrp_vr_t *)arg;

	vrrp_log(VRRP_DBG1, "vrrp_b2m_timeout(%s)", vr->vvr_conf.vvc_name);
	(void) vrrpd_state_b2m(vr);
}

/* ARGSUSED */
static void
vrrp_adv_timeout(iu_tq_t *tq, void *arg)
{
	vrrp_vr_t *vr = (vrrp_vr_t *)arg;

	vrrp_log(VRRP_DBG1, "vrrp_adv_timeout(%s)", vr->vvr_conf.vvc_name);

	(void) vrrpd_send_adv(vr, _B_FALSE);
	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
	    vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) {
		vrrp_log(VRRP_ERR, "vrrp_adv_timeout(%s): start timer failed",
		    vr->vvr_conf.vvc_name);
	} else {
		vrrp_log(VRRP_DBG1, "vrrp_adv_timeout(%s): start "
		    "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name,
		    vr->vvr_timeout);
	}
}

vrrp_err_t
vrrpd_state_b2m(vrrp_vr_t *vr)
{
	vrrp_err_t	err;

	vrrp_log(VRRP_DBG1, "vrrpd_state_b2m(%s)", vr->vvr_conf.vvc_name);

	vrrpd_state_trans(VRRP_STATE_BACKUP, VRRP_STATE_MASTER, vr);
	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
		return (err);
	(void) vrrpd_send_adv(vr, _B_FALSE);

	vr->vvr_timeout = vr->vvr_conf.vvc_adver_int;
	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
	    vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) {
		vrrp_log(VRRP_ERR, "vrrpd_state_b2m(%s): start timer failed",
		    vr->vvr_conf.vvc_name);
		return (VRRP_ESYS);
	} else {
		vrrp_log(VRRP_DBG1, "vrrpd_state_b2m(%s): start "
		    "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name,
		    vr->vvr_timeout);
	}
	return (VRRP_SUCCESS);
}

vrrp_err_t
vrrpd_state_m2b(vrrp_vr_t *vr)
{
	vrrp_err_t	err;

	vrrp_log(VRRP_DBG1, "vrrpd_state_m2b(%s)", vr->vvr_conf.vvc_name);

	vrrpd_state_trans(VRRP_STATE_MASTER, VRRP_STATE_BACKUP, vr);
	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
		return (err);

	/*
	 * Cancel the adver_timer.
	 */
	vr->vvr_master_adver_int = vr->vvr_peer_adver_int;
	(void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
	vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
	    vr->vvr_timeout, vrrp_b2m_timeout, vr)) == -1) {
		vrrp_log(VRRP_ERR, "vrrpd_state_m2b(%s): start timer failed",
		    vr->vvr_conf.vvc_name);
	} else {
		vrrp_log(VRRP_DBG1, "vrrpd_state_m2b(%s) start "
		    "vrrp_b2m_timeout(%d)", vr->vvr_conf.vvc_name,
		    vr->vvr_timeout);
	}
	return (VRRP_SUCCESS);
}

/*
 * Set the IFF_NOACCESS flag on the VNIC interface of the VRRP router
 * based on its access mode.
 */
static vrrp_err_t
vrrpd_set_noaccept(vrrp_vr_t *vr, boolean_t on)
{
	vrrp_intf_t *vif = vr->vvr_vif;
	uint64_t curr_flags;
	struct lifreq lifr;
	int s;

	vrrp_log(VRRP_DBG1, "vrrpd_set_noaccept(%s, %s)",
	    vr->vvr_conf.vvc_name, on ? "on" : "off");

	/*
	 * Possibly no virtual address exists on this VRRP router yet.
	 */
	if (vif == NULL)
		return (VRRP_SUCCESS);

	vrrp_log(VRRP_DBG1, "vrrpd_set_noaccept(%s, %s)",
	    vif->vvi_ifname, vrrp_state2str(vr->vvr_state));

	s = (vif->vvi_af == AF_INET) ? vrrpd_ctlsock_fd : vrrpd_ctlsock6_fd;
	(void) strncpy(lifr.lifr_name, vif->vvi_ifname,
	    sizeof (lifr.lifr_name));
	if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
		if (errno != ENXIO && errno != ENOENT) {
			vrrp_log(VRRP_ERR, "vrrpd_set_noaccept(): "
			    "SIOCGLIFFLAGS on %s failed: %s",
			    vif->vvi_ifname, strerror(errno));
		}
		return (VRRP_ESYS);
	}

	curr_flags = lifr.lifr_flags;
	if (on)
		lifr.lifr_flags |= IFF_NOACCEPT;
	else
		lifr.lifr_flags &= ~IFF_NOACCEPT;

	if (lifr.lifr_flags != curr_flags) {
		if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
			if (errno != ENXIO && errno != ENOENT) {
				vrrp_log(VRRP_ERR, "vrrpd_set_noaccept(%s): "
				    "SIOCSLIFFLAGS 0x%llx on %s failed: %s",
				    on ? "no_accept" : "accept",
				    lifr.lifr_flags, vif->vvi_ifname,
				    strerror(errno));
			}
			return (VRRP_ESYS);
		}
	}
	return (VRRP_SUCCESS);
}

static vrrp_err_t
vrrpd_virtualip_updateone(vrrp_intf_t *vif, vrrp_ip_t *ip, boolean_t checkonly)
{
	vrrp_state_t	state = vif->vvi_vr_state;
	struct lifreq	lifr;
	char		abuf[INET6_ADDRSTRLEN];
	int		af = vif->vvi_af;
	uint64_t	curr_flags;
	int		s;

	assert(IS_VIRTUAL_INTF(vif));

	/* LINTED E_CONSTANT_CONDITION */
	VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
	vrrp_log(VRRP_DBG1, "vrrpd_virtualip_updateone(%s, %s%s)",
	    vif->vvi_ifname, abuf, checkonly ? ", checkonly" : "");

	s = (af == AF_INET) ? vrrpd_ctlsock_fd : vrrpd_ctlsock6_fd;
	(void) strncpy(lifr.lifr_name, ip->vip_lifname,
	    sizeof (lifr.lifr_name));
	if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
		if (errno != ENXIO && errno != ENOENT) {
			vrrp_log(VRRP_ERR, "vrrpd_virtualip_updateone(%s): "
			    "SIOCGLIFFLAGS on %s/%s failed: %s",
			    vif->vvi_ifname, lifr.lifr_name, abuf,
			    strerror(errno));
		}
		return (VRRP_ESYS);
	}

	curr_flags = lifr.lifr_flags;
	if (state == VRRP_STATE_MASTER)
		lifr.lifr_flags |= IFF_UP;
	else
		lifr.lifr_flags &= ~IFF_UP;

	if (lifr.lifr_flags == curr_flags)
		return (VRRP_SUCCESS);

	if (checkonly) {
		vrrp_log(VRRP_ERR, "VRRP virtual IP %s/%s was brought %s",
		    ip->vip_lifname, abuf,
		    state == VRRP_STATE_MASTER ? "down" : "up");
		return (VRRP_ESYS);
	} else if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
		if (errno != ENXIO && errno != ENOENT) {
			vrrp_log(VRRP_ERR, "vrrpd_virtualip_updateone(%s, %s): "
			    "bring %s %s/%s failed: %s",
			    vif->vvi_ifname, vrrp_state2str(state),
			    state == VRRP_STATE_MASTER ? "up" : "down",
			    ip->vip_lifname, abuf, strerror(errno));
		}
		return (VRRP_ESYS);
	}
	return (VRRP_SUCCESS);
}

static vrrp_err_t
vrrpd_virtualip_update(vrrp_vr_t *vr, boolean_t checkonly)
{
	vrrp_state_t		state;
	vrrp_intf_t		*vif = vr->vvr_vif;
	vrrp_ip_t		*ip, *nextip;
	char			abuf[INET6_ADDRSTRLEN];
	vrrp_err_t		err;

	vrrp_log(VRRP_DBG1, "vrrpd_virtualip_update(%s, %s, %s)%s",
	    vr->vvr_conf.vvc_name, vrrp_state2str(vr->vvr_state),
	    vif->vvi_ifname, checkonly ? " checkonly" : "");

	state = vr->vvr_state;
	assert(vif != NULL);
	assert(IS_VIRTUAL_INTF(vif));
	assert(vif->vvi_vr_state != state);
	vif->vvi_vr_state = state;
	for (ip = TAILQ_FIRST(&vif->vvi_iplist); ip != NULL; ip = nextip) {
		nextip = TAILQ_NEXT(ip, vip_next);
		err = vrrpd_virtualip_updateone(vif, ip, _B_FALSE);
		if (!checkonly && err != VRRP_SUCCESS) {
			/* LINTED E_CONSTANT_CONDITION */
			VRRPADDR2STR(vif->vvi_af, &ip->vip_addr, abuf,
			    INET6_ADDRSTRLEN, _B_FALSE);
			vrrp_log(VRRP_DBG1, "vrrpd_virtualip_update() update "
			    "%s over %s failed", abuf, vif->vvi_ifname);
			vrrpd_delete_ip(vif, ip);
		}
	}

	/*
	 * The IP address is deleted when it is failed to be brought
	 * up. If no IP addresses are left, delete this interface.
	 */
	if (!checkonly && TAILQ_EMPTY(&vif->vvi_iplist)) {
		vrrp_log(VRRP_DBG0, "vrrpd_virtualip_update(): "
		    "no IP left over %s", vif->vvi_ifname);
		vrrpd_delete_if(vif, _B_TRUE);
		return (VRRP_ENOVIRT);
	}
	return (VRRP_SUCCESS);
}

void
vrrpd_state_trans(vrrp_state_t prev_s, vrrp_state_t s, vrrp_vr_t *vr)
{
	vrrp_log(VRRP_DBG1, "vrrpd_state_trans(%s): %s --> %s",
	    vr->vvr_conf.vvc_name, vrrp_state2str(prev_s), vrrp_state2str(s));

	assert(vr->vvr_state == prev_s);
	vr->vvr_state = s;
	vr->vvr_prev_state = prev_s;
	(void) gettimeofday(&vr->vvr_st_time, NULL);
	(void) vrrpd_post_event(vr->vvr_conf.vvc_name, prev_s, s);
}

static int
vrrpd_post_event(const char *name, vrrp_state_t prev_st, vrrp_state_t st)
{
	sysevent_id_t	eid;
	nvlist_t	*nvl = NULL;

	/*
	 * sysevent is not supported in the non-global zone
	 */
	if (getzoneid() != GLOBAL_ZONEID)
		return (0);

	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
		goto failed;

	if (nvlist_add_uint8(nvl, VRRP_EVENT_VERSION,
	    VRRP_EVENT_CUR_VERSION) != 0)
		goto failed;

	if (nvlist_add_string(nvl, VRRP_EVENT_ROUTER_NAME, name) != 0)
		goto failed;

	if (nvlist_add_uint8(nvl, VRRP_EVENT_STATE, st) != 0)
		goto failed;

	if (nvlist_add_uint8(nvl, VRRP_EVENT_PREV_STATE, prev_st) != 0)
		goto failed;

	if (sysevent_post_event(EC_VRRP, ESC_VRRP_STATE_CHANGE,
	    SUNW_VENDOR, VRRP_EVENT_PUBLISHER, nvl, &eid) == 0) {
		nvlist_free(nvl);
		return (0);
	}

failed:
	vrrp_log(VRRP_ERR, "vrrpd_post_event(): `state change (%s --> %s)' "
	    "sysevent posting failed: %s", vrrp_state2str(prev_st),
	    vrrp_state2str(st), strerror(errno));

	if (nvl != NULL)
		nvlist_free(nvl);
	return (-1);
}

/*
 * timeval processing functions
 */
static int
timeval_to_milli(struct timeval tv)
{
	return ((int)(tv.tv_sec * 1000 + tv.tv_usec / 1000 + 0.5));
}

static struct timeval
timeval_delta(struct timeval t1, struct timeval t2)
{
	struct timeval t;
	t.tv_sec = t1.tv_sec - t2.tv_sec;
	t.tv_usec = t1.tv_usec - t2.tv_usec;

	if (t.tv_usec < 0) {
		t.tv_usec += 1000000;
		t.tv_sec--;
	}
	return (t);
}

/*
 * print error messages to the terminal or to syslog
 */
static void
vrrp_log(int level, char *message, ...)
{
	va_list ap;
	int log_level = -1;

	va_start(ap, message);

	if (vrrp_logflag == 0) {
		if (level <= vrrp_debug_level) {
			/*
			 * VRRP_ERR goes to stderr, others go to stdout
			 */
			FILE *out = (level <= VRRP_ERR) ? stderr : stdout;
			(void) fprintf(out, "vrrpd: ");
			/* LINTED: E_SEC_PRINTF_VAR_FMT */
			(void) vfprintf(out, message, ap);
			(void) fprintf(out, "\n");
			(void) fflush(out);
		}
		va_end(ap);
		return;
	}

	/*
	 * translate VRRP_* to LOG_*
	 */
	switch (level) {
	case VRRP_ERR:
		log_level = LOG_ERR;
		break;
	case VRRP_WARNING:
		log_level = LOG_WARNING;
		break;
	case VRRP_NOTICE:
		log_level = LOG_NOTICE;
		break;
	case VRRP_DBG0:
		log_level = LOG_INFO;
		break;
	default:
		log_level = LOG_DEBUG;
		break;
	}

	/* LINTED: E_SEC_PRINTF_VAR_FMT */
	(void) vsyslog(log_level, message, ap);
	va_end(ap);
}