xref: /titanic_51/usr/src/cmd/cmd-inet/usr.lib/vrrpd/vrrpd.c (revision 8fc99e42676a23421c75e76660640f9765d693b1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/socket.h>
29 #include <sys/sockio.h>
30 #include <sys/sysevent/vrrp.h>
31 #include <sys/sysevent/eventdefs.h>
32 #include <sys/varargs.h>
33 #include <auth_attr.h>
34 #include <ctype.h>
35 #include <fcntl.h>
36 #include <stdlib.h>
37 #include <strings.h>
38 #include <errno.h>
39 #include <unistd.h>
40 #include <zone.h>
41 #include <libsysevent.h>
42 #include <limits.h>
43 #include <locale.h>
44 #include <inetcfg.h>
45 #include <arpa/inet.h>
46 #include <signal.h>
47 #include <assert.h>
48 #include <ucred.h>
49 #include <bsm/adt.h>
50 #include <bsm/adt_event.h>
51 #include <priv_utils.h>
52 #include <libdllink.h>
53 #include <libdlvnic.h>
54 #include <pwd.h>
55 #include <libvrrpadm.h>
56 #include <net/route.h>
57 #include "vrrpd_impl.h"
58 
/*
 * A VRRP router can only start participating in the VRRP protocol of a
 * virtual router when all the following conditions are met:
 *
 * - The VRRP router is enabled (vr->vvr_conf.vvc_enabled is _B_TRUE)
 * - The RX socket is successfully created over the physical interface to
 *   receive the VRRP multicast advertisement. Note that one RX socket can
 *   be shared by several VRRP routers configured over the same physical
 *   interface. (See vrrpd_init_rxsock())
 * - The TX socket is successfully created over the VNIC interface to send
 *   the VRRP advertisement. (See vrrpd_init_txsock())
 * - The primary IP address has been successfully selected over the physical
 *   interface. (See vrrpd_select_primary())
 *
 * If a VRRP router is enabled but the other conditions haven't been
 * satisfied, the router will stay in the VRRP_STATE_INIT state. If all the
 * above conditions are met, the VRRP router will transition to either
 * the VRRP_STATE_MASTER or the VRRP_STATE_BACKUP state, depending on the
 * VRRP protocol.
 */
79 
/*
 * Advance the char pointer `p' past any leading whitespace characters.
 *
 * The character is converted to unsigned char before it is handed to
 * isspace(): passing a negative value other than EOF to the <ctype.h>
 * classification functions is undefined, and plain char may be signed.
 * `p' is evaluated more than once, so it must be a side-effect free lvalue.
 */
#define	skip_whitespace(p)	while (isspace((unsigned char)*(p))) ++(p)

/* Size constant for buffers; its users are outside this part of the file */
#define	BUFFSIZE	65536

/* Default configuration file; main() lets the -f option override it */
#define	VRRPCONF	"/etc/inet/vrrp.conf"
85 
/*
 * State of one routing socket. The vrrpd_rtsocks[] array holds one of these
 * per address family (AF_INET and AF_INET6); the fd and the event ID are
 * initialized to -1 there.
 */
typedef struct vrrpd_rtsock_s {
	int		vrt_af;		/* address family */
	int		vrt_fd;		/* socket for the PF_ROUTE msg */
	iu_event_id_t	vrt_eid;	/* event ID */
} vrrpd_rtsock_t;
91 
/* Set to 1 by daemon_init() once openlog() has been called */
static int		vrrp_logflag = 0;
/*
 * Debug level taken from the -d option. When it is non-zero vrrpd does not
 * call daemon_init() and therefore does not run as a daemon.
 * NOTE(review): declared boolean_t although main() stores the atoi() of the
 * option argument in it - confirm the intended type.
 */
boolean_t		vrrp_debug_level = 0;
iu_eh_t			*vrrpd_eh = NULL;	/* from iu_eh_create() */
iu_tq_t			*vrrpd_timerq = NULL;	/* from iu_tq_create() */
static vrrp_handle_t	vrrpd_vh = NULL;	/* from vrrp_open() */
static int		vrrpd_cmdsock_fd = -1;	/* socket to communicate */
						/* between vrrpd/libvrrpadm */
/* Event ID returned when vrrpd_cmdsock_fd is registered with vrrpd_eh */
static iu_event_id_t	vrrpd_cmdsock_eid = -1;
static int		vrrpd_ctlsock_fd = -1;	/* socket to bring up/down */
						/* the virtual IP addresses */
/* NOTE(review): presumably the IPv6 counterpart of vrrpd_ctlsock_fd */
static int		vrrpd_ctlsock6_fd = -1;
/* Routing sockets, one per address family; fd and event ID start at -1 */
static vrrpd_rtsock_t	vrrpd_rtsocks[2] = {
	{AF_INET, -1, -1},
	{AF_INET6, -1, -1}
};
/* ID of the periodic scan timer scheduled by vrrpd_init() */
static iu_timer_id_t	vrrp_scan_timer_id = -1;

TAILQ_HEAD(vrrp_vr_list_s, vrrp_vr_s);
TAILQ_HEAD(vrrp_intf_list_s, vrrp_intf_s);
/* List of VRRP routers and list of interfaces; set up by vrrpd_init() */
static struct vrrp_vr_list_s	vrrp_vr_list;
static struct vrrp_intf_list_s	vrrp_intf_list;
/* Configuration file path: VRRPCONF unless overridden with -f */
static char		vrrpd_conffile[MAXPATHLEN];

/*
 * Multicast address of VRRP advertisement in network byte order
 */
static vrrp_addr_t	vrrp_muladdr4;
static vrrp_addr_t	vrrp_muladdr6;

static int		vrrpd_scan_interval = 20000;	/* ms */
/*
 * Pipe created by daemon_init(). The daemon (child) writes its exit status
 * to pfds[1]; the parent reads it from pfds[0] and exits with that value.
 */
static int		pfds[2];
123 
/*
 * macros to calculate skew_time and master_down_timer
 *
 * Note that the input is in centisecs and output are in msecs
 *
 * NOTE(review): the macros themselves perform no unit scaling - the result
 * is expressed in the same unit as `intv' - so the statement above should be
 * confirmed against the callers.
 *
 * SKEW_TIME is intv * (256 - pri) / 256; MASTER_DOWN_INTERVAL is three
 * times intv plus that skew time. The _VR variants take the priority from
 * the router's configuration and the interval from vvr_master_adver_int.
 */
#define	SKEW_TIME(pri, intv)	((intv) * (256 - (pri)) / 256)
#define	MASTER_DOWN_INTERVAL(pri, intv)	(3 * (intv) + SKEW_TIME((pri), (intv)))

#define	SKEW_TIME_VR(vr)	\
	SKEW_TIME((vr)->vvr_conf.vvc_pri, (vr)->vvr_master_adver_int)
#define	MASTER_DOWN_INTERVAL_VR(vr)	\
	MASTER_DOWN_INTERVAL((vr)->vvr_conf.vvc_pri, (vr)->vvr_master_adver_int)

/*
 * NOTE(review): presumably the operation codes passed as the uint_t argument
 * of vrrpd_updateconf() - confirm against its definition.
 */
#define	VRRP_CONF_UPDATE	0x01
#define	VRRP_CONF_DELETE	0x02
139 
140 static char *af_str(int);
141 
142 static iu_tq_callback_t vrrp_adv_timeout;
143 static iu_tq_callback_t vrrp_b2m_timeout;
144 static iu_eh_callback_t vrrpd_sock_handler;
145 static iu_eh_callback_t vrrpd_rtsock_handler;
146 static iu_eh_callback_t vrrpd_cmdsock_handler;
147 
148 static int daemon_init();
149 
150 static vrrp_err_t vrrpd_init();
151 static void vrrpd_fini();
152 static vrrp_err_t vrrpd_cmdsock_create();
153 static void vrrpd_cmdsock_destroy();
154 static vrrp_err_t vrrpd_rtsock_create();
155 static void vrrpd_rtsock_destroy();
156 static vrrp_err_t vrrpd_ctlsock_create();
157 static void vrrpd_ctlsock_destroy();
158 
159 static void vrrpd_scan_timer(iu_tq_t *, void *);
160 static void vrrpd_scan(int);
161 static vrrp_err_t vrrpd_init_rxsock(vrrp_vr_t *);
162 static void vrrpd_fini_rxsock(vrrp_vr_t *);
163 static vrrp_err_t vrrpd_init_txsock(vrrp_vr_t *);
164 static vrrp_err_t vrrpd_init_txsock_v4(vrrp_vr_t *);
165 static vrrp_err_t vrrpd_init_txsock_v6(vrrp_vr_t *);
166 static void vrrpd_fini_txsock(vrrp_vr_t *);
167 
168 static vrrp_err_t vrrpd_create_vr(vrrp_vr_conf_t *);
169 static vrrp_err_t vrrpd_enable_vr(vrrp_vr_t *);
170 static void vrrpd_disable_vr(vrrp_vr_t *, vrrp_intf_t *, boolean_t);
171 static void vrrpd_delete_vr(vrrp_vr_t *);
172 
173 static vrrp_err_t vrrpd_create(vrrp_vr_conf_t *, boolean_t);
174 static vrrp_err_t vrrpd_delete(const char *);
175 static vrrp_err_t vrrpd_enable(const char *, boolean_t);
176 static vrrp_err_t vrrpd_disable(const char *);
177 static vrrp_err_t vrrpd_modify(vrrp_vr_conf_t *, uint32_t);
178 static void vrrpd_list(vrid_t, char *, int, vrrp_ret_list_t *, size_t *);
179 static void vrrpd_query(const char *, vrrp_ret_query_t *, size_t *);
180 
181 static boolean_t vrrp_rd_prop_name(vrrp_vr_conf_t *, const char *);
182 static boolean_t vrrp_rd_prop_vrid(vrrp_vr_conf_t *, const char *);
183 static boolean_t vrrp_rd_prop_af(vrrp_vr_conf_t *, const char *);
184 static boolean_t vrrp_rd_prop_pri(vrrp_vr_conf_t *, const char *);
185 static boolean_t vrrp_rd_prop_adver_int(vrrp_vr_conf_t *, const char *);
186 static boolean_t vrrp_rd_prop_preempt(vrrp_vr_conf_t *, const char *);
187 static boolean_t vrrp_rd_prop_accept(vrrp_vr_conf_t *, const char *);
188 static boolean_t vrrp_rd_prop_ifname(vrrp_vr_conf_t *, const char *);
189 static boolean_t vrrp_rd_prop_enabled(vrrp_vr_conf_t *, const char *);
190 static int vrrp_wt_prop_name(vrrp_vr_conf_t *, char *, size_t);
191 static int vrrp_wt_prop_vrid(vrrp_vr_conf_t *, char *, size_t);
192 static int vrrp_wt_prop_af(vrrp_vr_conf_t *, char *, size_t);
193 static int vrrp_wt_prop_pri(vrrp_vr_conf_t *, char *, size_t);
194 static int vrrp_wt_prop_adver_int(vrrp_vr_conf_t *, char *, size_t);
195 static int vrrp_wt_prop_preempt(vrrp_vr_conf_t *, char *, size_t);
196 static int vrrp_wt_prop_accept(vrrp_vr_conf_t *, char *, size_t);
197 static int vrrp_wt_prop_ifname(vrrp_vr_conf_t *, char *, size_t);
198 static int vrrp_wt_prop_enabled(vrrp_vr_conf_t *, char *, size_t);
199 
200 static void vrrpd_cmd_create(void *, void *, size_t *);
201 static void vrrpd_cmd_delete(void *, void *, size_t *);
202 static void vrrpd_cmd_enable(void *, void *, size_t *);
203 static void vrrpd_cmd_disable(void *, void *, size_t *);
204 static void vrrpd_cmd_modify(void *, void *, size_t *);
205 static void vrrpd_cmd_list(void *, void *, size_t *);
206 static void vrrpd_cmd_query(void *, void *, size_t *);
207 
208 static vrrp_vr_t *vrrpd_lookup_vr_by_vrid(char *, vrid_t vrid_t, int);
209 static vrrp_vr_t *vrrpd_lookup_vr_by_name(const char *);
210 static vrrp_intf_t *vrrpd_lookup_if(const char *, int);
211 static vrrp_err_t vrrpd_create_if(const char *, int, uint32_t, vrrp_intf_t **);
212 static void vrrpd_delete_if(vrrp_intf_t *, boolean_t);
213 static vrrp_err_t vrrpd_create_ip(vrrp_intf_t *, const char *, vrrp_addr_t *,
214     uint64_t flags);
215 static void vrrpd_delete_ip(vrrp_intf_t *, vrrp_ip_t *);
216 
217 static void vrrpd_init_ipcache(int);
218 static void vrrpd_update_ipcache(int);
219 static int vrrpd_walk_ipaddr(icfg_if_t *, void *);
220 static vrrp_err_t vrrpd_add_ipaddr(char *, int, vrrp_addr_t *,
221     int, uint64_t);
222 static vrrp_ip_t *vrrpd_select_primary(vrrp_intf_t *);
223 static void vrrpd_reselect_primary(vrrp_intf_t *);
224 static void vrrpd_reenable_all_vr();
225 static void vrrpd_remove_if(vrrp_intf_t *, boolean_t);
226 
227 static uint16_t in_cksum(int, uint16_t, void *);
228 static uint16_t vrrp_cksum4(struct in_addr *, struct in_addr *,
229     uint16_t, vrrp_pkt_t *);
230 static uint16_t vrrp_cksum6(struct in6_addr *, struct in6_addr *,
231     uint16_t, vrrp_pkt_t *);
232 static size_t vrrpd_build_vrrp(vrrp_vr_t *, uchar_t *, int, boolean_t);
233 
234 static void vrrpd_process_adv(vrrp_vr_t *, vrrp_addr_t *, vrrp_pkt_t *);
235 static vrrp_err_t vrrpd_send_adv(vrrp_vr_t *, boolean_t);
236 
237 /* state transition functions */
238 static vrrp_err_t vrrpd_state_i2m(vrrp_vr_t *);
239 static vrrp_err_t vrrpd_state_i2b(vrrp_vr_t *);
240 static void vrrpd_state_m2i(vrrp_vr_t *);
241 static void vrrpd_state_b2i(vrrp_vr_t *);
242 static vrrp_err_t vrrpd_state_b2m(vrrp_vr_t *);
243 static vrrp_err_t vrrpd_state_m2b(vrrp_vr_t *);
244 static void vrrpd_state_trans(vrrp_state_t, vrrp_state_t, vrrp_vr_t *);
245 
246 static vrrp_err_t vrrpd_set_noaccept(vrrp_vr_t *, boolean_t);
247 static vrrp_err_t vrrpd_virtualip_update(vrrp_vr_t *, boolean_t);
248 static vrrp_err_t vrrpd_virtualip_updateone(vrrp_intf_t *, vrrp_ip_t *,
249     boolean_t);
250 static int vrrpd_post_event(const char *, vrrp_state_t, vrrp_state_t);
251 
252 static void vrrpd_initconf();
253 static vrrp_err_t vrrpd_updateconf(vrrp_vr_conf_t *, uint_t);
254 static vrrp_err_t vrrpd_write_vrconf(char *, size_t, vrrp_vr_conf_t *);
255 static vrrp_err_t vrrpd_read_vrconf(char *, vrrp_vr_conf_t *);
256 static vrrp_err_t vrrpd_readprop(const char *, vrrp_vr_conf_t *);
257 static void vrrpd_cleanup();
258 
259 static void vrrp_log(int, char *, ...);
260 static int timeval_to_milli(struct timeval);
261 static struct timeval timeval_delta(struct timeval, struct timeval);
262 
/*
 * Describes one persistent property of a VRRP router: its name plus a pair
 * of accessor functions. The implementations of the accessors are outside
 * this part of the file.
 */
typedef struct vrrpd_prop_s {
	char		*vs_propname;	/* property name */
	/* "read" accessor: takes a vrrp_vr_conf_t and a string */
	boolean_t	(*vs_propread)(vrrp_vr_conf_t *, const char *);
	/* "write" accessor: takes a vrrp_vr_conf_t, a buffer and its size */
	int		(*vs_propwrite)(vrrp_vr_conf_t *, char *, size_t);
} vrrp_prop_t;

/*
 * persistent VRRP properties array
 *
 * Each entry pairs a property name with its vrrp_rd_prop_*() and
 * vrrp_wt_prop_*() functions.
 */
static vrrp_prop_t vrrp_prop_info_tbl[] = {
	{"name", vrrp_rd_prop_name, vrrp_wt_prop_name},
	{"vrid", vrrp_rd_prop_vrid, vrrp_wt_prop_vrid},
	{"priority", vrrp_rd_prop_pri, vrrp_wt_prop_pri},
	{"adv_intval", vrrp_rd_prop_adver_int, vrrp_wt_prop_adver_int},
	{"preempt_mode", vrrp_rd_prop_preempt, vrrp_wt_prop_preempt},
	{"accept_mode", vrrp_rd_prop_accept, vrrp_wt_prop_accept},
	{"interface", vrrp_rd_prop_ifname, vrrp_wt_prop_ifname},
	{"af", vrrp_rd_prop_af, vrrp_wt_prop_af},
	{"enabled", vrrp_rd_prop_enabled, vrrp_wt_prop_enabled}
};

/* Number of entries in vrrp_prop_info_tbl[] */
#define	VRRP_PROP_INFO_TABSIZE	\
	(sizeof (vrrp_prop_info_tbl) / sizeof (vrrp_prop_t))
286 
/* Signature shared by all the vrrpd_cmd_*() handlers listed below */
typedef void vrrp_cmd_func_t(void *, void *, size_t *);

/*
 * Describes one administrative command: its type, the size of its request
 * structure, the size of its reply structure, whether it is a set operation,
 * and the function that handles it.
 */
typedef struct vrrp_cmd_info_s {
	vrrp_cmd_type_t	vi_cmd;
	size_t		vi_reqsize;
	size_t		vi_acksize;	/* 0 if the size is variable */
	boolean_t	vi_setop;	/* Set operation? Check credentials */
	vrrp_cmd_func_t	*vi_cmdfunc;
} vrrp_cmd_info_t;

/*
 * Command table. Create, delete, enable, disable and modify are marked as
 * set operations and have a fixed-size reply; query and list are not set
 * operations and have a variable-size reply (vi_acksize is 0).
 */
static vrrp_cmd_info_t vrrp_cmd_info_tbl[] = {
	{VRRP_CMD_CREATE, sizeof (vrrp_cmd_create_t),
	    sizeof (vrrp_ret_create_t), _B_TRUE, vrrpd_cmd_create},
	{VRRP_CMD_DELETE, sizeof (vrrp_cmd_delete_t),
	    sizeof (vrrp_ret_delete_t), _B_TRUE, vrrpd_cmd_delete},
	{VRRP_CMD_ENABLE, sizeof (vrrp_cmd_enable_t),
	    sizeof (vrrp_ret_enable_t), _B_TRUE, vrrpd_cmd_enable},
	{VRRP_CMD_DISABLE, sizeof (vrrp_cmd_disable_t),
	    sizeof (vrrp_ret_disable_t), _B_TRUE, vrrpd_cmd_disable},
	{VRRP_CMD_MODIFY, sizeof (vrrp_cmd_modify_t),
	    sizeof (vrrp_ret_modify_t), _B_TRUE, vrrpd_cmd_modify},
	{VRRP_CMD_QUERY, sizeof (vrrp_cmd_query_t), 0,
	    _B_FALSE, vrrpd_cmd_query},
	{VRRP_CMD_LIST, sizeof (vrrp_cmd_list_t), 0,
	    _B_FALSE, vrrpd_cmd_list}
};

/* Number of entries in vrrp_cmd_info_tbl[] */
#define	VRRP_DOOR_INFO_TABLE_SIZE	\
	(sizeof (vrrp_cmd_info_tbl) / sizeof (vrrp_cmd_info_t))
316 
317 static int
318 ipaddr_cmp(int af, vrrp_addr_t *addr1, vrrp_addr_t *addr2)
319 {
320 	if (af == AF_INET) {
321 		return (memcmp(&addr1->in4.sin_addr,
322 		    &addr2->in4.sin_addr, sizeof (struct in_addr)));
323 	} else {
324 		return (memcmp(&addr1->in6.sin6_addr,
325 		    &addr2->in6.sin6_addr, sizeof (struct in6_addr)));
326 	}
327 }
328 
329 static vrrp_vr_t *
330 vrrpd_lookup_vr_by_vrid(char *ifname, vrid_t vrid, int af)
331 {
332 	vrrp_vr_t *vr;
333 
334 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
335 		if (strcmp(vr->vvr_conf.vvc_link, ifname) == 0 &&
336 		    vr->vvr_conf.vvc_vrid == vrid &&
337 		    vr->vvr_conf.vvc_af == af) {
338 			break;
339 		}
340 	}
341 	return (vr);
342 }
343 
344 static vrrp_vr_t *
345 vrrpd_lookup_vr_by_name(const char *name)
346 {
347 	vrrp_vr_t *vr;
348 
349 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
350 		if (strcmp(vr->vvr_conf.vvc_name, name) == 0)
351 			break;
352 	}
353 	return (vr);
354 }
355 
356 static vrrp_intf_t *
357 vrrpd_lookup_if(const char *ifname, int af)
358 {
359 	vrrp_intf_t	*intf;
360 
361 	TAILQ_FOREACH(intf, &vrrp_intf_list, vvi_next) {
362 		if (strcmp(ifname, intf->vvi_ifname) == 0 &&
363 		    af == intf->vvi_af) {
364 			break;
365 		}
366 	}
367 	return (intf);
368 }
369 
370 static vrrp_err_t
371 vrrpd_create_if(const char *ifname, int af, uint32_t ifindex,
372     vrrp_intf_t **intfp)
373 {
374 	vrrp_intf_t	*intf;
375 
376 	vrrp_log(VRRP_DBG0, "vrrpd_create_if(%s, %s, %d)",
377 	    ifname, af_str(af), ifindex);
378 
379 	if (((*intfp) = malloc(sizeof (vrrp_intf_t))) == NULL) {
380 		vrrp_log(VRRP_ERR, "vrrpd_create_if(): failed to "
381 		    "allocate %s/%s interface", ifname, af_str(af));
382 		return (VRRP_ENOMEM);
383 	}
384 
385 	intf = *intfp;
386 	TAILQ_INIT(&intf->vvi_iplist);
387 	(void) strlcpy(intf->vvi_ifname, ifname, sizeof (intf->vvi_ifname));
388 	intf->vvi_af = af;
389 	intf->vvi_sockfd = -1;
390 	intf->vvi_nvr = 0;
391 	intf->vvi_eid = -1;
392 	intf->vvi_pip = NULL;
393 	intf->vvi_ifindex = ifindex;
394 	intf->vvi_state = NODE_STATE_NEW;
395 	intf->vvi_vr_state = VRRP_STATE_INIT;
396 	TAILQ_INSERT_TAIL(&vrrp_intf_list, intf, vvi_next);
397 	return (VRRP_SUCCESS);
398 }
399 
400 /*
401  * An interface is deleted. If update_vr is true, the deletion of the interface
402  * may cause the state transition of assoicated VRRP router (if this interface
403  * is either the primary or the VNIC interface of the VRRP router); otherwise,
404  * simply delete the interface without updating the VRRP router.
405  */
406 static void
407 vrrpd_delete_if(vrrp_intf_t *intf, boolean_t update_vr)
408 {
409 	vrrp_ip_t	*ip;
410 
411 	vrrp_log(VRRP_DBG0, "vrrpd_delete_if(%s, %s, %supdate_vr)",
412 	    intf->vvi_ifname, af_str(intf->vvi_af), update_vr ? "" : "no_");
413 
414 	if (update_vr) {
415 		/*
416 		 * If a this interface is the physical interface or the VNIC
417 		 * of a VRRP router, the deletion of the interface (no IP
418 		 * address exists on this interface) may cause the state
419 		 * transition of the VRRP router. call vrrpd_remove_if()
420 		 * to find all corresponding VRRP router and update their
421 		 * states.
422 		 */
423 		vrrpd_remove_if(intf, _B_FALSE);
424 	}
425 
426 	/*
427 	 * First remove and delete all the IP addresses on the interface
428 	 */
429 	while (!TAILQ_EMPTY(&intf->vvi_iplist)) {
430 		ip = TAILQ_FIRST(&intf->vvi_iplist);
431 		vrrpd_delete_ip(intf, ip);
432 	}
433 
434 	/*
435 	 * Then remove and delete the interface
436 	 */
437 	TAILQ_REMOVE(&vrrp_intf_list, intf, vvi_next);
438 	(void) free(intf);
439 }
440 
441 static vrrp_err_t
442 vrrpd_create_ip(vrrp_intf_t *intf, const char *lifname, vrrp_addr_t *addr,
443     uint64_t flags)
444 {
445 	vrrp_ip_t	*ip;
446 	char		abuf[INET6_ADDRSTRLEN];
447 
448 	/* LINTED E_CONSTANT_CONDITION */
449 	VRRPADDR2STR(intf->vvi_af, addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
450 	vrrp_log(VRRP_DBG0, "vrrpd_create_ip(%s, %s, %s, 0x%x)",
451 	    intf->vvi_ifname, lifname, abuf, flags);
452 
453 	if ((ip = malloc(sizeof (vrrp_ip_t))) == NULL) {
454 		vrrp_log(VRRP_ERR, "vrrpd_create_ip(%s, %s):"
455 		    "failed to allocate IP", lifname, abuf);
456 		return (VRRP_ENOMEM);
457 	}
458 
459 	(void) strncpy(ip->vip_lifname, lifname, sizeof (ip->vip_lifname));
460 	ip->vip_state = NODE_STATE_NEW;
461 	ip->vip_flags = flags;
462 	(void) memcpy(&ip->vip_addr, addr, sizeof (ip->vip_addr));
463 
464 	/*
465 	 * Make sure link-local IPv6 IP addresses are at the head of the list
466 	 */
467 	if (intf->vvi_af == AF_INET6 &&
468 	    IN6_IS_ADDR_LINKLOCAL(&addr->in6.sin6_addr)) {
469 		TAILQ_INSERT_HEAD(&intf->vvi_iplist, ip, vip_next);
470 	} else {
471 		TAILQ_INSERT_TAIL(&intf->vvi_iplist, ip, vip_next);
472 	}
473 	return (VRRP_SUCCESS);
474 }
475 
476 static void
477 vrrpd_delete_ip(vrrp_intf_t *intf, vrrp_ip_t *ip)
478 {
479 	char	abuf[INET6_ADDRSTRLEN];
480 	int	af = intf->vvi_af;
481 
482 	/* LINTED E_CONSTANT_CONDITION */
483 	VRRPADDR2STR(af, &ip->vip_addr, abuf, sizeof (abuf), _B_FALSE);
484 	vrrp_log(VRRP_DBG0, "vrrpd_delete_ip(%s, %s, %s) is %sprimary",
485 	    intf->vvi_ifname, ip->vip_lifname, abuf,
486 	    intf->vvi_pip == ip ? "" : "not ");
487 
488 	if (intf->vvi_pip == ip)
489 		intf->vvi_pip = NULL;
490 
491 	TAILQ_REMOVE(&intf->vvi_iplist, ip, vip_next);
492 	(void) free(ip);
493 }
494 
495 static char *
496 rtm_event2str(uchar_t event)
497 {
498 	switch (event) {
499 	case RTM_NEWADDR:
500 		return ("RTM_NEWADDR");
501 	case RTM_DELADDR:
502 		return ("RTM_DELADDR");
503 	case RTM_IFINFO:
504 		return ("RTM_IFINFO");
505 	case RTM_ADD:
506 		return ("RTM_ADD");
507 	case RTM_DELETE:
508 		return ("RTM_DELETE");
509 	case RTM_CHANGE:
510 		return ("RTM_CHANGE");
511 	case RTM_OLDADD:
512 		return ("RTM_OLDADD");
513 	case RTM_OLDDEL:
514 		return ("RTM_OLDDEL");
515 	case RTM_CHGADDR:
516 		return ("RTM_CHGADDR");
517 	case RTM_FREEADDR:
518 		return ("RTM_FREEADDR");
519 	default:
520 		return ("RTM_OTHER");
521 	}
522 }
523 
524 /*
525  * This is called by the child process to inform the parent process to
526  * exit with the given return value. Note that the child process
527  * (the daemon process) informs the parent process to exit when anything
528  * goes wrong or when all the intialization is done.
529  */
530 static int
531 vrrpd_inform_parent_exit(int rv)
532 {
533 	int err = 0;
534 
535 	/*
536 	 * If vrrp_debug_level is none-zero, vrrpd is not running as
537 	 * a daemon. Return directly.
538 	 */
539 	if (vrrp_debug_level != 0)
540 		return (0);
541 
542 	if (write(pfds[1], &rv, sizeof (int)) != sizeof (int)) {
543 		err = errno;
544 		(void) close(pfds[1]);
545 		return (err);
546 	}
547 	(void) close(pfds[1]);
548 	return (0);
549 }
550 
551 int
552 main(int argc, char *argv[])
553 {
554 	int c, err;
555 	struct sigaction sa;
556 	sigset_t mask;
557 	struct rlimit rl;
558 
559 	(void) setlocale(LC_ALL, "");
560 	(void) textdomain(TEXT_DOMAIN);
561 
562 	/*
563 	 * We need PRIV_SYS_CONFIG to post VRRP sysevent, PRIV_NET_RAWACESS
564 	 * and PRIV_NET_ICMPACCESS to open  the raw socket, PRIV_SYS_IP_CONFIG
565 	 * to bring up/down the virtual IP addresses, and PRIV_SYS_RESOURCE to
566 	 * setrlimit().
567 	 *
568 	 * Note that sysevent is not supported in non-global zones.
569 	 */
570 	if (getzoneid() == GLOBAL_ZONEID) {
571 		err = __init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, 0, 0,
572 		    PRIV_SYS_CONFIG, PRIV_NET_RAWACCESS, PRIV_NET_ICMPACCESS,
573 		    PRIV_SYS_IP_CONFIG, PRIV_SYS_RESOURCE, NULL);
574 	} else {
575 		err = __init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, 0, 0,
576 		    PRIV_NET_RAWACCESS, PRIV_NET_ICMPACCESS,
577 		    PRIV_SYS_IP_CONFIG, PRIV_SYS_RESOURCE, NULL);
578 	}
579 
580 	if (err == -1) {
581 		vrrp_log(VRRP_ERR, "main(): init_daemon_priv() failed");
582 		return (EXIT_FAILURE);
583 	}
584 
585 	/*
586 	 * If vrrpd is started by other process, it will inherit the
587 	 * signal block mask. We unblock all signals to make sure the
588 	 * signal handling will work normally.
589 	 */
590 	(void) sigfillset(&mask);
591 	(void) thr_sigsetmask(SIG_UNBLOCK, &mask, NULL);
592 	sa.sa_handler = vrrpd_cleanup;
593 	sa.sa_flags = 0;
594 	(void) sigemptyset(&sa.sa_mask);
595 	(void) sigaction(SIGINT, &sa, NULL);
596 	(void) sigaction(SIGQUIT, &sa, NULL);
597 	(void) sigaction(SIGTERM, &sa, NULL);
598 
599 	vrrp_debug_level = 0;
600 	(void) strlcpy(vrrpd_conffile, VRRPCONF, sizeof (vrrpd_conffile));
601 	while ((c = getopt(argc, argv, "d:f:")) != EOF) {
602 		switch (c) {
603 		case 'd':
604 			vrrp_debug_level = atoi(optarg);
605 			break;
606 		case 'f':
607 			(void) strlcpy(vrrpd_conffile, optarg,
608 			    sizeof (vrrpd_conffile));
609 			break;
610 		default:
611 			break;
612 		}
613 	}
614 
615 	closefrom(3);
616 	if (vrrp_debug_level == 0 && (daemon_init() != 0)) {
617 		vrrp_log(VRRP_ERR, "main(): daemon_init() failed");
618 		return (EXIT_FAILURE);
619 	}
620 
621 	rl.rlim_cur = RLIM_INFINITY;
622 	rl.rlim_max = RLIM_INFINITY;
623 	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
624 		vrrp_log(VRRP_ERR, "main(): setrlimit() failed");
625 		goto child_out;
626 	}
627 
628 	if (vrrpd_init() != VRRP_SUCCESS) {
629 		vrrp_log(VRRP_ERR, "main(): vrrpd_init() failed");
630 		goto child_out;
631 	}
632 
633 	/*
634 	 * Get rid of unneeded privileges.
635 	 */
636 	__fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION,
637 	    PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, PRIV_SYS_RESOURCE, NULL);
638 
639 	/*
640 	 * Read the configuration and initialize the existing VRRP
641 	 * configuration
642 	 */
643 	vrrpd_initconf();
644 
645 	/*
646 	 * Inform the parent process that it can successfully exit.
647 	 */
648 	if ((err = vrrpd_inform_parent_exit(EXIT_SUCCESS)) != 0) {
649 		vrrpd_cleanup();
650 		vrrp_log(VRRP_WARNING, "vrrpd_inform_parent_exit() failed: %s",
651 		    strerror(err));
652 		return (EXIT_FAILURE);
653 	}
654 
655 	/*
656 	 * Start the loop to handle the timer and the IO events.
657 	 */
658 	switch (iu_handle_events(vrrpd_eh, vrrpd_timerq)) {
659 	case -1:
660 		vrrp_log(VRRP_ERR, "main(): iu_handle_events() failed "
661 		    "abnormally");
662 		break;
663 	default:
664 		break;
665 	}
666 
667 	vrrpd_cleanup();
668 	return (EXIT_SUCCESS);
669 
670 child_out:
671 	(void) vrrpd_inform_parent_exit(EXIT_FAILURE);
672 	return (EXIT_FAILURE);
673 }
674 
675 static int
676 daemon_init()
677 {
678 	pid_t	pid;
679 	int	rv;
680 
681 	vrrp_log(VRRP_DBG0, "daemon_init()");
682 
683 	if (getenv("SMF_FMRI") == NULL) {
684 		vrrp_log(VRRP_ERR, "daemon_init(): vrrpd is an smf(5) managed "
685 		    "service and should not be run from the command line.");
686 		return (-1);
687 	}
688 
689 	/*
690 	 * Create the pipe used for the child process to inform the parent
691 	 * process to exit after all initialization is done.
692 	 */
693 	if (pipe(pfds) < 0) {
694 		vrrp_log(VRRP_ERR, "daemon_init(): pipe() failed: %s",
695 		    strerror(errno));
696 		return (-1);
697 	}
698 
699 	if ((pid = fork()) < 0) {
700 		vrrp_log(VRRP_ERR, "daemon_init(): fork() failed: %s",
701 		    strerror(errno));
702 		(void) close(pfds[0]);
703 		(void) close(pfds[1]);
704 		return (-1);
705 	}
706 
707 	if (pid != 0) { /* Parent */
708 		(void) close(pfds[1]);
709 
710 		/*
711 		 * Read the child process's return value from the pfds.
712 		 * If the child process exits unexpectedly, read() returns -1.
713 		 */
714 		if (read(pfds[0], &rv, sizeof (int)) != sizeof (int)) {
715 			vrrp_log(VRRP_ERR, "daemon_init(): child process "
716 			    "exited unexpectedly %s", strerror(errno));
717 			(void) kill(pid, SIGTERM);
718 			rv = EXIT_FAILURE;
719 		}
720 		(void) close(pfds[0]);
721 		exit(rv);
722 	}
723 
724 	/*
725 	 * in child process, became a daemon, and return to main() to continue.
726 	 */
727 	(void) close(pfds[0]);
728 	(void) chdir("/");
729 	(void) setsid();
730 	(void) close(0);
731 	(void) close(1);
732 	(void) close(2);
733 	(void) open("/dev/null", O_RDWR, 0);
734 	(void) dup2(0, 1);
735 	(void) dup2(0, 2);
736 	openlog("vrrpd", LOG_PID, LOG_DAEMON);
737 	vrrp_logflag = 1;
738 	return (0);
739 }
740 
741 static vrrp_err_t
742 vrrpd_init()
743 {
744 	vrrp_err_t	err = VRRP_ESYS;
745 
746 	vrrp_log(VRRP_DBG0, "vrrpd_init()");
747 
748 	TAILQ_INIT(&vrrp_vr_list);
749 	TAILQ_INIT(&vrrp_intf_list);
750 
751 	if (vrrp_open(&vrrpd_vh) != VRRP_SUCCESS) {
752 		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrp_open() failed");
753 		goto fail;
754 	}
755 
756 	if ((vrrpd_timerq = iu_tq_create()) == NULL) {
757 		vrrp_log(VRRP_ERR, "vrrpd_init(): iu_tq_create() failed");
758 		goto fail;
759 	}
760 
761 	if ((vrrpd_eh = iu_eh_create()) == NULL) {
762 		vrrp_log(VRRP_ERR, "vrrpd_init(): iu_eh_create() failed");
763 		goto fail;
764 	}
765 
766 	/*
767 	 * Create the AF_UNIX socket used to communicate with libvrrpadm.
768 	 *
769 	 * This socket is used to receive the administrative requests and
770 	 * send back the results.
771 	 */
772 	if (vrrpd_cmdsock_create() != VRRP_SUCCESS) {
773 		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_cmdsock_create() "
774 		    "failed");
775 		goto fail;
776 	}
777 
778 	/*
779 	 * Create the VRRP control socket used to bring up/down the virtual
780 	 * IP addresses. It is also used to set the IFF_NOACCEPT flag of
781 	 * the virtual IP addresses.
782 	 */
783 	if (vrrpd_ctlsock_create() != VRRP_SUCCESS) {
784 		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_ctlsock_create() "
785 		    "failed");
786 		goto fail;
787 	}
788 
789 	/*
790 	 * Create the PF_ROUTER socket used to listen to the routing socket
791 	 * messages and build the interface/IP address list.
792 	 */
793 	if (vrrpd_rtsock_create() != VRRP_SUCCESS) {
794 		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_rtsock_create() "
795 		    "failed");
796 		goto fail;
797 	}
798 
799 	/*
800 	 * Build the list of interfaces and IP addresses. Also, start the time
801 	 * to scan the interfaces/IP addresses periodically.
802 	 */
803 	vrrpd_scan(AF_INET);
804 	vrrpd_scan(AF_INET6);
805 	if ((vrrp_scan_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
806 	    vrrpd_scan_interval, vrrpd_scan_timer, NULL)) == -1) {
807 		vrrp_log(VRRP_ERR, "vrrpd_init(): start scan_timer failed");
808 		goto fail;
809 	}
810 
811 	/*
812 	 * Initialize the VRRP multicast address.
813 	 */
814 	bzero(&vrrp_muladdr4, sizeof (vrrp_addr_t));
815 	vrrp_muladdr4.in4.sin_family = AF_INET;
816 	(void) inet_pton(AF_INET, "224.0.0.18", &vrrp_muladdr4.in4.sin_addr);
817 
818 	bzero(&vrrp_muladdr6, sizeof (vrrp_addr_t));
819 	vrrp_muladdr6.in6.sin6_family = AF_INET6;
820 	(void) inet_pton(AF_INET6, "ff02::12", &vrrp_muladdr6.in6.sin6_addr);
821 
822 	return (VRRP_SUCCESS);
823 
824 fail:
825 	vrrpd_fini();
826 	return (err);
827 }
828 
829 static void
830 vrrpd_fini()
831 {
832 	vrrp_log(VRRP_DBG0, "vrrpd_fini()");
833 
834 	(void) iu_cancel_timer(vrrpd_timerq, vrrp_scan_timer_id, NULL);
835 	vrrp_scan_timer_id = -1;
836 
837 	vrrpd_rtsock_destroy();
838 	vrrpd_ctlsock_destroy();
839 	vrrpd_cmdsock_destroy();
840 
841 	if (vrrpd_eh != NULL) {
842 		iu_eh_destroy(vrrpd_eh);
843 		vrrpd_eh = NULL;
844 	}
845 
846 	if (vrrpd_timerq != NULL) {
847 		iu_tq_destroy(vrrpd_timerq);
848 		vrrpd_timerq = NULL;
849 	}
850 
851 	vrrp_close(vrrpd_vh);
852 	vrrpd_vh = NULL;
853 	assert(TAILQ_EMPTY(&vrrp_vr_list));
854 	assert(TAILQ_EMPTY(&vrrp_intf_list));
855 }
856 
857 static void
858 vrrpd_cleanup(void)
859 {
860 	vrrp_vr_t	*vr;
861 	vrrp_intf_t	*intf;
862 
863 	vrrp_log(VRRP_DBG0, "vrrpd_cleanup()");
864 
865 	while (!TAILQ_EMPTY(&vrrp_vr_list)) {
866 		vr = TAILQ_FIRST(&vrrp_vr_list);
867 		vrrpd_delete_vr(vr);
868 	}
869 
870 	while (!TAILQ_EMPTY(&vrrp_intf_list)) {
871 		intf = TAILQ_FIRST(&vrrp_intf_list);
872 		vrrpd_delete_if(intf, _B_FALSE);
873 	}
874 
875 	vrrpd_fini();
876 	closelog();
877 	exit(1);
878 }
879 
880 /*
881  * Read the configuration file and initialize all the existing VRRP routers.
882  */
883 static void
884 vrrpd_initconf()
885 {
886 	FILE *fp;
887 	char line[LINE_MAX];
888 	int linenum = 0;
889 	vrrp_vr_conf_t conf;
890 	vrrp_err_t err;
891 
892 	vrrp_log(VRRP_DBG0, "vrrpd_initconf()");
893 
894 	if ((fp = fopen(vrrpd_conffile, "rF")) == NULL) {
895 		vrrp_log(VRRP_ERR, "failed to open the configuration file %s",
896 		    vrrpd_conffile);
897 		return;
898 	}
899 
900 	while (fgets(line, sizeof (line), fp) != NULL) {
901 		linenum++;
902 		conf.vvc_vrid = VRRP_VRID_NONE;
903 		if ((err = vrrpd_read_vrconf(line, &conf)) != VRRP_SUCCESS) {
904 			vrrp_log(VRRP_ERR, "failed to parse %d line %s",
905 			    linenum, line);
906 			continue;
907 		}
908 
909 		/*
910 		 * Blank or comment line
911 		 */
912 		if (conf.vvc_vrid == VRRP_VRID_NONE)
913 			continue;
914 
915 		/*
916 		 * No need to update the configuration since the VRRP router
917 		 * created/enabled based on the existing configuration.
918 		 */
919 		if ((err = vrrpd_create(&conf, _B_FALSE)) != VRRP_SUCCESS) {
920 			vrrp_log(VRRP_ERR, "VRRP router %s creation failed: "
921 			    "%s", conf.vvc_name, vrrp_err2str(err));
922 			continue;
923 		}
924 
925 		if (conf.vvc_enabled &&
926 		    ((err = vrrpd_enable(conf.vvc_name, _B_FALSE)) !=
927 		    VRRP_SUCCESS)) {
928 			vrrp_log(VRRP_ERR, "VRRP router %s enable failed: %s",
929 			    conf.vvc_name, vrrp_err2str(err));
930 		}
931 	}
932 
933 	(void) fclose(fp);
934 }
935 
936 /*
937  * Create the AF_UNIX socket used to communicate with libvrrpadm.
938  *
939  * This socket is used to receive the administrative request and
940  * send back the results.
941  */
942 static vrrp_err_t
943 vrrpd_cmdsock_create()
944 {
945 	iu_event_id_t		eid;
946 	struct sockaddr_un	laddr;
947 	int			sock, flags;
948 
949 	vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_create()");
950 
951 	if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
952 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): socket(AF_UNIX) "
953 		    "failed: %s", strerror(errno));
954 		return (VRRP_ESYS);
955 	}
956 
957 	/*
958 	 * Set it to be non-blocking.
959 	 */
960 	flags = fcntl(sock, F_GETFL, 0);
961 	(void) fcntl(sock, F_SETFL, (flags | O_NONBLOCK));
962 
963 	/*
964 	 * Unlink first in case a previous daemon instance exited ungracefully.
965 	 */
966 	(void) unlink(VRRPD_SOCKET);
967 
968 	bzero(&laddr, sizeof (laddr));
969 	laddr.sun_family = AF_UNIX;
970 	(void) strlcpy(laddr.sun_path, VRRPD_SOCKET, sizeof (laddr.sun_path));
971 	if (bind(sock, (struct sockaddr *)&laddr, sizeof (laddr)) < 0) {
972 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): bind() failed: %s",
973 		    strerror(errno));
974 		(void) close(sock);
975 		return (VRRP_ESYS);
976 	}
977 
978 	if (listen(sock, 30) < 0) {
979 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): listen() "
980 		    "failed: %s", strerror(errno));
981 		(void) close(sock);
982 		return (VRRP_ESYS);
983 	}
984 
985 	if ((eid = iu_register_event(vrrpd_eh, sock, POLLIN,
986 	    vrrpd_cmdsock_handler, NULL)) == -1) {
987 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): iu_register_event()"
988 		    " failed");
989 		(void) close(sock);
990 		return (VRRP_ESYS);
991 	}
992 
993 	vrrpd_cmdsock_fd = sock;
994 	vrrpd_cmdsock_eid = eid;
995 	return (VRRP_SUCCESS);
996 }
997 
998 static void
999 vrrpd_cmdsock_destroy()
1000 {
1001 	vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_destroy()");
1002 
1003 	(void) iu_unregister_event(vrrpd_eh, vrrpd_cmdsock_eid, NULL);
1004 	(void) close(vrrpd_cmdsock_fd);
1005 	vrrpd_cmdsock_fd = -1;
1006 	vrrpd_cmdsock_eid = -1;
1007 }
1008 
1009 /*
1010  * Create the PF_ROUTER sockets used to listen to the routing socket
1011  * messages and build the interface/IP address list. Create one for
1012  * each address family (IPv4 and IPv6).
1013  */
1014 static vrrp_err_t
1015 vrrpd_rtsock_create()
1016 {
1017 	int		i, flags, sock;
1018 	iu_event_id_t	eid;
1019 
1020 	vrrp_log(VRRP_DBG0, "vrrpd_rtsock_create()");
1021 
1022 	for (i = 0; i < 2; i++) {
1023 		sock = socket(PF_ROUTE, SOCK_RAW, vrrpd_rtsocks[i].vrt_af);
1024 		if (sock == -1) {
1025 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): socket() "
1026 			    "failed: %s", strerror(errno));
1027 			break;
1028 		}
1029 
1030 		/*
1031 		 * Set it to be non-blocking.
1032 		 */
1033 		if ((flags = fcntl(sock, F_GETFL, 0)) < 0) {
1034 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): "
1035 			    "fcntl(F_GETFL) failed: %s", strerror(errno));
1036 			break;
1037 		}
1038 
1039 		if ((fcntl(sock, F_SETFL, flags | O_NONBLOCK)) < 0) {
1040 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): "
1041 			    "fcntl(F_SETFL) failed: %s", strerror(errno));
1042 			break;
1043 		}
1044 
1045 		if ((eid = iu_register_event(vrrpd_eh, sock, POLLIN,
1046 		    vrrpd_rtsock_handler, &(vrrpd_rtsocks[i].vrt_af))) == -1) {
1047 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): register "
1048 			    "rtsock %d(%s) failed", sock,
1049 			    af_str(vrrpd_rtsocks[i].vrt_af));
1050 			break;
1051 		}
1052 
1053 		vrrpd_rtsocks[i].vrt_fd = sock;
1054 		vrrpd_rtsocks[i].vrt_eid = eid;
1055 	}
1056 
1057 	if (i != 2) {
1058 		(void) close(sock);
1059 		vrrpd_rtsock_destroy();
1060 		return (VRRP_ESYS);
1061 	}
1062 
1063 	return (VRRP_SUCCESS);
1064 }
1065 
1066 static void
1067 vrrpd_rtsock_destroy()
1068 {
1069 	int		i;
1070 
1071 	vrrp_log(VRRP_DBG0, "vrrpd_rtsock_destroy()");
1072 	for (i = 0; i < 2; i++) {
1073 		(void) iu_unregister_event(vrrpd_eh, vrrpd_rtsocks[i].vrt_eid,
1074 		    NULL);
1075 		(void) close(vrrpd_rtsocks[i].vrt_fd);
1076 		vrrpd_rtsocks[i].vrt_eid = -1;
1077 		vrrpd_rtsocks[i].vrt_fd = -1;
1078 	}
1079 }
1080 
1081 /*
1082  * Create the VRRP control socket used to bring up/down the virtual
1083  * IP addresses. It is also used to set the IFF_NOACCEPT flag of
1084  * the virtual IP addresses.
1085  */
1086 static vrrp_err_t
1087 vrrpd_ctlsock_create()
1088 {
1089 	int	s, s6;
1090 	int	on = _B_TRUE;
1091 
1092 	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
1093 		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): socket(INET) "
1094 		    "failed: %s", strerror(errno));
1095 		return (VRRP_ESYS);
1096 	}
1097 	if (setsockopt(s, SOL_SOCKET, SO_VRRP, &on, sizeof (on)) < 0) {
1098 		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): "
1099 		    "setsockopt(INET, SO_VRRP) failed: %s", strerror(errno));
1100 		(void) close(s);
1101 		return (VRRP_ESYS);
1102 	}
1103 
1104 	if ((s6 = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) {
1105 		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): socket(INET6) "
1106 		    "failed: %s", strerror(errno));
1107 		(void) close(s);
1108 		return (VRRP_ESYS);
1109 	}
1110 	if (setsockopt(s6, SOL_SOCKET, SO_VRRP, &on, sizeof (on)) < 0) {
1111 		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): "
1112 		    "setsockopt(INET6, SO_VRRP) failed: %s", strerror(errno));
1113 		(void) close(s);
1114 		(void) close(s6);
1115 		return (VRRP_ESYS);
1116 	}
1117 
1118 	vrrpd_ctlsock_fd = s;
1119 	vrrpd_ctlsock6_fd = s6;
1120 	return (VRRP_SUCCESS);
1121 }
1122 
1123 static void
1124 vrrpd_ctlsock_destroy()
1125 {
1126 	(void) close(vrrpd_ctlsock_fd);
1127 	vrrpd_ctlsock_fd = -1;
1128 	(void) close(vrrpd_ctlsock6_fd);
1129 	vrrpd_ctlsock6_fd = -1;
1130 }
1131 
1132 /*ARGSUSED*/
1133 static void
1134 vrrpd_cmd_create(void *arg1, void *arg2, size_t *arg2_sz)
1135 {
1136 	vrrp_cmd_create_t	*cmd = (vrrp_cmd_create_t *)arg1;
1137 	vrrp_ret_create_t	*ret = (vrrp_ret_create_t *)arg2;
1138 	vrrp_err_t		err;
1139 
1140 	err = vrrpd_create(&cmd->vcc_conf, _B_TRUE);
1141 	if (err == VRRP_SUCCESS && cmd->vcc_conf.vvc_enabled) {
1142 		/*
1143 		 * No need to update the configuration since it is already
1144 		 * done in the above vrrpd_create() call
1145 		 */
1146 		err = vrrpd_enable(cmd->vcc_conf.vvc_name, _B_FALSE);
1147 		if (err != VRRP_SUCCESS)
1148 			(void) vrrpd_delete(cmd->vcc_conf.vvc_name);
1149 	}
1150 	ret->vrc_err = err;
1151 }
1152 
1153 /*ARGSUSED*/
1154 static void
1155 vrrpd_cmd_delete(void *arg1, void *arg2, size_t *arg2_sz)
1156 {
1157 	vrrp_cmd_delete_t	*cmd = (vrrp_cmd_delete_t *)arg1;
1158 	vrrp_ret_delete_t	*ret = (vrrp_ret_delete_t *)arg2;
1159 
1160 	ret->vrd_err = vrrpd_delete(cmd->vcd_name);
1161 }
1162 
1163 /*ARGSUSED*/
1164 static void
1165 vrrpd_cmd_enable(void *arg1, void *arg2, size_t *arg2_sz)
1166 {
1167 	vrrp_cmd_enable_t	*cmd = (vrrp_cmd_enable_t *)arg1;
1168 	vrrp_ret_enable_t	*ret = (vrrp_ret_enable_t *)arg2;
1169 
1170 	ret->vrs_err = vrrpd_enable(cmd->vcs_name, _B_TRUE);
1171 }
1172 
1173 /*ARGSUSED*/
1174 static void
1175 vrrpd_cmd_disable(void *arg1, void *arg2, size_t *arg2_sz)
1176 {
1177 	vrrp_cmd_disable_t	*cmd = (vrrp_cmd_disable_t *)arg1;
1178 	vrrp_ret_disable_t	*ret = (vrrp_ret_disable_t *)arg2;
1179 
1180 	ret->vrx_err = vrrpd_disable(cmd->vcx_name);
1181 }
1182 
1183 /*ARGSUSED*/
1184 static void
1185 vrrpd_cmd_modify(void *arg1, void *arg2, size_t *arg2_sz)
1186 {
1187 	vrrp_cmd_modify_t	*cmd = (vrrp_cmd_modify_t *)arg1;
1188 	vrrp_ret_modify_t	*ret = (vrrp_ret_modify_t *)arg2;
1189 
1190 	ret->vrm_err = vrrpd_modify(&cmd->vcm_conf, cmd->vcm_mask);
1191 }
1192 
1193 static void
1194 vrrpd_cmd_query(void *arg1, void *arg2, size_t *arg2_sz)
1195 {
1196 	vrrp_cmd_query_t	*cmd = (vrrp_cmd_query_t *)arg1;
1197 
1198 	vrrpd_query(cmd->vcq_name, arg2, arg2_sz);
1199 }
1200 
1201 static void
1202 vrrpd_cmd_list(void *arg1, void *arg2, size_t *arg2_sz)
1203 {
1204 	vrrp_cmd_list_t	*cmd = (vrrp_cmd_list_t *)arg1;
1205 
1206 	vrrpd_list(cmd->vcl_vrid, cmd->vcl_ifname, cmd->vcl_af, arg2, arg2_sz);
1207 }
1208 
1209 /*
1210  * Write-type requeset must have the solaris.network.vrrp authorization.
1211  */
1212 static boolean_t
1213 vrrp_auth_check(int connfd, vrrp_cmd_info_t *cinfo)
1214 {
1215 	ucred_t		*cred = NULL;
1216 	uid_t		uid;
1217 	struct passwd	*pw;
1218 	boolean_t	success = _B_FALSE;
1219 
1220 	vrrp_log(VRRP_DBG0, "vrrp_auth_check()");
1221 
1222 	if (!cinfo->vi_setop)
1223 		return (_B_TRUE);
1224 
1225 	/*
1226 	 * Validate the credential
1227 	 */
1228 	if (getpeerucred(connfd, &cred) == (uid_t)-1) {
1229 		vrrp_log(VRRP_ERR, "vrrp_auth_check(): getpeerucred() "
1230 		    "failed: %s", strerror(errno));
1231 		return (_B_FALSE);
1232 	}
1233 
1234 	if ((uid = ucred_getruid((const ucred_t *)cred)) == (uid_t)-1) {
1235 		vrrp_log(VRRP_ERR, "vrrp_auth_check(): ucred_getruid() "
1236 		    "failed: %s", strerror(errno));
1237 		goto done;
1238 	}
1239 
1240 	if ((pw = getpwuid(uid)) == NULL) {
1241 		vrrp_log(VRRP_ERR, "vrrp_auth_check(): getpwuid() failed");
1242 		goto done;
1243 	}
1244 
1245 	success = (chkauthattr("solaris.network.vrrp", pw->pw_name) == 1);
1246 
1247 done:
1248 	ucred_free(cred);
1249 	return (success);
1250 }
1251 
1252 /*
1253  * Process the administrative request from libvrrpadm
1254  */
1255 /* ARGSUSED */
1256 static void
1257 vrrpd_cmdsock_handler(iu_eh_t *eh, int s, short events, iu_event_id_t id,
1258     void *arg)
1259 {
1260 	vrrp_cmd_info_t		*cinfo = NULL;
1261 	vrrp_err_t		err = VRRP_SUCCESS;
1262 	uchar_t			buf[BUFFSIZE], ackbuf[BUFFSIZE];
1263 	size_t			cursize, acksize, len;
1264 	uint32_t		cmd;
1265 	int			connfd, i;
1266 	struct sockaddr_in	from;
1267 	socklen_t		fromlen;
1268 
1269 	vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_handler()");
1270 
1271 	fromlen = (socklen_t)sizeof (from);
1272 	if ((connfd = accept(s, (struct sockaddr *)&from, &fromlen)) < 0) {
1273 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler() accept(): %s",
1274 		    strerror(errno));
1275 		return;
1276 	}
1277 
1278 	/*
1279 	 * First get the type of the request
1280 	 */
1281 	cursize = 0;
1282 	while (cursize < sizeof (uint32_t)) {
1283 		len = read(connfd, buf + cursize,
1284 		    sizeof (uint32_t) - cursize);
1285 		if (len == (size_t)-1 && (errno == EAGAIN || errno == EINTR)) {
1286 			continue;
1287 		} else if (len > 0) {
1288 			cursize += len;
1289 			continue;
1290 		}
1291 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid message "
1292 		    "length");
1293 		(void) close(connfd);
1294 		return;
1295 	}
1296 
1297 	/* LINTED E_BAD_PTR_CAST_ALIGN */
1298 	cmd = ((vrrp_cmd_t *)buf)->vc_cmd;
1299 	for (i = 0; i < VRRP_DOOR_INFO_TABLE_SIZE; i++) {
1300 		if (vrrp_cmd_info_tbl[i].vi_cmd == cmd) {
1301 			cinfo = vrrp_cmd_info_tbl + i;
1302 			break;
1303 		}
1304 	}
1305 
1306 	if (cinfo == NULL) {
1307 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid request "
1308 		    "type %d", cmd);
1309 		err = VRRP_EINVAL;
1310 		goto done;
1311 	}
1312 
1313 	/*
1314 	 * Get the rest of the request.
1315 	 */
1316 	assert(cursize == sizeof (uint32_t));
1317 	while (cursize < cinfo->vi_reqsize) {
1318 		len = read(connfd, buf + cursize,
1319 		    cinfo->vi_reqsize - cursize);
1320 		if (len == (size_t)-1 && (errno == EAGAIN || errno == EINTR)) {
1321 			continue;
1322 		} else if (len > 0) {
1323 			cursize += len;
1324 			continue;
1325 		}
1326 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid message "
1327 		    "length");
1328 		err = VRRP_EINVAL;
1329 		goto done;
1330 	}
1331 
1332 	/*
1333 	 * Validate the authorization
1334 	 */
1335 	if (!vrrp_auth_check(connfd, cinfo)) {
1336 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): "
1337 		    "not sufficient authorization");
1338 		err = VRRP_EPERM;
1339 	}
1340 
1341 done:
1342 	/*
1343 	 * Ack the request
1344 	 */
1345 	if (err != 0) {
1346 		/* LINTED E_BAD_PTR_CAST_ALIGN */
1347 		((vrrp_ret_t *)ackbuf)->vr_err = err;
1348 		acksize = sizeof (vrrp_ret_t);
1349 	} else {
1350 		/*
1351 		 * If the size of ack is varied, the cmdfunc callback
1352 		 * will set the right size.
1353 		 */
1354 		if ((acksize = cinfo->vi_acksize) == 0)
1355 			acksize = sizeof (ackbuf);
1356 
1357 		/* LINTED E_BAD_PTR_CAST_ALIGN */
1358 		cinfo->vi_cmdfunc((vrrp_cmd_t *)buf, ackbuf, &acksize);
1359 	}
1360 
1361 	/*
1362 	 * Send the ack back.
1363 	 */
1364 	cursize = 0;
1365 	while (cursize < acksize) {
1366 		len = sendto(connfd, ackbuf + cursize, acksize - cursize,
1367 		    0, (struct sockaddr *)&from, fromlen);
1368 		if (len == (size_t)-1 && errno == EAGAIN) {
1369 			continue;
1370 		} else if (len > 0) {
1371 			cursize += len;
1372 			continue;
1373 		} else {
1374 			vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler() failed to "
1375 			    "ack: %s", strerror(errno));
1376 			break;
1377 		}
1378 	}
1379 
1380 	(void) shutdown(connfd, SHUT_RDWR);
1381 	(void) close(connfd);
1382 }
1383 
1384 /*
1385  * Process the routing socket messages and update the interfaces/IP addresses
1386  * list
1387  */
1388 /* ARGSUSED */
1389 static void
1390 vrrpd_rtsock_handler(iu_eh_t *eh, int s, short events,
1391     iu_event_id_t id, void *arg)
1392 {
1393 	char			buf[BUFFSIZE];
1394 	struct ifa_msghdr	*ifam;
1395 	int			nbytes;
1396 	int			af = *(int *)arg;
1397 	boolean_t		scanif = _B_FALSE;
1398 
1399 	for (;;) {
1400 		nbytes = read(s, buf, sizeof (buf));
1401 		if (nbytes <= 0) {
1402 			/* No more messages */
1403 			break;
1404 		}
1405 
1406 		/* LINTED E_BAD_PTR_CAST_ALIGN */
1407 		ifam = (struct ifa_msghdr *)buf;
1408 		if (ifam->ifam_version != RTM_VERSION) {
1409 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_handler(): version %d "
1410 			    "not understood", ifam->ifam_version);
1411 			break;
1412 		}
1413 
1414 		vrrp_log(VRRP_DBG0, "vrrpd_rtsock_handler(): recv %s event",
1415 		    rtm_event2str(ifam->ifam_type));
1416 
1417 		switch (ifam->ifam_type) {
1418 		case RTM_FREEADDR:
1419 		case RTM_CHGADDR:
1420 		case RTM_NEWADDR:
1421 		case RTM_DELADDR:
1422 			/*
1423 			 * An IP address has been created/updated/deleted or
1424 			 * brought up/down, re-initilialize the interface/IP
1425 			 * address list.
1426 			 */
1427 			scanif = _B_TRUE;
1428 			break;
1429 		default:
1430 			/* Not interesting */
1431 			break;
1432 		}
1433 	}
1434 
1435 	if (scanif)
1436 		vrrpd_scan(af);
1437 }
1438 
1439 /*
1440  * Periodically scan the interface/IP addresses on the system.
1441  */
1442 /* ARGSUSED */
1443 static void
1444 vrrpd_scan_timer(iu_tq_t *tq, void *arg)
1445 {
1446 	vrrp_log(VRRP_DBG0, "vrrpd_scan_timer()");
1447 	vrrpd_scan(AF_INET);
1448 	vrrpd_scan(AF_INET6);
1449 }
1450 
1451 /*
1452  * Get the list of the interface/IP addresses of the specified address
1453  * family.
1454  */
1455 static void
1456 vrrpd_scan(int af)
1457 {
1458 	vrrp_log(VRRP_DBG0, "vrrpd_scan(%s)", af_str(af));
1459 
1460 again:
1461 	vrrpd_init_ipcache(af);
1462 
1463 	/*
1464 	 * If interface index changes, walk again.
1465 	 */
1466 	if (icfg_iterate_if(af, ICFG_PLUMBED, NULL,
1467 	    vrrpd_walk_ipaddr) != ICFG_SUCCESS)
1468 		goto again;
1469 
1470 	vrrpd_update_ipcache(af);
1471 }
1472 
1473 /*
1474  * First mark all IP addresses of the specific address family to be removed.
1475  * This flag will then be cleared when we walk up all the IP addresses.
1476  */
1477 static void
1478 vrrpd_init_ipcache(int af)
1479 {
1480 	vrrp_intf_t	*intf, *next_intf;
1481 	vrrp_ip_t	*ip, *nextip;
1482 	char		abuf[INET6_ADDRSTRLEN];
1483 
1484 	vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(%s)", af_str(af));
1485 
1486 	next_intf = TAILQ_FIRST(&vrrp_intf_list);
1487 	while ((intf = next_intf) != NULL) {
1488 		next_intf = TAILQ_NEXT(intf, vvi_next);
1489 		if (intf->vvi_af != af)
1490 			continue;
1491 
1492 		/*
1493 		 * If the interface is still marked as new, it means that this
1494 		 * vrrpd_init_ipcache() call is a result of ifindex change,
1495 		 * which causes the re-walk of all the interfaces (see
1496 		 * vrrpd_add_ipaddr()), and some interfaces are still marked
1497 		 * as new during the last walk. In this case, delete this
1498 		 * interface with the "update_vr" argument to be _B_FALSE,
1499 		 * since no VRRP router has been assoicated with this
1500 		 * interface yet (the association is done in
1501 		 * vrrpd_update_ipcache()).
1502 		 *
1503 		 * This interface will be re-added later if it still exists.
1504 		 */
1505 		if (intf->vvi_state == NODE_STATE_NEW) {
1506 			vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(): remove %s "
1507 			    "(%d), may be added later", intf->vvi_ifname,
1508 			    intf->vvi_ifindex);
1509 			vrrpd_delete_if(intf, _B_FALSE);
1510 			continue;
1511 		}
1512 
1513 		for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
1514 		    ip = nextip) {
1515 			nextip = TAILQ_NEXT(ip, vip_next);
1516 			/* LINTED E_CONSTANT_CONDITION */
1517 			VRRPADDR2STR(af, &ip->vip_addr, abuf,
1518 			    INET6_ADDRSTRLEN, _B_FALSE);
1519 
1520 			if (ip->vip_state != NODE_STATE_NEW) {
1521 				vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(%s/%d, "
1522 				    "%s(%s/0x%x))", intf->vvi_ifname,
1523 				    intf->vvi_ifindex, ip->vip_lifname,
1524 				    abuf, ip->vip_flags);
1525 				ip->vip_state = NODE_STATE_STALE;
1526 				continue;
1527 			}
1528 
1529 			/*
1530 			 * If the IP is still marked as new, it means that
1531 			 * this vrrpd_init_ipcache() call is a result of
1532 			 * ifindex change, which causes the re-walk of all
1533 			 * the IP addresses (see vrrpd_add_ipaddr()).
1534 			 * Delete this IP.
1535 			 *
1536 			 * This IP will be readded later if it still exists.
1537 			 */
1538 			vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(): remove "
1539 			    "%s/%d , %s(%s)", intf->vvi_ifname,
1540 			    intf->vvi_ifindex, ip->vip_lifname, abuf);
1541 			vrrpd_delete_ip(intf, ip);
1542 		}
1543 	}
1544 }
1545 
1546 /*
1547  * Walk all the IP addresses on the given interface and update its
1548  * addresses list. Return ICFG_FAILURE if it is required to walk
1549  * all the interfaces again (one of the interface index changes in between).
1550  */
1551 /* ARGSUSED */
1552 static int
1553 vrrpd_walk_ipaddr(icfg_if_t *intf, void *arg)
1554 {
1555 	icfg_handle_t	ih;
1556 	int		ifindex;
1557 	vrrp_addr_t	addr;
1558 	socklen_t	addrlen = (socklen_t)sizeof (struct sockaddr_in6);
1559 	int		prefixlen;
1560 	uint64_t	flags;
1561 	int		err = ICFG_SUCCESS;
1562 
1563 	vrrp_log(VRRP_DBG0, "vrrpd_walk_ipaddr(%s, %s)", intf->if_name,
1564 	    af_str(intf->if_protocol));
1565 
1566 	if (icfg_open(&ih, intf) != ICFG_SUCCESS) {
1567 		vrrp_log(VRRP_ERR, "vrrpd_walk_ipaddr(%s, %s): icfg_open() "
1568 		    "failed: %s", intf->if_name, af_str(intf->if_protocol),
1569 		    strerror(errno));
1570 		return (err);
1571 	}
1572 
1573 	if (icfg_get_flags(ih, &flags) != ICFG_SUCCESS) {
1574 		if (errno != ENXIO && errno != ENOENT) {
1575 			vrrp_log(VRRP_ERR, "vrrpd_walk_ipaddr(%s, %s): "
1576 			    "icfg_get_flags() failed %s", intf->if_name,
1577 			    af_str(intf->if_protocol), strerror(errno));
1578 		}
1579 		goto done;
1580 	}
1581 
1582 	/*
1583 	 * skip virtual/IPMP/P2P interfaces.
1584 	 */
1585 	if ((flags & (IFF_VIRTUAL|IFF_IPMP|IFF_POINTOPOINT)) != 0) {
1586 		vrrp_log(VRRP_DBG0, "vrrpd_walk_ipaddr(%s, %s) skipped",
1587 		    intf->if_name, af_str(intf->if_protocol));
1588 		goto done;
1589 	}
1590 
1591 	if (icfg_get_index(ih, &ifindex) != ICFG_SUCCESS) {
1592 		if (errno != ENXIO && errno != ENOENT) {
1593 			vrrp_log(VRRP_ERR, "vrrpd_walk_ipaddr(%s, %s) "
1594 			    "icfg_get_index() failed: %s", intf->if_name,
1595 			    af_str(intf->if_protocol), strerror(errno));
1596 		}
1597 		goto done;
1598 	}
1599 
1600 	if (icfg_get_addr(ih, (struct sockaddr *)&addr, &addrlen,
1601 	    &prefixlen, _B_FALSE) != ICFG_SUCCESS) {
1602 		if (errno != ENXIO && errno != ENOENT) {
1603 			vrrp_log(VRRP_ERR, "vrrpd_walk_ipaddr(%s, %s) "
1604 			    "icfg_get_addr() failed: %s", intf->if_name,
1605 			    af_str(intf->if_protocol), strerror(errno));
1606 		}
1607 		goto done;
1608 	}
1609 
1610 	/*
1611 	 * Filter out the all-zero IP address.
1612 	 */
1613 	if (VRRPADDR_UNSPECIFIED(intf->if_protocol, &addr))
1614 		goto done;
1615 
1616 	/*
1617 	 * The interface is unplumbed/replumbed during we walk the IP
1618 	 * addresses. Try walk the IP addresses one more time.
1619 	 */
1620 	if (vrrpd_add_ipaddr(intf->if_name, intf->if_protocol,
1621 	    &addr, ifindex, flags) == VRRP_EAGAIN)
1622 		err = ICFG_FAILURE;
1623 
1624 done:
1625 	icfg_close(ih);
1626 	return (err);
1627 }
1628 
1629 /*
1630  * Given the information of each IP address, update the interface and
1631  * IP addresses list
1632  */
1633 static vrrp_err_t
1634 vrrpd_add_ipaddr(char *lifname, int af, vrrp_addr_t *addr, int ifindex,
1635     uint64_t flags)
1636 {
1637 	char		ifname[LIFNAMSIZ], *c;
1638 	vrrp_intf_t	*intf;
1639 	vrrp_ip_t	*ip;
1640 	char		abuf[INET6_ADDRSTRLEN];
1641 	vrrp_err_t	err;
1642 
1643 	/* LINTED E_CONSTANT_CONDITION */
1644 	VRRPADDR2STR(af, addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
1645 	vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s, %d, 0x%x)", lifname,
1646 	    abuf, ifindex, flags);
1647 
1648 	/*
1649 	 * Get the physical interface name from the logical interface name.
1650 	 */
1651 	(void) strlcpy(ifname, lifname, sizeof (ifname));
1652 	if ((c = strchr(ifname, ':')) != NULL)
1653 		*c = '\0';
1654 
1655 	if ((intf = vrrpd_lookup_if(ifname, af)) == NULL) {
1656 		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(): %s is new", ifname);
1657 		err = vrrpd_create_if(ifname, af, ifindex, &intf);
1658 		if (err != VRRP_SUCCESS)
1659 			return (err);
1660 	} else if (intf->vvi_ifindex != ifindex) {
1661 		/*
1662 		 * If index changes, it means that this interface is
1663 		 * unplumbed/replumbed since we last checked. If this
1664 		 * interface is not used by any VRRP router, just
1665 		 * update its ifindex, and the IP addresses list will
1666 		 * be updated later. Otherwise, return EAGAIN to rewalk
1667 		 * all the IP addresses from the beginning.
1668 		 */
1669 		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s) ifindex changed ",
1670 		    "from %d to %d", ifname, intf->vvi_ifindex, ifindex);
1671 		if (!IS_PRIMARY_INTF(intf) && !IS_VIRTUAL_INTF(intf)) {
1672 			intf->vvi_ifindex = ifindex;
1673 		} else {
1674 			/*
1675 			 * delete this interface from the list if this
1676 			 * interface has already been assoicated with
1677 			 * any VRRP routers.
1678 			 */
1679 			vrrpd_delete_if(intf, _B_TRUE);
1680 			return (VRRP_EAGAIN);
1681 		}
1682 	}
1683 
1684 	/*
1685 	 * Does this IP address already exist?
1686 	 */
1687 	TAILQ_FOREACH(ip, &intf->vvi_iplist, vip_next) {
1688 		if (strcmp(ip->vip_lifname, lifname) == 0)
1689 			break;
1690 	}
1691 
1692 	if (ip != NULL) {
1693 		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s) IP exists",
1694 		    lifname, abuf);
1695 		ip->vip_state = NODE_STATE_NONE;
1696 		ip->vip_flags = flags;
1697 		if (ipaddr_cmp(af, addr, &ip->vip_addr) != 0) {
1698 			/*
1699 			 * Address has been changed, mark it as new
1700 			 * If this address is already selected as the
1701 			 * primary IP address, the new IP will be checked
1702 			 * to see whether it is still qualified as the
1703 			 * primary IP address. If not, the primary IP
1704 			 * address will be reselected.
1705 			 */
1706 			(void) memcpy(&ip->vip_addr, addr,
1707 			    sizeof (vrrp_addr_t));
1708 
1709 			ip->vip_state = NODE_STATE_NEW;
1710 		}
1711 	} else {
1712 		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s) IP is new",
1713 		    lifname, abuf);
1714 
1715 		err = vrrpd_create_ip(intf, lifname, addr, flags);
1716 		if (err != VRRP_SUCCESS)
1717 			return (err);
1718 	}
1719 	return (VRRP_SUCCESS);
1720 }
1721 
/*
 * Update the interface and IP addresses list after a walk. Remove the
 * entries that have gone stale since the last time we walked the IP
 * addresses and update the ones that have been changed.
 *
 * For every interface of address family af this:
 * - deletes the IP addresses still marked stale, and the interface itself
 *   if no address is left;
 * - reselects the primary IP address on interfaces that are the primary
 *   interface of some VRRP router;
 * - on virtual interfaces, updates or checks the up/down state of each IP;
 * - clears the "new" state of IPs and interfaces.
 * If a primary IP became available or a new interface showed up,
 * vrrpd_reenable_all_vr() is called once at the end.
 */
static void
vrrpd_update_ipcache(int af)
{
	vrrp_intf_t	*intf, *nextif;
	vrrp_ip_t	*ip, *nextip;
	char		abuf[INET6_ADDRSTRLEN];
	boolean_t	primary_selected;
	boolean_t	primary_now_selected;
	boolean_t	need_reenable = _B_FALSE;

	vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(%s)", af_str(af));

	/*
	 * The next pointer is fetched before the body runs because the
	 * current interface may be deleted inside the loop.
	 */
	nextif = TAILQ_FIRST(&vrrp_intf_list);
	while ((intf = nextif) != NULL) {
		nextif = TAILQ_NEXT(intf, vvi_next);
		if (intf->vvi_af != af)
			continue;

		/*
		 * Does the interface already select its primary IP address?
		 */
		primary_selected = (intf->vvi_pip != NULL);
		assert(!primary_selected || IS_PRIMARY_INTF(intf));

		/*
		 * Remove the IP addresses that have been unconfigured, i.e.
		 * the ones that are still marked stale.
		 */
		for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
		    ip = nextip) {
			nextip = TAILQ_NEXT(ip, vip_next);
			if (ip->vip_state != NODE_STATE_STALE)
				continue;

			/* LINTED E_CONSTANT_CONDITION */
			VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN,
			    _B_FALSE);
			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): IP %s "
			    "is removed over %s", abuf, intf->vvi_ifname);
			vrrpd_delete_ip(intf, ip);
		}

		/*
		 * No IP addresses left, delete this interface.
		 */
		if (TAILQ_EMPTY(&intf->vvi_iplist)) {
			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
			    "no IP left over %s", intf->vvi_ifname);
			vrrpd_delete_if(intf, _B_TRUE);
			continue;
		}

		/*
		 * If this is selected as the physical interface for any
		 * VRRP router, reselect the primary address if needed.
		 */
		if (IS_PRIMARY_INTF(intf)) {
			vrrpd_reselect_primary(intf);
			primary_now_selected = (intf->vvi_pip != NULL);

			/*
			 * Cannot find the new primary IP address.
			 */
			if (primary_selected && !primary_now_selected) {
				vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache() "
				    "reselect primary IP on %s failed",
				    intf->vvi_ifname);
				vrrpd_remove_if(intf, _B_TRUE);
			} else if (!primary_selected && primary_now_selected) {
				/*
				 * The primary IP address is successfully
				 * selected on the physical interface; we
				 * need to walk through all the VRRP routers
				 * that are created on this physical interface
				 * and see whether they can now be enabled.
				 */
				need_reenable = _B_TRUE;
			}
		}

		/*
		 * For every new virtual IP address, bring up/down it based
		 * on the state of VRRP router.
		 *
		 * Note that it is fine to not update the IP's vip_flags field
		 * even if vrrpd_virtualip_updateone() changed the address's
		 * up/down state, since the vip_flags field is only used for
		 * select primary IP address over a physical interface, and
		 * vrrpd_virtualip_updateone() only affects the virtual IP
		 * address's status.
		 */
		for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
		    ip = nextip) {
			nextip = TAILQ_NEXT(ip, vip_next);
			/* LINTED E_CONSTANT_CONDITION */
			VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN,
			    _B_FALSE);
			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
			    "IP %s over %s%s", abuf, intf->vvi_ifname,
			    ip->vip_state == NODE_STATE_NEW ? " is new" : "");

			if (IS_VIRTUAL_INTF(intf)) {
				/*
				 * If this IP is new, update its up/down state
				 * based on the virtual interface's state
				 * (which is determined by the VRRP router's
				 * state). Otherwise, check only and prompt
				 * warnings if its up/down state has been
				 * changed. The third argument is true for
				 * the check-only case (state is NONE).
				 */
				if (vrrpd_virtualip_updateone(intf, ip,
				    ip->vip_state == NODE_STATE_NONE) !=
				    VRRP_SUCCESS) {
					vrrp_log(VRRP_DBG0,
					    "vrrpd_update_ipcache(): "
					    "IP %s over %s update failed", abuf,
					    intf->vvi_ifname);
					vrrpd_delete_ip(intf, ip);
					continue;
				}
			}
			/* The IP has been processed; it is no longer new. */
			ip->vip_state = NODE_STATE_NONE;
		}

		/*
		 * The IP address is deleted when it is failed to be brought
		 * up. If no IP addresses are left, delete this interface.
		 */
		if (TAILQ_EMPTY(&intf->vvi_iplist)) {
			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
			    "no IP left over %s", intf->vvi_ifname);
			vrrpd_delete_if(intf, _B_TRUE);
			continue;
		}

		if (intf->vvi_state == NODE_STATE_NEW) {
			/*
			 * A new interface is found. This interface can be
			 * the primary interface or the virtual VNIC
			 * interface.  Again, we need to walk through all
			 * the VRRP routers to see whether some of them can
			 * now be enabled because of the new primary IP
			 * address or the new virtual IP addresses.
			 */
			intf->vvi_state = NODE_STATE_NONE;
			need_reenable = _B_TRUE;
		}
	}

	if (need_reenable)
		vrrpd_reenable_all_vr();
}
1878 
1879 /*
1880  * Reselect primary IP if:
1881  * - The existing primary IP is no longer qualified (removed or it is down or
1882  *   not a link-local IP for IPv6 VRRP router);
1883  * - This is a physical interface but no primary IP is chosen;
1884  */
1885 static void
1886 vrrpd_reselect_primary(vrrp_intf_t *intf)
1887 {
1888 	vrrp_ip_t	*ip;
1889 	char		abuf[INET6_ADDRSTRLEN];
1890 
1891 	assert(IS_PRIMARY_INTF(intf));
1892 
1893 	/*
1894 	 * If the interface's old primary IP address is still valid, return
1895 	 */
1896 	if (((ip = intf->vvi_pip) != NULL) && (QUALIFY_PRIMARY_ADDR(intf, ip)))
1897 		return;
1898 
1899 	if (ip != NULL) {
1900 		/* LINTED E_CONSTANT_CONDITION */
1901 		VRRPADDR2STR(intf->vvi_af, &ip->vip_addr, abuf,
1902 		    sizeof (abuf), _B_FALSE);
1903 		vrrp_log(VRRP_DBG0, "vrrpd_reselect_primary(%s): primary IP %s "
1904 		    "is no longer qualified", intf->vvi_ifname, abuf);
1905 	}
1906 
1907 	ip = vrrpd_select_primary(intf);
1908 	intf->vvi_pip = ip;
1909 
1910 	if (ip != NULL) {
1911 		/* LINTED E_CONSTANT_CONDITION */
1912 		VRRPADDR2STR(intf->vvi_af, &ip->vip_addr, abuf,
1913 		    sizeof (abuf), _B_FALSE);
1914 		vrrp_log(VRRP_DBG0, "vrrpd_reselect_primary(%s): primary IP %s "
1915 		    "is selected", intf->vvi_ifname, abuf);
1916 	}
1917 }
1918 
1919 /*
1920  * Select the primary IP address. Since the link-local IP address is always
1921  * at the head of the IP address list, try to find the first UP IP address
1922  * and see whether it qualify.
1923  */
1924 static vrrp_ip_t *
1925 vrrpd_select_primary(vrrp_intf_t *pif)
1926 {
1927 	vrrp_ip_t	*pip;
1928 	char		abuf[INET6_ADDRSTRLEN];
1929 
1930 	vrrp_log(VRRP_DBG1, "vrrpd_select_primary(%s)", pif->vvi_ifname);
1931 
1932 	TAILQ_FOREACH(pip, &pif->vvi_iplist, vip_next) {
1933 		assert(pip->vip_state != NODE_STATE_STALE);
1934 
1935 		/* LINTED E_CONSTANT_CONDITION */
1936 		VRRPADDR2STR(pif->vvi_af, &pip->vip_addr, abuf,
1937 		    INET6_ADDRSTRLEN, _B_FALSE);
1938 		vrrp_log(VRRP_DBG0, "vrrpd_select_primary(%s): %s is %s",
1939 		    pif->vvi_ifname, abuf,
1940 		    (pip->vip_flags & IFF_UP) ? "up" : "down");
1941 
1942 		if (pip->vip_flags & IFF_UP)
1943 			break;
1944 	}
1945 
1946 	/*
1947 	 * Is this valid primary IP address?
1948 	 */
1949 	if (pip == NULL || !QUALIFY_PRIMARY_ADDR(pif, pip)) {
1950 		vrrp_log(VRRP_DBG0, "vrrpd_select_primary(%s/%s) failed",
1951 		    pif->vvi_ifname, af_str(pif->vvi_af));
1952 		return (NULL);
1953 	}
1954 	return (pip);
1955 }
1956 
1957 /*
1958  * This is a new interface. Check whether any VRRP router is waiting for it
1959  */
1960 static void
1961 vrrpd_reenable_all_vr()
1962 {
1963 	vrrp_vr_t *vr;
1964 
1965 	vrrp_log(VRRP_DBG0, "vrrpd_reenable_all_vr()");
1966 
1967 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
1968 		if (vr->vvr_conf.vvc_enabled)
1969 			(void) vrrpd_enable_vr(vr);
1970 	}
1971 }
1972 
1973 /*
1974  * If primary_addr_gone is _B_TRUE, it means that we failed to select
1975  * the primary IP address on this (physical) interface; otherwise,
1976  * it means the interface is no longer available.
1977  */
1978 static void
1979 vrrpd_remove_if(vrrp_intf_t *intf, boolean_t primary_addr_gone)
1980 {
1981 	vrrp_vr_t *vr;
1982 
1983 	vrrp_log(VRRP_DBG0, "vrrpd_remove_if(%s): %s", intf->vvi_ifname,
1984 	    primary_addr_gone ? "primary address gone" : "interface deleted");
1985 
1986 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
1987 		if (vr->vvr_conf.vvc_enabled)
1988 			vrrpd_disable_vr(vr, intf, primary_addr_gone);
1989 	}
1990 }
1991 
1992 /*
1993  * Update the VRRP configuration file based on the given configuration.
1994  * op is either VRRP_CONF_UPDATE or VRRP_CONF_DELETE
1995  */
1996 static vrrp_err_t
1997 vrrpd_updateconf(vrrp_vr_conf_t *newconf, uint_t op)
1998 {
1999 	vrrp_vr_conf_t	conf;
2000 	FILE		*fp, *nfp;
2001 	int		nfd;
2002 	char		line[LINE_MAX];
2003 	char		newfile[MAXPATHLEN];
2004 	boolean_t	found = _B_FALSE;
2005 	vrrp_err_t	err = VRRP_SUCCESS;
2006 
2007 	vrrp_log(VRRP_DBG0, "vrrpd_updateconf(%s, %s)", newconf->vvc_name,
2008 	    op == VRRP_CONF_UPDATE ? "update" : "delete");
2009 
2010 	if ((fp = fopen(vrrpd_conffile, "r+F")) == NULL) {
2011 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): open %s failed: %s",
2012 		    vrrpd_conffile, strerror(errno));
2013 		return (VRRP_EDB);
2014 	}
2015 
2016 	(void) snprintf(newfile, MAXPATHLEN, "%s.new", vrrpd_conffile);
2017 	if ((nfd = open(newfile, O_WRONLY | O_CREAT | O_TRUNC,
2018 	    S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0) {
2019 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): open %s failed: %s",
2020 		    newfile, strerror(errno));
2021 		(void) fclose(fp);
2022 		return (VRRP_EDB);
2023 	}
2024 
2025 	if ((nfp = fdopen(nfd, "wF")) == NULL) {
2026 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): fdopen(%s) failed: %s",
2027 		    newfile, strerror(errno));
2028 		goto done;
2029 	}
2030 
2031 	while (fgets(line, sizeof (line), fp) != NULL) {
2032 		conf.vvc_vrid = VRRP_VRID_NONE;
2033 		if (!found && (err = vrrpd_read_vrconf(line, &conf)) !=
2034 		    VRRP_SUCCESS) {
2035 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): invalid "
2036 			    "configuration format: %s", line);
2037 			goto done;
2038 		}
2039 
2040 		/*
2041 		 * Write this line out if:
2042 		 * - this is a comment line; or
2043 		 * - we've done updating/deleting the the given VR; or
2044 		 * - if the name of the VR read from this line does not match
2045 		 *   the VR name that we are about to update/delete;
2046 		 */
2047 		if (found || conf.vvc_vrid == VRRP_VRID_NONE ||
2048 		    strcmp(conf.vvc_name, newconf->vvc_name) != 0) {
2049 			if (fputs(line, nfp) != EOF)
2050 				continue;
2051 
2052 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2053 			    "write line %s", line);
2054 			err = VRRP_EDB;
2055 			goto done;
2056 		}
2057 
2058 		/*
2059 		 * Otherwise, update/skip the line.
2060 		 */
2061 		found = _B_TRUE;
2062 		if (op == VRRP_CONF_DELETE)
2063 			continue;
2064 
2065 		assert(op == VRRP_CONF_UPDATE);
2066 		if ((err = vrrpd_write_vrconf(line, sizeof (line),
2067 		    newconf)) != VRRP_SUCCESS) {
2068 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2069 			    "update configuration for %s", newconf->vvc_name);
2070 			goto done;
2071 		}
2072 		if (fputs(line, nfp) == EOF) {
2073 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2074 			    "write line %s", line);
2075 			err = VRRP_EDB;
2076 			goto done;
2077 		}
2078 	}
2079 
2080 	/*
2081 	 * If we get to the end of the file and have not seen the router that
2082 	 * we are about to update, write it out.
2083 	 */
2084 	if (!found && op == VRRP_CONF_UPDATE) {
2085 		if ((err = vrrpd_write_vrconf(line, sizeof (line),
2086 		    newconf)) == VRRP_SUCCESS && fputs(line, nfp) == EOF) {
2087 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2088 			    "write line %s", line);
2089 			err = VRRP_EDB;
2090 		}
2091 	} else if (!found && op == VRRP_CONF_DELETE) {
2092 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to find "
2093 		    "configuation for %s", newconf->vvc_name);
2094 		err = VRRP_ENOTFOUND;
2095 	}
2096 
2097 	if (err != VRRP_SUCCESS)
2098 		goto done;
2099 
2100 	if (fflush(nfp) == EOF || rename(newfile, vrrpd_conffile) < 0) {
2101 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2102 		    "rename file %s", newfile);
2103 		err = VRRP_EDB;
2104 	}
2105 
2106 done:
2107 	(void) fclose(fp);
2108 	(void) fclose(nfp);
2109 	(void) unlink(newfile);
2110 	return (err);
2111 }
2112 
2113 static vrrp_err_t
2114 vrrpd_write_vrconf(char *line, size_t len, vrrp_vr_conf_t *conf)
2115 {
2116 	vrrp_prop_t	*prop;
2117 	int		n, i;
2118 
2119 	vrrp_log(VRRP_DBG0, "vrrpd_write_vrconf(%s)", conf->vvc_name);
2120 
2121 	for (i = 0; i < VRRP_PROP_INFO_TABSIZE; i++) {
2122 		prop = &vrrp_prop_info_tbl[i];
2123 		n = snprintf(line, len, i == 0 ? "%s=" : " %s=",
2124 		    prop->vs_propname);
2125 		if (n < 0 || n >= len)
2126 			break;
2127 		len -= n;
2128 		line += n;
2129 		n = prop->vs_propwrite(conf, line, len);
2130 		if (n < 0 || n >= len)
2131 			break;
2132 		len -= n;
2133 		line += n;
2134 	}
2135 	if (i != VRRP_PROP_INFO_TABSIZE) {
2136 		vrrp_log(VRRP_ERR, "vrrpd_write_vrconf(%s): buffer size too"
2137 		    "small", conf->vvc_name);
2138 		return (VRRP_EDB);
2139 	}
2140 	n = snprintf(line, len, "\n");
2141 	if (n < 0 || n >= len) {
2142 		vrrp_log(VRRP_ERR, "vrrpd_write_vrconf(%s): buffer size too"
2143 		    "small", conf->vvc_name);
2144 		return (VRRP_EDB);
2145 	}
2146 	return (VRRP_SUCCESS);
2147 }
2148 
2149 static vrrp_err_t
2150 vrrpd_read_vrconf(char *line, vrrp_vr_conf_t *conf)
2151 {
2152 	char		*str, *token;
2153 	char		*next;
2154 	vrrp_err_t	err = VRRP_SUCCESS;
2155 	char		tmpbuf[MAXLINELEN];
2156 
2157 	str = tmpbuf;
2158 	(void) strlcpy(tmpbuf, line, MAXLINELEN);
2159 
2160 	/*
2161 	 * Skip leading spaces, blank lines, and comments.
2162 	 */
2163 	skip_whitespace(str);
2164 	if ((str - tmpbuf == strlen(tmpbuf)) || (*str == '#')) {
2165 		conf->vvc_vrid = VRRP_VRID_NONE;
2166 		return (VRRP_SUCCESS);
2167 	}
2168 
2169 	/*
2170 	 * Read each VR properties.
2171 	 */
2172 	for (token = strtok_r(str, " \n\t", &next); token != NULL;
2173 	    token = strtok_r(NULL, " \n\t", &next)) {
2174 		if ((err = vrrpd_readprop(token, conf)) != VRRP_SUCCESS)
2175 			break;
2176 	}
2177 
2178 	/* All properties read but no VRID defined */
2179 	if (err == VRRP_SUCCESS && conf->vvc_vrid == VRRP_VRID_NONE)
2180 		err = VRRP_EINVAL;
2181 
2182 	return (err);
2183 }
2184 
2185 static vrrp_err_t
2186 vrrpd_readprop(const char *str, vrrp_vr_conf_t *conf)
2187 {
2188 	vrrp_prop_t	*prop;
2189 	char		*pstr;
2190 	int		i;
2191 
2192 	if ((pstr = strchr(str, '=')) == NULL) {
2193 		vrrp_log(VRRP_ERR, "vrrpd_readprop(%s): invalid property", str);
2194 		return (VRRP_EINVAL);
2195 	}
2196 
2197 	*pstr++ = '\0';
2198 	for (i = 0; i < VRRP_PROP_INFO_TABSIZE; i++) {
2199 		prop = &vrrp_prop_info_tbl[i];
2200 		if (strcasecmp(str, prop->vs_propname) == 0) {
2201 			if (prop->vs_propread(conf, pstr))
2202 				break;
2203 		}
2204 	}
2205 
2206 	if (i == VRRP_PROP_INFO_TABSIZE) {
2207 		vrrp_log(VRRP_ERR, "vrrpd_readprop(%s): invalid property", str);
2208 		return (VRRP_EINVAL);
2209 	}
2210 
2211 	return (VRRP_SUCCESS);
2212 }
2213 
2214 static boolean_t
2215 vrrp_rd_prop_name(vrrp_vr_conf_t *conf, const char *str)
2216 {
2217 	size_t size = sizeof (conf->vvc_name);
2218 	return (strlcpy(conf->vvc_name, str, size) < size);
2219 }
2220 
2221 static boolean_t
2222 vrrp_rd_prop_vrid(vrrp_vr_conf_t *conf, const char *str)
2223 {
2224 	conf->vvc_vrid = strtol(str, NULL, 0);
2225 	return (!(conf->vvc_vrid < VRRP_VRID_MIN ||
2226 	    conf->vvc_vrid > VRRP_VRID_MAX ||
2227 	    (conf->vvc_vrid == 0 && errno != 0)));
2228 }
2229 
2230 static boolean_t
2231 vrrp_rd_prop_af(vrrp_vr_conf_t *conf, const char *str)
2232 {
2233 	if (strcasecmp(str, "AF_INET") == 0)
2234 		conf->vvc_af = AF_INET;
2235 	else if (strcasecmp(str, "AF_INET6") == 0)
2236 		conf->vvc_af = AF_INET6;
2237 	else
2238 		return (_B_FALSE);
2239 	return (_B_TRUE);
2240 }
2241 
2242 static boolean_t
2243 vrrp_rd_prop_pri(vrrp_vr_conf_t *conf, const char *str)
2244 {
2245 	conf->vvc_pri = strtol(str, NULL, 0);
2246 	return (!(conf->vvc_pri < VRRP_PRI_MIN ||
2247 	    conf->vvc_pri > VRRP_PRI_OWNER ||
2248 	    (conf->vvc_pri == 0 && errno != 0)));
2249 }
2250 
2251 static boolean_t
2252 vrrp_rd_prop_adver_int(vrrp_vr_conf_t *conf, const char *str)
2253 {
2254 	conf->vvc_adver_int = strtol(str, NULL, 0);
2255 	return (!(conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
2256 	    conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX ||
2257 	    (conf->vvc_adver_int == 0 && errno != 0)));
2258 }
2259 
2260 static boolean_t
2261 vrrp_rd_prop_preempt(vrrp_vr_conf_t *conf, const char *str)
2262 {
2263 	if (strcasecmp(str, "true") == 0)
2264 		conf->vvc_preempt = _B_TRUE;
2265 	else if (strcasecmp(str, "false") == 0)
2266 		conf->vvc_preempt = _B_FALSE;
2267 	else
2268 		return (_B_FALSE);
2269 	return (_B_TRUE);
2270 }
2271 
2272 static boolean_t
2273 vrrp_rd_prop_accept(vrrp_vr_conf_t *conf, const char *str)
2274 {
2275 	if (strcasecmp(str, "true") == 0)
2276 		conf->vvc_accept = _B_TRUE;
2277 	else if (strcasecmp(str, "false") == 0)
2278 		conf->vvc_accept = _B_FALSE;
2279 	else
2280 		return (_B_FALSE);
2281 	return (_B_TRUE);
2282 }
2283 
2284 static boolean_t
2285 vrrp_rd_prop_enabled(vrrp_vr_conf_t *conf, const char *str)
2286 {
2287 	if (strcasecmp(str, "enabled") == 0)
2288 		conf->vvc_enabled = _B_TRUE;
2289 	else if (strcasecmp(str, "disabled") == 0)
2290 		conf->vvc_enabled = _B_FALSE;
2291 	else
2292 		return (_B_FALSE);
2293 	return (_B_TRUE);
2294 }
2295 
2296 static boolean_t
2297 vrrp_rd_prop_ifname(vrrp_vr_conf_t *conf, const char *str)
2298 {
2299 	size_t size = sizeof (conf->vvc_link);
2300 	return (strlcpy(conf->vvc_link, str, size) < size);
2301 }
2302 
2303 static int
2304 vrrp_wt_prop_name(vrrp_vr_conf_t *conf, char *str, size_t size)
2305 {
2306 	return (snprintf(str, size, "%s", conf->vvc_name));
2307 }
2308 
2309 static int
2310 vrrp_wt_prop_pri(vrrp_vr_conf_t *conf, char *str, size_t size)
2311 {
2312 	return (snprintf(str, size, "%d", conf->vvc_pri));
2313 }
2314 
2315 static int
2316 vrrp_wt_prop_adver_int(vrrp_vr_conf_t *conf, char *str, size_t size)
2317 {
2318 	return (snprintf(str, size, "%d", conf->vvc_adver_int));
2319 }
2320 
2321 static int
2322 vrrp_wt_prop_preempt(vrrp_vr_conf_t *conf, char *str, size_t size)
2323 {
2324 	return (snprintf(str, size, "%s",
2325 	    conf->vvc_preempt ? "true" : "false"));
2326 }
2327 
2328 static int
2329 vrrp_wt_prop_accept(vrrp_vr_conf_t *conf, char *str, size_t size)
2330 {
2331 	return (snprintf(str, size, "%s",
2332 	    conf->vvc_accept ? "true" : "false"));
2333 }
2334 
2335 static int
2336 vrrp_wt_prop_enabled(vrrp_vr_conf_t *conf, char *str, size_t size)
2337 {
2338 	return (snprintf(str, size, "%s",
2339 	    conf->vvc_enabled ? "enabled" : "disabled"));
2340 }
2341 
2342 static int
2343 vrrp_wt_prop_vrid(vrrp_vr_conf_t *conf, char *str, size_t size)
2344 {
2345 	return (snprintf(str, size, "%d", conf->vvc_vrid));
2346 }
2347 
2348 static int
2349 vrrp_wt_prop_af(vrrp_vr_conf_t *conf, char *str, size_t size)
2350 {
2351 	return (snprintf(str, size, "%s",
2352 	    conf->vvc_af == AF_INET ? "AF_INET" : "AF_INET6"));
2353 }
2354 
2355 static int
2356 vrrp_wt_prop_ifname(vrrp_vr_conf_t *conf, char *str, size_t size)
2357 {
2358 	return (snprintf(str, size, "%s", conf->vvc_link));
2359 }
2360 
/*
 * Return a printable name for an address family. Besides the AF_*
 * constants, the plain IP version numbers 4 and 6 are accepted as aliases
 * for AF_INET and AF_INET6. Anything unrecognized yields "AF_error".
 * The returned string is a constant and must not be modified or freed.
 */
static char *
af_str(int af)
{
	char *name = "AF_error";

	if (af == 4 || af == AF_INET)
		name = "AF_INET";
	else if (af == 6 || af == AF_INET6)
		name = "AF_INET6";
	else if (af == AF_UNSPEC)
		name = "AF_UNSPEC";

	return (name);
}
2373 
2374 static vrrp_err_t
2375 vrrpd_create_vr(vrrp_vr_conf_t *conf)
2376 {
2377 	vrrp_vr_t	*vr;
2378 
2379 	vrrp_log(VRRP_DBG0, "vrrpd_create_vr(%s)", conf->vvc_name);
2380 
2381 	if ((vr = malloc(sizeof (vrrp_vr_t))) == NULL) {
2382 		vrrp_log(VRRP_ERR, "vrrpd_create_vr(): memory allocation for %s"
2383 		    " failed", conf->vvc_name);
2384 		return (VRRP_ENOMEM);
2385 	}
2386 
2387 	bzero(vr, sizeof (vrrp_vr_t));
2388 	vr->vvr_state = VRRP_STATE_NONE;
2389 	vr->vvr_timer_id = -1;
2390 	vrrpd_state_trans(VRRP_STATE_NONE, VRRP_STATE_INIT, vr);
2391 	(void) memcpy(&vr->vvr_conf, conf, sizeof (vrrp_vr_conf_t));
2392 	vr->vvr_conf.vvc_enabled = _B_FALSE;
2393 	TAILQ_INSERT_HEAD(&vrrp_vr_list, vr, vvr_next);
2394 	return (VRRP_SUCCESS);
2395 }
2396 
2397 static void
2398 vrrpd_delete_vr(vrrp_vr_t *vr)
2399 {
2400 	vrrp_log(VRRP_DBG0, "vrrpd_delete_vr(%s)", vr->vvr_conf.vvc_name);
2401 	if (vr->vvr_conf.vvc_enabled)
2402 		vrrpd_disable_vr(vr, NULL, _B_FALSE);
2403 	assert(vr->vvr_state == VRRP_STATE_INIT);
2404 	vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_NONE, vr);
2405 	TAILQ_REMOVE(&vrrp_vr_list, vr, vvr_next);
2406 	(void) free(vr);
2407 }
2408 
2409 static vrrp_err_t
2410 vrrpd_enable_vr(vrrp_vr_t *vr)
2411 {
2412 	vrrp_err_t	rx_err, tx_err, err = VRRP_EINVAL;
2413 
2414 	vrrp_log(VRRP_DBG0, "vrrpd_enable_vr(%s)", vr->vvr_conf.vvc_name);
2415 
2416 	assert(vr->vvr_conf.vvc_enabled);
2417 
2418 	/*
2419 	 * This VRRP router has been successfully enabled and start
2420 	 * participating.
2421 	 */
2422 	if (vr->vvr_state != VRRP_STATE_INIT)
2423 		return (VRRP_SUCCESS);
2424 
2425 	if ((rx_err = vrrpd_init_rxsock(vr)) == VRRP_SUCCESS) {
2426 		/*
2427 		 * Select the primary IP address. Even if this time
2428 		 * primary IP selection failed, we will reselect the
2429 		 * primary IP address when new IP address comes up.
2430 		 */
2431 		vrrpd_reselect_primary(vr->vvr_pif);
2432 		if (vr->vvr_pif->vvi_pip == NULL) {
2433 			vrrp_log(VRRP_DBG0, "vrrpd_enable_vr(%s): "
2434 			    "select_primary over %s failed",
2435 			    vr->vvr_conf.vvc_name, vr->vvr_pif->vvi_ifname);
2436 			rx_err = VRRP_ENOPRIM;
2437 		}
2438 	}
2439 
2440 	/*
2441 	 * Initialize the TX socket used for this vrrp_vr_t to send the
2442 	 * multicast packets.
2443 	 */
2444 	tx_err = vrrpd_init_txsock(vr);
2445 
2446 	/*
2447 	 * Only start the state transition if sockets for both RX and TX are
2448 	 * initialized correctly.
2449 	 */
2450 	if (rx_err != VRRP_SUCCESS || tx_err != VRRP_SUCCESS) {
2451 		/*
2452 		 * Record the error information for diagnose purpose.
2453 		 */
2454 		vr->vvr_err = (rx_err == VRRP_SUCCESS) ? tx_err : rx_err;
2455 		return (err);
2456 	}
2457 
2458 	if (vr->vvr_conf.vvc_pri == 255)
2459 		err = vrrpd_state_i2m(vr);
2460 	else
2461 		err = vrrpd_state_i2b(vr);
2462 
2463 	if (err != VRRP_SUCCESS) {
2464 		vr->vvr_err = err;
2465 		vr->vvr_pif->vvi_pip = NULL;
2466 		vrrpd_fini_txsock(vr);
2467 		vrrpd_fini_rxsock(vr);
2468 	}
2469 	return (err);
2470 }
2471 
2472 /*
2473  * Given the removed interface, see whether the given VRRP router would
2474  * be affected and stop participating the VRRP protocol.
2475  *
2476  * If intf is NULL, VR disabling request is coming from the admin.
2477  */
2478 static void
2479 vrrpd_disable_vr(vrrp_vr_t *vr, vrrp_intf_t *intf, boolean_t primary_addr_gone)
2480 {
2481 	vrrp_log(VRRP_DBG0, "vrrpd_disable_vr(%s): %s%s", vr->vvr_conf.vvc_name,
2482 	    intf == NULL ? "requested by admin" : intf->vvi_ifname,
2483 	    intf == NULL ? "" : (primary_addr_gone ? "primary address gone" :
2484 	    "interface deleted"));
2485 
2486 	/*
2487 	 * An interface is deleted, see whether this interface is the
2488 	 * physical interface or the VNIC of the given VRRP router.
2489 	 * If so, continue to disable the VRRP router.
2490 	 */
2491 	if (!primary_addr_gone && (intf != NULL) && (intf != vr->vvr_pif) &&
2492 	    (intf != vr->vvr_vif)) {
2493 		return;
2494 	}
2495 
2496 	/*
2497 	 * If this is the case that the primary IP address is gone,
2498 	 * and we failed to reselect another primary IP address,
2499 	 * continue to disable the VRRP router.
2500 	 */
2501 	if (primary_addr_gone && intf != vr->vvr_pif)
2502 		return;
2503 
2504 	vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): disabling",
2505 	    vr->vvr_conf.vvc_name);
2506 
2507 	if (vr->vvr_state == VRRP_STATE_MASTER) {
2508 		/*
2509 		 * If this router is disabled by the administrator, send
2510 		 * the zero-priority advertisement to indicate the Master
2511 		 * stops participating VRRP.
2512 		 */
2513 		if (intf == NULL)
2514 			(void) vrrpd_send_adv(vr, _B_TRUE);
2515 
2516 		vrrpd_state_m2i(vr);
2517 	} else  if (vr->vvr_state == VRRP_STATE_BACKUP) {
2518 		vrrpd_state_b2i(vr);
2519 	}
2520 
2521 	/*
2522 	 * If no primary IP address can be selected, the VRRP router
2523 	 * stays at the INIT state and will become BACKUP and MASTER when
2524 	 * a primary IP address is reselected.
2525 	 */
2526 	if (primary_addr_gone) {
2527 		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): primary IP "
2528 		    "is removed", vr->vvr_conf.vvc_name);
2529 		vr->vvr_err = VRRP_ENOPRIM;
2530 	} else if (intf == NULL) {
2531 		/*
2532 		 * The VRRP router is disable by the administrator
2533 		 */
2534 		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): disabled by admin",
2535 		    vr->vvr_conf.vvc_name);
2536 		vr->vvr_err = VRRP_SUCCESS;
2537 		vrrpd_fini_txsock(vr);
2538 		vrrpd_fini_rxsock(vr);
2539 	} else if (intf == vr->vvr_pif) {
2540 		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): physical interface "
2541 		    "%s removed", vr->vvr_conf.vvc_name, intf->vvi_ifname);
2542 		vr->vvr_err = VRRP_ENOPRIM;
2543 		vrrpd_fini_rxsock(vr);
2544 	} else if (intf == vr->vvr_vif) {
2545 		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): VNIC interface %s"
2546 		    " removed", vr->vvr_conf.vvc_name, intf->vvi_ifname);
2547 		vr->vvr_err = VRRP_ENOVIRT;
2548 		vrrpd_fini_txsock(vr);
2549 	}
2550 }
2551 
2552 vrrp_err_t
2553 vrrpd_create(vrrp_vr_conf_t *conf, boolean_t updateconf)
2554 {
2555 	vrrp_err_t	err = VRRP_SUCCESS;
2556 
2557 	vrrp_log(VRRP_DBG0, "vrrpd_create(%s, %s, %d)", conf->vvc_name,
2558 	    conf->vvc_link, conf->vvc_vrid);
2559 
2560 	assert(conf != NULL);
2561 
2562 	/*
2563 	 * Sanity check
2564 	 */
2565 	if ((strlen(conf->vvc_name) == 0) ||
2566 	    (strlen(conf->vvc_link) == 0) ||
2567 	    (conf->vvc_vrid < VRRP_VRID_MIN ||
2568 	    conf->vvc_vrid > VRRP_VRID_MAX) ||
2569 	    (conf->vvc_pri < VRRP_PRI_MIN ||
2570 	    conf->vvc_pri > VRRP_PRI_OWNER) ||
2571 	    (conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
2572 	    conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX) ||
2573 	    (conf->vvc_af != AF_INET && conf->vvc_af != AF_INET6) ||
2574 	    (conf->vvc_pri == VRRP_PRI_OWNER && !conf->vvc_accept)) {
2575 		vrrp_log(VRRP_DBG1, "vrrpd_create(%s): invalid argument",
2576 		    conf->vvc_name);
2577 		return (VRRP_EINVAL);
2578 	}
2579 
2580 	if (!vrrp_valid_name(conf->vvc_name)) {
2581 		vrrp_log(VRRP_DBG1, "vrrpd_create(): %s is not a valid router "
2582 		    "name", conf->vvc_name);
2583 		return (VRRP_EINVALVRNAME);
2584 	}
2585 
2586 	if (vrrpd_lookup_vr_by_name(conf->vvc_name) != NULL) {
2587 		vrrp_log(VRRP_DBG1, "vrrpd_create(): %s already exists",
2588 		    conf->vvc_name);
2589 		return (VRRP_EINSTEXIST);
2590 	}
2591 
2592 	if (vrrpd_lookup_vr_by_vrid(conf->vvc_link, conf->vvc_vrid,
2593 	    conf->vvc_af) != NULL) {
2594 		vrrp_log(VRRP_DBG1, "vrrpd_create(): VRID %d/%s over %s "
2595 		    "already exists", conf->vvc_vrid, af_str(conf->vvc_af),
2596 		    conf->vvc_link);
2597 		return (VRRP_EVREXIST);
2598 	}
2599 
2600 	if (updateconf && (err = vrrpd_updateconf(conf,
2601 	    VRRP_CONF_UPDATE)) != VRRP_SUCCESS) {
2602 		vrrp_log(VRRP_ERR, "vrrpd_create(): failed to update "
2603 		    "configuration for %s", conf->vvc_name);
2604 		return (err);
2605 	}
2606 
2607 	err = vrrpd_create_vr(conf);
2608 	if (err != VRRP_SUCCESS && updateconf)
2609 		(void) vrrpd_updateconf(conf, VRRP_CONF_DELETE);
2610 
2611 	return (err);
2612 }
2613 
/*
 * Delete the router named vn: remove its entry from the configuration
 * file and, only if that worked, destroy the in-memory router.
 *
 * Returns VRRP_SUCCESS, VRRP_ENOTFOUND if no such router exists, or the
 * error from vrrpd_updateconf().
 */
static vrrp_err_t
vrrpd_delete(const char *vn)
{
	vrrp_vr_t	*router;
	vrrp_err_t	rc;

	vrrp_log(VRRP_DBG0, "vrrpd_delete(%s)", vn);

	router = vrrpd_lookup_vr_by_name(vn);
	if (router == NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_delete(): %s not exists", vn);
		return (VRRP_ENOTFOUND);
	}

	rc = vrrpd_updateconf(&router->vvr_conf, VRRP_CONF_DELETE);
	if (rc == VRRP_SUCCESS) {
		vrrpd_delete_vr(router);
		return (VRRP_SUCCESS);
	}

	vrrp_log(VRRP_ERR, "vrrpd_delete(): failed to delete "
	    "configuration for %s", router->vvr_conf.vvc_name);
	return (rc);
}
2637 
2638 static vrrp_err_t
2639 vrrpd_enable(const char *vn, boolean_t updateconf)
2640 {
2641 	vrrp_vr_t		*vr;
2642 	vrrp_vr_conf_t		*conf;
2643 	uint32_t		flags;
2644 	datalink_class_t	class;
2645 	vrrp_err_t		err = VRRP_SUCCESS;
2646 
2647 	vrrp_log(VRRP_DBG0, "vrrpd_enable(%s)", vn);
2648 
2649 	if ((vr = vrrpd_lookup_vr_by_name(vn)) == NULL) {
2650 		vrrp_log(VRRP_DBG1, "vrrpd_enable(): %s does not exist", vn);
2651 		return (VRRP_ENOTFOUND);
2652 	}
2653 
2654 	/*
2655 	 * The VR is already enabled.
2656 	 */
2657 	conf = &vr->vvr_conf;
2658 	if (conf->vvc_enabled) {
2659 		vrrp_log(VRRP_DBG1, "vrrpd_enable(): %s is already "
2660 		    "enabled", vn);
2661 		return (VRRP_EALREADY);
2662 	}
2663 
2664 	/*
2665 	 * Check whether the link exists.
2666 	 */
2667 	if ((strlen(conf->vvc_link) == 0) || dladm_name2info(vrrpd_vh->vh_dh,
2668 	    conf->vvc_link, NULL, &flags, &class, NULL) != DLADM_STATUS_OK ||
2669 	    !(flags & DLADM_OPT_ACTIVE) || ((class != DATALINK_CLASS_PHYS) &&
2670 	    (class != DATALINK_CLASS_VLAN) && (class != DATALINK_CLASS_AGGR))) {
2671 		vrrp_log(VRRP_DBG1, "vrrpd_enable(%s): invalid link %s",
2672 		    vn, conf->vvc_link);
2673 		return (VRRP_EINVALLINK);
2674 	}
2675 
2676 	/*
2677 	 * Get the associated VNIC name by the given interface/vrid/
2678 	 * address famitly.
2679 	 */
2680 	err = vrrp_get_vnicname(vrrpd_vh, conf->vvc_vrid,
2681 	    conf->vvc_af, conf->vvc_link, NULL, NULL, vr->vvr_vnic,
2682 	    sizeof (vr->vvr_vnic));
2683 	if (err != VRRP_SUCCESS) {
2684 		vrrp_log(VRRP_DBG1, "vrrpd_enable(%s): no VNIC for VRID %d/%s "
2685 		    "over %s", vn, conf->vvc_vrid, af_str(conf->vvc_af),
2686 		    conf->vvc_link);
2687 		err = VRRP_ENOVNIC;
2688 		goto fail;
2689 	}
2690 
2691 	/*
2692 	 * Find the right VNIC, primary interface and get the list of the
2693 	 * protected IP adressses and primary IP address. Note that if
2694 	 * either interface is NULL (no IP addresses configured over the
2695 	 * interface), we will still continue and mark this VRRP router
2696 	 * as "enabled".
2697 	 */
2698 	vr->vvr_conf.vvc_enabled = _B_TRUE;
2699 	if (updateconf && (err = vrrpd_updateconf(&vr->vvr_conf,
2700 	    VRRP_CONF_UPDATE)) != VRRP_SUCCESS) {
2701 		vrrp_log(VRRP_ERR, "vrrpd_enable(): failed to update "
2702 		    "configuration for %s", vr->vvr_conf.vvc_name);
2703 		goto fail;
2704 	}
2705 
2706 	/*
2707 	 * If vrrpd_setup_vr() fails, it is possible that there is no IP
2708 	 * addresses over ether the primary interface or the VNIC yet,
2709 	 * return success in this case, the VRRP router will stay in
2710 	 * the initialized state and start to work when the IP address is
2711 	 * configured.
2712 	 */
2713 	(void) vrrpd_enable_vr(vr);
2714 	return (VRRP_SUCCESS);
2715 
2716 fail:
2717 	vr->vvr_conf.vvc_enabled = _B_FALSE;
2718 	vr->vvr_vnic[0] = '\0';
2719 	return (err);
2720 }
2721 
/*
 * Disable the router named vn on behalf of the administrator.
 *
 * The "disabled" state is written to the configuration file first; only
 * when that succeeded is the router actually stopped and its VNIC name
 * cleared. If the file update fails the router stays enabled.
 *
 * Returns VRRP_SUCCESS, VRRP_ENOTFOUND, VRRP_EALREADY if the router was
 * not enabled, or the error from vrrpd_updateconf().
 */
static vrrp_err_t
vrrpd_disable(const char *vn)
{
	vrrp_vr_t	*router;
	vrrp_err_t	rc;

	vrrp_log(VRRP_DBG0, "vrrpd_disable(%s)", vn);

	router = vrrpd_lookup_vr_by_name(vn);
	if (router == NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_disable(): %s does not exist", vn);
		return (VRRP_ENOTFOUND);
	}

	/*
	 * Nothing to do for a router that is disabled already.
	 */
	if (!router->vvr_conf.vvc_enabled) {
		vrrp_log(VRRP_DBG1, "vrrpd_disable(): %s was not enabled", vn);
		return (VRRP_EALREADY);
	}

	/* Flip the flag for the file update; flip it back on failure. */
	router->vvr_conf.vvc_enabled = _B_FALSE;
	rc = vrrpd_updateconf(&router->vvr_conf, VRRP_CONF_UPDATE);
	if (rc != VRRP_SUCCESS) {
		router->vvr_conf.vvc_enabled = _B_TRUE;
		vrrp_log(VRRP_ERR, "vrrpd_disable(): failed to update "
		    "configuration for %s", router->vvr_conf.vvc_name);
		return (rc);
	}

	vrrpd_disable_vr(router, NULL, _B_FALSE);
	router->vvr_vnic[0] = '\0';
	return (VRRP_SUCCESS);
}
2756 
2757 static vrrp_err_t
2758 vrrpd_modify(vrrp_vr_conf_t *conf, uint32_t mask)
2759 {
2760 	vrrp_vr_t	*vr;
2761 	vrrp_vr_conf_t	savconf;
2762 	int		pri;
2763 	boolean_t	accept, set_accept = _B_FALSE;
2764 	vrrp_err_t	err;
2765 
2766 	vrrp_log(VRRP_DBG0, "vrrpd_modify(%s)", conf->vvc_name);
2767 
2768 	if (mask == 0)
2769 		return (VRRP_SUCCESS);
2770 
2771 	if ((vr = vrrpd_lookup_vr_by_name(conf->vvc_name)) == NULL) {
2772 		vrrp_log(VRRP_DBG1, "vrrpd_modify(): cannot find the given "
2773 		    "VR instance: %s", conf->vvc_name);
2774 		return (VRRP_ENOTFOUND);
2775 	}
2776 
2777 	if (mask & VRRP_CONF_INTERVAL) {
2778 		if (conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
2779 		    conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX) {
2780 			vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): invalid "
2781 			    "adver_interval %d", conf->vvc_name,
2782 			    conf->vvc_adver_int);
2783 			return (VRRP_EINVAL);
2784 		}
2785 	}
2786 
2787 	pri = vr->vvr_conf.vvc_pri;
2788 	if (mask & VRRP_CONF_PRIORITY) {
2789 		if (conf->vvc_pri < VRRP_PRI_MIN ||
2790 		    conf->vvc_pri > VRRP_PRI_OWNER) {
2791 			vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): invalid "
2792 			    "priority %d", conf->vvc_name, conf->vvc_pri);
2793 			return (VRRP_EINVAL);
2794 		}
2795 		pri = conf->vvc_pri;
2796 	}
2797 
2798 	accept = vr->vvr_conf.vvc_accept;
2799 	if (mask & VRRP_CONF_ACCEPT)
2800 		accept = conf->vvc_accept;
2801 
2802 	if (pri == VRRP_PRI_OWNER && !accept) {
2803 		vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): accept mode must be "
2804 		    "true for VRRP address owner", conf->vvc_name);
2805 		return (VRRP_EINVAL);
2806 	}
2807 
2808 	if ((mask & VRRP_CONF_ACCEPT) && (vr->vvr_conf.vvc_accept != accept)) {
2809 		err = vrrpd_set_noaccept(vr, !accept);
2810 		if (err != VRRP_SUCCESS) {
2811 			vrrp_log(VRRP_ERR, "vrrpd_modify(%s): access mode "
2812 			    "updating failed: %s", conf->vvc_name,
2813 			    vrrp_err2str(err));
2814 			return (err);
2815 		}
2816 		set_accept = _B_TRUE;
2817 	}
2818 
2819 	/*
2820 	 * Save the current configuration, so it can be restored if the
2821 	 * following fails.
2822 	 */
2823 	(void) memcpy(&savconf, &vr->vvr_conf, sizeof (vrrp_vr_conf_t));
2824 	if (mask & VRRP_CONF_PREEMPT)
2825 		vr->vvr_conf.vvc_preempt = conf->vvc_preempt;
2826 
2827 	if (mask & VRRP_CONF_ACCEPT)
2828 		vr->vvr_conf.vvc_accept = accept;
2829 
2830 	if (mask & VRRP_CONF_PRIORITY)
2831 		vr->vvr_conf.vvc_pri = pri;
2832 
2833 	if (mask & VRRP_CONF_INTERVAL)
2834 		vr->vvr_conf.vvc_adver_int = conf->vvc_adver_int;
2835 
2836 	err = vrrpd_updateconf(&vr->vvr_conf, VRRP_CONF_UPDATE);
2837 	if (err != VRRP_SUCCESS) {
2838 		vrrp_log(VRRP_ERR, "vrrpd_modify(%s): configuration update "
2839 		    "failed: %s", conf->vvc_name, vrrp_err2str(err));
2840 		if (set_accept)
2841 			(void) vrrpd_set_noaccept(vr, accept);
2842 		(void) memcpy(&vr->vvr_conf, &savconf, sizeof (vrrp_vr_conf_t));
2843 		return (err);
2844 	}
2845 
2846 	if ((mask & VRRP_CONF_PRIORITY) && (vr->vvr_state == VRRP_STATE_BACKUP))
2847 		vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
2848 
2849 	if ((mask & VRRP_CONF_INTERVAL) && (vr->vvr_state == VRRP_STATE_MASTER))
2850 		vr->vvr_timeout = conf->vvc_adver_int;
2851 
2852 	return (VRRP_SUCCESS);
2853 }
2854 
2855 static void
2856 vrrpd_list(vrid_t vrid, char *ifname, int af, vrrp_ret_list_t *ret,
2857     size_t *sizep)
2858 {
2859 	vrrp_vr_t	*vr;
2860 	char		*p = (char *)ret + sizeof (vrrp_ret_list_t);
2861 	size_t		size = (*sizep) - sizeof (vrrp_ret_list_t);
2862 
2863 	vrrp_log(VRRP_DBG0, "vrrpd_list(%d_%s_%s)", vrid, ifname, af_str(af));
2864 
2865 	ret->vrl_cnt = 0;
2866 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
2867 		if (vrid !=  VRRP_VRID_NONE && vr->vvr_conf.vvc_vrid != vrid)
2868 			continue;
2869 
2870 		if (strlen(ifname) != 0 && strcmp(ifname,
2871 		    vr->vvr_conf.vvc_link) == 0) {
2872 			continue;
2873 		}
2874 
2875 		if ((af == AF_INET || af == AF_INET6) &&
2876 		    vr->vvr_conf.vvc_af != af)
2877 			continue;
2878 
2879 		if (size < VRRP_NAME_MAX) {
2880 			vrrp_log(VRRP_DBG1, "vrrpd_list(): buffer size too "
2881 			    "small to hold %d router names", ret->vrl_cnt);
2882 			*sizep = sizeof (vrrp_ret_list_t);
2883 			ret->vrl_err = VRRP_ETOOSMALL;
2884 			return;
2885 		}
2886 		(void) strlcpy(p, vr->vvr_conf.vvc_name, VRRP_NAME_MAX);
2887 		p += (strlen(vr->vvr_conf.vvc_name) + 1);
2888 		ret->vrl_cnt++;
2889 		size -= VRRP_NAME_MAX;
2890 	}
2891 
2892 	*sizep = sizeof (vrrp_ret_list_t) + ret->vrl_cnt * VRRP_NAME_MAX;
2893 	vrrp_log(VRRP_DBG1, "vrrpd_list() return %d", ret->vrl_cnt);
2894 	ret->vrl_err = VRRP_SUCCESS;
2895 }
2896 
2897 static void
2898 vrrpd_query(const char *vn, vrrp_ret_query_t *ret, size_t *sizep)
2899 {
2900 	vrrp_queryinfo_t	*infop;
2901 	vrrp_vr_t		*vr;
2902 	vrrp_intf_t		*vif;
2903 	vrrp_ip_t		*ip;
2904 	struct timeval		now;
2905 	uint32_t		vipcnt = 0;
2906 	size_t			size = *sizep;
2907 
2908 	vrrp_log(VRRP_DBG1, "vrrpd_query(%s)", vn);
2909 
2910 	if ((vr = vrrpd_lookup_vr_by_name(vn)) == NULL) {
2911 		vrrp_log(VRRP_DBG1, "vrrpd_query(): %s does not exist", vn);
2912 		*sizep = sizeof (vrrp_ret_query_t);
2913 		ret->vrq_err = VRRP_ENOTFOUND;
2914 		return;
2915 	}
2916 
2917 	/*
2918 	 * Get the virtual IP list if the router is not in the INIT state.
2919 	 */
2920 	if (vr->vvr_state != VRRP_STATE_INIT) {
2921 		vif = vr->vvr_vif;
2922 		TAILQ_FOREACH(ip, &vif->vvi_iplist, vip_next) {
2923 			vipcnt++;
2924 		}
2925 	}
2926 
2927 	*sizep = sizeof (vrrp_ret_query_t);
2928 	*sizep += (vipcnt == 0) ? 0 : (vipcnt - 1) * sizeof (vrrp_addr_t);
2929 	if (*sizep > size) {
2930 		vrrp_log(VRRP_ERR, "vrrpd_query(): not enough space to hold "
2931 		    "%d virtual IPs", vipcnt);
2932 		*sizep = sizeof (vrrp_ret_query_t);
2933 		ret->vrq_err = VRRP_ETOOSMALL;
2934 		return;
2935 	}
2936 
2937 	(void) gettimeofday(&now, NULL);
2938 
2939 	bzero(ret, *sizep);
2940 	infop = &ret->vrq_qinfo;
2941 	(void) memcpy(&infop->show_vi,
2942 	    &(vr->vvr_conf), sizeof (vrrp_vr_conf_t));
2943 	(void) memcpy(&infop->show_vs,
2944 	    &(vr->vvr_sinfo), sizeof (vrrp_stateinfo_t));
2945 	(void) strlcpy(infop->show_va.va_vnic, vr->vvr_vnic, MAXLINKNAMELEN);
2946 	infop->show_vt.vt_since_last_tran = timeval_to_milli(
2947 	    timeval_delta(now, vr->vvr_sinfo.vs_st_time));
2948 
2949 	if (vr->vvr_state == VRRP_STATE_INIT) {
2950 		ret->vrq_err = VRRP_SUCCESS;
2951 		return;
2952 	}
2953 
2954 	vipcnt = 0;
2955 	TAILQ_FOREACH(ip, &vif->vvi_iplist, vip_next) {
2956 		(void) memcpy(&infop->show_va.va_vips[vipcnt++],
2957 		    &ip->vip_addr, sizeof (vrrp_addr_t));
2958 	}
2959 	infop->show_va.va_vipcnt = vipcnt;
2960 
2961 	(void) memcpy(&infop->show_va.va_primary,
2962 	    &vr->vvr_pif->vvi_pip->vip_addr, sizeof (vrrp_addr_t));
2963 
2964 	(void) memcpy(&infop->show_vp, &(vr->vvr_peer), sizeof (vrrp_peer_t));
2965 
2966 	/*
2967 	 * Check whether there is a peer.
2968 	 */
2969 	if (!VRRPADDR_UNSPECIFIED(vr->vvr_conf.vvc_af,
2970 	    &(vr->vvr_peer.vp_addr))) {
2971 		infop->show_vt.vt_since_last_adv = timeval_to_milli(
2972 		    timeval_delta(now, vr->vvr_peer.vp_time));
2973 	}
2974 
2975 	if (vr->vvr_state == VRRP_STATE_BACKUP) {
2976 		infop->show_vt.vt_master_down_intv =
2977 		    MASTER_DOWN_INTERVAL_VR(vr);
2978 	}
2979 
2980 	ret->vrq_err = VRRP_SUCCESS;
2981 }
2982 
2983 /*
2984  * Build the VRRP packet (not including the IP header). Return the
2985  * payload length.
2986  *
2987  * If zero_pri is set to be B_TRUE, then this is the specical zero-priority
2988  * advertisement which is sent by the Master to indicate that it has been
2989  * stopped participating in VRRP.
2990  */
2991 static size_t
2992 vrrpd_build_vrrp(vrrp_vr_t *vr, uchar_t *buf, int buflen, boolean_t zero_pri)
2993 {
2994 	/* LINTED E_BAD_PTR_CAST_ALIGN */
2995 	vrrp_pkt_t	*vp = (vrrp_pkt_t *)buf;
2996 	/* LINTED E_BAD_PTR_CAST_ALIGN */
2997 	struct in_addr	*a4 = (struct in_addr *)(vp + 1);
2998 	/* LINTED E_BAD_PTR_CAST_ALIGN */
2999 	struct in6_addr *a6 = (struct in6_addr *)(vp + 1);
3000 	vrrp_intf_t	*vif = vr->vvr_vif;
3001 	vrrp_ip_t	*vip;
3002 	int		af = vif->vvi_af;
3003 	size_t		size = sizeof (vrrp_pkt_t);
3004 	uint16_t	rsvd_adver_int;
3005 	int		nip = 0;
3006 
3007 	vrrp_log(VRRP_DBG1, "vrrpd_build_vrrp(%s, %s_priority): intv %d",
3008 	    vr->vvr_conf.vvc_name, zero_pri ? "zero" : "non-zero",
3009 	    vr->vvr_conf.vvc_adver_int);
3010 
3011 	TAILQ_FOREACH(vip, &vif->vvi_iplist, vip_next) {
3012 		if ((size += ((af == AF_INET) ? sizeof (struct in_addr) :
3013 		    sizeof (struct in6_addr))) > buflen) {
3014 			vrrp_log(VRRP_ERR, "vrrpd_build_vrrp(%s): buffer size "
3015 			    "not big enough %d", vr->vvr_conf.vvc_name, size);
3016 			return (0);
3017 		}
3018 
3019 		if (af == AF_INET)
3020 			a4[nip++] = vip->vip_addr.in4.sin_addr;
3021 		else
3022 			a6[nip++] = vip->vip_addr.in6.sin6_addr;
3023 	}
3024 
3025 	if (nip == 0) {
3026 		vrrp_log(VRRP_ERR, "vrrpd_build_vrrp(%s): no virtual IP "
3027 		    "address", vr->vvr_conf.vvc_name);
3028 		return (0);
3029 	}
3030 
3031 	vp->vp_vers_type = (VRRP_VERSION << 4) | VRRP_PKT_ADVERT;
3032 	vp->vp_vrid = vr->vvr_conf.vvc_vrid;
3033 	vp->vp_prio = zero_pri ? VRRP_PRIO_ZERO : vr->vvr_conf.vvc_pri;
3034 
3035 	rsvd_adver_int = MSEC2CENTISEC(vr->vvr_conf.vvc_adver_int) & 0x0fff;
3036 	vp->vp_rsvd_adver_int = htons(rsvd_adver_int);
3037 	vp->vp_ipnum = nip;
3038 
3039 	/*
3040 	 * Set the checksum to 0 first, then caculate it.
3041 	 */
3042 	vp->vp_chksum = 0;
3043 	if (af == AF_INET) {
3044 		vp->vp_chksum = vrrp_cksum4(
3045 		    &vr->vvr_pif->vvi_pip->vip_addr.in4.sin_addr,
3046 		    &vrrp_muladdr4.in4.sin_addr, size, vp);
3047 	} else {
3048 		vp->vp_chksum = vrrp_cksum6(
3049 		    &vr->vvr_pif->vvi_pip->vip_addr.in6.sin6_addr,
3050 		    &vrrp_muladdr6.in6.sin6_addr, size, vp);
3051 	}
3052 
3053 	return (size);
3054 }
3055 
3056 /*
3057  * We need to build the IPv4 header on our own.
3058  */
3059 static vrrp_err_t
3060 vrrpd_send_adv_v4(vrrp_vr_t *vr, uchar_t *buf, size_t len, boolean_t zero_pri)
3061 {
3062 	/* LINTED E_BAD_PTR_CAST_ALIGN */
3063 	struct ip *ip = (struct ip *)buf;
3064 	size_t plen;
3065 
3066 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v4(%s)", vr->vvr_conf.vvc_name);
3067 
3068 	if ((plen = vrrpd_build_vrrp(vr, buf + sizeof (struct ip),
3069 	    len - sizeof (struct ip), zero_pri)) == 0) {
3070 		return (VRRP_ETOOSMALL);
3071 	}
3072 
3073 	ip->ip_hl = sizeof (struct ip) >> 2;
3074 	ip->ip_v = IPV4_VERSION;
3075 	ip->ip_tos = 0;
3076 	plen += sizeof (struct ip);
3077 	ip->ip_len = htons(plen);
3078 	ip->ip_off = 0;
3079 	ip->ip_ttl = VRRP_IP_TTL;
3080 	ip->ip_p = IPPROTO_VRRP;
3081 	ip->ip_src = vr->vvr_pif->vvi_pip->vip_addr.in4.sin_addr;
3082 	ip->ip_dst = vrrp_muladdr4.in4.sin_addr;
3083 
3084 	/*
3085 	 * The kernel will set the IP cksum and the IPv4 identification.
3086 	 */
3087 	ip->ip_id = 0;
3088 	ip->ip_sum = 0;
3089 
3090 	if ((len = sendto(vr->vvr_vif->vvi_sockfd, buf, plen, 0,
3091 	    (const struct sockaddr *)&vrrp_muladdr4,
3092 	    sizeof (struct sockaddr_in))) != plen) {
3093 		vrrp_log(VRRP_ERR, "vrrpd_send_adv_v4(): sendto() on "
3094 		    "(vrid:%d, %s, %s) failed: %s sent:%d expect:%d",
3095 		    vr->vvr_conf.vvc_vrid, vr->vvr_vif->vvi_ifname,
3096 		    af_str(vr->vvr_conf.vvc_af), strerror(errno), len, plen);
3097 		return (VRRP_ESYS);
3098 	}
3099 
3100 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v4(%s) succeed",
3101 	    vr->vvr_conf.vvc_name);
3102 	return (VRRP_SUCCESS);
3103 }
3104 
3105 static vrrp_err_t
3106 vrrpd_send_adv_v6(vrrp_vr_t *vr, uchar_t *buf, size_t len, boolean_t zero_pri)
3107 {
3108 	struct msghdr msg6;
3109 	size_t hoplimit_space = 0;
3110 	size_t pktinfo_space = 0;
3111 	size_t bufspace = 0;
3112 	struct in6_pktinfo *pktinfop;
3113 	struct cmsghdr *cmsgp;
3114 	uchar_t *cmsg_datap;
3115 	struct iovec iov;
3116 	size_t plen;
3117 
3118 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v6(%s)", vr->vvr_conf.vvc_name);
3119 
3120 	if ((plen = vrrpd_build_vrrp(vr, buf, len, zero_pri)) == 0)
3121 		return (VRRP_ETOOSMALL);
3122 
3123 	msg6.msg_control = NULL;
3124 	msg6.msg_controllen = 0;
3125 
3126 	hoplimit_space = sizeof (int);
3127 	bufspace += sizeof (struct cmsghdr) + _MAX_ALIGNMENT +
3128 	    hoplimit_space + _MAX_ALIGNMENT;
3129 
3130 	pktinfo_space = sizeof (struct in6_pktinfo);
3131 	bufspace += sizeof (struct cmsghdr) + _MAX_ALIGNMENT +
3132 	    pktinfo_space + _MAX_ALIGNMENT;
3133 
3134 	/*
3135 	 * We need to temporarily set the msg6.msg_controllen to bufspace
3136 	 * (we will later trim it to actual length used). This is needed because
3137 	 * CMSG_NXTHDR() uses it to check we have not exceeded the bounds.
3138 	 */
3139 	bufspace += sizeof (struct cmsghdr);
3140 	msg6.msg_controllen = bufspace;
3141 
3142 	msg6.msg_control = (struct cmsghdr *)malloc(bufspace);
3143 	if (msg6.msg_control == NULL) {
3144 		vrrp_log(VRRP_ERR, "vrrpd_send_adv_v6(%s): memory allocation "
3145 		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3146 		return (VRRP_ENOMEM);
3147 	}
3148 
3149 	cmsgp = CMSG_FIRSTHDR(&msg6);
3150 
3151 	cmsgp->cmsg_level = IPPROTO_IPV6;
3152 	cmsgp->cmsg_type = IPV6_HOPLIMIT;
3153 	cmsg_datap = CMSG_DATA(cmsgp);
3154 	/* LINTED */
3155 	*(int *)cmsg_datap = VRRP_IP_TTL;
3156 	cmsgp->cmsg_len = cmsg_datap + hoplimit_space - (uchar_t *)cmsgp;
3157 	cmsgp = CMSG_NXTHDR(&msg6, cmsgp);
3158 
3159 	cmsgp->cmsg_level = IPPROTO_IPV6;
3160 	cmsgp->cmsg_type = IPV6_PKTINFO;
3161 	cmsg_datap = CMSG_DATA(cmsgp);
3162 
3163 	/* LINTED */
3164 	pktinfop = (struct in6_pktinfo *)cmsg_datap;
3165 	/*
3166 	 * We don't know if pktinfop->ipi6_addr is aligned properly,
3167 	 * therefore let's use bcopy, instead of assignment.
3168 	 */
3169 	(void) bcopy(&vr->vvr_pif->vvi_pip->vip_addr.in6.sin6_addr,
3170 	    &pktinfop->ipi6_addr, sizeof (struct in6_addr));
3171 
3172 	/*
3173 	 *  We can assume pktinfop->ipi6_ifindex is 32 bit aligned.
3174 	 */
3175 	pktinfop->ipi6_ifindex = vr->vvr_vif->vvi_ifindex;
3176 	cmsgp->cmsg_len = cmsg_datap + pktinfo_space - (uchar_t *)cmsgp;
3177 	cmsgp = CMSG_NXTHDR(&msg6, cmsgp);
3178 	msg6.msg_controllen = (char *)cmsgp - (char *)msg6.msg_control;
3179 
3180 	msg6.msg_name = &vrrp_muladdr6;
3181 	msg6.msg_namelen = sizeof (struct sockaddr_in6);
3182 
3183 	iov.iov_base = buf;
3184 	iov.iov_len = plen;
3185 	msg6.msg_iov = &iov;
3186 	msg6.msg_iovlen = 1;
3187 
3188 	if ((len = sendmsg(vr->vvr_vif->vvi_sockfd,
3189 	    (const struct msghdr *)&msg6, 0)) != plen) {
3190 		vrrp_log(VRRP_ERR, "vrrpd_send_adv_v6(%s): sendmsg() failed: "
3191 		    "%s expect %d sent %d", vr->vvr_conf.vvc_name,
3192 		    strerror(errno), plen, len);
3193 		(void) free(msg6.msg_control);
3194 		return (VRRP_ESYS);
3195 	}
3196 
3197 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v6(%s) succeed",
3198 	    vr->vvr_conf.vvc_name);
3199 	(void) free(msg6.msg_control);
3200 	return (VRRP_SUCCESS);
3201 }
3202 
3203 /*
3204  * Send the VRRP advertisement packets.
3205  */
3206 static vrrp_err_t
3207 vrrpd_send_adv(vrrp_vr_t *vr, boolean_t zero_pri)
3208 {
3209 	uint64_t buf[(IP_MAXPACKET + 1)/8];
3210 
3211 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv(%s, %s_priority)",
3212 	    vr->vvr_conf.vvc_name, zero_pri ? "zero" : "non_zero");
3213 
3214 	assert(vr->vvr_pif->vvi_pip != NULL);
3215 
3216 	if (vr->vvr_pif->vvi_pip == NULL) {
3217 		vrrp_log(VRRP_DBG0, "vrrpd_send_adv(%s): no primary IP "
3218 		    "address", vr->vvr_conf.vvc_name);
3219 		return (VRRP_EINVAL);
3220 	}
3221 
3222 	if (vr->vvr_conf.vvc_af == AF_INET) {
3223 		return (vrrpd_send_adv_v4(vr, (uchar_t *)buf,
3224 		    sizeof (buf), zero_pri));
3225 	} else {
3226 		return (vrrpd_send_adv_v6(vr, (uchar_t *)buf,
3227 		    sizeof (buf), zero_pri));
3228 	}
3229 }
3230 
3231 static void
3232 vrrpd_process_adv(vrrp_vr_t *vr, vrrp_addr_t *from, vrrp_pkt_t *vp)
3233 {
3234 	vrrp_vr_conf_t *conf = &vr->vvr_conf;
3235 	char		peer[INET6_ADDRSTRLEN];
3236 	char		local[INET6_ADDRSTRLEN];
3237 	int		addr_cmp;
3238 	uint16_t	peer_adver_int;
3239 
3240 	/* LINTED E_CONSTANT_CONDITION */
3241 	VRRPADDR2STR(vr->vvr_conf.vvc_af, from, peer, INET6_ADDRSTRLEN,
3242 	    _B_FALSE);
3243 	vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s) from %s", conf->vvc_name,
3244 	    peer);
3245 
3246 	if (vr->vvr_state <= VRRP_STATE_INIT) {
3247 		vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): state: %s, not "
3248 		    "ready", conf->vvc_name, vrrp_state2str(vr->vvr_state));
3249 		return;
3250 	}
3251 
3252 	peer_adver_int = CENTISEC2MSEC(ntohs(vp->vp_rsvd_adver_int) & 0x0fff);
3253 
3254 	/* LINTED E_CONSTANT_CONDITION */
3255 	VRRPADDR2STR(vr->vvr_pif->vvi_af, &vr->vvr_pif->vvi_pip->vip_addr,
3256 	    local, INET6_ADDRSTRLEN, _B_FALSE);
3257 	vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): local/state/pri"
3258 	    "(%s/%s/%d) peer/pri/intv(%s/%d/%d)", conf->vvc_name, local,
3259 	    vrrp_state2str(vr->vvr_state), conf->vvc_pri, peer,
3260 	    vp->vp_prio, peer_adver_int);
3261 
3262 	addr_cmp = ipaddr_cmp(vr->vvr_pif->vvi_af, from,
3263 	    &vr->vvr_pif->vvi_pip->vip_addr);
3264 	if (addr_cmp == 0) {
3265 		vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): local message",
3266 		    conf->vvc_name);
3267 		return;
3268 	} else if (conf->vvc_pri == vp->vp_prio) {
3269 		vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): peer IP %s is %s"
3270 		    " than the local IP %s", conf->vvc_name, peer,
3271 		    addr_cmp > 0 ? "greater" : "less", local);
3272 	}
3273 
3274 	if (conf->vvc_pri == 255) {
3275 		vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): virtual address "
3276 		    "owner received advertisement from %s", conf->vvc_name,
3277 		    peer);
3278 		return;
3279 	}
3280 
3281 	(void) gettimeofday(&vr->vvr_peer_time, NULL);
3282 	(void) memcpy(&vr->vvr_peer_addr, from, sizeof (vrrp_addr_t));
3283 	vr->vvr_peer_prio = vp->vp_prio;
3284 	vr->vvr_peer_adver_int = peer_adver_int;
3285 
3286 	if (vr->vvr_state == VRRP_STATE_BACKUP) {
3287 		vr->vvr_master_adver_int = vr->vvr_peer_adver_int;
3288 		if ((vp->vp_prio == VRRP_PRIO_ZERO) ||
3289 		    (conf->vvc_preempt == _B_FALSE ||
3290 		    vp->vp_prio >= conf->vvc_pri)) {
3291 			(void) iu_cancel_timer(vrrpd_timerq,
3292 			    vr->vvr_timer_id, NULL);
3293 			if (vp->vp_prio == VRRP_PRIO_ZERO) {
3294 				/* the master stops participating in VRRP */
3295 				vr->vvr_timeout = SKEW_TIME_VR(vr);
3296 			} else {
3297 				vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
3298 			}
3299 			if ((vr->vvr_timer_id = iu_schedule_timer_ms(
3300 			    vrrpd_timerq, vr->vvr_timeout, vrrp_b2m_timeout,
3301 			    vr)) == -1) {
3302 				vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): "
3303 				    "start vrrp_b2m_timeout(%d) failed",
3304 				    conf->vvc_name, vr->vvr_timeout);
3305 			} else {
3306 				vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): "
3307 				    "start vrrp_b2m_timeout(%d)",
3308 				    conf->vvc_name, vr->vvr_timeout);
3309 			}
3310 		}
3311 	} else if (vr->vvr_state == VRRP_STATE_MASTER) {
3312 		if (vp->vp_prio == VRRP_PRIO_ZERO) {
3313 			(void) vrrpd_send_adv(vr, _B_FALSE);
3314 			(void) iu_cancel_timer(vrrpd_timerq,
3315 			    vr->vvr_timer_id, NULL);
3316 			if ((vr->vvr_timer_id = iu_schedule_timer_ms(
3317 			    vrrpd_timerq, vr->vvr_timeout, vrrp_adv_timeout,
3318 			    vr)) == -1) {
3319 				vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): "
3320 				    "start vrrp_adv_timeout(%d) failed",
3321 				    conf->vvc_name, vr->vvr_timeout);
3322 			} else {
3323 				vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): "
3324 				    "start vrrp_adv_timeout(%d)",
3325 				    conf->vvc_name, vr->vvr_timeout);
3326 			}
3327 		} else if (vp->vp_prio > conf->vvc_pri ||
3328 		    (vp->vp_prio == conf->vvc_pri && addr_cmp > 0)) {
3329 			(void) vrrpd_state_m2b(vr);
3330 		}
3331 	} else {
3332 		assert(_B_FALSE);
3333 	}
3334 }
3335 
3336 static vrrp_err_t
3337 vrrpd_process_vrrp(vrrp_intf_t *pif, vrrp_pkt_t *vp, size_t len,
3338     vrrp_addr_t *from)
3339 {
3340 	vrrp_vr_t	*vr;
3341 	uint8_t		vers_type;
3342 	uint16_t	saved_cksum, cksum;
3343 	char		peer[INET6_ADDRSTRLEN];
3344 
3345 	/* LINTED E_CONSTANT_CONDITION */
3346 	VRRPADDR2STR(pif->vvi_af, from, peer, INET6_ADDRSTRLEN, _B_FALSE);
3347 	vrrp_log(VRRP_DBG0, "vrrpd_process_vrrp(%s) from %s", pif->vvi_ifname,
3348 	    peer);
3349 
3350 	if (len < sizeof (vrrp_pkt_t)) {
3351 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): invalid message "
3352 		    "length %d", len);
3353 		return (VRRP_EINVAL);
3354 	}
3355 
3356 	/*
3357 	 * Verify: VRRP version number and packet type.
3358 	 */
3359 	vers_type = ((vp->vp_vers_type & VRRP_VER_MASK) >> 4);
3360 	if (vers_type != VRRP_VERSION) {
3361 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s) unsupported "
3362 		    "version %d", pif->vvi_ifname, vers_type);
3363 		return (VRRP_EINVAL);
3364 	}
3365 
3366 	if (vp->vp_ipnum == 0) {
3367 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): zero IPvX count",
3368 		    pif->vvi_ifname);
3369 		return (VRRP_EINVAL);
3370 	}
3371 
3372 	if (len - sizeof (vrrp_pkt_t) !=
3373 	    vp->vp_ipnum * (pif->vvi_af == AF_INET ? sizeof (struct in_addr) :
3374 	    sizeof (struct in6_addr))) {
3375 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): invalid IPvX count"
3376 		    " %d", pif->vvi_ifname, vp->vp_ipnum);
3377 		return (VRRP_EINVAL);
3378 	}
3379 
3380 	vers_type = (vp->vp_vers_type & VRRP_TYPE_MASK);
3381 
3382 	/*
3383 	 * verify: VRRP checksum. Note that vrrp_cksum returns network byte
3384 	 * order checksum value;
3385 	 */
3386 	saved_cksum = vp->vp_chksum;
3387 	vp->vp_chksum = 0;
3388 	if (pif->vvi_af == AF_INET) {
3389 		cksum = vrrp_cksum4(&from->in4.sin_addr,
3390 		    &vrrp_muladdr4.in4.sin_addr, len, vp);
3391 	} else {
3392 		cksum = vrrp_cksum6(&from->in6.sin6_addr,
3393 		    &vrrp_muladdr6.in6.sin6_addr, len, vp);
3394 	}
3395 
3396 	if (cksum != saved_cksum) {
3397 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s) invalid "
3398 		    "checksum: expected/real(0x%x/0x%x)", pif->vvi_ifname,
3399 		    cksum, saved_cksum);
3400 		return (VRRP_EINVAL);
3401 	}
3402 
3403 	if ((vr = vrrpd_lookup_vr_by_vrid(pif->vvi_ifname, vp->vp_vrid,
3404 	    pif->vvi_af)) != NULL && vers_type == VRRP_PKT_ADVERT) {
3405 		vrrpd_process_adv(vr, from, vp);
3406 	} else {
3407 		vrrp_log(VRRP_DBG1, "vrrpd_process_vrrp(%s) VRID(%d/%s) "
3408 		    "not configured", pif->vvi_ifname, vp->vp_vrid,
3409 		    af_str(pif->vvi_af));
3410 	}
3411 	return (VRRP_SUCCESS);
3412 }
3413 
3414 /*
3415  * IPv4 socket, the IPv4 header is included.
3416  */
3417 static vrrp_err_t
3418 vrrpd_process_adv_v4(vrrp_intf_t *pif, struct msghdr *msgp, size_t len)
3419 {
3420 	char		abuf[INET6_ADDRSTRLEN];
3421 	struct ip	*ip;
3422 
3423 	vrrp_log(VRRP_DBG0, "vrrpd_process_adv_v4(%s, %d)",
3424 	    pif->vvi_ifname, len);
3425 
3426 	ip = (struct ip *)msgp->msg_iov->iov_base;
3427 
3428 	/* Sanity check */
3429 	if (len < sizeof (struct ip) || len < ntohs(ip->ip_len)) {
3430 		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid length "
3431 		    "%d", pif->vvi_ifname, len);
3432 		return (VRRP_EINVAL);
3433 	}
3434 
3435 	assert(ip->ip_v == IPV4_VERSION);
3436 	assert(ip->ip_p == IPPROTO_VRRP);
3437 	assert(msgp->msg_namelen == sizeof (struct sockaddr_in));
3438 
3439 	if (vrrp_muladdr4.in4.sin_addr.s_addr != ip->ip_dst.s_addr) {
3440 		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3441 		    "destination %s", pif->vvi_ifname,
3442 		    inet_ntop(pif->vvi_af, &(ip->ip_dst), abuf, sizeof (abuf)));
3443 		return (VRRP_EINVAL);
3444 	}
3445 
3446 	if (ip->ip_ttl != VRRP_IP_TTL) {
3447 		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3448 		    "ttl %d", pif->vvi_ifname, ip->ip_ttl);
3449 		return (VRRP_EINVAL);
3450 	}
3451 
3452 	/*
3453 	 * Note that the ip_len contains only the IP payload length.
3454 	 */
3455 	return (vrrpd_process_vrrp(pif,
3456 	    /* LINTED E_BAD_PTR_CAST_ALIGN */
3457 	    (vrrp_pkt_t *)((char *)ip + ip->ip_hl * 4), ntohs(ip->ip_len),
3458 	    (vrrp_addr_t *)msgp->msg_name));
3459 }
3460 
3461 /*
3462  * IPv6 socket, check the ancillary_data.
3463  */
3464 static vrrp_err_t
3465 vrrpd_process_adv_v6(vrrp_intf_t *pif, struct msghdr *msgp, size_t len)
3466 {
3467 	struct cmsghdr		*cmsgp;
3468 	uchar_t			*cmsg_datap;
3469 	struct in6_pktinfo	*pktinfop;
3470 	char			abuf[INET6_ADDRSTRLEN];
3471 	int			ttl;
3472 
3473 	vrrp_log(VRRP_DBG1, "vrrpd_process_adv_v6(%s, %d)",
3474 	    pif->vvi_ifname, len);
3475 
3476 	/* Sanity check */
3477 	if (len < sizeof (vrrp_pkt_t)) {
3478 		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v6(%s): invalid length "
3479 		    "%d", pif->vvi_ifname, len);
3480 		return (VRRP_EINVAL);
3481 	}
3482 
3483 	assert(msgp->msg_namelen == sizeof (struct sockaddr_in6));
3484 
3485 	for (cmsgp = CMSG_FIRSTHDR(msgp); cmsgp != NULL;
3486 	    cmsgp = CMSG_NXTHDR(msgp, cmsgp)) {
3487 		assert(cmsgp->cmsg_level == IPPROTO_IPV6);
3488 		cmsg_datap = CMSG_DATA(cmsgp);
3489 
3490 		switch (cmsgp->cmsg_type) {
3491 		case IPV6_HOPLIMIT:
3492 			/* LINTED E_BAD_PTR_CAST_ALIGN */
3493 			if ((ttl = *(int *)cmsg_datap) == VRRP_IP_TTL)
3494 				break;
3495 
3496 			vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3497 			    "ttl %d", pif->vvi_ifname, ttl);
3498 			return (VRRP_EINVAL);
3499 		case IPV6_PKTINFO:
3500 			/* LINTED E_BAD_PTR_CAST_ALIGN */
3501 			pktinfop = (struct in6_pktinfo *)cmsg_datap;
3502 			if (IN6_ARE_ADDR_EQUAL(&pktinfop->ipi6_addr,
3503 			    &vrrp_muladdr6.in6.sin6_addr)) {
3504 				break;
3505 			}
3506 
3507 			vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3508 			    "destination %s", pif->vvi_ifname,
3509 			    inet_ntop(pif->vvi_af, &pktinfop->ipi6_addr, abuf,
3510 			    sizeof (abuf)));
3511 			return (VRRP_EINVAL);
3512 		}
3513 	}
3514 
3515 	return (vrrpd_process_vrrp(pif, msgp->msg_iov->iov_base, len,
3516 	    msgp->msg_name));
3517 }
3518 
3519 /* ARGSUSED */
3520 static void
3521 vrrpd_sock_handler(iu_eh_t *eh, int s, short events, iu_event_id_t id,
3522     void *arg)
3523 {
3524 	struct msghdr		msg;
3525 	vrrp_addr_t		from;
3526 	uint64_t		buf[(IP_MAXPACKET + 1)/8];
3527 	uint64_t		ancillary_data[(IP_MAXPACKET + 1)/8];
3528 	vrrp_intf_t		*pif = arg;
3529 	int			af = pif->vvi_af;
3530 	int			len;
3531 	struct iovec		iov;
3532 
3533 	vrrp_log(VRRP_DBG1, "vrrpd_sock_handler(%s)", pif->vvi_ifname);
3534 
3535 	msg.msg_name = (struct sockaddr *)&from;
3536 	msg.msg_namelen = (af == AF_INET) ? sizeof (struct sockaddr_in) :
3537 	    sizeof (struct sockaddr_in6);
3538 	iov.iov_base = (char *)buf;
3539 	iov.iov_len = sizeof (buf);
3540 	msg.msg_iov = &iov;
3541 	msg.msg_iovlen = 1;
3542 	msg.msg_control = ancillary_data;
3543 	msg.msg_controllen = sizeof (ancillary_data);
3544 
3545 	if ((len = recvmsg(s, &msg, 0)) == -1) {
3546 		vrrp_log(VRRP_ERR, "vrrpd_sock_handler() recvmsg(%s) "
3547 		    "failed: %s", pif->vvi_ifname, strerror(errno));
3548 		return;
3549 	}
3550 
3551 	/*
3552 	 * Ignore packets whose control buffers that don't fit
3553 	 */
3554 	if (msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) {
3555 		vrrp_log(VRRP_ERR, "vrrpd_sock_handler() %s buffer not "
3556 		    "big enough", pif->vvi_ifname);
3557 		return;
3558 	}
3559 
3560 	if (af == AF_INET)
3561 		(void) vrrpd_process_adv_v4(pif, &msg, len);
3562 	else
3563 		(void) vrrpd_process_adv_v6(pif, &msg, len);
3564 }
3565 
3566 /*
3567  * Create the socket which is used to receive VRRP packets. Virtual routers
3568  * that configured on the same physical interface share the same socket.
3569  */
3570 static vrrp_err_t
3571 vrrpd_init_rxsock(vrrp_vr_t *vr)
3572 {
3573 	vrrp_intf_t *pif;	/* Physical interface used to recv packets */
3574 	struct group_req greq;
3575 	struct sockaddr_storage *muladdr;
3576 	int af, proto;
3577 	int on = 1;
3578 	vrrp_err_t err = VRRP_SUCCESS;
3579 
3580 	vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s)", vr->vvr_conf.vvc_name);
3581 
3582 	/*
3583 	 * The RX sockets may already been initialized.
3584 	 */
3585 	if ((pif = vr->vvr_pif) != NULL) {
3586 		vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s) already done on %s",
3587 		    vr->vvr_conf.vvc_name, pif->vvi_ifname);
3588 		assert(pif->vvi_sockfd != -1);
3589 		return (VRRP_SUCCESS);
3590 	}
3591 
3592 	/*
3593 	 * If no IP addresses configured on the primary interface,
3594 	 * return failure.
3595 	 */
3596 	af = vr->vvr_conf.vvc_af;
3597 	pif = vrrpd_lookup_if(vr->vvr_conf.vvc_link, af);
3598 	if (pif == NULL) {
3599 		vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s): no IP address "
3600 		    "over %s/%s", vr->vvr_conf.vvc_name,
3601 		    vr->vvr_conf.vvc_link, af_str(af));
3602 		return (VRRP_ENOPRIM);
3603 	}
3604 
3605 	proto = (af == AF_INET ? IPPROTO_IP : IPPROTO_IPV6);
3606 	if (pif->vvi_nvr++ == 0) {
3607 		assert(pif->vvi_sockfd < 0);
3608 		pif->vvi_sockfd = socket(af, SOCK_RAW, IPPROTO_VRRP);
3609 		if (pif->vvi_sockfd < 0) {
3610 			vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): socket() "
3611 			    "failed %s", vr->vvr_conf.vvc_name,
3612 			    strerror(errno));
3613 			err = VRRP_ESYS;
3614 			goto done;
3615 		}
3616 
3617 		/*
3618 		 * Join the multicast group to receive VRRP packets.
3619 		 */
3620 		if (af == AF_INET) {
3621 			muladdr = (struct sockaddr_storage *)
3622 			    (void *)&vrrp_muladdr4;
3623 		} else {
3624 			muladdr = (struct sockaddr_storage *)
3625 			    (void *)&vrrp_muladdr6;
3626 		}
3627 
3628 		greq.gr_interface = pif->vvi_ifindex;
3629 		(void) memcpy(&greq.gr_group, muladdr,
3630 		    sizeof (struct sockaddr_storage));
3631 		if (setsockopt(pif->vvi_sockfd, proto, MCAST_JOIN_GROUP, &greq,
3632 		    sizeof (struct group_req)) < 0) {
3633 			vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3634 			    "join_group(%d) failed: %s", vr->vvr_conf.vvc_name,
3635 			    pif->vvi_ifindex, strerror(errno));
3636 			err = VRRP_ESYS;
3637 			goto done;
3638 		} else {
3639 			vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s): "
3640 			    "join_group(%d) succeeded", vr->vvr_conf.vvc_name,
3641 			    pif->vvi_ifindex);
3642 		}
3643 
3644 		/*
3645 		 * Unlike IPv4, the IPv6 raw socket does not pass the IP header
3646 		 * when a packet is received. Call setsockopt() to receive such
3647 		 * information.
3648 		 */
3649 		if (af == AF_INET6) {
3650 			/*
3651 			 * Enable receipt of destination address info
3652 			 */
3653 			if (setsockopt(pif->vvi_sockfd, proto, IPV6_RECVPKTINFO,
3654 			    (char *)&on, sizeof (on)) < 0) {
3655 				vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3656 				    "enable recvpktinfo failed: %s",
3657 				    vr->vvr_conf.vvc_name, strerror(errno));
3658 				err = VRRP_ESYS;
3659 				goto done;
3660 			}
3661 
3662 			/*
3663 			 * Enable receipt of hoplimit info
3664 			 */
3665 			if (setsockopt(pif->vvi_sockfd, proto,
3666 			    IPV6_RECVHOPLIMIT, (char *)&on, sizeof (on)) < 0) {
3667 				vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3668 				    "enable recvhoplimit failed: %s",
3669 				    vr->vvr_conf.vvc_name, strerror(errno));
3670 				err = VRRP_ESYS;
3671 				goto done;
3672 			}
3673 		}
3674 
3675 		if ((pif->vvi_eid = iu_register_event(vrrpd_eh,
3676 		    pif->vvi_sockfd, POLLIN, vrrpd_sock_handler, pif)) == -1) {
3677 			vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3678 			    "iu_register_event() failed",
3679 			    vr->vvr_conf.vvc_name);
3680 			err = VRRP_ESYS;
3681 			goto done;
3682 		}
3683 	} else {
3684 		vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s) over %s already "
3685 		    "done %d", vr->vvr_conf.vvc_name, pif->vvi_ifname,
3686 		    pif->vvi_nvr);
3687 		assert(IS_PRIMARY_INTF(pif));
3688 	}
3689 
3690 done:
3691 	vr->vvr_pif = pif;
3692 	if (err != VRRP_SUCCESS)
3693 		vrrpd_fini_rxsock(vr);
3694 
3695 	return (err);
3696 }
3697 
3698 /*
3699  * Delete the socket which is used to receive VRRP packets for the given
3700  * VRRP router. Since all virtual routers that configured on the same
3701  * physical interface share the same socket, the socket is only closed
3702  * when the last VRRP router share this socket is deleted.
3703  */
3704 static void
3705 vrrpd_fini_rxsock(vrrp_vr_t *vr)
3706 {
3707 	vrrp_intf_t	*pif = vr->vvr_pif;
3708 
3709 	vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s)", vr->vvr_conf.vvc_name);
3710 
3711 	if (pif == NULL)
3712 		return;
3713 
3714 	if (--pif->vvi_nvr == 0) {
3715 		vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s) over %s",
3716 		    vr->vvr_conf.vvc_name, pif->vvi_ifname);
3717 		(void) iu_unregister_event(vrrpd_eh, pif->vvi_eid, NULL);
3718 		(void) close(pif->vvi_sockfd);
3719 		pif->vvi_pip = NULL;
3720 		pif->vvi_sockfd = -1;
3721 		pif->vvi_eid = -1;
3722 	} else {
3723 		vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s) over %s %d",
3724 		    vr->vvr_conf.vvc_name, pif->vvi_ifname, pif->vvi_nvr);
3725 	}
3726 	vr->vvr_pif = NULL;
3727 }
3728 
3729 /*
3730  * Create the socket which is used to send VRRP packets. Further, set
3731  * the IFF_NOACCEPT flag based on the VRRP router's accept mode.
3732  */
3733 static vrrp_err_t
3734 vrrpd_init_txsock(vrrp_vr_t *vr)
3735 {
3736 	int		af;
3737 	vrrp_intf_t	*vif;
3738 	vrrp_err_t	err;
3739 
3740 	vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s)", vr->vvr_conf.vvc_name);
3741 
3742 	if (vr->vvr_vif != NULL) {
3743 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s) already done on %s",
3744 		    vr->vvr_conf.vvc_name, vr->vvr_vif->vvi_ifname);
3745 		return (VRRP_SUCCESS);
3746 	}
3747 
3748 	af = vr->vvr_conf.vvc_af;
3749 	if ((vif = vrrpd_lookup_if(vr->vvr_vnic, af)) == NULL) {
3750 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s) no IP address over "
3751 		    "%s/%s", vr->vvr_conf.vvc_name, vr->vvr_vnic, af_str(af));
3752 		return (VRRP_ENOVIRT);
3753 	}
3754 
3755 	vr->vvr_vif = vif;
3756 	if (vr->vvr_conf.vvc_af == AF_INET)
3757 		err = vrrpd_init_txsock_v4(vr);
3758 	else
3759 		err = vrrpd_init_txsock_v6(vr);
3760 
3761 	if (err != VRRP_SUCCESS)
3762 		goto done;
3763 
3764 	/*
3765 	 * The interface should start with IFF_NOACCEPT flag not set, only
3766 	 * call this function when the VRRP router requires IFF_NOACCEPT.
3767 	 */
3768 	if (!vr->vvr_conf.vvc_accept)
3769 		err = vrrpd_set_noaccept(vr, _B_TRUE);
3770 
3771 done:
3772 	if (err != VRRP_SUCCESS) {
3773 		(void) close(vif->vvi_sockfd);
3774 		vif->vvi_sockfd = -1;
3775 		vr->vvr_vif = NULL;
3776 	}
3777 
3778 	return (err);
3779 }
3780 
3781 /*
3782  * Create the IPv4 socket which is used to send VRRP packets. Note that
3783  * the destination MAC address of VRRP advertisement must be the virtual
3784  * MAC address, so we specify the output interface to be the specific VNIC.
3785  */
3786 static vrrp_err_t
3787 vrrpd_init_txsock_v4(vrrp_vr_t *vr)
3788 {
3789 	vrrp_intf_t *vif;	/* VNIC interface used to send packets */
3790 	vrrp_ip_t *vip;		/* The first IP over the VNIC */
3791 	int on = 1;
3792 	char off = 0;
3793 	vrrp_err_t err = VRRP_SUCCESS;
3794 	char abuf[INET6_ADDRSTRLEN];
3795 
3796 	vif = vr->vvr_vif;
3797 	assert(vr->vvr_conf.vvc_af == AF_INET);
3798 	assert(vif != NULL);
3799 
3800 	vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v4(%s) over %s",
3801 	    vr->vvr_conf.vvc_name, vif->vvi_ifname);
3802 
3803 	if (vif->vvi_sockfd != -1) {
3804 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v4(%s) already done "
3805 		    "over %s", vr->vvr_conf.vvc_name, vif->vvi_ifname);
3806 		return (VRRP_SUCCESS);
3807 	}
3808 
3809 	vif->vvi_sockfd = socket(vif->vvi_af, SOCK_RAW, IPPROTO_VRRP);
3810 	if (vif->vvi_sockfd < 0) {
3811 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): socket() "
3812 		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3813 		err = VRRP_ESYS;
3814 		goto done;
3815 	}
3816 
3817 	/*
3818 	 * Include the IP header, so that we can specify the IP address/ttl.
3819 	 */
3820 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_HDRINCL, (char *)&on,
3821 	    sizeof (on)) < 0) {
3822 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): ip_hdrincl "
3823 		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3824 		err = VRRP_ESYS;
3825 		goto done;
3826 	}
3827 
3828 	/*
3829 	 * Disable multicast loopback.
3830 	 */
3831 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_MULTICAST_LOOP, &off,
3832 	    sizeof (char)) == -1) {
3833 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): disable "
3834 		    "multicast_loop failed: %s", vr->vvr_conf.vvc_name,
3835 		    strerror(errno));
3836 		err = VRRP_ESYS;
3837 		goto done;
3838 	}
3839 
3840 	vip = TAILQ_FIRST(&vif->vvi_iplist);
3841 	/* LINTED E_CONSTANT_CONDITION */
3842 	VRRPADDR2STR(vif->vvi_af, &vip->vip_addr, abuf, INET6_ADDRSTRLEN,
3843 	    _B_FALSE);
3844 
3845 	/*
3846 	 * Set the output interface to send the VRRP packet.
3847 	 */
3848 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_MULTICAST_IF,
3849 	    &vip->vip_addr.in4.sin_addr, sizeof (struct in_addr)) < 0) {
3850 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): multcast_if(%s) "
3851 		    "failed: %s", vr->vvr_conf.vvc_name, abuf, strerror(errno));
3852 		err = VRRP_ESYS;
3853 	} else {
3854 		vrrp_log(VRRP_DBG0, "vrrpd_init_txsock_v4(%s): multcast_if(%s) "
3855 		    "succeed", vr->vvr_conf.vvc_name, abuf);
3856 	}
3857 
3858 done:
3859 	if (err != VRRP_SUCCESS) {
3860 		(void) close(vif->vvi_sockfd);
3861 		vif->vvi_sockfd = -1;
3862 	}
3863 
3864 	return (err);
3865 }
3866 
3867 /*
3868  * Create the IPv6 socket which is used to send VRRP packets. Note that
3869  * the destination must be the virtual MAC address, so we specify the output
3870  * interface to be the specific VNIC.
3871  */
3872 static vrrp_err_t
3873 vrrpd_init_txsock_v6(vrrp_vr_t *vr)
3874 {
3875 	vrrp_intf_t *vif;	/* VNIC interface used to send packets */
3876 	int off = 0, ttl = VRRP_IP_TTL;
3877 	vrrp_err_t err = VRRP_SUCCESS;
3878 
3879 	vif = vr->vvr_vif;
3880 	assert(vr->vvr_conf.vvc_af == AF_INET6);
3881 	assert(vif != NULL);
3882 
3883 	vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s) over %s",
3884 	    vr->vvr_conf.vvc_name, vif->vvi_ifname);
3885 
3886 	if (vif->vvi_sockfd != -1) {
3887 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s) already done "
3888 		    "over %s", vr->vvr_conf.vvc_name, vif->vvi_ifname);
3889 		return (VRRP_SUCCESS);
3890 	}
3891 
3892 	vif->vvi_sockfd = socket(vif->vvi_af, SOCK_RAW, IPPROTO_VRRP);
3893 	if (vif->vvi_sockfd < 0) {
3894 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): socket() "
3895 		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3896 		err = VRRP_ESYS;
3897 		goto done;
3898 	}
3899 
3900 	/*
3901 	 * Disable multicast loopback.
3902 	 */
3903 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
3904 	    &off, sizeof (int)) == -1) {
3905 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): disable "
3906 		    "multicast_loop failed: %s", vr->vvr_conf.vvc_name,
3907 		    strerror(errno));
3908 		err = VRRP_ESYS;
3909 		goto done;
3910 	}
3911 
3912 	/*
3913 	 * Set the multicast TTL.
3914 	 */
3915 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_HOPS,
3916 	    &ttl, sizeof (int)) == -1) {
3917 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): enable "
3918 		    "multicast_hops %d failed: %s", vr->vvr_conf.vvc_name,
3919 		    ttl, strerror(errno));
3920 		err = VRRP_ESYS;
3921 		goto done;
3922 	}
3923 
3924 	/*
3925 	 * Set the output interface to send the VRRP packet.
3926 	 */
3927 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_IF,
3928 	    &vif->vvi_ifindex, sizeof (uint32_t)) < 0) {
3929 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): multicast_if(%d) "
3930 		    "failed: %s", vr->vvr_conf.vvc_name, vif->vvi_ifindex,
3931 		    strerror(errno));
3932 		err = VRRP_ESYS;
3933 	} else {
3934 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s): multicast_if(%d)"
3935 		    " succeed", vr->vvr_conf.vvc_name, vif->vvi_ifindex);
3936 	}
3937 
3938 done:
3939 	if (err != VRRP_SUCCESS) {
3940 		(void) close(vif->vvi_sockfd);
3941 		vif->vvi_sockfd = -1;
3942 	}
3943 
3944 	return (err);
3945 }
3946 
3947 /*
3948  * Delete the socket which is used to send VRRP packets. Further, clear
3949  * the IFF_NOACCEPT flag based on the VRRP router's accept mode.
3950  */
3951 static void
3952 vrrpd_fini_txsock(vrrp_vr_t *vr)
3953 {
3954 	vrrp_intf_t *vif = vr->vvr_vif;
3955 
3956 	vrrp_log(VRRP_DBG1, "vrrpd_fini_txsock(%s)", vr->vvr_conf.vvc_name);
3957 
3958 	if (vif != NULL) {
3959 		if (!vr->vvr_conf.vvc_accept)
3960 			(void) vrrpd_set_noaccept(vr, _B_FALSE);
3961 		(void) close(vif->vvi_sockfd);
3962 		vif->vvi_sockfd = -1;
3963 		vr->vvr_vif = NULL;
3964 	}
3965 }
3966 
/*
 * Given the pseudo header cksum value (sum), calculate the Internet
 * checksum (one's complement of the one's complement sum of all 16-bit
 * words) over the plen bytes of the VRRP packet at p.
 *
 * The words are summed as they are laid out in memory, so the result can be
 * stored into (or compared with) the checksum field of the packet as is.
 * An odd trailing byte is treated as if it were followed by a zero byte.
 * A computed checksum of zero is returned as 0xffff.
 */
static uint16_t
in_cksum(int sum, uint16_t plen, void *p)
{
	const unsigned char	*bytes = p;
	/*
	 * Accumulate in an unsigned 32-bit integer: a signed int could
	 * overflow for payloads close to 64K, and the folding below must not
	 * sign-extend.
	 */
	uint32_t		acc = (uint32_t)sum;
	uint16_t		word;
	uint16_t		answer;
	size_t			nleft = plen;

	/* memcpy() keeps the word loads valid for unaligned buffers. */
	while (nleft > 1) {
		(void) memcpy(&word, bytes, sizeof (word));
		acc += word;
		bytes += sizeof (word);
		nleft -= sizeof (word);
	}

	/* mop up an odd byte, if necessary */
	if (nleft == 1) {
		word = 0;
		(void) memcpy(&word, bytes, 1);
		acc += word;
	}

	/*
	 * add back carry outs from top 16 bits to low 16 bits
	 */
	acc = (acc >> 16) + (acc & 0xffff);	/* add hi 16 to low 16 */
	acc += (acc >> 16);			/* add carry */
	answer = (uint16_t)~acc;		/* truncate to 16 bits */
	return (answer == 0 ? 0xffff : answer);
}
4000 
/*
 * IPv4 pseudo header. It is filled in by vrrp_cksum4() and summed as
 * 16-bit words ahead of the VRRP packet; it exists only for checksumming.
 */
struct pshv4 {
	struct in_addr	ph4_src;	/* source address */
	struct in_addr	ph4_dst;	/* destination address */
	uint8_t		ph4_zero;	/* always zero */
	uint8_t		ph4_protocol;	/* protocol used, IPPROTO_VRRP */
	uint16_t	ph4_len;	/* VRRP payload len */
};
4009 
4010 /*
4011  * Checksum routine for VRRP checksum. Note that plen is the upper-layer
4012  * packet length (in the host byte order), and both IP source and destination
4013  * addresses are in the network byte order.
4014  */
4015 static uint16_t
4016 vrrp_cksum4(struct in_addr *src, struct in_addr *dst, uint16_t plen,
4017     vrrp_pkt_t *vp)
4018 {
4019 	struct pshv4 ph4;
4020 	int nleft;
4021 	uint16_t *w;
4022 	int sum = 0;
4023 
4024 	ph4.ph4_src = *src;
4025 	ph4.ph4_dst = *dst;
4026 	ph4.ph4_zero = 0;
4027 	ph4.ph4_protocol = IPPROTO_VRRP;
4028 	ph4.ph4_len = htons(plen);
4029 
4030 	/*
4031 	 *  Our algorithm is simple, using a 32 bit accumulator (sum),
4032 	 *  we add sequential 16 bit words to it, and at the end, fold
4033 	 *  back all the carry bits from the top 16 bits into the lower
4034 	 *  16 bits.
4035 	 */
4036 	nleft = sizeof (struct pshv4);
4037 	w = (uint16_t *)&ph4;
4038 	while (nleft > 0) {
4039 		sum += *w++;
4040 		nleft -= 2;
4041 	}
4042 
4043 	return (in_cksum(sum, plen, vp));
4044 }
4045 
/*
 * IPv6 pseudo header. It is filled in by vrrp_cksum6() and summed as
 * 16-bit words ahead of the VRRP packet; it exists only for checksumming.
 */
struct pshv6 {
	struct in6_addr	ph6_src;	/* source address */
	struct in6_addr	ph6_dst;	/* destination address */
	uint32_t	ph6_len;	/* VRRP payload len */
	uint32_t	ph6_zero : 24;	/* always zero */
	uint32_t	ph6_protocol : 8; /* protocol used, IPPROTO_VRRP */
};
4054 
4055 /*
4056  * Checksum routine for VRRP checksum. Note that plen is the upper-layer
4057  * packet length (in the host byte order), and both IP source and destination
4058  * addresses are in the network byte order.
4059  */
4060 static uint16_t
4061 vrrp_cksum6(struct in6_addr *src, struct in6_addr *dst, uint16_t plen,
4062     vrrp_pkt_t *vp)
4063 {
4064 	struct pshv6 ph6;
4065 	int nleft;
4066 	uint16_t *w;
4067 	int sum = 0;
4068 
4069 	ph6.ph6_src = *src;
4070 	ph6.ph6_dst = *dst;
4071 	ph6.ph6_zero = 0;
4072 	ph6.ph6_protocol = IPPROTO_VRRP;
4073 	ph6.ph6_len = htonl((uint32_t)plen);
4074 
4075 	/*
4076 	 *  Our algorithm is simple, using a 32 bit accumulator (sum),
4077 	 *  we add sequential 16 bit words to it, and at the end, fold
4078 	 *  back all the carry bits from the top 16 bits into the lower
4079 	 *  16 bits.
4080 	 */
4081 	nleft = sizeof (struct pshv6);
4082 	w = (uint16_t *)&ph6;
4083 	while (nleft > 0) {
4084 		sum += *w++;
4085 		nleft -= 2;
4086 	}
4087 
4088 	return (in_cksum(sum, plen, vp));
4089 }
4090 
4091 vrrp_err_t
4092 vrrpd_state_i2m(vrrp_vr_t *vr)
4093 {
4094 	vrrp_err_t	err;
4095 
4096 	vrrp_log(VRRP_DBG1, "vrrpd_state_i2m(%s)", vr->vvr_conf.vvc_name);
4097 
4098 	vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_MASTER, vr);
4099 	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4100 		return (err);
4101 
4102 	(void) vrrpd_send_adv(vr, _B_FALSE);
4103 
4104 	vr->vvr_err = VRRP_SUCCESS;
4105 	vr->vvr_timeout = vr->vvr_conf.vvc_adver_int;
4106 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4107 	    vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) {
4108 		vrrp_log(VRRP_ERR, "vrrpd_state_i2m(): unable to start timer");
4109 		return (VRRP_ESYS);
4110 	} else {
4111 		vrrp_log(VRRP_DBG1, "vrrpd_state_i2m(%s): start "
4112 		    "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name,
4113 		    vr->vvr_timeout);
4114 	}
4115 	return (VRRP_SUCCESS);
4116 }
4117 
4118 vrrp_err_t
4119 vrrpd_state_i2b(vrrp_vr_t *vr)
4120 {
4121 	vrrp_err_t	err;
4122 
4123 	vrrp_log(VRRP_DBG1, "vrrpd_state_i2b(%s)", vr->vvr_conf.vvc_name);
4124 
4125 	vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_BACKUP, vr);
4126 	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4127 		return (err);
4128 
4129 	/*
4130 	 * Reinitialize the Master advertisement interval to be the configured
4131 	 * value.
4132 	 */
4133 	vr->vvr_err = VRRP_SUCCESS;
4134 	vr->vvr_master_adver_int = vr->vvr_conf.vvc_adver_int;
4135 	vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
4136 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4137 	    vr->vvr_timeout, vrrp_b2m_timeout, vr)) == -1) {
4138 		vrrp_log(VRRP_ERR, "vrrpd_state_i2b(): unable to set timer");
4139 		return (VRRP_ESYS);
4140 	} else {
4141 		vrrp_log(VRRP_DBG1, "vrrpd_state_i2b(%s): start "
4142 		    "vrrp_b2m_timeout(%d)", vr->vvr_conf.vvc_name,
4143 		    vr->vvr_timeout);
4144 	}
4145 	return (VRRP_SUCCESS);
4146 }
4147 
4148 void
4149 vrrpd_state_m2i(vrrp_vr_t *vr)
4150 {
4151 	vrrp_log(VRRP_DBG1, "vrrpd_state_m2i(%s)", vr->vvr_conf.vvc_name);
4152 
4153 	vrrpd_state_trans(VRRP_STATE_MASTER, VRRP_STATE_INIT, vr);
4154 	(void) vrrpd_virtualip_update(vr, _B_TRUE);
4155 	bzero(&vr->vvr_peer, sizeof (vrrp_peer_t));
4156 	(void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
4157 }
4158 
4159 void
4160 vrrpd_state_b2i(vrrp_vr_t *vr)
4161 {
4162 	vrrp_log(VRRP_DBG1, "vrrpd_state_b2i(%s)", vr->vvr_conf.vvc_name);
4163 
4164 	bzero(&vr->vvr_peer, sizeof (vrrp_peer_t));
4165 	(void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
4166 	vrrpd_state_trans(VRRP_STATE_BACKUP, VRRP_STATE_INIT, vr);
4167 	(void) vrrpd_virtualip_update(vr, _B_TRUE);
4168 }
4169 
4170 /* ARGSUSED */
4171 static void
4172 vrrp_b2m_timeout(iu_tq_t *tq, void *arg)
4173 {
4174 	vrrp_vr_t *vr = (vrrp_vr_t *)arg;
4175 
4176 	vrrp_log(VRRP_DBG1, "vrrp_b2m_timeout(%s)", vr->vvr_conf.vvc_name);
4177 	(void) vrrpd_state_b2m(vr);
4178 }
4179 
4180 /* ARGSUSED */
4181 static void
4182 vrrp_adv_timeout(iu_tq_t *tq, void *arg)
4183 {
4184 	vrrp_vr_t *vr = (vrrp_vr_t *)arg;
4185 
4186 	vrrp_log(VRRP_DBG1, "vrrp_adv_timeout(%s)", vr->vvr_conf.vvc_name);
4187 
4188 	(void) vrrpd_send_adv(vr, _B_FALSE);
4189 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4190 	    vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) {
4191 		vrrp_log(VRRP_ERR, "vrrp_adv_timeout(%s): start timer failed",
4192 		    vr->vvr_conf.vvc_name);
4193 	} else {
4194 		vrrp_log(VRRP_DBG1, "vrrp_adv_timeout(%s): start "
4195 		    "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name,
4196 		    vr->vvr_timeout);
4197 	}
4198 }
4199 
4200 vrrp_err_t
4201 vrrpd_state_b2m(vrrp_vr_t *vr)
4202 {
4203 	vrrp_err_t	err;
4204 
4205 	vrrp_log(VRRP_DBG1, "vrrpd_state_b2m(%s)", vr->vvr_conf.vvc_name);
4206 
4207 	vrrpd_state_trans(VRRP_STATE_BACKUP, VRRP_STATE_MASTER, vr);
4208 	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4209 		return (err);
4210 	(void) vrrpd_send_adv(vr, _B_FALSE);
4211 
4212 	vr->vvr_timeout = vr->vvr_conf.vvc_adver_int;
4213 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4214 	    vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) {
4215 		vrrp_log(VRRP_ERR, "vrrpd_state_b2m(%s): start timer failed",
4216 		    vr->vvr_conf.vvc_name);
4217 		return (VRRP_ESYS);
4218 	} else {
4219 		vrrp_log(VRRP_DBG1, "vrrpd_state_b2m(%s): start "
4220 		    "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name,
4221 		    vr->vvr_timeout);
4222 	}
4223 	return (VRRP_SUCCESS);
4224 }
4225 
4226 vrrp_err_t
4227 vrrpd_state_m2b(vrrp_vr_t *vr)
4228 {
4229 	vrrp_err_t	err;
4230 
4231 	vrrp_log(VRRP_DBG1, "vrrpd_state_m2b(%s)", vr->vvr_conf.vvc_name);
4232 
4233 	vrrpd_state_trans(VRRP_STATE_MASTER, VRRP_STATE_BACKUP, vr);
4234 	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4235 		return (err);
4236 
4237 	/*
4238 	 * Cancel the adver_timer.
4239 	 */
4240 	vr->vvr_master_adver_int = vr->vvr_peer_adver_int;
4241 	(void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
4242 	vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
4243 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4244 	    vr->vvr_timeout, vrrp_b2m_timeout, vr)) == -1) {
4245 		vrrp_log(VRRP_ERR, "vrrpd_state_m2b(%s): start timer failed",
4246 		    vr->vvr_conf.vvc_name);
4247 	} else {
4248 		vrrp_log(VRRP_DBG1, "vrrpd_state_m2b(%s) start "
4249 		    "vrrp_b2m_timeout(%d)", vr->vvr_conf.vvc_name,
4250 		    vr->vvr_timeout);
4251 	}
4252 	return (VRRP_SUCCESS);
4253 }
4254 
4255 /*
4256  * Set the IFF_NOACCESS flag on the VNIC interface of the VRRP router
4257  * based on its access mode.
4258  */
4259 static vrrp_err_t
4260 vrrpd_set_noaccept(vrrp_vr_t *vr, boolean_t on)
4261 {
4262 	vrrp_intf_t *vif = vr->vvr_vif;
4263 	uint64_t curr_flags;
4264 	struct lifreq lifr;
4265 	int s;
4266 
4267 	vrrp_log(VRRP_DBG1, "vrrpd_set_noaccept(%s, %s)",
4268 	    vr->vvr_conf.vvc_name, on ? "on" : "off");
4269 
4270 	/*
4271 	 * Possibly no virtual address exists on this VRRP router yet.
4272 	 */
4273 	if (vif == NULL)
4274 		return (VRRP_SUCCESS);
4275 
4276 	vrrp_log(VRRP_DBG1, "vrrpd_set_noaccept(%s, %s)",
4277 	    vif->vvi_ifname, vrrp_state2str(vr->vvr_state));
4278 
4279 	s = (vif->vvi_af == AF_INET) ? vrrpd_ctlsock_fd : vrrpd_ctlsock6_fd;
4280 	(void) strncpy(lifr.lifr_name, vif->vvi_ifname,
4281 	    sizeof (lifr.lifr_name));
4282 	if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
4283 		if (errno != ENXIO && errno != ENOENT) {
4284 			vrrp_log(VRRP_ERR, "vrrpd_set_noaccept(): "
4285 			    "SIOCGLIFFLAGS on %s failed: %s",
4286 			    vif->vvi_ifname, strerror(errno));
4287 		}
4288 		return (VRRP_ESYS);
4289 	}
4290 
4291 	curr_flags = lifr.lifr_flags;
4292 	if (on)
4293 		lifr.lifr_flags |= IFF_NOACCEPT;
4294 	else
4295 		lifr.lifr_flags &= ~IFF_NOACCEPT;
4296 
4297 	if (lifr.lifr_flags != curr_flags) {
4298 		if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
4299 			if (errno != ENXIO && errno != ENOENT) {
4300 				vrrp_log(VRRP_ERR, "vrrpd_set_noaccept(%s): "
4301 				    "SIOCSLIFFLAGS 0x%llx on %s failed: %s",
4302 				    on ? "no_accept" : "accept",
4303 				    lifr.lifr_flags, vif->vvi_ifname,
4304 				    strerror(errno));
4305 			}
4306 			return (VRRP_ESYS);
4307 		}
4308 	}
4309 	return (VRRP_SUCCESS);
4310 }
4311 
4312 static vrrp_err_t
4313 vrrpd_virtualip_updateone(vrrp_intf_t *vif, vrrp_ip_t *ip, boolean_t checkonly)
4314 {
4315 	vrrp_state_t	state = vif->vvi_vr_state;
4316 	struct lifreq	lifr;
4317 	char		abuf[INET6_ADDRSTRLEN];
4318 	int		af = vif->vvi_af;
4319 	uint64_t	curr_flags;
4320 	int		s;
4321 
4322 	assert(IS_VIRTUAL_INTF(vif));
4323 
4324 	/* LINTED E_CONSTANT_CONDITION */
4325 	VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
4326 	vrrp_log(VRRP_DBG1, "vrrpd_virtualip_updateone(%s, %s%s)",
4327 	    vif->vvi_ifname, abuf, checkonly ? ", checkonly" : "");
4328 
4329 	s = (af == AF_INET) ? vrrpd_ctlsock_fd : vrrpd_ctlsock6_fd;
4330 	(void) strncpy(lifr.lifr_name, ip->vip_lifname,
4331 	    sizeof (lifr.lifr_name));
4332 	if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
4333 		if (errno != ENXIO && errno != ENOENT) {
4334 			vrrp_log(VRRP_ERR, "vrrpd_virtualip_updateone(%s): "
4335 			    "SIOCGLIFFLAGS on %s/%s failed: %s",
4336 			    vif->vvi_ifname, lifr.lifr_name, abuf,
4337 			    strerror(errno));
4338 		}
4339 		return (VRRP_ESYS);
4340 	}
4341 
4342 	curr_flags = lifr.lifr_flags;
4343 	if (state == VRRP_STATE_MASTER)
4344 		lifr.lifr_flags |= IFF_UP;
4345 	else
4346 		lifr.lifr_flags &= ~IFF_UP;
4347 
4348 	if (lifr.lifr_flags == curr_flags)
4349 		return (VRRP_SUCCESS);
4350 
4351 	if (checkonly) {
4352 		vrrp_log(VRRP_ERR, "VRRP virtual IP %s/%s was brought %s",
4353 		    ip->vip_lifname, abuf,
4354 		    state == VRRP_STATE_MASTER ? "down" : "up");
4355 		return (VRRP_ESYS);
4356 	} else if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
4357 		if (errno != ENXIO && errno != ENOENT) {
4358 			vrrp_log(VRRP_ERR, "vrrpd_virtualip_updateone(%s, %s): "
4359 			    "bring %s %s/%s failed: %s",
4360 			    vif->vvi_ifname, vrrp_state2str(state),
4361 			    state == VRRP_STATE_MASTER ? "up" : "down",
4362 			    ip->vip_lifname, abuf, strerror(errno));
4363 		}
4364 		return (VRRP_ESYS);
4365 	}
4366 	return (VRRP_SUCCESS);
4367 }
4368 
4369 static vrrp_err_t
4370 vrrpd_virtualip_update(vrrp_vr_t *vr, boolean_t checkonly)
4371 {
4372 	vrrp_state_t		state;
4373 	vrrp_intf_t		*vif = vr->vvr_vif;
4374 	vrrp_ip_t		*ip, *nextip;
4375 	char			abuf[INET6_ADDRSTRLEN];
4376 	vrrp_err_t		err;
4377 
4378 	vrrp_log(VRRP_DBG1, "vrrpd_virtualip_update(%s, %s, %s)%s",
4379 	    vr->vvr_conf.vvc_name, vrrp_state2str(vr->vvr_state),
4380 	    vif->vvi_ifname, checkonly ? " checkonly" : "");
4381 
4382 	state = vr->vvr_state;
4383 	assert(vif != NULL);
4384 	assert(IS_VIRTUAL_INTF(vif));
4385 	assert(vif->vvi_vr_state != state);
4386 	vif->vvi_vr_state = state;
4387 	for (ip = TAILQ_FIRST(&vif->vvi_iplist); ip != NULL; ip = nextip) {
4388 		nextip = TAILQ_NEXT(ip, vip_next);
4389 		err = vrrpd_virtualip_updateone(vif, ip, _B_FALSE);
4390 		if (!checkonly && err != VRRP_SUCCESS) {
4391 			/* LINTED E_CONSTANT_CONDITION */
4392 			VRRPADDR2STR(vif->vvi_af, &ip->vip_addr, abuf,
4393 			    INET6_ADDRSTRLEN, _B_FALSE);
4394 			vrrp_log(VRRP_DBG1, "vrrpd_virtualip_update() update "
4395 			    "%s over %s failed", abuf, vif->vvi_ifname);
4396 			vrrpd_delete_ip(vif, ip);
4397 		}
4398 	}
4399 
4400 	/*
4401 	 * The IP address is deleted when it is failed to be brought
4402 	 * up. If no IP addresses are left, delete this interface.
4403 	 */
4404 	if (!checkonly && TAILQ_EMPTY(&vif->vvi_iplist)) {
4405 		vrrp_log(VRRP_DBG0, "vrrpd_virtualip_update(): "
4406 		    "no IP left over %s", vif->vvi_ifname);
4407 		vrrpd_delete_if(vif, _B_TRUE);
4408 		return (VRRP_ENOVIRT);
4409 	}
4410 	return (VRRP_SUCCESS);
4411 }
4412 
4413 void
4414 vrrpd_state_trans(vrrp_state_t prev_s, vrrp_state_t s, vrrp_vr_t *vr)
4415 {
4416 	vrrp_log(VRRP_DBG1, "vrrpd_state_trans(%s): %s --> %s",
4417 	    vr->vvr_conf.vvc_name, vrrp_state2str(prev_s), vrrp_state2str(s));
4418 
4419 	assert(vr->vvr_state == prev_s);
4420 	vr->vvr_state = s;
4421 	vr->vvr_prev_state = prev_s;
4422 	(void) gettimeofday(&vr->vvr_st_time, NULL);
4423 	(void) vrrpd_post_event(vr->vvr_conf.vvc_name, prev_s, s);
4424 }
4425 
4426 static int
4427 vrrpd_post_event(const char *name, vrrp_state_t prev_st, vrrp_state_t st)
4428 {
4429 	sysevent_id_t	eid;
4430 	nvlist_t	*nvl = NULL;
4431 
4432 	/*
4433 	 * sysevent is not supported in the non-global zone
4434 	 */
4435 	if (getzoneid() != GLOBAL_ZONEID)
4436 		return (0);
4437 
4438 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
4439 		goto failed;
4440 
4441 	if (nvlist_add_uint8(nvl, VRRP_EVENT_VERSION,
4442 	    VRRP_EVENT_CUR_VERSION) != 0)
4443 		goto failed;
4444 
4445 	if (nvlist_add_string(nvl, VRRP_EVENT_ROUTER_NAME, name) != 0)
4446 		goto failed;
4447 
4448 	if (nvlist_add_uint8(nvl, VRRP_EVENT_STATE, st) != 0)
4449 		goto failed;
4450 
4451 	if (nvlist_add_uint8(nvl, VRRP_EVENT_PREV_STATE, prev_st) != 0)
4452 		goto failed;
4453 
4454 	if (sysevent_post_event(EC_VRRP, ESC_VRRP_STATE_CHANGE,
4455 	    SUNW_VENDOR, VRRP_EVENT_PUBLISHER, nvl, &eid) == 0) {
4456 		nvlist_free(nvl);
4457 		return (0);
4458 	}
4459 
4460 failed:
4461 	vrrp_log(VRRP_ERR, "vrrpd_post_event(): `state change (%s --> %s)' "
4462 	    "sysevent posting failed: %s", vrrp_state2str(prev_st),
4463 	    vrrp_state2str(st), strerror(errno));
4464 
4465 	if (nvl != NULL)
4466 		nvlist_free(nvl);
4467 	return (-1);
4468 }
4469 
/*
 * timeval processing functions
 */

/*
 * Convert a timeval to milliseconds, rounding the microsecond part to the
 * nearest millisecond (halves round up). tv_usec is expected to be in the
 * range [0, 999999].
 *
 * The rounding is done in integer arithmetic: adding 0.5 after the
 * integer division by 1000 would have no effect, since the fraction has
 * already been discarded by then.
 */
static int
timeval_to_milli(struct timeval tv)
{
	return ((int)(tv.tv_sec * 1000 + (tv.tv_usec + 500) / 1000));
}
4478 
/*
 * Return t1 - t2 as a timeval. When the microsecond difference is
 * negative, one second is borrowed so that tv_usec of the result is not
 * negative.
 */
static struct timeval
timeval_delta(struct timeval t1, struct timeval t2)
{
	/* t1 is passed by value, so it can hold the result. */
	t1.tv_sec -= t2.tv_sec;
	t1.tv_usec -= t2.tv_usec;

	if (t1.tv_usec < 0) {
		t1.tv_sec -= 1;
		t1.tv_usec += 1000000;
	}
	return (t1);
}
4492 
4493 /*
4494  * print error messages to the terminal or to syslog
4495  */
4496 static void
4497 vrrp_log(int level, char *message, ...)
4498 {
4499 	va_list ap;
4500 	int log_level = -1;
4501 
4502 	va_start(ap, message);
4503 
4504 	if (vrrp_logflag == 0) {
4505 		if (level <= vrrp_debug_level) {
4506 			/*
4507 			 * VRRP_ERR goes to stderr, others go to stdout
4508 			 */
4509 			FILE *out = (level <= VRRP_ERR) ? stderr : stdout;
4510 			(void) fprintf(out, "vrrpd: ");
4511 			/* LINTED: E_SEC_PRINTF_VAR_FMT */
4512 			(void) vfprintf(out, message, ap);
4513 			(void) fprintf(out, "\n");
4514 			(void) fflush(out);
4515 		}
4516 		va_end(ap);
4517 		return;
4518 	}
4519 
4520 	/*
4521 	 * translate VRRP_* to LOG_*
4522 	 */
4523 	switch (level) {
4524 	case VRRP_ERR:
4525 		log_level = LOG_ERR;
4526 		break;
4527 	case VRRP_WARNING:
4528 		log_level = LOG_WARNING;
4529 		break;
4530 	case VRRP_NOTICE:
4531 		log_level = LOG_NOTICE;
4532 		break;
4533 	case VRRP_DBG0:
4534 		log_level = LOG_INFO;
4535 		break;
4536 	default:
4537 		log_level = LOG_DEBUG;
4538 		break;
4539 	}
4540 
4541 	/* LINTED: E_SEC_PRINTF_VAR_FMT */
4542 	(void) vsyslog(log_level, message, ap);
4543 	va_end(ap);
4544 }
4545