xref: /titanic_44/usr/src/cmd/cmd-inet/usr.lib/vrrpd/vrrpd.c (revision 3c112a2b34403220c06c3e2fcac403358cfba168)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/socket.h>
28 #include <sys/sockio.h>
29 #include <sys/sysevent/vrrp.h>
30 #include <sys/sysevent/eventdefs.h>
31 #include <sys/varargs.h>
32 #include <auth_attr.h>
33 #include <ctype.h>
34 #include <fcntl.h>
35 #include <stdlib.h>
36 #include <strings.h>
37 #include <errno.h>
38 #include <unistd.h>
39 #include <zone.h>
40 #include <libsysevent.h>
41 #include <limits.h>
42 #include <locale.h>
43 #include <arpa/inet.h>
44 #include <signal.h>
45 #include <assert.h>
46 #include <ucred.h>
47 #include <bsm/adt.h>
48 #include <bsm/adt_event.h>
49 #include <priv_utils.h>
50 #include <libdllink.h>
51 #include <libdlvnic.h>
52 #include <libipadm.h>
53 #include <pwd.h>
54 #include <libvrrpadm.h>
55 #include <net/route.h>
56 #include "vrrpd_impl.h"
57 
/*
 * A VRRP router can only start participating in the VRRP protocol of a
 * virtual router when all of the following conditions are met:
 *
 * - The VRRP router is enabled (vr->vvr_conf.vvc_enabled is _B_TRUE)
 * - The RX socket is successfully created over the physical interface to
 *   receive the VRRP multicast advertisement. Note that one RX socket can
 *   be shared by several VRRP routers configured over the same physical
 *   interface. (See vrrpd_init_rxsock())
 * - The TX socket is successfully created over the VNIC interface to send
 *   the VRRP advertisement. (See vrrpd_init_txsock())
 * - The primary IP address has been successfully selected over the physical
 *   interface. (See vrrpd_select_primary())
 *
 * If a VRRP router is enabled but the other conditions haven't been
 * satisfied, the router will stay in the VRRP_STATE_INIT state. If all the
 * above conditions are met, the VRRP router will transition to either
 * the VRRP_STATE_MASTER or the VRRP_STATE_BACKUP state, depending on the
 * VRRP protocol.
 */
78 
79 #define	skip_whitespace(p)	while (isspace(*(p))) ++(p)
80 
81 #define	BUFFSIZE	65536
82 
83 #define	VRRPCONF	"/etc/inet/vrrp.conf"
84 
/*
 * State of one routing socket. vrrpd keeps one of these per address family
 * (see the vrrpd_rtsocks[] array, which has an AF_INET and an AF_INET6
 * entry); vrt_fd and vrt_eid are -1 while the socket is not set up.
 */
typedef struct vrrpd_rtsock_s {
	int		vrt_af;		/* address family */
	int		vrt_fd;		/* socket for the PF_ROUTE msg */
	iu_event_id_t	vrt_eid;	/* event ID */
} vrrpd_rtsock_t;
90 
91 static ipadm_handle_t	vrrp_ipadm_handle = NULL;	/* libipadm handle */
92 static int		vrrp_logflag = 0;
93 boolean_t		vrrp_debug_level = 0;
94 iu_eh_t			*vrrpd_eh = NULL;
95 iu_tq_t			*vrrpd_timerq = NULL;
96 static vrrp_handle_t	vrrpd_vh = NULL;
97 static int		vrrpd_cmdsock_fd = -1;	/* socket to communicate */
98 						/* between vrrpd/libvrrpadm */
99 static iu_event_id_t	vrrpd_cmdsock_eid = -1;
100 static int		vrrpd_ctlsock_fd = -1;	/* socket to bring up/down */
101 						/* the virtual IP addresses */
102 static int		vrrpd_ctlsock6_fd = -1;
103 static vrrpd_rtsock_t	vrrpd_rtsocks[2] = {
104 	{AF_INET, -1, -1},
105 	{AF_INET6, -1, -1}
106 };
107 static iu_timer_id_t	vrrp_scan_timer_id = -1;
108 
109 TAILQ_HEAD(vrrp_vr_list_s, vrrp_vr_s);
110 TAILQ_HEAD(vrrp_intf_list_s, vrrp_intf_s);
111 static struct vrrp_vr_list_s	vrrp_vr_list;
112 static struct vrrp_intf_list_s	vrrp_intf_list;
113 static char		vrrpd_conffile[MAXPATHLEN];
114 
115 /*
116  * Multicast address of VRRP advertisement in network byte order
117  */
118 static vrrp_addr_t	vrrp_muladdr4;
119 static vrrp_addr_t	vrrp_muladdr6;
120 
121 static int		vrrpd_scan_interval = 20000;	/* ms */
122 static int		pfds[2];
123 
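/*
 * Macros to calculate skew_time and master_down_timer.
 *
 * SKEW_TIME(pri, intv) is intv * (256 - pri) / 256 and
 * MASTER_DOWN_INTERVAL(pri, intv) is 3 * intv plus that skew time.
 *
 * NOTE(review): this comment used to say "the input is in centisecs and
 * output are in msecs", but the macros perform no unit conversion, so the
 * result is in the same unit as `intv'. Confirm the unit of the interval
 * passed in by the callers.
 */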
129 #define	SKEW_TIME(pri, intv)	((intv) * (256 - (pri)) / 256)
130 #define	MASTER_DOWN_INTERVAL(pri, intv)	(3 * (intv) + SKEW_TIME((pri), (intv)))
131 
132 #define	SKEW_TIME_VR(vr)	\
133 	SKEW_TIME((vr)->vvr_conf.vvc_pri, (vr)->vvr_master_adver_int)
134 #define	MASTER_DOWN_INTERVAL_VR(vr)	\
135 	MASTER_DOWN_INTERVAL((vr)->vvr_conf.vvc_pri, (vr)->vvr_master_adver_int)
136 
137 #define	VRRP_CONF_UPDATE	0x01
138 #define	VRRP_CONF_DELETE	0x02
139 
140 static char *af_str(int);
141 
142 static iu_tq_callback_t vrrp_adv_timeout;
143 static iu_tq_callback_t vrrp_b2m_timeout;
144 static iu_eh_callback_t vrrpd_sock_handler;
145 static iu_eh_callback_t vrrpd_rtsock_handler;
146 static iu_eh_callback_t vrrpd_cmdsock_handler;
147 
148 static int daemon_init();
149 
150 static vrrp_err_t vrrpd_init();
151 static void vrrpd_fini();
152 static vrrp_err_t vrrpd_cmdsock_create();
153 static void vrrpd_cmdsock_destroy();
154 static vrrp_err_t vrrpd_rtsock_create();
155 static void vrrpd_rtsock_destroy();
156 static vrrp_err_t vrrpd_ctlsock_create();
157 static void vrrpd_ctlsock_destroy();
158 
159 static void vrrpd_scan_timer(iu_tq_t *, void *);
160 static void vrrpd_scan(int);
161 static vrrp_err_t vrrpd_init_rxsock(vrrp_vr_t *);
162 static void vrrpd_fini_rxsock(vrrp_vr_t *);
163 static vrrp_err_t vrrpd_init_txsock(vrrp_vr_t *);
164 static vrrp_err_t vrrpd_init_txsock_v4(vrrp_vr_t *);
165 static vrrp_err_t vrrpd_init_txsock_v6(vrrp_vr_t *);
166 static void vrrpd_fini_txsock(vrrp_vr_t *);
167 
168 static vrrp_err_t vrrpd_create_vr(vrrp_vr_conf_t *);
169 static vrrp_err_t vrrpd_enable_vr(vrrp_vr_t *);
170 static void vrrpd_disable_vr(vrrp_vr_t *, vrrp_intf_t *, boolean_t);
171 static void vrrpd_delete_vr(vrrp_vr_t *);
172 
173 static vrrp_err_t vrrpd_create(vrrp_vr_conf_t *, boolean_t);
174 static vrrp_err_t vrrpd_delete(const char *);
175 static vrrp_err_t vrrpd_enable(const char *, boolean_t);
176 static vrrp_err_t vrrpd_disable(const char *);
177 static vrrp_err_t vrrpd_modify(vrrp_vr_conf_t *, uint32_t);
178 static void vrrpd_list(vrid_t, char *, int, vrrp_ret_list_t *, size_t *);
179 static void vrrpd_query(const char *, vrrp_ret_query_t *, size_t *);
180 
181 static boolean_t vrrp_rd_prop_name(vrrp_vr_conf_t *, const char *);
182 static boolean_t vrrp_rd_prop_vrid(vrrp_vr_conf_t *, const char *);
183 static boolean_t vrrp_rd_prop_af(vrrp_vr_conf_t *, const char *);
184 static boolean_t vrrp_rd_prop_pri(vrrp_vr_conf_t *, const char *);
185 static boolean_t vrrp_rd_prop_adver_int(vrrp_vr_conf_t *, const char *);
186 static boolean_t vrrp_rd_prop_preempt(vrrp_vr_conf_t *, const char *);
187 static boolean_t vrrp_rd_prop_accept(vrrp_vr_conf_t *, const char *);
188 static boolean_t vrrp_rd_prop_ifname(vrrp_vr_conf_t *, const char *);
189 static boolean_t vrrp_rd_prop_enabled(vrrp_vr_conf_t *, const char *);
190 static int vrrp_wt_prop_name(vrrp_vr_conf_t *, char *, size_t);
191 static int vrrp_wt_prop_vrid(vrrp_vr_conf_t *, char *, size_t);
192 static int vrrp_wt_prop_af(vrrp_vr_conf_t *, char *, size_t);
193 static int vrrp_wt_prop_pri(vrrp_vr_conf_t *, char *, size_t);
194 static int vrrp_wt_prop_adver_int(vrrp_vr_conf_t *, char *, size_t);
195 static int vrrp_wt_prop_preempt(vrrp_vr_conf_t *, char *, size_t);
196 static int vrrp_wt_prop_accept(vrrp_vr_conf_t *, char *, size_t);
197 static int vrrp_wt_prop_ifname(vrrp_vr_conf_t *, char *, size_t);
198 static int vrrp_wt_prop_enabled(vrrp_vr_conf_t *, char *, size_t);
199 
200 static void vrrpd_cmd_create(void *, void *, size_t *);
201 static void vrrpd_cmd_delete(void *, void *, size_t *);
202 static void vrrpd_cmd_enable(void *, void *, size_t *);
203 static void vrrpd_cmd_disable(void *, void *, size_t *);
204 static void vrrpd_cmd_modify(void *, void *, size_t *);
205 static void vrrpd_cmd_list(void *, void *, size_t *);
206 static void vrrpd_cmd_query(void *, void *, size_t *);
207 
208 static vrrp_vr_t *vrrpd_lookup_vr_by_vrid(char *, vrid_t vrid_t, int);
209 static vrrp_vr_t *vrrpd_lookup_vr_by_name(const char *);
210 static vrrp_intf_t *vrrpd_lookup_if(const char *, int);
211 static vrrp_err_t vrrpd_create_if(const char *, int, uint32_t, vrrp_intf_t **);
212 static void vrrpd_delete_if(vrrp_intf_t *, boolean_t);
213 static vrrp_err_t vrrpd_create_ip(vrrp_intf_t *, const char *, vrrp_addr_t *,
214     uint64_t flags);
215 static void vrrpd_delete_ip(vrrp_intf_t *, vrrp_ip_t *);
216 
217 static void vrrpd_init_ipcache(int);
218 static void vrrpd_update_ipcache(int);
219 static ipadm_status_t vrrpd_walk_addr_info(int);
220 static vrrp_err_t vrrpd_add_ipaddr(char *, int, vrrp_addr_t *,
221     int, uint64_t);
222 static vrrp_ip_t *vrrpd_select_primary(vrrp_intf_t *);
223 static void vrrpd_reselect_primary(vrrp_intf_t *);
224 static void vrrpd_reenable_all_vr();
225 static void vrrpd_remove_if(vrrp_intf_t *, boolean_t);
226 
227 static uint16_t in_cksum(int, uint16_t, void *);
228 static uint16_t vrrp_cksum4(struct in_addr *, struct in_addr *,
229     uint16_t, vrrp_pkt_t *);
230 static uint16_t vrrp_cksum6(struct in6_addr *, struct in6_addr *,
231     uint16_t, vrrp_pkt_t *);
232 static size_t vrrpd_build_vrrp(vrrp_vr_t *, uchar_t *, int, boolean_t);
233 
234 static void vrrpd_process_adv(vrrp_vr_t *, vrrp_addr_t *, vrrp_pkt_t *);
235 static vrrp_err_t vrrpd_send_adv(vrrp_vr_t *, boolean_t);
236 
237 /* state transition functions */
238 static vrrp_err_t vrrpd_state_i2m(vrrp_vr_t *);
239 static vrrp_err_t vrrpd_state_i2b(vrrp_vr_t *);
240 static void vrrpd_state_m2i(vrrp_vr_t *);
241 static void vrrpd_state_b2i(vrrp_vr_t *);
242 static vrrp_err_t vrrpd_state_b2m(vrrp_vr_t *);
243 static vrrp_err_t vrrpd_state_m2b(vrrp_vr_t *);
244 static void vrrpd_state_trans(vrrp_state_t, vrrp_state_t, vrrp_vr_t *);
245 
246 static vrrp_err_t vrrpd_set_noaccept(vrrp_vr_t *, boolean_t);
247 static vrrp_err_t vrrpd_virtualip_update(vrrp_vr_t *, boolean_t);
248 static vrrp_err_t vrrpd_virtualip_updateone(vrrp_intf_t *, vrrp_ip_t *,
249     boolean_t);
250 static int vrrpd_post_event(const char *, vrrp_state_t, vrrp_state_t);
251 
252 static void vrrpd_initconf();
253 static vrrp_err_t vrrpd_updateconf(vrrp_vr_conf_t *, uint_t);
254 static vrrp_err_t vrrpd_write_vrconf(char *, size_t, vrrp_vr_conf_t *);
255 static vrrp_err_t vrrpd_read_vrconf(char *, vrrp_vr_conf_t *);
256 static vrrp_err_t vrrpd_readprop(const char *, vrrp_vr_conf_t *);
257 static void vrrpd_cleanup();
258 
259 static void vrrp_log(int, char *, ...);
260 static int timeval_to_milli(struct timeval);
261 static struct timeval timeval_delta(struct timeval, struct timeval);
262 
/*
 * Descriptor of one persistent VRRP property: the property name and the
 * pair of callbacks associated with it.
 *
 * NOTE(review): judging by the signatures, vs_propread presumably parses a
 * string value into the vrrp_vr_conf_t and vs_propwrite presumably formats
 * the value from the vrrp_vr_conf_t into the supplied buffer -- confirm
 * against the vrrp_rd_prop_*() and vrrp_wt_prop_*() implementations.
 */
typedef struct vrrpd_prop_s {
	char		*vs_propname;
	boolean_t	(*vs_propread)(vrrp_vr_conf_t *, const char *);
	int		(*vs_propwrite)(vrrp_vr_conf_t *, char *, size_t);
} vrrp_prop_t;
268 
/*
 * persistent VRRP properties array
 *
 * Each entry binds a property name to its read and write callbacks.
 * VRRP_PROP_INFO_TABSIZE is the number of entries in the table.
 */
static vrrp_prop_t vrrp_prop_info_tbl[] = {
	{"name", vrrp_rd_prop_name, vrrp_wt_prop_name},
	{"vrid", vrrp_rd_prop_vrid, vrrp_wt_prop_vrid},
	{"priority", vrrp_rd_prop_pri, vrrp_wt_prop_pri},
	{"adv_intval", vrrp_rd_prop_adver_int, vrrp_wt_prop_adver_int},
	{"preempt_mode", vrrp_rd_prop_preempt, vrrp_wt_prop_preempt},
	{"accept_mode", vrrp_rd_prop_accept, vrrp_wt_prop_accept},
	{"interface", vrrp_rd_prop_ifname, vrrp_wt_prop_ifname},
	{"af", vrrp_rd_prop_af, vrrp_wt_prop_af},
	{"enabled", vrrp_rd_prop_enabled, vrrp_wt_prop_enabled}
};

#define	VRRP_PROP_INFO_TABSIZE	\
	(sizeof (vrrp_prop_info_tbl) / sizeof (vrrp_prop_t))
286 
/*
 * Signature shared by all the vrrpd_cmd_*() command handlers.
 *
 * NOTE(review): the three arguments look like (request, reply buffer,
 * in/out reply size) -- confirm against vrrpd_cmdsock_handler().
 */
typedef void vrrp_cmd_func_t(void *, void *, size_t *);

/*
 * Descriptor of one administrative command: the command type, the fixed
 * request size, the reply size (0 when it varies), whether the command is a
 * "set" operation, and the function that handles it.
 */
typedef struct vrrp_cmd_info_s {
	vrrp_cmd_type_t	vi_cmd;
	size_t		vi_reqsize;
	size_t		vi_acksize;	/* 0 if the size is variable */
	boolean_t	vi_setop;	/* Set operation? Check credentials */
	vrrp_cmd_func_t	*vi_cmdfunc;
} vrrp_cmd_info_t;
296 
/*
 * Table of the supported administrative commands. The create, delete,
 * enable, disable and modify commands are marked as set operations; query
 * and list are not and have a variable-size reply (vi_acksize of 0).
 * VRRP_DOOR_INFO_TABLE_SIZE is the number of entries in the table.
 */
static vrrp_cmd_info_t vrrp_cmd_info_tbl[] = {
	{VRRP_CMD_CREATE, sizeof (vrrp_cmd_create_t),
	    sizeof (vrrp_ret_create_t), _B_TRUE, vrrpd_cmd_create},
	{VRRP_CMD_DELETE, sizeof (vrrp_cmd_delete_t),
	    sizeof (vrrp_ret_delete_t), _B_TRUE, vrrpd_cmd_delete},
	{VRRP_CMD_ENABLE, sizeof (vrrp_cmd_enable_t),
	    sizeof (vrrp_ret_enable_t), _B_TRUE, vrrpd_cmd_enable},
	{VRRP_CMD_DISABLE, sizeof (vrrp_cmd_disable_t),
	    sizeof (vrrp_ret_disable_t), _B_TRUE, vrrpd_cmd_disable},
	{VRRP_CMD_MODIFY, sizeof (vrrp_cmd_modify_t),
	    sizeof (vrrp_ret_modify_t), _B_TRUE, vrrpd_cmd_modify},
	{VRRP_CMD_QUERY, sizeof (vrrp_cmd_query_t), 0,
	    _B_FALSE, vrrpd_cmd_query},
	{VRRP_CMD_LIST, sizeof (vrrp_cmd_list_t), 0,
	    _B_FALSE, vrrpd_cmd_list}
};

#define	VRRP_DOOR_INFO_TABLE_SIZE	\
	(sizeof (vrrp_cmd_info_tbl) / sizeof (vrrp_cmd_info_t))
316 
317 static int
318 ipaddr_cmp(int af, vrrp_addr_t *addr1, vrrp_addr_t *addr2)
319 {
320 	if (af == AF_INET) {
321 		return (memcmp(&addr1->in4.sin_addr,
322 		    &addr2->in4.sin_addr, sizeof (struct in_addr)));
323 	} else {
324 		return (memcmp(&addr1->in6.sin6_addr,
325 		    &addr2->in6.sin6_addr, sizeof (struct in6_addr)));
326 	}
327 }
328 
329 static vrrp_vr_t *
330 vrrpd_lookup_vr_by_vrid(char *ifname, vrid_t vrid, int af)
331 {
332 	vrrp_vr_t *vr;
333 
334 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
335 		if (strcmp(vr->vvr_conf.vvc_link, ifname) == 0 &&
336 		    vr->vvr_conf.vvc_vrid == vrid &&
337 		    vr->vvr_conf.vvc_af == af) {
338 			break;
339 		}
340 	}
341 	return (vr);
342 }
343 
344 static vrrp_vr_t *
345 vrrpd_lookup_vr_by_name(const char *name)
346 {
347 	vrrp_vr_t *vr;
348 
349 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
350 		if (strcmp(vr->vvr_conf.vvc_name, name) == 0)
351 			break;
352 	}
353 	return (vr);
354 }
355 
356 static vrrp_intf_t *
357 vrrpd_lookup_if(const char *ifname, int af)
358 {
359 	vrrp_intf_t	*intf;
360 
361 	TAILQ_FOREACH(intf, &vrrp_intf_list, vvi_next) {
362 		if (strcmp(ifname, intf->vvi_ifname) == 0 &&
363 		    af == intf->vvi_af) {
364 			break;
365 		}
366 	}
367 	return (intf);
368 }
369 
370 static vrrp_err_t
371 vrrpd_create_if(const char *ifname, int af, uint32_t ifindex,
372     vrrp_intf_t **intfp)
373 {
374 	vrrp_intf_t	*intf;
375 
376 	vrrp_log(VRRP_DBG0, "vrrpd_create_if(%s, %s, %d)",
377 	    ifname, af_str(af), ifindex);
378 
379 	if (((*intfp) = malloc(sizeof (vrrp_intf_t))) == NULL) {
380 		vrrp_log(VRRP_ERR, "vrrpd_create_if(): failed to "
381 		    "allocate %s/%s interface", ifname, af_str(af));
382 		return (VRRP_ENOMEM);
383 	}
384 
385 	intf = *intfp;
386 	TAILQ_INIT(&intf->vvi_iplist);
387 	(void) strlcpy(intf->vvi_ifname, ifname, sizeof (intf->vvi_ifname));
388 	intf->vvi_af = af;
389 	intf->vvi_sockfd = -1;
390 	intf->vvi_nvr = 0;
391 	intf->vvi_eid = -1;
392 	intf->vvi_pip = NULL;
393 	intf->vvi_ifindex = ifindex;
394 	intf->vvi_state = NODE_STATE_NEW;
395 	intf->vvi_vr_state = VRRP_STATE_INIT;
396 	TAILQ_INSERT_TAIL(&vrrp_intf_list, intf, vvi_next);
397 	return (VRRP_SUCCESS);
398 }
399 
400 /*
401  * An interface is deleted. If update_vr is true, the deletion of the interface
402  * may cause the state transition of assoicated VRRP router (if this interface
403  * is either the primary or the VNIC interface of the VRRP router); otherwise,
404  * simply delete the interface without updating the VRRP router.
405  */
406 static void
407 vrrpd_delete_if(vrrp_intf_t *intf, boolean_t update_vr)
408 {
409 	vrrp_ip_t	*ip;
410 
411 	vrrp_log(VRRP_DBG0, "vrrpd_delete_if(%s, %s, %supdate_vr)",
412 	    intf->vvi_ifname, af_str(intf->vvi_af), update_vr ? "" : "no_");
413 
414 	if (update_vr) {
415 		/*
416 		 * If a this interface is the physical interface or the VNIC
417 		 * of a VRRP router, the deletion of the interface (no IP
418 		 * address exists on this interface) may cause the state
419 		 * transition of the VRRP router. call vrrpd_remove_if()
420 		 * to find all corresponding VRRP router and update their
421 		 * states.
422 		 */
423 		vrrpd_remove_if(intf, _B_FALSE);
424 	}
425 
426 	/*
427 	 * First remove and delete all the IP addresses on the interface
428 	 */
429 	while (!TAILQ_EMPTY(&intf->vvi_iplist)) {
430 		ip = TAILQ_FIRST(&intf->vvi_iplist);
431 		vrrpd_delete_ip(intf, ip);
432 	}
433 
434 	/*
435 	 * Then remove and delete the interface
436 	 */
437 	TAILQ_REMOVE(&vrrp_intf_list, intf, vvi_next);
438 	(void) free(intf);
439 }
440 
441 static vrrp_err_t
442 vrrpd_create_ip(vrrp_intf_t *intf, const char *lifname, vrrp_addr_t *addr,
443     uint64_t flags)
444 {
445 	vrrp_ip_t	*ip;
446 	char		abuf[INET6_ADDRSTRLEN];
447 
448 	/* LINTED E_CONSTANT_CONDITION */
449 	VRRPADDR2STR(intf->vvi_af, addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
450 	vrrp_log(VRRP_DBG0, "vrrpd_create_ip(%s, %s, %s, 0x%x)",
451 	    intf->vvi_ifname, lifname, abuf, flags);
452 
453 	if ((ip = malloc(sizeof (vrrp_ip_t))) == NULL) {
454 		vrrp_log(VRRP_ERR, "vrrpd_create_ip(%s, %s):"
455 		    "failed to allocate IP", lifname, abuf);
456 		return (VRRP_ENOMEM);
457 	}
458 
459 	(void) strncpy(ip->vip_lifname, lifname, sizeof (ip->vip_lifname));
460 	ip->vip_state = NODE_STATE_NEW;
461 	ip->vip_flags = flags;
462 	(void) memcpy(&ip->vip_addr, addr, sizeof (ip->vip_addr));
463 
464 	/*
465 	 * Make sure link-local IPv6 IP addresses are at the head of the list
466 	 */
467 	if (intf->vvi_af == AF_INET6 &&
468 	    IN6_IS_ADDR_LINKLOCAL(&addr->in6.sin6_addr)) {
469 		TAILQ_INSERT_HEAD(&intf->vvi_iplist, ip, vip_next);
470 	} else {
471 		TAILQ_INSERT_TAIL(&intf->vvi_iplist, ip, vip_next);
472 	}
473 	return (VRRP_SUCCESS);
474 }
475 
476 static void
477 vrrpd_delete_ip(vrrp_intf_t *intf, vrrp_ip_t *ip)
478 {
479 	char	abuf[INET6_ADDRSTRLEN];
480 	int	af = intf->vvi_af;
481 
482 	/* LINTED E_CONSTANT_CONDITION */
483 	VRRPADDR2STR(af, &ip->vip_addr, abuf, sizeof (abuf), _B_FALSE);
484 	vrrp_log(VRRP_DBG0, "vrrpd_delete_ip(%s, %s, %s) is %sprimary",
485 	    intf->vvi_ifname, ip->vip_lifname, abuf,
486 	    intf->vvi_pip == ip ? "" : "not ");
487 
488 	if (intf->vvi_pip == ip)
489 		intf->vvi_pip = NULL;
490 
491 	TAILQ_REMOVE(&intf->vvi_iplist, ip, vip_next);
492 	(void) free(ip);
493 }
494 
495 static char *
496 rtm_event2str(uchar_t event)
497 {
498 	switch (event) {
499 	case RTM_NEWADDR:
500 		return ("RTM_NEWADDR");
501 	case RTM_DELADDR:
502 		return ("RTM_DELADDR");
503 	case RTM_IFINFO:
504 		return ("RTM_IFINFO");
505 	case RTM_ADD:
506 		return ("RTM_ADD");
507 	case RTM_DELETE:
508 		return ("RTM_DELETE");
509 	case RTM_CHANGE:
510 		return ("RTM_CHANGE");
511 	case RTM_OLDADD:
512 		return ("RTM_OLDADD");
513 	case RTM_OLDDEL:
514 		return ("RTM_OLDDEL");
515 	case RTM_CHGADDR:
516 		return ("RTM_CHGADDR");
517 	case RTM_FREEADDR:
518 		return ("RTM_FREEADDR");
519 	default:
520 		return ("RTM_OTHER");
521 	}
522 }
523 
524 /*
525  * This is called by the child process to inform the parent process to
526  * exit with the given return value. Note that the child process
527  * (the daemon process) informs the parent process to exit when anything
528  * goes wrong or when all the intialization is done.
529  */
530 static int
531 vrrpd_inform_parent_exit(int rv)
532 {
533 	int err = 0;
534 
535 	/*
536 	 * If vrrp_debug_level is none-zero, vrrpd is not running as
537 	 * a daemon. Return directly.
538 	 */
539 	if (vrrp_debug_level != 0)
540 		return (0);
541 
542 	if (write(pfds[1], &rv, sizeof (int)) != sizeof (int)) {
543 		err = errno;
544 		(void) close(pfds[1]);
545 		return (err);
546 	}
547 	(void) close(pfds[1]);
548 	return (0);
549 }
550 
551 int
552 main(int argc, char *argv[])
553 {
554 	int c, err;
555 	struct sigaction sa;
556 	sigset_t mask;
557 	struct rlimit rl;
558 
559 	(void) setlocale(LC_ALL, "");
560 	(void) textdomain(TEXT_DOMAIN);
561 
562 	/*
563 	 * We need PRIV_SYS_CONFIG to post VRRP sysevent, PRIV_NET_RAWACESS
564 	 * and PRIV_NET_ICMPACCESS to open  the raw socket, PRIV_SYS_IP_CONFIG
565 	 * to bring up/down the virtual IP addresses, and PRIV_SYS_RESOURCE to
566 	 * setrlimit().
567 	 *
568 	 * Note that sysevent is not supported in non-global zones.
569 	 */
570 	if (getzoneid() == GLOBAL_ZONEID) {
571 		err = __init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, 0, 0,
572 		    PRIV_SYS_CONFIG, PRIV_NET_RAWACCESS, PRIV_NET_ICMPACCESS,
573 		    PRIV_SYS_IP_CONFIG, PRIV_SYS_RESOURCE, NULL);
574 	} else {
575 		err = __init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, 0, 0,
576 		    PRIV_NET_RAWACCESS, PRIV_NET_ICMPACCESS,
577 		    PRIV_SYS_IP_CONFIG, PRIV_SYS_RESOURCE, NULL);
578 	}
579 
580 	if (err == -1) {
581 		vrrp_log(VRRP_ERR, "main(): init_daemon_priv() failed");
582 		return (EXIT_FAILURE);
583 	}
584 
585 	/*
586 	 * If vrrpd is started by other process, it will inherit the
587 	 * signal block mask. We unblock all signals to make sure the
588 	 * signal handling will work normally.
589 	 */
590 	(void) sigfillset(&mask);
591 	(void) thr_sigsetmask(SIG_UNBLOCK, &mask, NULL);
592 	sa.sa_handler = vrrpd_cleanup;
593 	sa.sa_flags = 0;
594 	(void) sigemptyset(&sa.sa_mask);
595 	(void) sigaction(SIGINT, &sa, NULL);
596 	(void) sigaction(SIGQUIT, &sa, NULL);
597 	(void) sigaction(SIGTERM, &sa, NULL);
598 
599 	vrrp_debug_level = 0;
600 	(void) strlcpy(vrrpd_conffile, VRRPCONF, sizeof (vrrpd_conffile));
601 	while ((c = getopt(argc, argv, "d:f:")) != EOF) {
602 		switch (c) {
603 		case 'd':
604 			vrrp_debug_level = atoi(optarg);
605 			break;
606 		case 'f':
607 			(void) strlcpy(vrrpd_conffile, optarg,
608 			    sizeof (vrrpd_conffile));
609 			break;
610 		default:
611 			break;
612 		}
613 	}
614 
615 	closefrom(3);
616 	if (vrrp_debug_level == 0 && (daemon_init() != 0)) {
617 		vrrp_log(VRRP_ERR, "main(): daemon_init() failed");
618 		return (EXIT_FAILURE);
619 	}
620 
621 	rl.rlim_cur = RLIM_INFINITY;
622 	rl.rlim_max = RLIM_INFINITY;
623 	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
624 		vrrp_log(VRRP_ERR, "main(): setrlimit() failed");
625 		goto child_out;
626 	}
627 
628 	if (vrrpd_init() != VRRP_SUCCESS) {
629 		vrrp_log(VRRP_ERR, "main(): vrrpd_init() failed");
630 		goto child_out;
631 	}
632 
633 	/*
634 	 * Get rid of unneeded privileges.
635 	 */
636 	__fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION,
637 	    PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, PRIV_SYS_RESOURCE, NULL);
638 
639 	/*
640 	 * Read the configuration and initialize the existing VRRP
641 	 * configuration
642 	 */
643 	vrrpd_initconf();
644 
645 	/*
646 	 * Inform the parent process that it can successfully exit.
647 	 */
648 	if ((err = vrrpd_inform_parent_exit(EXIT_SUCCESS)) != 0) {
649 		vrrpd_cleanup();
650 		vrrp_log(VRRP_WARNING, "vrrpd_inform_parent_exit() failed: %s",
651 		    strerror(err));
652 		return (EXIT_FAILURE);
653 	}
654 
655 	/*
656 	 * Start the loop to handle the timer and the IO events.
657 	 */
658 	switch (iu_handle_events(vrrpd_eh, vrrpd_timerq)) {
659 	case -1:
660 		vrrp_log(VRRP_ERR, "main(): iu_handle_events() failed "
661 		    "abnormally");
662 		break;
663 	default:
664 		break;
665 	}
666 
667 	vrrpd_cleanup();
668 	return (EXIT_SUCCESS);
669 
670 child_out:
671 	(void) vrrpd_inform_parent_exit(EXIT_FAILURE);
672 	return (EXIT_FAILURE);
673 }
674 
675 static int
676 daemon_init()
677 {
678 	pid_t	pid;
679 	int	rv;
680 
681 	vrrp_log(VRRP_DBG0, "daemon_init()");
682 
683 	if (getenv("SMF_FMRI") == NULL) {
684 		vrrp_log(VRRP_ERR, "daemon_init(): vrrpd is an smf(5) managed "
685 		    "service and should not be run from the command line.");
686 		return (-1);
687 	}
688 
689 	/*
690 	 * Create the pipe used for the child process to inform the parent
691 	 * process to exit after all initialization is done.
692 	 */
693 	if (pipe(pfds) < 0) {
694 		vrrp_log(VRRP_ERR, "daemon_init(): pipe() failed: %s",
695 		    strerror(errno));
696 		return (-1);
697 	}
698 
699 	if ((pid = fork()) < 0) {
700 		vrrp_log(VRRP_ERR, "daemon_init(): fork() failed: %s",
701 		    strerror(errno));
702 		(void) close(pfds[0]);
703 		(void) close(pfds[1]);
704 		return (-1);
705 	}
706 
707 	if (pid != 0) { /* Parent */
708 		(void) close(pfds[1]);
709 
710 		/*
711 		 * Read the child process's return value from the pfds.
712 		 * If the child process exits unexpectedly, read() returns -1.
713 		 */
714 		if (read(pfds[0], &rv, sizeof (int)) != sizeof (int)) {
715 			vrrp_log(VRRP_ERR, "daemon_init(): child process "
716 			    "exited unexpectedly %s", strerror(errno));
717 			(void) kill(pid, SIGTERM);
718 			rv = EXIT_FAILURE;
719 		}
720 		(void) close(pfds[0]);
721 		exit(rv);
722 	}
723 
724 	/*
725 	 * in child process, became a daemon, and return to main() to continue.
726 	 */
727 	(void) close(pfds[0]);
728 	(void) chdir("/");
729 	(void) setsid();
730 	(void) close(0);
731 	(void) close(1);
732 	(void) close(2);
733 	(void) open("/dev/null", O_RDWR, 0);
734 	(void) dup2(0, 1);
735 	(void) dup2(0, 2);
736 	openlog("vrrpd", LOG_PID, LOG_DAEMON);
737 	vrrp_logflag = 1;
738 	return (0);
739 }
740 
741 static vrrp_err_t
742 vrrpd_init()
743 {
744 	vrrp_err_t	err = VRRP_ESYS;
745 
746 	vrrp_log(VRRP_DBG0, "vrrpd_init()");
747 
748 	TAILQ_INIT(&vrrp_vr_list);
749 	TAILQ_INIT(&vrrp_intf_list);
750 
751 	if (vrrp_open(&vrrpd_vh) != VRRP_SUCCESS) {
752 		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrp_open() failed");
753 		goto fail;
754 	}
755 
756 	if ((vrrpd_timerq = iu_tq_create()) == NULL) {
757 		vrrp_log(VRRP_ERR, "vrrpd_init(): iu_tq_create() failed");
758 		goto fail;
759 	}
760 
761 	if ((vrrpd_eh = iu_eh_create()) == NULL) {
762 		vrrp_log(VRRP_ERR, "vrrpd_init(): iu_eh_create() failed");
763 		goto fail;
764 	}
765 
766 	/*
767 	 * Create the AF_UNIX socket used to communicate with libvrrpadm.
768 	 *
769 	 * This socket is used to receive the administrative requests and
770 	 * send back the results.
771 	 */
772 	if (vrrpd_cmdsock_create() != VRRP_SUCCESS) {
773 		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_cmdsock_create() "
774 		    "failed");
775 		goto fail;
776 	}
777 
778 	/*
779 	 * Create the VRRP control socket used to bring up/down the virtual
780 	 * IP addresses. It is also used to set the IFF_NOACCEPT flag of
781 	 * the virtual IP addresses.
782 	 */
783 	if (vrrpd_ctlsock_create() != VRRP_SUCCESS) {
784 		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_ctlsock_create() "
785 		    "failed");
786 		goto fail;
787 	}
788 
789 	/*
790 	 * Create the PF_ROUTER socket used to listen to the routing socket
791 	 * messages and build the interface/IP address list.
792 	 */
793 	if (vrrpd_rtsock_create() != VRRP_SUCCESS) {
794 		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_rtsock_create() "
795 		    "failed");
796 		goto fail;
797 	}
798 
799 	/* Open the libipadm handle */
800 	if (ipadm_open(&vrrp_ipadm_handle, 0) != IPADM_SUCCESS) {
801 		vrrp_log(VRRP_ERR, "vrrpd_init(): ipadm_open() failed");
802 		goto fail;
803 	}
804 
805 	/*
806 	 * Build the list of interfaces and IP addresses. Also, start the time
807 	 * to scan the interfaces/IP addresses periodically.
808 	 */
809 	vrrpd_scan(AF_INET);
810 	vrrpd_scan(AF_INET6);
811 	if ((vrrp_scan_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
812 	    vrrpd_scan_interval, vrrpd_scan_timer, NULL)) == -1) {
813 		vrrp_log(VRRP_ERR, "vrrpd_init(): start scan_timer failed");
814 		goto fail;
815 	}
816 
817 	/*
818 	 * Initialize the VRRP multicast address.
819 	 */
820 	bzero(&vrrp_muladdr4, sizeof (vrrp_addr_t));
821 	vrrp_muladdr4.in4.sin_family = AF_INET;
822 	(void) inet_pton(AF_INET, "224.0.0.18", &vrrp_muladdr4.in4.sin_addr);
823 
824 	bzero(&vrrp_muladdr6, sizeof (vrrp_addr_t));
825 	vrrp_muladdr6.in6.sin6_family = AF_INET6;
826 	(void) inet_pton(AF_INET6, "ff02::12", &vrrp_muladdr6.in6.sin6_addr);
827 
828 	return (VRRP_SUCCESS);
829 
830 fail:
831 	vrrpd_fini();
832 	return (err);
833 }
834 
835 static void
836 vrrpd_fini()
837 {
838 	vrrp_log(VRRP_DBG0, "vrrpd_fini()");
839 
840 	(void) iu_cancel_timer(vrrpd_timerq, vrrp_scan_timer_id, NULL);
841 	vrrp_scan_timer_id = -1;
842 
843 	vrrpd_rtsock_destroy();
844 	vrrpd_ctlsock_destroy();
845 	vrrpd_cmdsock_destroy();
846 
847 	if (vrrpd_eh != NULL) {
848 		iu_eh_destroy(vrrpd_eh);
849 		vrrpd_eh = NULL;
850 	}
851 
852 	if (vrrpd_timerq != NULL) {
853 		iu_tq_destroy(vrrpd_timerq);
854 		vrrpd_timerq = NULL;
855 	}
856 
857 	vrrp_close(vrrpd_vh);
858 	vrrpd_vh = NULL;
859 	assert(TAILQ_EMPTY(&vrrp_vr_list));
860 	assert(TAILQ_EMPTY(&vrrp_intf_list));
861 
862 	ipadm_close(vrrp_ipadm_handle);
863 }
864 
865 static void
866 vrrpd_cleanup(void)
867 {
868 	vrrp_vr_t	*vr;
869 	vrrp_intf_t	*intf;
870 
871 	vrrp_log(VRRP_DBG0, "vrrpd_cleanup()");
872 
873 	while (!TAILQ_EMPTY(&vrrp_vr_list)) {
874 		vr = TAILQ_FIRST(&vrrp_vr_list);
875 		vrrpd_delete_vr(vr);
876 	}
877 
878 	while (!TAILQ_EMPTY(&vrrp_intf_list)) {
879 		intf = TAILQ_FIRST(&vrrp_intf_list);
880 		vrrpd_delete_if(intf, _B_FALSE);
881 	}
882 
883 	vrrpd_fini();
884 	closelog();
885 	exit(1);
886 }
887 
888 /*
889  * Read the configuration file and initialize all the existing VRRP routers.
890  */
891 static void
892 vrrpd_initconf()
893 {
894 	FILE *fp;
895 	char line[LINE_MAX];
896 	int linenum = 0;
897 	vrrp_vr_conf_t conf;
898 	vrrp_err_t err;
899 
900 	vrrp_log(VRRP_DBG0, "vrrpd_initconf()");
901 
902 	if ((fp = fopen(vrrpd_conffile, "rF")) == NULL) {
903 		vrrp_log(VRRP_ERR, "failed to open the configuration file %s",
904 		    vrrpd_conffile);
905 		return;
906 	}
907 
908 	while (fgets(line, sizeof (line), fp) != NULL) {
909 		linenum++;
910 		conf.vvc_vrid = VRRP_VRID_NONE;
911 		if ((err = vrrpd_read_vrconf(line, &conf)) != VRRP_SUCCESS) {
912 			vrrp_log(VRRP_ERR, "failed to parse %d line %s",
913 			    linenum, line);
914 			continue;
915 		}
916 
917 		/*
918 		 * Blank or comment line
919 		 */
920 		if (conf.vvc_vrid == VRRP_VRID_NONE)
921 			continue;
922 
923 		/*
924 		 * No need to update the configuration since the VRRP router
925 		 * created/enabled based on the existing configuration.
926 		 */
927 		if ((err = vrrpd_create(&conf, _B_FALSE)) != VRRP_SUCCESS) {
928 			vrrp_log(VRRP_ERR, "VRRP router %s creation failed: "
929 			    "%s", conf.vvc_name, vrrp_err2str(err));
930 			continue;
931 		}
932 
933 		if (conf.vvc_enabled &&
934 		    ((err = vrrpd_enable(conf.vvc_name, _B_FALSE)) !=
935 		    VRRP_SUCCESS)) {
936 			vrrp_log(VRRP_ERR, "VRRP router %s enable failed: %s",
937 			    conf.vvc_name, vrrp_err2str(err));
938 		}
939 	}
940 
941 	(void) fclose(fp);
942 }
943 
944 /*
945  * Create the AF_UNIX socket used to communicate with libvrrpadm.
946  *
947  * This socket is used to receive the administrative request and
948  * send back the results.
949  */
950 static vrrp_err_t
951 vrrpd_cmdsock_create()
952 {
953 	iu_event_id_t		eid;
954 	struct sockaddr_un	laddr;
955 	int			sock, flags;
956 
957 	vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_create()");
958 
959 	if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
960 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): socket(AF_UNIX) "
961 		    "failed: %s", strerror(errno));
962 		return (VRRP_ESYS);
963 	}
964 
965 	/*
966 	 * Set it to be non-blocking.
967 	 */
968 	flags = fcntl(sock, F_GETFL, 0);
969 	(void) fcntl(sock, F_SETFL, (flags | O_NONBLOCK));
970 
971 	/*
972 	 * Unlink first in case a previous daemon instance exited ungracefully.
973 	 */
974 	(void) unlink(VRRPD_SOCKET);
975 
976 	bzero(&laddr, sizeof (laddr));
977 	laddr.sun_family = AF_UNIX;
978 	(void) strlcpy(laddr.sun_path, VRRPD_SOCKET, sizeof (laddr.sun_path));
979 	if (bind(sock, (struct sockaddr *)&laddr, sizeof (laddr)) < 0) {
980 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): bind() failed: %s",
981 		    strerror(errno));
982 		(void) close(sock);
983 		return (VRRP_ESYS);
984 	}
985 
986 	if (listen(sock, 30) < 0) {
987 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): listen() "
988 		    "failed: %s", strerror(errno));
989 		(void) close(sock);
990 		return (VRRP_ESYS);
991 	}
992 
993 	if ((eid = iu_register_event(vrrpd_eh, sock, POLLIN,
994 	    vrrpd_cmdsock_handler, NULL)) == -1) {
995 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): iu_register_event()"
996 		    " failed");
997 		(void) close(sock);
998 		return (VRRP_ESYS);
999 	}
1000 
1001 	vrrpd_cmdsock_fd = sock;
1002 	vrrpd_cmdsock_eid = eid;
1003 	return (VRRP_SUCCESS);
1004 }
1005 
1006 static void
1007 vrrpd_cmdsock_destroy()
1008 {
1009 	vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_destroy()");
1010 
1011 	(void) iu_unregister_event(vrrpd_eh, vrrpd_cmdsock_eid, NULL);
1012 	(void) close(vrrpd_cmdsock_fd);
1013 	vrrpd_cmdsock_fd = -1;
1014 	vrrpd_cmdsock_eid = -1;
1015 }
1016 
1017 /*
1018  * Create the PF_ROUTER sockets used to listen to the routing socket
1019  * messages and build the interface/IP address list. Create one for
1020  * each address family (IPv4 and IPv6).
1021  */
1022 static vrrp_err_t
1023 vrrpd_rtsock_create()
1024 {
1025 	int		i, flags, sock;
1026 	iu_event_id_t	eid;
1027 
1028 	vrrp_log(VRRP_DBG0, "vrrpd_rtsock_create()");
1029 
1030 	for (i = 0; i < 2; i++) {
1031 		sock = socket(PF_ROUTE, SOCK_RAW, vrrpd_rtsocks[i].vrt_af);
1032 		if (sock == -1) {
1033 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): socket() "
1034 			    "failed: %s", strerror(errno));
1035 			break;
1036 		}
1037 
1038 		/*
1039 		 * Set it to be non-blocking.
1040 		 */
1041 		if ((flags = fcntl(sock, F_GETFL, 0)) < 0) {
1042 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): "
1043 			    "fcntl(F_GETFL) failed: %s", strerror(errno));
1044 			break;
1045 		}
1046 
1047 		if ((fcntl(sock, F_SETFL, flags | O_NONBLOCK)) < 0) {
1048 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): "
1049 			    "fcntl(F_SETFL) failed: %s", strerror(errno));
1050 			break;
1051 		}
1052 
1053 		if ((eid = iu_register_event(vrrpd_eh, sock, POLLIN,
1054 		    vrrpd_rtsock_handler, &(vrrpd_rtsocks[i].vrt_af))) == -1) {
1055 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): register "
1056 			    "rtsock %d(%s) failed", sock,
1057 			    af_str(vrrpd_rtsocks[i].vrt_af));
1058 			break;
1059 		}
1060 
1061 		vrrpd_rtsocks[i].vrt_fd = sock;
1062 		vrrpd_rtsocks[i].vrt_eid = eid;
1063 	}
1064 
1065 	if (i != 2) {
1066 		(void) close(sock);
1067 		vrrpd_rtsock_destroy();
1068 		return (VRRP_ESYS);
1069 	}
1070 
1071 	return (VRRP_SUCCESS);
1072 }
1073 
1074 static void
1075 vrrpd_rtsock_destroy()
1076 {
1077 	int		i;
1078 
1079 	vrrp_log(VRRP_DBG0, "vrrpd_rtsock_destroy()");
1080 	for (i = 0; i < 2; i++) {
1081 		(void) iu_unregister_event(vrrpd_eh, vrrpd_rtsocks[i].vrt_eid,
1082 		    NULL);
1083 		(void) close(vrrpd_rtsocks[i].vrt_fd);
1084 		vrrpd_rtsocks[i].vrt_eid = -1;
1085 		vrrpd_rtsocks[i].vrt_fd = -1;
1086 	}
1087 }
1088 
1089 /*
1090  * Create the VRRP control socket used to bring up/down the virtual
1091  * IP addresses. It is also used to set the IFF_NOACCEPT flag of
1092  * the virtual IP addresses.
1093  */
1094 static vrrp_err_t
1095 vrrpd_ctlsock_create()
1096 {
1097 	int	s, s6;
1098 	int	on = _B_TRUE;
1099 
1100 	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
1101 		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): socket(INET) "
1102 		    "failed: %s", strerror(errno));
1103 		return (VRRP_ESYS);
1104 	}
1105 	if (setsockopt(s, SOL_SOCKET, SO_VRRP, &on, sizeof (on)) < 0) {
1106 		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): "
1107 		    "setsockopt(INET, SO_VRRP) failed: %s", strerror(errno));
1108 		(void) close(s);
1109 		return (VRRP_ESYS);
1110 	}
1111 
1112 	if ((s6 = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) {
1113 		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): socket(INET6) "
1114 		    "failed: %s", strerror(errno));
1115 		(void) close(s);
1116 		return (VRRP_ESYS);
1117 	}
1118 	if (setsockopt(s6, SOL_SOCKET, SO_VRRP, &on, sizeof (on)) < 0) {
1119 		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): "
1120 		    "setsockopt(INET6, SO_VRRP) failed: %s", strerror(errno));
1121 		(void) close(s);
1122 		(void) close(s6);
1123 		return (VRRP_ESYS);
1124 	}
1125 
1126 	vrrpd_ctlsock_fd = s;
1127 	vrrpd_ctlsock6_fd = s6;
1128 	return (VRRP_SUCCESS);
1129 }
1130 
1131 static void
1132 vrrpd_ctlsock_destroy()
1133 {
1134 	(void) close(vrrpd_ctlsock_fd);
1135 	vrrpd_ctlsock_fd = -1;
1136 	(void) close(vrrpd_ctlsock6_fd);
1137 	vrrpd_ctlsock6_fd = -1;
1138 }
1139 
1140 /*ARGSUSED*/
1141 static void
1142 vrrpd_cmd_create(void *arg1, void *arg2, size_t *arg2_sz)
1143 {
1144 	vrrp_cmd_create_t	*cmd = (vrrp_cmd_create_t *)arg1;
1145 	vrrp_ret_create_t	*ret = (vrrp_ret_create_t *)arg2;
1146 	vrrp_err_t		err;
1147 
1148 	err = vrrpd_create(&cmd->vcc_conf, _B_TRUE);
1149 	if (err == VRRP_SUCCESS && cmd->vcc_conf.vvc_enabled) {
1150 		/*
1151 		 * No need to update the configuration since it is already
1152 		 * done in the above vrrpd_create() call
1153 		 */
1154 		err = vrrpd_enable(cmd->vcc_conf.vvc_name, _B_FALSE);
1155 		if (err != VRRP_SUCCESS)
1156 			(void) vrrpd_delete(cmd->vcc_conf.vvc_name);
1157 	}
1158 	ret->vrc_err = err;
1159 }
1160 
1161 /*ARGSUSED*/
1162 static void
1163 vrrpd_cmd_delete(void *arg1, void *arg2, size_t *arg2_sz)
1164 {
1165 	vrrp_cmd_delete_t	*cmd = (vrrp_cmd_delete_t *)arg1;
1166 	vrrp_ret_delete_t	*ret = (vrrp_ret_delete_t *)arg2;
1167 
1168 	ret->vrd_err = vrrpd_delete(cmd->vcd_name);
1169 }
1170 
1171 /*ARGSUSED*/
1172 static void
1173 vrrpd_cmd_enable(void *arg1, void *arg2, size_t *arg2_sz)
1174 {
1175 	vrrp_cmd_enable_t	*cmd = (vrrp_cmd_enable_t *)arg1;
1176 	vrrp_ret_enable_t	*ret = (vrrp_ret_enable_t *)arg2;
1177 
1178 	ret->vrs_err = vrrpd_enable(cmd->vcs_name, _B_TRUE);
1179 }
1180 
1181 /*ARGSUSED*/
1182 static void
1183 vrrpd_cmd_disable(void *arg1, void *arg2, size_t *arg2_sz)
1184 {
1185 	vrrp_cmd_disable_t	*cmd = (vrrp_cmd_disable_t *)arg1;
1186 	vrrp_ret_disable_t	*ret = (vrrp_ret_disable_t *)arg2;
1187 
1188 	ret->vrx_err = vrrpd_disable(cmd->vcx_name);
1189 }
1190 
1191 /*ARGSUSED*/
1192 static void
1193 vrrpd_cmd_modify(void *arg1, void *arg2, size_t *arg2_sz)
1194 {
1195 	vrrp_cmd_modify_t	*cmd = (vrrp_cmd_modify_t *)arg1;
1196 	vrrp_ret_modify_t	*ret = (vrrp_ret_modify_t *)arg2;
1197 
1198 	ret->vrm_err = vrrpd_modify(&cmd->vcm_conf, cmd->vcm_mask);
1199 }
1200 
1201 static void
1202 vrrpd_cmd_query(void *arg1, void *arg2, size_t *arg2_sz)
1203 {
1204 	vrrp_cmd_query_t	*cmd = (vrrp_cmd_query_t *)arg1;
1205 
1206 	vrrpd_query(cmd->vcq_name, arg2, arg2_sz);
1207 }
1208 
1209 static void
1210 vrrpd_cmd_list(void *arg1, void *arg2, size_t *arg2_sz)
1211 {
1212 	vrrp_cmd_list_t	*cmd = (vrrp_cmd_list_t *)arg1;
1213 
1214 	vrrpd_list(cmd->vcl_vrid, cmd->vcl_ifname, cmd->vcl_af, arg2, arg2_sz);
1215 }
1216 
1217 /*
1218  * Write-type requeset must have the solaris.network.vrrp authorization.
1219  */
1220 static boolean_t
1221 vrrp_auth_check(int connfd, vrrp_cmd_info_t *cinfo)
1222 {
1223 	ucred_t		*cred = NULL;
1224 	uid_t		uid;
1225 	struct passwd	*pw;
1226 	boolean_t	success = _B_FALSE;
1227 
1228 	vrrp_log(VRRP_DBG0, "vrrp_auth_check()");
1229 
1230 	if (!cinfo->vi_setop)
1231 		return (_B_TRUE);
1232 
1233 	/*
1234 	 * Validate the credential
1235 	 */
1236 	if (getpeerucred(connfd, &cred) == (uid_t)-1) {
1237 		vrrp_log(VRRP_ERR, "vrrp_auth_check(): getpeerucred() "
1238 		    "failed: %s", strerror(errno));
1239 		return (_B_FALSE);
1240 	}
1241 
1242 	if ((uid = ucred_getruid((const ucred_t *)cred)) == (uid_t)-1) {
1243 		vrrp_log(VRRP_ERR, "vrrp_auth_check(): ucred_getruid() "
1244 		    "failed: %s", strerror(errno));
1245 		goto done;
1246 	}
1247 
1248 	if ((pw = getpwuid(uid)) == NULL) {
1249 		vrrp_log(VRRP_ERR, "vrrp_auth_check(): getpwuid() failed");
1250 		goto done;
1251 	}
1252 
1253 	success = (chkauthattr("solaris.network.vrrp", pw->pw_name) == 1);
1254 
1255 done:
1256 	ucred_free(cred);
1257 	return (success);
1258 }
1259 
1260 /*
1261  * Process the administrative request from libvrrpadm
1262  */
1263 /* ARGSUSED */
1264 static void
1265 vrrpd_cmdsock_handler(iu_eh_t *eh, int s, short events, iu_event_id_t id,
1266     void *arg)
1267 {
1268 	vrrp_cmd_info_t		*cinfo = NULL;
1269 	vrrp_err_t		err = VRRP_SUCCESS;
1270 	uchar_t			buf[BUFFSIZE], ackbuf[BUFFSIZE];
1271 	size_t			cursize, acksize, len;
1272 	uint32_t		cmd;
1273 	int			connfd, i;
1274 	struct sockaddr_in	from;
1275 	socklen_t		fromlen;
1276 
1277 	vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_handler()");
1278 
1279 	fromlen = (socklen_t)sizeof (from);
1280 	if ((connfd = accept(s, (struct sockaddr *)&from, &fromlen)) < 0) {
1281 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler() accept(): %s",
1282 		    strerror(errno));
1283 		return;
1284 	}
1285 
1286 	/*
1287 	 * First get the type of the request
1288 	 */
1289 	cursize = 0;
1290 	while (cursize < sizeof (uint32_t)) {
1291 		len = read(connfd, buf + cursize,
1292 		    sizeof (uint32_t) - cursize);
1293 		if (len == (size_t)-1 && (errno == EAGAIN || errno == EINTR)) {
1294 			continue;
1295 		} else if (len > 0) {
1296 			cursize += len;
1297 			continue;
1298 		}
1299 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid message "
1300 		    "length");
1301 		(void) close(connfd);
1302 		return;
1303 	}
1304 
1305 	/* LINTED E_BAD_PTR_CAST_ALIGN */
1306 	cmd = ((vrrp_cmd_t *)buf)->vc_cmd;
1307 	for (i = 0; i < VRRP_DOOR_INFO_TABLE_SIZE; i++) {
1308 		if (vrrp_cmd_info_tbl[i].vi_cmd == cmd) {
1309 			cinfo = vrrp_cmd_info_tbl + i;
1310 			break;
1311 		}
1312 	}
1313 
1314 	if (cinfo == NULL) {
1315 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid request "
1316 		    "type %d", cmd);
1317 		err = VRRP_EINVAL;
1318 		goto done;
1319 	}
1320 
1321 	/*
1322 	 * Get the rest of the request.
1323 	 */
1324 	assert(cursize == sizeof (uint32_t));
1325 	while (cursize < cinfo->vi_reqsize) {
1326 		len = read(connfd, buf + cursize,
1327 		    cinfo->vi_reqsize - cursize);
1328 		if (len == (size_t)-1 && (errno == EAGAIN || errno == EINTR)) {
1329 			continue;
1330 		} else if (len > 0) {
1331 			cursize += len;
1332 			continue;
1333 		}
1334 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid message "
1335 		    "length");
1336 		err = VRRP_EINVAL;
1337 		goto done;
1338 	}
1339 
1340 	/*
1341 	 * Validate the authorization
1342 	 */
1343 	if (!vrrp_auth_check(connfd, cinfo)) {
1344 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): "
1345 		    "not sufficient authorization");
1346 		err = VRRP_EPERM;
1347 	}
1348 
1349 done:
1350 	/*
1351 	 * Ack the request
1352 	 */
1353 	if (err != 0) {
1354 		/* LINTED E_BAD_PTR_CAST_ALIGN */
1355 		((vrrp_ret_t *)ackbuf)->vr_err = err;
1356 		acksize = sizeof (vrrp_ret_t);
1357 	} else {
1358 		/*
1359 		 * If the size of ack is varied, the cmdfunc callback
1360 		 * will set the right size.
1361 		 */
1362 		if ((acksize = cinfo->vi_acksize) == 0)
1363 			acksize = sizeof (ackbuf);
1364 
1365 		/* LINTED E_BAD_PTR_CAST_ALIGN */
1366 		cinfo->vi_cmdfunc((vrrp_cmd_t *)buf, ackbuf, &acksize);
1367 	}
1368 
1369 	/*
1370 	 * Send the ack back.
1371 	 */
1372 	cursize = 0;
1373 	while (cursize < acksize) {
1374 		len = sendto(connfd, ackbuf + cursize, acksize - cursize,
1375 		    0, (struct sockaddr *)&from, fromlen);
1376 		if (len == (size_t)-1 && errno == EAGAIN) {
1377 			continue;
1378 		} else if (len > 0) {
1379 			cursize += len;
1380 			continue;
1381 		} else {
1382 			vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler() failed to "
1383 			    "ack: %s", strerror(errno));
1384 			break;
1385 		}
1386 	}
1387 
1388 	(void) shutdown(connfd, SHUT_RDWR);
1389 	(void) close(connfd);
1390 }
1391 
1392 /*
1393  * Process the routing socket messages and update the interfaces/IP addresses
1394  * list
1395  */
1396 /* ARGSUSED */
1397 static void
1398 vrrpd_rtsock_handler(iu_eh_t *eh, int s, short events,
1399     iu_event_id_t id, void *arg)
1400 {
1401 	char			buf[BUFFSIZE];
1402 	struct ifa_msghdr	*ifam;
1403 	int			nbytes;
1404 	int			af = *(int *)arg;
1405 	boolean_t		scanif = _B_FALSE;
1406 
1407 	for (;;) {
1408 		nbytes = read(s, buf, sizeof (buf));
1409 		if (nbytes <= 0) {
1410 			/* No more messages */
1411 			break;
1412 		}
1413 
1414 		/* LINTED E_BAD_PTR_CAST_ALIGN */
1415 		ifam = (struct ifa_msghdr *)buf;
1416 		if (ifam->ifam_version != RTM_VERSION) {
1417 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_handler(): version %d "
1418 			    "not understood", ifam->ifam_version);
1419 			break;
1420 		}
1421 
1422 		vrrp_log(VRRP_DBG0, "vrrpd_rtsock_handler(): recv %s event",
1423 		    rtm_event2str(ifam->ifam_type));
1424 
1425 		switch (ifam->ifam_type) {
1426 		case RTM_FREEADDR:
1427 		case RTM_CHGADDR:
1428 		case RTM_NEWADDR:
1429 		case RTM_DELADDR:
1430 			/*
1431 			 * An IP address has been created/updated/deleted or
1432 			 * brought up/down, re-initilialize the interface/IP
1433 			 * address list.
1434 			 */
1435 			scanif = _B_TRUE;
1436 			break;
1437 		default:
1438 			/* Not interesting */
1439 			break;
1440 		}
1441 	}
1442 
1443 	if (scanif)
1444 		vrrpd_scan(af);
1445 }
1446 
1447 /*
1448  * Periodically scan the interface/IP addresses on the system.
1449  */
1450 /* ARGSUSED */
1451 static void
1452 vrrpd_scan_timer(iu_tq_t *tq, void *arg)
1453 {
1454 	vrrp_log(VRRP_DBG0, "vrrpd_scan_timer()");
1455 	vrrpd_scan(AF_INET);
1456 	vrrpd_scan(AF_INET6);
1457 }
1458 
1459 /*
1460  * Get the list of the interface/IP addresses of the specified address
1461  * family.
1462  */
1463 static void
1464 vrrpd_scan(int af)
1465 {
1466 	vrrp_log(VRRP_DBG0, "vrrpd_scan(%s)", af_str(af));
1467 
1468 again:
1469 	vrrpd_init_ipcache(af);
1470 
1471 	/* If interface index changes, walk again. */
1472 	if (vrrpd_walk_addr_info(af) != IPADM_SUCCESS)
1473 		goto again;
1474 
1475 	vrrpd_update_ipcache(af);
1476 }
1477 
1478 /*
1479  * First mark all IP addresses of the specific address family to be removed.
1480  * This flag will then be cleared when we walk up all the IP addresses.
1481  */
1482 static void
1483 vrrpd_init_ipcache(int af)
1484 {
1485 	vrrp_intf_t	*intf, *next_intf;
1486 	vrrp_ip_t	*ip, *nextip;
1487 	char		abuf[INET6_ADDRSTRLEN];
1488 
1489 	vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(%s)", af_str(af));
1490 
1491 	next_intf = TAILQ_FIRST(&vrrp_intf_list);
1492 	while ((intf = next_intf) != NULL) {
1493 		next_intf = TAILQ_NEXT(intf, vvi_next);
1494 		if (intf->vvi_af != af)
1495 			continue;
1496 
1497 		/*
1498 		 * If the interface is still marked as new, it means that this
1499 		 * vrrpd_init_ipcache() call is a result of ifindex change,
1500 		 * which causes the re-walk of all the interfaces (see
1501 		 * vrrpd_add_ipaddr()), and some interfaces are still marked
1502 		 * as new during the last walk. In this case, delete this
1503 		 * interface with the "update_vr" argument to be _B_FALSE,
1504 		 * since no VRRP router has been assoicated with this
1505 		 * interface yet (the association is done in
1506 		 * vrrpd_update_ipcache()).
1507 		 *
1508 		 * This interface will be re-added later if it still exists.
1509 		 */
1510 		if (intf->vvi_state == NODE_STATE_NEW) {
1511 			vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(): remove %s "
1512 			    "(%d), may be added later", intf->vvi_ifname,
1513 			    intf->vvi_ifindex);
1514 			vrrpd_delete_if(intf, _B_FALSE);
1515 			continue;
1516 		}
1517 
1518 		for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
1519 		    ip = nextip) {
1520 			nextip = TAILQ_NEXT(ip, vip_next);
1521 			/* LINTED E_CONSTANT_CONDITION */
1522 			VRRPADDR2STR(af, &ip->vip_addr, abuf,
1523 			    INET6_ADDRSTRLEN, _B_FALSE);
1524 
1525 			if (ip->vip_state != NODE_STATE_NEW) {
1526 				vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(%s/%d, "
1527 				    "%s(%s/0x%x))", intf->vvi_ifname,
1528 				    intf->vvi_ifindex, ip->vip_lifname,
1529 				    abuf, ip->vip_flags);
1530 				ip->vip_state = NODE_STATE_STALE;
1531 				continue;
1532 			}
1533 
1534 			/*
1535 			 * If the IP is still marked as new, it means that
1536 			 * this vrrpd_init_ipcache() call is a result of
1537 			 * ifindex change, which causes the re-walk of all
1538 			 * the IP addresses (see vrrpd_add_ipaddr()).
1539 			 * Delete this IP.
1540 			 *
1541 			 * This IP will be readded later if it still exists.
1542 			 */
1543 			vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(): remove "
1544 			    "%s/%d , %s(%s)", intf->vvi_ifname,
1545 			    intf->vvi_ifindex, ip->vip_lifname, abuf);
1546 			vrrpd_delete_ip(intf, ip);
1547 		}
1548 	}
1549 }
1550 
1551 /*
1552  * Walk all the IP addresses of the given family and update its
1553  * addresses list. Return IPADM_FAILURE if it is required to walk
1554  * all the interfaces again (one of the interface index changes in between).
1555  */
1556 static ipadm_status_t
1557 vrrpd_walk_addr_info(int af)
1558 {
1559 	ipadm_addr_info_t	*ainfo, *ainfop;
1560 	ipadm_status_t		ipstatus;
1561 	char			*lifname;
1562 	vrrp_addr_t		*addr;
1563 	int			ifindex;
1564 	uint64_t		flags;
1565 
1566 	vrrp_log(VRRP_DBG0, "vrrpd_walk_addr_info(%s)", af_str(af));
1567 
1568 	ipstatus = ipadm_addr_info(vrrp_ipadm_handle, NULL, &ainfo, 0, 0);
1569 	if (ipstatus != IPADM_SUCCESS) {
1570 		vrrp_log(VRRP_ERR, "vrrpd_walk_addr_info(%s): "
1571 		    "ipadm_addr_info() failed: %s",
1572 		    af_str(af), ipadm_status2str(ipstatus));
1573 		return (IPADM_SUCCESS);
1574 	}
1575 
1576 	for (ainfop = ainfo; ainfop != NULL; ainfop = IA_NEXT(ainfop)) {
1577 		if (ainfop->ia_ifa.ifa_addr->ss_family != af)
1578 			continue;
1579 
1580 		lifname = ainfop->ia_ifa.ifa_name;
1581 		flags = ainfop->ia_ifa.ifa_flags;
1582 		addr = (vrrp_addr_t *)ainfop->ia_ifa.ifa_addr;
1583 
1584 		vrrp_log(VRRP_DBG0, "vrrpd_walk_addr_info(%s): %s",
1585 		    af_str(af), lifname);
1586 
1587 		/* Skip virtual/IPMP/P2P interfaces */
1588 		if (flags & (IFF_VIRTUAL|IFF_IPMP|IFF_POINTOPOINT)) {
1589 			vrrp_log(VRRP_DBG0, "vrrpd_walk_addr_info(%s): "
1590 			    "skipped %s", af_str(af), lifname);
1591 			continue;
1592 		}
1593 
1594 		/* Filter out the all-zero IP address */
1595 		if (VRRPADDR_UNSPECIFIED(af, addr))
1596 			continue;
1597 
1598 		if ((ifindex = if_nametoindex(lifname)) == 0) {
1599 			if (errno != ENXIO && errno != ENOENT) {
1600 				vrrp_log(VRRP_ERR, "vrrpd_walk_addr_info(%s): "
1601 				    "if_nametoindex() failed for %s: %s",
1602 				    af_str(af), lifname, strerror(errno));
1603 			}
1604 			break;
1605 		}
1606 
1607 		/*
1608 		 * The interface is unplumbed/replumbed during the walk.  Try
1609 		 * to walk the IP addresses one more time.
1610 		 */
1611 		if (vrrpd_add_ipaddr(lifname, af, addr, ifindex, flags)
1612 		    == VRRP_EAGAIN) {
1613 			ipstatus = IPADM_FAILURE;
1614 			break;
1615 		}
1616 	}
1617 
1618 	ipadm_free_addr_info(ainfo);
1619 	return (ipstatus);
1620 }
1621 
1622 /*
1623  * Given the information of each IP address, update the interface and
1624  * IP addresses list
1625  */
1626 static vrrp_err_t
1627 vrrpd_add_ipaddr(char *lifname, int af, vrrp_addr_t *addr, int ifindex,
1628     uint64_t flags)
1629 {
1630 	char		ifname[LIFNAMSIZ], *c;
1631 	vrrp_intf_t	*intf;
1632 	vrrp_ip_t	*ip;
1633 	char		abuf[INET6_ADDRSTRLEN];
1634 	vrrp_err_t	err;
1635 
1636 	/* LINTED E_CONSTANT_CONDITION */
1637 	VRRPADDR2STR(af, addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
1638 	vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s, %d, 0x%x)", lifname,
1639 	    abuf, ifindex, flags);
1640 
1641 	/*
1642 	 * Get the physical interface name from the logical interface name.
1643 	 */
1644 	(void) strlcpy(ifname, lifname, sizeof (ifname));
1645 	if ((c = strchr(ifname, ':')) != NULL)
1646 		*c = '\0';
1647 
1648 	if ((intf = vrrpd_lookup_if(ifname, af)) == NULL) {
1649 		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(): %s is new", ifname);
1650 		err = vrrpd_create_if(ifname, af, ifindex, &intf);
1651 		if (err != VRRP_SUCCESS)
1652 			return (err);
1653 	} else if (intf->vvi_ifindex != ifindex) {
1654 		/*
1655 		 * If index changes, it means that this interface is
1656 		 * unplumbed/replumbed since we last checked. If this
1657 		 * interface is not used by any VRRP router, just
1658 		 * update its ifindex, and the IP addresses list will
1659 		 * be updated later. Otherwise, return EAGAIN to rewalk
1660 		 * all the IP addresses from the beginning.
1661 		 */
1662 		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s) ifindex changed ",
1663 		    "from %d to %d", ifname, intf->vvi_ifindex, ifindex);
1664 		if (!IS_PRIMARY_INTF(intf) && !IS_VIRTUAL_INTF(intf)) {
1665 			intf->vvi_ifindex = ifindex;
1666 		} else {
1667 			/*
1668 			 * delete this interface from the list if this
1669 			 * interface has already been assoicated with
1670 			 * any VRRP routers.
1671 			 */
1672 			vrrpd_delete_if(intf, _B_TRUE);
1673 			return (VRRP_EAGAIN);
1674 		}
1675 	}
1676 
1677 	/*
1678 	 * Does this IP address already exist?
1679 	 */
1680 	TAILQ_FOREACH(ip, &intf->vvi_iplist, vip_next) {
1681 		if (strcmp(ip->vip_lifname, lifname) == 0)
1682 			break;
1683 	}
1684 
1685 	if (ip != NULL) {
1686 		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s) IP exists",
1687 		    lifname, abuf);
1688 		ip->vip_state = NODE_STATE_NONE;
1689 		ip->vip_flags = flags;
1690 		if (ipaddr_cmp(af, addr, &ip->vip_addr) != 0) {
1691 			/*
1692 			 * Address has been changed, mark it as new
1693 			 * If this address is already selected as the
1694 			 * primary IP address, the new IP will be checked
1695 			 * to see whether it is still qualified as the
1696 			 * primary IP address. If not, the primary IP
1697 			 * address will be reselected.
1698 			 */
1699 			(void) memcpy(&ip->vip_addr, addr,
1700 			    sizeof (vrrp_addr_t));
1701 
1702 			ip->vip_state = NODE_STATE_NEW;
1703 		}
1704 	} else {
1705 		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s) IP is new",
1706 		    lifname, abuf);
1707 
1708 		err = vrrpd_create_ip(intf, lifname, addr, flags);
1709 		if (err != VRRP_SUCCESS)
1710 			return (err);
1711 	}
1712 	return (VRRP_SUCCESS);
1713 }
1714 
1715 /*
1716  * Update the interface and IP addresses list. Remove the ones that have been
1717  * staled since last time we walk the IP addresses and updated the ones that
1718  * have been changed.
1719  */
1720 static void
1721 vrrpd_update_ipcache(int af)
1722 {
1723 	vrrp_intf_t	*intf, *nextif;
1724 	vrrp_ip_t	*ip, *nextip;
1725 	char		abuf[INET6_ADDRSTRLEN];
1726 	boolean_t	primary_selected;
1727 	boolean_t	primary_now_selected;
1728 	boolean_t	need_reenable = _B_FALSE;
1729 
1730 	vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(%s)", af_str(af));
1731 
1732 	nextif = TAILQ_FIRST(&vrrp_intf_list);
1733 	while ((intf = nextif) != NULL) {
1734 		nextif = TAILQ_NEXT(intf, vvi_next);
1735 		if (intf->vvi_af != af)
1736 			continue;
1737 
1738 		/*
1739 		 * Does the interface already select its primary IP address?
1740 		 */
1741 		primary_selected = (intf->vvi_pip != NULL);
1742 		assert(!primary_selected || IS_PRIMARY_INTF(intf));
1743 
1744 		/*
1745 		 * Removed the IP addresses that have been unconfigured.
1746 		 */
1747 		for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
1748 		    ip = nextip) {
1749 			nextip = TAILQ_NEXT(ip, vip_next);
1750 			if (ip->vip_state != NODE_STATE_STALE)
1751 				continue;
1752 
1753 			/* LINTED E_CONSTANT_CONDITION */
1754 			VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN,
1755 			    _B_FALSE);
1756 			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): IP %s "
1757 			    "is removed over %s", abuf, intf->vvi_ifname);
1758 			vrrpd_delete_ip(intf, ip);
1759 		}
1760 
1761 		/*
1762 		 * No IP addresses left, delete this interface.
1763 		 */
1764 		if (TAILQ_EMPTY(&intf->vvi_iplist)) {
1765 			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
1766 			    "no IP left over %s", intf->vvi_ifname);
1767 			vrrpd_delete_if(intf, _B_TRUE);
1768 			continue;
1769 		}
1770 
1771 		/*
1772 		 * If this is selected ss the physical interface for any
1773 		 * VRRP router, reselect the primary address if needed.
1774 		 */
1775 		if (IS_PRIMARY_INTF(intf)) {
1776 			vrrpd_reselect_primary(intf);
1777 			primary_now_selected = (intf->vvi_pip != NULL);
1778 
1779 			/*
1780 			 * Cannot find the new primary IP address.
1781 			 */
1782 			if (primary_selected && !primary_now_selected) {
1783 				vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache() "
1784 				    "reselect primary IP on %s failed",
1785 				    intf->vvi_ifname);
1786 				vrrpd_remove_if(intf, _B_TRUE);
1787 			} else if (!primary_selected && primary_now_selected) {
1788 				/*
1789 				 * The primary IP address is successfully
1790 				 * selected on the physical interfacew we
1791 				 * need to walk through all the VRRP routers
1792 				 * that is created on this physical interface
1793 				 * and see whether they can now be enabled.
1794 				 */
1795 				need_reenable = _B_TRUE;
1796 			}
1797 		}
1798 
1799 		/*
1800 		 * For every new virtual IP address, bring up/down it based
1801 		 * on the state of VRRP router.
1802 		 *
1803 		 * Note that it is fine to not update the IP's vip_flags field
1804 		 * even if vrrpd_virtualip_updateone() changed the address's
1805 		 * up/down state, since the vip_flags field is only used for
1806 		 * select primary IP address over a physical interface, and
1807 		 * vrrpd_virtualip_updateone() only affects the virtual IP
1808 		 * address's status.
1809 		 */
1810 		for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
1811 		    ip = nextip) {
1812 			nextip = TAILQ_NEXT(ip, vip_next);
1813 			/* LINTED E_CONSTANT_CONDITION */
1814 			VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN,
1815 			    _B_FALSE);
1816 			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
1817 			    "IP %s over %s%s", abuf, intf->vvi_ifname,
1818 			    ip->vip_state == NODE_STATE_NEW ? " is new" : "");
1819 
1820 			if (IS_VIRTUAL_INTF(intf)) {
1821 				/*
1822 				 * If this IP is new, update its up/down state
1823 				 * based on the virtual interface's state
1824 				 * (which is determined by the VRRP router's
1825 				 * state). Otherwise, check only and prompt
1826 				 * warnings if its up/down state has been
1827 				 * changed.
1828 				 */
1829 				if (vrrpd_virtualip_updateone(intf, ip,
1830 				    ip->vip_state == NODE_STATE_NONE) !=
1831 				    VRRP_SUCCESS) {
1832 					vrrp_log(VRRP_DBG0,
1833 					    "vrrpd_update_ipcache(): "
1834 					    "IP %s over %s update failed", abuf,
1835 					    intf->vvi_ifname);
1836 					vrrpd_delete_ip(intf, ip);
1837 					continue;
1838 				}
1839 			}
1840 			ip->vip_state = NODE_STATE_NONE;
1841 		}
1842 
1843 		/*
1844 		 * The IP address is deleted when it is failed to be brought
1845 		 * up. If no IP addresses are left, delete this interface.
1846 		 */
1847 		if (TAILQ_EMPTY(&intf->vvi_iplist)) {
1848 			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
1849 			    "no IP left over %s", intf->vvi_ifname);
1850 			vrrpd_delete_if(intf, _B_TRUE);
1851 			continue;
1852 		}
1853 
1854 		if (intf->vvi_state == NODE_STATE_NEW) {
1855 			/*
1856 			 * A new interface is found. This interface can be
1857 			 * the primary interface or the virtual VNIC
1858 			 * interface.  Again, we need to walk throught all
1859 			 * the VRRP routers to see whether some of them can
1860 			 * now be enabled because of the new primary IP
1861 			 * address or the new virtual IP addresses.
1862 			 */
1863 			intf->vvi_state = NODE_STATE_NONE;
1864 			need_reenable = _B_TRUE;
1865 		}
1866 	}
1867 
1868 	if (need_reenable)
1869 		vrrpd_reenable_all_vr();
1870 }
1871 
1872 /*
1873  * Reselect primary IP if:
1874  * - The existing primary IP is no longer qualified (removed or it is down or
1875  *   not a link-local IP for IPv6 VRRP router);
1876  * - This is a physical interface but no primary IP is chosen;
1877  */
1878 static void
1879 vrrpd_reselect_primary(vrrp_intf_t *intf)
1880 {
1881 	vrrp_ip_t	*ip;
1882 	char		abuf[INET6_ADDRSTRLEN];
1883 
1884 	assert(IS_PRIMARY_INTF(intf));
1885 
1886 	/*
1887 	 * If the interface's old primary IP address is still valid, return
1888 	 */
1889 	if (((ip = intf->vvi_pip) != NULL) && (QUALIFY_PRIMARY_ADDR(intf, ip)))
1890 		return;
1891 
1892 	if (ip != NULL) {
1893 		/* LINTED E_CONSTANT_CONDITION */
1894 		VRRPADDR2STR(intf->vvi_af, &ip->vip_addr, abuf,
1895 		    sizeof (abuf), _B_FALSE);
1896 		vrrp_log(VRRP_DBG0, "vrrpd_reselect_primary(%s): primary IP %s "
1897 		    "is no longer qualified", intf->vvi_ifname, abuf);
1898 	}
1899 
1900 	ip = vrrpd_select_primary(intf);
1901 	intf->vvi_pip = ip;
1902 
1903 	if (ip != NULL) {
1904 		/* LINTED E_CONSTANT_CONDITION */
1905 		VRRPADDR2STR(intf->vvi_af, &ip->vip_addr, abuf,
1906 		    sizeof (abuf), _B_FALSE);
1907 		vrrp_log(VRRP_DBG0, "vrrpd_reselect_primary(%s): primary IP %s "
1908 		    "is selected", intf->vvi_ifname, abuf);
1909 	}
1910 }
1911 
1912 /*
1913  * Select the primary IP address. Since the link-local IP address is always
1914  * at the head of the IP address list, try to find the first UP IP address
1915  * and see whether it qualify.
1916  */
1917 static vrrp_ip_t *
1918 vrrpd_select_primary(vrrp_intf_t *pif)
1919 {
1920 	vrrp_ip_t	*pip;
1921 	char		abuf[INET6_ADDRSTRLEN];
1922 
1923 	vrrp_log(VRRP_DBG1, "vrrpd_select_primary(%s)", pif->vvi_ifname);
1924 
1925 	TAILQ_FOREACH(pip, &pif->vvi_iplist, vip_next) {
1926 		assert(pip->vip_state != NODE_STATE_STALE);
1927 
1928 		/* LINTED E_CONSTANT_CONDITION */
1929 		VRRPADDR2STR(pif->vvi_af, &pip->vip_addr, abuf,
1930 		    INET6_ADDRSTRLEN, _B_FALSE);
1931 		vrrp_log(VRRP_DBG0, "vrrpd_select_primary(%s): %s is %s",
1932 		    pif->vvi_ifname, abuf,
1933 		    (pip->vip_flags & IFF_UP) ? "up" : "down");
1934 
1935 		if (pip->vip_flags & IFF_UP)
1936 			break;
1937 	}
1938 
1939 	/*
1940 	 * Is this valid primary IP address?
1941 	 */
1942 	if (pip == NULL || !QUALIFY_PRIMARY_ADDR(pif, pip)) {
1943 		vrrp_log(VRRP_DBG0, "vrrpd_select_primary(%s/%s) failed",
1944 		    pif->vvi_ifname, af_str(pif->vvi_af));
1945 		return (NULL);
1946 	}
1947 	return (pip);
1948 }
1949 
1950 /*
1951  * This is a new interface. Check whether any VRRP router is waiting for it
1952  */
1953 static void
1954 vrrpd_reenable_all_vr()
1955 {
1956 	vrrp_vr_t *vr;
1957 
1958 	vrrp_log(VRRP_DBG0, "vrrpd_reenable_all_vr()");
1959 
1960 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
1961 		if (vr->vvr_conf.vvc_enabled)
1962 			(void) vrrpd_enable_vr(vr);
1963 	}
1964 }
1965 
1966 /*
1967  * If primary_addr_gone is _B_TRUE, it means that we failed to select
1968  * the primary IP address on this (physical) interface; otherwise,
1969  * it means the interface is no longer available.
1970  */
1971 static void
1972 vrrpd_remove_if(vrrp_intf_t *intf, boolean_t primary_addr_gone)
1973 {
1974 	vrrp_vr_t *vr;
1975 
1976 	vrrp_log(VRRP_DBG0, "vrrpd_remove_if(%s): %s", intf->vvi_ifname,
1977 	    primary_addr_gone ? "primary address gone" : "interface deleted");
1978 
1979 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
1980 		if (vr->vvr_conf.vvc_enabled)
1981 			vrrpd_disable_vr(vr, intf, primary_addr_gone);
1982 	}
1983 }
1984 
1985 /*
1986  * Update the VRRP configuration file based on the given configuration.
1987  * op is either VRRP_CONF_UPDATE or VRRP_CONF_DELETE
1988  */
1989 static vrrp_err_t
1990 vrrpd_updateconf(vrrp_vr_conf_t *newconf, uint_t op)
1991 {
1992 	vrrp_vr_conf_t	conf;
1993 	FILE		*fp, *nfp;
1994 	int		nfd;
1995 	char		line[LINE_MAX];
1996 	char		newfile[MAXPATHLEN];
1997 	boolean_t	found = _B_FALSE;
1998 	vrrp_err_t	err = VRRP_SUCCESS;
1999 
2000 	vrrp_log(VRRP_DBG0, "vrrpd_updateconf(%s, %s)", newconf->vvc_name,
2001 	    op == VRRP_CONF_UPDATE ? "update" : "delete");
2002 
2003 	if ((fp = fopen(vrrpd_conffile, "r+F")) == NULL) {
2004 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): open %s failed: %s",
2005 		    vrrpd_conffile, strerror(errno));
2006 		return (VRRP_EDB);
2007 	}
2008 
2009 	(void) snprintf(newfile, MAXPATHLEN, "%s.new", vrrpd_conffile);
2010 	if ((nfd = open(newfile, O_WRONLY | O_CREAT | O_TRUNC,
2011 	    S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0) {
2012 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): open %s failed: %s",
2013 		    newfile, strerror(errno));
2014 		(void) fclose(fp);
2015 		return (VRRP_EDB);
2016 	}
2017 
2018 	if ((nfp = fdopen(nfd, "wF")) == NULL) {
2019 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): fdopen(%s) failed: %s",
2020 		    newfile, strerror(errno));
2021 		goto done;
2022 	}
2023 
2024 	while (fgets(line, sizeof (line), fp) != NULL) {
2025 		conf.vvc_vrid = VRRP_VRID_NONE;
2026 		if (!found && (err = vrrpd_read_vrconf(line, &conf)) !=
2027 		    VRRP_SUCCESS) {
2028 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): invalid "
2029 			    "configuration format: %s", line);
2030 			goto done;
2031 		}
2032 
2033 		/*
2034 		 * Write this line out if:
2035 		 * - this is a comment line; or
2036 		 * - we've done updating/deleting the the given VR; or
2037 		 * - if the name of the VR read from this line does not match
2038 		 *   the VR name that we are about to update/delete;
2039 		 */
2040 		if (found || conf.vvc_vrid == VRRP_VRID_NONE ||
2041 		    strcmp(conf.vvc_name, newconf->vvc_name) != 0) {
2042 			if (fputs(line, nfp) != EOF)
2043 				continue;
2044 
2045 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2046 			    "write line %s", line);
2047 			err = VRRP_EDB;
2048 			goto done;
2049 		}
2050 
2051 		/*
2052 		 * Otherwise, update/skip the line.
2053 		 */
2054 		found = _B_TRUE;
2055 		if (op == VRRP_CONF_DELETE)
2056 			continue;
2057 
2058 		assert(op == VRRP_CONF_UPDATE);
2059 		if ((err = vrrpd_write_vrconf(line, sizeof (line),
2060 		    newconf)) != VRRP_SUCCESS) {
2061 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2062 			    "update configuration for %s", newconf->vvc_name);
2063 			goto done;
2064 		}
2065 		if (fputs(line, nfp) == EOF) {
2066 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2067 			    "write line %s", line);
2068 			err = VRRP_EDB;
2069 			goto done;
2070 		}
2071 	}
2072 
2073 	/*
2074 	 * If we get to the end of the file and have not seen the router that
2075 	 * we are about to update, write it out.
2076 	 */
2077 	if (!found && op == VRRP_CONF_UPDATE) {
2078 		if ((err = vrrpd_write_vrconf(line, sizeof (line),
2079 		    newconf)) == VRRP_SUCCESS && fputs(line, nfp) == EOF) {
2080 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2081 			    "write line %s", line);
2082 			err = VRRP_EDB;
2083 		}
2084 	} else if (!found && op == VRRP_CONF_DELETE) {
2085 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to find "
2086 		    "configuation for %s", newconf->vvc_name);
2087 		err = VRRP_ENOTFOUND;
2088 	}
2089 
2090 	if (err != VRRP_SUCCESS)
2091 		goto done;
2092 
2093 	if (fflush(nfp) == EOF || rename(newfile, vrrpd_conffile) < 0) {
2094 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2095 		    "rename file %s", newfile);
2096 		err = VRRP_EDB;
2097 	}
2098 
2099 done:
2100 	(void) fclose(fp);
2101 	(void) fclose(nfp);
2102 	(void) unlink(newfile);
2103 	return (err);
2104 }
2105 
2106 static vrrp_err_t
2107 vrrpd_write_vrconf(char *line, size_t len, vrrp_vr_conf_t *conf)
2108 {
2109 	vrrp_prop_t	*prop;
2110 	int		n, i;
2111 
2112 	vrrp_log(VRRP_DBG0, "vrrpd_write_vrconf(%s)", conf->vvc_name);
2113 
2114 	for (i = 0; i < VRRP_PROP_INFO_TABSIZE; i++) {
2115 		prop = &vrrp_prop_info_tbl[i];
2116 		n = snprintf(line, len, i == 0 ? "%s=" : " %s=",
2117 		    prop->vs_propname);
2118 		if (n < 0 || n >= len)
2119 			break;
2120 		len -= n;
2121 		line += n;
2122 		n = prop->vs_propwrite(conf, line, len);
2123 		if (n < 0 || n >= len)
2124 			break;
2125 		len -= n;
2126 		line += n;
2127 	}
2128 	if (i != VRRP_PROP_INFO_TABSIZE) {
2129 		vrrp_log(VRRP_ERR, "vrrpd_write_vrconf(%s): buffer size too"
2130 		    "small", conf->vvc_name);
2131 		return (VRRP_EDB);
2132 	}
2133 	n = snprintf(line, len, "\n");
2134 	if (n < 0 || n >= len) {
2135 		vrrp_log(VRRP_ERR, "vrrpd_write_vrconf(%s): buffer size too"
2136 		    "small", conf->vvc_name);
2137 		return (VRRP_EDB);
2138 	}
2139 	return (VRRP_SUCCESS);
2140 }
2141 
2142 static vrrp_err_t
2143 vrrpd_read_vrconf(char *line, vrrp_vr_conf_t *conf)
2144 {
2145 	char		*str, *token;
2146 	char		*next;
2147 	vrrp_err_t	err = VRRP_SUCCESS;
2148 	char		tmpbuf[MAXLINELEN];
2149 
2150 	str = tmpbuf;
2151 	(void) strlcpy(tmpbuf, line, MAXLINELEN);
2152 
2153 	/*
2154 	 * Skip leading spaces, blank lines, and comments.
2155 	 */
2156 	skip_whitespace(str);
2157 	if ((str - tmpbuf == strlen(tmpbuf)) || (*str == '#')) {
2158 		conf->vvc_vrid = VRRP_VRID_NONE;
2159 		return (VRRP_SUCCESS);
2160 	}
2161 
2162 	/*
2163 	 * Read each VR properties.
2164 	 */
2165 	for (token = strtok_r(str, " \n\t", &next); token != NULL;
2166 	    token = strtok_r(NULL, " \n\t", &next)) {
2167 		if ((err = vrrpd_readprop(token, conf)) != VRRP_SUCCESS)
2168 			break;
2169 	}
2170 
2171 	/* All properties read but no VRID defined */
2172 	if (err == VRRP_SUCCESS && conf->vvc_vrid == VRRP_VRID_NONE)
2173 		err = VRRP_EINVAL;
2174 
2175 	return (err);
2176 }
2177 
2178 static vrrp_err_t
2179 vrrpd_readprop(const char *str, vrrp_vr_conf_t *conf)
2180 {
2181 	vrrp_prop_t	*prop;
2182 	char		*pstr;
2183 	int		i;
2184 
2185 	if ((pstr = strchr(str, '=')) == NULL) {
2186 		vrrp_log(VRRP_ERR, "vrrpd_readprop(%s): invalid property", str);
2187 		return (VRRP_EINVAL);
2188 	}
2189 
2190 	*pstr++ = '\0';
2191 	for (i = 0; i < VRRP_PROP_INFO_TABSIZE; i++) {
2192 		prop = &vrrp_prop_info_tbl[i];
2193 		if (strcasecmp(str, prop->vs_propname) == 0) {
2194 			if (prop->vs_propread(conf, pstr))
2195 				break;
2196 		}
2197 	}
2198 
2199 	if (i == VRRP_PROP_INFO_TABSIZE) {
2200 		vrrp_log(VRRP_ERR, "vrrpd_readprop(%s): invalid property", str);
2201 		return (VRRP_EINVAL);
2202 	}
2203 
2204 	return (VRRP_SUCCESS);
2205 }
2206 
2207 static boolean_t
2208 vrrp_rd_prop_name(vrrp_vr_conf_t *conf, const char *str)
2209 {
2210 	size_t size = sizeof (conf->vvc_name);
2211 	return (strlcpy(conf->vvc_name, str, size) < size);
2212 }
2213 
2214 static boolean_t
2215 vrrp_rd_prop_vrid(vrrp_vr_conf_t *conf, const char *str)
2216 {
2217 	conf->vvc_vrid = strtol(str, NULL, 0);
2218 	return (!(conf->vvc_vrid < VRRP_VRID_MIN ||
2219 	    conf->vvc_vrid > VRRP_VRID_MAX ||
2220 	    (conf->vvc_vrid == 0 && errno != 0)));
2221 }
2222 
2223 static boolean_t
2224 vrrp_rd_prop_af(vrrp_vr_conf_t *conf, const char *str)
2225 {
2226 	if (strcasecmp(str, "AF_INET") == 0)
2227 		conf->vvc_af = AF_INET;
2228 	else if (strcasecmp(str, "AF_INET6") == 0)
2229 		conf->vvc_af = AF_INET6;
2230 	else
2231 		return (_B_FALSE);
2232 	return (_B_TRUE);
2233 }
2234 
2235 static boolean_t
2236 vrrp_rd_prop_pri(vrrp_vr_conf_t *conf, const char *str)
2237 {
2238 	conf->vvc_pri = strtol(str, NULL, 0);
2239 	return (!(conf->vvc_pri < VRRP_PRI_MIN ||
2240 	    conf->vvc_pri > VRRP_PRI_OWNER ||
2241 	    (conf->vvc_pri == 0 && errno != 0)));
2242 }
2243 
2244 static boolean_t
2245 vrrp_rd_prop_adver_int(vrrp_vr_conf_t *conf, const char *str)
2246 {
2247 	conf->vvc_adver_int = strtol(str, NULL, 0);
2248 	return (!(conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
2249 	    conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX ||
2250 	    (conf->vvc_adver_int == 0 && errno != 0)));
2251 }
2252 
2253 static boolean_t
2254 vrrp_rd_prop_preempt(vrrp_vr_conf_t *conf, const char *str)
2255 {
2256 	if (strcasecmp(str, "true") == 0)
2257 		conf->vvc_preempt = _B_TRUE;
2258 	else if (strcasecmp(str, "false") == 0)
2259 		conf->vvc_preempt = _B_FALSE;
2260 	else
2261 		return (_B_FALSE);
2262 	return (_B_TRUE);
2263 }
2264 
2265 static boolean_t
2266 vrrp_rd_prop_accept(vrrp_vr_conf_t *conf, const char *str)
2267 {
2268 	if (strcasecmp(str, "true") == 0)
2269 		conf->vvc_accept = _B_TRUE;
2270 	else if (strcasecmp(str, "false") == 0)
2271 		conf->vvc_accept = _B_FALSE;
2272 	else
2273 		return (_B_FALSE);
2274 	return (_B_TRUE);
2275 }
2276 
2277 static boolean_t
2278 vrrp_rd_prop_enabled(vrrp_vr_conf_t *conf, const char *str)
2279 {
2280 	if (strcasecmp(str, "enabled") == 0)
2281 		conf->vvc_enabled = _B_TRUE;
2282 	else if (strcasecmp(str, "disabled") == 0)
2283 		conf->vvc_enabled = _B_FALSE;
2284 	else
2285 		return (_B_FALSE);
2286 	return (_B_TRUE);
2287 }
2288 
2289 static boolean_t
2290 vrrp_rd_prop_ifname(vrrp_vr_conf_t *conf, const char *str)
2291 {
2292 	size_t size = sizeof (conf->vvc_link);
2293 	return (strlcpy(conf->vvc_link, str, size) < size);
2294 }
2295 
2296 static int
2297 vrrp_wt_prop_name(vrrp_vr_conf_t *conf, char *str, size_t size)
2298 {
2299 	return (snprintf(str, size, "%s", conf->vvc_name));
2300 }
2301 
2302 static int
2303 vrrp_wt_prop_pri(vrrp_vr_conf_t *conf, char *str, size_t size)
2304 {
2305 	return (snprintf(str, size, "%d", conf->vvc_pri));
2306 }
2307 
2308 static int
2309 vrrp_wt_prop_adver_int(vrrp_vr_conf_t *conf, char *str, size_t size)
2310 {
2311 	return (snprintf(str, size, "%d", conf->vvc_adver_int));
2312 }
2313 
2314 static int
2315 vrrp_wt_prop_preempt(vrrp_vr_conf_t *conf, char *str, size_t size)
2316 {
2317 	return (snprintf(str, size, "%s",
2318 	    conf->vvc_preempt ? "true" : "false"));
2319 }
2320 
2321 static int
2322 vrrp_wt_prop_accept(vrrp_vr_conf_t *conf, char *str, size_t size)
2323 {
2324 	return (snprintf(str, size, "%s",
2325 	    conf->vvc_accept ? "true" : "false"));
2326 }
2327 
2328 static int
2329 vrrp_wt_prop_enabled(vrrp_vr_conf_t *conf, char *str, size_t size)
2330 {
2331 	return (snprintf(str, size, "%s",
2332 	    conf->vvc_enabled ? "enabled" : "disabled"));
2333 }
2334 
2335 static int
2336 vrrp_wt_prop_vrid(vrrp_vr_conf_t *conf, char *str, size_t size)
2337 {
2338 	return (snprintf(str, size, "%d", conf->vvc_vrid));
2339 }
2340 
2341 static int
2342 vrrp_wt_prop_af(vrrp_vr_conf_t *conf, char *str, size_t size)
2343 {
2344 	return (snprintf(str, size, "%s",
2345 	    conf->vvc_af == AF_INET ? "AF_INET" : "AF_INET6"));
2346 }
2347 
2348 static int
2349 vrrp_wt_prop_ifname(vrrp_vr_conf_t *conf, char *str, size_t size)
2350 {
2351 	return (snprintf(str, size, "%s", conf->vvc_link));
2352 }
2353 
/*
 * Return a printable name for an address family. Besides the AF_*
 * constants, the plain numbers 4 and 6 are accepted for IPv4 and IPv6.
 * Unknown values map to "AF_error". The returned string is a literal and
 * must not be modified or freed.
 */
static char *
af_str(int af)
{
	/* The IPv4 and IPv6 checks come first, in the same order as before */
	if (af == 4 || af == AF_INET)
		return ("AF_INET");

	if (af == 6 || af == AF_INET6)
		return ("AF_INET6");

	if (af == AF_UNSPEC)
		return ("AF_UNSPEC");

	return ("AF_error");
}
2366 
2367 static vrrp_err_t
2368 vrrpd_create_vr(vrrp_vr_conf_t *conf)
2369 {
2370 	vrrp_vr_t	*vr;
2371 
2372 	vrrp_log(VRRP_DBG0, "vrrpd_create_vr(%s)", conf->vvc_name);
2373 
2374 	if ((vr = malloc(sizeof (vrrp_vr_t))) == NULL) {
2375 		vrrp_log(VRRP_ERR, "vrrpd_create_vr(): memory allocation for %s"
2376 		    " failed", conf->vvc_name);
2377 		return (VRRP_ENOMEM);
2378 	}
2379 
2380 	bzero(vr, sizeof (vrrp_vr_t));
2381 	vr->vvr_state = VRRP_STATE_NONE;
2382 	vr->vvr_timer_id = -1;
2383 	vrrpd_state_trans(VRRP_STATE_NONE, VRRP_STATE_INIT, vr);
2384 	(void) memcpy(&vr->vvr_conf, conf, sizeof (vrrp_vr_conf_t));
2385 	vr->vvr_conf.vvc_enabled = _B_FALSE;
2386 	TAILQ_INSERT_HEAD(&vrrp_vr_list, vr, vvr_next);
2387 	return (VRRP_SUCCESS);
2388 }
2389 
2390 static void
2391 vrrpd_delete_vr(vrrp_vr_t *vr)
2392 {
2393 	vrrp_log(VRRP_DBG0, "vrrpd_delete_vr(%s)", vr->vvr_conf.vvc_name);
2394 	if (vr->vvr_conf.vvc_enabled)
2395 		vrrpd_disable_vr(vr, NULL, _B_FALSE);
2396 	assert(vr->vvr_state == VRRP_STATE_INIT);
2397 	vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_NONE, vr);
2398 	TAILQ_REMOVE(&vrrp_vr_list, vr, vvr_next);
2399 	(void) free(vr);
2400 }
2401 
2402 static vrrp_err_t
2403 vrrpd_enable_vr(vrrp_vr_t *vr)
2404 {
2405 	vrrp_err_t	rx_err, tx_err, err = VRRP_EINVAL;
2406 
2407 	vrrp_log(VRRP_DBG0, "vrrpd_enable_vr(%s)", vr->vvr_conf.vvc_name);
2408 
2409 	assert(vr->vvr_conf.vvc_enabled);
2410 
2411 	/*
2412 	 * This VRRP router has been successfully enabled and start
2413 	 * participating.
2414 	 */
2415 	if (vr->vvr_state != VRRP_STATE_INIT)
2416 		return (VRRP_SUCCESS);
2417 
2418 	if ((rx_err = vrrpd_init_rxsock(vr)) == VRRP_SUCCESS) {
2419 		/*
2420 		 * Select the primary IP address. Even if this time
2421 		 * primary IP selection failed, we will reselect the
2422 		 * primary IP address when new IP address comes up.
2423 		 */
2424 		vrrpd_reselect_primary(vr->vvr_pif);
2425 		if (vr->vvr_pif->vvi_pip == NULL) {
2426 			vrrp_log(VRRP_DBG0, "vrrpd_enable_vr(%s): "
2427 			    "select_primary over %s failed",
2428 			    vr->vvr_conf.vvc_name, vr->vvr_pif->vvi_ifname);
2429 			rx_err = VRRP_ENOPRIM;
2430 		}
2431 	}
2432 
2433 	/*
2434 	 * Initialize the TX socket used for this vrrp_vr_t to send the
2435 	 * multicast packets.
2436 	 */
2437 	tx_err = vrrpd_init_txsock(vr);
2438 
2439 	/*
2440 	 * Only start the state transition if sockets for both RX and TX are
2441 	 * initialized correctly.
2442 	 */
2443 	if (rx_err != VRRP_SUCCESS || tx_err != VRRP_SUCCESS) {
2444 		/*
2445 		 * Record the error information for diagnose purpose.
2446 		 */
2447 		vr->vvr_err = (rx_err == VRRP_SUCCESS) ? tx_err : rx_err;
2448 		return (err);
2449 	}
2450 
2451 	if (vr->vvr_conf.vvc_pri == 255)
2452 		err = vrrpd_state_i2m(vr);
2453 	else
2454 		err = vrrpd_state_i2b(vr);
2455 
2456 	if (err != VRRP_SUCCESS) {
2457 		vr->vvr_err = err;
2458 		vr->vvr_pif->vvi_pip = NULL;
2459 		vrrpd_fini_txsock(vr);
2460 		vrrpd_fini_rxsock(vr);
2461 	}
2462 	return (err);
2463 }
2464 
2465 /*
2466  * Given the removed interface, see whether the given VRRP router would
2467  * be affected and stop participating the VRRP protocol.
2468  *
2469  * If intf is NULL, VR disabling request is coming from the admin.
2470  */
2471 static void
2472 vrrpd_disable_vr(vrrp_vr_t *vr, vrrp_intf_t *intf, boolean_t primary_addr_gone)
2473 {
2474 	vrrp_log(VRRP_DBG0, "vrrpd_disable_vr(%s): %s%s", vr->vvr_conf.vvc_name,
2475 	    intf == NULL ? "requested by admin" : intf->vvi_ifname,
2476 	    intf == NULL ? "" : (primary_addr_gone ? "primary address gone" :
2477 	    "interface deleted"));
2478 
2479 	/*
2480 	 * An interface is deleted, see whether this interface is the
2481 	 * physical interface or the VNIC of the given VRRP router.
2482 	 * If so, continue to disable the VRRP router.
2483 	 */
2484 	if (!primary_addr_gone && (intf != NULL) && (intf != vr->vvr_pif) &&
2485 	    (intf != vr->vvr_vif)) {
2486 		return;
2487 	}
2488 
2489 	/*
2490 	 * If this is the case that the primary IP address is gone,
2491 	 * and we failed to reselect another primary IP address,
2492 	 * continue to disable the VRRP router.
2493 	 */
2494 	if (primary_addr_gone && intf != vr->vvr_pif)
2495 		return;
2496 
2497 	vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): disabling",
2498 	    vr->vvr_conf.vvc_name);
2499 
2500 	if (vr->vvr_state == VRRP_STATE_MASTER) {
2501 		/*
2502 		 * If this router is disabled by the administrator, send
2503 		 * the zero-priority advertisement to indicate the Master
2504 		 * stops participating VRRP.
2505 		 */
2506 		if (intf == NULL)
2507 			(void) vrrpd_send_adv(vr, _B_TRUE);
2508 
2509 		vrrpd_state_m2i(vr);
2510 	} else  if (vr->vvr_state == VRRP_STATE_BACKUP) {
2511 		vrrpd_state_b2i(vr);
2512 	}
2513 
2514 	/*
2515 	 * If no primary IP address can be selected, the VRRP router
2516 	 * stays at the INIT state and will become BACKUP and MASTER when
2517 	 * a primary IP address is reselected.
2518 	 */
2519 	if (primary_addr_gone) {
2520 		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): primary IP "
2521 		    "is removed", vr->vvr_conf.vvc_name);
2522 		vr->vvr_err = VRRP_ENOPRIM;
2523 	} else if (intf == NULL) {
2524 		/*
2525 		 * The VRRP router is disable by the administrator
2526 		 */
2527 		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): disabled by admin",
2528 		    vr->vvr_conf.vvc_name);
2529 		vr->vvr_err = VRRP_SUCCESS;
2530 		vrrpd_fini_txsock(vr);
2531 		vrrpd_fini_rxsock(vr);
2532 	} else if (intf == vr->vvr_pif) {
2533 		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): physical interface "
2534 		    "%s removed", vr->vvr_conf.vvc_name, intf->vvi_ifname);
2535 		vr->vvr_err = VRRP_ENOPRIM;
2536 		vrrpd_fini_rxsock(vr);
2537 	} else if (intf == vr->vvr_vif) {
2538 		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): VNIC interface %s"
2539 		    " removed", vr->vvr_conf.vvc_name, intf->vvi_ifname);
2540 		vr->vvr_err = VRRP_ENOVIRT;
2541 		vrrpd_fini_txsock(vr);
2542 	}
2543 }
2544 
2545 vrrp_err_t
2546 vrrpd_create(vrrp_vr_conf_t *conf, boolean_t updateconf)
2547 {
2548 	vrrp_err_t	err = VRRP_SUCCESS;
2549 
2550 	vrrp_log(VRRP_DBG0, "vrrpd_create(%s, %s, %d)", conf->vvc_name,
2551 	    conf->vvc_link, conf->vvc_vrid);
2552 
2553 	assert(conf != NULL);
2554 
2555 	/*
2556 	 * Sanity check
2557 	 */
2558 	if ((strlen(conf->vvc_name) == 0) ||
2559 	    (strlen(conf->vvc_link) == 0) ||
2560 	    (conf->vvc_vrid < VRRP_VRID_MIN ||
2561 	    conf->vvc_vrid > VRRP_VRID_MAX) ||
2562 	    (conf->vvc_pri < VRRP_PRI_MIN ||
2563 	    conf->vvc_pri > VRRP_PRI_OWNER) ||
2564 	    (conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
2565 	    conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX) ||
2566 	    (conf->vvc_af != AF_INET && conf->vvc_af != AF_INET6) ||
2567 	    (conf->vvc_pri == VRRP_PRI_OWNER && !conf->vvc_accept)) {
2568 		vrrp_log(VRRP_DBG1, "vrrpd_create(%s): invalid argument",
2569 		    conf->vvc_name);
2570 		return (VRRP_EINVAL);
2571 	}
2572 
2573 	if (!vrrp_valid_name(conf->vvc_name)) {
2574 		vrrp_log(VRRP_DBG1, "vrrpd_create(): %s is not a valid router "
2575 		    "name", conf->vvc_name);
2576 		return (VRRP_EINVALVRNAME);
2577 	}
2578 
2579 	if (vrrpd_lookup_vr_by_name(conf->vvc_name) != NULL) {
2580 		vrrp_log(VRRP_DBG1, "vrrpd_create(): %s already exists",
2581 		    conf->vvc_name);
2582 		return (VRRP_EINSTEXIST);
2583 	}
2584 
2585 	if (vrrpd_lookup_vr_by_vrid(conf->vvc_link, conf->vvc_vrid,
2586 	    conf->vvc_af) != NULL) {
2587 		vrrp_log(VRRP_DBG1, "vrrpd_create(): VRID %d/%s over %s "
2588 		    "already exists", conf->vvc_vrid, af_str(conf->vvc_af),
2589 		    conf->vvc_link);
2590 		return (VRRP_EVREXIST);
2591 	}
2592 
2593 	if (updateconf && (err = vrrpd_updateconf(conf,
2594 	    VRRP_CONF_UPDATE)) != VRRP_SUCCESS) {
2595 		vrrp_log(VRRP_ERR, "vrrpd_create(): failed to update "
2596 		    "configuration for %s", conf->vvc_name);
2597 		return (err);
2598 	}
2599 
2600 	err = vrrpd_create_vr(conf);
2601 	if (err != VRRP_SUCCESS && updateconf)
2602 		(void) vrrpd_updateconf(conf, VRRP_CONF_DELETE);
2603 
2604 	return (err);
2605 }
2606 
/*
 * Delete the router named vn: remove its entry from the configuration
 * file first and, only if that succeeds, destroy the in-memory router.
 *
 * Returns VRRP_SUCCESS, VRRP_ENOTFOUND if no such router exists, or the
 * error from vrrpd_updateconf().
 */
static vrrp_err_t
vrrpd_delete(const char *vn)
{
	vrrp_vr_t	*router;
	vrrp_err_t	rc;

	vrrp_log(VRRP_DBG0, "vrrpd_delete(%s)", vn);

	router = vrrpd_lookup_vr_by_name(vn);
	if (router == NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_delete(): %s not exists", vn);
		return (VRRP_ENOTFOUND);
	}

	rc = vrrpd_updateconf(&router->vvr_conf, VRRP_CONF_DELETE);
	if (rc == VRRP_SUCCESS) {
		vrrpd_delete_vr(router);
		return (VRRP_SUCCESS);
	}

	vrrp_log(VRRP_ERR, "vrrpd_delete(): failed to delete "
	    "configuration for %s", router->vvr_conf.vvc_name);
	return (rc);
}
2630 
2631 static vrrp_err_t
2632 vrrpd_enable(const char *vn, boolean_t updateconf)
2633 {
2634 	vrrp_vr_t		*vr;
2635 	vrrp_vr_conf_t		*conf;
2636 	uint32_t		flags;
2637 	datalink_class_t	class;
2638 	vrrp_err_t		err = VRRP_SUCCESS;
2639 
2640 	vrrp_log(VRRP_DBG0, "vrrpd_enable(%s)", vn);
2641 
2642 	if ((vr = vrrpd_lookup_vr_by_name(vn)) == NULL) {
2643 		vrrp_log(VRRP_DBG1, "vrrpd_enable(): %s does not exist", vn);
2644 		return (VRRP_ENOTFOUND);
2645 	}
2646 
2647 	/*
2648 	 * The VR is already enabled.
2649 	 */
2650 	conf = &vr->vvr_conf;
2651 	if (conf->vvc_enabled) {
2652 		vrrp_log(VRRP_DBG1, "vrrpd_enable(): %s is already "
2653 		    "enabled", vn);
2654 		return (VRRP_EALREADY);
2655 	}
2656 
2657 	/*
2658 	 * Check whether the link exists.
2659 	 */
2660 	if ((strlen(conf->vvc_link) == 0) || dladm_name2info(vrrpd_vh->vh_dh,
2661 	    conf->vvc_link, NULL, &flags, &class, NULL) != DLADM_STATUS_OK ||
2662 	    !(flags & DLADM_OPT_ACTIVE) || ((class != DATALINK_CLASS_PHYS) &&
2663 	    (class != DATALINK_CLASS_VLAN) && (class != DATALINK_CLASS_AGGR))) {
2664 		vrrp_log(VRRP_DBG1, "vrrpd_enable(%s): invalid link %s",
2665 		    vn, conf->vvc_link);
2666 		return (VRRP_EINVALLINK);
2667 	}
2668 
2669 	/*
2670 	 * Get the associated VNIC name by the given interface/vrid/
2671 	 * address famitly.
2672 	 */
2673 	err = vrrp_get_vnicname(vrrpd_vh, conf->vvc_vrid,
2674 	    conf->vvc_af, conf->vvc_link, NULL, NULL, vr->vvr_vnic,
2675 	    sizeof (vr->vvr_vnic));
2676 	if (err != VRRP_SUCCESS) {
2677 		vrrp_log(VRRP_DBG1, "vrrpd_enable(%s): no VNIC for VRID %d/%s "
2678 		    "over %s", vn, conf->vvc_vrid, af_str(conf->vvc_af),
2679 		    conf->vvc_link);
2680 		err = VRRP_ENOVNIC;
2681 		goto fail;
2682 	}
2683 
2684 	/*
2685 	 * Find the right VNIC, primary interface and get the list of the
2686 	 * protected IP adressses and primary IP address. Note that if
2687 	 * either interface is NULL (no IP addresses configured over the
2688 	 * interface), we will still continue and mark this VRRP router
2689 	 * as "enabled".
2690 	 */
2691 	vr->vvr_conf.vvc_enabled = _B_TRUE;
2692 	if (updateconf && (err = vrrpd_updateconf(&vr->vvr_conf,
2693 	    VRRP_CONF_UPDATE)) != VRRP_SUCCESS) {
2694 		vrrp_log(VRRP_ERR, "vrrpd_enable(): failed to update "
2695 		    "configuration for %s", vr->vvr_conf.vvc_name);
2696 		goto fail;
2697 	}
2698 
2699 	/*
2700 	 * If vrrpd_setup_vr() fails, it is possible that there is no IP
2701 	 * addresses over ether the primary interface or the VNIC yet,
2702 	 * return success in this case, the VRRP router will stay in
2703 	 * the initialized state and start to work when the IP address is
2704 	 * configured.
2705 	 */
2706 	(void) vrrpd_enable_vr(vr);
2707 	return (VRRP_SUCCESS);
2708 
2709 fail:
2710 	vr->vvr_conf.vvc_enabled = _B_FALSE;
2711 	vr->vvr_vnic[0] = '\0';
2712 	return (err);
2713 }
2714 
/*
 * Administratively disable the router named vn. The "disabled" state is
 * written to the configuration file first; if that fails the in-memory
 * flag is restored and the router keeps running.
 *
 * Returns VRRP_SUCCESS, VRRP_ENOTFOUND, VRRP_EALREADY if the router is not
 * enabled, or the error from vrrpd_updateconf().
 */
static vrrp_err_t
vrrpd_disable(const char *vn)
{
	vrrp_vr_t	*router;
	vrrp_err_t	rc;

	vrrp_log(VRRP_DBG0, "vrrpd_disable(%s)", vn);

	router = vrrpd_lookup_vr_by_name(vn);
	if (router == NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_disable(): %s does not exist", vn);
		return (VRRP_ENOTFOUND);
	}

	/*
	 * The VR is already disabled.
	 */
	if (!router->vvr_conf.vvc_enabled) {
		vrrp_log(VRRP_DBG1, "vrrpd_disable(): %s was not enabled", vn);
		return (VRRP_EALREADY);
	}

	/* The flag must be cleared before the file is rewritten */
	router->vvr_conf.vvc_enabled = _B_FALSE;
	rc = vrrpd_updateconf(&router->vvr_conf, VRRP_CONF_UPDATE);
	if (rc == VRRP_SUCCESS) {
		/* A NULL interface means "requested by the administrator" */
		vrrpd_disable_vr(router, NULL, _B_FALSE);
		router->vvr_vnic[0] = '\0';
		return (VRRP_SUCCESS);
	}

	/* The file update failed: restore the in-memory state */
	router->vvr_conf.vvc_enabled = _B_TRUE;
	vrrp_log(VRRP_ERR, "vrrpd_disable(): failed to update "
	    "configuration for %s", router->vvr_conf.vvc_name);
	return (rc);
}
2749 
2750 static vrrp_err_t
2751 vrrpd_modify(vrrp_vr_conf_t *conf, uint32_t mask)
2752 {
2753 	vrrp_vr_t	*vr;
2754 	vrrp_vr_conf_t	savconf;
2755 	int		pri;
2756 	boolean_t	accept, set_accept = _B_FALSE;
2757 	vrrp_err_t	err;
2758 
2759 	vrrp_log(VRRP_DBG0, "vrrpd_modify(%s)", conf->vvc_name);
2760 
2761 	if (mask == 0)
2762 		return (VRRP_SUCCESS);
2763 
2764 	if ((vr = vrrpd_lookup_vr_by_name(conf->vvc_name)) == NULL) {
2765 		vrrp_log(VRRP_DBG1, "vrrpd_modify(): cannot find the given "
2766 		    "VR instance: %s", conf->vvc_name);
2767 		return (VRRP_ENOTFOUND);
2768 	}
2769 
2770 	if (mask & VRRP_CONF_INTERVAL) {
2771 		if (conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
2772 		    conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX) {
2773 			vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): invalid "
2774 			    "adver_interval %d", conf->vvc_name,
2775 			    conf->vvc_adver_int);
2776 			return (VRRP_EINVAL);
2777 		}
2778 	}
2779 
2780 	pri = vr->vvr_conf.vvc_pri;
2781 	if (mask & VRRP_CONF_PRIORITY) {
2782 		if (conf->vvc_pri < VRRP_PRI_MIN ||
2783 		    conf->vvc_pri > VRRP_PRI_OWNER) {
2784 			vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): invalid "
2785 			    "priority %d", conf->vvc_name, conf->vvc_pri);
2786 			return (VRRP_EINVAL);
2787 		}
2788 		pri = conf->vvc_pri;
2789 	}
2790 
2791 	accept = vr->vvr_conf.vvc_accept;
2792 	if (mask & VRRP_CONF_ACCEPT)
2793 		accept = conf->vvc_accept;
2794 
2795 	if (pri == VRRP_PRI_OWNER && !accept) {
2796 		vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): accept mode must be "
2797 		    "true for VRRP address owner", conf->vvc_name);
2798 		return (VRRP_EINVAL);
2799 	}
2800 
2801 	if ((mask & VRRP_CONF_ACCEPT) && (vr->vvr_conf.vvc_accept != accept)) {
2802 		err = vrrpd_set_noaccept(vr, !accept);
2803 		if (err != VRRP_SUCCESS) {
2804 			vrrp_log(VRRP_ERR, "vrrpd_modify(%s): access mode "
2805 			    "updating failed: %s", conf->vvc_name,
2806 			    vrrp_err2str(err));
2807 			return (err);
2808 		}
2809 		set_accept = _B_TRUE;
2810 	}
2811 
2812 	/*
2813 	 * Save the current configuration, so it can be restored if the
2814 	 * following fails.
2815 	 */
2816 	(void) memcpy(&savconf, &vr->vvr_conf, sizeof (vrrp_vr_conf_t));
2817 	if (mask & VRRP_CONF_PREEMPT)
2818 		vr->vvr_conf.vvc_preempt = conf->vvc_preempt;
2819 
2820 	if (mask & VRRP_CONF_ACCEPT)
2821 		vr->vvr_conf.vvc_accept = accept;
2822 
2823 	if (mask & VRRP_CONF_PRIORITY)
2824 		vr->vvr_conf.vvc_pri = pri;
2825 
2826 	if (mask & VRRP_CONF_INTERVAL)
2827 		vr->vvr_conf.vvc_adver_int = conf->vvc_adver_int;
2828 
2829 	err = vrrpd_updateconf(&vr->vvr_conf, VRRP_CONF_UPDATE);
2830 	if (err != VRRP_SUCCESS) {
2831 		vrrp_log(VRRP_ERR, "vrrpd_modify(%s): configuration update "
2832 		    "failed: %s", conf->vvc_name, vrrp_err2str(err));
2833 		if (set_accept)
2834 			(void) vrrpd_set_noaccept(vr, accept);
2835 		(void) memcpy(&vr->vvr_conf, &savconf, sizeof (vrrp_vr_conf_t));
2836 		return (err);
2837 	}
2838 
2839 	if ((mask & VRRP_CONF_PRIORITY) && (vr->vvr_state == VRRP_STATE_BACKUP))
2840 		vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
2841 
2842 	if ((mask & VRRP_CONF_INTERVAL) && (vr->vvr_state == VRRP_STATE_MASTER))
2843 		vr->vvr_timeout = conf->vvc_adver_int;
2844 
2845 	return (VRRP_SUCCESS);
2846 }
2847 
2848 static void
2849 vrrpd_list(vrid_t vrid, char *ifname, int af, vrrp_ret_list_t *ret,
2850     size_t *sizep)
2851 {
2852 	vrrp_vr_t	*vr;
2853 	char		*p = (char *)ret + sizeof (vrrp_ret_list_t);
2854 	size_t		size = (*sizep) - sizeof (vrrp_ret_list_t);
2855 
2856 	vrrp_log(VRRP_DBG0, "vrrpd_list(%d_%s_%s)", vrid, ifname, af_str(af));
2857 
2858 	ret->vrl_cnt = 0;
2859 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
2860 		if (vrid !=  VRRP_VRID_NONE && vr->vvr_conf.vvc_vrid != vrid)
2861 			continue;
2862 
2863 		if (strlen(ifname) != 0 && strcmp(ifname,
2864 		    vr->vvr_conf.vvc_link) == 0) {
2865 			continue;
2866 		}
2867 
2868 		if ((af == AF_INET || af == AF_INET6) &&
2869 		    vr->vvr_conf.vvc_af != af)
2870 			continue;
2871 
2872 		if (size < VRRP_NAME_MAX) {
2873 			vrrp_log(VRRP_DBG1, "vrrpd_list(): buffer size too "
2874 			    "small to hold %d router names", ret->vrl_cnt);
2875 			*sizep = sizeof (vrrp_ret_list_t);
2876 			ret->vrl_err = VRRP_ETOOSMALL;
2877 			return;
2878 		}
2879 		(void) strlcpy(p, vr->vvr_conf.vvc_name, VRRP_NAME_MAX);
2880 		p += (strlen(vr->vvr_conf.vvc_name) + 1);
2881 		ret->vrl_cnt++;
2882 		size -= VRRP_NAME_MAX;
2883 	}
2884 
2885 	*sizep = sizeof (vrrp_ret_list_t) + ret->vrl_cnt * VRRP_NAME_MAX;
2886 	vrrp_log(VRRP_DBG1, "vrrpd_list() return %d", ret->vrl_cnt);
2887 	ret->vrl_err = VRRP_SUCCESS;
2888 }
2889 
2890 static void
2891 vrrpd_query(const char *vn, vrrp_ret_query_t *ret, size_t *sizep)
2892 {
2893 	vrrp_queryinfo_t	*infop;
2894 	vrrp_vr_t		*vr;
2895 	vrrp_intf_t		*vif;
2896 	vrrp_ip_t		*ip;
2897 	struct timeval		now;
2898 	uint32_t		vipcnt = 0;
2899 	size_t			size = *sizep;
2900 
2901 	vrrp_log(VRRP_DBG1, "vrrpd_query(%s)", vn);
2902 
2903 	if ((vr = vrrpd_lookup_vr_by_name(vn)) == NULL) {
2904 		vrrp_log(VRRP_DBG1, "vrrpd_query(): %s does not exist", vn);
2905 		*sizep = sizeof (vrrp_ret_query_t);
2906 		ret->vrq_err = VRRP_ENOTFOUND;
2907 		return;
2908 	}
2909 
2910 	/*
2911 	 * Get the virtual IP list if the router is not in the INIT state.
2912 	 */
2913 	if (vr->vvr_state != VRRP_STATE_INIT) {
2914 		vif = vr->vvr_vif;
2915 		TAILQ_FOREACH(ip, &vif->vvi_iplist, vip_next) {
2916 			vipcnt++;
2917 		}
2918 	}
2919 
2920 	*sizep = sizeof (vrrp_ret_query_t);
2921 	*sizep += (vipcnt == 0) ? 0 : (vipcnt - 1) * sizeof (vrrp_addr_t);
2922 	if (*sizep > size) {
2923 		vrrp_log(VRRP_ERR, "vrrpd_query(): not enough space to hold "
2924 		    "%d virtual IPs", vipcnt);
2925 		*sizep = sizeof (vrrp_ret_query_t);
2926 		ret->vrq_err = VRRP_ETOOSMALL;
2927 		return;
2928 	}
2929 
2930 	(void) gettimeofday(&now, NULL);
2931 
2932 	bzero(ret, *sizep);
2933 	infop = &ret->vrq_qinfo;
2934 	(void) memcpy(&infop->show_vi,
2935 	    &(vr->vvr_conf), sizeof (vrrp_vr_conf_t));
2936 	(void) memcpy(&infop->show_vs,
2937 	    &(vr->vvr_sinfo), sizeof (vrrp_stateinfo_t));
2938 	(void) strlcpy(infop->show_va.va_vnic, vr->vvr_vnic, MAXLINKNAMELEN);
2939 	infop->show_vt.vt_since_last_tran = timeval_to_milli(
2940 	    timeval_delta(now, vr->vvr_sinfo.vs_st_time));
2941 
2942 	if (vr->vvr_state == VRRP_STATE_INIT) {
2943 		ret->vrq_err = VRRP_SUCCESS;
2944 		return;
2945 	}
2946 
2947 	vipcnt = 0;
2948 	TAILQ_FOREACH(ip, &vif->vvi_iplist, vip_next) {
2949 		(void) memcpy(&infop->show_va.va_vips[vipcnt++],
2950 		    &ip->vip_addr, sizeof (vrrp_addr_t));
2951 	}
2952 	infop->show_va.va_vipcnt = vipcnt;
2953 
2954 	(void) memcpy(&infop->show_va.va_primary,
2955 	    &vr->vvr_pif->vvi_pip->vip_addr, sizeof (vrrp_addr_t));
2956 
2957 	(void) memcpy(&infop->show_vp, &(vr->vvr_peer), sizeof (vrrp_peer_t));
2958 
2959 	/*
2960 	 * Check whether there is a peer.
2961 	 */
2962 	if (!VRRPADDR_UNSPECIFIED(vr->vvr_conf.vvc_af,
2963 	    &(vr->vvr_peer.vp_addr))) {
2964 		infop->show_vt.vt_since_last_adv = timeval_to_milli(
2965 		    timeval_delta(now, vr->vvr_peer.vp_time));
2966 	}
2967 
2968 	if (vr->vvr_state == VRRP_STATE_BACKUP) {
2969 		infop->show_vt.vt_master_down_intv =
2970 		    MASTER_DOWN_INTERVAL_VR(vr);
2971 	}
2972 
2973 	ret->vrq_err = VRRP_SUCCESS;
2974 }
2975 
2976 /*
2977  * Build the VRRP packet (not including the IP header). Return the
2978  * payload length.
2979  *
2980  * If zero_pri is set to be B_TRUE, then this is the specical zero-priority
2981  * advertisement which is sent by the Master to indicate that it has been
2982  * stopped participating in VRRP.
2983  */
2984 static size_t
2985 vrrpd_build_vrrp(vrrp_vr_t *vr, uchar_t *buf, int buflen, boolean_t zero_pri)
2986 {
2987 	/* LINTED E_BAD_PTR_CAST_ALIGN */
2988 	vrrp_pkt_t	*vp = (vrrp_pkt_t *)buf;
2989 	/* LINTED E_BAD_PTR_CAST_ALIGN */
2990 	struct in_addr	*a4 = (struct in_addr *)(vp + 1);
2991 	/* LINTED E_BAD_PTR_CAST_ALIGN */
2992 	struct in6_addr *a6 = (struct in6_addr *)(vp + 1);
2993 	vrrp_intf_t	*vif = vr->vvr_vif;
2994 	vrrp_ip_t	*vip;
2995 	int		af = vif->vvi_af;
2996 	size_t		size = sizeof (vrrp_pkt_t);
2997 	uint16_t	rsvd_adver_int;
2998 	int		nip = 0;
2999 
3000 	vrrp_log(VRRP_DBG1, "vrrpd_build_vrrp(%s, %s_priority): intv %d",
3001 	    vr->vvr_conf.vvc_name, zero_pri ? "zero" : "non-zero",
3002 	    vr->vvr_conf.vvc_adver_int);
3003 
3004 	TAILQ_FOREACH(vip, &vif->vvi_iplist, vip_next) {
3005 		if ((size += ((af == AF_INET) ? sizeof (struct in_addr) :
3006 		    sizeof (struct in6_addr))) > buflen) {
3007 			vrrp_log(VRRP_ERR, "vrrpd_build_vrrp(%s): buffer size "
3008 			    "not big enough %d", vr->vvr_conf.vvc_name, size);
3009 			return (0);
3010 		}
3011 
3012 		if (af == AF_INET)
3013 			a4[nip++] = vip->vip_addr.in4.sin_addr;
3014 		else
3015 			a6[nip++] = vip->vip_addr.in6.sin6_addr;
3016 	}
3017 
3018 	if (nip == 0) {
3019 		vrrp_log(VRRP_ERR, "vrrpd_build_vrrp(%s): no virtual IP "
3020 		    "address", vr->vvr_conf.vvc_name);
3021 		return (0);
3022 	}
3023 
3024 	vp->vp_vers_type = (VRRP_VERSION << 4) | VRRP_PKT_ADVERT;
3025 	vp->vp_vrid = vr->vvr_conf.vvc_vrid;
3026 	vp->vp_prio = zero_pri ? VRRP_PRIO_ZERO : vr->vvr_conf.vvc_pri;
3027 
3028 	rsvd_adver_int = MSEC2CENTISEC(vr->vvr_conf.vvc_adver_int) & 0x0fff;
3029 	vp->vp_rsvd_adver_int = htons(rsvd_adver_int);
3030 	vp->vp_ipnum = nip;
3031 
3032 	/*
3033 	 * Set the checksum to 0 first, then caculate it.
3034 	 */
3035 	vp->vp_chksum = 0;
3036 	if (af == AF_INET) {
3037 		vp->vp_chksum = vrrp_cksum4(
3038 		    &vr->vvr_pif->vvi_pip->vip_addr.in4.sin_addr,
3039 		    &vrrp_muladdr4.in4.sin_addr, size, vp);
3040 	} else {
3041 		vp->vp_chksum = vrrp_cksum6(
3042 		    &vr->vvr_pif->vvi_pip->vip_addr.in6.sin6_addr,
3043 		    &vrrp_muladdr6.in6.sin6_addr, size, vp);
3044 	}
3045 
3046 	return (size);
3047 }
3048 
3049 /*
3050  * We need to build the IPv4 header on our own.
3051  */
3052 static vrrp_err_t
3053 vrrpd_send_adv_v4(vrrp_vr_t *vr, uchar_t *buf, size_t len, boolean_t zero_pri)
3054 {
3055 	/* LINTED E_BAD_PTR_CAST_ALIGN */
3056 	struct ip *ip = (struct ip *)buf;
3057 	size_t plen;
3058 
3059 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v4(%s)", vr->vvr_conf.vvc_name);
3060 
3061 	if ((plen = vrrpd_build_vrrp(vr, buf + sizeof (struct ip),
3062 	    len - sizeof (struct ip), zero_pri)) == 0) {
3063 		return (VRRP_ETOOSMALL);
3064 	}
3065 
3066 	ip->ip_hl = sizeof (struct ip) >> 2;
3067 	ip->ip_v = IPV4_VERSION;
3068 	ip->ip_tos = 0;
3069 	plen += sizeof (struct ip);
3070 	ip->ip_len = htons(plen);
3071 	ip->ip_off = 0;
3072 	ip->ip_ttl = VRRP_IP_TTL;
3073 	ip->ip_p = IPPROTO_VRRP;
3074 	ip->ip_src = vr->vvr_pif->vvi_pip->vip_addr.in4.sin_addr;
3075 	ip->ip_dst = vrrp_muladdr4.in4.sin_addr;
3076 
3077 	/*
3078 	 * The kernel will set the IP cksum and the IPv4 identification.
3079 	 */
3080 	ip->ip_id = 0;
3081 	ip->ip_sum = 0;
3082 
3083 	if ((len = sendto(vr->vvr_vif->vvi_sockfd, buf, plen, 0,
3084 	    (const struct sockaddr *)&vrrp_muladdr4,
3085 	    sizeof (struct sockaddr_in))) != plen) {
3086 		vrrp_log(VRRP_ERR, "vrrpd_send_adv_v4(): sendto() on "
3087 		    "(vrid:%d, %s, %s) failed: %s sent:%d expect:%d",
3088 		    vr->vvr_conf.vvc_vrid, vr->vvr_vif->vvi_ifname,
3089 		    af_str(vr->vvr_conf.vvc_af), strerror(errno), len, plen);
3090 		return (VRRP_ESYS);
3091 	}
3092 
3093 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v4(%s) succeed",
3094 	    vr->vvr_conf.vvc_name);
3095 	return (VRRP_SUCCESS);
3096 }
3097 
3098 static vrrp_err_t
3099 vrrpd_send_adv_v6(vrrp_vr_t *vr, uchar_t *buf, size_t len, boolean_t zero_pri)
3100 {
3101 	struct msghdr msg6;
3102 	size_t hoplimit_space = 0;
3103 	size_t pktinfo_space = 0;
3104 	size_t bufspace = 0;
3105 	struct in6_pktinfo *pktinfop;
3106 	struct cmsghdr *cmsgp;
3107 	uchar_t *cmsg_datap;
3108 	struct iovec iov;
3109 	size_t plen;
3110 
3111 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v6(%s)", vr->vvr_conf.vvc_name);
3112 
3113 	if ((plen = vrrpd_build_vrrp(vr, buf, len, zero_pri)) == 0)
3114 		return (VRRP_ETOOSMALL);
3115 
3116 	msg6.msg_control = NULL;
3117 	msg6.msg_controllen = 0;
3118 
3119 	hoplimit_space = sizeof (int);
3120 	bufspace += sizeof (struct cmsghdr) + _MAX_ALIGNMENT +
3121 	    hoplimit_space + _MAX_ALIGNMENT;
3122 
3123 	pktinfo_space = sizeof (struct in6_pktinfo);
3124 	bufspace += sizeof (struct cmsghdr) + _MAX_ALIGNMENT +
3125 	    pktinfo_space + _MAX_ALIGNMENT;
3126 
3127 	/*
3128 	 * We need to temporarily set the msg6.msg_controllen to bufspace
3129 	 * (we will later trim it to actual length used). This is needed because
3130 	 * CMSG_NXTHDR() uses it to check we have not exceeded the bounds.
3131 	 */
3132 	bufspace += sizeof (struct cmsghdr);
3133 	msg6.msg_controllen = bufspace;
3134 
3135 	msg6.msg_control = (struct cmsghdr *)malloc(bufspace);
3136 	if (msg6.msg_control == NULL) {
3137 		vrrp_log(VRRP_ERR, "vrrpd_send_adv_v6(%s): memory allocation "
3138 		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3139 		return (VRRP_ENOMEM);
3140 	}
3141 
3142 	cmsgp = CMSG_FIRSTHDR(&msg6);
3143 
3144 	cmsgp->cmsg_level = IPPROTO_IPV6;
3145 	cmsgp->cmsg_type = IPV6_HOPLIMIT;
3146 	cmsg_datap = CMSG_DATA(cmsgp);
3147 	/* LINTED */
3148 	*(int *)cmsg_datap = VRRP_IP_TTL;
3149 	cmsgp->cmsg_len = cmsg_datap + hoplimit_space - (uchar_t *)cmsgp;
3150 	cmsgp = CMSG_NXTHDR(&msg6, cmsgp);
3151 
3152 	cmsgp->cmsg_level = IPPROTO_IPV6;
3153 	cmsgp->cmsg_type = IPV6_PKTINFO;
3154 	cmsg_datap = CMSG_DATA(cmsgp);
3155 
3156 	/* LINTED */
3157 	pktinfop = (struct in6_pktinfo *)cmsg_datap;
3158 	/*
3159 	 * We don't know if pktinfop->ipi6_addr is aligned properly,
3160 	 * therefore let's use bcopy, instead of assignment.
3161 	 */
3162 	(void) bcopy(&vr->vvr_pif->vvi_pip->vip_addr.in6.sin6_addr,
3163 	    &pktinfop->ipi6_addr, sizeof (struct in6_addr));
3164 
3165 	/*
3166 	 *  We can assume pktinfop->ipi6_ifindex is 32 bit aligned.
3167 	 */
3168 	pktinfop->ipi6_ifindex = vr->vvr_vif->vvi_ifindex;
3169 	cmsgp->cmsg_len = cmsg_datap + pktinfo_space - (uchar_t *)cmsgp;
3170 	cmsgp = CMSG_NXTHDR(&msg6, cmsgp);
3171 	msg6.msg_controllen = (char *)cmsgp - (char *)msg6.msg_control;
3172 
3173 	msg6.msg_name = &vrrp_muladdr6;
3174 	msg6.msg_namelen = sizeof (struct sockaddr_in6);
3175 
3176 	iov.iov_base = buf;
3177 	iov.iov_len = plen;
3178 	msg6.msg_iov = &iov;
3179 	msg6.msg_iovlen = 1;
3180 
3181 	if ((len = sendmsg(vr->vvr_vif->vvi_sockfd,
3182 	    (const struct msghdr *)&msg6, 0)) != plen) {
3183 		vrrp_log(VRRP_ERR, "vrrpd_send_adv_v6(%s): sendmsg() failed: "
3184 		    "%s expect %d sent %d", vr->vvr_conf.vvc_name,
3185 		    strerror(errno), plen, len);
3186 		(void) free(msg6.msg_control);
3187 		return (VRRP_ESYS);
3188 	}
3189 
3190 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v6(%s) succeed",
3191 	    vr->vvr_conf.vvc_name);
3192 	(void) free(msg6.msg_control);
3193 	return (VRRP_SUCCESS);
3194 }
3195 
3196 /*
3197  * Send the VRRP advertisement packets.
3198  */
3199 static vrrp_err_t
3200 vrrpd_send_adv(vrrp_vr_t *vr, boolean_t zero_pri)
3201 {
3202 	uint64_t buf[(IP_MAXPACKET + 1)/8];
3203 
3204 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv(%s, %s_priority)",
3205 	    vr->vvr_conf.vvc_name, zero_pri ? "zero" : "non_zero");
3206 
3207 	assert(vr->vvr_pif->vvi_pip != NULL);
3208 
3209 	if (vr->vvr_pif->vvi_pip == NULL) {
3210 		vrrp_log(VRRP_DBG0, "vrrpd_send_adv(%s): no primary IP "
3211 		    "address", vr->vvr_conf.vvc_name);
3212 		return (VRRP_EINVAL);
3213 	}
3214 
3215 	if (vr->vvr_conf.vvc_af == AF_INET) {
3216 		return (vrrpd_send_adv_v4(vr, (uchar_t *)buf,
3217 		    sizeof (buf), zero_pri));
3218 	} else {
3219 		return (vrrpd_send_adv_v6(vr, (uchar_t *)buf,
3220 		    sizeof (buf), zero_pri));
3221 	}
3222 }
3223 
3224 static void
3225 vrrpd_process_adv(vrrp_vr_t *vr, vrrp_addr_t *from, vrrp_pkt_t *vp)
3226 {
3227 	vrrp_vr_conf_t *conf = &vr->vvr_conf;
3228 	char		peer[INET6_ADDRSTRLEN];
3229 	char		local[INET6_ADDRSTRLEN];
3230 	int		addr_cmp;
3231 	uint16_t	peer_adver_int;
3232 
3233 	/* LINTED E_CONSTANT_CONDITION */
3234 	VRRPADDR2STR(vr->vvr_conf.vvc_af, from, peer, INET6_ADDRSTRLEN,
3235 	    _B_FALSE);
3236 	vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s) from %s", conf->vvc_name,
3237 	    peer);
3238 
3239 	if (vr->vvr_state <= VRRP_STATE_INIT) {
3240 		vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): state: %s, not "
3241 		    "ready", conf->vvc_name, vrrp_state2str(vr->vvr_state));
3242 		return;
3243 	}
3244 
3245 	peer_adver_int = CENTISEC2MSEC(ntohs(vp->vp_rsvd_adver_int) & 0x0fff);
3246 
3247 	/* LINTED E_CONSTANT_CONDITION */
3248 	VRRPADDR2STR(vr->vvr_pif->vvi_af, &vr->vvr_pif->vvi_pip->vip_addr,
3249 	    local, INET6_ADDRSTRLEN, _B_FALSE);
3250 	vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): local/state/pri"
3251 	    "(%s/%s/%d) peer/pri/intv(%s/%d/%d)", conf->vvc_name, local,
3252 	    vrrp_state2str(vr->vvr_state), conf->vvc_pri, peer,
3253 	    vp->vp_prio, peer_adver_int);
3254 
3255 	addr_cmp = ipaddr_cmp(vr->vvr_pif->vvi_af, from,
3256 	    &vr->vvr_pif->vvi_pip->vip_addr);
3257 	if (addr_cmp == 0) {
3258 		vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): local message",
3259 		    conf->vvc_name);
3260 		return;
3261 	} else if (conf->vvc_pri == vp->vp_prio) {
3262 		vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): peer IP %s is %s"
3263 		    " than the local IP %s", conf->vvc_name, peer,
3264 		    addr_cmp > 0 ? "greater" : "less", local);
3265 	}
3266 
3267 	if (conf->vvc_pri == 255) {
3268 		vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): virtual address "
3269 		    "owner received advertisement from %s", conf->vvc_name,
3270 		    peer);
3271 		return;
3272 	}
3273 
3274 	(void) gettimeofday(&vr->vvr_peer_time, NULL);
3275 	(void) memcpy(&vr->vvr_peer_addr, from, sizeof (vrrp_addr_t));
3276 	vr->vvr_peer_prio = vp->vp_prio;
3277 	vr->vvr_peer_adver_int = peer_adver_int;
3278 
3279 	if (vr->vvr_state == VRRP_STATE_BACKUP) {
3280 		vr->vvr_master_adver_int = vr->vvr_peer_adver_int;
3281 		if ((vp->vp_prio == VRRP_PRIO_ZERO) ||
3282 		    (conf->vvc_preempt == _B_FALSE ||
3283 		    vp->vp_prio >= conf->vvc_pri)) {
3284 			(void) iu_cancel_timer(vrrpd_timerq,
3285 			    vr->vvr_timer_id, NULL);
3286 			if (vp->vp_prio == VRRP_PRIO_ZERO) {
3287 				/* the master stops participating in VRRP */
3288 				vr->vvr_timeout = SKEW_TIME_VR(vr);
3289 			} else {
3290 				vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
3291 			}
3292 			if ((vr->vvr_timer_id = iu_schedule_timer_ms(
3293 			    vrrpd_timerq, vr->vvr_timeout, vrrp_b2m_timeout,
3294 			    vr)) == -1) {
3295 				vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): "
3296 				    "start vrrp_b2m_timeout(%d) failed",
3297 				    conf->vvc_name, vr->vvr_timeout);
3298 			} else {
3299 				vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): "
3300 				    "start vrrp_b2m_timeout(%d)",
3301 				    conf->vvc_name, vr->vvr_timeout);
3302 			}
3303 		}
3304 	} else if (vr->vvr_state == VRRP_STATE_MASTER) {
3305 		if (vp->vp_prio == VRRP_PRIO_ZERO) {
3306 			(void) vrrpd_send_adv(vr, _B_FALSE);
3307 			(void) iu_cancel_timer(vrrpd_timerq,
3308 			    vr->vvr_timer_id, NULL);
3309 			if ((vr->vvr_timer_id = iu_schedule_timer_ms(
3310 			    vrrpd_timerq, vr->vvr_timeout, vrrp_adv_timeout,
3311 			    vr)) == -1) {
3312 				vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): "
3313 				    "start vrrp_adv_timeout(%d) failed",
3314 				    conf->vvc_name, vr->vvr_timeout);
3315 			} else {
3316 				vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): "
3317 				    "start vrrp_adv_timeout(%d)",
3318 				    conf->vvc_name, vr->vvr_timeout);
3319 			}
3320 		} else if (vp->vp_prio > conf->vvc_pri ||
3321 		    (vp->vp_prio == conf->vvc_pri && addr_cmp > 0)) {
3322 			(void) vrrpd_state_m2b(vr);
3323 		}
3324 	} else {
3325 		assert(_B_FALSE);
3326 	}
3327 }
3328 
3329 static vrrp_err_t
3330 vrrpd_process_vrrp(vrrp_intf_t *pif, vrrp_pkt_t *vp, size_t len,
3331     vrrp_addr_t *from)
3332 {
3333 	vrrp_vr_t	*vr;
3334 	uint8_t		vers_type;
3335 	uint16_t	saved_cksum, cksum;
3336 	char		peer[INET6_ADDRSTRLEN];
3337 
3338 	/* LINTED E_CONSTANT_CONDITION */
3339 	VRRPADDR2STR(pif->vvi_af, from, peer, INET6_ADDRSTRLEN, _B_FALSE);
3340 	vrrp_log(VRRP_DBG0, "vrrpd_process_vrrp(%s) from %s", pif->vvi_ifname,
3341 	    peer);
3342 
3343 	if (len < sizeof (vrrp_pkt_t)) {
3344 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): invalid message "
3345 		    "length %d", len);
3346 		return (VRRP_EINVAL);
3347 	}
3348 
3349 	/*
3350 	 * Verify: VRRP version number and packet type.
3351 	 */
3352 	vers_type = ((vp->vp_vers_type & VRRP_VER_MASK) >> 4);
3353 	if (vers_type != VRRP_VERSION) {
3354 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s) unsupported "
3355 		    "version %d", pif->vvi_ifname, vers_type);
3356 		return (VRRP_EINVAL);
3357 	}
3358 
3359 	if (vp->vp_ipnum == 0) {
3360 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): zero IPvX count",
3361 		    pif->vvi_ifname);
3362 		return (VRRP_EINVAL);
3363 	}
3364 
3365 	if (len - sizeof (vrrp_pkt_t) !=
3366 	    vp->vp_ipnum * (pif->vvi_af == AF_INET ? sizeof (struct in_addr) :
3367 	    sizeof (struct in6_addr))) {
3368 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): invalid IPvX count"
3369 		    " %d", pif->vvi_ifname, vp->vp_ipnum);
3370 		return (VRRP_EINVAL);
3371 	}
3372 
3373 	vers_type = (vp->vp_vers_type & VRRP_TYPE_MASK);
3374 
3375 	/*
3376 	 * verify: VRRP checksum. Note that vrrp_cksum returns network byte
3377 	 * order checksum value;
3378 	 */
3379 	saved_cksum = vp->vp_chksum;
3380 	vp->vp_chksum = 0;
3381 	if (pif->vvi_af == AF_INET) {
3382 		cksum = vrrp_cksum4(&from->in4.sin_addr,
3383 		    &vrrp_muladdr4.in4.sin_addr, len, vp);
3384 	} else {
3385 		cksum = vrrp_cksum6(&from->in6.sin6_addr,
3386 		    &vrrp_muladdr6.in6.sin6_addr, len, vp);
3387 	}
3388 
3389 	if (cksum != saved_cksum) {
3390 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s) invalid "
3391 		    "checksum: expected/real(0x%x/0x%x)", pif->vvi_ifname,
3392 		    cksum, saved_cksum);
3393 		return (VRRP_EINVAL);
3394 	}
3395 
3396 	if ((vr = vrrpd_lookup_vr_by_vrid(pif->vvi_ifname, vp->vp_vrid,
3397 	    pif->vvi_af)) != NULL && vers_type == VRRP_PKT_ADVERT) {
3398 		vrrpd_process_adv(vr, from, vp);
3399 	} else {
3400 		vrrp_log(VRRP_DBG1, "vrrpd_process_vrrp(%s) VRID(%d/%s) "
3401 		    "not configured", pif->vvi_ifname, vp->vp_vrid,
3402 		    af_str(pif->vvi_af));
3403 	}
3404 	return (VRRP_SUCCESS);
3405 }
3406 
3407 /*
3408  * IPv4 socket, the IPv4 header is included.
3409  */
3410 static vrrp_err_t
3411 vrrpd_process_adv_v4(vrrp_intf_t *pif, struct msghdr *msgp, size_t len)
3412 {
3413 	char		abuf[INET6_ADDRSTRLEN];
3414 	struct ip	*ip;
3415 
3416 	vrrp_log(VRRP_DBG0, "vrrpd_process_adv_v4(%s, %d)",
3417 	    pif->vvi_ifname, len);
3418 
3419 	ip = (struct ip *)msgp->msg_iov->iov_base;
3420 
3421 	/* Sanity check */
3422 	if (len < sizeof (struct ip) || len < ntohs(ip->ip_len)) {
3423 		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid length "
3424 		    "%d", pif->vvi_ifname, len);
3425 		return (VRRP_EINVAL);
3426 	}
3427 
3428 	assert(ip->ip_v == IPV4_VERSION);
3429 	assert(ip->ip_p == IPPROTO_VRRP);
3430 	assert(msgp->msg_namelen == sizeof (struct sockaddr_in));
3431 
3432 	if (vrrp_muladdr4.in4.sin_addr.s_addr != ip->ip_dst.s_addr) {
3433 		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3434 		    "destination %s", pif->vvi_ifname,
3435 		    inet_ntop(pif->vvi_af, &(ip->ip_dst), abuf, sizeof (abuf)));
3436 		return (VRRP_EINVAL);
3437 	}
3438 
3439 	if (ip->ip_ttl != VRRP_IP_TTL) {
3440 		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3441 		    "ttl %d", pif->vvi_ifname, ip->ip_ttl);
3442 		return (VRRP_EINVAL);
3443 	}
3444 
3445 	/*
3446 	 * Note that the ip_len contains only the IP payload length.
3447 	 */
3448 	return (vrrpd_process_vrrp(pif,
3449 	    /* LINTED E_BAD_PTR_CAST_ALIGN */
3450 	    (vrrp_pkt_t *)((char *)ip + ip->ip_hl * 4), ntohs(ip->ip_len),
3451 	    (vrrp_addr_t *)msgp->msg_name));
3452 }
3453 
3454 /*
3455  * IPv6 socket, check the ancillary_data.
3456  */
3457 static vrrp_err_t
3458 vrrpd_process_adv_v6(vrrp_intf_t *pif, struct msghdr *msgp, size_t len)
3459 {
3460 	struct cmsghdr		*cmsgp;
3461 	uchar_t			*cmsg_datap;
3462 	struct in6_pktinfo	*pktinfop;
3463 	char			abuf[INET6_ADDRSTRLEN];
3464 	int			ttl;
3465 
3466 	vrrp_log(VRRP_DBG1, "vrrpd_process_adv_v6(%s, %d)",
3467 	    pif->vvi_ifname, len);
3468 
3469 	/* Sanity check */
3470 	if (len < sizeof (vrrp_pkt_t)) {
3471 		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v6(%s): invalid length "
3472 		    "%d", pif->vvi_ifname, len);
3473 		return (VRRP_EINVAL);
3474 	}
3475 
3476 	assert(msgp->msg_namelen == sizeof (struct sockaddr_in6));
3477 
3478 	for (cmsgp = CMSG_FIRSTHDR(msgp); cmsgp != NULL;
3479 	    cmsgp = CMSG_NXTHDR(msgp, cmsgp)) {
3480 		assert(cmsgp->cmsg_level == IPPROTO_IPV6);
3481 		cmsg_datap = CMSG_DATA(cmsgp);
3482 
3483 		switch (cmsgp->cmsg_type) {
3484 		case IPV6_HOPLIMIT:
3485 			/* LINTED E_BAD_PTR_CAST_ALIGN */
3486 			if ((ttl = *(int *)cmsg_datap) == VRRP_IP_TTL)
3487 				break;
3488 
3489 			vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3490 			    "ttl %d", pif->vvi_ifname, ttl);
3491 			return (VRRP_EINVAL);
3492 		case IPV6_PKTINFO:
3493 			/* LINTED E_BAD_PTR_CAST_ALIGN */
3494 			pktinfop = (struct in6_pktinfo *)cmsg_datap;
3495 			if (IN6_ARE_ADDR_EQUAL(&pktinfop->ipi6_addr,
3496 			    &vrrp_muladdr6.in6.sin6_addr)) {
3497 				break;
3498 			}
3499 
3500 			vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3501 			    "destination %s", pif->vvi_ifname,
3502 			    inet_ntop(pif->vvi_af, &pktinfop->ipi6_addr, abuf,
3503 			    sizeof (abuf)));
3504 			return (VRRP_EINVAL);
3505 		}
3506 	}
3507 
3508 	return (vrrpd_process_vrrp(pif, msgp->msg_iov->iov_base, len,
3509 	    msgp->msg_name));
3510 }
3511 
3512 /* ARGSUSED */
3513 static void
3514 vrrpd_sock_handler(iu_eh_t *eh, int s, short events, iu_event_id_t id,
3515     void *arg)
3516 {
3517 	struct msghdr		msg;
3518 	vrrp_addr_t		from;
3519 	uint64_t		buf[(IP_MAXPACKET + 1)/8];
3520 	uint64_t		ancillary_data[(IP_MAXPACKET + 1)/8];
3521 	vrrp_intf_t		*pif = arg;
3522 	int			af = pif->vvi_af;
3523 	int			len;
3524 	struct iovec		iov;
3525 
3526 	vrrp_log(VRRP_DBG1, "vrrpd_sock_handler(%s)", pif->vvi_ifname);
3527 
3528 	msg.msg_name = (struct sockaddr *)&from;
3529 	msg.msg_namelen = (af == AF_INET) ? sizeof (struct sockaddr_in) :
3530 	    sizeof (struct sockaddr_in6);
3531 	iov.iov_base = (char *)buf;
3532 	iov.iov_len = sizeof (buf);
3533 	msg.msg_iov = &iov;
3534 	msg.msg_iovlen = 1;
3535 	msg.msg_control = ancillary_data;
3536 	msg.msg_controllen = sizeof (ancillary_data);
3537 
3538 	if ((len = recvmsg(s, &msg, 0)) == -1) {
3539 		vrrp_log(VRRP_ERR, "vrrpd_sock_handler() recvmsg(%s) "
3540 		    "failed: %s", pif->vvi_ifname, strerror(errno));
3541 		return;
3542 	}
3543 
3544 	/*
3545 	 * Ignore packets whose control buffers that don't fit
3546 	 */
3547 	if (msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) {
3548 		vrrp_log(VRRP_ERR, "vrrpd_sock_handler() %s buffer not "
3549 		    "big enough", pif->vvi_ifname);
3550 		return;
3551 	}
3552 
3553 	if (af == AF_INET)
3554 		(void) vrrpd_process_adv_v4(pif, &msg, len);
3555 	else
3556 		(void) vrrpd_process_adv_v6(pif, &msg, len);
3557 }
3558 
3559 /*
3560  * Create the socket which is used to receive VRRP packets. Virtual routers
3561  * that configured on the same physical interface share the same socket.
3562  */
3563 static vrrp_err_t
3564 vrrpd_init_rxsock(vrrp_vr_t *vr)
3565 {
3566 	vrrp_intf_t *pif;	/* Physical interface used to recv packets */
3567 	struct group_req greq;
3568 	struct sockaddr_storage *muladdr;
3569 	int af, proto;
3570 	int on = 1;
3571 	vrrp_err_t err = VRRP_SUCCESS;
3572 
3573 	vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s)", vr->vvr_conf.vvc_name);
3574 
3575 	/*
3576 	 * The RX sockets may already been initialized.
3577 	 */
3578 	if ((pif = vr->vvr_pif) != NULL) {
3579 		vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s) already done on %s",
3580 		    vr->vvr_conf.vvc_name, pif->vvi_ifname);
3581 		assert(pif->vvi_sockfd != -1);
3582 		return (VRRP_SUCCESS);
3583 	}
3584 
3585 	/*
3586 	 * If no IP addresses configured on the primary interface,
3587 	 * return failure.
3588 	 */
3589 	af = vr->vvr_conf.vvc_af;
3590 	pif = vrrpd_lookup_if(vr->vvr_conf.vvc_link, af);
3591 	if (pif == NULL) {
3592 		vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s): no IP address "
3593 		    "over %s/%s", vr->vvr_conf.vvc_name,
3594 		    vr->vvr_conf.vvc_link, af_str(af));
3595 		return (VRRP_ENOPRIM);
3596 	}
3597 
3598 	proto = (af == AF_INET ? IPPROTO_IP : IPPROTO_IPV6);
3599 	if (pif->vvi_nvr++ == 0) {
3600 		assert(pif->vvi_sockfd < 0);
3601 		pif->vvi_sockfd = socket(af, SOCK_RAW, IPPROTO_VRRP);
3602 		if (pif->vvi_sockfd < 0) {
3603 			vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): socket() "
3604 			    "failed %s", vr->vvr_conf.vvc_name,
3605 			    strerror(errno));
3606 			err = VRRP_ESYS;
3607 			goto done;
3608 		}
3609 
3610 		/*
3611 		 * Join the multicast group to receive VRRP packets.
3612 		 */
3613 		if (af == AF_INET) {
3614 			muladdr = (struct sockaddr_storage *)
3615 			    (void *)&vrrp_muladdr4;
3616 		} else {
3617 			muladdr = (struct sockaddr_storage *)
3618 			    (void *)&vrrp_muladdr6;
3619 		}
3620 
3621 		greq.gr_interface = pif->vvi_ifindex;
3622 		(void) memcpy(&greq.gr_group, muladdr,
3623 		    sizeof (struct sockaddr_storage));
3624 		if (setsockopt(pif->vvi_sockfd, proto, MCAST_JOIN_GROUP, &greq,
3625 		    sizeof (struct group_req)) < 0) {
3626 			vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3627 			    "join_group(%d) failed: %s", vr->vvr_conf.vvc_name,
3628 			    pif->vvi_ifindex, strerror(errno));
3629 			err = VRRP_ESYS;
3630 			goto done;
3631 		} else {
3632 			vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s): "
3633 			    "join_group(%d) succeeded", vr->vvr_conf.vvc_name,
3634 			    pif->vvi_ifindex);
3635 		}
3636 
3637 		/*
3638 		 * Unlike IPv4, the IPv6 raw socket does not pass the IP header
3639 		 * when a packet is received. Call setsockopt() to receive such
3640 		 * information.
3641 		 */
3642 		if (af == AF_INET6) {
3643 			/*
3644 			 * Enable receipt of destination address info
3645 			 */
3646 			if (setsockopt(pif->vvi_sockfd, proto, IPV6_RECVPKTINFO,
3647 			    (char *)&on, sizeof (on)) < 0) {
3648 				vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3649 				    "enable recvpktinfo failed: %s",
3650 				    vr->vvr_conf.vvc_name, strerror(errno));
3651 				err = VRRP_ESYS;
3652 				goto done;
3653 			}
3654 
3655 			/*
3656 			 * Enable receipt of hoplimit info
3657 			 */
3658 			if (setsockopt(pif->vvi_sockfd, proto,
3659 			    IPV6_RECVHOPLIMIT, (char *)&on, sizeof (on)) < 0) {
3660 				vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3661 				    "enable recvhoplimit failed: %s",
3662 				    vr->vvr_conf.vvc_name, strerror(errno));
3663 				err = VRRP_ESYS;
3664 				goto done;
3665 			}
3666 		}
3667 
3668 		if ((pif->vvi_eid = iu_register_event(vrrpd_eh,
3669 		    pif->vvi_sockfd, POLLIN, vrrpd_sock_handler, pif)) == -1) {
3670 			vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3671 			    "iu_register_event() failed",
3672 			    vr->vvr_conf.vvc_name);
3673 			err = VRRP_ESYS;
3674 			goto done;
3675 		}
3676 	} else {
3677 		vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s) over %s already "
3678 		    "done %d", vr->vvr_conf.vvc_name, pif->vvi_ifname,
3679 		    pif->vvi_nvr);
3680 		assert(IS_PRIMARY_INTF(pif));
3681 	}
3682 
3683 done:
3684 	vr->vvr_pif = pif;
3685 	if (err != VRRP_SUCCESS)
3686 		vrrpd_fini_rxsock(vr);
3687 
3688 	return (err);
3689 }
3690 
3691 /*
3692  * Delete the socket which is used to receive VRRP packets for the given
3693  * VRRP router. Since all virtual routers that configured on the same
3694  * physical interface share the same socket, the socket is only closed
3695  * when the last VRRP router share this socket is deleted.
3696  */
3697 static void
3698 vrrpd_fini_rxsock(vrrp_vr_t *vr)
3699 {
3700 	vrrp_intf_t	*pif = vr->vvr_pif;
3701 
3702 	vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s)", vr->vvr_conf.vvc_name);
3703 
3704 	if (pif == NULL)
3705 		return;
3706 
3707 	if (--pif->vvi_nvr == 0) {
3708 		vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s) over %s",
3709 		    vr->vvr_conf.vvc_name, pif->vvi_ifname);
3710 		(void) iu_unregister_event(vrrpd_eh, pif->vvi_eid, NULL);
3711 		(void) close(pif->vvi_sockfd);
3712 		pif->vvi_pip = NULL;
3713 		pif->vvi_sockfd = -1;
3714 		pif->vvi_eid = -1;
3715 	} else {
3716 		vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s) over %s %d",
3717 		    vr->vvr_conf.vvc_name, pif->vvi_ifname, pif->vvi_nvr);
3718 	}
3719 	vr->vvr_pif = NULL;
3720 }
3721 
3722 /*
3723  * Create the socket which is used to send VRRP packets. Further, set
3724  * the IFF_NOACCEPT flag based on the VRRP router's accept mode.
3725  */
3726 static vrrp_err_t
3727 vrrpd_init_txsock(vrrp_vr_t *vr)
3728 {
3729 	int		af;
3730 	vrrp_intf_t	*vif;
3731 	vrrp_err_t	err;
3732 
3733 	vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s)", vr->vvr_conf.vvc_name);
3734 
3735 	if (vr->vvr_vif != NULL) {
3736 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s) already done on %s",
3737 		    vr->vvr_conf.vvc_name, vr->vvr_vif->vvi_ifname);
3738 		return (VRRP_SUCCESS);
3739 	}
3740 
3741 	af = vr->vvr_conf.vvc_af;
3742 	if ((vif = vrrpd_lookup_if(vr->vvr_vnic, af)) == NULL) {
3743 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s) no IP address over "
3744 		    "%s/%s", vr->vvr_conf.vvc_name, vr->vvr_vnic, af_str(af));
3745 		return (VRRP_ENOVIRT);
3746 	}
3747 
3748 	vr->vvr_vif = vif;
3749 	if (vr->vvr_conf.vvc_af == AF_INET)
3750 		err = vrrpd_init_txsock_v4(vr);
3751 	else
3752 		err = vrrpd_init_txsock_v6(vr);
3753 
3754 	if (err != VRRP_SUCCESS)
3755 		goto done;
3756 
3757 	/*
3758 	 * The interface should start with IFF_NOACCEPT flag not set, only
3759 	 * call this function when the VRRP router requires IFF_NOACCEPT.
3760 	 */
3761 	if (!vr->vvr_conf.vvc_accept)
3762 		err = vrrpd_set_noaccept(vr, _B_TRUE);
3763 
3764 done:
3765 	if (err != VRRP_SUCCESS) {
3766 		(void) close(vif->vvi_sockfd);
3767 		vif->vvi_sockfd = -1;
3768 		vr->vvr_vif = NULL;
3769 	}
3770 
3771 	return (err);
3772 }
3773 
3774 /*
3775  * Create the IPv4 socket which is used to send VRRP packets. Note that
3776  * the destination MAC address of VRRP advertisement must be the virtual
3777  * MAC address, so we specify the output interface to be the specific VNIC.
3778  */
3779 static vrrp_err_t
3780 vrrpd_init_txsock_v4(vrrp_vr_t *vr)
3781 {
3782 	vrrp_intf_t *vif;	/* VNIC interface used to send packets */
3783 	vrrp_ip_t *vip;		/* The first IP over the VNIC */
3784 	int on = 1;
3785 	char off = 0;
3786 	vrrp_err_t err = VRRP_SUCCESS;
3787 	char abuf[INET6_ADDRSTRLEN];
3788 
3789 	vif = vr->vvr_vif;
3790 	assert(vr->vvr_conf.vvc_af == AF_INET);
3791 	assert(vif != NULL);
3792 
3793 	vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v4(%s) over %s",
3794 	    vr->vvr_conf.vvc_name, vif->vvi_ifname);
3795 
3796 	if (vif->vvi_sockfd != -1) {
3797 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v4(%s) already done "
3798 		    "over %s", vr->vvr_conf.vvc_name, vif->vvi_ifname);
3799 		return (VRRP_SUCCESS);
3800 	}
3801 
3802 	vif->vvi_sockfd = socket(vif->vvi_af, SOCK_RAW, IPPROTO_VRRP);
3803 	if (vif->vvi_sockfd < 0) {
3804 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): socket() "
3805 		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3806 		err = VRRP_ESYS;
3807 		goto done;
3808 	}
3809 
3810 	/*
3811 	 * Include the IP header, so that we can specify the IP address/ttl.
3812 	 */
3813 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_HDRINCL, (char *)&on,
3814 	    sizeof (on)) < 0) {
3815 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): ip_hdrincl "
3816 		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3817 		err = VRRP_ESYS;
3818 		goto done;
3819 	}
3820 
3821 	/*
3822 	 * Disable multicast loopback.
3823 	 */
3824 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_MULTICAST_LOOP, &off,
3825 	    sizeof (char)) == -1) {
3826 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): disable "
3827 		    "multicast_loop failed: %s", vr->vvr_conf.vvc_name,
3828 		    strerror(errno));
3829 		err = VRRP_ESYS;
3830 		goto done;
3831 	}
3832 
3833 	vip = TAILQ_FIRST(&vif->vvi_iplist);
3834 	/* LINTED E_CONSTANT_CONDITION */
3835 	VRRPADDR2STR(vif->vvi_af, &vip->vip_addr, abuf, INET6_ADDRSTRLEN,
3836 	    _B_FALSE);
3837 
3838 	/*
3839 	 * Set the output interface to send the VRRP packet.
3840 	 */
3841 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_MULTICAST_IF,
3842 	    &vip->vip_addr.in4.sin_addr, sizeof (struct in_addr)) < 0) {
3843 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): multcast_if(%s) "
3844 		    "failed: %s", vr->vvr_conf.vvc_name, abuf, strerror(errno));
3845 		err = VRRP_ESYS;
3846 	} else {
3847 		vrrp_log(VRRP_DBG0, "vrrpd_init_txsock_v4(%s): multcast_if(%s) "
3848 		    "succeed", vr->vvr_conf.vvc_name, abuf);
3849 	}
3850 
3851 done:
3852 	if (err != VRRP_SUCCESS) {
3853 		(void) close(vif->vvi_sockfd);
3854 		vif->vvi_sockfd = -1;
3855 	}
3856 
3857 	return (err);
3858 }
3859 
3860 /*
3861  * Create the IPv6 socket which is used to send VRRP packets. Note that
3862  * the destination must be the virtual MAC address, so we specify the output
3863  * interface to be the specific VNIC.
3864  */
3865 static vrrp_err_t
3866 vrrpd_init_txsock_v6(vrrp_vr_t *vr)
3867 {
3868 	vrrp_intf_t *vif;	/* VNIC interface used to send packets */
3869 	int off = 0, ttl = VRRP_IP_TTL;
3870 	vrrp_err_t err = VRRP_SUCCESS;
3871 
3872 	vif = vr->vvr_vif;
3873 	assert(vr->vvr_conf.vvc_af == AF_INET6);
3874 	assert(vif != NULL);
3875 
3876 	vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s) over %s",
3877 	    vr->vvr_conf.vvc_name, vif->vvi_ifname);
3878 
3879 	if (vif->vvi_sockfd != -1) {
3880 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s) already done "
3881 		    "over %s", vr->vvr_conf.vvc_name, vif->vvi_ifname);
3882 		return (VRRP_SUCCESS);
3883 	}
3884 
3885 	vif->vvi_sockfd = socket(vif->vvi_af, SOCK_RAW, IPPROTO_VRRP);
3886 	if (vif->vvi_sockfd < 0) {
3887 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): socket() "
3888 		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3889 		err = VRRP_ESYS;
3890 		goto done;
3891 	}
3892 
3893 	/*
3894 	 * Disable multicast loopback.
3895 	 */
3896 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
3897 	    &off, sizeof (int)) == -1) {
3898 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): disable "
3899 		    "multicast_loop failed: %s", vr->vvr_conf.vvc_name,
3900 		    strerror(errno));
3901 		err = VRRP_ESYS;
3902 		goto done;
3903 	}
3904 
3905 	/*
3906 	 * Set the multicast TTL.
3907 	 */
3908 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_HOPS,
3909 	    &ttl, sizeof (int)) == -1) {
3910 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): enable "
3911 		    "multicast_hops %d failed: %s", vr->vvr_conf.vvc_name,
3912 		    ttl, strerror(errno));
3913 		err = VRRP_ESYS;
3914 		goto done;
3915 	}
3916 
3917 	/*
3918 	 * Set the output interface to send the VRRP packet.
3919 	 */
3920 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_IF,
3921 	    &vif->vvi_ifindex, sizeof (uint32_t)) < 0) {
3922 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): multicast_if(%d) "
3923 		    "failed: %s", vr->vvr_conf.vvc_name, vif->vvi_ifindex,
3924 		    strerror(errno));
3925 		err = VRRP_ESYS;
3926 	} else {
3927 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s): multicast_if(%d)"
3928 		    " succeed", vr->vvr_conf.vvc_name, vif->vvi_ifindex);
3929 	}
3930 
3931 done:
3932 	if (err != VRRP_SUCCESS) {
3933 		(void) close(vif->vvi_sockfd);
3934 		vif->vvi_sockfd = -1;
3935 	}
3936 
3937 	return (err);
3938 }
3939 
3940 /*
3941  * Delete the socket which is used to send VRRP packets. Further, clear
3942  * the IFF_NOACCEPT flag based on the VRRP router's accept mode.
3943  */
3944 static void
3945 vrrpd_fini_txsock(vrrp_vr_t *vr)
3946 {
3947 	vrrp_intf_t *vif = vr->vvr_vif;
3948 
3949 	vrrp_log(VRRP_DBG1, "vrrpd_fini_txsock(%s)", vr->vvr_conf.vvc_name);
3950 
3951 	if (vif != NULL) {
3952 		if (!vr->vvr_conf.vvc_accept)
3953 			(void) vrrpd_set_noaccept(vr, _B_FALSE);
3954 		(void) close(vif->vvi_sockfd);
3955 		vif->vvi_sockfd = -1;
3956 		vr->vvr_vif = NULL;
3957 	}
3958 }
3959 
/*
 * Given the pseudo header cksum value (sum), calculate the cksum with
 * the rest of the VRRP packet.
 *
 * sum is the 16-bit-word sum accumulated so far, p points to the packet
 * and plen is its length in bytes. The 16-bit words of the packet are
 * added to sum, a trailing odd byte is added as a word whose second byte
 * is zero, and the result is folded to 16 bits and complemented. A
 * computed value of zero is returned as all ones instead.
 *
 * The sum is accumulated in an unsigned 32-bit integer: a maximum length
 * packet together with the pseudo header sum can exceed INT_MAX, which
 * would overflow a signed accumulator. The words are fetched with memcpy
 * so that p does not have to be 16-bit aligned.
 */
static uint16_t
in_cksum(int sum, uint16_t plen, void *p)
{
	const unsigned char	*bytes = p;
	uint32_t		acc = (uint32_t)sum;
	unsigned int		nleft = plen;
	uint16_t		word;
	uint16_t		answer;

	while (nleft > 1) {
		(void) memcpy(&word, bytes, sizeof (word));
		acc += word;
		bytes += sizeof (word);
		nleft -= sizeof (word);
	}

	/* mop up an odd byte, if necessary */
	if (nleft == 1) {
		word = 0;
		(void) memcpy(&word, bytes, 1);
		acc += word;
	}

	/*
	 * add back carry outs from top 16 bits to low 16 bits
	 */
	acc = (acc >> 16) + (acc & 0xffff);	/* add hi 16 to low 16 */
	acc += (acc >> 16);			/* add carry */
	answer = (uint16_t)~acc;		/* truncate to 16 bits */
	return (answer == 0 ? (uint16_t)~0 : answer);
}
3993 
/*
 * Pseudo header for v4.
 *
 * This is the block that vrrp_cksum4() sums ahead of the VRRP packet
 * itself. The field order and sizes define the bytes that get summed, so
 * they must not be rearranged.
 */
struct pshv4 {
	/* source address */
	struct in_addr	ph4_src;
	/* destination address */
	struct in_addr	ph4_dst;
	/* always zero */
	uint8_t		ph4_zero;
	/* protocol used, IPPROTO_VRRP */
	uint8_t		ph4_protocol;
	/* VRRP payload len */
	uint16_t	ph4_len;
};
4002 
4003 /*
4004  * Checksum routine for VRRP checksum. Note that plen is the upper-layer
4005  * packet length (in the host byte order), and both IP source and destination
4006  * addresses are in the network byte order.
4007  */
4008 static uint16_t
4009 vrrp_cksum4(struct in_addr *src, struct in_addr *dst, uint16_t plen,
4010     vrrp_pkt_t *vp)
4011 {
4012 	struct pshv4 ph4;
4013 	int nleft;
4014 	uint16_t *w;
4015 	int sum = 0;
4016 
4017 	ph4.ph4_src = *src;
4018 	ph4.ph4_dst = *dst;
4019 	ph4.ph4_zero = 0;
4020 	ph4.ph4_protocol = IPPROTO_VRRP;
4021 	ph4.ph4_len = htons(plen);
4022 
4023 	/*
4024 	 *  Our algorithm is simple, using a 32 bit accumulator (sum),
4025 	 *  we add sequential 16 bit words to it, and at the end, fold
4026 	 *  back all the carry bits from the top 16 bits into the lower
4027 	 *  16 bits.
4028 	 */
4029 	nleft = sizeof (struct pshv4);
4030 	w = (uint16_t *)&ph4;
4031 	while (nleft > 0) {
4032 		sum += *w++;
4033 		nleft -= 2;
4034 	}
4035 
4036 	return (in_cksum(sum, plen, vp));
4037 }
4038 
/*
 * Pseudo header for v6.
 *
 * This is the block that vrrp_cksum6() sums ahead of the VRRP packet
 * itself. The field order and sizes define the bytes that get summed, so
 * they must not be rearranged. The last 32-bit word is split into a
 * 24-bit zero field followed by the 8-bit protocol field.
 */
struct pshv6 {
	/* source address */
	struct in6_addr	ph6_src;
	/* destination address */
	struct in6_addr	ph6_dst;
	/* VRRP payload len */
	uint32_t	ph6_len;
	/* set to zero by vrrp_cksum6() */
	uint32_t	ph6_zero : 24;
	/* protocol used, IPPROTO_VRRP */
	uint32_t	ph6_protocol : 8;
};
4047 
4048 /*
4049  * Checksum routine for VRRP checksum. Note that plen is the upper-layer
4050  * packet length (in the host byte order), and both IP source and destination
4051  * addresses are in the network byte order.
4052  */
4053 static uint16_t
4054 vrrp_cksum6(struct in6_addr *src, struct in6_addr *dst, uint16_t plen,
4055     vrrp_pkt_t *vp)
4056 {
4057 	struct pshv6 ph6;
4058 	int nleft;
4059 	uint16_t *w;
4060 	int sum = 0;
4061 
4062 	ph6.ph6_src = *src;
4063 	ph6.ph6_dst = *dst;
4064 	ph6.ph6_zero = 0;
4065 	ph6.ph6_protocol = IPPROTO_VRRP;
4066 	ph6.ph6_len = htonl((uint32_t)plen);
4067 
4068 	/*
4069 	 *  Our algorithm is simple, using a 32 bit accumulator (sum),
4070 	 *  we add sequential 16 bit words to it, and at the end, fold
4071 	 *  back all the carry bits from the top 16 bits into the lower
4072 	 *  16 bits.
4073 	 */
4074 	nleft = sizeof (struct pshv6);
4075 	w = (uint16_t *)&ph6;
4076 	while (nleft > 0) {
4077 		sum += *w++;
4078 		nleft -= 2;
4079 	}
4080 
4081 	return (in_cksum(sum, plen, vp));
4082 }
4083 
4084 vrrp_err_t
4085 vrrpd_state_i2m(vrrp_vr_t *vr)
4086 {
4087 	vrrp_err_t	err;
4088 
4089 	vrrp_log(VRRP_DBG1, "vrrpd_state_i2m(%s)", vr->vvr_conf.vvc_name);
4090 
4091 	vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_MASTER, vr);
4092 	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4093 		return (err);
4094 
4095 	(void) vrrpd_send_adv(vr, _B_FALSE);
4096 
4097 	vr->vvr_err = VRRP_SUCCESS;
4098 	vr->vvr_timeout = vr->vvr_conf.vvc_adver_int;
4099 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4100 	    vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) {
4101 		vrrp_log(VRRP_ERR, "vrrpd_state_i2m(): unable to start timer");
4102 		return (VRRP_ESYS);
4103 	} else {
4104 		vrrp_log(VRRP_DBG1, "vrrpd_state_i2m(%s): start "
4105 		    "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name,
4106 		    vr->vvr_timeout);
4107 	}
4108 	return (VRRP_SUCCESS);
4109 }
4110 
4111 vrrp_err_t
4112 vrrpd_state_i2b(vrrp_vr_t *vr)
4113 {
4114 	vrrp_err_t	err;
4115 
4116 	vrrp_log(VRRP_DBG1, "vrrpd_state_i2b(%s)", vr->vvr_conf.vvc_name);
4117 
4118 	vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_BACKUP, vr);
4119 	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4120 		return (err);
4121 
4122 	/*
4123 	 * Reinitialize the Master advertisement interval to be the configured
4124 	 * value.
4125 	 */
4126 	vr->vvr_err = VRRP_SUCCESS;
4127 	vr->vvr_master_adver_int = vr->vvr_conf.vvc_adver_int;
4128 	vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
4129 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4130 	    vr->vvr_timeout, vrrp_b2m_timeout, vr)) == -1) {
4131 		vrrp_log(VRRP_ERR, "vrrpd_state_i2b(): unable to set timer");
4132 		return (VRRP_ESYS);
4133 	} else {
4134 		vrrp_log(VRRP_DBG1, "vrrpd_state_i2b(%s): start "
4135 		    "vrrp_b2m_timeout(%d)", vr->vvr_conf.vvc_name,
4136 		    vr->vvr_timeout);
4137 	}
4138 	return (VRRP_SUCCESS);
4139 }
4140 
4141 void
4142 vrrpd_state_m2i(vrrp_vr_t *vr)
4143 {
4144 	vrrp_log(VRRP_DBG1, "vrrpd_state_m2i(%s)", vr->vvr_conf.vvc_name);
4145 
4146 	vrrpd_state_trans(VRRP_STATE_MASTER, VRRP_STATE_INIT, vr);
4147 	(void) vrrpd_virtualip_update(vr, _B_TRUE);
4148 	bzero(&vr->vvr_peer, sizeof (vrrp_peer_t));
4149 	(void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
4150 }
4151 
4152 void
4153 vrrpd_state_b2i(vrrp_vr_t *vr)
4154 {
4155 	vrrp_log(VRRP_DBG1, "vrrpd_state_b2i(%s)", vr->vvr_conf.vvc_name);
4156 
4157 	bzero(&vr->vvr_peer, sizeof (vrrp_peer_t));
4158 	(void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
4159 	vrrpd_state_trans(VRRP_STATE_BACKUP, VRRP_STATE_INIT, vr);
4160 	(void) vrrpd_virtualip_update(vr, _B_TRUE);
4161 }
4162 
4163 /* ARGSUSED */
4164 static void
4165 vrrp_b2m_timeout(iu_tq_t *tq, void *arg)
4166 {
4167 	vrrp_vr_t *vr = (vrrp_vr_t *)arg;
4168 
4169 	vrrp_log(VRRP_DBG1, "vrrp_b2m_timeout(%s)", vr->vvr_conf.vvc_name);
4170 	(void) vrrpd_state_b2m(vr);
4171 }
4172 
4173 /* ARGSUSED */
4174 static void
4175 vrrp_adv_timeout(iu_tq_t *tq, void *arg)
4176 {
4177 	vrrp_vr_t *vr = (vrrp_vr_t *)arg;
4178 
4179 	vrrp_log(VRRP_DBG1, "vrrp_adv_timeout(%s)", vr->vvr_conf.vvc_name);
4180 
4181 	(void) vrrpd_send_adv(vr, _B_FALSE);
4182 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4183 	    vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) {
4184 		vrrp_log(VRRP_ERR, "vrrp_adv_timeout(%s): start timer failed",
4185 		    vr->vvr_conf.vvc_name);
4186 	} else {
4187 		vrrp_log(VRRP_DBG1, "vrrp_adv_timeout(%s): start "
4188 		    "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name,
4189 		    vr->vvr_timeout);
4190 	}
4191 }
4192 
4193 vrrp_err_t
4194 vrrpd_state_b2m(vrrp_vr_t *vr)
4195 {
4196 	vrrp_err_t	err;
4197 
4198 	vrrp_log(VRRP_DBG1, "vrrpd_state_b2m(%s)", vr->vvr_conf.vvc_name);
4199 
4200 	vrrpd_state_trans(VRRP_STATE_BACKUP, VRRP_STATE_MASTER, vr);
4201 	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4202 		return (err);
4203 	(void) vrrpd_send_adv(vr, _B_FALSE);
4204 
4205 	vr->vvr_timeout = vr->vvr_conf.vvc_adver_int;
4206 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4207 	    vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) {
4208 		vrrp_log(VRRP_ERR, "vrrpd_state_b2m(%s): start timer failed",
4209 		    vr->vvr_conf.vvc_name);
4210 		return (VRRP_ESYS);
4211 	} else {
4212 		vrrp_log(VRRP_DBG1, "vrrpd_state_b2m(%s): start "
4213 		    "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name,
4214 		    vr->vvr_timeout);
4215 	}
4216 	return (VRRP_SUCCESS);
4217 }
4218 
4219 vrrp_err_t
4220 vrrpd_state_m2b(vrrp_vr_t *vr)
4221 {
4222 	vrrp_err_t	err;
4223 
4224 	vrrp_log(VRRP_DBG1, "vrrpd_state_m2b(%s)", vr->vvr_conf.vvc_name);
4225 
4226 	vrrpd_state_trans(VRRP_STATE_MASTER, VRRP_STATE_BACKUP, vr);
4227 	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4228 		return (err);
4229 
4230 	/*
4231 	 * Cancel the adver_timer.
4232 	 */
4233 	vr->vvr_master_adver_int = vr->vvr_peer_adver_int;
4234 	(void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
4235 	vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
4236 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4237 	    vr->vvr_timeout, vrrp_b2m_timeout, vr)) == -1) {
4238 		vrrp_log(VRRP_ERR, "vrrpd_state_m2b(%s): start timer failed",
4239 		    vr->vvr_conf.vvc_name);
4240 	} else {
4241 		vrrp_log(VRRP_DBG1, "vrrpd_state_m2b(%s) start "
4242 		    "vrrp_b2m_timeout(%d)", vr->vvr_conf.vvc_name,
4243 		    vr->vvr_timeout);
4244 	}
4245 	return (VRRP_SUCCESS);
4246 }
4247 
4248 /*
4249  * Set the IFF_NOACCESS flag on the VNIC interface of the VRRP router
4250  * based on its access mode.
4251  */
4252 static vrrp_err_t
4253 vrrpd_set_noaccept(vrrp_vr_t *vr, boolean_t on)
4254 {
4255 	vrrp_intf_t *vif = vr->vvr_vif;
4256 	uint64_t curr_flags;
4257 	struct lifreq lifr;
4258 	int s;
4259 
4260 	vrrp_log(VRRP_DBG1, "vrrpd_set_noaccept(%s, %s)",
4261 	    vr->vvr_conf.vvc_name, on ? "on" : "off");
4262 
4263 	/*
4264 	 * Possibly no virtual address exists on this VRRP router yet.
4265 	 */
4266 	if (vif == NULL)
4267 		return (VRRP_SUCCESS);
4268 
4269 	vrrp_log(VRRP_DBG1, "vrrpd_set_noaccept(%s, %s)",
4270 	    vif->vvi_ifname, vrrp_state2str(vr->vvr_state));
4271 
4272 	s = (vif->vvi_af == AF_INET) ? vrrpd_ctlsock_fd : vrrpd_ctlsock6_fd;
4273 	(void) strncpy(lifr.lifr_name, vif->vvi_ifname,
4274 	    sizeof (lifr.lifr_name));
4275 	if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
4276 		if (errno != ENXIO && errno != ENOENT) {
4277 			vrrp_log(VRRP_ERR, "vrrpd_set_noaccept(): "
4278 			    "SIOCGLIFFLAGS on %s failed: %s",
4279 			    vif->vvi_ifname, strerror(errno));
4280 		}
4281 		return (VRRP_ESYS);
4282 	}
4283 
4284 	curr_flags = lifr.lifr_flags;
4285 	if (on)
4286 		lifr.lifr_flags |= IFF_NOACCEPT;
4287 	else
4288 		lifr.lifr_flags &= ~IFF_NOACCEPT;
4289 
4290 	if (lifr.lifr_flags != curr_flags) {
4291 		if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
4292 			if (errno != ENXIO && errno != ENOENT) {
4293 				vrrp_log(VRRP_ERR, "vrrpd_set_noaccept(%s): "
4294 				    "SIOCSLIFFLAGS 0x%llx on %s failed: %s",
4295 				    on ? "no_accept" : "accept",
4296 				    lifr.lifr_flags, vif->vvi_ifname,
4297 				    strerror(errno));
4298 			}
4299 			return (VRRP_ESYS);
4300 		}
4301 	}
4302 	return (VRRP_SUCCESS);
4303 }
4304 
4305 static vrrp_err_t
4306 vrrpd_virtualip_updateone(vrrp_intf_t *vif, vrrp_ip_t *ip, boolean_t checkonly)
4307 {
4308 	vrrp_state_t	state = vif->vvi_vr_state;
4309 	struct lifreq	lifr;
4310 	char		abuf[INET6_ADDRSTRLEN];
4311 	int		af = vif->vvi_af;
4312 	uint64_t	curr_flags;
4313 	int		s;
4314 
4315 	assert(IS_VIRTUAL_INTF(vif));
4316 
4317 	/* LINTED E_CONSTANT_CONDITION */
4318 	VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
4319 	vrrp_log(VRRP_DBG1, "vrrpd_virtualip_updateone(%s, %s%s)",
4320 	    vif->vvi_ifname, abuf, checkonly ? ", checkonly" : "");
4321 
4322 	s = (af == AF_INET) ? vrrpd_ctlsock_fd : vrrpd_ctlsock6_fd;
4323 	(void) strncpy(lifr.lifr_name, ip->vip_lifname,
4324 	    sizeof (lifr.lifr_name));
4325 	if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
4326 		if (errno != ENXIO && errno != ENOENT) {
4327 			vrrp_log(VRRP_ERR, "vrrpd_virtualip_updateone(%s): "
4328 			    "SIOCGLIFFLAGS on %s/%s failed: %s",
4329 			    vif->vvi_ifname, lifr.lifr_name, abuf,
4330 			    strerror(errno));
4331 		}
4332 		return (VRRP_ESYS);
4333 	}
4334 
4335 	curr_flags = lifr.lifr_flags;
4336 	if (state == VRRP_STATE_MASTER)
4337 		lifr.lifr_flags |= IFF_UP;
4338 	else
4339 		lifr.lifr_flags &= ~IFF_UP;
4340 
4341 	if (lifr.lifr_flags == curr_flags)
4342 		return (VRRP_SUCCESS);
4343 
4344 	if (checkonly) {
4345 		vrrp_log(VRRP_ERR, "VRRP virtual IP %s/%s was brought %s",
4346 		    ip->vip_lifname, abuf,
4347 		    state == VRRP_STATE_MASTER ? "down" : "up");
4348 		return (VRRP_ESYS);
4349 	} else if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
4350 		if (errno != ENXIO && errno != ENOENT) {
4351 			vrrp_log(VRRP_ERR, "vrrpd_virtualip_updateone(%s, %s): "
4352 			    "bring %s %s/%s failed: %s",
4353 			    vif->vvi_ifname, vrrp_state2str(state),
4354 			    state == VRRP_STATE_MASTER ? "up" : "down",
4355 			    ip->vip_lifname, abuf, strerror(errno));
4356 		}
4357 		return (VRRP_ESYS);
4358 	}
4359 	return (VRRP_SUCCESS);
4360 }
4361 
4362 static vrrp_err_t
4363 vrrpd_virtualip_update(vrrp_vr_t *vr, boolean_t checkonly)
4364 {
4365 	vrrp_state_t		state;
4366 	vrrp_intf_t		*vif = vr->vvr_vif;
4367 	vrrp_ip_t		*ip, *nextip;
4368 	char			abuf[INET6_ADDRSTRLEN];
4369 	vrrp_err_t		err;
4370 
4371 	vrrp_log(VRRP_DBG1, "vrrpd_virtualip_update(%s, %s, %s)%s",
4372 	    vr->vvr_conf.vvc_name, vrrp_state2str(vr->vvr_state),
4373 	    vif->vvi_ifname, checkonly ? " checkonly" : "");
4374 
4375 	state = vr->vvr_state;
4376 	assert(vif != NULL);
4377 	assert(IS_VIRTUAL_INTF(vif));
4378 	assert(vif->vvi_vr_state != state);
4379 	vif->vvi_vr_state = state;
4380 	for (ip = TAILQ_FIRST(&vif->vvi_iplist); ip != NULL; ip = nextip) {
4381 		nextip = TAILQ_NEXT(ip, vip_next);
4382 		err = vrrpd_virtualip_updateone(vif, ip, _B_FALSE);
4383 		if (!checkonly && err != VRRP_SUCCESS) {
4384 			/* LINTED E_CONSTANT_CONDITION */
4385 			VRRPADDR2STR(vif->vvi_af, &ip->vip_addr, abuf,
4386 			    INET6_ADDRSTRLEN, _B_FALSE);
4387 			vrrp_log(VRRP_DBG1, "vrrpd_virtualip_update() update "
4388 			    "%s over %s failed", abuf, vif->vvi_ifname);
4389 			vrrpd_delete_ip(vif, ip);
4390 		}
4391 	}
4392 
4393 	/*
4394 	 * The IP address is deleted when it is failed to be brought
4395 	 * up. If no IP addresses are left, delete this interface.
4396 	 */
4397 	if (!checkonly && TAILQ_EMPTY(&vif->vvi_iplist)) {
4398 		vrrp_log(VRRP_DBG0, "vrrpd_virtualip_update(): "
4399 		    "no IP left over %s", vif->vvi_ifname);
4400 		vrrpd_delete_if(vif, _B_TRUE);
4401 		return (VRRP_ENOVIRT);
4402 	}
4403 	return (VRRP_SUCCESS);
4404 }
4405 
4406 void
4407 vrrpd_state_trans(vrrp_state_t prev_s, vrrp_state_t s, vrrp_vr_t *vr)
4408 {
4409 	vrrp_log(VRRP_DBG1, "vrrpd_state_trans(%s): %s --> %s",
4410 	    vr->vvr_conf.vvc_name, vrrp_state2str(prev_s), vrrp_state2str(s));
4411 
4412 	assert(vr->vvr_state == prev_s);
4413 	vr->vvr_state = s;
4414 	vr->vvr_prev_state = prev_s;
4415 	(void) gettimeofday(&vr->vvr_st_time, NULL);
4416 	(void) vrrpd_post_event(vr->vvr_conf.vvc_name, prev_s, s);
4417 }
4418 
4419 static int
4420 vrrpd_post_event(const char *name, vrrp_state_t prev_st, vrrp_state_t st)
4421 {
4422 	sysevent_id_t	eid;
4423 	nvlist_t	*nvl = NULL;
4424 
4425 	/*
4426 	 * sysevent is not supported in the non-global zone
4427 	 */
4428 	if (getzoneid() != GLOBAL_ZONEID)
4429 		return (0);
4430 
4431 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
4432 		goto failed;
4433 
4434 	if (nvlist_add_uint8(nvl, VRRP_EVENT_VERSION,
4435 	    VRRP_EVENT_CUR_VERSION) != 0)
4436 		goto failed;
4437 
4438 	if (nvlist_add_string(nvl, VRRP_EVENT_ROUTER_NAME, name) != 0)
4439 		goto failed;
4440 
4441 	if (nvlist_add_uint8(nvl, VRRP_EVENT_STATE, st) != 0)
4442 		goto failed;
4443 
4444 	if (nvlist_add_uint8(nvl, VRRP_EVENT_PREV_STATE, prev_st) != 0)
4445 		goto failed;
4446 
4447 	if (sysevent_post_event(EC_VRRP, ESC_VRRP_STATE_CHANGE,
4448 	    SUNW_VENDOR, VRRP_EVENT_PUBLISHER, nvl, &eid) == 0) {
4449 		nvlist_free(nvl);
4450 		return (0);
4451 	}
4452 
4453 failed:
4454 	vrrp_log(VRRP_ERR, "vrrpd_post_event(): `state change (%s --> %s)' "
4455 	    "sysevent posting failed: %s", vrrp_state2str(prev_st),
4456 	    vrrp_state2str(st), strerror(errno));
4457 
4458 	if (nvl != NULL)
4459 		nvlist_free(nvl);
4460 	return (-1);
4461 }
4462 
/*
 * timeval processing functions
 */

/*
 * Convert tv to milliseconds, rounded to the nearest millisecond (halves
 * round up). tv is expected to be normalized, i.e. 0 <= tv_usec < 1000000;
 * tv_sec may be negative.
 *
 * The rounding is done in integer arithmetic on the microsecond part.
 * Adding 0.5 after the integer division, as was done previously, never
 * rounded anything and skewed negative results by one millisecond.
 */
static int
timeval_to_milli(struct timeval tv)
{
	return ((int)(tv.tv_sec * 1000 + (tv.tv_usec + 500) / 1000));
}
4471 
/*
 * Return t1 - t2. When the microsecond difference is negative, one second
 * is borrowed so that the result's tv_usec is not negative (given inputs
 * whose tv_usec values are themselves within one second of each other).
 */
static struct timeval
timeval_delta(struct timeval t1, struct timeval t2)
{
	struct timeval diff;

	diff.tv_usec = t1.tv_usec - t2.tv_usec;
	diff.tv_sec = t1.tv_sec - t2.tv_sec;

	/* Borrow a second if the microsecond part went negative. */
	if (diff.tv_usec < 0) {
		diff.tv_sec -= 1;
		diff.tv_usec += 1000000;
	}

	return (diff);
}
4485 
4486 /*
4487  * print error messages to the terminal or to syslog
4488  */
4489 static void
4490 vrrp_log(int level, char *message, ...)
4491 {
4492 	va_list ap;
4493 	int log_level = -1;
4494 
4495 	va_start(ap, message);
4496 
4497 	if (vrrp_logflag == 0) {
4498 		if (level <= vrrp_debug_level) {
4499 			/*
4500 			 * VRRP_ERR goes to stderr, others go to stdout
4501 			 */
4502 			FILE *out = (level <= VRRP_ERR) ? stderr : stdout;
4503 			(void) fprintf(out, "vrrpd: ");
4504 			/* LINTED: E_SEC_PRINTF_VAR_FMT */
4505 			(void) vfprintf(out, message, ap);
4506 			(void) fprintf(out, "\n");
4507 			(void) fflush(out);
4508 		}
4509 		va_end(ap);
4510 		return;
4511 	}
4512 
4513 	/*
4514 	 * translate VRRP_* to LOG_*
4515 	 */
4516 	switch (level) {
4517 	case VRRP_ERR:
4518 		log_level = LOG_ERR;
4519 		break;
4520 	case VRRP_WARNING:
4521 		log_level = LOG_WARNING;
4522 		break;
4523 	case VRRP_NOTICE:
4524 		log_level = LOG_NOTICE;
4525 		break;
4526 	case VRRP_DBG0:
4527 		log_level = LOG_INFO;
4528 		break;
4529 	default:
4530 		log_level = LOG_DEBUG;
4531 		break;
4532 	}
4533 
4534 	/* LINTED: E_SEC_PRINTF_VAR_FMT */
4535 	(void) vsyslog(log_level, message, ap);
4536 	va_end(ap);
4537 }
4538