xref: /titanic_52/usr/src/cmd/cmd-inet/usr.lib/vrrpd/vrrpd.c (revision af28f636873b7156cfd73ceffa927658cca33fd0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 #include <sys/types.h>
27 #include <sys/socket.h>
28 #include <sys/sockio.h>
29 #include <sys/sysevent/vrrp.h>
30 #include <sys/sysevent/eventdefs.h>
31 #include <sys/varargs.h>
32 #include <auth_attr.h>
33 #include <ctype.h>
34 #include <fcntl.h>
35 #include <stdlib.h>
36 #include <strings.h>
37 #include <errno.h>
38 #include <unistd.h>
39 #include <zone.h>
40 #include <libsysevent.h>
41 #include <limits.h>
42 #include <locale.h>
43 #include <arpa/inet.h>
44 #include <signal.h>
45 #include <assert.h>
46 #include <ucred.h>
47 #include <bsm/adt.h>
48 #include <bsm/adt_event.h>
49 #include <priv_utils.h>
50 #include <libdllink.h>
51 #include <libdlvnic.h>
52 #include <libipadm.h>
53 #include <pwd.h>
54 #include <libvrrpadm.h>
55 #include <net/route.h>
56 #include "vrrpd_impl.h"
57 
/*
 * A VRRP router can only start participating in the VRRP protocol of a
 * virtual router when all the following conditions are met:
 *
 * - The VRRP router is enabled (vr->vvr_conf.vvc_enabled is _B_TRUE)
 * - The RX socket is successfully created over the physical interface to
 *   receive the VRRP multicast advertisement. Note that one RX socket can
 *   be shared by several VRRP routers configured over the same physical
 *   interface. (See vrrpd_init_rxsock())
 * - The TX socket is successfully created over the VNIC interface to send
 *   the VRRP advertisement. (See vrrpd_init_txsock())
 * - The primary IP address has been successfully selected over the physical
 *   interface. (See vrrpd_select_primary())
 *
 * If a VRRP router is enabled but the other conditions haven't been
 * satisfied, the router will stay in the VRRP_STATE_INIT state. If all the
 * above conditions are met, the VRRP router will transition to either
 * the VRRP_STATE_MASTER or the VRRP_STATE_BACKUP state, depending on the
 * VRRP protocol.
 */
78 
/*
 * Advance the char pointer p past any leading whitespace. The character is
 * converted to unsigned char before being handed to isspace(), because
 * passing a negative char value to the <ctype.h> functions is undefined.
 *
 * Note that p is evaluated more than once and that the expansion is a bare
 * while statement; use it only as a stand-alone statement.
 */
#define	skip_whitespace(p)	while (isspace((unsigned char)*(p))) ++(p)
80 
#define	BUFFSIZE	65536

/*
 * Default path of the configuration file; main() copies it into
 * vrrpd_conffile unless the -f option supplies another path.
 */
#define	VRRPCONF	"/etc/inet/vrrp.conf"
84 
/*
 * State kept for one PF_ROUTE socket. vrrpd keeps one of these per address
 * family (see the vrrpd_rtsocks[] array).
 */
typedef struct vrrpd_rtsock_s {
	int		vrt_af;		/* address family */
	int		vrt_fd;		/* socket for the PF_ROUTE msg */
	iu_event_id_t	vrt_eid;	/* event ID */
} vrrpd_rtsock_t;
90 
static ipadm_handle_t	vrrp_ipadm_handle = NULL;	/* libipadm handle */
/* Set to 1 by daemon_init() once openlog() has been called */
static int		vrrp_logflag = 0;
/*
 * Taken from the -d command line option. A non-zero value also means that
 * vrrpd does not daemonize itself (see main()).
 */
boolean_t		vrrp_debug_level = 0;
/* Event handler and timer queue, both created in vrrpd_init() */
iu_eh_t			*vrrpd_eh = NULL;
iu_tq_t			*vrrpd_timerq = NULL;
/* Handle obtained from vrrp_open() in vrrpd_init() */
static vrrp_handle_t	vrrpd_vh = NULL;
static int		vrrpd_cmdsock_fd = -1;	/* socket to communicate */
						/* between vrrpd/libvrrpadm */
/* ID of the event registered for vrrpd_cmdsock_fd */
static iu_event_id_t	vrrpd_cmdsock_eid = -1;
static int		vrrpd_ctlsock_fd = -1;	/* socket to bring up/down */
						/* the virtual IP addresses */
static int		vrrpd_ctlsock6_fd = -1;
/* One routing socket per address family; -1 means "not created" */
static vrrpd_rtsock_t	vrrpd_rtsocks[2] = {
	{AF_INET, -1, -1},
	{AF_INET6, -1, -1}
};
/* Timer started in vrrpd_init() and cancelled in vrrpd_fini() */
static iu_timer_id_t	vrrp_scan_timer_id = -1;

TAILQ_HEAD(vrrp_vr_list_s, vrrp_vr_s);
TAILQ_HEAD(vrrp_intf_list_s, vrrp_intf_s);
/* Lists of the known VRRP routers and interfaces */
static struct vrrp_vr_list_s	vrrp_vr_list;
static struct vrrp_intf_list_s	vrrp_intf_list;
/* Configuration file path: VRRPCONF unless overridden with -f */
static char		vrrpd_conffile[MAXPATHLEN];

/*
 * Multicast address of VRRP advertisement in network byte order
 */
static vrrp_addr_t	vrrp_muladdr4;
static vrrp_addr_t	vrrp_muladdr6;

static int		vrrpd_scan_interval = 20000;	/* ms */
/*
 * Pipe created by daemon_init(): the daemon (child) writes its start-up
 * status to pfds[1] and the parent reads it from pfds[0].
 */
static int		pfds[2];
123 
/*
 * Macros to calculate skew_time and master_down_interval.
 *
 * Note that the input is in centisecs and the output is in msecs.
 *
 * NOTE(review): neither macro performs a unit conversion itself; each result
 * is in whatever unit (intv) is given in. Confirm the unit in which
 * vvr_master_adver_int is stored against the statement above.
 */
#define	SKEW_TIME(pri, intv)	((intv) * (256 - (pri)) / 256)
#define	MASTER_DOWN_INTERVAL(pri, intv)	(3 * (intv) + SKEW_TIME((pri), (intv)))

/* Same as above, taking the priority and interval from a vrrp_vr_t */
#define	SKEW_TIME_VR(vr)	\
	SKEW_TIME((vr)->vvr_conf.vvc_pri, (vr)->vvr_master_adver_int)
#define	MASTER_DOWN_INTERVAL_VR(vr)	\
	MASTER_DOWN_INTERVAL((vr)->vvr_conf.vvc_pri, (vr)->vvr_master_adver_int)

/* Operation codes passed as the second argument of vrrpd_updateconf() */
#define	VRRP_CONF_UPDATE	0x01
#define	VRRP_CONF_DELETE	0x02
139 
140 static char *af_str(int);
141 
142 static iu_tq_callback_t vrrp_adv_timeout;
143 static iu_tq_callback_t vrrp_b2m_timeout;
144 static iu_eh_callback_t vrrpd_sock_handler;
145 static iu_eh_callback_t vrrpd_rtsock_handler;
146 static iu_eh_callback_t vrrpd_cmdsock_handler;
147 
148 static int daemon_init();
149 
150 static vrrp_err_t vrrpd_init();
151 static void vrrpd_fini();
152 static vrrp_err_t vrrpd_cmdsock_create();
153 static void vrrpd_cmdsock_destroy();
154 static vrrp_err_t vrrpd_rtsock_create();
155 static void vrrpd_rtsock_destroy();
156 static vrrp_err_t vrrpd_ctlsock_create();
157 static void vrrpd_ctlsock_destroy();
158 
159 static void vrrpd_scan_timer(iu_tq_t *, void *);
160 static void vrrpd_scan(int);
161 static vrrp_err_t vrrpd_init_rxsock(vrrp_vr_t *);
162 static void vrrpd_fini_rxsock(vrrp_vr_t *);
163 static vrrp_err_t vrrpd_init_txsock(vrrp_vr_t *);
164 static vrrp_err_t vrrpd_init_txsock_v4(vrrp_vr_t *);
165 static vrrp_err_t vrrpd_init_txsock_v6(vrrp_vr_t *);
166 static void vrrpd_fini_txsock(vrrp_vr_t *);
167 
168 static vrrp_err_t vrrpd_create_vr(vrrp_vr_conf_t *);
169 static vrrp_err_t vrrpd_enable_vr(vrrp_vr_t *);
170 static void vrrpd_disable_vr(vrrp_vr_t *, vrrp_intf_t *, boolean_t);
171 static void vrrpd_delete_vr(vrrp_vr_t *);
172 
173 static vrrp_err_t vrrpd_create(vrrp_vr_conf_t *, boolean_t);
174 static vrrp_err_t vrrpd_delete(const char *);
175 static vrrp_err_t vrrpd_enable(const char *, boolean_t);
176 static vrrp_err_t vrrpd_disable(const char *);
177 static vrrp_err_t vrrpd_modify(vrrp_vr_conf_t *, uint32_t);
178 static void vrrpd_list(vrid_t, char *, int, vrrp_ret_list_t *, size_t *);
179 static void vrrpd_query(const char *, vrrp_ret_query_t *, size_t *);
180 
181 static boolean_t vrrp_rd_prop_name(vrrp_vr_conf_t *, const char *);
182 static boolean_t vrrp_rd_prop_vrid(vrrp_vr_conf_t *, const char *);
183 static boolean_t vrrp_rd_prop_af(vrrp_vr_conf_t *, const char *);
184 static boolean_t vrrp_rd_prop_pri(vrrp_vr_conf_t *, const char *);
185 static boolean_t vrrp_rd_prop_adver_int(vrrp_vr_conf_t *, const char *);
186 static boolean_t vrrp_rd_prop_preempt(vrrp_vr_conf_t *, const char *);
187 static boolean_t vrrp_rd_prop_accept(vrrp_vr_conf_t *, const char *);
188 static boolean_t vrrp_rd_prop_ifname(vrrp_vr_conf_t *, const char *);
189 static boolean_t vrrp_rd_prop_enabled(vrrp_vr_conf_t *, const char *);
190 static int vrrp_wt_prop_name(vrrp_vr_conf_t *, char *, size_t);
191 static int vrrp_wt_prop_vrid(vrrp_vr_conf_t *, char *, size_t);
192 static int vrrp_wt_prop_af(vrrp_vr_conf_t *, char *, size_t);
193 static int vrrp_wt_prop_pri(vrrp_vr_conf_t *, char *, size_t);
194 static int vrrp_wt_prop_adver_int(vrrp_vr_conf_t *, char *, size_t);
195 static int vrrp_wt_prop_preempt(vrrp_vr_conf_t *, char *, size_t);
196 static int vrrp_wt_prop_accept(vrrp_vr_conf_t *, char *, size_t);
197 static int vrrp_wt_prop_ifname(vrrp_vr_conf_t *, char *, size_t);
198 static int vrrp_wt_prop_enabled(vrrp_vr_conf_t *, char *, size_t);
199 
200 static void vrrpd_cmd_create(void *, void *, size_t *);
201 static void vrrpd_cmd_delete(void *, void *, size_t *);
202 static void vrrpd_cmd_enable(void *, void *, size_t *);
203 static void vrrpd_cmd_disable(void *, void *, size_t *);
204 static void vrrpd_cmd_modify(void *, void *, size_t *);
205 static void vrrpd_cmd_list(void *, void *, size_t *);
206 static void vrrpd_cmd_query(void *, void *, size_t *);
207 
208 static vrrp_vr_t *vrrpd_lookup_vr_by_vrid(char *, vrid_t vrid_t, int);
209 static vrrp_vr_t *vrrpd_lookup_vr_by_name(const char *);
210 static vrrp_intf_t *vrrpd_lookup_if(const char *, int);
211 static vrrp_err_t vrrpd_create_if(const char *, int, uint32_t, vrrp_intf_t **);
212 static void vrrpd_delete_if(vrrp_intf_t *, boolean_t);
213 static vrrp_err_t vrrpd_create_ip(vrrp_intf_t *, const char *, vrrp_addr_t *,
214     uint64_t flags);
215 static void vrrpd_delete_ip(vrrp_intf_t *, vrrp_ip_t *);
216 
217 static void vrrpd_init_ipcache(int);
218 static void vrrpd_update_ipcache(int);
219 static ipadm_status_t vrrpd_walk_addr_info(int);
220 static vrrp_err_t vrrpd_add_ipaddr(char *, int, vrrp_addr_t *,
221     int, uint64_t);
222 static vrrp_ip_t *vrrpd_select_primary(vrrp_intf_t *);
223 static void vrrpd_reselect_primary(vrrp_intf_t *);
224 static void vrrpd_reenable_all_vr();
225 static void vrrpd_remove_if(vrrp_intf_t *, boolean_t);
226 
227 static uint16_t in_cksum(int, uint16_t, void *);
228 static uint16_t vrrp_cksum4(struct in_addr *, struct in_addr *,
229     uint16_t, vrrp_pkt_t *);
230 static uint16_t vrrp_cksum6(struct in6_addr *, struct in6_addr *,
231     uint16_t, vrrp_pkt_t *);
232 static size_t vrrpd_build_vrrp(vrrp_vr_t *, uchar_t *, int, boolean_t);
233 
234 static void vrrpd_process_adv(vrrp_vr_t *, vrrp_addr_t *, vrrp_pkt_t *);
235 static vrrp_err_t vrrpd_send_adv(vrrp_vr_t *, boolean_t);
236 
237 /* state transition functions */
238 static vrrp_err_t vrrpd_state_i2m(vrrp_vr_t *);
239 static vrrp_err_t vrrpd_state_i2b(vrrp_vr_t *);
240 static void vrrpd_state_m2i(vrrp_vr_t *);
241 static void vrrpd_state_b2i(vrrp_vr_t *);
242 static vrrp_err_t vrrpd_state_b2m(vrrp_vr_t *);
243 static vrrp_err_t vrrpd_state_m2b(vrrp_vr_t *);
244 static void vrrpd_state_trans(vrrp_state_t, vrrp_state_t, vrrp_vr_t *);
245 
246 static vrrp_err_t vrrpd_set_noaccept(vrrp_vr_t *, boolean_t);
247 static vrrp_err_t vrrpd_virtualip_update(vrrp_vr_t *, boolean_t);
248 static vrrp_err_t vrrpd_virtualip_updateone(vrrp_intf_t *, vrrp_ip_t *,
249     boolean_t);
250 static int vrrpd_post_event(const char *, vrrp_state_t, vrrp_state_t);
251 
252 static void vrrpd_initconf();
253 static vrrp_err_t vrrpd_updateconf(vrrp_vr_conf_t *, uint_t);
254 static vrrp_err_t vrrpd_write_vrconf(char *, size_t, vrrp_vr_conf_t *);
255 static vrrp_err_t vrrpd_read_vrconf(char *, vrrp_vr_conf_t *);
256 static vrrp_err_t vrrpd_readprop(const char *, vrrp_vr_conf_t *);
257 static void vrrpd_cleanup();
258 
259 static void vrrp_log(int, char *, ...);
260 static int timeval_to_milli(struct timeval);
261 static struct timeval timeval_delta(struct timeval, struct timeval);
262 
/*
 * One persistent property: its name plus a reader and a writer function.
 * NOTE(review): presumably the reader parses a string value into the
 * vrrp_vr_conf_t and the writer formats the value into the supplied buffer;
 * confirm against the vrrp_rd_prop_*() and vrrp_wt_prop_*() functions.
 */
typedef struct vrrpd_prop_s {
	char		*vs_propname;
	boolean_t	(*vs_propread)(vrrp_vr_conf_t *, const char *);
	int		(*vs_propwrite)(vrrp_vr_conf_t *, char *, size_t);
} vrrp_prop_t;

/*
 * persistent VRRP properties array
 */
static vrrp_prop_t vrrp_prop_info_tbl[] = {
	{"name", vrrp_rd_prop_name, vrrp_wt_prop_name},
	{"vrid", vrrp_rd_prop_vrid, vrrp_wt_prop_vrid},
	{"priority", vrrp_rd_prop_pri, vrrp_wt_prop_pri},
	{"adv_intval", vrrp_rd_prop_adver_int, vrrp_wt_prop_adver_int},
	{"preempt_mode", vrrp_rd_prop_preempt, vrrp_wt_prop_preempt},
	{"accept_mode", vrrp_rd_prop_accept, vrrp_wt_prop_accept},
	{"interface", vrrp_rd_prop_ifname, vrrp_wt_prop_ifname},
	{"af", vrrp_rd_prop_af, vrrp_wt_prop_af},
	{"enabled", vrrp_rd_prop_enabled, vrrp_wt_prop_enabled}
};

/* Number of entries in vrrp_prop_info_tbl[] */
#define	VRRP_PROP_INFO_TABSIZE	\
	(sizeof (vrrp_prop_info_tbl) / sizeof (vrrp_prop_t))
286 
/* Signature shared by all the vrrpd_cmd_*() command handlers */
typedef void vrrp_cmd_func_t(void *, void *, size_t *);

/*
 * Describes one administrative command: its type, the size of its request
 * and reply structures, whether it is a set operation, and its handler.
 */
typedef struct vrrp_cmd_info_s {
	vrrp_cmd_type_t	vi_cmd;
	size_t		vi_reqsize;
	size_t		vi_acksize;	/* 0 if the size is variable */
	boolean_t	vi_setop;	/* Set operation? Check credentials */
	vrrp_cmd_func_t	*vi_cmdfunc;
} vrrp_cmd_info_t;

/*
 * Table of the supported commands. Only the query and list commands are
 * read-only (vi_setop is _B_FALSE) and have a variable-size reply.
 */
static vrrp_cmd_info_t vrrp_cmd_info_tbl[] = {
	{VRRP_CMD_CREATE, sizeof (vrrp_cmd_create_t),
	    sizeof (vrrp_ret_create_t), _B_TRUE, vrrpd_cmd_create},
	{VRRP_CMD_DELETE, sizeof (vrrp_cmd_delete_t),
	    sizeof (vrrp_ret_delete_t), _B_TRUE, vrrpd_cmd_delete},
	{VRRP_CMD_ENABLE, sizeof (vrrp_cmd_enable_t),
	    sizeof (vrrp_ret_enable_t), _B_TRUE, vrrpd_cmd_enable},
	{VRRP_CMD_DISABLE, sizeof (vrrp_cmd_disable_t),
	    sizeof (vrrp_ret_disable_t), _B_TRUE, vrrpd_cmd_disable},
	{VRRP_CMD_MODIFY, sizeof (vrrp_cmd_modify_t),
	    sizeof (vrrp_ret_modify_t), _B_TRUE, vrrpd_cmd_modify},
	{VRRP_CMD_QUERY, sizeof (vrrp_cmd_query_t), 0,
	    _B_FALSE, vrrpd_cmd_query},
	{VRRP_CMD_LIST, sizeof (vrrp_cmd_list_t), 0,
	    _B_FALSE, vrrpd_cmd_list}
};

/*
 * Number of entries in vrrp_cmd_info_tbl[].
 * NOTE(review): the name says "DOOR" although the commands are received
 * over the AF_UNIX command socket; presumably a historical name.
 */
#define	VRRP_DOOR_INFO_TABLE_SIZE	\
	(sizeof (vrrp_cmd_info_tbl) / sizeof (vrrp_cmd_info_t))
316 
317 static int
318 ipaddr_cmp(int af, vrrp_addr_t *addr1, vrrp_addr_t *addr2)
319 {
320 	if (af == AF_INET) {
321 		return (memcmp(&addr1->in4.sin_addr,
322 		    &addr2->in4.sin_addr, sizeof (struct in_addr)));
323 	} else {
324 		return (memcmp(&addr1->in6.sin6_addr,
325 		    &addr2->in6.sin6_addr, sizeof (struct in6_addr)));
326 	}
327 }
328 
329 static vrrp_vr_t *
330 vrrpd_lookup_vr_by_vrid(char *ifname, vrid_t vrid, int af)
331 {
332 	vrrp_vr_t *vr;
333 
334 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
335 		if (strcmp(vr->vvr_conf.vvc_link, ifname) == 0 &&
336 		    vr->vvr_conf.vvc_vrid == vrid &&
337 		    vr->vvr_conf.vvc_af == af) {
338 			break;
339 		}
340 	}
341 	return (vr);
342 }
343 
344 static vrrp_vr_t *
345 vrrpd_lookup_vr_by_name(const char *name)
346 {
347 	vrrp_vr_t *vr;
348 
349 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
350 		if (strcmp(vr->vvr_conf.vvc_name, name) == 0)
351 			break;
352 	}
353 	return (vr);
354 }
355 
356 static vrrp_intf_t *
357 vrrpd_lookup_if(const char *ifname, int af)
358 {
359 	vrrp_intf_t	*intf;
360 
361 	TAILQ_FOREACH(intf, &vrrp_intf_list, vvi_next) {
362 		if (strcmp(ifname, intf->vvi_ifname) == 0 &&
363 		    af == intf->vvi_af) {
364 			break;
365 		}
366 	}
367 	return (intf);
368 }
369 
370 static vrrp_err_t
371 vrrpd_create_if(const char *ifname, int af, uint32_t ifindex,
372     vrrp_intf_t **intfp)
373 {
374 	vrrp_intf_t	*intf;
375 
376 	vrrp_log(VRRP_DBG0, "vrrpd_create_if(%s, %s, %d)",
377 	    ifname, af_str(af), ifindex);
378 
379 	if (((*intfp) = malloc(sizeof (vrrp_intf_t))) == NULL) {
380 		vrrp_log(VRRP_ERR, "vrrpd_create_if(): failed to "
381 		    "allocate %s/%s interface", ifname, af_str(af));
382 		return (VRRP_ENOMEM);
383 	}
384 
385 	intf = *intfp;
386 	TAILQ_INIT(&intf->vvi_iplist);
387 	(void) strlcpy(intf->vvi_ifname, ifname, sizeof (intf->vvi_ifname));
388 	intf->vvi_af = af;
389 	intf->vvi_sockfd = -1;
390 	intf->vvi_nvr = 0;
391 	intf->vvi_eid = -1;
392 	intf->vvi_pip = NULL;
393 	intf->vvi_ifindex = ifindex;
394 	intf->vvi_state = NODE_STATE_NEW;
395 	intf->vvi_vr_state = VRRP_STATE_INIT;
396 	TAILQ_INSERT_TAIL(&vrrp_intf_list, intf, vvi_next);
397 	return (VRRP_SUCCESS);
398 }
399 
400 /*
401  * An interface is deleted. If update_vr is true, the deletion of the interface
402  * may cause the state transition of assoicated VRRP router (if this interface
403  * is either the primary or the VNIC interface of the VRRP router); otherwise,
404  * simply delete the interface without updating the VRRP router.
405  */
406 static void
407 vrrpd_delete_if(vrrp_intf_t *intf, boolean_t update_vr)
408 {
409 	vrrp_ip_t	*ip;
410 
411 	vrrp_log(VRRP_DBG0, "vrrpd_delete_if(%s, %s, %supdate_vr)",
412 	    intf->vvi_ifname, af_str(intf->vvi_af), update_vr ? "" : "no_");
413 
414 	if (update_vr) {
415 		/*
416 		 * If a this interface is the physical interface or the VNIC
417 		 * of a VRRP router, the deletion of the interface (no IP
418 		 * address exists on this interface) may cause the state
419 		 * transition of the VRRP router. call vrrpd_remove_if()
420 		 * to find all corresponding VRRP router and update their
421 		 * states.
422 		 */
423 		vrrpd_remove_if(intf, _B_FALSE);
424 	}
425 
426 	/*
427 	 * First remove and delete all the IP addresses on the interface
428 	 */
429 	while (!TAILQ_EMPTY(&intf->vvi_iplist)) {
430 		ip = TAILQ_FIRST(&intf->vvi_iplist);
431 		vrrpd_delete_ip(intf, ip);
432 	}
433 
434 	/*
435 	 * Then remove and delete the interface
436 	 */
437 	TAILQ_REMOVE(&vrrp_intf_list, intf, vvi_next);
438 	(void) free(intf);
439 }
440 
441 static vrrp_err_t
442 vrrpd_create_ip(vrrp_intf_t *intf, const char *lifname, vrrp_addr_t *addr,
443     uint64_t flags)
444 {
445 	vrrp_ip_t	*ip;
446 	char		abuf[INET6_ADDRSTRLEN];
447 
448 	/* LINTED E_CONSTANT_CONDITION */
449 	VRRPADDR2STR(intf->vvi_af, addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
450 	vrrp_log(VRRP_DBG0, "vrrpd_create_ip(%s, %s, %s, 0x%x)",
451 	    intf->vvi_ifname, lifname, abuf, flags);
452 
453 	if ((ip = malloc(sizeof (vrrp_ip_t))) == NULL) {
454 		vrrp_log(VRRP_ERR, "vrrpd_create_ip(%s, %s):"
455 		    "failed to allocate IP", lifname, abuf);
456 		return (VRRP_ENOMEM);
457 	}
458 
459 	(void) strncpy(ip->vip_lifname, lifname, sizeof (ip->vip_lifname));
460 	ip->vip_state = NODE_STATE_NEW;
461 	ip->vip_flags = flags;
462 	(void) memcpy(&ip->vip_addr, addr, sizeof (ip->vip_addr));
463 
464 	/*
465 	 * Make sure link-local IPv6 IP addresses are at the head of the list
466 	 */
467 	if (intf->vvi_af == AF_INET6 &&
468 	    IN6_IS_ADDR_LINKLOCAL(&addr->in6.sin6_addr)) {
469 		TAILQ_INSERT_HEAD(&intf->vvi_iplist, ip, vip_next);
470 	} else {
471 		TAILQ_INSERT_TAIL(&intf->vvi_iplist, ip, vip_next);
472 	}
473 	return (VRRP_SUCCESS);
474 }
475 
476 static void
477 vrrpd_delete_ip(vrrp_intf_t *intf, vrrp_ip_t *ip)
478 {
479 	char	abuf[INET6_ADDRSTRLEN];
480 	int	af = intf->vvi_af;
481 
482 	/* LINTED E_CONSTANT_CONDITION */
483 	VRRPADDR2STR(af, &ip->vip_addr, abuf, sizeof (abuf), _B_FALSE);
484 	vrrp_log(VRRP_DBG0, "vrrpd_delete_ip(%s, %s, %s) is %sprimary",
485 	    intf->vvi_ifname, ip->vip_lifname, abuf,
486 	    intf->vvi_pip == ip ? "" : "not ");
487 
488 	if (intf->vvi_pip == ip)
489 		intf->vvi_pip = NULL;
490 
491 	TAILQ_REMOVE(&intf->vvi_iplist, ip, vip_next);
492 	(void) free(ip);
493 }
494 
495 static char *
496 rtm_event2str(uchar_t event)
497 {
498 	switch (event) {
499 	case RTM_NEWADDR:
500 		return ("RTM_NEWADDR");
501 	case RTM_DELADDR:
502 		return ("RTM_DELADDR");
503 	case RTM_IFINFO:
504 		return ("RTM_IFINFO");
505 	case RTM_ADD:
506 		return ("RTM_ADD");
507 	case RTM_DELETE:
508 		return ("RTM_DELETE");
509 	case RTM_CHANGE:
510 		return ("RTM_CHANGE");
511 	case RTM_OLDADD:
512 		return ("RTM_OLDADD");
513 	case RTM_OLDDEL:
514 		return ("RTM_OLDDEL");
515 	case RTM_CHGADDR:
516 		return ("RTM_CHGADDR");
517 	case RTM_FREEADDR:
518 		return ("RTM_FREEADDR");
519 	default:
520 		return ("RTM_OTHER");
521 	}
522 }
523 
524 /*
525  * This is called by the child process to inform the parent process to
526  * exit with the given return value. Note that the child process
527  * (the daemon process) informs the parent process to exit when anything
528  * goes wrong or when all the intialization is done.
529  */
530 static int
531 vrrpd_inform_parent_exit(int rv)
532 {
533 	int err = 0;
534 
535 	/*
536 	 * If vrrp_debug_level is none-zero, vrrpd is not running as
537 	 * a daemon. Return directly.
538 	 */
539 	if (vrrp_debug_level != 0)
540 		return (0);
541 
542 	if (write(pfds[1], &rv, sizeof (int)) != sizeof (int)) {
543 		err = errno;
544 		(void) close(pfds[1]);
545 		return (err);
546 	}
547 	(void) close(pfds[1]);
548 	return (0);
549 }
550 
551 int
552 main(int argc, char *argv[])
553 {
554 	int c, err;
555 	struct sigaction sa;
556 	sigset_t mask;
557 	struct rlimit rl;
558 
559 	(void) setlocale(LC_ALL, "");
560 	(void) textdomain(TEXT_DOMAIN);
561 
562 	/*
563 	 * We need PRIV_SYS_CONFIG to post VRRP sysevent, PRIV_NET_RAWACESS
564 	 * and PRIV_NET_ICMPACCESS to open  the raw socket, PRIV_SYS_IP_CONFIG
565 	 * to bring up/down the virtual IP addresses, and PRIV_SYS_RESOURCE to
566 	 * setrlimit().
567 	 *
568 	 * Note that sysevent is not supported in non-global zones.
569 	 */
570 	if (getzoneid() == GLOBAL_ZONEID) {
571 		err = __init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, 0, 0,
572 		    PRIV_SYS_CONFIG, PRIV_NET_RAWACCESS, PRIV_NET_ICMPACCESS,
573 		    PRIV_SYS_IP_CONFIG, PRIV_SYS_RESOURCE, NULL);
574 	} else {
575 		err = __init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, 0, 0,
576 		    PRIV_NET_RAWACCESS, PRIV_NET_ICMPACCESS,
577 		    PRIV_SYS_IP_CONFIG, PRIV_SYS_RESOURCE, NULL);
578 	}
579 
580 	if (err == -1) {
581 		vrrp_log(VRRP_ERR, "main(): init_daemon_priv() failed");
582 		return (EXIT_FAILURE);
583 	}
584 
585 	/*
586 	 * If vrrpd is started by other process, it will inherit the
587 	 * signal block mask. We unblock all signals to make sure the
588 	 * signal handling will work normally.
589 	 */
590 	(void) sigfillset(&mask);
591 	(void) thr_sigsetmask(SIG_UNBLOCK, &mask, NULL);
592 	sa.sa_handler = vrrpd_cleanup;
593 	sa.sa_flags = 0;
594 	(void) sigemptyset(&sa.sa_mask);
595 	(void) sigaction(SIGINT, &sa, NULL);
596 	(void) sigaction(SIGQUIT, &sa, NULL);
597 	(void) sigaction(SIGTERM, &sa, NULL);
598 
599 	vrrp_debug_level = 0;
600 	(void) strlcpy(vrrpd_conffile, VRRPCONF, sizeof (vrrpd_conffile));
601 	while ((c = getopt(argc, argv, "d:f:")) != EOF) {
602 		switch (c) {
603 		case 'd':
604 			vrrp_debug_level = atoi(optarg);
605 			break;
606 		case 'f':
607 			(void) strlcpy(vrrpd_conffile, optarg,
608 			    sizeof (vrrpd_conffile));
609 			break;
610 		default:
611 			break;
612 		}
613 	}
614 
615 	closefrom(3);
616 	if (vrrp_debug_level == 0 && (daemon_init() != 0)) {
617 		vrrp_log(VRRP_ERR, "main(): daemon_init() failed");
618 		return (EXIT_FAILURE);
619 	}
620 
621 	rl.rlim_cur = RLIM_INFINITY;
622 	rl.rlim_max = RLIM_INFINITY;
623 	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
624 		vrrp_log(VRRP_ERR, "main(): setrlimit() failed");
625 		goto child_out;
626 	}
627 
628 	if (vrrpd_init() != VRRP_SUCCESS) {
629 		vrrp_log(VRRP_ERR, "main(): vrrpd_init() failed");
630 		goto child_out;
631 	}
632 
633 	/*
634 	 * Get rid of unneeded privileges.
635 	 */
636 	__fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION,
637 	    PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, PRIV_SYS_RESOURCE, NULL);
638 
639 	/*
640 	 * Read the configuration and initialize the existing VRRP
641 	 * configuration
642 	 */
643 	vrrpd_initconf();
644 
645 	/*
646 	 * Inform the parent process that it can successfully exit.
647 	 */
648 	if ((err = vrrpd_inform_parent_exit(EXIT_SUCCESS)) != 0) {
649 		vrrpd_cleanup();
650 		vrrp_log(VRRP_WARNING, "vrrpd_inform_parent_exit() failed: %s",
651 		    strerror(err));
652 		return (EXIT_FAILURE);
653 	}
654 
655 	/*
656 	 * Start the loop to handle the timer and the IO events.
657 	 */
658 	switch (iu_handle_events(vrrpd_eh, vrrpd_timerq)) {
659 	case -1:
660 		vrrp_log(VRRP_ERR, "main(): iu_handle_events() failed "
661 		    "abnormally");
662 		break;
663 	default:
664 		break;
665 	}
666 
667 	vrrpd_cleanup();
668 	return (EXIT_SUCCESS);
669 
670 child_out:
671 	(void) vrrpd_inform_parent_exit(EXIT_FAILURE);
672 	return (EXIT_FAILURE);
673 }
674 
675 static int
676 daemon_init()
677 {
678 	pid_t	pid;
679 	int	rv;
680 
681 	vrrp_log(VRRP_DBG0, "daemon_init()");
682 
683 	if (getenv("SMF_FMRI") == NULL) {
684 		vrrp_log(VRRP_ERR, "daemon_init(): vrrpd is an smf(5) managed "
685 		    "service and should not be run from the command line.");
686 		return (-1);
687 	}
688 
689 	/*
690 	 * Create the pipe used for the child process to inform the parent
691 	 * process to exit after all initialization is done.
692 	 */
693 	if (pipe(pfds) < 0) {
694 		vrrp_log(VRRP_ERR, "daemon_init(): pipe() failed: %s",
695 		    strerror(errno));
696 		return (-1);
697 	}
698 
699 	if ((pid = fork()) < 0) {
700 		vrrp_log(VRRP_ERR, "daemon_init(): fork() failed: %s",
701 		    strerror(errno));
702 		(void) close(pfds[0]);
703 		(void) close(pfds[1]);
704 		return (-1);
705 	}
706 
707 	if (pid != 0) { /* Parent */
708 		(void) close(pfds[1]);
709 
710 		/*
711 		 * Read the child process's return value from the pfds.
712 		 * If the child process exits unexpectedly, read() returns -1.
713 		 */
714 		if (read(pfds[0], &rv, sizeof (int)) != sizeof (int)) {
715 			vrrp_log(VRRP_ERR, "daemon_init(): child process "
716 			    "exited unexpectedly %s", strerror(errno));
717 			(void) kill(pid, SIGTERM);
718 			rv = EXIT_FAILURE;
719 		}
720 		(void) close(pfds[0]);
721 		exit(rv);
722 	}
723 
724 	/*
725 	 * in child process, became a daemon, and return to main() to continue.
726 	 */
727 	(void) close(pfds[0]);
728 	(void) chdir("/");
729 	(void) setsid();
730 	(void) close(0);
731 	(void) close(1);
732 	(void) close(2);
733 	(void) open("/dev/null", O_RDWR, 0);
734 	(void) dup2(0, 1);
735 	(void) dup2(0, 2);
736 	openlog("vrrpd", LOG_PID, LOG_DAEMON);
737 	vrrp_logflag = 1;
738 	return (0);
739 }
740 
741 static vrrp_err_t
742 vrrpd_init()
743 {
744 	vrrp_err_t	err = VRRP_ESYS;
745 
746 	vrrp_log(VRRP_DBG0, "vrrpd_init()");
747 
748 	TAILQ_INIT(&vrrp_vr_list);
749 	TAILQ_INIT(&vrrp_intf_list);
750 
751 	if (vrrp_open(&vrrpd_vh) != VRRP_SUCCESS) {
752 		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrp_open() failed");
753 		goto fail;
754 	}
755 
756 	if ((vrrpd_timerq = iu_tq_create()) == NULL) {
757 		vrrp_log(VRRP_ERR, "vrrpd_init(): iu_tq_create() failed");
758 		goto fail;
759 	}
760 
761 	if ((vrrpd_eh = iu_eh_create()) == NULL) {
762 		vrrp_log(VRRP_ERR, "vrrpd_init(): iu_eh_create() failed");
763 		goto fail;
764 	}
765 
766 	/*
767 	 * Create the AF_UNIX socket used to communicate with libvrrpadm.
768 	 *
769 	 * This socket is used to receive the administrative requests and
770 	 * send back the results.
771 	 */
772 	if (vrrpd_cmdsock_create() != VRRP_SUCCESS) {
773 		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_cmdsock_create() "
774 		    "failed");
775 		goto fail;
776 	}
777 
778 	/*
779 	 * Create the VRRP control socket used to bring up/down the virtual
780 	 * IP addresses. It is also used to set the IFF_NOACCEPT flag of
781 	 * the virtual IP addresses.
782 	 */
783 	if (vrrpd_ctlsock_create() != VRRP_SUCCESS) {
784 		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_ctlsock_create() "
785 		    "failed");
786 		goto fail;
787 	}
788 
789 	/*
790 	 * Create the PF_ROUTER socket used to listen to the routing socket
791 	 * messages and build the interface/IP address list.
792 	 */
793 	if (vrrpd_rtsock_create() != VRRP_SUCCESS) {
794 		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_rtsock_create() "
795 		    "failed");
796 		goto fail;
797 	}
798 
799 	/* Open the libipadm handle */
800 	if (ipadm_open(&vrrp_ipadm_handle, 0) != IPADM_SUCCESS) {
801 		vrrp_log(VRRP_ERR, "vrrpd_init(): ipadm_open() failed");
802 		goto fail;
803 	}
804 
805 	/*
806 	 * Build the list of interfaces and IP addresses. Also, start the time
807 	 * to scan the interfaces/IP addresses periodically.
808 	 */
809 	vrrpd_scan(AF_INET);
810 	vrrpd_scan(AF_INET6);
811 	if ((vrrp_scan_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
812 	    vrrpd_scan_interval, vrrpd_scan_timer, NULL)) == -1) {
813 		vrrp_log(VRRP_ERR, "vrrpd_init(): start scan_timer failed");
814 		goto fail;
815 	}
816 
817 	/*
818 	 * Initialize the VRRP multicast address.
819 	 */
820 	bzero(&vrrp_muladdr4, sizeof (vrrp_addr_t));
821 	vrrp_muladdr4.in4.sin_family = AF_INET;
822 	(void) inet_pton(AF_INET, "224.0.0.18", &vrrp_muladdr4.in4.sin_addr);
823 
824 	bzero(&vrrp_muladdr6, sizeof (vrrp_addr_t));
825 	vrrp_muladdr6.in6.sin6_family = AF_INET6;
826 	(void) inet_pton(AF_INET6, "ff02::12", &vrrp_muladdr6.in6.sin6_addr);
827 
828 	return (VRRP_SUCCESS);
829 
830 fail:
831 	vrrpd_fini();
832 	return (err);
833 }
834 
835 static void
836 vrrpd_fini()
837 {
838 	vrrp_log(VRRP_DBG0, "vrrpd_fini()");
839 
840 	(void) iu_cancel_timer(vrrpd_timerq, vrrp_scan_timer_id, NULL);
841 	vrrp_scan_timer_id = -1;
842 
843 	vrrpd_rtsock_destroy();
844 	vrrpd_ctlsock_destroy();
845 	vrrpd_cmdsock_destroy();
846 
847 	if (vrrpd_eh != NULL) {
848 		iu_eh_destroy(vrrpd_eh);
849 		vrrpd_eh = NULL;
850 	}
851 
852 	if (vrrpd_timerq != NULL) {
853 		iu_tq_destroy(vrrpd_timerq);
854 		vrrpd_timerq = NULL;
855 	}
856 
857 	vrrp_close(vrrpd_vh);
858 	vrrpd_vh = NULL;
859 	assert(TAILQ_EMPTY(&vrrp_vr_list));
860 	assert(TAILQ_EMPTY(&vrrp_intf_list));
861 
862 	ipadm_close(vrrp_ipadm_handle);
863 }
864 
865 static void
866 vrrpd_cleanup(void)
867 {
868 	vrrp_vr_t	*vr;
869 	vrrp_intf_t	*intf;
870 
871 	vrrp_log(VRRP_DBG0, "vrrpd_cleanup()");
872 
873 	while (!TAILQ_EMPTY(&vrrp_vr_list)) {
874 		vr = TAILQ_FIRST(&vrrp_vr_list);
875 		vrrpd_delete_vr(vr);
876 	}
877 
878 	while (!TAILQ_EMPTY(&vrrp_intf_list)) {
879 		intf = TAILQ_FIRST(&vrrp_intf_list);
880 		vrrpd_delete_if(intf, _B_FALSE);
881 	}
882 
883 	vrrpd_fini();
884 	closelog();
885 	exit(1);
886 }
887 
888 /*
889  * Read the configuration file and initialize all the existing VRRP routers.
890  */
891 static void
892 vrrpd_initconf()
893 {
894 	FILE *fp;
895 	char line[LINE_MAX];
896 	int linenum = 0;
897 	vrrp_vr_conf_t conf;
898 	vrrp_err_t err;
899 
900 	vrrp_log(VRRP_DBG0, "vrrpd_initconf()");
901 
902 	if ((fp = fopen(vrrpd_conffile, "rF")) == NULL) {
903 		vrrp_log(VRRP_ERR, "failed to open the configuration file %s",
904 		    vrrpd_conffile);
905 		return;
906 	}
907 
908 	while (fgets(line, sizeof (line), fp) != NULL) {
909 		linenum++;
910 		conf.vvc_vrid = VRRP_VRID_NONE;
911 		if ((err = vrrpd_read_vrconf(line, &conf)) != VRRP_SUCCESS) {
912 			vrrp_log(VRRP_ERR, "failed to parse %d line %s",
913 			    linenum, line);
914 			continue;
915 		}
916 
917 		/*
918 		 * Blank or comment line
919 		 */
920 		if (conf.vvc_vrid == VRRP_VRID_NONE)
921 			continue;
922 
923 		/*
924 		 * No need to update the configuration since the VRRP router
925 		 * created/enabled based on the existing configuration.
926 		 */
927 		if ((err = vrrpd_create(&conf, _B_FALSE)) != VRRP_SUCCESS) {
928 			vrrp_log(VRRP_ERR, "VRRP router %s creation failed: "
929 			    "%s", conf.vvc_name, vrrp_err2str(err));
930 			continue;
931 		}
932 
933 		if (conf.vvc_enabled &&
934 		    ((err = vrrpd_enable(conf.vvc_name, _B_FALSE)) !=
935 		    VRRP_SUCCESS)) {
936 			vrrp_log(VRRP_ERR, "VRRP router %s enable failed: %s",
937 			    conf.vvc_name, vrrp_err2str(err));
938 		}
939 	}
940 
941 	(void) fclose(fp);
942 }
943 
944 /*
945  * Create the AF_UNIX socket used to communicate with libvrrpadm.
946  *
947  * This socket is used to receive the administrative request and
948  * send back the results.
949  */
950 static vrrp_err_t
951 vrrpd_cmdsock_create()
952 {
953 	iu_event_id_t		eid;
954 	struct sockaddr_un	laddr;
955 	int			sock, flags;
956 
957 	vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_create()");
958 
959 	if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
960 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): socket(AF_UNIX) "
961 		    "failed: %s", strerror(errno));
962 		return (VRRP_ESYS);
963 	}
964 
965 	/*
966 	 * Set it to be non-blocking.
967 	 */
968 	flags = fcntl(sock, F_GETFL, 0);
969 	(void) fcntl(sock, F_SETFL, (flags | O_NONBLOCK));
970 
971 	/*
972 	 * Unlink first in case a previous daemon instance exited ungracefully.
973 	 */
974 	(void) unlink(VRRPD_SOCKET);
975 
976 	bzero(&laddr, sizeof (laddr));
977 	laddr.sun_family = AF_UNIX;
978 	(void) strlcpy(laddr.sun_path, VRRPD_SOCKET, sizeof (laddr.sun_path));
979 	if (bind(sock, (struct sockaddr *)&laddr, sizeof (laddr)) < 0) {
980 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): bind() failed: %s",
981 		    strerror(errno));
982 		(void) close(sock);
983 		return (VRRP_ESYS);
984 	}
985 
986 	if (listen(sock, 30) < 0) {
987 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): listen() "
988 		    "failed: %s", strerror(errno));
989 		(void) close(sock);
990 		return (VRRP_ESYS);
991 	}
992 
993 	if ((eid = iu_register_event(vrrpd_eh, sock, POLLIN,
994 	    vrrpd_cmdsock_handler, NULL)) == -1) {
995 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): iu_register_event()"
996 		    " failed");
997 		(void) close(sock);
998 		return (VRRP_ESYS);
999 	}
1000 
1001 	vrrpd_cmdsock_fd = sock;
1002 	vrrpd_cmdsock_eid = eid;
1003 	return (VRRP_SUCCESS);
1004 }
1005 
1006 static void
1007 vrrpd_cmdsock_destroy()
1008 {
1009 	vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_destroy()");
1010 
1011 	(void) iu_unregister_event(vrrpd_eh, vrrpd_cmdsock_eid, NULL);
1012 	(void) close(vrrpd_cmdsock_fd);
1013 	vrrpd_cmdsock_fd = -1;
1014 	vrrpd_cmdsock_eid = -1;
1015 }
1016 
1017 /*
1018  * Create the PF_ROUTER sockets used to listen to the routing socket
1019  * messages and build the interface/IP address list. Create one for
1020  * each address family (IPv4 and IPv6).
1021  */
1022 static vrrp_err_t
1023 vrrpd_rtsock_create()
1024 {
1025 	int		i, flags, sock;
1026 	iu_event_id_t	eid;
1027 
1028 	vrrp_log(VRRP_DBG0, "vrrpd_rtsock_create()");
1029 
1030 	for (i = 0; i < 2; i++) {
1031 		sock = socket(PF_ROUTE, SOCK_RAW, vrrpd_rtsocks[i].vrt_af);
1032 		if (sock == -1) {
1033 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): socket() "
1034 			    "failed: %s", strerror(errno));
1035 			break;
1036 		}
1037 
1038 		/*
1039 		 * Set it to be non-blocking.
1040 		 */
1041 		if ((flags = fcntl(sock, F_GETFL, 0)) < 0) {
1042 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): "
1043 			    "fcntl(F_GETFL) failed: %s", strerror(errno));
1044 			break;
1045 		}
1046 
1047 		if ((fcntl(sock, F_SETFL, flags | O_NONBLOCK)) < 0) {
1048 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): "
1049 			    "fcntl(F_SETFL) failed: %s", strerror(errno));
1050 			break;
1051 		}
1052 
1053 		if ((eid = iu_register_event(vrrpd_eh, sock, POLLIN,
1054 		    vrrpd_rtsock_handler, &(vrrpd_rtsocks[i].vrt_af))) == -1) {
1055 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): register "
1056 			    "rtsock %d(%s) failed", sock,
1057 			    af_str(vrrpd_rtsocks[i].vrt_af));
1058 			break;
1059 		}
1060 
1061 		vrrpd_rtsocks[i].vrt_fd = sock;
1062 		vrrpd_rtsocks[i].vrt_eid = eid;
1063 	}
1064 
1065 	if (i != 2) {
1066 		(void) close(sock);
1067 		vrrpd_rtsock_destroy();
1068 		return (VRRP_ESYS);
1069 	}
1070 
1071 	return (VRRP_SUCCESS);
1072 }
1073 
1074 static void
1075 vrrpd_rtsock_destroy()
1076 {
1077 	int		i;
1078 
1079 	vrrp_log(VRRP_DBG0, "vrrpd_rtsock_destroy()");
1080 	for (i = 0; i < 2; i++) {
1081 		(void) iu_unregister_event(vrrpd_eh, vrrpd_rtsocks[i].vrt_eid,
1082 		    NULL);
1083 		(void) close(vrrpd_rtsocks[i].vrt_fd);
1084 		vrrpd_rtsocks[i].vrt_eid = -1;
1085 		vrrpd_rtsocks[i].vrt_fd = -1;
1086 	}
1087 }
1088 
1089 /*
1090  * Create the VRRP control socket used to bring up/down the virtual
1091  * IP addresses. It is also used to set the IFF_NOACCEPT flag of
1092  * the virtual IP addresses.
1093  */
1094 static vrrp_err_t
1095 vrrpd_ctlsock_create()
1096 {
1097 	int	s, s6;
1098 	int	on = _B_TRUE;
1099 
1100 	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
1101 		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): socket(INET) "
1102 		    "failed: %s", strerror(errno));
1103 		return (VRRP_ESYS);
1104 	}
1105 	if (setsockopt(s, SOL_SOCKET, SO_VRRP, &on, sizeof (on)) < 0) {
1106 		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): "
1107 		    "setsockopt(INET, SO_VRRP) failed: %s", strerror(errno));
1108 		(void) close(s);
1109 		return (VRRP_ESYS);
1110 	}
1111 
1112 	if ((s6 = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) {
1113 		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): socket(INET6) "
1114 		    "failed: %s", strerror(errno));
1115 		(void) close(s);
1116 		return (VRRP_ESYS);
1117 	}
1118 	if (setsockopt(s6, SOL_SOCKET, SO_VRRP, &on, sizeof (on)) < 0) {
1119 		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): "
1120 		    "setsockopt(INET6, SO_VRRP) failed: %s", strerror(errno));
1121 		(void) close(s);
1122 		(void) close(s6);
1123 		return (VRRP_ESYS);
1124 	}
1125 
1126 	vrrpd_ctlsock_fd = s;
1127 	vrrpd_ctlsock6_fd = s6;
1128 	return (VRRP_SUCCESS);
1129 }
1130 
1131 static void
1132 vrrpd_ctlsock_destroy()
1133 {
1134 	(void) close(vrrpd_ctlsock_fd);
1135 	vrrpd_ctlsock_fd = -1;
1136 	(void) close(vrrpd_ctlsock6_fd);
1137 	vrrpd_ctlsock6_fd = -1;
1138 }
1139 
1140 /*ARGSUSED*/
1141 static void
1142 vrrpd_cmd_create(void *arg1, void *arg2, size_t *arg2_sz)
1143 {
1144 	vrrp_cmd_create_t	*cmd = (vrrp_cmd_create_t *)arg1;
1145 	vrrp_ret_create_t	*ret = (vrrp_ret_create_t *)arg2;
1146 	vrrp_err_t		err;
1147 
1148 	err = vrrpd_create(&cmd->vcc_conf, _B_TRUE);
1149 	if (err == VRRP_SUCCESS && cmd->vcc_conf.vvc_enabled) {
1150 		/*
1151 		 * No need to update the configuration since it is already
1152 		 * done in the above vrrpd_create() call
1153 		 */
1154 		err = vrrpd_enable(cmd->vcc_conf.vvc_name, _B_FALSE);
1155 		if (err != VRRP_SUCCESS)
1156 			(void) vrrpd_delete(cmd->vcc_conf.vvc_name);
1157 	}
1158 	ret->vrc_err = err;
1159 }
1160 
1161 /*ARGSUSED*/
1162 static void
1163 vrrpd_cmd_delete(void *arg1, void *arg2, size_t *arg2_sz)
1164 {
1165 	vrrp_cmd_delete_t	*cmd = (vrrp_cmd_delete_t *)arg1;
1166 	vrrp_ret_delete_t	*ret = (vrrp_ret_delete_t *)arg2;
1167 
1168 	ret->vrd_err = vrrpd_delete(cmd->vcd_name);
1169 }
1170 
1171 /*ARGSUSED*/
1172 static void
1173 vrrpd_cmd_enable(void *arg1, void *arg2, size_t *arg2_sz)
1174 {
1175 	vrrp_cmd_enable_t	*cmd = (vrrp_cmd_enable_t *)arg1;
1176 	vrrp_ret_enable_t	*ret = (vrrp_ret_enable_t *)arg2;
1177 
1178 	ret->vrs_err = vrrpd_enable(cmd->vcs_name, _B_TRUE);
1179 }
1180 
1181 /*ARGSUSED*/
1182 static void
1183 vrrpd_cmd_disable(void *arg1, void *arg2, size_t *arg2_sz)
1184 {
1185 	vrrp_cmd_disable_t	*cmd = (vrrp_cmd_disable_t *)arg1;
1186 	vrrp_ret_disable_t	*ret = (vrrp_ret_disable_t *)arg2;
1187 
1188 	ret->vrx_err = vrrpd_disable(cmd->vcx_name);
1189 }
1190 
1191 /*ARGSUSED*/
1192 static void
1193 vrrpd_cmd_modify(void *arg1, void *arg2, size_t *arg2_sz)
1194 {
1195 	vrrp_cmd_modify_t	*cmd = (vrrp_cmd_modify_t *)arg1;
1196 	vrrp_ret_modify_t	*ret = (vrrp_ret_modify_t *)arg2;
1197 
1198 	ret->vrm_err = vrrpd_modify(&cmd->vcm_conf, cmd->vcm_mask);
1199 }
1200 
1201 static void
1202 vrrpd_cmd_query(void *arg1, void *arg2, size_t *arg2_sz)
1203 {
1204 	vrrp_cmd_query_t	*cmd = (vrrp_cmd_query_t *)arg1;
1205 
1206 	vrrpd_query(cmd->vcq_name, arg2, arg2_sz);
1207 }
1208 
1209 static void
1210 vrrpd_cmd_list(void *arg1, void *arg2, size_t *arg2_sz)
1211 {
1212 	vrrp_cmd_list_t	*cmd = (vrrp_cmd_list_t *)arg1;
1213 
1214 	vrrpd_list(cmd->vcl_vrid, cmd->vcl_ifname, cmd->vcl_af, arg2, arg2_sz);
1215 }
1216 
1217 /*
1218  * Write-type requeset must have the solaris.network.vrrp authorization.
1219  */
1220 static boolean_t
1221 vrrp_auth_check(int connfd, vrrp_cmd_info_t *cinfo)
1222 {
1223 	ucred_t		*cred = NULL;
1224 	uid_t		uid;
1225 	struct passwd	*pw;
1226 	boolean_t	success = _B_FALSE;
1227 
1228 	vrrp_log(VRRP_DBG0, "vrrp_auth_check()");
1229 
1230 	if (!cinfo->vi_setop)
1231 		return (_B_TRUE);
1232 
1233 	/*
1234 	 * Validate the credential
1235 	 */
1236 	if (getpeerucred(connfd, &cred) == (uid_t)-1) {
1237 		vrrp_log(VRRP_ERR, "vrrp_auth_check(): getpeerucred() "
1238 		    "failed: %s", strerror(errno));
1239 		return (_B_FALSE);
1240 	}
1241 
1242 	if ((uid = ucred_getruid((const ucred_t *)cred)) == (uid_t)-1) {
1243 		vrrp_log(VRRP_ERR, "vrrp_auth_check(): ucred_getruid() "
1244 		    "failed: %s", strerror(errno));
1245 		goto done;
1246 	}
1247 
1248 	if ((pw = getpwuid(uid)) == NULL) {
1249 		vrrp_log(VRRP_ERR, "vrrp_auth_check(): getpwuid() failed");
1250 		goto done;
1251 	}
1252 
1253 	success = (chkauthattr("solaris.network.vrrp", pw->pw_name) == 1);
1254 
1255 done:
1256 	ucred_free(cred);
1257 	return (success);
1258 }
1259 
1260 /*
1261  * Process the administrative request from libvrrpadm
1262  */
1263 /* ARGSUSED */
1264 static void
1265 vrrpd_cmdsock_handler(iu_eh_t *eh, int s, short events, iu_event_id_t id,
1266     void *arg)
1267 {
1268 	vrrp_cmd_info_t		*cinfo = NULL;
1269 	vrrp_err_t		err = VRRP_SUCCESS;
1270 	uchar_t			buf[BUFFSIZE], ackbuf[BUFFSIZE];
1271 	size_t			cursize, acksize, len;
1272 	uint32_t		cmd;
1273 	int			connfd, i;
1274 	struct sockaddr_in	from;
1275 	socklen_t		fromlen;
1276 
1277 	vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_handler()");
1278 
1279 	fromlen = (socklen_t)sizeof (from);
1280 	if ((connfd = accept(s, (struct sockaddr *)&from, &fromlen)) < 0) {
1281 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler() accept(): %s",
1282 		    strerror(errno));
1283 		return;
1284 	}
1285 
1286 	/*
1287 	 * First get the type of the request
1288 	 */
1289 	cursize = 0;
1290 	while (cursize < sizeof (uint32_t)) {
1291 		len = read(connfd, buf + cursize,
1292 		    sizeof (uint32_t) - cursize);
1293 		if (len == (size_t)-1 && (errno == EAGAIN || errno == EINTR)) {
1294 			continue;
1295 		} else if (len > 0) {
1296 			cursize += len;
1297 			continue;
1298 		}
1299 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid message "
1300 		    "length");
1301 		(void) close(connfd);
1302 		return;
1303 	}
1304 
1305 	/* LINTED E_BAD_PTR_CAST_ALIGN */
1306 	cmd = ((vrrp_cmd_t *)buf)->vc_cmd;
1307 	for (i = 0; i < VRRP_DOOR_INFO_TABLE_SIZE; i++) {
1308 		if (vrrp_cmd_info_tbl[i].vi_cmd == cmd) {
1309 			cinfo = vrrp_cmd_info_tbl + i;
1310 			break;
1311 		}
1312 	}
1313 
1314 	if (cinfo == NULL) {
1315 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid request "
1316 		    "type %d", cmd);
1317 		err = VRRP_EINVAL;
1318 		goto done;
1319 	}
1320 
1321 	/*
1322 	 * Get the rest of the request.
1323 	 */
1324 	assert(cursize == sizeof (uint32_t));
1325 	while (cursize < cinfo->vi_reqsize) {
1326 		len = read(connfd, buf + cursize,
1327 		    cinfo->vi_reqsize - cursize);
1328 		if (len == (size_t)-1 && (errno == EAGAIN || errno == EINTR)) {
1329 			continue;
1330 		} else if (len > 0) {
1331 			cursize += len;
1332 			continue;
1333 		}
1334 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid message "
1335 		    "length");
1336 		err = VRRP_EINVAL;
1337 		goto done;
1338 	}
1339 
1340 	/*
1341 	 * Validate the authorization
1342 	 */
1343 	if (!vrrp_auth_check(connfd, cinfo)) {
1344 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): "
1345 		    "not sufficient authorization");
1346 		err = VRRP_EPERM;
1347 	}
1348 
1349 done:
1350 	/*
1351 	 * Ack the request
1352 	 */
1353 	if (err != 0) {
1354 		/* LINTED E_BAD_PTR_CAST_ALIGN */
1355 		((vrrp_ret_t *)ackbuf)->vr_err = err;
1356 		acksize = sizeof (vrrp_ret_t);
1357 	} else {
1358 		/*
1359 		 * If the size of ack is varied, the cmdfunc callback
1360 		 * will set the right size.
1361 		 */
1362 		if ((acksize = cinfo->vi_acksize) == 0)
1363 			acksize = sizeof (ackbuf);
1364 
1365 		/* LINTED E_BAD_PTR_CAST_ALIGN */
1366 		cinfo->vi_cmdfunc((vrrp_cmd_t *)buf, ackbuf, &acksize);
1367 	}
1368 
1369 	/*
1370 	 * Send the ack back.
1371 	 */
1372 	cursize = 0;
1373 	while (cursize < acksize) {
1374 		len = sendto(connfd, ackbuf + cursize, acksize - cursize,
1375 		    0, (struct sockaddr *)&from, fromlen);
1376 		if (len == (size_t)-1 && errno == EAGAIN) {
1377 			continue;
1378 		} else if (len > 0) {
1379 			cursize += len;
1380 			continue;
1381 		} else {
1382 			vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler() failed to "
1383 			    "ack: %s", strerror(errno));
1384 			break;
1385 		}
1386 	}
1387 
1388 	(void) shutdown(connfd, SHUT_RDWR);
1389 	(void) close(connfd);
1390 }
1391 
1392 /*
1393  * Process the routing socket messages and update the interfaces/IP addresses
1394  * list
1395  */
1396 /* ARGSUSED */
1397 static void
1398 vrrpd_rtsock_handler(iu_eh_t *eh, int s, short events,
1399     iu_event_id_t id, void *arg)
1400 {
1401 	char			buf[BUFFSIZE];
1402 	struct ifa_msghdr	*ifam;
1403 	int			nbytes;
1404 	int			af = *(int *)arg;
1405 	boolean_t		scanif = _B_FALSE;
1406 
1407 	for (;;) {
1408 		nbytes = read(s, buf, sizeof (buf));
1409 		if (nbytes <= 0) {
1410 			/* No more messages */
1411 			break;
1412 		}
1413 
1414 		/* LINTED E_BAD_PTR_CAST_ALIGN */
1415 		ifam = (struct ifa_msghdr *)buf;
1416 		if (ifam->ifam_version != RTM_VERSION) {
1417 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_handler(): version %d "
1418 			    "not understood", ifam->ifam_version);
1419 			break;
1420 		}
1421 
1422 		vrrp_log(VRRP_DBG0, "vrrpd_rtsock_handler(): recv %s event",
1423 		    rtm_event2str(ifam->ifam_type));
1424 
1425 		switch (ifam->ifam_type) {
1426 		case RTM_FREEADDR:
1427 		case RTM_CHGADDR:
1428 		case RTM_NEWADDR:
1429 		case RTM_DELADDR:
1430 			/*
1431 			 * An IP address has been created/updated/deleted or
1432 			 * brought up/down, re-initilialize the interface/IP
1433 			 * address list.
1434 			 */
1435 			scanif = _B_TRUE;
1436 			break;
1437 		default:
1438 			/* Not interesting */
1439 			break;
1440 		}
1441 	}
1442 
1443 	if (scanif)
1444 		vrrpd_scan(af);
1445 }
1446 
1447 /*
1448  * Periodically scan the interface/IP addresses on the system.
1449  */
1450 /* ARGSUSED */
1451 static void
1452 vrrpd_scan_timer(iu_tq_t *tq, void *arg)
1453 {
1454 	vrrp_log(VRRP_DBG0, "vrrpd_scan_timer()");
1455 	vrrpd_scan(AF_INET);
1456 	vrrpd_scan(AF_INET6);
1457 }
1458 
1459 /*
1460  * Get the list of the interface/IP addresses of the specified address
1461  * family.
1462  */
1463 static void
1464 vrrpd_scan(int af)
1465 {
1466 	vrrp_log(VRRP_DBG0, "vrrpd_scan(%s)", af_str(af));
1467 
1468 again:
1469 	vrrpd_init_ipcache(af);
1470 
1471 	/* If interface index changes, walk again. */
1472 	if (vrrpd_walk_addr_info(af) != IPADM_SUCCESS)
1473 		goto again;
1474 
1475 	vrrpd_update_ipcache(af);
1476 }
1477 
1478 /*
1479  * First mark all IP addresses of the specific address family to be removed.
1480  * This flag will then be cleared when we walk up all the IP addresses.
1481  */
1482 static void
1483 vrrpd_init_ipcache(int af)
1484 {
1485 	vrrp_intf_t	*intf, *next_intf;
1486 	vrrp_ip_t	*ip, *nextip;
1487 	char		abuf[INET6_ADDRSTRLEN];
1488 
1489 	vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(%s)", af_str(af));
1490 
1491 	next_intf = TAILQ_FIRST(&vrrp_intf_list);
1492 	while ((intf = next_intf) != NULL) {
1493 		next_intf = TAILQ_NEXT(intf, vvi_next);
1494 		if (intf->vvi_af != af)
1495 			continue;
1496 
1497 		/*
1498 		 * If the interface is still marked as new, it means that this
1499 		 * vrrpd_init_ipcache() call is a result of ifindex change,
1500 		 * which causes the re-walk of all the interfaces (see
1501 		 * vrrpd_add_ipaddr()), and some interfaces are still marked
1502 		 * as new during the last walk. In this case, delete this
1503 		 * interface with the "update_vr" argument to be _B_FALSE,
1504 		 * since no VRRP router has been assoicated with this
1505 		 * interface yet (the association is done in
1506 		 * vrrpd_update_ipcache()).
1507 		 *
1508 		 * This interface will be re-added later if it still exists.
1509 		 */
1510 		if (intf->vvi_state == NODE_STATE_NEW) {
1511 			vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(): remove %s "
1512 			    "(%d), may be added later", intf->vvi_ifname,
1513 			    intf->vvi_ifindex);
1514 			vrrpd_delete_if(intf, _B_FALSE);
1515 			continue;
1516 		}
1517 
1518 		for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
1519 		    ip = nextip) {
1520 			nextip = TAILQ_NEXT(ip, vip_next);
1521 			/* LINTED E_CONSTANT_CONDITION */
1522 			VRRPADDR2STR(af, &ip->vip_addr, abuf,
1523 			    INET6_ADDRSTRLEN, _B_FALSE);
1524 
1525 			if (ip->vip_state != NODE_STATE_NEW) {
1526 				vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(%s/%d, "
1527 				    "%s(%s/0x%x))", intf->vvi_ifname,
1528 				    intf->vvi_ifindex, ip->vip_lifname,
1529 				    abuf, ip->vip_flags);
1530 				ip->vip_state = NODE_STATE_STALE;
1531 				continue;
1532 			}
1533 
1534 			/*
1535 			 * If the IP is still marked as new, it means that
1536 			 * this vrrpd_init_ipcache() call is a result of
1537 			 * ifindex change, which causes the re-walk of all
1538 			 * the IP addresses (see vrrpd_add_ipaddr()).
1539 			 * Delete this IP.
1540 			 *
1541 			 * This IP will be readded later if it still exists.
1542 			 */
1543 			vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(): remove "
1544 			    "%s/%d , %s(%s)", intf->vvi_ifname,
1545 			    intf->vvi_ifindex, ip->vip_lifname, abuf);
1546 			vrrpd_delete_ip(intf, ip);
1547 		}
1548 	}
1549 }
1550 
1551 /*
1552  * Walk all the IP addresses of the given family and update its
1553  * addresses list. Return IPADM_FAILURE if it is required to walk
1554  * all the interfaces again (one of the interface index changes in between).
1555  */
1556 static ipadm_status_t
1557 vrrpd_walk_addr_info(int af)
1558 {
1559 	ipadm_addr_info_t	*ainfo, *ainfop;
1560 	ipadm_status_t		ipstatus;
1561 	char			*lifname;
1562 	struct sockaddr_storage	stor;
1563 	vrrp_addr_t		*addr;
1564 	int			ifindex;
1565 	uint64_t		flags;
1566 
1567 	vrrp_log(VRRP_DBG0, "vrrpd_walk_addr_info(%s)", af_str(af));
1568 
1569 	ipstatus = ipadm_addr_info(vrrp_ipadm_handle, NULL, &ainfo, 0, 0);
1570 	if (ipstatus != IPADM_SUCCESS) {
1571 		vrrp_log(VRRP_ERR, "vrrpd_walk_addr_info(%s): "
1572 		    "ipadm_addr_info() failed: %s",
1573 		    af_str(af), ipadm_status2str(ipstatus));
1574 		return (IPADM_SUCCESS);
1575 	}
1576 
1577 	for (ainfop = ainfo; ainfop != NULL; ainfop = IA_NEXT(ainfop)) {
1578 		if (ainfop->ia_ifa.ifa_addr->sa_family != af)
1579 			continue;
1580 
1581 		lifname = ainfop->ia_ifa.ifa_name;
1582 		flags = ainfop->ia_ifa.ifa_flags;
1583 		(void) memcpy(&stor, ainfop->ia_ifa.ifa_addr, sizeof (stor));
1584 		addr = (vrrp_addr_t *)&stor;
1585 
1586 		vrrp_log(VRRP_DBG0, "vrrpd_walk_addr_info(%s): %s",
1587 		    af_str(af), lifname);
1588 
1589 		/* Skip virtual/IPMP/P2P interfaces */
1590 		if (flags & (IFF_VIRTUAL|IFF_IPMP|IFF_POINTOPOINT)) {
1591 			vrrp_log(VRRP_DBG0, "vrrpd_walk_addr_info(%s): "
1592 			    "skipped %s", af_str(af), lifname);
1593 			continue;
1594 		}
1595 
1596 		/* Filter out the all-zero IP address */
1597 		if (VRRPADDR_UNSPECIFIED(af, addr))
1598 			continue;
1599 
1600 		if ((ifindex = if_nametoindex(lifname)) == 0) {
1601 			if (errno != ENXIO && errno != ENOENT) {
1602 				vrrp_log(VRRP_ERR, "vrrpd_walk_addr_info(%s): "
1603 				    "if_nametoindex() failed for %s: %s",
1604 				    af_str(af), lifname, strerror(errno));
1605 			}
1606 			break;
1607 		}
1608 
1609 		/*
1610 		 * The interface is unplumbed/replumbed during the walk.  Try
1611 		 * to walk the IP addresses one more time.
1612 		 */
1613 		if (vrrpd_add_ipaddr(lifname, af, addr, ifindex, flags)
1614 		    == VRRP_EAGAIN) {
1615 			ipstatus = IPADM_FAILURE;
1616 			break;
1617 		}
1618 	}
1619 
1620 	ipadm_free_addr_info(ainfo);
1621 	return (ipstatus);
1622 }
1623 
1624 /*
1625  * Given the information of each IP address, update the interface and
1626  * IP addresses list
1627  */
1628 static vrrp_err_t
1629 vrrpd_add_ipaddr(char *lifname, int af, vrrp_addr_t *addr, int ifindex,
1630     uint64_t flags)
1631 {
1632 	char		ifname[LIFNAMSIZ], *c;
1633 	vrrp_intf_t	*intf;
1634 	vrrp_ip_t	*ip;
1635 	char		abuf[INET6_ADDRSTRLEN];
1636 	vrrp_err_t	err;
1637 
1638 	/* LINTED E_CONSTANT_CONDITION */
1639 	VRRPADDR2STR(af, addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
1640 	vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s, %d, 0x%x)", lifname,
1641 	    abuf, ifindex, flags);
1642 
1643 	/*
1644 	 * Get the physical interface name from the logical interface name.
1645 	 */
1646 	(void) strlcpy(ifname, lifname, sizeof (ifname));
1647 	if ((c = strchr(ifname, ':')) != NULL)
1648 		*c = '\0';
1649 
1650 	if ((intf = vrrpd_lookup_if(ifname, af)) == NULL) {
1651 		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(): %s is new", ifname);
1652 		err = vrrpd_create_if(ifname, af, ifindex, &intf);
1653 		if (err != VRRP_SUCCESS)
1654 			return (err);
1655 	} else if (intf->vvi_ifindex != ifindex) {
1656 		/*
1657 		 * If index changes, it means that this interface is
1658 		 * unplumbed/replumbed since we last checked. If this
1659 		 * interface is not used by any VRRP router, just
1660 		 * update its ifindex, and the IP addresses list will
1661 		 * be updated later. Otherwise, return EAGAIN to rewalk
1662 		 * all the IP addresses from the beginning.
1663 		 */
1664 		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s) ifindex changed ",
1665 		    "from %d to %d", ifname, intf->vvi_ifindex, ifindex);
1666 		if (!IS_PRIMARY_INTF(intf) && !IS_VIRTUAL_INTF(intf)) {
1667 			intf->vvi_ifindex = ifindex;
1668 		} else {
1669 			/*
1670 			 * delete this interface from the list if this
1671 			 * interface has already been assoicated with
1672 			 * any VRRP routers.
1673 			 */
1674 			vrrpd_delete_if(intf, _B_TRUE);
1675 			return (VRRP_EAGAIN);
1676 		}
1677 	}
1678 
1679 	/*
1680 	 * Does this IP address already exist?
1681 	 */
1682 	TAILQ_FOREACH(ip, &intf->vvi_iplist, vip_next) {
1683 		if (strcmp(ip->vip_lifname, lifname) == 0)
1684 			break;
1685 	}
1686 
1687 	if (ip != NULL) {
1688 		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s) IP exists",
1689 		    lifname, abuf);
1690 		ip->vip_state = NODE_STATE_NONE;
1691 		ip->vip_flags = flags;
1692 		if (ipaddr_cmp(af, addr, &ip->vip_addr) != 0) {
1693 			/*
1694 			 * Address has been changed, mark it as new
1695 			 * If this address is already selected as the
1696 			 * primary IP address, the new IP will be checked
1697 			 * to see whether it is still qualified as the
1698 			 * primary IP address. If not, the primary IP
1699 			 * address will be reselected.
1700 			 */
1701 			(void) memcpy(&ip->vip_addr, addr,
1702 			    sizeof (vrrp_addr_t));
1703 
1704 			ip->vip_state = NODE_STATE_NEW;
1705 		}
1706 	} else {
1707 		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s) IP is new",
1708 		    lifname, abuf);
1709 
1710 		err = vrrpd_create_ip(intf, lifname, addr, flags);
1711 		if (err != VRRP_SUCCESS)
1712 			return (err);
1713 	}
1714 	return (VRRP_SUCCESS);
1715 }
1716 
/*
 * Update the interface and IP addresses list. Remove the ones that have
 * gone stale since the last time we walked the IP addresses and update the
 * ones that have changed.
 *
 * For every cached interface of address family "af", in this order:
 *  1. delete the IP addresses still marked stale;
 *  2. delete the interface if no IP address is left;
 *  3. on a primary interface, reselect the primary IP address and react
 *     to the primary address being lost or gained;
 *  4. update (or only check) every IP address of a virtual interface and
 *     clear the per-address state;
 *  5. delete the interface if step 4 removed its last IP address;
 *  6. clear the interface's "new" mark.
 * If any interface gained a primary address or was new, all the VRRP
 * routers are re-enabled once at the end.
 */
static void
vrrpd_update_ipcache(int af)
{
	vrrp_intf_t	*intf, *nextif;
	vrrp_ip_t	*ip, *nextip;
	char		abuf[INET6_ADDRSTRLEN];
	boolean_t	primary_selected;
	boolean_t	primary_now_selected;
	boolean_t	need_reenable = _B_FALSE;

	vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(%s)", af_str(af));

	/*
	 * The successor is fetched before the body runs because the
	 * current interface may be deleted below.
	 */
	nextif = TAILQ_FIRST(&vrrp_intf_list);
	while ((intf = nextif) != NULL) {
		nextif = TAILQ_NEXT(intf, vvi_next);
		if (intf->vvi_af != af)
			continue;

		/*
		 * Has the interface already selected its primary IP address?
		 */
		primary_selected = (intf->vvi_pip != NULL);
		assert(!primary_selected || IS_PRIMARY_INTF(intf));

		/*
		 * Remove the IP addresses that have been unconfigured.
		 */
		for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
		    ip = nextip) {
			nextip = TAILQ_NEXT(ip, vip_next);
			if (ip->vip_state != NODE_STATE_STALE)
				continue;

			/* LINTED E_CONSTANT_CONDITION */
			VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN,
			    _B_FALSE);
			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): IP %s "
			    "is removed over %s", abuf, intf->vvi_ifname);
			vrrpd_delete_ip(intf, ip);
		}

		/*
		 * No IP addresses left, delete this interface.
		 */
		if (TAILQ_EMPTY(&intf->vvi_iplist)) {
			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
			    "no IP left over %s", intf->vvi_ifname);
			vrrpd_delete_if(intf, _B_TRUE);
			continue;
		}

		/*
		 * If this is selected as the physical interface for any
		 * VRRP router, reselect the primary address if needed.
		 */
		if (IS_PRIMARY_INTF(intf)) {
			vrrpd_reselect_primary(intf);
			primary_now_selected = (intf->vvi_pip != NULL);

			/*
			 * Cannot find the new primary IP address.
			 */
			if (primary_selected && !primary_now_selected) {
				vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache() "
				    "reselect primary IP on %s failed",
				    intf->vvi_ifname);
				vrrpd_remove_if(intf, _B_TRUE);
			} else if (!primary_selected && primary_now_selected) {
				/*
				 * The primary IP address is successfully
				 * selected on the physical interface; we
				 * need to walk through all the VRRP routers
				 * that are created on this physical interface
				 * and see whether they can now be enabled.
				 */
				need_reenable = _B_TRUE;
			}
		}

		/*
		 * For every new virtual IP address, bring it up/down based
		 * on the state of the VRRP router.
		 *
		 * Note that it is fine to not update the IP's vip_flags field
		 * even if vrrpd_virtualip_updateone() changed the address's
		 * up/down state, since the vip_flags field is only used to
		 * select the primary IP address over a physical interface, and
		 * vrrpd_virtualip_updateone() only affects the virtual IP
		 * address's status.
		 */
		for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
		    ip = nextip) {
			nextip = TAILQ_NEXT(ip, vip_next);
			/* LINTED E_CONSTANT_CONDITION */
			VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN,
			    _B_FALSE);
			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
			    "IP %s over %s%s", abuf, intf->vvi_ifname,
			    ip->vip_state == NODE_STATE_NEW ? " is new" : "");

			if (IS_VIRTUAL_INTF(intf)) {
				/*
				 * If this IP is new, update its up/down state
				 * based on the virtual interface's state
				 * (which is determined by the VRRP router's
				 * state). Otherwise, check only and prompt
				 * warnings if its up/down state has been
				 * changed.
				 */
				if (vrrpd_virtualip_updateone(intf, ip,
				    ip->vip_state == NODE_STATE_NONE) !=
				    VRRP_SUCCESS) {
					vrrp_log(VRRP_DBG0,
					    "vrrpd_update_ipcache(): "
					    "IP %s over %s update failed", abuf,
					    intf->vvi_ifname);
					vrrpd_delete_ip(intf, ip);
					continue;
				}
			}
			/* Processed: the address is no longer new. */
			ip->vip_state = NODE_STATE_NONE;
		}

		/*
		 * An IP address is deleted when it fails to be brought
		 * up. If no IP addresses are left, delete this interface.
		 */
		if (TAILQ_EMPTY(&intf->vvi_iplist)) {
			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
			    "no IP left over %s", intf->vvi_ifname);
			vrrpd_delete_if(intf, _B_TRUE);
			continue;
		}

		if (intf->vvi_state == NODE_STATE_NEW) {
			/*
			 * A new interface is found. This interface can be
			 * the primary interface or the virtual VNIC
			 * interface.  Again, we need to walk through all
			 * the VRRP routers to see whether some of them can
			 * now be enabled because of the new primary IP
			 * address or the new virtual IP addresses.
			 */
			intf->vvi_state = NODE_STATE_NONE;
			need_reenable = _B_TRUE;
		}
	}

	/* Done once, after all the interfaces have been processed. */
	if (need_reenable)
		vrrpd_reenable_all_vr();
}
1873 
1874 /*
1875  * Reselect primary IP if:
1876  * - The existing primary IP is no longer qualified (removed or it is down or
1877  *   not a link-local IP for IPv6 VRRP router);
1878  * - This is a physical interface but no primary IP is chosen;
1879  */
1880 static void
1881 vrrpd_reselect_primary(vrrp_intf_t *intf)
1882 {
1883 	vrrp_ip_t	*ip;
1884 	char		abuf[INET6_ADDRSTRLEN];
1885 
1886 	assert(IS_PRIMARY_INTF(intf));
1887 
1888 	/*
1889 	 * If the interface's old primary IP address is still valid, return
1890 	 */
1891 	if (((ip = intf->vvi_pip) != NULL) && (QUALIFY_PRIMARY_ADDR(intf, ip)))
1892 		return;
1893 
1894 	if (ip != NULL) {
1895 		/* LINTED E_CONSTANT_CONDITION */
1896 		VRRPADDR2STR(intf->vvi_af, &ip->vip_addr, abuf,
1897 		    sizeof (abuf), _B_FALSE);
1898 		vrrp_log(VRRP_DBG0, "vrrpd_reselect_primary(%s): primary IP %s "
1899 		    "is no longer qualified", intf->vvi_ifname, abuf);
1900 	}
1901 
1902 	ip = vrrpd_select_primary(intf);
1903 	intf->vvi_pip = ip;
1904 
1905 	if (ip != NULL) {
1906 		/* LINTED E_CONSTANT_CONDITION */
1907 		VRRPADDR2STR(intf->vvi_af, &ip->vip_addr, abuf,
1908 		    sizeof (abuf), _B_FALSE);
1909 		vrrp_log(VRRP_DBG0, "vrrpd_reselect_primary(%s): primary IP %s "
1910 		    "is selected", intf->vvi_ifname, abuf);
1911 	}
1912 }
1913 
1914 /*
1915  * Select the primary IP address. Since the link-local IP address is always
1916  * at the head of the IP address list, try to find the first UP IP address
1917  * and see whether it qualify.
1918  */
1919 static vrrp_ip_t *
1920 vrrpd_select_primary(vrrp_intf_t *pif)
1921 {
1922 	vrrp_ip_t	*pip;
1923 	char		abuf[INET6_ADDRSTRLEN];
1924 
1925 	vrrp_log(VRRP_DBG1, "vrrpd_select_primary(%s)", pif->vvi_ifname);
1926 
1927 	TAILQ_FOREACH(pip, &pif->vvi_iplist, vip_next) {
1928 		assert(pip->vip_state != NODE_STATE_STALE);
1929 
1930 		/* LINTED E_CONSTANT_CONDITION */
1931 		VRRPADDR2STR(pif->vvi_af, &pip->vip_addr, abuf,
1932 		    INET6_ADDRSTRLEN, _B_FALSE);
1933 		vrrp_log(VRRP_DBG0, "vrrpd_select_primary(%s): %s is %s",
1934 		    pif->vvi_ifname, abuf,
1935 		    (pip->vip_flags & IFF_UP) ? "up" : "down");
1936 
1937 		if (pip->vip_flags & IFF_UP)
1938 			break;
1939 	}
1940 
1941 	/*
1942 	 * Is this valid primary IP address?
1943 	 */
1944 	if (pip == NULL || !QUALIFY_PRIMARY_ADDR(pif, pip)) {
1945 		vrrp_log(VRRP_DBG0, "vrrpd_select_primary(%s/%s) failed",
1946 		    pif->vvi_ifname, af_str(pif->vvi_af));
1947 		return (NULL);
1948 	}
1949 	return (pip);
1950 }
1951 
1952 /*
1953  * This is a new interface. Check whether any VRRP router is waiting for it
1954  */
1955 static void
1956 vrrpd_reenable_all_vr()
1957 {
1958 	vrrp_vr_t *vr;
1959 
1960 	vrrp_log(VRRP_DBG0, "vrrpd_reenable_all_vr()");
1961 
1962 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
1963 		if (vr->vvr_conf.vvc_enabled)
1964 			(void) vrrpd_enable_vr(vr);
1965 	}
1966 }
1967 
1968 /*
1969  * If primary_addr_gone is _B_TRUE, it means that we failed to select
1970  * the primary IP address on this (physical) interface; otherwise,
1971  * it means the interface is no longer available.
1972  */
1973 static void
1974 vrrpd_remove_if(vrrp_intf_t *intf, boolean_t primary_addr_gone)
1975 {
1976 	vrrp_vr_t *vr;
1977 
1978 	vrrp_log(VRRP_DBG0, "vrrpd_remove_if(%s): %s", intf->vvi_ifname,
1979 	    primary_addr_gone ? "primary address gone" : "interface deleted");
1980 
1981 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
1982 		if (vr->vvr_conf.vvc_enabled)
1983 			vrrpd_disable_vr(vr, intf, primary_addr_gone);
1984 	}
1985 }
1986 
1987 /*
1988  * Update the VRRP configuration file based on the given configuration.
1989  * op is either VRRP_CONF_UPDATE or VRRP_CONF_DELETE
1990  */
1991 static vrrp_err_t
1992 vrrpd_updateconf(vrrp_vr_conf_t *newconf, uint_t op)
1993 {
1994 	vrrp_vr_conf_t	conf;
1995 	FILE		*fp, *nfp;
1996 	int		nfd;
1997 	char		line[LINE_MAX];
1998 	char		newfile[MAXPATHLEN];
1999 	boolean_t	found = _B_FALSE;
2000 	vrrp_err_t	err = VRRP_SUCCESS;
2001 
2002 	vrrp_log(VRRP_DBG0, "vrrpd_updateconf(%s, %s)", newconf->vvc_name,
2003 	    op == VRRP_CONF_UPDATE ? "update" : "delete");
2004 
2005 	if ((fp = fopen(vrrpd_conffile, "r+F")) == NULL) {
2006 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): open %s failed: %s",
2007 		    vrrpd_conffile, strerror(errno));
2008 		return (VRRP_EDB);
2009 	}
2010 
2011 	(void) snprintf(newfile, MAXPATHLEN, "%s.new", vrrpd_conffile);
2012 	if ((nfd = open(newfile, O_WRONLY | O_CREAT | O_TRUNC,
2013 	    S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0) {
2014 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): open %s failed: %s",
2015 		    newfile, strerror(errno));
2016 		(void) fclose(fp);
2017 		return (VRRP_EDB);
2018 	}
2019 
2020 	if ((nfp = fdopen(nfd, "wF")) == NULL) {
2021 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): fdopen(%s) failed: %s",
2022 		    newfile, strerror(errno));
2023 		goto done;
2024 	}
2025 
2026 	while (fgets(line, sizeof (line), fp) != NULL) {
2027 		conf.vvc_vrid = VRRP_VRID_NONE;
2028 		if (!found && (err = vrrpd_read_vrconf(line, &conf)) !=
2029 		    VRRP_SUCCESS) {
2030 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): invalid "
2031 			    "configuration format: %s", line);
2032 			goto done;
2033 		}
2034 
2035 		/*
2036 		 * Write this line out if:
2037 		 * - this is a comment line; or
2038 		 * - we've done updating/deleting the the given VR; or
2039 		 * - if the name of the VR read from this line does not match
2040 		 *   the VR name that we are about to update/delete;
2041 		 */
2042 		if (found || conf.vvc_vrid == VRRP_VRID_NONE ||
2043 		    strcmp(conf.vvc_name, newconf->vvc_name) != 0) {
2044 			if (fputs(line, nfp) != EOF)
2045 				continue;
2046 
2047 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2048 			    "write line %s", line);
2049 			err = VRRP_EDB;
2050 			goto done;
2051 		}
2052 
2053 		/*
2054 		 * Otherwise, update/skip the line.
2055 		 */
2056 		found = _B_TRUE;
2057 		if (op == VRRP_CONF_DELETE)
2058 			continue;
2059 
2060 		assert(op == VRRP_CONF_UPDATE);
2061 		if ((err = vrrpd_write_vrconf(line, sizeof (line),
2062 		    newconf)) != VRRP_SUCCESS) {
2063 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2064 			    "update configuration for %s", newconf->vvc_name);
2065 			goto done;
2066 		}
2067 		if (fputs(line, nfp) == EOF) {
2068 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2069 			    "write line %s", line);
2070 			err = VRRP_EDB;
2071 			goto done;
2072 		}
2073 	}
2074 
2075 	/*
2076 	 * If we get to the end of the file and have not seen the router that
2077 	 * we are about to update, write it out.
2078 	 */
2079 	if (!found && op == VRRP_CONF_UPDATE) {
2080 		if ((err = vrrpd_write_vrconf(line, sizeof (line),
2081 		    newconf)) == VRRP_SUCCESS && fputs(line, nfp) == EOF) {
2082 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2083 			    "write line %s", line);
2084 			err = VRRP_EDB;
2085 		}
2086 	} else if (!found && op == VRRP_CONF_DELETE) {
2087 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to find "
2088 		    "configuation for %s", newconf->vvc_name);
2089 		err = VRRP_ENOTFOUND;
2090 	}
2091 
2092 	if (err != VRRP_SUCCESS)
2093 		goto done;
2094 
2095 	if (fflush(nfp) == EOF || rename(newfile, vrrpd_conffile) < 0) {
2096 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2097 		    "rename file %s", newfile);
2098 		err = VRRP_EDB;
2099 	}
2100 
2101 done:
2102 	(void) fclose(fp);
2103 	(void) fclose(nfp);
2104 	(void) unlink(newfile);
2105 	return (err);
2106 }
2107 
2108 static vrrp_err_t
2109 vrrpd_write_vrconf(char *line, size_t len, vrrp_vr_conf_t *conf)
2110 {
2111 	vrrp_prop_t	*prop;
2112 	int		n, i;
2113 
2114 	vrrp_log(VRRP_DBG0, "vrrpd_write_vrconf(%s)", conf->vvc_name);
2115 
2116 	for (i = 0; i < VRRP_PROP_INFO_TABSIZE; i++) {
2117 		prop = &vrrp_prop_info_tbl[i];
2118 		n = snprintf(line, len, i == 0 ? "%s=" : " %s=",
2119 		    prop->vs_propname);
2120 		if (n < 0 || n >= len)
2121 			break;
2122 		len -= n;
2123 		line += n;
2124 		n = prop->vs_propwrite(conf, line, len);
2125 		if (n < 0 || n >= len)
2126 			break;
2127 		len -= n;
2128 		line += n;
2129 	}
2130 	if (i != VRRP_PROP_INFO_TABSIZE) {
2131 		vrrp_log(VRRP_ERR, "vrrpd_write_vrconf(%s): buffer size too"
2132 		    "small", conf->vvc_name);
2133 		return (VRRP_EDB);
2134 	}
2135 	n = snprintf(line, len, "\n");
2136 	if (n < 0 || n >= len) {
2137 		vrrp_log(VRRP_ERR, "vrrpd_write_vrconf(%s): buffer size too"
2138 		    "small", conf->vvc_name);
2139 		return (VRRP_EDB);
2140 	}
2141 	return (VRRP_SUCCESS);
2142 }
2143 
2144 static vrrp_err_t
2145 vrrpd_read_vrconf(char *line, vrrp_vr_conf_t *conf)
2146 {
2147 	char		*str, *token;
2148 	char		*next;
2149 	vrrp_err_t	err = VRRP_SUCCESS;
2150 	char		tmpbuf[MAXLINELEN];
2151 
2152 	str = tmpbuf;
2153 	(void) strlcpy(tmpbuf, line, MAXLINELEN);
2154 
2155 	/*
2156 	 * Skip leading spaces, blank lines, and comments.
2157 	 */
2158 	skip_whitespace(str);
2159 	if ((str - tmpbuf == strlen(tmpbuf)) || (*str == '#')) {
2160 		conf->vvc_vrid = VRRP_VRID_NONE;
2161 		return (VRRP_SUCCESS);
2162 	}
2163 
2164 	/*
2165 	 * Read each VR properties.
2166 	 */
2167 	for (token = strtok_r(str, " \n\t", &next); token != NULL;
2168 	    token = strtok_r(NULL, " \n\t", &next)) {
2169 		if ((err = vrrpd_readprop(token, conf)) != VRRP_SUCCESS)
2170 			break;
2171 	}
2172 
2173 	/* All properties read but no VRID defined */
2174 	if (err == VRRP_SUCCESS && conf->vvc_vrid == VRRP_VRID_NONE)
2175 		err = VRRP_EINVAL;
2176 
2177 	return (err);
2178 }
2179 
2180 static vrrp_err_t
2181 vrrpd_readprop(const char *str, vrrp_vr_conf_t *conf)
2182 {
2183 	vrrp_prop_t	*prop;
2184 	char		*pstr;
2185 	int		i;
2186 
2187 	if ((pstr = strchr(str, '=')) == NULL) {
2188 		vrrp_log(VRRP_ERR, "vrrpd_readprop(%s): invalid property", str);
2189 		return (VRRP_EINVAL);
2190 	}
2191 
2192 	*pstr++ = '\0';
2193 	for (i = 0; i < VRRP_PROP_INFO_TABSIZE; i++) {
2194 		prop = &vrrp_prop_info_tbl[i];
2195 		if (strcasecmp(str, prop->vs_propname) == 0) {
2196 			if (prop->vs_propread(conf, pstr))
2197 				break;
2198 		}
2199 	}
2200 
2201 	if (i == VRRP_PROP_INFO_TABSIZE) {
2202 		vrrp_log(VRRP_ERR, "vrrpd_readprop(%s): invalid property", str);
2203 		return (VRRP_EINVAL);
2204 	}
2205 
2206 	return (VRRP_SUCCESS);
2207 }
2208 
2209 static boolean_t
2210 vrrp_rd_prop_name(vrrp_vr_conf_t *conf, const char *str)
2211 {
2212 	size_t size = sizeof (conf->vvc_name);
2213 	return (strlcpy(conf->vvc_name, str, size) < size);
2214 }
2215 
2216 static boolean_t
2217 vrrp_rd_prop_vrid(vrrp_vr_conf_t *conf, const char *str)
2218 {
2219 	conf->vvc_vrid = strtol(str, NULL, 0);
2220 	return (!(conf->vvc_vrid < VRRP_VRID_MIN ||
2221 	    conf->vvc_vrid > VRRP_VRID_MAX ||
2222 	    (conf->vvc_vrid == 0 && errno != 0)));
2223 }
2224 
2225 static boolean_t
2226 vrrp_rd_prop_af(vrrp_vr_conf_t *conf, const char *str)
2227 {
2228 	if (strcasecmp(str, "AF_INET") == 0)
2229 		conf->vvc_af = AF_INET;
2230 	else if (strcasecmp(str, "AF_INET6") == 0)
2231 		conf->vvc_af = AF_INET6;
2232 	else
2233 		return (_B_FALSE);
2234 	return (_B_TRUE);
2235 }
2236 
2237 static boolean_t
2238 vrrp_rd_prop_pri(vrrp_vr_conf_t *conf, const char *str)
2239 {
2240 	conf->vvc_pri = strtol(str, NULL, 0);
2241 	return (!(conf->vvc_pri < VRRP_PRI_MIN ||
2242 	    conf->vvc_pri > VRRP_PRI_OWNER ||
2243 	    (conf->vvc_pri == 0 && errno != 0)));
2244 }
2245 
2246 static boolean_t
2247 vrrp_rd_prop_adver_int(vrrp_vr_conf_t *conf, const char *str)
2248 {
2249 	conf->vvc_adver_int = strtol(str, NULL, 0);
2250 	return (!(conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
2251 	    conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX ||
2252 	    (conf->vvc_adver_int == 0 && errno != 0)));
2253 }
2254 
2255 static boolean_t
2256 vrrp_rd_prop_preempt(vrrp_vr_conf_t *conf, const char *str)
2257 {
2258 	if (strcasecmp(str, "true") == 0)
2259 		conf->vvc_preempt = _B_TRUE;
2260 	else if (strcasecmp(str, "false") == 0)
2261 		conf->vvc_preempt = _B_FALSE;
2262 	else
2263 		return (_B_FALSE);
2264 	return (_B_TRUE);
2265 }
2266 
2267 static boolean_t
2268 vrrp_rd_prop_accept(vrrp_vr_conf_t *conf, const char *str)
2269 {
2270 	if (strcasecmp(str, "true") == 0)
2271 		conf->vvc_accept = _B_TRUE;
2272 	else if (strcasecmp(str, "false") == 0)
2273 		conf->vvc_accept = _B_FALSE;
2274 	else
2275 		return (_B_FALSE);
2276 	return (_B_TRUE);
2277 }
2278 
2279 static boolean_t
2280 vrrp_rd_prop_enabled(vrrp_vr_conf_t *conf, const char *str)
2281 {
2282 	if (strcasecmp(str, "enabled") == 0)
2283 		conf->vvc_enabled = _B_TRUE;
2284 	else if (strcasecmp(str, "disabled") == 0)
2285 		conf->vvc_enabled = _B_FALSE;
2286 	else
2287 		return (_B_FALSE);
2288 	return (_B_TRUE);
2289 }
2290 
2291 static boolean_t
2292 vrrp_rd_prop_ifname(vrrp_vr_conf_t *conf, const char *str)
2293 {
2294 	size_t size = sizeof (conf->vvc_link);
2295 	return (strlcpy(conf->vvc_link, str, size) < size);
2296 }
2297 
2298 static int
2299 vrrp_wt_prop_name(vrrp_vr_conf_t *conf, char *str, size_t size)
2300 {
2301 	return (snprintf(str, size, "%s", conf->vvc_name));
2302 }
2303 
2304 static int
2305 vrrp_wt_prop_pri(vrrp_vr_conf_t *conf, char *str, size_t size)
2306 {
2307 	return (snprintf(str, size, "%d", conf->vvc_pri));
2308 }
2309 
2310 static int
2311 vrrp_wt_prop_adver_int(vrrp_vr_conf_t *conf, char *str, size_t size)
2312 {
2313 	return (snprintf(str, size, "%d", conf->vvc_adver_int));
2314 }
2315 
2316 static int
2317 vrrp_wt_prop_preempt(vrrp_vr_conf_t *conf, char *str, size_t size)
2318 {
2319 	return (snprintf(str, size, "%s",
2320 	    conf->vvc_preempt ? "true" : "false"));
2321 }
2322 
2323 static int
2324 vrrp_wt_prop_accept(vrrp_vr_conf_t *conf, char *str, size_t size)
2325 {
2326 	return (snprintf(str, size, "%s",
2327 	    conf->vvc_accept ? "true" : "false"));
2328 }
2329 
2330 static int
2331 vrrp_wt_prop_enabled(vrrp_vr_conf_t *conf, char *str, size_t size)
2332 {
2333 	return (snprintf(str, size, "%s",
2334 	    conf->vvc_enabled ? "enabled" : "disabled"));
2335 }
2336 
2337 static int
2338 vrrp_wt_prop_vrid(vrrp_vr_conf_t *conf, char *str, size_t size)
2339 {
2340 	return (snprintf(str, size, "%d", conf->vvc_vrid));
2341 }
2342 
2343 static int
2344 vrrp_wt_prop_af(vrrp_vr_conf_t *conf, char *str, size_t size)
2345 {
2346 	return (snprintf(str, size, "%s",
2347 	    conf->vvc_af == AF_INET ? "AF_INET" : "AF_INET6"));
2348 }
2349 
2350 static int
2351 vrrp_wt_prop_ifname(vrrp_vr_conf_t *conf, char *str, size_t size)
2352 {
2353 	return (snprintf(str, size, "%s", conf->vvc_link));
2354 }
2355 
/*
 * Return a printable name for an address family. Both the AF_* constants
 * and the bare IP version numbers 4 and 6 are recognized; anything else
 * other than AF_UNSPEC is reported as "AF_error".
 */
static char *
af_str(int af)
{
	if (af == 4 || af == AF_INET)
		return ("AF_INET");

	if (af == 6 || af == AF_INET6)
		return ("AF_INET6");

	if (af == AF_UNSPEC)
		return ("AF_UNSPEC");

	return ("AF_error");
}
2368 
2369 static vrrp_err_t
2370 vrrpd_create_vr(vrrp_vr_conf_t *conf)
2371 {
2372 	vrrp_vr_t	*vr;
2373 
2374 	vrrp_log(VRRP_DBG0, "vrrpd_create_vr(%s)", conf->vvc_name);
2375 
2376 	if ((vr = malloc(sizeof (vrrp_vr_t))) == NULL) {
2377 		vrrp_log(VRRP_ERR, "vrrpd_create_vr(): memory allocation for %s"
2378 		    " failed", conf->vvc_name);
2379 		return (VRRP_ENOMEM);
2380 	}
2381 
2382 	bzero(vr, sizeof (vrrp_vr_t));
2383 	vr->vvr_state = VRRP_STATE_NONE;
2384 	vr->vvr_timer_id = -1;
2385 	vrrpd_state_trans(VRRP_STATE_NONE, VRRP_STATE_INIT, vr);
2386 	(void) memcpy(&vr->vvr_conf, conf, sizeof (vrrp_vr_conf_t));
2387 	vr->vvr_conf.vvc_enabled = _B_FALSE;
2388 	TAILQ_INSERT_HEAD(&vrrp_vr_list, vr, vvr_next);
2389 	return (VRRP_SUCCESS);
2390 }
2391 
2392 static void
2393 vrrpd_delete_vr(vrrp_vr_t *vr)
2394 {
2395 	vrrp_log(VRRP_DBG0, "vrrpd_delete_vr(%s)", vr->vvr_conf.vvc_name);
2396 	if (vr->vvr_conf.vvc_enabled)
2397 		vrrpd_disable_vr(vr, NULL, _B_FALSE);
2398 	assert(vr->vvr_state == VRRP_STATE_INIT);
2399 	vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_NONE, vr);
2400 	TAILQ_REMOVE(&vrrp_vr_list, vr, vvr_next);
2401 	(void) free(vr);
2402 }
2403 
2404 static vrrp_err_t
2405 vrrpd_enable_vr(vrrp_vr_t *vr)
2406 {
2407 	vrrp_err_t	rx_err, tx_err, err = VRRP_EINVAL;
2408 
2409 	vrrp_log(VRRP_DBG0, "vrrpd_enable_vr(%s)", vr->vvr_conf.vvc_name);
2410 
2411 	assert(vr->vvr_conf.vvc_enabled);
2412 
2413 	/*
2414 	 * This VRRP router has been successfully enabled and start
2415 	 * participating.
2416 	 */
2417 	if (vr->vvr_state != VRRP_STATE_INIT)
2418 		return (VRRP_SUCCESS);
2419 
2420 	if ((rx_err = vrrpd_init_rxsock(vr)) == VRRP_SUCCESS) {
2421 		/*
2422 		 * Select the primary IP address. Even if this time
2423 		 * primary IP selection failed, we will reselect the
2424 		 * primary IP address when new IP address comes up.
2425 		 */
2426 		vrrpd_reselect_primary(vr->vvr_pif);
2427 		if (vr->vvr_pif->vvi_pip == NULL) {
2428 			vrrp_log(VRRP_DBG0, "vrrpd_enable_vr(%s): "
2429 			    "select_primary over %s failed",
2430 			    vr->vvr_conf.vvc_name, vr->vvr_pif->vvi_ifname);
2431 			rx_err = VRRP_ENOPRIM;
2432 		}
2433 	}
2434 
2435 	/*
2436 	 * Initialize the TX socket used for this vrrp_vr_t to send the
2437 	 * multicast packets.
2438 	 */
2439 	tx_err = vrrpd_init_txsock(vr);
2440 
2441 	/*
2442 	 * Only start the state transition if sockets for both RX and TX are
2443 	 * initialized correctly.
2444 	 */
2445 	if (rx_err != VRRP_SUCCESS || tx_err != VRRP_SUCCESS) {
2446 		/*
2447 		 * Record the error information for diagnose purpose.
2448 		 */
2449 		vr->vvr_err = (rx_err == VRRP_SUCCESS) ? tx_err : rx_err;
2450 		return (err);
2451 	}
2452 
2453 	if (vr->vvr_conf.vvc_pri == 255)
2454 		err = vrrpd_state_i2m(vr);
2455 	else
2456 		err = vrrpd_state_i2b(vr);
2457 
2458 	if (err != VRRP_SUCCESS) {
2459 		vr->vvr_err = err;
2460 		vr->vvr_pif->vvi_pip = NULL;
2461 		vrrpd_fini_txsock(vr);
2462 		vrrpd_fini_rxsock(vr);
2463 	}
2464 	return (err);
2465 }
2466 
2467 /*
2468  * Given the removed interface, see whether the given VRRP router would
2469  * be affected and stop participating the VRRP protocol.
2470  *
2471  * If intf is NULL, VR disabling request is coming from the admin.
2472  */
2473 static void
2474 vrrpd_disable_vr(vrrp_vr_t *vr, vrrp_intf_t *intf, boolean_t primary_addr_gone)
2475 {
2476 	vrrp_log(VRRP_DBG0, "vrrpd_disable_vr(%s): %s%s", vr->vvr_conf.vvc_name,
2477 	    intf == NULL ? "requested by admin" : intf->vvi_ifname,
2478 	    intf == NULL ? "" : (primary_addr_gone ? "primary address gone" :
2479 	    "interface deleted"));
2480 
2481 	/*
2482 	 * An interface is deleted, see whether this interface is the
2483 	 * physical interface or the VNIC of the given VRRP router.
2484 	 * If so, continue to disable the VRRP router.
2485 	 */
2486 	if (!primary_addr_gone && (intf != NULL) && (intf != vr->vvr_pif) &&
2487 	    (intf != vr->vvr_vif)) {
2488 		return;
2489 	}
2490 
2491 	/*
2492 	 * If this is the case that the primary IP address is gone,
2493 	 * and we failed to reselect another primary IP address,
2494 	 * continue to disable the VRRP router.
2495 	 */
2496 	if (primary_addr_gone && intf != vr->vvr_pif)
2497 		return;
2498 
2499 	vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): disabling",
2500 	    vr->vvr_conf.vvc_name);
2501 
2502 	if (vr->vvr_state == VRRP_STATE_MASTER) {
2503 		/*
2504 		 * If this router is disabled by the administrator, send
2505 		 * the zero-priority advertisement to indicate the Master
2506 		 * stops participating VRRP.
2507 		 */
2508 		if (intf == NULL)
2509 			(void) vrrpd_send_adv(vr, _B_TRUE);
2510 
2511 		vrrpd_state_m2i(vr);
2512 	} else  if (vr->vvr_state == VRRP_STATE_BACKUP) {
2513 		vrrpd_state_b2i(vr);
2514 	}
2515 
2516 	/*
2517 	 * If no primary IP address can be selected, the VRRP router
2518 	 * stays at the INIT state and will become BACKUP and MASTER when
2519 	 * a primary IP address is reselected.
2520 	 */
2521 	if (primary_addr_gone) {
2522 		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): primary IP "
2523 		    "is removed", vr->vvr_conf.vvc_name);
2524 		vr->vvr_err = VRRP_ENOPRIM;
2525 	} else if (intf == NULL) {
2526 		/*
2527 		 * The VRRP router is disable by the administrator
2528 		 */
2529 		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): disabled by admin",
2530 		    vr->vvr_conf.vvc_name);
2531 		vr->vvr_err = VRRP_SUCCESS;
2532 		vrrpd_fini_txsock(vr);
2533 		vrrpd_fini_rxsock(vr);
2534 	} else if (intf == vr->vvr_pif) {
2535 		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): physical interface "
2536 		    "%s removed", vr->vvr_conf.vvc_name, intf->vvi_ifname);
2537 		vr->vvr_err = VRRP_ENOPRIM;
2538 		vrrpd_fini_rxsock(vr);
2539 	} else if (intf == vr->vvr_vif) {
2540 		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): VNIC interface %s"
2541 		    " removed", vr->vvr_conf.vvc_name, intf->vvi_ifname);
2542 		vr->vvr_err = VRRP_ENOVIRT;
2543 		vrrpd_fini_txsock(vr);
2544 	}
2545 }
2546 
2547 vrrp_err_t
2548 vrrpd_create(vrrp_vr_conf_t *conf, boolean_t updateconf)
2549 {
2550 	vrrp_err_t	err = VRRP_SUCCESS;
2551 
2552 	vrrp_log(VRRP_DBG0, "vrrpd_create(%s, %s, %d)", conf->vvc_name,
2553 	    conf->vvc_link, conf->vvc_vrid);
2554 
2555 	assert(conf != NULL);
2556 
2557 	/*
2558 	 * Sanity check
2559 	 */
2560 	if ((strlen(conf->vvc_name) == 0) ||
2561 	    (strlen(conf->vvc_link) == 0) ||
2562 	    (conf->vvc_vrid < VRRP_VRID_MIN ||
2563 	    conf->vvc_vrid > VRRP_VRID_MAX) ||
2564 	    (conf->vvc_pri < VRRP_PRI_MIN ||
2565 	    conf->vvc_pri > VRRP_PRI_OWNER) ||
2566 	    (conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
2567 	    conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX) ||
2568 	    (conf->vvc_af != AF_INET && conf->vvc_af != AF_INET6) ||
2569 	    (conf->vvc_pri == VRRP_PRI_OWNER && !conf->vvc_accept)) {
2570 		vrrp_log(VRRP_DBG1, "vrrpd_create(%s): invalid argument",
2571 		    conf->vvc_name);
2572 		return (VRRP_EINVAL);
2573 	}
2574 
2575 	if (!vrrp_valid_name(conf->vvc_name)) {
2576 		vrrp_log(VRRP_DBG1, "vrrpd_create(): %s is not a valid router "
2577 		    "name", conf->vvc_name);
2578 		return (VRRP_EINVALVRNAME);
2579 	}
2580 
2581 	if (vrrpd_lookup_vr_by_name(conf->vvc_name) != NULL) {
2582 		vrrp_log(VRRP_DBG1, "vrrpd_create(): %s already exists",
2583 		    conf->vvc_name);
2584 		return (VRRP_EINSTEXIST);
2585 	}
2586 
2587 	if (vrrpd_lookup_vr_by_vrid(conf->vvc_link, conf->vvc_vrid,
2588 	    conf->vvc_af) != NULL) {
2589 		vrrp_log(VRRP_DBG1, "vrrpd_create(): VRID %d/%s over %s "
2590 		    "already exists", conf->vvc_vrid, af_str(conf->vvc_af),
2591 		    conf->vvc_link);
2592 		return (VRRP_EVREXIST);
2593 	}
2594 
2595 	if (updateconf && (err = vrrpd_updateconf(conf,
2596 	    VRRP_CONF_UPDATE)) != VRRP_SUCCESS) {
2597 		vrrp_log(VRRP_ERR, "vrrpd_create(): failed to update "
2598 		    "configuration for %s", conf->vvc_name);
2599 		return (err);
2600 	}
2601 
2602 	err = vrrpd_create_vr(conf);
2603 	if (err != VRRP_SUCCESS && updateconf)
2604 		(void) vrrpd_updateconf(conf, VRRP_CONF_DELETE);
2605 
2606 	return (err);
2607 }
2608 
/*
 * Delete the router named vn: remove it from the configuration file and,
 * only if that succeeded, destroy the in-memory instance.
 *
 * Returns VRRP_SUCCESS, VRRP_ENOTFOUND if no such router exists, or the
 * error from vrrpd_updateconf().
 */
static vrrp_err_t
vrrpd_delete(const char *vn)
{
	vrrp_vr_t	*router;
	vrrp_err_t	rc;

	vrrp_log(VRRP_DBG0, "vrrpd_delete(%s)", vn);

	router = vrrpd_lookup_vr_by_name(vn);
	if (router == NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_delete(): %s not exists", vn);
		return (VRRP_ENOTFOUND);
	}

	rc = vrrpd_updateconf(&router->vvr_conf, VRRP_CONF_DELETE);
	if (rc == VRRP_SUCCESS) {
		vrrpd_delete_vr(router);
		return (VRRP_SUCCESS);
	}

	vrrp_log(VRRP_ERR, "vrrpd_delete(): failed to delete "
	    "configuration for %s", router->vvr_conf.vvc_name);
	return (rc);
}
2632 
2633 static vrrp_err_t
2634 vrrpd_enable(const char *vn, boolean_t updateconf)
2635 {
2636 	vrrp_vr_t		*vr;
2637 	vrrp_vr_conf_t		*conf;
2638 	uint32_t		flags;
2639 	datalink_class_t	class;
2640 	vrrp_err_t		err = VRRP_SUCCESS;
2641 
2642 	vrrp_log(VRRP_DBG0, "vrrpd_enable(%s)", vn);
2643 
2644 	if ((vr = vrrpd_lookup_vr_by_name(vn)) == NULL) {
2645 		vrrp_log(VRRP_DBG1, "vrrpd_enable(): %s does not exist", vn);
2646 		return (VRRP_ENOTFOUND);
2647 	}
2648 
2649 	/*
2650 	 * The VR is already enabled.
2651 	 */
2652 	conf = &vr->vvr_conf;
2653 	if (conf->vvc_enabled) {
2654 		vrrp_log(VRRP_DBG1, "vrrpd_enable(): %s is already "
2655 		    "enabled", vn);
2656 		return (VRRP_EALREADY);
2657 	}
2658 
2659 	/*
2660 	 * Check whether the link exists.
2661 	 */
2662 	if ((strlen(conf->vvc_link) == 0) || dladm_name2info(vrrpd_vh->vh_dh,
2663 	    conf->vvc_link, NULL, &flags, &class, NULL) != DLADM_STATUS_OK ||
2664 	    !(flags & DLADM_OPT_ACTIVE) || ((class != DATALINK_CLASS_PHYS) &&
2665 	    (class != DATALINK_CLASS_VLAN) && (class != DATALINK_CLASS_AGGR))) {
2666 		vrrp_log(VRRP_DBG1, "vrrpd_enable(%s): invalid link %s",
2667 		    vn, conf->vvc_link);
2668 		return (VRRP_EINVALLINK);
2669 	}
2670 
2671 	/*
2672 	 * Get the associated VNIC name by the given interface/vrid/
2673 	 * address famitly.
2674 	 */
2675 	err = vrrp_get_vnicname(vrrpd_vh, conf->vvc_vrid,
2676 	    conf->vvc_af, conf->vvc_link, NULL, NULL, vr->vvr_vnic,
2677 	    sizeof (vr->vvr_vnic));
2678 	if (err != VRRP_SUCCESS) {
2679 		vrrp_log(VRRP_DBG1, "vrrpd_enable(%s): no VNIC for VRID %d/%s "
2680 		    "over %s", vn, conf->vvc_vrid, af_str(conf->vvc_af),
2681 		    conf->vvc_link);
2682 		err = VRRP_ENOVNIC;
2683 		goto fail;
2684 	}
2685 
2686 	/*
2687 	 * Find the right VNIC, primary interface and get the list of the
2688 	 * protected IP adressses and primary IP address. Note that if
2689 	 * either interface is NULL (no IP addresses configured over the
2690 	 * interface), we will still continue and mark this VRRP router
2691 	 * as "enabled".
2692 	 */
2693 	vr->vvr_conf.vvc_enabled = _B_TRUE;
2694 	if (updateconf && (err = vrrpd_updateconf(&vr->vvr_conf,
2695 	    VRRP_CONF_UPDATE)) != VRRP_SUCCESS) {
2696 		vrrp_log(VRRP_ERR, "vrrpd_enable(): failed to update "
2697 		    "configuration for %s", vr->vvr_conf.vvc_name);
2698 		goto fail;
2699 	}
2700 
2701 	/*
2702 	 * If vrrpd_setup_vr() fails, it is possible that there is no IP
2703 	 * addresses over ether the primary interface or the VNIC yet,
2704 	 * return success in this case, the VRRP router will stay in
2705 	 * the initialized state and start to work when the IP address is
2706 	 * configured.
2707 	 */
2708 	(void) vrrpd_enable_vr(vr);
2709 	return (VRRP_SUCCESS);
2710 
2711 fail:
2712 	vr->vvr_conf.vvc_enabled = _B_FALSE;
2713 	vr->vvr_vnic[0] = '\0';
2714 	return (err);
2715 }
2716 
/*
 * Administratively disable the router named vn.
 *
 * The disabled state is written to the configuration file first; only if
 * that succeeds is the router actually stopped and its VNIC name cleared.
 *
 * Returns VRRP_SUCCESS, VRRP_ENOTFOUND if no such router exists,
 * VRRP_EALREADY if it is not enabled, or the error from
 * vrrpd_updateconf() (the router then stays enabled).
 */
static vrrp_err_t
vrrpd_disable(const char *vn)
{
	vrrp_vr_t	*router;
	vrrp_vr_conf_t	*cfg;
	vrrp_err_t	rc;

	vrrp_log(VRRP_DBG0, "vrrpd_disable(%s)", vn);

	router = vrrpd_lookup_vr_by_name(vn);
	if (router == NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_disable(): %s does not exist", vn);
		return (VRRP_ENOTFOUND);
	}
	cfg = &router->vvr_conf;

	/*
	 * Disabling twice is reported, not silently accepted.
	 */
	if (!cfg->vvc_enabled) {
		vrrp_log(VRRP_DBG1, "vrrpd_disable(): %s was not enabled", vn);
		return (VRRP_EALREADY);
	}

	cfg->vvc_enabled = _B_FALSE;
	rc = vrrpd_updateconf(cfg, VRRP_CONF_UPDATE);
	if (rc != VRRP_SUCCESS) {
		/* The file still says enabled; keep memory in step. */
		cfg->vvc_enabled = _B_TRUE;
		vrrp_log(VRRP_ERR, "vrrpd_disable(): failed to update "
		    "configuration for %s", cfg->vvc_name);
		return (rc);
	}

	vrrpd_disable_vr(router, NULL, _B_FALSE);
	router->vvr_vnic[0] = '\0';
	return (VRRP_SUCCESS);
}
2751 
2752 static vrrp_err_t
2753 vrrpd_modify(vrrp_vr_conf_t *conf, uint32_t mask)
2754 {
2755 	vrrp_vr_t	*vr;
2756 	vrrp_vr_conf_t	savconf;
2757 	int		pri;
2758 	boolean_t	accept, set_accept = _B_FALSE;
2759 	vrrp_err_t	err;
2760 
2761 	vrrp_log(VRRP_DBG0, "vrrpd_modify(%s)", conf->vvc_name);
2762 
2763 	if (mask == 0)
2764 		return (VRRP_SUCCESS);
2765 
2766 	if ((vr = vrrpd_lookup_vr_by_name(conf->vvc_name)) == NULL) {
2767 		vrrp_log(VRRP_DBG1, "vrrpd_modify(): cannot find the given "
2768 		    "VR instance: %s", conf->vvc_name);
2769 		return (VRRP_ENOTFOUND);
2770 	}
2771 
2772 	if (mask & VRRP_CONF_INTERVAL) {
2773 		if (conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
2774 		    conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX) {
2775 			vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): invalid "
2776 			    "adver_interval %d", conf->vvc_name,
2777 			    conf->vvc_adver_int);
2778 			return (VRRP_EINVAL);
2779 		}
2780 	}
2781 
2782 	pri = vr->vvr_conf.vvc_pri;
2783 	if (mask & VRRP_CONF_PRIORITY) {
2784 		if (conf->vvc_pri < VRRP_PRI_MIN ||
2785 		    conf->vvc_pri > VRRP_PRI_OWNER) {
2786 			vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): invalid "
2787 			    "priority %d", conf->vvc_name, conf->vvc_pri);
2788 			return (VRRP_EINVAL);
2789 		}
2790 		pri = conf->vvc_pri;
2791 	}
2792 
2793 	accept = vr->vvr_conf.vvc_accept;
2794 	if (mask & VRRP_CONF_ACCEPT)
2795 		accept = conf->vvc_accept;
2796 
2797 	if (pri == VRRP_PRI_OWNER && !accept) {
2798 		vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): accept mode must be "
2799 		    "true for VRRP address owner", conf->vvc_name);
2800 		return (VRRP_EINVAL);
2801 	}
2802 
2803 	if ((mask & VRRP_CONF_ACCEPT) && (vr->vvr_conf.vvc_accept != accept)) {
2804 		err = vrrpd_set_noaccept(vr, !accept);
2805 		if (err != VRRP_SUCCESS) {
2806 			vrrp_log(VRRP_ERR, "vrrpd_modify(%s): access mode "
2807 			    "updating failed: %s", conf->vvc_name,
2808 			    vrrp_err2str(err));
2809 			return (err);
2810 		}
2811 		set_accept = _B_TRUE;
2812 	}
2813 
2814 	/*
2815 	 * Save the current configuration, so it can be restored if the
2816 	 * following fails.
2817 	 */
2818 	(void) memcpy(&savconf, &vr->vvr_conf, sizeof (vrrp_vr_conf_t));
2819 	if (mask & VRRP_CONF_PREEMPT)
2820 		vr->vvr_conf.vvc_preempt = conf->vvc_preempt;
2821 
2822 	if (mask & VRRP_CONF_ACCEPT)
2823 		vr->vvr_conf.vvc_accept = accept;
2824 
2825 	if (mask & VRRP_CONF_PRIORITY)
2826 		vr->vvr_conf.vvc_pri = pri;
2827 
2828 	if (mask & VRRP_CONF_INTERVAL)
2829 		vr->vvr_conf.vvc_adver_int = conf->vvc_adver_int;
2830 
2831 	err = vrrpd_updateconf(&vr->vvr_conf, VRRP_CONF_UPDATE);
2832 	if (err != VRRP_SUCCESS) {
2833 		vrrp_log(VRRP_ERR, "vrrpd_modify(%s): configuration update "
2834 		    "failed: %s", conf->vvc_name, vrrp_err2str(err));
2835 		if (set_accept)
2836 			(void) vrrpd_set_noaccept(vr, accept);
2837 		(void) memcpy(&vr->vvr_conf, &savconf, sizeof (vrrp_vr_conf_t));
2838 		return (err);
2839 	}
2840 
2841 	if ((mask & VRRP_CONF_PRIORITY) && (vr->vvr_state == VRRP_STATE_BACKUP))
2842 		vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
2843 
2844 	if ((mask & VRRP_CONF_INTERVAL) && (vr->vvr_state == VRRP_STATE_MASTER))
2845 		vr->vvr_timeout = conf->vvc_adver_int;
2846 
2847 	return (VRRP_SUCCESS);
2848 }
2849 
2850 static void
2851 vrrpd_list(vrid_t vrid, char *ifname, int af, vrrp_ret_list_t *ret,
2852     size_t *sizep)
2853 {
2854 	vrrp_vr_t	*vr;
2855 	char		*p = (char *)ret + sizeof (vrrp_ret_list_t);
2856 	size_t		size = (*sizep) - sizeof (vrrp_ret_list_t);
2857 
2858 	vrrp_log(VRRP_DBG0, "vrrpd_list(%d_%s_%s)", vrid, ifname, af_str(af));
2859 
2860 	ret->vrl_cnt = 0;
2861 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
2862 		if (vrid !=  VRRP_VRID_NONE && vr->vvr_conf.vvc_vrid != vrid)
2863 			continue;
2864 
2865 		if (strlen(ifname) != 0 && strcmp(ifname,
2866 		    vr->vvr_conf.vvc_link) == 0) {
2867 			continue;
2868 		}
2869 
2870 		if ((af == AF_INET || af == AF_INET6) &&
2871 		    vr->vvr_conf.vvc_af != af)
2872 			continue;
2873 
2874 		if (size < VRRP_NAME_MAX) {
2875 			vrrp_log(VRRP_DBG1, "vrrpd_list(): buffer size too "
2876 			    "small to hold %d router names", ret->vrl_cnt);
2877 			*sizep = sizeof (vrrp_ret_list_t);
2878 			ret->vrl_err = VRRP_ETOOSMALL;
2879 			return;
2880 		}
2881 		(void) strlcpy(p, vr->vvr_conf.vvc_name, VRRP_NAME_MAX);
2882 		p += (strlen(vr->vvr_conf.vvc_name) + 1);
2883 		ret->vrl_cnt++;
2884 		size -= VRRP_NAME_MAX;
2885 	}
2886 
2887 	*sizep = sizeof (vrrp_ret_list_t) + ret->vrl_cnt * VRRP_NAME_MAX;
2888 	vrrp_log(VRRP_DBG1, "vrrpd_list() return %d", ret->vrl_cnt);
2889 	ret->vrl_err = VRRP_SUCCESS;
2890 }
2891 
2892 static void
2893 vrrpd_query(const char *vn, vrrp_ret_query_t *ret, size_t *sizep)
2894 {
2895 	vrrp_queryinfo_t	*infop;
2896 	vrrp_vr_t		*vr;
2897 	vrrp_intf_t		*vif;
2898 	vrrp_ip_t		*ip;
2899 	struct timeval		now;
2900 	uint32_t		vipcnt = 0;
2901 	size_t			size = *sizep;
2902 
2903 	vrrp_log(VRRP_DBG1, "vrrpd_query(%s)", vn);
2904 
2905 	if ((vr = vrrpd_lookup_vr_by_name(vn)) == NULL) {
2906 		vrrp_log(VRRP_DBG1, "vrrpd_query(): %s does not exist", vn);
2907 		*sizep = sizeof (vrrp_ret_query_t);
2908 		ret->vrq_err = VRRP_ENOTFOUND;
2909 		return;
2910 	}
2911 
2912 	/*
2913 	 * Get the virtual IP list if the router is not in the INIT state.
2914 	 */
2915 	if (vr->vvr_state != VRRP_STATE_INIT) {
2916 		vif = vr->vvr_vif;
2917 		TAILQ_FOREACH(ip, &vif->vvi_iplist, vip_next) {
2918 			vipcnt++;
2919 		}
2920 	}
2921 
2922 	*sizep = sizeof (vrrp_ret_query_t);
2923 	*sizep += (vipcnt == 0) ? 0 : (vipcnt - 1) * sizeof (vrrp_addr_t);
2924 	if (*sizep > size) {
2925 		vrrp_log(VRRP_ERR, "vrrpd_query(): not enough space to hold "
2926 		    "%d virtual IPs", vipcnt);
2927 		*sizep = sizeof (vrrp_ret_query_t);
2928 		ret->vrq_err = VRRP_ETOOSMALL;
2929 		return;
2930 	}
2931 
2932 	(void) gettimeofday(&now, NULL);
2933 
2934 	bzero(ret, *sizep);
2935 	infop = &ret->vrq_qinfo;
2936 	(void) memcpy(&infop->show_vi,
2937 	    &(vr->vvr_conf), sizeof (vrrp_vr_conf_t));
2938 	(void) memcpy(&infop->show_vs,
2939 	    &(vr->vvr_sinfo), sizeof (vrrp_stateinfo_t));
2940 	(void) strlcpy(infop->show_va.va_vnic, vr->vvr_vnic, MAXLINKNAMELEN);
2941 	infop->show_vt.vt_since_last_tran = timeval_to_milli(
2942 	    timeval_delta(now, vr->vvr_sinfo.vs_st_time));
2943 
2944 	if (vr->vvr_state == VRRP_STATE_INIT) {
2945 		ret->vrq_err = VRRP_SUCCESS;
2946 		return;
2947 	}
2948 
2949 	vipcnt = 0;
2950 	TAILQ_FOREACH(ip, &vif->vvi_iplist, vip_next) {
2951 		(void) memcpy(&infop->show_va.va_vips[vipcnt++],
2952 		    &ip->vip_addr, sizeof (vrrp_addr_t));
2953 	}
2954 	infop->show_va.va_vipcnt = vipcnt;
2955 
2956 	(void) memcpy(&infop->show_va.va_primary,
2957 	    &vr->vvr_pif->vvi_pip->vip_addr, sizeof (vrrp_addr_t));
2958 
2959 	(void) memcpy(&infop->show_vp, &(vr->vvr_peer), sizeof (vrrp_peer_t));
2960 
2961 	/*
2962 	 * Check whether there is a peer.
2963 	 */
2964 	if (!VRRPADDR_UNSPECIFIED(vr->vvr_conf.vvc_af,
2965 	    &(vr->vvr_peer.vp_addr))) {
2966 		infop->show_vt.vt_since_last_adv = timeval_to_milli(
2967 		    timeval_delta(now, vr->vvr_peer.vp_time));
2968 	}
2969 
2970 	if (vr->vvr_state == VRRP_STATE_BACKUP) {
2971 		infop->show_vt.vt_master_down_intv =
2972 		    MASTER_DOWN_INTERVAL_VR(vr);
2973 	}
2974 
2975 	ret->vrq_err = VRRP_SUCCESS;
2976 }
2977 
2978 /*
2979  * Build the VRRP packet (not including the IP header). Return the
2980  * payload length.
2981  *
2982  * If zero_pri is set to be B_TRUE, then this is the specical zero-priority
2983  * advertisement which is sent by the Master to indicate that it has been
2984  * stopped participating in VRRP.
2985  */
2986 static size_t
2987 vrrpd_build_vrrp(vrrp_vr_t *vr, uchar_t *buf, int buflen, boolean_t zero_pri)
2988 {
2989 	/* LINTED E_BAD_PTR_CAST_ALIGN */
2990 	vrrp_pkt_t	*vp = (vrrp_pkt_t *)buf;
2991 	/* LINTED E_BAD_PTR_CAST_ALIGN */
2992 	struct in_addr	*a4 = (struct in_addr *)(vp + 1);
2993 	/* LINTED E_BAD_PTR_CAST_ALIGN */
2994 	struct in6_addr *a6 = (struct in6_addr *)(vp + 1);
2995 	vrrp_intf_t	*vif = vr->vvr_vif;
2996 	vrrp_ip_t	*vip;
2997 	int		af = vif->vvi_af;
2998 	size_t		size = sizeof (vrrp_pkt_t);
2999 	uint16_t	rsvd_adver_int;
3000 	int		nip = 0;
3001 
3002 	vrrp_log(VRRP_DBG1, "vrrpd_build_vrrp(%s, %s_priority): intv %d",
3003 	    vr->vvr_conf.vvc_name, zero_pri ? "zero" : "non-zero",
3004 	    vr->vvr_conf.vvc_adver_int);
3005 
3006 	TAILQ_FOREACH(vip, &vif->vvi_iplist, vip_next) {
3007 		if ((size += ((af == AF_INET) ? sizeof (struct in_addr) :
3008 		    sizeof (struct in6_addr))) > buflen) {
3009 			vrrp_log(VRRP_ERR, "vrrpd_build_vrrp(%s): buffer size "
3010 			    "not big enough %d", vr->vvr_conf.vvc_name, size);
3011 			return (0);
3012 		}
3013 
3014 		if (af == AF_INET)
3015 			a4[nip++] = vip->vip_addr.in4.sin_addr;
3016 		else
3017 			a6[nip++] = vip->vip_addr.in6.sin6_addr;
3018 	}
3019 
3020 	if (nip == 0) {
3021 		vrrp_log(VRRP_ERR, "vrrpd_build_vrrp(%s): no virtual IP "
3022 		    "address", vr->vvr_conf.vvc_name);
3023 		return (0);
3024 	}
3025 
3026 	vp->vp_vers_type = (VRRP_VERSION << 4) | VRRP_PKT_ADVERT;
3027 	vp->vp_vrid = vr->vvr_conf.vvc_vrid;
3028 	vp->vp_prio = zero_pri ? VRRP_PRIO_ZERO : vr->vvr_conf.vvc_pri;
3029 
3030 	rsvd_adver_int = MSEC2CENTISEC(vr->vvr_conf.vvc_adver_int) & 0x0fff;
3031 	vp->vp_rsvd_adver_int = htons(rsvd_adver_int);
3032 	vp->vp_ipnum = nip;
3033 
3034 	/*
3035 	 * Set the checksum to 0 first, then caculate it.
3036 	 */
3037 	vp->vp_chksum = 0;
3038 	if (af == AF_INET) {
3039 		vp->vp_chksum = vrrp_cksum4(
3040 		    &vr->vvr_pif->vvi_pip->vip_addr.in4.sin_addr,
3041 		    &vrrp_muladdr4.in4.sin_addr, size, vp);
3042 	} else {
3043 		vp->vp_chksum = vrrp_cksum6(
3044 		    &vr->vvr_pif->vvi_pip->vip_addr.in6.sin6_addr,
3045 		    &vrrp_muladdr6.in6.sin6_addr, size, vp);
3046 	}
3047 
3048 	return (size);
3049 }
3050 
3051 /*
3052  * We need to build the IPv4 header on our own.
3053  */
3054 static vrrp_err_t
3055 vrrpd_send_adv_v4(vrrp_vr_t *vr, uchar_t *buf, size_t len, boolean_t zero_pri)
3056 {
3057 	/* LINTED E_BAD_PTR_CAST_ALIGN */
3058 	struct ip *ip = (struct ip *)buf;
3059 	size_t plen;
3060 
3061 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v4(%s)", vr->vvr_conf.vvc_name);
3062 
3063 	if ((plen = vrrpd_build_vrrp(vr, buf + sizeof (struct ip),
3064 	    len - sizeof (struct ip), zero_pri)) == 0) {
3065 		return (VRRP_ETOOSMALL);
3066 	}
3067 
3068 	ip->ip_hl = sizeof (struct ip) >> 2;
3069 	ip->ip_v = IPV4_VERSION;
3070 	ip->ip_tos = 0;
3071 	plen += sizeof (struct ip);
3072 	ip->ip_len = htons(plen);
3073 	ip->ip_off = 0;
3074 	ip->ip_ttl = VRRP_IP_TTL;
3075 	ip->ip_p = IPPROTO_VRRP;
3076 	ip->ip_src = vr->vvr_pif->vvi_pip->vip_addr.in4.sin_addr;
3077 	ip->ip_dst = vrrp_muladdr4.in4.sin_addr;
3078 
3079 	/*
3080 	 * The kernel will set the IP cksum and the IPv4 identification.
3081 	 */
3082 	ip->ip_id = 0;
3083 	ip->ip_sum = 0;
3084 
3085 	if ((len = sendto(vr->vvr_vif->vvi_sockfd, buf, plen, 0,
3086 	    (const struct sockaddr *)&vrrp_muladdr4,
3087 	    sizeof (struct sockaddr_in))) != plen) {
3088 		vrrp_log(VRRP_ERR, "vrrpd_send_adv_v4(): sendto() on "
3089 		    "(vrid:%d, %s, %s) failed: %s sent:%d expect:%d",
3090 		    vr->vvr_conf.vvc_vrid, vr->vvr_vif->vvi_ifname,
3091 		    af_str(vr->vvr_conf.vvc_af), strerror(errno), len, plen);
3092 		return (VRRP_ESYS);
3093 	}
3094 
3095 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v4(%s) succeed",
3096 	    vr->vvr_conf.vvc_name);
3097 	return (VRRP_SUCCESS);
3098 }
3099 
3100 static vrrp_err_t
3101 vrrpd_send_adv_v6(vrrp_vr_t *vr, uchar_t *buf, size_t len, boolean_t zero_pri)
3102 {
3103 	struct msghdr msg6;
3104 	size_t hoplimit_space = 0;
3105 	size_t pktinfo_space = 0;
3106 	size_t bufspace = 0;
3107 	struct in6_pktinfo *pktinfop;
3108 	struct cmsghdr *cmsgp;
3109 	uchar_t *cmsg_datap;
3110 	struct iovec iov;
3111 	size_t plen;
3112 
3113 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v6(%s)", vr->vvr_conf.vvc_name);
3114 
3115 	if ((plen = vrrpd_build_vrrp(vr, buf, len, zero_pri)) == 0)
3116 		return (VRRP_ETOOSMALL);
3117 
3118 	msg6.msg_control = NULL;
3119 	msg6.msg_controllen = 0;
3120 
3121 	hoplimit_space = sizeof (int);
3122 	bufspace += sizeof (struct cmsghdr) + _MAX_ALIGNMENT +
3123 	    hoplimit_space + _MAX_ALIGNMENT;
3124 
3125 	pktinfo_space = sizeof (struct in6_pktinfo);
3126 	bufspace += sizeof (struct cmsghdr) + _MAX_ALIGNMENT +
3127 	    pktinfo_space + _MAX_ALIGNMENT;
3128 
3129 	/*
3130 	 * We need to temporarily set the msg6.msg_controllen to bufspace
3131 	 * (we will later trim it to actual length used). This is needed because
3132 	 * CMSG_NXTHDR() uses it to check we have not exceeded the bounds.
3133 	 */
3134 	bufspace += sizeof (struct cmsghdr);
3135 	msg6.msg_controllen = bufspace;
3136 
3137 	msg6.msg_control = (struct cmsghdr *)malloc(bufspace);
3138 	if (msg6.msg_control == NULL) {
3139 		vrrp_log(VRRP_ERR, "vrrpd_send_adv_v6(%s): memory allocation "
3140 		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3141 		return (VRRP_ENOMEM);
3142 	}
3143 
3144 	cmsgp = CMSG_FIRSTHDR(&msg6);
3145 
3146 	cmsgp->cmsg_level = IPPROTO_IPV6;
3147 	cmsgp->cmsg_type = IPV6_HOPLIMIT;
3148 	cmsg_datap = CMSG_DATA(cmsgp);
3149 	/* LINTED */
3150 	*(int *)cmsg_datap = VRRP_IP_TTL;
3151 	cmsgp->cmsg_len = cmsg_datap + hoplimit_space - (uchar_t *)cmsgp;
3152 	cmsgp = CMSG_NXTHDR(&msg6, cmsgp);
3153 
3154 	cmsgp->cmsg_level = IPPROTO_IPV6;
3155 	cmsgp->cmsg_type = IPV6_PKTINFO;
3156 	cmsg_datap = CMSG_DATA(cmsgp);
3157 
3158 	/* LINTED */
3159 	pktinfop = (struct in6_pktinfo *)cmsg_datap;
3160 	/*
3161 	 * We don't know if pktinfop->ipi6_addr is aligned properly,
3162 	 * therefore let's use bcopy, instead of assignment.
3163 	 */
3164 	(void) bcopy(&vr->vvr_pif->vvi_pip->vip_addr.in6.sin6_addr,
3165 	    &pktinfop->ipi6_addr, sizeof (struct in6_addr));
3166 
3167 	/*
3168 	 *  We can assume pktinfop->ipi6_ifindex is 32 bit aligned.
3169 	 */
3170 	pktinfop->ipi6_ifindex = vr->vvr_vif->vvi_ifindex;
3171 	cmsgp->cmsg_len = cmsg_datap + pktinfo_space - (uchar_t *)cmsgp;
3172 	cmsgp = CMSG_NXTHDR(&msg6, cmsgp);
3173 	msg6.msg_controllen = (char *)cmsgp - (char *)msg6.msg_control;
3174 
3175 	msg6.msg_name = &vrrp_muladdr6;
3176 	msg6.msg_namelen = sizeof (struct sockaddr_in6);
3177 
3178 	iov.iov_base = buf;
3179 	iov.iov_len = plen;
3180 	msg6.msg_iov = &iov;
3181 	msg6.msg_iovlen = 1;
3182 
3183 	if ((len = sendmsg(vr->vvr_vif->vvi_sockfd,
3184 	    (const struct msghdr *)&msg6, 0)) != plen) {
3185 		vrrp_log(VRRP_ERR, "vrrpd_send_adv_v6(%s): sendmsg() failed: "
3186 		    "%s expect %d sent %d", vr->vvr_conf.vvc_name,
3187 		    strerror(errno), plen, len);
3188 		(void) free(msg6.msg_control);
3189 		return (VRRP_ESYS);
3190 	}
3191 
3192 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v6(%s) succeed",
3193 	    vr->vvr_conf.vvc_name);
3194 	(void) free(msg6.msg_control);
3195 	return (VRRP_SUCCESS);
3196 }
3197 
3198 /*
3199  * Send the VRRP advertisement packets.
3200  */
3201 static vrrp_err_t
3202 vrrpd_send_adv(vrrp_vr_t *vr, boolean_t zero_pri)
3203 {
3204 	uint64_t buf[(IP_MAXPACKET + 1)/8];
3205 
3206 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv(%s, %s_priority)",
3207 	    vr->vvr_conf.vvc_name, zero_pri ? "zero" : "non_zero");
3208 
3209 	assert(vr->vvr_pif->vvi_pip != NULL);
3210 
3211 	if (vr->vvr_pif->vvi_pip == NULL) {
3212 		vrrp_log(VRRP_DBG0, "vrrpd_send_adv(%s): no primary IP "
3213 		    "address", vr->vvr_conf.vvc_name);
3214 		return (VRRP_EINVAL);
3215 	}
3216 
3217 	if (vr->vvr_conf.vvc_af == AF_INET) {
3218 		return (vrrpd_send_adv_v4(vr, (uchar_t *)buf,
3219 		    sizeof (buf), zero_pri));
3220 	} else {
3221 		return (vrrpd_send_adv_v6(vr, (uchar_t *)buf,
3222 		    sizeof (buf), zero_pri));
3223 	}
3224 }
3225 
3226 static void
3227 vrrpd_process_adv(vrrp_vr_t *vr, vrrp_addr_t *from, vrrp_pkt_t *vp)
3228 {
3229 	vrrp_vr_conf_t *conf = &vr->vvr_conf;
3230 	char		peer[INET6_ADDRSTRLEN];
3231 	char		local[INET6_ADDRSTRLEN];
3232 	int		addr_cmp;
3233 	uint16_t	peer_adver_int;
3234 
3235 	/* LINTED E_CONSTANT_CONDITION */
3236 	VRRPADDR2STR(vr->vvr_conf.vvc_af, from, peer, INET6_ADDRSTRLEN,
3237 	    _B_FALSE);
3238 	vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s) from %s", conf->vvc_name,
3239 	    peer);
3240 
3241 	if (vr->vvr_state <= VRRP_STATE_INIT) {
3242 		vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): state: %s, not "
3243 		    "ready", conf->vvc_name, vrrp_state2str(vr->vvr_state));
3244 		return;
3245 	}
3246 
3247 	peer_adver_int = CENTISEC2MSEC(ntohs(vp->vp_rsvd_adver_int) & 0x0fff);
3248 
3249 	/* LINTED E_CONSTANT_CONDITION */
3250 	VRRPADDR2STR(vr->vvr_pif->vvi_af, &vr->vvr_pif->vvi_pip->vip_addr,
3251 	    local, INET6_ADDRSTRLEN, _B_FALSE);
3252 	vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): local/state/pri"
3253 	    "(%s/%s/%d) peer/pri/intv(%s/%d/%d)", conf->vvc_name, local,
3254 	    vrrp_state2str(vr->vvr_state), conf->vvc_pri, peer,
3255 	    vp->vp_prio, peer_adver_int);
3256 
3257 	addr_cmp = ipaddr_cmp(vr->vvr_pif->vvi_af, from,
3258 	    &vr->vvr_pif->vvi_pip->vip_addr);
3259 	if (addr_cmp == 0) {
3260 		vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): local message",
3261 		    conf->vvc_name);
3262 		return;
3263 	} else if (conf->vvc_pri == vp->vp_prio) {
3264 		vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): peer IP %s is %s"
3265 		    " than the local IP %s", conf->vvc_name, peer,
3266 		    addr_cmp > 0 ? "greater" : "less", local);
3267 	}
3268 
3269 	if (conf->vvc_pri == 255) {
3270 		vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): virtual address "
3271 		    "owner received advertisement from %s", conf->vvc_name,
3272 		    peer);
3273 		return;
3274 	}
3275 
3276 	(void) gettimeofday(&vr->vvr_peer_time, NULL);
3277 	(void) memcpy(&vr->vvr_peer_addr, from, sizeof (vrrp_addr_t));
3278 	vr->vvr_peer_prio = vp->vp_prio;
3279 	vr->vvr_peer_adver_int = peer_adver_int;
3280 
3281 	if (vr->vvr_state == VRRP_STATE_BACKUP) {
3282 		vr->vvr_master_adver_int = vr->vvr_peer_adver_int;
3283 		if ((vp->vp_prio == VRRP_PRIO_ZERO) ||
3284 		    (conf->vvc_preempt == _B_FALSE ||
3285 		    vp->vp_prio >= conf->vvc_pri)) {
3286 			(void) iu_cancel_timer(vrrpd_timerq,
3287 			    vr->vvr_timer_id, NULL);
3288 			if (vp->vp_prio == VRRP_PRIO_ZERO) {
3289 				/* the master stops participating in VRRP */
3290 				vr->vvr_timeout = SKEW_TIME_VR(vr);
3291 			} else {
3292 				vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
3293 			}
3294 			if ((vr->vvr_timer_id = iu_schedule_timer_ms(
3295 			    vrrpd_timerq, vr->vvr_timeout, vrrp_b2m_timeout,
3296 			    vr)) == -1) {
3297 				vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): "
3298 				    "start vrrp_b2m_timeout(%d) failed",
3299 				    conf->vvc_name, vr->vvr_timeout);
3300 			} else {
3301 				vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): "
3302 				    "start vrrp_b2m_timeout(%d)",
3303 				    conf->vvc_name, vr->vvr_timeout);
3304 			}
3305 		}
3306 	} else if (vr->vvr_state == VRRP_STATE_MASTER) {
3307 		if (vp->vp_prio == VRRP_PRIO_ZERO) {
3308 			(void) vrrpd_send_adv(vr, _B_FALSE);
3309 			(void) iu_cancel_timer(vrrpd_timerq,
3310 			    vr->vvr_timer_id, NULL);
3311 			if ((vr->vvr_timer_id = iu_schedule_timer_ms(
3312 			    vrrpd_timerq, vr->vvr_timeout, vrrp_adv_timeout,
3313 			    vr)) == -1) {
3314 				vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): "
3315 				    "start vrrp_adv_timeout(%d) failed",
3316 				    conf->vvc_name, vr->vvr_timeout);
3317 			} else {
3318 				vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): "
3319 				    "start vrrp_adv_timeout(%d)",
3320 				    conf->vvc_name, vr->vvr_timeout);
3321 			}
3322 		} else if (vp->vp_prio > conf->vvc_pri ||
3323 		    (vp->vp_prio == conf->vvc_pri && addr_cmp > 0)) {
3324 			(void) vrrpd_state_m2b(vr);
3325 		}
3326 	} else {
3327 		assert(_B_FALSE);
3328 	}
3329 }
3330 
3331 static vrrp_err_t
3332 vrrpd_process_vrrp(vrrp_intf_t *pif, vrrp_pkt_t *vp, size_t len,
3333     vrrp_addr_t *from)
3334 {
3335 	vrrp_vr_t	*vr;
3336 	uint8_t		vers_type;
3337 	uint16_t	saved_cksum, cksum;
3338 	char		peer[INET6_ADDRSTRLEN];
3339 
3340 	/* LINTED E_CONSTANT_CONDITION */
3341 	VRRPADDR2STR(pif->vvi_af, from, peer, INET6_ADDRSTRLEN, _B_FALSE);
3342 	vrrp_log(VRRP_DBG0, "vrrpd_process_vrrp(%s) from %s", pif->vvi_ifname,
3343 	    peer);
3344 
3345 	if (len < sizeof (vrrp_pkt_t)) {
3346 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): invalid message "
3347 		    "length %d", len);
3348 		return (VRRP_EINVAL);
3349 	}
3350 
3351 	/*
3352 	 * Verify: VRRP version number and packet type.
3353 	 */
3354 	vers_type = ((vp->vp_vers_type & VRRP_VER_MASK) >> 4);
3355 	if (vers_type != VRRP_VERSION) {
3356 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s) unsupported "
3357 		    "version %d", pif->vvi_ifname, vers_type);
3358 		return (VRRP_EINVAL);
3359 	}
3360 
3361 	if (vp->vp_ipnum == 0) {
3362 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): zero IPvX count",
3363 		    pif->vvi_ifname);
3364 		return (VRRP_EINVAL);
3365 	}
3366 
3367 	if (len - sizeof (vrrp_pkt_t) !=
3368 	    vp->vp_ipnum * (pif->vvi_af == AF_INET ? sizeof (struct in_addr) :
3369 	    sizeof (struct in6_addr))) {
3370 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): invalid IPvX count"
3371 		    " %d", pif->vvi_ifname, vp->vp_ipnum);
3372 		return (VRRP_EINVAL);
3373 	}
3374 
3375 	vers_type = (vp->vp_vers_type & VRRP_TYPE_MASK);
3376 
3377 	/*
3378 	 * verify: VRRP checksum. Note that vrrp_cksum returns network byte
3379 	 * order checksum value;
3380 	 */
3381 	saved_cksum = vp->vp_chksum;
3382 	vp->vp_chksum = 0;
3383 	if (pif->vvi_af == AF_INET) {
3384 		cksum = vrrp_cksum4(&from->in4.sin_addr,
3385 		    &vrrp_muladdr4.in4.sin_addr, len, vp);
3386 	} else {
3387 		cksum = vrrp_cksum6(&from->in6.sin6_addr,
3388 		    &vrrp_muladdr6.in6.sin6_addr, len, vp);
3389 	}
3390 
3391 	if (cksum != saved_cksum) {
3392 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s) invalid "
3393 		    "checksum: expected/real(0x%x/0x%x)", pif->vvi_ifname,
3394 		    cksum, saved_cksum);
3395 		return (VRRP_EINVAL);
3396 	}
3397 
3398 	if ((vr = vrrpd_lookup_vr_by_vrid(pif->vvi_ifname, vp->vp_vrid,
3399 	    pif->vvi_af)) != NULL && vers_type == VRRP_PKT_ADVERT) {
3400 		vrrpd_process_adv(vr, from, vp);
3401 	} else {
3402 		vrrp_log(VRRP_DBG1, "vrrpd_process_vrrp(%s) VRID(%d/%s) "
3403 		    "not configured", pif->vvi_ifname, vp->vp_vrid,
3404 		    af_str(pif->vvi_af));
3405 	}
3406 	return (VRRP_SUCCESS);
3407 }
3408 
3409 /*
3410  * IPv4 socket, the IPv4 header is included.
3411  */
3412 static vrrp_err_t
3413 vrrpd_process_adv_v4(vrrp_intf_t *pif, struct msghdr *msgp, size_t len)
3414 {
3415 	char		abuf[INET6_ADDRSTRLEN];
3416 	struct ip	*ip;
3417 
3418 	vrrp_log(VRRP_DBG0, "vrrpd_process_adv_v4(%s, %d)",
3419 	    pif->vvi_ifname, len);
3420 
3421 	ip = (struct ip *)msgp->msg_iov->iov_base;
3422 
3423 	/* Sanity check */
3424 	if (len < sizeof (struct ip) || len < ntohs(ip->ip_len)) {
3425 		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid length "
3426 		    "%d", pif->vvi_ifname, len);
3427 		return (VRRP_EINVAL);
3428 	}
3429 
3430 	assert(ip->ip_v == IPV4_VERSION);
3431 	assert(ip->ip_p == IPPROTO_VRRP);
3432 	assert(msgp->msg_namelen == sizeof (struct sockaddr_in));
3433 
3434 	if (vrrp_muladdr4.in4.sin_addr.s_addr != ip->ip_dst.s_addr) {
3435 		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3436 		    "destination %s", pif->vvi_ifname,
3437 		    inet_ntop(pif->vvi_af, &(ip->ip_dst), abuf, sizeof (abuf)));
3438 		return (VRRP_EINVAL);
3439 	}
3440 
3441 	if (ip->ip_ttl != VRRP_IP_TTL) {
3442 		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3443 		    "ttl %d", pif->vvi_ifname, ip->ip_ttl);
3444 		return (VRRP_EINVAL);
3445 	}
3446 
3447 	/*
3448 	 * Note that the ip_len contains only the IP payload length.
3449 	 */
3450 	return (vrrpd_process_vrrp(pif,
3451 	    /* LINTED E_BAD_PTR_CAST_ALIGN */
3452 	    (vrrp_pkt_t *)((char *)ip + ip->ip_hl * 4), ntohs(ip->ip_len),
3453 	    (vrrp_addr_t *)msgp->msg_name));
3454 }
3455 
3456 /*
3457  * IPv6 socket, check the ancillary_data.
3458  */
3459 static vrrp_err_t
3460 vrrpd_process_adv_v6(vrrp_intf_t *pif, struct msghdr *msgp, size_t len)
3461 {
3462 	struct cmsghdr		*cmsgp;
3463 	uchar_t			*cmsg_datap;
3464 	struct in6_pktinfo	*pktinfop;
3465 	char			abuf[INET6_ADDRSTRLEN];
3466 	int			ttl;
3467 
3468 	vrrp_log(VRRP_DBG1, "vrrpd_process_adv_v6(%s, %d)",
3469 	    pif->vvi_ifname, len);
3470 
3471 	/* Sanity check */
3472 	if (len < sizeof (vrrp_pkt_t)) {
3473 		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v6(%s): invalid length "
3474 		    "%d", pif->vvi_ifname, len);
3475 		return (VRRP_EINVAL);
3476 	}
3477 
3478 	assert(msgp->msg_namelen == sizeof (struct sockaddr_in6));
3479 
3480 	for (cmsgp = CMSG_FIRSTHDR(msgp); cmsgp != NULL;
3481 	    cmsgp = CMSG_NXTHDR(msgp, cmsgp)) {
3482 		assert(cmsgp->cmsg_level == IPPROTO_IPV6);
3483 		cmsg_datap = CMSG_DATA(cmsgp);
3484 
3485 		switch (cmsgp->cmsg_type) {
3486 		case IPV6_HOPLIMIT:
3487 			/* LINTED E_BAD_PTR_CAST_ALIGN */
3488 			if ((ttl = *(int *)cmsg_datap) == VRRP_IP_TTL)
3489 				break;
3490 
3491 			vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3492 			    "ttl %d", pif->vvi_ifname, ttl);
3493 			return (VRRP_EINVAL);
3494 		case IPV6_PKTINFO:
3495 			/* LINTED E_BAD_PTR_CAST_ALIGN */
3496 			pktinfop = (struct in6_pktinfo *)cmsg_datap;
3497 			if (IN6_ARE_ADDR_EQUAL(&pktinfop->ipi6_addr,
3498 			    &vrrp_muladdr6.in6.sin6_addr)) {
3499 				break;
3500 			}
3501 
3502 			vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3503 			    "destination %s", pif->vvi_ifname,
3504 			    inet_ntop(pif->vvi_af, &pktinfop->ipi6_addr, abuf,
3505 			    sizeof (abuf)));
3506 			return (VRRP_EINVAL);
3507 		}
3508 	}
3509 
3510 	return (vrrpd_process_vrrp(pif, msgp->msg_iov->iov_base, len,
3511 	    msgp->msg_name));
3512 }
3513 
3514 /* ARGSUSED */
3515 static void
3516 vrrpd_sock_handler(iu_eh_t *eh, int s, short events, iu_event_id_t id,
3517     void *arg)
3518 {
3519 	struct msghdr		msg;
3520 	vrrp_addr_t		from;
3521 	uint64_t		buf[(IP_MAXPACKET + 1)/8];
3522 	uint64_t		ancillary_data[(IP_MAXPACKET + 1)/8];
3523 	vrrp_intf_t		*pif = arg;
3524 	int			af = pif->vvi_af;
3525 	int			len;
3526 	struct iovec		iov;
3527 
3528 	vrrp_log(VRRP_DBG1, "vrrpd_sock_handler(%s)", pif->vvi_ifname);
3529 
3530 	msg.msg_name = (struct sockaddr *)&from;
3531 	msg.msg_namelen = (af == AF_INET) ? sizeof (struct sockaddr_in) :
3532 	    sizeof (struct sockaddr_in6);
3533 	iov.iov_base = (char *)buf;
3534 	iov.iov_len = sizeof (buf);
3535 	msg.msg_iov = &iov;
3536 	msg.msg_iovlen = 1;
3537 	msg.msg_control = ancillary_data;
3538 	msg.msg_controllen = sizeof (ancillary_data);
3539 
3540 	if ((len = recvmsg(s, &msg, 0)) == -1) {
3541 		vrrp_log(VRRP_ERR, "vrrpd_sock_handler() recvmsg(%s) "
3542 		    "failed: %s", pif->vvi_ifname, strerror(errno));
3543 		return;
3544 	}
3545 
3546 	/*
3547 	 * Ignore packets whose control buffers that don't fit
3548 	 */
3549 	if (msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) {
3550 		vrrp_log(VRRP_ERR, "vrrpd_sock_handler() %s buffer not "
3551 		    "big enough", pif->vvi_ifname);
3552 		return;
3553 	}
3554 
3555 	if (af == AF_INET)
3556 		(void) vrrpd_process_adv_v4(pif, &msg, len);
3557 	else
3558 		(void) vrrpd_process_adv_v6(pif, &msg, len);
3559 }
3560 
3561 /*
3562  * Create the socket which is used to receive VRRP packets. Virtual routers
3563  * that configured on the same physical interface share the same socket.
3564  */
3565 static vrrp_err_t
3566 vrrpd_init_rxsock(vrrp_vr_t *vr)
3567 {
3568 	vrrp_intf_t *pif;	/* Physical interface used to recv packets */
3569 	struct group_req greq;
3570 	struct sockaddr_storage *muladdr;
3571 	int af, proto;
3572 	int on = 1;
3573 	vrrp_err_t err = VRRP_SUCCESS;
3574 
3575 	vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s)", vr->vvr_conf.vvc_name);
3576 
3577 	/*
3578 	 * The RX sockets may already been initialized.
3579 	 */
3580 	if ((pif = vr->vvr_pif) != NULL) {
3581 		vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s) already done on %s",
3582 		    vr->vvr_conf.vvc_name, pif->vvi_ifname);
3583 		assert(pif->vvi_sockfd != -1);
3584 		return (VRRP_SUCCESS);
3585 	}
3586 
3587 	/*
3588 	 * If no IP addresses configured on the primary interface,
3589 	 * return failure.
3590 	 */
3591 	af = vr->vvr_conf.vvc_af;
3592 	pif = vrrpd_lookup_if(vr->vvr_conf.vvc_link, af);
3593 	if (pif == NULL) {
3594 		vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s): no IP address "
3595 		    "over %s/%s", vr->vvr_conf.vvc_name,
3596 		    vr->vvr_conf.vvc_link, af_str(af));
3597 		return (VRRP_ENOPRIM);
3598 	}
3599 
3600 	proto = (af == AF_INET ? IPPROTO_IP : IPPROTO_IPV6);
3601 	if (pif->vvi_nvr++ == 0) {
3602 		assert(pif->vvi_sockfd < 0);
3603 		pif->vvi_sockfd = socket(af, SOCK_RAW, IPPROTO_VRRP);
3604 		if (pif->vvi_sockfd < 0) {
3605 			vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): socket() "
3606 			    "failed %s", vr->vvr_conf.vvc_name,
3607 			    strerror(errno));
3608 			err = VRRP_ESYS;
3609 			goto done;
3610 		}
3611 
3612 		/*
3613 		 * Join the multicast group to receive VRRP packets.
3614 		 */
3615 		if (af == AF_INET) {
3616 			muladdr = (struct sockaddr_storage *)
3617 			    (void *)&vrrp_muladdr4;
3618 		} else {
3619 			muladdr = (struct sockaddr_storage *)
3620 			    (void *)&vrrp_muladdr6;
3621 		}
3622 
3623 		greq.gr_interface = pif->vvi_ifindex;
3624 		(void) memcpy(&greq.gr_group, muladdr,
3625 		    sizeof (struct sockaddr_storage));
3626 		if (setsockopt(pif->vvi_sockfd, proto, MCAST_JOIN_GROUP, &greq,
3627 		    sizeof (struct group_req)) < 0) {
3628 			vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3629 			    "join_group(%d) failed: %s", vr->vvr_conf.vvc_name,
3630 			    pif->vvi_ifindex, strerror(errno));
3631 			err = VRRP_ESYS;
3632 			goto done;
3633 		} else {
3634 			vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s): "
3635 			    "join_group(%d) succeeded", vr->vvr_conf.vvc_name,
3636 			    pif->vvi_ifindex);
3637 		}
3638 
3639 		/*
3640 		 * Unlike IPv4, the IPv6 raw socket does not pass the IP header
3641 		 * when a packet is received. Call setsockopt() to receive such
3642 		 * information.
3643 		 */
3644 		if (af == AF_INET6) {
3645 			/*
3646 			 * Enable receipt of destination address info
3647 			 */
3648 			if (setsockopt(pif->vvi_sockfd, proto, IPV6_RECVPKTINFO,
3649 			    (char *)&on, sizeof (on)) < 0) {
3650 				vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3651 				    "enable recvpktinfo failed: %s",
3652 				    vr->vvr_conf.vvc_name, strerror(errno));
3653 				err = VRRP_ESYS;
3654 				goto done;
3655 			}
3656 
3657 			/*
3658 			 * Enable receipt of hoplimit info
3659 			 */
3660 			if (setsockopt(pif->vvi_sockfd, proto,
3661 			    IPV6_RECVHOPLIMIT, (char *)&on, sizeof (on)) < 0) {
3662 				vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3663 				    "enable recvhoplimit failed: %s",
3664 				    vr->vvr_conf.vvc_name, strerror(errno));
3665 				err = VRRP_ESYS;
3666 				goto done;
3667 			}
3668 		}
3669 
3670 		if ((pif->vvi_eid = iu_register_event(vrrpd_eh,
3671 		    pif->vvi_sockfd, POLLIN, vrrpd_sock_handler, pif)) == -1) {
3672 			vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3673 			    "iu_register_event() failed",
3674 			    vr->vvr_conf.vvc_name);
3675 			err = VRRP_ESYS;
3676 			goto done;
3677 		}
3678 	} else {
3679 		vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s) over %s already "
3680 		    "done %d", vr->vvr_conf.vvc_name, pif->vvi_ifname,
3681 		    pif->vvi_nvr);
3682 		assert(IS_PRIMARY_INTF(pif));
3683 	}
3684 
3685 done:
3686 	vr->vvr_pif = pif;
3687 	if (err != VRRP_SUCCESS)
3688 		vrrpd_fini_rxsock(vr);
3689 
3690 	return (err);
3691 }
3692 
3693 /*
3694  * Delete the socket which is used to receive VRRP packets for the given
3695  * VRRP router. Since all virtual routers that configured on the same
3696  * physical interface share the same socket, the socket is only closed
3697  * when the last VRRP router share this socket is deleted.
3698  */
3699 static void
3700 vrrpd_fini_rxsock(vrrp_vr_t *vr)
3701 {
3702 	vrrp_intf_t	*pif = vr->vvr_pif;
3703 
3704 	vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s)", vr->vvr_conf.vvc_name);
3705 
3706 	if (pif == NULL)
3707 		return;
3708 
3709 	if (--pif->vvi_nvr == 0) {
3710 		vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s) over %s",
3711 		    vr->vvr_conf.vvc_name, pif->vvi_ifname);
3712 		(void) iu_unregister_event(vrrpd_eh, pif->vvi_eid, NULL);
3713 		(void) close(pif->vvi_sockfd);
3714 		pif->vvi_pip = NULL;
3715 		pif->vvi_sockfd = -1;
3716 		pif->vvi_eid = -1;
3717 	} else {
3718 		vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s) over %s %d",
3719 		    vr->vvr_conf.vvc_name, pif->vvi_ifname, pif->vvi_nvr);
3720 	}
3721 	vr->vvr_pif = NULL;
3722 }
3723 
3724 /*
3725  * Create the socket which is used to send VRRP packets. Further, set
3726  * the IFF_NOACCEPT flag based on the VRRP router's accept mode.
3727  */
3728 static vrrp_err_t
3729 vrrpd_init_txsock(vrrp_vr_t *vr)
3730 {
3731 	int		af;
3732 	vrrp_intf_t	*vif;
3733 	vrrp_err_t	err;
3734 
3735 	vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s)", vr->vvr_conf.vvc_name);
3736 
3737 	if (vr->vvr_vif != NULL) {
3738 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s) already done on %s",
3739 		    vr->vvr_conf.vvc_name, vr->vvr_vif->vvi_ifname);
3740 		return (VRRP_SUCCESS);
3741 	}
3742 
3743 	af = vr->vvr_conf.vvc_af;
3744 	if ((vif = vrrpd_lookup_if(vr->vvr_vnic, af)) == NULL) {
3745 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s) no IP address over "
3746 		    "%s/%s", vr->vvr_conf.vvc_name, vr->vvr_vnic, af_str(af));
3747 		return (VRRP_ENOVIRT);
3748 	}
3749 
3750 	vr->vvr_vif = vif;
3751 	if (vr->vvr_conf.vvc_af == AF_INET)
3752 		err = vrrpd_init_txsock_v4(vr);
3753 	else
3754 		err = vrrpd_init_txsock_v6(vr);
3755 
3756 	if (err != VRRP_SUCCESS)
3757 		goto done;
3758 
3759 	/*
3760 	 * The interface should start with IFF_NOACCEPT flag not set, only
3761 	 * call this function when the VRRP router requires IFF_NOACCEPT.
3762 	 */
3763 	if (!vr->vvr_conf.vvc_accept)
3764 		err = vrrpd_set_noaccept(vr, _B_TRUE);
3765 
3766 done:
3767 	if (err != VRRP_SUCCESS) {
3768 		(void) close(vif->vvi_sockfd);
3769 		vif->vvi_sockfd = -1;
3770 		vr->vvr_vif = NULL;
3771 	}
3772 
3773 	return (err);
3774 }
3775 
3776 /*
3777  * Create the IPv4 socket which is used to send VRRP packets. Note that
3778  * the destination MAC address of VRRP advertisement must be the virtual
3779  * MAC address, so we specify the output interface to be the specific VNIC.
3780  */
3781 static vrrp_err_t
3782 vrrpd_init_txsock_v4(vrrp_vr_t *vr)
3783 {
3784 	vrrp_intf_t *vif;	/* VNIC interface used to send packets */
3785 	vrrp_ip_t *vip;		/* The first IP over the VNIC */
3786 	int on = 1;
3787 	char off = 0;
3788 	vrrp_err_t err = VRRP_SUCCESS;
3789 	char abuf[INET6_ADDRSTRLEN];
3790 
3791 	vif = vr->vvr_vif;
3792 	assert(vr->vvr_conf.vvc_af == AF_INET);
3793 	assert(vif != NULL);
3794 
3795 	vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v4(%s) over %s",
3796 	    vr->vvr_conf.vvc_name, vif->vvi_ifname);
3797 
3798 	if (vif->vvi_sockfd != -1) {
3799 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v4(%s) already done "
3800 		    "over %s", vr->vvr_conf.vvc_name, vif->vvi_ifname);
3801 		return (VRRP_SUCCESS);
3802 	}
3803 
3804 	vif->vvi_sockfd = socket(vif->vvi_af, SOCK_RAW, IPPROTO_VRRP);
3805 	if (vif->vvi_sockfd < 0) {
3806 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): socket() "
3807 		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3808 		err = VRRP_ESYS;
3809 		goto done;
3810 	}
3811 
3812 	/*
3813 	 * Include the IP header, so that we can specify the IP address/ttl.
3814 	 */
3815 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_HDRINCL, (char *)&on,
3816 	    sizeof (on)) < 0) {
3817 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): ip_hdrincl "
3818 		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3819 		err = VRRP_ESYS;
3820 		goto done;
3821 	}
3822 
3823 	/*
3824 	 * Disable multicast loopback.
3825 	 */
3826 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_MULTICAST_LOOP, &off,
3827 	    sizeof (char)) == -1) {
3828 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): disable "
3829 		    "multicast_loop failed: %s", vr->vvr_conf.vvc_name,
3830 		    strerror(errno));
3831 		err = VRRP_ESYS;
3832 		goto done;
3833 	}
3834 
3835 	vip = TAILQ_FIRST(&vif->vvi_iplist);
3836 	/* LINTED E_CONSTANT_CONDITION */
3837 	VRRPADDR2STR(vif->vvi_af, &vip->vip_addr, abuf, INET6_ADDRSTRLEN,
3838 	    _B_FALSE);
3839 
3840 	/*
3841 	 * Set the output interface to send the VRRP packet.
3842 	 */
3843 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_MULTICAST_IF,
3844 	    &vip->vip_addr.in4.sin_addr, sizeof (struct in_addr)) < 0) {
3845 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): multcast_if(%s) "
3846 		    "failed: %s", vr->vvr_conf.vvc_name, abuf, strerror(errno));
3847 		err = VRRP_ESYS;
3848 	} else {
3849 		vrrp_log(VRRP_DBG0, "vrrpd_init_txsock_v4(%s): multcast_if(%s) "
3850 		    "succeed", vr->vvr_conf.vvc_name, abuf);
3851 	}
3852 
3853 done:
3854 	if (err != VRRP_SUCCESS) {
3855 		(void) close(vif->vvi_sockfd);
3856 		vif->vvi_sockfd = -1;
3857 	}
3858 
3859 	return (err);
3860 }
3861 
3862 /*
3863  * Create the IPv6 socket which is used to send VRRP packets. Note that
3864  * the destination must be the virtual MAC address, so we specify the output
3865  * interface to be the specific VNIC.
3866  */
3867 static vrrp_err_t
3868 vrrpd_init_txsock_v6(vrrp_vr_t *vr)
3869 {
3870 	vrrp_intf_t *vif;	/* VNIC interface used to send packets */
3871 	int off = 0, ttl = VRRP_IP_TTL;
3872 	vrrp_err_t err = VRRP_SUCCESS;
3873 
3874 	vif = vr->vvr_vif;
3875 	assert(vr->vvr_conf.vvc_af == AF_INET6);
3876 	assert(vif != NULL);
3877 
3878 	vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s) over %s",
3879 	    vr->vvr_conf.vvc_name, vif->vvi_ifname);
3880 
3881 	if (vif->vvi_sockfd != -1) {
3882 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s) already done "
3883 		    "over %s", vr->vvr_conf.vvc_name, vif->vvi_ifname);
3884 		return (VRRP_SUCCESS);
3885 	}
3886 
3887 	vif->vvi_sockfd = socket(vif->vvi_af, SOCK_RAW, IPPROTO_VRRP);
3888 	if (vif->vvi_sockfd < 0) {
3889 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): socket() "
3890 		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3891 		err = VRRP_ESYS;
3892 		goto done;
3893 	}
3894 
3895 	/*
3896 	 * Disable multicast loopback.
3897 	 */
3898 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
3899 	    &off, sizeof (int)) == -1) {
3900 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): disable "
3901 		    "multicast_loop failed: %s", vr->vvr_conf.vvc_name,
3902 		    strerror(errno));
3903 		err = VRRP_ESYS;
3904 		goto done;
3905 	}
3906 
3907 	/*
3908 	 * Set the multicast TTL.
3909 	 */
3910 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_HOPS,
3911 	    &ttl, sizeof (int)) == -1) {
3912 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): enable "
3913 		    "multicast_hops %d failed: %s", vr->vvr_conf.vvc_name,
3914 		    ttl, strerror(errno));
3915 		err = VRRP_ESYS;
3916 		goto done;
3917 	}
3918 
3919 	/*
3920 	 * Set the output interface to send the VRRP packet.
3921 	 */
3922 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_IF,
3923 	    &vif->vvi_ifindex, sizeof (uint32_t)) < 0) {
3924 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): multicast_if(%d) "
3925 		    "failed: %s", vr->vvr_conf.vvc_name, vif->vvi_ifindex,
3926 		    strerror(errno));
3927 		err = VRRP_ESYS;
3928 	} else {
3929 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s): multicast_if(%d)"
3930 		    " succeed", vr->vvr_conf.vvc_name, vif->vvi_ifindex);
3931 	}
3932 
3933 done:
3934 	if (err != VRRP_SUCCESS) {
3935 		(void) close(vif->vvi_sockfd);
3936 		vif->vvi_sockfd = -1;
3937 	}
3938 
3939 	return (err);
3940 }
3941 
3942 /*
3943  * Delete the socket which is used to send VRRP packets. Further, clear
3944  * the IFF_NOACCEPT flag based on the VRRP router's accept mode.
3945  */
3946 static void
3947 vrrpd_fini_txsock(vrrp_vr_t *vr)
3948 {
3949 	vrrp_intf_t *vif = vr->vvr_vif;
3950 
3951 	vrrp_log(VRRP_DBG1, "vrrpd_fini_txsock(%s)", vr->vvr_conf.vvc_name);
3952 
3953 	if (vif != NULL) {
3954 		if (!vr->vvr_conf.vvc_accept)
3955 			(void) vrrpd_set_noaccept(vr, _B_FALSE);
3956 		(void) close(vif->vvi_sockfd);
3957 		vif->vvi_sockfd = -1;
3958 		vr->vvr_vif = NULL;
3959 	}
3960 }
3961 
/*
 * Given the pseudo header cksum value (sum), calculate the cksum with
 * the rest of the VRRP packet: the plen bytes at p are added to sum as
 * 16-bit words, the carries are folded back into the low 16 bits, and the
 * one's complement of the result is returned. A result of 0 is returned
 * as 0xffff.
 *
 * The accumulation is done in an unsigned 32-bit variable: with a signed
 * int, a maximum-size payload on top of the pseudo-header sum can exceed
 * INT_MAX, which is undefined behavior and breaks the carry folding.
 */
static uint16_t
in_cksum(int sum, uint16_t plen, void *p)
{
	uint32_t acc = (uint32_t)sum;
	unsigned int nleft = plen;
	const uint16_t *w = p;
	uint16_t odd_byte = 0;
	uint16_t answer;

	while (nleft > 1) {
		acc += *w++;
		nleft -= 2;
	}

	/* mop up an odd byte, if necessary */
	if (nleft == 1) {
		/* The byte goes into the first (lowest-address) half. */
		*(unsigned char *)(&odd_byte) = *(const unsigned char *)w;
		acc += odd_byte;
	}

	/*
	 * add back carry outs from top 16 bits to low 16 bits
	 */
	acc = (acc >> 16) + (acc & 0xffff);	/* add hi 16 to low 16 */
	acc += (acc >> 16);			/* add carry */
	answer = (uint16_t)~acc;		/* truncate to 16 bits */
	return (answer == 0 ? (uint16_t)~0 : answer);
}
3995 
/*
 * Pseudo header for v4.
 *
 * vrrp_cksum4() fills in one of these and sums its memory image as a
 * sequence of 16-bit words, so the order and sizes of the fields matter.
 */
struct pshv4 {
	struct in_addr	ph4_src;	/* source address */
	struct in_addr	ph4_dst;	/* destination address */
	uint8_t		ph4_zero;	/* always zero */
	uint8_t		ph4_protocol;	/* protocol used, IPPROTO_VRRP */
	uint16_t	ph4_len;	/* VRRP payload len */
};
4004 
4005 /*
4006  * Checksum routine for VRRP checksum. Note that plen is the upper-layer
4007  * packet length (in the host byte order), and both IP source and destination
4008  * addresses are in the network byte order.
4009  */
4010 static uint16_t
4011 vrrp_cksum4(struct in_addr *src, struct in_addr *dst, uint16_t plen,
4012     vrrp_pkt_t *vp)
4013 {
4014 	struct pshv4 ph4;
4015 	int nleft;
4016 	uint16_t *w;
4017 	int sum = 0;
4018 
4019 	ph4.ph4_src = *src;
4020 	ph4.ph4_dst = *dst;
4021 	ph4.ph4_zero = 0;
4022 	ph4.ph4_protocol = IPPROTO_VRRP;
4023 	ph4.ph4_len = htons(plen);
4024 
4025 	/*
4026 	 *  Our algorithm is simple, using a 32 bit accumulator (sum),
4027 	 *  we add sequential 16 bit words to it, and at the end, fold
4028 	 *  back all the carry bits from the top 16 bits into the lower
4029 	 *  16 bits.
4030 	 */
4031 	nleft = sizeof (struct pshv4);
4032 	w = (uint16_t *)&ph4;
4033 	while (nleft > 0) {
4034 		sum += *w++;
4035 		nleft -= 2;
4036 	}
4037 
4038 	return (in_cksum(sum, plen, vp));
4039 }
4040 
/*
 * Pseudo header for v6.
 *
 * vrrp_cksum6() fills in one of these and sums its memory image as a
 * sequence of 16-bit words, so the order and sizes of the fields matter.
 * NOTE(review): the position of the two bitfields inside their 32-bit unit
 * is compiler-defined; presumably the supported compilers place
 * ph6_protocol in the last byte -- confirm before porting.
 */
struct pshv6 {
	struct in6_addr	ph6_src;	/* source address */
	struct in6_addr	ph6_dst;	/* destination address */
	uint32_t	ph6_len;	/* VRRP payload len */
	uint32_t	ph6_zero : 24,	/* set to zero by vrrp_cksum6() */
			ph6_protocol : 8; /* protocol used, IPPROTO_VRRP */
};
4049 
4050 /*
4051  * Checksum routine for VRRP checksum. Note that plen is the upper-layer
4052  * packet length (in the host byte order), and both IP source and destination
4053  * addresses are in the network byte order.
4054  */
4055 static uint16_t
4056 vrrp_cksum6(struct in6_addr *src, struct in6_addr *dst, uint16_t plen,
4057     vrrp_pkt_t *vp)
4058 {
4059 	struct pshv6 ph6;
4060 	int nleft;
4061 	uint16_t *w;
4062 	int sum = 0;
4063 
4064 	ph6.ph6_src = *src;
4065 	ph6.ph6_dst = *dst;
4066 	ph6.ph6_zero = 0;
4067 	ph6.ph6_protocol = IPPROTO_VRRP;
4068 	ph6.ph6_len = htonl((uint32_t)plen);
4069 
4070 	/*
4071 	 *  Our algorithm is simple, using a 32 bit accumulator (sum),
4072 	 *  we add sequential 16 bit words to it, and at the end, fold
4073 	 *  back all the carry bits from the top 16 bits into the lower
4074 	 *  16 bits.
4075 	 */
4076 	nleft = sizeof (struct pshv6);
4077 	w = (uint16_t *)&ph6;
4078 	while (nleft > 0) {
4079 		sum += *w++;
4080 		nleft -= 2;
4081 	}
4082 
4083 	return (in_cksum(sum, plen, vp));
4084 }
4085 
4086 vrrp_err_t
4087 vrrpd_state_i2m(vrrp_vr_t *vr)
4088 {
4089 	vrrp_err_t	err;
4090 
4091 	vrrp_log(VRRP_DBG1, "vrrpd_state_i2m(%s)", vr->vvr_conf.vvc_name);
4092 
4093 	vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_MASTER, vr);
4094 	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4095 		return (err);
4096 
4097 	(void) vrrpd_send_adv(vr, _B_FALSE);
4098 
4099 	vr->vvr_err = VRRP_SUCCESS;
4100 	vr->vvr_timeout = vr->vvr_conf.vvc_adver_int;
4101 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4102 	    vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) {
4103 		vrrp_log(VRRP_ERR, "vrrpd_state_i2m(): unable to start timer");
4104 		return (VRRP_ESYS);
4105 	} else {
4106 		vrrp_log(VRRP_DBG1, "vrrpd_state_i2m(%s): start "
4107 		    "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name,
4108 		    vr->vvr_timeout);
4109 	}
4110 	return (VRRP_SUCCESS);
4111 }
4112 
4113 vrrp_err_t
4114 vrrpd_state_i2b(vrrp_vr_t *vr)
4115 {
4116 	vrrp_err_t	err;
4117 
4118 	vrrp_log(VRRP_DBG1, "vrrpd_state_i2b(%s)", vr->vvr_conf.vvc_name);
4119 
4120 	vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_BACKUP, vr);
4121 	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4122 		return (err);
4123 
4124 	/*
4125 	 * Reinitialize the Master advertisement interval to be the configured
4126 	 * value.
4127 	 */
4128 	vr->vvr_err = VRRP_SUCCESS;
4129 	vr->vvr_master_adver_int = vr->vvr_conf.vvc_adver_int;
4130 	vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
4131 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4132 	    vr->vvr_timeout, vrrp_b2m_timeout, vr)) == -1) {
4133 		vrrp_log(VRRP_ERR, "vrrpd_state_i2b(): unable to set timer");
4134 		return (VRRP_ESYS);
4135 	} else {
4136 		vrrp_log(VRRP_DBG1, "vrrpd_state_i2b(%s): start "
4137 		    "vrrp_b2m_timeout(%d)", vr->vvr_conf.vvc_name,
4138 		    vr->vvr_timeout);
4139 	}
4140 	return (VRRP_SUCCESS);
4141 }
4142 
/*
 * MASTER --> INIT transition.
 *
 * Records the state change, updates the virtual IP addresses for the new
 * state, forgets the current peer information and cancels the router's
 * pending timer.
 */
void
vrrpd_state_m2i(vrrp_vr_t *vr)
{
	vrrp_log(VRRP_DBG1, "vrrpd_state_m2i(%s)", vr->vvr_conf.vvc_name);

	vrrpd_state_trans(VRRP_STATE_MASTER, VRRP_STATE_INIT, vr);
	/*
	 * "checkonly" is set so that a failed update does not delete the
	 * address or the interface; the result is deliberately ignored.
	 */
	(void) vrrpd_virtualip_update(vr, _B_TRUE);
	bzero(&vr->vvr_peer, sizeof (vrrp_peer_t));
	(void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
}
4153 
/*
 * BACKUP --> INIT transition.
 *
 * Forgets the current peer information, cancels the router's pending timer,
 * records the state change and updates the virtual IP addresses for the new
 * state.
 */
void
vrrpd_state_b2i(vrrp_vr_t *vr)
{
	vrrp_log(VRRP_DBG1, "vrrpd_state_b2i(%s)", vr->vvr_conf.vvc_name);

	bzero(&vr->vvr_peer, sizeof (vrrp_peer_t));
	(void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
	vrrpd_state_trans(VRRP_STATE_BACKUP, VRRP_STATE_INIT, vr);
	/*
	 * "checkonly" is set so that a failed update does not delete the
	 * address or the interface; the result is deliberately ignored.
	 */
	(void) vrrpd_virtualip_update(vr, _B_TRUE);
}
4164 
4165 /* ARGSUSED */
4166 static void
4167 vrrp_b2m_timeout(iu_tq_t *tq, void *arg)
4168 {
4169 	vrrp_vr_t *vr = (vrrp_vr_t *)arg;
4170 
4171 	vrrp_log(VRRP_DBG1, "vrrp_b2m_timeout(%s)", vr->vvr_conf.vvc_name);
4172 	(void) vrrpd_state_b2m(vr);
4173 }
4174 
4175 /* ARGSUSED */
4176 static void
4177 vrrp_adv_timeout(iu_tq_t *tq, void *arg)
4178 {
4179 	vrrp_vr_t *vr = (vrrp_vr_t *)arg;
4180 
4181 	vrrp_log(VRRP_DBG1, "vrrp_adv_timeout(%s)", vr->vvr_conf.vvc_name);
4182 
4183 	(void) vrrpd_send_adv(vr, _B_FALSE);
4184 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4185 	    vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) {
4186 		vrrp_log(VRRP_ERR, "vrrp_adv_timeout(%s): start timer failed",
4187 		    vr->vvr_conf.vvc_name);
4188 	} else {
4189 		vrrp_log(VRRP_DBG1, "vrrp_adv_timeout(%s): start "
4190 		    "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name,
4191 		    vr->vvr_timeout);
4192 	}
4193 }
4194 
4195 vrrp_err_t
4196 vrrpd_state_b2m(vrrp_vr_t *vr)
4197 {
4198 	vrrp_err_t	err;
4199 
4200 	vrrp_log(VRRP_DBG1, "vrrpd_state_b2m(%s)", vr->vvr_conf.vvc_name);
4201 
4202 	vrrpd_state_trans(VRRP_STATE_BACKUP, VRRP_STATE_MASTER, vr);
4203 	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4204 		return (err);
4205 	(void) vrrpd_send_adv(vr, _B_FALSE);
4206 
4207 	vr->vvr_timeout = vr->vvr_conf.vvc_adver_int;
4208 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4209 	    vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) {
4210 		vrrp_log(VRRP_ERR, "vrrpd_state_b2m(%s): start timer failed",
4211 		    vr->vvr_conf.vvc_name);
4212 		return (VRRP_ESYS);
4213 	} else {
4214 		vrrp_log(VRRP_DBG1, "vrrpd_state_b2m(%s): start "
4215 		    "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name,
4216 		    vr->vvr_timeout);
4217 	}
4218 	return (VRRP_SUCCESS);
4219 }
4220 
4221 vrrp_err_t
4222 vrrpd_state_m2b(vrrp_vr_t *vr)
4223 {
4224 	vrrp_err_t	err;
4225 
4226 	vrrp_log(VRRP_DBG1, "vrrpd_state_m2b(%s)", vr->vvr_conf.vvc_name);
4227 
4228 	vrrpd_state_trans(VRRP_STATE_MASTER, VRRP_STATE_BACKUP, vr);
4229 	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4230 		return (err);
4231 
4232 	/*
4233 	 * Cancel the adver_timer.
4234 	 */
4235 	vr->vvr_master_adver_int = vr->vvr_peer_adver_int;
4236 	(void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
4237 	vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
4238 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4239 	    vr->vvr_timeout, vrrp_b2m_timeout, vr)) == -1) {
4240 		vrrp_log(VRRP_ERR, "vrrpd_state_m2b(%s): start timer failed",
4241 		    vr->vvr_conf.vvc_name);
4242 	} else {
4243 		vrrp_log(VRRP_DBG1, "vrrpd_state_m2b(%s) start "
4244 		    "vrrp_b2m_timeout(%d)", vr->vvr_conf.vvc_name,
4245 		    vr->vvr_timeout);
4246 	}
4247 	return (VRRP_SUCCESS);
4248 }
4249 
4250 /*
4251  * Set the IFF_NOACCESS flag on the VNIC interface of the VRRP router
4252  * based on its access mode.
4253  */
4254 static vrrp_err_t
4255 vrrpd_set_noaccept(vrrp_vr_t *vr, boolean_t on)
4256 {
4257 	vrrp_intf_t *vif = vr->vvr_vif;
4258 	uint64_t curr_flags;
4259 	struct lifreq lifr;
4260 	int s;
4261 
4262 	vrrp_log(VRRP_DBG1, "vrrpd_set_noaccept(%s, %s)",
4263 	    vr->vvr_conf.vvc_name, on ? "on" : "off");
4264 
4265 	/*
4266 	 * Possibly no virtual address exists on this VRRP router yet.
4267 	 */
4268 	if (vif == NULL)
4269 		return (VRRP_SUCCESS);
4270 
4271 	vrrp_log(VRRP_DBG1, "vrrpd_set_noaccept(%s, %s)",
4272 	    vif->vvi_ifname, vrrp_state2str(vr->vvr_state));
4273 
4274 	s = (vif->vvi_af == AF_INET) ? vrrpd_ctlsock_fd : vrrpd_ctlsock6_fd;
4275 	(void) strncpy(lifr.lifr_name, vif->vvi_ifname,
4276 	    sizeof (lifr.lifr_name));
4277 	if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
4278 		if (errno != ENXIO && errno != ENOENT) {
4279 			vrrp_log(VRRP_ERR, "vrrpd_set_noaccept(): "
4280 			    "SIOCGLIFFLAGS on %s failed: %s",
4281 			    vif->vvi_ifname, strerror(errno));
4282 		}
4283 		return (VRRP_ESYS);
4284 	}
4285 
4286 	curr_flags = lifr.lifr_flags;
4287 	if (on)
4288 		lifr.lifr_flags |= IFF_NOACCEPT;
4289 	else
4290 		lifr.lifr_flags &= ~IFF_NOACCEPT;
4291 
4292 	if (lifr.lifr_flags != curr_flags) {
4293 		if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
4294 			if (errno != ENXIO && errno != ENOENT) {
4295 				vrrp_log(VRRP_ERR, "vrrpd_set_noaccept(%s): "
4296 				    "SIOCSLIFFLAGS 0x%llx on %s failed: %s",
4297 				    on ? "no_accept" : "accept",
4298 				    lifr.lifr_flags, vif->vvi_ifname,
4299 				    strerror(errno));
4300 			}
4301 			return (VRRP_ESYS);
4302 		}
4303 	}
4304 	return (VRRP_SUCCESS);
4305 }
4306 
4307 static vrrp_err_t
4308 vrrpd_virtualip_updateone(vrrp_intf_t *vif, vrrp_ip_t *ip, boolean_t checkonly)
4309 {
4310 	vrrp_state_t	state = vif->vvi_vr_state;
4311 	struct lifreq	lifr;
4312 	char		abuf[INET6_ADDRSTRLEN];
4313 	int		af = vif->vvi_af;
4314 	uint64_t	curr_flags;
4315 	int		s;
4316 
4317 	assert(IS_VIRTUAL_INTF(vif));
4318 
4319 	/* LINTED E_CONSTANT_CONDITION */
4320 	VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
4321 	vrrp_log(VRRP_DBG1, "vrrpd_virtualip_updateone(%s, %s%s)",
4322 	    vif->vvi_ifname, abuf, checkonly ? ", checkonly" : "");
4323 
4324 	s = (af == AF_INET) ? vrrpd_ctlsock_fd : vrrpd_ctlsock6_fd;
4325 	(void) strncpy(lifr.lifr_name, ip->vip_lifname,
4326 	    sizeof (lifr.lifr_name));
4327 	if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
4328 		if (errno != ENXIO && errno != ENOENT) {
4329 			vrrp_log(VRRP_ERR, "vrrpd_virtualip_updateone(%s): "
4330 			    "SIOCGLIFFLAGS on %s/%s failed: %s",
4331 			    vif->vvi_ifname, lifr.lifr_name, abuf,
4332 			    strerror(errno));
4333 		}
4334 		return (VRRP_ESYS);
4335 	}
4336 
4337 	curr_flags = lifr.lifr_flags;
4338 	if (state == VRRP_STATE_MASTER)
4339 		lifr.lifr_flags |= IFF_UP;
4340 	else
4341 		lifr.lifr_flags &= ~IFF_UP;
4342 
4343 	if (lifr.lifr_flags == curr_flags)
4344 		return (VRRP_SUCCESS);
4345 
4346 	if (checkonly) {
4347 		vrrp_log(VRRP_ERR, "VRRP virtual IP %s/%s was brought %s",
4348 		    ip->vip_lifname, abuf,
4349 		    state == VRRP_STATE_MASTER ? "down" : "up");
4350 		return (VRRP_ESYS);
4351 	} else if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
4352 		if (errno != ENXIO && errno != ENOENT) {
4353 			vrrp_log(VRRP_ERR, "vrrpd_virtualip_updateone(%s, %s): "
4354 			    "bring %s %s/%s failed: %s",
4355 			    vif->vvi_ifname, vrrp_state2str(state),
4356 			    state == VRRP_STATE_MASTER ? "up" : "down",
4357 			    ip->vip_lifname, abuf, strerror(errno));
4358 		}
4359 		return (VRRP_ESYS);
4360 	}
4361 	return (VRRP_SUCCESS);
4362 }
4363 
4364 static vrrp_err_t
4365 vrrpd_virtualip_update(vrrp_vr_t *vr, boolean_t checkonly)
4366 {
4367 	vrrp_state_t		state;
4368 	vrrp_intf_t		*vif = vr->vvr_vif;
4369 	vrrp_ip_t		*ip, *nextip;
4370 	char			abuf[INET6_ADDRSTRLEN];
4371 	vrrp_err_t		err;
4372 
4373 	vrrp_log(VRRP_DBG1, "vrrpd_virtualip_update(%s, %s, %s)%s",
4374 	    vr->vvr_conf.vvc_name, vrrp_state2str(vr->vvr_state),
4375 	    vif->vvi_ifname, checkonly ? " checkonly" : "");
4376 
4377 	state = vr->vvr_state;
4378 	assert(vif != NULL);
4379 	assert(IS_VIRTUAL_INTF(vif));
4380 	assert(vif->vvi_vr_state != state);
4381 	vif->vvi_vr_state = state;
4382 	for (ip = TAILQ_FIRST(&vif->vvi_iplist); ip != NULL; ip = nextip) {
4383 		nextip = TAILQ_NEXT(ip, vip_next);
4384 		err = vrrpd_virtualip_updateone(vif, ip, _B_FALSE);
4385 		if (!checkonly && err != VRRP_SUCCESS) {
4386 			/* LINTED E_CONSTANT_CONDITION */
4387 			VRRPADDR2STR(vif->vvi_af, &ip->vip_addr, abuf,
4388 			    INET6_ADDRSTRLEN, _B_FALSE);
4389 			vrrp_log(VRRP_DBG1, "vrrpd_virtualip_update() update "
4390 			    "%s over %s failed", abuf, vif->vvi_ifname);
4391 			vrrpd_delete_ip(vif, ip);
4392 		}
4393 	}
4394 
4395 	/*
4396 	 * The IP address is deleted when it is failed to be brought
4397 	 * up. If no IP addresses are left, delete this interface.
4398 	 */
4399 	if (!checkonly && TAILQ_EMPTY(&vif->vvi_iplist)) {
4400 		vrrp_log(VRRP_DBG0, "vrrpd_virtualip_update(): "
4401 		    "no IP left over %s", vif->vvi_ifname);
4402 		vrrpd_delete_if(vif, _B_TRUE);
4403 		return (VRRP_ENOVIRT);
4404 	}
4405 	return (VRRP_SUCCESS);
4406 }
4407 
4408 void
4409 vrrpd_state_trans(vrrp_state_t prev_s, vrrp_state_t s, vrrp_vr_t *vr)
4410 {
4411 	vrrp_log(VRRP_DBG1, "vrrpd_state_trans(%s): %s --> %s",
4412 	    vr->vvr_conf.vvc_name, vrrp_state2str(prev_s), vrrp_state2str(s));
4413 
4414 	assert(vr->vvr_state == prev_s);
4415 	vr->vvr_state = s;
4416 	vr->vvr_prev_state = prev_s;
4417 	(void) gettimeofday(&vr->vvr_st_time, NULL);
4418 	(void) vrrpd_post_event(vr->vvr_conf.vvc_name, prev_s, s);
4419 }
4420 
4421 static int
4422 vrrpd_post_event(const char *name, vrrp_state_t prev_st, vrrp_state_t st)
4423 {
4424 	sysevent_id_t	eid;
4425 	nvlist_t	*nvl = NULL;
4426 
4427 	/*
4428 	 * sysevent is not supported in the non-global zone
4429 	 */
4430 	if (getzoneid() != GLOBAL_ZONEID)
4431 		return (0);
4432 
4433 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
4434 		goto failed;
4435 
4436 	if (nvlist_add_uint8(nvl, VRRP_EVENT_VERSION,
4437 	    VRRP_EVENT_CUR_VERSION) != 0)
4438 		goto failed;
4439 
4440 	if (nvlist_add_string(nvl, VRRP_EVENT_ROUTER_NAME, name) != 0)
4441 		goto failed;
4442 
4443 	if (nvlist_add_uint8(nvl, VRRP_EVENT_STATE, st) != 0)
4444 		goto failed;
4445 
4446 	if (nvlist_add_uint8(nvl, VRRP_EVENT_PREV_STATE, prev_st) != 0)
4447 		goto failed;
4448 
4449 	if (sysevent_post_event(EC_VRRP, ESC_VRRP_STATE_CHANGE,
4450 	    SUNW_VENDOR, VRRP_EVENT_PUBLISHER, nvl, &eid) == 0) {
4451 		nvlist_free(nvl);
4452 		return (0);
4453 	}
4454 
4455 failed:
4456 	vrrp_log(VRRP_ERR, "vrrpd_post_event(): `state change (%s --> %s)' "
4457 	    "sysevent posting failed: %s", vrrp_state2str(prev_st),
4458 	    vrrp_state2str(st), strerror(errno));
4459 
4460 	if (nvl != NULL)
4461 		nvlist_free(nvl);
4462 	return (-1);
4463 }
4464 
/*
 * timeval processing functions
 */

/*
 * Convert a timeval to milliseconds, rounding the microsecond part to the
 * nearest millisecond (halves round up). The rounding is done in integer
 * arithmetic; adding 0.5 after the integer division, as was done before,
 * had no effect because the fraction was already discarded.
 *
 * tv.tv_usec is expected to be non-negative, as produced by
 * timeval_delta().
 */
static int
timeval_to_milli(struct timeval tv)
{
	return ((int)(tv.tv_sec * 1000 + (tv.tv_usec + 500) / 1000));
}
4473 
/*
 * Return t1 - t2 as a timeval. When the microsecond difference is
 * negative, one second is borrowed so that tv_usec ends up non-negative.
 */
static struct timeval
timeval_delta(struct timeval t1, struct timeval t2)
{
	struct timeval diff = t1;

	diff.tv_sec -= t2.tv_sec;
	diff.tv_usec -= t2.tv_usec;

	if (diff.tv_usec < 0) {
		diff.tv_sec -= 1;
		diff.tv_usec += 1000000;
	}
	return (diff);
}
4487 
4488 /*
4489  * print error messages to the terminal or to syslog
4490  */
4491 static void
4492 vrrp_log(int level, char *message, ...)
4493 {
4494 	va_list ap;
4495 	int log_level = -1;
4496 
4497 	va_start(ap, message);
4498 
4499 	if (vrrp_logflag == 0) {
4500 		if (level <= vrrp_debug_level) {
4501 			/*
4502 			 * VRRP_ERR goes to stderr, others go to stdout
4503 			 */
4504 			FILE *out = (level <= VRRP_ERR) ? stderr : stdout;
4505 			(void) fprintf(out, "vrrpd: ");
4506 			/* LINTED: E_SEC_PRINTF_VAR_FMT */
4507 			(void) vfprintf(out, message, ap);
4508 			(void) fprintf(out, "\n");
4509 			(void) fflush(out);
4510 		}
4511 		va_end(ap);
4512 		return;
4513 	}
4514 
4515 	/*
4516 	 * translate VRRP_* to LOG_*
4517 	 */
4518 	switch (level) {
4519 	case VRRP_ERR:
4520 		log_level = LOG_ERR;
4521 		break;
4522 	case VRRP_WARNING:
4523 		log_level = LOG_WARNING;
4524 		break;
4525 	case VRRP_NOTICE:
4526 		log_level = LOG_NOTICE;
4527 		break;
4528 	case VRRP_DBG0:
4529 		log_level = LOG_INFO;
4530 		break;
4531 	default:
4532 		log_level = LOG_DEBUG;
4533 		break;
4534 	}
4535 
4536 	/* LINTED: E_SEC_PRINTF_VAR_FMT */
4537 	(void) vsyslog(log_level, message, ap);
4538 	va_end(ap);
4539 }
4540