xref: /titanic_50/usr/src/cmd/cmd-inet/usr.lib/vrrpd/vrrpd.c (revision aab83bb83be7342f6cfccaed8d5fe0b2f404855d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
28  */
29 
30 #include <sys/types.h>
31 #include <sys/socket.h>
32 #include <sys/sockio.h>
33 #include <sys/sysevent/vrrp.h>
34 #include <sys/sysevent/eventdefs.h>
35 #include <sys/varargs.h>
36 #include <auth_attr.h>
37 #include <ctype.h>
38 #include <fcntl.h>
39 #include <stdlib.h>
40 #include <strings.h>
41 #include <errno.h>
42 #include <unistd.h>
43 #include <zone.h>
44 #include <libsysevent.h>
45 #include <limits.h>
46 #include <locale.h>
47 #include <arpa/inet.h>
48 #include <signal.h>
49 #include <assert.h>
50 #include <ucred.h>
51 #include <bsm/adt.h>
52 #include <bsm/adt_event.h>
53 #include <priv_utils.h>
54 #include <libdllink.h>
55 #include <libdlvnic.h>
56 #include <libipadm.h>
57 #include <pwd.h>
58 #include <libvrrpadm.h>
59 #include <net/route.h>
60 #include "vrrpd_impl.h"
61 
62 /*
63  * A VRRP router can only start participating in the VRRP protocol of a
64  * virtual router when all the following conditions are met:
65  *
66  * - The VRRP router is enabled (vr->vvr_conf.vvc_enabled is _B_TRUE)
67  * - The RX socket is successfully created over the physical interface to
68  *   receive the VRRP multicast advertisement. Note that one RX socket can
69  *   be shared by several VRRP routers configured over the same physical
70  *   interface. (See vrrpd_init_rxsock())
71  * - The TX socket is successfully created over the VNIC interface to send
72  *   the VRRP advertisement. (See vrrpd_init_txsock())
73  * - The primary IP address has been successfully selected over the physical
74  *   interface. (See vrrpd_select_primary())
75  *
76  * If a VRRP router is enabled but the other conditions haven't been
77  * satisfied, the router will stay in the VRRP_STATE_INIT state. If all the
78  * above conditions are met, the VRRP router will transition to either
79  * the VRRP_STATE_MASTER or the VRRP_STATE_BACKUP state, depending on the
80  * VRRP protocol.
81  */
82 
/*
 * Advance the pointer p past any leading whitespace characters.
 *
 * The character is converted to unsigned char before being handed to
 * isspace(): passing a negative plain-char value (other than EOF) to the
 * <ctype.h> classifiers is undefined behavior.
 *
 * This expands to a bare while statement and evaluates p more than once,
 * so use it as a standalone statement with a side-effect-free lvalue.
 */
#define	skip_whitespace(p)	while (isspace((unsigned char)*(p))) ++(p)

#define	BUFFSIZE	65536

/* Default configuration file; main() lets the -f option override it. */
#define	VRRPCONF	"/etc/inet/vrrp.conf"
88 
/*
 * Bookkeeping for one PF_ROUTE socket. The vrrpd_rtsocks[] array in this
 * file holds one entry for AF_INET and one for AF_INET6.
 */
89 typedef struct vrrpd_rtsock_s {
90 	int		vrt_af;		/* address family */
91 	int		vrt_fd;		/* socket for the PF_ROUTE msg */
92 	iu_event_id_t	vrt_eid;	/* event ID */
93 } vrrpd_rtsock_t;
94 
95 static ipadm_handle_t	vrrp_ipadm_handle = NULL;	/* libipadm handle */
/* Set to 1 by daemon_init() once openlog() has been called. */
96 static int		vrrp_logflag = 0;
/*
 * Debug level taken from the -d option in main(); a non-zero value keeps
 * vrrpd from daemonizing.
 * NOTE(review): declared boolean_t but assigned the result of atoi() --
 * confirm the intended type against the extern declaration.
 */
97 boolean_t		vrrp_debug_level = 0;
/* Event handler and timer queue, created in vrrpd_init(). */
98 iu_eh_t			*vrrpd_eh = NULL;
99 iu_tq_t			*vrrpd_timerq = NULL;
/* libvrrpadm handle opened by vrrp_open() in vrrpd_init(). */
100 static vrrp_handle_t	vrrpd_vh = NULL;
101 static int		vrrpd_cmdsock_fd = -1;	/* socket to communicate */
102 						/* between vrrpd/libvrrpadm */
/* Event ID returned when vrrpd_cmdsock_fd is registered with vrrpd_eh. */
103 static iu_event_id_t	vrrpd_cmdsock_eid = -1;
104 static int		vrrpd_ctlsock_fd = -1;	/* socket to bring up/down */
105 						/* the virtual IP addresses */
106 static int		vrrpd_ctlsock6_fd = -1;
/* One routing socket slot per address family; -1 means "not open". */
107 static vrrpd_rtsock_t	vrrpd_rtsocks[2] = {
108 	{AF_INET, -1, -1},
109 	{AF_INET6, -1, -1}
110 };
/* ID of the periodic scan timer scheduled in vrrpd_init(); -1 if none. */
111 static iu_timer_id_t	vrrp_scan_timer_id = -1;
112 
/* Lists of all VRRP routers and all known interfaces. */
113 TAILQ_HEAD(vrrp_vr_list_s, vrrp_vr_s);
114 TAILQ_HEAD(vrrp_intf_list_s, vrrp_intf_s);
115 static struct vrrp_vr_list_s	vrrp_vr_list;
116 static struct vrrp_intf_list_s	vrrp_intf_list;
/* Configuration file path: VRRPCONF unless overridden with -f in main(). */
117 static char		vrrpd_conffile[MAXPATHLEN];
118 
119 /*
120  * Multicast address of VRRP advertisement in network byte order
121  */
122 static vrrp_addr_t	vrrp_muladdr4;
123 static vrrp_addr_t	vrrp_muladdr6;
124 
125 static int		vrrpd_scan_interval = 20000;	/* ms */
/*
 * Pipe created in daemon_init(): the daemon (child) writes its startup
 * status to pfds[1] and the waiting parent reads it from pfds[0].
 */
126 static int		pfds[2];
127 
128 /*
129  * macros to calculate skew_time and master_down_timer
130  *
131  * Note that the input is in centisecs and the output is in msecs.
 * NOTE(review): the expressions below contain no explicit unit
 * conversion, so the result is in whatever unit intv is given in --
 * confirm the units of vvr_master_adver_int against its users.
132  */
133 #define	SKEW_TIME(pri, intv)	((intv) * (256 - (pri)) / 256)
134 #define	MASTER_DOWN_INTERVAL(pri, intv)	(3 * (intv) + SKEW_TIME((pri), (intv)))
135 
/* Convenience forms taking the priority and interval from a vrrp_vr_t. */
136 #define	SKEW_TIME_VR(vr)	\
137 	SKEW_TIME((vr)->vvr_conf.vvc_pri, (vr)->vvr_master_adver_int)
138 #define	MASTER_DOWN_INTERVAL_VR(vr)	\
139 	MASTER_DOWN_INTERVAL((vr)->vvr_conf.vvc_pri, (vr)->vvr_master_adver_int)
140 
/* Operation flags; presumably the uint_t argument of vrrpd_updateconf(). */
141 #define	VRRP_CONF_UPDATE	0x01
142 #define	VRRP_CONF_DELETE	0x02
143 
144 static char *af_str(int);
145 
146 static iu_tq_callback_t vrrp_adv_timeout;
147 static iu_tq_callback_t vrrp_b2m_timeout;
148 static iu_eh_callback_t vrrpd_sock_handler;
149 static iu_eh_callback_t vrrpd_rtsock_handler;
150 static iu_eh_callback_t vrrpd_cmdsock_handler;
151 
152 static int daemon_init();
153 
154 static vrrp_err_t vrrpd_init();
155 static void vrrpd_fini();
156 static vrrp_err_t vrrpd_cmdsock_create();
157 static void vrrpd_cmdsock_destroy();
158 static vrrp_err_t vrrpd_rtsock_create();
159 static void vrrpd_rtsock_destroy();
160 static vrrp_err_t vrrpd_ctlsock_create();
161 static void vrrpd_ctlsock_destroy();
162 
163 static void vrrpd_scan_timer(iu_tq_t *, void *);
164 static void vrrpd_scan(int);
165 static vrrp_err_t vrrpd_init_rxsock(vrrp_vr_t *);
166 static void vrrpd_fini_rxsock(vrrp_vr_t *);
167 static vrrp_err_t vrrpd_init_txsock(vrrp_vr_t *);
168 static vrrp_err_t vrrpd_init_txsock_v4(vrrp_vr_t *);
169 static vrrp_err_t vrrpd_init_txsock_v6(vrrp_vr_t *);
170 static void vrrpd_fini_txsock(vrrp_vr_t *);
171 
172 static vrrp_err_t vrrpd_create_vr(vrrp_vr_conf_t *);
173 static vrrp_err_t vrrpd_enable_vr(vrrp_vr_t *);
174 static void vrrpd_disable_vr(vrrp_vr_t *, vrrp_intf_t *, boolean_t);
175 static void vrrpd_delete_vr(vrrp_vr_t *);
176 
177 static vrrp_err_t vrrpd_create(vrrp_vr_conf_t *, boolean_t);
178 static vrrp_err_t vrrpd_delete(const char *);
179 static vrrp_err_t vrrpd_enable(const char *, boolean_t);
180 static vrrp_err_t vrrpd_disable(const char *);
181 static vrrp_err_t vrrpd_modify(vrrp_vr_conf_t *, uint32_t);
182 static void vrrpd_list(vrid_t, char *, int, vrrp_ret_list_t *, size_t *);
183 static void vrrpd_query(const char *, vrrp_ret_query_t *, size_t *);
184 
185 static boolean_t vrrp_rd_prop_name(vrrp_vr_conf_t *, const char *);
186 static boolean_t vrrp_rd_prop_vrid(vrrp_vr_conf_t *, const char *);
187 static boolean_t vrrp_rd_prop_af(vrrp_vr_conf_t *, const char *);
188 static boolean_t vrrp_rd_prop_pri(vrrp_vr_conf_t *, const char *);
189 static boolean_t vrrp_rd_prop_adver_int(vrrp_vr_conf_t *, const char *);
190 static boolean_t vrrp_rd_prop_preempt(vrrp_vr_conf_t *, const char *);
191 static boolean_t vrrp_rd_prop_accept(vrrp_vr_conf_t *, const char *);
192 static boolean_t vrrp_rd_prop_ifname(vrrp_vr_conf_t *, const char *);
193 static boolean_t vrrp_rd_prop_enabled(vrrp_vr_conf_t *, const char *);
194 static int vrrp_wt_prop_name(vrrp_vr_conf_t *, char *, size_t);
195 static int vrrp_wt_prop_vrid(vrrp_vr_conf_t *, char *, size_t);
196 static int vrrp_wt_prop_af(vrrp_vr_conf_t *, char *, size_t);
197 static int vrrp_wt_prop_pri(vrrp_vr_conf_t *, char *, size_t);
198 static int vrrp_wt_prop_adver_int(vrrp_vr_conf_t *, char *, size_t);
199 static int vrrp_wt_prop_preempt(vrrp_vr_conf_t *, char *, size_t);
200 static int vrrp_wt_prop_accept(vrrp_vr_conf_t *, char *, size_t);
201 static int vrrp_wt_prop_ifname(vrrp_vr_conf_t *, char *, size_t);
202 static int vrrp_wt_prop_enabled(vrrp_vr_conf_t *, char *, size_t);
203 
204 static void vrrpd_cmd_create(void *, void *, size_t *);
205 static void vrrpd_cmd_delete(void *, void *, size_t *);
206 static void vrrpd_cmd_enable(void *, void *, size_t *);
207 static void vrrpd_cmd_disable(void *, void *, size_t *);
208 static void vrrpd_cmd_modify(void *, void *, size_t *);
209 static void vrrpd_cmd_list(void *, void *, size_t *);
210 static void vrrpd_cmd_query(void *, void *, size_t *);
211 
212 static vrrp_vr_t *vrrpd_lookup_vr_by_vrid(char *, vrid_t vrid_t, int);
213 static vrrp_vr_t *vrrpd_lookup_vr_by_name(const char *);
214 static vrrp_intf_t *vrrpd_lookup_if(const char *, int);
215 static vrrp_err_t vrrpd_create_if(const char *, int, uint32_t, vrrp_intf_t **);
216 static void vrrpd_delete_if(vrrp_intf_t *, boolean_t);
217 static vrrp_err_t vrrpd_create_ip(vrrp_intf_t *, const char *, vrrp_addr_t *,
218     uint64_t flags);
219 static void vrrpd_delete_ip(vrrp_intf_t *, vrrp_ip_t *);
220 
221 static void vrrpd_init_ipcache(int);
222 static void vrrpd_update_ipcache(int);
223 static ipadm_status_t vrrpd_walk_addr_info(int);
224 static vrrp_err_t vrrpd_add_ipaddr(char *, int, vrrp_addr_t *,
225     int, uint64_t);
226 static vrrp_ip_t *vrrpd_select_primary(vrrp_intf_t *);
227 static void vrrpd_reselect_primary(vrrp_intf_t *);
228 static void vrrpd_reenable_all_vr();
229 static void vrrpd_remove_if(vrrp_intf_t *, boolean_t);
230 
231 static uint16_t in_cksum(int, uint16_t, void *);
232 static uint16_t vrrp_cksum4(struct in_addr *, struct in_addr *,
233     uint16_t, vrrp_pkt_t *);
234 static uint16_t vrrp_cksum6(struct in6_addr *, struct in6_addr *,
235     uint16_t, vrrp_pkt_t *);
236 static size_t vrrpd_build_vrrp(vrrp_vr_t *, uchar_t *, int, boolean_t);
237 
238 static void vrrpd_process_adv(vrrp_vr_t *, vrrp_addr_t *, vrrp_pkt_t *);
239 static vrrp_err_t vrrpd_send_adv(vrrp_vr_t *, boolean_t);
240 
241 /* state transition functions */
242 static vrrp_err_t vrrpd_state_i2m(vrrp_vr_t *);
243 static vrrp_err_t vrrpd_state_i2b(vrrp_vr_t *);
244 static void vrrpd_state_m2i(vrrp_vr_t *);
245 static void vrrpd_state_b2i(vrrp_vr_t *);
246 static vrrp_err_t vrrpd_state_b2m(vrrp_vr_t *);
247 static vrrp_err_t vrrpd_state_m2b(vrrp_vr_t *);
248 static void vrrpd_state_trans(vrrp_state_t, vrrp_state_t, vrrp_vr_t *);
249 
250 static vrrp_err_t vrrpd_set_noaccept(vrrp_vr_t *, boolean_t);
251 static vrrp_err_t vrrpd_virtualip_update(vrrp_vr_t *, boolean_t);
252 static vrrp_err_t vrrpd_virtualip_updateone(vrrp_intf_t *, vrrp_ip_t *,
253     boolean_t);
254 static int vrrpd_post_event(const char *, vrrp_state_t, vrrp_state_t);
255 
256 static void vrrpd_initconf();
257 static vrrp_err_t vrrpd_updateconf(vrrp_vr_conf_t *, uint_t);
258 static vrrp_err_t vrrpd_write_vrconf(char *, size_t, vrrp_vr_conf_t *);
259 static vrrp_err_t vrrpd_read_vrconf(char *, vrrp_vr_conf_t *);
260 static vrrp_err_t vrrpd_readprop(const char *, vrrp_vr_conf_t *);
261 static void vrrpd_cleanup();
262 
263 static void vrrp_log(int, char *, ...);
264 static int timeval_to_milli(struct timeval);
265 static struct timeval timeval_delta(struct timeval, struct timeval);
266 
/*
 * One persistent property: its name plus a pair of callbacks operating on
 * a vrrp_vr_conf_t. Judging by the vrrp_rd_prop_ and vrrp_wt_prop_ naming
 * of the functions stored here, vs_propread presumably parses a string
 * value into the configuration and vs_propwrite presumably formats the
 * value into the supplied buffer -- confirm against their definitions.
 */
267 typedef struct vrrpd_prop_s {
268 	char		*vs_propname;
269 	boolean_t	(*vs_propread)(vrrp_vr_conf_t *, const char *);
270 	int		(*vs_propwrite)(vrrp_vr_conf_t *, char *, size_t);
271 } vrrp_prop_t;
272 
273 /*
274  * persistent VRRP properties array
 *
 * Each entry pairs a property name with its read and write callbacks.
275  */
276 static vrrp_prop_t vrrp_prop_info_tbl[] = {
277 	{"name", vrrp_rd_prop_name, vrrp_wt_prop_name},
278 	{"vrid", vrrp_rd_prop_vrid, vrrp_wt_prop_vrid},
279 	{"priority", vrrp_rd_prop_pri, vrrp_wt_prop_pri},
280 	{"adv_intval", vrrp_rd_prop_adver_int, vrrp_wt_prop_adver_int},
281 	{"preempt_mode", vrrp_rd_prop_preempt, vrrp_wt_prop_preempt},
282 	{"accept_mode", vrrp_rd_prop_accept, vrrp_wt_prop_accept},
283 	{"interface", vrrp_rd_prop_ifname, vrrp_wt_prop_ifname},
284 	{"af", vrrp_rd_prop_af, vrrp_wt_prop_af},
285 	{"enabled", vrrp_rd_prop_enabled, vrrp_wt_prop_enabled}
286 };
287 
/* Number of entries in vrrp_prop_info_tbl. */
288 #define	VRRP_PROP_INFO_TABSIZE	\
289 	(sizeof (vrrp_prop_info_tbl) / sizeof (vrrp_prop_t))
290 
/*
 * Signature shared by all vrrpd_cmd_*() handlers declared in this file.
 * NOTE(review): the arguments look like (request, reply buffer, in/out
 * reply size) -- confirm against the handler definitions.
 */
291 typedef void vrrp_cmd_func_t(void *, void *, size_t *);
292 
/* Describes one administrative command and the handler that serves it. */
293 typedef struct vrrp_cmd_info_s {
294 	vrrp_cmd_type_t	vi_cmd;
295 	size_t		vi_reqsize;
296 	size_t		vi_acksize;	/* 0 if the size is variable */
297 	boolean_t	vi_setop;	/* Set operation? Check credentials */
298 	vrrp_cmd_func_t	*vi_cmdfunc;
299 } vrrp_cmd_info_t;
300 
/*
 * Table of supported commands. Create, delete, enable, disable and modify
 * are set operations with fixed-size replies; query and list are not set
 * operations and have variable-size replies (vi_acksize of 0).
 */
301 static vrrp_cmd_info_t vrrp_cmd_info_tbl[] = {
302 	{VRRP_CMD_CREATE, sizeof (vrrp_cmd_create_t),
303 	    sizeof (vrrp_ret_create_t), _B_TRUE, vrrpd_cmd_create},
304 	{VRRP_CMD_DELETE, sizeof (vrrp_cmd_delete_t),
305 	    sizeof (vrrp_ret_delete_t), _B_TRUE, vrrpd_cmd_delete},
306 	{VRRP_CMD_ENABLE, sizeof (vrrp_cmd_enable_t),
307 	    sizeof (vrrp_ret_enable_t), _B_TRUE, vrrpd_cmd_enable},
308 	{VRRP_CMD_DISABLE, sizeof (vrrp_cmd_disable_t),
309 	    sizeof (vrrp_ret_disable_t), _B_TRUE, vrrpd_cmd_disable},
310 	{VRRP_CMD_MODIFY, sizeof (vrrp_cmd_modify_t),
311 	    sizeof (vrrp_ret_modify_t), _B_TRUE, vrrpd_cmd_modify},
312 	{VRRP_CMD_QUERY, sizeof (vrrp_cmd_query_t), 0,
313 	    _B_FALSE, vrrpd_cmd_query},
314 	{VRRP_CMD_LIST, sizeof (vrrp_cmd_list_t), 0,
315 	    _B_FALSE, vrrpd_cmd_list}
316 };
317 
/* Number of entries in vrrp_cmd_info_tbl. */
318 #define	VRRP_DOOR_INFO_TABLE_SIZE	\
319 	(sizeof (vrrp_cmd_info_tbl) / sizeof (vrrp_cmd_info_t))
320 
321 static int
ipaddr_cmp(int af,vrrp_addr_t * addr1,vrrp_addr_t * addr2)322 ipaddr_cmp(int af, vrrp_addr_t *addr1, vrrp_addr_t *addr2)
323 {
324 	if (af == AF_INET) {
325 		return (memcmp(&addr1->in4.sin_addr,
326 		    &addr2->in4.sin_addr, sizeof (struct in_addr)));
327 	} else {
328 		return (memcmp(&addr1->in6.sin6_addr,
329 		    &addr2->in6.sin6_addr, sizeof (struct in6_addr)));
330 	}
331 }
332 
333 static vrrp_vr_t *
vrrpd_lookup_vr_by_vrid(char * ifname,vrid_t vrid,int af)334 vrrpd_lookup_vr_by_vrid(char *ifname, vrid_t vrid, int af)
335 {
336 	vrrp_vr_t *vr;
337 
338 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
339 		if (strcmp(vr->vvr_conf.vvc_link, ifname) == 0 &&
340 		    vr->vvr_conf.vvc_vrid == vrid &&
341 		    vr->vvr_conf.vvc_af == af) {
342 			break;
343 		}
344 	}
345 	return (vr);
346 }
347 
348 static vrrp_vr_t *
vrrpd_lookup_vr_by_name(const char * name)349 vrrpd_lookup_vr_by_name(const char *name)
350 {
351 	vrrp_vr_t *vr;
352 
353 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
354 		if (strcmp(vr->vvr_conf.vvc_name, name) == 0)
355 			break;
356 	}
357 	return (vr);
358 }
359 
360 static vrrp_intf_t *
vrrpd_lookup_if(const char * ifname,int af)361 vrrpd_lookup_if(const char *ifname, int af)
362 {
363 	vrrp_intf_t	*intf;
364 
365 	TAILQ_FOREACH(intf, &vrrp_intf_list, vvi_next) {
366 		if (strcmp(ifname, intf->vvi_ifname) == 0 &&
367 		    af == intf->vvi_af) {
368 			break;
369 		}
370 	}
371 	return (intf);
372 }
373 
374 static vrrp_err_t
vrrpd_create_if(const char * ifname,int af,uint32_t ifindex,vrrp_intf_t ** intfp)375 vrrpd_create_if(const char *ifname, int af, uint32_t ifindex,
376     vrrp_intf_t **intfp)
377 {
378 	vrrp_intf_t	*intf;
379 
380 	vrrp_log(VRRP_DBG0, "vrrpd_create_if(%s, %s, %d)",
381 	    ifname, af_str(af), ifindex);
382 
383 	if (((*intfp) = malloc(sizeof (vrrp_intf_t))) == NULL) {
384 		vrrp_log(VRRP_ERR, "vrrpd_create_if(): failed to "
385 		    "allocate %s/%s interface", ifname, af_str(af));
386 		return (VRRP_ENOMEM);
387 	}
388 
389 	intf = *intfp;
390 	TAILQ_INIT(&intf->vvi_iplist);
391 	(void) strlcpy(intf->vvi_ifname, ifname, sizeof (intf->vvi_ifname));
392 	intf->vvi_af = af;
393 	intf->vvi_sockfd = -1;
394 	intf->vvi_nvr = 0;
395 	intf->vvi_eid = -1;
396 	intf->vvi_pip = NULL;
397 	intf->vvi_ifindex = ifindex;
398 	intf->vvi_state = NODE_STATE_NEW;
399 	intf->vvi_vr_state = VRRP_STATE_INIT;
400 	TAILQ_INSERT_TAIL(&vrrp_intf_list, intf, vvi_next);
401 	return (VRRP_SUCCESS);
402 }
403 
404 /*
405  * An interface is deleted. If update_vr is true, the deletion of the interface
406  * may cause the state transition of assoicated VRRP router (if this interface
407  * is either the primary or the VNIC interface of the VRRP router); otherwise,
408  * simply delete the interface without updating the VRRP router.
409  */
410 static void
vrrpd_delete_if(vrrp_intf_t * intf,boolean_t update_vr)411 vrrpd_delete_if(vrrp_intf_t *intf, boolean_t update_vr)
412 {
413 	vrrp_ip_t	*ip;
414 
415 	vrrp_log(VRRP_DBG0, "vrrpd_delete_if(%s, %s, %supdate_vr)",
416 	    intf->vvi_ifname, af_str(intf->vvi_af), update_vr ? "" : "no_");
417 
418 	if (update_vr) {
419 		/*
420 		 * If a this interface is the physical interface or the VNIC
421 		 * of a VRRP router, the deletion of the interface (no IP
422 		 * address exists on this interface) may cause the state
423 		 * transition of the VRRP router. call vrrpd_remove_if()
424 		 * to find all corresponding VRRP router and update their
425 		 * states.
426 		 */
427 		vrrpd_remove_if(intf, _B_FALSE);
428 	}
429 
430 	/*
431 	 * First remove and delete all the IP addresses on the interface
432 	 */
433 	while (!TAILQ_EMPTY(&intf->vvi_iplist)) {
434 		ip = TAILQ_FIRST(&intf->vvi_iplist);
435 		vrrpd_delete_ip(intf, ip);
436 	}
437 
438 	/*
439 	 * Then remove and delete the interface
440 	 */
441 	TAILQ_REMOVE(&vrrp_intf_list, intf, vvi_next);
442 	(void) free(intf);
443 }
444 
445 static vrrp_err_t
vrrpd_create_ip(vrrp_intf_t * intf,const char * lifname,vrrp_addr_t * addr,uint64_t flags)446 vrrpd_create_ip(vrrp_intf_t *intf, const char *lifname, vrrp_addr_t *addr,
447     uint64_t flags)
448 {
449 	vrrp_ip_t	*ip;
450 	char		abuf[INET6_ADDRSTRLEN];
451 
452 	/* LINTED E_CONSTANT_CONDITION */
453 	VRRPADDR2STR(intf->vvi_af, addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
454 	vrrp_log(VRRP_DBG0, "vrrpd_create_ip(%s, %s, %s, 0x%x)",
455 	    intf->vvi_ifname, lifname, abuf, flags);
456 
457 	if ((ip = malloc(sizeof (vrrp_ip_t))) == NULL) {
458 		vrrp_log(VRRP_ERR, "vrrpd_create_ip(%s, %s):"
459 		    "failed to allocate IP", lifname, abuf);
460 		return (VRRP_ENOMEM);
461 	}
462 
463 	(void) strncpy(ip->vip_lifname, lifname, sizeof (ip->vip_lifname));
464 	ip->vip_state = NODE_STATE_NEW;
465 	ip->vip_flags = flags;
466 	(void) memcpy(&ip->vip_addr, addr, sizeof (ip->vip_addr));
467 
468 	/*
469 	 * Make sure link-local IPv6 IP addresses are at the head of the list
470 	 */
471 	if (intf->vvi_af == AF_INET6 &&
472 	    IN6_IS_ADDR_LINKLOCAL(&addr->in6.sin6_addr)) {
473 		TAILQ_INSERT_HEAD(&intf->vvi_iplist, ip, vip_next);
474 	} else {
475 		TAILQ_INSERT_TAIL(&intf->vvi_iplist, ip, vip_next);
476 	}
477 	return (VRRP_SUCCESS);
478 }
479 
480 static void
vrrpd_delete_ip(vrrp_intf_t * intf,vrrp_ip_t * ip)481 vrrpd_delete_ip(vrrp_intf_t *intf, vrrp_ip_t *ip)
482 {
483 	char	abuf[INET6_ADDRSTRLEN];
484 	int	af = intf->vvi_af;
485 
486 	/* LINTED E_CONSTANT_CONDITION */
487 	VRRPADDR2STR(af, &ip->vip_addr, abuf, sizeof (abuf), _B_FALSE);
488 	vrrp_log(VRRP_DBG0, "vrrpd_delete_ip(%s, %s, %s) is %sprimary",
489 	    intf->vvi_ifname, ip->vip_lifname, abuf,
490 	    intf->vvi_pip == ip ? "" : "not ");
491 
492 	if (intf->vvi_pip == ip)
493 		intf->vvi_pip = NULL;
494 
495 	TAILQ_REMOVE(&intf->vvi_iplist, ip, vip_next);
496 	(void) free(ip);
497 }
498 
499 static char *
rtm_event2str(uchar_t event)500 rtm_event2str(uchar_t event)
501 {
502 	switch (event) {
503 	case RTM_NEWADDR:
504 		return ("RTM_NEWADDR");
505 	case RTM_DELADDR:
506 		return ("RTM_DELADDR");
507 	case RTM_IFINFO:
508 		return ("RTM_IFINFO");
509 	case RTM_ADD:
510 		return ("RTM_ADD");
511 	case RTM_DELETE:
512 		return ("RTM_DELETE");
513 	case RTM_CHANGE:
514 		return ("RTM_CHANGE");
515 	case RTM_OLDADD:
516 		return ("RTM_OLDADD");
517 	case RTM_OLDDEL:
518 		return ("RTM_OLDDEL");
519 	case RTM_CHGADDR:
520 		return ("RTM_CHGADDR");
521 	case RTM_FREEADDR:
522 		return ("RTM_FREEADDR");
523 	default:
524 		return ("RTM_OTHER");
525 	}
526 }
527 
528 /*
529  * This is called by the child process to inform the parent process to
530  * exit with the given return value. Note that the child process
531  * (the daemon process) informs the parent process to exit when anything
532  * goes wrong or when all the intialization is done.
533  */
534 static int
vrrpd_inform_parent_exit(int rv)535 vrrpd_inform_parent_exit(int rv)
536 {
537 	int err = 0;
538 
539 	/*
540 	 * If vrrp_debug_level is none-zero, vrrpd is not running as
541 	 * a daemon. Return directly.
542 	 */
543 	if (vrrp_debug_level != 0)
544 		return (0);
545 
546 	if (write(pfds[1], &rv, sizeof (int)) != sizeof (int)) {
547 		err = errno;
548 		(void) close(pfds[1]);
549 		return (err);
550 	}
551 	(void) close(pfds[1]);
552 	return (0);
553 }
554 
555 int
main(int argc,char * argv[])556 main(int argc, char *argv[])
557 {
558 	int c, err;
559 	struct sigaction sa;
560 	sigset_t mask;
561 	struct rlimit rl;
562 
563 	(void) setlocale(LC_ALL, "");
564 	(void) textdomain(TEXT_DOMAIN);
565 
566 	/*
567 	 * We need PRIV_SYS_CONFIG to post VRRP sysevent, PRIV_NET_RAWACESS
568 	 * and PRIV_NET_ICMPACCESS to open  the raw socket, PRIV_SYS_IP_CONFIG
569 	 * to bring up/down the virtual IP addresses, and PRIV_SYS_RESOURCE to
570 	 * setrlimit().
571 	 *
572 	 * Note that sysevent is not supported in non-global zones.
573 	 */
574 	if (getzoneid() == GLOBAL_ZONEID) {
575 		err = __init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, 0, 0,
576 		    PRIV_SYS_CONFIG, PRIV_NET_RAWACCESS, PRIV_NET_ICMPACCESS,
577 		    PRIV_SYS_IP_CONFIG, PRIV_SYS_RESOURCE, NULL);
578 	} else {
579 		err = __init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, 0, 0,
580 		    PRIV_NET_RAWACCESS, PRIV_NET_ICMPACCESS,
581 		    PRIV_SYS_IP_CONFIG, PRIV_SYS_RESOURCE, NULL);
582 	}
583 
584 	if (err == -1) {
585 		vrrp_log(VRRP_ERR, "main(): init_daemon_priv() failed");
586 		return (EXIT_FAILURE);
587 	}
588 
589 	/*
590 	 * If vrrpd is started by other process, it will inherit the
591 	 * signal block mask. We unblock all signals to make sure the
592 	 * signal handling will work normally.
593 	 */
594 	(void) sigfillset(&mask);
595 	(void) thr_sigsetmask(SIG_UNBLOCK, &mask, NULL);
596 	sa.sa_handler = vrrpd_cleanup;
597 	sa.sa_flags = 0;
598 	(void) sigemptyset(&sa.sa_mask);
599 	(void) sigaction(SIGINT, &sa, NULL);
600 	(void) sigaction(SIGQUIT, &sa, NULL);
601 	(void) sigaction(SIGTERM, &sa, NULL);
602 
603 	vrrp_debug_level = 0;
604 	(void) strlcpy(vrrpd_conffile, VRRPCONF, sizeof (vrrpd_conffile));
605 	while ((c = getopt(argc, argv, "d:f:")) != EOF) {
606 		switch (c) {
607 		case 'd':
608 			vrrp_debug_level = atoi(optarg);
609 			break;
610 		case 'f':
611 			(void) strlcpy(vrrpd_conffile, optarg,
612 			    sizeof (vrrpd_conffile));
613 			break;
614 		default:
615 			break;
616 		}
617 	}
618 
619 	closefrom(3);
620 	if (vrrp_debug_level == 0 && (daemon_init() != 0)) {
621 		vrrp_log(VRRP_ERR, "main(): daemon_init() failed");
622 		return (EXIT_FAILURE);
623 	}
624 
625 	rl.rlim_cur = RLIM_INFINITY;
626 	rl.rlim_max = RLIM_INFINITY;
627 	if (setrlimit(RLIMIT_NOFILE, &rl) == -1) {
628 		vrrp_log(VRRP_ERR, "main(): setrlimit() failed");
629 		goto child_out;
630 	}
631 
632 	if (vrrpd_init() != VRRP_SUCCESS) {
633 		vrrp_log(VRRP_ERR, "main(): vrrpd_init() failed");
634 		goto child_out;
635 	}
636 
637 	/*
638 	 * Get rid of unneeded privileges.
639 	 */
640 	__fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION,
641 	    PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, PRIV_SYS_RESOURCE, NULL);
642 
643 	/*
644 	 * Read the configuration and initialize the existing VRRP
645 	 * configuration
646 	 */
647 	vrrpd_initconf();
648 
649 	/*
650 	 * Inform the parent process that it can successfully exit.
651 	 */
652 	if ((err = vrrpd_inform_parent_exit(EXIT_SUCCESS)) != 0) {
653 		vrrpd_cleanup();
654 		vrrp_log(VRRP_WARNING, "vrrpd_inform_parent_exit() failed: %s",
655 		    strerror(err));
656 		return (EXIT_FAILURE);
657 	}
658 
659 	/*
660 	 * Start the loop to handle the timer and the IO events.
661 	 */
662 	switch (iu_handle_events(vrrpd_eh, vrrpd_timerq)) {
663 	case -1:
664 		vrrp_log(VRRP_ERR, "main(): iu_handle_events() failed "
665 		    "abnormally");
666 		break;
667 	default:
668 		break;
669 	}
670 
671 	vrrpd_cleanup();
672 	return (EXIT_SUCCESS);
673 
674 child_out:
675 	(void) vrrpd_inform_parent_exit(EXIT_FAILURE);
676 	return (EXIT_FAILURE);
677 }
678 
679 static int
daemon_init()680 daemon_init()
681 {
682 	pid_t	pid;
683 	int	rv;
684 
685 	vrrp_log(VRRP_DBG0, "daemon_init()");
686 
687 	if (getenv("SMF_FMRI") == NULL) {
688 		vrrp_log(VRRP_ERR, "daemon_init(): vrrpd is an smf(5) managed "
689 		    "service and should not be run from the command line.");
690 		return (-1);
691 	}
692 
693 	/*
694 	 * Create the pipe used for the child process to inform the parent
695 	 * process to exit after all initialization is done.
696 	 */
697 	if (pipe(pfds) < 0) {
698 		vrrp_log(VRRP_ERR, "daemon_init(): pipe() failed: %s",
699 		    strerror(errno));
700 		return (-1);
701 	}
702 
703 	if ((pid = fork()) < 0) {
704 		vrrp_log(VRRP_ERR, "daemon_init(): fork() failed: %s",
705 		    strerror(errno));
706 		(void) close(pfds[0]);
707 		(void) close(pfds[1]);
708 		return (-1);
709 	}
710 
711 	if (pid != 0) { /* Parent */
712 		(void) close(pfds[1]);
713 
714 		/*
715 		 * Read the child process's return value from the pfds.
716 		 * If the child process exits unexpectedly, read() returns -1.
717 		 */
718 		if (read(pfds[0], &rv, sizeof (int)) != sizeof (int)) {
719 			vrrp_log(VRRP_ERR, "daemon_init(): child process "
720 			    "exited unexpectedly %s", strerror(errno));
721 			(void) kill(pid, SIGTERM);
722 			rv = EXIT_FAILURE;
723 		}
724 		(void) close(pfds[0]);
725 		exit(rv);
726 	}
727 
728 	/*
729 	 * in child process, became a daemon, and return to main() to continue.
730 	 */
731 	(void) close(pfds[0]);
732 	(void) chdir("/");
733 	(void) setsid();
734 	(void) close(0);
735 	(void) close(1);
736 	(void) close(2);
737 	(void) open("/dev/null", O_RDWR, 0);
738 	(void) dup2(0, 1);
739 	(void) dup2(0, 2);
740 	openlog("vrrpd", LOG_PID, LOG_DAEMON);
741 	vrrp_logflag = 1;
742 	return (0);
743 }
744 
745 static vrrp_err_t
vrrpd_init()746 vrrpd_init()
747 {
748 	vrrp_err_t	err = VRRP_ESYS;
749 
750 	vrrp_log(VRRP_DBG0, "vrrpd_init()");
751 
752 	TAILQ_INIT(&vrrp_vr_list);
753 	TAILQ_INIT(&vrrp_intf_list);
754 
755 	if (vrrp_open(&vrrpd_vh) != VRRP_SUCCESS) {
756 		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrp_open() failed");
757 		goto fail;
758 	}
759 
760 	if ((vrrpd_timerq = iu_tq_create()) == NULL) {
761 		vrrp_log(VRRP_ERR, "vrrpd_init(): iu_tq_create() failed");
762 		goto fail;
763 	}
764 
765 	if ((vrrpd_eh = iu_eh_create()) == NULL) {
766 		vrrp_log(VRRP_ERR, "vrrpd_init(): iu_eh_create() failed");
767 		goto fail;
768 	}
769 
770 	/*
771 	 * Create the AF_UNIX socket used to communicate with libvrrpadm.
772 	 *
773 	 * This socket is used to receive the administrative requests and
774 	 * send back the results.
775 	 */
776 	if (vrrpd_cmdsock_create() != VRRP_SUCCESS) {
777 		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_cmdsock_create() "
778 		    "failed");
779 		goto fail;
780 	}
781 
782 	/*
783 	 * Create the VRRP control socket used to bring up/down the virtual
784 	 * IP addresses. It is also used to set the IFF_NOACCEPT flag of
785 	 * the virtual IP addresses.
786 	 */
787 	if (vrrpd_ctlsock_create() != VRRP_SUCCESS) {
788 		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_ctlsock_create() "
789 		    "failed");
790 		goto fail;
791 	}
792 
793 	/*
794 	 * Create the PF_ROUTER socket used to listen to the routing socket
795 	 * messages and build the interface/IP address list.
796 	 */
797 	if (vrrpd_rtsock_create() != VRRP_SUCCESS) {
798 		vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_rtsock_create() "
799 		    "failed");
800 		goto fail;
801 	}
802 
803 	/* Open the libipadm handle */
804 	if (ipadm_open(&vrrp_ipadm_handle, 0) != IPADM_SUCCESS) {
805 		vrrp_log(VRRP_ERR, "vrrpd_init(): ipadm_open() failed");
806 		goto fail;
807 	}
808 
809 	/*
810 	 * Build the list of interfaces and IP addresses. Also, start the time
811 	 * to scan the interfaces/IP addresses periodically.
812 	 */
813 	vrrpd_scan(AF_INET);
814 	vrrpd_scan(AF_INET6);
815 	if ((vrrp_scan_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
816 	    vrrpd_scan_interval, vrrpd_scan_timer, NULL)) == -1) {
817 		vrrp_log(VRRP_ERR, "vrrpd_init(): start scan_timer failed");
818 		goto fail;
819 	}
820 
821 	/*
822 	 * Initialize the VRRP multicast address.
823 	 */
824 	bzero(&vrrp_muladdr4, sizeof (vrrp_addr_t));
825 	vrrp_muladdr4.in4.sin_family = AF_INET;
826 	(void) inet_pton(AF_INET, "224.0.0.18", &vrrp_muladdr4.in4.sin_addr);
827 
828 	bzero(&vrrp_muladdr6, sizeof (vrrp_addr_t));
829 	vrrp_muladdr6.in6.sin6_family = AF_INET6;
830 	(void) inet_pton(AF_INET6, "ff02::12", &vrrp_muladdr6.in6.sin6_addr);
831 
832 	return (VRRP_SUCCESS);
833 
834 fail:
835 	vrrpd_fini();
836 	return (err);
837 }
838 
839 static void
vrrpd_fini()840 vrrpd_fini()
841 {
842 	vrrp_log(VRRP_DBG0, "vrrpd_fini()");
843 
844 	(void) iu_cancel_timer(vrrpd_timerq, vrrp_scan_timer_id, NULL);
845 	vrrp_scan_timer_id = -1;
846 
847 	vrrpd_rtsock_destroy();
848 	vrrpd_ctlsock_destroy();
849 	vrrpd_cmdsock_destroy();
850 
851 	if (vrrpd_eh != NULL) {
852 		iu_eh_destroy(vrrpd_eh);
853 		vrrpd_eh = NULL;
854 	}
855 
856 	if (vrrpd_timerq != NULL) {
857 		iu_tq_destroy(vrrpd_timerq);
858 		vrrpd_timerq = NULL;
859 	}
860 
861 	vrrp_close(vrrpd_vh);
862 	vrrpd_vh = NULL;
863 	assert(TAILQ_EMPTY(&vrrp_vr_list));
864 	assert(TAILQ_EMPTY(&vrrp_intf_list));
865 
866 	ipadm_close(vrrp_ipadm_handle);
867 }
868 
869 static void
vrrpd_cleanup(void)870 vrrpd_cleanup(void)
871 {
872 	vrrp_vr_t	*vr;
873 	vrrp_intf_t	*intf;
874 
875 	vrrp_log(VRRP_DBG0, "vrrpd_cleanup()");
876 
877 	while (!TAILQ_EMPTY(&vrrp_vr_list)) {
878 		vr = TAILQ_FIRST(&vrrp_vr_list);
879 		vrrpd_delete_vr(vr);
880 	}
881 
882 	while (!TAILQ_EMPTY(&vrrp_intf_list)) {
883 		intf = TAILQ_FIRST(&vrrp_intf_list);
884 		vrrpd_delete_if(intf, _B_FALSE);
885 	}
886 
887 	vrrpd_fini();
888 	closelog();
889 	exit(1);
890 }
891 
892 /*
893  * Read the configuration file and initialize all the existing VRRP routers.
894  */
895 static void
vrrpd_initconf()896 vrrpd_initconf()
897 {
898 	FILE *fp;
899 	char line[LINE_MAX];
900 	int linenum = 0;
901 	vrrp_vr_conf_t conf;
902 	vrrp_err_t err;
903 
904 	vrrp_log(VRRP_DBG0, "vrrpd_initconf()");
905 
906 	if ((fp = fopen(vrrpd_conffile, "rF")) == NULL) {
907 		vrrp_log(VRRP_ERR, "failed to open the configuration file %s",
908 		    vrrpd_conffile);
909 		return;
910 	}
911 
912 	while (fgets(line, sizeof (line), fp) != NULL) {
913 		linenum++;
914 		conf.vvc_vrid = VRRP_VRID_NONE;
915 		if ((err = vrrpd_read_vrconf(line, &conf)) != VRRP_SUCCESS) {
916 			vrrp_log(VRRP_ERR, "failed to parse %d line %s",
917 			    linenum, line);
918 			continue;
919 		}
920 
921 		/*
922 		 * Blank or comment line
923 		 */
924 		if (conf.vvc_vrid == VRRP_VRID_NONE)
925 			continue;
926 
927 		/*
928 		 * No need to update the configuration since the VRRP router
929 		 * created/enabled based on the existing configuration.
930 		 */
931 		if ((err = vrrpd_create(&conf, _B_FALSE)) != VRRP_SUCCESS) {
932 			vrrp_log(VRRP_ERR, "VRRP router %s creation failed: "
933 			    "%s", conf.vvc_name, vrrp_err2str(err));
934 			continue;
935 		}
936 
937 		if (conf.vvc_enabled &&
938 		    ((err = vrrpd_enable(conf.vvc_name, _B_FALSE)) !=
939 		    VRRP_SUCCESS)) {
940 			vrrp_log(VRRP_ERR, "VRRP router %s enable failed: %s",
941 			    conf.vvc_name, vrrp_err2str(err));
942 		}
943 	}
944 
945 	(void) fclose(fp);
946 }
947 
948 /*
949  * Create the AF_UNIX socket used to communicate with libvrrpadm.
950  *
951  * This socket is used to receive the administrative request and
952  * send back the results.
953  */
954 static vrrp_err_t
vrrpd_cmdsock_create()955 vrrpd_cmdsock_create()
956 {
957 	iu_event_id_t		eid;
958 	struct sockaddr_un	laddr;
959 	int			sock, flags;
960 
961 	vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_create()");
962 
963 	if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
964 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): socket(AF_UNIX) "
965 		    "failed: %s", strerror(errno));
966 		return (VRRP_ESYS);
967 	}
968 
969 	/*
970 	 * Set it to be non-blocking.
971 	 */
972 	flags = fcntl(sock, F_GETFL, 0);
973 	(void) fcntl(sock, F_SETFL, (flags | O_NONBLOCK));
974 
975 	/*
976 	 * Unlink first in case a previous daemon instance exited ungracefully.
977 	 */
978 	(void) unlink(VRRPD_SOCKET);
979 
980 	bzero(&laddr, sizeof (laddr));
981 	laddr.sun_family = AF_UNIX;
982 	(void) strlcpy(laddr.sun_path, VRRPD_SOCKET, sizeof (laddr.sun_path));
983 	if (bind(sock, (struct sockaddr *)&laddr, sizeof (laddr)) < 0) {
984 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): bind() failed: %s",
985 		    strerror(errno));
986 		(void) close(sock);
987 		return (VRRP_ESYS);
988 	}
989 
990 	if (listen(sock, 30) < 0) {
991 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): listen() "
992 		    "failed: %s", strerror(errno));
993 		(void) close(sock);
994 		return (VRRP_ESYS);
995 	}
996 
997 	if ((eid = iu_register_event(vrrpd_eh, sock, POLLIN,
998 	    vrrpd_cmdsock_handler, NULL)) == -1) {
999 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): iu_register_event()"
1000 		    " failed");
1001 		(void) close(sock);
1002 		return (VRRP_ESYS);
1003 	}
1004 
1005 	vrrpd_cmdsock_fd = sock;
1006 	vrrpd_cmdsock_eid = eid;
1007 	return (VRRP_SUCCESS);
1008 }
1009 
1010 static void
vrrpd_cmdsock_destroy()1011 vrrpd_cmdsock_destroy()
1012 {
1013 	vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_destroy()");
1014 
1015 	(void) iu_unregister_event(vrrpd_eh, vrrpd_cmdsock_eid, NULL);
1016 	(void) close(vrrpd_cmdsock_fd);
1017 	vrrpd_cmdsock_fd = -1;
1018 	vrrpd_cmdsock_eid = -1;
1019 }
1020 
1021 /*
1022  * Create the PF_ROUTER sockets used to listen to the routing socket
1023  * messages and build the interface/IP address list. Create one for
1024  * each address family (IPv4 and IPv6).
1025  */
1026 static vrrp_err_t
vrrpd_rtsock_create()1027 vrrpd_rtsock_create()
1028 {
1029 	int		i, flags, sock;
1030 	iu_event_id_t	eid;
1031 
1032 	vrrp_log(VRRP_DBG0, "vrrpd_rtsock_create()");
1033 
1034 	for (i = 0; i < 2; i++) {
1035 		sock = socket(PF_ROUTE, SOCK_RAW, vrrpd_rtsocks[i].vrt_af);
1036 		if (sock == -1) {
1037 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): socket() "
1038 			    "failed: %s", strerror(errno));
1039 			break;
1040 		}
1041 
1042 		/*
1043 		 * Set it to be non-blocking.
1044 		 */
1045 		if ((flags = fcntl(sock, F_GETFL, 0)) < 0) {
1046 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): "
1047 			    "fcntl(F_GETFL) failed: %s", strerror(errno));
1048 			break;
1049 		}
1050 
1051 		if ((fcntl(sock, F_SETFL, flags | O_NONBLOCK)) < 0) {
1052 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): "
1053 			    "fcntl(F_SETFL) failed: %s", strerror(errno));
1054 			break;
1055 		}
1056 
1057 		if ((eid = iu_register_event(vrrpd_eh, sock, POLLIN,
1058 		    vrrpd_rtsock_handler, &(vrrpd_rtsocks[i].vrt_af))) == -1) {
1059 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): register "
1060 			    "rtsock %d(%s) failed", sock,
1061 			    af_str(vrrpd_rtsocks[i].vrt_af));
1062 			break;
1063 		}
1064 
1065 		vrrpd_rtsocks[i].vrt_fd = sock;
1066 		vrrpd_rtsocks[i].vrt_eid = eid;
1067 	}
1068 
1069 	if (i != 2) {
1070 		(void) close(sock);
1071 		vrrpd_rtsock_destroy();
1072 		return (VRRP_ESYS);
1073 	}
1074 
1075 	return (VRRP_SUCCESS);
1076 }
1077 
1078 static void
vrrpd_rtsock_destroy()1079 vrrpd_rtsock_destroy()
1080 {
1081 	int		i;
1082 
1083 	vrrp_log(VRRP_DBG0, "vrrpd_rtsock_destroy()");
1084 	for (i = 0; i < 2; i++) {
1085 		(void) iu_unregister_event(vrrpd_eh, vrrpd_rtsocks[i].vrt_eid,
1086 		    NULL);
1087 		(void) close(vrrpd_rtsocks[i].vrt_fd);
1088 		vrrpd_rtsocks[i].vrt_eid = -1;
1089 		vrrpd_rtsocks[i].vrt_fd = -1;
1090 	}
1091 }
1092 
1093 /*
1094  * Create the VRRP control socket used to bring up/down the virtual
1095  * IP addresses. It is also used to set the IFF_NOACCEPT flag of
1096  * the virtual IP addresses.
1097  */
1098 static vrrp_err_t
vrrpd_ctlsock_create()1099 vrrpd_ctlsock_create()
1100 {
1101 	int	s, s6;
1102 	int	on = _B_TRUE;
1103 
1104 	if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
1105 		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): socket(INET) "
1106 		    "failed: %s", strerror(errno));
1107 		return (VRRP_ESYS);
1108 	}
1109 	if (setsockopt(s, SOL_SOCKET, SO_VRRP, &on, sizeof (on)) < 0) {
1110 		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): "
1111 		    "setsockopt(INET, SO_VRRP) failed: %s", strerror(errno));
1112 		(void) close(s);
1113 		return (VRRP_ESYS);
1114 	}
1115 
1116 	if ((s6 = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) {
1117 		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): socket(INET6) "
1118 		    "failed: %s", strerror(errno));
1119 		(void) close(s);
1120 		return (VRRP_ESYS);
1121 	}
1122 	if (setsockopt(s6, SOL_SOCKET, SO_VRRP, &on, sizeof (on)) < 0) {
1123 		vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): "
1124 		    "setsockopt(INET6, SO_VRRP) failed: %s", strerror(errno));
1125 		(void) close(s);
1126 		(void) close(s6);
1127 		return (VRRP_ESYS);
1128 	}
1129 
1130 	vrrpd_ctlsock_fd = s;
1131 	vrrpd_ctlsock6_fd = s6;
1132 	return (VRRP_SUCCESS);
1133 }
1134 
1135 static void
vrrpd_ctlsock_destroy()1136 vrrpd_ctlsock_destroy()
1137 {
1138 	(void) close(vrrpd_ctlsock_fd);
1139 	vrrpd_ctlsock_fd = -1;
1140 	(void) close(vrrpd_ctlsock6_fd);
1141 	vrrpd_ctlsock6_fd = -1;
1142 }
1143 
1144 /*ARGSUSED*/
1145 static void
vrrpd_cmd_create(void * arg1,void * arg2,size_t * arg2_sz)1146 vrrpd_cmd_create(void *arg1, void *arg2, size_t *arg2_sz)
1147 {
1148 	vrrp_cmd_create_t	*cmd = (vrrp_cmd_create_t *)arg1;
1149 	vrrp_ret_create_t	*ret = (vrrp_ret_create_t *)arg2;
1150 	vrrp_err_t		err;
1151 
1152 	err = vrrpd_create(&cmd->vcc_conf, _B_TRUE);
1153 	if (err == VRRP_SUCCESS && cmd->vcc_conf.vvc_enabled) {
1154 		/*
1155 		 * No need to update the configuration since it is already
1156 		 * done in the above vrrpd_create() call
1157 		 */
1158 		err = vrrpd_enable(cmd->vcc_conf.vvc_name, _B_FALSE);
1159 		if (err != VRRP_SUCCESS)
1160 			(void) vrrpd_delete(cmd->vcc_conf.vvc_name);
1161 	}
1162 	ret->vrc_err = err;
1163 }
1164 
1165 /*ARGSUSED*/
1166 static void
vrrpd_cmd_delete(void * arg1,void * arg2,size_t * arg2_sz)1167 vrrpd_cmd_delete(void *arg1, void *arg2, size_t *arg2_sz)
1168 {
1169 	vrrp_cmd_delete_t	*cmd = (vrrp_cmd_delete_t *)arg1;
1170 	vrrp_ret_delete_t	*ret = (vrrp_ret_delete_t *)arg2;
1171 
1172 	ret->vrd_err = vrrpd_delete(cmd->vcd_name);
1173 }
1174 
1175 /*ARGSUSED*/
1176 static void
vrrpd_cmd_enable(void * arg1,void * arg2,size_t * arg2_sz)1177 vrrpd_cmd_enable(void *arg1, void *arg2, size_t *arg2_sz)
1178 {
1179 	vrrp_cmd_enable_t	*cmd = (vrrp_cmd_enable_t *)arg1;
1180 	vrrp_ret_enable_t	*ret = (vrrp_ret_enable_t *)arg2;
1181 
1182 	ret->vrs_err = vrrpd_enable(cmd->vcs_name, _B_TRUE);
1183 }
1184 
1185 /*ARGSUSED*/
1186 static void
vrrpd_cmd_disable(void * arg1,void * arg2,size_t * arg2_sz)1187 vrrpd_cmd_disable(void *arg1, void *arg2, size_t *arg2_sz)
1188 {
1189 	vrrp_cmd_disable_t	*cmd = (vrrp_cmd_disable_t *)arg1;
1190 	vrrp_ret_disable_t	*ret = (vrrp_ret_disable_t *)arg2;
1191 
1192 	ret->vrx_err = vrrpd_disable(cmd->vcx_name);
1193 }
1194 
1195 /*ARGSUSED*/
1196 static void
vrrpd_cmd_modify(void * arg1,void * arg2,size_t * arg2_sz)1197 vrrpd_cmd_modify(void *arg1, void *arg2, size_t *arg2_sz)
1198 {
1199 	vrrp_cmd_modify_t	*cmd = (vrrp_cmd_modify_t *)arg1;
1200 	vrrp_ret_modify_t	*ret = (vrrp_ret_modify_t *)arg2;
1201 
1202 	ret->vrm_err = vrrpd_modify(&cmd->vcm_conf, cmd->vcm_mask);
1203 }
1204 
1205 static void
vrrpd_cmd_query(void * arg1,void * arg2,size_t * arg2_sz)1206 vrrpd_cmd_query(void *arg1, void *arg2, size_t *arg2_sz)
1207 {
1208 	vrrp_cmd_query_t	*cmd = (vrrp_cmd_query_t *)arg1;
1209 
1210 	vrrpd_query(cmd->vcq_name, arg2, arg2_sz);
1211 }
1212 
1213 static void
vrrpd_cmd_list(void * arg1,void * arg2,size_t * arg2_sz)1214 vrrpd_cmd_list(void *arg1, void *arg2, size_t *arg2_sz)
1215 {
1216 	vrrp_cmd_list_t	*cmd = (vrrp_cmd_list_t *)arg1;
1217 
1218 	vrrpd_list(cmd->vcl_vrid, cmd->vcl_ifname, cmd->vcl_af, arg2, arg2_sz);
1219 }
1220 
1221 /*
1222  * Write-type requeset must have the solaris.network.vrrp authorization.
1223  */
1224 static boolean_t
vrrp_auth_check(int connfd,vrrp_cmd_info_t * cinfo)1225 vrrp_auth_check(int connfd, vrrp_cmd_info_t *cinfo)
1226 {
1227 	ucred_t		*cred = NULL;
1228 	uid_t		uid;
1229 	struct passwd	*pw;
1230 	boolean_t	success = _B_FALSE;
1231 
1232 	vrrp_log(VRRP_DBG0, "vrrp_auth_check()");
1233 
1234 	if (!cinfo->vi_setop)
1235 		return (_B_TRUE);
1236 
1237 	/*
1238 	 * Validate the credential
1239 	 */
1240 	if (getpeerucred(connfd, &cred) == (uid_t)-1) {
1241 		vrrp_log(VRRP_ERR, "vrrp_auth_check(): getpeerucred() "
1242 		    "failed: %s", strerror(errno));
1243 		return (_B_FALSE);
1244 	}
1245 
1246 	if ((uid = ucred_getruid((const ucred_t *)cred)) == (uid_t)-1) {
1247 		vrrp_log(VRRP_ERR, "vrrp_auth_check(): ucred_getruid() "
1248 		    "failed: %s", strerror(errno));
1249 		goto done;
1250 	}
1251 
1252 	if ((pw = getpwuid(uid)) == NULL) {
1253 		vrrp_log(VRRP_ERR, "vrrp_auth_check(): getpwuid() failed");
1254 		goto done;
1255 	}
1256 
1257 	success = (chkauthattr("solaris.network.vrrp", pw->pw_name) == 1);
1258 
1259 done:
1260 	ucred_free(cred);
1261 	return (success);
1262 }
1263 
1264 /*
1265  * Process the administrative request from libvrrpadm
1266  */
1267 /* ARGSUSED */
1268 static void
vrrpd_cmdsock_handler(iu_eh_t * eh,int s,short events,iu_event_id_t id,void * arg)1269 vrrpd_cmdsock_handler(iu_eh_t *eh, int s, short events, iu_event_id_t id,
1270     void *arg)
1271 {
1272 	vrrp_cmd_info_t		*cinfo = NULL;
1273 	vrrp_err_t		err = VRRP_SUCCESS;
1274 	uchar_t			buf[BUFFSIZE], ackbuf[BUFFSIZE];
1275 	size_t			cursize, acksize, len;
1276 	uint32_t		cmd;
1277 	int			connfd, i;
1278 	struct sockaddr_in	from;
1279 	socklen_t		fromlen;
1280 
1281 	vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_handler()");
1282 
1283 	fromlen = (socklen_t)sizeof (from);
1284 	if ((connfd = accept(s, (struct sockaddr *)&from, &fromlen)) < 0) {
1285 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler() accept(): %s",
1286 		    strerror(errno));
1287 		return;
1288 	}
1289 
1290 	/*
1291 	 * First get the type of the request
1292 	 */
1293 	cursize = 0;
1294 	while (cursize < sizeof (uint32_t)) {
1295 		len = read(connfd, buf + cursize,
1296 		    sizeof (uint32_t) - cursize);
1297 		if (len == (size_t)-1 && (errno == EAGAIN || errno == EINTR)) {
1298 			continue;
1299 		} else if (len > 0) {
1300 			cursize += len;
1301 			continue;
1302 		}
1303 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid message "
1304 		    "length");
1305 		(void) close(connfd);
1306 		return;
1307 	}
1308 
1309 	/* LINTED E_BAD_PTR_CAST_ALIGN */
1310 	cmd = ((vrrp_cmd_t *)buf)->vc_cmd;
1311 	for (i = 0; i < VRRP_DOOR_INFO_TABLE_SIZE; i++) {
1312 		if (vrrp_cmd_info_tbl[i].vi_cmd == cmd) {
1313 			cinfo = vrrp_cmd_info_tbl + i;
1314 			break;
1315 		}
1316 	}
1317 
1318 	if (cinfo == NULL) {
1319 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid request "
1320 		    "type %d", cmd);
1321 		err = VRRP_EINVAL;
1322 		goto done;
1323 	}
1324 
1325 	/*
1326 	 * Get the rest of the request.
1327 	 */
1328 	assert(cursize == sizeof (uint32_t));
1329 	while (cursize < cinfo->vi_reqsize) {
1330 		len = read(connfd, buf + cursize,
1331 		    cinfo->vi_reqsize - cursize);
1332 		if (len == (size_t)-1 && (errno == EAGAIN || errno == EINTR)) {
1333 			continue;
1334 		} else if (len > 0) {
1335 			cursize += len;
1336 			continue;
1337 		}
1338 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid message "
1339 		    "length");
1340 		err = VRRP_EINVAL;
1341 		goto done;
1342 	}
1343 
1344 	/*
1345 	 * Validate the authorization
1346 	 */
1347 	if (!vrrp_auth_check(connfd, cinfo)) {
1348 		vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): "
1349 		    "not sufficient authorization");
1350 		err = VRRP_EPERM;
1351 	}
1352 
1353 done:
1354 	/*
1355 	 * Ack the request
1356 	 */
1357 	if (err != 0) {
1358 		/* LINTED E_BAD_PTR_CAST_ALIGN */
1359 		((vrrp_ret_t *)ackbuf)->vr_err = err;
1360 		acksize = sizeof (vrrp_ret_t);
1361 	} else {
1362 		/*
1363 		 * If the size of ack is varied, the cmdfunc callback
1364 		 * will set the right size.
1365 		 */
1366 		if ((acksize = cinfo->vi_acksize) == 0)
1367 			acksize = sizeof (ackbuf);
1368 
1369 		/* LINTED E_BAD_PTR_CAST_ALIGN */
1370 		cinfo->vi_cmdfunc((vrrp_cmd_t *)buf, ackbuf, &acksize);
1371 	}
1372 
1373 	/*
1374 	 * Send the ack back.
1375 	 */
1376 	cursize = 0;
1377 	while (cursize < acksize) {
1378 		len = sendto(connfd, ackbuf + cursize, acksize - cursize,
1379 		    0, (struct sockaddr *)&from, fromlen);
1380 		if (len == (size_t)-1 && errno == EAGAIN) {
1381 			continue;
1382 		} else if (len > 0) {
1383 			cursize += len;
1384 			continue;
1385 		} else {
1386 			vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler() failed to "
1387 			    "ack: %s", strerror(errno));
1388 			break;
1389 		}
1390 	}
1391 
1392 	(void) shutdown(connfd, SHUT_RDWR);
1393 	(void) close(connfd);
1394 }
1395 
1396 /*
1397  * Process the routing socket messages and update the interfaces/IP addresses
1398  * list
1399  */
1400 /* ARGSUSED */
1401 static void
vrrpd_rtsock_handler(iu_eh_t * eh,int s,short events,iu_event_id_t id,void * arg)1402 vrrpd_rtsock_handler(iu_eh_t *eh, int s, short events,
1403     iu_event_id_t id, void *arg)
1404 {
1405 	char			buf[BUFFSIZE];
1406 	struct ifa_msghdr	*ifam;
1407 	int			nbytes;
1408 	int			af = *(int *)arg;
1409 	boolean_t		scanif = _B_FALSE;
1410 
1411 	for (;;) {
1412 		nbytes = read(s, buf, sizeof (buf));
1413 		if (nbytes <= 0) {
1414 			/* No more messages */
1415 			break;
1416 		}
1417 
1418 		/* LINTED E_BAD_PTR_CAST_ALIGN */
1419 		ifam = (struct ifa_msghdr *)buf;
1420 		if (ifam->ifam_version != RTM_VERSION) {
1421 			vrrp_log(VRRP_ERR, "vrrpd_rtsock_handler(): version %d "
1422 			    "not understood", ifam->ifam_version);
1423 			break;
1424 		}
1425 
1426 		vrrp_log(VRRP_DBG0, "vrrpd_rtsock_handler(): recv %s event",
1427 		    rtm_event2str(ifam->ifam_type));
1428 
1429 		switch (ifam->ifam_type) {
1430 		case RTM_FREEADDR:
1431 		case RTM_CHGADDR:
1432 		case RTM_NEWADDR:
1433 		case RTM_DELADDR:
1434 			/*
1435 			 * An IP address has been created/updated/deleted or
1436 			 * brought up/down, re-initilialize the interface/IP
1437 			 * address list.
1438 			 */
1439 			scanif = _B_TRUE;
1440 			break;
1441 		default:
1442 			/* Not interesting */
1443 			break;
1444 		}
1445 	}
1446 
1447 	if (scanif)
1448 		vrrpd_scan(af);
1449 }
1450 
1451 /*
1452  * Periodically scan the interface/IP addresses on the system.
1453  */
1454 /* ARGSUSED */
1455 static void
vrrpd_scan_timer(iu_tq_t * tq,void * arg)1456 vrrpd_scan_timer(iu_tq_t *tq, void *arg)
1457 {
1458 	vrrp_log(VRRP_DBG0, "vrrpd_scan_timer()");
1459 	vrrpd_scan(AF_INET);
1460 	vrrpd_scan(AF_INET6);
1461 }
1462 
1463 /*
1464  * Get the list of the interface/IP addresses of the specified address
1465  * family.
1466  */
1467 static void
vrrpd_scan(int af)1468 vrrpd_scan(int af)
1469 {
1470 	vrrp_log(VRRP_DBG0, "vrrpd_scan(%s)", af_str(af));
1471 
1472 again:
1473 	vrrpd_init_ipcache(af);
1474 
1475 	/* If interface index changes, walk again. */
1476 	if (vrrpd_walk_addr_info(af) != IPADM_SUCCESS)
1477 		goto again;
1478 
1479 	vrrpd_update_ipcache(af);
1480 }
1481 
1482 /*
1483  * First mark all IP addresses of the specific address family to be removed.
1484  * This flag will then be cleared when we walk up all the IP addresses.
1485  */
1486 static void
vrrpd_init_ipcache(int af)1487 vrrpd_init_ipcache(int af)
1488 {
1489 	vrrp_intf_t	*intf, *next_intf;
1490 	vrrp_ip_t	*ip, *nextip;
1491 	char		abuf[INET6_ADDRSTRLEN];
1492 
1493 	vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(%s)", af_str(af));
1494 
1495 	next_intf = TAILQ_FIRST(&vrrp_intf_list);
1496 	while ((intf = next_intf) != NULL) {
1497 		next_intf = TAILQ_NEXT(intf, vvi_next);
1498 		if (intf->vvi_af != af)
1499 			continue;
1500 
1501 		/*
1502 		 * If the interface is still marked as new, it means that this
1503 		 * vrrpd_init_ipcache() call is a result of ifindex change,
1504 		 * which causes the re-walk of all the interfaces (see
1505 		 * vrrpd_add_ipaddr()), and some interfaces are still marked
1506 		 * as new during the last walk. In this case, delete this
1507 		 * interface with the "update_vr" argument to be _B_FALSE,
1508 		 * since no VRRP router has been assoicated with this
1509 		 * interface yet (the association is done in
1510 		 * vrrpd_update_ipcache()).
1511 		 *
1512 		 * This interface will be re-added later if it still exists.
1513 		 */
1514 		if (intf->vvi_state == NODE_STATE_NEW) {
1515 			vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(): remove %s "
1516 			    "(%d), may be added later", intf->vvi_ifname,
1517 			    intf->vvi_ifindex);
1518 			vrrpd_delete_if(intf, _B_FALSE);
1519 			continue;
1520 		}
1521 
1522 		for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
1523 		    ip = nextip) {
1524 			nextip = TAILQ_NEXT(ip, vip_next);
1525 			/* LINTED E_CONSTANT_CONDITION */
1526 			VRRPADDR2STR(af, &ip->vip_addr, abuf,
1527 			    INET6_ADDRSTRLEN, _B_FALSE);
1528 
1529 			if (ip->vip_state != NODE_STATE_NEW) {
1530 				vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(%s/%d, "
1531 				    "%s(%s/0x%x))", intf->vvi_ifname,
1532 				    intf->vvi_ifindex, ip->vip_lifname,
1533 				    abuf, ip->vip_flags);
1534 				ip->vip_state = NODE_STATE_STALE;
1535 				continue;
1536 			}
1537 
1538 			/*
1539 			 * If the IP is still marked as new, it means that
1540 			 * this vrrpd_init_ipcache() call is a result of
1541 			 * ifindex change, which causes the re-walk of all
1542 			 * the IP addresses (see vrrpd_add_ipaddr()).
1543 			 * Delete this IP.
1544 			 *
1545 			 * This IP will be readded later if it still exists.
1546 			 */
1547 			vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(): remove "
1548 			    "%s/%d , %s(%s)", intf->vvi_ifname,
1549 			    intf->vvi_ifindex, ip->vip_lifname, abuf);
1550 			vrrpd_delete_ip(intf, ip);
1551 		}
1552 	}
1553 }
1554 
1555 /*
1556  * Walk all the IP addresses of the given family and update its
1557  * addresses list. Return IPADM_FAILURE if it is required to walk
1558  * all the interfaces again (one of the interface index changes in between).
1559  */
1560 static ipadm_status_t
vrrpd_walk_addr_info(int af)1561 vrrpd_walk_addr_info(int af)
1562 {
1563 	ipadm_addr_info_t	*ainfo, *ainfop;
1564 	ipadm_status_t		ipstatus;
1565 	char			*lifname;
1566 	struct sockaddr_storage	stor;
1567 	vrrp_addr_t		*addr;
1568 	int			ifindex;
1569 	uint64_t		flags;
1570 
1571 	vrrp_log(VRRP_DBG0, "vrrpd_walk_addr_info(%s)", af_str(af));
1572 
1573 	ipstatus = ipadm_addr_info(vrrp_ipadm_handle, NULL, &ainfo, 0, 0);
1574 	if (ipstatus != IPADM_SUCCESS) {
1575 		vrrp_log(VRRP_ERR, "vrrpd_walk_addr_info(%s): "
1576 		    "ipadm_addr_info() failed: %s",
1577 		    af_str(af), ipadm_status2str(ipstatus));
1578 		return (IPADM_SUCCESS);
1579 	}
1580 
1581 	for (ainfop = ainfo; ainfop != NULL; ainfop = IA_NEXT(ainfop)) {
1582 		if (ainfop->ia_ifa.ifa_addr->sa_family != af)
1583 			continue;
1584 
1585 		lifname = ainfop->ia_ifa.ifa_name;
1586 		flags = ainfop->ia_ifa.ifa_flags;
1587 		(void) memcpy(&stor, ainfop->ia_ifa.ifa_addr, sizeof (stor));
1588 		addr = (vrrp_addr_t *)&stor;
1589 
1590 		vrrp_log(VRRP_DBG0, "vrrpd_walk_addr_info(%s): %s",
1591 		    af_str(af), lifname);
1592 
1593 		/* Skip virtual/IPMP/P2P interfaces */
1594 		if (flags & (IFF_VIRTUAL|IFF_IPMP|IFF_POINTOPOINT)) {
1595 			vrrp_log(VRRP_DBG0, "vrrpd_walk_addr_info(%s): "
1596 			    "skipped %s", af_str(af), lifname);
1597 			continue;
1598 		}
1599 
1600 		/* Filter out the all-zero IP address */
1601 		if (VRRPADDR_UNSPECIFIED(af, addr))
1602 			continue;
1603 
1604 		if ((ifindex = if_nametoindex(lifname)) == 0) {
1605 			if (errno != ENXIO && errno != ENOENT) {
1606 				vrrp_log(VRRP_ERR, "vrrpd_walk_addr_info(%s): "
1607 				    "if_nametoindex() failed for %s: %s",
1608 				    af_str(af), lifname, strerror(errno));
1609 			}
1610 			break;
1611 		}
1612 
1613 		/*
1614 		 * The interface is unplumbed/replumbed during the walk.  Try
1615 		 * to walk the IP addresses one more time.
1616 		 */
1617 		if (vrrpd_add_ipaddr(lifname, af, addr, ifindex, flags)
1618 		    == VRRP_EAGAIN) {
1619 			ipstatus = IPADM_FAILURE;
1620 			break;
1621 		}
1622 	}
1623 
1624 	ipadm_free_addr_info(ainfo);
1625 	return (ipstatus);
1626 }
1627 
1628 /*
1629  * Given the information of each IP address, update the interface and
1630  * IP addresses list
1631  */
1632 static vrrp_err_t
vrrpd_add_ipaddr(char * lifname,int af,vrrp_addr_t * addr,int ifindex,uint64_t flags)1633 vrrpd_add_ipaddr(char *lifname, int af, vrrp_addr_t *addr, int ifindex,
1634     uint64_t flags)
1635 {
1636 	char		ifname[LIFNAMSIZ], *c;
1637 	vrrp_intf_t	*intf;
1638 	vrrp_ip_t	*ip;
1639 	char		abuf[INET6_ADDRSTRLEN];
1640 	vrrp_err_t	err;
1641 
1642 	/* LINTED E_CONSTANT_CONDITION */
1643 	VRRPADDR2STR(af, addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
1644 	vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s, %d, 0x%x)", lifname,
1645 	    abuf, ifindex, flags);
1646 
1647 	/*
1648 	 * Get the physical interface name from the logical interface name.
1649 	 */
1650 	(void) strlcpy(ifname, lifname, sizeof (ifname));
1651 	if ((c = strchr(ifname, ':')) != NULL)
1652 		*c = '\0';
1653 
1654 	if ((intf = vrrpd_lookup_if(ifname, af)) == NULL) {
1655 		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(): %s is new", ifname);
1656 		err = vrrpd_create_if(ifname, af, ifindex, &intf);
1657 		if (err != VRRP_SUCCESS)
1658 			return (err);
1659 	} else if (intf->vvi_ifindex != ifindex) {
1660 		/*
1661 		 * If index changes, it means that this interface is
1662 		 * unplumbed/replumbed since we last checked. If this
1663 		 * interface is not used by any VRRP router, just
1664 		 * update its ifindex, and the IP addresses list will
1665 		 * be updated later. Otherwise, return EAGAIN to rewalk
1666 		 * all the IP addresses from the beginning.
1667 		 */
1668 		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s) ifindex changed ",
1669 		    "from %d to %d", ifname, intf->vvi_ifindex, ifindex);
1670 		if (!IS_PRIMARY_INTF(intf) && !IS_VIRTUAL_INTF(intf)) {
1671 			intf->vvi_ifindex = ifindex;
1672 		} else {
1673 			/*
1674 			 * delete this interface from the list if this
1675 			 * interface has already been assoicated with
1676 			 * any VRRP routers.
1677 			 */
1678 			vrrpd_delete_if(intf, _B_TRUE);
1679 			return (VRRP_EAGAIN);
1680 		}
1681 	}
1682 
1683 	/*
1684 	 * Does this IP address already exist?
1685 	 */
1686 	TAILQ_FOREACH(ip, &intf->vvi_iplist, vip_next) {
1687 		if (strcmp(ip->vip_lifname, lifname) == 0)
1688 			break;
1689 	}
1690 
1691 	if (ip != NULL) {
1692 		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s) IP exists",
1693 		    lifname, abuf);
1694 		ip->vip_state = NODE_STATE_NONE;
1695 		ip->vip_flags = flags;
1696 		if (ipaddr_cmp(af, addr, &ip->vip_addr) != 0) {
1697 			/*
1698 			 * Address has been changed, mark it as new
1699 			 * If this address is already selected as the
1700 			 * primary IP address, the new IP will be checked
1701 			 * to see whether it is still qualified as the
1702 			 * primary IP address. If not, the primary IP
1703 			 * address will be reselected.
1704 			 */
1705 			(void) memcpy(&ip->vip_addr, addr,
1706 			    sizeof (vrrp_addr_t));
1707 
1708 			ip->vip_state = NODE_STATE_NEW;
1709 		}
1710 	} else {
1711 		vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s) IP is new",
1712 		    lifname, abuf);
1713 
1714 		err = vrrpd_create_ip(intf, lifname, addr, flags);
1715 		if (err != VRRP_SUCCESS)
1716 			return (err);
1717 	}
1718 	return (VRRP_SUCCESS);
1719 }
1720 
1721 /*
1722  * Update the interface and IP addresses list. Remove the ones that have been
1723  * staled since last time we walk the IP addresses and updated the ones that
1724  * have been changed.
1725  */
1726 static void
vrrpd_update_ipcache(int af)1727 vrrpd_update_ipcache(int af)
1728 {
1729 	vrrp_intf_t	*intf, *nextif;
1730 	vrrp_ip_t	*ip, *nextip;
1731 	char		abuf[INET6_ADDRSTRLEN];
1732 	boolean_t	primary_selected;
1733 	boolean_t	primary_now_selected;
1734 	boolean_t	need_reenable = _B_FALSE;
1735 
1736 	vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(%s)", af_str(af));
1737 
1738 	nextif = TAILQ_FIRST(&vrrp_intf_list);
1739 	while ((intf = nextif) != NULL) {
1740 		nextif = TAILQ_NEXT(intf, vvi_next);
1741 		if (intf->vvi_af != af)
1742 			continue;
1743 
1744 		/*
1745 		 * Does the interface already select its primary IP address?
1746 		 */
1747 		primary_selected = (intf->vvi_pip != NULL);
1748 		assert(!primary_selected || IS_PRIMARY_INTF(intf));
1749 
1750 		/*
1751 		 * Removed the IP addresses that have been unconfigured.
1752 		 */
1753 		for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
1754 		    ip = nextip) {
1755 			nextip = TAILQ_NEXT(ip, vip_next);
1756 			if (ip->vip_state != NODE_STATE_STALE)
1757 				continue;
1758 
1759 			/* LINTED E_CONSTANT_CONDITION */
1760 			VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN,
1761 			    _B_FALSE);
1762 			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): IP %s "
1763 			    "is removed over %s", abuf, intf->vvi_ifname);
1764 			vrrpd_delete_ip(intf, ip);
1765 		}
1766 
1767 		/*
1768 		 * No IP addresses left, delete this interface.
1769 		 */
1770 		if (TAILQ_EMPTY(&intf->vvi_iplist)) {
1771 			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
1772 			    "no IP left over %s", intf->vvi_ifname);
1773 			vrrpd_delete_if(intf, _B_TRUE);
1774 			continue;
1775 		}
1776 
1777 		/*
1778 		 * If this is selected ss the physical interface for any
1779 		 * VRRP router, reselect the primary address if needed.
1780 		 */
1781 		if (IS_PRIMARY_INTF(intf)) {
1782 			vrrpd_reselect_primary(intf);
1783 			primary_now_selected = (intf->vvi_pip != NULL);
1784 
1785 			/*
1786 			 * Cannot find the new primary IP address.
1787 			 */
1788 			if (primary_selected && !primary_now_selected) {
1789 				vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache() "
1790 				    "reselect primary IP on %s failed",
1791 				    intf->vvi_ifname);
1792 				vrrpd_remove_if(intf, _B_TRUE);
1793 			} else if (!primary_selected && primary_now_selected) {
1794 				/*
1795 				 * The primary IP address is successfully
1796 				 * selected on the physical interfacew we
1797 				 * need to walk through all the VRRP routers
1798 				 * that is created on this physical interface
1799 				 * and see whether they can now be enabled.
1800 				 */
1801 				need_reenable = _B_TRUE;
1802 			}
1803 		}
1804 
1805 		/*
1806 		 * For every new virtual IP address, bring up/down it based
1807 		 * on the state of VRRP router.
1808 		 *
1809 		 * Note that it is fine to not update the IP's vip_flags field
1810 		 * even if vrrpd_virtualip_updateone() changed the address's
1811 		 * up/down state, since the vip_flags field is only used for
1812 		 * select primary IP address over a physical interface, and
1813 		 * vrrpd_virtualip_updateone() only affects the virtual IP
1814 		 * address's status.
1815 		 */
1816 		for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
1817 		    ip = nextip) {
1818 			nextip = TAILQ_NEXT(ip, vip_next);
1819 			/* LINTED E_CONSTANT_CONDITION */
1820 			VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN,
1821 			    _B_FALSE);
1822 			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
1823 			    "IP %s over %s%s", abuf, intf->vvi_ifname,
1824 			    ip->vip_state == NODE_STATE_NEW ? " is new" : "");
1825 
1826 			if (IS_VIRTUAL_INTF(intf)) {
1827 				/*
1828 				 * If this IP is new, update its up/down state
1829 				 * based on the virtual interface's state
1830 				 * (which is determined by the VRRP router's
1831 				 * state). Otherwise, check only and prompt
1832 				 * warnings if its up/down state has been
1833 				 * changed.
1834 				 */
1835 				if (vrrpd_virtualip_updateone(intf, ip,
1836 				    ip->vip_state == NODE_STATE_NONE) !=
1837 				    VRRP_SUCCESS) {
1838 					vrrp_log(VRRP_DBG0,
1839 					    "vrrpd_update_ipcache(): "
1840 					    "IP %s over %s update failed", abuf,
1841 					    intf->vvi_ifname);
1842 					vrrpd_delete_ip(intf, ip);
1843 					continue;
1844 				}
1845 			}
1846 			ip->vip_state = NODE_STATE_NONE;
1847 		}
1848 
1849 		/*
1850 		 * The IP address is deleted when it is failed to be brought
1851 		 * up. If no IP addresses are left, delete this interface.
1852 		 */
1853 		if (TAILQ_EMPTY(&intf->vvi_iplist)) {
1854 			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
1855 			    "no IP left over %s", intf->vvi_ifname);
1856 			vrrpd_delete_if(intf, _B_TRUE);
1857 			continue;
1858 		}
1859 
1860 		if (intf->vvi_state == NODE_STATE_NEW) {
1861 			/*
1862 			 * A new interface is found. This interface can be
1863 			 * the primary interface or the virtual VNIC
1864 			 * interface.  Again, we need to walk throught all
1865 			 * the VRRP routers to see whether some of them can
1866 			 * now be enabled because of the new primary IP
1867 			 * address or the new virtual IP addresses.
1868 			 */
1869 			intf->vvi_state = NODE_STATE_NONE;
1870 			need_reenable = _B_TRUE;
1871 		}
1872 	}
1873 
1874 	if (need_reenable)
1875 		vrrpd_reenable_all_vr();
1876 }
1877 
1878 /*
1879  * Reselect primary IP if:
1880  * - The existing primary IP is no longer qualified (removed or it is down or
1881  *   not a link-local IP for IPv6 VRRP router);
1882  * - This is a physical interface but no primary IP is chosen;
1883  */
1884 static void
vrrpd_reselect_primary(vrrp_intf_t * intf)1885 vrrpd_reselect_primary(vrrp_intf_t *intf)
1886 {
1887 	vrrp_ip_t	*ip;
1888 	char		abuf[INET6_ADDRSTRLEN];
1889 
1890 	assert(IS_PRIMARY_INTF(intf));
1891 
1892 	/*
1893 	 * If the interface's old primary IP address is still valid, return
1894 	 */
1895 	if (((ip = intf->vvi_pip) != NULL) && (QUALIFY_PRIMARY_ADDR(intf, ip)))
1896 		return;
1897 
1898 	if (ip != NULL) {
1899 		/* LINTED E_CONSTANT_CONDITION */
1900 		VRRPADDR2STR(intf->vvi_af, &ip->vip_addr, abuf,
1901 		    sizeof (abuf), _B_FALSE);
1902 		vrrp_log(VRRP_DBG0, "vrrpd_reselect_primary(%s): primary IP %s "
1903 		    "is no longer qualified", intf->vvi_ifname, abuf);
1904 	}
1905 
1906 	ip = vrrpd_select_primary(intf);
1907 	intf->vvi_pip = ip;
1908 
1909 	if (ip != NULL) {
1910 		/* LINTED E_CONSTANT_CONDITION */
1911 		VRRPADDR2STR(intf->vvi_af, &ip->vip_addr, abuf,
1912 		    sizeof (abuf), _B_FALSE);
1913 		vrrp_log(VRRP_DBG0, "vrrpd_reselect_primary(%s): primary IP %s "
1914 		    "is selected", intf->vvi_ifname, abuf);
1915 	}
1916 }
1917 
1918 /*
1919  * Select the primary IP address. Since the link-local IP address is always
1920  * at the head of the IP address list, try to find the first UP IP address
1921  * and see whether it qualify.
1922  */
1923 static vrrp_ip_t *
vrrpd_select_primary(vrrp_intf_t * pif)1924 vrrpd_select_primary(vrrp_intf_t *pif)
1925 {
1926 	vrrp_ip_t	*pip;
1927 	char		abuf[INET6_ADDRSTRLEN];
1928 
1929 	vrrp_log(VRRP_DBG1, "vrrpd_select_primary(%s)", pif->vvi_ifname);
1930 
1931 	TAILQ_FOREACH(pip, &pif->vvi_iplist, vip_next) {
1932 		assert(pip->vip_state != NODE_STATE_STALE);
1933 
1934 		/* LINTED E_CONSTANT_CONDITION */
1935 		VRRPADDR2STR(pif->vvi_af, &pip->vip_addr, abuf,
1936 		    INET6_ADDRSTRLEN, _B_FALSE);
1937 		vrrp_log(VRRP_DBG0, "vrrpd_select_primary(%s): %s is %s",
1938 		    pif->vvi_ifname, abuf,
1939 		    (pip->vip_flags & IFF_UP) ? "up" : "down");
1940 
1941 		if (pip->vip_flags & IFF_UP)
1942 			break;
1943 	}
1944 
1945 	/*
1946 	 * Is this valid primary IP address?
1947 	 */
1948 	if (pip == NULL || !QUALIFY_PRIMARY_ADDR(pif, pip)) {
1949 		vrrp_log(VRRP_DBG0, "vrrpd_select_primary(%s/%s) failed",
1950 		    pif->vvi_ifname, af_str(pif->vvi_af));
1951 		return (NULL);
1952 	}
1953 	return (pip);
1954 }
1955 
1956 /*
1957  * This is a new interface. Check whether any VRRP router is waiting for it
1958  */
1959 static void
vrrpd_reenable_all_vr()1960 vrrpd_reenable_all_vr()
1961 {
1962 	vrrp_vr_t *vr;
1963 
1964 	vrrp_log(VRRP_DBG0, "vrrpd_reenable_all_vr()");
1965 
1966 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
1967 		if (vr->vvr_conf.vvc_enabled)
1968 			(void) vrrpd_enable_vr(vr);
1969 	}
1970 }
1971 
1972 /*
1973  * If primary_addr_gone is _B_TRUE, it means that we failed to select
1974  * the primary IP address on this (physical) interface; otherwise,
1975  * it means the interface is no longer available.
1976  */
1977 static void
vrrpd_remove_if(vrrp_intf_t * intf,boolean_t primary_addr_gone)1978 vrrpd_remove_if(vrrp_intf_t *intf, boolean_t primary_addr_gone)
1979 {
1980 	vrrp_vr_t *vr;
1981 
1982 	vrrp_log(VRRP_DBG0, "vrrpd_remove_if(%s): %s", intf->vvi_ifname,
1983 	    primary_addr_gone ? "primary address gone" : "interface deleted");
1984 
1985 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
1986 		if (vr->vvr_conf.vvc_enabled)
1987 			vrrpd_disable_vr(vr, intf, primary_addr_gone);
1988 	}
1989 }
1990 
1991 /*
1992  * Update the VRRP configuration file based on the given configuration.
1993  * op is either VRRP_CONF_UPDATE or VRRP_CONF_DELETE
1994  */
1995 static vrrp_err_t
vrrpd_updateconf(vrrp_vr_conf_t * newconf,uint_t op)1996 vrrpd_updateconf(vrrp_vr_conf_t *newconf, uint_t op)
1997 {
1998 	vrrp_vr_conf_t	conf;
1999 	FILE		*fp, *nfp;
2000 	int		nfd;
2001 	char		line[LINE_MAX];
2002 	char		newfile[MAXPATHLEN];
2003 	boolean_t	found = _B_FALSE;
2004 	vrrp_err_t	err = VRRP_SUCCESS;
2005 
2006 	vrrp_log(VRRP_DBG0, "vrrpd_updateconf(%s, %s)", newconf->vvc_name,
2007 	    op == VRRP_CONF_UPDATE ? "update" : "delete");
2008 
2009 	if ((fp = fopen(vrrpd_conffile, "r+F")) == NULL) {
2010 		if (errno != ENOENT) {
2011 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): open %s for "
2012 			    "update failed: %s", vrrpd_conffile,
2013 			    strerror(errno));
2014 			return (VRRP_EDB);
2015 		}
2016 
2017 		if ((fp = fopen(vrrpd_conffile, "w+F")) == NULL) {
2018 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): open %s for "
2019 			    "write failed: %s", vrrpd_conffile,
2020 			    strerror(errno));
2021 			return (VRRP_EDB);
2022 		}
2023 	}
2024 
2025 	(void) snprintf(newfile, MAXPATHLEN, "%s.new", vrrpd_conffile);
2026 	if ((nfd = open(newfile, O_WRONLY | O_CREAT | O_TRUNC,
2027 	    S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0) {
2028 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): open %s failed: %s",
2029 		    newfile, strerror(errno));
2030 		(void) fclose(fp);
2031 		return (VRRP_EDB);
2032 	}
2033 
2034 	if ((nfp = fdopen(nfd, "wF")) == NULL) {
2035 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): fdopen(%s) failed: %s",
2036 		    newfile, strerror(errno));
2037 		goto done;
2038 	}
2039 
2040 	while (fgets(line, sizeof (line), fp) != NULL) {
2041 		conf.vvc_vrid = VRRP_VRID_NONE;
2042 		if (!found && (err = vrrpd_read_vrconf(line, &conf)) !=
2043 		    VRRP_SUCCESS) {
2044 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): invalid "
2045 			    "configuration format: %s", line);
2046 			goto done;
2047 		}
2048 
2049 		/*
2050 		 * Write this line out if:
2051 		 * - this is a comment line; or
2052 		 * - we've done updating/deleting the the given VR; or
2053 		 * - if the name of the VR read from this line does not match
2054 		 *   the VR name that we are about to update/delete;
2055 		 */
2056 		if (found || conf.vvc_vrid == VRRP_VRID_NONE ||
2057 		    strcmp(conf.vvc_name, newconf->vvc_name) != 0) {
2058 			if (fputs(line, nfp) != EOF)
2059 				continue;
2060 
2061 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2062 			    "write line %s", line);
2063 			err = VRRP_EDB;
2064 			goto done;
2065 		}
2066 
2067 		/*
2068 		 * Otherwise, update/skip the line.
2069 		 */
2070 		found = _B_TRUE;
2071 		if (op == VRRP_CONF_DELETE)
2072 			continue;
2073 
2074 		assert(op == VRRP_CONF_UPDATE);
2075 		if ((err = vrrpd_write_vrconf(line, sizeof (line),
2076 		    newconf)) != VRRP_SUCCESS) {
2077 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2078 			    "update configuration for %s", newconf->vvc_name);
2079 			goto done;
2080 		}
2081 		if (fputs(line, nfp) == EOF) {
2082 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2083 			    "write line %s", line);
2084 			err = VRRP_EDB;
2085 			goto done;
2086 		}
2087 	}
2088 
2089 	/*
2090 	 * If we get to the end of the file and have not seen the router that
2091 	 * we are about to update, write it out.
2092 	 */
2093 	if (!found && op == VRRP_CONF_UPDATE) {
2094 		if ((err = vrrpd_write_vrconf(line, sizeof (line),
2095 		    newconf)) == VRRP_SUCCESS && fputs(line, nfp) == EOF) {
2096 			vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2097 			    "write line %s", line);
2098 			err = VRRP_EDB;
2099 		}
2100 	} else if (!found && op == VRRP_CONF_DELETE) {
2101 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to find "
2102 		    "configuation for %s", newconf->vvc_name);
2103 		err = VRRP_ENOTFOUND;
2104 	}
2105 
2106 	if (err != VRRP_SUCCESS)
2107 		goto done;
2108 
2109 	if (fflush(nfp) == EOF || rename(newfile, vrrpd_conffile) < 0) {
2110 		vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to "
2111 		    "rename file %s", newfile);
2112 		err = VRRP_EDB;
2113 	}
2114 
2115 done:
2116 	(void) fclose(fp);
2117 	(void) fclose(nfp);
2118 	(void) unlink(newfile);
2119 	return (err);
2120 }
2121 
2122 static vrrp_err_t
vrrpd_write_vrconf(char * line,size_t len,vrrp_vr_conf_t * conf)2123 vrrpd_write_vrconf(char *line, size_t len, vrrp_vr_conf_t *conf)
2124 {
2125 	vrrp_prop_t	*prop;
2126 	int		n, i;
2127 
2128 	vrrp_log(VRRP_DBG0, "vrrpd_write_vrconf(%s)", conf->vvc_name);
2129 
2130 	for (i = 0; i < VRRP_PROP_INFO_TABSIZE; i++) {
2131 		prop = &vrrp_prop_info_tbl[i];
2132 		n = snprintf(line, len, i == 0 ? "%s=" : " %s=",
2133 		    prop->vs_propname);
2134 		if (n < 0 || n >= len)
2135 			break;
2136 		len -= n;
2137 		line += n;
2138 		n = prop->vs_propwrite(conf, line, len);
2139 		if (n < 0 || n >= len)
2140 			break;
2141 		len -= n;
2142 		line += n;
2143 	}
2144 	if (i != VRRP_PROP_INFO_TABSIZE) {
2145 		vrrp_log(VRRP_ERR, "vrrpd_write_vrconf(%s): buffer size too"
2146 		    "small", conf->vvc_name);
2147 		return (VRRP_EDB);
2148 	}
2149 	n = snprintf(line, len, "\n");
2150 	if (n < 0 || n >= len) {
2151 		vrrp_log(VRRP_ERR, "vrrpd_write_vrconf(%s): buffer size too"
2152 		    "small", conf->vvc_name);
2153 		return (VRRP_EDB);
2154 	}
2155 	return (VRRP_SUCCESS);
2156 }
2157 
2158 static vrrp_err_t
vrrpd_read_vrconf(char * line,vrrp_vr_conf_t * conf)2159 vrrpd_read_vrconf(char *line, vrrp_vr_conf_t *conf)
2160 {
2161 	char		*str, *token;
2162 	char		*next;
2163 	vrrp_err_t	err = VRRP_SUCCESS;
2164 	char		tmpbuf[MAXLINELEN];
2165 
2166 	str = tmpbuf;
2167 	(void) strlcpy(tmpbuf, line, MAXLINELEN);
2168 
2169 	/*
2170 	 * Skip leading spaces, blank lines, and comments.
2171 	 */
2172 	skip_whitespace(str);
2173 	if ((str - tmpbuf == strlen(tmpbuf)) || (*str == '#')) {
2174 		conf->vvc_vrid = VRRP_VRID_NONE;
2175 		return (VRRP_SUCCESS);
2176 	}
2177 
2178 	/*
2179 	 * Read each VR properties.
2180 	 */
2181 	for (token = strtok_r(str, " \n\t", &next); token != NULL;
2182 	    token = strtok_r(NULL, " \n\t", &next)) {
2183 		if ((err = vrrpd_readprop(token, conf)) != VRRP_SUCCESS)
2184 			break;
2185 	}
2186 
2187 	/* All properties read but no VRID defined */
2188 	if (err == VRRP_SUCCESS && conf->vvc_vrid == VRRP_VRID_NONE)
2189 		err = VRRP_EINVAL;
2190 
2191 	return (err);
2192 }
2193 
2194 static vrrp_err_t
vrrpd_readprop(const char * str,vrrp_vr_conf_t * conf)2195 vrrpd_readprop(const char *str, vrrp_vr_conf_t *conf)
2196 {
2197 	vrrp_prop_t	*prop;
2198 	char		*pstr;
2199 	int		i;
2200 
2201 	if ((pstr = strchr(str, '=')) == NULL) {
2202 		vrrp_log(VRRP_ERR, "vrrpd_readprop(%s): invalid property", str);
2203 		return (VRRP_EINVAL);
2204 	}
2205 
2206 	*pstr++ = '\0';
2207 	for (i = 0; i < VRRP_PROP_INFO_TABSIZE; i++) {
2208 		prop = &vrrp_prop_info_tbl[i];
2209 		if (strcasecmp(str, prop->vs_propname) == 0) {
2210 			if (prop->vs_propread(conf, pstr))
2211 				break;
2212 		}
2213 	}
2214 
2215 	if (i == VRRP_PROP_INFO_TABSIZE) {
2216 		vrrp_log(VRRP_ERR, "vrrpd_readprop(%s): invalid property", str);
2217 		return (VRRP_EINVAL);
2218 	}
2219 
2220 	return (VRRP_SUCCESS);
2221 }
2222 
2223 static boolean_t
vrrp_rd_prop_name(vrrp_vr_conf_t * conf,const char * str)2224 vrrp_rd_prop_name(vrrp_vr_conf_t *conf, const char *str)
2225 {
2226 	size_t size = sizeof (conf->vvc_name);
2227 	return (strlcpy(conf->vvc_name, str, size) < size);
2228 }
2229 
2230 static boolean_t
vrrp_rd_prop_vrid(vrrp_vr_conf_t * conf,const char * str)2231 vrrp_rd_prop_vrid(vrrp_vr_conf_t *conf, const char *str)
2232 {
2233 	conf->vvc_vrid = strtol(str, NULL, 0);
2234 	return (!(conf->vvc_vrid < VRRP_VRID_MIN ||
2235 	    conf->vvc_vrid > VRRP_VRID_MAX ||
2236 	    (conf->vvc_vrid == 0 && errno != 0)));
2237 }
2238 
2239 static boolean_t
vrrp_rd_prop_af(vrrp_vr_conf_t * conf,const char * str)2240 vrrp_rd_prop_af(vrrp_vr_conf_t *conf, const char *str)
2241 {
2242 	if (strcasecmp(str, "AF_INET") == 0)
2243 		conf->vvc_af = AF_INET;
2244 	else if (strcasecmp(str, "AF_INET6") == 0)
2245 		conf->vvc_af = AF_INET6;
2246 	else
2247 		return (_B_FALSE);
2248 	return (_B_TRUE);
2249 }
2250 
2251 static boolean_t
vrrp_rd_prop_pri(vrrp_vr_conf_t * conf,const char * str)2252 vrrp_rd_prop_pri(vrrp_vr_conf_t *conf, const char *str)
2253 {
2254 	conf->vvc_pri = strtol(str, NULL, 0);
2255 	return (!(conf->vvc_pri < VRRP_PRI_MIN ||
2256 	    conf->vvc_pri > VRRP_PRI_OWNER ||
2257 	    (conf->vvc_pri == 0 && errno != 0)));
2258 }
2259 
2260 static boolean_t
vrrp_rd_prop_adver_int(vrrp_vr_conf_t * conf,const char * str)2261 vrrp_rd_prop_adver_int(vrrp_vr_conf_t *conf, const char *str)
2262 {
2263 	conf->vvc_adver_int = strtol(str, NULL, 0);
2264 	return (!(conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
2265 	    conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX ||
2266 	    (conf->vvc_adver_int == 0 && errno != 0)));
2267 }
2268 
2269 static boolean_t
vrrp_rd_prop_preempt(vrrp_vr_conf_t * conf,const char * str)2270 vrrp_rd_prop_preempt(vrrp_vr_conf_t *conf, const char *str)
2271 {
2272 	if (strcasecmp(str, "true") == 0)
2273 		conf->vvc_preempt = _B_TRUE;
2274 	else if (strcasecmp(str, "false") == 0)
2275 		conf->vvc_preempt = _B_FALSE;
2276 	else
2277 		return (_B_FALSE);
2278 	return (_B_TRUE);
2279 }
2280 
2281 static boolean_t
vrrp_rd_prop_accept(vrrp_vr_conf_t * conf,const char * str)2282 vrrp_rd_prop_accept(vrrp_vr_conf_t *conf, const char *str)
2283 {
2284 	if (strcasecmp(str, "true") == 0)
2285 		conf->vvc_accept = _B_TRUE;
2286 	else if (strcasecmp(str, "false") == 0)
2287 		conf->vvc_accept = _B_FALSE;
2288 	else
2289 		return (_B_FALSE);
2290 	return (_B_TRUE);
2291 }
2292 
2293 static boolean_t
vrrp_rd_prop_enabled(vrrp_vr_conf_t * conf,const char * str)2294 vrrp_rd_prop_enabled(vrrp_vr_conf_t *conf, const char *str)
2295 {
2296 	if (strcasecmp(str, "enabled") == 0)
2297 		conf->vvc_enabled = _B_TRUE;
2298 	else if (strcasecmp(str, "disabled") == 0)
2299 		conf->vvc_enabled = _B_FALSE;
2300 	else
2301 		return (_B_FALSE);
2302 	return (_B_TRUE);
2303 }
2304 
2305 static boolean_t
vrrp_rd_prop_ifname(vrrp_vr_conf_t * conf,const char * str)2306 vrrp_rd_prop_ifname(vrrp_vr_conf_t *conf, const char *str)
2307 {
2308 	size_t size = sizeof (conf->vvc_link);
2309 	return (strlcpy(conf->vvc_link, str, size) < size);
2310 }
2311 
2312 static int
vrrp_wt_prop_name(vrrp_vr_conf_t * conf,char * str,size_t size)2313 vrrp_wt_prop_name(vrrp_vr_conf_t *conf, char *str, size_t size)
2314 {
2315 	return (snprintf(str, size, "%s", conf->vvc_name));
2316 }
2317 
2318 static int
vrrp_wt_prop_pri(vrrp_vr_conf_t * conf,char * str,size_t size)2319 vrrp_wt_prop_pri(vrrp_vr_conf_t *conf, char *str, size_t size)
2320 {
2321 	return (snprintf(str, size, "%d", conf->vvc_pri));
2322 }
2323 
2324 static int
vrrp_wt_prop_adver_int(vrrp_vr_conf_t * conf,char * str,size_t size)2325 vrrp_wt_prop_adver_int(vrrp_vr_conf_t *conf, char *str, size_t size)
2326 {
2327 	return (snprintf(str, size, "%d", conf->vvc_adver_int));
2328 }
2329 
2330 static int
vrrp_wt_prop_preempt(vrrp_vr_conf_t * conf,char * str,size_t size)2331 vrrp_wt_prop_preempt(vrrp_vr_conf_t *conf, char *str, size_t size)
2332 {
2333 	return (snprintf(str, size, "%s",
2334 	    conf->vvc_preempt ? "true" : "false"));
2335 }
2336 
2337 static int
vrrp_wt_prop_accept(vrrp_vr_conf_t * conf,char * str,size_t size)2338 vrrp_wt_prop_accept(vrrp_vr_conf_t *conf, char *str, size_t size)
2339 {
2340 	return (snprintf(str, size, "%s",
2341 	    conf->vvc_accept ? "true" : "false"));
2342 }
2343 
2344 static int
vrrp_wt_prop_enabled(vrrp_vr_conf_t * conf,char * str,size_t size)2345 vrrp_wt_prop_enabled(vrrp_vr_conf_t *conf, char *str, size_t size)
2346 {
2347 	return (snprintf(str, size, "%s",
2348 	    conf->vvc_enabled ? "enabled" : "disabled"));
2349 }
2350 
2351 static int
vrrp_wt_prop_vrid(vrrp_vr_conf_t * conf,char * str,size_t size)2352 vrrp_wt_prop_vrid(vrrp_vr_conf_t *conf, char *str, size_t size)
2353 {
2354 	return (snprintf(str, size, "%d", conf->vvc_vrid));
2355 }
2356 
2357 static int
vrrp_wt_prop_af(vrrp_vr_conf_t * conf,char * str,size_t size)2358 vrrp_wt_prop_af(vrrp_vr_conf_t *conf, char *str, size_t size)
2359 {
2360 	return (snprintf(str, size, "%s",
2361 	    conf->vvc_af == AF_INET ? "AF_INET" : "AF_INET6"));
2362 }
2363 
2364 static int
vrrp_wt_prop_ifname(vrrp_vr_conf_t * conf,char * str,size_t size)2365 vrrp_wt_prop_ifname(vrrp_vr_conf_t *conf, char *str, size_t size)
2366 {
2367 	return (snprintf(str, size, "%s", conf->vvc_link));
2368 }
2369 
/*
 * Return a printable name for an address family. Both the AF_* constants
 * and the plain numbers 4 and 6 are understood; anything else that is not
 * AF_UNSPEC is reported as "AF_error".
 */
static char *
af_str(int af)
{
	char	*name = "AF_error";

	if (af == 4 || af == AF_INET)
		name = "AF_INET";
	else if (af == 6 || af == AF_INET6)
		name = "AF_INET6";
	else if (af == AF_UNSPEC)
		name = "AF_UNSPEC";

	return (name);
}
2382 
2383 static vrrp_err_t
vrrpd_create_vr(vrrp_vr_conf_t * conf)2384 vrrpd_create_vr(vrrp_vr_conf_t *conf)
2385 {
2386 	vrrp_vr_t	*vr;
2387 
2388 	vrrp_log(VRRP_DBG0, "vrrpd_create_vr(%s)", conf->vvc_name);
2389 
2390 	if ((vr = malloc(sizeof (vrrp_vr_t))) == NULL) {
2391 		vrrp_log(VRRP_ERR, "vrrpd_create_vr(): memory allocation for %s"
2392 		    " failed", conf->vvc_name);
2393 		return (VRRP_ENOMEM);
2394 	}
2395 
2396 	bzero(vr, sizeof (vrrp_vr_t));
2397 	vr->vvr_state = VRRP_STATE_NONE;
2398 	vr->vvr_timer_id = -1;
2399 	vrrpd_state_trans(VRRP_STATE_NONE, VRRP_STATE_INIT, vr);
2400 	(void) memcpy(&vr->vvr_conf, conf, sizeof (vrrp_vr_conf_t));
2401 	vr->vvr_conf.vvc_enabled = _B_FALSE;
2402 	TAILQ_INSERT_HEAD(&vrrp_vr_list, vr, vvr_next);
2403 	return (VRRP_SUCCESS);
2404 }
2405 
2406 static void
vrrpd_delete_vr(vrrp_vr_t * vr)2407 vrrpd_delete_vr(vrrp_vr_t *vr)
2408 {
2409 	vrrp_log(VRRP_DBG0, "vrrpd_delete_vr(%s)", vr->vvr_conf.vvc_name);
2410 	if (vr->vvr_conf.vvc_enabled)
2411 		vrrpd_disable_vr(vr, NULL, _B_FALSE);
2412 	assert(vr->vvr_state == VRRP_STATE_INIT);
2413 	vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_NONE, vr);
2414 	TAILQ_REMOVE(&vrrp_vr_list, vr, vvr_next);
2415 	(void) free(vr);
2416 }
2417 
2418 static vrrp_err_t
vrrpd_enable_vr(vrrp_vr_t * vr)2419 vrrpd_enable_vr(vrrp_vr_t *vr)
2420 {
2421 	vrrp_err_t	rx_err, tx_err, err = VRRP_EINVAL;
2422 
2423 	vrrp_log(VRRP_DBG0, "vrrpd_enable_vr(%s)", vr->vvr_conf.vvc_name);
2424 
2425 	assert(vr->vvr_conf.vvc_enabled);
2426 
2427 	/*
2428 	 * This VRRP router has been successfully enabled and start
2429 	 * participating.
2430 	 */
2431 	if (vr->vvr_state != VRRP_STATE_INIT)
2432 		return (VRRP_SUCCESS);
2433 
2434 	if ((rx_err = vrrpd_init_rxsock(vr)) == VRRP_SUCCESS) {
2435 		/*
2436 		 * Select the primary IP address. Even if this time
2437 		 * primary IP selection failed, we will reselect the
2438 		 * primary IP address when new IP address comes up.
2439 		 */
2440 		vrrpd_reselect_primary(vr->vvr_pif);
2441 		if (vr->vvr_pif->vvi_pip == NULL) {
2442 			vrrp_log(VRRP_DBG0, "vrrpd_enable_vr(%s): "
2443 			    "select_primary over %s failed",
2444 			    vr->vvr_conf.vvc_name, vr->vvr_pif->vvi_ifname);
2445 			rx_err = VRRP_ENOPRIM;
2446 		}
2447 	}
2448 
2449 	/*
2450 	 * Initialize the TX socket used for this vrrp_vr_t to send the
2451 	 * multicast packets.
2452 	 */
2453 	tx_err = vrrpd_init_txsock(vr);
2454 
2455 	/*
2456 	 * Only start the state transition if sockets for both RX and TX are
2457 	 * initialized correctly.
2458 	 */
2459 	if (rx_err != VRRP_SUCCESS || tx_err != VRRP_SUCCESS) {
2460 		/*
2461 		 * Record the error information for diagnose purpose.
2462 		 */
2463 		vr->vvr_err = (rx_err == VRRP_SUCCESS) ? tx_err : rx_err;
2464 		return (err);
2465 	}
2466 
2467 	if (vr->vvr_conf.vvc_pri == 255)
2468 		err = vrrpd_state_i2m(vr);
2469 	else
2470 		err = vrrpd_state_i2b(vr);
2471 
2472 	if (err != VRRP_SUCCESS) {
2473 		vr->vvr_err = err;
2474 		vr->vvr_pif->vvi_pip = NULL;
2475 		vrrpd_fini_txsock(vr);
2476 		vrrpd_fini_rxsock(vr);
2477 	}
2478 	return (err);
2479 }
2480 
2481 /*
2482  * Given the removed interface, see whether the given VRRP router would
2483  * be affected and stop participating the VRRP protocol.
2484  *
2485  * If intf is NULL, VR disabling request is coming from the admin.
2486  */
2487 static void
vrrpd_disable_vr(vrrp_vr_t * vr,vrrp_intf_t * intf,boolean_t primary_addr_gone)2488 vrrpd_disable_vr(vrrp_vr_t *vr, vrrp_intf_t *intf, boolean_t primary_addr_gone)
2489 {
2490 	vrrp_log(VRRP_DBG0, "vrrpd_disable_vr(%s): %s%s", vr->vvr_conf.vvc_name,
2491 	    intf == NULL ? "requested by admin" : intf->vvi_ifname,
2492 	    intf == NULL ? "" : (primary_addr_gone ? "primary address gone" :
2493 	    "interface deleted"));
2494 
2495 	/*
2496 	 * An interface is deleted, see whether this interface is the
2497 	 * physical interface or the VNIC of the given VRRP router.
2498 	 * If so, continue to disable the VRRP router.
2499 	 */
2500 	if (!primary_addr_gone && (intf != NULL) && (intf != vr->vvr_pif) &&
2501 	    (intf != vr->vvr_vif)) {
2502 		return;
2503 	}
2504 
2505 	/*
2506 	 * If this is the case that the primary IP address is gone,
2507 	 * and we failed to reselect another primary IP address,
2508 	 * continue to disable the VRRP router.
2509 	 */
2510 	if (primary_addr_gone && intf != vr->vvr_pif)
2511 		return;
2512 
2513 	vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): disabling",
2514 	    vr->vvr_conf.vvc_name);
2515 
2516 	if (vr->vvr_state == VRRP_STATE_MASTER) {
2517 		/*
2518 		 * If this router is disabled by the administrator, send
2519 		 * the zero-priority advertisement to indicate the Master
2520 		 * stops participating VRRP.
2521 		 */
2522 		if (intf == NULL)
2523 			(void) vrrpd_send_adv(vr, _B_TRUE);
2524 
2525 		vrrpd_state_m2i(vr);
2526 	} else  if (vr->vvr_state == VRRP_STATE_BACKUP) {
2527 		vrrpd_state_b2i(vr);
2528 	}
2529 
2530 	/*
2531 	 * If no primary IP address can be selected, the VRRP router
2532 	 * stays at the INIT state and will become BACKUP and MASTER when
2533 	 * a primary IP address is reselected.
2534 	 */
2535 	if (primary_addr_gone) {
2536 		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): primary IP "
2537 		    "is removed", vr->vvr_conf.vvc_name);
2538 		vr->vvr_err = VRRP_ENOPRIM;
2539 	} else if (intf == NULL) {
2540 		/*
2541 		 * The VRRP router is disable by the administrator
2542 		 */
2543 		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): disabled by admin",
2544 		    vr->vvr_conf.vvc_name);
2545 		vr->vvr_err = VRRP_SUCCESS;
2546 		vrrpd_fini_txsock(vr);
2547 		vrrpd_fini_rxsock(vr);
2548 	} else if (intf == vr->vvr_pif) {
2549 		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): physical interface "
2550 		    "%s removed", vr->vvr_conf.vvc_name, intf->vvi_ifname);
2551 		vr->vvr_err = VRRP_ENOPRIM;
2552 		vrrpd_fini_rxsock(vr);
2553 	} else if (intf == vr->vvr_vif) {
2554 		vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): VNIC interface %s"
2555 		    " removed", vr->vvr_conf.vvc_name, intf->vvi_ifname);
2556 		vr->vvr_err = VRRP_ENOVIRT;
2557 		vrrpd_fini_txsock(vr);
2558 	}
2559 }
2560 
2561 vrrp_err_t
vrrpd_create(vrrp_vr_conf_t * conf,boolean_t updateconf)2562 vrrpd_create(vrrp_vr_conf_t *conf, boolean_t updateconf)
2563 {
2564 	vrrp_err_t	err = VRRP_SUCCESS;
2565 
2566 	vrrp_log(VRRP_DBG0, "vrrpd_create(%s, %s, %d)", conf->vvc_name,
2567 	    conf->vvc_link, conf->vvc_vrid);
2568 
2569 	assert(conf != NULL);
2570 
2571 	/*
2572 	 * Sanity check
2573 	 */
2574 	if ((strlen(conf->vvc_name) == 0) ||
2575 	    (strlen(conf->vvc_link) == 0) ||
2576 	    (conf->vvc_vrid < VRRP_VRID_MIN ||
2577 	    conf->vvc_vrid > VRRP_VRID_MAX) ||
2578 	    (conf->vvc_pri < VRRP_PRI_MIN ||
2579 	    conf->vvc_pri > VRRP_PRI_OWNER) ||
2580 	    (conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
2581 	    conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX) ||
2582 	    (conf->vvc_af != AF_INET && conf->vvc_af != AF_INET6) ||
2583 	    (conf->vvc_pri == VRRP_PRI_OWNER && !conf->vvc_accept)) {
2584 		vrrp_log(VRRP_DBG1, "vrrpd_create(%s): invalid argument",
2585 		    conf->vvc_name);
2586 		return (VRRP_EINVAL);
2587 	}
2588 
2589 	if (!vrrp_valid_name(conf->vvc_name)) {
2590 		vrrp_log(VRRP_DBG1, "vrrpd_create(): %s is not a valid router "
2591 		    "name", conf->vvc_name);
2592 		return (VRRP_EINVALVRNAME);
2593 	}
2594 
2595 	if (vrrpd_lookup_vr_by_name(conf->vvc_name) != NULL) {
2596 		vrrp_log(VRRP_DBG1, "vrrpd_create(): %s already exists",
2597 		    conf->vvc_name);
2598 		return (VRRP_EINSTEXIST);
2599 	}
2600 
2601 	if (vrrpd_lookup_vr_by_vrid(conf->vvc_link, conf->vvc_vrid,
2602 	    conf->vvc_af) != NULL) {
2603 		vrrp_log(VRRP_DBG1, "vrrpd_create(): VRID %d/%s over %s "
2604 		    "already exists", conf->vvc_vrid, af_str(conf->vvc_af),
2605 		    conf->vvc_link);
2606 		return (VRRP_EVREXIST);
2607 	}
2608 
2609 	if (updateconf && (err = vrrpd_updateconf(conf,
2610 	    VRRP_CONF_UPDATE)) != VRRP_SUCCESS) {
2611 		vrrp_log(VRRP_ERR, "vrrpd_create(): failed to update "
2612 		    "configuration for %s", conf->vvc_name);
2613 		return (err);
2614 	}
2615 
2616 	err = vrrpd_create_vr(conf);
2617 	if (err != VRRP_SUCCESS && updateconf)
2618 		(void) vrrpd_updateconf(conf, VRRP_CONF_DELETE);
2619 
2620 	return (err);
2621 }
2622 
/*
 * Delete the VRRP router with the given name.
 *
 * The router's entry is removed from the configuration file first; only
 * if that succeeds is the router itself torn down. Returns
 * VRRP_ENOTFOUND when no such router exists, the error from
 * vrrpd_updateconf() when the file could not be updated, and
 * VRRP_SUCCESS otherwise.
 */
static vrrp_err_t
vrrpd_delete(const char *vn)
{
	vrrp_vr_t	*router;
	vrrp_err_t	rc;

	vrrp_log(VRRP_DBG0, "vrrpd_delete(%s)", vn);

	router = vrrpd_lookup_vr_by_name(vn);
	if (router == NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_delete(): %s not exists", vn);
		return (VRRP_ENOTFOUND);
	}

	rc = vrrpd_updateconf(&router->vvr_conf, VRRP_CONF_DELETE);
	if (rc == VRRP_SUCCESS) {
		vrrpd_delete_vr(router);
		return (VRRP_SUCCESS);
	}

	vrrp_log(VRRP_ERR, "vrrpd_delete(): failed to delete "
	    "configuration for %s", router->vvr_conf.vvc_name);
	return (rc);
}
2646 
2647 static vrrp_err_t
vrrpd_enable(const char * vn,boolean_t updateconf)2648 vrrpd_enable(const char *vn, boolean_t updateconf)
2649 {
2650 	vrrp_vr_t		*vr;
2651 	vrrp_vr_conf_t		*conf;
2652 	uint32_t		flags;
2653 	datalink_class_t	class;
2654 	vrrp_err_t		err = VRRP_SUCCESS;
2655 
2656 	vrrp_log(VRRP_DBG0, "vrrpd_enable(%s)", vn);
2657 
2658 	if ((vr = vrrpd_lookup_vr_by_name(vn)) == NULL) {
2659 		vrrp_log(VRRP_DBG1, "vrrpd_enable(): %s does not exist", vn);
2660 		return (VRRP_ENOTFOUND);
2661 	}
2662 
2663 	/*
2664 	 * The VR is already enabled.
2665 	 */
2666 	conf = &vr->vvr_conf;
2667 	if (conf->vvc_enabled) {
2668 		vrrp_log(VRRP_DBG1, "vrrpd_enable(): %s is already "
2669 		    "enabled", vn);
2670 		return (VRRP_EALREADY);
2671 	}
2672 
2673 	/*
2674 	 * Check whether the link exists.
2675 	 */
2676 	if ((strlen(conf->vvc_link) == 0) || dladm_name2info(vrrpd_vh->vh_dh,
2677 	    conf->vvc_link, NULL, &flags, &class, NULL) != DLADM_STATUS_OK ||
2678 	    !(flags & DLADM_OPT_ACTIVE) || ((class != DATALINK_CLASS_PHYS) &&
2679 	    (class != DATALINK_CLASS_VLAN) && (class != DATALINK_CLASS_AGGR) &&
2680 	    (class != DATALINK_CLASS_VNIC))) {
2681 		vrrp_log(VRRP_DBG1, "vrrpd_enable(%s): invalid link %s",
2682 		    vn, conf->vvc_link);
2683 		return (VRRP_EINVALLINK);
2684 	}
2685 
2686 	/*
2687 	 * Get the associated VNIC name by the given interface/vrid/
2688 	 * address famitly.
2689 	 */
2690 	err = vrrp_get_vnicname(vrrpd_vh, conf->vvc_vrid,
2691 	    conf->vvc_af, conf->vvc_link, NULL, NULL, vr->vvr_vnic,
2692 	    sizeof (vr->vvr_vnic));
2693 	if (err != VRRP_SUCCESS) {
2694 		vrrp_log(VRRP_DBG1, "vrrpd_enable(%s): no VNIC for VRID %d/%s "
2695 		    "over %s", vn, conf->vvc_vrid, af_str(conf->vvc_af),
2696 		    conf->vvc_link);
2697 		err = VRRP_ENOVNIC;
2698 		goto fail;
2699 	}
2700 
2701 	/*
2702 	 * Find the right VNIC, primary interface and get the list of the
2703 	 * protected IP adressses and primary IP address. Note that if
2704 	 * either interface is NULL (no IP addresses configured over the
2705 	 * interface), we will still continue and mark this VRRP router
2706 	 * as "enabled".
2707 	 */
2708 	vr->vvr_conf.vvc_enabled = _B_TRUE;
2709 	if (updateconf && (err = vrrpd_updateconf(&vr->vvr_conf,
2710 	    VRRP_CONF_UPDATE)) != VRRP_SUCCESS) {
2711 		vrrp_log(VRRP_ERR, "vrrpd_enable(): failed to update "
2712 		    "configuration for %s", vr->vvr_conf.vvc_name);
2713 		goto fail;
2714 	}
2715 
2716 	/*
2717 	 * If vrrpd_setup_vr() fails, it is possible that there is no IP
2718 	 * addresses over ether the primary interface or the VNIC yet,
2719 	 * return success in this case, the VRRP router will stay in
2720 	 * the initialized state and start to work when the IP address is
2721 	 * configured.
2722 	 */
2723 	(void) vrrpd_enable_vr(vr);
2724 	return (VRRP_SUCCESS);
2725 
2726 fail:
2727 	vr->vvr_conf.vvc_enabled = _B_FALSE;
2728 	vr->vvr_vnic[0] = '\0';
2729 	return (err);
2730 }
2731 
/*
 * Disable the VRRP router with the given name on behalf of the
 * administrator.
 *
 * The router is marked disabled and the configuration file is updated;
 * if the update fails the enabled flag is restored and the error is
 * returned. Otherwise the router is stopped and its VNIC name cleared.
 * Returns VRRP_ENOTFOUND for an unknown router and VRRP_EALREADY for one
 * that is not enabled.
 */
static vrrp_err_t
vrrpd_disable(const char *vn)
{
	vrrp_vr_t	*router;
	vrrp_vr_conf_t	*cfg;
	vrrp_err_t	rc;

	vrrp_log(VRRP_DBG0, "vrrpd_disable(%s)", vn);

	router = vrrpd_lookup_vr_by_name(vn);
	if (router == NULL) {
		vrrp_log(VRRP_DBG1, "vrrpd_disable(): %s does not exist", vn);
		return (VRRP_ENOTFOUND);
	}

	/*
	 * Nothing to do for a router that is disabled already.
	 */
	cfg = &router->vvr_conf;
	if (!cfg->vvc_enabled) {
		vrrp_log(VRRP_DBG1, "vrrpd_disable(): %s was not enabled", vn);
		return (VRRP_EALREADY);
	}

	cfg->vvc_enabled = _B_FALSE;
	rc = vrrpd_updateconf(cfg, VRRP_CONF_UPDATE);
	if (rc != VRRP_SUCCESS) {
		/* Keep the in-memory state in line with the file. */
		cfg->vvc_enabled = _B_TRUE;
		vrrp_log(VRRP_ERR, "vrrpd_disable(): failed to update "
		    "configuration for %s", cfg->vvc_name);
		return (rc);
	}

	vrrpd_disable_vr(router, NULL, _B_FALSE);
	router->vvr_vnic[0] = '\0';
	return (VRRP_SUCCESS);
}
2766 
2767 static vrrp_err_t
vrrpd_modify(vrrp_vr_conf_t * conf,uint32_t mask)2768 vrrpd_modify(vrrp_vr_conf_t *conf, uint32_t mask)
2769 {
2770 	vrrp_vr_t	*vr;
2771 	vrrp_vr_conf_t	savconf;
2772 	int		pri;
2773 	boolean_t	accept, set_accept = _B_FALSE;
2774 	vrrp_err_t	err;
2775 
2776 	vrrp_log(VRRP_DBG0, "vrrpd_modify(%s)", conf->vvc_name);
2777 
2778 	if (mask == 0)
2779 		return (VRRP_SUCCESS);
2780 
2781 	if ((vr = vrrpd_lookup_vr_by_name(conf->vvc_name)) == NULL) {
2782 		vrrp_log(VRRP_DBG1, "vrrpd_modify(): cannot find the given "
2783 		    "VR instance: %s", conf->vvc_name);
2784 		return (VRRP_ENOTFOUND);
2785 	}
2786 
2787 	if (mask & VRRP_CONF_INTERVAL) {
2788 		if (conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN ||
2789 		    conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX) {
2790 			vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): invalid "
2791 			    "adver_interval %d", conf->vvc_name,
2792 			    conf->vvc_adver_int);
2793 			return (VRRP_EINVAL);
2794 		}
2795 	}
2796 
2797 	pri = vr->vvr_conf.vvc_pri;
2798 	if (mask & VRRP_CONF_PRIORITY) {
2799 		if (conf->vvc_pri < VRRP_PRI_MIN ||
2800 		    conf->vvc_pri > VRRP_PRI_OWNER) {
2801 			vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): invalid "
2802 			    "priority %d", conf->vvc_name, conf->vvc_pri);
2803 			return (VRRP_EINVAL);
2804 		}
2805 		pri = conf->vvc_pri;
2806 	}
2807 
2808 	accept = vr->vvr_conf.vvc_accept;
2809 	if (mask & VRRP_CONF_ACCEPT)
2810 		accept = conf->vvc_accept;
2811 
2812 	if (pri == VRRP_PRI_OWNER && !accept) {
2813 		vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): accept mode must be "
2814 		    "true for VRRP address owner", conf->vvc_name);
2815 		return (VRRP_EINVAL);
2816 	}
2817 
2818 	if ((mask & VRRP_CONF_ACCEPT) && (vr->vvr_conf.vvc_accept != accept)) {
2819 		err = vrrpd_set_noaccept(vr, !accept);
2820 		if (err != VRRP_SUCCESS) {
2821 			vrrp_log(VRRP_ERR, "vrrpd_modify(%s): access mode "
2822 			    "updating failed: %s", conf->vvc_name,
2823 			    vrrp_err2str(err));
2824 			return (err);
2825 		}
2826 		set_accept = _B_TRUE;
2827 	}
2828 
2829 	/*
2830 	 * Save the current configuration, so it can be restored if the
2831 	 * following fails.
2832 	 */
2833 	(void) memcpy(&savconf, &vr->vvr_conf, sizeof (vrrp_vr_conf_t));
2834 	if (mask & VRRP_CONF_PREEMPT)
2835 		vr->vvr_conf.vvc_preempt = conf->vvc_preempt;
2836 
2837 	if (mask & VRRP_CONF_ACCEPT)
2838 		vr->vvr_conf.vvc_accept = accept;
2839 
2840 	if (mask & VRRP_CONF_PRIORITY)
2841 		vr->vvr_conf.vvc_pri = pri;
2842 
2843 	if (mask & VRRP_CONF_INTERVAL)
2844 		vr->vvr_conf.vvc_adver_int = conf->vvc_adver_int;
2845 
2846 	err = vrrpd_updateconf(&vr->vvr_conf, VRRP_CONF_UPDATE);
2847 	if (err != VRRP_SUCCESS) {
2848 		vrrp_log(VRRP_ERR, "vrrpd_modify(%s): configuration update "
2849 		    "failed: %s", conf->vvc_name, vrrp_err2str(err));
2850 		if (set_accept)
2851 			(void) vrrpd_set_noaccept(vr, accept);
2852 		(void) memcpy(&vr->vvr_conf, &savconf, sizeof (vrrp_vr_conf_t));
2853 		return (err);
2854 	}
2855 
2856 	if ((mask & VRRP_CONF_PRIORITY) && (vr->vvr_state == VRRP_STATE_BACKUP))
2857 		vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
2858 
2859 	if ((mask & VRRP_CONF_INTERVAL) && (vr->vvr_state == VRRP_STATE_MASTER))
2860 		vr->vvr_timeout = conf->vvc_adver_int;
2861 
2862 	return (VRRP_SUCCESS);
2863 }
2864 
2865 static void
vrrpd_list(vrid_t vrid,char * ifname,int af,vrrp_ret_list_t * ret,size_t * sizep)2866 vrrpd_list(vrid_t vrid, char *ifname, int af, vrrp_ret_list_t *ret,
2867     size_t *sizep)
2868 {
2869 	vrrp_vr_t	*vr;
2870 	char		*p = (char *)ret + sizeof (vrrp_ret_list_t);
2871 	size_t		size = (*sizep) - sizeof (vrrp_ret_list_t);
2872 
2873 	vrrp_log(VRRP_DBG0, "vrrpd_list(%d_%s_%s)", vrid, ifname, af_str(af));
2874 
2875 	ret->vrl_cnt = 0;
2876 	TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) {
2877 		if (vrid !=  VRRP_VRID_NONE && vr->vvr_conf.vvc_vrid != vrid)
2878 			continue;
2879 
2880 		if (strlen(ifname) != 0 && strcmp(ifname,
2881 		    vr->vvr_conf.vvc_link) == 0) {
2882 			continue;
2883 		}
2884 
2885 		if ((af == AF_INET || af == AF_INET6) &&
2886 		    vr->vvr_conf.vvc_af != af)
2887 			continue;
2888 
2889 		if (size < VRRP_NAME_MAX) {
2890 			vrrp_log(VRRP_DBG1, "vrrpd_list(): buffer size too "
2891 			    "small to hold %d router names", ret->vrl_cnt);
2892 			*sizep = sizeof (vrrp_ret_list_t);
2893 			ret->vrl_err = VRRP_ETOOSMALL;
2894 			return;
2895 		}
2896 		(void) strlcpy(p, vr->vvr_conf.vvc_name, VRRP_NAME_MAX);
2897 		p += (strlen(vr->vvr_conf.vvc_name) + 1);
2898 		ret->vrl_cnt++;
2899 		size -= VRRP_NAME_MAX;
2900 	}
2901 
2902 	*sizep = sizeof (vrrp_ret_list_t) + ret->vrl_cnt * VRRP_NAME_MAX;
2903 	vrrp_log(VRRP_DBG1, "vrrpd_list() return %d", ret->vrl_cnt);
2904 	ret->vrl_err = VRRP_SUCCESS;
2905 }
2906 
2907 static void
vrrpd_query(const char * vn,vrrp_ret_query_t * ret,size_t * sizep)2908 vrrpd_query(const char *vn, vrrp_ret_query_t *ret, size_t *sizep)
2909 {
2910 	vrrp_queryinfo_t	*infop;
2911 	vrrp_vr_t		*vr;
2912 	vrrp_intf_t		*vif;
2913 	vrrp_ip_t		*ip;
2914 	struct timeval		now;
2915 	uint32_t		vipcnt = 0;
2916 	size_t			size = *sizep;
2917 
2918 	vrrp_log(VRRP_DBG1, "vrrpd_query(%s)", vn);
2919 
2920 	if ((vr = vrrpd_lookup_vr_by_name(vn)) == NULL) {
2921 		vrrp_log(VRRP_DBG1, "vrrpd_query(): %s does not exist", vn);
2922 		*sizep = sizeof (vrrp_ret_query_t);
2923 		ret->vrq_err = VRRP_ENOTFOUND;
2924 		return;
2925 	}
2926 
2927 	/*
2928 	 * Get the virtual IP list if the router is not in the INIT state.
2929 	 */
2930 	if (vr->vvr_state != VRRP_STATE_INIT) {
2931 		vif = vr->vvr_vif;
2932 		TAILQ_FOREACH(ip, &vif->vvi_iplist, vip_next) {
2933 			vipcnt++;
2934 		}
2935 	}
2936 
2937 	*sizep = sizeof (vrrp_ret_query_t);
2938 	*sizep += (vipcnt == 0) ? 0 : (vipcnt - 1) * sizeof (vrrp_addr_t);
2939 	if (*sizep > size) {
2940 		vrrp_log(VRRP_ERR, "vrrpd_query(): not enough space to hold "
2941 		    "%d virtual IPs", vipcnt);
2942 		*sizep = sizeof (vrrp_ret_query_t);
2943 		ret->vrq_err = VRRP_ETOOSMALL;
2944 		return;
2945 	}
2946 
2947 	(void) gettimeofday(&now, NULL);
2948 
2949 	bzero(ret, *sizep);
2950 	infop = &ret->vrq_qinfo;
2951 	(void) memcpy(&infop->show_vi,
2952 	    &(vr->vvr_conf), sizeof (vrrp_vr_conf_t));
2953 	(void) memcpy(&infop->show_vs,
2954 	    &(vr->vvr_sinfo), sizeof (vrrp_stateinfo_t));
2955 	(void) strlcpy(infop->show_va.va_vnic, vr->vvr_vnic, MAXLINKNAMELEN);
2956 	infop->show_vt.vt_since_last_tran = timeval_to_milli(
2957 	    timeval_delta(now, vr->vvr_sinfo.vs_st_time));
2958 
2959 	if (vr->vvr_state == VRRP_STATE_INIT) {
2960 		ret->vrq_err = VRRP_SUCCESS;
2961 		return;
2962 	}
2963 
2964 	vipcnt = 0;
2965 	TAILQ_FOREACH(ip, &vif->vvi_iplist, vip_next) {
2966 		(void) memcpy(&infop->show_va.va_vips[vipcnt++],
2967 		    &ip->vip_addr, sizeof (vrrp_addr_t));
2968 	}
2969 	infop->show_va.va_vipcnt = vipcnt;
2970 
2971 	(void) memcpy(&infop->show_va.va_primary,
2972 	    &vr->vvr_pif->vvi_pip->vip_addr, sizeof (vrrp_addr_t));
2973 
2974 	(void) memcpy(&infop->show_vp, &(vr->vvr_peer), sizeof (vrrp_peer_t));
2975 
2976 	/*
2977 	 * Check whether there is a peer.
2978 	 */
2979 	if (!VRRPADDR_UNSPECIFIED(vr->vvr_conf.vvc_af,
2980 	    &(vr->vvr_peer.vp_addr))) {
2981 		infop->show_vt.vt_since_last_adv = timeval_to_milli(
2982 		    timeval_delta(now, vr->vvr_peer.vp_time));
2983 	}
2984 
2985 	if (vr->vvr_state == VRRP_STATE_BACKUP) {
2986 		infop->show_vt.vt_master_down_intv =
2987 		    MASTER_DOWN_INTERVAL_VR(vr);
2988 	}
2989 
2990 	ret->vrq_err = VRRP_SUCCESS;
2991 }
2992 
2993 /*
2994  * Build the VRRP packet (not including the IP header). Return the
2995  * payload length.
2996  *
2997  * If zero_pri is set to be B_TRUE, then this is the specical zero-priority
2998  * advertisement which is sent by the Master to indicate that it has been
2999  * stopped participating in VRRP.
3000  */
3001 static size_t
vrrpd_build_vrrp(vrrp_vr_t * vr,uchar_t * buf,int buflen,boolean_t zero_pri)3002 vrrpd_build_vrrp(vrrp_vr_t *vr, uchar_t *buf, int buflen, boolean_t zero_pri)
3003 {
3004 	/* LINTED E_BAD_PTR_CAST_ALIGN */
3005 	vrrp_pkt_t	*vp = (vrrp_pkt_t *)buf;
3006 	/* LINTED E_BAD_PTR_CAST_ALIGN */
3007 	struct in_addr	*a4 = (struct in_addr *)(vp + 1);
3008 	/* LINTED E_BAD_PTR_CAST_ALIGN */
3009 	struct in6_addr *a6 = (struct in6_addr *)(vp + 1);
3010 	vrrp_intf_t	*vif = vr->vvr_vif;
3011 	vrrp_ip_t	*vip;
3012 	int		af = vif->vvi_af;
3013 	size_t		size = sizeof (vrrp_pkt_t);
3014 	uint16_t	rsvd_adver_int;
3015 	int		nip = 0;
3016 
3017 	vrrp_log(VRRP_DBG1, "vrrpd_build_vrrp(%s, %s_priority): intv %d",
3018 	    vr->vvr_conf.vvc_name, zero_pri ? "zero" : "non-zero",
3019 	    vr->vvr_conf.vvc_adver_int);
3020 
3021 	TAILQ_FOREACH(vip, &vif->vvi_iplist, vip_next) {
3022 		if ((size += ((af == AF_INET) ? sizeof (struct in_addr) :
3023 		    sizeof (struct in6_addr))) > buflen) {
3024 			vrrp_log(VRRP_ERR, "vrrpd_build_vrrp(%s): buffer size "
3025 			    "not big enough %d", vr->vvr_conf.vvc_name, size);
3026 			return (0);
3027 		}
3028 
3029 		if (af == AF_INET)
3030 			a4[nip++] = vip->vip_addr.in4.sin_addr;
3031 		else
3032 			a6[nip++] = vip->vip_addr.in6.sin6_addr;
3033 	}
3034 
3035 	if (nip == 0) {
3036 		vrrp_log(VRRP_ERR, "vrrpd_build_vrrp(%s): no virtual IP "
3037 		    "address", vr->vvr_conf.vvc_name);
3038 		return (0);
3039 	}
3040 
3041 	vp->vp_vers_type = (VRRP_VERSION << 4) | VRRP_PKT_ADVERT;
3042 	vp->vp_vrid = vr->vvr_conf.vvc_vrid;
3043 	vp->vp_prio = zero_pri ? VRRP_PRIO_ZERO : vr->vvr_conf.vvc_pri;
3044 
3045 	rsvd_adver_int = MSEC2CENTISEC(vr->vvr_conf.vvc_adver_int) & 0x0fff;
3046 	vp->vp_rsvd_adver_int = htons(rsvd_adver_int);
3047 	vp->vp_ipnum = nip;
3048 
3049 	/*
3050 	 * Set the checksum to 0 first, then caculate it.
3051 	 */
3052 	vp->vp_chksum = 0;
3053 	if (af == AF_INET) {
3054 		vp->vp_chksum = vrrp_cksum4(
3055 		    &vr->vvr_pif->vvi_pip->vip_addr.in4.sin_addr,
3056 		    &vrrp_muladdr4.in4.sin_addr, size, vp);
3057 	} else {
3058 		vp->vp_chksum = vrrp_cksum6(
3059 		    &vr->vvr_pif->vvi_pip->vip_addr.in6.sin6_addr,
3060 		    &vrrp_muladdr6.in6.sin6_addr, size, vp);
3061 	}
3062 
3063 	return (size);
3064 }
3065 
3066 /*
3067  * We need to build the IPv4 header on our own.
3068  */
3069 static vrrp_err_t
vrrpd_send_adv_v4(vrrp_vr_t * vr,uchar_t * buf,size_t len,boolean_t zero_pri)3070 vrrpd_send_adv_v4(vrrp_vr_t *vr, uchar_t *buf, size_t len, boolean_t zero_pri)
3071 {
3072 	/* LINTED E_BAD_PTR_CAST_ALIGN */
3073 	struct ip *ip = (struct ip *)buf;
3074 	size_t plen;
3075 
3076 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v4(%s)", vr->vvr_conf.vvc_name);
3077 
3078 	if ((plen = vrrpd_build_vrrp(vr, buf + sizeof (struct ip),
3079 	    len - sizeof (struct ip), zero_pri)) == 0) {
3080 		return (VRRP_ETOOSMALL);
3081 	}
3082 
3083 	ip->ip_hl = sizeof (struct ip) >> 2;
3084 	ip->ip_v = IPV4_VERSION;
3085 	ip->ip_tos = 0;
3086 	plen += sizeof (struct ip);
3087 	ip->ip_len = htons(plen);
3088 	ip->ip_off = 0;
3089 	ip->ip_ttl = VRRP_IP_TTL;
3090 	ip->ip_p = IPPROTO_VRRP;
3091 	ip->ip_src = vr->vvr_pif->vvi_pip->vip_addr.in4.sin_addr;
3092 	ip->ip_dst = vrrp_muladdr4.in4.sin_addr;
3093 
3094 	/*
3095 	 * The kernel will set the IP cksum and the IPv4 identification.
3096 	 */
3097 	ip->ip_id = 0;
3098 	ip->ip_sum = 0;
3099 
3100 	if ((len = sendto(vr->vvr_vif->vvi_sockfd, buf, plen, 0,
3101 	    (const struct sockaddr *)&vrrp_muladdr4,
3102 	    sizeof (struct sockaddr_in))) != plen) {
3103 		vrrp_log(VRRP_ERR, "vrrpd_send_adv_v4(): sendto() on "
3104 		    "(vrid:%d, %s, %s) failed: %s sent:%d expect:%d",
3105 		    vr->vvr_conf.vvc_vrid, vr->vvr_vif->vvi_ifname,
3106 		    af_str(vr->vvr_conf.vvc_af), strerror(errno), len, plen);
3107 		return (VRRP_ESYS);
3108 	}
3109 
3110 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v4(%s) succeed",
3111 	    vr->vvr_conf.vvc_name);
3112 	return (VRRP_SUCCESS);
3113 }
3114 
3115 static vrrp_err_t
vrrpd_send_adv_v6(vrrp_vr_t * vr,uchar_t * buf,size_t len,boolean_t zero_pri)3116 vrrpd_send_adv_v6(vrrp_vr_t *vr, uchar_t *buf, size_t len, boolean_t zero_pri)
3117 {
3118 	struct msghdr msg6;
3119 	size_t hoplimit_space = 0;
3120 	size_t pktinfo_space = 0;
3121 	size_t bufspace = 0;
3122 	struct in6_pktinfo *pktinfop;
3123 	struct cmsghdr *cmsgp;
3124 	uchar_t *cmsg_datap;
3125 	struct iovec iov;
3126 	size_t plen;
3127 
3128 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v6(%s)", vr->vvr_conf.vvc_name);
3129 
3130 	if ((plen = vrrpd_build_vrrp(vr, buf, len, zero_pri)) == 0)
3131 		return (VRRP_ETOOSMALL);
3132 
3133 	msg6.msg_control = NULL;
3134 	msg6.msg_controllen = 0;
3135 
3136 	hoplimit_space = sizeof (int);
3137 	bufspace += sizeof (struct cmsghdr) + _MAX_ALIGNMENT +
3138 	    hoplimit_space + _MAX_ALIGNMENT;
3139 
3140 	pktinfo_space = sizeof (struct in6_pktinfo);
3141 	bufspace += sizeof (struct cmsghdr) + _MAX_ALIGNMENT +
3142 	    pktinfo_space + _MAX_ALIGNMENT;
3143 
3144 	/*
3145 	 * We need to temporarily set the msg6.msg_controllen to bufspace
3146 	 * (we will later trim it to actual length used). This is needed because
3147 	 * CMSG_NXTHDR() uses it to check we have not exceeded the bounds.
3148 	 */
3149 	bufspace += sizeof (struct cmsghdr);
3150 	msg6.msg_controllen = bufspace;
3151 
3152 	msg6.msg_control = (struct cmsghdr *)malloc(bufspace);
3153 	if (msg6.msg_control == NULL) {
3154 		vrrp_log(VRRP_ERR, "vrrpd_send_adv_v6(%s): memory allocation "
3155 		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3156 		return (VRRP_ENOMEM);
3157 	}
3158 
3159 	cmsgp = CMSG_FIRSTHDR(&msg6);
3160 
3161 	cmsgp->cmsg_level = IPPROTO_IPV6;
3162 	cmsgp->cmsg_type = IPV6_HOPLIMIT;
3163 	cmsg_datap = CMSG_DATA(cmsgp);
3164 	/* LINTED */
3165 	*(int *)cmsg_datap = VRRP_IP_TTL;
3166 	cmsgp->cmsg_len = cmsg_datap + hoplimit_space - (uchar_t *)cmsgp;
3167 	cmsgp = CMSG_NXTHDR(&msg6, cmsgp);
3168 
3169 	cmsgp->cmsg_level = IPPROTO_IPV6;
3170 	cmsgp->cmsg_type = IPV6_PKTINFO;
3171 	cmsg_datap = CMSG_DATA(cmsgp);
3172 
3173 	/* LINTED */
3174 	pktinfop = (struct in6_pktinfo *)cmsg_datap;
3175 	/*
3176 	 * We don't know if pktinfop->ipi6_addr is aligned properly,
3177 	 * therefore let's use bcopy, instead of assignment.
3178 	 */
3179 	(void) bcopy(&vr->vvr_pif->vvi_pip->vip_addr.in6.sin6_addr,
3180 	    &pktinfop->ipi6_addr, sizeof (struct in6_addr));
3181 
3182 	/*
3183 	 *  We can assume pktinfop->ipi6_ifindex is 32 bit aligned.
3184 	 */
3185 	pktinfop->ipi6_ifindex = vr->vvr_vif->vvi_ifindex;
3186 	cmsgp->cmsg_len = cmsg_datap + pktinfo_space - (uchar_t *)cmsgp;
3187 	cmsgp = CMSG_NXTHDR(&msg6, cmsgp);
3188 	msg6.msg_controllen = (char *)cmsgp - (char *)msg6.msg_control;
3189 
3190 	msg6.msg_name = &vrrp_muladdr6;
3191 	msg6.msg_namelen = sizeof (struct sockaddr_in6);
3192 
3193 	iov.iov_base = buf;
3194 	iov.iov_len = plen;
3195 	msg6.msg_iov = &iov;
3196 	msg6.msg_iovlen = 1;
3197 
3198 	if ((len = sendmsg(vr->vvr_vif->vvi_sockfd,
3199 	    (const struct msghdr *)&msg6, 0)) != plen) {
3200 		vrrp_log(VRRP_ERR, "vrrpd_send_adv_v6(%s): sendmsg() failed: "
3201 		    "%s expect %d sent %d", vr->vvr_conf.vvc_name,
3202 		    strerror(errno), plen, len);
3203 		(void) free(msg6.msg_control);
3204 		return (VRRP_ESYS);
3205 	}
3206 
3207 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v6(%s) succeed",
3208 	    vr->vvr_conf.vvc_name);
3209 	(void) free(msg6.msg_control);
3210 	return (VRRP_SUCCESS);
3211 }
3212 
3213 /*
3214  * Send the VRRP advertisement packets.
3215  */
3216 static vrrp_err_t
vrrpd_send_adv(vrrp_vr_t * vr,boolean_t zero_pri)3217 vrrpd_send_adv(vrrp_vr_t *vr, boolean_t zero_pri)
3218 {
3219 	uint64_t buf[(IP_MAXPACKET + 1)/8];
3220 
3221 	vrrp_log(VRRP_DBG1, "vrrpd_send_adv(%s, %s_priority)",
3222 	    vr->vvr_conf.vvc_name, zero_pri ? "zero" : "non_zero");
3223 
3224 	assert(vr->vvr_pif->vvi_pip != NULL);
3225 
3226 	if (vr->vvr_pif->vvi_pip == NULL) {
3227 		vrrp_log(VRRP_DBG0, "vrrpd_send_adv(%s): no primary IP "
3228 		    "address", vr->vvr_conf.vvc_name);
3229 		return (VRRP_EINVAL);
3230 	}
3231 
3232 	if (vr->vvr_conf.vvc_af == AF_INET) {
3233 		return (vrrpd_send_adv_v4(vr, (uchar_t *)buf,
3234 		    sizeof (buf), zero_pri));
3235 	} else {
3236 		return (vrrpd_send_adv_v6(vr, (uchar_t *)buf,
3237 		    sizeof (buf), zero_pri));
3238 	}
3239 }
3240 
3241 static void
vrrpd_process_adv(vrrp_vr_t * vr,vrrp_addr_t * from,vrrp_pkt_t * vp)3242 vrrpd_process_adv(vrrp_vr_t *vr, vrrp_addr_t *from, vrrp_pkt_t *vp)
3243 {
3244 	vrrp_vr_conf_t *conf = &vr->vvr_conf;
3245 	char		peer[INET6_ADDRSTRLEN];
3246 	char		local[INET6_ADDRSTRLEN];
3247 	int		addr_cmp;
3248 	uint16_t	peer_adver_int;
3249 
3250 	/* LINTED E_CONSTANT_CONDITION */
3251 	VRRPADDR2STR(vr->vvr_conf.vvc_af, from, peer, INET6_ADDRSTRLEN,
3252 	    _B_FALSE);
3253 	vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s) from %s", conf->vvc_name,
3254 	    peer);
3255 
3256 	if (vr->vvr_state <= VRRP_STATE_INIT) {
3257 		vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): state: %s, not "
3258 		    "ready", conf->vvc_name, vrrp_state2str(vr->vvr_state));
3259 		return;
3260 	}
3261 
3262 	peer_adver_int = CENTISEC2MSEC(ntohs(vp->vp_rsvd_adver_int) & 0x0fff);
3263 
3264 	/* LINTED E_CONSTANT_CONDITION */
3265 	VRRPADDR2STR(vr->vvr_pif->vvi_af, &vr->vvr_pif->vvi_pip->vip_addr,
3266 	    local, INET6_ADDRSTRLEN, _B_FALSE);
3267 	vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): local/state/pri"
3268 	    "(%s/%s/%d) peer/pri/intv(%s/%d/%d)", conf->vvc_name, local,
3269 	    vrrp_state2str(vr->vvr_state), conf->vvc_pri, peer,
3270 	    vp->vp_prio, peer_adver_int);
3271 
3272 	addr_cmp = ipaddr_cmp(vr->vvr_pif->vvi_af, from,
3273 	    &vr->vvr_pif->vvi_pip->vip_addr);
3274 	if (addr_cmp == 0) {
3275 		vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): local message",
3276 		    conf->vvc_name);
3277 		return;
3278 	} else if (conf->vvc_pri == vp->vp_prio) {
3279 		vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): peer IP %s is %s"
3280 		    " than the local IP %s", conf->vvc_name, peer,
3281 		    addr_cmp > 0 ? "greater" : "less", local);
3282 	}
3283 
3284 	if (conf->vvc_pri == 255) {
3285 		vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): virtual address "
3286 		    "owner received advertisement from %s", conf->vvc_name,
3287 		    peer);
3288 		return;
3289 	}
3290 
3291 	(void) gettimeofday(&vr->vvr_peer_time, NULL);
3292 	(void) memcpy(&vr->vvr_peer_addr, from, sizeof (vrrp_addr_t));
3293 	vr->vvr_peer_prio = vp->vp_prio;
3294 	vr->vvr_peer_adver_int = peer_adver_int;
3295 
3296 	if (vr->vvr_state == VRRP_STATE_BACKUP) {
3297 		vr->vvr_master_adver_int = vr->vvr_peer_adver_int;
3298 		if ((vp->vp_prio == VRRP_PRIO_ZERO) ||
3299 		    (conf->vvc_preempt == _B_FALSE ||
3300 		    vp->vp_prio >= conf->vvc_pri)) {
3301 			(void) iu_cancel_timer(vrrpd_timerq,
3302 			    vr->vvr_timer_id, NULL);
3303 			if (vp->vp_prio == VRRP_PRIO_ZERO) {
3304 				/* the master stops participating in VRRP */
3305 				vr->vvr_timeout = SKEW_TIME_VR(vr);
3306 			} else {
3307 				vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
3308 			}
3309 			if ((vr->vvr_timer_id = iu_schedule_timer_ms(
3310 			    vrrpd_timerq, vr->vvr_timeout, vrrp_b2m_timeout,
3311 			    vr)) == -1) {
3312 				vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): "
3313 				    "start vrrp_b2m_timeout(%d) failed",
3314 				    conf->vvc_name, vr->vvr_timeout);
3315 			} else {
3316 				vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): "
3317 				    "start vrrp_b2m_timeout(%d)",
3318 				    conf->vvc_name, vr->vvr_timeout);
3319 			}
3320 		}
3321 	} else if (vr->vvr_state == VRRP_STATE_MASTER) {
3322 		if (vp->vp_prio == VRRP_PRIO_ZERO) {
3323 			(void) vrrpd_send_adv(vr, _B_FALSE);
3324 			(void) iu_cancel_timer(vrrpd_timerq,
3325 			    vr->vvr_timer_id, NULL);
3326 			if ((vr->vvr_timer_id = iu_schedule_timer_ms(
3327 			    vrrpd_timerq, vr->vvr_timeout, vrrp_adv_timeout,
3328 			    vr)) == -1) {
3329 				vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): "
3330 				    "start vrrp_adv_timeout(%d) failed",
3331 				    conf->vvc_name, vr->vvr_timeout);
3332 			} else {
3333 				vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): "
3334 				    "start vrrp_adv_timeout(%d)",
3335 				    conf->vvc_name, vr->vvr_timeout);
3336 			}
3337 		} else if (vp->vp_prio > conf->vvc_pri ||
3338 		    (vp->vp_prio == conf->vvc_pri && addr_cmp > 0)) {
3339 			(void) vrrpd_state_m2b(vr);
3340 		}
3341 	} else {
3342 		assert(_B_FALSE);
3343 	}
3344 }
3345 
3346 static vrrp_err_t
vrrpd_process_vrrp(vrrp_intf_t * pif,vrrp_pkt_t * vp,size_t len,vrrp_addr_t * from)3347 vrrpd_process_vrrp(vrrp_intf_t *pif, vrrp_pkt_t *vp, size_t len,
3348     vrrp_addr_t *from)
3349 {
3350 	vrrp_vr_t	*vr;
3351 	uint8_t		vers_type;
3352 	uint16_t	saved_cksum, cksum;
3353 	char		peer[INET6_ADDRSTRLEN];
3354 
3355 	/* LINTED E_CONSTANT_CONDITION */
3356 	VRRPADDR2STR(pif->vvi_af, from, peer, INET6_ADDRSTRLEN, _B_FALSE);
3357 	vrrp_log(VRRP_DBG0, "vrrpd_process_vrrp(%s) from %s", pif->vvi_ifname,
3358 	    peer);
3359 
3360 	if (len < sizeof (vrrp_pkt_t)) {
3361 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): invalid message "
3362 		    "length %d", len);
3363 		return (VRRP_EINVAL);
3364 	}
3365 
3366 	/*
3367 	 * Verify: VRRP version number and packet type.
3368 	 */
3369 	vers_type = ((vp->vp_vers_type & VRRP_VER_MASK) >> 4);
3370 	if (vers_type != VRRP_VERSION) {
3371 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s) unsupported "
3372 		    "version %d", pif->vvi_ifname, vers_type);
3373 		return (VRRP_EINVAL);
3374 	}
3375 
3376 	if (vp->vp_ipnum == 0) {
3377 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): zero IPvX count",
3378 		    pif->vvi_ifname);
3379 		return (VRRP_EINVAL);
3380 	}
3381 
3382 	if (len - sizeof (vrrp_pkt_t) !=
3383 	    vp->vp_ipnum * (pif->vvi_af == AF_INET ? sizeof (struct in_addr) :
3384 	    sizeof (struct in6_addr))) {
3385 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): invalid IPvX count"
3386 		    " %d", pif->vvi_ifname, vp->vp_ipnum);
3387 		return (VRRP_EINVAL);
3388 	}
3389 
3390 	vers_type = (vp->vp_vers_type & VRRP_TYPE_MASK);
3391 
3392 	/*
3393 	 * verify: VRRP checksum. Note that vrrp_cksum returns network byte
3394 	 * order checksum value;
3395 	 */
3396 	saved_cksum = vp->vp_chksum;
3397 	vp->vp_chksum = 0;
3398 	if (pif->vvi_af == AF_INET) {
3399 		cksum = vrrp_cksum4(&from->in4.sin_addr,
3400 		    &vrrp_muladdr4.in4.sin_addr, len, vp);
3401 	} else {
3402 		cksum = vrrp_cksum6(&from->in6.sin6_addr,
3403 		    &vrrp_muladdr6.in6.sin6_addr, len, vp);
3404 	}
3405 
3406 	if (cksum != saved_cksum) {
3407 		vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s) invalid "
3408 		    "checksum: expected/real(0x%x/0x%x)", pif->vvi_ifname,
3409 		    cksum, saved_cksum);
3410 		return (VRRP_EINVAL);
3411 	}
3412 
3413 	if ((vr = vrrpd_lookup_vr_by_vrid(pif->vvi_ifname, vp->vp_vrid,
3414 	    pif->vvi_af)) != NULL && vers_type == VRRP_PKT_ADVERT) {
3415 		vrrpd_process_adv(vr, from, vp);
3416 	} else {
3417 		vrrp_log(VRRP_DBG1, "vrrpd_process_vrrp(%s) VRID(%d/%s) "
3418 		    "not configured", pif->vvi_ifname, vp->vp_vrid,
3419 		    af_str(pif->vvi_af));
3420 	}
3421 	return (VRRP_SUCCESS);
3422 }
3423 
3424 /*
3425  * IPv4 socket, the IPv4 header is included.
3426  */
3427 static vrrp_err_t
vrrpd_process_adv_v4(vrrp_intf_t * pif,struct msghdr * msgp,size_t len)3428 vrrpd_process_adv_v4(vrrp_intf_t *pif, struct msghdr *msgp, size_t len)
3429 {
3430 	char		abuf[INET6_ADDRSTRLEN];
3431 	struct ip	*ip;
3432 
3433 	vrrp_log(VRRP_DBG0, "vrrpd_process_adv_v4(%s, %d)",
3434 	    pif->vvi_ifname, len);
3435 
3436 	ip = (struct ip *)msgp->msg_iov->iov_base;
3437 
3438 	/* Sanity check */
3439 	if (len < sizeof (struct ip) || len < ntohs(ip->ip_len)) {
3440 		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid length "
3441 		    "%d", pif->vvi_ifname, len);
3442 		return (VRRP_EINVAL);
3443 	}
3444 
3445 	assert(ip->ip_v == IPV4_VERSION);
3446 	assert(ip->ip_p == IPPROTO_VRRP);
3447 	assert(msgp->msg_namelen == sizeof (struct sockaddr_in));
3448 
3449 	if (vrrp_muladdr4.in4.sin_addr.s_addr != ip->ip_dst.s_addr) {
3450 		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3451 		    "destination %s", pif->vvi_ifname,
3452 		    inet_ntop(pif->vvi_af, &(ip->ip_dst), abuf, sizeof (abuf)));
3453 		return (VRRP_EINVAL);
3454 	}
3455 
3456 	if (ip->ip_ttl != VRRP_IP_TTL) {
3457 		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3458 		    "ttl %d", pif->vvi_ifname, ip->ip_ttl);
3459 		return (VRRP_EINVAL);
3460 	}
3461 
3462 	/*
3463 	 * Note that the ip_len contains only the IP payload length.
3464 	 */
3465 	return (vrrpd_process_vrrp(pif,
3466 	    /* LINTED E_BAD_PTR_CAST_ALIGN */
3467 	    (vrrp_pkt_t *)((char *)ip + ip->ip_hl * 4), ntohs(ip->ip_len),
3468 	    (vrrp_addr_t *)msgp->msg_name));
3469 }
3470 
3471 /*
3472  * IPv6 socket, check the ancillary_data.
3473  */
3474 static vrrp_err_t
vrrpd_process_adv_v6(vrrp_intf_t * pif,struct msghdr * msgp,size_t len)3475 vrrpd_process_adv_v6(vrrp_intf_t *pif, struct msghdr *msgp, size_t len)
3476 {
3477 	struct cmsghdr		*cmsgp;
3478 	uchar_t			*cmsg_datap;
3479 	struct in6_pktinfo	*pktinfop;
3480 	char			abuf[INET6_ADDRSTRLEN];
3481 	int			ttl;
3482 
3483 	vrrp_log(VRRP_DBG1, "vrrpd_process_adv_v6(%s, %d)",
3484 	    pif->vvi_ifname, len);
3485 
3486 	/* Sanity check */
3487 	if (len < sizeof (vrrp_pkt_t)) {
3488 		vrrp_log(VRRP_ERR, "vrrpd_process_adv_v6(%s): invalid length "
3489 		    "%d", pif->vvi_ifname, len);
3490 		return (VRRP_EINVAL);
3491 	}
3492 
3493 	assert(msgp->msg_namelen == sizeof (struct sockaddr_in6));
3494 
3495 	for (cmsgp = CMSG_FIRSTHDR(msgp); cmsgp != NULL;
3496 	    cmsgp = CMSG_NXTHDR(msgp, cmsgp)) {
3497 		assert(cmsgp->cmsg_level == IPPROTO_IPV6);
3498 		cmsg_datap = CMSG_DATA(cmsgp);
3499 
3500 		switch (cmsgp->cmsg_type) {
3501 		case IPV6_HOPLIMIT:
3502 			/* LINTED E_BAD_PTR_CAST_ALIGN */
3503 			if ((ttl = *(int *)cmsg_datap) == VRRP_IP_TTL)
3504 				break;
3505 
3506 			vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3507 			    "ttl %d", pif->vvi_ifname, ttl);
3508 			return (VRRP_EINVAL);
3509 		case IPV6_PKTINFO:
3510 			/* LINTED E_BAD_PTR_CAST_ALIGN */
3511 			pktinfop = (struct in6_pktinfo *)cmsg_datap;
3512 			if (IN6_ARE_ADDR_EQUAL(&pktinfop->ipi6_addr,
3513 			    &vrrp_muladdr6.in6.sin6_addr)) {
3514 				break;
3515 			}
3516 
3517 			vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid "
3518 			    "destination %s", pif->vvi_ifname,
3519 			    inet_ntop(pif->vvi_af, &pktinfop->ipi6_addr, abuf,
3520 			    sizeof (abuf)));
3521 			return (VRRP_EINVAL);
3522 		}
3523 	}
3524 
3525 	return (vrrpd_process_vrrp(pif, msgp->msg_iov->iov_base, len,
3526 	    msgp->msg_name));
3527 }
3528 
3529 /* ARGSUSED */
3530 static void
vrrpd_sock_handler(iu_eh_t * eh,int s,short events,iu_event_id_t id,void * arg)3531 vrrpd_sock_handler(iu_eh_t *eh, int s, short events, iu_event_id_t id,
3532     void *arg)
3533 {
3534 	struct msghdr		msg;
3535 	vrrp_addr_t		from;
3536 	uint64_t		buf[(IP_MAXPACKET + 1)/8];
3537 	uint64_t		ancillary_data[(IP_MAXPACKET + 1)/8];
3538 	vrrp_intf_t		*pif = arg;
3539 	int			af = pif->vvi_af;
3540 	int			len;
3541 	struct iovec		iov;
3542 
3543 	vrrp_log(VRRP_DBG1, "vrrpd_sock_handler(%s)", pif->vvi_ifname);
3544 
3545 	msg.msg_name = (struct sockaddr *)&from;
3546 	msg.msg_namelen = (af == AF_INET) ? sizeof (struct sockaddr_in) :
3547 	    sizeof (struct sockaddr_in6);
3548 	iov.iov_base = (char *)buf;
3549 	iov.iov_len = sizeof (buf);
3550 	msg.msg_iov = &iov;
3551 	msg.msg_iovlen = 1;
3552 	msg.msg_control = ancillary_data;
3553 	msg.msg_controllen = sizeof (ancillary_data);
3554 
3555 	if ((len = recvmsg(s, &msg, 0)) == -1) {
3556 		vrrp_log(VRRP_ERR, "vrrpd_sock_handler() recvmsg(%s) "
3557 		    "failed: %s", pif->vvi_ifname, strerror(errno));
3558 		return;
3559 	}
3560 
3561 	/*
3562 	 * Ignore packets whose control buffers that don't fit
3563 	 */
3564 	if (msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) {
3565 		vrrp_log(VRRP_ERR, "vrrpd_sock_handler() %s buffer not "
3566 		    "big enough", pif->vvi_ifname);
3567 		return;
3568 	}
3569 
3570 	if (af == AF_INET)
3571 		(void) vrrpd_process_adv_v4(pif, &msg, len);
3572 	else
3573 		(void) vrrpd_process_adv_v6(pif, &msg, len);
3574 }
3575 
3576 /*
3577  * Create the socket which is used to receive VRRP packets. Virtual routers
3578  * that configured on the same physical interface share the same socket.
3579  */
3580 static vrrp_err_t
vrrpd_init_rxsock(vrrp_vr_t * vr)3581 vrrpd_init_rxsock(vrrp_vr_t *vr)
3582 {
3583 	vrrp_intf_t *pif;	/* Physical interface used to recv packets */
3584 	struct group_req greq;
3585 	struct sockaddr_storage *muladdr;
3586 	int af, proto;
3587 	int on = 1;
3588 	vrrp_err_t err = VRRP_SUCCESS;
3589 
3590 	vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s)", vr->vvr_conf.vvc_name);
3591 
3592 	/*
3593 	 * The RX sockets may already been initialized.
3594 	 */
3595 	if ((pif = vr->vvr_pif) != NULL) {
3596 		vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s) already done on %s",
3597 		    vr->vvr_conf.vvc_name, pif->vvi_ifname);
3598 		assert(pif->vvi_sockfd != -1);
3599 		return (VRRP_SUCCESS);
3600 	}
3601 
3602 	/*
3603 	 * If no IP addresses configured on the primary interface,
3604 	 * return failure.
3605 	 */
3606 	af = vr->vvr_conf.vvc_af;
3607 	pif = vrrpd_lookup_if(vr->vvr_conf.vvc_link, af);
3608 	if (pif == NULL) {
3609 		vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s): no IP address "
3610 		    "over %s/%s", vr->vvr_conf.vvc_name,
3611 		    vr->vvr_conf.vvc_link, af_str(af));
3612 		return (VRRP_ENOPRIM);
3613 	}
3614 
3615 	proto = (af == AF_INET ? IPPROTO_IP : IPPROTO_IPV6);
3616 	if (pif->vvi_nvr++ == 0) {
3617 		assert(pif->vvi_sockfd < 0);
3618 		pif->vvi_sockfd = socket(af, SOCK_RAW, IPPROTO_VRRP);
3619 		if (pif->vvi_sockfd < 0) {
3620 			vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): socket() "
3621 			    "failed %s", vr->vvr_conf.vvc_name,
3622 			    strerror(errno));
3623 			err = VRRP_ESYS;
3624 			goto done;
3625 		}
3626 
3627 		/*
3628 		 * Join the multicast group to receive VRRP packets.
3629 		 */
3630 		if (af == AF_INET) {
3631 			muladdr = (struct sockaddr_storage *)
3632 			    (void *)&vrrp_muladdr4;
3633 		} else {
3634 			muladdr = (struct sockaddr_storage *)
3635 			    (void *)&vrrp_muladdr6;
3636 		}
3637 
3638 		greq.gr_interface = pif->vvi_ifindex;
3639 		(void) memcpy(&greq.gr_group, muladdr,
3640 		    sizeof (struct sockaddr_storage));
3641 		if (setsockopt(pif->vvi_sockfd, proto, MCAST_JOIN_GROUP, &greq,
3642 		    sizeof (struct group_req)) < 0) {
3643 			vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3644 			    "join_group(%d) failed: %s", vr->vvr_conf.vvc_name,
3645 			    pif->vvi_ifindex, strerror(errno));
3646 			err = VRRP_ESYS;
3647 			goto done;
3648 		} else {
3649 			vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s): "
3650 			    "join_group(%d) succeeded", vr->vvr_conf.vvc_name,
3651 			    pif->vvi_ifindex);
3652 		}
3653 
3654 		/*
3655 		 * Unlike IPv4, the IPv6 raw socket does not pass the IP header
3656 		 * when a packet is received. Call setsockopt() to receive such
3657 		 * information.
3658 		 */
3659 		if (af == AF_INET6) {
3660 			/*
3661 			 * Enable receipt of destination address info
3662 			 */
3663 			if (setsockopt(pif->vvi_sockfd, proto, IPV6_RECVPKTINFO,
3664 			    (char *)&on, sizeof (on)) < 0) {
3665 				vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3666 				    "enable recvpktinfo failed: %s",
3667 				    vr->vvr_conf.vvc_name, strerror(errno));
3668 				err = VRRP_ESYS;
3669 				goto done;
3670 			}
3671 
3672 			/*
3673 			 * Enable receipt of hoplimit info
3674 			 */
3675 			if (setsockopt(pif->vvi_sockfd, proto,
3676 			    IPV6_RECVHOPLIMIT, (char *)&on, sizeof (on)) < 0) {
3677 				vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3678 				    "enable recvhoplimit failed: %s",
3679 				    vr->vvr_conf.vvc_name, strerror(errno));
3680 				err = VRRP_ESYS;
3681 				goto done;
3682 			}
3683 		}
3684 
3685 		if ((pif->vvi_eid = iu_register_event(vrrpd_eh,
3686 		    pif->vvi_sockfd, POLLIN, vrrpd_sock_handler, pif)) == -1) {
3687 			vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): "
3688 			    "iu_register_event() failed",
3689 			    vr->vvr_conf.vvc_name);
3690 			err = VRRP_ESYS;
3691 			goto done;
3692 		}
3693 	} else {
3694 		vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s) over %s already "
3695 		    "done %d", vr->vvr_conf.vvc_name, pif->vvi_ifname,
3696 		    pif->vvi_nvr);
3697 		assert(IS_PRIMARY_INTF(pif));
3698 	}
3699 
3700 done:
3701 	vr->vvr_pif = pif;
3702 	if (err != VRRP_SUCCESS)
3703 		vrrpd_fini_rxsock(vr);
3704 
3705 	return (err);
3706 }
3707 
3708 /*
3709  * Delete the socket which is used to receive VRRP packets for the given
3710  * VRRP router. Since all virtual routers that configured on the same
3711  * physical interface share the same socket, the socket is only closed
3712  * when the last VRRP router share this socket is deleted.
3713  */
3714 static void
vrrpd_fini_rxsock(vrrp_vr_t * vr)3715 vrrpd_fini_rxsock(vrrp_vr_t *vr)
3716 {
3717 	vrrp_intf_t	*pif = vr->vvr_pif;
3718 
3719 	vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s)", vr->vvr_conf.vvc_name);
3720 
3721 	if (pif == NULL)
3722 		return;
3723 
3724 	if (--pif->vvi_nvr == 0) {
3725 		vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s) over %s",
3726 		    vr->vvr_conf.vvc_name, pif->vvi_ifname);
3727 		(void) iu_unregister_event(vrrpd_eh, pif->vvi_eid, NULL);
3728 		(void) close(pif->vvi_sockfd);
3729 		pif->vvi_pip = NULL;
3730 		pif->vvi_sockfd = -1;
3731 		pif->vvi_eid = -1;
3732 	} else {
3733 		vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s) over %s %d",
3734 		    vr->vvr_conf.vvc_name, pif->vvi_ifname, pif->vvi_nvr);
3735 	}
3736 	vr->vvr_pif = NULL;
3737 }
3738 
3739 /*
3740  * Create the socket which is used to send VRRP packets. Further, set
3741  * the IFF_NOACCEPT flag based on the VRRP router's accept mode.
3742  */
3743 static vrrp_err_t
vrrpd_init_txsock(vrrp_vr_t * vr)3744 vrrpd_init_txsock(vrrp_vr_t *vr)
3745 {
3746 	int		af;
3747 	vrrp_intf_t	*vif;
3748 	vrrp_err_t	err;
3749 
3750 	vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s)", vr->vvr_conf.vvc_name);
3751 
3752 	if (vr->vvr_vif != NULL) {
3753 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s) already done on %s",
3754 		    vr->vvr_conf.vvc_name, vr->vvr_vif->vvi_ifname);
3755 		return (VRRP_SUCCESS);
3756 	}
3757 
3758 	af = vr->vvr_conf.vvc_af;
3759 	if ((vif = vrrpd_lookup_if(vr->vvr_vnic, af)) == NULL) {
3760 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s) no IP address over "
3761 		    "%s/%s", vr->vvr_conf.vvc_name, vr->vvr_vnic, af_str(af));
3762 		return (VRRP_ENOVIRT);
3763 	}
3764 
3765 	vr->vvr_vif = vif;
3766 	if (vr->vvr_conf.vvc_af == AF_INET)
3767 		err = vrrpd_init_txsock_v4(vr);
3768 	else
3769 		err = vrrpd_init_txsock_v6(vr);
3770 
3771 	if (err != VRRP_SUCCESS)
3772 		goto done;
3773 
3774 	/*
3775 	 * The interface should start with IFF_NOACCEPT flag not set, only
3776 	 * call this function when the VRRP router requires IFF_NOACCEPT.
3777 	 */
3778 	if (!vr->vvr_conf.vvc_accept)
3779 		err = vrrpd_set_noaccept(vr, _B_TRUE);
3780 
3781 done:
3782 	if (err != VRRP_SUCCESS) {
3783 		(void) close(vif->vvi_sockfd);
3784 		vif->vvi_sockfd = -1;
3785 		vr->vvr_vif = NULL;
3786 	}
3787 
3788 	return (err);
3789 }
3790 
3791 /*
3792  * Create the IPv4 socket which is used to send VRRP packets. Note that
3793  * the destination MAC address of VRRP advertisement must be the virtual
3794  * MAC address, so we specify the output interface to be the specific VNIC.
3795  */
3796 static vrrp_err_t
vrrpd_init_txsock_v4(vrrp_vr_t * vr)3797 vrrpd_init_txsock_v4(vrrp_vr_t *vr)
3798 {
3799 	vrrp_intf_t *vif;	/* VNIC interface used to send packets */
3800 	vrrp_ip_t *vip;		/* The first IP over the VNIC */
3801 	int on = 1;
3802 	char off = 0;
3803 	vrrp_err_t err = VRRP_SUCCESS;
3804 	char abuf[INET6_ADDRSTRLEN];
3805 
3806 	vif = vr->vvr_vif;
3807 	assert(vr->vvr_conf.vvc_af == AF_INET);
3808 	assert(vif != NULL);
3809 
3810 	vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v4(%s) over %s",
3811 	    vr->vvr_conf.vvc_name, vif->vvi_ifname);
3812 
3813 	if (vif->vvi_sockfd != -1) {
3814 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v4(%s) already done "
3815 		    "over %s", vr->vvr_conf.vvc_name, vif->vvi_ifname);
3816 		return (VRRP_SUCCESS);
3817 	}
3818 
3819 	vif->vvi_sockfd = socket(vif->vvi_af, SOCK_RAW, IPPROTO_VRRP);
3820 	if (vif->vvi_sockfd < 0) {
3821 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): socket() "
3822 		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3823 		err = VRRP_ESYS;
3824 		goto done;
3825 	}
3826 
3827 	/*
3828 	 * Include the IP header, so that we can specify the IP address/ttl.
3829 	 */
3830 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_HDRINCL, (char *)&on,
3831 	    sizeof (on)) < 0) {
3832 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): ip_hdrincl "
3833 		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3834 		err = VRRP_ESYS;
3835 		goto done;
3836 	}
3837 
3838 	/*
3839 	 * Disable multicast loopback.
3840 	 */
3841 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_MULTICAST_LOOP, &off,
3842 	    sizeof (char)) == -1) {
3843 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): disable "
3844 		    "multicast_loop failed: %s", vr->vvr_conf.vvc_name,
3845 		    strerror(errno));
3846 		err = VRRP_ESYS;
3847 		goto done;
3848 	}
3849 
3850 	vip = TAILQ_FIRST(&vif->vvi_iplist);
3851 	/* LINTED E_CONSTANT_CONDITION */
3852 	VRRPADDR2STR(vif->vvi_af, &vip->vip_addr, abuf, INET6_ADDRSTRLEN,
3853 	    _B_FALSE);
3854 
3855 	/*
3856 	 * Set the output interface to send the VRRP packet.
3857 	 */
3858 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_MULTICAST_IF,
3859 	    &vip->vip_addr.in4.sin_addr, sizeof (struct in_addr)) < 0) {
3860 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): multcast_if(%s) "
3861 		    "failed: %s", vr->vvr_conf.vvc_name, abuf, strerror(errno));
3862 		err = VRRP_ESYS;
3863 	} else {
3864 		vrrp_log(VRRP_DBG0, "vrrpd_init_txsock_v4(%s): multcast_if(%s) "
3865 		    "succeed", vr->vvr_conf.vvc_name, abuf);
3866 	}
3867 
3868 done:
3869 	if (err != VRRP_SUCCESS) {
3870 		(void) close(vif->vvi_sockfd);
3871 		vif->vvi_sockfd = -1;
3872 	}
3873 
3874 	return (err);
3875 }
3876 
3877 /*
3878  * Create the IPv6 socket which is used to send VRRP packets. Note that
3879  * the destination must be the virtual MAC address, so we specify the output
3880  * interface to be the specific VNIC.
3881  */
3882 static vrrp_err_t
vrrpd_init_txsock_v6(vrrp_vr_t * vr)3883 vrrpd_init_txsock_v6(vrrp_vr_t *vr)
3884 {
3885 	vrrp_intf_t *vif;	/* VNIC interface used to send packets */
3886 	int off = 0, ttl = VRRP_IP_TTL;
3887 	vrrp_err_t err = VRRP_SUCCESS;
3888 
3889 	vif = vr->vvr_vif;
3890 	assert(vr->vvr_conf.vvc_af == AF_INET6);
3891 	assert(vif != NULL);
3892 
3893 	vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s) over %s",
3894 	    vr->vvr_conf.vvc_name, vif->vvi_ifname);
3895 
3896 	if (vif->vvi_sockfd != -1) {
3897 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s) already done "
3898 		    "over %s", vr->vvr_conf.vvc_name, vif->vvi_ifname);
3899 		return (VRRP_SUCCESS);
3900 	}
3901 
3902 	vif->vvi_sockfd = socket(vif->vvi_af, SOCK_RAW, IPPROTO_VRRP);
3903 	if (vif->vvi_sockfd < 0) {
3904 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): socket() "
3905 		    "failed: %s", vr->vvr_conf.vvc_name, strerror(errno));
3906 		err = VRRP_ESYS;
3907 		goto done;
3908 	}
3909 
3910 	/*
3911 	 * Disable multicast loopback.
3912 	 */
3913 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_LOOP,
3914 	    &off, sizeof (int)) == -1) {
3915 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): disable "
3916 		    "multicast_loop failed: %s", vr->vvr_conf.vvc_name,
3917 		    strerror(errno));
3918 		err = VRRP_ESYS;
3919 		goto done;
3920 	}
3921 
3922 	/*
3923 	 * Set the multicast TTL.
3924 	 */
3925 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_HOPS,
3926 	    &ttl, sizeof (int)) == -1) {
3927 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): enable "
3928 		    "multicast_hops %d failed: %s", vr->vvr_conf.vvc_name,
3929 		    ttl, strerror(errno));
3930 		err = VRRP_ESYS;
3931 		goto done;
3932 	}
3933 
3934 	/*
3935 	 * Set the output interface to send the VRRP packet.
3936 	 */
3937 	if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_IF,
3938 	    &vif->vvi_ifindex, sizeof (uint32_t)) < 0) {
3939 		vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): multicast_if(%d) "
3940 		    "failed: %s", vr->vvr_conf.vvc_name, vif->vvi_ifindex,
3941 		    strerror(errno));
3942 		err = VRRP_ESYS;
3943 	} else {
3944 		vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s): multicast_if(%d)"
3945 		    " succeed", vr->vvr_conf.vvc_name, vif->vvi_ifindex);
3946 	}
3947 
3948 done:
3949 	if (err != VRRP_SUCCESS) {
3950 		(void) close(vif->vvi_sockfd);
3951 		vif->vvi_sockfd = -1;
3952 	}
3953 
3954 	return (err);
3955 }
3956 
3957 /*
3958  * Delete the socket which is used to send VRRP packets. Further, clear
3959  * the IFF_NOACCEPT flag based on the VRRP router's accept mode.
3960  */
3961 static void
vrrpd_fini_txsock(vrrp_vr_t * vr)3962 vrrpd_fini_txsock(vrrp_vr_t *vr)
3963 {
3964 	vrrp_intf_t *vif = vr->vvr_vif;
3965 
3966 	vrrp_log(VRRP_DBG1, "vrrpd_fini_txsock(%s)", vr->vvr_conf.vvc_name);
3967 
3968 	if (vif != NULL) {
3969 		if (!vr->vvr_conf.vvc_accept)
3970 			(void) vrrpd_set_noaccept(vr, _B_FALSE);
3971 		(void) close(vif->vvi_sockfd);
3972 		vif->vvi_sockfd = -1;
3973 		vr->vvr_vif = NULL;
3974 	}
3975 }
3976 
/*
 * Given the pseudo header checksum value (sum), calculate the checksum
 * over the rest of the VRRP packet.
 *
 * sum:  partial sum of native 16-bit words already accumulated by the
 *       caller (expected to be non-negative)
 * plen: number of bytes at p to add to the sum
 * p:    start of the data; it does not need to be 16-bit aligned
 *
 * Returns the one's complement of the folded sum. A computed value of 0
 * is reported as 0xffff, so this function never returns 0.
 */
static uint16_t
in_cksum(int sum, uint16_t plen, void *p)
{
	const unsigned char *bytes = p;
	/*
	 * Accumulate in an unsigned 32-bit value: a non-negative int plus
	 * at most 65535 bytes of data cannot wrap it, whereas a signed
	 * accumulator could overflow.
	 */
	uint32_t acc = (uint32_t)sum;
	uint16_t word;
	uint16_t answer;
	size_t nleft = plen;

	while (nleft > 1) {
		/* memcpy keeps native word order without alignment traps */
		(void) memcpy(&word, bytes, sizeof (word));
		acc += word;
		bytes += sizeof (word);
		nleft -= sizeof (word);
	}

	/* mop up an odd byte, if necessary */
	if (nleft == 1) {
		word = 0;
		(void) memcpy(&word, bytes, 1);
		acc += word;
	}

	/*
	 * add back carry outs from top 16 bits to low 16 bits
	 */
	acc = (acc >> 16) + (acc & 0xffff);	/* add hi 16 to low 16 */
	acc += (acc >> 16);			/* add carry */
	answer = (uint16_t)~acc;		/* truncate to 16 bits */
	return (answer == 0 ? (uint16_t)0xffff : answer);
}
4010 
/*
 * IPv4 pseudo header which is summed together with the VRRP packet when
 * the checksum is computed.
 */
struct pshv4 {
	struct in_addr	ph4_src;	/* IP source address */
	struct in_addr	ph4_dst;	/* IP destination address */
	uint8_t		ph4_zero;	/* always zero */
	uint8_t		ph4_protocol;	/* protocol used, IPPROTO_VRRP */
	uint16_t	ph4_len;	/* VRRP payload len */
};
4019 
4020 /*
4021  * Checksum routine for VRRP checksum. Note that plen is the upper-layer
4022  * packet length (in the host byte order), and both IP source and destination
4023  * addresses are in the network byte order.
4024  */
4025 static uint16_t
vrrp_cksum4(struct in_addr * src,struct in_addr * dst,uint16_t plen,vrrp_pkt_t * vp)4026 vrrp_cksum4(struct in_addr *src, struct in_addr *dst, uint16_t plen,
4027     vrrp_pkt_t *vp)
4028 {
4029 	struct pshv4 ph4;
4030 	int nleft;
4031 	uint16_t *w;
4032 	int sum = 0;
4033 
4034 	ph4.ph4_src = *src;
4035 	ph4.ph4_dst = *dst;
4036 	ph4.ph4_zero = 0;
4037 	ph4.ph4_protocol = IPPROTO_VRRP;
4038 	ph4.ph4_len = htons(plen);
4039 
4040 	/*
4041 	 *  Our algorithm is simple, using a 32 bit accumulator (sum),
4042 	 *  we add sequential 16 bit words to it, and at the end, fold
4043 	 *  back all the carry bits from the top 16 bits into the lower
4044 	 *  16 bits.
4045 	 */
4046 	nleft = sizeof (struct pshv4);
4047 	w = (uint16_t *)&ph4;
4048 	while (nleft > 0) {
4049 		sum += *w++;
4050 		nleft -= 2;
4051 	}
4052 
4053 	return (in_cksum(sum, plen, vp));
4054 }
4055 
/*
 * IPv6 pseudo header which is summed together with the VRRP packet when
 * the checksum is computed.
 */
struct pshv6 {
	struct in6_addr	ph6_src;	/* IP source address */
	struct in6_addr	ph6_dst;	/* IP destination address */
	uint32_t	ph6_len;	/* VRRP payload len */
	uint32_t	ph6_zero : 24;	/* always zero */
	uint32_t	ph6_protocol : 8; /* protocol used, IPPROTO_VRRP */
};
4064 
4065 /*
4066  * Checksum routine for VRRP checksum. Note that plen is the upper-layer
4067  * packet length (in the host byte order), and both IP source and destination
4068  * addresses are in the network byte order.
4069  */
4070 static uint16_t
vrrp_cksum6(struct in6_addr * src,struct in6_addr * dst,uint16_t plen,vrrp_pkt_t * vp)4071 vrrp_cksum6(struct in6_addr *src, struct in6_addr *dst, uint16_t plen,
4072     vrrp_pkt_t *vp)
4073 {
4074 	struct pshv6 ph6;
4075 	int nleft;
4076 	uint16_t *w;
4077 	int sum = 0;
4078 
4079 	ph6.ph6_src = *src;
4080 	ph6.ph6_dst = *dst;
4081 	ph6.ph6_zero = 0;
4082 	ph6.ph6_protocol = IPPROTO_VRRP;
4083 	ph6.ph6_len = htonl((uint32_t)plen);
4084 
4085 	/*
4086 	 *  Our algorithm is simple, using a 32 bit accumulator (sum),
4087 	 *  we add sequential 16 bit words to it, and at the end, fold
4088 	 *  back all the carry bits from the top 16 bits into the lower
4089 	 *  16 bits.
4090 	 */
4091 	nleft = sizeof (struct pshv6);
4092 	w = (uint16_t *)&ph6;
4093 	while (nleft > 0) {
4094 		sum += *w++;
4095 		nleft -= 2;
4096 	}
4097 
4098 	return (in_cksum(sum, plen, vp));
4099 }
4100 
4101 vrrp_err_t
vrrpd_state_i2m(vrrp_vr_t * vr)4102 vrrpd_state_i2m(vrrp_vr_t *vr)
4103 {
4104 	vrrp_err_t	err;
4105 
4106 	vrrp_log(VRRP_DBG1, "vrrpd_state_i2m(%s)", vr->vvr_conf.vvc_name);
4107 
4108 	vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_MASTER, vr);
4109 	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4110 		return (err);
4111 
4112 	(void) vrrpd_send_adv(vr, _B_FALSE);
4113 
4114 	vr->vvr_err = VRRP_SUCCESS;
4115 	vr->vvr_timeout = vr->vvr_conf.vvc_adver_int;
4116 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4117 	    vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) {
4118 		vrrp_log(VRRP_ERR, "vrrpd_state_i2m(): unable to start timer");
4119 		return (VRRP_ESYS);
4120 	} else {
4121 		vrrp_log(VRRP_DBG1, "vrrpd_state_i2m(%s): start "
4122 		    "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name,
4123 		    vr->vvr_timeout);
4124 	}
4125 	return (VRRP_SUCCESS);
4126 }
4127 
4128 vrrp_err_t
vrrpd_state_i2b(vrrp_vr_t * vr)4129 vrrpd_state_i2b(vrrp_vr_t *vr)
4130 {
4131 	vrrp_err_t	err;
4132 
4133 	vrrp_log(VRRP_DBG1, "vrrpd_state_i2b(%s)", vr->vvr_conf.vvc_name);
4134 
4135 	vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_BACKUP, vr);
4136 	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4137 		return (err);
4138 
4139 	/*
4140 	 * Reinitialize the Master advertisement interval to be the configured
4141 	 * value.
4142 	 */
4143 	vr->vvr_err = VRRP_SUCCESS;
4144 	vr->vvr_master_adver_int = vr->vvr_conf.vvc_adver_int;
4145 	vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
4146 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4147 	    vr->vvr_timeout, vrrp_b2m_timeout, vr)) == -1) {
4148 		vrrp_log(VRRP_ERR, "vrrpd_state_i2b(): unable to set timer");
4149 		return (VRRP_ESYS);
4150 	} else {
4151 		vrrp_log(VRRP_DBG1, "vrrpd_state_i2b(%s): start "
4152 		    "vrrp_b2m_timeout(%d)", vr->vvr_conf.vvc_name,
4153 		    vr->vvr_timeout);
4154 	}
4155 	return (VRRP_SUCCESS);
4156 }
4157 
4158 void
vrrpd_state_m2i(vrrp_vr_t * vr)4159 vrrpd_state_m2i(vrrp_vr_t *vr)
4160 {
4161 	vrrp_log(VRRP_DBG1, "vrrpd_state_m2i(%s)", vr->vvr_conf.vvc_name);
4162 
4163 	vrrpd_state_trans(VRRP_STATE_MASTER, VRRP_STATE_INIT, vr);
4164 	(void) vrrpd_virtualip_update(vr, _B_TRUE);
4165 	bzero(&vr->vvr_peer, sizeof (vrrp_peer_t));
4166 	(void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
4167 }
4168 
4169 void
vrrpd_state_b2i(vrrp_vr_t * vr)4170 vrrpd_state_b2i(vrrp_vr_t *vr)
4171 {
4172 	vrrp_log(VRRP_DBG1, "vrrpd_state_b2i(%s)", vr->vvr_conf.vvc_name);
4173 
4174 	bzero(&vr->vvr_peer, sizeof (vrrp_peer_t));
4175 	(void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
4176 	vrrpd_state_trans(VRRP_STATE_BACKUP, VRRP_STATE_INIT, vr);
4177 	(void) vrrpd_virtualip_update(vr, _B_TRUE);
4178 }
4179 
4180 /* ARGSUSED */
4181 static void
vrrp_b2m_timeout(iu_tq_t * tq,void * arg)4182 vrrp_b2m_timeout(iu_tq_t *tq, void *arg)
4183 {
4184 	vrrp_vr_t *vr = (vrrp_vr_t *)arg;
4185 
4186 	vrrp_log(VRRP_DBG1, "vrrp_b2m_timeout(%s)", vr->vvr_conf.vvc_name);
4187 	(void) vrrpd_state_b2m(vr);
4188 }
4189 
4190 /* ARGSUSED */
4191 static void
vrrp_adv_timeout(iu_tq_t * tq,void * arg)4192 vrrp_adv_timeout(iu_tq_t *tq, void *arg)
4193 {
4194 	vrrp_vr_t *vr = (vrrp_vr_t *)arg;
4195 
4196 	vrrp_log(VRRP_DBG1, "vrrp_adv_timeout(%s)", vr->vvr_conf.vvc_name);
4197 
4198 	(void) vrrpd_send_adv(vr, _B_FALSE);
4199 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4200 	    vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) {
4201 		vrrp_log(VRRP_ERR, "vrrp_adv_timeout(%s): start timer failed",
4202 		    vr->vvr_conf.vvc_name);
4203 	} else {
4204 		vrrp_log(VRRP_DBG1, "vrrp_adv_timeout(%s): start "
4205 		    "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name,
4206 		    vr->vvr_timeout);
4207 	}
4208 }
4209 
4210 vrrp_err_t
vrrpd_state_b2m(vrrp_vr_t * vr)4211 vrrpd_state_b2m(vrrp_vr_t *vr)
4212 {
4213 	vrrp_err_t	err;
4214 
4215 	vrrp_log(VRRP_DBG1, "vrrpd_state_b2m(%s)", vr->vvr_conf.vvc_name);
4216 
4217 	vrrpd_state_trans(VRRP_STATE_BACKUP, VRRP_STATE_MASTER, vr);
4218 	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4219 		return (err);
4220 	(void) vrrpd_send_adv(vr, _B_FALSE);
4221 
4222 	vr->vvr_timeout = vr->vvr_conf.vvc_adver_int;
4223 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4224 	    vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) {
4225 		vrrp_log(VRRP_ERR, "vrrpd_state_b2m(%s): start timer failed",
4226 		    vr->vvr_conf.vvc_name);
4227 		return (VRRP_ESYS);
4228 	} else {
4229 		vrrp_log(VRRP_DBG1, "vrrpd_state_b2m(%s): start "
4230 		    "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name,
4231 		    vr->vvr_timeout);
4232 	}
4233 	return (VRRP_SUCCESS);
4234 }
4235 
4236 vrrp_err_t
vrrpd_state_m2b(vrrp_vr_t * vr)4237 vrrpd_state_m2b(vrrp_vr_t *vr)
4238 {
4239 	vrrp_err_t	err;
4240 
4241 	vrrp_log(VRRP_DBG1, "vrrpd_state_m2b(%s)", vr->vvr_conf.vvc_name);
4242 
4243 	vrrpd_state_trans(VRRP_STATE_MASTER, VRRP_STATE_BACKUP, vr);
4244 	if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS)
4245 		return (err);
4246 
4247 	/*
4248 	 * Cancel the adver_timer.
4249 	 */
4250 	vr->vvr_master_adver_int = vr->vvr_peer_adver_int;
4251 	(void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL);
4252 	vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr);
4253 	if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq,
4254 	    vr->vvr_timeout, vrrp_b2m_timeout, vr)) == -1) {
4255 		vrrp_log(VRRP_ERR, "vrrpd_state_m2b(%s): start timer failed",
4256 		    vr->vvr_conf.vvc_name);
4257 	} else {
4258 		vrrp_log(VRRP_DBG1, "vrrpd_state_m2b(%s) start "
4259 		    "vrrp_b2m_timeout(%d)", vr->vvr_conf.vvc_name,
4260 		    vr->vvr_timeout);
4261 	}
4262 	return (VRRP_SUCCESS);
4263 }
4264 
4265 /*
4266  * Set the IFF_NOACCESS flag on the VNIC interface of the VRRP router
4267  * based on its access mode.
4268  */
4269 static vrrp_err_t
vrrpd_set_noaccept(vrrp_vr_t * vr,boolean_t on)4270 vrrpd_set_noaccept(vrrp_vr_t *vr, boolean_t on)
4271 {
4272 	vrrp_intf_t *vif = vr->vvr_vif;
4273 	uint64_t curr_flags;
4274 	struct lifreq lifr;
4275 	int s;
4276 
4277 	vrrp_log(VRRP_DBG1, "vrrpd_set_noaccept(%s, %s)",
4278 	    vr->vvr_conf.vvc_name, on ? "on" : "off");
4279 
4280 	/*
4281 	 * Possibly no virtual address exists on this VRRP router yet.
4282 	 */
4283 	if (vif == NULL)
4284 		return (VRRP_SUCCESS);
4285 
4286 	vrrp_log(VRRP_DBG1, "vrrpd_set_noaccept(%s, %s)",
4287 	    vif->vvi_ifname, vrrp_state2str(vr->vvr_state));
4288 
4289 	s = (vif->vvi_af == AF_INET) ? vrrpd_ctlsock_fd : vrrpd_ctlsock6_fd;
4290 	(void) strncpy(lifr.lifr_name, vif->vvi_ifname,
4291 	    sizeof (lifr.lifr_name));
4292 	if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
4293 		if (errno != ENXIO && errno != ENOENT) {
4294 			vrrp_log(VRRP_ERR, "vrrpd_set_noaccept(): "
4295 			    "SIOCGLIFFLAGS on %s failed: %s",
4296 			    vif->vvi_ifname, strerror(errno));
4297 		}
4298 		return (VRRP_ESYS);
4299 	}
4300 
4301 	curr_flags = lifr.lifr_flags;
4302 	if (on)
4303 		lifr.lifr_flags |= IFF_NOACCEPT;
4304 	else
4305 		lifr.lifr_flags &= ~IFF_NOACCEPT;
4306 
4307 	if (lifr.lifr_flags != curr_flags) {
4308 		if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
4309 			if (errno != ENXIO && errno != ENOENT) {
4310 				vrrp_log(VRRP_ERR, "vrrpd_set_noaccept(%s): "
4311 				    "SIOCSLIFFLAGS 0x%llx on %s failed: %s",
4312 				    on ? "no_accept" : "accept",
4313 				    lifr.lifr_flags, vif->vvi_ifname,
4314 				    strerror(errno));
4315 			}
4316 			return (VRRP_ESYS);
4317 		}
4318 	}
4319 	return (VRRP_SUCCESS);
4320 }
4321 
4322 static vrrp_err_t
vrrpd_virtualip_updateone(vrrp_intf_t * vif,vrrp_ip_t * ip,boolean_t checkonly)4323 vrrpd_virtualip_updateone(vrrp_intf_t *vif, vrrp_ip_t *ip, boolean_t checkonly)
4324 {
4325 	vrrp_state_t	state = vif->vvi_vr_state;
4326 	struct lifreq	lifr;
4327 	char		abuf[INET6_ADDRSTRLEN];
4328 	int		af = vif->vvi_af;
4329 	uint64_t	curr_flags;
4330 	int		s;
4331 
4332 	assert(IS_VIRTUAL_INTF(vif));
4333 
4334 	/* LINTED E_CONSTANT_CONDITION */
4335 	VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN, _B_FALSE);
4336 	vrrp_log(VRRP_DBG1, "vrrpd_virtualip_updateone(%s, %s%s)",
4337 	    vif->vvi_ifname, abuf, checkonly ? ", checkonly" : "");
4338 
4339 	s = (af == AF_INET) ? vrrpd_ctlsock_fd : vrrpd_ctlsock6_fd;
4340 	(void) strncpy(lifr.lifr_name, ip->vip_lifname,
4341 	    sizeof (lifr.lifr_name));
4342 	if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) {
4343 		if (errno != ENXIO && errno != ENOENT) {
4344 			vrrp_log(VRRP_ERR, "vrrpd_virtualip_updateone(%s): "
4345 			    "SIOCGLIFFLAGS on %s/%s failed: %s",
4346 			    vif->vvi_ifname, lifr.lifr_name, abuf,
4347 			    strerror(errno));
4348 		}
4349 		return (VRRP_ESYS);
4350 	}
4351 
4352 	curr_flags = lifr.lifr_flags;
4353 	if (state == VRRP_STATE_MASTER)
4354 		lifr.lifr_flags |= IFF_UP;
4355 	else
4356 		lifr.lifr_flags &= ~IFF_UP;
4357 
4358 	if (lifr.lifr_flags == curr_flags)
4359 		return (VRRP_SUCCESS);
4360 
4361 	if (checkonly) {
4362 		vrrp_log(VRRP_ERR, "VRRP virtual IP %s/%s was brought %s",
4363 		    ip->vip_lifname, abuf,
4364 		    state == VRRP_STATE_MASTER ? "down" : "up");
4365 		return (VRRP_ESYS);
4366 	} else if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) {
4367 		if (errno != ENXIO && errno != ENOENT) {
4368 			vrrp_log(VRRP_ERR, "vrrpd_virtualip_updateone(%s, %s): "
4369 			    "bring %s %s/%s failed: %s",
4370 			    vif->vvi_ifname, vrrp_state2str(state),
4371 			    state == VRRP_STATE_MASTER ? "up" : "down",
4372 			    ip->vip_lifname, abuf, strerror(errno));
4373 		}
4374 		return (VRRP_ESYS);
4375 	}
4376 	return (VRRP_SUCCESS);
4377 }
4378 
4379 static vrrp_err_t
vrrpd_virtualip_update(vrrp_vr_t * vr,boolean_t checkonly)4380 vrrpd_virtualip_update(vrrp_vr_t *vr, boolean_t checkonly)
4381 {
4382 	vrrp_state_t		state;
4383 	vrrp_intf_t		*vif = vr->vvr_vif;
4384 	vrrp_ip_t		*ip, *nextip;
4385 	char			abuf[INET6_ADDRSTRLEN];
4386 	vrrp_err_t		err;
4387 
4388 	vrrp_log(VRRP_DBG1, "vrrpd_virtualip_update(%s, %s, %s)%s",
4389 	    vr->vvr_conf.vvc_name, vrrp_state2str(vr->vvr_state),
4390 	    vif->vvi_ifname, checkonly ? " checkonly" : "");
4391 
4392 	state = vr->vvr_state;
4393 	assert(vif != NULL);
4394 	assert(IS_VIRTUAL_INTF(vif));
4395 	assert(vif->vvi_vr_state != state);
4396 	vif->vvi_vr_state = state;
4397 	for (ip = TAILQ_FIRST(&vif->vvi_iplist); ip != NULL; ip = nextip) {
4398 		nextip = TAILQ_NEXT(ip, vip_next);
4399 		err = vrrpd_virtualip_updateone(vif, ip, _B_FALSE);
4400 		if (!checkonly && err != VRRP_SUCCESS) {
4401 			/* LINTED E_CONSTANT_CONDITION */
4402 			VRRPADDR2STR(vif->vvi_af, &ip->vip_addr, abuf,
4403 			    INET6_ADDRSTRLEN, _B_FALSE);
4404 			vrrp_log(VRRP_DBG1, "vrrpd_virtualip_update() update "
4405 			    "%s over %s failed", abuf, vif->vvi_ifname);
4406 			vrrpd_delete_ip(vif, ip);
4407 		}
4408 	}
4409 
4410 	/*
4411 	 * The IP address is deleted when it is failed to be brought
4412 	 * up. If no IP addresses are left, delete this interface.
4413 	 */
4414 	if (!checkonly && TAILQ_EMPTY(&vif->vvi_iplist)) {
4415 		vrrp_log(VRRP_DBG0, "vrrpd_virtualip_update(): "
4416 		    "no IP left over %s", vif->vvi_ifname);
4417 		vrrpd_delete_if(vif, _B_TRUE);
4418 		return (VRRP_ENOVIRT);
4419 	}
4420 	return (VRRP_SUCCESS);
4421 }
4422 
4423 void
vrrpd_state_trans(vrrp_state_t prev_s,vrrp_state_t s,vrrp_vr_t * vr)4424 vrrpd_state_trans(vrrp_state_t prev_s, vrrp_state_t s, vrrp_vr_t *vr)
4425 {
4426 	vrrp_log(VRRP_DBG1, "vrrpd_state_trans(%s): %s --> %s",
4427 	    vr->vvr_conf.vvc_name, vrrp_state2str(prev_s), vrrp_state2str(s));
4428 
4429 	assert(vr->vvr_state == prev_s);
4430 	vr->vvr_state = s;
4431 	vr->vvr_prev_state = prev_s;
4432 	(void) gettimeofday(&vr->vvr_st_time, NULL);
4433 	(void) vrrpd_post_event(vr->vvr_conf.vvc_name, prev_s, s);
4434 }
4435 
4436 static int
vrrpd_post_event(const char * name,vrrp_state_t prev_st,vrrp_state_t st)4437 vrrpd_post_event(const char *name, vrrp_state_t prev_st, vrrp_state_t st)
4438 {
4439 	sysevent_id_t	eid;
4440 	nvlist_t	*nvl = NULL;
4441 
4442 	/*
4443 	 * sysevent is not supported in the non-global zone
4444 	 */
4445 	if (getzoneid() != GLOBAL_ZONEID)
4446 		return (0);
4447 
4448 	if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
4449 		goto failed;
4450 
4451 	if (nvlist_add_uint8(nvl, VRRP_EVENT_VERSION,
4452 	    VRRP_EVENT_CUR_VERSION) != 0)
4453 		goto failed;
4454 
4455 	if (nvlist_add_string(nvl, VRRP_EVENT_ROUTER_NAME, name) != 0)
4456 		goto failed;
4457 
4458 	if (nvlist_add_uint8(nvl, VRRP_EVENT_STATE, st) != 0)
4459 		goto failed;
4460 
4461 	if (nvlist_add_uint8(nvl, VRRP_EVENT_PREV_STATE, prev_st) != 0)
4462 		goto failed;
4463 
4464 	if (sysevent_post_event(EC_VRRP, ESC_VRRP_STATE_CHANGE,
4465 	    SUNW_VENDOR, VRRP_EVENT_PUBLISHER, nvl, &eid) == 0) {
4466 		nvlist_free(nvl);
4467 		return (0);
4468 	}
4469 
4470 failed:
4471 	vrrp_log(VRRP_ERR, "vrrpd_post_event(): `state change (%s --> %s)' "
4472 	    "sysevent posting failed: %s", vrrp_state2str(prev_st),
4473 	    vrrp_state2str(st), strerror(errno));
4474 
4475 	nvlist_free(nvl);
4476 	return (-1);
4477 }
4478 
4479 /*
4480  * timeval processing functions
4481  */
/*
 * Convert a timeval to milliseconds, rounding the microsecond part to the
 * nearest millisecond. The rounding is done in integer arithmetic before
 * the division, so it is not lost to truncation.
 *
 * NOTE(review): the result is narrowed to int, so this presumably expects
 * intervals rather than absolute times -- confirm against callers.
 */
static int
timeval_to_milli(struct timeval tv)
{
	return ((int)(tv.tv_sec * 1000 + (tv.tv_usec + 500) / 1000));
}
4487 
/*
 * Return t1 - t2. When the microsecond difference is negative, one second
 * is borrowed so that tv_usec of the result is not negative.
 */
static struct timeval
timeval_delta(struct timeval t1, struct timeval t2)
{
	struct timeval diff;

	diff.tv_sec = t1.tv_sec - t2.tv_sec;
	diff.tv_usec = t1.tv_usec - t2.tv_usec;
	if (diff.tv_usec >= 0)
		return (diff);

	/* Borrow one second to make the microsecond part non-negative. */
	diff.tv_sec--;
	diff.tv_usec += 1000000;
	return (diff);
}
4501 
4502 /*
4503  * print error messages to the terminal or to syslog
4504  */
4505 static void
vrrp_log(int level,char * message,...)4506 vrrp_log(int level, char *message, ...)
4507 {
4508 	va_list ap;
4509 	int log_level = -1;
4510 
4511 	va_start(ap, message);
4512 
4513 	if (vrrp_logflag == 0) {
4514 		if (level <= vrrp_debug_level) {
4515 			/*
4516 			 * VRRP_ERR goes to stderr, others go to stdout
4517 			 */
4518 			FILE *out = (level <= VRRP_ERR) ? stderr : stdout;
4519 			(void) fprintf(out, "vrrpd: ");
4520 			/* LINTED: E_SEC_PRINTF_VAR_FMT */
4521 			(void) vfprintf(out, message, ap);
4522 			(void) fprintf(out, "\n");
4523 			(void) fflush(out);
4524 		}
4525 		va_end(ap);
4526 		return;
4527 	}
4528 
4529 	/*
4530 	 * translate VRRP_* to LOG_*
4531 	 */
4532 	switch (level) {
4533 	case VRRP_ERR:
4534 		log_level = LOG_ERR;
4535 		break;
4536 	case VRRP_WARNING:
4537 		log_level = LOG_WARNING;
4538 		break;
4539 	case VRRP_NOTICE:
4540 		log_level = LOG_NOTICE;
4541 		break;
4542 	case VRRP_DBG0:
4543 		log_level = LOG_INFO;
4544 		break;
4545 	default:
4546 		log_level = LOG_DEBUG;
4547 		break;
4548 	}
4549 
4550 	/* LINTED: E_SEC_PRINTF_VAR_FMT */
4551 	(void) vsyslog(log_level, message, ap);
4552 	va_end(ap);
4553 }
4554