1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/socket.h> 29 #include <sys/sockio.h> 30 #include <sys/sysevent/vrrp.h> 31 #include <sys/sysevent/eventdefs.h> 32 #include <sys/varargs.h> 33 #include <auth_attr.h> 34 #include <ctype.h> 35 #include <fcntl.h> 36 #include <stdlib.h> 37 #include <strings.h> 38 #include <errno.h> 39 #include <unistd.h> 40 #include <zone.h> 41 #include <libsysevent.h> 42 #include <limits.h> 43 #include <locale.h> 44 #include <inetcfg.h> 45 #include <arpa/inet.h> 46 #include <signal.h> 47 #include <assert.h> 48 #include <ucred.h> 49 #include <bsm/adt.h> 50 #include <bsm/adt_event.h> 51 #include <priv_utils.h> 52 #include <libdllink.h> 53 #include <libdlvnic.h> 54 #include <pwd.h> 55 #include <libvrrpadm.h> 56 #include <net/route.h> 57 #include "vrrpd_impl.h" 58 59 /* 60 * A VRRP router can be only start participating the VRRP protocol of a virtual 61 * router when all the following conditions are met: 62 * 63 * - The VRRP router is enabled (vr->vvr_conf.vvc_enabled is _B_TRUE) 64 * - The RX 
socket is successfully created over the physical interface to 65 * receive the VRRP multicast advertisement. Note that one RX socket can 66 * be shared by several VRRP routers configured over the same physical 67 * interface. (See vrrpd_init_rxsock()) 68 * - The TX socket is successfully created over the VNIC interface to send 69 * the VRRP advertisment. (See vrrpd_init_txsock()) 70 * - The primary IP address has been successfully selected over the physical 71 * interface. (See vrrpd_select_primary()) 72 * 73 * If a VRRP router is enabled but the other conditions haven't be satisfied, 74 * the router will be stay at the VRRP_STATE_INIT state. If all the above 75 * conditions are met, the VRRP router will be transit to either 76 * the VRRP_STATE_MASTER or the VRRP_STATE_BACKUP state, depends on the VRRP 77 * protocol. 78 */ 79 80 #define skip_whitespace(p) while (isspace(*(p))) ++(p) 81 82 #define BUFFSIZE 65536 83 84 #define VRRPCONF "/etc/inet/vrrp.conf" 85 86 typedef struct vrrpd_rtsock_s { 87 int vrt_af; /* address family */ 88 int vrt_fd; /* socket for the PF_ROUTE msg */ 89 iu_event_id_t vrt_eid; /* event ID */ 90 } vrrpd_rtsock_t; 91 92 static int vrrp_logflag = 0; 93 boolean_t vrrp_debug_level = 0; 94 iu_eh_t *vrrpd_eh = NULL; 95 iu_tq_t *vrrpd_timerq = NULL; 96 static vrrp_handle_t vrrpd_vh = NULL; 97 static int vrrpd_cmdsock_fd = -1; /* socket to communicate */ 98 /* between vrrpd/libvrrpadm */ 99 static iu_event_id_t vrrpd_cmdsock_eid = -1; 100 static int vrrpd_ctlsock_fd = -1; /* socket to bring up/down */ 101 /* the virtual IP addresses */ 102 static int vrrpd_ctlsock6_fd = -1; 103 static vrrpd_rtsock_t vrrpd_rtsocks[2] = { 104 {AF_INET, -1, -1}, 105 {AF_INET6, -1, -1} 106 }; 107 static iu_timer_id_t vrrp_scan_timer_id = -1; 108 109 TAILQ_HEAD(vrrp_vr_list_s, vrrp_vr_s); 110 TAILQ_HEAD(vrrp_intf_list_s, vrrp_intf_s); 111 static struct vrrp_vr_list_s vrrp_vr_list; 112 static struct vrrp_intf_list_s vrrp_intf_list; 113 static char 
vrrpd_conffile[MAXPATHLEN]; 114 115 /* 116 * Multicast address of VRRP advertisement in network byte order 117 */ 118 static vrrp_addr_t vrrp_muladdr4; 119 static vrrp_addr_t vrrp_muladdr6; 120 121 static int vrrpd_scan_interval = 20000; /* ms */ 122 123 /* 124 * macros to calculate skew_time and master_down_timer 125 * 126 * Note that the input is in centisecs and output are in msecs 127 */ 128 #define SKEW_TIME(pri, intv) ((intv) * (256 - (pri)) / 256) 129 #define MASTER_DOWN_INTERVAL(pri, intv) (3 * (intv) + SKEW_TIME((pri), (intv))) 130 131 #define SKEW_TIME_VR(vr) \ 132 SKEW_TIME((vr)->vvr_conf.vvc_pri, (vr)->vvr_master_adver_int) 133 #define MASTER_DOWN_INTERVAL_VR(vr) \ 134 MASTER_DOWN_INTERVAL((vr)->vvr_conf.vvc_pri, (vr)->vvr_master_adver_int) 135 136 #define VRRP_CONF_UPDATE 0x01 137 #define VRRP_CONF_DELETE 0x02 138 139 static char *af_str(int); 140 141 static iu_tq_callback_t vrrp_adv_timeout; 142 static iu_tq_callback_t vrrp_b2m_timeout; 143 static iu_eh_callback_t vrrpd_sock_handler; 144 static iu_eh_callback_t vrrpd_rtsock_handler; 145 static iu_eh_callback_t vrrpd_cmdsock_handler; 146 147 static int daemon_init(); 148 149 static vrrp_err_t vrrpd_init(); 150 static void vrrpd_fini(); 151 static vrrp_err_t vrrpd_cmdsock_create(); 152 static void vrrpd_cmdsock_destroy(); 153 static vrrp_err_t vrrpd_rtsock_create(); 154 static void vrrpd_rtsock_destroy(); 155 static vrrp_err_t vrrpd_ctlsock_create(); 156 static void vrrpd_ctlsock_destroy(); 157 158 static void vrrpd_scan_timer(iu_tq_t *, void *); 159 static void vrrpd_scan(int); 160 static vrrp_err_t vrrpd_init_rxsock(vrrp_vr_t *); 161 static void vrrpd_fini_rxsock(vrrp_vr_t *); 162 static vrrp_err_t vrrpd_init_txsock(vrrp_vr_t *); 163 static vrrp_err_t vrrpd_init_txsock_v4(vrrp_vr_t *); 164 static vrrp_err_t vrrpd_init_txsock_v6(vrrp_vr_t *); 165 static void vrrpd_fini_txsock(vrrp_vr_t *); 166 167 static vrrp_err_t vrrpd_create_vr(vrrp_vr_conf_t *); 168 static vrrp_err_t vrrpd_enable_vr(vrrp_vr_t *); 
169 static void vrrpd_disable_vr(vrrp_vr_t *, vrrp_intf_t *, boolean_t); 170 static void vrrpd_delete_vr(vrrp_vr_t *); 171 172 static vrrp_err_t vrrpd_create(vrrp_vr_conf_t *, boolean_t); 173 static vrrp_err_t vrrpd_delete(const char *); 174 static vrrp_err_t vrrpd_enable(const char *, boolean_t); 175 static vrrp_err_t vrrpd_disable(const char *); 176 static vrrp_err_t vrrpd_modify(vrrp_vr_conf_t *, uint32_t); 177 static void vrrpd_list(vrid_t, char *, int, vrrp_ret_list_t *, size_t *); 178 static void vrrpd_query(const char *, vrrp_ret_query_t *, size_t *); 179 180 static boolean_t vrrp_rd_prop_name(vrrp_vr_conf_t *, const char *); 181 static boolean_t vrrp_rd_prop_vrid(vrrp_vr_conf_t *, const char *); 182 static boolean_t vrrp_rd_prop_af(vrrp_vr_conf_t *, const char *); 183 static boolean_t vrrp_rd_prop_pri(vrrp_vr_conf_t *, const char *); 184 static boolean_t vrrp_rd_prop_adver_int(vrrp_vr_conf_t *, const char *); 185 static boolean_t vrrp_rd_prop_preempt(vrrp_vr_conf_t *, const char *); 186 static boolean_t vrrp_rd_prop_accept(vrrp_vr_conf_t *, const char *); 187 static boolean_t vrrp_rd_prop_ifname(vrrp_vr_conf_t *, const char *); 188 static boolean_t vrrp_rd_prop_enabled(vrrp_vr_conf_t *, const char *); 189 static int vrrp_wt_prop_name(vrrp_vr_conf_t *, char *, size_t); 190 static int vrrp_wt_prop_vrid(vrrp_vr_conf_t *, char *, size_t); 191 static int vrrp_wt_prop_af(vrrp_vr_conf_t *, char *, size_t); 192 static int vrrp_wt_prop_pri(vrrp_vr_conf_t *, char *, size_t); 193 static int vrrp_wt_prop_adver_int(vrrp_vr_conf_t *, char *, size_t); 194 static int vrrp_wt_prop_preempt(vrrp_vr_conf_t *, char *, size_t); 195 static int vrrp_wt_prop_accept(vrrp_vr_conf_t *, char *, size_t); 196 static int vrrp_wt_prop_ifname(vrrp_vr_conf_t *, char *, size_t); 197 static int vrrp_wt_prop_enabled(vrrp_vr_conf_t *, char *, size_t); 198 199 static void vrrpd_cmd_create(void *, void *, size_t *); 200 static void vrrpd_cmd_delete(void *, void *, size_t *); 201 static void 
vrrpd_cmd_enable(void *, void *, size_t *); 202 static void vrrpd_cmd_disable(void *, void *, size_t *); 203 static void vrrpd_cmd_modify(void *, void *, size_t *); 204 static void vrrpd_cmd_list(void *, void *, size_t *); 205 static void vrrpd_cmd_query(void *, void *, size_t *); 206 207 static vrrp_vr_t *vrrpd_lookup_vr_by_vrid(char *, vrid_t vrid_t, int); 208 static vrrp_vr_t *vrrpd_lookup_vr_by_name(const char *); 209 static vrrp_intf_t *vrrpd_lookup_if(const char *, int); 210 static vrrp_err_t vrrpd_create_if(const char *, int, uint32_t, vrrp_intf_t **); 211 static void vrrpd_delete_if(vrrp_intf_t *, boolean_t); 212 static vrrp_err_t vrrpd_create_ip(vrrp_intf_t *, const char *, vrrp_addr_t *, 213 uint64_t flags); 214 static void vrrpd_delete_ip(vrrp_intf_t *, vrrp_ip_t *); 215 216 static void vrrpd_init_ipcache(int); 217 static void vrrpd_update_ipcache(int); 218 static int vrrpd_walk_ipaddr(icfg_if_t *, void *); 219 static vrrp_err_t vrrpd_add_ipaddr(char *, int, vrrp_addr_t *, 220 int, uint64_t); 221 static vrrp_ip_t *vrrpd_select_primary(vrrp_intf_t *); 222 static void vrrpd_reselect_primary(vrrp_intf_t *); 223 static void vrrpd_reenable_all_vr(); 224 static void vrrpd_remove_if(vrrp_intf_t *, boolean_t); 225 226 static uint16_t in_cksum(int, uint16_t, void *); 227 static uint16_t vrrp_cksum4(struct in_addr *, struct in_addr *, 228 uint16_t, vrrp_pkt_t *); 229 static uint16_t vrrp_cksum6(struct in6_addr *, struct in6_addr *, 230 uint16_t, vrrp_pkt_t *); 231 static size_t vrrpd_build_vrrp(vrrp_vr_t *, uchar_t *, int, boolean_t); 232 233 static void vrrpd_process_adv(vrrp_vr_t *, vrrp_addr_t *, vrrp_pkt_t *); 234 static vrrp_err_t vrrpd_send_adv(vrrp_vr_t *, boolean_t); 235 236 /* state transition functions */ 237 static vrrp_err_t vrrpd_state_i2m(vrrp_vr_t *); 238 static vrrp_err_t vrrpd_state_i2b(vrrp_vr_t *); 239 static void vrrpd_state_m2i(vrrp_vr_t *); 240 static void vrrpd_state_b2i(vrrp_vr_t *); 241 static vrrp_err_t vrrpd_state_b2m(vrrp_vr_t *); 242 
static vrrp_err_t vrrpd_state_m2b(vrrp_vr_t *); 243 static void vrrpd_state_trans(vrrp_state_t, vrrp_state_t, vrrp_vr_t *); 244 245 static vrrp_err_t vrrpd_set_noaccept(vrrp_vr_t *, boolean_t); 246 static vrrp_err_t vrrpd_virtualip_update(vrrp_vr_t *, boolean_t); 247 static vrrp_err_t vrrpd_virtualip_updateone(vrrp_intf_t *, vrrp_ip_t *, 248 boolean_t); 249 static int vrrpd_post_event(const char *, vrrp_state_t, vrrp_state_t); 250 251 static void vrrpd_initconf(); 252 static vrrp_err_t vrrpd_updateconf(vrrp_vr_conf_t *, uint_t); 253 static vrrp_err_t vrrpd_write_vrconf(char *, size_t, vrrp_vr_conf_t *); 254 static vrrp_err_t vrrpd_read_vrconf(char *, vrrp_vr_conf_t *); 255 static vrrp_err_t vrrpd_readprop(const char *, vrrp_vr_conf_t *); 256 static void vrrpd_cleanup(); 257 258 static void vrrp_log(int, char *, ...); 259 static int timeval_to_milli(struct timeval); 260 static struct timeval timeval_delta(struct timeval, struct timeval); 261 262 typedef struct vrrpd_prop_s { 263 char *vs_propname; 264 boolean_t (*vs_propread)(vrrp_vr_conf_t *, const char *); 265 int (*vs_propwrite)(vrrp_vr_conf_t *, char *, size_t); 266 } vrrp_prop_t; 267 268 /* 269 * persistent VRRP properties array 270 */ 271 static vrrp_prop_t vrrp_prop_info_tbl[] = { 272 {"name", vrrp_rd_prop_name, vrrp_wt_prop_name}, 273 {"vrid", vrrp_rd_prop_vrid, vrrp_wt_prop_vrid}, 274 {"priority", vrrp_rd_prop_pri, vrrp_wt_prop_pri}, 275 {"adv_intval", vrrp_rd_prop_adver_int, vrrp_wt_prop_adver_int}, 276 {"preempt_mode", vrrp_rd_prop_preempt, vrrp_wt_prop_preempt}, 277 {"accept_mode", vrrp_rd_prop_accept, vrrp_wt_prop_accept}, 278 {"interface", vrrp_rd_prop_ifname, vrrp_wt_prop_ifname}, 279 {"af", vrrp_rd_prop_af, vrrp_wt_prop_af}, 280 {"enabled", vrrp_rd_prop_enabled, vrrp_wt_prop_enabled} 281 }; 282 283 #define VRRP_PROP_INFO_TABSIZE \ 284 (sizeof (vrrp_prop_info_tbl) / sizeof (vrrp_prop_t)) 285 286 typedef void vrrp_cmd_func_t(void *, void *, size_t *); 287 288 typedef struct vrrp_cmd_info_s { 289 
vrrp_cmd_type_t vi_cmd; 290 size_t vi_reqsize; 291 size_t vi_acksize; /* 0 if the size is variable */ 292 boolean_t vi_setop; /* Set operation? Check credentials */ 293 vrrp_cmd_func_t *vi_cmdfunc; 294 } vrrp_cmd_info_t; 295 296 static vrrp_cmd_info_t vrrp_cmd_info_tbl[] = { 297 {VRRP_CMD_CREATE, sizeof (vrrp_cmd_create_t), 298 sizeof (vrrp_ret_create_t), _B_TRUE, vrrpd_cmd_create}, 299 {VRRP_CMD_DELETE, sizeof (vrrp_cmd_delete_t), 300 sizeof (vrrp_ret_delete_t), _B_TRUE, vrrpd_cmd_delete}, 301 {VRRP_CMD_ENABLE, sizeof (vrrp_cmd_enable_t), 302 sizeof (vrrp_ret_enable_t), _B_TRUE, vrrpd_cmd_enable}, 303 {VRRP_CMD_DISABLE, sizeof (vrrp_cmd_disable_t), 304 sizeof (vrrp_ret_disable_t), _B_TRUE, vrrpd_cmd_disable}, 305 {VRRP_CMD_MODIFY, sizeof (vrrp_cmd_modify_t), 306 sizeof (vrrp_ret_modify_t), _B_TRUE, vrrpd_cmd_modify}, 307 {VRRP_CMD_QUERY, sizeof (vrrp_cmd_query_t), 0, 308 _B_FALSE, vrrpd_cmd_query}, 309 {VRRP_CMD_LIST, sizeof (vrrp_cmd_list_t), 0, 310 _B_FALSE, vrrpd_cmd_list} 311 }; 312 313 #define VRRP_DOOR_INFO_TABLE_SIZE \ 314 (sizeof (vrrp_cmd_info_tbl) / sizeof (vrrp_cmd_info_t)) 315 316 static int 317 ipaddr_cmp(int af, vrrp_addr_t *addr1, vrrp_addr_t *addr2) 318 { 319 if (af == AF_INET) { 320 return (memcmp(&addr1->in4.sin_addr, 321 &addr2->in4.sin_addr, sizeof (struct in_addr))); 322 } else { 323 return (memcmp(&addr1->in6.sin6_addr, 324 &addr2->in6.sin6_addr, sizeof (struct in6_addr))); 325 } 326 } 327 328 static vrrp_vr_t * 329 vrrpd_lookup_vr_by_vrid(char *ifname, vrid_t vrid, int af) 330 { 331 vrrp_vr_t *vr; 332 333 TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) { 334 if (strcmp(vr->vvr_conf.vvc_link, ifname) == 0 && 335 vr->vvr_conf.vvc_vrid == vrid && 336 vr->vvr_conf.vvc_af == af) { 337 break; 338 } 339 } 340 return (vr); 341 } 342 343 static vrrp_vr_t * 344 vrrpd_lookup_vr_by_name(const char *name) 345 { 346 vrrp_vr_t *vr; 347 348 TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) { 349 if (strcmp(vr->vvr_conf.vvc_name, name) == 0) 350 break; 351 } 352 return 
(vr); 353 } 354 355 static vrrp_intf_t * 356 vrrpd_lookup_if(const char *ifname, int af) 357 { 358 vrrp_intf_t *intf; 359 360 TAILQ_FOREACH(intf, &vrrp_intf_list, vvi_next) { 361 if (strcmp(ifname, intf->vvi_ifname) == 0 && 362 af == intf->vvi_af) { 363 break; 364 } 365 } 366 return (intf); 367 } 368 369 static vrrp_err_t 370 vrrpd_create_if(const char *ifname, int af, uint32_t ifindex, 371 vrrp_intf_t **intfp) 372 { 373 vrrp_intf_t *intf; 374 375 vrrp_log(VRRP_DBG0, "vrrpd_create_if(%s, %s, %d)", 376 ifname, af_str(af), ifindex); 377 378 if (((*intfp) = malloc(sizeof (vrrp_intf_t))) == NULL) { 379 vrrp_log(VRRP_ERR, "vrrpd_create_if(): failed to " 380 "allocate %s/%s interface", ifname, af_str(af)); 381 return (VRRP_ENOMEM); 382 } 383 384 intf = *intfp; 385 TAILQ_INIT(&intf->vvi_iplist); 386 (void) strlcpy(intf->vvi_ifname, ifname, sizeof (intf->vvi_ifname)); 387 intf->vvi_af = af; 388 intf->vvi_sockfd = -1; 389 intf->vvi_nvr = 0; 390 intf->vvi_eid = -1; 391 intf->vvi_pip = NULL; 392 intf->vvi_ifindex = ifindex; 393 intf->vvi_state = NODE_STATE_NEW; 394 intf->vvi_vr_state = VRRP_STATE_INIT; 395 TAILQ_INSERT_TAIL(&vrrp_intf_list, intf, vvi_next); 396 return (VRRP_SUCCESS); 397 } 398 399 /* 400 * An interface is deleted. If update_vr is true, the deletion of the interface 401 * may cause the state transition of assoicated VRRP router (if this interface 402 * is either the primary or the VNIC interface of the VRRP router); otherwise, 403 * simply delete the interface without updating the VRRP router. 404 */ 405 static void 406 vrrpd_delete_if(vrrp_intf_t *intf, boolean_t update_vr) 407 { 408 vrrp_ip_t *ip; 409 410 vrrp_log(VRRP_DBG0, "vrrpd_delete_if(%s, %s, %supdate_vr)", 411 intf->vvi_ifname, af_str(intf->vvi_af), update_vr ? 
"" : "no_"); 412 413 if (update_vr) { 414 /* 415 * If a this interface is the physical interface or the VNIC 416 * of a VRRP router, the deletion of the interface (no IP 417 * address exists on this interface) may cause the state 418 * transition of the VRRP router. call vrrpd_remove_if() 419 * to find all corresponding VRRP router and update their 420 * states. 421 */ 422 vrrpd_remove_if(intf, _B_FALSE); 423 } 424 425 /* 426 * First remove and delete all the IP addresses on the interface 427 */ 428 while (!TAILQ_EMPTY(&intf->vvi_iplist)) { 429 ip = TAILQ_FIRST(&intf->vvi_iplist); 430 vrrpd_delete_ip(intf, ip); 431 } 432 433 /* 434 * Then remove and delete the interface 435 */ 436 TAILQ_REMOVE(&vrrp_intf_list, intf, vvi_next); 437 (void) free(intf); 438 } 439 440 static vrrp_err_t 441 vrrpd_create_ip(vrrp_intf_t *intf, const char *lifname, vrrp_addr_t *addr, 442 uint64_t flags) 443 { 444 vrrp_ip_t *ip; 445 char abuf[INET6_ADDRSTRLEN]; 446 447 /* LINTED E_CONSTANT_CONDITION */ 448 VRRPADDR2STR(intf->vvi_af, addr, abuf, INET6_ADDRSTRLEN, _B_FALSE); 449 vrrp_log(VRRP_DBG0, "vrrpd_create_ip(%s, %s, %s, 0x%x)", 450 intf->vvi_ifname, lifname, abuf, flags); 451 452 if ((ip = malloc(sizeof (vrrp_ip_t))) == NULL) { 453 vrrp_log(VRRP_ERR, "vrrpd_create_ip(%s, %s):" 454 "failed to allocate IP", lifname, abuf); 455 return (VRRP_ENOMEM); 456 } 457 458 (void) strncpy(ip->vip_lifname, lifname, sizeof (ip->vip_lifname)); 459 ip->vip_state = NODE_STATE_NEW; 460 ip->vip_flags = flags; 461 (void) memcpy(&ip->vip_addr, addr, sizeof (ip->vip_addr)); 462 463 /* 464 * Make sure link-local IPv6 IP addresses are at the head of the list 465 */ 466 if (intf->vvi_af == AF_INET6 && 467 IN6_IS_ADDR_LINKLOCAL(&addr->in6.sin6_addr)) { 468 TAILQ_INSERT_HEAD(&intf->vvi_iplist, ip, vip_next); 469 } else { 470 TAILQ_INSERT_TAIL(&intf->vvi_iplist, ip, vip_next); 471 } 472 return (VRRP_SUCCESS); 473 } 474 475 static void 476 vrrpd_delete_ip(vrrp_intf_t *intf, vrrp_ip_t *ip) 477 { 478 char 
abuf[INET6_ADDRSTRLEN]; 479 int af = intf->vvi_af; 480 481 /* LINTED E_CONSTANT_CONDITION */ 482 VRRPADDR2STR(af, &ip->vip_addr, abuf, sizeof (abuf), _B_FALSE); 483 vrrp_log(VRRP_DBG0, "vrrpd_delete_ip(%s, %s, %s) is %sprimary", 484 intf->vvi_ifname, ip->vip_lifname, abuf, 485 intf->vvi_pip == ip ? "" : "not "); 486 487 if (intf->vvi_pip == ip) 488 intf->vvi_pip = NULL; 489 490 TAILQ_REMOVE(&intf->vvi_iplist, ip, vip_next); 491 (void) free(ip); 492 } 493 494 static char * 495 rtm_event2str(uchar_t event) 496 { 497 switch (event) { 498 case RTM_NEWADDR: 499 return ("RTM_NEWADDR"); 500 case RTM_DELADDR: 501 return ("RTM_DELADDR"); 502 case RTM_IFINFO: 503 return ("RTM_IFINFO"); 504 case RTM_ADD: 505 return ("RTM_ADD"); 506 case RTM_DELETE: 507 return ("RTM_DELETE"); 508 case RTM_CHANGE: 509 return ("RTM_CHANGE"); 510 case RTM_OLDADD: 511 return ("RTM_OLDADD"); 512 case RTM_OLDDEL: 513 return ("RTM_OLDDEL"); 514 case RTM_CHGADDR: 515 return ("RTM_CHGADDR"); 516 case RTM_FREEADDR: 517 return ("RTM_FREEADDR"); 518 default: 519 return ("RTM_OTHER"); 520 } 521 } 522 523 int 524 main(int argc, char *argv[]) 525 { 526 int c, err; 527 struct sigaction sa; 528 sigset_t mask; 529 struct rlimit rl; 530 531 (void) setlocale(LC_ALL, ""); 532 (void) textdomain(TEXT_DOMAIN); 533 534 /* 535 * We need PRIV_SYS_CONFIG to post VRRP sysevent, PRIV_NET_RAWACESS 536 * and PRIV_NET_ICMPACCESS to open the raw socket, PRIV_SYS_IP_CONFIG 537 * to bring up/down the virtual IP addresses, and PRIV_SYS_RESOURCE to 538 * setrlimit(). 539 * 540 * Note that sysevent is not supported in non-global zones. 
541 */ 542 if (getzoneid() == GLOBAL_ZONEID) { 543 err = __init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, 0, 0, 544 PRIV_SYS_CONFIG, PRIV_NET_RAWACCESS, PRIV_NET_ICMPACCESS, 545 PRIV_SYS_IP_CONFIG, PRIV_SYS_RESOURCE, NULL); 546 } else { 547 err = __init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET, 0, 0, 548 PRIV_NET_RAWACCESS, PRIV_NET_ICMPACCESS, 549 PRIV_SYS_IP_CONFIG, PRIV_SYS_RESOURCE, NULL); 550 } 551 552 if (err == -1) { 553 vrrp_log(VRRP_ERR, "main(): init_daemon_priv() failed"); 554 return (EXIT_FAILURE); 555 } 556 557 /* 558 * If vrrpd is started by other process, it will inherit the 559 * signal block mask. We unblock all signals to make sure the 560 * signal handling will work normally. 561 */ 562 (void) sigfillset(&mask); 563 (void) thr_sigsetmask(SIG_UNBLOCK, &mask, NULL); 564 sa.sa_handler = vrrpd_cleanup; 565 sa.sa_flags = 0; 566 (void) sigemptyset(&sa.sa_mask); 567 (void) sigaction(SIGINT, &sa, NULL); 568 (void) sigaction(SIGQUIT, &sa, NULL); 569 (void) sigaction(SIGTERM, &sa, NULL); 570 571 vrrp_debug_level = 0; 572 (void) strlcpy(vrrpd_conffile, VRRPCONF, sizeof (vrrpd_conffile)); 573 while ((c = getopt(argc, argv, "d:f:")) != EOF) { 574 switch (c) { 575 case 'd': 576 vrrp_debug_level = atoi(optarg); 577 break; 578 case 'f': 579 (void) strlcpy(vrrpd_conffile, optarg, 580 sizeof (vrrpd_conffile)); 581 break; 582 default: 583 break; 584 } 585 } 586 587 closefrom(3); 588 if (vrrp_debug_level == 0 && (daemon_init() != 0)) { 589 vrrp_log(VRRP_ERR, "main(): daemon_init() failed"); 590 return (EXIT_FAILURE); 591 } 592 593 rl.rlim_cur = RLIM_INFINITY; 594 rl.rlim_max = RLIM_INFINITY; 595 if (setrlimit(RLIMIT_NOFILE, &rl) == -1) { 596 vrrp_log(VRRP_ERR, "main(): setrlimit() failed"); 597 return (EXIT_FAILURE); 598 } 599 600 if (vrrpd_init() != VRRP_SUCCESS) { 601 vrrp_log(VRRP_ERR, "main(): vrrpd_init() failed"); 602 return (EXIT_FAILURE); 603 } 604 605 /* 606 * Get rid of unneeded privileges. 
607 */ 608 __fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION, 609 PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, PRIV_SYS_RESOURCE, NULL); 610 611 /* 612 * Read the configuration and initialize the existing VRRP 613 * configuration 614 */ 615 vrrpd_initconf(); 616 617 /* 618 * Start the loop to handle the timer and the IO events. 619 */ 620 switch (iu_handle_events(vrrpd_eh, vrrpd_timerq)) { 621 case -1: 622 vrrp_log(VRRP_ERR, "main(): iu_handle_events() failed " 623 "abnormally"); 624 break; 625 default: 626 break; 627 } 628 629 vrrpd_cleanup(); 630 return (EXIT_SUCCESS); 631 } 632 633 static int 634 daemon_init() 635 { 636 pid_t pid; 637 638 vrrp_log(VRRP_DBG0, "daemon_init()"); 639 640 if (getenv("SMF_FMRI") == NULL) { 641 vrrp_log(VRRP_ERR, "main(): vrrpd is an smf(5) managed service " 642 "and should not be run from the command line."); 643 return (-1); 644 } 645 646 if ((pid = fork()) < 0) 647 return (-1); 648 649 if (pid != 0) { 650 /* in parent process: do nothing. */ 651 exit(0); 652 } 653 654 /* 655 * in child process, became a daemon, and return to main() to continue. 
656 */ 657 (void) chdir("/"); 658 (void) setsid(); 659 (void) close(0); 660 (void) close(1); 661 (void) close(2); 662 (void) open("/dev/null", O_RDWR, 0); 663 (void) dup2(0, 1); 664 (void) dup2(0, 2); 665 openlog("vrrpd", LOG_PID, LOG_DAEMON); 666 vrrp_logflag = 1; 667 return (0); 668 } 669 670 static vrrp_err_t 671 vrrpd_init() 672 { 673 vrrp_err_t err = VRRP_ESYS; 674 675 vrrp_log(VRRP_DBG0, "vrrpd_init()"); 676 677 TAILQ_INIT(&vrrp_vr_list); 678 TAILQ_INIT(&vrrp_intf_list); 679 680 if (vrrp_open(&vrrpd_vh) != VRRP_SUCCESS) { 681 vrrp_log(VRRP_ERR, "vrrpd_init(): vrrp_open() failed"); 682 goto fail; 683 } 684 685 if ((vrrpd_timerq = iu_tq_create()) == NULL) { 686 vrrp_log(VRRP_ERR, "vrrpd_init(): iu_tq_create() failed"); 687 goto fail; 688 } 689 690 if ((vrrpd_eh = iu_eh_create()) == NULL) { 691 vrrp_log(VRRP_ERR, "vrrpd_init(): iu_eh_create() failed"); 692 goto fail; 693 } 694 695 /* 696 * Create the AF_UNIX socket used to communicate with libvrrpadm. 697 * 698 * This socket is used to receive the administrative requests and 699 * send back the results. 700 */ 701 if (vrrpd_cmdsock_create() != VRRP_SUCCESS) { 702 vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_cmdsock_create() " 703 "failed"); 704 goto fail; 705 } 706 707 /* 708 * Create the VRRP control socket used to bring up/down the virtual 709 * IP addresses. It is also used to set the IFF_NOACCEPT flag of 710 * the virtual IP addresses. 711 */ 712 if (vrrpd_ctlsock_create() != VRRP_SUCCESS) { 713 vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_ctlsock_create() " 714 "failed"); 715 goto fail; 716 } 717 718 /* 719 * Create the PF_ROUTER socket used to listen to the routing socket 720 * messages and build the interface/IP address list. 721 */ 722 if (vrrpd_rtsock_create() != VRRP_SUCCESS) { 723 vrrp_log(VRRP_ERR, "vrrpd_init(): vrrpd_rtsock_create() " 724 "failed"); 725 goto fail; 726 } 727 728 /* 729 * Build the list of interfaces and IP addresses. Also, start the time 730 * to scan the interfaces/IP addresses periodically. 
731 */ 732 vrrpd_scan(AF_INET); 733 vrrpd_scan(AF_INET6); 734 if ((vrrp_scan_timer_id = iu_schedule_timer_ms(vrrpd_timerq, 735 vrrpd_scan_interval, vrrpd_scan_timer, NULL)) == -1) { 736 vrrp_log(VRRP_ERR, "vrrpd_init(): start scan_timer failed"); 737 goto fail; 738 } 739 740 /* 741 * Initialize the VRRP multicast address. 742 */ 743 bzero(&vrrp_muladdr4, sizeof (vrrp_addr_t)); 744 vrrp_muladdr4.in4.sin_family = AF_INET; 745 (void) inet_pton(AF_INET, "224.0.0.18", &vrrp_muladdr4.in4.sin_addr); 746 747 bzero(&vrrp_muladdr6, sizeof (vrrp_addr_t)); 748 vrrp_muladdr6.in6.sin6_family = AF_INET6; 749 (void) inet_pton(AF_INET6, "ff02::12", &vrrp_muladdr6.in6.sin6_addr); 750 751 return (VRRP_SUCCESS); 752 753 fail: 754 vrrpd_fini(); 755 return (err); 756 } 757 758 static void 759 vrrpd_fini() 760 { 761 vrrp_log(VRRP_DBG0, "vrrpd_fini()"); 762 763 (void) iu_cancel_timer(vrrpd_timerq, vrrp_scan_timer_id, NULL); 764 vrrp_scan_timer_id = -1; 765 766 vrrpd_rtsock_destroy(); 767 vrrpd_ctlsock_destroy(); 768 vrrpd_cmdsock_destroy(); 769 770 if (vrrpd_eh != NULL) { 771 iu_eh_destroy(vrrpd_eh); 772 vrrpd_eh = NULL; 773 } 774 775 if (vrrpd_timerq != NULL) { 776 iu_tq_destroy(vrrpd_timerq); 777 vrrpd_timerq = NULL; 778 } 779 780 vrrp_close(vrrpd_vh); 781 vrrpd_vh = NULL; 782 assert(TAILQ_EMPTY(&vrrp_vr_list)); 783 assert(TAILQ_EMPTY(&vrrp_intf_list)); 784 } 785 786 static void 787 vrrpd_cleanup(void) 788 { 789 vrrp_vr_t *vr; 790 vrrp_intf_t *intf; 791 792 vrrp_log(VRRP_DBG0, "vrrpd_cleanup()"); 793 794 while (!TAILQ_EMPTY(&vrrp_vr_list)) { 795 vr = TAILQ_FIRST(&vrrp_vr_list); 796 vrrpd_delete_vr(vr); 797 } 798 799 while (!TAILQ_EMPTY(&vrrp_intf_list)) { 800 intf = TAILQ_FIRST(&vrrp_intf_list); 801 vrrpd_delete_if(intf, _B_FALSE); 802 } 803 804 vrrpd_fini(); 805 closelog(); 806 exit(1); 807 } 808 809 /* 810 * Read the configuration file and initialize all the existing VRRP routers. 
811 */ 812 static void 813 vrrpd_initconf() 814 { 815 FILE *fp; 816 char line[LINE_MAX]; 817 int linenum = 0; 818 vrrp_vr_conf_t conf; 819 vrrp_err_t err; 820 821 vrrp_log(VRRP_DBG0, "vrrpd_initconf()"); 822 823 if ((fp = fopen(vrrpd_conffile, "rF")) == NULL) { 824 vrrp_log(VRRP_ERR, "failed to open the configuration file %s", 825 vrrpd_conffile); 826 return; 827 } 828 829 while (fgets(line, sizeof (line), fp) != NULL) { 830 linenum++; 831 conf.vvc_vrid = VRRP_VRID_NONE; 832 if ((err = vrrpd_read_vrconf(line, &conf)) != VRRP_SUCCESS) { 833 vrrp_log(VRRP_ERR, "failed to parse %d line %s", 834 linenum, line); 835 continue; 836 } 837 838 /* 839 * Blank or comment line 840 */ 841 if (conf.vvc_vrid == VRRP_VRID_NONE) 842 continue; 843 844 /* 845 * No need to update the configuration since the VRRP router 846 * created/enabled based on the existing configuration. 847 */ 848 if ((err = vrrpd_create(&conf, _B_FALSE)) != VRRP_SUCCESS) { 849 vrrp_log(VRRP_ERR, "VRRP router %s creation failed: " 850 "%s", conf.vvc_name, vrrp_err2str(err)); 851 continue; 852 } 853 854 if (conf.vvc_enabled && 855 ((err = vrrpd_enable(conf.vvc_name, _B_FALSE)) != 856 VRRP_SUCCESS)) { 857 vrrp_log(VRRP_ERR, "VRRP router %s enable failed: %s", 858 conf.vvc_name, vrrp_err2str(err)); 859 } 860 } 861 862 (void) fclose(fp); 863 } 864 865 /* 866 * Create the AF_UNIX socket used to communicate with libvrrpadm. 867 * 868 * This socket is used to receive the administrative request and 869 * send back the results. 870 */ 871 static vrrp_err_t 872 vrrpd_cmdsock_create() 873 { 874 iu_event_id_t eid; 875 struct sockaddr_un laddr; 876 int sock, flags; 877 878 vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_create()"); 879 880 if ((sock = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) { 881 vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): socket(AF_UNIX) " 882 "failed: %s", strerror(errno)); 883 return (VRRP_ESYS); 884 } 885 886 /* 887 * Set it to be non-blocking. 
888 */ 889 flags = fcntl(sock, F_GETFL, 0); 890 (void) fcntl(sock, F_SETFL, (flags | O_NONBLOCK)); 891 892 /* 893 * Unlink first in case a previous daemon instance exited ungracefully. 894 */ 895 (void) unlink(VRRPD_SOCKET); 896 897 bzero(&laddr, sizeof (laddr)); 898 laddr.sun_family = AF_UNIX; 899 (void) strlcpy(laddr.sun_path, VRRPD_SOCKET, sizeof (laddr.sun_path)); 900 if (bind(sock, (struct sockaddr *)&laddr, sizeof (laddr)) < 0) { 901 vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): bind() failed: %s", 902 strerror(errno)); 903 (void) close(sock); 904 return (VRRP_ESYS); 905 } 906 907 if (listen(sock, 30) < 0) { 908 vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): listen() " 909 "failed: %s", strerror(errno)); 910 (void) close(sock); 911 return (VRRP_ESYS); 912 } 913 914 if ((eid = iu_register_event(vrrpd_eh, sock, POLLIN, 915 vrrpd_cmdsock_handler, NULL)) == -1) { 916 vrrp_log(VRRP_ERR, "vrrpd_cmdsock_create(): iu_register_event()" 917 " failed"); 918 (void) close(sock); 919 return (VRRP_ESYS); 920 } 921 922 vrrpd_cmdsock_fd = sock; 923 vrrpd_cmdsock_eid = eid; 924 return (VRRP_SUCCESS); 925 } 926 927 static void 928 vrrpd_cmdsock_destroy() 929 { 930 vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_destroy()"); 931 932 (void) iu_unregister_event(vrrpd_eh, vrrpd_cmdsock_eid, NULL); 933 (void) close(vrrpd_cmdsock_fd); 934 vrrpd_cmdsock_fd = -1; 935 vrrpd_cmdsock_eid = -1; 936 } 937 938 /* 939 * Create the PF_ROUTER sockets used to listen to the routing socket 940 * messages and build the interface/IP address list. Create one for 941 * each address family (IPv4 and IPv6). 
942 */ 943 static vrrp_err_t 944 vrrpd_rtsock_create() 945 { 946 int i, flags, sock; 947 iu_event_id_t eid; 948 949 vrrp_log(VRRP_DBG0, "vrrpd_rtsock_create()"); 950 951 for (i = 0; i < 2; i++) { 952 sock = socket(PF_ROUTE, SOCK_RAW, vrrpd_rtsocks[i].vrt_af); 953 if (sock == -1) { 954 vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): socket() " 955 "failed: %s", strerror(errno)); 956 break; 957 } 958 959 /* 960 * Set it to be non-blocking. 961 */ 962 if ((flags = fcntl(sock, F_GETFL, 0)) < 0) { 963 vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): " 964 "fcntl(F_GETFL) failed: %s", strerror(errno)); 965 break; 966 } 967 968 if ((fcntl(sock, F_SETFL, flags | O_NONBLOCK)) < 0) { 969 vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): " 970 "fcntl(F_SETFL) failed: %s", strerror(errno)); 971 break; 972 } 973 974 if ((eid = iu_register_event(vrrpd_eh, sock, POLLIN, 975 vrrpd_rtsock_handler, &(vrrpd_rtsocks[i].vrt_af))) == -1) { 976 vrrp_log(VRRP_ERR, "vrrpd_rtsock_create(): register " 977 "rtsock %d(%s) failed", sock, 978 af_str(vrrpd_rtsocks[i].vrt_af)); 979 break; 980 } 981 982 vrrpd_rtsocks[i].vrt_fd = sock; 983 vrrpd_rtsocks[i].vrt_eid = eid; 984 } 985 986 if (i != 2) { 987 (void) close(sock); 988 vrrpd_rtsock_destroy(); 989 return (VRRP_ESYS); 990 } 991 992 return (VRRP_SUCCESS); 993 } 994 995 static void 996 vrrpd_rtsock_destroy() 997 { 998 int i; 999 1000 vrrp_log(VRRP_DBG0, "vrrpd_rtsock_destroy()"); 1001 for (i = 0; i < 2; i++) { 1002 (void) iu_unregister_event(vrrpd_eh, vrrpd_rtsocks[i].vrt_eid, 1003 NULL); 1004 (void) close(vrrpd_rtsocks[i].vrt_fd); 1005 vrrpd_rtsocks[i].vrt_eid = -1; 1006 vrrpd_rtsocks[i].vrt_fd = -1; 1007 } 1008 } 1009 1010 /* 1011 * Create the VRRP control socket used to bring up/down the virtual 1012 * IP addresses. It is also used to set the IFF_NOACCEPT flag of 1013 * the virtual IP addresses. 
1014 */ 1015 static vrrp_err_t 1016 vrrpd_ctlsock_create() 1017 { 1018 int s, s6; 1019 int on = _B_TRUE; 1020 1021 if ((s = socket(AF_INET, SOCK_DGRAM, 0)) < 0) { 1022 vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): socket(INET) " 1023 "failed: %s", strerror(errno)); 1024 return (VRRP_ESYS); 1025 } 1026 if (setsockopt(s, SOL_SOCKET, SO_VRRP, &on, sizeof (on)) < 0) { 1027 vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): " 1028 "setsockopt(INET, SO_VRRP) failed: %s", strerror(errno)); 1029 (void) close(s); 1030 return (VRRP_ESYS); 1031 } 1032 1033 if ((s6 = socket(AF_INET6, SOCK_DGRAM, 0)) < 0) { 1034 vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): socket(INET6) " 1035 "failed: %s", strerror(errno)); 1036 (void) close(s); 1037 return (VRRP_ESYS); 1038 } 1039 if (setsockopt(s6, SOL_SOCKET, SO_VRRP, &on, sizeof (on)) < 0) { 1040 vrrp_log(VRRP_ERR, "vrrpd_ctlsock_create(): " 1041 "setsockopt(INET6, SO_VRRP) failed: %s", strerror(errno)); 1042 (void) close(s); 1043 (void) close(s6); 1044 return (VRRP_ESYS); 1045 } 1046 1047 vrrpd_ctlsock_fd = s; 1048 vrrpd_ctlsock6_fd = s6; 1049 return (VRRP_SUCCESS); 1050 } 1051 1052 static void 1053 vrrpd_ctlsock_destroy() 1054 { 1055 (void) close(vrrpd_ctlsock_fd); 1056 vrrpd_ctlsock_fd = -1; 1057 (void) close(vrrpd_ctlsock6_fd); 1058 vrrpd_ctlsock6_fd = -1; 1059 } 1060 1061 /*ARGSUSED*/ 1062 static void 1063 vrrpd_cmd_create(void *arg1, void *arg2, size_t *arg2_sz) 1064 { 1065 vrrp_cmd_create_t *cmd = (vrrp_cmd_create_t *)arg1; 1066 vrrp_ret_create_t *ret = (vrrp_ret_create_t *)arg2; 1067 vrrp_err_t err; 1068 1069 err = vrrpd_create(&cmd->vcc_conf, _B_TRUE); 1070 if (err == VRRP_SUCCESS && cmd->vcc_conf.vvc_enabled) { 1071 /* 1072 * No need to update the configuration since it is already 1073 * done in the above vrrpd_create() call 1074 */ 1075 err = vrrpd_enable(cmd->vcc_conf.vvc_name, _B_FALSE); 1076 if (err != VRRP_SUCCESS) 1077 (void) vrrpd_delete(cmd->vcc_conf.vvc_name); 1078 } 1079 ret->vrc_err = err; 1080 } 1081 1082 /*ARGSUSED*/ 1083 
static void 1084 vrrpd_cmd_delete(void *arg1, void *arg2, size_t *arg2_sz) 1085 { 1086 vrrp_cmd_delete_t *cmd = (vrrp_cmd_delete_t *)arg1; 1087 vrrp_ret_delete_t *ret = (vrrp_ret_delete_t *)arg2; 1088 1089 ret->vrd_err = vrrpd_delete(cmd->vcd_name); 1090 } 1091 1092 /*ARGSUSED*/ 1093 static void 1094 vrrpd_cmd_enable(void *arg1, void *arg2, size_t *arg2_sz) 1095 { 1096 vrrp_cmd_enable_t *cmd = (vrrp_cmd_enable_t *)arg1; 1097 vrrp_ret_enable_t *ret = (vrrp_ret_enable_t *)arg2; 1098 1099 ret->vrs_err = vrrpd_enable(cmd->vcs_name, _B_TRUE); 1100 } 1101 1102 /*ARGSUSED*/ 1103 static void 1104 vrrpd_cmd_disable(void *arg1, void *arg2, size_t *arg2_sz) 1105 { 1106 vrrp_cmd_disable_t *cmd = (vrrp_cmd_disable_t *)arg1; 1107 vrrp_ret_disable_t *ret = (vrrp_ret_disable_t *)arg2; 1108 1109 ret->vrx_err = vrrpd_disable(cmd->vcx_name); 1110 } 1111 1112 /*ARGSUSED*/ 1113 static void 1114 vrrpd_cmd_modify(void *arg1, void *arg2, size_t *arg2_sz) 1115 { 1116 vrrp_cmd_modify_t *cmd = (vrrp_cmd_modify_t *)arg1; 1117 vrrp_ret_modify_t *ret = (vrrp_ret_modify_t *)arg2; 1118 1119 ret->vrm_err = vrrpd_modify(&cmd->vcm_conf, cmd->vcm_mask); 1120 } 1121 1122 static void 1123 vrrpd_cmd_query(void *arg1, void *arg2, size_t *arg2_sz) 1124 { 1125 vrrp_cmd_query_t *cmd = (vrrp_cmd_query_t *)arg1; 1126 1127 vrrpd_query(cmd->vcq_name, arg2, arg2_sz); 1128 } 1129 1130 static void 1131 vrrpd_cmd_list(void *arg1, void *arg2, size_t *arg2_sz) 1132 { 1133 vrrp_cmd_list_t *cmd = (vrrp_cmd_list_t *)arg1; 1134 1135 vrrpd_list(cmd->vcl_vrid, cmd->vcl_ifname, cmd->vcl_af, arg2, arg2_sz); 1136 } 1137 1138 /* 1139 * Write-type requeset must have the solaris.network.vrrp authorization. 
1140 */ 1141 static boolean_t 1142 vrrp_auth_check(int connfd, vrrp_cmd_info_t *cinfo) 1143 { 1144 ucred_t *cred = NULL; 1145 uid_t uid; 1146 struct passwd *pw; 1147 boolean_t success = _B_FALSE; 1148 1149 vrrp_log(VRRP_DBG0, "vrrp_auth_check()"); 1150 1151 if (!cinfo->vi_setop) 1152 return (_B_TRUE); 1153 1154 /* 1155 * Validate the credential 1156 */ 1157 if (getpeerucred(connfd, &cred) == (uid_t)-1) { 1158 vrrp_log(VRRP_ERR, "vrrp_auth_check(): getpeerucred() " 1159 "failed: %s", strerror(errno)); 1160 return (_B_FALSE); 1161 } 1162 1163 if ((uid = ucred_getruid((const ucred_t *)cred)) == (uid_t)-1) { 1164 vrrp_log(VRRP_ERR, "vrrp_auth_check(): ucred_getruid() " 1165 "failed: %s", strerror(errno)); 1166 goto done; 1167 } 1168 1169 if ((pw = getpwuid(uid)) == NULL) { 1170 vrrp_log(VRRP_ERR, "vrrp_auth_check(): getpwuid() failed"); 1171 goto done; 1172 } 1173 1174 success = (chkauthattr("solaris.network.vrrp", pw->pw_name) == 1); 1175 1176 done: 1177 ucred_free(cred); 1178 return (success); 1179 } 1180 1181 /* 1182 * Process the administrative request from libvrrpadm 1183 */ 1184 /* ARGSUSED */ 1185 static void 1186 vrrpd_cmdsock_handler(iu_eh_t *eh, int s, short events, iu_event_id_t id, 1187 void *arg) 1188 { 1189 vrrp_cmd_info_t *cinfo = NULL; 1190 vrrp_err_t err = VRRP_SUCCESS; 1191 uchar_t buf[BUFFSIZE], ackbuf[BUFFSIZE]; 1192 size_t cursize, acksize, len; 1193 uint32_t cmd; 1194 int connfd, i; 1195 struct sockaddr_in from; 1196 socklen_t fromlen; 1197 1198 vrrp_log(VRRP_DBG0, "vrrpd_cmdsock_handler()"); 1199 1200 fromlen = (socklen_t)sizeof (from); 1201 if ((connfd = accept(s, (struct sockaddr *)&from, &fromlen)) < 0) { 1202 vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler() accept(): %s", 1203 strerror(errno)); 1204 return; 1205 } 1206 1207 /* 1208 * First get the type of the request 1209 */ 1210 cursize = 0; 1211 while (cursize < sizeof (uint32_t)) { 1212 len = read(connfd, buf + cursize, 1213 sizeof (uint32_t) - cursize); 1214 if (len == (size_t)-1 && (errno == 
EAGAIN || errno == EINTR)) { 1215 continue; 1216 } else if (len > 0) { 1217 cursize += len; 1218 continue; 1219 } 1220 vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid message " 1221 "length"); 1222 (void) close(connfd); 1223 return; 1224 } 1225 1226 /* LINTED E_BAD_PTR_CAST_ALIGN */ 1227 cmd = ((vrrp_cmd_t *)buf)->vc_cmd; 1228 for (i = 0; i < VRRP_DOOR_INFO_TABLE_SIZE; i++) { 1229 if (vrrp_cmd_info_tbl[i].vi_cmd == cmd) { 1230 cinfo = vrrp_cmd_info_tbl + i; 1231 break; 1232 } 1233 } 1234 1235 if (cinfo == NULL) { 1236 vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid request " 1237 "type %d", cmd); 1238 err = VRRP_EINVAL; 1239 goto done; 1240 } 1241 1242 /* 1243 * Get the rest of the request. 1244 */ 1245 assert(cursize == sizeof (uint32_t)); 1246 while (cursize < cinfo->vi_reqsize) { 1247 len = read(connfd, buf + cursize, 1248 cinfo->vi_reqsize - cursize); 1249 if (len == (size_t)-1 && (errno == EAGAIN || errno == EINTR)) { 1250 continue; 1251 } else if (len > 0) { 1252 cursize += len; 1253 continue; 1254 } 1255 vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): invalid message " 1256 "length"); 1257 err = VRRP_EINVAL; 1258 goto done; 1259 } 1260 1261 /* 1262 * Validate the authorization 1263 */ 1264 if (!vrrp_auth_check(connfd, cinfo)) { 1265 vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler(): " 1266 "not sufficient authorization"); 1267 err = VRRP_EPERM; 1268 } 1269 1270 done: 1271 /* 1272 * Ack the request 1273 */ 1274 if (err != 0) { 1275 /* LINTED E_BAD_PTR_CAST_ALIGN */ 1276 ((vrrp_ret_t *)ackbuf)->vr_err = err; 1277 acksize = sizeof (vrrp_ret_t); 1278 } else { 1279 /* 1280 * If the size of ack is varied, the cmdfunc callback 1281 * will set the right size. 1282 */ 1283 if ((acksize = cinfo->vi_acksize) == 0) 1284 acksize = sizeof (ackbuf); 1285 1286 /* LINTED E_BAD_PTR_CAST_ALIGN */ 1287 cinfo->vi_cmdfunc((vrrp_cmd_t *)buf, ackbuf, &acksize); 1288 } 1289 1290 /* 1291 * Send the ack back. 
1292 */ 1293 cursize = 0; 1294 while (cursize < acksize) { 1295 len = sendto(connfd, ackbuf + cursize, acksize - cursize, 1296 0, (struct sockaddr *)&from, fromlen); 1297 if (len == (size_t)-1 && errno == EAGAIN) { 1298 continue; 1299 } else if (len > 0) { 1300 cursize += len; 1301 continue; 1302 } else { 1303 vrrp_log(VRRP_ERR, "vrrpd_cmdsock_handler() failed to " 1304 "ack: %s", strerror(errno)); 1305 break; 1306 } 1307 } 1308 1309 (void) shutdown(connfd, SHUT_RDWR); 1310 (void) close(connfd); 1311 } 1312 1313 /* 1314 * Process the routing socket messages and update the interfaces/IP addresses 1315 * list 1316 */ 1317 /* ARGSUSED */ 1318 static void 1319 vrrpd_rtsock_handler(iu_eh_t *eh, int s, short events, 1320 iu_event_id_t id, void *arg) 1321 { 1322 char buf[BUFFSIZE]; 1323 struct ifa_msghdr *ifam; 1324 int nbytes; 1325 int af = *(int *)arg; 1326 boolean_t scanif = _B_FALSE; 1327 1328 for (;;) { 1329 nbytes = read(s, buf, sizeof (buf)); 1330 if (nbytes <= 0) { 1331 /* No more messages */ 1332 break; 1333 } 1334 1335 /* LINTED E_BAD_PTR_CAST_ALIGN */ 1336 ifam = (struct ifa_msghdr *)buf; 1337 if (ifam->ifam_version != RTM_VERSION) { 1338 vrrp_log(VRRP_ERR, "vrrpd_rtsock_handler(): version %d " 1339 "not understood", ifam->ifam_version); 1340 break; 1341 } 1342 1343 vrrp_log(VRRP_DBG0, "vrrpd_rtsock_handler(): recv %s event", 1344 rtm_event2str(ifam->ifam_type)); 1345 1346 switch (ifam->ifam_type) { 1347 case RTM_FREEADDR: 1348 case RTM_CHGADDR: 1349 case RTM_NEWADDR: 1350 case RTM_DELADDR: 1351 /* 1352 * An IP address has been created/updated/deleted or 1353 * brought up/down, re-initilialize the interface/IP 1354 * address list. 1355 */ 1356 scanif = _B_TRUE; 1357 break; 1358 default: 1359 /* Not interesting */ 1360 break; 1361 } 1362 } 1363 1364 if (scanif) 1365 vrrpd_scan(af); 1366 } 1367 1368 /* 1369 * Periodically scan the interface/IP addresses on the system. 
1370 */ 1371 /* ARGSUSED */ 1372 static void 1373 vrrpd_scan_timer(iu_tq_t *tq, void *arg) 1374 { 1375 vrrp_log(VRRP_DBG0, "vrrpd_scan_timer()"); 1376 vrrpd_scan(AF_INET); 1377 vrrpd_scan(AF_INET6); 1378 } 1379 1380 /* 1381 * Get the list of the interface/IP addresses of the specified address 1382 * family. 1383 */ 1384 static void 1385 vrrpd_scan(int af) 1386 { 1387 vrrp_log(VRRP_DBG0, "vrrpd_scan(%s)", af_str(af)); 1388 1389 again: 1390 vrrpd_init_ipcache(af); 1391 1392 /* 1393 * If interface index changes, walk again. 1394 */ 1395 if (icfg_iterate_if(af, ICFG_PLUMBED, NULL, 1396 vrrpd_walk_ipaddr) != ICFG_SUCCESS) 1397 goto again; 1398 1399 vrrpd_update_ipcache(af); 1400 } 1401 1402 /* 1403 * First mark all IP addresses of the specific address family to be removed. 1404 * This flag will then be cleared when we walk up all the IP addresses. 1405 */ 1406 static void 1407 vrrpd_init_ipcache(int af) 1408 { 1409 vrrp_intf_t *intf, *next_intf; 1410 vrrp_ip_t *ip, *nextip; 1411 char abuf[INET6_ADDRSTRLEN]; 1412 1413 vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(%s)", af_str(af)); 1414 1415 next_intf = TAILQ_FIRST(&vrrp_intf_list); 1416 while ((intf = next_intf) != NULL) { 1417 next_intf = TAILQ_NEXT(intf, vvi_next); 1418 if (intf->vvi_af != af) 1419 continue; 1420 1421 /* 1422 * If the interface is still marked as new, it means that this 1423 * vrrpd_init_ipcache() call is a result of ifindex change, 1424 * which causes the re-walk of all the interfaces (see 1425 * vrrpd_add_ipaddr()), and some interfaces are still marked 1426 * as new during the last walk. In this case, delete this 1427 * interface with the "update_vr" argument to be _B_FALSE, 1428 * since no VRRP router has been assoicated with this 1429 * interface yet (the association is done in 1430 * vrrpd_update_ipcache()). 1431 * 1432 * This interface will be re-added later if it still exists. 
1433 */ 1434 if (intf->vvi_state == NODE_STATE_NEW) { 1435 vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(): remove %s " 1436 "(%d), may be added later", intf->vvi_ifname, 1437 intf->vvi_ifindex); 1438 vrrpd_delete_if(intf, _B_FALSE); 1439 continue; 1440 } 1441 1442 for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL; 1443 ip = nextip) { 1444 nextip = TAILQ_NEXT(ip, vip_next); 1445 /* LINTED E_CONSTANT_CONDITION */ 1446 VRRPADDR2STR(af, &ip->vip_addr, abuf, 1447 INET6_ADDRSTRLEN, _B_FALSE); 1448 1449 if (ip->vip_state != NODE_STATE_NEW) { 1450 vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(%s/%d, " 1451 "%s(%s/0x%x))", intf->vvi_ifname, 1452 intf->vvi_ifindex, ip->vip_lifname, 1453 abuf, ip->vip_flags); 1454 ip->vip_state = NODE_STATE_STALE; 1455 continue; 1456 } 1457 1458 /* 1459 * If the IP is still marked as new, it means that 1460 * this vrrpd_init_ipcache() call is a result of 1461 * ifindex change, which causes the re-walk of all 1462 * the IP addresses (see vrrpd_add_ipaddr()). 1463 * Delete this IP. 1464 * 1465 * This IP will be readded later if it still exists. 1466 */ 1467 vrrp_log(VRRP_DBG0, "vrrpd_init_ipcache(): remove " 1468 "%s/%d , %s(%s)", intf->vvi_ifname, 1469 intf->vvi_ifindex, ip->vip_lifname, abuf); 1470 vrrpd_delete_ip(intf, ip); 1471 } 1472 } 1473 } 1474 1475 /* 1476 * Walk all the IP addresses on the given interface and update its 1477 * addresses list. Return ICFG_FAILURE if it is required to walk 1478 * all the interfaces again (one of the interface index changes in between). 
1479 */ 1480 /* ARGSUSED */ 1481 static int 1482 vrrpd_walk_ipaddr(icfg_if_t *intf, void *arg) 1483 { 1484 icfg_handle_t ih; 1485 int ifindex; 1486 vrrp_addr_t addr; 1487 socklen_t addrlen = (socklen_t)sizeof (struct sockaddr_in6); 1488 int prefixlen; 1489 uint64_t flags; 1490 int err = ICFG_SUCCESS; 1491 1492 vrrp_log(VRRP_DBG0, "vrrpd_walk_ipaddr(%s, %s)", intf->if_name, 1493 af_str(intf->if_protocol)); 1494 1495 if (icfg_open(&ih, intf) != ICFG_SUCCESS) { 1496 vrrp_log(VRRP_ERR, "vrrpd_walk_ipaddr(%s, %s): icfg_open() " 1497 "failed: %s", intf->if_name, af_str(intf->if_protocol), 1498 strerror(errno)); 1499 return (err); 1500 } 1501 1502 if (icfg_get_flags(ih, &flags) != ICFG_SUCCESS) { 1503 if (errno != ENXIO && errno != ENOENT) { 1504 vrrp_log(VRRP_ERR, "vrrpd_walk_ipaddr(%s, %s): " 1505 "icfg_get_flags() failed %s", intf->if_name, 1506 af_str(intf->if_protocol), strerror(errno)); 1507 } 1508 goto done; 1509 } 1510 1511 /* 1512 * skip virtual/IPMP/P2P interfaces. 1513 */ 1514 if ((flags & (IFF_VIRTUAL|IFF_IPMP|IFF_POINTOPOINT)) != 0) { 1515 vrrp_log(VRRP_DBG0, "vrrpd_walk_ipaddr(%s, %s) skipped", 1516 intf->if_name, af_str(intf->if_protocol)); 1517 goto done; 1518 } 1519 1520 if (icfg_get_index(ih, &ifindex) != ICFG_SUCCESS) { 1521 if (errno != ENXIO && errno != ENOENT) { 1522 vrrp_log(VRRP_ERR, "vrrpd_walk_ipaddr(%s, %s) " 1523 "icfg_get_index() failed: %s", intf->if_name, 1524 af_str(intf->if_protocol), strerror(errno)); 1525 } 1526 goto done; 1527 } 1528 1529 if (icfg_get_addr(ih, (struct sockaddr *)&addr, &addrlen, 1530 &prefixlen, _B_FALSE) != ICFG_SUCCESS) { 1531 if (errno != ENXIO && errno != ENOENT) { 1532 vrrp_log(VRRP_ERR, "vrrpd_walk_ipaddr(%s, %s) " 1533 "icfg_get_addr() failed: %s", intf->if_name, 1534 af_str(intf->if_protocol), strerror(errno)); 1535 } 1536 goto done; 1537 } 1538 1539 /* 1540 * Filter out the all-zero IP address. 
1541 */ 1542 if (VRRPADDR_UNSPECIFIED(intf->if_protocol, &addr)) 1543 goto done; 1544 1545 /* 1546 * The interface is unplumbed/replumbed during we walk the IP 1547 * addresses. Try walk the IP addresses one more time. 1548 */ 1549 if (vrrpd_add_ipaddr(intf->if_name, intf->if_protocol, 1550 &addr, ifindex, flags) == VRRP_EAGAIN) 1551 err = ICFG_FAILURE; 1552 1553 done: 1554 icfg_close(ih); 1555 return (err); 1556 } 1557 1558 /* 1559 * Given the information of each IP address, update the interface and 1560 * IP addresses list 1561 */ 1562 static vrrp_err_t 1563 vrrpd_add_ipaddr(char *lifname, int af, vrrp_addr_t *addr, int ifindex, 1564 uint64_t flags) 1565 { 1566 char ifname[LIFNAMSIZ], *c; 1567 vrrp_intf_t *intf; 1568 vrrp_ip_t *ip; 1569 char abuf[INET6_ADDRSTRLEN]; 1570 vrrp_err_t err; 1571 1572 /* LINTED E_CONSTANT_CONDITION */ 1573 VRRPADDR2STR(af, addr, abuf, INET6_ADDRSTRLEN, _B_FALSE); 1574 vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s, %d, 0x%x)", lifname, 1575 abuf, ifindex, flags); 1576 1577 /* 1578 * Get the physical interface name from the logical interface name. 1579 */ 1580 (void) strlcpy(ifname, lifname, sizeof (ifname)); 1581 if ((c = strchr(ifname, ':')) != NULL) 1582 *c = '\0'; 1583 1584 if ((intf = vrrpd_lookup_if(ifname, af)) == NULL) { 1585 vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(): %s is new", ifname); 1586 err = vrrpd_create_if(ifname, af, ifindex, &intf); 1587 if (err != VRRP_SUCCESS) 1588 return (err); 1589 } else if (intf->vvi_ifindex != ifindex) { 1590 /* 1591 * If index changes, it means that this interface is 1592 * unplumbed/replumbed since we last checked. If this 1593 * interface is not used by any VRRP router, just 1594 * update its ifindex, and the IP addresses list will 1595 * be updated later. Otherwise, return EAGAIN to rewalk 1596 * all the IP addresses from the beginning. 
1597 */ 1598 vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s) ifindex changed ", 1599 "from %d to %d", ifname, intf->vvi_ifindex, ifindex); 1600 if (!IS_PRIMARY_INTF(intf) && !IS_VIRTUAL_INTF(intf)) { 1601 intf->vvi_ifindex = ifindex; 1602 } else { 1603 /* 1604 * delete this interface from the list if this 1605 * interface has already been assoicated with 1606 * any VRRP routers. 1607 */ 1608 vrrpd_delete_if(intf, _B_TRUE); 1609 return (VRRP_EAGAIN); 1610 } 1611 } 1612 1613 /* 1614 * Does this IP address already exist? 1615 */ 1616 TAILQ_FOREACH(ip, &intf->vvi_iplist, vip_next) { 1617 if (strcmp(ip->vip_lifname, lifname) == 0) 1618 break; 1619 } 1620 1621 if (ip != NULL) { 1622 vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s) IP exists", 1623 lifname, abuf); 1624 ip->vip_state = NODE_STATE_NONE; 1625 ip->vip_flags = flags; 1626 if (ipaddr_cmp(af, addr, &ip->vip_addr) != 0) { 1627 /* 1628 * Address has been changed, mark it as new 1629 * If this address is already selected as the 1630 * primary IP address, the new IP will be checked 1631 * to see whether it is still qualified as the 1632 * primary IP address. If not, the primary IP 1633 * address will be reselected. 1634 */ 1635 (void) memcpy(&ip->vip_addr, addr, 1636 sizeof (vrrp_addr_t)); 1637 1638 ip->vip_state = NODE_STATE_NEW; 1639 } 1640 } else { 1641 vrrp_log(VRRP_DBG0, "vrrpd_add_ipaddr(%s, %s) IP is new", 1642 lifname, abuf); 1643 1644 err = vrrpd_create_ip(intf, lifname, addr, flags); 1645 if (err != VRRP_SUCCESS) 1646 return (err); 1647 } 1648 return (VRRP_SUCCESS); 1649 } 1650 1651 /* 1652 * Update the interface and IP addresses list. Remove the ones that have been 1653 * staled since last time we walk the IP addresses and updated the ones that 1654 * have been changed. 
 */
static void
vrrpd_update_ipcache(int af)
{
	vrrp_intf_t	*intf, *nextif;
	vrrp_ip_t	*ip, *nextip;
	char		abuf[INET6_ADDRSTRLEN];
	boolean_t	primary_selected;
	boolean_t	primary_now_selected;
	boolean_t	need_reenable = _B_FALSE;

	vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(%s)", af_str(af));

	/*
	 * The next interface is fetched before the current one is
	 * processed, since the current one may be deleted below.
	 */
	nextif = TAILQ_FIRST(&vrrp_intf_list);
	while ((intf = nextif) != NULL) {
		nextif = TAILQ_NEXT(intf, vvi_next);
		if (intf->vvi_af != af)
			continue;

		/*
		 * Does the interface already select its primary IP address?
		 */
		primary_selected = (intf->vvi_pip != NULL);
		assert(!primary_selected || IS_PRIMARY_INTF(intf));

		/*
		 * Remove the IP addresses that have been unconfigured, i.e.
		 * the ones that are still marked as stale after the walk.
		 */
		for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
		    ip = nextip) {
			nextip = TAILQ_NEXT(ip, vip_next);
			if (ip->vip_state != NODE_STATE_STALE)
				continue;

			/* LINTED E_CONSTANT_CONDITION */
			VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN,
			    _B_FALSE);
			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): IP %s "
			    "is removed over %s", abuf, intf->vvi_ifname);
			vrrpd_delete_ip(intf, ip);
		}

		/*
		 * No IP addresses left, delete this interface.
		 */
		if (TAILQ_EMPTY(&intf->vvi_iplist)) {
			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
			    "no IP left over %s", intf->vvi_ifname);
			vrrpd_delete_if(intf, _B_TRUE);
			continue;
		}

		/*
		 * If this is selected as the physical interface for any
		 * VRRP router, reselect the primary address if needed.
		 */
		if (IS_PRIMARY_INTF(intf)) {
			vrrpd_reselect_primary(intf);
			primary_now_selected = (intf->vvi_pip != NULL);

			/*
			 * Cannot find the new primary IP address.
			 */
			if (primary_selected && !primary_now_selected) {
				vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache() "
				    "reselect primary IP on %s failed",
				    intf->vvi_ifname);
				vrrpd_remove_if(intf, _B_TRUE);
			} else if (!primary_selected && primary_now_selected) {
				/*
				 * The primary IP address is successfully
				 * selected on the physical interface, we
				 * need to walk through all the VRRP routers
				 * that are created on this physical interface
				 * and see whether they can now be enabled.
				 */
				need_reenable = _B_TRUE;
			}
		}

		/*
		 * For every new virtual IP address, bring up/down it based
		 * on the state of VRRP router.
		 *
		 * Note that it is fine to not update the IP's vip_flags field
		 * even if vrrpd_virtualip_updateone() changed the address's
		 * up/down state, since the vip_flags field is only used for
		 * select primary IP address over a physical interface, and
		 * vrrpd_virtualip_updateone() only affects the virtual IP
		 * address's status.
		 */
		for (ip = TAILQ_FIRST(&intf->vvi_iplist); ip != NULL;
		    ip = nextip) {
			nextip = TAILQ_NEXT(ip, vip_next);
			/* LINTED E_CONSTANT_CONDITION */
			VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN,
			    _B_FALSE);
			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
			    "IP %s over %s%s", abuf, intf->vvi_ifname,
			    ip->vip_state == NODE_STATE_NEW ? " is new" : "");

			if (IS_VIRTUAL_INTF(intf)) {
				/*
				 * If this IP is new, update its up/down state
				 * based on the virtual interface's state
				 * (which is determined by the VRRP router's
				 * state). Otherwise, check only and prompt
				 * warnings if its up/down state has been
				 * changed.
				 */
				if (vrrpd_virtualip_updateone(intf, ip,
				    ip->vip_state == NODE_STATE_NONE) !=
				    VRRP_SUCCESS) {
					vrrp_log(VRRP_DBG0,
					    "vrrpd_update_ipcache(): "
					    "IP %s over %s update failed", abuf,
					    intf->vvi_ifname);
					vrrpd_delete_ip(intf, ip);
					continue;
				}
			}
			/* The address is now up to date; clear its mark. */
			ip->vip_state = NODE_STATE_NONE;
		}

		/*
		 * The IP address is deleted when it is failed to be brought
		 * up. If no IP addresses are left, delete this interface.
		 */
		if (TAILQ_EMPTY(&intf->vvi_iplist)) {
			vrrp_log(VRRP_DBG0, "vrrpd_update_ipcache(): "
			    "no IP left over %s", intf->vvi_ifname);
			vrrpd_delete_if(intf, _B_TRUE);
			continue;
		}

		if (intf->vvi_state == NODE_STATE_NEW) {
			/*
			 * A new interface is found. This interface can be
			 * the primary interface or the virtual VNIC
			 * interface. Again, we need to walk through all
			 * the VRRP routers to see whether some of them can
			 * now be enabled because of the new primary IP
			 * address or the new virtual IP addresses.
			 */
			intf->vvi_state = NODE_STATE_NONE;
			need_reenable = _B_TRUE;
		}
	}

	/* Done once, after every interface has been processed. */
	if (need_reenable)
		vrrpd_reenable_all_vr();
}

/*
 * Reselect primary IP if:
 * - The existing primary IP is no longer qualified (removed or it is down or
 *   not a link-local IP for IPv6 VRRP router);
 * - This is a physical interface but no primary IP is chosen;
 *
 * On return intf->vvi_pip points to the selected address, or is NULL if no
 * address qualifies.
 */
static void
vrrpd_reselect_primary(vrrp_intf_t *intf)
{
	vrrp_ip_t	*ip;
	char		abuf[INET6_ADDRSTRLEN];

	assert(IS_PRIMARY_INTF(intf));

	/*
	 * If the interface's old primary IP address is still valid, return
	 */
	if (((ip = intf->vvi_pip) != NULL) && (QUALIFY_PRIMARY_ADDR(intf, ip)))
		return;

	/* There was a primary IP address but it no longer qualifies. */
	if (ip != NULL) {
		/* LINTED E_CONSTANT_CONDITION */
		VRRPADDR2STR(intf->vvi_af, &ip->vip_addr, abuf,
		    sizeof (abuf), _B_FALSE);
		vrrp_log(VRRP_DBG0, "vrrpd_reselect_primary(%s): primary IP %s "
		    "is no longer qualified", intf->vvi_ifname, abuf);
	}

	/* Record the new choice; it is NULL when the selection failed. */
	ip = vrrpd_select_primary(intf);
	intf->vvi_pip = ip;

	if (ip != NULL) {
		/* LINTED E_CONSTANT_CONDITION */
		VRRPADDR2STR(intf->vvi_af, &ip->vip_addr, abuf,
		    sizeof (abuf), _B_FALSE);
		vrrp_log(VRRP_DBG0, "vrrpd_reselect_primary(%s): primary IP %s "
		    "is selected", intf->vvi_ifname, abuf);
	}
}

/*
 * Select the primary IP address. Since the link-local IP address is always
 * at the head of the IP address list, try to find the first UP IP address
 * and see whether it qualifies.
1852 */ 1853 static vrrp_ip_t * 1854 vrrpd_select_primary(vrrp_intf_t *pif) 1855 { 1856 vrrp_ip_t *pip; 1857 char abuf[INET6_ADDRSTRLEN]; 1858 1859 vrrp_log(VRRP_DBG1, "vrrpd_select_primary(%s)", pif->vvi_ifname); 1860 1861 TAILQ_FOREACH(pip, &pif->vvi_iplist, vip_next) { 1862 assert(pip->vip_state != NODE_STATE_STALE); 1863 1864 /* LINTED E_CONSTANT_CONDITION */ 1865 VRRPADDR2STR(pif->vvi_af, &pip->vip_addr, abuf, 1866 INET6_ADDRSTRLEN, _B_FALSE); 1867 vrrp_log(VRRP_DBG0, "vrrpd_select_primary(%s): %s is %s", 1868 pif->vvi_ifname, abuf, 1869 (pip->vip_flags & IFF_UP) ? "up" : "down"); 1870 1871 if (pip->vip_flags & IFF_UP) 1872 break; 1873 } 1874 1875 /* 1876 * Is this valid primary IP address? 1877 */ 1878 if (pip == NULL || !QUALIFY_PRIMARY_ADDR(pif, pip)) { 1879 vrrp_log(VRRP_DBG0, "vrrpd_select_primary(%s/%s) failed", 1880 pif->vvi_ifname, af_str(pif->vvi_af)); 1881 return (NULL); 1882 } 1883 return (pip); 1884 } 1885 1886 /* 1887 * This is a new interface. Check whether any VRRP router is waiting for it 1888 */ 1889 static void 1890 vrrpd_reenable_all_vr() 1891 { 1892 vrrp_vr_t *vr; 1893 1894 vrrp_log(VRRP_DBG0, "vrrpd_reenable_all_vr()"); 1895 1896 TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) { 1897 if (vr->vvr_conf.vvc_enabled) 1898 (void) vrrpd_enable_vr(vr); 1899 } 1900 } 1901 1902 /* 1903 * If primary_addr_gone is _B_TRUE, it means that we failed to select 1904 * the primary IP address on this (physical) interface; otherwise, 1905 * it means the interface is no longer available. 1906 */ 1907 static void 1908 vrrpd_remove_if(vrrp_intf_t *intf, boolean_t primary_addr_gone) 1909 { 1910 vrrp_vr_t *vr; 1911 1912 vrrp_log(VRRP_DBG0, "vrrpd_remove_if(%s): %s", intf->vvi_ifname, 1913 primary_addr_gone ? 
"primary address gone" : "interface deleted"); 1914 1915 TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) { 1916 if (vr->vvr_conf.vvc_enabled) 1917 vrrpd_disable_vr(vr, intf, primary_addr_gone); 1918 } 1919 } 1920 1921 /* 1922 * Update the VRRP configuration file based on the given configuration. 1923 * op is either VRRP_CONF_UPDATE or VRRP_CONF_DELETE 1924 */ 1925 static vrrp_err_t 1926 vrrpd_updateconf(vrrp_vr_conf_t *newconf, uint_t op) 1927 { 1928 vrrp_vr_conf_t conf; 1929 FILE *fp, *nfp; 1930 int nfd; 1931 char line[LINE_MAX]; 1932 char newfile[MAXPATHLEN]; 1933 boolean_t found = _B_FALSE; 1934 vrrp_err_t err = VRRP_SUCCESS; 1935 1936 vrrp_log(VRRP_DBG0, "vrrpd_updateconf(%s, %s)", newconf->vvc_name, 1937 op == VRRP_CONF_UPDATE ? "update" : "delete"); 1938 1939 if ((fp = fopen(vrrpd_conffile, "r+F")) == NULL) { 1940 vrrp_log(VRRP_ERR, "vrrpd_updateconf(): open %s failed: %s", 1941 vrrpd_conffile, strerror(errno)); 1942 return (VRRP_EDB); 1943 } 1944 1945 (void) snprintf(newfile, MAXPATHLEN, "%s.new", vrrpd_conffile); 1946 if ((nfd = open(newfile, O_WRONLY | O_CREAT | O_TRUNC, 1947 S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH)) < 0) { 1948 vrrp_log(VRRP_ERR, "vrrpd_updateconf(): open %s failed: %s", 1949 newfile, strerror(errno)); 1950 (void) fclose(fp); 1951 return (VRRP_EDB); 1952 } 1953 1954 if ((nfp = fdopen(nfd, "wF")) == NULL) { 1955 vrrp_log(VRRP_ERR, "vrrpd_updateconf(): fdopen(%s) failed: %s", 1956 newfile, strerror(errno)); 1957 goto done; 1958 } 1959 1960 while (fgets(line, sizeof (line), fp) != NULL) { 1961 conf.vvc_vrid = VRRP_VRID_NONE; 1962 if (!found && (err = vrrpd_read_vrconf(line, &conf)) != 1963 VRRP_SUCCESS) { 1964 vrrp_log(VRRP_ERR, "vrrpd_updateconf(): invalid " 1965 "configuration format: %s", line); 1966 goto done; 1967 } 1968 1969 /* 1970 * Write this line out if: 1971 * - this is a comment line; or 1972 * - we've done updating/deleting the the given VR; or 1973 * - if the name of the VR read from this line does not match 1974 * the VR name that we 
are about to update/delete; 1975 */ 1976 if (found || conf.vvc_vrid == VRRP_VRID_NONE || 1977 strcmp(conf.vvc_name, newconf->vvc_name) != 0) { 1978 if (fputs(line, nfp) != EOF) 1979 continue; 1980 1981 vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to " 1982 "write line %s", line); 1983 err = VRRP_EDB; 1984 goto done; 1985 } 1986 1987 /* 1988 * Otherwise, update/skip the line. 1989 */ 1990 found = _B_TRUE; 1991 if (op == VRRP_CONF_DELETE) 1992 continue; 1993 1994 assert(op == VRRP_CONF_UPDATE); 1995 if ((err = vrrpd_write_vrconf(line, sizeof (line), 1996 newconf)) != VRRP_SUCCESS) { 1997 vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to " 1998 "update configuration for %s", newconf->vvc_name); 1999 goto done; 2000 } 2001 if (fputs(line, nfp) == EOF) { 2002 vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to " 2003 "write line %s", line); 2004 err = VRRP_EDB; 2005 goto done; 2006 } 2007 } 2008 2009 /* 2010 * If we get to the end of the file and have not seen the router that 2011 * we are about to update, write it out. 
2012 */ 2013 if (!found && op == VRRP_CONF_UPDATE) { 2014 if ((err = vrrpd_write_vrconf(line, sizeof (line), 2015 newconf)) == VRRP_SUCCESS && fputs(line, nfp) == EOF) { 2016 vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to " 2017 "write line %s", line); 2018 err = VRRP_EDB; 2019 } 2020 } else if (!found && op == VRRP_CONF_DELETE) { 2021 vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to find " 2022 "configuation for %s", newconf->vvc_name); 2023 err = VRRP_ENOTFOUND; 2024 } 2025 2026 if (err != VRRP_SUCCESS) 2027 goto done; 2028 2029 if (fflush(nfp) == EOF || rename(newfile, vrrpd_conffile) < 0) { 2030 vrrp_log(VRRP_ERR, "vrrpd_updateconf(): failed to " 2031 "rename file %s", newfile); 2032 err = VRRP_EDB; 2033 } 2034 2035 done: 2036 (void) fclose(fp); 2037 (void) fclose(nfp); 2038 (void) unlink(newfile); 2039 return (err); 2040 } 2041 2042 static vrrp_err_t 2043 vrrpd_write_vrconf(char *line, size_t len, vrrp_vr_conf_t *conf) 2044 { 2045 vrrp_prop_t *prop; 2046 int n, i; 2047 2048 vrrp_log(VRRP_DBG0, "vrrpd_write_vrconf(%s)", conf->vvc_name); 2049 2050 for (i = 0; i < VRRP_PROP_INFO_TABSIZE; i++) { 2051 prop = &vrrp_prop_info_tbl[i]; 2052 n = snprintf(line, len, i == 0 ? 
"%s=" : " %s=", 2053 prop->vs_propname); 2054 if (n < 0 || n >= len) 2055 break; 2056 len -= n; 2057 line += n; 2058 n = prop->vs_propwrite(conf, line, len); 2059 if (n < 0 || n >= len) 2060 break; 2061 len -= n; 2062 line += n; 2063 } 2064 if (i != VRRP_PROP_INFO_TABSIZE) { 2065 vrrp_log(VRRP_ERR, "vrrpd_write_vrconf(%s): buffer size too" 2066 "small", conf->vvc_name); 2067 return (VRRP_EDB); 2068 } 2069 n = snprintf(line, len, "\n"); 2070 if (n < 0 || n >= len) { 2071 vrrp_log(VRRP_ERR, "vrrpd_write_vrconf(%s): buffer size too" 2072 "small", conf->vvc_name); 2073 return (VRRP_EDB); 2074 } 2075 return (VRRP_SUCCESS); 2076 } 2077 2078 static vrrp_err_t 2079 vrrpd_read_vrconf(char *line, vrrp_vr_conf_t *conf) 2080 { 2081 char *str, *token; 2082 char *next; 2083 vrrp_err_t err = VRRP_SUCCESS; 2084 char tmpbuf[MAXLINELEN]; 2085 2086 str = tmpbuf; 2087 (void) strlcpy(tmpbuf, line, MAXLINELEN); 2088 2089 /* 2090 * Skip leading spaces, blank lines, and comments. 2091 */ 2092 skip_whitespace(str); 2093 if ((str - tmpbuf == strlen(tmpbuf)) || (*str == '#')) { 2094 conf->vvc_vrid = VRRP_VRID_NONE; 2095 return (VRRP_SUCCESS); 2096 } 2097 2098 /* 2099 * Read each VR properties. 
2100 */ 2101 for (token = strtok_r(str, " \n\t", &next); token != NULL; 2102 token = strtok_r(NULL, " \n\t", &next)) { 2103 if ((err = vrrpd_readprop(token, conf)) != VRRP_SUCCESS) 2104 break; 2105 } 2106 2107 /* All properties read but no VRID defined */ 2108 if (err == VRRP_SUCCESS && conf->vvc_vrid == VRRP_VRID_NONE) 2109 err = VRRP_EINVAL; 2110 2111 return (err); 2112 } 2113 2114 static vrrp_err_t 2115 vrrpd_readprop(const char *str, vrrp_vr_conf_t *conf) 2116 { 2117 vrrp_prop_t *prop; 2118 char *pstr; 2119 int i; 2120 2121 if ((pstr = strchr(str, '=')) == NULL) { 2122 vrrp_log(VRRP_ERR, "vrrpd_readprop(%s): invalid property", str); 2123 return (VRRP_EINVAL); 2124 } 2125 2126 *pstr++ = '\0'; 2127 for (i = 0; i < VRRP_PROP_INFO_TABSIZE; i++) { 2128 prop = &vrrp_prop_info_tbl[i]; 2129 if (strcasecmp(str, prop->vs_propname) == 0) { 2130 if (prop->vs_propread(conf, pstr)) 2131 break; 2132 } 2133 } 2134 2135 if (i == VRRP_PROP_INFO_TABSIZE) { 2136 vrrp_log(VRRP_ERR, "vrrpd_readprop(%s): invalid property", str); 2137 return (VRRP_EINVAL); 2138 } 2139 2140 return (VRRP_SUCCESS); 2141 } 2142 2143 static boolean_t 2144 vrrp_rd_prop_name(vrrp_vr_conf_t *conf, const char *str) 2145 { 2146 size_t size = sizeof (conf->vvc_name); 2147 return (strlcpy(conf->vvc_name, str, size) < size); 2148 } 2149 2150 static boolean_t 2151 vrrp_rd_prop_vrid(vrrp_vr_conf_t *conf, const char *str) 2152 { 2153 conf->vvc_vrid = strtol(str, NULL, 0); 2154 return (!(conf->vvc_vrid < VRRP_VRID_MIN || 2155 conf->vvc_vrid > VRRP_VRID_MAX || 2156 (conf->vvc_vrid == 0 && errno != 0))); 2157 } 2158 2159 static boolean_t 2160 vrrp_rd_prop_af(vrrp_vr_conf_t *conf, const char *str) 2161 { 2162 if (strcasecmp(str, "AF_INET") == 0) 2163 conf->vvc_af = AF_INET; 2164 else if (strcasecmp(str, "AF_INET6") == 0) 2165 conf->vvc_af = AF_INET6; 2166 else 2167 return (_B_FALSE); 2168 return (_B_TRUE); 2169 } 2170 2171 static boolean_t 2172 vrrp_rd_prop_pri(vrrp_vr_conf_t *conf, const char *str) 2173 { 2174 
conf->vvc_pri = strtol(str, NULL, 0); 2175 return (!(conf->vvc_pri < VRRP_PRI_MIN || 2176 conf->vvc_pri > VRRP_PRI_OWNER || 2177 (conf->vvc_pri == 0 && errno != 0))); 2178 } 2179 2180 static boolean_t 2181 vrrp_rd_prop_adver_int(vrrp_vr_conf_t *conf, const char *str) 2182 { 2183 conf->vvc_adver_int = strtol(str, NULL, 0); 2184 return (!(conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN || 2185 conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX || 2186 (conf->vvc_adver_int == 0 && errno != 0))); 2187 } 2188 2189 static boolean_t 2190 vrrp_rd_prop_preempt(vrrp_vr_conf_t *conf, const char *str) 2191 { 2192 if (strcasecmp(str, "true") == 0) 2193 conf->vvc_preempt = _B_TRUE; 2194 else if (strcasecmp(str, "false") == 0) 2195 conf->vvc_preempt = _B_FALSE; 2196 else 2197 return (_B_FALSE); 2198 return (_B_TRUE); 2199 } 2200 2201 static boolean_t 2202 vrrp_rd_prop_accept(vrrp_vr_conf_t *conf, const char *str) 2203 { 2204 if (strcasecmp(str, "true") == 0) 2205 conf->vvc_accept = _B_TRUE; 2206 else if (strcasecmp(str, "false") == 0) 2207 conf->vvc_accept = _B_FALSE; 2208 else 2209 return (_B_FALSE); 2210 return (_B_TRUE); 2211 } 2212 2213 static boolean_t 2214 vrrp_rd_prop_enabled(vrrp_vr_conf_t *conf, const char *str) 2215 { 2216 if (strcasecmp(str, "enabled") == 0) 2217 conf->vvc_enabled = _B_TRUE; 2218 else if (strcasecmp(str, "disabled") == 0) 2219 conf->vvc_enabled = _B_FALSE; 2220 else 2221 return (_B_FALSE); 2222 return (_B_TRUE); 2223 } 2224 2225 static boolean_t 2226 vrrp_rd_prop_ifname(vrrp_vr_conf_t *conf, const char *str) 2227 { 2228 size_t size = sizeof (conf->vvc_link); 2229 return (strlcpy(conf->vvc_link, str, size) < size); 2230 } 2231 2232 static int 2233 vrrp_wt_prop_name(vrrp_vr_conf_t *conf, char *str, size_t size) 2234 { 2235 return (snprintf(str, size, "%s", conf->vvc_name)); 2236 } 2237 2238 static int 2239 vrrp_wt_prop_pri(vrrp_vr_conf_t *conf, char *str, size_t size) 2240 { 2241 return (snprintf(str, size, "%d", conf->vvc_pri)); 2242 } 2243 2244 static int 2245 
vrrp_wt_prop_adver_int(vrrp_vr_conf_t *conf, char *str, size_t size) 2246 { /* Write the advertisement interval into str as a decimal number and return the snprintf() result unchanged. */ 2247 return (snprintf(str, size, "%d", conf->vvc_adver_int)); 2248 } 2249 2250 /* Write the preempt mode into str as the word true or false. */ static int 2251 vrrp_wt_prop_preempt(vrrp_vr_conf_t *conf, char *str, size_t size) 2252 { 2253 return (snprintf(str, size, "%s", 2254 conf->vvc_preempt ? "true" : "false")); 2255 } 2256 2257 /* Write the accept mode into str as the word true or false. */ static int 2258 vrrp_wt_prop_accept(vrrp_vr_conf_t *conf, char *str, size_t size) 2259 { 2260 return (snprintf(str, size, "%s", 2261 conf->vvc_accept ? "true" : "false")); 2262 } 2263 2264 /* Write the administrative state into str as the word enabled or disabled. */ static int 2265 vrrp_wt_prop_enabled(vrrp_vr_conf_t *conf, char *str, size_t size) 2266 { 2267 return (snprintf(str, size, "%s", 2268 conf->vvc_enabled ? "enabled" : "disabled")); 2269 } 2270 2271 /* Write the VRID into str as a decimal number. */ static int 2272 vrrp_wt_prop_vrid(vrrp_vr_conf_t *conf, char *str, size_t size) 2273 { 2274 return (snprintf(str, size, "%d", conf->vvc_vrid)); 2275 } 2276 2277 /* Write the address family name into str: AF_INET for AF_INET, AF_INET6 for every other value. */ static int 2278 vrrp_wt_prop_af(vrrp_vr_conf_t *conf, char *str, size_t size) 2279 { 2280 return (snprintf(str, size, "%s", 2281 conf->vvc_af == AF_INET ?
"AF_INET" : "AF_INET6")); 2282 } 2283 2284 /* Write the link name into str. */ static int 2285 vrrp_wt_prop_ifname(vrrp_vr_conf_t *conf, char *str, size_t size) 2286 { 2287 return (snprintf(str, size, "%s", conf->vvc_link)); 2288 } 2289 2290 /* Map an address family to a printable constant string. Both the short forms 4 and 6 and the AF_INET and AF_INET6 constants are accepted, as is AF_UNSPEC; any other value yields AF_error. */ static char * 2291 af_str(int af) 2292 { 2293 if (af == 4 || af == AF_INET) 2294 return ("AF_INET"); 2295 else if (af == 6 || af == AF_INET6) 2296 return ("AF_INET6"); 2297 else if (af == AF_UNSPEC) 2298 return ("AF_UNSPEC"); 2299 else 2300 return ("AF_error"); 2301 } 2302 2303 /* Allocate a zeroed vrrp_vr_t, move it from the NONE to the INIT state, copy conf into it and insert it at the head of vrrp_vr_list. The new router always starts disabled, whatever conf says. Returns VRRP_ENOMEM if the allocation fails. */ static vrrp_err_t 2304 vrrpd_create_vr(vrrp_vr_conf_t *conf) 2305 { 2306 vrrp_vr_t *vr; 2307 2308 vrrp_log(VRRP_DBG0, "vrrpd_create_vr(%s)", conf->vvc_name); 2309 2310 if ((vr = malloc(sizeof (vrrp_vr_t))) == NULL) { 2311 vrrp_log(VRRP_ERR, "vrrpd_create_vr(): memory allocation for %s" 2312 " failed", conf->vvc_name); 2313 return (VRRP_ENOMEM); 2314 } 2315 2316 bzero(vr, sizeof (vrrp_vr_t)); 2317 vr->vvr_state = VRRP_STATE_NONE; 2318 vr->vvr_timer_id = -1; 2319 /* NOTE(review): the state transition runs before vvr_conf is filled in, so vrrpd_state_trans() sees a zeroed configuration here - confirm that is intended. */ vrrpd_state_trans(VRRP_STATE_NONE, VRRP_STATE_INIT, vr); 2320 (void) memcpy(&vr->vvr_conf, conf, sizeof (vrrp_vr_conf_t)); 2321 /* Enabling is a separate step, so force the copy to disabled. */ vr->vvr_conf.vvc_enabled = _B_FALSE; 2322 TAILQ_INSERT_HEAD(&vrrp_vr_list, vr, vvr_next); 2323 return (VRRP_SUCCESS); 2324 } 2325 2326 /* Destroy a router: disable it first if it is enabled, move it from INIT to NONE, unlink it from vrrp_vr_list and free it. */ static void 2327 vrrpd_delete_vr(vrrp_vr_t *vr) 2328 { 2329 vrrp_log(VRRP_DBG0, "vrrpd_delete_vr(%s)", vr->vvr_conf.vvc_name); 2330 if (vr->vvr_conf.vvc_enabled) 2331 vrrpd_disable_vr(vr, NULL, _B_FALSE); 2332 assert(vr->vvr_state == VRRP_STATE_INIT); 2333 vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_NONE, vr); 2334 TAILQ_REMOVE(&vrrp_vr_list, vr, vvr_next); 2335 (void) free(vr); 2336 } 2337 2338 /* Try to bring an enabled router out of the INIT state: set up the RX socket and primary IP address, set up the TX socket, then move to MASTER or BACKUP. On failure the reason is recorded in vr->vvr_err. */ static vrrp_err_t 2339 vrrpd_enable_vr(vrrp_vr_t *vr) 2340 { 2341 vrrp_err_t rx_err, tx_err, err = VRRP_EINVAL; 2342 2343 vrrp_log(VRRP_DBG0, "vrrpd_enable_vr(%s)", vr->vvr_conf.vvc_name); 2344 2345 assert(vr->vvr_conf.vvc_enabled); 2346 2347 /* 2348 * This VRRP router has been successfully enabled and start 2349 * participating.
2350 */ 2351 if (vr->vvr_state != VRRP_STATE_INIT) 2352 return (VRRP_SUCCESS); 2353 2354 if ((rx_err = vrrpd_init_rxsock(vr)) == VRRP_SUCCESS) { 2355 /* 2356 * Select the primary IP address. Even if the primary IP 2357 * selection fails this time, we will reselect the 2358 * primary IP address when a new IP address comes up. 2359 */ 2360 vrrpd_reselect_primary(vr->vvr_pif); 2361 if (vr->vvr_pif->vvi_pip == NULL) { 2362 vrrp_log(VRRP_DBG0, "vrrpd_enable_vr(%s): " 2363 "select_primary over %s failed", 2364 vr->vvr_conf.vvc_name, vr->vvr_pif->vvi_ifname); 2365 rx_err = VRRP_ENOPRIM; 2366 } 2367 } 2368 2369 /* 2370 * Initialize the TX socket used for this vrrp_vr_t to send the 2371 * multicast packets. This is attempted even if the RX side failed. 2372 */ 2373 tx_err = vrrpd_init_txsock(vr); 2374 2375 /* 2376 * Only start the state transition if sockets for both RX and TX are 2377 * initialized correctly. 2378 */ 2379 if (rx_err != VRRP_SUCCESS || tx_err != VRRP_SUCCESS) { 2380 /* 2381 * Record the error information for diagnostic purposes; the RX 2381 * error takes precedence. err still holds its initial VRRP_EINVAL. 2382 */ 2383 vr->vvr_err = (rx_err == VRRP_SUCCESS) ? tx_err : rx_err; 2384 return (err); 2385 } 2386 2387 /* Priority 255 goes straight to MASTER; every other priority starts as BACKUP. */ if (vr->vvr_conf.vvc_pri == 255) 2388 err = vrrpd_state_i2m(vr); 2389 else 2390 err = vrrpd_state_i2b(vr); 2391 2392 /* The transition failed: record why, forget the primary IP and tear both sockets down again. */ if (err != VRRP_SUCCESS) { 2393 vr->vvr_err = err; 2394 vr->vvr_pif->vvi_pip = NULL; 2395 vrrpd_fini_txsock(vr); 2396 vrrpd_fini_rxsock(vr); 2397 } 2398 return (err); 2399 } 2400 2401 /* 2402 * Given the removed interface, see whether the given VRRP router is 2403 * affected and must stop participating in the VRRP protocol. 2404 * 2405 * If intf is NULL, the VR disabling request is coming from the admin. 2405 * If primary_addr_gone is set, intf is the interface that lost its 2405 * primary IP address. 2406 */ 2407 static void 2408 vrrpd_disable_vr(vrrp_vr_t *vr, vrrp_intf_t *intf, boolean_t primary_addr_gone) 2409 { 2410 vrrp_log(VRRP_DBG0, "vrrpd_disable_vr(%s): %s%s", vr->vvr_conf.vvc_name, 2411 intf == NULL ? "requested by admin" : intf->vvi_ifname, 2412 intf == NULL ? "" : (primary_addr_gone ?
"primary address gone" : 2413 "interface deleted")); 2414 2415 /* 2416 * An interface was deleted; check whether it is the 2417 * physical interface or the VNIC of the given VRRP router. 2418 * If it is neither, this router is not affected. 2419 */ 2420 if (!primary_addr_gone && (intf != NULL) && (intf != vr->vvr_pif) && 2421 (intf != vr->vvr_vif)) { 2422 return; 2423 } 2424 2425 /* 2426 * If the primary IP address is gone and we failed to reselect 2427 * another one, only the routers whose physical interface is intf 2428 * are affected; continue to disable those. 2429 */ 2430 if (primary_addr_gone && intf != vr->vvr_pif) 2431 return; 2432 2433 vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): disabling", 2434 vr->vvr_conf.vvc_name); 2435 2436 if (vr->vvr_state == VRRP_STATE_MASTER) { 2437 /* 2438 * If this router is disabled by the administrator, send 2439 * the zero-priority advertisement to indicate that the Master 2440 * has stopped participating in VRRP. 2441 */ 2442 if (intf == NULL) 2443 (void) vrrpd_send_adv(vr, _B_TRUE); 2444 2445 vrrpd_state_m2i(vr); 2446 } else if (vr->vvr_state == VRRP_STATE_BACKUP) { 2447 vrrpd_state_b2i(vr); 2448 } 2449 2450 /* 2451 * If no primary IP address can be selected, the VRRP router 2452 * stays at the INIT state and will become BACKUP and MASTER when 2453 * a primary IP address is reselected.
2454 */ 2455 if (primary_addr_gone) { 2456 vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): primary IP " 2457 "is removed", vr->vvr_conf.vvc_name); 2458 vr->vvr_err = VRRP_ENOPRIM; 2459 } else if (intf == NULL) { 2460 /* 2461 * The VRRP router is disabled by the administrator: clear the 2461 * error and release both sockets. 2462 */ 2463 vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): disabled by admin", 2464 vr->vvr_conf.vvc_name); 2465 vr->vvr_err = VRRP_SUCCESS; 2466 vrrpd_fini_txsock(vr); 2467 vrrpd_fini_rxsock(vr); 2468 } else if (intf == vr->vvr_pif) { 2469 /* The physical interface went away: only the RX socket is released. */ vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): physical interface " 2470 "%s removed", vr->vvr_conf.vvc_name, intf->vvi_ifname); 2471 vr->vvr_err = VRRP_ENOPRIM; 2472 vrrpd_fini_rxsock(vr); 2473 } else if (intf == vr->vvr_vif) { 2474 /* The VNIC went away: only the TX socket is released. */ vrrp_log(VRRP_DBG1, "vrrpd_disable_vr(%s): VNIC interface %s" 2475 " removed", vr->vvr_conf.vvc_name, intf->vvi_ifname); 2476 vr->vvr_err = VRRP_ENOVIRT; 2477 vrrpd_fini_txsock(vr); 2478 } 2479 } 2480 2481 /* Create a new router from conf. The configuration is validated, duplicates by name and by link, VRID and address family are rejected, the configuration is optionally persisted when updateconf is set, and finally the in-memory router is created. If that last step fails the persisted entry is removed again. */ vrrp_err_t 2482 vrrpd_create(vrrp_vr_conf_t *conf, boolean_t updateconf) 2483 { 2484 vrrp_err_t err = VRRP_SUCCESS; 2485 2486 /* NOTE(review): conf is dereferenced by this log call before the assert below checks it for NULL. */ vrrp_log(VRRP_DBG0, "vrrpd_create(%s, %s, %d)", conf->vvc_name, 2487 conf->vvc_link, conf->vvc_vrid); 2488 2489 assert(conf != NULL); 2490 2491 /* 2492 * Sanity check: non-empty names, VRID, priority and advertisement 2492 * interval within range, a supported address family, and accept 2492 * mode enabled when the priority is VRRP_PRI_OWNER. 2493 */ 2494 if ((strlen(conf->vvc_name) == 0) || 2495 (strlen(conf->vvc_link) == 0) || 2496 (conf->vvc_vrid < VRRP_VRID_MIN || 2497 conf->vvc_vrid > VRRP_VRID_MAX) || 2498 (conf->vvc_pri < VRRP_PRI_MIN || 2499 conf->vvc_pri > VRRP_PRI_OWNER) || 2500 (conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN || 2501 conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX) || 2502 (conf->vvc_af != AF_INET && conf->vvc_af != AF_INET6) || 2503 (conf->vvc_pri == VRRP_PRI_OWNER && !conf->vvc_accept)) { 2504 vrrp_log(VRRP_DBG1, "vrrpd_create(%s): invalid argument", 2505 conf->vvc_name); 2506 return (VRRP_EINVAL); 2507 } 2508 2509 if (!vrrp_valid_name(conf->vvc_name)) { 2510 vrrp_log(VRRP_DBG1, "vrrpd_create(): %s is not a valid router " 2511 "name", conf->vvc_name); 2512 return
(VRRP_EINVALVRNAME); 2513 } 2514 2515 if (vrrpd_lookup_vr_by_name(conf->vvc_name) != NULL) { 2516 vrrp_log(VRRP_DBG1, "vrrpd_create(): %s already exists", 2517 conf->vvc_name); 2518 return (VRRP_EINSTEXIST); 2519 } 2520 2521 if (vrrpd_lookup_vr_by_vrid(conf->vvc_link, conf->vvc_vrid, 2522 conf->vvc_af) != NULL) { 2523 vrrp_log(VRRP_DBG1, "vrrpd_create(): VRID %d/%s over %s " 2524 "already exists", conf->vvc_vrid, af_str(conf->vvc_af), 2525 conf->vvc_link); 2526 return (VRRP_EVREXIST); 2527 } 2528 2529 if (updateconf && (err = vrrpd_updateconf(conf, 2530 VRRP_CONF_UPDATE)) != VRRP_SUCCESS) { 2531 vrrp_log(VRRP_ERR, "vrrpd_create(): failed to update " 2532 "configuration for %s", conf->vvc_name); 2533 return (err); 2534 } 2535 2536 err = vrrpd_create_vr(conf); 2537 /* Undo the configuration update made above if the router could not be created. */ if (err != VRRP_SUCCESS && updateconf) 2538 (void) vrrpd_updateconf(conf, VRRP_CONF_DELETE); 2539 2540 return (err); 2541 } 2542 2543 /* Delete the router named vn: its configuration entry is removed first and the in-memory router is destroyed only if that succeeds. Returns VRRP_ENOTFOUND if no such router exists. */ static vrrp_err_t 2544 vrrpd_delete(const char *vn) 2545 { 2546 vrrp_vr_t *vr; 2547 vrrp_err_t err; 2548 2549 vrrp_log(VRRP_DBG0, "vrrpd_delete(%s)", vn); 2550 2551 if ((vr = vrrpd_lookup_vr_by_name(vn)) == NULL) { 2552 vrrp_log(VRRP_DBG1, "vrrpd_delete(): %s not exists", vn); 2553 return (VRRP_ENOTFOUND); 2554 } 2555 2556 err = vrrpd_updateconf(&vr->vvr_conf, VRRP_CONF_DELETE); 2557 if (err != VRRP_SUCCESS) { 2558 vrrp_log(VRRP_ERR, "vrrpd_delete(): failed to delete " 2559 "configuration for %s", vr->vvr_conf.vvc_name); 2560 return (err); 2561 } 2562 2563 vrrpd_delete_vr(vr); 2564 return (VRRP_SUCCESS); 2565 } 2566 2567 /* Administratively enable the router named vn; when updateconf is set the new state is also written to the configuration. */ static vrrp_err_t 2568 vrrpd_enable(const char *vn, boolean_t updateconf) 2569 { 2570 vrrp_vr_t *vr; 2571 vrrp_vr_conf_t *conf; 2572 uint32_t flags; 2573 datalink_class_t class; 2574 vrrp_err_t err = VRRP_SUCCESS; 2575 2576 vrrp_log(VRRP_DBG0, "vrrpd_enable(%s)", vn); 2577 2578 if ((vr = vrrpd_lookup_vr_by_name(vn)) == NULL) { 2579 vrrp_log(VRRP_DBG1, "vrrpd_enable(): %s does not exist", vn); 2580 return (VRRP_ENOTFOUND); 2581 } 2582 2583 /* 2584 * The VR is
already enabled. 2585 */ 2586 conf = &vr->vvr_conf; 2587 if (conf->vvc_enabled) { 2588 vrrp_log(VRRP_DBG1, "vrrpd_enable(): %s is already " 2589 "enabled", vn); 2590 return (VRRP_EALREADY); 2591 } 2592 2593 /* 2594 * Check whether the link exists, is active, and is a physical 2594 * link, a VLAN or an aggregation. 2595 */ 2596 if ((strlen(conf->vvc_link) == 0) || dladm_name2info(vrrpd_vh->vh_dh, 2597 conf->vvc_link, NULL, &flags, &class, NULL) != DLADM_STATUS_OK || 2598 !(flags & DLADM_OPT_ACTIVE) || ((class != DATALINK_CLASS_PHYS) && 2599 (class != DATALINK_CLASS_VLAN) && (class != DATALINK_CLASS_AGGR))) { 2600 vrrp_log(VRRP_DBG1, "vrrpd_enable(%s): invalid link %s", 2601 vn, conf->vvc_link); 2602 return (VRRP_EINVALLINK); 2603 } 2604 2605 /* 2606 * Get the associated VNIC name by the given interface/vrid/ 2607 * address family. Any failure is reported as VRRP_ENOVNIC. 2608 */ 2609 err = vrrp_get_vnicname(vrrpd_vh, conf->vvc_vrid, 2610 conf->vvc_af, conf->vvc_link, NULL, NULL, vr->vvr_vnic, 2611 sizeof (vr->vvr_vnic)); 2612 if (err != VRRP_SUCCESS) { 2613 vrrp_log(VRRP_DBG1, "vrrpd_enable(%s): no VNIC for VRID %d/%s " 2614 "over %s", vn, conf->vvc_vrid, af_str(conf->vvc_af), 2615 conf->vvc_link); 2616 err = VRRP_ENOVNIC; 2617 goto fail; 2618 } 2619 2620 /* 2621 * Find the right VNIC, primary interface and get the list of the 2622 * protected IP addresses and primary IP address. Note that if 2623 * either interface is NULL (no IP addresses configured over the 2624 * interface), we will still continue and mark this VRRP router 2625 * as "enabled".
2626 */ 2627 vr->vvr_conf.vvc_enabled = _B_TRUE; 2628 if (updateconf && (err = vrrpd_updateconf(&vr->vvr_conf, 2629 VRRP_CONF_UPDATE)) != VRRP_SUCCESS) { 2630 vrrp_log(VRRP_ERR, "vrrpd_enable(): failed to update " 2631 "configuration for %s", vr->vvr_conf.vvc_name); 2632 goto fail; 2633 } 2634 2635 /* 2636 * If vrrpd_enable_vr() fails, it is possible that there are no IP 2637 * addresses over either the primary interface or the VNIC yet. 2638 * Return success in this case; the VRRP router will stay in 2639 * the initialized state and start to work when the IP address is 2640 * configured. 2641 */ 2642 (void) vrrpd_enable_vr(vr); 2643 return (VRRP_SUCCESS); 2644 2645 /* Failure path: mark the router disabled again and forget the VNIC name. */ fail: 2646 vr->vvr_conf.vvc_enabled = _B_FALSE; 2647 vr->vvr_vnic[0] = '\0'; 2648 return (err); 2649 } 2650 2651 /* Administratively disable the router named vn. The configuration is updated first; if that fails the enabled flag is restored and the router is left running. */ static vrrp_err_t 2652 vrrpd_disable(const char *vn) 2653 { 2654 vrrp_vr_t *vr; 2655 vrrp_err_t err; 2656 2657 vrrp_log(VRRP_DBG0, "vrrpd_disable(%s)", vn); 2658 2659 if ((vr = vrrpd_lookup_vr_by_name(vn)) == NULL) { 2660 vrrp_log(VRRP_DBG1, "vrrpd_disable(): %s does not exist", vn); 2661 return (VRRP_ENOTFOUND); 2662 } 2663 2664 /* 2665 * The VR is already disabled.
2666 */ 2667 if (!vr->vvr_conf.vvc_enabled) { 2668 vrrp_log(VRRP_DBG1, "vrrpd_disable(): %s was not enabled", vn); 2669 return (VRRP_EALREADY); 2670 } 2671 2672 vr->vvr_conf.vvc_enabled = _B_FALSE; 2673 err = vrrpd_updateconf(&vr->vvr_conf, VRRP_CONF_UPDATE); 2674 if (err != VRRP_SUCCESS) { 2675 vr->vvr_conf.vvc_enabled = _B_TRUE; 2676 vrrp_log(VRRP_ERR, "vrrpd_disable(): failed to update " 2677 "configuration for %s", vr->vvr_conf.vvc_name); 2678 return (err); 2679 } 2680 2681 vrrpd_disable_vr(vr, NULL, _B_FALSE); 2682 vr->vvr_vnic[0] = '\0'; 2683 return (VRRP_SUCCESS); 2684 } 2685 2686 /* Change the properties of an existing router. conf->vvc_name selects the router and mask selects which of the interval, priority, accept and preempt fields of conf are applied. All requested values are validated before anything is changed, and the previous configuration is restored if the configuration update fails. */ static vrrp_err_t 2687 vrrpd_modify(vrrp_vr_conf_t *conf, uint32_t mask) 2688 { 2689 vrrp_vr_t *vr; 2690 vrrp_vr_conf_t savconf; 2691 int pri; 2692 boolean_t accept, set_accept = _B_FALSE; 2693 vrrp_err_t err; 2694 2695 vrrp_log(VRRP_DBG0, "vrrpd_modify(%s)", conf->vvc_name); 2696 2697 if (mask == 0) 2698 return (VRRP_SUCCESS); 2699 2700 if ((vr = vrrpd_lookup_vr_by_name(conf->vvc_name)) == NULL) { 2701 vrrp_log(VRRP_DBG1, "vrrpd_modify(): cannot find the given " 2702 "VR instance: %s", conf->vvc_name); 2703 return (VRRP_ENOTFOUND); 2704 } 2705 2706 if (mask & VRRP_CONF_INTERVAL) { 2707 if (conf->vvc_adver_int < VRRP_MAX_ADVER_INT_MIN || 2708 conf->vvc_adver_int > VRRP_MAX_ADVER_INT_MAX) { 2709 vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): invalid " 2710 "adver_interval %d", conf->vvc_name, 2711 conf->vvc_adver_int); 2712 return (VRRP_EINVAL); 2713 } 2714 } 2715 2716 /* pri and accept hold the values the router will have after the change, whether or not they are being modified. */ pri = vr->vvr_conf.vvc_pri; 2717 if (mask & VRRP_CONF_PRIORITY) { 2718 if (conf->vvc_pri < VRRP_PRI_MIN || 2719 conf->vvc_pri > VRRP_PRI_OWNER) { 2720 vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): invalid " 2721 "priority %d", conf->vvc_name, conf->vvc_pri); 2722 return (VRRP_EINVAL); 2723 } 2724 pri = conf->vvc_pri; 2725 } 2726 2727 accept = vr->vvr_conf.vvc_accept; 2728 if (mask & VRRP_CONF_ACCEPT) 2729 accept = conf->vvc_accept; 2730 2731 if (pri == VRRP_PRI_OWNER && !accept) { 2732 vrrp_log(VRRP_DBG1, "vrrpd_modify(%s): accept
mode must be " 2733 "true for VRRP address owner", conf->vvc_name); 2734 return (VRRP_EINVAL); 2735 } 2736 2737 /* Apply a changed accept mode right away and remember that it was done so that it can be reverted below. */ if ((mask & VRRP_CONF_ACCEPT) && (vr->vvr_conf.vvc_accept != accept)) { 2738 err = vrrpd_set_noaccept(vr, !accept); 2739 if (err != VRRP_SUCCESS) { 2740 vrrp_log(VRRP_ERR, "vrrpd_modify(%s): access mode " 2741 "updating failed: %s", conf->vvc_name, 2742 vrrp_err2str(err)); 2743 return (err); 2744 } 2745 set_accept = _B_TRUE; 2746 } 2747 2748 /* 2749 * Save the current configuration, so it can be restored if the 2750 * following fails. 2751 */ 2752 (void) memcpy(&savconf, &vr->vvr_conf, sizeof (vrrp_vr_conf_t)); 2753 if (mask & VRRP_CONF_PREEMPT) 2754 vr->vvr_conf.vvc_preempt = conf->vvc_preempt; 2755 2756 if (mask & VRRP_CONF_ACCEPT) 2757 vr->vvr_conf.vvc_accept = accept; 2758 2759 if (mask & VRRP_CONF_PRIORITY) 2760 vr->vvr_conf.vvc_pri = pri; 2761 2762 if (mask & VRRP_CONF_INTERVAL) 2763 vr->vvr_conf.vvc_adver_int = conf->vvc_adver_int; 2764 2765 err = vrrpd_updateconf(&vr->vvr_conf, VRRP_CONF_UPDATE); 2766 if (err != VRRP_SUCCESS) { 2767 vrrp_log(VRRP_ERR, "vrrpd_modify(%s): configuration update " 2768 "failed: %s", conf->vvc_name, vrrp_err2str(err)); 2769 /* set_accept is only set when the accept mode really changed, so passing accept here restores the previous noaccept setting. */ if (set_accept) 2770 (void) vrrpd_set_noaccept(vr, accept); 2771 (void) memcpy(&vr->vvr_conf, &savconf, sizeof (vrrp_vr_conf_t)); 2772 return (err); 2773 } 2774 2775 /* Refresh the pending timeout value so that it reflects the new priority or interval. */ if ((mask & VRRP_CONF_PRIORITY) && (vr->vvr_state == VRRP_STATE_BACKUP)) 2776 vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr); 2777 2778 if ((mask & VRRP_CONF_INTERVAL) && (vr->vvr_state == VRRP_STATE_MASTER)) 2779 vr->vvr_timeout = conf->vvc_adver_int; 2780 2781 return (VRRP_SUCCESS); 2782 } 2783 2784 /* Return the names of the routers selected by the vrid, ifname and af filters. The names are written into the buffer directly after the vrrp_ret_list_t header; on entry *sizep is the size of that whole buffer. */ static void 2785 vrrpd_list(vrid_t vrid, char *ifname, int af, vrrp_ret_list_t *ret, 2786 size_t *sizep) 2787 { 2788 vrrp_vr_t *vr; 2789 char *p = (char *)ret + sizeof (vrrp_ret_list_t); 2790 /* NOTE(review): this subtraction wraps if *sizep is smaller than the header - presumably the caller guarantees it is not; confirm. */ size_t size = (*sizep) - sizeof (vrrp_ret_list_t); 2791 2792 vrrp_log(VRRP_DBG0, "vrrpd_list(%d_%s_%s)", vrid, ifname, af_str(af)); 2793 2794
ret->vrl_cnt = 0; 2795 TAILQ_FOREACH(vr, &vrrp_vr_list, vvr_next) { 2796 if (vrid != VRRP_VRID_NONE && vr->vvr_conf.vvc_vrid != vrid) 2797 continue; 2798 2799 if (strlen(ifname) != 0 && strcmp(ifname, 2800 vr->vvr_conf.vvc_link) == 0) { 2801 continue; 2802 } 2803 2804 if ((af == AF_INET || af == AF_INET6) && 2805 vr->vvr_conf.vvc_af != af) 2806 continue; 2807 2808 if (size < VRRP_NAME_MAX) { 2809 vrrp_log(VRRP_DBG1, "vrrpd_list(): buffer size too " 2810 "small to hold %d router names", ret->vrl_cnt); 2811 *sizep = sizeof (vrrp_ret_list_t); 2812 ret->vrl_err = VRRP_ETOOSMALL; 2813 return; 2814 } 2815 (void) strlcpy(p, vr->vvr_conf.vvc_name, VRRP_NAME_MAX); 2816 p += (strlen(vr->vvr_conf.vvc_name) + 1); 2817 ret->vrl_cnt++; 2818 size -= VRRP_NAME_MAX; 2819 } 2820 2821 *sizep = sizeof (vrrp_ret_list_t) + ret->vrl_cnt * VRRP_NAME_MAX; 2822 vrrp_log(VRRP_DBG1, "vrrpd_list() return %d", ret->vrl_cnt); 2823 ret->vrl_err = VRRP_SUCCESS; 2824 } 2825 2826 static void 2827 vrrpd_query(const char *vn, vrrp_ret_query_t *ret, size_t *sizep) 2828 { 2829 vrrp_queryinfo_t *infop; 2830 vrrp_vr_t *vr; 2831 vrrp_intf_t *vif; 2832 vrrp_ip_t *ip; 2833 struct timeval now; 2834 uint32_t vipcnt = 0; 2835 size_t size = *sizep; 2836 2837 vrrp_log(VRRP_DBG1, "vrrpd_query(%s)", vn); 2838 2839 if ((vr = vrrpd_lookup_vr_by_name(vn)) == NULL) { 2840 vrrp_log(VRRP_DBG1, "vrrpd_query(): %s does not exist", vn); 2841 *sizep = sizeof (vrrp_ret_query_t); 2842 ret->vrq_err = VRRP_ENOTFOUND; 2843 return; 2844 } 2845 2846 /* 2847 * Get the virtual IP list if the router is not in the INIT state. 2848 */ 2849 if (vr->vvr_state != VRRP_STATE_INIT) { 2850 vif = vr->vvr_vif; 2851 TAILQ_FOREACH(ip, &vif->vvi_iplist, vip_next) { 2852 vipcnt++; 2853 } 2854 } 2855 2856 *sizep = sizeof (vrrp_ret_query_t); 2857 *sizep += (vipcnt == 0) ? 
0 : (vipcnt - 1) * sizeof (vrrp_addr_t); 2858 if (*sizep > size) { 2859 vrrp_log(VRRP_ERR, "vrrpd_query(): not enough space to hold " 2860 "%d virtual IPs", vipcnt); 2861 *sizep = sizeof (vrrp_ret_query_t); 2862 ret->vrq_err = VRRP_ETOOSMALL; 2863 return; 2864 } 2865 2866 (void) gettimeofday(&now, NULL); 2867 2868 /* Start from a zeroed reply, then copy in the configuration, the state information and the VNIC name. */ bzero(ret, *sizep); 2869 infop = &ret->vrq_qinfo; 2870 (void) memcpy(&infop->show_vi, 2871 &(vr->vvr_conf), sizeof (vrrp_vr_conf_t)); 2872 (void) memcpy(&infop->show_vs, 2873 &(vr->vvr_sinfo), sizeof (vrrp_stateinfo_t)); 2874 (void) strlcpy(infop->show_va.va_vnic, vr->vvr_vnic, MAXLINKNAMELEN); 2875 infop->show_vt.vt_since_last_tran = timeval_to_milli( 2876 timeval_delta(now, vr->vvr_sinfo.vs_st_time)); 2877 2878 /* Nothing more to report for a router in the INIT state; vif was not set for it either. */ if (vr->vvr_state == VRRP_STATE_INIT) { 2879 ret->vrq_err = VRRP_SUCCESS; 2880 return; 2881 } 2882 2883 vipcnt = 0; 2884 TAILQ_FOREACH(ip, &vif->vvi_iplist, vip_next) { 2885 (void) memcpy(&infop->show_va.va_vips[vipcnt++], 2886 &ip->vip_addr, sizeof (vrrp_addr_t)); 2887 } 2888 infop->show_va.va_vipcnt = vipcnt; 2889 2890 (void) memcpy(&infop->show_va.va_primary, 2891 &vr->vvr_pif->vvi_pip->vip_addr, sizeof (vrrp_addr_t)); 2892 2893 (void) memcpy(&infop->show_vp, &(vr->vvr_peer), sizeof (vrrp_peer_t)); 2894 2895 /* 2896 * Check whether there is a peer; the time since its last 2896 * advertisement is only reported if there is one. 2897 */ 2898 if (!VRRPADDR_UNSPECIFIED(vr->vvr_conf.vvc_af, 2899 &(vr->vvr_peer.vp_addr))) { 2900 infop->show_vt.vt_since_last_adv = timeval_to_milli( 2901 timeval_delta(now, vr->vvr_peer.vp_time)); 2902 } 2903 2904 if (vr->vvr_state == VRRP_STATE_BACKUP) { 2905 infop->show_vt.vt_master_down_intv = 2906 MASTER_DOWN_INTERVAL_VR(vr); 2907 } 2908 2909 ret->vrq_err = VRRP_SUCCESS; 2910 } 2911 2912 /* 2913 * Build the VRRP packet (not including the IP header) in buf, which is 2913 * buflen bytes long. Return the payload length, or 0 if the buffer is too 2914 * small or the router has no virtual IP address. 2915 * 2916 * If zero_pri is set to _B_TRUE, then this is the special zero-priority 2917 * advertisement which is sent by the Master to indicate that it has 2918 * stopped participating in VRRP.
2919 */ 2920 static size_t 2921 vrrpd_build_vrrp(vrrp_vr_t *vr, uchar_t *buf, int buflen, boolean_t zero_pri) 2922 { 2923 /* LINTED E_BAD_PTR_CAST_ALIGN */ 2924 vrrp_pkt_t *vp = (vrrp_pkt_t *)buf; 2925 /* LINTED E_BAD_PTR_CAST_ALIGN */ 2926 struct in_addr *a4 = (struct in_addr *)(vp + 1); 2927 /* LINTED E_BAD_PTR_CAST_ALIGN */ 2928 struct in6_addr *a6 = (struct in6_addr *)(vp + 1); 2929 vrrp_intf_t *vif = vr->vvr_vif; 2930 vrrp_ip_t *vip; 2931 int af = vif->vvi_af; 2932 size_t size = sizeof (vrrp_pkt_t); 2933 uint16_t rsvd_adver_int; 2934 int nip = 0; 2935 2936 vrrp_log(VRRP_DBG1, "vrrpd_build_vrrp(%s, %s_priority): intv %d", 2937 vr->vvr_conf.vvc_name, zero_pri ? "zero" : "non-zero", 2938 vr->vvr_conf.vvc_adver_int); 2939 2940 /* Append every virtual IP address after the fixed header, growing size as we go. NOTE(review): size is a size_t and buflen an int, so the comparison below is done unsigned - confirm buflen can never be negative. */ TAILQ_FOREACH(vip, &vif->vvi_iplist, vip_next) { 2941 if ((size += ((af == AF_INET) ? sizeof (struct in_addr) : 2942 sizeof (struct in6_addr))) > buflen) { 2943 vrrp_log(VRRP_ERR, "vrrpd_build_vrrp(%s): buffer size " 2944 "not big enough %d", vr->vvr_conf.vvc_name, size); 2945 return (0); 2946 } 2947 2948 if (af == AF_INET) 2949 a4[nip++] = vip->vip_addr.in4.sin_addr; 2950 else 2951 a6[nip++] = vip->vip_addr.in6.sin6_addr; 2952 } 2953 2954 if (nip == 0) { 2955 vrrp_log(VRRP_ERR, "vrrpd_build_vrrp(%s): no virtual IP " 2956 "address", vr->vvr_conf.vvc_name); 2957 return (0); 2958 } 2959 2960 vp->vp_vers_type = (VRRP_VERSION << 4) | VRRP_PKT_ADVERT; 2961 vp->vp_vrid = vr->vvr_conf.vvc_vrid; 2962 vp->vp_prio = zero_pri ? VRRP_PRIO_ZERO : vr->vvr_conf.vvc_pri; 2963 2964 /* The interval is converted with MSEC2CENTISEC and only its low 12 bits are kept. */ rsvd_adver_int = MSEC2CENTISEC(vr->vvr_conf.vvc_adver_int) & 0x0fff; 2965 vp->vp_rsvd_adver_int = htons(rsvd_adver_int); 2966 vp->vp_ipnum = nip; 2967 2968 /* 2969 * Set the checksum to 0 first, then calculate it.
2970 */ 2971 vp->vp_chksum = 0; 2972 if (af == AF_INET) { 2973 vp->vp_chksum = vrrp_cksum4( 2974 &vr->vvr_pif->vvi_pip->vip_addr.in4.sin_addr, 2975 &vrrp_muladdr4.in4.sin_addr, size, vp); 2976 } else { 2977 vp->vp_chksum = vrrp_cksum6( 2978 &vr->vvr_pif->vvi_pip->vip_addr.in6.sin6_addr, 2979 &vrrp_muladdr6.in6.sin6_addr, size, vp); 2980 } 2981 2982 return (size); 2983 } 2984 2985 /* 2986 * Send an IPv4 advertisement. We need to build the IPv4 header on our 2986 * own, so the VRRP payload is built right after the space reserved for 2986 * it in buf. Returns VRRP_ETOOSMALL if the payload cannot be built. 2987 */ 2988 static vrrp_err_t 2989 vrrpd_send_adv_v4(vrrp_vr_t *vr, uchar_t *buf, size_t len, boolean_t zero_pri) 2990 { 2991 /* LINTED E_BAD_PTR_CAST_ALIGN */ 2992 struct ip *ip = (struct ip *)buf; 2993 size_t plen; 2994 2995 vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v4(%s)", vr->vvr_conf.vvc_name); 2996 2997 if ((plen = vrrpd_build_vrrp(vr, buf + sizeof (struct ip), 2998 len - sizeof (struct ip), zero_pri)) == 0) { 2999 return (VRRP_ETOOSMALL); 3000 } 3001 3002 ip->ip_hl = sizeof (struct ip) >> 2; 3003 ip->ip_v = IPV4_VERSION; 3004 ip->ip_tos = 0; 3005 /* From here on plen is the length of the whole datagram, header included. */ plen += sizeof (struct ip); 3006 ip->ip_len = htons(plen); 3007 ip->ip_off = 0; 3008 ip->ip_ttl = VRRP_IP_TTL; 3009 ip->ip_p = IPPROTO_VRRP; 3010 ip->ip_src = vr->vvr_pif->vvi_pip->vip_addr.in4.sin_addr; 3011 ip->ip_dst = vrrp_muladdr4.in4.sin_addr; 3012 3013 /* 3014 * The kernel will set the IP cksum and the IPv4 identification.
3015 */ 3016 ip->ip_id = 0; 3017 ip->ip_sum = 0; 3018 3019 /* NOTE(review): len is a size_t while sendto() returns a ssize_t, so a failure return of -1 is stored as a very large value. It still differs from plen, so the error path is taken, but the sent value in the log is then meaningless. */ if ((len = sendto(vr->vvr_vif->vvi_sockfd, buf, plen, 0, 3020 (const struct sockaddr *)&vrrp_muladdr4, 3021 sizeof (struct sockaddr_in))) != plen) { 3022 vrrp_log(VRRP_ERR, "vrrpd_send_adv_v4(): sendto() on " 3023 "(vrid:%d, %s, %s) failed: %s sent:%d expect:%d", 3024 vr->vvr_conf.vvc_vrid, vr->vvr_vif->vvi_ifname, 3025 af_str(vr->vvr_conf.vvc_af), strerror(errno), len, plen); 3026 return (VRRP_ESYS); 3027 } 3028 3029 vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v4(%s) succeed", 3030 vr->vvr_conf.vvc_name); 3031 return (VRRP_SUCCESS); 3032 } 3033 3034 /* Send an IPv6 advertisement with sendmsg(). No IP header is built here; the hop limit and the packet info are passed as ancillary data in a temporarily allocated control buffer. */ static vrrp_err_t 3035 vrrpd_send_adv_v6(vrrp_vr_t *vr, uchar_t *buf, size_t len, boolean_t zero_pri) 3036 { 3037 struct msghdr msg6; 3038 size_t hoplimit_space = 0; 3039 size_t pktinfo_space = 0; 3040 size_t bufspace = 0; 3041 struct in6_pktinfo *pktinfop; 3042 struct cmsghdr *cmsgp; 3043 uchar_t *cmsg_datap; 3044 struct iovec iov; 3045 size_t plen; 3046 3047 vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v6(%s)", vr->vvr_conf.vvc_name); 3048 3049 if ((plen = vrrpd_build_vrrp(vr, buf, len, zero_pri)) == 0) 3050 return (VRRP_ETOOSMALL); 3051 3052 msg6.msg_control = NULL; 3053 msg6.msg_controllen = 0; 3054 3055 /* Reserve room for each ancillary item: a header, the data, and alignment padding around both. */ hoplimit_space = sizeof (int); 3056 bufspace += sizeof (struct cmsghdr) + _MAX_ALIGNMENT + 3057 hoplimit_space + _MAX_ALIGNMENT; 3058 3059 pktinfo_space = sizeof (struct in6_pktinfo); 3060 bufspace += sizeof (struct cmsghdr) + _MAX_ALIGNMENT + 3061 pktinfo_space + _MAX_ALIGNMENT; 3062 3063 /* 3064 * We need to temporarily set the msg6.msg_controllen to bufspace 3065 * (we will later trim it to actual length used). This is needed because 3066 * CMSG_NXTHDR() uses it to check we have not exceeded the bounds.
3067 */ 3068 bufspace += sizeof (struct cmsghdr); 3069 msg6.msg_controllen = bufspace; 3070 3071 msg6.msg_control = (struct cmsghdr *)malloc(bufspace); 3072 if (msg6.msg_control == NULL) { 3073 vrrp_log(VRRP_ERR, "vrrpd_send_adv_v6(%s): memory allocation " 3074 "failed: %s", vr->vvr_conf.vvc_name, strerror(errno)); 3075 return (VRRP_ENOMEM); 3076 } 3077 3078 cmsgp = CMSG_FIRSTHDR(&msg6); 3079 3080 /* First ancillary item: IPV6_HOPLIMIT, set to VRRP_IP_TTL. */ cmsgp->cmsg_level = IPPROTO_IPV6; 3081 cmsgp->cmsg_type = IPV6_HOPLIMIT; 3082 cmsg_datap = CMSG_DATA(cmsgp); 3083 /* LINTED */ 3084 *(int *)cmsg_datap = VRRP_IP_TTL; 3085 cmsgp->cmsg_len = cmsg_datap + hoplimit_space - (uchar_t *)cmsgp; 3086 cmsgp = CMSG_NXTHDR(&msg6, cmsgp); 3087 3088 /* Second ancillary item: IPV6_PKTINFO, carrying the primary IP address of the physical interface. */ cmsgp->cmsg_level = IPPROTO_IPV6; 3089 cmsgp->cmsg_type = IPV6_PKTINFO; 3090 cmsg_datap = CMSG_DATA(cmsgp); 3091 3092 /* LINTED */ 3093 pktinfop = (struct in6_pktinfo *)cmsg_datap; 3094 /* 3095 * We do not know if pktinfop->ipi6_addr is aligned properly, 3096 * therefore use bcopy() instead of an assignment. 3097 */ 3098 (void) bcopy(&vr->vvr_pif->vvi_pip->vip_addr.in6.sin6_addr, 3099 &pktinfop->ipi6_addr, sizeof (struct in6_addr)); 3100 3101 /* 3102 * We can assume pktinfop->ipi6_ifindex is 32 bit aligned.
3103 */ 3104 pktinfop->ipi6_ifindex = vr->vvr_vif->vvi_ifindex; 3105 cmsgp->cmsg_len = cmsg_datap + pktinfo_space - (uchar_t *)cmsgp; 3106 cmsgp = CMSG_NXTHDR(&msg6, cmsgp); 3107 /* Trim the control length to what was actually used. */ msg6.msg_controllen = (char *)cmsgp - (char *)msg6.msg_control; 3108 3109 msg6.msg_name = &vrrp_muladdr6; 3110 msg6.msg_namelen = sizeof (struct sockaddr_in6); 3111 3112 iov.iov_base = buf; 3113 iov.iov_len = plen; 3114 msg6.msg_iov = &iov; 3115 msg6.msg_iovlen = 1; 3116 3117 /* NOTE(review): as in the IPv4 path, len is a size_t holding the ssize_t result of sendmsg(). The control buffer is freed on both the failure and the success path. */ if ((len = sendmsg(vr->vvr_vif->vvi_sockfd, 3118 (const struct msghdr *)&msg6, 0)) != plen) { 3119 vrrp_log(VRRP_ERR, "vrrpd_send_adv_v6(%s): sendmsg() failed: " 3120 "%s expect %d sent %d", vr->vvr_conf.vvc_name, 3121 strerror(errno), plen, len); 3122 (void) free(msg6.msg_control); 3123 return (VRRP_ESYS); 3124 } 3125 3126 vrrp_log(VRRP_DBG1, "vrrpd_send_adv_v6(%s) succeed", 3127 vr->vvr_conf.vvc_name); 3128 (void) free(msg6.msg_control); 3129 return (VRRP_SUCCESS); 3130 } 3131 3132 /* 3133 * Send one VRRP advertisement for vr, using the IPv4 or the IPv6 sender 3133 * depending on the address family of the router. zero_pri is passed 3133 * through to the packet builder. Returns VRRP_EINVAL if the physical 3133 * interface has no primary IP address. 3134 */ 3135 static vrrp_err_t 3136 vrrpd_send_adv(vrrp_vr_t *vr, boolean_t zero_pri) 3137 { 3138 /* Packet buffer on the stack, declared as uint64_t so that it is 8-byte aligned. */ uint64_t buf[(IP_MAXPACKET + 1)/8]; 3139 3140 vrrp_log(VRRP_DBG1, "vrrpd_send_adv(%s, %s_priority)", 3141 vr->vvr_conf.vvc_name, zero_pri ?
"zero" : "non_zero"); 3142 3143 assert(vr->vvr_pif->vvi_pip != NULL); 3144 3145 /* Same condition as the assert above; this check only matters when assertions are compiled out. */ if (vr->vvr_pif->vvi_pip == NULL) { 3146 vrrp_log(VRRP_DBG0, "vrrpd_send_adv(%s): no primary IP " 3147 "address", vr->vvr_conf.vvc_name); 3148 return (VRRP_EINVAL); 3149 } 3150 3151 if (vr->vvr_conf.vvc_af == AF_INET) { 3152 return (vrrpd_send_adv_v4(vr, (uchar_t *)buf, 3153 sizeof (buf), zero_pri)); 3154 } else { 3155 return (vrrpd_send_adv_v6(vr, (uchar_t *)buf, 3156 sizeof (buf), zero_pri)); 3157 } 3158 } 3159 3160 /* Handle an advertisement vp received from the peer at address from for the router vr. Routers that are not yet past the INIT state, advertisements sent from our own primary address, and routers with priority 255 ignore the packet. Otherwise the peer information is recorded and the BACKUP or MASTER handling below is applied. */ static void 3161 vrrpd_process_adv(vrrp_vr_t *vr, vrrp_addr_t *from, vrrp_pkt_t *vp) 3162 { 3163 vrrp_vr_conf_t *conf = &vr->vvr_conf; 3164 char peer[INET6_ADDRSTRLEN]; 3165 char local[INET6_ADDRSTRLEN]; 3166 int addr_cmp; 3167 uint16_t peer_adver_int; 3168 3169 /* LINTED E_CONSTANT_CONDITION */ 3170 VRRPADDR2STR(vr->vvr_conf.vvc_af, from, peer, INET6_ADDRSTRLEN, 3171 _B_FALSE); 3172 vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s) from %s", conf->vvc_name, 3173 peer); 3174 3175 if (vr->vvr_state <= VRRP_STATE_INIT) { 3176 vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): state: %s, not " 3177 "ready", conf->vvc_name, vrrp_state2str(vr->vvr_state)); 3178 return; 3179 } 3180 3181 /* The advertised interval is the low 12 bits of the field, converted with CENTISEC2MSEC. */ peer_adver_int = CENTISEC2MSEC(ntohs(vp->vp_rsvd_adver_int) & 0x0fff); 3182 3183 /* LINTED E_CONSTANT_CONDITION */ 3184 VRRPADDR2STR(vr->vvr_pif->vvi_af, &vr->vvr_pif->vvi_pip->vip_addr, 3185 local, INET6_ADDRSTRLEN, _B_FALSE); 3186 vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): local/state/pri" 3187 "(%s/%s/%d) peer/pri/intv(%s/%d/%d)", conf->vvc_name, local, 3188 vrrp_state2str(vr->vvr_state), conf->vvc_pri, peer, 3189 vp->vp_prio, peer_adver_int); 3190 3191 /* addr_cmp compares the sender with our own primary address; zero means we received our own advertisement. It is used again further down as the tie-breaker when the priorities are equal. */ addr_cmp = ipaddr_cmp(vr->vvr_pif->vvi_af, from, 3192 &vr->vvr_pif->vvi_pip->vip_addr); 3193 if (addr_cmp == 0) { 3194 vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): local message", 3195 conf->vvc_name); 3196 return; 3197 } else if (conf->vvc_pri == vp->vp_prio) { 3198 vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): peer IP %s is %s" 3199 " than the local IP %s",
conf->vvc_name, peer, 3200 addr_cmp > 0 ? "greater" : "less", local); 3201 } 3202 3203 if (conf->vvc_pri == 255) { 3204 vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): virtual address " 3205 "owner received advertisement from %s", conf->vvc_name, 3206 peer); 3207 return; 3208 } 3209 3210 /* Remember when and from whom this advertisement came, with its priority and interval. */ (void) gettimeofday(&vr->vvr_peer_time, NULL); 3211 (void) memcpy(&vr->vvr_peer_addr, from, sizeof (vrrp_addr_t)); 3212 vr->vvr_peer_prio = vp->vp_prio; 3213 vr->vvr_peer_adver_int = peer_adver_int; 3214 3215 if (vr->vvr_state == VRRP_STATE_BACKUP) { 3216 vr->vvr_master_adver_int = vr->vvr_peer_adver_int; 3217 /* Restart the master-down timer if the master is resigning, if we may not preempt, or if the sender has at least our priority. */ if ((vp->vp_prio == VRRP_PRIO_ZERO) || 3218 (conf->vvc_preempt == _B_FALSE || 3219 vp->vp_prio >= conf->vvc_pri)) { 3220 (void) iu_cancel_timer(vrrpd_timerq, 3221 vr->vvr_timer_id, NULL); 3222 if (vp->vp_prio == VRRP_PRIO_ZERO) { 3223 /* the master stops participating in VRRP */ 3224 vr->vvr_timeout = SKEW_TIME_VR(vr); 3225 } else { 3226 vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr); 3227 } 3228 if ((vr->vvr_timer_id = iu_schedule_timer_ms( 3229 vrrpd_timerq, vr->vvr_timeout, vrrp_b2m_timeout, 3230 vr)) == -1) { 3231 vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): " 3232 "start vrrp_b2m_timeout(%d) failed", 3233 conf->vvc_name, vr->vvr_timeout); 3234 } else { 3235 vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): " 3236 "start vrrp_b2m_timeout(%d)", 3237 conf->vvc_name, vr->vvr_timeout); 3238 } 3239 } 3240 } else if (vr->vvr_state == VRRP_STATE_MASTER) { 3241 /* As MASTER, answer a zero-priority advertisement with one of our own and restart the advertisement timer. */ if (vp->vp_prio == VRRP_PRIO_ZERO) { 3242 (void) vrrpd_send_adv(vr, _B_FALSE); 3243 (void) iu_cancel_timer(vrrpd_timerq, 3244 vr->vvr_timer_id, NULL); 3245 if ((vr->vvr_timer_id = iu_schedule_timer_ms( 3246 vrrpd_timerq, vr->vvr_timeout, vrrp_adv_timeout, 3247 vr)) == -1) { 3248 vrrp_log(VRRP_ERR, "vrrpd_process_adv(%s): " 3249 "start vrrp_adv_timeout(%d) failed", 3250 conf->vvc_name, vr->vvr_timeout); 3251 } else { 3252 vrrp_log(VRRP_DBG1, "vrrpd_process_adv(%s): " 3253 "start vrrp_adv_timeout(%d)", 3254 conf->vvc_name,
vr->vvr_timeout); 3255 } 3256 } else if (vp->vp_prio > conf->vvc_pri || 3257 (vp->vp_prio == conf->vvc_pri && addr_cmp > 0)) { 3258 (void) vrrpd_state_m2b(vr); 3259 } 3260 } else { 3261 assert(_B_FALSE); 3262 } 3263 } 3264 3265 static vrrp_err_t 3266 vrrpd_process_vrrp(vrrp_intf_t *pif, vrrp_pkt_t *vp, size_t len, 3267 vrrp_addr_t *from) 3268 { 3269 vrrp_vr_t *vr; 3270 uint8_t vers_type; 3271 uint16_t saved_cksum, cksum; 3272 char peer[INET6_ADDRSTRLEN]; 3273 3274 /* LINTED E_CONSTANT_CONDITION */ 3275 VRRPADDR2STR(pif->vvi_af, from, peer, INET6_ADDRSTRLEN, _B_FALSE); 3276 vrrp_log(VRRP_DBG0, "vrrpd_process_vrrp(%s) from %s", pif->vvi_ifname, 3277 peer); 3278 3279 if (len < sizeof (vrrp_pkt_t)) { 3280 vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): invalid message " 3281 "length %d", len); 3282 return (VRRP_EINVAL); 3283 } 3284 3285 /* 3286 * Verify: VRRP version number and packet type. 3287 */ 3288 vers_type = ((vp->vp_vers_type & VRRP_VER_MASK) >> 4); 3289 if (vers_type != VRRP_VERSION) { 3290 vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s) unsupported " 3291 "version %d", pif->vvi_ifname, vers_type); 3292 return (VRRP_EINVAL); 3293 } 3294 3295 if (vp->vp_ipnum == 0) { 3296 vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): zero IPvX count", 3297 pif->vvi_ifname); 3298 return (VRRP_EINVAL); 3299 } 3300 3301 if (len - sizeof (vrrp_pkt_t) != 3302 vp->vp_ipnum * (pif->vvi_af == AF_INET ? sizeof (struct in_addr) : 3303 sizeof (struct in6_addr))) { 3304 vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s): invalid IPvX count" 3305 " %d", pif->vvi_ifname, vp->vp_ipnum); 3306 return (VRRP_EINVAL); 3307 } 3308 3309 vers_type = (vp->vp_vers_type & VRRP_TYPE_MASK); 3310 3311 /* 3312 * verify: VRRP checksum. 
Note that vrrp_cksum returns network byte 3313 * order checksum value; 3314 */ 3315 saved_cksum = vp->vp_chksum; 3316 vp->vp_chksum = 0; 3317 if (pif->vvi_af == AF_INET) { 3318 cksum = vrrp_cksum4(&from->in4.sin_addr, 3319 &vrrp_muladdr4.in4.sin_addr, len, vp); 3320 } else { 3321 cksum = vrrp_cksum6(&from->in6.sin6_addr, 3322 &vrrp_muladdr6.in6.sin6_addr, len, vp); 3323 } 3324 3325 if (cksum != saved_cksum) { 3326 vrrp_log(VRRP_ERR, "vrrpd_process_vrrp(%s) invalid " 3327 "checksum: expected/real(0x%x/0x%x)", pif->vvi_ifname, 3328 cksum, saved_cksum); 3329 return (VRRP_EINVAL); 3330 } 3331 3332 if ((vr = vrrpd_lookup_vr_by_vrid(pif->vvi_ifname, vp->vp_vrid, 3333 pif->vvi_af)) != NULL && vers_type == VRRP_PKT_ADVERT) { 3334 vrrpd_process_adv(vr, from, vp); 3335 } else { 3336 vrrp_log(VRRP_DBG1, "vrrpd_process_vrrp(%s) VRID(%d/%s) " 3337 "not configured", pif->vvi_ifname, vp->vp_vrid, 3338 af_str(pif->vvi_af)); 3339 } 3340 return (VRRP_SUCCESS); 3341 } 3342 3343 /* 3344 * IPv4 socket, the IPv4 header is included. 
3345 */ 3346 static vrrp_err_t 3347 vrrpd_process_adv_v4(vrrp_intf_t *pif, struct msghdr *msgp, size_t len) 3348 { 3349 char abuf[INET6_ADDRSTRLEN]; 3350 struct ip *ip; 3351 3352 vrrp_log(VRRP_DBG0, "vrrpd_process_adv_v4(%s, %d)", 3353 pif->vvi_ifname, len); 3354 3355 ip = (struct ip *)msgp->msg_iov->iov_base; 3356 3357 /* Sanity check */ 3358 if (len < sizeof (struct ip) || len < ntohs(ip->ip_len)) { 3359 vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid length " 3360 "%d", pif->vvi_ifname, len); 3361 return (VRRP_EINVAL); 3362 } 3363 3364 assert(ip->ip_v == IPV4_VERSION); 3365 assert(ip->ip_p == IPPROTO_VRRP); 3366 assert(msgp->msg_namelen == sizeof (struct sockaddr_in)); 3367 3368 if (vrrp_muladdr4.in4.sin_addr.s_addr != ip->ip_dst.s_addr) { 3369 vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid " 3370 "destination %s", pif->vvi_ifname, 3371 inet_ntop(pif->vvi_af, &(ip->ip_dst), abuf, sizeof (abuf))); 3372 return (VRRP_EINVAL); 3373 } 3374 3375 if (ip->ip_ttl != VRRP_IP_TTL) { 3376 vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid " 3377 "ttl %d", pif->vvi_ifname, ip->ip_ttl); 3378 return (VRRP_EINVAL); 3379 } 3380 3381 /* 3382 * Note that the ip_len contains only the IP payload length. 3383 */ 3384 return (vrrpd_process_vrrp(pif, 3385 /* LINTED E_BAD_PTR_CAST_ALIGN */ 3386 (vrrp_pkt_t *)((char *)ip + ip->ip_hl * 4), ntohs(ip->ip_len), 3387 (vrrp_addr_t *)msgp->msg_name)); 3388 } 3389 3390 /* 3391 * IPv6 socket, check the ancillary_data. 
3392 */ 3393 static vrrp_err_t 3394 vrrpd_process_adv_v6(vrrp_intf_t *pif, struct msghdr *msgp, size_t len) 3395 { 3396 struct cmsghdr *cmsgp; 3397 uchar_t *cmsg_datap; 3398 struct in6_pktinfo *pktinfop; 3399 char abuf[INET6_ADDRSTRLEN]; 3400 int ttl; 3401 3402 vrrp_log(VRRP_DBG1, "vrrpd_process_adv_v6(%s, %d)", 3403 pif->vvi_ifname, len); 3404 3405 /* Sanity check */ 3406 if (len < sizeof (vrrp_pkt_t)) { 3407 vrrp_log(VRRP_ERR, "vrrpd_process_adv_v6(%s): invalid length " 3408 "%d", pif->vvi_ifname, len); 3409 return (VRRP_EINVAL); 3410 } 3411 3412 assert(msgp->msg_namelen == sizeof (struct sockaddr_in6)); 3413 3414 for (cmsgp = CMSG_FIRSTHDR(msgp); cmsgp != NULL; 3415 cmsgp = CMSG_NXTHDR(msgp, cmsgp)) { 3416 assert(cmsgp->cmsg_level == IPPROTO_IPV6); 3417 cmsg_datap = CMSG_DATA(cmsgp); 3418 3419 switch (cmsgp->cmsg_type) { 3420 case IPV6_HOPLIMIT: 3421 /* LINTED E_BAD_PTR_CAST_ALIGN */ 3422 if ((ttl = *(int *)cmsg_datap) == VRRP_IP_TTL) 3423 break; 3424 3425 vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid " 3426 "ttl %d", pif->vvi_ifname, ttl); 3427 return (VRRP_EINVAL); 3428 case IPV6_PKTINFO: 3429 /* LINTED E_BAD_PTR_CAST_ALIGN */ 3430 pktinfop = (struct in6_pktinfo *)cmsg_datap; 3431 if (IN6_ARE_ADDR_EQUAL(&pktinfop->ipi6_addr, 3432 &vrrp_muladdr6.in6.sin6_addr)) { 3433 break; 3434 } 3435 3436 vrrp_log(VRRP_ERR, "vrrpd_process_adv_v4(%s): invalid " 3437 "destination %s", pif->vvi_ifname, 3438 inet_ntop(pif->vvi_af, &pktinfop->ipi6_addr, abuf, 3439 sizeof (abuf))); 3440 return (VRRP_EINVAL); 3441 } 3442 } 3443 3444 return (vrrpd_process_vrrp(pif, msgp->msg_iov->iov_base, len, 3445 msgp->msg_name)); 3446 } 3447 3448 /* ARGSUSED */ 3449 static void 3450 vrrpd_sock_handler(iu_eh_t *eh, int s, short events, iu_event_id_t id, 3451 void *arg) 3452 { 3453 struct msghdr msg; 3454 vrrp_addr_t from; 3455 uint64_t buf[(IP_MAXPACKET + 1)/8]; 3456 uint64_t ancillary_data[(IP_MAXPACKET + 1)/8]; 3457 vrrp_intf_t *pif = arg; 3458 int af = pif->vvi_af; 3459 int len; 3460 
struct iovec iov; 3461 3462 vrrp_log(VRRP_DBG1, "vrrpd_sock_handler(%s)", pif->vvi_ifname); 3463 3464 msg.msg_name = (struct sockaddr *)&from; 3465 msg.msg_namelen = (af == AF_INET) ? sizeof (struct sockaddr_in) : 3466 sizeof (struct sockaddr_in6); 3467 iov.iov_base = (char *)buf; 3468 iov.iov_len = sizeof (buf); 3469 msg.msg_iov = &iov; 3470 msg.msg_iovlen = 1; 3471 msg.msg_control = ancillary_data; 3472 msg.msg_controllen = sizeof (ancillary_data); 3473 3474 if ((len = recvmsg(s, &msg, 0)) == -1) { 3475 vrrp_log(VRRP_ERR, "vrrpd_sock_handler() recvmsg(%s) " 3476 "failed: %s", pif->vvi_ifname, strerror(errno)); 3477 return; 3478 } 3479 3480 /* 3481 * Ignore packets whose control buffers that don't fit 3482 */ 3483 if (msg.msg_flags & (MSG_TRUNC|MSG_CTRUNC)) { 3484 vrrp_log(VRRP_ERR, "vrrpd_sock_handler() %s buffer not " 3485 "big enough", pif->vvi_ifname); 3486 return; 3487 } 3488 3489 if (af == AF_INET) 3490 (void) vrrpd_process_adv_v4(pif, &msg, len); 3491 else 3492 (void) vrrpd_process_adv_v6(pif, &msg, len); 3493 } 3494 3495 /* 3496 * Create the socket which is used to receive VRRP packets. Virtual routers 3497 * that configured on the same physical interface share the same socket. 3498 */ 3499 static vrrp_err_t 3500 vrrpd_init_rxsock(vrrp_vr_t *vr) 3501 { 3502 vrrp_intf_t *pif; /* Physical interface used to recv packets */ 3503 struct group_req greq; 3504 struct sockaddr_storage *muladdr; 3505 int af, proto; 3506 int on = 1; 3507 vrrp_err_t err = VRRP_SUCCESS; 3508 3509 vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s)", vr->vvr_conf.vvc_name); 3510 3511 /* 3512 * The RX sockets may already been initialized. 3513 */ 3514 if ((pif = vr->vvr_pif) != NULL) { 3515 vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s) already done on %s", 3516 vr->vvr_conf.vvc_name, pif->vvi_ifname); 3517 assert(pif->vvi_sockfd != -1); 3518 return (VRRP_SUCCESS); 3519 } 3520 3521 /* 3522 * If no IP addresses configured on the primary interface, 3523 * return failure. 
3524 */ 3525 af = vr->vvr_conf.vvc_af; 3526 pif = vrrpd_lookup_if(vr->vvr_conf.vvc_link, af); 3527 if (pif == NULL) { 3528 vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s): no IP address " 3529 "over %s/%s", vr->vvr_conf.vvc_name, 3530 vr->vvr_conf.vvc_link, af_str(af)); 3531 return (VRRP_ENOPRIM); 3532 } 3533 3534 proto = (af == AF_INET ? IPPROTO_IP : IPPROTO_IPV6); 3535 if (pif->vvi_nvr++ == 0) { 3536 assert(pif->vvi_sockfd < 0); 3537 pif->vvi_sockfd = socket(af, SOCK_RAW, IPPROTO_VRRP); 3538 if (pif->vvi_sockfd < 0) { 3539 vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): socket() " 3540 "failed %s", vr->vvr_conf.vvc_name, 3541 strerror(errno)); 3542 err = VRRP_ESYS; 3543 goto done; 3544 } 3545 3546 /* 3547 * Join the multicast group to receive VRRP packets. 3548 */ 3549 if (af == AF_INET) { 3550 muladdr = (struct sockaddr_storage *) 3551 (void *)&vrrp_muladdr4; 3552 } else { 3553 muladdr = (struct sockaddr_storage *) 3554 (void *)&vrrp_muladdr6; 3555 } 3556 3557 greq.gr_interface = pif->vvi_ifindex; 3558 (void) memcpy(&greq.gr_group, muladdr, 3559 sizeof (struct sockaddr_storage)); 3560 if (setsockopt(pif->vvi_sockfd, proto, MCAST_JOIN_GROUP, &greq, 3561 sizeof (struct group_req)) < 0) { 3562 vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): " 3563 "join_group(%d) failed: %s", vr->vvr_conf.vvc_name, 3564 pif->vvi_ifindex, strerror(errno)); 3565 err = VRRP_ESYS; 3566 goto done; 3567 } else { 3568 vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s): " 3569 "join_group(%d) succeeded", vr->vvr_conf.vvc_name, 3570 pif->vvi_ifindex); 3571 } 3572 3573 /* 3574 * Unlike IPv4, the IPv6 raw socket does not pass the IP header 3575 * when a packet is received. Call setsockopt() to receive such 3576 * information. 
3577 */ 3578 if (af == AF_INET6) { 3579 /* 3580 * Enable receipt of destination address info 3581 */ 3582 if (setsockopt(pif->vvi_sockfd, proto, IPV6_RECVPKTINFO, 3583 (char *)&on, sizeof (on)) < 0) { 3584 vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): " 3585 "enable recvpktinfo failed: %s", 3586 vr->vvr_conf.vvc_name, strerror(errno)); 3587 err = VRRP_ESYS; 3588 goto done; 3589 } 3590 3591 /* 3592 * Enable receipt of hoplimit info 3593 */ 3594 if (setsockopt(pif->vvi_sockfd, proto, 3595 IPV6_RECVHOPLIMIT, (char *)&on, sizeof (on)) < 0) { 3596 vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): " 3597 "enable recvhoplimit failed: %s", 3598 vr->vvr_conf.vvc_name, strerror(errno)); 3599 err = VRRP_ESYS; 3600 goto done; 3601 } 3602 } 3603 3604 if ((pif->vvi_eid = iu_register_event(vrrpd_eh, 3605 pif->vvi_sockfd, POLLIN, vrrpd_sock_handler, pif)) == -1) { 3606 vrrp_log(VRRP_ERR, "vrrpd_init_rxsock(%s): " 3607 "iu_register_event() failed", 3608 vr->vvr_conf.vvc_name); 3609 err = VRRP_ESYS; 3610 goto done; 3611 } 3612 } else { 3613 vrrp_log(VRRP_DBG1, "vrrpd_init_rxsock(%s) over %s already " 3614 "done %d", vr->vvr_conf.vvc_name, pif->vvi_ifname, 3615 pif->vvi_nvr); 3616 assert(IS_PRIMARY_INTF(pif)); 3617 } 3618 3619 done: 3620 vr->vvr_pif = pif; 3621 if (err != VRRP_SUCCESS) 3622 vrrpd_fini_rxsock(vr); 3623 3624 return (err); 3625 } 3626 3627 /* 3628 * Delete the socket which is used to receive VRRP packets for the given 3629 * VRRP router. Since all virtual routers that configured on the same 3630 * physical interface share the same socket, the socket is only closed 3631 * when the last VRRP router share this socket is deleted. 
3632 */ 3633 static void 3634 vrrpd_fini_rxsock(vrrp_vr_t *vr) 3635 { 3636 vrrp_intf_t *pif = vr->vvr_pif; 3637 3638 vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s)", vr->vvr_conf.vvc_name); 3639 3640 if (pif == NULL) 3641 return; 3642 3643 if (--pif->vvi_nvr == 0) { 3644 vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s) over %s", 3645 vr->vvr_conf.vvc_name, pif->vvi_ifname); 3646 (void) iu_unregister_event(vrrpd_eh, pif->vvi_eid, NULL); 3647 (void) close(pif->vvi_sockfd); 3648 pif->vvi_pip = NULL; 3649 pif->vvi_sockfd = -1; 3650 pif->vvi_eid = -1; 3651 } else { 3652 vrrp_log(VRRP_DBG1, "vrrpd_fini_rxsock(%s) over %s %d", 3653 vr->vvr_conf.vvc_name, pif->vvi_ifname, pif->vvi_nvr); 3654 } 3655 vr->vvr_pif = NULL; 3656 } 3657 3658 /* 3659 * Create the socket which is used to send VRRP packets. Further, set 3660 * the IFF_NOACCEPT flag based on the VRRP router's accept mode. 3661 */ 3662 static vrrp_err_t 3663 vrrpd_init_txsock(vrrp_vr_t *vr) 3664 { 3665 int af; 3666 vrrp_intf_t *vif; 3667 vrrp_err_t err; 3668 3669 vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s)", vr->vvr_conf.vvc_name); 3670 3671 if (vr->vvr_vif != NULL) { 3672 vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s) already done on %s", 3673 vr->vvr_conf.vvc_name, vr->vvr_vif->vvi_ifname); 3674 return (VRRP_SUCCESS); 3675 } 3676 3677 af = vr->vvr_conf.vvc_af; 3678 if ((vif = vrrpd_lookup_if(vr->vvr_vnic, af)) == NULL) { 3679 vrrp_log(VRRP_DBG1, "vrrpd_init_txsock(%s) no IP address over " 3680 "%s/%s", vr->vvr_conf.vvc_name, vr->vvr_vnic, af_str(af)); 3681 return (VRRP_ENOVIRT); 3682 } 3683 3684 vr->vvr_vif = vif; 3685 if (vr->vvr_conf.vvc_af == AF_INET) 3686 err = vrrpd_init_txsock_v4(vr); 3687 else 3688 err = vrrpd_init_txsock_v6(vr); 3689 3690 if (err != VRRP_SUCCESS) 3691 goto done; 3692 3693 /* 3694 * The interface should start with IFF_NOACCEPT flag not set, only 3695 * call this function when the VRRP router requires IFF_NOACCEPT. 
3696 */ 3697 if (!vr->vvr_conf.vvc_accept) 3698 err = vrrpd_set_noaccept(vr, _B_TRUE); 3699 3700 done: 3701 if (err != VRRP_SUCCESS) { 3702 (void) close(vif->vvi_sockfd); 3703 vif->vvi_sockfd = -1; 3704 vr->vvr_vif = NULL; 3705 } 3706 3707 return (err); 3708 } 3709 3710 /* 3711 * Create the IPv4 socket which is used to send VRRP packets. Note that 3712 * the destination MAC address of VRRP advertisement must be the virtual 3713 * MAC address, so we specify the output interface to be the specific VNIC. 3714 */ 3715 static vrrp_err_t 3716 vrrpd_init_txsock_v4(vrrp_vr_t *vr) 3717 { 3718 vrrp_intf_t *vif; /* VNIC interface used to send packets */ 3719 vrrp_ip_t *vip; /* The first IP over the VNIC */ 3720 int on = 1; 3721 char off = 0; 3722 vrrp_err_t err = VRRP_SUCCESS; 3723 char abuf[INET6_ADDRSTRLEN]; 3724 3725 vif = vr->vvr_vif; 3726 assert(vr->vvr_conf.vvc_af == AF_INET); 3727 assert(vif != NULL); 3728 3729 vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v4(%s) over %s", 3730 vr->vvr_conf.vvc_name, vif->vvi_ifname); 3731 3732 if (vif->vvi_sockfd != -1) { 3733 vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v4(%s) already done " 3734 "over %s", vr->vvr_conf.vvc_name, vif->vvi_ifname); 3735 return (VRRP_SUCCESS); 3736 } 3737 3738 vif->vvi_sockfd = socket(vif->vvi_af, SOCK_RAW, IPPROTO_VRRP); 3739 if (vif->vvi_sockfd < 0) { 3740 vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): socket() " 3741 "failed: %s", vr->vvr_conf.vvc_name, strerror(errno)); 3742 err = VRRP_ESYS; 3743 goto done; 3744 } 3745 3746 /* 3747 * Include the IP header, so that we can specify the IP address/ttl. 3748 */ 3749 if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_HDRINCL, (char *)&on, 3750 sizeof (on)) < 0) { 3751 vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): ip_hdrincl " 3752 "failed: %s", vr->vvr_conf.vvc_name, strerror(errno)); 3753 err = VRRP_ESYS; 3754 goto done; 3755 } 3756 3757 /* 3758 * Disable multicast loopback. 
3759 */ 3760 if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_MULTICAST_LOOP, &off, 3761 sizeof (char)) == -1) { 3762 vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): disable " 3763 "multicast_loop failed: %s", vr->vvr_conf.vvc_name, 3764 strerror(errno)); 3765 err = VRRP_ESYS; 3766 goto done; 3767 } 3768 3769 vip = TAILQ_FIRST(&vif->vvi_iplist); 3770 /* LINTED E_CONSTANT_CONDITION */ 3771 VRRPADDR2STR(vif->vvi_af, &vip->vip_addr, abuf, INET6_ADDRSTRLEN, 3772 _B_FALSE); 3773 3774 /* 3775 * Set the output interface to send the VRRP packet. 3776 */ 3777 if (setsockopt(vif->vvi_sockfd, IPPROTO_IP, IP_MULTICAST_IF, 3778 &vip->vip_addr.in4.sin_addr, sizeof (struct in_addr)) < 0) { 3779 vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v4(%s): multcast_if(%s) " 3780 "failed: %s", vr->vvr_conf.vvc_name, abuf, strerror(errno)); 3781 err = VRRP_ESYS; 3782 } else { 3783 vrrp_log(VRRP_DBG0, "vrrpd_init_txsock_v4(%s): multcast_if(%s) " 3784 "succeed", vr->vvr_conf.vvc_name, abuf); 3785 } 3786 3787 done: 3788 if (err != VRRP_SUCCESS) { 3789 (void) close(vif->vvi_sockfd); 3790 vif->vvi_sockfd = -1; 3791 } 3792 3793 return (err); 3794 } 3795 3796 /* 3797 * Create the IPv6 socket which is used to send VRRP packets. Note that 3798 * the destination must be the virtual MAC address, so we specify the output 3799 * interface to be the specific VNIC. 
3800 */ 3801 static vrrp_err_t 3802 vrrpd_init_txsock_v6(vrrp_vr_t *vr) 3803 { 3804 vrrp_intf_t *vif; /* VNIC interface used to send packets */ 3805 int off = 0, ttl = VRRP_IP_TTL; 3806 vrrp_err_t err = VRRP_SUCCESS; 3807 3808 vif = vr->vvr_vif; 3809 assert(vr->vvr_conf.vvc_af == AF_INET6); 3810 assert(vif != NULL); 3811 3812 vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s) over %s", 3813 vr->vvr_conf.vvc_name, vif->vvi_ifname); 3814 3815 if (vif->vvi_sockfd != -1) { 3816 vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s) already done " 3817 "over %s", vr->vvr_conf.vvc_name, vif->vvi_ifname); 3818 return (VRRP_SUCCESS); 3819 } 3820 3821 vif->vvi_sockfd = socket(vif->vvi_af, SOCK_RAW, IPPROTO_VRRP); 3822 if (vif->vvi_sockfd < 0) { 3823 vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): socket() " 3824 "failed: %s", vr->vvr_conf.vvc_name, strerror(errno)); 3825 err = VRRP_ESYS; 3826 goto done; 3827 } 3828 3829 /* 3830 * Disable multicast loopback. 3831 */ 3832 if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_LOOP, 3833 &off, sizeof (int)) == -1) { 3834 vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): disable " 3835 "multicast_loop failed: %s", vr->vvr_conf.vvc_name, 3836 strerror(errno)); 3837 err = VRRP_ESYS; 3838 goto done; 3839 } 3840 3841 /* 3842 * Set the multicast TTL. 3843 */ 3844 if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_HOPS, 3845 &ttl, sizeof (int)) == -1) { 3846 vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): enable " 3847 "multicast_hops %d failed: %s", vr->vvr_conf.vvc_name, 3848 ttl, strerror(errno)); 3849 err = VRRP_ESYS; 3850 goto done; 3851 } 3852 3853 /* 3854 * Set the output interface to send the VRRP packet. 
3855 */ 3856 if (setsockopt(vif->vvi_sockfd, IPPROTO_IPV6, IPV6_MULTICAST_IF, 3857 &vif->vvi_ifindex, sizeof (uint32_t)) < 0) { 3858 vrrp_log(VRRP_ERR, "vrrpd_init_txsock_v6(%s): multicast_if(%d) " 3859 "failed: %s", vr->vvr_conf.vvc_name, vif->vvi_ifindex, 3860 strerror(errno)); 3861 err = VRRP_ESYS; 3862 } else { 3863 vrrp_log(VRRP_DBG1, "vrrpd_init_txsock_v6(%s): multicast_if(%d)" 3864 " succeed", vr->vvr_conf.vvc_name, vif->vvi_ifindex); 3865 } 3866 3867 done: 3868 if (err != VRRP_SUCCESS) { 3869 (void) close(vif->vvi_sockfd); 3870 vif->vvi_sockfd = -1; 3871 } 3872 3873 return (err); 3874 } 3875 3876 /* 3877 * Delete the socket which is used to send VRRP packets. Further, clear 3878 * the IFF_NOACCEPT flag based on the VRRP router's accept mode. 3879 */ 3880 static void 3881 vrrpd_fini_txsock(vrrp_vr_t *vr) 3882 { 3883 vrrp_intf_t *vif = vr->vvr_vif; 3884 3885 vrrp_log(VRRP_DBG1, "vrrpd_fini_txsock(%s)", vr->vvr_conf.vvc_name); 3886 3887 if (vif != NULL) { 3888 if (!vr->vvr_conf.vvc_accept) 3889 (void) vrrpd_set_noaccept(vr, _B_FALSE); 3890 (void) close(vif->vvi_sockfd); 3891 vif->vvi_sockfd = -1; 3892 vr->vvr_vif = NULL; 3893 } 3894 } 3895 3896 /* 3897 * Given the the pseudo header cksum value (sum), caculate the cksum with 3898 * the rest of VRRP packet. 3899 */ 3900 static uint16_t 3901 in_cksum(int sum, uint16_t plen, void *p) 3902 { 3903 int nleft; 3904 uint16_t *w; 3905 uint16_t answer; 3906 uint16_t odd_byte = 0; 3907 3908 nleft = plen; 3909 w = (uint16_t *)p; 3910 while (nleft > 1) { 3911 sum += *w++; 3912 nleft -= 2; 3913 } 3914 3915 /* mop up an odd byte, if necessary */ 3916 if (nleft == 1) { 3917 *(uchar_t *)(&odd_byte) = *(uchar_t *)w; 3918 sum += odd_byte; 3919 } 3920 3921 /* 3922 * add back carry outs from top 16 bits to low 16 bits 3923 */ 3924 sum = (sum >> 16) + (sum & 0xffff); /* add hi 16 to low 16 */ 3925 sum += (sum >> 16); /* add carry */ 3926 answer = ~sum; /* truncate to 16 bits */ 3927 return (answer == 0 ? 
~0 : answer); 3928 } 3929 3930 /* Pseudo header for v4 */ 3931 struct pshv4 { 3932 struct in_addr ph4_src; 3933 struct in_addr ph4_dst; 3934 uint8_t ph4_zero; /* always zero */ 3935 uint8_t ph4_protocol; /* protocol used, IPPROTO_VRRP */ 3936 uint16_t ph4_len; /* VRRP payload len */ 3937 }; 3938 3939 /* 3940 * Checksum routine for VRRP checksum. Note that plen is the upper-layer 3941 * packet length (in the host byte order), and both IP source and destination 3942 * addresses are in the network byte order. 3943 */ 3944 static uint16_t 3945 vrrp_cksum4(struct in_addr *src, struct in_addr *dst, uint16_t plen, 3946 vrrp_pkt_t *vp) 3947 { 3948 struct pshv4 ph4; 3949 int nleft; 3950 uint16_t *w; 3951 int sum = 0; 3952 3953 ph4.ph4_src = *src; 3954 ph4.ph4_dst = *dst; 3955 ph4.ph4_zero = 0; 3956 ph4.ph4_protocol = IPPROTO_VRRP; 3957 ph4.ph4_len = htons(plen); 3958 3959 /* 3960 * Our algorithm is simple, using a 32 bit accumulator (sum), 3961 * we add sequential 16 bit words to it, and at the end, fold 3962 * back all the carry bits from the top 16 bits into the lower 3963 * 16 bits. 3964 */ 3965 nleft = sizeof (struct pshv4); 3966 w = (uint16_t *)&ph4; 3967 while (nleft > 0) { 3968 sum += *w++; 3969 nleft -= 2; 3970 } 3971 3972 return (in_cksum(sum, plen, vp)); 3973 } 3974 3975 /* Pseudo header for v6 */ 3976 struct pshv6 { 3977 struct in6_addr ph6_src; 3978 struct in6_addr ph6_dst; 3979 uint32_t ph6_len; /* VRRP payload len */ 3980 uint32_t ph6_zero : 24, 3981 ph6_protocol : 8; /* protocol used, IPPROTO_VRRP */ 3982 }; 3983 3984 /* 3985 * Checksum routine for VRRP checksum. Note that plen is the upper-layer 3986 * packet length (in the host byte order), and both IP source and destination 3987 * addresses are in the network byte order. 
3988 */ 3989 static uint16_t 3990 vrrp_cksum6(struct in6_addr *src, struct in6_addr *dst, uint16_t plen, 3991 vrrp_pkt_t *vp) 3992 { 3993 struct pshv6 ph6; 3994 int nleft; 3995 uint16_t *w; 3996 int sum = 0; 3997 3998 ph6.ph6_src = *src; 3999 ph6.ph6_dst = *dst; 4000 ph6.ph6_zero = 0; 4001 ph6.ph6_protocol = IPPROTO_VRRP; 4002 ph6.ph6_len = htonl((uint32_t)plen); 4003 4004 /* 4005 * Our algorithm is simple, using a 32 bit accumulator (sum), 4006 * we add sequential 16 bit words to it, and at the end, fold 4007 * back all the carry bits from the top 16 bits into the lower 4008 * 16 bits. 4009 */ 4010 nleft = sizeof (struct pshv6); 4011 w = (uint16_t *)&ph6; 4012 while (nleft > 0) { 4013 sum += *w++; 4014 nleft -= 2; 4015 } 4016 4017 return (in_cksum(sum, plen, vp)); 4018 } 4019 4020 vrrp_err_t 4021 vrrpd_state_i2m(vrrp_vr_t *vr) 4022 { 4023 vrrp_err_t err; 4024 4025 vrrp_log(VRRP_DBG1, "vrrpd_state_i2m(%s)", vr->vvr_conf.vvc_name); 4026 4027 vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_MASTER, vr); 4028 if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS) 4029 return (err); 4030 4031 (void) vrrpd_send_adv(vr, _B_FALSE); 4032 4033 vr->vvr_err = VRRP_SUCCESS; 4034 vr->vvr_timeout = vr->vvr_conf.vvc_adver_int; 4035 if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq, 4036 vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) { 4037 vrrp_log(VRRP_ERR, "vrrpd_state_i2m(): unable to start timer"); 4038 return (VRRP_ESYS); 4039 } else { 4040 vrrp_log(VRRP_DBG1, "vrrpd_state_i2m(%s): start " 4041 "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name, 4042 vr->vvr_timeout); 4043 } 4044 return (VRRP_SUCCESS); 4045 } 4046 4047 vrrp_err_t 4048 vrrpd_state_i2b(vrrp_vr_t *vr) 4049 { 4050 vrrp_err_t err; 4051 4052 vrrp_log(VRRP_DBG1, "vrrpd_state_i2b(%s)", vr->vvr_conf.vvc_name); 4053 4054 vrrpd_state_trans(VRRP_STATE_INIT, VRRP_STATE_BACKUP, vr); 4055 if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS) 4056 return (err); 4057 4058 /* 4059 * Reinitialize the 
Master advertisement interval to be the configured 4060 * value. 4061 */ 4062 vr->vvr_err = VRRP_SUCCESS; 4063 vr->vvr_master_adver_int = vr->vvr_conf.vvc_adver_int; 4064 vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr); 4065 if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq, 4066 vr->vvr_timeout, vrrp_b2m_timeout, vr)) == -1) { 4067 vrrp_log(VRRP_ERR, "vrrpd_state_i2b(): unable to set timer"); 4068 return (VRRP_ESYS); 4069 } else { 4070 vrrp_log(VRRP_DBG1, "vrrpd_state_i2b(%s): start " 4071 "vrrp_b2m_timeout(%d)", vr->vvr_conf.vvc_name, 4072 vr->vvr_timeout); 4073 } 4074 return (VRRP_SUCCESS); 4075 } 4076 4077 void 4078 vrrpd_state_m2i(vrrp_vr_t *vr) 4079 { 4080 vrrp_log(VRRP_DBG1, "vrrpd_state_m2i(%s)", vr->vvr_conf.vvc_name); 4081 4082 vrrpd_state_trans(VRRP_STATE_MASTER, VRRP_STATE_INIT, vr); 4083 (void) vrrpd_virtualip_update(vr, _B_TRUE); 4084 bzero(&vr->vvr_peer, sizeof (vrrp_peer_t)); 4085 (void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL); 4086 } 4087 4088 void 4089 vrrpd_state_b2i(vrrp_vr_t *vr) 4090 { 4091 vrrp_log(VRRP_DBG1, "vrrpd_state_b2i(%s)", vr->vvr_conf.vvc_name); 4092 4093 bzero(&vr->vvr_peer, sizeof (vrrp_peer_t)); 4094 (void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL); 4095 vrrpd_state_trans(VRRP_STATE_BACKUP, VRRP_STATE_INIT, vr); 4096 (void) vrrpd_virtualip_update(vr, _B_TRUE); 4097 } 4098 4099 /* ARGSUSED */ 4100 static void 4101 vrrp_b2m_timeout(iu_tq_t *tq, void *arg) 4102 { 4103 vrrp_vr_t *vr = (vrrp_vr_t *)arg; 4104 4105 vrrp_log(VRRP_DBG1, "vrrp_b2m_timeout(%s)", vr->vvr_conf.vvc_name); 4106 (void) vrrpd_state_b2m(vr); 4107 } 4108 4109 /* ARGSUSED */ 4110 static void 4111 vrrp_adv_timeout(iu_tq_t *tq, void *arg) 4112 { 4113 vrrp_vr_t *vr = (vrrp_vr_t *)arg; 4114 4115 vrrp_log(VRRP_DBG1, "vrrp_adv_timeout(%s)", vr->vvr_conf.vvc_name); 4116 4117 (void) vrrpd_send_adv(vr, _B_FALSE); 4118 if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq, 4119 vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) { 4120 
vrrp_log(VRRP_ERR, "vrrp_adv_timeout(%s): start timer failed", 4121 vr->vvr_conf.vvc_name); 4122 } else { 4123 vrrp_log(VRRP_DBG1, "vrrp_adv_timeout(%s): start " 4124 "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name, 4125 vr->vvr_timeout); 4126 } 4127 } 4128 4129 vrrp_err_t 4130 vrrpd_state_b2m(vrrp_vr_t *vr) 4131 { 4132 vrrp_err_t err; 4133 4134 vrrp_log(VRRP_DBG1, "vrrpd_state_b2m(%s)", vr->vvr_conf.vvc_name); 4135 4136 vrrpd_state_trans(VRRP_STATE_BACKUP, VRRP_STATE_MASTER, vr); 4137 if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS) 4138 return (err); 4139 (void) vrrpd_send_adv(vr, _B_FALSE); 4140 4141 vr->vvr_timeout = vr->vvr_conf.vvc_adver_int; 4142 if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq, 4143 vr->vvr_timeout, vrrp_adv_timeout, vr)) == -1) { 4144 vrrp_log(VRRP_ERR, "vrrpd_state_b2m(%s): start timer failed", 4145 vr->vvr_conf.vvc_name); 4146 return (VRRP_ESYS); 4147 } else { 4148 vrrp_log(VRRP_DBG1, "vrrpd_state_b2m(%s): start " 4149 "vrrp_adv_timeout(%d)", vr->vvr_conf.vvc_name, 4150 vr->vvr_timeout); 4151 } 4152 return (VRRP_SUCCESS); 4153 } 4154 4155 vrrp_err_t 4156 vrrpd_state_m2b(vrrp_vr_t *vr) 4157 { 4158 vrrp_err_t err; 4159 4160 vrrp_log(VRRP_DBG1, "vrrpd_state_m2b(%s)", vr->vvr_conf.vvc_name); 4161 4162 vrrpd_state_trans(VRRP_STATE_MASTER, VRRP_STATE_BACKUP, vr); 4163 if ((err = vrrpd_virtualip_update(vr, _B_FALSE)) != VRRP_SUCCESS) 4164 return (err); 4165 4166 /* 4167 * Cancel the adver_timer. 
4168 */ 4169 vr->vvr_master_adver_int = vr->vvr_peer_adver_int; 4170 (void) iu_cancel_timer(vrrpd_timerq, vr->vvr_timer_id, NULL); 4171 vr->vvr_timeout = MASTER_DOWN_INTERVAL_VR(vr); 4172 if ((vr->vvr_timer_id = iu_schedule_timer_ms(vrrpd_timerq, 4173 vr->vvr_timeout, vrrp_b2m_timeout, vr)) == -1) { 4174 vrrp_log(VRRP_ERR, "vrrpd_state_m2b(%s): start timer failed", 4175 vr->vvr_conf.vvc_name); 4176 } else { 4177 vrrp_log(VRRP_DBG1, "vrrpd_state_m2b(%s) start " 4178 "vrrp_b2m_timeout(%d)", vr->vvr_conf.vvc_name, 4179 vr->vvr_timeout); 4180 } 4181 return (VRRP_SUCCESS); 4182 } 4183 4184 /* 4185 * Set the IFF_NOACCESS flag on the VNIC interface of the VRRP router 4186 * based on its access mode. 4187 */ 4188 static vrrp_err_t 4189 vrrpd_set_noaccept(vrrp_vr_t *vr, boolean_t on) 4190 { 4191 vrrp_intf_t *vif = vr->vvr_vif; 4192 uint64_t curr_flags; 4193 struct lifreq lifr; 4194 int s; 4195 4196 vrrp_log(VRRP_DBG1, "vrrpd_set_noaccept(%s, %s)", 4197 vr->vvr_conf.vvc_name, on ? "on" : "off"); 4198 4199 /* 4200 * Possibly no virtual address exists on this VRRP router yet. 4201 */ 4202 if (vif == NULL) 4203 return (VRRP_SUCCESS); 4204 4205 vrrp_log(VRRP_DBG1, "vrrpd_set_noaccept(%s, %s)", 4206 vif->vvi_ifname, vrrp_state2str(vr->vvr_state)); 4207 4208 s = (vif->vvi_af == AF_INET) ? 
vrrpd_ctlsock_fd : vrrpd_ctlsock6_fd; 4209 (void) strncpy(lifr.lifr_name, vif->vvi_ifname, 4210 sizeof (lifr.lifr_name)); 4211 if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) { 4212 if (errno != ENXIO && errno != ENOENT) { 4213 vrrp_log(VRRP_ERR, "vrrpd_set_noaccept(): " 4214 "SIOCGLIFFLAGS on %s failed: %s", 4215 vif->vvi_ifname, strerror(errno)); 4216 } 4217 return (VRRP_ESYS); 4218 } 4219 4220 curr_flags = lifr.lifr_flags; 4221 if (on) 4222 lifr.lifr_flags |= IFF_NOACCEPT; 4223 else 4224 lifr.lifr_flags &= ~IFF_NOACCEPT; 4225 4226 if (lifr.lifr_flags != curr_flags) { 4227 if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) { 4228 if (errno != ENXIO && errno != ENOENT) { 4229 vrrp_log(VRRP_ERR, "vrrpd_set_noaccept(%s): " 4230 "SIOCSLIFFLAGS 0x%llx on %s failed: %s", 4231 on ? "no_accept" : "accept", 4232 lifr.lifr_flags, vif->vvi_ifname, 4233 strerror(errno)); 4234 } 4235 return (VRRP_ESYS); 4236 } 4237 } 4238 return (VRRP_SUCCESS); 4239 } 4240 4241 static vrrp_err_t 4242 vrrpd_virtualip_updateone(vrrp_intf_t *vif, vrrp_ip_t *ip, boolean_t checkonly) 4243 { 4244 vrrp_state_t state = vif->vvi_vr_state; 4245 struct lifreq lifr; 4246 char abuf[INET6_ADDRSTRLEN]; 4247 int af = vif->vvi_af; 4248 uint64_t curr_flags; 4249 int s; 4250 4251 assert(IS_VIRTUAL_INTF(vif)); 4252 4253 /* LINTED E_CONSTANT_CONDITION */ 4254 VRRPADDR2STR(af, &ip->vip_addr, abuf, INET6_ADDRSTRLEN, _B_FALSE); 4255 vrrp_log(VRRP_DBG1, "vrrpd_virtualip_updateone(%s, %s%s)", 4256 vif->vvi_ifname, abuf, checkonly ? ", checkonly" : ""); 4257 4258 s = (af == AF_INET) ? 
vrrpd_ctlsock_fd : vrrpd_ctlsock6_fd; 4259 (void) strncpy(lifr.lifr_name, ip->vip_lifname, 4260 sizeof (lifr.lifr_name)); 4261 if (ioctl(s, SIOCGLIFFLAGS, (caddr_t)&lifr) < 0) { 4262 if (errno != ENXIO && errno != ENOENT) { 4263 vrrp_log(VRRP_ERR, "vrrpd_virtualip_updateone(%s): " 4264 "SIOCGLIFFLAGS on %s/%s failed: %s", 4265 vif->vvi_ifname, lifr.lifr_name, abuf, 4266 strerror(errno)); 4267 } 4268 return (VRRP_ESYS); 4269 } 4270 4271 curr_flags = lifr.lifr_flags; 4272 if (state == VRRP_STATE_MASTER) 4273 lifr.lifr_flags |= IFF_UP; 4274 else 4275 lifr.lifr_flags &= ~IFF_UP; 4276 4277 if (lifr.lifr_flags == curr_flags) 4278 return (VRRP_SUCCESS); 4279 4280 if (checkonly) { 4281 vrrp_log(VRRP_ERR, "VRRP virtual IP %s/%s was brought %s", 4282 ip->vip_lifname, abuf, 4283 state == VRRP_STATE_MASTER ? "down" : "up"); 4284 return (VRRP_ESYS); 4285 } else if (ioctl(s, SIOCSLIFFLAGS, (caddr_t)&lifr) < 0) { 4286 if (errno != ENXIO && errno != ENOENT) { 4287 vrrp_log(VRRP_ERR, "vrrpd_virtualip_updateone(%s, %s): " 4288 "bring %s %s/%s failed: %s", 4289 vif->vvi_ifname, vrrp_state2str(state), 4290 state == VRRP_STATE_MASTER ? "up" : "down", 4291 ip->vip_lifname, abuf, strerror(errno)); 4292 } 4293 return (VRRP_ESYS); 4294 } 4295 return (VRRP_SUCCESS); 4296 } 4297 4298 static vrrp_err_t 4299 vrrpd_virtualip_update(vrrp_vr_t *vr, boolean_t checkonly) 4300 { 4301 vrrp_state_t state; 4302 vrrp_intf_t *vif = vr->vvr_vif; 4303 vrrp_ip_t *ip, *nextip; 4304 char abuf[INET6_ADDRSTRLEN]; 4305 vrrp_err_t err; 4306 4307 vrrp_log(VRRP_DBG1, "vrrpd_virtualip_update(%s, %s, %s)%s", 4308 vr->vvr_conf.vvc_name, vrrp_state2str(vr->vvr_state), 4309 vif->vvi_ifname, checkonly ? 
" checkonly" : ""); 4310 4311 state = vr->vvr_state; 4312 assert(vif != NULL); 4313 assert(IS_VIRTUAL_INTF(vif)); 4314 assert(vif->vvi_vr_state != state); 4315 vif->vvi_vr_state = state; 4316 for (ip = TAILQ_FIRST(&vif->vvi_iplist); ip != NULL; ip = nextip) { 4317 nextip = TAILQ_NEXT(ip, vip_next); 4318 err = vrrpd_virtualip_updateone(vif, ip, _B_FALSE); 4319 if (!checkonly && err != VRRP_SUCCESS) { 4320 /* LINTED E_CONSTANT_CONDITION */ 4321 VRRPADDR2STR(vif->vvi_af, &ip->vip_addr, abuf, 4322 INET6_ADDRSTRLEN, _B_FALSE); 4323 vrrp_log(VRRP_DBG1, "vrrpd_virtualip_update() update " 4324 "%s over %s failed", abuf, vif->vvi_ifname); 4325 vrrpd_delete_ip(vif, ip); 4326 } 4327 } 4328 4329 /* 4330 * The IP address is deleted when it is failed to be brought 4331 * up. If no IP addresses are left, delete this interface. 4332 */ 4333 if (!checkonly && TAILQ_EMPTY(&vif->vvi_iplist)) { 4334 vrrp_log(VRRP_DBG0, "vrrpd_virtualip_update(): " 4335 "no IP left over %s", vif->vvi_ifname); 4336 vrrpd_delete_if(vif, _B_TRUE); 4337 return (VRRP_ENOVIRT); 4338 } 4339 return (VRRP_SUCCESS); 4340 } 4341 4342 void 4343 vrrpd_state_trans(vrrp_state_t prev_s, vrrp_state_t s, vrrp_vr_t *vr) 4344 { 4345 vrrp_log(VRRP_DBG1, "vrrpd_state_trans(%s): %s --> %s", 4346 vr->vvr_conf.vvc_name, vrrp_state2str(prev_s), vrrp_state2str(s)); 4347 4348 assert(vr->vvr_state == prev_s); 4349 vr->vvr_state = s; 4350 vr->vvr_prev_state = prev_s; 4351 (void) gettimeofday(&vr->vvr_st_time, NULL); 4352 (void) vrrpd_post_event(vr->vvr_conf.vvc_name, prev_s, s); 4353 } 4354 4355 static int 4356 vrrpd_post_event(const char *name, vrrp_state_t prev_st, vrrp_state_t st) 4357 { 4358 sysevent_id_t eid; 4359 nvlist_t *nvl = NULL; 4360 4361 /* 4362 * sysevent is not supported in the non-global zone 4363 */ 4364 if (getzoneid() != GLOBAL_ZONEID) 4365 return (0); 4366 4367 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0) 4368 goto failed; 4369 4370 if (nvlist_add_uint8(nvl, VRRP_EVENT_VERSION, 4371 VRRP_EVENT_CUR_VERSION) != 
0) 4372 goto failed; 4373 4374 if (nvlist_add_string(nvl, VRRP_EVENT_ROUTER_NAME, name) != 0) 4375 goto failed; 4376 4377 if (nvlist_add_uint8(nvl, VRRP_EVENT_STATE, st) != 0) 4378 goto failed; 4379 4380 if (nvlist_add_uint8(nvl, VRRP_EVENT_PREV_STATE, prev_st) != 0) 4381 goto failed; 4382 4383 if (sysevent_post_event(EC_VRRP, ESC_VRRP_STATE_CHANGE, 4384 SUNW_VENDOR, VRRP_EVENT_PUBLISHER, nvl, &eid) == 0) { 4385 nvlist_free(nvl); 4386 return (0); 4387 } 4388 4389 failed: 4390 vrrp_log(VRRP_ERR, "vrrpd_post_event(): `state change (%s --> %s)' " 4391 "sysevent posting failed: %s", vrrp_state2str(prev_st), 4392 vrrp_state2str(st), strerror(errno)); 4393 4394 if (nvl != NULL) 4395 nvlist_free(nvl); 4396 return (-1); 4397 } 4398 4399 /* 4400 * timeval processing functions 4401 */ 4402 static int 4403 timeval_to_milli(struct timeval tv) 4404 { 4405 return ((int)(tv.tv_sec * 1000 + tv.tv_usec / 1000 + 0.5)); 4406 } 4407 4408 static struct timeval 4409 timeval_delta(struct timeval t1, struct timeval t2) 4410 { 4411 struct timeval t; 4412 t.tv_sec = t1.tv_sec - t2.tv_sec; 4413 t.tv_usec = t1.tv_usec - t2.tv_usec; 4414 4415 if (t.tv_usec < 0) { 4416 t.tv_usec += 1000000; 4417 t.tv_sec--; 4418 } 4419 return (t); 4420 } 4421 4422 /* 4423 * print error messages to the terminal or to syslog 4424 */ 4425 static void 4426 vrrp_log(int level, char *message, ...) 4427 { 4428 va_list ap; 4429 int log_level = -1; 4430 4431 va_start(ap, message); 4432 4433 if (vrrp_logflag == 0) { 4434 if (level <= vrrp_debug_level) { 4435 /* 4436 * VRRP_ERR goes to stderr, others go to stdout 4437 */ 4438 FILE *out = (level <= VRRP_ERR) ? 
stderr : stdout; 4439 /* LINTED: E_SEC_PRINTF_VAR_FMT */ 4440 (void) vfprintf(out, message, ap); 4441 (void) fprintf(out, "\n"); 4442 (void) fflush(out); 4443 } 4444 va_end(ap); 4445 return; 4446 } 4447 4448 /* 4449 * translate VRRP_* to LOG_* 4450 */ 4451 switch (level) { 4452 case VRRP_ERR: 4453 log_level = LOG_ERR; 4454 break; 4455 case VRRP_WARNING: 4456 log_level = LOG_WARNING; 4457 break; 4458 case VRRP_NOTICE: 4459 log_level = LOG_NOTICE; 4460 break; 4461 case VRRP_DBG0: 4462 log_level = LOG_INFO; 4463 break; 4464 default: 4465 log_level = LOG_DEBUG; 4466 break; 4467 } 4468 4469 /* LINTED: E_SEC_PRINTF_VAR_FMT */ 4470 (void) vsyslog(log_level, message, ap); 4471 va_end(ap); 4472 } 4473