1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <inet/ip_arp.h> 28 #include <inet/ip_ndp.h> 29 #include <net/if_arp.h> 30 #include <netinet/if_ether.h> 31 #include <sys/strsubr.h> 32 #include <inet/ip6.h> 33 #include <inet/ip.h> 34 #include <inet/ip_ire.h> 35 #include <inet/ip_if.h> 36 #include <sys/dlpi.h> 37 #include <sys/sunddi.h> 38 #include <sys/strsun.h> 39 #include <sys/sdt.h> 40 #include <inet/mi.h> 41 #include <inet/arp.h> 42 #include <inet/ipdrop.h> 43 #include <sys/sockio.h> 44 #include <inet/ip_impl.h> 45 #include <sys/policy.h> 46 47 #define ARL_LL_ADDR_OFFSET(arl) (((arl)->arl_sap_length) < 0 ? \ 48 (sizeof (dl_unitdata_req_t)) : \ 49 ((sizeof (dl_unitdata_req_t)) + (ABS((arl)->arl_sap_length)))) 50 51 /* 52 * MAC-specific intelligence. Shouldn't be needed, but the DL_INFO_ACK 53 * doesn't quite do it for us. 54 */ 55 typedef struct arp_m_s { 56 t_uscalar_t arp_mac_type; 57 uint32_t arp_mac_arp_hw_type; 58 t_scalar_t arp_mac_sap_length; 59 uint32_t arp_mac_hw_addr_length; 60 } arp_m_t; 61 62 static int arp_close(queue_t *, int); 63 static void arp_rput(queue_t *, mblk_t *); 64 static void arp_wput(queue_t *, mblk_t *); 65 static arp_m_t *arp_m_lookup(t_uscalar_t mac_type); 66 static void arp_notify(ipaddr_t, mblk_t *, uint32_t, ip_recv_attr_t *, 67 ncec_t *); 68 static int arp_output(ill_t *, uint32_t, const uchar_t *, const uchar_t *, 69 const uchar_t *, const uchar_t *, uchar_t *); 70 static int arp_modclose(arl_t *); 71 static void arp_mod_close_tail(arl_t *); 72 static mblk_t *arl_unbind(arl_t *); 73 static void arp_process_packet(ill_t *, mblk_t *); 74 static void arp_excl(ipsq_t *, queue_t *, mblk_t *, void *); 75 static void arp_drop_packet(const char *str, mblk_t *, ill_t *); 76 static int arp_open(queue_t *, dev_t *, int, int, cred_t *); 77 static int ip_sioctl_ifunitsel_arp(queue_t *, int *); 78 static int ip_sioctl_slifname_arp(queue_t *, void *); 79 static void arp_dlpi_send(arl_t *, mblk_t *); 80 static void arl_defaults_common(arl_t *, mblk_t *); 81 static int arp_modopen(queue_t *, dev_t *, int, int, cred_t *); 82 static void arp_ifname_notify(arl_t *); 83 static void arp_rput_dlpi_writer(ipsq_t *, queue_t *, mblk_t *, void *); 84 static arl_t *ill_to_arl(ill_t *); 85 86 #define DL_PRIM(mp) (((union DL_primitives *)(mp)->b_rptr)->dl_primitive) 87 #define IS_DLPI_DATA(mp) \ 88 ((DB_TYPE(mp) == M_PROTO) && \ 89 MBLKL(mp) >= sizeof (dl_unitdata_ind_t) && \ 90 (DL_PRIM(mp) == DL_UNITDATA_IND)) 91 92 #define AR_NOTFOUND 1 /* No matching ace found in cache */ 93 #define AR_MERGED 2 /* Matching ace updated (RFC 826 Merge_flag) */ 94 #define AR_LOOPBACK 3 /* Our own arp packet was received */ 95 #define AR_BOGON 4 /* Another host has our IP addr. */ 96 #define AR_FAILED 5 /* Duplicate Address Detection has failed */ 97 #define AR_CHANGED 6 /* Address has changed; tell IP (and merged) */ 98 99 boolean_t arp_no_defense; 100 101 struct module_info arp_mod_info = { 102 IP_MOD_ID, "arpip", 1, INFPSZ, 65536, 1024 103 }; 104 static struct qinit rinit_arp = { 105 (pfi_t)arp_rput, NULL, arp_open, arp_close, NULL, &arp_mod_info 106 }; 107 static struct qinit winit_arp = { 108 (pfi_t)arp_wput, NULL, arp_open, arp_close, NULL, 109 &arp_mod_info 110 }; 111 struct streamtab arpinfo = { 112 &rinit_arp, &winit_arp 113 }; 114 #define ARH_FIXED_LEN 8 115 #define AR_LL_HDR_SLACK 32 116 117 /* 118 * pfhooks for ARP. 119 */ 120 #define ARP_HOOK_IN(_hook, _event, _ilp, _hdr, _fm, _m, ipst) \ 121 \ 122 if ((_hook).he_interested) { \ 123 hook_pkt_event_t info; \ 124 \ 125 info.hpe_protocol = ipst->ips_arp_net_data; \ 126 info.hpe_ifp = _ilp; \ 127 info.hpe_ofp = 0; \ 128 info.hpe_hdr = _hdr; \ 129 info.hpe_mp = &(_fm); \ 130 info.hpe_mb = _m; \ 131 if (hook_run(ipst->ips_arp_net_data->netd_hooks, \ 132 _event, (hook_data_t)&info) != 0) { \ 133 if (_fm != NULL) { \ 134 freemsg(_fm); \ 135 _fm = NULL; \ 136 } \ 137 _hdr = NULL; \ 138 _m = NULL; \ 139 } else { \ 140 _hdr = info.hpe_hdr; \ 141 _m = info.hpe_mb; \ 142 } \ 143 } 144 145 #define ARP_HOOK_OUT(_hook, _event, _olp, _hdr, _fm, _m, ipst) \ 146 \ 147 if ((_hook).he_interested) { \ 148 hook_pkt_event_t info; \ 149 \ 150 info.hpe_protocol = ipst->ips_arp_net_data; \ 151 info.hpe_ifp = 0; \ 152 info.hpe_ofp = _olp; \ 153 info.hpe_hdr = _hdr; \ 154 info.hpe_mp = &(_fm); \ 155 info.hpe_mb = _m; \ 156 if (hook_run(ipst->ips_arp_net_data->netd_hooks, \ 157 _event, (hook_data_t)&info) != 0) { \ 158 if (_fm != NULL) { \ 159 freemsg(_fm); \ 160 _fm = NULL; \ 161 } \ 162 _hdr = NULL; \ 163 _m = NULL; \ 164 } else { \ 165 _hdr = info.hpe_hdr; \ 166 _m = info.hpe_mb; \ 167 } \ 168 } 169 170 static arp_m_t arp_m_tbl[] = { 171 { DL_CSMACD, ARPHRD_ETHER, -2, 6}, /* 802.3 */ 172 { DL_TPB, ARPHRD_IEEE802, -2, 6}, /* 802.4 */ 173 { DL_TPR, ARPHRD_IEEE802, -2, 6}, /* 802.5 */ 174 { DL_METRO, ARPHRD_IEEE802, -2, 6}, /* 802.6 */ 175 { DL_ETHER, ARPHRD_ETHER, -2, 6}, /* Ethernet */ 176 { DL_FDDI, ARPHRD_ETHER, -2, 6}, /* FDDI */ 177 { DL_IB, ARPHRD_IB, -2, 20}, /* Infiniband */ 178 { DL_OTHER, ARPHRD_ETHER, -2, 6} /* unknown */ 179 }; 180 181 static void 182 arl_refhold_locked(arl_t *arl) 183 { 184 ASSERT(MUTEX_HELD(&arl->arl_lock)); 185 arl->arl_refcnt++; 186 ASSERT(arl->arl_refcnt != 0); 187 } 188 189 static void 190 arl_refrele(arl_t *arl) 191 { 192 mutex_enter(&arl->arl_lock); 193 ASSERT(arl->arl_refcnt != 0); 194 arl->arl_refcnt--; 195 if (arl->arl_refcnt > 1) { 196 mutex_exit(&arl->arl_lock); 197 return; 198 } 199 200 /* ill_close or arp_unbind_complete may be waiting */ 201 cv_broadcast(&arl->arl_cv); 202 mutex_exit(&arl->arl_lock); 203 } 204 205 /* 206 * wake up any pending ip ioctls. 207 */ 208 static void 209 arp_cmd_done(ill_t *ill, int err, t_uscalar_t lastprim) 210 { 211 if (lastprim == DL_UNBIND_REQ && ill->ill_replumbing) 212 arp_replumb_done(ill, 0); 213 else 214 arp_bringup_done(ill, err); 215 } 216 217 static int 218 ip_nce_resolve_all(ill_t *ill, uchar_t *src_haddr, uint32_t hlen, 219 const in_addr_t *src_paddr, ncec_t **sncec, int op) 220 { 221 int retv; 222 ncec_t *ncec; 223 boolean_t ll_changed; 224 uchar_t *lladdr = NULL; 225 int new_state; 226 227 ASSERT(ill != NULL); 228 229 ncec = ncec_lookup_illgrp_v4(ill, src_paddr); 230 *sncec = ncec; 231 232 if (ncec == NULL) { 233 retv = AR_NOTFOUND; 234 goto done; 235 } 236 237 mutex_enter(&ncec->ncec_lock); 238 /* 239 * IP addr and hardware address match what we already 240 * have, then this is a broadcast packet emitted by one of our 241 * interfaces, reflected by the switch and received on another 242 * interface. We return AR_LOOPBACK. 243 */ 244 lladdr = ncec->ncec_lladdr; 245 if (NCE_MYADDR(ncec) && hlen == ncec->ncec_ill->ill_phys_addr_length && 246 bcmp(lladdr, src_haddr, hlen) == 0) { 247 mutex_exit(&ncec->ncec_lock); 248 retv = AR_LOOPBACK; 249 goto done; 250 } 251 /* 252 * If the entry is unverified, then we've just verified that 253 * someone else already owns this address, because this is a 254 * message with the same protocol address but different 255 * hardware address. 256 */ 257 if (ncec->ncec_flags & NCE_F_UNVERIFIED) { 258 mutex_exit(&ncec->ncec_lock); 259 ncec_delete(ncec); 260 ncec_refrele(ncec); 261 *sncec = NULL; 262 retv = AR_FAILED; 263 goto done; 264 } 265 266 /* 267 * If the IP address matches ours and we're authoritative for 268 * this entry, then some other node is using our IP addr, so 269 * return AR_BOGON. Also reset the transmit count to zero so 270 * that, if we're currently in initial announcement mode, we 271 * switch back to the lazier defense mode. Knowing that 272 * there's at least one duplicate out there, we ought not 273 * blindly announce. 274 * 275 * NCE_F_AUTHORITY is set in one of two ways: 276 * 1. /sbin/arp told us so, via the "permanent" flag. 277 * 2. This is one of my addresses. 278 */ 279 if (ncec->ncec_flags & NCE_F_AUTHORITY) { 280 ncec->ncec_unsolicit_count = 0; 281 mutex_exit(&ncec->ncec_lock); 282 retv = AR_BOGON; 283 goto done; 284 } 285 286 /* 287 * No address conflict was detected, and we are getting 288 * ready to update the ncec's hwaddr. The nce MUST NOT be on an 289 * under interface, because all dynamic nce's are created on the 290 * native interface (in the non-IPMP case) or on the IPMP 291 * meta-interface (in the IPMP case) 292 */ 293 ASSERT(!IS_UNDER_IPMP(ncec->ncec_ill)); 294 295 /* 296 * update ncec with src_haddr, hlen. 297 * 298 * We are trying to resolve this ncec_addr/src_paddr and we 299 * got a REQUEST/RESPONSE from the ncec_addr/src_paddr. 300 * So the new_state is at least "STALE". If, in addition, 301 * this a solicited, unicast ARP_RESPONSE, we can transition 302 * to REACHABLE. 303 */ 304 new_state = ND_STALE; 305 ip1dbg(("got info for ncec %p from addr %x\n", 306 (void *)ncec, *src_paddr)); 307 retv = AR_MERGED; 308 if (ncec->ncec_state == ND_INCOMPLETE || 309 ncec->ncec_state == ND_INITIAL) { 310 ll_changed = B_TRUE; 311 } else { 312 ll_changed = nce_cmp_ll_addr(ncec, src_haddr, hlen); 313 if (!ll_changed) 314 new_state = ND_UNCHANGED; 315 else 316 retv = AR_CHANGED; 317 } 318 /* 319 * We don't have the equivalent of the IPv6 'S' flag indicating 320 * a solicited response, so we assume that if we are in 321 * INCOMPLETE, or got back an unchanged lladdr in PROBE state, 322 * and this is an ARP_RESPONSE, it must be a 323 * solicited response allowing us to transtion to REACHABLE. 324 */ 325 if (op == ARP_RESPONSE) { 326 switch (ncec->ncec_state) { 327 case ND_PROBE: 328 new_state = (ll_changed ? ND_STALE : ND_REACHABLE); 329 break; 330 case ND_INCOMPLETE: 331 new_state = ND_REACHABLE; 332 break; 333 } 334 } 335 /* 336 * Call nce_update() to refresh fastpath information on any 337 * dependent nce_t entries. 338 */ 339 nce_update(ncec, new_state, (ll_changed ? src_haddr : NULL)); 340 mutex_exit(&ncec->ncec_lock); 341 nce_resolv_ok(ncec); 342 done: 343 return (retv); 344 } 345 346 /* Find an entry for a particular MAC type in the arp_m_tbl. */ 347 static arp_m_t * 348 arp_m_lookup(t_uscalar_t mac_type) 349 { 350 arp_m_t *arm; 351 352 for (arm = arp_m_tbl; arm < A_END(arp_m_tbl); arm++) { 353 if (arm->arp_mac_type == mac_type) 354 return (arm); 355 } 356 return (NULL); 357 } 358 359 static uint32_t 360 arp_hw_type(t_uscalar_t mactype) 361 { 362 arp_m_t *arm; 363 364 if ((arm = arp_m_lookup(mactype)) == NULL) 365 arm = arp_m_lookup(DL_OTHER); 366 return (arm->arp_mac_arp_hw_type); 367 } 368 369 /* 370 * Called when an DLPI control message has been acked; send down the next 371 * queued message (if any). 372 * The DLPI messages of interest being bind, attach and unbind since 373 * these are the only ones sent by ARP via arp_dlpi_send. 374 */ 375 static void 376 arp_dlpi_done(arl_t *arl, ill_t *ill) 377 { 378 mblk_t *mp; 379 int err; 380 t_uscalar_t prim; 381 382 mutex_enter(&arl->arl_lock); 383 prim = arl->arl_dlpi_pending; 384 385 if ((mp = arl->arl_dlpi_deferred) == NULL) { 386 arl->arl_dlpi_pending = DL_PRIM_INVAL; 387 if (arl->arl_state_flags & ARL_LL_DOWN) 388 err = ENETDOWN; 389 else 390 err = 0; 391 mutex_exit(&arl->arl_lock); 392 393 mutex_enter(&ill->ill_lock); 394 ill->ill_arl_dlpi_pending = 0; 395 mutex_exit(&ill->ill_lock); 396 arp_cmd_done(ill, err, prim); 397 return; 398 } 399 400 arl->arl_dlpi_deferred = mp->b_next; 401 mp->b_next = NULL; 402 403 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 404 405 arl->arl_dlpi_pending = DL_PRIM(mp); 406 mutex_exit(&arl->arl_lock); 407 408 mutex_enter(&ill->ill_lock); 409 ill->ill_arl_dlpi_pending = 1; 410 mutex_exit(&ill->ill_lock); 411 412 putnext(arl->arl_wq, mp); 413 } 414 415 /* 416 * This routine is called during module initialization when the DL_INFO_ACK 417 * comes back from the device. We set up defaults for all the device dependent 418 * doo-dads we are going to need. This will leave us ready to roll if we are 419 * attempting auto-configuration. Alternatively, these defaults can be 420 * overridden by initialization procedures possessing higher intelligence. 421 * 422 * Caller will free the mp. 423 */ 424 static void 425 arp_ll_set_defaults(arl_t *arl, mblk_t *mp) 426 { 427 arp_m_t *arm; 428 dl_info_ack_t *dlia = (dl_info_ack_t *)mp->b_rptr; 429 430 if ((arm = arp_m_lookup(dlia->dl_mac_type)) == NULL) 431 arm = arp_m_lookup(DL_OTHER); 432 ASSERT(arm != NULL); 433 434 /* 435 * We initialize based on parameters in the (currently) not too 436 * exhaustive arp_m_tbl. 437 */ 438 if (dlia->dl_version == DL_VERSION_2) { 439 arl->arl_sap_length = dlia->dl_sap_length; 440 arl->arl_phys_addr_length = dlia->dl_brdcst_addr_length; 441 if (dlia->dl_provider_style == DL_STYLE2) 442 arl->arl_needs_attach = 1; 443 } else { 444 arl->arl_sap_length = arm->arp_mac_sap_length; 445 arl->arl_phys_addr_length = arm->arp_mac_hw_addr_length; 446 } 447 /* 448 * Note: the arp_hw_type in the arp header may be derived from 449 * the ill_mac_type and arp_m_lookup(). 450 */ 451 arl->arl_sap = ETHERTYPE_ARP; 452 arl_defaults_common(arl, mp); 453 } 454 455 static void 456 arp_wput(queue_t *q, mblk_t *mp) 457 { 458 int err = EINVAL; 459 struct iocblk *ioc; 460 mblk_t *mp1; 461 462 switch (DB_TYPE(mp)) { 463 case M_IOCTL: 464 ASSERT(q->q_next != NULL); 465 ioc = (struct iocblk *)mp->b_rptr; 466 if (ioc->ioc_cmd != SIOCSLIFNAME && 467 ioc->ioc_cmd != IF_UNITSEL) { 468 DTRACE_PROBE4(arl__dlpi, char *, "arp_wput", 469 char *, "<some ioctl>", char *, "-", 470 arl_t *, (arl_t *)q->q_ptr); 471 putnext(q, mp); 472 return; 473 } 474 if ((mp1 = mp->b_cont) == 0) 475 err = EINVAL; 476 else if (ioc->ioc_cmd == SIOCSLIFNAME) 477 err = ip_sioctl_slifname_arp(q, mp1->b_rptr); 478 else if (ioc->ioc_cmd == IF_UNITSEL) 479 err = ip_sioctl_ifunitsel_arp(q, (int *)mp1->b_rptr); 480 if (err == 0) 481 miocack(q, mp, 0, 0); 482 else 483 miocnak(q, mp, 0, err); 484 return; 485 default: 486 DTRACE_PROBE4(arl__dlpi, char *, "arp_wput default", 487 char *, "default mblk", char *, "-", 488 arl_t *, (arl_t *)q->q_ptr); 489 putnext(q, mp); 490 return; 491 } 492 } 493 494 /* 495 * similar to ill_dlpi_pending(): verify that the received DLPI response 496 * matches the one that is pending for the arl. 497 */ 498 static boolean_t 499 arl_dlpi_pending(arl_t *arl, t_uscalar_t prim) 500 { 501 t_uscalar_t pending; 502 503 mutex_enter(&arl->arl_lock); 504 if (arl->arl_dlpi_pending == prim) { 505 mutex_exit(&arl->arl_lock); 506 return (B_TRUE); 507 } 508 509 if (arl->arl_state_flags & ARL_CONDEMNED) { 510 mutex_exit(&arl->arl_lock); 511 return (B_FALSE); 512 } 513 pending = arl->arl_dlpi_pending; 514 mutex_exit(&arl->arl_lock); 515 516 if (pending == DL_PRIM_INVAL) { 517 ip0dbg(("arl_dlpi_pending unsolicited ack for %s on %s", 518 dl_primstr(prim), arl->arl_name)); 519 } else { 520 ip0dbg(("arl_dlpi_pending ack for %s on %s expect %s", 521 dl_primstr(prim), arl->arl_name, dl_primstr(pending))); 522 } 523 return (B_FALSE); 524 } 525 526 /* DLPI messages, other than DL_UNITDATA_IND are handled here. */ 527 static void 528 arp_rput_dlpi(queue_t *q, mblk_t *mp) 529 { 530 arl_t *arl = (arl_t *)q->q_ptr; 531 union DL_primitives *dlp; 532 t_uscalar_t prim; 533 t_uscalar_t reqprim = DL_PRIM_INVAL; 534 ill_t *ill; 535 536 if ((mp->b_wptr - mp->b_rptr) < sizeof (dlp->dl_primitive)) { 537 putnext(q, mp); 538 return; 539 } 540 dlp = (union DL_primitives *)mp->b_rptr; 541 prim = dlp->dl_primitive; 542 543 /* 544 * If we received an ACK but didn't send a request for it, then it 545 * can't be part of any pending operation; discard up-front. 546 */ 547 switch (prim) { 548 case DL_ERROR_ACK: 549 /* 550 * ce is confused about how DLPI works, so we have to interpret 551 * an "error" on DL_NOTIFY_ACK (which we never could have sent) 552 * as really meaning an error on DL_NOTIFY_REQ. 553 * 554 * Note that supporting DL_NOTIFY_REQ is optional, so printing 555 * out an error message on the console isn't warranted except 556 * for debug. 557 */ 558 if (dlp->error_ack.dl_error_primitive == DL_NOTIFY_ACK || 559 dlp->error_ack.dl_error_primitive == DL_NOTIFY_REQ) { 560 reqprim = DL_NOTIFY_REQ; 561 } else { 562 reqprim = dlp->error_ack.dl_error_primitive; 563 } 564 break; 565 case DL_INFO_ACK: 566 reqprim = DL_INFO_REQ; 567 break; 568 case DL_OK_ACK: 569 reqprim = dlp->ok_ack.dl_correct_primitive; 570 break; 571 case DL_BIND_ACK: 572 reqprim = DL_BIND_REQ; 573 break; 574 default: 575 DTRACE_PROBE2(rput_dl_badprim, arl_t *, arl, 576 union DL_primitives *, dlp); 577 putnext(q, mp); 578 return; 579 } 580 if (reqprim == DL_PRIM_INVAL || !arl_dlpi_pending(arl, reqprim)) { 581 freemsg(mp); 582 return; 583 } 584 DTRACE_PROBE4(arl__dlpi, char *, "arp_rput_dlpi received", 585 char *, dl_primstr(prim), char *, dl_primstr(reqprim), 586 arl_t *, arl); 587 588 ASSERT(prim != DL_NOTIFY_IND); 589 590 ill = arl_to_ill(arl); 591 592 switch (reqprim) { 593 case DL_INFO_REQ: 594 /* 595 * ill has not been set up yet for this case. This is the 596 * DL_INFO_ACK for the first DL_INFO_REQ sent from 597 * arp_modopen(). There should be no other arl_dlpi_deferred 598 * messages pending. We initialize the arl here. 599 */ 600 ASSERT(!arl->arl_dlpi_style_set); 601 ASSERT(arl->arl_dlpi_pending == DL_INFO_REQ); 602 ASSERT(arl->arl_dlpi_deferred == NULL); 603 arl->arl_dlpi_pending = DL_PRIM_INVAL; 604 arp_ll_set_defaults(arl, mp); 605 freemsg(mp); 606 return; 607 case DL_UNBIND_REQ: 608 mutex_enter(&arl->arl_lock); 609 arl->arl_state_flags &= ~ARL_DL_UNBIND_IN_PROGRESS; 610 /* 611 * This is not an error, so we don't set ARL_LL_DOWN 612 */ 613 arl->arl_state_flags &= ~ARL_LL_UP; 614 arl->arl_state_flags |= ARL_LL_UNBOUND; 615 if (arl->arl_state_flags & ARL_CONDEMNED) { 616 /* 617 * if this is part of the unplumb the arl may 618 * vaporize any moment after we cv_signal the 619 * arl_cv so we reset arl_dlpi_pending here. 620 * All other cases (including replumb) will 621 * have the arl_dlpi_pending reset in 622 * arp_dlpi_done. 623 */ 624 arl->arl_dlpi_pending = DL_PRIM_INVAL; 625 } 626 cv_signal(&arl->arl_cv); 627 mutex_exit(&arl->arl_lock); 628 break; 629 } 630 if (ill != NULL) { 631 /* 632 * ill ref obtained by arl_to_ill() will be released 633 * by qwriter_ip() 634 */ 635 qwriter_ip(ill, ill->ill_wq, mp, arp_rput_dlpi_writer, 636 CUR_OP, B_TRUE); 637 return; 638 } 639 freemsg(mp); 640 } 641 642 /* 643 * Handling of DLPI messages that require exclusive access to the ipsq. 644 */ 645 /* ARGSUSED */ 646 static void 647 arp_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) 648 { 649 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr; 650 ill_t *ill = (ill_t *)q->q_ptr; 651 arl_t *arl = ill_to_arl(ill); 652 653 if (arl == NULL) { 654 /* 655 * happens as a result arp_modclose triggering unbind. 656 * arp_rput_dlpi will cv_signal the arl_cv and the modclose 657 * will complete, but when it does ipsq_exit, the waiting 658 * qwriter_ip gets into the ipsq but will find the arl null. 659 * There should be no deferred messages in this case, so 660 * just complete and exit. 661 */ 662 arp_cmd_done(ill, 0, DL_UNBIND_REQ); 663 freemsg(mp); 664 return; 665 } 666 switch (dlp->dl_primitive) { 667 case DL_ERROR_ACK: 668 switch (dlp->error_ack.dl_error_primitive) { 669 case DL_UNBIND_REQ: 670 mutex_enter(&arl->arl_lock); 671 arl->arl_state_flags &= ~ARL_DL_UNBIND_IN_PROGRESS; 672 arl->arl_state_flags &= ~ARL_LL_UP; 673 arl->arl_state_flags |= ARL_LL_UNBOUND; 674 arl->arl_state_flags |= ARL_LL_DOWN; 675 cv_signal(&arl->arl_cv); 676 mutex_exit(&arl->arl_lock); 677 break; 678 case DL_BIND_REQ: 679 mutex_enter(&arl->arl_lock); 680 arl->arl_state_flags &= ~ARL_LL_UP; 681 arl->arl_state_flags |= ARL_LL_DOWN; 682 arl->arl_state_flags |= ARL_LL_UNBOUND; 683 cv_signal(&arl->arl_cv); 684 mutex_exit(&arl->arl_lock); 685 break; 686 case DL_ATTACH_REQ: 687 break; 688 default: 689 /* If it's anything else, we didn't send it. */ 690 arl_refrele(arl); 691 putnext(q, mp); 692 return; 693 } 694 break; 695 case DL_OK_ACK: 696 DTRACE_PROBE4(arl__dlpi, char *, "arp_rput_dlpi_writer ok", 697 char *, dl_primstr(dlp->ok_ack.dl_correct_primitive), 698 char *, dl_primstr(dlp->ok_ack.dl_correct_primitive), 699 arl_t *, arl); 700 mutex_enter(&arl->arl_lock); 701 switch (dlp->ok_ack.dl_correct_primitive) { 702 case DL_UNBIND_REQ: 703 case DL_ATTACH_REQ: 704 break; 705 default: 706 ip0dbg(("Dropping unrecognized DL_OK_ACK for %s", 707 dl_primstr(dlp->ok_ack.dl_correct_primitive))); 708 mutex_exit(&arl->arl_lock); 709 arl_refrele(arl); 710 freemsg(mp); 711 return; 712 } 713 mutex_exit(&arl->arl_lock); 714 break; 715 case DL_BIND_ACK: 716 DTRACE_PROBE2(rput_dl_bind, arl_t *, arl, 717 dl_bind_ack_t *, &dlp->bind_ack); 718 719 mutex_enter(&arl->arl_lock); 720 ASSERT(arl->arl_state_flags & ARL_LL_BIND_PENDING); 721 arl->arl_state_flags &= 722 ~(ARL_LL_BIND_PENDING|ARL_LL_DOWN|ARL_LL_UNBOUND); 723 arl->arl_state_flags |= ARL_LL_UP; 724 mutex_exit(&arl->arl_lock); 725 break; 726 case DL_UDERROR_IND: 727 DTRACE_PROBE2(rput_dl_uderror, arl_t *, arl, 728 dl_uderror_ind_t *, &dlp->uderror_ind); 729 arl_refrele(arl); 730 putnext(q, mp); 731 return; 732 default: 733 DTRACE_PROBE2(rput_dl_badprim, arl_t *, arl, 734 union DL_primitives *, dlp); 735 arl_refrele(arl); 736 putnext(q, mp); 737 return; 738 } 739 arp_dlpi_done(arl, ill); 740 arl_refrele(arl); 741 freemsg(mp); 742 } 743 744 void 745 arp_rput(queue_t *q, mblk_t *mp) 746 { 747 arl_t *arl = q->q_ptr; 748 boolean_t need_refrele = B_FALSE; 749 750 mutex_enter(&arl->arl_lock); 751 if (((arl->arl_state_flags & 752 (ARL_CONDEMNED | ARL_LL_REPLUMBING)) != 0)) { 753 /* 754 * Only allow high priority DLPI messages during unplumb or 755 * replumb, and we don't take an arl_refcnt for that case. 756 */ 757 if (DB_TYPE(mp) != M_PCPROTO) { 758 mutex_exit(&arl->arl_lock); 759 freemsg(mp); 760 return; 761 } 762 } else { 763 arl_refhold_locked(arl); 764 need_refrele = B_TRUE; 765 } 766 mutex_exit(&arl->arl_lock); 767 768 switch (DB_TYPE(mp)) { 769 case M_PCPROTO: 770 case M_PROTO: { 771 ill_t *ill; 772 773 /* 774 * could be one of 775 * (i) real message from the wire, (DLPI_DATA) 776 * (ii) DLPI message 777 * Take a ref on the ill associated with this arl to 778 * prevent the ill from being unplumbed until this thread 779 * is done. 780 */ 781 if (IS_DLPI_DATA(mp)) { 782 ill = arl_to_ill(arl); 783 if (ill == NULL) { 784 arp_drop_packet("No ill", mp, ill); 785 break; 786 } 787 arp_process_packet(ill, mp); 788 ill_refrele(ill); 789 break; 790 } 791 /* Miscellaneous DLPI messages get shuffled off. */ 792 arp_rput_dlpi(q, mp); 793 break; 794 } 795 case M_ERROR: 796 case M_HANGUP: 797 if (mp->b_rptr < mp->b_wptr) 798 arl->arl_error = (int)(*mp->b_rptr & 0xFF); 799 if (arl->arl_error == 0) 800 arl->arl_error = ENXIO; 801 freemsg(mp); 802 break; 803 default: 804 ip1dbg(("arp_rput other db type %x\n", DB_TYPE(mp))); 805 putnext(q, mp); 806 break; 807 } 808 if (need_refrele) 809 arl_refrele(arl); 810 } 811 812 static void 813 arp_process_packet(ill_t *ill, mblk_t *mp) 814 { 815 mblk_t *mp1; 816 arh_t *arh; 817 in_addr_t src_paddr, dst_paddr; 818 uint32_t hlen, plen; 819 boolean_t is_probe; 820 int op; 821 ncec_t *dst_ncec, *src_ncec = NULL; 822 uchar_t *src_haddr, *arhp, *dst_haddr, *dp, *sp; 823 int err; 824 ip_stack_t *ipst; 825 boolean_t need_ill_refrele = B_FALSE; 826 nce_t *nce; 827 uchar_t *src_lladdr; 828 dl_unitdata_ind_t *dlui; 829 ip_recv_attr_t iras; 830 831 ASSERT(ill != NULL); 832 if (ill->ill_flags & ILLF_NOARP) { 833 arp_drop_packet("Interface does not support ARP", mp, ill); 834 return; 835 } 836 ipst = ill->ill_ipst; 837 /* 838 * What we should have at this point is a DL_UNITDATA_IND message 839 * followed by an ARP packet. We do some initial checks and then 840 * get to work. 841 */ 842 dlui = (dl_unitdata_ind_t *)mp->b_rptr; 843 if (dlui->dl_group_address == 1) { 844 /* 845 * multicast or broadcast packet. Only accept on the ipmp 846 * nominated interface for multicasts ('cast_ill'). 847 * If we have no cast_ill we are liberal and accept everything. 848 */ 849 if (IS_UNDER_IPMP(ill)) { 850 /* For an under ill_grp can change under lock */ 851 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 852 if (!ill->ill_nom_cast && ill->ill_grp != NULL && 853 ill->ill_grp->ig_cast_ill != NULL) { 854 rw_exit(&ipst->ips_ill_g_lock); 855 arp_drop_packet("Interface is not nominated " 856 "for multicast sends and receives", 857 mp, ill); 858 return; 859 } 860 rw_exit(&ipst->ips_ill_g_lock); 861 } 862 } 863 mp1 = mp->b_cont; 864 if (mp1 == NULL) { 865 arp_drop_packet("Missing ARP packet", mp, ill); 866 return; 867 } 868 if (mp1->b_cont != NULL) { 869 /* No fooling around with funny messages. */ 870 if (!pullupmsg(mp1, -1)) { 871 arp_drop_packet("Funny message: pullup failed", 872 mp, ill); 873 return; 874 } 875 } 876 arh = (arh_t *)mp1->b_rptr; 877 hlen = arh->arh_hlen; 878 plen = arh->arh_plen; 879 if (MBLKL(mp1) < ARH_FIXED_LEN + 2 * hlen + 2 * plen) { 880 arp_drop_packet("mblk len too small", mp, ill); 881 return; 882 } 883 /* 884 * hlen 0 is used for RFC 1868 UnARP. 885 * 886 * Note that the rest of the code checks that hlen is what we expect 887 * for this hardware address type, so might as well discard packets 888 * here that don't match. 889 */ 890 if ((hlen > 0 && hlen != ill->ill_phys_addr_length) || plen == 0) { 891 DTRACE_PROBE2(rput_bogus, ill_t *, ill, mblk_t *, mp1); 892 arp_drop_packet("Bogus hlen or plen", mp, ill); 893 return; 894 } 895 /* 896 * Historically, Solaris has been lenient about hardware type numbers. 897 * We should check here, but don't. 898 */ 899 DTRACE_PROBE3(arp__physical__in__start, ill_t *, ill, arh_t *, arh, 900 mblk_t *, mp); 901 /* 902 * If ill is in an ipmp group, it will be the under ill. If we want 903 * to report the packet as coming up the IPMP interface, we should 904 * convert it to the ipmp ill. 905 */ 906 ARP_HOOK_IN(ipst->ips_arp_physical_in_event, ipst->ips_arp_physical_in, 907 ill->ill_phyint->phyint_ifindex, arh, mp, mp1, ipst); 908 DTRACE_PROBE1(arp__physical__in__end, mblk_t *, mp); 909 if (mp == NULL) 910 return; 911 arhp = (uchar_t *)arh + ARH_FIXED_LEN; 912 src_haddr = arhp; /* ar$sha */ 913 arhp += hlen; 914 bcopy(arhp, &src_paddr, IP_ADDR_LEN); /* ar$spa */ 915 sp = arhp; 916 arhp += IP_ADDR_LEN; 917 dst_haddr = arhp; /* ar$dha */ 918 arhp += hlen; 919 bcopy(arhp, &dst_paddr, IP_ADDR_LEN); /* ar$tpa */ 920 dp = arhp; 921 op = BE16_TO_U16(arh->arh_operation); 922 923 DTRACE_PROBE2(ip__arp__input, (in_addr_t), src_paddr, 924 (in_addr_t), dst_paddr); 925 926 /* Determine if this is just a probe */ 927 is_probe = (src_paddr == INADDR_ANY); 928 929 /* 930 * ira_ill is the only field used down the arp_notify path. 931 */ 932 bzero(&iras, sizeof (iras)); 933 iras.ira_ill = iras.ira_rill = ill; 934 /* 935 * RFC 826: first check if the <protocol, sender protocol address> is 936 * in the cache, if there is a sender protocol address. Note that this 937 * step also handles resolutions based on source. 938 */ 939 /* Note: after here we need to freeb(mp) and freemsg(mp1) separately */ 940 mp->b_cont = NULL; 941 if (is_probe) { 942 err = AR_NOTFOUND; 943 } else { 944 if (plen != 4) { 945 arp_drop_packet("bad protocol len", mp, ill); 946 return; 947 } 948 err = ip_nce_resolve_all(ill, src_haddr, hlen, &src_paddr, 949 &src_ncec, op); 950 switch (err) { 951 case AR_BOGON: 952 ASSERT(src_ncec != NULL); 953 arp_notify(src_paddr, mp1, AR_CN_BOGON, 954 &iras, src_ncec); 955 break; 956 case AR_FAILED: 957 arp_notify(src_paddr, mp1, AR_CN_FAILED, &iras, 958 src_ncec); 959 break; 960 case AR_LOOPBACK: 961 DTRACE_PROBE2(rput_loopback, ill_t *, ill, arh_t *, 962 arh); 963 freemsg(mp1); 964 break; 965 default: 966 goto update; 967 } 968 freemsg(mp); 969 if (src_ncec != NULL) 970 ncec_refrele(src_ncec); 971 return; 972 } 973 update: 974 /* 975 * Now look up the destination address. By RFC 826, we ignore the 976 * packet at this step if the target isn't one of our addresses (i.e., 977 * one we have been asked to PUBLISH). This is true even if the 978 * target is something we're trying to resolve and the packet 979 * is a response. 980 */ 981 dst_ncec = ncec_lookup_illgrp_v4(ill, &dst_paddr); 982 if (dst_ncec == NULL || !NCE_PUBLISH(dst_ncec)) { 983 /* 984 * Let the client know if the source mapping has changed, even 985 * if the destination provides no useful information for the 986 * client. 987 */ 988 if (err == AR_CHANGED) { 989 arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras, 990 NULL); 991 freemsg(mp); 992 } else { 993 freemsg(mp); 994 arp_drop_packet("Target is not interesting", mp1, ill); 995 } 996 if (dst_ncec != NULL) 997 ncec_refrele(dst_ncec); 998 if (src_ncec != NULL) 999 ncec_refrele(src_ncec); 1000 return; 1001 } 1002 1003 if (dst_ncec->ncec_flags & NCE_F_UNVERIFIED) { 1004 /* 1005 * Check for a reflection. Some misbehaving bridges will 1006 * reflect our own transmitted packets back to us. 1007 */ 1008 ASSERT(NCE_PUBLISH(dst_ncec)); 1009 if (hlen != dst_ncec->ncec_ill->ill_phys_addr_length) { 1010 ncec_refrele(dst_ncec); 1011 if (src_ncec != NULL) 1012 ncec_refrele(src_ncec); 1013 freemsg(mp); 1014 arp_drop_packet("bad arh_len", mp1, ill); 1015 return; 1016 } 1017 if (!nce_cmp_ll_addr(dst_ncec, src_haddr, hlen)) { 1018 DTRACE_PROBE3(rput_probe_reflected, ill_t *, ill, 1019 arh_t *, arh, ncec_t *, dst_ncec); 1020 ncec_refrele(dst_ncec); 1021 if (src_ncec != NULL) 1022 ncec_refrele(src_ncec); 1023 freemsg(mp); 1024 arp_drop_packet("Reflected probe", mp1, ill); 1025 return; 1026 } 1027 /* 1028 * Responses targeting our HW address that are not responses to 1029 * our DAD probe must be ignored as they are related to requests 1030 * sent before DAD was restarted. 1031 */ 1032 if (op == ARP_RESPONSE && 1033 (nce_cmp_ll_addr(dst_ncec, dst_haddr, hlen) == 0)) { 1034 ncec_refrele(dst_ncec); 1035 if (src_ncec != NULL) 1036 ncec_refrele(src_ncec); 1037 freemsg(mp); 1038 arp_drop_packet( 1039 "Response to request that was sent before DAD", 1040 mp1, ill); 1041 return; 1042 } 1043 /* 1044 * Responses targeted to HW addresses which are not ours but 1045 * sent to our unverified proto address are also conflicts. 1046 * These may be reported by a proxy rather than the interface 1047 * with the conflicting address, dst_paddr is in conflict 1048 * rather than src_paddr. To ensure IP can locate the correct 1049 * ipif to take down, it is necessary to copy dst_paddr to 1050 * the src_paddr field before sending it to IP. The same is 1051 * required for probes, where src_paddr will be INADDR_ANY. 1052 */ 1053 if (is_probe || op == ARP_RESPONSE) { 1054 bcopy(dp, sp, plen); 1055 arp_notify(src_paddr, mp1, AR_CN_FAILED, &iras, 1056 NULL); 1057 ncec_delete(dst_ncec); 1058 } else if (err == AR_CHANGED) { 1059 arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras, 1060 NULL); 1061 } else { 1062 DTRACE_PROBE3(rput_request_unverified, 1063 ill_t *, ill, arh_t *, arh, ncec_t *, dst_ncec); 1064 arp_drop_packet("Unverified request", mp1, ill); 1065 } 1066 freemsg(mp); 1067 ncec_refrele(dst_ncec); 1068 if (src_ncec != NULL) 1069 ncec_refrele(src_ncec); 1070 return; 1071 } 1072 /* 1073 * If it's a request, then we reply to this, and if we think the 1074 * sender's unknown, then we create an entry to avoid unnecessary ARPs. 1075 * The design assumption is that someone ARPing us is likely to send us 1076 * a packet soon, and that we'll want to reply to it. 1077 */ 1078 if (op == ARP_REQUEST) { 1079 const uchar_t *nce_hwaddr; 1080 struct in_addr nce_paddr; 1081 clock_t now; 1082 ill_t *under_ill = ill; 1083 boolean_t send_unicast = B_TRUE; 1084 1085 ASSERT(NCE_PUBLISH(dst_ncec)); 1086 1087 if ((dst_ncec->ncec_flags & (NCE_F_BCAST|NCE_F_MCAST)) != 0) { 1088 /* 1089 * Ignore senders who are deliberately or accidentally 1090 * confused. 1091 */ 1092 goto bail; 1093 } 1094 1095 if (!is_probe && err == AR_NOTFOUND) { 1096 ASSERT(src_ncec == NULL); 1097 1098 if (IS_UNDER_IPMP(under_ill)) { 1099 /* 1100 * create the ncec for the sender on ipmp_ill. 1101 * We pass in the ipmp_ill itself to avoid 1102 * creating an nce_t on the under_ill. 1103 */ 1104 ill = ipmp_ill_hold_ipmp_ill(under_ill); 1105 if (ill == NULL) 1106 ill = under_ill; 1107 else 1108 need_ill_refrele = B_TRUE; 1109 } 1110 1111 err = nce_lookup_then_add_v4(ill, src_haddr, hlen, 1112 &src_paddr, 0, ND_STALE, &nce); 1113 1114 switch (err) { 1115 case 0: 1116 case EEXIST: 1117 ip1dbg(("added ncec %p in state %d ill %s\n", 1118 (void *)src_ncec, src_ncec->ncec_state, 1119 ill->ill_name)); 1120 src_ncec = nce->nce_common; 1121 break; 1122 default: 1123 /* 1124 * Either no memory, or the outgoing interface 1125 * is in the process of down/unplumb. In the 1126 * latter case, we will fail the send anyway, 1127 * and in the former case, we should try to send 1128 * the ARP response. 1129 */ 1130 src_lladdr = src_haddr; 1131 goto send_response; 1132 } 1133 ncec_refhold(src_ncec); 1134 nce_refrele(nce); 1135 /* set up cleanup interval on ncec */ 1136 } 1137 1138 /* 1139 * This implements periodic address defense based on a modified 1140 * version of the RFC 3927 requirements. Instead of sending a 1141 * broadcasted reply every time, as demanded by the RFC, we 1142 * send at most one broadcast reply per arp_broadcast_interval. 1143 */ 1144 now = ddi_get_lbolt(); 1145 if ((now - dst_ncec->ncec_last_time_defended) > 1146 MSEC_TO_TICK(ipst->ips_ipv4_dad_announce_interval)) { 1147 dst_ncec->ncec_last_time_defended = now; 1148 /* 1149 * If this is one of the long-suffering entries, 1150 * pull it out now. It no longer needs separate 1151 * defense, because we're now doing that with this 1152 * broadcasted reply. 1153 */ 1154 dst_ncec->ncec_flags &= ~NCE_F_DELAYED; 1155 send_unicast = B_FALSE; 1156 } 1157 if (src_ncec != NULL && send_unicast) { 1158 src_lladdr = src_ncec->ncec_lladdr; 1159 } else { 1160 src_lladdr = under_ill->ill_bcast_mp->b_rptr + 1161 NCE_LL_ADDR_OFFSET(under_ill); 1162 } 1163 send_response: 1164 nce_hwaddr = dst_ncec->ncec_lladdr; 1165 IN6_V4MAPPED_TO_INADDR(&dst_ncec->ncec_addr, &nce_paddr); 1166 1167 (void) arp_output(under_ill, ARP_RESPONSE, 1168 nce_hwaddr, (uchar_t *)&nce_paddr, src_haddr, 1169 (uchar_t *)&src_paddr, src_lladdr); 1170 } 1171 bail: 1172 if (dst_ncec != NULL) { 1173 ncec_refrele(dst_ncec); 1174 } 1175 if (src_ncec != NULL) { 1176 ncec_refrele(src_ncec); 1177 } 1178 if (err == AR_CHANGED) { 1179 mp->b_cont = NULL; 1180 arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras, NULL); 1181 mp1 = NULL; 1182 } 1183 if (need_ill_refrele) 1184 ill_refrele(ill); 1185 done: 1186 freemsg(mp); 1187 freemsg(mp1); 1188 } 1189 1190 /* 1191 * Basic initialization of the arl_t and the arl_common structure shared with 1192 * the ill_t that is done after SLIFNAME/IF_UNITSEL. 1193 */ 1194 static int 1195 arl_ill_init(arl_t *arl, char *ill_name) 1196 { 1197 ill_t *ill; 1198 arl_ill_common_t *ai; 1199 1200 ill = ill_lookup_on_name(ill_name, B_FALSE, B_FALSE, B_FALSE, 1201 arl->arl_ipst); 1202 1203 if (ill == NULL) 1204 return (ENXIO); 1205 1206 /* 1207 * By the time we set up the arl, we expect the ETHERTYPE_IP 1208 * stream to be fully bound and attached. So we copy/verify 1209 * relevant information as possible from/against the ill. 1210 * 1211 * The following should have been set up in arp_ll_set_defaults() 1212 * after the first DL_INFO_ACK was received. 1213 */ 1214 ASSERT(arl->arl_phys_addr_length == ill->ill_phys_addr_length); 1215 ASSERT(arl->arl_sap == ETHERTYPE_ARP); 1216 ASSERT(arl->arl_mactype == ill->ill_mactype); 1217 ASSERT(arl->arl_sap_length == ill->ill_sap_length); 1218 1219 ai = kmem_zalloc(sizeof (*ai), KM_SLEEP); 1220 mutex_enter(&ill->ill_lock); 1221 /* First ensure that the ill is not CONDEMNED. */ 1222 if (ill->ill_state_flags & ILL_CONDEMNED) { 1223 mutex_exit(&ill->ill_lock); 1224 ill_refrele(ill); 1225 kmem_free(ai, sizeof (*ai)); 1226 return (ENXIO); 1227 } 1228 if (ill->ill_common != NULL || arl->arl_common != NULL) { 1229 mutex_exit(&ill->ill_lock); 1230 ip0dbg(("%s: PPA already exists", ill->ill_name)); 1231 ill_refrele(ill); 1232 kmem_free(ai, sizeof (*ai)); 1233 return (EEXIST); 1234 } 1235 mutex_init(&ai->ai_lock, NULL, MUTEX_DEFAULT, NULL); 1236 ai->ai_arl = arl; 1237 ai->ai_ill = ill; 1238 ill->ill_common = ai; 1239 arl->arl_common = ai; 1240 mutex_exit(&ill->ill_lock); 1241 (void) strlcpy(arl->arl_name, ill->ill_name, LIFNAMSIZ); 1242 arl->arl_name_length = ill->ill_name_length; 1243 ill_refrele(ill); 1244 arp_ifname_notify(arl); 1245 return (0); 1246 } 1247 1248 /* Allocate and do common initializations for DLPI messages. */ 1249 static mblk_t * 1250 ip_ar_dlpi_comm(t_uscalar_t prim, size_t size) 1251 { 1252 mblk_t *mp; 1253 1254 if ((mp = allocb(size, BPRI_HI)) == NULL) 1255 return (NULL); 1256 1257 /* 1258 * DLPIv2 says that DL_INFO_REQ and DL_TOKEN_REQ (the latter 1259 * of which we don't seem to use) are sent with M_PCPROTO, and 1260 * that other DLPI are M_PROTO. 1261 */ 1262 DB_TYPE(mp) = (prim == DL_INFO_REQ) ? M_PCPROTO : M_PROTO; 1263 1264 mp->b_wptr = mp->b_rptr + size; 1265 bzero(mp->b_rptr, size); 1266 DL_PRIM(mp) = prim; 1267 return (mp); 1268 } 1269 1270 1271 int 1272 ip_sioctl_ifunitsel_arp(queue_t *q, int *ppa) 1273 { 1274 arl_t *arl; 1275 char *cp, ill_name[LIFNAMSIZ]; 1276 1277 if (q->q_next == NULL) 1278 return (EINVAL); 1279 1280 do { 1281 q = q->q_next; 1282 } while (q->q_next != NULL); 1283 cp = q->q_qinfo->qi_minfo->mi_idname; 1284 1285 arl = (arl_t *)q->q_ptr; 1286 (void) snprintf(ill_name, sizeof (ill_name), "%s%d", cp, *ppa); 1287 arl->arl_ppa = *ppa; 1288 return (arl_ill_init(arl, ill_name)); 1289 } 1290 1291 int 1292 ip_sioctl_slifname_arp(queue_t *q, void *lifreq) 1293 { 1294 arl_t *arl; 1295 struct lifreq *lifr = lifreq; 1296 1297 /* ioctl not valid when IP opened as a device */ 1298 if (q->q_next == NULL) 1299 return (EINVAL); 1300 1301 arl = (arl_t *)q->q_ptr; 1302 arl->arl_ppa = lifr->lifr_ppa; 1303 return (arl_ill_init(arl, lifr->lifr_name)); 1304 } 1305 1306 arl_t * 1307 ill_to_arl(ill_t *ill) 1308 { 1309 arl_ill_common_t *ai = ill->ill_common; 1310 arl_t *arl = NULL; 1311 1312 if (ai == NULL) 1313 return (NULL); 1314 /* 1315 * Find the arl_t that corresponds to this ill_t from the shared 1316 * ill_common structure. We can safely access the ai here as it 1317 * will only be freed in arp_modclose() after we have become 1318 * single-threaded. 1319 */ 1320 mutex_enter(&ai->ai_lock); 1321 if ((arl = ai->ai_arl) != NULL) { 1322 mutex_enter(&arl->arl_lock); 1323 if (!(arl->arl_state_flags & ARL_CONDEMNED)) { 1324 arl_refhold_locked(arl); 1325 mutex_exit(&arl->arl_lock); 1326 } else { 1327 mutex_exit(&arl->arl_lock); 1328 arl = NULL; 1329 } 1330 } 1331 mutex_exit(&ai->ai_lock); 1332 return (arl); 1333 } 1334 1335 ill_t * 1336 arl_to_ill(arl_t *arl) 1337 { 1338 arl_ill_common_t *ai = arl->arl_common; 1339 ill_t *ill = NULL; 1340 1341 if (ai == NULL) { 1342 /* 1343 * happens when the arp stream is just being opened, and 1344 * arl_ill_init has not been executed yet. 1345 */ 1346 return (NULL); 1347 } 1348 /* 1349 * Find the ill_t that corresponds to this arl_t from the shared 1350 * arl_common structure. We can safely access the ai here as it 1351 * will only be freed in arp_modclose() after we have become 1352 * single-threaded. 1353 */ 1354 mutex_enter(&ai->ai_lock); 1355 if ((ill = ai->ai_ill) != NULL) { 1356 mutex_enter(&ill->ill_lock); 1357 if (!ILL_IS_CONDEMNED(ill)) { 1358 ill_refhold_locked(ill); 1359 mutex_exit(&ill->ill_lock); 1360 } else { 1361 mutex_exit(&ill->ill_lock); 1362 ill = NULL; 1363 } 1364 } 1365 mutex_exit(&ai->ai_lock); 1366 return (ill); 1367 } 1368 1369 int 1370 arp_ll_up(ill_t *ill) 1371 { 1372 mblk_t *attach_mp = NULL; 1373 mblk_t *bind_mp = NULL; 1374 mblk_t *unbind_mp = NULL; 1375 arl_t *arl; 1376 1377 ASSERT(IAM_WRITER_ILL(ill)); 1378 arl = ill_to_arl(ill); 1379 1380 DTRACE_PROBE2(ill__downup, char *, "arp_ll_up", ill_t *, ill); 1381 if (arl == NULL) 1382 return (ENXIO); 1383 DTRACE_PROBE2(arl__downup, char *, "arp_ll_up", arl_t *, arl); 1384 if ((arl->arl_state_flags & ARL_LL_UP) != 0) { 1385 arl_refrele(arl); 1386 return (0); 1387 } 1388 if (arl->arl_needs_attach) { /* DL_STYLE2 */ 1389 attach_mp = 1390 ip_ar_dlpi_comm(DL_ATTACH_REQ, sizeof (dl_attach_req_t)); 1391 if (attach_mp == NULL) 1392 goto bad; 1393 ((dl_attach_req_t *)attach_mp->b_rptr)->dl_ppa = arl->arl_ppa; 1394 } 1395 1396 /* Allocate and initialize a bind message. */ 1397 bind_mp = ip_ar_dlpi_comm(DL_BIND_REQ, sizeof (dl_bind_req_t)); 1398 if (bind_mp == NULL) 1399 goto bad; 1400 ((dl_bind_req_t *)bind_mp->b_rptr)->dl_sap = ETHERTYPE_ARP; 1401 ((dl_bind_req_t *)bind_mp->b_rptr)->dl_service_mode = DL_CLDLS; 1402 1403 unbind_mp = ip_ar_dlpi_comm(DL_UNBIND_REQ, sizeof (dl_unbind_req_t)); 1404 if (unbind_mp == NULL) 1405 goto bad; 1406 if (arl->arl_needs_attach) { 1407 arp_dlpi_send(arl, attach_mp); 1408 } 1409 arl->arl_unbind_mp = unbind_mp; 1410 1411 arl->arl_state_flags |= ARL_LL_BIND_PENDING; 1412 arp_dlpi_send(arl, bind_mp); 1413 arl_refrele(arl); 1414 return (EINPROGRESS); 1415 1416 bad: 1417 freemsg(attach_mp); 1418 freemsg(bind_mp); 1419 freemsg(unbind_mp); 1420 arl_refrele(arl); 1421 return (ENOMEM); 1422 } 1423 1424 /* 1425 * consumes/frees mp 1426 */ 1427 static void 1428 arp_notify(in_addr_t src, mblk_t *mp, uint32_t arcn_code, 1429 ip_recv_attr_t *ira, ncec_t *ncec) 1430 { 1431 char hbuf[MAC_STR_LEN]; 1432 char sbuf[INET_ADDRSTRLEN]; 1433 ill_t *ill = ira->ira_ill; 1434 ip_stack_t *ipst = ill->ill_ipst; 1435 arh_t *arh = (arh_t *)mp->b_rptr; 1436 1437 switch (arcn_code) { 1438 case AR_CN_BOGON: 1439 /* 1440 * Someone is sending ARP packets with a source protocol 1441 * address that we have published and for which we believe our 1442 * entry is authoritative and verified to be unique on 1443 * the network. 1444 * 1445 * arp_process_packet() sends AR_CN_FAILED for the case when 1446 * a DAD probe is received and the hardware address of a 1447 * non-authoritative entry has changed. Thus, AR_CN_BOGON 1448 * indicates a real conflict, and we have to do resolution. 1449 * 1450 * We back away quickly from the address if it's from DHCP or 1451 * otherwise temporary and hasn't been used recently (or at 1452 * all). We'd like to include "deprecated" addresses here as 1453 * well (as there's no real reason to defend something we're 1454 * discarding), but IPMP "reuses" this flag to mean something 1455 * other than the standard meaning. 1456 */ 1457 if (ip_nce_conflict(mp, ira, ncec)) { 1458 (void) mac_colon_addr((uint8_t *)(arh + 1), 1459 arh->arh_hlen, hbuf, sizeof (hbuf)); 1460 (void) ip_dot_addr(src, sbuf); 1461 cmn_err(CE_WARN, 1462 "proxy ARP problem? Node '%s' is using %s on %s", 1463 hbuf, sbuf, ill->ill_name); 1464 if (!arp_no_defense) 1465 (void) arp_announce(ncec); 1466 /* 1467 * ncec_last_time_defended has been adjusted in 1468 * ip_nce_conflict. 1469 */ 1470 } else { 1471 ncec_delete(ncec); 1472 } 1473 freemsg(mp); 1474 break; 1475 case AR_CN_ANNOUNCE: { 1476 nce_hw_map_t hwm; 1477 /* 1478 * ARP gives us a copy of any packet where it thinks 1479 * the address has changed, so that we can update our 1480 * caches. We're responsible for caching known answers 1481 * in the current design. We check whether the 1482 * hardware address really has changed in all of our 1483 * entries that have cached this mapping, and if so, we 1484 * blow them away. This way we will immediately pick 1485 * up the rare case of a host changing hardware 1486 * address. 1487 */ 1488 if (src == 0) { 1489 freemsg(mp); 1490 break; 1491 } 1492 hwm.hwm_addr = src; 1493 hwm.hwm_hwlen = arh->arh_hlen; 1494 hwm.hwm_hwaddr = (uchar_t *)(arh + 1); 1495 hwm.hwm_flags = 0; 1496 ncec_walk_common(ipst->ips_ndp4, NULL, 1497 (pfi_t)nce_update_hw_changed, &hwm, B_TRUE); 1498 freemsg(mp); 1499 break; 1500 } 1501 case AR_CN_FAILED: 1502 if (arp_no_defense) { 1503 (void) mac_colon_addr((uint8_t *)(arh + 1), 1504 arh->arh_hlen, hbuf, sizeof (hbuf)); 1505 (void) ip_dot_addr(src, sbuf); 1506 1507 cmn_err(CE_WARN, 1508 "node %s is using our IP address %s on %s", 1509 hbuf, sbuf, ill->ill_name); 1510 freemsg(mp); 1511 break; 1512 } 1513 /* 1514 * mp will be freed by arp_excl. 1515 */ 1516 ill_refhold(ill); 1517 qwriter_ip(ill, ill->ill_rq, mp, arp_excl, NEW_OP, B_FALSE); 1518 return; 1519 default: 1520 ASSERT(0); 1521 freemsg(mp); 1522 break; 1523 } 1524 } 1525 1526 /* 1527 * arp_output is called to transmit an ARP Request or Response. The mapping 1528 * to RFC 826 variables is: 1529 * haddr1 == ar$sha 1530 * paddr1 == ar$spa 1531 * haddr2 == ar$tha 1532 * paddr2 == ar$tpa 1533 * The ARP frame is sent to the ether_dst in dst_lladdr. 1534 */ 1535 static int 1536 arp_output(ill_t *ill, uint32_t operation, 1537 const uchar_t *haddr1, const uchar_t *paddr1, const uchar_t *haddr2, 1538 const uchar_t *paddr2, uchar_t *dst_lladdr) 1539 { 1540 arh_t *arh; 1541 uint8_t *cp; 1542 uint_t hlen; 1543 uint32_t plen = IPV4_ADDR_LEN; /* ar$pln from RFC 826 */ 1544 uint32_t proto = IP_ARP_PROTO_TYPE; 1545 mblk_t *mp; 1546 arl_t *arl; 1547 1548 ASSERT(dst_lladdr != NULL); 1549 hlen = ill->ill_phys_addr_length; /* ar$hln from RFC 826 */ 1550 mp = ill_dlur_gen(dst_lladdr, hlen, ETHERTYPE_ARP, ill->ill_sap_length); 1551 1552 if (mp == NULL) 1553 return (ENOMEM); 1554 1555 /* IFF_NOARP flag is set or link down: do not send arp messages */ 1556 if ((ill->ill_flags & ILLF_NOARP) || !ill->ill_dl_up) { 1557 freemsg(mp); 1558 return (ENXIO); 1559 } 1560 1561 mp->b_cont = allocb(AR_LL_HDR_SLACK + ARH_FIXED_LEN + (hlen * 4) + 1562 plen + plen, BPRI_MED); 1563 if (mp->b_cont == NULL) { 1564 freeb(mp); 1565 return (ENOMEM); 1566 } 1567 1568 /* Fill in the ARP header. */ 1569 cp = mp->b_cont->b_rptr + (AR_LL_HDR_SLACK + hlen + hlen); 1570 mp->b_cont->b_rptr = cp; 1571 arh = (arh_t *)cp; 1572 U16_TO_BE16(arp_hw_type(ill->ill_mactype), arh->arh_hardware); 1573 U16_TO_BE16(proto, arh->arh_proto); 1574 arh->arh_hlen = (uint8_t)hlen; 1575 arh->arh_plen = (uint8_t)plen; 1576 U16_TO_BE16(operation, arh->arh_operation); 1577 cp += ARH_FIXED_LEN; 1578 bcopy(haddr1, cp, hlen); 1579 cp += hlen; 1580 if (paddr1 == NULL) 1581 bzero(cp, plen); 1582 else 1583 bcopy(paddr1, cp, plen); 1584 cp += plen; 1585 if (haddr2 == NULL) 1586 bzero(cp, hlen); 1587 else 1588 bcopy(haddr2, cp, hlen); 1589 cp += hlen; 1590 bcopy(paddr2, cp, plen); 1591 cp += plen; 1592 mp->b_cont->b_wptr = cp; 1593 1594 DTRACE_PROBE3(arp__physical__out__start, 1595 ill_t *, ill, arh_t *, arh, mblk_t *, mp); 1596 ARP_HOOK_OUT(ill->ill_ipst->ips_arp_physical_out_event, 1597 ill->ill_ipst->ips_arp_physical_out, 1598 ill->ill_phyint->phyint_ifindex, arh, mp, mp->b_cont, 1599 ill->ill_ipst); 1600 DTRACE_PROBE1(arp__physical__out__end, mblk_t *, mp); 1601 if (mp == NULL) 1602 return (0); 1603 1604 /* Ship it out. */ 1605 arl = ill_to_arl(ill); 1606 if (arl == NULL) { 1607 freemsg(mp); 1608 return (0); 1609 } 1610 if (canputnext(arl->arl_wq)) 1611 putnext(arl->arl_wq, mp); 1612 else 1613 freemsg(mp); 1614 arl_refrele(arl); 1615 return (0); 1616 } 1617 1618 /* 1619 * Process resolve requests. 1620 * If we are not yet reachable then we check and decrease ncec_rcnt; otherwise 1621 * we leave it alone (the caller will check and manage ncec_pcnt in those 1622 * cases.) 1623 */ 1624 int 1625 arp_request(ncec_t *ncec, in_addr_t sender, ill_t *ill) 1626 { 1627 int err; 1628 const uchar_t *target_hwaddr; 1629 struct in_addr nce_paddr; 1630 uchar_t *dst_lladdr; 1631 boolean_t use_rcnt = !NCE_ISREACHABLE(ncec); 1632 1633 ASSERT(MUTEX_HELD(&ncec->ncec_lock)); 1634 ASSERT(!IS_IPMP(ill)); 1635 1636 if (use_rcnt && ncec->ncec_rcnt == 0) { 1637 /* not allowed any more retransmits. */ 1638 return (0); 1639 } 1640 1641 if ((ill->ill_flags & ILLF_NOARP) != 0) 1642 return (0); 1643 1644 IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &nce_paddr); 1645 1646 target_hwaddr = 1647 ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 1648 1649 if (NCE_ISREACHABLE(ncec)) { 1650 dst_lladdr = ncec->ncec_lladdr; 1651 } else { 1652 dst_lladdr = ill->ill_bcast_mp->b_rptr + 1653 NCE_LL_ADDR_OFFSET(ill); 1654 } 1655 1656 mutex_exit(&ncec->ncec_lock); 1657 err = arp_output(ill, ARP_REQUEST, 1658 ill->ill_phys_addr, (uchar_t *)&sender, target_hwaddr, 1659 (uchar_t *)&nce_paddr, dst_lladdr); 1660 mutex_enter(&ncec->ncec_lock); 1661 1662 if (err != 0) { 1663 /* 1664 * Some transient error such as ENOMEM or a down link was 1665 * encountered. If the link has been taken down permanently, 1666 * the ncec will eventually be cleaned up (ipif_down_tail() 1667 * will call ipif_nce_down() and flush the ncec), to terminate 1668 * recurring attempts to send ARP requests. In all other cases, 1669 * allow the caller another chance at success next time. 1670 */ 1671 return (ncec->ncec_ill->ill_reachable_retrans_time); 1672 } 1673 1674 if (use_rcnt) 1675 ncec->ncec_rcnt--; 1676 1677 return (ncec->ncec_ill->ill_reachable_retrans_time); 1678 } 1679 1680 /* return B_TRUE if dropped */ 1681 boolean_t 1682 arp_announce(ncec_t *ncec) 1683 { 1684 ill_t *ill; 1685 int err; 1686 uchar_t *sphys_addr, *bcast_addr; 1687 struct in_addr ncec_addr; 1688 boolean_t need_refrele = B_FALSE; 1689 1690 ASSERT((ncec->ncec_flags & NCE_F_BCAST) == 0); 1691 ASSERT((ncec->ncec_flags & NCE_F_MCAST) == 0); 1692 1693 if (IS_IPMP(ncec->ncec_ill)) { 1694 /* sent on the cast_ill */ 1695 ill = ipmp_ill_get_xmit_ill(ncec->ncec_ill, B_FALSE); 1696 if (ill == NULL) 1697 return (B_TRUE); 1698 need_refrele = B_TRUE; 1699 } else { 1700 ill = ncec->ncec_ill; 1701 } 1702 1703 /* 1704 * broadcast an announce to ill_bcast address. 1705 */ 1706 IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &ncec_addr); 1707 1708 sphys_addr = ncec->ncec_lladdr; 1709 bcast_addr = ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 1710 1711 err = arp_output(ill, ARP_REQUEST, 1712 sphys_addr, (uchar_t *)&ncec_addr, bcast_addr, 1713 (uchar_t *)&ncec_addr, bcast_addr); 1714 1715 if (need_refrele) 1716 ill_refrele(ill); 1717 return (err != 0); 1718 } 1719 1720 /* return B_TRUE if dropped */ 1721 boolean_t 1722 arp_probe(ncec_t *ncec) 1723 { 1724 ill_t *ill; 1725 int err; 1726 struct in_addr ncec_addr; 1727 uchar_t *sphys_addr, *dst_lladdr; 1728 1729 if (IS_IPMP(ncec->ncec_ill)) { 1730 ill = ipmp_ill_get_xmit_ill(ncec->ncec_ill, B_FALSE); 1731 if (ill == NULL) 1732 return (B_TRUE); 1733 } else { 1734 ill = ncec->ncec_ill; 1735 } 1736 1737 IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &ncec_addr); 1738 1739 sphys_addr = ncec->ncec_lladdr; 1740 dst_lladdr = ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 1741 err = arp_output(ill, ARP_REQUEST, 1742 sphys_addr, NULL, NULL, (uchar_t *)&ncec_addr, dst_lladdr); 1743 1744 if (IS_IPMP(ncec->ncec_ill)) 1745 ill_refrele(ill); 1746 return (err != 0); 1747 } 1748 1749 static mblk_t * 1750 arl_unbind(arl_t *arl) 1751 { 1752 mblk_t *mp; 1753 1754 if ((mp = arl->arl_unbind_mp) != NULL) { 1755 arl->arl_unbind_mp = NULL; 1756 arl->arl_state_flags |= ARL_DL_UNBIND_IN_PROGRESS; 1757 } 1758 return (mp); 1759 } 1760 1761 int 1762 arp_ll_down(ill_t *ill) 1763 { 1764 arl_t *arl; 1765 mblk_t *unbind_mp; 1766 int err = 0; 1767 boolean_t replumb = (ill->ill_replumbing == 1); 1768 1769 DTRACE_PROBE2(ill__downup, char *, "arp_ll_down", ill_t *, ill); 1770 if ((arl = ill_to_arl(ill)) == NULL) 1771 return (ENXIO); 1772 DTRACE_PROBE2(arl__downup, char *, "arp_ll_down", arl_t *, arl); 1773 mutex_enter(&arl->arl_lock); 1774 unbind_mp = arl_unbind(arl); 1775 if (unbind_mp != NULL) { 1776 ASSERT(arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS); 1777 DTRACE_PROBE2(arp__unbinding, mblk_t *, unbind_mp, 1778 arl_t *, arl); 1779 err = EINPROGRESS; 1780 if (replumb) 1781 arl->arl_state_flags |= ARL_LL_REPLUMBING; 1782 } 1783 mutex_exit(&arl->arl_lock); 1784 if (unbind_mp != NULL) 1785 arp_dlpi_send(arl, unbind_mp); 1786 arl_refrele(arl); 1787 return (err); 1788 } 1789 1790 /* ARGSUSED */ 1791 int 1792 arp_close(queue_t *q, int flags) 1793 { 1794 if (WR(q)->q_next != NULL) { 1795 /* This is a module close */ 1796 return (arp_modclose(q->q_ptr)); 1797 } 1798 qprocsoff(q); 1799 q->q_ptr = WR(q)->q_ptr = NULL; 1800 return (0); 1801 } 1802 1803 static int 1804 arp_modclose(arl_t *arl) 1805 { 1806 arl_ill_common_t *ai = arl->arl_common; 1807 ill_t *ill; 1808 queue_t *q = arl->arl_rq; 1809 mblk_t *mp, *nextmp; 1810 ipsq_t *ipsq = NULL; 1811 1812 ill = arl_to_ill(arl); 1813 if (ill != NULL) { 1814 if (!ill_waiter_inc(ill)) { 1815 ill_refrele(ill); 1816 } else { 1817 ill_refrele(ill); 1818 if (ipsq_enter(ill, B_FALSE, NEW_OP)) 1819 ipsq = ill->ill_phyint->phyint_ipsq; 1820 ill_waiter_dcr(ill); 1821 } 1822 if (ipsq == NULL) { 1823 /* 1824 * could not enter the ipsq because ill is already 1825 * marked CONDEMNED. 1826 */ 1827 ill = NULL; 1828 } 1829 } 1830 if (ai != NULL && ipsq == NULL) { 1831 /* 1832 * Either we did not get an ill because it was marked CONDEMNED 1833 * or we could not enter the ipsq because it was unplumbing. 1834 * In both cases, wait for the ill to complete ip_modclose(). 1835 * 1836 * If the arp_modclose happened even before SLIFNAME, the ai 1837 * itself would be NULL, in which case we can complete the close 1838 * without waiting. 1839 */ 1840 mutex_enter(&ai->ai_lock); 1841 while (ai->ai_ill != NULL) 1842 cv_wait(&ai->ai_ill_unplumb_done, &ai->ai_lock); 1843 mutex_exit(&ai->ai_lock); 1844 } 1845 ASSERT(ill == NULL || IAM_WRITER_ILL(ill)); 1846 1847 mutex_enter(&arl->arl_lock); 1848 /* 1849 * If the ill had completed unplumbing before arp_modclose(), there 1850 * would be no ill (and therefore, no ipsq) to serialize arp_modclose() 1851 * so that we need to explicitly check for ARL_CONDEMNED and back off 1852 * if it is set. 1853 */ 1854 if ((arl->arl_state_flags & ARL_CONDEMNED) != 0) { 1855 mutex_exit(&arl->arl_lock); 1856 ASSERT(ipsq == NULL); 1857 return (0); 1858 } 1859 arl->arl_state_flags |= ARL_CONDEMNED; 1860 1861 /* 1862 * send out all pending dlpi messages, don't wait for the ack (which 1863 * will be ignored in arp_rput when CONDEMNED is set) 1864 * 1865 * We have to check for pending DL_UNBIND_REQ because, in the case 1866 * that ip_modclose() executed before arp_modclose(), the call to 1867 * ill_delete_tail->ipif_arp_down() would have triggered a 1868 * DL_UNBIND_REQ. When arp_modclose() executes ipsq_enter() will fail 1869 * (since ip_modclose() is in the ipsq) but the DL_UNBIND_ACK may not 1870 * have been processed yet. In this scenario, we cannot reset 1871 * arl_dlpi_pending, because the setting/clearing of arl_state_flags 1872 * related to unbind, and the associated cv_waits must be allowed to 1873 * continue. 1874 */ 1875 if (arl->arl_dlpi_pending != DL_UNBIND_REQ) 1876 arl->arl_dlpi_pending = DL_PRIM_INVAL; 1877 mp = arl->arl_dlpi_deferred; 1878 arl->arl_dlpi_deferred = NULL; 1879 mutex_exit(&arl->arl_lock); 1880 1881 for (; mp != NULL; mp = nextmp) { 1882 nextmp = mp->b_next; 1883 mp->b_next = NULL; 1884 putnext(arl->arl_wq, mp); 1885 } 1886 1887 /* Wait for data paths to quiesce */ 1888 mutex_enter(&arl->arl_lock); 1889 while (arl->arl_refcnt != 0) 1890 cv_wait(&arl->arl_cv, &arl->arl_lock); 1891 1892 /* 1893 * unbind, so that nothing else can come up from driver. 1894 */ 1895 mp = arl_unbind(arl); 1896 mutex_exit(&arl->arl_lock); 1897 if (mp != NULL) 1898 arp_dlpi_send(arl, mp); 1899 mutex_enter(&arl->arl_lock); 1900 1901 /* wait for unbind ack */ 1902 while (arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS) 1903 cv_wait(&arl->arl_cv, &arl->arl_lock); 1904 mutex_exit(&arl->arl_lock); 1905 1906 qprocsoff(q); 1907 1908 if (ill != NULL) { 1909 mutex_enter(&ill->ill_lock); 1910 ill->ill_arl_dlpi_pending = 0; 1911 mutex_exit(&ill->ill_lock); 1912 } 1913 1914 if (ai != NULL) { 1915 mutex_enter(&ai->ai_lock); 1916 ai->ai_arl = NULL; 1917 if (ai->ai_ill == NULL) { 1918 mutex_destroy(&ai->ai_lock); 1919 kmem_free(ai, sizeof (*ai)); 1920 } else { 1921 mutex_exit(&ai->ai_lock); 1922 } 1923 } 1924 1925 /* free up the rest */ 1926 arp_mod_close_tail(arl); 1927 1928 q->q_ptr = WR(q)->q_ptr = NULL; 1929 1930 if (ipsq != NULL) 1931 ipsq_exit(ipsq); 1932 1933 return (0); 1934 } 1935 1936 static void 1937 arp_mod_close_tail(arl_t *arl) 1938 { 1939 ip_stack_t *ipst = arl->arl_ipst; 1940 mblk_t **mpp; 1941 1942 netstack_hold(ipst->ips_netstack); 1943 1944 mutex_enter(&ipst->ips_ip_mi_lock); 1945 mi_close_unlink(&ipst->ips_arp_g_head, (IDP)arl); 1946 mutex_exit(&ipst->ips_ip_mi_lock); 1947 1948 /* 1949 * credp could be null if the open didn't succeed and ip_modopen 1950 * itself calls ip_close. 1951 */ 1952 if (arl->arl_credp != NULL) 1953 crfree(arl->arl_credp); 1954 1955 /* Free all retained control messages. */ 1956 mpp = &arl->arl_first_mp_to_free; 1957 do { 1958 while (mpp[0]) { 1959 mblk_t *mp; 1960 mblk_t *mp1; 1961 1962 mp = mpp[0]; 1963 mpp[0] = mp->b_next; 1964 for (mp1 = mp; mp1 != NULL; mp1 = mp1->b_cont) { 1965 mp1->b_next = NULL; 1966 mp1->b_prev = NULL; 1967 } 1968 freemsg(mp); 1969 } 1970 } while (mpp++ != &arl->arl_last_mp_to_free); 1971 1972 netstack_rele(ipst->ips_netstack); 1973 mi_free(arl->arl_name); 1974 mi_close_free((IDP)arl); 1975 } 1976 1977 /* 1978 * DAD failed. Tear down ipifs with the specified srce address. Note that 1979 * tearing down the ipif also meas deleting the ncec through ipif_down, 1980 * so it is not possible to use nce_timer for recovery. Instead we start 1981 * a timer on the ipif. Caller has to free the mp. 1982 */ 1983 void 1984 arp_failure(mblk_t *mp, ip_recv_attr_t *ira) 1985 { 1986 ill_t *ill = ira->ira_ill; 1987 1988 if ((mp = copymsg(mp)) != NULL) { 1989 ill_refhold(ill); 1990 qwriter_ip(ill, ill->ill_rq, mp, arp_excl, NEW_OP, B_FALSE); 1991 } 1992 } 1993 1994 /* 1995 * This is for exclusive changes due to ARP. Tear down an interface due 1996 * to AR_CN_FAILED and AR_CN_BOGON. 1997 */ 1998 /* ARGSUSED */ 1999 static void 2000 arp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 2001 { 2002 ill_t *ill = rq->q_ptr; 2003 arh_t *arh; 2004 ipaddr_t src; 2005 ipif_t *ipif; 2006 ip_stack_t *ipst = ill->ill_ipst; 2007 uchar_t *haddr; 2008 uint_t haddrlen; 2009 2010 /* first try src = ar$spa */ 2011 arh = (arh_t *)mp->b_rptr; 2012 bcopy((char *)&arh[1] + arh->arh_hlen, &src, IP_ADDR_LEN); 2013 2014 haddrlen = arh->arh_hlen; 2015 haddr = (uint8_t *)(arh + 1); 2016 2017 if (haddrlen == ill->ill_phys_addr_length) { 2018 /* 2019 * Ignore conflicts generated by misbehaving switches that 2020 * just reflect our own messages back to us. For IPMP, we may 2021 * see reflections across any ill in the illgrp. 2022 */ 2023 /* For an under ill_grp can change under lock */ 2024 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2025 if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 || 2026 IS_UNDER_IPMP(ill) && ill->ill_grp != NULL && 2027 ipmp_illgrp_find_ill(ill->ill_grp, haddr, 2028 haddrlen) != NULL) { 2029 rw_exit(&ipst->ips_ill_g_lock); 2030 goto ignore_conflict; 2031 } 2032 rw_exit(&ipst->ips_ill_g_lock); 2033 } 2034 2035 /* 2036 * Look up the appropriate ipif. 2037 */ 2038 ipif = ipif_lookup_addr(src, ill, ALL_ZONES, ipst); 2039 if (ipif == NULL) 2040 goto ignore_conflict; 2041 2042 /* Reload the ill to match the ipif */ 2043 ill = ipif->ipif_ill; 2044 2045 /* If it's already duplicate or ineligible, then don't do anything. */ 2046 if (ipif->ipif_flags & (IPIF_POINTOPOINT|IPIF_DUPLICATE)) { 2047 ipif_refrele(ipif); 2048 goto ignore_conflict; 2049 } 2050 2051 /* 2052 * If we failed on a recovery probe, then restart the timer to 2053 * try again later. 2054 */ 2055 if (!ipif->ipif_was_dup) { 2056 char hbuf[MAC_STR_LEN]; 2057 char sbuf[INET_ADDRSTRLEN]; 2058 char ibuf[LIFNAMSIZ]; 2059 2060 (void) mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf)); 2061 (void) ip_dot_addr(src, sbuf); 2062 ipif_get_name(ipif, ibuf, sizeof (ibuf)); 2063 2064 cmn_err(CE_WARN, "%s has duplicate address %s (in use by %s);" 2065 " disabled", ibuf, sbuf, hbuf); 2066 } 2067 mutex_enter(&ill->ill_lock); 2068 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 2069 ipif->ipif_flags |= IPIF_DUPLICATE; 2070 ill->ill_ipif_dup_count++; 2071 mutex_exit(&ill->ill_lock); 2072 (void) ipif_down(ipif, NULL, NULL); 2073 (void) ipif_down_tail(ipif); 2074 mutex_enter(&ill->ill_lock); 2075 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 2076 ill->ill_net_type == IRE_IF_RESOLVER && 2077 !(ipif->ipif_state_flags & IPIF_CONDEMNED) && 2078 ipst->ips_ip_dup_recovery > 0) { 2079 ASSERT(ipif->ipif_recovery_id == 0); 2080 ipif->ipif_recovery_id = timeout(ipif_dup_recovery, 2081 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 2082 } 2083 mutex_exit(&ill->ill_lock); 2084 ipif_refrele(ipif); 2085 2086 ignore_conflict: 2087 freemsg(mp); 2088 } 2089 2090 /* 2091 * This is a place for a dtrace hook. 2092 * Note that mp can be either the DL_UNITDATA_IND with a b_cont payload, 2093 * or just the ARP packet payload as an M_DATA. 2094 */ 2095 /* ARGSUSED */ 2096 static void 2097 arp_drop_packet(const char *str, mblk_t *mp, ill_t *ill) 2098 { 2099 freemsg(mp); 2100 } 2101 2102 static boolean_t 2103 arp_over_driver(queue_t *q) 2104 { 2105 queue_t *qnext = STREAM(q)->sd_wrq->q_next; 2106 2107 /* 2108 * check if first module below stream head is IP or UDP. 2109 */ 2110 ASSERT(qnext != NULL); 2111 if (strcmp(Q2NAME(qnext), "ip") != 0 && 2112 strcmp(Q2NAME(qnext), "udp") != 0) { 2113 /* 2114 * module below is not ip or udp, so arp has been pushed 2115 * on the driver. 2116 */ 2117 return (B_TRUE); 2118 } 2119 return (B_FALSE); 2120 } 2121 2122 static int 2123 arp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2124 { 2125 int err; 2126 2127 ASSERT(sflag & MODOPEN); 2128 if (!arp_over_driver(q)) { 2129 q->q_qinfo = dummymodinfo.st_rdinit; 2130 WR(q)->q_qinfo = dummymodinfo.st_wrinit; 2131 return ((*dummymodinfo.st_rdinit->qi_qopen)(q, devp, flag, 2132 sflag, credp)); 2133 } 2134 err = arp_modopen(q, devp, flag, sflag, credp); 2135 return (err); 2136 } 2137 2138 /* 2139 * In most cases we must be a writer on the IP stream before coming to 2140 * arp_dlpi_send(), to serialize DLPI sends to the driver. The exceptions 2141 * when we are not a writer are very early duing initialization (in 2142 * arl_init, before the arl has done a SLIFNAME, so that we don't yet know 2143 * the associated ill) or during arp_mod_close, when we could not enter the 2144 * ipsq because the ill has already unplumbed. 2145 */ 2146 static void 2147 arp_dlpi_send(arl_t *arl, mblk_t *mp) 2148 { 2149 mblk_t **mpp; 2150 t_uscalar_t prim; 2151 arl_ill_common_t *ai; 2152 2153 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2154 2155 #ifdef DEBUG 2156 ai = arl->arl_common; 2157 if (ai != NULL) { 2158 mutex_enter(&ai->ai_lock); 2159 if (ai->ai_ill != NULL) 2160 ASSERT(IAM_WRITER_ILL(ai->ai_ill)); 2161 mutex_exit(&ai->ai_lock); 2162 } 2163 #endif /* DEBUG */ 2164 2165 mutex_enter(&arl->arl_lock); 2166 if (arl->arl_dlpi_pending != DL_PRIM_INVAL) { 2167 /* Must queue message. Tail insertion */ 2168 mpp = &arl->arl_dlpi_deferred; 2169 while (*mpp != NULL) 2170 mpp = &((*mpp)->b_next); 2171 2172 *mpp = mp; 2173 mutex_exit(&arl->arl_lock); 2174 return; 2175 } 2176 mutex_exit(&arl->arl_lock); 2177 if ((prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive) 2178 == DL_BIND_REQ) { 2179 ASSERT((arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS) == 0); 2180 } 2181 /* 2182 * No need to take the arl_lock to examine ARL_CONDEMNED at this point 2183 * because the only thread that can see ARL_CONDEMNED here is the 2184 * closing arp_modclose() thread which sets the flag after becoming a 2185 * writer on the ipsq. Threads from IP must have finished and 2186 * cannot be active now. 2187 */ 2188 if (!(arl->arl_state_flags & ARL_CONDEMNED) || 2189 (prim == DL_UNBIND_REQ)) { 2190 if (prim != DL_NOTIFY_CONF) { 2191 ill_t *ill = arl_to_ill(arl); 2192 2193 arl->arl_dlpi_pending = prim; 2194 if (ill != NULL) { 2195 mutex_enter(&ill->ill_lock); 2196 ill->ill_arl_dlpi_pending = 1; 2197 mutex_exit(&ill->ill_lock); 2198 ill_refrele(ill); 2199 } 2200 } 2201 } 2202 DTRACE_PROBE4(arl__dlpi, char *, "arp_dlpi_send", 2203 char *, dl_primstr(prim), char *, "-", arl_t *, arl); 2204 putnext(arl->arl_wq, mp); 2205 } 2206 2207 static void 2208 arl_defaults_common(arl_t *arl, mblk_t *mp) 2209 { 2210 dl_info_ack_t *dlia = (dl_info_ack_t *)mp->b_rptr; 2211 /* 2212 * Till the ill is fully up the ill is not globally visible. 2213 * So no need for a lock. 2214 */ 2215 arl->arl_mactype = dlia->dl_mac_type; 2216 arl->arl_sap_length = dlia->dl_sap_length; 2217 2218 if (!arl->arl_dlpi_style_set) { 2219 if (dlia->dl_provider_style == DL_STYLE2) 2220 arl->arl_needs_attach = 1; 2221 mutex_enter(&arl->arl_lock); 2222 ASSERT(arl->arl_dlpi_style_set == 0); 2223 arl->arl_dlpi_style_set = 1; 2224 arl->arl_state_flags &= ~ARL_LL_SUBNET_PENDING; 2225 cv_broadcast(&arl->arl_cv); 2226 mutex_exit(&arl->arl_lock); 2227 } 2228 } 2229 2230 int 2231 arl_init(queue_t *q, arl_t *arl) 2232 { 2233 mblk_t *info_mp; 2234 dl_info_req_t *dlir; 2235 2236 /* subset of ill_init */ 2237 mutex_init(&arl->arl_lock, NULL, MUTEX_DEFAULT, 0); 2238 2239 arl->arl_rq = q; 2240 arl->arl_wq = WR(q); 2241 2242 info_mp = allocb(MAX(sizeof (dl_info_req_t), sizeof (dl_info_ack_t)), 2243 BPRI_HI); 2244 if (info_mp == NULL) 2245 return (ENOMEM); 2246 /* 2247 * allocate sufficient space to contain device name. 2248 */ 2249 arl->arl_name = (char *)(mi_zalloc(2 * LIFNAMSIZ)); 2250 arl->arl_ppa = UINT_MAX; 2251 arl->arl_state_flags |= (ARL_LL_SUBNET_PENDING | ARL_LL_UNBOUND); 2252 2253 /* Send down the Info Request to the driver. */ 2254 info_mp->b_datap->db_type = M_PCPROTO; 2255 dlir = (dl_info_req_t *)info_mp->b_rptr; 2256 info_mp->b_wptr = (uchar_t *)&dlir[1]; 2257 dlir->dl_primitive = DL_INFO_REQ; 2258 arl->arl_dlpi_pending = DL_PRIM_INVAL; 2259 qprocson(q); 2260 2261 arp_dlpi_send(arl, info_mp); 2262 return (0); 2263 } 2264 2265 int 2266 arl_wait_for_info_ack(arl_t *arl) 2267 { 2268 int err; 2269 2270 mutex_enter(&arl->arl_lock); 2271 while (arl->arl_state_flags & ARL_LL_SUBNET_PENDING) { 2272 /* 2273 * Return value of 0 indicates a pending signal. 2274 */ 2275 err = cv_wait_sig(&arl->arl_cv, &arl->arl_lock); 2276 if (err == 0) { 2277 mutex_exit(&arl->arl_lock); 2278 return (EINTR); 2279 } 2280 } 2281 mutex_exit(&arl->arl_lock); 2282 /* 2283 * ip_rput_other could have set an error in ill_error on 2284 * receipt of M_ERROR. 2285 */ 2286 return (arl->arl_error); 2287 } 2288 2289 void 2290 arl_set_muxid(ill_t *ill, int muxid) 2291 { 2292 arl_t *arl; 2293 2294 arl = ill_to_arl(ill); 2295 if (arl != NULL) { 2296 arl->arl_muxid = muxid; 2297 arl_refrele(arl); 2298 } 2299 } 2300 2301 int 2302 arl_get_muxid(ill_t *ill) 2303 { 2304 arl_t *arl; 2305 int muxid = 0; 2306 2307 arl = ill_to_arl(ill); 2308 if (arl != NULL) { 2309 muxid = arl->arl_muxid; 2310 arl_refrele(arl); 2311 } 2312 return (muxid); 2313 } 2314 2315 static int 2316 arp_modopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2317 { 2318 int err; 2319 zoneid_t zoneid; 2320 netstack_t *ns; 2321 ip_stack_t *ipst; 2322 arl_t *arl = NULL; 2323 2324 /* 2325 * Prevent unprivileged processes from pushing IP so that 2326 * they can't send raw IP. 2327 */ 2328 if (secpolicy_net_rawaccess(credp) != 0) 2329 return (EPERM); 2330 2331 ns = netstack_find_by_cred(credp); 2332 ASSERT(ns != NULL); 2333 ipst = ns->netstack_ip; 2334 ASSERT(ipst != NULL); 2335 2336 /* 2337 * For exclusive stacks we set the zoneid to zero 2338 * to make IP operate as if in the global zone. 2339 */ 2340 if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID) 2341 zoneid = GLOBAL_ZONEID; 2342 else 2343 zoneid = crgetzoneid(credp); 2344 2345 arl = (arl_t *)mi_open_alloc_sleep(sizeof (arl_t)); 2346 q->q_ptr = WR(q)->q_ptr = arl; 2347 arl->arl_ipst = ipst; 2348 arl->arl_zoneid = zoneid; 2349 err = arl_init(q, arl); 2350 2351 if (err != 0) { 2352 mi_free(arl->arl_name); 2353 mi_free(arl); 2354 netstack_rele(ipst->ips_netstack); 2355 q->q_ptr = NULL; 2356 WR(q)->q_ptr = NULL; 2357 return (err); 2358 } 2359 2360 /* 2361 * Wait for the DL_INFO_ACK if a DL_INFO_REQ was sent. 2362 */ 2363 err = arl_wait_for_info_ack(arl); 2364 if (err == 0) 2365 arl->arl_credp = credp; 2366 else 2367 goto fail; 2368 2369 crhold(credp); 2370 2371 mutex_enter(&ipst->ips_ip_mi_lock); 2372 err = mi_open_link(&ipst->ips_arp_g_head, (IDP)q->q_ptr, devp, flag, 2373 sflag, credp); 2374 mutex_exit(&ipst->ips_ip_mi_lock); 2375 fail: 2376 if (err) { 2377 (void) arp_close(q, 0); 2378 return (err); 2379 } 2380 return (0); 2381 } 2382 2383 /* 2384 * Notify any downstream modules (esp softmac and hitbox) of the name 2385 * of this interface using an M_CTL. 2386 */ 2387 static void 2388 arp_ifname_notify(arl_t *arl) 2389 { 2390 mblk_t *mp1, *mp2; 2391 struct iocblk *iocp; 2392 struct lifreq *lifr; 2393 2394 if ((mp1 = mkiocb(SIOCSLIFNAME)) == NULL) 2395 return; 2396 if ((mp2 = allocb(sizeof (struct lifreq), BPRI_HI)) == NULL) { 2397 freemsg(mp1); 2398 return; 2399 } 2400 2401 lifr = (struct lifreq *)mp2->b_rptr; 2402 mp2->b_wptr += sizeof (struct lifreq); 2403 bzero(lifr, sizeof (struct lifreq)); 2404 2405 (void) strncpy(lifr->lifr_name, arl->arl_name, LIFNAMSIZ); 2406 lifr->lifr_ppa = arl->arl_ppa; 2407 lifr->lifr_flags = ILLF_IPV4; 2408 2409 /* Use M_CTL to avoid confusing anyone else who might be listening. */ 2410 DB_TYPE(mp1) = M_CTL; 2411 mp1->b_cont = mp2; 2412 iocp = (struct iocblk *)mp1->b_rptr; 2413 iocp->ioc_count = msgsize(mp1->b_cont); 2414 DTRACE_PROBE4(arl__dlpi, char *, "arp_ifname_notify", 2415 char *, "SIOCSLIFNAME", char *, "-", arl_t *, arl); 2416 putnext(arl->arl_wq, mp1); 2417 } 2418 2419 void 2420 arp_send_replumb_conf(ill_t *ill) 2421 { 2422 mblk_t *mp; 2423 arl_t *arl = ill_to_arl(ill); 2424 2425 if (arl == NULL) 2426 return; 2427 /* 2428 * arl_got_replumb and arl_got_unbind to be cleared after we complete 2429 * arp_cmd_done. 2430 */ 2431 mp = mexchange(NULL, NULL, sizeof (dl_notify_conf_t), M_PROTO, 2432 DL_NOTIFY_CONF); 2433 ((dl_notify_conf_t *)(mp->b_rptr))->dl_notification = 2434 DL_NOTE_REPLUMB_DONE; 2435 arp_dlpi_send(arl, mp); 2436 mutex_enter(&arl->arl_lock); 2437 arl->arl_state_flags &= ~ARL_LL_REPLUMBING; 2438 mutex_exit(&arl->arl_lock); 2439 arl_refrele(arl); 2440 } 2441 2442 /* 2443 * The unplumb code paths call arp_unbind_complete() to make sure that it is 2444 * safe to tear down the ill. We wait for DL_UNBIND_ACK to complete, and also 2445 * for the arl_refcnt to fall to one so that, when we return from 2446 * arp_unbind_complete(), we know for certain that there are no threads in 2447 * arp_rput() that might access the arl_ill. 2448 */ 2449 void 2450 arp_unbind_complete(ill_t *ill) 2451 { 2452 arl_t *arl = ill_to_arl(ill); 2453 2454 if (arl == NULL) 2455 return; 2456 mutex_enter(&arl->arl_lock); 2457 /* 2458 * wait for unbind ack and arl_refcnt to drop to 1. Note that the 2459 * quiescent arl_refcnt for this function is 1 (and not 0) because 2460 * ill_to_arl() will itself return after taking a ref on the arl_t. 2461 */ 2462 while (arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS) 2463 cv_wait(&arl->arl_cv, &arl->arl_lock); 2464 while (arl->arl_refcnt != 1) 2465 cv_wait(&arl->arl_cv, &arl->arl_lock); 2466 mutex_exit(&arl->arl_lock); 2467 arl_refrele(arl); 2468 } 2469