1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <inet/ip_arp.h> 28 #include <inet/ip_ndp.h> 29 #include <net/if_arp.h> 30 #include <netinet/if_ether.h> 31 #include <sys/strsubr.h> 32 #include <inet/ip6.h> 33 #include <inet/ip.h> 34 #include <inet/ip_ire.h> 35 #include <inet/ip_if.h> 36 #include <sys/dlpi.h> 37 #include <sys/sunddi.h> 38 #include <sys/strsun.h> 39 #include <sys/sdt.h> 40 #include <inet/mi.h> 41 #include <inet/arp.h> 42 #include <inet/ipdrop.h> 43 #include <sys/sockio.h> 44 #include <inet/ip_impl.h> 45 #include <sys/policy.h> 46 47 #define ARL_LL_ADDR_OFFSET(arl) (((arl)->arl_sap_length) < 0 ? \ 48 (sizeof (dl_unitdata_req_t)) : \ 49 ((sizeof (dl_unitdata_req_t)) + (ABS((arl)->arl_sap_length)))) 50 51 /* 52 * MAC-specific intelligence. Shouldn't be needed, but the DL_INFO_ACK 53 * doesn't quite do it for us. 54 */ 55 typedef struct arp_m_s { 56 t_uscalar_t arp_mac_type; 57 uint32_t arp_mac_arp_hw_type; 58 t_scalar_t arp_mac_sap_length; 59 uint32_t arp_mac_hw_addr_length; 60 } arp_m_t; 61 62 static int arp_close(queue_t *, int); 63 static void arp_rput(queue_t *, mblk_t *); 64 static void arp_wput(queue_t *, mblk_t *); 65 static arp_m_t *arp_m_lookup(t_uscalar_t mac_type); 66 static void arp_notify(ipaddr_t, mblk_t *, uint32_t, ip_recv_attr_t *, 67 ncec_t *); 68 static int arp_output(ill_t *, uint32_t, const uchar_t *, const uchar_t *, 69 const uchar_t *, const uchar_t *, uchar_t *); 70 static int arp_modclose(arl_t *); 71 static void arp_mod_close_tail(arl_t *); 72 static mblk_t *arl_unbind(arl_t *); 73 static void arp_process_packet(ill_t *, mblk_t *); 74 static void arp_excl(ipsq_t *, queue_t *, mblk_t *, void *); 75 static void arp_drop_packet(const char *str, mblk_t *, ill_t *); 76 static int arp_open(queue_t *, dev_t *, int, int, cred_t *); 77 static int ip_sioctl_ifunitsel_arp(queue_t *, int *); 78 static int ip_sioctl_slifname_arp(queue_t *, void *); 79 static void arp_dlpi_send(arl_t *, mblk_t *); 80 static void arl_defaults_common(arl_t *, mblk_t *); 81 static int arp_modopen(queue_t *, dev_t *, int, int, cred_t *); 82 static void arp_ifname_notify(arl_t *); 83 static void arp_rput_dlpi_writer(ipsq_t *, queue_t *, mblk_t *, void *); 84 static arl_t *ill_to_arl(ill_t *); 85 86 #define DL_PRIM(mp) (((union DL_primitives *)(mp)->b_rptr)->dl_primitive) 87 #define IS_DLPI_DATA(mp) \ 88 ((DB_TYPE(mp) == M_PROTO) && \ 89 MBLKL(mp) >= sizeof (dl_unitdata_ind_t) && \ 90 (DL_PRIM(mp) == DL_UNITDATA_IND)) 91 92 #define AR_NOTFOUND 1 /* No matching ace found in cache */ 93 #define AR_MERGED 2 /* Matching ace updated (RFC 826 Merge_flag) */ 94 #define AR_LOOPBACK 3 /* Our own arp packet was received */ 95 #define AR_BOGON 4 /* Another host has our IP addr. */ 96 #define AR_FAILED 5 /* Duplicate Address Detection has failed */ 97 #define AR_CHANGED 6 /* Address has changed; tell IP (and merged) */ 98 99 boolean_t arp_no_defense; 100 101 struct module_info arp_mod_info = { 102 IP_MOD_ID, "arp", 1, INFPSZ, 65536, 1024 103 }; 104 static struct qinit rinit_arp = { 105 (pfi_t)arp_rput, NULL, arp_open, arp_close, NULL, &arp_mod_info 106 }; 107 static struct qinit winit_arp = { 108 (pfi_t)arp_wput, NULL, arp_open, arp_close, NULL, 109 &arp_mod_info 110 }; 111 struct streamtab arpinfo = { 112 &rinit_arp, &winit_arp 113 }; 114 #define ARH_FIXED_LEN 8 115 #define AR_LL_HDR_SLACK 32 116 117 /* 118 * pfhooks for ARP. 119 */ 120 #define ARP_HOOK_IN(_hook, _event, _ilp, _hdr, _fm, _m, ipst) \ 121 \ 122 if ((_hook).he_interested) { \ 123 hook_pkt_event_t info; \ 124 \ 125 info.hpe_protocol = ipst->ips_arp_net_data; \ 126 info.hpe_ifp = _ilp; \ 127 info.hpe_ofp = 0; \ 128 info.hpe_hdr = _hdr; \ 129 info.hpe_mp = &(_fm); \ 130 info.hpe_mb = _m; \ 131 if (hook_run(ipst->ips_arp_net_data->netd_hooks, \ 132 _event, (hook_data_t)&info) != 0) { \ 133 if (_fm != NULL) { \ 134 freemsg(_fm); \ 135 _fm = NULL; \ 136 } \ 137 _hdr = NULL; \ 138 _m = NULL; \ 139 } else { \ 140 _hdr = info.hpe_hdr; \ 141 _m = info.hpe_mb; \ 142 } \ 143 } 144 145 #define ARP_HOOK_OUT(_hook, _event, _olp, _hdr, _fm, _m, ipst) \ 146 \ 147 if ((_hook).he_interested) { \ 148 hook_pkt_event_t info; \ 149 \ 150 info.hpe_protocol = ipst->ips_arp_net_data; \ 151 info.hpe_ifp = 0; \ 152 info.hpe_ofp = _olp; \ 153 info.hpe_hdr = _hdr; \ 154 info.hpe_mp = &(_fm); \ 155 info.hpe_mb = _m; \ 156 if (hook_run(ipst->ips_arp_net_data->netd_hooks, \ 157 _event, (hook_data_t)&info) != 0) { \ 158 if (_fm != NULL) { \ 159 freemsg(_fm); \ 160 _fm = NULL; \ 161 } \ 162 _hdr = NULL; \ 163 _m = NULL; \ 164 } else { \ 165 _hdr = info.hpe_hdr; \ 166 _m = info.hpe_mb; \ 167 } \ 168 } 169 170 static arp_m_t arp_m_tbl[] = { 171 { DL_CSMACD, ARPHRD_ETHER, -2, 6}, /* 802.3 */ 172 { DL_TPB, ARPHRD_IEEE802, -2, 6}, /* 802.4 */ 173 { DL_TPR, ARPHRD_IEEE802, -2, 6}, /* 802.5 */ 174 { DL_METRO, ARPHRD_IEEE802, -2, 6}, /* 802.6 */ 175 { DL_ETHER, ARPHRD_ETHER, -2, 6}, /* Ethernet */ 176 { DL_FDDI, ARPHRD_ETHER, -2, 6}, /* FDDI */ 177 { DL_IB, ARPHRD_IB, -2, 20}, /* Infiniband */ 178 { DL_OTHER, ARPHRD_ETHER, -2, 6} /* unknown */ 179 }; 180 181 static void 182 arl_refhold_locked(arl_t *arl) 183 { 184 ASSERT(MUTEX_HELD(&arl->arl_lock)); 185 arl->arl_refcnt++; 186 ASSERT(arl->arl_refcnt != 0); 187 } 188 189 static void 190 arl_refrele(arl_t *arl) 191 { 192 mutex_enter(&arl->arl_lock); 193 ASSERT(arl->arl_refcnt != 0); 194 arl->arl_refcnt--; 195 if (arl->arl_refcnt > 1) { 196 mutex_exit(&arl->arl_lock); 197 return; 198 } 199 200 /* ill_close or arp_unbind_complete may be waiting */ 201 cv_broadcast(&arl->arl_cv); 202 mutex_exit(&arl->arl_lock); 203 } 204 205 /* 206 * wake up any pending ip ioctls. 207 */ 208 static void 209 arp_cmd_done(ill_t *ill, int err, t_uscalar_t lastprim) 210 { 211 if (lastprim == DL_UNBIND_REQ && ill->ill_replumbing) 212 arp_replumb_done(ill, 0); 213 else 214 arp_bringup_done(ill, err); 215 } 216 217 static int 218 ip_nce_resolve_all(ill_t *ill, uchar_t *src_haddr, uint32_t hlen, 219 const in_addr_t *src_paddr, ncec_t **sncec, int op) 220 { 221 int retv; 222 ncec_t *ncec; 223 boolean_t ll_changed; 224 uchar_t *lladdr = NULL; 225 int new_state; 226 227 ASSERT(ill != NULL); 228 229 ncec = ncec_lookup_illgrp_v4(ill, src_paddr); 230 *sncec = ncec; 231 232 if (ncec == NULL) { 233 retv = AR_NOTFOUND; 234 goto done; 235 } 236 237 mutex_enter(&ncec->ncec_lock); 238 /* 239 * IP addr and hardware address match what we already 240 * have, then this is a broadcast packet emitted by one of our 241 * interfaces, reflected by the switch and received on another 242 * interface. We return AR_LOOPBACK. 243 */ 244 lladdr = ncec->ncec_lladdr; 245 if (NCE_MYADDR(ncec) && hlen == ncec->ncec_ill->ill_phys_addr_length && 246 bcmp(lladdr, src_haddr, hlen) == 0) { 247 mutex_exit(&ncec->ncec_lock); 248 retv = AR_LOOPBACK; 249 goto done; 250 } 251 /* 252 * If the entry is unverified, then we've just verified that 253 * someone else already owns this address, because this is a 254 * message with the same protocol address but different 255 * hardware address. 256 */ 257 if (ncec->ncec_flags & NCE_F_UNVERIFIED) { 258 mutex_exit(&ncec->ncec_lock); 259 ncec_delete(ncec); 260 ncec_refrele(ncec); 261 *sncec = NULL; 262 retv = AR_FAILED; 263 goto done; 264 } 265 266 /* 267 * If the IP address matches ours and we're authoritative for 268 * this entry, then some other node is using our IP addr, so 269 * return AR_BOGON. Also reset the transmit count to zero so 270 * that, if we're currently in initial announcement mode, we 271 * switch back to the lazier defense mode. Knowing that 272 * there's at least one duplicate out there, we ought not 273 * blindly announce. 274 * 275 * NCE_F_AUTHORITY is set in one of two ways: 276 * 1. /sbin/arp told us so, via the "permanent" flag. 277 * 2. This is one of my addresses. 278 */ 279 if (ncec->ncec_flags & NCE_F_AUTHORITY) { 280 ncec->ncec_unsolicit_count = 0; 281 mutex_exit(&ncec->ncec_lock); 282 retv = AR_BOGON; 283 goto done; 284 } 285 286 /* 287 * No address conflict was detected, and we are getting 288 * ready to update the ncec's hwaddr. The nce MUST NOT be on an 289 * under interface, because all dynamic nce's are created on the 290 * native interface (in the non-IPMP case) or on the IPMP 291 * meta-interface (in the IPMP case) 292 */ 293 ASSERT(!IS_UNDER_IPMP(ncec->ncec_ill)); 294 295 /* 296 * update ncec with src_haddr, hlen. 297 * 298 * We are trying to resolve this ncec_addr/src_paddr and we 299 * got a REQUEST/RESPONSE from the ncec_addr/src_paddr. 300 * So the new_state is at least "STALE". If, in addition, 301 * this a solicited, unicast ARP_RESPONSE, we can transition 302 * to REACHABLE. 303 */ 304 new_state = ND_STALE; 305 ip1dbg(("got info for ncec %p from addr %x\n", 306 (void *)ncec, *src_paddr)); 307 retv = AR_MERGED; 308 if (ncec->ncec_state == ND_INCOMPLETE || 309 ncec->ncec_state == ND_INITIAL) { 310 ll_changed = B_TRUE; 311 } else { 312 ll_changed = nce_cmp_ll_addr(ncec, src_haddr, hlen); 313 if (!ll_changed) 314 new_state = ND_UNCHANGED; 315 else 316 retv = AR_CHANGED; 317 } 318 /* 319 * We don't have the equivalent of the IPv6 'S' flag indicating 320 * a solicited response, so we assume that if we are in 321 * INCOMPLETE, or got back an unchanged lladdr in PROBE state, 322 * and this is an ARP_RESPONSE, it must be a 323 * solicited response allowing us to transtion to REACHABLE. 324 */ 325 if (op == ARP_RESPONSE) { 326 switch (ncec->ncec_state) { 327 case ND_PROBE: 328 new_state = (ll_changed ? ND_STALE : ND_REACHABLE); 329 break; 330 case ND_INCOMPLETE: 331 new_state = ND_REACHABLE; 332 break; 333 } 334 } 335 /* 336 * Call nce_update() to refresh fastpath information on any 337 * dependent nce_t entries. 338 */ 339 nce_update(ncec, new_state, (ll_changed ? src_haddr : NULL)); 340 mutex_exit(&ncec->ncec_lock); 341 nce_resolv_ok(ncec); 342 done: 343 return (retv); 344 } 345 346 /* Find an entry for a particular MAC type in the arp_m_tbl. */ 347 static arp_m_t * 348 arp_m_lookup(t_uscalar_t mac_type) 349 { 350 arp_m_t *arm; 351 352 for (arm = arp_m_tbl; arm < A_END(arp_m_tbl); arm++) { 353 if (arm->arp_mac_type == mac_type) 354 return (arm); 355 } 356 return (NULL); 357 } 358 359 static uint32_t 360 arp_hw_type(t_uscalar_t mactype) 361 { 362 arp_m_t *arm; 363 364 if ((arm = arp_m_lookup(mactype)) == NULL) 365 arm = arp_m_lookup(DL_OTHER); 366 return (arm->arp_mac_arp_hw_type); 367 } 368 369 /* 370 * Called when an DLPI control message has been acked; send down the next 371 * queued message (if any). 372 * The DLPI messages of interest being bind, attach and unbind since 373 * these are the only ones sent by ARP via arp_dlpi_send. 374 */ 375 static void 376 arp_dlpi_done(arl_t *arl, ill_t *ill) 377 { 378 mblk_t *mp; 379 int err; 380 t_uscalar_t prim; 381 382 mutex_enter(&arl->arl_lock); 383 prim = arl->arl_dlpi_pending; 384 385 if ((mp = arl->arl_dlpi_deferred) == NULL) { 386 arl->arl_dlpi_pending = DL_PRIM_INVAL; 387 if (arl->arl_state_flags & ARL_LL_DOWN) 388 err = ENETDOWN; 389 else 390 err = 0; 391 mutex_exit(&arl->arl_lock); 392 393 mutex_enter(&ill->ill_lock); 394 ill->ill_arl_dlpi_pending = 0; 395 mutex_exit(&ill->ill_lock); 396 arp_cmd_done(ill, err, prim); 397 return; 398 } 399 400 arl->arl_dlpi_deferred = mp->b_next; 401 mp->b_next = NULL; 402 403 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 404 405 arl->arl_dlpi_pending = DL_PRIM(mp); 406 mutex_exit(&arl->arl_lock); 407 408 mutex_enter(&ill->ill_lock); 409 ill->ill_arl_dlpi_pending = 1; 410 mutex_exit(&ill->ill_lock); 411 412 putnext(arl->arl_wq, mp); 413 } 414 415 /* 416 * This routine is called during module initialization when the DL_INFO_ACK 417 * comes back from the device. We set up defaults for all the device dependent 418 * doo-dads we are going to need. This will leave us ready to roll if we are 419 * attempting auto-configuration. Alternatively, these defaults can be 420 * overridden by initialization procedures possessing higher intelligence. 421 * 422 * Caller will free the mp. 423 */ 424 static void 425 arp_ll_set_defaults(arl_t *arl, mblk_t *mp) 426 { 427 arp_m_t *arm; 428 dl_info_ack_t *dlia = (dl_info_ack_t *)mp->b_rptr; 429 430 if ((arm = arp_m_lookup(dlia->dl_mac_type)) == NULL) 431 arm = arp_m_lookup(DL_OTHER); 432 ASSERT(arm != NULL); 433 434 /* 435 * We initialize based on parameters in the (currently) not too 436 * exhaustive arp_m_tbl. 437 */ 438 if (dlia->dl_version == DL_VERSION_2) { 439 arl->arl_sap_length = dlia->dl_sap_length; 440 arl->arl_phys_addr_length = dlia->dl_brdcst_addr_length; 441 if (dlia->dl_provider_style == DL_STYLE2) 442 arl->arl_needs_attach = 1; 443 } else { 444 arl->arl_sap_length = arm->arp_mac_sap_length; 445 arl->arl_phys_addr_length = arm->arp_mac_hw_addr_length; 446 } 447 /* 448 * Note: the arp_hw_type in the arp header may be derived from 449 * the ill_mac_type and arp_m_lookup(). 450 */ 451 arl->arl_sap = ETHERTYPE_ARP; 452 arl_defaults_common(arl, mp); 453 } 454 455 static void 456 arp_wput(queue_t *q, mblk_t *mp) 457 { 458 int err = EINVAL; 459 struct iocblk *ioc; 460 mblk_t *mp1; 461 462 switch (DB_TYPE(mp)) { 463 case M_IOCTL: 464 ASSERT(q->q_next != NULL); 465 ioc = (struct iocblk *)mp->b_rptr; 466 if (ioc->ioc_cmd != SIOCSLIFNAME && 467 ioc->ioc_cmd != IF_UNITSEL) { 468 DTRACE_PROBE4(arl__dlpi, char *, "arp_wput", 469 char *, "<some ioctl>", char *, "-", 470 arl_t *, (arl_t *)q->q_ptr); 471 putnext(q, mp); 472 return; 473 } 474 if ((mp1 = mp->b_cont) == 0) 475 err = EINVAL; 476 else if (ioc->ioc_cmd == SIOCSLIFNAME) 477 err = ip_sioctl_slifname_arp(q, mp1->b_rptr); 478 else if (ioc->ioc_cmd == IF_UNITSEL) 479 err = ip_sioctl_ifunitsel_arp(q, (int *)mp1->b_rptr); 480 if (err == 0) 481 miocack(q, mp, 0, 0); 482 else 483 miocnak(q, mp, 0, err); 484 return; 485 default: 486 DTRACE_PROBE4(arl__dlpi, char *, "arp_wput default", 487 char *, "default mblk", char *, "-", 488 arl_t *, (arl_t *)q->q_ptr); 489 putnext(q, mp); 490 return; 491 } 492 } 493 494 /* 495 * similar to ill_dlpi_pending(): verify that the received DLPI response 496 * matches the one that is pending for the arl. 497 */ 498 static boolean_t 499 arl_dlpi_pending(arl_t *arl, t_uscalar_t prim) 500 { 501 t_uscalar_t pending; 502 503 mutex_enter(&arl->arl_lock); 504 if (arl->arl_dlpi_pending == prim) { 505 mutex_exit(&arl->arl_lock); 506 return (B_TRUE); 507 } 508 509 if (arl->arl_state_flags & ARL_CONDEMNED) { 510 mutex_exit(&arl->arl_lock); 511 return (B_FALSE); 512 } 513 pending = arl->arl_dlpi_pending; 514 mutex_exit(&arl->arl_lock); 515 516 if (pending == DL_PRIM_INVAL) { 517 ip0dbg(("arl_dlpi_pending unsolicited ack for %s on %s", 518 dl_primstr(prim), arl->arl_name)); 519 } else { 520 ip0dbg(("arl_dlpi_pending ack for %s on %s expect %s", 521 dl_primstr(prim), arl->arl_name, dl_primstr(pending))); 522 } 523 return (B_FALSE); 524 } 525 526 /* DLPI messages, other than DL_UNITDATA_IND are handled here. */ 527 static void 528 arp_rput_dlpi(queue_t *q, mblk_t *mp) 529 { 530 arl_t *arl = (arl_t *)q->q_ptr; 531 union DL_primitives *dlp; 532 t_uscalar_t prim; 533 t_uscalar_t reqprim = DL_PRIM_INVAL; 534 ill_t *ill; 535 536 if ((mp->b_wptr - mp->b_rptr) < sizeof (dlp->dl_primitive)) { 537 putnext(q, mp); 538 return; 539 } 540 dlp = (union DL_primitives *)mp->b_rptr; 541 prim = dlp->dl_primitive; 542 543 /* 544 * If we received an ACK but didn't send a request for it, then it 545 * can't be part of any pending operation; discard up-front. 546 */ 547 switch (prim) { 548 case DL_ERROR_ACK: 549 /* 550 * ce is confused about how DLPI works, so we have to interpret 551 * an "error" on DL_NOTIFY_ACK (which we never could have sent) 552 * as really meaning an error on DL_NOTIFY_REQ. 553 * 554 * Note that supporting DL_NOTIFY_REQ is optional, so printing 555 * out an error message on the console isn't warranted except 556 * for debug. 557 */ 558 if (dlp->error_ack.dl_error_primitive == DL_NOTIFY_ACK || 559 dlp->error_ack.dl_error_primitive == DL_NOTIFY_REQ) { 560 reqprim = DL_NOTIFY_REQ; 561 } else { 562 reqprim = dlp->error_ack.dl_error_primitive; 563 } 564 break; 565 case DL_INFO_ACK: 566 reqprim = DL_INFO_REQ; 567 break; 568 case DL_OK_ACK: 569 reqprim = dlp->ok_ack.dl_correct_primitive; 570 break; 571 case DL_BIND_ACK: 572 reqprim = DL_BIND_REQ; 573 break; 574 default: 575 DTRACE_PROBE2(rput_dl_badprim, arl_t *, arl, 576 union DL_primitives *, dlp); 577 putnext(q, mp); 578 return; 579 } 580 if (reqprim == DL_PRIM_INVAL || !arl_dlpi_pending(arl, reqprim)) { 581 freemsg(mp); 582 return; 583 } 584 DTRACE_PROBE4(arl__dlpi, char *, "arp_rput_dlpi received", 585 char *, dl_primstr(prim), char *, dl_primstr(reqprim), 586 arl_t *, arl); 587 588 ASSERT(prim != DL_NOTIFY_IND); 589 590 ill = arl_to_ill(arl); 591 592 switch (reqprim) { 593 case DL_INFO_REQ: 594 /* 595 * ill has not been set up yet for this case. This is the 596 * DL_INFO_ACK for the first DL_INFO_REQ sent from 597 * arp_modopen(). There should be no other arl_dlpi_deferred 598 * messages pending. We initialize the arl here. 599 */ 600 ASSERT(!arl->arl_dlpi_style_set); 601 ASSERT(arl->arl_dlpi_pending == DL_INFO_REQ); 602 ASSERT(arl->arl_dlpi_deferred == NULL); 603 arl->arl_dlpi_pending = DL_PRIM_INVAL; 604 arp_ll_set_defaults(arl, mp); 605 freemsg(mp); 606 return; 607 case DL_UNBIND_REQ: 608 mutex_enter(&arl->arl_lock); 609 arl->arl_state_flags &= ~ARL_DL_UNBIND_IN_PROGRESS; 610 /* 611 * This is not an error, so we don't set ARL_LL_DOWN 612 */ 613 arl->arl_state_flags &= ~ARL_LL_UP; 614 arl->arl_state_flags |= ARL_LL_UNBOUND; 615 if (arl->arl_state_flags & ARL_CONDEMNED) { 616 /* 617 * if this is part of the unplumb the arl may 618 * vaporize any moment after we cv_signal the 619 * arl_cv so we reset arl_dlpi_pending here. 620 * All other cases (including replumb) will 621 * have the arl_dlpi_pending reset in 622 * arp_dlpi_done. 623 */ 624 arl->arl_dlpi_pending = DL_PRIM_INVAL; 625 } 626 cv_signal(&arl->arl_cv); 627 mutex_exit(&arl->arl_lock); 628 break; 629 } 630 if (ill != NULL) { 631 /* 632 * ill ref obtained by arl_to_ill() will be released 633 * by qwriter_ip() 634 */ 635 qwriter_ip(ill, ill->ill_wq, mp, arp_rput_dlpi_writer, 636 CUR_OP, B_TRUE); 637 return; 638 } 639 freemsg(mp); 640 } 641 642 /* 643 * Handling of DLPI messages that require exclusive access to the ipsq. 644 */ 645 /* ARGSUSED */ 646 static void 647 arp_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) 648 { 649 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr; 650 ill_t *ill = (ill_t *)q->q_ptr; 651 arl_t *arl = ill_to_arl(ill); 652 653 if (arl == NULL) { 654 /* 655 * happens as a result arp_modclose triggering unbind. 656 * arp_rput_dlpi will cv_signal the arl_cv and the modclose 657 * will complete, but when it does ipsq_exit, the waiting 658 * qwriter_ip gets into the ipsq but will find the arl null. 659 * There should be no deferred messages in this case, so 660 * just complete and exit. 661 */ 662 arp_cmd_done(ill, 0, DL_UNBIND_REQ); 663 freemsg(mp); 664 return; 665 } 666 switch (dlp->dl_primitive) { 667 case DL_ERROR_ACK: 668 switch (dlp->error_ack.dl_error_primitive) { 669 case DL_UNBIND_REQ: 670 mutex_enter(&arl->arl_lock); 671 arl->arl_state_flags &= ~ARL_DL_UNBIND_IN_PROGRESS; 672 arl->arl_state_flags &= ~ARL_LL_UP; 673 arl->arl_state_flags |= ARL_LL_UNBOUND; 674 arl->arl_state_flags |= ARL_LL_DOWN; 675 cv_signal(&arl->arl_cv); 676 mutex_exit(&arl->arl_lock); 677 break; 678 case DL_BIND_REQ: 679 mutex_enter(&arl->arl_lock); 680 arl->arl_state_flags &= ~ARL_LL_UP; 681 arl->arl_state_flags |= ARL_LL_DOWN; 682 arl->arl_state_flags |= ARL_LL_UNBOUND; 683 cv_signal(&arl->arl_cv); 684 mutex_exit(&arl->arl_lock); 685 break; 686 case DL_ATTACH_REQ: 687 break; 688 default: 689 /* If it's anything else, we didn't send it. */ 690 arl_refrele(arl); 691 putnext(q, mp); 692 return; 693 } 694 break; 695 case DL_OK_ACK: 696 DTRACE_PROBE4(arl__dlpi, char *, "arp_rput_dlpi_writer ok", 697 char *, dl_primstr(dlp->ok_ack.dl_correct_primitive), 698 char *, dl_primstr(dlp->ok_ack.dl_correct_primitive), 699 arl_t *, arl); 700 mutex_enter(&arl->arl_lock); 701 switch (dlp->ok_ack.dl_correct_primitive) { 702 case DL_UNBIND_REQ: 703 case DL_ATTACH_REQ: 704 break; 705 default: 706 ip0dbg(("Dropping unrecognized DL_OK_ACK for %s", 707 dl_primstr(dlp->ok_ack.dl_correct_primitive))); 708 mutex_exit(&arl->arl_lock); 709 arl_refrele(arl); 710 freemsg(mp); 711 return; 712 } 713 mutex_exit(&arl->arl_lock); 714 break; 715 case DL_BIND_ACK: 716 DTRACE_PROBE2(rput_dl_bind, arl_t *, arl, 717 dl_bind_ack_t *, &dlp->bind_ack); 718 719 mutex_enter(&arl->arl_lock); 720 ASSERT(arl->arl_state_flags & ARL_LL_BIND_PENDING); 721 arl->arl_state_flags &= 722 ~(ARL_LL_BIND_PENDING|ARL_LL_DOWN|ARL_LL_UNBOUND); 723 arl->arl_state_flags |= ARL_LL_UP; 724 mutex_exit(&arl->arl_lock); 725 break; 726 case DL_UDERROR_IND: 727 DTRACE_PROBE2(rput_dl_uderror, arl_t *, arl, 728 dl_uderror_ind_t *, &dlp->uderror_ind); 729 arl_refrele(arl); 730 putnext(q, mp); 731 return; 732 default: 733 DTRACE_PROBE2(rput_dl_badprim, arl_t *, arl, 734 union DL_primitives *, dlp); 735 arl_refrele(arl); 736 putnext(q, mp); 737 return; 738 } 739 arp_dlpi_done(arl, ill); 740 arl_refrele(arl); 741 freemsg(mp); 742 } 743 744 void 745 arp_rput(queue_t *q, mblk_t *mp) 746 { 747 arl_t *arl = q->q_ptr; 748 boolean_t need_refrele = B_FALSE; 749 750 mutex_enter(&arl->arl_lock); 751 if (((arl->arl_state_flags & 752 (ARL_CONDEMNED | ARL_LL_REPLUMBING)) != 0)) { 753 /* 754 * Only allow high priority DLPI messages during unplumb or 755 * replumb, and we don't take an arl_refcnt for that case. 756 */ 757 if (DB_TYPE(mp) != M_PCPROTO) { 758 mutex_exit(&arl->arl_lock); 759 freemsg(mp); 760 return; 761 } 762 } else { 763 arl_refhold_locked(arl); 764 need_refrele = B_TRUE; 765 } 766 mutex_exit(&arl->arl_lock); 767 768 switch (DB_TYPE(mp)) { 769 case M_PCPROTO: 770 case M_PROTO: { 771 ill_t *ill; 772 773 /* 774 * could be one of 775 * (i) real message from the wire, (DLPI_DATA) 776 * (ii) DLPI message 777 * Take a ref on the ill associated with this arl to 778 * prevent the ill from being unplumbed until this thread 779 * is done. 780 */ 781 if (IS_DLPI_DATA(mp)) { 782 ill = arl_to_ill(arl); 783 if (ill == NULL) { 784 arp_drop_packet("No ill", mp, ill); 785 break; 786 } 787 arp_process_packet(ill, mp); 788 ill_refrele(ill); 789 break; 790 } 791 /* Miscellaneous DLPI messages get shuffled off. */ 792 arp_rput_dlpi(q, mp); 793 break; 794 } 795 case M_ERROR: 796 case M_HANGUP: 797 if (mp->b_rptr < mp->b_wptr) 798 arl->arl_error = (int)(*mp->b_rptr & 0xFF); 799 if (arl->arl_error == 0) 800 arl->arl_error = ENXIO; 801 freemsg(mp); 802 break; 803 default: 804 ip1dbg(("arp_rput other db type %x\n", DB_TYPE(mp))); 805 putnext(q, mp); 806 break; 807 } 808 if (need_refrele) 809 arl_refrele(arl); 810 } 811 812 static void 813 arp_process_packet(ill_t *ill, mblk_t *mp) 814 { 815 mblk_t *mp1; 816 arh_t *arh; 817 in_addr_t src_paddr, dst_paddr; 818 uint32_t hlen, plen; 819 boolean_t is_probe; 820 int op; 821 ncec_t *dst_ncec, *src_ncec = NULL; 822 uchar_t *src_haddr, *arhp, *dst_haddr, *dp, *sp; 823 int err; 824 ip_stack_t *ipst; 825 boolean_t need_ill_refrele = B_FALSE; 826 nce_t *nce; 827 uchar_t *src_lladdr; 828 dl_unitdata_ind_t *dlui; 829 ip_recv_attr_t iras; 830 831 ASSERT(ill != NULL); 832 if (ill->ill_flags & ILLF_NOARP) { 833 arp_drop_packet("Interface does not support ARP", mp, ill); 834 return; 835 } 836 ipst = ill->ill_ipst; 837 /* 838 * What we should have at this point is a DL_UNITDATA_IND message 839 * followed by an ARP packet. We do some initial checks and then 840 * get to work. 841 */ 842 dlui = (dl_unitdata_ind_t *)mp->b_rptr; 843 if (dlui->dl_group_address == 1) { 844 /* 845 * multicast or broadcast packet. Only accept on the ipmp 846 * nominated interface for multicasts ('cast_ill'). 847 * If we have no cast_ill we are liberal and accept everything. 848 */ 849 if (IS_UNDER_IPMP(ill)) { 850 /* For an under ill_grp can change under lock */ 851 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 852 if (!ill->ill_nom_cast && ill->ill_grp != NULL && 853 ill->ill_grp->ig_cast_ill != NULL) { 854 rw_exit(&ipst->ips_ill_g_lock); 855 arp_drop_packet("Interface is not nominated " 856 "for multicast sends and receives", 857 mp, ill); 858 return; 859 } 860 rw_exit(&ipst->ips_ill_g_lock); 861 } 862 } 863 mp1 = mp->b_cont; 864 if (mp1 == NULL) { 865 arp_drop_packet("Missing ARP packet", mp, ill); 866 return; 867 } 868 if (mp1->b_cont != NULL) { 869 /* No fooling around with funny messages. */ 870 if (!pullupmsg(mp1, -1)) { 871 arp_drop_packet("Funny message: pullup failed", 872 mp, ill); 873 return; 874 } 875 } 876 arh = (arh_t *)mp1->b_rptr; 877 hlen = arh->arh_hlen; 878 plen = arh->arh_plen; 879 if (MBLKL(mp1) < ARH_FIXED_LEN + 2 * hlen + 2 * plen) { 880 arp_drop_packet("mblk len too small", mp, ill); 881 return; 882 } 883 /* 884 * hlen 0 is used for RFC 1868 UnARP. 885 * 886 * Note that the rest of the code checks that hlen is what we expect 887 * for this hardware address type, so might as well discard packets 888 * here that don't match. 889 */ 890 if ((hlen > 0 && hlen != ill->ill_phys_addr_length) || plen == 0) { 891 DTRACE_PROBE2(rput_bogus, ill_t *, ill, mblk_t *, mp1); 892 arp_drop_packet("Bogus hlen or plen", mp, ill); 893 return; 894 } 895 /* 896 * Historically, Solaris has been lenient about hardware type numbers. 897 * We should check here, but don't. 898 */ 899 DTRACE_PROBE3(arp__physical__in__start, ill_t *, ill, arh_t *, arh, 900 mblk_t *, mp); 901 /* 902 * If ill is in an ipmp group, it will be the under ill. If we want 903 * to report the packet as coming up the IPMP interface, we should 904 * convert it to the ipmp ill. 905 */ 906 ARP_HOOK_IN(ipst->ips_arp_physical_in_event, ipst->ips_arp_physical_in, 907 ill->ill_phyint->phyint_ifindex, arh, mp, mp1, ipst); 908 DTRACE_PROBE1(arp__physical__in__end, mblk_t *, mp); 909 if (mp == NULL) 910 return; 911 arhp = (uchar_t *)arh + ARH_FIXED_LEN; 912 src_haddr = arhp; /* ar$sha */ 913 arhp += hlen; 914 bcopy(arhp, &src_paddr, IP_ADDR_LEN); /* ar$spa */ 915 sp = arhp; 916 arhp += IP_ADDR_LEN; 917 dst_haddr = arhp; /* ar$dha */ 918 arhp += hlen; 919 bcopy(arhp, &dst_paddr, IP_ADDR_LEN); /* ar$tpa */ 920 dp = arhp; 921 op = BE16_TO_U16(arh->arh_operation); 922 923 DTRACE_PROBE2(ip__arp__input, (in_addr_t), src_paddr, 924 (in_addr_t), dst_paddr); 925 926 /* Determine if this is just a probe */ 927 is_probe = (src_paddr == INADDR_ANY); 928 929 /* 930 * The following test for loopback is faster than 931 * IP_LOOPBACK_ADDR(), because it avoids any bitwise 932 * operations. 933 * Note that these addresses are always in network byte order 934 */ 935 if ((*(uint8_t *)&src_paddr) == IN_LOOPBACKNET || 936 (*(uint8_t *)&dst_paddr) == IN_LOOPBACKNET || 937 IN_MULTICAST(src_paddr) || IN_MULTICAST(dst_paddr)) { 938 arp_drop_packet("Martian IP addr", mp, ill); 939 return; 940 } 941 942 /* 943 * ira_ill is the only field used down the arp_notify path. 944 */ 945 bzero(&iras, sizeof (iras)); 946 iras.ira_ill = iras.ira_rill = ill; 947 /* 948 * RFC 826: first check if the <protocol, sender protocol address> is 949 * in the cache, if there is a sender protocol address. Note that this 950 * step also handles resolutions based on source. 951 */ 952 /* Note: after here we need to freeb(mp) and freemsg(mp1) separately */ 953 mp->b_cont = NULL; 954 if (is_probe) { 955 err = AR_NOTFOUND; 956 } else { 957 if (plen != 4) { 958 arp_drop_packet("bad protocol len", mp, ill); 959 return; 960 } 961 err = ip_nce_resolve_all(ill, src_haddr, hlen, &src_paddr, 962 &src_ncec, op); 963 switch (err) { 964 case AR_BOGON: 965 ASSERT(src_ncec != NULL); 966 arp_notify(src_paddr, mp1, AR_CN_BOGON, 967 &iras, src_ncec); 968 break; 969 case AR_FAILED: 970 arp_notify(src_paddr, mp1, AR_CN_FAILED, &iras, 971 src_ncec); 972 break; 973 case AR_LOOPBACK: 974 DTRACE_PROBE2(rput_loopback, ill_t *, ill, arh_t *, 975 arh); 976 freemsg(mp1); 977 break; 978 default: 979 goto update; 980 } 981 freemsg(mp); 982 if (src_ncec != NULL) 983 ncec_refrele(src_ncec); 984 return; 985 } 986 update: 987 /* 988 * Now look up the destination address. By RFC 826, we ignore the 989 * packet at this step if the target isn't one of our addresses (i.e., 990 * one we have been asked to PUBLISH). This is true even if the 991 * target is something we're trying to resolve and the packet 992 * is a response. 993 */ 994 dst_ncec = ncec_lookup_illgrp_v4(ill, &dst_paddr); 995 if (dst_ncec == NULL || !NCE_PUBLISH(dst_ncec)) { 996 /* 997 * Let the client know if the source mapping has changed, even 998 * if the destination provides no useful information for the 999 * client. 1000 */ 1001 if (err == AR_CHANGED) { 1002 arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras, 1003 NULL); 1004 freemsg(mp); 1005 } else { 1006 freemsg(mp); 1007 arp_drop_packet("Target is not interesting", mp1, ill); 1008 } 1009 if (dst_ncec != NULL) 1010 ncec_refrele(dst_ncec); 1011 if (src_ncec != NULL) 1012 ncec_refrele(src_ncec); 1013 return; 1014 } 1015 1016 if (dst_ncec->ncec_flags & NCE_F_UNVERIFIED) { 1017 /* 1018 * Check for a reflection. Some misbehaving bridges will 1019 * reflect our own transmitted packets back to us. 1020 */ 1021 ASSERT(NCE_PUBLISH(dst_ncec)); 1022 if (hlen != dst_ncec->ncec_ill->ill_phys_addr_length) { 1023 ncec_refrele(dst_ncec); 1024 if (src_ncec != NULL) 1025 ncec_refrele(src_ncec); 1026 freemsg(mp); 1027 arp_drop_packet("bad arh_len", mp1, ill); 1028 return; 1029 } 1030 if (!nce_cmp_ll_addr(dst_ncec, src_haddr, hlen)) { 1031 DTRACE_PROBE3(rput_probe_reflected, ill_t *, ill, 1032 arh_t *, arh, ncec_t *, dst_ncec); 1033 ncec_refrele(dst_ncec); 1034 if (src_ncec != NULL) 1035 ncec_refrele(src_ncec); 1036 freemsg(mp); 1037 arp_drop_packet("Reflected probe", mp1, ill); 1038 return; 1039 } 1040 /* 1041 * Responses targeting our HW address that are not responses to 1042 * our DAD probe must be ignored as they are related to requests 1043 * sent before DAD was restarted. 1044 */ 1045 if (op == ARP_RESPONSE && 1046 (nce_cmp_ll_addr(dst_ncec, dst_haddr, hlen) == 0)) { 1047 ncec_refrele(dst_ncec); 1048 if (src_ncec != NULL) 1049 ncec_refrele(src_ncec); 1050 freemsg(mp); 1051 arp_drop_packet( 1052 "Response to request that was sent before DAD", 1053 mp1, ill); 1054 return; 1055 } 1056 /* 1057 * Responses targeted to HW addresses which are not ours but 1058 * sent to our unverified proto address are also conflicts. 1059 * These may be reported by a proxy rather than the interface 1060 * with the conflicting address, dst_paddr is in conflict 1061 * rather than src_paddr. To ensure IP can locate the correct 1062 * ipif to take down, it is necessary to copy dst_paddr to 1063 * the src_paddr field before sending it to IP. The same is 1064 * required for probes, where src_paddr will be INADDR_ANY. 1065 */ 1066 if (is_probe || op == ARP_RESPONSE) { 1067 bcopy(dp, sp, plen); 1068 arp_notify(src_paddr, mp1, AR_CN_FAILED, &iras, 1069 NULL); 1070 ncec_delete(dst_ncec); 1071 } else if (err == AR_CHANGED) { 1072 arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras, 1073 NULL); 1074 } else { 1075 DTRACE_PROBE3(rput_request_unverified, 1076 ill_t *, ill, arh_t *, arh, ncec_t *, dst_ncec); 1077 arp_drop_packet("Unverified request", mp1, ill); 1078 } 1079 freemsg(mp); 1080 ncec_refrele(dst_ncec); 1081 if (src_ncec != NULL) 1082 ncec_refrele(src_ncec); 1083 return; 1084 } 1085 /* 1086 * If it's a request, then we reply to this, and if we think the 1087 * sender's unknown, then we create an entry to avoid unnecessary ARPs. 1088 * The design assumption is that someone ARPing us is likely to send us 1089 * a packet soon, and that we'll want to reply to it. 1090 */ 1091 if (op == ARP_REQUEST) { 1092 const uchar_t *nce_hwaddr; 1093 struct in_addr nce_paddr; 1094 clock_t now; 1095 ill_t *under_ill = ill; 1096 boolean_t send_unicast = B_TRUE; 1097 1098 ASSERT(NCE_PUBLISH(dst_ncec)); 1099 1100 if ((dst_ncec->ncec_flags & (NCE_F_BCAST|NCE_F_MCAST)) != 0) { 1101 /* 1102 * Ignore senders who are deliberately or accidentally 1103 * confused. 1104 */ 1105 goto bail; 1106 } 1107 1108 if (!is_probe && err == AR_NOTFOUND) { 1109 ASSERT(src_ncec == NULL); 1110 1111 if (IS_UNDER_IPMP(under_ill)) { 1112 /* 1113 * create the ncec for the sender on ipmp_ill. 1114 * We pass in the ipmp_ill itself to avoid 1115 * creating an nce_t on the under_ill. 1116 */ 1117 ill = ipmp_ill_hold_ipmp_ill(under_ill); 1118 if (ill == NULL) 1119 ill = under_ill; 1120 else 1121 need_ill_refrele = B_TRUE; 1122 } 1123 1124 err = nce_lookup_then_add_v4(ill, src_haddr, hlen, 1125 &src_paddr, 0, ND_STALE, &nce); 1126 1127 switch (err) { 1128 case 0: 1129 case EEXIST: 1130 ip1dbg(("added ncec %p in state %d ill %s\n", 1131 (void *)src_ncec, src_ncec->ncec_state, 1132 ill->ill_name)); 1133 src_ncec = nce->nce_common; 1134 break; 1135 default: 1136 /* 1137 * Either no memory, or the outgoing interface 1138 * is in the process of down/unplumb. In the 1139 * latter case, we will fail the send anyway, 1140 * and in the former case, we should try to send 1141 * the ARP response. 1142 */ 1143 src_lladdr = src_haddr; 1144 goto send_response; 1145 } 1146 ncec_refhold(src_ncec); 1147 nce_refrele(nce); 1148 /* set up cleanup interval on ncec */ 1149 } 1150 1151 /* 1152 * This implements periodic address defense based on a modified 1153 * version of the RFC 3927 requirements. Instead of sending a 1154 * broadcasted reply every time, as demanded by the RFC, we 1155 * send at most one broadcast reply per arp_broadcast_interval. 1156 */ 1157 now = ddi_get_lbolt(); 1158 if ((now - dst_ncec->ncec_last_time_defended) > 1159 MSEC_TO_TICK(ipst->ips_ipv4_dad_announce_interval)) { 1160 dst_ncec->ncec_last_time_defended = now; 1161 /* 1162 * If this is one of the long-suffering entries, 1163 * pull it out now. It no longer needs separate 1164 * defense, because we're now doing that with this 1165 * broadcasted reply. 1166 */ 1167 dst_ncec->ncec_flags &= ~NCE_F_DELAYED; 1168 send_unicast = B_FALSE; 1169 } 1170 if (src_ncec != NULL && send_unicast) { 1171 src_lladdr = src_ncec->ncec_lladdr; 1172 } else { 1173 src_lladdr = under_ill->ill_bcast_mp->b_rptr + 1174 NCE_LL_ADDR_OFFSET(under_ill); 1175 } 1176 send_response: 1177 nce_hwaddr = dst_ncec->ncec_lladdr; 1178 IN6_V4MAPPED_TO_INADDR(&dst_ncec->ncec_addr, &nce_paddr); 1179 1180 (void) arp_output(under_ill, ARP_RESPONSE, 1181 nce_hwaddr, (uchar_t *)&nce_paddr, src_haddr, 1182 (uchar_t *)&src_paddr, src_lladdr); 1183 } 1184 bail: 1185 if (dst_ncec != NULL) { 1186 ncec_refrele(dst_ncec); 1187 } 1188 if (src_ncec != NULL) { 1189 ncec_refrele(src_ncec); 1190 } 1191 if (err == AR_CHANGED) { 1192 mp->b_cont = NULL; 1193 arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras, NULL); 1194 mp1 = NULL; 1195 } 1196 if (need_ill_refrele) 1197 ill_refrele(ill); 1198 done: 1199 freemsg(mp); 1200 freemsg(mp1); 1201 } 1202 1203 /* 1204 * Basic initialization of the arl_t and the arl_common structure shared with 1205 * the ill_t that is done after SLIFNAME/IF_UNITSEL. 1206 */ 1207 static int 1208 arl_ill_init(arl_t *arl, char *ill_name) 1209 { 1210 ill_t *ill; 1211 arl_ill_common_t *ai; 1212 1213 ill = ill_lookup_on_name(ill_name, B_FALSE, B_FALSE, B_FALSE, 1214 arl->arl_ipst); 1215 1216 if (ill == NULL) 1217 return (ENXIO); 1218 1219 /* 1220 * By the time we set up the arl, we expect the ETHERTYPE_IP 1221 * stream to be fully bound and attached. So we copy/verify 1222 * relevant information as possible from/against the ill. 1223 * 1224 * The following should have been set up in arp_ll_set_defaults() 1225 * after the first DL_INFO_ACK was received. 1226 */ 1227 ASSERT(arl->arl_phys_addr_length == ill->ill_phys_addr_length); 1228 ASSERT(arl->arl_sap == ETHERTYPE_ARP); 1229 ASSERT(arl->arl_mactype == ill->ill_mactype); 1230 ASSERT(arl->arl_sap_length == ill->ill_sap_length); 1231 1232 ai = kmem_zalloc(sizeof (*ai), KM_SLEEP); 1233 mutex_enter(&ill->ill_lock); 1234 /* First ensure that the ill is not CONDEMNED. */ 1235 if (ill->ill_state_flags & ILL_CONDEMNED) { 1236 mutex_exit(&ill->ill_lock); 1237 ill_refrele(ill); 1238 kmem_free(ai, sizeof (*ai)); 1239 return (ENXIO); 1240 } 1241 if (ill->ill_common != NULL || arl->arl_common != NULL) { 1242 mutex_exit(&ill->ill_lock); 1243 ip0dbg(("%s: PPA already exists", ill->ill_name)); 1244 ill_refrele(ill); 1245 kmem_free(ai, sizeof (*ai)); 1246 return (EEXIST); 1247 } 1248 mutex_init(&ai->ai_lock, NULL, MUTEX_DEFAULT, NULL); 1249 ai->ai_arl = arl; 1250 ai->ai_ill = ill; 1251 ill->ill_common = ai; 1252 arl->arl_common = ai; 1253 mutex_exit(&ill->ill_lock); 1254 (void) strlcpy(arl->arl_name, ill->ill_name, LIFNAMSIZ); 1255 arl->arl_name_length = ill->ill_name_length; 1256 ill_refrele(ill); 1257 arp_ifname_notify(arl); 1258 return (0); 1259 } 1260 1261 /* Allocate and do common initializations for DLPI messages. */ 1262 static mblk_t * 1263 ip_ar_dlpi_comm(t_uscalar_t prim, size_t size) 1264 { 1265 mblk_t *mp; 1266 1267 if ((mp = allocb(size, BPRI_HI)) == NULL) 1268 return (NULL); 1269 1270 /* 1271 * DLPIv2 says that DL_INFO_REQ and DL_TOKEN_REQ (the latter 1272 * of which we don't seem to use) are sent with M_PCPROTO, and 1273 * that other DLPI are M_PROTO. 1274 */ 1275 DB_TYPE(mp) = (prim == DL_INFO_REQ) ? M_PCPROTO : M_PROTO; 1276 1277 mp->b_wptr = mp->b_rptr + size; 1278 bzero(mp->b_rptr, size); 1279 DL_PRIM(mp) = prim; 1280 return (mp); 1281 } 1282 1283 1284 int 1285 ip_sioctl_ifunitsel_arp(queue_t *q, int *ppa) 1286 { 1287 arl_t *arl; 1288 char *cp, ill_name[LIFNAMSIZ]; 1289 1290 if (q->q_next == NULL) 1291 return (EINVAL); 1292 1293 do { 1294 q = q->q_next; 1295 } while (q->q_next != NULL); 1296 cp = q->q_qinfo->qi_minfo->mi_idname; 1297 1298 arl = (arl_t *)q->q_ptr; 1299 (void) snprintf(ill_name, sizeof (ill_name), "%s%d", cp, *ppa); 1300 arl->arl_ppa = *ppa; 1301 return (arl_ill_init(arl, ill_name)); 1302 } 1303 1304 int 1305 ip_sioctl_slifname_arp(queue_t *q, void *lifreq) 1306 { 1307 arl_t *arl; 1308 struct lifreq *lifr = lifreq; 1309 1310 /* ioctl not valid when IP opened as a device */ 1311 if (q->q_next == NULL) 1312 return (EINVAL); 1313 1314 arl = (arl_t *)q->q_ptr; 1315 arl->arl_ppa = lifr->lifr_ppa; 1316 return (arl_ill_init(arl, lifr->lifr_name)); 1317 } 1318 1319 arl_t * 1320 ill_to_arl(ill_t *ill) 1321 { 1322 arl_ill_common_t *ai = ill->ill_common; 1323 arl_t *arl = NULL; 1324 1325 if (ai == NULL) 1326 return (NULL); 1327 /* 1328 * Find the arl_t that corresponds to this ill_t from the shared 1329 * ill_common structure. We can safely access the ai here as it 1330 * will only be freed in arp_modclose() after we have become 1331 * single-threaded. 1332 */ 1333 mutex_enter(&ai->ai_lock); 1334 if ((arl = ai->ai_arl) != NULL) { 1335 mutex_enter(&arl->arl_lock); 1336 if (!(arl->arl_state_flags & ARL_CONDEMNED)) { 1337 arl_refhold_locked(arl); 1338 mutex_exit(&arl->arl_lock); 1339 } else { 1340 mutex_exit(&arl->arl_lock); 1341 arl = NULL; 1342 } 1343 } 1344 mutex_exit(&ai->ai_lock); 1345 return (arl); 1346 } 1347 1348 ill_t * 1349 arl_to_ill(arl_t *arl) 1350 { 1351 arl_ill_common_t *ai = arl->arl_common; 1352 ill_t *ill = NULL; 1353 1354 if (ai == NULL) { 1355 /* 1356 * happens when the arp stream is just being opened, and 1357 * arl_ill_init has not been executed yet. 1358 */ 1359 return (NULL); 1360 } 1361 /* 1362 * Find the ill_t that corresponds to this arl_t from the shared 1363 * arl_common structure. We can safely access the ai here as it 1364 * will only be freed in arp_modclose() after we have become 1365 * single-threaded. 1366 */ 1367 mutex_enter(&ai->ai_lock); 1368 if ((ill = ai->ai_ill) != NULL) { 1369 mutex_enter(&ill->ill_lock); 1370 if (!ILL_IS_CONDEMNED(ill)) { 1371 ill_refhold_locked(ill); 1372 mutex_exit(&ill->ill_lock); 1373 } else { 1374 mutex_exit(&ill->ill_lock); 1375 ill = NULL; 1376 } 1377 } 1378 mutex_exit(&ai->ai_lock); 1379 return (ill); 1380 } 1381 1382 int 1383 arp_ll_up(ill_t *ill) 1384 { 1385 mblk_t *attach_mp = NULL; 1386 mblk_t *bind_mp = NULL; 1387 mblk_t *unbind_mp = NULL; 1388 arl_t *arl; 1389 1390 ASSERT(IAM_WRITER_ILL(ill)); 1391 arl = ill_to_arl(ill); 1392 1393 DTRACE_PROBE2(ill__downup, char *, "arp_ll_up", ill_t *, ill); 1394 if (arl == NULL) 1395 return (ENXIO); 1396 DTRACE_PROBE2(arl__downup, char *, "arp_ll_up", arl_t *, arl); 1397 if ((arl->arl_state_flags & ARL_LL_UP) != 0) { 1398 arl_refrele(arl); 1399 return (0); 1400 } 1401 if (arl->arl_needs_attach) { /* DL_STYLE2 */ 1402 attach_mp = 1403 ip_ar_dlpi_comm(DL_ATTACH_REQ, sizeof (dl_attach_req_t)); 1404 if (attach_mp == NULL) 1405 goto bad; 1406 ((dl_attach_req_t *)attach_mp->b_rptr)->dl_ppa = arl->arl_ppa; 1407 } 1408 1409 /* Allocate and initialize a bind message. */ 1410 bind_mp = ip_ar_dlpi_comm(DL_BIND_REQ, sizeof (dl_bind_req_t)); 1411 if (bind_mp == NULL) 1412 goto bad; 1413 ((dl_bind_req_t *)bind_mp->b_rptr)->dl_sap = ETHERTYPE_ARP; 1414 ((dl_bind_req_t *)bind_mp->b_rptr)->dl_service_mode = DL_CLDLS; 1415 1416 unbind_mp = ip_ar_dlpi_comm(DL_UNBIND_REQ, sizeof (dl_unbind_req_t)); 1417 if (unbind_mp == NULL) 1418 goto bad; 1419 if (arl->arl_needs_attach) { 1420 arp_dlpi_send(arl, attach_mp); 1421 } 1422 arl->arl_unbind_mp = unbind_mp; 1423 1424 arl->arl_state_flags |= ARL_LL_BIND_PENDING; 1425 arp_dlpi_send(arl, bind_mp); 1426 arl_refrele(arl); 1427 return (EINPROGRESS); 1428 1429 bad: 1430 freemsg(attach_mp); 1431 freemsg(bind_mp); 1432 freemsg(unbind_mp); 1433 arl_refrele(arl); 1434 return (ENOMEM); 1435 } 1436 1437 /* 1438 * consumes/frees mp 1439 */ 1440 static void 1441 arp_notify(in_addr_t src, mblk_t *mp, uint32_t arcn_code, 1442 ip_recv_attr_t *ira, ncec_t *ncec) 1443 { 1444 char hbuf[MAC_STR_LEN]; 1445 char sbuf[INET_ADDRSTRLEN]; 1446 ill_t *ill = ira->ira_ill; 1447 ip_stack_t *ipst = ill->ill_ipst; 1448 arh_t *arh = (arh_t *)mp->b_rptr; 1449 1450 switch (arcn_code) { 1451 case AR_CN_BOGON: 1452 /* 1453 * Someone is sending ARP packets with a source protocol 1454 * address that we have published and for which we believe our 1455 * entry is authoritative and verified to be unique on 1456 * the network. 1457 * 1458 * arp_process_packet() sends AR_CN_FAILED for the case when 1459 * a DAD probe is received and the hardware address of a 1460 * non-authoritative entry has changed. Thus, AR_CN_BOGON 1461 * indicates a real conflict, and we have to do resolution. 1462 * 1463 * We back away quickly from the address if it's from DHCP or 1464 * otherwise temporary and hasn't been used recently (or at 1465 * all). We'd like to include "deprecated" addresses here as 1466 * well (as there's no real reason to defend something we're 1467 * discarding), but IPMP "reuses" this flag to mean something 1468 * other than the standard meaning. 1469 */ 1470 if (ip_nce_conflict(mp, ira, ncec)) { 1471 (void) mac_colon_addr((uint8_t *)(arh + 1), 1472 arh->arh_hlen, hbuf, sizeof (hbuf)); 1473 (void) ip_dot_addr(src, sbuf); 1474 cmn_err(CE_WARN, 1475 "proxy ARP problem? Node '%s' is using %s on %s", 1476 hbuf, sbuf, ill->ill_name); 1477 if (!arp_no_defense) 1478 (void) arp_announce(ncec); 1479 /* 1480 * ncec_last_time_defended has been adjusted in 1481 * ip_nce_conflict. 1482 */ 1483 } else { 1484 ncec_delete(ncec); 1485 } 1486 freemsg(mp); 1487 break; 1488 case AR_CN_ANNOUNCE: { 1489 nce_hw_map_t hwm; 1490 /* 1491 * ARP gives us a copy of any packet where it thinks 1492 * the address has changed, so that we can update our 1493 * caches. We're responsible for caching known answers 1494 * in the current design. We check whether the 1495 * hardware address really has changed in all of our 1496 * entries that have cached this mapping, and if so, we 1497 * blow them away. This way we will immediately pick 1498 * up the rare case of a host changing hardware 1499 * address. 1500 */ 1501 if (src == 0) { 1502 freemsg(mp); 1503 break; 1504 } 1505 hwm.hwm_addr = src; 1506 hwm.hwm_hwlen = arh->arh_hlen; 1507 hwm.hwm_hwaddr = (uchar_t *)(arh + 1); 1508 hwm.hwm_flags = 0; 1509 ncec_walk_common(ipst->ips_ndp4, NULL, 1510 (pfi_t)nce_update_hw_changed, &hwm, B_TRUE); 1511 freemsg(mp); 1512 break; 1513 } 1514 case AR_CN_FAILED: 1515 if (arp_no_defense) { 1516 (void) mac_colon_addr((uint8_t *)(arh + 1), 1517 arh->arh_hlen, hbuf, sizeof (hbuf)); 1518 (void) ip_dot_addr(src, sbuf); 1519 1520 cmn_err(CE_WARN, 1521 "node %s is using our IP address %s on %s", 1522 hbuf, sbuf, ill->ill_name); 1523 freemsg(mp); 1524 break; 1525 } 1526 /* 1527 * mp will be freed by arp_excl. 1528 */ 1529 ill_refhold(ill); 1530 qwriter_ip(ill, ill->ill_rq, mp, arp_excl, NEW_OP, B_FALSE); 1531 return; 1532 default: 1533 ASSERT(0); 1534 freemsg(mp); 1535 break; 1536 } 1537 } 1538 1539 /* 1540 * arp_output is called to transmit an ARP Request or Response. The mapping 1541 * to RFC 826 variables is: 1542 * haddr1 == ar$sha 1543 * paddr1 == ar$spa 1544 * haddr2 == ar$tha 1545 * paddr2 == ar$tpa 1546 * The ARP frame is sent to the ether_dst in dst_lladdr. 1547 */ 1548 static int 1549 arp_output(ill_t *ill, uint32_t operation, 1550 const uchar_t *haddr1, const uchar_t *paddr1, const uchar_t *haddr2, 1551 const uchar_t *paddr2, uchar_t *dst_lladdr) 1552 { 1553 arh_t *arh; 1554 uint8_t *cp; 1555 uint_t hlen; 1556 uint32_t plen = IPV4_ADDR_LEN; /* ar$pln from RFC 826 */ 1557 uint32_t proto = IP_ARP_PROTO_TYPE; 1558 mblk_t *mp; 1559 arl_t *arl; 1560 1561 ASSERT(dst_lladdr != NULL); 1562 hlen = ill->ill_phys_addr_length; /* ar$hln from RFC 826 */ 1563 mp = ill_dlur_gen(dst_lladdr, hlen, ETHERTYPE_ARP, ill->ill_sap_length); 1564 1565 if (mp == NULL) 1566 return (ENOMEM); 1567 1568 /* IFF_NOARP flag is set or link down: do not send arp messages */ 1569 if ((ill->ill_flags & ILLF_NOARP) || !ill->ill_dl_up) { 1570 freemsg(mp); 1571 return (ENXIO); 1572 } 1573 1574 mp->b_cont = allocb(AR_LL_HDR_SLACK + ARH_FIXED_LEN + (hlen * 4) + 1575 plen + plen, BPRI_MED); 1576 if (mp->b_cont == NULL) { 1577 freeb(mp); 1578 return (ENOMEM); 1579 } 1580 1581 /* Fill in the ARP header. */ 1582 cp = mp->b_cont->b_rptr + (AR_LL_HDR_SLACK + hlen + hlen); 1583 mp->b_cont->b_rptr = cp; 1584 arh = (arh_t *)cp; 1585 U16_TO_BE16(arp_hw_type(ill->ill_mactype), arh->arh_hardware); 1586 U16_TO_BE16(proto, arh->arh_proto); 1587 arh->arh_hlen = (uint8_t)hlen; 1588 arh->arh_plen = (uint8_t)plen; 1589 U16_TO_BE16(operation, arh->arh_operation); 1590 cp += ARH_FIXED_LEN; 1591 bcopy(haddr1, cp, hlen); 1592 cp += hlen; 1593 if (paddr1 == NULL) 1594 bzero(cp, plen); 1595 else 1596 bcopy(paddr1, cp, plen); 1597 cp += plen; 1598 if (haddr2 == NULL) 1599 bzero(cp, hlen); 1600 else 1601 bcopy(haddr2, cp, hlen); 1602 cp += hlen; 1603 bcopy(paddr2, cp, plen); 1604 cp += plen; 1605 mp->b_cont->b_wptr = cp; 1606 1607 DTRACE_PROBE3(arp__physical__out__start, 1608 ill_t *, ill, arh_t *, arh, mblk_t *, mp); 1609 ARP_HOOK_OUT(ill->ill_ipst->ips_arp_physical_out_event, 1610 ill->ill_ipst->ips_arp_physical_out, 1611 ill->ill_phyint->phyint_ifindex, arh, mp, mp->b_cont, 1612 ill->ill_ipst); 1613 DTRACE_PROBE1(arp__physical__out__end, mblk_t *, mp); 1614 if (mp == NULL) 1615 return (0); 1616 1617 /* Ship it out. */ 1618 arl = ill_to_arl(ill); 1619 if (arl == NULL) { 1620 freemsg(mp); 1621 return (0); 1622 } 1623 if (canputnext(arl->arl_wq)) 1624 putnext(arl->arl_wq, mp); 1625 else 1626 freemsg(mp); 1627 arl_refrele(arl); 1628 return (0); 1629 } 1630 1631 /* 1632 * Process resolve requests. 1633 * If we are not yet reachable then we check and decrease ncec_rcnt; otherwise 1634 * we leave it alone (the caller will check and manage ncec_pcnt in those 1635 * cases.) 1636 */ 1637 int 1638 arp_request(ncec_t *ncec, in_addr_t sender, ill_t *ill) 1639 { 1640 int err; 1641 const uchar_t *target_hwaddr; 1642 struct in_addr nce_paddr; 1643 uchar_t *dst_lladdr; 1644 boolean_t use_rcnt = !NCE_ISREACHABLE(ncec); 1645 1646 ASSERT(MUTEX_HELD(&ncec->ncec_lock)); 1647 ASSERT(!IS_IPMP(ill)); 1648 1649 if (use_rcnt && ncec->ncec_rcnt == 0) { 1650 /* not allowed any more retransmits. */ 1651 return (0); 1652 } 1653 1654 if ((ill->ill_flags & ILLF_NOARP) != 0) 1655 return (0); 1656 1657 IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &nce_paddr); 1658 1659 target_hwaddr = 1660 ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 1661 1662 if (NCE_ISREACHABLE(ncec)) { 1663 dst_lladdr = ncec->ncec_lladdr; 1664 } else { 1665 dst_lladdr = ill->ill_bcast_mp->b_rptr + 1666 NCE_LL_ADDR_OFFSET(ill); 1667 } 1668 1669 mutex_exit(&ncec->ncec_lock); 1670 err = arp_output(ill, ARP_REQUEST, 1671 ill->ill_phys_addr, (uchar_t *)&sender, target_hwaddr, 1672 (uchar_t *)&nce_paddr, dst_lladdr); 1673 mutex_enter(&ncec->ncec_lock); 1674 1675 if (err != 0) { 1676 /* 1677 * Some transient error such as ENOMEM or a down link was 1678 * encountered. If the link has been taken down permanently, 1679 * the ncec will eventually be cleaned up (ipif_down_tail() 1680 * will call ipif_nce_down() and flush the ncec), to terminate 1681 * recurring attempts to send ARP requests. In all other cases, 1682 * allow the caller another chance at success next time. 1683 */ 1684 return (ncec->ncec_ill->ill_reachable_retrans_time); 1685 } 1686 1687 if (use_rcnt) 1688 ncec->ncec_rcnt--; 1689 1690 return (ncec->ncec_ill->ill_reachable_retrans_time); 1691 } 1692 1693 /* return B_TRUE if dropped */ 1694 boolean_t 1695 arp_announce(ncec_t *ncec) 1696 { 1697 ill_t *ill; 1698 int err; 1699 uchar_t *sphys_addr, *bcast_addr; 1700 struct in_addr ncec_addr; 1701 boolean_t need_refrele = B_FALSE; 1702 1703 ASSERT((ncec->ncec_flags & NCE_F_BCAST) == 0); 1704 ASSERT((ncec->ncec_flags & NCE_F_MCAST) == 0); 1705 1706 if (IS_IPMP(ncec->ncec_ill)) { 1707 /* sent on the cast_ill */ 1708 ill = ipmp_ill_get_xmit_ill(ncec->ncec_ill, B_FALSE); 1709 if (ill == NULL) 1710 return (B_TRUE); 1711 need_refrele = B_TRUE; 1712 } else { 1713 ill = ncec->ncec_ill; 1714 } 1715 1716 /* 1717 * broadcast an announce to ill_bcast address. 1718 */ 1719 IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &ncec_addr); 1720 1721 sphys_addr = ncec->ncec_lladdr; 1722 bcast_addr = ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 1723 1724 err = arp_output(ill, ARP_REQUEST, 1725 sphys_addr, (uchar_t *)&ncec_addr, bcast_addr, 1726 (uchar_t *)&ncec_addr, bcast_addr); 1727 1728 if (need_refrele) 1729 ill_refrele(ill); 1730 return (err != 0); 1731 } 1732 1733 /* return B_TRUE if dropped */ 1734 boolean_t 1735 arp_probe(ncec_t *ncec) 1736 { 1737 ill_t *ill; 1738 int err; 1739 struct in_addr ncec_addr; 1740 uchar_t *sphys_addr, *dst_lladdr; 1741 1742 if (IS_IPMP(ncec->ncec_ill)) { 1743 ill = ipmp_ill_get_xmit_ill(ncec->ncec_ill, B_FALSE); 1744 if (ill == NULL) 1745 return (B_TRUE); 1746 } else { 1747 ill = ncec->ncec_ill; 1748 } 1749 1750 IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &ncec_addr); 1751 1752 sphys_addr = ncec->ncec_lladdr; 1753 dst_lladdr = ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 1754 err = arp_output(ill, ARP_REQUEST, 1755 sphys_addr, NULL, NULL, (uchar_t *)&ncec_addr, dst_lladdr); 1756 1757 if (IS_IPMP(ncec->ncec_ill)) 1758 ill_refrele(ill); 1759 return (err != 0); 1760 } 1761 1762 static mblk_t * 1763 arl_unbind(arl_t *arl) 1764 { 1765 mblk_t *mp; 1766 1767 if ((mp = arl->arl_unbind_mp) != NULL) { 1768 arl->arl_unbind_mp = NULL; 1769 arl->arl_state_flags |= ARL_DL_UNBIND_IN_PROGRESS; 1770 } 1771 return (mp); 1772 } 1773 1774 int 1775 arp_ll_down(ill_t *ill) 1776 { 1777 arl_t *arl; 1778 mblk_t *unbind_mp; 1779 int err = 0; 1780 boolean_t replumb = (ill->ill_replumbing == 1); 1781 1782 DTRACE_PROBE2(ill__downup, char *, "arp_ll_down", ill_t *, ill); 1783 if ((arl = ill_to_arl(ill)) == NULL) 1784 return (ENXIO); 1785 DTRACE_PROBE2(arl__downup, char *, "arp_ll_down", arl_t *, arl); 1786 mutex_enter(&arl->arl_lock); 1787 unbind_mp = arl_unbind(arl); 1788 if (unbind_mp != NULL) { 1789 ASSERT(arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS); 1790 DTRACE_PROBE2(arp__unbinding, mblk_t *, unbind_mp, 1791 arl_t *, arl); 1792 err = EINPROGRESS; 1793 if (replumb) 1794 arl->arl_state_flags |= ARL_LL_REPLUMBING; 1795 } 1796 mutex_exit(&arl->arl_lock); 1797 if (unbind_mp != NULL) 1798 arp_dlpi_send(arl, unbind_mp); 1799 arl_refrele(arl); 1800 return (err); 1801 } 1802 1803 /* ARGSUSED */ 1804 int 1805 arp_close(queue_t *q, int flags) 1806 { 1807 if (WR(q)->q_next != NULL) { 1808 /* This is a module close */ 1809 return (arp_modclose(q->q_ptr)); 1810 } 1811 qprocsoff(q); 1812 q->q_ptr = WR(q)->q_ptr = NULL; 1813 return (0); 1814 } 1815 1816 static int 1817 arp_modclose(arl_t *arl) 1818 { 1819 arl_ill_common_t *ai = arl->arl_common; 1820 ill_t *ill; 1821 queue_t *q = arl->arl_rq; 1822 mblk_t *mp, *nextmp; 1823 ipsq_t *ipsq = NULL; 1824 1825 ill = arl_to_ill(arl); 1826 if (ill != NULL) { 1827 if (!ill_waiter_inc(ill)) { 1828 ill_refrele(ill); 1829 } else { 1830 ill_refrele(ill); 1831 if (ipsq_enter(ill, B_FALSE, NEW_OP)) 1832 ipsq = ill->ill_phyint->phyint_ipsq; 1833 ill_waiter_dcr(ill); 1834 } 1835 if (ipsq == NULL) { 1836 /* 1837 * could not enter the ipsq because ill is already 1838 * marked CONDEMNED. 1839 */ 1840 ill = NULL; 1841 } 1842 } 1843 if (ai != NULL && ipsq == NULL) { 1844 /* 1845 * Either we did not get an ill because it was marked CONDEMNED 1846 * or we could not enter the ipsq because it was unplumbing. 1847 * In both cases, wait for the ill to complete ip_modclose(). 1848 * 1849 * If the arp_modclose happened even before SLIFNAME, the ai 1850 * itself would be NULL, in which case we can complete the close 1851 * without waiting. 1852 */ 1853 mutex_enter(&ai->ai_lock); 1854 while (ai->ai_ill != NULL) 1855 cv_wait(&ai->ai_ill_unplumb_done, &ai->ai_lock); 1856 mutex_exit(&ai->ai_lock); 1857 } 1858 ASSERT(ill == NULL || IAM_WRITER_ILL(ill)); 1859 1860 mutex_enter(&arl->arl_lock); 1861 /* 1862 * If the ill had completed unplumbing before arp_modclose(), there 1863 * would be no ill (and therefore, no ipsq) to serialize arp_modclose() 1864 * so that we need to explicitly check for ARL_CONDEMNED and back off 1865 * if it is set. 1866 */ 1867 if ((arl->arl_state_flags & ARL_CONDEMNED) != 0) { 1868 mutex_exit(&arl->arl_lock); 1869 ASSERT(ipsq == NULL); 1870 return (0); 1871 } 1872 arl->arl_state_flags |= ARL_CONDEMNED; 1873 1874 /* 1875 * send out all pending dlpi messages, don't wait for the ack (which 1876 * will be ignored in arp_rput when CONDEMNED is set) 1877 * 1878 * We have to check for pending DL_UNBIND_REQ because, in the case 1879 * that ip_modclose() executed before arp_modclose(), the call to 1880 * ill_delete_tail->ipif_arp_down() would have triggered a 1881 * DL_UNBIND_REQ. When arp_modclose() executes ipsq_enter() will fail 1882 * (since ip_modclose() is in the ipsq) but the DL_UNBIND_ACK may not 1883 * have been processed yet. In this scenario, we cannot reset 1884 * arl_dlpi_pending, because the setting/clearing of arl_state_flags 1885 * related to unbind, and the associated cv_waits must be allowed to 1886 * continue. 1887 */ 1888 if (arl->arl_dlpi_pending != DL_UNBIND_REQ) 1889 arl->arl_dlpi_pending = DL_PRIM_INVAL; 1890 mp = arl->arl_dlpi_deferred; 1891 arl->arl_dlpi_deferred = NULL; 1892 mutex_exit(&arl->arl_lock); 1893 1894 for (; mp != NULL; mp = nextmp) { 1895 nextmp = mp->b_next; 1896 mp->b_next = NULL; 1897 putnext(arl->arl_wq, mp); 1898 } 1899 1900 /* Wait for data paths to quiesce */ 1901 mutex_enter(&arl->arl_lock); 1902 while (arl->arl_refcnt != 0) 1903 cv_wait(&arl->arl_cv, &arl->arl_lock); 1904 1905 /* 1906 * unbind, so that nothing else can come up from driver. 1907 */ 1908 mp = arl_unbind(arl); 1909 mutex_exit(&arl->arl_lock); 1910 if (mp != NULL) 1911 arp_dlpi_send(arl, mp); 1912 mutex_enter(&arl->arl_lock); 1913 1914 /* wait for unbind ack */ 1915 while (arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS) 1916 cv_wait(&arl->arl_cv, &arl->arl_lock); 1917 mutex_exit(&arl->arl_lock); 1918 1919 qprocsoff(q); 1920 1921 if (ill != NULL) { 1922 mutex_enter(&ill->ill_lock); 1923 ill->ill_arl_dlpi_pending = 0; 1924 mutex_exit(&ill->ill_lock); 1925 } 1926 1927 if (ai != NULL) { 1928 mutex_enter(&ai->ai_lock); 1929 ai->ai_arl = NULL; 1930 if (ai->ai_ill == NULL) { 1931 mutex_destroy(&ai->ai_lock); 1932 kmem_free(ai, sizeof (*ai)); 1933 } else { 1934 mutex_exit(&ai->ai_lock); 1935 } 1936 } 1937 1938 /* free up the rest */ 1939 arp_mod_close_tail(arl); 1940 1941 q->q_ptr = WR(q)->q_ptr = NULL; 1942 1943 if (ipsq != NULL) 1944 ipsq_exit(ipsq); 1945 1946 return (0); 1947 } 1948 1949 static void 1950 arp_mod_close_tail(arl_t *arl) 1951 { 1952 ip_stack_t *ipst = arl->arl_ipst; 1953 mblk_t **mpp; 1954 1955 netstack_hold(ipst->ips_netstack); 1956 1957 mutex_enter(&ipst->ips_ip_mi_lock); 1958 mi_close_unlink(&ipst->ips_arp_g_head, (IDP)arl); 1959 mutex_exit(&ipst->ips_ip_mi_lock); 1960 1961 /* 1962 * credp could be null if the open didn't succeed and ip_modopen 1963 * itself calls ip_close. 1964 */ 1965 if (arl->arl_credp != NULL) 1966 crfree(arl->arl_credp); 1967 1968 /* Free all retained control messages. */ 1969 mpp = &arl->arl_first_mp_to_free; 1970 do { 1971 while (mpp[0]) { 1972 mblk_t *mp; 1973 mblk_t *mp1; 1974 1975 mp = mpp[0]; 1976 mpp[0] = mp->b_next; 1977 for (mp1 = mp; mp1 != NULL; mp1 = mp1->b_cont) { 1978 mp1->b_next = NULL; 1979 mp1->b_prev = NULL; 1980 } 1981 freemsg(mp); 1982 } 1983 } while (mpp++ != &arl->arl_last_mp_to_free); 1984 1985 netstack_rele(ipst->ips_netstack); 1986 mi_free(arl->arl_name); 1987 mi_close_free((IDP)arl); 1988 } 1989 1990 /* 1991 * DAD failed. Tear down ipifs with the specified srce address. Note that 1992 * tearing down the ipif also meas deleting the ncec through ipif_down, 1993 * so it is not possible to use nce_timer for recovery. Instead we start 1994 * a timer on the ipif. Caller has to free the mp. 1995 */ 1996 void 1997 arp_failure(mblk_t *mp, ip_recv_attr_t *ira) 1998 { 1999 ill_t *ill = ira->ira_ill; 2000 2001 if ((mp = copymsg(mp)) != NULL) { 2002 ill_refhold(ill); 2003 qwriter_ip(ill, ill->ill_rq, mp, arp_excl, NEW_OP, B_FALSE); 2004 } 2005 } 2006 2007 /* 2008 * This is for exclusive changes due to ARP. Tear down an interface due 2009 * to AR_CN_FAILED and AR_CN_BOGON. 2010 */ 2011 /* ARGSUSED */ 2012 static void 2013 arp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 2014 { 2015 ill_t *ill = rq->q_ptr; 2016 arh_t *arh; 2017 ipaddr_t src; 2018 ipif_t *ipif; 2019 ip_stack_t *ipst = ill->ill_ipst; 2020 uchar_t *haddr; 2021 uint_t haddrlen; 2022 2023 /* first try src = ar$spa */ 2024 arh = (arh_t *)mp->b_rptr; 2025 bcopy((char *)&arh[1] + arh->arh_hlen, &src, IP_ADDR_LEN); 2026 2027 haddrlen = arh->arh_hlen; 2028 haddr = (uint8_t *)(arh + 1); 2029 2030 if (haddrlen == ill->ill_phys_addr_length) { 2031 /* 2032 * Ignore conflicts generated by misbehaving switches that 2033 * just reflect our own messages back to us. For IPMP, we may 2034 * see reflections across any ill in the illgrp. 2035 */ 2036 /* For an under ill_grp can change under lock */ 2037 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2038 if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 || 2039 IS_UNDER_IPMP(ill) && ill->ill_grp != NULL && 2040 ipmp_illgrp_find_ill(ill->ill_grp, haddr, 2041 haddrlen) != NULL) { 2042 rw_exit(&ipst->ips_ill_g_lock); 2043 goto ignore_conflict; 2044 } 2045 rw_exit(&ipst->ips_ill_g_lock); 2046 } 2047 2048 /* 2049 * Look up the appropriate ipif. 2050 */ 2051 ipif = ipif_lookup_addr(src, ill, ALL_ZONES, ipst); 2052 if (ipif == NULL) 2053 goto ignore_conflict; 2054 2055 /* Reload the ill to match the ipif */ 2056 ill = ipif->ipif_ill; 2057 2058 /* If it's already duplicate or ineligible, then don't do anything. */ 2059 if (ipif->ipif_flags & (IPIF_POINTOPOINT|IPIF_DUPLICATE)) { 2060 ipif_refrele(ipif); 2061 goto ignore_conflict; 2062 } 2063 2064 /* 2065 * If we failed on a recovery probe, then restart the timer to 2066 * try again later. 2067 */ 2068 if (!ipif->ipif_was_dup) { 2069 char hbuf[MAC_STR_LEN]; 2070 char sbuf[INET_ADDRSTRLEN]; 2071 char ibuf[LIFNAMSIZ]; 2072 2073 (void) mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf)); 2074 (void) ip_dot_addr(src, sbuf); 2075 ipif_get_name(ipif, ibuf, sizeof (ibuf)); 2076 2077 cmn_err(CE_WARN, "%s has duplicate address %s (in use by %s);" 2078 " disabled", ibuf, sbuf, hbuf); 2079 } 2080 mutex_enter(&ill->ill_lock); 2081 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 2082 ipif->ipif_flags |= IPIF_DUPLICATE; 2083 ill->ill_ipif_dup_count++; 2084 mutex_exit(&ill->ill_lock); 2085 (void) ipif_down(ipif, NULL, NULL); 2086 (void) ipif_down_tail(ipif); 2087 mutex_enter(&ill->ill_lock); 2088 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 2089 ill->ill_net_type == IRE_IF_RESOLVER && 2090 !(ipif->ipif_state_flags & IPIF_CONDEMNED) && 2091 ipst->ips_ip_dup_recovery > 0) { 2092 ASSERT(ipif->ipif_recovery_id == 0); 2093 ipif->ipif_recovery_id = timeout(ipif_dup_recovery, 2094 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 2095 } 2096 mutex_exit(&ill->ill_lock); 2097 ipif_refrele(ipif); 2098 2099 ignore_conflict: 2100 freemsg(mp); 2101 } 2102 2103 /* 2104 * This is a place for a dtrace hook. 2105 * Note that mp can be either the DL_UNITDATA_IND with a b_cont payload, 2106 * or just the ARP packet payload as an M_DATA. 2107 */ 2108 /* ARGSUSED */ 2109 static void 2110 arp_drop_packet(const char *str, mblk_t *mp, ill_t *ill) 2111 { 2112 freemsg(mp); 2113 } 2114 2115 static boolean_t 2116 arp_over_driver(queue_t *q) 2117 { 2118 queue_t *qnext = STREAM(q)->sd_wrq->q_next; 2119 2120 /* 2121 * check if first module below stream head is IP or UDP. 2122 */ 2123 ASSERT(qnext != NULL); 2124 if (strcmp(Q2NAME(qnext), "ip") != 0 && 2125 strcmp(Q2NAME(qnext), "udp") != 0) { 2126 /* 2127 * module below is not ip or udp, so arp has been pushed 2128 * on the driver. 2129 */ 2130 return (B_TRUE); 2131 } 2132 return (B_FALSE); 2133 } 2134 2135 static int 2136 arp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2137 { 2138 int err; 2139 2140 ASSERT(sflag & MODOPEN); 2141 if (!arp_over_driver(q)) { 2142 q->q_qinfo = dummymodinfo.st_rdinit; 2143 WR(q)->q_qinfo = dummymodinfo.st_wrinit; 2144 return ((*dummymodinfo.st_rdinit->qi_qopen)(q, devp, flag, 2145 sflag, credp)); 2146 } 2147 err = arp_modopen(q, devp, flag, sflag, credp); 2148 return (err); 2149 } 2150 2151 /* 2152 * In most cases we must be a writer on the IP stream before coming to 2153 * arp_dlpi_send(), to serialize DLPI sends to the driver. The exceptions 2154 * when we are not a writer are very early duing initialization (in 2155 * arl_init, before the arl has done a SLIFNAME, so that we don't yet know 2156 * the associated ill) or during arp_mod_close, when we could not enter the 2157 * ipsq because the ill has already unplumbed. 2158 */ 2159 static void 2160 arp_dlpi_send(arl_t *arl, mblk_t *mp) 2161 { 2162 mblk_t **mpp; 2163 t_uscalar_t prim; 2164 arl_ill_common_t *ai; 2165 2166 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2167 2168 #ifdef DEBUG 2169 ai = arl->arl_common; 2170 if (ai != NULL) { 2171 mutex_enter(&ai->ai_lock); 2172 if (ai->ai_ill != NULL) 2173 ASSERT(IAM_WRITER_ILL(ai->ai_ill)); 2174 mutex_exit(&ai->ai_lock); 2175 } 2176 #endif /* DEBUG */ 2177 2178 mutex_enter(&arl->arl_lock); 2179 if (arl->arl_dlpi_pending != DL_PRIM_INVAL) { 2180 /* Must queue message. Tail insertion */ 2181 mpp = &arl->arl_dlpi_deferred; 2182 while (*mpp != NULL) 2183 mpp = &((*mpp)->b_next); 2184 2185 *mpp = mp; 2186 mutex_exit(&arl->arl_lock); 2187 return; 2188 } 2189 mutex_exit(&arl->arl_lock); 2190 if ((prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive) 2191 == DL_BIND_REQ) { 2192 ASSERT((arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS) == 0); 2193 } 2194 /* 2195 * No need to take the arl_lock to examine ARL_CONDEMNED at this point 2196 * because the only thread that can see ARL_CONDEMNED here is the 2197 * closing arp_modclose() thread which sets the flag after becoming a 2198 * writer on the ipsq. Threads from IP must have finished and 2199 * cannot be active now. 2200 */ 2201 if (!(arl->arl_state_flags & ARL_CONDEMNED) || 2202 (prim == DL_UNBIND_REQ)) { 2203 if (prim != DL_NOTIFY_CONF) { 2204 ill_t *ill = arl_to_ill(arl); 2205 2206 arl->arl_dlpi_pending = prim; 2207 if (ill != NULL) { 2208 mutex_enter(&ill->ill_lock); 2209 ill->ill_arl_dlpi_pending = 1; 2210 mutex_exit(&ill->ill_lock); 2211 ill_refrele(ill); 2212 } 2213 } 2214 } 2215 DTRACE_PROBE4(arl__dlpi, char *, "arp_dlpi_send", 2216 char *, dl_primstr(prim), char *, "-", arl_t *, arl); 2217 putnext(arl->arl_wq, mp); 2218 } 2219 2220 static void 2221 arl_defaults_common(arl_t *arl, mblk_t *mp) 2222 { 2223 dl_info_ack_t *dlia = (dl_info_ack_t *)mp->b_rptr; 2224 /* 2225 * Till the ill is fully up the ill is not globally visible. 2226 * So no need for a lock. 2227 */ 2228 arl->arl_mactype = dlia->dl_mac_type; 2229 arl->arl_sap_length = dlia->dl_sap_length; 2230 2231 if (!arl->arl_dlpi_style_set) { 2232 if (dlia->dl_provider_style == DL_STYLE2) 2233 arl->arl_needs_attach = 1; 2234 mutex_enter(&arl->arl_lock); 2235 ASSERT(arl->arl_dlpi_style_set == 0); 2236 arl->arl_dlpi_style_set = 1; 2237 arl->arl_state_flags &= ~ARL_LL_SUBNET_PENDING; 2238 cv_broadcast(&arl->arl_cv); 2239 mutex_exit(&arl->arl_lock); 2240 } 2241 } 2242 2243 int 2244 arl_init(queue_t *q, arl_t *arl) 2245 { 2246 mblk_t *info_mp; 2247 dl_info_req_t *dlir; 2248 2249 /* subset of ill_init */ 2250 mutex_init(&arl->arl_lock, NULL, MUTEX_DEFAULT, 0); 2251 2252 arl->arl_rq = q; 2253 arl->arl_wq = WR(q); 2254 2255 info_mp = allocb(MAX(sizeof (dl_info_req_t), sizeof (dl_info_ack_t)), 2256 BPRI_HI); 2257 if (info_mp == NULL) 2258 return (ENOMEM); 2259 /* 2260 * allocate sufficient space to contain device name. 2261 */ 2262 arl->arl_name = (char *)(mi_zalloc(2 * LIFNAMSIZ)); 2263 arl->arl_ppa = UINT_MAX; 2264 arl->arl_state_flags |= (ARL_LL_SUBNET_PENDING | ARL_LL_UNBOUND); 2265 2266 /* Send down the Info Request to the driver. */ 2267 info_mp->b_datap->db_type = M_PCPROTO; 2268 dlir = (dl_info_req_t *)info_mp->b_rptr; 2269 info_mp->b_wptr = (uchar_t *)&dlir[1]; 2270 dlir->dl_primitive = DL_INFO_REQ; 2271 arl->arl_dlpi_pending = DL_PRIM_INVAL; 2272 qprocson(q); 2273 2274 arp_dlpi_send(arl, info_mp); 2275 return (0); 2276 } 2277 2278 int 2279 arl_wait_for_info_ack(arl_t *arl) 2280 { 2281 int err; 2282 2283 mutex_enter(&arl->arl_lock); 2284 while (arl->arl_state_flags & ARL_LL_SUBNET_PENDING) { 2285 /* 2286 * Return value of 0 indicates a pending signal. 2287 */ 2288 err = cv_wait_sig(&arl->arl_cv, &arl->arl_lock); 2289 if (err == 0) { 2290 mutex_exit(&arl->arl_lock); 2291 return (EINTR); 2292 } 2293 } 2294 mutex_exit(&arl->arl_lock); 2295 /* 2296 * ip_rput_other could have set an error in ill_error on 2297 * receipt of M_ERROR. 2298 */ 2299 return (arl->arl_error); 2300 } 2301 2302 void 2303 arl_set_muxid(ill_t *ill, int muxid) 2304 { 2305 arl_t *arl; 2306 2307 arl = ill_to_arl(ill); 2308 if (arl != NULL) { 2309 arl->arl_muxid = muxid; 2310 arl_refrele(arl); 2311 } 2312 } 2313 2314 int 2315 arl_get_muxid(ill_t *ill) 2316 { 2317 arl_t *arl; 2318 int muxid = 0; 2319 2320 arl = ill_to_arl(ill); 2321 if (arl != NULL) { 2322 muxid = arl->arl_muxid; 2323 arl_refrele(arl); 2324 } 2325 return (muxid); 2326 } 2327 2328 static int 2329 arp_modopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2330 { 2331 int err; 2332 zoneid_t zoneid; 2333 netstack_t *ns; 2334 ip_stack_t *ipst; 2335 arl_t *arl = NULL; 2336 2337 /* 2338 * Prevent unprivileged processes from pushing IP so that 2339 * they can't send raw IP. 2340 */ 2341 if (secpolicy_net_rawaccess(credp) != 0) 2342 return (EPERM); 2343 2344 ns = netstack_find_by_cred(credp); 2345 ASSERT(ns != NULL); 2346 ipst = ns->netstack_ip; 2347 ASSERT(ipst != NULL); 2348 2349 /* 2350 * For exclusive stacks we set the zoneid to zero 2351 * to make IP operate as if in the global zone. 2352 */ 2353 if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID) 2354 zoneid = GLOBAL_ZONEID; 2355 else 2356 zoneid = crgetzoneid(credp); 2357 2358 arl = (arl_t *)mi_open_alloc_sleep(sizeof (arl_t)); 2359 q->q_ptr = WR(q)->q_ptr = arl; 2360 arl->arl_ipst = ipst; 2361 arl->arl_zoneid = zoneid; 2362 err = arl_init(q, arl); 2363 2364 if (err != 0) { 2365 mi_free(arl->arl_name); 2366 mi_free(arl); 2367 netstack_rele(ipst->ips_netstack); 2368 q->q_ptr = NULL; 2369 WR(q)->q_ptr = NULL; 2370 return (err); 2371 } 2372 2373 /* 2374 * Wait for the DL_INFO_ACK if a DL_INFO_REQ was sent. 2375 */ 2376 err = arl_wait_for_info_ack(arl); 2377 if (err == 0) 2378 arl->arl_credp = credp; 2379 else 2380 goto fail; 2381 2382 crhold(credp); 2383 2384 mutex_enter(&ipst->ips_ip_mi_lock); 2385 err = mi_open_link(&ipst->ips_arp_g_head, (IDP)q->q_ptr, devp, flag, 2386 sflag, credp); 2387 mutex_exit(&ipst->ips_ip_mi_lock); 2388 fail: 2389 if (err) { 2390 (void) arp_close(q, 0); 2391 return (err); 2392 } 2393 return (0); 2394 } 2395 2396 /* 2397 * Notify any downstream modules (esp softmac and hitbox) of the name 2398 * of this interface using an M_CTL. 2399 */ 2400 static void 2401 arp_ifname_notify(arl_t *arl) 2402 { 2403 mblk_t *mp1, *mp2; 2404 struct iocblk *iocp; 2405 struct lifreq *lifr; 2406 2407 if ((mp1 = mkiocb(SIOCSLIFNAME)) == NULL) 2408 return; 2409 if ((mp2 = allocb(sizeof (struct lifreq), BPRI_HI)) == NULL) { 2410 freemsg(mp1); 2411 return; 2412 } 2413 2414 lifr = (struct lifreq *)mp2->b_rptr; 2415 mp2->b_wptr += sizeof (struct lifreq); 2416 bzero(lifr, sizeof (struct lifreq)); 2417 2418 (void) strncpy(lifr->lifr_name, arl->arl_name, LIFNAMSIZ); 2419 lifr->lifr_ppa = arl->arl_ppa; 2420 lifr->lifr_flags = ILLF_IPV4; 2421 2422 /* Use M_CTL to avoid confusing anyone else who might be listening. */ 2423 DB_TYPE(mp1) = M_CTL; 2424 mp1->b_cont = mp2; 2425 iocp = (struct iocblk *)mp1->b_rptr; 2426 iocp->ioc_count = msgsize(mp1->b_cont); 2427 DTRACE_PROBE4(arl__dlpi, char *, "arp_ifname_notify", 2428 char *, "SIOCSLIFNAME", char *, "-", arl_t *, arl); 2429 putnext(arl->arl_wq, mp1); 2430 } 2431 2432 void 2433 arp_send_replumb_conf(ill_t *ill) 2434 { 2435 mblk_t *mp; 2436 arl_t *arl = ill_to_arl(ill); 2437 2438 if (arl == NULL) 2439 return; 2440 /* 2441 * arl_got_replumb and arl_got_unbind to be cleared after we complete 2442 * arp_cmd_done. 2443 */ 2444 mp = mexchange(NULL, NULL, sizeof (dl_notify_conf_t), M_PROTO, 2445 DL_NOTIFY_CONF); 2446 ((dl_notify_conf_t *)(mp->b_rptr))->dl_notification = 2447 DL_NOTE_REPLUMB_DONE; 2448 arp_dlpi_send(arl, mp); 2449 mutex_enter(&arl->arl_lock); 2450 arl->arl_state_flags &= ~ARL_LL_REPLUMBING; 2451 mutex_exit(&arl->arl_lock); 2452 arl_refrele(arl); 2453 } 2454 2455 /* 2456 * The unplumb code paths call arp_unbind_complete() to make sure that it is 2457 * safe to tear down the ill. We wait for DL_UNBIND_ACK to complete, and also 2458 * for the arl_refcnt to fall to one so that, when we return from 2459 * arp_unbind_complete(), we know for certain that there are no threads in 2460 * arp_rput() that might access the arl_ill. 2461 */ 2462 void 2463 arp_unbind_complete(ill_t *ill) 2464 { 2465 arl_t *arl = ill_to_arl(ill); 2466 2467 if (arl == NULL) 2468 return; 2469 mutex_enter(&arl->arl_lock); 2470 /* 2471 * wait for unbind ack and arl_refcnt to drop to 1. Note that the 2472 * quiescent arl_refcnt for this function is 1 (and not 0) because 2473 * ill_to_arl() will itself return after taking a ref on the arl_t. 2474 */ 2475 while (arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS) 2476 cv_wait(&arl->arl_cv, &arl->arl_lock); 2477 while (arl->arl_refcnt != 1) 2478 cv_wait(&arl->arl_cv, &arl->arl_lock); 2479 mutex_exit(&arl->arl_lock); 2480 arl_refrele(arl); 2481 } 2482