1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 #include <inet/ip_arp.h> 27 #include <inet/ip_ndp.h> 28 #include <net/if_arp.h> 29 #include <netinet/if_ether.h> 30 #include <sys/strsubr.h> 31 #include <inet/ip6.h> 32 #include <inet/ip.h> 33 #include <inet/ip_ire.h> 34 #include <inet/ip_if.h> 35 #include <sys/dlpi.h> 36 #include <sys/sunddi.h> 37 #include <sys/strsun.h> 38 #include <sys/sdt.h> 39 #include <inet/mi.h> 40 #include <inet/arp.h> 41 #include <inet/ipdrop.h> 42 #include <sys/sockio.h> 43 #include <inet/ip_impl.h> 44 #include <sys/policy.h> 45 46 #define ARL_LL_ADDR_OFFSET(arl) (((arl)->arl_sap_length) < 0 ? \ 47 (sizeof (dl_unitdata_req_t)) : \ 48 ((sizeof (dl_unitdata_req_t)) + (ABS((arl)->arl_sap_length)))) 49 50 /* 51 * MAC-specific intelligence. Shouldn't be needed, but the DL_INFO_ACK 52 * doesn't quite do it for us. 53 */ 54 typedef struct arp_m_s { 55 t_uscalar_t arp_mac_type; 56 uint32_t arp_mac_arp_hw_type; 57 t_scalar_t arp_mac_sap_length; 58 uint32_t arp_mac_hw_addr_length; 59 } arp_m_t; 60 61 static int arp_close(queue_t *, int, cred_t *); 62 static int arp_rput(queue_t *, mblk_t *); 63 static int arp_wput(queue_t *, mblk_t *); 64 static arp_m_t *arp_m_lookup(t_uscalar_t mac_type); 65 static void arp_notify(ipaddr_t, mblk_t *, uint32_t, ip_recv_attr_t *, 66 ncec_t *); 67 static int arp_output(ill_t *, uint32_t, const uchar_t *, const uchar_t *, 68 const uchar_t *, const uchar_t *, uchar_t *); 69 static int arp_modclose(arl_t *); 70 static void arp_mod_close_tail(arl_t *); 71 static mblk_t *arl_unbind(arl_t *); 72 static void arp_process_packet(ill_t *, mblk_t *); 73 static void arp_excl(ipsq_t *, queue_t *, mblk_t *, void *); 74 static void arp_drop_packet(const char *str, mblk_t *, ill_t *); 75 static int arp_open(queue_t *, dev_t *, int, int, cred_t *); 76 static int ip_sioctl_ifunitsel_arp(queue_t *, int *); 77 static int ip_sioctl_slifname_arp(queue_t *, void *); 78 static void arp_dlpi_send(arl_t *, mblk_t *); 79 static void arl_defaults_common(arl_t *, mblk_t *); 80 static int arp_modopen(queue_t *, dev_t *, int, int, cred_t *); 81 static void arp_ifname_notify(arl_t *); 82 static void arp_rput_dlpi_writer(ipsq_t *, queue_t *, mblk_t *, void *); 83 static arl_t *ill_to_arl(ill_t *); 84 85 #define DL_PRIM(mp) (((union DL_primitives *)(mp)->b_rptr)->dl_primitive) 86 #define IS_DLPI_DATA(mp) \ 87 ((DB_TYPE(mp) == M_PROTO) && \ 88 MBLKL(mp) >= sizeof (dl_unitdata_ind_t) && \ 89 (DL_PRIM(mp) == DL_UNITDATA_IND)) 90 91 #define AR_NOTFOUND 1 /* No matching ace found in cache */ 92 #define AR_MERGED 2 /* Matching ace updated (RFC 826 Merge_flag) */ 93 #define AR_LOOPBACK 3 /* Our own arp packet was received */ 94 #define AR_BOGON 4 /* Another host has our IP addr. */ 95 #define AR_FAILED 5 /* Duplicate Address Detection has failed */ 96 #define AR_CHANGED 6 /* Address has changed; tell IP (and merged) */ 97 98 boolean_t arp_no_defense; 99 100 struct module_info arp_mod_info = { 101 IP_MOD_ID, "arp", 1, INFPSZ, 65536, 1024 102 }; 103 static struct qinit rinit_arp = { 104 arp_rput, NULL, arp_open, arp_close, NULL, &arp_mod_info 105 }; 106 static struct qinit winit_arp = { 107 arp_wput, NULL, arp_open, arp_close, NULL, &arp_mod_info 108 }; 109 struct streamtab arpinfo = { 110 &rinit_arp, &winit_arp 111 }; 112 #define ARH_FIXED_LEN 8 113 #define AR_LL_HDR_SLACK 32 114 115 /* 116 * pfhooks for ARP. 117 */ 118 #define ARP_HOOK_IN(_hook, _event, _ilp, _hdr, _fm, _m, ipst) \ 119 \ 120 if ((_hook).he_interested) { \ 121 hook_pkt_event_t info; \ 122 \ 123 info.hpe_protocol = ipst->ips_arp_net_data; \ 124 info.hpe_ifp = _ilp; \ 125 info.hpe_ofp = 0; \ 126 info.hpe_hdr = _hdr; \ 127 info.hpe_mp = &(_fm); \ 128 info.hpe_mb = _m; \ 129 if (hook_run(ipst->ips_arp_net_data->netd_hooks, \ 130 _event, (hook_data_t)&info) != 0) { \ 131 if (_fm != NULL) { \ 132 freemsg(_fm); \ 133 _fm = NULL; \ 134 } \ 135 _hdr = NULL; \ 136 _m = NULL; \ 137 } else { \ 138 _hdr = info.hpe_hdr; \ 139 _m = info.hpe_mb; \ 140 } \ 141 } 142 143 #define ARP_HOOK_OUT(_hook, _event, _olp, _hdr, _fm, _m, ipst) \ 144 \ 145 if ((_hook).he_interested) { \ 146 hook_pkt_event_t info; \ 147 \ 148 info.hpe_protocol = ipst->ips_arp_net_data; \ 149 info.hpe_ifp = 0; \ 150 info.hpe_ofp = _olp; \ 151 info.hpe_hdr = _hdr; \ 152 info.hpe_mp = &(_fm); \ 153 info.hpe_mb = _m; \ 154 if (hook_run(ipst->ips_arp_net_data->netd_hooks, \ 155 _event, (hook_data_t)&info) != 0) { \ 156 if (_fm != NULL) { \ 157 freemsg(_fm); \ 158 _fm = NULL; \ 159 } \ 160 _hdr = NULL; \ 161 _m = NULL; \ 162 } else { \ 163 _hdr = info.hpe_hdr; \ 164 _m = info.hpe_mb; \ 165 } \ 166 } 167 168 static arp_m_t arp_m_tbl[] = { 169 { DL_CSMACD, ARPHRD_ETHER, -2, 6}, /* 802.3 */ 170 { DL_TPB, ARPHRD_IEEE802, -2, 6}, /* 802.4 */ 171 { DL_TPR, ARPHRD_IEEE802, -2, 6}, /* 802.5 */ 172 { DL_METRO, ARPHRD_IEEE802, -2, 6}, /* 802.6 */ 173 { DL_ETHER, ARPHRD_ETHER, -2, 6}, /* Ethernet */ 174 { DL_FDDI, ARPHRD_ETHER, -2, 6}, /* FDDI */ 175 { DL_IB, ARPHRD_IB, -2, 20}, /* Infiniband */ 176 { DL_OTHER, ARPHRD_ETHER, -2, 6} /* unknown */ 177 }; 178 179 static void 180 arl_refhold_locked(arl_t *arl) 181 { 182 ASSERT(MUTEX_HELD(&arl->arl_lock)); 183 arl->arl_refcnt++; 184 ASSERT(arl->arl_refcnt != 0); 185 } 186 187 static void 188 arl_refrele(arl_t *arl) 189 { 190 mutex_enter(&arl->arl_lock); 191 ASSERT(arl->arl_refcnt != 0); 192 arl->arl_refcnt--; 193 if (arl->arl_refcnt > 1) { 194 mutex_exit(&arl->arl_lock); 195 return; 196 } 197 198 /* ill_close or arp_unbind_complete may be waiting */ 199 cv_broadcast(&arl->arl_cv); 200 mutex_exit(&arl->arl_lock); 201 } 202 203 /* 204 * wake up any pending ip ioctls. 205 */ 206 static void 207 arp_cmd_done(ill_t *ill, int err, t_uscalar_t lastprim) 208 { 209 if (lastprim == DL_UNBIND_REQ && ill->ill_replumbing) 210 arp_replumb_done(ill, 0); 211 else 212 arp_bringup_done(ill, err); 213 } 214 215 static int 216 ip_nce_resolve_all(ill_t *ill, uchar_t *src_haddr, uint32_t hlen, 217 const in_addr_t *src_paddr, ncec_t **sncec, int op) 218 { 219 int retv; 220 ncec_t *ncec; 221 boolean_t ll_changed; 222 uchar_t *lladdr = NULL; 223 int new_state; 224 225 ASSERT(ill != NULL); 226 227 ncec = ncec_lookup_illgrp_v4(ill, src_paddr); 228 *sncec = ncec; 229 230 if (ncec == NULL) { 231 retv = AR_NOTFOUND; 232 goto done; 233 } 234 235 mutex_enter(&ncec->ncec_lock); 236 /* 237 * IP addr and hardware address match what we already 238 * have, then this is a broadcast packet emitted by one of our 239 * interfaces, reflected by the switch and received on another 240 * interface. We return AR_LOOPBACK. 241 */ 242 lladdr = ncec->ncec_lladdr; 243 if (NCE_MYADDR(ncec) && hlen == ncec->ncec_ill->ill_phys_addr_length && 244 bcmp(lladdr, src_haddr, hlen) == 0) { 245 mutex_exit(&ncec->ncec_lock); 246 retv = AR_LOOPBACK; 247 goto done; 248 } 249 /* 250 * If the entry is unverified, then we've just verified that 251 * someone else already owns this address, because this is a 252 * message with the same protocol address but different 253 * hardware address. 254 */ 255 if (ncec->ncec_flags & NCE_F_UNVERIFIED) { 256 mutex_exit(&ncec->ncec_lock); 257 ncec_delete(ncec); 258 ncec_refrele(ncec); 259 *sncec = NULL; 260 retv = AR_FAILED; 261 goto done; 262 } 263 264 /* 265 * If the IP address matches ours and we're authoritative for 266 * this entry, then some other node is using our IP addr, so 267 * return AR_BOGON. Also reset the transmit count to zero so 268 * that, if we're currently in initial announcement mode, we 269 * switch back to the lazier defense mode. Knowing that 270 * there's at least one duplicate out there, we ought not 271 * blindly announce. 272 * 273 * NCE_F_AUTHORITY is set in one of two ways: 274 * 1. /sbin/arp told us so, via the "permanent" flag. 275 * 2. This is one of my addresses. 276 */ 277 if (ncec->ncec_flags & NCE_F_AUTHORITY) { 278 ncec->ncec_unsolicit_count = 0; 279 mutex_exit(&ncec->ncec_lock); 280 retv = AR_BOGON; 281 goto done; 282 } 283 284 /* 285 * No address conflict was detected, and we are getting 286 * ready to update the ncec's hwaddr. The nce MUST NOT be on an 287 * under interface, because all dynamic nce's are created on the 288 * native interface (in the non-IPMP case) or on the IPMP 289 * meta-interface (in the IPMP case) 290 */ 291 ASSERT(!IS_UNDER_IPMP(ncec->ncec_ill)); 292 293 /* 294 * update ncec with src_haddr, hlen. 295 * 296 * We are trying to resolve this ncec_addr/src_paddr and we 297 * got a REQUEST/RESPONSE from the ncec_addr/src_paddr. 298 * So the new_state is at least "STALE". If, in addition, 299 * this a solicited, unicast ARP_RESPONSE, we can transition 300 * to REACHABLE. 301 */ 302 new_state = ND_STALE; 303 ip1dbg(("got info for ncec %p from addr %x\n", 304 (void *)ncec, *src_paddr)); 305 retv = AR_MERGED; 306 if (ncec->ncec_state == ND_INCOMPLETE || 307 ncec->ncec_state == ND_INITIAL) { 308 ll_changed = B_TRUE; 309 } else { 310 ll_changed = nce_cmp_ll_addr(ncec, src_haddr, hlen); 311 if (!ll_changed) 312 new_state = ND_UNCHANGED; 313 else 314 retv = AR_CHANGED; 315 } 316 /* 317 * We don't have the equivalent of the IPv6 'S' flag indicating 318 * a solicited response, so we assume that if we are in 319 * INCOMPLETE, or got back an unchanged lladdr in PROBE state, 320 * and this is an ARP_RESPONSE, it must be a 321 * solicited response allowing us to transtion to REACHABLE. 322 */ 323 if (op == ARP_RESPONSE) { 324 switch (ncec->ncec_state) { 325 case ND_PROBE: 326 new_state = (ll_changed ? ND_STALE : ND_REACHABLE); 327 break; 328 case ND_INCOMPLETE: 329 new_state = ND_REACHABLE; 330 break; 331 } 332 } 333 /* 334 * Call nce_update() to refresh fastpath information on any 335 * dependent nce_t entries. 336 */ 337 nce_update(ncec, new_state, (ll_changed ? src_haddr : NULL)); 338 mutex_exit(&ncec->ncec_lock); 339 nce_resolv_ok(ncec); 340 done: 341 return (retv); 342 } 343 344 /* Find an entry for a particular MAC type in the arp_m_tbl. */ 345 static arp_m_t * 346 arp_m_lookup(t_uscalar_t mac_type) 347 { 348 arp_m_t *arm; 349 350 for (arm = arp_m_tbl; arm < A_END(arp_m_tbl); arm++) { 351 if (arm->arp_mac_type == mac_type) 352 return (arm); 353 } 354 return (NULL); 355 } 356 357 uint32_t 358 arp_hw_type(t_uscalar_t mactype) 359 { 360 arp_m_t *arm; 361 362 if ((arm = arp_m_lookup(mactype)) == NULL) 363 arm = arp_m_lookup(DL_OTHER); 364 return (arm->arp_mac_arp_hw_type); 365 } 366 367 /* 368 * Called when an DLPI control message has been acked; send down the next 369 * queued message (if any). 370 * The DLPI messages of interest being bind, attach and unbind since 371 * these are the only ones sent by ARP via arp_dlpi_send. 372 */ 373 static void 374 arp_dlpi_done(arl_t *arl, ill_t *ill) 375 { 376 mblk_t *mp; 377 int err; 378 t_uscalar_t prim; 379 380 mutex_enter(&arl->arl_lock); 381 prim = arl->arl_dlpi_pending; 382 383 if ((mp = arl->arl_dlpi_deferred) == NULL) { 384 arl->arl_dlpi_pending = DL_PRIM_INVAL; 385 if (arl->arl_state_flags & ARL_LL_DOWN) 386 err = ENETDOWN; 387 else 388 err = 0; 389 mutex_exit(&arl->arl_lock); 390 391 mutex_enter(&ill->ill_lock); 392 ill->ill_arl_dlpi_pending = 0; 393 mutex_exit(&ill->ill_lock); 394 arp_cmd_done(ill, err, prim); 395 return; 396 } 397 398 arl->arl_dlpi_deferred = mp->b_next; 399 mp->b_next = NULL; 400 401 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 402 403 arl->arl_dlpi_pending = DL_PRIM(mp); 404 mutex_exit(&arl->arl_lock); 405 406 mutex_enter(&ill->ill_lock); 407 ill->ill_arl_dlpi_pending = 1; 408 mutex_exit(&ill->ill_lock); 409 410 putnext(arl->arl_wq, mp); 411 } 412 413 /* 414 * This routine is called during module initialization when the DL_INFO_ACK 415 * comes back from the device. We set up defaults for all the device dependent 416 * doo-dads we are going to need. This will leave us ready to roll if we are 417 * attempting auto-configuration. Alternatively, these defaults can be 418 * overridden by initialization procedures possessing higher intelligence. 419 * 420 * Caller will free the mp. 421 */ 422 static void 423 arp_ll_set_defaults(arl_t *arl, mblk_t *mp) 424 { 425 arp_m_t *arm; 426 dl_info_ack_t *dlia = (dl_info_ack_t *)mp->b_rptr; 427 428 if ((arm = arp_m_lookup(dlia->dl_mac_type)) == NULL) 429 arm = arp_m_lookup(DL_OTHER); 430 ASSERT(arm != NULL); 431 432 /* 433 * We initialize based on parameters in the (currently) not too 434 * exhaustive arp_m_tbl. 435 */ 436 if (dlia->dl_version == DL_VERSION_2) { 437 arl->arl_sap_length = dlia->dl_sap_length; 438 arl->arl_phys_addr_length = dlia->dl_brdcst_addr_length; 439 if (dlia->dl_provider_style == DL_STYLE2) 440 arl->arl_needs_attach = 1; 441 } else { 442 arl->arl_sap_length = arm->arp_mac_sap_length; 443 arl->arl_phys_addr_length = arm->arp_mac_hw_addr_length; 444 } 445 /* 446 * Note: the arp_hw_type in the arp header may be derived from 447 * the ill_mac_type and arp_m_lookup(). 448 */ 449 arl->arl_sap = ETHERTYPE_ARP; 450 arl_defaults_common(arl, mp); 451 } 452 453 static int 454 arp_wput(queue_t *q, mblk_t *mp) 455 { 456 int err = EINVAL; 457 struct iocblk *ioc; 458 mblk_t *mp1; 459 460 switch (DB_TYPE(mp)) { 461 case M_IOCTL: 462 ASSERT(q->q_next != NULL); 463 ioc = (struct iocblk *)mp->b_rptr; 464 if (ioc->ioc_cmd != SIOCSLIFNAME && 465 ioc->ioc_cmd != IF_UNITSEL) { 466 DTRACE_PROBE4(arl__dlpi, char *, "arp_wput", 467 char *, "<some ioctl>", char *, "-", 468 arl_t *, (arl_t *)q->q_ptr); 469 putnext(q, mp); 470 break; 471 } 472 if ((mp1 = mp->b_cont) == 0) 473 err = EINVAL; 474 else if (ioc->ioc_cmd == SIOCSLIFNAME) 475 err = ip_sioctl_slifname_arp(q, mp1->b_rptr); 476 else if (ioc->ioc_cmd == IF_UNITSEL) 477 err = ip_sioctl_ifunitsel_arp(q, (int *)mp1->b_rptr); 478 if (err == 0) 479 miocack(q, mp, 0, 0); 480 else 481 miocnak(q, mp, 0, err); 482 break; 483 default: 484 DTRACE_PROBE4(arl__dlpi, char *, "arp_wput default", 485 char *, "default mblk", char *, "-", 486 arl_t *, (arl_t *)q->q_ptr); 487 putnext(q, mp); 488 break; 489 } 490 return (0); 491 } 492 493 /* 494 * similar to ill_dlpi_pending(): verify that the received DLPI response 495 * matches the one that is pending for the arl. 496 */ 497 static boolean_t 498 arl_dlpi_pending(arl_t *arl, t_uscalar_t prim) 499 { 500 t_uscalar_t pending; 501 502 mutex_enter(&arl->arl_lock); 503 if (arl->arl_dlpi_pending == prim) { 504 mutex_exit(&arl->arl_lock); 505 return (B_TRUE); 506 } 507 508 if (arl->arl_state_flags & ARL_CONDEMNED) { 509 mutex_exit(&arl->arl_lock); 510 return (B_FALSE); 511 } 512 pending = arl->arl_dlpi_pending; 513 mutex_exit(&arl->arl_lock); 514 515 if (pending == DL_PRIM_INVAL) { 516 ip0dbg(("arl_dlpi_pending unsolicited ack for %s on %s", 517 dl_primstr(prim), arl->arl_name)); 518 } else { 519 ip0dbg(("arl_dlpi_pending ack for %s on %s expect %s", 520 dl_primstr(prim), arl->arl_name, dl_primstr(pending))); 521 } 522 return (B_FALSE); 523 } 524 525 /* DLPI messages, other than DL_UNITDATA_IND are handled here. */ 526 static void 527 arp_rput_dlpi(queue_t *q, mblk_t *mp) 528 { 529 arl_t *arl = (arl_t *)q->q_ptr; 530 union DL_primitives *dlp; 531 t_uscalar_t prim; 532 t_uscalar_t reqprim = DL_PRIM_INVAL; 533 ill_t *ill; 534 535 if ((mp->b_wptr - mp->b_rptr) < sizeof (dlp->dl_primitive)) { 536 putnext(q, mp); 537 return; 538 } 539 dlp = (union DL_primitives *)mp->b_rptr; 540 prim = dlp->dl_primitive; 541 542 /* 543 * If we received an ACK but didn't send a request for it, then it 544 * can't be part of any pending operation; discard up-front. 545 */ 546 switch (prim) { 547 case DL_ERROR_ACK: 548 /* 549 * ce is confused about how DLPI works, so we have to interpret 550 * an "error" on DL_NOTIFY_ACK (which we never could have sent) 551 * as really meaning an error on DL_NOTIFY_REQ. 552 * 553 * Note that supporting DL_NOTIFY_REQ is optional, so printing 554 * out an error message on the console isn't warranted except 555 * for debug. 556 */ 557 if (dlp->error_ack.dl_error_primitive == DL_NOTIFY_ACK || 558 dlp->error_ack.dl_error_primitive == DL_NOTIFY_REQ) { 559 reqprim = DL_NOTIFY_REQ; 560 } else { 561 reqprim = dlp->error_ack.dl_error_primitive; 562 } 563 break; 564 case DL_INFO_ACK: 565 reqprim = DL_INFO_REQ; 566 break; 567 case DL_OK_ACK: 568 reqprim = dlp->ok_ack.dl_correct_primitive; 569 break; 570 case DL_BIND_ACK: 571 reqprim = DL_BIND_REQ; 572 break; 573 default: 574 DTRACE_PROBE2(rput_dl_badprim, arl_t *, arl, 575 union DL_primitives *, dlp); 576 putnext(q, mp); 577 return; 578 } 579 if (reqprim == DL_PRIM_INVAL || !arl_dlpi_pending(arl, reqprim)) { 580 freemsg(mp); 581 return; 582 } 583 DTRACE_PROBE4(arl__dlpi, char *, "arp_rput_dlpi received", 584 char *, dl_primstr(prim), char *, dl_primstr(reqprim), 585 arl_t *, arl); 586 587 ASSERT(prim != DL_NOTIFY_IND); 588 589 ill = arl_to_ill(arl); 590 591 switch (reqprim) { 592 case DL_INFO_REQ: 593 /* 594 * ill has not been set up yet for this case. This is the 595 * DL_INFO_ACK for the first DL_INFO_REQ sent from 596 * arp_modopen(). There should be no other arl_dlpi_deferred 597 * messages pending. We initialize the arl here. 598 */ 599 ASSERT(!arl->arl_dlpi_style_set); 600 ASSERT(arl->arl_dlpi_pending == DL_INFO_REQ); 601 ASSERT(arl->arl_dlpi_deferred == NULL); 602 arl->arl_dlpi_pending = DL_PRIM_INVAL; 603 arp_ll_set_defaults(arl, mp); 604 freemsg(mp); 605 return; 606 case DL_UNBIND_REQ: 607 mutex_enter(&arl->arl_lock); 608 arl->arl_state_flags &= ~ARL_DL_UNBIND_IN_PROGRESS; 609 /* 610 * This is not an error, so we don't set ARL_LL_DOWN 611 */ 612 arl->arl_state_flags &= ~ARL_LL_UP; 613 arl->arl_state_flags |= ARL_LL_UNBOUND; 614 if (arl->arl_state_flags & ARL_CONDEMNED) { 615 /* 616 * if this is part of the unplumb the arl may 617 * vaporize any moment after we cv_signal the 618 * arl_cv so we reset arl_dlpi_pending here. 619 * All other cases (including replumb) will 620 * have the arl_dlpi_pending reset in 621 * arp_dlpi_done. 622 */ 623 arl->arl_dlpi_pending = DL_PRIM_INVAL; 624 } 625 cv_signal(&arl->arl_cv); 626 mutex_exit(&arl->arl_lock); 627 break; 628 } 629 if (ill != NULL) { 630 /* 631 * ill ref obtained by arl_to_ill() will be released 632 * by qwriter_ip() 633 */ 634 qwriter_ip(ill, ill->ill_wq, mp, arp_rput_dlpi_writer, 635 CUR_OP, B_TRUE); 636 return; 637 } 638 freemsg(mp); 639 } 640 641 /* 642 * Handling of DLPI messages that require exclusive access to the ipsq. 643 */ 644 /* ARGSUSED */ 645 static void 646 arp_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) 647 { 648 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr; 649 ill_t *ill = (ill_t *)q->q_ptr; 650 arl_t *arl = ill_to_arl(ill); 651 652 if (arl == NULL) { 653 /* 654 * happens as a result arp_modclose triggering unbind. 655 * arp_rput_dlpi will cv_signal the arl_cv and the modclose 656 * will complete, but when it does ipsq_exit, the waiting 657 * qwriter_ip gets into the ipsq but will find the arl null. 658 * There should be no deferred messages in this case, so 659 * just complete and exit. 660 */ 661 arp_cmd_done(ill, 0, DL_UNBIND_REQ); 662 freemsg(mp); 663 return; 664 } 665 switch (dlp->dl_primitive) { 666 case DL_ERROR_ACK: 667 switch (dlp->error_ack.dl_error_primitive) { 668 case DL_UNBIND_REQ: 669 mutex_enter(&arl->arl_lock); 670 arl->arl_state_flags &= ~ARL_DL_UNBIND_IN_PROGRESS; 671 arl->arl_state_flags &= ~ARL_LL_UP; 672 arl->arl_state_flags |= ARL_LL_UNBOUND; 673 arl->arl_state_flags |= ARL_LL_DOWN; 674 cv_signal(&arl->arl_cv); 675 mutex_exit(&arl->arl_lock); 676 break; 677 case DL_BIND_REQ: 678 mutex_enter(&arl->arl_lock); 679 arl->arl_state_flags &= ~ARL_LL_UP; 680 arl->arl_state_flags |= ARL_LL_DOWN; 681 arl->arl_state_flags |= ARL_LL_UNBOUND; 682 cv_signal(&arl->arl_cv); 683 mutex_exit(&arl->arl_lock); 684 break; 685 case DL_ATTACH_REQ: 686 break; 687 default: 688 /* If it's anything else, we didn't send it. */ 689 arl_refrele(arl); 690 putnext(q, mp); 691 return; 692 } 693 break; 694 case DL_OK_ACK: 695 DTRACE_PROBE4(arl__dlpi, char *, "arp_rput_dlpi_writer ok", 696 char *, dl_primstr(dlp->ok_ack.dl_correct_primitive), 697 char *, dl_primstr(dlp->ok_ack.dl_correct_primitive), 698 arl_t *, arl); 699 mutex_enter(&arl->arl_lock); 700 switch (dlp->ok_ack.dl_correct_primitive) { 701 case DL_UNBIND_REQ: 702 case DL_ATTACH_REQ: 703 break; 704 default: 705 ip0dbg(("Dropping unrecognized DL_OK_ACK for %s", 706 dl_primstr(dlp->ok_ack.dl_correct_primitive))); 707 mutex_exit(&arl->arl_lock); 708 arl_refrele(arl); 709 freemsg(mp); 710 return; 711 } 712 mutex_exit(&arl->arl_lock); 713 break; 714 case DL_BIND_ACK: 715 DTRACE_PROBE2(rput_dl_bind, arl_t *, arl, 716 dl_bind_ack_t *, &dlp->bind_ack); 717 718 mutex_enter(&arl->arl_lock); 719 ASSERT(arl->arl_state_flags & ARL_LL_BIND_PENDING); 720 arl->arl_state_flags &= 721 ~(ARL_LL_BIND_PENDING|ARL_LL_DOWN|ARL_LL_UNBOUND); 722 arl->arl_state_flags |= ARL_LL_UP; 723 mutex_exit(&arl->arl_lock); 724 break; 725 case DL_UDERROR_IND: 726 DTRACE_PROBE2(rput_dl_uderror, arl_t *, arl, 727 dl_uderror_ind_t *, &dlp->uderror_ind); 728 arl_refrele(arl); 729 putnext(q, mp); 730 return; 731 default: 732 DTRACE_PROBE2(rput_dl_badprim, arl_t *, arl, 733 union DL_primitives *, dlp); 734 arl_refrele(arl); 735 putnext(q, mp); 736 return; 737 } 738 arp_dlpi_done(arl, ill); 739 arl_refrele(arl); 740 freemsg(mp); 741 } 742 743 int 744 arp_rput(queue_t *q, mblk_t *mp) 745 { 746 arl_t *arl = q->q_ptr; 747 boolean_t need_refrele = B_FALSE; 748 749 mutex_enter(&arl->arl_lock); 750 if (((arl->arl_state_flags & 751 (ARL_CONDEMNED | ARL_LL_REPLUMBING)) != 0)) { 752 /* 753 * Only allow high priority DLPI messages during unplumb or 754 * replumb, and we don't take an arl_refcnt for that case. 755 */ 756 if (DB_TYPE(mp) != M_PCPROTO) { 757 mutex_exit(&arl->arl_lock); 758 freemsg(mp); 759 return (0); 760 } 761 } else { 762 arl_refhold_locked(arl); 763 need_refrele = B_TRUE; 764 } 765 mutex_exit(&arl->arl_lock); 766 767 switch (DB_TYPE(mp)) { 768 case M_PCPROTO: 769 case M_PROTO: { 770 ill_t *ill; 771 772 /* 773 * could be one of 774 * (i) real message from the wire, (DLPI_DATA) 775 * (ii) DLPI message 776 * Take a ref on the ill associated with this arl to 777 * prevent the ill from being unplumbed until this thread 778 * is done. 779 */ 780 if (IS_DLPI_DATA(mp)) { 781 ill = arl_to_ill(arl); 782 if (ill == NULL) { 783 arp_drop_packet("No ill", mp, ill); 784 break; 785 } 786 arp_process_packet(ill, mp); 787 ill_refrele(ill); 788 break; 789 } 790 /* Miscellaneous DLPI messages get shuffled off. */ 791 arp_rput_dlpi(q, mp); 792 break; 793 } 794 case M_ERROR: 795 case M_HANGUP: 796 if (mp->b_rptr < mp->b_wptr) 797 arl->arl_error = (int)(*mp->b_rptr & 0xFF); 798 if (arl->arl_error == 0) 799 arl->arl_error = ENXIO; 800 freemsg(mp); 801 break; 802 default: 803 ip1dbg(("arp_rput other db type %x\n", DB_TYPE(mp))); 804 putnext(q, mp); 805 break; 806 } 807 if (need_refrele) 808 arl_refrele(arl); 809 return (0); 810 } 811 812 static void 813 arp_process_packet(ill_t *ill, mblk_t *mp) 814 { 815 mblk_t *mp1; 816 arh_t *arh; 817 in_addr_t src_paddr, dst_paddr; 818 uint32_t hlen, plen; 819 boolean_t is_probe; 820 int op; 821 ncec_t *dst_ncec, *src_ncec = NULL; 822 uchar_t *src_haddr, *arhp, *dst_haddr, *dp, *sp; 823 int err; 824 ip_stack_t *ipst; 825 boolean_t need_ill_refrele = B_FALSE; 826 nce_t *nce; 827 uchar_t *src_lladdr; 828 dl_unitdata_ind_t *dlui; 829 ip_recv_attr_t iras; 830 831 ASSERT(ill != NULL); 832 if (ill->ill_flags & ILLF_NOARP) { 833 arp_drop_packet("Interface does not support ARP", mp, ill); 834 return; 835 } 836 ipst = ill->ill_ipst; 837 /* 838 * What we should have at this point is a DL_UNITDATA_IND message 839 * followed by an ARP packet. We do some initial checks and then 840 * get to work. 841 */ 842 dlui = (dl_unitdata_ind_t *)mp->b_rptr; 843 if (dlui->dl_group_address == 1) { 844 /* 845 * multicast or broadcast packet. Only accept on the ipmp 846 * nominated interface for multicasts ('cast_ill'). 847 * If we have no cast_ill we are liberal and accept everything. 848 */ 849 if (IS_UNDER_IPMP(ill)) { 850 /* For an under ill_grp can change under lock */ 851 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 852 if (!ill->ill_nom_cast && ill->ill_grp != NULL && 853 ill->ill_grp->ig_cast_ill != NULL) { 854 rw_exit(&ipst->ips_ill_g_lock); 855 arp_drop_packet("Interface is not nominated " 856 "for multicast sends and receives", 857 mp, ill); 858 return; 859 } 860 rw_exit(&ipst->ips_ill_g_lock); 861 } 862 } 863 mp1 = mp->b_cont; 864 if (mp1 == NULL) { 865 arp_drop_packet("Missing ARP packet", mp, ill); 866 return; 867 } 868 if (mp1->b_cont != NULL) { 869 /* No fooling around with funny messages. */ 870 if (!pullupmsg(mp1, -1)) { 871 arp_drop_packet("Funny message: pullup failed", 872 mp, ill); 873 return; 874 } 875 } 876 arh = (arh_t *)mp1->b_rptr; 877 hlen = arh->arh_hlen; 878 plen = arh->arh_plen; 879 if (MBLKL(mp1) < ARH_FIXED_LEN + 2 * hlen + 2 * plen) { 880 arp_drop_packet("mblk len too small", mp, ill); 881 return; 882 } 883 /* 884 * hlen 0 is used for RFC 1868 UnARP. 885 * 886 * Note that the rest of the code checks that hlen is what we expect 887 * for this hardware address type, so might as well discard packets 888 * here that don't match. 889 */ 890 if ((hlen > 0 && hlen != ill->ill_phys_addr_length) || plen == 0) { 891 DTRACE_PROBE2(rput_bogus, ill_t *, ill, mblk_t *, mp1); 892 arp_drop_packet("Bogus hlen or plen", mp, ill); 893 return; 894 } 895 /* 896 * Historically, Solaris has been lenient about hardware type numbers. 897 * We should check here, but don't. 898 */ 899 DTRACE_PROBE3(arp__physical__in__start, ill_t *, ill, arh_t *, arh, 900 mblk_t *, mp); 901 /* 902 * If ill is in an ipmp group, it will be the under ill. If we want 903 * to report the packet as coming up the IPMP interface, we should 904 * convert it to the ipmp ill. 905 */ 906 ARP_HOOK_IN(ipst->ips_arp_physical_in_event, ipst->ips_arp_physical_in, 907 ill->ill_phyint->phyint_ifindex, arh, mp, mp1, ipst); 908 DTRACE_PROBE1(arp__physical__in__end, mblk_t *, mp); 909 if (mp == NULL) 910 return; 911 arhp = (uchar_t *)arh + ARH_FIXED_LEN; 912 src_haddr = arhp; /* ar$sha */ 913 arhp += hlen; 914 bcopy(arhp, &src_paddr, IP_ADDR_LEN); /* ar$spa */ 915 sp = arhp; 916 arhp += IP_ADDR_LEN; 917 dst_haddr = arhp; /* ar$dha */ 918 arhp += hlen; 919 bcopy(arhp, &dst_paddr, IP_ADDR_LEN); /* ar$tpa */ 920 dp = arhp; 921 op = BE16_TO_U16(arh->arh_operation); 922 923 DTRACE_PROBE2(ip__arp__input, (in_addr_t), src_paddr, 924 (in_addr_t), dst_paddr); 925 926 /* Determine if this is just a probe */ 927 is_probe = (src_paddr == INADDR_ANY); 928 929 /* 930 * The following test for loopback is faster than 931 * IP_LOOPBACK_ADDR(), because it avoids any bitwise 932 * operations. 933 * Note that these addresses are always in network byte order 934 */ 935 if ((*(uint8_t *)&src_paddr) == IN_LOOPBACKNET || 936 (*(uint8_t *)&dst_paddr) == IN_LOOPBACKNET || 937 CLASSD(src_paddr) || CLASSD(dst_paddr)) { 938 arp_drop_packet("Martian IP addr", mp, ill); 939 return; 940 } 941 942 /* 943 * ira_ill is the only field used down the arp_notify path. 944 */ 945 bzero(&iras, sizeof (iras)); 946 iras.ira_ill = iras.ira_rill = ill; 947 /* 948 * RFC 826: first check if the <protocol, sender protocol address> is 949 * in the cache, if there is a sender protocol address. Note that this 950 * step also handles resolutions based on source. 951 */ 952 /* Note: after here we need to freeb(mp) and freemsg(mp1) separately */ 953 mp->b_cont = NULL; 954 if (is_probe) { 955 err = AR_NOTFOUND; 956 } else { 957 if (plen != 4) { 958 arp_drop_packet("bad protocol len", mp, ill); 959 return; 960 } 961 err = ip_nce_resolve_all(ill, src_haddr, hlen, &src_paddr, 962 &src_ncec, op); 963 switch (err) { 964 case AR_BOGON: 965 ASSERT(src_ncec != NULL); 966 arp_notify(src_paddr, mp1, AR_CN_BOGON, 967 &iras, src_ncec); 968 break; 969 case AR_FAILED: 970 arp_notify(src_paddr, mp1, AR_CN_FAILED, &iras, 971 src_ncec); 972 break; 973 case AR_LOOPBACK: 974 DTRACE_PROBE2(rput_loopback, ill_t *, ill, arh_t *, 975 arh); 976 freemsg(mp1); 977 break; 978 default: 979 goto update; 980 } 981 freemsg(mp); 982 if (src_ncec != NULL) 983 ncec_refrele(src_ncec); 984 return; 985 } 986 update: 987 /* 988 * Now look up the destination address. By RFC 826, we ignore the 989 * packet at this step if the target isn't one of our addresses (i.e., 990 * one we have been asked to PUBLISH). This is true even if the 991 * target is something we're trying to resolve and the packet 992 * is a response. 993 */ 994 dst_ncec = ncec_lookup_illgrp_v4(ill, &dst_paddr); 995 if (dst_ncec == NULL || !NCE_PUBLISH(dst_ncec)) { 996 /* 997 * Let the client know if the source mapping has changed, even 998 * if the destination provides no useful information for the 999 * client. 1000 */ 1001 if (err == AR_CHANGED) { 1002 arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras, 1003 NULL); 1004 freemsg(mp); 1005 } else { 1006 freemsg(mp); 1007 arp_drop_packet("Target is not interesting", mp1, ill); 1008 } 1009 if (dst_ncec != NULL) 1010 ncec_refrele(dst_ncec); 1011 if (src_ncec != NULL) 1012 ncec_refrele(src_ncec); 1013 return; 1014 } 1015 1016 if (dst_ncec->ncec_flags & NCE_F_UNVERIFIED) { 1017 /* 1018 * Check for a reflection. Some misbehaving bridges will 1019 * reflect our own transmitted packets back to us. 1020 */ 1021 ASSERT(NCE_PUBLISH(dst_ncec)); 1022 if (hlen != dst_ncec->ncec_ill->ill_phys_addr_length) { 1023 ncec_refrele(dst_ncec); 1024 if (src_ncec != NULL) 1025 ncec_refrele(src_ncec); 1026 freemsg(mp); 1027 arp_drop_packet("bad arh_len", mp1, ill); 1028 return; 1029 } 1030 if (!nce_cmp_ll_addr(dst_ncec, src_haddr, hlen)) { 1031 DTRACE_PROBE3(rput_probe_reflected, ill_t *, ill, 1032 arh_t *, arh, ncec_t *, dst_ncec); 1033 ncec_refrele(dst_ncec); 1034 if (src_ncec != NULL) 1035 ncec_refrele(src_ncec); 1036 freemsg(mp); 1037 arp_drop_packet("Reflected probe", mp1, ill); 1038 return; 1039 } 1040 /* 1041 * Responses targeting our HW address that are not responses to 1042 * our DAD probe must be ignored as they are related to requests 1043 * sent before DAD was restarted. 1044 */ 1045 if (op == ARP_RESPONSE && 1046 (nce_cmp_ll_addr(dst_ncec, dst_haddr, hlen) == 0)) { 1047 ncec_refrele(dst_ncec); 1048 if (src_ncec != NULL) 1049 ncec_refrele(src_ncec); 1050 freemsg(mp); 1051 arp_drop_packet( 1052 "Response to request that was sent before DAD", 1053 mp1, ill); 1054 return; 1055 } 1056 /* 1057 * Responses targeted to HW addresses which are not ours but 1058 * sent to our unverified proto address are also conflicts. 1059 * These may be reported by a proxy rather than the interface 1060 * with the conflicting address, dst_paddr is in conflict 1061 * rather than src_paddr. To ensure IP can locate the correct 1062 * ipif to take down, it is necessary to copy dst_paddr to 1063 * the src_paddr field before sending it to IP. The same is 1064 * required for probes, where src_paddr will be INADDR_ANY. 1065 */ 1066 if (is_probe || op == ARP_RESPONSE) { 1067 bcopy(dp, sp, plen); 1068 arp_notify(src_paddr, mp1, AR_CN_FAILED, &iras, 1069 NULL); 1070 ncec_delete(dst_ncec); 1071 } else if (err == AR_CHANGED) { 1072 arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras, 1073 NULL); 1074 } else { 1075 DTRACE_PROBE3(rput_request_unverified, 1076 ill_t *, ill, arh_t *, arh, ncec_t *, dst_ncec); 1077 arp_drop_packet("Unverified request", mp1, ill); 1078 } 1079 freemsg(mp); 1080 ncec_refrele(dst_ncec); 1081 if (src_ncec != NULL) 1082 ncec_refrele(src_ncec); 1083 return; 1084 } 1085 /* 1086 * If it's a request, then we reply to this, and if we think the 1087 * sender's unknown, then we create an entry to avoid unnecessary ARPs. 1088 * The design assumption is that someone ARPing us is likely to send us 1089 * a packet soon, and that we'll want to reply to it. 1090 */ 1091 if (op == ARP_REQUEST) { 1092 const uchar_t *nce_hwaddr; 1093 struct in_addr nce_paddr; 1094 clock_t now; 1095 ill_t *under_ill = ill; 1096 boolean_t send_unicast = B_TRUE; 1097 1098 ASSERT(NCE_PUBLISH(dst_ncec)); 1099 1100 if ((dst_ncec->ncec_flags & (NCE_F_BCAST|NCE_F_MCAST)) != 0) { 1101 /* 1102 * Ignore senders who are deliberately or accidentally 1103 * confused. 1104 */ 1105 goto bail; 1106 } 1107 1108 if (!is_probe && err == AR_NOTFOUND) { 1109 ASSERT(src_ncec == NULL); 1110 1111 if (IS_UNDER_IPMP(under_ill)) { 1112 /* 1113 * create the ncec for the sender on ipmp_ill. 1114 * We pass in the ipmp_ill itself to avoid 1115 * creating an nce_t on the under_ill. 1116 */ 1117 ill = ipmp_ill_hold_ipmp_ill(under_ill); 1118 if (ill == NULL) 1119 ill = under_ill; 1120 else 1121 need_ill_refrele = B_TRUE; 1122 } 1123 1124 err = nce_lookup_then_add_v4(ill, src_haddr, hlen, 1125 &src_paddr, 0, ND_STALE, &nce); 1126 1127 switch (err) { 1128 case 0: 1129 case EEXIST: 1130 ip1dbg(("added ncec %p in state %d ill %s\n", 1131 (void *)src_ncec, src_ncec->ncec_state, 1132 ill->ill_name)); 1133 src_ncec = nce->nce_common; 1134 break; 1135 default: 1136 /* 1137 * Either no memory, or the outgoing interface 1138 * is in the process of down/unplumb. In the 1139 * latter case, we will fail the send anyway, 1140 * and in the former case, we should try to send 1141 * the ARP response. 1142 */ 1143 src_lladdr = src_haddr; 1144 goto send_response; 1145 } 1146 ncec_refhold(src_ncec); 1147 nce_refrele(nce); 1148 /* set up cleanup interval on ncec */ 1149 } 1150 1151 /* 1152 * This implements periodic address defense based on a modified 1153 * version of the RFC 3927 requirements. Instead of sending a 1154 * broadcasted reply every time, as demanded by the RFC, we 1155 * send at most one broadcast reply per arp_broadcast_interval. 1156 */ 1157 now = ddi_get_lbolt(); 1158 if ((now - dst_ncec->ncec_last_time_defended) > 1159 MSEC_TO_TICK(ipst->ips_ipv4_dad_announce_interval)) { 1160 dst_ncec->ncec_last_time_defended = now; 1161 /* 1162 * If this is one of the long-suffering entries, 1163 * pull it out now. It no longer needs separate 1164 * defense, because we're now doing that with this 1165 * broadcasted reply. 1166 */ 1167 dst_ncec->ncec_flags &= ~NCE_F_DELAYED; 1168 send_unicast = B_FALSE; 1169 } 1170 if (src_ncec != NULL && send_unicast) { 1171 src_lladdr = src_ncec->ncec_lladdr; 1172 } else { 1173 src_lladdr = under_ill->ill_bcast_mp->b_rptr + 1174 NCE_LL_ADDR_OFFSET(under_ill); 1175 } 1176 send_response: 1177 nce_hwaddr = dst_ncec->ncec_lladdr; 1178 IN6_V4MAPPED_TO_INADDR(&dst_ncec->ncec_addr, &nce_paddr); 1179 1180 (void) arp_output(under_ill, ARP_RESPONSE, 1181 nce_hwaddr, (uchar_t *)&nce_paddr, src_haddr, 1182 (uchar_t *)&src_paddr, src_lladdr); 1183 } 1184 bail: 1185 if (dst_ncec != NULL) { 1186 ncec_refrele(dst_ncec); 1187 } 1188 if (src_ncec != NULL) { 1189 ncec_refrele(src_ncec); 1190 } 1191 if (err == AR_CHANGED) { 1192 mp->b_cont = NULL; 1193 arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras, NULL); 1194 mp1 = NULL; 1195 } 1196 if (need_ill_refrele) 1197 ill_refrele(ill); 1198 done: 1199 freemsg(mp); 1200 freemsg(mp1); 1201 } 1202 1203 /* 1204 * Basic initialization of the arl_t and the arl_common structure shared with 1205 * the ill_t that is done after SLIFNAME/IF_UNITSEL. 1206 */ 1207 static int 1208 arl_ill_init(arl_t *arl, char *ill_name) 1209 { 1210 ill_t *ill; 1211 arl_ill_common_t *ai; 1212 1213 ill = ill_lookup_on_name(ill_name, B_FALSE, B_FALSE, B_FALSE, 1214 arl->arl_ipst); 1215 1216 if (ill == NULL) 1217 return (ENXIO); 1218 1219 /* 1220 * By the time we set up the arl, we expect the ETHERTYPE_IP 1221 * stream to be fully bound and attached. So we copy/verify 1222 * relevant information as possible from/against the ill. 1223 * 1224 * The following should have been set up in arp_ll_set_defaults() 1225 * after the first DL_INFO_ACK was received. 1226 */ 1227 ASSERT(arl->arl_phys_addr_length == ill->ill_phys_addr_length); 1228 ASSERT(arl->arl_sap == ETHERTYPE_ARP); 1229 ASSERT(arl->arl_mactype == ill->ill_mactype); 1230 ASSERT(arl->arl_sap_length == ill->ill_sap_length); 1231 1232 ai = kmem_zalloc(sizeof (*ai), KM_SLEEP); 1233 mutex_enter(&ill->ill_lock); 1234 /* First ensure that the ill is not CONDEMNED. */ 1235 if (ill->ill_state_flags & ILL_CONDEMNED) { 1236 mutex_exit(&ill->ill_lock); 1237 ill_refrele(ill); 1238 kmem_free(ai, sizeof (*ai)); 1239 return (ENXIO); 1240 } 1241 if (ill->ill_common != NULL || arl->arl_common != NULL) { 1242 mutex_exit(&ill->ill_lock); 1243 ip0dbg(("%s: PPA already exists", ill->ill_name)); 1244 ill_refrele(ill); 1245 kmem_free(ai, sizeof (*ai)); 1246 return (EEXIST); 1247 } 1248 mutex_init(&ai->ai_lock, NULL, MUTEX_DEFAULT, NULL); 1249 ai->ai_arl = arl; 1250 ai->ai_ill = ill; 1251 ill->ill_common = ai; 1252 arl->arl_common = ai; 1253 mutex_exit(&ill->ill_lock); 1254 (void) strlcpy(arl->arl_name, ill->ill_name, LIFNAMSIZ); 1255 arl->arl_name_length = ill->ill_name_length; 1256 ill_refrele(ill); 1257 arp_ifname_notify(arl); 1258 return (0); 1259 } 1260 1261 /* Allocate and do common initializations for DLPI messages. */ 1262 static mblk_t * 1263 ip_ar_dlpi_comm(t_uscalar_t prim, size_t size) 1264 { 1265 mblk_t *mp; 1266 1267 if ((mp = allocb(size, BPRI_HI)) == NULL) 1268 return (NULL); 1269 1270 /* 1271 * DLPIv2 says that DL_INFO_REQ and DL_TOKEN_REQ (the latter 1272 * of which we don't seem to use) are sent with M_PCPROTO, and 1273 * that other DLPI are M_PROTO. 1274 */ 1275 DB_TYPE(mp) = (prim == DL_INFO_REQ) ? M_PCPROTO : M_PROTO; 1276 1277 mp->b_wptr = mp->b_rptr + size; 1278 bzero(mp->b_rptr, size); 1279 DL_PRIM(mp) = prim; 1280 return (mp); 1281 } 1282 1283 1284 int 1285 ip_sioctl_ifunitsel_arp(queue_t *q, int *ppa) 1286 { 1287 arl_t *arl; 1288 char *cp, ill_name[LIFNAMSIZ]; 1289 1290 if (q->q_next == NULL) 1291 return (EINVAL); 1292 1293 do { 1294 q = q->q_next; 1295 } while (q->q_next != NULL); 1296 cp = q->q_qinfo->qi_minfo->mi_idname; 1297 1298 arl = (arl_t *)q->q_ptr; 1299 (void) snprintf(ill_name, sizeof (ill_name), "%s%d", cp, *ppa); 1300 arl->arl_ppa = *ppa; 1301 return (arl_ill_init(arl, ill_name)); 1302 } 1303 1304 int 1305 ip_sioctl_slifname_arp(queue_t *q, void *lifreq) 1306 { 1307 arl_t *arl; 1308 struct lifreq *lifr = lifreq; 1309 1310 /* ioctl not valid when IP opened as a device */ 1311 if (q->q_next == NULL) 1312 return (EINVAL); 1313 1314 arl = (arl_t *)q->q_ptr; 1315 arl->arl_ppa = lifr->lifr_ppa; 1316 return (arl_ill_init(arl, lifr->lifr_name)); 1317 } 1318 1319 arl_t * 1320 ill_to_arl(ill_t *ill) 1321 { 1322 arl_ill_common_t *ai = ill->ill_common; 1323 arl_t *arl = NULL; 1324 1325 if (ai == NULL) 1326 return (NULL); 1327 /* 1328 * Find the arl_t that corresponds to this ill_t from the shared 1329 * ill_common structure. We can safely access the ai here as it 1330 * will only be freed in arp_modclose() after we have become 1331 * single-threaded. 1332 */ 1333 mutex_enter(&ai->ai_lock); 1334 if ((arl = ai->ai_arl) != NULL) { 1335 mutex_enter(&arl->arl_lock); 1336 if (!(arl->arl_state_flags & ARL_CONDEMNED)) { 1337 arl_refhold_locked(arl); 1338 mutex_exit(&arl->arl_lock); 1339 } else { 1340 mutex_exit(&arl->arl_lock); 1341 arl = NULL; 1342 } 1343 } 1344 mutex_exit(&ai->ai_lock); 1345 return (arl); 1346 } 1347 1348 ill_t * 1349 arl_to_ill(arl_t *arl) 1350 { 1351 arl_ill_common_t *ai = arl->arl_common; 1352 ill_t *ill = NULL; 1353 1354 if (ai == NULL) { 1355 /* 1356 * happens when the arp stream is just being opened, and 1357 * arl_ill_init has not been executed yet. 1358 */ 1359 return (NULL); 1360 } 1361 /* 1362 * Find the ill_t that corresponds to this arl_t from the shared 1363 * arl_common structure. We can safely access the ai here as it 1364 * will only be freed in arp_modclose() after we have become 1365 * single-threaded. 1366 */ 1367 mutex_enter(&ai->ai_lock); 1368 if ((ill = ai->ai_ill) != NULL) { 1369 mutex_enter(&ill->ill_lock); 1370 if (!ILL_IS_CONDEMNED(ill)) { 1371 ill_refhold_locked(ill); 1372 mutex_exit(&ill->ill_lock); 1373 } else { 1374 mutex_exit(&ill->ill_lock); 1375 ill = NULL; 1376 } 1377 } 1378 mutex_exit(&ai->ai_lock); 1379 return (ill); 1380 } 1381 1382 int 1383 arp_ll_up(ill_t *ill) 1384 { 1385 mblk_t *attach_mp = NULL; 1386 mblk_t *bind_mp = NULL; 1387 mblk_t *unbind_mp = NULL; 1388 arl_t *arl; 1389 1390 ASSERT(IAM_WRITER_ILL(ill)); 1391 arl = ill_to_arl(ill); 1392 1393 DTRACE_PROBE2(ill__downup, char *, "arp_ll_up", ill_t *, ill); 1394 if (arl == NULL) 1395 return (ENXIO); 1396 DTRACE_PROBE2(arl__downup, char *, "arp_ll_up", arl_t *, arl); 1397 if ((arl->arl_state_flags & ARL_LL_UP) != 0) { 1398 arl_refrele(arl); 1399 return (0); 1400 } 1401 if (arl->arl_needs_attach) { /* DL_STYLE2 */ 1402 attach_mp = 1403 ip_ar_dlpi_comm(DL_ATTACH_REQ, sizeof (dl_attach_req_t)); 1404 if (attach_mp == NULL) 1405 goto bad; 1406 ((dl_attach_req_t *)attach_mp->b_rptr)->dl_ppa = arl->arl_ppa; 1407 } 1408 1409 /* Allocate and initialize a bind message. */ 1410 bind_mp = ip_ar_dlpi_comm(DL_BIND_REQ, sizeof (dl_bind_req_t)); 1411 if (bind_mp == NULL) 1412 goto bad; 1413 ((dl_bind_req_t *)bind_mp->b_rptr)->dl_sap = ETHERTYPE_ARP; 1414 ((dl_bind_req_t *)bind_mp->b_rptr)->dl_service_mode = DL_CLDLS; 1415 1416 unbind_mp = ip_ar_dlpi_comm(DL_UNBIND_REQ, sizeof (dl_unbind_req_t)); 1417 if (unbind_mp == NULL) 1418 goto bad; 1419 if (arl->arl_needs_attach) { 1420 arp_dlpi_send(arl, attach_mp); 1421 } 1422 arl->arl_unbind_mp = unbind_mp; 1423 1424 arl->arl_state_flags |= ARL_LL_BIND_PENDING; 1425 arp_dlpi_send(arl, bind_mp); 1426 arl_refrele(arl); 1427 return (EINPROGRESS); 1428 1429 bad: 1430 freemsg(attach_mp); 1431 freemsg(bind_mp); 1432 freemsg(unbind_mp); 1433 arl_refrele(arl); 1434 return (ENOMEM); 1435 } 1436 1437 /* 1438 * consumes/frees mp 1439 */ 1440 static void 1441 arp_notify(in_addr_t src, mblk_t *mp, uint32_t arcn_code, 1442 ip_recv_attr_t *ira, ncec_t *ncec) 1443 { 1444 char hbuf[MAC_STR_LEN]; 1445 char sbuf[INET_ADDRSTRLEN]; 1446 ill_t *ill = ira->ira_ill; 1447 ip_stack_t *ipst = ill->ill_ipst; 1448 arh_t *arh = (arh_t *)mp->b_rptr; 1449 1450 switch (arcn_code) { 1451 case AR_CN_BOGON: 1452 /* 1453 * Someone is sending ARP packets with a source protocol 1454 * address that we have published and for which we believe our 1455 * entry is authoritative and verified to be unique on 1456 * the network. 1457 * 1458 * arp_process_packet() sends AR_CN_FAILED for the case when 1459 * a DAD probe is received and the hardware address of a 1460 * non-authoritative entry has changed. Thus, AR_CN_BOGON 1461 * indicates a real conflict, and we have to do resolution. 1462 * 1463 * We back away quickly from the address if it's from DHCP or 1464 * otherwise temporary and hasn't been used recently (or at 1465 * all). We'd like to include "deprecated" addresses here as 1466 * well (as there's no real reason to defend something we're 1467 * discarding), but IPMP "reuses" this flag to mean something 1468 * other than the standard meaning. 1469 */ 1470 if (ip_nce_conflict(mp, ira, ncec)) { 1471 (void) mac_colon_addr((uint8_t *)(arh + 1), 1472 arh->arh_hlen, hbuf, sizeof (hbuf)); 1473 (void) ip_dot_addr(src, sbuf); 1474 cmn_err(CE_WARN, 1475 "proxy ARP problem? Node '%s' is using %s on %s", 1476 hbuf, sbuf, ill->ill_name); 1477 if (!arp_no_defense) 1478 (void) arp_announce(ncec); 1479 /* 1480 * ncec_last_time_defended has been adjusted in 1481 * ip_nce_conflict. 1482 */ 1483 } else { 1484 ncec_delete(ncec); 1485 } 1486 freemsg(mp); 1487 break; 1488 case AR_CN_ANNOUNCE: { 1489 nce_hw_map_t hwm; 1490 /* 1491 * ARP gives us a copy of any packet where it thinks 1492 * the address has changed, so that we can update our 1493 * caches. We're responsible for caching known answers 1494 * in the current design. We check whether the 1495 * hardware address really has changed in all of our 1496 * entries that have cached this mapping, and if so, we 1497 * blow them away. This way we will immediately pick 1498 * up the rare case of a host changing hardware 1499 * address. 1500 */ 1501 if (src == 0) { 1502 freemsg(mp); 1503 break; 1504 } 1505 hwm.hwm_addr = src; 1506 hwm.hwm_hwlen = arh->arh_hlen; 1507 hwm.hwm_hwaddr = (uchar_t *)(arh + 1); 1508 hwm.hwm_flags = 0; 1509 ncec_walk_common(ipst->ips_ndp4, NULL, 1510 nce_update_hw_changed, &hwm, B_TRUE); 1511 freemsg(mp); 1512 break; 1513 } 1514 case AR_CN_FAILED: 1515 if (arp_no_defense) { 1516 (void) mac_colon_addr((uint8_t *)(arh + 1), 1517 arh->arh_hlen, hbuf, sizeof (hbuf)); 1518 (void) ip_dot_addr(src, sbuf); 1519 1520 cmn_err(CE_WARN, 1521 "node %s is using our IP address %s on %s", 1522 hbuf, sbuf, ill->ill_name); 1523 freemsg(mp); 1524 break; 1525 } 1526 /* 1527 * mp will be freed by arp_excl. 1528 */ 1529 ill_refhold(ill); 1530 qwriter_ip(ill, ill->ill_rq, mp, arp_excl, NEW_OP, B_FALSE); 1531 return; 1532 default: 1533 ASSERT(0); 1534 freemsg(mp); 1535 break; 1536 } 1537 } 1538 1539 /* 1540 * arp_output is called to transmit an ARP Request or Response. The mapping 1541 * to RFC 826 variables is: 1542 * haddr1 == ar$sha 1543 * paddr1 == ar$spa 1544 * haddr2 == ar$tha 1545 * paddr2 == ar$tpa 1546 * The ARP frame is sent to the ether_dst in dst_lladdr. 1547 */ 1548 static int 1549 arp_output(ill_t *ill, uint32_t operation, 1550 const uchar_t *haddr1, const uchar_t *paddr1, const uchar_t *haddr2, 1551 const uchar_t *paddr2, uchar_t *dst_lladdr) 1552 { 1553 arh_t *arh; 1554 uint8_t *cp; 1555 uint_t hlen; 1556 uint32_t plen = IPV4_ADDR_LEN; /* ar$pln from RFC 826 */ 1557 uint32_t proto = IP_ARP_PROTO_TYPE; 1558 mblk_t *mp; 1559 arl_t *arl; 1560 1561 ASSERT(dst_lladdr != NULL); 1562 hlen = ill->ill_phys_addr_length; /* ar$hln from RFC 826 */ 1563 mp = ill_dlur_gen(dst_lladdr, hlen, ETHERTYPE_ARP, ill->ill_sap_length); 1564 1565 if (mp == NULL) 1566 return (ENOMEM); 1567 1568 /* IFF_NOARP flag is set or link down: do not send arp messages */ 1569 if ((ill->ill_flags & ILLF_NOARP) || !ill->ill_dl_up) { 1570 freemsg(mp); 1571 return (ENXIO); 1572 } 1573 1574 mp->b_cont = allocb(AR_LL_HDR_SLACK + ARH_FIXED_LEN + (hlen * 4) + 1575 plen + plen, BPRI_MED); 1576 if (mp->b_cont == NULL) { 1577 freeb(mp); 1578 return (ENOMEM); 1579 } 1580 1581 /* Fill in the ARP header. */ 1582 cp = mp->b_cont->b_rptr + (AR_LL_HDR_SLACK + hlen + hlen); 1583 mp->b_cont->b_rptr = cp; 1584 arh = (arh_t *)cp; 1585 U16_TO_BE16(arp_hw_type(ill->ill_mactype), arh->arh_hardware); 1586 U16_TO_BE16(proto, arh->arh_proto); 1587 arh->arh_hlen = (uint8_t)hlen; 1588 arh->arh_plen = (uint8_t)plen; 1589 U16_TO_BE16(operation, arh->arh_operation); 1590 cp += ARH_FIXED_LEN; 1591 bcopy(haddr1, cp, hlen); 1592 cp += hlen; 1593 if (paddr1 == NULL) 1594 bzero(cp, plen); 1595 else 1596 bcopy(paddr1, cp, plen); 1597 cp += plen; 1598 if (haddr2 == NULL) 1599 bzero(cp, hlen); 1600 else 1601 bcopy(haddr2, cp, hlen); 1602 cp += hlen; 1603 bcopy(paddr2, cp, plen); 1604 cp += plen; 1605 mp->b_cont->b_wptr = cp; 1606 1607 DTRACE_PROBE3(arp__physical__out__start, 1608 ill_t *, ill, arh_t *, arh, mblk_t *, mp); 1609 ARP_HOOK_OUT(ill->ill_ipst->ips_arp_physical_out_event, 1610 ill->ill_ipst->ips_arp_physical_out, 1611 ill->ill_phyint->phyint_ifindex, arh, mp, mp->b_cont, 1612 ill->ill_ipst); 1613 DTRACE_PROBE1(arp__physical__out__end, mblk_t *, mp); 1614 if (mp == NULL) 1615 return (0); 1616 1617 /* Ship it out. */ 1618 arl = ill_to_arl(ill); 1619 if (arl == NULL) { 1620 freemsg(mp); 1621 return (0); 1622 } 1623 if (canputnext(arl->arl_wq)) 1624 putnext(arl->arl_wq, mp); 1625 else 1626 freemsg(mp); 1627 arl_refrele(arl); 1628 return (0); 1629 } 1630 1631 /* 1632 * Process resolve requests. 1633 * If we are not yet reachable then we check and decrease ncec_rcnt; otherwise 1634 * we leave it alone (the caller will check and manage ncec_pcnt in those 1635 * cases.) 1636 */ 1637 int 1638 arp_request(ncec_t *ncec, in_addr_t sender, ill_t *ill) 1639 { 1640 int err; 1641 const uchar_t *target_hwaddr; 1642 struct in_addr nce_paddr; 1643 uchar_t *dst_lladdr; 1644 boolean_t use_rcnt = !NCE_ISREACHABLE(ncec); 1645 1646 ASSERT(MUTEX_HELD(&ncec->ncec_lock)); 1647 ASSERT(!IS_IPMP(ill)); 1648 1649 if (use_rcnt && ncec->ncec_rcnt == 0) { 1650 /* not allowed any more retransmits. */ 1651 return (0); 1652 } 1653 1654 if ((ill->ill_flags & ILLF_NOARP) != 0) 1655 return (0); 1656 1657 IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &nce_paddr); 1658 1659 target_hwaddr = 1660 ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 1661 1662 if (NCE_ISREACHABLE(ncec)) { 1663 dst_lladdr = ncec->ncec_lladdr; 1664 } else { 1665 dst_lladdr = ill->ill_bcast_mp->b_rptr + 1666 NCE_LL_ADDR_OFFSET(ill); 1667 } 1668 1669 mutex_exit(&ncec->ncec_lock); 1670 err = arp_output(ill, ARP_REQUEST, 1671 ill->ill_phys_addr, (uchar_t *)&sender, target_hwaddr, 1672 (uchar_t *)&nce_paddr, dst_lladdr); 1673 mutex_enter(&ncec->ncec_lock); 1674 1675 if (err != 0) { 1676 /* 1677 * Some transient error such as ENOMEM or a down link was 1678 * encountered. If the link has been taken down permanently, 1679 * the ncec will eventually be cleaned up (ipif_down_tail() 1680 * will call ipif_nce_down() and flush the ncec), to terminate 1681 * recurring attempts to send ARP requests. In all other cases, 1682 * allow the caller another chance at success next time. 1683 */ 1684 return (ncec->ncec_ill->ill_reachable_retrans_time); 1685 } 1686 1687 if (use_rcnt) 1688 ncec->ncec_rcnt--; 1689 1690 return (ncec->ncec_ill->ill_reachable_retrans_time); 1691 } 1692 1693 /* return B_TRUE if dropped */ 1694 boolean_t 1695 arp_announce(ncec_t *ncec) 1696 { 1697 ill_t *ill; 1698 int err; 1699 uchar_t *sphys_addr, *bcast_addr; 1700 struct in_addr ncec_addr; 1701 boolean_t need_refrele = B_FALSE; 1702 1703 ASSERT((ncec->ncec_flags & NCE_F_BCAST) == 0); 1704 ASSERT((ncec->ncec_flags & NCE_F_MCAST) == 0); 1705 1706 if (IS_IPMP(ncec->ncec_ill)) { 1707 /* sent on the cast_ill */ 1708 ill = ipmp_ill_hold_xmit_ill(ncec->ncec_ill, B_FALSE); 1709 if (ill == NULL) 1710 return (B_TRUE); 1711 need_refrele = B_TRUE; 1712 } else { 1713 ill = ncec->ncec_ill; 1714 } 1715 1716 /* 1717 * broadcast an announce to ill_bcast address. 1718 */ 1719 IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &ncec_addr); 1720 1721 sphys_addr = ncec->ncec_lladdr; 1722 bcast_addr = ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 1723 1724 err = arp_output(ill, ARP_REQUEST, 1725 sphys_addr, (uchar_t *)&ncec_addr, bcast_addr, 1726 (uchar_t *)&ncec_addr, bcast_addr); 1727 1728 if (need_refrele) 1729 ill_refrele(ill); 1730 return (err != 0); 1731 } 1732 1733 /* return B_TRUE if dropped */ 1734 boolean_t 1735 arp_probe(ncec_t *ncec) 1736 { 1737 ill_t *ill; 1738 int err; 1739 struct in_addr ncec_addr; 1740 uchar_t *sphys_addr, *dst_lladdr; 1741 1742 if (IS_IPMP(ncec->ncec_ill)) { 1743 ill = ipmp_ill_hold_xmit_ill(ncec->ncec_ill, B_FALSE); 1744 if (ill == NULL) 1745 return (B_TRUE); 1746 } else { 1747 ill = ncec->ncec_ill; 1748 } 1749 1750 IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &ncec_addr); 1751 1752 sphys_addr = ncec->ncec_lladdr; 1753 dst_lladdr = ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 1754 err = arp_output(ill, ARP_REQUEST, 1755 sphys_addr, NULL, NULL, (uchar_t *)&ncec_addr, dst_lladdr); 1756 1757 if (IS_IPMP(ncec->ncec_ill)) 1758 ill_refrele(ill); 1759 return (err != 0); 1760 } 1761 1762 static mblk_t * 1763 arl_unbind(arl_t *arl) 1764 { 1765 mblk_t *mp; 1766 1767 if ((mp = arl->arl_unbind_mp) != NULL) { 1768 arl->arl_unbind_mp = NULL; 1769 arl->arl_state_flags |= ARL_DL_UNBIND_IN_PROGRESS; 1770 } 1771 return (mp); 1772 } 1773 1774 int 1775 arp_ll_down(ill_t *ill) 1776 { 1777 arl_t *arl; 1778 mblk_t *unbind_mp; 1779 int err = 0; 1780 boolean_t replumb = (ill->ill_replumbing == 1); 1781 1782 DTRACE_PROBE2(ill__downup, char *, "arp_ll_down", ill_t *, ill); 1783 if ((arl = ill_to_arl(ill)) == NULL) 1784 return (ENXIO); 1785 DTRACE_PROBE2(arl__downup, char *, "arp_ll_down", arl_t *, arl); 1786 mutex_enter(&arl->arl_lock); 1787 unbind_mp = arl_unbind(arl); 1788 if (unbind_mp != NULL) { 1789 ASSERT(arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS); 1790 DTRACE_PROBE2(arp__unbinding, mblk_t *, unbind_mp, 1791 arl_t *, arl); 1792 err = EINPROGRESS; 1793 if (replumb) 1794 arl->arl_state_flags |= ARL_LL_REPLUMBING; 1795 } 1796 mutex_exit(&arl->arl_lock); 1797 if (unbind_mp != NULL) 1798 arp_dlpi_send(arl, unbind_mp); 1799 arl_refrele(arl); 1800 return (err); 1801 } 1802 1803 /* ARGSUSED */ 1804 int 1805 arp_close(queue_t *q, int flags __unused, cred_t *credp __unused) 1806 { 1807 if (WR(q)->q_next != NULL) { 1808 /* This is a module close */ 1809 return (arp_modclose(q->q_ptr)); 1810 } 1811 qprocsoff(q); 1812 q->q_ptr = WR(q)->q_ptr = NULL; 1813 return (0); 1814 } 1815 1816 static int 1817 arp_modclose(arl_t *arl) 1818 { 1819 arl_ill_common_t *ai = arl->arl_common; 1820 ill_t *ill; 1821 queue_t *q = arl->arl_rq; 1822 mblk_t *mp, *nextmp; 1823 ipsq_t *ipsq = NULL; 1824 1825 ill = arl_to_ill(arl); 1826 if (ill != NULL) { 1827 if (!ill_waiter_inc(ill)) { 1828 ill_refrele(ill); 1829 } else { 1830 ill_refrele(ill); 1831 if (ipsq_enter(ill, B_FALSE, NEW_OP)) 1832 ipsq = ill->ill_phyint->phyint_ipsq; 1833 ill_waiter_dcr(ill); 1834 } 1835 if (ipsq == NULL) { 1836 /* 1837 * could not enter the ipsq because ill is already 1838 * marked CONDEMNED. 1839 */ 1840 ill = NULL; 1841 } 1842 } 1843 if (ai != NULL && ipsq == NULL) { 1844 /* 1845 * Either we did not get an ill because it was marked CONDEMNED 1846 * or we could not enter the ipsq because it was unplumbing. 1847 * In both cases, wait for the ill to complete ip_modclose(). 1848 * 1849 * If the arp_modclose happened even before SLIFNAME, the ai 1850 * itself would be NULL, in which case we can complete the close 1851 * without waiting. 1852 */ 1853 mutex_enter(&ai->ai_lock); 1854 while (ai->ai_ill != NULL) 1855 cv_wait(&ai->ai_ill_unplumb_done, &ai->ai_lock); 1856 mutex_exit(&ai->ai_lock); 1857 } 1858 ASSERT(ill == NULL || IAM_WRITER_ILL(ill)); 1859 1860 mutex_enter(&arl->arl_lock); 1861 /* 1862 * If the ill had completed unplumbing before arp_modclose(), there 1863 * would be no ill (and therefore, no ipsq) to serialize arp_modclose() 1864 * so that we need to explicitly check for ARL_CONDEMNED and back off 1865 * if it is set. 1866 */ 1867 if ((arl->arl_state_flags & ARL_CONDEMNED) != 0) { 1868 mutex_exit(&arl->arl_lock); 1869 ASSERT(ipsq == NULL); 1870 return (0); 1871 } 1872 arl->arl_state_flags |= ARL_CONDEMNED; 1873 1874 /* 1875 * send out all pending dlpi messages, don't wait for the ack (which 1876 * will be ignored in arp_rput when CONDEMNED is set) 1877 * 1878 * We have to check for pending DL_UNBIND_REQ because, in the case 1879 * that ip_modclose() executed before arp_modclose(), the call to 1880 * ill_delete_tail->ipif_arp_down() would have triggered a 1881 * DL_UNBIND_REQ. When arp_modclose() executes ipsq_enter() will fail 1882 * (since ip_modclose() is in the ipsq) but the DL_UNBIND_ACK may not 1883 * have been processed yet. In this scenario, we cannot reset 1884 * arl_dlpi_pending, because the setting/clearing of arl_state_flags 1885 * related to unbind, and the associated cv_waits must be allowed to 1886 * continue. 1887 */ 1888 if (arl->arl_dlpi_pending != DL_UNBIND_REQ) 1889 arl->arl_dlpi_pending = DL_PRIM_INVAL; 1890 mp = arl->arl_dlpi_deferred; 1891 arl->arl_dlpi_deferred = NULL; 1892 mutex_exit(&arl->arl_lock); 1893 1894 for (; mp != NULL; mp = nextmp) { 1895 nextmp = mp->b_next; 1896 mp->b_next = NULL; 1897 putnext(arl->arl_wq, mp); 1898 } 1899 1900 /* Wait for data paths to quiesce */ 1901 mutex_enter(&arl->arl_lock); 1902 while (arl->arl_refcnt != 0) 1903 cv_wait(&arl->arl_cv, &arl->arl_lock); 1904 1905 /* 1906 * unbind, so that nothing else can come up from driver. 1907 */ 1908 mp = arl_unbind(arl); 1909 mutex_exit(&arl->arl_lock); 1910 if (mp != NULL) 1911 arp_dlpi_send(arl, mp); 1912 mutex_enter(&arl->arl_lock); 1913 1914 /* wait for unbind ack */ 1915 while (arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS) 1916 cv_wait(&arl->arl_cv, &arl->arl_lock); 1917 mutex_exit(&arl->arl_lock); 1918 1919 qprocsoff(q); 1920 1921 if (ill != NULL) { 1922 mutex_enter(&ill->ill_lock); 1923 ill->ill_arl_dlpi_pending = 0; 1924 mutex_exit(&ill->ill_lock); 1925 } 1926 1927 if (ai != NULL) { 1928 mutex_enter(&ai->ai_lock); 1929 ai->ai_arl = NULL; 1930 if (ai->ai_ill == NULL) { 1931 mutex_destroy(&ai->ai_lock); 1932 kmem_free(ai, sizeof (*ai)); 1933 } else { 1934 mutex_exit(&ai->ai_lock); 1935 } 1936 } 1937 1938 /* free up the rest */ 1939 arp_mod_close_tail(arl); 1940 1941 q->q_ptr = WR(q)->q_ptr = NULL; 1942 1943 if (ipsq != NULL) 1944 ipsq_exit(ipsq); 1945 1946 return (0); 1947 } 1948 1949 static void 1950 arp_mod_close_tail(arl_t *arl) 1951 { 1952 ip_stack_t *ipst = arl->arl_ipst; 1953 mblk_t **mpp; 1954 1955 mutex_enter(&ipst->ips_ip_mi_lock); 1956 mi_close_unlink(&ipst->ips_arp_g_head, (IDP)arl); 1957 mutex_exit(&ipst->ips_ip_mi_lock); 1958 1959 /* 1960 * credp could be null if the open didn't succeed and ip_modopen 1961 * itself calls ip_close. 1962 */ 1963 if (arl->arl_credp != NULL) 1964 crfree(arl->arl_credp); 1965 1966 /* Free all retained control messages. */ 1967 mpp = &arl->arl_first_mp_to_free; 1968 do { 1969 while (mpp[0]) { 1970 mblk_t *mp; 1971 mblk_t *mp1; 1972 1973 mp = mpp[0]; 1974 mpp[0] = mp->b_next; 1975 for (mp1 = mp; mp1 != NULL; mp1 = mp1->b_cont) { 1976 mp1->b_next = NULL; 1977 mp1->b_prev = NULL; 1978 } 1979 freemsg(mp); 1980 } 1981 } while (mpp++ != &arl->arl_last_mp_to_free); 1982 1983 netstack_rele(ipst->ips_netstack); 1984 mi_free(arl->arl_name); 1985 mi_close_free((IDP)arl); 1986 } 1987 1988 /* 1989 * DAD failed. Tear down ipifs with the specified srce address. Note that 1990 * tearing down the ipif also meas deleting the ncec through ipif_down, 1991 * so it is not possible to use nce_timer for recovery. Instead we start 1992 * a timer on the ipif. Caller has to free the mp. 1993 */ 1994 void 1995 arp_failure(mblk_t *mp, ip_recv_attr_t *ira) 1996 { 1997 ill_t *ill = ira->ira_ill; 1998 1999 if ((mp = copymsg(mp)) != NULL) { 2000 ill_refhold(ill); 2001 qwriter_ip(ill, ill->ill_rq, mp, arp_excl, NEW_OP, B_FALSE); 2002 } 2003 } 2004 2005 /* 2006 * This is for exclusive changes due to ARP. Tear down an interface due 2007 * to AR_CN_FAILED and AR_CN_BOGON. 2008 */ 2009 /* ARGSUSED */ 2010 static void 2011 arp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 2012 { 2013 ill_t *ill = rq->q_ptr; 2014 arh_t *arh; 2015 ipaddr_t src; 2016 ipif_t *ipif; 2017 ip_stack_t *ipst = ill->ill_ipst; 2018 uchar_t *haddr; 2019 uint_t haddrlen; 2020 2021 /* first try src = ar$spa */ 2022 arh = (arh_t *)mp->b_rptr; 2023 bcopy((char *)&arh[1] + arh->arh_hlen, &src, IP_ADDR_LEN); 2024 2025 haddrlen = arh->arh_hlen; 2026 haddr = (uint8_t *)(arh + 1); 2027 2028 if (haddrlen == ill->ill_phys_addr_length) { 2029 /* 2030 * Ignore conflicts generated by misbehaving switches that 2031 * just reflect our own messages back to us. For IPMP, we may 2032 * see reflections across any ill in the illgrp. 2033 */ 2034 /* For an under ill_grp can change under lock */ 2035 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2036 if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 || 2037 IS_UNDER_IPMP(ill) && ill->ill_grp != NULL && 2038 ipmp_illgrp_find_ill(ill->ill_grp, haddr, 2039 haddrlen) != NULL) { 2040 rw_exit(&ipst->ips_ill_g_lock); 2041 goto ignore_conflict; 2042 } 2043 rw_exit(&ipst->ips_ill_g_lock); 2044 } 2045 2046 /* 2047 * Look up the appropriate ipif. 2048 */ 2049 ipif = ipif_lookup_addr(src, ill, ALL_ZONES, ipst); 2050 if (ipif == NULL) 2051 goto ignore_conflict; 2052 2053 /* Reload the ill to match the ipif */ 2054 ill = ipif->ipif_ill; 2055 2056 /* If it's already duplicate or ineligible, then don't do anything. */ 2057 if (ipif->ipif_flags & (IPIF_POINTOPOINT|IPIF_DUPLICATE)) { 2058 ipif_refrele(ipif); 2059 goto ignore_conflict; 2060 } 2061 2062 /* 2063 * If we failed on a recovery probe, then restart the timer to 2064 * try again later. 2065 */ 2066 if (!ipif->ipif_was_dup) { 2067 char hbuf[MAC_STR_LEN]; 2068 char sbuf[INET_ADDRSTRLEN]; 2069 char ibuf[LIFNAMSIZ]; 2070 2071 (void) mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf)); 2072 (void) ip_dot_addr(src, sbuf); 2073 ipif_get_name(ipif, ibuf, sizeof (ibuf)); 2074 2075 cmn_err(CE_WARN, "%s has duplicate address %s (in use by %s);" 2076 " disabled", ibuf, sbuf, hbuf); 2077 } 2078 mutex_enter(&ill->ill_lock); 2079 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 2080 ipif->ipif_flags |= IPIF_DUPLICATE; 2081 ill->ill_ipif_dup_count++; 2082 mutex_exit(&ill->ill_lock); 2083 (void) ipif_down(ipif, NULL, NULL); 2084 (void) ipif_down_tail(ipif); 2085 mutex_enter(&ill->ill_lock); 2086 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 2087 ill->ill_net_type == IRE_IF_RESOLVER && 2088 !(ipif->ipif_state_flags & IPIF_CONDEMNED) && 2089 ipst->ips_ip_dup_recovery > 0) { 2090 ASSERT(ipif->ipif_recovery_id == 0); 2091 ipif->ipif_recovery_id = timeout(ipif_dup_recovery, 2092 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 2093 } 2094 mutex_exit(&ill->ill_lock); 2095 ipif_refrele(ipif); 2096 2097 ignore_conflict: 2098 freemsg(mp); 2099 } 2100 2101 /* 2102 * This is a place for a dtrace hook. 2103 * Note that mp can be either the DL_UNITDATA_IND with a b_cont payload, 2104 * or just the ARP packet payload as an M_DATA. 2105 */ 2106 /* ARGSUSED */ 2107 static void 2108 arp_drop_packet(const char *str, mblk_t *mp, ill_t *ill) 2109 { 2110 freemsg(mp); 2111 } 2112 2113 static boolean_t 2114 arp_over_driver(queue_t *q) 2115 { 2116 queue_t *qnext = STREAM(q)->sd_wrq->q_next; 2117 2118 /* 2119 * check if first module below stream head is IP or UDP. 2120 */ 2121 ASSERT(qnext != NULL); 2122 if (strcmp(Q2NAME(qnext), "ip") != 0 && 2123 strcmp(Q2NAME(qnext), "udp") != 0) { 2124 /* 2125 * module below is not ip or udp, so arp has been pushed 2126 * on the driver. 2127 */ 2128 return (B_TRUE); 2129 } 2130 return (B_FALSE); 2131 } 2132 2133 static int 2134 arp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2135 { 2136 int err; 2137 2138 ASSERT(sflag & MODOPEN); 2139 if (!arp_over_driver(q)) { 2140 q->q_qinfo = dummymodinfo.st_rdinit; 2141 WR(q)->q_qinfo = dummymodinfo.st_wrinit; 2142 return ((*dummymodinfo.st_rdinit->qi_qopen)(q, devp, flag, 2143 sflag, credp)); 2144 } 2145 err = arp_modopen(q, devp, flag, sflag, credp); 2146 return (err); 2147 } 2148 2149 /* 2150 * In most cases we must be a writer on the IP stream before coming to 2151 * arp_dlpi_send(), to serialize DLPI sends to the driver. The exceptions 2152 * when we are not a writer are very early duing initialization (in 2153 * arl_init, before the arl has done a SLIFNAME, so that we don't yet know 2154 * the associated ill) or during arp_mod_close, when we could not enter the 2155 * ipsq because the ill has already unplumbed. 2156 */ 2157 static void 2158 arp_dlpi_send(arl_t *arl, mblk_t *mp) 2159 { 2160 mblk_t **mpp; 2161 t_uscalar_t prim; 2162 arl_ill_common_t *ai; 2163 2164 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2165 2166 #ifdef DEBUG 2167 ai = arl->arl_common; 2168 if (ai != NULL) { 2169 mutex_enter(&ai->ai_lock); 2170 if (ai->ai_ill != NULL) 2171 ASSERT(IAM_WRITER_ILL(ai->ai_ill)); 2172 mutex_exit(&ai->ai_lock); 2173 } 2174 #endif /* DEBUG */ 2175 2176 mutex_enter(&arl->arl_lock); 2177 if (arl->arl_dlpi_pending != DL_PRIM_INVAL) { 2178 /* Must queue message. Tail insertion */ 2179 mpp = &arl->arl_dlpi_deferred; 2180 while (*mpp != NULL) 2181 mpp = &((*mpp)->b_next); 2182 2183 *mpp = mp; 2184 mutex_exit(&arl->arl_lock); 2185 return; 2186 } 2187 mutex_exit(&arl->arl_lock); 2188 if ((prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive) 2189 == DL_BIND_REQ) { 2190 ASSERT((arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS) == 0); 2191 } 2192 /* 2193 * No need to take the arl_lock to examine ARL_CONDEMNED at this point 2194 * because the only thread that can see ARL_CONDEMNED here is the 2195 * closing arp_modclose() thread which sets the flag after becoming a 2196 * writer on the ipsq. Threads from IP must have finished and 2197 * cannot be active now. 2198 */ 2199 if (!(arl->arl_state_flags & ARL_CONDEMNED) || 2200 (prim == DL_UNBIND_REQ)) { 2201 if (prim != DL_NOTIFY_CONF) { 2202 ill_t *ill = arl_to_ill(arl); 2203 2204 arl->arl_dlpi_pending = prim; 2205 if (ill != NULL) { 2206 mutex_enter(&ill->ill_lock); 2207 ill->ill_arl_dlpi_pending = 1; 2208 mutex_exit(&ill->ill_lock); 2209 ill_refrele(ill); 2210 } 2211 } 2212 } 2213 DTRACE_PROBE4(arl__dlpi, char *, "arp_dlpi_send", 2214 char *, dl_primstr(prim), char *, "-", arl_t *, arl); 2215 putnext(arl->arl_wq, mp); 2216 } 2217 2218 static void 2219 arl_defaults_common(arl_t *arl, mblk_t *mp) 2220 { 2221 dl_info_ack_t *dlia = (dl_info_ack_t *)mp->b_rptr; 2222 /* 2223 * Till the ill is fully up the ill is not globally visible. 2224 * So no need for a lock. 2225 */ 2226 arl->arl_mactype = dlia->dl_mac_type; 2227 arl->arl_sap_length = dlia->dl_sap_length; 2228 2229 if (!arl->arl_dlpi_style_set) { 2230 if (dlia->dl_provider_style == DL_STYLE2) 2231 arl->arl_needs_attach = 1; 2232 mutex_enter(&arl->arl_lock); 2233 ASSERT(arl->arl_dlpi_style_set == 0); 2234 arl->arl_dlpi_style_set = 1; 2235 arl->arl_state_flags &= ~ARL_LL_SUBNET_PENDING; 2236 cv_broadcast(&arl->arl_cv); 2237 mutex_exit(&arl->arl_lock); 2238 } 2239 } 2240 2241 int 2242 arl_init(queue_t *q, arl_t *arl) 2243 { 2244 mblk_t *info_mp; 2245 dl_info_req_t *dlir; 2246 2247 /* subset of ill_init */ 2248 mutex_init(&arl->arl_lock, NULL, MUTEX_DEFAULT, 0); 2249 2250 arl->arl_rq = q; 2251 arl->arl_wq = WR(q); 2252 2253 info_mp = allocb(MAX(sizeof (dl_info_req_t), sizeof (dl_info_ack_t)), 2254 BPRI_HI); 2255 if (info_mp == NULL) 2256 return (ENOMEM); 2257 /* 2258 * allocate sufficient space to contain device name. 2259 */ 2260 arl->arl_name = (char *)(mi_zalloc(2 * LIFNAMSIZ)); 2261 arl->arl_ppa = UINT_MAX; 2262 arl->arl_state_flags |= (ARL_LL_SUBNET_PENDING | ARL_LL_UNBOUND); 2263 2264 /* Send down the Info Request to the driver. */ 2265 info_mp->b_datap->db_type = M_PCPROTO; 2266 dlir = (dl_info_req_t *)info_mp->b_rptr; 2267 info_mp->b_wptr = (uchar_t *)&dlir[1]; 2268 dlir->dl_primitive = DL_INFO_REQ; 2269 arl->arl_dlpi_pending = DL_PRIM_INVAL; 2270 qprocson(q); 2271 2272 arp_dlpi_send(arl, info_mp); 2273 return (0); 2274 } 2275 2276 int 2277 arl_wait_for_info_ack(arl_t *arl) 2278 { 2279 int err; 2280 2281 mutex_enter(&arl->arl_lock); 2282 while (arl->arl_state_flags & ARL_LL_SUBNET_PENDING) { 2283 /* 2284 * Return value of 0 indicates a pending signal. 2285 */ 2286 err = cv_wait_sig(&arl->arl_cv, &arl->arl_lock); 2287 if (err == 0) { 2288 mutex_exit(&arl->arl_lock); 2289 return (EINTR); 2290 } 2291 } 2292 mutex_exit(&arl->arl_lock); 2293 /* 2294 * ip_rput_other could have set an error in ill_error on 2295 * receipt of M_ERROR. 2296 */ 2297 return (arl->arl_error); 2298 } 2299 2300 void 2301 arl_set_muxid(ill_t *ill, int muxid) 2302 { 2303 arl_t *arl; 2304 2305 arl = ill_to_arl(ill); 2306 if (arl != NULL) { 2307 arl->arl_muxid = muxid; 2308 arl_refrele(arl); 2309 } 2310 } 2311 2312 int 2313 arl_get_muxid(ill_t *ill) 2314 { 2315 arl_t *arl; 2316 int muxid = 0; 2317 2318 arl = ill_to_arl(ill); 2319 if (arl != NULL) { 2320 muxid = arl->arl_muxid; 2321 arl_refrele(arl); 2322 } 2323 return (muxid); 2324 } 2325 2326 static int 2327 arp_modopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2328 { 2329 int err; 2330 zoneid_t zoneid; 2331 netstack_t *ns; 2332 ip_stack_t *ipst; 2333 arl_t *arl = NULL; 2334 2335 /* 2336 * Prevent unprivileged processes from pushing IP so that 2337 * they can't send raw IP. 2338 */ 2339 if (secpolicy_net_rawaccess(credp) != 0) 2340 return (EPERM); 2341 2342 ns = netstack_find_by_cred(credp); 2343 ASSERT(ns != NULL); 2344 ipst = ns->netstack_ip; 2345 ASSERT(ipst != NULL); 2346 2347 /* 2348 * For exclusive stacks we set the zoneid to zero 2349 * to make IP operate as if in the global zone. 2350 */ 2351 if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID) 2352 zoneid = GLOBAL_ZONEID; 2353 else 2354 zoneid = crgetzoneid(credp); 2355 2356 arl = (arl_t *)mi_open_alloc_sleep(sizeof (arl_t)); 2357 q->q_ptr = WR(q)->q_ptr = arl; 2358 arl->arl_ipst = ipst; 2359 arl->arl_zoneid = zoneid; 2360 err = arl_init(q, arl); 2361 2362 if (err != 0) { 2363 mi_free(arl->arl_name); 2364 mi_free(arl); 2365 netstack_rele(ipst->ips_netstack); 2366 q->q_ptr = NULL; 2367 WR(q)->q_ptr = NULL; 2368 return (err); 2369 } 2370 2371 /* 2372 * Wait for the DL_INFO_ACK if a DL_INFO_REQ was sent. 2373 */ 2374 err = arl_wait_for_info_ack(arl); 2375 if (err == 0) 2376 arl->arl_credp = credp; 2377 else 2378 goto fail; 2379 2380 crhold(credp); 2381 2382 mutex_enter(&ipst->ips_ip_mi_lock); 2383 err = mi_open_link(&ipst->ips_arp_g_head, (IDP)q->q_ptr, devp, flag, 2384 sflag, credp); 2385 mutex_exit(&ipst->ips_ip_mi_lock); 2386 fail: 2387 if (err) { 2388 (void) arp_close(q, 0, credp); 2389 return (err); 2390 } 2391 return (0); 2392 } 2393 2394 /* 2395 * Notify any downstream modules (esp softmac and hitbox) of the name 2396 * of this interface using an M_CTL. 2397 */ 2398 static void 2399 arp_ifname_notify(arl_t *arl) 2400 { 2401 mblk_t *mp1, *mp2; 2402 struct iocblk *iocp; 2403 struct lifreq *lifr; 2404 2405 if ((mp1 = mkiocb(SIOCSLIFNAME)) == NULL) 2406 return; 2407 if ((mp2 = allocb(sizeof (struct lifreq), BPRI_HI)) == NULL) { 2408 freemsg(mp1); 2409 return; 2410 } 2411 2412 lifr = (struct lifreq *)mp2->b_rptr; 2413 mp2->b_wptr += sizeof (struct lifreq); 2414 bzero(lifr, sizeof (struct lifreq)); 2415 2416 (void) strncpy(lifr->lifr_name, arl->arl_name, LIFNAMSIZ); 2417 lifr->lifr_ppa = arl->arl_ppa; 2418 lifr->lifr_flags = ILLF_IPV4; 2419 2420 /* Use M_CTL to avoid confusing anyone else who might be listening. */ 2421 DB_TYPE(mp1) = M_CTL; 2422 mp1->b_cont = mp2; 2423 iocp = (struct iocblk *)mp1->b_rptr; 2424 iocp->ioc_count = msgsize(mp1->b_cont); 2425 DTRACE_PROBE4(arl__dlpi, char *, "arp_ifname_notify", 2426 char *, "SIOCSLIFNAME", char *, "-", arl_t *, arl); 2427 putnext(arl->arl_wq, mp1); 2428 } 2429 2430 void 2431 arp_send_replumb_conf(ill_t *ill) 2432 { 2433 mblk_t *mp; 2434 arl_t *arl = ill_to_arl(ill); 2435 2436 if (arl == NULL) 2437 return; 2438 /* 2439 * arl_got_replumb and arl_got_unbind to be cleared after we complete 2440 * arp_cmd_done. 2441 */ 2442 mp = mexchange(NULL, NULL, sizeof (dl_notify_conf_t), M_PROTO, 2443 DL_NOTIFY_CONF); 2444 ((dl_notify_conf_t *)(mp->b_rptr))->dl_notification = 2445 DL_NOTE_REPLUMB_DONE; 2446 arp_dlpi_send(arl, mp); 2447 mutex_enter(&arl->arl_lock); 2448 arl->arl_state_flags &= ~ARL_LL_REPLUMBING; 2449 mutex_exit(&arl->arl_lock); 2450 arl_refrele(arl); 2451 } 2452 2453 /* 2454 * The unplumb code paths call arp_unbind_complete() to make sure that it is 2455 * safe to tear down the ill. We wait for DL_UNBIND_ACK to complete, and also 2456 * for the arl_refcnt to fall to one so that, when we return from 2457 * arp_unbind_complete(), we know for certain that there are no threads in 2458 * arp_rput() that might access the arl_ill. 2459 */ 2460 void 2461 arp_unbind_complete(ill_t *ill) 2462 { 2463 arl_t *arl = ill_to_arl(ill); 2464 2465 if (arl == NULL) 2466 return; 2467 mutex_enter(&arl->arl_lock); 2468 /* 2469 * wait for unbind ack and arl_refcnt to drop to 1. Note that the 2470 * quiescent arl_refcnt for this function is 1 (and not 0) because 2471 * ill_to_arl() will itself return after taking a ref on the arl_t. 2472 */ 2473 while (arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS) 2474 cv_wait(&arl->arl_cv, &arl->arl_lock); 2475 while (arl->arl_refcnt != 1) 2476 cv_wait(&arl->arl_cv, &arl->arl_lock); 2477 mutex_exit(&arl->arl_lock); 2478 arl_refrele(arl); 2479 } 2480