/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <inet/ip_arp.h>
#include <inet/ip_ndp.h>
#include <net/if_arp.h>
#include <netinet/if_ether.h>
#include <sys/strsubr.h>
#include <inet/ip6.h>
#include <inet/ip.h>
#include <inet/ip_ire.h>
#include <inet/ip_if.h>
#include <sys/dlpi.h>
#include <sys/sunddi.h>
#include <sys/strsun.h>
#include <sys/sdt.h>
#include <inet/mi.h>
#include <inet/arp.h>
#include <inet/ipdrop.h>
#include <sys/sockio.h>
#include <inet/ip_impl.h>
#include <sys/policy.h>

/*
 * Offset of the link-layer address relative to a dl_unitdata_req_t header:
 * just past the header when arl_sap_length is negative, otherwise past the
 * header plus ABS(arl_sap_length) bytes.
 */
#define	ARL_LL_ADDR_OFFSET(arl)	(((arl)->arl_sap_length) < 0 ? \
	(sizeof (dl_unitdata_req_t)) : \
	((sizeof (dl_unitdata_req_t)) + (ABS((arl)->arl_sap_length))))

/*
 * MAC-specific intelligence.  Shouldn't be needed, but the DL_INFO_ACK
 * doesn't quite do it for us.
 *
 * One row of arp_m_tbl[]:
 *   arp_mac_type           - DLPI MAC type; the key matched by arp_m_lookup()
 *   arp_mac_arp_hw_type    - ARP hardware type returned by arp_hw_type()
 *   arp_mac_sap_length     - default SAP length used by arp_ll_set_defaults()
 *   arp_mac_hw_addr_length - default hardware address length used by
 *                            arp_ll_set_defaults()
 */
typedef struct arp_m_s {
	t_uscalar_t	arp_mac_type;
	uint32_t	arp_mac_arp_hw_type;
	t_scalar_t	arp_mac_sap_length;
	uint32_t	arp_mac_hw_addr_length;
} arp_m_t;

static int arp_close(queue_t *, int);
static void arp_rput(queue_t *, mblk_t *);
static void arp_wput(queue_t *, mblk_t *);
static arp_m_t	*arp_m_lookup(t_uscalar_t mac_type);
static void arp_notify(ipaddr_t, mblk_t *, uint32_t, ip_recv_attr_t *,
	ncec_t *);
static int arp_output(ill_t *, uint32_t, const uchar_t *, const uchar_t *,
	const uchar_t *, const uchar_t *, uchar_t *);
static int  arp_modclose(arl_t *);
static void  arp_mod_close_tail(arl_t *);
static mblk_t *arl_unbind(arl_t *);
static void arp_process_packet(ill_t *, mblk_t *);
static void arp_excl(ipsq_t *, queue_t *, mblk_t *, void *);
static void arp_drop_packet(const char *str, mblk_t *, ill_t *);
static int arp_open(queue_t *, dev_t *, int, int, cred_t *);
static int ip_sioctl_ifunitsel_arp(queue_t *, int *);
static int ip_sioctl_slifname_arp(queue_t *, void *);
static void arp_dlpi_send(arl_t *, mblk_t *);
static void arl_defaults_common(arl_t *, mblk_t *);
static int arp_modopen(queue_t *, dev_t *, int, int, cred_t *);
static void arp_ifname_notify(arl_t *);
static void arp_rput_dlpi_writer(ipsq_t *, queue_t *, mblk_t *, void *);
static arl_t *ill_to_arl(ill_t *);

/* The DLPI primitive stored at the read pointer of mp. */
#define	DL_PRIM(mp)	(((union DL_primitives *)(mp)->b_rptr)->dl_primitive)
/*
 * True when mp is an M_PROTO block at least as long as a dl_unitdata_ind_t
 * and its primitive is DL_UNITDATA_IND.
 */
#define	IS_DLPI_DATA(mp)						\
	((DB_TYPE(mp) == M_PROTO) &&					\
	MBLKL(mp) >= sizeof (dl_unitdata_ind_t) &&			\
	(DL_PRIM(mp) == DL_UNITDATA_IND))

/* Result codes returned by ip_nce_resolve_all(). */
#define	AR_NOTFOUND	1	/* No matching ace found in cache */
#define	AR_MERGED	2	/* Matching ace updated (RFC 826 Merge_flag) */
#define	AR_LOOPBACK	3	/* Our own arp packet was received */
#define	AR_BOGON	4	/* Another host has our IP addr.
*/ 95 #define AR_FAILED 5 /* Duplicate Address Detection has failed */ 96 #define AR_CHANGED 6 /* Address has changed; tell IP (and merged) */ 97 98 boolean_t arp_no_defense; 99 100 struct module_info arp_mod_info = { 101 IP_MOD_ID, "arp", 1, INFPSZ, 65536, 1024 102 }; 103 static struct qinit rinit_arp = { 104 (pfi_t)arp_rput, NULL, arp_open, arp_close, NULL, &arp_mod_info 105 }; 106 static struct qinit winit_arp = { 107 (pfi_t)arp_wput, NULL, arp_open, arp_close, NULL, 108 &arp_mod_info 109 }; 110 struct streamtab arpinfo = { 111 &rinit_arp, &winit_arp 112 }; 113 #define ARH_FIXED_LEN 8 114 #define AR_LL_HDR_SLACK 32 115 116 /* 117 * pfhooks for ARP. 118 */ 119 #define ARP_HOOK_IN(_hook, _event, _ilp, _hdr, _fm, _m, ipst) \ 120 \ 121 if ((_hook).he_interested) { \ 122 hook_pkt_event_t info; \ 123 \ 124 info.hpe_protocol = ipst->ips_arp_net_data; \ 125 info.hpe_ifp = _ilp; \ 126 info.hpe_ofp = 0; \ 127 info.hpe_hdr = _hdr; \ 128 info.hpe_mp = &(_fm); \ 129 info.hpe_mb = _m; \ 130 if (hook_run(ipst->ips_arp_net_data->netd_hooks, \ 131 _event, (hook_data_t)&info) != 0) { \ 132 if (_fm != NULL) { \ 133 freemsg(_fm); \ 134 _fm = NULL; \ 135 } \ 136 _hdr = NULL; \ 137 _m = NULL; \ 138 } else { \ 139 _hdr = info.hpe_hdr; \ 140 _m = info.hpe_mb; \ 141 } \ 142 } 143 144 #define ARP_HOOK_OUT(_hook, _event, _olp, _hdr, _fm, _m, ipst) \ 145 \ 146 if ((_hook).he_interested) { \ 147 hook_pkt_event_t info; \ 148 \ 149 info.hpe_protocol = ipst->ips_arp_net_data; \ 150 info.hpe_ifp = 0; \ 151 info.hpe_ofp = _olp; \ 152 info.hpe_hdr = _hdr; \ 153 info.hpe_mp = &(_fm); \ 154 info.hpe_mb = _m; \ 155 if (hook_run(ipst->ips_arp_net_data->netd_hooks, \ 156 _event, (hook_data_t)&info) != 0) { \ 157 if (_fm != NULL) { \ 158 freemsg(_fm); \ 159 _fm = NULL; \ 160 } \ 161 _hdr = NULL; \ 162 _m = NULL; \ 163 } else { \ 164 _hdr = info.hpe_hdr; \ 165 _m = info.hpe_mb; \ 166 } \ 167 } 168 169 static arp_m_t arp_m_tbl[] = { 170 { DL_CSMACD, ARPHRD_ETHER, -2, 6}, /* 802.3 */ 171 { DL_TPB, 
ARPHRD_IEEE802, -2, 6}, /* 802.4 */ 172 { DL_TPR, ARPHRD_IEEE802, -2, 6}, /* 802.5 */ 173 { DL_METRO, ARPHRD_IEEE802, -2, 6}, /* 802.6 */ 174 { DL_ETHER, ARPHRD_ETHER, -2, 6}, /* Ethernet */ 175 { DL_FDDI, ARPHRD_ETHER, -2, 6}, /* FDDI */ 176 { DL_IB, ARPHRD_IB, -2, 20}, /* Infiniband */ 177 { DL_OTHER, ARPHRD_ETHER, -2, 6} /* unknown */ 178 }; 179 180 static void 181 arl_refhold_locked(arl_t *arl) 182 { 183 ASSERT(MUTEX_HELD(&arl->arl_lock)); 184 arl->arl_refcnt++; 185 ASSERT(arl->arl_refcnt != 0); 186 } 187 188 static void 189 arl_refrele(arl_t *arl) 190 { 191 mutex_enter(&arl->arl_lock); 192 ASSERT(arl->arl_refcnt != 0); 193 arl->arl_refcnt--; 194 if (arl->arl_refcnt > 1) { 195 mutex_exit(&arl->arl_lock); 196 return; 197 } 198 199 /* ill_close or arp_unbind_complete may be waiting */ 200 cv_broadcast(&arl->arl_cv); 201 mutex_exit(&arl->arl_lock); 202 } 203 204 /* 205 * wake up any pending ip ioctls. 206 */ 207 static void 208 arp_cmd_done(ill_t *ill, int err, t_uscalar_t lastprim) 209 { 210 if (lastprim == DL_UNBIND_REQ && ill->ill_replumbing) 211 arp_replumb_done(ill, 0); 212 else 213 arp_bringup_done(ill, err); 214 } 215 216 static int 217 ip_nce_resolve_all(ill_t *ill, uchar_t *src_haddr, uint32_t hlen, 218 const in_addr_t *src_paddr, ncec_t **sncec, int op) 219 { 220 int retv; 221 ncec_t *ncec; 222 boolean_t ll_changed; 223 uchar_t *lladdr = NULL; 224 int new_state; 225 226 ASSERT(ill != NULL); 227 228 ncec = ncec_lookup_illgrp_v4(ill, src_paddr); 229 *sncec = ncec; 230 231 if (ncec == NULL) { 232 retv = AR_NOTFOUND; 233 goto done; 234 } 235 236 mutex_enter(&ncec->ncec_lock); 237 /* 238 * IP addr and hardware address match what we already 239 * have, then this is a broadcast packet emitted by one of our 240 * interfaces, reflected by the switch and received on another 241 * interface. We return AR_LOOPBACK. 
242 */ 243 lladdr = ncec->ncec_lladdr; 244 if (NCE_MYADDR(ncec) && hlen == ncec->ncec_ill->ill_phys_addr_length && 245 bcmp(lladdr, src_haddr, hlen) == 0) { 246 mutex_exit(&ncec->ncec_lock); 247 retv = AR_LOOPBACK; 248 goto done; 249 } 250 /* 251 * If the entry is unverified, then we've just verified that 252 * someone else already owns this address, because this is a 253 * message with the same protocol address but different 254 * hardware address. 255 */ 256 if (ncec->ncec_flags & NCE_F_UNVERIFIED) { 257 mutex_exit(&ncec->ncec_lock); 258 ncec_delete(ncec); 259 ncec_refrele(ncec); 260 *sncec = NULL; 261 retv = AR_FAILED; 262 goto done; 263 } 264 265 /* 266 * If the IP address matches ours and we're authoritative for 267 * this entry, then some other node is using our IP addr, so 268 * return AR_BOGON. Also reset the transmit count to zero so 269 * that, if we're currently in initial announcement mode, we 270 * switch back to the lazier defense mode. Knowing that 271 * there's at least one duplicate out there, we ought not 272 * blindly announce. 273 * 274 * NCE_F_AUTHORITY is set in one of two ways: 275 * 1. /sbin/arp told us so, via the "permanent" flag. 276 * 2. This is one of my addresses. 277 */ 278 if (ncec->ncec_flags & NCE_F_AUTHORITY) { 279 ncec->ncec_unsolicit_count = 0; 280 mutex_exit(&ncec->ncec_lock); 281 retv = AR_BOGON; 282 goto done; 283 } 284 285 /* 286 * No address conflict was detected, and we are getting 287 * ready to update the ncec's hwaddr. The nce MUST NOT be on an 288 * under interface, because all dynamic nce's are created on the 289 * native interface (in the non-IPMP case) or on the IPMP 290 * meta-interface (in the IPMP case) 291 */ 292 ASSERT(!IS_UNDER_IPMP(ncec->ncec_ill)); 293 294 /* 295 * update ncec with src_haddr, hlen. 296 * 297 * We are trying to resolve this ncec_addr/src_paddr and we 298 * got a REQUEST/RESPONSE from the ncec_addr/src_paddr. 299 * So the new_state is at least "STALE". 
If, in addition, 300 * this a solicited, unicast ARP_RESPONSE, we can transition 301 * to REACHABLE. 302 */ 303 new_state = ND_STALE; 304 ip1dbg(("got info for ncec %p from addr %x\n", 305 (void *)ncec, *src_paddr)); 306 retv = AR_MERGED; 307 if (ncec->ncec_state == ND_INCOMPLETE || 308 ncec->ncec_state == ND_INITIAL) { 309 ll_changed = B_TRUE; 310 } else { 311 ll_changed = nce_cmp_ll_addr(ncec, src_haddr, hlen); 312 if (!ll_changed) 313 new_state = ND_UNCHANGED; 314 else 315 retv = AR_CHANGED; 316 } 317 /* 318 * We don't have the equivalent of the IPv6 'S' flag indicating 319 * a solicited response, so we assume that if we are in 320 * INCOMPLETE, or got back an unchanged lladdr in PROBE state, 321 * and this is an ARP_RESPONSE, it must be a 322 * solicited response allowing us to transtion to REACHABLE. 323 */ 324 if (op == ARP_RESPONSE) { 325 switch (ncec->ncec_state) { 326 case ND_PROBE: 327 new_state = (ll_changed ? ND_STALE : ND_REACHABLE); 328 break; 329 case ND_INCOMPLETE: 330 new_state = ND_REACHABLE; 331 break; 332 } 333 } 334 /* 335 * Call nce_update() to refresh fastpath information on any 336 * dependent nce_t entries. 337 */ 338 nce_update(ncec, new_state, (ll_changed ? src_haddr : NULL)); 339 mutex_exit(&ncec->ncec_lock); 340 nce_resolv_ok(ncec); 341 done: 342 return (retv); 343 } 344 345 /* Find an entry for a particular MAC type in the arp_m_tbl. */ 346 static arp_m_t * 347 arp_m_lookup(t_uscalar_t mac_type) 348 { 349 arp_m_t *arm; 350 351 for (arm = arp_m_tbl; arm < A_END(arp_m_tbl); arm++) { 352 if (arm->arp_mac_type == mac_type) 353 return (arm); 354 } 355 return (NULL); 356 } 357 358 static uint32_t 359 arp_hw_type(t_uscalar_t mactype) 360 { 361 arp_m_t *arm; 362 363 if ((arm = arp_m_lookup(mactype)) == NULL) 364 arm = arp_m_lookup(DL_OTHER); 365 return (arm->arp_mac_arp_hw_type); 366 } 367 368 /* 369 * Called when an DLPI control message has been acked; send down the next 370 * queued message (if any). 
371 * The DLPI messages of interest being bind, attach and unbind since 372 * these are the only ones sent by ARP via arp_dlpi_send. 373 */ 374 static void 375 arp_dlpi_done(arl_t *arl, ill_t *ill) 376 { 377 mblk_t *mp; 378 int err; 379 t_uscalar_t prim; 380 381 mutex_enter(&arl->arl_lock); 382 prim = arl->arl_dlpi_pending; 383 384 if ((mp = arl->arl_dlpi_deferred) == NULL) { 385 arl->arl_dlpi_pending = DL_PRIM_INVAL; 386 if (arl->arl_state_flags & ARL_LL_DOWN) 387 err = ENETDOWN; 388 else 389 err = 0; 390 mutex_exit(&arl->arl_lock); 391 392 mutex_enter(&ill->ill_lock); 393 ill->ill_arl_dlpi_pending = 0; 394 mutex_exit(&ill->ill_lock); 395 arp_cmd_done(ill, err, prim); 396 return; 397 } 398 399 arl->arl_dlpi_deferred = mp->b_next; 400 mp->b_next = NULL; 401 402 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 403 404 arl->arl_dlpi_pending = DL_PRIM(mp); 405 mutex_exit(&arl->arl_lock); 406 407 mutex_enter(&ill->ill_lock); 408 ill->ill_arl_dlpi_pending = 1; 409 mutex_exit(&ill->ill_lock); 410 411 putnext(arl->arl_wq, mp); 412 } 413 414 /* 415 * This routine is called during module initialization when the DL_INFO_ACK 416 * comes back from the device. We set up defaults for all the device dependent 417 * doo-dads we are going to need. This will leave us ready to roll if we are 418 * attempting auto-configuration. Alternatively, these defaults can be 419 * overridden by initialization procedures possessing higher intelligence. 420 * 421 * Caller will free the mp. 422 */ 423 static void 424 arp_ll_set_defaults(arl_t *arl, mblk_t *mp) 425 { 426 arp_m_t *arm; 427 dl_info_ack_t *dlia = (dl_info_ack_t *)mp->b_rptr; 428 429 if ((arm = arp_m_lookup(dlia->dl_mac_type)) == NULL) 430 arm = arp_m_lookup(DL_OTHER); 431 ASSERT(arm != NULL); 432 433 /* 434 * We initialize based on parameters in the (currently) not too 435 * exhaustive arp_m_tbl. 
436 */ 437 if (dlia->dl_version == DL_VERSION_2) { 438 arl->arl_sap_length = dlia->dl_sap_length; 439 arl->arl_phys_addr_length = dlia->dl_brdcst_addr_length; 440 if (dlia->dl_provider_style == DL_STYLE2) 441 arl->arl_needs_attach = 1; 442 } else { 443 arl->arl_sap_length = arm->arp_mac_sap_length; 444 arl->arl_phys_addr_length = arm->arp_mac_hw_addr_length; 445 } 446 /* 447 * Note: the arp_hw_type in the arp header may be derived from 448 * the ill_mac_type and arp_m_lookup(). 449 */ 450 arl->arl_sap = ETHERTYPE_ARP; 451 arl_defaults_common(arl, mp); 452 } 453 454 static void 455 arp_wput(queue_t *q, mblk_t *mp) 456 { 457 int err = EINVAL; 458 struct iocblk *ioc; 459 mblk_t *mp1; 460 461 switch (DB_TYPE(mp)) { 462 case M_IOCTL: 463 ASSERT(q->q_next != NULL); 464 ioc = (struct iocblk *)mp->b_rptr; 465 if (ioc->ioc_cmd != SIOCSLIFNAME && 466 ioc->ioc_cmd != IF_UNITSEL) { 467 DTRACE_PROBE4(arl__dlpi, char *, "arp_wput", 468 char *, "<some ioctl>", char *, "-", 469 arl_t *, (arl_t *)q->q_ptr); 470 putnext(q, mp); 471 return; 472 } 473 if ((mp1 = mp->b_cont) == 0) 474 err = EINVAL; 475 else if (ioc->ioc_cmd == SIOCSLIFNAME) 476 err = ip_sioctl_slifname_arp(q, mp1->b_rptr); 477 else if (ioc->ioc_cmd == IF_UNITSEL) 478 err = ip_sioctl_ifunitsel_arp(q, (int *)mp1->b_rptr); 479 if (err == 0) 480 miocack(q, mp, 0, 0); 481 else 482 miocnak(q, mp, 0, err); 483 return; 484 default: 485 DTRACE_PROBE4(arl__dlpi, char *, "arp_wput default", 486 char *, "default mblk", char *, "-", 487 arl_t *, (arl_t *)q->q_ptr); 488 putnext(q, mp); 489 return; 490 } 491 } 492 493 /* 494 * similar to ill_dlpi_pending(): verify that the received DLPI response 495 * matches the one that is pending for the arl. 
496 */ 497 static boolean_t 498 arl_dlpi_pending(arl_t *arl, t_uscalar_t prim) 499 { 500 t_uscalar_t pending; 501 502 mutex_enter(&arl->arl_lock); 503 if (arl->arl_dlpi_pending == prim) { 504 mutex_exit(&arl->arl_lock); 505 return (B_TRUE); 506 } 507 508 if (arl->arl_state_flags & ARL_CONDEMNED) { 509 mutex_exit(&arl->arl_lock); 510 return (B_FALSE); 511 } 512 pending = arl->arl_dlpi_pending; 513 mutex_exit(&arl->arl_lock); 514 515 if (pending == DL_PRIM_INVAL) { 516 ip0dbg(("arl_dlpi_pending unsolicited ack for %s on %s", 517 dl_primstr(prim), arl->arl_name)); 518 } else { 519 ip0dbg(("arl_dlpi_pending ack for %s on %s expect %s", 520 dl_primstr(prim), arl->arl_name, dl_primstr(pending))); 521 } 522 return (B_FALSE); 523 } 524 525 /* DLPI messages, other than DL_UNITDATA_IND are handled here. */ 526 static void 527 arp_rput_dlpi(queue_t *q, mblk_t *mp) 528 { 529 arl_t *arl = (arl_t *)q->q_ptr; 530 union DL_primitives *dlp; 531 t_uscalar_t prim; 532 t_uscalar_t reqprim = DL_PRIM_INVAL; 533 ill_t *ill; 534 535 if ((mp->b_wptr - mp->b_rptr) < sizeof (dlp->dl_primitive)) { 536 putnext(q, mp); 537 return; 538 } 539 dlp = (union DL_primitives *)mp->b_rptr; 540 prim = dlp->dl_primitive; 541 542 /* 543 * If we received an ACK but didn't send a request for it, then it 544 * can't be part of any pending operation; discard up-front. 545 */ 546 switch (prim) { 547 case DL_ERROR_ACK: 548 /* 549 * ce is confused about how DLPI works, so we have to interpret 550 * an "error" on DL_NOTIFY_ACK (which we never could have sent) 551 * as really meaning an error on DL_NOTIFY_REQ. 552 * 553 * Note that supporting DL_NOTIFY_REQ is optional, so printing 554 * out an error message on the console isn't warranted except 555 * for debug. 
556 */ 557 if (dlp->error_ack.dl_error_primitive == DL_NOTIFY_ACK || 558 dlp->error_ack.dl_error_primitive == DL_NOTIFY_REQ) { 559 reqprim = DL_NOTIFY_REQ; 560 } else { 561 reqprim = dlp->error_ack.dl_error_primitive; 562 } 563 break; 564 case DL_INFO_ACK: 565 reqprim = DL_INFO_REQ; 566 break; 567 case DL_OK_ACK: 568 reqprim = dlp->ok_ack.dl_correct_primitive; 569 break; 570 case DL_BIND_ACK: 571 reqprim = DL_BIND_REQ; 572 break; 573 default: 574 DTRACE_PROBE2(rput_dl_badprim, arl_t *, arl, 575 union DL_primitives *, dlp); 576 putnext(q, mp); 577 return; 578 } 579 if (reqprim == DL_PRIM_INVAL || !arl_dlpi_pending(arl, reqprim)) { 580 freemsg(mp); 581 return; 582 } 583 DTRACE_PROBE4(arl__dlpi, char *, "arp_rput_dlpi received", 584 char *, dl_primstr(prim), char *, dl_primstr(reqprim), 585 arl_t *, arl); 586 587 ASSERT(prim != DL_NOTIFY_IND); 588 589 ill = arl_to_ill(arl); 590 591 switch (reqprim) { 592 case DL_INFO_REQ: 593 /* 594 * ill has not been set up yet for this case. This is the 595 * DL_INFO_ACK for the first DL_INFO_REQ sent from 596 * arp_modopen(). There should be no other arl_dlpi_deferred 597 * messages pending. We initialize the arl here. 598 */ 599 ASSERT(!arl->arl_dlpi_style_set); 600 ASSERT(arl->arl_dlpi_pending == DL_INFO_REQ); 601 ASSERT(arl->arl_dlpi_deferred == NULL); 602 arl->arl_dlpi_pending = DL_PRIM_INVAL; 603 arp_ll_set_defaults(arl, mp); 604 freemsg(mp); 605 return; 606 case DL_UNBIND_REQ: 607 mutex_enter(&arl->arl_lock); 608 arl->arl_state_flags &= ~ARL_DL_UNBIND_IN_PROGRESS; 609 /* 610 * This is not an error, so we don't set ARL_LL_DOWN 611 */ 612 arl->arl_state_flags &= ~ARL_LL_UP; 613 arl->arl_state_flags |= ARL_LL_UNBOUND; 614 if (arl->arl_state_flags & ARL_CONDEMNED) { 615 /* 616 * if this is part of the unplumb the arl may 617 * vaporize any moment after we cv_signal the 618 * arl_cv so we reset arl_dlpi_pending here. 619 * All other cases (including replumb) will 620 * have the arl_dlpi_pending reset in 621 * arp_dlpi_done. 
622 */ 623 arl->arl_dlpi_pending = DL_PRIM_INVAL; 624 } 625 cv_signal(&arl->arl_cv); 626 mutex_exit(&arl->arl_lock); 627 break; 628 } 629 if (ill != NULL) { 630 /* 631 * ill ref obtained by arl_to_ill() will be released 632 * by qwriter_ip() 633 */ 634 qwriter_ip(ill, ill->ill_wq, mp, arp_rput_dlpi_writer, 635 CUR_OP, B_TRUE); 636 return; 637 } 638 freemsg(mp); 639 } 640 641 /* 642 * Handling of DLPI messages that require exclusive access to the ipsq. 643 */ 644 /* ARGSUSED */ 645 static void 646 arp_rput_dlpi_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *dummy_arg) 647 { 648 union DL_primitives *dlp = (union DL_primitives *)mp->b_rptr; 649 ill_t *ill = (ill_t *)q->q_ptr; 650 arl_t *arl = ill_to_arl(ill); 651 652 if (arl == NULL) { 653 /* 654 * happens as a result arp_modclose triggering unbind. 655 * arp_rput_dlpi will cv_signal the arl_cv and the modclose 656 * will complete, but when it does ipsq_exit, the waiting 657 * qwriter_ip gets into the ipsq but will find the arl null. 658 * There should be no deferred messages in this case, so 659 * just complete and exit. 660 */ 661 arp_cmd_done(ill, 0, DL_UNBIND_REQ); 662 freemsg(mp); 663 return; 664 } 665 switch (dlp->dl_primitive) { 666 case DL_ERROR_ACK: 667 switch (dlp->error_ack.dl_error_primitive) { 668 case DL_UNBIND_REQ: 669 mutex_enter(&arl->arl_lock); 670 arl->arl_state_flags &= ~ARL_DL_UNBIND_IN_PROGRESS; 671 arl->arl_state_flags &= ~ARL_LL_UP; 672 arl->arl_state_flags |= ARL_LL_UNBOUND; 673 arl->arl_state_flags |= ARL_LL_DOWN; 674 cv_signal(&arl->arl_cv); 675 mutex_exit(&arl->arl_lock); 676 break; 677 case DL_BIND_REQ: 678 mutex_enter(&arl->arl_lock); 679 arl->arl_state_flags &= ~ARL_LL_UP; 680 arl->arl_state_flags |= ARL_LL_DOWN; 681 arl->arl_state_flags |= ARL_LL_UNBOUND; 682 cv_signal(&arl->arl_cv); 683 mutex_exit(&arl->arl_lock); 684 break; 685 case DL_ATTACH_REQ: 686 break; 687 default: 688 /* If it's anything else, we didn't send it. 
*/ 689 arl_refrele(arl); 690 putnext(q, mp); 691 return; 692 } 693 break; 694 case DL_OK_ACK: 695 DTRACE_PROBE4(arl__dlpi, char *, "arp_rput_dlpi_writer ok", 696 char *, dl_primstr(dlp->ok_ack.dl_correct_primitive), 697 char *, dl_primstr(dlp->ok_ack.dl_correct_primitive), 698 arl_t *, arl); 699 mutex_enter(&arl->arl_lock); 700 switch (dlp->ok_ack.dl_correct_primitive) { 701 case DL_UNBIND_REQ: 702 case DL_ATTACH_REQ: 703 break; 704 default: 705 ip0dbg(("Dropping unrecognized DL_OK_ACK for %s", 706 dl_primstr(dlp->ok_ack.dl_correct_primitive))); 707 mutex_exit(&arl->arl_lock); 708 arl_refrele(arl); 709 freemsg(mp); 710 return; 711 } 712 mutex_exit(&arl->arl_lock); 713 break; 714 case DL_BIND_ACK: 715 DTRACE_PROBE2(rput_dl_bind, arl_t *, arl, 716 dl_bind_ack_t *, &dlp->bind_ack); 717 718 mutex_enter(&arl->arl_lock); 719 ASSERT(arl->arl_state_flags & ARL_LL_BIND_PENDING); 720 arl->arl_state_flags &= 721 ~(ARL_LL_BIND_PENDING|ARL_LL_DOWN|ARL_LL_UNBOUND); 722 arl->arl_state_flags |= ARL_LL_UP; 723 mutex_exit(&arl->arl_lock); 724 break; 725 case DL_UDERROR_IND: 726 DTRACE_PROBE2(rput_dl_uderror, arl_t *, arl, 727 dl_uderror_ind_t *, &dlp->uderror_ind); 728 arl_refrele(arl); 729 putnext(q, mp); 730 return; 731 default: 732 DTRACE_PROBE2(rput_dl_badprim, arl_t *, arl, 733 union DL_primitives *, dlp); 734 arl_refrele(arl); 735 putnext(q, mp); 736 return; 737 } 738 arp_dlpi_done(arl, ill); 739 arl_refrele(arl); 740 freemsg(mp); 741 } 742 743 void 744 arp_rput(queue_t *q, mblk_t *mp) 745 { 746 arl_t *arl = q->q_ptr; 747 boolean_t need_refrele = B_FALSE; 748 749 mutex_enter(&arl->arl_lock); 750 if (((arl->arl_state_flags & 751 (ARL_CONDEMNED | ARL_LL_REPLUMBING)) != 0)) { 752 /* 753 * Only allow high priority DLPI messages during unplumb or 754 * replumb, and we don't take an arl_refcnt for that case. 
755 */ 756 if (DB_TYPE(mp) != M_PCPROTO) { 757 mutex_exit(&arl->arl_lock); 758 freemsg(mp); 759 return; 760 } 761 } else { 762 arl_refhold_locked(arl); 763 need_refrele = B_TRUE; 764 } 765 mutex_exit(&arl->arl_lock); 766 767 switch (DB_TYPE(mp)) { 768 case M_PCPROTO: 769 case M_PROTO: { 770 ill_t *ill; 771 772 /* 773 * could be one of 774 * (i) real message from the wire, (DLPI_DATA) 775 * (ii) DLPI message 776 * Take a ref on the ill associated with this arl to 777 * prevent the ill from being unplumbed until this thread 778 * is done. 779 */ 780 if (IS_DLPI_DATA(mp)) { 781 ill = arl_to_ill(arl); 782 if (ill == NULL) { 783 arp_drop_packet("No ill", mp, ill); 784 break; 785 } 786 arp_process_packet(ill, mp); 787 ill_refrele(ill); 788 break; 789 } 790 /* Miscellaneous DLPI messages get shuffled off. */ 791 arp_rput_dlpi(q, mp); 792 break; 793 } 794 case M_ERROR: 795 case M_HANGUP: 796 if (mp->b_rptr < mp->b_wptr) 797 arl->arl_error = (int)(*mp->b_rptr & 0xFF); 798 if (arl->arl_error == 0) 799 arl->arl_error = ENXIO; 800 freemsg(mp); 801 break; 802 default: 803 ip1dbg(("arp_rput other db type %x\n", DB_TYPE(mp))); 804 putnext(q, mp); 805 break; 806 } 807 if (need_refrele) 808 arl_refrele(arl); 809 } 810 811 static void 812 arp_process_packet(ill_t *ill, mblk_t *mp) 813 { 814 mblk_t *mp1; 815 arh_t *arh; 816 in_addr_t src_paddr, dst_paddr; 817 uint32_t hlen, plen; 818 boolean_t is_probe; 819 int op; 820 ncec_t *dst_ncec, *src_ncec = NULL; 821 uchar_t *src_haddr, *arhp, *dst_haddr, *dp, *sp; 822 int err; 823 ip_stack_t *ipst; 824 boolean_t need_ill_refrele = B_FALSE; 825 nce_t *nce; 826 uchar_t *src_lladdr; 827 dl_unitdata_ind_t *dlui; 828 ip_recv_attr_t iras; 829 830 ASSERT(ill != NULL); 831 if (ill->ill_flags & ILLF_NOARP) { 832 arp_drop_packet("Interface does not support ARP", mp, ill); 833 return; 834 } 835 ipst = ill->ill_ipst; 836 /* 837 * What we should have at this point is a DL_UNITDATA_IND message 838 * followed by an ARP packet. 
We do some initial checks and then 839 * get to work. 840 */ 841 dlui = (dl_unitdata_ind_t *)mp->b_rptr; 842 if (dlui->dl_group_address == 1) { 843 /* 844 * multicast or broadcast packet. Only accept on the ipmp 845 * nominated interface for multicasts ('cast_ill'). 846 * If we have no cast_ill we are liberal and accept everything. 847 */ 848 if (IS_UNDER_IPMP(ill)) { 849 /* For an under ill_grp can change under lock */ 850 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 851 if (!ill->ill_nom_cast && ill->ill_grp != NULL && 852 ill->ill_grp->ig_cast_ill != NULL) { 853 rw_exit(&ipst->ips_ill_g_lock); 854 arp_drop_packet("Interface is not nominated " 855 "for multicast sends and receives", 856 mp, ill); 857 return; 858 } 859 rw_exit(&ipst->ips_ill_g_lock); 860 } 861 } 862 mp1 = mp->b_cont; 863 if (mp1 == NULL) { 864 arp_drop_packet("Missing ARP packet", mp, ill); 865 return; 866 } 867 if (mp1->b_cont != NULL) { 868 /* No fooling around with funny messages. */ 869 if (!pullupmsg(mp1, -1)) { 870 arp_drop_packet("Funny message: pullup failed", 871 mp, ill); 872 return; 873 } 874 } 875 arh = (arh_t *)mp1->b_rptr; 876 hlen = arh->arh_hlen; 877 plen = arh->arh_plen; 878 if (MBLKL(mp1) < ARH_FIXED_LEN + 2 * hlen + 2 * plen) { 879 arp_drop_packet("mblk len too small", mp, ill); 880 return; 881 } 882 /* 883 * hlen 0 is used for RFC 1868 UnARP. 884 * 885 * Note that the rest of the code checks that hlen is what we expect 886 * for this hardware address type, so might as well discard packets 887 * here that don't match. 888 */ 889 if ((hlen > 0 && hlen != ill->ill_phys_addr_length) || plen == 0) { 890 DTRACE_PROBE2(rput_bogus, ill_t *, ill, mblk_t *, mp1); 891 arp_drop_packet("Bogus hlen or plen", mp, ill); 892 return; 893 } 894 /* 895 * Historically, Solaris has been lenient about hardware type numbers. 896 * We should check here, but don't. 
897 */ 898 DTRACE_PROBE3(arp__physical__in__start, ill_t *, ill, arh_t *, arh, 899 mblk_t *, mp); 900 /* 901 * If ill is in an ipmp group, it will be the under ill. If we want 902 * to report the packet as coming up the IPMP interface, we should 903 * convert it to the ipmp ill. 904 */ 905 ARP_HOOK_IN(ipst->ips_arp_physical_in_event, ipst->ips_arp_physical_in, 906 ill->ill_phyint->phyint_ifindex, arh, mp, mp1, ipst); 907 DTRACE_PROBE1(arp__physical__in__end, mblk_t *, mp); 908 if (mp == NULL) 909 return; 910 arhp = (uchar_t *)arh + ARH_FIXED_LEN; 911 src_haddr = arhp; /* ar$sha */ 912 arhp += hlen; 913 bcopy(arhp, &src_paddr, IP_ADDR_LEN); /* ar$spa */ 914 sp = arhp; 915 arhp += IP_ADDR_LEN; 916 dst_haddr = arhp; /* ar$dha */ 917 arhp += hlen; 918 bcopy(arhp, &dst_paddr, IP_ADDR_LEN); /* ar$tpa */ 919 dp = arhp; 920 op = BE16_TO_U16(arh->arh_operation); 921 922 DTRACE_PROBE2(ip__arp__input, (in_addr_t), src_paddr, 923 (in_addr_t), dst_paddr); 924 925 /* Determine if this is just a probe */ 926 is_probe = (src_paddr == INADDR_ANY); 927 928 /* 929 * The following test for loopback is faster than 930 * IP_LOOPBACK_ADDR(), because it avoids any bitwise 931 * operations. 932 * Note that these addresses are always in network byte order 933 */ 934 if ((*(uint8_t *)&src_paddr) == IN_LOOPBACKNET || 935 (*(uint8_t *)&dst_paddr) == IN_LOOPBACKNET || 936 CLASSD(src_paddr) || CLASSD(dst_paddr)) { 937 arp_drop_packet("Martian IP addr", mp, ill); 938 return; 939 } 940 941 /* 942 * ira_ill is the only field used down the arp_notify path. 943 */ 944 bzero(&iras, sizeof (iras)); 945 iras.ira_ill = iras.ira_rill = ill; 946 /* 947 * RFC 826: first check if the <protocol, sender protocol address> is 948 * in the cache, if there is a sender protocol address. Note that this 949 * step also handles resolutions based on source. 
950 */ 951 /* Note: after here we need to freeb(mp) and freemsg(mp1) separately */ 952 mp->b_cont = NULL; 953 if (is_probe) { 954 err = AR_NOTFOUND; 955 } else { 956 if (plen != 4) { 957 arp_drop_packet("bad protocol len", mp, ill); 958 return; 959 } 960 err = ip_nce_resolve_all(ill, src_haddr, hlen, &src_paddr, 961 &src_ncec, op); 962 switch (err) { 963 case AR_BOGON: 964 ASSERT(src_ncec != NULL); 965 arp_notify(src_paddr, mp1, AR_CN_BOGON, 966 &iras, src_ncec); 967 break; 968 case AR_FAILED: 969 arp_notify(src_paddr, mp1, AR_CN_FAILED, &iras, 970 src_ncec); 971 break; 972 case AR_LOOPBACK: 973 DTRACE_PROBE2(rput_loopback, ill_t *, ill, arh_t *, 974 arh); 975 freemsg(mp1); 976 break; 977 default: 978 goto update; 979 } 980 freemsg(mp); 981 if (src_ncec != NULL) 982 ncec_refrele(src_ncec); 983 return; 984 } 985 update: 986 /* 987 * Now look up the destination address. By RFC 826, we ignore the 988 * packet at this step if the target isn't one of our addresses (i.e., 989 * one we have been asked to PUBLISH). This is true even if the 990 * target is something we're trying to resolve and the packet 991 * is a response. 992 */ 993 dst_ncec = ncec_lookup_illgrp_v4(ill, &dst_paddr); 994 if (dst_ncec == NULL || !NCE_PUBLISH(dst_ncec)) { 995 /* 996 * Let the client know if the source mapping has changed, even 997 * if the destination provides no useful information for the 998 * client. 999 */ 1000 if (err == AR_CHANGED) { 1001 arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras, 1002 NULL); 1003 freemsg(mp); 1004 } else { 1005 freemsg(mp); 1006 arp_drop_packet("Target is not interesting", mp1, ill); 1007 } 1008 if (dst_ncec != NULL) 1009 ncec_refrele(dst_ncec); 1010 if (src_ncec != NULL) 1011 ncec_refrele(src_ncec); 1012 return; 1013 } 1014 1015 if (dst_ncec->ncec_flags & NCE_F_UNVERIFIED) { 1016 /* 1017 * Check for a reflection. Some misbehaving bridges will 1018 * reflect our own transmitted packets back to us. 
1019 */ 1020 ASSERT(NCE_PUBLISH(dst_ncec)); 1021 if (hlen != dst_ncec->ncec_ill->ill_phys_addr_length) { 1022 ncec_refrele(dst_ncec); 1023 if (src_ncec != NULL) 1024 ncec_refrele(src_ncec); 1025 freemsg(mp); 1026 arp_drop_packet("bad arh_len", mp1, ill); 1027 return; 1028 } 1029 if (!nce_cmp_ll_addr(dst_ncec, src_haddr, hlen)) { 1030 DTRACE_PROBE3(rput_probe_reflected, ill_t *, ill, 1031 arh_t *, arh, ncec_t *, dst_ncec); 1032 ncec_refrele(dst_ncec); 1033 if (src_ncec != NULL) 1034 ncec_refrele(src_ncec); 1035 freemsg(mp); 1036 arp_drop_packet("Reflected probe", mp1, ill); 1037 return; 1038 } 1039 /* 1040 * Responses targeting our HW address that are not responses to 1041 * our DAD probe must be ignored as they are related to requests 1042 * sent before DAD was restarted. 1043 */ 1044 if (op == ARP_RESPONSE && 1045 (nce_cmp_ll_addr(dst_ncec, dst_haddr, hlen) == 0)) { 1046 ncec_refrele(dst_ncec); 1047 if (src_ncec != NULL) 1048 ncec_refrele(src_ncec); 1049 freemsg(mp); 1050 arp_drop_packet( 1051 "Response to request that was sent before DAD", 1052 mp1, ill); 1053 return; 1054 } 1055 /* 1056 * Responses targeted to HW addresses which are not ours but 1057 * sent to our unverified proto address are also conflicts. 1058 * These may be reported by a proxy rather than the interface 1059 * with the conflicting address, dst_paddr is in conflict 1060 * rather than src_paddr. To ensure IP can locate the correct 1061 * ipif to take down, it is necessary to copy dst_paddr to 1062 * the src_paddr field before sending it to IP. The same is 1063 * required for probes, where src_paddr will be INADDR_ANY. 
1064 */ 1065 if (is_probe || op == ARP_RESPONSE) { 1066 bcopy(dp, sp, plen); 1067 arp_notify(src_paddr, mp1, AR_CN_FAILED, &iras, 1068 NULL); 1069 ncec_delete(dst_ncec); 1070 } else if (err == AR_CHANGED) { 1071 arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras, 1072 NULL); 1073 } else { 1074 DTRACE_PROBE3(rput_request_unverified, 1075 ill_t *, ill, arh_t *, arh, ncec_t *, dst_ncec); 1076 arp_drop_packet("Unverified request", mp1, ill); 1077 } 1078 freemsg(mp); 1079 ncec_refrele(dst_ncec); 1080 if (src_ncec != NULL) 1081 ncec_refrele(src_ncec); 1082 return; 1083 } 1084 /* 1085 * If it's a request, then we reply to this, and if we think the 1086 * sender's unknown, then we create an entry to avoid unnecessary ARPs. 1087 * The design assumption is that someone ARPing us is likely to send us 1088 * a packet soon, and that we'll want to reply to it. 1089 */ 1090 if (op == ARP_REQUEST) { 1091 const uchar_t *nce_hwaddr; 1092 struct in_addr nce_paddr; 1093 clock_t now; 1094 ill_t *under_ill = ill; 1095 boolean_t send_unicast = B_TRUE; 1096 1097 ASSERT(NCE_PUBLISH(dst_ncec)); 1098 1099 if ((dst_ncec->ncec_flags & (NCE_F_BCAST|NCE_F_MCAST)) != 0) { 1100 /* 1101 * Ignore senders who are deliberately or accidentally 1102 * confused. 1103 */ 1104 goto bail; 1105 } 1106 1107 if (!is_probe && err == AR_NOTFOUND) { 1108 ASSERT(src_ncec == NULL); 1109 1110 if (IS_UNDER_IPMP(under_ill)) { 1111 /* 1112 * create the ncec for the sender on ipmp_ill. 1113 * We pass in the ipmp_ill itself to avoid 1114 * creating an nce_t on the under_ill. 
1115 */ 1116 ill = ipmp_ill_hold_ipmp_ill(under_ill); 1117 if (ill == NULL) 1118 ill = under_ill; 1119 else 1120 need_ill_refrele = B_TRUE; 1121 } 1122 1123 err = nce_lookup_then_add_v4(ill, src_haddr, hlen, 1124 &src_paddr, 0, ND_STALE, &nce); 1125 1126 switch (err) { 1127 case 0: 1128 case EEXIST: 1129 ip1dbg(("added ncec %p in state %d ill %s\n", 1130 (void *)src_ncec, src_ncec->ncec_state, 1131 ill->ill_name)); 1132 src_ncec = nce->nce_common; 1133 break; 1134 default: 1135 /* 1136 * Either no memory, or the outgoing interface 1137 * is in the process of down/unplumb. In the 1138 * latter case, we will fail the send anyway, 1139 * and in the former case, we should try to send 1140 * the ARP response. 1141 */ 1142 src_lladdr = src_haddr; 1143 goto send_response; 1144 } 1145 ncec_refhold(src_ncec); 1146 nce_refrele(nce); 1147 /* set up cleanup interval on ncec */ 1148 } 1149 1150 /* 1151 * This implements periodic address defense based on a modified 1152 * version of the RFC 3927 requirements. Instead of sending a 1153 * broadcasted reply every time, as demanded by the RFC, we 1154 * send at most one broadcast reply per arp_broadcast_interval. 1155 */ 1156 now = ddi_get_lbolt(); 1157 if ((now - dst_ncec->ncec_last_time_defended) > 1158 MSEC_TO_TICK(ipst->ips_ipv4_dad_announce_interval)) { 1159 dst_ncec->ncec_last_time_defended = now; 1160 /* 1161 * If this is one of the long-suffering entries, 1162 * pull it out now. It no longer needs separate 1163 * defense, because we're now doing that with this 1164 * broadcasted reply. 
1165 */ 1166 dst_ncec->ncec_flags &= ~NCE_F_DELAYED; 1167 send_unicast = B_FALSE; 1168 } 1169 if (src_ncec != NULL && send_unicast) { 1170 src_lladdr = src_ncec->ncec_lladdr; 1171 } else { 1172 src_lladdr = under_ill->ill_bcast_mp->b_rptr + 1173 NCE_LL_ADDR_OFFSET(under_ill); 1174 } 1175 send_response: 1176 nce_hwaddr = dst_ncec->ncec_lladdr; 1177 IN6_V4MAPPED_TO_INADDR(&dst_ncec->ncec_addr, &nce_paddr); 1178 1179 (void) arp_output(under_ill, ARP_RESPONSE, 1180 nce_hwaddr, (uchar_t *)&nce_paddr, src_haddr, 1181 (uchar_t *)&src_paddr, src_lladdr); 1182 } 1183 bail: 1184 if (dst_ncec != NULL) { 1185 ncec_refrele(dst_ncec); 1186 } 1187 if (src_ncec != NULL) { 1188 ncec_refrele(src_ncec); 1189 } 1190 if (err == AR_CHANGED) { 1191 mp->b_cont = NULL; 1192 arp_notify(src_paddr, mp1, AR_CN_ANNOUNCE, &iras, NULL); 1193 mp1 = NULL; 1194 } 1195 if (need_ill_refrele) 1196 ill_refrele(ill); 1197 done: 1198 freemsg(mp); 1199 freemsg(mp1); 1200 } 1201 1202 /* 1203 * Basic initialization of the arl_t and the arl_common structure shared with 1204 * the ill_t that is done after SLIFNAME/IF_UNITSEL. 1205 */ 1206 static int 1207 arl_ill_init(arl_t *arl, char *ill_name) 1208 { 1209 ill_t *ill; 1210 arl_ill_common_t *ai; 1211 1212 ill = ill_lookup_on_name(ill_name, B_FALSE, B_FALSE, B_FALSE, 1213 arl->arl_ipst); 1214 1215 if (ill == NULL) 1216 return (ENXIO); 1217 1218 /* 1219 * By the time we set up the arl, we expect the ETHERTYPE_IP 1220 * stream to be fully bound and attached. So we copy/verify 1221 * relevant information as possible from/against the ill. 1222 * 1223 * The following should have been set up in arp_ll_set_defaults() 1224 * after the first DL_INFO_ACK was received. 
1225 */ 1226 ASSERT(arl->arl_phys_addr_length == ill->ill_phys_addr_length); 1227 ASSERT(arl->arl_sap == ETHERTYPE_ARP); 1228 ASSERT(arl->arl_mactype == ill->ill_mactype); 1229 ASSERT(arl->arl_sap_length == ill->ill_sap_length); 1230 1231 ai = kmem_zalloc(sizeof (*ai), KM_SLEEP); 1232 mutex_enter(&ill->ill_lock); 1233 /* First ensure that the ill is not CONDEMNED. */ 1234 if (ill->ill_state_flags & ILL_CONDEMNED) { 1235 mutex_exit(&ill->ill_lock); 1236 ill_refrele(ill); 1237 kmem_free(ai, sizeof (*ai)); 1238 return (ENXIO); 1239 } 1240 if (ill->ill_common != NULL || arl->arl_common != NULL) { 1241 mutex_exit(&ill->ill_lock); 1242 ip0dbg(("%s: PPA already exists", ill->ill_name)); 1243 ill_refrele(ill); 1244 kmem_free(ai, sizeof (*ai)); 1245 return (EEXIST); 1246 } 1247 mutex_init(&ai->ai_lock, NULL, MUTEX_DEFAULT, NULL); 1248 ai->ai_arl = arl; 1249 ai->ai_ill = ill; 1250 ill->ill_common = ai; 1251 arl->arl_common = ai; 1252 mutex_exit(&ill->ill_lock); 1253 (void) strlcpy(arl->arl_name, ill->ill_name, LIFNAMSIZ); 1254 arl->arl_name_length = ill->ill_name_length; 1255 ill_refrele(ill); 1256 arp_ifname_notify(arl); 1257 return (0); 1258 } 1259 1260 /* Allocate and do common initializations for DLPI messages. */ 1261 static mblk_t * 1262 ip_ar_dlpi_comm(t_uscalar_t prim, size_t size) 1263 { 1264 mblk_t *mp; 1265 1266 if ((mp = allocb(size, BPRI_HI)) == NULL) 1267 return (NULL); 1268 1269 /* 1270 * DLPIv2 says that DL_INFO_REQ and DL_TOKEN_REQ (the latter 1271 * of which we don't seem to use) are sent with M_PCPROTO, and 1272 * that other DLPI are M_PROTO. 1273 */ 1274 DB_TYPE(mp) = (prim == DL_INFO_REQ) ? 
M_PCPROTO : M_PROTO; 1275 1276 mp->b_wptr = mp->b_rptr + size; 1277 bzero(mp->b_rptr, size); 1278 DL_PRIM(mp) = prim; 1279 return (mp); 1280 } 1281 1282 1283 int 1284 ip_sioctl_ifunitsel_arp(queue_t *q, int *ppa) 1285 { 1286 arl_t *arl; 1287 char *cp, ill_name[LIFNAMSIZ]; 1288 1289 if (q->q_next == NULL) 1290 return (EINVAL); 1291 1292 do { 1293 q = q->q_next; 1294 } while (q->q_next != NULL); 1295 cp = q->q_qinfo->qi_minfo->mi_idname; 1296 1297 arl = (arl_t *)q->q_ptr; 1298 (void) snprintf(ill_name, sizeof (ill_name), "%s%d", cp, *ppa); 1299 arl->arl_ppa = *ppa; 1300 return (arl_ill_init(arl, ill_name)); 1301 } 1302 1303 int 1304 ip_sioctl_slifname_arp(queue_t *q, void *lifreq) 1305 { 1306 arl_t *arl; 1307 struct lifreq *lifr = lifreq; 1308 1309 /* ioctl not valid when IP opened as a device */ 1310 if (q->q_next == NULL) 1311 return (EINVAL); 1312 1313 arl = (arl_t *)q->q_ptr; 1314 arl->arl_ppa = lifr->lifr_ppa; 1315 return (arl_ill_init(arl, lifr->lifr_name)); 1316 } 1317 1318 arl_t * 1319 ill_to_arl(ill_t *ill) 1320 { 1321 arl_ill_common_t *ai = ill->ill_common; 1322 arl_t *arl = NULL; 1323 1324 if (ai == NULL) 1325 return (NULL); 1326 /* 1327 * Find the arl_t that corresponds to this ill_t from the shared 1328 * ill_common structure. We can safely access the ai here as it 1329 * will only be freed in arp_modclose() after we have become 1330 * single-threaded. 
1331 */ 1332 mutex_enter(&ai->ai_lock); 1333 if ((arl = ai->ai_arl) != NULL) { 1334 mutex_enter(&arl->arl_lock); 1335 if (!(arl->arl_state_flags & ARL_CONDEMNED)) { 1336 arl_refhold_locked(arl); 1337 mutex_exit(&arl->arl_lock); 1338 } else { 1339 mutex_exit(&arl->arl_lock); 1340 arl = NULL; 1341 } 1342 } 1343 mutex_exit(&ai->ai_lock); 1344 return (arl); 1345 } 1346 1347 ill_t * 1348 arl_to_ill(arl_t *arl) 1349 { 1350 arl_ill_common_t *ai = arl->arl_common; 1351 ill_t *ill = NULL; 1352 1353 if (ai == NULL) { 1354 /* 1355 * happens when the arp stream is just being opened, and 1356 * arl_ill_init has not been executed yet. 1357 */ 1358 return (NULL); 1359 } 1360 /* 1361 * Find the ill_t that corresponds to this arl_t from the shared 1362 * arl_common structure. We can safely access the ai here as it 1363 * will only be freed in arp_modclose() after we have become 1364 * single-threaded. 1365 */ 1366 mutex_enter(&ai->ai_lock); 1367 if ((ill = ai->ai_ill) != NULL) { 1368 mutex_enter(&ill->ill_lock); 1369 if (!ILL_IS_CONDEMNED(ill)) { 1370 ill_refhold_locked(ill); 1371 mutex_exit(&ill->ill_lock); 1372 } else { 1373 mutex_exit(&ill->ill_lock); 1374 ill = NULL; 1375 } 1376 } 1377 mutex_exit(&ai->ai_lock); 1378 return (ill); 1379 } 1380 1381 int 1382 arp_ll_up(ill_t *ill) 1383 { 1384 mblk_t *attach_mp = NULL; 1385 mblk_t *bind_mp = NULL; 1386 mblk_t *unbind_mp = NULL; 1387 arl_t *arl; 1388 1389 ASSERT(IAM_WRITER_ILL(ill)); 1390 arl = ill_to_arl(ill); 1391 1392 DTRACE_PROBE2(ill__downup, char *, "arp_ll_up", ill_t *, ill); 1393 if (arl == NULL) 1394 return (ENXIO); 1395 DTRACE_PROBE2(arl__downup, char *, "arp_ll_up", arl_t *, arl); 1396 if ((arl->arl_state_flags & ARL_LL_UP) != 0) { 1397 arl_refrele(arl); 1398 return (0); 1399 } 1400 if (arl->arl_needs_attach) { /* DL_STYLE2 */ 1401 attach_mp = 1402 ip_ar_dlpi_comm(DL_ATTACH_REQ, sizeof (dl_attach_req_t)); 1403 if (attach_mp == NULL) 1404 goto bad; 1405 ((dl_attach_req_t *)attach_mp->b_rptr)->dl_ppa = arl->arl_ppa; 1406 } 
1407 1408 /* Allocate and initialize a bind message. */ 1409 bind_mp = ip_ar_dlpi_comm(DL_BIND_REQ, sizeof (dl_bind_req_t)); 1410 if (bind_mp == NULL) 1411 goto bad; 1412 ((dl_bind_req_t *)bind_mp->b_rptr)->dl_sap = ETHERTYPE_ARP; 1413 ((dl_bind_req_t *)bind_mp->b_rptr)->dl_service_mode = DL_CLDLS; 1414 1415 unbind_mp = ip_ar_dlpi_comm(DL_UNBIND_REQ, sizeof (dl_unbind_req_t)); 1416 if (unbind_mp == NULL) 1417 goto bad; 1418 if (arl->arl_needs_attach) { 1419 arp_dlpi_send(arl, attach_mp); 1420 } 1421 arl->arl_unbind_mp = unbind_mp; 1422 1423 arl->arl_state_flags |= ARL_LL_BIND_PENDING; 1424 arp_dlpi_send(arl, bind_mp); 1425 arl_refrele(arl); 1426 return (EINPROGRESS); 1427 1428 bad: 1429 freemsg(attach_mp); 1430 freemsg(bind_mp); 1431 freemsg(unbind_mp); 1432 arl_refrele(arl); 1433 return (ENOMEM); 1434 } 1435 1436 /* 1437 * consumes/frees mp 1438 */ 1439 static void 1440 arp_notify(in_addr_t src, mblk_t *mp, uint32_t arcn_code, 1441 ip_recv_attr_t *ira, ncec_t *ncec) 1442 { 1443 char hbuf[MAC_STR_LEN]; 1444 char sbuf[INET_ADDRSTRLEN]; 1445 ill_t *ill = ira->ira_ill; 1446 ip_stack_t *ipst = ill->ill_ipst; 1447 arh_t *arh = (arh_t *)mp->b_rptr; 1448 1449 switch (arcn_code) { 1450 case AR_CN_BOGON: 1451 /* 1452 * Someone is sending ARP packets with a source protocol 1453 * address that we have published and for which we believe our 1454 * entry is authoritative and verified to be unique on 1455 * the network. 1456 * 1457 * arp_process_packet() sends AR_CN_FAILED for the case when 1458 * a DAD probe is received and the hardware address of a 1459 * non-authoritative entry has changed. Thus, AR_CN_BOGON 1460 * indicates a real conflict, and we have to do resolution. 1461 * 1462 * We back away quickly from the address if it's from DHCP or 1463 * otherwise temporary and hasn't been used recently (or at 1464 * all). 
We'd like to include "deprecated" addresses here as 1465 * well (as there's no real reason to defend something we're 1466 * discarding), but IPMP "reuses" this flag to mean something 1467 * other than the standard meaning. 1468 */ 1469 if (ip_nce_conflict(mp, ira, ncec)) { 1470 (void) mac_colon_addr((uint8_t *)(arh + 1), 1471 arh->arh_hlen, hbuf, sizeof (hbuf)); 1472 (void) ip_dot_addr(src, sbuf); 1473 cmn_err(CE_WARN, 1474 "proxy ARP problem? Node '%s' is using %s on %s", 1475 hbuf, sbuf, ill->ill_name); 1476 if (!arp_no_defense) 1477 (void) arp_announce(ncec); 1478 /* 1479 * ncec_last_time_defended has been adjusted in 1480 * ip_nce_conflict. 1481 */ 1482 } else { 1483 ncec_delete(ncec); 1484 } 1485 freemsg(mp); 1486 break; 1487 case AR_CN_ANNOUNCE: { 1488 nce_hw_map_t hwm; 1489 /* 1490 * ARP gives us a copy of any packet where it thinks 1491 * the address has changed, so that we can update our 1492 * caches. We're responsible for caching known answers 1493 * in the current design. We check whether the 1494 * hardware address really has changed in all of our 1495 * entries that have cached this mapping, and if so, we 1496 * blow them away. This way we will immediately pick 1497 * up the rare case of a host changing hardware 1498 * address. 1499 */ 1500 if (src == 0) { 1501 freemsg(mp); 1502 break; 1503 } 1504 hwm.hwm_addr = src; 1505 hwm.hwm_hwlen = arh->arh_hlen; 1506 hwm.hwm_hwaddr = (uchar_t *)(arh + 1); 1507 hwm.hwm_flags = 0; 1508 ncec_walk_common(ipst->ips_ndp4, NULL, 1509 (pfi_t)nce_update_hw_changed, &hwm, B_TRUE); 1510 freemsg(mp); 1511 break; 1512 } 1513 case AR_CN_FAILED: 1514 if (arp_no_defense) { 1515 (void) mac_colon_addr((uint8_t *)(arh + 1), 1516 arh->arh_hlen, hbuf, sizeof (hbuf)); 1517 (void) ip_dot_addr(src, sbuf); 1518 1519 cmn_err(CE_WARN, 1520 "node %s is using our IP address %s on %s", 1521 hbuf, sbuf, ill->ill_name); 1522 freemsg(mp); 1523 break; 1524 } 1525 /* 1526 * mp will be freed by arp_excl. 
1527 */ 1528 ill_refhold(ill); 1529 qwriter_ip(ill, ill->ill_rq, mp, arp_excl, NEW_OP, B_FALSE); 1530 return; 1531 default: 1532 ASSERT(0); 1533 freemsg(mp); 1534 break; 1535 } 1536 } 1537 1538 /* 1539 * arp_output is called to transmit an ARP Request or Response. The mapping 1540 * to RFC 826 variables is: 1541 * haddr1 == ar$sha 1542 * paddr1 == ar$spa 1543 * haddr2 == ar$tha 1544 * paddr2 == ar$tpa 1545 * The ARP frame is sent to the ether_dst in dst_lladdr. 1546 */ 1547 static int 1548 arp_output(ill_t *ill, uint32_t operation, 1549 const uchar_t *haddr1, const uchar_t *paddr1, const uchar_t *haddr2, 1550 const uchar_t *paddr2, uchar_t *dst_lladdr) 1551 { 1552 arh_t *arh; 1553 uint8_t *cp; 1554 uint_t hlen; 1555 uint32_t plen = IPV4_ADDR_LEN; /* ar$pln from RFC 826 */ 1556 uint32_t proto = IP_ARP_PROTO_TYPE; 1557 mblk_t *mp; 1558 arl_t *arl; 1559 1560 ASSERT(dst_lladdr != NULL); 1561 hlen = ill->ill_phys_addr_length; /* ar$hln from RFC 826 */ 1562 mp = ill_dlur_gen(dst_lladdr, hlen, ETHERTYPE_ARP, ill->ill_sap_length); 1563 1564 if (mp == NULL) 1565 return (ENOMEM); 1566 1567 /* IFF_NOARP flag is set or link down: do not send arp messages */ 1568 if ((ill->ill_flags & ILLF_NOARP) || !ill->ill_dl_up) { 1569 freemsg(mp); 1570 return (ENXIO); 1571 } 1572 1573 mp->b_cont = allocb(AR_LL_HDR_SLACK + ARH_FIXED_LEN + (hlen * 4) + 1574 plen + plen, BPRI_MED); 1575 if (mp->b_cont == NULL) { 1576 freeb(mp); 1577 return (ENOMEM); 1578 } 1579 1580 /* Fill in the ARP header. 
*/ 1581 cp = mp->b_cont->b_rptr + (AR_LL_HDR_SLACK + hlen + hlen); 1582 mp->b_cont->b_rptr = cp; 1583 arh = (arh_t *)cp; 1584 U16_TO_BE16(arp_hw_type(ill->ill_mactype), arh->arh_hardware); 1585 U16_TO_BE16(proto, arh->arh_proto); 1586 arh->arh_hlen = (uint8_t)hlen; 1587 arh->arh_plen = (uint8_t)plen; 1588 U16_TO_BE16(operation, arh->arh_operation); 1589 cp += ARH_FIXED_LEN; 1590 bcopy(haddr1, cp, hlen); 1591 cp += hlen; 1592 if (paddr1 == NULL) 1593 bzero(cp, plen); 1594 else 1595 bcopy(paddr1, cp, plen); 1596 cp += plen; 1597 if (haddr2 == NULL) 1598 bzero(cp, hlen); 1599 else 1600 bcopy(haddr2, cp, hlen); 1601 cp += hlen; 1602 bcopy(paddr2, cp, plen); 1603 cp += plen; 1604 mp->b_cont->b_wptr = cp; 1605 1606 DTRACE_PROBE3(arp__physical__out__start, 1607 ill_t *, ill, arh_t *, arh, mblk_t *, mp); 1608 ARP_HOOK_OUT(ill->ill_ipst->ips_arp_physical_out_event, 1609 ill->ill_ipst->ips_arp_physical_out, 1610 ill->ill_phyint->phyint_ifindex, arh, mp, mp->b_cont, 1611 ill->ill_ipst); 1612 DTRACE_PROBE1(arp__physical__out__end, mblk_t *, mp); 1613 if (mp == NULL) 1614 return (0); 1615 1616 /* Ship it out. */ 1617 arl = ill_to_arl(ill); 1618 if (arl == NULL) { 1619 freemsg(mp); 1620 return (0); 1621 } 1622 if (canputnext(arl->arl_wq)) 1623 putnext(arl->arl_wq, mp); 1624 else 1625 freemsg(mp); 1626 arl_refrele(arl); 1627 return (0); 1628 } 1629 1630 /* 1631 * Process resolve requests. 1632 * If we are not yet reachable then we check and decrease ncec_rcnt; otherwise 1633 * we leave it alone (the caller will check and manage ncec_pcnt in those 1634 * cases.) 1635 */ 1636 int 1637 arp_request(ncec_t *ncec, in_addr_t sender, ill_t *ill) 1638 { 1639 int err; 1640 const uchar_t *target_hwaddr; 1641 struct in_addr nce_paddr; 1642 uchar_t *dst_lladdr; 1643 boolean_t use_rcnt = !NCE_ISREACHABLE(ncec); 1644 1645 ASSERT(MUTEX_HELD(&ncec->ncec_lock)); 1646 ASSERT(!IS_IPMP(ill)); 1647 1648 if (use_rcnt && ncec->ncec_rcnt == 0) { 1649 /* not allowed any more retransmits. 
*/ 1650 return (0); 1651 } 1652 1653 if ((ill->ill_flags & ILLF_NOARP) != 0) 1654 return (0); 1655 1656 IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &nce_paddr); 1657 1658 target_hwaddr = 1659 ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 1660 1661 if (NCE_ISREACHABLE(ncec)) { 1662 dst_lladdr = ncec->ncec_lladdr; 1663 } else { 1664 dst_lladdr = ill->ill_bcast_mp->b_rptr + 1665 NCE_LL_ADDR_OFFSET(ill); 1666 } 1667 1668 mutex_exit(&ncec->ncec_lock); 1669 err = arp_output(ill, ARP_REQUEST, 1670 ill->ill_phys_addr, (uchar_t *)&sender, target_hwaddr, 1671 (uchar_t *)&nce_paddr, dst_lladdr); 1672 mutex_enter(&ncec->ncec_lock); 1673 1674 if (err != 0) { 1675 /* 1676 * Some transient error such as ENOMEM or a down link was 1677 * encountered. If the link has been taken down permanently, 1678 * the ncec will eventually be cleaned up (ipif_down_tail() 1679 * will call ipif_nce_down() and flush the ncec), to terminate 1680 * recurring attempts to send ARP requests. In all other cases, 1681 * allow the caller another chance at success next time. 1682 */ 1683 return (ncec->ncec_ill->ill_reachable_retrans_time); 1684 } 1685 1686 if (use_rcnt) 1687 ncec->ncec_rcnt--; 1688 1689 return (ncec->ncec_ill->ill_reachable_retrans_time); 1690 } 1691 1692 /* return B_TRUE if dropped */ 1693 boolean_t 1694 arp_announce(ncec_t *ncec) 1695 { 1696 ill_t *ill; 1697 int err; 1698 uchar_t *sphys_addr, *bcast_addr; 1699 struct in_addr ncec_addr; 1700 boolean_t need_refrele = B_FALSE; 1701 1702 ASSERT((ncec->ncec_flags & NCE_F_BCAST) == 0); 1703 ASSERT((ncec->ncec_flags & NCE_F_MCAST) == 0); 1704 1705 if (IS_IPMP(ncec->ncec_ill)) { 1706 /* sent on the cast_ill */ 1707 ill = ipmp_ill_hold_xmit_ill(ncec->ncec_ill, B_FALSE); 1708 if (ill == NULL) 1709 return (B_TRUE); 1710 need_refrele = B_TRUE; 1711 } else { 1712 ill = ncec->ncec_ill; 1713 } 1714 1715 /* 1716 * broadcast an announce to ill_bcast address. 
1717 */ 1718 IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &ncec_addr); 1719 1720 sphys_addr = ncec->ncec_lladdr; 1721 bcast_addr = ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 1722 1723 err = arp_output(ill, ARP_REQUEST, 1724 sphys_addr, (uchar_t *)&ncec_addr, bcast_addr, 1725 (uchar_t *)&ncec_addr, bcast_addr); 1726 1727 if (need_refrele) 1728 ill_refrele(ill); 1729 return (err != 0); 1730 } 1731 1732 /* return B_TRUE if dropped */ 1733 boolean_t 1734 arp_probe(ncec_t *ncec) 1735 { 1736 ill_t *ill; 1737 int err; 1738 struct in_addr ncec_addr; 1739 uchar_t *sphys_addr, *dst_lladdr; 1740 1741 if (IS_IPMP(ncec->ncec_ill)) { 1742 ill = ipmp_ill_hold_xmit_ill(ncec->ncec_ill, B_FALSE); 1743 if (ill == NULL) 1744 return (B_TRUE); 1745 } else { 1746 ill = ncec->ncec_ill; 1747 } 1748 1749 IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &ncec_addr); 1750 1751 sphys_addr = ncec->ncec_lladdr; 1752 dst_lladdr = ill->ill_bcast_mp->b_rptr + NCE_LL_ADDR_OFFSET(ill); 1753 err = arp_output(ill, ARP_REQUEST, 1754 sphys_addr, NULL, NULL, (uchar_t *)&ncec_addr, dst_lladdr); 1755 1756 if (IS_IPMP(ncec->ncec_ill)) 1757 ill_refrele(ill); 1758 return (err != 0); 1759 } 1760 1761 static mblk_t * 1762 arl_unbind(arl_t *arl) 1763 { 1764 mblk_t *mp; 1765 1766 if ((mp = arl->arl_unbind_mp) != NULL) { 1767 arl->arl_unbind_mp = NULL; 1768 arl->arl_state_flags |= ARL_DL_UNBIND_IN_PROGRESS; 1769 } 1770 return (mp); 1771 } 1772 1773 int 1774 arp_ll_down(ill_t *ill) 1775 { 1776 arl_t *arl; 1777 mblk_t *unbind_mp; 1778 int err = 0; 1779 boolean_t replumb = (ill->ill_replumbing == 1); 1780 1781 DTRACE_PROBE2(ill__downup, char *, "arp_ll_down", ill_t *, ill); 1782 if ((arl = ill_to_arl(ill)) == NULL) 1783 return (ENXIO); 1784 DTRACE_PROBE2(arl__downup, char *, "arp_ll_down", arl_t *, arl); 1785 mutex_enter(&arl->arl_lock); 1786 unbind_mp = arl_unbind(arl); 1787 if (unbind_mp != NULL) { 1788 ASSERT(arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS); 1789 DTRACE_PROBE2(arp__unbinding, mblk_t *, unbind_mp, 
1790 arl_t *, arl); 1791 err = EINPROGRESS; 1792 if (replumb) 1793 arl->arl_state_flags |= ARL_LL_REPLUMBING; 1794 } 1795 mutex_exit(&arl->arl_lock); 1796 if (unbind_mp != NULL) 1797 arp_dlpi_send(arl, unbind_mp); 1798 arl_refrele(arl); 1799 return (err); 1800 } 1801 1802 /* ARGSUSED */ 1803 int 1804 arp_close(queue_t *q, int flags) 1805 { 1806 if (WR(q)->q_next != NULL) { 1807 /* This is a module close */ 1808 return (arp_modclose(q->q_ptr)); 1809 } 1810 qprocsoff(q); 1811 q->q_ptr = WR(q)->q_ptr = NULL; 1812 return (0); 1813 } 1814 1815 static int 1816 arp_modclose(arl_t *arl) 1817 { 1818 arl_ill_common_t *ai = arl->arl_common; 1819 ill_t *ill; 1820 queue_t *q = arl->arl_rq; 1821 mblk_t *mp, *nextmp; 1822 ipsq_t *ipsq = NULL; 1823 1824 ill = arl_to_ill(arl); 1825 if (ill != NULL) { 1826 if (!ill_waiter_inc(ill)) { 1827 ill_refrele(ill); 1828 } else { 1829 ill_refrele(ill); 1830 if (ipsq_enter(ill, B_FALSE, NEW_OP)) 1831 ipsq = ill->ill_phyint->phyint_ipsq; 1832 ill_waiter_dcr(ill); 1833 } 1834 if (ipsq == NULL) { 1835 /* 1836 * could not enter the ipsq because ill is already 1837 * marked CONDEMNED. 1838 */ 1839 ill = NULL; 1840 } 1841 } 1842 if (ai != NULL && ipsq == NULL) { 1843 /* 1844 * Either we did not get an ill because it was marked CONDEMNED 1845 * or we could not enter the ipsq because it was unplumbing. 1846 * In both cases, wait for the ill to complete ip_modclose(). 1847 * 1848 * If the arp_modclose happened even before SLIFNAME, the ai 1849 * itself would be NULL, in which case we can complete the close 1850 * without waiting. 
1851 */ 1852 mutex_enter(&ai->ai_lock); 1853 while (ai->ai_ill != NULL) 1854 cv_wait(&ai->ai_ill_unplumb_done, &ai->ai_lock); 1855 mutex_exit(&ai->ai_lock); 1856 } 1857 ASSERT(ill == NULL || IAM_WRITER_ILL(ill)); 1858 1859 mutex_enter(&arl->arl_lock); 1860 /* 1861 * If the ill had completed unplumbing before arp_modclose(), there 1862 * would be no ill (and therefore, no ipsq) to serialize arp_modclose() 1863 * so that we need to explicitly check for ARL_CONDEMNED and back off 1864 * if it is set. 1865 */ 1866 if ((arl->arl_state_flags & ARL_CONDEMNED) != 0) { 1867 mutex_exit(&arl->arl_lock); 1868 ASSERT(ipsq == NULL); 1869 return (0); 1870 } 1871 arl->arl_state_flags |= ARL_CONDEMNED; 1872 1873 /* 1874 * send out all pending dlpi messages, don't wait for the ack (which 1875 * will be ignored in arp_rput when CONDEMNED is set) 1876 * 1877 * We have to check for pending DL_UNBIND_REQ because, in the case 1878 * that ip_modclose() executed before arp_modclose(), the call to 1879 * ill_delete_tail->ipif_arp_down() would have triggered a 1880 * DL_UNBIND_REQ. When arp_modclose() executes ipsq_enter() will fail 1881 * (since ip_modclose() is in the ipsq) but the DL_UNBIND_ACK may not 1882 * have been processed yet. In this scenario, we cannot reset 1883 * arl_dlpi_pending, because the setting/clearing of arl_state_flags 1884 * related to unbind, and the associated cv_waits must be allowed to 1885 * continue. 1886 */ 1887 if (arl->arl_dlpi_pending != DL_UNBIND_REQ) 1888 arl->arl_dlpi_pending = DL_PRIM_INVAL; 1889 mp = arl->arl_dlpi_deferred; 1890 arl->arl_dlpi_deferred = NULL; 1891 mutex_exit(&arl->arl_lock); 1892 1893 for (; mp != NULL; mp = nextmp) { 1894 nextmp = mp->b_next; 1895 mp->b_next = NULL; 1896 putnext(arl->arl_wq, mp); 1897 } 1898 1899 /* Wait for data paths to quiesce */ 1900 mutex_enter(&arl->arl_lock); 1901 while (arl->arl_refcnt != 0) 1902 cv_wait(&arl->arl_cv, &arl->arl_lock); 1903 1904 /* 1905 * unbind, so that nothing else can come up from driver. 
1906 */ 1907 mp = arl_unbind(arl); 1908 mutex_exit(&arl->arl_lock); 1909 if (mp != NULL) 1910 arp_dlpi_send(arl, mp); 1911 mutex_enter(&arl->arl_lock); 1912 1913 /* wait for unbind ack */ 1914 while (arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS) 1915 cv_wait(&arl->arl_cv, &arl->arl_lock); 1916 mutex_exit(&arl->arl_lock); 1917 1918 qprocsoff(q); 1919 1920 if (ill != NULL) { 1921 mutex_enter(&ill->ill_lock); 1922 ill->ill_arl_dlpi_pending = 0; 1923 mutex_exit(&ill->ill_lock); 1924 } 1925 1926 if (ai != NULL) { 1927 mutex_enter(&ai->ai_lock); 1928 ai->ai_arl = NULL; 1929 if (ai->ai_ill == NULL) { 1930 mutex_destroy(&ai->ai_lock); 1931 kmem_free(ai, sizeof (*ai)); 1932 } else { 1933 mutex_exit(&ai->ai_lock); 1934 } 1935 } 1936 1937 /* free up the rest */ 1938 arp_mod_close_tail(arl); 1939 1940 q->q_ptr = WR(q)->q_ptr = NULL; 1941 1942 if (ipsq != NULL) 1943 ipsq_exit(ipsq); 1944 1945 return (0); 1946 } 1947 1948 static void 1949 arp_mod_close_tail(arl_t *arl) 1950 { 1951 ip_stack_t *ipst = arl->arl_ipst; 1952 mblk_t **mpp; 1953 1954 netstack_hold(ipst->ips_netstack); 1955 1956 mutex_enter(&ipst->ips_ip_mi_lock); 1957 mi_close_unlink(&ipst->ips_arp_g_head, (IDP)arl); 1958 mutex_exit(&ipst->ips_ip_mi_lock); 1959 1960 /* 1961 * credp could be null if the open didn't succeed and ip_modopen 1962 * itself calls ip_close. 1963 */ 1964 if (arl->arl_credp != NULL) 1965 crfree(arl->arl_credp); 1966 1967 /* Free all retained control messages. */ 1968 mpp = &arl->arl_first_mp_to_free; 1969 do { 1970 while (mpp[0]) { 1971 mblk_t *mp; 1972 mblk_t *mp1; 1973 1974 mp = mpp[0]; 1975 mpp[0] = mp->b_next; 1976 for (mp1 = mp; mp1 != NULL; mp1 = mp1->b_cont) { 1977 mp1->b_next = NULL; 1978 mp1->b_prev = NULL; 1979 } 1980 freemsg(mp); 1981 } 1982 } while (mpp++ != &arl->arl_last_mp_to_free); 1983 1984 netstack_rele(ipst->ips_netstack); 1985 mi_free(arl->arl_name); 1986 mi_close_free((IDP)arl); 1987 } 1988 1989 /* 1990 * DAD failed. Tear down ipifs with the specified srce address. 
Note that 1991 * tearing down the ipif also meas deleting the ncec through ipif_down, 1992 * so it is not possible to use nce_timer for recovery. Instead we start 1993 * a timer on the ipif. Caller has to free the mp. 1994 */ 1995 void 1996 arp_failure(mblk_t *mp, ip_recv_attr_t *ira) 1997 { 1998 ill_t *ill = ira->ira_ill; 1999 2000 if ((mp = copymsg(mp)) != NULL) { 2001 ill_refhold(ill); 2002 qwriter_ip(ill, ill->ill_rq, mp, arp_excl, NEW_OP, B_FALSE); 2003 } 2004 } 2005 2006 /* 2007 * This is for exclusive changes due to ARP. Tear down an interface due 2008 * to AR_CN_FAILED and AR_CN_BOGON. 2009 */ 2010 /* ARGSUSED */ 2011 static void 2012 arp_excl(ipsq_t *ipsq, queue_t *rq, mblk_t *mp, void *dummy_arg) 2013 { 2014 ill_t *ill = rq->q_ptr; 2015 arh_t *arh; 2016 ipaddr_t src; 2017 ipif_t *ipif; 2018 ip_stack_t *ipst = ill->ill_ipst; 2019 uchar_t *haddr; 2020 uint_t haddrlen; 2021 2022 /* first try src = ar$spa */ 2023 arh = (arh_t *)mp->b_rptr; 2024 bcopy((char *)&arh[1] + arh->arh_hlen, &src, IP_ADDR_LEN); 2025 2026 haddrlen = arh->arh_hlen; 2027 haddr = (uint8_t *)(arh + 1); 2028 2029 if (haddrlen == ill->ill_phys_addr_length) { 2030 /* 2031 * Ignore conflicts generated by misbehaving switches that 2032 * just reflect our own messages back to us. For IPMP, we may 2033 * see reflections across any ill in the illgrp. 2034 */ 2035 /* For an under ill_grp can change under lock */ 2036 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 2037 if (bcmp(haddr, ill->ill_phys_addr, haddrlen) == 0 || 2038 IS_UNDER_IPMP(ill) && ill->ill_grp != NULL && 2039 ipmp_illgrp_find_ill(ill->ill_grp, haddr, 2040 haddrlen) != NULL) { 2041 rw_exit(&ipst->ips_ill_g_lock); 2042 goto ignore_conflict; 2043 } 2044 rw_exit(&ipst->ips_ill_g_lock); 2045 } 2046 2047 /* 2048 * Look up the appropriate ipif. 
2049 */ 2050 ipif = ipif_lookup_addr(src, ill, ALL_ZONES, ipst); 2051 if (ipif == NULL) 2052 goto ignore_conflict; 2053 2054 /* Reload the ill to match the ipif */ 2055 ill = ipif->ipif_ill; 2056 2057 /* If it's already duplicate or ineligible, then don't do anything. */ 2058 if (ipif->ipif_flags & (IPIF_POINTOPOINT|IPIF_DUPLICATE)) { 2059 ipif_refrele(ipif); 2060 goto ignore_conflict; 2061 } 2062 2063 /* 2064 * If we failed on a recovery probe, then restart the timer to 2065 * try again later. 2066 */ 2067 if (!ipif->ipif_was_dup) { 2068 char hbuf[MAC_STR_LEN]; 2069 char sbuf[INET_ADDRSTRLEN]; 2070 char ibuf[LIFNAMSIZ]; 2071 2072 (void) mac_colon_addr(haddr, haddrlen, hbuf, sizeof (hbuf)); 2073 (void) ip_dot_addr(src, sbuf); 2074 ipif_get_name(ipif, ibuf, sizeof (ibuf)); 2075 2076 cmn_err(CE_WARN, "%s has duplicate address %s (in use by %s);" 2077 " disabled", ibuf, sbuf, hbuf); 2078 } 2079 mutex_enter(&ill->ill_lock); 2080 ASSERT(!(ipif->ipif_flags & IPIF_DUPLICATE)); 2081 ipif->ipif_flags |= IPIF_DUPLICATE; 2082 ill->ill_ipif_dup_count++; 2083 mutex_exit(&ill->ill_lock); 2084 (void) ipif_down(ipif, NULL, NULL); 2085 (void) ipif_down_tail(ipif); 2086 mutex_enter(&ill->ill_lock); 2087 if (!(ipif->ipif_flags & (IPIF_DHCPRUNNING|IPIF_TEMPORARY)) && 2088 ill->ill_net_type == IRE_IF_RESOLVER && 2089 !(ipif->ipif_state_flags & IPIF_CONDEMNED) && 2090 ipst->ips_ip_dup_recovery > 0) { 2091 ASSERT(ipif->ipif_recovery_id == 0); 2092 ipif->ipif_recovery_id = timeout(ipif_dup_recovery, 2093 ipif, MSEC_TO_TICK(ipst->ips_ip_dup_recovery)); 2094 } 2095 mutex_exit(&ill->ill_lock); 2096 ipif_refrele(ipif); 2097 2098 ignore_conflict: 2099 freemsg(mp); 2100 } 2101 2102 /* 2103 * This is a place for a dtrace hook. 2104 * Note that mp can be either the DL_UNITDATA_IND with a b_cont payload, 2105 * or just the ARP packet payload as an M_DATA. 
2106 */ 2107 /* ARGSUSED */ 2108 static void 2109 arp_drop_packet(const char *str, mblk_t *mp, ill_t *ill) 2110 { 2111 freemsg(mp); 2112 } 2113 2114 static boolean_t 2115 arp_over_driver(queue_t *q) 2116 { 2117 queue_t *qnext = STREAM(q)->sd_wrq->q_next; 2118 2119 /* 2120 * check if first module below stream head is IP or UDP. 2121 */ 2122 ASSERT(qnext != NULL); 2123 if (strcmp(Q2NAME(qnext), "ip") != 0 && 2124 strcmp(Q2NAME(qnext), "udp") != 0) { 2125 /* 2126 * module below is not ip or udp, so arp has been pushed 2127 * on the driver. 2128 */ 2129 return (B_TRUE); 2130 } 2131 return (B_FALSE); 2132 } 2133 2134 static int 2135 arp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2136 { 2137 int err; 2138 2139 ASSERT(sflag & MODOPEN); 2140 if (!arp_over_driver(q)) { 2141 q->q_qinfo = dummymodinfo.st_rdinit; 2142 WR(q)->q_qinfo = dummymodinfo.st_wrinit; 2143 return ((*dummymodinfo.st_rdinit->qi_qopen)(q, devp, flag, 2144 sflag, credp)); 2145 } 2146 err = arp_modopen(q, devp, flag, sflag, credp); 2147 return (err); 2148 } 2149 2150 /* 2151 * In most cases we must be a writer on the IP stream before coming to 2152 * arp_dlpi_send(), to serialize DLPI sends to the driver. The exceptions 2153 * when we are not a writer are very early duing initialization (in 2154 * arl_init, before the arl has done a SLIFNAME, so that we don't yet know 2155 * the associated ill) or during arp_mod_close, when we could not enter the 2156 * ipsq because the ill has already unplumbed. 
2157 */ 2158 static void 2159 arp_dlpi_send(arl_t *arl, mblk_t *mp) 2160 { 2161 mblk_t **mpp; 2162 t_uscalar_t prim; 2163 arl_ill_common_t *ai; 2164 2165 ASSERT(DB_TYPE(mp) == M_PROTO || DB_TYPE(mp) == M_PCPROTO); 2166 2167 #ifdef DEBUG 2168 ai = arl->arl_common; 2169 if (ai != NULL) { 2170 mutex_enter(&ai->ai_lock); 2171 if (ai->ai_ill != NULL) 2172 ASSERT(IAM_WRITER_ILL(ai->ai_ill)); 2173 mutex_exit(&ai->ai_lock); 2174 } 2175 #endif /* DEBUG */ 2176 2177 mutex_enter(&arl->arl_lock); 2178 if (arl->arl_dlpi_pending != DL_PRIM_INVAL) { 2179 /* Must queue message. Tail insertion */ 2180 mpp = &arl->arl_dlpi_deferred; 2181 while (*mpp != NULL) 2182 mpp = &((*mpp)->b_next); 2183 2184 *mpp = mp; 2185 mutex_exit(&arl->arl_lock); 2186 return; 2187 } 2188 mutex_exit(&arl->arl_lock); 2189 if ((prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive) 2190 == DL_BIND_REQ) { 2191 ASSERT((arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS) == 0); 2192 } 2193 /* 2194 * No need to take the arl_lock to examine ARL_CONDEMNED at this point 2195 * because the only thread that can see ARL_CONDEMNED here is the 2196 * closing arp_modclose() thread which sets the flag after becoming a 2197 * writer on the ipsq. Threads from IP must have finished and 2198 * cannot be active now. 
2199 */ 2200 if (!(arl->arl_state_flags & ARL_CONDEMNED) || 2201 (prim == DL_UNBIND_REQ)) { 2202 if (prim != DL_NOTIFY_CONF) { 2203 ill_t *ill = arl_to_ill(arl); 2204 2205 arl->arl_dlpi_pending = prim; 2206 if (ill != NULL) { 2207 mutex_enter(&ill->ill_lock); 2208 ill->ill_arl_dlpi_pending = 1; 2209 mutex_exit(&ill->ill_lock); 2210 ill_refrele(ill); 2211 } 2212 } 2213 } 2214 DTRACE_PROBE4(arl__dlpi, char *, "arp_dlpi_send", 2215 char *, dl_primstr(prim), char *, "-", arl_t *, arl); 2216 putnext(arl->arl_wq, mp); 2217 } 2218 2219 static void 2220 arl_defaults_common(arl_t *arl, mblk_t *mp) 2221 { 2222 dl_info_ack_t *dlia = (dl_info_ack_t *)mp->b_rptr; 2223 /* 2224 * Till the ill is fully up the ill is not globally visible. 2225 * So no need for a lock. 2226 */ 2227 arl->arl_mactype = dlia->dl_mac_type; 2228 arl->arl_sap_length = dlia->dl_sap_length; 2229 2230 if (!arl->arl_dlpi_style_set) { 2231 if (dlia->dl_provider_style == DL_STYLE2) 2232 arl->arl_needs_attach = 1; 2233 mutex_enter(&arl->arl_lock); 2234 ASSERT(arl->arl_dlpi_style_set == 0); 2235 arl->arl_dlpi_style_set = 1; 2236 arl->arl_state_flags &= ~ARL_LL_SUBNET_PENDING; 2237 cv_broadcast(&arl->arl_cv); 2238 mutex_exit(&arl->arl_lock); 2239 } 2240 } 2241 2242 int 2243 arl_init(queue_t *q, arl_t *arl) 2244 { 2245 mblk_t *info_mp; 2246 dl_info_req_t *dlir; 2247 2248 /* subset of ill_init */ 2249 mutex_init(&arl->arl_lock, NULL, MUTEX_DEFAULT, 0); 2250 2251 arl->arl_rq = q; 2252 arl->arl_wq = WR(q); 2253 2254 info_mp = allocb(MAX(sizeof (dl_info_req_t), sizeof (dl_info_ack_t)), 2255 BPRI_HI); 2256 if (info_mp == NULL) 2257 return (ENOMEM); 2258 /* 2259 * allocate sufficient space to contain device name. 2260 */ 2261 arl->arl_name = (char *)(mi_zalloc(2 * LIFNAMSIZ)); 2262 arl->arl_ppa = UINT_MAX; 2263 arl->arl_state_flags |= (ARL_LL_SUBNET_PENDING | ARL_LL_UNBOUND); 2264 2265 /* Send down the Info Request to the driver. 
*/ 2266 info_mp->b_datap->db_type = M_PCPROTO; 2267 dlir = (dl_info_req_t *)info_mp->b_rptr; 2268 info_mp->b_wptr = (uchar_t *)&dlir[1]; 2269 dlir->dl_primitive = DL_INFO_REQ; 2270 arl->arl_dlpi_pending = DL_PRIM_INVAL; 2271 qprocson(q); 2272 2273 arp_dlpi_send(arl, info_mp); 2274 return (0); 2275 } 2276 2277 int 2278 arl_wait_for_info_ack(arl_t *arl) 2279 { 2280 int err; 2281 2282 mutex_enter(&arl->arl_lock); 2283 while (arl->arl_state_flags & ARL_LL_SUBNET_PENDING) { 2284 /* 2285 * Return value of 0 indicates a pending signal. 2286 */ 2287 err = cv_wait_sig(&arl->arl_cv, &arl->arl_lock); 2288 if (err == 0) { 2289 mutex_exit(&arl->arl_lock); 2290 return (EINTR); 2291 } 2292 } 2293 mutex_exit(&arl->arl_lock); 2294 /* 2295 * ip_rput_other could have set an error in ill_error on 2296 * receipt of M_ERROR. 2297 */ 2298 return (arl->arl_error); 2299 } 2300 2301 void 2302 arl_set_muxid(ill_t *ill, int muxid) 2303 { 2304 arl_t *arl; 2305 2306 arl = ill_to_arl(ill); 2307 if (arl != NULL) { 2308 arl->arl_muxid = muxid; 2309 arl_refrele(arl); 2310 } 2311 } 2312 2313 int 2314 arl_get_muxid(ill_t *ill) 2315 { 2316 arl_t *arl; 2317 int muxid = 0; 2318 2319 arl = ill_to_arl(ill); 2320 if (arl != NULL) { 2321 muxid = arl->arl_muxid; 2322 arl_refrele(arl); 2323 } 2324 return (muxid); 2325 } 2326 2327 static int 2328 arp_modopen(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp) 2329 { 2330 int err; 2331 zoneid_t zoneid; 2332 netstack_t *ns; 2333 ip_stack_t *ipst; 2334 arl_t *arl = NULL; 2335 2336 /* 2337 * Prevent unprivileged processes from pushing IP so that 2338 * they can't send raw IP. 2339 */ 2340 if (secpolicy_net_rawaccess(credp) != 0) 2341 return (EPERM); 2342 2343 ns = netstack_find_by_cred(credp); 2344 ASSERT(ns != NULL); 2345 ipst = ns->netstack_ip; 2346 ASSERT(ipst != NULL); 2347 2348 /* 2349 * For exclusive stacks we set the zoneid to zero 2350 * to make IP operate as if in the global zone. 
2351 */ 2352 if (ipst->ips_netstack->netstack_stackid != GLOBAL_NETSTACKID) 2353 zoneid = GLOBAL_ZONEID; 2354 else 2355 zoneid = crgetzoneid(credp); 2356 2357 arl = (arl_t *)mi_open_alloc_sleep(sizeof (arl_t)); 2358 q->q_ptr = WR(q)->q_ptr = arl; 2359 arl->arl_ipst = ipst; 2360 arl->arl_zoneid = zoneid; 2361 err = arl_init(q, arl); 2362 2363 if (err != 0) { 2364 mi_free(arl->arl_name); 2365 mi_free(arl); 2366 netstack_rele(ipst->ips_netstack); 2367 q->q_ptr = NULL; 2368 WR(q)->q_ptr = NULL; 2369 return (err); 2370 } 2371 2372 /* 2373 * Wait for the DL_INFO_ACK if a DL_INFO_REQ was sent. 2374 */ 2375 err = arl_wait_for_info_ack(arl); 2376 if (err == 0) 2377 arl->arl_credp = credp; 2378 else 2379 goto fail; 2380 2381 crhold(credp); 2382 2383 mutex_enter(&ipst->ips_ip_mi_lock); 2384 err = mi_open_link(&ipst->ips_arp_g_head, (IDP)q->q_ptr, devp, flag, 2385 sflag, credp); 2386 mutex_exit(&ipst->ips_ip_mi_lock); 2387 fail: 2388 if (err) { 2389 (void) arp_close(q, 0); 2390 return (err); 2391 } 2392 return (0); 2393 } 2394 2395 /* 2396 * Notify any downstream modules (esp softmac and hitbox) of the name 2397 * of this interface using an M_CTL. 2398 */ 2399 static void 2400 arp_ifname_notify(arl_t *arl) 2401 { 2402 mblk_t *mp1, *mp2; 2403 struct iocblk *iocp; 2404 struct lifreq *lifr; 2405 2406 if ((mp1 = mkiocb(SIOCSLIFNAME)) == NULL) 2407 return; 2408 if ((mp2 = allocb(sizeof (struct lifreq), BPRI_HI)) == NULL) { 2409 freemsg(mp1); 2410 return; 2411 } 2412 2413 lifr = (struct lifreq *)mp2->b_rptr; 2414 mp2->b_wptr += sizeof (struct lifreq); 2415 bzero(lifr, sizeof (struct lifreq)); 2416 2417 (void) strncpy(lifr->lifr_name, arl->arl_name, LIFNAMSIZ); 2418 lifr->lifr_ppa = arl->arl_ppa; 2419 lifr->lifr_flags = ILLF_IPV4; 2420 2421 /* Use M_CTL to avoid confusing anyone else who might be listening. 
*/ 2422 DB_TYPE(mp1) = M_CTL; 2423 mp1->b_cont = mp2; 2424 iocp = (struct iocblk *)mp1->b_rptr; 2425 iocp->ioc_count = msgsize(mp1->b_cont); 2426 DTRACE_PROBE4(arl__dlpi, char *, "arp_ifname_notify", 2427 char *, "SIOCSLIFNAME", char *, "-", arl_t *, arl); 2428 putnext(arl->arl_wq, mp1); 2429 } 2430 2431 void 2432 arp_send_replumb_conf(ill_t *ill) 2433 { 2434 mblk_t *mp; 2435 arl_t *arl = ill_to_arl(ill); 2436 2437 if (arl == NULL) 2438 return; 2439 /* 2440 * arl_got_replumb and arl_got_unbind to be cleared after we complete 2441 * arp_cmd_done. 2442 */ 2443 mp = mexchange(NULL, NULL, sizeof (dl_notify_conf_t), M_PROTO, 2444 DL_NOTIFY_CONF); 2445 ((dl_notify_conf_t *)(mp->b_rptr))->dl_notification = 2446 DL_NOTE_REPLUMB_DONE; 2447 arp_dlpi_send(arl, mp); 2448 mutex_enter(&arl->arl_lock); 2449 arl->arl_state_flags &= ~ARL_LL_REPLUMBING; 2450 mutex_exit(&arl->arl_lock); 2451 arl_refrele(arl); 2452 } 2453 2454 /* 2455 * The unplumb code paths call arp_unbind_complete() to make sure that it is 2456 * safe to tear down the ill. We wait for DL_UNBIND_ACK to complete, and also 2457 * for the arl_refcnt to fall to one so that, when we return from 2458 * arp_unbind_complete(), we know for certain that there are no threads in 2459 * arp_rput() that might access the arl_ill. 2460 */ 2461 void 2462 arp_unbind_complete(ill_t *ill) 2463 { 2464 arl_t *arl = ill_to_arl(ill); 2465 2466 if (arl == NULL) 2467 return; 2468 mutex_enter(&arl->arl_lock); 2469 /* 2470 * wait for unbind ack and arl_refcnt to drop to 1. Note that the 2471 * quiescent arl_refcnt for this function is 1 (and not 0) because 2472 * ill_to_arl() will itself return after taking a ref on the arl_t. 2473 */ 2474 while (arl->arl_state_flags & ARL_DL_UNBIND_IN_PROGRESS) 2475 cv_wait(&arl->arl_cv, &arl->arl_lock); 2476 while (arl->arl_refcnt != 1) 2477 cv_wait(&arl->arl_cv, &arl->arl_lock); 2478 mutex_exit(&arl->arl_lock); 2479 arl_refrele(arl); 2480 } 2481