1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2016 Joyent, Inc. 14 * Copyright 2022 MNX Cloud, Inc. 15 */ 16 17 /* 18 * Overlay device target cache management 19 * 20 * For more information, see the big theory statement in 21 * uts/common/io/overlay/overlay.c 22 */ 23 24 #include <sys/types.h> 25 #include <sys/ethernet.h> 26 #include <sys/kmem.h> 27 #include <sys/policy.h> 28 #include <sys/sysmacros.h> 29 #include <sys/stream.h> 30 #include <sys/strsun.h> 31 #include <sys/strsubr.h> 32 #include <sys/mac_provider.h> 33 #include <sys/mac_client.h> 34 #include <sys/mac_client_priv.h> 35 #include <sys/vlan.h> 36 #include <sys/crc32.h> 37 #include <sys/cred.h> 38 #include <sys/file.h> 39 #include <sys/errno.h> 40 #include <sys/ddi.h> 41 #include <sys/sunddi.h> 42 43 #include <sys/overlay_impl.h> 44 #include <sys/sdt.h> 45 46 /* 47 * This is total straw man, but at least it's a prime number. Here we're 48 * going to have to go through and do a lot of evaluation and understanding as 49 * to how these target caches should grow and shrink, as well as, memory 50 * pressure and evictions. This just gives us a starting point that'll be 'good 51 * enough', until it's not. 52 */ 53 #define OVERLAY_HSIZE 823 54 55 /* 56 * We use this data structure to keep track of what requests have been actively 57 * allocated to a given instance so we know what to put back on the pending 58 * list. 59 */ 60 typedef struct overlay_target_hdl { 61 minor_t oth_minor; /* RO */ 62 zoneid_t oth_zoneid; /* RO */ 63 int oth_oflags; /* RO */ 64 list_node_t oth_link; /* overlay_target_lock */ 65 kmutex_t oth_lock; 66 list_t oth_outstanding; /* oth_lock */ 67 } overlay_target_hdl_t; 68 69 typedef int (*overlay_target_copyin_f)(const void *, void **, size_t *, int); 70 typedef int (*overlay_target_ioctl_f)(overlay_target_hdl_t *, void *); 71 typedef int (*overlay_target_copyout_f)(void *, void *, size_t, int); 72 73 typedef struct overlay_target_ioctl { 74 int oti_cmd; /* ioctl id */ 75 boolean_t oti_write; /* ioctl requires FWRITE */ 76 boolean_t oti_ncopyout; /* copyout data? */ 77 overlay_target_copyin_f oti_copyin; /* copyin func */ 78 overlay_target_ioctl_f oti_func; /* function to call */ 79 overlay_target_copyout_f oti_copyout; /* copyin func */ 80 size_t oti_size; /* size of user level structure */ 81 } overlay_target_ioctl_t; 82 83 static kmem_cache_t *overlay_target_cache; 84 static kmem_cache_t *overlay_entry_cache; 85 static id_space_t *overlay_thdl_idspace; 86 static void *overlay_thdl_state; 87 88 /* 89 * When we support overlay devices in the NGZ, then all of these need to become 90 * zone aware, by plugging into the netstack engine and becoming per-netstack 91 * data. 92 */ 93 static list_t overlay_thdl_list; 94 static kmutex_t overlay_target_lock; 95 static kcondvar_t overlay_target_condvar; 96 static list_t overlay_target_list; 97 static boolean_t overlay_target_excl; 98 99 /* 100 * Outstanding data per hash table entry. 101 */ 102 static int overlay_ent_size = 128 * 1024; 103 104 /* ARGSUSED */ 105 static int 106 overlay_target_cache_constructor(void *buf, void *arg, int kmflgs) 107 { 108 overlay_target_t *ott = buf; 109 110 mutex_init(&ott->ott_lock, NULL, MUTEX_DRIVER, NULL); 111 cv_init(&ott->ott_cond, NULL, CV_DRIVER, NULL); 112 return (0); 113 } 114 115 /* ARGSUSED */ 116 static void 117 overlay_target_cache_destructor(void *buf, void *arg) 118 { 119 overlay_target_t *ott = buf; 120 121 cv_destroy(&ott->ott_cond); 122 mutex_destroy(&ott->ott_lock); 123 } 124 125 /* ARGSUSED */ 126 static int 127 overlay_entry_cache_constructor(void *buf, void *arg, int kmflgs) 128 { 129 overlay_target_entry_t *ote = buf; 130 131 bzero(ote, sizeof (overlay_target_entry_t)); 132 mutex_init(&ote->ote_lock, NULL, MUTEX_DRIVER, NULL); 133 return (0); 134 } 135 136 /* ARGSUSED */ 137 static void 138 overlay_entry_cache_destructor(void *buf, void *arg) 139 { 140 overlay_target_entry_t *ote = buf; 141 142 mutex_destroy(&ote->ote_lock); 143 } 144 145 static uint64_t 146 overlay_mac_hash(const void *v) 147 { 148 uint32_t crc; 149 CRC32(crc, v, ETHERADDRL, -1U, crc32_table); 150 return (crc); 151 } 152 153 static int 154 overlay_mac_cmp(const void *a, const void *b) 155 { 156 return (bcmp(a, b, ETHERADDRL)); 157 } 158 159 /* ARGSUSED */ 160 static void 161 overlay_target_entry_dtor(void *arg) 162 { 163 overlay_target_entry_t *ote = arg; 164 165 ote->ote_flags = 0; 166 bzero(ote->ote_addr, ETHERADDRL); 167 ote->ote_ott = NULL; 168 ote->ote_odd = NULL; 169 freemsgchain(ote->ote_chead); 170 ote->ote_chead = ote->ote_ctail = NULL; 171 ote->ote_mbsize = 0; 172 ote->ote_vtime = 0; 173 kmem_cache_free(overlay_entry_cache, ote); 174 } 175 176 static int 177 overlay_mac_avl(const void *a, const void *b) 178 { 179 int i; 180 const overlay_target_entry_t *l, *r; 181 l = a; 182 r = b; 183 184 for (i = 0; i < ETHERADDRL; i++) { 185 if (l->ote_addr[i] > r->ote_addr[i]) 186 return (1); 187 else if (l->ote_addr[i] < r->ote_addr[i]) 188 return (-1); 189 } 190 191 return (0); 192 } 193 194 void 195 overlay_target_init(void) 196 { 197 int ret; 198 ret = ddi_soft_state_init(&overlay_thdl_state, 199 sizeof (overlay_target_hdl_t), 1); 200 VERIFY(ret == 0); 201 overlay_target_cache = kmem_cache_create("overlay_target", 202 sizeof (overlay_target_t), 0, overlay_target_cache_constructor, 203 overlay_target_cache_destructor, NULL, NULL, NULL, 0); 204 overlay_entry_cache = kmem_cache_create("overlay_entry", 205 sizeof (overlay_target_entry_t), 0, overlay_entry_cache_constructor, 206 overlay_entry_cache_destructor, NULL, NULL, NULL, 0); 207 mutex_init(&overlay_target_lock, NULL, MUTEX_DRIVER, NULL); 208 cv_init(&overlay_target_condvar, NULL, CV_DRIVER, NULL); 209 list_create(&overlay_target_list, sizeof (overlay_target_entry_t), 210 offsetof(overlay_target_entry_t, ote_qlink)); 211 list_create(&overlay_thdl_list, sizeof (overlay_target_hdl_t), 212 offsetof(overlay_target_hdl_t, oth_link)); 213 overlay_thdl_idspace = id_space_create("overlay_target_minors", 214 1, INT32_MAX); 215 } 216 217 void 218 overlay_target_fini(void) 219 { 220 id_space_destroy(overlay_thdl_idspace); 221 list_destroy(&overlay_thdl_list); 222 list_destroy(&overlay_target_list); 223 cv_destroy(&overlay_target_condvar); 224 mutex_destroy(&overlay_target_lock); 225 kmem_cache_destroy(overlay_entry_cache); 226 kmem_cache_destroy(overlay_target_cache); 227 ddi_soft_state_fini(&overlay_thdl_state); 228 } 229 230 void 231 overlay_target_free(overlay_dev_t *odd) 232 { 233 if (odd->odd_target == NULL) 234 return; 235 236 if (odd->odd_target->ott_mode == OVERLAY_TARGET_DYNAMIC) { 237 refhash_t *rp = odd->odd_target->ott_u.ott_dyn.ott_dhash; 238 avl_tree_t *ap = &odd->odd_target->ott_u.ott_dyn.ott_tree; 239 overlay_target_entry_t *ote; 240 241 /* 242 * Our AVL tree and hashtable contain the same elements, 243 * therefore we should just remove it from the tree, but then 244 * delete the entries when we remove them from the hash table 245 * (which happens through the refhash dtor). 246 */ 247 while ((ote = avl_first(ap)) != NULL) 248 avl_remove(ap, ote); 249 250 avl_destroy(ap); 251 for (ote = refhash_first(rp); ote != NULL; 252 ote = refhash_next(rp, ote)) { 253 refhash_remove(rp, ote); 254 } 255 refhash_destroy(rp); 256 } 257 258 ASSERT(odd->odd_target->ott_ocount == 0); 259 kmem_cache_free(overlay_target_cache, odd->odd_target); 260 } 261 262 int 263 overlay_target_busy() 264 { 265 int ret; 266 267 mutex_enter(&overlay_target_lock); 268 ret = !list_is_empty(&overlay_thdl_list); 269 mutex_exit(&overlay_target_lock); 270 271 return (ret); 272 } 273 274 static void 275 overlay_target_queue(overlay_target_entry_t *entry) 276 { 277 mutex_enter(&overlay_target_lock); 278 mutex_enter(&entry->ote_ott->ott_lock); 279 if (entry->ote_ott->ott_flags & OVERLAY_T_TEARDOWN) { 280 mutex_exit(&entry->ote_ott->ott_lock); 281 mutex_exit(&overlay_target_lock); 282 return; 283 } 284 entry->ote_ott->ott_ocount++; 285 mutex_exit(&entry->ote_ott->ott_lock); 286 list_insert_tail(&overlay_target_list, entry); 287 cv_signal(&overlay_target_condvar); 288 mutex_exit(&overlay_target_lock); 289 } 290 291 void 292 overlay_target_quiesce(overlay_target_t *ott) 293 { 294 if (ott == NULL) 295 return; 296 mutex_enter(&ott->ott_lock); 297 ott->ott_flags |= OVERLAY_T_TEARDOWN; 298 while (ott->ott_ocount != 0) 299 cv_wait(&ott->ott_cond, &ott->ott_lock); 300 mutex_exit(&ott->ott_lock); 301 } 302 303 /* 304 * This functions assumes that the destination mode is OVERLAY_PLUGIN_D_IP | 305 * OVERLAY_PLUGIN_D_PORT. As we don't have an implementation of anything else at 306 * this time, say for NVGRE, we drop all packets that mcuh this. 307 */ 308 int 309 overlay_target_lookup(overlay_dev_t *odd, mblk_t *mp, struct sockaddr *sock, 310 socklen_t *slenp) 311 { 312 int ret; 313 struct sockaddr_in6 *v6; 314 overlay_target_t *ott; 315 mac_header_info_t mhi; 316 overlay_target_entry_t *entry; 317 318 ASSERT(odd->odd_target != NULL); 319 320 /* 321 * At this point, the overlay device is in a mux which means that it's 322 * been activated. At this point, parts of the target, such as the mode 323 * and the destination are now read-only and we don't have to worry 324 * about synchronization for them. 325 */ 326 ott = odd->odd_target; 327 if (ott->ott_dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT)) 328 return (OVERLAY_TARGET_DROP); 329 330 v6 = (struct sockaddr_in6 *)sock; 331 bzero(v6, sizeof (struct sockaddr_in6)); 332 v6->sin6_family = AF_INET6; 333 334 if (ott->ott_mode == OVERLAY_TARGET_POINT) { 335 mutex_enter(&ott->ott_lock); 336 bcopy(&ott->ott_u.ott_point.otp_ip, &v6->sin6_addr, 337 sizeof (struct in6_addr)); 338 v6->sin6_port = htons(ott->ott_u.ott_point.otp_port); 339 mutex_exit(&ott->ott_lock); 340 *slenp = sizeof (struct sockaddr_in6); 341 342 return (OVERLAY_TARGET_OK); 343 } 344 345 ASSERT(ott->ott_mode == OVERLAY_TARGET_DYNAMIC); 346 347 /* 348 * Note we only want the MAC address here, therefore we won't bother 349 * using mac_vlan_header_info(). If any caller needs the vlan info at 350 * this point, this should change to a call to mac_vlan_header_info(). 351 */ 352 if (mac_header_info(odd->odd_mh, mp, &mhi) != 0) 353 return (OVERLAY_TARGET_DROP); 354 mutex_enter(&ott->ott_lock); 355 entry = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash, 356 mhi.mhi_daddr); 357 if (entry == NULL) { 358 entry = kmem_cache_alloc(overlay_entry_cache, KM_NOSLEEP_LAZY); 359 if (entry == NULL) { 360 mutex_exit(&ott->ott_lock); 361 return (OVERLAY_TARGET_DROP); 362 } 363 bcopy(mhi.mhi_daddr, entry->ote_addr, ETHERADDRL); 364 entry->ote_chead = entry->ote_ctail = mp; 365 entry->ote_mbsize = msgsize(mp); 366 entry->ote_flags |= OVERLAY_ENTRY_F_PENDING; 367 entry->ote_ott = ott; 368 entry->ote_odd = odd; 369 refhash_insert(ott->ott_u.ott_dyn.ott_dhash, entry); 370 avl_add(&ott->ott_u.ott_dyn.ott_tree, entry); 371 mutex_exit(&ott->ott_lock); 372 overlay_target_queue(entry); 373 return (OVERLAY_TARGET_ASYNC); 374 } 375 refhash_hold(ott->ott_u.ott_dyn.ott_dhash, entry); 376 mutex_exit(&ott->ott_lock); 377 378 mutex_enter(&entry->ote_lock); 379 if (entry->ote_flags & OVERLAY_ENTRY_F_DROP) { 380 ret = OVERLAY_TARGET_DROP; 381 } else if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) { 382 bcopy(&entry->ote_dest.otp_ip, &v6->sin6_addr, 383 sizeof (struct in6_addr)); 384 v6->sin6_port = htons(entry->ote_dest.otp_port); 385 *slenp = sizeof (struct sockaddr_in6); 386 ret = OVERLAY_TARGET_OK; 387 } else { 388 size_t mlen = msgsize(mp); 389 390 if (mlen + entry->ote_mbsize > overlay_ent_size) { 391 ret = OVERLAY_TARGET_DROP; 392 } else { 393 if (entry->ote_ctail != NULL) { 394 ASSERT(entry->ote_ctail->b_next == 395 NULL); 396 entry->ote_ctail->b_next = mp; 397 entry->ote_ctail = mp; 398 } else { 399 entry->ote_chead = mp; 400 entry->ote_ctail = mp; 401 } 402 entry->ote_mbsize += mlen; 403 if ((entry->ote_flags & 404 OVERLAY_ENTRY_F_PENDING) == 0) { 405 entry->ote_flags |= 406 OVERLAY_ENTRY_F_PENDING; 407 overlay_target_queue(entry); 408 } 409 ret = OVERLAY_TARGET_ASYNC; 410 } 411 } 412 mutex_exit(&entry->ote_lock); 413 414 mutex_enter(&ott->ott_lock); 415 refhash_rele(ott->ott_u.ott_dyn.ott_dhash, entry); 416 mutex_exit(&ott->ott_lock); 417 418 return (ret); 419 } 420 421 /* ARGSUSED */ 422 static int 423 overlay_target_info(overlay_target_hdl_t *thdl, void *arg) 424 { 425 overlay_dev_t *odd; 426 overlay_targ_info_t *oti = arg; 427 428 odd = overlay_hold_by_dlid(oti->oti_linkid); 429 if (odd == NULL) 430 return (ENOENT); 431 432 mutex_enter(&odd->odd_lock); 433 oti->oti_flags = 0; 434 oti->oti_needs = odd->odd_plugin->ovp_dest; 435 if (odd->odd_flags & OVERLAY_F_DEGRADED) 436 oti->oti_flags |= OVERLAY_TARG_INFO_F_DEGRADED; 437 if (odd->odd_flags & OVERLAY_F_ACTIVATED) 438 oti->oti_flags |= OVERLAY_TARG_INFO_F_ACTIVE; 439 oti->oti_vnetid = odd->odd_vid; 440 mutex_exit(&odd->odd_lock); 441 overlay_hold_rele(odd); 442 return (0); 443 } 444 445 /* ARGSUSED */ 446 static int 447 overlay_target_associate(overlay_target_hdl_t *thdl, void *arg) 448 { 449 overlay_dev_t *odd; 450 overlay_target_t *ott; 451 overlay_targ_associate_t *ota = arg; 452 453 odd = overlay_hold_by_dlid(ota->ota_linkid); 454 if (odd == NULL) 455 return (ENOENT); 456 457 if (ota->ota_id == 0) { 458 overlay_hold_rele(odd); 459 return (EINVAL); 460 } 461 462 if (ota->ota_mode != OVERLAY_TARGET_POINT && 463 ota->ota_mode != OVERLAY_TARGET_DYNAMIC) { 464 overlay_hold_rele(odd); 465 return (EINVAL); 466 } 467 468 if (ota->ota_provides != odd->odd_plugin->ovp_dest) { 469 overlay_hold_rele(odd); 470 return (EINVAL); 471 } 472 473 if (ota->ota_mode == OVERLAY_TARGET_POINT) { 474 if (ota->ota_provides & OVERLAY_PLUGIN_D_IP) { 475 if (IN6_IS_ADDR_UNSPECIFIED(&ota->ota_point.otp_ip) || 476 IN6_IS_ADDR_V4COMPAT(&ota->ota_point.otp_ip) || 477 IN6_IS_ADDR_V4MAPPED_ANY(&ota->ota_point.otp_ip)) { 478 overlay_hold_rele(odd); 479 return (EINVAL); 480 } 481 } 482 483 if (ota->ota_provides & OVERLAY_PLUGIN_D_PORT) { 484 if (ota->ota_point.otp_port == 0) { 485 overlay_hold_rele(odd); 486 return (EINVAL); 487 } 488 } 489 } 490 491 ott = kmem_cache_alloc(overlay_target_cache, KM_SLEEP); 492 ott->ott_flags = 0; 493 ott->ott_ocount = 0; 494 ott->ott_mode = ota->ota_mode; 495 ott->ott_dest = ota->ota_provides; 496 ott->ott_id = ota->ota_id; 497 498 if (ott->ott_mode == OVERLAY_TARGET_POINT) { 499 bcopy(&ota->ota_point, &ott->ott_u.ott_point, 500 sizeof (overlay_target_point_t)); 501 } else { 502 ott->ott_u.ott_dyn.ott_dhash = refhash_create(OVERLAY_HSIZE, 503 overlay_mac_hash, overlay_mac_cmp, 504 overlay_target_entry_dtor, sizeof (overlay_target_entry_t), 505 offsetof(overlay_target_entry_t, ote_reflink), 506 offsetof(overlay_target_entry_t, ote_addr), KM_SLEEP); 507 avl_create(&ott->ott_u.ott_dyn.ott_tree, overlay_mac_avl, 508 sizeof (overlay_target_entry_t), 509 offsetof(overlay_target_entry_t, ote_avllink)); 510 } 511 mutex_enter(&odd->odd_lock); 512 if (odd->odd_flags & OVERLAY_F_VARPD) { 513 mutex_exit(&odd->odd_lock); 514 kmem_cache_free(overlay_target_cache, ott); 515 overlay_hold_rele(odd); 516 return (EEXIST); 517 } 518 519 odd->odd_flags |= OVERLAY_F_VARPD; 520 odd->odd_target = ott; 521 mutex_exit(&odd->odd_lock); 522 523 overlay_hold_rele(odd); 524 525 526 return (0); 527 } 528 529 530 /* ARGSUSED */ 531 static int 532 overlay_target_degrade(overlay_target_hdl_t *thdl, void *arg) 533 { 534 overlay_dev_t *odd; 535 overlay_targ_degrade_t *otd = arg; 536 537 odd = overlay_hold_by_dlid(otd->otd_linkid); 538 if (odd == NULL) 539 return (ENOENT); 540 541 overlay_fm_degrade(odd, otd->otd_buf); 542 overlay_hold_rele(odd); 543 return (0); 544 } 545 546 /* ARGSUSED */ 547 static int 548 overlay_target_restore(overlay_target_hdl_t *thdl, void *arg) 549 { 550 overlay_dev_t *odd; 551 overlay_targ_id_t *otid = arg; 552 553 odd = overlay_hold_by_dlid(otid->otid_linkid); 554 if (odd == NULL) 555 return (ENOENT); 556 557 overlay_fm_restore(odd); 558 overlay_hold_rele(odd); 559 return (0); 560 } 561 562 /* ARGSUSED */ 563 static int 564 overlay_target_disassociate(overlay_target_hdl_t *thdl, void *arg) 565 { 566 overlay_dev_t *odd; 567 overlay_targ_id_t *otid = arg; 568 569 odd = overlay_hold_by_dlid(otid->otid_linkid); 570 if (odd == NULL) 571 return (ENOENT); 572 573 mutex_enter(&odd->odd_lock); 574 odd->odd_flags &= ~OVERLAY_F_VARPD; 575 mutex_exit(&odd->odd_lock); 576 577 overlay_hold_rele(odd); 578 return (0); 579 580 } 581 582 static int 583 overlay_target_lookup_request(overlay_target_hdl_t *thdl, void *arg) 584 { 585 overlay_targ_lookup_t *otl = arg; 586 overlay_target_entry_t *entry; 587 clock_t ret, timeout; 588 mac_header_info_t mhi; 589 590 timeout = ddi_get_lbolt() + drv_usectohz(MICROSEC); 591 again: 592 mutex_enter(&overlay_target_lock); 593 while (list_is_empty(&overlay_target_list)) { 594 ret = cv_timedwait(&overlay_target_condvar, 595 &overlay_target_lock, timeout); 596 if (ret == -1) { 597 mutex_exit(&overlay_target_lock); 598 return (ETIME); 599 } 600 } 601 entry = list_remove_head(&overlay_target_list); 602 mutex_exit(&overlay_target_lock); 603 mutex_enter(&entry->ote_lock); 604 if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) { 605 ASSERT(entry->ote_chead == NULL); 606 mutex_exit(&entry->ote_lock); 607 goto again; 608 } 609 ASSERT(entry->ote_chead != NULL); 610 611 /* 612 * If we have a bogon that doesn't have a valid mac header, drop it and 613 * try again. 614 */ 615 if (mac_vlan_header_info(entry->ote_odd->odd_mh, entry->ote_chead, 616 &mhi) != 0) { 617 boolean_t queue = B_FALSE; 618 mblk_t *mp = entry->ote_chead; 619 entry->ote_chead = mp->b_next; 620 mp->b_next = NULL; 621 if (entry->ote_ctail == mp) 622 entry->ote_ctail = entry->ote_chead; 623 entry->ote_mbsize -= msgsize(mp); 624 if (entry->ote_chead != NULL) 625 queue = B_TRUE; 626 mutex_exit(&entry->ote_lock); 627 if (queue == B_TRUE) 628 overlay_target_queue(entry); 629 freemsg(mp); 630 goto again; 631 } 632 633 otl->otl_dlid = entry->ote_odd->odd_linkid; 634 otl->otl_reqid = (uintptr_t)entry; 635 otl->otl_varpdid = entry->ote_ott->ott_id; 636 otl->otl_vnetid = entry->ote_odd->odd_vid; 637 638 otl->otl_hdrsize = mhi.mhi_hdrsize; 639 otl->otl_pktsize = msgsize(entry->ote_chead) - otl->otl_hdrsize; 640 bcopy(mhi.mhi_daddr, otl->otl_dstaddr, ETHERADDRL); 641 bcopy(mhi.mhi_saddr, otl->otl_srcaddr, ETHERADDRL); 642 otl->otl_dsttype = mhi.mhi_dsttype; 643 otl->otl_sap = mhi.mhi_bindsap; 644 otl->otl_vlan = VLAN_ID(mhi.mhi_tci); 645 mutex_exit(&entry->ote_lock); 646 647 mutex_enter(&thdl->oth_lock); 648 list_insert_tail(&thdl->oth_outstanding, entry); 649 mutex_exit(&thdl->oth_lock); 650 651 return (0); 652 } 653 654 static int 655 overlay_target_lookup_respond(overlay_target_hdl_t *thdl, void *arg) 656 { 657 const overlay_targ_resp_t *otr = arg; 658 overlay_target_entry_t *entry; 659 mblk_t *mp; 660 661 mutex_enter(&thdl->oth_lock); 662 for (entry = list_head(&thdl->oth_outstanding); entry != NULL; 663 entry = list_next(&thdl->oth_outstanding, entry)) { 664 if ((uintptr_t)entry == otr->otr_reqid) 665 break; 666 } 667 668 if (entry == NULL) { 669 mutex_exit(&thdl->oth_lock); 670 return (EINVAL); 671 } 672 list_remove(&thdl->oth_outstanding, entry); 673 mutex_exit(&thdl->oth_lock); 674 675 mutex_enter(&entry->ote_lock); 676 bcopy(&otr->otr_answer, &entry->ote_dest, 677 sizeof (overlay_target_point_t)); 678 entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING; 679 entry->ote_flags |= OVERLAY_ENTRY_F_VALID; 680 mp = entry->ote_chead; 681 entry->ote_chead = NULL; 682 entry->ote_ctail = NULL; 683 entry->ote_mbsize = 0; 684 entry->ote_vtime = gethrtime(); 685 mutex_exit(&entry->ote_lock); 686 687 /* 688 * For now do an in-situ drain. 689 */ 690 mp = overlay_m_tx(entry->ote_odd, mp); 691 freemsgchain(mp); 692 693 mutex_enter(&entry->ote_ott->ott_lock); 694 entry->ote_ott->ott_ocount--; 695 cv_signal(&entry->ote_ott->ott_cond); 696 mutex_exit(&entry->ote_ott->ott_lock); 697 698 return (0); 699 } 700 701 static int 702 overlay_target_lookup_drop(overlay_target_hdl_t *thdl, void *arg) 703 { 704 const overlay_targ_resp_t *otr = arg; 705 overlay_target_entry_t *entry; 706 mblk_t *mp; 707 boolean_t queue = B_FALSE; 708 709 mutex_enter(&thdl->oth_lock); 710 for (entry = list_head(&thdl->oth_outstanding); entry != NULL; 711 entry = list_next(&thdl->oth_outstanding, entry)) { 712 if ((uintptr_t)entry == otr->otr_reqid) 713 break; 714 } 715 716 if (entry == NULL) { 717 mutex_exit(&thdl->oth_lock); 718 return (EINVAL); 719 } 720 list_remove(&thdl->oth_outstanding, entry); 721 mutex_exit(&thdl->oth_lock); 722 723 mutex_enter(&entry->ote_lock); 724 725 /* Safeguard against a confused varpd */ 726 if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) { 727 entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING; 728 DTRACE_PROBE1(overlay__target__valid__drop, 729 overlay_target_entry_t *, entry); 730 mutex_exit(&entry->ote_lock); 731 goto done; 732 } 733 734 mp = entry->ote_chead; 735 if (mp != NULL) { 736 entry->ote_chead = mp->b_next; 737 mp->b_next = NULL; 738 if (entry->ote_ctail == mp) 739 entry->ote_ctail = entry->ote_chead; 740 entry->ote_mbsize -= msgsize(mp); 741 } 742 if (entry->ote_chead != NULL) { 743 queue = B_TRUE; 744 entry->ote_flags |= OVERLAY_ENTRY_F_PENDING; 745 } else { 746 entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING; 747 } 748 mutex_exit(&entry->ote_lock); 749 750 if (queue == B_TRUE) 751 overlay_target_queue(entry); 752 freemsg(mp); 753 754 done: 755 mutex_enter(&entry->ote_ott->ott_lock); 756 entry->ote_ott->ott_ocount--; 757 cv_signal(&entry->ote_ott->ott_cond); 758 mutex_exit(&entry->ote_ott->ott_lock); 759 760 return (0); 761 } 762 763 /* ARGSUSED */ 764 static int 765 overlay_target_pkt_copyin(const void *ubuf, void **outp, size_t *bsize, 766 int flags) 767 { 768 overlay_targ_pkt_t *pkt; 769 overlay_targ_pkt32_t *pkt32; 770 771 pkt = kmem_alloc(sizeof (overlay_targ_pkt_t), KM_SLEEP); 772 *outp = pkt; 773 *bsize = sizeof (overlay_targ_pkt_t); 774 if (ddi_model_convert_from(flags & FMODELS) == DDI_MODEL_ILP32) { 775 uintptr_t addr; 776 777 if (ddi_copyin(ubuf, pkt, sizeof (overlay_targ_pkt32_t), 778 flags & FKIOCTL) != 0) { 779 kmem_free(pkt, *bsize); 780 return (EFAULT); 781 } 782 pkt32 = (overlay_targ_pkt32_t *)pkt; 783 addr = pkt32->otp_buf; 784 pkt->otp_buf = (void *)addr; 785 } else { 786 if (ddi_copyin(ubuf, pkt, *bsize, flags & FKIOCTL) != 0) { 787 kmem_free(pkt, *bsize); 788 return (EFAULT); 789 } 790 } 791 return (0); 792 } 793 794 static int 795 overlay_target_pkt_copyout(void *ubuf, void *buf, size_t bufsize, 796 int flags) 797 { 798 if (ddi_model_convert_from(flags & FMODELS) == DDI_MODEL_ILP32) { 799 overlay_targ_pkt_t *pkt = buf; 800 overlay_targ_pkt32_t *pkt32 = buf; 801 uintptr_t addr = (uintptr_t)pkt->otp_buf; 802 pkt32->otp_buf = (caddr32_t)addr; 803 if (ddi_copyout(buf, ubuf, sizeof (overlay_targ_pkt32_t), 804 flags & FKIOCTL) != 0) 805 return (EFAULT); 806 } else { 807 if (ddi_copyout(buf, ubuf, bufsize, flags & FKIOCTL) != 0) 808 return (EFAULT); 809 } 810 return (0); 811 } 812 813 static int 814 overlay_target_packet(overlay_target_hdl_t *thdl, void *arg) 815 { 816 overlay_targ_pkt_t *pkt = arg; 817 overlay_target_entry_t *entry; 818 mblk_t *mp; 819 size_t mlen; 820 size_t boff; 821 822 mutex_enter(&thdl->oth_lock); 823 for (entry = list_head(&thdl->oth_outstanding); entry != NULL; 824 entry = list_next(&thdl->oth_outstanding, entry)) { 825 if ((uintptr_t)entry == pkt->otp_reqid) 826 break; 827 } 828 829 if (entry == NULL) { 830 mutex_exit(&thdl->oth_lock); 831 return (EINVAL); 832 } 833 mutex_enter(&entry->ote_lock); 834 mutex_exit(&thdl->oth_lock); 835 mp = entry->ote_chead; 836 /* Protect against a rogue varpd */ 837 if (mp == NULL) { 838 mutex_exit(&entry->ote_lock); 839 return (EINVAL); 840 } 841 mlen = MIN(msgsize(mp), pkt->otp_size); 842 pkt->otp_size = mlen; 843 boff = 0; 844 while (mlen > 0) { 845 size_t wlen = MIN(MBLKL(mp), mlen); 846 if (ddi_copyout(mp->b_rptr, 847 (void *)((uintptr_t)pkt->otp_buf + boff), 848 wlen, 0) != 0) { 849 mutex_exit(&entry->ote_lock); 850 return (EFAULT); 851 } 852 mlen -= wlen; 853 boff += wlen; 854 mp = mp->b_cont; 855 } 856 mutex_exit(&entry->ote_lock); 857 return (0); 858 } 859 860 static int 861 overlay_target_inject(overlay_target_hdl_t *thdl, void *arg) 862 { 863 overlay_targ_pkt_t *pkt = arg; 864 overlay_target_entry_t *entry; 865 overlay_dev_t *odd; 866 mblk_t *mp; 867 868 if (pkt->otp_size > ETHERMAX + VLAN_TAGSZ) 869 return (EINVAL); 870 871 mp = allocb(pkt->otp_size, 0); 872 if (mp == NULL) 873 return (ENOMEM); 874 875 if (ddi_copyin(pkt->otp_buf, mp->b_rptr, pkt->otp_size, 0) != 0) { 876 freeb(mp); 877 return (EFAULT); 878 } 879 mp->b_wptr += pkt->otp_size; 880 881 if (pkt->otp_linkid != UINT64_MAX) { 882 odd = overlay_hold_by_dlid(pkt->otp_linkid); 883 if (odd == NULL) { 884 freeb(mp); 885 return (ENOENT); 886 } 887 } else { 888 mutex_enter(&thdl->oth_lock); 889 for (entry = list_head(&thdl->oth_outstanding); entry != NULL; 890 entry = list_next(&thdl->oth_outstanding, entry)) { 891 if ((uintptr_t)entry == pkt->otp_reqid) 892 break; 893 } 894 895 if (entry == NULL) { 896 mutex_exit(&thdl->oth_lock); 897 freeb(mp); 898 return (ENOENT); 899 } 900 odd = entry->ote_odd; 901 mutex_exit(&thdl->oth_lock); 902 } 903 904 mutex_enter(&odd->odd_lock); 905 if ((odd->odd_flags & OVERLAY_F_MDDROP) || 906 !(odd->odd_flags & OVERLAY_F_IN_MUX)) { 907 /* Can't do receive... */ 908 mutex_exit(&odd->odd_lock); 909 OVERLAY_FREEMSG(mp, "dev dropped"); 910 freeb(mp); 911 return (EBUSY); 912 } 913 overlay_io_start(odd, OVERLAY_F_IN_RX); 914 mutex_exit(&odd->odd_lock); 915 916 mac_rx(odd->odd_mh, NULL, mp); 917 918 mutex_enter(&odd->odd_lock); 919 overlay_io_done(odd, OVERLAY_F_IN_RX); 920 mutex_exit(&odd->odd_lock); 921 922 return (0); 923 } 924 925 static int 926 overlay_target_resend(overlay_target_hdl_t *thdl, void *arg) 927 { 928 overlay_targ_pkt_t *pkt = arg; 929 overlay_target_entry_t *entry; 930 overlay_dev_t *odd; 931 mblk_t *mp; 932 933 if (pkt->otp_size > ETHERMAX + VLAN_TAGSZ) 934 return (EINVAL); 935 936 mp = allocb(pkt->otp_size, 0); 937 if (mp == NULL) 938 return (ENOMEM); 939 940 if (ddi_copyin(pkt->otp_buf, mp->b_rptr, pkt->otp_size, 0) != 0) { 941 freeb(mp); 942 return (EFAULT); 943 } 944 mp->b_wptr += pkt->otp_size; 945 946 if (pkt->otp_linkid != UINT64_MAX) { 947 odd = overlay_hold_by_dlid(pkt->otp_linkid); 948 if (odd == NULL) { 949 freeb(mp); 950 return (ENOENT); 951 } 952 } else { 953 mutex_enter(&thdl->oth_lock); 954 for (entry = list_head(&thdl->oth_outstanding); entry != NULL; 955 entry = list_next(&thdl->oth_outstanding, entry)) { 956 if ((uintptr_t)entry == pkt->otp_reqid) 957 break; 958 } 959 960 if (entry == NULL) { 961 mutex_exit(&thdl->oth_lock); 962 freeb(mp); 963 return (ENOENT); 964 } 965 odd = entry->ote_odd; 966 mutex_exit(&thdl->oth_lock); 967 } 968 969 mp = overlay_m_tx(odd, mp); 970 freemsgchain(mp); 971 972 return (0); 973 } 974 975 typedef struct overlay_targ_list_int { 976 boolean_t otli_count; 977 uint32_t otli_cur; 978 uint32_t otli_nents; 979 uint32_t otli_ents[]; 980 } overlay_targ_list_int_t; 981 982 static int 983 overlay_target_list_copyin(const void *ubuf, void **outp, size_t *bsize, 984 int flags) 985 { 986 overlay_targ_list_t n; 987 overlay_targ_list_int_t *otl; 988 989 if (ddi_copyin(ubuf, &n, sizeof (overlay_targ_list_t), 990 flags & FKIOCTL) != 0) 991 return (EFAULT); 992 993 /* 994 */ 995 if (n.otl_nents >= INT32_MAX / sizeof (uint32_t)) 996 return (EINVAL); 997 *bsize = sizeof (overlay_targ_list_int_t) + 998 sizeof (uint32_t) * n.otl_nents; 999 otl = kmem_zalloc(*bsize, KM_SLEEP); 1000 otl->otli_cur = 0; 1001 otl->otli_nents = n.otl_nents; 1002 if (otl->otli_nents != 0) { 1003 otl->otli_count = B_FALSE; 1004 if (ddi_copyin((void *)((uintptr_t)ubuf + 1005 offsetof(overlay_targ_list_t, otl_ents)), 1006 otl->otli_ents, n.otl_nents * sizeof (uint32_t), 1007 flags & FKIOCTL) != 0) { 1008 kmem_free(otl, *bsize); 1009 return (EFAULT); 1010 } 1011 } else { 1012 otl->otli_count = B_TRUE; 1013 } 1014 1015 *outp = otl; 1016 return (0); 1017 } 1018 1019 static int 1020 overlay_target_ioctl_list_cb(overlay_dev_t *odd, void *arg) 1021 { 1022 overlay_targ_list_int_t *otl = arg; 1023 1024 if (otl->otli_cur < otl->otli_nents) 1025 otl->otli_ents[otl->otli_cur] = odd->odd_linkid; 1026 otl->otli_cur++; 1027 return (0); 1028 } 1029 1030 /* ARGSUSED */ 1031 static int 1032 overlay_target_ioctl_list(overlay_target_hdl_t *thdl, void *arg) 1033 { 1034 overlay_dev_iter(overlay_target_ioctl_list_cb, arg); 1035 return (0); 1036 } 1037 1038 /* ARGSUSED */ 1039 static int 1040 overlay_target_list_copyout(void *ubuf, void *buf, size_t bufsize, int flags) 1041 { 1042 overlay_targ_list_int_t *otl = buf; 1043 1044 if (ddi_copyout(&otl->otli_cur, ubuf, sizeof (uint32_t), 1045 flags & FKIOCTL) != 0) 1046 return (EFAULT); 1047 1048 if (otl->otli_count == B_FALSE) { 1049 if (ddi_copyout(otl->otli_ents, 1050 (void *)((uintptr_t)ubuf + 1051 offsetof(overlay_targ_list_t, otl_ents)), 1052 sizeof (uint32_t) * otl->otli_nents, 1053 flags & FKIOCTL) != 0) 1054 return (EFAULT); 1055 } 1056 return (0); 1057 } 1058 1059 /* ARGSUSED */ 1060 static int 1061 overlay_target_cache_get(overlay_target_hdl_t *thdl, void *arg) 1062 { 1063 int ret = 0; 1064 overlay_dev_t *odd; 1065 overlay_target_t *ott; 1066 overlay_targ_cache_t *otc = arg; 1067 1068 odd = overlay_hold_by_dlid(otc->otc_linkid); 1069 if (odd == NULL) 1070 return (ENOENT); 1071 1072 mutex_enter(&odd->odd_lock); 1073 if (!(odd->odd_flags & OVERLAY_F_VARPD)) { 1074 mutex_exit(&odd->odd_lock); 1075 overlay_hold_rele(odd); 1076 return (ENXIO); 1077 } 1078 ott = odd->odd_target; 1079 if (ott->ott_mode != OVERLAY_TARGET_POINT && 1080 ott->ott_mode != OVERLAY_TARGET_DYNAMIC) { 1081 mutex_exit(&odd->odd_lock); 1082 overlay_hold_rele(odd); 1083 return (ENOTSUP); 1084 } 1085 mutex_enter(&ott->ott_lock); 1086 mutex_exit(&odd->odd_lock); 1087 1088 if (ott->ott_mode == OVERLAY_TARGET_POINT) { 1089 otc->otc_entry.otce_flags = 0; 1090 bcopy(&ott->ott_u.ott_point, &otc->otc_entry.otce_dest, 1091 sizeof (overlay_target_point_t)); 1092 } else { 1093 overlay_target_entry_t *ote; 1094 ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash, 1095 otc->otc_entry.otce_mac); 1096 if (ote != NULL) { 1097 mutex_enter(&ote->ote_lock); 1098 if ((ote->ote_flags & 1099 OVERLAY_ENTRY_F_VALID_MASK) != 0) { 1100 if (ote->ote_flags & OVERLAY_ENTRY_F_DROP) { 1101 otc->otc_entry.otce_flags = 1102 OVERLAY_TARGET_CACHE_DROP; 1103 } else { 1104 otc->otc_entry.otce_flags = 0; 1105 bcopy(&ote->ote_dest, 1106 &otc->otc_entry.otce_dest, 1107 sizeof (overlay_target_point_t)); 1108 } 1109 ret = 0; 1110 } else { 1111 ret = ENOENT; 1112 } 1113 mutex_exit(&ote->ote_lock); 1114 } else { 1115 ret = ENOENT; 1116 } 1117 } 1118 1119 mutex_exit(&ott->ott_lock); 1120 overlay_hold_rele(odd); 1121 1122 return (ret); 1123 } 1124 1125 /* ARGSUSED */ 1126 static int 1127 overlay_target_cache_set(overlay_target_hdl_t *thdl, void *arg) 1128 { 1129 overlay_dev_t *odd; 1130 overlay_target_t *ott; 1131 overlay_target_entry_t *ote; 1132 overlay_targ_cache_t *otc = arg; 1133 mblk_t *mp = NULL; 1134 1135 if (otc->otc_entry.otce_flags & ~OVERLAY_TARGET_CACHE_DROP) 1136 return (EINVAL); 1137 1138 odd = overlay_hold_by_dlid(otc->otc_linkid); 1139 if (odd == NULL) 1140 return (ENOENT); 1141 1142 mutex_enter(&odd->odd_lock); 1143 if (!(odd->odd_flags & OVERLAY_F_VARPD)) { 1144 mutex_exit(&odd->odd_lock); 1145 overlay_hold_rele(odd); 1146 return (ENXIO); 1147 } 1148 ott = odd->odd_target; 1149 if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) { 1150 mutex_exit(&odd->odd_lock); 1151 overlay_hold_rele(odd); 1152 return (ENOTSUP); 1153 } 1154 mutex_enter(&ott->ott_lock); 1155 mutex_exit(&odd->odd_lock); 1156 1157 ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash, 1158 otc->otc_entry.otce_mac); 1159 if (ote == NULL) { 1160 ote = kmem_cache_alloc(overlay_entry_cache, KM_SLEEP); 1161 bcopy(otc->otc_entry.otce_mac, ote->ote_addr, ETHERADDRL); 1162 ote->ote_chead = ote->ote_ctail = NULL; 1163 ote->ote_mbsize = 0; 1164 ote->ote_ott = ott; 1165 ote->ote_odd = odd; 1166 mutex_enter(&ote->ote_lock); 1167 refhash_insert(ott->ott_u.ott_dyn.ott_dhash, ote); 1168 avl_add(&ott->ott_u.ott_dyn.ott_tree, ote); 1169 } else { 1170 mutex_enter(&ote->ote_lock); 1171 } 1172 1173 if (otc->otc_entry.otce_flags & OVERLAY_TARGET_CACHE_DROP) { 1174 ote->ote_flags |= OVERLAY_ENTRY_F_DROP; 1175 } else { 1176 ote->ote_flags |= OVERLAY_ENTRY_F_VALID; 1177 bcopy(&otc->otc_entry.otce_dest, &ote->ote_dest, 1178 sizeof (overlay_target_point_t)); 1179 mp = ote->ote_chead; 1180 ote->ote_chead = NULL; 1181 ote->ote_ctail = NULL; 1182 ote->ote_mbsize = 0; 1183 ote->ote_vtime = gethrtime(); 1184 } 1185 1186 mutex_exit(&ote->ote_lock); 1187 mutex_exit(&ott->ott_lock); 1188 1189 if (mp != NULL) { 1190 mp = overlay_m_tx(ote->ote_odd, mp); 1191 freemsgchain(mp); 1192 } 1193 1194 overlay_hold_rele(odd); 1195 1196 return (0); 1197 } 1198 1199 /* ARGSUSED */ 1200 static int 1201 overlay_target_cache_remove(overlay_target_hdl_t *thdl, void *arg) 1202 { 1203 int ret = 0; 1204 overlay_dev_t *odd; 1205 overlay_target_t *ott; 1206 overlay_target_entry_t *ote; 1207 overlay_targ_cache_t *otc = arg; 1208 1209 odd = overlay_hold_by_dlid(otc->otc_linkid); 1210 if (odd == NULL) 1211 return (ENOENT); 1212 1213 mutex_enter(&odd->odd_lock); 1214 if (!(odd->odd_flags & OVERLAY_F_VARPD)) { 1215 mutex_exit(&odd->odd_lock); 1216 overlay_hold_rele(odd); 1217 return (ENXIO); 1218 } 1219 ott = odd->odd_target; 1220 if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) { 1221 mutex_exit(&odd->odd_lock); 1222 overlay_hold_rele(odd); 1223 return (ENOTSUP); 1224 } 1225 mutex_enter(&ott->ott_lock); 1226 mutex_exit(&odd->odd_lock); 1227 1228 ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash, 1229 otc->otc_entry.otce_mac); 1230 if (ote != NULL) { 1231 mutex_enter(&ote->ote_lock); 1232 ote->ote_flags &= ~OVERLAY_ENTRY_F_VALID_MASK; 1233 mutex_exit(&ote->ote_lock); 1234 ret = 0; 1235 } else { 1236 ret = ENOENT; 1237 } 1238 1239 mutex_exit(&ott->ott_lock); 1240 overlay_hold_rele(odd); 1241 1242 return (ret); 1243 } 1244 1245 /* ARGSUSED */ 1246 static int 1247 overlay_target_cache_flush(overlay_target_hdl_t *thdl, void *arg) 1248 { 1249 avl_tree_t *avl; 1250 overlay_dev_t *odd; 1251 overlay_target_t *ott; 1252 overlay_target_entry_t *ote; 1253 overlay_targ_cache_t *otc = arg; 1254 1255 odd = overlay_hold_by_dlid(otc->otc_linkid); 1256 if (odd == NULL) 1257 return (ENOENT); 1258 1259 mutex_enter(&odd->odd_lock); 1260 if (!(odd->odd_flags & OVERLAY_F_VARPD)) { 1261 mutex_exit(&odd->odd_lock); 1262 overlay_hold_rele(odd); 1263 return (ENXIO); 1264 } 1265 ott = odd->odd_target; 1266 if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) { 1267 mutex_exit(&odd->odd_lock); 1268 overlay_hold_rele(odd); 1269 return (ENOTSUP); 1270 } 1271 mutex_enter(&ott->ott_lock); 1272 mutex_exit(&odd->odd_lock); 1273 avl = &ott->ott_u.ott_dyn.ott_tree; 1274 1275 for (ote = avl_first(avl); ote != NULL; ote = AVL_NEXT(avl, ote)) { 1276 mutex_enter(&ote->ote_lock); 1277 ote->ote_flags &= ~OVERLAY_ENTRY_F_VALID_MASK; 1278 mutex_exit(&ote->ote_lock); 1279 } 1280 ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash, 1281 otc->otc_entry.otce_mac); 1282 1283 mutex_exit(&ott->ott_lock); 1284 overlay_hold_rele(odd); 1285 1286 return (0); 1287 } 1288 1289 static int 1290 overlay_target_cache_iter_copyin(const void *ubuf, void **outp, size_t *bsize, 1291 int flags) 1292 { 1293 overlay_targ_cache_iter_t base, *iter; 1294 1295 if (ddi_copyin(ubuf, &base, sizeof (overlay_targ_cache_iter_t), 1296 flags & FKIOCTL) != 0) 1297 return (EFAULT); 1298 1299 if (base.otci_count > OVERLAY_TARGET_ITER_MAX) 1300 return (E2BIG); 1301 1302 if (base.otci_count == 0) 1303 return (EINVAL); 1304 1305 *bsize = sizeof (overlay_targ_cache_iter_t) + 1306 base.otci_count * sizeof (overlay_targ_cache_entry_t); 1307 iter = kmem_alloc(*bsize, KM_SLEEP); 1308 bcopy(&base, iter, sizeof (overlay_targ_cache_iter_t)); 1309 *outp = iter; 1310 1311 return (0); 1312 } 1313 1314 typedef struct overlay_targ_cache_marker { 1315 uint8_t otcm_mac[ETHERADDRL]; 1316 uint16_t otcm_done; 1317 } overlay_targ_cache_marker_t; 1318 1319 /* ARGSUSED */ 1320 static int 1321 overlay_target_cache_iter(overlay_target_hdl_t *thdl, void *arg) 1322 { 1323 overlay_dev_t *odd; 1324 overlay_target_t *ott; 1325 overlay_target_entry_t lookup, *ent; 1326 overlay_targ_cache_marker_t *mark; 1327 avl_index_t where; 1328 avl_tree_t *avl; 1329 uint16_t written = 0; 1330 1331 overlay_targ_cache_iter_t *iter = arg; 1332 mark = (void *)&iter->otci_marker; 1333 1334 if (mark->otcm_done != 0) { 1335 iter->otci_count = 0; 1336 return (0); 1337 } 1338 1339 odd = overlay_hold_by_dlid(iter->otci_linkid); 1340 if (odd == NULL) 1341 return (ENOENT); 1342 1343 mutex_enter(&odd->odd_lock); 1344 if (!(odd->odd_flags & OVERLAY_F_VARPD)) { 1345 mutex_exit(&odd->odd_lock); 1346 overlay_hold_rele(odd); 1347 return (ENXIO); 1348 } 1349 ott = odd->odd_target; 1350 if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC && 1351 ott->ott_mode != OVERLAY_TARGET_POINT) { 1352 mutex_exit(&odd->odd_lock); 1353 overlay_hold_rele(odd); 1354 return (ENOTSUP); 1355 } 1356 1357 /* 1358 * Holding this lock across the entire iteration probably isn't very 1359 * good. We should perhaps add an r/w lock for the avl tree. But we'll 1360 * wait until we now it's necessary before we do more. 1361 */ 1362 mutex_enter(&ott->ott_lock); 1363 mutex_exit(&odd->odd_lock); 1364 1365 if (ott->ott_mode == OVERLAY_TARGET_POINT) { 1366 overlay_targ_cache_entry_t *out = &iter->otci_ents[0]; 1367 bzero(out->otce_mac, ETHERADDRL); 1368 out->otce_flags = 0; 1369 bcopy(&ott->ott_u.ott_point, &out->otce_dest, 1370 sizeof (overlay_target_point_t)); 1371 written++; 1372 mark->otcm_done = 1; 1373 } 1374 1375 avl = &ott->ott_u.ott_dyn.ott_tree; 1376 bcopy(mark->otcm_mac, lookup.ote_addr, ETHERADDRL); 1377 ent = avl_find(avl, &lookup, &where); 1378 1379 /* 1380 * NULL ent means that the entry does not exist, so we want to start 1381 * with the closest node in the tree. This means that we implicitly rely 1382 * on the tree's order and the first node will be the mac 00:00:00:00:00 1383 * and the last will be ff:ff:ff:ff:ff:ff. 1384 */ 1385 if (ent == NULL) { 1386 ent = avl_nearest(avl, where, AVL_AFTER); 1387 if (ent == NULL) { 1388 mark->otcm_done = 1; 1389 goto done; 1390 } 1391 } 1392 1393 for (; ent != NULL && written < iter->otci_count; 1394 ent = AVL_NEXT(avl, ent)) { 1395 overlay_targ_cache_entry_t *out = &iter->otci_ents[written]; 1396 mutex_enter(&ent->ote_lock); 1397 if ((ent->ote_flags & OVERLAY_ENTRY_F_VALID_MASK) == 0) { 1398 mutex_exit(&ent->ote_lock); 1399 continue; 1400 } 1401 bcopy(ent->ote_addr, out->otce_mac, ETHERADDRL); 1402 out->otce_flags = 0; 1403 if (ent->ote_flags & OVERLAY_ENTRY_F_DROP) 1404 out->otce_flags |= OVERLAY_TARGET_CACHE_DROP; 1405 if (ent->ote_flags & OVERLAY_ENTRY_F_VALID) 1406 bcopy(&ent->ote_dest, &out->otce_dest, 1407 sizeof (overlay_target_point_t)); 1408 written++; 1409 mutex_exit(&ent->ote_lock); 1410 } 1411 1412 if (ent != NULL) { 1413 bcopy(ent->ote_addr, mark->otcm_mac, ETHERADDRL); 1414 } else { 1415 mark->otcm_done = 1; 1416 } 1417 1418 done: 1419 iter->otci_count = written; 1420 mutex_exit(&ott->ott_lock); 1421 overlay_hold_rele(odd); 1422 1423 return (0); 1424 } 1425 1426 /* ARGSUSED */ 1427 static int 1428 overlay_target_cache_iter_copyout(void *ubuf, void *buf, size_t bufsize, 1429 int flags) 1430 { 1431 size_t outsize; 1432 const overlay_targ_cache_iter_t *iter = buf; 1433 1434 outsize = sizeof (overlay_targ_cache_iter_t) + 1435 iter->otci_count * sizeof (overlay_targ_cache_entry_t); 1436 1437 if (ddi_copyout(buf, ubuf, outsize, flags & FKIOCTL) != 0) 1438 return (EFAULT); 1439 1440 return (0); 1441 } 1442 1443 static overlay_target_ioctl_t overlay_target_ioctab[] = { 1444 { OVERLAY_TARG_INFO, B_TRUE, B_TRUE, 1445 NULL, overlay_target_info, 1446 NULL, sizeof (overlay_targ_info_t) }, 1447 { OVERLAY_TARG_ASSOCIATE, B_TRUE, B_FALSE, 1448 NULL, overlay_target_associate, 1449 NULL, sizeof (overlay_targ_associate_t) }, 1450 { OVERLAY_TARG_DISASSOCIATE, B_TRUE, B_FALSE, 1451 NULL, overlay_target_disassociate, 1452 NULL, sizeof (overlay_targ_id_t) }, 1453 { OVERLAY_TARG_DEGRADE, B_TRUE, B_FALSE, 1454 NULL, overlay_target_degrade, 1455 NULL, sizeof (overlay_targ_degrade_t) }, 1456 { OVERLAY_TARG_RESTORE, B_TRUE, B_FALSE, 1457 NULL, overlay_target_restore, 1458 NULL, sizeof (overlay_targ_id_t) }, 1459 { OVERLAY_TARG_LOOKUP, B_FALSE, B_TRUE, 1460 NULL, overlay_target_lookup_request, 1461 NULL, sizeof (overlay_targ_lookup_t) }, 1462 { OVERLAY_TARG_RESPOND, B_TRUE, B_FALSE, 1463 NULL, overlay_target_lookup_respond, 1464 NULL, sizeof (overlay_targ_resp_t) }, 1465 { OVERLAY_TARG_DROP, B_TRUE, B_FALSE, 1466 NULL, overlay_target_lookup_drop, 1467 NULL, sizeof (overlay_targ_resp_t) }, 1468 { OVERLAY_TARG_PKT, B_TRUE, B_TRUE, 1469 overlay_target_pkt_copyin, 1470 overlay_target_packet, 1471 overlay_target_pkt_copyout, 1472 sizeof (overlay_targ_pkt_t) }, 1473 { OVERLAY_TARG_INJECT, B_TRUE, B_FALSE, 1474 overlay_target_pkt_copyin, 1475 overlay_target_inject, 1476 NULL, sizeof (overlay_targ_pkt_t) }, 1477 { OVERLAY_TARG_RESEND, B_TRUE, B_FALSE, 1478 overlay_target_pkt_copyin, 1479 overlay_target_resend, 1480 NULL, sizeof (overlay_targ_pkt_t) }, 1481 { OVERLAY_TARG_LIST, B_FALSE, B_TRUE, 1482 overlay_target_list_copyin, 1483 overlay_target_ioctl_list, 1484 overlay_target_list_copyout, 1485 sizeof (overlay_targ_list_t) }, 1486 { OVERLAY_TARG_CACHE_GET, B_FALSE, B_TRUE, 1487 NULL, overlay_target_cache_get, 1488 NULL, sizeof (overlay_targ_cache_t) }, 1489 { OVERLAY_TARG_CACHE_SET, B_TRUE, B_TRUE, 1490 NULL, overlay_target_cache_set, 1491 NULL, sizeof (overlay_targ_cache_t) }, 1492 { OVERLAY_TARG_CACHE_REMOVE, B_TRUE, B_TRUE, 1493 NULL, overlay_target_cache_remove, 1494 NULL, sizeof (overlay_targ_cache_t) }, 1495 { OVERLAY_TARG_CACHE_FLUSH, B_TRUE, B_TRUE, 1496 NULL, overlay_target_cache_flush, 1497 NULL, sizeof (overlay_targ_cache_t) }, 1498 { OVERLAY_TARG_CACHE_ITER, B_FALSE, B_TRUE, 1499 overlay_target_cache_iter_copyin, 1500 overlay_target_cache_iter, 1501 overlay_target_cache_iter_copyout, 1502 sizeof (overlay_targ_cache_iter_t) }, 1503 { 0 } 1504 }; 1505 1506 int 1507 overlay_target_open(dev_t *devp, int flags, int otype, cred_t *credp) 1508 { 1509 minor_t mid; 1510 overlay_target_hdl_t *thdl; 1511 1512 if (secpolicy_dl_config(credp) != 0) 1513 return (EPERM); 1514 1515 if (getminor(*devp) != 0) 1516 return (ENXIO); 1517 1518 if (otype & OTYP_BLK) 1519 return (EINVAL); 1520 1521 if (flags & ~(FREAD | FWRITE | FEXCL)) 1522 return (EINVAL); 1523 1524 if ((flags & FWRITE) && 1525 !(flags & FEXCL)) 1526 return (EINVAL); 1527 1528 if (!(flags & FREAD) && !(flags & FWRITE)) 1529 return (EINVAL); 1530 1531 if (crgetzoneid(credp) != GLOBAL_ZONEID) 1532 return (EPERM); 1533 1534 mid = id_alloc(overlay_thdl_idspace); 1535 if (ddi_soft_state_zalloc(overlay_thdl_state, mid) != 0) { 1536 id_free(overlay_thdl_idspace, mid); 1537 return (ENXIO); 1538 } 1539 1540 thdl = ddi_get_soft_state(overlay_thdl_state, mid); 1541 VERIFY(thdl != NULL); 1542 thdl->oth_minor = mid; 1543 thdl->oth_zoneid = crgetzoneid(credp); 1544 thdl->oth_oflags = flags; 1545 mutex_init(&thdl->oth_lock, NULL, MUTEX_DRIVER, NULL); 1546 list_create(&thdl->oth_outstanding, sizeof (overlay_target_entry_t), 1547 offsetof(overlay_target_entry_t, ote_qlink)); 1548 *devp = makedevice(getmajor(*devp), mid); 1549 1550 mutex_enter(&overlay_target_lock); 1551 if ((flags & FEXCL) && overlay_target_excl == B_TRUE) { 1552 mutex_exit(&overlay_target_lock); 1553 list_destroy(&thdl->oth_outstanding); 1554 mutex_destroy(&thdl->oth_lock); 1555 ddi_soft_state_free(overlay_thdl_state, mid); 1556 id_free(overlay_thdl_idspace, mid); 1557 return (EEXIST); 1558 } else if ((flags & FEXCL) != 0) { 1559 VERIFY(overlay_target_excl == B_FALSE); 1560 overlay_target_excl = B_TRUE; 1561 } 1562 list_insert_tail(&overlay_thdl_list, thdl); 1563 mutex_exit(&overlay_target_lock); 1564 1565 return (0); 1566 } 1567 1568 /* ARGSUSED */ 1569 int 1570 overlay_target_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 1571 int *rvalp) 1572 { 1573 overlay_target_ioctl_t *ioc; 1574 overlay_target_hdl_t *thdl; 1575 1576 if (secpolicy_dl_config(credp) != 0) 1577 return (EPERM); 1578 1579 if ((thdl = ddi_get_soft_state(overlay_thdl_state, 1580 getminor(dev))) == NULL) 1581 return (ENXIO); 1582 1583 for (ioc = &overlay_target_ioctab[0]; ioc->oti_cmd != 0; ioc++) { 1584 int ret; 1585 caddr_t buf; 1586 size_t bufsize; 1587 1588 if (ioc->oti_cmd != cmd) 1589 continue; 1590 1591 if (ioc->oti_write == B_TRUE && !(mode & FWRITE)) 1592 return (EBADF); 1593 1594 if (ioc->oti_copyin == NULL) { 1595 bufsize = ioc->oti_size; 1596 buf = kmem_alloc(bufsize, KM_SLEEP); 1597 if (ddi_copyin((void *)(uintptr_t)arg, buf, bufsize, 1598 mode & FKIOCTL) != 0) { 1599 kmem_free(buf, bufsize); 1600 return (EFAULT); 1601 } 1602 } else { 1603 if ((ret = ioc->oti_copyin((void *)(uintptr_t)arg, 1604 (void **)&buf, &bufsize, mode)) != 0) 1605 return (ret); 1606 } 1607 1608 ret = ioc->oti_func(thdl, buf); 1609 if (ret == 0 && ioc->oti_size != 0 && 1610 ioc->oti_ncopyout == B_TRUE) { 1611 if (ioc->oti_copyout == NULL) { 1612 if (ddi_copyout(buf, (void *)(uintptr_t)arg, 1613 bufsize, mode & FKIOCTL) != 0) 1614 ret = EFAULT; 1615 } else { 1616 ret = ioc->oti_copyout((void *)(uintptr_t)arg, 1617 buf, bufsize, mode); 1618 } 1619 } 1620 1621 kmem_free(buf, bufsize); 1622 return (ret); 1623 } 1624 1625 return (ENOTTY); 1626 } 1627 1628 /* ARGSUSED */ 1629 int 1630 overlay_target_close(dev_t dev, int flags, int otype, cred_t *credp) 1631 { 1632 overlay_target_hdl_t *thdl; 1633 overlay_target_entry_t *entry; 1634 minor_t mid = getminor(dev); 1635 1636 if ((thdl = ddi_get_soft_state(overlay_thdl_state, mid)) == NULL) 1637 return (ENXIO); 1638 1639 mutex_enter(&overlay_target_lock); 1640 list_remove(&overlay_thdl_list, thdl); 1641 mutex_enter(&thdl->oth_lock); 1642 while ((entry = list_remove_head(&thdl->oth_outstanding)) != NULL) 1643 list_insert_tail(&overlay_target_list, entry); 1644 cv_signal(&overlay_target_condvar); 1645 mutex_exit(&thdl->oth_lock); 1646 if ((thdl->oth_oflags & FEXCL) != 0) { 1647 VERIFY(overlay_target_excl == B_TRUE); 1648 overlay_target_excl = B_FALSE; 1649 } 1650 mutex_exit(&overlay_target_lock); 1651 1652 list_destroy(&thdl->oth_outstanding); 1653 mutex_destroy(&thdl->oth_lock); 1654 mid = thdl->oth_minor; 1655 ddi_soft_state_free(overlay_thdl_state, mid); 1656 id_free(overlay_thdl_idspace, mid); 1657 1658 return (0); 1659 } 1660