/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2016 Joyent, Inc.
 */

/*
 * Overlay device target cache management
 *
 * For more information, see the big theory statement in
 * uts/common/io/overlay/overlay.c
 */

#include <sys/types.h>
#include <sys/ethernet.h>
#include <sys/kmem.h>
#include <sys/policy.h>
#include <sys/sysmacros.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <sys/mac_provider.h>
#include <sys/mac_client.h>
#include <sys/mac_client_priv.h>
#include <sys/vlan.h>
#include <sys/crc32.h>
#include <sys/cred.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>

#include <sys/overlay_impl.h>
#include <sys/sdt.h>

/*
 * This is a total straw man, but at least it's a prime number. Here we're
 * going to have to go through and do a lot of evaluation and understanding as
 * to how these target caches should grow and shrink, as well as how to handle
 * memory pressure and evictions. This just gives us a starting point that'll
 * be 'good enough', until it's not.
 */
#define	OVERLAY_HSIZE	823

/*
 * We use this data structure to keep track of what requests have been actively
 * allocated to a given instance so we know what to put back on the pending
 * list.
 */
typedef struct overlay_target_hdl {
	minor_t oth_minor;		/* RO */
	zoneid_t oth_zoneid;		/* RO */
	int oth_oflags;			/* RO */
	list_node_t oth_link;		/* overlay_target_lock */
	kmutex_t oth_lock;
	list_t oth_outstanding;		/* oth_lock */
} overlay_target_hdl_t;

typedef int (*overlay_target_copyin_f)(const void *, void **, size_t *, int);
typedef int (*overlay_target_ioctl_f)(overlay_target_hdl_t *, void *);
typedef int (*overlay_target_copyout_f)(void *, void *, size_t, int);

typedef struct overlay_target_ioctl {
	int		oti_cmd;	/* ioctl id */
	boolean_t	oti_write;	/* ioctl requires FWRITE */
	boolean_t	oti_ncopyout;	/* copyout data? */
	overlay_target_copyin_f oti_copyin;	/* copyin func */
	overlay_target_ioctl_f oti_func;	/* function to call */
	overlay_target_copyout_f oti_copyout;	/* copyout func */
	size_t		oti_size;	/* size of user level structure */
} overlay_target_ioctl_t;

static kmem_cache_t *overlay_target_cache;
static kmem_cache_t *overlay_entry_cache;
static id_space_t *overlay_thdl_idspace;
static void *overlay_thdl_state;

/*
 * When we support overlay devices in the NGZ, then all of these need to become
 * zone aware, by plugging into the netstack engine and becoming per-netstack
 * data.
 */
static list_t overlay_thdl_list;
static kmutex_t overlay_target_lock;
static kcondvar_t overlay_target_condvar;
static list_t overlay_target_list;
static boolean_t overlay_target_excl;
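/*
 * In short (summarizing the code below): each overlay device has an
 * overlay_target_t that is either in "point" mode, where every packet goes to
 * a single pre-configured IP and port, or in "dynamic" mode, where
 * destinations are cached per remote MAC address in a refhash and an AVL tree
 * of overlay_target_entry_t structures. Dynamic-mode misses queue the
 * outgoing mblk on the entry and place the entry on overlay_target_list,
 * where the userland lookup daemon (varpd) picks it up through the ioctl
 * interface defined later in this file.
 */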
/*
 * Maximum amount of outstanding (queued) packet data, in bytes, that we
 * allow per hash table entry while a lookup is pending.
 */
static int overlay_ent_size = 128 * 1024;

/* ARGSUSED */
static int
overlay_target_cache_constructor(void *buf, void *arg, int kmflgs)
{
	overlay_target_t *ott = buf;

	mutex_init(&ott->ott_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&ott->ott_cond, NULL, CV_DRIVER, NULL);
	return (0);
}

/* ARGSUSED */
static void
overlay_target_cache_destructor(void *buf, void *arg)
{
	overlay_target_t *ott = buf;

	cv_destroy(&ott->ott_cond);
	mutex_destroy(&ott->ott_lock);
}

/* ARGSUSED */
static int
overlay_entry_cache_constructor(void *buf, void *arg, int kmflgs)
{
	overlay_target_entry_t *ote = buf;

	bzero(ote, sizeof (overlay_target_entry_t));
	mutex_init(&ote->ote_lock, NULL, MUTEX_DRIVER, NULL);
	return (0);
}

/* ARGSUSED */
static void
overlay_entry_cache_destructor(void *buf, void *arg)
{
	overlay_target_entry_t *ote = buf;

	mutex_destroy(&ote->ote_lock);
}

static uint64_t
overlay_mac_hash(const void *v)
{
	uint32_t crc;
	CRC32(crc, v, ETHERADDRL, -1U, crc32_table);
	return (crc);
}

static int
overlay_mac_cmp(const void *a, const void *b)
{
	return (bcmp(a, b, ETHERADDRL));
}

/* ARGSUSED */
static void
overlay_target_entry_dtor(void *arg)
{
	overlay_target_entry_t *ote = arg;

	ote->ote_flags = 0;
	bzero(ote->ote_addr, ETHERADDRL);
	ote->ote_ott = NULL;
	ote->ote_odd = NULL;
	freemsgchain(ote->ote_chead);
	ote->ote_chead = ote->ote_ctail = NULL;
	ote->ote_mbsize = 0;
	ote->ote_vtime = 0;
	kmem_cache_free(overlay_entry_cache, ote);
}

static int
overlay_mac_avl(const void *a, const void *b)
{
	int i;
	const overlay_target_entry_t *l, *r;
	l = a;
	r = b;

	for (i = 0; i < ETHERADDRL; i++) {
		if (l->ote_addr[i] > r->ote_addr[i])
			return (1);
		else if (l->ote_addr[i] < r->ote_addr[i])
			return (-1);
	}

	return (0);
}

void
overlay_target_init(void)
{
	int ret;
	ret = ddi_soft_state_init(&overlay_thdl_state,
	    sizeof (overlay_target_hdl_t), 1);
	VERIFY(ret == 0);
	overlay_target_cache = kmem_cache_create("overlay_target",
	    sizeof (overlay_target_t), 0, overlay_target_cache_constructor,
	    overlay_target_cache_destructor, NULL, NULL, NULL, 0);
	overlay_entry_cache = kmem_cache_create("overlay_entry",
	    sizeof (overlay_target_entry_t), 0, overlay_entry_cache_constructor,
	    overlay_entry_cache_destructor, NULL, NULL, NULL, 0);
	mutex_init(&overlay_target_lock, NULL, MUTEX_DRIVER, NULL);
	cv_init(&overlay_target_condvar, NULL, CV_DRIVER, NULL);
	list_create(&overlay_target_list, sizeof (overlay_target_entry_t),
	    offsetof(overlay_target_entry_t, ote_qlink));
	list_create(&overlay_thdl_list, sizeof (overlay_target_hdl_t),
	    offsetof(overlay_target_hdl_t, oth_link));
	overlay_thdl_idspace = id_space_create("overlay_target_minors",
	    1, INT32_MAX);
}

void
overlay_target_fini(void)
{
	id_space_destroy(overlay_thdl_idspace);
	list_destroy(&overlay_thdl_list);
	list_destroy(&overlay_target_list);
	cv_destroy(&overlay_target_condvar);
	mutex_destroy(&overlay_target_lock);
	kmem_cache_destroy(overlay_entry_cache);
	kmem_cache_destroy(overlay_target_cache);
	ddi_soft_state_fini(&overlay_thdl_state);
}
void
overlay_target_free(overlay_dev_t *odd)
{
	if (odd->odd_target == NULL)
		return;

	if (odd->odd_target->ott_mode == OVERLAY_TARGET_DYNAMIC) {
		refhash_t *rp = odd->odd_target->ott_u.ott_dyn.ott_dhash;
		avl_tree_t *ap = &odd->odd_target->ott_u.ott_dyn.ott_tree;
		overlay_target_entry_t *ote;

		/*
		 * Our AVL tree and hashtable contain the same elements,
		 * therefore we just remove entries from the tree and delete
		 * them when we remove them from the hash table (which happens
		 * through the refhash dtor).
		 */
		while ((ote = avl_first(ap)) != NULL)
			avl_remove(ap, ote);

		avl_destroy(ap);
		for (ote = refhash_first(rp); ote != NULL;
		    ote = refhash_next(rp, ote)) {
			refhash_remove(rp, ote);
		}
		refhash_destroy(rp);
	}

	ASSERT(odd->odd_target->ott_ocount == 0);
	kmem_cache_free(overlay_target_cache, odd->odd_target);
}

int
overlay_target_busy()
{
	int ret;

	mutex_enter(&overlay_target_lock);
	ret = !list_is_empty(&overlay_thdl_list);
	mutex_exit(&overlay_target_lock);

	return (ret);
}

static void
overlay_target_queue(overlay_target_entry_t *entry)
{
	mutex_enter(&overlay_target_lock);
	mutex_enter(&entry->ote_ott->ott_lock);
	if (entry->ote_ott->ott_flags & OVERLAY_T_TEARDOWN) {
		mutex_exit(&entry->ote_ott->ott_lock);
		mutex_exit(&overlay_target_lock);
		return;
	}
	entry->ote_ott->ott_ocount++;
	mutex_exit(&entry->ote_ott->ott_lock);
	list_insert_tail(&overlay_target_list, entry);
	cv_signal(&overlay_target_condvar);
	mutex_exit(&overlay_target_lock);
}

void
overlay_target_quiesce(overlay_target_t *ott)
{
	if (ott == NULL)
		return;
	mutex_enter(&ott->ott_lock);
	ott->ott_flags |= OVERLAY_T_TEARDOWN;
	while (ott->ott_ocount != 0)
		cv_wait(&ott->ott_cond, &ott->ott_lock);
	mutex_exit(&ott->ott_lock);
}
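/*
 * overlay_target_lookup() below tells the transmit path what to do with an
 * mblk: OVERLAY_TARGET_OK means the destination was resolved and *sock and
 * *slenp have been filled in; OVERLAY_TARGET_DROP means the caller should
 * free the message; OVERLAY_TARGET_ASYNC means the message has been queued
 * on a target entry and ownership has passed to the lookup machinery, which
 * will transmit or free it once varpd answers.
 */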
/*
 * This function assumes that the destination mode is OVERLAY_PLUGIN_D_IP |
 * OVERLAY_PLUGIN_D_PORT. As we don't have an implementation of anything else
 * at this time, say NVGRE, we drop all packets whose destination requirements
 * don't match this.
 */
int
overlay_target_lookup(overlay_dev_t *odd, mblk_t *mp, struct sockaddr *sock,
    socklen_t *slenp)
{
	int ret;
	struct sockaddr_in6 *v6;
	overlay_target_t *ott;
	mac_header_info_t mhi;
	overlay_target_entry_t *entry;

	ASSERT(odd->odd_target != NULL);

	/*
	 * At this point, the overlay device is in a mux, which means that it
	 * has been activated. Parts of the target, such as the mode and the
	 * destination, are now read-only, so we don't have to worry about
	 * synchronization for them.
	 */
	ott = odd->odd_target;
	if (ott->ott_dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT))
		return (OVERLAY_TARGET_DROP);

	v6 = (struct sockaddr_in6 *)sock;
	bzero(v6, sizeof (struct sockaddr_in6));
	v6->sin6_family = AF_INET6;

	if (ott->ott_mode == OVERLAY_TARGET_POINT) {
		mutex_enter(&ott->ott_lock);
		bcopy(&ott->ott_u.ott_point.otp_ip, &v6->sin6_addr,
		    sizeof (struct in6_addr));
		v6->sin6_port = htons(ott->ott_u.ott_point.otp_port);
		mutex_exit(&ott->ott_lock);
		*slenp = sizeof (struct sockaddr_in6);

		return (OVERLAY_TARGET_OK);
	}

	ASSERT(ott->ott_mode == OVERLAY_TARGET_DYNAMIC);

	/*
	 * Note we only want the MAC address here, therefore we won't bother
	 * using mac_vlan_header_info(). If any caller needs the vlan info at
	 * this point, this should change to a call to mac_vlan_header_info().
	 */
	if (mac_header_info(odd->odd_mh, mp, &mhi) != 0)
		return (OVERLAY_TARGET_DROP);
	mutex_enter(&ott->ott_lock);
	entry = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
	    mhi.mhi_daddr);
	if (entry == NULL) {
		entry = kmem_cache_alloc(overlay_entry_cache, KM_NOSLEEP_LAZY);
		if (entry == NULL) {
			mutex_exit(&ott->ott_lock);
			return (OVERLAY_TARGET_DROP);
		}
		bcopy(mhi.mhi_daddr, entry->ote_addr, ETHERADDRL);
		entry->ote_chead = entry->ote_ctail = mp;
		entry->ote_mbsize = msgsize(mp);
		entry->ote_flags |= OVERLAY_ENTRY_F_PENDING;
		entry->ote_ott = ott;
		entry->ote_odd = odd;
		refhash_insert(ott->ott_u.ott_dyn.ott_dhash, entry);
		avl_add(&ott->ott_u.ott_dyn.ott_tree, entry);
		mutex_exit(&ott->ott_lock);
		overlay_target_queue(entry);
		return (OVERLAY_TARGET_ASYNC);
	}
	refhash_hold(ott->ott_u.ott_dyn.ott_dhash, entry);
	mutex_exit(&ott->ott_lock);

	mutex_enter(&entry->ote_lock);
	if (entry->ote_flags & OVERLAY_ENTRY_F_DROP) {
		ret = OVERLAY_TARGET_DROP;
	} else if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
		bcopy(&entry->ote_dest.otp_ip, &v6->sin6_addr,
		    sizeof (struct in6_addr));
		v6->sin6_port = htons(entry->ote_dest.otp_port);
		*slenp = sizeof (struct sockaddr_in6);
		ret = OVERLAY_TARGET_OK;
	} else {
		size_t mlen = msgsize(mp);

		if (mlen + entry->ote_mbsize > overlay_ent_size) {
			ret = OVERLAY_TARGET_DROP;
		} else {
			if (entry->ote_ctail != NULL) {
				ASSERT(entry->ote_ctail->b_next == NULL);
				entry->ote_ctail->b_next = mp;
				entry->ote_ctail = mp;
			} else {
				entry->ote_chead = mp;
				entry->ote_ctail = mp;
			}
			entry->ote_mbsize += mlen;
			if ((entry->ote_flags &
			    OVERLAY_ENTRY_F_PENDING) == 0) {
				entry->ote_flags |=
				    OVERLAY_ENTRY_F_PENDING;
				overlay_target_queue(entry);
			}
			ret = OVERLAY_TARGET_ASYNC;
		}
	}
	mutex_exit(&entry->ote_lock);

	mutex_enter(&ott->ott_lock);
	refhash_rele(ott->ott_u.ott_dyn.ott_dhash, entry);
	mutex_exit(&ott->ott_lock);

	return (ret);
}

/* ARGSUSED */
static int
overlay_target_info(overlay_target_hdl_t *thdl, void *arg)
{
	overlay_dev_t *odd;
	overlay_targ_info_t *oti = arg;

	odd = overlay_hold_by_dlid(oti->oti_linkid);
	if (odd == NULL)
		return (ENOENT);

	mutex_enter(&odd->odd_lock);
	oti->oti_flags = 0;
	oti->oti_needs = odd->odd_plugin->ovp_dest;
	if (odd->odd_flags & OVERLAY_F_DEGRADED)
		oti->oti_flags |= OVERLAY_TARG_INFO_F_DEGRADED;
	if (odd->odd_flags & OVERLAY_F_ACTIVATED)
		oti->oti_flags |= OVERLAY_TARG_INFO_F_ACTIVE;
	oti->oti_vnetid = odd->odd_vid;
	mutex_exit(&odd->odd_lock);
	overlay_hold_rele(odd);
	return (0);
}
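/*
 * An illustrative sketch (not part of the original source) of how a userland
 * consumer such as varpd might associate a point target with a device. The
 * file descriptor fd, linkid, varpd_instance_id and backend_* values are
 * assumptions for the example; the ioctl command and the
 * overlay_targ_associate_t fields are the ones consumed by
 * overlay_target_associate() below.
 *
 *	overlay_targ_associate_t ota;
 *
 *	bzero(&ota, sizeof (ota));
 *	ota.ota_linkid = linkid;		(datalink id of the overlay)
 *	ota.ota_id = varpd_instance_id;		(must be non-zero)
 *	ota.ota_mode = OVERLAY_TARGET_POINT;
 *	ota.ota_provides = OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT;
 *						(must equal the plugin's
 *						ovp_dest)
 *	ota.ota_point.otp_ip = backend_ip6;	(must not be unspecified or
 *						v4-compat)
 *	ota.ota_point.otp_port = backend_port;	(must be non-zero)
 *	if (ioctl(fd, OVERLAY_TARG_ASSOCIATE, &ota) != 0)
 *		warn("failed to associate target");
 */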
/* ARGSUSED */
static int
overlay_target_associate(overlay_target_hdl_t *thdl, void *arg)
{
	overlay_dev_t *odd;
	overlay_target_t *ott;
	overlay_targ_associate_t *ota = arg;

	odd = overlay_hold_by_dlid(ota->ota_linkid);
	if (odd == NULL)
		return (ENOENT);

	if (ota->ota_id == 0) {
		overlay_hold_rele(odd);
		return (EINVAL);
	}

	if (ota->ota_mode != OVERLAY_TARGET_POINT &&
	    ota->ota_mode != OVERLAY_TARGET_DYNAMIC) {
		overlay_hold_rele(odd);
		return (EINVAL);
	}

	if (ota->ota_provides != odd->odd_plugin->ovp_dest) {
		overlay_hold_rele(odd);
		return (EINVAL);
	}

	if (ota->ota_mode == OVERLAY_TARGET_POINT) {
		if (ota->ota_provides & OVERLAY_PLUGIN_D_IP) {
			if (IN6_IS_ADDR_UNSPECIFIED(&ota->ota_point.otp_ip) ||
			    IN6_IS_ADDR_V4COMPAT(&ota->ota_point.otp_ip) ||
			    IN6_IS_ADDR_V4MAPPED_ANY(&ota->ota_point.otp_ip)) {
				overlay_hold_rele(odd);
				return (EINVAL);
			}
		}

		if (ota->ota_provides & OVERLAY_PLUGIN_D_PORT) {
			if (ota->ota_point.otp_port == 0) {
				overlay_hold_rele(odd);
				return (EINVAL);
			}
		}
	}

	ott = kmem_cache_alloc(overlay_target_cache, KM_SLEEP);
	ott->ott_flags = 0;
	ott->ott_ocount = 0;
	ott->ott_mode = ota->ota_mode;
	ott->ott_dest = ota->ota_provides;
	ott->ott_id = ota->ota_id;

	if (ott->ott_mode == OVERLAY_TARGET_POINT) {
		bcopy(&ota->ota_point, &ott->ott_u.ott_point,
		    sizeof (overlay_target_point_t));
	} else {
		ott->ott_u.ott_dyn.ott_dhash = refhash_create(OVERLAY_HSIZE,
		    overlay_mac_hash, overlay_mac_cmp,
		    overlay_target_entry_dtor, sizeof (overlay_target_entry_t),
		    offsetof(overlay_target_entry_t, ote_reflink),
		    offsetof(overlay_target_entry_t, ote_addr), KM_SLEEP);
		avl_create(&ott->ott_u.ott_dyn.ott_tree, overlay_mac_avl,
		    sizeof (overlay_target_entry_t),
		    offsetof(overlay_target_entry_t, ote_avllink));
	}
	mutex_enter(&odd->odd_lock);
	if (odd->odd_flags & OVERLAY_F_VARPD) {
		mutex_exit(&odd->odd_lock);
		kmem_cache_free(overlay_target_cache, ott);
		overlay_hold_rele(odd);
		return (EEXIST);
	}

	odd->odd_flags |= OVERLAY_F_VARPD;
	odd->odd_target = ott;
	mutex_exit(&odd->odd_lock);

	overlay_hold_rele(odd);

	return (0);
}

/* ARGSUSED */
static int
overlay_target_degrade(overlay_target_hdl_t *thdl, void *arg)
{
	overlay_dev_t *odd;
	overlay_targ_degrade_t *otd = arg;

	odd = overlay_hold_by_dlid(otd->otd_linkid);
	if (odd == NULL)
		return (ENOENT);

	overlay_fm_degrade(odd, otd->otd_buf);
	overlay_hold_rele(odd);
	return (0);
}

/* ARGSUSED */
static int
overlay_target_restore(overlay_target_hdl_t *thdl, void *arg)
{
	overlay_dev_t *odd;
	overlay_targ_id_t *otid = arg;

	odd = overlay_hold_by_dlid(otid->otid_linkid);
	if (odd == NULL)
		return (ENOENT);

	overlay_fm_restore(odd);
	overlay_hold_rele(odd);
	return (0);
}

/* ARGSUSED */
static int
overlay_target_disassociate(overlay_target_hdl_t *thdl, void *arg)
{
	overlay_dev_t *odd;
	overlay_targ_id_t *otid = arg;

	odd = overlay_hold_by_dlid(otid->otid_linkid);
	if (odd == NULL)
		return (ENOENT);

	mutex_enter(&odd->odd_lock);
	odd->odd_flags &= ~OVERLAY_F_VARPD;
	mutex_exit(&odd->odd_lock);

	overlay_hold_rele(odd);
	return (0);
}
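/*
 * Roughly speaking, the lookup protocol with varpd works in three steps.
 * OVERLAY_TARG_LOOKUP blocks for up to about a second waiting for a queued
 * target entry and, when one is available, hands its identifying information
 * to userland and parks the entry on the calling handle's outstanding list.
 * varpd then answers with either OVERLAY_TARG_RESPOND, which records the
 * destination and drains any queued messages, or OVERLAY_TARG_DROP, which
 * frees one queued message and requeues the entry if more remain. A
 * consumer's event loop might look roughly like the following sketch (fd and
 * the resolve() helper are assumptions for the example):
 *
 *	overlay_targ_lookup_t otl;
 *	overlay_targ_resp_t otr;
 *
 *	for (;;) {
 *		if (ioctl(fd, OVERLAY_TARG_LOOKUP, &otl) != 0)
 *			continue;	(ETIME: nothing pending, poll again)
 *		otr.otr_reqid = otl.otl_reqid;
 *		if (resolve(&otl, &otr.otr_answer) == 0)
 *			(void) ioctl(fd, OVERLAY_TARG_RESPOND, &otr);
 *		else
 *			(void) ioctl(fd, OVERLAY_TARG_DROP, &otr);
 *	}
 */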
static int
overlay_target_lookup_request(overlay_target_hdl_t *thdl, void *arg)
{
	overlay_targ_lookup_t *otl = arg;
	overlay_target_entry_t *entry;
	clock_t ret, timeout;
	mac_header_info_t mhi;

	timeout = ddi_get_lbolt() + drv_usectohz(MICROSEC);
again:
	mutex_enter(&overlay_target_lock);
	while (list_is_empty(&overlay_target_list)) {
		ret = cv_timedwait(&overlay_target_condvar,
		    &overlay_target_lock, timeout);
		if (ret == -1) {
			mutex_exit(&overlay_target_lock);
			return (ETIME);
		}
	}
	entry = list_remove_head(&overlay_target_list);
	mutex_exit(&overlay_target_lock);
	mutex_enter(&entry->ote_lock);
	if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
		ASSERT(entry->ote_chead == NULL);
		mutex_exit(&entry->ote_lock);
		goto again;
	}
	ASSERT(entry->ote_chead != NULL);

	/*
	 * If we have a bogon that doesn't have a valid mac header, drop it and
	 * try again.
	 */
	if (mac_vlan_header_info(entry->ote_odd->odd_mh, entry->ote_chead,
	    &mhi) != 0) {
		boolean_t queue = B_FALSE;
		mblk_t *mp = entry->ote_chead;
		entry->ote_chead = mp->b_next;
		mp->b_next = NULL;
		if (entry->ote_ctail == mp)
			entry->ote_ctail = entry->ote_chead;
		entry->ote_mbsize -= msgsize(mp);
		if (entry->ote_chead != NULL)
			queue = B_TRUE;
		mutex_exit(&entry->ote_lock);
		if (queue == B_TRUE)
			overlay_target_queue(entry);
		freemsg(mp);
		goto again;
	}

	otl->otl_dlid = entry->ote_odd->odd_linkid;
	otl->otl_reqid = (uintptr_t)entry;
	otl->otl_varpdid = entry->ote_ott->ott_id;
	otl->otl_vnetid = entry->ote_odd->odd_vid;

	otl->otl_hdrsize = mhi.mhi_hdrsize;
	otl->otl_pktsize = msgsize(entry->ote_chead) - otl->otl_hdrsize;
	bcopy(mhi.mhi_daddr, otl->otl_dstaddr, ETHERADDRL);
	bcopy(mhi.mhi_saddr, otl->otl_srcaddr, ETHERADDRL);
	otl->otl_dsttype = mhi.mhi_dsttype;
	otl->otl_sap = mhi.mhi_bindsap;
	otl->otl_vlan = VLAN_ID(mhi.mhi_tci);
	mutex_exit(&entry->ote_lock);

	mutex_enter(&thdl->oth_lock);
	list_insert_tail(&thdl->oth_outstanding, entry);
	mutex_exit(&thdl->oth_lock);

	return (0);
}
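/*
 * The otl_reqid handed to userland above is simply the kernel address of the
 * overlay_target_entry_t cast to an integer. The RESPOND, DROP, PKT, INJECT
 * and RESEND handlers below validate a request id by walking the handle's
 * outstanding list and comparing pointer values, so a stale or bogus id from
 * varpd results in EINVAL or ENOENT rather than a stray dereference.
 */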
static int
overlay_target_lookup_respond(overlay_target_hdl_t *thdl, void *arg)
{
	const overlay_targ_resp_t *otr = arg;
	overlay_target_entry_t *entry;
	mblk_t *mp;

	mutex_enter(&thdl->oth_lock);
	for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
	    entry = list_next(&thdl->oth_outstanding, entry)) {
		if ((uintptr_t)entry == otr->otr_reqid)
			break;
	}

	if (entry == NULL) {
		mutex_exit(&thdl->oth_lock);
		return (EINVAL);
	}
	list_remove(&thdl->oth_outstanding, entry);
	mutex_exit(&thdl->oth_lock);

	mutex_enter(&entry->ote_lock);
	bcopy(&otr->otr_answer, &entry->ote_dest,
	    sizeof (overlay_target_point_t));
	entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
	entry->ote_flags |= OVERLAY_ENTRY_F_VALID;
	mp = entry->ote_chead;
	entry->ote_chead = NULL;
	entry->ote_ctail = NULL;
	entry->ote_mbsize = 0;
	entry->ote_vtime = gethrtime();
	mutex_exit(&entry->ote_lock);

	/*
	 * For now do an in-situ drain.
	 */
	mp = overlay_m_tx(entry->ote_odd, mp);
	freemsgchain(mp);

	mutex_enter(&entry->ote_ott->ott_lock);
	entry->ote_ott->ott_ocount--;
	cv_signal(&entry->ote_ott->ott_cond);
	mutex_exit(&entry->ote_ott->ott_lock);

	return (0);
}

static int
overlay_target_lookup_drop(overlay_target_hdl_t *thdl, void *arg)
{
	const overlay_targ_resp_t *otr = arg;
	overlay_target_entry_t *entry;
	mblk_t *mp;
	boolean_t queue = B_FALSE;

	mutex_enter(&thdl->oth_lock);
	for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
	    entry = list_next(&thdl->oth_outstanding, entry)) {
		if ((uintptr_t)entry == otr->otr_reqid)
			break;
	}

	if (entry == NULL) {
		mutex_exit(&thdl->oth_lock);
		return (EINVAL);
	}
	list_remove(&thdl->oth_outstanding, entry);
	mutex_exit(&thdl->oth_lock);

	mutex_enter(&entry->ote_lock);

	/* Safeguard against a confused varpd */
	if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
		entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
		DTRACE_PROBE1(overlay__target__valid__drop,
		    overlay_target_entry_t *, entry);
		mutex_exit(&entry->ote_lock);
		goto done;
	}

	mp = entry->ote_chead;
	if (mp != NULL) {
		entry->ote_chead = mp->b_next;
		mp->b_next = NULL;
		if (entry->ote_ctail == mp)
			entry->ote_ctail = entry->ote_chead;
		entry->ote_mbsize -= msgsize(mp);
	}
	if (entry->ote_chead != NULL) {
		queue = B_TRUE;
		entry->ote_flags |= OVERLAY_ENTRY_F_PENDING;
	} else {
		entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
	}
	mutex_exit(&entry->ote_lock);

	if (queue == B_TRUE)
		overlay_target_queue(entry);
	freemsg(mp);

done:
	mutex_enter(&entry->ote_ott->ott_lock);
	entry->ote_ott->ott_ocount--;
	cv_signal(&entry->ote_ott->ott_cond);
	mutex_exit(&entry->ote_ott->ott_lock);

	return (0);
}

/* ARGSUSED */
static int
overlay_target_pkt_copyin(const void *ubuf, void **outp, size_t *bsize,
    int flags)
{
	overlay_targ_pkt_t *pkt;
	overlay_targ_pkt32_t *pkt32;

	pkt = kmem_alloc(sizeof (overlay_targ_pkt_t), KM_SLEEP);
	*outp = pkt;
	*bsize = sizeof (overlay_targ_pkt_t);
	if (ddi_model_convert_from(flags & FMODELS) == DDI_MODEL_ILP32) {
		uintptr_t addr;

		if (ddi_copyin(ubuf, pkt, sizeof (overlay_targ_pkt32_t),
		    flags & FKIOCTL) != 0) {
			kmem_free(pkt, *bsize);
			return (EFAULT);
		}
		pkt32 = (overlay_targ_pkt32_t *)pkt;
		addr = pkt32->otp_buf;
		pkt->otp_buf = (void *)addr;
	} else {
		if (ddi_copyin(ubuf, pkt, *bsize, flags & FKIOCTL) != 0) {
			kmem_free(pkt, *bsize);
			return (EFAULT);
		}
	}
	return (0);
}

static int
overlay_target_pkt_copyout(void *ubuf, void *buf, size_t bufsize,
    int flags)
{
	if (ddi_model_convert_from(flags & FMODELS) == DDI_MODEL_ILP32) {
		overlay_targ_pkt_t *pkt = buf;
		overlay_targ_pkt32_t *pkt32 = buf;
		uintptr_t addr = (uintptr_t)pkt->otp_buf;
		pkt32->otp_buf = (caddr32_t)addr;
		if (ddi_copyout(buf, ubuf, sizeof (overlay_targ_pkt32_t),
		    flags & FKIOCTL) != 0)
			return (EFAULT);
	} else {
		if (ddi_copyout(buf, ubuf, bufsize, flags & FKIOCTL) != 0)
			return (EFAULT);
	}
	return (0);
}
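/*
 * The next three handlers deal with packet bodies. OVERLAY_TARG_PKT copies a
 * queued packet for an outstanding lookup out to the user's otp_buf;
 * OVERLAY_TARG_INJECT pushes a user-supplied frame up the device's MAC
 * stream via mac_rx() as if it had been received from the underlay; and
 * OVERLAY_TARG_RESEND transmits a user-supplied frame back out through the
 * overlay via overlay_m_tx(). Inject and resend identify the device either
 * by otp_linkid or, when otp_linkid is UINT64_MAX, by the device that owns
 * the outstanding request named in otp_reqid.
 */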
static int
overlay_target_packet(overlay_target_hdl_t *thdl, void *arg)
{
	overlay_targ_pkt_t *pkt = arg;
	overlay_target_entry_t *entry;
	mblk_t *mp;
	size_t mlen;
	size_t boff;

	mutex_enter(&thdl->oth_lock);
	for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
	    entry = list_next(&thdl->oth_outstanding, entry)) {
		if ((uintptr_t)entry == pkt->otp_reqid)
			break;
	}

	if (entry == NULL) {
		mutex_exit(&thdl->oth_lock);
		return (EINVAL);
	}
	mutex_enter(&entry->ote_lock);
	mutex_exit(&thdl->oth_lock);
	mp = entry->ote_chead;
	/* Protect against a rogue varpd */
	if (mp == NULL) {
		mutex_exit(&entry->ote_lock);
		return (EINVAL);
	}
	mlen = MIN(msgsize(mp), pkt->otp_size);
	pkt->otp_size = mlen;
	boff = 0;
	while (mlen > 0) {
		size_t wlen = MIN(MBLKL(mp), mlen);
		if (ddi_copyout(mp->b_rptr,
		    (void *)((uintptr_t)pkt->otp_buf + boff),
		    wlen, 0) != 0) {
			mutex_exit(&entry->ote_lock);
			return (EFAULT);
		}
		mlen -= wlen;
		boff += wlen;
		mp = mp->b_cont;
	}
	mutex_exit(&entry->ote_lock);
	return (0);
}

static int
overlay_target_inject(overlay_target_hdl_t *thdl, void *arg)
{
	overlay_targ_pkt_t *pkt = arg;
	overlay_target_entry_t *entry;
	overlay_dev_t *odd;
	mblk_t *mp;

	if (pkt->otp_size > ETHERMAX + VLAN_TAGSZ)
		return (EINVAL);

	mp = allocb(pkt->otp_size, 0);
	if (mp == NULL)
		return (ENOMEM);

	if (ddi_copyin(pkt->otp_buf, mp->b_rptr, pkt->otp_size, 0) != 0) {
		freeb(mp);
		return (EFAULT);
	}
	mp->b_wptr += pkt->otp_size;

	if (pkt->otp_linkid != UINT64_MAX) {
		odd = overlay_hold_by_dlid(pkt->otp_linkid);
		if (odd == NULL) {
			freeb(mp);
			return (ENOENT);
		}
	} else {
		mutex_enter(&thdl->oth_lock);
		for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
		    entry = list_next(&thdl->oth_outstanding, entry)) {
			if ((uintptr_t)entry == pkt->otp_reqid)
				break;
		}

		if (entry == NULL) {
			mutex_exit(&thdl->oth_lock);
			freeb(mp);
			return (ENOENT);
		}
		odd = entry->ote_odd;
		mutex_exit(&thdl->oth_lock);
	}

	mutex_enter(&odd->odd_lock);
	overlay_io_start(odd, OVERLAY_F_IN_RX);
	mutex_exit(&odd->odd_lock);

	mac_rx(odd->odd_mh, NULL, mp);

	mutex_enter(&odd->odd_lock);
	overlay_io_done(odd, OVERLAY_F_IN_RX);
	mutex_exit(&odd->odd_lock);

	return (0);
}

static int
overlay_target_resend(overlay_target_hdl_t *thdl, void *arg)
{
	overlay_targ_pkt_t *pkt = arg;
	overlay_target_entry_t *entry;
	overlay_dev_t *odd;
	mblk_t *mp;

	if (pkt->otp_size > ETHERMAX + VLAN_TAGSZ)
		return (EINVAL);

	mp = allocb(pkt->otp_size, 0);
	if (mp == NULL)
		return (ENOMEM);

	if (ddi_copyin(pkt->otp_buf, mp->b_rptr, pkt->otp_size, 0) != 0) {
		freeb(mp);
		return (EFAULT);
	}
	mp->b_wptr += pkt->otp_size;

	if (pkt->otp_linkid != UINT64_MAX) {
		odd = overlay_hold_by_dlid(pkt->otp_linkid);
		if (odd == NULL) {
			freeb(mp);
			return (ENOENT);
		}
	} else {
		mutex_enter(&thdl->oth_lock);
		for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
		    entry = list_next(&thdl->oth_outstanding, entry)) {
			if ((uintptr_t)entry == pkt->otp_reqid)
				break;
		}

		if (entry == NULL) {
			mutex_exit(&thdl->oth_lock);
			freeb(mp);
			return (ENOENT);
		}
		odd = entry->ote_odd;
		mutex_exit(&thdl->oth_lock);
	}

	mp = overlay_m_tx(odd, mp);
	freemsgchain(mp);

	return (0);
}
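/*
 * An illustrative sketch (not from the original source): the
 * OVERLAY_TARG_LIST ioctl follows a common two-call pattern. A first call
 * with otl_nents set to zero only counts devices (the count comes back in
 * the leading uint32_t of the user's buffer), and a second call with an
 * appropriately sized buffer fills in the datalink ids. The layout of
 * overlay_targ_list_t is assumed here to be a leading otl_nents count
 * followed by an otl_ents array, matching how this file copies data in and
 * out.
 *
 *	overlay_targ_list_t hdr, *otl;
 *	size_t sz;
 *
 *	hdr.otl_nents = 0;
 *	if (ioctl(fd, OVERLAY_TARG_LIST, &hdr) != 0)
 *		warn("list count failed");
 *	sz = sizeof (hdr) + hdr.otl_nents * sizeof (uint32_t);
 *	otl = malloc(sz);
 *	otl->otl_nents = hdr.otl_nents;
 *	if (ioctl(fd, OVERLAY_TARG_LIST, otl) != 0)
 *		warn("list failed");
 */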
typedef struct overlay_targ_list_int {
	boolean_t	otli_count;
	uint32_t	otli_cur;
	uint32_t	otli_nents;
	uint32_t	otli_ents[];
} overlay_targ_list_int_t;

static int
overlay_target_list_copyin(const void *ubuf, void **outp, size_t *bsize,
    int flags)
{
	overlay_targ_list_t n;
	overlay_targ_list_int_t *otl;

	if (ddi_copyin(ubuf, &n, sizeof (overlay_targ_list_t),
	    flags & FKIOCTL) != 0)
		return (EFAULT);

	/*
	 * Don't let the user ask for so many entries that the size
	 * computation below would overflow.
	 */
	if (n.otl_nents >= INT32_MAX / sizeof (uint32_t))
		return (EINVAL);
	*bsize = sizeof (overlay_targ_list_int_t) +
	    sizeof (uint32_t) * n.otl_nents;
	otl = kmem_zalloc(*bsize, KM_SLEEP);
	otl->otli_cur = 0;
	otl->otli_nents = n.otl_nents;
	if (otl->otli_nents != 0) {
		otl->otli_count = B_FALSE;
		if (ddi_copyin((void *)((uintptr_t)ubuf +
		    offsetof(overlay_targ_list_t, otl_ents)),
		    otl->otli_ents, n.otl_nents * sizeof (uint32_t),
		    flags & FKIOCTL) != 0) {
			kmem_free(otl, *bsize);
			return (EFAULT);
		}
	} else {
		otl->otli_count = B_TRUE;
	}

	*outp = otl;
	return (0);
}

static int
overlay_target_ioctl_list_cb(overlay_dev_t *odd, void *arg)
{
	overlay_targ_list_int_t *otl = arg;

	if (otl->otli_cur < otl->otli_nents)
		otl->otli_ents[otl->otli_cur] = odd->odd_linkid;
	otl->otli_cur++;
	return (0);
}

/* ARGSUSED */
static int
overlay_target_ioctl_list(overlay_target_hdl_t *thdl, void *arg)
{
	overlay_dev_iter(overlay_target_ioctl_list_cb, arg);
	return (0);
}

/* ARGSUSED */
static int
overlay_target_list_copyout(void *ubuf, void *buf, size_t bufsize, int flags)
{
	overlay_targ_list_int_t *otl = buf;

	if (ddi_copyout(&otl->otli_cur, ubuf, sizeof (uint32_t),
	    flags & FKIOCTL) != 0)
		return (EFAULT);

	if (otl->otli_count == B_FALSE) {
		if (ddi_copyout(otl->otli_ents,
		    (void *)((uintptr_t)ubuf +
		    offsetof(overlay_targ_list_t, otl_ents)),
		    sizeof (uint32_t) * otl->otli_nents,
		    flags & FKIOCTL) != 0)
			return (EFAULT);
	}
	return (0);
}
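/*
 * The OVERLAY_TARG_CACHE_* ioctls let varpd inspect and pre-populate the
 * target cache. GET and REMOVE operate on the single MAC address in
 * otc_entry.otce_mac, SET installs either a destination or a drop entry on a
 * device that already has a dynamic-mode target associated, and FLUSH
 * invalidates every cached entry on a device. As an illustrative sketch (fd,
 * linkid, mac and the backend_* values are assumptions for the example), a
 * consumer might pre-seed a destination like this:
 *
 *	overlay_targ_cache_t otc;
 *
 *	bzero(&otc, sizeof (otc));
 *	otc.otc_linkid = linkid;
 *	bcopy(mac, otc.otc_entry.otce_mac, ETHERADDRL);
 *	otc.otc_entry.otce_flags = 0;	(or OVERLAY_TARGET_CACHE_DROP)
 *	otc.otc_entry.otce_dest.otp_ip = backend_ip6;
 *	otc.otc_entry.otce_dest.otp_port = backend_port;
 *	if (ioctl(fd, OVERLAY_TARG_CACHE_SET, &otc) != 0)
 *		warn("cache set failed");
 */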
/* ARGSUSED */
static int
overlay_target_cache_get(overlay_target_hdl_t *thdl, void *arg)
{
	int ret = 0;
	overlay_dev_t *odd;
	overlay_target_t *ott;
	overlay_targ_cache_t *otc = arg;

	odd = overlay_hold_by_dlid(otc->otc_linkid);
	if (odd == NULL)
		return (ENOENT);

	mutex_enter(&odd->odd_lock);
	if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
		mutex_exit(&odd->odd_lock);
		overlay_hold_rele(odd);
		return (ENXIO);
	}
	ott = odd->odd_target;
	if (ott->ott_mode != OVERLAY_TARGET_POINT &&
	    ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
		mutex_exit(&odd->odd_lock);
		overlay_hold_rele(odd);
		return (ENOTSUP);
	}
	mutex_enter(&ott->ott_lock);
	mutex_exit(&odd->odd_lock);

	if (ott->ott_mode == OVERLAY_TARGET_POINT) {
		otc->otc_entry.otce_flags = 0;
		bcopy(&ott->ott_u.ott_point, &otc->otc_entry.otce_dest,
		    sizeof (overlay_target_point_t));
	} else {
		overlay_target_entry_t *ote;
		ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
		    otc->otc_entry.otce_mac);
		if (ote != NULL) {
			mutex_enter(&ote->ote_lock);
			if ((ote->ote_flags &
			    OVERLAY_ENTRY_F_VALID_MASK) != 0) {
				if (ote->ote_flags & OVERLAY_ENTRY_F_DROP) {
					otc->otc_entry.otce_flags =
					    OVERLAY_TARGET_CACHE_DROP;
				} else {
					otc->otc_entry.otce_flags = 0;
					bcopy(&ote->ote_dest,
					    &otc->otc_entry.otce_dest,
					    sizeof (overlay_target_point_t));
				}
				ret = 0;
			} else {
				ret = ENOENT;
			}
			mutex_exit(&ote->ote_lock);
		} else {
			ret = ENOENT;
		}
	}

	mutex_exit(&ott->ott_lock);
	overlay_hold_rele(odd);

	return (ret);
}

/* ARGSUSED */
static int
overlay_target_cache_set(overlay_target_hdl_t *thdl, void *arg)
{
	overlay_dev_t *odd;
	overlay_target_t *ott;
	overlay_target_entry_t *ote;
	overlay_targ_cache_t *otc = arg;
	mblk_t *mp = NULL;

	if (otc->otc_entry.otce_flags & ~OVERLAY_TARGET_CACHE_DROP)
		return (EINVAL);

	odd = overlay_hold_by_dlid(otc->otc_linkid);
	if (odd == NULL)
		return (ENOENT);

	mutex_enter(&odd->odd_lock);
	if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
		mutex_exit(&odd->odd_lock);
		overlay_hold_rele(odd);
		return (ENXIO);
	}
	ott = odd->odd_target;
	if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
		mutex_exit(&odd->odd_lock);
		overlay_hold_rele(odd);
		return (ENOTSUP);
	}
	mutex_enter(&ott->ott_lock);
	mutex_exit(&odd->odd_lock);

	ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
	    otc->otc_entry.otce_mac);
	if (ote == NULL) {
		ote = kmem_cache_alloc(overlay_entry_cache, KM_SLEEP);
		bcopy(otc->otc_entry.otce_mac, ote->ote_addr, ETHERADDRL);
		ote->ote_chead = ote->ote_ctail = NULL;
		ote->ote_mbsize = 0;
		ote->ote_ott = ott;
		ote->ote_odd = odd;
		mutex_enter(&ote->ote_lock);
		refhash_insert(ott->ott_u.ott_dyn.ott_dhash, ote);
		avl_add(&ott->ott_u.ott_dyn.ott_tree, ote);
	} else {
		mutex_enter(&ote->ote_lock);
	}

	if (otc->otc_entry.otce_flags & OVERLAY_TARGET_CACHE_DROP) {
		ote->ote_flags |= OVERLAY_ENTRY_F_DROP;
	} else {
		ote->ote_flags |= OVERLAY_ENTRY_F_VALID;
		bcopy(&otc->otc_entry.otce_dest, &ote->ote_dest,
		    sizeof (overlay_target_point_t));
		mp = ote->ote_chead;
		ote->ote_chead = NULL;
		ote->ote_ctail = NULL;
		ote->ote_mbsize = 0;
		ote->ote_vtime = gethrtime();
	}

	mutex_exit(&ote->ote_lock);
	mutex_exit(&ott->ott_lock);

	if (mp != NULL) {
		mp = overlay_m_tx(ote->ote_odd, mp);
		freemsgchain(mp);
	}

	overlay_hold_rele(odd);

	return (0);
}

/* ARGSUSED */
static int
overlay_target_cache_remove(overlay_target_hdl_t *thdl, void *arg)
{
	int ret = 0;
	overlay_dev_t *odd;
	overlay_target_t *ott;
	overlay_target_entry_t *ote;
	overlay_targ_cache_t *otc = arg;

	odd = overlay_hold_by_dlid(otc->otc_linkid);
	if (odd == NULL)
		return (ENOENT);

	mutex_enter(&odd->odd_lock);
	if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
		mutex_exit(&odd->odd_lock);
		overlay_hold_rele(odd);
		return (ENXIO);
	}
	ott = odd->odd_target;
	if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
		mutex_exit(&odd->odd_lock);
		overlay_hold_rele(odd);
		return (ENOTSUP);
	}
	mutex_enter(&ott->ott_lock);
	mutex_exit(&odd->odd_lock);

	ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
	    otc->otc_entry.otce_mac);
	if (ote != NULL) {
		mutex_enter(&ote->ote_lock);
		ote->ote_flags &= ~OVERLAY_ENTRY_F_VALID_MASK;
		mutex_exit(&ote->ote_lock);
		ret = 0;
	} else {
		ret = ENOENT;
	}

	mutex_exit(&ott->ott_lock);
	overlay_hold_rele(odd);

	return (ret);
}
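/*
 * Note that flushing does not free anything: the loop below merely clears
 * the valid/drop bits on every entry in the AVL tree, so the entries
 * themselves stay in the hash table and tree until the device's target state
 * is torn down, and subsequent transmits to those MACs trigger fresh lookups.
 */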
/* ARGSUSED */
static int
overlay_target_cache_flush(overlay_target_hdl_t *thdl, void *arg)
{
	avl_tree_t *avl;
	overlay_dev_t *odd;
	overlay_target_t *ott;
	overlay_target_entry_t *ote;
	overlay_targ_cache_t *otc = arg;

	odd = overlay_hold_by_dlid(otc->otc_linkid);
	if (odd == NULL)
		return (ENOENT);

	mutex_enter(&odd->odd_lock);
	if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
		mutex_exit(&odd->odd_lock);
		overlay_hold_rele(odd);
		return (ENXIO);
	}
	ott = odd->odd_target;
	if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
		mutex_exit(&odd->odd_lock);
		overlay_hold_rele(odd);
		return (ENOTSUP);
	}
	mutex_enter(&ott->ott_lock);
	mutex_exit(&odd->odd_lock);
	avl = &ott->ott_u.ott_dyn.ott_tree;

	for (ote = avl_first(avl); ote != NULL; ote = AVL_NEXT(avl, ote)) {
		mutex_enter(&ote->ote_lock);
		ote->ote_flags &= ~OVERLAY_ENTRY_F_VALID_MASK;
		mutex_exit(&ote->ote_lock);
	}
	ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
	    otc->otc_entry.otce_mac);

	mutex_exit(&ott->ott_lock);
	overlay_hold_rele(odd);

	return (0);
}

static int
overlay_target_cache_iter_copyin(const void *ubuf, void **outp, size_t *bsize,
    int flags)
{
	overlay_targ_cache_iter_t base, *iter;

	if (ddi_copyin(ubuf, &base, sizeof (overlay_targ_cache_iter_t),
	    flags & FKIOCTL) != 0)
		return (EFAULT);

	if (base.otci_count > OVERLAY_TARGET_ITER_MAX)
		return (E2BIG);

	if (base.otci_count == 0)
		return (EINVAL);

	*bsize = sizeof (overlay_targ_cache_iter_t) +
	    base.otci_count * sizeof (overlay_targ_cache_entry_t);
	iter = kmem_alloc(*bsize, KM_SLEEP);
	bcopy(&base, iter, sizeof (overlay_targ_cache_iter_t));
	*outp = iter;

	return (0);
}

typedef struct overlay_targ_cache_marker {
	uint8_t otcm_mac[ETHERADDRL];
	uint16_t otcm_done;
} overlay_targ_cache_marker_t;
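/*
 * Iteration is resumable: the otci_marker area of the user's
 * overlay_targ_cache_iter_t is interpreted as the marker defined above, which
 * records the last MAC address returned and a done flag. Callers are
 * presumably expected to start with the marker zeroed and to pass the
 * structure back unchanged on each subsequent call; once otcm_done is set,
 * further calls return zero entries.
 */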
/* ARGSUSED */
static int
overlay_target_cache_iter(overlay_target_hdl_t *thdl, void *arg)
{
	overlay_dev_t *odd;
	overlay_target_t *ott;
	overlay_target_entry_t lookup, *ent;
	overlay_targ_cache_marker_t *mark;
	avl_index_t where;
	avl_tree_t *avl;
	uint16_t written = 0;

	overlay_targ_cache_iter_t *iter = arg;
	mark = (void *)&iter->otci_marker;

	if (mark->otcm_done != 0) {
		iter->otci_count = 0;
		return (0);
	}

	odd = overlay_hold_by_dlid(iter->otci_linkid);
	if (odd == NULL)
		return (ENOENT);

	mutex_enter(&odd->odd_lock);
	if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
		mutex_exit(&odd->odd_lock);
		overlay_hold_rele(odd);
		return (ENXIO);
	}
	ott = odd->odd_target;
	if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC &&
	    ott->ott_mode != OVERLAY_TARGET_POINT) {
		mutex_exit(&odd->odd_lock);
		overlay_hold_rele(odd);
		return (ENOTSUP);
	}

	/*
	 * Holding this lock across the entire iteration probably isn't very
	 * good. We should perhaps add an r/w lock for the avl tree. But we'll
	 * wait until we know it's necessary before we do more.
	 */
	mutex_enter(&ott->ott_lock);
	mutex_exit(&odd->odd_lock);

	if (ott->ott_mode == OVERLAY_TARGET_POINT) {
		overlay_targ_cache_entry_t *out = &iter->otci_ents[0];
		bzero(out->otce_mac, ETHERADDRL);
		out->otce_flags = 0;
		bcopy(&ott->ott_u.ott_point, &out->otce_dest,
		    sizeof (overlay_target_point_t));
		written++;
		mark->otcm_done = 1;
	}

	avl = &ott->ott_u.ott_dyn.ott_tree;
	bcopy(mark->otcm_mac, lookup.ote_addr, ETHERADDRL);
	ent = avl_find(avl, &lookup, &where);

	/*
	 * A NULL ent means that the entry does not exist, so we want to start
	 * with the closest node in the tree. This means that we implicitly
	 * rely on the tree's order: the first node will be the MAC
	 * 00:00:00:00:00:00 and the last will be ff:ff:ff:ff:ff:ff.
	 */
	if (ent == NULL) {
		ent = avl_nearest(avl, where, AVL_AFTER);
		if (ent == NULL) {
			mark->otcm_done = 1;
			goto done;
		}
	}

	for (; ent != NULL && written < iter->otci_count;
	    ent = AVL_NEXT(avl, ent)) {
		overlay_targ_cache_entry_t *out = &iter->otci_ents[written];
		mutex_enter(&ent->ote_lock);
		if ((ent->ote_flags & OVERLAY_ENTRY_F_VALID_MASK) == 0) {
			mutex_exit(&ent->ote_lock);
			continue;
		}
		bcopy(ent->ote_addr, out->otce_mac, ETHERADDRL);
		out->otce_flags = 0;
		if (ent->ote_flags & OVERLAY_ENTRY_F_DROP)
			out->otce_flags |= OVERLAY_TARGET_CACHE_DROP;
		if (ent->ote_flags & OVERLAY_ENTRY_F_VALID)
			bcopy(&ent->ote_dest, &out->otce_dest,
			    sizeof (overlay_target_point_t));
		written++;
		mutex_exit(&ent->ote_lock);
	}

	if (ent != NULL) {
		bcopy(ent->ote_addr, mark->otcm_mac, ETHERADDRL);
	} else {
		mark->otcm_done = 1;
	}

done:
	iter->otci_count = written;
	mutex_exit(&ott->ott_lock);
	overlay_hold_rele(odd);

	return (0);
}

/* ARGSUSED */
static int
overlay_target_cache_iter_copyout(void *ubuf, void *buf, size_t bufsize,
    int flags)
{
	size_t outsize;
	const overlay_targ_cache_iter_t *iter = buf;

	outsize = sizeof (overlay_targ_cache_iter_t) +
	    iter->otci_count * sizeof (overlay_targ_cache_entry_t);

	if (ddi_copyout(buf, ubuf, outsize, flags & FKIOCTL) != 0)
		return (EFAULT);

	return (0);
}
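/*
 * Each entry in the table below describes one ioctl. When the copyin or
 * copyout function is NULL, overlay_target_ioctl() falls back to a flat
 * ddi_copyin()/ddi_copyout() of oti_size bytes; oti_write marks ioctls that
 * require the device to have been opened with FWRITE, and oti_ncopyout
 * controls whether results are copied back out on success.
 */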
static overlay_target_ioctl_t overlay_target_ioctab[] = {
	{ OVERLAY_TARG_INFO, B_TRUE, B_TRUE,
		NULL, overlay_target_info,
		NULL, sizeof (overlay_targ_info_t) },
	{ OVERLAY_TARG_ASSOCIATE, B_TRUE, B_FALSE,
		NULL, overlay_target_associate,
		NULL, sizeof (overlay_targ_associate_t) },
	{ OVERLAY_TARG_DISASSOCIATE, B_TRUE, B_FALSE,
		NULL, overlay_target_disassociate,
		NULL, sizeof (overlay_targ_id_t) },
	{ OVERLAY_TARG_DEGRADE, B_TRUE, B_FALSE,
		NULL, overlay_target_degrade,
		NULL, sizeof (overlay_targ_degrade_t) },
	{ OVERLAY_TARG_RESTORE, B_TRUE, B_FALSE,
		NULL, overlay_target_restore,
		NULL, sizeof (overlay_targ_id_t) },
	{ OVERLAY_TARG_LOOKUP, B_FALSE, B_TRUE,
		NULL, overlay_target_lookup_request,
		NULL, sizeof (overlay_targ_lookup_t) },
	{ OVERLAY_TARG_RESPOND, B_TRUE, B_FALSE,
		NULL, overlay_target_lookup_respond,
		NULL, sizeof (overlay_targ_resp_t) },
	{ OVERLAY_TARG_DROP, B_TRUE, B_FALSE,
		NULL, overlay_target_lookup_drop,
		NULL, sizeof (overlay_targ_resp_t) },
	{ OVERLAY_TARG_PKT, B_TRUE, B_TRUE,
		overlay_target_pkt_copyin,
		overlay_target_packet,
		overlay_target_pkt_copyout,
		sizeof (overlay_targ_pkt_t) },
	{ OVERLAY_TARG_INJECT, B_TRUE, B_FALSE,
		overlay_target_pkt_copyin,
		overlay_target_inject,
		NULL, sizeof (overlay_targ_pkt_t) },
	{ OVERLAY_TARG_RESEND, B_TRUE, B_FALSE,
		overlay_target_pkt_copyin,
		overlay_target_resend,
		NULL, sizeof (overlay_targ_pkt_t) },
	{ OVERLAY_TARG_LIST, B_FALSE, B_TRUE,
		overlay_target_list_copyin,
		overlay_target_ioctl_list,
		overlay_target_list_copyout,
		sizeof (overlay_targ_list_t) },
	{ OVERLAY_TARG_CACHE_GET, B_FALSE, B_TRUE,
		NULL, overlay_target_cache_get,
		NULL, sizeof (overlay_targ_cache_t) },
	{ OVERLAY_TARG_CACHE_SET, B_TRUE, B_TRUE,
		NULL, overlay_target_cache_set,
		NULL, sizeof (overlay_targ_cache_t) },
	{ OVERLAY_TARG_CACHE_REMOVE, B_TRUE, B_TRUE,
		NULL, overlay_target_cache_remove,
		NULL, sizeof (overlay_targ_cache_t) },
	{ OVERLAY_TARG_CACHE_FLUSH, B_TRUE, B_TRUE,
		NULL, overlay_target_cache_flush,
		NULL, sizeof (overlay_targ_cache_t) },
	{ OVERLAY_TARG_CACHE_ITER, B_FALSE, B_TRUE,
		overlay_target_cache_iter_copyin,
		overlay_target_cache_iter,
		overlay_target_cache_iter_copyout,
		sizeof (overlay_targ_cache_iter_t) },
	{ 0 }
};

int
overlay_target_open(dev_t *devp, int flags, int otype, cred_t *credp)
{
	minor_t mid;
	overlay_target_hdl_t *thdl;

	if (secpolicy_dl_config(credp) != 0)
		return (EPERM);

	if (getminor(*devp) != 0)
		return (ENXIO);

	if (otype & OTYP_BLK)
		return (EINVAL);

	if (flags & ~(FREAD | FWRITE | FEXCL))
		return (EINVAL);

	if ((flags & FWRITE) &&
	    !(flags & FEXCL))
		return (EINVAL);

	if (!(flags & FREAD) && !(flags & FWRITE))
		return (EINVAL);

	if (crgetzoneid(credp) != GLOBAL_ZONEID)
		return (EPERM);

	mid = id_alloc(overlay_thdl_idspace);
	if (ddi_soft_state_zalloc(overlay_thdl_state, mid) != 0) {
		id_free(overlay_thdl_idspace, mid);
		return (ENXIO);
	}

	thdl = ddi_get_soft_state(overlay_thdl_state, mid);
	VERIFY(thdl != NULL);
	thdl->oth_minor = mid;
	thdl->oth_zoneid = crgetzoneid(credp);
	thdl->oth_oflags = flags;
	mutex_init(&thdl->oth_lock, NULL, MUTEX_DRIVER, NULL);
	list_create(&thdl->oth_outstanding, sizeof (overlay_target_entry_t),
	    offsetof(overlay_target_entry_t, ote_qlink));
	*devp = makedevice(getmajor(*devp), mid);

	mutex_enter(&overlay_target_lock);
	if ((flags & FEXCL) && overlay_target_excl == B_TRUE) {
		mutex_exit(&overlay_target_lock);
		list_destroy(&thdl->oth_outstanding);
		mutex_destroy(&thdl->oth_lock);
		ddi_soft_state_free(overlay_thdl_state, mid);
		id_free(overlay_thdl_idspace, mid);
		return (EEXIST);
	} else if ((flags & FEXCL) != 0) {
		VERIFY(overlay_target_excl == B_FALSE);
		overlay_target_excl = B_TRUE;
	}
	list_insert_tail(&overlay_thdl_list, thdl);
	mutex_exit(&overlay_target_lock);

	return (0);
}
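/*
 * These entry points implement the character device that varpd uses. open()
 * above requires the datalink-configuration privilege, is restricted to the
 * global zone, and requires FEXCL for writable opens, so at most one
 * exclusive (writing) consumer exists at a time. ioctl() dispatches through
 * the table above, and close() puts any outstanding lookup entries back on
 * the global pending list so that a replacement consumer can pick them up.
 */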
/* ARGSUSED */
int
overlay_target_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
    int *rvalp)
{
	overlay_target_ioctl_t *ioc;
	overlay_target_hdl_t *thdl;

	if (secpolicy_dl_config(credp) != 0)
		return (EPERM);

	if ((thdl = ddi_get_soft_state(overlay_thdl_state,
	    getminor(dev))) == NULL)
		return (ENXIO);

	for (ioc = &overlay_target_ioctab[0]; ioc->oti_cmd != 0; ioc++) {
		int ret;
		caddr_t buf;
		size_t bufsize;

		if (ioc->oti_cmd != cmd)
			continue;

		if (ioc->oti_write == B_TRUE && !(mode & FWRITE))
			return (EBADF);

		if (ioc->oti_copyin == NULL) {
			bufsize = ioc->oti_size;
			buf = kmem_alloc(bufsize, KM_SLEEP);
			if (ddi_copyin((void *)(uintptr_t)arg, buf, bufsize,
			    mode & FKIOCTL) != 0) {
				kmem_free(buf, bufsize);
				return (EFAULT);
			}
		} else {
			if ((ret = ioc->oti_copyin((void *)(uintptr_t)arg,
			    (void **)&buf, &bufsize, mode)) != 0)
				return (ret);
		}

		ret = ioc->oti_func(thdl, buf);
		if (ret == 0 && ioc->oti_size != 0 &&
		    ioc->oti_ncopyout == B_TRUE) {
			if (ioc->oti_copyout == NULL) {
				if (ddi_copyout(buf, (void *)(uintptr_t)arg,
				    bufsize, mode & FKIOCTL) != 0)
					ret = EFAULT;
			} else {
				ret = ioc->oti_copyout((void *)(uintptr_t)arg,
				    buf, bufsize, mode);
			}
		}

		kmem_free(buf, bufsize);
		return (ret);
	}

	return (ENOTTY);
}

/* ARGSUSED */
int
overlay_target_close(dev_t dev, int flags, int otype, cred_t *credp)
{
	overlay_target_hdl_t *thdl;
	overlay_target_entry_t *entry;
	minor_t mid = getminor(dev);

	if ((thdl = ddi_get_soft_state(overlay_thdl_state, mid)) == NULL)
		return (ENXIO);

	mutex_enter(&overlay_target_lock);
	list_remove(&overlay_thdl_list, thdl);
	mutex_enter(&thdl->oth_lock);
	while ((entry = list_remove_head(&thdl->oth_outstanding)) != NULL)
		list_insert_tail(&overlay_target_list, entry);
	cv_signal(&overlay_target_condvar);
	mutex_exit(&thdl->oth_lock);
	if ((thdl->oth_oflags & FEXCL) != 0) {
		VERIFY(overlay_target_excl == B_TRUE);
		overlay_target_excl = B_FALSE;
	}
	mutex_exit(&overlay_target_lock);

	list_destroy(&thdl->oth_outstanding);
	mutex_destroy(&thdl->oth_lock);
	mid = thdl->oth_minor;
	ddi_soft_state_free(overlay_thdl_state, mid);
	id_free(overlay_thdl_idspace, mid);

	return (0);
}