/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2016 Joyent, Inc.
 */

/*
 * Overlay device target cache management
 *
 * For more information, see the big theory statement in
 * uts/common/io/overlay/overlay.c
 */

#include <sys/types.h>
#include <sys/ethernet.h>
#include <sys/kmem.h>
#include <sys/policy.h>
#include <sys/sysmacros.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/strsubr.h>
#include <sys/mac_provider.h>
#include <sys/mac_client.h>
#include <sys/mac_client_priv.h>
#include <sys/vlan.h>
#include <sys/crc32.h>
#include <sys/cred.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>

#include <sys/overlay_impl.h>
#include <sys/sdt.h>

/*
 * This is total straw man, but at least it's a prime number. Here we're
 * going to have to go through and do a lot of evaluation and understanding as
 * to how these target caches should grow and shrink, as well as, memory
 * pressure and evictions. This just gives us a starting point that'll be 'good
 * enough', until it's not.
 */
#define	OVERLAY_HSIZE	823

/*
 * We use this data structure to keep track of what requests have been actively
 * allocated to a given instance so we know what to put back on the pending
 * list.
 */
typedef struct overlay_target_hdl {
	minor_t oth_minor;		/* RO */
	zoneid_t oth_zoneid;		/* RO */
	int oth_oflags;			/* RO */
	list_node_t oth_link;		/* overlay_target_lock */
	kmutex_t oth_lock;
	list_t oth_outstanding;		/* oth_lock */
} overlay_target_hdl_t;

typedef int (*overlay_target_copyin_f)(const void *, void **, size_t *, int);
typedef int (*overlay_target_ioctl_f)(overlay_target_hdl_t *, void *);
typedef int (*overlay_target_copyout_f)(void *, void *, size_t, int);

/*
 * One row of the target ioctl dispatch table: how to copy the argument in
 * from userland, the handler to run, and how to copy the result back out.
 */
typedef struct overlay_target_ioctl {
	int		oti_cmd;	/* ioctl id */
	boolean_t	oti_write;	/* ioctl requires FWRITE */
	boolean_t	oti_ncopyout;	/* copyout data? */
	overlay_target_copyin_f oti_copyin;	/* copyin func */
	overlay_target_ioctl_f oti_func; /* function to call */
	overlay_target_copyout_f oti_copyout; /* copyout func */
	size_t		oti_size;	/* size of user level structure */
} overlay_target_ioctl_t;

static kmem_cache_t *overlay_target_cache;
static kmem_cache_t *overlay_entry_cache;
static id_space_t *overlay_thdl_idspace;
static void *overlay_thdl_state;

/*
 * When we support overlay devices in the NGZ, then all of these need to become
 * zone aware, by plugging into the netstack engine and becoming per-netstack
 * data.
 */
static list_t overlay_thdl_list;
static kmutex_t overlay_target_lock;
static kcondvar_t overlay_target_condvar;
static list_t overlay_target_list;
static boolean_t overlay_target_excl;

/*
 * Outstanding data per hash table entry.
100 */ 101 static int overlay_ent_size = 128 * 1024; 102 103 /* ARGSUSED */ 104 static int 105 overlay_target_cache_constructor(void *buf, void *arg, int kmflgs) 106 { 107 overlay_target_t *ott = buf; 108 109 mutex_init(&ott->ott_lock, NULL, MUTEX_DRIVER, NULL); 110 cv_init(&ott->ott_cond, NULL, CV_DRIVER, NULL); 111 return (0); 112 } 113 114 /* ARGSUSED */ 115 static void 116 overlay_target_cache_destructor(void *buf, void *arg) 117 { 118 overlay_target_t *ott = buf; 119 120 cv_destroy(&ott->ott_cond); 121 mutex_destroy(&ott->ott_lock); 122 } 123 124 /* ARGSUSED */ 125 static int 126 overlay_entry_cache_constructor(void *buf, void *arg, int kmflgs) 127 { 128 overlay_target_entry_t *ote = buf; 129 130 bzero(ote, sizeof (overlay_target_entry_t)); 131 mutex_init(&ote->ote_lock, NULL, MUTEX_DRIVER, NULL); 132 return (0); 133 } 134 135 /* ARGSUSED */ 136 static void 137 overlay_entry_cache_destructor(void *buf, void *arg) 138 { 139 overlay_target_entry_t *ote = buf; 140 141 mutex_destroy(&ote->ote_lock); 142 } 143 144 static uint64_t 145 overlay_mac_hash(const void *v) 146 { 147 uint32_t crc; 148 CRC32(crc, v, ETHERADDRL, -1U, crc32_table); 149 return (crc); 150 } 151 152 static int 153 overlay_mac_cmp(const void *a, const void *b) 154 { 155 return (bcmp(a, b, ETHERADDRL)); 156 } 157 158 /* ARGSUSED */ 159 static void 160 overlay_target_entry_dtor(void *arg) 161 { 162 overlay_target_entry_t *ote = arg; 163 164 ote->ote_flags = 0; 165 bzero(ote->ote_addr, ETHERADDRL); 166 ote->ote_ott = NULL; 167 ote->ote_odd = NULL; 168 freemsgchain(ote->ote_chead); 169 ote->ote_chead = ote->ote_ctail = NULL; 170 ote->ote_mbsize = 0; 171 ote->ote_vtime = 0; 172 kmem_cache_free(overlay_entry_cache, ote); 173 } 174 175 static int 176 overlay_mac_avl(const void *a, const void *b) 177 { 178 int i; 179 const overlay_target_entry_t *l, *r; 180 l = a; 181 r = b; 182 183 for (i = 0; i < ETHERADDRL; i++) { 184 if (l->ote_addr[i] > r->ote_addr[i]) 185 return (1); 186 else if (l->ote_addr[i] < 
r->ote_addr[i]) 187 return (-1); 188 } 189 190 return (0); 191 } 192 193 void 194 overlay_target_init(void) 195 { 196 int ret; 197 ret = ddi_soft_state_init(&overlay_thdl_state, 198 sizeof (overlay_target_hdl_t), 1); 199 VERIFY(ret == 0); 200 overlay_target_cache = kmem_cache_create("overlay_target", 201 sizeof (overlay_target_t), 0, overlay_target_cache_constructor, 202 overlay_target_cache_destructor, NULL, NULL, NULL, 0); 203 overlay_entry_cache = kmem_cache_create("overlay_entry", 204 sizeof (overlay_target_entry_t), 0, overlay_entry_cache_constructor, 205 overlay_entry_cache_destructor, NULL, NULL, NULL, 0); 206 mutex_init(&overlay_target_lock, NULL, MUTEX_DRIVER, NULL); 207 cv_init(&overlay_target_condvar, NULL, CV_DRIVER, NULL); 208 list_create(&overlay_target_list, sizeof (overlay_target_entry_t), 209 offsetof(overlay_target_entry_t, ote_qlink)); 210 list_create(&overlay_thdl_list, sizeof (overlay_target_hdl_t), 211 offsetof(overlay_target_hdl_t, oth_link)); 212 overlay_thdl_idspace = id_space_create("overlay_target_minors", 213 1, INT32_MAX); 214 } 215 216 void 217 overlay_target_fini(void) 218 { 219 id_space_destroy(overlay_thdl_idspace); 220 list_destroy(&overlay_thdl_list); 221 list_destroy(&overlay_target_list); 222 cv_destroy(&overlay_target_condvar); 223 mutex_destroy(&overlay_target_lock); 224 kmem_cache_destroy(overlay_entry_cache); 225 kmem_cache_destroy(overlay_target_cache); 226 ddi_soft_state_fini(&overlay_thdl_state); 227 } 228 229 void 230 overlay_target_free(overlay_dev_t *odd) 231 { 232 if (odd->odd_target == NULL) 233 return; 234 235 if (odd->odd_target->ott_mode == OVERLAY_TARGET_DYNAMIC) { 236 refhash_t *rp = odd->odd_target->ott_u.ott_dyn.ott_dhash; 237 avl_tree_t *ap = &odd->odd_target->ott_u.ott_dyn.ott_tree; 238 overlay_target_entry_t *ote; 239 240 /* 241 * Our AVL tree and hashtable contain the same elements, 242 * therefore we should just remove it from the tree, but then 243 * delete the entries when we remove them from the hash 
table 244 * (which happens through the refhash dtor). 245 */ 246 while ((ote = avl_first(ap)) != NULL) 247 avl_remove(ap, ote); 248 249 avl_destroy(ap); 250 for (ote = refhash_first(rp); ote != NULL; 251 ote = refhash_next(rp, ote)) { 252 refhash_remove(rp, ote); 253 } 254 refhash_destroy(rp); 255 } 256 257 ASSERT(odd->odd_target->ott_ocount == 0); 258 kmem_cache_free(overlay_target_cache, odd->odd_target); 259 } 260 261 int 262 overlay_target_busy() 263 { 264 int ret; 265 266 mutex_enter(&overlay_target_lock); 267 ret = !list_is_empty(&overlay_thdl_list); 268 mutex_exit(&overlay_target_lock); 269 270 return (ret); 271 } 272 273 static void 274 overlay_target_queue(overlay_target_entry_t *entry) 275 { 276 mutex_enter(&overlay_target_lock); 277 mutex_enter(&entry->ote_ott->ott_lock); 278 if (entry->ote_ott->ott_flags & OVERLAY_T_TEARDOWN) { 279 mutex_exit(&entry->ote_ott->ott_lock); 280 mutex_exit(&overlay_target_lock); 281 return; 282 } 283 entry->ote_ott->ott_ocount++; 284 mutex_exit(&entry->ote_ott->ott_lock); 285 list_insert_tail(&overlay_target_list, entry); 286 cv_signal(&overlay_target_condvar); 287 mutex_exit(&overlay_target_lock); 288 } 289 290 void 291 overlay_target_quiesce(overlay_target_t *ott) 292 { 293 if (ott == NULL) 294 return; 295 mutex_enter(&ott->ott_lock); 296 ott->ott_flags |= OVERLAY_T_TEARDOWN; 297 while (ott->ott_ocount != 0) 298 cv_wait(&ott->ott_cond, &ott->ott_lock); 299 mutex_exit(&ott->ott_lock); 300 } 301 302 /* 303 * This functions assumes that the destination mode is OVERLAY_PLUGIN_D_IP | 304 * OVERLAY_PLUGIN_D_PORT. As we don't have an implementation of anything else at 305 * this time, say for NVGRE, we drop all packets that mcuh this. 
306 */ 307 int 308 overlay_target_lookup(overlay_dev_t *odd, mblk_t *mp, struct sockaddr *sock, 309 socklen_t *slenp) 310 { 311 int ret; 312 struct sockaddr_in6 *v6; 313 overlay_target_t *ott; 314 mac_header_info_t mhi; 315 overlay_target_entry_t *entry; 316 317 ASSERT(odd->odd_target != NULL); 318 319 /* 320 * At this point, the overlay device is in a mux which means that it's 321 * been activated. At this point, parts of the target, such as the mode 322 * and the destination are now read-only and we don't have to worry 323 * about synchronization for them. 324 */ 325 ott = odd->odd_target; 326 if (ott->ott_dest != (OVERLAY_PLUGIN_D_IP | OVERLAY_PLUGIN_D_PORT)) 327 return (OVERLAY_TARGET_DROP); 328 329 v6 = (struct sockaddr_in6 *)sock; 330 bzero(v6, sizeof (struct sockaddr_in6)); 331 v6->sin6_family = AF_INET6; 332 333 if (ott->ott_mode == OVERLAY_TARGET_POINT) { 334 mutex_enter(&ott->ott_lock); 335 bcopy(&ott->ott_u.ott_point.otp_ip, &v6->sin6_addr, 336 sizeof (struct in6_addr)); 337 v6->sin6_port = htons(ott->ott_u.ott_point.otp_port); 338 mutex_exit(&ott->ott_lock); 339 *slenp = sizeof (struct sockaddr_in6); 340 341 return (OVERLAY_TARGET_OK); 342 } 343 344 ASSERT(ott->ott_mode == OVERLAY_TARGET_DYNAMIC); 345 346 /* 347 * Note we only want the MAC address here, therefore we won't bother 348 * using mac_vlan_header_info(). If any caller needs the vlan info at 349 * this point, this should change to a call to mac_vlan_header_info(). 
350 */ 351 if (mac_header_info(odd->odd_mh, mp, &mhi) != 0) 352 return (OVERLAY_TARGET_DROP); 353 mutex_enter(&ott->ott_lock); 354 entry = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash, 355 mhi.mhi_daddr); 356 if (entry == NULL) { 357 entry = kmem_cache_alloc(overlay_entry_cache, 358 KM_NOSLEEP | KM_NORMALPRI); 359 if (entry == NULL) { 360 mutex_exit(&ott->ott_lock); 361 return (OVERLAY_TARGET_DROP); 362 } 363 bcopy(mhi.mhi_daddr, entry->ote_addr, ETHERADDRL); 364 entry->ote_chead = entry->ote_ctail = mp; 365 entry->ote_mbsize = msgsize(mp); 366 entry->ote_flags |= OVERLAY_ENTRY_F_PENDING; 367 entry->ote_ott = ott; 368 entry->ote_odd = odd; 369 refhash_insert(ott->ott_u.ott_dyn.ott_dhash, entry); 370 avl_add(&ott->ott_u.ott_dyn.ott_tree, entry); 371 mutex_exit(&ott->ott_lock); 372 overlay_target_queue(entry); 373 return (OVERLAY_TARGET_ASYNC); 374 } 375 refhash_hold(ott->ott_u.ott_dyn.ott_dhash, entry); 376 mutex_exit(&ott->ott_lock); 377 378 mutex_enter(&entry->ote_lock); 379 if (entry->ote_flags & OVERLAY_ENTRY_F_DROP) { 380 ret = OVERLAY_TARGET_DROP; 381 } else if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) { 382 bcopy(&entry->ote_dest.otp_ip, &v6->sin6_addr, 383 sizeof (struct in6_addr)); 384 v6->sin6_port = htons(entry->ote_dest.otp_port); 385 *slenp = sizeof (struct sockaddr_in6); 386 ret = OVERLAY_TARGET_OK; 387 } else { 388 size_t mlen = msgsize(mp); 389 390 if (mlen + entry->ote_mbsize > overlay_ent_size) { 391 ret = OVERLAY_TARGET_DROP; 392 } else { 393 if (entry->ote_ctail != NULL) { 394 ASSERT(entry->ote_ctail->b_next == 395 NULL); 396 entry->ote_ctail->b_next = mp; 397 entry->ote_ctail = mp; 398 } else { 399 entry->ote_chead = mp; 400 entry->ote_ctail = mp; 401 } 402 entry->ote_mbsize += mlen; 403 if ((entry->ote_flags & 404 OVERLAY_ENTRY_F_PENDING) == 0) { 405 entry->ote_flags |= 406 OVERLAY_ENTRY_F_PENDING; 407 overlay_target_queue(entry); 408 } 409 ret = OVERLAY_TARGET_ASYNC; 410 } 411 } 412 mutex_exit(&entry->ote_lock); 413 414 
mutex_enter(&ott->ott_lock); 415 refhash_rele(ott->ott_u.ott_dyn.ott_dhash, entry); 416 mutex_exit(&ott->ott_lock); 417 418 return (ret); 419 } 420 421 /* ARGSUSED */ 422 static int 423 overlay_target_info(overlay_target_hdl_t *thdl, void *arg) 424 { 425 overlay_dev_t *odd; 426 overlay_targ_info_t *oti = arg; 427 428 odd = overlay_hold_by_dlid(oti->oti_linkid); 429 if (odd == NULL) 430 return (ENOENT); 431 432 mutex_enter(&odd->odd_lock); 433 oti->oti_flags = 0; 434 oti->oti_needs = odd->odd_plugin->ovp_dest; 435 if (odd->odd_flags & OVERLAY_F_DEGRADED) 436 oti->oti_flags |= OVERLAY_TARG_INFO_F_DEGRADED; 437 if (odd->odd_flags & OVERLAY_F_ACTIVATED) 438 oti->oti_flags |= OVERLAY_TARG_INFO_F_ACTIVE; 439 oti->oti_vnetid = odd->odd_vid; 440 mutex_exit(&odd->odd_lock); 441 overlay_hold_rele(odd); 442 return (0); 443 } 444 445 /* ARGSUSED */ 446 static int 447 overlay_target_associate(overlay_target_hdl_t *thdl, void *arg) 448 { 449 overlay_dev_t *odd; 450 overlay_target_t *ott; 451 overlay_targ_associate_t *ota = arg; 452 453 odd = overlay_hold_by_dlid(ota->ota_linkid); 454 if (odd == NULL) 455 return (ENOENT); 456 457 if (ota->ota_id == 0) { 458 overlay_hold_rele(odd); 459 return (EINVAL); 460 } 461 462 if (ota->ota_mode != OVERLAY_TARGET_POINT && 463 ota->ota_mode != OVERLAY_TARGET_DYNAMIC) { 464 overlay_hold_rele(odd); 465 return (EINVAL); 466 } 467 468 if (ota->ota_provides != odd->odd_plugin->ovp_dest) { 469 overlay_hold_rele(odd); 470 return (EINVAL); 471 } 472 473 if (ota->ota_mode == OVERLAY_TARGET_POINT) { 474 if (ota->ota_provides & OVERLAY_PLUGIN_D_IP) { 475 if (IN6_IS_ADDR_UNSPECIFIED(&ota->ota_point.otp_ip) || 476 IN6_IS_ADDR_V4COMPAT(&ota->ota_point.otp_ip) || 477 IN6_IS_ADDR_V4MAPPED_ANY(&ota->ota_point.otp_ip)) { 478 overlay_hold_rele(odd); 479 return (EINVAL); 480 } 481 } 482 483 if (ota->ota_provides & OVERLAY_PLUGIN_D_PORT) { 484 if (ota->ota_point.otp_port == 0) { 485 overlay_hold_rele(odd); 486 return (EINVAL); 487 } 488 } 489 } 490 491 ott = 
kmem_cache_alloc(overlay_target_cache, KM_SLEEP);
	ott->ott_flags = 0;
	ott->ott_ocount = 0;
	ott->ott_mode = ota->ota_mode;
	ott->ott_dest = ota->ota_provides;
	ott->ott_id = ota->ota_id;

	if (ott->ott_mode == OVERLAY_TARGET_POINT) {
		bcopy(&ota->ota_point, &ott->ott_u.ott_point,
		    sizeof (overlay_target_point_t));
	} else {
		/* Dynamic mode: MAC-keyed refhash plus a parallel AVL tree. */
		ott->ott_u.ott_dyn.ott_dhash = refhash_create(OVERLAY_HSIZE,
		    overlay_mac_hash, overlay_mac_cmp,
		    overlay_target_entry_dtor, sizeof (overlay_target_entry_t),
		    offsetof(overlay_target_entry_t, ote_reflink),
		    offsetof(overlay_target_entry_t, ote_addr), KM_SLEEP);
		avl_create(&ott->ott_u.ott_dyn.ott_tree, overlay_mac_avl,
		    sizeof (overlay_target_entry_t),
		    offsetof(overlay_target_entry_t, ote_avllink));
	}
	mutex_enter(&odd->odd_lock);
	if (odd->odd_flags & OVERLAY_F_VARPD) {
		/* Only one varpd association is allowed per device. */
		mutex_exit(&odd->odd_lock);
		kmem_cache_free(overlay_target_cache, ott);
		overlay_hold_rele(odd);
		return (EEXIST);
	}

	odd->odd_flags |= OVERLAY_F_VARPD;
	odd->odd_target = ott;
	mutex_exit(&odd->odd_lock);

	overlay_hold_rele(odd);


	return (0);
}


/*
 * OVERLAY_TARG_DEGRADE ioctl: mark the device degraded with the supplied
 * message (delegates to the FM support code).
 */
/* ARGSUSED */
static int
overlay_target_degrade(overlay_target_hdl_t *thdl, void *arg)
{
	overlay_dev_t *odd;
	overlay_targ_degrade_t *otd = arg;

	odd = overlay_hold_by_dlid(otd->otd_linkid);
	if (odd == NULL)
		return (ENOENT);

	overlay_fm_degrade(odd, otd->otd_buf);
	overlay_hold_rele(odd);
	return (0);
}

/*
 * OVERLAY_TARG_RESTORE ioctl: clear a previous degradation.
 */
/* ARGSUSED */
static int
overlay_target_restore(overlay_target_hdl_t *thdl, void *arg)
{
	overlay_dev_t *odd;
	overlay_targ_id_t *otid = arg;

	odd = overlay_hold_by_dlid(otid->otid_linkid);
	if (odd == NULL)
		return (ENOENT);

	overlay_fm_restore(odd);
	overlay_hold_rele(odd);
	return (0);
}

/*
 * OVERLAY_TARG_DISASSOCIATE ioctl: detach varpd from the device. Only the
 * flag is cleared here; the target state itself is reclaimed separately
 * (see overlay_target_free()).
 */
/* ARGSUSED */
static int
overlay_target_disassociate(overlay_target_hdl_t *thdl, void *arg)
{
	overlay_dev_t *odd;
	overlay_targ_id_t *otid = arg;

	odd = overlay_hold_by_dlid(otid->otid_linkid);
	if (odd == NULL)
		return (ENOENT);

	mutex_enter(&odd->odd_lock);
	odd->odd_flags &= ~OVERLAY_F_VARPD;
	mutex_exit(&odd->odd_lock);

	overlay_hold_rele(odd);
	return (0);

}

/*
 * Block (for up to one second) waiting for a queued entry, then fill in the
 * lookup request that varpd will service. The entry moves from the global
 * pending list onto this handle's outstanding list.
 */
static int
overlay_target_lookup_request(overlay_target_hdl_t *thdl, void *arg)
{
	overlay_targ_lookup_t *otl = arg;
	overlay_target_entry_t *entry;
	clock_t ret, timeout;
	mac_header_info_t mhi;

	/* Absolute deadline, so retries via "again" share the budget. */
	timeout = ddi_get_lbolt() + drv_usectohz(MICROSEC);
again:
	mutex_enter(&overlay_target_lock);
	while (list_is_empty(&overlay_target_list)) {
		ret = cv_timedwait(&overlay_target_condvar,
		    &overlay_target_lock, timeout);
		if (ret == -1) {
			mutex_exit(&overlay_target_lock);
			return (ETIME);
		}
	}
	entry = list_remove_head(&overlay_target_list);
	mutex_exit(&overlay_target_lock);
	mutex_enter(&entry->ote_lock);
	if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
		/*
		 * Answered (e.g. via cache_set) while queued; nothing for
		 * varpd to do.
		 *
		 * NOTE(review): this path drops a queued entry without
		 * decrementing ott_ocount the way respond/drop do — verify
		 * against overlay_target_quiesce() that the count cannot be
		 * leaked here.
		 */
		ASSERT(entry->ote_chead == NULL);
		mutex_exit(&entry->ote_lock);
		goto again;
	}
	ASSERT(entry->ote_chead != NULL);

	/*
	 * If we have a bogon that doesn't have a valid mac header, drop it and
	 * try again.
	 */
	if (mac_vlan_header_info(entry->ote_odd->odd_mh, entry->ote_chead,
	    &mhi) != 0) {
		boolean_t queue = B_FALSE;
		mblk_t *mp = entry->ote_chead;
		entry->ote_chead = mp->b_next;
		mp->b_next = NULL;
		if (entry->ote_ctail == mp)
			entry->ote_ctail = entry->ote_chead;
		entry->ote_mbsize -= msgsize(mp);
		if (entry->ote_chead != NULL)
			queue = B_TRUE;
		mutex_exit(&entry->ote_lock);
		if (queue == B_TRUE)
			overlay_target_queue(entry);
		freemsg(mp);
		goto again;
	}

	/* The entry's address doubles as the request id varpd echoes back. */
	otl->otl_dlid = entry->ote_odd->odd_linkid;
	otl->otl_reqid = (uintptr_t)entry;
	otl->otl_varpdid = entry->ote_ott->ott_id;
	otl->otl_vnetid = entry->ote_odd->odd_vid;

	otl->otl_hdrsize = mhi.mhi_hdrsize;
	otl->otl_pktsize = msgsize(entry->ote_chead) - otl->otl_hdrsize;
	bcopy(mhi.mhi_daddr, otl->otl_dstaddr, ETHERADDRL);
	bcopy(mhi.mhi_saddr, otl->otl_srcaddr, ETHERADDRL);
	otl->otl_dsttype = mhi.mhi_dsttype;
	otl->otl_sap = mhi.mhi_bindsap;
	otl->otl_vlan = VLAN_ID(mhi.mhi_tci);
	mutex_exit(&entry->ote_lock);

	mutex_enter(&thdl->oth_lock);
	list_insert_tail(&thdl->oth_outstanding, entry);
	mutex_exit(&thdl->oth_lock);

	return (0);
}

/*
 * varpd answered a lookup: record the destination on the entry, mark it
 * valid, and drain any packets queued while the lookup was in flight.
 */
static int
overlay_target_lookup_respond(overlay_target_hdl_t *thdl, void *arg)
{
	const overlay_targ_resp_t *otr = arg;
	overlay_target_entry_t *entry;
	mblk_t *mp;

	mutex_enter(&thdl->oth_lock);
	for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
	    entry = list_next(&thdl->oth_outstanding, entry)) {
		if ((uintptr_t)entry == otr->otr_reqid)
			break;
	}

	if (entry == NULL) {
		mutex_exit(&thdl->oth_lock);
		return (EINVAL);
	}
	list_remove(&thdl->oth_outstanding, entry);
	mutex_exit(&thdl->oth_lock);

	mutex_enter(&entry->ote_lock);
	bcopy(&otr->otr_answer, &entry->ote_dest,
	    sizeof (overlay_target_point_t));
	entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
	entry->ote_flags |= OVERLAY_ENTRY_F_VALID;
	/* Steal the queued chain so it can be transmitted unlocked. */
	mp = entry->ote_chead;
	entry->ote_chead = NULL;
	entry->ote_ctail = NULL;
	entry->ote_mbsize = 0;
	entry->ote_vtime = gethrtime();
	mutex_exit(&entry->ote_lock);

	/*
	 * For now do an in-situ drain.
	 */
	mp = overlay_m_tx(entry->ote_odd, mp);
	freemsgchain(mp);

	mutex_enter(&entry->ote_ott->ott_lock);
	entry->ote_ott->ott_ocount--;
	cv_signal(&entry->ote_ott->ott_cond);
	mutex_exit(&entry->ote_ott->ott_lock);

	return (0);
}

/*
 * varpd told us to drop the packet that prompted a lookup. Only the head of
 * the queued chain is freed; if more packets are still queued the entry is
 * requeued so they get their own lookups.
 */
static int
overlay_target_lookup_drop(overlay_target_hdl_t *thdl, void *arg)
{
	const overlay_targ_resp_t *otr = arg;
	overlay_target_entry_t *entry;
	mblk_t *mp;
	boolean_t queue = B_FALSE;

	mutex_enter(&thdl->oth_lock);
	for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
	    entry = list_next(&thdl->oth_outstanding, entry)) {
		if ((uintptr_t)entry == otr->otr_reqid)
			break;
	}

	if (entry == NULL) {
		mutex_exit(&thdl->oth_lock);
		return (EINVAL);
	}
	list_remove(&thdl->oth_outstanding, entry);
	mutex_exit(&thdl->oth_lock);

	mutex_enter(&entry->ote_lock);

	/* Safeguard against a confused varpd */
	if (entry->ote_flags & OVERLAY_ENTRY_F_VALID) {
		entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
		DTRACE_PROBE1(overlay__target__valid__drop,
		    overlay_target_entry_t *, entry);
		mutex_exit(&entry->ote_lock);
		goto done;
	}

	mp = entry->ote_chead;
	if (mp != NULL) {
		entry->ote_chead = mp->b_next;
		mp->b_next = NULL;
		if (entry->ote_ctail == mp)
			entry->ote_ctail = entry->ote_chead;
		entry->ote_mbsize -= msgsize(mp);
	}
	if (entry->ote_chead != NULL) {
		queue = B_TRUE;
		entry->ote_flags |= OVERLAY_ENTRY_F_PENDING;
	} else {
		entry->ote_flags &= ~OVERLAY_ENTRY_F_PENDING;
	}
	mutex_exit(&entry->ote_lock);

	if (queue == B_TRUE)
		overlay_target_queue(entry);
	freemsg(mp);

done:
	mutex_enter(&entry->ote_ott->ott_lock);
	entry->ote_ott->ott_ocount--;
	cv_signal(&entry->ote_ott->ott_cond);
	mutex_exit(&entry->ote_ott->ott_lock);

	return (0);
}

/*
 * Copy in an overlay_targ_pkt_t, converting the 32-bit ioctl layout when the
 * caller is an ILP32 process.
 *
 * NOTE(review): the ILP32 path copies in sizeof (overlay_targ_pkt32_t) and
 * then only rewrites otp_buf; this assumes the fields preceding otp_buf have
 * identical offsets in both models — confirm against the structure
 * definitions in overlay.h.
 */
/* ARGSUSED */
static int
overlay_target_pkt_copyin(const void *ubuf, void **outp, size_t *bsize,
    int flags)
{
	overlay_targ_pkt_t *pkt;
	overlay_targ_pkt32_t *pkt32;

	pkt = kmem_alloc(sizeof (overlay_targ_pkt_t), KM_SLEEP);
	*outp = pkt;
	*bsize = sizeof (overlay_targ_pkt_t);
	if (ddi_model_convert_from(flags & FMODELS) == DDI_MODEL_ILP32) {
		uintptr_t addr;

		if (ddi_copyin(ubuf, pkt, sizeof (overlay_targ_pkt32_t),
		    flags & FKIOCTL) != 0) {
			kmem_free(pkt, *bsize);
			return (EFAULT);
		}
		/* Widen the 32-bit user buffer pointer in place. */
		pkt32 = (overlay_targ_pkt32_t *)pkt;
		addr = pkt32->otp_buf;
		pkt->otp_buf = (void *)addr;
	} else {
		if (ddi_copyin(ubuf, pkt, *bsize, flags & FKIOCTL) != 0) {
			kmem_free(pkt, *bsize);
			return (EFAULT);
		}
	}
	return (0);
}

/*
 * Copy an overlay_targ_pkt_t back out, narrowing the buffer pointer for
 * ILP32 callers.
 */
static int
overlay_target_pkt_copyout(void *ubuf, void *buf, size_t bufsize,
    int flags)
{
	if (ddi_model_convert_from(flags & FMODELS) == DDI_MODEL_ILP32) {
		overlay_targ_pkt_t *pkt = buf;
		overlay_targ_pkt32_t *pkt32 = buf;
		uintptr_t addr = (uintptr_t)pkt->otp_buf;
		pkt32->otp_buf = (caddr32_t)addr;
		if (ddi_copyout(buf, ubuf, sizeof (overlay_targ_pkt32_t),
		    flags & FKIOCTL) != 0)
			return (EFAULT);
	} else {
		if (ddi_copyout(buf, ubuf, bufsize, flags & FKIOCTL) != 0)
			return (EFAULT);
	}
	return (0);
}

/*
 * Copy the head packet of an outstanding request out to varpd's buffer,
 * truncating to the smaller of the packet and otp_size. otp_size is updated
 * to the number of bytes actually copied.
 */
static int
overlay_target_packet(overlay_target_hdl_t *thdl, void *arg)
{
	overlay_targ_pkt_t *pkt = arg;
	overlay_target_entry_t *entry;
	mblk_t *mp;
	size_t mlen;
	size_t boff;

	mutex_enter(&thdl->oth_lock);
	for (entry = list_head(&thdl->oth_outstanding); entry != NULL;
	    entry = list_next(&thdl->oth_outstanding, entry)) {
		if ((uintptr_t)entry ==
pkt->otp_reqid) 826 break; 827 } 828 829 if (entry == NULL) { 830 mutex_exit(&thdl->oth_lock); 831 return (EINVAL); 832 } 833 mutex_enter(&entry->ote_lock); 834 mutex_exit(&thdl->oth_lock); 835 mp = entry->ote_chead; 836 /* Protect against a rogue varpd */ 837 if (mp == NULL) { 838 mutex_exit(&entry->ote_lock); 839 return (EINVAL); 840 } 841 mlen = MIN(msgsize(mp), pkt->otp_size); 842 pkt->otp_size = mlen; 843 boff = 0; 844 while (mlen > 0) { 845 size_t wlen = MIN(MBLKL(mp), mlen); 846 if (ddi_copyout(mp->b_rptr, 847 (void *)((uintptr_t)pkt->otp_buf + boff), 848 wlen, 0) != 0) { 849 mutex_exit(&entry->ote_lock); 850 return (EFAULT); 851 } 852 mlen -= wlen; 853 boff += wlen; 854 mp = mp->b_cont; 855 } 856 mutex_exit(&entry->ote_lock); 857 return (0); 858 } 859 860 static int 861 overlay_target_inject(overlay_target_hdl_t *thdl, void *arg) 862 { 863 overlay_targ_pkt_t *pkt = arg; 864 overlay_target_entry_t *entry; 865 overlay_dev_t *odd; 866 mblk_t *mp; 867 868 if (pkt->otp_size > ETHERMAX + VLAN_TAGSZ) 869 return (EINVAL); 870 871 mp = allocb(pkt->otp_size, 0); 872 if (mp == NULL) 873 return (ENOMEM); 874 875 if (ddi_copyin(pkt->otp_buf, mp->b_rptr, pkt->otp_size, 0) != 0) { 876 freeb(mp); 877 return (EFAULT); 878 } 879 mp->b_wptr += pkt->otp_size; 880 881 if (pkt->otp_linkid != UINT64_MAX) { 882 odd = overlay_hold_by_dlid(pkt->otp_linkid); 883 if (odd == NULL) { 884 freeb(mp); 885 return (ENOENT); 886 } 887 } else { 888 mutex_enter(&thdl->oth_lock); 889 for (entry = list_head(&thdl->oth_outstanding); entry != NULL; 890 entry = list_next(&thdl->oth_outstanding, entry)) { 891 if ((uintptr_t)entry == pkt->otp_reqid) 892 break; 893 } 894 895 if (entry == NULL) { 896 mutex_exit(&thdl->oth_lock); 897 freeb(mp); 898 return (ENOENT); 899 } 900 odd = entry->ote_odd; 901 mutex_exit(&thdl->oth_lock); 902 } 903 904 mutex_enter(&odd->odd_lock); 905 overlay_io_start(odd, OVERLAY_F_IN_RX); 906 mutex_exit(&odd->odd_lock); 907 908 mac_rx(odd->odd_mh, NULL, mp); 909 910 
mutex_enter(&odd->odd_lock); 911 overlay_io_done(odd, OVERLAY_F_IN_RX); 912 mutex_exit(&odd->odd_lock); 913 914 return (0); 915 } 916 917 static int 918 overlay_target_resend(overlay_target_hdl_t *thdl, void *arg) 919 { 920 overlay_targ_pkt_t *pkt = arg; 921 overlay_target_entry_t *entry; 922 overlay_dev_t *odd; 923 mblk_t *mp; 924 925 if (pkt->otp_size > ETHERMAX + VLAN_TAGSZ) 926 return (EINVAL); 927 928 mp = allocb(pkt->otp_size, 0); 929 if (mp == NULL) 930 return (ENOMEM); 931 932 if (ddi_copyin(pkt->otp_buf, mp->b_rptr, pkt->otp_size, 0) != 0) { 933 freeb(mp); 934 return (EFAULT); 935 } 936 mp->b_wptr += pkt->otp_size; 937 938 if (pkt->otp_linkid != UINT64_MAX) { 939 odd = overlay_hold_by_dlid(pkt->otp_linkid); 940 if (odd == NULL) { 941 freeb(mp); 942 return (ENOENT); 943 } 944 } else { 945 mutex_enter(&thdl->oth_lock); 946 for (entry = list_head(&thdl->oth_outstanding); entry != NULL; 947 entry = list_next(&thdl->oth_outstanding, entry)) { 948 if ((uintptr_t)entry == pkt->otp_reqid) 949 break; 950 } 951 952 if (entry == NULL) { 953 mutex_exit(&thdl->oth_lock); 954 freeb(mp); 955 return (ENOENT); 956 } 957 odd = entry->ote_odd; 958 mutex_exit(&thdl->oth_lock); 959 } 960 961 mp = overlay_m_tx(odd, mp); 962 freemsgchain(mp); 963 964 return (0); 965 } 966 967 typedef struct overlay_targ_list_int { 968 boolean_t otli_count; 969 uint32_t otli_cur; 970 uint32_t otli_nents; 971 uint32_t otli_ents[]; 972 } overlay_targ_list_int_t; 973 974 static int 975 overlay_target_list_copyin(const void *ubuf, void **outp, size_t *bsize, 976 int flags) 977 { 978 overlay_targ_list_t n; 979 overlay_targ_list_int_t *otl; 980 981 if (ddi_copyin(ubuf, &n, sizeof (overlay_targ_list_t), 982 flags & FKIOCTL) != 0) 983 return (EFAULT); 984 985 /* 986 */ 987 if (n.otl_nents >= INT32_MAX / sizeof (uint32_t)) 988 return (EINVAL); 989 *bsize = sizeof (overlay_targ_list_int_t) + 990 sizeof (uint32_t) * n.otl_nents; 991 otl = kmem_zalloc(*bsize, KM_SLEEP); 992 otl->otli_cur = 0; 993 
	otl->otli_nents = n.otl_nents;
	if (otl->otli_nents != 0) {
		otl->otli_count = B_FALSE;
		if (ddi_copyin((void *)((uintptr_t)ubuf +
		    offsetof(overlay_targ_list_t, otl_ents)),
		    otl->otli_ents, n.otl_nents * sizeof (uint32_t),
		    flags & FKIOCTL) != 0) {
			kmem_free(otl, *bsize);
			return (EFAULT);
		}
	} else {
		/* Zero entries means the caller only wants the count. */
		otl->otli_count = B_TRUE;
	}

	*outp = otl;
	return (0);
}

/*
 * overlay_dev_iter() callback: record each device's link id while there is
 * room, but keep counting past the end so the caller learns the total.
 */
static int
overlay_target_ioctl_list_cb(overlay_dev_t *odd, void *arg)
{
	overlay_targ_list_int_t *otl = arg;

	if (otl->otli_cur < otl->otli_nents)
		otl->otli_ents[otl->otli_cur] = odd->odd_linkid;
	otl->otli_cur++;
	return (0);
}

/*
 * OVERLAY_TARG_LIST ioctl: enumerate all overlay devices.
 */
/* ARGSUSED */
static int
overlay_target_ioctl_list(overlay_target_hdl_t *thdl, void *arg)
{
	overlay_dev_iter(overlay_target_ioctl_list_cb, arg);
	return (0);
}

/*
 * Copy the list results back out: the device count first, then the ids if
 * any were requested.
 *
 * NOTE(review): the count is written over the leading uint32_t of the user
 * buffer — presumably otl_nents is the first field of overlay_targ_list_t;
 * confirm against its definition.
 */
/* ARGSUSED */
static int
overlay_target_list_copyout(void *ubuf, void *buf, size_t bufsize, int flags)
{
	overlay_targ_list_int_t *otl = buf;

	if (ddi_copyout(&otl->otli_cur, ubuf, sizeof (uint32_t),
	    flags & FKIOCTL) != 0)
		return (EFAULT);

	if (otl->otli_count == B_FALSE) {
		if (ddi_copyout(otl->otli_ents,
		    (void *)((uintptr_t)ubuf +
		    offsetof(overlay_targ_list_t, otl_ents)),
		    sizeof (uint32_t) * otl->otli_nents,
		    flags & FKIOCTL) != 0)
			return (EFAULT);
	}
	return (0);
}

/*
 * OVERLAY_TARG_CACHE_GET ioctl: look up the cached destination for a MAC.
 * Point targets always answer with their single destination; dynamic
 * targets answer from the hash, returning ENOENT when the entry is missing
 * or not yet valid.
 */
/* ARGSUSED */
static int
overlay_target_cache_get(overlay_target_hdl_t *thdl, void *arg)
{
	int ret = 0;
	overlay_dev_t *odd;
	overlay_target_t *ott;
	overlay_targ_cache_t *otc = arg;

	odd = overlay_hold_by_dlid(otc->otc_linkid);
	if (odd == NULL)
		return (ENOENT);

	mutex_enter(&odd->odd_lock);
	if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
		mutex_exit(&odd->odd_lock);
		overlay_hold_rele(odd);
		return (ENXIO);
	}
	ott = odd->odd_target;
	if (ott->ott_mode != OVERLAY_TARGET_POINT &&
	    ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
		mutex_exit(&odd->odd_lock);
		overlay_hold_rele(odd);
		return (ENOTSUP);
	}
	/* Hand off from the device lock to the target lock. */
	mutex_enter(&ott->ott_lock);
	mutex_exit(&odd->odd_lock);

	if (ott->ott_mode == OVERLAY_TARGET_POINT) {
		otc->otc_entry.otce_flags = 0;
		bcopy(&ott->ott_u.ott_point, &otc->otc_entry.otce_dest,
		    sizeof (overlay_target_point_t));
	} else {
		overlay_target_entry_t *ote;
		ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash,
		    otc->otc_entry.otce_mac);
		if (ote != NULL) {
			mutex_enter(&ote->ote_lock);
			if ((ote->ote_flags &
			    OVERLAY_ENTRY_F_VALID_MASK) != 0) {
				if (ote->ote_flags & OVERLAY_ENTRY_F_DROP) {
					otc->otc_entry.otce_flags =
					    OVERLAY_TARGET_CACHE_DROP;
				} else {
					otc->otc_entry.otce_flags = 0;
					bcopy(&ote->ote_dest,
					    &otc->otc_entry.otce_dest,
					    sizeof (overlay_target_point_t));
				}
				ret = 0;
			} else {
				ret = ENOENT;
			}
			mutex_exit(&ote->ote_lock);
		} else {
			ret = ENOENT;
		}
	}

	mutex_exit(&ott->ott_lock);
	overlay_hold_rele(odd);

	return (ret);
}

/*
 * OVERLAY_TARG_CACHE_SET ioctl: install (or overwrite) a cache entry for a
 * MAC on a dynamic target. If packets were queued awaiting this answer,
 * they are transmitted once the locks are dropped.
 */
/* ARGSUSED */
static int
overlay_target_cache_set(overlay_target_hdl_t *thdl, void *arg)
{
	overlay_dev_t *odd;
	overlay_target_t *ott;
	overlay_target_entry_t *ote;
	overlay_targ_cache_t *otc = arg;
	mblk_t *mp = NULL;

	if (otc->otc_entry.otce_flags & ~OVERLAY_TARGET_CACHE_DROP)
		return (EINVAL);

	odd = overlay_hold_by_dlid(otc->otc_linkid);
	if (odd == NULL)
		return (ENOENT);

	mutex_enter(&odd->odd_lock);
	if (!(odd->odd_flags & OVERLAY_F_VARPD)) {
		mutex_exit(&odd->odd_lock);
		overlay_hold_rele(odd);
		return (ENXIO);
	}
	ott = odd->odd_target;
	if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) {
		mutex_exit(&odd->odd_lock);
overlay_hold_rele(odd); 1144 return (ENOTSUP); 1145 } 1146 mutex_enter(&ott->ott_lock); 1147 mutex_exit(&odd->odd_lock); 1148 1149 ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash, 1150 otc->otc_entry.otce_mac); 1151 if (ote == NULL) { 1152 ote = kmem_cache_alloc(overlay_entry_cache, KM_SLEEP); 1153 bcopy(otc->otc_entry.otce_mac, ote->ote_addr, ETHERADDRL); 1154 ote->ote_chead = ote->ote_ctail = NULL; 1155 ote->ote_mbsize = 0; 1156 ote->ote_ott = ott; 1157 ote->ote_odd = odd; 1158 mutex_enter(&ote->ote_lock); 1159 refhash_insert(ott->ott_u.ott_dyn.ott_dhash, ote); 1160 avl_add(&ott->ott_u.ott_dyn.ott_tree, ote); 1161 } else { 1162 mutex_enter(&ote->ote_lock); 1163 } 1164 1165 if (otc->otc_entry.otce_flags & OVERLAY_TARGET_CACHE_DROP) { 1166 ote->ote_flags |= OVERLAY_ENTRY_F_DROP; 1167 } else { 1168 ote->ote_flags |= OVERLAY_ENTRY_F_VALID; 1169 bcopy(&otc->otc_entry.otce_dest, &ote->ote_dest, 1170 sizeof (overlay_target_point_t)); 1171 mp = ote->ote_chead; 1172 ote->ote_chead = NULL; 1173 ote->ote_ctail = NULL; 1174 ote->ote_mbsize = 0; 1175 ote->ote_vtime = gethrtime(); 1176 } 1177 1178 mutex_exit(&ote->ote_lock); 1179 mutex_exit(&ott->ott_lock); 1180 1181 if (mp != NULL) { 1182 mp = overlay_m_tx(ote->ote_odd, mp); 1183 freemsgchain(mp); 1184 } 1185 1186 overlay_hold_rele(odd); 1187 1188 return (0); 1189 } 1190 1191 /* ARGSUSED */ 1192 static int 1193 overlay_target_cache_remove(overlay_target_hdl_t *thdl, void *arg) 1194 { 1195 int ret = 0; 1196 overlay_dev_t *odd; 1197 overlay_target_t *ott; 1198 overlay_target_entry_t *ote; 1199 overlay_targ_cache_t *otc = arg; 1200 1201 odd = overlay_hold_by_dlid(otc->otc_linkid); 1202 if (odd == NULL) 1203 return (ENOENT); 1204 1205 mutex_enter(&odd->odd_lock); 1206 if (!(odd->odd_flags & OVERLAY_F_VARPD)) { 1207 mutex_exit(&odd->odd_lock); 1208 overlay_hold_rele(odd); 1209 return (ENXIO); 1210 } 1211 ott = odd->odd_target; 1212 if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) { 1213 mutex_exit(&odd->odd_lock); 1214 
overlay_hold_rele(odd); 1215 return (ENOTSUP); 1216 } 1217 mutex_enter(&ott->ott_lock); 1218 mutex_exit(&odd->odd_lock); 1219 1220 ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash, 1221 otc->otc_entry.otce_mac); 1222 if (ote != NULL) { 1223 mutex_enter(&ote->ote_lock); 1224 ote->ote_flags &= ~OVERLAY_ENTRY_F_VALID_MASK; 1225 mutex_exit(&ote->ote_lock); 1226 ret = 0; 1227 } else { 1228 ret = ENOENT; 1229 } 1230 1231 mutex_exit(&ott->ott_lock); 1232 overlay_hold_rele(odd); 1233 1234 return (ret); 1235 } 1236 1237 /* ARGSUSED */ 1238 static int 1239 overlay_target_cache_flush(overlay_target_hdl_t *thdl, void *arg) 1240 { 1241 avl_tree_t *avl; 1242 overlay_dev_t *odd; 1243 overlay_target_t *ott; 1244 overlay_target_entry_t *ote; 1245 overlay_targ_cache_t *otc = arg; 1246 1247 odd = overlay_hold_by_dlid(otc->otc_linkid); 1248 if (odd == NULL) 1249 return (ENOENT); 1250 1251 mutex_enter(&odd->odd_lock); 1252 if (!(odd->odd_flags & OVERLAY_F_VARPD)) { 1253 mutex_exit(&odd->odd_lock); 1254 overlay_hold_rele(odd); 1255 return (ENXIO); 1256 } 1257 ott = odd->odd_target; 1258 if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC) { 1259 mutex_exit(&odd->odd_lock); 1260 overlay_hold_rele(odd); 1261 return (ENOTSUP); 1262 } 1263 mutex_enter(&ott->ott_lock); 1264 mutex_exit(&odd->odd_lock); 1265 avl = &ott->ott_u.ott_dyn.ott_tree; 1266 1267 for (ote = avl_first(avl); ote != NULL; ote = AVL_NEXT(avl, ote)) { 1268 mutex_enter(&ote->ote_lock); 1269 ote->ote_flags &= ~OVERLAY_ENTRY_F_VALID_MASK; 1270 mutex_exit(&ote->ote_lock); 1271 } 1272 ote = refhash_lookup(ott->ott_u.ott_dyn.ott_dhash, 1273 otc->otc_entry.otce_mac); 1274 1275 mutex_exit(&ott->ott_lock); 1276 overlay_hold_rele(odd); 1277 1278 return (0); 1279 } 1280 1281 static int 1282 overlay_target_cache_iter_copyin(const void *ubuf, void **outp, size_t *bsize, 1283 int flags) 1284 { 1285 overlay_targ_cache_iter_t base, *iter; 1286 1287 if (ddi_copyin(ubuf, &base, sizeof (overlay_targ_cache_iter_t), 1288 flags & FKIOCTL) != 0) 1289 
return (EFAULT); 1290 1291 if (base.otci_count > OVERLAY_TARGET_ITER_MAX) 1292 return (E2BIG); 1293 1294 if (base.otci_count == 0) 1295 return (EINVAL); 1296 1297 *bsize = sizeof (overlay_targ_cache_iter_t) + 1298 base.otci_count * sizeof (overlay_targ_cache_entry_t); 1299 iter = kmem_alloc(*bsize, KM_SLEEP); 1300 bcopy(&base, iter, sizeof (overlay_targ_cache_iter_t)); 1301 *outp = iter; 1302 1303 return (0); 1304 } 1305 1306 typedef struct overlay_targ_cache_marker { 1307 uint8_t otcm_mac[ETHERADDRL]; 1308 uint16_t otcm_done; 1309 } overlay_targ_cache_marker_t; 1310 1311 /* ARGSUSED */ 1312 static int 1313 overlay_target_cache_iter(overlay_target_hdl_t *thdl, void *arg) 1314 { 1315 overlay_dev_t *odd; 1316 overlay_target_t *ott; 1317 overlay_target_entry_t lookup, *ent; 1318 overlay_targ_cache_marker_t *mark; 1319 avl_index_t where; 1320 avl_tree_t *avl; 1321 uint16_t written = 0; 1322 1323 overlay_targ_cache_iter_t *iter = arg; 1324 mark = (void *)&iter->otci_marker; 1325 1326 if (mark->otcm_done != 0) { 1327 iter->otci_count = 0; 1328 return (0); 1329 } 1330 1331 odd = overlay_hold_by_dlid(iter->otci_linkid); 1332 if (odd == NULL) 1333 return (ENOENT); 1334 1335 mutex_enter(&odd->odd_lock); 1336 if (!(odd->odd_flags & OVERLAY_F_VARPD)) { 1337 mutex_exit(&odd->odd_lock); 1338 overlay_hold_rele(odd); 1339 return (ENXIO); 1340 } 1341 ott = odd->odd_target; 1342 if (ott->ott_mode != OVERLAY_TARGET_DYNAMIC && 1343 ott->ott_mode != OVERLAY_TARGET_POINT) { 1344 mutex_exit(&odd->odd_lock); 1345 overlay_hold_rele(odd); 1346 return (ENOTSUP); 1347 } 1348 1349 /* 1350 * Holding this lock across the entire iteration probably isn't very 1351 * good. We should perhaps add an r/w lock for the avl tree. But we'll 1352 * wait until we now it's necessary before we do more. 
1353 */ 1354 mutex_enter(&ott->ott_lock); 1355 mutex_exit(&odd->odd_lock); 1356 1357 if (ott->ott_mode == OVERLAY_TARGET_POINT) { 1358 overlay_targ_cache_entry_t *out = &iter->otci_ents[0]; 1359 bzero(out->otce_mac, ETHERADDRL); 1360 out->otce_flags = 0; 1361 bcopy(&ott->ott_u.ott_point, &out->otce_dest, 1362 sizeof (overlay_target_point_t)); 1363 written++; 1364 mark->otcm_done = 1; 1365 } 1366 1367 avl = &ott->ott_u.ott_dyn.ott_tree; 1368 bcopy(mark->otcm_mac, lookup.ote_addr, ETHERADDRL); 1369 ent = avl_find(avl, &lookup, &where); 1370 1371 /* 1372 * NULL ent means that the entry does not exist, so we want to start 1373 * with the closest node in the tree. This means that we implicitly rely 1374 * on the tree's order and the first node will be the mac 00:00:00:00:00 1375 * and the last will be ff:ff:ff:ff:ff:ff. 1376 */ 1377 if (ent == NULL) { 1378 ent = avl_nearest(avl, where, AVL_AFTER); 1379 if (ent == NULL) { 1380 mark->otcm_done = 1; 1381 goto done; 1382 } 1383 } 1384 1385 for (; ent != NULL && written < iter->otci_count; 1386 ent = AVL_NEXT(avl, ent)) { 1387 overlay_targ_cache_entry_t *out = &iter->otci_ents[written]; 1388 mutex_enter(&ent->ote_lock); 1389 if ((ent->ote_flags & OVERLAY_ENTRY_F_VALID_MASK) == 0) { 1390 mutex_exit(&ent->ote_lock); 1391 continue; 1392 } 1393 bcopy(ent->ote_addr, out->otce_mac, ETHERADDRL); 1394 out->otce_flags = 0; 1395 if (ent->ote_flags & OVERLAY_ENTRY_F_DROP) 1396 out->otce_flags |= OVERLAY_TARGET_CACHE_DROP; 1397 if (ent->ote_flags & OVERLAY_ENTRY_F_VALID) 1398 bcopy(&ent->ote_dest, &out->otce_dest, 1399 sizeof (overlay_target_point_t)); 1400 written++; 1401 mutex_exit(&ent->ote_lock); 1402 } 1403 1404 if (ent != NULL) { 1405 bcopy(ent->ote_addr, mark->otcm_mac, ETHERADDRL); 1406 } else { 1407 mark->otcm_done = 1; 1408 } 1409 1410 done: 1411 iter->otci_count = written; 1412 mutex_exit(&ott->ott_lock); 1413 overlay_hold_rele(odd); 1414 1415 return (0); 1416 } 1417 1418 /* ARGSUSED */ 1419 static int 1420 
overlay_target_cache_iter_copyout(void *ubuf, void *buf, size_t bufsize, 1421 int flags) 1422 { 1423 size_t outsize; 1424 const overlay_targ_cache_iter_t *iter = buf; 1425 1426 outsize = sizeof (overlay_targ_cache_iter_t) + 1427 iter->otci_count * sizeof (overlay_targ_cache_entry_t); 1428 1429 if (ddi_copyout(buf, ubuf, outsize, flags & FKIOCTL) != 0) 1430 return (EFAULT); 1431 1432 return (0); 1433 } 1434 1435 static overlay_target_ioctl_t overlay_target_ioctab[] = { 1436 { OVERLAY_TARG_INFO, B_TRUE, B_TRUE, 1437 NULL, overlay_target_info, 1438 NULL, sizeof (overlay_targ_info_t) }, 1439 { OVERLAY_TARG_ASSOCIATE, B_TRUE, B_FALSE, 1440 NULL, overlay_target_associate, 1441 NULL, sizeof (overlay_targ_associate_t) }, 1442 { OVERLAY_TARG_DISASSOCIATE, B_TRUE, B_FALSE, 1443 NULL, overlay_target_disassociate, 1444 NULL, sizeof (overlay_targ_id_t) }, 1445 { OVERLAY_TARG_DEGRADE, B_TRUE, B_FALSE, 1446 NULL, overlay_target_degrade, 1447 NULL, sizeof (overlay_targ_degrade_t) }, 1448 { OVERLAY_TARG_RESTORE, B_TRUE, B_FALSE, 1449 NULL, overlay_target_restore, 1450 NULL, sizeof (overlay_targ_id_t) }, 1451 { OVERLAY_TARG_LOOKUP, B_FALSE, B_TRUE, 1452 NULL, overlay_target_lookup_request, 1453 NULL, sizeof (overlay_targ_lookup_t) }, 1454 { OVERLAY_TARG_RESPOND, B_TRUE, B_FALSE, 1455 NULL, overlay_target_lookup_respond, 1456 NULL, sizeof (overlay_targ_resp_t) }, 1457 { OVERLAY_TARG_DROP, B_TRUE, B_FALSE, 1458 NULL, overlay_target_lookup_drop, 1459 NULL, sizeof (overlay_targ_resp_t) }, 1460 { OVERLAY_TARG_PKT, B_TRUE, B_TRUE, 1461 overlay_target_pkt_copyin, 1462 overlay_target_packet, 1463 overlay_target_pkt_copyout, 1464 sizeof (overlay_targ_pkt_t) }, 1465 { OVERLAY_TARG_INJECT, B_TRUE, B_FALSE, 1466 overlay_target_pkt_copyin, 1467 overlay_target_inject, 1468 NULL, sizeof (overlay_targ_pkt_t) }, 1469 { OVERLAY_TARG_RESEND, B_TRUE, B_FALSE, 1470 overlay_target_pkt_copyin, 1471 overlay_target_resend, 1472 NULL, sizeof (overlay_targ_pkt_t) }, 1473 { OVERLAY_TARG_LIST, B_FALSE, B_TRUE, 
1474 overlay_target_list_copyin, 1475 overlay_target_ioctl_list, 1476 overlay_target_list_copyout, 1477 sizeof (overlay_targ_list_t) }, 1478 { OVERLAY_TARG_CACHE_GET, B_FALSE, B_TRUE, 1479 NULL, overlay_target_cache_get, 1480 NULL, sizeof (overlay_targ_cache_t) }, 1481 { OVERLAY_TARG_CACHE_SET, B_TRUE, B_TRUE, 1482 NULL, overlay_target_cache_set, 1483 NULL, sizeof (overlay_targ_cache_t) }, 1484 { OVERLAY_TARG_CACHE_REMOVE, B_TRUE, B_TRUE, 1485 NULL, overlay_target_cache_remove, 1486 NULL, sizeof (overlay_targ_cache_t) }, 1487 { OVERLAY_TARG_CACHE_FLUSH, B_TRUE, B_TRUE, 1488 NULL, overlay_target_cache_flush, 1489 NULL, sizeof (overlay_targ_cache_t) }, 1490 { OVERLAY_TARG_CACHE_ITER, B_FALSE, B_TRUE, 1491 overlay_target_cache_iter_copyin, 1492 overlay_target_cache_iter, 1493 overlay_target_cache_iter_copyout, 1494 sizeof (overlay_targ_cache_iter_t) }, 1495 { 0 } 1496 }; 1497 1498 int 1499 overlay_target_open(dev_t *devp, int flags, int otype, cred_t *credp) 1500 { 1501 minor_t mid; 1502 overlay_target_hdl_t *thdl; 1503 1504 if (secpolicy_dl_config(credp) != 0) 1505 return (EPERM); 1506 1507 if (getminor(*devp) != 0) 1508 return (ENXIO); 1509 1510 if (otype & OTYP_BLK) 1511 return (EINVAL); 1512 1513 if (flags & ~(FREAD | FWRITE | FEXCL)) 1514 return (EINVAL); 1515 1516 if ((flags & FWRITE) && 1517 !(flags & FEXCL)) 1518 return (EINVAL); 1519 1520 if (!(flags & FREAD) && !(flags & FWRITE)) 1521 return (EINVAL); 1522 1523 if (crgetzoneid(credp) != GLOBAL_ZONEID) 1524 return (EPERM); 1525 1526 mid = id_alloc(overlay_thdl_idspace); 1527 if (ddi_soft_state_zalloc(overlay_thdl_state, mid) != 0) { 1528 id_free(overlay_thdl_idspace, mid); 1529 return (ENXIO); 1530 } 1531 1532 thdl = ddi_get_soft_state(overlay_thdl_state, mid); 1533 VERIFY(thdl != NULL); 1534 thdl->oth_minor = mid; 1535 thdl->oth_zoneid = crgetzoneid(credp); 1536 thdl->oth_oflags = flags; 1537 mutex_init(&thdl->oth_lock, NULL, MUTEX_DRIVER, NULL); 1538 list_create(&thdl->oth_outstanding, sizeof 
(overlay_target_entry_t), 1539 offsetof(overlay_target_entry_t, ote_qlink)); 1540 *devp = makedevice(getmajor(*devp), mid); 1541 1542 mutex_enter(&overlay_target_lock); 1543 if ((flags & FEXCL) && overlay_target_excl == B_TRUE) { 1544 mutex_exit(&overlay_target_lock); 1545 list_destroy(&thdl->oth_outstanding); 1546 mutex_destroy(&thdl->oth_lock); 1547 ddi_soft_state_free(overlay_thdl_state, mid); 1548 id_free(overlay_thdl_idspace, mid); 1549 return (EEXIST); 1550 } else if ((flags & FEXCL) != 0) { 1551 VERIFY(overlay_target_excl == B_FALSE); 1552 overlay_target_excl = B_TRUE; 1553 } 1554 list_insert_tail(&overlay_thdl_list, thdl); 1555 mutex_exit(&overlay_target_lock); 1556 1557 return (0); 1558 } 1559 1560 /* ARGSUSED */ 1561 int 1562 overlay_target_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp, 1563 int *rvalp) 1564 { 1565 overlay_target_ioctl_t *ioc; 1566 overlay_target_hdl_t *thdl; 1567 1568 if (secpolicy_dl_config(credp) != 0) 1569 return (EPERM); 1570 1571 if ((thdl = ddi_get_soft_state(overlay_thdl_state, 1572 getminor(dev))) == NULL) 1573 return (ENXIO); 1574 1575 for (ioc = &overlay_target_ioctab[0]; ioc->oti_cmd != 0; ioc++) { 1576 int ret; 1577 caddr_t buf; 1578 size_t bufsize; 1579 1580 if (ioc->oti_cmd != cmd) 1581 continue; 1582 1583 if (ioc->oti_write == B_TRUE && !(mode & FWRITE)) 1584 return (EBADF); 1585 1586 if (ioc->oti_copyin == NULL) { 1587 bufsize = ioc->oti_size; 1588 buf = kmem_alloc(bufsize, KM_SLEEP); 1589 if (ddi_copyin((void *)(uintptr_t)arg, buf, bufsize, 1590 mode & FKIOCTL) != 0) { 1591 kmem_free(buf, bufsize); 1592 return (EFAULT); 1593 } 1594 } else { 1595 if ((ret = ioc->oti_copyin((void *)(uintptr_t)arg, 1596 (void **)&buf, &bufsize, mode)) != 0) 1597 return (ret); 1598 } 1599 1600 ret = ioc->oti_func(thdl, buf); 1601 if (ret == 0 && ioc->oti_size != 0 && 1602 ioc->oti_ncopyout == B_TRUE) { 1603 if (ioc->oti_copyout == NULL) { 1604 if (ddi_copyout(buf, (void *)(uintptr_t)arg, 1605 bufsize, mode & FKIOCTL) != 0) 
1606 ret = EFAULT; 1607 } else { 1608 ret = ioc->oti_copyout((void *)(uintptr_t)arg, 1609 buf, bufsize, mode); 1610 } 1611 } 1612 1613 kmem_free(buf, bufsize); 1614 return (ret); 1615 } 1616 1617 return (ENOTTY); 1618 } 1619 1620 /* ARGSUSED */ 1621 int 1622 overlay_target_close(dev_t dev, int flags, int otype, cred_t *credp) 1623 { 1624 overlay_target_hdl_t *thdl; 1625 overlay_target_entry_t *entry; 1626 minor_t mid = getminor(dev); 1627 1628 if ((thdl = ddi_get_soft_state(overlay_thdl_state, mid)) == NULL) 1629 return (ENXIO); 1630 1631 mutex_enter(&overlay_target_lock); 1632 list_remove(&overlay_thdl_list, thdl); 1633 mutex_enter(&thdl->oth_lock); 1634 while ((entry = list_remove_head(&thdl->oth_outstanding)) != NULL) 1635 list_insert_tail(&overlay_target_list, entry); 1636 cv_signal(&overlay_target_condvar); 1637 mutex_exit(&thdl->oth_lock); 1638 if ((thdl->oth_oflags & FEXCL) != 0) { 1639 VERIFY(overlay_target_excl == B_TRUE); 1640 overlay_target_excl = B_FALSE; 1641 } 1642 mutex_exit(&overlay_target_lock); 1643 1644 list_destroy(&thdl->oth_outstanding); 1645 mutex_destroy(&thdl->oth_lock); 1646 mid = thdl->oth_minor; 1647 ddi_soft_state_free(overlay_thdl_state, mid); 1648 id_free(overlay_thdl_idspace, mid); 1649 1650 return (0); 1651 } 1652