1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Multidata, as described in the following papers: 29 * 30 * Adi Masputra, 31 * Multidata V.2: VA-Disjoint Packet Extents Framework Interface 32 * Design Specification. August 2004. 33 * Available as http://sac.sfbay/PSARC/2004/594/materials/mmd2.pdf. 34 * 35 * Adi Masputra, 36 * Multidata Interface Design Specification. Sep 2002. 37 * Available as http://sac.sfbay/PSARC/2002/276/materials/mmd.pdf. 38 * 39 * Adi Masputra, Frank DiMambro, Kacheong Poon, 40 * An Efficient Networking Transmit Mechanism for Solaris: 41 * Multidata Transmit (MDT). May 2002. 42 * Available as http://sac.sfbay/PSARC/2002/276/materials/mdt.pdf. 43 */ 44 45 #include <sys/types.h> 46 #include <sys/stream.h> 47 #include <sys/dlpi.h> 48 #include <sys/stropts.h> 49 #include <sys/strsun.h> 50 #include <sys/strlog.h> 51 #include <sys/strsubr.h> 52 #include <sys/sysmacros.h> 53 #include <sys/cmn_err.h> 54 #include <sys/debug.h> 55 #include <sys/kmem.h> 56 #include <sys/atomic.h> 57 58 #include <sys/multidata.h> 59 #include <sys/multidata_impl.h> 60 61 static int mmd_constructor(void *, void *, int); 62 static void mmd_destructor(void *, void *); 63 static int pdslab_constructor(void *, void *, int); 64 static void pdslab_destructor(void *, void *); 65 static int pattbl_constructor(void *, void *, int); 66 static void pattbl_destructor(void *, void *); 67 static void mmd_esballoc_free(caddr_t); 68 static int mmd_copy_pattbl(patbkt_t *, multidata_t *, pdesc_t *, int); 69 70 static boolean_t pbuf_ref_valid(multidata_t *, pdescinfo_t *); 71 #pragma inline(pbuf_ref_valid) 72 73 static boolean_t pdi_in_range(pdescinfo_t *, pdescinfo_t *); 74 #pragma inline(pdi_in_range) 75 76 static pdesc_t *mmd_addpdesc_int(multidata_t *, pdescinfo_t *, int *, int); 77 #pragma inline(mmd_addpdesc_int) 78 79 static void mmd_destroy_pattbl(patbkt_t **); 80 #pragma inline(mmd_destroy_pattbl) 81 82 static pattr_t *mmd_find_pattr(patbkt_t *, uint_t); 83 #pragma inline(mmd_find_pattr) 84 85 static pdesc_t *mmd_destroy_pdesc(multidata_t *, pdesc_t *); 86 #pragma inline(mmd_destroy_pdesc) 87 88 static pdesc_t *mmd_getpdesc(multidata_t *, pdesc_t *, pdescinfo_t *, uint_t, 89 boolean_t); 90 #pragma inline(mmd_getpdesc) 91 92 static struct kmem_cache *mmd_cache; 93 static struct kmem_cache *pd_slab_cache; 94 static struct kmem_cache *pattbl_cache; 95 96 int mmd_debug = 1; 97 #define MMD_DEBUG(s) if (mmd_debug > 0) cmn_err s 98 99 /* 100 * Set to this to true to bypass pdesc bounds checking. 101 */ 102 boolean_t mmd_speed_over_safety = B_FALSE; 103 104 /* 105 * Patchable kmem_cache flags. 106 */ 107 int mmd_kmem_flags = 0; 108 int pdslab_kmem_flags = 0; 109 int pattbl_kmem_flags = 0; 110 111 /* 112 * Alignment (in bytes) of our kmem caches. 113 */ 114 #define MULTIDATA_CACHE_ALIGN 64 115 116 /* 117 * Default number of packet descriptors per descriptor slab. Making 118 * this too small will trigger more descriptor slab allocation; making 119 * it too large will create too many unclaimed descriptors. 120 */ 121 #define PDSLAB_SZ 15 122 uint_t pdslab_sz = PDSLAB_SZ; 123 124 /* 125 * Default attribute hash table size. It's okay to set this to a small 126 * value (even to 1) because there aren't that many attributes currently 127 * defined, and because we assume there won't be many attributes associated 128 * with a Multidata at a given time. Increasing the size will reduce 129 * attribute search time (given a large number of attributes in a Multidata), 130 * and decreasing it will reduce the memory footprints and the overhead 131 * associated with managing the table. 132 */ 133 #define PATTBL_SZ 1 134 uint_t pattbl_sz = PATTBL_SZ; 135 136 /* 137 * Attribute hash key. 138 */ 139 #define PATTBL_HASH(x, sz) ((x) % (sz)) 140 141 /* 142 * Structure that precedes each Multidata metadata. 143 */ 144 struct mmd_buf_info { 145 frtn_t frp; /* free routine */ 146 uint_t buf_len; /* length of kmem buffer */ 147 }; 148 149 /* 150 * The size of each metadata buffer. 151 */ 152 #define MMD_CACHE_SIZE \ 153 (sizeof (struct mmd_buf_info) + sizeof (multidata_t)) 154 155 /* 156 * Called during startup in order to create the Multidata kmem caches. 157 */ 158 void 159 mmd_init(void) 160 { 161 pdslab_sz = MAX(1, pdslab_sz); /* at least 1 descriptor */ 162 pattbl_sz = MAX(1, pattbl_sz); /* at least 1 bucket */ 163 164 mmd_cache = kmem_cache_create("multidata", MMD_CACHE_SIZE, 165 MULTIDATA_CACHE_ALIGN, mmd_constructor, mmd_destructor, 166 NULL, NULL, NULL, mmd_kmem_flags); 167 168 pd_slab_cache = kmem_cache_create("multidata_pdslab", 169 PDESC_SLAB_SIZE(pdslab_sz), MULTIDATA_CACHE_ALIGN, 170 pdslab_constructor, pdslab_destructor, NULL, 171 (void *)(uintptr_t)pdslab_sz, NULL, pdslab_kmem_flags); 172 173 pattbl_cache = kmem_cache_create("multidata_pattbl", 174 sizeof (patbkt_t) * pattbl_sz, MULTIDATA_CACHE_ALIGN, 175 pattbl_constructor, pattbl_destructor, NULL, 176 (void *)(uintptr_t)pattbl_sz, NULL, pattbl_kmem_flags); 177 } 178 179 /* 180 * Create a Multidata message block. 181 */ 182 multidata_t * 183 mmd_alloc(mblk_t *hdr_mp, mblk_t **mmd_mp, int kmflags) 184 { 185 uchar_t *buf; 186 multidata_t *mmd; 187 uint_t mmd_mplen; 188 struct mmd_buf_info *buf_info; 189 190 ASSERT(hdr_mp != NULL); 191 ASSERT(mmd_mp != NULL); 192 193 /* 194 * Caller should never pass in a chain of mblks since we 195 * only care about the first one, hence the assertions. 196 */ 197 ASSERT(hdr_mp->b_cont == NULL); 198 199 if ((buf = kmem_cache_alloc(mmd_cache, kmflags)) == NULL) 200 return (NULL); 201 202 buf_info = (struct mmd_buf_info *)buf; 203 buf_info->frp.free_arg = (caddr_t)buf; 204 205 mmd = (multidata_t *)(buf_info + 1); 206 mmd_mplen = sizeof (*mmd); 207 208 if ((*mmd_mp = desballoc((uchar_t *)mmd, mmd_mplen, BPRI_HI, 209 &(buf_info->frp))) == NULL) { 210 kmem_cache_free(mmd_cache, buf); 211 return (NULL); 212 } 213 214 DB_TYPE(*mmd_mp) = M_MULTIDATA; 215 (*mmd_mp)->b_wptr += mmd_mplen; 216 mmd->mmd_dp = (*mmd_mp)->b_datap; 217 mmd->mmd_hbuf = hdr_mp; 218 219 return (mmd); 220 } 221 222 /* 223 * Associate additional payload buffer to the Multidata. 224 */ 225 int 226 mmd_addpldbuf(multidata_t *mmd, mblk_t *pld_mp) 227 { 228 int i; 229 230 ASSERT(mmd != NULL); 231 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 232 ASSERT(pld_mp != NULL); 233 234 mutex_enter(&mmd->mmd_pd_slab_lock); 235 for (i = 0; i < MULTIDATA_MAX_PBUFS && 236 mmd->mmd_pbuf_cnt < MULTIDATA_MAX_PBUFS; i++) { 237 if (mmd->mmd_pbuf[i] == pld_mp) { 238 /* duplicate entry */ 239 MMD_DEBUG((CE_WARN, "mmd_addpldbuf: error adding " 240 "pld 0x%p to mmd 0x%p since it has been " 241 "previously added into slot %d (total %d)\n", 242 (void *)pld_mp, (void *)mmd, i, mmd->mmd_pbuf_cnt)); 243 mutex_exit(&mmd->mmd_pd_slab_lock); 244 return (-1); 245 } else if (mmd->mmd_pbuf[i] == NULL) { 246 mmd->mmd_pbuf[i] = pld_mp; 247 mmd->mmd_pbuf_cnt++; 248 mutex_exit(&mmd->mmd_pd_slab_lock); 249 return (i); 250 } 251 } 252 253 /* all slots are taken */ 254 MMD_DEBUG((CE_WARN, "mmd_addpldbuf: error adding pld 0x%p to mmd 0x%p " 255 "since no slot space is left (total %d max %d)\n", (void *)pld_mp, 256 (void *)mmd, mmd->mmd_pbuf_cnt, MULTIDATA_MAX_PBUFS)); 257 mutex_exit(&mmd->mmd_pd_slab_lock); 258 259 return (-1); 260 } 261 262 /* 263 * Multidata metadata kmem cache constructor routine. 264 */ 265 /* ARGSUSED */ 266 static int 267 mmd_constructor(void *buf, void *cdrarg, int kmflags) 268 { 269 struct mmd_buf_info *buf_info; 270 multidata_t *mmd; 271 272 bzero((void *)buf, MMD_CACHE_SIZE); 273 274 buf_info = (struct mmd_buf_info *)buf; 275 buf_info->frp.free_func = mmd_esballoc_free; 276 buf_info->buf_len = MMD_CACHE_SIZE; 277 278 mmd = (multidata_t *)(buf_info + 1); 279 mmd->mmd_magic = MULTIDATA_MAGIC; 280 281 mutex_init(&(mmd->mmd_pd_slab_lock), NULL, MUTEX_DRIVER, NULL); 282 QL_INIT(&(mmd->mmd_pd_slab_q)); 283 QL_INIT(&(mmd->mmd_pd_q)); 284 285 return (0); 286 } 287 288 /* 289 * Multidata metadata kmem cache destructor routine. 290 */ 291 /* ARGSUSED */ 292 static void 293 mmd_destructor(void *buf, void *cdrarg) 294 { 295 multidata_t *mmd; 296 #ifdef DEBUG 297 int i; 298 #endif 299 300 mmd = (multidata_t *)((uchar_t *)buf + sizeof (struct mmd_buf_info)); 301 302 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 303 ASSERT(mmd->mmd_dp == NULL); 304 ASSERT(mmd->mmd_hbuf == NULL); 305 ASSERT(mmd->mmd_pbuf_cnt == 0); 306 #ifdef DEBUG 307 for (i = 0; i < MULTIDATA_MAX_PBUFS; i++) 308 ASSERT(mmd->mmd_pbuf[i] == NULL); 309 #endif 310 ASSERT(mmd->mmd_pattbl == NULL); 311 312 mutex_destroy(&(mmd->mmd_pd_slab_lock)); 313 ASSERT(mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q)); 314 ASSERT(mmd->mmd_slab_cnt == 0); 315 ASSERT(mmd->mmd_pd_q.ql_next == &(mmd->mmd_pd_q)); 316 ASSERT(mmd->mmd_pd_cnt == 0); 317 ASSERT(mmd->mmd_hbuf_ref == 0); 318 ASSERT(mmd->mmd_pbuf_ref == 0); 319 } 320 321 /* 322 * Multidata message block free callback routine. 323 */ 324 static void 325 mmd_esballoc_free(caddr_t buf) 326 { 327 multidata_t *mmd; 328 pdesc_t *pd; 329 pdesc_slab_t *slab; 330 int i; 331 332 ASSERT(buf != NULL); 333 ASSERT(((struct mmd_buf_info *)buf)->buf_len == MMD_CACHE_SIZE); 334 335 mmd = (multidata_t *)(buf + sizeof (struct mmd_buf_info)); 336 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 337 338 ASSERT(mmd->mmd_dp != NULL); 339 ASSERT(mmd->mmd_dp->db_ref == 1); 340 341 /* remove all packet descriptors and private attributes */ 342 pd = Q2PD(mmd->mmd_pd_q.ql_next); 343 while (pd != Q2PD(&(mmd->mmd_pd_q))) 344 pd = mmd_destroy_pdesc(mmd, pd); 345 346 ASSERT(mmd->mmd_pd_q.ql_next == &(mmd->mmd_pd_q)); 347 ASSERT(mmd->mmd_pd_cnt == 0); 348 ASSERT(mmd->mmd_hbuf_ref == 0); 349 ASSERT(mmd->mmd_pbuf_ref == 0); 350 351 /* remove all global attributes */ 352 if (mmd->mmd_pattbl != NULL) 353 mmd_destroy_pattbl(&(mmd->mmd_pattbl)); 354 355 /* remove all descriptor slabs */ 356 slab = Q2PDSLAB(mmd->mmd_pd_slab_q.ql_next); 357 while (slab != Q2PDSLAB(&(mmd->mmd_pd_slab_q))) { 358 pdesc_slab_t *slab_next = Q2PDSLAB(slab->pds_next); 359 360 remque(&(slab->pds_next)); 361 slab->pds_next = NULL; 362 slab->pds_prev = NULL; 363 slab->pds_mmd = NULL; 364 slab->pds_used = 0; 365 kmem_cache_free(pd_slab_cache, slab); 366 367 ASSERT(mmd->mmd_slab_cnt > 0); 368 mmd->mmd_slab_cnt--; 369 slab = slab_next; 370 } 371 ASSERT(mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q)); 372 ASSERT(mmd->mmd_slab_cnt == 0); 373 374 mmd->mmd_dp = NULL; 375 376 /* finally, free all associated message blocks */ 377 if (mmd->mmd_hbuf != NULL) { 378 freeb(mmd->mmd_hbuf); 379 mmd->mmd_hbuf = NULL; 380 } 381 382 for (i = 0; i < MULTIDATA_MAX_PBUFS; i++) { 383 if (mmd->mmd_pbuf[i] != NULL) { 384 freeb(mmd->mmd_pbuf[i]); 385 mmd->mmd_pbuf[i] = NULL; 386 ASSERT(mmd->mmd_pbuf_cnt > 0); 387 mmd->mmd_pbuf_cnt--; 388 } 389 } 390 391 ASSERT(mmd->mmd_pbuf_cnt == 0); 392 ASSERT(MUTEX_NOT_HELD(&(mmd->mmd_pd_slab_lock))); 393 kmem_cache_free(mmd_cache, buf); 394 } 395 396 /* 397 * Multidata message block copy routine, called by copyb() when it 398 * encounters a M_MULTIDATA data block type. This routine should 399 * not be called by anyone other than copyb(), since it may go away 400 * (read: become static to this module) once some sort of copy callback 401 * routine is made available. 402 */ 403 mblk_t * 404 mmd_copy(mblk_t *bp, int kmflags) 405 { 406 multidata_t *mmd, *n_mmd; 407 mblk_t *n_hbuf = NULL, *n_pbuf[MULTIDATA_MAX_PBUFS]; 408 mblk_t **pmp_last = &n_pbuf[MULTIDATA_MAX_PBUFS - 1]; 409 mblk_t **pmp; 410 mblk_t *n_bp = NULL; 411 pdesc_t *pd; 412 uint_t n_pbuf_cnt = 0; 413 int idx, i; 414 415 #define FREE_PBUFS() { \ 416 for (pmp = &n_pbuf[0]; pmp <= pmp_last; pmp++) \ 417 if (*pmp != NULL) freeb(*pmp); \ 418 } 419 420 #define REL_OFF(p, base, n_base) \ 421 ((uchar_t *)(n_base) + ((uchar_t *)(p) - (uchar_t *)base)) 422 423 ASSERT(bp != NULL && DB_TYPE(bp) == M_MULTIDATA); 424 mmd = mmd_getmultidata(bp); 425 426 /* copy the header buffer */ 427 if (mmd->mmd_hbuf != NULL && (n_hbuf = copyb(mmd->mmd_hbuf)) == NULL) 428 return (NULL); 429 430 /* copy the payload buffer(s) */ 431 mutex_enter(&mmd->mmd_pd_slab_lock); 432 bzero((void *)&n_pbuf[0], sizeof (mblk_t *) * MULTIDATA_MAX_PBUFS); 433 n_pbuf_cnt = mmd->mmd_pbuf_cnt; 434 for (i = 0; i < n_pbuf_cnt; i++) { 435 ASSERT(mmd->mmd_pbuf[i] != NULL); 436 n_pbuf[i] = copyb(mmd->mmd_pbuf[i]); 437 if (n_pbuf[i] == NULL) { 438 FREE_PBUFS(); 439 mutex_exit(&mmd->mmd_pd_slab_lock); 440 return (NULL); 441 } 442 } 443 444 /* allocate new Multidata */ 445 n_mmd = mmd_alloc(n_hbuf, &n_bp, kmflags); 446 if (n_mmd == NULL) { 447 if (n_hbuf != NULL) 448 freeb(n_hbuf); 449 if (n_pbuf_cnt != 0) 450 FREE_PBUFS(); 451 mutex_exit(&mmd->mmd_pd_slab_lock); 452 return (NULL); 453 } 454 455 /* 456 * Add payload buffer(s); upon success, leave n_pbuf array 457 * alone, as the newly-created Multidata had already contained 458 * the mblk pointers stored in the array. These will be freed 459 * along with the Multidata itself. 460 */ 461 for (i = 0, pmp = &n_pbuf[0]; i < n_pbuf_cnt; i++, pmp++) { 462 idx = mmd_addpldbuf(n_mmd, *pmp); 463 if (idx < 0) { 464 FREE_PBUFS(); 465 freeb(n_bp); 466 mutex_exit(&mmd->mmd_pd_slab_lock); 467 return (NULL); 468 } 469 } 470 471 /* copy over global attributes */ 472 if (mmd->mmd_pattbl != NULL && 473 mmd_copy_pattbl(mmd->mmd_pattbl, n_mmd, NULL, kmflags) < 0) { 474 freeb(n_bp); 475 mutex_exit(&mmd->mmd_pd_slab_lock); 476 return (NULL); 477 } 478 479 /* copy over packet descriptors and their atttributes */ 480 pd = mmd_getpdesc(mmd, NULL, NULL, 1, B_TRUE); /* first pdesc */ 481 while (pd != NULL) { 482 pdesc_t *n_pd; 483 pdescinfo_t *pdi, n_pdi; 484 uchar_t *n_base, *base; 485 pdesc_t *pd_next; 486 487 /* next pdesc */ 488 pd_next = mmd_getpdesc(pd->pd_slab->pds_mmd, pd, NULL, 489 1, B_TRUE); 490 491 /* skip if already removed */ 492 if (pd->pd_flags & PDESC_REM_DEFER) { 493 pd = pd_next; 494 continue; 495 } 496 497 pdi = &(pd->pd_pdi); 498 bzero(&n_pdi, sizeof (n_pdi)); 499 500 /* 501 * Calculate new descriptor values based on the offset of 502 * each pointer relative to the associated buffer(s). 503 */ 504 ASSERT(pdi->flags & PDESC_HAS_REF); 505 if (pdi->flags & PDESC_HBUF_REF) { 506 n_base = n_mmd->mmd_hbuf->b_rptr; 507 base = mmd->mmd_hbuf->b_rptr; 508 509 n_pdi.flags |= PDESC_HBUF_REF; 510 n_pdi.hdr_base = REL_OFF(pdi->hdr_base, base, n_base); 511 n_pdi.hdr_rptr = REL_OFF(pdi->hdr_rptr, base, n_base); 512 n_pdi.hdr_wptr = REL_OFF(pdi->hdr_wptr, base, n_base); 513 n_pdi.hdr_lim = REL_OFF(pdi->hdr_lim, base, n_base); 514 } 515 516 if (pdi->flags & PDESC_PBUF_REF) { 517 n_pdi.flags |= PDESC_PBUF_REF; 518 n_pdi.pld_cnt = pdi->pld_cnt; 519 520 for (i = 0; i < pdi->pld_cnt; i++) { 521 idx = pdi->pld_ary[i].pld_pbuf_idx; 522 ASSERT(idx < MULTIDATA_MAX_PBUFS); 523 ASSERT(n_mmd->mmd_pbuf[idx] != NULL); 524 ASSERT(mmd->mmd_pbuf[idx] != NULL); 525 526 n_base = n_mmd->mmd_pbuf[idx]->b_rptr; 527 base = mmd->mmd_pbuf[idx]->b_rptr; 528 529 n_pdi.pld_ary[i].pld_pbuf_idx = idx; 530 531 /* 532 * We can't copy the pointers just like that, 533 * so calculate the relative offset. 534 */ 535 n_pdi.pld_ary[i].pld_rptr = 536 REL_OFF(pdi->pld_ary[i].pld_rptr, 537 base, n_base); 538 n_pdi.pld_ary[i].pld_wptr = 539 REL_OFF(pdi->pld_ary[i].pld_wptr, 540 base, n_base); 541 } 542 } 543 544 /* add the new descriptor to the new Multidata */ 545 n_pd = mmd_addpdesc_int(n_mmd, &n_pdi, NULL, kmflags); 546 547 if (n_pd == NULL || (pd->pd_pattbl != NULL && 548 mmd_copy_pattbl(pd->pd_pattbl, n_mmd, n_pd, kmflags) < 0)) { 549 freeb(n_bp); 550 mutex_exit(&mmd->mmd_pd_slab_lock); 551 return (NULL); 552 } 553 554 pd = pd_next; 555 } 556 #undef REL_OFF 557 #undef FREE_PBUFS 558 559 mutex_exit(&mmd->mmd_pd_slab_lock); 560 return (n_bp); 561 } 562 563 /* 564 * Given a Multidata message block, return the Multidata metadata handle. 565 */ 566 multidata_t * 567 mmd_getmultidata(mblk_t *mp) 568 { 569 multidata_t *mmd; 570 571 ASSERT(mp != NULL); 572 573 if (DB_TYPE(mp) != M_MULTIDATA) 574 return (NULL); 575 576 mmd = (multidata_t *)mp->b_rptr; 577 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 578 579 return (mmd); 580 } 581 582 /* 583 * Return the start and end addresses of the associated buffer(s). 584 */ 585 void 586 mmd_getregions(multidata_t *mmd, mbufinfo_t *mbi) 587 { 588 int i; 589 590 ASSERT(mmd != NULL); 591 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 592 ASSERT(mbi != NULL); 593 594 bzero((void *)mbi, sizeof (mbufinfo_t)); 595 596 if (mmd->mmd_hbuf != NULL) { 597 mbi->hbuf_rptr = mmd->mmd_hbuf->b_rptr; 598 mbi->hbuf_wptr = mmd->mmd_hbuf->b_wptr; 599 } 600 601 mutex_enter(&mmd->mmd_pd_slab_lock); 602 for (i = 0; i < mmd->mmd_pbuf_cnt; i++) { 603 ASSERT(mmd->mmd_pbuf[i] != NULL); 604 mbi->pbuf_ary[i].pbuf_rptr = mmd->mmd_pbuf[i]->b_rptr; 605 mbi->pbuf_ary[i].pbuf_wptr = mmd->mmd_pbuf[i]->b_wptr; 606 607 } 608 mbi->pbuf_cnt = mmd->mmd_pbuf_cnt; 609 mutex_exit(&mmd->mmd_pd_slab_lock); 610 } 611 612 /* 613 * Return the Multidata statistics. 614 */ 615 uint_t 616 mmd_getcnt(multidata_t *mmd, uint_t *hbuf_ref, uint_t *pbuf_ref) 617 { 618 uint_t pd_cnt; 619 620 ASSERT(mmd != NULL); 621 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 622 623 mutex_enter(&(mmd->mmd_pd_slab_lock)); 624 if (hbuf_ref != NULL) 625 *hbuf_ref = mmd->mmd_hbuf_ref; 626 if (pbuf_ref != NULL) 627 *pbuf_ref = mmd->mmd_pbuf_ref; 628 pd_cnt = mmd->mmd_pd_cnt; 629 mutex_exit(&(mmd->mmd_pd_slab_lock)); 630 631 return (pd_cnt); 632 } 633 634 #define HBUF_REF_VALID(mmd, pdi) \ 635 ((mmd)->mmd_hbuf != NULL && (pdi)->hdr_rptr != NULL && \ 636 (pdi)->hdr_wptr != NULL && (pdi)->hdr_base != NULL && \ 637 (pdi)->hdr_lim != NULL && (pdi)->hdr_lim >= (pdi)->hdr_base && \ 638 (pdi)->hdr_wptr >= (pdi)->hdr_rptr && \ 639 (pdi)->hdr_base <= (pdi)->hdr_rptr && \ 640 (pdi)->hdr_lim >= (pdi)->hdr_wptr && \ 641 (pdi)->hdr_base >= (mmd)->mmd_hbuf->b_rptr && \ 642 MBLKIN((mmd)->mmd_hbuf, \ 643 (pdi->hdr_base - (mmd)->mmd_hbuf->b_rptr), \ 644 PDESC_HDRSIZE(pdi))) 645 646 /* 647 * Bounds check payload area(s). 648 */ 649 static boolean_t 650 pbuf_ref_valid(multidata_t *mmd, pdescinfo_t *pdi) 651 { 652 int i = 0, idx; 653 boolean_t valid = B_TRUE; 654 struct pld_ary_s *pa; 655 656 mutex_enter(&mmd->mmd_pd_slab_lock); 657 if (pdi->pld_cnt == 0 || pdi->pld_cnt > mmd->mmd_pbuf_cnt) { 658 mutex_exit(&mmd->mmd_pd_slab_lock); 659 return (B_FALSE); 660 } 661 662 pa = &pdi->pld_ary[0]; 663 while (valid && i < pdi->pld_cnt) { 664 valid = (((idx = pa->pld_pbuf_idx) < mmd->mmd_pbuf_cnt) && 665 pa->pld_rptr != NULL && pa->pld_wptr != NULL && 666 pa->pld_wptr >= pa->pld_rptr && 667 pa->pld_rptr >= mmd->mmd_pbuf[idx]->b_rptr && 668 MBLKIN(mmd->mmd_pbuf[idx], (pa->pld_rptr - 669 mmd->mmd_pbuf[idx]->b_rptr), 670 PDESC_PLD_SPAN_SIZE(pdi, i))); 671 672 if (!valid) { 673 MMD_DEBUG((CE_WARN, 674 "pbuf_ref_valid: pdi 0x%p pld out of bound; " 675 "index %d has pld_cnt %d pbuf_idx %d " 676 "(mmd_pbuf_cnt %d), " 677 "pld_rptr 0x%p pld_wptr 0x%p len %d " 678 "(valid 0x%p-0x%p len %d)\n", (void *)pdi, 679 i, pdi->pld_cnt, idx, mmd->mmd_pbuf_cnt, 680 (void *)pa->pld_rptr, 681 (void *)pa->pld_wptr, 682 (int)PDESC_PLD_SPAN_SIZE(pdi, i), 683 (void *)mmd->mmd_pbuf[idx]->b_rptr, 684 (void *)mmd->mmd_pbuf[idx]->b_wptr, 685 (int)MBLKL(mmd->mmd_pbuf[idx]))); 686 } 687 688 /* advance to next entry */ 689 i++; 690 pa++; 691 } 692 693 mutex_exit(&mmd->mmd_pd_slab_lock); 694 return (valid); 695 } 696 697 /* 698 * Add a packet descriptor to the Multidata. 699 */ 700 pdesc_t * 701 mmd_addpdesc(multidata_t *mmd, pdescinfo_t *pdi, int *err, int kmflags) 702 { 703 ASSERT(mmd != NULL); 704 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 705 ASSERT(pdi != NULL); 706 ASSERT(pdi->flags & PDESC_HAS_REF); 707 708 /* do the references refer to invalid memory regions? */ 709 if (!mmd_speed_over_safety && 710 (((pdi->flags & PDESC_HBUF_REF) && !HBUF_REF_VALID(mmd, pdi)) || 711 ((pdi->flags & PDESC_PBUF_REF) && !pbuf_ref_valid(mmd, pdi)))) { 712 if (err != NULL) 713 *err = EINVAL; 714 return (NULL); 715 } 716 717 return (mmd_addpdesc_int(mmd, pdi, err, kmflags)); 718 } 719 720 /* 721 * Internal routine to add a packet descriptor, called when mmd_addpdesc 722 * or mmd_copy tries to allocate and add a descriptor to a Multidata. 723 */ 724 static pdesc_t * 725 mmd_addpdesc_int(multidata_t *mmd, pdescinfo_t *pdi, int *err, int kmflags) 726 { 727 pdesc_slab_t *slab, *slab_last; 728 pdesc_t *pd; 729 730 ASSERT(pdi->flags & PDESC_HAS_REF); 731 ASSERT(!(pdi->flags & PDESC_HBUF_REF) || HBUF_REF_VALID(mmd, pdi)); 732 ASSERT(!(pdi->flags & PDESC_PBUF_REF) || pbuf_ref_valid(mmd, pdi)); 733 734 if (err != NULL) 735 *err = 0; 736 737 mutex_enter(&(mmd->mmd_pd_slab_lock)); 738 /* 739 * Is slab list empty or the last-added slab is full? If so, 740 * allocate new slab for the descriptor; otherwise, use the 741 * last-added slab instead. 742 */ 743 slab_last = Q2PDSLAB(mmd->mmd_pd_slab_q.ql_prev); 744 if (mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q) || 745 slab_last->pds_used == slab_last->pds_sz) { 746 slab = kmem_cache_alloc(pd_slab_cache, kmflags); 747 if (slab == NULL) { 748 if (err != NULL) 749 *err = ENOMEM; 750 mutex_exit(&(mmd->mmd_pd_slab_lock)); 751 return (NULL); 752 } 753 slab->pds_mmd = mmd; 754 755 ASSERT(slab->pds_used == 0); 756 ASSERT(slab->pds_next == NULL && slab->pds_prev == NULL); 757 758 /* insert slab at end of list */ 759 insque(&(slab->pds_next), mmd->mmd_pd_slab_q.ql_prev); 760 mmd->mmd_slab_cnt++; 761 } else { 762 slab = slab_last; 763 } 764 ASSERT(slab->pds_used < slab->pds_sz); 765 pd = &(slab->pds_free_desc[slab->pds_used++]); 766 ASSERT(pd->pd_magic == PDESC_MAGIC); 767 pd->pd_next = NULL; 768 pd->pd_prev = NULL; 769 pd->pd_slab = slab; 770 pd->pd_pattbl = NULL; 771 772 /* copy over the descriptor info from caller */ 773 PDI_COPY(pdi, &(pd->pd_pdi)); 774 775 if (pd->pd_flags & PDESC_HBUF_REF) 776 mmd->mmd_hbuf_ref++; 777 if (pd->pd_flags & PDESC_PBUF_REF) 778 mmd->mmd_pbuf_ref += pd->pd_pdi.pld_cnt; 779 mmd->mmd_pd_cnt++; 780 781 /* insert descriptor at end of list */ 782 insque(&(pd->pd_next), mmd->mmd_pd_q.ql_prev); 783 mutex_exit(&(mmd->mmd_pd_slab_lock)); 784 785 return (pd); 786 } 787 788 /* 789 * Packet descriptor slab kmem cache constructor routine. 790 */ 791 /* ARGSUSED */ 792 static int 793 pdslab_constructor(void *buf, void *cdrarg, int kmflags) 794 { 795 pdesc_slab_t *slab; 796 uint_t cnt = (uint_t)(uintptr_t)cdrarg; 797 int i; 798 799 ASSERT(cnt > 0); /* slab size can't be zero */ 800 801 slab = (pdesc_slab_t *)buf; 802 slab->pds_next = NULL; 803 slab->pds_prev = NULL; 804 slab->pds_mmd = NULL; 805 slab->pds_used = 0; 806 slab->pds_sz = cnt; 807 808 for (i = 0; i < cnt; i++) { 809 pdesc_t *pd = &(slab->pds_free_desc[i]); 810 pd->pd_magic = PDESC_MAGIC; 811 } 812 return (0); 813 } 814 815 /* 816 * Packet descriptor slab kmem cache destructor routine. 817 */ 818 /* ARGSUSED */ 819 static void 820 pdslab_destructor(void *buf, void *cdrarg) 821 { 822 pdesc_slab_t *slab; 823 824 slab = (pdesc_slab_t *)buf; 825 ASSERT(slab->pds_next == NULL); 826 ASSERT(slab->pds_prev == NULL); 827 ASSERT(slab->pds_mmd == NULL); 828 ASSERT(slab->pds_used == 0); 829 ASSERT(slab->pds_sz > 0); 830 } 831 832 /* 833 * Remove a packet descriptor from the in-use descriptor list, 834 * called by mmd_rempdesc or during free. 835 */ 836 static pdesc_t * 837 mmd_destroy_pdesc(multidata_t *mmd, pdesc_t *pd) 838 { 839 pdesc_t *pd_next; 840 841 pd_next = Q2PD(pd->pd_next); 842 remque(&(pd->pd_next)); 843 844 /* remove all local attributes */ 845 if (pd->pd_pattbl != NULL) 846 mmd_destroy_pattbl(&(pd->pd_pattbl)); 847 848 /* don't decrease counts for a removed descriptor */ 849 if (!(pd->pd_flags & PDESC_REM_DEFER)) { 850 if (pd->pd_flags & PDESC_HBUF_REF) { 851 ASSERT(mmd->mmd_hbuf_ref > 0); 852 mmd->mmd_hbuf_ref--; 853 } 854 if (pd->pd_flags & PDESC_PBUF_REF) { 855 ASSERT(mmd->mmd_pbuf_ref > 0); 856 mmd->mmd_pbuf_ref -= pd->pd_pdi.pld_cnt; 857 } 858 ASSERT(mmd->mmd_pd_cnt > 0); 859 mmd->mmd_pd_cnt--; 860 } 861 return (pd_next); 862 } 863 864 /* 865 * Remove a packet descriptor from the Multidata. 866 */ 867 void 868 mmd_rempdesc(pdesc_t *pd) 869 { 870 multidata_t *mmd; 871 872 ASSERT(pd->pd_magic == PDESC_MAGIC); 873 ASSERT(pd->pd_slab != NULL); 874 875 mmd = pd->pd_slab->pds_mmd; 876 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 877 878 mutex_enter(&(mmd->mmd_pd_slab_lock)); 879 /* 880 * We can't deallocate the associated resources if the Multidata 881 * is shared with other threads, because it's possible that the 882 * descriptor handle value is held by those threads. That's why 883 * we simply mark the entry as "removed" and decrement the counts. 884 * If there are no other threads, then we free the descriptor. 885 */ 886 if (mmd->mmd_dp->db_ref > 1) { 887 pd->pd_flags |= PDESC_REM_DEFER; 888 if (pd->pd_flags & PDESC_HBUF_REF) { 889 ASSERT(mmd->mmd_hbuf_ref > 0); 890 mmd->mmd_hbuf_ref--; 891 } 892 if (pd->pd_flags & PDESC_PBUF_REF) { 893 ASSERT(mmd->mmd_pbuf_ref > 0); 894 mmd->mmd_pbuf_ref -= pd->pd_pdi.pld_cnt; 895 } 896 ASSERT(mmd->mmd_pd_cnt > 0); 897 mmd->mmd_pd_cnt--; 898 } else { 899 (void) mmd_destroy_pdesc(mmd, pd); 900 } 901 mutex_exit(&(mmd->mmd_pd_slab_lock)); 902 } 903 904 /* 905 * A generic routine to traverse the packet descriptor in-use list. 906 */ 907 static pdesc_t * 908 mmd_getpdesc(multidata_t *mmd, pdesc_t *pd, pdescinfo_t *pdi, uint_t forw, 909 boolean_t mutex_held) 910 { 911 pdesc_t *pd_head; 912 913 ASSERT(pd == NULL || pd->pd_slab->pds_mmd == mmd); 914 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 915 ASSERT(!mutex_held || MUTEX_HELD(&(mmd->mmd_pd_slab_lock))); 916 917 if (!mutex_held) 918 mutex_enter(&(mmd->mmd_pd_slab_lock)); 919 pd_head = Q2PD(&(mmd->mmd_pd_q)); 920 921 if (pd == NULL) { 922 /* 923 * We're called by mmd_get{first,last}pdesc, and so 924 * return either the first or last list element. 925 */ 926 pd = forw ? Q2PD(mmd->mmd_pd_q.ql_next) : 927 Q2PD(mmd->mmd_pd_q.ql_prev); 928 } else { 929 /* 930 * We're called by mmd_get{next,prev}pdesc, and so 931 * return either the next or previous list element. 932 */ 933 pd = forw ? Q2PD(pd->pd_next) : Q2PD(pd->pd_prev); 934 } 935 936 while (pd != pd_head) { 937 /* skip element if it has been removed */ 938 if (!(pd->pd_flags & PDESC_REM_DEFER)) 939 break; 940 pd = forw ? Q2PD(pd->pd_next) : Q2PD(pd->pd_prev); 941 } 942 if (!mutex_held) 943 mutex_exit(&(mmd->mmd_pd_slab_lock)); 944 945 /* return NULL if we're back at the beginning */ 946 if (pd == pd_head) 947 pd = NULL; 948 949 /* got an entry; copy descriptor info to caller */ 950 if (pd != NULL && pdi != NULL) 951 PDI_COPY(&(pd->pd_pdi), pdi); 952 953 ASSERT(pd == NULL || pd->pd_magic == PDESC_MAGIC); 954 return (pd); 955 956 } 957 958 /* 959 * Return the first packet descriptor in the in-use list. 960 */ 961 pdesc_t * 962 mmd_getfirstpdesc(multidata_t *mmd, pdescinfo_t *pdi) 963 { 964 return (mmd_getpdesc(mmd, NULL, pdi, 1, B_FALSE)); 965 } 966 967 /* 968 * Return the last packet descriptor in the in-use list. 969 */ 970 pdesc_t * 971 mmd_getlastpdesc(multidata_t *mmd, pdescinfo_t *pdi) 972 { 973 return (mmd_getpdesc(mmd, NULL, pdi, 0, B_FALSE)); 974 } 975 976 /* 977 * Return the next packet descriptor in the in-use list. 978 */ 979 pdesc_t * 980 mmd_getnextpdesc(pdesc_t *pd, pdescinfo_t *pdi) 981 { 982 return (mmd_getpdesc(pd->pd_slab->pds_mmd, pd, pdi, 1, B_FALSE)); 983 } 984 985 /* 986 * Return the previous packet descriptor in the in-use list. 987 */ 988 pdesc_t * 989 mmd_getprevpdesc(pdesc_t *pd, pdescinfo_t *pdi) 990 { 991 return (mmd_getpdesc(pd->pd_slab->pds_mmd, pd, pdi, 0, B_FALSE)); 992 } 993 994 /* 995 * Check to see if pdi stretches over c_pdi; used to ensure that a packet 996 * descriptor's header and payload span may not be extended beyond the 997 * current boundaries. 998 */ 999 static boolean_t 1000 pdi_in_range(pdescinfo_t *pdi, pdescinfo_t *c_pdi) 1001 { 1002 int i; 1003 struct pld_ary_s *pa = &pdi->pld_ary[0]; 1004 struct pld_ary_s *c_pa = &c_pdi->pld_ary[0]; 1005 1006 if (pdi->hdr_base < c_pdi->hdr_base || pdi->hdr_lim > c_pdi->hdr_lim) 1007 return (B_FALSE); 1008 1009 /* 1010 * We don't allow the number of span to be reduced, for the sake 1011 * of simplicity. Instead, we provide PDESC_PLD_SPAN_CLEAR() to 1012 * clear a packet descriptor. Note that we allow the span count to 1013 * be increased, and the bounds check for the new one happens 1014 * in pbuf_ref_valid. 1015 */ 1016 if (pdi->pld_cnt < c_pdi->pld_cnt) 1017 return (B_FALSE); 1018 1019 /* compare only those which are currently defined */ 1020 for (i = 0; i < c_pdi->pld_cnt; i++, pa++, c_pa++) { 1021 if (pa->pld_pbuf_idx != c_pa->pld_pbuf_idx || 1022 pa->pld_rptr < c_pa->pld_rptr || 1023 pa->pld_wptr > c_pa->pld_wptr) 1024 return (B_FALSE); 1025 } 1026 return (B_TRUE); 1027 } 1028 1029 /* 1030 * Modify the layout of a packet descriptor. 1031 */ 1032 pdesc_t * 1033 mmd_adjpdesc(pdesc_t *pd, pdescinfo_t *pdi) 1034 { 1035 multidata_t *mmd; 1036 pdescinfo_t *c_pdi; 1037 1038 ASSERT(pd != NULL); 1039 ASSERT(pdi != NULL); 1040 ASSERT(pd->pd_magic == PDESC_MAGIC); 1041 1042 mmd = pd->pd_slab->pds_mmd; 1043 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 1044 1045 /* entry has been removed */ 1046 if (pd->pd_flags & PDESC_REM_DEFER) 1047 return (NULL); 1048 1049 /* caller doesn't intend to specify any buffer reference? */ 1050 if (!(pdi->flags & PDESC_HAS_REF)) 1051 return (NULL); 1052 1053 /* do the references refer to invalid memory regions? */ 1054 if (!mmd_speed_over_safety && 1055 (((pdi->flags & PDESC_HBUF_REF) && !HBUF_REF_VALID(mmd, pdi)) || 1056 ((pdi->flags & PDESC_PBUF_REF) && !pbuf_ref_valid(mmd, pdi)))) 1057 return (NULL); 1058 1059 /* they're not subsets of current references? */ 1060 c_pdi = &(pd->pd_pdi); 1061 if (!pdi_in_range(pdi, c_pdi)) 1062 return (NULL); 1063 1064 /* copy over the descriptor info from caller */ 1065 PDI_COPY(pdi, c_pdi); 1066 1067 return (pd); 1068 } 1069 1070 /* 1071 * Copy the contents of a packet descriptor into a new buffer. If the 1072 * descriptor points to more than one buffer fragments, the contents 1073 * of both fragments will be joined, with the header buffer fragment 1074 * preceding the payload buffer fragment(s). 1075 */ 1076 mblk_t * 1077 mmd_transform(pdesc_t *pd) 1078 { 1079 multidata_t *mmd; 1080 pdescinfo_t *pdi; 1081 mblk_t *mp; 1082 int h_size = 0, p_size = 0; 1083 int i, len; 1084 1085 ASSERT(pd != NULL); 1086 ASSERT(pd->pd_magic == PDESC_MAGIC); 1087 1088 mmd = pd->pd_slab->pds_mmd; 1089 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 1090 1091 /* entry has been removed */ 1092 if (pd->pd_flags & PDESC_REM_DEFER) 1093 return (NULL); 1094 1095 mutex_enter(&mmd->mmd_pd_slab_lock); 1096 pdi = &(pd->pd_pdi); 1097 if (pdi->flags & PDESC_HBUF_REF) 1098 h_size = PDESC_HDRL(pdi); 1099 if (pdi->flags & PDESC_PBUF_REF) { 1100 for (i = 0; i < pdi->pld_cnt; i++) 1101 p_size += PDESC_PLD_SPAN_SIZE(pdi, i); 1102 } 1103 1104 /* allocate space large enough to hold the fragment(s) */ 1105 ASSERT(h_size + p_size >= 0); 1106 if ((mp = allocb(h_size + p_size, BPRI_HI)) == NULL) { 1107 mutex_exit(&mmd->mmd_pd_slab_lock); 1108 return (NULL); 1109 } 1110 1111 /* copy over the header fragment */ 1112 if ((pdi->flags & PDESC_HBUF_REF) && h_size > 0) { 1113 bcopy(pdi->hdr_rptr, mp->b_wptr, h_size); 1114 mp->b_wptr += h_size; 1115 } 1116 1117 /* copy over the payload fragment */ 1118 if ((pdi->flags & PDESC_PBUF_REF) && p_size > 0) { 1119 for (i = 0; i < pdi->pld_cnt; i++) { 1120 len = PDESC_PLD_SPAN_SIZE(pdi, i); 1121 if (len > 0) { 1122 bcopy(pdi->pld_ary[i].pld_rptr, 1123 mp->b_wptr, len); 1124 mp->b_wptr += len; 1125 } 1126 } 1127 } 1128 1129 mutex_exit(&mmd->mmd_pd_slab_lock); 1130 return (mp); 1131 } 1132 1133 /* 1134 * Return a chain of mblks representing the Multidata packet. 1135 */ 1136 mblk_t * 1137 mmd_transform_link(pdesc_t *pd) 1138 { 1139 multidata_t *mmd; 1140 pdescinfo_t *pdi; 1141 mblk_t *nmp = NULL; 1142 1143 ASSERT(pd != NULL); 1144 ASSERT(pd->pd_magic == PDESC_MAGIC); 1145 1146 mmd = pd->pd_slab->pds_mmd; 1147 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 1148 1149 /* entry has been removed */ 1150 if (pd->pd_flags & PDESC_REM_DEFER) 1151 return (NULL); 1152 1153 pdi = &(pd->pd_pdi); 1154 1155 /* duplicate header buffer */ 1156 if ((pdi->flags & PDESC_HBUF_REF)) { 1157 if ((nmp = dupb(mmd->mmd_hbuf)) == NULL) 1158 return (NULL); 1159 nmp->b_rptr = pdi->hdr_rptr; 1160 nmp->b_wptr = pdi->hdr_wptr; 1161 } 1162 1163 /* duplicate payload buffer(s) */ 1164 if (pdi->flags & PDESC_PBUF_REF) { 1165 int i; 1166 mblk_t *mp; 1167 struct pld_ary_s *pa = &pdi->pld_ary[0]; 1168 1169 mutex_enter(&mmd->mmd_pd_slab_lock); 1170 for (i = 0; i < pdi->pld_cnt; i++, pa++) { 1171 ASSERT(mmd->mmd_pbuf[pa->pld_pbuf_idx] != NULL); 1172 1173 /* skip empty ones */ 1174 if (PDESC_PLD_SPAN_SIZE(pdi, i) == 0) 1175 continue; 1176 1177 mp = dupb(mmd->mmd_pbuf[pa->pld_pbuf_idx]); 1178 if (mp == NULL) { 1179 if (nmp != NULL) 1180 freemsg(nmp); 1181 mutex_exit(&mmd->mmd_pd_slab_lock); 1182 return (NULL); 1183 } 1184 mp->b_rptr = pa->pld_rptr; 1185 mp->b_wptr = pa->pld_wptr; 1186 if (nmp == NULL) 1187 nmp = mp; 1188 else 1189 linkb(nmp, mp); 1190 } 1191 mutex_exit(&mmd->mmd_pd_slab_lock); 1192 } 1193 1194 return (nmp); 1195 } 1196 1197 /* 1198 * Return duplicate message block(s) of the associated buffer(s). 1199 */ 1200 int 1201 mmd_dupbufs(multidata_t *mmd, mblk_t **hmp, mblk_t **pmp) 1202 { 1203 ASSERT(mmd != NULL); 1204 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 1205 1206 if (hmp != NULL) { 1207 *hmp = NULL; 1208 if (mmd->mmd_hbuf != NULL && 1209 (*hmp = dupb(mmd->mmd_hbuf)) == NULL) 1210 return (-1); 1211 } 1212 1213 if (pmp != NULL) { 1214 int i; 1215 mblk_t *mp; 1216 1217 mutex_enter(&mmd->mmd_pd_slab_lock); 1218 *pmp = NULL; 1219 for (i = 0; i < mmd->mmd_pbuf_cnt; i++) { 1220 ASSERT(mmd->mmd_pbuf[i] != NULL); 1221 mp = dupb(mmd->mmd_pbuf[i]); 1222 if (mp == NULL) { 1223 if (hmp != NULL && *hmp != NULL) 1224 freeb(*hmp); 1225 if (*pmp != NULL) 1226 freemsg(*pmp); 1227 mutex_exit(&mmd->mmd_pd_slab_lock); 1228 return (-1); 1229 } 1230 if (*pmp == NULL) 1231 *pmp = mp; 1232 else 1233 linkb(*pmp, mp); 1234 } 1235 mutex_exit(&mmd->mmd_pd_slab_lock); 1236 } 1237 1238 return (0); 1239 } 1240 1241 /* 1242 * Return the layout of a packet descriptor. 1243 */ 1244 int 1245 mmd_getpdescinfo(pdesc_t *pd, pdescinfo_t *pdi) 1246 { 1247 ASSERT(pd != NULL); 1248 ASSERT(pd->pd_magic == PDESC_MAGIC); 1249 ASSERT(pd->pd_slab != NULL); 1250 ASSERT(pd->pd_slab->pds_mmd->mmd_magic == MULTIDATA_MAGIC); 1251 ASSERT(pdi != NULL); 1252 1253 /* entry has been removed */ 1254 if (pd->pd_flags & PDESC_REM_DEFER) 1255 return (-1); 1256 1257 /* copy descriptor info to caller */ 1258 PDI_COPY(&(pd->pd_pdi), pdi); 1259 1260 return (0); 1261 } 1262 1263 /* 1264 * Add a global or local attribute to a Multidata. Global attribute 1265 * association is specified by a NULL packet descriptor. 1266 */ 1267 pattr_t * 1268 mmd_addpattr(multidata_t *mmd, pdesc_t *pd, pattrinfo_t *pai, 1269 boolean_t persistent, int kmflags) 1270 { 1271 patbkt_t **tbl_p; 1272 patbkt_t *tbl, *o_tbl; 1273 patbkt_t *bkt; 1274 pattr_t *pa; 1275 uint_t size; 1276 1277 ASSERT(mmd != NULL); 1278 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 1279 ASSERT(pd == NULL || pd->pd_magic == PDESC_MAGIC); 1280 ASSERT(pai != NULL); 1281 1282 /* pointer to the attribute hash table (local or global) */ 1283 tbl_p = pd != NULL ? &(pd->pd_pattbl) : &(mmd->mmd_pattbl); 1284 1285 /* 1286 * See if the hash table has not yet been created; if so, 1287 * we create the table and store its address atomically. 1288 */ 1289 if ((tbl = *tbl_p) == NULL) { 1290 tbl = kmem_cache_alloc(pattbl_cache, kmflags); 1291 if (tbl == NULL) 1292 return (NULL); 1293 1294 /* if someone got there first, use his table instead */ 1295 if ((o_tbl = atomic_cas_ptr(tbl_p, NULL, tbl)) != NULL) { 1296 kmem_cache_free(pattbl_cache, tbl); 1297 tbl = o_tbl; 1298 } 1299 } 1300 1301 ASSERT(tbl->pbkt_tbl_sz > 0); 1302 bkt = &(tbl[PATTBL_HASH(pai->type, tbl->pbkt_tbl_sz)]); 1303 1304 /* attribute of the same type already exists? */ 1305 if ((pa = mmd_find_pattr(bkt, pai->type)) != NULL) 1306 return (NULL); 1307 1308 size = sizeof (*pa) + pai->len; 1309 if ((pa = kmem_zalloc(size, kmflags)) == NULL) 1310 return (NULL); 1311 1312 pa->pat_magic = PATTR_MAGIC; 1313 pa->pat_lock = &(bkt->pbkt_lock); 1314 pa->pat_mmd = mmd; 1315 pa->pat_buflen = size; 1316 pa->pat_type = pai->type; 1317 pai->buf = pai->len > 0 ? ((uchar_t *)(pa + 1)) : NULL; 1318 1319 if (persistent) 1320 pa->pat_flags = PATTR_PERSIST; 1321 1322 /* insert attribute at end of hash chain */ 1323 mutex_enter(&(bkt->pbkt_lock)); 1324 insque(&(pa->pat_next), bkt->pbkt_pattr_q.ql_prev); 1325 mutex_exit(&(bkt->pbkt_lock)); 1326 1327 return (pa); 1328 } 1329 1330 /* 1331 * Attribute hash table kmem cache constructor routine. 1332 */ 1333 /* ARGSUSED */ 1334 static int 1335 pattbl_constructor(void *buf, void *cdrarg, int kmflags) 1336 { 1337 patbkt_t *bkt; 1338 uint_t tbl_sz = (uint_t)(uintptr_t)cdrarg; 1339 uint_t i; 1340 1341 ASSERT(tbl_sz > 0); /* table size can't be zero */ 1342 1343 for (i = 0, bkt = (patbkt_t *)buf; i < tbl_sz; i++, bkt++) { 1344 mutex_init(&(bkt->pbkt_lock), NULL, MUTEX_DRIVER, NULL); 1345 QL_INIT(&(bkt->pbkt_pattr_q)); 1346 1347 /* first bucket contains the table size */ 1348 bkt->pbkt_tbl_sz = i == 0 ? tbl_sz : 0; 1349 } 1350 return (0); 1351 } 1352 1353 /* 1354 * Attribute hash table kmem cache destructor routine. 1355 */ 1356 /* ARGSUSED */ 1357 static void 1358 pattbl_destructor(void *buf, void *cdrarg) 1359 { 1360 patbkt_t *bkt; 1361 uint_t tbl_sz = (uint_t)(uintptr_t)cdrarg; 1362 uint_t i; 1363 1364 ASSERT(tbl_sz > 0); /* table size can't be zero */ 1365 1366 for (i = 0, bkt = (patbkt_t *)buf; i < tbl_sz; i++, bkt++) { 1367 mutex_destroy(&(bkt->pbkt_lock)); 1368 ASSERT(bkt->pbkt_pattr_q.ql_next == &(bkt->pbkt_pattr_q)); 1369 ASSERT(i > 0 || bkt->pbkt_tbl_sz == tbl_sz); 1370 } 1371 } 1372 1373 /* 1374 * Destroy an attribute hash table, called by mmd_rempdesc or during free. 1375 */ 1376 static void 1377 mmd_destroy_pattbl(patbkt_t **tbl) 1378 { 1379 patbkt_t *bkt; 1380 pattr_t *pa, *pa_next; 1381 uint_t i, tbl_sz; 1382 1383 ASSERT(tbl != NULL); 1384 bkt = *tbl; 1385 tbl_sz = bkt->pbkt_tbl_sz; 1386 1387 /* make sure caller passes in the first bucket */ 1388 ASSERT(tbl_sz > 0); 1389 1390 /* destroy the contents of each bucket */ 1391 for (i = 0; i < tbl_sz; i++, bkt++) { 1392 /* we ought to be exclusive at this point */ 1393 ASSERT(MUTEX_NOT_HELD(&(bkt->pbkt_lock))); 1394 1395 pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next); 1396 while (pa != Q2PATTR(&(bkt->pbkt_pattr_q))) { 1397 ASSERT(pa->pat_magic == PATTR_MAGIC); 1398 pa_next = Q2PATTR(pa->pat_next); 1399 remque(&(pa->pat_next)); 1400 kmem_free(pa, pa->pat_buflen); 1401 pa = pa_next; 1402 } 1403 } 1404 1405 kmem_cache_free(pattbl_cache, *tbl); 1406 *tbl = NULL; 1407 1408 /* commit all previous stores */ 1409 membar_producer(); 1410 } 1411 1412 /* 1413 * Copy the contents of an attribute hash table, called by mmd_copy. 1414 */ 1415 static int 1416 mmd_copy_pattbl(patbkt_t *src_tbl, multidata_t *n_mmd, pdesc_t *n_pd, 1417 int kmflags) 1418 { 1419 patbkt_t *bkt; 1420 pattr_t *pa; 1421 pattrinfo_t pai; 1422 uint_t i, tbl_sz; 1423 1424 ASSERT(src_tbl != NULL); 1425 bkt = src_tbl; 1426 tbl_sz = bkt->pbkt_tbl_sz; 1427 1428 /* make sure caller passes in the first bucket */ 1429 ASSERT(tbl_sz > 0); 1430 1431 for (i = 0; i < tbl_sz; i++, bkt++) { 1432 mutex_enter(&(bkt->pbkt_lock)); 1433 pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next); 1434 while (pa != Q2PATTR(&(bkt->pbkt_pattr_q))) { 1435 pattr_t *pa_next = Q2PATTR(pa->pat_next); 1436 1437 /* skip if it's removed */ 1438 if (pa->pat_flags & PATTR_REM_DEFER) { 1439 pa = pa_next; 1440 continue; 1441 } 1442 1443 pai.type = pa->pat_type; 1444 pai.len = pa->pat_buflen - sizeof (*pa); 1445 if (mmd_addpattr(n_mmd, n_pd, &pai, (pa->pat_flags & 1446 PATTR_PERSIST) != 0, kmflags) == NULL) { 1447 mutex_exit(&(bkt->pbkt_lock)); 1448 return (-1); 1449 } 1450 1451 /* copy over the contents */ 1452 if (pai.buf != NULL) 1453 bcopy(pa + 1, pai.buf, pai.len); 1454 1455 pa = pa_next; 1456 } 1457 mutex_exit(&(bkt->pbkt_lock)); 1458 } 1459 1460 return (0); 1461 } 1462 1463 /* 1464 * Search for an attribute type within an attribute hash bucket. 1465 */ 1466 static pattr_t * 1467 mmd_find_pattr(patbkt_t *bkt, uint_t type) 1468 { 1469 pattr_t *pa_head, *pa; 1470 1471 mutex_enter(&(bkt->pbkt_lock)); 1472 pa_head = Q2PATTR(&(bkt->pbkt_pattr_q)); 1473 pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next); 1474 1475 while (pa != pa_head) { 1476 ASSERT(pa->pat_magic == PATTR_MAGIC); 1477 1478 /* return a match; we treat removed entry as non-existent */ 1479 if (pa->pat_type == type && !(pa->pat_flags & PATTR_REM_DEFER)) 1480 break; 1481 pa = Q2PATTR(pa->pat_next); 1482 } 1483 mutex_exit(&(bkt->pbkt_lock)); 1484 1485 return (pa == pa_head ? NULL : pa); 1486 } 1487 1488 /* 1489 * Remove an attribute from a Multidata. 1490 */ 1491 void 1492 mmd_rempattr(pattr_t *pa) 1493 { 1494 kmutex_t *pat_lock = pa->pat_lock; 1495 1496 ASSERT(pa->pat_magic == PATTR_MAGIC); 1497 1498 /* ignore if attribute was marked as persistent */ 1499 if ((pa->pat_flags & PATTR_PERSIST) != 0) 1500 return; 1501 1502 mutex_enter(pat_lock); 1503 /* 1504 * We can't deallocate the associated resources if the Multidata 1505 * is shared with other threads, because it's possible that the 1506 * attribute handle value is held by those threads. That's why 1507 * we simply mark the entry as "removed". If there are no other 1508 * threads, then we free the attribute. 1509 */ 1510 if (pa->pat_mmd->mmd_dp->db_ref > 1) { 1511 pa->pat_flags |= PATTR_REM_DEFER; 1512 } else { 1513 remque(&(pa->pat_next)); 1514 kmem_free(pa, pa->pat_buflen); 1515 } 1516 mutex_exit(pat_lock); 1517 } 1518 1519 /* 1520 * Find an attribute (according to its type) and return its handle. 1521 */ 1522 pattr_t * 1523 mmd_getpattr(multidata_t *mmd, pdesc_t *pd, pattrinfo_t *pai) 1524 { 1525 patbkt_t *tbl, *bkt; 1526 pattr_t *pa; 1527 1528 ASSERT(mmd != NULL); 1529 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 1530 ASSERT(pai != NULL); 1531 1532 /* get the right attribute hash table (local or global) */ 1533 tbl = pd != NULL ? pd->pd_pattbl : mmd->mmd_pattbl; 1534 1535 /* attribute hash table doesn't exist? */ 1536 if (tbl == NULL) 1537 return (NULL); 1538 1539 ASSERT(tbl->pbkt_tbl_sz > 0); 1540 bkt = &(tbl[PATTBL_HASH(pai->type, tbl->pbkt_tbl_sz)]); 1541 1542 if ((pa = mmd_find_pattr(bkt, pai->type)) != NULL) { 1543 ASSERT(pa->pat_buflen >= sizeof (*pa)); 1544 pai->len = pa->pat_buflen - sizeof (*pa); 1545 pai->buf = pai->len > 0 ? 1546 (uchar_t *)pa + sizeof (pattr_t) : NULL; 1547 } 1548 ASSERT(pa == NULL || pa->pat_magic == PATTR_MAGIC); 1549 return (pa); 1550 } 1551 1552 /* 1553 * Return total size of buffers and total size of areas referenced 1554 * by all in-use (unremoved) packet descriptors. 1555 */ 1556 void 1557 mmd_getsize(multidata_t *mmd, uint_t *ptotal, uint_t *pinuse) 1558 { 1559 pdesc_t *pd; 1560 pdescinfo_t *pdi; 1561 int i; 1562 1563 ASSERT(mmd != NULL); 1564 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 1565 1566 mutex_enter(&mmd->mmd_pd_slab_lock); 1567 if (ptotal != NULL) { 1568 *ptotal = 0; 1569 1570 if (mmd->mmd_hbuf != NULL) 1571 *ptotal += MBLKL(mmd->mmd_hbuf); 1572 1573 for (i = 0; i < mmd->mmd_pbuf_cnt; i++) { 1574 ASSERT(mmd->mmd_pbuf[i] != NULL); 1575 *ptotal += MBLKL(mmd->mmd_pbuf[i]); 1576 } 1577 } 1578 if (pinuse != NULL) { 1579 *pinuse = 0; 1580 1581 /* first pdesc */ 1582 pd = mmd_getpdesc(mmd, NULL, NULL, 1, B_TRUE); 1583 while (pd != NULL) { 1584 pdi = &pd->pd_pdi; 1585 1586 /* next pdesc */ 1587 pd = mmd_getpdesc(mmd, pd, NULL, 1, B_TRUE); 1588 1589 /* skip over removed descriptor */ 1590 if (pdi->flags & PDESC_REM_DEFER) 1591 continue; 1592 1593 if (pdi->flags & PDESC_HBUF_REF) 1594 *pinuse += PDESC_HDRL(pdi); 1595 1596 if (pdi->flags & PDESC_PBUF_REF) { 1597 for (i = 0; i < pdi->pld_cnt; i++) 1598 *pinuse += PDESC_PLDL(pdi, i); 1599 } 1600 } 1601 } 1602 mutex_exit(&mmd->mmd_pd_slab_lock); 1603 } 1604