1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Multidata, as described in the following papers: 31 * 32 * Adi Masputra, 33 * Multidata V.2: VA-Disjoint Packet Extents Framework Interface 34 * Design Specification. August 2004. 35 * Available as http://sac.sfbay/PSARC/2004/594/materials/mmd2.pdf. 36 * 37 * Adi Masputra, 38 * Multidata Interface Design Specification. Sep 2002. 39 * Available as http://sac.sfbay/PSARC/2002/276/materials/mmd.pdf. 40 * 41 * Adi Masputra, Frank DiMambro, Kacheong Poon, 42 * An Efficient Networking Transmit Mechanism for Solaris: 43 * Multidata Transmit (MDT). May 2002. 44 * Available as http://sac.sfbay/PSARC/2002/276/materials/mdt.pdf. 45 */ 46 47 #include <sys/types.h> 48 #include <sys/stream.h> 49 #include <sys/dlpi.h> 50 #include <sys/stropts.h> 51 #include <sys/strsun.h> 52 #include <sys/strlog.h> 53 #include <sys/strsubr.h> 54 #include <sys/sysmacros.h> 55 #include <sys/cmn_err.h> 56 #include <sys/debug.h> 57 #include <sys/kmem.h> 58 #include <sys/atomic.h> 59 60 #include <sys/multidata.h> 61 #include <sys/multidata_impl.h> 62 63 static int mmd_constructor(void *, void *, int); 64 static void mmd_destructor(void *, void *); 65 static int pdslab_constructor(void *, void *, int); 66 static void pdslab_destructor(void *, void *); 67 static int pattbl_constructor(void *, void *, int); 68 static void pattbl_destructor(void *, void *); 69 static void mmd_esballoc_free(caddr_t); 70 static int mmd_copy_pattbl(patbkt_t *, multidata_t *, pdesc_t *, int); 71 72 static boolean_t pbuf_ref_valid(multidata_t *, pdescinfo_t *); 73 #pragma inline(pbuf_ref_valid) 74 75 static boolean_t pdi_in_range(pdescinfo_t *, pdescinfo_t *); 76 #pragma inline(pdi_in_range) 77 78 static pdesc_t *mmd_addpdesc_int(multidata_t *, pdescinfo_t *, int *, int); 79 #pragma inline(mmd_addpdesc_int) 80 81 static void mmd_destroy_pattbl(patbkt_t **); 82 #pragma inline(mmd_destroy_pattbl) 83 84 static pattr_t *mmd_find_pattr(patbkt_t *, uint_t); 85 #pragma inline(mmd_find_pattr) 86 87 static pdesc_t *mmd_destroy_pdesc(multidata_t *, pdesc_t *); 88 #pragma inline(mmd_destroy_pdesc) 89 90 static pdesc_t *mmd_getpdesc(multidata_t *, pdesc_t *, pdescinfo_t *, uint_t, 91 boolean_t); 92 #pragma inline(mmd_getpdesc) 93 94 static struct kmem_cache *mmd_cache; 95 static struct kmem_cache *pd_slab_cache; 96 static struct kmem_cache *pattbl_cache; 97 98 int mmd_debug = 1; 99 #define MMD_DEBUG(s) if (mmd_debug > 0) cmn_err s 100 101 /* 102 * Set to this to true to bypass pdesc bounds checking. 103 */ 104 boolean_t mmd_speed_over_safety = B_FALSE; 105 106 /* 107 * Patchable kmem_cache flags. 108 */ 109 int mmd_kmem_flags = 0; 110 int pdslab_kmem_flags = 0; 111 int pattbl_kmem_flags = 0; 112 113 /* 114 * Alignment (in bytes) of our kmem caches. 115 */ 116 #define MULTIDATA_CACHE_ALIGN 64 117 118 /* 119 * Default number of packet descriptors per descriptor slab. Making 120 * this too small will trigger more descriptor slab allocation; making 121 * it too large will create too many unclaimed descriptors. 122 */ 123 #define PDSLAB_SZ 15 124 uint_t pdslab_sz = PDSLAB_SZ; 125 126 /* 127 * Default attribute hash table size. It's okay to set this to a small 128 * value (even to 1) because there aren't that many attributes currently 129 * defined, and because we assume there won't be many attributes associated 130 * with a Multidata at a given time. Increasing the size will reduce 131 * attribute search time (given a large number of attributes in a Multidata), 132 * and decreasing it will reduce the memory footprints and the overhead 133 * associated with managing the table. 134 */ 135 #define PATTBL_SZ 1 136 uint_t pattbl_sz = PATTBL_SZ; 137 138 /* 139 * Attribute hash key. 140 */ 141 #define PATTBL_HASH(x, sz) ((x) % (sz)) 142 143 /* 144 * Structure that precedes each Multidata metadata. 145 */ 146 struct mmd_buf_info { 147 frtn_t frp; /* free routine */ 148 uint_t buf_len; /* length of kmem buffer */ 149 }; 150 151 /* 152 * The size of each metadata buffer. 153 */ 154 #define MMD_CACHE_SIZE \ 155 (sizeof (struct mmd_buf_info) + sizeof (multidata_t)) 156 157 /* 158 * Called during startup in order to create the Multidata kmem caches. 159 */ 160 void 161 mmd_init(void) 162 { 163 pdslab_sz = MAX(1, pdslab_sz); /* at least 1 descriptor */ 164 pattbl_sz = MAX(1, pattbl_sz); /* at least 1 bucket */ 165 166 mmd_cache = kmem_cache_create("multidata", MMD_CACHE_SIZE, 167 MULTIDATA_CACHE_ALIGN, mmd_constructor, mmd_destructor, 168 NULL, NULL, NULL, mmd_kmem_flags); 169 170 pd_slab_cache = kmem_cache_create("multidata_pdslab", 171 PDESC_SLAB_SIZE(pdslab_sz), MULTIDATA_CACHE_ALIGN, 172 pdslab_constructor, pdslab_destructor, NULL, 173 (void *)(uintptr_t)pdslab_sz, NULL, pdslab_kmem_flags); 174 175 pattbl_cache = kmem_cache_create("multidata_pattbl", 176 sizeof (patbkt_t) * pattbl_sz, MULTIDATA_CACHE_ALIGN, 177 pattbl_constructor, pattbl_destructor, NULL, 178 (void *)(uintptr_t)pattbl_sz, NULL, pattbl_kmem_flags); 179 } 180 181 /* 182 * Create a Multidata message block. 183 */ 184 multidata_t * 185 mmd_alloc(mblk_t *hdr_mp, mblk_t **mmd_mp, int kmflags) 186 { 187 uchar_t *buf; 188 multidata_t *mmd; 189 uint_t mmd_mplen; 190 struct mmd_buf_info *buf_info; 191 192 ASSERT(hdr_mp != NULL); 193 ASSERT(mmd_mp != NULL); 194 195 /* 196 * Caller should never pass in a chain of mblks since we 197 * only care about the first one, hence the assertions. 198 */ 199 ASSERT(hdr_mp->b_cont == NULL); 200 201 if ((buf = kmem_cache_alloc(mmd_cache, kmflags)) == NULL) 202 return (NULL); 203 204 buf_info = (struct mmd_buf_info *)buf; 205 buf_info->frp.free_arg = (caddr_t)buf; 206 207 mmd = (multidata_t *)(buf_info + 1); 208 mmd_mplen = sizeof (*mmd); 209 210 if ((*mmd_mp = desballoc((uchar_t *)mmd, mmd_mplen, BPRI_HI, 211 &(buf_info->frp))) == NULL) { 212 kmem_cache_free(mmd_cache, buf); 213 return (NULL); 214 } 215 216 DB_TYPE(*mmd_mp) = M_MULTIDATA; 217 (*mmd_mp)->b_wptr += mmd_mplen; 218 mmd->mmd_dp = (*mmd_mp)->b_datap; 219 mmd->mmd_hbuf = hdr_mp; 220 221 return (mmd); 222 } 223 224 /* 225 * Associate additional payload buffer to the Multidata. 226 */ 227 int 228 mmd_addpldbuf(multidata_t *mmd, mblk_t *pld_mp) 229 { 230 int i; 231 232 ASSERT(mmd != NULL); 233 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 234 ASSERT(pld_mp != NULL); 235 236 mutex_enter(&mmd->mmd_pd_slab_lock); 237 for (i = 0; i < MULTIDATA_MAX_PBUFS && 238 mmd->mmd_pbuf_cnt < MULTIDATA_MAX_PBUFS; i++) { 239 if (mmd->mmd_pbuf[i] == pld_mp) { 240 /* duplicate entry */ 241 MMD_DEBUG((CE_WARN, "mmd_addpldbuf: error adding " 242 "pld 0x%p to mmd 0x%p since it has been " 243 "previously added into slot %d (total %d)\n", 244 (void *)pld_mp, (void *)mmd, i, mmd->mmd_pbuf_cnt)); 245 mutex_exit(&mmd->mmd_pd_slab_lock); 246 return (-1); 247 } else if (mmd->mmd_pbuf[i] == NULL) { 248 mmd->mmd_pbuf[i] = pld_mp; 249 mmd->mmd_pbuf_cnt++; 250 mutex_exit(&mmd->mmd_pd_slab_lock); 251 return (i); 252 } 253 } 254 255 /* all slots are taken */ 256 MMD_DEBUG((CE_WARN, "mmd_addpldbuf: error adding pld 0x%p to mmd 0x%p " 257 "since no slot space is left (total %d max %d)\n", (void *)pld_mp, 258 (void *)mmd, mmd->mmd_pbuf_cnt, MULTIDATA_MAX_PBUFS)); 259 mutex_exit(&mmd->mmd_pd_slab_lock); 260 261 return (-1); 262 } 263 264 /* 265 * Multidata metadata kmem cache constructor routine. 266 */ 267 /* ARGSUSED */ 268 static int 269 mmd_constructor(void *buf, void *cdrarg, int kmflags) 270 { 271 struct mmd_buf_info *buf_info; 272 multidata_t *mmd; 273 274 bzero((void *)buf, MMD_CACHE_SIZE); 275 276 buf_info = (struct mmd_buf_info *)buf; 277 buf_info->frp.free_func = mmd_esballoc_free; 278 buf_info->buf_len = MMD_CACHE_SIZE; 279 280 mmd = (multidata_t *)(buf_info + 1); 281 mmd->mmd_magic = MULTIDATA_MAGIC; 282 283 mutex_init(&(mmd->mmd_pd_slab_lock), NULL, MUTEX_DRIVER, NULL); 284 QL_INIT(&(mmd->mmd_pd_slab_q)); 285 QL_INIT(&(mmd->mmd_pd_q)); 286 287 return (0); 288 } 289 290 /* 291 * Multidata metadata kmem cache destructor routine. 292 */ 293 /* ARGSUSED */ 294 static void 295 mmd_destructor(void *buf, void *cdrarg) 296 { 297 multidata_t *mmd; 298 #ifdef DEBUG 299 int i; 300 #endif 301 302 mmd = (multidata_t *)((uchar_t *)buf + sizeof (struct mmd_buf_info)); 303 304 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 305 ASSERT(mmd->mmd_dp == NULL); 306 ASSERT(mmd->mmd_hbuf == NULL); 307 ASSERT(mmd->mmd_pbuf_cnt == 0); 308 #ifdef DEBUG 309 for (i = 0; i < MULTIDATA_MAX_PBUFS; i++) 310 ASSERT(mmd->mmd_pbuf[i] == NULL); 311 #endif 312 ASSERT(mmd->mmd_pattbl == NULL); 313 314 mutex_destroy(&(mmd->mmd_pd_slab_lock)); 315 ASSERT(mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q)); 316 ASSERT(mmd->mmd_slab_cnt == 0); 317 ASSERT(mmd->mmd_pd_q.ql_next == &(mmd->mmd_pd_q)); 318 ASSERT(mmd->mmd_pd_cnt == 0); 319 ASSERT(mmd->mmd_hbuf_ref == 0); 320 ASSERT(mmd->mmd_pbuf_ref == 0); 321 } 322 323 /* 324 * Multidata message block free callback routine. 325 */ 326 static void 327 mmd_esballoc_free(caddr_t buf) 328 { 329 multidata_t *mmd; 330 pdesc_t *pd; 331 pdesc_slab_t *slab; 332 int i; 333 334 ASSERT(buf != NULL); 335 ASSERT(((struct mmd_buf_info *)buf)->buf_len == MMD_CACHE_SIZE); 336 337 mmd = (multidata_t *)(buf + sizeof (struct mmd_buf_info)); 338 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 339 340 ASSERT(mmd->mmd_dp != NULL); 341 ASSERT(mmd->mmd_dp->db_ref == 1); 342 343 /* remove all packet descriptors and private attributes */ 344 pd = Q2PD(mmd->mmd_pd_q.ql_next); 345 while (pd != Q2PD(&(mmd->mmd_pd_q))) 346 pd = mmd_destroy_pdesc(mmd, pd); 347 348 ASSERT(mmd->mmd_pd_q.ql_next == &(mmd->mmd_pd_q)); 349 ASSERT(mmd->mmd_pd_cnt == 0); 350 ASSERT(mmd->mmd_hbuf_ref == 0); 351 ASSERT(mmd->mmd_pbuf_ref == 0); 352 353 /* remove all global attributes */ 354 if (mmd->mmd_pattbl != NULL) 355 mmd_destroy_pattbl(&(mmd->mmd_pattbl)); 356 357 /* remove all descriptor slabs */ 358 slab = Q2PDSLAB(mmd->mmd_pd_slab_q.ql_next); 359 while (slab != Q2PDSLAB(&(mmd->mmd_pd_slab_q))) { 360 pdesc_slab_t *slab_next = Q2PDSLAB(slab->pds_next); 361 362 remque(&(slab->pds_next)); 363 slab->pds_next = NULL; 364 slab->pds_prev = NULL; 365 slab->pds_mmd = NULL; 366 slab->pds_used = 0; 367 kmem_cache_free(pd_slab_cache, slab); 368 369 ASSERT(mmd->mmd_slab_cnt > 0); 370 mmd->mmd_slab_cnt--; 371 slab = slab_next; 372 } 373 ASSERT(mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q)); 374 ASSERT(mmd->mmd_slab_cnt == 0); 375 376 mmd->mmd_dp = NULL; 377 378 /* finally, free all associated message blocks */ 379 if (mmd->mmd_hbuf != NULL) { 380 freeb(mmd->mmd_hbuf); 381 mmd->mmd_hbuf = NULL; 382 } 383 384 for (i = 0; i < MULTIDATA_MAX_PBUFS; i++) { 385 if (mmd->mmd_pbuf[i] != NULL) { 386 freeb(mmd->mmd_pbuf[i]); 387 mmd->mmd_pbuf[i] = NULL; 388 ASSERT(mmd->mmd_pbuf_cnt > 0); 389 mmd->mmd_pbuf_cnt--; 390 } 391 } 392 393 ASSERT(mmd->mmd_pbuf_cnt == 0); 394 ASSERT(MUTEX_NOT_HELD(&(mmd->mmd_pd_slab_lock))); 395 kmem_cache_free(mmd_cache, buf); 396 } 397 398 /* 399 * Multidata message block copy routine, called by copyb() when it 400 * encounters a M_MULTIDATA data block type. This routine should 401 * not be called by anyone other than copyb(), since it may go away 402 * (read: become static to this module) once some sort of copy callback 403 * routine is made available. 404 */ 405 mblk_t * 406 mmd_copy(mblk_t *bp, int kmflags) 407 { 408 multidata_t *mmd, *n_mmd; 409 mblk_t *n_hbuf = NULL, *n_pbuf[MULTIDATA_MAX_PBUFS]; 410 mblk_t **pmp_last = &n_pbuf[MULTIDATA_MAX_PBUFS - 1]; 411 mblk_t **pmp; 412 mblk_t *n_bp = NULL; 413 pdesc_t *pd; 414 uint_t n_pbuf_cnt = 0; 415 int idx, i; 416 417 #define FREE_PBUFS() { \ 418 for (pmp = &n_pbuf[0]; pmp <= pmp_last; pmp++) \ 419 if (*pmp != NULL) freeb(*pmp); \ 420 } 421 422 #define REL_OFF(p, base, n_base) \ 423 ((uchar_t *)(n_base) + ((uchar_t *)(p) - (uchar_t *)base)) 424 425 ASSERT(bp != NULL && DB_TYPE(bp) == M_MULTIDATA); 426 mmd = mmd_getmultidata(bp); 427 428 /* copy the header buffer */ 429 if (mmd->mmd_hbuf != NULL && (n_hbuf = copyb(mmd->mmd_hbuf)) == NULL) 430 return (NULL); 431 432 /* copy the payload buffer(s) */ 433 mutex_enter(&mmd->mmd_pd_slab_lock); 434 bzero((void *)&n_pbuf[0], sizeof (mblk_t *) * MULTIDATA_MAX_PBUFS); 435 n_pbuf_cnt = mmd->mmd_pbuf_cnt; 436 for (i = 0; i < n_pbuf_cnt; i++) { 437 ASSERT(mmd->mmd_pbuf[i] != NULL); 438 n_pbuf[i] = copyb(mmd->mmd_pbuf[i]); 439 if (n_pbuf[i] == NULL) { 440 FREE_PBUFS(); 441 mutex_exit(&mmd->mmd_pd_slab_lock); 442 return (NULL); 443 } 444 } 445 446 /* allocate new Multidata */ 447 n_mmd = mmd_alloc(n_hbuf, &n_bp, kmflags); 448 if (n_mmd == NULL) { 449 if (n_hbuf != NULL) 450 freeb(n_hbuf); 451 if (n_pbuf_cnt != 0) 452 FREE_PBUFS(); 453 mutex_exit(&mmd->mmd_pd_slab_lock); 454 return (NULL); 455 } 456 457 /* 458 * Add payload buffer(s); upon success, leave n_pbuf array 459 * alone, as the newly-created Multidata had already contained 460 * the mblk pointers stored in the array. These will be freed 461 * along with the Multidata itself. 462 */ 463 for (i = 0, pmp = &n_pbuf[0]; i < n_pbuf_cnt; i++, pmp++) { 464 idx = mmd_addpldbuf(n_mmd, *pmp); 465 if (idx < 0) { 466 FREE_PBUFS(); 467 freeb(n_bp); 468 mutex_exit(&mmd->mmd_pd_slab_lock); 469 return (NULL); 470 } 471 } 472 473 /* copy over global attributes */ 474 if (mmd->mmd_pattbl != NULL && 475 mmd_copy_pattbl(mmd->mmd_pattbl, n_mmd, NULL, kmflags) < 0) { 476 freeb(n_bp); 477 mutex_exit(&mmd->mmd_pd_slab_lock); 478 return (NULL); 479 } 480 481 /* copy over packet descriptors and their atttributes */ 482 pd = mmd_getpdesc(mmd, NULL, NULL, 1, B_TRUE); /* first pdesc */ 483 while (pd != NULL) { 484 pdesc_t *n_pd; 485 pdescinfo_t *pdi, n_pdi; 486 uchar_t *n_base, *base; 487 pdesc_t *pd_next; 488 489 /* next pdesc */ 490 pd_next = mmd_getpdesc(pd->pd_slab->pds_mmd, pd, NULL, 491 1, B_TRUE); 492 493 /* skip if already removed */ 494 if (pd->pd_flags & PDESC_REM_DEFER) { 495 pd = pd_next; 496 continue; 497 } 498 499 pdi = &(pd->pd_pdi); 500 bzero(&n_pdi, sizeof (n_pdi)); 501 502 /* 503 * Calculate new descriptor values based on the offset of 504 * each pointer relative to the associated buffer(s). 505 */ 506 ASSERT(pdi->flags & PDESC_HAS_REF); 507 if (pdi->flags & PDESC_HBUF_REF) { 508 n_base = n_mmd->mmd_hbuf->b_rptr; 509 base = mmd->mmd_hbuf->b_rptr; 510 511 n_pdi.flags |= PDESC_HBUF_REF; 512 n_pdi.hdr_base = REL_OFF(pdi->hdr_base, base, n_base); 513 n_pdi.hdr_rptr = REL_OFF(pdi->hdr_rptr, base, n_base); 514 n_pdi.hdr_wptr = REL_OFF(pdi->hdr_wptr, base, n_base); 515 n_pdi.hdr_lim = REL_OFF(pdi->hdr_lim, base, n_base); 516 } 517 518 if (pdi->flags & PDESC_PBUF_REF) { 519 n_pdi.flags |= PDESC_PBUF_REF; 520 n_pdi.pld_cnt = pdi->pld_cnt; 521 522 for (i = 0; i < pdi->pld_cnt; i++) { 523 idx = pdi->pld_ary[i].pld_pbuf_idx; 524 ASSERT(idx < MULTIDATA_MAX_PBUFS); 525 ASSERT(n_mmd->mmd_pbuf[idx] != NULL); 526 ASSERT(mmd->mmd_pbuf[idx] != NULL); 527 528 n_base = n_mmd->mmd_pbuf[idx]->b_rptr; 529 base = mmd->mmd_pbuf[idx]->b_rptr; 530 531 n_pdi.pld_ary[i].pld_pbuf_idx = idx; 532 533 /* 534 * We can't copy the pointers just like that, 535 * so calculate the relative offset. 536 */ 537 n_pdi.pld_ary[i].pld_rptr = 538 REL_OFF(pdi->pld_ary[i].pld_rptr, 539 base, n_base); 540 n_pdi.pld_ary[i].pld_wptr = 541 REL_OFF(pdi->pld_ary[i].pld_wptr, 542 base, n_base); 543 } 544 } 545 546 /* add the new descriptor to the new Multidata */ 547 n_pd = mmd_addpdesc_int(n_mmd, &n_pdi, NULL, kmflags); 548 549 if (n_pd == NULL || (pd->pd_pattbl != NULL && 550 mmd_copy_pattbl(pd->pd_pattbl, n_mmd, n_pd, kmflags) < 0)) { 551 freeb(n_bp); 552 mutex_exit(&mmd->mmd_pd_slab_lock); 553 return (NULL); 554 } 555 556 pd = pd_next; 557 } 558 #undef REL_OFF 559 #undef FREE_PBUFS 560 561 mutex_exit(&mmd->mmd_pd_slab_lock); 562 return (n_bp); 563 } 564 565 /* 566 * Given a Multidata message block, return the Multidata metadata handle. 567 */ 568 multidata_t * 569 mmd_getmultidata(mblk_t *mp) 570 { 571 multidata_t *mmd; 572 573 ASSERT(mp != NULL); 574 575 if (DB_TYPE(mp) != M_MULTIDATA) 576 return (NULL); 577 578 mmd = (multidata_t *)mp->b_rptr; 579 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 580 581 return (mmd); 582 } 583 584 /* 585 * Return the start and end addresses of the associated buffer(s). 586 */ 587 void 588 mmd_getregions(multidata_t *mmd, mbufinfo_t *mbi) 589 { 590 int i; 591 592 ASSERT(mmd != NULL); 593 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 594 ASSERT(mbi != NULL); 595 596 bzero((void *)mbi, sizeof (mbufinfo_t)); 597 598 if (mmd->mmd_hbuf != NULL) { 599 mbi->hbuf_rptr = mmd->mmd_hbuf->b_rptr; 600 mbi->hbuf_wptr = mmd->mmd_hbuf->b_wptr; 601 } 602 603 mutex_enter(&mmd->mmd_pd_slab_lock); 604 for (i = 0; i < mmd->mmd_pbuf_cnt; i++) { 605 ASSERT(mmd->mmd_pbuf[i] != NULL); 606 mbi->pbuf_ary[i].pbuf_rptr = mmd->mmd_pbuf[i]->b_rptr; 607 mbi->pbuf_ary[i].pbuf_wptr = mmd->mmd_pbuf[i]->b_wptr; 608 609 } 610 mbi->pbuf_cnt = mmd->mmd_pbuf_cnt; 611 mutex_exit(&mmd->mmd_pd_slab_lock); 612 } 613 614 /* 615 * Return the Multidata statistics. 616 */ 617 uint_t 618 mmd_getcnt(multidata_t *mmd, uint_t *hbuf_ref, uint_t *pbuf_ref) 619 { 620 uint_t pd_cnt; 621 622 ASSERT(mmd != NULL); 623 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 624 625 mutex_enter(&(mmd->mmd_pd_slab_lock)); 626 if (hbuf_ref != NULL) 627 *hbuf_ref = mmd->mmd_hbuf_ref; 628 if (pbuf_ref != NULL) 629 *pbuf_ref = mmd->mmd_pbuf_ref; 630 pd_cnt = mmd->mmd_pd_cnt; 631 mutex_exit(&(mmd->mmd_pd_slab_lock)); 632 633 return (pd_cnt); 634 } 635 636 #define HBUF_REF_VALID(mmd, pdi) \ 637 ((mmd)->mmd_hbuf != NULL && (pdi)->hdr_rptr != NULL && \ 638 (pdi)->hdr_wptr != NULL && (pdi)->hdr_base != NULL && \ 639 (pdi)->hdr_lim != NULL && (pdi)->hdr_lim >= (pdi)->hdr_base && \ 640 (pdi)->hdr_wptr >= (pdi)->hdr_rptr && \ 641 (pdi)->hdr_base <= (pdi)->hdr_rptr && \ 642 (pdi)->hdr_lim >= (pdi)->hdr_wptr && \ 643 (pdi)->hdr_base >= (mmd)->mmd_hbuf->b_rptr && \ 644 MBLKIN((mmd)->mmd_hbuf, \ 645 (pdi->hdr_base - (mmd)->mmd_hbuf->b_rptr), \ 646 PDESC_HDRSIZE(pdi))) 647 648 /* 649 * Bounds check payload area(s). 650 */ 651 static boolean_t 652 pbuf_ref_valid(multidata_t *mmd, pdescinfo_t *pdi) 653 { 654 int i = 0, idx; 655 boolean_t valid = B_TRUE; 656 struct pld_ary_s *pa; 657 658 mutex_enter(&mmd->mmd_pd_slab_lock); 659 if (pdi->pld_cnt == 0 || pdi->pld_cnt > mmd->mmd_pbuf_cnt) { 660 mutex_exit(&mmd->mmd_pd_slab_lock); 661 return (B_FALSE); 662 } 663 664 pa = &pdi->pld_ary[0]; 665 while (valid && i < pdi->pld_cnt) { 666 valid = (((idx = pa->pld_pbuf_idx) < mmd->mmd_pbuf_cnt) && 667 pa->pld_rptr != NULL && pa->pld_wptr != NULL && 668 pa->pld_wptr >= pa->pld_rptr && 669 pa->pld_rptr >= mmd->mmd_pbuf[idx]->b_rptr && 670 MBLKIN(mmd->mmd_pbuf[idx], (pa->pld_rptr - 671 mmd->mmd_pbuf[idx]->b_rptr), 672 PDESC_PLD_SPAN_SIZE(pdi, i))); 673 674 if (!valid) { 675 MMD_DEBUG((CE_WARN, 676 "pbuf_ref_valid: pdi 0x%p pld out of bound; " 677 "index %d has pld_cnt %d pbuf_idx %d " 678 "(mmd_pbuf_cnt %d), " 679 "pld_rptr 0x%p pld_wptr 0x%p len %d " 680 "(valid 0x%p-0x%p len %d)\n", (void *)pdi, 681 i, pdi->pld_cnt, idx, mmd->mmd_pbuf_cnt, 682 (void *)pa->pld_rptr, 683 (void *)pa->pld_wptr, 684 (int)PDESC_PLD_SPAN_SIZE(pdi, i), 685 (void *)mmd->mmd_pbuf[idx]->b_rptr, 686 (void *)mmd->mmd_pbuf[idx]->b_wptr, 687 (int)MBLKL(mmd->mmd_pbuf[idx]))); 688 } 689 690 /* advance to next entry */ 691 i++; 692 pa++; 693 } 694 695 mutex_exit(&mmd->mmd_pd_slab_lock); 696 return (valid); 697 } 698 699 /* 700 * Add a packet descriptor to the Multidata. 701 */ 702 pdesc_t * 703 mmd_addpdesc(multidata_t *mmd, pdescinfo_t *pdi, int *err, int kmflags) 704 { 705 ASSERT(mmd != NULL); 706 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 707 ASSERT(pdi != NULL); 708 ASSERT(pdi->flags & PDESC_HAS_REF); 709 710 /* do the references refer to invalid memory regions? */ 711 if (!mmd_speed_over_safety && 712 (((pdi->flags & PDESC_HBUF_REF) && !HBUF_REF_VALID(mmd, pdi)) || 713 ((pdi->flags & PDESC_PBUF_REF) && !pbuf_ref_valid(mmd, pdi)))) { 714 if (err != NULL) 715 *err = EINVAL; 716 return (NULL); 717 } 718 719 return (mmd_addpdesc_int(mmd, pdi, err, kmflags)); 720 } 721 722 /* 723 * Internal routine to add a packet descriptor, called when mmd_addpdesc 724 * or mmd_copy tries to allocate and add a descriptor to a Multidata. 725 */ 726 static pdesc_t * 727 mmd_addpdesc_int(multidata_t *mmd, pdescinfo_t *pdi, int *err, int kmflags) 728 { 729 pdesc_slab_t *slab, *slab_last; 730 pdesc_t *pd; 731 732 ASSERT(pdi->flags & PDESC_HAS_REF); 733 ASSERT(!(pdi->flags & PDESC_HBUF_REF) || HBUF_REF_VALID(mmd, pdi)); 734 ASSERT(!(pdi->flags & PDESC_PBUF_REF) || pbuf_ref_valid(mmd, pdi)); 735 736 if (err != NULL) 737 *err = 0; 738 739 mutex_enter(&(mmd->mmd_pd_slab_lock)); 740 /* 741 * Is slab list empty or the last-added slab is full? If so, 742 * allocate new slab for the descriptor; otherwise, use the 743 * last-added slab instead. 744 */ 745 slab_last = Q2PDSLAB(mmd->mmd_pd_slab_q.ql_prev); 746 if (mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q) || 747 slab_last->pds_used == slab_last->pds_sz) { 748 slab = kmem_cache_alloc(pd_slab_cache, kmflags); 749 if (slab == NULL) { 750 if (err != NULL) 751 *err = ENOMEM; 752 mutex_exit(&(mmd->mmd_pd_slab_lock)); 753 return (NULL); 754 } 755 slab->pds_mmd = mmd; 756 757 ASSERT(slab->pds_used == 0); 758 ASSERT(slab->pds_next == NULL && slab->pds_prev == NULL); 759 760 /* insert slab at end of list */ 761 insque(&(slab->pds_next), mmd->mmd_pd_slab_q.ql_prev); 762 mmd->mmd_slab_cnt++; 763 } else { 764 slab = slab_last; 765 } 766 ASSERT(slab->pds_used < slab->pds_sz); 767 pd = &(slab->pds_free_desc[slab->pds_used++]); 768 ASSERT(pd->pd_magic == PDESC_MAGIC); 769 pd->pd_next = NULL; 770 pd->pd_prev = NULL; 771 pd->pd_slab = slab; 772 pd->pd_pattbl = NULL; 773 774 /* copy over the descriptor info from caller */ 775 PDI_COPY(pdi, &(pd->pd_pdi)); 776 777 if (pd->pd_flags & PDESC_HBUF_REF) 778 mmd->mmd_hbuf_ref++; 779 if (pd->pd_flags & PDESC_PBUF_REF) 780 mmd->mmd_pbuf_ref += pd->pd_pdi.pld_cnt; 781 mmd->mmd_pd_cnt++; 782 783 /* insert descriptor at end of list */ 784 insque(&(pd->pd_next), mmd->mmd_pd_q.ql_prev); 785 mutex_exit(&(mmd->mmd_pd_slab_lock)); 786 787 return (pd); 788 } 789 790 /* 791 * Packet descriptor slab kmem cache constructor routine. 792 */ 793 /* ARGSUSED */ 794 static int 795 pdslab_constructor(void *buf, void *cdrarg, int kmflags) 796 { 797 pdesc_slab_t *slab; 798 uint_t cnt = (uint_t)(uintptr_t)cdrarg; 799 int i; 800 801 ASSERT(cnt > 0); /* slab size can't be zero */ 802 803 slab = (pdesc_slab_t *)buf; 804 slab->pds_next = NULL; 805 slab->pds_prev = NULL; 806 slab->pds_mmd = NULL; 807 slab->pds_used = 0; 808 slab->pds_sz = cnt; 809 810 for (i = 0; i < cnt; i++) { 811 pdesc_t *pd = &(slab->pds_free_desc[i]); 812 pd->pd_magic = PDESC_MAGIC; 813 } 814 return (0); 815 } 816 817 /* 818 * Packet descriptor slab kmem cache destructor routine. 819 */ 820 /* ARGSUSED */ 821 static void 822 pdslab_destructor(void *buf, void *cdrarg) 823 { 824 pdesc_slab_t *slab; 825 826 slab = (pdesc_slab_t *)buf; 827 ASSERT(slab->pds_next == NULL); 828 ASSERT(slab->pds_prev == NULL); 829 ASSERT(slab->pds_mmd == NULL); 830 ASSERT(slab->pds_used == 0); 831 ASSERT(slab->pds_sz > 0); 832 } 833 834 /* 835 * Remove a packet descriptor from the in-use descriptor list, 836 * called by mmd_rempdesc or during free. 837 */ 838 static pdesc_t * 839 mmd_destroy_pdesc(multidata_t *mmd, pdesc_t *pd) 840 { 841 pdesc_t *pd_next; 842 843 pd_next = Q2PD(pd->pd_next); 844 remque(&(pd->pd_next)); 845 846 /* remove all local attributes */ 847 if (pd->pd_pattbl != NULL) 848 mmd_destroy_pattbl(&(pd->pd_pattbl)); 849 850 /* don't decrease counts for a removed descriptor */ 851 if (!(pd->pd_flags & PDESC_REM_DEFER)) { 852 if (pd->pd_flags & PDESC_HBUF_REF) { 853 ASSERT(mmd->mmd_hbuf_ref > 0); 854 mmd->mmd_hbuf_ref--; 855 } 856 if (pd->pd_flags & PDESC_PBUF_REF) { 857 ASSERT(mmd->mmd_pbuf_ref > 0); 858 mmd->mmd_pbuf_ref -= pd->pd_pdi.pld_cnt; 859 } 860 ASSERT(mmd->mmd_pd_cnt > 0); 861 mmd->mmd_pd_cnt--; 862 } 863 return (pd_next); 864 } 865 866 /* 867 * Remove a packet descriptor from the Multidata. 868 */ 869 void 870 mmd_rempdesc(pdesc_t *pd) 871 { 872 multidata_t *mmd; 873 874 ASSERT(pd->pd_magic == PDESC_MAGIC); 875 ASSERT(pd->pd_slab != NULL); 876 877 mmd = pd->pd_slab->pds_mmd; 878 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 879 880 mutex_enter(&(mmd->mmd_pd_slab_lock)); 881 /* 882 * We can't deallocate the associated resources if the Multidata 883 * is shared with other threads, because it's possible that the 884 * descriptor handle value is held by those threads. That's why 885 * we simply mark the entry as "removed" and decrement the counts. 886 * If there are no other threads, then we free the descriptor. 887 */ 888 if (mmd->mmd_dp->db_ref > 1) { 889 pd->pd_flags |= PDESC_REM_DEFER; 890 if (pd->pd_flags & PDESC_HBUF_REF) { 891 ASSERT(mmd->mmd_hbuf_ref > 0); 892 mmd->mmd_hbuf_ref--; 893 } 894 if (pd->pd_flags & PDESC_PBUF_REF) { 895 ASSERT(mmd->mmd_pbuf_ref > 0); 896 mmd->mmd_pbuf_ref -= pd->pd_pdi.pld_cnt; 897 } 898 ASSERT(mmd->mmd_pd_cnt > 0); 899 mmd->mmd_pd_cnt--; 900 } else { 901 (void) mmd_destroy_pdesc(mmd, pd); 902 } 903 mutex_exit(&(mmd->mmd_pd_slab_lock)); 904 } 905 906 /* 907 * A generic routine to traverse the packet descriptor in-use list. 908 */ 909 static pdesc_t * 910 mmd_getpdesc(multidata_t *mmd, pdesc_t *pd, pdescinfo_t *pdi, uint_t forw, 911 boolean_t mutex_held) 912 { 913 pdesc_t *pd_head; 914 915 ASSERT(pd == NULL || pd->pd_slab->pds_mmd == mmd); 916 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 917 ASSERT(!mutex_held || MUTEX_HELD(&(mmd->mmd_pd_slab_lock))); 918 919 if (!mutex_held) 920 mutex_enter(&(mmd->mmd_pd_slab_lock)); 921 pd_head = Q2PD(&(mmd->mmd_pd_q)); 922 923 if (pd == NULL) { 924 /* 925 * We're called by mmd_get{first,last}pdesc, and so 926 * return either the first or last list element. 927 */ 928 pd = forw ? Q2PD(mmd->mmd_pd_q.ql_next) : 929 Q2PD(mmd->mmd_pd_q.ql_prev); 930 } else { 931 /* 932 * We're called by mmd_get{next,prev}pdesc, and so 933 * return either the next or previous list element. 934 */ 935 pd = forw ? Q2PD(pd->pd_next) : Q2PD(pd->pd_prev); 936 } 937 938 while (pd != pd_head) { 939 /* skip element if it has been removed */ 940 if (!(pd->pd_flags & PDESC_REM_DEFER)) 941 break; 942 pd = forw ? Q2PD(pd->pd_next) : Q2PD(pd->pd_prev); 943 } 944 if (!mutex_held) 945 mutex_exit(&(mmd->mmd_pd_slab_lock)); 946 947 /* return NULL if we're back at the beginning */ 948 if (pd == pd_head) 949 pd = NULL; 950 951 /* got an entry; copy descriptor info to caller */ 952 if (pd != NULL && pdi != NULL) 953 PDI_COPY(&(pd->pd_pdi), pdi); 954 955 ASSERT(pd == NULL || pd->pd_magic == PDESC_MAGIC); 956 return (pd); 957 958 } 959 960 /* 961 * Return the first packet descriptor in the in-use list. 962 */ 963 pdesc_t * 964 mmd_getfirstpdesc(multidata_t *mmd, pdescinfo_t *pdi) 965 { 966 return (mmd_getpdesc(mmd, NULL, pdi, 1, B_FALSE)); 967 } 968 969 /* 970 * Return the last packet descriptor in the in-use list. 971 */ 972 pdesc_t * 973 mmd_getlastpdesc(multidata_t *mmd, pdescinfo_t *pdi) 974 { 975 return (mmd_getpdesc(mmd, NULL, pdi, 0, B_FALSE)); 976 } 977 978 /* 979 * Return the next packet descriptor in the in-use list. 980 */ 981 pdesc_t * 982 mmd_getnextpdesc(pdesc_t *pd, pdescinfo_t *pdi) 983 { 984 return (mmd_getpdesc(pd->pd_slab->pds_mmd, pd, pdi, 1, B_FALSE)); 985 } 986 987 /* 988 * Return the previous packet descriptor in the in-use list. 989 */ 990 pdesc_t * 991 mmd_getprevpdesc(pdesc_t *pd, pdescinfo_t *pdi) 992 { 993 return (mmd_getpdesc(pd->pd_slab->pds_mmd, pd, pdi, 0, B_FALSE)); 994 } 995 996 /* 997 * Check to see if pdi stretches over c_pdi; used to ensure that a packet 998 * descriptor's header and payload span may not be extended beyond the 999 * current boundaries. 1000 */ 1001 static boolean_t 1002 pdi_in_range(pdescinfo_t *pdi, pdescinfo_t *c_pdi) 1003 { 1004 int i; 1005 struct pld_ary_s *pa = &pdi->pld_ary[0]; 1006 struct pld_ary_s *c_pa = &c_pdi->pld_ary[0]; 1007 1008 if (pdi->hdr_base < c_pdi->hdr_base || pdi->hdr_lim > c_pdi->hdr_lim) 1009 return (B_FALSE); 1010 1011 /* 1012 * We don't allow the number of span to be reduced, for the sake 1013 * of simplicity. Instead, we provide PDESC_PLD_SPAN_CLEAR() to 1014 * clear a packet descriptor. Note that we allow the span count to 1015 * be increased, and the bounds check for the new one happens 1016 * in pbuf_ref_valid. 1017 */ 1018 if (pdi->pld_cnt < c_pdi->pld_cnt) 1019 return (B_FALSE); 1020 1021 /* compare only those which are currently defined */ 1022 for (i = 0; i < c_pdi->pld_cnt; i++, pa++, c_pa++) { 1023 if (pa->pld_pbuf_idx != c_pa->pld_pbuf_idx || 1024 pa->pld_rptr < c_pa->pld_rptr || 1025 pa->pld_wptr > c_pa->pld_wptr) 1026 return (B_FALSE); 1027 } 1028 return (B_TRUE); 1029 } 1030 1031 /* 1032 * Modify the layout of a packet descriptor. 1033 */ 1034 pdesc_t * 1035 mmd_adjpdesc(pdesc_t *pd, pdescinfo_t *pdi) 1036 { 1037 multidata_t *mmd; 1038 pdescinfo_t *c_pdi; 1039 1040 ASSERT(pd != NULL); 1041 ASSERT(pdi != NULL); 1042 ASSERT(pd->pd_magic == PDESC_MAGIC); 1043 1044 mmd = pd->pd_slab->pds_mmd; 1045 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 1046 1047 /* entry has been removed */ 1048 if (pd->pd_flags & PDESC_REM_DEFER) 1049 return (NULL); 1050 1051 /* caller doesn't intend to specify any buffer reference? */ 1052 if (!(pdi->flags & PDESC_HAS_REF)) 1053 return (NULL); 1054 1055 /* do the references refer to invalid memory regions? */ 1056 if (!mmd_speed_over_safety && 1057 (((pdi->flags & PDESC_HBUF_REF) && !HBUF_REF_VALID(mmd, pdi)) || 1058 ((pdi->flags & PDESC_PBUF_REF) && !pbuf_ref_valid(mmd, pdi)))) 1059 return (NULL); 1060 1061 /* they're not subsets of current references? */ 1062 c_pdi = &(pd->pd_pdi); 1063 if (!pdi_in_range(pdi, c_pdi)) 1064 return (NULL); 1065 1066 /* copy over the descriptor info from caller */ 1067 PDI_COPY(pdi, c_pdi); 1068 1069 return (pd); 1070 } 1071 1072 /* 1073 * Copy the contents of a packet descriptor into a new buffer. If the 1074 * descriptor points to more than one buffer fragments, the contents 1075 * of both fragments will be joined, with the header buffer fragment 1076 * preceding the payload buffer fragment(s). 1077 */ 1078 mblk_t * 1079 mmd_transform(pdesc_t *pd) 1080 { 1081 multidata_t *mmd; 1082 pdescinfo_t *pdi; 1083 mblk_t *mp; 1084 int h_size = 0, p_size = 0; 1085 int i, len; 1086 1087 ASSERT(pd != NULL); 1088 ASSERT(pd->pd_magic == PDESC_MAGIC); 1089 1090 mmd = pd->pd_slab->pds_mmd; 1091 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 1092 1093 /* entry has been removed */ 1094 if (pd->pd_flags & PDESC_REM_DEFER) 1095 return (NULL); 1096 1097 mutex_enter(&mmd->mmd_pd_slab_lock); 1098 pdi = &(pd->pd_pdi); 1099 if (pdi->flags & PDESC_HBUF_REF) 1100 h_size = PDESC_HDRL(pdi); 1101 if (pdi->flags & PDESC_PBUF_REF) { 1102 for (i = 0; i < pdi->pld_cnt; i++) 1103 p_size += PDESC_PLD_SPAN_SIZE(pdi, i); 1104 } 1105 1106 /* allocate space large enough to hold the fragment(s) */ 1107 ASSERT(h_size + p_size >= 0); 1108 if ((mp = allocb(h_size + p_size, BPRI_HI)) == NULL) { 1109 mutex_exit(&mmd->mmd_pd_slab_lock); 1110 return (NULL); 1111 } 1112 1113 /* copy over the header fragment */ 1114 if ((pdi->flags & PDESC_HBUF_REF) && h_size > 0) { 1115 bcopy(pdi->hdr_rptr, mp->b_wptr, h_size); 1116 mp->b_wptr += h_size; 1117 } 1118 1119 /* copy over the payload fragment */ 1120 if ((pdi->flags & PDESC_PBUF_REF) && p_size > 0) { 1121 for (i = 0; i < pdi->pld_cnt; i++) { 1122 len = PDESC_PLD_SPAN_SIZE(pdi, i); 1123 if (len > 0) { 1124 bcopy(pdi->pld_ary[i].pld_rptr, 1125 mp->b_wptr, len); 1126 mp->b_wptr += len; 1127 } 1128 } 1129 } 1130 1131 mutex_exit(&mmd->mmd_pd_slab_lock); 1132 return (mp); 1133 } 1134 1135 /* 1136 * Return a chain of mblks representing the Multidata packet. 1137 */ 1138 mblk_t * 1139 mmd_transform_link(pdesc_t *pd) 1140 { 1141 multidata_t *mmd; 1142 pdescinfo_t *pdi; 1143 mblk_t *nmp = NULL; 1144 1145 ASSERT(pd != NULL); 1146 ASSERT(pd->pd_magic == PDESC_MAGIC); 1147 1148 mmd = pd->pd_slab->pds_mmd; 1149 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 1150 1151 /* entry has been removed */ 1152 if (pd->pd_flags & PDESC_REM_DEFER) 1153 return (NULL); 1154 1155 pdi = &(pd->pd_pdi); 1156 1157 /* duplicate header buffer */ 1158 if ((pdi->flags & PDESC_HBUF_REF)) { 1159 if ((nmp = dupb(mmd->mmd_hbuf)) == NULL) 1160 return (NULL); 1161 nmp->b_rptr = pdi->hdr_rptr; 1162 nmp->b_wptr = pdi->hdr_wptr; 1163 } 1164 1165 /* duplicate payload buffer(s) */ 1166 if (pdi->flags & PDESC_PBUF_REF) { 1167 int i; 1168 mblk_t *mp; 1169 struct pld_ary_s *pa = &pdi->pld_ary[0]; 1170 1171 mutex_enter(&mmd->mmd_pd_slab_lock); 1172 for (i = 0; i < pdi->pld_cnt; i++, pa++) { 1173 ASSERT(mmd->mmd_pbuf[pa->pld_pbuf_idx] != NULL); 1174 1175 /* skip empty ones */ 1176 if (PDESC_PLD_SPAN_SIZE(pdi, i) == 0) 1177 continue; 1178 1179 mp = dupb(mmd->mmd_pbuf[pa->pld_pbuf_idx]); 1180 if (mp == NULL) { 1181 if (nmp != NULL) 1182 freemsg(nmp); 1183 mutex_exit(&mmd->mmd_pd_slab_lock); 1184 return (NULL); 1185 } 1186 mp->b_rptr = pa->pld_rptr; 1187 mp->b_wptr = pa->pld_wptr; 1188 if (nmp == NULL) 1189 nmp = mp; 1190 else 1191 linkb(nmp, mp); 1192 } 1193 mutex_exit(&mmd->mmd_pd_slab_lock); 1194 } 1195 1196 return (nmp); 1197 } 1198 1199 /* 1200 * Return duplicate message block(s) of the associated buffer(s). 1201 */ 1202 int 1203 mmd_dupbufs(multidata_t *mmd, mblk_t **hmp, mblk_t **pmp) 1204 { 1205 ASSERT(mmd != NULL); 1206 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 1207 1208 if (hmp != NULL) { 1209 *hmp = NULL; 1210 if (mmd->mmd_hbuf != NULL && 1211 (*hmp = dupb(mmd->mmd_hbuf)) == NULL) 1212 return (-1); 1213 } 1214 1215 if (pmp != NULL) { 1216 int i; 1217 mblk_t *mp; 1218 1219 mutex_enter(&mmd->mmd_pd_slab_lock); 1220 *pmp = NULL; 1221 for (i = 0; i < mmd->mmd_pbuf_cnt; i++) { 1222 ASSERT(mmd->mmd_pbuf[i] != NULL); 1223 mp = dupb(mmd->mmd_pbuf[i]); 1224 if (mp == NULL) { 1225 if (hmp != NULL && *hmp != NULL) 1226 freeb(*hmp); 1227 if (*pmp != NULL) 1228 freemsg(*pmp); 1229 mutex_exit(&mmd->mmd_pd_slab_lock); 1230 return (-1); 1231 } 1232 if (*pmp == NULL) 1233 *pmp = mp; 1234 else 1235 linkb(*pmp, mp); 1236 } 1237 mutex_exit(&mmd->mmd_pd_slab_lock); 1238 } 1239 1240 return (0); 1241 } 1242 1243 /* 1244 * Return the layout of a packet descriptor. 1245 */ 1246 int 1247 mmd_getpdescinfo(pdesc_t *pd, pdescinfo_t *pdi) 1248 { 1249 ASSERT(pd != NULL); 1250 ASSERT(pd->pd_magic == PDESC_MAGIC); 1251 ASSERT(pd->pd_slab != NULL); 1252 ASSERT(pd->pd_slab->pds_mmd->mmd_magic == MULTIDATA_MAGIC); 1253 ASSERT(pdi != NULL); 1254 1255 /* entry has been removed */ 1256 if (pd->pd_flags & PDESC_REM_DEFER) 1257 return (-1); 1258 1259 /* copy descriptor info to caller */ 1260 PDI_COPY(&(pd->pd_pdi), pdi); 1261 1262 return (0); 1263 } 1264 1265 /* 1266 * Add a global or local attribute to a Multidata. Global attribute 1267 * association is specified by a NULL packet descriptor. 1268 */ 1269 pattr_t * 1270 mmd_addpattr(multidata_t *mmd, pdesc_t *pd, pattrinfo_t *pai, 1271 boolean_t persistent, int kmflags) 1272 { 1273 patbkt_t **tbl_p; 1274 patbkt_t *tbl, *o_tbl; 1275 patbkt_t *bkt; 1276 pattr_t *pa; 1277 uint_t size; 1278 1279 ASSERT(mmd != NULL); 1280 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 1281 ASSERT(pd == NULL || pd->pd_magic == PDESC_MAGIC); 1282 ASSERT(pai != NULL); 1283 1284 /* pointer to the attribute hash table (local or global) */ 1285 tbl_p = pd != NULL ? &(pd->pd_pattbl) : &(mmd->mmd_pattbl); 1286 1287 /* 1288 * See if the hash table has not yet been created; if so, 1289 * we create the table and store its address atomically. 1290 */ 1291 if ((tbl = *tbl_p) == NULL) { 1292 tbl = kmem_cache_alloc(pattbl_cache, kmflags); 1293 if (tbl == NULL) 1294 return (NULL); 1295 1296 /* if someone got there first, use his table instead */ 1297 if ((o_tbl = casptr(tbl_p, NULL, tbl)) != NULL) { 1298 kmem_cache_free(pattbl_cache, tbl); 1299 tbl = o_tbl; 1300 } 1301 } 1302 1303 ASSERT(tbl->pbkt_tbl_sz > 0); 1304 bkt = &(tbl[PATTBL_HASH(pai->type, tbl->pbkt_tbl_sz)]); 1305 1306 /* attribute of the same type already exists? */ 1307 if ((pa = mmd_find_pattr(bkt, pai->type)) != NULL) 1308 return (NULL); 1309 1310 size = sizeof (*pa) + pai->len; 1311 if ((pa = kmem_zalloc(size, kmflags)) == NULL) 1312 return (NULL); 1313 1314 pa->pat_magic = PATTR_MAGIC; 1315 pa->pat_lock = &(bkt->pbkt_lock); 1316 pa->pat_mmd = mmd; 1317 pa->pat_buflen = size; 1318 pa->pat_type = pai->type; 1319 pai->buf = pai->len > 0 ? ((uchar_t *)(pa + 1)) : NULL; 1320 1321 if (persistent) 1322 pa->pat_flags = PATTR_PERSIST; 1323 1324 /* insert attribute at end of hash chain */ 1325 mutex_enter(&(bkt->pbkt_lock)); 1326 insque(&(pa->pat_next), bkt->pbkt_pattr_q.ql_prev); 1327 mutex_exit(&(bkt->pbkt_lock)); 1328 1329 return (pa); 1330 } 1331 1332 /* 1333 * Attribute hash table kmem cache constructor routine. 1334 */ 1335 /* ARGSUSED */ 1336 static int 1337 pattbl_constructor(void *buf, void *cdrarg, int kmflags) 1338 { 1339 patbkt_t *bkt; 1340 uint_t tbl_sz = (uint_t)(uintptr_t)cdrarg; 1341 uint_t i; 1342 1343 ASSERT(tbl_sz > 0); /* table size can't be zero */ 1344 1345 for (i = 0, bkt = (patbkt_t *)buf; i < tbl_sz; i++, bkt++) { 1346 mutex_init(&(bkt->pbkt_lock), NULL, MUTEX_DRIVER, NULL); 1347 QL_INIT(&(bkt->pbkt_pattr_q)); 1348 1349 /* first bucket contains the table size */ 1350 bkt->pbkt_tbl_sz = i == 0 ? tbl_sz : 0; 1351 } 1352 return (0); 1353 } 1354 1355 /* 1356 * Attribute hash table kmem cache destructor routine. 1357 */ 1358 /* ARGSUSED */ 1359 static void 1360 pattbl_destructor(void *buf, void *cdrarg) 1361 { 1362 patbkt_t *bkt; 1363 uint_t tbl_sz = (uint_t)(uintptr_t)cdrarg; 1364 uint_t i; 1365 1366 ASSERT(tbl_sz > 0); /* table size can't be zero */ 1367 1368 for (i = 0, bkt = (patbkt_t *)buf; i < tbl_sz; i++, bkt++) { 1369 mutex_destroy(&(bkt->pbkt_lock)); 1370 ASSERT(bkt->pbkt_pattr_q.ql_next == &(bkt->pbkt_pattr_q)); 1371 ASSERT(i > 0 || bkt->pbkt_tbl_sz == tbl_sz); 1372 } 1373 } 1374 1375 /* 1376 * Destroy an attribute hash table, called by mmd_rempdesc or during free. 1377 */ 1378 static void 1379 mmd_destroy_pattbl(patbkt_t **tbl) 1380 { 1381 patbkt_t *bkt; 1382 pattr_t *pa, *pa_next; 1383 uint_t i, tbl_sz; 1384 1385 ASSERT(tbl != NULL); 1386 bkt = *tbl; 1387 tbl_sz = bkt->pbkt_tbl_sz; 1388 1389 /* make sure caller passes in the first bucket */ 1390 ASSERT(tbl_sz > 0); 1391 1392 /* destroy the contents of each bucket */ 1393 for (i = 0; i < tbl_sz; i++, bkt++) { 1394 /* we ought to be exclusive at this point */ 1395 ASSERT(MUTEX_NOT_HELD(&(bkt->pbkt_lock))); 1396 1397 pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next); 1398 while (pa != Q2PATTR(&(bkt->pbkt_pattr_q))) { 1399 ASSERT(pa->pat_magic == PATTR_MAGIC); 1400 pa_next = Q2PATTR(pa->pat_next); 1401 remque(&(pa->pat_next)); 1402 kmem_free(pa, pa->pat_buflen); 1403 pa = pa_next; 1404 } 1405 } 1406 1407 kmem_cache_free(pattbl_cache, *tbl); 1408 *tbl = NULL; 1409 1410 /* commit all previous stores */ 1411 membar_producer(); 1412 } 1413 1414 /* 1415 * Copy the contents of an attribute hash table, called by mmd_copy. 1416 */ 1417 static int 1418 mmd_copy_pattbl(patbkt_t *src_tbl, multidata_t *n_mmd, pdesc_t *n_pd, 1419 int kmflags) 1420 { 1421 patbkt_t *bkt; 1422 pattr_t *pa; 1423 pattrinfo_t pai; 1424 uint_t i, tbl_sz; 1425 1426 ASSERT(src_tbl != NULL); 1427 bkt = src_tbl; 1428 tbl_sz = bkt->pbkt_tbl_sz; 1429 1430 /* make sure caller passes in the first bucket */ 1431 ASSERT(tbl_sz > 0); 1432 1433 for (i = 0; i < tbl_sz; i++, bkt++) { 1434 mutex_enter(&(bkt->pbkt_lock)); 1435 pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next); 1436 while (pa != Q2PATTR(&(bkt->pbkt_pattr_q))) { 1437 pattr_t *pa_next = Q2PATTR(pa->pat_next); 1438 1439 /* skip if it's removed */ 1440 if (pa->pat_flags & PATTR_REM_DEFER) { 1441 pa = pa_next; 1442 continue; 1443 } 1444 1445 pai.type = pa->pat_type; 1446 pai.len = pa->pat_buflen - sizeof (*pa); 1447 if (mmd_addpattr(n_mmd, n_pd, &pai, (pa->pat_flags & 1448 PATTR_PERSIST) != 0, kmflags) == NULL) { 1449 mutex_exit(&(bkt->pbkt_lock)); 1450 return (-1); 1451 } 1452 1453 /* copy over the contents */ 1454 if (pai.buf != NULL) 1455 bcopy(pa + 1, pai.buf, pai.len); 1456 1457 pa = pa_next; 1458 } 1459 mutex_exit(&(bkt->pbkt_lock)); 1460 } 1461 1462 return (0); 1463 } 1464 1465 /* 1466 * Search for an attribute type within an attribute hash bucket. 1467 */ 1468 static pattr_t * 1469 mmd_find_pattr(patbkt_t *bkt, uint_t type) 1470 { 1471 pattr_t *pa_head, *pa; 1472 1473 mutex_enter(&(bkt->pbkt_lock)); 1474 pa_head = Q2PATTR(&(bkt->pbkt_pattr_q)); 1475 pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next); 1476 1477 while (pa != pa_head) { 1478 ASSERT(pa->pat_magic == PATTR_MAGIC); 1479 1480 /* return a match; we treat removed entry as non-existent */ 1481 if (pa->pat_type == type && !(pa->pat_flags & PATTR_REM_DEFER)) 1482 break; 1483 pa = Q2PATTR(pa->pat_next); 1484 } 1485 mutex_exit(&(bkt->pbkt_lock)); 1486 1487 return (pa == pa_head ? NULL : pa); 1488 } 1489 1490 /* 1491 * Remove an attribute from a Multidata. 1492 */ 1493 void 1494 mmd_rempattr(pattr_t *pa) 1495 { 1496 kmutex_t *pat_lock = pa->pat_lock; 1497 1498 ASSERT(pa->pat_magic == PATTR_MAGIC); 1499 1500 /* ignore if attribute was marked as persistent */ 1501 if ((pa->pat_flags & PATTR_PERSIST) != 0) 1502 return; 1503 1504 mutex_enter(pat_lock); 1505 /* 1506 * We can't deallocate the associated resources if the Multidata 1507 * is shared with other threads, because it's possible that the 1508 * attribute handle value is held by those threads. That's why 1509 * we simply mark the entry as "removed". If there are no other 1510 * threads, then we free the attribute. 1511 */ 1512 if (pa->pat_mmd->mmd_dp->db_ref > 1) { 1513 pa->pat_flags |= PATTR_REM_DEFER; 1514 } else { 1515 remque(&(pa->pat_next)); 1516 kmem_free(pa, pa->pat_buflen); 1517 } 1518 mutex_exit(pat_lock); 1519 } 1520 1521 /* 1522 * Find an attribute (according to its type) and return its handle. 1523 */ 1524 pattr_t * 1525 mmd_getpattr(multidata_t *mmd, pdesc_t *pd, pattrinfo_t *pai) 1526 { 1527 patbkt_t *tbl, *bkt; 1528 pattr_t *pa; 1529 1530 ASSERT(mmd != NULL); 1531 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 1532 ASSERT(pai != NULL); 1533 1534 /* get the right attribute hash table (local or global) */ 1535 tbl = pd != NULL ? pd->pd_pattbl : mmd->mmd_pattbl; 1536 1537 /* attribute hash table doesn't exist? */ 1538 if (tbl == NULL) 1539 return (NULL); 1540 1541 ASSERT(tbl->pbkt_tbl_sz > 0); 1542 bkt = &(tbl[PATTBL_HASH(pai->type, tbl->pbkt_tbl_sz)]); 1543 1544 if ((pa = mmd_find_pattr(bkt, pai->type)) != NULL) { 1545 ASSERT(pa->pat_buflen >= sizeof (*pa)); 1546 pai->len = pa->pat_buflen - sizeof (*pa); 1547 pai->buf = pai->len > 0 ? 1548 (uchar_t *)pa + sizeof (pattr_t) : NULL; 1549 } 1550 ASSERT(pa == NULL || pa->pat_magic == PATTR_MAGIC); 1551 return (pa); 1552 } 1553 1554 /* 1555 * Return total size of buffers and total size of areas referenced 1556 * by all in-use (unremoved) packet descriptors. 1557 */ 1558 void 1559 mmd_getsize(multidata_t *mmd, uint_t *ptotal, uint_t *pinuse) 1560 { 1561 pdesc_t *pd; 1562 pdescinfo_t *pdi; 1563 int i; 1564 1565 ASSERT(mmd != NULL); 1566 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 1567 1568 mutex_enter(&mmd->mmd_pd_slab_lock); 1569 if (ptotal != NULL) { 1570 *ptotal = 0; 1571 1572 if (mmd->mmd_hbuf != NULL) 1573 *ptotal += MBLKL(mmd->mmd_hbuf); 1574 1575 for (i = 0; i < mmd->mmd_pbuf_cnt; i++) { 1576 ASSERT(mmd->mmd_pbuf[i] != NULL); 1577 *ptotal += MBLKL(mmd->mmd_pbuf[i]); 1578 } 1579 } 1580 if (pinuse != NULL) { 1581 *pinuse = 0; 1582 1583 /* first pdesc */ 1584 pd = mmd_getpdesc(mmd, NULL, NULL, 1, B_TRUE); 1585 while (pd != NULL) { 1586 pdi = &pd->pd_pdi; 1587 1588 /* next pdesc */ 1589 pd = mmd_getpdesc(mmd, pd, NULL, 1, B_TRUE); 1590 1591 /* skip over removed descriptor */ 1592 if (pdi->flags & PDESC_REM_DEFER) 1593 continue; 1594 1595 if (pdi->flags & PDESC_HBUF_REF) 1596 *pinuse += PDESC_HDRL(pdi); 1597 1598 if (pdi->flags & PDESC_PBUF_REF) { 1599 for (i = 0; i < pdi->pld_cnt; i++) 1600 *pinuse += PDESC_PLDL(pdi, i); 1601 } 1602 } 1603 } 1604 mutex_exit(&mmd->mmd_pd_slab_lock); 1605 } 1606