/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * Multidata, as described in the following papers:
 *
 * Adi Masputra,
 * Multidata V.2: VA-Disjoint Packet Extents Framework Interface
 * Design Specification.  August 2004.
 * Available as http://sac.sfbay/PSARC/2004/594/materials/mmd2.pdf.
 *
 * Adi Masputra,
 * Multidata Interface Design Specification.  Sep 2002.
 * Available as http://sac.sfbay/PSARC/2002/276/materials/mmd.pdf.
 *
 * Adi Masputra, Frank DiMambro, Kacheong Poon,
 * An Efficient Networking Transmit Mechanism for Solaris:
 * Multidata Transmit (MDT).  May 2002.
 * Available as http://sac.sfbay/PSARC/2002/276/materials/mdt.pdf.
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/dlpi.h>
#include <sys/stropts.h>
#include <sys/strsun.h>
#include <sys/strlog.h>
#include <sys/strsubr.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/kmem.h>
#include <sys/atomic.h>

#include <sys/multidata.h>
#include <sys/multidata_impl.h>

extern mblk_t *desballoc(unsigned char *, size_t, uint_t, frtn_t *);

static int mmd_constructor(void *, void *, int);
static void mmd_destructor(void *, void *);
static int pdslab_constructor(void *, void *, int);
static void pdslab_destructor(void *, void *);
static int pattbl_constructor(void *, void *, int);
static void pattbl_destructor(void *, void *);
static void mmd_esballoc_free(caddr_t);
static int mmd_copy_pattbl(patbkt_t *, multidata_t *, pdesc_t *, int);

static boolean_t pbuf_ref_valid(multidata_t *, pdescinfo_t *);
#pragma inline(pbuf_ref_valid)

static boolean_t pdi_in_range(pdescinfo_t *, pdescinfo_t *);
#pragma inline(pdi_in_range)

static pdesc_t *mmd_addpdesc_int(multidata_t *, pdescinfo_t *, int *, int);
#pragma inline(mmd_addpdesc_int)

static void mmd_destroy_pattbl(patbkt_t **);
#pragma inline(mmd_destroy_pattbl)

static pattr_t *mmd_find_pattr(patbkt_t *, uint_t);
#pragma inline(mmd_find_pattr)

static pdesc_t *mmd_destroy_pdesc(multidata_t *, pdesc_t *);
#pragma inline(mmd_destroy_pdesc)

static pdesc_t *mmd_getpdesc(multidata_t *, pdesc_t *, pdescinfo_t *, uint_t,
    boolean_t);
#pragma inline(mmd_getpdesc)

static struct kmem_cache *mmd_cache;
static struct kmem_cache *pd_slab_cache;
static struct kmem_cache *pattbl_cache;

int mmd_debug = 1;
#define	MMD_DEBUG(s)	if (mmd_debug > 0) cmn_err s

/*
 * Set this to B_TRUE to bypass pdesc bounds checking.
 */
boolean_t mmd_speed_over_safety = B_FALSE;

/*
 * Patchable kmem_cache flags.
 */
int mmd_kmem_flags = 0;
int pdslab_kmem_flags = 0;
int pattbl_kmem_flags = 0;

/*
 * Alignment (in bytes) of our kmem caches.
 */
#define	MULTIDATA_CACHE_ALIGN	64

/*
 * Default number of packet descriptors per descriptor slab.  Making
 * this too small will trigger more descriptor slab allocation; making
 * it too large will create too many unclaimed descriptors.
 */
#define	PDSLAB_SZ	15
uint_t pdslab_sz = PDSLAB_SZ;

/*
 * Default attribute hash table size.  It's okay to set this to a small
 * value (even to 1) because there aren't that many attributes currently
 * defined, and because we assume there won't be many attributes associated
 * with a Multidata at a given time.  Increasing the size will reduce
 * attribute search time (given a large number of attributes in a Multidata),
 * and decreasing it will reduce the memory footprint and the overhead
 * associated with managing the table.
 */
#define	PATTBL_SZ	1
uint_t pattbl_sz = PATTBL_SZ;

/*
 * Attribute hash key.
 */
#define	PATTBL_HASH(x, sz)	((x) % (sz))

/*
 * Structure that precedes each Multidata metadata.
 */
struct mmd_buf_info {
	frtn_t	frp;		/* free routine */
	uint_t	buf_len;	/* length of kmem buffer */
};

/*
 * The size of each metadata buffer.
 */
#define	MMD_CACHE_SIZE	\
	(sizeof (struct mmd_buf_info) + sizeof (multidata_t))

/*
 * Called during startup in order to create the Multidata kmem caches.
 */
void
mmd_init(void)
{
	pdslab_sz = MAX(1, pdslab_sz);	/* at least 1 descriptor */
	pattbl_sz = MAX(1, pattbl_sz);	/* at least 1 bucket */

	mmd_cache = kmem_cache_create("multidata", MMD_CACHE_SIZE,
	    MULTIDATA_CACHE_ALIGN, mmd_constructor, mmd_destructor,
	    NULL, NULL, NULL, mmd_kmem_flags);

	pd_slab_cache = kmem_cache_create("multidata_pdslab",
	    PDESC_SLAB_SIZE(pdslab_sz), MULTIDATA_CACHE_ALIGN,
	    pdslab_constructor, pdslab_destructor, NULL,
	    (void *)(uintptr_t)pdslab_sz, NULL, pdslab_kmem_flags);

	pattbl_cache = kmem_cache_create("multidata_pattbl",
	    sizeof (patbkt_t) * pattbl_sz, MULTIDATA_CACHE_ALIGN,
	    pattbl_constructor, pattbl_destructor, NULL,
	    (void *)(uintptr_t)pattbl_sz, NULL, pattbl_kmem_flags);
}

/*
 * Create a Multidata message block.
 */
multidata_t *
mmd_alloc(mblk_t *hdr_mp, mblk_t **mmd_mp, int kmflags)
{
	uchar_t *buf;
	multidata_t *mmd;
	uint_t mmd_mplen;
	struct mmd_buf_info *buf_info;

	ASSERT(hdr_mp != NULL);
	ASSERT(mmd_mp != NULL);

	/*
	 * Caller should never pass in a chain of mblks since we
	 * only care about the first one, hence the assertions.
	 */
	ASSERT(hdr_mp->b_cont == NULL);

	if ((buf = kmem_cache_alloc(mmd_cache, kmflags)) == NULL)
		return (NULL);

	buf_info = (struct mmd_buf_info *)buf;
	buf_info->frp.free_arg = (caddr_t)buf;

	mmd = (multidata_t *)(buf_info + 1);
	mmd_mplen = sizeof (*mmd);

	if ((*mmd_mp = desballoc((uchar_t *)mmd, mmd_mplen, BPRI_HI,
	    &(buf_info->frp))) == NULL) {
		kmem_cache_free(mmd_cache, buf);
		return (NULL);
	}

	DB_TYPE(*mmd_mp) = M_MULTIDATA;
	(*mmd_mp)->b_wptr += mmd_mplen;
	mmd->mmd_dp = (*mmd_mp)->b_datap;
	mmd->mmd_hbuf = hdr_mp;

	return (mmd);
}

/*
 * Associate an additional payload buffer with the Multidata.
 */
int
mmd_addpldbuf(multidata_t *mmd, mblk_t *pld_mp)
{
	int i;

	ASSERT(mmd != NULL);
	ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
	ASSERT(pld_mp != NULL);

	mutex_enter(&mmd->mmd_pd_slab_lock);
	for (i = 0; i < MULTIDATA_MAX_PBUFS &&
	    mmd->mmd_pbuf_cnt < MULTIDATA_MAX_PBUFS; i++) {
		if (mmd->mmd_pbuf[i] == pld_mp) {
			/* duplicate entry */
			MMD_DEBUG((CE_WARN, "mmd_addpldbuf: error adding "
			    "pld 0x%p to mmd 0x%p since it has been "
			    "previously added into slot %d (total %d)\n",
			    (void *)pld_mp, (void *)mmd, i, mmd->mmd_pbuf_cnt));
			mutex_exit(&mmd->mmd_pd_slab_lock);
			return (-1);
		} else if (mmd->mmd_pbuf[i] == NULL) {
			mmd->mmd_pbuf[i] = pld_mp;
			mmd->mmd_pbuf_cnt++;
			mutex_exit(&mmd->mmd_pd_slab_lock);
			return (i);
		}
	}

	/* all slots are taken */
	MMD_DEBUG((CE_WARN, "mmd_addpldbuf: error adding pld 0x%p to mmd 0x%p "
	    "since no slot space is left (total %d max %d)\n", (void *)pld_mp,
	    (void *)mmd, mmd->mmd_pbuf_cnt, MULTIDATA_MAX_PBUFS));
	mutex_exit(&mmd->mmd_pd_slab_lock);

	return (-1);
}
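
/*
 * Illustrative sketch (not part of the original interface documentation):
 * how a caller might create a Multidata and attach one payload buffer.
 * The buffer names and lengths below (hdr_mp, pld_mp, hdr_len, pld_len)
 * are hypothetical.
 *
 *	mblk_t *hdr_mp, *pld_mp, *mmd_mp;
 *	multidata_t *mmd;
 *	int idx;
 *
 *	hdr_mp = allocb(hdr_len, BPRI_HI);	// will hold packet headers
 *	pld_mp = allocb(pld_len, BPRI_HI);	// will hold payload data
 *	if (hdr_mp == NULL || pld_mp == NULL)
 *		// bail out, freeing whatever was allocated
 *
 *	// mmd_alloc() takes ownership of hdr_mp on success and returns
 *	// mmd_mp, an M_MULTIDATA mblk that wraps the metadata.
 *	if ((mmd = mmd_alloc(hdr_mp, &mmd_mp, KM_NOSLEEP)) == NULL)
 *		// free hdr_mp and pld_mp, then bail out
 *
 *	// Returns the payload buffer index (>= 0), or -1 on failure.
 *	idx = mmd_addpldbuf(mmd, pld_mp);
 */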
327 */ 328 static void 329 mmd_esballoc_free(caddr_t buf) 330 { 331 multidata_t *mmd; 332 pdesc_t *pd; 333 pdesc_slab_t *slab; 334 int i; 335 336 ASSERT(buf != NULL); 337 ASSERT(((struct mmd_buf_info *)buf)->buf_len == MMD_CACHE_SIZE); 338 339 mmd = (multidata_t *)(buf + sizeof (struct mmd_buf_info)); 340 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 341 342 ASSERT(mmd->mmd_dp != NULL); 343 ASSERT(mmd->mmd_dp->db_ref == 1); 344 345 /* remove all packet descriptors and private attributes */ 346 pd = Q2PD(mmd->mmd_pd_q.ql_next); 347 while (pd != Q2PD(&(mmd->mmd_pd_q))) 348 pd = mmd_destroy_pdesc(mmd, pd); 349 350 ASSERT(mmd->mmd_pd_q.ql_next == &(mmd->mmd_pd_q)); 351 ASSERT(mmd->mmd_pd_cnt == 0); 352 ASSERT(mmd->mmd_hbuf_ref == 0); 353 ASSERT(mmd->mmd_pbuf_ref == 0); 354 355 /* remove all global attributes */ 356 if (mmd->mmd_pattbl != NULL) 357 mmd_destroy_pattbl(&(mmd->mmd_pattbl)); 358 359 /* remove all descriptor slabs */ 360 slab = Q2PDSLAB(mmd->mmd_pd_slab_q.ql_next); 361 while (slab != Q2PDSLAB(&(mmd->mmd_pd_slab_q))) { 362 pdesc_slab_t *slab_next = Q2PDSLAB(slab->pds_next); 363 364 remque(&(slab->pds_next)); 365 slab->pds_next = NULL; 366 slab->pds_prev = NULL; 367 slab->pds_mmd = NULL; 368 slab->pds_used = 0; 369 kmem_cache_free(pd_slab_cache, slab); 370 371 ASSERT(mmd->mmd_slab_cnt > 0); 372 mmd->mmd_slab_cnt--; 373 slab = slab_next; 374 } 375 ASSERT(mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q)); 376 ASSERT(mmd->mmd_slab_cnt == 0); 377 378 mmd->mmd_dp = NULL; 379 380 /* finally, free all associated message blocks */ 381 if (mmd->mmd_hbuf != NULL) { 382 freeb(mmd->mmd_hbuf); 383 mmd->mmd_hbuf = NULL; 384 } 385 386 for (i = 0; i < MULTIDATA_MAX_PBUFS; i++) { 387 if (mmd->mmd_pbuf[i] != NULL) { 388 freeb(mmd->mmd_pbuf[i]); 389 mmd->mmd_pbuf[i] = NULL; 390 ASSERT(mmd->mmd_pbuf_cnt > 0); 391 mmd->mmd_pbuf_cnt--; 392 } 393 } 394 395 ASSERT(mmd->mmd_pbuf_cnt == 0); 396 ASSERT(MUTEX_NOT_HELD(&(mmd->mmd_pd_slab_lock))); 397 kmem_cache_free(mmd_cache, buf); 398 } 399 400 /* 401 * Multidata message block copy routine, called by copyb() when it 402 * encounters a M_MULTIDATA data block type. This routine should 403 * not be called by anyone other than copyb(), since it may go away 404 * (read: become static to this module) once some sort of copy callback 405 * routine is made available. 
406 */ 407 mblk_t * 408 mmd_copy(mblk_t *bp, int kmflags) 409 { 410 multidata_t *mmd, *n_mmd; 411 mblk_t *n_hbuf = NULL, *n_pbuf[MULTIDATA_MAX_PBUFS]; 412 mblk_t **pmp_last = &n_pbuf[MULTIDATA_MAX_PBUFS - 1]; 413 mblk_t **pmp; 414 mblk_t *n_bp = NULL; 415 pdesc_t *pd; 416 uint_t n_pbuf_cnt = 0; 417 int idx, i; 418 419 #define FREE_PBUFS() { \ 420 for (pmp = &n_pbuf[0]; pmp <= pmp_last; pmp++) \ 421 if (*pmp != NULL) freeb(*pmp); \ 422 } 423 424 #define REL_OFF(p, base, n_base) \ 425 ((uchar_t *)(n_base) + ((uchar_t *)(p) - (uchar_t *)base)) 426 427 ASSERT(bp != NULL && DB_TYPE(bp) == M_MULTIDATA); 428 mmd = mmd_getmultidata(bp); 429 430 /* copy the header buffer */ 431 if (mmd->mmd_hbuf != NULL && (n_hbuf = copyb(mmd->mmd_hbuf)) == NULL) 432 return (NULL); 433 434 /* copy the payload buffer(s) */ 435 mutex_enter(&mmd->mmd_pd_slab_lock); 436 bzero((void *)&n_pbuf[0], sizeof (mblk_t *) * MULTIDATA_MAX_PBUFS); 437 n_pbuf_cnt = mmd->mmd_pbuf_cnt; 438 for (i = 0; i < n_pbuf_cnt; i++) { 439 ASSERT(mmd->mmd_pbuf[i] != NULL); 440 n_pbuf[i] = copyb(mmd->mmd_pbuf[i]); 441 if (n_pbuf[i] == NULL) { 442 FREE_PBUFS(); 443 mutex_exit(&mmd->mmd_pd_slab_lock); 444 return (NULL); 445 } 446 } 447 448 /* allocate new Multidata */ 449 n_mmd = mmd_alloc(n_hbuf, &n_bp, kmflags); 450 if (n_mmd == NULL) { 451 if (n_hbuf != NULL) 452 freeb(n_hbuf); 453 if (n_pbuf_cnt != 0) 454 FREE_PBUFS(); 455 mutex_exit(&mmd->mmd_pd_slab_lock); 456 return (NULL); 457 } 458 459 /* 460 * Add payload buffer(s); upon success, leave n_pbuf array 461 * alone, as the newly-created Multidata had already contained 462 * the mblk pointers stored in the array. These will be freed 463 * along with the Multidata itself. 464 */ 465 for (i = 0, pmp = &n_pbuf[0]; i < n_pbuf_cnt; i++, pmp++) { 466 idx = mmd_addpldbuf(n_mmd, *pmp); 467 if (idx < 0) { 468 FREE_PBUFS(); 469 freeb(n_bp); 470 mutex_exit(&mmd->mmd_pd_slab_lock); 471 return (NULL); 472 } 473 } 474 475 /* copy over global attributes */ 476 if (mmd->mmd_pattbl != NULL && 477 mmd_copy_pattbl(mmd->mmd_pattbl, n_mmd, NULL, kmflags) < 0) { 478 freeb(n_bp); 479 mutex_exit(&mmd->mmd_pd_slab_lock); 480 return (NULL); 481 } 482 483 /* copy over packet descriptors and their atttributes */ 484 pd = mmd_getpdesc(mmd, NULL, NULL, 1, B_TRUE); /* first pdesc */ 485 while (pd != NULL) { 486 pdesc_t *n_pd; 487 pdescinfo_t *pdi, n_pdi; 488 uchar_t *n_base, *base; 489 pdesc_t *pd_next; 490 491 /* next pdesc */ 492 pd_next = mmd_getpdesc(pd->pd_slab->pds_mmd, pd, NULL, 493 1, B_TRUE); 494 495 /* skip if already removed */ 496 if (pd->pd_flags & PDESC_REM_DEFER) { 497 pd = pd_next; 498 continue; 499 } 500 501 pdi = &(pd->pd_pdi); 502 bzero(&n_pdi, sizeof (n_pdi)); 503 504 /* 505 * Calculate new descriptor values based on the offset of 506 * each pointer relative to the associated buffer(s). 
507 */ 508 ASSERT(pdi->flags & PDESC_HAS_REF); 509 if (pdi->flags & PDESC_HBUF_REF) { 510 n_base = n_mmd->mmd_hbuf->b_rptr; 511 base = mmd->mmd_hbuf->b_rptr; 512 513 n_pdi.flags |= PDESC_HBUF_REF; 514 n_pdi.hdr_base = REL_OFF(pdi->hdr_base, base, n_base); 515 n_pdi.hdr_rptr = REL_OFF(pdi->hdr_rptr, base, n_base); 516 n_pdi.hdr_wptr = REL_OFF(pdi->hdr_wptr, base, n_base); 517 n_pdi.hdr_lim = REL_OFF(pdi->hdr_lim, base, n_base); 518 } 519 520 if (pdi->flags & PDESC_PBUF_REF) { 521 n_pdi.flags |= PDESC_PBUF_REF; 522 n_pdi.pld_cnt = pdi->pld_cnt; 523 524 for (i = 0; i < pdi->pld_cnt; i++) { 525 idx = pdi->pld_ary[i].pld_pbuf_idx; 526 ASSERT(idx < MULTIDATA_MAX_PBUFS); 527 ASSERT(n_mmd->mmd_pbuf[idx] != NULL); 528 ASSERT(mmd->mmd_pbuf[idx] != NULL); 529 530 n_base = n_mmd->mmd_pbuf[idx]->b_rptr; 531 base = mmd->mmd_pbuf[idx]->b_rptr; 532 533 n_pdi.pld_ary[i].pld_pbuf_idx = idx; 534 535 /* 536 * We can't copy the pointers just like that, 537 * so calculate the relative offset. 538 */ 539 n_pdi.pld_ary[i].pld_rptr = 540 REL_OFF(pdi->pld_ary[i].pld_rptr, 541 base, n_base); 542 n_pdi.pld_ary[i].pld_wptr = 543 REL_OFF(pdi->pld_ary[i].pld_wptr, 544 base, n_base); 545 } 546 } 547 548 /* add the new descriptor to the new Multidata */ 549 n_pd = mmd_addpdesc_int(n_mmd, &n_pdi, NULL, kmflags); 550 551 if (n_pd == NULL || (pd->pd_pattbl != NULL && 552 mmd_copy_pattbl(pd->pd_pattbl, n_mmd, n_pd, kmflags) < 0)) { 553 freeb(n_bp); 554 mutex_exit(&mmd->mmd_pd_slab_lock); 555 return (NULL); 556 } 557 558 pd = pd_next; 559 } 560 #undef REL_OFF 561 #undef FREE_PBUFS 562 563 mutex_exit(&mmd->mmd_pd_slab_lock); 564 return (n_bp); 565 } 566 567 /* 568 * Given a Multidata message block, return the Multidata metadata handle. 569 */ 570 multidata_t * 571 mmd_getmultidata(mblk_t *mp) 572 { 573 multidata_t *mmd; 574 575 ASSERT(mp != NULL); 576 577 if (DB_TYPE(mp) != M_MULTIDATA) 578 return (NULL); 579 580 mmd = (multidata_t *)mp->b_rptr; 581 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 582 583 return (mmd); 584 } 585 586 /* 587 * Return the start and end addresses of the associated buffer(s). 588 */ 589 void 590 mmd_getregions(multidata_t *mmd, mbufinfo_t *mbi) 591 { 592 int i; 593 594 ASSERT(mmd != NULL); 595 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 596 ASSERT(mbi != NULL); 597 598 bzero((void *)mbi, sizeof (mbufinfo_t)); 599 600 if (mmd->mmd_hbuf != NULL) { 601 mbi->hbuf_rptr = mmd->mmd_hbuf->b_rptr; 602 mbi->hbuf_wptr = mmd->mmd_hbuf->b_wptr; 603 } 604 605 mutex_enter(&mmd->mmd_pd_slab_lock); 606 for (i = 0; i < mmd->mmd_pbuf_cnt; i++) { 607 ASSERT(mmd->mmd_pbuf[i] != NULL); 608 mbi->pbuf_ary[i].pbuf_rptr = mmd->mmd_pbuf[i]->b_rptr; 609 mbi->pbuf_ary[i].pbuf_wptr = mmd->mmd_pbuf[i]->b_wptr; 610 611 } 612 mbi->pbuf_cnt = mmd->mmd_pbuf_cnt; 613 mutex_exit(&mmd->mmd_pd_slab_lock); 614 } 615 616 /* 617 * Return the Multidata statistics. 
618 */ 619 uint_t 620 mmd_getcnt(multidata_t *mmd, uint_t *hbuf_ref, uint_t *pbuf_ref) 621 { 622 uint_t pd_cnt; 623 624 ASSERT(mmd != NULL); 625 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 626 627 mutex_enter(&(mmd->mmd_pd_slab_lock)); 628 if (hbuf_ref != NULL) 629 *hbuf_ref = mmd->mmd_hbuf_ref; 630 if (pbuf_ref != NULL) 631 *pbuf_ref = mmd->mmd_pbuf_ref; 632 pd_cnt = mmd->mmd_pd_cnt; 633 mutex_exit(&(mmd->mmd_pd_slab_lock)); 634 635 return (pd_cnt); 636 } 637 638 #define HBUF_REF_VALID(mmd, pdi) \ 639 ((mmd)->mmd_hbuf != NULL && (pdi)->hdr_rptr != NULL && \ 640 (pdi)->hdr_wptr != NULL && (pdi)->hdr_base != NULL && \ 641 (pdi)->hdr_lim != NULL && (pdi)->hdr_lim >= (pdi)->hdr_base && \ 642 (pdi)->hdr_wptr >= (pdi)->hdr_rptr && \ 643 (pdi)->hdr_base <= (pdi)->hdr_rptr && \ 644 (pdi)->hdr_lim >= (pdi)->hdr_wptr && \ 645 (pdi)->hdr_base >= (mmd)->mmd_hbuf->b_rptr && \ 646 MBLKIN((mmd)->mmd_hbuf, \ 647 (pdi->hdr_base - (mmd)->mmd_hbuf->b_rptr), \ 648 PDESC_HDRSIZE(pdi))) 649 650 /* 651 * Bounds check payload area(s). 652 */ 653 static boolean_t 654 pbuf_ref_valid(multidata_t *mmd, pdescinfo_t *pdi) 655 { 656 int i = 0, idx; 657 boolean_t valid = B_TRUE; 658 struct pld_ary_s *pa; 659 660 mutex_enter(&mmd->mmd_pd_slab_lock); 661 if (pdi->pld_cnt == 0 || pdi->pld_cnt > mmd->mmd_pbuf_cnt) { 662 mutex_exit(&mmd->mmd_pd_slab_lock); 663 return (B_FALSE); 664 } 665 666 pa = &pdi->pld_ary[0]; 667 while (valid && i < pdi->pld_cnt) { 668 valid = (((idx = pa->pld_pbuf_idx) < mmd->mmd_pbuf_cnt) && 669 pa->pld_rptr != NULL && pa->pld_wptr != NULL && 670 pa->pld_wptr >= pa->pld_rptr && 671 pa->pld_rptr >= mmd->mmd_pbuf[idx]->b_rptr && 672 MBLKIN(mmd->mmd_pbuf[idx], (pa->pld_rptr - 673 mmd->mmd_pbuf[idx]->b_rptr), 674 PDESC_PLD_SPAN_SIZE(pdi, i))); 675 676 if (!valid) { 677 MMD_DEBUG((CE_WARN, 678 "pbuf_ref_valid: pdi 0x%p pld out of bound; " 679 "index %d has pld_cnt %d pbuf_idx %d " 680 "(mmd_pbuf_cnt %d), " 681 "pld_rptr 0x%p pld_wptr 0x%p len %d " 682 "(valid 0x%p-0x%p len %d)\n", (void *)pdi, 683 i, pdi->pld_cnt, idx, mmd->mmd_pbuf_cnt, 684 (void *)pa->pld_rptr, 685 (void *)pa->pld_wptr, 686 (int)PDESC_PLD_SPAN_SIZE(pdi, i), 687 (void *)mmd->mmd_pbuf[idx]->b_rptr, 688 (void *)mmd->mmd_pbuf[idx]->b_wptr, 689 (int)MBLKL(mmd->mmd_pbuf[idx]))); 690 } 691 692 /* advance to next entry */ 693 i++; 694 pa++; 695 } 696 697 mutex_exit(&mmd->mmd_pd_slab_lock); 698 return (valid); 699 } 700 701 /* 702 * Add a packet descriptor to the Multidata. 703 */ 704 pdesc_t * 705 mmd_addpdesc(multidata_t *mmd, pdescinfo_t *pdi, int *err, int kmflags) 706 { 707 ASSERT(mmd != NULL); 708 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 709 ASSERT(pdi != NULL); 710 ASSERT(pdi->flags & PDESC_HAS_REF); 711 712 /* do the references refer to invalid memory regions? */ 713 if (!mmd_speed_over_safety && 714 (((pdi->flags & PDESC_HBUF_REF) && !HBUF_REF_VALID(mmd, pdi)) || 715 ((pdi->flags & PDESC_PBUF_REF) && !pbuf_ref_valid(mmd, pdi)))) { 716 if (err != NULL) 717 *err = EINVAL; 718 return (NULL); 719 } 720 721 return (mmd_addpdesc_int(mmd, pdi, err, kmflags)); 722 } 723 724 /* 725 * Internal routine to add a packet descriptor, called when mmd_addpdesc 726 * or mmd_copy tries to allocate and add a descriptor to a Multidata. 
727 */ 728 static pdesc_t * 729 mmd_addpdesc_int(multidata_t *mmd, pdescinfo_t *pdi, int *err, int kmflags) 730 { 731 pdesc_slab_t *slab, *slab_last; 732 pdesc_t *pd; 733 734 ASSERT(pdi->flags & PDESC_HAS_REF); 735 ASSERT(!(pdi->flags & PDESC_HBUF_REF) || HBUF_REF_VALID(mmd, pdi)); 736 ASSERT(!(pdi->flags & PDESC_PBUF_REF) || pbuf_ref_valid(mmd, pdi)); 737 738 if (err != NULL) 739 *err = 0; 740 741 mutex_enter(&(mmd->mmd_pd_slab_lock)); 742 /* 743 * Is slab list empty or the last-added slab is full? If so, 744 * allocate new slab for the descriptor; otherwise, use the 745 * last-added slab instead. 746 */ 747 slab_last = Q2PDSLAB(mmd->mmd_pd_slab_q.ql_prev); 748 if (mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q) || 749 slab_last->pds_used == slab_last->pds_sz) { 750 slab = kmem_cache_alloc(pd_slab_cache, kmflags); 751 if (slab == NULL) { 752 if (err != NULL) 753 *err = ENOMEM; 754 mutex_exit(&(mmd->mmd_pd_slab_lock)); 755 return (NULL); 756 } 757 slab->pds_mmd = mmd; 758 759 ASSERT(slab->pds_used == 0); 760 ASSERT(slab->pds_next == NULL && slab->pds_prev == NULL); 761 762 /* insert slab at end of list */ 763 insque(&(slab->pds_next), mmd->mmd_pd_slab_q.ql_prev); 764 mmd->mmd_slab_cnt++; 765 } else { 766 slab = slab_last; 767 } 768 ASSERT(slab->pds_used < slab->pds_sz); 769 pd = &(slab->pds_free_desc[slab->pds_used++]); 770 ASSERT(pd->pd_magic == PDESC_MAGIC); 771 pd->pd_next = NULL; 772 pd->pd_prev = NULL; 773 pd->pd_slab = slab; 774 pd->pd_pattbl = NULL; 775 776 /* copy over the descriptor info from caller */ 777 PDI_COPY(pdi, &(pd->pd_pdi)); 778 779 if (pd->pd_flags & PDESC_HBUF_REF) 780 mmd->mmd_hbuf_ref++; 781 if (pd->pd_flags & PDESC_PBUF_REF) 782 mmd->mmd_pbuf_ref += pd->pd_pdi.pld_cnt; 783 mmd->mmd_pd_cnt++; 784 785 /* insert descriptor at end of list */ 786 insque(&(pd->pd_next), mmd->mmd_pd_q.ql_prev); 787 mutex_exit(&(mmd->mmd_pd_slab_lock)); 788 789 return (pd); 790 } 791 792 /* 793 * Packet descriptor slab kmem cache constructor routine. 794 */ 795 /* ARGSUSED */ 796 static int 797 pdslab_constructor(void *buf, void *cdrarg, int kmflags) 798 { 799 pdesc_slab_t *slab; 800 uint_t cnt = (uint_t)(uintptr_t)cdrarg; 801 int i; 802 803 ASSERT(cnt > 0); /* slab size can't be zero */ 804 805 slab = (pdesc_slab_t *)buf; 806 slab->pds_next = NULL; 807 slab->pds_prev = NULL; 808 slab->pds_mmd = NULL; 809 slab->pds_used = 0; 810 slab->pds_sz = cnt; 811 812 for (i = 0; i < cnt; i++) { 813 pdesc_t *pd = &(slab->pds_free_desc[i]); 814 pd->pd_magic = PDESC_MAGIC; 815 } 816 return (0); 817 } 818 819 /* 820 * Packet descriptor slab kmem cache destructor routine. 821 */ 822 /* ARGSUSED */ 823 static void 824 pdslab_destructor(void *buf, void *cdrarg) 825 { 826 pdesc_slab_t *slab; 827 828 slab = (pdesc_slab_t *)buf; 829 ASSERT(slab->pds_next == NULL); 830 ASSERT(slab->pds_prev == NULL); 831 ASSERT(slab->pds_mmd == NULL); 832 ASSERT(slab->pds_used == 0); 833 ASSERT(slab->pds_sz > 0); 834 } 835 836 /* 837 * Remove a packet descriptor from the in-use descriptor list, 838 * called by mmd_rempdesc or during free. 
839 */ 840 static pdesc_t * 841 mmd_destroy_pdesc(multidata_t *mmd, pdesc_t *pd) 842 { 843 pdesc_t *pd_next; 844 845 pd_next = Q2PD(pd->pd_next); 846 remque(&(pd->pd_next)); 847 848 /* remove all local attributes */ 849 if (pd->pd_pattbl != NULL) 850 mmd_destroy_pattbl(&(pd->pd_pattbl)); 851 852 /* don't decrease counts for a removed descriptor */ 853 if (!(pd->pd_flags & PDESC_REM_DEFER)) { 854 if (pd->pd_flags & PDESC_HBUF_REF) { 855 ASSERT(mmd->mmd_hbuf_ref > 0); 856 mmd->mmd_hbuf_ref--; 857 } 858 if (pd->pd_flags & PDESC_PBUF_REF) { 859 ASSERT(mmd->mmd_pbuf_ref > 0); 860 mmd->mmd_pbuf_ref -= pd->pd_pdi.pld_cnt; 861 } 862 ASSERT(mmd->mmd_pd_cnt > 0); 863 mmd->mmd_pd_cnt--; 864 } 865 return (pd_next); 866 } 867 868 /* 869 * Remove a packet descriptor from the Multidata. 870 */ 871 void 872 mmd_rempdesc(pdesc_t *pd) 873 { 874 multidata_t *mmd; 875 876 ASSERT(pd->pd_magic == PDESC_MAGIC); 877 ASSERT(pd->pd_slab != NULL); 878 879 mmd = pd->pd_slab->pds_mmd; 880 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 881 882 mutex_enter(&(mmd->mmd_pd_slab_lock)); 883 /* 884 * We can't deallocate the associated resources if the Multidata 885 * is shared with other threads, because it's possible that the 886 * descriptor handle value is held by those threads. That's why 887 * we simply mark the entry as "removed" and decrement the counts. 888 * If there are no other threads, then we free the descriptor. 889 */ 890 if (mmd->mmd_dp->db_ref > 1) { 891 pd->pd_flags |= PDESC_REM_DEFER; 892 if (pd->pd_flags & PDESC_HBUF_REF) { 893 ASSERT(mmd->mmd_hbuf_ref > 0); 894 mmd->mmd_hbuf_ref--; 895 } 896 if (pd->pd_flags & PDESC_PBUF_REF) { 897 ASSERT(mmd->mmd_pbuf_ref > 0); 898 mmd->mmd_pbuf_ref -= pd->pd_pdi.pld_cnt; 899 } 900 ASSERT(mmd->mmd_pd_cnt > 0); 901 mmd->mmd_pd_cnt--; 902 } else { 903 (void) mmd_destroy_pdesc(mmd, pd); 904 } 905 mutex_exit(&(mmd->mmd_pd_slab_lock)); 906 } 907 908 /* 909 * A generic routine to traverse the packet descriptor in-use list. 910 */ 911 static pdesc_t * 912 mmd_getpdesc(multidata_t *mmd, pdesc_t *pd, pdescinfo_t *pdi, uint_t forw, 913 boolean_t mutex_held) 914 { 915 pdesc_t *pd_head; 916 917 ASSERT(pd == NULL || pd->pd_slab->pds_mmd == mmd); 918 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 919 ASSERT(!mutex_held || MUTEX_HELD(&(mmd->mmd_pd_slab_lock))); 920 921 if (!mutex_held) 922 mutex_enter(&(mmd->mmd_pd_slab_lock)); 923 pd_head = Q2PD(&(mmd->mmd_pd_q)); 924 925 if (pd == NULL) { 926 /* 927 * We're called by mmd_get{first,last}pdesc, and so 928 * return either the first or last list element. 929 */ 930 pd = forw ? Q2PD(mmd->mmd_pd_q.ql_next) : 931 Q2PD(mmd->mmd_pd_q.ql_prev); 932 } else { 933 /* 934 * We're called by mmd_get{next,prev}pdesc, and so 935 * return either the next or previous list element. 936 */ 937 pd = forw ? Q2PD(pd->pd_next) : Q2PD(pd->pd_prev); 938 } 939 940 while (pd != pd_head) { 941 /* skip element if it has been removed */ 942 if (!(pd->pd_flags & PDESC_REM_DEFER)) 943 break; 944 pd = forw ? Q2PD(pd->pd_next) : Q2PD(pd->pd_prev); 945 } 946 if (!mutex_held) 947 mutex_exit(&(mmd->mmd_pd_slab_lock)); 948 949 /* return NULL if we're back at the beginning */ 950 if (pd == pd_head) 951 pd = NULL; 952 953 /* got an entry; copy descriptor info to caller */ 954 if (pd != NULL && pdi != NULL) 955 PDI_COPY(&(pd->pd_pdi), pdi); 956 957 ASSERT(pd == NULL || pd->pd_magic == PDESC_MAGIC); 958 return (pd); 959 960 } 961 962 /* 963 * Return the first packet descriptor in the in-use list. 
964 */ 965 pdesc_t * 966 mmd_getfirstpdesc(multidata_t *mmd, pdescinfo_t *pdi) 967 { 968 return (mmd_getpdesc(mmd, NULL, pdi, 1, B_FALSE)); 969 } 970 971 /* 972 * Return the last packet descriptor in the in-use list. 973 */ 974 pdesc_t * 975 mmd_getlastpdesc(multidata_t *mmd, pdescinfo_t *pdi) 976 { 977 return (mmd_getpdesc(mmd, NULL, pdi, 0, B_FALSE)); 978 } 979 980 /* 981 * Return the next packet descriptor in the in-use list. 982 */ 983 pdesc_t * 984 mmd_getnextpdesc(pdesc_t *pd, pdescinfo_t *pdi) 985 { 986 return (mmd_getpdesc(pd->pd_slab->pds_mmd, pd, pdi, 1, B_FALSE)); 987 } 988 989 /* 990 * Return the previous packet descriptor in the in-use list. 991 */ 992 pdesc_t * 993 mmd_getprevpdesc(pdesc_t *pd, pdescinfo_t *pdi) 994 { 995 return (mmd_getpdesc(pd->pd_slab->pds_mmd, pd, pdi, 0, B_FALSE)); 996 } 997 998 /* 999 * Check to see if pdi stretches over c_pdi; used to ensure that a packet 1000 * descriptor's header and payload span may not be extended beyond the 1001 * current boundaries. 1002 */ 1003 static boolean_t 1004 pdi_in_range(pdescinfo_t *pdi, pdescinfo_t *c_pdi) 1005 { 1006 int i; 1007 struct pld_ary_s *pa = &pdi->pld_ary[0]; 1008 struct pld_ary_s *c_pa = &c_pdi->pld_ary[0]; 1009 1010 if (pdi->hdr_base < c_pdi->hdr_base || pdi->hdr_lim > c_pdi->hdr_lim) 1011 return (B_FALSE); 1012 1013 /* 1014 * We don't allow the number of span to be reduced, for the sake 1015 * of simplicity. Instead, we provide PDESC_PLD_SPAN_CLEAR() to 1016 * clear a packet descriptor. Note that we allow the span count to 1017 * be increased, and the bounds check for the new one happens 1018 * in pbuf_ref_valid. 1019 */ 1020 if (pdi->pld_cnt < c_pdi->pld_cnt) 1021 return (B_FALSE); 1022 1023 /* compare only those which are currently defined */ 1024 for (i = 0; i < c_pdi->pld_cnt; i++, pa++, c_pa++) { 1025 if (pa->pld_pbuf_idx != c_pa->pld_pbuf_idx || 1026 pa->pld_rptr < c_pa->pld_rptr || 1027 pa->pld_wptr > c_pa->pld_wptr) 1028 return (B_FALSE); 1029 } 1030 return (B_TRUE); 1031 } 1032 1033 /* 1034 * Modify the layout of a packet descriptor. 1035 */ 1036 pdesc_t * 1037 mmd_adjpdesc(pdesc_t *pd, pdescinfo_t *pdi) 1038 { 1039 multidata_t *mmd; 1040 pdescinfo_t *c_pdi; 1041 1042 ASSERT(pd != NULL); 1043 ASSERT(pdi != NULL); 1044 ASSERT(pd->pd_magic == PDESC_MAGIC); 1045 1046 mmd = pd->pd_slab->pds_mmd; 1047 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 1048 1049 /* entry has been removed */ 1050 if (pd->pd_flags & PDESC_REM_DEFER) 1051 return (NULL); 1052 1053 /* caller doesn't intend to specify any buffer reference? */ 1054 if (!(pdi->flags & PDESC_HAS_REF)) 1055 return (NULL); 1056 1057 /* do the references refer to invalid memory regions? */ 1058 if (!mmd_speed_over_safety && 1059 (((pdi->flags & PDESC_HBUF_REF) && !HBUF_REF_VALID(mmd, pdi)) || 1060 ((pdi->flags & PDESC_PBUF_REF) && !pbuf_ref_valid(mmd, pdi)))) 1061 return (NULL); 1062 1063 /* they're not subsets of current references? */ 1064 c_pdi = &(pd->pd_pdi); 1065 if (!pdi_in_range(pdi, c_pdi)) 1066 return (NULL); 1067 1068 /* copy over the descriptor info from caller */ 1069 PDI_COPY(pdi, c_pdi); 1070 1071 return (pd); 1072 } 1073 1074 /* 1075 * Copy the contents of a packet descriptor into a new buffer. If the 1076 * descriptor points to more than one buffer fragments, the contents 1077 * of both fragments will be joined, with the header buffer fragment 1078 * preceding the payload buffer fragment(s). 
1079 */ 1080 mblk_t * 1081 mmd_transform(pdesc_t *pd) 1082 { 1083 multidata_t *mmd; 1084 pdescinfo_t *pdi; 1085 mblk_t *mp; 1086 int h_size = 0, p_size = 0; 1087 int i, len; 1088 1089 ASSERT(pd != NULL); 1090 ASSERT(pd->pd_magic == PDESC_MAGIC); 1091 1092 mmd = pd->pd_slab->pds_mmd; 1093 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 1094 1095 /* entry has been removed */ 1096 if (pd->pd_flags & PDESC_REM_DEFER) 1097 return (NULL); 1098 1099 mutex_enter(&mmd->mmd_pd_slab_lock); 1100 pdi = &(pd->pd_pdi); 1101 if (pdi->flags & PDESC_HBUF_REF) 1102 h_size = PDESC_HDRL(pdi); 1103 if (pdi->flags & PDESC_PBUF_REF) { 1104 for (i = 0; i < pdi->pld_cnt; i++) 1105 p_size += PDESC_PLD_SPAN_SIZE(pdi, i); 1106 } 1107 1108 /* allocate space large enough to hold the fragment(s) */ 1109 ASSERT(h_size + p_size >= 0); 1110 if ((mp = allocb(h_size + p_size, BPRI_HI)) == NULL) { 1111 mutex_exit(&mmd->mmd_pd_slab_lock); 1112 return (NULL); 1113 } 1114 1115 /* copy over the header fragment */ 1116 if ((pdi->flags & PDESC_HBUF_REF) && h_size > 0) { 1117 bcopy(pdi->hdr_rptr, mp->b_wptr, h_size); 1118 mp->b_wptr += h_size; 1119 } 1120 1121 /* copy over the payload fragment */ 1122 if ((pdi->flags & PDESC_PBUF_REF) && p_size > 0) { 1123 for (i = 0; i < pdi->pld_cnt; i++) { 1124 len = PDESC_PLD_SPAN_SIZE(pdi, i); 1125 if (len > 0) { 1126 bcopy(pdi->pld_ary[i].pld_rptr, 1127 mp->b_wptr, len); 1128 mp->b_wptr += len; 1129 } 1130 } 1131 } 1132 1133 mutex_exit(&mmd->mmd_pd_slab_lock); 1134 return (mp); 1135 } 1136 1137 /* 1138 * Return a chain of mblks representing the Multidata packet. 1139 */ 1140 mblk_t * 1141 mmd_transform_link(pdesc_t *pd) 1142 { 1143 multidata_t *mmd; 1144 pdescinfo_t *pdi; 1145 mblk_t *nmp = NULL; 1146 1147 ASSERT(pd != NULL); 1148 ASSERT(pd->pd_magic == PDESC_MAGIC); 1149 1150 mmd = pd->pd_slab->pds_mmd; 1151 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 1152 1153 /* entry has been removed */ 1154 if (pd->pd_flags & PDESC_REM_DEFER) 1155 return (NULL); 1156 1157 pdi = &(pd->pd_pdi); 1158 1159 /* duplicate header buffer */ 1160 if ((pdi->flags & PDESC_HBUF_REF)) { 1161 if ((nmp = dupb(mmd->mmd_hbuf)) == NULL) 1162 return (NULL); 1163 nmp->b_rptr = pdi->hdr_rptr; 1164 nmp->b_wptr = pdi->hdr_wptr; 1165 } 1166 1167 /* duplicate payload buffer(s) */ 1168 if (pdi->flags & PDESC_PBUF_REF) { 1169 int i; 1170 mblk_t *mp; 1171 struct pld_ary_s *pa = &pdi->pld_ary[0]; 1172 1173 mutex_enter(&mmd->mmd_pd_slab_lock); 1174 for (i = 0; i < pdi->pld_cnt; i++, pa++) { 1175 ASSERT(mmd->mmd_pbuf[pa->pld_pbuf_idx] != NULL); 1176 1177 /* skip empty ones */ 1178 if (PDESC_PLD_SPAN_SIZE(pdi, i) == 0) 1179 continue; 1180 1181 mp = dupb(mmd->mmd_pbuf[pa->pld_pbuf_idx]); 1182 if (mp == NULL) { 1183 if (nmp != NULL) 1184 freemsg(nmp); 1185 mutex_exit(&mmd->mmd_pd_slab_lock); 1186 return (NULL); 1187 } 1188 mp->b_rptr = pa->pld_rptr; 1189 mp->b_wptr = pa->pld_wptr; 1190 if (nmp == NULL) 1191 nmp = mp; 1192 else 1193 linkb(nmp, mp); 1194 } 1195 mutex_exit(&mmd->mmd_pd_slab_lock); 1196 } 1197 1198 return (nmp); 1199 } 1200 1201 /* 1202 * Return duplicate message block(s) of the associated buffer(s). 
1203 */ 1204 int 1205 mmd_dupbufs(multidata_t *mmd, mblk_t **hmp, mblk_t **pmp) 1206 { 1207 ASSERT(mmd != NULL); 1208 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 1209 1210 if (hmp != NULL) { 1211 *hmp = NULL; 1212 if (mmd->mmd_hbuf != NULL && 1213 (*hmp = dupb(mmd->mmd_hbuf)) == NULL) 1214 return (-1); 1215 } 1216 1217 if (pmp != NULL) { 1218 int i; 1219 mblk_t *mp; 1220 1221 mutex_enter(&mmd->mmd_pd_slab_lock); 1222 *pmp = NULL; 1223 for (i = 0; i < mmd->mmd_pbuf_cnt; i++) { 1224 ASSERT(mmd->mmd_pbuf[i] != NULL); 1225 mp = dupb(mmd->mmd_pbuf[i]); 1226 if (mp == NULL) { 1227 if (hmp != NULL && *hmp != NULL) 1228 freeb(*hmp); 1229 if (*pmp != NULL) 1230 freemsg(*pmp); 1231 mutex_exit(&mmd->mmd_pd_slab_lock); 1232 return (-1); 1233 } 1234 if (*pmp == NULL) 1235 *pmp = mp; 1236 else 1237 linkb(*pmp, mp); 1238 } 1239 mutex_exit(&mmd->mmd_pd_slab_lock); 1240 } 1241 1242 return (0); 1243 } 1244 1245 /* 1246 * Return the layout of a packet descriptor. 1247 */ 1248 int 1249 mmd_getpdescinfo(pdesc_t *pd, pdescinfo_t *pdi) 1250 { 1251 ASSERT(pd != NULL); 1252 ASSERT(pd->pd_magic == PDESC_MAGIC); 1253 ASSERT(pd->pd_slab != NULL); 1254 ASSERT(pd->pd_slab->pds_mmd->mmd_magic == MULTIDATA_MAGIC); 1255 ASSERT(pdi != NULL); 1256 1257 /* entry has been removed */ 1258 if (pd->pd_flags & PDESC_REM_DEFER) 1259 return (-1); 1260 1261 /* copy descriptor info to caller */ 1262 PDI_COPY(&(pd->pd_pdi), pdi); 1263 1264 return (0); 1265 } 1266 1267 /* 1268 * Add a global or local attribute to a Multidata. Global attribute 1269 * association is specified by a NULL packet descriptor. 1270 */ 1271 pattr_t * 1272 mmd_addpattr(multidata_t *mmd, pdesc_t *pd, pattrinfo_t *pai, 1273 boolean_t persistent, int kmflags) 1274 { 1275 patbkt_t **tbl_p; 1276 patbkt_t *tbl, *o_tbl; 1277 patbkt_t *bkt; 1278 pattr_t *pa; 1279 uint_t size; 1280 1281 ASSERT(mmd != NULL); 1282 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 1283 ASSERT(pd == NULL || pd->pd_magic == PDESC_MAGIC); 1284 ASSERT(pai != NULL); 1285 1286 /* pointer to the attribute hash table (local or global) */ 1287 tbl_p = pd != NULL ? &(pd->pd_pattbl) : &(mmd->mmd_pattbl); 1288 1289 /* 1290 * See if the hash table has not yet been created; if so, 1291 * we create the table and store its address atomically. 1292 */ 1293 if ((tbl = *tbl_p) == NULL) { 1294 tbl = kmem_cache_alloc(pattbl_cache, kmflags); 1295 if (tbl == NULL) 1296 return (NULL); 1297 1298 /* if someone got there first, use his table instead */ 1299 if ((o_tbl = casptr(tbl_p, NULL, tbl)) != NULL) { 1300 kmem_cache_free(pattbl_cache, tbl); 1301 tbl = o_tbl; 1302 } 1303 } 1304 1305 ASSERT(tbl->pbkt_tbl_sz > 0); 1306 bkt = &(tbl[PATTBL_HASH(pai->type, tbl->pbkt_tbl_sz)]); 1307 1308 /* attribute of the same type already exists? */ 1309 if ((pa = mmd_find_pattr(bkt, pai->type)) != NULL) 1310 return (NULL); 1311 1312 size = sizeof (*pa) + pai->len; 1313 if ((pa = kmem_zalloc(size, kmflags)) == NULL) 1314 return (NULL); 1315 1316 pa->pat_magic = PATTR_MAGIC; 1317 pa->pat_lock = &(bkt->pbkt_lock); 1318 pa->pat_mmd = mmd; 1319 pa->pat_buflen = size; 1320 pa->pat_type = pai->type; 1321 pai->buf = pai->len > 0 ? ((uchar_t *)(pa + 1)) : NULL; 1322 1323 if (persistent) 1324 pa->pat_flags = PATTR_PERSIST; 1325 1326 /* insert attribute at end of hash chain */ 1327 mutex_enter(&(bkt->pbkt_lock)); 1328 insque(&(pa->pat_next), bkt->pbkt_pattr_q.ql_prev); 1329 mutex_exit(&(bkt->pbkt_lock)); 1330 1331 return (pa); 1332 } 1333 1334 /* 1335 * Attribute hash table kmem cache constructor routine. 
1336 */ 1337 /* ARGSUSED */ 1338 static int 1339 pattbl_constructor(void *buf, void *cdrarg, int kmflags) 1340 { 1341 patbkt_t *bkt; 1342 uint_t tbl_sz = (uint_t)(uintptr_t)cdrarg; 1343 uint_t i; 1344 1345 ASSERT(tbl_sz > 0); /* table size can't be zero */ 1346 1347 for (i = 0, bkt = (patbkt_t *)buf; i < tbl_sz; i++, bkt++) { 1348 mutex_init(&(bkt->pbkt_lock), NULL, MUTEX_DRIVER, NULL); 1349 QL_INIT(&(bkt->pbkt_pattr_q)); 1350 1351 /* first bucket contains the table size */ 1352 bkt->pbkt_tbl_sz = i == 0 ? tbl_sz : 0; 1353 } 1354 return (0); 1355 } 1356 1357 /* 1358 * Attribute hash table kmem cache destructor routine. 1359 */ 1360 /* ARGSUSED */ 1361 static void 1362 pattbl_destructor(void *buf, void *cdrarg) 1363 { 1364 patbkt_t *bkt; 1365 uint_t tbl_sz = (uint_t)(uintptr_t)cdrarg; 1366 uint_t i; 1367 1368 ASSERT(tbl_sz > 0); /* table size can't be zero */ 1369 1370 for (i = 0, bkt = (patbkt_t *)buf; i < tbl_sz; i++, bkt++) { 1371 mutex_destroy(&(bkt->pbkt_lock)); 1372 ASSERT(bkt->pbkt_pattr_q.ql_next == &(bkt->pbkt_pattr_q)); 1373 ASSERT(i > 0 || bkt->pbkt_tbl_sz == tbl_sz); 1374 } 1375 } 1376 1377 /* 1378 * Destroy an attribute hash table, called by mmd_rempdesc or during free. 1379 */ 1380 static void 1381 mmd_destroy_pattbl(patbkt_t **tbl) 1382 { 1383 patbkt_t *bkt; 1384 pattr_t *pa, *pa_next; 1385 uint_t i, tbl_sz; 1386 1387 ASSERT(tbl != NULL); 1388 bkt = *tbl; 1389 tbl_sz = bkt->pbkt_tbl_sz; 1390 1391 /* make sure caller passes in the first bucket */ 1392 ASSERT(tbl_sz > 0); 1393 1394 /* destroy the contents of each bucket */ 1395 for (i = 0; i < tbl_sz; i++, bkt++) { 1396 /* we ought to be exclusive at this point */ 1397 ASSERT(MUTEX_NOT_HELD(&(bkt->pbkt_lock))); 1398 1399 pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next); 1400 while (pa != Q2PATTR(&(bkt->pbkt_pattr_q))) { 1401 ASSERT(pa->pat_magic == PATTR_MAGIC); 1402 pa_next = Q2PATTR(pa->pat_next); 1403 remque(&(pa->pat_next)); 1404 kmem_free(pa, pa->pat_buflen); 1405 pa = pa_next; 1406 } 1407 } 1408 1409 kmem_cache_free(pattbl_cache, *tbl); 1410 *tbl = NULL; 1411 1412 /* commit all previous stores */ 1413 membar_producer(); 1414 } 1415 1416 /* 1417 * Copy the contents of an attribute hash table, called by mmd_copy. 1418 */ 1419 static int 1420 mmd_copy_pattbl(patbkt_t *src_tbl, multidata_t *n_mmd, pdesc_t *n_pd, 1421 int kmflags) 1422 { 1423 patbkt_t *bkt; 1424 pattr_t *pa; 1425 pattrinfo_t pai; 1426 uint_t i, tbl_sz; 1427 1428 ASSERT(src_tbl != NULL); 1429 bkt = src_tbl; 1430 tbl_sz = bkt->pbkt_tbl_sz; 1431 1432 /* make sure caller passes in the first bucket */ 1433 ASSERT(tbl_sz > 0); 1434 1435 for (i = 0; i < tbl_sz; i++, bkt++) { 1436 mutex_enter(&(bkt->pbkt_lock)); 1437 pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next); 1438 while (pa != Q2PATTR(&(bkt->pbkt_pattr_q))) { 1439 pattr_t *pa_next = Q2PATTR(pa->pat_next); 1440 1441 /* skip if it's removed */ 1442 if (pa->pat_flags & PATTR_REM_DEFER) { 1443 pa = pa_next; 1444 continue; 1445 } 1446 1447 pai.type = pa->pat_type; 1448 pai.len = pa->pat_buflen - sizeof (*pa); 1449 if (mmd_addpattr(n_mmd, n_pd, &pai, (pa->pat_flags & 1450 PATTR_PERSIST) != 0, kmflags) == NULL) { 1451 mutex_exit(&(bkt->pbkt_lock)); 1452 return (-1); 1453 } 1454 1455 /* copy over the contents */ 1456 if (pai.buf != NULL) 1457 bcopy(pa + 1, pai.buf, pai.len); 1458 1459 pa = pa_next; 1460 } 1461 mutex_exit(&(bkt->pbkt_lock)); 1462 } 1463 1464 return (0); 1465 } 1466 1467 /* 1468 * Search for an attribute type within an attribute hash bucket. 
1469 */ 1470 static pattr_t * 1471 mmd_find_pattr(patbkt_t *bkt, uint_t type) 1472 { 1473 pattr_t *pa_head, *pa; 1474 1475 mutex_enter(&(bkt->pbkt_lock)); 1476 pa_head = Q2PATTR(&(bkt->pbkt_pattr_q)); 1477 pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next); 1478 1479 while (pa != pa_head) { 1480 ASSERT(pa->pat_magic == PATTR_MAGIC); 1481 1482 /* return a match; we treat removed entry as non-existent */ 1483 if (pa->pat_type == type && !(pa->pat_flags & PATTR_REM_DEFER)) 1484 break; 1485 pa = Q2PATTR(pa->pat_next); 1486 } 1487 mutex_exit(&(bkt->pbkt_lock)); 1488 1489 return (pa == pa_head ? NULL : pa); 1490 } 1491 1492 /* 1493 * Remove an attribute from a Multidata. 1494 */ 1495 void 1496 mmd_rempattr(pattr_t *pa) 1497 { 1498 kmutex_t *pat_lock = pa->pat_lock; 1499 1500 ASSERT(pa->pat_magic == PATTR_MAGIC); 1501 1502 /* ignore if attribute was marked as persistent */ 1503 if ((pa->pat_flags & PATTR_PERSIST) != 0) 1504 return; 1505 1506 mutex_enter(pat_lock); 1507 /* 1508 * We can't deallocate the associated resources if the Multidata 1509 * is shared with other threads, because it's possible that the 1510 * attribute handle value is held by those threads. That's why 1511 * we simply mark the entry as "removed". If there are no other 1512 * threads, then we free the attribute. 1513 */ 1514 if (pa->pat_mmd->mmd_dp->db_ref > 1) { 1515 pa->pat_flags |= PATTR_REM_DEFER; 1516 } else { 1517 remque(&(pa->pat_next)); 1518 kmem_free(pa, pa->pat_buflen); 1519 } 1520 mutex_exit(pat_lock); 1521 } 1522 1523 /* 1524 * Find an attribute (according to its type) and return its handle. 1525 */ 1526 pattr_t * 1527 mmd_getpattr(multidata_t *mmd, pdesc_t *pd, pattrinfo_t *pai) 1528 { 1529 patbkt_t *tbl, *bkt; 1530 pattr_t *pa; 1531 1532 ASSERT(mmd != NULL); 1533 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 1534 ASSERT(pai != NULL); 1535 1536 /* get the right attribute hash table (local or global) */ 1537 tbl = pd != NULL ? pd->pd_pattbl : mmd->mmd_pattbl; 1538 1539 /* attribute hash table doesn't exist? */ 1540 if (tbl == NULL) 1541 return (NULL); 1542 1543 ASSERT(tbl->pbkt_tbl_sz > 0); 1544 bkt = &(tbl[PATTBL_HASH(pai->type, tbl->pbkt_tbl_sz)]); 1545 1546 if ((pa = mmd_find_pattr(bkt, pai->type)) != NULL) { 1547 ASSERT(pa->pat_buflen >= sizeof (*pa)); 1548 pai->len = pa->pat_buflen - sizeof (*pa); 1549 pai->buf = pai->len > 0 ? 1550 (uchar_t *)pa + sizeof (pattr_t) : NULL; 1551 } 1552 ASSERT(pa == NULL || pa->pat_magic == PATTR_MAGIC); 1553 return (pa); 1554 } 1555 1556 /* 1557 * Return total size of buffers and total size of areas referenced 1558 * by all in-use (unremoved) packet descriptors. 
1559 */ 1560 void 1561 mmd_getsize(multidata_t *mmd, uint_t *ptotal, uint_t *pinuse) 1562 { 1563 pdesc_t *pd; 1564 pdescinfo_t *pdi; 1565 int i; 1566 1567 ASSERT(mmd != NULL); 1568 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC); 1569 1570 mutex_enter(&mmd->mmd_pd_slab_lock); 1571 if (ptotal != NULL) { 1572 *ptotal = 0; 1573 1574 if (mmd->mmd_hbuf != NULL) 1575 *ptotal += MBLKL(mmd->mmd_hbuf); 1576 1577 for (i = 0; i < mmd->mmd_pbuf_cnt; i++) { 1578 ASSERT(mmd->mmd_pbuf[i] != NULL); 1579 *ptotal += MBLKL(mmd->mmd_pbuf[i]); 1580 } 1581 } 1582 if (pinuse != NULL) { 1583 *pinuse = 0; 1584 1585 /* first pdesc */ 1586 pd = mmd_getpdesc(mmd, NULL, NULL, 1, B_TRUE); 1587 while (pd != NULL) { 1588 pdi = &pd->pd_pdi; 1589 1590 /* next pdesc */ 1591 pd = mmd_getpdesc(mmd, pd, NULL, 1, B_TRUE); 1592 1593 /* skip over removed descriptor */ 1594 if (pdi->flags & PDESC_REM_DEFER) 1595 continue; 1596 1597 if (pdi->flags & PDESC_HBUF_REF) 1598 *pinuse += PDESC_HDRL(pdi); 1599 1600 if (pdi->flags & PDESC_PBUF_REF) { 1601 for (i = 0; i < pdi->pld_cnt; i++) 1602 *pinuse += PDESC_PLDL(pdi, i); 1603 } 1604 } 1605 } 1606 mutex_exit(&mmd->mmd_pd_slab_lock); 1607 } 1608