1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 /*
28 * Multidata, as described in the following papers:
29 *
30 * Adi Masputra,
31 * Multidata V.2: VA-Disjoint Packet Extents Framework Interface
32 * Design Specification. August 2004.
33 * Available as http://sac.sfbay/PSARC/2004/594/materials/mmd2.pdf.
34 *
35 * Adi Masputra,
36 * Multidata Interface Design Specification. Sep 2002.
37 * Available as http://sac.sfbay/PSARC/2002/276/materials/mmd.pdf.
38 *
39 * Adi Masputra, Frank DiMambro, Kacheong Poon,
40 * An Efficient Networking Transmit Mechanism for Solaris:
41 * Multidata Transmit (MDT). May 2002.
42 * Available as http://sac.sfbay/PSARC/2002/276/materials/mdt.pdf.
43 */
44
45 #include <sys/types.h>
46 #include <sys/stream.h>
47 #include <sys/dlpi.h>
48 #include <sys/stropts.h>
49 #include <sys/strsun.h>
50 #include <sys/strlog.h>
51 #include <sys/strsubr.h>
52 #include <sys/sysmacros.h>
53 #include <sys/cmn_err.h>
54 #include <sys/debug.h>
55 #include <sys/kmem.h>
56 #include <sys/atomic.h>
57
58 #include <sys/multidata.h>
59 #include <sys/multidata_impl.h>
60
61 static int mmd_constructor(void *, void *, int);
62 static void mmd_destructor(void *, void *);
63 static int pdslab_constructor(void *, void *, int);
64 static void pdslab_destructor(void *, void *);
65 static int pattbl_constructor(void *, void *, int);
66 static void pattbl_destructor(void *, void *);
67 static void mmd_esballoc_free(caddr_t);
68 static int mmd_copy_pattbl(patbkt_t *, multidata_t *, pdesc_t *, int);
69
70 static boolean_t pbuf_ref_valid(multidata_t *, pdescinfo_t *);
71 #pragma inline(pbuf_ref_valid)
72
73 static boolean_t pdi_in_range(pdescinfo_t *, pdescinfo_t *);
74 #pragma inline(pdi_in_range)
75
76 static pdesc_t *mmd_addpdesc_int(multidata_t *, pdescinfo_t *, int *, int);
77 #pragma inline(mmd_addpdesc_int)
78
79 static void mmd_destroy_pattbl(patbkt_t **);
80 #pragma inline(mmd_destroy_pattbl)
81
82 static pattr_t *mmd_find_pattr(patbkt_t *, uint_t);
83 #pragma inline(mmd_find_pattr)
84
85 static pdesc_t *mmd_destroy_pdesc(multidata_t *, pdesc_t *);
86 #pragma inline(mmd_destroy_pdesc)
87
88 static pdesc_t *mmd_getpdesc(multidata_t *, pdesc_t *, pdescinfo_t *, uint_t,
89 boolean_t);
90 #pragma inline(mmd_getpdesc)
91
92 static struct kmem_cache *mmd_cache;
93 static struct kmem_cache *pd_slab_cache;
94 static struct kmem_cache *pattbl_cache;
95
96 int mmd_debug = 1;
97 #define MMD_DEBUG(s) if (mmd_debug > 0) cmn_err s
98
99 /*
100 * Set to this to true to bypass pdesc bounds checking.
101 */
102 boolean_t mmd_speed_over_safety = B_FALSE;
103
104 /*
105 * Patchable kmem_cache flags.
106 */
107 int mmd_kmem_flags = 0;
108 int pdslab_kmem_flags = 0;
109 int pattbl_kmem_flags = 0;
110
111 /*
112 * Alignment (in bytes) of our kmem caches.
113 */
114 #define MULTIDATA_CACHE_ALIGN 64
115
116 /*
117 * Default number of packet descriptors per descriptor slab. Making
118 * this too small will trigger more descriptor slab allocation; making
119 * it too large will create too many unclaimed descriptors.
120 */
121 #define PDSLAB_SZ 15
122 uint_t pdslab_sz = PDSLAB_SZ;
123
124 /*
125 * Default attribute hash table size. It's okay to set this to a small
126 * value (even to 1) because there aren't that many attributes currently
127 * defined, and because we assume there won't be many attributes associated
128 * with a Multidata at a given time. Increasing the size will reduce
129 * attribute search time (given a large number of attributes in a Multidata),
130 * and decreasing it will reduce the memory footprints and the overhead
131 * associated with managing the table.
132 */
133 #define PATTBL_SZ 1
134 uint_t pattbl_sz = PATTBL_SZ;
135
136 /*
137 * Attribute hash key.
138 */
139 #define PATTBL_HASH(x, sz) ((x) % (sz))
140
141 /*
142 * Structure that precedes each Multidata metadata.
143 */
144 struct mmd_buf_info {
145 frtn_t frp; /* free routine */
146 uint_t buf_len; /* length of kmem buffer */
147 };
148
149 /*
150 * The size of each metadata buffer.
151 */
152 #define MMD_CACHE_SIZE \
153 (sizeof (struct mmd_buf_info) + sizeof (multidata_t))
154
155 /*
156 * Called during startup in order to create the Multidata kmem caches.
157 */
158 void
mmd_init(void)159 mmd_init(void)
160 {
161 pdslab_sz = MAX(1, pdslab_sz); /* at least 1 descriptor */
162 pattbl_sz = MAX(1, pattbl_sz); /* at least 1 bucket */
163
164 mmd_cache = kmem_cache_create("multidata", MMD_CACHE_SIZE,
165 MULTIDATA_CACHE_ALIGN, mmd_constructor, mmd_destructor,
166 NULL, NULL, NULL, mmd_kmem_flags);
167
168 pd_slab_cache = kmem_cache_create("multidata_pdslab",
169 PDESC_SLAB_SIZE(pdslab_sz), MULTIDATA_CACHE_ALIGN,
170 pdslab_constructor, pdslab_destructor, NULL,
171 (void *)(uintptr_t)pdslab_sz, NULL, pdslab_kmem_flags);
172
173 pattbl_cache = kmem_cache_create("multidata_pattbl",
174 sizeof (patbkt_t) * pattbl_sz, MULTIDATA_CACHE_ALIGN,
175 pattbl_constructor, pattbl_destructor, NULL,
176 (void *)(uintptr_t)pattbl_sz, NULL, pattbl_kmem_flags);
177 }
178
179 /*
180 * Create a Multidata message block.
181 */
182 multidata_t *
mmd_alloc(mblk_t * hdr_mp,mblk_t ** mmd_mp,int kmflags)183 mmd_alloc(mblk_t *hdr_mp, mblk_t **mmd_mp, int kmflags)
184 {
185 uchar_t *buf;
186 multidata_t *mmd;
187 uint_t mmd_mplen;
188 struct mmd_buf_info *buf_info;
189
190 ASSERT(hdr_mp != NULL);
191 ASSERT(mmd_mp != NULL);
192
193 /*
194 * Caller should never pass in a chain of mblks since we
195 * only care about the first one, hence the assertions.
196 */
197 ASSERT(hdr_mp->b_cont == NULL);
198
199 if ((buf = kmem_cache_alloc(mmd_cache, kmflags)) == NULL)
200 return (NULL);
201
202 buf_info = (struct mmd_buf_info *)buf;
203 buf_info->frp.free_arg = (caddr_t)buf;
204
205 mmd = (multidata_t *)(buf_info + 1);
206 mmd_mplen = sizeof (*mmd);
207
208 if ((*mmd_mp = desballoc((uchar_t *)mmd, mmd_mplen, BPRI_HI,
209 &(buf_info->frp))) == NULL) {
210 kmem_cache_free(mmd_cache, buf);
211 return (NULL);
212 }
213
214 DB_TYPE(*mmd_mp) = M_MULTIDATA;
215 (*mmd_mp)->b_wptr += mmd_mplen;
216 mmd->mmd_dp = (*mmd_mp)->b_datap;
217 mmd->mmd_hbuf = hdr_mp;
218
219 return (mmd);
220 }
221
222 /*
223 * Associate additional payload buffer to the Multidata.
224 */
225 int
mmd_addpldbuf(multidata_t * mmd,mblk_t * pld_mp)226 mmd_addpldbuf(multidata_t *mmd, mblk_t *pld_mp)
227 {
228 int i;
229
230 ASSERT(mmd != NULL);
231 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
232 ASSERT(pld_mp != NULL);
233
234 mutex_enter(&mmd->mmd_pd_slab_lock);
235 for (i = 0; i < MULTIDATA_MAX_PBUFS &&
236 mmd->mmd_pbuf_cnt < MULTIDATA_MAX_PBUFS; i++) {
237 if (mmd->mmd_pbuf[i] == pld_mp) {
238 /* duplicate entry */
239 MMD_DEBUG((CE_WARN, "mmd_addpldbuf: error adding "
240 "pld 0x%p to mmd 0x%p since it has been "
241 "previously added into slot %d (total %d)\n",
242 (void *)pld_mp, (void *)mmd, i, mmd->mmd_pbuf_cnt));
243 mutex_exit(&mmd->mmd_pd_slab_lock);
244 return (-1);
245 } else if (mmd->mmd_pbuf[i] == NULL) {
246 mmd->mmd_pbuf[i] = pld_mp;
247 mmd->mmd_pbuf_cnt++;
248 mutex_exit(&mmd->mmd_pd_slab_lock);
249 return (i);
250 }
251 }
252
253 /* all slots are taken */
254 MMD_DEBUG((CE_WARN, "mmd_addpldbuf: error adding pld 0x%p to mmd 0x%p "
255 "since no slot space is left (total %d max %d)\n", (void *)pld_mp,
256 (void *)mmd, mmd->mmd_pbuf_cnt, MULTIDATA_MAX_PBUFS));
257 mutex_exit(&mmd->mmd_pd_slab_lock);
258
259 return (-1);
260 }
261
262 /*
263 * Multidata metadata kmem cache constructor routine.
264 */
265 /* ARGSUSED */
266 static int
mmd_constructor(void * buf,void * cdrarg,int kmflags)267 mmd_constructor(void *buf, void *cdrarg, int kmflags)
268 {
269 struct mmd_buf_info *buf_info;
270 multidata_t *mmd;
271
272 bzero((void *)buf, MMD_CACHE_SIZE);
273
274 buf_info = (struct mmd_buf_info *)buf;
275 buf_info->frp.free_func = mmd_esballoc_free;
276 buf_info->buf_len = MMD_CACHE_SIZE;
277
278 mmd = (multidata_t *)(buf_info + 1);
279 mmd->mmd_magic = MULTIDATA_MAGIC;
280
281 mutex_init(&(mmd->mmd_pd_slab_lock), NULL, MUTEX_DRIVER, NULL);
282 QL_INIT(&(mmd->mmd_pd_slab_q));
283 QL_INIT(&(mmd->mmd_pd_q));
284
285 return (0);
286 }
287
288 /*
289 * Multidata metadata kmem cache destructor routine.
290 */
291 /* ARGSUSED */
292 static void
mmd_destructor(void * buf,void * cdrarg)293 mmd_destructor(void *buf, void *cdrarg)
294 {
295 multidata_t *mmd;
296 #ifdef DEBUG
297 int i;
298 #endif
299
300 mmd = (multidata_t *)((uchar_t *)buf + sizeof (struct mmd_buf_info));
301
302 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
303 ASSERT(mmd->mmd_dp == NULL);
304 ASSERT(mmd->mmd_hbuf == NULL);
305 ASSERT(mmd->mmd_pbuf_cnt == 0);
306 #ifdef DEBUG
307 for (i = 0; i < MULTIDATA_MAX_PBUFS; i++)
308 ASSERT(mmd->mmd_pbuf[i] == NULL);
309 #endif
310 ASSERT(mmd->mmd_pattbl == NULL);
311
312 mutex_destroy(&(mmd->mmd_pd_slab_lock));
313 ASSERT(mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q));
314 ASSERT(mmd->mmd_slab_cnt == 0);
315 ASSERT(mmd->mmd_pd_q.ql_next == &(mmd->mmd_pd_q));
316 ASSERT(mmd->mmd_pd_cnt == 0);
317 ASSERT(mmd->mmd_hbuf_ref == 0);
318 ASSERT(mmd->mmd_pbuf_ref == 0);
319 }
320
321 /*
322 * Multidata message block free callback routine.
323 */
324 static void
mmd_esballoc_free(caddr_t buf)325 mmd_esballoc_free(caddr_t buf)
326 {
327 multidata_t *mmd;
328 pdesc_t *pd;
329 pdesc_slab_t *slab;
330 int i;
331
332 ASSERT(buf != NULL);
333 ASSERT(((struct mmd_buf_info *)buf)->buf_len == MMD_CACHE_SIZE);
334
335 mmd = (multidata_t *)(buf + sizeof (struct mmd_buf_info));
336 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
337
338 ASSERT(mmd->mmd_dp != NULL);
339 ASSERT(mmd->mmd_dp->db_ref == 1);
340
341 /* remove all packet descriptors and private attributes */
342 pd = Q2PD(mmd->mmd_pd_q.ql_next);
343 while (pd != Q2PD(&(mmd->mmd_pd_q)))
344 pd = mmd_destroy_pdesc(mmd, pd);
345
346 ASSERT(mmd->mmd_pd_q.ql_next == &(mmd->mmd_pd_q));
347 ASSERT(mmd->mmd_pd_cnt == 0);
348 ASSERT(mmd->mmd_hbuf_ref == 0);
349 ASSERT(mmd->mmd_pbuf_ref == 0);
350
351 /* remove all global attributes */
352 if (mmd->mmd_pattbl != NULL)
353 mmd_destroy_pattbl(&(mmd->mmd_pattbl));
354
355 /* remove all descriptor slabs */
356 slab = Q2PDSLAB(mmd->mmd_pd_slab_q.ql_next);
357 while (slab != Q2PDSLAB(&(mmd->mmd_pd_slab_q))) {
358 pdesc_slab_t *slab_next = Q2PDSLAB(slab->pds_next);
359
360 remque(&(slab->pds_next));
361 slab->pds_next = NULL;
362 slab->pds_prev = NULL;
363 slab->pds_mmd = NULL;
364 slab->pds_used = 0;
365 kmem_cache_free(pd_slab_cache, slab);
366
367 ASSERT(mmd->mmd_slab_cnt > 0);
368 mmd->mmd_slab_cnt--;
369 slab = slab_next;
370 }
371 ASSERT(mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q));
372 ASSERT(mmd->mmd_slab_cnt == 0);
373
374 mmd->mmd_dp = NULL;
375
376 /* finally, free all associated message blocks */
377 if (mmd->mmd_hbuf != NULL) {
378 freeb(mmd->mmd_hbuf);
379 mmd->mmd_hbuf = NULL;
380 }
381
382 for (i = 0; i < MULTIDATA_MAX_PBUFS; i++) {
383 if (mmd->mmd_pbuf[i] != NULL) {
384 freeb(mmd->mmd_pbuf[i]);
385 mmd->mmd_pbuf[i] = NULL;
386 ASSERT(mmd->mmd_pbuf_cnt > 0);
387 mmd->mmd_pbuf_cnt--;
388 }
389 }
390
391 ASSERT(mmd->mmd_pbuf_cnt == 0);
392 ASSERT(MUTEX_NOT_HELD(&(mmd->mmd_pd_slab_lock)));
393 kmem_cache_free(mmd_cache, buf);
394 }
395
396 /*
397 * Multidata message block copy routine, called by copyb() when it
398 * encounters a M_MULTIDATA data block type. This routine should
399 * not be called by anyone other than copyb(), since it may go away
400 * (read: become static to this module) once some sort of copy callback
401 * routine is made available.
402 */
403 mblk_t *
mmd_copy(mblk_t * bp,int kmflags)404 mmd_copy(mblk_t *bp, int kmflags)
405 {
406 multidata_t *mmd, *n_mmd;
407 mblk_t *n_hbuf = NULL, *n_pbuf[MULTIDATA_MAX_PBUFS];
408 mblk_t **pmp_last = &n_pbuf[MULTIDATA_MAX_PBUFS - 1];
409 mblk_t **pmp;
410 mblk_t *n_bp = NULL;
411 pdesc_t *pd;
412 uint_t n_pbuf_cnt = 0;
413 int idx, i;
414
415 #define FREE_PBUFS() { \
416 for (pmp = &n_pbuf[0]; pmp <= pmp_last; pmp++) \
417 if (*pmp != NULL) freeb(*pmp); \
418 }
419
420 #define REL_OFF(p, base, n_base) \
421 ((uchar_t *)(n_base) + ((uchar_t *)(p) - (uchar_t *)base))
422
423 ASSERT(bp != NULL && DB_TYPE(bp) == M_MULTIDATA);
424 mmd = mmd_getmultidata(bp);
425
426 /* copy the header buffer */
427 if (mmd->mmd_hbuf != NULL && (n_hbuf = copyb(mmd->mmd_hbuf)) == NULL)
428 return (NULL);
429
430 /* copy the payload buffer(s) */
431 mutex_enter(&mmd->mmd_pd_slab_lock);
432 bzero((void *)&n_pbuf[0], sizeof (mblk_t *) * MULTIDATA_MAX_PBUFS);
433 n_pbuf_cnt = mmd->mmd_pbuf_cnt;
434 for (i = 0; i < n_pbuf_cnt; i++) {
435 ASSERT(mmd->mmd_pbuf[i] != NULL);
436 n_pbuf[i] = copyb(mmd->mmd_pbuf[i]);
437 if (n_pbuf[i] == NULL) {
438 FREE_PBUFS();
439 mutex_exit(&mmd->mmd_pd_slab_lock);
440 return (NULL);
441 }
442 }
443
444 /* allocate new Multidata */
445 n_mmd = mmd_alloc(n_hbuf, &n_bp, kmflags);
446 if (n_mmd == NULL) {
447 if (n_hbuf != NULL)
448 freeb(n_hbuf);
449 if (n_pbuf_cnt != 0)
450 FREE_PBUFS();
451 mutex_exit(&mmd->mmd_pd_slab_lock);
452 return (NULL);
453 }
454
455 /*
456 * Add payload buffer(s); upon success, leave n_pbuf array
457 * alone, as the newly-created Multidata had already contained
458 * the mblk pointers stored in the array. These will be freed
459 * along with the Multidata itself.
460 */
461 for (i = 0, pmp = &n_pbuf[0]; i < n_pbuf_cnt; i++, pmp++) {
462 idx = mmd_addpldbuf(n_mmd, *pmp);
463 if (idx < 0) {
464 FREE_PBUFS();
465 freeb(n_bp);
466 mutex_exit(&mmd->mmd_pd_slab_lock);
467 return (NULL);
468 }
469 }
470
471 /* copy over global attributes */
472 if (mmd->mmd_pattbl != NULL &&
473 mmd_copy_pattbl(mmd->mmd_pattbl, n_mmd, NULL, kmflags) < 0) {
474 freeb(n_bp);
475 mutex_exit(&mmd->mmd_pd_slab_lock);
476 return (NULL);
477 }
478
479 /* copy over packet descriptors and their atttributes */
480 pd = mmd_getpdesc(mmd, NULL, NULL, 1, B_TRUE); /* first pdesc */
481 while (pd != NULL) {
482 pdesc_t *n_pd;
483 pdescinfo_t *pdi, n_pdi;
484 uchar_t *n_base, *base;
485 pdesc_t *pd_next;
486
487 /* next pdesc */
488 pd_next = mmd_getpdesc(pd->pd_slab->pds_mmd, pd, NULL,
489 1, B_TRUE);
490
491 /* skip if already removed */
492 if (pd->pd_flags & PDESC_REM_DEFER) {
493 pd = pd_next;
494 continue;
495 }
496
497 pdi = &(pd->pd_pdi);
498 bzero(&n_pdi, sizeof (n_pdi));
499
500 /*
501 * Calculate new descriptor values based on the offset of
502 * each pointer relative to the associated buffer(s).
503 */
504 ASSERT(pdi->flags & PDESC_HAS_REF);
505 if (pdi->flags & PDESC_HBUF_REF) {
506 n_base = n_mmd->mmd_hbuf->b_rptr;
507 base = mmd->mmd_hbuf->b_rptr;
508
509 n_pdi.flags |= PDESC_HBUF_REF;
510 n_pdi.hdr_base = REL_OFF(pdi->hdr_base, base, n_base);
511 n_pdi.hdr_rptr = REL_OFF(pdi->hdr_rptr, base, n_base);
512 n_pdi.hdr_wptr = REL_OFF(pdi->hdr_wptr, base, n_base);
513 n_pdi.hdr_lim = REL_OFF(pdi->hdr_lim, base, n_base);
514 }
515
516 if (pdi->flags & PDESC_PBUF_REF) {
517 n_pdi.flags |= PDESC_PBUF_REF;
518 n_pdi.pld_cnt = pdi->pld_cnt;
519
520 for (i = 0; i < pdi->pld_cnt; i++) {
521 idx = pdi->pld_ary[i].pld_pbuf_idx;
522 ASSERT(idx < MULTIDATA_MAX_PBUFS);
523 ASSERT(n_mmd->mmd_pbuf[idx] != NULL);
524 ASSERT(mmd->mmd_pbuf[idx] != NULL);
525
526 n_base = n_mmd->mmd_pbuf[idx]->b_rptr;
527 base = mmd->mmd_pbuf[idx]->b_rptr;
528
529 n_pdi.pld_ary[i].pld_pbuf_idx = idx;
530
531 /*
532 * We can't copy the pointers just like that,
533 * so calculate the relative offset.
534 */
535 n_pdi.pld_ary[i].pld_rptr =
536 REL_OFF(pdi->pld_ary[i].pld_rptr,
537 base, n_base);
538 n_pdi.pld_ary[i].pld_wptr =
539 REL_OFF(pdi->pld_ary[i].pld_wptr,
540 base, n_base);
541 }
542 }
543
544 /* add the new descriptor to the new Multidata */
545 n_pd = mmd_addpdesc_int(n_mmd, &n_pdi, NULL, kmflags);
546
547 if (n_pd == NULL || (pd->pd_pattbl != NULL &&
548 mmd_copy_pattbl(pd->pd_pattbl, n_mmd, n_pd, kmflags) < 0)) {
549 freeb(n_bp);
550 mutex_exit(&mmd->mmd_pd_slab_lock);
551 return (NULL);
552 }
553
554 pd = pd_next;
555 }
556 #undef REL_OFF
557 #undef FREE_PBUFS
558
559 mutex_exit(&mmd->mmd_pd_slab_lock);
560 return (n_bp);
561 }
562
563 /*
564 * Given a Multidata message block, return the Multidata metadata handle.
565 */
566 multidata_t *
mmd_getmultidata(mblk_t * mp)567 mmd_getmultidata(mblk_t *mp)
568 {
569 multidata_t *mmd;
570
571 ASSERT(mp != NULL);
572
573 if (DB_TYPE(mp) != M_MULTIDATA)
574 return (NULL);
575
576 mmd = (multidata_t *)mp->b_rptr;
577 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
578
579 return (mmd);
580 }
581
582 /*
583 * Return the start and end addresses of the associated buffer(s).
584 */
585 void
mmd_getregions(multidata_t * mmd,mbufinfo_t * mbi)586 mmd_getregions(multidata_t *mmd, mbufinfo_t *mbi)
587 {
588 int i;
589
590 ASSERT(mmd != NULL);
591 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
592 ASSERT(mbi != NULL);
593
594 bzero((void *)mbi, sizeof (mbufinfo_t));
595
596 if (mmd->mmd_hbuf != NULL) {
597 mbi->hbuf_rptr = mmd->mmd_hbuf->b_rptr;
598 mbi->hbuf_wptr = mmd->mmd_hbuf->b_wptr;
599 }
600
601 mutex_enter(&mmd->mmd_pd_slab_lock);
602 for (i = 0; i < mmd->mmd_pbuf_cnt; i++) {
603 ASSERT(mmd->mmd_pbuf[i] != NULL);
604 mbi->pbuf_ary[i].pbuf_rptr = mmd->mmd_pbuf[i]->b_rptr;
605 mbi->pbuf_ary[i].pbuf_wptr = mmd->mmd_pbuf[i]->b_wptr;
606
607 }
608 mbi->pbuf_cnt = mmd->mmd_pbuf_cnt;
609 mutex_exit(&mmd->mmd_pd_slab_lock);
610 }
611
612 /*
613 * Return the Multidata statistics.
614 */
615 uint_t
mmd_getcnt(multidata_t * mmd,uint_t * hbuf_ref,uint_t * pbuf_ref)616 mmd_getcnt(multidata_t *mmd, uint_t *hbuf_ref, uint_t *pbuf_ref)
617 {
618 uint_t pd_cnt;
619
620 ASSERT(mmd != NULL);
621 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
622
623 mutex_enter(&(mmd->mmd_pd_slab_lock));
624 if (hbuf_ref != NULL)
625 *hbuf_ref = mmd->mmd_hbuf_ref;
626 if (pbuf_ref != NULL)
627 *pbuf_ref = mmd->mmd_pbuf_ref;
628 pd_cnt = mmd->mmd_pd_cnt;
629 mutex_exit(&(mmd->mmd_pd_slab_lock));
630
631 return (pd_cnt);
632 }
633
/*
 * Bounds check the header area of a descriptor.  Evaluates to non-zero
 * only if the Multidata has a header buffer, all four header pointers
 * of pdi are non-NULL and ordered base <= rptr <= wptr <= lim, hdr_base
 * is not below the header buffer's read pointer, and the area of size
 * PDESC_HDRSIZE(pdi) starting at hdr_base passes MBLKIN() against the
 * header buffer.  Both arguments are evaluated more than once.
 */
#define	HBUF_REF_VALID(mmd, pdi)					\
	((mmd)->mmd_hbuf != NULL && (pdi)->hdr_rptr != NULL &&		\
	(pdi)->hdr_wptr != NULL && (pdi)->hdr_base != NULL &&		\
	(pdi)->hdr_lim != NULL && (pdi)->hdr_lim >= (pdi)->hdr_base &&	\
	(pdi)->hdr_wptr >= (pdi)->hdr_rptr &&				\
	(pdi)->hdr_base <= (pdi)->hdr_rptr &&				\
	(pdi)->hdr_lim >= (pdi)->hdr_wptr &&				\
	(pdi)->hdr_base >= (mmd)->mmd_hbuf->b_rptr &&			\
	MBLKIN((mmd)->mmd_hbuf,						\
	((pdi)->hdr_base - (mmd)->mmd_hbuf->b_rptr),			\
	PDESC_HDRSIZE(pdi)))
645
646 /*
647 * Bounds check payload area(s).
648 */
649 static boolean_t
pbuf_ref_valid(multidata_t * mmd,pdescinfo_t * pdi)650 pbuf_ref_valid(multidata_t *mmd, pdescinfo_t *pdi)
651 {
652 int i = 0, idx;
653 boolean_t valid = B_TRUE;
654 struct pld_ary_s *pa;
655
656 mutex_enter(&mmd->mmd_pd_slab_lock);
657 if (pdi->pld_cnt == 0 || pdi->pld_cnt > mmd->mmd_pbuf_cnt) {
658 mutex_exit(&mmd->mmd_pd_slab_lock);
659 return (B_FALSE);
660 }
661
662 pa = &pdi->pld_ary[0];
663 while (valid && i < pdi->pld_cnt) {
664 valid = (((idx = pa->pld_pbuf_idx) < mmd->mmd_pbuf_cnt) &&
665 pa->pld_rptr != NULL && pa->pld_wptr != NULL &&
666 pa->pld_wptr >= pa->pld_rptr &&
667 pa->pld_rptr >= mmd->mmd_pbuf[idx]->b_rptr &&
668 MBLKIN(mmd->mmd_pbuf[idx], (pa->pld_rptr -
669 mmd->mmd_pbuf[idx]->b_rptr),
670 PDESC_PLD_SPAN_SIZE(pdi, i)));
671
672 if (!valid) {
673 MMD_DEBUG((CE_WARN,
674 "pbuf_ref_valid: pdi 0x%p pld out of bound; "
675 "index %d has pld_cnt %d pbuf_idx %d "
676 "(mmd_pbuf_cnt %d), "
677 "pld_rptr 0x%p pld_wptr 0x%p len %d "
678 "(valid 0x%p-0x%p len %d)\n", (void *)pdi,
679 i, pdi->pld_cnt, idx, mmd->mmd_pbuf_cnt,
680 (void *)pa->pld_rptr,
681 (void *)pa->pld_wptr,
682 (int)PDESC_PLD_SPAN_SIZE(pdi, i),
683 (void *)mmd->mmd_pbuf[idx]->b_rptr,
684 (void *)mmd->mmd_pbuf[idx]->b_wptr,
685 (int)MBLKL(mmd->mmd_pbuf[idx])));
686 }
687
688 /* advance to next entry */
689 i++;
690 pa++;
691 }
692
693 mutex_exit(&mmd->mmd_pd_slab_lock);
694 return (valid);
695 }
696
697 /*
698 * Add a packet descriptor to the Multidata.
699 */
700 pdesc_t *
mmd_addpdesc(multidata_t * mmd,pdescinfo_t * pdi,int * err,int kmflags)701 mmd_addpdesc(multidata_t *mmd, pdescinfo_t *pdi, int *err, int kmflags)
702 {
703 ASSERT(mmd != NULL);
704 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
705 ASSERT(pdi != NULL);
706 ASSERT(pdi->flags & PDESC_HAS_REF);
707
708 /* do the references refer to invalid memory regions? */
709 if (!mmd_speed_over_safety &&
710 (((pdi->flags & PDESC_HBUF_REF) && !HBUF_REF_VALID(mmd, pdi)) ||
711 ((pdi->flags & PDESC_PBUF_REF) && !pbuf_ref_valid(mmd, pdi)))) {
712 if (err != NULL)
713 *err = EINVAL;
714 return (NULL);
715 }
716
717 return (mmd_addpdesc_int(mmd, pdi, err, kmflags));
718 }
719
720 /*
721 * Internal routine to add a packet descriptor, called when mmd_addpdesc
722 * or mmd_copy tries to allocate and add a descriptor to a Multidata.
723 */
724 static pdesc_t *
mmd_addpdesc_int(multidata_t * mmd,pdescinfo_t * pdi,int * err,int kmflags)725 mmd_addpdesc_int(multidata_t *mmd, pdescinfo_t *pdi, int *err, int kmflags)
726 {
727 pdesc_slab_t *slab, *slab_last;
728 pdesc_t *pd;
729
730 ASSERT(pdi->flags & PDESC_HAS_REF);
731 ASSERT(!(pdi->flags & PDESC_HBUF_REF) || HBUF_REF_VALID(mmd, pdi));
732 ASSERT(!(pdi->flags & PDESC_PBUF_REF) || pbuf_ref_valid(mmd, pdi));
733
734 if (err != NULL)
735 *err = 0;
736
737 mutex_enter(&(mmd->mmd_pd_slab_lock));
738 /*
739 * Is slab list empty or the last-added slab is full? If so,
740 * allocate new slab for the descriptor; otherwise, use the
741 * last-added slab instead.
742 */
743 slab_last = Q2PDSLAB(mmd->mmd_pd_slab_q.ql_prev);
744 if (mmd->mmd_pd_slab_q.ql_next == &(mmd->mmd_pd_slab_q) ||
745 slab_last->pds_used == slab_last->pds_sz) {
746 slab = kmem_cache_alloc(pd_slab_cache, kmflags);
747 if (slab == NULL) {
748 if (err != NULL)
749 *err = ENOMEM;
750 mutex_exit(&(mmd->mmd_pd_slab_lock));
751 return (NULL);
752 }
753 slab->pds_mmd = mmd;
754
755 ASSERT(slab->pds_used == 0);
756 ASSERT(slab->pds_next == NULL && slab->pds_prev == NULL);
757
758 /* insert slab at end of list */
759 insque(&(slab->pds_next), mmd->mmd_pd_slab_q.ql_prev);
760 mmd->mmd_slab_cnt++;
761 } else {
762 slab = slab_last;
763 }
764 ASSERT(slab->pds_used < slab->pds_sz);
765 pd = &(slab->pds_free_desc[slab->pds_used++]);
766 ASSERT(pd->pd_magic == PDESC_MAGIC);
767 pd->pd_next = NULL;
768 pd->pd_prev = NULL;
769 pd->pd_slab = slab;
770 pd->pd_pattbl = NULL;
771
772 /* copy over the descriptor info from caller */
773 PDI_COPY(pdi, &(pd->pd_pdi));
774
775 if (pd->pd_flags & PDESC_HBUF_REF)
776 mmd->mmd_hbuf_ref++;
777 if (pd->pd_flags & PDESC_PBUF_REF)
778 mmd->mmd_pbuf_ref += pd->pd_pdi.pld_cnt;
779 mmd->mmd_pd_cnt++;
780
781 /* insert descriptor at end of list */
782 insque(&(pd->pd_next), mmd->mmd_pd_q.ql_prev);
783 mutex_exit(&(mmd->mmd_pd_slab_lock));
784
785 return (pd);
786 }
787
788 /*
789 * Packet descriptor slab kmem cache constructor routine.
790 */
791 /* ARGSUSED */
792 static int
pdslab_constructor(void * buf,void * cdrarg,int kmflags)793 pdslab_constructor(void *buf, void *cdrarg, int kmflags)
794 {
795 pdesc_slab_t *slab;
796 uint_t cnt = (uint_t)(uintptr_t)cdrarg;
797 int i;
798
799 ASSERT(cnt > 0); /* slab size can't be zero */
800
801 slab = (pdesc_slab_t *)buf;
802 slab->pds_next = NULL;
803 slab->pds_prev = NULL;
804 slab->pds_mmd = NULL;
805 slab->pds_used = 0;
806 slab->pds_sz = cnt;
807
808 for (i = 0; i < cnt; i++) {
809 pdesc_t *pd = &(slab->pds_free_desc[i]);
810 pd->pd_magic = PDESC_MAGIC;
811 }
812 return (0);
813 }
814
815 /*
816 * Packet descriptor slab kmem cache destructor routine.
817 */
818 /* ARGSUSED */
819 static void
pdslab_destructor(void * buf,void * cdrarg)820 pdslab_destructor(void *buf, void *cdrarg)
821 {
822 pdesc_slab_t *slab;
823
824 slab = (pdesc_slab_t *)buf;
825 ASSERT(slab->pds_next == NULL);
826 ASSERT(slab->pds_prev == NULL);
827 ASSERT(slab->pds_mmd == NULL);
828 ASSERT(slab->pds_used == 0);
829 ASSERT(slab->pds_sz > 0);
830 }
831
832 /*
833 * Remove a packet descriptor from the in-use descriptor list,
834 * called by mmd_rempdesc or during free.
835 */
836 static pdesc_t *
mmd_destroy_pdesc(multidata_t * mmd,pdesc_t * pd)837 mmd_destroy_pdesc(multidata_t *mmd, pdesc_t *pd)
838 {
839 pdesc_t *pd_next;
840
841 pd_next = Q2PD(pd->pd_next);
842 remque(&(pd->pd_next));
843
844 /* remove all local attributes */
845 if (pd->pd_pattbl != NULL)
846 mmd_destroy_pattbl(&(pd->pd_pattbl));
847
848 /* don't decrease counts for a removed descriptor */
849 if (!(pd->pd_flags & PDESC_REM_DEFER)) {
850 if (pd->pd_flags & PDESC_HBUF_REF) {
851 ASSERT(mmd->mmd_hbuf_ref > 0);
852 mmd->mmd_hbuf_ref--;
853 }
854 if (pd->pd_flags & PDESC_PBUF_REF) {
855 ASSERT(mmd->mmd_pbuf_ref > 0);
856 mmd->mmd_pbuf_ref -= pd->pd_pdi.pld_cnt;
857 }
858 ASSERT(mmd->mmd_pd_cnt > 0);
859 mmd->mmd_pd_cnt--;
860 }
861 return (pd_next);
862 }
863
864 /*
865 * Remove a packet descriptor from the Multidata.
866 */
867 void
mmd_rempdesc(pdesc_t * pd)868 mmd_rempdesc(pdesc_t *pd)
869 {
870 multidata_t *mmd;
871
872 ASSERT(pd->pd_magic == PDESC_MAGIC);
873 ASSERT(pd->pd_slab != NULL);
874
875 mmd = pd->pd_slab->pds_mmd;
876 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
877
878 mutex_enter(&(mmd->mmd_pd_slab_lock));
879 /*
880 * We can't deallocate the associated resources if the Multidata
881 * is shared with other threads, because it's possible that the
882 * descriptor handle value is held by those threads. That's why
883 * we simply mark the entry as "removed" and decrement the counts.
884 * If there are no other threads, then we free the descriptor.
885 */
886 if (mmd->mmd_dp->db_ref > 1) {
887 pd->pd_flags |= PDESC_REM_DEFER;
888 if (pd->pd_flags & PDESC_HBUF_REF) {
889 ASSERT(mmd->mmd_hbuf_ref > 0);
890 mmd->mmd_hbuf_ref--;
891 }
892 if (pd->pd_flags & PDESC_PBUF_REF) {
893 ASSERT(mmd->mmd_pbuf_ref > 0);
894 mmd->mmd_pbuf_ref -= pd->pd_pdi.pld_cnt;
895 }
896 ASSERT(mmd->mmd_pd_cnt > 0);
897 mmd->mmd_pd_cnt--;
898 } else {
899 (void) mmd_destroy_pdesc(mmd, pd);
900 }
901 mutex_exit(&(mmd->mmd_pd_slab_lock));
902 }
903
904 /*
905 * A generic routine to traverse the packet descriptor in-use list.
906 */
907 static pdesc_t *
mmd_getpdesc(multidata_t * mmd,pdesc_t * pd,pdescinfo_t * pdi,uint_t forw,boolean_t mutex_held)908 mmd_getpdesc(multidata_t *mmd, pdesc_t *pd, pdescinfo_t *pdi, uint_t forw,
909 boolean_t mutex_held)
910 {
911 pdesc_t *pd_head;
912
913 ASSERT(pd == NULL || pd->pd_slab->pds_mmd == mmd);
914 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
915 ASSERT(!mutex_held || MUTEX_HELD(&(mmd->mmd_pd_slab_lock)));
916
917 if (!mutex_held)
918 mutex_enter(&(mmd->mmd_pd_slab_lock));
919 pd_head = Q2PD(&(mmd->mmd_pd_q));
920
921 if (pd == NULL) {
922 /*
923 * We're called by mmd_get{first,last}pdesc, and so
924 * return either the first or last list element.
925 */
926 pd = forw ? Q2PD(mmd->mmd_pd_q.ql_next) :
927 Q2PD(mmd->mmd_pd_q.ql_prev);
928 } else {
929 /*
930 * We're called by mmd_get{next,prev}pdesc, and so
931 * return either the next or previous list element.
932 */
933 pd = forw ? Q2PD(pd->pd_next) : Q2PD(pd->pd_prev);
934 }
935
936 while (pd != pd_head) {
937 /* skip element if it has been removed */
938 if (!(pd->pd_flags & PDESC_REM_DEFER))
939 break;
940 pd = forw ? Q2PD(pd->pd_next) : Q2PD(pd->pd_prev);
941 }
942 if (!mutex_held)
943 mutex_exit(&(mmd->mmd_pd_slab_lock));
944
945 /* return NULL if we're back at the beginning */
946 if (pd == pd_head)
947 pd = NULL;
948
949 /* got an entry; copy descriptor info to caller */
950 if (pd != NULL && pdi != NULL)
951 PDI_COPY(&(pd->pd_pdi), pdi);
952
953 ASSERT(pd == NULL || pd->pd_magic == PDESC_MAGIC);
954 return (pd);
955
956 }
957
958 /*
959 * Return the first packet descriptor in the in-use list.
960 */
961 pdesc_t *
mmd_getfirstpdesc(multidata_t * mmd,pdescinfo_t * pdi)962 mmd_getfirstpdesc(multidata_t *mmd, pdescinfo_t *pdi)
963 {
964 return (mmd_getpdesc(mmd, NULL, pdi, 1, B_FALSE));
965 }
966
967 /*
968 * Return the last packet descriptor in the in-use list.
969 */
970 pdesc_t *
mmd_getlastpdesc(multidata_t * mmd,pdescinfo_t * pdi)971 mmd_getlastpdesc(multidata_t *mmd, pdescinfo_t *pdi)
972 {
973 return (mmd_getpdesc(mmd, NULL, pdi, 0, B_FALSE));
974 }
975
976 /*
977 * Return the next packet descriptor in the in-use list.
978 */
979 pdesc_t *
mmd_getnextpdesc(pdesc_t * pd,pdescinfo_t * pdi)980 mmd_getnextpdesc(pdesc_t *pd, pdescinfo_t *pdi)
981 {
982 return (mmd_getpdesc(pd->pd_slab->pds_mmd, pd, pdi, 1, B_FALSE));
983 }
984
985 /*
986 * Return the previous packet descriptor in the in-use list.
987 */
988 pdesc_t *
mmd_getprevpdesc(pdesc_t * pd,pdescinfo_t * pdi)989 mmd_getprevpdesc(pdesc_t *pd, pdescinfo_t *pdi)
990 {
991 return (mmd_getpdesc(pd->pd_slab->pds_mmd, pd, pdi, 0, B_FALSE));
992 }
993
994 /*
995 * Check to see if pdi stretches over c_pdi; used to ensure that a packet
996 * descriptor's header and payload span may not be extended beyond the
997 * current boundaries.
998 */
999 static boolean_t
pdi_in_range(pdescinfo_t * pdi,pdescinfo_t * c_pdi)1000 pdi_in_range(pdescinfo_t *pdi, pdescinfo_t *c_pdi)
1001 {
1002 int i;
1003 struct pld_ary_s *pa = &pdi->pld_ary[0];
1004 struct pld_ary_s *c_pa = &c_pdi->pld_ary[0];
1005
1006 if (pdi->hdr_base < c_pdi->hdr_base || pdi->hdr_lim > c_pdi->hdr_lim)
1007 return (B_FALSE);
1008
1009 /*
1010 * We don't allow the number of span to be reduced, for the sake
1011 * of simplicity. Instead, we provide PDESC_PLD_SPAN_CLEAR() to
1012 * clear a packet descriptor. Note that we allow the span count to
1013 * be increased, and the bounds check for the new one happens
1014 * in pbuf_ref_valid.
1015 */
1016 if (pdi->pld_cnt < c_pdi->pld_cnt)
1017 return (B_FALSE);
1018
1019 /* compare only those which are currently defined */
1020 for (i = 0; i < c_pdi->pld_cnt; i++, pa++, c_pa++) {
1021 if (pa->pld_pbuf_idx != c_pa->pld_pbuf_idx ||
1022 pa->pld_rptr < c_pa->pld_rptr ||
1023 pa->pld_wptr > c_pa->pld_wptr)
1024 return (B_FALSE);
1025 }
1026 return (B_TRUE);
1027 }
1028
1029 /*
1030 * Modify the layout of a packet descriptor.
1031 */
1032 pdesc_t *
mmd_adjpdesc(pdesc_t * pd,pdescinfo_t * pdi)1033 mmd_adjpdesc(pdesc_t *pd, pdescinfo_t *pdi)
1034 {
1035 multidata_t *mmd;
1036 pdescinfo_t *c_pdi;
1037
1038 ASSERT(pd != NULL);
1039 ASSERT(pdi != NULL);
1040 ASSERT(pd->pd_magic == PDESC_MAGIC);
1041
1042 mmd = pd->pd_slab->pds_mmd;
1043 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
1044
1045 /* entry has been removed */
1046 if (pd->pd_flags & PDESC_REM_DEFER)
1047 return (NULL);
1048
1049 /* caller doesn't intend to specify any buffer reference? */
1050 if (!(pdi->flags & PDESC_HAS_REF))
1051 return (NULL);
1052
1053 /* do the references refer to invalid memory regions? */
1054 if (!mmd_speed_over_safety &&
1055 (((pdi->flags & PDESC_HBUF_REF) && !HBUF_REF_VALID(mmd, pdi)) ||
1056 ((pdi->flags & PDESC_PBUF_REF) && !pbuf_ref_valid(mmd, pdi))))
1057 return (NULL);
1058
1059 /* they're not subsets of current references? */
1060 c_pdi = &(pd->pd_pdi);
1061 if (!pdi_in_range(pdi, c_pdi))
1062 return (NULL);
1063
1064 /* copy over the descriptor info from caller */
1065 PDI_COPY(pdi, c_pdi);
1066
1067 return (pd);
1068 }
1069
1070 /*
1071 * Copy the contents of a packet descriptor into a new buffer. If the
1072 * descriptor points to more than one buffer fragments, the contents
1073 * of both fragments will be joined, with the header buffer fragment
1074 * preceding the payload buffer fragment(s).
1075 */
1076 mblk_t *
mmd_transform(pdesc_t * pd)1077 mmd_transform(pdesc_t *pd)
1078 {
1079 multidata_t *mmd;
1080 pdescinfo_t *pdi;
1081 mblk_t *mp;
1082 int h_size = 0, p_size = 0;
1083 int i, len;
1084
1085 ASSERT(pd != NULL);
1086 ASSERT(pd->pd_magic == PDESC_MAGIC);
1087
1088 mmd = pd->pd_slab->pds_mmd;
1089 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
1090
1091 /* entry has been removed */
1092 if (pd->pd_flags & PDESC_REM_DEFER)
1093 return (NULL);
1094
1095 mutex_enter(&mmd->mmd_pd_slab_lock);
1096 pdi = &(pd->pd_pdi);
1097 if (pdi->flags & PDESC_HBUF_REF)
1098 h_size = PDESC_HDRL(pdi);
1099 if (pdi->flags & PDESC_PBUF_REF) {
1100 for (i = 0; i < pdi->pld_cnt; i++)
1101 p_size += PDESC_PLD_SPAN_SIZE(pdi, i);
1102 }
1103
1104 /* allocate space large enough to hold the fragment(s) */
1105 ASSERT(h_size + p_size >= 0);
1106 if ((mp = allocb(h_size + p_size, BPRI_HI)) == NULL) {
1107 mutex_exit(&mmd->mmd_pd_slab_lock);
1108 return (NULL);
1109 }
1110
1111 /* copy over the header fragment */
1112 if ((pdi->flags & PDESC_HBUF_REF) && h_size > 0) {
1113 bcopy(pdi->hdr_rptr, mp->b_wptr, h_size);
1114 mp->b_wptr += h_size;
1115 }
1116
1117 /* copy over the payload fragment */
1118 if ((pdi->flags & PDESC_PBUF_REF) && p_size > 0) {
1119 for (i = 0; i < pdi->pld_cnt; i++) {
1120 len = PDESC_PLD_SPAN_SIZE(pdi, i);
1121 if (len > 0) {
1122 bcopy(pdi->pld_ary[i].pld_rptr,
1123 mp->b_wptr, len);
1124 mp->b_wptr += len;
1125 }
1126 }
1127 }
1128
1129 mutex_exit(&mmd->mmd_pd_slab_lock);
1130 return (mp);
1131 }
1132
1133 /*
1134 * Return a chain of mblks representing the Multidata packet.
1135 */
1136 mblk_t *
mmd_transform_link(pdesc_t * pd)1137 mmd_transform_link(pdesc_t *pd)
1138 {
1139 multidata_t *mmd;
1140 pdescinfo_t *pdi;
1141 mblk_t *nmp = NULL;
1142
1143 ASSERT(pd != NULL);
1144 ASSERT(pd->pd_magic == PDESC_MAGIC);
1145
1146 mmd = pd->pd_slab->pds_mmd;
1147 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
1148
1149 /* entry has been removed */
1150 if (pd->pd_flags & PDESC_REM_DEFER)
1151 return (NULL);
1152
1153 pdi = &(pd->pd_pdi);
1154
1155 /* duplicate header buffer */
1156 if ((pdi->flags & PDESC_HBUF_REF)) {
1157 if ((nmp = dupb(mmd->mmd_hbuf)) == NULL)
1158 return (NULL);
1159 nmp->b_rptr = pdi->hdr_rptr;
1160 nmp->b_wptr = pdi->hdr_wptr;
1161 }
1162
1163 /* duplicate payload buffer(s) */
1164 if (pdi->flags & PDESC_PBUF_REF) {
1165 int i;
1166 mblk_t *mp;
1167 struct pld_ary_s *pa = &pdi->pld_ary[0];
1168
1169 mutex_enter(&mmd->mmd_pd_slab_lock);
1170 for (i = 0; i < pdi->pld_cnt; i++, pa++) {
1171 ASSERT(mmd->mmd_pbuf[pa->pld_pbuf_idx] != NULL);
1172
1173 /* skip empty ones */
1174 if (PDESC_PLD_SPAN_SIZE(pdi, i) == 0)
1175 continue;
1176
1177 mp = dupb(mmd->mmd_pbuf[pa->pld_pbuf_idx]);
1178 if (mp == NULL) {
1179 if (nmp != NULL)
1180 freemsg(nmp);
1181 mutex_exit(&mmd->mmd_pd_slab_lock);
1182 return (NULL);
1183 }
1184 mp->b_rptr = pa->pld_rptr;
1185 mp->b_wptr = pa->pld_wptr;
1186 if (nmp == NULL)
1187 nmp = mp;
1188 else
1189 linkb(nmp, mp);
1190 }
1191 mutex_exit(&mmd->mmd_pd_slab_lock);
1192 }
1193
1194 return (nmp);
1195 }
1196
1197 /*
1198 * Return duplicate message block(s) of the associated buffer(s).
1199 */
1200 int
mmd_dupbufs(multidata_t * mmd,mblk_t ** hmp,mblk_t ** pmp)1201 mmd_dupbufs(multidata_t *mmd, mblk_t **hmp, mblk_t **pmp)
1202 {
1203 ASSERT(mmd != NULL);
1204 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
1205
1206 if (hmp != NULL) {
1207 *hmp = NULL;
1208 if (mmd->mmd_hbuf != NULL &&
1209 (*hmp = dupb(mmd->mmd_hbuf)) == NULL)
1210 return (-1);
1211 }
1212
1213 if (pmp != NULL) {
1214 int i;
1215 mblk_t *mp;
1216
1217 mutex_enter(&mmd->mmd_pd_slab_lock);
1218 *pmp = NULL;
1219 for (i = 0; i < mmd->mmd_pbuf_cnt; i++) {
1220 ASSERT(mmd->mmd_pbuf[i] != NULL);
1221 mp = dupb(mmd->mmd_pbuf[i]);
1222 if (mp == NULL) {
1223 if (hmp != NULL && *hmp != NULL)
1224 freeb(*hmp);
1225 if (*pmp != NULL)
1226 freemsg(*pmp);
1227 mutex_exit(&mmd->mmd_pd_slab_lock);
1228 return (-1);
1229 }
1230 if (*pmp == NULL)
1231 *pmp = mp;
1232 else
1233 linkb(*pmp, mp);
1234 }
1235 mutex_exit(&mmd->mmd_pd_slab_lock);
1236 }
1237
1238 return (0);
1239 }
1240
1241 /*
1242 * Return the layout of a packet descriptor.
1243 */
1244 int
mmd_getpdescinfo(pdesc_t * pd,pdescinfo_t * pdi)1245 mmd_getpdescinfo(pdesc_t *pd, pdescinfo_t *pdi)
1246 {
1247 ASSERT(pd != NULL);
1248 ASSERT(pd->pd_magic == PDESC_MAGIC);
1249 ASSERT(pd->pd_slab != NULL);
1250 ASSERT(pd->pd_slab->pds_mmd->mmd_magic == MULTIDATA_MAGIC);
1251 ASSERT(pdi != NULL);
1252
1253 /* entry has been removed */
1254 if (pd->pd_flags & PDESC_REM_DEFER)
1255 return (-1);
1256
1257 /* copy descriptor info to caller */
1258 PDI_COPY(&(pd->pd_pdi), pdi);
1259
1260 return (0);
1261 }
1262
1263 /*
1264 * Add a global or local attribute to a Multidata. Global attribute
1265 * association is specified by a NULL packet descriptor.
1266 */
1267 pattr_t *
mmd_addpattr(multidata_t * mmd,pdesc_t * pd,pattrinfo_t * pai,boolean_t persistent,int kmflags)1268 mmd_addpattr(multidata_t *mmd, pdesc_t *pd, pattrinfo_t *pai,
1269 boolean_t persistent, int kmflags)
1270 {
1271 patbkt_t **tbl_p;
1272 patbkt_t *tbl, *o_tbl;
1273 patbkt_t *bkt;
1274 pattr_t *pa;
1275 uint_t size;
1276
1277 ASSERT(mmd != NULL);
1278 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
1279 ASSERT(pd == NULL || pd->pd_magic == PDESC_MAGIC);
1280 ASSERT(pai != NULL);
1281
1282 /* pointer to the attribute hash table (local or global) */
1283 tbl_p = pd != NULL ? &(pd->pd_pattbl) : &(mmd->mmd_pattbl);
1284
1285 /*
1286 * See if the hash table has not yet been created; if so,
1287 * we create the table and store its address atomically.
1288 */
1289 if ((tbl = *tbl_p) == NULL) {
1290 tbl = kmem_cache_alloc(pattbl_cache, kmflags);
1291 if (tbl == NULL)
1292 return (NULL);
1293
1294 /* if someone got there first, use his table instead */
1295 if ((o_tbl = atomic_cas_ptr(tbl_p, NULL, tbl)) != NULL) {
1296 kmem_cache_free(pattbl_cache, tbl);
1297 tbl = o_tbl;
1298 }
1299 }
1300
1301 ASSERT(tbl->pbkt_tbl_sz > 0);
1302 bkt = &(tbl[PATTBL_HASH(pai->type, tbl->pbkt_tbl_sz)]);
1303
1304 /* attribute of the same type already exists? */
1305 if ((pa = mmd_find_pattr(bkt, pai->type)) != NULL)
1306 return (NULL);
1307
1308 size = sizeof (*pa) + pai->len;
1309 if ((pa = kmem_zalloc(size, kmflags)) == NULL)
1310 return (NULL);
1311
1312 pa->pat_magic = PATTR_MAGIC;
1313 pa->pat_lock = &(bkt->pbkt_lock);
1314 pa->pat_mmd = mmd;
1315 pa->pat_buflen = size;
1316 pa->pat_type = pai->type;
1317 pai->buf = pai->len > 0 ? ((uchar_t *)(pa + 1)) : NULL;
1318
1319 if (persistent)
1320 pa->pat_flags = PATTR_PERSIST;
1321
1322 /* insert attribute at end of hash chain */
1323 mutex_enter(&(bkt->pbkt_lock));
1324 insque(&(pa->pat_next), bkt->pbkt_pattr_q.ql_prev);
1325 mutex_exit(&(bkt->pbkt_lock));
1326
1327 return (pa);
1328 }
1329
1330 /*
1331 * Attribute hash table kmem cache constructor routine.
1332 */
1333 /* ARGSUSED */
1334 static int
pattbl_constructor(void * buf,void * cdrarg,int kmflags)1335 pattbl_constructor(void *buf, void *cdrarg, int kmflags)
1336 {
1337 patbkt_t *bkt;
1338 uint_t tbl_sz = (uint_t)(uintptr_t)cdrarg;
1339 uint_t i;
1340
1341 ASSERT(tbl_sz > 0); /* table size can't be zero */
1342
1343 for (i = 0, bkt = (patbkt_t *)buf; i < tbl_sz; i++, bkt++) {
1344 mutex_init(&(bkt->pbkt_lock), NULL, MUTEX_DRIVER, NULL);
1345 QL_INIT(&(bkt->pbkt_pattr_q));
1346
1347 /* first bucket contains the table size */
1348 bkt->pbkt_tbl_sz = i == 0 ? tbl_sz : 0;
1349 }
1350 return (0);
1351 }
1352
1353 /*
1354 * Attribute hash table kmem cache destructor routine.
1355 */
1356 /* ARGSUSED */
1357 static void
pattbl_destructor(void * buf,void * cdrarg)1358 pattbl_destructor(void *buf, void *cdrarg)
1359 {
1360 patbkt_t *bkt;
1361 uint_t tbl_sz = (uint_t)(uintptr_t)cdrarg;
1362 uint_t i;
1363
1364 ASSERT(tbl_sz > 0); /* table size can't be zero */
1365
1366 for (i = 0, bkt = (patbkt_t *)buf; i < tbl_sz; i++, bkt++) {
1367 mutex_destroy(&(bkt->pbkt_lock));
1368 ASSERT(bkt->pbkt_pattr_q.ql_next == &(bkt->pbkt_pattr_q));
1369 ASSERT(i > 0 || bkt->pbkt_tbl_sz == tbl_sz);
1370 }
1371 }
1372
1373 /*
1374 * Destroy an attribute hash table, called by mmd_rempdesc or during free.
1375 */
1376 static void
mmd_destroy_pattbl(patbkt_t ** tbl)1377 mmd_destroy_pattbl(patbkt_t **tbl)
1378 {
1379 patbkt_t *bkt;
1380 pattr_t *pa, *pa_next;
1381 uint_t i, tbl_sz;
1382
1383 ASSERT(tbl != NULL);
1384 bkt = *tbl;
1385 tbl_sz = bkt->pbkt_tbl_sz;
1386
1387 /* make sure caller passes in the first bucket */
1388 ASSERT(tbl_sz > 0);
1389
1390 /* destroy the contents of each bucket */
1391 for (i = 0; i < tbl_sz; i++, bkt++) {
1392 /* we ought to be exclusive at this point */
1393 ASSERT(MUTEX_NOT_HELD(&(bkt->pbkt_lock)));
1394
1395 pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next);
1396 while (pa != Q2PATTR(&(bkt->pbkt_pattr_q))) {
1397 ASSERT(pa->pat_magic == PATTR_MAGIC);
1398 pa_next = Q2PATTR(pa->pat_next);
1399 remque(&(pa->pat_next));
1400 kmem_free(pa, pa->pat_buflen);
1401 pa = pa_next;
1402 }
1403 }
1404
1405 kmem_cache_free(pattbl_cache, *tbl);
1406 *tbl = NULL;
1407
1408 /* commit all previous stores */
1409 membar_producer();
1410 }
1411
1412 /*
1413 * Copy the contents of an attribute hash table, called by mmd_copy.
1414 */
1415 static int
mmd_copy_pattbl(patbkt_t * src_tbl,multidata_t * n_mmd,pdesc_t * n_pd,int kmflags)1416 mmd_copy_pattbl(patbkt_t *src_tbl, multidata_t *n_mmd, pdesc_t *n_pd,
1417 int kmflags)
1418 {
1419 patbkt_t *bkt;
1420 pattr_t *pa;
1421 pattrinfo_t pai;
1422 uint_t i, tbl_sz;
1423
1424 ASSERT(src_tbl != NULL);
1425 bkt = src_tbl;
1426 tbl_sz = bkt->pbkt_tbl_sz;
1427
1428 /* make sure caller passes in the first bucket */
1429 ASSERT(tbl_sz > 0);
1430
1431 for (i = 0; i < tbl_sz; i++, bkt++) {
1432 mutex_enter(&(bkt->pbkt_lock));
1433 pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next);
1434 while (pa != Q2PATTR(&(bkt->pbkt_pattr_q))) {
1435 pattr_t *pa_next = Q2PATTR(pa->pat_next);
1436
1437 /* skip if it's removed */
1438 if (pa->pat_flags & PATTR_REM_DEFER) {
1439 pa = pa_next;
1440 continue;
1441 }
1442
1443 pai.type = pa->pat_type;
1444 pai.len = pa->pat_buflen - sizeof (*pa);
1445 if (mmd_addpattr(n_mmd, n_pd, &pai, (pa->pat_flags &
1446 PATTR_PERSIST) != 0, kmflags) == NULL) {
1447 mutex_exit(&(bkt->pbkt_lock));
1448 return (-1);
1449 }
1450
1451 /* copy over the contents */
1452 if (pai.buf != NULL)
1453 bcopy(pa + 1, pai.buf, pai.len);
1454
1455 pa = pa_next;
1456 }
1457 mutex_exit(&(bkt->pbkt_lock));
1458 }
1459
1460 return (0);
1461 }
1462
1463 /*
1464 * Search for an attribute type within an attribute hash bucket.
1465 */
1466 static pattr_t *
mmd_find_pattr(patbkt_t * bkt,uint_t type)1467 mmd_find_pattr(patbkt_t *bkt, uint_t type)
1468 {
1469 pattr_t *pa_head, *pa;
1470
1471 mutex_enter(&(bkt->pbkt_lock));
1472 pa_head = Q2PATTR(&(bkt->pbkt_pattr_q));
1473 pa = Q2PATTR(bkt->pbkt_pattr_q.ql_next);
1474
1475 while (pa != pa_head) {
1476 ASSERT(pa->pat_magic == PATTR_MAGIC);
1477
1478 /* return a match; we treat removed entry as non-existent */
1479 if (pa->pat_type == type && !(pa->pat_flags & PATTR_REM_DEFER))
1480 break;
1481 pa = Q2PATTR(pa->pat_next);
1482 }
1483 mutex_exit(&(bkt->pbkt_lock));
1484
1485 return (pa == pa_head ? NULL : pa);
1486 }
1487
1488 /*
1489 * Remove an attribute from a Multidata.
1490 */
1491 void
mmd_rempattr(pattr_t * pa)1492 mmd_rempattr(pattr_t *pa)
1493 {
1494 kmutex_t *pat_lock = pa->pat_lock;
1495
1496 ASSERT(pa->pat_magic == PATTR_MAGIC);
1497
1498 /* ignore if attribute was marked as persistent */
1499 if ((pa->pat_flags & PATTR_PERSIST) != 0)
1500 return;
1501
1502 mutex_enter(pat_lock);
1503 /*
1504 * We can't deallocate the associated resources if the Multidata
1505 * is shared with other threads, because it's possible that the
1506 * attribute handle value is held by those threads. That's why
1507 * we simply mark the entry as "removed". If there are no other
1508 * threads, then we free the attribute.
1509 */
1510 if (pa->pat_mmd->mmd_dp->db_ref > 1) {
1511 pa->pat_flags |= PATTR_REM_DEFER;
1512 } else {
1513 remque(&(pa->pat_next));
1514 kmem_free(pa, pa->pat_buflen);
1515 }
1516 mutex_exit(pat_lock);
1517 }
1518
1519 /*
1520 * Find an attribute (according to its type) and return its handle.
1521 */
1522 pattr_t *
mmd_getpattr(multidata_t * mmd,pdesc_t * pd,pattrinfo_t * pai)1523 mmd_getpattr(multidata_t *mmd, pdesc_t *pd, pattrinfo_t *pai)
1524 {
1525 patbkt_t *tbl, *bkt;
1526 pattr_t *pa;
1527
1528 ASSERT(mmd != NULL);
1529 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
1530 ASSERT(pai != NULL);
1531
1532 /* get the right attribute hash table (local or global) */
1533 tbl = pd != NULL ? pd->pd_pattbl : mmd->mmd_pattbl;
1534
1535 /* attribute hash table doesn't exist? */
1536 if (tbl == NULL)
1537 return (NULL);
1538
1539 ASSERT(tbl->pbkt_tbl_sz > 0);
1540 bkt = &(tbl[PATTBL_HASH(pai->type, tbl->pbkt_tbl_sz)]);
1541
1542 if ((pa = mmd_find_pattr(bkt, pai->type)) != NULL) {
1543 ASSERT(pa->pat_buflen >= sizeof (*pa));
1544 pai->len = pa->pat_buflen - sizeof (*pa);
1545 pai->buf = pai->len > 0 ?
1546 (uchar_t *)pa + sizeof (pattr_t) : NULL;
1547 }
1548 ASSERT(pa == NULL || pa->pat_magic == PATTR_MAGIC);
1549 return (pa);
1550 }
1551
1552 /*
1553 * Return total size of buffers and total size of areas referenced
1554 * by all in-use (unremoved) packet descriptors.
1555 */
1556 void
mmd_getsize(multidata_t * mmd,uint_t * ptotal,uint_t * pinuse)1557 mmd_getsize(multidata_t *mmd, uint_t *ptotal, uint_t *pinuse)
1558 {
1559 pdesc_t *pd;
1560 pdescinfo_t *pdi;
1561 int i;
1562
1563 ASSERT(mmd != NULL);
1564 ASSERT(mmd->mmd_magic == MULTIDATA_MAGIC);
1565
1566 mutex_enter(&mmd->mmd_pd_slab_lock);
1567 if (ptotal != NULL) {
1568 *ptotal = 0;
1569
1570 if (mmd->mmd_hbuf != NULL)
1571 *ptotal += MBLKL(mmd->mmd_hbuf);
1572
1573 for (i = 0; i < mmd->mmd_pbuf_cnt; i++) {
1574 ASSERT(mmd->mmd_pbuf[i] != NULL);
1575 *ptotal += MBLKL(mmd->mmd_pbuf[i]);
1576 }
1577 }
1578 if (pinuse != NULL) {
1579 *pinuse = 0;
1580
1581 /* first pdesc */
1582 pd = mmd_getpdesc(mmd, NULL, NULL, 1, B_TRUE);
1583 while (pd != NULL) {
1584 pdi = &pd->pd_pdi;
1585
1586 /* next pdesc */
1587 pd = mmd_getpdesc(mmd, pd, NULL, 1, B_TRUE);
1588
1589 /* skip over removed descriptor */
1590 if (pdi->flags & PDESC_REM_DEFER)
1591 continue;
1592
1593 if (pdi->flags & PDESC_HBUF_REF)
1594 *pinuse += PDESC_HDRL(pdi);
1595
1596 if (pdi->flags & PDESC_PBUF_REF) {
1597 for (i = 0; i < pdi->pld_cnt; i++)
1598 *pinuse += PDESC_PLDL(pdi, i);
1599 }
1600 }
1601 }
1602 mutex_exit(&mmd->mmd_pd_slab_lock);
1603 }
1604