1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 22 /* All Rights Reserved */ 23 24 /* 25 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 26 * Use is subject to license terms. 27 * 28 * Copyright 2017 Nexenta Systems, Inc. All rights reserved. 29 * Copyright 2022 Garrett D'Amore 30 */ 31 32 #include <sys/types.h> 33 #include <sys/param.h> 34 #include <sys/thread.h> 35 #include <sys/sysmacros.h> 36 #include <sys/stropts.h> 37 #include <sys/stream.h> 38 #include <sys/strsubr.h> 39 #include <sys/strsun.h> 40 #include <sys/conf.h> 41 #include <sys/debug.h> 42 #include <sys/cmn_err.h> 43 #include <sys/kmem.h> 44 #include <sys/atomic.h> 45 #include <sys/errno.h> 46 #include <sys/vtrace.h> 47 #include <sys/ftrace.h> 48 #include <sys/ontrap.h> 49 #include <sys/sdt.h> 50 #include <sys/strft.h> 51 52 /* 53 * This file contains selected functions from io/stream.c 54 * needed by this library, mostly unmodified. 
55 */ 56 57 /* 58 * STREAMS message allocator: principles of operation 59 * (See usr/src/uts/common/io/stream.c) 60 */ 61 #define DBLK_MAX_CACHE 73728 62 #define DBLK_CACHE_ALIGN 64 63 #define DBLK_MIN_SIZE 8 64 #define DBLK_SIZE_SHIFT 3 65 66 #ifdef _BIG_ENDIAN 67 #define DBLK_RTFU_SHIFT(field) \ 68 (8 * (&((dblk_t *)0)->db_struioflag - &((dblk_t *)0)->field)) 69 #else 70 #define DBLK_RTFU_SHIFT(field) \ 71 (8 * (&((dblk_t *)0)->field - &((dblk_t *)0)->db_ref)) 72 #endif 73 74 #define DBLK_RTFU(ref, type, flags, uioflag) \ 75 (((ref) << DBLK_RTFU_SHIFT(db_ref)) | \ 76 ((type) << DBLK_RTFU_SHIFT(db_type)) | \ 77 (((flags) | (ref - 1)) << DBLK_RTFU_SHIFT(db_flags)) | \ 78 ((uioflag) << DBLK_RTFU_SHIFT(db_struioflag))) 79 #define DBLK_RTFU_REF_MASK (DBLK_REFMAX << DBLK_RTFU_SHIFT(db_ref)) 80 #define DBLK_RTFU_WORD(dbp) (*((uint32_t *)&(dbp)->db_ref)) 81 #define MBLK_BAND_FLAG_WORD(mp) (*((uint32_t *)&(mp)->b_band)) 82 83 static size_t dblk_sizes[] = { 84 #ifdef _LP64 85 16, 80, 144, 208, 272, 336, 528, 1040, 1488, 1936, 2576, 3856, 86 8192, 12048, 16384, 20240, 24576, 28432, 32768, 36624, 87 40960, 44816, 49152, 53008, 57344, 61200, 65536, 69392, 88 #else 89 64, 128, 320, 576, 1088, 1536, 1984, 2624, 3904, 90 8192, 12096, 16384, 20288, 24576, 28480, 32768, 36672, 91 40960, 44864, 49152, 53056, 57344, 61248, 65536, 69440, 92 #endif 93 DBLK_MAX_CACHE, 0 94 }; 95 96 static struct kmem_cache *dblk_cache[DBLK_MAX_CACHE / DBLK_MIN_SIZE]; 97 static struct kmem_cache *mblk_cache; 98 static struct kmem_cache *dblk_esb_cache; 99 100 static void dblk_lastfree(mblk_t *mp, dblk_t *dbp); 101 static mblk_t *allocb_oversize(size_t size, int flags); 102 static int allocb_tryhard_fails; 103 static void frnop_func(void *arg); 104 frtn_t frnop = { frnop_func }; 105 static void bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp); 106 107 /* 108 * Patchable mblk/dblk kmem_cache flags. 
109 */ 110 int dblk_kmem_flags = 0; 111 int mblk_kmem_flags = 0; 112 113 static int 114 dblk_constructor(void *buf, void *cdrarg, int kmflags) 115 { 116 dblk_t *dbp = buf; 117 ssize_t msg_size = (ssize_t)cdrarg; 118 size_t index; 119 120 ASSERT(msg_size != 0); 121 122 index = (msg_size - 1) >> DBLK_SIZE_SHIFT; 123 124 ASSERT(index < (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)); 125 126 if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL) 127 return (-1); 128 if ((msg_size & PAGEOFFSET) == 0) { 129 dbp->db_base = kmem_alloc(msg_size, kmflags); 130 if (dbp->db_base == NULL) { 131 kmem_cache_free(mblk_cache, dbp->db_mblk); 132 return (-1); 133 } 134 } else { 135 dbp->db_base = (unsigned char *)&dbp[1]; 136 } 137 138 dbp->db_mblk->b_datap = dbp; 139 dbp->db_cache = dblk_cache[index]; 140 dbp->db_lim = dbp->db_base + msg_size; 141 dbp->db_free = dbp->db_lastfree = dblk_lastfree; 142 dbp->db_frtnp = NULL; 143 dbp->db_fthdr = NULL; 144 dbp->db_credp = NULL; 145 dbp->db_cpid = -1; 146 dbp->db_struioflag = 0; 147 dbp->db_struioun.cksum.flags = 0; 148 return (0); 149 } 150 151 /*ARGSUSED*/ 152 static int 153 dblk_esb_constructor(void *buf, void *cdrarg, int kmflags) 154 { 155 dblk_t *dbp = buf; 156 157 if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL) 158 return (-1); 159 dbp->db_mblk->b_datap = dbp; 160 dbp->db_cache = dblk_esb_cache; 161 dbp->db_fthdr = NULL; 162 dbp->db_credp = NULL; 163 dbp->db_cpid = -1; 164 dbp->db_struioflag = 0; 165 dbp->db_struioun.cksum.flags = 0; 166 return (0); 167 } 168 169 static int 170 bcache_dblk_constructor(void *buf, void *cdrarg, int kmflags) 171 { 172 dblk_t *dbp = buf; 173 bcache_t *bcp = cdrarg; 174 175 if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL) 176 return (-1); 177 178 dbp->db_base = kmem_cache_alloc(bcp->buffer_cache, kmflags); 179 if (dbp->db_base == NULL) { 180 kmem_cache_free(mblk_cache, dbp->db_mblk); 181 return (-1); 182 } 183 184 dbp->db_mblk->b_datap = dbp; 185 dbp->db_cache = 
(void *)bcp; 186 dbp->db_lim = dbp->db_base + bcp->size; 187 dbp->db_free = dbp->db_lastfree = bcache_dblk_lastfree; 188 dbp->db_frtnp = NULL; 189 dbp->db_fthdr = NULL; 190 dbp->db_credp = NULL; 191 dbp->db_cpid = -1; 192 dbp->db_struioflag = 0; 193 dbp->db_struioun.cksum.flags = 0; 194 return (0); 195 } 196 197 /*ARGSUSED*/ 198 static void 199 dblk_destructor(void *buf, void *cdrarg) 200 { 201 dblk_t *dbp = buf; 202 ssize_t msg_size = (ssize_t)cdrarg; 203 204 ASSERT(dbp->db_mblk->b_datap == dbp); 205 ASSERT(msg_size != 0); 206 ASSERT(dbp->db_struioflag == 0); 207 ASSERT(dbp->db_struioun.cksum.flags == 0); 208 209 if ((msg_size & PAGEOFFSET) == 0) { 210 kmem_free(dbp->db_base, msg_size); 211 } 212 213 kmem_cache_free(mblk_cache, dbp->db_mblk); 214 } 215 216 static void 217 bcache_dblk_destructor(void *buf, void *cdrarg) 218 { 219 dblk_t *dbp = buf; 220 bcache_t *bcp = cdrarg; 221 222 kmem_cache_free(bcp->buffer_cache, dbp->db_base); 223 224 ASSERT(dbp->db_mblk->b_datap == dbp); 225 ASSERT(dbp->db_struioflag == 0); 226 ASSERT(dbp->db_struioun.cksum.flags == 0); 227 228 kmem_cache_free(mblk_cache, dbp->db_mblk); 229 } 230 231 /* Needed in the ASSERT below */ 232 #ifdef DEBUG 233 #ifdef _KERNEL 234 #define KMEM_SLAB_T_SZ sizeof (kmem_slab_t) 235 #else /* _KERNEL */ 236 #define KMEM_SLAB_T_SZ 64 /* fakekernel */ 237 #endif /* _KERNEL */ 238 #endif /* DEBUG */ 239 240 void 241 streams_msg_init(void) 242 { 243 char name[40]; 244 size_t size; 245 size_t lastsize = DBLK_MIN_SIZE; 246 size_t *sizep; 247 struct kmem_cache *cp; 248 size_t tot_size; 249 int offset; 250 251 mblk_cache = kmem_cache_create("streams_mblk", sizeof (mblk_t), 32, 252 NULL, NULL, NULL, NULL, NULL, mblk_kmem_flags); 253 254 for (sizep = dblk_sizes; (size = *sizep) != 0; sizep++) { 255 256 if ((offset = (size & PAGEOFFSET)) != 0) { 257 /* 258 * We are in the middle of a page, dblk should 259 * be allocated on the same page 260 */ 261 tot_size = size + sizeof (dblk_t); 262 ASSERT((offset + sizeof 
(dblk_t) + KMEM_SLAB_T_SZ) 263 < PAGESIZE); 264 ASSERT((tot_size & (DBLK_CACHE_ALIGN - 1)) == 0); 265 266 } else { 267 268 /* 269 * buf size is multiple of page size, dblk and 270 * buffer are allocated separately. 271 */ 272 273 ASSERT((size & (DBLK_CACHE_ALIGN - 1)) == 0); 274 tot_size = sizeof (dblk_t); 275 } 276 277 (void) sprintf(name, "streams_dblk_%ld", (long)size); 278 cp = kmem_cache_create(name, tot_size, DBLK_CACHE_ALIGN, 279 dblk_constructor, dblk_destructor, NULL, (void *)(size), 280 NULL, dblk_kmem_flags); 281 282 while (lastsize <= size) { 283 dblk_cache[(lastsize - 1) >> DBLK_SIZE_SHIFT] = cp; 284 lastsize += DBLK_MIN_SIZE; 285 } 286 } 287 288 dblk_esb_cache = kmem_cache_create("streams_dblk_esb", sizeof (dblk_t), 289 DBLK_CACHE_ALIGN, dblk_esb_constructor, dblk_destructor, NULL, 290 (void *)sizeof (dblk_t), NULL, dblk_kmem_flags); 291 292 /* fthdr_cache, ftblk_cache, ... */ 293 } 294 295 /*ARGSUSED*/ 296 mblk_t * 297 allocb(size_t size, uint_t pri) 298 { 299 dblk_t *dbp; 300 mblk_t *mp; 301 size_t index; 302 303 index = (size - 1) >> DBLK_SIZE_SHIFT; 304 305 if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) { 306 if (size != 0) { 307 mp = allocb_oversize(size, KM_NOSLEEP); 308 goto out; 309 } 310 index = 0; 311 } 312 313 if ((dbp = kmem_cache_alloc(dblk_cache[index], KM_NOSLEEP)) == NULL) { 314 mp = NULL; 315 goto out; 316 } 317 318 mp = dbp->db_mblk; 319 DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0); 320 mp->b_next = mp->b_prev = mp->b_cont = NULL; 321 mp->b_rptr = mp->b_wptr = dbp->db_base; 322 mp->b_queue = NULL; 323 MBLK_BAND_FLAG_WORD(mp) = 0; 324 STR_FTALLOC(&dbp->db_fthdr, FTEV_ALLOCB, size); 325 out: 326 FTRACE_1("allocb(): mp=0x%p", (uintptr_t)mp); 327 328 return (mp); 329 } 330 331 /* 332 * Allocate an mblk taking db_credp and db_cpid from the template. 333 * Allow the cred to be NULL. 
334 */ 335 mblk_t * 336 allocb_tmpl(size_t size, const mblk_t *tmpl) 337 { 338 mblk_t *mp = allocb(size, 0); 339 340 if (mp != NULL) { 341 dblk_t *src = tmpl->b_datap; 342 dblk_t *dst = mp->b_datap; 343 cred_t *cr; 344 pid_t cpid; 345 346 cr = msg_getcred(tmpl, &cpid); 347 if (cr != NULL) 348 crhold(dst->db_credp = cr); 349 dst->db_cpid = cpid; 350 dst->db_type = src->db_type; 351 } 352 return (mp); 353 } 354 355 mblk_t * 356 allocb_cred(size_t size, cred_t *cr, pid_t cpid) 357 { 358 mblk_t *mp = allocb(size, 0); 359 360 ASSERT(cr != NULL); 361 if (mp != NULL) { 362 dblk_t *dbp = mp->b_datap; 363 364 crhold(dbp->db_credp = cr); 365 dbp->db_cpid = cpid; 366 } 367 return (mp); 368 } 369 370 mblk_t * 371 allocb_cred_wait(size_t size, uint_t flags, int *error, cred_t *cr, pid_t cpid) 372 { 373 mblk_t *mp = allocb_wait(size, 0, flags, error); 374 375 ASSERT(cr != NULL); 376 if (mp != NULL) { 377 dblk_t *dbp = mp->b_datap; 378 379 crhold(dbp->db_credp = cr); 380 dbp->db_cpid = cpid; 381 } 382 383 return (mp); 384 } 385 386 /* 387 * Extract the db_cred (and optionally db_cpid) from a message. 388 * We find the first mblk which has a non-NULL db_cred and use that. 389 * If none found we return NULL. 390 * Does NOT get a hold on the cred. 391 */ 392 cred_t * 393 msg_getcred(const mblk_t *mp, pid_t *cpidp) 394 { 395 cred_t *cr = NULL; 396 397 while (mp != NULL) { 398 dblk_t *dbp = mp->b_datap; 399 400 cr = dbp->db_credp; 401 if (cr == NULL) { 402 mp = mp->b_cont; 403 continue; 404 } 405 if (cpidp != NULL) 406 *cpidp = dbp->db_cpid; 407 408 /* DEBUG check for only one db_credp */ 409 return (cr); 410 } 411 if (cpidp != NULL) 412 *cpidp = NOPID; 413 return (NULL); 414 } 415 416 /* 417 * Variant of msg_getcred which, when a cred is found 418 * 1. Returns with a hold on the cred 419 * 2. Clears the first cred in the mblk. 420 * This is more efficient to use than a msg_getcred() + crhold() when 421 * the message is freed after the cred has been extracted. 
422 * 423 * The caller is responsible for ensuring that there is no other reference 424 * on the message since db_credp can not be cleared when there are other 425 * references. 426 */ 427 cred_t * 428 msg_extractcred(mblk_t *mp, pid_t *cpidp) 429 { 430 cred_t *cr = NULL; 431 432 while (mp != NULL) { 433 dblk_t *dbp = mp->b_datap; 434 435 cr = dbp->db_credp; 436 if (cr == NULL) { 437 mp = mp->b_cont; 438 continue; 439 } 440 ASSERT(dbp->db_ref == 1); 441 dbp->db_credp = NULL; 442 if (cpidp != NULL) 443 *cpidp = dbp->db_cpid; 444 445 /* DEBUG check for only one db_credp */ 446 return (cr); 447 } 448 return (NULL); 449 } 450 451 /* _KERNEL msg_getlabel() */ 452 453 void 454 freeb(mblk_t *mp) 455 { 456 dblk_t *dbp = mp->b_datap; 457 458 ASSERT(dbp->db_ref > 0); 459 ASSERT(mp->b_next == NULL && mp->b_prev == NULL); 460 FTRACE_1("freeb(): mp=0x%lx", (uintptr_t)mp); 461 462 STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref); 463 464 dbp->db_free(mp, dbp); 465 } 466 467 void 468 freemsg(mblk_t *mp) 469 { 470 FTRACE_1("freemsg(): mp=0x%lx", (uintptr_t)mp); 471 while (mp) { 472 dblk_t *dbp = mp->b_datap; 473 mblk_t *mp_cont = mp->b_cont; 474 475 ASSERT(dbp->db_ref > 0); 476 ASSERT(mp->b_next == NULL && mp->b_prev == NULL); 477 478 STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref); 479 480 dbp->db_free(mp, dbp); 481 mp = mp_cont; 482 } 483 } 484 485 /* 486 * Reallocate a block for another use. Try hard to use the old block. 487 * If the old data is wanted (copy), leave b_wptr at the end of the data, 488 * otherwise return b_wptr = b_rptr. 489 * 490 * This routine is private and unstable. 
491 */ 492 mblk_t * 493 reallocb(mblk_t *mp, size_t size, uint_t copy) 494 { 495 mblk_t *mp1; 496 unsigned char *old_rptr; 497 ptrdiff_t cur_size; 498 499 if (mp == NULL) 500 return (allocb(size, BPRI_HI)); 501 502 cur_size = mp->b_wptr - mp->b_rptr; 503 old_rptr = mp->b_rptr; 504 505 ASSERT(mp->b_datap->db_ref != 0); 506 507 if (mp->b_datap->db_ref == 1 && MBLKSIZE(mp) >= size) { 508 /* 509 * If the data is wanted and it will fit where it is, no 510 * work is required. 511 */ 512 if (copy && mp->b_datap->db_lim - mp->b_rptr >= size) 513 return (mp); 514 515 mp->b_wptr = mp->b_rptr = mp->b_datap->db_base; 516 mp1 = mp; 517 } else if ((mp1 = allocb_tmpl(size, mp)) != NULL) { 518 /* XXX other mp state could be copied too, db_flags ... ? */ 519 mp1->b_cont = mp->b_cont; 520 } else { 521 return (NULL); 522 } 523 524 if (copy) { 525 bcopy(old_rptr, mp1->b_rptr, cur_size); 526 mp1->b_wptr = mp1->b_rptr + cur_size; 527 } 528 529 if (mp != mp1) 530 freeb(mp); 531 532 return (mp1); 533 } 534 535 static void 536 dblk_lastfree(mblk_t *mp, dblk_t *dbp) 537 { 538 ASSERT(dbp->db_mblk == mp); 539 if (dbp->db_fthdr != NULL) 540 str_ftfree(dbp); 541 542 /* set credp and projid to be 'unspecified' before returning to cache */ 543 if (dbp->db_credp != NULL) { 544 crfree(dbp->db_credp); 545 dbp->db_credp = NULL; 546 } 547 dbp->db_cpid = -1; 548 549 /* Reset the struioflag and the checksum flag fields */ 550 dbp->db_struioflag = 0; 551 dbp->db_struioun.cksum.flags = 0; 552 553 /* and the COOKED and/or UIOA flag(s) */ 554 dbp->db_flags &= ~(DBLK_COOKED | DBLK_UIOA); 555 556 kmem_cache_free(dbp->db_cache, dbp); 557 } 558 559 static void 560 dblk_decref(mblk_t *mp, dblk_t *dbp) 561 { 562 if (dbp->db_ref != 1) { 563 uint32_t rtfu = atomic_add_32_nv(&DBLK_RTFU_WORD(dbp), 564 -(1 << DBLK_RTFU_SHIFT(db_ref))); 565 /* 566 * atomic_add_32_nv() just decremented db_ref, so we no longer 567 * have a reference to the dblk, which means another thread 568 * could free it. 
Therefore we cannot examine the dblk to 569 * determine whether ours was the last reference. Instead, 570 * we extract the new and minimum reference counts from rtfu. 571 * Note that all we're really saying is "if (ref != refmin)". 572 */ 573 if (((rtfu >> DBLK_RTFU_SHIFT(db_ref)) & DBLK_REFMAX) != 574 ((rtfu >> DBLK_RTFU_SHIFT(db_flags)) & DBLK_REFMIN)) { 575 kmem_cache_free(mblk_cache, mp); 576 return; 577 } 578 } 579 dbp->db_mblk = mp; 580 dbp->db_free = dbp->db_lastfree; 581 dbp->db_lastfree(mp, dbp); 582 } 583 584 mblk_t * 585 dupb(mblk_t *mp) 586 { 587 dblk_t *dbp = mp->b_datap; 588 mblk_t *new_mp; 589 uint32_t oldrtfu, newrtfu; 590 591 if ((new_mp = kmem_cache_alloc(mblk_cache, KM_NOSLEEP)) == NULL) 592 goto out; 593 594 new_mp->b_next = new_mp->b_prev = new_mp->b_cont = NULL; 595 new_mp->b_rptr = mp->b_rptr; 596 new_mp->b_wptr = mp->b_wptr; 597 new_mp->b_datap = dbp; 598 new_mp->b_queue = NULL; 599 MBLK_BAND_FLAG_WORD(new_mp) = MBLK_BAND_FLAG_WORD(mp); 600 601 STR_FTEVENT_MBLK(mp, caller(), FTEV_DUPB, dbp->db_ref); 602 603 dbp->db_free = dblk_decref; 604 do { 605 ASSERT(dbp->db_ref > 0); 606 oldrtfu = DBLK_RTFU_WORD(dbp); 607 newrtfu = oldrtfu + (1 << DBLK_RTFU_SHIFT(db_ref)); 608 /* 609 * If db_ref is maxed out we can't dup this message anymore. 610 */ 611 if ((oldrtfu & DBLK_RTFU_REF_MASK) == DBLK_RTFU_REF_MASK) { 612 kmem_cache_free(mblk_cache, new_mp); 613 new_mp = NULL; 614 goto out; 615 } 616 } while (atomic_cas_32(&DBLK_RTFU_WORD(dbp), oldrtfu, newrtfu) != 617 oldrtfu); 618 619 out: 620 FTRACE_1("dupb(): new_mp=0x%lx", (uintptr_t)new_mp); 621 return (new_mp); 622 } 623 624 /*ARGSUSED*/ 625 static void 626 frnop_func(void *arg) 627 { 628 } 629 630 /* 631 * Generic esballoc used to implement the four flavors: [d]esballoc[a]. 
632 * and allocb_oversize 633 */ 634 static mblk_t * 635 gesballoc(unsigned char *base, size_t size, uint32_t db_rtfu, frtn_t *frp, 636 void (*lastfree)(mblk_t *, dblk_t *), int kmflags) 637 { 638 dblk_t *dbp; 639 mblk_t *mp; 640 641 ASSERT(base != NULL && frp != NULL); 642 643 if ((dbp = kmem_cache_alloc(dblk_esb_cache, kmflags)) == NULL) { 644 mp = NULL; 645 goto out; 646 } 647 648 mp = dbp->db_mblk; 649 dbp->db_base = base; 650 dbp->db_lim = base + size; 651 dbp->db_free = dbp->db_lastfree = lastfree; 652 dbp->db_frtnp = frp; 653 DBLK_RTFU_WORD(dbp) = db_rtfu; 654 mp->b_next = mp->b_prev = mp->b_cont = NULL; 655 mp->b_rptr = mp->b_wptr = base; 656 mp->b_queue = NULL; 657 MBLK_BAND_FLAG_WORD(mp) = 0; 658 659 out: 660 FTRACE_1("gesballoc(): mp=0x%lx", (uintptr_t)mp); 661 return (mp); 662 } 663 664 static void 665 bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp) 666 { 667 bcache_t *bcp = dbp->db_cache; 668 669 ASSERT(dbp->db_mblk == mp); 670 if (dbp->db_fthdr != NULL) 671 str_ftfree(dbp); 672 673 /* set credp and projid to be 'unspecified' before returning to cache */ 674 if (dbp->db_credp != NULL) { 675 crfree(dbp->db_credp); 676 dbp->db_credp = NULL; 677 } 678 dbp->db_cpid = -1; 679 dbp->db_struioflag = 0; 680 dbp->db_struioun.cksum.flags = 0; 681 682 mutex_enter(&bcp->mutex); 683 kmem_cache_free(bcp->dblk_cache, dbp); 684 bcp->alloc--; 685 686 if (bcp->alloc == 0 && bcp->destroy != 0) { 687 kmem_cache_destroy(bcp->dblk_cache); 688 kmem_cache_destroy(bcp->buffer_cache); 689 mutex_exit(&bcp->mutex); 690 mutex_destroy(&bcp->mutex); 691 kmem_free(bcp, sizeof (bcache_t)); 692 } else { 693 mutex_exit(&bcp->mutex); 694 } 695 } 696 697 bcache_t * 698 bcache_create(char *name, size_t size, uint_t align) 699 { 700 bcache_t *bcp; 701 char buffer[255]; 702 703 ASSERT((align & (align - 1)) == 0); 704 705 if ((bcp = kmem_alloc(sizeof (bcache_t), KM_NOSLEEP)) == NULL) 706 return (NULL); 707 708 bcp->size = size; 709 bcp->align = align; 710 bcp->alloc = 0; 711 bcp->destroy = 0; 
712 713 mutex_init(&bcp->mutex, NULL, MUTEX_DRIVER, NULL); 714 715 (void) sprintf(buffer, "%s_buffer_cache", name); 716 bcp->buffer_cache = kmem_cache_create(buffer, size, align, NULL, NULL, 717 NULL, NULL, NULL, 0); 718 (void) sprintf(buffer, "%s_dblk_cache", name); 719 bcp->dblk_cache = kmem_cache_create(buffer, sizeof (dblk_t), 720 DBLK_CACHE_ALIGN, bcache_dblk_constructor, bcache_dblk_destructor, 721 NULL, (void *)bcp, NULL, 0); 722 723 return (bcp); 724 } 725 726 void 727 bcache_destroy(bcache_t *bcp) 728 { 729 ASSERT(bcp != NULL); 730 731 mutex_enter(&bcp->mutex); 732 if (bcp->alloc == 0) { 733 kmem_cache_destroy(bcp->dblk_cache); 734 kmem_cache_destroy(bcp->buffer_cache); 735 mutex_exit(&bcp->mutex); 736 mutex_destroy(&bcp->mutex); 737 kmem_free(bcp, sizeof (bcache_t)); 738 } else { 739 bcp->destroy++; 740 mutex_exit(&bcp->mutex); 741 } 742 } 743 744 /*ARGSUSED*/ 745 mblk_t * 746 bcache_allocb(bcache_t *bcp, uint_t pri) 747 { 748 dblk_t *dbp; 749 mblk_t *mp = NULL; 750 751 ASSERT(bcp != NULL); 752 753 mutex_enter(&bcp->mutex); 754 if (bcp->destroy != 0) { 755 mutex_exit(&bcp->mutex); 756 goto out; 757 } 758 759 if ((dbp = kmem_cache_alloc(bcp->dblk_cache, KM_NOSLEEP)) == NULL) { 760 mutex_exit(&bcp->mutex); 761 goto out; 762 } 763 bcp->alloc++; 764 mutex_exit(&bcp->mutex); 765 766 ASSERT(((uintptr_t)(dbp->db_base) & (bcp->align - 1)) == 0); 767 768 mp = dbp->db_mblk; 769 DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0); 770 mp->b_next = mp->b_prev = mp->b_cont = NULL; 771 mp->b_rptr = mp->b_wptr = dbp->db_base; 772 mp->b_queue = NULL; 773 MBLK_BAND_FLAG_WORD(mp) = 0; 774 STR_FTALLOC(&dbp->db_fthdr, FTEV_BCALLOCB, bcp->size); 775 out: 776 FTRACE_1("bcache_allocb(): mp=0x%p", (uintptr_t)mp); 777 778 return (mp); 779 } 780 781 static void 782 dblk_lastfree_oversize(mblk_t *mp, dblk_t *dbp) 783 { 784 ASSERT(dbp->db_mblk == mp); 785 if (dbp->db_fthdr != NULL) 786 str_ftfree(dbp); 787 788 /* set credp and projid to be 'unspecified' before returning to cache */ 789 
if (dbp->db_credp != NULL) { 790 crfree(dbp->db_credp); 791 dbp->db_credp = NULL; 792 } 793 dbp->db_cpid = -1; 794 dbp->db_struioflag = 0; 795 dbp->db_struioun.cksum.flags = 0; 796 797 kmem_free(dbp->db_base, dbp->db_lim - dbp->db_base); 798 kmem_cache_free(dbp->db_cache, dbp); 799 } 800 801 static mblk_t * 802 allocb_oversize(size_t size, int kmflags) 803 { 804 mblk_t *mp; 805 void *buf; 806 807 size = P2ROUNDUP(size, DBLK_CACHE_ALIGN); 808 if ((buf = kmem_alloc(size, kmflags)) == NULL) 809 return (NULL); 810 if ((mp = gesballoc(buf, size, DBLK_RTFU(1, M_DATA, 0, 0), 811 &frnop, dblk_lastfree_oversize, kmflags)) == NULL) 812 kmem_free(buf, size); 813 814 if (mp != NULL) 815 STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBIG, size); 816 817 return (mp); 818 } 819 820 mblk_t * 821 allocb_tryhard(size_t target_size) 822 { 823 size_t size; 824 mblk_t *bp; 825 826 for (size = target_size; size < target_size + 512; 827 size += DBLK_CACHE_ALIGN) 828 if ((bp = allocb(size, BPRI_HI)) != NULL) 829 return (bp); 830 allocb_tryhard_fails++; 831 return (NULL); 832 } 833 834 /* 835 * This routine is consolidation private for STREAMS internal use 836 * This routine may only be called from sync routines (i.e., not 837 * from put or service procedures). It is located here (rather 838 * than strsubr.c) so that we don't have to expose all of the 839 * allocb() implementation details in header files. 
840 */ 841 mblk_t * 842 allocb_wait(size_t size, uint_t pri, uint_t flags, int *error) 843 { 844 dblk_t *dbp; 845 mblk_t *mp; 846 size_t index; 847 848 index = (size -1) >> DBLK_SIZE_SHIFT; 849 850 if (flags & STR_NOSIG) { 851 if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) { 852 if (size != 0) { 853 mp = allocb_oversize(size, KM_SLEEP); 854 FTRACE_1("allocb_wait (NOSIG): mp=0x%lx", 855 (uintptr_t)mp); 856 return (mp); 857 } 858 index = 0; 859 } 860 861 dbp = kmem_cache_alloc(dblk_cache[index], KM_SLEEP); 862 mp = dbp->db_mblk; 863 DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0); 864 mp->b_next = mp->b_prev = mp->b_cont = NULL; 865 mp->b_rptr = mp->b_wptr = dbp->db_base; 866 mp->b_queue = NULL; 867 MBLK_BAND_FLAG_WORD(mp) = 0; 868 STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBW, size); 869 870 FTRACE_1("allocb_wait (NOSIG): mp=0x%lx", (uintptr_t)mp); 871 872 } else { 873 while ((mp = allocb(size, pri)) == NULL) { 874 if ((*error = strwaitbuf(size, BPRI_HI)) != 0) 875 return (NULL); 876 } 877 } 878 879 return (mp); 880 } 881 882 /* 883 * Call function 'func' with 'arg' when a class zero block can 884 * be allocated with priority 'pri'. 885 */ 886 bufcall_id_t 887 esbbcall(uint_t pri, void (*func)(void *), void *arg) 888 { 889 return (bufcall(1, pri, func, arg)); 890 } 891 892 /* 893 * Allocates an iocblk (M_IOCTL) block. Properly sets the credentials 894 * ioc_id, rval and error of the struct ioctl to set up an ioctl call. 895 * This provides consistency for all internal allocators of ioctl. 896 */ 897 mblk_t * 898 mkiocb(uint_t cmd) 899 { 900 struct iocblk *ioc; 901 mblk_t *mp; 902 903 /* 904 * Allocate enough space for any of the ioctl related messages. 905 */ 906 if ((mp = allocb(sizeof (union ioctypes), BPRI_MED)) == NULL) 907 return (NULL); 908 909 bzero(mp->b_rptr, sizeof (union ioctypes)); 910 911 /* 912 * Set the mblk_t information and ptrs correctly. 
913 */ 914 mp->b_wptr += sizeof (struct iocblk); 915 mp->b_datap->db_type = M_IOCTL; 916 917 /* 918 * Fill in the fields. 919 */ 920 ioc = (struct iocblk *)mp->b_rptr; 921 ioc->ioc_cmd = cmd; 922 ioc->ioc_cr = kcred; 923 ioc->ioc_id = getiocseqno(); 924 ioc->ioc_flag = IOC_NATIVE; 925 return (mp); 926 } 927 928 /* 929 * test if block of given size can be allocated with a request of 930 * the given priority. 931 * 'pri' is no longer used, but is retained for compatibility. 932 */ 933 /* ARGSUSED */ 934 int 935 testb(size_t size, uint_t pri) 936 { 937 return ((size + sizeof (dblk_t)) <= kmem_avail()); 938 } 939 940 /* _KERNEL: bufcall, unbufcall */ 941 942 /* 943 * Duplicate a message block by block (uses dupb), returning 944 * a pointer to the duplicate message. 945 * Returns a non-NULL value only if the entire message 946 * was dup'd. 947 */ 948 mblk_t * 949 dupmsg(mblk_t *bp) 950 { 951 mblk_t *head, *nbp; 952 953 if (!bp || !(nbp = head = dupb(bp))) 954 return (NULL); 955 956 while (bp->b_cont) { 957 if (!(nbp->b_cont = dupb(bp->b_cont))) { 958 freemsg(head); 959 return (NULL); 960 } 961 nbp = nbp->b_cont; 962 bp = bp->b_cont; 963 } 964 return (head); 965 } 966 967 #define DUPB_NOLOAN(bp) \ 968 ((((bp)->b_datap->db_struioflag & STRUIO_ZC) != 0) ? \ 969 copyb((bp)) : dupb((bp))) 970 971 mblk_t * 972 dupmsg_noloan(mblk_t *bp) 973 { 974 mblk_t *head, *nbp; 975 976 if (bp == NULL || DB_TYPE(bp) != M_DATA || 977 ((nbp = head = DUPB_NOLOAN(bp)) == NULL)) 978 return (NULL); 979 980 while (bp->b_cont) { 981 if ((nbp->b_cont = DUPB_NOLOAN(bp->b_cont)) == NULL) { 982 freemsg(head); 983 return (NULL); 984 } 985 nbp = nbp->b_cont; 986 bp = bp->b_cont; 987 } 988 return (head); 989 } 990 991 /* 992 * Copy data from message and data block to newly allocated message and 993 * data block. Returns new message block pointer, or NULL if error. 994 * The alignment of rptr (w.r.t. 
word alignment) will be the same in the copy
 * as in the original even when db_base is not word aligned.  (bug 1052877)
 */
mblk_t *
copyb(mblk_t *bp)
{
	mblk_t *nbp;
	dblk_t *odb, *ndb;
	uchar_t *dest;
	size_t buflen;
	size_t skew;

	ASSERT(bp->b_wptr >= bp->b_rptr);

	odb = bp->b_datap;
	if (odb->db_fthdr != NULL)
		STR_FTEVENT_MBLK(bp, caller(), FTEV_COPYB, 0);

	/* the whole buffer is copied, not just the rptr..wptr window */
	buflen = odb->db_lim - odb->db_base;
	skew = P2PHASE((uintptr_t)odb->db_base, sizeof (uint_t));

	nbp = allocb_tmpl(buflen + skew, bp);
	if (nbp == NULL)
		return (NULL);

	nbp->b_flag = bp->b_flag;
	nbp->b_band = bp->b_band;
	ndb = nbp->b_datap;

	/*
	 * Well, here is a potential issue.  If we are trying to
	 * trace a flow, and we copy the message, we might lose
	 * information about where this message might have been.
	 * So we should inherit the FT data.  On the other hand,
	 * a user might be interested only in alloc to free data.
	 * So I guess the real answer is to provide a tunable.
	 */
	STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1);

	/* shift the copy so it has the same misalignment as the source */
	dest = ndb->db_base + skew;
	bcopy(odb->db_base, dest, buflen);

	nbp->b_rptr = dest + (bp->b_rptr - odb->db_base);
	nbp->b_wptr = nbp->b_rptr + MBLKL(bp);

	return (nbp);
}

/*
 * Copy data from message to newly allocated message using new
 * data blocks. Returns a pointer to the new message, or NULL if error.
1042 */ 1043 mblk_t * 1044 copymsg(mblk_t *bp) 1045 { 1046 mblk_t *head, *nbp; 1047 1048 if (!bp || !(nbp = head = copyb(bp))) 1049 return (NULL); 1050 1051 while (bp->b_cont) { 1052 if (!(nbp->b_cont = copyb(bp->b_cont))) { 1053 freemsg(head); 1054 return (NULL); 1055 } 1056 nbp = nbp->b_cont; 1057 bp = bp->b_cont; 1058 } 1059 return (head); 1060 } 1061 1062 /* 1063 * link a message block to tail of message 1064 */ 1065 void 1066 linkb(mblk_t *mp, mblk_t *bp) 1067 { 1068 ASSERT(mp && bp); 1069 1070 for (; mp->b_cont; mp = mp->b_cont) 1071 ; 1072 mp->b_cont = bp; 1073 } 1074 1075 /* 1076 * unlink a message block from head of message 1077 * return pointer to new message. 1078 * NULL if message becomes empty. 1079 */ 1080 mblk_t * 1081 unlinkb(mblk_t *bp) 1082 { 1083 mblk_t *bp1; 1084 1085 bp1 = bp->b_cont; 1086 bp->b_cont = NULL; 1087 return (bp1); 1088 } 1089 1090 /* 1091 * remove a message block "bp" from message "mp" 1092 * 1093 * Return pointer to new message or NULL if no message remains. 1094 * Return -1 if bp is not found in message. 1095 */ 1096 mblk_t * 1097 rmvb(mblk_t *mp, mblk_t *bp) 1098 { 1099 mblk_t *tmp; 1100 mblk_t *lastp = NULL; 1101 1102 ASSERT(mp && bp); 1103 for (tmp = mp; tmp; tmp = tmp->b_cont) { 1104 if (tmp == bp) { 1105 if (lastp) 1106 lastp->b_cont = tmp->b_cont; 1107 else 1108 mp = tmp->b_cont; 1109 tmp->b_cont = NULL; 1110 return (mp); 1111 } 1112 lastp = tmp; 1113 } 1114 return ((mblk_t *)-1); 1115 } 1116 1117 /* 1118 * Concatenate and align first len bytes of common 1119 * message type. Len == -1, means concat everything. 1120 * Returns 1 on success, 0 on failure 1121 * After the pullup, mp points to the pulled up data. 
1122 */ 1123 int 1124 pullupmsg(mblk_t *mp, ssize_t len) 1125 { 1126 mblk_t *bp, *b_cont; 1127 dblk_t *dbp; 1128 ssize_t n; 1129 1130 ASSERT(mp->b_datap->db_ref > 0); 1131 ASSERT(mp->b_next == NULL && mp->b_prev == NULL); 1132 1133 /* 1134 * We won't handle Multidata message, since it contains 1135 * metadata which this function has no knowledge of; we 1136 * assert on DEBUG, and return failure otherwise. 1137 */ 1138 ASSERT(mp->b_datap->db_type != M_MULTIDATA); 1139 if (mp->b_datap->db_type == M_MULTIDATA) 1140 return (0); 1141 1142 if (len == -1) { 1143 if (mp->b_cont == NULL && str_aligned(mp->b_rptr)) 1144 return (1); 1145 len = xmsgsize(mp); 1146 } else { 1147 ssize_t first_mblk_len = mp->b_wptr - mp->b_rptr; 1148 ASSERT(first_mblk_len >= 0); 1149 /* 1150 * If the length is less than that of the first mblk, 1151 * we want to pull up the message into an aligned mblk. 1152 * Though not part of the spec, some callers assume it. 1153 */ 1154 if (len <= first_mblk_len) { 1155 if (str_aligned(mp->b_rptr)) 1156 return (1); 1157 len = first_mblk_len; 1158 } else if (xmsgsize(mp) < len) 1159 return (0); 1160 } 1161 1162 if ((bp = allocb_tmpl(len, mp)) == NULL) 1163 return (0); 1164 1165 dbp = bp->b_datap; 1166 *bp = *mp; /* swap mblks so bp heads the old msg... */ 1167 mp->b_datap = dbp; /* ... 
and mp heads the new message */ 1168 mp->b_datap->db_mblk = mp; 1169 bp->b_datap->db_mblk = bp; 1170 mp->b_rptr = mp->b_wptr = dbp->db_base; 1171 1172 do { 1173 ASSERT(bp->b_datap->db_ref > 0); 1174 ASSERT(bp->b_wptr >= bp->b_rptr); 1175 n = MIN(bp->b_wptr - bp->b_rptr, len); 1176 ASSERT(n >= 0); /* allow zero-length mblk_t's */ 1177 if (n > 0) 1178 bcopy(bp->b_rptr, mp->b_wptr, (size_t)n); 1179 mp->b_wptr += n; 1180 bp->b_rptr += n; 1181 len -= n; 1182 if (bp->b_rptr != bp->b_wptr) 1183 break; 1184 b_cont = bp->b_cont; 1185 freeb(bp); 1186 bp = b_cont; 1187 } while (len && bp); 1188 1189 mp->b_cont = bp; /* tack on whatever wasn't pulled up */ 1190 1191 return (1); 1192 } 1193 1194 /* 1195 * Concatenate and align at least the first len bytes of common message 1196 * type. Len == -1 means concatenate everything. The original message is 1197 * unaltered. Returns a pointer to a new message on success, otherwise 1198 * returns NULL. 1199 */ 1200 mblk_t * 1201 msgpullup(mblk_t *mp, ssize_t len) 1202 { 1203 mblk_t *newmp; 1204 ssize_t totlen; 1205 ssize_t n; 1206 1207 /* 1208 * We won't handle Multidata message, since it contains 1209 * metadata which this function has no knowledge of; we 1210 * assert on DEBUG, and return failure otherwise. 1211 */ 1212 ASSERT(mp->b_datap->db_type != M_MULTIDATA); 1213 if (mp->b_datap->db_type == M_MULTIDATA) 1214 return (NULL); 1215 1216 totlen = xmsgsize(mp); 1217 1218 if ((len > 0) && (len > totlen)) 1219 return (NULL); 1220 1221 /* 1222 * Copy all of the first msg type into one new mblk, then dupmsg 1223 * and link the rest onto this. 
1224 */ 1225 1226 len = totlen; 1227 1228 if ((newmp = allocb_tmpl(len, mp)) == NULL) 1229 return (NULL); 1230 1231 newmp->b_flag = mp->b_flag; 1232 newmp->b_band = mp->b_band; 1233 1234 while (len > 0) { 1235 n = mp->b_wptr - mp->b_rptr; 1236 ASSERT(n >= 0); /* allow zero-length mblk_t's */ 1237 if (n > 0) 1238 bcopy(mp->b_rptr, newmp->b_wptr, n); 1239 newmp->b_wptr += n; 1240 len -= n; 1241 mp = mp->b_cont; 1242 } 1243 1244 if (mp != NULL) { 1245 newmp->b_cont = dupmsg(mp); 1246 if (newmp->b_cont == NULL) { 1247 freemsg(newmp); 1248 return (NULL); 1249 } 1250 } 1251 1252 return (newmp); 1253 } 1254 1255 /* 1256 * Trim bytes from message 1257 * len > 0, trim from head 1258 * len < 0, trim from tail 1259 * Returns 1 on success, 0 on failure. 1260 */ 1261 int 1262 adjmsg(mblk_t *mp, ssize_t len) 1263 { 1264 mblk_t *bp; 1265 mblk_t *save_bp = NULL; 1266 mblk_t *prev_bp; 1267 mblk_t *bcont; 1268 unsigned char type; 1269 ssize_t n; 1270 int fromhead; 1271 int first; 1272 1273 ASSERT(mp != NULL); 1274 /* 1275 * We won't handle Multidata message, since it contains 1276 * metadata which this function has no knowledge of; we 1277 * assert on DEBUG, and return failure otherwise. 
1278 */ 1279 ASSERT(mp->b_datap->db_type != M_MULTIDATA); 1280 if (mp->b_datap->db_type == M_MULTIDATA) 1281 return (0); 1282 1283 if (len < 0) { 1284 fromhead = 0; 1285 len = -len; 1286 } else { 1287 fromhead = 1; 1288 } 1289 1290 if (xmsgsize(mp) < len) 1291 return (0); 1292 1293 if (fromhead) { 1294 first = 1; 1295 while (len) { 1296 ASSERT(mp->b_wptr >= mp->b_rptr); 1297 n = MIN(mp->b_wptr - mp->b_rptr, len); 1298 mp->b_rptr += n; 1299 len -= n; 1300 1301 /* 1302 * If this is not the first zero length 1303 * message remove it 1304 */ 1305 if (!first && (mp->b_wptr == mp->b_rptr)) { 1306 bcont = mp->b_cont; 1307 freeb(mp); 1308 mp = save_bp->b_cont = bcont; 1309 } else { 1310 save_bp = mp; 1311 mp = mp->b_cont; 1312 } 1313 first = 0; 1314 } 1315 } else { 1316 type = mp->b_datap->db_type; 1317 while (len) { 1318 bp = mp; 1319 save_bp = NULL; 1320 1321 /* 1322 * Find the last message of same type 1323 */ 1324 while (bp && bp->b_datap->db_type == type) { 1325 ASSERT(bp->b_wptr >= bp->b_rptr); 1326 prev_bp = save_bp; 1327 save_bp = bp; 1328 bp = bp->b_cont; 1329 } 1330 if (save_bp == NULL) 1331 break; 1332 n = MIN(save_bp->b_wptr - save_bp->b_rptr, len); 1333 save_bp->b_wptr -= n; 1334 len -= n; 1335 1336 /* 1337 * If this is not the first message 1338 * and we have taken away everything 1339 * from this message, remove it 1340 */ 1341 1342 if ((save_bp != mp) && 1343 (save_bp->b_wptr == save_bp->b_rptr)) { 1344 bcont = save_bp->b_cont; 1345 freeb(save_bp); 1346 prev_bp->b_cont = bcont; 1347 } 1348 } 1349 } 1350 return (1); 1351 } 1352 1353 /* 1354 * get number of data bytes in message 1355 */ 1356 size_t 1357 msgdsize(mblk_t *bp) 1358 { 1359 size_t count = 0; 1360 1361 for (; bp; bp = bp->b_cont) 1362 if (bp->b_datap->db_type == M_DATA) { 1363 ASSERT(bp->b_wptr >= bp->b_rptr); 1364 count += bp->b_wptr - bp->b_rptr; 1365 } 1366 return (count); 1367 } 1368 1369 /* getq() etc to EOF removed */ 1370