1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 22 /* All Rights Reserved */ 23 24 /* 25 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 26 * Use is subject to license terms. 27 * 28 * Copyright 2017 Nexenta Systems, Inc. All rights reserved. 29 */ 30 31 #include <sys/types.h> 32 #include <sys/param.h> 33 #include <sys/thread.h> 34 #include <sys/sysmacros.h> 35 #include <sys/stropts.h> 36 #include <sys/stream.h> 37 #include <sys/strsubr.h> 38 #include <sys/strsun.h> 39 #include <sys/conf.h> 40 #include <sys/debug.h> 41 #include <sys/cmn_err.h> 42 #include <sys/kmem.h> 43 #include <sys/atomic.h> 44 #include <sys/errno.h> 45 #include <sys/vtrace.h> 46 #include <sys/ftrace.h> 47 #include <sys/ontrap.h> 48 #include <sys/sdt.h> 49 #include <sys/strft.h> 50 51 /* 52 * This file contains selected functions from io/stream.c 53 * needed by this library, mostly unmodified. 54 */ 55 56 /* 57 * STREAMS message allocator: principles of operation 58 * (See usr/src/uts/common/io/stream.c) 59 */ 60 #define DBLK_MAX_CACHE 73728 61 #define DBLK_CACHE_ALIGN 64 62 #define DBLK_MIN_SIZE 8 63 #define DBLK_SIZE_SHIFT 3 64 65 #ifdef _BIG_ENDIAN 66 #define DBLK_RTFU_SHIFT(field) \ 67 (8 * (&((dblk_t *)0)->db_struioflag - &((dblk_t *)0)->field)) 68 #else 69 #define DBLK_RTFU_SHIFT(field) \ 70 (8 * (&((dblk_t *)0)->field - &((dblk_t *)0)->db_ref)) 71 #endif 72 73 #define DBLK_RTFU(ref, type, flags, uioflag) \ 74 (((ref) << DBLK_RTFU_SHIFT(db_ref)) | \ 75 ((type) << DBLK_RTFU_SHIFT(db_type)) | \ 76 (((flags) | (ref - 1)) << DBLK_RTFU_SHIFT(db_flags)) | \ 77 ((uioflag) << DBLK_RTFU_SHIFT(db_struioflag))) 78 #define DBLK_RTFU_REF_MASK (DBLK_REFMAX << DBLK_RTFU_SHIFT(db_ref)) 79 #define DBLK_RTFU_WORD(dbp) (*((uint32_t *)&(dbp)->db_ref)) 80 #define MBLK_BAND_FLAG_WORD(mp) (*((uint32_t *)&(mp)->b_band)) 81 82 static size_t dblk_sizes[] = { 83 #ifdef _LP64 84 16, 80, 144, 208, 272, 336, 528, 1040, 1488, 1936, 2576, 3856, 85 8192, 12048, 16384, 20240, 24576, 28432, 32768, 36624, 86 40960, 44816, 49152, 53008, 57344, 61200, 65536, 69392, 87 #else 88 64, 128, 320, 576, 1088, 1536, 1984, 2624, 3904, 89 8192, 12096, 16384, 20288, 24576, 28480, 32768, 36672, 90 40960, 44864, 49152, 53056, 57344, 61248, 65536, 69440, 91 #endif 92 DBLK_MAX_CACHE, 0 93 }; 94 95 static struct kmem_cache *dblk_cache[DBLK_MAX_CACHE / DBLK_MIN_SIZE]; 96 static struct kmem_cache *mblk_cache; 97 static struct kmem_cache *dblk_esb_cache; 98 99 static void dblk_lastfree(mblk_t *mp, dblk_t *dbp); 100 static mblk_t *allocb_oversize(size_t size, int flags); 101 static int allocb_tryhard_fails; 102 static void frnop_func(void *arg); 103 frtn_t frnop = { frnop_func }; 104 static void bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp); 105 106 /* 107 * Patchable mblk/dblk kmem_cache flags. 108 */ 109 int dblk_kmem_flags = 0; 110 int mblk_kmem_flags = 0; 111 112 static int 113 dblk_constructor(void *buf, void *cdrarg, int kmflags) 114 { 115 dblk_t *dbp = buf; 116 ssize_t msg_size = (ssize_t)cdrarg; 117 size_t index; 118 119 ASSERT(msg_size != 0); 120 121 index = (msg_size - 1) >> DBLK_SIZE_SHIFT; 122 123 ASSERT(index < (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)); 124 125 if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL) 126 return (-1); 127 if ((msg_size & PAGEOFFSET) == 0) { 128 dbp->db_base = kmem_alloc(msg_size, kmflags); 129 if (dbp->db_base == NULL) { 130 kmem_cache_free(mblk_cache, dbp->db_mblk); 131 return (-1); 132 } 133 } else { 134 dbp->db_base = (unsigned char *)&dbp[1]; 135 } 136 137 dbp->db_mblk->b_datap = dbp; 138 dbp->db_cache = dblk_cache[index]; 139 dbp->db_lim = dbp->db_base + msg_size; 140 dbp->db_free = dbp->db_lastfree = dblk_lastfree; 141 dbp->db_frtnp = NULL; 142 dbp->db_fthdr = NULL; 143 dbp->db_credp = NULL; 144 dbp->db_cpid = -1; 145 dbp->db_struioflag = 0; 146 dbp->db_struioun.cksum.flags = 0; 147 return (0); 148 } 149 150 /*ARGSUSED*/ 151 static int 152 dblk_esb_constructor(void *buf, void *cdrarg, int kmflags) 153 { 154 dblk_t *dbp = buf; 155 156 if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL) 157 return (-1); 158 dbp->db_mblk->b_datap = dbp; 159 dbp->db_cache = dblk_esb_cache; 160 dbp->db_fthdr = NULL; 161 dbp->db_credp = NULL; 162 dbp->db_cpid = -1; 163 dbp->db_struioflag = 0; 164 dbp->db_struioun.cksum.flags = 0; 165 return (0); 166 } 167 168 static int 169 bcache_dblk_constructor(void *buf, void *cdrarg, int kmflags) 170 { 171 dblk_t *dbp = buf; 172 bcache_t *bcp = cdrarg; 173 174 if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL) 175 return (-1); 176 177 dbp->db_base = kmem_cache_alloc(bcp->buffer_cache, kmflags); 178 if (dbp->db_base == NULL) { 179 kmem_cache_free(mblk_cache, dbp->db_mblk); 180 return (-1); 181 } 182 183 dbp->db_mblk->b_datap = dbp; 184 dbp->db_cache = (void *)bcp; 185 dbp->db_lim = dbp->db_base + bcp->size; 186 dbp->db_free = dbp->db_lastfree = bcache_dblk_lastfree; 187 dbp->db_frtnp = NULL; 188 dbp->db_fthdr = NULL; 189 dbp->db_credp = NULL; 190 dbp->db_cpid = -1; 191 dbp->db_struioflag = 0; 192 dbp->db_struioun.cksum.flags = 0; 193 return (0); 194 } 195 196 /*ARGSUSED*/ 197 static void 198 dblk_destructor(void *buf, void *cdrarg) 199 { 200 dblk_t *dbp = buf; 201 ssize_t msg_size = (ssize_t)cdrarg; 202 203 ASSERT(dbp->db_mblk->b_datap == dbp); 204 ASSERT(msg_size != 0); 205 ASSERT(dbp->db_struioflag == 0); 206 ASSERT(dbp->db_struioun.cksum.flags == 0); 207 208 if ((msg_size & PAGEOFFSET) == 0) { 209 kmem_free(dbp->db_base, msg_size); 210 } 211 212 kmem_cache_free(mblk_cache, dbp->db_mblk); 213 } 214 215 static void 216 bcache_dblk_destructor(void *buf, void *cdrarg) 217 { 218 dblk_t *dbp = buf; 219 bcache_t *bcp = cdrarg; 220 221 kmem_cache_free(bcp->buffer_cache, dbp->db_base); 222 223 ASSERT(dbp->db_mblk->b_datap == dbp); 224 ASSERT(dbp->db_struioflag == 0); 225 ASSERT(dbp->db_struioun.cksum.flags == 0); 226 227 kmem_cache_free(mblk_cache, dbp->db_mblk); 228 } 229 230 /* Needed in the ASSERT below */ 231 #ifdef DEBUG 232 #ifdef _KERNEL 233 #define KMEM_SLAB_T_SZ sizeof (kmem_slab_t) 234 #else /* _KERNEL */ 235 #define KMEM_SLAB_T_SZ 64 /* fakekernel */ 236 #endif /* _KERNEL */ 237 #endif /* DEBUG */ 238 239 void 240 streams_msg_init(void) 241 { 242 char name[40]; 243 size_t size; 244 size_t lastsize = DBLK_MIN_SIZE; 245 size_t *sizep; 246 struct kmem_cache *cp; 247 size_t tot_size; 248 int offset; 249 250 mblk_cache = kmem_cache_create("streams_mblk", sizeof (mblk_t), 32, 251 NULL, NULL, NULL, NULL, NULL, mblk_kmem_flags); 252 253 for (sizep = dblk_sizes; (size = *sizep) != 0; sizep++) { 254 255 if ((offset = (size & PAGEOFFSET)) != 0) { 256 /* 257 * We are in the middle of a page, dblk should 258 * be allocated on the same page 259 */ 260 tot_size = size + sizeof (dblk_t); 261 ASSERT((offset + sizeof (dblk_t) + KMEM_SLAB_T_SZ) 262 < PAGESIZE); 263 ASSERT((tot_size & (DBLK_CACHE_ALIGN - 1)) == 0); 264 265 } else { 266 267 /* 268 * buf size is multiple of page size, dblk and 269 * buffer are allocated separately. 270 */ 271 272 ASSERT((size & (DBLK_CACHE_ALIGN - 1)) == 0); 273 tot_size = sizeof (dblk_t); 274 } 275 276 (void) sprintf(name, "streams_dblk_%ld", (long)size); 277 cp = kmem_cache_create(name, tot_size, DBLK_CACHE_ALIGN, 278 dblk_constructor, dblk_destructor, NULL, (void *)(size), 279 NULL, dblk_kmem_flags); 280 281 while (lastsize <= size) { 282 dblk_cache[(lastsize - 1) >> DBLK_SIZE_SHIFT] = cp; 283 lastsize += DBLK_MIN_SIZE; 284 } 285 } 286 287 dblk_esb_cache = kmem_cache_create("streams_dblk_esb", sizeof (dblk_t), 288 DBLK_CACHE_ALIGN, dblk_esb_constructor, dblk_destructor, NULL, 289 (void *)sizeof (dblk_t), NULL, dblk_kmem_flags); 290 291 /* fthdr_cache, ftblk_cache, mmd_init... */ 292 } 293 294 /*ARGSUSED*/ 295 mblk_t * 296 allocb(size_t size, uint_t pri) 297 { 298 dblk_t *dbp; 299 mblk_t *mp; 300 size_t index; 301 302 index = (size - 1) >> DBLK_SIZE_SHIFT; 303 304 if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) { 305 if (size != 0) { 306 mp = allocb_oversize(size, KM_NOSLEEP); 307 goto out; 308 } 309 index = 0; 310 } 311 312 if ((dbp = kmem_cache_alloc(dblk_cache[index], KM_NOSLEEP)) == NULL) { 313 mp = NULL; 314 goto out; 315 } 316 317 mp = dbp->db_mblk; 318 DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0); 319 mp->b_next = mp->b_prev = mp->b_cont = NULL; 320 mp->b_rptr = mp->b_wptr = dbp->db_base; 321 mp->b_queue = NULL; 322 MBLK_BAND_FLAG_WORD(mp) = 0; 323 STR_FTALLOC(&dbp->db_fthdr, FTEV_ALLOCB, size); 324 out: 325 FTRACE_1("allocb(): mp=0x%p", (uintptr_t)mp); 326 327 return (mp); 328 } 329 330 /* 331 * Allocate an mblk taking db_credp and db_cpid from the template. 332 * Allow the cred to be NULL. 333 */ 334 mblk_t * 335 allocb_tmpl(size_t size, const mblk_t *tmpl) 336 { 337 mblk_t *mp = allocb(size, 0); 338 339 if (mp != NULL) { 340 dblk_t *src = tmpl->b_datap; 341 dblk_t *dst = mp->b_datap; 342 cred_t *cr; 343 pid_t cpid; 344 345 cr = msg_getcred(tmpl, &cpid); 346 if (cr != NULL) 347 crhold(dst->db_credp = cr); 348 dst->db_cpid = cpid; 349 dst->db_type = src->db_type; 350 } 351 return (mp); 352 } 353 354 mblk_t * 355 allocb_cred(size_t size, cred_t *cr, pid_t cpid) 356 { 357 mblk_t *mp = allocb(size, 0); 358 359 ASSERT(cr != NULL); 360 if (mp != NULL) { 361 dblk_t *dbp = mp->b_datap; 362 363 crhold(dbp->db_credp = cr); 364 dbp->db_cpid = cpid; 365 } 366 return (mp); 367 } 368 369 mblk_t * 370 allocb_cred_wait(size_t size, uint_t flags, int *error, cred_t *cr, pid_t cpid) 371 { 372 mblk_t *mp = allocb_wait(size, 0, flags, error); 373 374 ASSERT(cr != NULL); 375 if (mp != NULL) { 376 dblk_t *dbp = mp->b_datap; 377 378 crhold(dbp->db_credp = cr); 379 dbp->db_cpid = cpid; 380 } 381 382 return (mp); 383 } 384 385 /* 386 * Extract the db_cred (and optionally db_cpid) from a message. 387 * We find the first mblk which has a non-NULL db_cred and use that. 388 * If none found we return NULL. 389 * Does NOT get a hold on the cred. 390 */ 391 cred_t * 392 msg_getcred(const mblk_t *mp, pid_t *cpidp) 393 { 394 cred_t *cr = NULL; 395 396 while (mp != NULL) { 397 dblk_t *dbp = mp->b_datap; 398 399 cr = dbp->db_credp; 400 if (cr == NULL) { 401 mp = mp->b_cont; 402 continue; 403 } 404 if (cpidp != NULL) 405 *cpidp = dbp->db_cpid; 406 407 /* DEBUG check for only one db_credp */ 408 return (cr); 409 } 410 if (cpidp != NULL) 411 *cpidp = NOPID; 412 return (NULL); 413 } 414 415 /* 416 * Variant of msg_getcred which, when a cred is found 417 * 1. Returns with a hold on the cred 418 * 2. Clears the first cred in the mblk. 419 * This is more efficient to use than a msg_getcred() + crhold() when 420 * the message is freed after the cred has been extracted. 421 * 422 * The caller is responsible for ensuring that there is no other reference 423 * on the message since db_credp can not be cleared when there are other 424 * references. 425 */ 426 cred_t * 427 msg_extractcred(mblk_t *mp, pid_t *cpidp) 428 { 429 cred_t *cr = NULL; 430 431 while (mp != NULL) { 432 dblk_t *dbp = mp->b_datap; 433 434 cr = dbp->db_credp; 435 if (cr == NULL) { 436 mp = mp->b_cont; 437 continue; 438 } 439 ASSERT(dbp->db_ref == 1); 440 dbp->db_credp = NULL; 441 if (cpidp != NULL) 442 *cpidp = dbp->db_cpid; 443 444 /* DEBUG check for only one db_credp */ 445 return (cr); 446 } 447 return (NULL); 448 } 449 450 /* _KERNEL msg_getlabel() */ 451 452 void 453 freeb(mblk_t *mp) 454 { 455 dblk_t *dbp = mp->b_datap; 456 457 ASSERT(dbp->db_ref > 0); 458 ASSERT(mp->b_next == NULL && mp->b_prev == NULL); 459 FTRACE_1("freeb(): mp=0x%lx", (uintptr_t)mp); 460 461 STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref); 462 463 dbp->db_free(mp, dbp); 464 } 465 466 void 467 freemsg(mblk_t *mp) 468 { 469 FTRACE_1("freemsg(): mp=0x%lx", (uintptr_t)mp); 470 while (mp) { 471 dblk_t *dbp = mp->b_datap; 472 mblk_t *mp_cont = mp->b_cont; 473 474 ASSERT(dbp->db_ref > 0); 475 ASSERT(mp->b_next == NULL && mp->b_prev == NULL); 476 477 STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref); 478 479 dbp->db_free(mp, dbp); 480 mp = mp_cont; 481 } 482 } 483 484 /* 485 * Reallocate a block for another use. Try hard to use the old block. 486 * If the old data is wanted (copy), leave b_wptr at the end of the data, 487 * otherwise return b_wptr = b_rptr. 488 * 489 * This routine is private and unstable. 490 */ 491 mblk_t * 492 reallocb(mblk_t *mp, size_t size, uint_t copy) 493 { 494 mblk_t *mp1; 495 unsigned char *old_rptr; 496 ptrdiff_t cur_size; 497 498 if (mp == NULL) 499 return (allocb(size, BPRI_HI)); 500 501 cur_size = mp->b_wptr - mp->b_rptr; 502 old_rptr = mp->b_rptr; 503 504 ASSERT(mp->b_datap->db_ref != 0); 505 506 if (mp->b_datap->db_ref == 1 && MBLKSIZE(mp) >= size) { 507 /* 508 * If the data is wanted and it will fit where it is, no 509 * work is required. 510 */ 511 if (copy && mp->b_datap->db_lim - mp->b_rptr >= size) 512 return (mp); 513 514 mp->b_wptr = mp->b_rptr = mp->b_datap->db_base; 515 mp1 = mp; 516 } else if ((mp1 = allocb_tmpl(size, mp)) != NULL) { 517 /* XXX other mp state could be copied too, db_flags ... ? */ 518 mp1->b_cont = mp->b_cont; 519 } else { 520 return (NULL); 521 } 522 523 if (copy) { 524 bcopy(old_rptr, mp1->b_rptr, cur_size); 525 mp1->b_wptr = mp1->b_rptr + cur_size; 526 } 527 528 if (mp != mp1) 529 freeb(mp); 530 531 return (mp1); 532 } 533 534 static void 535 dblk_lastfree(mblk_t *mp, dblk_t *dbp) 536 { 537 ASSERT(dbp->db_mblk == mp); 538 if (dbp->db_fthdr != NULL) 539 str_ftfree(dbp); 540 541 /* set credp and projid to be 'unspecified' before returning to cache */ 542 if (dbp->db_credp != NULL) { 543 crfree(dbp->db_credp); 544 dbp->db_credp = NULL; 545 } 546 dbp->db_cpid = -1; 547 548 /* Reset the struioflag and the checksum flag fields */ 549 dbp->db_struioflag = 0; 550 dbp->db_struioun.cksum.flags = 0; 551 552 /* and the COOKED and/or UIOA flag(s) */ 553 dbp->db_flags &= ~(DBLK_COOKED | DBLK_UIOA); 554 555 kmem_cache_free(dbp->db_cache, dbp); 556 } 557 558 static void 559 dblk_decref(mblk_t *mp, dblk_t *dbp) 560 { 561 if (dbp->db_ref != 1) { 562 uint32_t rtfu = atomic_add_32_nv(&DBLK_RTFU_WORD(dbp), 563 -(1 << DBLK_RTFU_SHIFT(db_ref))); 564 /* 565 * atomic_add_32_nv() just decremented db_ref, so we no longer 566 * have a reference to the dblk, which means another thread 567 * could free it. Therefore we cannot examine the dblk to 568 * determine whether ours was the last reference. Instead, 569 * we extract the new and minimum reference counts from rtfu. 570 * Note that all we're really saying is "if (ref != refmin)". 571 */ 572 if (((rtfu >> DBLK_RTFU_SHIFT(db_ref)) & DBLK_REFMAX) != 573 ((rtfu >> DBLK_RTFU_SHIFT(db_flags)) & DBLK_REFMIN)) { 574 kmem_cache_free(mblk_cache, mp); 575 return; 576 } 577 } 578 dbp->db_mblk = mp; 579 dbp->db_free = dbp->db_lastfree; 580 dbp->db_lastfree(mp, dbp); 581 } 582 583 mblk_t * 584 dupb(mblk_t *mp) 585 { 586 dblk_t *dbp = mp->b_datap; 587 mblk_t *new_mp; 588 uint32_t oldrtfu, newrtfu; 589 590 if ((new_mp = kmem_cache_alloc(mblk_cache, KM_NOSLEEP)) == NULL) 591 goto out; 592 593 new_mp->b_next = new_mp->b_prev = new_mp->b_cont = NULL; 594 new_mp->b_rptr = mp->b_rptr; 595 new_mp->b_wptr = mp->b_wptr; 596 new_mp->b_datap = dbp; 597 new_mp->b_queue = NULL; 598 MBLK_BAND_FLAG_WORD(new_mp) = MBLK_BAND_FLAG_WORD(mp); 599 600 STR_FTEVENT_MBLK(mp, caller(), FTEV_DUPB, dbp->db_ref); 601 602 dbp->db_free = dblk_decref; 603 do { 604 ASSERT(dbp->db_ref > 0); 605 oldrtfu = DBLK_RTFU_WORD(dbp); 606 newrtfu = oldrtfu + (1 << DBLK_RTFU_SHIFT(db_ref)); 607 /* 608 * If db_ref is maxed out we can't dup this message anymore. 609 */ 610 if ((oldrtfu & DBLK_RTFU_REF_MASK) == DBLK_RTFU_REF_MASK) { 611 kmem_cache_free(mblk_cache, new_mp); 612 new_mp = NULL; 613 goto out; 614 } 615 } while (atomic_cas_32(&DBLK_RTFU_WORD(dbp), oldrtfu, newrtfu) != 616 oldrtfu); 617 618 out: 619 FTRACE_1("dupb(): new_mp=0x%lx", (uintptr_t)new_mp); 620 return (new_mp); 621 } 622 623 /*ARGSUSED*/ 624 static void 625 frnop_func(void *arg) 626 { 627 } 628 629 /* 630 * Generic esballoc used to implement the four flavors: [d]esballoc[a]. 631 * and allocb_oversize 632 */ 633 static mblk_t * 634 gesballoc(unsigned char *base, size_t size, uint32_t db_rtfu, frtn_t *frp, 635 void (*lastfree)(mblk_t *, dblk_t *), int kmflags) 636 { 637 dblk_t *dbp; 638 mblk_t *mp; 639 640 ASSERT(base != NULL && frp != NULL); 641 642 if ((dbp = kmem_cache_alloc(dblk_esb_cache, kmflags)) == NULL) { 643 mp = NULL; 644 goto out; 645 } 646 647 mp = dbp->db_mblk; 648 dbp->db_base = base; 649 dbp->db_lim = base + size; 650 dbp->db_free = dbp->db_lastfree = lastfree; 651 dbp->db_frtnp = frp; 652 DBLK_RTFU_WORD(dbp) = db_rtfu; 653 mp->b_next = mp->b_prev = mp->b_cont = NULL; 654 mp->b_rptr = mp->b_wptr = base; 655 mp->b_queue = NULL; 656 MBLK_BAND_FLAG_WORD(mp) = 0; 657 658 out: 659 FTRACE_1("gesballoc(): mp=0x%lx", (uintptr_t)mp); 660 return (mp); 661 } 662 663 static void 664 bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp) 665 { 666 bcache_t *bcp = dbp->db_cache; 667 668 ASSERT(dbp->db_mblk == mp); 669 if (dbp->db_fthdr != NULL) 670 str_ftfree(dbp); 671 672 /* set credp and projid to be 'unspecified' before returning to cache */ 673 if (dbp->db_credp != NULL) { 674 crfree(dbp->db_credp); 675 dbp->db_credp = NULL; 676 } 677 dbp->db_cpid = -1; 678 dbp->db_struioflag = 0; 679 dbp->db_struioun.cksum.flags = 0; 680 681 mutex_enter(&bcp->mutex); 682 kmem_cache_free(bcp->dblk_cache, dbp); 683 bcp->alloc--; 684 685 if (bcp->alloc == 0 && bcp->destroy != 0) { 686 kmem_cache_destroy(bcp->dblk_cache); 687 kmem_cache_destroy(bcp->buffer_cache); 688 mutex_exit(&bcp->mutex); 689 mutex_destroy(&bcp->mutex); 690 kmem_free(bcp, sizeof (bcache_t)); 691 } else { 692 mutex_exit(&bcp->mutex); 693 } 694 } 695 696 bcache_t * 697 bcache_create(char *name, size_t size, uint_t align) 698 { 699 bcache_t *bcp; 700 char buffer[255]; 701 702 ASSERT((align & (align - 1)) == 0); 703 704 if ((bcp = kmem_alloc(sizeof (bcache_t), KM_NOSLEEP)) == NULL) 705 return (NULL); 706 707 bcp->size = size; 708 bcp->align = align; 709 bcp->alloc = 0; 710 bcp->destroy = 0; 711 712 mutex_init(&bcp->mutex, NULL, MUTEX_DRIVER, NULL); 713 714 (void) sprintf(buffer, "%s_buffer_cache", name); 715 bcp->buffer_cache = kmem_cache_create(buffer, size, align, NULL, NULL, 716 NULL, NULL, NULL, 0); 717 (void) sprintf(buffer, "%s_dblk_cache", name); 718 bcp->dblk_cache = kmem_cache_create(buffer, sizeof (dblk_t), 719 DBLK_CACHE_ALIGN, bcache_dblk_constructor, bcache_dblk_destructor, 720 NULL, (void *)bcp, NULL, 0); 721 722 return (bcp); 723 } 724 725 void 726 bcache_destroy(bcache_t *bcp) 727 { 728 ASSERT(bcp != NULL); 729 730 mutex_enter(&bcp->mutex); 731 if (bcp->alloc == 0) { 732 kmem_cache_destroy(bcp->dblk_cache); 733 kmem_cache_destroy(bcp->buffer_cache); 734 mutex_exit(&bcp->mutex); 735 mutex_destroy(&bcp->mutex); 736 kmem_free(bcp, sizeof (bcache_t)); 737 } else { 738 bcp->destroy++; 739 mutex_exit(&bcp->mutex); 740 } 741 } 742 743 /*ARGSUSED*/ 744 mblk_t * 745 bcache_allocb(bcache_t *bcp, uint_t pri) 746 { 747 dblk_t *dbp; 748 mblk_t *mp = NULL; 749 750 ASSERT(bcp != NULL); 751 752 mutex_enter(&bcp->mutex); 753 if (bcp->destroy != 0) { 754 mutex_exit(&bcp->mutex); 755 goto out; 756 } 757 758 if ((dbp = kmem_cache_alloc(bcp->dblk_cache, KM_NOSLEEP)) == NULL) { 759 mutex_exit(&bcp->mutex); 760 goto out; 761 } 762 bcp->alloc++; 763 mutex_exit(&bcp->mutex); 764 765 ASSERT(((uintptr_t)(dbp->db_base) & (bcp->align - 1)) == 0); 766 767 mp = dbp->db_mblk; 768 DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0); 769 mp->b_next = mp->b_prev = mp->b_cont = NULL; 770 mp->b_rptr = mp->b_wptr = dbp->db_base; 771 mp->b_queue = NULL; 772 MBLK_BAND_FLAG_WORD(mp) = 0; 773 STR_FTALLOC(&dbp->db_fthdr, FTEV_BCALLOCB, bcp->size); 774 out: 775 FTRACE_1("bcache_allocb(): mp=0x%p", (uintptr_t)mp); 776 777 return (mp); 778 } 779 780 static void 781 dblk_lastfree_oversize(mblk_t *mp, dblk_t *dbp) 782 { 783 ASSERT(dbp->db_mblk == mp); 784 if (dbp->db_fthdr != NULL) 785 str_ftfree(dbp); 786 787 /* set credp and projid to be 'unspecified' before returning to cache */ 788 if (dbp->db_credp != NULL) { 789 crfree(dbp->db_credp); 790 dbp->db_credp = NULL; 791 } 792 dbp->db_cpid = -1; 793 dbp->db_struioflag = 0; 794 dbp->db_struioun.cksum.flags = 0; 795 796 kmem_free(dbp->db_base, dbp->db_lim - dbp->db_base); 797 kmem_cache_free(dbp->db_cache, dbp); 798 } 799 800 static mblk_t * 801 allocb_oversize(size_t size, int kmflags) 802 { 803 mblk_t *mp; 804 void *buf; 805 806 size = P2ROUNDUP(size, DBLK_CACHE_ALIGN); 807 if ((buf = kmem_alloc(size, kmflags)) == NULL) 808 return (NULL); 809 if ((mp = gesballoc(buf, size, DBLK_RTFU(1, M_DATA, 0, 0), 810 &frnop, dblk_lastfree_oversize, kmflags)) == NULL) 811 kmem_free(buf, size); 812 813 if (mp != NULL) 814 STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBIG, size); 815 816 return (mp); 817 } 818 819 mblk_t * 820 allocb_tryhard(size_t target_size) 821 { 822 size_t size; 823 mblk_t *bp; 824 825 for (size = target_size; size < target_size + 512; 826 size += DBLK_CACHE_ALIGN) 827 if ((bp = allocb(size, BPRI_HI)) != NULL) 828 return (bp); 829 allocb_tryhard_fails++; 830 return (NULL); 831 } 832 833 /* 834 * This routine is consolidation private for STREAMS internal use 835 * This routine may only be called from sync routines (i.e., not 836 * from put or service procedures). It is located here (rather 837 * than strsubr.c) so that we don't have to expose all of the 838 * allocb() implementation details in header files. 839 */ 840 mblk_t * 841 allocb_wait(size_t size, uint_t pri, uint_t flags, int *error) 842 { 843 dblk_t *dbp; 844 mblk_t *mp; 845 size_t index; 846 847 index = (size -1) >> DBLK_SIZE_SHIFT; 848 849 if (flags & STR_NOSIG) { 850 if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) { 851 if (size != 0) { 852 mp = allocb_oversize(size, KM_SLEEP); 853 FTRACE_1("allocb_wait (NOSIG): mp=0x%lx", 854 (uintptr_t)mp); 855 return (mp); 856 } 857 index = 0; 858 } 859 860 dbp = kmem_cache_alloc(dblk_cache[index], KM_SLEEP); 861 mp = dbp->db_mblk; 862 DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0); 863 mp->b_next = mp->b_prev = mp->b_cont = NULL; 864 mp->b_rptr = mp->b_wptr = dbp->db_base; 865 mp->b_queue = NULL; 866 MBLK_BAND_FLAG_WORD(mp) = 0; 867 STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBW, size); 868 869 FTRACE_1("allocb_wait (NOSIG): mp=0x%lx", (uintptr_t)mp); 870 871 } else { 872 while ((mp = allocb(size, pri)) == NULL) { 873 if ((*error = strwaitbuf(size, BPRI_HI)) != 0) 874 return (NULL); 875 } 876 } 877 878 return (mp); 879 } 880 881 /* 882 * Call function 'func' with 'arg' when a class zero block can 883 * be allocated with priority 'pri'. 884 */ 885 bufcall_id_t 886 esbbcall(uint_t pri, void (*func)(void *), void *arg) 887 { 888 return (bufcall(1, pri, func, arg)); 889 } 890 891 /* 892 * Allocates an iocblk (M_IOCTL) block. Properly sets the credentials 893 * ioc_id, rval and error of the struct ioctl to set up an ioctl call. 894 * This provides consistency for all internal allocators of ioctl. 895 */ 896 mblk_t * 897 mkiocb(uint_t cmd) 898 { 899 struct iocblk *ioc; 900 mblk_t *mp; 901 902 /* 903 * Allocate enough space for any of the ioctl related messages. 904 */ 905 if ((mp = allocb(sizeof (union ioctypes), BPRI_MED)) == NULL) 906 return (NULL); 907 908 bzero(mp->b_rptr, sizeof (union ioctypes)); 909 910 /* 911 * Set the mblk_t information and ptrs correctly. 912 */ 913 mp->b_wptr += sizeof (struct iocblk); 914 mp->b_datap->db_type = M_IOCTL; 915 916 /* 917 * Fill in the fields. 918 */ 919 ioc = (struct iocblk *)mp->b_rptr; 920 ioc->ioc_cmd = cmd; 921 ioc->ioc_cr = kcred; 922 ioc->ioc_id = getiocseqno(); 923 ioc->ioc_flag = IOC_NATIVE; 924 return (mp); 925 } 926 927 /* 928 * test if block of given size can be allocated with a request of 929 * the given priority. 930 * 'pri' is no longer used, but is retained for compatibility. 931 */ 932 /* ARGSUSED */ 933 int 934 testb(size_t size, uint_t pri) 935 { 936 return ((size + sizeof (dblk_t)) <= kmem_avail()); 937 } 938 939 /* _KERNEL: bufcall, unbufcall */ 940 941 /* 942 * Duplicate a message block by block (uses dupb), returning 943 * a pointer to the duplicate message. 944 * Returns a non-NULL value only if the entire message 945 * was dup'd. 946 */ 947 mblk_t * 948 dupmsg(mblk_t *bp) 949 { 950 mblk_t *head, *nbp; 951 952 if (!bp || !(nbp = head = dupb(bp))) 953 return (NULL); 954 955 while (bp->b_cont) { 956 if (!(nbp->b_cont = dupb(bp->b_cont))) { 957 freemsg(head); 958 return (NULL); 959 } 960 nbp = nbp->b_cont; 961 bp = bp->b_cont; 962 } 963 return (head); 964 } 965 966 #define DUPB_NOLOAN(bp) \ 967 ((((bp)->b_datap->db_struioflag & STRUIO_ZC) != 0) ? \ 968 copyb((bp)) : dupb((bp))) 969 970 mblk_t * 971 dupmsg_noloan(mblk_t *bp) 972 { 973 mblk_t *head, *nbp; 974 975 if (bp == NULL || DB_TYPE(bp) != M_DATA || 976 ((nbp = head = DUPB_NOLOAN(bp)) == NULL)) 977 return (NULL); 978 979 while (bp->b_cont) { 980 if ((nbp->b_cont = DUPB_NOLOAN(bp->b_cont)) == NULL) { 981 freemsg(head); 982 return (NULL); 983 } 984 nbp = nbp->b_cont; 985 bp = bp->b_cont; 986 } 987 return (head); 988 } 989 990 /* 991 * Copy data from message and data block to newly allocated message and 992 * data block. Returns new message block pointer, or NULL if error. 993 * The alignment of rptr (w.r.t. word alignment) will be the same in the copy 994 * as in the original even when db_base is not word aligned. (bug 1052877) 995 */ 996 mblk_t * 997 copyb(mblk_t *bp) 998 { 999 mblk_t *nbp; 1000 dblk_t *dp, *ndp; 1001 uchar_t *base; 1002 size_t size; 1003 size_t unaligned; 1004 1005 ASSERT(bp->b_wptr >= bp->b_rptr); 1006 1007 dp = bp->b_datap; 1008 if (dp->db_fthdr != NULL) 1009 STR_FTEVENT_MBLK(bp, caller(), FTEV_COPYB, 0); 1010 1011 /* 1012 * Special handling for Multidata message; this should be 1013 * removed once a copy-callback routine is made available. 1014 */ 1015 if (dp->db_type == M_MULTIDATA) { 1016 /* _KERNEL mmd_copy stuff */ 1017 return (NULL); 1018 } 1019 1020 size = dp->db_lim - dp->db_base; 1021 unaligned = P2PHASE((uintptr_t)dp->db_base, sizeof (uint_t)); 1022 if ((nbp = allocb_tmpl(size + unaligned, bp)) == NULL) 1023 return (NULL); 1024 nbp->b_flag = bp->b_flag; 1025 nbp->b_band = bp->b_band; 1026 ndp = nbp->b_datap; 1027 1028 /* 1029 * Well, here is a potential issue. If we are trying to 1030 * trace a flow, and we copy the message, we might lose 1031 * information about where this message might have been. 1032 * So we should inherit the FT data. On the other hand, 1033 * a user might be interested only in alloc to free data. 1034 * So I guess the real answer is to provide a tunable. 1035 */ 1036 STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1); 1037 1038 base = ndp->db_base + unaligned; 1039 bcopy(dp->db_base, ndp->db_base + unaligned, size); 1040 1041 nbp->b_rptr = base + (bp->b_rptr - dp->db_base); 1042 nbp->b_wptr = nbp->b_rptr + MBLKL(bp); 1043 1044 return (nbp); 1045 } 1046 1047 /* 1048 * Copy data from message to newly allocated message using new 1049 * data blocks. Returns a pointer to the new message, or NULL if error. 1050 */ 1051 mblk_t * 1052 copymsg(mblk_t *bp) 1053 { 1054 mblk_t *head, *nbp; 1055 1056 if (!bp || !(nbp = head = copyb(bp))) 1057 return (NULL); 1058 1059 while (bp->b_cont) { 1060 if (!(nbp->b_cont = copyb(bp->b_cont))) { 1061 freemsg(head); 1062 return (NULL); 1063 } 1064 nbp = nbp->b_cont; 1065 bp = bp->b_cont; 1066 } 1067 return (head); 1068 } 1069 1070 /* 1071 * link a message block to tail of message 1072 */ 1073 void 1074 linkb(mblk_t *mp, mblk_t *bp) 1075 { 1076 ASSERT(mp && bp); 1077 1078 for (; mp->b_cont; mp = mp->b_cont) 1079 ; 1080 mp->b_cont = bp; 1081 } 1082 1083 /* 1084 * unlink a message block from head of message 1085 * return pointer to new message. 1086 * NULL if message becomes empty. 1087 */ 1088 mblk_t * 1089 unlinkb(mblk_t *bp) 1090 { 1091 mblk_t *bp1; 1092 1093 bp1 = bp->b_cont; 1094 bp->b_cont = NULL; 1095 return (bp1); 1096 } 1097 1098 /* 1099 * remove a message block "bp" from message "mp" 1100 * 1101 * Return pointer to new message or NULL if no message remains. 1102 * Return -1 if bp is not found in message. 1103 */ 1104 mblk_t * 1105 rmvb(mblk_t *mp, mblk_t *bp) 1106 { 1107 mblk_t *tmp; 1108 mblk_t *lastp = NULL; 1109 1110 ASSERT(mp && bp); 1111 for (tmp = mp; tmp; tmp = tmp->b_cont) { 1112 if (tmp == bp) { 1113 if (lastp) 1114 lastp->b_cont = tmp->b_cont; 1115 else 1116 mp = tmp->b_cont; 1117 tmp->b_cont = NULL; 1118 return (mp); 1119 } 1120 lastp = tmp; 1121 } 1122 return ((mblk_t *)-1); 1123 } 1124 1125 /* 1126 * Concatenate and align first len bytes of common 1127 * message type. Len == -1, means concat everything. 1128 * Returns 1 on success, 0 on failure 1129 * After the pullup, mp points to the pulled up data. 1130 */ 1131 int 1132 pullupmsg(mblk_t *mp, ssize_t len) 1133 { 1134 mblk_t *bp, *b_cont; 1135 dblk_t *dbp; 1136 ssize_t n; 1137 1138 ASSERT(mp->b_datap->db_ref > 0); 1139 ASSERT(mp->b_next == NULL && mp->b_prev == NULL); 1140 1141 /* 1142 * We won't handle Multidata message, since it contains 1143 * metadata which this function has no knowledge of; we 1144 * assert on DEBUG, and return failure otherwise. 1145 */ 1146 ASSERT(mp->b_datap->db_type != M_MULTIDATA); 1147 if (mp->b_datap->db_type == M_MULTIDATA) 1148 return (0); 1149 1150 if (len == -1) { 1151 if (mp->b_cont == NULL && str_aligned(mp->b_rptr)) 1152 return (1); 1153 len = xmsgsize(mp); 1154 } else { 1155 ssize_t first_mblk_len = mp->b_wptr - mp->b_rptr; 1156 ASSERT(first_mblk_len >= 0); 1157 /* 1158 * If the length is less than that of the first mblk, 1159 * we want to pull up the message into an aligned mblk. 1160 * Though not part of the spec, some callers assume it. 1161 */ 1162 if (len <= first_mblk_len) { 1163 if (str_aligned(mp->b_rptr)) 1164 return (1); 1165 len = first_mblk_len; 1166 } else if (xmsgsize(mp) < len) 1167 return (0); 1168 } 1169 1170 if ((bp = allocb_tmpl(len, mp)) == NULL) 1171 return (0); 1172 1173 dbp = bp->b_datap; 1174 *bp = *mp; /* swap mblks so bp heads the old msg... */ 1175 mp->b_datap = dbp; /* ... and mp heads the new message */ 1176 mp->b_datap->db_mblk = mp; 1177 bp->b_datap->db_mblk = bp; 1178 mp->b_rptr = mp->b_wptr = dbp->db_base; 1179 1180 do { 1181 ASSERT(bp->b_datap->db_ref > 0); 1182 ASSERT(bp->b_wptr >= bp->b_rptr); 1183 n = MIN(bp->b_wptr - bp->b_rptr, len); 1184 ASSERT(n >= 0); /* allow zero-length mblk_t's */ 1185 if (n > 0) 1186 bcopy(bp->b_rptr, mp->b_wptr, (size_t)n); 1187 mp->b_wptr += n; 1188 bp->b_rptr += n; 1189 len -= n; 1190 if (bp->b_rptr != bp->b_wptr) 1191 break; 1192 b_cont = bp->b_cont; 1193 freeb(bp); 1194 bp = b_cont; 1195 } while (len && bp); 1196 1197 mp->b_cont = bp; /* tack on whatever wasn't pulled up */ 1198 1199 return (1); 1200 } 1201 1202 /* 1203 * Concatenate and align at least the first len bytes of common message 1204 * type. Len == -1 means concatenate everything. The original message is 1205 * unaltered. Returns a pointer to a new message on success, otherwise 1206 * returns NULL. 1207 */ 1208 mblk_t * 1209 msgpullup(mblk_t *mp, ssize_t len) 1210 { 1211 mblk_t *newmp; 1212 ssize_t totlen; 1213 ssize_t n; 1214 1215 /* 1216 * We won't handle Multidata message, since it contains 1217 * metadata which this function has no knowledge of; we 1218 * assert on DEBUG, and return failure otherwise. 1219 */ 1220 ASSERT(mp->b_datap->db_type != M_MULTIDATA); 1221 if (mp->b_datap->db_type == M_MULTIDATA) 1222 return (NULL); 1223 1224 totlen = xmsgsize(mp); 1225 1226 if ((len > 0) && (len > totlen)) 1227 return (NULL); 1228 1229 /* 1230 * Copy all of the first msg type into one new mblk, then dupmsg 1231 * and link the rest onto this. 1232 */ 1233 1234 len = totlen; 1235 1236 if ((newmp = allocb_tmpl(len, mp)) == NULL) 1237 return (NULL); 1238 1239 newmp->b_flag = mp->b_flag; 1240 newmp->b_band = mp->b_band; 1241 1242 while (len > 0) { 1243 n = mp->b_wptr - mp->b_rptr; 1244 ASSERT(n >= 0); /* allow zero-length mblk_t's */ 1245 if (n > 0) 1246 bcopy(mp->b_rptr, newmp->b_wptr, n); 1247 newmp->b_wptr += n; 1248 len -= n; 1249 mp = mp->b_cont; 1250 } 1251 1252 if (mp != NULL) { 1253 newmp->b_cont = dupmsg(mp); 1254 if (newmp->b_cont == NULL) { 1255 freemsg(newmp); 1256 return (NULL); 1257 } 1258 } 1259 1260 return (newmp); 1261 } 1262 1263 /* 1264 * Trim bytes from message 1265 * len > 0, trim from head 1266 * len < 0, trim from tail 1267 * Returns 1 on success, 0 on failure. 1268 */ 1269 int 1270 adjmsg(mblk_t *mp, ssize_t len) 1271 { 1272 mblk_t *bp; 1273 mblk_t *save_bp = NULL; 1274 mblk_t *prev_bp; 1275 mblk_t *bcont; 1276 unsigned char type; 1277 ssize_t n; 1278 int fromhead; 1279 int first; 1280 1281 ASSERT(mp != NULL); 1282 /* 1283 * We won't handle Multidata message, since it contains 1284 * metadata which this function has no knowledge of; we 1285 * assert on DEBUG, and return failure otherwise. 1286 */ 1287 ASSERT(mp->b_datap->db_type != M_MULTIDATA); 1288 if (mp->b_datap->db_type == M_MULTIDATA) 1289 return (0); 1290 1291 if (len < 0) { 1292 fromhead = 0; 1293 len = -len; 1294 } else { 1295 fromhead = 1; 1296 } 1297 1298 if (xmsgsize(mp) < len) 1299 return (0); 1300 1301 if (fromhead) { 1302 first = 1; 1303 while (len) { 1304 ASSERT(mp->b_wptr >= mp->b_rptr); 1305 n = MIN(mp->b_wptr - mp->b_rptr, len); 1306 mp->b_rptr += n; 1307 len -= n; 1308 1309 /* 1310 * If this is not the first zero length 1311 * message remove it 1312 */ 1313 if (!first && (mp->b_wptr == mp->b_rptr)) { 1314 bcont = mp->b_cont; 1315 freeb(mp); 1316 mp = save_bp->b_cont = bcont; 1317 } else { 1318 save_bp = mp; 1319 mp = mp->b_cont; 1320 } 1321 first = 0; 1322 } 1323 } else { 1324 type = mp->b_datap->db_type; 1325 while (len) { 1326 bp = mp; 1327 save_bp = NULL; 1328 1329 /* 1330 * Find the last message of same type 1331 */ 1332 while (bp && bp->b_datap->db_type == type) { 1333 ASSERT(bp->b_wptr >= bp->b_rptr); 1334 prev_bp = save_bp; 1335 save_bp = bp; 1336 bp = bp->b_cont; 1337 } 1338 if (save_bp == NULL) 1339 break; 1340 n = MIN(save_bp->b_wptr - save_bp->b_rptr, len); 1341 save_bp->b_wptr -= n; 1342 len -= n; 1343 1344 /* 1345 * If this is not the first message 1346 * and we have taken away everything 1347 * from this message, remove it 1348 */ 1349 1350 if ((save_bp != mp) && 1351 (save_bp->b_wptr == save_bp->b_rptr)) { 1352 bcont = save_bp->b_cont; 1353 freeb(save_bp); 1354 prev_bp->b_cont = bcont; 1355 } 1356 } 1357 } 1358 return (1); 1359 } 1360 1361 /* 1362 * get number of data bytes in message 1363 */ 1364 size_t 1365 msgdsize(mblk_t *bp) 1366 { 1367 size_t count = 0; 1368 1369 for (; bp; bp = bp->b_cont) 1370 if (bp->b_datap->db_type == M_DATA) { 1371 ASSERT(bp->b_wptr >= bp->b_rptr); 1372 count += bp->b_wptr - bp->b_rptr; 1373 } 1374 return (count); 1375 } 1376 1377 /* getq() etc to EOF removed */ 1378