1 /*- 2 * Copyright (c) 2004, 2005, 3 * Bosko Milekic <bmilekic@FreeBSD.org>. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice unmodified, this list of conditions and the following 10 * disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_param.h" 32 33 #include <sys/param.h> 34 #include <sys/malloc.h> 35 #include <sys/types.h> 36 #include <sys/systm.h> 37 #include <sys/mbuf.h> 38 #include <sys/domain.h> 39 #include <sys/eventhandler.h> 40 #include <sys/kernel.h> 41 #include <sys/lock.h> 42 #include <sys/mutex.h> 43 #include <sys/protosw.h> 44 #include <sys/smp.h> 45 #include <sys/sysctl.h> 46 47 #include <vm/vm.h> 48 #include <vm/vm_extern.h> 49 #include <vm/vm_kern.h> 50 #include <vm/vm_page.h> 51 #include <vm/vm_map.h> 52 #include <vm/uma.h> 53 #include <vm/uma_dbg.h> 54 55 /* 56 * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA 57 * Zones. 58 * 59 * Mbuf Clusters (2K, contiguous) are allocated from the Cluster 60 * Zone. The Zone can be capped at kern.ipc.nmbclusters, if the 61 * administrator so desires. 62 * 63 * Mbufs are allocated from a UMA Master Zone called the Mbuf 64 * Zone. 65 * 66 * Additionally, FreeBSD provides a Packet Zone, which it 67 * configures as a Secondary Zone to the Mbuf Master Zone, 68 * thus sharing backend Slab kegs with the Mbuf Master Zone. 69 * 70 * Thus common-case allocations and locking are simplified: 71 * 72 * m_clget() m_getcl() 73 * | | 74 * | .------------>[(Packet Cache)] m_get(), m_gethdr() 75 * | | [ Packet ] | 76 * [(Cluster Cache)] [ Secondary ] [ (Mbuf Cache) ] 77 * [ Cluster Zone ] [ Zone ] [ Mbuf Master Zone ] 78 * | \________ | 79 * [ Cluster Keg ] \ / 80 * | [ Mbuf Keg ] 81 * [ Cluster Slabs ] | 82 * | [ Mbuf Slabs ] 83 * \____________(VM)_________________/ 84 * 85 * 86 * Whenever an object is allocated with uma_zalloc() out of 87 * one of the Zones its _ctor_ function is executed. The same 88 * for any deallocation through uma_zfree() the _dtor_ function 89 * is executed. 90 * 91 * Caches are per-CPU and are filled from the Master Zone. 92 * 93 * Whenever an object is allocated from the underlying global 94 * memory pool it gets pre-initialized with the _zinit_ functions. 95 * When the Keg's are overfull objects get decommissioned with 96 * _zfini_ functions and free'd back to the global memory pool. 97 * 98 */ 99 100 int nmbufs; /* limits number of mbufs */ 101 int nmbclusters; /* limits number of mbuf clusters */ 102 int nmbjumbop; /* limits number of page size jumbo clusters */ 103 int nmbjumbo9; /* limits number of 9k jumbo clusters */ 104 int nmbjumbo16; /* limits number of 16k jumbo clusters */ 105 106 static quad_t maxmbufmem; /* overall real memory limit for all mbufs */ 107 108 SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxmbufmem, 0, 109 "Maximum real memory allocatable to various mbuf types"); 110 111 /* 112 * tunable_mbinit() has to be run before any mbuf allocations are done. 113 */ 114 static void 115 tunable_mbinit(void *dummy) 116 { 117 quad_t realmem; 118 119 /* 120 * The default limit for all mbuf related memory is 1/2 of all 121 * available kernel memory (physical or kmem). 122 * At most it can be 3/4 of available kernel memory. 123 */ 124 realmem = qmin((quad_t)physmem * PAGE_SIZE, vm_kmem_size); 125 maxmbufmem = realmem / 2; 126 TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem); 127 if (maxmbufmem > realmem / 4 * 3) 128 maxmbufmem = realmem / 4 * 3; 129 130 TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); 131 if (nmbclusters == 0) 132 nmbclusters = maxmbufmem / MCLBYTES / 4; 133 134 TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop); 135 if (nmbjumbop == 0) 136 nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4; 137 138 TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9); 139 if (nmbjumbo9 == 0) 140 nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6; 141 142 TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16); 143 if (nmbjumbo16 == 0) 144 nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6; 145 146 /* 147 * We need at least as many mbufs as we have clusters of 148 * the various types added together. 149 */ 150 TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs); 151 if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) 152 nmbufs = lmax(maxmbufmem / MSIZE / 5, 153 nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16); 154 } 155 SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL); 156 157 static int 158 sysctl_nmbclusters(SYSCTL_HANDLER_ARGS) 159 { 160 int error, newnmbclusters; 161 162 newnmbclusters = nmbclusters; 163 error = sysctl_handle_int(oidp, &newnmbclusters, 0, req); 164 if (error == 0 && req->newptr && newnmbclusters != nmbclusters) { 165 if (newnmbclusters > nmbclusters && 166 nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 167 nmbclusters = newnmbclusters; 168 nmbclusters = uma_zone_set_max(zone_clust, nmbclusters); 169 EVENTHANDLER_INVOKE(nmbclusters_change); 170 } else 171 error = EINVAL; 172 } 173 return (error); 174 } 175 SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, CTLTYPE_INT|CTLFLAG_RW, 176 &nmbclusters, 0, sysctl_nmbclusters, "IU", 177 "Maximum number of mbuf clusters allowed"); 178 179 static int 180 sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS) 181 { 182 int error, newnmbjumbop; 183 184 newnmbjumbop = nmbjumbop; 185 error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req); 186 if (error == 0 && req->newptr && newnmbjumbop != nmbjumbop) { 187 if (newnmbjumbop > nmbjumbop && 188 nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 189 nmbjumbop = newnmbjumbop; 190 nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop); 191 } else 192 error = EINVAL; 193 } 194 return (error); 195 } 196 SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW, 197 &nmbjumbop, 0, sysctl_nmbjumbop, "IU", 198 "Maximum number of mbuf page size jumbo clusters allowed"); 199 200 static int 201 sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS) 202 { 203 int error, newnmbjumbo9; 204 205 newnmbjumbo9 = nmbjumbo9; 206 error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req); 207 if (error == 0 && req->newptr && newnmbjumbo9 != nmbjumbo9) { 208 if (newnmbjumbo9 > nmbjumbo9 && 209 nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 210 nmbjumbo9 = newnmbjumbo9; 211 nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9); 212 } else 213 error = EINVAL; 214 } 215 return (error); 216 } 217 SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW, 218 &nmbjumbo9, 0, sysctl_nmbjumbo9, "IU", 219 "Maximum number of mbuf 9k jumbo clusters allowed"); 220 221 static int 222 sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS) 223 { 224 int error, newnmbjumbo16; 225 226 newnmbjumbo16 = nmbjumbo16; 227 error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req); 228 if (error == 0 && req->newptr && newnmbjumbo16 != nmbjumbo16) { 229 if (newnmbjumbo16 > nmbjumbo16 && 230 nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 231 nmbjumbo16 = newnmbjumbo16; 232 nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16); 233 } else 234 error = EINVAL; 235 } 236 return (error); 237 } 238 SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT|CTLFLAG_RW, 239 &nmbjumbo16, 0, sysctl_nmbjumbo16, "IU", 240 "Maximum number of mbuf 16k jumbo clusters allowed"); 241 242 static int 243 sysctl_nmbufs(SYSCTL_HANDLER_ARGS) 244 { 245 int error, newnmbufs; 246 247 newnmbufs = nmbufs; 248 error = sysctl_handle_int(oidp, &newnmbufs, 0, req); 249 if (error == 0 && req->newptr && newnmbufs != nmbufs) { 250 if (newnmbufs > nmbufs) { 251 nmbufs = newnmbufs; 252 nmbufs = uma_zone_set_max(zone_mbuf, nmbufs); 253 EVENTHANDLER_INVOKE(nmbufs_change); 254 } else 255 error = EINVAL; 256 } 257 return (error); 258 } 259 SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW, 260 &nmbufs, 0, sysctl_nmbufs, "IU", 261 "Maximum number of mbufs allowed"); 262 263 /* 264 * Zones from which we allocate. 265 */ 266 uma_zone_t zone_mbuf; 267 uma_zone_t zone_clust; 268 uma_zone_t zone_pack; 269 uma_zone_t zone_jumbop; 270 uma_zone_t zone_jumbo9; 271 uma_zone_t zone_jumbo16; 272 273 /* 274 * Local prototypes. 275 */ 276 static int mb_ctor_mbuf(void *, int, void *, int); 277 static int mb_ctor_clust(void *, int, void *, int); 278 static int mb_ctor_pack(void *, int, void *, int); 279 static void mb_dtor_mbuf(void *, int, void *); 280 static void mb_dtor_pack(void *, int, void *); 281 static int mb_zinit_pack(void *, int, int); 282 static void mb_zfini_pack(void *, int); 283 static void mb_reclaim(uma_zone_t, int); 284 static void *mbuf_jumbo_alloc(uma_zone_t, vm_size_t, uint8_t *, int); 285 286 /* Ensure that MSIZE is a power of 2. */ 287 CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE); 288 289 /* 290 * Initialize FreeBSD Network buffer allocation. 291 */ 292 static void 293 mbuf_init(void *dummy) 294 { 295 296 /* 297 * Configure UMA zones for Mbufs, Clusters, and Packets. 298 */ 299 zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE, 300 mb_ctor_mbuf, mb_dtor_mbuf, 301 #ifdef INVARIANTS 302 trash_init, trash_fini, 303 #else 304 NULL, NULL, 305 #endif 306 MSIZE - 1, UMA_ZONE_MAXBUCKET); 307 if (nmbufs > 0) 308 nmbufs = uma_zone_set_max(zone_mbuf, nmbufs); 309 uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached"); 310 uma_zone_set_maxaction(zone_mbuf, mb_reclaim); 311 312 zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES, 313 mb_ctor_clust, 314 #ifdef INVARIANTS 315 trash_dtor, trash_init, trash_fini, 316 #else 317 NULL, NULL, NULL, 318 #endif 319 UMA_ALIGN_PTR, 0); 320 if (nmbclusters > 0) 321 nmbclusters = uma_zone_set_max(zone_clust, nmbclusters); 322 uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached"); 323 uma_zone_set_maxaction(zone_clust, mb_reclaim); 324 325 zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack, 326 mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf); 327 328 /* Make jumbo frame zone too. Page size, 9k and 16k. */ 329 zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE, 330 mb_ctor_clust, 331 #ifdef INVARIANTS 332 trash_dtor, trash_init, trash_fini, 333 #else 334 NULL, NULL, NULL, 335 #endif 336 UMA_ALIGN_PTR, 0); 337 if (nmbjumbop > 0) 338 nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop); 339 uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached"); 340 uma_zone_set_maxaction(zone_jumbop, mb_reclaim); 341 342 zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES, 343 mb_ctor_clust, 344 #ifdef INVARIANTS 345 trash_dtor, trash_init, trash_fini, 346 #else 347 NULL, NULL, NULL, 348 #endif 349 UMA_ALIGN_PTR, 0); 350 uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc); 351 if (nmbjumbo9 > 0) 352 nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9); 353 uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached"); 354 uma_zone_set_maxaction(zone_jumbo9, mb_reclaim); 355 356 zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES, 357 mb_ctor_clust, 358 #ifdef INVARIANTS 359 trash_dtor, trash_init, trash_fini, 360 #else 361 NULL, NULL, NULL, 362 #endif 363 UMA_ALIGN_PTR, 0); 364 uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc); 365 if (nmbjumbo16 > 0) 366 nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16); 367 uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached"); 368 uma_zone_set_maxaction(zone_jumbo16, mb_reclaim); 369 370 /* 371 * Hook event handler for low-memory situation, used to 372 * drain protocols and push data back to the caches (UMA 373 * later pushes it back to VM). 374 */ 375 EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL, 376 EVENTHANDLER_PRI_FIRST); 377 } 378 SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL); 379 380 /* 381 * UMA backend page allocator for the jumbo frame zones. 382 * 383 * Allocates kernel virtual memory that is backed by contiguous physical 384 * pages. 385 */ 386 static void * 387 mbuf_jumbo_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait) 388 { 389 390 /* Inform UMA that this allocator uses kernel_map/object. */ 391 *flags = UMA_SLAB_KERNEL; 392 return ((void *)kmem_alloc_contig(kernel_arena, bytes, wait, 393 (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT)); 394 } 395 396 /* 397 * Constructor for Mbuf master zone. 398 * 399 * The 'arg' pointer points to a mb_args structure which 400 * contains call-specific information required to support the 401 * mbuf allocation API. See mbuf.h. 402 */ 403 static int 404 mb_ctor_mbuf(void *mem, int size, void *arg, int how) 405 { 406 struct mbuf *m; 407 struct mb_args *args; 408 int error; 409 int flags; 410 short type; 411 412 #ifdef INVARIANTS 413 trash_ctor(mem, size, arg, how); 414 #endif 415 args = (struct mb_args *)arg; 416 type = args->type; 417 418 /* 419 * The mbuf is initialized later. The caller has the 420 * responsibility to set up any MAC labels too. 421 */ 422 if (type == MT_NOINIT) 423 return (0); 424 425 m = (struct mbuf *)mem; 426 flags = args->flags; 427 MPASS((flags & M_NOFREE) == 0); 428 429 error = m_init(m, how, type, flags); 430 431 return (error); 432 } 433 434 /* 435 * The Mbuf master zone destructor. 436 */ 437 static void 438 mb_dtor_mbuf(void *mem, int size, void *arg) 439 { 440 struct mbuf *m; 441 unsigned long flags; 442 443 m = (struct mbuf *)mem; 444 flags = (unsigned long)arg; 445 446 KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__)); 447 if (!(flags & MB_DTOR_SKIP) && (m->m_flags & M_PKTHDR) && !SLIST_EMPTY(&m->m_pkthdr.tags)) 448 m_tag_delete_chain(m, NULL); 449 #ifdef INVARIANTS 450 trash_dtor(mem, size, arg); 451 #endif 452 } 453 454 /* 455 * The Mbuf Packet zone destructor. 456 */ 457 static void 458 mb_dtor_pack(void *mem, int size, void *arg) 459 { 460 struct mbuf *m; 461 462 m = (struct mbuf *)mem; 463 if ((m->m_flags & M_PKTHDR) != 0) 464 m_tag_delete_chain(m, NULL); 465 466 /* Make sure we've got a clean cluster back. */ 467 KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); 468 KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__)); 469 KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__)); 470 KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__)); 471 KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__)); 472 KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__)); 473 KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__)); 474 #ifdef INVARIANTS 475 trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg); 476 #endif 477 /* 478 * If there are processes blocked on zone_clust, waiting for pages 479 * to be freed up, * cause them to be woken up by draining the 480 * packet zone. We are exposed to a race here * (in the check for 481 * the UMA_ZFLAG_FULL) where we might miss the flag set, but that 482 * is deliberate. We don't want to acquire the zone lock for every 483 * mbuf free. 484 */ 485 if (uma_zone_exhausted_nolock(zone_clust)) 486 zone_drain(zone_pack); 487 } 488 489 /* 490 * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor. 491 * 492 * Here the 'arg' pointer points to the Mbuf which we 493 * are configuring cluster storage for. If 'arg' is 494 * empty we allocate just the cluster without setting 495 * the mbuf to it. See mbuf.h. 496 */ 497 static int 498 mb_ctor_clust(void *mem, int size, void *arg, int how) 499 { 500 struct mbuf *m; 501 502 #ifdef INVARIANTS 503 trash_ctor(mem, size, arg, how); 504 #endif 505 m = (struct mbuf *)arg; 506 if (m != NULL) { 507 m->m_ext.ext_buf = (caddr_t)mem; 508 m->m_data = m->m_ext.ext_buf; 509 m->m_flags |= M_EXT; 510 m->m_ext.ext_free = NULL; 511 m->m_ext.ext_arg1 = NULL; 512 m->m_ext.ext_arg2 = NULL; 513 m->m_ext.ext_size = size; 514 m->m_ext.ext_type = m_gettype(size); 515 m->m_ext.ext_flags = EXT_FLAG_EMBREF; 516 m->m_ext.ext_count = 1; 517 } 518 519 return (0); 520 } 521 522 /* 523 * The Packet secondary zone's init routine, executed on the 524 * object's transition from mbuf keg slab to zone cache. 525 */ 526 static int 527 mb_zinit_pack(void *mem, int size, int how) 528 { 529 struct mbuf *m; 530 531 m = (struct mbuf *)mem; /* m is virgin. */ 532 if (uma_zalloc_arg(zone_clust, m, how) == NULL || 533 m->m_ext.ext_buf == NULL) 534 return (ENOMEM); 535 m->m_ext.ext_type = EXT_PACKET; /* Override. */ 536 #ifdef INVARIANTS 537 trash_init(m->m_ext.ext_buf, MCLBYTES, how); 538 #endif 539 return (0); 540 } 541 542 /* 543 * The Packet secondary zone's fini routine, executed on the 544 * object's transition from zone cache to keg slab. 545 */ 546 static void 547 mb_zfini_pack(void *mem, int size) 548 { 549 struct mbuf *m; 550 551 m = (struct mbuf *)mem; 552 #ifdef INVARIANTS 553 trash_fini(m->m_ext.ext_buf, MCLBYTES); 554 #endif 555 uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL); 556 #ifdef INVARIANTS 557 trash_dtor(mem, size, NULL); 558 #endif 559 } 560 561 /* 562 * The "packet" keg constructor. 563 */ 564 static int 565 mb_ctor_pack(void *mem, int size, void *arg, int how) 566 { 567 struct mbuf *m; 568 struct mb_args *args; 569 int error, flags; 570 short type; 571 572 m = (struct mbuf *)mem; 573 args = (struct mb_args *)arg; 574 flags = args->flags; 575 type = args->type; 576 MPASS((flags & M_NOFREE) == 0); 577 578 #ifdef INVARIANTS 579 trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how); 580 #endif 581 582 error = m_init(m, how, type, flags); 583 584 /* m_ext is already initialized. */ 585 m->m_data = m->m_ext.ext_buf; 586 m->m_flags = (flags | M_EXT); 587 588 return (error); 589 } 590 591 /* 592 * This is the protocol drain routine. Called by UMA whenever any of the 593 * mbuf zones is closed to its limit. 594 * 595 * No locks should be held when this is called. The drain routines have to 596 * presently acquire some locks which raises the possibility of lock order 597 * reversal. 598 */ 599 static void 600 mb_reclaim(uma_zone_t zone __unused, int pending __unused) 601 { 602 struct domain *dp; 603 struct protosw *pr; 604 605 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, __func__); 606 607 for (dp = domains; dp != NULL; dp = dp->dom_next) 608 for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) 609 if (pr->pr_drain != NULL) 610 (*pr->pr_drain)(); 611 } 612 613 /* 614 * Clean up after mbufs with M_EXT storage attached to them if the 615 * reference count hits 1. 616 */ 617 void 618 mb_free_ext(struct mbuf *m) 619 { 620 volatile u_int *refcnt; 621 struct mbuf *mref; 622 int freembuf; 623 624 KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m)); 625 626 /* See if this is the mbuf that holds the embedded refcount. */ 627 if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) { 628 refcnt = &m->m_ext.ext_count; 629 mref = m; 630 } else { 631 KASSERT(m->m_ext.ext_cnt != NULL, 632 ("%s: no refcounting pointer on %p", __func__, m)); 633 refcnt = m->m_ext.ext_cnt; 634 mref = __containerof(refcnt, struct mbuf, m_ext.ext_count); 635 } 636 637 /* 638 * Check if the header is embedded in the cluster. It is 639 * important that we can't touch any of the mbuf fields 640 * after we have freed the external storage, since mbuf 641 * could have been embedded in it. For now, the mbufs 642 * embedded into the cluster are always of type EXT_EXTREF, 643 * and for this type we won't free the mref. 644 */ 645 if (m->m_flags & M_NOFREE) { 646 freembuf = 0; 647 KASSERT(m->m_ext.ext_type == EXT_EXTREF, 648 ("%s: no-free mbuf %p has wrong type", __func__, m)); 649 } else 650 freembuf = 1; 651 652 /* Free attached storage if this mbuf is the only reference to it. */ 653 if (*refcnt == 1 || atomic_fetchadd_int(refcnt, -1) == 1) { 654 switch (m->m_ext.ext_type) { 655 case EXT_PACKET: 656 /* The packet zone is special. */ 657 if (*refcnt == 0) 658 *refcnt = 1; 659 uma_zfree(zone_pack, mref); 660 break; 661 case EXT_CLUSTER: 662 uma_zfree(zone_clust, m->m_ext.ext_buf); 663 uma_zfree(zone_mbuf, mref); 664 break; 665 case EXT_JUMBOP: 666 uma_zfree(zone_jumbop, m->m_ext.ext_buf); 667 uma_zfree(zone_mbuf, mref); 668 break; 669 case EXT_JUMBO9: 670 uma_zfree(zone_jumbo9, m->m_ext.ext_buf); 671 uma_zfree(zone_mbuf, mref); 672 break; 673 case EXT_JUMBO16: 674 uma_zfree(zone_jumbo16, m->m_ext.ext_buf); 675 uma_zfree(zone_mbuf, mref); 676 break; 677 case EXT_SFBUF: 678 sf_ext_free(m->m_ext.ext_arg1, m->m_ext.ext_arg2); 679 uma_zfree(zone_mbuf, mref); 680 break; 681 case EXT_SFBUF_NOCACHE: 682 sf_ext_free_nocache(m->m_ext.ext_arg1, 683 m->m_ext.ext_arg2); 684 uma_zfree(zone_mbuf, mref); 685 break; 686 case EXT_NET_DRV: 687 case EXT_MOD_TYPE: 688 case EXT_DISPOSABLE: 689 KASSERT(m->m_ext.ext_free != NULL, 690 ("%s: ext_free not set", __func__)); 691 (*(m->m_ext.ext_free))(m, m->m_ext.ext_arg1, 692 m->m_ext.ext_arg2); 693 uma_zfree(zone_mbuf, mref); 694 break; 695 case EXT_EXTREF: 696 KASSERT(m->m_ext.ext_free != NULL, 697 ("%s: ext_free not set", __func__)); 698 (*(m->m_ext.ext_free))(m, m->m_ext.ext_arg1, 699 m->m_ext.ext_arg2); 700 break; 701 default: 702 KASSERT(m->m_ext.ext_type == 0, 703 ("%s: unknown ext_type", __func__)); 704 } 705 } 706 707 if (freembuf && m != mref) 708 uma_zfree(zone_mbuf, m); 709 } 710 711 /* 712 * Official mbuf(9) allocation KPI for stack and drivers: 713 * 714 * m_get() - a single mbuf without any attachments, sys/mbuf.h. 715 * m_gethdr() - a single mbuf initialized as M_PKTHDR, sys/mbuf.h. 716 * m_getcl() - an mbuf + 2k cluster, sys/mbuf.h. 717 * m_clget() - attach cluster to already allocated mbuf. 718 * m_cljget() - attach jumbo cluster to already allocated mbuf. 719 * m_get2() - allocate minimum mbuf that would fit size argument. 720 * m_getm2() - allocate a chain of mbufs/clusters. 721 * m_extadd() - attach external cluster to mbuf. 722 * 723 * m_free() - free single mbuf with its tags and ext, sys/mbuf.h. 724 * m_freem() - free chain of mbufs. 725 */ 726 727 int 728 m_clget(struct mbuf *m, int how) 729 { 730 731 KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT", 732 __func__, m)); 733 m->m_ext.ext_buf = (char *)NULL; 734 uma_zalloc_arg(zone_clust, m, how); 735 /* 736 * On a cluster allocation failure, drain the packet zone and retry, 737 * we might be able to loosen a few clusters up on the drain. 738 */ 739 if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) { 740 zone_drain(zone_pack); 741 uma_zalloc_arg(zone_clust, m, how); 742 } 743 MBUF_PROBE2(m__clget, m, how); 744 return (m->m_flags & M_EXT); 745 } 746 747 /* 748 * m_cljget() is different from m_clget() as it can allocate clusters without 749 * attaching them to an mbuf. In that case the return value is the pointer 750 * to the cluster of the requested size. If an mbuf was specified, it gets 751 * the cluster attached to it and the return value can be safely ignored. 752 * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES. 753 */ 754 void * 755 m_cljget(struct mbuf *m, int how, int size) 756 { 757 uma_zone_t zone; 758 void *retval; 759 760 if (m != NULL) { 761 KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT", 762 __func__, m)); 763 m->m_ext.ext_buf = NULL; 764 } 765 766 zone = m_getzone(size); 767 retval = uma_zalloc_arg(zone, m, how); 768 769 MBUF_PROBE4(m__cljget, m, how, size, retval); 770 771 return (retval); 772 } 773 774 /* 775 * m_get2() allocates minimum mbuf that would fit "size" argument. 776 */ 777 struct mbuf * 778 m_get2(int size, int how, short type, int flags) 779 { 780 struct mb_args args; 781 struct mbuf *m, *n; 782 783 args.flags = flags; 784 args.type = type; 785 786 if (size <= MHLEN || (size <= MLEN && (flags & M_PKTHDR) == 0)) 787 return (uma_zalloc_arg(zone_mbuf, &args, how)); 788 if (size <= MCLBYTES) 789 return (uma_zalloc_arg(zone_pack, &args, how)); 790 791 if (size > MJUMPAGESIZE) 792 return (NULL); 793 794 m = uma_zalloc_arg(zone_mbuf, &args, how); 795 if (m == NULL) 796 return (NULL); 797 798 n = uma_zalloc_arg(zone_jumbop, m, how); 799 if (n == NULL) { 800 uma_zfree(zone_mbuf, m); 801 return (NULL); 802 } 803 804 return (m); 805 } 806 807 /* 808 * m_getjcl() returns an mbuf with a cluster of the specified size attached. 809 * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES. 810 */ 811 struct mbuf * 812 m_getjcl(int how, short type, int flags, int size) 813 { 814 struct mb_args args; 815 struct mbuf *m, *n; 816 uma_zone_t zone; 817 818 if (size == MCLBYTES) 819 return m_getcl(how, type, flags); 820 821 args.flags = flags; 822 args.type = type; 823 824 m = uma_zalloc_arg(zone_mbuf, &args, how); 825 if (m == NULL) 826 return (NULL); 827 828 zone = m_getzone(size); 829 n = uma_zalloc_arg(zone, m, how); 830 if (n == NULL) { 831 uma_zfree(zone_mbuf, m); 832 return (NULL); 833 } 834 return (m); 835 } 836 837 /* 838 * Allocate a given length worth of mbufs and/or clusters (whatever fits 839 * best) and return a pointer to the top of the allocated chain. If an 840 * existing mbuf chain is provided, then we will append the new chain 841 * to the existing one but still return the top of the newly allocated 842 * chain. 843 */ 844 struct mbuf * 845 m_getm2(struct mbuf *m, int len, int how, short type, int flags) 846 { 847 struct mbuf *mb, *nm = NULL, *mtail = NULL; 848 849 KASSERT(len >= 0, ("%s: len is < 0", __func__)); 850 851 /* Validate flags. */ 852 flags &= (M_PKTHDR | M_EOR); 853 854 /* Packet header mbuf must be first in chain. */ 855 if ((flags & M_PKTHDR) && m != NULL) 856 flags &= ~M_PKTHDR; 857 858 /* Loop and append maximum sized mbufs to the chain tail. */ 859 while (len > 0) { 860 if (len > MCLBYTES) 861 mb = m_getjcl(how, type, (flags & M_PKTHDR), 862 MJUMPAGESIZE); 863 else if (len >= MINCLSIZE) 864 mb = m_getcl(how, type, (flags & M_PKTHDR)); 865 else if (flags & M_PKTHDR) 866 mb = m_gethdr(how, type); 867 else 868 mb = m_get(how, type); 869 870 /* Fail the whole operation if one mbuf can't be allocated. */ 871 if (mb == NULL) { 872 if (nm != NULL) 873 m_freem(nm); 874 return (NULL); 875 } 876 877 /* Book keeping. */ 878 len -= M_SIZE(mb); 879 if (mtail != NULL) 880 mtail->m_next = mb; 881 else 882 nm = mb; 883 mtail = mb; 884 flags &= ~M_PKTHDR; /* Only valid on the first mbuf. */ 885 } 886 if (flags & M_EOR) 887 mtail->m_flags |= M_EOR; /* Only valid on the last mbuf. */ 888 889 /* If mbuf was supplied, append new chain to the end of it. */ 890 if (m != NULL) { 891 for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next) 892 ; 893 mtail->m_next = nm; 894 mtail->m_flags &= ~M_EOR; 895 } else 896 m = nm; 897 898 return (m); 899 } 900 901 /*- 902 * Configure a provided mbuf to refer to the provided external storage 903 * buffer and setup a reference count for said buffer. 904 * 905 * Arguments: 906 * mb The existing mbuf to which to attach the provided buffer. 907 * buf The address of the provided external storage buffer. 908 * size The size of the provided buffer. 909 * freef A pointer to a routine that is responsible for freeing the 910 * provided external storage buffer. 911 * args A pointer to an argument structure (of any type) to be passed 912 * to the provided freef routine (may be NULL). 913 * flags Any other flags to be passed to the provided mbuf. 914 * type The type that the external storage buffer should be 915 * labeled with. 916 * 917 * Returns: 918 * Nothing. 919 */ 920 void 921 m_extadd(struct mbuf *mb, caddr_t buf, u_int size, 922 void (*freef)(struct mbuf *, void *, void *), void *arg1, void *arg2, 923 int flags, int type) 924 { 925 926 KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__)); 927 928 mb->m_flags |= (M_EXT | flags); 929 mb->m_ext.ext_buf = buf; 930 mb->m_data = mb->m_ext.ext_buf; 931 mb->m_ext.ext_size = size; 932 mb->m_ext.ext_free = freef; 933 mb->m_ext.ext_arg1 = arg1; 934 mb->m_ext.ext_arg2 = arg2; 935 mb->m_ext.ext_type = type; 936 937 if (type != EXT_EXTREF) { 938 mb->m_ext.ext_count = 1; 939 mb->m_ext.ext_flags = EXT_FLAG_EMBREF; 940 } else 941 mb->m_ext.ext_flags = 0; 942 } 943 944 /* 945 * Free an entire chain of mbufs and associated external buffers, if 946 * applicable. 947 */ 948 void 949 m_freem(struct mbuf *mb) 950 { 951 952 MBUF_PROBE1(m__freem, mb); 953 while (mb != NULL) 954 mb = m_free(mb); 955 } 956