1 /*- 2 * Copyright (c) 2004, 2005, 3 * Bosko Milekic <bmilekic@FreeBSD.org>. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice unmodified, this list of conditions and the following 10 * disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_param.h" 32 33 #include <sys/param.h> 34 #include <sys/malloc.h> 35 #include <sys/types.h> 36 #include <sys/systm.h> 37 #include <sys/mbuf.h> 38 #include <sys/domain.h> 39 #include <sys/eventhandler.h> 40 #include <sys/kernel.h> 41 #include <sys/lock.h> 42 #include <sys/mutex.h> 43 #include <sys/protosw.h> 44 #include <sys/smp.h> 45 #include <sys/sysctl.h> 46 47 #include <vm/vm.h> 48 #include <vm/vm_extern.h> 49 #include <vm/vm_kern.h> 50 #include <vm/vm_page.h> 51 #include <vm/vm_map.h> 52 #include <vm/uma.h> 53 #include <vm/uma_dbg.h> 54 55 /* 56 * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA 57 * Zones. 58 * 59 * Mbuf Clusters (2K, contiguous) are allocated from the Cluster 60 * Zone. The Zone can be capped at kern.ipc.nmbclusters, if the 61 * administrator so desires. 62 * 63 * Mbufs are allocated from a UMA Master Zone called the Mbuf 64 * Zone. 65 * 66 * Additionally, FreeBSD provides a Packet Zone, which it 67 * configures as a Secondary Zone to the Mbuf Master Zone, 68 * thus sharing backend Slab kegs with the Mbuf Master Zone. 69 * 70 * Thus common-case allocations and locking are simplified: 71 * 72 * m_clget() m_getcl() 73 * | | 74 * | .------------>[(Packet Cache)] m_get(), m_gethdr() 75 * | | [ Packet ] | 76 * [(Cluster Cache)] [ Secondary ] [ (Mbuf Cache) ] 77 * [ Cluster Zone ] [ Zone ] [ Mbuf Master Zone ] 78 * | \________ | 79 * [ Cluster Keg ] \ / 80 * | [ Mbuf Keg ] 81 * [ Cluster Slabs ] | 82 * | [ Mbuf Slabs ] 83 * \____________(VM)_________________/ 84 * 85 * 86 * Whenever an object is allocated with uma_zalloc() out of 87 * one of the Zones its _ctor_ function is executed. The same 88 * for any deallocation through uma_zfree() the _dtor_ function 89 * is executed. 90 * 91 * Caches are per-CPU and are filled from the Master Zone. 92 * 93 * Whenever an object is allocated from the underlying global 94 * memory pool it gets pre-initialized with the _zinit_ functions. 95 * When the Keg's are overfull objects get decomissioned with 96 * _zfini_ functions and free'd back to the global memory pool. 97 * 98 */ 99 100 int nmbufs; /* limits number of mbufs */ 101 int nmbclusters; /* limits number of mbuf clusters */ 102 int nmbjumbop; /* limits number of page size jumbo clusters */ 103 int nmbjumbo9; /* limits number of 9k jumbo clusters */ 104 int nmbjumbo16; /* limits number of 16k jumbo clusters */ 105 106 static quad_t maxmbufmem; /* overall real memory limit for all mbufs */ 107 108 SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxmbufmem, 0, 109 "Maximum real memory allocatable to various mbuf types"); 110 111 /* 112 * tunable_mbinit() has to be run before any mbuf allocations are done. 113 */ 114 static void 115 tunable_mbinit(void *dummy) 116 { 117 quad_t realmem; 118 119 /* 120 * The default limit for all mbuf related memory is 1/2 of all 121 * available kernel memory (physical or kmem). 122 * At most it can be 3/4 of available kernel memory. 123 */ 124 realmem = qmin((quad_t)physmem * PAGE_SIZE, vm_kmem_size); 125 maxmbufmem = realmem / 2; 126 TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem); 127 if (maxmbufmem > realmem / 4 * 3) 128 maxmbufmem = realmem / 4 * 3; 129 130 TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); 131 if (nmbclusters == 0) 132 nmbclusters = maxmbufmem / MCLBYTES / 4; 133 134 TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop); 135 if (nmbjumbop == 0) 136 nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4; 137 138 TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9); 139 if (nmbjumbo9 == 0) 140 nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6; 141 142 TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16); 143 if (nmbjumbo16 == 0) 144 nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6; 145 146 /* 147 * We need at least as many mbufs as we have clusters of 148 * the various types added together. 149 */ 150 TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs); 151 if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) 152 nmbufs = lmax(maxmbufmem / MSIZE / 5, 153 nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16); 154 } 155 SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL); 156 157 static int 158 sysctl_nmbclusters(SYSCTL_HANDLER_ARGS) 159 { 160 int error, newnmbclusters; 161 162 newnmbclusters = nmbclusters; 163 error = sysctl_handle_int(oidp, &newnmbclusters, 0, req); 164 if (error == 0 && req->newptr && newnmbclusters != nmbclusters) { 165 if (newnmbclusters > nmbclusters && 166 nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 167 nmbclusters = newnmbclusters; 168 nmbclusters = uma_zone_set_max(zone_clust, nmbclusters); 169 EVENTHANDLER_INVOKE(nmbclusters_change); 170 } else 171 error = EINVAL; 172 } 173 return (error); 174 } 175 SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, CTLTYPE_INT|CTLFLAG_RW, 176 &nmbclusters, 0, sysctl_nmbclusters, "IU", 177 "Maximum number of mbuf clusters allowed"); 178 179 static int 180 sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS) 181 { 182 int error, newnmbjumbop; 183 184 newnmbjumbop = nmbjumbop; 185 error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req); 186 if (error == 0 && req->newptr && newnmbjumbop != nmbjumbop) { 187 if (newnmbjumbop > nmbjumbop && 188 nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 189 nmbjumbop = newnmbjumbop; 190 nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop); 191 } else 192 error = EINVAL; 193 } 194 return (error); 195 } 196 SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW, 197 &nmbjumbop, 0, sysctl_nmbjumbop, "IU", 198 "Maximum number of mbuf page size jumbo clusters allowed"); 199 200 static int 201 sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS) 202 { 203 int error, newnmbjumbo9; 204 205 newnmbjumbo9 = nmbjumbo9; 206 error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req); 207 if (error == 0 && req->newptr && newnmbjumbo9 != nmbjumbo9) { 208 if (newnmbjumbo9 > nmbjumbo9 && 209 nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 210 nmbjumbo9 = newnmbjumbo9; 211 nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9); 212 } else 213 error = EINVAL; 214 } 215 return (error); 216 } 217 SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW, 218 &nmbjumbo9, 0, sysctl_nmbjumbo9, "IU", 219 "Maximum number of mbuf 9k jumbo clusters allowed"); 220 221 static int 222 sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS) 223 { 224 int error, newnmbjumbo16; 225 226 newnmbjumbo16 = nmbjumbo16; 227 error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req); 228 if (error == 0 && req->newptr && newnmbjumbo16 != nmbjumbo16) { 229 if (newnmbjumbo16 > nmbjumbo16 && 230 nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { 231 nmbjumbo16 = newnmbjumbo16; 232 nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16); 233 } else 234 error = EINVAL; 235 } 236 return (error); 237 } 238 SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT|CTLFLAG_RW, 239 &nmbjumbo16, 0, sysctl_nmbjumbo16, "IU", 240 "Maximum number of mbuf 16k jumbo clusters allowed"); 241 242 static int 243 sysctl_nmbufs(SYSCTL_HANDLER_ARGS) 244 { 245 int error, newnmbufs; 246 247 newnmbufs = nmbufs; 248 error = sysctl_handle_int(oidp, &newnmbufs, 0, req); 249 if (error == 0 && req->newptr && newnmbufs != nmbufs) { 250 if (newnmbufs > nmbufs) { 251 nmbufs = newnmbufs; 252 nmbufs = uma_zone_set_max(zone_mbuf, nmbufs); 253 EVENTHANDLER_INVOKE(nmbufs_change); 254 } else 255 error = EINVAL; 256 } 257 return (error); 258 } 259 SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW, 260 &nmbufs, 0, sysctl_nmbufs, "IU", 261 "Maximum number of mbufs allowed"); 262 263 /* 264 * Zones from which we allocate. 265 */ 266 uma_zone_t zone_mbuf; 267 uma_zone_t zone_clust; 268 uma_zone_t zone_pack; 269 uma_zone_t zone_jumbop; 270 uma_zone_t zone_jumbo9; 271 uma_zone_t zone_jumbo16; 272 273 /* 274 * Local prototypes. 275 */ 276 static int mb_ctor_mbuf(void *, int, void *, int); 277 static int mb_ctor_clust(void *, int, void *, int); 278 static int mb_ctor_pack(void *, int, void *, int); 279 static void mb_dtor_mbuf(void *, int, void *); 280 static void mb_dtor_pack(void *, int, void *); 281 static int mb_zinit_pack(void *, int, int); 282 static void mb_zfini_pack(void *, int); 283 static void mb_reclaim(uma_zone_t, int); 284 static void *mbuf_jumbo_alloc(uma_zone_t, vm_size_t, uint8_t *, int); 285 286 /* Ensure that MSIZE is a power of 2. */ 287 CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE); 288 289 /* 290 * Initialize FreeBSD Network buffer allocation. 291 */ 292 static void 293 mbuf_init(void *dummy) 294 { 295 296 /* 297 * Configure UMA zones for Mbufs, Clusters, and Packets. 298 */ 299 zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE, 300 mb_ctor_mbuf, mb_dtor_mbuf, 301 #ifdef INVARIANTS 302 trash_init, trash_fini, 303 #else 304 NULL, NULL, 305 #endif 306 MSIZE - 1, UMA_ZONE_MAXBUCKET); 307 if (nmbufs > 0) 308 nmbufs = uma_zone_set_max(zone_mbuf, nmbufs); 309 uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached"); 310 uma_zone_set_maxaction(zone_mbuf, mb_reclaim); 311 312 zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES, 313 mb_ctor_clust, 314 #ifdef INVARIANTS 315 trash_dtor, trash_init, trash_fini, 316 #else 317 NULL, NULL, NULL, 318 #endif 319 UMA_ALIGN_PTR, 0); 320 if (nmbclusters > 0) 321 nmbclusters = uma_zone_set_max(zone_clust, nmbclusters); 322 uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached"); 323 uma_zone_set_maxaction(zone_clust, mb_reclaim); 324 325 zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack, 326 mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf); 327 328 /* Make jumbo frame zone too. Page size, 9k and 16k. */ 329 zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE, 330 mb_ctor_clust, 331 #ifdef INVARIANTS 332 trash_dtor, trash_init, trash_fini, 333 #else 334 NULL, NULL, NULL, 335 #endif 336 UMA_ALIGN_PTR, 0); 337 if (nmbjumbop > 0) 338 nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop); 339 uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached"); 340 uma_zone_set_maxaction(zone_jumbop, mb_reclaim); 341 342 zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES, 343 mb_ctor_clust, 344 #ifdef INVARIANTS 345 trash_dtor, trash_init, trash_fini, 346 #else 347 NULL, NULL, NULL, 348 #endif 349 UMA_ALIGN_PTR, 0); 350 uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc); 351 if (nmbjumbo9 > 0) 352 nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9); 353 uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached"); 354 uma_zone_set_maxaction(zone_jumbo9, mb_reclaim); 355 356 zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES, 357 mb_ctor_clust, 358 #ifdef INVARIANTS 359 trash_dtor, trash_init, trash_fini, 360 #else 361 NULL, NULL, NULL, 362 #endif 363 UMA_ALIGN_PTR, 0); 364 uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc); 365 if (nmbjumbo16 > 0) 366 nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16); 367 uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached"); 368 uma_zone_set_maxaction(zone_jumbo16, mb_reclaim); 369 370 /* 371 * Hook event handler for low-memory situation, used to 372 * drain protocols and push data back to the caches (UMA 373 * later pushes it back to VM). 374 */ 375 EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL, 376 EVENTHANDLER_PRI_FIRST); 377 } 378 SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL); 379 380 /* 381 * UMA backend page allocator for the jumbo frame zones. 382 * 383 * Allocates kernel virtual memory that is backed by contiguous physical 384 * pages. 385 */ 386 static void * 387 mbuf_jumbo_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait) 388 { 389 390 /* Inform UMA that this allocator uses kernel_map/object. */ 391 *flags = UMA_SLAB_KERNEL; 392 return ((void *)kmem_alloc_contig(kernel_arena, bytes, wait, 393 (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT)); 394 } 395 396 /* 397 * Constructor for Mbuf master zone. 398 * 399 * The 'arg' pointer points to a mb_args structure which 400 * contains call-specific information required to support the 401 * mbuf allocation API. See mbuf.h. 402 */ 403 static int 404 mb_ctor_mbuf(void *mem, int size, void *arg, int how) 405 { 406 struct mbuf *m; 407 struct mb_args *args; 408 int error; 409 int flags; 410 short type; 411 412 #ifdef INVARIANTS 413 trash_ctor(mem, size, arg, how); 414 #endif 415 args = (struct mb_args *)arg; 416 type = args->type; 417 418 /* 419 * The mbuf is initialized later. The caller has the 420 * responsibility to set up any MAC labels too. 421 */ 422 if (type == MT_NOINIT) 423 return (0); 424 425 m = (struct mbuf *)mem; 426 flags = args->flags; 427 428 error = m_init(m, how, type, flags); 429 430 return (error); 431 } 432 433 /* 434 * The Mbuf master zone destructor. 435 */ 436 static void 437 mb_dtor_mbuf(void *mem, int size, void *arg) 438 { 439 struct mbuf *m; 440 unsigned long flags; 441 442 m = (struct mbuf *)mem; 443 flags = (unsigned long)arg; 444 445 KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__)); 446 if ((m->m_flags & M_PKTHDR) && !SLIST_EMPTY(&m->m_pkthdr.tags)) 447 m_tag_delete_chain(m, NULL); 448 #ifdef INVARIANTS 449 trash_dtor(mem, size, arg); 450 #endif 451 } 452 453 /* 454 * The Mbuf Packet zone destructor. 455 */ 456 static void 457 mb_dtor_pack(void *mem, int size, void *arg) 458 { 459 struct mbuf *m; 460 461 m = (struct mbuf *)mem; 462 if ((m->m_flags & M_PKTHDR) != 0) 463 m_tag_delete_chain(m, NULL); 464 465 /* Make sure we've got a clean cluster back. */ 466 KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__)); 467 KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__)); 468 KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__)); 469 KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__)); 470 KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__)); 471 KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__)); 472 KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__)); 473 #ifdef INVARIANTS 474 trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg); 475 #endif 476 /* 477 * If there are processes blocked on zone_clust, waiting for pages 478 * to be freed up, * cause them to be woken up by draining the 479 * packet zone. We are exposed to a race here * (in the check for 480 * the UMA_ZFLAG_FULL) where we might miss the flag set, but that 481 * is deliberate. We don't want to acquire the zone lock for every 482 * mbuf free. 483 */ 484 if (uma_zone_exhausted_nolock(zone_clust)) 485 zone_drain(zone_pack); 486 } 487 488 /* 489 * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor. 490 * 491 * Here the 'arg' pointer points to the Mbuf which we 492 * are configuring cluster storage for. If 'arg' is 493 * empty we allocate just the cluster without setting 494 * the mbuf to it. See mbuf.h. 495 */ 496 static int 497 mb_ctor_clust(void *mem, int size, void *arg, int how) 498 { 499 struct mbuf *m; 500 501 #ifdef INVARIANTS 502 trash_ctor(mem, size, arg, how); 503 #endif 504 m = (struct mbuf *)arg; 505 if (m != NULL) { 506 m->m_ext.ext_buf = (caddr_t)mem; 507 m->m_data = m->m_ext.ext_buf; 508 m->m_flags |= M_EXT; 509 m->m_ext.ext_free = NULL; 510 m->m_ext.ext_arg1 = NULL; 511 m->m_ext.ext_arg2 = NULL; 512 m->m_ext.ext_size = size; 513 m->m_ext.ext_type = m_gettype(size); 514 m->m_ext.ext_flags = EXT_FLAG_EMBREF; 515 m->m_ext.ext_count = 1; 516 } 517 518 return (0); 519 } 520 521 /* 522 * The Packet secondary zone's init routine, executed on the 523 * object's transition from mbuf keg slab to zone cache. 524 */ 525 static int 526 mb_zinit_pack(void *mem, int size, int how) 527 { 528 struct mbuf *m; 529 530 m = (struct mbuf *)mem; /* m is virgin. */ 531 if (uma_zalloc_arg(zone_clust, m, how) == NULL || 532 m->m_ext.ext_buf == NULL) 533 return (ENOMEM); 534 m->m_ext.ext_type = EXT_PACKET; /* Override. */ 535 #ifdef INVARIANTS 536 trash_init(m->m_ext.ext_buf, MCLBYTES, how); 537 #endif 538 return (0); 539 } 540 541 /* 542 * The Packet secondary zone's fini routine, executed on the 543 * object's transition from zone cache to keg slab. 544 */ 545 static void 546 mb_zfini_pack(void *mem, int size) 547 { 548 struct mbuf *m; 549 550 m = (struct mbuf *)mem; 551 #ifdef INVARIANTS 552 trash_fini(m->m_ext.ext_buf, MCLBYTES); 553 #endif 554 uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL); 555 #ifdef INVARIANTS 556 trash_dtor(mem, size, NULL); 557 #endif 558 } 559 560 /* 561 * The "packet" keg constructor. 562 */ 563 static int 564 mb_ctor_pack(void *mem, int size, void *arg, int how) 565 { 566 struct mbuf *m; 567 struct mb_args *args; 568 int error, flags; 569 short type; 570 571 m = (struct mbuf *)mem; 572 args = (struct mb_args *)arg; 573 flags = args->flags; 574 type = args->type; 575 576 #ifdef INVARIANTS 577 trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how); 578 #endif 579 580 error = m_init(m, how, type, flags); 581 582 /* m_ext is already initialized. */ 583 m->m_data = m->m_ext.ext_buf; 584 m->m_flags = (flags | M_EXT); 585 586 return (error); 587 } 588 589 /* 590 * This is the protocol drain routine. Called by UMA whenever any of the 591 * mbuf zones is closed to its limit. 592 * 593 * No locks should be held when this is called. The drain routines have to 594 * presently acquire some locks which raises the possibility of lock order 595 * reversal. 596 */ 597 static void 598 mb_reclaim(uma_zone_t zone __unused, int pending __unused) 599 { 600 struct domain *dp; 601 struct protosw *pr; 602 603 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, __func__); 604 605 for (dp = domains; dp != NULL; dp = dp->dom_next) 606 for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++) 607 if (pr->pr_drain != NULL) 608 (*pr->pr_drain)(); 609 } 610 611 /* 612 * Clean up after mbufs with M_EXT storage attached to them if the 613 * reference count hits 1. 614 */ 615 void 616 mb_free_ext(struct mbuf *m) 617 { 618 volatile u_int *refcnt; 619 struct mbuf *mref; 620 int freembuf; 621 622 KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m)); 623 624 /* See if this is the mbuf that holds the embedded refcount. */ 625 if (m->m_ext.ext_flags & EXT_FLAG_EMBREF) { 626 refcnt = &m->m_ext.ext_count; 627 mref = m; 628 } else { 629 KASSERT(m->m_ext.ext_cnt != NULL, 630 ("%s: no refcounting pointer on %p", __func__, m)); 631 refcnt = m->m_ext.ext_cnt; 632 mref = __containerof(refcnt, struct mbuf, m_ext.ext_count); 633 } 634 635 /* 636 * Check if the header is embedded in the cluster. It is 637 * important that we can't touch any of the mbuf fields 638 * after we have freed the external storage, since mbuf 639 * could have been embedded in it. 640 */ 641 freembuf = (m->m_flags & M_NOFREE) ? 0 : 1; 642 643 /* Free attached storage if this mbuf is the only reference to it. */ 644 if (*refcnt == 1 || atomic_fetchadd_int(refcnt, -1) == 1) { 645 switch (m->m_ext.ext_type) { 646 case EXT_PACKET: 647 /* The packet zone is special. */ 648 if (*refcnt == 0) 649 *refcnt = 1; 650 uma_zfree(zone_pack, mref); 651 break; 652 case EXT_CLUSTER: 653 uma_zfree(zone_clust, m->m_ext.ext_buf); 654 uma_zfree(zone_mbuf, mref); 655 break; 656 case EXT_JUMBOP: 657 uma_zfree(zone_jumbop, m->m_ext.ext_buf); 658 uma_zfree(zone_mbuf, mref); 659 break; 660 case EXT_JUMBO9: 661 uma_zfree(zone_jumbo9, m->m_ext.ext_buf); 662 uma_zfree(zone_mbuf, mref); 663 break; 664 case EXT_JUMBO16: 665 uma_zfree(zone_jumbo16, m->m_ext.ext_buf); 666 uma_zfree(zone_mbuf, mref); 667 break; 668 case EXT_SFBUF: 669 sf_ext_free(m->m_ext.ext_arg1, m->m_ext.ext_arg2); 670 uma_zfree(zone_mbuf, mref); 671 break; 672 case EXT_SFBUF_NOCACHE: 673 sf_ext_free_nocache(m->m_ext.ext_arg1, 674 m->m_ext.ext_arg2); 675 uma_zfree(zone_mbuf, mref); 676 break; 677 case EXT_NET_DRV: 678 case EXT_MOD_TYPE: 679 case EXT_DISPOSABLE: 680 KASSERT(m->m_ext.ext_free != NULL, 681 ("%s: ext_free not set", __func__)); 682 (*(m->m_ext.ext_free))(m, m->m_ext.ext_arg1, 683 m->m_ext.ext_arg2); 684 uma_zfree(zone_mbuf, mref); 685 break; 686 case EXT_EXTREF: 687 KASSERT(m->m_ext.ext_free != NULL, 688 ("%s: ext_free not set", __func__)); 689 (*(m->m_ext.ext_free))(m, m->m_ext.ext_arg1, 690 m->m_ext.ext_arg2); 691 break; 692 default: 693 KASSERT(m->m_ext.ext_type == 0, 694 ("%s: unknown ext_type", __func__)); 695 } 696 } 697 698 if (freembuf && m != mref) 699 uma_zfree(zone_mbuf, m); 700 } 701 702 /* 703 * Official mbuf(9) allocation KPI for stack and drivers: 704 * 705 * m_get() - a single mbuf without any attachments, sys/mbuf.h. 706 * m_gethdr() - a single mbuf initialized as M_PKTHDR, sys/mbuf.h. 707 * m_getcl() - an mbuf + 2k cluster, sys/mbuf.h. 708 * m_clget() - attach cluster to already allocated mbuf. 709 * m_cljget() - attach jumbo cluster to already allocated mbuf. 710 * m_get2() - allocate minimum mbuf that would fit size argument. 711 * m_getm2() - allocate a chain of mbufs/clusters. 712 * m_extadd() - attach external cluster to mbuf. 713 * 714 * m_free() - free single mbuf with its tags and ext, sys/mbuf.h. 715 * m_freem() - free chain of mbufs. 716 */ 717 718 int 719 m_clget(struct mbuf *m, int how) 720 { 721 722 KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT", 723 __func__, m)); 724 m->m_ext.ext_buf = (char *)NULL; 725 uma_zalloc_arg(zone_clust, m, how); 726 /* 727 * On a cluster allocation failure, drain the packet zone and retry, 728 * we might be able to loosen a few clusters up on the drain. 729 */ 730 if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) { 731 zone_drain(zone_pack); 732 uma_zalloc_arg(zone_clust, m, how); 733 } 734 return (m->m_flags & M_EXT); 735 } 736 737 /* 738 * m_cljget() is different from m_clget() as it can allocate clusters without 739 * attaching them to an mbuf. In that case the return value is the pointer 740 * to the cluster of the requested size. If an mbuf was specified, it gets 741 * the cluster attached to it and the return value can be safely ignored. 742 * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES. 743 */ 744 void * 745 m_cljget(struct mbuf *m, int how, int size) 746 { 747 uma_zone_t zone; 748 749 if (m != NULL) { 750 KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT", 751 __func__, m)); 752 m->m_ext.ext_buf = NULL; 753 } 754 755 zone = m_getzone(size); 756 return (uma_zalloc_arg(zone, m, how)); 757 } 758 759 /* 760 * m_get2() allocates minimum mbuf that would fit "size" argument. 761 */ 762 struct mbuf * 763 m_get2(int size, int how, short type, int flags) 764 { 765 struct mb_args args; 766 struct mbuf *m, *n; 767 768 args.flags = flags; 769 args.type = type; 770 771 if (size <= MHLEN || (size <= MLEN && (flags & M_PKTHDR) == 0)) 772 return (uma_zalloc_arg(zone_mbuf, &args, how)); 773 if (size <= MCLBYTES) 774 return (uma_zalloc_arg(zone_pack, &args, how)); 775 776 if (size > MJUMPAGESIZE) 777 return (NULL); 778 779 m = uma_zalloc_arg(zone_mbuf, &args, how); 780 if (m == NULL) 781 return (NULL); 782 783 n = uma_zalloc_arg(zone_jumbop, m, how); 784 if (n == NULL) { 785 uma_zfree(zone_mbuf, m); 786 return (NULL); 787 } 788 789 return (m); 790 } 791 792 /* 793 * m_getjcl() returns an mbuf with a cluster of the specified size attached. 794 * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES. 795 */ 796 struct mbuf * 797 m_getjcl(int how, short type, int flags, int size) 798 { 799 struct mb_args args; 800 struct mbuf *m, *n; 801 uma_zone_t zone; 802 803 if (size == MCLBYTES) 804 return m_getcl(how, type, flags); 805 806 args.flags = flags; 807 args.type = type; 808 809 m = uma_zalloc_arg(zone_mbuf, &args, how); 810 if (m == NULL) 811 return (NULL); 812 813 zone = m_getzone(size); 814 n = uma_zalloc_arg(zone, m, how); 815 if (n == NULL) { 816 uma_zfree(zone_mbuf, m); 817 return (NULL); 818 } 819 return (m); 820 } 821 822 /* 823 * Allocate a given length worth of mbufs and/or clusters (whatever fits 824 * best) and return a pointer to the top of the allocated chain. If an 825 * existing mbuf chain is provided, then we will append the new chain 826 * to the existing one but still return the top of the newly allocated 827 * chain. 828 */ 829 struct mbuf * 830 m_getm2(struct mbuf *m, int len, int how, short type, int flags) 831 { 832 struct mbuf *mb, *nm = NULL, *mtail = NULL; 833 834 KASSERT(len >= 0, ("%s: len is < 0", __func__)); 835 836 /* Validate flags. */ 837 flags &= (M_PKTHDR | M_EOR); 838 839 /* Packet header mbuf must be first in chain. */ 840 if ((flags & M_PKTHDR) && m != NULL) 841 flags &= ~M_PKTHDR; 842 843 /* Loop and append maximum sized mbufs to the chain tail. */ 844 while (len > 0) { 845 if (len > MCLBYTES) 846 mb = m_getjcl(how, type, (flags & M_PKTHDR), 847 MJUMPAGESIZE); 848 else if (len >= MINCLSIZE) 849 mb = m_getcl(how, type, (flags & M_PKTHDR)); 850 else if (flags & M_PKTHDR) 851 mb = m_gethdr(how, type); 852 else 853 mb = m_get(how, type); 854 855 /* Fail the whole operation if one mbuf can't be allocated. */ 856 if (mb == NULL) { 857 if (nm != NULL) 858 m_freem(nm); 859 return (NULL); 860 } 861 862 /* Book keeping. */ 863 len -= M_SIZE(mb); 864 if (mtail != NULL) 865 mtail->m_next = mb; 866 else 867 nm = mb; 868 mtail = mb; 869 flags &= ~M_PKTHDR; /* Only valid on the first mbuf. */ 870 } 871 if (flags & M_EOR) 872 mtail->m_flags |= M_EOR; /* Only valid on the last mbuf. */ 873 874 /* If mbuf was supplied, append new chain to the end of it. */ 875 if (m != NULL) { 876 for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next) 877 ; 878 mtail->m_next = nm; 879 mtail->m_flags &= ~M_EOR; 880 } else 881 m = nm; 882 883 return (m); 884 } 885 886 /*- 887 * Configure a provided mbuf to refer to the provided external storage 888 * buffer and setup a reference count for said buffer. 889 * 890 * Arguments: 891 * mb The existing mbuf to which to attach the provided buffer. 892 * buf The address of the provided external storage buffer. 893 * size The size of the provided buffer. 894 * freef A pointer to a routine that is responsible for freeing the 895 * provided external storage buffer. 896 * args A pointer to an argument structure (of any type) to be passed 897 * to the provided freef routine (may be NULL). 898 * flags Any other flags to be passed to the provided mbuf. 899 * type The type that the external storage buffer should be 900 * labeled with. 901 * 902 * Returns: 903 * Nothing. 904 */ 905 void 906 m_extadd(struct mbuf *mb, caddr_t buf, u_int size, 907 void (*freef)(struct mbuf *, void *, void *), void *arg1, void *arg2, 908 int flags, int type) 909 { 910 911 KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__)); 912 913 mb->m_flags |= (M_EXT | flags); 914 mb->m_ext.ext_buf = buf; 915 mb->m_data = mb->m_ext.ext_buf; 916 mb->m_ext.ext_size = size; 917 mb->m_ext.ext_free = freef; 918 mb->m_ext.ext_arg1 = arg1; 919 mb->m_ext.ext_arg2 = arg2; 920 mb->m_ext.ext_type = type; 921 922 if (type != EXT_EXTREF) { 923 mb->m_ext.ext_count = 1; 924 mb->m_ext.ext_flags = EXT_FLAG_EMBREF; 925 } else 926 mb->m_ext.ext_flags = 0; 927 } 928 929 /* 930 * Free an entire chain of mbufs and associated external buffers, if 931 * applicable. 932 */ 933 void 934 m_freem(struct mbuf *mb) 935 { 936 937 while (mb != NULL) 938 mb = m_free(mb); 939 } 940