/*-
 * Copyright (c) 2004, 2005,
 *	Bosko Milekic <bmilekic@FreeBSD.org>.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_param.h"

#include <sys/param.h>
#include <sys/malloc.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/eventhandler.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/smp.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/uma.h>
#include <vm/uma_dbg.h>

/*
 * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA
 * Zones.
 *
 * Mbuf Clusters (2K, contiguous) are allocated from the Cluster
 * Zone.  The Zone can be capped at kern.ipc.nmbclusters, if the
 * administrator so desires.
 *
 * Mbufs are allocated from a UMA Master Zone called the Mbuf
 * Zone.
 *
 * Additionally, FreeBSD provides a Packet Zone, which it
 * configures as a Secondary Zone to the Mbuf Master Zone,
 * thus sharing backend Slab kegs with the Mbuf Master Zone.
 *
 * Thus common-case allocations and locking are simplified:
 *
 *  m_clget()                m_getcl()
 *   |                         |
 *   |   .------------>[(Packet Cache)]    m_get(), m_gethdr()
 *   |   |             [     Packet   ]            |
 * [(Cluster Cache)]   [    Secondary ]   [     (Mbuf Cache)     ]
 * [ Cluster Zone  ]   [     Zone     ]   [  Mbuf Master Zone    ]
 *        |                      \________         |
 *  [ Cluster Keg   ]                     \        /
 *        |                            [ Mbuf Keg   ]
 *  [ Cluster Slabs ]                       |
 *        |                            [ Mbuf Slabs ]
 *         \____________(VM)_________________/
 *
 *
 * Whenever an object is allocated with uma_zalloc() out of
 * one of the Zones its _ctor_ function is executed.  Likewise,
 * for any deallocation through uma_zfree() the _dtor_ function
 * is executed.
 *
 * Caches are per-CPU and are filled from the Master Zone.
 *
 * Whenever an object is allocated from the underlying global
 * memory pool it gets pre-initialized with the _zinit_ functions.
 * When the Kegs are overfull, objects get decommissioned with the
 * _zfini_ functions and freed back to the global memory pool.
 *
 */
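
/*
 * Illustrative sketch only (not part of the allocator itself): how the
 * two common-case paths in the diagram above are typically exercised by
 * standard mbuf(9) consumers.
 *
 *	struct mbuf *m;
 *
 *	m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);	(Packet Zone path)
 *
 *	m = m_gethdr(M_NOWAIT, MT_DATA);		(Mbuf Zone path,)
 *	if (m != NULL && !m_clget(m, M_NOWAIT)) {	(then Cluster Zone)
 *		m_freem(m);
 *		m = NULL;
 *	}
 */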

int nmbufs;			/* limits number of mbufs */
int nmbclusters;		/* limits number of mbuf clusters */
int nmbjumbop;			/* limits number of page size jumbo clusters */
int nmbjumbo9;			/* limits number of 9k jumbo clusters */
int nmbjumbo16;			/* limits number of 16k jumbo clusters */

static quad_t maxmbufmem;	/* overall real memory limit for all mbufs */

SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &maxmbufmem, 0,
    "Maximum real memory allocatable to various mbuf types");

/*
 * tunable_mbinit() has to be run before any mbuf allocations are done.
 */
static void
tunable_mbinit(void *dummy)
{
	quad_t realmem;

	/*
	 * The default limit for all mbuf related memory is 1/2 of all
	 * available kernel memory (physical or kmem).
	 * At most it can be 3/4 of available kernel memory.
	 */
	realmem = qmin((quad_t)physmem * PAGE_SIZE, vm_kmem_size);
	maxmbufmem = realmem / 2;
	TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem);
	if (maxmbufmem > realmem / 4 * 3)
		maxmbufmem = realmem / 4 * 3;

	TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters);
	if (nmbclusters == 0)
		nmbclusters = maxmbufmem / MCLBYTES / 4;

	TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop);
	if (nmbjumbop == 0)
		nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4;

	TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9);
	if (nmbjumbo9 == 0)
		nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6;

	TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16);
	if (nmbjumbo16 == 0)
		nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6;

	/*
	 * We need at least as many mbufs as we have clusters of
	 * the various types added together.
	 */
	TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs);
	if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16)
		nmbufs = lmax(maxmbufmem / MSIZE / 5,
		    nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16);
}
SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL);
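
/*
 * Worked example of the defaults above, purely for illustration on a
 * hypothetical machine where realmem works out to 8 GB, no tunables are
 * set, pages are 4K, and MSIZE is 256: maxmbufmem = 8 GB / 2 = 4 GB;
 * nmbclusters = 4 GB / 2048 / 4 = 524288; nmbjumbop = 4 GB / 4096 / 4 =
 * 262144; nmbjumbo9 = 4 GB / 9216 / 6 = ~77672; nmbjumbo16 =
 * 4 GB / 16384 / 6 = ~43690; nmbufs = max(4 GB / 256 / 5, sum of the
 * cluster limits) = ~3355443.
 */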

static int
sysctl_nmbclusters(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbclusters;

	newnmbclusters = nmbclusters;
	error = sysctl_handle_int(oidp, &newnmbclusters, 0, req);
	if (error == 0 && req->newptr && newnmbclusters != nmbclusters) {
		if (newnmbclusters > nmbclusters &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbclusters = newnmbclusters;
			nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
			EVENTHANDLER_INVOKE(nmbclusters_change);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbclusters, CTLTYPE_INT|CTLFLAG_RW,
    &nmbclusters, 0, sysctl_nmbclusters, "IU",
    "Maximum number of mbuf clusters allowed");

static int
sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbjumbop;

	newnmbjumbop = nmbjumbop;
	error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req);
	if (error == 0 && req->newptr && newnmbjumbop != nmbjumbop) {
		if (newnmbjumbop > nmbjumbop &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbjumbop = newnmbjumbop;
			nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW,
    &nmbjumbop, 0, sysctl_nmbjumbop, "IU",
    "Maximum number of mbuf page size jumbo clusters allowed");

static int
sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbjumbo9;

	newnmbjumbo9 = nmbjumbo9;
	error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req);
	if (error == 0 && req->newptr && newnmbjumbo9 != nmbjumbo9) {
		if (newnmbjumbo9 > nmbjumbo9 &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbjumbo9 = newnmbjumbo9;
			nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW,
    &nmbjumbo9, 0, sysctl_nmbjumbo9, "IU",
    "Maximum number of mbuf 9k jumbo clusters allowed");

static int
sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbjumbo16;

	newnmbjumbo16 = nmbjumbo16;
	error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req);
	if (error == 0 && req->newptr && newnmbjumbo16 != nmbjumbo16) {
		if (newnmbjumbo16 > nmbjumbo16 &&
		    nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) {
			nmbjumbo16 = newnmbjumbo16;
			nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT|CTLFLAG_RW,
    &nmbjumbo16, 0, sysctl_nmbjumbo16, "IU",
    "Maximum number of mbuf 16k jumbo clusters allowed");

static int
sysctl_nmbufs(SYSCTL_HANDLER_ARGS)
{
	int error, newnmbufs;

	newnmbufs = nmbufs;
	error = sysctl_handle_int(oidp, &newnmbufs, 0, req);
	if (error == 0 && req->newptr && newnmbufs != nmbufs) {
		if (newnmbufs > nmbufs) {
			nmbufs = newnmbufs;
			nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
			EVENTHANDLER_INVOKE(nmbufs_change);
		} else
			error = EINVAL;
	}
	return (error);
}
SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW,
    &nmbufs, 0, sysctl_nmbufs, "IU",
    "Maximum number of mbufs allowed");
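
/*
 * For reference only: the handlers above are reached through the usual
 * sysctl(3) interface.  A minimal userland sketch (not part of the
 * kernel, shown only to illustrate how a limit is raised at runtime):
 *
 *	int newmax = 262144;
 *
 *	if (sysctlbyname("kern.ipc.nmbclusters", NULL, NULL,
 *	    &newmax, sizeof(newmax)) == -1)
 *		err(1, "sysctlbyname");
 */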

/*
 * Zones from which we allocate.
 */
uma_zone_t	zone_mbuf;
uma_zone_t	zone_clust;
uma_zone_t	zone_pack;
uma_zone_t	zone_jumbop;
uma_zone_t	zone_jumbo9;
uma_zone_t	zone_jumbo16;
uma_zone_t	zone_ext_refcnt;

/*
 * Local prototypes.
 */
static int	mb_ctor_mbuf(void *, int, void *, int);
static int	mb_ctor_clust(void *, int, void *, int);
static int	mb_ctor_pack(void *, int, void *, int);
static void	mb_dtor_mbuf(void *, int, void *);
static void	mb_dtor_clust(void *, int, void *);
static void	mb_dtor_pack(void *, int, void *);
static int	mb_zinit_pack(void *, int, int);
static void	mb_zfini_pack(void *, int);
static void	mb_reclaim(uma_zone_t, int);
static void    *mbuf_jumbo_alloc(uma_zone_t, vm_size_t, uint8_t *, int);

/* Ensure that MSIZE is a power of 2. */
CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE);
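
/*
 * Worked illustration of the assertion above: for a power of two such as
 * MSIZE = 256, (255 ^ 256) = 511, 511 + 1 = 512, and 512 >> 1 = 256,
 * which matches MSIZE; for a non-power such as 384, (383 ^ 384) = 255
 * and (255 + 1) >> 1 = 128 != 384, so the build fails.
 */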

/*
 * Initialize FreeBSD Network buffer allocation.
 */
static void
mbuf_init(void *dummy)
{

	/*
	 * Configure UMA zones for Mbufs, Clusters, and Packets.
	 */
	zone_mbuf = uma_zcreate(MBUF_MEM_NAME, MSIZE,
	    mb_ctor_mbuf, mb_dtor_mbuf,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    MSIZE - 1, UMA_ZONE_MAXBUCKET);
	if (nmbufs > 0)
		nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
	uma_zone_set_warning(zone_mbuf, "kern.ipc.nmbufs limit reached");
	uma_zone_set_maxaction(zone_mbuf, mb_reclaim);

	zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES,
	    mb_ctor_clust, mb_dtor_clust,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
	if (nmbclusters > 0)
		nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
	uma_zone_set_warning(zone_clust, "kern.ipc.nmbclusters limit reached");
	uma_zone_set_maxaction(zone_clust, mb_reclaim);

	zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack,
	    mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf);

	/* Make the jumbo frame zones too.  Page size, 9k and 16k. */
	zone_jumbop = uma_zcreate(MBUF_JUMBOP_MEM_NAME, MJUMPAGESIZE,
	    mb_ctor_clust, mb_dtor_clust,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
	if (nmbjumbop > 0)
		nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
	uma_zone_set_warning(zone_jumbop, "kern.ipc.nmbjumbop limit reached");
	uma_zone_set_maxaction(zone_jumbop, mb_reclaim);

	zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES,
	    mb_ctor_clust, mb_dtor_clust,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
	uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc);
	if (nmbjumbo9 > 0)
		nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
	uma_zone_set_warning(zone_jumbo9, "kern.ipc.nmbjumbo9 limit reached");
	uma_zone_set_maxaction(zone_jumbo9, mb_reclaim);

	zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES,
	    mb_ctor_clust, mb_dtor_clust,
#ifdef INVARIANTS
	    trash_init, trash_fini,
#else
	    NULL, NULL,
#endif
	    UMA_ALIGN_PTR, UMA_ZONE_REFCNT);
	uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc);
	if (nmbjumbo16 > 0)
		nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
	uma_zone_set_warning(zone_jumbo16, "kern.ipc.nmbjumbo16 limit reached");
	uma_zone_set_maxaction(zone_jumbo16, mb_reclaim);

	zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int),
	    NULL, NULL,
	    NULL, NULL,
	    UMA_ALIGN_PTR, UMA_ZONE_ZINIT);

	/*
	 * Hook event handler for low-memory situation, used to
	 * drain protocols and push data back to the caches (UMA
	 * later pushes it back to VM).
	 */
	EVENTHANDLER_REGISTER(vm_lowmem, mb_reclaim, NULL,
	    EVENTHANDLER_PRI_FIRST);
}
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL);

/*
 * UMA backend page allocator for the jumbo frame zones.
 *
 * Allocates kernel virtual memory that is backed by contiguous physical
 * pages.
 */
static void *
mbuf_jumbo_alloc(uma_zone_t zone, vm_size_t bytes, uint8_t *flags, int wait)
{

	/* Inform UMA that this allocator uses kernel_map/object. */
	*flags = UMA_SLAB_KERNEL;
	return ((void *)kmem_alloc_contig(kernel_arena, bytes, wait,
	    (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT));
}

/*
 * Constructor for Mbuf master zone.
 *
 * The 'arg' pointer points to a mb_args structure which
 * contains call-specific information required to support the
 * mbuf allocation API.  See mbuf.h.
 */
static int
mb_ctor_mbuf(void *mem, int size, void *arg, int how)
{
	struct mbuf *m;
	struct mb_args *args;
	int error;
	int flags;
	short type;

#ifdef INVARIANTS
	trash_ctor(mem, size, arg, how);
#endif
	args = (struct mb_args *)arg;
	type = args->type;

	/*
	 * The mbuf is initialized later.  The caller has the
	 * responsibility to set up any MAC labels too.
	 */
	if (type == MT_NOINIT)
		return (0);

	m = (struct mbuf *)mem;
	flags = args->flags;

	error = m_init(m, how, type, flags);

	return (error);
}

/*
 * The Mbuf master zone destructor.
 */
static void
mb_dtor_mbuf(void *mem, int size, void *arg)
{
	struct mbuf *m;
	unsigned long flags;

	m = (struct mbuf *)mem;
	flags = (unsigned long)arg;

	KASSERT((m->m_flags & M_NOFREE) == 0, ("%s: M_NOFREE set", __func__));
	if ((m->m_flags & M_PKTHDR) && !SLIST_EMPTY(&m->m_pkthdr.tags))
		m_tag_delete_chain(m, NULL);
#ifdef INVARIANTS
	trash_dtor(mem, size, arg);
#endif
}

/*
 * The Mbuf Packet zone destructor.
 */
static void
mb_dtor_pack(void *mem, int size, void *arg)
{
	struct mbuf *m;

	m = (struct mbuf *)mem;
	if ((m->m_flags & M_PKTHDR) != 0)
		m_tag_delete_chain(m, NULL);

	/* Make sure we've got a clean cluster back. */
	KASSERT((m->m_flags & M_EXT) == M_EXT, ("%s: M_EXT not set", __func__));
	KASSERT(m->m_ext.ext_buf != NULL, ("%s: ext_buf == NULL", __func__));
	KASSERT(m->m_ext.ext_free == NULL, ("%s: ext_free != NULL", __func__));
	KASSERT(m->m_ext.ext_arg1 == NULL, ("%s: ext_arg1 != NULL", __func__));
	KASSERT(m->m_ext.ext_arg2 == NULL, ("%s: ext_arg2 != NULL", __func__));
	KASSERT(m->m_ext.ext_size == MCLBYTES, ("%s: ext_size != MCLBYTES", __func__));
	KASSERT(m->m_ext.ext_type == EXT_PACKET, ("%s: ext_type != EXT_PACKET", __func__));
	KASSERT(*m->m_ext.ext_cnt == 1, ("%s: ext_cnt != 1", __func__));
#ifdef INVARIANTS
	trash_dtor(m->m_ext.ext_buf, MCLBYTES, arg);
#endif
	/*
	 * If there are processes blocked on zone_clust, waiting for pages
	 * to be freed up, cause them to be woken up by draining the
	 * packet zone.  We are exposed to a race here (in the check for
	 * the UMA_ZFLAG_FULL) where we might miss the flag set, but that
	 * is deliberate.  We don't want to acquire the zone lock for every
	 * mbuf free.
	 */
	if (uma_zone_exhausted_nolock(zone_clust))
		zone_drain(zone_pack);
}

/*
 * The Cluster and Jumbo[PAGESIZE|9|16] zone constructor.
 *
 * Here the 'arg' pointer points to the Mbuf which we
 * are configuring cluster storage for.  If 'arg' is
 * empty we allocate just the cluster without setting
 * the mbuf to it.  See mbuf.h.
 */
static int
mb_ctor_clust(void *mem, int size, void *arg, int how)
{
	struct mbuf *m;
	u_int *refcnt;
	int type;
	uma_zone_t zone;

#ifdef INVARIANTS
	trash_ctor(mem, size, arg, how);
#endif
	switch (size) {
	case MCLBYTES:
		type = EXT_CLUSTER;
		zone = zone_clust;
		break;
#if MJUMPAGESIZE != MCLBYTES
	case MJUMPAGESIZE:
		type = EXT_JUMBOP;
		zone = zone_jumbop;
		break;
#endif
	case MJUM9BYTES:
		type = EXT_JUMBO9;
		zone = zone_jumbo9;
		break;
	case MJUM16BYTES:
		type = EXT_JUMBO16;
		zone = zone_jumbo16;
		break;
	default:
		panic("unknown cluster size");
		break;
	}

	m = (struct mbuf *)arg;
	refcnt = uma_find_refcnt(zone, mem);
	*refcnt = 1;
	if (m != NULL) {
		m->m_ext.ext_buf = (caddr_t)mem;
		m->m_data = m->m_ext.ext_buf;
		m->m_flags |= M_EXT;
		m->m_ext.ext_free = NULL;
		m->m_ext.ext_arg1 = NULL;
		m->m_ext.ext_arg2 = NULL;
		m->m_ext.ext_size = size;
		m->m_ext.ext_type = type;
		m->m_ext.ext_flags = 0;
		m->m_ext.ext_cnt = refcnt;
	}

	return (0);
}

/*
 * The Mbuf Cluster zone destructor.
 */
static void
mb_dtor_clust(void *mem, int size, void *arg)
{
#ifdef INVARIANTS
	uma_zone_t zone;

	zone = m_getzone(size);
	KASSERT(*(uma_find_refcnt(zone, mem)) <= 1,
	    ("%s: refcnt incorrect %u", __func__,
	    *(uma_find_refcnt(zone, mem))));

	trash_dtor(mem, size, arg);
#endif
}

/*
 * The Packet secondary zone's init routine, executed on the
 * object's transition from mbuf keg slab to zone cache.
 */
static int
mb_zinit_pack(void *mem, int size, int how)
{
	struct mbuf *m;

	m = (struct mbuf *)mem;		/* m is virgin. */
	if (uma_zalloc_arg(zone_clust, m, how) == NULL ||
	    m->m_ext.ext_buf == NULL)
		return (ENOMEM);
	m->m_ext.ext_type = EXT_PACKET;	/* Override. */
#ifdef INVARIANTS
	trash_init(m->m_ext.ext_buf, MCLBYTES, how);
#endif
	return (0);
}

/*
 * The Packet secondary zone's fini routine, executed on the
 * object's transition from zone cache to keg slab.
 */
static void
mb_zfini_pack(void *mem, int size)
{
	struct mbuf *m;

	m = (struct mbuf *)mem;
#ifdef INVARIANTS
	trash_fini(m->m_ext.ext_buf, MCLBYTES);
#endif
	uma_zfree_arg(zone_clust, m->m_ext.ext_buf, NULL);
#ifdef INVARIANTS
	trash_dtor(mem, size, NULL);
#endif
}

/*
 * The "packet" keg constructor.
 */
static int
mb_ctor_pack(void *mem, int size, void *arg, int how)
{
	struct mbuf *m;
	struct mb_args *args;
	int error, flags;
	short type;

	m = (struct mbuf *)mem;
	args = (struct mb_args *)arg;
	flags = args->flags;
	type = args->type;

#ifdef INVARIANTS
	trash_ctor(m->m_ext.ext_buf, MCLBYTES, arg, how);
#endif

	error = m_init(m, how, type, flags);

	/* m_ext is already initialized. */
	m->m_data = m->m_ext.ext_buf;
	m->m_flags = (flags | M_EXT);

	return (error);
}

/*
 * This is the protocol drain routine.  Called by UMA whenever any of the
 * mbuf zones has reached its limit.
 *
 * No locks should be held when this is called.  The drain routines have to
 * presently acquire some locks which raises the possibility of lock order
 * reversal.
 */
static void
mb_reclaim(uma_zone_t zone __unused, int pending __unused)
{
	struct domain *dp;
	struct protosw *pr;

	WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK | WARN_PANIC, NULL, __func__);

	for (dp = domains; dp != NULL; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain != NULL)
				(*pr->pr_drain)();
}

/*
 * Clean up after mbufs with M_EXT storage attached to them if the
 * reference count hits 1.
 */
void
mb_free_ext(struct mbuf *m)
{
	int freembuf;

	KASSERT(m->m_flags & M_EXT, ("%s: M_EXT not set on %p", __func__, m));

	/*
	 * Check if the header is embedded in the cluster.
	 */
	freembuf = (m->m_flags & M_NOFREE) ? 0 : 1;

	switch (m->m_ext.ext_type) {
	case EXT_SFBUF:
		sf_ext_free(m->m_ext.ext_arg1, m->m_ext.ext_arg2);
		break;
	case EXT_SFBUF_NOCACHE:
		sf_ext_free_nocache(m->m_ext.ext_arg1, m->m_ext.ext_arg2);
		break;
	default:
		KASSERT(m->m_ext.ext_cnt != NULL,
		    ("%s: no refcounting pointer on %p", __func__, m));
		/*
		 * Free attached storage if this mbuf is the only
		 * reference to it.
		 */
		if (*(m->m_ext.ext_cnt) != 1) {
			if (atomic_fetchadd_int(m->m_ext.ext_cnt, -1) != 1)
				break;
		}

		switch (m->m_ext.ext_type) {
		case EXT_PACKET:	/* The packet zone is special. */
			if (*(m->m_ext.ext_cnt) == 0)
				*(m->m_ext.ext_cnt) = 1;
			uma_zfree(zone_pack, m);
			return;		/* Job done. */
		case EXT_CLUSTER:
			uma_zfree(zone_clust, m->m_ext.ext_buf);
			break;
		case EXT_JUMBOP:
			uma_zfree(zone_jumbop, m->m_ext.ext_buf);
			break;
		case EXT_JUMBO9:
			uma_zfree(zone_jumbo9, m->m_ext.ext_buf);
			break;
		case EXT_JUMBO16:
			uma_zfree(zone_jumbo16, m->m_ext.ext_buf);
			break;
		case EXT_NET_DRV:
		case EXT_MOD_TYPE:
		case EXT_DISPOSABLE:
			*(m->m_ext.ext_cnt) = 0;
			uma_zfree(zone_ext_refcnt, __DEVOLATILE(u_int *,
			    m->m_ext.ext_cnt));
			/* FALLTHROUGH */
		case EXT_EXTREF:
			KASSERT(m->m_ext.ext_free != NULL,
			    ("%s: ext_free not set", __func__));
			(*(m->m_ext.ext_free))(m, m->m_ext.ext_arg1,
			    m->m_ext.ext_arg2);
			break;
		default:
			KASSERT(m->m_ext.ext_type == 0,
			    ("%s: unknown ext_type", __func__));
		}
	}

	if (freembuf)
		uma_zfree(zone_mbuf, m);
}

/*
 * Official mbuf(9) allocation KPI for stack and drivers:
 *
 * m_get()	- a single mbuf without any attachments, sys/mbuf.h.
 * m_gethdr()	- a single mbuf initialized as M_PKTHDR, sys/mbuf.h.
 * m_getcl()	- an mbuf + 2k cluster, sys/mbuf.h.
 * m_clget()	- attach cluster to already allocated mbuf.
 * m_cljget()	- attach jumbo cluster to already allocated mbuf.
 * m_get2()	- allocate minimum mbuf that would fit size argument.
 * m_getm2()	- allocate a chain of mbufs/clusters.
 * m_extadd()	- attach external cluster to mbuf.
 *
 * m_free()	- free single mbuf with its tags and ext, sys/mbuf.h.
 * m_freem()	- free chain of mbufs.
 */
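
/*
 * Hedged usage sketch of the KPI above (illustrative only; the name
 * payload_len is hypothetical): a driver building a receive chain might
 * do
 *
 *	struct mbuf *m;
 *
 *	m = m_getm2(NULL, payload_len, M_NOWAIT, MT_DATA, M_PKTHDR);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	...copy or DMA payload_len bytes into the chain...
 *	m_freem(m);		(on error, release the whole chain)
 */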

int
m_clget(struct mbuf *m, int how)
{

	KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT",
	    __func__, m));
	m->m_ext.ext_buf = (char *)NULL;
	uma_zalloc_arg(zone_clust, m, how);
	/*
	 * On a cluster allocation failure, drain the packet zone and retry;
	 * we might be able to loosen a few clusters up on the drain.
	 */
	if ((how & M_NOWAIT) && (m->m_ext.ext_buf == NULL)) {
		zone_drain(zone_pack);
		uma_zalloc_arg(zone_clust, m, how);
	}
	return (m->m_flags & M_EXT);
}

/*
 * m_cljget() is different from m_clget() as it can allocate clusters without
 * attaching them to an mbuf.  In that case the return value is the pointer
 * to the cluster of the requested size.  If an mbuf was specified, it gets
 * the cluster attached to it and the return value can be safely ignored.
 * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
 */
void *
m_cljget(struct mbuf *m, int how, int size)
{
	uma_zone_t zone;

	if (m != NULL) {
		KASSERT((m->m_flags & M_EXT) == 0, ("%s: mbuf %p has M_EXT",
		    __func__, m));
		m->m_ext.ext_buf = NULL;
	}

	zone = m_getzone(size);
	return (uma_zalloc_arg(zone, m, how));
}

/*
 * m_get2() allocates the minimum mbuf that will fit the "size" argument.
 */
struct mbuf *
m_get2(int size, int how, short type, int flags)
{
	struct mb_args args;
	struct mbuf *m, *n;

	args.flags = flags;
	args.type = type;

	if (size <= MHLEN || (size <= MLEN && (flags & M_PKTHDR) == 0))
		return (uma_zalloc_arg(zone_mbuf, &args, how));
	if (size <= MCLBYTES)
		return (uma_zalloc_arg(zone_pack, &args, how));

	if (size > MJUMPAGESIZE)
		return (NULL);

	m = uma_zalloc_arg(zone_mbuf, &args, how);
	if (m == NULL)
		return (NULL);

	n = uma_zalloc_arg(zone_jumbop, m, how);
	if (n == NULL) {
		uma_zfree(zone_mbuf, m);
		return (NULL);
	}

	return (m);
}

/*
 * m_getjcl() returns an mbuf with a cluster of the specified size attached.
 * For size it takes MCLBYTES, MJUMPAGESIZE, MJUM9BYTES, MJUM16BYTES.
 */
struct mbuf *
m_getjcl(int how, short type, int flags, int size)
{
	struct mb_args args;
	struct mbuf *m, *n;
	uma_zone_t zone;

	if (size == MCLBYTES)
		return (m_getcl(how, type, flags));

	args.flags = flags;
	args.type = type;

	m = uma_zalloc_arg(zone_mbuf, &args, how);
	if (m == NULL)
		return (NULL);

	zone = m_getzone(size);
	n = uma_zalloc_arg(zone, m, how);
	if (n == NULL) {
		uma_zfree(zone_mbuf, m);
		return (NULL);
	}
	return (m);
}

/*
 * Allocate a given length worth of mbufs and/or clusters (whatever fits
 * best) and return a pointer to the top of the allocated chain.  If an
 * existing mbuf chain is provided, then we will append the new chain
 * to the existing one but still return the top of the newly allocated
 * chain.
 */
struct mbuf *
m_getm2(struct mbuf *m, int len, int how, short type, int flags)
{
	struct mbuf *mb, *nm = NULL, *mtail = NULL;

	KASSERT(len >= 0, ("%s: len is < 0", __func__));

	/* Validate flags. */
	flags &= (M_PKTHDR | M_EOR);

	/* Packet header mbuf must be first in chain. */
	if ((flags & M_PKTHDR) && m != NULL)
		flags &= ~M_PKTHDR;

	/* Loop and append maximum sized mbufs to the chain tail. */
	while (len > 0) {
		if (len > MCLBYTES)
			mb = m_getjcl(how, type, (flags & M_PKTHDR),
			    MJUMPAGESIZE);
		else if (len >= MINCLSIZE)
			mb = m_getcl(how, type, (flags & M_PKTHDR));
		else if (flags & M_PKTHDR)
			mb = m_gethdr(how, type);
		else
			mb = m_get(how, type);

		/* Fail the whole operation if one mbuf can't be allocated. */
		if (mb == NULL) {
			if (nm != NULL)
				m_freem(nm);
			return (NULL);
		}

		/* Book keeping. */
		len -= M_SIZE(mb);
		if (mtail != NULL)
			mtail->m_next = mb;
		else
			nm = mb;
		mtail = mb;
		flags &= ~M_PKTHDR;	/* Only valid on the first mbuf. */
	}
	if (flags & M_EOR)
		mtail->m_flags |= M_EOR;	/* Only valid on the last mbuf. */

	/* If mbuf was supplied, append new chain to the end of it. */
	if (m != NULL) {
		for (mtail = m; mtail->m_next != NULL; mtail = mtail->m_next)
			;
		mtail->m_next = nm;
		mtail->m_flags &= ~M_EOR;
	} else
		m = nm;

	return (m);
}
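
/*
 * Worked illustration of m_getm2() above, assuming 4K pages and 2K
 * MCLBYTES (the len value is made up for the example): for len = 5000
 * the first pass allocates a page-size jumbo cluster and len drops to
 * 904; the second pass sees 904 >= MINCLSIZE and allocates a 2K cluster,
 * len drops below zero, and the loop exits with a two-mbuf chain
 * carrying 4096 + 2048 bytes of cluster storage.
 */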

/*-
 * Configure a provided mbuf to refer to the provided external storage
 * buffer and set up a reference count for said buffer.  If the setting
 * up of the reference count fails, the M_EXT bit will not be set.  If
 * successful, the M_EXT bit is set in the mbuf's flags.
 *
 * Arguments:
 *    mb     The existing mbuf to which to attach the provided buffer.
 *    buf    The address of the provided external storage buffer.
 *    size   The size of the provided buffer.
 *    freef  A pointer to a routine that is responsible for freeing the
 *           provided external storage buffer.
 *    arg1, arg2
 *           Pointers to argument data (of any type) to be passed to the
 *           provided freef routine (may be NULL).
 *    flags  Any other flags to be passed to the provided mbuf.
 *    type   The type that the external storage buffer should be
 *           labeled with.
 *
 * Returns:
 *    0 on success, or ENOMEM if the reference count could not be
 *    allocated.
 */
int
m_extadd(struct mbuf *mb, caddr_t buf, u_int size,
    void (*freef)(struct mbuf *, void *, void *), void *arg1, void *arg2,
    int flags, int type, int wait)
{
	KASSERT(type != EXT_CLUSTER, ("%s: EXT_CLUSTER not allowed", __func__));

	if (type != EXT_EXTREF)
		mb->m_ext.ext_cnt = uma_zalloc(zone_ext_refcnt, wait);

	if (mb->m_ext.ext_cnt == NULL)
		return (ENOMEM);

	*(mb->m_ext.ext_cnt) = 1;
	mb->m_flags |= (M_EXT | flags);
	mb->m_ext.ext_buf = buf;
	mb->m_data = mb->m_ext.ext_buf;
	mb->m_ext.ext_size = size;
	mb->m_ext.ext_free = freef;
	mb->m_ext.ext_arg1 = arg1;
	mb->m_ext.ext_arg2 = arg2;
	mb->m_ext.ext_type = type;
	mb->m_ext.ext_flags = 0;

	return (0);
}

/*
 * Free an entire chain of mbufs and associated external buffers, if
 * applicable.
 */
void
m_freem(struct mbuf *mb)
{

	while (mb != NULL)
		mb = m_free(mb);
}
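
/*
 * Hedged usage sketch for m_extadd() above (illustrative only; my_buf,
 * MY_BUFSIZE and my_free are hypothetical driver-supplied names):
 *
 *	static void
 *	my_free(struct mbuf *m, void *arg1, void *arg2)
 *	{
 *		...return arg1 to the driver's private buffer pool...
 *	}
 *
 *	if (m_extadd(m, (caddr_t)my_buf, MY_BUFSIZE, my_free,
 *	    my_buf, NULL, 0, EXT_NET_DRV, M_NOWAIT) != 0)
 *		...handle ENOMEM...
 */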