/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.2 (Berkeley) 1/4/94
 * $FreeBSD$
 */

#include "opt_param.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <vm/vm.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

static void mbinit __P((void *));
SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbinit, NULL)

struct mbuf *mbutl;
struct mbstat mbstat;
u_long	mbtypes[MT_NTYPES];
int	max_linkhdr;
int	max_protohdr;
int	max_hdr;
int	max_datalen;
int	nmbclusters;
int	nmbufs;
int	nmbcnt;
u_long	m_mballoc_wid = 0;
u_long	m_clalloc_wid = 0;

/*
 * freelist header structures...
 * mbffree_lst, mclfree_lst, mcntfree_lst
 */
struct mbffree_lst mmbfree;
struct mclfree_lst mclfree;
struct mcntfree_lst mcntfree;

/*
 * sysctl(8) exported objects
 */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, KIPC_MAX_LINKHDR, max_linkhdr, CTLFLAG_RW,
    &max_linkhdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_PROTOHDR, max_protohdr, CTLFLAG_RW,
    &max_protohdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_HDR, max_hdr, CTLFLAG_RW, &max_hdr, 0, "");
SYSCTL_INT(_kern_ipc, KIPC_MAX_DATALEN, max_datalen, CTLFLAG_RW,
    &max_datalen, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, mbuf_wait, CTLFLAG_RW,
    &mbuf_wait, 0, "");
SYSCTL_STRUCT(_kern_ipc, KIPC_MBSTAT, mbstat, CTLFLAG_RD, &mbstat, mbstat, "");
SYSCTL_OPAQUE(_kern_ipc, OID_AUTO, mbtypes, CTLFLAG_RD, mbtypes,
    sizeof(mbtypes), "LU", "");
SYSCTL_INT(_kern_ipc, KIPC_NMBCLUSTERS, nmbclusters, CTLFLAG_RD,
    &nmbclusters, 0, "Maximum number of mbuf clusters available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbufs, CTLFLAG_RD, &nmbufs, 0,
    "Maximum number of mbufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nmbcnt, CTLFLAG_RD, &nmbcnt, 0,
    "Maximum number of ext_buf counters available");
#ifndef NMBCLUSTERS
#define	NMBCLUSTERS	(512 + MAXUSERS * 16)
#endif
TUNABLE_INT_DECL("kern.ipc.nmbclusters", NMBCLUSTERS, nmbclusters);
TUNABLE_INT_DECL("kern.ipc.nmbufs", NMBCLUSTERS * 4, nmbufs);
TUNABLE_INT_DECL("kern.ipc.nmbcnt", EXT_COUNTERS, nmbcnt);

static void	m_reclaim __P((void));

/* Initial allocation numbers */
#define	NCL_INIT	2
#define	NMB_INIT	16
#define	REF_INIT	NMBCLUSTERS

/*
 * Full mbuf subsystem initialization done here.
 *
 * XXX: If ever we have system specific map setups to do, then move them to
 *      machdep.c - for now, there is no reason for this stuff to go there.
 */
static void
mbinit(dummy)
	void *dummy;
{
	vm_offset_t maxaddr, mb_map_size;

	/*
	 * Setup the mb_map, allocate requested VM space.
	 */
	mb_map_size = nmbufs * MSIZE + nmbclusters * MCLBYTES + nmbcnt
	    * sizeof(union mext_refcnt);
	mb_map_size = roundup2(mb_map_size, PAGE_SIZE);
	mb_map = kmem_suballoc(kmem_map, (vm_offset_t *)&mbutl, &maxaddr,
	    mb_map_size);
	/* XXX: mb_map->system_map = 1; */

	/*
	 * Initialize the free list headers, and setup locks for lists.
	 */
	mmbfree.m_head = NULL;
	mclfree.m_head = NULL;
	mcntfree.m_head = NULL;
	mtx_init(&mmbfree.m_mtx, "mbuf free list lock", MTX_DEF);
	mtx_init(&mclfree.m_mtx, "mcluster free list lock", MTX_DEF);
	mtx_init(&mcntfree.m_mtx, "m_ext counter free list lock", MTX_DEF);

	/*
	 * Initialize mbuf subsystem (sysctl exported) statistics structure.
	 */
	mbstat.m_msize = MSIZE;
	mbstat.m_mclbytes = MCLBYTES;
	mbstat.m_minclsize = MINCLSIZE;
	mbstat.m_mlen = MLEN;
	mbstat.m_mhlen = MHLEN;

	/*
	 * Perform some initial allocations.
	 */
	mtx_enter(&mcntfree.m_mtx, MTX_DEF);
	if (m_alloc_ref(REF_INIT, M_DONTWAIT) == 0)
		goto bad;
	mtx_exit(&mcntfree.m_mtx, MTX_DEF);

	mtx_enter(&mmbfree.m_mtx, MTX_DEF);
	if (m_mballoc(NMB_INIT, M_DONTWAIT) == 0)
		goto bad;
	mtx_exit(&mmbfree.m_mtx, MTX_DEF);

	mtx_enter(&mclfree.m_mtx, MTX_DEF);
	if (m_clalloc(NCL_INIT, M_DONTWAIT) == 0)
		goto bad;
	mtx_exit(&mclfree.m_mtx, MTX_DEF);

	return;
bad:
	panic("mbinit: failed to initialize mbuf subsystem!");
}
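
/*
 * Example sizing (illustrative; assumes the usual i386 values MSIZE = 256
 * and MCLBYTES = 2048, and MAXUSERS = 32):
 *
 *	nmbclusters = 512 + 32 * 16               = 1024
 *	nmbufs      = nmbclusters * 4             = 4096
 *	mb_map     ~= 4096 * 256B + 1024 * 2048B  = 1MB + 2MB
 *	              (plus space for the ext_buf counters)
 *
 * The limits may be overridden at boot time through the tunables declared
 * above, e.g. in loader.conf:
 *
 *	kern.ipc.nmbclusters="4096"
 *	kern.ipc.nmbufs="16384"
 */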

/*
 * Allocate at least nmb reference count structs and place them
 * on the ref cnt free list.
 *
 * Must be called with the mcntfree lock held.
 */
int
m_alloc_ref(nmb, how)
	u_int nmb;
	int how;
{
	caddr_t p;
	u_int nbytes;
	int i;

	/*
	 * We don't cap the amount of memory that can be used
	 * by the reference counters, like we do for mbufs and
	 * mbuf clusters. In fact, we're absolutely sure that we
	 * won't ever be going over our allocated space. We keep enough
	 * space in mb_map to accommodate maximum values of allocatable
	 * external buffers including, but not limited to, clusters.
	 * (That's also why we won't have to have wait routines for
	 * counters).
	 *
	 * If we're in here, we're absolutely certain to be returning
	 * successfully, as long as there is physical memory to accommodate
	 * us. And if there isn't, but we're willing to wait, then
	 * kmem_malloc() will do the only waiting needed.
	 */

	nbytes = round_page(nmb * sizeof(union mext_refcnt));
	mtx_exit(&mcntfree.m_mtx, MTX_DEF);
	mtx_enter(&Giant, MTX_DEF);
	if ((p = (caddr_t)kmem_malloc(mb_map, nbytes, how == M_WAIT ? M_WAIT :
	    M_NOWAIT)) == NULL) {
		mtx_exit(&Giant, MTX_DEF);
		mtx_enter(&mcntfree.m_mtx, MTX_DEF);	/* XXX: We must be
							   holding it going
							   out. */
		return (0);
	}
	mtx_exit(&Giant, MTX_DEF);
	nmb = nbytes / sizeof(union mext_refcnt);

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 * It is up to the caller to let go of the mutex.
	 */
	mtx_enter(&mcntfree.m_mtx, MTX_DEF);
	for (i = 0; i < nmb; i++) {
		((union mext_refcnt *)p)->next_ref = mcntfree.m_head;
		mcntfree.m_head = (union mext_refcnt *)p;
		p += sizeof(union mext_refcnt);
		mbstat.m_refree++;
	}
	mbstat.m_refcnt += nmb;

	return (1);
}
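
/*
 * Illustrative note on how the counters allocated above are used: when an
 * external buffer (e.g. a cluster) is attached to an mbuf, a counter is
 * taken off mcntfree and initialized to one reference; routines that share
 * the buffer instead of copying it (m_copym(), m_copypacket() and m_split()
 * below) bump the count with MEXT_ADD_REF(), and the buffer goes back to
 * its free list only when the last reference is dropped at m_free()/MFREE()
 * time.  See the MEXT_* macros in sys/mbuf.h for the authoritative details.
 */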

/*
 * Allocate at least nmb mbufs and place on mbuf free list.
 *
 * Must be called with the mmbfree lock held.
 */
int
m_mballoc(nmb, how)
	register int nmb;
	int how;
{
	register caddr_t p;
	register int i;
	int nbytes;

	/*
	 * If we've hit the mbuf limit, stop allocating from mb_map.
	 * Also, once we run out of map space, it will be impossible to
	 * get any more (nothing is ever freed back to the map).
	 */
	if (mb_map_full || ((nmb + mbstat.m_mbufs) > nmbufs)) {
		/*
		 * Needs to be atomic as we may be incrementing it
		 * while holding another mutex, like mclfree. In other
		 * words, m_drops is not reserved solely for mbufs,
		 * but is also available for clusters.
		 */
		atomic_add_long(&mbstat.m_drops, 1);
		return (0);
	}

	nbytes = round_page(nmb * MSIZE);

	/* XXX: The letting go of the mmbfree lock here may eventually
	   be moved to only be done for M_WAIT calls to kmem_malloc() */
	mtx_exit(&mmbfree.m_mtx, MTX_DEF);
	mtx_enter(&Giant, MTX_DEF);
	p = (caddr_t)kmem_malloc(mb_map, nbytes, M_NOWAIT);
	if (p == 0 && how == M_WAIT) {
		atomic_add_long(&mbstat.m_wait, 1);
		p = (caddr_t)kmem_malloc(mb_map, nbytes, M_WAITOK);
	}
	mtx_exit(&Giant, MTX_DEF);
	mtx_enter(&mmbfree.m_mtx, MTX_DEF);

	/*
	 * Either the map is now full, or `how' is M_DONTWAIT and there
	 * are no pages left.
	 */
	if (p == NULL)
		return (0);

	nmb = nbytes / MSIZE;

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 * It is up to the caller to let go of the mutex when done
	 * with grabbing the mbuf from the free list.
	 */
	for (i = 0; i < nmb; i++) {
		((struct mbuf *)p)->m_next = mmbfree.m_head;
		mmbfree.m_head = (struct mbuf *)p;
		p += MSIZE;
	}
	mbstat.m_mbufs += nmb;
	mbtypes[MT_FREE] += nmb;
	return (1);
}

/*
 * Once the mb_map has been exhausted and if the call to the allocation macros
 * (or, in some cases, functions) is with M_WAIT, then it is necessary to rely
 * solely on reclaimed mbufs.
 *
 * Here we ask the protocols to free up some resources and, if we still
 * cannot get anything, then we wait for an mbuf to be freed for a
 * designated (mbuf_wait) time.
 *
 * Must be called with the mmbfree mutex held, and we will probably end
 * up recursing into that lock from some of the drain routines, but
 * this should be okay, as long as we don't block there, or attempt
 * to allocate from them (theoretically impossible).
 */
struct mbuf *
m_mballoc_wait(void)
{
	struct mbuf *p = NULL;

	/*
	 * See if we can drain some resources out of the protocols.
	 */
	m_reclaim();
	_MGET(p, M_DONTWAIT);

	if (p == NULL) {
		m_mballoc_wid++;
		if (msleep(&m_mballoc_wid, &mmbfree.m_mtx, PVM, "mballc",
		    mbuf_wait) == EWOULDBLOCK)
			m_mballoc_wid--;

		/*
		 * Try again (one last time).
		 *
		 * We retry to fetch _even_ if the sleep timed out. This
		 * is left this way, purposely, in the [unlikely] case
		 * that an mbuf was freed but the sleep was not awoken
		 * in time.
		 *
		 * If the sleep didn't time out (i.e. we got woken up) then
		 * we have the lock so we just grab an mbuf, hopefully.
		 */
		_MGET(p, M_DONTWAIT);
	}

	/* If we waited and got something... */
	if (p != NULL) {
		atomic_add_long(&mbstat.m_wait, 1);
		if (mmbfree.m_head != NULL)
			MBWAKEUP(m_mballoc_wid);
	} else
		atomic_add_long(&mbstat.m_drops, 1);

	return (p);
}
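
/*
 * Caller-side sketch (illustrative): the wait path above is reached from
 * the allocation macros only when M_WAIT was passed; with M_DONTWAIT the
 * caller must instead be prepared for a NULL return:
 *
 *	struct mbuf *m;
 *
 *	MGETHDR(m, M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return (ENOBUFS);
 */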

/*
 * Allocate some number of mbuf clusters
 * and place on cluster free list.
 *
 * Must be called with the mclfree lock held.
 */
int
m_clalloc(ncl, how)
	register int ncl;
	int how;
{
	register caddr_t p;
	register int i;
	int npg;

	/*
	 * If the map is full, nothing will ever be freed back to it.
	 * If we've hit the mcluster number limit, stop allocating from
	 * mb_map.
	 */
	if (mb_map_full || ((ncl + mbstat.m_clusters) > nmbclusters)) {
		atomic_add_long(&mbstat.m_drops, 1);
		return (0);
	}

	npg = ncl;
	mtx_exit(&mclfree.m_mtx, MTX_DEF);
	mtx_enter(&Giant, MTX_DEF);
	p = (caddr_t)kmem_malloc(mb_map, ctob(npg),
	    how != M_WAIT ? M_NOWAIT : M_WAITOK);
	mtx_exit(&Giant, MTX_DEF);
	ncl = ncl * PAGE_SIZE / MCLBYTES;
	mtx_enter(&mclfree.m_mtx, MTX_DEF);

	/*
	 * Either the map is now full, or `how' is M_DONTWAIT and there
	 * are no pages left.
	 */
	if (p == NULL) {
		atomic_add_long(&mbstat.m_drops, 1);
		return (0);
	}

	/*
	 * We don't let go of the mutex in order to avoid a race.
	 */
	for (i = 0; i < ncl; i++) {
		((union mcluster *)p)->mcl_next = mclfree.m_head;
		mclfree.m_head = (union mcluster *)p;
		p += MCLBYTES;
		mbstat.m_clfree++;
	}
	mbstat.m_clusters += ncl;
	return (1);
}

/*
 * Once the mb_map submap has been exhausted and the allocation is called with
 * M_WAIT, we rely on the mclfree list. If nothing is free, we will
 * sleep for a designated amount of time (mbuf_wait) or until we're woken up
 * due to sudden mcluster availability.
 *
 * Must be called with the mclfree lock held.
 */
caddr_t
m_clalloc_wait(void)
{
	caddr_t p = NULL;

	m_clalloc_wid++;
	if (msleep(&m_clalloc_wid, &mclfree.m_mtx, PVM, "mclalc", mbuf_wait)
	    == EWOULDBLOCK)
		m_clalloc_wid--;

	/*
	 * Now that we (think) that we've got something, try again.
	 */
	_MCLALLOC(p, M_DONTWAIT);

	/* If we waited and got something ... */
	if (p != NULL) {
		atomic_add_long(&mbstat.m_wait, 1);
		if (mclfree.m_head != NULL)
			MBWAKEUP(m_clalloc_wid);
	} else
		atomic_add_long(&mbstat.m_drops, 1);

	return (p);
}

/*
 * m_reclaim: drain protocols in hopes of freeing up some resources...
 *
 * Should be called with mmbfree.m_mtx mutex held. We will most likely
 * recursively grab it from within some drain routines, but that's okay,
 * as the mutex will never be completely released until we let go of it
 * after our m_reclaim() is over.
 *
 * Note: Drain routines are only allowed to free mbufs (and mclusters,
 *	 as a consequence, if need be). They are not allowed to allocate
 *	 new ones (that would defeat the purpose, anyway).
 */
static void
m_reclaim()
{
	register struct domain *dp;
	register struct protosw *pr;

	for (dp = domains; dp; dp = dp->dom_next)
		for (pr = dp->dom_protosw; pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	mbstat.m_drain++;
}
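
/*
 * As an illustration of the drain hooks mentioned above: a protocol may
 * provide a pr_drain entry in its protosw, e.g. the IP input code registers
 * ip_drain(), which discards partially reassembled fragments and thereby
 * frees their mbufs.  See the individual protocol implementations for the
 * details.
 */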

/*
 * Space allocation routines.
 * These are also available as macros
 * for critical paths.
 */
struct mbuf *
m_get(how, type)
	int how, type;
{
	register struct mbuf *m;

	MGET(m, how, type);
	return (m);
}

struct mbuf *
m_gethdr(how, type)
	int how, type;
{
	register struct mbuf *m;

	MGETHDR(m, how, type);
	return (m);
}

struct mbuf *
m_getclr(how, type)
	int how, type;
{
	register struct mbuf *m;

	MGET(m, how, type);
	if (m == 0)
		return (0);
	bzero(mtod(m, caddr_t), MLEN);
	return (m);
}

struct mbuf *
m_free(m)
	struct mbuf *m;
{
	register struct mbuf *n;

	MFREE(m, n);
	return (n);
}

void
m_freem(m)
	register struct mbuf *m;
{
	register struct mbuf *n;

	if (m == NULL)
		return;
	do {
		/*
		 * We do need to check the non-first mbuf, since some
		 * existing code does not call M_PREPEND properly.
		 * (example: call to bpf_mtap from drivers)
		 */
		if ((m->m_flags & M_PKTHDR) != 0 && m->m_pkthdr.aux) {
			m_freem(m->m_pkthdr.aux);
			m->m_pkthdr.aux = NULL;
		}
		MFREE(m, n);
		m = n;
	} while (m);
}

/*
 * Mbuffer utility routines.
 */

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(m, len, how)
	register struct mbuf *m;
	int len, how;
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == (struct mbuf *)NULL) {
		m_freem(m);
		return ((struct mbuf *)NULL);
	}
	if (m->m_flags & M_PKTHDR) {
		M_COPY_PKTHDR(mn, m);
		m->m_flags &= ~M_PKTHDR;
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}
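
/*
 * Typical use is through the M_PREPEND() macro, which falls back to
 * m_prepend() only when there is no leading space left in the first mbuf.
 * A minimal caller sketch (illustrative; "hdrlen" and "struct somehdr" are
 * stand-ins for whatever header is being prepended):
 *
 *	M_PREPEND(m, hdrlen, M_DONTWAIT);
 *	if (m == NULL)
 *		return (ENOBUFS);
 *	hdr = mtod(m, struct somehdr *);
 *
 * Note that on failure the original chain has already been freed, as done
 * by m_prepend() above.
 */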

/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
#define	MCFail	(mbstat.m_mcfail)

struct mbuf *
m_copym(m, off0, len, wait)
	register struct mbuf *m;
	int off0, wait;
	register int len;
{
	register struct mbuf *n, **np;
	register int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	KASSERT(off >= 0, ("m_copym, negative off %d", off));
	KASSERT(len >= 0, ("m_copym, negative len %d", len));
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		KASSERT(m != NULL, ("m_copym, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == 0) {
			KASSERT(len == M_COPYALL,
			    ("m_copym, length > size of mbuf chain"));
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == 0)
			goto nospace;
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data + off;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else
			bcopy(mtod(m, caddr_t)+off, mtod(n, caddr_t),
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off = 0;
		m = m->m_next;
		np = &n->m_next;
	}
	if (top == 0)
		atomic_add_long(&MCFail, 1);
	return (top);
nospace:
	m_freem(top);
	atomic_add_long(&MCFail, 1);
	return (0);
}

/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 * Note that the copy is read-only, because clusters are not copied,
 * only their reference counts are incremented.
 */
struct mbuf *
m_copypacket(m, how)
	struct mbuf *m;
	int how;
{
	struct mbuf *top, *n, *o;

	MGET(n, how, m->m_type);
	top = n;
	if (!n)
		goto nospace;

	M_COPY_PKTHDR(n, m);
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		n->m_ext = m->m_ext;
		n->m_flags |= M_EXT;
		MEXT_ADD_REF(m);
	} else {
		bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (!o)
			goto nospace;

		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			n->m_ext = m->m_ext;
			n->m_flags |= M_EXT;
			MEXT_ADD_REF(m);
		} else {
			bcopy(mtod(m, char *), mtod(n, char *), n->m_len);
		}

		m = m->m_next;
	}
	return top;
nospace:
	m_freem(top);
	atomic_add_long(&MCFail, 1);
	return 0;
}
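
/*
 * Because m_copym() and m_copypacket() share clusters by reference instead
 * of copying them, their result must be treated as read-only.  A sketch of
 * the distinction (illustrative):
 *
 *	n = m_copypacket(m, M_DONTWAIT);	shares m's clusters
 *	n = m_dup(m, M_DONTWAIT);		deep copy, safe to write to
 *
 * Callers that intend to modify the data should use m_dup() (below) or
 * otherwise make sure they hold the only reference.
 */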

/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
void
m_copydata(m, off, len, cp)
	register struct mbuf *m;
	register int off;
	register int len;
	caddr_t cp;
{
	register unsigned count;

	KASSERT(off >= 0, ("m_copydata, negative off %d", off));
	KASSERT(len >= 0, ("m_copydata, negative len %d", len));
	while (off > 0) {
		KASSERT(m != NULL, ("m_copydata, offset > size of mbuf chain"));
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL, ("m_copydata, length > size of mbuf chain"));
		count = min(m->m_len - off, len);
		bcopy(mtod(m, caddr_t) + off, cp, count);
		len -= count;
		cp += count;
		off = 0;
		m = m->m_next;
	}
}

/*
 * Copy a packet header mbuf chain into a completely new chain, including
 * copying any mbuf clusters.  Use this instead of m_copypacket() when
 * you need a writable copy of an mbuf chain.
 */
struct mbuf *
m_dup(m, how)
	struct mbuf *m;
	int how;
{
	struct mbuf **p, *top = NULL;
	int remain, moff, nsize;

	/* Sanity check */
	if (m == NULL)
		return (0);
	KASSERT((m->m_flags & M_PKTHDR) != 0, ("%s: !PKTHDR", __FUNCTION__));

	/* While there's more data, get a new mbuf, tack it on, and fill it */
	remain = m->m_pkthdr.len;
	moff = 0;
	p = &top;
	while (remain > 0 || top == NULL) {	/* allow m->m_pkthdr.len == 0 */
		struct mbuf *n;

		/* Get the next new mbuf */
		MGET(n, how, m->m_type);
		if (n == NULL)
			goto nospace;
		if (top == NULL) {		/* first one, must be PKTHDR */
			M_COPY_PKTHDR(n, m);
			nsize = MHLEN;
		} else				/* not the first one */
			nsize = MLEN;
		if (remain >= MINCLSIZE) {
			MCLGET(n, how);
			if ((n->m_flags & M_EXT) == 0) {
				(void)m_free(n);
				goto nospace;
			}
			nsize = MCLBYTES;
		}
		n->m_len = 0;

		/* Link it into the new chain */
		*p = n;
		p = &n->m_next;

		/* Copy data from original mbuf(s) into new mbuf */
		while (n->m_len < nsize && m != NULL) {
			int chunk = min(nsize - n->m_len, m->m_len - moff);

			bcopy(m->m_data + moff, n->m_data + n->m_len, chunk);
			moff += chunk;
			n->m_len += chunk;
			remain -= chunk;
			if (moff == m->m_len) {
				m = m->m_next;
				moff = 0;
			}
		}

		/* Check correct total mbuf length */
		KASSERT((remain > 0 && m != NULL) || (remain == 0 && m == NULL),
		    ("%s: bogus m_pkthdr.len", __FUNCTION__));
	}
	return (top);

nospace:
	m_freem(top);
	atomic_add_long(&MCFail, 1);
	return (0);
}

/*
 * Concatenate mbuf chain n to m.
 * Both chains must be of the same type (e.g. MT_DATA).
 * Any m_pkthdr is not updated.
 */
void
m_cat(m, n)
	register struct mbuf *m, *n;
{
	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (m->m_flags & M_EXT ||
		    m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (u_int)n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}

void
m_adj(mp, req_len)
	struct mbuf *mp;
	int req_len;
{
	register int len = req_len;
	register struct mbuf *m;
	register int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		while (m->m_next)
			(m = m->m_next)->m_len = 0;
	}
}

/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
#define	MPFail	(mbstat.m_mpfail)

struct mbuf *
m_pullup(n, len)
	register struct mbuf *n;
	int len;
{
	register struct mbuf *m;
	register int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == 0)
			goto bad;
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			M_COPY_PKTHDR(m, n);
			n->m_flags &= ~M_PKTHDR;
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		bcopy(mtod(n, caddr_t), mtod(m, caddr_t) + m->m_len,
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	atomic_add_long(&MPFail, 1);
	return (0);
}
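
/*
 * The classic use of m_pullup() is in protocol input routines, to make
 * sure a header can be accessed through mtod() as one contiguous object.
 * A sketch (illustrative, using the IP header as an example):
 *
 *	if (m->m_len < sizeof(struct ip) &&
 *	    (m = m_pullup(m, sizeof(struct ip))) == NULL)
 *		return;
 *	ip = mtod(m, struct ip *);
 *
 * Note that on failure m_pullup() has already freed the chain, so the
 * caller must not touch it again.
 */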

/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
struct mbuf *
m_split(m0, len0, wait)
	register struct mbuf *m0;
	int len0, wait;
{
	register struct mbuf *m, *n;
	unsigned len = len0, remain;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == 0)
		return (0);
	remain = m->m_len - len;
	if (m0->m_flags & M_PKTHDR) {
		MGETHDR(n, wait, m0->m_type);
		if (n == 0)
			return (0);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == 0) {
				(void) m_free(n);
				return (0);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = 0;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == 0)
			return (0);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_flags |= M_EXT;
		n->m_ext = m->m_ext;
		MEXT_ADD_REF(m);
		m->m_ext.ext_size = 0; /* For Accounting XXXXXX danger */
		n->m_data = m->m_data + len;
	} else {
		bcopy(mtod(m, caddr_t) + len, mtod(n, caddr_t), remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = 0;
	return (n);
}

/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(buf, totlen, off0, ifp, copy)
	char *buf;
	int totlen, off0;
	struct ifnet *ifp;
	void (*copy) __P((char *from, caddr_t to, u_int len));
{
	register struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	register int off = off0, len;
	register char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		cp += off + 2 * sizeof(u_short);
		totlen -= 2 * sizeof(u_short);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == 0)
		return (0);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == 0) {
				m_freem(top);
				return (0);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if (m->m_flags & M_EXT)
				m->m_len = len = min(len, MCLBYTES);
			else
				len = m->m_len;
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == 0 && len + max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, caddr_t), (unsigned)len);
		else
			bcopy(cp, mtod(m, caddr_t), (unsigned)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}
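
/*
 * A rough sketch of how a driver receive path might use m_devget()
 * (illustrative; "rxbuf" and "rxlen" stand in for the device's receive
 * buffer and frame length):
 *
 *	m = m_devget(rxbuf, rxlen, 0, ifp, NULL);
 *	if (m == NULL) {
 *		ifp->if_ierrors++;
 *		return;
 *	}
 *	(hand the chain to bpf and/or the link layer input routine)
 *
 * Passing a NULL copy function makes m_devget() fall back to bcopy().
 */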

/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
void
m_copyback(m0, off, len, cp)
	struct mbuf *m0;
	register int off;
	register int len;
	caddr_t cp;
{
	register int mlen;
	register struct mbuf *m = m0, *n;
	int totlen = 0;

	if (m0 == 0)
		return;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == 0) {
			n = m_getclr(M_DONTWAIT, m->m_type);
			if (n == 0)
				goto out;
			n->m_len = min(MLEN, len + off);
			m->m_next = n;
		}
		m = m->m_next;
	}
	while (len > 0) {
		mlen = min(m->m_len - off, len);
		bcopy(cp, off + mtod(m, caddr_t), (unsigned)mlen);
		cp += mlen;
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == 0) {
			n = m_get(M_DONTWAIT, m->m_type);
			if (n == 0)
				break;
			n->m_len = min(MLEN, len);
			m->m_next = n;
		}
		m = m->m_next;
	}
out:	if (((m = m0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen))
		m->m_pkthdr.len = totlen;
}

void
m_print(const struct mbuf *m)
{
	int len;
	const struct mbuf *m2;

	len = m->m_pkthdr.len;
	m2 = m;
	while (len) {
		printf("%p %*D\n", m2, m2->m_len, (u_char *)m2->m_data, "-");
		len -= m2->m_len;
		m2 = m2->m_next;
	}
	return;
}