1 /*- 2 * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 /* 30 * IPsec-specific mbuf routines. 31 */ 32 33 #include "opt_param.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/mbuf.h> 38 #include <sys/socket.h> 39 40 #include <net/route.h> 41 #include <netinet/in.h> 42 43 #include <netipsec/ipsec.h> 44 45 extern struct mbuf *m_getptr(struct mbuf *, int, int *); 46 47 /* 48 * Create a writable copy of the mbuf chain. While doing this 49 * we compact the chain with a goal of producing a chain with 50 * at most two mbufs. The second mbuf in this chain is likely 51 * to be a cluster. The primary purpose of this work is to create 52 * a writable packet for encryption, compression, etc. The 53 * secondary goal is to linearize the data so the data can be 54 * passed to crypto hardware in the most efficient manner possible. 55 */ 56 struct mbuf * 57 m_clone(struct mbuf *m0) 58 { 59 struct mbuf *m, *mprev; 60 struct mbuf *n, *mfirst, *mlast; 61 int len, off; 62 63 KASSERT(m0 != NULL, ("m_clone: null mbuf")); 64 65 mprev = NULL; 66 for (m = m0; m != NULL; m = mprev->m_next) { 67 /* 68 * Regular mbufs are ignored unless there's a cluster 69 * in front of it that we can use to coalesce. We do 70 * the latter mainly so later clusters can be coalesced 71 * also w/o having to handle them specially (i.e. convert 72 * mbuf+cluster -> cluster). This optimization is heavily 73 * influenced by the assumption that we're running over 74 * Ethernet where MCLBYTES is large enough that the max 75 * packet size will permit lots of coalescing into a 76 * single cluster. This in turn permits efficient 77 * crypto operations, especially when using hardware. 78 */ 79 if ((m->m_flags & M_EXT) == 0) { 80 if (mprev && (mprev->m_flags & M_EXT) && 81 m->m_len <= M_TRAILINGSPACE(mprev)) { 82 /* XXX: this ignores mbuf types */ 83 memcpy(mtod(mprev, caddr_t) + mprev->m_len, 84 mtod(m, caddr_t), m->m_len); 85 mprev->m_len += m->m_len; 86 mprev->m_next = m->m_next; /* unlink from chain */ 87 m_free(m); /* reclaim mbuf */ 88 newipsecstat.ips_mbcoalesced++; 89 } else { 90 mprev = m; 91 } 92 continue; 93 } 94 /* 95 * Writable mbufs are left alone (for now). 96 */ 97 if (!MEXT_IS_REF(m)) { 98 mprev = m; 99 continue; 100 } 101 102 /* 103 * Not writable, replace with a copy or coalesce with 104 * the previous mbuf if possible (since we have to copy 105 * it anyway, we try to reduce the number of mbufs and 106 * clusters so that future work is easier). 107 */ 108 KASSERT(m->m_flags & M_EXT, 109 ("m_clone: m_flags 0x%x", m->m_flags)); 110 /* NB: we only coalesce into a cluster or larger */ 111 if (mprev != NULL && (mprev->m_flags & M_EXT) && 112 m->m_len <= M_TRAILINGSPACE(mprev)) { 113 /* XXX: this ignores mbuf types */ 114 memcpy(mtod(mprev, caddr_t) + mprev->m_len, 115 mtod(m, caddr_t), m->m_len); 116 mprev->m_len += m->m_len; 117 mprev->m_next = m->m_next; /* unlink from chain */ 118 m_free(m); /* reclaim mbuf */ 119 newipsecstat.ips_clcoalesced++; 120 continue; 121 } 122 123 /* 124 * Allocate new space to hold the copy... 125 */ 126 /* XXX why can M_PKTHDR be set past the first mbuf? */ 127 if (mprev == NULL && (m->m_flags & M_PKTHDR)) { 128 /* 129 * NB: if a packet header is present we must 130 * allocate the mbuf separately from any cluster 131 * because M_MOVE_PKTHDR will smash the data 132 * pointer and drop the M_EXT marker. 133 */ 134 MGETHDR(n, M_DONTWAIT, m->m_type); 135 if (n == NULL) { 136 m_freem(m0); 137 return (NULL); 138 } 139 M_MOVE_PKTHDR(n, m); 140 MCLGET(n, M_DONTWAIT); 141 if ((n->m_flags & M_EXT) == 0) { 142 m_free(n); 143 m_freem(m0); 144 return (NULL); 145 } 146 } else { 147 n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags); 148 if (n == NULL) { 149 m_freem(m0); 150 return (NULL); 151 } 152 } 153 /* 154 * ... and copy the data. We deal with jumbo mbufs 155 * (i.e. m_len > MCLBYTES) by splitting them into 156 * clusters. We could just malloc a buffer and make 157 * it external but too many device drivers don't know 158 * how to break up the non-contiguous memory when 159 * doing DMA. 160 */ 161 len = m->m_len; 162 off = 0; 163 mfirst = n; 164 mlast = NULL; 165 for (;;) { 166 int cc = min(len, MCLBYTES); 167 memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc); 168 n->m_len = cc; 169 if (mlast != NULL) 170 mlast->m_next = n; 171 mlast = n; 172 newipsecstat.ips_clcopied++; 173 174 len -= cc; 175 if (len <= 0) 176 break; 177 off += cc; 178 179 n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags); 180 if (n == NULL) { 181 m_freem(mfirst); 182 m_freem(m0); 183 return (NULL); 184 } 185 } 186 n->m_next = m->m_next; 187 if (mprev == NULL) 188 m0 = mfirst; /* new head of chain */ 189 else 190 mprev->m_next = mfirst; /* replace old mbuf */ 191 m_free(m); /* release old mbuf */ 192 mprev = mfirst; 193 } 194 return (m0); 195 } 196 197 /* 198 * Make space for a new header of length hlen at skip bytes 199 * into the packet. When doing this we allocate new mbufs only 200 * when absolutely necessary. The mbuf where the new header 201 * is to go is returned together with an offset into the mbuf. 202 * If NULL is returned then the mbuf chain may have been modified; 203 * the caller is assumed to always free the chain. 204 */ 205 struct mbuf * 206 m_makespace(struct mbuf *m0, int skip, int hlen, int *off) 207 { 208 struct mbuf *m; 209 unsigned remain; 210 211 KASSERT(m0 != NULL, ("m_dmakespace: null mbuf")); 212 KASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen)); 213 214 for (m = m0; m && skip > m->m_len; m = m->m_next) 215 skip -= m->m_len; 216 if (m == NULL) 217 return (NULL); 218 /* 219 * At this point skip is the offset into the mbuf m 220 * where the new header should be placed. Figure out 221 * if there's space to insert the new header. If so, 222 * and copying the remainder makese sense then do so. 223 * Otherwise insert a new mbuf in the chain, splitting 224 * the contents of m as needed. 225 */ 226 remain = m->m_len - skip; /* data to move */ 227 if (hlen > M_TRAILINGSPACE(m)) { 228 struct mbuf *n; 229 230 /* XXX code doesn't handle clusters XXX */ 231 KASSERT(remain < MLEN, 232 ("m_makespace: remainder too big: %u", remain)); 233 /* 234 * Not enough space in m, split the contents 235 * of m, inserting new mbufs as required. 236 * 237 * NB: this ignores mbuf types. 238 */ 239 MGET(n, M_DONTWAIT, MT_DATA); 240 if (n == NULL) 241 return (NULL); 242 n->m_next = m->m_next; /* splice new mbuf */ 243 m->m_next = n; 244 newipsecstat.ips_mbinserted++; 245 if (hlen <= M_TRAILINGSPACE(m) + remain) { 246 /* 247 * New header fits in the old mbuf if we copy 248 * the remainder; just do the copy to the new 249 * mbuf and we're good to go. 250 */ 251 memcpy(mtod(n, caddr_t), 252 mtod(m, caddr_t) + skip, remain); 253 n->m_len = remain; 254 m->m_len = skip + hlen; 255 *off = skip; 256 } else { 257 /* 258 * No space in the old mbuf for the new header. 259 * Make space in the new mbuf and check the 260 * remainder'd data fits too. If not then we 261 * must allocate an additional mbuf (yech). 262 */ 263 n->m_len = 0; 264 if (remain + hlen > M_TRAILINGSPACE(n)) { 265 struct mbuf *n2; 266 267 MGET(n2, M_DONTWAIT, MT_DATA); 268 /* NB: new mbuf is on chain, let caller free */ 269 if (n2 == NULL) 270 return (NULL); 271 n2->m_len = 0; 272 memcpy(mtod(n2, caddr_t), 273 mtod(m, caddr_t) + skip, remain); 274 n2->m_len = remain; 275 /* splice in second mbuf */ 276 n2->m_next = n->m_next; 277 n->m_next = n2; 278 newipsecstat.ips_mbinserted++; 279 } else { 280 memcpy(mtod(n, caddr_t) + hlen, 281 mtod(m, caddr_t) + skip, remain); 282 n->m_len += remain; 283 } 284 m->m_len -= remain; 285 n->m_len += hlen; 286 m = n; /* header is at front ... */ 287 *off = 0; /* ... of new mbuf */ 288 } 289 } else { 290 /* 291 * Copy the remainder to the back of the mbuf 292 * so there's space to write the new header. 293 */ 294 /* XXX can this be memcpy? does it handle overlap? */ 295 ovbcopy(mtod(m, caddr_t) + skip, 296 mtod(m, caddr_t) + skip + hlen, remain); 297 m->m_len += hlen; 298 *off = skip; 299 } 300 m0->m_pkthdr.len += hlen; /* adjust packet length */ 301 return m; 302 } 303 304 /* 305 * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header 306 * length is updated, and a pointer to the first byte of the padding 307 * (which is guaranteed to be all in one mbuf) is returned. 308 */ 309 caddr_t 310 m_pad(struct mbuf *m, int n) 311 { 312 register struct mbuf *m0, *m1; 313 register int len, pad; 314 caddr_t retval; 315 316 if (n <= 0) { /* No stupid arguments. */ 317 DPRINTF(("m_pad: pad length invalid (%d)\n", n)); 318 m_freem(m); 319 return NULL; 320 } 321 322 len = m->m_pkthdr.len; 323 pad = n; 324 m0 = m; 325 326 while (m0->m_len < len) { 327 KASSERT(m0->m_next != NULL, ("m_pad: m0 null, len %u m_len %u", len, m0->m_len));/*XXX*/ 328 len -= m0->m_len; 329 m0 = m0->m_next; 330 } 331 332 if (m0->m_len != len) { 333 DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n", 334 m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len)); 335 336 m_freem(m); 337 return NULL; 338 } 339 340 /* Check for zero-length trailing mbufs, and find the last one. */ 341 for (m1 = m0; m1->m_next; m1 = m1->m_next) { 342 if (m1->m_next->m_len != 0) { 343 DPRINTF(("m_pad: length mismatch (should be %d " 344 "instead of %d)\n", 345 m->m_pkthdr.len, 346 m->m_pkthdr.len + m1->m_next->m_len)); 347 348 m_freem(m); 349 return NULL; 350 } 351 352 m0 = m1->m_next; 353 } 354 355 if (pad > M_TRAILINGSPACE(m0)) { 356 /* Add an mbuf to the chain. */ 357 MGET(m1, M_DONTWAIT, MT_DATA); 358 if (m1 == 0) { 359 m_freem(m0); 360 DPRINTF(("m_pad: unable to get extra mbuf\n")); 361 return NULL; 362 } 363 364 m0->m_next = m1; 365 m0 = m1; 366 m0->m_len = 0; 367 } 368 369 retval = m0->m_data + m0->m_len; 370 m0->m_len += pad; 371 m->m_pkthdr.len += pad; 372 373 return retval; 374 } 375 376 /* 377 * Remove hlen data at offset skip in the packet. This is used by 378 * the protocols strip protocol headers and associated data (e.g. IV, 379 * authenticator) on input. 380 */ 381 int 382 m_striphdr(struct mbuf *m, int skip, int hlen) 383 { 384 struct mbuf *m1; 385 int roff; 386 387 /* Find beginning of header */ 388 m1 = m_getptr(m, skip, &roff); 389 if (m1 == NULL) 390 return (EINVAL); 391 392 /* Remove the header and associated data from the mbuf. */ 393 if (roff == 0) { 394 /* The header was at the beginning of the mbuf */ 395 newipsecstat.ips_input_front++; 396 m_adj(m1, hlen); 397 if ((m1->m_flags & M_PKTHDR) == 0) 398 m->m_pkthdr.len -= hlen; 399 } else if (roff + hlen >= m1->m_len) { 400 struct mbuf *mo; 401 402 /* 403 * Part or all of the header is at the end of this mbuf, 404 * so first let's remove the remainder of the header from 405 * the beginning of the remainder of the mbuf chain, if any. 406 */ 407 newipsecstat.ips_input_end++; 408 if (roff + hlen > m1->m_len) { 409 /* Adjust the next mbuf by the remainder */ 410 m_adj(m1->m_next, roff + hlen - m1->m_len); 411 412 /* The second mbuf is guaranteed not to have a pkthdr... */ 413 m->m_pkthdr.len -= (roff + hlen - m1->m_len); 414 } 415 416 /* Now, let's unlink the mbuf chain for a second...*/ 417 mo = m1->m_next; 418 m1->m_next = NULL; 419 420 /* ...and trim the end of the first part of the chain...sick */ 421 m_adj(m1, -(m1->m_len - roff)); 422 if ((m1->m_flags & M_PKTHDR) == 0) 423 m->m_pkthdr.len -= (m1->m_len - roff); 424 425 /* Finally, let's relink */ 426 m1->m_next = mo; 427 } else { 428 /* 429 * The header lies in the "middle" of the mbuf; copy 430 * the remainder of the mbuf down over the header. 431 */ 432 newipsecstat.ips_input_middle++; 433 bcopy(mtod(m1, u_char *) + roff + hlen, 434 mtod(m1, u_char *) + roff, 435 m1->m_len - (roff + hlen)); 436 m1->m_len -= hlen; 437 m->m_pkthdr.len -= hlen; 438 } 439 return (0); 440 } 441 442 /* 443 * Diagnostic routine to check mbuf alignment as required by the 444 * crypto device drivers (that use DMA). 445 */ 446 void 447 m_checkalignment(const char* where, struct mbuf *m0, int off, int len) 448 { 449 int roff; 450 struct mbuf *m = m_getptr(m0, off, &roff); 451 caddr_t addr; 452 453 if (m == NULL) 454 return; 455 printf("%s (off %u len %u): ", where, off, len); 456 addr = mtod(m, caddr_t) + roff; 457 do { 458 int mlen; 459 460 if (((uintptr_t) addr) & 3) { 461 printf("addr misaligned %p,", addr); 462 break; 463 } 464 mlen = m->m_len; 465 if (mlen > len) 466 mlen = len; 467 len -= mlen; 468 if (len && (mlen & 3)) { 469 printf("len mismatch %u,", mlen); 470 break; 471 } 472 m = m->m_next; 473 addr = m ? mtod(m, caddr_t) : NULL; 474 } while (m && len > 0); 475 for (m = m0; m; m = m->m_next) 476 printf(" [%p:%u]", mtod(m, caddr_t), m->m_len); 477 printf("\n"); 478 } 479