/*-
 * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/*
 * IPsec-specific mbuf routines.
 */

#include "opt_param.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>

#include <net/route.h>
#include <netinet/in.h>

#include <netipsec/ipsec.h>

extern struct mbuf *m_getptr(struct mbuf *, int, int *);

/*
 * Create a writable copy of the mbuf chain.  While doing this
 * we compact the chain with a goal of producing a chain with
 * at most two mbufs.  The second mbuf in this chain is likely
 * to be a cluster.  The primary purpose of this work is to create
 * a writable packet for encryption, compression, etc.  The
 * secondary goal is to linearize the data so the data can be
 * passed to crypto hardware in the most efficient manner possible.
 */
struct mbuf *
m_clone(struct mbuf *m0)
{
        struct mbuf *m, *mprev;
        struct mbuf *n, *mfirst, *mlast;
        int len, off;

        KASSERT(m0 != NULL, ("m_clone: null mbuf"));

        mprev = NULL;
        for (m = m0; m != NULL; m = mprev->m_next) {
                /*
                 * Regular mbufs are ignored unless there's a cluster
                 * in front of them that we can use to coalesce.  We do
                 * the latter mainly so later clusters can be coalesced
                 * also w/o having to handle them specially (i.e. convert
                 * mbuf+cluster -> cluster).  This optimization is heavily
                 * influenced by the assumption that we're running over
                 * Ethernet where MCLBYTES is large enough that the max
                 * packet size will permit lots of coalescing into a
                 * single cluster.  This in turn permits efficient
                 * crypto operations, especially when using hardware.
                 */
                if ((m->m_flags & M_EXT) == 0) {
                        if (mprev && (mprev->m_flags & M_EXT) &&
                            m->m_len <= M_TRAILINGSPACE(mprev)) {
                                /* XXX: this ignores mbuf types */
                                memcpy(mtod(mprev, caddr_t) + mprev->m_len,
                                    mtod(m, caddr_t), m->m_len);
                                mprev->m_len += m->m_len;
                                mprev->m_next = m->m_next;      /* unlink from chain */
                                m_free(m);                      /* reclaim mbuf */
                                newipsecstat.ips_mbcoalesced++;
                        } else {
                                mprev = m;
                        }
                        continue;
                }
                /*
                 * Writable mbufs are left alone (for now).
                 */
                if (!MEXT_IS_REF(m)) {
                        mprev = m;
                        continue;
                }

                /*
                 * Not writable, replace with a copy or coalesce with
                 * the previous mbuf if possible (since we have to copy
                 * it anyway, we try to reduce the number of mbufs and
                 * clusters so that future work is easier).
                 */
                KASSERT(m->m_flags & M_EXT,
                    ("m_clone: m_flags 0x%x", m->m_flags));
                /* NB: we only coalesce into a cluster or larger */
                if (mprev != NULL && (mprev->m_flags & M_EXT) &&
                    m->m_len <= M_TRAILINGSPACE(mprev)) {
                        /* XXX: this ignores mbuf types */
                        memcpy(mtod(mprev, caddr_t) + mprev->m_len,
                            mtod(m, caddr_t), m->m_len);
                        mprev->m_len += m->m_len;
                        mprev->m_next = m->m_next;      /* unlink from chain */
                        m_free(m);                      /* reclaim mbuf */
                        newipsecstat.ips_clcoalesced++;
                        continue;
                }

                /*
                 * Allocate new space to hold the copy...
                 */
                /* XXX why can M_PKTHDR be set past the first mbuf? */
                if (mprev == NULL && (m->m_flags & M_PKTHDR)) {
                        /*
                         * NB: if a packet header is present we must
                         * allocate the mbuf separately from any cluster
                         * because M_MOVE_PKTHDR will smash the data
                         * pointer and drop the M_EXT marker.
                         */
                        MGETHDR(n, M_DONTWAIT, m->m_type);
                        if (n == NULL) {
                                m_freem(m0);
                                return (NULL);
                        }
                        M_MOVE_PKTHDR(n, m);
                        MCLGET(n, M_DONTWAIT);
                        if ((n->m_flags & M_EXT) == 0) {
                                m_free(n);
                                m_freem(m0);
                                return (NULL);
                        }
                } else {
                        n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
                        if (n == NULL) {
                                m_freem(m0);
                                return (NULL);
                        }
                }
                /*
                 * ... and copy the data.  We deal with jumbo mbufs
                 * (i.e. m_len > MCLBYTES) by splitting them into
                 * clusters.  We could just malloc a buffer and make
                 * it external but too many device drivers don't know
                 * how to break up the non-contiguous memory when
                 * doing DMA.
                 */
                len = m->m_len;
                off = 0;
                mfirst = n;
                mlast = NULL;
                for (;;) {
                        int cc = min(len, MCLBYTES);
                        memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc);
                        n->m_len = cc;
                        if (mlast != NULL)
                                mlast->m_next = n;
                        mlast = n;
                        newipsecstat.ips_clcopied++;

                        len -= cc;
                        if (len <= 0)
                                break;
                        off += cc;

                        n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
                        if (n == NULL) {
                                m_freem(mfirst);
                                m_freem(m0);
                                return (NULL);
                        }
                }
                n->m_next = m->m_next;
                if (mprev == NULL)
                        m0 = mfirst;            /* new head of chain */
                else
                        mprev->m_next = mfirst; /* replace old mbuf */
                m_free(m);                      /* release old mbuf */
                mprev = mfirst;
        }
        return (m0);
}
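
/*
 * Example (illustrative sketch, not part of the original file): a
 * transform that wants to rewrite packet data in place would first
 * obtain a writable, compacted chain.  The wrapper name is hypothetical.
 */
#if 0
static struct mbuf *
example_make_writable(struct mbuf *m)
{
        /* m_clone frees the chain and returns NULL on allocation failure. */
        m = m_clone(m);
        if (m == NULL)
                return (NULL);          /* nothing left for the caller to free */
        /* The data may now be modified, e.g. encrypted in place. */
        return (m);
}
#endif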

/*
 * Make space for a new header of length hlen at skip bytes
 * into the packet.  When doing this we allocate new mbufs only
 * when absolutely necessary.  The mbuf where the new header
 * is to go is returned together with an offset into the mbuf.
 * If NULL is returned then the mbuf chain may have been modified;
 * the caller is assumed to always free the chain.
 */
struct mbuf *
m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
{
        struct mbuf *m;
        unsigned remain;

        KASSERT(m0 != NULL, ("m_makespace: null mbuf"));
        KASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen));

        for (m = m0; m && skip > m->m_len; m = m->m_next)
                skip -= m->m_len;
        if (m == NULL)
                return (NULL);
        /*
         * At this point skip is the offset into the mbuf m
         * where the new header should be placed.  Figure out
         * if there's space to insert the new header.  If so,
         * and copying the remainder makes sense then do so.
         * Otherwise insert a new mbuf in the chain, splitting
         * the contents of m as needed.
         */
        remain = m->m_len - skip;               /* data to move */
        if (hlen > M_TRAILINGSPACE(m)) {
                struct mbuf *n;

                /* XXX code doesn't handle clusters XXX */
                KASSERT(remain < MLEN,
                    ("m_makespace: remainder too big: %u", remain));
                /*
                 * Not enough space in m, split the contents
                 * of m, inserting new mbufs as required.
                 *
                 * NB: this ignores mbuf types.
                 */
                MGET(n, M_DONTWAIT, MT_DATA);
                if (n == NULL)
                        return (NULL);
                n->m_next = m->m_next;          /* splice new mbuf */
                m->m_next = n;
                newipsecstat.ips_mbinserted++;
                if (hlen <= M_TRAILINGSPACE(m) + remain) {
                        /*
                         * New header fits in the old mbuf if we copy
                         * the remainder; just do the copy to the new
                         * mbuf and we're good to go.
                         */
                        memcpy(mtod(n, caddr_t),
                            mtod(m, caddr_t) + skip, remain);
                        n->m_len = remain;
                        m->m_len = skip + hlen;
                        *off = skip;
                } else {
                        /*
                         * No space in the old mbuf for the new header.
                         * Make space in the new mbuf and check that the
                         * remaining data fits too.  If not then we
                         * must allocate an additional mbuf (yech).
                         */
                        n->m_len = 0;
                        if (remain + hlen > M_TRAILINGSPACE(n)) {
                                struct mbuf *n2;

                                MGET(n2, M_DONTWAIT, MT_DATA);
                                /* NB: new mbuf is on chain, let caller free */
                                if (n2 == NULL)
                                        return (NULL);
                                n2->m_len = 0;
                                memcpy(mtod(n2, caddr_t),
                                    mtod(m, caddr_t) + skip, remain);
                                n2->m_len = remain;
                                /* splice in second mbuf */
                                n2->m_next = n->m_next;
                                n->m_next = n2;
                                newipsecstat.ips_mbinserted++;
                        } else {
                                memcpy(mtod(n, caddr_t) + hlen,
                                    mtod(m, caddr_t) + skip, remain);
                                n->m_len += remain;
                        }
                        m->m_len -= remain;
                        n->m_len += hlen;
                        m = n;                  /* header is at front ... */
                        *off = 0;               /* ... of new mbuf */
                }
        } else {
                /*
                 * Copy the remainder to the back of the mbuf
                 * so there's space to write the new header.
                 */
                bcopy(mtod(m, caddr_t) + skip,
                    mtod(m, caddr_t) + skip + hlen, remain);
                m->m_len += hlen;
                *off = skip;
        }
        m0->m_pkthdr.len += hlen;               /* adjust packet length */
        return m;
}
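
/*
 * Example (illustrative sketch, not part of the original file): opening
 * a gap for an hlen-byte protocol header that should start skip bytes
 * into the packet.  The wrapper name and the zero fill are assumptions
 * made for illustration only.
 */
#if 0
static int
example_insert_header(struct mbuf *m0, int skip, int hlen)
{
        struct mbuf *m;
        int off;

        m = m_makespace(m0, skip, hlen, &off);
        if (m == NULL) {
                m_freem(m0);            /* caller always frees the chain */
                return (ENOBUFS);
        }
        /* The new header starts off bytes into mbuf m; fill it in. */
        bzero(mtod(m, caddr_t) + off, hlen);
        return (0);
}
#endif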

/*
 * m_pad(m, n) pads <m> with <n> bytes at the end.  The packet header
 * length is updated, and a pointer to the first byte of the padding
 * (which is guaranteed to be all in one mbuf) is returned.
 */
caddr_t
m_pad(struct mbuf *m, int n)
{
        register struct mbuf *m0, *m1;
        register int len, pad;
        caddr_t retval;

        if (n <= 0) {                   /* No stupid arguments. */
                DPRINTF(("m_pad: pad length invalid (%d)\n", n));
                m_freem(m);
                return NULL;
        }

        len = m->m_pkthdr.len;
        pad = n;
        m0 = m;

        while (m0->m_len < len) {
                KASSERT(m0->m_next != NULL,
                    ("m_pad: m0->m_next null, len %u m_len %u", len, m0->m_len));       /*XXX*/
                len -= m0->m_len;
                m0 = m0->m_next;
        }

        if (m0->m_len != len) {
                DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n",
                    m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len));

                m_freem(m);
                return NULL;
        }

        /* Check for zero-length trailing mbufs, and find the last one. */
        for (m1 = m0; m1->m_next; m1 = m1->m_next) {
                if (m1->m_next->m_len != 0) {
                        DPRINTF(("m_pad: length mismatch (should be %d "
                            "instead of %d)\n",
                            m->m_pkthdr.len,
                            m->m_pkthdr.len + m1->m_next->m_len));

                        m_freem(m);
                        return NULL;
                }

                m0 = m1->m_next;
        }

        if (pad > M_TRAILINGSPACE(m0)) {
                /* Add an mbuf to the chain. */
                MGET(m1, M_DONTWAIT, MT_DATA);
                if (m1 == NULL) {
                        /* Free the whole chain, as the other error paths do. */
                        m_freem(m);
                        DPRINTF(("m_pad: unable to get extra mbuf\n"));
                        return NULL;
                }

                m0->m_next = m1;
                m0 = m1;
                m0->m_len = 0;
        }

        retval = m0->m_data + m0->m_len;
        m0->m_len += pad;
        m->m_pkthdr.len += pad;

        return retval;
}
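
/*
 * Example (illustrative sketch, not part of the original file): appending
 * padlen bytes of ESP-style self-describing padding.  The wrapper name
 * and the fill pattern are assumptions, not taken from this file.
 */
#if 0
static int
example_append_padding(struct mbuf *m, int padlen)
{
        caddr_t pad;
        int i;

        pad = m_pad(m, padlen);
        if (pad == NULL)                /* m_pad already freed the chain */
                return (ENOBUFS);
        for (i = 0; i < padlen; i++)    /* pad bytes 1, 2, 3, ... */
                pad[i] = i + 1;
        return (0);
}
#endif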

/*
 * Remove hlen data at offset skip in the packet.  This is used by
 * the protocols to strip protocol headers and associated data (e.g. IV,
 * authenticator) on input.
 */
int
m_striphdr(struct mbuf *m, int skip, int hlen)
{
        struct mbuf *m1;
        int roff;

        /* Find beginning of header */
        m1 = m_getptr(m, skip, &roff);
        if (m1 == NULL)
                return (EINVAL);

        /* Remove the header and associated data from the mbuf. */
        if (roff == 0) {
                /* The header was at the beginning of the mbuf */
                newipsecstat.ips_input_front++;
                m_adj(m1, hlen);
                if ((m1->m_flags & M_PKTHDR) == 0)
                        m->m_pkthdr.len -= hlen;
        } else if (roff + hlen >= m1->m_len) {
                struct mbuf *mo;
                int adjlen;

                /*
                 * Part or all of the header is at the end of this mbuf,
                 * so first let's remove the remainder of the header from
                 * the beginning of the remainder of the mbuf chain, if any.
                 */
                newipsecstat.ips_input_end++;
                if (roff + hlen > m1->m_len) {
                        /* Adjust the next mbuf by the remainder */
                        m_adj(m1->m_next, roff + hlen - m1->m_len);

                        /* The second mbuf is guaranteed not to have a pkthdr... */
                        m->m_pkthdr.len -= (roff + hlen - m1->m_len);
                }

                /* Now, let's unlink the mbuf chain for a second... */
                mo = m1->m_next;
                m1->m_next = NULL;

                /* ...and trim the end of the first part of the chain...sick */
                adjlen = m1->m_len - roff;      /* NB: save before m_adj changes m_len */
                m_adj(m1, -adjlen);
                if ((m1->m_flags & M_PKTHDR) == 0)
                        m->m_pkthdr.len -= adjlen;

                /* Finally, let's relink */
                m1->m_next = mo;
        } else {
                /*
                 * The header lies in the "middle" of the mbuf; copy
                 * the remainder of the mbuf down over the header.
                 */
                newipsecstat.ips_input_middle++;
                bcopy(mtod(m1, u_char *) + roff + hlen,
                    mtod(m1, u_char *) + roff,
                    m1->m_len - (roff + hlen));
                m1->m_len -= hlen;
                m->m_pkthdr.len -= hlen;
        }
        return (0);
}
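
/*
 * Example (illustrative sketch, not part of the original file): stripping
 * an hlen-byte header (say an ESP header plus IV) that starts skip bytes
 * into an inbound packet.  The wrapper name is hypothetical.
 */
#if 0
static int
example_strip_header(struct mbuf *m, int skip, int hlen)
{
        int error;

        error = m_striphdr(m, skip, hlen);
        if (error) {
                m_freem(m);             /* m_striphdr does not free on error */
                return (error);
        }
        return (0);
}
#endif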

/*
 * Diagnostic routine to check mbuf alignment as required by the
 * crypto device drivers (that use DMA).
 */
void
m_checkalignment(const char *where, struct mbuf *m0, int off, int len)
{
        int roff;
        struct mbuf *m = m_getptr(m0, off, &roff);
        caddr_t addr;

        if (m == NULL)
                return;
        printf("%s (off %u len %u): ", where, off, len);
        addr = mtod(m, caddr_t) + roff;
        do {
                int mlen;

                if (((uintptr_t) addr) & 3) {
                        printf("addr misaligned %p,", addr);
                        break;
                }
                mlen = m->m_len;
                if (mlen > len)
                        mlen = len;
                len -= mlen;
                if (len && (mlen & 3)) {
                        printf("len mismatch %u,", mlen);
                        break;
                }
                m = m->m_next;
                addr = m ? mtod(m, caddr_t) : NULL;
        } while (m && len > 0);
        for (m = m0; m; m = m->m_next)
                printf(" [%p:%u]", mtod(m, caddr_t), m->m_len);
        printf("\n");
}
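
/*
 * Example (illustrative sketch, not part of the original file): a debug
 * path could dump the layout of the region about to be handed to a DMA
 * engine.  The tag string and wrapper name are arbitrary.
 */
#if 0
static void
example_check_dma_region(struct mbuf *m, int skip, int payload_len)
{
        /* Prints the chain layout, flagging misaligned or short segments. */
        m_checkalignment("example", m, skip, payload_len);
}
#endif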