1 /*- 2 * Copyright (c) 2002, 2003 Sam Leffler, Errno Consulting 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 /* 30 * IPsec-specific mbuf routines. 31 */ 32 33 #include "opt_param.h" 34 35 #include <sys/param.h> 36 #include <sys/systm.h> 37 #include <sys/mbuf.h> 38 #include <sys/socket.h> 39 40 #include <net/route.h> 41 #include <netinet/in.h> 42 43 #include <netipsec/ipsec.h> 44 45 /* 46 * Create a writable copy of the mbuf chain. While doing this 47 * we compact the chain with a goal of producing a chain with 48 * at most two mbufs. The second mbuf in this chain is likely 49 * to be a cluster. The primary purpose of this work is to create 50 * a writable packet for encryption, compression, etc. The 51 * secondary goal is to linearize the data so the data can be 52 * passed to crypto hardware in the most efficient manner possible. 53 */ 54 struct mbuf * 55 m_clone(struct mbuf *m0) 56 { 57 struct mbuf *m, *mprev; 58 struct mbuf *n, *mfirst, *mlast; 59 int len, off; 60 61 IPSEC_ASSERT(m0 != NULL, ("null mbuf")); 62 63 mprev = NULL; 64 for (m = m0; m != NULL; m = mprev->m_next) { 65 /* 66 * Regular mbufs are ignored unless there's a cluster 67 * in front of it that we can use to coalesce. We do 68 * the latter mainly so later clusters can be coalesced 69 * also w/o having to handle them specially (i.e. convert 70 * mbuf+cluster -> cluster). This optimization is heavily 71 * influenced by the assumption that we're running over 72 * Ethernet where MCLBYTES is large enough that the max 73 * packet size will permit lots of coalescing into a 74 * single cluster. This in turn permits efficient 75 * crypto operations, especially when using hardware. 76 */ 77 if ((m->m_flags & M_EXT) == 0) { 78 if (mprev && (mprev->m_flags & M_EXT) && 79 m->m_len <= M_TRAILINGSPACE(mprev)) { 80 /* XXX: this ignores mbuf types */ 81 memcpy(mtod(mprev, caddr_t) + mprev->m_len, 82 mtod(m, caddr_t), m->m_len); 83 mprev->m_len += m->m_len; 84 mprev->m_next = m->m_next; /* unlink from chain */ 85 m_free(m); /* reclaim mbuf */ 86 newipsecstat.ips_mbcoalesced++; 87 } else { 88 mprev = m; 89 } 90 continue; 91 } 92 /* 93 * Writable mbufs are left alone (for now). 94 */ 95 if (!MEXT_IS_REF(m)) { 96 mprev = m; 97 continue; 98 } 99 100 /* 101 * Not writable, replace with a copy or coalesce with 102 * the previous mbuf if possible (since we have to copy 103 * it anyway, we try to reduce the number of mbufs and 104 * clusters so that future work is easier). 105 */ 106 IPSEC_ASSERT(m->m_flags & M_EXT, ("m_flags 0x%x", m->m_flags)); 107 /* NB: we only coalesce into a cluster or larger */ 108 if (mprev != NULL && (mprev->m_flags & M_EXT) && 109 m->m_len <= M_TRAILINGSPACE(mprev)) { 110 /* XXX: this ignores mbuf types */ 111 memcpy(mtod(mprev, caddr_t) + mprev->m_len, 112 mtod(m, caddr_t), m->m_len); 113 mprev->m_len += m->m_len; 114 mprev->m_next = m->m_next; /* unlink from chain */ 115 m_free(m); /* reclaim mbuf */ 116 newipsecstat.ips_clcoalesced++; 117 continue; 118 } 119 120 /* 121 * Allocate new space to hold the copy... 122 */ 123 /* XXX why can M_PKTHDR be set past the first mbuf? */ 124 if (mprev == NULL && (m->m_flags & M_PKTHDR)) { 125 /* 126 * NB: if a packet header is present we must 127 * allocate the mbuf separately from any cluster 128 * because M_MOVE_PKTHDR will smash the data 129 * pointer and drop the M_EXT marker. 130 */ 131 MGETHDR(n, M_DONTWAIT, m->m_type); 132 if (n == NULL) { 133 m_freem(m0); 134 return (NULL); 135 } 136 M_MOVE_PKTHDR(n, m); 137 MCLGET(n, M_DONTWAIT); 138 if ((n->m_flags & M_EXT) == 0) { 139 m_free(n); 140 m_freem(m0); 141 return (NULL); 142 } 143 } else { 144 n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags); 145 if (n == NULL) { 146 m_freem(m0); 147 return (NULL); 148 } 149 } 150 /* 151 * ... and copy the data. We deal with jumbo mbufs 152 * (i.e. m_len > MCLBYTES) by splitting them into 153 * clusters. We could just malloc a buffer and make 154 * it external but too many device drivers don't know 155 * how to break up the non-contiguous memory when 156 * doing DMA. 157 */ 158 len = m->m_len; 159 off = 0; 160 mfirst = n; 161 mlast = NULL; 162 for (;;) { 163 int cc = min(len, MCLBYTES); 164 memcpy(mtod(n, caddr_t), mtod(m, caddr_t) + off, cc); 165 n->m_len = cc; 166 if (mlast != NULL) 167 mlast->m_next = n; 168 mlast = n; 169 newipsecstat.ips_clcopied++; 170 171 len -= cc; 172 if (len <= 0) 173 break; 174 off += cc; 175 176 n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags); 177 if (n == NULL) { 178 m_freem(mfirst); 179 m_freem(m0); 180 return (NULL); 181 } 182 } 183 n->m_next = m->m_next; 184 if (mprev == NULL) 185 m0 = mfirst; /* new head of chain */ 186 else 187 mprev->m_next = mfirst; /* replace old mbuf */ 188 m_free(m); /* release old mbuf */ 189 mprev = mfirst; 190 } 191 return (m0); 192 } 193 194 /* 195 * Make space for a new header of length hlen at skip bytes 196 * into the packet. When doing this we allocate new mbufs only 197 * when absolutely necessary. The mbuf where the new header 198 * is to go is returned together with an offset into the mbuf. 199 * If NULL is returned then the mbuf chain may have been modified; 200 * the caller is assumed to always free the chain. 201 */ 202 struct mbuf * 203 m_makespace(struct mbuf *m0, int skip, int hlen, int *off) 204 { 205 struct mbuf *m; 206 unsigned remain; 207 208 IPSEC_ASSERT(m0 != NULL, ("null mbuf")); 209 IPSEC_ASSERT(hlen < MHLEN, ("hlen too big: %u", hlen)); 210 211 for (m = m0; m && skip > m->m_len; m = m->m_next) 212 skip -= m->m_len; 213 if (m == NULL) 214 return (NULL); 215 /* 216 * At this point skip is the offset into the mbuf m 217 * where the new header should be placed. Figure out 218 * if there's space to insert the new header. If so, 219 * and copying the remainder makese sense then do so. 220 * Otherwise insert a new mbuf in the chain, splitting 221 * the contents of m as needed. 222 */ 223 remain = m->m_len - skip; /* data to move */ 224 if (hlen > M_TRAILINGSPACE(m)) { 225 struct mbuf *n; 226 227 /* XXX code doesn't handle clusters XXX */ 228 IPSEC_ASSERT(remain < MLEN, ("remainder too big: %u", remain)); 229 /* 230 * Not enough space in m, split the contents 231 * of m, inserting new mbufs as required. 232 * 233 * NB: this ignores mbuf types. 234 */ 235 MGET(n, M_DONTWAIT, MT_DATA); 236 if (n == NULL) 237 return (NULL); 238 n->m_next = m->m_next; /* splice new mbuf */ 239 m->m_next = n; 240 newipsecstat.ips_mbinserted++; 241 if (hlen <= M_TRAILINGSPACE(m) + remain) { 242 /* 243 * New header fits in the old mbuf if we copy 244 * the remainder; just do the copy to the new 245 * mbuf and we're good to go. 246 */ 247 memcpy(mtod(n, caddr_t), 248 mtod(m, caddr_t) + skip, remain); 249 n->m_len = remain; 250 m->m_len = skip + hlen; 251 *off = skip; 252 } else { 253 /* 254 * No space in the old mbuf for the new header. 255 * Make space in the new mbuf and check the 256 * remainder'd data fits too. If not then we 257 * must allocate an additional mbuf (yech). 258 */ 259 n->m_len = 0; 260 if (remain + hlen > M_TRAILINGSPACE(n)) { 261 struct mbuf *n2; 262 263 MGET(n2, M_DONTWAIT, MT_DATA); 264 /* NB: new mbuf is on chain, let caller free */ 265 if (n2 == NULL) 266 return (NULL); 267 n2->m_len = 0; 268 memcpy(mtod(n2, caddr_t), 269 mtod(m, caddr_t) + skip, remain); 270 n2->m_len = remain; 271 /* splice in second mbuf */ 272 n2->m_next = n->m_next; 273 n->m_next = n2; 274 newipsecstat.ips_mbinserted++; 275 } else { 276 memcpy(mtod(n, caddr_t) + hlen, 277 mtod(m, caddr_t) + skip, remain); 278 n->m_len += remain; 279 } 280 m->m_len -= remain; 281 n->m_len += hlen; 282 m = n; /* header is at front ... */ 283 *off = 0; /* ... of new mbuf */ 284 } 285 } else { 286 /* 287 * Copy the remainder to the back of the mbuf 288 * so there's space to write the new header. 289 */ 290 bcopy(mtod(m, caddr_t) + skip, 291 mtod(m, caddr_t) + skip + hlen, remain); 292 m->m_len += hlen; 293 *off = skip; 294 } 295 m0->m_pkthdr.len += hlen; /* adjust packet length */ 296 return m; 297 } 298 299 /* 300 * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header 301 * length is updated, and a pointer to the first byte of the padding 302 * (which is guaranteed to be all in one mbuf) is returned. 303 */ 304 caddr_t 305 m_pad(struct mbuf *m, int n) 306 { 307 register struct mbuf *m0, *m1; 308 register int len, pad; 309 caddr_t retval; 310 311 if (n <= 0) { /* No stupid arguments. */ 312 DPRINTF(("%s: pad length invalid (%d)\n", __func__, n)); 313 m_freem(m); 314 return NULL; 315 } 316 317 len = m->m_pkthdr.len; 318 pad = n; 319 m0 = m; 320 321 while (m0->m_len < len) { 322 len -= m0->m_len; 323 m0 = m0->m_next; 324 } 325 326 if (m0->m_len != len) { 327 DPRINTF(("%s: length mismatch (should be %d instead of %d)\n", 328 __func__, m->m_pkthdr.len, 329 m->m_pkthdr.len + m0->m_len - len)); 330 331 m_freem(m); 332 return NULL; 333 } 334 335 /* Check for zero-length trailing mbufs, and find the last one. */ 336 for (m1 = m0; m1->m_next; m1 = m1->m_next) { 337 if (m1->m_next->m_len != 0) { 338 DPRINTF(("%s: length mismatch (should be %d instead " 339 "of %d)\n", __func__, 340 m->m_pkthdr.len, 341 m->m_pkthdr.len + m1->m_next->m_len)); 342 343 m_freem(m); 344 return NULL; 345 } 346 347 m0 = m1->m_next; 348 } 349 350 if (pad > M_TRAILINGSPACE(m0)) { 351 /* Add an mbuf to the chain. */ 352 MGET(m1, M_DONTWAIT, MT_DATA); 353 if (m1 == 0) { 354 m_freem(m0); 355 DPRINTF(("%s: unable to get extra mbuf\n", __func__)); 356 return NULL; 357 } 358 359 m0->m_next = m1; 360 m0 = m1; 361 m0->m_len = 0; 362 } 363 364 retval = m0->m_data + m0->m_len; 365 m0->m_len += pad; 366 m->m_pkthdr.len += pad; 367 368 return retval; 369 } 370 371 /* 372 * Remove hlen data at offset skip in the packet. This is used by 373 * the protocols strip protocol headers and associated data (e.g. IV, 374 * authenticator) on input. 375 */ 376 int 377 m_striphdr(struct mbuf *m, int skip, int hlen) 378 { 379 struct mbuf *m1; 380 int roff; 381 382 /* Find beginning of header */ 383 m1 = m_getptr(m, skip, &roff); 384 if (m1 == NULL) 385 return (EINVAL); 386 387 /* Remove the header and associated data from the mbuf. */ 388 if (roff == 0) { 389 /* The header was at the beginning of the mbuf */ 390 newipsecstat.ips_input_front++; 391 m_adj(m1, hlen); 392 if ((m1->m_flags & M_PKTHDR) == 0) 393 m->m_pkthdr.len -= hlen; 394 } else if (roff + hlen >= m1->m_len) { 395 struct mbuf *mo; 396 397 /* 398 * Part or all of the header is at the end of this mbuf, 399 * so first let's remove the remainder of the header from 400 * the beginning of the remainder of the mbuf chain, if any. 401 */ 402 newipsecstat.ips_input_end++; 403 if (roff + hlen > m1->m_len) { 404 /* Adjust the next mbuf by the remainder */ 405 m_adj(m1->m_next, roff + hlen - m1->m_len); 406 407 /* The second mbuf is guaranteed not to have a pkthdr... */ 408 m->m_pkthdr.len -= (roff + hlen - m1->m_len); 409 } 410 411 /* Now, let's unlink the mbuf chain for a second...*/ 412 mo = m1->m_next; 413 m1->m_next = NULL; 414 415 /* ...and trim the end of the first part of the chain...sick */ 416 m_adj(m1, -(m1->m_len - roff)); 417 if ((m1->m_flags & M_PKTHDR) == 0) 418 m->m_pkthdr.len -= (m1->m_len - roff); 419 420 /* Finally, let's relink */ 421 m1->m_next = mo; 422 } else { 423 /* 424 * The header lies in the "middle" of the mbuf; copy 425 * the remainder of the mbuf down over the header. 426 */ 427 newipsecstat.ips_input_middle++; 428 bcopy(mtod(m1, u_char *) + roff + hlen, 429 mtod(m1, u_char *) + roff, 430 m1->m_len - (roff + hlen)); 431 m1->m_len -= hlen; 432 m->m_pkthdr.len -= hlen; 433 } 434 return (0); 435 } 436 437 /* 438 * Diagnostic routine to check mbuf alignment as required by the 439 * crypto device drivers (that use DMA). 440 */ 441 void 442 m_checkalignment(const char* where, struct mbuf *m0, int off, int len) 443 { 444 int roff; 445 struct mbuf *m = m_getptr(m0, off, &roff); 446 caddr_t addr; 447 448 if (m == NULL) 449 return; 450 printf("%s (off %u len %u): ", where, off, len); 451 addr = mtod(m, caddr_t) + roff; 452 do { 453 int mlen; 454 455 if (((uintptr_t) addr) & 3) { 456 printf("addr misaligned %p,", addr); 457 break; 458 } 459 mlen = m->m_len; 460 if (mlen > len) 461 mlen = len; 462 len -= mlen; 463 if (len && (mlen & 3)) { 464 printf("len mismatch %u,", mlen); 465 break; 466 } 467 m = m->m_next; 468 addr = m ? mtod(m, caddr_t) : NULL; 469 } while (m && len > 0); 470 for (m = m0; m; m = m->m_next) 471 printf(" [%p:%u]", mtod(m, caddr_t), m->m_len); 472 printf("\n"); 473 } 474