xref: /freebsd/sys/netipsec/ipsec_mbuf.c (revision 51a9219f5780e61e1437d25220bf8750d9df7f8b)
1 /*	$FreeBSD$	*/
2 
3 /*
4  * IPsec-specific mbuf routines.
5  */
6 
7 #include "opt_param.h"
8 
9 #include <sys/param.h>
10 #include <sys/systm.h>
11 #include <sys/mbuf.h>
12 #include <sys/socket.h>
13 
14 #include <net/route.h>
15 #include <netinet/in.h>
16 
17 #include <netipsec/ipsec.h>
18 
19 extern	struct mbuf *m_getptr(struct mbuf *, int, int *);
20 
21 /*
22  * Create a writable copy of the mbuf chain.  While doing this
23  * we compact the chain with a goal of producing a chain with
24  * at most two mbufs.  The second mbuf in this chain is likely
25  * to be a cluster.  The primary purpose of this work is to create
26  * a writable packet for encryption, compression, etc.  The
27  * secondary goal is to linearize the data so the data can be
28  * passed to crypto hardware in the most efficient manner possible.
29  */
30 struct mbuf *
31 m_clone(struct mbuf *m0)
32 {
33 	struct mbuf *m, *mprev;
34 
35 	KASSERT(m0 != NULL, ("m_clone: null mbuf"));
36 
37 	mprev = NULL;
38 	for (m = m0; m != NULL; m = mprev->m_next) {
39 		/*
40 		 * Regular mbufs are ignored unless there's a cluster
41 		 * in front of it that we can use to coalesce.  We do
42 		 * the latter mainly so later clusters can be coalesced
43 		 * also w/o having to handle them specially (i.e. convert
44 		 * mbuf+cluster -> cluster).  This optimization is heavily
45 		 * influenced by the assumption that we're running over
46 		 * Ethernet where MCBYTES is large enough that the max
47 		 * packet size will permit lots of coalescing into a
48 		 * single cluster.  This in turn permits efficient
49 		 * crypto operations, especially when using hardware.
50 		 */
51 		if ((m->m_flags & M_EXT) == 0) {
52 			if (mprev && (mprev->m_flags & M_EXT) &&
53 			    m->m_len <= M_TRAILINGSPACE(mprev)) {
54 				/* XXX: this ignores mbuf types */
55 				memcpy(mtod(mprev, caddr_t) + mprev->m_len,
56 				       mtod(m, caddr_t), m->m_len);
57 				mprev->m_len += m->m_len;
58 				mprev->m_next = m->m_next;	/* unlink from chain */
59 				m_free(m);			/* reclaim mbuf */
60 				newipsecstat.ips_mbcoalesced++;
61 			} else {
62 				mprev = m;
63 			}
64 			continue;
65 		}
66 		/*
67 		 * Cluster'd mbufs are left alone (for now).
68 		 */
69 		if (!MEXT_IS_REF(m)) {
70 			mprev = m;
71 			continue;
72 		}
73 		/*
74 		 * Not writable, replace with a copy or coalesce with
75 		 * the previous mbuf if possible (since we have to copy
76 		 * it anyway, we try to reduce the number of mbufs and
77 		 * clusters so that future work is easier).
78 		 */
79 		/* XXX why can M_PKTHDR be set past the first mbuf? */
80 		KASSERT(m->m_flags & M_EXT,
81 			("m_clone: m_flags 0x%x", m->m_flags));
82 		/* NB: we only coalesce into a cluster */
83 		if (mprev == NULL || (mprev->m_flags & M_EXT) == 0 ||
84 		    m->m_len > M_TRAILINGSPACE(mprev)) {
85 			struct mbuf *n;
86 
87 			/*
88 			 * Allocate a new page, copy the data to the front
89 			 * and release the reference to the old page.
90 			 */
91 			n = m_getcl(M_DONTWAIT, m->m_type, m->m_flags);
92 			if (n == NULL) {
93 				m_freem(m0);
94 				return (NULL);
95 			}
96 			if (mprev == NULL && (m->m_flags & M_PKTHDR))
97 				M_COPY_PKTHDR(n, m);
98 			memcpy(mtod(n, caddr_t), mtod(m, caddr_t), m->m_len);
99 			n->m_len = m->m_len;
100 			n->m_next = m->m_next;
101 			if (mprev == NULL)
102 				m0 = n;			/* new head of chain */
103 			else
104 				mprev->m_next = n;	/* replace old mbuf */
105 			m_free(m);			/* release old mbuf */
106 			mprev = n;
107 			newipsecstat.ips_clcopied++;
108 		} else {
109 			/* XXX: this ignores mbuf types */
110 			memcpy(mtod(mprev, caddr_t) + mprev->m_len,
111 			       mtod(m, caddr_t), m->m_len);
112 			mprev->m_len += m->m_len;
113 			mprev->m_next = m->m_next;	/* unlink from chain */
114 			m_free(m);			/* reclaim mbuf */
115 			newipsecstat.ips_clcoalesced++;
116 		}
117 	}
118 	return (m0);
119 }
120 
121 /*
122  * Make space for a new header of length hlen at offset off
123  * in the packet.  When doing this we allocate new mbufs only
124  * when absolutely necessary.  The mbuf where the new header
125  * is to go is returned together with an offset into the mbuf.
126  * If NULL is returned then the mbuf chain may have been modified;
127  * the caller is assumed to always free the chain.
128  */
129 struct mbuf *
130 m_makespace(struct mbuf *m0, int skip, int hlen, int *off)
131 {
132 	struct mbuf *m;
133 	unsigned remain;
134 
135 	KASSERT(m0 != NULL, ("m_dmakespace: null mbuf"));
136 	KASSERT(hlen < MHLEN, ("m_makespace: hlen too big: %u", hlen));
137 
138 	for (m = m0; m && skip > m->m_len; m = m->m_next)
139 		skip -= m->m_len;
140 	if (m == NULL)
141 		return (NULL);
142 	/*
143 	 * At this point skip is the offset into the mbuf m
144 	 * where the new header should be placed.  Figure out
145 	 * if there's space to insert the new header.  If so,
146 	 * and copying the remainder makese sense then do so.
147 	 * Otherwise insert a new mbuf in the chain, splitting
148 	 * the contents of m as needed.
149 	 */
150 	remain = m->m_len - skip;		/* data to move */
151 	/* XXX code doesn't handle clusters XXX */
152 	KASSERT(remain < MLEN, ("m_makespace: remainder too big: %u", remain));
153 	if (hlen > M_TRAILINGSPACE(m)) {
154 		struct mbuf *n;
155 
156 		/*
157 		 * Not enough space in m, split the contents
158 		 * of m, inserting new mbufs as required.
159 		 *
160 		 * NB: this ignores mbuf types.
161 		 */
162 		MGET(n, M_DONTWAIT, MT_DATA);
163 		if (n == NULL)
164 			return (NULL);
165 		n->m_next = m->m_next;		/* splice new mbuf */
166 		m->m_next = n;
167 		newipsecstat.ips_mbinserted++;
168 		if (hlen <= M_TRAILINGSPACE(m) + remain) {
169 			/*
170 			 * New header fits in the old mbuf if we copy
171 			 * the remainder; just do the copy to the new
172 			 * mbuf and we're good to go.
173 			 */
174 			memcpy(mtod(n, caddr_t),
175 			       mtod(m, caddr_t) + skip, remain);
176 			n->m_len = remain;
177 			m->m_len = skip + hlen;
178 			*off = skip;
179 		} else {
180 			/*
181 			 * No space in the old mbuf for the new header.
182 			 * Make space in the new mbuf and check the
183 			 * remainder'd data fits too.  If not then we
184 			 * must allocate an additional mbuf (yech).
185 			 */
186 			n->m_len = 0;
187 			if (remain + hlen > M_TRAILINGSPACE(n)) {
188 				struct mbuf *n2;
189 
190 				MGET(n2, M_DONTWAIT, MT_DATA);
191 				/* NB: new mbuf is on chain, let caller free */
192 				if (n2 == NULL)
193 					return (NULL);
194 				n2->m_len = 0;
195 				memcpy(mtod(n2, caddr_t),
196 				       mtod(m, caddr_t) + skip, remain);
197 				n2->m_len = remain;
198 				/* splice in second mbuf */
199 				n2->m_next = n->m_next;
200 				n->m_next = n2;
201 				newipsecstat.ips_mbinserted++;
202 			} else {
203 				memcpy(mtod(n, caddr_t) + hlen,
204 				       mtod(m, caddr_t) + skip, remain);
205 				n->m_len += remain;
206 			}
207 			m->m_len -= remain;
208 			n->m_len += hlen;
209 			m = n;			/* header is at front ... */
210 			*off = 0;		/* ... of new mbuf */
211 		}
212 	} else {
213 		/*
214 		 * Copy the remainder to the back of the mbuf
215 		 * so there's space to write the new header.
216 		 */
217 		/* XXX can this be memcpy? does it handle overlap? */
218 		ovbcopy(mtod(m, caddr_t) + skip,
219 			mtod(m, caddr_t) + skip + hlen, remain);
220 		m->m_len += hlen;
221 		*off = skip;
222 	}
223 	m0->m_pkthdr.len += hlen;		/* adjust packet length */
224 	return m;
225 }
226 
227 /*
228  * m_pad(m, n) pads <m> with <n> bytes at the end. The packet header
229  * length is updated, and a pointer to the first byte of the padding
230  * (which is guaranteed to be all in one mbuf) is returned.
231  */
232 caddr_t
233 m_pad(struct mbuf *m, int n)
234 {
235 	register struct mbuf *m0, *m1;
236 	register int len, pad;
237 	caddr_t retval;
238 
239 	if (n <= 0) {  /* No stupid arguments. */
240 		DPRINTF(("m_pad: pad length invalid (%d)\n", n));
241 		m_freem(m);
242 		return NULL;
243 	}
244 
245 	len = m->m_pkthdr.len;
246 	pad = n;
247 	m0 = m;
248 
249 	while (m0->m_len < len) {
250 KASSERT(m0->m_next != NULL, ("m_pad: m0 null, len %u m_len %u", len, m0->m_len));/*XXX*/
251 		len -= m0->m_len;
252 		m0 = m0->m_next;
253 	}
254 
255 	if (m0->m_len != len) {
256 		DPRINTF(("m_pad: length mismatch (should be %d instead of %d)\n",
257 		    m->m_pkthdr.len, m->m_pkthdr.len + m0->m_len - len));
258 
259 		m_freem(m);
260 		return NULL;
261 	}
262 
263 	/* Check for zero-length trailing mbufs, and find the last one. */
264 	for (m1 = m0; m1->m_next; m1 = m1->m_next) {
265 		if (m1->m_next->m_len != 0) {
266 			DPRINTF(("m_pad: length mismatch (should be %d "
267 			    "instead of %d)\n",
268 			    m->m_pkthdr.len,
269 			    m->m_pkthdr.len + m1->m_next->m_len));
270 
271 			m_freem(m);
272 			return NULL;
273 		}
274 
275 		m0 = m1->m_next;
276 	}
277 
278 	if (pad > M_TRAILINGSPACE(m0)) {
279 		/* Add an mbuf to the chain. */
280 		MGET(m1, M_DONTWAIT, MT_DATA);
281 		if (m1 == 0) {
282 			m_freem(m0);
283 			DPRINTF(("m_pad: unable to get extra mbuf\n"));
284 			return NULL;
285 		}
286 
287 		m0->m_next = m1;
288 		m0 = m1;
289 		m0->m_len = 0;
290 	}
291 
292 	retval = m0->m_data + m0->m_len;
293 	m0->m_len += pad;
294 	m->m_pkthdr.len += pad;
295 
296 	return retval;
297 }
298 
299 /*
300  * Remove hlen data at offset skip in the packet.  This is used by
301  * the protocols strip protocol headers and associated data (e.g. IV,
302  * authenticator) on input.
303  */
304 int
305 m_striphdr(struct mbuf *m, int skip, int hlen)
306 {
307 	struct mbuf *m1;
308 	int roff;
309 
310 	/* Find beginning of header */
311 	m1 = m_getptr(m, skip, &roff);
312 	if (m1 == NULL)
313 		return (EINVAL);
314 
315 	/* Remove the header and associated data from the mbuf. */
316 	if (roff == 0) {
317 		/* The header was at the beginning of the mbuf */
318 		newipsecstat.ips_input_front++;
319 		m_adj(m1, hlen);
320 		if ((m1->m_flags & M_PKTHDR) == 0)
321 			m->m_pkthdr.len -= hlen;
322 	} else if (roff + hlen >= m1->m_len) {
323 		struct mbuf *mo;
324 
325 		/*
326 		 * Part or all of the header is at the end of this mbuf,
327 		 * so first let's remove the remainder of the header from
328 		 * the beginning of the remainder of the mbuf chain, if any.
329 		 */
330 		newipsecstat.ips_input_end++;
331 		if (roff + hlen > m1->m_len) {
332 			/* Adjust the next mbuf by the remainder */
333 			m_adj(m1->m_next, roff + hlen - m1->m_len);
334 
335 			/* The second mbuf is guaranteed not to have a pkthdr... */
336 			m->m_pkthdr.len -= (roff + hlen - m1->m_len);
337 		}
338 
339 		/* Now, let's unlink the mbuf chain for a second...*/
340 		mo = m1->m_next;
341 		m1->m_next = NULL;
342 
343 		/* ...and trim the end of the first part of the chain...sick */
344 		m_adj(m1, -(m1->m_len - roff));
345 		if ((m1->m_flags & M_PKTHDR) == 0)
346 			m->m_pkthdr.len -= (m1->m_len - roff);
347 
348 		/* Finally, let's relink */
349 		m1->m_next = mo;
350 	} else {
351 		/*
352 		 * The header lies in the "middle" of the mbuf; copy
353 		 * the remainder of the mbuf down over the header.
354 		 */
355 		newipsecstat.ips_input_middle++;
356 		bcopy(mtod(m1, u_char *) + roff + hlen,
357 		      mtod(m1, u_char *) + roff,
358 		      m1->m_len - (roff + hlen));
359 		m1->m_len -= hlen;
360 		m->m_pkthdr.len -= hlen;
361 	}
362 	return (0);
363 }
364 
365 /*
366  * Diagnostic routine to check mbuf alignment as required by the
367  * crypto device drivers (that use DMA).
368  */
369 void
370 m_checkalignment(const char* where, struct mbuf *m0, int off, int len)
371 {
372 	int roff;
373 	struct mbuf *m = m_getptr(m0, off, &roff);
374 	caddr_t addr;
375 
376 	if (m == NULL)
377 		return;
378 	printf("%s (off %u len %u): ", where, off, len);
379 	addr = mtod(m, caddr_t) + roff;
380 	do {
381 		int mlen;
382 
383 		if (((uintptr_t) addr) & 3) {
384 			printf("addr misaligned %p,", addr);
385 			break;
386 		}
387 		mlen = m->m_len;
388 		if (mlen > len)
389 			mlen = len;
390 		len -= mlen;
391 		if (len && (mlen & 3)) {
392 			printf("len mismatch %u,", mlen);
393 			break;
394 		}
395 		m = m->m_next;
396 		addr = m ? mtod(m, caddr_t) : NULL;
397 	} while (m && len > 0);
398 	for (m = m0; m; m = m->m_next)
399 		printf(" [%p:%u]", mtod(m, caddr_t), m->m_len);
400 	printf("\n");
401 }
402