xref: /freebsd/sys/compat/linuxkpi/common/src/linux_skbuff.c (revision c07d6445eb89d9dd3950361b065b7bd110e3a043)
1 /*-
2  * Copyright (c) 2020-2022 The FreeBSD Foundation
3  * Copyright (c) 2021-2022 Bjoern A. Zeeb
4  *
5  * This software was developed by Björn Zeeb under sponsorship from
6  * the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31 
32 /*
33  * NOTE: this socket buffer compatibility code is highly EXPERIMENTAL.
34  *       Do not rely on the internals of this implementation.  They are highly
35  *       likely to change as we will improve the integration to FreeBSD mbufs.
36  */
37 
38 #include <sys/cdefs.h>
39 __FBSDID("$FreeBSD$");
40 
41 #include "opt_ddb.h"
42 
43 #include <sys/param.h>
44 #include <sys/types.h>
45 #include <sys/kernel.h>
46 #include <sys/malloc.h>
47 #include <sys/sysctl.h>
48 
49 #ifdef DDB
50 #include <ddb/ddb.h>
51 #endif
52 
53 #include <linux/skbuff.h>
54 #include <linux/slab.h>
55 #include <linux/gfp.h>
56 #ifdef __LP64__
57 #include <linux/log2.h>
58 #endif
59 
/*
 * Create the "skb" node below the _compat_linuxkpi sysctl tree; the knobs
 * defined further down are attached to it via _compat_linuxkpi_skb.
 */
60 SYSCTL_DECL(_compat_linuxkpi);
61 SYSCTL_NODE(_compat_linuxkpi, OID_AUTO, skb, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
62     "LinuxKPI skbuff");
63 
64 #ifdef SKB_DEBUG
/*
 * Debug level knob, only compiled in with SKB_DEBUG.  Not static, so it is
 * visible outside this file; exported as "debug" with CTLFLAG_RWTUN.
 */
65 int linuxkpi_debug_skb;
66 SYSCTL_INT(_compat_linuxkpi_skb, OID_AUTO, debug, CTLFLAG_RWTUN,
67     &linuxkpi_debug_skb, 0, "SKB debug level");
68 #endif
69 
70 #ifdef __LP64__
71 /*
72  * Realtek wireless drivers (e.g., rtw88) require 32bit DMA in a single segment.
73  * busdma(9) has a hard time providing this currently for 3-ish pages at large
74  * quantities (see lkpi_pci_nseg1_fail in linux_pci.c).
75  * Work around this for now by allowing a tunable to enforce physical addresses
76  * allocation limits on 64bit platforms using "old-school" contigmalloc(9) to
77  * avoid bouncing.
78  */
/*
 * Exported as "mem_limit" with CTLFLAG_RDTUN.  The allocation and the free
 * routine in this file both test this value to choose between malloc/free
 * and contigmalloc/contigfree, so it must stay the same for the lifetime of
 * every skb.
 */
79 static int linuxkpi_skb_memlimit;
80 SYSCTL_INT(_compat_linuxkpi_skb, OID_AUTO, mem_limit, CTLFLAG_RDTUN,
81     &linuxkpi_skb_memlimit, 0, "SKB memory limit: 0=no limit, "
82     "1=32bit, 2=36bit, other=undef (currently 32bit)");
83 #endif
84 
/* malloc(9) type used for every skb allocated and freed in this file. */
85 static MALLOC_DEFINE(M_LKPISKB, "lkpiskb", "Linux KPI skbuff compat");
86 
87 struct sk_buff *
88 linuxkpi_alloc_skb(size_t size, gfp_t gfp)
89 {
90 	struct sk_buff *skb;
91 	size_t len;
92 
93 	len = sizeof(*skb) + size + sizeof(struct skb_shared_info);
94 	/*
95 	 * Using our own type here not backing my kmalloc.
96 	 * We assume no one calls kfree directly on the skb.
97 	 */
98 #ifdef __LP64__
99 	if (__predict_true(linuxkpi_skb_memlimit == 0)) {
100 		skb = malloc(len, M_LKPISKB, linux_check_m_flags(gfp) | M_ZERO);
101 	} else {
102 		vm_paddr_t high;
103 
104 		switch (linuxkpi_skb_memlimit) {
105 		case 2:
106 			high = (0xfffffffff);	/* 1<<36 really. */
107 			break;
108 		case 1:
109 		default:
110 			high = (0xffffffff);	/* 1<<32 really. */
111 			break;
112 		}
113 		len = roundup_pow_of_two(len);
114 		skb = contigmalloc(len, M_LKPISKB,
115 		    linux_check_m_flags(gfp) | M_ZERO, 0, high, PAGE_SIZE, 0);
116 	}
117 #else
118 	skb = malloc(len, M_LKPISKB, linux_check_m_flags(gfp) | M_ZERO);
119 #endif
120 	if (skb == NULL)
121 		return (skb);
122 	skb->_alloc_len = len;
123 	skb->truesize = size;
124 
125 	skb->head = skb->data = skb->tail = (uint8_t *)(skb+1);
126 	skb->end = skb->head + size;
127 
128 	skb->prev = skb->next = skb;
129 
130 	skb->shinfo = (struct skb_shared_info *)(skb->end);
131 
132 	SKB_TRACE_FMT(skb, "data %p size %zu", (skb) ? skb->data : NULL, size);
133 	return (skb);
134 }
135 
136 struct sk_buff *
137 linuxkpi_dev_alloc_skb(size_t size, gfp_t gfp)
138 {
139 	struct sk_buff *skb;
140 	size_t len;
141 
142 	len = size + NET_SKB_PAD;
143 	skb = linuxkpi_alloc_skb(len, gfp);
144 
145 	if (skb != NULL)
146 		skb_reserve(skb, NET_SKB_PAD);
147 
148 	SKB_TRACE_FMT(skb, "data %p size %zu len %zu",
149 	    (skb) ? skb->data : NULL, size, len);
150 	return (skb);
151 }
152 
153 struct sk_buff *
154 linuxkpi_build_skb(void *data, size_t fragsz)
155 {
156 	struct sk_buff *skb;
157 
158 	if (data == NULL || fragsz == 0)
159 		return (NULL);
160 
161 	/* Just allocate a skb without data area. */
162 	skb = linuxkpi_alloc_skb(0, GFP_KERNEL);
163 	if (skb == NULL)
164 		return (NULL);
165 
166 	skb->_flags |= _SKB_FLAGS_SKBEXTFRAG;
167 	skb->truesize = fragsz;
168 	skb->head = skb->data = data;
169 	skb_reset_tail_pointer(skb);	/* XXX is that correct? */
170 	skb->end = (void *)((uintptr_t)skb->head + fragsz);
171 
172 	return (skb);
173 }
174 
175 struct sk_buff *
176 linuxkpi_skb_copy(struct sk_buff *skb, gfp_t gfp)
177 {
178 	struct sk_buff *new;
179 	struct skb_shared_info *shinfo;
180 	size_t len;
181 	unsigned int headroom;
182 
183 	/* Full buffer size + any fragments. */
184 	len = skb->end - skb->head + skb->data_len;
185 
186 	new = linuxkpi_alloc_skb(len, gfp);
187 	if (new == NULL)
188 		return (NULL);
189 
190 	headroom = skb_headroom(skb);
191 	/* Fixup head and end. */
192 	skb_reserve(new, headroom);	/* data and tail move headroom forward. */
193 	skb_put(new, skb->len);		/* tail and len get adjusted */
194 
195 	/* Copy data. */
196 	memcpy(new->head, skb->data - headroom, headroom + skb->len);
197 
198 	/* Deal with fragments. */
199 	shinfo = skb->shinfo;
200 	if (shinfo->nr_frags > 0) {
201 		printf("%s:%d: NOT YET SUPPORTED; missing %d frags\n",
202 		    __func__, __LINE__, shinfo->nr_frags);
203 		SKB_TODO();
204 	}
205 
206 	/* Deal with header fields. */
207 	memcpy(new->cb, skb->cb, sizeof(skb->cb));
208 	SKB_IMPROVE("more header fields to copy?");
209 
210 	return (new);
211 }
212 
213 void
214 linuxkpi_kfree_skb(struct sk_buff *skb)
215 {
216 	struct skb_shared_info *shinfo;
217 	uint16_t fragno, count;
218 
219 	SKB_TRACE(skb);
220 	if (skb == NULL)
221 		return;
222 
223 	/*
224 	 * XXX TODO this will go away once we have skb backed by mbuf.
225 	 * currently we allow the mbuf to stay around and use a private
226 	 * free function to allow secondary resources to be freed along.
227 	 */
228 	if (skb->m != NULL) {
229 		void *m;
230 
231 		m = skb->m;
232 		skb->m = NULL;
233 
234 		KASSERT(skb->m_free_func != NULL, ("%s: skb %p has m %p but no "
235 		    "m_free_func %p\n", __func__, skb, m, skb->m_free_func));
236 		skb->m_free_func(m);
237 	}
238 	KASSERT(skb->m == NULL,
239 	    ("%s: skb %p m %p != NULL\n", __func__, skb, skb->m));
240 
241 	shinfo = skb->shinfo;
242 	for (count = fragno = 0;
243 	    count < shinfo->nr_frags && fragno < nitems(shinfo->frags);
244 	    fragno++) {
245 
246 		if (shinfo->frags[fragno].page != NULL) {
247 			struct page *p;
248 
249 			p = shinfo->frags[fragno].page;
250 			shinfo->frags[fragno].size = 0;
251 			shinfo->frags[fragno].offset = 0;
252 			shinfo->frags[fragno].page = NULL;
253 			__free_page(p);
254 			count++;
255 		}
256 	}
257 
258 	if ((skb->_flags & _SKB_FLAGS_SKBEXTFRAG) != 0) {
259 		void *p;
260 
261 		p = skb->head;
262 		skb_free_frag(p);
263 	}
264 
265 #ifdef __LP64__
266 	if (__predict_true(linuxkpi_skb_memlimit == 0))
267 		free(skb, M_LKPISKB);
268 	else
269 		contigfree(skb, skb->_alloc_len, M_LKPISKB);
270 #else
271 	free(skb, M_LKPISKB);
272 #endif
273 }
274 
275 #ifdef DDB
276 DB_SHOW_COMMAND(skb, db_show_skb)
277 {
278 	struct sk_buff *skb;
279 	int i;
280 
281 	if (!have_addr) {
282 		db_printf("usage: show skb <addr>\n");
283 			return;
284 	}
285 
286 	skb = (struct sk_buff *)addr;
287 
288 	db_printf("skb %p\n", skb);
289 	db_printf("\tnext %p prev %p\n", skb->next, skb->prev);
290 	db_printf("\tlist %p\n", &skb->list);
291 	db_printf("\t_alloc_len %u len %u data_len %u truesize %u mac_len %u\n",
292 	    skb->_alloc_len, skb->len, skb->data_len, skb->truesize,
293 	    skb->mac_len);
294 	db_printf("\tcsum %#06x l3hdroff %u l4hdroff %u priority %u qmap %u\n",
295 	    skb->csum, skb->l3hdroff, skb->l4hdroff, skb->priority, skb->qmap);
296 	db_printf("\tpkt_type %d dev %p sk %p\n",
297 	    skb->pkt_type, skb->dev, skb->sk);
298 	db_printf("\tcsum_offset %d csum_start %d ip_summed %d protocol %d\n",
299 	    skb->csum_offset, skb->csum_start, skb->ip_summed, skb->protocol);
300 	db_printf("\t_flags %#06x\n", skb->_flags);		/* XXX-BZ print names? */
301 	db_printf("\thead %p data %p tail %p end %p\n",
302 	    skb->head, skb->data, skb->tail, skb->end);
303 	db_printf("\tshinfo %p m %p m_free_func %p\n",
304 	    skb->shinfo, skb->m, skb->m_free_func);
305 
306 	if (skb->shinfo != NULL) {
307 		struct skb_shared_info *shinfo;
308 
309 		shinfo = skb->shinfo;
310 		db_printf("\t\tgso_type %d gso_size %u nr_frags %u\n",
311 		    shinfo->gso_type, shinfo->gso_size, shinfo->nr_frags);
312 		for (i = 0; i < nitems(shinfo->frags); i++) {
313 			struct skb_frag *frag;
314 
315 			frag = &shinfo->frags[i];
316 			if (frag == NULL || frag->page == NULL)
317 				continue;
318 			db_printf("\t\t\tfrag %p fragno %d page %p %p "
319 			    "offset %ju size %zu\n",
320 			    frag, i, frag->page, linux_page_address(frag->page),
321 			    (uintmax_t)frag->offset, frag->size);
322 		}
323 	}
324 	db_printf("\tcb[] %p {", skb->cb);
325 	for (i = 0; i < nitems(skb->cb); i++) {
326 		db_printf("%#04x%s",
327 		    skb->cb[i], (i < (nitems(skb->cb)-1)) ? ", " : "");
328 	}
329 	db_printf("}\n");
330 
331 	db_printf("\t__scratch[0] %p\n", skb->__scratch);
332 };
333 #endif
334