1 /*-
2 * Copyright (c) 2020-2025 The FreeBSD Foundation
3 * Copyright (c) 2021-2022 Bjoern A. Zeeb
4 *
5 * This software was developed by Björn Zeeb under sponsorship from
6 * the FreeBSD Foundation.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30 /*
31 * NOTE: this socket buffer compatibility code is highly EXPERIMENTAL.
32 * Do not rely on the internals of this implementation. They are highly
33 * likely to change as we will improve the integration to FreeBSD mbufs.
34 */
35
36 #include <sys/cdefs.h>
37 #include "opt_ddb.h"
38
39 #include <sys/param.h>
40 #include <sys/types.h>
41 #include <sys/kernel.h>
42 #include <sys/malloc.h>
43 #include <sys/sysctl.h>
44
45 #include <vm/uma.h>
46
47 #ifdef DDB
48 #include <ddb/ddb.h>
49 #endif
50
51 #include <linux/skbuff.h>
52 #include <linux/slab.h>
53 #include <linux/gfp.h>
54 #ifdef __LP64__
55 #include <linux/log2.h>
56 #endif
57
58 SYSCTL_DECL(_compat_linuxkpi);
59 SYSCTL_NODE(_compat_linuxkpi, OID_AUTO, skb, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
60 "LinuxKPI skbuff");
61
62 #ifdef SKB_DEBUG
63 int linuxkpi_debug_skb;
64 SYSCTL_INT(_compat_linuxkpi_skb, OID_AUTO, debug, CTLFLAG_RWTUN,
65 &linuxkpi_debug_skb, 0, "SKB debug level");
66 #endif
67
68 static uma_zone_t skbzone;
69
70 #define SKB_DMA32_MALLOC
71 #ifdef SKB_DMA32_MALLOC
/*
 * Realtek wireless drivers (e.g., rtw88) require 32bit DMA in a single segment.
 * busdma(9) currently has a hard time providing this for 3-ish pages in large
 * quantities (see lkpi_pci_nseg1_fail in linux_pci.c).
 * Work around this for now by allowing a tunable to enforce physical address
 * allocation limits using "old-school" contigmalloc(9) to avoid bouncing.
 * Note: with the malloc/contigmalloc + kmalloc changes also providing
 * physically contiguous memory, and the nseg=1 limit for bouncing, we should
 * in theory be fine now and not need any of this anymore; however, busdma
 * still has trouble bouncing three contiguous pages, so for now this stays.
 */
83 static int linuxkpi_skb_memlimit;
84 SYSCTL_INT(_compat_linuxkpi_skb, OID_AUTO, mem_limit, CTLFLAG_RDTUN,
85 &linuxkpi_skb_memlimit, 0, "SKB memory limit: 0=no limit, "
86 "1=32bit, 2=36bit, other=undef (currently 32bit)");
87
88 static MALLOC_DEFINE(M_LKPISKB, "lkpiskb", "Linux KPI skbuff compat");
89 #endif
90
91 struct sk_buff *
linuxkpi_alloc_skb(size_t size,gfp_t gfp)92 linuxkpi_alloc_skb(size_t size, gfp_t gfp)
93 {
94 struct sk_buff *skb;
95 void *p;
96 size_t len;
97
98 skb = uma_zalloc(skbzone, linux_check_m_flags(gfp) | M_ZERO);
99 if (skb == NULL)
100 return (NULL);
101
102 skb->prev = skb->next = skb;
103 skb->truesize = size;
104 skb->shinfo = (struct skb_shared_info *)(skb + 1);
105
106 if (size == 0)
107 return (skb);
108
109 len = size;
110 #ifdef SKB_DMA32_MALLOC
111 /*
112 * Using our own type here not backing my kmalloc.
113 * We assume no one calls kfree directly on the skb.
114 */
115 if (__predict_false(linuxkpi_skb_memlimit != 0)) {
116 vm_paddr_t high;
117
118 switch (linuxkpi_skb_memlimit) {
119 #ifdef __LP64__
120 case 2:
121 high = (0xfffffffff); /* 1<<36 really. */
122 break;
123 #endif
124 case 1:
125 default:
126 high = (0xffffffff); /* 1<<32 really. */
127 break;
128 }
129 len = roundup_pow_of_two(len);
130 p = contigmalloc(len, M_LKPISKB,
131 linux_check_m_flags(gfp) | M_ZERO, 0, high, PAGE_SIZE, 0);
132 } else
133 #endif
134 p = __kmalloc(len, linux_check_m_flags(gfp) | M_ZERO);
135 if (p == NULL) {
136 uma_zfree(skbzone, skb);
137 return (NULL);
138 }
139
140 skb->head = skb->data = (uint8_t *)p;
141 skb_reset_tail_pointer(skb);
142 skb->end = skb->head + size;
143
144 SKB_TRACE_FMT(skb, "data %p size %zu", (skb) ? skb->data : NULL, size);
145 return (skb);
146 }
147
148 struct sk_buff *
linuxkpi_dev_alloc_skb(size_t size,gfp_t gfp)149 linuxkpi_dev_alloc_skb(size_t size, gfp_t gfp)
150 {
151 struct sk_buff *skb;
152 size_t len;
153
154 len = size + NET_SKB_PAD;
155 skb = linuxkpi_alloc_skb(len, gfp);
156
157 if (skb != NULL)
158 skb_reserve(skb, NET_SKB_PAD);
159
160 SKB_TRACE_FMT(skb, "data %p size %zu len %zu",
161 (skb) ? skb->data : NULL, size, len);
162 return (skb);
163 }
164
165 struct sk_buff *
linuxkpi_build_skb(void * data,size_t fragsz)166 linuxkpi_build_skb(void *data, size_t fragsz)
167 {
168 struct sk_buff *skb;
169
170 if (data == NULL || fragsz == 0)
171 return (NULL);
172
173 /* Just allocate a skb without data area. */
174 skb = linuxkpi_alloc_skb(0, GFP_KERNEL);
175 if (skb == NULL)
176 return (NULL);
177
178 skb->_flags |= _SKB_FLAGS_SKBEXTFRAG;
179 skb->truesize = fragsz;
180 skb->head = skb->data = data;
181 skb_reset_tail_pointer(skb);
182 skb->end = skb->head + fragsz;
183
184 return (skb);
185 }
186
187 struct sk_buff *
linuxkpi_skb_copy(const struct sk_buff * skb,gfp_t gfp)188 linuxkpi_skb_copy(const struct sk_buff *skb, gfp_t gfp)
189 {
190 struct sk_buff *new;
191 struct skb_shared_info *shinfo;
192 size_t len;
193 unsigned int headroom;
194
195 /* Full buffer size + any fragments. */
196 len = skb->end - skb->head + skb->data_len;
197
198 new = linuxkpi_alloc_skb(len, gfp);
199 if (new == NULL)
200 return (NULL);
201
202 headroom = skb_headroom(skb);
203 /* Fixup head and end. */
204 skb_reserve(new, headroom); /* data and tail move headroom forward. */
205 skb_put(new, skb->len); /* tail and len get adjusted */
206
207 /* Copy data. */
208 memcpy(new->head, skb->data - headroom, headroom + skb->len);
209
210 /* Deal with fragments. */
211 shinfo = skb->shinfo;
212 if (shinfo->nr_frags > 0) {
213 printf("%s:%d: NOT YET SUPPORTED; missing %d frags\n",
214 __func__, __LINE__, shinfo->nr_frags);
215 SKB_TODO();
216 }
217
218 /* Deal with header fields. */
219 memcpy(new->cb, skb->cb, sizeof(skb->cb));
220 SKB_IMPROVE("more header fields to copy?");
221
222 return (new);
223 }
224
225 void
linuxkpi_kfree_skb(struct sk_buff * skb)226 linuxkpi_kfree_skb(struct sk_buff *skb)
227 {
228 struct skb_shared_info *shinfo;
229 uint16_t fragno, count;
230
231 SKB_TRACE(skb);
232 if (skb == NULL)
233 return;
234
235 /*
236 * XXX TODO this will go away once we have skb backed by mbuf.
237 * currently we allow the mbuf to stay around and use a private
238 * free function to allow secondary resources to be freed along.
239 */
240 if (skb->m != NULL) {
241 void *m;
242
243 m = skb->m;
244 skb->m = NULL;
245
246 KASSERT(skb->m_free_func != NULL, ("%s: skb %p has m %p but no "
247 "m_free_func %p\n", __func__, skb, m, skb->m_free_func));
248 skb->m_free_func(m);
249 }
250 KASSERT(skb->m == NULL,
251 ("%s: skb %p m %p != NULL\n", __func__, skb, skb->m));
252
253 shinfo = skb->shinfo;
254 for (count = fragno = 0;
255 count < shinfo->nr_frags && fragno < nitems(shinfo->frags);
256 fragno++) {
257
258 if (shinfo->frags[fragno].page != NULL) {
259 struct page *p;
260
261 p = shinfo->frags[fragno].page;
262 shinfo->frags[fragno].size = 0;
263 shinfo->frags[fragno].offset = 0;
264 shinfo->frags[fragno].page = NULL;
265 __free_page(p);
266 count++;
267 }
268 }
269
270 if ((skb->_flags & _SKB_FLAGS_SKBEXTFRAG) != 0) {
271 void *p;
272
273 p = skb->head;
274 skb_free_frag(p);
275 skb->head = NULL;
276 }
277
278 #ifdef SKB_DMA32_MALLOC
279 if (__predict_false(linuxkpi_skb_memlimit != 0))
280 free(skb->head, M_LKPISKB);
281 else
282 #endif
283 kfree(skb->head);
284 uma_zfree(skbzone, skb);
285 }
286
287 static void
lkpi_skbuff_init(void * arg __unused)288 lkpi_skbuff_init(void *arg __unused)
289 {
290 skbzone = uma_zcreate("skbuff",
291 sizeof(struct sk_buff) + sizeof(struct skb_shared_info),
292 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
293 /* Do we need to apply limits? */
294 }
295 SYSINIT(linuxkpi_skbuff, SI_SUB_DRIVERS, SI_ORDER_FIRST, lkpi_skbuff_init, NULL);
296
297 static void
lkpi_skbuff_destroy(void * arg __unused)298 lkpi_skbuff_destroy(void *arg __unused)
299 {
300 uma_zdestroy(skbzone);
301 }
302 SYSUNINIT(linuxkpi_skbuff, SI_SUB_DRIVERS, SI_ORDER_SECOND, lkpi_skbuff_destroy, NULL);
303
#ifdef DDB
/*
 * ddb "show skb <addr>": print the fields of the struct sk_buff at the given
 * address, its shared info with all populated fragment slots, and the cb[]
 * array.  Without an address only a usage line is printed.
 */
DB_SHOW_COMMAND(skb, db_show_skb)
{
	struct sk_buff *skb;
	int i;

	if (!have_addr) {
		db_printf("usage: show skb <addr>\n");
		return;
	}

	skb = (struct sk_buff *)addr;

	db_printf("skb %p\n", skb);
	db_printf("\tnext %p prev %p\n", skb->next, skb->prev);
	db_printf("\tlist %p\n", &skb->list);
	db_printf("\tlen %u data_len %u truesize %u mac_len %u\n",
	    skb->len, skb->data_len, skb->truesize, skb->mac_len);
	db_printf("\tcsum %#06x l3hdroff %u l4hdroff %u priority %u qmap %u\n",
	    skb->csum, skb->l3hdroff, skb->l4hdroff, skb->priority, skb->qmap);
	db_printf("\tpkt_type %d dev %p sk %p\n",
	    skb->pkt_type, skb->dev, skb->sk);
	db_printf("\tcsum_offset %d csum_start %d ip_summed %d protocol %d\n",
	    skb->csum_offset, skb->csum_start, skb->ip_summed, skb->protocol);
	db_printf("\t_flags %#06x\n", skb->_flags);	/* XXX-BZ print names? */
	db_printf("\thead %p data %p tail %p end %p\n",
	    skb->head, skb->data, skb->tail, skb->end);
	db_printf("\tshinfo %p m %p m_free_func %p\n",
	    skb->shinfo, skb->m, skb->m_free_func);

	if (skb->shinfo != NULL) {
		struct skb_shared_info *shinfo;

		shinfo = skb->shinfo;
		db_printf("\t\tgso_type %d gso_size %u nr_frags %u\n",
		    shinfo->gso_type, shinfo->gso_size, shinfo->nr_frags);
		for (i = 0; i < nitems(shinfo->frags); i++) {
			struct skb_frag *frag;

			/*
			 * The address of an array element is never NULL, so
			 * only the page needs checking to skip unused slots.
			 */
			frag = &shinfo->frags[i];
			if (frag->page == NULL)
				continue;
			db_printf("\t\t\tfrag %p fragno %d page %p %p "
			    "offset %ju size %zu\n",
			    frag, i, frag->page, linux_page_address(frag->page),
			    (uintmax_t)frag->offset, frag->size);
		}
	}
	db_printf("\tcb[] %p {", skb->cb);
	for (i = 0; i < nitems(skb->cb); i++) {
		db_printf("%#04x%s",
		    skb->cb[i], (i < (nitems(skb->cb)-1)) ? ", " : "");
	}
	db_printf("}\n");

	db_printf("\t__scratch[0] %p\n", skb->__scratch);
}
#endif
362