/*-
 * Copyright (c) 2020-2022 The FreeBSD Foundation
 * Copyright (c) 2021-2022 Bjoern A. Zeeb
 *
 * This software was developed by Björn Zeeb under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * NOTE: this socket buffer compatibility code is highly EXPERIMENTAL.
 * Do not rely on the internals of this implementation.  They are highly
 * likely to change as we improve the integration with FreeBSD mbufs.
 */

#include <sys/cdefs.h>
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/gfp.h>
#ifdef __LP64__
#include <linux/log2.h>
#endif

SYSCTL_DECL(_compat_linuxkpi);
SYSCTL_NODE(_compat_linuxkpi, OID_AUTO, skb, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "LinuxKPI skbuff");

#ifdef SKB_DEBUG
int linuxkpi_debug_skb;
SYSCTL_INT(_compat_linuxkpi_skb, OID_AUTO, debug, CTLFLAG_RWTUN,
    &linuxkpi_debug_skb, 0, "SKB debug level");
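/*
 * With SKB_DEBUG compiled in, tracing can be toggled at runtime or via a
 * tunable; e.g. (value purely illustrative):
 *   sysctl compat.linuxkpi.skb.debug=1
 */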
#endif

#ifdef __LP64__
/*
 * Realtek wireless drivers (e.g., rtw88) require 32bit DMA in a single
 * segment.  busdma(9) currently has a hard time providing this for roughly
 * 3-page allocations in large quantities (see lkpi_pci_nseg1_fail in
 * linux_pci.c).  Work around this for now by providing a tunable that
 * enforces a physical address allocation limit on 64bit platforms, using
 * "old-school" contigmalloc(9) to avoid bouncing.
 */
static int linuxkpi_skb_memlimit;
SYSCTL_INT(_compat_linuxkpi_skb, OID_AUTO, mem_limit, CTLFLAG_RDTUN,
    &linuxkpi_skb_memlimit, 0, "SKB memory limit: 0=no limit, "
    "1=32bit, 2=36bit, other=undef (currently 32bit)");
#endif

static MALLOC_DEFINE(M_LKPISKB, "lkpiskb", "Linux KPI skbuff compat");

struct sk_buff *
linuxkpi_alloc_skb(size_t size, gfp_t gfp)
{
	struct sk_buff *skb;
	size_t len;

	len = sizeof(*skb) + size + sizeof(struct skb_shared_info);
	/*
	 * Using our own malloc type here, not backed by kmalloc.
	 * We assume no one calls kfree directly on the skb.
	 */
#ifdef __LP64__
	if (__predict_true(linuxkpi_skb_memlimit == 0)) {
		skb = malloc(len, M_LKPISKB, linux_check_m_flags(gfp) | M_ZERO);
	} else {
		vm_paddr_t high;

		switch (linuxkpi_skb_memlimit) {
		case 2:
			high = (0xfffffffff);	/* (1ULL << 36) - 1. */
			break;
		case 1:
		default:
			high = (0xffffffff);	/* (1ULL << 32) - 1. */
			break;
		}
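		/*
		 * contigmalloc(9) returns wired, physically contiguous memory
		 * with every page below 'high', so DMA to the buffer needs no
		 * bouncing.  The length is rounded up to a power of two,
		 * presumably to keep contiguous allocations at a small set of
		 * sizes and limit fragmentation.
		 */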
		len = roundup_pow_of_two(len);
		skb = contigmalloc(len, M_LKPISKB,
		    linux_check_m_flags(gfp) | M_ZERO, 0, high, PAGE_SIZE, 0);
	}
#else
	skb = malloc(len, M_LKPISKB, linux_check_m_flags(gfp) | M_ZERO);
#endif
	if (skb == NULL)
		return (skb);
	skb->_alloc_len = len;
	skb->truesize = size;

	skb->head = skb->data = skb->tail = (uint8_t *)(skb+1);
	skb->end = skb->head + size;

	skb->prev = skb->next = skb;

	skb->shinfo = (struct skb_shared_info *)(skb->end);

	SKB_TRACE_FMT(skb, "data %p size %zu", (skb) ? skb->data : NULL, size);
	return (skb);
}

struct sk_buff *
linuxkpi_dev_alloc_skb(size_t size, gfp_t gfp)
{
	struct sk_buff *skb;
	size_t len;

	len = size + NET_SKB_PAD;
	skb = linuxkpi_alloc_skb(len, gfp);

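	/*
	 * As with Linux's dev_alloc_skb()/netdev_alloc_skb(), NET_SKB_PAD
	 * bytes of headroom are reserved so that drivers can prepend headers
	 * without having to reallocate the buffer.
	 */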
	if (skb != NULL)
		skb_reserve(skb, NET_SKB_PAD);

	SKB_TRACE_FMT(skb, "data %p size %zu len %zu",
	    (skb) ? skb->data : NULL, size, len);
	return (skb);
}

struct sk_buff *
linuxkpi_build_skb(void *data, size_t fragsz)
{
	struct sk_buff *skb;

	if (data == NULL || fragsz == 0)
		return (NULL);

	/* Just allocate a skb without data area. */
	skb = linuxkpi_alloc_skb(0, GFP_KERNEL);
	if (skb == NULL)
		return (NULL);

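	/*
	 * The data buffer is supplied by the caller (typically a page
	 * fragment).  _SKB_FLAGS_SKBEXTFRAG records this so that
	 * linuxkpi_kfree_skb() releases it via skb_free_frag() rather than
	 * treating it as part of the skb allocation.
	 */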
	skb->_flags |= _SKB_FLAGS_SKBEXTFRAG;
	skb->truesize = fragsz;
	skb->head = skb->data = data;
	skb_reset_tail_pointer(skb);	/* XXX is that correct? */
	skb->end = (void *)((uintptr_t)skb->head + fragsz);

	return (skb);
}

struct sk_buff *
linuxkpi_skb_copy(struct sk_buff *skb, gfp_t gfp)
{
	struct sk_buff *new;
	struct skb_shared_info *shinfo;
	size_t len;
	unsigned int headroom;

	/* Full buffer size + any fragments. */
	len = skb->end - skb->head + skb->data_len;

	new = linuxkpi_alloc_skb(len, gfp);
	if (new == NULL)
		return (NULL);

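	/*
	 * Keep the original headroom and copy the linear data in a single
	 * memcpy(); fragments are not copied yet (see the SKB_TODO() below).
	 */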
	headroom = skb_headroom(skb);
	/* Fixup head and end. */
	skb_reserve(new, headroom);	/* data and tail move headroom forward. */
	skb_put(new, skb->len);		/* tail and len get adjusted */

	/* Copy data. */
	memcpy(new->head, skb->data - headroom, headroom + skb->len);

	/* Deal with fragments. */
	shinfo = skb->shinfo;
	if (shinfo->nr_frags > 0) {
		printf("%s:%d: NOT YET SUPPORTED; missing %d frags\n",
		    __func__, __LINE__, shinfo->nr_frags);
		SKB_TODO();
	}

	/* Deal with header fields. */
	memcpy(new->cb, skb->cb, sizeof(skb->cb));
	SKB_IMPROVE("more header fields to copy?");

	return (new);
}

void
linuxkpi_kfree_skb(struct sk_buff *skb)
{
	struct skb_shared_info *shinfo;
	uint16_t fragno, count;

	SKB_TRACE(skb);
	if (skb == NULL)
		return;

	/*
	 * XXX TODO: this will go away once skbs are backed by mbufs.
	 * Currently we let the mbuf stay around and use a private free
	 * function so that secondary resources can be freed along with it.
	 */
	if (skb->m != NULL) {
		void *m;

		m = skb->m;
		skb->m = NULL;

		KASSERT(skb->m_free_func != NULL, ("%s: skb %p has m %p but no "
		    "m_free_func %p\n", __func__, skb, m, skb->m_free_func));
		skb->m_free_func(m);
	}
	KASSERT(skb->m == NULL,
	    ("%s: skb %p m %p != NULL\n", __func__, skb, skb->m));

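	/*
	 * Release any page fragments; walk the array until nr_frags pages
	 * have been freed or the end of the array is reached.
	 */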
	shinfo = skb->shinfo;
	for (count = fragno = 0;
	    count < shinfo->nr_frags && fragno < nitems(shinfo->frags);
	    fragno++) {

		if (shinfo->frags[fragno].page != NULL) {
			struct page *p;

			p = shinfo->frags[fragno].page;
			shinfo->frags[fragno].size = 0;
			shinfo->frags[fragno].offset = 0;
			shinfo->frags[fragno].page = NULL;
			__free_page(p);
			count++;
		}
	}

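	/*
	 * A buffer attached via linuxkpi_build_skb() is not part of the skb
	 * allocation; hand it back through skb_free_frag().
	 */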
	if ((skb->_flags & _SKB_FLAGS_SKBEXTFRAG) != 0) {
		void *p;

		p = skb->head;
		skb_free_frag(p);
	}

#ifdef __LP64__
	if (__predict_true(linuxkpi_skb_memlimit == 0))
		free(skb, M_LKPISKB);
	else
		contigfree(skb, skb->_alloc_len, M_LKPISKB);
#else
	free(skb, M_LKPISKB);
#endif
}

#ifdef DDB
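/*
 * Dump an skb from the ddb(4) prompt; the address below is purely
 * illustrative:
 *   db> show skb 0xfffff80012345000
 */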
DB_SHOW_COMMAND(skb, db_show_skb)
{
	struct sk_buff *skb;
	int i;

	if (!have_addr) {
		db_printf("usage: show skb <addr>\n");
		return;
	}

	skb = (struct sk_buff *)addr;

	db_printf("skb %p\n", skb);
	db_printf("\tnext %p prev %p\n", skb->next, skb->prev);
	db_printf("\tlist %p\n", &skb->list);
	db_printf("\t_alloc_len %u len %u data_len %u truesize %u mac_len %u\n",
	    skb->_alloc_len, skb->len, skb->data_len, skb->truesize,
	    skb->mac_len);
	db_printf("\tcsum %#06x l3hdroff %u l4hdroff %u priority %u qmap %u\n",
	    skb->csum, skb->l3hdroff, skb->l4hdroff, skb->priority, skb->qmap);
	db_printf("\tpkt_type %d dev %p sk %p\n",
	    skb->pkt_type, skb->dev, skb->sk);
	db_printf("\tcsum_offset %d csum_start %d ip_summed %d protocol %d\n",
	    skb->csum_offset, skb->csum_start, skb->ip_summed, skb->protocol);
	db_printf("\t_flags %#06x\n", skb->_flags);	/* XXX-BZ print names? */
	db_printf("\thead %p data %p tail %p end %p\n",
	    skb->head, skb->data, skb->tail, skb->end);
	db_printf("\tshinfo %p m %p m_free_func %p\n",
	    skb->shinfo, skb->m, skb->m_free_func);

	if (skb->shinfo != NULL) {
		struct skb_shared_info *shinfo;

		shinfo = skb->shinfo;
		db_printf("\t\tgso_type %d gso_size %u nr_frags %u\n",
		    shinfo->gso_type, shinfo->gso_size, shinfo->nr_frags);
		for (i = 0; i < nitems(shinfo->frags); i++) {
			struct skb_frag *frag;

			frag = &shinfo->frags[i];
			if (frag == NULL || frag->page == NULL)
				continue;
			db_printf("\t\t\tfrag %p fragno %d page %p %p "
			    "offset %ju size %zu\n",
			    frag, i, frag->page, linux_page_address(frag->page),
			    (uintmax_t)frag->offset, frag->size);
		}
	}
	db_printf("\tcb[] %p {", skb->cb);
	for (i = 0; i < nitems(skb->cb); i++) {
		db_printf("%#04x%s",
		    skb->cb[i], (i < (nitems(skb->cb)-1)) ? ", " : "");
	}
	db_printf("}\n");

	db_printf("\t__scratch[0] %p\n", skb->__scratch);
};
#endif