1 /*- 2 * Copyright (c) 2020-2025 The FreeBSD Foundation 3 * Copyright (c) 2021-2022 Bjoern A. Zeeb 4 * 5 * This software was developed by Björn Zeeb under sponsorship from 6 * the FreeBSD Foundation. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 /* 31 * NOTE: this socket buffer compatibility code is highly EXPERIMENTAL. 32 * Do not rely on the internals of this implementation. They are highly 33 * likely to change as we will improve the integration to FreeBSD mbufs. 
 */

#include <sys/cdefs.h>
#include "opt_ddb.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>

#include <vm/uma.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#include <linux/skbuff.h>
#include <linux/slab.h>
#include <linux/gfp.h>
#ifdef __LP64__
#include <linux/log2.h>		/* roundup_pow_of_two() for the DMA limit path. */
#endif

/* sysctl subtree compat.linuxkpi.skb.* for the knobs defined below. */
SYSCTL_DECL(_compat_linuxkpi);
SYSCTL_NODE(_compat_linuxkpi, OID_AUTO, skb, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "LinuxKPI skbuff");

#ifdef SKB_DEBUG
/* Debug level consumed by the SKB_TRACE*()/SKB_TODO() style macros. */
int linuxkpi_debug_skb;
SYSCTL_INT(_compat_linuxkpi_skb, OID_AUTO, debug, CTLFLAG_RWTUN,
    &linuxkpi_debug_skb, 0, "SKB debug level");
#endif

/*
 * UMA zone backing struct sk_buff allocations; sized to hold the sk_buff
 * plus a trailing struct skb_shared_info (see lkpi_skbuff_init()).
 */
static uma_zone_t skbzone;

#define SKB_DMA32_MALLOC
#ifdef SKB_DMA32_MALLOC
/*
 * Realtek wireless drivers (e.g., rtw88) require 32bit DMA in a single segment.
 * busdma(9) has a hard time providing this currently for 3-ish pages at large
 * quantities (see lkpi_pci_nseg1_fail in linux_pci.c).
 * Work around this for now by allowing a tunable to enforce physical addresses
 * allocation limits using "old-school" contigmalloc(9) to avoid bouncing.
 * Note: with the malloc/contigmalloc + kmalloc changes also providing physical
 * contiguous memory, and the nseg=1 limit for bouncing we should in theory be
 * fine now and not need any of this anymore, however busdma still has troubles
 * bouncing three contiguous pages so for now this stays.
82 */ 83 static int linuxkpi_skb_memlimit; 84 SYSCTL_INT(_compat_linuxkpi_skb, OID_AUTO, mem_limit, CTLFLAG_RDTUN, 85 &linuxkpi_skb_memlimit, 0, "SKB memory limit: 0=no limit, " 86 "1=32bit, 2=36bit, other=undef (currently 32bit)"); 87 88 static MALLOC_DEFINE(M_LKPISKB, "lkpiskb", "Linux KPI skbuff compat"); 89 #endif 90 91 struct sk_buff * 92 linuxkpi_alloc_skb(size_t size, gfp_t gfp) 93 { 94 struct sk_buff *skb; 95 void *p; 96 size_t len; 97 98 skb = uma_zalloc(skbzone, linux_check_m_flags(gfp) | M_ZERO); 99 if (skb == NULL) 100 return (NULL); 101 102 skb->prev = skb->next = skb; 103 skb->truesize = size; 104 skb->shinfo = (struct skb_shared_info *)(skb + 1); 105 106 if (size == 0) 107 return (skb); 108 109 len = size; 110 #ifdef SKB_DMA32_MALLOC 111 /* 112 * Using our own type here not backing my kmalloc. 113 * We assume no one calls kfree directly on the skb. 114 */ 115 if (__predict_false(linuxkpi_skb_memlimit != 0)) { 116 vm_paddr_t high; 117 118 switch (linuxkpi_skb_memlimit) { 119 #ifdef __LP64__ 120 case 2: 121 high = (0xfffffffff); /* 1<<36 really. */ 122 break; 123 #endif 124 case 1: 125 default: 126 high = (0xffffffff); /* 1<<32 really. */ 127 break; 128 } 129 len = roundup_pow_of_two(len); 130 p = contigmalloc(len, M_LKPISKB, 131 linux_check_m_flags(gfp) | M_ZERO, 0, high, PAGE_SIZE, 0); 132 } else 133 #endif 134 p = __kmalloc(len, linux_check_m_flags(gfp) | M_ZERO); 135 if (p == NULL) { 136 uma_zfree(skbzone, skb); 137 return (NULL); 138 } 139 140 skb->head = skb->data = (uint8_t *)p; 141 skb_reset_tail_pointer(skb); 142 skb->end = skb->head + size; 143 144 SKB_TRACE_FMT(skb, "data %p size %zu", (skb) ? 
skb->data : NULL, size); 145 return (skb); 146 } 147 148 struct sk_buff * 149 linuxkpi_dev_alloc_skb(size_t size, gfp_t gfp) 150 { 151 struct sk_buff *skb; 152 size_t len; 153 154 len = size + NET_SKB_PAD; 155 skb = linuxkpi_alloc_skb(len, gfp); 156 157 if (skb != NULL) 158 skb_reserve(skb, NET_SKB_PAD); 159 160 SKB_TRACE_FMT(skb, "data %p size %zu len %zu", 161 (skb) ? skb->data : NULL, size, len); 162 return (skb); 163 } 164 165 struct sk_buff * 166 linuxkpi_build_skb(void *data, size_t fragsz) 167 { 168 struct sk_buff *skb; 169 170 if (data == NULL || fragsz == 0) 171 return (NULL); 172 173 /* Just allocate a skb without data area. */ 174 skb = linuxkpi_alloc_skb(0, GFP_KERNEL); 175 if (skb == NULL) 176 return (NULL); 177 178 skb->_flags |= _SKB_FLAGS_SKBEXTFRAG; 179 skb->truesize = fragsz; 180 skb->head = skb->data = data; 181 skb_reset_tail_pointer(skb); 182 skb->end = skb->head + fragsz; 183 184 return (skb); 185 } 186 187 struct sk_buff * 188 linuxkpi_skb_copy(const struct sk_buff *skb, gfp_t gfp) 189 { 190 struct sk_buff *new; 191 struct skb_shared_info *shinfo; 192 size_t len; 193 unsigned int headroom; 194 195 /* Full buffer size + any fragments. */ 196 len = skb->end - skb->head + skb->data_len; 197 198 new = linuxkpi_alloc_skb(len, gfp); 199 if (new == NULL) 200 return (NULL); 201 202 headroom = skb_headroom(skb); 203 /* Fixup head and end. */ 204 skb_reserve(new, headroom); /* data and tail move headroom forward. */ 205 skb_put(new, skb->len); /* tail and len get adjusted */ 206 207 /* Copy data. */ 208 memcpy(new->head, skb->data - headroom, headroom + skb->len); 209 210 /* Deal with fragments. */ 211 shinfo = skb->shinfo; 212 if (shinfo->nr_frags > 0) { 213 printf("%s:%d: NOT YET SUPPORTED; missing %d frags\n", 214 __func__, __LINE__, shinfo->nr_frags); 215 SKB_TODO(); 216 } 217 218 /* Deal with header fields. 
*/ 219 memcpy(new->cb, skb->cb, sizeof(skb->cb)); 220 SKB_IMPROVE("more header fields to copy?"); 221 222 return (new); 223 } 224 225 void 226 linuxkpi_kfree_skb(struct sk_buff *skb) 227 { 228 struct skb_shared_info *shinfo; 229 uint16_t fragno, count; 230 231 SKB_TRACE(skb); 232 if (skb == NULL) 233 return; 234 235 /* 236 * XXX TODO this will go away once we have skb backed by mbuf. 237 * currently we allow the mbuf to stay around and use a private 238 * free function to allow secondary resources to be freed along. 239 */ 240 if (skb->m != NULL) { 241 void *m; 242 243 m = skb->m; 244 skb->m = NULL; 245 246 KASSERT(skb->m_free_func != NULL, ("%s: skb %p has m %p but no " 247 "m_free_func %p\n", __func__, skb, m, skb->m_free_func)); 248 skb->m_free_func(m); 249 } 250 KASSERT(skb->m == NULL, 251 ("%s: skb %p m %p != NULL\n", __func__, skb, skb->m)); 252 253 shinfo = skb->shinfo; 254 for (count = fragno = 0; 255 count < shinfo->nr_frags && fragno < nitems(shinfo->frags); 256 fragno++) { 257 258 if (shinfo->frags[fragno].page != NULL) { 259 struct page *p; 260 261 p = shinfo->frags[fragno].page; 262 shinfo->frags[fragno].size = 0; 263 shinfo->frags[fragno].offset = 0; 264 shinfo->frags[fragno].page = NULL; 265 __free_page(p); 266 count++; 267 } 268 } 269 270 if ((skb->_flags & _SKB_FLAGS_SKBEXTFRAG) != 0) { 271 void *p; 272 273 p = skb->head; 274 skb_free_frag(p); 275 skb->head = NULL; 276 } 277 278 #ifdef SKB_DMA32_MALLOC 279 if (__predict_false(linuxkpi_skb_memlimit != 0)) 280 free(skb->head, M_LKPISKB); 281 else 282 #endif 283 kfree(skb->head); 284 uma_zfree(skbzone, skb); 285 } 286 287 static void 288 lkpi_skbuff_init(void *arg __unused) 289 { 290 skbzone = uma_zcreate("skbuff", 291 sizeof(struct sk_buff) + sizeof(struct skb_shared_info), 292 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 293 /* Do we need to apply limits? 
*/ 294 } 295 SYSINIT(linuxkpi_skbuff, SI_SUB_DRIVERS, SI_ORDER_FIRST, lkpi_skbuff_init, NULL); 296 297 static void 298 lkpi_skbuff_destroy(void *arg __unused) 299 { 300 uma_zdestroy(skbzone); 301 } 302 SYSUNINIT(linuxkpi_skbuff, SI_SUB_DRIVERS, SI_ORDER_SECOND, lkpi_skbuff_destroy, NULL); 303 304 #ifdef DDB 305 DB_SHOW_COMMAND(skb, db_show_skb) 306 { 307 struct sk_buff *skb; 308 int i; 309 310 if (!have_addr) { 311 db_printf("usage: show skb <addr>\n"); 312 return; 313 } 314 315 skb = (struct sk_buff *)addr; 316 317 db_printf("skb %p\n", skb); 318 db_printf("\tnext %p prev %p\n", skb->next, skb->prev); 319 db_printf("\tlist %p\n", &skb->list); 320 db_printf("\tlen %u data_len %u truesize %u mac_len %u\n", 321 skb->len, skb->data_len, skb->truesize, skb->mac_len); 322 db_printf("\tcsum %#06x l3hdroff %u l4hdroff %u priority %u qmap %u\n", 323 skb->csum, skb->l3hdroff, skb->l4hdroff, skb->priority, skb->qmap); 324 db_printf("\tpkt_type %d dev %p sk %p\n", 325 skb->pkt_type, skb->dev, skb->sk); 326 db_printf("\tcsum_offset %d csum_start %d ip_summed %d protocol %d\n", 327 skb->csum_offset, skb->csum_start, skb->ip_summed, skb->protocol); 328 db_printf("\t_flags %#06x\n", skb->_flags); /* XXX-BZ print names? 
*/ 329 db_printf("\thead %p data %p tail %p end %p\n", 330 skb->head, skb->data, skb->tail, skb->end); 331 db_printf("\tshinfo %p m %p m_free_func %p\n", 332 skb->shinfo, skb->m, skb->m_free_func); 333 334 if (skb->shinfo != NULL) { 335 struct skb_shared_info *shinfo; 336 337 shinfo = skb->shinfo; 338 db_printf("\t\tgso_type %d gso_size %u nr_frags %u\n", 339 shinfo->gso_type, shinfo->gso_size, shinfo->nr_frags); 340 for (i = 0; i < nitems(shinfo->frags); i++) { 341 struct skb_frag *frag; 342 343 frag = &shinfo->frags[i]; 344 if (frag == NULL || frag->page == NULL) 345 continue; 346 db_printf("\t\t\tfrag %p fragno %d page %p %p " 347 "offset %ju size %zu\n", 348 frag, i, frag->page, linux_page_address(frag->page), 349 (uintmax_t)frag->offset, frag->size); 350 } 351 } 352 db_printf("\tcb[] %p {", skb->cb); 353 for (i = 0; i < nitems(skb->cb); i++) { 354 db_printf("%#04x%s", 355 skb->cb[i], (i < (nitems(skb->cb)-1)) ? ", " : ""); 356 } 357 db_printf("}\n"); 358 359 db_printf("\t__scratch[0] %p\n", skb->__scratch); 360 }; 361 #endif 362