149ed6e97SBjoern A. Zeeb /*- 26baea331SBjoern A. Zeeb * Copyright (c) 2020-2022 The FreeBSD Foundation 36baea331SBjoern A. Zeeb * Copyright (c) 2021-2022 Bjoern A. Zeeb 449ed6e97SBjoern A. Zeeb * 549ed6e97SBjoern A. Zeeb * This software was developed by Björn Zeeb under sponsorship from 649ed6e97SBjoern A. Zeeb * the FreeBSD Foundation. 749ed6e97SBjoern A. Zeeb * 849ed6e97SBjoern A. Zeeb * Redistribution and use in source and binary forms, with or without 949ed6e97SBjoern A. Zeeb * modification, are permitted provided that the following conditions 1049ed6e97SBjoern A. Zeeb * are met: 1149ed6e97SBjoern A. Zeeb * 1. Redistributions of source code must retain the above copyright 1249ed6e97SBjoern A. Zeeb * notice, this list of conditions and the following disclaimer. 1349ed6e97SBjoern A. Zeeb * 2. Redistributions in binary form must reproduce the above copyright 1449ed6e97SBjoern A. Zeeb * notice, this list of conditions and the following disclaimer in the 1549ed6e97SBjoern A. Zeeb * documentation and/or other materials provided with the distribution. 1649ed6e97SBjoern A. Zeeb * 1749ed6e97SBjoern A. Zeeb * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 1849ed6e97SBjoern A. Zeeb * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 1949ed6e97SBjoern A. Zeeb * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 2049ed6e97SBjoern A. Zeeb * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 2149ed6e97SBjoern A. Zeeb * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 2249ed6e97SBjoern A. Zeeb * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 2349ed6e97SBjoern A. Zeeb * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 2449ed6e97SBjoern A. Zeeb * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 2549ed6e97SBjoern A. Zeeb * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 2649ed6e97SBjoern A. Zeeb * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 2749ed6e97SBjoern A. Zeeb * SUCH DAMAGE. 2849ed6e97SBjoern A. Zeeb * 2949ed6e97SBjoern A. Zeeb * $FreeBSD$ 3049ed6e97SBjoern A. Zeeb */ 3149ed6e97SBjoern A. Zeeb 3249ed6e97SBjoern A. Zeeb /* 3349ed6e97SBjoern A. Zeeb * NOTE: this socket buffer compatibility code is highly EXPERIMENTAL. 3449ed6e97SBjoern A. Zeeb * Do not rely on the internals of this implementation. They are highly 3549ed6e97SBjoern A. Zeeb * likely to change as we will improve the integration to FreeBSD mbufs. 3649ed6e97SBjoern A. Zeeb */ 3749ed6e97SBjoern A. Zeeb 3849ed6e97SBjoern A. Zeeb #include <sys/cdefs.h> 3949ed6e97SBjoern A. Zeeb __FBSDID("$FreeBSD$"); 4049ed6e97SBjoern A. Zeeb 416baea331SBjoern A. Zeeb #include "opt_ddb.h" 426baea331SBjoern A. Zeeb 4349ed6e97SBjoern A. Zeeb #include <sys/param.h> 4449ed6e97SBjoern A. Zeeb #include <sys/types.h> 4549ed6e97SBjoern A. Zeeb #include <sys/kernel.h> 4649ed6e97SBjoern A. Zeeb #include <sys/malloc.h> 476baea331SBjoern A. Zeeb #include <sys/sysctl.h> 486baea331SBjoern A. Zeeb 496baea331SBjoern A. Zeeb #ifdef DDB 506baea331SBjoern A. Zeeb #include <ddb/ddb.h> 516baea331SBjoern A. Zeeb #endif 5249ed6e97SBjoern A. Zeeb 5349ed6e97SBjoern A. Zeeb #include <linux/skbuff.h> 5449ed6e97SBjoern A. Zeeb #include <linux/slab.h> 556baea331SBjoern A. Zeeb #include <linux/gfp.h> 56*6a501570SBjoern A. Zeeb #ifdef __LP64__ 57*6a501570SBjoern A. Zeeb #include <linux/log2.h> 58*6a501570SBjoern A. Zeeb #endif 596baea331SBjoern A. Zeeb 606baea331SBjoern A. Zeeb SYSCTL_DECL(_compat_linuxkpi); 616baea331SBjoern A. Zeeb SYSCTL_NODE(_compat_linuxkpi, OID_AUTO, skb, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 626baea331SBjoern A. Zeeb "LinuxKPI skbuff"); 636baea331SBjoern A. Zeeb 64*6a501570SBjoern A. Zeeb #ifdef SKB_DEBUG 656baea331SBjoern A. Zeeb int linuxkpi_debug_skb; 666baea331SBjoern A. Zeeb SYSCTL_INT(_compat_linuxkpi_skb, OID_AUTO, debug, CTLFLAG_RWTUN, 676baea331SBjoern A. Zeeb &linuxkpi_debug_skb, 0, "SKB debug level"); 686baea331SBjoern A. Zeeb #endif 6949ed6e97SBjoern A. Zeeb 70*6a501570SBjoern A. Zeeb #ifdef __LP64__ 71*6a501570SBjoern A. Zeeb /* 72*6a501570SBjoern A. Zeeb * Realtek wireless drivers (e.g., rtw88) require 32bit DMA in a single segment. 73*6a501570SBjoern A. Zeeb * busdma(9) has a hard time providing this currently for 3-ish pages at large 74*6a501570SBjoern A. Zeeb * quantities (see lkpi_pci_nseg1_fail in linux_pci.c). 75*6a501570SBjoern A. Zeeb * Work around this for now by allowing a tunable to enforce physical addresses 76*6a501570SBjoern A. Zeeb * allocation limits on 64bit platforms using "old-school" contigmalloc(9) to 77*6a501570SBjoern A. Zeeb * avoid bouncing. 78*6a501570SBjoern A. Zeeb */ 79*6a501570SBjoern A. Zeeb static int linuxkpi_skb_memlimit; 80*6a501570SBjoern A. Zeeb SYSCTL_INT(_compat_linuxkpi_skb, OID_AUTO, mem_limit, CTLFLAG_RDTUN, 81*6a501570SBjoern A. Zeeb &linuxkpi_skb_memlimit, 0, "SKB memory limit: 0=no limit, " 82*6a501570SBjoern A. Zeeb "1=32bit, 2=36bit, other=undef (currently 32bit)"); 83*6a501570SBjoern A. Zeeb #endif 84*6a501570SBjoern A. Zeeb 8549ed6e97SBjoern A. Zeeb static MALLOC_DEFINE(M_LKPISKB, "lkpiskb", "Linux KPI skbuff compat"); 8649ed6e97SBjoern A. Zeeb 8749ed6e97SBjoern A. Zeeb struct sk_buff * 8849ed6e97SBjoern A. Zeeb linuxkpi_alloc_skb(size_t size, gfp_t gfp) 8949ed6e97SBjoern A. Zeeb { 9049ed6e97SBjoern A. Zeeb struct sk_buff *skb; 9149ed6e97SBjoern A. Zeeb size_t len; 9249ed6e97SBjoern A. Zeeb 9349ed6e97SBjoern A. Zeeb len = sizeof(*skb) + size + sizeof(struct skb_shared_info); 9449ed6e97SBjoern A. Zeeb /* 956baea331SBjoern A. Zeeb * Using our own type here not backing my kmalloc. 9649ed6e97SBjoern A. Zeeb * We assume no one calls kfree directly on the skb. 9749ed6e97SBjoern A. Zeeb */ 98*6a501570SBjoern A. Zeeb #ifdef __LP64__ 99*6a501570SBjoern A. Zeeb if (__predict_true(linuxkpi_skb_memlimit == 0)) { 10049ed6e97SBjoern A. Zeeb skb = malloc(len, M_LKPISKB, linux_check_m_flags(gfp) | M_ZERO); 101*6a501570SBjoern A. Zeeb } else { 102*6a501570SBjoern A. Zeeb vm_paddr_t high; 103*6a501570SBjoern A. Zeeb 104*6a501570SBjoern A. Zeeb switch (linuxkpi_skb_memlimit) { 105*6a501570SBjoern A. Zeeb case 2: 106*6a501570SBjoern A. Zeeb high = (0xfffffffff); /* 1<<36 really. */ 107*6a501570SBjoern A. Zeeb break; 108*6a501570SBjoern A. Zeeb case 1: 109*6a501570SBjoern A. Zeeb default: 110*6a501570SBjoern A. Zeeb high = (0xffffffff); /* 1<<32 really. */ 111*6a501570SBjoern A. Zeeb break; 112*6a501570SBjoern A. Zeeb } 113*6a501570SBjoern A. Zeeb len = roundup_pow_of_two(len); 114*6a501570SBjoern A. Zeeb skb = contigmalloc(len, M_LKPISKB, 115*6a501570SBjoern A. Zeeb linux_check_m_flags(gfp) | M_ZERO, 0, high, PAGE_SIZE, 0); 116*6a501570SBjoern A. Zeeb } 117*6a501570SBjoern A. Zeeb #else 118*6a501570SBjoern A. Zeeb skb = malloc(len, M_LKPISKB, linux_check_m_flags(gfp) | M_ZERO); 119*6a501570SBjoern A. Zeeb #endif 12049ed6e97SBjoern A. Zeeb if (skb == NULL) 12149ed6e97SBjoern A. Zeeb return (skb); 1229df5f29cSBjoern A. Zeeb skb->_alloc_len = len; 12349ed6e97SBjoern A. Zeeb skb->truesize = size; 12449ed6e97SBjoern A. Zeeb 12549ed6e97SBjoern A. Zeeb skb->head = skb->data = skb->tail = (uint8_t *)(skb+1); 12649ed6e97SBjoern A. Zeeb skb->end = skb->head + size; 12749ed6e97SBjoern A. Zeeb 1286a8973c3SBjoern A. Zeeb skb->prev = skb->next = skb; 1296a8973c3SBjoern A. Zeeb 13049ed6e97SBjoern A. Zeeb skb->shinfo = (struct skb_shared_info *)(skb->end); 13149ed6e97SBjoern A. Zeeb 1329df5f29cSBjoern A. Zeeb SKB_TRACE_FMT(skb, "data %p size %zu", (skb) ? skb->data : NULL, size); 1339df5f29cSBjoern A. Zeeb return (skb); 1349df5f29cSBjoern A. Zeeb } 1359df5f29cSBjoern A. Zeeb 1369df5f29cSBjoern A. Zeeb struct sk_buff * 1379df5f29cSBjoern A. Zeeb linuxkpi_dev_alloc_skb(size_t size, gfp_t gfp) 1389df5f29cSBjoern A. Zeeb { 1399df5f29cSBjoern A. Zeeb struct sk_buff *skb; 1409df5f29cSBjoern A. Zeeb size_t len; 1419df5f29cSBjoern A. Zeeb 1429df5f29cSBjoern A. Zeeb len = size + NET_SKB_PAD; 1439df5f29cSBjoern A. Zeeb skb = linuxkpi_alloc_skb(len, gfp); 1449df5f29cSBjoern A. Zeeb 1459df5f29cSBjoern A. Zeeb if (skb != NULL) 1469df5f29cSBjoern A. Zeeb skb_reserve(skb, NET_SKB_PAD); 1479df5f29cSBjoern A. Zeeb 1489df5f29cSBjoern A. Zeeb SKB_TRACE_FMT(skb, "data %p size %zu len %zu", 1499df5f29cSBjoern A. Zeeb (skb) ? skb->data : NULL, size, len); 15049ed6e97SBjoern A. Zeeb return (skb); 15149ed6e97SBjoern A. Zeeb } 15249ed6e97SBjoern A. Zeeb 153349b042bSBjoern A. Zeeb struct sk_buff * 154349b042bSBjoern A. Zeeb linuxkpi_skb_copy(struct sk_buff *skb, gfp_t gfp) 155349b042bSBjoern A. Zeeb { 156349b042bSBjoern A. Zeeb struct sk_buff *new; 157349b042bSBjoern A. Zeeb struct skb_shared_info *shinfo; 158349b042bSBjoern A. Zeeb size_t len; 159349b042bSBjoern A. Zeeb unsigned int headroom; 160349b042bSBjoern A. Zeeb 161349b042bSBjoern A. Zeeb /* Full buffer size + any fragments. */ 162349b042bSBjoern A. Zeeb len = skb->end - skb->head + skb->data_len; 163349b042bSBjoern A. Zeeb 164349b042bSBjoern A. Zeeb new = linuxkpi_alloc_skb(len, gfp); 165349b042bSBjoern A. Zeeb if (new == NULL) 166349b042bSBjoern A. Zeeb return (NULL); 167349b042bSBjoern A. Zeeb 168349b042bSBjoern A. Zeeb headroom = skb_headroom(skb); 169349b042bSBjoern A. Zeeb /* Fixup head and end. */ 170349b042bSBjoern A. Zeeb skb_reserve(new, headroom); /* data and tail move headroom forward. */ 171349b042bSBjoern A. Zeeb skb_put(new, skb->len); /* tail and len get adjusted */ 172349b042bSBjoern A. Zeeb 173349b042bSBjoern A. Zeeb /* Copy data. */ 174349b042bSBjoern A. Zeeb memcpy(new->head, skb->data - headroom, headroom + skb->len); 175349b042bSBjoern A. Zeeb 176349b042bSBjoern A. Zeeb /* Deal with fragments. */ 177349b042bSBjoern A. Zeeb shinfo = skb->shinfo; 178349b042bSBjoern A. Zeeb if (shinfo->nr_frags > 0) { 179349b042bSBjoern A. Zeeb printf("%s:%d: NOT YET SUPPORTED; missing %d frags\n", 180349b042bSBjoern A. Zeeb __func__, __LINE__, shinfo->nr_frags); 181349b042bSBjoern A. Zeeb SKB_TODO(); 182349b042bSBjoern A. Zeeb } 183349b042bSBjoern A. Zeeb 184349b042bSBjoern A. Zeeb /* Deal with header fields. */ 185349b042bSBjoern A. Zeeb memcpy(new->cb, skb->cb, sizeof(skb->cb)); 186349b042bSBjoern A. Zeeb SKB_IMPROVE("more header fields to copy?"); 187349b042bSBjoern A. Zeeb 188349b042bSBjoern A. Zeeb return (new); 189349b042bSBjoern A. Zeeb } 190349b042bSBjoern A. Zeeb 19149ed6e97SBjoern A. Zeeb void 19249ed6e97SBjoern A. Zeeb linuxkpi_kfree_skb(struct sk_buff *skb) 19349ed6e97SBjoern A. Zeeb { 19449ed6e97SBjoern A. Zeeb struct skb_shared_info *shinfo; 1956baea331SBjoern A. Zeeb uint16_t fragno, count; 19649ed6e97SBjoern A. Zeeb 19749ed6e97SBjoern A. Zeeb SKB_TRACE(skb); 19849ed6e97SBjoern A. Zeeb if (skb == NULL) 19949ed6e97SBjoern A. Zeeb return; 20049ed6e97SBjoern A. Zeeb 20149ed6e97SBjoern A. Zeeb /* 20249ed6e97SBjoern A. Zeeb * XXX TODO this will go away once we have skb backed by mbuf. 20349ed6e97SBjoern A. Zeeb * currently we allow the mbuf to stay around and use a private 20449ed6e97SBjoern A. Zeeb * free function to allow secondary resources to be freed along. 20549ed6e97SBjoern A. Zeeb */ 20649ed6e97SBjoern A. Zeeb if (skb->m != NULL) { 20749ed6e97SBjoern A. Zeeb void *m; 20849ed6e97SBjoern A. Zeeb 20949ed6e97SBjoern A. Zeeb m = skb->m; 21049ed6e97SBjoern A. Zeeb skb->m = NULL; 21149ed6e97SBjoern A. Zeeb 21249ed6e97SBjoern A. Zeeb KASSERT(skb->m_free_func != NULL, ("%s: skb %p has m %p but no " 21349ed6e97SBjoern A. Zeeb "m_free_func %p\n", __func__, skb, m, skb->m_free_func)); 21449ed6e97SBjoern A. Zeeb skb->m_free_func(m); 21549ed6e97SBjoern A. Zeeb } 21649ed6e97SBjoern A. Zeeb KASSERT(skb->m == NULL, 21749ed6e97SBjoern A. Zeeb ("%s: skb %p m %p != NULL\n", __func__, skb, skb->m)); 21849ed6e97SBjoern A. Zeeb 21949ed6e97SBjoern A. Zeeb shinfo = skb->shinfo; 2206baea331SBjoern A. Zeeb for (count = fragno = 0; 2216baea331SBjoern A. Zeeb count < shinfo->nr_frags && fragno < nitems(shinfo->frags); 2226baea331SBjoern A. Zeeb fragno++) { 22349ed6e97SBjoern A. Zeeb 2246baea331SBjoern A. Zeeb if (shinfo->frags[fragno].page != NULL) { 2256baea331SBjoern A. Zeeb struct page *p; 2266baea331SBjoern A. Zeeb 2276baea331SBjoern A. Zeeb p = shinfo->frags[fragno].page; 2286baea331SBjoern A. Zeeb shinfo->frags[fragno].size = 0; 2296baea331SBjoern A. Zeeb shinfo->frags[fragno].offset = 0; 2306baea331SBjoern A. Zeeb shinfo->frags[fragno].page = NULL; 2316baea331SBjoern A. Zeeb __free_page(p); 2326baea331SBjoern A. Zeeb count++; 2336baea331SBjoern A. Zeeb } 23449ed6e97SBjoern A. Zeeb } 23549ed6e97SBjoern A. Zeeb 236*6a501570SBjoern A. Zeeb #ifdef __LP64__ 237*6a501570SBjoern A. Zeeb if (__predict_true(linuxkpi_skb_memlimit == 0)) 23849ed6e97SBjoern A. Zeeb free(skb, M_LKPISKB); 239*6a501570SBjoern A. Zeeb else 240*6a501570SBjoern A. Zeeb contigfree(skb, skb->_alloc_len, M_LKPISKB); 241*6a501570SBjoern A. Zeeb #else 242*6a501570SBjoern A. Zeeb free(skb, M_LKPISKB); 243*6a501570SBjoern A. Zeeb #endif 24449ed6e97SBjoern A. Zeeb } 2456baea331SBjoern A. Zeeb 2466baea331SBjoern A. Zeeb #ifdef DDB 2476baea331SBjoern A. Zeeb DB_SHOW_COMMAND(skb, db_show_skb) 2486baea331SBjoern A. Zeeb { 2496baea331SBjoern A. Zeeb struct sk_buff *skb; 2506baea331SBjoern A. Zeeb int i; 2516baea331SBjoern A. Zeeb 2526baea331SBjoern A. Zeeb if (!have_addr) { 2536baea331SBjoern A. Zeeb db_printf("usage: show skb <addr>\n"); 2546baea331SBjoern A. Zeeb return; 2556baea331SBjoern A. Zeeb } 2566baea331SBjoern A. Zeeb 2576baea331SBjoern A. Zeeb skb = (struct sk_buff *)addr; 2586baea331SBjoern A. Zeeb 2596baea331SBjoern A. Zeeb db_printf("skb %p\n", skb); 2606baea331SBjoern A. Zeeb db_printf("\tnext %p prev %p\n", skb->next, skb->prev); 2616baea331SBjoern A. Zeeb db_printf("\tlist %d\n", skb->list); 2626baea331SBjoern A. Zeeb db_printf("\t_alloc_len %u len %u data_len %u truesize %u mac_len %u\n", 2636baea331SBjoern A. Zeeb skb->_alloc_len, skb->len, skb->data_len, skb->truesize, 2646baea331SBjoern A. Zeeb skb->mac_len); 2656baea331SBjoern A. Zeeb db_printf("\tcsum %#06x l3hdroff %u l4hdroff %u priority %u qmap %u\n", 2666baea331SBjoern A. Zeeb skb->csum, skb->l3hdroff, skb->l4hdroff, skb->priority, skb->qmap); 2676baea331SBjoern A. Zeeb db_printf("\tpkt_type %d dev %p sk %p\n", 2686baea331SBjoern A. Zeeb skb->pkt_type, skb->dev, skb->sk); 2696baea331SBjoern A. Zeeb db_printf("\tcsum_offset %d csum_start %d ip_summed %d protocol %d\n", 2706baea331SBjoern A. Zeeb skb->csum_offset, skb->csum_start, skb->ip_summed, skb->protocol); 2716baea331SBjoern A. Zeeb db_printf("\thead %p data %p tail %p end %p\n", 2726baea331SBjoern A. Zeeb skb->head, skb->data, skb->tail, skb->end); 2736baea331SBjoern A. Zeeb db_printf("\tshinfo %p m %p m_free_func %p\n", 2746baea331SBjoern A. Zeeb skb->shinfo, skb->m, skb->m_free_func); 2756baea331SBjoern A. Zeeb 2766baea331SBjoern A. Zeeb if (skb->shinfo != NULL) { 2776baea331SBjoern A. Zeeb struct skb_shared_info *shinfo; 2786baea331SBjoern A. Zeeb 2796baea331SBjoern A. Zeeb shinfo = skb->shinfo; 2806baea331SBjoern A. Zeeb db_printf("\t\tgso_type %d gso_size %u nr_frags %u\n", 2816baea331SBjoern A. Zeeb shinfo->gso_type, shinfo->gso_size, shinfo->nr_frags); 2826baea331SBjoern A. Zeeb for (i = 0; i < nitems(shinfo->frags); i++) { 2836baea331SBjoern A. Zeeb struct skb_frag *frag; 2846baea331SBjoern A. Zeeb 2856baea331SBjoern A. Zeeb frag = &shinfo->frags[i]; 2866baea331SBjoern A. Zeeb if (frag == NULL || frag->page == NULL) 2876baea331SBjoern A. Zeeb continue; 2886baea331SBjoern A. Zeeb db_printf("\t\t\tfrag %p fragno %d page %p %p " 2896baea331SBjoern A. Zeeb "offset %ju size %zu\n", 2906baea331SBjoern A. Zeeb frag, i, frag->page, linux_page_address(frag->page), 2916baea331SBjoern A. Zeeb (uintmax_t)frag->offset, frag->size); 2926baea331SBjoern A. Zeeb } 2936baea331SBjoern A. Zeeb } 2946baea331SBjoern A. Zeeb db_printf("\tcb[] %p {", skb->cb); 2956baea331SBjoern A. Zeeb for (i = 0; i < nitems(skb->cb); i++) { 2966baea331SBjoern A. Zeeb db_printf("%#04x%s", 2976baea331SBjoern A. Zeeb skb->cb[i], (i < (nitems(skb->cb)-1)) ? ", " : ""); 2986baea331SBjoern A. Zeeb } 2996baea331SBjoern A. Zeeb db_printf("}\n"); 3006baea331SBjoern A. Zeeb 3016baea331SBjoern A. Zeeb db_printf("\t_spareu16_0 %#06x __scratch[0] %p\n", 3026baea331SBjoern A. Zeeb skb->_spareu16_0, skb->__scratch); 3036baea331SBjoern A. Zeeb }; 3046baea331SBjoern A. Zeeb #endif 305