1*e682d02eSNavdeep Parhar /*- 2*e682d02eSNavdeep Parhar * Copyright (c) 2012 Chelsio Communications, Inc. 3*e682d02eSNavdeep Parhar * All rights reserved. 4*e682d02eSNavdeep Parhar * Written by: Navdeep Parhar <np@FreeBSD.org> 5*e682d02eSNavdeep Parhar * 6*e682d02eSNavdeep Parhar * Redistribution and use in source and binary forms, with or without 7*e682d02eSNavdeep Parhar * modification, are permitted provided that the following conditions 8*e682d02eSNavdeep Parhar * are met: 9*e682d02eSNavdeep Parhar * 1. Redistributions of source code must retain the above copyright 10*e682d02eSNavdeep Parhar * notice, this list of conditions and the following disclaimer. 11*e682d02eSNavdeep Parhar * 2. Redistributions in binary form must reproduce the above copyright 12*e682d02eSNavdeep Parhar * notice, this list of conditions and the following disclaimer in the 13*e682d02eSNavdeep Parhar * documentation and/or other materials provided with the distribution. 14*e682d02eSNavdeep Parhar * 15*e682d02eSNavdeep Parhar * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16*e682d02eSNavdeep Parhar * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17*e682d02eSNavdeep Parhar * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18*e682d02eSNavdeep Parhar * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19*e682d02eSNavdeep Parhar * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20*e682d02eSNavdeep Parhar * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21*e682d02eSNavdeep Parhar * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22*e682d02eSNavdeep Parhar * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23*e682d02eSNavdeep Parhar * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24*e682d02eSNavdeep Parhar * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25*e682d02eSNavdeep Parhar * SUCH DAMAGE. 26*e682d02eSNavdeep Parhar */ 27*e682d02eSNavdeep Parhar 28*e682d02eSNavdeep Parhar #include <sys/cdefs.h> 29*e682d02eSNavdeep Parhar __FBSDID("$FreeBSD$"); 30*e682d02eSNavdeep Parhar 31*e682d02eSNavdeep Parhar #include "opt_inet.h" 32*e682d02eSNavdeep Parhar 33*e682d02eSNavdeep Parhar #include <sys/param.h> 34*e682d02eSNavdeep Parhar #include <sys/types.h> 35*e682d02eSNavdeep Parhar #include <sys/systm.h> 36*e682d02eSNavdeep Parhar #include <sys/kernel.h> 37*e682d02eSNavdeep Parhar #include <sys/ktr.h> 38*e682d02eSNavdeep Parhar #include <sys/module.h> 39*e682d02eSNavdeep Parhar #include <sys/protosw.h> 40*e682d02eSNavdeep Parhar #include <sys/proc.h> 41*e682d02eSNavdeep Parhar #include <sys/domain.h> 42*e682d02eSNavdeep Parhar #include <sys/socket.h> 43*e682d02eSNavdeep Parhar #include <sys/socketvar.h> 44*e682d02eSNavdeep Parhar #include <sys/uio.h> 45*e682d02eSNavdeep Parhar #include <netinet/in.h> 46*e682d02eSNavdeep Parhar #include <netinet/in_pcb.h> 47*e682d02eSNavdeep Parhar #include <netinet/ip.h> 48*e682d02eSNavdeep Parhar #include <netinet/tcp_var.h> 49*e682d02eSNavdeep Parhar #define TCPSTATES 50*e682d02eSNavdeep Parhar #include <netinet/tcp_fsm.h> 51*e682d02eSNavdeep Parhar #include <netinet/toecore.h> 52*e682d02eSNavdeep Parhar 53*e682d02eSNavdeep Parhar 
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>

#ifdef TCP_OFFLOAD
#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "common/t4_tcb.h"
#include "tom/t4_tom.h"

/* Size in bytes of n page pods / of a single page pod. */
#define PPOD_SZ(n)	((n) * sizeof(struct pagepod))
#define PPOD_SIZE	(PPOD_SZ(1))

/* XXX: must match A_ULP_RX_TDDP_PSZ */
static int t4_ddp_pgsz[] = {4096, 4096 << 2, 4096 << 4, 4096 << 6};

#if 0
/*
 * Debug aid (compiled out).  Points PCIe memory window 2 at the TCB of the
 * given tid and prints 32 32-bit words read through that window, 8 per row.
 */
static void
t4_dump_tcb(struct adapter *sc, int tid)
{
	uint32_t tcb_base, off, i, j;

	/* Dump TCB for the tid */
	tcb_base = t4_read_reg(sc, A_TP_CMM_TCB_BASE);
	t4_write_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2),
	    tcb_base + tid * TCB_SIZE);
	t4_read_reg(sc, PCIE_MEM_ACCESS_REG(A_PCIE_MEM_ACCESS_OFFSET, 2));
	off = 0;
	printf("\n");
	for (i = 0; i < 4; i++) {
		uint32_t buf[8];
		for (j = 0; j < 8; j++, off += 4)
			buf[j] = htonl(t4_read_reg(sc, MEMWIN2_BASE + off));

		printf("%08x %08x %08x %08x %08x %08x %08x %08x\n",
		    buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6],
		    buf[7]);
	}
}
#endif

/* Largest user buffer (page-rounded span, in bytes) accepted by hold_uio. */
#define MAX_DDP_BUFFER_SIZE		(M_TCB_RX_DDP_BUF0_LEN)

/*
 * Reserve n contiguous page pods and describe them with the region pr.
 *
 * Bookkeeping, as used by this function and free_ppods():
 *  - td->nppods_free is the total number of unallocated pods,
 *  - td->nppods_free_head is the free gap in front of the first region on
 *    the td->ppods list,
 *  - every region on the list records its own size (used) and the size of
 *    the free gap that immediately follows it (free).
 *
 * The new region is carved from the END of a gap: either the head gap (pr
 * becomes the first region) or the first region gap that is large enough (pr
 * is linked right after the owner of that gap).
 *
 * Returns the index of the first pod of the new region, or -1 if fewer than
 * n pods are free in total or no single gap can hold n pods.  Panics if the
 * list walk does not end exactly at td->nppods, i.e. the accounting is
 * inconsistent.  td->ppod_lock is taken and dropped internally.
 */
static int
alloc_ppods(struct tom_data *td, int n, struct ppod_region *pr)
{
	int ppod;

	KASSERT(n > 0, ("%s: nonsense allocation (%d)", __func__, n));

	mtx_lock(&td->ppod_lock);
	if (n > td->nppods_free) {
		mtx_unlock(&td->ppod_lock);
		return (-1);
	}

	if (td->nppods_free_head >= n) {
		/* Take the last n pods of the head gap. */
		td->nppods_free_head -= n;
		ppod = td->nppods_free_head;
		TAILQ_INSERT_HEAD(&td->ppods, pr, link);
	} else {
		struct ppod_region *p;

		/* ppod tracks the pod index just past p's region + gap. */
		ppod = td->nppods_free_head;
		TAILQ_FOREACH(p, &td->ppods, link) {
			ppod += p->used + p->free;
			if (n <= p->free) {
				/* Take the last n pods of p's gap. */
				ppod -= n;
				p->free -= n;
				TAILQ_INSERT_AFTER(&td->ppods, p, pr, link);
				goto allocated;
			}
		}

		if (__predict_false(ppod != td->nppods)) {
			panic("%s: ppods TAILQ (%p) corrupt."
			    " At %d instead of %d at the end of the queue.",
			    __func__, &td->ppods, ppod, td->nppods);
		}

		/* Enough pods in total but no gap is big enough. */
		mtx_unlock(&td->ppod_lock);
		return (-1);
	}

allocated:
	pr->used = n;
	pr->free = 0;
	td->nppods_free -= n;
	mtx_unlock(&td->ppod_lock);

	return (ppod);
}

/*
 * Release the page pods described by pr (the inverse of alloc_ppods).
 *
 * The pods of pr, together with the free gap that followed it, are merged
 * into the gap of the preceding region on the list, or into the head gap if
 * pr was the first region.  pr is then unlinked from td->ppods.
 * td->ppod_lock is taken and dropped internally.
 */
static void
free_ppods(struct tom_data *td, struct ppod_region *pr)
{
	struct ppod_region *p;

	KASSERT(pr->used > 0, ("%s: nonsense free (%d)", __func__, pr->used));

	mtx_lock(&td->ppod_lock);
	p = TAILQ_PREV(pr, ppod_head, link);
	if (p != NULL)
		p->free += pr->used + pr->free;
	else
		td->nppods_free_head += pr->used + pr->free;
	/* Only pr->used was ever subtracted from the total free count. */
	td->nppods_free += pr->used;
	KASSERT(td->nppods_free <= td->nppods,
	    ("%s: nppods_free (%d) > nppods (%d). %d freed this time.",
	    __func__, td->nppods_free, td->nppods, pr->used));
	TAILQ_REMOVE(&td->ppods, pr, link);
	mtx_unlock(&td->ppod_lock);
}

/*
 * Number of page pods needed for npages VM pages when the DDP page size is
 * ddp_pgsz bytes: the VM pages are re-expressed as DDP-sized segments and
 * the segment count is rounded up to whole pods of PPOD_PAGES entries.
 */
static inline int
pages_to_nppods(int npages, int ddp_pgsz)
{
	int nsegs = npages * PAGE_SIZE / ddp_pgsz;

	return (howmany(nsegs, PPOD_PAGES));
}

/*
 * Free a DDP buffer: its page array, its page pods (if it owns any) and the
 * descriptor itself.  A NULL db is tolerated.
 */
static void
free_ddp_buffer(struct tom_data *td, struct ddp_buffer *db)
{

	if (db == NULL)
		return;

	if (db->pages)
		free(db->pages, M_CXGBE);

	if (db->nppods > 0)
		free_ppods(td, &db->ppod_region);

	free(db, M_CXGBE);
}

/*
 * Free every DDP buffer attached to the toepcb and clear the slots so that
 * a repeated call is harmless.
 */
void
release_ddp_resources(struct toepcb *toep)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(toep->db); i++) {
		if (toep->db[i] != NULL) {
			free_ddp_buffer(toep->td, toep->db[i]);
			toep->db[i] = NULL;
		}
	}
}

/* SET_TCB_FIELD sent as a ULP command looks like this */
#define LEN__SET_TCB_FIELD_ULP (sizeof(struct ulp_txpkt) + \
    sizeof(struct ulptx_idata) + sizeof(struct cpl_set_tcb_field_core))

/* RX_DATA_ACK sent as a ULP command looks like this */
#define LEN__RX_DATA_ACK_ULP (sizeof(struct ulp_txpkt) + \
    sizeof(struct ulptx_idata) + sizeof(struct cpl_rx_data_ack_core))

/*
 * Write one SET_TCB_FIELD, wrapped as a ULP_TX_PKT master command with an
 * immediate-data sub-command, at ulpmc.  The request asks for no reply
 * (V_NO_REPLY(1)) and updates TCB word 'word' with 'val' under 'mask'.
 *
 * If the command length is not a multiple of 16 a NOOP sub-command is
 * appended as padding.  Returns the address just past everything written,
 * i.e. where the next master command goes.
 */
static inline void *
mk_set_tcb_field_ulp(struct ulp_txpkt *ulpmc, struct toepcb *toep,
    uint64_t word, uint64_t mask, uint64_t val)
{
	struct ulptx_idata *ulpsc;
	struct cpl_set_tcb_field_core *req;

	ulpmc->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) |
	    V_ULP_TXPKT_DEST(0));
	ulpmc->len = htobe32(howmany(LEN__SET_TCB_FIELD_ULP, 16));

	ulpsc = (struct ulptx_idata *)(ulpmc + 1);
	ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM));
	ulpsc->len = htobe32(sizeof(*req));

	req = (struct cpl_set_tcb_field_core *)(ulpsc + 1);
	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tid));
	req->reply_ctrl = htobe16(V_NO_REPLY(1) |
	    V_QUEUENO(toep->ofld_rxq->iq.abs_id));
	req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0));
	req->mask = htobe64(mask);
	req->val = htobe64(val);

	/* Pad to a 16B boundary with a NOOP if necessary. */
	ulpsc = (struct ulptx_idata *)(req + 1);
	if (LEN__SET_TCB_FIELD_ULP % 16) {
		ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP));
		ulpsc->len = htobe32(0);
		return (ulpsc + 1);
	}
	return (ulpsc);
}

/*
 * Write one RX_DATA_ACK (with F_RX_MODULATE_RX set), wrapped as a ULP_TX_PKT
 * master command with an immediate-data sub-command, at ulpmc.  Padding and
 * return value follow the same rules as mk_set_tcb_field_ulp().
 */
static inline void *
mk_rx_data_ack_ulp(struct ulp_txpkt *ulpmc, struct toepcb *toep)
{
	struct ulptx_idata *ulpsc;
	struct cpl_rx_data_ack_core *req;

	ulpmc->cmd_dest = htobe32(V_ULPTX_CMD(ULP_TX_PKT) |
	    V_ULP_TXPKT_DEST(0));
	ulpmc->len = htobe32(howmany(LEN__RX_DATA_ACK_ULP, 16));

	ulpsc = (struct ulptx_idata *)(ulpmc + 1);
	ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM));
	ulpsc->len = htobe32(sizeof(*req));

	req = (struct cpl_rx_data_ack_core *)(ulpsc + 1);
	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tid));
	req->credit_dack = htobe32(F_RX_MODULATE_RX);

	/* Pad to a 16B boundary with a NOOP if necessary. */
	ulpsc = (struct ulptx_idata *)(req + 1);
	if (LEN__RX_DATA_ACK_ULP % 16) {
		ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP));
		ulpsc->len = htobe32(0);
		return (ulpsc + 1);
	}
	return (ulpsc);
}

/*
 * Compute the DDP flags to program for a receive into buffer db_idx (0 or 1).
 *
 * The chosen buffer is marked valid and active.  On top of that:
 *  - MSG_WAITALL requests PUSH_DISABLE for the buffer,
 *  - otherwise a non-blocking receive (SS_NBIO on the socket, or
 *    MSG_DONTWAIT / MSG_NBIO in flags) requests FLUSH for the buffer,
 *  - otherwise neither is requested.
 */
static inline uint64_t
select_ddp_flags(struct socket *so, int flags, int db_idx)
{
	uint64_t ddp_flags = V_TF_DDP_INDICATE_OUT(0);
	int waitall = flags & MSG_WAITALL;
	int nb = so->so_state & SS_NBIO || flags & (MSG_DONTWAIT | MSG_NBIO);

	KASSERT(db_idx == 0 || db_idx == 1,
	    ("%s: bad DDP buffer index %d", __func__, db_idx));

	if (db_idx == 0) {
		ddp_flags |= V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(0);
		if (waitall)
			ddp_flags |= V_TF_DDP_PUSH_DISABLE_0(1);
		else if (nb)
			ddp_flags |= V_TF_DDP_BUF0_FLUSH(1);
		else
			ddp_flags |= V_TF_DDP_BUF0_FLUSH(0);
	} else {
		ddp_flags |= V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1);
		if (waitall)
			ddp_flags |= V_TF_DDP_PUSH_DISABLE_1(1);
		else if (nb)
			ddp_flags |= V_TF_DDP_BUF1_FLUSH(1);
		else
			ddp_flags |= V_TF_DDP_BUF1_FLUSH(0);
	}

	return (ddp_flags);
}

/*
 * Build (but do not send) the work request that arms DDP buffer db_idx of
 * the connection: it writes the buffer's tag, its starting offset and total
 * length, and the new DDP flags into the TCB, then adds an RX_DATA_ACK.
 *
 * Returns the work request, allocated against toep->ctrlq, or NULL if the
 * allocation failed.  sc is not used here.
 */
static struct wrqe *
mk_update_tcb_for_ddp(struct adapter *sc, struct toepcb *toep, int db_idx,
    int offset, uint64_t ddp_flags)
{
	struct ddp_buffer *db;
	struct wrqe *wr;
	struct work_request_hdr *wrh;
	struct ulp_txpkt *ulpmc;
	int len;

	KASSERT(db_idx == 0 || db_idx == 1,
	    ("%s: bad DDP buffer index %d", __func__, db_idx));

	/* Index the array only after db_idx has been validated. */
	db = toep->db[db_idx];

	/*
	 * We'll send a compound work request that has 3 SET_TCB_FIELDs and an
	 * RX_DATA_ACK (with RX_MODULATE to speed up delivery).
	 *
	 * The work request header is 16B and always ends at a 16B boundary.
	 * The ULPTX master commands that follow must all end at 16B boundaries
	 * too so we round up the size to 16.
	 */
	len = sizeof(*wrh) + 3 * roundup(LEN__SET_TCB_FIELD_ULP, 16) +
	    roundup(LEN__RX_DATA_ACK_ULP, 16);

	wr = alloc_wrqe(len, toep->ctrlq);
	if (wr == NULL)
		return (NULL);
	wrh = wrtod(wr);
	INIT_ULPTX_WRH(wrh, len, 1, 0);	/* atomic */
	ulpmc = (struct ulp_txpkt *)(wrh + 1);

	/* Write the buffer's tag */
	ulpmc = mk_set_tcb_field_ulp(ulpmc, toep,
	    W_TCB_RX_DDP_BUF0_TAG + db_idx,
	    V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG),
	    V_TCB_RX_DDP_BUF0_TAG(db->tag));

	/* Update the current offset in the DDP buffer and its total length */
	if (db_idx == 0)
		ulpmc = mk_set_tcb_field_ulp(ulpmc, toep,
		    W_TCB_RX_DDP_BUF0_OFFSET,
		    V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
		    V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
		    V_TCB_RX_DDP_BUF0_OFFSET(offset) |
		    V_TCB_RX_DDP_BUF0_LEN(db->len));
	else
		ulpmc = mk_set_tcb_field_ulp(ulpmc, toep,
		    W_TCB_RX_DDP_BUF1_OFFSET,
		    V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
		    V_TCB_RX_DDP_BUF1_LEN((u64)M_TCB_RX_DDP_BUF1_LEN << 32),
		    V_TCB_RX_DDP_BUF1_OFFSET(offset) |
		    V_TCB_RX_DDP_BUF1_LEN((u64)db->len << 32));

	/* Update DDP flags */
	ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, W_TCB_RX_DDP_FLAGS,
	    V_TF_DDP_BUF0_FLUSH(1) | V_TF_DDP_BUF1_FLUSH(1) |
	    V_TF_DDP_PUSH_DISABLE_0(1) | V_TF_DDP_PUSH_DISABLE_1(1) |
	    V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_BUF1_VALID(1) |
	    V_TF_DDP_ACTIVE_BUF(1) | V_TF_DDP_INDICATE_OUT(1), ddp_flags);

	/* Gratuitous RX_DATA_ACK with RX_MODULATE set to speed up delivery. */
	ulpmc = mk_rx_data_ack_ulp(ulpmc, toep);

	return (wr);
}

/*
 * Lower the connection's DDP score by one (if it is not already zero).  When
 * the score reaches zero DDP_OK is cleared and the time is recorded in
 * toep->ddp_disabled.
 */
static void
discourage_ddp(struct toepcb *toep)
{

	if (toep->ddp_score && --toep->ddp_score == 0) {
		toep->ddp_flags &= ~DDP_OK;
		toep->ddp_disabled = time_uptime;
		CTR3(KTR_CXGBE, "%s: tid %u !DDP_OK @ %u",
		    __func__, toep->tid, time_uptime);
	}
}

/*
 * Common handling for a DDP report from the chip.
 *
 * ddp_report and rcv_nxt are big-endian values taken from the CPL; len is
 * the payload length in host order.  The function advances the tcpcb's
 * receive state, appends a placeholder M_DDP mbuf of the right length to the
 * socket's receive buffer (the payload itself has already been placed by the
 * hardware), marks the reported buffer inactive and wakes up the reader.
 *
 * If the inpcb is already dropped or in time-wait only the buffer-inactive
 * bookkeeping and the wakeup are performed.  Always returns 0.
 */
static int
handle_ddp_data(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, int len)
{
	uint32_t report = be32toh(ddp_report);
	unsigned int db_flag;
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp;
	struct socket *so;
	struct sockbuf *sb;
	struct mbuf *m;

	db_flag = report & F_DDP_BUF_IDX ? DDP_BUF1_ACTIVE : DDP_BUF0_ACTIVE;

	if (__predict_false(!(report & F_DDP_INV)))
		CXGBE_UNIMPLEMENTED("DDP buffer still valid");

	INP_WLOCK(inp);
	so = inp_inpcbtosocket(inp);
	sb = &so->so_rcv;
	if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))) {

		/*
		 * XXX: think a bit more.
		 * tcpcb probably gone, but socket should still be around
		 * because we always wait for DDP completion in soreceive no
		 * matter what.  Just wake it up and let it clean up.
		 */

		CTR5(KTR_CXGBE, "%s: tid %u, seq 0x%x, len %d, inp_flags 0x%x",
		    __func__, toep->tid, be32toh(rcv_nxt), len, inp->inp_flags);
		SOCKBUF_LOCK(sb);
		goto wakeup;
	}

	tp = intotcpcb(inp);
	/* Account for the gap between the reported seq and our rcv_nxt. */
	len += be32toh(rcv_nxt) - tp->rcv_nxt;
	tp->rcv_nxt += len;
	tp->t_rcvtime = ticks;
#ifndef USE_DDP_RX_FLOW_CONTROL
	KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__));
	tp->rcv_wnd -= len;
#endif

	m = m_get(M_NOWAIT, MT_DATA);
	if (m == NULL)
		CXGBE_UNIMPLEMENTED("mbuf alloc failure");
	m->m_len = len;
	m->m_flags |= M_DDP;	/* Data is already where it should be */
	m->m_data = "nothing to see here";

	SOCKBUF_LOCK(sb);
	if (report & F_DDP_BUF_COMPLETE)
		toep->ddp_score = DDP_HIGH_SCORE;
	else
		discourage_ddp(toep);

	KASSERT(toep->sb_cc >= sb->sb_cc,
	    ("%s: sb %p has more data (%d) than last time (%d).",
	    __func__, sb, sb->sb_cc, toep->sb_cc));
	/* Whatever left the sockbuf since last time becomes rx credit. */
	toep->rx_credits += toep->sb_cc - sb->sb_cc;
#ifdef USE_DDP_RX_FLOW_CONTROL
	toep->rx_credits -= len;	/* adjust for F_RX_FC_DDP */
#endif
	sbappendstream_locked(sb, m);
	toep->sb_cc = sb->sb_cc;
wakeup:
	KASSERT(toep->ddp_flags & db_flag,
	    ("%s: DDP buffer not active. toep %p, ddp_flags 0x%x, report 0x%x",
	    __func__, toep, toep->ddp_flags, report));
	toep->ddp_flags &= ~db_flag;
	sorwakeup_locked(so);
	SOCKBUF_UNLOCK_ASSERT(sb);

	INP_WUNLOCK(inp);
	return (0);
}

/* Every error bit that can be set in the ddpvld field of an RX_DATA_DDP. */
#define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\
	F_DDP_PPOD_PARITY_ERR | F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\
	F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\
	F_DDP_INVALID_PPOD | F_DDP_HDRCRC_ERR | F_DDP_DATACRC_ERR)

/*
 * Handler for a cpl_rx_data_ddp message.  Panics if any DDP_ERR bit is set
 * in ddpvld, otherwise hands the report, sequence number and length to
 * handle_ddp_data().  No payload mbuf is expected.  Always returns 0.
 */
static int
do_rx_data_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_rx_data_ddp *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	uint32_t vld;
	struct toepcb *toep = lookup_tid(sc, tid);

	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__));
	KASSERT(!toepcb_flag(toep, TPF_SYNQE),
	    ("%s: toep %p claims to be a synq entry", __func__, toep));

	vld = be32toh(cpl->ddpvld);
	if (__predict_false(vld & DDP_ERR)) {
		panic("%s: DDP error 0x%x (tid %d, toep %p)",
		    __func__, vld, tid, toep);
	}

	handle_ddp_data(toep, cpl->u.ddp_report, cpl->seq, be16toh(cpl->len));

	return (0);
}

/*
 * Handler for a cpl_rx_ddp_complete message.  Forwards the report and
 * rcv_nxt to handle_ddp_data() with a length of 0.  No payload mbuf is
 * expected.  Always returns 0.
 */
static int
do_rx_ddp_complete(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_rx_ddp_complete *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);

	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__));
	KASSERT(!toepcb_flag(toep, TPF_SYNQE),
	    ("%s: toep %p claims to be a synq entry", __func__, toep));

	handle_ddp_data(toep, cpl->ddp_report, cpl->rcv_nxt, 0);

	return (0);
}

/*
 * Request that DDP be switched on for the connection.
 *
 * Must be called with DDP_OK set and neither DDP_ON nor DDP_SC_REQ set.
 * Sets DDP_SC_REQ and issues two TCB updates: one to the DDP flags word and
 * one to T_FLAGS touching RCV_COALESCE_ENABLE.
 * NOTE(review): the last two arguments of t4_set_tcb_field() look like
 * (mask, value) -- confirm against its definition.
 */
void
enable_ddp(struct adapter *sc, struct toepcb *toep)
{

	KASSERT((toep->ddp_flags & (DDP_ON | DDP_OK | DDP_SC_REQ)) == DDP_OK,
	    ("%s: toep %p has bad ddp_flags 0x%x",
	    __func__, toep, toep->ddp_flags));

	CTR3(KTR_CXGBE, "%s: tid %u (time %u)",
	    __func__, toep->tid, time_uptime);

	toep->ddp_flags |= DDP_SC_REQ;
	t4_set_tcb_field(sc, toep, W_TCB_RX_DDP_FLAGS,
	    V_TF_DDP_OFF(1) | V_TF_DDP_INDICATE_OUT(1) |
	    V_TF_DDP_BUF0_INDICATE(1) | V_TF_DDP_BUF1_INDICATE(1) |
	    V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_BUF1_VALID(1),
	    V_TF_DDP_BUF0_INDICATE(1) | V_TF_DDP_BUF1_INDICATE(1));
	t4_set_tcb_field(sc, toep, W_TCB_T_FLAGS,
	    V_TF_RCV_COALESCE_ENABLE(1), 0);
}

/*
 * Request that DDP be switched off for the connection.
 *
 * Must be called with DDP_ON set and DDP_SC_REQ clear.  Sets DDP_SC_REQ and
 * issues two TCB updates: one to T_FLAGS touching RCV_COALESCE_ENABLE and
 * one to the DDP flags word touching DDP_OFF.
 */
static inline void
disable_ddp(struct adapter *sc, struct toepcb *toep)
{

	KASSERT((toep->ddp_flags & (DDP_ON | DDP_SC_REQ)) == DDP_ON,
	    ("%s: toep %p has bad ddp_flags 0x%x",
	    __func__, toep, toep->ddp_flags));

	CTR3(KTR_CXGBE, "%s: tid %u (time %u)",
	    __func__, toep->tid, time_uptime);

	toep->ddp_flags |= DDP_SC_REQ;
	t4_set_tcb_field(sc, toep, W_TCB_T_FLAGS,
	    V_TF_RCV_COALESCE_ENABLE(1), V_TF_RCV_COALESCE_ENABLE(1));
	t4_set_tcb_field(sc, toep, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1),
	    V_TF_DDP_OFF(1));
}

/*
 * Hold the pages that back the (single) user iovec of uio, with write
 * access, so that they can be used as a DDP buffer.
 *
 * On success returns 0 and stores a newly allocated page array in *ppages
 * and its length in *pnpages; the caller owns the array (M_CXGBE).
 * Returns E2BIG if the page-rounded span exceeds MAX_DDP_BUFFER_SIZE, ENOMEM
 * if the array cannot be allocated, or EFAULT if the pages cannot be held.
 * *ppages and *pnpages are untouched on failure.
 */
static int
hold_uio(struct uio *uio, vm_page_t **ppages, int *pnpages)
{
	struct vm_map *map;
	struct iovec *iov;
	vm_offset_t start, end;
	vm_page_t *pp;
	int n;

	KASSERT(uio->uio_iovcnt == 1,
	    ("%s: uio_iovcnt %d", __func__, uio->uio_iovcnt));
	KASSERT(uio->uio_td->td_proc == curproc,
	    ("%s: uio proc (%p) is not curproc (%p)",
	    __func__, uio->uio_td->td_proc, curproc));

	map = &curproc->p_vmspace->vm_map;
	iov = &uio->uio_iov[0];
	start = trunc_page((uintptr_t)iov->iov_base);
	end = round_page((vm_offset_t)iov->iov_base + iov->iov_len);

	/* Bound the span before narrowing the page count to an int. */
	if (end - start > MAX_DDP_BUFFER_SIZE)
		return (E2BIG);
	n = howmany(end - start, PAGE_SIZE);

	pp = malloc(n * sizeof(vm_page_t), M_CXGBE, M_NOWAIT);
	if (pp == NULL)
		return (ENOMEM);

	if (vm_fault_quick_hold_pages(map, (vm_offset_t)iov->iov_base,
	    iov->iov_len, VM_PROT_WRITE, pp, n) < 0) {
		free(pp, M_CXGBE);
		return (EFAULT);
	}

	*ppages = pp;
	*pnpages = n;

	return (0);
}

/*
 * Compare an existing DDP buffer with a candidate described by its pages,
 * page count, offset and length.  Returns 0 if db is non-NULL and matches
 * in every respect (same counts and the same physical address for every
 * page), 1 otherwise.
 */
static int
bufcmp(struct ddp_buffer *db, vm_page_t *pages, int npages, int offset, int len)
{
	int i;

	if (db == NULL || db->npages != npages || db->offset != offset ||
	    db->len != len)
		return (1);

	for (i = 0; i < npages; i++) {
		if (pages[i]->phys_addr != db->pages[i]->phys_addr)
			return (1);
	}

	return (0);
}

/*
 * Highest common factor of n1 and n2, computed with Euclid's algorithm
 * starting from the smaller of the two values.
 */
static int
calculate_hcf(int n1, int n2)
{
	int a, b, t;

	if (n1 <= n2) {
		a = n1;
		b = n2;
	} else {
		a = n2;
		b = n1;
	}

	while (a != 0) {
		t = a;
		a = b % a;
		b = t;
	}

	return (b);
}

static struct ddp_buffer *
alloc_ddp_buffer(struct tom_data *td, vm_page_t *pages, int npages, int offset,
    int len)
{
	int i, hcf, seglen, idx, ppod, nppods;
	struct ddp_buffer *db;

	/*
	 * The DDP page size is unrelated to the VM page size. We combine
	 * contiguous physical pages into larger segments to get the best DDP
	 * page size possible.
This is the largest of the four sizes in 637*e682d02eSNavdeep Parhar * A_ULP_RX_TDDP_PSZ that evenly divides the HCF of the segment sizes in 638*e682d02eSNavdeep Parhar * the page list. 639*e682d02eSNavdeep Parhar */ 640*e682d02eSNavdeep Parhar hcf = 0; 641*e682d02eSNavdeep Parhar for (i = 0; i < npages; i++) { 642*e682d02eSNavdeep Parhar seglen = PAGE_SIZE; 643*e682d02eSNavdeep Parhar while (i < npages - 1 && 644*e682d02eSNavdeep Parhar pages[i]->phys_addr + PAGE_SIZE == pages[i + 1]->phys_addr) { 645*e682d02eSNavdeep Parhar seglen += PAGE_SIZE; 646*e682d02eSNavdeep Parhar i++; 647*e682d02eSNavdeep Parhar } 648*e682d02eSNavdeep Parhar 649*e682d02eSNavdeep Parhar hcf = calculate_hcf(hcf, seglen); 650*e682d02eSNavdeep Parhar if (hcf < t4_ddp_pgsz[1]) { 651*e682d02eSNavdeep Parhar idx = 0; 652*e682d02eSNavdeep Parhar goto have_pgsz; /* give up, short circuit */ 653*e682d02eSNavdeep Parhar } 654*e682d02eSNavdeep Parhar } 655*e682d02eSNavdeep Parhar 656*e682d02eSNavdeep Parhar if (hcf % t4_ddp_pgsz[0] != 0) { 657*e682d02eSNavdeep Parhar /* hmmm. 
This could only happen when PAGE_SIZE < 4K */ 658*e682d02eSNavdeep Parhar KASSERT(PAGE_SIZE < 4096, 659*e682d02eSNavdeep Parhar ("%s: PAGE_SIZE %d, hcf %d", __func__, PAGE_SIZE, hcf)); 660*e682d02eSNavdeep Parhar CTR3(KTR_CXGBE, "%s: PAGE_SIZE %d, hcf %d", 661*e682d02eSNavdeep Parhar __func__, PAGE_SIZE, hcf); 662*e682d02eSNavdeep Parhar return (NULL); 663*e682d02eSNavdeep Parhar } 664*e682d02eSNavdeep Parhar 665*e682d02eSNavdeep Parhar for (idx = ARRAY_SIZE(t4_ddp_pgsz) - 1; idx > 0; idx--) { 666*e682d02eSNavdeep Parhar if (hcf % t4_ddp_pgsz[idx] == 0) 667*e682d02eSNavdeep Parhar break; 668*e682d02eSNavdeep Parhar } 669*e682d02eSNavdeep Parhar have_pgsz: 670*e682d02eSNavdeep Parhar 671*e682d02eSNavdeep Parhar db = malloc(sizeof(*db), M_CXGBE, M_NOWAIT); 672*e682d02eSNavdeep Parhar if (db == NULL) { 673*e682d02eSNavdeep Parhar CTR1(KTR_CXGBE, "%s: malloc failed.", __func__); 674*e682d02eSNavdeep Parhar return (NULL); 675*e682d02eSNavdeep Parhar } 676*e682d02eSNavdeep Parhar 677*e682d02eSNavdeep Parhar nppods = pages_to_nppods(npages, t4_ddp_pgsz[idx]); 678*e682d02eSNavdeep Parhar ppod = alloc_ppods(td, nppods, &db->ppod_region); 679*e682d02eSNavdeep Parhar if (ppod < 0) { 680*e682d02eSNavdeep Parhar free(db, M_CXGBE); 681*e682d02eSNavdeep Parhar CTR4(KTR_CXGBE, "%s: no pods, nppods %d, resid %d, pgsz %d", 682*e682d02eSNavdeep Parhar __func__, nppods, len, t4_ddp_pgsz[idx]); 683*e682d02eSNavdeep Parhar return (NULL); 684*e682d02eSNavdeep Parhar } 685*e682d02eSNavdeep Parhar 686*e682d02eSNavdeep Parhar KASSERT(idx <= M_PPOD_PGSZ && ppod <= M_PPOD_TAG, 687*e682d02eSNavdeep Parhar ("%s: DDP pgsz_idx = %d, ppod = %d", __func__, idx, ppod)); 688*e682d02eSNavdeep Parhar 689*e682d02eSNavdeep Parhar db->tag = V_PPOD_PGSZ(idx) | V_PPOD_TAG(ppod); 690*e682d02eSNavdeep Parhar db->nppods = nppods; 691*e682d02eSNavdeep Parhar db->npages = npages; 692*e682d02eSNavdeep Parhar db->pages = pages; 693*e682d02eSNavdeep Parhar db->offset = offset; 694*e682d02eSNavdeep Parhar db->len = 
len; 695*e682d02eSNavdeep Parhar 696*e682d02eSNavdeep Parhar CTR6(KTR_CXGBE, "New DDP buffer. " 697*e682d02eSNavdeep Parhar "ddp_pgsz %d, ppod 0x%x, npages %d, nppods %d, offset %d, len %d", 698*e682d02eSNavdeep Parhar t4_ddp_pgsz[idx], ppod, db->npages, db->nppods, db->offset, 699*e682d02eSNavdeep Parhar db->len); 700*e682d02eSNavdeep Parhar 701*e682d02eSNavdeep Parhar return (db); 702*e682d02eSNavdeep Parhar } 703*e682d02eSNavdeep Parhar 704*e682d02eSNavdeep Parhar #define NUM_ULP_TX_SC_IMM_PPODS (256 / PPOD_SIZE) 705*e682d02eSNavdeep Parhar 706*e682d02eSNavdeep Parhar static int 707*e682d02eSNavdeep Parhar write_page_pods(struct adapter *sc, struct toepcb *toep, struct ddp_buffer *db) 708*e682d02eSNavdeep Parhar { 709*e682d02eSNavdeep Parhar struct wrqe *wr; 710*e682d02eSNavdeep Parhar struct ulp_mem_io *ulpmc; 711*e682d02eSNavdeep Parhar struct ulptx_idata *ulpsc; 712*e682d02eSNavdeep Parhar struct pagepod *ppod; 713*e682d02eSNavdeep Parhar int i, j, k, n, chunk, len, ddp_pgsz, idx, ppod_addr; 714*e682d02eSNavdeep Parhar 715*e682d02eSNavdeep Parhar ddp_pgsz = t4_ddp_pgsz[G_PPOD_PGSZ(db->tag)]; 716*e682d02eSNavdeep Parhar ppod_addr = sc->vres.ddp.start + G_PPOD_TAG(db->tag) * PPOD_SIZE; 717*e682d02eSNavdeep Parhar for (i = 0; i < db->nppods; ppod_addr += chunk) { 718*e682d02eSNavdeep Parhar 719*e682d02eSNavdeep Parhar /* How many page pods are we writing in this cycle */ 720*e682d02eSNavdeep Parhar n = min(db->nppods - i, NUM_ULP_TX_SC_IMM_PPODS); 721*e682d02eSNavdeep Parhar chunk = PPOD_SZ(n); 722*e682d02eSNavdeep Parhar len = roundup(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16); 723*e682d02eSNavdeep Parhar 724*e682d02eSNavdeep Parhar wr = alloc_wrqe(len, toep->ctrlq); 725*e682d02eSNavdeep Parhar if (wr == NULL) 726*e682d02eSNavdeep Parhar return (ENOMEM); /* ok to just bail out */ 727*e682d02eSNavdeep Parhar ulpmc = wrtod(wr); 728*e682d02eSNavdeep Parhar 729*e682d02eSNavdeep Parhar INIT_ULPTX_WR(ulpmc, len, 0, 0); 730*e682d02eSNavdeep Parhar ulpmc->cmd = 
htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE) | 731*e682d02eSNavdeep Parhar F_ULP_MEMIO_ORDER); 732*e682d02eSNavdeep Parhar ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32)); 733*e682d02eSNavdeep Parhar ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16)); 734*e682d02eSNavdeep Parhar ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5)); 735*e682d02eSNavdeep Parhar 736*e682d02eSNavdeep Parhar ulpsc = (struct ulptx_idata *)(ulpmc + 1); 737*e682d02eSNavdeep Parhar ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM)); 738*e682d02eSNavdeep Parhar ulpsc->len = htobe32(chunk); 739*e682d02eSNavdeep Parhar 740*e682d02eSNavdeep Parhar ppod = (struct pagepod *)(ulpsc + 1); 741*e682d02eSNavdeep Parhar for (j = 0; j < n; i++, j++, ppod++) { 742*e682d02eSNavdeep Parhar ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID | 743*e682d02eSNavdeep Parhar V_PPOD_TID(toep->tid) | db->tag); 744*e682d02eSNavdeep Parhar ppod->len_offset = htobe64(V_PPOD_LEN(db->len) | 745*e682d02eSNavdeep Parhar V_PPOD_OFST(db->offset)); 746*e682d02eSNavdeep Parhar ppod->rsvd = 0; 747*e682d02eSNavdeep Parhar idx = i * PPOD_PAGES * (ddp_pgsz / PAGE_SIZE); 748*e682d02eSNavdeep Parhar for (k = 0; k < ARRAY_SIZE(ppod->addr); k++) { 749*e682d02eSNavdeep Parhar if (idx < db->npages) { 750*e682d02eSNavdeep Parhar ppod->addr[k] = 751*e682d02eSNavdeep Parhar htobe64(db->pages[idx]->phys_addr); 752*e682d02eSNavdeep Parhar idx += ddp_pgsz / PAGE_SIZE; 753*e682d02eSNavdeep Parhar } else 754*e682d02eSNavdeep Parhar ppod->addr[k] = 0; 755*e682d02eSNavdeep Parhar #if 0 756*e682d02eSNavdeep Parhar CTR5(KTR_CXGBE, 757*e682d02eSNavdeep Parhar "%s: tid %d ppod[%d]->addr[%d] = %p", 758*e682d02eSNavdeep Parhar __func__, toep->tid, i, k, 759*e682d02eSNavdeep Parhar htobe64(ppod->addr[k])); 760*e682d02eSNavdeep Parhar #endif 761*e682d02eSNavdeep Parhar } 762*e682d02eSNavdeep Parhar 763*e682d02eSNavdeep Parhar } 764*e682d02eSNavdeep Parhar 765*e682d02eSNavdeep Parhar t4_wrq_tx(sc, wr); 766*e682d02eSNavdeep Parhar 
} 767*e682d02eSNavdeep Parhar 768*e682d02eSNavdeep Parhar return (0); 769*e682d02eSNavdeep Parhar } 770*e682d02eSNavdeep Parhar 771*e682d02eSNavdeep Parhar /* 772*e682d02eSNavdeep Parhar * Reuse, or allocate (and program the page pods for) a new DDP buffer. 773*e682d02eSNavdeep Parhar */ 774*e682d02eSNavdeep Parhar static int 775*e682d02eSNavdeep Parhar select_ddp_buffer(struct adapter *sc, struct toepcb *toep, vm_page_t *pages, 776*e682d02eSNavdeep Parhar int npages, int db_off, int db_len) 777*e682d02eSNavdeep Parhar { 778*e682d02eSNavdeep Parhar struct ddp_buffer *db; 779*e682d02eSNavdeep Parhar struct tom_data *td = sc->tom_softc; 780*e682d02eSNavdeep Parhar int i, empty_slot = -1; 781*e682d02eSNavdeep Parhar 782*e682d02eSNavdeep Parhar /* Try to reuse */ 783*e682d02eSNavdeep Parhar for (i = 0; i < ARRAY_SIZE(toep->db); i++) { 784*e682d02eSNavdeep Parhar if (bufcmp(toep->db[i], pages, npages, db_off, db_len) == 0) { 785*e682d02eSNavdeep Parhar free(pages, M_CXGBE); 786*e682d02eSNavdeep Parhar return (i); /* pages still held */ 787*e682d02eSNavdeep Parhar } else if (toep->db[i] == NULL && empty_slot < 0) 788*e682d02eSNavdeep Parhar empty_slot = i; 789*e682d02eSNavdeep Parhar } 790*e682d02eSNavdeep Parhar 791*e682d02eSNavdeep Parhar /* Allocate new buffer, write its page pods. 
*/ 792*e682d02eSNavdeep Parhar db = alloc_ddp_buffer(td, pages, npages, db_off, db_len); 793*e682d02eSNavdeep Parhar if (db == NULL) { 794*e682d02eSNavdeep Parhar vm_page_unhold_pages(pages, npages); 795*e682d02eSNavdeep Parhar free(pages, M_CXGBE); 796*e682d02eSNavdeep Parhar return (-1); 797*e682d02eSNavdeep Parhar } 798*e682d02eSNavdeep Parhar if (write_page_pods(sc, toep, db) != 0) { 799*e682d02eSNavdeep Parhar vm_page_unhold_pages(pages, npages); 800*e682d02eSNavdeep Parhar free_ddp_buffer(td, db); 801*e682d02eSNavdeep Parhar return (-1); 802*e682d02eSNavdeep Parhar } 803*e682d02eSNavdeep Parhar 804*e682d02eSNavdeep Parhar i = empty_slot; 805*e682d02eSNavdeep Parhar if (i < 0) { 806*e682d02eSNavdeep Parhar i = arc4random() % ARRAY_SIZE(toep->db); 807*e682d02eSNavdeep Parhar free_ddp_buffer(td, toep->db[i]); 808*e682d02eSNavdeep Parhar } 809*e682d02eSNavdeep Parhar toep->db[i] = db; 810*e682d02eSNavdeep Parhar 811*e682d02eSNavdeep Parhar CTR5(KTR_CXGBE, "%s: tid %d, DDP buffer[%d] = %p (tag 0x%x)", 812*e682d02eSNavdeep Parhar __func__, toep->tid, i, db, db->tag); 813*e682d02eSNavdeep Parhar 814*e682d02eSNavdeep Parhar return (i); 815*e682d02eSNavdeep Parhar } 816*e682d02eSNavdeep Parhar 817*e682d02eSNavdeep Parhar static void 818*e682d02eSNavdeep Parhar wire_ddp_buffer(struct ddp_buffer *db) 819*e682d02eSNavdeep Parhar { 820*e682d02eSNavdeep Parhar int i; 821*e682d02eSNavdeep Parhar vm_page_t p; 822*e682d02eSNavdeep Parhar 823*e682d02eSNavdeep Parhar for (i = 0; i < db->npages; i++) { 824*e682d02eSNavdeep Parhar p = db->pages[i]; 825*e682d02eSNavdeep Parhar vm_page_lock(p); 826*e682d02eSNavdeep Parhar vm_page_wire(p); 827*e682d02eSNavdeep Parhar vm_page_unhold(p); 828*e682d02eSNavdeep Parhar vm_page_unlock(p); 829*e682d02eSNavdeep Parhar } 830*e682d02eSNavdeep Parhar } 831*e682d02eSNavdeep Parhar 832*e682d02eSNavdeep Parhar static void 833*e682d02eSNavdeep Parhar unwire_ddp_buffer(struct ddp_buffer *db) 834*e682d02eSNavdeep Parhar { 835*e682d02eSNavdeep Parhar 
int i; 836*e682d02eSNavdeep Parhar vm_page_t p; 837*e682d02eSNavdeep Parhar 838*e682d02eSNavdeep Parhar for (i = 0; i < db->npages; i++) { 839*e682d02eSNavdeep Parhar p = db->pages[i]; 840*e682d02eSNavdeep Parhar vm_page_lock(p); 841*e682d02eSNavdeep Parhar vm_page_unwire(p, 0); 842*e682d02eSNavdeep Parhar vm_page_unlock(p); 843*e682d02eSNavdeep Parhar } 844*e682d02eSNavdeep Parhar } 845*e682d02eSNavdeep Parhar 846*e682d02eSNavdeep Parhar static inline void 847*e682d02eSNavdeep Parhar unhold_ddp_buffer(struct ddp_buffer *db) 848*e682d02eSNavdeep Parhar { 849*e682d02eSNavdeep Parhar 850*e682d02eSNavdeep Parhar vm_page_unhold_pages(db->pages, db->npages); 851*e682d02eSNavdeep Parhar } 852*e682d02eSNavdeep Parhar 853*e682d02eSNavdeep Parhar static int 854*e682d02eSNavdeep Parhar handle_ddp(struct socket *so, struct uio *uio, int flags, int error) 855*e682d02eSNavdeep Parhar { 856*e682d02eSNavdeep Parhar struct sockbuf *sb = &so->so_rcv; 857*e682d02eSNavdeep Parhar struct tcpcb *tp = so_sototcpcb(so); 858*e682d02eSNavdeep Parhar struct toepcb *toep = tp->t_toe; 859*e682d02eSNavdeep Parhar struct adapter *sc = td_adapter(toep->td); 860*e682d02eSNavdeep Parhar vm_page_t *pages; 861*e682d02eSNavdeep Parhar int npages, db_idx, rc, buf_flag; 862*e682d02eSNavdeep Parhar struct ddp_buffer *db; 863*e682d02eSNavdeep Parhar struct wrqe *wr; 864*e682d02eSNavdeep Parhar uint64_t ddp_flags; 865*e682d02eSNavdeep Parhar 866*e682d02eSNavdeep Parhar SOCKBUF_LOCK_ASSERT(sb); 867*e682d02eSNavdeep Parhar 868*e682d02eSNavdeep Parhar #if 0 869*e682d02eSNavdeep Parhar if (sb->sb_cc + sc->tt.ddp_thres > uio->uio_resid) { 870*e682d02eSNavdeep Parhar CTR4(KTR_CXGBE, "%s: sb_cc %d, threshold %d, resid %d", 871*e682d02eSNavdeep Parhar __func__, sb->sb_cc, sc->tt.ddp_thres, uio->uio_resid); 872*e682d02eSNavdeep Parhar } 873*e682d02eSNavdeep Parhar #endif 874*e682d02eSNavdeep Parhar 875*e682d02eSNavdeep Parhar /* XXX: too eager to disable DDP, could handle NBIO better than this. 
*/ 876*e682d02eSNavdeep Parhar if (sb->sb_cc >= uio->uio_resid || uio->uio_resid < sc->tt.ddp_thres || 877*e682d02eSNavdeep Parhar uio->uio_resid > MAX_DDP_BUFFER_SIZE || uio->uio_iovcnt > 1 || 878*e682d02eSNavdeep Parhar so->so_state & SS_NBIO || flags & (MSG_DONTWAIT | MSG_NBIO) || 879*e682d02eSNavdeep Parhar error || so->so_error || sb->sb_state & SBS_CANTRCVMORE) 880*e682d02eSNavdeep Parhar goto no_ddp; 881*e682d02eSNavdeep Parhar 882*e682d02eSNavdeep Parhar /* 883*e682d02eSNavdeep Parhar * Fault in and then hold the pages of the uio buffers. We'll wire them 884*e682d02eSNavdeep Parhar * a bit later if everything else works out. 885*e682d02eSNavdeep Parhar */ 886*e682d02eSNavdeep Parhar if (hold_uio(uio, &pages, &npages) != 0) 887*e682d02eSNavdeep Parhar goto no_ddp; 888*e682d02eSNavdeep Parhar 889*e682d02eSNavdeep Parhar /* 890*e682d02eSNavdeep Parhar * Figure out which one of the two DDP buffers to use this time. 891*e682d02eSNavdeep Parhar */ 892*e682d02eSNavdeep Parhar db_idx = select_ddp_buffer(sc, toep, pages, npages, 893*e682d02eSNavdeep Parhar (uintptr_t)uio->uio_iov->iov_base & PAGE_MASK, uio->uio_resid); 894*e682d02eSNavdeep Parhar pages = NULL; /* pages either in use elsewhere or unheld + freed */ 895*e682d02eSNavdeep Parhar if (db_idx < 0) 896*e682d02eSNavdeep Parhar goto no_ddp; 897*e682d02eSNavdeep Parhar db = toep->db[db_idx]; 898*e682d02eSNavdeep Parhar buf_flag = db_idx == 0 ? DDP_BUF0_ACTIVE : DDP_BUF1_ACTIVE; 899*e682d02eSNavdeep Parhar 900*e682d02eSNavdeep Parhar /* 901*e682d02eSNavdeep Parhar * Build the compound work request that tells the chip where to DMA the 902*e682d02eSNavdeep Parhar * payload. 
903*e682d02eSNavdeep Parhar */ 904*e682d02eSNavdeep Parhar ddp_flags = select_ddp_flags(so, flags, db_idx); 905*e682d02eSNavdeep Parhar wr = mk_update_tcb_for_ddp(sc, toep, db_idx, sb->sb_cc, ddp_flags); 906*e682d02eSNavdeep Parhar if (wr == NULL) { 907*e682d02eSNavdeep Parhar unhold_ddp_buffer(db); 908*e682d02eSNavdeep Parhar goto no_ddp; 909*e682d02eSNavdeep Parhar } 910*e682d02eSNavdeep Parhar 911*e682d02eSNavdeep Parhar /* Wire the pages and give the chip the go-ahead. */ 912*e682d02eSNavdeep Parhar wire_ddp_buffer(db); 913*e682d02eSNavdeep Parhar t4_wrq_tx(sc, wr); 914*e682d02eSNavdeep Parhar sb->sb_flags &= ~SB_DDP_INDICATE; 915*e682d02eSNavdeep Parhar toep->ddp_flags |= buf_flag; 916*e682d02eSNavdeep Parhar 917*e682d02eSNavdeep Parhar /* 918*e682d02eSNavdeep Parhar * Wait for the DDP operation to complete and then unwire the pages. 919*e682d02eSNavdeep Parhar * The return code from the sbwait will be the final return code of this 920*e682d02eSNavdeep Parhar * function. But we do need to wait for DDP no matter what. 
921*e682d02eSNavdeep Parhar */ 922*e682d02eSNavdeep Parhar rc = sbwait(sb); 923*e682d02eSNavdeep Parhar while (toep->ddp_flags & buf_flag) { 924*e682d02eSNavdeep Parhar sb->sb_flags |= SB_WAIT; 925*e682d02eSNavdeep Parhar msleep(&sb->sb_cc, &sb->sb_mtx, PSOCK , "sbwait", 0); 926*e682d02eSNavdeep Parhar } 927*e682d02eSNavdeep Parhar unwire_ddp_buffer(db); 928*e682d02eSNavdeep Parhar return (rc); 929*e682d02eSNavdeep Parhar no_ddp: 930*e682d02eSNavdeep Parhar disable_ddp(sc, toep); 931*e682d02eSNavdeep Parhar discourage_ddp(toep); 932*e682d02eSNavdeep Parhar sb->sb_flags &= ~SB_DDP_INDICATE; 933*e682d02eSNavdeep Parhar return (0); 934*e682d02eSNavdeep Parhar } 935*e682d02eSNavdeep Parhar 936*e682d02eSNavdeep Parhar void 937*e682d02eSNavdeep Parhar t4_init_ddp(struct adapter *sc, struct tom_data *td) 938*e682d02eSNavdeep Parhar { 939*e682d02eSNavdeep Parhar int nppods = sc->vres.ddp.size / PPOD_SIZE; 940*e682d02eSNavdeep Parhar 941*e682d02eSNavdeep Parhar td->nppods = nppods; 942*e682d02eSNavdeep Parhar td->nppods_free = nppods; 943*e682d02eSNavdeep Parhar td->nppods_free_head = nppods; 944*e682d02eSNavdeep Parhar TAILQ_INIT(&td->ppods); 945*e682d02eSNavdeep Parhar mtx_init(&td->ppod_lock, "page pods", NULL, MTX_DEF); 946*e682d02eSNavdeep Parhar 947*e682d02eSNavdeep Parhar t4_register_cpl_handler(sc, CPL_RX_DATA_DDP, do_rx_data_ddp); 948*e682d02eSNavdeep Parhar t4_register_cpl_handler(sc, CPL_RX_DDP_COMPLETE, do_rx_ddp_complete); 949*e682d02eSNavdeep Parhar } 950*e682d02eSNavdeep Parhar 951*e682d02eSNavdeep Parhar void 952*e682d02eSNavdeep Parhar t4_uninit_ddp(struct adapter *sc __unused, struct tom_data *td) 953*e682d02eSNavdeep Parhar { 954*e682d02eSNavdeep Parhar 955*e682d02eSNavdeep Parhar KASSERT(td->nppods == td->nppods_free, 956*e682d02eSNavdeep Parhar ("%s: page pods still in use, nppods = %d, free = %d", 957*e682d02eSNavdeep Parhar __func__, td->nppods, td->nppods_free)); 958*e682d02eSNavdeep Parhar 959*e682d02eSNavdeep Parhar if 
(mtx_initialized(&td->ppod_lock)) 960*e682d02eSNavdeep Parhar mtx_destroy(&td->ppod_lock); 961*e682d02eSNavdeep Parhar } 962*e682d02eSNavdeep Parhar 963*e682d02eSNavdeep Parhar #define VNET_SO_ASSERT(so) \ 964*e682d02eSNavdeep Parhar VNET_ASSERT(curvnet != NULL, \ 965*e682d02eSNavdeep Parhar ("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so))); 966*e682d02eSNavdeep Parhar #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT) 967*e682d02eSNavdeep Parhar static int 968*e682d02eSNavdeep Parhar soreceive_rcvoob(struct socket *so, struct uio *uio, int flags) 969*e682d02eSNavdeep Parhar { 970*e682d02eSNavdeep Parhar 971*e682d02eSNavdeep Parhar CXGBE_UNIMPLEMENTED(__func__); 972*e682d02eSNavdeep Parhar } 973*e682d02eSNavdeep Parhar 974*e682d02eSNavdeep Parhar /* 975*e682d02eSNavdeep Parhar * Copy an mbuf chain into a uio limited by len if set. 976*e682d02eSNavdeep Parhar */ 977*e682d02eSNavdeep Parhar static int 978*e682d02eSNavdeep Parhar m_mbuftouio_ddp(struct uio *uio, struct mbuf *m, int len) 979*e682d02eSNavdeep Parhar { 980*e682d02eSNavdeep Parhar int error, length, total; 981*e682d02eSNavdeep Parhar int progress = 0; 982*e682d02eSNavdeep Parhar 983*e682d02eSNavdeep Parhar if (len > 0) 984*e682d02eSNavdeep Parhar total = min(uio->uio_resid, len); 985*e682d02eSNavdeep Parhar else 986*e682d02eSNavdeep Parhar total = uio->uio_resid; 987*e682d02eSNavdeep Parhar 988*e682d02eSNavdeep Parhar /* Fill the uio with data from the mbufs. 
*/ 989*e682d02eSNavdeep Parhar for (; m != NULL; m = m->m_next) { 990*e682d02eSNavdeep Parhar length = min(m->m_len, total - progress); 991*e682d02eSNavdeep Parhar 992*e682d02eSNavdeep Parhar if (m->m_flags & M_DDP) { 993*e682d02eSNavdeep Parhar enum uio_seg segflag = uio->uio_segflg; 994*e682d02eSNavdeep Parhar 995*e682d02eSNavdeep Parhar uio->uio_segflg = UIO_NOCOPY; 996*e682d02eSNavdeep Parhar error = uiomove(mtod(m, void *), length, uio); 997*e682d02eSNavdeep Parhar uio->uio_segflg = segflag; 998*e682d02eSNavdeep Parhar } else 999*e682d02eSNavdeep Parhar error = uiomove(mtod(m, void *), length, uio); 1000*e682d02eSNavdeep Parhar if (error) 1001*e682d02eSNavdeep Parhar return (error); 1002*e682d02eSNavdeep Parhar 1003*e682d02eSNavdeep Parhar progress += length; 1004*e682d02eSNavdeep Parhar } 1005*e682d02eSNavdeep Parhar 1006*e682d02eSNavdeep Parhar return (0); 1007*e682d02eSNavdeep Parhar } 1008*e682d02eSNavdeep Parhar 1009*e682d02eSNavdeep Parhar /* 1010*e682d02eSNavdeep Parhar * Based on soreceive_stream() in uipc_socket.c 1011*e682d02eSNavdeep Parhar */ 1012*e682d02eSNavdeep Parhar int 1013*e682d02eSNavdeep Parhar t4_soreceive_ddp(struct socket *so, struct sockaddr **psa, struct uio *uio, 1014*e682d02eSNavdeep Parhar struct mbuf **mp0, struct mbuf **controlp, int *flagsp) 1015*e682d02eSNavdeep Parhar { 1016*e682d02eSNavdeep Parhar int len = 0, error = 0, flags, oresid, ddp_handled = 0; 1017*e682d02eSNavdeep Parhar struct sockbuf *sb; 1018*e682d02eSNavdeep Parhar struct mbuf *m, *n = NULL; 1019*e682d02eSNavdeep Parhar 1020*e682d02eSNavdeep Parhar /* We only do stream sockets. 
*/ 1021*e682d02eSNavdeep Parhar if (so->so_type != SOCK_STREAM) 1022*e682d02eSNavdeep Parhar return (EINVAL); 1023*e682d02eSNavdeep Parhar if (psa != NULL) 1024*e682d02eSNavdeep Parhar *psa = NULL; 1025*e682d02eSNavdeep Parhar if (controlp != NULL) 1026*e682d02eSNavdeep Parhar return (EINVAL); 1027*e682d02eSNavdeep Parhar if (flagsp != NULL) 1028*e682d02eSNavdeep Parhar flags = *flagsp &~ MSG_EOR; 1029*e682d02eSNavdeep Parhar else 1030*e682d02eSNavdeep Parhar flags = 0; 1031*e682d02eSNavdeep Parhar if (flags & MSG_OOB) 1032*e682d02eSNavdeep Parhar return (soreceive_rcvoob(so, uio, flags)); 1033*e682d02eSNavdeep Parhar if (mp0 != NULL) 1034*e682d02eSNavdeep Parhar *mp0 = NULL; 1035*e682d02eSNavdeep Parhar 1036*e682d02eSNavdeep Parhar sb = &so->so_rcv; 1037*e682d02eSNavdeep Parhar 1038*e682d02eSNavdeep Parhar /* Prevent other readers from entering the socket. */ 1039*e682d02eSNavdeep Parhar error = sblock(sb, SBLOCKWAIT(flags)); 1040*e682d02eSNavdeep Parhar if (error) 1041*e682d02eSNavdeep Parhar goto out; 1042*e682d02eSNavdeep Parhar SOCKBUF_LOCK(sb); 1043*e682d02eSNavdeep Parhar 1044*e682d02eSNavdeep Parhar /* Easy one, no space to copyout anything. */ 1045*e682d02eSNavdeep Parhar if (uio->uio_resid == 0) { 1046*e682d02eSNavdeep Parhar error = EINVAL; 1047*e682d02eSNavdeep Parhar goto out; 1048*e682d02eSNavdeep Parhar } 1049*e682d02eSNavdeep Parhar oresid = uio->uio_resid; 1050*e682d02eSNavdeep Parhar 1051*e682d02eSNavdeep Parhar /* We will never ever get anything unless we are or were connected. 
*/ 1052*e682d02eSNavdeep Parhar if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) { 1053*e682d02eSNavdeep Parhar error = ENOTCONN; 1054*e682d02eSNavdeep Parhar goto out; 1055*e682d02eSNavdeep Parhar } 1056*e682d02eSNavdeep Parhar 1057*e682d02eSNavdeep Parhar restart: 1058*e682d02eSNavdeep Parhar SOCKBUF_LOCK_ASSERT(&so->so_rcv); 1059*e682d02eSNavdeep Parhar 1060*e682d02eSNavdeep Parhar if (sb->sb_flags & SB_DDP_INDICATE && !ddp_handled) { 1061*e682d02eSNavdeep Parhar 1062*e682d02eSNavdeep Parhar /* uio should be just as it was at entry */ 1063*e682d02eSNavdeep Parhar KASSERT(oresid == uio->uio_resid, 1064*e682d02eSNavdeep Parhar ("%s: oresid = %d, uio_resid = %zd, sb_cc = %d", 1065*e682d02eSNavdeep Parhar __func__, oresid, uio->uio_resid, sb->sb_cc)); 1066*e682d02eSNavdeep Parhar 1067*e682d02eSNavdeep Parhar error = handle_ddp(so, uio, flags, 0); 1068*e682d02eSNavdeep Parhar ddp_handled = 1; 1069*e682d02eSNavdeep Parhar if (error) 1070*e682d02eSNavdeep Parhar goto out; 1071*e682d02eSNavdeep Parhar } 1072*e682d02eSNavdeep Parhar 1073*e682d02eSNavdeep Parhar /* Abort if socket has reported problems. */ 1074*e682d02eSNavdeep Parhar if (so->so_error) { 1075*e682d02eSNavdeep Parhar if (sb->sb_cc > 0) 1076*e682d02eSNavdeep Parhar goto deliver; 1077*e682d02eSNavdeep Parhar if (oresid > uio->uio_resid) 1078*e682d02eSNavdeep Parhar goto out; 1079*e682d02eSNavdeep Parhar error = so->so_error; 1080*e682d02eSNavdeep Parhar if (!(flags & MSG_PEEK)) 1081*e682d02eSNavdeep Parhar so->so_error = 0; 1082*e682d02eSNavdeep Parhar goto out; 1083*e682d02eSNavdeep Parhar } 1084*e682d02eSNavdeep Parhar 1085*e682d02eSNavdeep Parhar /* Door is closed. Deliver what is left, if any. 
*/ 1086*e682d02eSNavdeep Parhar if (sb->sb_state & SBS_CANTRCVMORE) { 1087*e682d02eSNavdeep Parhar if (sb->sb_cc > 0) 1088*e682d02eSNavdeep Parhar goto deliver; 1089*e682d02eSNavdeep Parhar else 1090*e682d02eSNavdeep Parhar goto out; 1091*e682d02eSNavdeep Parhar } 1092*e682d02eSNavdeep Parhar 1093*e682d02eSNavdeep Parhar /* Socket buffer is empty and we shall not block. */ 1094*e682d02eSNavdeep Parhar if (sb->sb_cc == 0 && 1095*e682d02eSNavdeep Parhar ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) { 1096*e682d02eSNavdeep Parhar error = EAGAIN; 1097*e682d02eSNavdeep Parhar goto out; 1098*e682d02eSNavdeep Parhar } 1099*e682d02eSNavdeep Parhar 1100*e682d02eSNavdeep Parhar /* Socket buffer got some data that we shall deliver now. */ 1101*e682d02eSNavdeep Parhar if (sb->sb_cc > 0 && !(flags & MSG_WAITALL) && 1102*e682d02eSNavdeep Parhar ((sb->sb_flags & SS_NBIO) || 1103*e682d02eSNavdeep Parhar (flags & (MSG_DONTWAIT|MSG_NBIO)) || 1104*e682d02eSNavdeep Parhar sb->sb_cc >= sb->sb_lowat || 1105*e682d02eSNavdeep Parhar sb->sb_cc >= uio->uio_resid || 1106*e682d02eSNavdeep Parhar sb->sb_cc >= sb->sb_hiwat) ) { 1107*e682d02eSNavdeep Parhar goto deliver; 1108*e682d02eSNavdeep Parhar } 1109*e682d02eSNavdeep Parhar 1110*e682d02eSNavdeep Parhar /* On MSG_WAITALL we must wait until all data or error arrives. */ 1111*e682d02eSNavdeep Parhar if ((flags & MSG_WAITALL) && 1112*e682d02eSNavdeep Parhar (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_lowat)) 1113*e682d02eSNavdeep Parhar goto deliver; 1114*e682d02eSNavdeep Parhar 1115*e682d02eSNavdeep Parhar /* 1116*e682d02eSNavdeep Parhar * Wait and block until (more) data comes in. 1117*e682d02eSNavdeep Parhar * NB: Drops the sockbuf lock during wait. 
1118*e682d02eSNavdeep Parhar */ 1119*e682d02eSNavdeep Parhar error = sbwait(sb); 1120*e682d02eSNavdeep Parhar if (error) { 1121*e682d02eSNavdeep Parhar if (sb->sb_flags & SB_DDP_INDICATE && !ddp_handled) { 1122*e682d02eSNavdeep Parhar (void) handle_ddp(so, uio, flags, 1); 1123*e682d02eSNavdeep Parhar ddp_handled = 1; 1124*e682d02eSNavdeep Parhar } 1125*e682d02eSNavdeep Parhar goto out; 1126*e682d02eSNavdeep Parhar } 1127*e682d02eSNavdeep Parhar goto restart; 1128*e682d02eSNavdeep Parhar 1129*e682d02eSNavdeep Parhar deliver: 1130*e682d02eSNavdeep Parhar SOCKBUF_LOCK_ASSERT(&so->so_rcv); 1131*e682d02eSNavdeep Parhar KASSERT(sb->sb_cc > 0, ("%s: sockbuf empty", __func__)); 1132*e682d02eSNavdeep Parhar KASSERT(sb->sb_mb != NULL, ("%s: sb_mb == NULL", __func__)); 1133*e682d02eSNavdeep Parhar 1134*e682d02eSNavdeep Parhar if (sb->sb_flags & SB_DDP_INDICATE && !ddp_handled) 1135*e682d02eSNavdeep Parhar goto restart; 1136*e682d02eSNavdeep Parhar 1137*e682d02eSNavdeep Parhar /* Statistics. */ 1138*e682d02eSNavdeep Parhar if (uio->uio_td) 1139*e682d02eSNavdeep Parhar uio->uio_td->td_ru.ru_msgrcv++; 1140*e682d02eSNavdeep Parhar 1141*e682d02eSNavdeep Parhar /* Fill uio until full or current end of socket buffer is reached. */ 1142*e682d02eSNavdeep Parhar len = min(uio->uio_resid, sb->sb_cc); 1143*e682d02eSNavdeep Parhar if (mp0 != NULL) { 1144*e682d02eSNavdeep Parhar /* Dequeue as many mbufs as possible. 
*/ 1145*e682d02eSNavdeep Parhar if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) { 1146*e682d02eSNavdeep Parhar for (*mp0 = m = sb->sb_mb; 1147*e682d02eSNavdeep Parhar m != NULL && m->m_len <= len; 1148*e682d02eSNavdeep Parhar m = m->m_next) { 1149*e682d02eSNavdeep Parhar len -= m->m_len; 1150*e682d02eSNavdeep Parhar uio->uio_resid -= m->m_len; 1151*e682d02eSNavdeep Parhar sbfree(sb, m); 1152*e682d02eSNavdeep Parhar n = m; 1153*e682d02eSNavdeep Parhar } 1154*e682d02eSNavdeep Parhar sb->sb_mb = m; 1155*e682d02eSNavdeep Parhar if (sb->sb_mb == NULL) 1156*e682d02eSNavdeep Parhar SB_EMPTY_FIXUP(sb); 1157*e682d02eSNavdeep Parhar n->m_next = NULL; 1158*e682d02eSNavdeep Parhar } 1159*e682d02eSNavdeep Parhar /* Copy the remainder. */ 1160*e682d02eSNavdeep Parhar if (len > 0) { 1161*e682d02eSNavdeep Parhar KASSERT(sb->sb_mb != NULL, 1162*e682d02eSNavdeep Parhar ("%s: len > 0 && sb->sb_mb empty", __func__)); 1163*e682d02eSNavdeep Parhar 1164*e682d02eSNavdeep Parhar m = m_copym(sb->sb_mb, 0, len, M_DONTWAIT); 1165*e682d02eSNavdeep Parhar if (m == NULL) 1166*e682d02eSNavdeep Parhar len = 0; /* Don't flush data from sockbuf. */ 1167*e682d02eSNavdeep Parhar else 1168*e682d02eSNavdeep Parhar uio->uio_resid -= m->m_len; 1169*e682d02eSNavdeep Parhar if (*mp0 != NULL) 1170*e682d02eSNavdeep Parhar n->m_next = m; 1171*e682d02eSNavdeep Parhar else 1172*e682d02eSNavdeep Parhar *mp0 = m; 1173*e682d02eSNavdeep Parhar if (*mp0 == NULL) { 1174*e682d02eSNavdeep Parhar error = ENOBUFS; 1175*e682d02eSNavdeep Parhar goto out; 1176*e682d02eSNavdeep Parhar } 1177*e682d02eSNavdeep Parhar } 1178*e682d02eSNavdeep Parhar } else { 1179*e682d02eSNavdeep Parhar /* NB: Must unlock socket buffer as uiomove may sleep. 
*/ 1180*e682d02eSNavdeep Parhar SOCKBUF_UNLOCK(sb); 1181*e682d02eSNavdeep Parhar error = m_mbuftouio_ddp(uio, sb->sb_mb, len); 1182*e682d02eSNavdeep Parhar SOCKBUF_LOCK(sb); 1183*e682d02eSNavdeep Parhar if (error) 1184*e682d02eSNavdeep Parhar goto out; 1185*e682d02eSNavdeep Parhar } 1186*e682d02eSNavdeep Parhar SBLASTRECORDCHK(sb); 1187*e682d02eSNavdeep Parhar SBLASTMBUFCHK(sb); 1188*e682d02eSNavdeep Parhar 1189*e682d02eSNavdeep Parhar /* 1190*e682d02eSNavdeep Parhar * Remove the delivered data from the socket buffer unless we 1191*e682d02eSNavdeep Parhar * were only peeking. 1192*e682d02eSNavdeep Parhar */ 1193*e682d02eSNavdeep Parhar if (!(flags & MSG_PEEK)) { 1194*e682d02eSNavdeep Parhar if (len > 0) 1195*e682d02eSNavdeep Parhar sbdrop_locked(sb, len); 1196*e682d02eSNavdeep Parhar 1197*e682d02eSNavdeep Parhar /* Notify protocol that we drained some data. */ 1198*e682d02eSNavdeep Parhar if ((so->so_proto->pr_flags & PR_WANTRCVD) && 1199*e682d02eSNavdeep Parhar (((flags & MSG_WAITALL) && uio->uio_resid > 0) || 1200*e682d02eSNavdeep Parhar !(flags & MSG_SOCALLBCK))) { 1201*e682d02eSNavdeep Parhar SOCKBUF_UNLOCK(sb); 1202*e682d02eSNavdeep Parhar VNET_SO_ASSERT(so); 1203*e682d02eSNavdeep Parhar (*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags); 1204*e682d02eSNavdeep Parhar SOCKBUF_LOCK(sb); 1205*e682d02eSNavdeep Parhar } 1206*e682d02eSNavdeep Parhar } 1207*e682d02eSNavdeep Parhar 1208*e682d02eSNavdeep Parhar /* 1209*e682d02eSNavdeep Parhar * For MSG_WAITALL we may have to loop again and wait for 1210*e682d02eSNavdeep Parhar * more data to come in. 
1211*e682d02eSNavdeep Parhar */ 1212*e682d02eSNavdeep Parhar if ((flags & MSG_WAITALL) && uio->uio_resid > 0) 1213*e682d02eSNavdeep Parhar goto restart; 1214*e682d02eSNavdeep Parhar out: 1215*e682d02eSNavdeep Parhar SOCKBUF_LOCK_ASSERT(sb); 1216*e682d02eSNavdeep Parhar SBLASTRECORDCHK(sb); 1217*e682d02eSNavdeep Parhar SBLASTMBUFCHK(sb); 1218*e682d02eSNavdeep Parhar SOCKBUF_UNLOCK(sb); 1219*e682d02eSNavdeep Parhar sbunlock(sb); 1220*e682d02eSNavdeep Parhar return (error); 1221*e682d02eSNavdeep Parhar } 1222*e682d02eSNavdeep Parhar 1223*e682d02eSNavdeep Parhar #endif 1224