/*-
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 * Written by: Navdeep Parhar <np@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"

#include <sys/param.h>
#include <sys/aio.h>
#include <sys/file.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/proc.h>
#include <sys/domain.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/taskqueue.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/ip.h>
#include <netinet/tcp_var.h>
#define TCPSTATES
#include <netinet/tcp_fsm.h>
#include <netinet/toecore.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>

#ifdef TCP_OFFLOAD
#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"
#include "common/t4_tcb.h"
#include "tom/t4_tom.h"

VNET_DECLARE(int, tcp_do_autorcvbuf);
#define V_tcp_do_autorcvbuf VNET(tcp_do_autorcvbuf)
VNET_DECLARE(int, tcp_autorcvbuf_inc);
#define V_tcp_autorcvbuf_inc VNET(tcp_autorcvbuf_inc)
VNET_DECLARE(int, tcp_autorcvbuf_max);
#define V_tcp_autorcvbuf_max VNET(tcp_autorcvbuf_max)

/*
 * Use the 'backend3' field in AIO jobs to store the amount of data
 * received by the AIO job so far.
 */
#define aio_received backend3

static void aio_ddp_requeue_task(void *context, int pending);
static void ddp_complete_all(struct toepcb *toep, int error);
static void t4_aio_cancel_active(struct kaiocb *job);
static void t4_aio_cancel_queued(struct kaiocb *job);

#define PPOD_SZ(n)	((n) * sizeof(struct pagepod))
#define PPOD_SIZE	(PPOD_SZ(1))

/* XXX: must match A_ULP_RX_TDDP_PSZ */
static int t4_ddp_pgsz[] = {4096, 4096 << 2, 4096 << 4, 4096 << 6};

static TAILQ_HEAD(, pageset) ddp_orphan_pagesets;
static struct mtx ddp_orphan_pagesets_lock;
static struct task ddp_orphan_task;

#define MAX_DDP_BUFFER_SIZE	(M_TCB_RX_DDP_BUF0_LEN)
static int
alloc_ppods(struct tom_data *td, int n, u_int *ppod_addr)
{
	vmem_addr_t v;
	int rc;

	MPASS(n > 0);

	rc = vmem_alloc(td->ppod_arena, PPOD_SZ(n), M_NOWAIT | M_FIRSTFIT, &v);
	*ppod_addr = (u_int)v;

	return (rc);
}

static void
free_ppods(struct tom_data *td, u_int ppod_addr, int n)
{

	MPASS(n > 0);

	vmem_free(td->ppod_arena, (vmem_addr_t)ppod_addr, PPOD_SZ(n));
}

static inline int
pages_to_nppods(int npages, int ddp_pgsz)
{
	int nsegs = npages * PAGE_SIZE / ddp_pgsz;

	return (howmany(nsegs, PPOD_PAGES));
}
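
/*
 * Example for pages_to_nppods(), assuming 4KB VM pages: a 256KB buffer
 * (64 VM pages) mapped with the 64KB DDP page size spans 4 DDP pages
 * and therefore needs howmany(4, PPOD_PAGES) page pods.
 */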

/*
 * A page set holds information about a buffer used for DDP.  The page
 * set holds resources such as the VM pages backing the buffer (either
 * held or wired) and the page pods associated with the buffer.
 * Recently used page sets are cached to allow for efficient reuse of
 * buffers (avoiding the need to re-fault in pages, hold them, etc.).
 * Note that cached page sets keep the backing pages wired.  The
 * number of wired pages is capped by only allowing for two wired
 * pagesets per connection.  This is not a perfect cap, but is a
 * trade-off for performance.
 *
 * If an application ping-pongs two buffers for a connection via
 * aio_read(2) then those buffers should remain wired and expensive VM
 * fault lookups should be avoided after each buffer has been used
 * once.  If an application uses more than two buffers then this will
 * fall back to doing expensive VM fault lookups for each operation.
 */
static void
free_pageset(struct tom_data *td, struct pageset *ps)
{
	vm_page_t p;
	int i;

	if (ps->nppods > 0)
		free_ppods(td, ps->ppod_addr, ps->nppods);

	if (ps->flags & PS_WIRED) {
		for (i = 0; i < ps->npages; i++) {
			p = ps->pages[i];
			vm_page_lock(p);
			vm_page_unwire(p, PQ_INACTIVE);
			vm_page_unlock(p);
		}
	} else
		vm_page_unhold_pages(ps->pages, ps->npages);
	mtx_lock(&ddp_orphan_pagesets_lock);
	TAILQ_INSERT_TAIL(&ddp_orphan_pagesets, ps, link);
	taskqueue_enqueue(taskqueue_thread, &ddp_orphan_task);
	mtx_unlock(&ddp_orphan_pagesets_lock);
}

static void
ddp_free_orphan_pagesets(void *context, int pending)
{
	struct pageset *ps;

	mtx_lock(&ddp_orphan_pagesets_lock);
	while (!TAILQ_EMPTY(&ddp_orphan_pagesets)) {
		ps = TAILQ_FIRST(&ddp_orphan_pagesets);
		TAILQ_REMOVE(&ddp_orphan_pagesets, ps, link);
		mtx_unlock(&ddp_orphan_pagesets_lock);
		if (ps->vm)
			vmspace_free(ps->vm);
		free(ps, M_CXGBE);
		mtx_lock(&ddp_orphan_pagesets_lock);
	}
	mtx_unlock(&ddp_orphan_pagesets_lock);
}

static void
recycle_pageset(struct toepcb *toep, struct pageset *ps)
{

	DDP_ASSERT_LOCKED(toep);
	if (!(toep->ddp_flags & DDP_DEAD) && ps->flags & PS_WIRED) {
		KASSERT(toep->ddp_cached_count + toep->ddp_active_count <
		    nitems(toep->db), ("too many wired pagesets"));
		TAILQ_INSERT_HEAD(&toep->ddp_cached_pagesets, ps, link);
		toep->ddp_cached_count++;
	} else
		free_pageset(toep->td, ps);
}

static void
ddp_complete_one(struct kaiocb *job, int error)
{
	long copied;

	/*
	 * If this job had copied data out of the socket buffer before
	 * it was cancelled, report it as a short read rather than an
	 * error.
	 */
	copied = job->aio_received;
	if (copied != 0 || error == 0)
		aio_complete(job, copied, 0);
	else
		aio_complete(job, -1, error);
}

static void
free_ddp_buffer(struct tom_data *td, struct ddp_buffer *db)
{

	if (db->job) {
		/*
		 * XXX: If we are un-offloading the socket then we
		 * should requeue these on the socket somehow.  If we
		 * got a FIN from the remote end, then this completes
		 * any remaining requests with an EOF read.
		 */
		if (!aio_clear_cancel_function(db->job))
			ddp_complete_one(db->job, 0);
	}

	if (db->ps)
		free_pageset(td, db->ps);
}

void
ddp_init_toep(struct toepcb *toep)
{

	TAILQ_INIT(&toep->ddp_aiojobq);
	TASK_INIT(&toep->ddp_requeue_task, 0, aio_ddp_requeue_task, toep);
	toep->ddp_active_id = -1;
	mtx_init(&toep->ddp_lock, "t4 ddp", NULL, MTX_DEF);
}

void
ddp_uninit_toep(struct toepcb *toep)
{

	mtx_destroy(&toep->ddp_lock);
}

void
release_ddp_resources(struct toepcb *toep)
{
	struct pageset *ps;
	int i;

	DDP_LOCK(toep);
	toep->ddp_flags |= DDP_DEAD;
	for (i = 0; i < nitems(toep->db); i++) {
		free_ddp_buffer(toep->td, &toep->db[i]);
	}
	while ((ps = TAILQ_FIRST(&toep->ddp_cached_pagesets)) != NULL) {
		TAILQ_REMOVE(&toep->ddp_cached_pagesets, ps, link);
		free_pageset(toep->td, ps);
	}
	ddp_complete_all(toep, 0);
	DDP_UNLOCK(toep);
}

#ifdef INVARIANTS
void
ddp_assert_empty(struct toepcb *toep)
{
	int i;

	MPASS(!(toep->ddp_flags & DDP_TASK_ACTIVE));
	for (i = 0; i < nitems(toep->db); i++) {
		MPASS(toep->db[i].job == NULL);
		MPASS(toep->db[i].ps == NULL);
	}
	MPASS(TAILQ_EMPTY(&toep->ddp_cached_pagesets));
	MPASS(TAILQ_EMPTY(&toep->ddp_aiojobq));
}
#endif
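
/*
 * DDP uses the two buffers in toep->db[] in a ping-pong fashion:
 * ddp_active_id is the index of the buffer expected to complete next
 * and ddp_active_count is the number of buffers currently posted to
 * the hardware.  That bookkeeping is unwound here when a buffer
 * completes or is invalidated.
 */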

static void
complete_ddp_buffer(struct toepcb *toep, struct ddp_buffer *db,
    unsigned int db_idx)
{
	unsigned int db_flag;

	toep->ddp_active_count--;
	if (toep->ddp_active_id == db_idx) {
		if (toep->ddp_active_count == 0) {
			KASSERT(toep->db[db_idx ^ 1].job == NULL,
			    ("%s: active_count mismatch", __func__));
			toep->ddp_active_id = -1;
		} else
			toep->ddp_active_id ^= 1;
#ifdef VERBOSE_TRACES
		CTR2(KTR_CXGBE, "%s: ddp_active_id = %d", __func__,
		    toep->ddp_active_id);
#endif
	} else {
		KASSERT(toep->ddp_active_count != 0 &&
		    toep->ddp_active_id != -1,
		    ("%s: active count mismatch", __func__));
	}

	db->cancel_pending = 0;
	db->job = NULL;
	recycle_pageset(toep, db->ps);
	db->ps = NULL;

	db_flag = db_idx == 1 ? DDP_BUF1_ACTIVE : DDP_BUF0_ACTIVE;
	KASSERT(toep->ddp_flags & db_flag,
	    ("%s: DDP buffer not active. toep %p, ddp_flags 0x%x",
	    __func__, toep, toep->ddp_flags));
	toep->ddp_flags &= ~db_flag;
}

/* XXX: handle_ddp_data code duplication */
void
insert_ddp_data(struct toepcb *toep, uint32_t n)
{
	struct inpcb *inp = toep->inp;
	struct tcpcb *tp = intotcpcb(inp);
	struct ddp_buffer *db;
	struct kaiocb *job;
	size_t placed;
	long copied;
	unsigned int db_flag, db_idx;

	INP_WLOCK_ASSERT(inp);
	DDP_ASSERT_LOCKED(toep);

	tp->rcv_nxt += n;
#ifndef USE_DDP_RX_FLOW_CONTROL
	KASSERT(tp->rcv_wnd >= n, ("%s: negative window size", __func__));
	tp->rcv_wnd -= n;
#endif
#ifndef USE_DDP_RX_FLOW_CONTROL
	toep->rx_credits += n;
#endif
	CTR2(KTR_CXGBE, "%s: placed %u bytes before falling out of DDP",
	    __func__, n);
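
	/*
	 * Credit each job attached to a posted buffer with its share of
	 * the n bytes that were placed before the connection fell out
	 * of DDP.
	 */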
	while (toep->ddp_active_count > 0) {
		MPASS(toep->ddp_active_id != -1);
		db_idx = toep->ddp_active_id;
		db_flag = db_idx == 1 ? DDP_BUF1_ACTIVE : DDP_BUF0_ACTIVE;
		MPASS((toep->ddp_flags & db_flag) != 0);
		db = &toep->db[db_idx];
		job = db->job;
		copied = job->aio_received;
		placed = n;
		if (placed > job->uaiocb.aio_nbytes - copied)
			placed = job->uaiocb.aio_nbytes - copied;
		if (!aio_clear_cancel_function(job)) {
			/*
			 * Update the copied length for when
			 * t4_aio_cancel_active() completes this
			 * request.
			 */
			job->aio_received += placed;
		} else if (copied + placed != 0) {
			CTR4(KTR_CXGBE,
			    "%s: completing %p (copied %ld, placed %lu)",
			    __func__, job, copied, placed);
			/* XXX: This always completes if there is some data. */
			aio_complete(job, copied + placed, 0);
		} else if (aio_set_cancel_function(job, t4_aio_cancel_queued)) {
			TAILQ_INSERT_HEAD(&toep->ddp_aiojobq, job, list);
			toep->ddp_waiting_count++;
		} else
			aio_cancel(job);
		n -= placed;
		complete_ddp_buffer(toep, db, db_idx);
	}

	MPASS(n == 0);
}

/* SET_TCB_FIELD sent as a ULP command looks like this */
#define LEN__SET_TCB_FIELD_ULP (sizeof(struct ulp_txpkt) + \
    sizeof(struct ulptx_idata) + sizeof(struct cpl_set_tcb_field_core))

/* RX_DATA_ACK sent as a ULP command looks like this */
#define LEN__RX_DATA_ACK_ULP (sizeof(struct ulp_txpkt) + \
    sizeof(struct ulptx_idata) + sizeof(struct cpl_rx_data_ack_core))

static inline void *
mk_set_tcb_field_ulp(struct ulp_txpkt *ulpmc, struct toepcb *toep,
    uint64_t word, uint64_t mask, uint64_t val)
{
	struct ulptx_idata *ulpsc;
	struct cpl_set_tcb_field_core *req;

	ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0));
	ulpmc->len = htobe32(howmany(LEN__SET_TCB_FIELD_ULP, 16));

	ulpsc = (struct ulptx_idata *)(ulpmc + 1);
	ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM));
	ulpsc->len = htobe32(sizeof(*req));

	req = (struct cpl_set_tcb_field_core *)(ulpsc + 1);
	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tid));
	req->reply_ctrl = htobe16(V_NO_REPLY(1) |
	    V_QUEUENO(toep->ofld_rxq->iq.abs_id));
	req->word_cookie = htobe16(V_WORD(word) | V_COOKIE(0));
	req->mask = htobe64(mask);
	req->val = htobe64(val);
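
	/*
	 * Pad the sub-command out to a 16B boundary with a NOOP, if
	 * needed, so that the next ULP_TX command starts 16B-aligned.
	 */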
	ulpsc = (struct ulptx_idata *)(req + 1);
	if (LEN__SET_TCB_FIELD_ULP % 16) {
		ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP));
		ulpsc->len = htobe32(0);
		return (ulpsc + 1);
	}
	return (ulpsc);
}

static inline void *
mk_rx_data_ack_ulp(struct ulp_txpkt *ulpmc, struct toepcb *toep)
{
	struct ulptx_idata *ulpsc;
	struct cpl_rx_data_ack_core *req;

	ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0));
	ulpmc->len = htobe32(howmany(LEN__RX_DATA_ACK_ULP, 16));

	ulpsc = (struct ulptx_idata *)(ulpmc + 1);
	ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM));
	ulpsc->len = htobe32(sizeof(*req));

	req = (struct cpl_rx_data_ack_core *)(ulpsc + 1);
	OPCODE_TID(req) = htobe32(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tid));
	req->credit_dack = htobe32(F_RX_MODULATE_RX);

	ulpsc = (struct ulptx_idata *)(req + 1);
	if (LEN__RX_DATA_ACK_ULP % 16) {
		ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_NOOP));
		ulpsc->len = htobe32(0);
		return (ulpsc + 1);
	}
	return (ulpsc);
}

static struct wrqe *
mk_update_tcb_for_ddp(struct adapter *sc, struct toepcb *toep, int db_idx,
    struct pageset *ps, int offset, uint64_t ddp_flags, uint64_t ddp_flags_mask)
{
	struct wrqe *wr;
	struct work_request_hdr *wrh;
	struct ulp_txpkt *ulpmc;
	int len;

	KASSERT(db_idx == 0 || db_idx == 1,
	    ("%s: bad DDP buffer index %d", __func__, db_idx));

	/*
	 * We'll send a compound work request that has 3 SET_TCB_FIELDs and an
	 * RX_DATA_ACK (with RX_MODULATE to speed up delivery).
	 *
	 * The work request header is 16B and always ends at a 16B boundary.
	 * The ULPTX master commands that follow must all end at 16B boundaries
	 * too so we round up the size to 16.
	 */
	len = sizeof(*wrh) + 3 * roundup2(LEN__SET_TCB_FIELD_ULP, 16) +
	    roundup2(LEN__RX_DATA_ACK_ULP, 16);

	wr = alloc_wrqe(len, toep->ctrlq);
	if (wr == NULL)
		return (NULL);
	wrh = wrtod(wr);
	INIT_ULPTX_WRH(wrh, len, 1, 0);	/* atomic */
	ulpmc = (struct ulp_txpkt *)(wrh + 1);

	/* Write the buffer's tag */
	ulpmc = mk_set_tcb_field_ulp(ulpmc, toep,
	    W_TCB_RX_DDP_BUF0_TAG + db_idx,
	    V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG),
	    V_TCB_RX_DDP_BUF0_TAG(ps->tag));

	/* Update the current offset in the DDP buffer and its total length */
	if (db_idx == 0)
		ulpmc = mk_set_tcb_field_ulp(ulpmc, toep,
		    W_TCB_RX_DDP_BUF0_OFFSET,
		    V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |
		    V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN),
		    V_TCB_RX_DDP_BUF0_OFFSET(offset) |
		    V_TCB_RX_DDP_BUF0_LEN(ps->len));
	else
		ulpmc = mk_set_tcb_field_ulp(ulpmc, toep,
		    W_TCB_RX_DDP_BUF1_OFFSET,
		    V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) |
		    V_TCB_RX_DDP_BUF1_LEN((u64)M_TCB_RX_DDP_BUF1_LEN << 32),
		    V_TCB_RX_DDP_BUF1_OFFSET(offset) |
		    V_TCB_RX_DDP_BUF1_LEN((u64)ps->len << 32));

	/* Update DDP flags */
	ulpmc = mk_set_tcb_field_ulp(ulpmc, toep, W_TCB_RX_DDP_FLAGS,
	    ddp_flags_mask, ddp_flags);

	/* Gratuitous RX_DATA_ACK with RX_MODULATE set to speed up delivery. */
	ulpmc = mk_rx_data_ack_ulp(ulpmc, toep);

	return (wr);
}
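
/*
 * Common handling for a hardware report that one of the two DDP
 * buffers has been invalidated; called from do_rx_data_ddp() and
 * do_rx_ddp_complete() below.
 */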

static int
handle_ddp_data(struct toepcb *toep, __be32 ddp_report, __be32 rcv_nxt, int len)
{
	uint32_t report = be32toh(ddp_report);
	unsigned int db_idx;
	struct inpcb *inp = toep->inp;
	struct ddp_buffer *db;
	struct tcpcb *tp;
	struct socket *so;
	struct sockbuf *sb;
	struct kaiocb *job;
	long copied;

	db_idx = report & F_DDP_BUF_IDX ? 1 : 0;

	if (__predict_false(!(report & F_DDP_INV)))
		CXGBE_UNIMPLEMENTED("DDP buffer still valid");

	INP_WLOCK(inp);
	so = inp_inpcbtosocket(inp);
	sb = &so->so_rcv;
	DDP_LOCK(toep);

	KASSERT(toep->ddp_active_id == db_idx,
	    ("completed DDP buffer (%d) != active_id (%d) for tid %d", db_idx,
	    toep->ddp_active_id, toep->tid));
	db = &toep->db[db_idx];
	job = db->job;

	if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))) {
		/*
		 * This can happen due to an administrative tcpdrop(8).
		 * Just fail the request with ECONNRESET.
		 */
		CTR5(KTR_CXGBE, "%s: tid %u, seq 0x%x, len %d, inp_flags 0x%x",
		    __func__, toep->tid, be32toh(rcv_nxt), len, inp->inp_flags);
		if (aio_clear_cancel_function(job))
			ddp_complete_one(job, ECONNRESET);
		goto completed;
	}

	tp = intotcpcb(inp);

	/*
	 * For RX_DDP_COMPLETE, len will be zero and rcv_nxt is the
	 * sequence number of the next byte to receive.  The length of
	 * the data received for this message must be computed by
	 * comparing the new and old values of rcv_nxt.
	 *
	 * For RX_DATA_DDP, len might be non-zero, but it is only the
	 * length of the most recent DMA.  It does not include the
	 * total length of the data received since the previous update
	 * for this DDP buffer.  rcv_nxt is the sequence number of the
	 * first received byte from the most recent DMA.
	 */
	len += be32toh(rcv_nxt) - tp->rcv_nxt;
	tp->rcv_nxt += len;
	tp->t_rcvtime = ticks;
#ifndef USE_DDP_RX_FLOW_CONTROL
	KASSERT(tp->rcv_wnd >= len, ("%s: negative window size", __func__));
	tp->rcv_wnd -= len;
#endif
#ifdef VERBOSE_TRACES
	CTR4(KTR_CXGBE, "%s: DDP[%d] placed %d bytes (%#x)", __func__, db_idx,
	    len, report);
#endif

	/* receive buffer autosize */
	CURVNET_SET(so->so_vnet);
	SOCKBUF_LOCK(sb);
	if (sb->sb_flags & SB_AUTOSIZE &&
	    V_tcp_do_autorcvbuf &&
	    sb->sb_hiwat < V_tcp_autorcvbuf_max &&
	    len > (sbspace(sb) / 8 * 7)) {
		unsigned int hiwat = sb->sb_hiwat;
		unsigned int newsize = min(hiwat + V_tcp_autorcvbuf_inc,
		    V_tcp_autorcvbuf_max);

		if (!sbreserve_locked(sb, newsize, so, NULL))
			sb->sb_flags &= ~SB_AUTOSIZE;
		else
			toep->rx_credits += newsize - hiwat;
	}
	SOCKBUF_UNLOCK(sb);
	CURVNET_RESTORE();

#ifndef USE_DDP_RX_FLOW_CONTROL
	toep->rx_credits += len;
#endif
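
	/*
	 * Decide how to dispose of the job: if a cancel is already
	 * pending, just record the bytes and let the TCB_RPL callback
	 * finish the request; if the cancel function cannot be cleared,
	 * t4_aio_cancel_active() owns completion; otherwise complete
	 * the job here.
	 */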
	if (db->cancel_pending) {
		/*
		 * Update the job's length but defer completion to the
		 * TCB_RPL callback.
		 */
		job->aio_received += len;
		goto out;
	} else if (!aio_clear_cancel_function(job)) {
		/*
		 * Update the copied length for when
		 * t4_aio_cancel_active() completes this request.
		 */
		job->aio_received += len;
	} else {
		copied = job->aio_received;
#ifdef VERBOSE_TRACES
		CTR4(KTR_CXGBE, "%s: completing %p (copied %ld, placed %d)",
		    __func__, job, copied, len);
#endif
		aio_complete(job, copied + len, 0);
		t4_rcvd(&toep->td->tod, tp);
	}

completed:
	complete_ddp_buffer(toep, db, db_idx);
	if (toep->ddp_waiting_count > 0)
		ddp_queue_toep(toep);
out:
	DDP_UNLOCK(toep);
	INP_WUNLOCK(inp);

	return (0);
}

void
handle_ddp_indicate(struct toepcb *toep)
{

	DDP_ASSERT_LOCKED(toep);
	MPASS(toep->ddp_active_count == 0);
	MPASS((toep->ddp_flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE)) == 0);
	if (toep->ddp_waiting_count == 0) {
		/*
		 * The pending requests that triggered the request for
		 * an indicate were cancelled.  Those cancels should
		 * have already disabled DDP.  Just ignore this as the
		 * data is going into the socket buffer anyway.
		 */
		return;
	}
	CTR3(KTR_CXGBE, "%s: tid %d indicated (%d waiting)", __func__,
	    toep->tid, toep->ddp_waiting_count);
	ddp_queue_toep(toep);
}

enum {
	DDP_BUF0_INVALIDATED = 0x2,
	DDP_BUF1_INVALIDATED
};

void
handle_ddp_tcb_rpl(struct toepcb *toep, const struct cpl_set_tcb_rpl *cpl)
{
	unsigned int db_idx;
	struct inpcb *inp = toep->inp;
	struct ddp_buffer *db;
	struct kaiocb *job;
	long copied;

	if (cpl->status != CPL_ERR_NONE)
		panic("XXX: tcp_rpl failed: %d", cpl->status);

	switch (cpl->cookie) {
	case V_WORD(W_TCB_RX_DDP_FLAGS) | V_COOKIE(DDP_BUF0_INVALIDATED):
	case V_WORD(W_TCB_RX_DDP_FLAGS) | V_COOKIE(DDP_BUF1_INVALIDATED):
		/*
		 * XXX: This duplicates a lot of code with handle_ddp_data().
		 */
		db_idx = G_COOKIE(cpl->cookie) - DDP_BUF0_INVALIDATED;
		INP_WLOCK(inp);
		DDP_LOCK(toep);
		db = &toep->db[db_idx];

		/*
		 * handle_ddp_data() should leave the job around until
		 * this callback runs once a cancel is pending.
		 */
		MPASS(db != NULL);
		MPASS(db->job != NULL);
		MPASS(db->cancel_pending);

		/*
		 * XXX: It's not clear what happens if there is data
		 * placed when the buffer is invalidated.  I suspect we
		 * need to read the TCB to see how much data was placed.
		 *
		 * For now this just pretends like nothing was placed.
		 *
		 * XXX: Note that if we did check the PCB we would need to
		 * also take care of updating the tp, etc.
		 */
		job = db->job;
		copied = job->aio_received;
		if (copied == 0) {
			CTR2(KTR_CXGBE, "%s: cancelling %p", __func__, job);
			aio_cancel(job);
		} else {
			CTR3(KTR_CXGBE, "%s: completing %p (copied %ld)",
			    __func__, job, copied);
			aio_complete(job, copied, 0);
			t4_rcvd(&toep->td->tod, intotcpcb(inp));
		}

		complete_ddp_buffer(toep, db, db_idx);
		if (toep->ddp_waiting_count > 0)
			ddp_queue_toep(toep);
		DDP_UNLOCK(toep);
		INP_WUNLOCK(inp);
		break;
	default:
		panic("XXX: unknown tcb_rpl offset %#x, cookie %#x",
		    G_WORD(cpl->cookie), G_COOKIE(cpl->cookie));
	}
}

void
handle_ddp_close(struct toepcb *toep, struct tcpcb *tp, __be32 rcv_nxt)
{
	struct ddp_buffer *db;
	struct kaiocb *job;
	long copied;
	unsigned int db_flag, db_idx;
	int len, placed;

	INP_WLOCK_ASSERT(toep->inp);
	DDP_ASSERT_LOCKED(toep);
	len = be32toh(rcv_nxt) - tp->rcv_nxt;

	tp->rcv_nxt += len;
#ifndef USE_DDP_RX_FLOW_CONTROL
	toep->rx_credits += len;
#endif
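
	/*
	 * Complete the jobs attached to any buffers that are still
	 * posted to the hardware; each is credited with whatever data
	 * was placed in it before the connection was closed.
	 */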
	while (toep->ddp_active_count > 0) {
		MPASS(toep->ddp_active_id != -1);
		db_idx = toep->ddp_active_id;
		db_flag = db_idx == 1 ? DDP_BUF1_ACTIVE : DDP_BUF0_ACTIVE;
		MPASS((toep->ddp_flags & db_flag) != 0);
		db = &toep->db[db_idx];
		job = db->job;
		copied = job->aio_received;
		placed = len;
		if (placed > job->uaiocb.aio_nbytes - copied)
			placed = job->uaiocb.aio_nbytes - copied;
		if (!aio_clear_cancel_function(job)) {
			/*
			 * Update the copied length for when
			 * t4_aio_cancel_active() completes this
			 * request.
			 */
			job->aio_received += placed;
		} else {
			CTR4(KTR_CXGBE, "%s: tid %d completed buf %d len %d",
			    __func__, toep->tid, db_idx, placed);
			aio_complete(job, copied + placed, 0);
		}
		len -= placed;
		complete_ddp_buffer(toep, db, db_idx);
	}

	MPASS(len == 0);
	ddp_complete_all(toep, 0);
}

#define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\
	 F_DDP_PPOD_PARITY_ERR | F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\
	 F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\
	 F_DDP_INVALID_PPOD | F_DDP_HDRCRC_ERR | F_DDP_DATACRC_ERR)

static int
do_rx_data_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_rx_data_ddp *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	uint32_t vld;
	struct toepcb *toep = lookup_tid(sc, tid);

	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__));
	KASSERT(!(toep->flags & TPF_SYNQE),
	    ("%s: toep %p claims to be a synq entry", __func__, toep));

	vld = be32toh(cpl->ddpvld);
	if (__predict_false(vld & DDP_ERR)) {
		panic("%s: DDP error 0x%x (tid %d, toep %p)",
		    __func__, vld, tid, toep);
	}

	if (toep->ulp_mode == ULP_MODE_ISCSI) {
		sc->cpl_handler[CPL_RX_ISCSI_DDP](iq, rss, m);
		return (0);
	}

	handle_ddp_data(toep, cpl->u.ddp_report, cpl->seq, be16toh(cpl->len));

	return (0);
}

static int
do_rx_ddp_complete(struct sge_iq *iq, const struct rss_header *rss,
    struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_rx_ddp_complete *cpl = (const void *)(rss + 1);
	unsigned int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);

	KASSERT(m == NULL, ("%s: wasn't expecting payload", __func__));
	KASSERT(toep->tid == tid, ("%s: toep tid/atid mismatch", __func__));
	KASSERT(!(toep->flags & TPF_SYNQE),
	    ("%s: toep %p claims to be a synq entry", __func__, toep));

	handle_ddp_data(toep, cpl->ddp_report, cpl->rcv_nxt, 0);

	return (0);
}

static void
enable_ddp(struct adapter *sc, struct toepcb *toep)
{

	KASSERT((toep->ddp_flags & (DDP_ON | DDP_OK | DDP_SC_REQ)) == DDP_OK,
	    ("%s: toep %p has bad ddp_flags 0x%x",
	    __func__, toep, toep->ddp_flags));

	CTR3(KTR_CXGBE, "%s: tid %u (time %u)",
	    __func__, toep->tid, time_uptime);

	DDP_ASSERT_LOCKED(toep);
	toep->ddp_flags |= DDP_SC_REQ;
	t4_set_tcb_field(sc, toep, 1, W_TCB_RX_DDP_FLAGS,
	    V_TF_DDP_OFF(1) | V_TF_DDP_INDICATE_OUT(1) |
	    V_TF_DDP_BUF0_INDICATE(1) | V_TF_DDP_BUF1_INDICATE(1) |
	    V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_BUF1_VALID(1),
	    V_TF_DDP_BUF0_INDICATE(1) | V_TF_DDP_BUF1_INDICATE(1));
	t4_set_tcb_field(sc, toep, 1, W_TCB_T_FLAGS,
	    V_TF_RCV_COALESCE_ENABLE(1), 0);
}

static int
calculate_hcf(int n1, int n2)
{
	int a, b, t;

	if (n1 <= n2) {
		a = n1;
		b = n2;
	} else {
		a = n2;
		b = n1;
	}

	while (a != 0) {
		t = a;
		a = b % a;
		b = t;
	}

	return (b);
}
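
/*
 * Example of the page-size selection done in alloc_page_pods(),
 * assuming 4KB VM pages: if a pageset's physical pages form contiguous
 * runs of 64KB and 16KB, their HCF is 16KB, so 16KB is the largest
 * entry of t4_ddp_pgsz that evenly divides every run and is the DDP
 * page size chosen.
 */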

static int
alloc_page_pods(struct tom_data *td, struct pageset *ps)
{
	int i, hcf, seglen, idx, ppod, nppods;
	u_int ppod_addr;

	KASSERT(ps->nppods == 0, ("%s: page pods already allocated", __func__));

	/*
	 * The DDP page size is unrelated to the VM page size.  We combine
	 * contiguous physical pages into larger segments to get the best DDP
	 * page size possible.  This is the largest of the four sizes in
	 * A_ULP_RX_TDDP_PSZ that evenly divides the HCF of the segment sizes in
	 * the page list.
	 */
	hcf = 0;
	for (i = 0; i < ps->npages; i++) {
		seglen = PAGE_SIZE;
		while (i < ps->npages - 1 &&
		    ps->pages[i]->phys_addr + PAGE_SIZE ==
		    ps->pages[i + 1]->phys_addr) {
			seglen += PAGE_SIZE;
			i++;
		}

		hcf = calculate_hcf(hcf, seglen);
		if (hcf < t4_ddp_pgsz[1]) {
			idx = 0;
			goto have_pgsz;	/* give up, short circuit */
		}
	}

	if (hcf % t4_ddp_pgsz[0] != 0) {
		/* hmmm.  This could only happen when PAGE_SIZE < 4K */
		KASSERT(PAGE_SIZE < 4096,
		    ("%s: PAGE_SIZE %d, hcf %d", __func__, PAGE_SIZE, hcf));
		CTR3(KTR_CXGBE, "%s: PAGE_SIZE %d, hcf %d",
		    __func__, PAGE_SIZE, hcf);
		return (0);
	}

	for (idx = nitems(t4_ddp_pgsz) - 1; idx > 0; idx--) {
		if (hcf % t4_ddp_pgsz[idx] == 0)
			break;
	}
have_pgsz:
	MPASS(idx <= M_PPOD_PGSZ);

	nppods = pages_to_nppods(ps->npages, t4_ddp_pgsz[idx]);
	if (alloc_ppods(td, nppods, &ppod_addr) != 0) {
		CTR4(KTR_CXGBE, "%s: no pods, nppods %d, npages %d, pgsz %d",
		    __func__, nppods, ps->npages, t4_ddp_pgsz[idx]);
		return (0);
	}

	ppod = (ppod_addr - td->ppod_start) / PPOD_SIZE;
	ps->tag = V_PPOD_PGSZ(idx) | V_PPOD_TAG(ppod);
	ps->ppod_addr = ppod_addr;
	ps->nppods = nppods;

	CTR5(KTR_CXGBE, "New page pods.  "
	    "ps %p, ddp_pgsz %d, ppod 0x%x, npages %d, nppods %d",
	    ps, t4_ddp_pgsz[idx], ppod, ps->npages, ps->nppods);

	return (1);
}

#define NUM_ULP_TX_SC_IMM_PPODS (256 / PPOD_SIZE)

static int
write_page_pods(struct adapter *sc, struct toepcb *toep, struct pageset *ps)
{
	struct wrqe *wr;
	struct ulp_mem_io *ulpmc;
	struct ulptx_idata *ulpsc;
	struct pagepod *ppod;
	int i, j, k, n, chunk, len, ddp_pgsz, idx;
	u_int ppod_addr;
	uint32_t cmd;

	KASSERT(!(ps->flags & PS_PPODS_WRITTEN),
	    ("%s: page pods already written", __func__));

	cmd = htobe32(V_ULPTX_CMD(ULP_TX_MEM_WRITE));
	if (is_t4(sc))
		cmd |= htobe32(F_ULP_MEMIO_ORDER);
	else
		cmd |= htobe32(F_T5_ULP_MEMIO_IMM);
	ddp_pgsz = t4_ddp_pgsz[G_PPOD_PGSZ(ps->tag)];
	ppod_addr = ps->ppod_addr;
	for (i = 0; i < ps->nppods; ppod_addr += chunk) {

		/* How many page pods are we writing in this cycle */
		n = min(ps->nppods - i, NUM_ULP_TX_SC_IMM_PPODS);
		chunk = PPOD_SZ(n);
		len = roundup2(sizeof(*ulpmc) + sizeof(*ulpsc) + chunk, 16);

		wr = alloc_wrqe(len, toep->ctrlq);
		if (wr == NULL)
			return (ENOMEM);	/* ok to just bail out */
		ulpmc = wrtod(wr);

		INIT_ULPTX_WR(ulpmc, len, 0, 0);
		ulpmc->cmd = cmd;
		ulpmc->dlen = htobe32(V_ULP_MEMIO_DATA_LEN(chunk / 32));
		ulpmc->len16 = htobe32(howmany(len - sizeof(ulpmc->wr), 16));
		ulpmc->lock_addr = htobe32(V_ULP_MEMIO_ADDR(ppod_addr >> 5));

		ulpsc = (struct ulptx_idata *)(ulpmc + 1);
		ulpsc->cmd_more = htobe32(V_ULPTX_CMD(ULP_TX_SC_IMM));
		ulpsc->len = htobe32(chunk);

		ppod = (struct pagepod *)(ulpsc + 1);
		for (j = 0; j < n; i++, j++, ppod++) {
			ppod->vld_tid_pgsz_tag_color = htobe64(F_PPOD_VALID |
			    V_PPOD_TID(toep->tid) | ps->tag);
			ppod->len_offset = htobe64(V_PPOD_LEN(ps->len) |
			    V_PPOD_OFST(ps->offset));
			ppod->rsvd = 0;
			idx = i * PPOD_PAGES * (ddp_pgsz / PAGE_SIZE);
			for (k = 0; k < nitems(ppod->addr); k++) {
				if (idx < ps->npages) {
					ppod->addr[k] =
					    htobe64(ps->pages[idx]->phys_addr);
					idx += ddp_pgsz / PAGE_SIZE;
				} else
					ppod->addr[k] = 0;
#if 0
				CTR5(KTR_CXGBE,
				    "%s: tid %d ppod[%d]->addr[%d] = %p",
				    __func__, toep->tid, i, k,
				    htobe64(ppod->addr[k]));
#endif
			}

		}

		t4_wrq_tx(sc, wr);
	}
	ps->flags |= PS_PPODS_WRITTEN;

	return (0);
}

static void
wire_pageset(struct pageset *ps)
{
	vm_page_t p;
	int i;

	KASSERT(!(ps->flags & PS_WIRED), ("pageset already wired"));

	for (i = 0; i < ps->npages; i++) {
		p = ps->pages[i];
		vm_page_lock(p);
		vm_page_wire(p);
		vm_page_unhold(p);
		vm_page_unlock(p);
	}
	ps->flags |= PS_WIRED;
}

/*
 * Prepare a pageset for DDP.  This wires the pageset and sets up page
 * pods.
 */
static int
prep_pageset(struct adapter *sc, struct toepcb *toep, struct pageset *ps)
{
	struct tom_data *td = sc->tom_softc;

	if (!(ps->flags & PS_WIRED))
		wire_pageset(ps);
	if (ps->nppods == 0 && !alloc_page_pods(td, ps)) {
		return (0);
	}
	if (!(ps->flags & PS_PPODS_WRITTEN) &&
	    write_page_pods(sc, toep, ps) != 0) {
		return (0);
	}

	return (1);
}

void
t4_init_ddp(struct adapter *sc, struct tom_data *td)
{

	td->ppod_start = sc->vres.ddp.start;
	td->ppod_arena = vmem_create("DDP page pods", sc->vres.ddp.start,
	    sc->vres.ddp.size, 1, 32, M_FIRSTFIT | M_NOWAIT);

	t4_register_cpl_handler(sc, CPL_RX_DATA_DDP, do_rx_data_ddp);
	t4_register_cpl_handler(sc, CPL_RX_DDP_COMPLETE, do_rx_ddp_complete);
}

void
t4_uninit_ddp(struct adapter *sc __unused, struct tom_data *td)
{

	if (td->ppod_arena != NULL) {
		vmem_destroy(td->ppod_arena);
		td->ppod_arena = NULL;
	}
}

static int
pscmp(struct pageset *ps, struct vmspace *vm, vm_offset_t start, int npages,
    int pgoff, int len)
{

	if (ps->npages != npages || ps->offset != pgoff || ps->len != len)
		return (1);

	return (ps->vm != vm || ps->vm_timestamp != vm->vm_map.timestamp);
}

static int
hold_aio(struct toepcb *toep, struct kaiocb *job, struct pageset **pps)
{
	struct vmspace *vm;
	vm_map_t map;
	vm_offset_t start, end, pgoff;
	struct pageset *ps;
	int n;

	DDP_ASSERT_LOCKED(toep);

	/*
	 * The AIO subsystem will cancel and drain all requests before
	 * permitting a process to exit or exec, so p_vmspace should
	 * be stable here.
	 */
	vm = job->userproc->p_vmspace;
	map = &vm->vm_map;
	start = (uintptr_t)job->uaiocb.aio_buf;
	pgoff = start & PAGE_MASK;
	end = round_page(start + job->uaiocb.aio_nbytes);
	start = trunc_page(start);

	if (end - start > MAX_DDP_BUFFER_SIZE) {
		/*
		 * Truncate the request to a short read.
		 * Alternatively, we could DDP in chunks to the larger
		 * buffer, but that would be quite a bit more work.
		 *
		 * When truncating, round the request down to avoid
		 * crossing a cache line on the final transaction.
		 */
		end = rounddown2(start + MAX_DDP_BUFFER_SIZE, CACHE_LINE_SIZE);
#ifdef VERBOSE_TRACES
		CTR4(KTR_CXGBE, "%s: tid %d, truncating size from %lu to %lu",
		    __func__, toep->tid, (unsigned long)job->uaiocb.aio_nbytes,
		    (unsigned long)(end - (start + pgoff)));
#endif
		job->uaiocb.aio_nbytes = end - (start + pgoff);
		end = round_page(end);
	}

	n = atop(end - start);

	/*
	 * Try to reuse a cached pageset.
1140dc964385SJohn Baldwin */ 1141dc964385SJohn Baldwin TAILQ_FOREACH(ps, &toep->ddp_cached_pagesets, link) { 1142dc964385SJohn Baldwin if (pscmp(ps, vm, start, n, pgoff, 1143dc964385SJohn Baldwin job->uaiocb.aio_nbytes) == 0) { 1144dc964385SJohn Baldwin TAILQ_REMOVE(&toep->ddp_cached_pagesets, ps, link); 1145dc964385SJohn Baldwin toep->ddp_cached_count--; 1146dc964385SJohn Baldwin *pps = ps; 1147dc964385SJohn Baldwin return (0); 1148dc964385SJohn Baldwin } 1149688dba74SNavdeep Parhar } 1150688dba74SNavdeep Parhar 1151e682d02eSNavdeep Parhar /* 1152dc964385SJohn Baldwin * If there are too many cached pagesets to create a new one, 1153dc964385SJohn Baldwin * free a pageset before creating a new one. 1154e682d02eSNavdeep Parhar */ 1155dc964385SJohn Baldwin KASSERT(toep->ddp_active_count + toep->ddp_cached_count <= 1156dc964385SJohn Baldwin nitems(toep->db), ("%s: too many wired pagesets", __func__)); 1157dc964385SJohn Baldwin if (toep->ddp_active_count + toep->ddp_cached_count == 1158dc964385SJohn Baldwin nitems(toep->db)) { 1159dc964385SJohn Baldwin KASSERT(toep->ddp_cached_count > 0, 1160dc964385SJohn Baldwin ("no cached pageset to free")); 1161dc964385SJohn Baldwin ps = TAILQ_LAST(&toep->ddp_cached_pagesets, pagesetq); 1162dc964385SJohn Baldwin TAILQ_REMOVE(&toep->ddp_cached_pagesets, ps, link); 1163dc964385SJohn Baldwin toep->ddp_cached_count--; 1164dc964385SJohn Baldwin free_pageset(toep->td, ps); 1165dc964385SJohn Baldwin } 1166dc964385SJohn Baldwin DDP_UNLOCK(toep); 1167e682d02eSNavdeep Parhar 1168dc964385SJohn Baldwin /* Create a new pageset. */ 1169dc964385SJohn Baldwin ps = malloc(sizeof(*ps) + n * sizeof(vm_page_t), M_CXGBE, M_WAITOK | 1170dc964385SJohn Baldwin M_ZERO); 1171dc964385SJohn Baldwin ps->pages = (vm_page_t *)(ps + 1); 1172dc964385SJohn Baldwin ps->vm_timestamp = map->timestamp; 1173dc964385SJohn Baldwin ps->npages = vm_fault_quick_hold_pages(map, start, end - start, 1174dc964385SJohn Baldwin VM_PROT_WRITE, ps->pages, n); 1175e682d02eSNavdeep Parhar 1176dc964385SJohn Baldwin DDP_LOCK(toep); 1177dc964385SJohn Baldwin if (ps->npages < 0) { 1178dc964385SJohn Baldwin free(ps, M_CXGBE); 1179dc964385SJohn Baldwin return (EFAULT); 1180e682d02eSNavdeep Parhar } 1181e682d02eSNavdeep Parhar 1182dc964385SJohn Baldwin KASSERT(ps->npages == n, ("hold_aio: page count mismatch: %d vs %d", 1183dc964385SJohn Baldwin ps->npages, n)); 1184dc964385SJohn Baldwin 1185dc964385SJohn Baldwin ps->offset = pgoff; 1186dc964385SJohn Baldwin ps->len = job->uaiocb.aio_nbytes; 1187dc964385SJohn Baldwin atomic_add_int(&vm->vm_refcnt, 1); 1188dc964385SJohn Baldwin ps->vm = vm; 1189dc964385SJohn Baldwin 1190dc964385SJohn Baldwin CTR5(KTR_CXGBE, "%s: tid %d, new pageset %p for job %p, npages %d", 1191dc964385SJohn Baldwin __func__, toep->tid, ps, job, ps->npages); 1192dc964385SJohn Baldwin *pps = ps; 1193e682d02eSNavdeep Parhar return (0); 1194e682d02eSNavdeep Parhar } 1195e682d02eSNavdeep Parhar 1196dc964385SJohn Baldwin static void 1197dc964385SJohn Baldwin ddp_complete_all(struct toepcb *toep, int error) 1198e682d02eSNavdeep Parhar { 1199dc964385SJohn Baldwin struct kaiocb *job; 1200e682d02eSNavdeep Parhar 1201dc964385SJohn Baldwin DDP_ASSERT_LOCKED(toep); 1202dc964385SJohn Baldwin while (!TAILQ_EMPTY(&toep->ddp_aiojobq)) { 1203dc964385SJohn Baldwin job = TAILQ_FIRST(&toep->ddp_aiojobq); 1204dc964385SJohn Baldwin TAILQ_REMOVE(&toep->ddp_aiojobq, job, list); 1205dc964385SJohn Baldwin toep->ddp_waiting_count--; 1206dc964385SJohn Baldwin if (aio_clear_cancel_function(job)) 1207dc964385SJohn Baldwin 
ddp_complete_one(job, error); 1208dc964385SJohn Baldwin } 1209dc964385SJohn Baldwin } 1210dc964385SJohn Baldwin 1211dc964385SJohn Baldwin static void 1212dc964385SJohn Baldwin aio_ddp_cancel_one(struct kaiocb *job) 1213dc964385SJohn Baldwin { 1214dc964385SJohn Baldwin long copied; 1215dc964385SJohn Baldwin 1216dc964385SJohn Baldwin /* 1217dc964385SJohn Baldwin * If this job had copied data out of the socket buffer before 1218dc964385SJohn Baldwin * it was cancelled, report it as a short read rather than an 1219dc964385SJohn Baldwin * error. 1220dc964385SJohn Baldwin */ 1221*fe0bdd1dSJohn Baldwin copied = job->aio_received; 1222dc964385SJohn Baldwin if (copied != 0) 1223dc964385SJohn Baldwin aio_complete(job, copied, 0); 1224e682d02eSNavdeep Parhar else 1225dc964385SJohn Baldwin aio_cancel(job); 1226e682d02eSNavdeep Parhar } 1227e682d02eSNavdeep Parhar 1228dc964385SJohn Baldwin /* 1229dc964385SJohn Baldwin * Called when the main loop wants to requeue a job to retry it later. 1230dc964385SJohn Baldwin * Deals with the race of the job being cancelled while it was being 1231dc964385SJohn Baldwin * examined. 1232dc964385SJohn Baldwin */ 1233dc964385SJohn Baldwin static void 1234dc964385SJohn Baldwin aio_ddp_requeue_one(struct toepcb *toep, struct kaiocb *job) 1235dc964385SJohn Baldwin { 1236dc964385SJohn Baldwin 1237dc964385SJohn Baldwin DDP_ASSERT_LOCKED(toep); 1238dc964385SJohn Baldwin if (!(toep->ddp_flags & DDP_DEAD) && 1239dc964385SJohn Baldwin aio_set_cancel_function(job, t4_aio_cancel_queued)) { 1240dc964385SJohn Baldwin TAILQ_INSERT_HEAD(&toep->ddp_aiojobq, job, list); 1241dc964385SJohn Baldwin toep->ddp_waiting_count++; 1242dc964385SJohn Baldwin } else 1243dc964385SJohn Baldwin aio_ddp_cancel_one(job); 1244e682d02eSNavdeep Parhar } 1245e682d02eSNavdeep Parhar 1246dc964385SJohn Baldwin static void 1247dc964385SJohn Baldwin aio_ddp_requeue(struct toepcb *toep) 1248dc964385SJohn Baldwin { 1249dc964385SJohn Baldwin struct adapter *sc = td_adapter(toep->td); 1250dc964385SJohn Baldwin struct socket *so; 1251dc964385SJohn Baldwin struct sockbuf *sb; 1252dc964385SJohn Baldwin struct inpcb *inp; 1253dc964385SJohn Baldwin struct kaiocb *job; 1254dc964385SJohn Baldwin struct ddp_buffer *db; 1255dc964385SJohn Baldwin size_t copied, offset, resid; 1256dc964385SJohn Baldwin struct pageset *ps; 1257dc964385SJohn Baldwin struct mbuf *m; 1258dc964385SJohn Baldwin uint64_t ddp_flags, ddp_flags_mask; 1259dc964385SJohn Baldwin struct wrqe *wr; 1260dc964385SJohn Baldwin int buf_flag, db_idx, error; 1261dc964385SJohn Baldwin 1262dc964385SJohn Baldwin DDP_ASSERT_LOCKED(toep); 1263dc964385SJohn Baldwin 1264e682d02eSNavdeep Parhar restart: 1265dc964385SJohn Baldwin if (toep->ddp_flags & DDP_DEAD) { 1266dc964385SJohn Baldwin MPASS(toep->ddp_waiting_count == 0); 1267dc964385SJohn Baldwin MPASS(toep->ddp_active_count == 0); 1268dc964385SJohn Baldwin return; 1269e682d02eSNavdeep Parhar } 1270e682d02eSNavdeep Parhar 1271dc964385SJohn Baldwin if (toep->ddp_waiting_count == 0 || 1272dc964385SJohn Baldwin toep->ddp_active_count == nitems(toep->db)) { 1273dc964385SJohn Baldwin return; 1274dc964385SJohn Baldwin } 1275dc964385SJohn Baldwin 1276dc964385SJohn Baldwin job = TAILQ_FIRST(&toep->ddp_aiojobq); 1277dc964385SJohn Baldwin so = job->fd_file->f_data; 1278dc964385SJohn Baldwin sb = &so->so_rcv; 1279dc964385SJohn Baldwin SOCKBUF_LOCK(sb); 1280dc964385SJohn Baldwin 1281dc964385SJohn Baldwin /* We will never get anything unless we are or were connected. 
*/ 1282dc964385SJohn Baldwin if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) { 1283dc964385SJohn Baldwin SOCKBUF_UNLOCK(sb); 1284dc964385SJohn Baldwin ddp_complete_all(toep, ENOTCONN); 1285dc964385SJohn Baldwin return; 1286dc964385SJohn Baldwin } 1287dc964385SJohn Baldwin 1288dc964385SJohn Baldwin KASSERT(toep->ddp_active_count == 0 || sbavail(sb) == 0, 1289dc964385SJohn Baldwin ("%s: pending sockbuf data and DDP is active", __func__)); 1290dc964385SJohn Baldwin 1291e682d02eSNavdeep Parhar /* Abort if socket has reported problems. */ 1292dc964385SJohn Baldwin /* XXX: Wait for any queued DDP's to finish and/or flush them? */ 1293dc964385SJohn Baldwin if (so->so_error && sbavail(sb) == 0) { 1294dc964385SJohn Baldwin toep->ddp_waiting_count--; 1295dc964385SJohn Baldwin TAILQ_REMOVE(&toep->ddp_aiojobq, job, list); 1296dc964385SJohn Baldwin if (!aio_clear_cancel_function(job)) { 1297dc964385SJohn Baldwin SOCKBUF_UNLOCK(sb); 1298dc964385SJohn Baldwin goto restart; 1299dc964385SJohn Baldwin } 1300dc964385SJohn Baldwin 1301dc964385SJohn Baldwin /* 1302dc964385SJohn Baldwin * If this job has previously copied some data, report 1303dc964385SJohn Baldwin * a short read and leave the error to be reported by 1304dc964385SJohn Baldwin * a future request. 1305dc964385SJohn Baldwin */ 1306*fe0bdd1dSJohn Baldwin copied = job->aio_received; 1307dc964385SJohn Baldwin if (copied != 0) { 1308dc964385SJohn Baldwin SOCKBUF_UNLOCK(sb); 1309dc964385SJohn Baldwin aio_complete(job, copied, 0); 1310dc964385SJohn Baldwin goto restart; 1311dc964385SJohn Baldwin } 1312e682d02eSNavdeep Parhar error = so->so_error; 1313e682d02eSNavdeep Parhar so->so_error = 0; 1314dc964385SJohn Baldwin SOCKBUF_UNLOCK(sb); 1315dc964385SJohn Baldwin aio_complete(job, -1, error); 1316dc964385SJohn Baldwin goto restart; 1317e682d02eSNavdeep Parhar } 1318e682d02eSNavdeep Parhar 1319e682d02eSNavdeep Parhar /* 1320dc964385SJohn Baldwin * Door is closed. If there is pending data in the socket buffer, 1321dc964385SJohn Baldwin * deliver it. If there are pending DDP requests, wait for those 1322dc964385SJohn Baldwin * to complete. Once they have completed, return EOF reads. 1323e682d02eSNavdeep Parhar */ 1324dc964385SJohn Baldwin if (sb->sb_state & SBS_CANTRCVMORE && sbavail(sb) == 0) { 1325dc964385SJohn Baldwin SOCKBUF_UNLOCK(sb); 1326dc964385SJohn Baldwin if (toep->ddp_active_count != 0) 1327dc964385SJohn Baldwin return; 1328dc964385SJohn Baldwin ddp_complete_all(toep, 0); 1329dc964385SJohn Baldwin return; 1330e682d02eSNavdeep Parhar } 1331dc964385SJohn Baldwin 1332dc964385SJohn Baldwin /* 1333dc964385SJohn Baldwin * If DDP is not enabled and there is no pending socket buffer 1334dc964385SJohn Baldwin * data, try to enable DDP. 1335dc964385SJohn Baldwin */ 1336dc964385SJohn Baldwin if (sbavail(sb) == 0 && (toep->ddp_flags & DDP_ON) == 0) { 1337dc964385SJohn Baldwin SOCKBUF_UNLOCK(sb); 1338dc964385SJohn Baldwin 1339dc964385SJohn Baldwin /* 1340dc964385SJohn Baldwin * Wait for the card to ACK that DDP is enabled before 1341dc964385SJohn Baldwin * queueing any buffers. Currently this waits for an 1342dc964385SJohn Baldwin * indicate to arrive. This could use a TCB_SET_FIELD_RPL 1343dc964385SJohn Baldwin * message to know that DDP was enabled instead of waiting 1344dc964385SJohn Baldwin * for the indicate which would avoid copying the indicate 1345dc964385SJohn Baldwin * if no data is pending. 
1346dc964385SJohn Baldwin * 1347dc964385SJohn Baldwin * XXX: Might want to limit the indicate size to the size 1348dc964385SJohn Baldwin * of the first queued request. 1349dc964385SJohn Baldwin */ 1350dc964385SJohn Baldwin if ((toep->ddp_flags & DDP_SC_REQ) == 0) 1351dc964385SJohn Baldwin enable_ddp(sc, toep); 1352dc964385SJohn Baldwin return; 1353e682d02eSNavdeep Parhar } 1354dc964385SJohn Baldwin SOCKBUF_UNLOCK(sb); 1355dc964385SJohn Baldwin 1356dc964385SJohn Baldwin /* 1357dc964385SJohn Baldwin * If another thread is queueing a buffer for DDP, let it 1358dc964385SJohn Baldwin * drain any work and return. 1359dc964385SJohn Baldwin */ 1360dc964385SJohn Baldwin if (toep->ddp_queueing != NULL) 1361dc964385SJohn Baldwin return; 1362dc964385SJohn Baldwin 1363dc964385SJohn Baldwin /* Take the next job to prep it for DDP. */ 1364dc964385SJohn Baldwin toep->ddp_waiting_count--; 1365dc964385SJohn Baldwin TAILQ_REMOVE(&toep->ddp_aiojobq, job, list); 1366dc964385SJohn Baldwin if (!aio_clear_cancel_function(job)) 1367e682d02eSNavdeep Parhar goto restart; 1368dc964385SJohn Baldwin toep->ddp_queueing = job; 1369e682d02eSNavdeep Parhar 1370dc964385SJohn Baldwin /* NB: This drops DDP_LOCK while it holds the backing VM pages. */ 1371dc964385SJohn Baldwin error = hold_aio(toep, job, &ps); 1372dc964385SJohn Baldwin if (error != 0) { 1373dc964385SJohn Baldwin ddp_complete_one(job, error); 1374dc964385SJohn Baldwin toep->ddp_queueing = NULL; 1375e682d02eSNavdeep Parhar goto restart; 1376dc964385SJohn Baldwin } 1377e682d02eSNavdeep Parhar 1378dc964385SJohn Baldwin SOCKBUF_LOCK(sb); 1379dc964385SJohn Baldwin if (so->so_error && sbavail(sb) == 0) { 1380*fe0bdd1dSJohn Baldwin copied = job->aio_received; 1381dc964385SJohn Baldwin if (copied != 0) { 1382dc964385SJohn Baldwin SOCKBUF_UNLOCK(sb); 1383dc964385SJohn Baldwin recycle_pageset(toep, ps); 1384dc964385SJohn Baldwin aio_complete(job, copied, 0); 1385dc964385SJohn Baldwin toep->ddp_queueing = NULL; 1386dc964385SJohn Baldwin goto restart; 1387dc964385SJohn Baldwin } 1388e682d02eSNavdeep Parhar 1389dc964385SJohn Baldwin error = so->so_error; 1390dc964385SJohn Baldwin so->so_error = 0; 1391dc964385SJohn Baldwin SOCKBUF_UNLOCK(sb); 1392dc964385SJohn Baldwin recycle_pageset(toep, ps); 1393dc964385SJohn Baldwin aio_complete(job, -1, error); 1394dc964385SJohn Baldwin toep->ddp_queueing = NULL; 1395dc964385SJohn Baldwin goto restart; 1396e682d02eSNavdeep Parhar } 1397e682d02eSNavdeep Parhar 1398dc964385SJohn Baldwin if (sb->sb_state & SBS_CANTRCVMORE && sbavail(sb) == 0) { 1399dc964385SJohn Baldwin SOCKBUF_UNLOCK(sb); 1400dc964385SJohn Baldwin recycle_pageset(toep, ps); 1401dc964385SJohn Baldwin if (toep->ddp_active_count != 0) { 1402dc964385SJohn Baldwin /* 1403dc964385SJohn Baldwin * The door is closed, but there are still pending 1404dc964385SJohn Baldwin * DDP buffers. Requeue. These jobs will all be 1405dc964385SJohn Baldwin * completed once those buffers drain. 
1406dc964385SJohn Baldwin */ 1407dc964385SJohn Baldwin aio_ddp_requeue_one(toep, job); 1408dc964385SJohn Baldwin toep->ddp_queueing = NULL; 1409dc964385SJohn Baldwin return; 1410e682d02eSNavdeep Parhar } 1411dc964385SJohn Baldwin ddp_complete_one(job, 0); 1412dc964385SJohn Baldwin ddp_complete_all(toep, 0); 1413dc964385SJohn Baldwin toep->ddp_queueing = NULL; 1414dc964385SJohn Baldwin return; 1415e682d02eSNavdeep Parhar } 1416dc964385SJohn Baldwin 1417dc964385SJohn Baldwin sbcopy: 1418dc964385SJohn Baldwin /* 1419dc964385SJohn Baldwin * If the toep is dead, there shouldn't be any data in the socket 1420dc964385SJohn Baldwin * buffer, so the above case should have handled this. 1421dc964385SJohn Baldwin */ 1422dc964385SJohn Baldwin MPASS(!(toep->ddp_flags & DDP_DEAD)); 1423dc964385SJohn Baldwin 1424dc964385SJohn Baldwin /* 1425dc964385SJohn Baldwin * If there is pending data in the socket buffer (either 1426dc964385SJohn Baldwin * from before the requests were queued or a DDP indicate), 1427dc964385SJohn Baldwin * copy those mbufs out directly. 1428dc964385SJohn Baldwin */ 1429dc964385SJohn Baldwin copied = 0; 1430*fe0bdd1dSJohn Baldwin offset = ps->offset + job->aio_received; 1431*fe0bdd1dSJohn Baldwin MPASS(job->aio_received <= job->uaiocb.aio_nbytes); 1432*fe0bdd1dSJohn Baldwin resid = job->uaiocb.aio_nbytes - job->aio_received; 1433dc964385SJohn Baldwin m = sb->sb_mb; 1434dc964385SJohn Baldwin KASSERT(m == NULL || toep->ddp_active_count == 0, 1435dc964385SJohn Baldwin ("%s: sockbuf data with active DDP", __func__)); 1436dc964385SJohn Baldwin while (m != NULL && resid > 0) { 1437dc964385SJohn Baldwin struct iovec iov[1]; 1438dc964385SJohn Baldwin struct uio uio; 1439dc964385SJohn Baldwin int error; 1440dc964385SJohn Baldwin 1441dc964385SJohn Baldwin iov[0].iov_base = mtod(m, void *); 1442dc964385SJohn Baldwin iov[0].iov_len = m->m_len; 1443dc964385SJohn Baldwin if (iov[0].iov_len > resid) 1444dc964385SJohn Baldwin iov[0].iov_len = resid; 1445dc964385SJohn Baldwin uio.uio_iov = iov; 1446dc964385SJohn Baldwin uio.uio_iovcnt = 1; 1447dc964385SJohn Baldwin uio.uio_offset = 0; 1448dc964385SJohn Baldwin uio.uio_resid = iov[0].iov_len; 1449dc964385SJohn Baldwin uio.uio_segflg = UIO_SYSSPACE; 1450dc964385SJohn Baldwin uio.uio_rw = UIO_WRITE; 1451dc964385SJohn Baldwin error = uiomove_fromphys(ps->pages, offset + copied, 1452dc964385SJohn Baldwin uio.uio_resid, &uio); 1453dc964385SJohn Baldwin MPASS(error == 0 && uio.uio_resid == 0); 1454dc964385SJohn Baldwin copied += uio.uio_offset; 1455dc964385SJohn Baldwin resid -= uio.uio_offset; 1456dc964385SJohn Baldwin m = m->m_next; 1457dc964385SJohn Baldwin } 1458dc964385SJohn Baldwin if (copied != 0) { 1459dc964385SJohn Baldwin sbdrop_locked(sb, copied); 1460*fe0bdd1dSJohn Baldwin job->aio_received += copied; 1461*fe0bdd1dSJohn Baldwin copied = job->aio_received; 1462dc964385SJohn Baldwin inp = sotoinpcb(so); 1463dc964385SJohn Baldwin if (!INP_TRY_WLOCK(inp)) { 1464dc964385SJohn Baldwin /* 1465dc964385SJohn Baldwin * The reference on the socket file descriptor in 1466dc964385SJohn Baldwin * the AIO job should keep 'sb' and 'inp' stable. 1467dc964385SJohn Baldwin * Our caller has a reference on the 'toep' that 1468dc964385SJohn Baldwin * keeps it stable. 
1469dc964385SJohn Baldwin */ 1470dc964385SJohn Baldwin SOCKBUF_UNLOCK(sb); 1471dc964385SJohn Baldwin DDP_UNLOCK(toep); 1472dc964385SJohn Baldwin INP_WLOCK(inp); 1473dc964385SJohn Baldwin DDP_LOCK(toep); 1474dc964385SJohn Baldwin SOCKBUF_LOCK(sb); 1475dc964385SJohn Baldwin 1476dc964385SJohn Baldwin /* 1477dc964385SJohn Baldwin * If the socket has been closed, we should detect 1478dc964385SJohn Baldwin * that and complete this request if needed on 1479dc964385SJohn Baldwin * the next trip around the loop. 1480dc964385SJohn Baldwin */ 1481dc964385SJohn Baldwin } 1482dc964385SJohn Baldwin t4_rcvd_locked(&toep->td->tod, intotcpcb(inp)); 1483dc964385SJohn Baldwin INP_WUNLOCK(inp); 1484dc964385SJohn Baldwin if (resid == 0 || toep->ddp_flags & DDP_DEAD) { 1485dc964385SJohn Baldwin /* 1486dc964385SJohn Baldwin * We filled the entire buffer with socket 1487dc964385SJohn Baldwin * data, DDP is not being used, or the socket 1488dc964385SJohn Baldwin * is being shut down, so complete the 1489dc964385SJohn Baldwin * request. 1490dc964385SJohn Baldwin */ 1491dc964385SJohn Baldwin SOCKBUF_UNLOCK(sb); 1492dc964385SJohn Baldwin recycle_pageset(toep, ps); 1493dc964385SJohn Baldwin aio_complete(job, copied, 0); 1494dc964385SJohn Baldwin toep->ddp_queueing = NULL; 1495dc964385SJohn Baldwin goto restart; 1496dc964385SJohn Baldwin } 1497dc964385SJohn Baldwin 1498dc964385SJohn Baldwin /* 1499dc964385SJohn Baldwin * If DDP is not enabled, requeue this request and restart. 1500dc964385SJohn Baldwin * This will either enable DDP or wait for more data to 1501dc964385SJohn Baldwin * arrive on the socket buffer. 1502dc964385SJohn Baldwin */ 1503dc964385SJohn Baldwin if ((toep->ddp_flags & (DDP_ON | DDP_SC_REQ)) != DDP_ON) { 1504dc964385SJohn Baldwin SOCKBUF_UNLOCK(sb); 1505dc964385SJohn Baldwin recycle_pageset(toep, ps); 1506dc964385SJohn Baldwin aio_ddp_requeue_one(toep, job); 1507dc964385SJohn Baldwin toep->ddp_queueing = NULL; 1508dc964385SJohn Baldwin goto restart; 1509dc964385SJohn Baldwin } 1510dc964385SJohn Baldwin 1511dc964385SJohn Baldwin /* 1512dc964385SJohn Baldwin * An indicate might have arrived and been added to 1513dc964385SJohn Baldwin * the socket buffer while it was unlocked after the 1514dc964385SJohn Baldwin * copy to lock the INP. If so, restart the copy. 1515dc964385SJohn Baldwin */ 1516dc964385SJohn Baldwin if (sbavail(sb) != 0) 1517dc964385SJohn Baldwin goto sbcopy; 1518dc964385SJohn Baldwin } 1519dc964385SJohn Baldwin SOCKBUF_UNLOCK(sb); 1520dc964385SJohn Baldwin 1521dc964385SJohn Baldwin if (prep_pageset(sc, toep, ps) == 0) { 1522dc964385SJohn Baldwin recycle_pageset(toep, ps); 1523dc964385SJohn Baldwin aio_ddp_requeue_one(toep, job); 1524dc964385SJohn Baldwin toep->ddp_queueing = NULL; 1525dc964385SJohn Baldwin 1526dc964385SJohn Baldwin /* 1527dc964385SJohn Baldwin * XXX: Need to retry this later. Mostly need a trigger 1528dc964385SJohn Baldwin * when page pods are freed up. 1529dc964385SJohn Baldwin */ 1530dc964385SJohn Baldwin printf("%s: prep_pageset failed\n", __func__); 1531dc964385SJohn Baldwin return; 1532dc964385SJohn Baldwin } 1533dc964385SJohn Baldwin 1534dc964385SJohn Baldwin /* Determine which DDP buffer to use. 
*/ 1535dc964385SJohn Baldwin if (toep->db[0].job == NULL) { 1536dc964385SJohn Baldwin db_idx = 0; 1537e682d02eSNavdeep Parhar } else { 1538dc964385SJohn Baldwin MPASS(toep->db[1].job == NULL); 1539dc964385SJohn Baldwin db_idx = 1; 1540e682d02eSNavdeep Parhar } 1541e682d02eSNavdeep Parhar 1542dc964385SJohn Baldwin ddp_flags = 0; 1543dc964385SJohn Baldwin ddp_flags_mask = 0; 1544dc964385SJohn Baldwin if (db_idx == 0) { 1545dc964385SJohn Baldwin ddp_flags |= V_TF_DDP_BUF0_VALID(1); 1546dc964385SJohn Baldwin if (so->so_state & SS_NBIO) 1547dc964385SJohn Baldwin ddp_flags |= V_TF_DDP_BUF0_FLUSH(1); 1548dc964385SJohn Baldwin ddp_flags_mask |= V_TF_DDP_PSH_NO_INVALIDATE0(1) | 1549dc964385SJohn Baldwin V_TF_DDP_PUSH_DISABLE_0(1) | V_TF_DDP_PSHF_ENABLE_0(1) | 1550dc964385SJohn Baldwin V_TF_DDP_BUF0_FLUSH(1) | V_TF_DDP_BUF0_VALID(1); 1551dc964385SJohn Baldwin buf_flag = DDP_BUF0_ACTIVE; 1552dc964385SJohn Baldwin } else { 1553dc964385SJohn Baldwin ddp_flags |= V_TF_DDP_BUF1_VALID(1); 1554dc964385SJohn Baldwin if (so->so_state & SS_NBIO) 1555dc964385SJohn Baldwin ddp_flags |= V_TF_DDP_BUF1_FLUSH(1); 1556dc964385SJohn Baldwin ddp_flags_mask |= V_TF_DDP_PSH_NO_INVALIDATE1(1) | 1557dc964385SJohn Baldwin V_TF_DDP_PUSH_DISABLE_1(1) | V_TF_DDP_PSHF_ENABLE_1(1) | 1558dc964385SJohn Baldwin V_TF_DDP_BUF1_FLUSH(1) | V_TF_DDP_BUF1_VALID(1); 1559dc964385SJohn Baldwin buf_flag = DDP_BUF1_ACTIVE; 1560e682d02eSNavdeep Parhar } 1561dc964385SJohn Baldwin MPASS((toep->ddp_flags & buf_flag) == 0); 1562dc964385SJohn Baldwin if ((toep->ddp_flags & (DDP_BUF0_ACTIVE | DDP_BUF1_ACTIVE)) == 0) { 1563dc964385SJohn Baldwin MPASS(db_idx == 0); 1564dc964385SJohn Baldwin MPASS(toep->ddp_active_id == -1); 1565dc964385SJohn Baldwin MPASS(toep->ddp_active_count == 0); 1566dc964385SJohn Baldwin ddp_flags_mask |= V_TF_DDP_ACTIVE_BUF(1); 1567e682d02eSNavdeep Parhar } 1568e682d02eSNavdeep Parhar 1569e682d02eSNavdeep Parhar /* 1570dc964385SJohn Baldwin * The TID for this connection should still be valid. If DDP_DEAD 1571dc964385SJohn Baldwin * is set, SBS_CANTRCVMORE should be set, so we shouldn't be 1572dc964385SJohn Baldwin * this far anyway. Even if the socket is closing on the other 1573dc964385SJohn Baldwin * end, the AIO job holds a reference on this end of the socket 1574dc964385SJohn Baldwin * which will keep it open and keep the TCP PCB attached until 1575dc964385SJohn Baldwin * after the job is completed. 1576e682d02eSNavdeep Parhar */ 1577*fe0bdd1dSJohn Baldwin wr = mk_update_tcb_for_ddp(sc, toep, db_idx, ps, job->aio_received, 1578*fe0bdd1dSJohn Baldwin ddp_flags, ddp_flags_mask); 1579dc964385SJohn Baldwin if (wr == NULL) { 1580dc964385SJohn Baldwin recycle_pageset(toep, ps); 1581dc964385SJohn Baldwin aio_ddp_requeue_one(toep, job); 1582dc964385SJohn Baldwin toep->ddp_queueing = NULL; 1583dc964385SJohn Baldwin 1584dc964385SJohn Baldwin /* 1585dc964385SJohn Baldwin * XXX: Need a way to kick a retry here. 1586dc964385SJohn Baldwin * 1587dc964385SJohn Baldwin * XXX: We know the fixed size needed and could 1588dc964385SJohn Baldwin * preallocate this using a blocking request at the 1589dc964385SJohn Baldwin * start of the task to avoid having to handle this 1590dc964385SJohn Baldwin * edge case. 
1591dc964385SJohn Baldwin */ 1592dc964385SJohn Baldwin printf("%s: mk_update_tcb_for_ddp failed\n", __func__); 1593dc964385SJohn Baldwin return; 1594dc964385SJohn Baldwin } 1595dc964385SJohn Baldwin 1596dc964385SJohn Baldwin if (!aio_set_cancel_function(job, t4_aio_cancel_active)) { 1597dc964385SJohn Baldwin free_wrqe(wr); 1598dc964385SJohn Baldwin recycle_pageset(toep, ps); 1599dc964385SJohn Baldwin aio_ddp_cancel_one(job); 1600dc964385SJohn Baldwin toep->ddp_queueing = NULL; 1601e682d02eSNavdeep Parhar goto restart; 1602e682d02eSNavdeep Parhar } 1603e682d02eSNavdeep Parhar 1604dc964385SJohn Baldwin #ifdef VERBOSE_TRACES 1605dc964385SJohn Baldwin CTR5(KTR_CXGBE, "%s: scheduling %p for DDP[%d] (flags %#lx/%#lx)", 1606dc964385SJohn Baldwin __func__, job, db_idx, ddp_flags, ddp_flags_mask); 1607dc964385SJohn Baldwin #endif 1608dc964385SJohn Baldwin /* Give the chip the go-ahead. */ 1609dc964385SJohn Baldwin t4_wrq_tx(sc, wr); 1610dc964385SJohn Baldwin db = &toep->db[db_idx]; 1611dc964385SJohn Baldwin db->cancel_pending = 0; 1612dc964385SJohn Baldwin db->job = job; 1613dc964385SJohn Baldwin db->ps = ps; 1614dc964385SJohn Baldwin toep->ddp_queueing = NULL; 1615dc964385SJohn Baldwin toep->ddp_flags |= buf_flag; 1616dc964385SJohn Baldwin toep->ddp_active_count++; 1617dc964385SJohn Baldwin if (toep->ddp_active_count == 1) { 1618dc964385SJohn Baldwin MPASS(toep->ddp_active_id == -1); 1619dc964385SJohn Baldwin toep->ddp_active_id = db_idx; 1620dc964385SJohn Baldwin CTR2(KTR_CXGBE, "%s: ddp_active_id = %d", __func__, 1621dc964385SJohn Baldwin toep->ddp_active_id); 1622dc964385SJohn Baldwin } 1623dc964385SJohn Baldwin goto restart; 1624dc964385SJohn Baldwin } 1625dc964385SJohn Baldwin 1626dc964385SJohn Baldwin void 1627dc964385SJohn Baldwin ddp_queue_toep(struct toepcb *toep) 1628dc964385SJohn Baldwin { 1629dc964385SJohn Baldwin 1630dc964385SJohn Baldwin DDP_ASSERT_LOCKED(toep); 1631dc964385SJohn Baldwin if (toep->ddp_flags & DDP_TASK_ACTIVE) 1632dc964385SJohn Baldwin return; 1633dc964385SJohn Baldwin toep->ddp_flags |= DDP_TASK_ACTIVE; 1634dc964385SJohn Baldwin hold_toepcb(toep); 1635dc964385SJohn Baldwin soaio_enqueue(&toep->ddp_requeue_task); 1636dc964385SJohn Baldwin } 1637dc964385SJohn Baldwin 1638dc964385SJohn Baldwin static void 1639dc964385SJohn Baldwin aio_ddp_requeue_task(void *context, int pending) 1640dc964385SJohn Baldwin { 1641dc964385SJohn Baldwin struct toepcb *toep = context; 1642dc964385SJohn Baldwin 1643dc964385SJohn Baldwin DDP_LOCK(toep); 1644dc964385SJohn Baldwin aio_ddp_requeue(toep); 1645dc964385SJohn Baldwin toep->ddp_flags &= ~DDP_TASK_ACTIVE; 1646dc964385SJohn Baldwin DDP_UNLOCK(toep); 1647dc964385SJohn Baldwin 1648dc964385SJohn Baldwin free_toepcb(toep); 1649dc964385SJohn Baldwin } 1650dc964385SJohn Baldwin 1651dc964385SJohn Baldwin static void 1652dc964385SJohn Baldwin t4_aio_cancel_active(struct kaiocb *job) 1653dc964385SJohn Baldwin { 1654dc964385SJohn Baldwin struct socket *so = job->fd_file->f_data; 1655dc964385SJohn Baldwin struct tcpcb *tp = so_sototcpcb(so); 1656dc964385SJohn Baldwin struct toepcb *toep = tp->t_toe; 1657dc964385SJohn Baldwin struct adapter *sc = td_adapter(toep->td); 1658dc964385SJohn Baldwin uint64_t valid_flag; 1659dc964385SJohn Baldwin int i; 1660dc964385SJohn Baldwin 1661dc964385SJohn Baldwin DDP_LOCK(toep); 1662dc964385SJohn Baldwin if (aio_cancel_cleared(job)) { 1663dc964385SJohn Baldwin DDP_UNLOCK(toep); 1664dc964385SJohn Baldwin aio_ddp_cancel_one(job); 1665dc964385SJohn Baldwin return; 1666dc964385SJohn Baldwin } 1667dc964385SJohn Baldwin 
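	/*
	 * Scan both DDP buffers for the one this job is currently
	 * scheduled into.  If found, clear that buffer's VALID flag in
	 * the TCB with a reply requested (cookie i + DDP_BUF0_INVALIDATED)
	 * and mark the buffer cancel_pending so the eventual TCB reply
	 * can finish the cancellation, or complete the job as a short
	 * read if some data had already been placed.
	 */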
1668dc964385SJohn Baldwin for (i = 0; i < nitems(toep->db); i++) { 1669dc964385SJohn Baldwin if (toep->db[i].job == job) { 1670dc964385SJohn Baldwin /* Should only ever get one cancel request for a job. */ 1671dc964385SJohn Baldwin MPASS(toep->db[i].cancel_pending == 0); 1672dc964385SJohn Baldwin 1673dc964385SJohn Baldwin /* 1674dc964385SJohn Baldwin * Invalidate this buffer. It will be 1675dc964385SJohn Baldwin * cancelled or partially completed once the 1676dc964385SJohn Baldwin * card ACKs the invalidate. 1677dc964385SJohn Baldwin */ 1678dc964385SJohn Baldwin valid_flag = i == 0 ? V_TF_DDP_BUF0_VALID(1) : 1679dc964385SJohn Baldwin V_TF_DDP_BUF1_VALID(1); 1680dc964385SJohn Baldwin t4_set_tcb_field_rpl(sc, toep, 1, W_TCB_RX_DDP_FLAGS, 1681dc964385SJohn Baldwin valid_flag, 0, i + DDP_BUF0_INVALIDATED); 1682dc964385SJohn Baldwin toep->db[i].cancel_pending = 1; 1683dc964385SJohn Baldwin CTR2(KTR_CXGBE, "%s: request %p marked pending", 1684dc964385SJohn Baldwin __func__, job); 1685dc964385SJohn Baldwin break; 1686dc964385SJohn Baldwin } 1687dc964385SJohn Baldwin } 1688dc964385SJohn Baldwin DDP_UNLOCK(toep); 1689dc964385SJohn Baldwin } 1690dc964385SJohn Baldwin 1691dc964385SJohn Baldwin static void 1692dc964385SJohn Baldwin t4_aio_cancel_queued(struct kaiocb *job) 1693dc964385SJohn Baldwin { 1694dc964385SJohn Baldwin struct socket *so = job->fd_file->f_data; 1695dc964385SJohn Baldwin struct tcpcb *tp = so_sototcpcb(so); 1696dc964385SJohn Baldwin struct toepcb *toep = tp->t_toe; 1697dc964385SJohn Baldwin 1698dc964385SJohn Baldwin DDP_LOCK(toep); 1699dc964385SJohn Baldwin if (!aio_cancel_cleared(job)) { 1700dc964385SJohn Baldwin TAILQ_REMOVE(&toep->ddp_aiojobq, job, list); 1701dc964385SJohn Baldwin toep->ddp_waiting_count--; 1702dc964385SJohn Baldwin if (toep->ddp_waiting_count == 0) 1703dc964385SJohn Baldwin ddp_queue_toep(toep); 1704dc964385SJohn Baldwin } 1705dc964385SJohn Baldwin CTR2(KTR_CXGBE, "%s: request %p cancelled", __func__, job); 1706dc964385SJohn Baldwin DDP_UNLOCK(toep); 1707dc964385SJohn Baldwin 1708dc964385SJohn Baldwin aio_ddp_cancel_one(job); 1709dc964385SJohn Baldwin } 1710dc964385SJohn Baldwin 1711dc964385SJohn Baldwin int 1712dc964385SJohn Baldwin t4_aio_queue_ddp(struct socket *so, struct kaiocb *job) 1713dc964385SJohn Baldwin { 1714dc964385SJohn Baldwin struct tcpcb *tp = so_sototcpcb(so); 1715dc964385SJohn Baldwin struct toepcb *toep = tp->t_toe; 1716dc964385SJohn Baldwin 1717dc964385SJohn Baldwin 1718dc964385SJohn Baldwin /* Ignore writes. */ 1719dc964385SJohn Baldwin if (job->uaiocb.aio_lio_opcode != LIO_READ) 1720dc964385SJohn Baldwin return (EOPNOTSUPP); 1721dc964385SJohn Baldwin 1722dc964385SJohn Baldwin DDP_LOCK(toep); 1723dc964385SJohn Baldwin 1724dc964385SJohn Baldwin /* 1725dc964385SJohn Baldwin * XXX: Think about possibly returning errors for ENOTCONN, 1726dc964385SJohn Baldwin * etc. Perhaps the caller would only queue the request 1727dc964385SJohn Baldwin * if it failed with EOPNOTSUPP? 
1728dc964385SJohn Baldwin */ 1729dc964385SJohn Baldwin 1730dc964385SJohn Baldwin #ifdef VERBOSE_TRACES 1731dc964385SJohn Baldwin CTR2(KTR_CXGBE, "%s: queueing %p", __func__, job); 1732dc964385SJohn Baldwin #endif 1733dc964385SJohn Baldwin if (!aio_set_cancel_function(job, t4_aio_cancel_queued)) 1734dc964385SJohn Baldwin panic("new job was cancelled"); 1735dc964385SJohn Baldwin TAILQ_INSERT_TAIL(&toep->ddp_aiojobq, job, list); 1736dc964385SJohn Baldwin toep->ddp_waiting_count++; 1737dc964385SJohn Baldwin toep->ddp_flags |= DDP_OK; 1738dc964385SJohn Baldwin 1739dc964385SJohn Baldwin /* 1740dc964385SJohn Baldwin * Try to handle this request synchronously. If this has 1741dc964385SJohn Baldwin * to block because the task is running, it will just bail 1742dc964385SJohn Baldwin * and let the task handle it instead. 1743dc964385SJohn Baldwin */ 1744dc964385SJohn Baldwin aio_ddp_requeue(toep); 1745dc964385SJohn Baldwin DDP_UNLOCK(toep); 1746dc964385SJohn Baldwin return (0); 1747dc964385SJohn Baldwin } 1748dc964385SJohn Baldwin 1749dc964385SJohn Baldwin int 1750dc964385SJohn Baldwin t4_ddp_mod_load(void) 1751dc964385SJohn Baldwin { 1752dc964385SJohn Baldwin 1753dc964385SJohn Baldwin TAILQ_INIT(&ddp_orphan_pagesets); 1754dc964385SJohn Baldwin mtx_init(&ddp_orphan_pagesets_lock, "ddp orphans", NULL, MTX_DEF); 1755dc964385SJohn Baldwin TASK_INIT(&ddp_orphan_task, 0, ddp_free_orphan_pagesets, NULL); 1756dc964385SJohn Baldwin return (0); 1757dc964385SJohn Baldwin } 1758dc964385SJohn Baldwin 1759dc964385SJohn Baldwin void 1760dc964385SJohn Baldwin t4_ddp_mod_unload(void) 1761dc964385SJohn Baldwin { 1762dc964385SJohn Baldwin 1763dc964385SJohn Baldwin taskqueue_drain(taskqueue_thread, &ddp_orphan_task); 1764dc964385SJohn Baldwin MPASS(TAILQ_EMPTY(&ddp_orphan_pagesets)); 1765dc964385SJohn Baldwin mtx_destroy(&ddp_orphan_pagesets_lock); 1766dc964385SJohn Baldwin } 1767e682d02eSNavdeep Parhar #endif 1768