/*-
 * Copyright (c) 2012 The FreeBSD Foundation
 * Copyright (c) 2015 Chelsio Communications, Inc.
 * All rights reserved.
 *
 * This software was developed by Edward Tomasz Napierala under sponsorship
 * from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * cxgbei implementation of iSCSI Common Layer kobj(9) interface.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/condvar.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/sx.h>
#include <sys/uio.h>
#include <machine/bus.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/toecore.h>

#include <dev/iscsi/icl.h>
#include <dev/iscsi/iscsi_proto.h>
#include <icl_conn_if.h>

#include <cam/scsi/scsi_all.h>
#include <cam/scsi/scsi_da.h>
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_backend.h>
#include <cam/ctl/ctl_error.h>
#include <cam/ctl/ctl_frontend.h>
#include <cam/ctl/ctl_debug.h>
#include <cam/ctl/ctl_ha.h>
#include <cam/ctl/ctl_ioctl.h>

#include <cam/cam.h>
#include <cam/cam_ccb.h>
#include <cam/cam_xpt.h>
#include <cam/cam_debug.h>
#include <cam/cam_sim.h>
#include <cam/cam_xpt_sim.h>
#include <cam/cam_xpt_periph.h>
#include <cam/cam_periph.h>
#include <cam/cam_compat.h>
#include <cam/scsi/scsi_message.h>

#include "common/common.h"
#include "common/t4_regs.h"
#include "common/t4_tcb.h"
#include "tom/t4_tom.h"
#include "cxgbei.h"

/*
 * Use the page pod tag for the TT hash.
 */
#define	TT_HASH(icc, tt)	(G_PPOD_TAG(tt) & (icc)->cmp_hash_mask)

struct cxgbei_ddp_state {
	struct ppod_reservation prsv;
	struct cxgbei_cmp cmp;
};

static MALLOC_DEFINE(M_CXGBEI, "cxgbei", "cxgbei(4)");

SYSCTL_NODE(_kern_icl, OID_AUTO, cxgbei, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "Chelsio iSCSI offload");
static int first_burst_length = 8192;
SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, first_burst_length, CTLFLAG_RWTUN,
    &first_burst_length, 0, "First burst length");
static int max_burst_length = 2 * 1024 * 1024;
SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, max_burst_length, CTLFLAG_RWTUN,
    &max_burst_length, 0, "Maximum burst length");
static int sendspace = 1048576;
SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, sendspace, CTLFLAG_RWTUN,
    &sendspace, 0, "Default send socket buffer size");
static int recvspace = 1048576;
SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, recvspace, CTLFLAG_RWTUN,
    &recvspace, 0, "Default receive socket buffer size");

static volatile u_int icl_cxgbei_ncons;

#define	ICL_CONN_LOCK(X)		mtx_lock(X->ic_lock)
#define	ICL_CONN_UNLOCK(X)		mtx_unlock(X->ic_lock)
#define	ICL_CONN_LOCK_ASSERT(X)		mtx_assert(X->ic_lock, MA_OWNED)
#define	ICL_CONN_LOCK_ASSERT_NOT(X)	mtx_assert(X->ic_lock, MA_NOTOWNED)

static icl_conn_new_pdu_t	icl_cxgbei_conn_new_pdu;
static icl_conn_pdu_data_segment_length_t
				icl_cxgbei_conn_pdu_data_segment_length;
static icl_conn_pdu_append_data_t	icl_cxgbei_conn_pdu_append_data;
static icl_conn_pdu_get_data_t	icl_cxgbei_conn_pdu_get_data;
static icl_conn_pdu_queue_t	icl_cxgbei_conn_pdu_queue;
static icl_conn_pdu_queue_cb_t	icl_cxgbei_conn_pdu_queue_cb;
static icl_conn_handoff_t	icl_cxgbei_conn_handoff;
static icl_conn_free_t		icl_cxgbei_conn_free;
static icl_conn_close_t		icl_cxgbei_conn_close;
static icl_conn_task_setup_t	icl_cxgbei_conn_task_setup;
static icl_conn_task_done_t	icl_cxgbei_conn_task_done;
static icl_conn_transfer_setup_t	icl_cxgbei_conn_transfer_setup;
static icl_conn_transfer_done_t	icl_cxgbei_conn_transfer_done;

static kobj_method_t icl_cxgbei_methods[] = {
	KOBJMETHOD(icl_conn_new_pdu, icl_cxgbei_conn_new_pdu),
	KOBJMETHOD(icl_conn_pdu_free, icl_cxgbei_conn_pdu_free),
	KOBJMETHOD(icl_conn_pdu_data_segment_length,
	    icl_cxgbei_conn_pdu_data_segment_length),
	KOBJMETHOD(icl_conn_pdu_append_data, icl_cxgbei_conn_pdu_append_data),
	KOBJMETHOD(icl_conn_pdu_get_data, icl_cxgbei_conn_pdu_get_data),
	KOBJMETHOD(icl_conn_pdu_queue, icl_cxgbei_conn_pdu_queue),
	KOBJMETHOD(icl_conn_pdu_queue_cb, icl_cxgbei_conn_pdu_queue_cb),
	KOBJMETHOD(icl_conn_handoff, icl_cxgbei_conn_handoff),
	KOBJMETHOD(icl_conn_free, icl_cxgbei_conn_free),
	KOBJMETHOD(icl_conn_close, icl_cxgbei_conn_close),
	KOBJMETHOD(icl_conn_task_setup, icl_cxgbei_conn_task_setup),
	KOBJMETHOD(icl_conn_task_done, icl_cxgbei_conn_task_done),
	KOBJMETHOD(icl_conn_transfer_setup, icl_cxgbei_conn_transfer_setup),
	KOBJMETHOD(icl_conn_transfer_done, icl_cxgbei_conn_transfer_done),
	{ 0, 0 }
};

DEFINE_CLASS(icl_cxgbei, icl_cxgbei_methods, sizeof(struct icl_cxgbei_conn));

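/*
 * A short summary of the TX PDU reference counting implemented by the
 * functions below: icl_cxgbei_new_pdu() creates a PDU with ref_cnt of 1.
 * Each ICL_NOCOPY buffer appended by icl_cxgbei_conn_pdu_append_data()
 * attaches the caller's buffer to an mbuf with m_extaddref(), which takes an
 * extra reference on the PDU; the mbuf code releases that reference when the
 * data mbuf chain is freed and invokes icl_cxgbei_mbuf_done() if it was the
 * last one.  Whichever path drops the final reference (finalize_pdu(),
 * icl_cxgbei_pdu_done(), or icl_cxgbei_mbuf_done()) ends up in
 * icl_cxgbei_pdu_call_cb(), so the completion callback (icp->cb), if any,
 * runs exactly once.
 */
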
void
icl_cxgbei_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
	struct icl_cxgbei_pdu *icp = ip_to_icp(ip);

	KASSERT(icp->ref_cnt != 0, ("freeing deleted PDU"));
	MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE);
	MPASS(ic == ip->ip_conn);

	m_freem(ip->ip_ahs_mbuf);
	m_freem(ip->ip_data_mbuf);
	m_freem(ip->ip_bhs_mbuf);

	KASSERT(ic != NULL || icp->ref_cnt == 1,
	    ("orphaned PDU has outstanding references"));

	if (atomic_fetchadd_int(&icp->ref_cnt, -1) != 1)
		return;

	free(icp, M_CXGBEI);
#ifdef DIAGNOSTIC
	if (__predict_true(ic != NULL))
		refcount_release(&ic->ic_outstanding_pdus);
#endif
}

static void
icl_cxgbei_pdu_call_cb(struct icl_pdu *ip)
{
	struct icl_cxgbei_pdu *icp = ip_to_icp(ip);

	MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE);

	if (icp->cb != NULL)
		icp->cb(ip, icp->error);
#ifdef DIAGNOSTIC
	if (__predict_true(ip->ip_conn != NULL))
		refcount_release(&ip->ip_conn->ic_outstanding_pdus);
#endif
	free(icp, M_CXGBEI);
}

static void
icl_cxgbei_pdu_done(struct icl_pdu *ip, int error)
{
	struct icl_cxgbei_pdu *icp = ip_to_icp(ip);

	if (error != 0)
		icp->error = error;

	m_freem(ip->ip_ahs_mbuf);
	ip->ip_ahs_mbuf = NULL;
	m_freem(ip->ip_data_mbuf);
	ip->ip_data_mbuf = NULL;
	m_freem(ip->ip_bhs_mbuf);
	ip->ip_bhs_mbuf = NULL;

	/*
	 * All other references to this PDU should have been dropped
	 * by the m_freem() of ip_data_mbuf.
	 */
	if (atomic_fetchadd_int(&icp->ref_cnt, -1) == 1)
		icl_cxgbei_pdu_call_cb(ip);
	else
		__assert_unreachable();
}

static void
icl_cxgbei_mbuf_done(struct mbuf *mb)
{

	struct icl_cxgbei_pdu *icp = (struct icl_cxgbei_pdu *)mb->m_ext.ext_arg1;

	/*
	 * NB: mb_free_mext() might leave ref_cnt as 1 without
	 * decrementing it if it hits the fast path in the ref_cnt
	 * check.
	 */
	icl_cxgbei_pdu_call_cb(&icp->ip);
}

struct icl_pdu *
icl_cxgbei_new_pdu(int flags)
{
	struct icl_cxgbei_pdu *icp;
	struct icl_pdu *ip;
	struct mbuf *m;

	icp = malloc(sizeof(*icp), M_CXGBEI, flags | M_ZERO);
	if (__predict_false(icp == NULL))
		return (NULL);

	icp->icp_signature = CXGBEI_PDU_SIGNATURE;
	icp->ref_cnt = 1;
	ip = &icp->ip;

	m = m_gethdr(flags, MT_DATA);
	if (__predict_false(m == NULL)) {
		free(icp, M_CXGBEI);
		return (NULL);
	}

	ip->ip_bhs_mbuf = m;
	ip->ip_bhs = mtod(m, struct iscsi_bhs *);
	memset(ip->ip_bhs, 0, sizeof(*ip->ip_bhs));
	m->m_len = sizeof(struct iscsi_bhs);
	m->m_pkthdr.len = m->m_len;

	return (ip);
}

void
icl_cxgbei_new_pdu_set_conn(struct icl_pdu *ip, struct icl_conn *ic)
{

	ip->ip_conn = ic;
#ifdef DIAGNOSTIC
	refcount_acquire(&ic->ic_outstanding_pdus);
#endif
}

/*
 * Allocate an icl_pdu with an empty BHS to be filled in by the caller.
 */
static struct icl_pdu *
icl_cxgbei_conn_new_pdu(struct icl_conn *ic, int flags)
{
	struct icl_pdu *ip;

	ip = icl_cxgbei_new_pdu(flags);
	if (__predict_false(ip == NULL))
		return (NULL);
	icl_cxgbei_new_pdu_set_conn(ip, ic);

	return (ip);
}

static size_t
icl_pdu_data_segment_length(const struct icl_pdu *request)
{
	uint32_t len = 0;

	len += request->ip_bhs->bhs_data_segment_len[0];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[1];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[2];

	return (len);
}

size_t
icl_cxgbei_conn_pdu_data_segment_length(struct icl_conn *ic,
    const struct icl_pdu *request)
{

	return (icl_pdu_data_segment_length(request));
}

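/*
 * finalize_pdu() below converts an icl_pdu into a single mbuf chain (BHS
 * followed by the padded data segment) ready to be queued to the TOE.  If
 * the data segment is larger than the connection's
 * MaxSendDataSegmentLength, the chain is marked for iSCSI segmentation
 * offload (ISO): the DataSegmentLength stored in the BHS and the per-PDU MSS
 * are both set to ic_max_send_data_segment_length and the firmware slices
 * the payload into a burst of Data-In/Data-Out PDUs on the wire, setting F
 * only on the final PDU of the burst (which is why F is cleared in the
 * template BHS here).
 */
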
static struct mbuf *
finalize_pdu(struct icl_cxgbei_conn *icc, struct icl_cxgbei_pdu *icp)
{
	struct icl_pdu *ip = &icp->ip;
	uint8_t ulp_submode, padding;
	struct mbuf *m, *last;
	struct iscsi_bhs *bhs;
	int data_len;

	/*
	 * Fix up the data segment mbuf first.
	 */
	m = ip->ip_data_mbuf;
	ulp_submode = icc->ulp_submode;
	if (m != NULL) {
		last = m_last(m);

		/*
		 * Round up the data segment to a 4B boundary.  Pad with 0 if
		 * necessary.  There will definitely be room in the mbuf.
		 */
		padding = roundup2(ip->ip_data_len, 4) - ip->ip_data_len;
		if (padding != 0) {
			MPASS(padding <= M_TRAILINGSPACE(last));
			bzero(mtod(last, uint8_t *) + last->m_len, padding);
			last->m_len += padding;
		}
	} else {
		MPASS(ip->ip_data_len == 0);
		ulp_submode &= ~ULP_CRC_DATA;
		padding = 0;
	}

	/*
	 * Now the header mbuf that has the BHS.
	 */
	m = ip->ip_bhs_mbuf;
	MPASS(m->m_pkthdr.len == sizeof(struct iscsi_bhs));
	MPASS(m->m_len == sizeof(struct iscsi_bhs));

	bhs = ip->ip_bhs;
	data_len = ip->ip_data_len;
	if (data_len > icc->ic.ic_max_send_data_segment_length) {
		struct iscsi_bhs_data_in *bhsdi;
		int flags;

		KASSERT(padding == 0, ("%s: ISO with padding %d for icp %p",
		    __func__, padding, icp));
		switch (bhs->bhs_opcode) {
		case ISCSI_BHS_OPCODE_SCSI_DATA_OUT:
			flags = 1;
			break;
		case ISCSI_BHS_OPCODE_SCSI_DATA_IN:
			flags = 2;
			break;
		default:
			panic("invalid opcode %#x for ISO", bhs->bhs_opcode);
		}
		data_len = icc->ic.ic_max_send_data_segment_length;
		bhsdi = (struct iscsi_bhs_data_in *)bhs;
		if (bhsdi->bhsdi_flags & BHSDI_FLAGS_F) {
			/*
			 * Firmware will set F on the final PDU in the
			 * burst.
			 */
			flags |= CXGBE_ISO_F;
			bhsdi->bhsdi_flags &= ~BHSDI_FLAGS_F;
		}
		set_mbuf_iscsi_iso(m, true);
		set_mbuf_iscsi_iso_flags(m, flags);
		set_mbuf_iscsi_iso_mss(m, data_len);
	}

	bhs->bhs_data_segment_len[2] = data_len;
	bhs->bhs_data_segment_len[1] = data_len >> 8;
	bhs->bhs_data_segment_len[0] = data_len >> 16;

	/*
	 * Extract mbuf chain from PDU.
	 */
	m->m_pkthdr.len += ip->ip_data_len + padding;
	m->m_next = ip->ip_data_mbuf;
	set_mbuf_ulp_submode(m, ulp_submode);
	ip->ip_bhs_mbuf = NULL;
	ip->ip_data_mbuf = NULL;
	ip->ip_bhs = NULL;

	/*
	 * Drop PDU reference on icp.  Additional references might
	 * still be held by zero-copy PDU buffers (ICL_NOCOPY).
	 */
	if (atomic_fetchadd_int(&icp->ref_cnt, -1) == 1)
		icl_cxgbei_pdu_call_cb(ip);

	return (m);
}

int
icl_cxgbei_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *ip,
    const void *addr, size_t len, int flags)
{
	struct icl_cxgbei_pdu *icp = ip_to_icp(ip);
	struct mbuf *m, *m_tail;
	const char *src;

	MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE);
	MPASS(ic == ip->ip_conn);
	KASSERT(len > 0, ("%s: len is %jd", __func__, (intmax_t)len));

	m_tail = ip->ip_data_mbuf;
	if (m_tail != NULL)
		for (; m_tail->m_next != NULL; m_tail = m_tail->m_next)
			;

	if (flags & ICL_NOCOPY) {
		m = m_get(flags & ~ICL_NOCOPY, MT_DATA);
		if (m == NULL) {
			ICL_WARN("failed to allocate mbuf");
			return (ENOMEM);
		}

		m->m_flags |= M_RDONLY;
		m_extaddref(m, __DECONST(char *, addr), len, &icp->ref_cnt,
		    icl_cxgbei_mbuf_done, icp, NULL);
		m->m_len = len;
		if (ip->ip_data_mbuf == NULL) {
			ip->ip_data_mbuf = m;
			ip->ip_data_len = len;
		} else {
			m_tail->m_next = m;
			m_tail = m_tail->m_next;
			ip->ip_data_len += len;
		}

		return (0);
	}

	src = (const char *)addr;

	/* Allocate as jumbo mbufs of size MJUM16BYTES. */
	while (len >= MJUM16BYTES) {
		m = m_getjcl(M_NOWAIT, MT_DATA, 0, MJUM16BYTES);
		if (__predict_false(m == NULL)) {
			if ((flags & M_WAITOK) != 0) {
				/* Fall back to non-jumbo mbufs. */
				break;
			}
			return (ENOMEM);
		}
		memcpy(mtod(m, void *), src, MJUM16BYTES);
		m->m_len = MJUM16BYTES;
		if (ip->ip_data_mbuf == NULL) {
			ip->ip_data_mbuf = m_tail = m;
			ip->ip_data_len = MJUM16BYTES;
		} else {
			m_tail->m_next = m;
			m_tail = m_tail->m_next;
			ip->ip_data_len += MJUM16BYTES;
		}
		src += MJUM16BYTES;
		len -= MJUM16BYTES;
	}

	/* Allocate mbuf chain for the remaining data. */
	if (len != 0) {
		m = m_getm2(NULL, len, flags, MT_DATA, 0);
		if (__predict_false(m == NULL))
			return (ENOMEM);
		if (ip->ip_data_mbuf == NULL) {
			ip->ip_data_mbuf = m;
			ip->ip_data_len = len;
		} else {
			m_tail->m_next = m;
			ip->ip_data_len += len;
		}
		for (; m != NULL; m = m->m_next) {
			m->m_len = min(len, M_SIZE(m));
			memcpy(mtod(m, void *), src, m->m_len);
			src += m->m_len;
			len -= m->m_len;
		}
		MPASS(len == 0);
	}
	MPASS(ip->ip_data_len <= max(ic->ic_max_send_data_segment_length,
	    ic->ic_hw_isomax));

	return (0);
}

void
icl_cxgbei_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
    size_t off, void *addr, size_t len)
{
	struct icl_cxgbei_pdu *icp = ip_to_icp(ip);

	if (icp->icp_flags & ICPF_RX_DDP)
		return;		/* data is DDP'ed, no need to copy */
	m_copydata(ip->ip_data_mbuf, off, len, addr);
}

void
icl_cxgbei_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{
	icl_cxgbei_conn_pdu_queue_cb(ic, ip, NULL);
}

void
icl_cxgbei_conn_pdu_queue_cb(struct icl_conn *ic, struct icl_pdu *ip,
    icl_pdu_cb cb)
{
	struct epoch_tracker et;
	struct icl_cxgbei_conn *icc = ic_to_icc(ic);
	struct icl_cxgbei_pdu *icp = ip_to_icp(ip);
	struct socket *so = ic->ic_socket;
	struct toepcb *toep = icc->toep;
	struct inpcb *inp;
	struct mbuf *m;

	MPASS(ic == ip->ip_conn);
	MPASS(ip->ip_bhs_mbuf != NULL);
	/* The kernel doesn't generate PDUs with AHS. */
	MPASS(ip->ip_ahs_mbuf == NULL && ip->ip_ahs_len == 0);

	ICL_CONN_LOCK_ASSERT(ic);

	icp->cb = cb;

	/* NOTE: sowriteable without so_snd lock is a mostly harmless race. */
	if (ic->ic_disconnecting || so == NULL || !sowriteable(so)) {
		icl_cxgbei_pdu_done(ip, ENOTCONN);
		return;
	}

	m = finalize_pdu(icc, icp);
	M_ASSERTPKTHDR(m);
	MPASS((m->m_pkthdr.len & 3) == 0);

	/*
	 * Do not get inp from toep->inp as the toepcb might have detached
	 * already.
	 */
	inp = sotoinpcb(so);
	CURVNET_SET(toep->vnet);
	NET_EPOCH_ENTER(et);
	INP_WLOCK(inp);
	if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) ||
	    __predict_false((toep->flags & TPF_ATTACHED) == 0))
		m_freem(m);
	else {
		mbufq_enqueue(&toep->ulp_pduq, m);
		t4_push_pdus(icc->sc, toep, 0);
	}
	INP_WUNLOCK(inp);
	NET_EPOCH_EXIT(et);
	CURVNET_RESTORE();
}

static struct icl_conn *
icl_cxgbei_new_conn(const char *name, struct mtx *lock)
{
	struct icl_cxgbei_conn *icc;
	struct icl_conn *ic;

	refcount_acquire(&icl_cxgbei_ncons);

	icc = (struct icl_cxgbei_conn *)kobj_create(&icl_cxgbei_class, M_CXGBE,
	    M_WAITOK | M_ZERO);
	icc->icc_signature = CXGBEI_CONN_SIGNATURE;
	STAILQ_INIT(&icc->rcvd_pdus);

	icc->cmp_table = hashinit(64, M_CXGBEI, &icc->cmp_hash_mask);
	mtx_init(&icc->cmp_lock, "cxgbei_cmp", NULL, MTX_DEF);

	ic = &icc->ic;
	ic->ic_lock = lock;

#ifdef DIAGNOSTIC
	refcount_init(&ic->ic_outstanding_pdus, 0);
#endif
	ic->ic_name = name;
	ic->ic_offload = "cxgbei";
	ic->ic_unmapped = false;

	CTR2(KTR_CXGBE, "%s: icc %p", __func__, icc);

	return (ic);
}

void
icl_cxgbei_conn_free(struct icl_conn *ic)
{
	struct icl_cxgbei_conn *icc = ic_to_icc(ic);

	MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);

	CTR2(KTR_CXGBE, "%s: icc %p", __func__, icc);

	mtx_destroy(&icc->cmp_lock);
	hashdestroy(icc->cmp_table, M_CXGBEI, icc->cmp_hash_mask);
	kobj_delete((struct kobj *)icc, M_CXGBE);
	refcount_release(&icl_cxgbei_ncons);
}

static int
icl_cxgbei_setsockopt(struct icl_conn *ic, struct socket *so, int sspace,
    int rspace)
{
	struct sockopt opt;
	int error, one = 1, ss, rs;

	ss = max(sendspace, sspace);
	rs = max(recvspace, rspace);

	error = soreserve(so, ss, rs);
	if (error != 0) {
		icl_cxgbei_conn_close(ic);
		return (error);
	}
	SOCKBUF_LOCK(&so->so_snd);
	so->so_snd.sb_flags |= SB_AUTOSIZE;
	SOCKBUF_UNLOCK(&so->so_snd);
	SOCKBUF_LOCK(&so->so_rcv);
	so->so_rcv.sb_flags |= SB_AUTOSIZE;
	SOCKBUF_UNLOCK(&so->so_rcv);

	/*
	 * Disable Nagle.
	 */
	bzero(&opt, sizeof(opt));
	opt.sopt_dir = SOPT_SET;
	opt.sopt_level = IPPROTO_TCP;
	opt.sopt_name = TCP_NODELAY;
	opt.sopt_val = &one;
	opt.sopt_valsize = sizeof(one);
	error = sosetopt(so, &opt);
	if (error != 0) {
		icl_cxgbei_conn_close(ic);
		return (error);
	}

	return (0);
}

/*
 * Request/response structure used to find out the adapter offloading a socket.
 */
struct find_ofld_adapter_rr {
	struct socket *so;
	struct adapter *sc;	/* result */
};

static void
find_offload_adapter(struct adapter *sc, void *arg)
{
	struct find_ofld_adapter_rr *fa = arg;
	struct socket *so = fa->so;
	struct tom_data *td = sc->tom_softc;
	struct tcpcb *tp;
	struct inpcb *inp;

	/* Non-TCP sockets were filtered out earlier. */
	MPASS(so->so_proto->pr_protocol == IPPROTO_TCP);

	if (fa->sc != NULL)
		return;	/* Found already. */

	if (td == NULL)
		return;	/* TOE not enabled on this adapter. */

	inp = sotoinpcb(so);
	INP_WLOCK(inp);
	if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) {
		tp = intotcpcb(inp);
		if (tp->t_flags & TF_TOE && tp->tod == &td->tod)
			fa->sc = sc;	/* Found. */
	}
	INP_WUNLOCK(inp);
}

static bool
is_memfree(struct adapter *sc)
{
	uint32_t em;

	em = t4_read_reg(sc, A_MA_TARGET_MEM_ENABLE);
	if ((em & F_EXT_MEM_ENABLE) != 0)
		return (false);
	if (is_t5(sc) && (em & F_EXT_MEM1_ENABLE) != 0)
		return (false);
	return (true);
}

/* XXXNP: move this to t4_tom. */
static void
send_iscsi_flowc_wr(struct adapter *sc, struct toepcb *toep, int maxlen)
{
	struct wrqe *wr;
	struct fw_flowc_wr *flowc;
	const u_int nparams = 1;
	u_int flowclen;
	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];

	flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);

	wr = alloc_wrqe(roundup2(flowclen, 16), &toep->ofld_txq->wrq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	flowc = wrtod(wr);
	memset(flowc, 0, wr->wr_len);

	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
	    V_FW_FLOWC_WR_NPARAMS(nparams));
	flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
	    V_FW_WR_FLOWID(toep->tid));

	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_TXDATAPLEN_MAX;
	flowc->mnemval[0].val = htobe32(maxlen);

	txsd->tx_credits = howmany(flowclen, 16);
	txsd->plen = 0;
	KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0,
	    ("%s: not enough credits (%d)", __func__, toep->tx_credits));
	toep->tx_credits -= txsd->tx_credits;
	if (__predict_false(++toep->txsd_pidx == toep->txsd_total))
		toep->txsd_pidx = 0;
	toep->txsd_avail--;

	t4_wrq_tx(sc, wr);
}

static void
set_ulp_mode_iscsi(struct adapter *sc, struct toepcb *toep, u_int ulp_submode)
{
	uint64_t val;

	CTR3(KTR_CXGBE, "%s: tid %u, ULP_MODE_ISCSI, submode=%#x",
	    __func__, toep->tid, ulp_submode);

	val = V_TCB_ULP_TYPE(ULP_MODE_ISCSI) | V_TCB_ULP_RAW(ulp_submode);
	t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_ULP_TYPE,
	    V_TCB_ULP_TYPE(M_TCB_ULP_TYPE) | V_TCB_ULP_RAW(M_TCB_ULP_RAW), val,
	    0, 0);

	val = V_TF_RX_FLOW_CONTROL_DISABLE(1ULL);
	t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_T_FLAGS, val, val, 0, 0);
}

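/*
 * Handoff of a connected TCP socket from the iSCSI stack to the offload
 * driver.  icl_cxgbei_conn_handoff() below takes the socket away from
 * userland, finds the adapter whose TOE is carrying the connection, switches
 * the TCB to ULP_MODE_ISCSI with the negotiated header/data digest bits in
 * the ULP submode, sends a FLOWC work request capping the TX PDU (or ISO
 * burst) length, and finally sizes the socket buffers to fit whole PDUs.
 */
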
/*
 * XXXNP: Who is responsible for cleaning up the socket if this returns with an
 * error?  Review all error paths.
 *
 * XXXNP: What happens to the socket's fd reference if the operation is
 * successful, and how does that affect the socket's life cycle?
 */
int
icl_cxgbei_conn_handoff(struct icl_conn *ic, int fd)
{
	struct icl_cxgbei_conn *icc = ic_to_icc(ic);
	struct cxgbei_data *ci;
	struct find_ofld_adapter_rr fa;
	struct file *fp;
	struct socket *so;
	struct inpcb *inp;
	struct tcpcb *tp;
	struct toepcb *toep;
	cap_rights_t rights;
	u_int max_rx_pdu_len, max_tx_pdu_len;
	int error, max_iso_pdus;

	MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
	ICL_CONN_LOCK_ASSERT_NOT(ic);

	/*
	 * Steal the socket from userland.
	 */
	error = fget(curthread, fd,
	    cap_rights_init_one(&rights, CAP_SOCK_CLIENT), &fp);
	if (error != 0)
		return (error);
	if (fp->f_type != DTYPE_SOCKET) {
		fdrop(fp, curthread);
		return (EINVAL);
	}
	so = fp->f_data;
	if (so->so_type != SOCK_STREAM ||
	    so->so_proto->pr_protocol != IPPROTO_TCP) {
		fdrop(fp, curthread);
		return (EINVAL);
	}

	ICL_CONN_LOCK(ic);
	if (ic->ic_socket != NULL) {
		ICL_CONN_UNLOCK(ic);
		fdrop(fp, curthread);
		return (EBUSY);
	}
	ic->ic_disconnecting = false;
	ic->ic_socket = so;
	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	fdrop(fp, curthread);
	ICL_CONN_UNLOCK(ic);

	/* Find the adapter offloading this socket. */
	fa.sc = NULL;
	fa.so = so;
	t4_iterate(find_offload_adapter, &fa);
	if (fa.sc == NULL)
		return (EINVAL);
	icc->sc = fa.sc;
	ci = icc->sc->iscsi_ulp_softc;

	max_rx_pdu_len = ISCSI_BHS_SIZE + ic->ic_max_recv_data_segment_length;
	max_tx_pdu_len = ISCSI_BHS_SIZE + ic->ic_max_send_data_segment_length;
	if (ic->ic_header_crc32c) {
		max_rx_pdu_len += ISCSI_HEADER_DIGEST_SIZE;
		max_tx_pdu_len += ISCSI_HEADER_DIGEST_SIZE;
	}
	if (ic->ic_data_crc32c) {
		max_rx_pdu_len += ISCSI_DATA_DIGEST_SIZE;
		max_tx_pdu_len += ISCSI_DATA_DIGEST_SIZE;
	}

	inp = sotoinpcb(so);
	INP_WLOCK(inp);
	tp = intotcpcb(inp);
	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) {
		INP_WUNLOCK(inp);
		return (EBUSY);
	}

	/*
	 * The socket cannot have been "unoffloaded" if we got here.
	 */
	MPASS(tp->t_flags & TF_TOE);
	MPASS(tp->tod != NULL);
	MPASS(tp->t_toe != NULL);
	toep = tp->t_toe;
	MPASS(toep->vi->adapter == icc->sc);

	if (ulp_mode(toep) != ULP_MODE_NONE) {
		INP_WUNLOCK(inp);
		return (EINVAL);
	}

	icc->toep = toep;
	icc->cwt = cxgbei_select_worker_thread(icc);

	icc->ulp_submode = 0;
	if (ic->ic_header_crc32c)
		icc->ulp_submode |= ULP_CRC_HEADER;
	if (ic->ic_data_crc32c)
		icc->ulp_submode |= ULP_CRC_DATA;

	if (icc->sc->tt.iso && chip_id(icc->sc) >= CHELSIO_T5 &&
	    !is_memfree(icc->sc)) {
		max_iso_pdus = CXGBEI_MAX_ISO_PAYLOAD / max_tx_pdu_len;
		ic->ic_hw_isomax = max_iso_pdus *
		    ic->ic_max_send_data_segment_length;
	} else
		max_iso_pdus = 1;

	toep->params.ulp_mode = ULP_MODE_ISCSI;
	toep->ulpcb = icc;

	send_iscsi_flowc_wr(icc->sc, toep,
	    roundup(max_iso_pdus * max_tx_pdu_len, tp->t_maxseg));
	set_ulp_mode_iscsi(icc->sc, toep, icc->ulp_submode);
	INP_WUNLOCK(inp);

	return (icl_cxgbei_setsockopt(ic, so, max_tx_pdu_len, max_rx_pdu_len));
}

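/*
 * Teardown, in the order implemented below: stop new transmissions, wait for
 * the receive worker threads to let go of the connection, toss any received
 * PDUs that were never handed to the iSCSI layer, and then close the socket.
 * If the connection was offloaded, an extra hold on the toepcb is used to
 * sleep until the final CPL arrives, so that callers may safely free task
 * and transfer buffers once this function returns.
 */
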
void
icl_cxgbei_conn_close(struct icl_conn *ic)
{
	struct icl_cxgbei_conn *icc = ic_to_icc(ic);
	struct icl_pdu *ip;
	struct socket *so;
	struct sockbuf *sb;
	struct inpcb *inp;
	struct toepcb *toep = icc->toep;

	MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
	ICL_CONN_LOCK_ASSERT_NOT(ic);

	ICL_CONN_LOCK(ic);
	so = ic->ic_socket;
	if (ic->ic_disconnecting || so == NULL) {
		CTR4(KTR_CXGBE, "%s: icc %p (disconnecting = %d), so %p",
		    __func__, icc, ic->ic_disconnecting, so);
		ICL_CONN_UNLOCK(ic);
		return;
	}
	ic->ic_disconnecting = true;

#ifdef DIAGNOSTIC
	KASSERT(ic->ic_outstanding_pdus == 0,
	    ("destroying session with %d outstanding PDUs",
	    ic->ic_outstanding_pdus));
#endif
	ICL_CONN_UNLOCK(ic);

	CTR3(KTR_CXGBE, "%s: tid %d, icc %p", __func__, toep ? toep->tid : -1,
	    icc);
	inp = sotoinpcb(so);
	sb = &so->so_rcv;
	INP_WLOCK(inp);
	if (toep != NULL) {	/* NULL if connection was never offloaded. */
		toep->ulpcb = NULL;

		/* Discard PDUs queued for TX. */
		mbufq_drain(&toep->ulp_pduq);

		/*
		 * Wait for the cwt threads to stop processing this
		 * connection.
		 */
		SOCKBUF_LOCK(sb);
		if (icc->rx_flags & RXF_ACTIVE) {
			volatile u_int *p = &icc->rx_flags;

			SOCKBUF_UNLOCK(sb);
			INP_WUNLOCK(inp);

			while (*p & RXF_ACTIVE)
				pause("conclo", 1);

			INP_WLOCK(inp);
			SOCKBUF_LOCK(sb);
		}

		/*
		 * Discard received PDUs not passed to the iSCSI
		 * layer.
		 */
		while (!STAILQ_EMPTY(&icc->rcvd_pdus)) {
			ip = STAILQ_FIRST(&icc->rcvd_pdus);
			STAILQ_REMOVE_HEAD(&icc->rcvd_pdus, ip_next);
			icl_cxgbei_pdu_done(ip, ENOTCONN);
		}
		SOCKBUF_UNLOCK(sb);

		/*
		 * Grab a reference to use when waiting for the final
		 * CPL to be received.  If toep->inp is NULL, then
		 * final_cpl_received() has already been called (e.g.
		 * due to the peer sending a RST).
		 */
		if (toep->inp != NULL) {
			toep = hold_toepcb(toep);
			toep->flags |= TPF_WAITING_FOR_FINAL;
		} else
			toep = NULL;
	}
	INP_WUNLOCK(inp);

	ICL_CONN_LOCK(ic);
	ic->ic_socket = NULL;
	ICL_CONN_UNLOCK(ic);

	/*
	 * XXXNP: we should send RST instead of FIN when PDUs held in various
	 * queues were purged instead of delivered reliably, but soabort isn't
	 * really general purpose and wouldn't do the right thing here.
	 */
	soclose(so);

	/*
	 * Wait for the socket to fully close.  This ensures any
	 * pending received data has been received (and in particular,
	 * any data that would be received by DDP has been handled).
	 * Callers assume that it is safe to free buffers for tasks
	 * and transfers after this function returns.
	 */
	if (toep != NULL) {
		struct mtx *lock = mtx_pool_find(mtxpool_sleep, toep);

		mtx_lock(lock);
		while ((toep->flags & TPF_WAITING_FOR_FINAL) != 0)
			mtx_sleep(toep, lock, PSOCK, "conclo2", 0);
		mtx_unlock(lock);
		free_toepcb(toep);
	}
}

static void
cxgbei_insert_cmp(struct icl_cxgbei_conn *icc, struct cxgbei_cmp *cmp,
    uint32_t tt)
{
#ifdef INVARIANTS
	struct cxgbei_cmp *cmp2;
#endif

	cmp->tt = tt;

	mtx_lock(&icc->cmp_lock);
#ifdef INVARIANTS
	LIST_FOREACH(cmp2, &icc->cmp_table[TT_HASH(icc, tt)], link) {
		KASSERT(cmp2->tt != tt, ("%s: duplicate cmp", __func__));
	}
#endif
	LIST_INSERT_HEAD(&icc->cmp_table[TT_HASH(icc, tt)], cmp, link);
	mtx_unlock(&icc->cmp_lock);
}

struct cxgbei_cmp *
cxgbei_find_cmp(struct icl_cxgbei_conn *icc, uint32_t tt)
{
	struct cxgbei_cmp *cmp;

	mtx_lock(&icc->cmp_lock);
	LIST_FOREACH(cmp, &icc->cmp_table[TT_HASH(icc, tt)], link) {
		if (cmp->tt == tt)
			break;
	}
	mtx_unlock(&icc->cmp_lock);
	return (cmp);
}

static void
cxgbei_rm_cmp(struct icl_cxgbei_conn *icc, struct cxgbei_cmp *cmp)
{
#ifdef INVARIANTS
	struct cxgbei_cmp *cmp2;
#endif

	mtx_lock(&icc->cmp_lock);

#ifdef INVARIANTS
	LIST_FOREACH(cmp2, &icc->cmp_table[TT_HASH(icc, cmp->tt)], link) {
		if (cmp2 == cmp)
			goto found;
	}
	panic("%s: could not find cmp", __func__);
found:
#endif
	LIST_REMOVE(cmp, link);
	mtx_unlock(&icc->cmp_lock);
}

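/*
 * Initiator (task) DDP setup.  For a read of at least ddp_threshold bytes,
 * page pods describing the CCB's buffer are allocated and queued to the
 * card, and the page pod reservation tag is used directly as the wire ITT;
 * a cxgbei_cmp entry hashed by TT_HASH() lets the receive path find this
 * state when Data-In PDUs arrive.  Otherwise the ITT supplied by the caller
 * is returned with pr_invalid_bit set so it can never collide with a valid
 * hardware DDP tag.
 */
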
int
icl_cxgbei_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
    struct ccb_scsiio *csio, uint32_t *ittp, void **arg)
{
	struct icl_cxgbei_conn *icc = ic_to_icc(ic);
	struct toepcb *toep = icc->toep;
	struct adapter *sc = icc->sc;
	struct cxgbei_data *ci = sc->iscsi_ulp_softc;
	struct ppod_region *pr = &ci->pr;
	struct cxgbei_ddp_state *ddp;
	struct ppod_reservation *prsv;
	struct inpcb *inp;
	struct mbufq mq;
	uint32_t itt;
	int rc = 0;

	ICL_CONN_LOCK_ASSERT(ic);

	/* This is for the offload driver's state.  Must not be set already. */
	MPASS(arg != NULL);
	MPASS(*arg == NULL);

	if (ic->ic_disconnecting || ic->ic_socket == NULL)
		return (ECONNRESET);

	if ((csio->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_IN ||
	    csio->dxfer_len < ci->ddp_threshold) {
no_ddp:
		/*
		 * No DDP for this I/O.  Allocate an ITT (based on the one
		 * passed in) that cannot be a valid hardware DDP tag in the
		 * iSCSI region.
		 */
		itt = *ittp & M_PPOD_TAG;
		itt = V_PPOD_TAG(itt) | pr->pr_invalid_bit;
		*ittp = htobe32(itt);
		MPASS(*arg == NULL);	/* State is maintained for DDP only. */
		if (rc != 0)
			counter_u64_add(
			    toep->ofld_rxq->rx_iscsi_ddp_setup_error, 1);
		return (0);
	}

	/*
	 * Reserve resources for DDP, update the itt that should be used in the
	 * PDU, and save DDP specific state for this I/O in *arg.
	 */
	ddp = malloc(sizeof(*ddp), M_CXGBEI, M_NOWAIT | M_ZERO);
	if (ddp == NULL) {
		rc = ENOMEM;
		goto no_ddp;
	}
	prsv = &ddp->prsv;

	/* XXX add support for all CAM_DATA_ types */
	MPASS((csio->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_VADDR);
	rc = t4_alloc_page_pods_for_buf(pr, (vm_offset_t)csio->data_ptr,
	    csio->dxfer_len, prsv);
	if (rc != 0) {
		free(ddp, M_CXGBEI);
		goto no_ddp;
	}

	mbufq_init(&mq, INT_MAX);
	rc = t4_write_page_pods_for_buf(sc, toep, prsv,
	    (vm_offset_t)csio->data_ptr, csio->dxfer_len, &mq);
	if (__predict_false(rc != 0)) {
		mbufq_drain(&mq);
		t4_free_page_pods(prsv);
		free(ddp, M_CXGBEI);
		goto no_ddp;
	}

	/*
	 * Do not get inp from toep->inp as the toepcb might have
	 * detached already.
	 */
	inp = sotoinpcb(ic->ic_socket);
	INP_WLOCK(inp);
	if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) != 0) {
		INP_WUNLOCK(inp);
		mbufq_drain(&mq);
		t4_free_page_pods(prsv);
		free(ddp, M_CXGBEI);
		return (ECONNRESET);
	}
	mbufq_concat(&toep->ulp_pduq, &mq);
	INP_WUNLOCK(inp);

	ddp->cmp.last_datasn = -1;
	cxgbei_insert_cmp(icc, &ddp->cmp, prsv->prsv_tag);
	*ittp = htobe32(prsv->prsv_tag);
	*arg = prsv;
	counter_u64_add(toep->ofld_rxq->rx_iscsi_ddp_setup_ok, 1);
	return (0);
}

void
icl_cxgbei_conn_task_done(struct icl_conn *ic, void *arg)
{

	if (arg != NULL) {
		struct cxgbei_ddp_state *ddp = arg;

		cxgbei_rm_cmp(ic_to_icc(ic), &ddp->cmp);
		t4_free_page_pods(&ddp->prsv);
		free(ddp, M_CXGBEI);
	}
}

static inline bool
ddp_sgl_check(struct ctl_sg_entry *sg, int entries, int xferlen)
{
	int total_len = 0;

	MPASS(entries > 0);
	if (((vm_offset_t)sg[--entries].addr & 3U) != 0)
		return (false);

	total_len += sg[entries].len;

	while (--entries >= 0) {
		if (((vm_offset_t)sg[entries].addr & PAGE_MASK) != 0 ||
		    (sg[entries].len % PAGE_SIZE) != 0)
			return (false);
		total_len += sg[entries].len;
	}

	MPASS(total_len == xferlen);
	return (true);
}

/* XXXNP: PDU should be passed in as parameter, like on the initiator. */
#define	io_to_request_pdu(io)	((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr)
#define	io_to_ddp_state(io)	((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND2].ptr)

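/*
 * Target (transfer) DDP setup, the mirror image of task setup: a TTT is
 * derived from a page pod reservation covering CTL's data buffer or SGL,
 * provided the layout passes ddp_sgl_check() above (every entry page-aligned
 * and a multiple of PAGE_SIZE in length, except the final entry which only
 * needs 4-byte alignment).  For later R2T rounds of the same I/O the
 * existing reservation is reused, with the alias bits of the tag bumped so
 * the new TTT differs from the one used for the previous burst.
 */
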
int
icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
    uint32_t *tttp, void **arg)
{
	struct icl_cxgbei_conn *icc = ic_to_icc(ic);
	struct toepcb *toep = icc->toep;
	struct ctl_scsiio *ctsio = &io->scsiio;
	struct adapter *sc = icc->sc;
	struct cxgbei_data *ci = sc->iscsi_ulp_softc;
	struct ppod_region *pr = &ci->pr;
	struct cxgbei_ddp_state *ddp;
	struct ppod_reservation *prsv;
	struct ctl_sg_entry *sgl, sg_entry;
	struct inpcb *inp;
	struct mbufq mq;
	int sg_entries = ctsio->kern_sg_entries;
	uint32_t ttt;
	int xferlen, rc = 0, alias;

	/* This is for the offload driver's state.  Must not be set already. */
	MPASS(arg != NULL);
	MPASS(*arg == NULL);

	if (ctsio->ext_data_filled == 0) {
		int first_burst;
		struct icl_pdu *ip = io_to_request_pdu(io);
#ifdef INVARIANTS
		struct icl_cxgbei_pdu *icp = ip_to_icp(ip);

		MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE);
		MPASS(ic == ip->ip_conn);
		MPASS(ip->ip_bhs_mbuf != NULL);
#endif
		first_burst = icl_pdu_data_segment_length(ip);

		/*
		 * Note that ICL calls conn_transfer_setup even if the first
		 * burst had everything and there's nothing left to transfer.
		 *
		 * NB: The CTL frontend might have provided a buffer
		 * whose length (kern_data_len) is smaller than the
		 * FirstBurstLength of unsolicited data.  Treat those
		 * as an empty transfer.
		 */
		xferlen = ctsio->kern_data_len;
		if (xferlen < first_burst ||
		    xferlen - first_burst < ci->ddp_threshold) {
no_ddp:
			/*
			 * No DDP for this transfer.  Allocate a TTT (based on
			 * the one passed in) that cannot be a valid hardware
			 * DDP tag in the iSCSI region.
			 */
			ttt = *tttp & M_PPOD_TAG;
			ttt = V_PPOD_TAG(ttt) | pr->pr_invalid_bit;
			*tttp = htobe32(ttt);
			MPASS(io_to_ddp_state(io) == NULL);
			if (rc != 0)
				counter_u64_add(
				    toep->ofld_rxq->rx_iscsi_ddp_setup_error, 1);
			return (0);
		}

		if (sg_entries == 0) {
			sgl = &sg_entry;
			sgl->len = xferlen;
			sgl->addr = (void *)ctsio->kern_data_ptr;
			sg_entries = 1;
		} else
			sgl = (void *)ctsio->kern_data_ptr;

		if (!ddp_sgl_check(sgl, sg_entries, xferlen))
			goto no_ddp;

		/*
		 * Reserve resources for DDP, update the ttt that should be used
		 * in the PDU, and save DDP specific state for this I/O.
		 */
		MPASS(io_to_ddp_state(io) == NULL);
		ddp = malloc(sizeof(*ddp), M_CXGBEI, M_NOWAIT | M_ZERO);
		if (ddp == NULL) {
			rc = ENOMEM;
			goto no_ddp;
		}
		prsv = &ddp->prsv;

		rc = t4_alloc_page_pods_for_sgl(pr, sgl, sg_entries, prsv);
		if (rc != 0) {
			free(ddp, M_CXGBEI);
			goto no_ddp;
		}

		mbufq_init(&mq, INT_MAX);
		rc = t4_write_page_pods_for_sgl(sc, toep, prsv, sgl, sg_entries,
		    xferlen, &mq);
		if (__predict_false(rc != 0)) {
			mbufq_drain(&mq);
			t4_free_page_pods(prsv);
			free(ddp, M_CXGBEI);
			goto no_ddp;
		}

		/*
		 * Do not get inp from toep->inp as the toepcb might
		 * have detached already.
		 */
		ICL_CONN_LOCK(ic);
		if (ic->ic_disconnecting || ic->ic_socket == NULL) {
			ICL_CONN_UNLOCK(ic);
			mbufq_drain(&mq);
			t4_free_page_pods(prsv);
			free(ddp, M_CXGBEI);
			return (ECONNRESET);
		}
		inp = sotoinpcb(ic->ic_socket);
		INP_WLOCK(inp);
		ICL_CONN_UNLOCK(ic);
		if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) != 0) {
			INP_WUNLOCK(inp);
			mbufq_drain(&mq);
			t4_free_page_pods(prsv);
			free(ddp, M_CXGBEI);
			return (ECONNRESET);
		}
		mbufq_concat(&toep->ulp_pduq, &mq);
		INP_WUNLOCK(inp);

		ddp->cmp.next_buffer_offset = ctsio->kern_rel_offset +
		    first_burst;
		ddp->cmp.last_datasn = -1;
		cxgbei_insert_cmp(icc, &ddp->cmp, prsv->prsv_tag);
		*tttp = htobe32(prsv->prsv_tag);
		io_to_ddp_state(io) = ddp;
		*arg = ctsio;
		counter_u64_add(toep->ofld_rxq->rx_iscsi_ddp_setup_ok, 1);
		return (0);
	}

	/*
	 * In the middle of an I/O.  A non-NULL page pod reservation indicates
	 * that a DDP buffer is being used for the I/O.
	 */
	ddp = io_to_ddp_state(ctsio);
	if (ddp == NULL)
		goto no_ddp;
	prsv = &ddp->prsv;

	alias = (prsv->prsv_tag & pr->pr_alias_mask) >> pr->pr_alias_shift;
	alias++;
	prsv->prsv_tag &= ~pr->pr_alias_mask;
	prsv->prsv_tag |= alias << pr->pr_alias_shift & pr->pr_alias_mask;

	ddp->cmp.last_datasn = -1;
	cxgbei_insert_cmp(icc, &ddp->cmp, prsv->prsv_tag);
	*tttp = htobe32(prsv->prsv_tag);
	*arg = ctsio;

	return (0);
}

void
icl_cxgbei_conn_transfer_done(struct icl_conn *ic, void *arg)
{
	struct ctl_scsiio *ctsio = arg;

	if (ctsio != NULL) {
		struct cxgbei_ddp_state *ddp;

		ddp = io_to_ddp_state(ctsio);
		MPASS(ddp != NULL);

		cxgbei_rm_cmp(ic_to_icc(ic), &ddp->cmp);
		if (ctsio->kern_data_len == ctsio->ext_data_filled ||
		    ic->ic_disconnecting) {
			t4_free_page_pods(&ddp->prsv);
			free(ddp, M_CXGBEI);
			io_to_ddp_state(ctsio) = NULL;
		}
	}
}

static void
cxgbei_limits(struct adapter *sc, void *arg)
{
	struct icl_drv_limits *idl = arg;
	struct cxgbei_data *ci;
	int max_dsl;

	if (begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4lims") != 0)
		return;

	if (uld_active(sc, ULD_ISCSI)) {
		ci = sc->iscsi_ulp_softc;
		MPASS(ci != NULL);

		max_dsl = ci->max_rx_data_len;
		if (idl->idl_max_recv_data_segment_length > max_dsl)
			idl->idl_max_recv_data_segment_length = max_dsl;

		max_dsl = ci->max_tx_data_len;
		if (idl->idl_max_send_data_segment_length > max_dsl)
			idl->idl_max_send_data_segment_length = max_dsl;
	}

	end_synchronized_op(sc, LOCK_HELD);
}

static int
icl_cxgbei_limits(struct icl_drv_limits *idl)
{

	/* Maximum allowed by the RFC.  cxgbei_limits will clip them. */
	idl->idl_max_recv_data_segment_length = (1 << 24) - 1;
	idl->idl_max_send_data_segment_length = (1 << 24) - 1;

	/* These are somewhat arbitrary. */
	idl->idl_max_burst_length = max_burst_length;
	idl->idl_first_burst_length = first_burst_length;

	t4_iterate(cxgbei_limits, idl);

	return (0);
}

int
icl_cxgbei_mod_load(void)
{
	int rc;

	refcount_init(&icl_cxgbei_ncons, 0);

	rc = icl_register("cxgbei", false, -100, icl_cxgbei_limits,
	    icl_cxgbei_new_conn);

	return (rc);
}

int
icl_cxgbei_mod_unload(void)
{

	if (icl_cxgbei_ncons != 0)
		return (EBUSY);

	icl_unregister("cxgbei", false);

	return (0);
}
#endif