/*-
 * Copyright (c) 2012 The FreeBSD Foundation
 * Copyright (c) 2015 Chelsio Communications, Inc.
 * All rights reserved.
 *
 * This software was developed by Edward Tomasz Napierala under sponsorship
 * from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * cxgbei implementation of iSCSI Common Layer kobj(9) interface.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/condvar.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/sx.h>
#include <sys/uio.h>
#include <machine/bus.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/toecore.h>

#include <dev/iscsi/icl.h>
#include <dev/iscsi/iscsi_proto.h>
#include <icl_conn_if.h>

#include <cam/scsi/scsi_all.h>
#include <cam/scsi/scsi_da.h>
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_backend.h>
#include <cam/ctl/ctl_error.h>
#include <cam/ctl/ctl_frontend.h>
#include <cam/ctl/ctl_debug.h>
#include <cam/ctl/ctl_ha.h>
#include <cam/ctl/ctl_ioctl.h>

#include <cam/cam.h>
#include <cam/cam_ccb.h>
#include <cam/cam_xpt.h>
#include <cam/cam_debug.h>
#include <cam/cam_sim.h>
#include <cam/cam_xpt_sim.h>
#include <cam/cam_xpt_periph.h>
#include <cam/cam_periph.h>
#include <cam/cam_compat.h>
#include <cam/scsi/scsi_message.h>

#include "common/common.h"
#include "common/t4_tcb.h"
#include "tom/t4_tom.h"
#include "cxgbei.h"

/*
 * Use the page pod tag for the TT hash.
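 * The hardware tag carries the page pod tag in its PPOD_TAG field;
 * G_PPOD_TAG() extracts that field, which is then masked with
 * cmp_hash_mask (one less than the 64-bucket table size allocated by
 * hashinit() in icl_cxgbei_new_conn()) to index cmp_table.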
 */
#define	TT_HASH(icc, tt)	(G_PPOD_TAG(tt) & (icc)->cmp_hash_mask)

struct cxgbei_ddp_state {
	struct ppod_reservation prsv;
	struct cxgbei_cmp cmp;
};

static MALLOC_DEFINE(M_CXGBEI, "cxgbei", "cxgbei(4)");

SYSCTL_NODE(_kern_icl, OID_AUTO, cxgbei, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "Chelsio iSCSI offload");
static int first_burst_length = 8192;
SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, first_burst_length, CTLFLAG_RWTUN,
    &first_burst_length, 0, "First burst length");
static int max_burst_length = 2 * 1024 * 1024;
SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, max_burst_length, CTLFLAG_RWTUN,
    &max_burst_length, 0, "Maximum burst length");
static int sendspace = 1048576;
SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, sendspace, CTLFLAG_RWTUN,
    &sendspace, 0, "Default send socket buffer size");
static int recvspace = 1048576;
SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, recvspace, CTLFLAG_RWTUN,
    &recvspace, 0, "Default receive socket buffer size");

static volatile u_int icl_cxgbei_ncons;

#define	ICL_CONN_LOCK(X)		mtx_lock(X->ic_lock)
#define	ICL_CONN_UNLOCK(X)		mtx_unlock(X->ic_lock)
#define	ICL_CONN_LOCK_ASSERT(X)		mtx_assert(X->ic_lock, MA_OWNED)
#define	ICL_CONN_LOCK_ASSERT_NOT(X)	mtx_assert(X->ic_lock, MA_NOTOWNED)

static icl_conn_new_pdu_t	icl_cxgbei_conn_new_pdu;
static icl_conn_pdu_data_segment_length_t
				icl_cxgbei_conn_pdu_data_segment_length;
static icl_conn_pdu_append_data_t	icl_cxgbei_conn_pdu_append_data;
static icl_conn_pdu_get_data_t	icl_cxgbei_conn_pdu_get_data;
static icl_conn_pdu_queue_t	icl_cxgbei_conn_pdu_queue;
static icl_conn_pdu_queue_cb_t	icl_cxgbei_conn_pdu_queue_cb;
static icl_conn_handoff_t	icl_cxgbei_conn_handoff;
static icl_conn_free_t		icl_cxgbei_conn_free;
static icl_conn_close_t		icl_cxgbei_conn_close;
static icl_conn_task_setup_t	icl_cxgbei_conn_task_setup;
static icl_conn_task_done_t	icl_cxgbei_conn_task_done;
static icl_conn_transfer_setup_t	icl_cxgbei_conn_transfer_setup;
static icl_conn_transfer_done_t	icl_cxgbei_conn_transfer_done;

static kobj_method_t icl_cxgbei_methods[] = {
	KOBJMETHOD(icl_conn_new_pdu, icl_cxgbei_conn_new_pdu),
	KOBJMETHOD(icl_conn_pdu_free, icl_cxgbei_conn_pdu_free),
	KOBJMETHOD(icl_conn_pdu_data_segment_length,
	    icl_cxgbei_conn_pdu_data_segment_length),
	KOBJMETHOD(icl_conn_pdu_append_data, icl_cxgbei_conn_pdu_append_data),
	KOBJMETHOD(icl_conn_pdu_get_data, icl_cxgbei_conn_pdu_get_data),
	KOBJMETHOD(icl_conn_pdu_queue, icl_cxgbei_conn_pdu_queue),
	KOBJMETHOD(icl_conn_pdu_queue_cb, icl_cxgbei_conn_pdu_queue_cb),
	KOBJMETHOD(icl_conn_handoff, icl_cxgbei_conn_handoff),
	KOBJMETHOD(icl_conn_free, icl_cxgbei_conn_free),
	KOBJMETHOD(icl_conn_close, icl_cxgbei_conn_close),
	KOBJMETHOD(icl_conn_task_setup, icl_cxgbei_conn_task_setup),
	KOBJMETHOD(icl_conn_task_done, icl_cxgbei_conn_task_done),
	KOBJMETHOD(icl_conn_transfer_setup, icl_cxgbei_conn_transfer_setup),
	KOBJMETHOD(icl_conn_transfer_done, icl_cxgbei_conn_transfer_done),
	{ 0, 0 }
};

DEFINE_CLASS(icl_cxgbei, icl_cxgbei_methods, sizeof(struct icl_cxgbei_conn));

void
icl_cxgbei_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
	struct icl_cxgbei_pdu *icp = ip_to_icp(ip);

	KASSERT(icp->ref_cnt != 0, ("freeing deleted PDU"));
	MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE);
	MPASS(ic == ip->ip_conn);

	m_freem(ip->ip_ahs_mbuf);
	m_freem(ip->ip_data_mbuf);
	m_freem(ip->ip_bhs_mbuf);

	KASSERT(ic != NULL || icp->ref_cnt == 1,
	    ("orphaned PDU has outstanding references"));

	if (atomic_fetchadd_int(&icp->ref_cnt, -1) != 1)
		return;

	free(icp, M_CXGBEI);
#ifdef DIAGNOSTIC
	if (__predict_true(ic != NULL))
		refcount_release(&ic->ic_outstanding_pdus);
#endif
}

static void
icl_cxgbei_pdu_call_cb(struct icl_pdu *ip)
{
	struct icl_cxgbei_pdu *icp = ip_to_icp(ip);

	MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE);

	if (icp->cb != NULL)
		icp->cb(ip, icp->error);
#ifdef DIAGNOSTIC
	if (__predict_true(ip->ip_conn != NULL))
		refcount_release(&ip->ip_conn->ic_outstanding_pdus);
#endif
	free(icp, M_CXGBEI);
}

static void
icl_cxgbei_pdu_done(struct icl_pdu *ip, int error)
{
	struct icl_cxgbei_pdu *icp = ip_to_icp(ip);

	if (error != 0)
		icp->error = error;

	m_freem(ip->ip_ahs_mbuf);
	ip->ip_ahs_mbuf = NULL;
	m_freem(ip->ip_data_mbuf);
	ip->ip_data_mbuf = NULL;
	m_freem(ip->ip_bhs_mbuf);
	ip->ip_bhs_mbuf = NULL;

	/*
	 * All other references to this PDU should have been dropped
	 * by the m_freem() of ip_data_mbuf.
	 */
	if (atomic_fetchadd_int(&icp->ref_cnt, -1) == 1)
		icl_cxgbei_pdu_call_cb(ip);
	else
		__assert_unreachable();
}

static void
icl_cxgbei_mbuf_done(struct mbuf *mb)
{

	struct icl_cxgbei_pdu *icp = (struct icl_cxgbei_pdu *)mb->m_ext.ext_arg1;

	/*
	 * NB: mb_free_mext() might leave ref_cnt as 1 without
	 * decrementing it if it hits the fast path in the ref_cnt
	 * check.
	 */
	icl_cxgbei_pdu_call_cb(&icp->ip);
}

struct icl_pdu *
icl_cxgbei_new_pdu(int flags)
{
	struct icl_cxgbei_pdu *icp;
	struct icl_pdu *ip;
	struct mbuf *m;

	icp = malloc(sizeof(*icp), M_CXGBEI, flags | M_ZERO);
	if (__predict_false(icp == NULL))
		return (NULL);

	icp->icp_signature = CXGBEI_PDU_SIGNATURE;
	icp->ref_cnt = 1;
	ip = &icp->ip;

	m = m_gethdr(flags, MT_DATA);
	if (__predict_false(m == NULL)) {
		free(icp, M_CXGBEI);
		return (NULL);
	}

	ip->ip_bhs_mbuf = m;
	ip->ip_bhs = mtod(m, struct iscsi_bhs *);
	memset(ip->ip_bhs, 0, sizeof(*ip->ip_bhs));
	m->m_len = sizeof(struct iscsi_bhs);
	m->m_pkthdr.len = m->m_len;

	return (ip);
}

void
icl_cxgbei_new_pdu_set_conn(struct icl_pdu *ip, struct icl_conn *ic)
{

	ip->ip_conn = ic;
#ifdef DIAGNOSTIC
	refcount_acquire(&ic->ic_outstanding_pdus);
#endif
}

/*
 * Allocate an icl_pdu with an empty BHS for the caller to fill in.
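 * The BHS is backed by ip_bhs_mbuf (set up in icl_cxgbei_new_pdu());
 * the caller writes the opcode and other header fields into ip->ip_bhs
 * before queueing the PDU.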
 */
static struct icl_pdu *
icl_cxgbei_conn_new_pdu(struct icl_conn *ic, int flags)
{
	struct icl_pdu *ip;

	ip = icl_cxgbei_new_pdu(flags);
	if (__predict_false(ip == NULL))
		return (NULL);
	icl_cxgbei_new_pdu_set_conn(ip, ic);

	return (ip);
}

static size_t
icl_pdu_data_segment_length(const struct icl_pdu *request)
{
	uint32_t len = 0;

	len += request->ip_bhs->bhs_data_segment_len[0];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[1];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[2];

	return (len);
}

size_t
icl_cxgbei_conn_pdu_data_segment_length(struct icl_conn *ic,
    const struct icl_pdu *request)
{

	return (icl_pdu_data_segment_length(request));
}

static struct mbuf *
finalize_pdu(struct icl_cxgbei_conn *icc, struct icl_cxgbei_pdu *icp)
{
	struct icl_pdu *ip = &icp->ip;
	uint8_t ulp_submode, padding;
	struct mbuf *m, *last;
	struct iscsi_bhs *bhs;
	int data_len;

	/*
	 * Fix up the data segment mbuf first.
	 */
	m = ip->ip_data_mbuf;
	ulp_submode = icc->ulp_submode;
	if (m != NULL) {
		last = m_last(m);

		/*
		 * Round up the data segment to a 4B boundary.  Pad with 0 if
		 * necessary.  There will definitely be room in the mbuf.
		 */
		padding = roundup2(ip->ip_data_len, 4) - ip->ip_data_len;
		if (padding != 0) {
			MPASS(padding <= M_TRAILINGSPACE(last));
			bzero(mtod(last, uint8_t *) + last->m_len, padding);
			last->m_len += padding;
		}
	} else {
		MPASS(ip->ip_data_len == 0);
		ulp_submode &= ~ULP_CRC_DATA;
		padding = 0;
	}

	/*
	 * Now the header mbuf that has the BHS.
	 */
	m = ip->ip_bhs_mbuf;
	MPASS(m->m_pkthdr.len == sizeof(struct iscsi_bhs));
	MPASS(m->m_len == sizeof(struct iscsi_bhs));

	bhs = ip->ip_bhs;
	data_len = ip->ip_data_len;
	if (data_len > icc->ic.ic_max_send_data_segment_length) {
		struct iscsi_bhs_data_in *bhsdi;
		int flags;

		KASSERT(padding == 0, ("%s: ISO with padding %d for icp %p",
		    __func__, padding, icp));
		switch (bhs->bhs_opcode) {
		case ISCSI_BHS_OPCODE_SCSI_DATA_OUT:
			flags = 1;
			break;
		case ISCSI_BHS_OPCODE_SCSI_DATA_IN:
			flags = 2;
			break;
		default:
			panic("invalid opcode %#x for ISO", bhs->bhs_opcode);
		}
		data_len = icc->ic.ic_max_send_data_segment_length;
		bhsdi = (struct iscsi_bhs_data_in *)bhs;
		if (bhsdi->bhsdi_flags & BHSDI_FLAGS_F) {
			/*
			 * Firmware will set F on the final PDU in the
			 * burst.
			 */
			flags |= CXGBE_ISO_F;
			bhsdi->bhsdi_flags &= ~BHSDI_FLAGS_F;
		}
		set_mbuf_iscsi_iso(m, true);
		set_mbuf_iscsi_iso_flags(m, flags);
		set_mbuf_iscsi_iso_mss(m, data_len);
	}

	bhs->bhs_data_segment_len[2] = data_len;
	bhs->bhs_data_segment_len[1] = data_len >> 8;
	bhs->bhs_data_segment_len[0] = data_len >> 16;

	/*
	 * Extract mbuf chain from PDU.
	 */
	m->m_pkthdr.len += ip->ip_data_len + padding;
	m->m_next = ip->ip_data_mbuf;
	set_mbuf_ulp_submode(m, ulp_submode);
	ip->ip_bhs_mbuf = NULL;
	ip->ip_data_mbuf = NULL;
	ip->ip_bhs = NULL;

	/*
	 * Drop PDU reference on icp.  Additional references might
	 * still be held by zero-copy PDU buffers (ICL_NOCOPY).
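	 * Those references were taken by m_extaddref() in
	 * icl_cxgbei_conn_pdu_append_data() and are dropped through
	 * icl_cxgbei_mbuf_done() when the external mbufs are freed.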
	 */
	if (atomic_fetchadd_int(&icp->ref_cnt, -1) == 1)
		icl_cxgbei_pdu_call_cb(ip);

	return (m);
}

int
icl_cxgbei_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *ip,
    const void *addr, size_t len, int flags)
{
	struct icl_cxgbei_pdu *icp = ip_to_icp(ip);
	struct mbuf *m, *m_tail;
	const char *src;

	MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE);
	MPASS(ic == ip->ip_conn);
	KASSERT(len > 0, ("%s: len is %jd", __func__, (intmax_t)len));

	m_tail = ip->ip_data_mbuf;
	if (m_tail != NULL)
		for (; m_tail->m_next != NULL; m_tail = m_tail->m_next)
			;

	if (flags & ICL_NOCOPY) {
		m = m_get(flags & ~ICL_NOCOPY, MT_DATA);
		if (m == NULL) {
			ICL_WARN("failed to allocate mbuf");
			return (ENOMEM);
		}

		m->m_flags |= M_RDONLY;
		m_extaddref(m, __DECONST(char *, addr), len, &icp->ref_cnt,
		    icl_cxgbei_mbuf_done, icp, NULL);
		m->m_len = len;
		if (ip->ip_data_mbuf == NULL) {
			ip->ip_data_mbuf = m;
			ip->ip_data_len = len;
		} else {
			m_tail->m_next = m;
			m_tail = m_tail->m_next;
			ip->ip_data_len += len;
		}

		return (0);
	}

	src = (const char *)addr;

	/* Allocate as jumbo mbufs of size MJUM16BYTES. */
	while (len >= MJUM16BYTES) {
		m = m_getjcl(M_NOWAIT, MT_DATA, 0, MJUM16BYTES);
		if (__predict_false(m == NULL)) {
			if ((flags & M_WAITOK) != 0) {
				/* Fall back to non-jumbo mbufs. */
				break;
			}
			return (ENOMEM);
		}
		memcpy(mtod(m, void *), src, MJUM16BYTES);
		m->m_len = MJUM16BYTES;
		if (ip->ip_data_mbuf == NULL) {
			ip->ip_data_mbuf = m_tail = m;
			ip->ip_data_len = MJUM16BYTES;
		} else {
			m_tail->m_next = m;
			m_tail = m_tail->m_next;
			ip->ip_data_len += MJUM16BYTES;
		}
		src += MJUM16BYTES;
		len -= MJUM16BYTES;
	}

	/* Allocate mbuf chain for the remaining data. */
	if (len != 0) {
		m = m_getm2(NULL, len, flags, MT_DATA, 0);
		if (__predict_false(m == NULL))
			return (ENOMEM);
		if (ip->ip_data_mbuf == NULL) {
			ip->ip_data_mbuf = m;
			ip->ip_data_len = len;
		} else {
			m_tail->m_next = m;
			ip->ip_data_len += len;
		}
		for (; m != NULL; m = m->m_next) {
			m->m_len = min(len, M_SIZE(m));
			memcpy(mtod(m, void *), src, m->m_len);
			src += m->m_len;
			len -= m->m_len;
		}
		MPASS(len == 0);
	}
	MPASS(ip->ip_data_len <= max(ic->ic_max_send_data_segment_length,
	    ic->ic_hw_isomax));

	return (0);
}

void
icl_cxgbei_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
    size_t off, void *addr, size_t len)
{
	struct icl_cxgbei_pdu *icp = ip_to_icp(ip);

	if (icp->icp_flags & ICPF_RX_DDP)
		return;		/* data is DDP'ed, no need to copy */
	m_copydata(ip->ip_data_mbuf, off, len, addr);
}

void
icl_cxgbei_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{
	icl_cxgbei_conn_pdu_queue_cb(ic, ip, NULL);
}

void
icl_cxgbei_conn_pdu_queue_cb(struct icl_conn *ic, struct icl_pdu *ip,
    icl_pdu_cb cb)
{
	struct epoch_tracker et;
	struct icl_cxgbei_conn *icc = ic_to_icc(ic);
	struct icl_cxgbei_pdu *icp = ip_to_icp(ip);
	struct socket *so = ic->ic_socket;
	struct toepcb *toep = icc->toep;
	struct inpcb *inp;
	struct mbuf *m;

	MPASS(ic == ip->ip_conn);
	MPASS(ip->ip_bhs_mbuf != NULL);
	/* The kernel doesn't generate PDUs with AHS. */
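	/* (icl_cxgbei_new_pdu() leaves both AHS fields zeroed.) */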
	MPASS(ip->ip_ahs_mbuf == NULL && ip->ip_ahs_len == 0);

	ICL_CONN_LOCK_ASSERT(ic);

	icp->cb = cb;

	/* NOTE: sowriteable without so_snd lock is a mostly harmless race. */
	if (ic->ic_disconnecting || so == NULL || !sowriteable(so)) {
		icl_cxgbei_pdu_done(ip, ENOTCONN);
		return;
	}

	m = finalize_pdu(icc, icp);
	M_ASSERTPKTHDR(m);
	MPASS((m->m_pkthdr.len & 3) == 0);

	/*
	 * Do not get inp from toep->inp as the toepcb might have detached
	 * already.
	 */
	inp = sotoinpcb(so);
	CURVNET_SET(toep->vnet);
	NET_EPOCH_ENTER(et);
	INP_WLOCK(inp);
	if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) ||
	    __predict_false((toep->flags & TPF_ATTACHED) == 0))
		m_freem(m);
	else {
		mbufq_enqueue(&toep->ulp_pduq, m);
		t4_push_pdus(icc->sc, toep, 0);
	}
	INP_WUNLOCK(inp);
	NET_EPOCH_EXIT(et);
	CURVNET_RESTORE();
}

static struct icl_conn *
icl_cxgbei_new_conn(const char *name, struct mtx *lock)
{
	struct icl_cxgbei_conn *icc;
	struct icl_conn *ic;

	refcount_acquire(&icl_cxgbei_ncons);

	icc = (struct icl_cxgbei_conn *)kobj_create(&icl_cxgbei_class, M_CXGBE,
	    M_WAITOK | M_ZERO);
	icc->icc_signature = CXGBEI_CONN_SIGNATURE;
	STAILQ_INIT(&icc->rcvd_pdus);

	icc->cmp_table = hashinit(64, M_CXGBEI, &icc->cmp_hash_mask);
	mtx_init(&icc->cmp_lock, "cxgbei_cmp", NULL, MTX_DEF);

	ic = &icc->ic;
	ic->ic_lock = lock;

#ifdef DIAGNOSTIC
	refcount_init(&ic->ic_outstanding_pdus, 0);
#endif
	ic->ic_name = name;
	ic->ic_offload = "cxgbei";
	ic->ic_unmapped = false;

	CTR2(KTR_CXGBE, "%s: icc %p", __func__, icc);

	return (ic);
}

void
icl_cxgbei_conn_free(struct icl_conn *ic)
{
	struct icl_cxgbei_conn *icc = ic_to_icc(ic);

	MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);

	CTR2(KTR_CXGBE, "%s: icc %p", __func__, icc);

	mtx_destroy(&icc->cmp_lock);
	hashdestroy(icc->cmp_table, M_CXGBEI, icc->cmp_hash_mask);
	kobj_delete((struct kobj *)icc, M_CXGBE);
	refcount_release(&icl_cxgbei_ncons);
}

static int
icl_cxgbei_setsockopt(struct icl_conn *ic, struct socket *so, int sspace,
    int rspace)
{
	struct sockopt opt;
	int error, one = 1, ss, rs;

	ss = max(sendspace, sspace);
	rs = max(recvspace, rspace);

	error = soreserve(so, ss, rs);
	if (error != 0) {
		icl_cxgbei_conn_close(ic);
		return (error);
	}
	SOCKBUF_LOCK(&so->so_snd);
	so->so_snd.sb_flags |= SB_AUTOSIZE;
	SOCKBUF_UNLOCK(&so->so_snd);
	SOCKBUF_LOCK(&so->so_rcv);
	so->so_rcv.sb_flags |= SB_AUTOSIZE;
	SOCKBUF_UNLOCK(&so->so_rcv);

	/*
	 * Disable Nagle.
	 */
	bzero(&opt, sizeof(opt));
	opt.sopt_dir = SOPT_SET;
	opt.sopt_level = IPPROTO_TCP;
	opt.sopt_name = TCP_NODELAY;
	opt.sopt_val = &one;
	opt.sopt_valsize = sizeof(one);
	error = sosetopt(so, &opt);
	if (error != 0) {
		icl_cxgbei_conn_close(ic);
		return (error);
	}

	return (0);
}

/*
 * Request/response structure used to find out the adapter offloading a socket.
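 * find_offload_adapter() fills in the result; t4_iterate() below calls
 * it once per adapter, and the first adapter whose TOE owns the
 * connection is recorded.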
 */
struct find_ofld_adapter_rr {
	struct socket *so;
	struct adapter *sc;	/* result */
};

static void
find_offload_adapter(struct adapter *sc, void *arg)
{
	struct find_ofld_adapter_rr *fa = arg;
	struct socket *so = fa->so;
	struct tom_data *td = sc->tom_softc;
	struct tcpcb *tp;
	struct inpcb *inp;

	/* Non-TCP were filtered out earlier. */
	MPASS(so->so_proto->pr_protocol == IPPROTO_TCP);

	if (fa->sc != NULL)
		return;		/* Found already. */

	if (td == NULL)
		return;		/* TOE not enabled on this adapter. */

	inp = sotoinpcb(so);
	INP_WLOCK(inp);
	if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) {
		tp = intotcpcb(inp);
		if (tp->t_flags & TF_TOE && tp->tod == &td->tod)
			fa->sc = sc;	/* Found. */
	}
	INP_WUNLOCK(inp);
}

/* XXXNP: move this to t4_tom. */
static void
send_iscsi_flowc_wr(struct adapter *sc, struct toepcb *toep, int maxlen)
{
	struct wrqe *wr;
	struct fw_flowc_wr *flowc;
	const u_int nparams = 1;
	u_int flowclen;
	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];

	flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);

	wr = alloc_wrqe(roundup2(flowclen, 16), &toep->ofld_txq->wrq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	flowc = wrtod(wr);
	memset(flowc, 0, wr->wr_len);

	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
	    V_FW_FLOWC_WR_NPARAMS(nparams));
	flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
	    V_FW_WR_FLOWID(toep->tid));

	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_TXDATAPLEN_MAX;
	flowc->mnemval[0].val = htobe32(maxlen);

	txsd->tx_credits = howmany(flowclen, 16);
	txsd->plen = 0;
	KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0,
	    ("%s: not enough credits (%d)", __func__, toep->tx_credits));
	toep->tx_credits -= txsd->tx_credits;
	if (__predict_false(++toep->txsd_pidx == toep->txsd_total))
		toep->txsd_pidx = 0;
	toep->txsd_avail--;

	t4_wrq_tx(sc, wr);
}

static void
set_ulp_mode_iscsi(struct adapter *sc, struct toepcb *toep, u_int ulp_submode)
{
	uint64_t val;

	CTR3(KTR_CXGBE, "%s: tid %u, ULP_MODE_ISCSI, submode=%#x",
	    __func__, toep->tid, ulp_submode);

	val = V_TCB_ULP_TYPE(ULP_MODE_ISCSI) | V_TCB_ULP_RAW(ulp_submode);
	t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_ULP_TYPE,
	    V_TCB_ULP_TYPE(M_TCB_ULP_TYPE) | V_TCB_ULP_RAW(M_TCB_ULP_RAW), val,
	    0, 0);

	val = V_TF_RX_FLOW_CONTROL_DISABLE(1ULL);
	t4_set_tcb_field(sc, toep->ctrlq, toep, W_TCB_T_FLAGS, val, val, 0, 0);
}

/*
 * XXXNP: Who is responsible for cleaning up the socket if this returns with an
 * error?  Review all error paths.
 *
 * XXXNP: What happens to the socket's fd reference if the operation is
 * successful, and how does that affect the socket's life cycle?
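 *
 * (As written, a successful handoff neuters the file descriptor: f_ops
 * is pointed at badfileops and f_data is cleared, so the fd no longer
 * references the socket and the connection owns it until
 * icl_cxgbei_conn_close().)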
 */
int
icl_cxgbei_conn_handoff(struct icl_conn *ic, int fd)
{
	struct icl_cxgbei_conn *icc = ic_to_icc(ic);
	struct cxgbei_data *ci;
	struct find_ofld_adapter_rr fa;
	struct file *fp;
	struct socket *so;
	struct inpcb *inp;
	struct tcpcb *tp;
	struct toepcb *toep;
	cap_rights_t rights;
	u_int max_rx_pdu_len, max_tx_pdu_len;
	int error, max_iso_pdus;

	MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
	ICL_CONN_LOCK_ASSERT_NOT(ic);

	/*
	 * Steal the socket from userland.
	 */
	error = fget(curthread, fd,
	    cap_rights_init_one(&rights, CAP_SOCK_CLIENT), &fp);
	if (error != 0)
		return (error);
	if (fp->f_type != DTYPE_SOCKET) {
		fdrop(fp, curthread);
		return (EINVAL);
	}
	so = fp->f_data;
	if (so->so_type != SOCK_STREAM ||
	    so->so_proto->pr_protocol != IPPROTO_TCP) {
		fdrop(fp, curthread);
		return (EINVAL);
	}

	ICL_CONN_LOCK(ic);
	if (ic->ic_socket != NULL) {
		ICL_CONN_UNLOCK(ic);
		fdrop(fp, curthread);
		return (EBUSY);
	}
	ic->ic_disconnecting = false;
	ic->ic_socket = so;
	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	fdrop(fp, curthread);
	ICL_CONN_UNLOCK(ic);

	/* Find the adapter offloading this socket. */
	fa.sc = NULL;
	fa.so = so;
	t4_iterate(find_offload_adapter, &fa);
	if (fa.sc == NULL)
		return (EINVAL);
	icc->sc = fa.sc;
	ci = icc->sc->iscsi_ulp_softc;

	max_rx_pdu_len = ISCSI_BHS_SIZE + ic->ic_max_recv_data_segment_length;
	max_tx_pdu_len = ISCSI_BHS_SIZE + ic->ic_max_send_data_segment_length;
	if (ic->ic_header_crc32c) {
		max_rx_pdu_len += ISCSI_HEADER_DIGEST_SIZE;
		max_tx_pdu_len += ISCSI_HEADER_DIGEST_SIZE;
	}
	if (ic->ic_data_crc32c) {
		max_rx_pdu_len += ISCSI_DATA_DIGEST_SIZE;
		max_tx_pdu_len += ISCSI_DATA_DIGEST_SIZE;
	}

	inp = sotoinpcb(so);
	INP_WLOCK(inp);
	tp = intotcpcb(inp);
	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))
		error = EBUSY;
	else {
		/*
		 * The socket could not have been "unoffloaded" if we are
		 * here.
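		 * (We just checked, under INP_WLOCK, that the inp has not
		 * been dropped.)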
		 */
		MPASS(tp->t_flags & TF_TOE);
		MPASS(tp->tod != NULL);
		MPASS(tp->t_toe != NULL);
		toep = tp->t_toe;
		MPASS(toep->vi->adapter == icc->sc);
		icc->toep = toep;
		icc->cwt = cxgbei_select_worker_thread(icc);

		icc->ulp_submode = 0;
		if (ic->ic_header_crc32c)
			icc->ulp_submode |= ULP_CRC_HEADER;
		if (ic->ic_data_crc32c)
			icc->ulp_submode |= ULP_CRC_DATA;

		if (icc->sc->tt.iso && chip_id(icc->sc) >= CHELSIO_T5) {
			max_iso_pdus = CXGBEI_MAX_ISO_PAYLOAD /
			    max_tx_pdu_len;
			ic->ic_hw_isomax = max_iso_pdus *
			    ic->ic_max_send_data_segment_length;
		} else
			max_iso_pdus = 1;

		so->so_options |= SO_NO_DDP;
		toep->params.ulp_mode = ULP_MODE_ISCSI;
		toep->ulpcb = icc;

		send_iscsi_flowc_wr(icc->sc, toep,
		    roundup(max_iso_pdus * max_tx_pdu_len, tp->t_maxseg));
		set_ulp_mode_iscsi(icc->sc, toep, icc->ulp_submode);
		error = 0;
	}
	INP_WUNLOCK(inp);

	if (error == 0) {
		error = icl_cxgbei_setsockopt(ic, so, max_tx_pdu_len,
		    max_rx_pdu_len);
	}

	return (error);
}

void
icl_cxgbei_conn_close(struct icl_conn *ic)
{
	struct icl_cxgbei_conn *icc = ic_to_icc(ic);
	struct icl_pdu *ip;
	struct socket *so;
	struct sockbuf *sb;
	struct inpcb *inp;
	struct toepcb *toep = icc->toep;

	MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
	ICL_CONN_LOCK_ASSERT_NOT(ic);

	ICL_CONN_LOCK(ic);
	so = ic->ic_socket;
	if (ic->ic_disconnecting || so == NULL) {
		CTR4(KTR_CXGBE, "%s: icc %p (disconnecting = %d), so %p",
		    __func__, icc, ic->ic_disconnecting, so);
		ICL_CONN_UNLOCK(ic);
		return;
	}
	ic->ic_disconnecting = true;

#ifdef DIAGNOSTIC
	KASSERT(ic->ic_outstanding_pdus == 0,
	    ("destroying session with %d outstanding PDUs",
	     ic->ic_outstanding_pdus));
#endif
	ICL_CONN_UNLOCK(ic);

	CTR3(KTR_CXGBE, "%s: tid %d, icc %p", __func__, toep ? toep->tid : -1,
	    icc);
	inp = sotoinpcb(so);
	sb = &so->so_rcv;
	INP_WLOCK(inp);
	if (toep != NULL) {	/* NULL if connection was never offloaded. */
		toep->ulpcb = NULL;

		/* Discard PDUs queued for TX. */
		mbufq_drain(&toep->ulp_pduq);

		/*
		 * Wait for the cwt threads to stop processing this
		 * connection.
		 */
		SOCKBUF_LOCK(sb);
		if (icc->rx_flags & RXF_ACTIVE) {
			volatile u_int *p = &icc->rx_flags;

			SOCKBUF_UNLOCK(sb);
			INP_WUNLOCK(inp);

			while (*p & RXF_ACTIVE)
				pause("conclo", 1);

			INP_WLOCK(inp);
			SOCKBUF_LOCK(sb);
		}

		/*
		 * Discard received PDUs not passed to the iSCSI
		 * layer.
		 */
		while (!STAILQ_EMPTY(&icc->rcvd_pdus)) {
			ip = STAILQ_FIRST(&icc->rcvd_pdus);
			STAILQ_REMOVE_HEAD(&icc->rcvd_pdus, ip_next);
			icl_cxgbei_pdu_done(ip, ENOTCONN);
		}
		SOCKBUF_UNLOCK(sb);

		/*
		 * Grab a reference to use when waiting for the final
		 * CPL to be received.  If toep->inp is NULL, then
		 * final_cpl_received() has already been called (e.g.
		 * due to the peer sending a RST).
		 */
		if (toep->inp != NULL) {
			toep = hold_toepcb(toep);
			toep->flags |= TPF_WAITING_FOR_FINAL;
		} else
			toep = NULL;
	}
	INP_WUNLOCK(inp);

	ICL_CONN_LOCK(ic);
	ic->ic_socket = NULL;
	ICL_CONN_UNLOCK(ic);

	/*
	 * XXXNP: we should send RST instead of FIN when PDUs held in various
	 * queues were purged instead of delivered reliably but soabort isn't
	 * really general purpose and wouldn't do the right thing here.
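	 * For now soclose() is used, which initiates an orderly close and
	 * disposes of the socket reference taken over during the handoff.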
	 */
	soclose(so);

	/*
	 * Wait for the socket to fully close.  This ensures any
	 * pending received data has been received (and in particular,
	 * any data that would be received by DDP has been handled).
	 * Callers assume that it is safe to free buffers for tasks
	 * and transfers after this function returns.
	 */
	if (toep != NULL) {
		struct mtx *lock = mtx_pool_find(mtxpool_sleep, toep);

		mtx_lock(lock);
		while ((toep->flags & TPF_WAITING_FOR_FINAL) != 0)
			mtx_sleep(toep, lock, PSOCK, "conclo2", 0);
		mtx_unlock(lock);
		free_toepcb(toep);
	}
}

static void
cxgbei_insert_cmp(struct icl_cxgbei_conn *icc, struct cxgbei_cmp *cmp,
    uint32_t tt)
{
#ifdef INVARIANTS
	struct cxgbei_cmp *cmp2;
#endif

	cmp->tt = tt;

	mtx_lock(&icc->cmp_lock);
#ifdef INVARIANTS
	LIST_FOREACH(cmp2, &icc->cmp_table[TT_HASH(icc, tt)], link) {
		KASSERT(cmp2->tt != tt, ("%s: duplicate cmp", __func__));
	}
#endif
	LIST_INSERT_HEAD(&icc->cmp_table[TT_HASH(icc, tt)], cmp, link);
	mtx_unlock(&icc->cmp_lock);
}

struct cxgbei_cmp *
cxgbei_find_cmp(struct icl_cxgbei_conn *icc, uint32_t tt)
{
	struct cxgbei_cmp *cmp;

	mtx_lock(&icc->cmp_lock);
	LIST_FOREACH(cmp, &icc->cmp_table[TT_HASH(icc, tt)], link) {
		if (cmp->tt == tt)
			break;
	}
	mtx_unlock(&icc->cmp_lock);
	return (cmp);
}

static void
cxgbei_rm_cmp(struct icl_cxgbei_conn *icc, struct cxgbei_cmp *cmp)
{
#ifdef INVARIANTS
	struct cxgbei_cmp *cmp2;
#endif

	mtx_lock(&icc->cmp_lock);

#ifdef INVARIANTS
	LIST_FOREACH(cmp2, &icc->cmp_table[TT_HASH(icc, cmp->tt)], link) {
		if (cmp2 == cmp)
			goto found;
	}
	panic("%s: could not find cmp", __func__);
found:
#endif
	LIST_REMOVE(cmp, link);
	mtx_unlock(&icc->cmp_lock);
}

int
icl_cxgbei_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
    struct ccb_scsiio *csio, uint32_t *ittp, void **arg)
{
	struct icl_cxgbei_conn *icc = ic_to_icc(ic);
	struct toepcb *toep = icc->toep;
	struct adapter *sc = icc->sc;
	struct cxgbei_data *ci = sc->iscsi_ulp_softc;
	struct ppod_region *pr = &ci->pr;
	struct cxgbei_ddp_state *ddp;
	struct ppod_reservation *prsv;
	struct inpcb *inp;
	struct mbufq mq;
	uint32_t itt;
	int rc = 0;

	ICL_CONN_LOCK_ASSERT(ic);

	/* This is for the offload driver's state.  Must not be set already. */
	MPASS(arg != NULL);
	MPASS(*arg == NULL);

	if (ic->ic_disconnecting || ic->ic_socket == NULL)
		return (ECONNRESET);

	if ((csio->ccb_h.flags & CAM_DIR_MASK) != CAM_DIR_IN ||
	    csio->dxfer_len < ci->ddp_threshold) {
no_ddp:
		/*
		 * No DDP for this I/O.  Allocate an ITT (based on the one
		 * passed in) that cannot be a valid hardware DDP tag in the
		 * iSCSI region.
		 */
		itt = *ittp & M_PPOD_TAG;
		itt = V_PPOD_TAG(itt) | pr->pr_invalid_bit;
		*ittp = htobe32(itt);
		MPASS(*arg == NULL);	/* State is maintained for DDP only. */
		if (rc != 0)
			counter_u64_add(
			    toep->ofld_rxq->rx_iscsi_ddp_setup_error, 1);
		return (0);
	}

	/*
	 * Reserve resources for DDP, update the itt that should be used in the
	 * PDU, and save DDP specific state for this I/O in *arg.
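	 * The receive side later maps the tag back to this state via
	 * cxgbei_find_cmp() when placing Data-In PDUs directly into the
	 * buffer.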
	 */
	ddp = malloc(sizeof(*ddp), M_CXGBEI, M_NOWAIT | M_ZERO);
	if (ddp == NULL) {
		rc = ENOMEM;
		goto no_ddp;
	}
	prsv = &ddp->prsv;

	/* XXX add support for all CAM_DATA_ types */
	MPASS((csio->ccb_h.flags & CAM_DATA_MASK) == CAM_DATA_VADDR);
	rc = t4_alloc_page_pods_for_buf(pr, (vm_offset_t)csio->data_ptr,
	    csio->dxfer_len, prsv);
	if (rc != 0) {
		free(ddp, M_CXGBEI);
		goto no_ddp;
	}

	mbufq_init(&mq, INT_MAX);
	rc = t4_write_page_pods_for_buf(sc, toep, prsv,
	    (vm_offset_t)csio->data_ptr, csio->dxfer_len, &mq);
	if (__predict_false(rc != 0)) {
		mbufq_drain(&mq);
		t4_free_page_pods(prsv);
		free(ddp, M_CXGBEI);
		goto no_ddp;
	}

	/*
	 * Do not get inp from toep->inp as the toepcb might have
	 * detached already.
	 */
	inp = sotoinpcb(ic->ic_socket);
	INP_WLOCK(inp);
	if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) != 0) {
		INP_WUNLOCK(inp);
		mbufq_drain(&mq);
		t4_free_page_pods(prsv);
		free(ddp, M_CXGBEI);
		return (ECONNRESET);
	}
	mbufq_concat(&toep->ulp_pduq, &mq);
	INP_WUNLOCK(inp);

	ddp->cmp.last_datasn = -1;
	cxgbei_insert_cmp(icc, &ddp->cmp, prsv->prsv_tag);
	*ittp = htobe32(prsv->prsv_tag);
	*arg = prsv;
	counter_u64_add(toep->ofld_rxq->rx_iscsi_ddp_setup_ok, 1);
	return (0);
}

void
icl_cxgbei_conn_task_done(struct icl_conn *ic, void *arg)
{

	if (arg != NULL) {
		struct cxgbei_ddp_state *ddp = arg;

		cxgbei_rm_cmp(ic_to_icc(ic), &ddp->cmp);
		t4_free_page_pods(&ddp->prsv);
		free(ddp, M_CXGBEI);
	}
}

static inline bool
ddp_sgl_check(struct ctl_sg_entry *sg, int entries, int xferlen)
{
	int total_len = 0;

	MPASS(entries > 0);
	if (((vm_offset_t)sg[--entries].addr & 3U) != 0)
		return (false);

	total_len += sg[entries].len;

	while (--entries >= 0) {
		if (((vm_offset_t)sg[entries].addr & PAGE_MASK) != 0 ||
		    (sg[entries].len % PAGE_SIZE) != 0)
			return (false);
		total_len += sg[entries].len;
	}

	MPASS(total_len == xferlen);
	return (true);
}

/* XXXNP: PDU should be passed in as parameter, like on the initiator. */
#define	io_to_request_pdu(io)	((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr)
#define	io_to_ddp_state(io)	((io)->io_hdr.ctl_private[CTL_PRIV_FRONTEND2].ptr)

int
icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
    uint32_t *tttp, void **arg)
{
	struct icl_cxgbei_conn *icc = ic_to_icc(ic);
	struct toepcb *toep = icc->toep;
	struct ctl_scsiio *ctsio = &io->scsiio;
	struct adapter *sc = icc->sc;
	struct cxgbei_data *ci = sc->iscsi_ulp_softc;
	struct ppod_region *pr = &ci->pr;
	struct cxgbei_ddp_state *ddp;
	struct ppod_reservation *prsv;
	struct ctl_sg_entry *sgl, sg_entry;
	struct inpcb *inp;
	struct mbufq mq;
	int sg_entries = ctsio->kern_sg_entries;
	uint32_t ttt;
	int xferlen, rc = 0, alias;

	/* This is for the offload driver's state.  Must not be set already. */
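	/* (For DDP transfers, *arg is pointed at the ctl_scsiio below.) */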
	MPASS(arg != NULL);
	MPASS(*arg == NULL);

	if (ctsio->ext_data_filled == 0) {
		int first_burst;
		struct icl_pdu *ip = io_to_request_pdu(io);
#ifdef INVARIANTS
		struct icl_cxgbei_pdu *icp = ip_to_icp(ip);

		MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE);
		MPASS(ic == ip->ip_conn);
		MPASS(ip->ip_bhs_mbuf != NULL);
#endif
		first_burst = icl_pdu_data_segment_length(ip);

		/*
		 * Note that ICL calls conn_transfer_setup even if the first
		 * burst had everything and there's nothing left to transfer.
		 *
		 * NB: The CTL frontend might have provided a buffer
		 * whose length (kern_data_len) is smaller than the
		 * FirstBurstLength of unsolicited data.  Treat those
		 * as an empty transfer.
		 */
		xferlen = ctsio->kern_data_len;
		if (xferlen < first_burst ||
		    xferlen - first_burst < ci->ddp_threshold) {
no_ddp:
			/*
			 * No DDP for this transfer.  Allocate a TTT (based on
			 * the one passed in) that cannot be a valid hardware
			 * DDP tag in the iSCSI region.
			 */
			ttt = *tttp & M_PPOD_TAG;
			ttt = V_PPOD_TAG(ttt) | pr->pr_invalid_bit;
			*tttp = htobe32(ttt);
			MPASS(io_to_ddp_state(io) == NULL);
			if (rc != 0)
				counter_u64_add(
				    toep->ofld_rxq->rx_iscsi_ddp_setup_error, 1);
			return (0);
		}

		if (sg_entries == 0) {
			sgl = &sg_entry;
			sgl->len = xferlen;
			sgl->addr = (void *)ctsio->kern_data_ptr;
			sg_entries = 1;
		} else
			sgl = (void *)ctsio->kern_data_ptr;

		if (!ddp_sgl_check(sgl, sg_entries, xferlen))
			goto no_ddp;

		/*
		 * Reserve resources for DDP, update the ttt that should be used
		 * in the PDU, and save DDP specific state for this I/O.
		 */
		MPASS(io_to_ddp_state(io) == NULL);
		ddp = malloc(sizeof(*ddp), M_CXGBEI, M_NOWAIT | M_ZERO);
		if (ddp == NULL) {
			rc = ENOMEM;
			goto no_ddp;
		}
		prsv = &ddp->prsv;

		rc = t4_alloc_page_pods_for_sgl(pr, sgl, sg_entries, prsv);
		if (rc != 0) {
			free(ddp, M_CXGBEI);
			goto no_ddp;
		}

		mbufq_init(&mq, INT_MAX);
		rc = t4_write_page_pods_for_sgl(sc, toep, prsv, sgl, sg_entries,
		    xferlen, &mq);
		if (__predict_false(rc != 0)) {
			mbufq_drain(&mq);
			t4_free_page_pods(prsv);
			free(ddp, M_CXGBEI);
			goto no_ddp;
		}

		/*
		 * Do not get inp from toep->inp as the toepcb might
		 * have detached already.
		 */
		ICL_CONN_LOCK(ic);
		if (ic->ic_disconnecting || ic->ic_socket == NULL) {
			ICL_CONN_UNLOCK(ic);
			mbufq_drain(&mq);
			t4_free_page_pods(prsv);
			free(ddp, M_CXGBEI);
			return (ECONNRESET);
		}
		inp = sotoinpcb(ic->ic_socket);
		INP_WLOCK(inp);
		ICL_CONN_UNLOCK(ic);
		if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) != 0) {
			INP_WUNLOCK(inp);
			mbufq_drain(&mq);
			t4_free_page_pods(prsv);
			free(ddp, M_CXGBEI);
			return (ECONNRESET);
		}
		mbufq_concat(&toep->ulp_pduq, &mq);
		INP_WUNLOCK(inp);

		ddp->cmp.next_buffer_offset = ctsio->kern_rel_offset +
		    first_burst;
		ddp->cmp.last_datasn = -1;
		cxgbei_insert_cmp(icc, &ddp->cmp, prsv->prsv_tag);
		*tttp = htobe32(prsv->prsv_tag);
		io_to_ddp_state(io) = ddp;
		*arg = ctsio;
		counter_u64_add(toep->ofld_rxq->rx_iscsi_ddp_setup_ok, 1);
		return (0);
	}

	/*
	 * In the middle of an I/O.  A non-NULL page pod reservation indicates
	 * that a DDP buffer is being used for the I/O.
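	 * Re-use the existing reservation but bump the tag's alias bits so
	 * that the TTT handed out for this R2T differs from the previous one.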
	 */
	ddp = io_to_ddp_state(ctsio);
	if (ddp == NULL)
		goto no_ddp;
	prsv = &ddp->prsv;

	alias = (prsv->prsv_tag & pr->pr_alias_mask) >> pr->pr_alias_shift;
	alias++;
	prsv->prsv_tag &= ~pr->pr_alias_mask;
	prsv->prsv_tag |= alias << pr->pr_alias_shift & pr->pr_alias_mask;

	ddp->cmp.last_datasn = -1;
	cxgbei_insert_cmp(icc, &ddp->cmp, prsv->prsv_tag);
	*tttp = htobe32(prsv->prsv_tag);
	*arg = ctsio;

	return (0);
}

void
icl_cxgbei_conn_transfer_done(struct icl_conn *ic, void *arg)
{
	struct ctl_scsiio *ctsio = arg;

	if (ctsio != NULL) {
		struct cxgbei_ddp_state *ddp;

		ddp = io_to_ddp_state(ctsio);
		MPASS(ddp != NULL);

		cxgbei_rm_cmp(ic_to_icc(ic), &ddp->cmp);
		if (ctsio->kern_data_len == ctsio->ext_data_filled ||
		    ic->ic_disconnecting) {
			t4_free_page_pods(&ddp->prsv);
			free(ddp, M_CXGBEI);
			io_to_ddp_state(ctsio) = NULL;
		}
	}
}

static void
cxgbei_limits(struct adapter *sc, void *arg)
{
	struct icl_drv_limits *idl = arg;
	struct cxgbei_data *ci;
	int max_dsl;

	if (begin_synchronized_op(sc, NULL, HOLD_LOCK, "t4lims") != 0)
		return;

	if (uld_active(sc, ULD_ISCSI)) {
		ci = sc->iscsi_ulp_softc;
		MPASS(ci != NULL);

		max_dsl = ci->max_rx_data_len;
		if (idl->idl_max_recv_data_segment_length > max_dsl)
			idl->idl_max_recv_data_segment_length = max_dsl;

		max_dsl = ci->max_tx_data_len;
		if (idl->idl_max_send_data_segment_length > max_dsl)
			idl->idl_max_send_data_segment_length = max_dsl;
	}

	end_synchronized_op(sc, LOCK_HELD);
}

static int
icl_cxgbei_limits(struct icl_drv_limits *idl)
{

	/* Maximum allowed by the RFC.  cxgbei_limits will clip them. */
	idl->idl_max_recv_data_segment_length = (1 << 24) - 1;
	idl->idl_max_send_data_segment_length = (1 << 24) - 1;

	/* These are somewhat arbitrary. */
	idl->idl_max_burst_length = max_burst_length;
	idl->idl_first_burst_length = first_burst_length;

	t4_iterate(cxgbei_limits, idl);

	return (0);
}

int
icl_cxgbei_mod_load(void)
{
	int rc;

	refcount_init(&icl_cxgbei_ncons, 0);

	rc = icl_register("cxgbei", false, -100, icl_cxgbei_limits,
	    icl_cxgbei_new_conn);

	return (rc);
}

int
icl_cxgbei_mod_unload(void)
{

	if (icl_cxgbei_ncons != 0)
		return (EBUSY);

	icl_unregister("cxgbei", false);

	return (0);
}
#endif