/*-
 * Copyright (c) 2012 The FreeBSD Foundation
 * Copyright (c) 2015 Chelsio Communications, Inc.
 * All rights reserved.
 *
 * This software was developed by Edward Tomasz Napierala under sponsorship
 * from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

/*
 * cxgbei implementation of iSCSI Common Layer kobj(9) interface.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"

#ifdef TCP_OFFLOAD
#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/condvar.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/module.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/sx.h>
#include <sys/uio.h>
#include <machine/bus.h>
#include <vm/uma.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/tcp.h>
#include <netinet/tcp_var.h>
#include <netinet/toecore.h>

#include <dev/iscsi/icl.h>
#include <dev/iscsi/iscsi_proto.h>
#include <icl_conn_if.h>

#include "common/common.h"
#include "common/t4_tcb.h"
#include "tom/t4_tom.h"
#include "cxgbei.h"

SYSCTL_NODE(_kern_icl, OID_AUTO, cxgbei, CTLFLAG_RD, 0, "Chelsio iSCSI offload");
static int coalesce = 1;
SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, coalesce, CTLFLAG_RWTUN,
    &coalesce, 0, "Try to coalesce PDUs before sending");
static int partial_receive_len = 128 * 1024;
SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, partial_receive_len, CTLFLAG_RWTUN,
    &partial_receive_len, 0, "Minimum read size for partially received "
    "data segment");
static int sendspace = 1048576;
SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, sendspace, CTLFLAG_RWTUN,
    &sendspace, 0, "Default send socket buffer size");
static int recvspace = 1048576;
SYSCTL_INT(_kern_icl_cxgbei, OID_AUTO, recvspace, CTLFLAG_RWTUN,
    &recvspace, 0, "Default receive socket buffer size");
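/*
 * The knobs above surface as sysctl(8) variables under kern.icl.cxgbei
 * and, being CTLFLAG_RWTUN, can also be set as loader tunables.  For
 * example (illustrative only):
 *
 *   # sysctl kern.icl.cxgbei.coalesce=0
 *   # echo 'kern.icl.cxgbei.sendspace="2097152"' >> /boot/loader.conf
 */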
static uma_zone_t icl_transfer_zone;

static volatile u_int icl_cxgbei_ncons;

#define ICL_CONN_LOCK(X)		mtx_lock(X->ic_lock)
#define ICL_CONN_UNLOCK(X)		mtx_unlock(X->ic_lock)
#define ICL_CONN_LOCK_ASSERT(X)		mtx_assert(X->ic_lock, MA_OWNED)
#define ICL_CONN_LOCK_ASSERT_NOT(X)	mtx_assert(X->ic_lock, MA_NOTOWNED)

struct icl_pdu *icl_cxgbei_new_pdu(int);
void icl_cxgbei_new_pdu_set_conn(struct icl_pdu *, struct icl_conn *);

static icl_conn_new_pdu_t icl_cxgbei_conn_new_pdu;
icl_conn_pdu_free_t icl_cxgbei_conn_pdu_free;
static icl_conn_pdu_data_segment_length_t
    icl_cxgbei_conn_pdu_data_segment_length;
static icl_conn_pdu_append_data_t icl_cxgbei_conn_pdu_append_data;
static icl_conn_pdu_get_data_t icl_cxgbei_conn_pdu_get_data;
static icl_conn_pdu_queue_t icl_cxgbei_conn_pdu_queue;
static icl_conn_handoff_t icl_cxgbei_conn_handoff;
static icl_conn_free_t icl_cxgbei_conn_free;
static icl_conn_close_t icl_cxgbei_conn_close;
static icl_conn_task_setup_t icl_cxgbei_conn_task_setup;
static icl_conn_task_done_t icl_cxgbei_conn_task_done;
static icl_conn_transfer_setup_t icl_cxgbei_conn_transfer_setup;
static icl_conn_transfer_done_t icl_cxgbei_conn_transfer_done;

static kobj_method_t icl_cxgbei_methods[] = {
	KOBJMETHOD(icl_conn_new_pdu, icl_cxgbei_conn_new_pdu),
	KOBJMETHOD(icl_conn_pdu_free, icl_cxgbei_conn_pdu_free),
	KOBJMETHOD(icl_conn_pdu_data_segment_length,
	    icl_cxgbei_conn_pdu_data_segment_length),
	KOBJMETHOD(icl_conn_pdu_append_data, icl_cxgbei_conn_pdu_append_data),
	KOBJMETHOD(icl_conn_pdu_get_data, icl_cxgbei_conn_pdu_get_data),
	KOBJMETHOD(icl_conn_pdu_queue, icl_cxgbei_conn_pdu_queue),
	KOBJMETHOD(icl_conn_handoff, icl_cxgbei_conn_handoff),
	KOBJMETHOD(icl_conn_free, icl_cxgbei_conn_free),
	KOBJMETHOD(icl_conn_close, icl_cxgbei_conn_close),
	KOBJMETHOD(icl_conn_task_setup, icl_cxgbei_conn_task_setup),
	KOBJMETHOD(icl_conn_task_done, icl_cxgbei_conn_task_done),
	KOBJMETHOD(icl_conn_transfer_setup, icl_cxgbei_conn_transfer_setup),
	KOBJMETHOD(icl_conn_transfer_done, icl_cxgbei_conn_transfer_done),
	{ 0, 0 }
};

DEFINE_CLASS(icl_cxgbei, icl_cxgbei_methods, sizeof(struct icl_cxgbei_conn));

#if 0
/*
 * Subtract another 256 for AHS from MAX_DSL if AHS could be used.
 */
#define CXGBEI_MAX_PDU 16224
#define CXGBEI_MAX_DSL (CXGBEI_MAX_PDU - sizeof(struct iscsi_bhs) - 8)
#endif
#define CXGBEI_MAX_DSL 8192
#define CXGBEI_MAX_PDU (CXGBEI_MAX_DSL + sizeof(struct iscsi_bhs) + 8)

void
icl_cxgbei_conn_pdu_free(struct icl_conn *ic, struct icl_pdu *ip)
{
#ifdef INVARIANTS
	struct icl_cxgbei_pdu *icp = ip_to_icp(ip);
#endif

	MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE);
	MPASS(ic == ip->ip_conn);
	MPASS(ip->ip_bhs_mbuf != NULL);

	m_freem(ip->ip_ahs_mbuf);
	m_freem(ip->ip_data_mbuf);
	m_freem(ip->ip_bhs_mbuf);	/* storage for icl_cxgbei_pdu itself */

#ifdef DIAGNOSTIC
	if (__predict_true(ic != NULL))
		refcount_release(&ic->ic_outstanding_pdus);
#endif
}

struct icl_pdu *
icl_cxgbei_new_pdu(int flags)
{
	struct icl_cxgbei_pdu *icp;
	struct icl_pdu *ip;
	struct mbuf *m;
	uintptr_t a;

	m = m_gethdr(flags, MT_DATA);
	if (__predict_false(m == NULL))
		return (NULL);

	a = roundup2(mtod(m, uintptr_t), _Alignof(struct icl_cxgbei_pdu));
	icp = (struct icl_cxgbei_pdu *)a;
	bzero(icp, sizeof(*icp));

	icp->icp_signature = CXGBEI_PDU_SIGNATURE;
	ip = &icp->ip;
	ip->ip_bhs_mbuf = m;

	a = roundup2((uintptr_t)(icp + 1), _Alignof(struct iscsi_bhs *));
	ip->ip_bhs = (struct iscsi_bhs *)a;
#ifdef INVARIANTS
	/* Everything must fit entirely in the mbuf. */
	a = (uintptr_t)(ip->ip_bhs + 1);
	MPASS(a <= (uintptr_t)m + MSIZE);
#endif
	bzero(ip->ip_bhs, sizeof(*ip->ip_bhs));

	m->m_data = (void *)ip->ip_bhs;
	m->m_len = sizeof(struct iscsi_bhs);
	m->m_pkthdr.len = m->m_len;

	return (ip);
}
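/*
 * A sketch of what icl_cxgbei_new_pdu() carves out of the single mbuf it
 * allocates (exact offsets depend on the roundup2() alignment above):
 *
 *   +-------------+------------------------+------------------+---------+
 *   | struct mbuf | struct icl_cxgbei_pdu  | struct iscsi_bhs | (spare) |
 *   | header      | (icp)                  | (ip->ip_bhs)     |         |
 *   +-------------+------------------------+------------------+---------+
 *   |<----------------------------- MSIZE ---------------------------->|
 *
 * The INVARIANTS block asserts that the BHS still fits within MSIZE, and
 * m_data is then pointed at the BHS so the mbuf carries exactly the header.
 */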
void
icl_cxgbei_new_pdu_set_conn(struct icl_pdu *ip, struct icl_conn *ic)
{

	ip->ip_conn = ic;
#ifdef DIAGNOSTIC
	refcount_acquire(&ic->ic_outstanding_pdus);
#endif
}

/*
 * Allocate an icl_pdu with an empty BHS for the caller to fill in.
 */
static struct icl_pdu *
icl_cxgbei_conn_new_pdu(struct icl_conn *ic, int flags)
{
	struct icl_pdu *ip;

	ip = icl_cxgbei_new_pdu(flags);
	if (__predict_false(ip == NULL))
		return (NULL);
	icl_cxgbei_new_pdu_set_conn(ip, ic);

	return (ip);
}

static size_t
icl_pdu_data_segment_length(const struct icl_pdu *request)
{
	uint32_t len = 0;

	len += request->ip_bhs->bhs_data_segment_len[0];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[1];
	len <<= 8;
	len += request->ip_bhs->bhs_data_segment_len[2];

	return (len);
}
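/*
 * DataSegmentLength in the BHS is a 24-bit big-endian field (RFC 3720).
 * As a worked example, a PDU carrying CXGBEI_MAX_DSL (8192, 0x002000)
 * bytes of data has bhs_data_segment_len[] = { 0x00, 0x20, 0x00 }, which
 * the shifts above reassemble as (0x00 << 16) | (0x20 << 8) | 0x00 = 8192.
 * finalize_pdu() below performs the inverse when it stores the length.
 */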
size_t
icl_cxgbei_conn_pdu_data_segment_length(struct icl_conn *ic,
    const struct icl_pdu *request)
{

	return (icl_pdu_data_segment_length(request));
}

static uint32_t
icl_conn_build_tasktag(struct icl_conn *ic, uint32_t tag)
{
	return (tag);
}

static struct mbuf *
finalize_pdu(struct icl_cxgbei_conn *icc, struct icl_cxgbei_pdu *icp)
{
	struct icl_pdu *ip = &icp->ip;
	uint8_t ulp_submode, padding;
	struct mbuf *m, *last;
	struct iscsi_bhs *bhs;

	/*
	 * Fix up the data segment mbuf first.
	 */
	m = ip->ip_data_mbuf;
	ulp_submode = icc->ulp_submode;
	if (m) {
		last = m_last(m);

		/*
		 * Round up the data segment to a 4B boundary.  Pad with 0 if
		 * necessary.  There will definitely be room in the mbuf.
		 */
		padding = roundup2(ip->ip_data_len, 4) - ip->ip_data_len;
		if (padding) {
			bzero(mtod(last, uint8_t *) + last->m_len, padding);
			last->m_len += padding;
		}
	} else {
		MPASS(ip->ip_data_len == 0);
		ulp_submode &= ~ULP_CRC_DATA;
		padding = 0;
	}

	/*
	 * Now the header mbuf that has the BHS.
	 */
	m = ip->ip_bhs_mbuf;
	MPASS(m->m_pkthdr.len == sizeof(struct iscsi_bhs));
	MPASS(m->m_len == sizeof(struct iscsi_bhs));

	bhs = ip->ip_bhs;
	bhs->bhs_data_segment_len[2] = ip->ip_data_len;
	bhs->bhs_data_segment_len[1] = ip->ip_data_len >> 8;
	bhs->bhs_data_segment_len[0] = ip->ip_data_len >> 16;

	/* "Convert" PDU to mbuf chain.  Do not use icp/ip after this. */
	m->m_pkthdr.len = sizeof(struct iscsi_bhs) + ip->ip_data_len + padding;
	m->m_next = ip->ip_data_mbuf;
	set_mbuf_ulp_submode(m, ulp_submode);
#ifdef INVARIANTS
	bzero(icp, sizeof(*icp));
#endif
#ifdef DIAGNOSTIC
	refcount_release(&icc->ic.ic_outstanding_pdus);
#endif

	return (m);
}
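/*
 * Padding example for finalize_pdu(): a 13-byte data segment rounds up to
 * roundup2(13, 4) == 16, so padding == 3 and three zero bytes are appended
 * to the last data mbuf.  The BHS still advertises ip_data_len (13); only
 * m_pkthdr.len accounts for the pad, as iSCSI framing requires.
 */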
int
icl_cxgbei_conn_pdu_append_data(struct icl_conn *ic, struct icl_pdu *ip,
    const void *addr, size_t len, int flags)
{
	struct mbuf *m;
#ifdef INVARIANTS
	struct icl_cxgbei_pdu *icp = ip_to_icp(ip);
#endif

	MPASS(icp->icp_signature == CXGBEI_PDU_SIGNATURE);
	MPASS(ic == ip->ip_conn);
	KASSERT(len > 0, ("%s: len is %jd", __func__, (intmax_t)len));

	m = ip->ip_data_mbuf;
	if (m == NULL) {
		m = m_getjcl(M_NOWAIT, MT_DATA, 0, MJUM16BYTES);
		if (__predict_false(m == NULL))
			return (ENOMEM);

		ip->ip_data_mbuf = m;
	}

	if (__predict_true(m_append(m, len, addr) != 0)) {
		ip->ip_data_len += len;
		MPASS(ip->ip_data_len <= CXGBEI_MAX_DSL);
		return (0);
	} else {
		if (flags & M_WAITOK) {
			CXGBE_UNIMPLEMENTED("fail safe append");
		}
		ip->ip_data_len = m_length(m, NULL);
		return (1);
	}
}

void
icl_cxgbei_conn_pdu_get_data(struct icl_conn *ic, struct icl_pdu *ip,
    size_t off, void *addr, size_t len)
{
	struct icl_cxgbei_pdu *icp = ip_to_icp(ip);

	if (icp->pdu_flags & SBUF_ULP_FLAG_DATA_DDPED)
		return;		/* data is DDP'ed, no need to copy */
	m_copydata(ip->ip_data_mbuf, off, len, addr);
}

void
icl_cxgbei_conn_pdu_queue(struct icl_conn *ic, struct icl_pdu *ip)
{
	struct icl_cxgbei_conn *icc = ic_to_icc(ic);
	struct icl_cxgbei_pdu *icp = ip_to_icp(ip);
	struct socket *so = ic->ic_socket;
	struct toepcb *toep = icc->toep;
	struct inpcb *inp;
	struct mbuf *m;

	MPASS(ic == ip->ip_conn);
	MPASS(ip->ip_bhs_mbuf != NULL);
	/* The kernel doesn't generate PDUs with AHS. */
	MPASS(ip->ip_ahs_mbuf == NULL && ip->ip_ahs_len == 0);

	ICL_CONN_LOCK_ASSERT(ic);
	/* NOTE: sowriteable without so_snd lock is a mostly harmless race. */
	if (ic->ic_disconnecting || so == NULL || !sowriteable(so)) {
		icl_cxgbei_conn_pdu_free(ic, ip);
		return;
	}

	m = finalize_pdu(icc, icp);
	M_ASSERTPKTHDR(m);
	MPASS((m->m_pkthdr.len & 3) == 0);
	MPASS(m->m_pkthdr.len + 8 <= CXGBEI_MAX_PDU);

	/*
	 * Do not get inp from toep->inp as the toepcb might have detached
	 * already.
	 */
	inp = sotoinpcb(so);
	INP_WLOCK(inp);
	if (__predict_false(inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) ||
	    __predict_false((toep->flags & TPF_ATTACHED) == 0))
		m_freem(m);
	else {
		mbufq_enqueue(&toep->ulp_pduq, m);
		t4_push_pdus(icc->sc, toep, 0);
	}
	INP_WUNLOCK(inp);
}

static struct icl_conn *
icl_cxgbei_new_conn(const char *name, struct mtx *lock)
{
	struct icl_cxgbei_conn *icc;
	struct icl_conn *ic;

	refcount_acquire(&icl_cxgbei_ncons);

	icc = (struct icl_cxgbei_conn *)kobj_create(&icl_cxgbei_class, M_CXGBE,
	    M_WAITOK | M_ZERO);
	icc->icc_signature = CXGBEI_CONN_SIGNATURE;
	STAILQ_INIT(&icc->rcvd_pdus);

	ic = &icc->ic;
	ic->ic_lock = lock;

	/* XXXNP: review.  Most of these icl_conn fields aren't really used. */
	STAILQ_INIT(&ic->ic_to_send);
	cv_init(&ic->ic_send_cv, "icl_cxgbei_tx");
	cv_init(&ic->ic_receive_cv, "icl_cxgbei_rx");
#ifdef DIAGNOSTIC
	refcount_init(&ic->ic_outstanding_pdus, 0);
#endif
	ic->ic_max_data_segment_length = CXGBEI_MAX_DSL;
	ic->ic_name = name;
	ic->ic_offload = "cxgbei";
	ic->ic_unmapped = false;

	CTR2(KTR_CXGBE, "%s: icc %p", __func__, icc);

	return (ic);
}

void
icl_cxgbei_conn_free(struct icl_conn *ic)
{
	struct icl_cxgbei_conn *icc = ic_to_icc(ic);

	MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);

	CTR2(KTR_CXGBE, "%s: icc %p", __func__, icc);

	cv_destroy(&ic->ic_send_cv);
	cv_destroy(&ic->ic_receive_cv);

	kobj_delete((struct kobj *)icc, M_CXGBE);
	refcount_release(&icl_cxgbei_ncons);
}

static int
icl_cxgbei_setsockopt(struct icl_conn *ic, struct socket *so)
{
	size_t minspace;
	struct sockopt opt;
	int error, one = 1;

	/*
	 * For sendspace, this is required because the current code cannot
	 * send a PDU in pieces; thus, the minimum buffer size is equal
	 * to the maximum PDU size.  "+4" is to account for possible padding.
	 *
	 * What we should actually do here is use autoscaling, but set
	 * some minimal buffer size to "minspace".  I don't know of a way
	 * to do that, though.
	 */
	minspace = sizeof(struct iscsi_bhs) + ic->ic_max_data_segment_length +
	    ISCSI_HEADER_DIGEST_SIZE + ISCSI_DATA_DIGEST_SIZE + 4;
	if (sendspace < minspace)
		sendspace = minspace;
	if (recvspace < minspace)
		recvspace = minspace;

	error = soreserve(so, sendspace, recvspace);
	if (error != 0) {
		icl_cxgbei_conn_close(ic);
		return (error);
	}
	SOCKBUF_LOCK(&so->so_snd);
	so->so_snd.sb_flags |= SB_AUTOSIZE;
	SOCKBUF_UNLOCK(&so->so_snd);
	SOCKBUF_LOCK(&so->so_rcv);
	so->so_rcv.sb_flags |= SB_AUTOSIZE;
	SOCKBUF_UNLOCK(&so->so_rcv);

	/*
	 * Disable Nagle.
	 */
	bzero(&opt, sizeof(opt));
	opt.sopt_dir = SOPT_SET;
	opt.sopt_level = IPPROTO_TCP;
	opt.sopt_name = TCP_NODELAY;
	opt.sopt_val = &one;
	opt.sopt_valsize = sizeof(one);
	error = sosetopt(so, &opt);
	if (error != 0) {
		icl_cxgbei_conn_close(ic);
		return (error);
	}

	return (0);
}
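/*
 * With the definitions in this file, minspace above works out to
 * 48 (BHS) + 8192 (CXGBEI_MAX_DSL) + 4 + 4 (header/data digests) + 4 (pad)
 * = 8252 bytes, well below the 1 MB sendspace/recvspace defaults, so the
 * clamping only matters if the knobs have been tuned down.
 */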
/*
 * Request/response structure used to find the adapter offloading a socket.
 */
struct find_ofld_adapter_rr {
	struct socket *so;
	struct adapter *sc;	/* result */
};

static void
find_offload_adapter(struct adapter *sc, void *arg)
{
	struct find_ofld_adapter_rr *fa = arg;
	struct socket *so = fa->so;
	struct tom_data *td = sc->tom_softc;
	struct tcpcb *tp;
	struct inpcb *inp;

	/* Non-TCP sockets were filtered out earlier. */
	MPASS(so->so_proto->pr_protocol == IPPROTO_TCP);

	if (fa->sc != NULL)
		return;		/* Found already. */

	if (td == NULL)
		return;		/* TOE not enabled on this adapter. */

	inp = sotoinpcb(so);
	INP_WLOCK(inp);
	if ((inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT)) == 0) {
		tp = intotcpcb(inp);
		if (tp->t_flags & TF_TOE && tp->tod == &td->tod)
			fa->sc = sc;	/* Found. */
	}
	INP_WUNLOCK(inp);
}

/* XXXNP: move this to t4_tom. */
static void
send_iscsi_flowc_wr(struct adapter *sc, struct toepcb *toep, int maxlen)
{
	struct wrqe *wr;
	struct fw_flowc_wr *flowc;
	const u_int nparams = 1;
	u_int flowclen;
	struct ofld_tx_sdesc *txsd = &toep->txsd[toep->txsd_pidx];

	flowclen = sizeof(*flowc) + nparams * sizeof(struct fw_flowc_mnemval);

	wr = alloc_wrqe(roundup2(flowclen, 16), toep->ofld_txq);
	if (wr == NULL) {
		/* XXX */
		panic("%s: allocation failure.", __func__);
	}
	flowc = wrtod(wr);
	memset(flowc, 0, wr->wr_len);

	flowc->op_to_nparams = htobe32(V_FW_WR_OP(FW_FLOWC_WR) |
	    V_FW_FLOWC_WR_NPARAMS(nparams));
	flowc->flowid_len16 = htonl(V_FW_WR_LEN16(howmany(flowclen, 16)) |
	    V_FW_WR_FLOWID(toep->tid));

	flowc->mnemval[0].mnemonic = FW_FLOWC_MNEM_TXDATAPLEN_MAX;
	flowc->mnemval[0].val = htobe32(maxlen);

	txsd->tx_credits = howmany(flowclen, 16);
	txsd->plen = 0;
	KASSERT(toep->tx_credits >= txsd->tx_credits && toep->txsd_avail > 0,
	    ("%s: not enough credits (%d)", __func__, toep->tx_credits));
	toep->tx_credits -= txsd->tx_credits;
	if (__predict_false(++toep->txsd_pidx == toep->txsd_total))
		toep->txsd_pidx = 0;
	toep->txsd_avail--;

	t4_wrq_tx(sc, wr);
}

static void
set_ulp_mode_iscsi(struct adapter *sc, struct toepcb *toep, int hcrc, int dcrc)
{
	uint64_t val = ULP_MODE_ISCSI;

	if (hcrc)
		val |= ULP_CRC_HEADER << 4;
	if (dcrc)
		val |= ULP_CRC_DATA << 4;

	CTR4(KTR_CXGBE, "%s: tid %u, ULP_MODE_ISCSI, CRC hdr=%d data=%d",
	    __func__, toep->tid, hcrc, dcrc);

	t4_set_tcb_field(sc, toep->ctrlq, toep->tid, W_TCB_ULP_TYPE,
	    V_TCB_ULP_TYPE(M_TCB_ULP_TYPE) | V_TCB_ULP_RAW(M_TCB_ULP_RAW), val,
	    0, 0, toep->ofld_rxq->iq.abs_id);
}
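/*
 * In set_ulp_mode_iscsi() the 4-bit ULP type occupies the low nibble of
 * val and the CRC enables are shifted into the adjacent ULP_RAW subfield
 * (hence the << 4, matching the ULP_TYPE/ULP_RAW mask used above).  For
 * example, with both digests enabled the TCB update writes
 * val == ULP_MODE_ISCSI | ((ULP_CRC_HEADER | ULP_CRC_DATA) << 4).
 */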
/*
 * XXXNP: Who is responsible for cleaning up the socket if this returns with an
 * error?  Review all error paths.
 *
 * XXXNP: What happens to the socket's fd reference if the operation is
 * successful, and how does that affect the socket's life cycle?
 */
int
icl_cxgbei_conn_handoff(struct icl_conn *ic, int fd)
{
	struct icl_cxgbei_conn *icc = ic_to_icc(ic);
	struct find_ofld_adapter_rr fa;
	struct file *fp;
	struct socket *so;
	struct inpcb *inp;
	struct tcpcb *tp;
	struct toepcb *toep;
	cap_rights_t rights;
	int error;

	MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
	ICL_CONN_LOCK_ASSERT_NOT(ic);

	/*
	 * Steal the socket from userland.
	 */
	error = fget(curthread, fd,
	    cap_rights_init(&rights, CAP_SOCK_CLIENT), &fp);
	if (error != 0)
		return (error);
	if (fp->f_type != DTYPE_SOCKET) {
		fdrop(fp, curthread);
		return (EINVAL);
	}
	so = fp->f_data;
	if (so->so_type != SOCK_STREAM ||
	    so->so_proto->pr_protocol != IPPROTO_TCP) {
		fdrop(fp, curthread);
		return (EINVAL);
	}

	ICL_CONN_LOCK(ic);
	if (ic->ic_socket != NULL) {
		ICL_CONN_UNLOCK(ic);
		fdrop(fp, curthread);
		return (EBUSY);
	}
	ic->ic_disconnecting = false;
	ic->ic_socket = so;
	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	fdrop(fp, curthread);
	ICL_CONN_UNLOCK(ic);

	/* Find the adapter offloading this socket. */
	fa.sc = NULL;
	fa.so = so;
	t4_iterate(find_offload_adapter, &fa);
	if (fa.sc == NULL)
		return (EINVAL);
	icc->sc = fa.sc;

	error = icl_cxgbei_setsockopt(ic, so);
	if (error)
		return (error);

	inp = sotoinpcb(so);
	INP_WLOCK(inp);
	tp = intotcpcb(inp);
	if (inp->inp_flags & (INP_DROPPED | INP_TIMEWAIT))
		error = EBUSY;
	else {
		/*
		 * The socket cannot have been "unoffloaded" if we got this
		 * far.
		 */
		MPASS(tp->t_flags & TF_TOE);
		MPASS(tp->tod != NULL);
		MPASS(tp->t_toe != NULL);
		toep = tp->t_toe;
		MPASS(toep->vi->pi->adapter == icc->sc);
		icc->toep = toep;
		icc->cwt = cxgbei_select_worker_thread(icc);
		icc->ulp_submode = 0;
		if (ic->ic_header_crc32c)
			icc->ulp_submode |= ULP_CRC_HEADER;
		if (ic->ic_data_crc32c)
			icc->ulp_submode |= ULP_CRC_DATA;
		so->so_options |= SO_NO_DDP;
		toep->ulp_mode = ULP_MODE_ISCSI;
		toep->ulpcb = icc;

		send_iscsi_flowc_wr(icc->sc, toep, CXGBEI_MAX_PDU);
		set_ulp_mode_iscsi(icc->sc, toep, ic->ic_header_crc32c,
		    ic->ic_data_crc32c);
		error = 0;
	}
	INP_WUNLOCK(inp);

	return (error);
}
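/*
 * Note on the "stolen" fd: after a successful handoff the descriptor still
 * exists in the process's fd table, but fp->f_ops has been replaced with
 * badfileops and fp->f_data cleared, so further userland use of that fd
 * fails.  The struct socket itself now belongs to the connection and is
 * released in icl_cxgbei_conn_close() via soclose().
 */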
void
icl_cxgbei_conn_close(struct icl_conn *ic)
{
	struct icl_cxgbei_conn *icc = ic_to_icc(ic);
	struct icl_pdu *ip;
	struct socket *so;
	struct sockbuf *sb;
	struct inpcb *inp;
	struct toepcb *toep = icc->toep;

	MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
	ICL_CONN_LOCK_ASSERT_NOT(ic);

	ICL_CONN_LOCK(ic);
	so = ic->ic_socket;
	if (ic->ic_disconnecting || so == NULL) {
		CTR4(KTR_CXGBE, "%s: icc %p (disconnecting = %d), so %p",
		    __func__, icc, ic->ic_disconnecting, so);
		ICL_CONN_UNLOCK(ic);
		return;
	}
	ic->ic_disconnecting = true;

	/* These are unused in this driver right now. */
	MPASS(STAILQ_EMPTY(&ic->ic_to_send));
	MPASS(ic->ic_receive_pdu == NULL);

#ifdef DIAGNOSTIC
	KASSERT(ic->ic_outstanding_pdus == 0,
	    ("destroying session with %d outstanding PDUs",
	    ic->ic_outstanding_pdus));
#endif
	ICL_CONN_UNLOCK(ic);

	CTR3(KTR_CXGBE, "%s: tid %d, icc %p", __func__, toep ? toep->tid : -1,
	    icc);
	inp = sotoinpcb(so);
	sb = &so->so_rcv;
	INP_WLOCK(inp);
	if (toep != NULL) {	/* NULL if connection was never offloaded. */
		toep->ulpcb = NULL;
		mbufq_drain(&toep->ulp_pduq);
		SOCKBUF_LOCK(sb);
		if (icc->rx_flags & RXF_ACTIVE) {
			volatile u_int *p = &icc->rx_flags;

			SOCKBUF_UNLOCK(sb);
			INP_WUNLOCK(inp);

			while (*p & RXF_ACTIVE)
				pause("conclo", 1);

			INP_WLOCK(inp);
			SOCKBUF_LOCK(sb);
		}

		while (!STAILQ_EMPTY(&icc->rcvd_pdus)) {
			ip = STAILQ_FIRST(&icc->rcvd_pdus);
			STAILQ_REMOVE_HEAD(&icc->rcvd_pdus, ip_next);
			icl_cxgbei_conn_pdu_free(ic, ip);
		}
		SOCKBUF_UNLOCK(sb);
	}
	INP_WUNLOCK(inp);

	ICL_CONN_LOCK(ic);
	ic->ic_socket = NULL;
	ICL_CONN_UNLOCK(ic);

	/*
	 * XXXNP: we should send RST instead of FIN when PDUs held in various
	 * queues were purged instead of delivered reliably but soabort isn't
	 * really general purpose and wouldn't do the right thing here.
	 */
	soclose(so);
}

int
icl_cxgbei_conn_task_setup(struct icl_conn *ic, struct icl_pdu *ip,
    struct ccb_scsiio *csio, uint32_t *task_tagp, void **prvp)
{
	void *prv;

	*task_tagp = icl_conn_build_tasktag(ic, *task_tagp);

	prv = uma_zalloc(icl_transfer_zone, M_NOWAIT | M_ZERO);
	if (prv == NULL)
		return (ENOMEM);

	*prvp = prv;

	cxgbei_conn_task_reserve_itt(ic, prvp, csio, task_tagp);

	return (0);
}

void
icl_cxgbei_conn_task_done(struct icl_conn *ic, void *prv)
{

	cxgbei_cleanup_task(ic, prv);
	uma_zfree(icl_transfer_zone, prv);
}

int
icl_cxgbei_conn_transfer_setup(struct icl_conn *ic, union ctl_io *io,
    uint32_t *transfer_tag, void **prvp)
{
	void *prv;

	*transfer_tag = icl_conn_build_tasktag(ic, *transfer_tag);

	prv = uma_zalloc(icl_transfer_zone, M_NOWAIT | M_ZERO);
	if (prv == NULL)
		return (ENOMEM);

	*prvp = prv;

	cxgbei_conn_transfer_reserve_ttt(ic, prvp, io, transfer_tag);

	return (0);
}

void
icl_cxgbei_conn_transfer_done(struct icl_conn *ic, void *prv)
{
	cxgbei_cleanup_task(ic, prv);
	uma_zfree(icl_transfer_zone, prv);
}

static int
icl_cxgbei_limits(size_t *limitp)
{

	*limitp = CXGBEI_MAX_DSL;

	return (0);
}

static int
icl_cxgbei_load(void)
{
	int error;

	icl_transfer_zone = uma_zcreate("icl_transfer",
	    16 * 1024, NULL, NULL, NULL, NULL,
	    UMA_ALIGN_PTR, 0);

	refcount_init(&icl_cxgbei_ncons, 0);

	error = icl_register("cxgbei", false, -100, icl_cxgbei_limits,
	    icl_cxgbei_new_conn);
	KASSERT(error == 0, ("failed to register"));

	return (error);
}

static int
icl_cxgbei_unload(void)
{

	if (icl_cxgbei_ncons != 0)
		return (EBUSY);

	icl_unregister("cxgbei", false);

	uma_zdestroy(icl_transfer_zone);

	return (0);
}

static int
icl_cxgbei_modevent(module_t mod, int what, void *arg)
{

	switch (what) {
	case MOD_LOAD:
		return (icl_cxgbei_load());
	case MOD_UNLOAD:
		return (icl_cxgbei_unload());
	default:
		return (EINVAL);
	}
}

moduledata_t icl_cxgbei_data = {
	"icl_cxgbei",
	icl_cxgbei_modevent,
	0
};

DECLARE_MODULE(icl_cxgbei, icl_cxgbei_data, SI_SUB_DRIVERS, SI_ORDER_MIDDLE);
MODULE_DEPEND(icl_cxgbei, icl, 1, 1, 1);
MODULE_VERSION(icl_cxgbei, 1);
#endif