/*-
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 *
 * Chelsio T5xx iSCSI driver
 *
 * Written by: Sreenivasa Honnur <shonnur@chelsio.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/module.h>
#include <sys/systm.h>

#ifdef TCP_OFFLOAD
#include <sys/errno.h>
#include <sys/gsb_crc32.h>
#include <sys/kthread.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/mbuf.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/condvar.h>
#include <sys/uio.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/toecore.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>

#include <cam/scsi/scsi_all.h>
#include <cam/scsi/scsi_da.h>
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_backend.h>
#include <cam/ctl/ctl_error.h>
#include <cam/ctl/ctl_frontend.h>
#include <cam/ctl/ctl_debug.h>
#include <cam/ctl/ctl_ha.h>
#include <cam/ctl/ctl_ioctl.h>

#include <dev/iscsi/icl.h>
#include <dev/iscsi/iscsi_proto.h>
#include <dev/iscsi/iscsi_ioctl.h>
#include <dev/iscsi/iscsi.h>
#include <cam/ctl/ctl_frontend_iscsi.h>

#include <cam/cam.h>
#include <cam/cam_ccb.h>
#include <cam/cam_xpt.h>
#include <cam/cam_debug.h>
#include <cam/cam_sim.h>
#include <cam/cam_xpt_sim.h>
#include <cam/cam_xpt_periph.h>
#include <cam/cam_periph.h>
#include <cam/cam_compat.h>
#include <cam/scsi/scsi_message.h>

#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"	/* for PCIE_MEM_ACCESS */
#include "tom/t4_tom.h"
#include "cxgbei.h"

static void
read_pdu_limits(struct adapter *sc, uint32_t *max_tx_data_len,
    uint32_t *max_rx_data_len, struct ppod_region *pr)
{
	uint32_t tx_len, rx_len, r, v;

	rx_len = t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE);
	tx_len = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE);

	r = t4_read_reg(sc, A_TP_PARA_REG2);
	rx_len = min(rx_len, G_MAXRXDATA(r));
	tx_len = min(tx_len, G_MAXRXDATA(r));

	r = t4_read_reg(sc, A_TP_PARA_REG7);
	v = min(G_PMMAXXFERLEN0(r), G_PMMAXXFERLEN1(r));
	rx_len = min(rx_len, v);
	tx_len = min(tx_len, v);

	/*
	 * AHS is not supported by the kernel so we'll not account for
	 * it either in our PDU len -> data segment len conversions.
	 */
	rx_len -= ISCSI_BHS_SIZE + ISCSI_HEADER_DIGEST_SIZE +
	    ISCSI_DATA_DIGEST_SIZE;
	tx_len -= ISCSI_BHS_SIZE + ISCSI_HEADER_DIGEST_SIZE +
	    ISCSI_DATA_DIGEST_SIZE;

	/*
	 * DDP can place only 4 pages for a single PDU.  A single
	 * request might use larger pages than the smallest page size,
	 * but that cannot be guaranteed.  Assume the smallest DDP
	 * page size for this limit.
	 */
	rx_len = min(rx_len, 4 * (1U << pr->pr_page_shift[0]));

	if (chip_id(sc) == CHELSIO_T5) {
		tx_len = min(tx_len, 15360);

		rx_len = rounddown2(rx_len, 512);
		tx_len = rounddown2(tx_len, 512);
	}

	*max_tx_data_len = tx_len;
	*max_rx_data_len = rx_len;
}
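/*
 * Worked example of the limit computation above (illustrative numbers
 * only, assuming a 16384-byte PMM page, register limits of at least
 * 16384, and a smallest DDP page size of 4KB): 16384 - 48 (BHS) -
 * 4 (header digest) - 4 (data digest) = 16328; the DDP clamp of
 * min(16328, 4 * 4096) leaves 16328; on T5, rounddown2(16328, 512)
 * yields a final max_rx_data_len of 15872.  Under the same
 * assumptions the transmit side is capped at 15360 on T5.
 */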
/*
 * Initialize the software state of the iSCSI ULP driver.
 *
 * ENXIO means firmware didn't set up something that it was supposed to.
 */
static int
cxgbei_init(struct adapter *sc, struct cxgbei_data *ci)
{
	struct sysctl_oid *oid;
	struct sysctl_oid_list *children;
	struct ppod_region *pr;
	uint32_t r;
	int rc;

	MPASS(sc->vres.iscsi.size > 0);
	MPASS(ci != NULL);

	pr = &ci->pr;
	r = t4_read_reg(sc, A_ULP_RX_ISCSI_PSZ);
	rc = t4_init_ppod_region(pr, &sc->vres.iscsi, r, "iSCSI page pods");
	if (rc != 0) {
		device_printf(sc->dev,
		    "%s: failed to initialize the iSCSI page pod region: %u.\n",
		    __func__, rc);
		return (rc);
	}

	read_pdu_limits(sc, &ci->max_tx_data_len, &ci->max_rx_data_len, pr);

	sysctl_ctx_init(&ci->ctx);
	oid = device_get_sysctl_tree(sc->dev);	/* dev.t5nex.X */
	children = SYSCTL_CHILDREN(oid);

	oid = SYSCTL_ADD_NODE(&ci->ctx, children, OID_AUTO, "iscsi",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "iSCSI ULP settings");
	children = SYSCTL_CHILDREN(oid);

	ci->ddp_threshold = 2048;
	SYSCTL_ADD_UINT(&ci->ctx, children, OID_AUTO, "ddp_threshold",
	    CTLFLAG_RW, &ci->ddp_threshold, 0, "Rx zero copy threshold");

	SYSCTL_ADD_UINT(&ci->ctx, children, OID_AUTO, "max_rx_data_len",
	    CTLFLAG_RW, &ci->max_rx_data_len, 0,
	    "Maximum receive data segment length");
	SYSCTL_ADD_UINT(&ci->ctx, children, OID_AUTO, "max_tx_data_len",
	    CTLFLAG_RW, &ci->max_tx_data_len, 0,
	    "Maximum transmit data segment length");

	return (0);
}
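/*
 * Receive-path overview (summary of the handlers below): on T5 the
 * hardware delivers one CPL_ISCSI_HDR per PDU header, an optional
 * CPL_ISCSI_DATA carrying freelist payload, and a final
 * CPL_RX_ISCSI_DDP carrying status; the PDU being assembled is parked
 * in toep->ulpcb2 between messages.  T6 with completion moderation
 * instead delivers CPL_RX_ISCSI_CMP, which carries the header and
 * status together and can stand in for a whole burst of DDP'ed data
 * PDUs.
 */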
static int
do_rx_iscsi_hdr(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	struct cpl_iscsi_hdr *cpl = mtod(m, struct cpl_iscsi_hdr *);
	u_int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct icl_pdu *ip;
	struct icl_cxgbei_pdu *icp;
	uint16_t len_ddp = be16toh(cpl->pdu_len_ddp);
	uint16_t len = be16toh(cpl->len);

	M_ASSERTPKTHDR(m);
	MPASS(m->m_pkthdr.len == len + sizeof(*cpl));

	ip = icl_cxgbei_new_pdu(M_NOWAIT);
	if (ip == NULL)
		CXGBE_UNIMPLEMENTED("PDU allocation failure");
	m_copydata(m, sizeof(*cpl), ISCSI_BHS_SIZE, (caddr_t)ip->ip_bhs);
	ip->ip_data_len = G_ISCSI_PDU_LEN(len_ddp) - len;
	icp = ip_to_icp(ip);
	icp->icp_seq = ntohl(cpl->seq);
	icp->icp_flags = ICPF_RX_HDR;

	/* This is the start of a new PDU.  There should be no old state. */
	MPASS(toep->ulpcb2 == NULL);
	toep->ulpcb2 = icp;

#if 0
	CTR5(KTR_CXGBE, "%s: tid %u, cpl->len %u, pdu_len_ddp 0x%04x, icp %p",
	    __func__, tid, len, len_ddp, icp);
#endif

	m_freem(m);
	return (0);
}

static int
do_rx_iscsi_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	struct cpl_iscsi_data *cpl = mtod(m, struct cpl_iscsi_data *);
	u_int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct icl_cxgbei_pdu *icp = toep->ulpcb2;
	struct icl_pdu *ip;

	M_ASSERTPKTHDR(m);
	MPASS(m->m_pkthdr.len == be16toh(cpl->len) + sizeof(*cpl));

	if (icp == NULL) {
		/*
		 * T6 completion enabled, start of a new PDU.  Header
		 * will come in completion CPL.
		 */
		ip = icl_cxgbei_new_pdu(M_NOWAIT);
		if (ip == NULL)
			CXGBE_UNIMPLEMENTED("PDU allocation failure");
		icp = ip_to_icp(ip);
	} else {
		/* T5 mode, header is already received. */
		MPASS(icp->icp_flags == ICPF_RX_HDR);
		MPASS(icp->ip.ip_data_mbuf == NULL);
		MPASS(icp->ip.ip_data_len == m->m_pkthdr.len - sizeof(*cpl));
	}

	/* Trim the cpl header from mbuf. */
	m_adj(m, sizeof(*cpl));

	icp->icp_flags |= ICPF_RX_FLBUF;
	icp->ip.ip_data_mbuf = m;
	toep->ofld_rxq->rx_iscsi_fl_pdus++;
	toep->ofld_rxq->rx_iscsi_fl_octets += m->m_pkthdr.len;

	/*
	 * For T6, save the icp for further processing in the
	 * completion handler.
	 */
	if (icp->icp_flags == ICPF_RX_FLBUF) {
		MPASS(toep->ulpcb2 == NULL);
		toep->ulpcb2 = icp;
	}

#if 0
	CTR4(KTR_CXGBE, "%s: tid %u, cpl->len %u, icp %p", __func__, tid,
	    be16toh(cpl->len), icp);
#endif

	return (0);
}

static int
mbuf_crc32c_helper(void *arg, void *data, u_int len)
{
	uint32_t *digestp = arg;

	*digestp = calculate_crc32c(*digestp, data, len);
	return (0);
}

static struct icl_pdu *
parse_pdu(struct socket *so, struct toepcb *toep, struct icl_cxgbei_conn *icc,
    struct sockbuf *sb, u_int total_len)
{
	struct uio uio;
	struct iovec iov[2];
	struct iscsi_bhs bhs;
	struct mbuf *m;
	struct icl_pdu *ip;
	u_int ahs_len, data_len, header_len, pdu_len;
	uint32_t calc_digest, wire_digest;
	int error;

	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = curthread;

	header_len = sizeof(struct iscsi_bhs);
	if (icc->ic.ic_header_crc32c)
		header_len += ISCSI_HEADER_DIGEST_SIZE;

	if (total_len < header_len) {
		ICL_WARN("truncated pre-offload PDU with len %u", total_len);
		return (NULL);
	}

	iov[0].iov_base = &bhs;
	iov[0].iov_len = sizeof(bhs);
	iov[1].iov_base = &wire_digest;
	iov[1].iov_len = sizeof(wire_digest);
	uio.uio_iov = iov;
	uio.uio_iovcnt = 2;	/* iov[1] is needed when header digests are on */
	uio.uio_offset = 0;
	uio.uio_resid = header_len;
	error = soreceive(so, NULL, &uio, NULL, NULL, NULL);
	if (error != 0) {
		ICL_WARN("failed to read BHS from pre-offload PDU: %d", error);
		return (NULL);
	}

	ahs_len = bhs.bhs_total_ahs_len * 4;
	data_len = bhs.bhs_data_segment_len[0] << 16 |
	    bhs.bhs_data_segment_len[1] << 8 |
	    bhs.bhs_data_segment_len[2];
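	/*
	 * Example of the 24-bit big-endian decode above: a BHS with
	 * bhs_data_segment_len = { 0x00, 0x10, 0x01 } yields data_len =
	 * 0x001001 = 4097 bytes, which occupies roundup2(4097, 4) = 4100
	 * bytes on the wire before any data digest.
	 */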
	pdu_len = header_len + ahs_len + roundup2(data_len, 4);
	if (icc->ic.ic_data_crc32c && data_len != 0)
		pdu_len += ISCSI_DATA_DIGEST_SIZE;

	if (total_len < pdu_len) {
		ICL_WARN("truncated pre-offload PDU len %u vs %u", total_len,
		    pdu_len);
		return (NULL);
	}

	if (ahs_len != 0) {
		ICL_WARN("received pre-offload PDU with AHS");
		return (NULL);
	}
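	/*
	 * iSCSI digests are CRC32C seeded with all-ones and inverted at
	 * the end, i.e. calculate_crc32c(0xffffffff, buf, len) ^
	 * 0xffffffff, computed separately over the BHS and over the
	 * padded data segment.
	 */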
	if (icc->ic.ic_header_crc32c) {
		calc_digest = calculate_crc32c(0xffffffff, (caddr_t)&bhs,
		    sizeof(bhs));
		calc_digest ^= 0xffffffff;
		if (calc_digest != wire_digest) {
			ICL_WARN("received pre-offload PDU 0x%02x with "
			    "invalid header digest (0x%x vs 0x%x)",
			    bhs.bhs_opcode, wire_digest, calc_digest);
			toep->ofld_rxq->rx_iscsi_header_digest_errors++;
			return (NULL);
		}
	}

	m = NULL;
	if (data_len != 0) {
		uio.uio_iov = NULL;
		uio.uio_resid = roundup2(data_len, 4);
		if (icc->ic.ic_data_crc32c)
			uio.uio_resid += ISCSI_DATA_DIGEST_SIZE;

		error = soreceive(so, NULL, &uio, &m, NULL, NULL);
		if (error != 0) {
			ICL_WARN("failed to read data payload from "
			    "pre-offload PDU: %d", error);
			return (NULL);
		}

		if (icc->ic.ic_data_crc32c) {
			m_copydata(m, roundup2(data_len, 4),
			    sizeof(wire_digest), (caddr_t)&wire_digest);

			calc_digest = 0xffffffff;
			m_apply(m, 0, roundup2(data_len, 4), mbuf_crc32c_helper,
			    &calc_digest);
			calc_digest ^= 0xffffffff;
			if (calc_digest != wire_digest) {
				ICL_WARN("received pre-offload PDU 0x%02x "
				    "with invalid data digest (0x%x vs 0x%x)",
				    bhs.bhs_opcode, wire_digest, calc_digest);
				toep->ofld_rxq->rx_iscsi_data_digest_errors++;
				m_freem(m);
				return (NULL);
			}
		}
	}

	ip = icl_cxgbei_new_pdu(M_WAITOK);
	icl_cxgbei_new_pdu_set_conn(ip, &icc->ic);
	*ip->ip_bhs = bhs;
	ip->ip_data_len = data_len;
	ip->ip_data_mbuf = m;
	return (ip);
}

void
parse_pdus(struct icl_cxgbei_conn *icc, struct sockbuf *sb)
{
	struct icl_conn *ic = &icc->ic;
	struct socket *so = ic->ic_socket;
	struct toepcb *toep = icc->toep;
	struct icl_pdu *ip, *lastip;
	u_int total_len;

	SOCKBUF_LOCK_ASSERT(sb);

	CTR3(KTR_CXGBE, "%s: tid %u, %u bytes in so_rcv", __func__, toep->tid,
	    sbused(sb));

	lastip = NULL;
	while (sbused(sb) != 0 && (sb->sb_state & SBS_CANTRCVMORE) == 0) {
		total_len = sbused(sb);
		SOCKBUF_UNLOCK(sb);

		ip = parse_pdu(so, toep, icc, sb, total_len);

		if (ip == NULL) {
			ic->ic_error(ic);
			SOCKBUF_LOCK(sb);
			return;
		}

		if (lastip == NULL)
			STAILQ_INSERT_HEAD(&icc->rcvd_pdus, ip, ip_next);
		else
			STAILQ_INSERT_AFTER(&icc->rcvd_pdus, lastip, ip,
			    ip_next);
		lastip = ip;

		SOCKBUF_LOCK(sb);
	}
}

static int
do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_rx_data_ddp *cpl = (const void *)(rss + 1);
	u_int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct socket *so;
	struct sockbuf *sb;
	struct tcpcb *tp;
	struct icl_cxgbei_conn *icc;
	struct icl_conn *ic;
	struct icl_cxgbei_pdu *icp = toep->ulpcb2;
	struct icl_pdu *ip;
	u_int pdu_len, val;
	struct epoch_tracker et;

	MPASS(m == NULL);

	/* Must already be assembling a PDU. */
	MPASS(icp != NULL);
	MPASS(icp->icp_flags & ICPF_RX_HDR);	/* Data is optional. */
	MPASS((icp->icp_flags & ICPF_RX_STATUS) == 0);

	pdu_len = be16toh(cpl->len);	/* includes everything. */
	val = be32toh(cpl->ddpvld);

#if 0
	CTR5(KTR_CXGBE,
	    "%s: tid %u, cpl->len %u, ddpvld 0x%08x, icp_flags 0x%08x",
	    __func__, tid, pdu_len, val, icp->icp_flags);
#endif

	icp->icp_flags |= ICPF_RX_STATUS;
	ip = &icp->ip;
	if (val & F_DDP_PADDING_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid padding",
		    ip->ip_bhs->bhs_opcode);
		toep->ofld_rxq->rx_iscsi_padding_errors++;
	}
	if (val & F_DDP_HDRCRC_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid header digest",
		    ip->ip_bhs->bhs_opcode);
		toep->ofld_rxq->rx_iscsi_header_digest_errors++;
	}
	if (val & F_DDP_DATACRC_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid data digest",
		    ip->ip_bhs->bhs_opcode);
		toep->ofld_rxq->rx_iscsi_data_digest_errors++;
	}
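	/*
	 * F_DDP_PDU with no freelist mbuf attached means the hardware
	 * placed the payload directly into the host buffer (zero copy),
	 * so account for it in the DDP statistics.
	 */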
	if (val & F_DDP_PDU && ip->ip_data_mbuf == NULL) {
		MPASS((icp->icp_flags & ICPF_RX_FLBUF) == 0);
		MPASS(ip->ip_data_len > 0);
		icp->icp_flags |= ICPF_RX_DDP;
		toep->ofld_rxq->rx_iscsi_ddp_pdus++;
		toep->ofld_rxq->rx_iscsi_ddp_octets += ip->ip_data_len;
	}

	INP_WLOCK(inp);
	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, pdu_len, inp->inp_flags);
		INP_WUNLOCK(inp);
		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		return (0);
	}

	/*
	 * T6+ does not report data PDUs received via DDP without F
	 * set.  This can result in gaps in the TCP sequence space.
	 */
	tp = intotcpcb(inp);
	MPASS(chip_id(sc) >= CHELSIO_T6 || icp->icp_seq == tp->rcv_nxt);
	tp->rcv_nxt = icp->icp_seq + pdu_len;
	tp->t_rcvtime = ticks;

	/*
	 * Don't update the window size or return credits since RX
	 * flow control is disabled.
	 */

	so = inp->inp_socket;
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);

	icc = toep->ulpcb;
	if (__predict_false(icc == NULL || sb->sb_state & SBS_CANTRCVMORE)) {
		CTR5(KTR_CXGBE,
		    "%s: tid %u, excess rx (%d bytes), icc %p, sb_state 0x%x",
		    __func__, tid, pdu_len, icc, sb->sb_state);
		SOCKBUF_UNLOCK(sb);
		INP_WUNLOCK(inp);

		CURVNET_SET(so->so_vnet);
		NET_EPOCH_ENTER(et);
		INP_WLOCK(inp);
		tp = tcp_drop(tp, ECONNRESET);
		if (tp != NULL)
			INP_WUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		CURVNET_RESTORE();

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		return (0);
	}
	MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
	ic = &icc->ic;
	if ((val & (F_DDP_PADDING_ERR | F_DDP_HDRCRC_ERR |
	    F_DDP_DATACRC_ERR)) != 0) {
		SOCKBUF_UNLOCK(sb);
		INP_WUNLOCK(inp);

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		ic->ic_error(ic);
		return (0);
	}

	icl_cxgbei_new_pdu_set_conn(ip, ic);

	STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip, ip_next);
	if (!icc->rx_active) {
		icc->rx_active = true;
		wakeup(&icc->rx_active);
	}
	SOCKBUF_UNLOCK(sb);
	INP_WUNLOCK(inp);

	toep->ulpcb2 = NULL;

	return (0);
}
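/*
 * CPL_RX_ISCSI_CMP (T6 and later): a single completion that carries the
 * final PDU's header and status and may represent an entire burst of
 * Data-In/Data-Out PDUs placed via DDP.  The handler below rewrites the
 * BHS so that the burst appears to the ICL layer as one large PDU.
 */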
static int
do_rx_iscsi_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct epoch_tracker et;
	struct adapter *sc = iq->adapter;
	struct cpl_rx_iscsi_cmp *cpl = mtod(m, struct cpl_rx_iscsi_cmp *);
	u_int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct icl_cxgbei_pdu *icp = toep->ulpcb2;
	struct icl_pdu *ip;
	struct cxgbei_cmp *cmp;
	struct inpcb *inp = toep->inp;
#ifdef INVARIANTS
	uint16_t len = be16toh(cpl->len);
	u_int data_digest_len;
#endif
	struct socket *so;
	struct sockbuf *sb;
	struct tcpcb *tp;
	struct icl_cxgbei_conn *icc;
	struct icl_conn *ic;
	struct iscsi_bhs_data_out *bhsdo;
	u_int val = be32toh(cpl->ddpvld);
	u_int npdus, pdu_len;
	uint32_t prev_seg_len;

	M_ASSERTPKTHDR(m);
	MPASS(m->m_pkthdr.len == len + sizeof(*cpl));

	if ((val & F_DDP_PDU) == 0) {
		MPASS(icp != NULL);
		MPASS((icp->icp_flags & ICPF_RX_STATUS) == 0);
		ip = &icp->ip;
	}

	if (icp == NULL) {
		/* T6 completion enabled, start of a new PDU. */
		ip = icl_cxgbei_new_pdu(M_NOWAIT);
		if (ip == NULL)
			CXGBE_UNIMPLEMENTED("PDU allocation failure");
		icp = ip_to_icp(ip);
	}
	pdu_len = G_ISCSI_PDU_LEN(be16toh(cpl->pdu_len_ddp));

#if 0
	CTR5(KTR_CXGBE,
	    "%s: tid %u, cpl->len %u, ddpvld 0x%08x, icp %p",
	    __func__, tid, pdu_len, val, icp);
#endif

	/* Copy header */
	m_copydata(m, sizeof(*cpl), ISCSI_BHS_SIZE, (caddr_t)ip->ip_bhs);
	bhsdo = (struct iscsi_bhs_data_out *)ip->ip_bhs;
	ip->ip_data_len = bhsdo->bhsdo_data_segment_len[0] << 16 |
	    bhsdo->bhsdo_data_segment_len[1] << 8 |
	    bhsdo->bhsdo_data_segment_len[2];
	icp->icp_seq = ntohl(cpl->seq);
	icp->icp_flags |= ICPF_RX_HDR;
	icp->icp_flags |= ICPF_RX_STATUS;

	if (val & F_DDP_PADDING_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid padding",
		    ip->ip_bhs->bhs_opcode);
		toep->ofld_rxq->rx_iscsi_padding_errors++;
	}
	if (val & F_DDP_HDRCRC_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid header digest",
		    ip->ip_bhs->bhs_opcode);
		toep->ofld_rxq->rx_iscsi_header_digest_errors++;
	}
	if (val & F_DDP_DATACRC_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid data digest",
		    ip->ip_bhs->bhs_opcode);
		toep->ofld_rxq->rx_iscsi_data_digest_errors++;
	}

	INP_WLOCK(inp);
	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, pdu_len, inp->inp_flags);
		INP_WUNLOCK(inp);
		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		m_freem(m);
		return (0);
	}

	tp = intotcpcb(inp);

	/*
	 * If icc is NULL, the connection is being closed in
	 * icl_cxgbei_conn_close(), just drop this data.
	 */
	icc = toep->ulpcb;
	if (__predict_false(icc == NULL)) {
		CTR4(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes), icc %p",
		    __func__, tid, pdu_len, icc);

		/*
		 * Update rcv_nxt so the sequence number of the FIN
		 * doesn't appear wrong.
		 */
		tp->rcv_nxt = icp->icp_seq + pdu_len;
		tp->t_rcvtime = ticks;
		INP_WUNLOCK(inp);

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		m_freem(m);
		return (0);
	}

	MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
	ic = &icc->ic;
	if ((val & (F_DDP_PADDING_ERR | F_DDP_HDRCRC_ERR |
	    F_DDP_DATACRC_ERR)) != 0) {
		INP_WUNLOCK(inp);

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		m_freem(m);
		ic->ic_error(ic);
		return (0);
	}

#ifdef INVARIANTS
	data_digest_len = (icc->ulp_submode & ULP_CRC_DATA) ?
	    ISCSI_DATA_DIGEST_SIZE : 0;
	MPASS(roundup2(ip->ip_data_len, 4) == pdu_len - len - data_digest_len);
#endif

	if (val & F_DDP_PDU && ip->ip_data_mbuf == NULL) {
		MPASS((icp->icp_flags & ICPF_RX_FLBUF) == 0);
		MPASS(ip->ip_data_len > 0);
		icp->icp_flags |= ICPF_RX_DDP;
		bhsdo = (struct iscsi_bhs_data_out *)ip->ip_bhs;

		switch (ip->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) {
		case ISCSI_BHS_OPCODE_SCSI_DATA_IN:
			cmp = cxgbei_find_cmp(icc,
			    be32toh(bhsdo->bhsdo_initiator_task_tag));
			break;
		case ISCSI_BHS_OPCODE_SCSI_DATA_OUT:
			cmp = cxgbei_find_cmp(icc,
			    be32toh(bhsdo->bhsdo_target_transfer_tag));
			break;
		default:
			__assert_unreachable();
		}
		MPASS(cmp != NULL);

		/*
		 * The difference between the end of the last burst
		 * and the offset of the last PDU in this burst is
		 * the additional data received via DDP.
		 */
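		/*
		 * Illustrative example: if the previous burst ended at
		 * buffer offset 8192 (cmp->next_buffer_offset) and this
		 * completion reports a PDU at offset 24576 carrying 4096
		 * bytes, prev_seg_len is 16384 and the rewritten PDU
		 * below describes 16384 + 4096 bytes starting at offset
		 * 8192.
		 */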
		prev_seg_len = be32toh(bhsdo->bhsdo_buffer_offset) -
		    cmp->next_buffer_offset;

		if (prev_seg_len != 0) {
			uint32_t orig_datasn;

			/*
			 * Return a "large" PDU representing the burst
			 * of PDUs.  Adjust the offset and length of
			 * this PDU to represent the entire burst.
			 */
			ip->ip_data_len += prev_seg_len;
			bhsdo->bhsdo_data_segment_len[2] = ip->ip_data_len;
			bhsdo->bhsdo_data_segment_len[1] = ip->ip_data_len >> 8;
			bhsdo->bhsdo_data_segment_len[0] = ip->ip_data_len >> 16;
			bhsdo->bhsdo_buffer_offset =
			    htobe32(cmp->next_buffer_offset);

			orig_datasn = be32toh(bhsdo->bhsdo_datasn);
			npdus = orig_datasn - cmp->last_datasn;
			bhsdo->bhsdo_datasn = htobe32(cmp->last_datasn + 1);
			cmp->last_datasn = orig_datasn;
			ip->ip_additional_pdus = npdus - 1;
		} else {
			MPASS(be32toh(bhsdo->bhsdo_datasn) ==
			    cmp->last_datasn + 1);
			npdus = 1;
			cmp->last_datasn = be32toh(bhsdo->bhsdo_datasn);
		}

		cmp->next_buffer_offset += ip->ip_data_len;
		toep->ofld_rxq->rx_iscsi_ddp_pdus += npdus;
		toep->ofld_rxq->rx_iscsi_ddp_octets += ip->ip_data_len;
	} else {
		MPASS(icp->icp_flags & ICPF_RX_FLBUF);
		MPASS(ip->ip_data_len == ip->ip_data_mbuf->m_pkthdr.len);
	}

	tp->rcv_nxt = icp->icp_seq + pdu_len;
	tp->t_rcvtime = ticks;

	/*
	 * Don't update the window size or return credits since RX
	 * flow control is disabled.
	 */

	so = inp->inp_socket;
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);
	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
		CTR5(KTR_CXGBE,
		    "%s: tid %u, excess rx (%d bytes), icc %p, sb_state 0x%x",
		    __func__, tid, pdu_len, icc, sb->sb_state);
		SOCKBUF_UNLOCK(sb);
		INP_WUNLOCK(inp);

		CURVNET_SET(so->so_vnet);
		NET_EPOCH_ENTER(et);
		INP_WLOCK(inp);
		tp = tcp_drop(tp, ECONNRESET);
		if (tp != NULL)
			INP_WUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		CURVNET_RESTORE();

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		m_freem(m);
		return (0);
	}

	icl_cxgbei_new_pdu_set_conn(ip, ic);

	/* Enqueue the PDU to the received pdus queue. */
	STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip, ip_next);
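	/*
	 * The thread draining rcvd_pdus sleeps on &icc->rx_active when
	 * idle; wake it only on the idle -> active transition to avoid
	 * redundant wakeups.
	 */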
	if (!icc->rx_active) {
		icc->rx_active = true;
		wakeup(&icc->rx_active);
	}
	SOCKBUF_UNLOCK(sb);
	INP_WUNLOCK(inp);

	toep->ulpcb2 = NULL;
	m_freem(m);

	return (0);
}

static int
cxgbei_activate(struct adapter *sc)
{
	struct cxgbei_data *ci;
	int rc;

	ASSERT_SYNCHRONIZED_OP(sc);

	if (uld_active(sc, ULD_ISCSI)) {
		KASSERT(0, ("%s: iSCSI offload already enabled on adapter %p",
		    __func__, sc));
		return (0);
	}

	if (sc->iscsicaps == 0 || sc->vres.iscsi.size == 0) {
		device_printf(sc->dev,
		    "not iSCSI offload capable, or capability disabled.\n");
		return (ENOSYS);
	}

	/* per-adapter softc for iSCSI; M_WAITOK means this cannot fail. */
	ci = malloc(sizeof(*ci), M_CXGBE, M_ZERO | M_WAITOK);

	rc = cxgbei_init(sc, ci);
	if (rc != 0) {
		free(ci, M_CXGBE);
		return (rc);
	}

	sc->iscsi_ulp_softc = ci;

	return (0);
}

static int
cxgbei_deactivate(struct adapter *sc)
{
	struct cxgbei_data *ci = sc->iscsi_ulp_softc;

	ASSERT_SYNCHRONIZED_OP(sc);

	if (ci != NULL) {
		sysctl_ctx_free(&ci->ctx);
		t4_free_ppod_region(&ci->pr);
		free(ci, M_CXGBE);
		sc->iscsi_ulp_softc = NULL;
	}

	return (0);
}

static void
cxgbei_activate_all(struct adapter *sc, void *arg __unused)
{

	if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4isact") != 0)
		return;

	/* Activate iSCSI if any port on this adapter has IFCAP_TOE enabled. */
	if (sc->offload_map && !uld_active(sc, ULD_ISCSI))
		(void) t4_activate_uld(sc, ULD_ISCSI);

	end_synchronized_op(sc, 0);
}

static void
cxgbei_deactivate_all(struct adapter *sc, void *arg __unused)
{

	if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4isdea") != 0)
		return;

	if (uld_active(sc, ULD_ISCSI))
		(void) t4_deactivate_uld(sc, ULD_ISCSI);

	end_synchronized_op(sc, 0);
}

static struct uld_info cxgbei_uld_info = {
	.uld_activate = cxgbei_activate,
	.uld_deactivate = cxgbei_deactivate,
};

static int
cxgbei_mod_load(void)
{
	int rc;

	t4_register_cpl_handler(CPL_ISCSI_HDR, do_rx_iscsi_hdr);
	t4_register_cpl_handler(CPL_ISCSI_DATA, do_rx_iscsi_data);
	t4_register_cpl_handler(CPL_RX_ISCSI_DDP, do_rx_iscsi_ddp);
	t4_register_cpl_handler(CPL_RX_ISCSI_CMP, do_rx_iscsi_cmp);

	rc = t4_register_uld(&cxgbei_uld_info, ULD_ISCSI);
	if (rc != 0)
		return (rc);

	t4_iterate(cxgbei_activate_all, NULL);

	return (rc);
}

static int
cxgbei_mod_unload(void)
{

	t4_iterate(cxgbei_deactivate_all, NULL);

	if (t4_unregister_uld(&cxgbei_uld_info, ULD_ISCSI) == EBUSY)
		return (EBUSY);

	t4_register_cpl_handler(CPL_ISCSI_HDR, NULL);
	t4_register_cpl_handler(CPL_ISCSI_DATA, NULL);
	t4_register_cpl_handler(CPL_RX_ISCSI_DDP, NULL);
	t4_register_cpl_handler(CPL_RX_ISCSI_CMP, NULL);

	return (0);
}
#endif
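/*
 * Module plumbing.  The driver is typically loaded with kldload(8)
 * ("kldload cxgbei"); the MODULE_DEPEND() declarations below pull in
 * the t4_tom, cxgbe, and icl modules automatically.
 */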
static int
cxgbei_modevent(module_t mod, int cmd, void *arg)
{
	int rc = 0;

#ifdef TCP_OFFLOAD
	switch (cmd) {
	case MOD_LOAD:
		rc = cxgbei_mod_load();
		if (rc == 0)
			rc = icl_cxgbei_mod_load();
		break;

	case MOD_UNLOAD:
		rc = icl_cxgbei_mod_unload();
		if (rc == 0)
			rc = cxgbei_mod_unload();
		break;

	default:
		rc = EINVAL;
	}
#else
	printf("cxgbei: compiled without TCP_OFFLOAD support.\n");
	rc = EOPNOTSUPP;
#endif

	return (rc);
}

static moduledata_t cxgbei_mod = {
	"cxgbei",
	cxgbei_modevent,
	NULL,
};

MODULE_VERSION(cxgbei, 1);
DECLARE_MODULE(cxgbei, cxgbei_mod, SI_SUB_EXEC, SI_ORDER_ANY);
MODULE_DEPEND(cxgbei, t4_tom, 1, 1, 1);
MODULE_DEPEND(cxgbei, cxgbe, 1, 1, 1);
MODULE_DEPEND(cxgbei, icl, 1, 1, 1);