/*-
 * Copyright (c) 2012 Chelsio Communications, Inc.
 * All rights reserved.
 *
 * Chelsio T5xx iSCSI driver
 *
 * Written by: Sreenivasa Honnur <shonnur@chelsio.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/types.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/module.h>
#include <sys/systm.h>

#ifdef TCP_OFFLOAD
#include <sys/errno.h>
#include <sys/gsb_crc32.h>
#include <sys/kthread.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/mbuf.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/condvar.h>
#include <sys/uio.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/toecore.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>

#include <cam/scsi/scsi_all.h>
#include <cam/scsi/scsi_da.h>
#include <cam/ctl/ctl_io.h>
#include <cam/ctl/ctl.h>
#include <cam/ctl/ctl_backend.h>
#include <cam/ctl/ctl_error.h>
#include <cam/ctl/ctl_frontend.h>
#include <cam/ctl/ctl_debug.h>
#include <cam/ctl/ctl_ha.h>
#include <cam/ctl/ctl_ioctl.h>

#include <dev/iscsi/icl.h>
#include <dev/iscsi/iscsi_proto.h>
#include <dev/iscsi/iscsi_ioctl.h>
#include <dev/iscsi/iscsi.h>
#include <cam/ctl/ctl_frontend_iscsi.h>

#include <cam/cam.h>
#include <cam/cam_ccb.h>
#include <cam/cam_xpt.h>
#include <cam/cam_debug.h>
#include <cam/cam_sim.h>
#include <cam/cam_xpt_sim.h>
#include <cam/cam_xpt_periph.h>
#include <cam/cam_periph.h>
#include <cam/cam_compat.h>
#include <cam/scsi/scsi_message.h>

#include "common/common.h"
#include "common/t4_msg.h"
#include "common/t4_regs.h"	/* for PCIE_MEM_ACCESS */
#include "tom/t4_tom.h"
#include "cxgbei.h"

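/*
 * Derive the maximum transmit and receive iSCSI data segment lengths
 * from the adapter's PM page sizes, the TP transfer-length limits, and
 * the number of pages DDP can place for a single PDU.
 */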
static void
read_pdu_limits(struct adapter *sc, uint32_t *max_tx_data_len,
    uint32_t *max_rx_data_len, struct ppod_region *pr)
{
	uint32_t tx_len, rx_len, r, v;

	rx_len = t4_read_reg(sc, A_TP_PMM_RX_PAGE_SIZE);
	tx_len = t4_read_reg(sc, A_TP_PMM_TX_PAGE_SIZE);

	r = t4_read_reg(sc, A_TP_PARA_REG2);
	rx_len = min(rx_len, G_MAXRXDATA(r));
	tx_len = min(tx_len, G_MAXRXDATA(r));

	r = t4_read_reg(sc, A_TP_PARA_REG7);
	v = min(G_PMMAXXFERLEN0(r), G_PMMAXXFERLEN1(r));
	rx_len = min(rx_len, v);
	tx_len = min(tx_len, v);

	/*
	 * AHS is not supported by the kernel so we'll not account for
	 * it either in our PDU len -> data segment len conversions.
	 */
	rx_len -= ISCSI_BHS_SIZE + ISCSI_HEADER_DIGEST_SIZE +
	    ISCSI_DATA_DIGEST_SIZE;
	tx_len -= ISCSI_BHS_SIZE + ISCSI_HEADER_DIGEST_SIZE +
	    ISCSI_DATA_DIGEST_SIZE;

	/*
	 * DDP can place only 4 pages for a single PDU. A single
	 * request might use larger pages than the smallest page size,
	 * but that cannot be guaranteed. Assume the smallest DDP
	 * page size for this limit.
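	 * For example, with a smallest DDP page size of 4KB this caps
	 * the receive limit at 16KB.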
	 */
	rx_len = min(rx_len, 4 * (1U << pr->pr_page_shift[0]));

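	/*
	 * On T5, cap the transmit data segment at 15360 bytes and
	 * round both limits down to a multiple of 512.
	 */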
	if (chip_id(sc) == CHELSIO_T5) {
		tx_len = min(tx_len, 15360);

		rx_len = rounddown2(rx_len, 512);
		tx_len = rounddown2(tx_len, 512);
	}

	*max_tx_data_len = tx_len;
	*max_rx_data_len = rx_len;
}

/*
 * Initialize the software state of the iSCSI ULP driver.
 *
 * ENXIO means firmware didn't set up something that it was supposed to.
 */
static int
cxgbei_init(struct adapter *sc, struct cxgbei_data *ci)
{
	struct sysctl_oid *oid;
	struct sysctl_oid_list *children;
	struct ppod_region *pr;
	uint32_t r;
	int rc;

	MPASS(sc->vres.iscsi.size > 0);
	MPASS(ci != NULL);

	pr = &ci->pr;
	r = t4_read_reg(sc, A_ULP_RX_ISCSI_PSZ);
	rc = t4_init_ppod_region(pr, &sc->vres.iscsi, r, "iSCSI page pods");
	if (rc != 0) {
		device_printf(sc->dev,
		    "%s: failed to initialize the iSCSI page pod region: %u.\n",
		    __func__, rc);
		return (rc);
	}

	read_pdu_limits(sc, &ci->max_tx_data_len, &ci->max_rx_data_len, pr);

	sysctl_ctx_init(&ci->ctx);
	oid = device_get_sysctl_tree(sc->dev);	/* dev.t5nex.X */
	children = SYSCTL_CHILDREN(oid);

	oid = SYSCTL_ADD_NODE(&ci->ctx, children, OID_AUTO, "iscsi",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "iSCSI ULP settings");
	children = SYSCTL_CHILDREN(oid);

	ci->ddp_threshold = 2048;
	SYSCTL_ADD_UINT(&ci->ctx, children, OID_AUTO, "ddp_threshold",
	    CTLFLAG_RW, &ci->ddp_threshold, 0, "Rx zero copy threshold");

	SYSCTL_ADD_UINT(&ci->ctx, children, OID_AUTO, "max_rx_data_len",
	    CTLFLAG_RW, &ci->max_rx_data_len, 0,
	    "Maximum receive data segment length");
	SYSCTL_ADD_UINT(&ci->ctx, children, OID_AUTO, "max_tx_data_len",
	    CTLFLAG_RW, &ci->max_tx_data_len, 0,
	    "Maximum transmit data segment length");

	return (0);
}

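/*
 * CPL_ISCSI_HDR: the BHS of a new PDU has arrived (the T5-style flow
 * without completions). Save the header in a freshly allocated icl_pdu
 * and stash it in the toepcb until the rest of the PDU arrives.
 */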
static int
do_rx_iscsi_hdr(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	struct cpl_iscsi_hdr *cpl = mtod(m, struct cpl_iscsi_hdr *);
	u_int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct icl_pdu *ip;
	struct icl_cxgbei_pdu *icp;
	uint16_t len_ddp = be16toh(cpl->pdu_len_ddp);
	uint16_t len = be16toh(cpl->len);

	M_ASSERTPKTHDR(m);
	MPASS(m->m_pkthdr.len == len + sizeof(*cpl));

	ip = icl_cxgbei_new_pdu(M_NOWAIT);
	if (ip == NULL)
		CXGBE_UNIMPLEMENTED("PDU allocation failure");
	m_copydata(m, sizeof(*cpl), ISCSI_BHS_SIZE, (caddr_t)ip->ip_bhs);
	ip->ip_data_len = G_ISCSI_PDU_LEN(len_ddp) - len;
	icp = ip_to_icp(ip);
	icp->icp_seq = ntohl(cpl->seq);
	icp->icp_flags = ICPF_RX_HDR;

	/* This is the start of a new PDU. There should be no old state. */
	MPASS(toep->ulpcb2 == NULL);
	toep->ulpcb2 = icp;

#if 0
	CTR5(KTR_CXGBE, "%s: tid %u, cpl->len %u, pdu_len_ddp 0x%04x, icp %p",
	    __func__, tid, len, len_ddp, icp);
#endif

	m_freem(m);
	return (0);
}

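/*
 * CPL_ISCSI_DATA: freelist data for a PDU. On T5 this follows
 * CPL_ISCSI_HDR; on T6 with completions enabled it can be the first
 * message seen for a PDU, with the header arriving later in the
 * completion CPL.
 */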
static int
do_rx_iscsi_data(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	struct cpl_iscsi_data *cpl = mtod(m, struct cpl_iscsi_data *);
	u_int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct icl_cxgbei_pdu *icp = toep->ulpcb2;
	struct icl_pdu *ip;

	M_ASSERTPKTHDR(m);
	MPASS(m->m_pkthdr.len == be16toh(cpl->len) + sizeof(*cpl));

	if (icp == NULL) {
		/*
		 * T6 completion enabled, start of a new pdu. Header
		 * will come in completion CPL.
		 */
		ip = icl_cxgbei_new_pdu(M_NOWAIT);
		if (ip == NULL)
			CXGBE_UNIMPLEMENTED("PDU allocation failure");
		icp = ip_to_icp(ip);
	} else {
		/* T5 mode, header is already received. */
		MPASS(icp->icp_flags == ICPF_RX_HDR);
		MPASS(icp->ip.ip_data_mbuf == NULL);
		MPASS(icp->ip.ip_data_len == m->m_pkthdr.len - sizeof(*cpl));
	}

	/* Trim the cpl header from mbuf. */
	m_adj(m, sizeof(*cpl));

	icp->icp_flags |= ICPF_RX_FLBUF;
	icp->ip.ip_data_mbuf = m;
	toep->ofld_rxq->rx_iscsi_fl_pdus++;
	toep->ofld_rxq->rx_iscsi_fl_octets += m->m_pkthdr.len;

	/*
	 * For T6, save the icp for further processing in the
	 * completion handler.
	 */
	if (icp->icp_flags == ICPF_RX_FLBUF) {
		MPASS(toep->ulpcb2 == NULL);
		toep->ulpcb2 = icp;
	}

#if 0
	CTR4(KTR_CXGBE, "%s: tid %u, cpl->len %u, icp %p", __func__, tid,
	    be16toh(cpl->len), icp);
#endif

	return (0);
}

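/* m_apply() callback that folds one mbuf data region into a running CRC32C. */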
static int
mbuf_crc32c_helper(void *arg, void *data, u_int len)
{
	uint32_t *digestp = arg;

	*digestp = calculate_crc32c(*digestp, data, len);
	return (0);
}

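/*
 * Read one complete iSCSI PDU from a plain TCP socket, verifying the
 * header and data digests if the connection uses them. Returns a new
 * icl_pdu, or NULL if the stream is truncated or corrupt.
 */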
static struct icl_pdu *
parse_pdu(struct socket *so, struct toepcb *toep, struct icl_cxgbei_conn *icc,
    struct sockbuf *sb, u_int total_len)
{
	struct uio uio;
	struct iovec iov[2];
	struct iscsi_bhs bhs;
	struct mbuf *m;
	struct icl_pdu *ip;
	u_int ahs_len, data_len, header_len, pdu_len;
	uint32_t calc_digest, wire_digest;
	int error;

	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = curthread;

	header_len = sizeof(struct iscsi_bhs);
	if (icc->ic.ic_header_crc32c)
		header_len += ISCSI_HEADER_DIGEST_SIZE;

	if (total_len < header_len) {
		ICL_WARN("truncated pre-offload PDU with len %u", total_len);
		return (NULL);
	}

	iov[0].iov_base = &bhs;
	iov[0].iov_len = sizeof(bhs);
	iov[1].iov_base = &wire_digest;
	iov[1].iov_len = sizeof(wire_digest);
	uio.uio_iov = iov;
	uio.uio_iovcnt = 2;
	uio.uio_offset = 0;
	uio.uio_resid = header_len;
	error = soreceive(so, NULL, &uio, NULL, NULL, NULL);
	if (error != 0) {
		ICL_WARN("failed to read BHS from pre-offload PDU: %d", error);
		return (NULL);
	}

	ahs_len = bhs.bhs_total_ahs_len * 4;
	data_len = bhs.bhs_data_segment_len[0] << 16 |
	    bhs.bhs_data_segment_len[1] << 8 |
	    bhs.bhs_data_segment_len[2];
	pdu_len = header_len + ahs_len + roundup2(data_len, 4);
	if (icc->ic.ic_data_crc32c && data_len != 0)
		pdu_len += ISCSI_DATA_DIGEST_SIZE;

	if (total_len < pdu_len) {
		ICL_WARN("truncated pre-offload PDU len %u vs %u", total_len,
		    pdu_len);
		return (NULL);
	}

	if (ahs_len != 0) {
		ICL_WARN("received pre-offload PDU with AHS");
		return (NULL);
	}

	if (icc->ic.ic_header_crc32c) {
		calc_digest = calculate_crc32c(0xffffffff, (caddr_t)&bhs,
		    sizeof(bhs));
		calc_digest ^= 0xffffffff;
		if (calc_digest != wire_digest) {
			ICL_WARN("received pre-offload PDU 0x%02x with "
			    "invalid header digest (0x%x vs 0x%x)",
			    bhs.bhs_opcode, wire_digest, calc_digest);
			toep->ofld_rxq->rx_iscsi_header_digest_errors++;
			return (NULL);
		}
	}

	m = NULL;
	if (data_len != 0) {
		uio.uio_iov = NULL;
		uio.uio_resid = roundup2(data_len, 4);
		if (icc->ic.ic_data_crc32c)
			uio.uio_resid += ISCSI_DATA_DIGEST_SIZE;

		error = soreceive(so, NULL, &uio, &m, NULL, NULL);
		if (error != 0) {
			ICL_WARN("failed to read data payload from "
			    "pre-offload PDU: %d", error);
			return (NULL);
		}

		if (icc->ic.ic_data_crc32c) {
			m_copydata(m, roundup2(data_len, 4),
			    sizeof(wire_digest), (caddr_t)&wire_digest);

			calc_digest = 0xffffffff;
			m_apply(m, 0, roundup2(data_len, 4), mbuf_crc32c_helper,
			    &calc_digest);
			calc_digest ^= 0xffffffff;
			if (calc_digest != wire_digest) {
				ICL_WARN("received pre-offload PDU 0x%02x "
				    "with invalid data digest (0x%x vs 0x%x)",
				    bhs.bhs_opcode, wire_digest, calc_digest);
				toep->ofld_rxq->rx_iscsi_data_digest_errors++;
				m_freem(m);
				return (NULL);
			}
		}
	}

	ip = icl_cxgbei_new_pdu(M_WAITOK);
	icl_cxgbei_new_pdu_set_conn(ip, &icc->ic);
	*ip->ip_bhs = bhs;
	ip->ip_data_len = data_len;
	ip->ip_data_mbuf = m;
	return (ip);
}

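/*
 * Parse any PDUs that arrived on the socket before the connection
 * switched to ULP mode and queue them on the connection's rcvd_pdus
 * list. Called, and returns, with the socket buffer locked.
 */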
void
parse_pdus(struct icl_cxgbei_conn *icc, struct sockbuf *sb)
{
	struct icl_conn *ic = &icc->ic;
	struct socket *so = ic->ic_socket;
	struct toepcb *toep = icc->toep;
	struct icl_pdu *ip, *lastip;
	u_int total_len;

	SOCKBUF_LOCK_ASSERT(sb);

	CTR3(KTR_CXGBE, "%s: tid %u, %u bytes in so_rcv", __func__, toep->tid,
	    sbused(sb));

	lastip = NULL;
	while (sbused(sb) != 0 && (sb->sb_state & SBS_CANTRCVMORE) == 0) {
		total_len = sbused(sb);
		SOCKBUF_UNLOCK(sb);

		ip = parse_pdu(so, toep, icc, sb, total_len);

		if (ip == NULL) {
			ic->ic_error(ic);
			SOCKBUF_LOCK(sb);
			return;
		}

		if (lastip == NULL)
			STAILQ_INSERT_HEAD(&icc->rcvd_pdus, ip, ip_next);
		else
			STAILQ_INSERT_AFTER(&icc->rcvd_pdus, lastip, ip,
			    ip_next);
		lastip = ip;

		SOCKBUF_LOCK(sb);
	}
}

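/*
 * CPL_RX_ISCSI_DDP: status for a PDU whose header (and possibly data)
 * was delivered earlier. Validates the hardware status bits, advances
 * rcv_nxt, and queues the assembled PDU on the connection.
 */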
static int
do_rx_iscsi_ddp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct adapter *sc = iq->adapter;
	const struct cpl_rx_data_ddp *cpl = (const void *)(rss + 1);
	u_int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct inpcb *inp = toep->inp;
	struct socket *so;
	struct sockbuf *sb;
	struct tcpcb *tp;
	struct icl_cxgbei_conn *icc;
	struct icl_conn *ic;
	struct icl_cxgbei_pdu *icp = toep->ulpcb2;
	struct icl_pdu *ip;
	u_int pdu_len, val;
	struct epoch_tracker et;

	MPASS(m == NULL);

	/* Must already be assembling a PDU. */
	MPASS(icp != NULL);
	MPASS(icp->icp_flags & ICPF_RX_HDR);	/* Data is optional. */
	MPASS((icp->icp_flags & ICPF_RX_STATUS) == 0);

	pdu_len = be16toh(cpl->len);	/* includes everything. */
	val = be32toh(cpl->ddpvld);

#if 0
	CTR5(KTR_CXGBE,
	    "%s: tid %u, cpl->len %u, ddpvld 0x%08x, icp_flags 0x%08x",
	    __func__, tid, pdu_len, val, icp->icp_flags);
#endif

	icp->icp_flags |= ICPF_RX_STATUS;
	ip = &icp->ip;
	if (val & F_DDP_PADDING_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid padding",
		    ip->ip_bhs->bhs_opcode);
		toep->ofld_rxq->rx_iscsi_padding_errors++;
	}
	if (val & F_DDP_HDRCRC_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid header digest",
		    ip->ip_bhs->bhs_opcode);
		toep->ofld_rxq->rx_iscsi_header_digest_errors++;
	}
	if (val & F_DDP_DATACRC_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid data digest",
		    ip->ip_bhs->bhs_opcode);
		toep->ofld_rxq->rx_iscsi_data_digest_errors++;
	}
	if (val & F_DDP_PDU && ip->ip_data_mbuf == NULL) {
		MPASS((icp->icp_flags & ICPF_RX_FLBUF) == 0);
		MPASS(ip->ip_data_len > 0);
		icp->icp_flags |= ICPF_RX_DDP;
		toep->ofld_rxq->rx_iscsi_ddp_pdus++;
		toep->ofld_rxq->rx_iscsi_ddp_octets += ip->ip_data_len;
	}

	INP_WLOCK(inp);
	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, pdu_len, inp->inp_flags);
		INP_WUNLOCK(inp);
		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		return (0);
	}

	/*
	 * T6+ does not report data PDUs received via DDP without the F
	 * (final) flag set. This can result in gaps in the TCP
	 * sequence space.
	 */
	tp = intotcpcb(inp);
	MPASS(chip_id(sc) >= CHELSIO_T6 || icp->icp_seq == tp->rcv_nxt);
	tp->rcv_nxt = icp->icp_seq + pdu_len;
	tp->t_rcvtime = ticks;

	/*
	 * Don't update the window size or return credits since RX
	 * flow control is disabled.
	 */

	so = inp->inp_socket;
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);

	icc = toep->ulpcb;
	if (__predict_false(icc == NULL || sb->sb_state & SBS_CANTRCVMORE)) {
		CTR5(KTR_CXGBE,
		    "%s: tid %u, excess rx (%d bytes), icc %p, sb_state 0x%x",
		    __func__, tid, pdu_len, icc, sb->sb_state);
		SOCKBUF_UNLOCK(sb);
		INP_WUNLOCK(inp);

		CURVNET_SET(so->so_vnet);
		NET_EPOCH_ENTER(et);
		INP_WLOCK(inp);
		tp = tcp_drop(tp, ECONNRESET);
		if (tp != NULL)
			INP_WUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		CURVNET_RESTORE();

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		return (0);
	}
	MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
	ic = &icc->ic;
	if ((val & (F_DDP_PADDING_ERR | F_DDP_HDRCRC_ERR |
	    F_DDP_DATACRC_ERR)) != 0) {
		SOCKBUF_UNLOCK(sb);
		INP_WUNLOCK(inp);

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		ic->ic_error(ic);
		return (0);
	}

	icl_cxgbei_new_pdu_set_conn(ip, ic);

	STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip, ip_next);
	if (!icc->rx_active) {
		icc->rx_active = true;
		wakeup(&icc->rx_active);
	}
	SOCKBUF_UNLOCK(sb);
	INP_WUNLOCK(inp);

	toep->ulpcb2 = NULL;

	return (0);
}

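/*
 * CPL_RX_ISCSI_CMP (T6 completion mode): completion for a PDU or a
 * burst of DDP'ed data PDUs. The BHS of the final PDU rides in the
 * CPL, and a burst is collapsed into a single "large" PDU before it is
 * handed up.
 */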
static int
do_rx_iscsi_cmp(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m)
{
	struct epoch_tracker et;
	struct adapter *sc = iq->adapter;
	struct cpl_rx_iscsi_cmp *cpl = mtod(m, struct cpl_rx_iscsi_cmp *);
	u_int tid = GET_TID(cpl);
	struct toepcb *toep = lookup_tid(sc, tid);
	struct icl_cxgbei_pdu *icp = toep->ulpcb2;
	struct icl_pdu *ip;
	struct cxgbei_cmp *cmp;
	struct inpcb *inp = toep->inp;
#ifdef INVARIANTS
	uint16_t len = be16toh(cpl->len);
	u_int data_digest_len;
#endif
	struct socket *so;
	struct sockbuf *sb;
	struct tcpcb *tp;
	struct icl_cxgbei_conn *icc;
	struct icl_conn *ic;
	struct iscsi_bhs_data_out *bhsdo;
	u_int val = be32toh(cpl->ddpvld);
	u_int npdus, pdu_len;
	uint32_t prev_seg_len;

	M_ASSERTPKTHDR(m);
	MPASS(m->m_pkthdr.len == len + sizeof(*cpl));

	if ((val & F_DDP_PDU) == 0) {
		MPASS(icp != NULL);
		MPASS((icp->icp_flags & ICPF_RX_STATUS) == 0);
		ip = &icp->ip;
	}

	if (icp == NULL) {
		/* T6 completion enabled, start of a new PDU. */
		ip = icl_cxgbei_new_pdu(M_NOWAIT);
		if (ip == NULL)
			CXGBE_UNIMPLEMENTED("PDU allocation failure");
		icp = ip_to_icp(ip);
	}
	pdu_len = G_ISCSI_PDU_LEN(be16toh(cpl->pdu_len_ddp));

#if 0
	CTR5(KTR_CXGBE,
	    "%s: tid %u, cpl->len %u, ddpvld 0x%08x, icp %p",
	    __func__, tid, pdu_len, val, icp);
#endif

	/* Copy header */
	m_copydata(m, sizeof(*cpl), ISCSI_BHS_SIZE, (caddr_t)ip->ip_bhs);
	bhsdo = (struct iscsi_bhs_data_out *)ip->ip_bhs;
	ip->ip_data_len = bhsdo->bhsdo_data_segment_len[0] << 16 |
	    bhsdo->bhsdo_data_segment_len[1] << 8 |
	    bhsdo->bhsdo_data_segment_len[2];
	icp->icp_seq = ntohl(cpl->seq);
	icp->icp_flags |= ICPF_RX_HDR;
	icp->icp_flags |= ICPF_RX_STATUS;

	if (val & F_DDP_PADDING_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid padding",
		    ip->ip_bhs->bhs_opcode);
		toep->ofld_rxq->rx_iscsi_padding_errors++;
	}
	if (val & F_DDP_HDRCRC_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid header digest",
		    ip->ip_bhs->bhs_opcode);
		toep->ofld_rxq->rx_iscsi_header_digest_errors++;
	}
	if (val & F_DDP_DATACRC_ERR) {
		ICL_WARN("received PDU 0x%02x with invalid data digest",
		    ip->ip_bhs->bhs_opcode);
		toep->ofld_rxq->rx_iscsi_data_digest_errors++;
	}

	INP_WLOCK(inp);
	if (__predict_false(inp->inp_flags & INP_DROPPED)) {
		CTR4(KTR_CXGBE, "%s: tid %u, rx (%d bytes), inp_flags 0x%x",
		    __func__, tid, pdu_len, inp->inp_flags);
		INP_WUNLOCK(inp);
		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		m_freem(m);
		return (0);
	}

	tp = intotcpcb(inp);

	/*
	 * If icc is NULL, the connection is being closed in
	 * icl_cxgbei_conn_close(), just drop this data.
	 */
	icc = toep->ulpcb;
	if (__predict_false(icc == NULL)) {
		CTR4(KTR_CXGBE, "%s: tid %u, excess rx (%d bytes), icc %p",
		    __func__, tid, pdu_len, icc);

		/*
		 * Update rcv_nxt so the sequence number of the FIN
		 * doesn't appear wrong.
		 */
		tp->rcv_nxt = icp->icp_seq + pdu_len;
		tp->t_rcvtime = ticks;
		INP_WUNLOCK(inp);

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		m_freem(m);
		return (0);
	}

	MPASS(icc->icc_signature == CXGBEI_CONN_SIGNATURE);
	ic = &icc->ic;
	if ((val & (F_DDP_PADDING_ERR | F_DDP_HDRCRC_ERR |
	    F_DDP_DATACRC_ERR)) != 0) {
		INP_WUNLOCK(inp);

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		m_freem(m);
		ic->ic_error(ic);
		return (0);
	}

#ifdef INVARIANTS
	data_digest_len = (icc->ulp_submode & ULP_CRC_DATA) ?
	    ISCSI_DATA_DIGEST_SIZE : 0;
	MPASS(roundup2(ip->ip_data_len, 4) == pdu_len - len - data_digest_len);
#endif

	if (val & F_DDP_PDU && ip->ip_data_mbuf == NULL) {
		MPASS((icp->icp_flags & ICPF_RX_FLBUF) == 0);
		MPASS(ip->ip_data_len > 0);
		icp->icp_flags |= ICPF_RX_DDP;
		bhsdo = (struct iscsi_bhs_data_out *)ip->ip_bhs;

		switch (ip->ip_bhs->bhs_opcode & ~ISCSI_BHS_OPCODE_IMMEDIATE) {
		case ISCSI_BHS_OPCODE_SCSI_DATA_IN:
			cmp = cxgbei_find_cmp(icc,
			    be32toh(bhsdo->bhsdo_initiator_task_tag));
			break;
		case ISCSI_BHS_OPCODE_SCSI_DATA_OUT:
			cmp = cxgbei_find_cmp(icc,
			    be32toh(bhsdo->bhsdo_target_transfer_tag));
			break;
		default:
			__assert_unreachable();
		}
		MPASS(cmp != NULL);

		/*
		 * The difference between the end of the last burst
		 * and the offset of the last PDU in this burst is
		 * the additional data received via DDP.
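		 *
		 * For example, if the previous burst ended at buffer
		 * offset 8192 and this PDU reports offset 12288, then
		 * 4096 bytes of data were placed directly by DDP.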
		 */
		prev_seg_len = be32toh(bhsdo->bhsdo_buffer_offset) -
		    cmp->next_buffer_offset;

		if (prev_seg_len != 0) {
			uint32_t orig_datasn;

			/*
			 * Return a "large" PDU representing the burst
			 * of PDUs. Adjust the offset and length of
			 * this PDU to represent the entire burst.
			 */
			ip->ip_data_len += prev_seg_len;
			bhsdo->bhsdo_data_segment_len[2] = ip->ip_data_len;
			bhsdo->bhsdo_data_segment_len[1] = ip->ip_data_len >> 8;
			bhsdo->bhsdo_data_segment_len[0] = ip->ip_data_len >> 16;
			bhsdo->bhsdo_buffer_offset =
			    htobe32(cmp->next_buffer_offset);

			orig_datasn = htobe32(bhsdo->bhsdo_datasn);
			npdus = orig_datasn - cmp->last_datasn;
			bhsdo->bhsdo_datasn = htobe32(cmp->last_datasn + 1);
			cmp->last_datasn = orig_datasn;
			ip->ip_additional_pdus = npdus - 1;
		} else {
			MPASS(htobe32(bhsdo->bhsdo_datasn) ==
			    cmp->last_datasn + 1);
			npdus = 1;
			cmp->last_datasn = htobe32(bhsdo->bhsdo_datasn);
		}

		cmp->next_buffer_offset += ip->ip_data_len;
		toep->ofld_rxq->rx_iscsi_ddp_pdus += npdus;
		toep->ofld_rxq->rx_iscsi_ddp_octets += ip->ip_data_len;
	} else {
		MPASS(icp->icp_flags & (ICPF_RX_FLBUF));
		MPASS(ip->ip_data_len == ip->ip_data_mbuf->m_pkthdr.len);
	}

	tp->rcv_nxt = icp->icp_seq + pdu_len;
	tp->t_rcvtime = ticks;

	/*
	 * Don't update the window size or return credits since RX
	 * flow control is disabled.
	 */

	so = inp->inp_socket;
	sb = &so->so_rcv;
	SOCKBUF_LOCK(sb);
	if (__predict_false(sb->sb_state & SBS_CANTRCVMORE)) {
		CTR5(KTR_CXGBE,
		    "%s: tid %u, excess rx (%d bytes), icc %p, sb_state 0x%x",
		    __func__, tid, pdu_len, icc, sb->sb_state);
		SOCKBUF_UNLOCK(sb);
		INP_WUNLOCK(inp);

		CURVNET_SET(so->so_vnet);
		NET_EPOCH_ENTER(et);
		INP_WLOCK(inp);
		tp = tcp_drop(tp, ECONNRESET);
		if (tp != NULL)
			INP_WUNLOCK(inp);
		NET_EPOCH_EXIT(et);
		CURVNET_RESTORE();

		icl_cxgbei_conn_pdu_free(NULL, ip);
		toep->ulpcb2 = NULL;
		m_freem(m);
		return (0);
	}

	icl_cxgbei_new_pdu_set_conn(ip, ic);

	/* Enqueue the PDU to the received pdus queue. */
	STAILQ_INSERT_TAIL(&icc->rcvd_pdus, ip, ip_next);
	if (!icc->rx_active) {
		icc->rx_active = true;
		wakeup(&icc->rx_active);
	}
	SOCKBUF_UNLOCK(sb);
	INP_WUNLOCK(inp);

	toep->ulpcb2 = NULL;
	m_freem(m);

	return (0);
}

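/*
 * Enable the iSCSI ULP on an adapter: allocate the per-adapter softc
 * and set up the page pod region, PDU limits, and sysctls.
 */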
static int
cxgbei_activate(struct adapter *sc)
{
	struct cxgbei_data *ci;
	int rc;

	ASSERT_SYNCHRONIZED_OP(sc);

	if (uld_active(sc, ULD_ISCSI)) {
		KASSERT(0, ("%s: iSCSI offload already enabled on adapter %p",
		    __func__, sc));
		return (0);
	}

	if (sc->iscsicaps == 0 || sc->vres.iscsi.size == 0) {
		device_printf(sc->dev,
		    "not iSCSI offload capable, or capability disabled.\n");
		return (ENOSYS);
	}

	/* per-adapter softc for iSCSI */
	ci = malloc(sizeof(*ci), M_CXGBE, M_ZERO | M_WAITOK);
	rc = cxgbei_init(sc, ci);
	if (rc != 0) {
		free(ci, M_CXGBE);
		return (rc);
	}

	sc->iscsi_ulp_softc = ci;

	return (0);
}

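/* Undo cxgbei_activate: release the page pod region and the softc. */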
static int
cxgbei_deactivate(struct adapter *sc)
{
	struct cxgbei_data *ci = sc->iscsi_ulp_softc;

	ASSERT_SYNCHRONIZED_OP(sc);

	if (ci != NULL) {
		sysctl_ctx_free(&ci->ctx);
		t4_free_ppod_region(&ci->pr);
		free(ci, M_CXGBE);
		sc->iscsi_ulp_softc = NULL;
	}

	return (0);
}

static void
cxgbei_activate_all(struct adapter *sc, void *arg __unused)
{

	if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4isact") != 0)
		return;

	/* Activate iSCSI if any port on this adapter has IFCAP_TOE enabled. */
	if (sc->offload_map && !uld_active(sc, ULD_ISCSI))
		(void) t4_activate_uld(sc, ULD_ISCSI);

	end_synchronized_op(sc, 0);
}

static void
cxgbei_deactivate_all(struct adapter *sc, void *arg __unused)
{

	if (begin_synchronized_op(sc, NULL, SLEEP_OK | INTR_OK, "t4isdea") != 0)
		return;

	if (uld_active(sc, ULD_ISCSI))
		(void) t4_deactivate_uld(sc, ULD_ISCSI);

	end_synchronized_op(sc, 0);
}

static struct uld_info cxgbei_uld_info = {
	.uld_activate = cxgbei_activate,
	.uld_deactivate = cxgbei_deactivate,
};

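/*
 * Register the receive CPL handlers and the ULD, then activate iSCSI
 * on any adapters that already have offload enabled.
 */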
static int
cxgbei_mod_load(void)
{
	int rc;

	t4_register_cpl_handler(CPL_ISCSI_HDR, do_rx_iscsi_hdr);
	t4_register_cpl_handler(CPL_ISCSI_DATA, do_rx_iscsi_data);
	t4_register_cpl_handler(CPL_RX_ISCSI_DDP, do_rx_iscsi_ddp);
	t4_register_cpl_handler(CPL_RX_ISCSI_CMP, do_rx_iscsi_cmp);

	rc = t4_register_uld(&cxgbei_uld_info, ULD_ISCSI);
	if (rc != 0)
		return (rc);

	t4_iterate(cxgbei_activate_all, NULL);

	return (rc);
}

static int
cxgbei_mod_unload(void)
{

	t4_iterate(cxgbei_deactivate_all, NULL);

	if (t4_unregister_uld(&cxgbei_uld_info, ULD_ISCSI) == EBUSY)
		return (EBUSY);

	t4_register_cpl_handler(CPL_ISCSI_HDR, NULL);
	t4_register_cpl_handler(CPL_ISCSI_DATA, NULL);
	t4_register_cpl_handler(CPL_RX_ISCSI_DDP, NULL);
	t4_register_cpl_handler(CPL_RX_ISCSI_CMP, NULL);

	return (0);
}
#endif

static int
cxgbei_modevent(module_t mod, int cmd, void *arg)
{
	int rc = 0;

#ifdef TCP_OFFLOAD
	switch (cmd) {
	case MOD_LOAD:
		rc = cxgbei_mod_load();
		if (rc == 0)
			rc = icl_cxgbei_mod_load();
		break;

	case MOD_UNLOAD:
		rc = icl_cxgbei_mod_unload();
		if (rc == 0)
			rc = cxgbei_mod_unload();
		break;

	default:
		rc = EINVAL;
	}
#else
	printf("cxgbei: compiled without TCP_OFFLOAD support.\n");
	rc = EOPNOTSUPP;
#endif

	return (rc);
}

static moduledata_t cxgbei_mod = {
	"cxgbei",
	cxgbei_modevent,
	NULL,
};

MODULE_VERSION(cxgbei, 1);
DECLARE_MODULE(cxgbei, cxgbei_mod, SI_SUB_EXEC, SI_ORDER_ANY);
MODULE_DEPEND(cxgbei, t4_tom, 1, 1, 1);
MODULE_DEPEND(cxgbei, cxgbe, 1, 1, 1);
MODULE_DEPEND(cxgbei, icl, 1, 1, 1);