/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2022-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/capsicum.h>
#include <sys/condvar.h>
#include <sys/file.h>
#include <sys/gsb_crc32.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/nv.h>
#include <sys/protosw.h>
#include <sys/refcount.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <netinet/in.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_proto.h>
#include <dev/nvmf/nvmf_tcp.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/nvmf_transport_internal.h>

struct nvmf_tcp_capsule;
struct nvmf_tcp_qpair;

struct nvmf_tcp_command_buffer {
	struct nvmf_tcp_qpair *qp;

	struct nvmf_io_request io;
	size_t	data_len;
	size_t	data_xfered;
	uint32_t data_offset;

	u_int	refs;
	int	error;

	uint16_t cid;
	uint16_t ttag;

	TAILQ_ENTRY(nvmf_tcp_command_buffer) link;

	/* Controller only */
	struct nvmf_tcp_capsule *tc;
};

struct nvmf_tcp_command_buffer_list {
	TAILQ_HEAD(, nvmf_tcp_command_buffer) head;
	struct mtx lock;
};

struct nvmf_tcp_qpair {
	struct nvmf_qpair qp;

	struct socket *so;

	volatile u_int refs;	/* Every allocated capsule holds a reference */
	uint8_t	txpda;
	uint8_t	rxpda;
	bool	header_digests;
	bool	data_digests;
	uint32_t maxr2t;
	uint32_t maxh2cdata;	/* Controller only */
	uint32_t max_tx_data;
	uint32_t max_icd;	/* Host only */
	uint16_t next_ttag;	/* Controller only */
	u_int	num_ttags;	/* Controller only */
	u_int	active_ttags;	/* Controller only */
	bool	send_success;	/* Controller only */

	/* Receive state. */
	struct thread *rx_thread;
	struct cv rx_cv;
	bool	rx_shutdown;

	/* Transmit state. */
	struct thread *tx_thread;
	struct cv tx_cv;
	bool	tx_shutdown;
	struct mbufq tx_pdus;
	STAILQ_HEAD(, nvmf_tcp_capsule) tx_capsules;

	struct nvmf_tcp_command_buffer_list tx_buffers;
	struct nvmf_tcp_command_buffer_list rx_buffers;

	/*
	 * For the controller, an RX command buffer can be in one of
	 * two locations, all protected by the rx_buffers.lock.  If a
	 * receive request is waiting for either an R2T slot for its
	 * command (due to exceeding MAXR2T), or a transfer tag it is
	 * placed on the rx_buffers list.  When a request is allocated
	 * an active transfer tag, it moves to the open_ttags[] array
	 * (indexed by the tag) until it completes.
	 */
	struct nvmf_tcp_command_buffer **open_ttags;	/* Controller only */
};

struct nvmf_tcp_rxpdu {
	struct mbuf *m;
	const struct nvme_tcp_common_pdu_hdr *hdr;
	uint32_t data_len;
	bool	data_digest_mismatch;
};

struct nvmf_tcp_capsule {
	struct nvmf_capsule nc;

	volatile u_int refs;

	struct nvmf_tcp_rxpdu rx_pdu;

	uint32_t active_r2ts;		/* Controller only */
#ifdef INVARIANTS
	uint32_t tx_data_offset;	/* Controller only */
	u_int	pending_r2ts;		/* Controller only */
#endif

	STAILQ_ENTRY(nvmf_tcp_capsule) link;
};

#define	TCAP(nc)	((struct nvmf_tcp_capsule *)(nc))
#define	TQP(qp)		((struct nvmf_tcp_qpair *)(qp))

static void	tcp_release_capsule(struct nvmf_tcp_capsule *tc);
static void	tcp_free_qpair(struct nvmf_qpair *nq);

SYSCTL_NODE(_kern_nvmf, OID_AUTO, tcp, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "TCP transport");
static u_int tcp_max_transmit_data = 256 * 1024;
SYSCTL_UINT(_kern_nvmf_tcp, OID_AUTO, max_transmit_data, CTLFLAG_RWTUN,
    &tcp_max_transmit_data, 0,
    "Maximum size of data payload in a transmitted PDU");

static MALLOC_DEFINE(M_NVMF_TCP, "nvmf_tcp", "NVMe over TCP");

static int
mbuf_crc32c_helper(void *arg, void *data, u_int len)
{
	uint32_t *digestp = arg;

	*digestp = calculate_crc32c(*digestp, data, len);
	return (0);
}

static uint32_t
mbuf_crc32c(struct mbuf *m, u_int offset, u_int len)
{
	uint32_t digest = 0xffffffff;

	m_apply(m, offset, len, mbuf_crc32c_helper, &digest);
	digest = digest ^ 0xffffffff;

	return (digest);
}

static uint32_t
compute_digest(const void *buf, size_t len)
{
	return (calculate_crc32c(0xffffffff, buf, len) ^ 0xffffffff);
}

static struct nvmf_tcp_command_buffer *
tcp_alloc_command_buffer(struct nvmf_tcp_qpair *qp,
    const struct nvmf_io_request *io, uint32_t data_offset, size_t data_len,
    uint16_t cid)
{
	struct nvmf_tcp_command_buffer *cb;

	cb = malloc(sizeof(*cb), M_NVMF_TCP, M_WAITOK);
	cb->qp = qp;
	cb->io = *io;
	cb->data_offset = data_offset;
	cb->data_len = data_len;
	cb->data_xfered = 0;
	refcount_init(&cb->refs, 1);
	cb->error = 0;
	cb->cid = cid;
	cb->ttag = 0;
	cb->tc = NULL;

	return (cb);
}

static void
tcp_hold_command_buffer(struct nvmf_tcp_command_buffer *cb)
{
	refcount_acquire(&cb->refs);
}

static void
tcp_free_command_buffer(struct nvmf_tcp_command_buffer *cb)
{
	nvmf_complete_io_request(&cb->io, cb->data_xfered, cb->error);
	if (cb->tc != NULL)
		tcp_release_capsule(cb->tc);
	free(cb, M_NVMF_TCP);
}

static void
tcp_release_command_buffer(struct nvmf_tcp_command_buffer *cb)
{
	if (refcount_release(&cb->refs))
		tcp_free_command_buffer(cb);
}

static void
tcp_add_command_buffer(struct nvmf_tcp_command_buffer_list *list,
    struct nvmf_tcp_command_buffer *cb)
{
	mtx_assert(&list->lock, MA_OWNED);
	TAILQ_INSERT_HEAD(&list->head, cb, link);
}

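/*
 * Note: command buffers are matched on both the CID and the transfer
 * tag.  List lookups in this file always pass a ttag of zero;
 * controller-side buffers that have been assigned a transfer tag are
 * tracked in the qpair's open_ttags[] array rather than on a list.
 */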
static struct nvmf_tcp_command_buffer *
tcp_find_command_buffer(struct nvmf_tcp_command_buffer_list *list,
    uint16_t cid, uint16_t ttag)
{
	struct nvmf_tcp_command_buffer *cb;

	mtx_assert(&list->lock, MA_OWNED);
	TAILQ_FOREACH(cb, &list->head, link) {
		if (cb->cid == cid && cb->ttag == ttag)
			return (cb);
	}
	return (NULL);
}

static void
tcp_remove_command_buffer(struct nvmf_tcp_command_buffer_list *list,
    struct nvmf_tcp_command_buffer *cb)
{
	mtx_assert(&list->lock, MA_OWNED);
	TAILQ_REMOVE(&list->head, cb, link);
}

static void
tcp_purge_command_buffer(struct nvmf_tcp_command_buffer_list *list,
    uint16_t cid, uint16_t ttag)
{
	struct nvmf_tcp_command_buffer *cb;

	mtx_lock(&list->lock);
	cb = tcp_find_command_buffer(list, cid, ttag);
	if (cb != NULL) {
		tcp_remove_command_buffer(list, cb);
		mtx_unlock(&list->lock);
		tcp_release_command_buffer(cb);
	} else
		mtx_unlock(&list->lock);
}

static void
nvmf_tcp_write_pdu(struct nvmf_tcp_qpair *qp, struct mbuf *m)
{
	struct socket *so = qp->so;

	SOCKBUF_LOCK(&so->so_snd);
	mbufq_enqueue(&qp->tx_pdus, m);
	/* XXX: Do we need to handle sb_hiwat being wrong? */
	if (sowriteable(so))
		cv_signal(&qp->tx_cv);
	SOCKBUF_UNLOCK(&so->so_snd);
}

static void
nvmf_tcp_report_error(struct nvmf_tcp_qpair *qp, uint16_t fes, uint32_t fei,
    struct mbuf *rx_pdu, u_int hlen)
{
	struct nvme_tcp_term_req_hdr *hdr;
	struct mbuf *m;

	if (hlen != 0) {
		hlen = min(hlen, NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE);
		hlen = min(hlen, m_length(rx_pdu, NULL));
	}

	m = m_get2(sizeof(*hdr) + hlen, M_WAITOK, MT_DATA, 0);
	m->m_len = sizeof(*hdr) + hlen;
	hdr = mtod(m, void *);
	memset(hdr, 0, sizeof(*hdr));
	hdr->common.pdu_type = qp->qp.nq_controller ?
	    NVME_TCP_PDU_TYPE_C2H_TERM_REQ : NVME_TCP_PDU_TYPE_H2C_TERM_REQ;
	hdr->common.hlen = sizeof(*hdr);
	hdr->common.plen = sizeof(*hdr) + hlen;
	hdr->fes = htole16(fes);
	le32enc(hdr->fei, fei);
	if (hlen != 0)
		m_copydata(rx_pdu, 0, hlen, (caddr_t)(hdr + 1));

	nvmf_tcp_write_pdu(qp, m);
}

static int
nvmf_tcp_validate_pdu(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
{
	const struct nvme_tcp_common_pdu_hdr *ch;
	struct mbuf *m = pdu->m;
	uint32_t data_len, fei, plen;
	uint32_t digest, rx_digest;
	u_int hlen;
	int error;
	uint16_t fes;

	/* Determine how large of a PDU header to return for errors. */
	ch = pdu->hdr;
	hlen = ch->hlen;
	plen = le32toh(ch->plen);
	if (hlen < sizeof(*ch) || hlen > plen)
		hlen = sizeof(*ch);

	error = nvmf_tcp_validate_pdu_header(ch, qp->qp.nq_controller,
	    qp->header_digests, qp->data_digests, qp->rxpda, &data_len, &fes,
	    &fei);
	if (error != 0) {
		if (error != ECONNRESET)
			nvmf_tcp_report_error(qp, fes, fei, m, hlen);
		return (error);
	}

	/* Check header digest if present. */
	if ((ch->flags & NVME_TCP_CH_FLAGS_HDGSTF) != 0) {
		digest = mbuf_crc32c(m, 0, ch->hlen);
		m_copydata(m, ch->hlen, sizeof(rx_digest), (caddr_t)&rx_digest);
		if (digest != rx_digest) {
			printf("NVMe/TCP: Header digest mismatch\n");
			nvmf_tcp_report_error(qp,
			    NVME_TCP_TERM_REQ_FES_HDGST_ERROR, rx_digest, m,
			    hlen);
			return (EBADMSG);
		}
	}

	/* Check data digest if present. */
	pdu->data_digest_mismatch = false;
	if ((ch->flags & NVME_TCP_CH_FLAGS_DDGSTF) != 0) {
		digest = mbuf_crc32c(m, ch->pdo, data_len);
		m_copydata(m, plen - sizeof(rx_digest), sizeof(rx_digest),
		    (caddr_t)&rx_digest);
		if (digest != rx_digest) {
			printf("NVMe/TCP: Data digest mismatch\n");
			pdu->data_digest_mismatch = true;
		}
	}

	pdu->data_len = data_len;
	return (0);
}

static void
nvmf_tcp_free_pdu(struct nvmf_tcp_rxpdu *pdu)
{
	m_freem(pdu->m);
	pdu->m = NULL;
	pdu->hdr = NULL;
}

static int
nvmf_tcp_handle_term_req(struct nvmf_tcp_rxpdu *pdu)
{
	const struct nvme_tcp_term_req_hdr *hdr;

	hdr = (const void *)pdu->hdr;

	printf("NVMe/TCP: Received termination request: fes %#x fei %#x\n",
	    le16toh(hdr->fes), le32dec(hdr->fei));
	nvmf_tcp_free_pdu(pdu);
	return (ECONNRESET);
}

static int
nvmf_tcp_save_command_capsule(struct nvmf_tcp_qpair *qp,
    struct nvmf_tcp_rxpdu *pdu)
{
	const struct nvme_tcp_cmd *cmd;
	struct nvmf_capsule *nc;
	struct nvmf_tcp_capsule *tc;

	cmd = (const void *)pdu->hdr;

	nc = nvmf_allocate_command(&qp->qp, &cmd->ccsqe, M_WAITOK);

	tc = TCAP(nc);
	tc->rx_pdu = *pdu;

	nvmf_capsule_received(&qp->qp, nc);
	return (0);
}

static int
nvmf_tcp_save_response_capsule(struct nvmf_tcp_qpair *qp,
    struct nvmf_tcp_rxpdu *pdu)
{
	const struct nvme_tcp_rsp *rsp;
	struct nvmf_capsule *nc;
	struct nvmf_tcp_capsule *tc;

	rsp = (const void *)pdu->hdr;

	nc = nvmf_allocate_response(&qp->qp, &rsp->rccqe, M_WAITOK);

	nc->nc_sqhd_valid = true;
	tc = TCAP(nc);
	tc->rx_pdu = *pdu;

	/*
	 * Once the CQE has been received, no further transfers to the
	 * command buffer for the associated CID can occur.
	 */
	tcp_purge_command_buffer(&qp->rx_buffers, rsp->rccqe.cid, 0);
	tcp_purge_command_buffer(&qp->tx_buffers, rsp->rccqe.cid, 0);

	nvmf_capsule_received(&qp->qp, nc);
	return (0);
}

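/*
 * Transmitted PDUs are framed as:
 *
 *	CH | HDGST (optional) | PAD | DATA (optional) | DDGST (optional)
 *
 * where PAD aligns the start of DATA to the peer's PDA and both
 * digests are CRC32C.
 */
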
/*
 * Construct a PDU that contains an optional data payload.  This
 * includes dealing with digests and the length fields in the common
 * header.
 */
static struct mbuf *
nvmf_tcp_construct_pdu(struct nvmf_tcp_qpair *qp, void *hdr, size_t hlen,
    struct mbuf *data, uint32_t data_len)
{
	struct nvme_tcp_common_pdu_hdr *ch;
	struct mbuf *top;
	uint32_t digest, pad, pdo, plen, mlen;

	plen = hlen;
	if (qp->header_digests)
		plen += sizeof(digest);
	if (data_len != 0) {
		KASSERT(m_length(data, NULL) == data_len, ("length mismatch"));
		pdo = roundup(plen, qp->txpda);
		pad = pdo - plen;
		plen = pdo + data_len;
		if (qp->data_digests)
			plen += sizeof(digest);
		mlen = pdo;
	} else {
		KASSERT(data == NULL, ("payload mbuf with zero length"));
		pdo = 0;
		pad = 0;
		mlen = plen;
	}

	top = m_get2(mlen, M_WAITOK, MT_DATA, 0);
	top->m_len = mlen;
	ch = mtod(top, void *);
	memcpy(ch, hdr, hlen);
	ch->hlen = hlen;
	if (qp->header_digests)
		ch->flags |= NVME_TCP_CH_FLAGS_HDGSTF;
	if (qp->data_digests && data_len != 0)
		ch->flags |= NVME_TCP_CH_FLAGS_DDGSTF;
	ch->pdo = pdo;
	ch->plen = htole32(plen);

	/* HDGST */
	if (qp->header_digests) {
		digest = compute_digest(ch, hlen);
		memcpy((char *)ch + hlen, &digest, sizeof(digest));
	}

	if (pad != 0) {
		/* PAD */
		memset((char *)ch + pdo - pad, 0, pad);
	}

	if (data_len != 0) {
		/* DATA */
		top->m_next = data;

		/* DDGST */
		if (qp->data_digests) {
			digest = mbuf_crc32c(data, 0, data_len);

			/* XXX: Can't use m_append as it uses M_NOWAIT. */
			while (data->m_next != NULL)
				data = data->m_next;

			data->m_next = m_get(M_WAITOK, MT_DATA);
			data->m_next->m_len = sizeof(digest);
			memcpy(mtod(data->m_next, void *), &digest,
			    sizeof(digest));
		}
	}

	return (top);
}

/* Find the next command buffer eligible to schedule for R2T. */
static struct nvmf_tcp_command_buffer *
nvmf_tcp_next_r2t(struct nvmf_tcp_qpair *qp)
{
	struct nvmf_tcp_command_buffer *cb;

	mtx_assert(&qp->rx_buffers.lock, MA_OWNED);
	MPASS(qp->active_ttags < qp->num_ttags);

	TAILQ_FOREACH(cb, &qp->rx_buffers.head, link) {
		/* NB: maxr2t is 0's based. */
		if (cb->tc->active_r2ts > qp->maxr2t)
			continue;
#ifdef INVARIANTS
		cb->tc->pending_r2ts--;
#endif
		TAILQ_REMOVE(&qp->rx_buffers.head, cb, link);
		return (cb);
	}
	return (NULL);
}

/* Allocate the next free transfer tag and assign it to cb. */
static void
nvmf_tcp_allocate_ttag(struct nvmf_tcp_qpair *qp,
    struct nvmf_tcp_command_buffer *cb)
{
	uint16_t ttag;

	mtx_assert(&qp->rx_buffers.lock, MA_OWNED);

	ttag = qp->next_ttag;
	for (;;) {
		if (qp->open_ttags[ttag] == NULL)
			break;
		if (ttag == qp->num_ttags - 1)
			ttag = 0;
		else
			ttag++;
		MPASS(ttag != qp->next_ttag);
	}
	if (ttag == qp->num_ttags - 1)
		qp->next_ttag = 0;
	else
		qp->next_ttag = ttag + 1;

	cb->tc->active_r2ts++;
	qp->active_ttags++;
	qp->open_ttags[ttag] = cb;

	/*
	 * Don't bother byte-swapping ttag as it is just a cookie
	 * value returned by the other end as-is.
	 */
	cb->ttag = ttag;
}

/* NB: cid and ttag are both little-endian already. */
static void
tcp_send_r2t(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
    uint32_t data_offset, uint32_t data_len)
{
	struct nvme_tcp_r2t_hdr r2t;
	struct mbuf *m;

	memset(&r2t, 0, sizeof(r2t));
	r2t.common.pdu_type = NVME_TCP_PDU_TYPE_R2T;
	r2t.cccid = cid;
	r2t.ttag = ttag;
	r2t.r2to = htole32(data_offset);
	r2t.r2tl = htole32(data_len);

	m = nvmf_tcp_construct_pdu(qp, &r2t, sizeof(r2t), NULL, 0);
	nvmf_tcp_write_pdu(qp, m);
}

/*
 * Release a transfer tag and schedule another R2T.
 *
 * NB: This drops the rx_buffers.lock mutex.
 */
static void
nvmf_tcp_send_next_r2t(struct nvmf_tcp_qpair *qp,
    struct nvmf_tcp_command_buffer *cb)
{
	struct nvmf_tcp_command_buffer *ncb;

	mtx_assert(&qp->rx_buffers.lock, MA_OWNED);
	MPASS(qp->open_ttags[cb->ttag] == cb);

	/* Release this transfer tag. */
	qp->open_ttags[cb->ttag] = NULL;
	qp->active_ttags--;
	cb->tc->active_r2ts--;

	/* Schedule another R2T. */
	ncb = nvmf_tcp_next_r2t(qp);
	if (ncb != NULL) {
		nvmf_tcp_allocate_ttag(qp, ncb);
		mtx_unlock(&qp->rx_buffers.lock);
		tcp_send_r2t(qp, ncb->cid, ncb->ttag, ncb->data_offset,
		    ncb->data_len);
	} else
		mtx_unlock(&qp->rx_buffers.lock);
}

/*
 * Copy len bytes starting at offset skip from an mbuf chain into an
 * I/O buffer at destination offset io_offset.
 */
static void
mbuf_copyto_io(struct mbuf *m, u_int skip, u_int len,
    struct nvmf_io_request *io, u_int io_offset)
{
	u_int todo;

	while (m->m_len <= skip) {
		skip -= m->m_len;
		m = m->m_next;
	}
	while (len != 0) {
		MPASS((m->m_flags & M_EXTPG) == 0);

		todo = min(m->m_len - skip, len);
		memdesc_copyback(&io->io_mem, io_offset, todo, mtodo(m, skip));
		skip = 0;
		io_offset += todo;
		len -= todo;
		m = m->m_next;
	}
}

static int
nvmf_tcp_handle_h2c_data(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
{
	const struct nvme_tcp_h2c_data_hdr *h2c;
	struct nvmf_tcp_command_buffer *cb;
	uint32_t data_len, data_offset;
	uint16_t ttag;

	h2c = (const void *)pdu->hdr;
	if (le32toh(h2c->datal) > qp->maxh2cdata) {
		nvmf_tcp_report_error(qp,
		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED, 0,
		    pdu->m, pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	/*
	 * NB: Don't bother byte-swapping ttag as we don't byte-swap
	 * it when sending.
	 */
	ttag = h2c->ttag;
	if (ttag >= qp->num_ttags) {
		nvmf_tcp_report_error(qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
		    offsetof(struct nvme_tcp_h2c_data_hdr, ttag), pdu->m,
		    pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	mtx_lock(&qp->rx_buffers.lock);
	cb = qp->open_ttags[ttag];
	if (cb == NULL) {
		mtx_unlock(&qp->rx_buffers.lock);
		nvmf_tcp_report_error(qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
		    offsetof(struct nvme_tcp_h2c_data_hdr, ttag), pdu->m,
		    pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}
	MPASS(cb->ttag == ttag);

	/* For a data digest mismatch, fail the I/O request. */
	if (pdu->data_digest_mismatch) {
		nvmf_tcp_send_next_r2t(qp, cb);
		cb->error = EINTEGRITY;
		tcp_release_command_buffer(cb);
		nvmf_tcp_free_pdu(pdu);
		return (0);
	}

	data_len = le32toh(h2c->datal);
	if (data_len != pdu->data_len) {
		mtx_unlock(&qp->rx_buffers.lock);
		nvmf_tcp_report_error(qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
		    offsetof(struct nvme_tcp_h2c_data_hdr, datal), pdu->m,
		    pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	data_offset = le32toh(h2c->datao);
	if (data_offset < cb->data_offset ||
	    data_offset + data_len > cb->data_offset + cb->data_len) {
		mtx_unlock(&qp->rx_buffers.lock);
		nvmf_tcp_report_error(qp,
		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0, pdu->m,
		    pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	if (data_offset != cb->data_offset + cb->data_xfered) {
		mtx_unlock(&qp->rx_buffers.lock);
		nvmf_tcp_report_error(qp,
		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->m,
		    pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	if ((cb->data_xfered + data_len == cb->data_len) !=
	    ((pdu->hdr->flags & NVME_TCP_H2C_DATA_FLAGS_LAST_PDU) != 0)) {
		mtx_unlock(&qp->rx_buffers.lock);
		nvmf_tcp_report_error(qp,
		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->m,
		    pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	cb->data_xfered += data_len;
	data_offset -= cb->data_offset;
	if (cb->data_xfered == cb->data_len) {
		nvmf_tcp_send_next_r2t(qp, cb);
	} else {
		tcp_hold_command_buffer(cb);
		mtx_unlock(&qp->rx_buffers.lock);
	}

	mbuf_copyto_io(pdu->m, pdu->hdr->pdo, data_len, &cb->io, data_offset);

	tcp_release_command_buffer(cb);
	nvmf_tcp_free_pdu(pdu);
	return (0);
}

static int
nvmf_tcp_handle_c2h_data(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
{
	const struct nvme_tcp_c2h_data_hdr *c2h;
	struct nvmf_tcp_command_buffer *cb;
	uint32_t data_len, data_offset;

	c2h = (const void *)pdu->hdr;

	mtx_lock(&qp->rx_buffers.lock);
	cb = tcp_find_command_buffer(&qp->rx_buffers, c2h->cccid, 0);
	if (cb == NULL) {
		mtx_unlock(&qp->rx_buffers.lock);
		/*
		 * XXX: Could be PDU sequence error if cccid is for a
		 * command that doesn't use a command buffer.
		 */
		nvmf_tcp_report_error(qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
		    offsetof(struct nvme_tcp_c2h_data_hdr, cccid), pdu->m,
		    pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	/* For a data digest mismatch, fail the I/O request. */
	if (pdu->data_digest_mismatch) {
		cb->error = EINTEGRITY;
		tcp_remove_command_buffer(&qp->rx_buffers, cb);
		mtx_unlock(&qp->rx_buffers.lock);
		tcp_release_command_buffer(cb);
		nvmf_tcp_free_pdu(pdu);
		return (0);
	}

	data_len = le32toh(c2h->datal);
	if (data_len != pdu->data_len) {
		mtx_unlock(&qp->rx_buffers.lock);
		nvmf_tcp_report_error(qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
		    offsetof(struct nvme_tcp_c2h_data_hdr, datal), pdu->m,
		    pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	data_offset = le32toh(c2h->datao);
	if (data_offset < cb->data_offset ||
	    data_offset + data_len > cb->data_offset + cb->data_len) {
		mtx_unlock(&qp->rx_buffers.lock);
		nvmf_tcp_report_error(qp,
		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0,
		    pdu->m, pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	if (data_offset != cb->data_offset + cb->data_xfered) {
		mtx_unlock(&qp->rx_buffers.lock);
		nvmf_tcp_report_error(qp,
		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->m,
		    pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	if ((cb->data_xfered + data_len == cb->data_len) !=
	    ((pdu->hdr->flags & NVME_TCP_C2H_DATA_FLAGS_LAST_PDU) != 0)) {
		mtx_unlock(&qp->rx_buffers.lock);
		nvmf_tcp_report_error(qp,
		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->m,
		    pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	cb->data_xfered += data_len;
	data_offset -= cb->data_offset;
	if (cb->data_xfered == cb->data_len)
		tcp_remove_command_buffer(&qp->rx_buffers, cb);
	else
		tcp_hold_command_buffer(cb);
	mtx_unlock(&qp->rx_buffers.lock);

	mbuf_copyto_io(pdu->m, pdu->hdr->pdo, data_len, &cb->io, data_offset);

	tcp_release_command_buffer(cb);

	if ((pdu->hdr->flags & NVME_TCP_C2H_DATA_FLAGS_SUCCESS) != 0) {
		struct nvme_completion cqe;
		struct nvmf_capsule *nc;

		memset(&cqe, 0, sizeof(cqe));
		cqe.cid = c2h->cccid;

		nc = nvmf_allocate_response(&qp->qp, &cqe, M_WAITOK);
		nc->nc_sqhd_valid = false;

		nvmf_capsule_received(&qp->qp, nc);
	}

	nvmf_tcp_free_pdu(pdu);
	return (0);
}

/* Called when m_free drops refcount to 0. */
static void
nvmf_tcp_mbuf_done(struct mbuf *m)
{
	struct nvmf_tcp_command_buffer *cb = m->m_ext.ext_arg1;

	tcp_free_command_buffer(cb);
}

static struct mbuf *
nvmf_tcp_mbuf(void *arg, int how, void *data, size_t len)
{
	struct nvmf_tcp_command_buffer *cb = arg;
	struct mbuf *m;

	m = m_get(how, MT_DATA);
	m->m_flags |= M_RDONLY;
	m_extaddref(m, data, len, &cb->refs, nvmf_tcp_mbuf_done, cb, NULL);
	m->m_len = len;
	return (m);
}

static void
nvmf_tcp_free_mext_pg(struct mbuf *m)
{
	struct nvmf_tcp_command_buffer *cb = m->m_ext.ext_arg1;

	M_ASSERTEXTPG(m);
	tcp_release_command_buffer(cb);
}

static struct mbuf *
nvmf_tcp_mext_pg(void *arg, int how)
{
	struct nvmf_tcp_command_buffer *cb = arg;
	struct mbuf *m;

	m = mb_alloc_ext_pgs(how, nvmf_tcp_free_mext_pg, M_RDONLY);
	m->m_ext.ext_arg1 = cb;
	tcp_hold_command_buffer(cb);
	return (m);
}

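/*
 * The two constructors above wrap command buffer memory in external
 * mbufs: nvmf_tcp_mbuf() attaches a read-only reference counted via
 * cb->refs, while nvmf_tcp_mext_pg() allocates an M_EXTPG mbuf that
 * holds its own reference on the command buffer.  Either way the
 * buffer stays alive until the socket layer frees the chain.
 */
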
/*
 * Return an mbuf chain for a range of data belonging to a command
 * buffer.
 *
 * The mbuf chain uses M_EXT mbufs which hold references on the
 * command buffer so that it remains "alive" until the data has been
 * fully transmitted.  If can_truncate is true, the returned chain
 * may be shorter than requested to avoid gratuitously splitting up a
 * page.
 */
static struct mbuf *
nvmf_tcp_command_buffer_mbuf(struct nvmf_tcp_command_buffer *cb,
    uint32_t data_offset, uint32_t data_len, uint32_t *actual_len,
    bool can_truncate)
{
	struct mbuf *m;
	size_t len;

	m = memdesc_alloc_ext_mbufs(&cb->io.io_mem, nvmf_tcp_mbuf,
	    nvmf_tcp_mext_pg, cb, M_WAITOK, data_offset, data_len, &len,
	    can_truncate);
	if (actual_len != NULL)
		*actual_len = len;
	return (m);
}

/* NB: cid and ttag are both little-endian already. */
static void
tcp_send_h2c_pdu(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag,
    uint32_t data_offset, struct mbuf *m, size_t len, bool last_pdu)
{
	struct nvme_tcp_h2c_data_hdr h2c;
	struct mbuf *top;

	memset(&h2c, 0, sizeof(h2c));
	h2c.common.pdu_type = NVME_TCP_PDU_TYPE_H2C_DATA;
	if (last_pdu)
		h2c.common.flags |= NVME_TCP_H2C_DATA_FLAGS_LAST_PDU;
	h2c.cccid = cid;
	h2c.ttag = ttag;
	h2c.datao = htole32(data_offset);
	h2c.datal = htole32(len);

	top = nvmf_tcp_construct_pdu(qp, &h2c, sizeof(h2c), m, len);
	nvmf_tcp_write_pdu(qp, top);
}

static int
nvmf_tcp_handle_r2t(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu)
{
	const struct nvme_tcp_r2t_hdr *r2t;
	struct nvmf_tcp_command_buffer *cb;
	uint32_t data_len, data_offset;

	r2t = (const void *)pdu->hdr;

	mtx_lock(&qp->tx_buffers.lock);
	cb = tcp_find_command_buffer(&qp->tx_buffers, r2t->cccid, 0);
	if (cb == NULL) {
		mtx_unlock(&qp->tx_buffers.lock);
		nvmf_tcp_report_error(qp,
		    NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD,
		    offsetof(struct nvme_tcp_r2t_hdr, cccid), pdu->m,
		    pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	data_offset = le32toh(r2t->r2to);
	if (data_offset != cb->data_xfered) {
		mtx_unlock(&qp->tx_buffers.lock);
		nvmf_tcp_report_error(qp,
		    NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->m,
		    pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	/*
	 * XXX: The spec does not specify how to handle R2T transfers
	 * out of range of the original command.
	 */
	data_len = le32toh(r2t->r2tl);
	if (data_offset + data_len > cb->data_len) {
		mtx_unlock(&qp->tx_buffers.lock);
		nvmf_tcp_report_error(qp,
		    NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0,
		    pdu->m, pdu->hdr->hlen);
		nvmf_tcp_free_pdu(pdu);
		return (EBADMSG);
	}

	cb->data_xfered += data_len;
	if (cb->data_xfered == cb->data_len)
		tcp_remove_command_buffer(&qp->tx_buffers, cb);
	else
		tcp_hold_command_buffer(cb);
	mtx_unlock(&qp->tx_buffers.lock);

	/*
	 * Queue one or more H2C_DATA PDUs containing the requested
	 * data.
	 */
	while (data_len > 0) {
		struct mbuf *m;
		uint32_t sent, todo;

		todo = min(data_len, qp->max_tx_data);
		m = nvmf_tcp_command_buffer_mbuf(cb, data_offset, todo, &sent,
		    todo < data_len);
		tcp_send_h2c_pdu(qp, r2t->cccid, r2t->ttag, data_offset, m,
		    sent, sent == data_len);

		data_offset += sent;
		data_len -= sent;
	}

	tcp_release_command_buffer(cb);
	nvmf_tcp_free_pdu(pdu);
	return (0);
}

/*
 * A variant of m_pullup that uses M_WAITOK instead of failing.  It
 * also doesn't do anything if enough bytes are already present in the
 * first mbuf.
 */
static struct mbuf *
pullup_pdu_hdr(struct mbuf *m, int len)
{
	struct mbuf *n, *p;

	KASSERT(len <= MCLBYTES, ("%s: len too large", __func__));
	if (m->m_len >= len)
		return (m);

	n = m_get2(len, M_WAITOK, MT_DATA, 0);
	n->m_len = len;
	m_copydata(m, 0, len, mtod(n, void *));

	while (m != NULL && m->m_len <= len) {
		p = m->m_next;
		len -= m->m_len;
		m_free(m);
		m = p;
	}
	if (len > 0) {
		m->m_data += len;
		m->m_len -= len;
	}
	n->m_next = m;
	return (n);
}

static int
nvmf_tcp_dispatch_pdu(struct nvmf_tcp_qpair *qp,
    const struct nvme_tcp_common_pdu_hdr *ch, struct nvmf_tcp_rxpdu *pdu)
{
	/* Ensure the PDU header is contiguous. */
	pdu->m = pullup_pdu_hdr(pdu->m, ch->hlen);
	pdu->hdr = mtod(pdu->m, const void *);

	switch (ch->pdu_type) {
	default:
		__assert_unreachable();
		break;
	case NVME_TCP_PDU_TYPE_H2C_TERM_REQ:
	case NVME_TCP_PDU_TYPE_C2H_TERM_REQ:
		return (nvmf_tcp_handle_term_req(pdu));
	case NVME_TCP_PDU_TYPE_CAPSULE_CMD:
		return (nvmf_tcp_save_command_capsule(qp, pdu));
	case NVME_TCP_PDU_TYPE_CAPSULE_RESP:
		return (nvmf_tcp_save_response_capsule(qp, pdu));
	case NVME_TCP_PDU_TYPE_H2C_DATA:
		return (nvmf_tcp_handle_h2c_data(qp, pdu));
	case NVME_TCP_PDU_TYPE_C2H_DATA:
		return (nvmf_tcp_handle_c2h_data(qp, pdu));
	case NVME_TCP_PDU_TYPE_R2T:
		return (nvmf_tcp_handle_r2t(qp, pdu));
	}
}

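/*
 * Receive thread: peek at the common PDU header with MSG_PEEK to
 * learn the full PDU length, then read the entire PDU into an mbuf
 * chain before validating and dispatching it.  The socket upcall
 * only signals rx_cv; all socket I/O happens here.
 */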
static void
nvmf_tcp_receive(void *arg)
{
	struct nvmf_tcp_qpair *qp = arg;
	struct socket *so = qp->so;
	struct nvmf_tcp_rxpdu pdu;
	struct nvme_tcp_common_pdu_hdr ch;
	struct uio uio;
	struct iovec iov[1];
	struct mbuf *m, *n, *tail;
	u_int avail, needed;
	int error, flags, terror;
	bool have_header;

	m = tail = NULL;
	have_header = false;
	SOCKBUF_LOCK(&so->so_rcv);
	while (!qp->rx_shutdown) {
		/* Wait until there is enough data for the next step. */
		if (so->so_error != 0 || so->so_rerror != 0) {
			if (so->so_error != 0)
				error = so->so_error;
			else
				error = so->so_rerror;
			SOCKBUF_UNLOCK(&so->so_rcv);
error:
			m_freem(m);
			nvmf_qpair_error(&qp->qp, error);
			SOCKBUF_LOCK(&so->so_rcv);
			while (!qp->rx_shutdown)
				cv_wait(&qp->rx_cv, SOCKBUF_MTX(&so->so_rcv));
			break;
		}
		avail = sbavail(&so->so_rcv);
		if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) != 0) {
			if (!have_header && avail == 0)
				error = 0;
			else
				error = ECONNRESET;
			SOCKBUF_UNLOCK(&so->so_rcv);
			goto error;
		}
		if (avail == 0 || (!have_header && avail < sizeof(ch))) {
			cv_wait(&qp->rx_cv, SOCKBUF_MTX(&so->so_rcv));
			continue;
		}
		SOCKBUF_UNLOCK(&so->so_rcv);

		if (!have_header) {
			KASSERT(m == NULL, ("%s: m != NULL but no header",
			    __func__));
			memset(&uio, 0, sizeof(uio));
			iov[0].iov_base = &ch;
			iov[0].iov_len = sizeof(ch);
			uio.uio_iov = iov;
			uio.uio_iovcnt = 1;
			uio.uio_resid = sizeof(ch);
			uio.uio_segflg = UIO_SYSSPACE;
			uio.uio_rw = UIO_READ;
			flags = MSG_DONTWAIT | MSG_PEEK;

			error = soreceive(so, NULL, &uio, NULL, NULL, &flags);
			if (error != 0)
				goto error;
			KASSERT(uio.uio_resid == 0, ("%s: short CH read",
			    __func__));

			have_header = true;
			needed = le32toh(ch.plen);

			/*
			 * Malformed PDUs will be reported as errors
			 * by nvmf_tcp_validate_pdu.  Just pass along
			 * garbage headers if the lengths mismatch.
			 */
			if (needed < sizeof(ch) || ch.hlen > needed)
				needed = sizeof(ch);

			memset(&uio, 0, sizeof(uio));
			uio.uio_resid = needed;
		}

		flags = MSG_DONTWAIT;
		error = soreceive(so, NULL, &uio, &n, NULL, &flags);
		if (error != 0)
			goto error;

		if (m == NULL)
			m = n;
		else
			tail->m_next = n;

		if (uio.uio_resid != 0) {
			tail = n;
			while (tail->m_next != NULL)
				tail = tail->m_next;

			SOCKBUF_LOCK(&so->so_rcv);
			continue;
		}
#ifdef INVARIANTS
		tail = NULL;
#endif

		pdu.m = m;
		m = NULL;
		pdu.hdr = &ch;
		error = nvmf_tcp_validate_pdu(qp, &pdu);
		if (error != 0)
			m_freem(pdu.m);
		else
			error = nvmf_tcp_dispatch_pdu(qp, &ch, &pdu);
		if (error != 0) {
			/*
			 * If we received a termination request, close
			 * the connection immediately.
			 */
			if (error == ECONNRESET)
				goto error;

			/*
			 * Wait for up to 30 seconds for the socket to
			 * be closed by the other end.
			 */
			SOCKBUF_LOCK(&so->so_rcv);
			if ((so->so_rcv.sb_state & SBS_CANTRCVMORE) == 0) {
				terror = cv_timedwait(&qp->rx_cv,
				    SOCKBUF_MTX(&so->so_rcv), 30 * hz);
				if (terror == ETIMEDOUT)
					printf("NVMe/TCP: Timed out after sending terminate request\n");
			}
			SOCKBUF_UNLOCK(&so->so_rcv);
			goto error;
		}

		have_header = false;
		SOCKBUF_LOCK(&so->so_rcv);
	}
	SOCKBUF_UNLOCK(&so->so_rcv);
	kthread_exit();
}

static struct mbuf *
tcp_command_pdu(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_capsule *tc)
{
	struct nvmf_capsule *nc = &tc->nc;
	struct nvmf_tcp_command_buffer *cb;
	struct nvme_sgl_descriptor *sgl;
	struct nvme_tcp_cmd cmd;
	struct mbuf *top, *m;
	bool use_icd;

	use_icd = false;
	cb = NULL;
	m = NULL;

	if (nc->nc_data.io_len != 0) {
		cb = tcp_alloc_command_buffer(qp, &nc->nc_data, 0,
		    nc->nc_data.io_len, nc->nc_sqe.cid);

		if (nc->nc_send_data && nc->nc_data.io_len <= qp->max_icd) {
			use_icd = true;
			m = nvmf_tcp_command_buffer_mbuf(cb, 0,
			    nc->nc_data.io_len, NULL, false);
			cb->data_xfered = nc->nc_data.io_len;
			tcp_release_command_buffer(cb);
		} else if (nc->nc_send_data) {
			mtx_lock(&qp->tx_buffers.lock);
			tcp_add_command_buffer(&qp->tx_buffers, cb);
			mtx_unlock(&qp->tx_buffers.lock);
		} else {
			mtx_lock(&qp->rx_buffers.lock);
			tcp_add_command_buffer(&qp->rx_buffers, cb);
			mtx_unlock(&qp->rx_buffers.lock);
		}
	}

	memset(&cmd, 0, sizeof(cmd));
	cmd.common.pdu_type = NVME_TCP_PDU_TYPE_CAPSULE_CMD;
	cmd.ccsqe = nc->nc_sqe;

	/* Populate SGL in SQE. */
	sgl = &cmd.ccsqe.sgl;
	memset(sgl, 0, sizeof(*sgl));
	sgl->address = 0;
	sgl->length = htole32(nc->nc_data.io_len);
	if (use_icd) {
		/* Use in-capsule data. */
		sgl->type = NVME_SGL_TYPE_ICD;
	} else {
		/* Use a command buffer. */
		sgl->type = NVME_SGL_TYPE_COMMAND_BUFFER;
	}

	top = nvmf_tcp_construct_pdu(qp, &cmd, sizeof(cmd), m, m != NULL ?
	    nc->nc_data.io_len : 0);
	return (top);
}

static struct mbuf *
tcp_response_pdu(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_capsule *tc)
{
	struct nvmf_capsule *nc = &tc->nc;
	struct nvme_tcp_rsp rsp;

	memset(&rsp, 0, sizeof(rsp));
	rsp.common.pdu_type = NVME_TCP_PDU_TYPE_CAPSULE_RESP;
	rsp.rccqe = nc->nc_cqe;

	return (nvmf_tcp_construct_pdu(qp, &rsp, sizeof(rsp), NULL, 0));
}

static struct mbuf *
capsule_to_pdu(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_capsule *tc)
{
	if (tc->nc.nc_qe_len == sizeof(struct nvme_command))
		return (tcp_command_pdu(qp, tc));
	else
		return (tcp_response_pdu(qp, tc));
}

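/*
 * Transmit thread: drains tx_pdus, converting queued capsules into
 * PDUs on demand, and writes them to the socket without blocking,
 * splitting an mbuf chain when it does not fit in the available
 * socket buffer space.
 */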
static void
nvmf_tcp_send(void *arg)
{
	struct nvmf_tcp_qpair *qp = arg;
	struct nvmf_tcp_capsule *tc;
	struct socket *so = qp->so;
	struct mbuf *m, *n, *p;
	u_long space, tosend;
	int error;

	m = NULL;
	SOCKBUF_LOCK(&so->so_snd);
	while (!qp->tx_shutdown) {
		if (so->so_error != 0) {
			error = so->so_error;
			SOCKBUF_UNLOCK(&so->so_snd);
error:
			m_freem(m);
			nvmf_qpair_error(&qp->qp, error);
			SOCKBUF_LOCK(&so->so_snd);
			while (!qp->tx_shutdown)
				cv_wait(&qp->tx_cv, SOCKBUF_MTX(&so->so_snd));
			break;
		}

		if (m == NULL) {
			/* Next PDU to send. */
			m = mbufq_dequeue(&qp->tx_pdus);
		}
		if (m == NULL) {
			if (STAILQ_EMPTY(&qp->tx_capsules)) {
				cv_wait(&qp->tx_cv, SOCKBUF_MTX(&so->so_snd));
				continue;
			}

			/* Convert a capsule into a PDU. */
			tc = STAILQ_FIRST(&qp->tx_capsules);
			STAILQ_REMOVE_HEAD(&qp->tx_capsules, link);
			SOCKBUF_UNLOCK(&so->so_snd);

			n = capsule_to_pdu(qp, tc);
			tcp_release_capsule(tc);

			SOCKBUF_LOCK(&so->so_snd);
			mbufq_enqueue(&qp->tx_pdus, n);
			continue;
		}

		/*
		 * Wait until there is enough room to send some data.
		 * If the socket buffer is empty, always send at least
		 * something.
		 */
		space = sbspace(&so->so_snd);
		if (space < m->m_len && sbused(&so->so_snd) != 0) {
			cv_wait(&qp->tx_cv, SOCKBUF_MTX(&so->so_snd));
			continue;
		}
		SOCKBUF_UNLOCK(&so->so_snd);

		/*
		 * If 'm' is too big, then the socket buffer must be
		 * empty.  Split 'm' to make at least some forward
		 * progress.
		 *
		 * Otherwise, chain up as many pending mbufs from 'm'
		 * that will fit.
		 */
		if (m->m_len > space) {
			n = m_split(m, space, M_WAITOK);
		} else {
			tosend = m->m_len;
			n = m->m_next;
			p = m;
			while (n != NULL && tosend + n->m_len <= space) {
				tosend += n->m_len;
				p = n;
				n = n->m_next;
			}
			KASSERT(p->m_next == n, ("%s: p not before n",
			    __func__));
			p->m_next = NULL;

			KASSERT(m_length(m, NULL) == tosend,
			    ("%s: length mismatch", __func__));
		}
		error = sosend(so, NULL, NULL, m, NULL, MSG_DONTWAIT, NULL);
		if (error != 0) {
			m = NULL;
			m_freem(n);
			goto error;
		}
		m = n;
		SOCKBUF_LOCK(&so->so_snd);
	}
	SOCKBUF_UNLOCK(&so->so_snd);
	kthread_exit();
}

static int
nvmf_soupcall_receive(struct socket *so, void *arg, int waitflag)
{
	struct nvmf_tcp_qpair *qp = arg;

	if (soreadable(so))
		cv_signal(&qp->rx_cv);
	return (SU_OK);
}

static int
nvmf_soupcall_send(struct socket *so, void *arg, int waitflag)
{
	struct nvmf_tcp_qpair *qp = arg;

	if (sowriteable(so))
		cv_signal(&qp->tx_cv);
	return (SU_OK);
}

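/*
 * A queue pair is created from an nvlist that must provide the "fd"
 * of a connected TCP socket along with the negotiated "rxpda",
 * "txpda", "header_digests", "data_digests", "maxr2t", "maxh2cdata",
 * and "max_icd" parameters; controller-side qpairs additionally
 * consume "sq_flow_control" and "qsize" below.
 */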
static struct nvmf_qpair *
tcp_allocate_qpair(bool controller, const nvlist_t *nvl)
{
	struct nvmf_tcp_qpair *qp;
	struct socket *so;
	struct file *fp;
	cap_rights_t rights;
	int error;

	if (!nvlist_exists_number(nvl, "fd") ||
	    !nvlist_exists_number(nvl, "rxpda") ||
	    !nvlist_exists_number(nvl, "txpda") ||
	    !nvlist_exists_bool(nvl, "header_digests") ||
	    !nvlist_exists_bool(nvl, "data_digests") ||
	    !nvlist_exists_number(nvl, "maxr2t") ||
	    !nvlist_exists_number(nvl, "maxh2cdata") ||
	    !nvlist_exists_number(nvl, "max_icd"))
		return (NULL);

	error = fget(curthread, nvlist_get_number(nvl, "fd"),
	    cap_rights_init_one(&rights, CAP_SOCK_CLIENT), &fp);
	if (error != 0)
		return (NULL);
	if (fp->f_type != DTYPE_SOCKET) {
		fdrop(fp, curthread);
		return (NULL);
	}
	so = fp->f_data;
	if (so->so_type != SOCK_STREAM ||
	    so->so_proto->pr_protocol != IPPROTO_TCP) {
		fdrop(fp, curthread);
		return (NULL);
	}

	/* Claim socket from file descriptor. */
	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	fdrop(fp, curthread);

	qp = malloc(sizeof(*qp), M_NVMF_TCP, M_WAITOK | M_ZERO);
	qp->so = so;
	refcount_init(&qp->refs, 1);
	qp->txpda = nvlist_get_number(nvl, "txpda");
	qp->rxpda = nvlist_get_number(nvl, "rxpda");
	qp->header_digests = nvlist_get_bool(nvl, "header_digests");
	qp->data_digests = nvlist_get_bool(nvl, "data_digests");
	qp->maxr2t = nvlist_get_number(nvl, "maxr2t");
	if (controller)
		qp->maxh2cdata = nvlist_get_number(nvl, "maxh2cdata");
	qp->max_tx_data = tcp_max_transmit_data;
	if (!controller) {
		qp->max_tx_data = min(qp->max_tx_data,
		    nvlist_get_number(nvl, "maxh2cdata"));
		qp->max_icd = nvlist_get_number(nvl, "max_icd");
	}

	if (controller) {
		/* Use the SUCCESS flag if SQ flow control is disabled. */
		qp->send_success = !nvlist_get_bool(nvl, "sq_flow_control");

		/* NB: maxr2t is 0's based. */
		qp->num_ttags = MIN((u_int)UINT16_MAX + 1,
		    nvlist_get_number(nvl, "qsize") *
		    ((uint64_t)qp->maxr2t + 1));
		qp->open_ttags = mallocarray(qp->num_ttags,
		    sizeof(*qp->open_ttags), M_NVMF_TCP, M_WAITOK | M_ZERO);
	}

	TAILQ_INIT(&qp->rx_buffers.head);
	TAILQ_INIT(&qp->tx_buffers.head);
	mtx_init(&qp->rx_buffers.lock, "nvmf/tcp rx buffers", NULL, MTX_DEF);
	mtx_init(&qp->tx_buffers.lock, "nvmf/tcp tx buffers", NULL, MTX_DEF);

	cv_init(&qp->rx_cv, "-");
	cv_init(&qp->tx_cv, "-");
	mbufq_init(&qp->tx_pdus, 0);
	STAILQ_INIT(&qp->tx_capsules);

	/* Register socket upcalls. */
	SOCKBUF_LOCK(&so->so_rcv);
	soupcall_set(so, SO_RCV, nvmf_soupcall_receive, qp);
	SOCKBUF_UNLOCK(&so->so_rcv);
	SOCKBUF_LOCK(&so->so_snd);
	soupcall_set(so, SO_SND, nvmf_soupcall_send, qp);
	SOCKBUF_UNLOCK(&so->so_snd);

	/* Spin up kthreads. */
	error = kthread_add(nvmf_tcp_receive, qp, NULL, &qp->rx_thread, 0, 0,
	    "nvmef tcp rx");
	if (error != 0) {
		tcp_free_qpair(&qp->qp);
		return (NULL);
	}
	error = kthread_add(nvmf_tcp_send, qp, NULL, &qp->tx_thread, 0, 0,
	    "nvmef tcp tx");
	if (error != 0) {
		tcp_free_qpair(&qp->qp);
		return (NULL);
	}

	return (&qp->qp);
}

static void
tcp_release_qpair(struct nvmf_tcp_qpair *qp)
{
	if (refcount_release(&qp->refs))
		free(qp, M_NVMF_TCP);
}

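/*
 * Teardown: stop both worker threads and clear the socket upcalls
 * before aborting any queued capsules and command buffers, then
 * close the socket and drop the qpair reference.
 */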
static void
tcp_free_qpair(struct nvmf_qpair *nq)
{
	struct nvmf_tcp_qpair *qp = TQP(nq);
	struct nvmf_tcp_command_buffer *ncb, *cb;
	struct nvmf_tcp_capsule *ntc, *tc;
	struct socket *so = qp->so;

	/* Shut down kthreads and clear upcalls */
	SOCKBUF_LOCK(&so->so_snd);
	qp->tx_shutdown = true;
	if (qp->tx_thread != NULL) {
		cv_signal(&qp->tx_cv);
		mtx_sleep(qp->tx_thread, SOCKBUF_MTX(&so->so_snd), 0,
		    "nvtcptx", 0);
	}
	soupcall_clear(so, SO_SND);
	SOCKBUF_UNLOCK(&so->so_snd);

	SOCKBUF_LOCK(&so->so_rcv);
	qp->rx_shutdown = true;
	if (qp->rx_thread != NULL) {
		cv_signal(&qp->rx_cv);
		mtx_sleep(qp->rx_thread, SOCKBUF_MTX(&so->so_rcv), 0,
		    "nvtcprx", 0);
	}
	soupcall_clear(so, SO_RCV);
	SOCKBUF_UNLOCK(&so->so_rcv);

	STAILQ_FOREACH_SAFE(tc, &qp->tx_capsules, link, ntc) {
		nvmf_abort_capsule_data(&tc->nc, ECONNABORTED);
		tcp_release_capsule(tc);
	}
	mbufq_drain(&qp->tx_pdus);

	cv_destroy(&qp->tx_cv);
	cv_destroy(&qp->rx_cv);

	if (qp->open_ttags != NULL) {
		for (u_int i = 0; i < qp->num_ttags; i++) {
			cb = qp->open_ttags[i];
			if (cb != NULL) {
				cb->tc->active_r2ts--;
				cb->error = ECONNABORTED;
				tcp_release_command_buffer(cb);
			}
		}
		free(qp->open_ttags, M_NVMF_TCP);
	}

	mtx_lock(&qp->rx_buffers.lock);
	TAILQ_FOREACH_SAFE(cb, &qp->rx_buffers.head, link, ncb) {
		tcp_remove_command_buffer(&qp->rx_buffers, cb);
		mtx_unlock(&qp->rx_buffers.lock);
#ifdef INVARIANTS
		if (cb->tc != NULL)
			cb->tc->pending_r2ts--;
#endif
		cb->error = ECONNABORTED;
		tcp_release_command_buffer(cb);
		mtx_lock(&qp->rx_buffers.lock);
	}
	mtx_destroy(&qp->rx_buffers.lock);

	mtx_lock(&qp->tx_buffers.lock);
	TAILQ_FOREACH_SAFE(cb, &qp->tx_buffers.head, link, ncb) {
		tcp_remove_command_buffer(&qp->tx_buffers, cb);
		mtx_unlock(&qp->tx_buffers.lock);
		cb->error = ECONNABORTED;
		tcp_release_command_buffer(cb);
		mtx_lock(&qp->tx_buffers.lock);
	}
	mtx_destroy(&qp->tx_buffers.lock);

	soclose(so);

	tcp_release_qpair(qp);
}

static struct nvmf_capsule *
tcp_allocate_capsule(struct nvmf_qpair *nq, int how)
{
	struct nvmf_tcp_qpair *qp = TQP(nq);
	struct nvmf_tcp_capsule *tc;

	tc = malloc(sizeof(*tc), M_NVMF_TCP, how | M_ZERO);
	if (tc == NULL)
		return (NULL);
	refcount_init(&tc->refs, 1);
	refcount_acquire(&qp->refs);
	return (&tc->nc);
}

static void
tcp_release_capsule(struct nvmf_tcp_capsule *tc)
{
	struct nvmf_tcp_qpair *qp = TQP(tc->nc.nc_qpair);

	if (!refcount_release(&tc->refs))
		return;

	MPASS(tc->active_r2ts == 0);
	MPASS(tc->pending_r2ts == 0);

	nvmf_tcp_free_pdu(&tc->rx_pdu);
	free(tc, M_NVMF_TCP);
	tcp_release_qpair(qp);
}

static void
tcp_free_capsule(struct nvmf_capsule *nc)
{
	struct nvmf_tcp_capsule *tc = TCAP(nc);

	tcp_release_capsule(tc);
}

static int
tcp_transmit_capsule(struct nvmf_capsule *nc)
{
	struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
	struct nvmf_tcp_capsule *tc = TCAP(nc);
	struct socket *so = qp->so;

	refcount_acquire(&tc->refs);
	SOCKBUF_LOCK(&so->so_snd);
	STAILQ_INSERT_TAIL(&qp->tx_capsules, tc, link);
	if (sowriteable(so))
		cv_signal(&qp->tx_cv);
	SOCKBUF_UNLOCK(&so->so_snd);
	return (0);
}

static uint8_t
tcp_validate_command_capsule(struct nvmf_capsule *nc)
{
	struct nvmf_tcp_capsule *tc = TCAP(nc);
	struct nvme_sgl_descriptor *sgl;

	KASSERT(tc->rx_pdu.hdr != NULL, ("capsule wasn't received"));

	sgl = &nc->nc_sqe.sgl;
	switch (sgl->type) {
	case NVME_SGL_TYPE_ICD:
		if (tc->rx_pdu.data_len != le32toh(sgl->length)) {
			printf("NVMe/TCP: Command Capsule with mismatched ICD length\n");
			return (NVME_SC_DATA_SGL_LENGTH_INVALID);
		}
		break;
	case NVME_SGL_TYPE_COMMAND_BUFFER:
		if (tc->rx_pdu.data_len != 0) {
			printf("NVMe/TCP: Command Buffer SGL with ICD\n");
			return (NVME_SC_INVALID_FIELD);
		}
		break;
	default:
		printf("NVMe/TCP: Invalid SGL type in Command Capsule\n");
		return (NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID);
	}

	if (sgl->address != 0) {
		printf("NVMe/TCP: Invalid SGL offset in Command Capsule\n");
		return (NVME_SC_SGL_OFFSET_INVALID);
	}

	return (NVME_SC_SUCCESS);
}

static size_t
tcp_capsule_data_len(const struct nvmf_capsule *nc)
{
	MPASS(nc->nc_qe_len == sizeof(struct nvme_command));
	return (le32toh(nc->nc_sqe.sgl.length));
}

static void
tcp_receive_r2t_data(struct nvmf_capsule *nc, uint32_t data_offset,
    struct nvmf_io_request *io)
{
	struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
	struct nvmf_tcp_capsule *tc = TCAP(nc);
	struct nvmf_tcp_command_buffer *cb;

	cb = tcp_alloc_command_buffer(qp, io, data_offset, io->io_len,
	    nc->nc_sqe.cid);

	cb->tc = tc;
	refcount_acquire(&tc->refs);

	/*
	 * If this command has too many active R2Ts or there are no
	 * available transfer tags, queue the request for later.
	 *
	 * NB: maxr2t is 0's based.
	 */
	mtx_lock(&qp->rx_buffers.lock);
	if (tc->active_r2ts > qp->maxr2t || qp->active_ttags == qp->num_ttags) {
#ifdef INVARIANTS
		tc->pending_r2ts++;
#endif
		TAILQ_INSERT_TAIL(&qp->rx_buffers.head, cb, link);
		mtx_unlock(&qp->rx_buffers.lock);
		return;
	}

	nvmf_tcp_allocate_ttag(qp, cb);
	mtx_unlock(&qp->rx_buffers.lock);

	tcp_send_r2t(qp, nc->nc_sqe.cid, cb->ttag, data_offset, io->io_len);
}

static void
tcp_receive_icd_data(struct nvmf_capsule *nc, uint32_t data_offset,
    struct nvmf_io_request *io)
{
	struct nvmf_tcp_capsule *tc = TCAP(nc);

	mbuf_copyto_io(tc->rx_pdu.m, tc->rx_pdu.hdr->pdo + data_offset,
	    io->io_len, io, 0);
	nvmf_complete_io_request(io, io->io_len, 0);
}

static int
tcp_receive_controller_data(struct nvmf_capsule *nc, uint32_t data_offset,
    struct nvmf_io_request *io)
{
	struct nvme_sgl_descriptor *sgl;
	size_t data_len;

	if (nc->nc_qe_len != sizeof(struct nvme_command) ||
	    !nc->nc_qpair->nq_controller)
		return (EINVAL);

	sgl = &nc->nc_sqe.sgl;
	data_len = le32toh(sgl->length);
	if (data_offset + io->io_len > data_len)
		return (EFBIG);

	if (sgl->type == NVME_SGL_TYPE_ICD)
		tcp_receive_icd_data(nc, data_offset, io);
	else
		tcp_receive_r2t_data(nc, data_offset, io);
	return (0);
}

/* NB: cid is little-endian already. */
static void
tcp_send_c2h_pdu(struct nvmf_tcp_qpair *qp, uint16_t cid, uint32_t data_offset,
    struct mbuf *m, size_t len, bool last_pdu, bool success)
{
	struct nvme_tcp_c2h_data_hdr c2h;
	struct mbuf *top;

	memset(&c2h, 0, sizeof(c2h));
	c2h.common.pdu_type = NVME_TCP_PDU_TYPE_C2H_DATA;
	if (last_pdu)
		c2h.common.flags |= NVME_TCP_C2H_DATA_FLAGS_LAST_PDU;
	if (success)
		c2h.common.flags |= NVME_TCP_C2H_DATA_FLAGS_SUCCESS;
	c2h.cccid = cid;
	c2h.datao = htole32(data_offset);
	c2h.datal = htole32(len);

	top = nvmf_tcp_construct_pdu(qp, &c2h, sizeof(c2h), m, len);
	nvmf_tcp_write_pdu(qp, top);
}

static u_int
tcp_send_controller_data(struct nvmf_capsule *nc, uint32_t data_offset,
    struct mbuf *m, size_t len)
{
	struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair);
	struct nvme_sgl_descriptor *sgl;
	uint32_t data_len;
	bool last_pdu, last_xfer;

	if (nc->nc_qe_len != sizeof(struct nvme_command) ||
	    !qp->qp.nq_controller) {
		m_freem(m);
		return (NVME_SC_INVALID_FIELD);
	}

	sgl = &nc->nc_sqe.sgl;
	data_len = le32toh(sgl->length);
	if (data_offset + len > data_len) {
		m_freem(m);
		return (NVME_SC_INVALID_FIELD);
	}
	last_xfer = (data_offset + len == data_len);

	if (sgl->type != NVME_SGL_TYPE_COMMAND_BUFFER) {
		m_freem(m);
		return (NVME_SC_INVALID_FIELD);
	}

	KASSERT(data_offset == TCAP(nc)->tx_data_offset,
	    ("%s: starting data_offset %u doesn't match end of previous xfer %u",
	    __func__, data_offset, TCAP(nc)->tx_data_offset));

	/* Queue one or more C2H_DATA PDUs containing the data from 'm'. */
	while (m != NULL) {
		struct mbuf *n;
		uint32_t todo;

		if (m->m_len > qp->max_tx_data) {
			n = m_split(m, qp->max_tx_data, M_WAITOK);
			todo = m->m_len;
		} else {
			struct mbuf *p;

			todo = m->m_len;
			p = m;
			n = p->m_next;
			while (n != NULL) {
				if (todo + n->m_len > qp->max_tx_data) {
					p->m_next = NULL;
					break;
				}
				todo += n->m_len;
				p = n;
				n = p->m_next;
			}
			MPASS(m_length(m, NULL) == todo);
		}

		last_pdu = (n == NULL && last_xfer);
		tcp_send_c2h_pdu(qp, nc->nc_sqe.cid, data_offset, m, todo,
		    last_pdu, last_pdu && qp->send_success);

		data_offset += todo;
		data_len -= todo;
		m = n;
	}
	MPASS(data_len == 0);

#ifdef INVARIANTS
	TCAP(nc)->tx_data_offset = data_offset;
#endif
	if (!last_xfer)
		return (NVMF_MORE);
	else if (qp->send_success)
		return (NVMF_SUCCESS_SENT);
	else
		return (NVME_SC_SUCCESS);
}

struct nvmf_transport_ops tcp_ops = {
	.allocate_qpair = tcp_allocate_qpair,
	.free_qpair = tcp_free_qpair,
	.allocate_capsule = tcp_allocate_capsule,
	.free_capsule = tcp_free_capsule,
	.transmit_capsule = tcp_transmit_capsule,
	.validate_command_capsule = tcp_validate_command_capsule,
	.capsule_data_len = tcp_capsule_data_len,
	.receive_controller_data = tcp_receive_controller_data,
	.send_controller_data = tcp_send_controller_data,
	.trtype = NVMF_TRTYPE_TCP,
	.priority = 0,
};

NVMF_TRANSPORT(tcp, tcp_ops);