1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2022-2024 Chelsio Communications, Inc. 5 * Written by: John Baldwin <jhb@FreeBSD.org> 6 */ 7 8 #include <sys/endian.h> 9 #include <sys/gsb_crc32.h> 10 #include <sys/queue.h> 11 #include <sys/uio.h> 12 #include <assert.h> 13 #include <errno.h> 14 #include <stdio.h> 15 #include <stdlib.h> 16 #include <string.h> 17 #include <unistd.h> 18 19 #include "libnvmf.h" 20 #include "internal.h" 21 #include "nvmf_tcp.h" 22 23 struct nvmf_tcp_qpair; 24 25 struct nvmf_tcp_command_buffer { 26 struct nvmf_tcp_qpair *qp; 27 28 void *data; 29 size_t data_len; 30 size_t data_xfered; 31 uint32_t data_offset; 32 33 uint16_t cid; 34 uint16_t ttag; 35 36 LIST_ENTRY(nvmf_tcp_command_buffer) link; 37 }; 38 39 LIST_HEAD(nvmf_tcp_command_buffer_list, nvmf_tcp_command_buffer); 40 41 struct nvmf_tcp_association { 42 struct nvmf_association na; 43 44 uint32_t ioccsz; 45 }; 46 47 struct nvmf_tcp_rxpdu { 48 struct nvme_tcp_common_pdu_hdr *hdr; 49 uint32_t data_len; 50 }; 51 52 struct nvmf_tcp_capsule { 53 struct nvmf_capsule nc; 54 55 struct nvmf_tcp_rxpdu rx_pdu; 56 struct nvmf_tcp_command_buffer *cb; 57 58 TAILQ_ENTRY(nvmf_tcp_capsule) link; 59 }; 60 61 struct nvmf_tcp_qpair { 62 struct nvmf_qpair qp; 63 int s; 64 65 uint8_t txpda; 66 uint8_t rxpda; 67 bool header_digests; 68 bool data_digests; 69 uint32_t maxr2t; 70 uint32_t maxh2cdata; 71 uint32_t max_icd; /* Host only */ 72 uint16_t next_ttag; /* Controller only */ 73 74 struct nvmf_tcp_command_buffer_list tx_buffers; 75 struct nvmf_tcp_command_buffer_list rx_buffers; 76 TAILQ_HEAD(, nvmf_tcp_capsule) rx_capsules; 77 }; 78 79 #define TASSOC(nc) ((struct nvmf_tcp_association *)(na)) 80 #define TCAP(nc) ((struct nvmf_tcp_capsule *)(nc)) 81 #define CTCAP(nc) ((const struct nvmf_tcp_capsule *)(nc)) 82 #define TQP(qp) ((struct nvmf_tcp_qpair *)(qp)) 83 84 static const char zero_padding[NVME_TCP_PDU_PDO_MAX_OFFSET]; 85 86 static uint32_t 87 compute_digest(const void *buf, size_t len) 88 { 89 return (calculate_crc32c(0xffffffff, buf, len) ^ 0xffffffff); 90 } 91 92 static struct nvmf_tcp_command_buffer * 93 tcp_alloc_command_buffer(struct nvmf_tcp_qpair *qp, void *data, 94 uint32_t data_offset, size_t data_len, uint16_t cid, uint16_t ttag, 95 bool receive) 96 { 97 struct nvmf_tcp_command_buffer *cb; 98 99 cb = malloc(sizeof(*cb)); 100 cb->qp = qp; 101 cb->data = data; 102 cb->data_offset = data_offset; 103 cb->data_len = data_len; 104 cb->data_xfered = 0; 105 cb->cid = cid; 106 cb->ttag = ttag; 107 108 if (receive) 109 LIST_INSERT_HEAD(&qp->rx_buffers, cb, link); 110 else 111 LIST_INSERT_HEAD(&qp->tx_buffers, cb, link); 112 return (cb); 113 } 114 115 static struct nvmf_tcp_command_buffer * 116 tcp_find_command_buffer(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag, 117 bool receive) 118 { 119 struct nvmf_tcp_command_buffer_list *list; 120 struct nvmf_tcp_command_buffer *cb; 121 122 list = receive ? &qp->rx_buffers : &qp->tx_buffers; 123 LIST_FOREACH(cb, list, link) { 124 if (cb->cid == cid && cb->ttag == ttag) 125 return (cb); 126 } 127 return (NULL); 128 } 129 130 static void 131 tcp_purge_command_buffer(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag, 132 bool receive) 133 { 134 struct nvmf_tcp_command_buffer *cb; 135 136 cb = tcp_find_command_buffer(qp, cid, ttag, receive); 137 if (cb != NULL) 138 LIST_REMOVE(cb, link); 139 } 140 141 static void 142 tcp_free_command_buffer(struct nvmf_tcp_command_buffer *cb) 143 { 144 LIST_REMOVE(cb, link); 145 free(cb); 146 } 147 148 static int 149 nvmf_tcp_write_pdu(struct nvmf_tcp_qpair *qp, const void *pdu, size_t len) 150 { 151 ssize_t nwritten; 152 const char *cp; 153 154 cp = pdu; 155 while (len != 0) { 156 nwritten = write(qp->s, cp, len); 157 if (nwritten < 0) 158 return (errno); 159 len -= nwritten; 160 cp += nwritten; 161 } 162 return (0); 163 } 164 165 static int 166 nvmf_tcp_write_pdu_iov(struct nvmf_tcp_qpair *qp, struct iovec *iov, 167 u_int iovcnt, size_t len) 168 { 169 ssize_t nwritten; 170 171 for (;;) { 172 nwritten = writev(qp->s, iov, iovcnt); 173 if (nwritten < 0) 174 return (errno); 175 176 len -= nwritten; 177 if (len == 0) 178 return (0); 179 180 while (iov->iov_len <= (size_t)nwritten) { 181 nwritten -= iov->iov_len; 182 iovcnt--; 183 iov++; 184 } 185 186 iov->iov_base = (char *)iov->iov_base + nwritten; 187 iov->iov_len -= nwritten; 188 } 189 } 190 191 static void 192 nvmf_tcp_report_error(struct nvmf_association *na, struct nvmf_tcp_qpair *qp, 193 uint16_t fes, uint32_t fei, const void *rx_pdu, size_t pdu_len, u_int hlen) 194 { 195 struct nvme_tcp_term_req_hdr hdr; 196 struct iovec iov[2]; 197 198 if (hlen != 0) { 199 if (hlen > NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE) 200 hlen = NVME_TCP_TERM_REQ_ERROR_DATA_MAX_SIZE; 201 if (hlen > pdu_len) 202 hlen = pdu_len; 203 } 204 205 memset(&hdr, 0, sizeof(hdr)); 206 hdr.common.pdu_type = na->na_controller ? 207 NVME_TCP_PDU_TYPE_C2H_TERM_REQ : NVME_TCP_PDU_TYPE_H2C_TERM_REQ; 208 hdr.common.hlen = sizeof(hdr); 209 hdr.common.plen = sizeof(hdr) + hlen; 210 hdr.fes = htole16(fes); 211 le32enc(hdr.fei, fei); 212 iov[0].iov_base = &hdr; 213 iov[0].iov_len = sizeof(hdr); 214 iov[1].iov_base = __DECONST(void *, rx_pdu); 215 iov[1].iov_len = hlen; 216 217 (void)nvmf_tcp_write_pdu_iov(qp, iov, nitems(iov), sizeof(hdr) + hlen); 218 close(qp->s); 219 qp->s = -1; 220 } 221 222 static int 223 nvmf_tcp_validate_pdu(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu, 224 size_t pdu_len) 225 { 226 const struct nvme_tcp_common_pdu_hdr *ch; 227 uint32_t data_len, fei, plen; 228 uint32_t digest, rx_digest; 229 u_int hlen; 230 int error; 231 uint16_t fes; 232 233 /* Determine how large of a PDU header to return for errors. */ 234 ch = pdu->hdr; 235 hlen = ch->hlen; 236 plen = le32toh(ch->plen); 237 if (hlen < sizeof(*ch) || hlen > plen) 238 hlen = sizeof(*ch); 239 240 error = nvmf_tcp_validate_pdu_header(ch, 241 qp->qp.nq_association->na_controller, qp->header_digests, 242 qp->data_digests, qp->rxpda, &data_len, &fes, &fei); 243 if (error != 0) { 244 if (error == ECONNRESET) { 245 close(qp->s); 246 qp->s = -1; 247 } else { 248 nvmf_tcp_report_error(qp->qp.nq_association, qp, 249 fes, fei, ch, pdu_len, hlen); 250 } 251 return (error); 252 } 253 254 /* Check header digest if present. */ 255 if ((ch->flags & NVME_TCP_CH_FLAGS_HDGSTF) != 0) { 256 digest = compute_digest(ch, ch->hlen); 257 memcpy(&rx_digest, (const char *)ch + ch->hlen, 258 sizeof(rx_digest)); 259 if (digest != rx_digest) { 260 printf("NVMe/TCP: Header digest mismatch\n"); 261 nvmf_tcp_report_error(qp->qp.nq_association, qp, 262 NVME_TCP_TERM_REQ_FES_HDGST_ERROR, rx_digest, ch, 263 pdu_len, hlen); 264 return (EBADMSG); 265 } 266 } 267 268 /* Check data digest if present. */ 269 if ((ch->flags & NVME_TCP_CH_FLAGS_DDGSTF) != 0) { 270 digest = compute_digest((const char *)ch + ch->pdo, data_len); 271 memcpy(&rx_digest, (const char *)ch + plen - sizeof(rx_digest), 272 sizeof(rx_digest)); 273 if (digest != rx_digest) { 274 printf("NVMe/TCP: Data digest mismatch\n"); 275 return (EBADMSG); 276 } 277 } 278 279 pdu->data_len = data_len; 280 return (0); 281 } 282 283 /* 284 * Read data from a socket, retrying until the data has been fully 285 * read or an error occurs. 286 */ 287 static int 288 nvmf_tcp_read_buffer(int s, void *buf, size_t len) 289 { 290 ssize_t nread; 291 char *cp; 292 293 cp = buf; 294 while (len != 0) { 295 nread = read(s, cp, len); 296 if (nread < 0) 297 return (errno); 298 if (nread == 0) 299 return (ECONNRESET); 300 len -= nread; 301 cp += nread; 302 } 303 return (0); 304 } 305 306 static int 307 nvmf_tcp_read_pdu(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu) 308 { 309 struct nvme_tcp_common_pdu_hdr ch; 310 uint32_t plen; 311 int error; 312 313 memset(pdu, 0, sizeof(*pdu)); 314 error = nvmf_tcp_read_buffer(qp->s, &ch, sizeof(ch)); 315 if (error != 0) 316 return (error); 317 318 plen = le32toh(ch.plen); 319 320 /* 321 * Validate a header with garbage lengths to trigger 322 * an error message without reading more. 323 */ 324 if (plen < sizeof(ch) || ch.hlen > plen) { 325 pdu->hdr = &ch; 326 error = nvmf_tcp_validate_pdu(qp, pdu, sizeof(ch)); 327 pdu->hdr = NULL; 328 assert(error != 0); 329 return (error); 330 } 331 332 /* Read the rest of the PDU. */ 333 pdu->hdr = malloc(plen); 334 memcpy(pdu->hdr, &ch, sizeof(ch)); 335 error = nvmf_tcp_read_buffer(qp->s, pdu->hdr + 1, plen - sizeof(ch)); 336 if (error != 0) 337 return (error); 338 error = nvmf_tcp_validate_pdu(qp, pdu, plen); 339 if (error != 0) { 340 free(pdu->hdr); 341 pdu->hdr = NULL; 342 } 343 return (error); 344 } 345 346 static void 347 nvmf_tcp_free_pdu(struct nvmf_tcp_rxpdu *pdu) 348 { 349 free(pdu->hdr); 350 pdu->hdr = NULL; 351 } 352 353 static int 354 nvmf_tcp_handle_term_req(struct nvmf_tcp_rxpdu *pdu) 355 { 356 struct nvme_tcp_term_req_hdr *hdr; 357 358 hdr = (void *)pdu->hdr; 359 360 printf("NVMe/TCP: Received termination request: fes %#x fei %#x\n", 361 le16toh(hdr->fes), le32dec(hdr->fei)); 362 nvmf_tcp_free_pdu(pdu); 363 return (ECONNRESET); 364 } 365 366 static int 367 nvmf_tcp_save_command_capsule(struct nvmf_tcp_qpair *qp, 368 struct nvmf_tcp_rxpdu *pdu) 369 { 370 struct nvme_tcp_cmd *cmd; 371 struct nvmf_capsule *nc; 372 struct nvmf_tcp_capsule *tc; 373 374 cmd = (void *)pdu->hdr; 375 376 nc = nvmf_allocate_command(&qp->qp, &cmd->ccsqe); 377 if (nc == NULL) 378 return (ENOMEM); 379 380 tc = TCAP(nc); 381 tc->rx_pdu = *pdu; 382 383 TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link); 384 return (0); 385 } 386 387 static int 388 nvmf_tcp_save_response_capsule(struct nvmf_tcp_qpair *qp, 389 struct nvmf_tcp_rxpdu *pdu) 390 { 391 struct nvme_tcp_rsp *rsp; 392 struct nvmf_capsule *nc; 393 struct nvmf_tcp_capsule *tc; 394 395 rsp = (void *)pdu->hdr; 396 397 nc = nvmf_allocate_response(&qp->qp, &rsp->rccqe); 398 if (nc == NULL) 399 return (ENOMEM); 400 401 nc->nc_sqhd_valid = true; 402 tc = TCAP(nc); 403 tc->rx_pdu = *pdu; 404 405 TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link); 406 407 /* 408 * Once the CQE has been received, no further transfers to the 409 * command buffer for the associated CID can occur. 410 */ 411 tcp_purge_command_buffer(qp, rsp->rccqe.cid, 0, true); 412 tcp_purge_command_buffer(qp, rsp->rccqe.cid, 0, false); 413 414 return (0); 415 } 416 417 /* 418 * Construct and send a PDU that contains an optional data payload. 419 * This includes dealing with digests and the length fields in the 420 * common header. 421 */ 422 static int 423 nvmf_tcp_construct_pdu(struct nvmf_tcp_qpair *qp, void *hdr, size_t hlen, 424 void *data, uint32_t data_len) 425 { 426 struct nvme_tcp_common_pdu_hdr *ch; 427 struct iovec iov[5]; 428 u_int iovcnt; 429 uint32_t header_digest, data_digest, pad, pdo, plen; 430 431 plen = hlen; 432 if (qp->header_digests) 433 plen += sizeof(header_digest); 434 if (data_len != 0) { 435 pdo = roundup2(plen, qp->txpda); 436 pad = pdo - plen; 437 plen = pdo + data_len; 438 if (qp->data_digests) 439 plen += sizeof(data_digest); 440 } else { 441 assert(data == NULL); 442 pdo = 0; 443 pad = 0; 444 } 445 446 ch = hdr; 447 ch->hlen = hlen; 448 if (qp->header_digests) 449 ch->flags |= NVME_TCP_CH_FLAGS_HDGSTF; 450 if (qp->data_digests && data_len != 0) 451 ch->flags |= NVME_TCP_CH_FLAGS_DDGSTF; 452 ch->pdo = pdo; 453 ch->plen = htole32(plen); 454 455 /* CH + PSH */ 456 iov[0].iov_base = hdr; 457 iov[0].iov_len = hlen; 458 iovcnt = 1; 459 460 /* HDGST */ 461 if (qp->header_digests) { 462 header_digest = compute_digest(hdr, hlen); 463 iov[iovcnt].iov_base = &header_digest; 464 iov[iovcnt].iov_len = sizeof(header_digest); 465 iovcnt++; 466 } 467 468 if (pad != 0) { 469 /* PAD */ 470 iov[iovcnt].iov_base = __DECONST(char *, zero_padding); 471 iov[iovcnt].iov_len = pad; 472 iovcnt++; 473 } 474 475 if (data_len != 0) { 476 /* DATA */ 477 iov[iovcnt].iov_base = data; 478 iov[iovcnt].iov_len = data_len; 479 iovcnt++; 480 481 /* DDGST */ 482 if (qp->data_digests) { 483 data_digest = compute_digest(data, data_len); 484 iov[iovcnt].iov_base = &data_digest; 485 iov[iovcnt].iov_len = sizeof(data_digest); 486 iovcnt++; 487 } 488 } 489 490 return (nvmf_tcp_write_pdu_iov(qp, iov, iovcnt, plen)); 491 } 492 493 static int 494 nvmf_tcp_handle_h2c_data(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu) 495 { 496 struct nvme_tcp_h2c_data_hdr *h2c; 497 struct nvmf_tcp_command_buffer *cb; 498 uint32_t data_len, data_offset; 499 const char *icd; 500 501 h2c = (void *)pdu->hdr; 502 if (le32toh(h2c->datal) > qp->maxh2cdata) { 503 nvmf_tcp_report_error(qp->qp.nq_association, qp, 504 NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_LIMIT_EXCEEDED, 0, 505 pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen); 506 nvmf_tcp_free_pdu(pdu); 507 return (EBADMSG); 508 } 509 510 cb = tcp_find_command_buffer(qp, h2c->cccid, h2c->ttag, true); 511 if (cb == NULL) { 512 nvmf_tcp_report_error(qp->qp.nq_association, qp, 513 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 514 offsetof(struct nvme_tcp_h2c_data_hdr, ttag), pdu->hdr, 515 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 516 nvmf_tcp_free_pdu(pdu); 517 return (EBADMSG); 518 } 519 520 data_len = le32toh(h2c->datal); 521 if (data_len != pdu->data_len) { 522 nvmf_tcp_report_error(qp->qp.nq_association, qp, 523 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 524 offsetof(struct nvme_tcp_h2c_data_hdr, datal), pdu->hdr, 525 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 526 nvmf_tcp_free_pdu(pdu); 527 return (EBADMSG); 528 } 529 530 data_offset = le32toh(h2c->datao); 531 if (data_offset < cb->data_offset || 532 data_offset + data_len > cb->data_offset + cb->data_len) { 533 nvmf_tcp_report_error(qp->qp.nq_association, qp, 534 NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0, 535 pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen); 536 nvmf_tcp_free_pdu(pdu); 537 return (EBADMSG); 538 } 539 540 if (data_offset != cb->data_offset + cb->data_xfered) { 541 nvmf_tcp_report_error(qp->qp.nq_association, qp, 542 NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr, 543 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 544 nvmf_tcp_free_pdu(pdu); 545 return (EBADMSG); 546 } 547 548 if ((cb->data_xfered + data_len == cb->data_len) != 549 ((pdu->hdr->flags & NVME_TCP_H2C_DATA_FLAGS_LAST_PDU) != 0)) { 550 nvmf_tcp_report_error(qp->qp.nq_association, qp, 551 NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr, 552 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 553 nvmf_tcp_free_pdu(pdu); 554 return (EBADMSG); 555 } 556 557 cb->data_xfered += data_len; 558 data_offset -= cb->data_offset; 559 icd = (const char *)pdu->hdr + pdu->hdr->pdo; 560 memcpy((char *)cb->data + data_offset, icd, data_len); 561 562 nvmf_tcp_free_pdu(pdu); 563 return (0); 564 } 565 566 static int 567 nvmf_tcp_handle_c2h_data(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu) 568 { 569 struct nvme_tcp_c2h_data_hdr *c2h; 570 struct nvmf_tcp_command_buffer *cb; 571 uint32_t data_len, data_offset; 572 const char *icd; 573 574 c2h = (void *)pdu->hdr; 575 576 cb = tcp_find_command_buffer(qp, c2h->cccid, 0, true); 577 if (cb == NULL) { 578 /* 579 * XXX: Could be PDU sequence error if cccid is for a 580 * command that doesn't use a command buffer. 581 */ 582 nvmf_tcp_report_error(qp->qp.nq_association, qp, 583 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 584 offsetof(struct nvme_tcp_c2h_data_hdr, cccid), pdu->hdr, 585 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 586 nvmf_tcp_free_pdu(pdu); 587 return (EBADMSG); 588 } 589 590 data_len = le32toh(c2h->datal); 591 if (data_len != pdu->data_len) { 592 nvmf_tcp_report_error(qp->qp.nq_association, qp, 593 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 594 offsetof(struct nvme_tcp_c2h_data_hdr, datal), pdu->hdr, 595 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 596 nvmf_tcp_free_pdu(pdu); 597 return (EBADMSG); 598 } 599 600 data_offset = le32toh(c2h->datao); 601 if (data_offset < cb->data_offset || 602 data_offset + data_len > cb->data_offset + cb->data_len) { 603 nvmf_tcp_report_error(qp->qp.nq_association, qp, 604 NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0, 605 pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen); 606 nvmf_tcp_free_pdu(pdu); 607 return (EBADMSG); 608 } 609 610 if (data_offset != cb->data_offset + cb->data_xfered) { 611 nvmf_tcp_report_error(qp->qp.nq_association, qp, 612 NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr, 613 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 614 nvmf_tcp_free_pdu(pdu); 615 return (EBADMSG); 616 } 617 618 if ((cb->data_xfered + data_len == cb->data_len) != 619 ((pdu->hdr->flags & NVME_TCP_C2H_DATA_FLAGS_LAST_PDU) != 0)) { 620 nvmf_tcp_report_error(qp->qp.nq_association, qp, 621 NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr, 622 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 623 nvmf_tcp_free_pdu(pdu); 624 return (EBADMSG); 625 } 626 627 cb->data_xfered += data_len; 628 data_offset -= cb->data_offset; 629 icd = (const char *)pdu->hdr + pdu->hdr->pdo; 630 memcpy((char *)cb->data + data_offset, icd, data_len); 631 632 if ((pdu->hdr->flags & NVME_TCP_C2H_DATA_FLAGS_SUCCESS) != 0) { 633 struct nvme_completion cqe; 634 struct nvmf_tcp_capsule *tc; 635 struct nvmf_capsule *nc; 636 637 memset(&cqe, 0, sizeof(cqe)); 638 cqe.cid = cb->cid; 639 640 nc = nvmf_allocate_response(&qp->qp, &cqe); 641 if (nc == NULL) { 642 nvmf_tcp_free_pdu(pdu); 643 return (ENOMEM); 644 } 645 nc->nc_sqhd_valid = false; 646 647 tc = TCAP(nc); 648 TAILQ_INSERT_TAIL(&qp->rx_capsules, tc, link); 649 } 650 651 nvmf_tcp_free_pdu(pdu); 652 return (0); 653 } 654 655 /* NB: cid and ttag and little-endian already. */ 656 static int 657 tcp_send_h2c_pdu(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag, 658 uint32_t data_offset, void *buf, size_t len, bool last_pdu) 659 { 660 struct nvme_tcp_h2c_data_hdr h2c; 661 662 memset(&h2c, 0, sizeof(h2c)); 663 h2c.common.pdu_type = NVME_TCP_PDU_TYPE_H2C_DATA; 664 if (last_pdu) 665 h2c.common.flags |= NVME_TCP_H2C_DATA_FLAGS_LAST_PDU; 666 h2c.cccid = cid; 667 h2c.ttag = ttag; 668 h2c.datao = htole32(data_offset); 669 h2c.datal = htole32(len); 670 671 return (nvmf_tcp_construct_pdu(qp, &h2c, sizeof(h2c), buf, len)); 672 } 673 674 /* Sends one or more H2C_DATA PDUs, subject to MAXH2CDATA. */ 675 static int 676 tcp_send_h2c_pdus(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag, 677 uint32_t data_offset, void *buf, size_t len, bool last_pdu) 678 { 679 char *p; 680 681 p = buf; 682 while (len != 0) { 683 size_t todo; 684 int error; 685 686 todo = len; 687 if (todo > qp->maxh2cdata) 688 todo = qp->maxh2cdata; 689 error = tcp_send_h2c_pdu(qp, cid, ttag, data_offset, p, todo, 690 last_pdu && todo == len); 691 if (error != 0) 692 return (error); 693 p += todo; 694 len -= todo; 695 } 696 return (0); 697 } 698 699 static int 700 nvmf_tcp_handle_r2t(struct nvmf_tcp_qpair *qp, struct nvmf_tcp_rxpdu *pdu) 701 { 702 struct nvmf_tcp_command_buffer *cb; 703 struct nvme_tcp_r2t_hdr *r2t; 704 uint32_t data_len, data_offset; 705 int error; 706 707 r2t = (void *)pdu->hdr; 708 709 cb = tcp_find_command_buffer(qp, r2t->cccid, 0, false); 710 if (cb == NULL) { 711 nvmf_tcp_report_error(qp->qp.nq_association, qp, 712 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 713 offsetof(struct nvme_tcp_r2t_hdr, cccid), pdu->hdr, 714 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 715 nvmf_tcp_free_pdu(pdu); 716 return (EBADMSG); 717 } 718 719 data_offset = le32toh(r2t->r2to); 720 if (data_offset != cb->data_xfered) { 721 nvmf_tcp_report_error(qp->qp.nq_association, qp, 722 NVME_TCP_TERM_REQ_FES_PDU_SEQUENCE_ERROR, 0, pdu->hdr, 723 le32toh(pdu->hdr->plen), pdu->hdr->hlen); 724 nvmf_tcp_free_pdu(pdu); 725 return (EBADMSG); 726 } 727 728 /* 729 * XXX: The spec does not specify how to handle R2T tranfers 730 * out of range of the original command. 731 */ 732 data_len = le32toh(r2t->r2tl); 733 if (data_offset + data_len > cb->data_len) { 734 nvmf_tcp_report_error(qp->qp.nq_association, qp, 735 NVME_TCP_TERM_REQ_FES_DATA_TRANSFER_OUT_OF_RANGE, 0, 736 pdu->hdr, le32toh(pdu->hdr->plen), pdu->hdr->hlen); 737 nvmf_tcp_free_pdu(pdu); 738 return (EBADMSG); 739 } 740 741 cb->data_xfered += data_len; 742 743 /* 744 * Write out one or more H2C_DATA PDUs containing the 745 * requested data. 746 */ 747 error = tcp_send_h2c_pdus(qp, r2t->cccid, r2t->ttag, 748 data_offset, (char *)cb->data + data_offset, data_len, true); 749 750 nvmf_tcp_free_pdu(pdu); 751 return (error); 752 } 753 754 static int 755 nvmf_tcp_receive_pdu(struct nvmf_tcp_qpair *qp) 756 { 757 struct nvmf_tcp_rxpdu pdu; 758 int error; 759 760 error = nvmf_tcp_read_pdu(qp, &pdu); 761 if (error != 0) 762 return (error); 763 764 switch (pdu.hdr->pdu_type) { 765 default: 766 __unreachable(); 767 break; 768 case NVME_TCP_PDU_TYPE_H2C_TERM_REQ: 769 case NVME_TCP_PDU_TYPE_C2H_TERM_REQ: 770 return (nvmf_tcp_handle_term_req(&pdu)); 771 case NVME_TCP_PDU_TYPE_CAPSULE_CMD: 772 return (nvmf_tcp_save_command_capsule(qp, &pdu)); 773 case NVME_TCP_PDU_TYPE_CAPSULE_RESP: 774 return (nvmf_tcp_save_response_capsule(qp, &pdu)); 775 case NVME_TCP_PDU_TYPE_H2C_DATA: 776 return (nvmf_tcp_handle_h2c_data(qp, &pdu)); 777 case NVME_TCP_PDU_TYPE_C2H_DATA: 778 return (nvmf_tcp_handle_c2h_data(qp, &pdu)); 779 case NVME_TCP_PDU_TYPE_R2T: 780 return (nvmf_tcp_handle_r2t(qp, &pdu)); 781 } 782 } 783 784 static bool 785 nvmf_tcp_validate_ic_pdu(struct nvmf_association *na, struct nvmf_tcp_qpair *qp, 786 const struct nvme_tcp_common_pdu_hdr *ch, size_t pdu_len) 787 { 788 const struct nvme_tcp_ic_req *pdu; 789 uint32_t plen; 790 u_int hlen; 791 792 /* Determine how large of a PDU header to return for errors. */ 793 hlen = ch->hlen; 794 plen = le32toh(ch->plen); 795 if (hlen < sizeof(*ch) || hlen > plen) 796 hlen = sizeof(*ch); 797 798 /* 799 * Errors must be reported for the lowest incorrect field 800 * first, so validate fields in order. 801 */ 802 803 /* Validate pdu_type. */ 804 805 /* Controllers only receive PDUs with a PDU direction of 0. */ 806 if (na->na_controller != ((ch->pdu_type & 0x01) == 0)) { 807 na_error(na, "NVMe/TCP: Invalid PDU type %u", ch->pdu_type); 808 nvmf_tcp_report_error(na, qp, 809 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 0, ch, pdu_len, 810 hlen); 811 return (false); 812 } 813 814 switch (ch->pdu_type) { 815 case NVME_TCP_PDU_TYPE_IC_REQ: 816 case NVME_TCP_PDU_TYPE_IC_RESP: 817 break; 818 default: 819 na_error(na, "NVMe/TCP: Invalid PDU type %u", ch->pdu_type); 820 nvmf_tcp_report_error(na, qp, 821 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 0, ch, pdu_len, 822 hlen); 823 return (false); 824 } 825 826 /* Validate flags. */ 827 if (ch->flags != 0) { 828 na_error(na, "NVMe/TCP: Invalid PDU header flags %#x", 829 ch->flags); 830 nvmf_tcp_report_error(na, qp, 831 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 1, ch, pdu_len, 832 hlen); 833 return (false); 834 } 835 836 /* Validate hlen. */ 837 if (ch->hlen != 128) { 838 na_error(na, "NVMe/TCP: Invalid PDU header length %u", 839 ch->hlen); 840 nvmf_tcp_report_error(na, qp, 841 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 2, ch, pdu_len, 842 hlen); 843 return (false); 844 } 845 846 /* Validate pdo. */ 847 if (ch->pdo != 0) { 848 na_error(na, "NVMe/TCP: Invalid PDU data offset %u", ch->pdo); 849 nvmf_tcp_report_error(na, qp, 850 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 3, ch, pdu_len, 851 hlen); 852 return (false); 853 } 854 855 /* Validate plen. */ 856 if (plen != 128) { 857 na_error(na, "NVMe/TCP: Invalid PDU length %u", plen); 858 nvmf_tcp_report_error(na, qp, 859 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 4, ch, pdu_len, 860 hlen); 861 return (false); 862 } 863 864 /* Validate fields common to both ICReq and ICResp. */ 865 pdu = (const struct nvme_tcp_ic_req *)ch; 866 if (le16toh(pdu->pfv) != 0) { 867 na_error(na, "NVMe/TCP: Unsupported PDU version %u", 868 le16toh(pdu->pfv)); 869 nvmf_tcp_report_error(na, qp, 870 NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER, 871 8, ch, pdu_len, hlen); 872 return (false); 873 } 874 875 if (pdu->hpda > NVME_TCP_HPDA_MAX) { 876 na_error(na, "NVMe/TCP: Unsupported PDA %u", pdu->hpda); 877 nvmf_tcp_report_error(na, qp, 878 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 10, ch, pdu_len, 879 hlen); 880 return (false); 881 } 882 883 if (pdu->dgst.bits.reserved != 0) { 884 na_error(na, "NVMe/TCP: Invalid digest settings"); 885 nvmf_tcp_report_error(na, qp, 886 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 11, ch, pdu_len, 887 hlen); 888 return (false); 889 } 890 891 return (true); 892 } 893 894 static bool 895 nvmf_tcp_read_ic_req(struct nvmf_association *na, struct nvmf_tcp_qpair *qp, 896 struct nvme_tcp_ic_req *pdu) 897 { 898 int error; 899 900 error = nvmf_tcp_read_buffer(qp->s, pdu, sizeof(*pdu)); 901 if (error != 0) { 902 na_error(na, "NVMe/TCP: Failed to read IC request: %s", 903 strerror(error)); 904 return (false); 905 } 906 907 return (nvmf_tcp_validate_ic_pdu(na, qp, &pdu->common, sizeof(*pdu))); 908 } 909 910 static bool 911 nvmf_tcp_read_ic_resp(struct nvmf_association *na, struct nvmf_tcp_qpair *qp, 912 struct nvme_tcp_ic_resp *pdu) 913 { 914 int error; 915 916 error = nvmf_tcp_read_buffer(qp->s, pdu, sizeof(*pdu)); 917 if (error != 0) { 918 na_error(na, "NVMe/TCP: Failed to read IC response: %s", 919 strerror(error)); 920 return (false); 921 } 922 923 return (nvmf_tcp_validate_ic_pdu(na, qp, &pdu->common, sizeof(*pdu))); 924 } 925 926 static struct nvmf_association * 927 tcp_allocate_association(bool controller __unused, 928 const struct nvmf_association_params *params __unused) 929 { 930 struct nvmf_tcp_association *ta; 931 932 ta = calloc(1, sizeof(*ta)); 933 934 return (&ta->na); 935 } 936 937 static void 938 tcp_update_association(struct nvmf_association *na, 939 const struct nvme_controller_data *cdata) 940 { 941 struct nvmf_tcp_association *ta = TASSOC(na); 942 943 ta->ioccsz = le32toh(cdata->ioccsz); 944 } 945 946 static void 947 tcp_free_association(struct nvmf_association *na) 948 { 949 free(na); 950 } 951 952 static bool 953 tcp_connect(struct nvmf_tcp_qpair *qp, struct nvmf_association *na, bool admin) 954 { 955 const struct nvmf_association_params *params = &na->na_params; 956 struct nvmf_tcp_association *ta = TASSOC(na); 957 struct nvme_tcp_ic_req ic_req; 958 struct nvme_tcp_ic_resp ic_resp; 959 int error; 960 961 if (!admin) { 962 if (ta->ioccsz == 0) { 963 na_error(na, "TCP I/O queues require cdata"); 964 return (false); 965 } 966 if (ta->ioccsz < 4) { 967 na_error(na, "Invalid IOCCSZ %u", ta->ioccsz); 968 return (false); 969 } 970 } 971 972 memset(&ic_req, 0, sizeof(ic_req)); 973 ic_req.common.pdu_type = NVME_TCP_PDU_TYPE_IC_REQ; 974 ic_req.common.hlen = sizeof(ic_req); 975 ic_req.common.plen = htole32(sizeof(ic_req)); 976 ic_req.pfv = htole16(0); 977 ic_req.hpda = params->tcp.pda; 978 if (params->tcp.header_digests) 979 ic_req.dgst.bits.hdgst_enable = 1; 980 if (params->tcp.data_digests) 981 ic_req.dgst.bits.ddgst_enable = 1; 982 ic_req.maxr2t = htole32(params->tcp.maxr2t); 983 984 error = nvmf_tcp_write_pdu(qp, &ic_req, sizeof(ic_req)); 985 if (error != 0) { 986 na_error(na, "Failed to write IC request: %s", strerror(error)); 987 return (false); 988 } 989 990 if (!nvmf_tcp_read_ic_resp(na, qp, &ic_resp)) 991 return (false); 992 993 /* Ensure the controller didn't enable digests we didn't request. */ 994 if ((!params->tcp.header_digests && 995 ic_resp.dgst.bits.hdgst_enable != 0) || 996 (!params->tcp.data_digests && 997 ic_resp.dgst.bits.ddgst_enable != 0)) { 998 na_error(na, "Controller enabled unrequested digests"); 999 nvmf_tcp_report_error(na, qp, 1000 NVME_TCP_TERM_REQ_FES_INVALID_DATA_UNSUPPORTED_PARAMETER, 1001 11, &ic_resp, sizeof(ic_resp), sizeof(ic_resp)); 1002 return (false); 1003 } 1004 1005 /* 1006 * XXX: Is there an upper-bound to enforce here? Perhaps pick 1007 * some large value and report larger values as an unsupported 1008 * parameter? 1009 */ 1010 if (le32toh(ic_resp.maxh2cdata) < 4096) { 1011 na_error(na, "Invalid MAXH2CDATA %u", 1012 le32toh(ic_resp.maxh2cdata)); 1013 nvmf_tcp_report_error(na, qp, 1014 NVME_TCP_TERM_REQ_FES_INVALID_HEADER_FIELD, 12, &ic_resp, 1015 sizeof(ic_resp), sizeof(ic_resp)); 1016 return (false); 1017 } 1018 1019 qp->txpda = (params->tcp.pda + 1) * 4; 1020 qp->rxpda = (ic_resp.cpda + 1) * 4; 1021 qp->header_digests = ic_resp.dgst.bits.hdgst_enable != 0; 1022 qp->data_digests = ic_resp.dgst.bits.ddgst_enable != 0; 1023 qp->maxr2t = params->tcp.maxr2t; 1024 qp->maxh2cdata = le32toh(ic_resp.maxh2cdata); 1025 if (admin) 1026 /* 7.4.3 */ 1027 qp->max_icd = 8192; 1028 else 1029 qp->max_icd = (ta->ioccsz - 4) * 16; 1030 1031 return (0); 1032 } 1033 1034 static bool 1035 tcp_accept(struct nvmf_tcp_qpair *qp, struct nvmf_association *na) 1036 { 1037 const struct nvmf_association_params *params = &na->na_params; 1038 struct nvme_tcp_ic_req ic_req; 1039 struct nvme_tcp_ic_resp ic_resp; 1040 int error; 1041 1042 if (!nvmf_tcp_read_ic_req(na, qp, &ic_req)) 1043 return (false); 1044 1045 memset(&ic_resp, 0, sizeof(ic_resp)); 1046 ic_resp.common.pdu_type = NVME_TCP_PDU_TYPE_IC_RESP; 1047 ic_resp.common.hlen = sizeof(ic_req); 1048 ic_resp.common.plen = htole32(sizeof(ic_req)); 1049 ic_resp.pfv = htole16(0); 1050 ic_resp.cpda = params->tcp.pda; 1051 if (params->tcp.header_digests && ic_req.dgst.bits.hdgst_enable != 0) 1052 ic_resp.dgst.bits.hdgst_enable = 1; 1053 if (params->tcp.data_digests && ic_req.dgst.bits.ddgst_enable != 0) 1054 ic_resp.dgst.bits.ddgst_enable = 1; 1055 ic_resp.maxh2cdata = htole32(params->tcp.maxh2cdata); 1056 1057 error = nvmf_tcp_write_pdu(qp, &ic_resp, sizeof(ic_resp)); 1058 if (error != 0) { 1059 na_error(na, "Failed to write IC response: %s", 1060 strerror(error)); 1061 return (false); 1062 } 1063 1064 qp->txpda = (params->tcp.pda + 1) * 4; 1065 qp->rxpda = (ic_req.hpda + 1) * 4; 1066 qp->header_digests = ic_resp.dgst.bits.hdgst_enable != 0; 1067 qp->data_digests = ic_resp.dgst.bits.ddgst_enable != 0; 1068 qp->maxr2t = le32toh(ic_req.maxr2t); 1069 qp->maxh2cdata = params->tcp.maxh2cdata; 1070 qp->max_icd = 0; /* XXX */ 1071 return (0); 1072 } 1073 1074 static struct nvmf_qpair * 1075 tcp_allocate_qpair(struct nvmf_association *na, 1076 const struct nvmf_qpair_params *qparams) 1077 { 1078 const struct nvmf_association_params *aparams = &na->na_params; 1079 struct nvmf_tcp_qpair *qp; 1080 int error; 1081 1082 if (aparams->tcp.pda > NVME_TCP_CPDA_MAX) { 1083 na_error(na, "Invalid PDA"); 1084 return (NULL); 1085 } 1086 1087 qp = calloc(1, sizeof(*qp)); 1088 qp->s = qparams->tcp.fd; 1089 LIST_INIT(&qp->rx_buffers); 1090 LIST_INIT(&qp->tx_buffers); 1091 TAILQ_INIT(&qp->rx_capsules); 1092 if (na->na_controller) 1093 error = tcp_accept(qp, na); 1094 else 1095 error = tcp_connect(qp, na, qparams->admin); 1096 if (error != 0) { 1097 free(qp); 1098 return (NULL); 1099 } 1100 1101 return (&qp->qp); 1102 } 1103 1104 static void 1105 tcp_free_qpair(struct nvmf_qpair *nq) 1106 { 1107 struct nvmf_tcp_qpair *qp = TQP(nq); 1108 struct nvmf_tcp_capsule *ntc, *tc; 1109 struct nvmf_tcp_command_buffer *ncb, *cb; 1110 1111 TAILQ_FOREACH_SAFE(tc, &qp->rx_capsules, link, ntc) { 1112 TAILQ_REMOVE(&qp->rx_capsules, tc, link); 1113 nvmf_free_capsule(&tc->nc); 1114 } 1115 LIST_FOREACH_SAFE(cb, &qp->rx_buffers, link, ncb) { 1116 tcp_free_command_buffer(cb); 1117 } 1118 LIST_FOREACH_SAFE(cb, &qp->tx_buffers, link, ncb) { 1119 tcp_free_command_buffer(cb); 1120 } 1121 free(qp); 1122 } 1123 1124 static int 1125 tcp_kernel_handoff_params(struct nvmf_qpair *nq, 1126 struct nvmf_handoff_qpair_params *qparams) 1127 { 1128 struct nvmf_tcp_qpair *qp = TQP(nq); 1129 1130 qparams->tcp.fd = qp->s; 1131 qparams->tcp.rxpda = qp->rxpda; 1132 qparams->tcp.txpda = qp->txpda; 1133 qparams->tcp.header_digests = qp->header_digests; 1134 qparams->tcp.data_digests = qp->data_digests; 1135 qparams->tcp.maxr2t = qp->maxr2t; 1136 qparams->tcp.maxh2cdata = qp->maxh2cdata; 1137 qparams->tcp.max_icd = qp->max_icd; 1138 1139 return (0); 1140 } 1141 1142 static struct nvmf_capsule * 1143 tcp_allocate_capsule(struct nvmf_qpair *qp __unused) 1144 { 1145 struct nvmf_tcp_capsule *nc; 1146 1147 nc = calloc(1, sizeof(*nc)); 1148 return (&nc->nc); 1149 } 1150 1151 static void 1152 tcp_free_capsule(struct nvmf_capsule *nc) 1153 { 1154 struct nvmf_tcp_capsule *tc = TCAP(nc); 1155 1156 nvmf_tcp_free_pdu(&tc->rx_pdu); 1157 if (tc->cb != NULL) 1158 tcp_free_command_buffer(tc->cb); 1159 free(tc); 1160 } 1161 1162 static int 1163 tcp_transmit_command(struct nvmf_capsule *nc) 1164 { 1165 struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair); 1166 struct nvmf_tcp_capsule *tc = TCAP(nc); 1167 struct nvme_tcp_cmd cmd; 1168 struct nvme_sgl_descriptor *sgl; 1169 int error; 1170 bool use_icd; 1171 1172 use_icd = false; 1173 if (nc->nc_data_len != 0 && nc->nc_send_data && 1174 nc->nc_data_len <= qp->max_icd) 1175 use_icd = true; 1176 1177 memset(&cmd, 0, sizeof(cmd)); 1178 cmd.common.pdu_type = NVME_TCP_PDU_TYPE_CAPSULE_CMD; 1179 cmd.ccsqe = nc->nc_sqe; 1180 1181 /* Populate SGL in SQE. */ 1182 sgl = &cmd.ccsqe.sgl; 1183 memset(sgl, 0, sizeof(*sgl)); 1184 sgl->address = 0; 1185 sgl->length = htole32(nc->nc_data_len); 1186 if (use_icd) { 1187 /* Use in-capsule data. */ 1188 sgl->type = NVME_SGL_TYPE_ICD; 1189 } else { 1190 /* Use a command buffer. */ 1191 sgl->type = NVME_SGL_TYPE_COMMAND_BUFFER; 1192 } 1193 1194 /* Send command capsule. */ 1195 error = nvmf_tcp_construct_pdu(qp, &cmd, sizeof(cmd), use_icd ? 1196 nc->nc_data : NULL, use_icd ? nc->nc_data_len : 0); 1197 if (error != 0) 1198 return (error); 1199 1200 /* 1201 * If data will be transferred using a command buffer, allocate a 1202 * buffer structure and queue it. 1203 */ 1204 if (nc->nc_data_len != 0 && !use_icd) 1205 tc->cb = tcp_alloc_command_buffer(qp, nc->nc_data, 0, 1206 nc->nc_data_len, cmd.ccsqe.cid, 0, !nc->nc_send_data); 1207 1208 return (0); 1209 } 1210 1211 static int 1212 tcp_transmit_response(struct nvmf_capsule *nc) 1213 { 1214 struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair); 1215 struct nvme_tcp_rsp rsp; 1216 1217 memset(&rsp, 0, sizeof(rsp)); 1218 rsp.common.pdu_type = NVME_TCP_PDU_TYPE_CAPSULE_RESP; 1219 rsp.rccqe = nc->nc_cqe; 1220 1221 return (nvmf_tcp_construct_pdu(qp, &rsp, sizeof(rsp), NULL, 0)); 1222 } 1223 1224 static int 1225 tcp_transmit_capsule(struct nvmf_capsule *nc) 1226 { 1227 if (nc->nc_qe_len == sizeof(struct nvme_command)) 1228 return (tcp_transmit_command(nc)); 1229 else 1230 return (tcp_transmit_response(nc)); 1231 } 1232 1233 static int 1234 tcp_receive_capsule(struct nvmf_qpair *nq, struct nvmf_capsule **ncp) 1235 { 1236 struct nvmf_tcp_qpair *qp = TQP(nq); 1237 struct nvmf_tcp_capsule *tc; 1238 int error; 1239 1240 while (TAILQ_EMPTY(&qp->rx_capsules)) { 1241 error = nvmf_tcp_receive_pdu(qp); 1242 if (error != 0) 1243 return (error); 1244 } 1245 tc = TAILQ_FIRST(&qp->rx_capsules); 1246 TAILQ_REMOVE(&qp->rx_capsules, tc, link); 1247 *ncp = &tc->nc; 1248 return (0); 1249 } 1250 1251 static uint8_t 1252 tcp_validate_command_capsule(const struct nvmf_capsule *nc) 1253 { 1254 const struct nvmf_tcp_capsule *tc = CTCAP(nc); 1255 const struct nvme_sgl_descriptor *sgl; 1256 1257 assert(tc->rx_pdu.hdr != NULL); 1258 1259 sgl = &nc->nc_sqe.sgl; 1260 switch (sgl->type) { 1261 case NVME_SGL_TYPE_ICD: 1262 if (tc->rx_pdu.data_len != le32toh(sgl->length)) { 1263 printf("NVMe/TCP: Command Capsule with mismatched ICD length\n"); 1264 return (NVME_SC_DATA_SGL_LENGTH_INVALID); 1265 } 1266 break; 1267 case NVME_SGL_TYPE_COMMAND_BUFFER: 1268 if (tc->rx_pdu.data_len != 0) { 1269 printf("NVMe/TCP: Command Buffer SGL with ICD\n"); 1270 return (NVME_SC_INVALID_FIELD); 1271 } 1272 break; 1273 default: 1274 printf("NVMe/TCP: Invalid SGL type in Command Capsule\n"); 1275 return (NVME_SC_SGL_DESCRIPTOR_TYPE_INVALID); 1276 } 1277 1278 if (sgl->address != 0) { 1279 printf("NVMe/TCP: Invalid SGL offset in Command Capsule\n"); 1280 return (NVME_SC_SGL_OFFSET_INVALID); 1281 } 1282 1283 return (NVME_SC_SUCCESS); 1284 } 1285 1286 static size_t 1287 tcp_capsule_data_len(const struct nvmf_capsule *nc) 1288 { 1289 assert(nc->nc_qe_len == sizeof(struct nvme_command)); 1290 return (le32toh(nc->nc_sqe.sgl.length)); 1291 } 1292 1293 /* NB: cid and ttag are both little-endian already. */ 1294 static int 1295 tcp_send_r2t(struct nvmf_tcp_qpair *qp, uint16_t cid, uint16_t ttag, 1296 uint32_t data_offset, uint32_t data_len) 1297 { 1298 struct nvme_tcp_r2t_hdr r2t; 1299 1300 memset(&r2t, 0, sizeof(r2t)); 1301 r2t.common.pdu_type = NVME_TCP_PDU_TYPE_R2T; 1302 r2t.cccid = cid; 1303 r2t.ttag = ttag; 1304 r2t.r2to = htole32(data_offset); 1305 r2t.r2tl = htole32(data_len); 1306 1307 return (nvmf_tcp_construct_pdu(qp, &r2t, sizeof(r2t), NULL, 0)); 1308 } 1309 1310 static int 1311 tcp_receive_r2t_data(const struct nvmf_capsule *nc, uint32_t data_offset, 1312 void *buf, size_t len) 1313 { 1314 struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair); 1315 struct nvmf_tcp_command_buffer *cb; 1316 int error; 1317 uint16_t ttag; 1318 1319 /* 1320 * Don't bother byte-swapping ttag as it is just a cookie 1321 * value returned by the other end as-is. 1322 */ 1323 ttag = qp->next_ttag++; 1324 1325 error = tcp_send_r2t(qp, nc->nc_sqe.cid, ttag, data_offset, len); 1326 if (error != 0) 1327 return (error); 1328 1329 cb = tcp_alloc_command_buffer(qp, buf, data_offset, len, 1330 nc->nc_sqe.cid, ttag, true); 1331 1332 /* Parse received PDUs until the data transfer is complete. */ 1333 while (cb->data_xfered < cb->data_len) { 1334 error = nvmf_tcp_receive_pdu(qp); 1335 if (error != 0) 1336 break; 1337 } 1338 tcp_free_command_buffer(cb); 1339 return (error); 1340 } 1341 1342 static int 1343 tcp_receive_icd_data(const struct nvmf_capsule *nc, uint32_t data_offset, 1344 void *buf, size_t len) 1345 { 1346 const struct nvmf_tcp_capsule *tc = CTCAP(nc); 1347 const char *icd; 1348 1349 icd = (const char *)tc->rx_pdu.hdr + tc->rx_pdu.hdr->pdo + data_offset; 1350 memcpy(buf, icd, len); 1351 return (0); 1352 } 1353 1354 static int 1355 tcp_receive_controller_data(const struct nvmf_capsule *nc, uint32_t data_offset, 1356 void *buf, size_t len) 1357 { 1358 struct nvmf_association *na = nc->nc_qpair->nq_association; 1359 const struct nvme_sgl_descriptor *sgl; 1360 size_t data_len; 1361 1362 if (nc->nc_qe_len != sizeof(struct nvme_command) || !na->na_controller) 1363 return (EINVAL); 1364 1365 sgl = &nc->nc_sqe.sgl; 1366 data_len = le32toh(sgl->length); 1367 if (data_offset + len > data_len) 1368 return (EFBIG); 1369 1370 if (sgl->type == NVME_SGL_TYPE_ICD) 1371 return (tcp_receive_icd_data(nc, data_offset, buf, len)); 1372 else 1373 return (tcp_receive_r2t_data(nc, data_offset, buf, len)); 1374 } 1375 1376 /* NB: cid is little-endian already. */ 1377 static int 1378 tcp_send_c2h_pdu(struct nvmf_tcp_qpair *qp, uint16_t cid, 1379 uint32_t data_offset, const void *buf, size_t len, bool last_pdu, 1380 bool success) 1381 { 1382 struct nvme_tcp_c2h_data_hdr c2h; 1383 1384 memset(&c2h, 0, sizeof(c2h)); 1385 c2h.common.pdu_type = NVME_TCP_PDU_TYPE_C2H_DATA; 1386 if (last_pdu) 1387 c2h.common.flags |= NVME_TCP_C2H_DATA_FLAGS_LAST_PDU; 1388 if (success) 1389 c2h.common.flags |= NVME_TCP_C2H_DATA_FLAGS_SUCCESS; 1390 c2h.cccid = cid; 1391 c2h.datao = htole32(data_offset); 1392 c2h.datal = htole32(len); 1393 1394 return (nvmf_tcp_construct_pdu(qp, &c2h, sizeof(c2h), 1395 __DECONST(void *, buf), len)); 1396 } 1397 1398 static int 1399 tcp_send_controller_data(const struct nvmf_capsule *nc, const void *buf, 1400 size_t len) 1401 { 1402 struct nvmf_association *na = nc->nc_qpair->nq_association; 1403 struct nvmf_tcp_qpair *qp = TQP(nc->nc_qpair); 1404 const struct nvme_sgl_descriptor *sgl; 1405 const char *src; 1406 size_t todo; 1407 uint32_t data_len, data_offset; 1408 int error; 1409 bool last_pdu, send_success_flag; 1410 1411 if (nc->nc_qe_len != sizeof(struct nvme_command) || !na->na_controller) 1412 return (EINVAL); 1413 1414 sgl = &nc->nc_sqe.sgl; 1415 data_len = le32toh(sgl->length); 1416 if (len != data_len) { 1417 nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD); 1418 return (EFBIG); 1419 } 1420 1421 if (sgl->type != NVME_SGL_TYPE_COMMAND_BUFFER) { 1422 nvmf_send_generic_error(nc, NVME_SC_INVALID_FIELD); 1423 return (EINVAL); 1424 } 1425 1426 /* Use the SUCCESS flag if SQ flow control is disabled. */ 1427 send_success_flag = !qp->qp.nq_flow_control; 1428 1429 /* 1430 * Write out one or more C2H_DATA PDUs containing the data. 1431 * Each PDU is arbitrarily capped at 256k. 1432 */ 1433 data_offset = 0; 1434 src = buf; 1435 while (len > 0) { 1436 if (len > 256 * 1024) { 1437 todo = 256 * 1024; 1438 last_pdu = false; 1439 } else { 1440 todo = len; 1441 last_pdu = true; 1442 } 1443 error = tcp_send_c2h_pdu(qp, nc->nc_sqe.cid, data_offset, 1444 src, todo, last_pdu, last_pdu && send_success_flag); 1445 if (error != 0) { 1446 nvmf_send_generic_error(nc, 1447 NVME_SC_TRANSIENT_TRANSPORT_ERROR); 1448 return (error); 1449 } 1450 data_offset += todo; 1451 src += todo; 1452 len -= todo; 1453 } 1454 if (!send_success_flag) 1455 nvmf_send_success(nc); 1456 return (0); 1457 } 1458 1459 struct nvmf_transport_ops tcp_ops = { 1460 .allocate_association = tcp_allocate_association, 1461 .update_association = tcp_update_association, 1462 .free_association = tcp_free_association, 1463 .allocate_qpair = tcp_allocate_qpair, 1464 .free_qpair = tcp_free_qpair, 1465 .kernel_handoff_params = tcp_kernel_handoff_params, 1466 .allocate_capsule = tcp_allocate_capsule, 1467 .free_capsule = tcp_free_capsule, 1468 .transmit_capsule = tcp_transmit_capsule, 1469 .receive_capsule = tcp_receive_capsule, 1470 .validate_command_capsule = tcp_validate_command_capsule, 1471 .capsule_data_len = tcp_capsule_data_len, 1472 .receive_controller_data = tcp_receive_controller_data, 1473 .send_controller_data = tcp_send_controller_data, 1474 }; 1475