rdma.c (d92f79a55232405d66ca343294f873cefd04ddd7) vs. rdma.c (8094ba0ace7f6cd1e31ea8b151fba3594cadfa9a)
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * NVMe over Fabrics RDMA target.
4 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
5 */
6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7#include <linux/atomic.h>
8#include <linux/ctype.h>

--- 6 unchanged lines hidden ---

15#include <linux/string.h>
16#include <linux/wait.h>
17#include <linux/inet.h>
18#include <asm/unaligned.h>
19
20#include <rdma/ib_verbs.h>
21#include <rdma/rdma_cm.h>
22#include <rdma/rw.h>
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * NVMe over Fabrics RDMA target.
4 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
5 */
6#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
7#include <linux/atomic.h>
8#include <linux/ctype.h>

--- 6 unchanged lines hidden ---

15#include <linux/string.h>
16#include <linux/wait.h>
17#include <linux/inet.h>
18#include <asm/unaligned.h>
19
20#include <rdma/ib_verbs.h>
21#include <rdma/rdma_cm.h>
22#include <rdma/rw.h>
23#include <rdma/ib_cm.h>
23
24#include <linux/nvme-rdma.h>
25#include "nvmet.h"
26
27/*
28 * We allow at least 1 page, up to 4 SGEs, and up to 16KB of inline data
29 */
30#define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE PAGE_SIZE
31#define NVMET_RDMA_MAX_INLINE_SGE 4
32#define NVMET_RDMA_MAX_INLINE_DATA_SIZE max_t(int, SZ_16K, PAGE_SIZE)
33
34/* Assume mpsmin == device_page_size == 4KB */
35#define NVMET_RDMA_MAX_MDTS 8
24
25#include <linux/nvme-rdma.h>
26#include "nvmet.h"
27
28/*
29 * We allow at least 1 page, up to 4 SGEs, and up to 16KB of inline data
30 */
31#define NVMET_RDMA_DEFAULT_INLINE_DATA_SIZE PAGE_SIZE
32#define NVMET_RDMA_MAX_INLINE_SGE 4
33#define NVMET_RDMA_MAX_INLINE_DATA_SIZE max_t(int, SZ_16K, PAGE_SIZE)
34
35/* Assume mpsmin == device_page_size == 4KB */
36#define NVMET_RDMA_MAX_MDTS 8
36#define NVMET_RDMA_MAX_METADATA_MDTS 5
37
37
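[Editor's note: a quick sanity check on the MDTS macros above, for orientation only; this note and the snippet are not part of either revision. MDTS is a power-of-two multiplier in units of mpsmin, so with the stated 4 KB assumption NVMET_RDMA_MAX_MDTS = 8 caps data transfers at 1 MB, while the metadata-capable limit of 5 caps them at 128 KB. The sketch is plain user-space C with hypothetical constants mirroring the macros, not kernel code.]

/* sketch: what the MDTS exponents above translate to, assuming 4 KB mpsmin */
#include <stdio.h>

#define MPSMIN_BYTES      4096 /* "Assume mpsmin == device_page_size == 4KB" */
#define MAX_MDTS          8    /* mirrors NVMET_RDMA_MAX_MDTS */
#define MAX_METADATA_MDTS 5    /* mirrors NVMET_RDMA_MAX_METADATA_MDTS */

int main(void)
{
	printf("max data transfer:          %d KB\n",
	       (MPSMIN_BYTES << MAX_MDTS) / 1024);          /* 1024 KB (1 MB) */
	printf("max transfer with metadata: %d KB\n",
	       (MPSMIN_BYTES << MAX_METADATA_MDTS) / 1024);  /* 128 KB */
	return 0;
}

[End of editor's note.]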
38struct nvmet_rdma_srq;
39
40struct nvmet_rdma_cmd {
41 struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1];
42 struct ib_cqe cqe;
43 struct ib_recv_wr wr;
44 struct scatterlist inline_sg[NVMET_RDMA_MAX_INLINE_SGE];
45 struct nvme_command *nvme_cmd;
46 struct nvmet_rdma_queue *queue;
38struct nvmet_rdma_cmd {
39 struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1];
40 struct ib_cqe cqe;
41 struct ib_recv_wr wr;
42 struct scatterlist inline_sg[NVMET_RDMA_MAX_INLINE_SGE];
43 struct nvme_command *nvme_cmd;
44 struct nvmet_rdma_queue *queue;
47 struct nvmet_rdma_srq *nsrq;
48};
49
50enum {
51 NVMET_RDMA_REQ_INLINE_DATA = (1 << 0),
52 NVMET_RDMA_REQ_INVALIDATE_RKEY = (1 << 1),
53};
54
55struct nvmet_rdma_rsp {
56 struct ib_sge send_sge;
57 struct ib_cqe send_cqe;
58 struct ib_send_wr send_wr;
59
60 struct nvmet_rdma_cmd *cmd;
61 struct nvmet_rdma_queue *queue;
62
63 struct ib_cqe read_cqe;
45};
46
47enum {
48 NVMET_RDMA_REQ_INLINE_DATA = (1 << 0),
49 NVMET_RDMA_REQ_INVALIDATE_RKEY = (1 << 1),
50};
51
52struct nvmet_rdma_rsp {
53 struct ib_sge send_sge;
54 struct ib_cqe send_cqe;
55 struct ib_send_wr send_wr;
56
57 struct nvmet_rdma_cmd *cmd;
58 struct nvmet_rdma_queue *queue;
59
60 struct ib_cqe read_cqe;
64 struct ib_cqe write_cqe;
65 struct rdma_rw_ctx rw;
66
67 struct nvmet_req req;
68
69 bool allocated;
70 u8 n_rdma;
71 u32 flags;
72 u32 invalidate_rkey;

--- 10 unchanged lines hidden ---

83
84struct nvmet_rdma_queue {
85 struct rdma_cm_id *cm_id;
86 struct ib_qp *qp;
87 struct nvmet_port *port;
88 struct ib_cq *cq;
89 atomic_t sq_wr_avail;
90 struct nvmet_rdma_device *dev;
61 struct rdma_rw_ctx rw;
62
63 struct nvmet_req req;
64
65 bool allocated;
66 u8 n_rdma;
67 u32 flags;
68 u32 invalidate_rkey;

--- 10 unchanged lines hidden ---

79
80struct nvmet_rdma_queue {
81 struct rdma_cm_id *cm_id;
82 struct ib_qp *qp;
83 struct nvmet_port *port;
84 struct ib_cq *cq;
85 atomic_t sq_wr_avail;
86 struct nvmet_rdma_device *dev;
91 struct nvmet_rdma_srq *nsrq;
92 spinlock_t state_lock;
93 enum nvmet_rdma_queue_state state;
94 struct nvmet_cq nvme_cq;
95 struct nvmet_sq nvme_sq;
96
97 struct nvmet_rdma_rsp *rsps;
98 struct list_head free_rsps;
99 spinlock_t rsps_lock;
100 struct nvmet_rdma_cmd *cmds;
101
102 struct work_struct release_work;
103 struct list_head rsp_wait_list;
104 struct list_head rsp_wr_wait_list;
105 spinlock_t rsp_wr_wait_lock;
106
107 int idx;
108 int host_qid;
87 spinlock_t state_lock;
88 enum nvmet_rdma_queue_state state;
89 struct nvmet_cq nvme_cq;
90 struct nvmet_sq nvme_sq;
91
92 struct nvmet_rdma_rsp *rsps;
93 struct list_head free_rsps;
94 spinlock_t rsps_lock;
95 struct nvmet_rdma_cmd *cmds;
96
97 struct work_struct release_work;
98 struct list_head rsp_wait_list;
99 struct list_head rsp_wr_wait_list;
100 spinlock_t rsp_wr_wait_lock;
101
102 int idx;
103 int host_qid;
109 int comp_vector;
110 int recv_queue_size;
111 int send_queue_size;
112
113 struct list_head queue_list;
114};
115
116struct nvmet_rdma_port {
117 struct nvmet_port *nport;
118 struct sockaddr_storage addr;
119 struct rdma_cm_id *cm_id;
120 struct delayed_work repair_work;
121};
122
104 int recv_queue_size;
105 int send_queue_size;
106
107 struct list_head queue_list;
108};
109
110struct nvmet_rdma_port {
111 struct nvmet_port *nport;
112 struct sockaddr_storage addr;
113 struct rdma_cm_id *cm_id;
114 struct delayed_work repair_work;
115};
116
123struct nvmet_rdma_srq {
124 struct ib_srq *srq;
125 struct nvmet_rdma_cmd *cmds;
126 struct nvmet_rdma_device *ndev;
127};
128
129struct nvmet_rdma_device {
130 struct ib_device *device;
131 struct ib_pd *pd;
117struct nvmet_rdma_device {
118 struct ib_device *device;
119 struct ib_pd *pd;
132 struct nvmet_rdma_srq **srqs;
133 int srq_count;
120 struct ib_srq *srq;
121 struct nvmet_rdma_cmd *srq_cmds;
134 size_t srq_size;
135 struct kref ref;
136 struct list_head entry;
137 int inline_data_size;
138 int inline_page_count;
139};
140
141static bool nvmet_rdma_use_srq;
142module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444);
143MODULE_PARM_DESC(use_srq, "Use shared receive queue.");
144
122 size_t srq_size;
123 struct kref ref;
124 struct list_head entry;
125 int inline_data_size;
126 int inline_page_count;
127};
128
129static bool nvmet_rdma_use_srq;
130module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444);
131MODULE_PARM_DESC(use_srq, "Use shared receive queue.");
132
145static int srq_size_set(const char *val, const struct kernel_param *kp);
146static const struct kernel_param_ops srq_size_ops = {
147 .set = srq_size_set,
148 .get = param_get_int,
149};
150
151static int nvmet_rdma_srq_size = 1024;
152module_param_cb(srq_size, &srq_size_ops, &nvmet_rdma_srq_size, 0644);
153MODULE_PARM_DESC(srq_size, "set Shared Receive Queue (SRQ) size, should >= 256 (default: 1024)");
154
155static DEFINE_IDA(nvmet_rdma_queue_ida);
156static LIST_HEAD(nvmet_rdma_queue_list);
157static DEFINE_MUTEX(nvmet_rdma_queue_mutex);
158
159static LIST_HEAD(device_list);
160static DEFINE_MUTEX(device_list_mutex);
161
162static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp);
163static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc);
164static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
165static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc);
133static DEFINE_IDA(nvmet_rdma_queue_ida);
134static LIST_HEAD(nvmet_rdma_queue_list);
135static DEFINE_MUTEX(nvmet_rdma_queue_mutex);
136
137static LIST_HEAD(device_list);
138static DEFINE_MUTEX(device_list_mutex);
139
140static bool nvmet_rdma_execute_command(struct nvmet_rdma_rsp *rsp);
141static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc);
142static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc);
143static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc);
166static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc);
167static void nvmet_rdma_qp_event(struct ib_event *event, void *priv);
168static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
169static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
170 struct nvmet_rdma_rsp *r);
171static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
172 struct nvmet_rdma_rsp *r);
173
174static const struct nvmet_fabrics_ops nvmet_rdma_ops;
175
144static void nvmet_rdma_qp_event(struct ib_event *event, void *priv);
145static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue);
146static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev,
147 struct nvmet_rdma_rsp *r);
148static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
149 struct nvmet_rdma_rsp *r);
150
151static const struct nvmet_fabrics_ops nvmet_rdma_ops;
152
176static int srq_size_set(const char *val, const struct kernel_param *kp)
177{
178 int n = 0, ret;
179
180 ret = kstrtoint(val, 10, &n);
181 if (ret != 0 || n < 256)
182 return -EINVAL;
183
184 return param_set_int(val, kp);
185}
186
187static int num_pages(int len)
188{
189 return 1 + (((len - 1) & PAGE_MASK) >> PAGE_SHIFT);
190}
191
192static inline bool nvmet_rdma_need_data_in(struct nvmet_rdma_rsp *rsp)
193{
194 return nvme_is_write(rsp->req.cmd) &&

--- 226 unchanged lines hidden ---

421
422 r->send_wr.wr_cqe = &r->send_cqe;
423 r->send_wr.sg_list = &r->send_sge;
424 r->send_wr.num_sge = 1;
425 r->send_wr.send_flags = IB_SEND_SIGNALED;
426
427 /* Data In / RDMA READ */
428 r->read_cqe.done = nvmet_rdma_read_data_done;
153static int num_pages(int len)
154{
155 return 1 + (((len - 1) & PAGE_MASK) >> PAGE_SHIFT);
156}
157
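[Editor's note: num_pages() above is a branch-free ceiling division of len by PAGE_SIZE, valid for len >= 1. The stand-alone check below (user-space C, 4 KB pages assumed, not kernel code) verifies that it matches the usual DIV_ROUND_UP form.]

/* sketch: num_pages(len) == ceil(len / PAGE_SIZE) for len >= 1 */
#include <assert.h>
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

static int num_pages(int len)
{
	return 1 + (((len - 1) & PAGE_MASK) >> PAGE_SHIFT);
}

int main(void)
{
	for (int len = 1; len <= 5 * (int)PAGE_SIZE; len++)
		assert(num_pages(len) == (len + (int)PAGE_SIZE - 1) / (int)PAGE_SIZE);
	printf("num_pages(1)=%d num_pages(4096)=%d num_pages(4097)=%d\n",
	       num_pages(1), num_pages(4096), num_pages(4097));
	return 0;
}

[End of editor's note.]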
158static inline bool nvmet_rdma_need_data_in(struct nvmet_rdma_rsp *rsp)
159{
160 return nvme_is_write(rsp->req.cmd) &&

--- 226 unchanged lines hidden ---

387
388 r->send_wr.wr_cqe = &r->send_cqe;
389 r->send_wr.sg_list = &r->send_sge;
390 r->send_wr.num_sge = 1;
391 r->send_wr.send_flags = IB_SEND_SIGNALED;
392
393 /* Data In / RDMA READ */
394 r->read_cqe.done = nvmet_rdma_read_data_done;
429 /* Data Out / RDMA WRITE */
430 r->write_cqe.done = nvmet_rdma_write_data_done;
431
432 return 0;
433
434out_free_rsp:
435 kfree(r->req.cqe);
436out:
437 return -ENOMEM;
438}
439

--- 59 unchanged lines hidden ---

499 struct nvmet_rdma_cmd *cmd)
500{
501 int ret;
502
503 ib_dma_sync_single_for_device(ndev->device,
504 cmd->sge[0].addr, cmd->sge[0].length,
505 DMA_FROM_DEVICE);
506
395 return 0;
396
397out_free_rsp:
398 kfree(r->req.cqe);
399out:
400 return -ENOMEM;
401}
402

--- 59 unchanged lines hidden ---

462 struct nvmet_rdma_cmd *cmd)
463{
464 int ret;
465
466 ib_dma_sync_single_for_device(ndev->device,
467 cmd->sge[0].addr, cmd->sge[0].length,
468 DMA_FROM_DEVICE);
469
507 if (cmd->nsrq)
508 ret = ib_post_srq_recv(cmd->nsrq->srq, &cmd->wr, NULL);
470 if (ndev->srq)
471 ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL);
509 else
510 ret = ib_post_recv(cmd->queue->qp, &cmd->wr, NULL);
511
512 if (unlikely(ret))
513 pr_err("post_recv cmd failed\n");
514
515 return ret;
516}

--- 16 unchanged lines hidden ---

533 if (!ret) {
534 list_add(&rsp->wait_list, &queue->rsp_wr_wait_list);
535 break;
536 }
537 }
538 spin_unlock(&queue->rsp_wr_wait_lock);
539}
540
472 else
473 ret = ib_post_recv(cmd->queue->qp, &cmd->wr, NULL);
474
475 if (unlikely(ret))
476 pr_err("post_recv cmd failed\n");
477
478 return ret;
479}

--- 16 unchanged lines hidden ---

496 if (!ret) {
497 list_add(&rsp->wait_list, &queue->rsp_wr_wait_list);
498 break;
499 }
500 }
501 spin_unlock(&queue->rsp_wr_wait_lock);
502}
503
541static u16 nvmet_rdma_check_pi_status(struct ib_mr *sig_mr)
542{
543 struct ib_mr_status mr_status;
544 int ret;
545 u16 status = 0;
546
504
547 ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status);
548 if (ret) {
549 pr_err("ib_check_mr_status failed, ret %d\n", ret);
550 return NVME_SC_INVALID_PI;
551 }
552
553 if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) {
554 switch (mr_status.sig_err.err_type) {
555 case IB_SIG_BAD_GUARD:
556 status = NVME_SC_GUARD_CHECK;
557 break;
558 case IB_SIG_BAD_REFTAG:
559 status = NVME_SC_REFTAG_CHECK;
560 break;
561 case IB_SIG_BAD_APPTAG:
562 status = NVME_SC_APPTAG_CHECK;
563 break;
564 }
565 pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n",
566 mr_status.sig_err.err_type,
567 mr_status.sig_err.expected,
568 mr_status.sig_err.actual);
569 }
570
571 return status;
572}
573
574static void nvmet_rdma_set_sig_domain(struct blk_integrity *bi,
575 struct nvme_command *cmd, struct ib_sig_domain *domain,
576 u16 control, u8 pi_type)
577{
578 domain->sig_type = IB_SIG_TYPE_T10_DIF;
579 domain->sig.dif.bg_type = IB_T10DIF_CRC;
580 domain->sig.dif.pi_interval = 1 << bi->interval_exp;
581 domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag);
582 if (control & NVME_RW_PRINFO_PRCHK_REF)
583 domain->sig.dif.ref_remap = true;
584
585 domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag);
586 domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask);
587 domain->sig.dif.app_escape = true;
588 if (pi_type == NVME_NS_DPS_PI_TYPE3)
589 domain->sig.dif.ref_escape = true;
590}
591
592static void nvmet_rdma_set_sig_attrs(struct nvmet_req *req,
593 struct ib_sig_attrs *sig_attrs)
594{
595 struct nvme_command *cmd = req->cmd;
596 u16 control = le16_to_cpu(cmd->rw.control);
597 u8 pi_type = req->ns->pi_type;
598 struct blk_integrity *bi;
599
600 bi = bdev_get_integrity(req->ns->bdev);
601
602 memset(sig_attrs, 0, sizeof(*sig_attrs));
603
604 if (control & NVME_RW_PRINFO_PRACT) {
605 /* for WRITE_INSERT/READ_STRIP no wire domain */
606 sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE;
607 nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
608 pi_type);
609 /* Clear the PRACT bit since HCA will generate/verify the PI */
610 control &= ~NVME_RW_PRINFO_PRACT;
611 cmd->rw.control = cpu_to_le16(control);
612 /* PI is added by the HW */
613 req->transfer_len += req->metadata_len;
614 } else {
615 /* for WRITE_PASS/READ_PASS both wire/memory domains exist */
616 nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control,
617 pi_type);
618 nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control,
619 pi_type);
620 }
621
622 if (control & NVME_RW_PRINFO_PRCHK_REF)
623 sig_attrs->check_mask |= IB_SIG_CHECK_REFTAG;
624 if (control & NVME_RW_PRINFO_PRCHK_GUARD)
625 sig_attrs->check_mask |= IB_SIG_CHECK_GUARD;
626 if (control & NVME_RW_PRINFO_PRCHK_APP)
627 sig_attrs->check_mask |= IB_SIG_CHECK_APPTAG;
628}
629
630static int nvmet_rdma_rw_ctx_init(struct nvmet_rdma_rsp *rsp, u64 addr, u32 key,
631 struct ib_sig_attrs *sig_attrs)
632{
633 struct rdma_cm_id *cm_id = rsp->queue->cm_id;
634 struct nvmet_req *req = &rsp->req;
635 int ret;
636
637 if (req->metadata_len)
638 ret = rdma_rw_ctx_signature_init(&rsp->rw, cm_id->qp,
639 cm_id->port_num, req->sg, req->sg_cnt,
640 req->metadata_sg, req->metadata_sg_cnt, sig_attrs,
641 addr, key, nvmet_data_dir(req));
642 else
643 ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
644 req->sg, req->sg_cnt, 0, addr, key,
645 nvmet_data_dir(req));
646
647 return ret;
648}
649
650static void nvmet_rdma_rw_ctx_destroy(struct nvmet_rdma_rsp *rsp)
651{
652 struct rdma_cm_id *cm_id = rsp->queue->cm_id;
653 struct nvmet_req *req = &rsp->req;
654
655 if (req->metadata_len)
656 rdma_rw_ctx_destroy_signature(&rsp->rw, cm_id->qp,
657 cm_id->port_num, req->sg, req->sg_cnt,
658 req->metadata_sg, req->metadata_sg_cnt,
659 nvmet_data_dir(req));
660 else
661 rdma_rw_ctx_destroy(&rsp->rw, cm_id->qp, cm_id->port_num,
662 req->sg, req->sg_cnt, nvmet_data_dir(req));
663}
664
665static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
666{
667 struct nvmet_rdma_queue *queue = rsp->queue;
668
669 atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail);
670
505static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp)
506{
507 struct nvmet_rdma_queue *queue = rsp->queue;
508
509 atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail);
510
671 if (rsp->n_rdma)
672 nvmet_rdma_rw_ctx_destroy(rsp);
511 if (rsp->n_rdma) {
512 rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
513 queue->cm_id->port_num, rsp->req.sg,
514 rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
515 }
673
674 if (rsp->req.sg != rsp->cmd->inline_sg)
516
517 if (rsp->req.sg != rsp->cmd->inline_sg)
675 nvmet_req_free_sgls(&rsp->req);
518 nvmet_req_free_sgl(&rsp->req);
676
677 if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list)))
678 nvmet_rdma_process_wr_wait_list(queue);
679
680 nvmet_rdma_put_rsp(rsp);
681}
682
683static void nvmet_rdma_error_comp(struct nvmet_rdma_queue *queue)

--- 35 unchanged lines hidden ---

719
720 if (rsp->flags & NVMET_RDMA_REQ_INVALIDATE_RKEY) {
721 rsp->send_wr.opcode = IB_WR_SEND_WITH_INV;
722 rsp->send_wr.ex.invalidate_rkey = rsp->invalidate_rkey;
723 } else {
724 rsp->send_wr.opcode = IB_WR_SEND;
725 }
726
519
520 if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list)))
521 nvmet_rdma_process_wr_wait_list(queue);
522
523 nvmet_rdma_put_rsp(rsp);
524}
525
526static void nvmet_rdma_error_comp(struct nvmet_rdma_queue *queue)

--- 35 unchanged lines hidden ---

562
563 if (rsp->flags & NVMET_RDMA_REQ_INVALIDATE_RKEY) {
564 rsp->send_wr.opcode = IB_WR_SEND_WITH_INV;
565 rsp->send_wr.ex.invalidate_rkey = rsp->invalidate_rkey;
566 } else {
567 rsp->send_wr.opcode = IB_WR_SEND;
568 }
569
727 if (nvmet_rdma_need_data_out(rsp)) {
728 if (rsp->req.metadata_len)
729 first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
730 cm_id->port_num, &rsp->write_cqe, NULL);
731 else
732 first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
733 cm_id->port_num, NULL, &rsp->send_wr);
734 } else {
570 if (nvmet_rdma_need_data_out(rsp))
571 first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp,
572 cm_id->port_num, NULL, &rsp->send_wr);
573 else
735 first_wr = &rsp->send_wr;
574 first_wr = &rsp->send_wr;
736 }
737
738 nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd);
739
740 ib_dma_sync_single_for_device(rsp->queue->dev->device,
741 rsp->send_sge.addr, rsp->send_sge.length,
742 DMA_TO_DEVICE);
743
744 if (unlikely(ib_post_send(cm_id->qp, first_wr, NULL))) {
745 pr_err("sending cmd response failed\n");
746 nvmet_rdma_release_rsp(rsp);
747 }
748}
749
750static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
751{
752 struct nvmet_rdma_rsp *rsp =
753 container_of(wc->wr_cqe, struct nvmet_rdma_rsp, read_cqe);
754 struct nvmet_rdma_queue *queue = cq->cq_context;
575
576 nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd);
577
578 ib_dma_sync_single_for_device(rsp->queue->dev->device,
579 rsp->send_sge.addr, rsp->send_sge.length,
580 DMA_TO_DEVICE);
581
582 if (unlikely(ib_post_send(cm_id->qp, first_wr, NULL))) {
583 pr_err("sending cmd response failed\n");
584 nvmet_rdma_release_rsp(rsp);
585 }
586}
587
588static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc)
589{
590 struct nvmet_rdma_rsp *rsp =
591 container_of(wc->wr_cqe, struct nvmet_rdma_rsp, read_cqe);
592 struct nvmet_rdma_queue *queue = cq->cq_context;
755 u16 status = 0;
756
757 WARN_ON(rsp->n_rdma <= 0);
758 atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
593
594 WARN_ON(rsp->n_rdma <= 0);
595 atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
596 rdma_rw_ctx_destroy(&rsp->rw, queue->qp,
597 queue->cm_id->port_num, rsp->req.sg,
598 rsp->req.sg_cnt, nvmet_data_dir(&rsp->req));
759 rsp->n_rdma = 0;
760
761 if (unlikely(wc->status != IB_WC_SUCCESS)) {
599 rsp->n_rdma = 0;
600
601 if (unlikely(wc->status != IB_WC_SUCCESS)) {
762 nvmet_rdma_rw_ctx_destroy(rsp);
763 nvmet_req_uninit(&rsp->req);
764 nvmet_rdma_release_rsp(rsp);
765 if (wc->status != IB_WC_WR_FLUSH_ERR) {
766 pr_info("RDMA READ for CQE 0x%p failed with status %s (%d).\n",
767 wc->wr_cqe, ib_wc_status_msg(wc->status), wc->status);
768 nvmet_rdma_error_comp(queue);
769 }
770 return;
771 }
772
602 nvmet_req_uninit(&rsp->req);
603 nvmet_rdma_release_rsp(rsp);
604 if (wc->status != IB_WC_WR_FLUSH_ERR) {
605 pr_info("RDMA READ for CQE 0x%p failed with status %s (%d).\n",
606 wc->wr_cqe, ib_wc_status_msg(wc->status), wc->status);
607 nvmet_rdma_error_comp(queue);
608 }
609 return;
610 }
611
773 if (rsp->req.metadata_len)
774 status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr);
775 nvmet_rdma_rw_ctx_destroy(rsp);
776
777 if (unlikely(status))
778 nvmet_req_complete(&rsp->req, status);
779 else
780 rsp->req.execute(&rsp->req);
612 rsp->req.execute(&rsp->req);
781}
782
613}
614
783static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc)
784{
785 struct nvmet_rdma_rsp *rsp =
786 container_of(wc->wr_cqe, struct nvmet_rdma_rsp, write_cqe);
787 struct nvmet_rdma_queue *queue = cq->cq_context;
788 struct rdma_cm_id *cm_id = rsp->queue->cm_id;
789 u16 status;
790
791 if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY))
792 return;
793
794 WARN_ON(rsp->n_rdma <= 0);
795 atomic_add(rsp->n_rdma, &queue->sq_wr_avail);
796 rsp->n_rdma = 0;
797
798 if (unlikely(wc->status != IB_WC_SUCCESS)) {
799 nvmet_rdma_rw_ctx_destroy(rsp);
800 nvmet_req_uninit(&rsp->req);
801 nvmet_rdma_release_rsp(rsp);
802 if (wc->status != IB_WC_WR_FLUSH_ERR) {
803 pr_info("RDMA WRITE for CQE 0x%p failed with status %s (%d).\n",
804 wc->wr_cqe, ib_wc_status_msg(wc->status),
805 wc->status);
806 nvmet_rdma_error_comp(queue);
807 }
808 return;
809 }
810
811 /*
812 * Upon RDMA completion check the signature status
813 * - if succeeded send good NVMe response
814 * - if failed send bad NVMe response with appropriate error
815 */
816 status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr);
817 if (unlikely(status))
818 rsp->req.cqe->status = cpu_to_le16(status << 1);
819 nvmet_rdma_rw_ctx_destroy(rsp);
820
821 if (unlikely(ib_post_send(cm_id->qp, &rsp->send_wr, NULL))) {
822 pr_err("sending cmd response failed\n");
823 nvmet_rdma_release_rsp(rsp);
824 }
825}
826
827static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len,
828 u64 off)
829{
830 int sg_count = num_pages(len);
831 struct scatterlist *sg;
832 int i;
833
834 sg = rsp->cmd->inline_sg;

--- 38 unchanged lines hidden ---

873 rsp->flags |= NVMET_RDMA_REQ_INLINE_DATA;
874 rsp->req.transfer_len += len;
875 return 0;
876}
877
878static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
879 struct nvme_keyed_sgl_desc *sgl, bool invalidate)
880{
615static void nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len,
616 u64 off)
617{
618 int sg_count = num_pages(len);
619 struct scatterlist *sg;
620 int i;
621
622 sg = rsp->cmd->inline_sg;

--- 38 unchanged lines hidden ---

661 rsp->flags |= NVMET_RDMA_REQ_INLINE_DATA;
662 rsp->req.transfer_len += len;
663 return 0;
664}
665
666static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp,
667 struct nvme_keyed_sgl_desc *sgl, bool invalidate)
668{
669 struct rdma_cm_id *cm_id = rsp->queue->cm_id;
881 u64 addr = le64_to_cpu(sgl->addr);
882 u32 key = get_unaligned_le32(sgl->key);
670 u64 addr = le64_to_cpu(sgl->addr);
671 u32 key = get_unaligned_le32(sgl->key);
883 struct ib_sig_attrs sig_attrs;
884 int ret;
885
886 rsp->req.transfer_len = get_unaligned_le24(sgl->length);
887
888 /* no data command? */
889 if (!rsp->req.transfer_len)
890 return 0;
891
672 int ret;
673
674 rsp->req.transfer_len = get_unaligned_le24(sgl->length);
675
676 /* no data command? */
677 if (!rsp->req.transfer_len)
678 return 0;
679
892 if (rsp->req.metadata_len)
893 nvmet_rdma_set_sig_attrs(&rsp->req, &sig_attrs);
894
895 ret = nvmet_req_alloc_sgls(&rsp->req);
680 ret = nvmet_req_alloc_sgl(&rsp->req);
896 if (unlikely(ret < 0))
897 goto error_out;
898
681 if (unlikely(ret < 0))
682 goto error_out;
683
899 ret = nvmet_rdma_rw_ctx_init(rsp, addr, key, &sig_attrs);
684 ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num,
685 rsp->req.sg, rsp->req.sg_cnt, 0, addr, key,
686 nvmet_data_dir(&rsp->req));
900 if (unlikely(ret < 0))
901 goto error_out;
902 rsp->n_rdma += ret;
903
904 if (invalidate) {
905 rsp->invalidate_rkey = key;
906 rsp->flags |= NVMET_RDMA_REQ_INVALIDATE_RKEY;
907 }
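[Editor's note: nvmet_rdma_map_sgl_keyed() above pulls the 24-bit length and 32-bit remote key out of the keyed SGL descriptor with get_unaligned helpers. The sketch below (user-space C, hypothetical values, not kernel code) shows that decoding, assuming the standard NVMe keyed SGL data block layout of an 8-byte address, a 3-byte length, a 4-byte key and a 1-byte type field.]

/* sketch: decoding length and key from a keyed SGL descriptor */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t get_unaligned_le24(const uint8_t *p)
{
	return p[0] | (p[1] << 8) | ((uint32_t)p[2] << 16);
}

static uint32_t get_unaligned_le32(const uint8_t *p)
{
	return p[0] | (p[1] << 8) | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

int main(void)
{
	/* hypothetical descriptor: addr=0x1000, length=8192, key=0xdeadbeef */
	uint8_t desc[16] = { 0 };
	uint64_t addr = 0x1000;

	memcpy(&desc[0], &addr, 8);                       /* bytes 0-7: address (LE host assumed) */
	desc[8] = 0x00; desc[9] = 0x20; desc[10] = 0x00;  /* bytes 8-10: length 0x002000 = 8192   */
	desc[11] = 0xef; desc[12] = 0xbe;                 /* bytes 11-14: key 0xdeadbeef          */
	desc[13] = 0xad; desc[14] = 0xde;

	printf("len=%u key=0x%x\n",
	       (unsigned)get_unaligned_le24(&desc[8]),
	       (unsigned)get_unaligned_le32(&desc[11]));
	return 0;
}

[End of editor's note.]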

--- 146 unchanged lines hidden ---

1054 nvmet_rdma_put_rsp(rsp);
1055 spin_unlock_irqrestore(&queue->state_lock, flags);
1056 return;
1057 }
1058
1059 nvmet_rdma_handle_command(queue, rsp);
1060}
1061
687 if (unlikely(ret < 0))
688 goto error_out;
689 rsp->n_rdma += ret;
690
691 if (invalidate) {
692 rsp->invalidate_rkey = key;
693 rsp->flags |= NVMET_RDMA_REQ_INVALIDATE_RKEY;
694 }

--- 146 unchanged lines hidden ---

841 nvmet_rdma_put_rsp(rsp);
842 spin_unlock_irqrestore(&queue->state_lock, flags);
843 return;
844 }
845
846 nvmet_rdma_handle_command(queue, rsp);
847}
848
1062static void nvmet_rdma_destroy_srq(struct nvmet_rdma_srq *nsrq)
849static void nvmet_rdma_destroy_srq(struct nvmet_rdma_device *ndev)
1063{
850{
1064 nvmet_rdma_free_cmds(nsrq->ndev, nsrq->cmds, nsrq->ndev->srq_size,
1065 false);
1066 ib_destroy_srq(nsrq->srq);
1067
1068 kfree(nsrq);
1069}
1070
1071static void nvmet_rdma_destroy_srqs(struct nvmet_rdma_device *ndev)
1072{
1073 int i;
1074
1075 if (!ndev->srqs)
851 if (!ndev->srq)
1076 return;
1077
852 return;
853
1078 for (i = 0; i < ndev->srq_count; i++)
1079 nvmet_rdma_destroy_srq(ndev->srqs[i]);
1080
1081 kfree(ndev->srqs);
854 nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false);
855 ib_destroy_srq(ndev->srq);
1082}
1083
856}
857
1084static struct nvmet_rdma_srq *
1085nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
858static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev)
1086{
1087 struct ib_srq_init_attr srq_attr = { NULL, };
859{
860 struct ib_srq_init_attr srq_attr = { NULL, };
1088 size_t srq_size = ndev->srq_size;
1089 struct nvmet_rdma_srq *nsrq;
1090 struct ib_srq *srq;
861 struct ib_srq *srq;
862 size_t srq_size;
1091 int ret, i;
1092
863 int ret, i;
864
1093 nsrq = kzalloc(sizeof(*nsrq), GFP_KERNEL);
1094 if (!nsrq)
1095 return ERR_PTR(-ENOMEM);
865 srq_size = 4095; /* XXX: tune */
1096
1097 srq_attr.attr.max_wr = srq_size;
1098 srq_attr.attr.max_sge = 1 + ndev->inline_page_count;
1099 srq_attr.attr.srq_limit = 0;
1100 srq_attr.srq_type = IB_SRQT_BASIC;
1101 srq = ib_create_srq(ndev->pd, &srq_attr);
1102 if (IS_ERR(srq)) {
866
867 srq_attr.attr.max_wr = srq_size;
868 srq_attr.attr.max_sge = 1 + ndev->inline_page_count;
869 srq_attr.attr.srq_limit = 0;
870 srq_attr.srq_type = IB_SRQT_BASIC;
871 srq = ib_create_srq(ndev->pd, &srq_attr);
872 if (IS_ERR(srq)) {
1103 ret = PTR_ERR(srq);
1104 goto out_free;
873 /*
874 * If SRQs aren't supported we just go ahead and use normal
875 * non-shared receive queues.
876 */
877 pr_info("SRQ requested but not supported.\n");
878 return 0;
1105 }
1106
879 }
880
1107 nsrq->cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false);
1108 if (IS_ERR(nsrq->cmds)) {
1109 ret = PTR_ERR(nsrq->cmds);
881 ndev->srq_cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false);
882 if (IS_ERR(ndev->srq_cmds)) {
883 ret = PTR_ERR(ndev->srq_cmds);
1110 goto out_destroy_srq;
1111 }
1112
884 goto out_destroy_srq;
885 }
886
1113 nsrq->srq = srq;
1114 nsrq->ndev = ndev;
887 ndev->srq = srq;
888 ndev->srq_size = srq_size;
1115
1116 for (i = 0; i < srq_size; i++) {
889
890 for (i = 0; i < srq_size; i++) {
1117 nsrq->cmds[i].nsrq = nsrq;
1118 ret = nvmet_rdma_post_recv(ndev, &nsrq->cmds[i]);
891 ret = nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]);
1119 if (ret)
1120 goto out_free_cmds;
1121 }
1122
892 if (ret)
893 goto out_free_cmds;
894 }
895
1123 return nsrq;
896 return 0;
1124
1125out_free_cmds:
897
898out_free_cmds:
1126 nvmet_rdma_free_cmds(ndev, nsrq->cmds, srq_size, false);
899 nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false);
1127out_destroy_srq:
1128 ib_destroy_srq(srq);
900out_destroy_srq:
901 ib_destroy_srq(srq);
1129out_free:
1130 kfree(nsrq);
1131 return ERR_PTR(ret);
1132}
1133
1134static int nvmet_rdma_init_srqs(struct nvmet_rdma_device *ndev)
1135{
1136 int i, ret;
1137
1138 if (!ndev->device->attrs.max_srq_wr || !ndev->device->attrs.max_srq) {
1139 /*
1140 * If SRQs aren't supported we just go ahead and use normal
1141 * non-shared receive queues.
1142 */
1143 pr_info("SRQ requested but not supported.\n");
1144 return 0;
1145 }
1146
1147 ndev->srq_size = min(ndev->device->attrs.max_srq_wr,
1148 nvmet_rdma_srq_size);
1149 ndev->srq_count = min(ndev->device->num_comp_vectors,
1150 ndev->device->attrs.max_srq);
1151
1152 ndev->srqs = kcalloc(ndev->srq_count, sizeof(*ndev->srqs), GFP_KERNEL);
1153 if (!ndev->srqs)
1154 return -ENOMEM;
1155
1156 for (i = 0; i < ndev->srq_count; i++) {
1157 ndev->srqs[i] = nvmet_rdma_init_srq(ndev);
1158 if (IS_ERR(ndev->srqs[i])) {
1159 ret = PTR_ERR(ndev->srqs[i]);
1160 goto err_srq;
1161 }
1162 }
1163
1164 return 0;
1165
1166err_srq:
1167 while (--i >= 0)
1168 nvmet_rdma_destroy_srq(ndev->srqs[i]);
1169 kfree(ndev->srqs);
1170 return ret;
1171}
1172
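[Editor's note: in the revision that adds nvmet_rdma_init_srqs(), the per-SRQ depth is clamped to the device's max_srq_wr and one SRQ is created per completion vector, bounded by attrs.max_srq; a queue later uses srqs[comp_vector % srq_count]. The sketch below (user-space C with hypothetical device attributes, not kernel code) just walks that arithmetic; the srq_size module parameter must be at least 256 per srq_size_set() above.]

/* sketch: SRQ sizing and queue-to-SRQ distribution with made-up device attributes */
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
	const int max_srq_wr          = 16384; /* hypothetical attrs.max_srq_wr */
	const int max_srq             = 8;     /* hypothetical attrs.max_srq    */
	const int num_comp_vectors    = 16;    /* hypothetical                  */
	const int nvmet_rdma_srq_size = 1024;  /* module parameter default      */

	int srq_size  = MIN(max_srq_wr, nvmet_rdma_srq_size);
	int srq_count = MIN(num_comp_vectors, max_srq);

	printf("srq_size=%d srq_count=%d\n", srq_size, srq_count);

	/* a queue on completion vector cv shares srqs[cv % srq_count] */
	for (int cv = 0; cv < num_comp_vectors; cv++)
		printf("comp_vector %2d -> srq %d\n", cv, cv % srq_count);
	return 0;
}

[End of editor's note.]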
1173static void nvmet_rdma_free_dev(struct kref *ref)
1174{
1175 struct nvmet_rdma_device *ndev =
1176 container_of(ref, struct nvmet_rdma_device, ref);
1177
1178 mutex_lock(&device_list_mutex);
1179 list_del(&ndev->entry);
1180 mutex_unlock(&device_list_mutex);
1181
902 return ret;
903}
904
905static void nvmet_rdma_free_dev(struct kref *ref)
906{
907 struct nvmet_rdma_device *ndev =
908 container_of(ref, struct nvmet_rdma_device, ref);
909
910 mutex_lock(&device_list_mutex);
911 list_del(&ndev->entry);
912 mutex_unlock(&device_list_mutex);
913
1182 nvmet_rdma_destroy_srqs(ndev);
914 nvmet_rdma_destroy_srq(ndev);
1183 ib_dealloc_pd(ndev->pd);
1184
1185 kfree(ndev);
1186}
1187
1188static struct nvmet_rdma_device *
1189nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
1190{

--- 30 unchanged lines hidden ---

1221 ndev->device = cm_id->device;
1222 kref_init(&ndev->ref);
1223
1224 ndev->pd = ib_alloc_pd(ndev->device, 0);
1225 if (IS_ERR(ndev->pd))
1226 goto out_free_dev;
1227
1228 if (nvmet_rdma_use_srq) {
915 ib_dealloc_pd(ndev->pd);
916
917 kfree(ndev);
918}
919
920static struct nvmet_rdma_device *
921nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id)
922{

--- 30 unchanged lines hidden ---

953 ndev->device = cm_id->device;
954 kref_init(&ndev->ref);
955
956 ndev->pd = ib_alloc_pd(ndev->device, 0);
957 if (IS_ERR(ndev->pd))
958 goto out_free_dev;
959
960 if (nvmet_rdma_use_srq) {
1229 ret = nvmet_rdma_init_srqs(ndev);
961 ret = nvmet_rdma_init_srq(ndev);
1230 if (ret)
1231 goto out_free_pd;
1232 }
1233
1234 list_add(&ndev->entry, &device_list);
1235out_unlock:
1236 mutex_unlock(&device_list_mutex);
1237 pr_debug("added %s.\n", ndev->device->name);

--- 7 unchanged lines hidden ---

1245 mutex_unlock(&device_list_mutex);
1246 return NULL;
1247}
1248
1249static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
1250{
1251 struct ib_qp_init_attr qp_attr;
1252 struct nvmet_rdma_device *ndev = queue->dev;
962 if (ret)
963 goto out_free_pd;
964 }
965
966 list_add(&ndev->entry, &device_list);
967out_unlock:
968 mutex_unlock(&device_list_mutex);
969 pr_debug("added %s.\n", ndev->device->name);

--- 7 unchanged lines hidden ---

977 mutex_unlock(&device_list_mutex);
978 return NULL;
979}
980
981static int nvmet_rdma_create_queue_ib(struct nvmet_rdma_queue *queue)
982{
983 struct ib_qp_init_attr qp_attr;
984 struct nvmet_rdma_device *ndev = queue->dev;
1253 int nr_cqe, ret, i, factor;
985 int comp_vector, nr_cqe, ret, i, factor;
1254
1255 /*
986
987 /*
988 * Spread the io queues across completion vectors,
989 * but still keep all admin queues on vector 0.
990 */
991 comp_vector = !queue->host_qid ? 0 :
992 queue->idx % ndev->device->num_comp_vectors;
993
994 /*
1256 * Reserve CQ slots for RECV + RDMA_READ/RDMA_WRITE + RDMA_SEND.
1257 */
1258 nr_cqe = queue->recv_queue_size + 2 * queue->send_queue_size;
1259
1260 queue->cq = ib_alloc_cq(ndev->device, queue,
995 * Reserve CQ slots for RECV + RDMA_READ/RDMA_WRITE + RDMA_SEND.
996 */
997 nr_cqe = queue->recv_queue_size + 2 * queue->send_queue_size;
998
999 queue->cq = ib_alloc_cq(ndev->device, queue,
1261 nr_cqe + 1, queue->comp_vector,
1000 nr_cqe + 1, comp_vector,
1262 IB_POLL_WORKQUEUE);
1263 if (IS_ERR(queue->cq)) {
1264 ret = PTR_ERR(queue->cq);
1265 pr_err("failed to create CQ cqe= %d ret= %d\n",
1266 nr_cqe + 1, ret);
1267 goto out;
1268 }
1269

--- 7 unchanged lines hidden ---

1277 /* +1 for drain */
1278 qp_attr.cap.max_send_wr = queue->send_queue_size + 1;
1279 factor = rdma_rw_mr_factor(ndev->device, queue->cm_id->port_num,
1280 1 << NVMET_RDMA_MAX_MDTS);
1281 qp_attr.cap.max_rdma_ctxs = queue->send_queue_size * factor;
1282 qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd,
1283 ndev->device->attrs.max_send_sge);
1284
1001 IB_POLL_WORKQUEUE);
1002 if (IS_ERR(queue->cq)) {
1003 ret = PTR_ERR(queue->cq);
1004 pr_err("failed to create CQ cqe= %d ret= %d\n",
1005 nr_cqe + 1, ret);
1006 goto out;
1007 }
1008

--- 7 unchanged lines hidden ---

1016 /* +1 for drain */
1017 qp_attr.cap.max_send_wr = queue->send_queue_size + 1;
1018 factor = rdma_rw_mr_factor(ndev->device, queue->cm_id->port_num,
1019 1 << NVMET_RDMA_MAX_MDTS);
1020 qp_attr.cap.max_rdma_ctxs = queue->send_queue_size * factor;
1021 qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd,
1022 ndev->device->attrs.max_send_sge);
1023
1285 if (queue->nsrq) {
1286 qp_attr.srq = queue->nsrq->srq;
1024 if (ndev->srq) {
1025 qp_attr.srq = ndev->srq;
1287 } else {
1288 /* +1 for drain */
1289 qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size;
1290 qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count;
1291 }
1292
1026 } else {
1027 /* +1 for drain */
1028 qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size;
1029 qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count;
1030 }
1031
1293 if (queue->port->pi_enable && queue->host_qid)
1294 qp_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN;
1295
1296 ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr);
1297 if (ret) {
1298 pr_err("failed to create_qp ret= %d\n", ret);
1299 goto err_destroy_cq;
1300 }
1301 queue->qp = queue->cm_id->qp;
1302
1303 atomic_set(&queue->sq_wr_avail, qp_attr.cap.max_send_wr);
1304
1305 pr_debug("%s: max_cqe= %d max_sge= %d sq_size = %d cm_id= %p\n",
1306 __func__, queue->cq->cqe, qp_attr.cap.max_send_sge,
1307 qp_attr.cap.max_send_wr, queue->cm_id);
1308
1032 ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr);
1033 if (ret) {
1034 pr_err("failed to create_qp ret= %d\n", ret);
1035 goto err_destroy_cq;
1036 }
1037 queue->qp = queue->cm_id->qp;
1038
1039 atomic_set(&queue->sq_wr_avail, qp_attr.cap.max_send_wr);
1040
1041 pr_debug("%s: max_cqe= %d max_sge= %d sq_size = %d cm_id= %p\n",
1042 __func__, queue->cq->cqe, qp_attr.cap.max_send_sge,
1043 qp_attr.cap.max_send_wr, queue->cm_id);
1044
1309 if (!queue->nsrq) {
1045 if (!ndev->srq) {
1310 for (i = 0; i < queue->recv_queue_size; i++) {
1311 queue->cmds[i].queue = queue;
1312 ret = nvmet_rdma_post_recv(ndev, &queue->cmds[i]);
1313 if (ret)
1314 goto err_destroy_qp;
1315 }
1316 }
1317

--- 18 unchanged lines hidden ---

1336
1337static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue)
1338{
1339 pr_debug("freeing queue %d\n", queue->idx);
1340
1341 nvmet_sq_destroy(&queue->nvme_sq);
1342
1343 nvmet_rdma_destroy_queue_ib(queue);
1046 for (i = 0; i < queue->recv_queue_size; i++) {
1047 queue->cmds[i].queue = queue;
1048 ret = nvmet_rdma_post_recv(ndev, &queue->cmds[i]);
1049 if (ret)
1050 goto err_destroy_qp;
1051 }
1052 }
1053

--- 18 unchanged lines hidden ---

1072
1073static void nvmet_rdma_free_queue(struct nvmet_rdma_queue *queue)
1074{
1075 pr_debug("freeing queue %d\n", queue->idx);
1076
1077 nvmet_sq_destroy(&queue->nvme_sq);
1078
1079 nvmet_rdma_destroy_queue_ib(queue);
1344 if (!queue->nsrq) {
1080 if (!queue->dev->srq) {
1345 nvmet_rdma_free_cmds(queue->dev, queue->cmds,
1346 queue->recv_queue_size,
1347 !queue->host_qid);
1348 }
1349 nvmet_rdma_free_rsps(queue);
1350 ida_simple_remove(&nvmet_rdma_queue_ida, queue->idx);
1351 kfree(queue);
1352}

--- 45 unchanged lines hidden ---

1398 struct nvme_rdma_cm_rej rej;
1399
1400 pr_debug("rejecting connect request: status %d (%s)\n",
1401 status, nvme_rdma_cm_msg(status));
1402
1403 rej.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0);
1404 rej.sts = cpu_to_le16(status);
1405
1081 nvmet_rdma_free_cmds(queue->dev, queue->cmds,
1082 queue->recv_queue_size,
1083 !queue->host_qid);
1084 }
1085 nvmet_rdma_free_rsps(queue);
1086 ida_simple_remove(&nvmet_rdma_queue_ida, queue->idx);
1087 kfree(queue);
1088}

--- 45 unchanged lines hidden ---

1134 struct nvme_rdma_cm_rej rej;
1135
1136 pr_debug("rejecting connect request: status %d (%s)\n",
1137 status, nvme_rdma_cm_msg(status));
1138
1139 rej.recfmt = cpu_to_le16(NVME_RDMA_CM_FMT_1_0);
1140 rej.sts = cpu_to_le16(status);
1141
1406 return rdma_reject(cm_id, (void *)&rej, sizeof(rej));
1142 return rdma_reject(cm_id, (void *)&rej, sizeof(rej),
1143 IB_CM_REJ_CONSUMER_DEFINED);
1407}
1408
1409static struct nvmet_rdma_queue *
1410nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
1411 struct rdma_cm_id *cm_id,
1412 struct rdma_cm_event *event)
1413{
1144}
1145
1146static struct nvmet_rdma_queue *
1147nvmet_rdma_alloc_queue(struct nvmet_rdma_device *ndev,
1148 struct rdma_cm_id *cm_id,
1149 struct rdma_cm_event *event)
1150{
1414 struct nvmet_rdma_port *port = cm_id->context;
1415 struct nvmet_rdma_queue *queue;
1416 int ret;
1417
1418 queue = kzalloc(sizeof(*queue), GFP_KERNEL);
1419 if (!queue) {
1420 ret = NVME_RDMA_CM_NO_RSC;
1421 goto out_reject;
1422 }

--- 10 unchanged lines hidden ---

1433
1434 /*
1435 * Schedules the actual release because calling rdma_destroy_id from
1436 * inside a CM callback would trigger a deadlock. (great API design..)
1437 */
1438 INIT_WORK(&queue->release_work, nvmet_rdma_release_queue_work);
1439 queue->dev = ndev;
1440 queue->cm_id = cm_id;
1151 struct nvmet_rdma_queue *queue;
1152 int ret;
1153
1154 queue = kzalloc(sizeof(*queue), GFP_KERNEL);
1155 if (!queue) {
1156 ret = NVME_RDMA_CM_NO_RSC;
1157 goto out_reject;
1158 }

--- 10 unchanged lines hidden ---

1169
1170 /*
1171 * Schedules the actual release because calling rdma_destroy_id from
1172 * inside a CM callback would trigger a deadlock. (great API design..)
1173 */
1174 INIT_WORK(&queue->release_work, nvmet_rdma_release_queue_work);
1175 queue->dev = ndev;
1176 queue->cm_id = cm_id;
1441 queue->port = port->nport;
1442
1443 spin_lock_init(&queue->state_lock);
1444 queue->state = NVMET_RDMA_Q_CONNECTING;
1445 INIT_LIST_HEAD(&queue->rsp_wait_list);
1446 INIT_LIST_HEAD(&queue->rsp_wr_wait_list);
1447 spin_lock_init(&queue->rsp_wr_wait_lock);
1448 INIT_LIST_HEAD(&queue->free_rsps);
1449 spin_lock_init(&queue->rsps_lock);
1450 INIT_LIST_HEAD(&queue->queue_list);
1451
1452 queue->idx = ida_simple_get(&nvmet_rdma_queue_ida, 0, 0, GFP_KERNEL);
1453 if (queue->idx < 0) {
1454 ret = NVME_RDMA_CM_NO_RSC;
1455 goto out_destroy_sq;
1456 }
1457
1177
1178 spin_lock_init(&queue->state_lock);
1179 queue->state = NVMET_RDMA_Q_CONNECTING;
1180 INIT_LIST_HEAD(&queue->rsp_wait_list);
1181 INIT_LIST_HEAD(&queue->rsp_wr_wait_list);
1182 spin_lock_init(&queue->rsp_wr_wait_lock);
1183 INIT_LIST_HEAD(&queue->free_rsps);
1184 spin_lock_init(&queue->rsps_lock);
1185 INIT_LIST_HEAD(&queue->queue_list);
1186
1187 queue->idx = ida_simple_get(&nvmet_rdma_queue_ida, 0, 0, GFP_KERNEL);
1188 if (queue->idx < 0) {
1189 ret = NVME_RDMA_CM_NO_RSC;
1190 goto out_destroy_sq;
1191 }
1192
1458 /*
1459 * Spread the io queues across completion vectors,
1460 * but still keep all admin queues on vector 0.
1461 */
1462 queue->comp_vector = !queue->host_qid ? 0 :
1463 queue->idx % ndev->device->num_comp_vectors;
1464
1465
1466 ret = nvmet_rdma_alloc_rsps(queue);
1467 if (ret) {
1468 ret = NVME_RDMA_CM_NO_RSC;
1469 goto out_ida_remove;
1470 }
1471
1193 ret = nvmet_rdma_alloc_rsps(queue);
1194 if (ret) {
1195 ret = NVME_RDMA_CM_NO_RSC;
1196 goto out_ida_remove;
1197 }
1198
1472 if (ndev->srqs) {
1473 queue->nsrq = ndev->srqs[queue->comp_vector % ndev->srq_count];
1474 } else {
1199 if (!ndev->srq) {
1475 queue->cmds = nvmet_rdma_alloc_cmds(ndev,
1476 queue->recv_queue_size,
1477 !queue->host_qid);
1478 if (IS_ERR(queue->cmds)) {
1479 ret = NVME_RDMA_CM_NO_RSC;
1480 goto out_free_responses;
1481 }
1482 }

--- 4 unchanged lines hidden ---

1487 __func__, ret);
1488 ret = NVME_RDMA_CM_NO_RSC;
1489 goto out_free_cmds;
1490 }
1491
1492 return queue;
1493
1494out_free_cmds:
1200 queue->cmds = nvmet_rdma_alloc_cmds(ndev,
1201 queue->recv_queue_size,
1202 !queue->host_qid);
1203 if (IS_ERR(queue->cmds)) {
1204 ret = NVME_RDMA_CM_NO_RSC;
1205 goto out_free_responses;
1206 }
1207 }

--- 4 unchanged lines hidden ---

1212 __func__, ret);
1213 ret = NVME_RDMA_CM_NO_RSC;
1214 goto out_free_cmds;
1215 }
1216
1217 return queue;
1218
1219out_free_cmds:
1495 if (!queue->nsrq) {
1220 if (!ndev->srq) {
1496 nvmet_rdma_free_cmds(queue->dev, queue->cmds,
1497 queue->recv_queue_size,
1498 !queue->host_qid);
1499 }
1500out_free_responses:
1501 nvmet_rdma_free_rsps(queue);
1502out_ida_remove:
1503 ida_simple_remove(&nvmet_rdma_queue_ida, queue->idx);

--- 9 unchanged lines hidden ---

1513static void nvmet_rdma_qp_event(struct ib_event *event, void *priv)
1514{
1515 struct nvmet_rdma_queue *queue = priv;
1516
1517 switch (event->event) {
1518 case IB_EVENT_COMM_EST:
1519 rdma_notify(queue->cm_id, event->event);
1520 break;
1221 nvmet_rdma_free_cmds(queue->dev, queue->cmds,
1222 queue->recv_queue_size,
1223 !queue->host_qid);
1224 }
1225out_free_responses:
1226 nvmet_rdma_free_rsps(queue);
1227out_ida_remove:
1228 ida_simple_remove(&nvmet_rdma_queue_ida, queue->idx);

--- 9 unchanged lines hidden ---

1238static void nvmet_rdma_qp_event(struct ib_event *event, void *priv)
1239{
1240 struct nvmet_rdma_queue *queue = priv;
1241
1242 switch (event->event) {
1243 case IB_EVENT_COMM_EST:
1244 rdma_notify(queue->cm_id, event->event);
1245 break;
1521 case IB_EVENT_QP_LAST_WQE_REACHED:
1522 pr_debug("received last WQE reached event for queue=0x%p\n",
1523 queue);
1524 break;
1525 default:
1526 pr_err("received IB QP event: %s (%d)\n",
1527 ib_event_msg(event->event), event->event);
1528 break;
1529 }
1530}
1531
1532static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id,

--- 18 unchanged lines hidden ---

1551 pr_err("rdma_accept failed (error code = %d)\n", ret);
1552
1553 return ret;
1554}
1555
1556static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
1557 struct rdma_cm_event *event)
1558{
1246 default:
1247 pr_err("received IB QP event: %s (%d)\n",
1248 ib_event_msg(event->event), event->event);
1249 break;
1250 }
1251}
1252
1253static int nvmet_rdma_cm_accept(struct rdma_cm_id *cm_id,

--- 18 unchanged lines hidden ---

1272 pr_err("rdma_accept failed (error code = %d)\n", ret);
1273
1274 return ret;
1275}
1276
1277static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id,
1278 struct rdma_cm_event *event)
1279{
1280 struct nvmet_rdma_port *port = cm_id->context;
1559 struct nvmet_rdma_device *ndev;
1560 struct nvmet_rdma_queue *queue;
1561 int ret = -EINVAL;
1562
1563 ndev = nvmet_rdma_find_get_device(cm_id);
1564 if (!ndev) {
1565 nvmet_rdma_cm_reject(cm_id, NVME_RDMA_CM_NO_RSC);
1566 return -ECONNREFUSED;
1567 }
1568
1569 queue = nvmet_rdma_alloc_queue(ndev, cm_id, event);
1570 if (!queue) {
1571 ret = -ENOMEM;
1572 goto put_device;
1573 }
1281 struct nvmet_rdma_device *ndev;
1282 struct nvmet_rdma_queue *queue;
1283 int ret = -EINVAL;
1284
1285 ndev = nvmet_rdma_find_get_device(cm_id);
1286 if (!ndev) {
1287 nvmet_rdma_cm_reject(cm_id, NVME_RDMA_CM_NO_RSC);
1288 return -ECONNREFUSED;
1289 }
1290
1291 queue = nvmet_rdma_alloc_queue(ndev, cm_id, event);
1292 if (!queue) {
1293 ret = -ENOMEM;
1294 goto put_device;
1295 }
1296 queue->port = port->nport;
1574
1575 if (queue->host_qid == 0) {
1576 /* Let inflight controller teardown complete */
1577 flush_scheduled_work();
1578 }
1579
1580 ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
1581 if (ret) {

--- 255 unchanged lines hidden ---

1837 }
1838
1839 ret = rdma_listen(cm_id, 128);
1840 if (ret) {
1841 pr_err("listening to %pISpcs failed (%d)\n", addr, ret);
1842 goto out_destroy_id;
1843 }
1844
1297
1298 if (queue->host_qid == 0) {
1299 /* Let inflight controller teardown complete */
1300 flush_scheduled_work();
1301 }
1302
1303 ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn);
1304 if (ret) {

--- 255 unchanged lines hidden ---

1560 }
1561
1562 ret = rdma_listen(cm_id, 128);
1563 if (ret) {
1564 pr_err("listening to %pISpcs failed (%d)\n", addr, ret);
1565 goto out_destroy_id;
1566 }
1567
1845 if (port->nport->pi_enable &&
1846 !(cm_id->device->attrs.device_cap_flags &
1847 IB_DEVICE_INTEGRITY_HANDOVER)) {
1848 pr_err("T10-PI is not supported for %pISpcs\n", addr);
1849 ret = -EINVAL;
1850 goto out_destroy_id;
1851 }
1852
1853 port->cm_id = cm_id;
1854 return 0;
1855
1856out_destroy_id:
1857 rdma_destroy_id(cm_id);
1858 return ret;
1859}
1860

--- 93 unchanged lines hidden ---

1954 sprintf(traddr, "%pISc", addr);
1955 } else {
1956 memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE);
1957 }
1958}
1959
1960static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl)
1961{
1568 port->cm_id = cm_id;
1569 return 0;
1570
1571out_destroy_id:
1572 rdma_destroy_id(cm_id);
1573 return ret;
1574}
1575

--- 93 unchanged lines hidden ---

1669 sprintf(traddr, "%pISc", addr);
1670 } else {
1671 memcpy(traddr, nport->disc_addr.traddr, NVMF_TRADDR_SIZE);
1672 }
1673}
1674
1675static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl)
1676{
1962 if (ctrl->pi_support)
1963 return NVMET_RDMA_MAX_METADATA_MDTS;
1964 return NVMET_RDMA_MAX_MDTS;
1965}
1966
1967static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
1968 .owner = THIS_MODULE,
1969 .type = NVMF_TRTYPE_RDMA,
1970 .msdbd = 1,
1971 .has_keyed_sgls = 1,
1677 return NVMET_RDMA_MAX_MDTS;
1678}
1679
1680static const struct nvmet_fabrics_ops nvmet_rdma_ops = {
1681 .owner = THIS_MODULE,
1682 .type = NVMF_TRTYPE_RDMA,
1683 .msdbd = 1,
1684 .has_keyed_sgls = 1,
1972 .metadata_support = 1,
1973 .add_port = nvmet_rdma_add_port,
1974 .remove_port = nvmet_rdma_remove_port,
1975 .queue_response = nvmet_rdma_queue_response,
1976 .delete_ctrl = nvmet_rdma_delete_ctrl,
1977 .disc_traddr = nvmet_rdma_disc_port_addr,
1978 .get_mdts = nvmet_rdma_get_mdts,
1979};
1980

--- 74 unchanged lines hidden ---
1685 .add_port = nvmet_rdma_add_port,
1686 .remove_port = nvmet_rdma_remove_port,
1687 .queue_response = nvmet_rdma_queue_response,
1688 .delete_ctrl = nvmet_rdma_delete_ctrl,
1689 .disc_traddr = nvmet_rdma_disc_port_addr,
1690 .get_mdts = nvmet_rdma_get_mdts,
1691};
1692

--- 74 unchanged lines hidden ---