/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2022-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/refcount.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/nvmf_transport_internal.h>

/* Transport-independent support for fabrics queue pairs and commands. */

struct nvmf_transport {
	struct nvmf_transport_ops *nt_ops;

	volatile u_int nt_active_qpairs;
	SLIST_ENTRY(nvmf_transport) nt_link;
};

/* Each nvmf_transports[trtype] list is sorted by descending priority. */
static SLIST_HEAD(, nvmf_transport) nvmf_transports[NVMF_TRTYPE_TCP + 1];
static struct sx nvmf_transports_lock;

static MALLOC_DEFINE(M_NVMF_TRANSPORT, "nvmf_xport",
    "NVMe over Fabrics transport");

SYSCTL_NODE(_kern, OID_AUTO, nvmf, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "NVMe over Fabrics");

static bool
nvmf_supported_trtype(enum nvmf_trtype trtype)
{
	return (trtype < nitems(nvmf_transports));
}

struct nvmf_qpair *
nvmf_allocate_qpair(enum nvmf_trtype trtype, bool controller,
    const struct nvmf_handoff_qpair_params *params,
    nvmf_qpair_error_t *error_cb, void *error_cb_arg,
    nvmf_capsule_receive_t *receive_cb, void *receive_cb_arg)
{
	struct nvmf_transport *nt;
	struct nvmf_qpair *qp;

	if (!nvmf_supported_trtype(trtype))
		return (NULL);

	/* Offer the handoff to each registered transport in priority order. */
	qp = NULL;
	sx_slock(&nvmf_transports_lock);
	SLIST_FOREACH(nt, &nvmf_transports[trtype], nt_link) {
		qp = nt->nt_ops->allocate_qpair(controller, params);
		if (qp != NULL) {
			refcount_acquire(&nt->nt_active_qpairs);
			break;
		}
	}
	sx_sunlock(&nvmf_transports_lock);
	if (qp == NULL)
		return (NULL);

	qp->nq_transport = nt;
	qp->nq_ops = nt->nt_ops;
	qp->nq_controller = controller;
	qp->nq_error = error_cb;
	qp->nq_error_arg = error_cb_arg;
	qp->nq_receive = receive_cb;
	qp->nq_receive_arg = receive_cb_arg;
	qp->nq_admin = params->admin;
	return (qp);
}

void
nvmf_free_qpair(struct nvmf_qpair *qp)
{
	struct nvmf_transport *nt;

	nt = qp->nq_transport;
	qp->nq_ops->free_qpair(qp);

	/* Wake up a pending MOD_UNLOAD waiting for qpairs to drain. */
	if (refcount_release(&nt->nt_active_qpairs))
		wakeup(nt);
}

struct nvmf_capsule *
nvmf_allocate_command(struct nvmf_qpair *qp, const void *sqe, int how)
{
	struct nvmf_capsule *nc;

	KASSERT(how == M_WAITOK || how == M_NOWAIT,
	    ("%s: invalid how", __func__));
	nc = qp->nq_ops->allocate_capsule(qp, how);
	if (nc == NULL)
		return (NULL);

	nc->nc_qpair = qp;
	nc->nc_qe_len = sizeof(struct nvme_command);
	memcpy(&nc->nc_sqe, sqe, nc->nc_qe_len);

	/* Section 4.2 of the NVMe base spec: Fabrics always uses SGLs. */
	nc->nc_sqe.fuse &= ~NVMEM(NVME_CMD_PSDT);
	nc->nc_sqe.fuse |= NVMEF(NVME_CMD_PSDT, NVME_PSDT_SGL);
	return (nc);
}
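/*
 * Example (sketch, compiled out): one way a consumer of this KPI might
 * submit a command capsule with an attached host buffer.  The helper
 * names, the completion callback, and capsule ownership on the success
 * path are hypothetical; only the nvmf_* calls below are the interface
 * defined in this file.  Assumes struct memdesc and memdesc_vaddr() are
 * visible via the headers above.
 */
#ifdef NVMF_TRANSPORT_EXAMPLE
static void
example_io_complete(void *arg, size_t xfered, int error)
{
	/* A real consumer would record the transfer status here. */
}

static int
example_submit_command(struct nvmf_qpair *qp, const struct nvme_command *sqe,
    void *buf, size_t len)
{
	struct nvmf_capsule *nc;
	struct memdesc mem;
	int error;

	nc = nvmf_allocate_command(qp, sqe, M_WAITOK);
	if (nc == NULL)
		return (ENOMEM);

	/* Describe the host buffer to be sent along with the command. */
	mem = memdesc_vaddr(buf, len);
	error = nvmf_capsule_append_data(nc, &mem, len, true,
	    example_io_complete, NULL);
	if (error == 0)
		error = nvmf_transmit_capsule(nc);
	if (error != 0)
		nvmf_free_capsule(nc);
	/*
	 * On success a real consumer would free the capsule once the
	 * response and data completion callbacks have run.
	 */
	return (error);
}
#endif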
struct nvmf_capsule *
nvmf_allocate_response(struct nvmf_qpair *qp, const void *cqe, int how)
{
	struct nvmf_capsule *nc;

	KASSERT(how == M_WAITOK || how == M_NOWAIT,
	    ("%s: invalid how", __func__));
	nc = qp->nq_ops->allocate_capsule(qp, how);
	if (nc == NULL)
		return (NULL);

	nc->nc_qpair = qp;
	nc->nc_qe_len = sizeof(struct nvme_completion);
	memcpy(&nc->nc_cqe, cqe, nc->nc_qe_len);
	return (nc);
}

int
nvmf_capsule_append_data(struct nvmf_capsule *nc, struct memdesc *mem,
    size_t len, bool send, nvmf_io_complete_t *complete_cb,
    void *cb_arg)
{
	if (nc->nc_data.io_len != 0)
		return (EBUSY);

	nc->nc_send_data = send;
	nc->nc_data.io_mem = *mem;
	nc->nc_data.io_len = len;
	nc->nc_data.io_complete = complete_cb;
	nc->nc_data.io_complete_arg = cb_arg;
	return (0);
}

void
nvmf_free_capsule(struct nvmf_capsule *nc)
{
	nc->nc_qpair->nq_ops->free_capsule(nc);
}

int
nvmf_transmit_capsule(struct nvmf_capsule *nc)
{
	return (nc->nc_qpair->nq_ops->transmit_capsule(nc));
}

void
nvmf_abort_capsule_data(struct nvmf_capsule *nc, int error)
{
	if (nc->nc_data.io_len != 0)
		nvmf_complete_io_request(&nc->nc_data, 0, error);
}

void *
nvmf_capsule_sqe(struct nvmf_capsule *nc)
{
	KASSERT(nc->nc_qe_len == sizeof(struct nvme_command),
	    ("%s: capsule %p is not a command capsule", __func__, nc));
	return (&nc->nc_sqe);
}

void *
nvmf_capsule_cqe(struct nvmf_capsule *nc)
{
	KASSERT(nc->nc_qe_len == sizeof(struct nvme_completion),
	    ("%s: capsule %p is not a response capsule", __func__, nc));
	return (&nc->nc_cqe);
}

uint8_t
nvmf_validate_command_capsule(struct nvmf_capsule *nc)
{
	KASSERT(nc->nc_qe_len == sizeof(struct nvme_command),
	    ("%s: capsule %p is not a command capsule", __func__, nc));

	if (NVMEV(NVME_CMD_PSDT, nc->nc_sqe.fuse) != NVME_PSDT_SGL)
		return (NVME_SC_INVALID_FIELD);

	return (nc->nc_qpair->nq_ops->validate_command_capsule(nc));
}

size_t
nvmf_capsule_data_len(const struct nvmf_capsule *nc)
{
	return (nc->nc_qpair->nq_ops->capsule_data_len(nc));
}

int
nvmf_receive_controller_data(struct nvmf_capsule *nc, uint32_t data_offset,
    struct memdesc *mem, size_t len, nvmf_io_complete_t *complete_cb,
    void *cb_arg)
{
	struct nvmf_io_request io;

	io.io_mem = *mem;
	io.io_len = len;
	io.io_complete = complete_cb;
	io.io_complete_arg = cb_arg;
	return (nc->nc_qpair->nq_ops->receive_controller_data(nc, data_offset,
	    &io));
}

u_int
nvmf_send_controller_data(struct nvmf_capsule *nc, uint32_t data_offset,
    struct mbuf *m, size_t len)
{
	MPASS(m_length(m, NULL) == len);
	return (nc->nc_qpair->nq_ops->send_controller_data(nc, data_offset, m,
	    len));
}
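/*
 * Example (sketch, compiled out): a controller-side consumer answering a
 * host read by handing an mbuf chain to the transport.  The payload
 * source and helper name are hypothetical; m_get2() is the standard mbuf
 * KPI, and this sketch assumes "len" fits in a single cluster so that
 * m_get2() with M_WAITOK cannot fail.
 */
#ifdef NVMF_TRANSPORT_EXAMPLE
static u_int
example_reply_data(struct nvmf_capsule *nc, const void *src, int len)
{
	struct mbuf *m;

	/* Sketch assumption: len fits in one mbuf cluster. */
	m = m_get2(len, M_WAITOK, MT_DATA, 0);
	memcpy(mtod(m, void *), src, len);
	m->m_len = len;

	/* Offset 0: satisfy the whole transfer described by the command. */
	return (nvmf_send_controller_data(nc, 0, m, len));
}
#endif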
int
nvmf_transport_module_handler(struct module *mod, int what, void *arg)
{
	struct nvmf_transport_ops *ops = arg;
	struct nvmf_transport *nt, *nt2, *prev;
	int error;

	switch (what) {
	case MOD_LOAD:
		if (!nvmf_supported_trtype(ops->trtype)) {
			printf("NVMF: Unsupported transport %u\n",
			    ops->trtype);
			return (EINVAL);
		}

		nt = malloc(sizeof(*nt), M_NVMF_TRANSPORT, M_WAITOK | M_ZERO);
		nt->nt_ops = arg;

		/* Insert in descending priority order. */
		sx_xlock(&nvmf_transports_lock);
		if (SLIST_EMPTY(&nvmf_transports[ops->trtype])) {
			SLIST_INSERT_HEAD(&nvmf_transports[ops->trtype], nt,
			    nt_link);
		} else {
			prev = NULL;
			SLIST_FOREACH(nt2, &nvmf_transports[ops->trtype],
			    nt_link) {
				if (ops->priority > nt2->nt_ops->priority)
					break;
				prev = nt2;
			}
			if (prev == NULL)
				SLIST_INSERT_HEAD(
				    &nvmf_transports[ops->trtype], nt,
				    nt_link);
			else
				SLIST_INSERT_AFTER(prev, nt, nt_link);
		}
		sx_xunlock(&nvmf_transports_lock);
		return (0);

	case MOD_QUIESCE:
		if (!nvmf_supported_trtype(ops->trtype))
			return (0);

		sx_slock(&nvmf_transports_lock);
		SLIST_FOREACH(nt, &nvmf_transports[ops->trtype], nt_link) {
			if (nt->nt_ops == ops)
				break;
		}
		if (nt == NULL) {
			sx_sunlock(&nvmf_transports_lock);
			return (0);
		}
		if (nt->nt_active_qpairs != 0) {
			sx_sunlock(&nvmf_transports_lock);
			return (EBUSY);
		}
		sx_sunlock(&nvmf_transports_lock);
		return (0);

	case MOD_UNLOAD:
		if (!nvmf_supported_trtype(ops->trtype))
			return (0);

		sx_xlock(&nvmf_transports_lock);
		prev = NULL;
		SLIST_FOREACH(nt, &nvmf_transports[ops->trtype], nt_link) {
			if (nt->nt_ops == ops)
				break;
			prev = nt;
		}
		if (nt == NULL) {
			sx_xunlock(&nvmf_transports_lock);
			return (0);
		}

		if (prev == NULL)
			SLIST_REMOVE_HEAD(&nvmf_transports[ops->trtype],
			    nt_link);
		else
			SLIST_REMOVE_AFTER(prev, nt_link);

		/* Wait for any active queue pairs to drain. */
		error = 0;
		while (nt->nt_active_qpairs != 0 && error == 0)
			error = sx_sleep(nt, &nvmf_transports_lock, PCATCH,
			    "nftunld", 0);
		sx_xunlock(&nvmf_transports_lock);
		if (error != 0)
			return (error);
		free(nt, M_NVMF_TRANSPORT);
		return (0);

	default:
		return (EOPNOTSUPP);
	}
}

static int
nvmf_transport_modevent(module_t mod __unused, int what, void *arg __unused)
{
	switch (what) {
	case MOD_LOAD:
		for (u_int i = 0; i < nitems(nvmf_transports); i++)
			SLIST_INIT(&nvmf_transports[i]);
		sx_init(&nvmf_transports_lock, "nvmf transports");
		return (0);
	default:
		return (EOPNOTSUPP);
	}
}

static moduledata_t nvmf_transport_mod = {
	"nvmf_transport",
	nvmf_transport_modevent,
	0
};

DECLARE_MODULE(nvmf_transport, nvmf_transport_mod, SI_SUB_DRIVERS,
    SI_ORDER_FIRST);
MODULE_VERSION(nvmf_transport, 1);
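/*
 * Example (sketch, compiled out): how a transport module plugs into
 * nvmf_transport_module_handler() above.  "mytr" and its methods are
 * hypothetical; a real transport supplies every nvmf_transport_ops
 * method, and the trtype/priority fields shown are the ones the handler
 * consults when registering.
 */
#ifdef NVMF_TRANSPORT_EXAMPLE
static struct nvmf_transport_ops mytr_ops = {
	.allocate_qpair = mytr_allocate_qpair,
	/* ... remaining methods elided ... */
	.trtype = NVMF_TRTYPE_TCP,
	.priority = 10,	/* Tried before lower-priority TCP transports. */
};

static moduledata_t mytr_mod = {
	"nvmf_mytr",
	nvmf_transport_module_handler,
	&mytr_ops
};
DECLARE_MODULE(nvmf_mytr, mytr_mod, SI_SUB_DRIVERS, SI_ORDER_ANY);
MODULE_DEPEND(nvmf_mytr, nvmf_transport, 1, 1, 1);
#endif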