/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2022-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/refcount.h>
#include <sys/sx.h>
#include <sys/sysctl.h>

#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/nvmf_transport_internal.h>

/* Transport-independent support for fabrics queue pairs and commands. */

struct nvmf_transport {
	struct nvmf_transport_ops *nt_ops;

	volatile u_int nt_active_qpairs;
	SLIST_ENTRY(nvmf_transport) nt_link;
};

/* nvmf_transports[nvmf_trtype] is sorted by priority, highest first. */
static SLIST_HEAD(, nvmf_transport) nvmf_transports[NVMF_TRTYPE_TCP + 1];
static struct sx nvmf_transports_lock;

static MALLOC_DEFINE(M_NVMF_TRANSPORT, "nvmf_xport",
    "NVMe over Fabrics transport");

SYSCTL_NODE(_kern, OID_AUTO, nvmf, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "NVMe over Fabrics");

static bool
nvmf_supported_trtype(enum nvmf_trtype trtype)
{
	return (trtype < nitems(nvmf_transports));
}

struct nvmf_qpair *
nvmf_allocate_qpair(enum nvmf_trtype trtype, bool controller,
    const struct nvmf_handoff_qpair_params *params,
    nvmf_qpair_error_t *error_cb, void *error_cb_arg,
    nvmf_capsule_receive_t *receive_cb, void *receive_cb_arg)
{
	struct nvmf_transport *nt;
	struct nvmf_qpair *qp;

	if (!nvmf_supported_trtype(trtype))
		return (NULL);

	qp = NULL;
	sx_slock(&nvmf_transports_lock);
	SLIST_FOREACH(nt, &nvmf_transports[trtype], nt_link) {
		qp = nt->nt_ops->allocate_qpair(controller, params);
		if (qp != NULL) {
			refcount_acquire(&nt->nt_active_qpairs);
			break;
		}
	}
	sx_sunlock(&nvmf_transports_lock);
	if (qp == NULL)
		return (NULL);

	qp->nq_transport = nt;
	qp->nq_ops = nt->nt_ops;
	qp->nq_controller = controller;
	qp->nq_error = error_cb;
	qp->nq_error_arg = error_cb_arg;
	qp->nq_receive = receive_cb;
	qp->nq_receive_arg = receive_cb_arg;
	qp->nq_admin = params->admin;
	return (qp);
}

void
nvmf_free_qpair(struct nvmf_qpair *qp)
{
	struct nvmf_transport *nt;

	nt = qp->nq_transport;
	qp->nq_ops->free_qpair(qp);
	if (refcount_release(&nt->nt_active_qpairs))
		wakeup(nt);
}

struct nvmf_capsule *
nvmf_allocate_command(struct nvmf_qpair *qp, const void *sqe, int how)
{
	struct nvmf_capsule *nc;

	KASSERT(how == M_WAITOK || how == M_NOWAIT,
	    ("%s: invalid how", __func__));
	nc = qp->nq_ops->allocate_capsule(qp, how);
	if (nc == NULL)
		return (NULL);

	nc->nc_qpair = qp;
	nc->nc_qe_len = sizeof(struct nvme_command);
	memcpy(&nc->nc_sqe, sqe, nc->nc_qe_len);

	/* Section 4.2 of the NVMe base spec: Fabrics always uses SGLs. */
	nc->nc_sqe.fuse &= ~NVMEM(NVME_CMD_PSDT);
	nc->nc_sqe.fuse |= NVMEF(NVME_CMD_PSDT, NVME_PSDT_SGL);
	return (nc);
}
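/*
 * Illustrative sketch, not compiled: a typical host-side caller
 * allocates a command capsule, attaches a data buffer, and transmits
 * the capsule.  The qp, sqe, buf, buflen, example_io_complete, and
 * example_arg names are hypothetical, and memdesc_vaddr() is assumed
 * to be available from <sys/memdesc.h>:
 *
 *	struct memdesc mem;
 *	struct nvmf_capsule *nc;
 *	int error;
 *
 *	nc = nvmf_allocate_command(qp, &sqe, M_WAITOK);
 *	if (nc == NULL)
 *		return (ENOMEM);
 *	mem = memdesc_vaddr(buf, buflen);
 *	error = nvmf_capsule_append_data(nc, &mem, buflen, true,
 *	    example_io_complete, example_arg);
 *	if (error == 0)
 *		error = nvmf_transmit_capsule(nc);
 *	if (error != 0)
 *		nvmf_free_capsule(nc);
 *
 * On success the capsule would be freed with nvmf_free_capsule() once
 * the exchange completes.
 */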
struct nvmf_capsule *
nvmf_allocate_response(struct nvmf_qpair *qp, const void *cqe, int how)
{
	struct nvmf_capsule *nc;

	KASSERT(how == M_WAITOK || how == M_NOWAIT,
	    ("%s: invalid how", __func__));
	nc = qp->nq_ops->allocate_capsule(qp, how);
	if (nc == NULL)
		return (NULL);

	nc->nc_qpair = qp;
	nc->nc_qe_len = sizeof(struct nvme_completion);
	memcpy(&nc->nc_cqe, cqe, nc->nc_qe_len);
	return (nc);
}

int
nvmf_capsule_append_data(struct nvmf_capsule *nc, struct memdesc *mem,
    size_t len, bool send, nvmf_io_complete_t *complete_cb,
    void *cb_arg)
{
	if (nc->nc_data.io_len != 0)
		return (EBUSY);

	nc->nc_send_data = send;
	nc->nc_data.io_mem = *mem;
	nc->nc_data.io_len = len;
	nc->nc_data.io_complete = complete_cb;
	nc->nc_data.io_complete_arg = cb_arg;
	return (0);
}

void
nvmf_free_capsule(struct nvmf_capsule *nc)
{
	nc->nc_qpair->nq_ops->free_capsule(nc);
}

int
nvmf_transmit_capsule(struct nvmf_capsule *nc)
{
	return (nc->nc_qpair->nq_ops->transmit_capsule(nc));
}

void
nvmf_abort_capsule_data(struct nvmf_capsule *nc, int error)
{
	if (nc->nc_data.io_len != 0)
		nvmf_complete_io_request(&nc->nc_data, 0, error);
}

void *
nvmf_capsule_sqe(struct nvmf_capsule *nc)
{
	KASSERT(nc->nc_qe_len == sizeof(struct nvme_command),
	    ("%s: capsule %p is not a command capsule", __func__, nc));
	return (&nc->nc_sqe);
}

void *
nvmf_capsule_cqe(struct nvmf_capsule *nc)
{
	KASSERT(nc->nc_qe_len == sizeof(struct nvme_completion),
	    ("%s: capsule %p is not a response capsule", __func__, nc));
	return (&nc->nc_cqe);
}

bool
nvmf_sqhd_valid(struct nvmf_capsule *nc)
{
	KASSERT(nc->nc_qe_len == sizeof(struct nvme_completion),
	    ("%s: capsule %p is not a response capsule", __func__, nc));
	return (nc->nc_sqhd_valid);
}

uint8_t
nvmf_validate_command_capsule(struct nvmf_capsule *nc)
{
	KASSERT(nc->nc_qe_len == sizeof(struct nvme_command),
	    ("%s: capsule %p is not a command capsule", __func__, nc));

	if (NVMEV(NVME_CMD_PSDT, nc->nc_sqe.fuse) != NVME_PSDT_SGL)
		return (NVME_SC_INVALID_FIELD);

	return (nc->nc_qpair->nq_ops->validate_command_capsule(nc));
}

size_t
nvmf_capsule_data_len(const struct nvmf_capsule *nc)
{
	return (nc->nc_qpair->nq_ops->capsule_data_len(nc));
}

int
nvmf_receive_controller_data(struct nvmf_capsule *nc, uint32_t data_offset,
    struct memdesc *mem, size_t len, nvmf_io_complete_t *complete_cb,
    void *cb_arg)
{
	struct nvmf_io_request io;

	io.io_mem = *mem;
	io.io_len = len;
	io.io_complete = complete_cb;
	io.io_complete_arg = cb_arg;
	return (nc->nc_qpair->nq_ops->receive_controller_data(nc, data_offset,
	    &io));
}

u_int
nvmf_send_controller_data(struct nvmf_capsule *nc, uint32_t data_offset,
    struct mbuf *m, size_t len)
{
	MPASS(m_length(m, NULL) == len);
	return (nc->nc_qpair->nq_ops->send_controller_data(nc, data_offset, m,
	    len));
}
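/*
 * Illustrative sketch, not compiled: a controller handling a
 * host-initiated read would validate the command capsule before
 * transferring data, then hand the transport an mbuf chain whose
 * total length matches the requested transfer size (as asserted in
 * nvmf_send_controller_data() above).  The nc, m, and len names are
 * hypothetical:
 *
 *	uint8_t sc_status;
 *
 *	sc_status = nvmf_validate_command_capsule(nc);
 *	if (sc_status != NVME_SC_SUCCESS)
 *		return (sc_status);
 *
 *	(build an mbuf chain m carrying len bytes of payload)
 *
 *	sc_status = nvmf_send_controller_data(nc, 0, m, len);
 */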
int
nvmf_transport_module_handler(struct module *mod, int what, void *arg)
{
	struct nvmf_transport_ops *ops = arg;
	struct nvmf_transport *nt, *nt2, *prev;
	int error;

	switch (what) {
	case MOD_LOAD:
		if (!nvmf_supported_trtype(ops->trtype)) {
			printf("NVMF: Unsupported transport %u\n",
			    ops->trtype);
			return (EINVAL);
		}

		nt = malloc(sizeof(*nt), M_NVMF_TRANSPORT, M_WAITOK | M_ZERO);
		nt->nt_ops = arg;

		sx_xlock(&nvmf_transports_lock);
		if (SLIST_EMPTY(&nvmf_transports[ops->trtype])) {
			SLIST_INSERT_HEAD(&nvmf_transports[ops->trtype], nt,
			    nt_link);
		} else {
			prev = NULL;
			SLIST_FOREACH(nt2, &nvmf_transports[ops->trtype],
			    nt_link) {
				if (ops->priority > nt2->nt_ops->priority)
					break;
				prev = nt2;
			}
			if (prev == NULL)
				SLIST_INSERT_HEAD(&nvmf_transports[ops->trtype],
				    nt, nt_link);
			else
				SLIST_INSERT_AFTER(prev, nt, nt_link);
		}
		sx_xunlock(&nvmf_transports_lock);
		return (0);

	case MOD_QUIESCE:
		if (!nvmf_supported_trtype(ops->trtype))
			return (0);

		sx_slock(&nvmf_transports_lock);
		SLIST_FOREACH(nt, &nvmf_transports[ops->trtype], nt_link) {
			if (nt->nt_ops == ops)
				break;
		}
		if (nt == NULL) {
			sx_sunlock(&nvmf_transports_lock);
			return (0);
		}
		if (nt->nt_active_qpairs != 0) {
			sx_sunlock(&nvmf_transports_lock);
			return (EBUSY);
		}
		sx_sunlock(&nvmf_transports_lock);
		return (0);

	case MOD_UNLOAD:
		if (!nvmf_supported_trtype(ops->trtype))
			return (0);

		sx_xlock(&nvmf_transports_lock);
		prev = NULL;
		SLIST_FOREACH(nt, &nvmf_transports[ops->trtype], nt_link) {
			if (nt->nt_ops == ops)
				break;
			prev = nt;
		}
		if (nt == NULL) {
			sx_xunlock(&nvmf_transports_lock);
			return (0);
		}

		if (prev == NULL)
			SLIST_REMOVE_HEAD(&nvmf_transports[ops->trtype],
			    nt_link);
		else
			SLIST_REMOVE_AFTER(prev, nt_link);

		error = 0;
		while (nt->nt_active_qpairs != 0 && error == 0)
			error = sx_sleep(nt, &nvmf_transports_lock, PCATCH,
			    "nftunld", 0);
		sx_xunlock(&nvmf_transports_lock);
		if (error != 0)
			return (error);
		free(nt, M_NVMF_TRANSPORT);
		return (0);

	default:
		return (EOPNOTSUPP);
	}
}

static int
nvmf_transport_modevent(module_t mod __unused, int what, void *arg __unused)
{
	switch (what) {
	case MOD_LOAD:
		for (u_int i = 0; i < nitems(nvmf_transports); i++)
			SLIST_INIT(&nvmf_transports[i]);
		sx_init(&nvmf_transports_lock, "nvmf transports");
		return (0);
	default:
		return (EOPNOTSUPP);
	}
}

static moduledata_t nvmf_transport_mod = {
	"nvmf_transport",
	nvmf_transport_modevent,
	0
};

DECLARE_MODULE(nvmf_transport, nvmf_transport_mod, SI_SUB_DRIVERS,
    SI_ORDER_FIRST);
MODULE_VERSION(nvmf_transport, 1);
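/*
 * Illustrative sketch, not compiled: a transport registers itself by
 * passing its nvmf_transport_ops as the private data of a module
 * whose event handler is nvmf_transport_module_handler().  All
 * "example" names below are hypothetical; the callback field names
 * follow the nq_ops/nt_ops usage in this file, with the authoritative
 * layout in nvmf_transport_internal.h.
 *
 *	static struct nvmf_transport_ops example_ops = {
 *		.trtype = NVMF_TRTYPE_TCP,
 *		.priority = 0,
 *		.allocate_qpair = example_allocate_qpair,
 *		.free_qpair = example_free_qpair,
 *		.allocate_capsule = example_allocate_capsule,
 *		.free_capsule = example_free_capsule,
 *		.transmit_capsule = example_transmit_capsule,
 *		.validate_command_capsule = example_validate_command_capsule,
 *		.capsule_data_len = example_capsule_data_len,
 *		.receive_controller_data = example_receive_controller_data,
 *		.send_controller_data = example_send_controller_data,
 *	};
 *
 *	static moduledata_t nvmf_example_mod = {
 *		"nvmf_example",
 *		nvmf_transport_module_handler,
 *		&example_ops
 *	};
 *	DECLARE_MODULE(nvmf_example, nvmf_example_mod, SI_SUB_DRIVERS,
 *	    SI_ORDER_ANY);
 *	MODULE_DEPEND(nvmf_example, nvmf_transport, 1, 1, 1);
 */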