/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2022-2024 Chelsio Communications, Inc.
 * Written by: John Baldwin <jhb@FreeBSD.org>
 */

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/nv.h>
#include <sys/refcount.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <dev/nvme/nvme.h>
#include <dev/nvmf/nvmf.h>
#include <dev/nvmf/nvmf_transport.h>
#include <dev/nvmf/nvmf_transport_internal.h>

/* Transport-independent support for fabrics queue pairs and commands. */

struct nvmf_transport {
	struct nvmf_transport_ops *nt_ops;

	volatile u_int nt_active_qpairs;
	SLIST_ENTRY(nvmf_transport) nt_link;
};

/* nvmf_transports[nvmf_trtype] is sorted by priority */
static SLIST_HEAD(, nvmf_transport) nvmf_transports[NVMF_TRTYPE_TCP + 1];
static struct sx nvmf_transports_lock;

static MALLOC_DEFINE(M_NVMF_TRANSPORT, "nvmf_xport",
    "NVMe over Fabrics transport");

SYSCTL_NODE(_kern, OID_AUTO, nvmf, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "NVMe over Fabrics");

static bool
nvmf_supported_trtype(enum nvmf_trtype trtype)
{
	return (trtype < nitems(nvmf_transports));
}

struct nvmf_qpair *
nvmf_allocate_qpair(enum nvmf_trtype trtype, bool controller,
    const nvlist_t *params, nvmf_qpair_error_t *error_cb, void *error_cb_arg,
    nvmf_capsule_receive_t *receive_cb, void *receive_cb_arg)
{
	struct nvmf_transport *nt;
	struct nvmf_qpair *qp;

	if (!nvmf_supported_trtype(trtype))
		return (NULL);

	/*
	 * Try transports in priority order; the first one to allocate a
	 * queue pair wins.  qp must start out NULL so that the check
	 * below is valid even when no transport of this type is
	 * registered and the loop body never runs.
	 */
	qp = NULL;
	sx_slock(&nvmf_transports_lock);
	SLIST_FOREACH(nt, &nvmf_transports[trtype], nt_link) {
		qp = nt->nt_ops->allocate_qpair(controller, params);
		if (qp != NULL) {
			refcount_acquire(&nt->nt_active_qpairs);
			break;
		}
	}
	sx_sunlock(&nvmf_transports_lock);
	if (qp == NULL)
		return (NULL);

	qp->nq_transport = nt;
	qp->nq_ops = nt->nt_ops;
	qp->nq_controller = controller;
	qp->nq_error = error_cb;
	qp->nq_error_arg = error_cb_arg;
	qp->nq_receive = receive_cb;
	qp->nq_receive_arg = receive_cb_arg;
	qp->nq_admin = nvlist_get_bool(params, "admin");
	return (qp);
}

void
nvmf_free_qpair(struct nvmf_qpair *qp)
{
	struct nvmf_transport *nt;

	nt = qp->nq_transport;
	qp->nq_ops->free_qpair(qp);
	if (refcount_release(&nt->nt_active_qpairs))
		wakeup(nt);
}

struct nvmf_capsule *
nvmf_allocate_command(struct nvmf_qpair *qp, const void *sqe, int how)
{
	struct nvmf_capsule *nc;

	KASSERT(how == M_WAITOK || how == M_NOWAIT,
	    ("%s: invalid how", __func__));
	nc = qp->nq_ops->allocate_capsule(qp, how);
	if (nc == NULL)
		return (NULL);

	nc->nc_qpair = qp;
	nc->nc_qe_len = sizeof(struct nvme_command);
	memcpy(&nc->nc_sqe, sqe, nc->nc_qe_len);

	/* 4.2 of NVMe base spec: Fabrics always uses SGL. */
	nc->nc_sqe.fuse &= ~NVMEM(NVME_CMD_PSDT);
	nc->nc_sqe.fuse |= NVMEF(NVME_CMD_PSDT, NVME_PSDT_SGL);
	return (nc);
}
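
/*
 * Example (hedged sketch, not taken from this file): a host-side
 * caller would typically fill in an SQE, wrap it in a capsule, and
 * transmit it.  "qp" and "my_sqe" are hypothetical and error handling
 * is abbreviated.
 *
 *	struct nvme_command my_sqe = { .opc = NVME_OPC_IDENTIFY };
 *	struct nvmf_capsule *nc;
 *
 *	nc = nvmf_allocate_command(qp, &my_sqe, M_WAITOK);
 *	if (nc != NULL && nvmf_transmit_capsule(nc) != 0)
 *		nvmf_free_capsule(nc);
 */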

struct nvmf_capsule *
nvmf_allocate_response(struct nvmf_qpair *qp, const void *cqe, int how)
{
	struct nvmf_capsule *nc;

	KASSERT(how == M_WAITOK || how == M_NOWAIT,
	    ("%s: invalid how", __func__));
	nc = qp->nq_ops->allocate_capsule(qp, how);
	if (nc == NULL)
		return (NULL);

	nc->nc_qpair = qp;
	nc->nc_qe_len = sizeof(struct nvme_completion);
	memcpy(&nc->nc_cqe, cqe, nc->nc_qe_len);
	return (nc);
}

int
nvmf_capsule_append_data(struct nvmf_capsule *nc, struct memdesc *mem,
    size_t len, bool send, nvmf_io_complete_t *complete_cb,
    void *cb_arg)
{
	if (nc->nc_data.io_len != 0)
		return (EBUSY);

	nc->nc_send_data = send;
	nc->nc_data.io_mem = *mem;
	nc->nc_data.io_len = len;
	nc->nc_data.io_complete = complete_cb;
	nc->nc_data.io_complete_arg = cb_arg;
	return (0);
}

void
nvmf_free_capsule(struct nvmf_capsule *nc)
{
	nc->nc_qpair->nq_ops->free_capsule(nc);
}

int
nvmf_transmit_capsule(struct nvmf_capsule *nc)
{
	return (nc->nc_qpair->nq_ops->transmit_capsule(nc));
}

void
nvmf_abort_capsule_data(struct nvmf_capsule *nc, int error)
{
	if (nc->nc_data.io_len != 0)
		nvmf_complete_io_request(&nc->nc_data, 0, error);
}

void *
nvmf_capsule_sqe(struct nvmf_capsule *nc)
{
	KASSERT(nc->nc_qe_len == sizeof(struct nvme_command),
	    ("%s: capsule %p is not a command capsule", __func__, nc));
	return (&nc->nc_sqe);
}

void *
nvmf_capsule_cqe(struct nvmf_capsule *nc)
{
	KASSERT(nc->nc_qe_len == sizeof(struct nvme_completion),
	    ("%s: capsule %p is not a response capsule", __func__, nc));
	return (&nc->nc_cqe);
}

bool
nvmf_sqhd_valid(struct nvmf_capsule *nc)
{
	KASSERT(nc->nc_qe_len == sizeof(struct nvme_completion),
	    ("%s: capsule %p is not a response capsule", __func__, nc));
	return (nc->nc_sqhd_valid);
}

uint8_t
nvmf_validate_command_capsule(struct nvmf_capsule *nc)
{
	KASSERT(nc->nc_qe_len == sizeof(struct nvme_command),
	    ("%s: capsule %p is not a command capsule", __func__, nc));

	if (NVMEV(NVME_CMD_PSDT, nc->nc_sqe.fuse) != NVME_PSDT_SGL)
		return (NVME_SC_INVALID_FIELD);

	return (nc->nc_qpair->nq_ops->validate_command_capsule(nc));
}

size_t
nvmf_capsule_data_len(const struct nvmf_capsule *nc)
{
	return (nc->nc_qpair->nq_ops->capsule_data_len(nc));
}

int
nvmf_receive_controller_data(struct nvmf_capsule *nc, uint32_t data_offset,
    struct memdesc *mem, size_t len, nvmf_io_complete_t *complete_cb,
    void *cb_arg)
{
	struct nvmf_io_request io;

	io.io_mem = *mem;
	io.io_len = len;
	io.io_complete = complete_cb;
	io.io_complete_arg = cb_arg;
	return (nc->nc_qpair->nq_ops->receive_controller_data(nc, data_offset,
	    &io));
}

u_int
nvmf_send_controller_data(struct nvmf_capsule *nc, uint32_t data_offset,
    struct mbuf *m, size_t len)
{
	MPASS(m_length(m, NULL) == len);
	return (nc->nc_qpair->nq_ops->send_controller_data(nc, data_offset, m,
	    len));
}
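
/*
 * Example (hedged sketch, not taken from this file): to associate a
 * data buffer with a capsule before transmitting it, wrap the buffer
 * in a memdesc.  "buf", "len", and "my_done" are hypothetical; only
 * one buffer may be attached per capsule (see EBUSY above).
 *
 *	struct memdesc mem;
 *	int error;
 *
 *	mem = memdesc_vaddr(buf, len);
 *	error = nvmf_capsule_append_data(nc, &mem, len, true, my_done,
 *	    NULL);
 */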

int
nvmf_pack_ioc_nvlist(const nvlist_t *nvl, struct nvmf_ioc_nv *nv)
{
	void *packed;
	int error;

	error = nvlist_error(nvl);
	if (error != 0)
		return (error);

	if (nv->size == 0) {
		nv->len = nvlist_size(nvl);
	} else {
		packed = nvlist_pack(nvl, &nv->len);
		if (packed == NULL)
			error = ENOMEM;
		else if (nv->len > nv->size)
			error = EFBIG;
		else
			error = copyout(packed, nv->data, nv->len);
		free(packed, M_NVLIST);
	}
	return (error);
}

int
nvmf_unpack_ioc_nvlist(const struct nvmf_ioc_nv *nv, nvlist_t **nvlp)
{
	void *packed;
	nvlist_t *nvl;
	int error;

	packed = malloc(nv->size, M_NVMF_TRANSPORT, M_WAITOK);
	error = copyin(nv->data, packed, nv->size);
	if (error != 0) {
		free(packed, M_NVMF_TRANSPORT);
		return (error);
	}

	nvl = nvlist_unpack(packed, nv->size, 0);
	free(packed, M_NVMF_TRANSPORT);
	if (nvl == NULL)
		return (EINVAL);

	*nvlp = nvl;
	return (0);
}

bool
nvmf_validate_qpair_nvlist(const nvlist_t *nvl, bool controller)
{
	uint64_t value, qsize;
	bool admin, valid;

	valid = true;
	valid &= nvlist_exists_bool(nvl, "admin");
	valid &= nvlist_exists_bool(nvl, "sq_flow_control");
	valid &= nvlist_exists_number(nvl, "qsize");
	valid &= nvlist_exists_number(nvl, "sqhd");
	if (!controller)
		valid &= nvlist_exists_number(nvl, "sqtail");
	if (!valid)
		return (false);

	admin = nvlist_get_bool(nvl, "admin");
	qsize = nvlist_get_number(nvl, "qsize");
	if (admin) {
		if (qsize < NVME_MIN_ADMIN_ENTRIES ||
		    qsize > NVME_MAX_ADMIN_ENTRIES)
			return (false);
	} else {
		if (qsize < NVME_MIN_IO_ENTRIES || qsize > NVME_MAX_IO_ENTRIES)
			return (false);
	}
	value = nvlist_get_number(nvl, "sqhd");
	if (value > qsize - 1)
		return (false);
	if (!controller) {
		value = nvlist_get_number(nvl, "sqtail");
		if (value > qsize - 1)
			return (false);
	}

	return (true);
}
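
/*
 * Example (hedged sketch, not taken from this file): a minimal
 * host-side (controller == false) nvlist for an I/O queue that
 * satisfies nvmf_validate_qpair_nvlist.  Values are illustrative.
 *
 *	nvlist_t *nvl;
 *
 *	nvl = nvlist_create(0);
 *	nvlist_add_bool(nvl, "admin", false);
 *	nvlist_add_bool(nvl, "sq_flow_control", false);
 *	nvlist_add_number(nvl, "qsize", 128);
 *	nvlist_add_number(nvl, "sqhd", 0);
 *	nvlist_add_number(nvl, "sqtail", 0);
 *	MPASS(nvmf_validate_qpair_nvlist(nvl, false));
 */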

int
nvmf_transport_module_handler(struct module *mod, int what, void *arg)
{
	struct nvmf_transport_ops *ops = arg;
	struct nvmf_transport *nt, *nt2, *prev;
	int error;

	switch (what) {
	case MOD_LOAD:
		if (!nvmf_supported_trtype(ops->trtype)) {
			printf("NVMF: Unsupported transport %u\n",
			    ops->trtype);
			return (EINVAL);
		}

		nt = malloc(sizeof(*nt), M_NVMF_TRANSPORT, M_WAITOK | M_ZERO);
		nt->nt_ops = arg;

		sx_xlock(&nvmf_transports_lock);
		if (SLIST_EMPTY(&nvmf_transports[ops->trtype])) {
			SLIST_INSERT_HEAD(&nvmf_transports[ops->trtype], nt,
			    nt_link);
		} else {
			/* Insert in descending priority order. */
			prev = NULL;
			SLIST_FOREACH(nt2, &nvmf_transports[ops->trtype],
			    nt_link) {
				if (ops->priority > nt2->nt_ops->priority)
					break;
				prev = nt2;
			}
			if (prev == NULL)
				SLIST_INSERT_HEAD(&nvmf_transports[ops->trtype],
				    nt, nt_link);
			else
				SLIST_INSERT_AFTER(prev, nt, nt_link);
		}
		sx_xunlock(&nvmf_transports_lock);
		return (0);

	case MOD_QUIESCE:
		if (!nvmf_supported_trtype(ops->trtype))
			return (0);

		sx_slock(&nvmf_transports_lock);
		SLIST_FOREACH(nt, &nvmf_transports[ops->trtype], nt_link) {
			if (nt->nt_ops == ops)
				break;
		}
		if (nt == NULL) {
			sx_sunlock(&nvmf_transports_lock);
			return (0);
		}
		if (nt->nt_active_qpairs != 0) {
			sx_sunlock(&nvmf_transports_lock);
			return (EBUSY);
		}
		sx_sunlock(&nvmf_transports_lock);
		return (0);

	case MOD_UNLOAD:
		if (!nvmf_supported_trtype(ops->trtype))
			return (0);

		sx_xlock(&nvmf_transports_lock);
		prev = NULL;
		SLIST_FOREACH(nt, &nvmf_transports[ops->trtype], nt_link) {
			if (nt->nt_ops == ops)
				break;
			prev = nt;
		}
		if (nt == NULL) {
			sx_xunlock(&nvmf_transports_lock);
			return (0);
		}

		if (prev == NULL)
			SLIST_REMOVE_HEAD(&nvmf_transports[ops->trtype],
			    nt_link);
		else
			SLIST_REMOVE_AFTER(prev, nt_link);

		/* Wait for any active queue pairs to drain before freeing. */
		error = 0;
		while (nt->nt_active_qpairs != 0 && error == 0)
			error = sx_sleep(nt, &nvmf_transports_lock, PCATCH,
			    "nftunld", 0);
		sx_xunlock(&nvmf_transports_lock);
		if (error != 0)
			return (error);
		free(nt, M_NVMF_TRANSPORT);
		return (0);

	default:
		return (EOPNOTSUPP);
	}
}

static int
nvmf_transport_modevent(module_t mod __unused, int what, void *arg __unused)
{
	switch (what) {
	case MOD_LOAD:
		for (u_int i = 0; i < nitems(nvmf_transports); i++)
			SLIST_INIT(&nvmf_transports[i]);
		sx_init(&nvmf_transports_lock, "nvmf transports");
		return (0);
	default:
		return (EOPNOTSUPP);
	}
}

static moduledata_t nvmf_transport_mod = {
	"nvmf_transport",
	nvmf_transport_modevent,
	0
};

DECLARE_MODULE(nvmf_transport, nvmf_transport_mod, SI_SUB_DRIVERS,
    SI_ORDER_FIRST);
MODULE_VERSION(nvmf_transport, 1);
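
/*
 * Example (hedged sketch, not taken from this file): an individual
 * transport registers itself by pointing a module at
 * nvmf_transport_module_handler with its ops structure as the module
 * argument.  The "tcp" names and the ops initializer are illustrative;
 * only the trtype, priority, and callback fields referenced above are
 * assumed here.
 *
 *	static struct nvmf_transport_ops tcp_ops = {
 *		.allocate_qpair = tcp_allocate_qpair,
 *		...
 *		.trtype = NVMF_TRTYPE_TCP,
 *		.priority = 0,
 *	};
 *
 *	static moduledata_t nvmf_tcp_mod = {
 *		"nvmf_tcp",
 *		nvmf_transport_module_handler,
 *		&tcp_ops
 *	};
 *	DECLARE_MODULE(nvmf_tcp, nvmf_tcp_mod, SI_SUB_DRIVERS,
 *	    SI_ORDER_ANY);
 *	MODULE_DEPEND(nvmf_tcp, nvmf_transport, 1, 1, 1);
 */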