1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * NVMe over Fabrics Persist Reservation. 4 * Copyright (c) 2024 Guixin Liu, Alibaba Group. 5 * All rights reserved. 6 */ 7 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 8 #include <linux/unaligned.h> 9 #include "nvmet.h" 10 11 #define NVMET_PR_NOTIFI_MASK_ALL \ 12 (1 << NVME_PR_NOTIFY_BIT_REG_PREEMPTED | \ 13 1 << NVME_PR_NOTIFY_BIT_RESV_RELEASED | \ 14 1 << NVME_PR_NOTIFY_BIT_RESV_PREEMPTED) 15 16 static inline bool nvmet_pr_parse_ignore_key(u32 cdw10) 17 { 18 /* Ignore existing key, bit 03. */ 19 return (cdw10 >> 3) & 1; 20 } 21 22 static inline struct nvmet_ns *nvmet_pr_to_ns(struct nvmet_pr *pr) 23 { 24 return container_of(pr, struct nvmet_ns, pr); 25 } 26 27 static struct nvmet_pr_registrant * 28 nvmet_pr_find_registrant(struct nvmet_pr *pr, uuid_t *hostid) 29 { 30 struct nvmet_pr_registrant *reg; 31 32 list_for_each_entry_rcu(reg, &pr->registrant_list, entry) { 33 if (uuid_equal(®->hostid, hostid)) 34 return reg; 35 } 36 return NULL; 37 } 38 39 u16 nvmet_set_feat_resv_notif_mask(struct nvmet_req *req, u32 mask) 40 { 41 u32 nsid = le32_to_cpu(req->cmd->common.nsid); 42 struct nvmet_ctrl *ctrl = req->sq->ctrl; 43 struct nvmet_ns *ns; 44 unsigned long idx; 45 u16 status; 46 47 if (mask & ~(NVMET_PR_NOTIFI_MASK_ALL)) { 48 req->error_loc = offsetof(struct nvme_common_command, cdw11); 49 return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; 50 } 51 52 if (nsid != U32_MAX) { 53 status = nvmet_req_find_ns(req); 54 if (status) 55 return status; 56 if (!req->ns->pr.enable) 57 return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; 58 59 WRITE_ONCE(req->ns->pr.notify_mask, mask); 60 goto success; 61 } 62 63 xa_for_each(&ctrl->subsys->namespaces, idx, ns) { 64 if (ns->pr.enable) 65 WRITE_ONCE(ns->pr.notify_mask, mask); 66 } 67 68 success: 69 nvmet_set_result(req, mask); 70 return NVME_SC_SUCCESS; 71 } 72 73 u16 nvmet_get_feat_resv_notif_mask(struct nvmet_req *req) 74 { 75 u16 status; 76 77 status = nvmet_req_find_ns(req); 78 if (status) 79 return status; 80 81 if (!req->ns->pr.enable) 82 return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; 83 84 nvmet_set_result(req, READ_ONCE(req->ns->pr.notify_mask)); 85 return status; 86 } 87 88 void nvmet_execute_get_log_page_resv(struct nvmet_req *req) 89 { 90 struct nvmet_pr_log_mgr *log_mgr = &req->sq->ctrl->pr_log_mgr; 91 struct nvme_pr_log next_log = {0}; 92 struct nvme_pr_log log = {0}; 93 u16 status = NVME_SC_SUCCESS; 94 u64 lost_count; 95 u64 cur_count; 96 u64 next_count; 97 98 mutex_lock(&log_mgr->lock); 99 if (!kfifo_get(&log_mgr->log_queue, &log)) 100 goto out; 101 102 /* 103 * We can't get the last in kfifo. 104 * Utilize the current count and the count from the next log to 105 * calculate the number of lost logs, while also addressing cases 106 * of overflow. If there is no subsequent log, the number of lost 107 * logs is equal to the lost_count within the nvmet_pr_log_mgr. 108 */ 109 cur_count = le64_to_cpu(log.count); 110 if (kfifo_peek(&log_mgr->log_queue, &next_log)) { 111 next_count = le64_to_cpu(next_log.count); 112 if (next_count > cur_count) 113 lost_count = next_count - cur_count - 1; 114 else 115 lost_count = U64_MAX - cur_count + next_count - 1; 116 } else { 117 lost_count = log_mgr->lost_count; 118 } 119 120 log.count = cpu_to_le64((cur_count + lost_count) == 0 ? 121 1 : (cur_count + lost_count)); 122 log_mgr->lost_count -= lost_count; 123 124 log.nr_pages = kfifo_len(&log_mgr->log_queue); 125 126 out: 127 status = nvmet_copy_to_sgl(req, 0, &log, sizeof(log)); 128 mutex_unlock(&log_mgr->lock); 129 nvmet_req_complete(req, status); 130 } 131 132 static void nvmet_pr_add_resv_log(struct nvmet_ctrl *ctrl, u8 log_type, 133 u32 nsid) 134 { 135 struct nvmet_pr_log_mgr *log_mgr = &ctrl->pr_log_mgr; 136 struct nvme_pr_log log = {0}; 137 138 mutex_lock(&log_mgr->lock); 139 log_mgr->counter++; 140 if (log_mgr->counter == 0) 141 log_mgr->counter = 1; 142 143 log.count = cpu_to_le64(log_mgr->counter); 144 log.type = log_type; 145 log.nsid = cpu_to_le32(nsid); 146 147 if (!kfifo_put(&log_mgr->log_queue, log)) { 148 pr_info("a reservation log lost, cntlid:%d, log_type:%d, nsid:%d\n", 149 ctrl->cntlid, log_type, nsid); 150 log_mgr->lost_count++; 151 } 152 153 mutex_unlock(&log_mgr->lock); 154 } 155 156 static void nvmet_pr_resv_released(struct nvmet_pr *pr, uuid_t *hostid) 157 { 158 struct nvmet_ns *ns = nvmet_pr_to_ns(pr); 159 struct nvmet_subsys *subsys = ns->subsys; 160 struct nvmet_ctrl *ctrl; 161 162 if (test_bit(NVME_PR_NOTIFY_BIT_RESV_RELEASED, &pr->notify_mask)) 163 return; 164 165 mutex_lock(&subsys->lock); 166 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { 167 if (!uuid_equal(&ctrl->hostid, hostid) && 168 nvmet_pr_find_registrant(pr, &ctrl->hostid)) { 169 nvmet_pr_add_resv_log(ctrl, 170 NVME_PR_LOG_RESERVATION_RELEASED, ns->nsid); 171 nvmet_add_async_event(ctrl, NVME_AER_CSS, 172 NVME_AEN_RESV_LOG_PAGE_AVALIABLE, 173 NVME_LOG_RESERVATION); 174 } 175 } 176 mutex_unlock(&subsys->lock); 177 } 178 179 static void nvmet_pr_send_event_to_host(struct nvmet_pr *pr, uuid_t *hostid, 180 u8 log_type) 181 { 182 struct nvmet_ns *ns = nvmet_pr_to_ns(pr); 183 struct nvmet_subsys *subsys = ns->subsys; 184 struct nvmet_ctrl *ctrl; 185 186 mutex_lock(&subsys->lock); 187 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { 188 if (uuid_equal(hostid, &ctrl->hostid)) { 189 nvmet_pr_add_resv_log(ctrl, log_type, ns->nsid); 190 nvmet_add_async_event(ctrl, NVME_AER_CSS, 191 NVME_AEN_RESV_LOG_PAGE_AVALIABLE, 192 NVME_LOG_RESERVATION); 193 } 194 } 195 mutex_unlock(&subsys->lock); 196 } 197 198 static void nvmet_pr_resv_preempted(struct nvmet_pr *pr, uuid_t *hostid) 199 { 200 if (test_bit(NVME_PR_NOTIFY_BIT_RESV_PREEMPTED, &pr->notify_mask)) 201 return; 202 203 nvmet_pr_send_event_to_host(pr, hostid, 204 NVME_PR_LOG_RESERVATOIN_PREEMPTED); 205 } 206 207 static void nvmet_pr_registration_preempted(struct nvmet_pr *pr, 208 uuid_t *hostid) 209 { 210 if (test_bit(NVME_PR_NOTIFY_BIT_REG_PREEMPTED, &pr->notify_mask)) 211 return; 212 213 nvmet_pr_send_event_to_host(pr, hostid, 214 NVME_PR_LOG_REGISTRATION_PREEMPTED); 215 } 216 217 static inline void nvmet_pr_set_new_holder(struct nvmet_pr *pr, u8 new_rtype, 218 struct nvmet_pr_registrant *reg) 219 { 220 reg->rtype = new_rtype; 221 rcu_assign_pointer(pr->holder, reg); 222 } 223 224 static u16 nvmet_pr_register(struct nvmet_req *req, 225 struct nvmet_pr_register_data *d) 226 { 227 struct nvmet_ctrl *ctrl = req->sq->ctrl; 228 struct nvmet_pr_registrant *new, *reg; 229 struct nvmet_pr *pr = &req->ns->pr; 230 u16 status = NVME_SC_SUCCESS; 231 u64 nrkey = le64_to_cpu(d->nrkey); 232 233 new = kmalloc(sizeof(*new), GFP_KERNEL); 234 if (!new) 235 return NVME_SC_INTERNAL; 236 237 down(&pr->pr_sem); 238 reg = nvmet_pr_find_registrant(pr, &ctrl->hostid); 239 if (reg) { 240 if (reg->rkey != nrkey) 241 status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 242 kfree(new); 243 goto out; 244 } 245 246 memset(new, 0, sizeof(*new)); 247 INIT_LIST_HEAD(&new->entry); 248 new->rkey = nrkey; 249 uuid_copy(&new->hostid, &ctrl->hostid); 250 list_add_tail_rcu(&new->entry, &pr->registrant_list); 251 252 out: 253 up(&pr->pr_sem); 254 return status; 255 } 256 257 static void nvmet_pr_unregister_one(struct nvmet_pr *pr, 258 struct nvmet_pr_registrant *reg) 259 { 260 struct nvmet_pr_registrant *first_reg; 261 struct nvmet_pr_registrant *holder; 262 u8 original_rtype; 263 264 list_del_rcu(®->entry); 265 266 holder = rcu_dereference_protected(pr->holder, 1); 267 if (reg != holder) 268 goto out; 269 270 original_rtype = holder->rtype; 271 if (original_rtype == NVME_PR_WRITE_EXCLUSIVE_ALL_REGS || 272 original_rtype == NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS) { 273 first_reg = list_first_or_null_rcu(&pr->registrant_list, 274 struct nvmet_pr_registrant, entry); 275 if (first_reg) 276 first_reg->rtype = original_rtype; 277 rcu_assign_pointer(pr->holder, first_reg); 278 } else { 279 rcu_assign_pointer(pr->holder, NULL); 280 281 if (original_rtype == NVME_PR_WRITE_EXCLUSIVE_REG_ONLY || 282 original_rtype == NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY) 283 nvmet_pr_resv_released(pr, ®->hostid); 284 } 285 out: 286 kfree_rcu(reg, rcu); 287 } 288 289 static u16 nvmet_pr_unregister(struct nvmet_req *req, 290 struct nvmet_pr_register_data *d, 291 bool ignore_key) 292 { 293 u16 status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 294 struct nvmet_ctrl *ctrl = req->sq->ctrl; 295 struct nvmet_pr *pr = &req->ns->pr; 296 struct nvmet_pr_registrant *reg; 297 298 down(&pr->pr_sem); 299 list_for_each_entry_rcu(reg, &pr->registrant_list, entry) { 300 if (uuid_equal(®->hostid, &ctrl->hostid)) { 301 if (ignore_key || reg->rkey == le64_to_cpu(d->crkey)) { 302 status = NVME_SC_SUCCESS; 303 nvmet_pr_unregister_one(pr, reg); 304 } 305 break; 306 } 307 } 308 up(&pr->pr_sem); 309 310 return status; 311 } 312 313 static void nvmet_pr_update_reg_rkey(struct nvmet_pr_registrant *reg, 314 void *attr) 315 { 316 reg->rkey = *(u64 *)attr; 317 } 318 319 static u16 nvmet_pr_update_reg_attr(struct nvmet_pr *pr, 320 struct nvmet_pr_registrant *reg, 321 void (*change_attr)(struct nvmet_pr_registrant *reg, 322 void *attr), 323 void *attr) 324 { 325 struct nvmet_pr_registrant *holder; 326 struct nvmet_pr_registrant *new; 327 328 holder = rcu_dereference_protected(pr->holder, 1); 329 if (reg != holder) { 330 change_attr(reg, attr); 331 return NVME_SC_SUCCESS; 332 } 333 334 new = kmalloc(sizeof(*new), GFP_ATOMIC); 335 if (!new) 336 return NVME_SC_INTERNAL; 337 338 new->rkey = holder->rkey; 339 new->rtype = holder->rtype; 340 uuid_copy(&new->hostid, &holder->hostid); 341 INIT_LIST_HEAD(&new->entry); 342 343 change_attr(new, attr); 344 list_replace_rcu(&holder->entry, &new->entry); 345 rcu_assign_pointer(pr->holder, new); 346 kfree_rcu(holder, rcu); 347 348 return NVME_SC_SUCCESS; 349 } 350 351 static u16 nvmet_pr_replace(struct nvmet_req *req, 352 struct nvmet_pr_register_data *d, 353 bool ignore_key) 354 { 355 u16 status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 356 struct nvmet_ctrl *ctrl = req->sq->ctrl; 357 struct nvmet_pr *pr = &req->ns->pr; 358 struct nvmet_pr_registrant *reg; 359 u64 nrkey = le64_to_cpu(d->nrkey); 360 361 down(&pr->pr_sem); 362 list_for_each_entry_rcu(reg, &pr->registrant_list, entry) { 363 if (uuid_equal(®->hostid, &ctrl->hostid)) { 364 if (ignore_key || reg->rkey == le64_to_cpu(d->crkey)) 365 status = nvmet_pr_update_reg_attr(pr, reg, 366 nvmet_pr_update_reg_rkey, 367 &nrkey); 368 break; 369 } 370 } 371 up(&pr->pr_sem); 372 return status; 373 } 374 375 static void nvmet_execute_pr_register(struct nvmet_req *req) 376 { 377 u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); 378 bool ignore_key = nvmet_pr_parse_ignore_key(cdw10); 379 struct nvmet_pr_register_data *d; 380 u8 reg_act = cdw10 & 0x07; /* Reservation Register Action, bit 02:00 */ 381 u16 status; 382 383 d = kmalloc(sizeof(*d), GFP_KERNEL); 384 if (!d) { 385 status = NVME_SC_INTERNAL; 386 goto out; 387 } 388 389 status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d)); 390 if (status) 391 goto free_data; 392 393 switch (reg_act) { 394 case NVME_PR_REGISTER_ACT_REG: 395 status = nvmet_pr_register(req, d); 396 break; 397 case NVME_PR_REGISTER_ACT_UNREG: 398 status = nvmet_pr_unregister(req, d, ignore_key); 399 break; 400 case NVME_PR_REGISTER_ACT_REPLACE: 401 status = nvmet_pr_replace(req, d, ignore_key); 402 break; 403 default: 404 req->error_loc = offsetof(struct nvme_common_command, cdw10); 405 status = NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR; 406 break; 407 } 408 free_data: 409 kfree(d); 410 out: 411 if (!status) 412 atomic_inc(&req->ns->pr.generation); 413 nvmet_req_complete(req, status); 414 } 415 416 static u16 nvmet_pr_acquire(struct nvmet_req *req, 417 struct nvmet_pr_registrant *reg, 418 u8 rtype) 419 { 420 struct nvmet_pr *pr = &req->ns->pr; 421 struct nvmet_pr_registrant *holder; 422 423 holder = rcu_dereference_protected(pr->holder, 1); 424 if (holder && reg != holder) 425 return NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 426 if (holder && reg == holder) { 427 if (holder->rtype == rtype) 428 return NVME_SC_SUCCESS; 429 return NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 430 } 431 432 nvmet_pr_set_new_holder(pr, rtype, reg); 433 return NVME_SC_SUCCESS; 434 } 435 436 static void nvmet_pr_confirm_ns_pc_ref(struct percpu_ref *ref) 437 { 438 struct nvmet_pr_per_ctrl_ref *pc_ref = 439 container_of(ref, struct nvmet_pr_per_ctrl_ref, ref); 440 441 complete(&pc_ref->confirm_done); 442 } 443 444 static void nvmet_pr_set_ctrl_to_abort(struct nvmet_req *req, uuid_t *hostid) 445 { 446 struct nvmet_pr_per_ctrl_ref *pc_ref; 447 struct nvmet_ns *ns = req->ns; 448 unsigned long idx; 449 450 xa_for_each(&ns->pr_per_ctrl_refs, idx, pc_ref) { 451 if (uuid_equal(&pc_ref->hostid, hostid)) { 452 percpu_ref_kill_and_confirm(&pc_ref->ref, 453 nvmet_pr_confirm_ns_pc_ref); 454 wait_for_completion(&pc_ref->confirm_done); 455 } 456 } 457 } 458 459 static u16 nvmet_pr_unreg_all_host_by_prkey(struct nvmet_req *req, u64 prkey, 460 uuid_t *send_hostid, 461 bool abort) 462 { 463 u16 status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 464 struct nvmet_pr_registrant *reg, *tmp; 465 struct nvmet_pr *pr = &req->ns->pr; 466 uuid_t hostid; 467 468 list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) { 469 if (reg->rkey == prkey) { 470 status = NVME_SC_SUCCESS; 471 uuid_copy(&hostid, ®->hostid); 472 if (abort) 473 nvmet_pr_set_ctrl_to_abort(req, &hostid); 474 nvmet_pr_unregister_one(pr, reg); 475 if (!uuid_equal(&hostid, send_hostid)) 476 nvmet_pr_registration_preempted(pr, &hostid); 477 } 478 } 479 return status; 480 } 481 482 static void nvmet_pr_unreg_all_others_by_prkey(struct nvmet_req *req, 483 u64 prkey, 484 uuid_t *send_hostid, 485 bool abort) 486 { 487 struct nvmet_pr_registrant *reg, *tmp; 488 struct nvmet_pr *pr = &req->ns->pr; 489 uuid_t hostid; 490 491 list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) { 492 if (reg->rkey == prkey && 493 !uuid_equal(®->hostid, send_hostid)) { 494 uuid_copy(&hostid, ®->hostid); 495 if (abort) 496 nvmet_pr_set_ctrl_to_abort(req, &hostid); 497 nvmet_pr_unregister_one(pr, reg); 498 nvmet_pr_registration_preempted(pr, &hostid); 499 } 500 } 501 } 502 503 static void nvmet_pr_unreg_all_others(struct nvmet_req *req, 504 uuid_t *send_hostid, 505 bool abort) 506 { 507 struct nvmet_pr_registrant *reg, *tmp; 508 struct nvmet_pr *pr = &req->ns->pr; 509 uuid_t hostid; 510 511 list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) { 512 if (!uuid_equal(®->hostid, send_hostid)) { 513 uuid_copy(&hostid, ®->hostid); 514 if (abort) 515 nvmet_pr_set_ctrl_to_abort(req, &hostid); 516 nvmet_pr_unregister_one(pr, reg); 517 nvmet_pr_registration_preempted(pr, &hostid); 518 } 519 } 520 } 521 522 static void nvmet_pr_update_holder_rtype(struct nvmet_pr_registrant *reg, 523 void *attr) 524 { 525 u8 new_rtype = *(u8 *)attr; 526 527 reg->rtype = new_rtype; 528 } 529 530 static u16 nvmet_pr_preempt(struct nvmet_req *req, 531 struct nvmet_pr_registrant *reg, 532 u8 rtype, 533 struct nvmet_pr_acquire_data *d, 534 bool abort) 535 { 536 struct nvmet_ctrl *ctrl = req->sq->ctrl; 537 struct nvmet_pr *pr = &req->ns->pr; 538 struct nvmet_pr_registrant *holder; 539 enum nvme_pr_type original_rtype; 540 u64 prkey = le64_to_cpu(d->prkey); 541 u16 status; 542 543 holder = rcu_dereference_protected(pr->holder, 1); 544 if (!holder) 545 return nvmet_pr_unreg_all_host_by_prkey(req, prkey, 546 &ctrl->hostid, abort); 547 548 original_rtype = holder->rtype; 549 if (original_rtype == NVME_PR_WRITE_EXCLUSIVE_ALL_REGS || 550 original_rtype == NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS) { 551 if (!prkey) { 552 /* 553 * To prevent possible access from other hosts, and 554 * avoid terminate the holder, set the new holder 555 * first before unregistering. 556 */ 557 nvmet_pr_set_new_holder(pr, rtype, reg); 558 nvmet_pr_unreg_all_others(req, &ctrl->hostid, abort); 559 return NVME_SC_SUCCESS; 560 } 561 return nvmet_pr_unreg_all_host_by_prkey(req, prkey, 562 &ctrl->hostid, abort); 563 } 564 565 if (holder == reg) { 566 status = nvmet_pr_update_reg_attr(pr, holder, 567 nvmet_pr_update_holder_rtype, &rtype); 568 if (!status && original_rtype != rtype) 569 nvmet_pr_resv_released(pr, ®->hostid); 570 return status; 571 } 572 573 if (prkey == holder->rkey) { 574 /* 575 * Same as before, set the new holder first. 576 */ 577 nvmet_pr_set_new_holder(pr, rtype, reg); 578 nvmet_pr_unreg_all_others_by_prkey(req, prkey, &ctrl->hostid, 579 abort); 580 if (original_rtype != rtype) 581 nvmet_pr_resv_released(pr, ®->hostid); 582 return NVME_SC_SUCCESS; 583 } 584 585 if (prkey) 586 return nvmet_pr_unreg_all_host_by_prkey(req, prkey, 587 &ctrl->hostid, abort); 588 return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; 589 } 590 591 static void nvmet_pr_do_abort(struct work_struct *w) 592 { 593 struct nvmet_req *req = container_of(w, struct nvmet_req, r.abort_work); 594 struct nvmet_pr_per_ctrl_ref *pc_ref; 595 struct nvmet_ns *ns = req->ns; 596 unsigned long idx; 597 598 /* 599 * The target does not support abort, just wait per-controller ref to 0. 600 */ 601 xa_for_each(&ns->pr_per_ctrl_refs, idx, pc_ref) { 602 if (percpu_ref_is_dying(&pc_ref->ref)) { 603 wait_for_completion(&pc_ref->free_done); 604 reinit_completion(&pc_ref->confirm_done); 605 reinit_completion(&pc_ref->free_done); 606 percpu_ref_resurrect(&pc_ref->ref); 607 } 608 } 609 610 up(&ns->pr.pr_sem); 611 nvmet_req_complete(req, NVME_SC_SUCCESS); 612 } 613 614 static u16 __nvmet_execute_pr_acquire(struct nvmet_req *req, 615 struct nvmet_pr_registrant *reg, 616 u8 acquire_act, 617 u8 rtype, 618 struct nvmet_pr_acquire_data *d) 619 { 620 u16 status; 621 622 switch (acquire_act) { 623 case NVME_PR_ACQUIRE_ACT_ACQUIRE: 624 status = nvmet_pr_acquire(req, reg, rtype); 625 goto out; 626 case NVME_PR_ACQUIRE_ACT_PREEMPT: 627 status = nvmet_pr_preempt(req, reg, rtype, d, false); 628 goto inc_gen; 629 case NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT: 630 status = nvmet_pr_preempt(req, reg, rtype, d, true); 631 goto inc_gen; 632 default: 633 req->error_loc = offsetof(struct nvme_common_command, cdw10); 634 status = NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR; 635 goto out; 636 } 637 inc_gen: 638 if (!status) 639 atomic_inc(&req->ns->pr.generation); 640 out: 641 return status; 642 } 643 644 static void nvmet_execute_pr_acquire(struct nvmet_req *req) 645 { 646 u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); 647 bool ignore_key = nvmet_pr_parse_ignore_key(cdw10); 648 /* Reservation type, bit 15:08 */ 649 u8 rtype = (u8)((cdw10 >> 8) & 0xff); 650 /* Reservation acquire action, bit 02:00 */ 651 u8 acquire_act = cdw10 & 0x07; 652 struct nvmet_ctrl *ctrl = req->sq->ctrl; 653 struct nvmet_pr_acquire_data *d = NULL; 654 struct nvmet_pr *pr = &req->ns->pr; 655 struct nvmet_pr_registrant *reg; 656 u16 status = NVME_SC_SUCCESS; 657 658 if (ignore_key || 659 rtype < NVME_PR_WRITE_EXCLUSIVE || 660 rtype > NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS) { 661 status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; 662 goto out; 663 } 664 665 d = kmalloc(sizeof(*d), GFP_KERNEL); 666 if (!d) { 667 status = NVME_SC_INTERNAL; 668 goto out; 669 } 670 671 status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d)); 672 if (status) 673 goto free_data; 674 675 status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 676 down(&pr->pr_sem); 677 list_for_each_entry_rcu(reg, &pr->registrant_list, entry) { 678 if (uuid_equal(®->hostid, &ctrl->hostid) && 679 reg->rkey == le64_to_cpu(d->crkey)) { 680 status = __nvmet_execute_pr_acquire(req, reg, 681 acquire_act, rtype, d); 682 break; 683 } 684 } 685 686 if (!status && acquire_act == NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT) { 687 kfree(d); 688 INIT_WORK(&req->r.abort_work, nvmet_pr_do_abort); 689 queue_work(nvmet_wq, &req->r.abort_work); 690 return; 691 } 692 693 up(&pr->pr_sem); 694 695 free_data: 696 kfree(d); 697 out: 698 nvmet_req_complete(req, status); 699 } 700 701 static u16 nvmet_pr_release(struct nvmet_req *req, 702 struct nvmet_pr_registrant *reg, 703 u8 rtype) 704 { 705 struct nvmet_pr *pr = &req->ns->pr; 706 struct nvmet_pr_registrant *holder; 707 u8 original_rtype; 708 709 holder = rcu_dereference_protected(pr->holder, 1); 710 if (!holder || reg != holder) 711 return NVME_SC_SUCCESS; 712 713 original_rtype = holder->rtype; 714 if (original_rtype != rtype) 715 return NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 716 717 rcu_assign_pointer(pr->holder, NULL); 718 719 if (original_rtype != NVME_PR_WRITE_EXCLUSIVE && 720 original_rtype != NVME_PR_EXCLUSIVE_ACCESS) 721 nvmet_pr_resv_released(pr, ®->hostid); 722 723 return NVME_SC_SUCCESS; 724 } 725 726 static void nvmet_pr_clear(struct nvmet_req *req) 727 { 728 struct nvmet_pr_registrant *reg, *tmp; 729 struct nvmet_pr *pr = &req->ns->pr; 730 731 rcu_assign_pointer(pr->holder, NULL); 732 733 list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) { 734 list_del_rcu(®->entry); 735 if (!uuid_equal(&req->sq->ctrl->hostid, ®->hostid)) 736 nvmet_pr_resv_preempted(pr, ®->hostid); 737 kfree_rcu(reg, rcu); 738 } 739 740 atomic_inc(&pr->generation); 741 } 742 743 static u16 __nvmet_execute_pr_release(struct nvmet_req *req, 744 struct nvmet_pr_registrant *reg, 745 u8 release_act, u8 rtype) 746 { 747 switch (release_act) { 748 case NVME_PR_RELEASE_ACT_RELEASE: 749 return nvmet_pr_release(req, reg, rtype); 750 case NVME_PR_RELEASE_ACT_CLEAR: 751 nvmet_pr_clear(req); 752 return NVME_SC_SUCCESS; 753 default: 754 req->error_loc = offsetof(struct nvme_common_command, cdw10); 755 return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR; 756 } 757 } 758 759 static void nvmet_execute_pr_release(struct nvmet_req *req) 760 { 761 u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); 762 bool ignore_key = nvmet_pr_parse_ignore_key(cdw10); 763 u8 rtype = (u8)((cdw10 >> 8) & 0xff); /* Reservation type, bit 15:08 */ 764 u8 release_act = cdw10 & 0x07; /* Reservation release action, bit 02:00 */ 765 struct nvmet_ctrl *ctrl = req->sq->ctrl; 766 struct nvmet_pr *pr = &req->ns->pr; 767 struct nvmet_pr_release_data *d; 768 struct nvmet_pr_registrant *reg; 769 u16 status; 770 771 if (ignore_key) { 772 status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; 773 goto out; 774 } 775 776 d = kmalloc(sizeof(*d), GFP_KERNEL); 777 if (!d) { 778 status = NVME_SC_INTERNAL; 779 goto out; 780 } 781 782 status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d)); 783 if (status) 784 goto free_data; 785 786 status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 787 down(&pr->pr_sem); 788 list_for_each_entry_rcu(reg, &pr->registrant_list, entry) { 789 if (uuid_equal(®->hostid, &ctrl->hostid) && 790 reg->rkey == le64_to_cpu(d->crkey)) { 791 status = __nvmet_execute_pr_release(req, reg, 792 release_act, rtype); 793 break; 794 } 795 } 796 up(&pr->pr_sem); 797 free_data: 798 kfree(d); 799 out: 800 nvmet_req_complete(req, status); 801 } 802 803 static void nvmet_execute_pr_report(struct nvmet_req *req) 804 { 805 u32 cdw11 = le32_to_cpu(req->cmd->common.cdw11); 806 u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); 807 u32 num_bytes = 4 * (cdw10 + 1); /* cdw10 is number of dwords */ 808 u8 eds = cdw11 & 1; /* Extended data structure, bit 00 */ 809 struct nvme_registered_ctrl_ext *ctrl_eds; 810 struct nvme_reservation_status_ext *data; 811 struct nvmet_pr *pr = &req->ns->pr; 812 struct nvmet_pr_registrant *holder; 813 struct nvmet_pr_registrant *reg; 814 u16 num_ctrls = 0; 815 u16 status; 816 u8 rtype; 817 818 /* nvmet hostid(uuid_t) is 128 bit. */ 819 if (!eds) { 820 req->error_loc = offsetof(struct nvme_common_command, cdw11); 821 status = NVME_SC_HOST_ID_INCONSIST | NVME_STATUS_DNR; 822 goto out; 823 } 824 825 if (num_bytes < sizeof(struct nvme_reservation_status_ext)) { 826 req->error_loc = offsetof(struct nvme_common_command, cdw10); 827 status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR; 828 goto out; 829 } 830 831 data = kzalloc(num_bytes, GFP_KERNEL); 832 if (!data) { 833 status = NVME_SC_INTERNAL; 834 goto out; 835 } 836 data->gen = cpu_to_le32(atomic_read(&pr->generation)); 837 data->ptpls = 0; 838 ctrl_eds = data->regctl_eds; 839 840 rcu_read_lock(); 841 holder = rcu_dereference(pr->holder); 842 rtype = holder ? holder->rtype : 0; 843 data->rtype = rtype; 844 845 list_for_each_entry_rcu(reg, &pr->registrant_list, entry) { 846 num_ctrls++; 847 /* 848 * continue to get the number of all registrans. 849 */ 850 if (((void *)ctrl_eds + sizeof(*ctrl_eds)) > 851 ((void *)data + num_bytes)) 852 continue; 853 /* 854 * Dynamic controller, set cntlid to 0xffff. 855 */ 856 ctrl_eds->cntlid = cpu_to_le16(NVME_CNTLID_DYNAMIC); 857 if (rtype == NVME_PR_WRITE_EXCLUSIVE_ALL_REGS || 858 rtype == NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS) 859 ctrl_eds->rcsts = 1; 860 if (reg == holder) 861 ctrl_eds->rcsts = 1; 862 uuid_copy((uuid_t *)&ctrl_eds->hostid, ®->hostid); 863 ctrl_eds->rkey = cpu_to_le64(reg->rkey); 864 ctrl_eds++; 865 } 866 rcu_read_unlock(); 867 868 put_unaligned_le16(num_ctrls, data->regctl); 869 status = nvmet_copy_to_sgl(req, 0, data, num_bytes); 870 kfree(data); 871 out: 872 nvmet_req_complete(req, status); 873 } 874 875 u16 nvmet_parse_pr_cmd(struct nvmet_req *req) 876 { 877 struct nvme_command *cmd = req->cmd; 878 879 switch (cmd->common.opcode) { 880 case nvme_cmd_resv_register: 881 req->execute = nvmet_execute_pr_register; 882 break; 883 case nvme_cmd_resv_acquire: 884 req->execute = nvmet_execute_pr_acquire; 885 break; 886 case nvme_cmd_resv_release: 887 req->execute = nvmet_execute_pr_release; 888 break; 889 case nvme_cmd_resv_report: 890 req->execute = nvmet_execute_pr_report; 891 break; 892 default: 893 return 1; 894 } 895 return NVME_SC_SUCCESS; 896 } 897 898 static bool nvmet_is_req_write_cmd_group(struct nvmet_req *req) 899 { 900 u8 opcode = req->cmd->common.opcode; 901 902 if (req->sq->qid) { 903 switch (opcode) { 904 case nvme_cmd_flush: 905 case nvme_cmd_write: 906 case nvme_cmd_write_zeroes: 907 case nvme_cmd_dsm: 908 case nvme_cmd_zone_append: 909 case nvme_cmd_zone_mgmt_send: 910 return true; 911 default: 912 return false; 913 } 914 } 915 return false; 916 } 917 918 static bool nvmet_is_req_read_cmd_group(struct nvmet_req *req) 919 { 920 u8 opcode = req->cmd->common.opcode; 921 922 if (req->sq->qid) { 923 switch (opcode) { 924 case nvme_cmd_read: 925 case nvme_cmd_zone_mgmt_recv: 926 return true; 927 default: 928 return false; 929 } 930 } 931 return false; 932 } 933 934 u16 nvmet_pr_check_cmd_access(struct nvmet_req *req) 935 { 936 struct nvmet_ctrl *ctrl = req->sq->ctrl; 937 struct nvmet_pr_registrant *holder; 938 struct nvmet_ns *ns = req->ns; 939 struct nvmet_pr *pr = &ns->pr; 940 u16 status = NVME_SC_SUCCESS; 941 942 rcu_read_lock(); 943 holder = rcu_dereference(pr->holder); 944 if (!holder) 945 goto unlock; 946 if (uuid_equal(&ctrl->hostid, &holder->hostid)) 947 goto unlock; 948 949 /* 950 * The Reservation command group is checked in executing, 951 * allow it here. 952 */ 953 switch (holder->rtype) { 954 case NVME_PR_WRITE_EXCLUSIVE: 955 if (nvmet_is_req_write_cmd_group(req)) 956 status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 957 break; 958 case NVME_PR_EXCLUSIVE_ACCESS: 959 if (nvmet_is_req_read_cmd_group(req) || 960 nvmet_is_req_write_cmd_group(req)) 961 status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 962 break; 963 case NVME_PR_WRITE_EXCLUSIVE_REG_ONLY: 964 case NVME_PR_WRITE_EXCLUSIVE_ALL_REGS: 965 if ((nvmet_is_req_write_cmd_group(req)) && 966 !nvmet_pr_find_registrant(pr, &ctrl->hostid)) 967 status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 968 break; 969 case NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY: 970 case NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS: 971 if ((nvmet_is_req_read_cmd_group(req) || 972 nvmet_is_req_write_cmd_group(req)) && 973 !nvmet_pr_find_registrant(pr, &ctrl->hostid)) 974 status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR; 975 break; 976 default: 977 pr_warn("the reservation type is set wrong, type:%d\n", 978 holder->rtype); 979 break; 980 } 981 982 unlock: 983 rcu_read_unlock(); 984 if (status) 985 req->error_loc = offsetof(struct nvme_common_command, opcode); 986 return status; 987 } 988 989 u16 nvmet_pr_get_ns_pc_ref(struct nvmet_req *req) 990 { 991 struct nvmet_pr_per_ctrl_ref *pc_ref; 992 993 pc_ref = xa_load(&req->ns->pr_per_ctrl_refs, 994 req->sq->ctrl->cntlid); 995 if (unlikely(!percpu_ref_tryget_live(&pc_ref->ref))) 996 return NVME_SC_INTERNAL; 997 req->pc_ref = pc_ref; 998 return NVME_SC_SUCCESS; 999 } 1000 1001 static void nvmet_pr_ctrl_ns_all_cmds_done(struct percpu_ref *ref) 1002 { 1003 struct nvmet_pr_per_ctrl_ref *pc_ref = 1004 container_of(ref, struct nvmet_pr_per_ctrl_ref, ref); 1005 1006 complete(&pc_ref->free_done); 1007 } 1008 1009 static int nvmet_pr_alloc_and_insert_pc_ref(struct nvmet_ns *ns, 1010 unsigned long idx, 1011 uuid_t *hostid) 1012 { 1013 struct nvmet_pr_per_ctrl_ref *pc_ref; 1014 int ret; 1015 1016 pc_ref = kmalloc(sizeof(*pc_ref), GFP_ATOMIC); 1017 if (!pc_ref) 1018 return -ENOMEM; 1019 1020 ret = percpu_ref_init(&pc_ref->ref, nvmet_pr_ctrl_ns_all_cmds_done, 1021 PERCPU_REF_ALLOW_REINIT, GFP_KERNEL); 1022 if (ret) 1023 goto free; 1024 1025 init_completion(&pc_ref->free_done); 1026 init_completion(&pc_ref->confirm_done); 1027 uuid_copy(&pc_ref->hostid, hostid); 1028 1029 ret = xa_insert(&ns->pr_per_ctrl_refs, idx, pc_ref, GFP_KERNEL); 1030 if (ret) 1031 goto exit; 1032 return ret; 1033 exit: 1034 percpu_ref_exit(&pc_ref->ref); 1035 free: 1036 kfree(pc_ref); 1037 return ret; 1038 } 1039 1040 int nvmet_ctrl_init_pr(struct nvmet_ctrl *ctrl) 1041 { 1042 struct nvmet_subsys *subsys = ctrl->subsys; 1043 struct nvmet_pr_per_ctrl_ref *pc_ref; 1044 struct nvmet_ns *ns = NULL; 1045 unsigned long idx; 1046 int ret; 1047 1048 ctrl->pr_log_mgr.counter = 0; 1049 ctrl->pr_log_mgr.lost_count = 0; 1050 mutex_init(&ctrl->pr_log_mgr.lock); 1051 INIT_KFIFO(ctrl->pr_log_mgr.log_queue); 1052 1053 /* 1054 * Here we are under subsys lock, if an ns not in subsys->namespaces, 1055 * we can make sure that ns is not enabled, and not call 1056 * nvmet_pr_init_ns(), see more details in nvmet_ns_enable(). 1057 * So just check ns->pr.enable. 1058 */ 1059 xa_for_each(&subsys->namespaces, idx, ns) { 1060 if (ns->pr.enable) { 1061 ret = nvmet_pr_alloc_and_insert_pc_ref(ns, ctrl->cntlid, 1062 &ctrl->hostid); 1063 if (ret) 1064 goto free_per_ctrl_refs; 1065 } 1066 } 1067 return 0; 1068 1069 free_per_ctrl_refs: 1070 xa_for_each(&subsys->namespaces, idx, ns) { 1071 if (ns->pr.enable) { 1072 pc_ref = xa_erase(&ns->pr_per_ctrl_refs, ctrl->cntlid); 1073 if (pc_ref) 1074 percpu_ref_exit(&pc_ref->ref); 1075 kfree(pc_ref); 1076 } 1077 } 1078 return ret; 1079 } 1080 1081 void nvmet_ctrl_destroy_pr(struct nvmet_ctrl *ctrl) 1082 { 1083 struct nvmet_pr_per_ctrl_ref *pc_ref; 1084 struct nvmet_ns *ns; 1085 unsigned long idx; 1086 1087 kfifo_free(&ctrl->pr_log_mgr.log_queue); 1088 mutex_destroy(&ctrl->pr_log_mgr.lock); 1089 1090 xa_for_each(&ctrl->subsys->namespaces, idx, ns) { 1091 if (ns->pr.enable) { 1092 pc_ref = xa_erase(&ns->pr_per_ctrl_refs, ctrl->cntlid); 1093 if (pc_ref) 1094 percpu_ref_exit(&pc_ref->ref); 1095 kfree(pc_ref); 1096 } 1097 } 1098 } 1099 1100 int nvmet_pr_init_ns(struct nvmet_ns *ns) 1101 { 1102 struct nvmet_subsys *subsys = ns->subsys; 1103 struct nvmet_pr_per_ctrl_ref *pc_ref; 1104 struct nvmet_ctrl *ctrl = NULL; 1105 unsigned long idx; 1106 int ret; 1107 1108 ns->pr.holder = NULL; 1109 atomic_set(&ns->pr.generation, 0); 1110 sema_init(&ns->pr.pr_sem, 1); 1111 INIT_LIST_HEAD(&ns->pr.registrant_list); 1112 ns->pr.notify_mask = 0; 1113 1114 xa_init(&ns->pr_per_ctrl_refs); 1115 1116 list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { 1117 ret = nvmet_pr_alloc_and_insert_pc_ref(ns, ctrl->cntlid, 1118 &ctrl->hostid); 1119 if (ret) 1120 goto free_per_ctrl_refs; 1121 } 1122 return 0; 1123 1124 free_per_ctrl_refs: 1125 xa_for_each(&ns->pr_per_ctrl_refs, idx, pc_ref) { 1126 xa_erase(&ns->pr_per_ctrl_refs, idx); 1127 percpu_ref_exit(&pc_ref->ref); 1128 kfree(pc_ref); 1129 } 1130 return ret; 1131 } 1132 1133 void nvmet_pr_exit_ns(struct nvmet_ns *ns) 1134 { 1135 struct nvmet_pr_registrant *reg, *tmp; 1136 struct nvmet_pr_per_ctrl_ref *pc_ref; 1137 struct nvmet_pr *pr = &ns->pr; 1138 unsigned long idx; 1139 1140 list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) { 1141 list_del(®->entry); 1142 kfree(reg); 1143 } 1144 1145 xa_for_each(&ns->pr_per_ctrl_refs, idx, pc_ref) { 1146 /* 1147 * No command on ns here, we can safely free pc_ref. 1148 */ 1149 pc_ref = xa_erase(&ns->pr_per_ctrl_refs, idx); 1150 percpu_ref_exit(&pc_ref->ref); 1151 kfree(pc_ref); 1152 } 1153 1154 xa_destroy(&ns->pr_per_ctrl_refs); 1155 } 1156