// SPDX-License-Identifier: GPL-2.0
/*
 * NVMe over Fabrics Persist Reservation.
 * Copyright (c) 2024 Guixin Liu, Alibaba Group.
 * All rights reserved.
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/unaligned.h>
#include "nvmet.h"

#define NVMET_PR_NOTIFI_MASK_ALL \
	(1 << NVME_PR_NOTIFY_BIT_REG_PREEMPTED | \
	 1 << NVME_PR_NOTIFY_BIT_RESV_RELEASED | \
	 1 << NVME_PR_NOTIFY_BIT_RESV_PREEMPTED)

static inline bool nvmet_pr_parse_ignore_key(u32 cdw10)
{
	/* Ignore existing key, bit 03. */
	return (cdw10 >> 3) & 1;
}

static inline struct nvmet_ns *nvmet_pr_to_ns(struct nvmet_pr *pr)
{
	return container_of(pr, struct nvmet_ns, pr);
}

static struct nvmet_pr_registrant *
nvmet_pr_find_registrant(struct nvmet_pr *pr, uuid_t *hostid)
{
	struct nvmet_pr_registrant *reg;

	list_for_each_entry_rcu(reg, &pr->registrant_list, entry) {
		if (uuid_equal(&reg->hostid, hostid))
			return reg;
	}
	return NULL;
}

u16 nvmet_set_feat_resv_notif_mask(struct nvmet_req *req, u32 mask)
{
	u32 nsid = le32_to_cpu(req->cmd->common.nsid);
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvmet_ns *ns;
	unsigned long idx;
	u16 status;

	if (mask & ~(NVMET_PR_NOTIFI_MASK_ALL)) {
		req->error_loc = offsetof(struct nvme_common_command, cdw11);
		return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
	}

	if (nsid != U32_MAX) {
		status = nvmet_req_find_ns(req);
		if (status)
			return status;
		if (!req->ns->pr.enable)
			return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;

		WRITE_ONCE(req->ns->pr.notify_mask, mask);
		goto success;
	}

	xa_for_each(&ctrl->subsys->namespaces, idx, ns) {
		if (ns->pr.enable)
			WRITE_ONCE(ns->pr.notify_mask, mask);
	}

success:
	nvmet_set_result(req, mask);
	return NVME_SC_SUCCESS;
}

u16 nvmet_get_feat_resv_notif_mask(struct nvmet_req *req)
{
	u16 status;

	status = nvmet_req_find_ns(req);
	if (status)
		return status;

	if (!req->ns->pr.enable)
		return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;

	nvmet_set_result(req, READ_ONCE(req->ns->pr.notify_mask));
	return status;
}

void nvmet_execute_get_log_page_resv(struct nvmet_req *req)
{
	struct nvmet_pr_log_mgr *log_mgr = &req->sq->ctrl->pr_log_mgr;
	struct nvme_pr_log next_log = {0};
	struct nvme_pr_log log = {0};
	u16 status = NVME_SC_SUCCESS;
	u64 lost_count;
	u64 cur_count;
	u64 next_count;

	mutex_lock(&log_mgr->lock);
	if (!kfifo_get(&log_mgr->log_queue, &log))
		goto out;

	/*
	 * We cannot look at the last entry in the kfifo, so use the count of
	 * the current log and the count of the next log to calculate the
	 * number of lost logs, handling counter wrap-around as well. If there
	 * is no subsequent log, the number of lost logs equals the lost_count
	 * tracked in nvmet_pr_log_mgr.
	 */
	cur_count = le64_to_cpu(log.count);
	if (kfifo_peek(&log_mgr->log_queue, &next_log)) {
		next_count = le64_to_cpu(next_log.count);
		if (next_count > cur_count)
			lost_count = next_count - cur_count - 1;
		else
			lost_count = U64_MAX - cur_count + next_count - 1;
	} else {
		lost_count = log_mgr->lost_count;
	}

	log.count = cpu_to_le64((cur_count + lost_count) == 0 ?
				1 : (cur_count + lost_count));
	log_mgr->lost_count -= lost_count;

	log.nr_pages = kfifo_len(&log_mgr->log_queue);

out:
	status = nvmet_copy_to_sgl(req, 0, &log, sizeof(log));
	mutex_unlock(&log_mgr->lock);
	nvmet_req_complete(req, status);
}

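/*
 * Queue a reservation notification log entry for @ctrl. The per-controller
 * counter restarts at 1 after wrapping (a count of 0 is never reported), and
 * entries that do not fit in the kfifo are accounted in lost_count so that a
 * reader of the log page can reconstruct how many entries were dropped.
 */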
static void nvmet_pr_add_resv_log(struct nvmet_ctrl *ctrl, u8 log_type,
				  u32 nsid)
{
	struct nvmet_pr_log_mgr *log_mgr = &ctrl->pr_log_mgr;
	struct nvme_pr_log log = {0};

	mutex_lock(&log_mgr->lock);
	log_mgr->counter++;
	if (log_mgr->counter == 0)
		log_mgr->counter = 1;

	log.count = cpu_to_le64(log_mgr->counter);
	log.type = log_type;
	log.nsid = cpu_to_le32(nsid);

	if (!kfifo_put(&log_mgr->log_queue, log)) {
		pr_info("a reservation log lost, cntlid:%d, log_type:%d, nsid:%d\n",
			ctrl->cntlid, log_type, nsid);
		log_mgr->lost_count++;
	}

	mutex_unlock(&log_mgr->lock);
}

static void nvmet_pr_resv_released(struct nvmet_pr *pr, uuid_t *hostid)
{
	struct nvmet_ns *ns = nvmet_pr_to_ns(pr);
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;

	if (test_bit(NVME_PR_NOTIFY_BIT_RESV_RELEASED, &pr->notify_mask))
		return;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (!uuid_equal(&ctrl->hostid, hostid) &&
		    nvmet_pr_find_registrant(pr, &ctrl->hostid)) {
			nvmet_pr_add_resv_log(ctrl,
				NVME_PR_LOG_RESERVATION_RELEASED, ns->nsid);
			nvmet_add_async_event(ctrl, NVME_AER_CSS,
				NVME_AEN_RESV_LOG_PAGE_AVALIABLE,
				NVME_LOG_RESERVATION);
		}
	}
	mutex_unlock(&subsys->lock);
}

static void nvmet_pr_send_event_to_host(struct nvmet_pr *pr, uuid_t *hostid,
					u8 log_type)
{
	struct nvmet_ns *ns = nvmet_pr_to_ns(pr);
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_ctrl *ctrl;

	mutex_lock(&subsys->lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		if (uuid_equal(hostid, &ctrl->hostid)) {
			nvmet_pr_add_resv_log(ctrl, log_type, ns->nsid);
			nvmet_add_async_event(ctrl, NVME_AER_CSS,
				NVME_AEN_RESV_LOG_PAGE_AVALIABLE,
				NVME_LOG_RESERVATION);
		}
	}
	mutex_unlock(&subsys->lock);
}

static void nvmet_pr_resv_preempted(struct nvmet_pr *pr, uuid_t *hostid)
{
	if (test_bit(NVME_PR_NOTIFY_BIT_RESV_PREEMPTED, &pr->notify_mask))
		return;

	nvmet_pr_send_event_to_host(pr, hostid,
				    NVME_PR_LOG_RESERVATOIN_PREEMPTED);
}

static void nvmet_pr_registration_preempted(struct nvmet_pr *pr,
					     uuid_t *hostid)
{
	if (test_bit(NVME_PR_NOTIFY_BIT_REG_PREEMPTED, &pr->notify_mask))
		return;

	nvmet_pr_send_event_to_host(pr, hostid,
				    NVME_PR_LOG_REGISTRATION_PREEMPTED);
}

static inline void nvmet_pr_set_new_holder(struct nvmet_pr *pr, u8 new_rtype,
					   struct nvmet_pr_registrant *reg)
{
	reg->rtype = new_rtype;
	rcu_assign_pointer(pr->holder, reg);
}

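/*
 * Locking model for the handlers below: every Reservation Register, Acquire
 * and Release handler serializes on pr->pr_sem, while the registrant list and
 * the holder pointer are published via RCU so that the per-command access
 * check (nvmet_pr_check_cmd_access()) can read them without the semaphore.
 */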
static u16 nvmet_pr_register(struct nvmet_req *req,
			     struct nvmet_pr_register_data *d)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvmet_pr_registrant *new, *reg;
	struct nvmet_pr *pr = &req->ns->pr;
	u16 status = NVME_SC_SUCCESS;
	u64 nrkey = le64_to_cpu(d->nrkey);

	new = kmalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return NVME_SC_INTERNAL;

	down(&pr->pr_sem);
	reg = nvmet_pr_find_registrant(pr, &ctrl->hostid);
	if (reg) {
		if (reg->rkey != nrkey)
			status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
		kfree(new);
		goto out;
	}

	memset(new, 0, sizeof(*new));
	INIT_LIST_HEAD(&new->entry);
	new->rkey = nrkey;
	uuid_copy(&new->hostid, &ctrl->hostid);
	list_add_tail_rcu(&new->entry, &pr->registrant_list);

out:
	up(&pr->pr_sem);
	return status;
}

static void nvmet_pr_unregister_one(struct nvmet_pr *pr,
				    struct nvmet_pr_registrant *reg)
{
	struct nvmet_pr_registrant *first_reg;
	struct nvmet_pr_registrant *holder;
	u8 original_rtype;

	list_del_rcu(&reg->entry);

	holder = rcu_dereference_protected(pr->holder, 1);
	if (reg != holder)
		goto out;

	original_rtype = holder->rtype;
	if (original_rtype == NVME_PR_WRITE_EXCLUSIVE_ALL_REGS ||
	    original_rtype == NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS) {
		first_reg = list_first_or_null_rcu(&pr->registrant_list,
				struct nvmet_pr_registrant, entry);
		if (first_reg)
			first_reg->rtype = original_rtype;
		rcu_assign_pointer(pr->holder, first_reg);
	} else {
		rcu_assign_pointer(pr->holder, NULL);

		if (original_rtype == NVME_PR_WRITE_EXCLUSIVE_REG_ONLY ||
		    original_rtype == NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY)
			nvmet_pr_resv_released(pr, &reg->hostid);
	}
out:
	kfree_rcu(reg, rcu);
}

static u16 nvmet_pr_unregister(struct nvmet_req *req,
			       struct nvmet_pr_register_data *d,
			       bool ignore_key)
{
	u16 status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvmet_pr *pr = &req->ns->pr;
	struct nvmet_pr_registrant *reg;

	down(&pr->pr_sem);
	list_for_each_entry_rcu(reg, &pr->registrant_list, entry) {
		if (uuid_equal(&reg->hostid, &ctrl->hostid)) {
			if (ignore_key || reg->rkey == le64_to_cpu(d->crkey)) {
				status = NVME_SC_SUCCESS;
				nvmet_pr_unregister_one(pr, reg);
			}
			break;
		}
	}
	up(&pr->pr_sem);

	return status;
}

static void nvmet_pr_update_reg_rkey(struct nvmet_pr_registrant *reg,
				     void *attr)
{
	reg->rkey = *(u64 *)attr;
}

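/*
 * Update an attribute of a registrant. If the registrant is not the current
 * reservation holder it can be modified in place; otherwise a copy is made,
 * updated, and published with list_replace_rcu()/rcu_assign_pointer() so that
 * lockless readers of the holder never observe a partially updated entry.
 */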
static u16 nvmet_pr_update_reg_attr(struct nvmet_pr *pr,
			struct nvmet_pr_registrant *reg,
			void (*change_attr)(struct nvmet_pr_registrant *reg,
					    void *attr),
			void *attr)
{
	struct nvmet_pr_registrant *holder;
	struct nvmet_pr_registrant *new;

	holder = rcu_dereference_protected(pr->holder, 1);
	if (reg != holder) {
		change_attr(reg, attr);
		return NVME_SC_SUCCESS;
	}

	new = kmalloc(sizeof(*new), GFP_ATOMIC);
	if (!new)
		return NVME_SC_INTERNAL;

	new->rkey = holder->rkey;
	new->rtype = holder->rtype;
	uuid_copy(&new->hostid, &holder->hostid);
	INIT_LIST_HEAD(&new->entry);

	change_attr(new, attr);
	list_replace_rcu(&holder->entry, &new->entry);
	rcu_assign_pointer(pr->holder, new);
	kfree_rcu(holder, rcu);

	return NVME_SC_SUCCESS;
}

static u16 nvmet_pr_replace(struct nvmet_req *req,
			    struct nvmet_pr_register_data *d,
			    bool ignore_key)
{
	u16 status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvmet_pr *pr = &req->ns->pr;
	struct nvmet_pr_registrant *reg;
	u64 nrkey = le64_to_cpu(d->nrkey);

	down(&pr->pr_sem);
	list_for_each_entry_rcu(reg, &pr->registrant_list, entry) {
		if (uuid_equal(&reg->hostid, &ctrl->hostid)) {
			if (ignore_key || reg->rkey == le64_to_cpu(d->crkey))
				status = nvmet_pr_update_reg_attr(pr, reg,
						nvmet_pr_update_reg_rkey,
						&nrkey);
			break;
		}
	}
	up(&pr->pr_sem);
	return status;
}

static void nvmet_execute_pr_register(struct nvmet_req *req)
{
	u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
	bool ignore_key = nvmet_pr_parse_ignore_key(cdw10);
	struct nvmet_pr_register_data *d;
	u8 reg_act = cdw10 & 0x07; /* Reservation Register Action, bit 02:00 */
	u16 status;

	d = kmalloc(sizeof(*d), GFP_KERNEL);
	if (!d) {
		status = NVME_SC_INTERNAL;
		goto out;
	}

	status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d));
	if (status)
		goto free_data;

	switch (reg_act) {
	case NVME_PR_REGISTER_ACT_REG:
		status = nvmet_pr_register(req, d);
		break;
	case NVME_PR_REGISTER_ACT_UNREG:
		status = nvmet_pr_unregister(req, d, ignore_key);
		break;
	case NVME_PR_REGISTER_ACT_REPLACE:
		status = nvmet_pr_replace(req, d, ignore_key);
		break;
	default:
		req->error_loc = offsetof(struct nvme_common_command, cdw10);
		status = NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
		break;
	}
free_data:
	kfree(d);
out:
	if (!status)
		atomic_inc(&req->ns->pr.generation);
	nvmet_req_complete(req, status);
}

static u16 nvmet_pr_acquire(struct nvmet_req *req,
			    struct nvmet_pr_registrant *reg,
			    u8 rtype)
{
	struct nvmet_pr *pr = &req->ns->pr;
	struct nvmet_pr_registrant *holder;

	holder = rcu_dereference_protected(pr->holder, 1);
	if (holder && reg != holder)
		return NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
	if (holder && reg == holder) {
		if (holder->rtype == rtype)
			return NVME_SC_SUCCESS;
		return NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
	}

	nvmet_pr_set_new_holder(pr, rtype, reg);
	return NVME_SC_SUCCESS;
}

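/*
 * Preempt-and-abort support: each (namespace, controller) pair has a percpu
 * reference that every command on that namespace holds while executing (see
 * nvmet_pr_get_ns_pc_ref()). To "abort" a preempted host, its reference is
 * killed and confirmed here, and nvmet_pr_do_abort() later waits for the
 * count to drain to zero before resurrecting it, which guarantees that all of
 * that host's in-flight commands have completed.
 */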
static void nvmet_pr_confirm_ns_pc_ref(struct percpu_ref *ref)
{
	struct nvmet_pr_per_ctrl_ref *pc_ref =
		container_of(ref, struct nvmet_pr_per_ctrl_ref, ref);

	complete(&pc_ref->confirm_done);
}

static void nvmet_pr_set_ctrl_to_abort(struct nvmet_req *req, uuid_t *hostid)
{
	struct nvmet_pr_per_ctrl_ref *pc_ref;
	struct nvmet_ns *ns = req->ns;
	unsigned long idx;

	xa_for_each(&ns->pr_per_ctrl_refs, idx, pc_ref) {
		if (uuid_equal(&pc_ref->hostid, hostid)) {
			percpu_ref_kill_and_confirm(&pc_ref->ref,
						nvmet_pr_confirm_ns_pc_ref);
			wait_for_completion(&pc_ref->confirm_done);
		}
	}
}

static u16 nvmet_pr_unreg_all_host_by_prkey(struct nvmet_req *req, u64 prkey,
					    uuid_t *send_hostid,
					    bool abort)
{
	u16 status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
	struct nvmet_pr_registrant *reg, *tmp;
	struct nvmet_pr *pr = &req->ns->pr;
	uuid_t hostid;

	list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) {
		if (reg->rkey == prkey) {
			status = NVME_SC_SUCCESS;
			uuid_copy(&hostid, &reg->hostid);
			if (abort)
				nvmet_pr_set_ctrl_to_abort(req, &hostid);
			nvmet_pr_unregister_one(pr, reg);
			if (!uuid_equal(&hostid, send_hostid))
				nvmet_pr_registration_preempted(pr, &hostid);
		}
	}
	return status;
}

static void nvmet_pr_unreg_all_others_by_prkey(struct nvmet_req *req,
					       u64 prkey,
					       uuid_t *send_hostid,
					       bool abort)
{
	struct nvmet_pr_registrant *reg, *tmp;
	struct nvmet_pr *pr = &req->ns->pr;
	uuid_t hostid;

	list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) {
		if (reg->rkey == prkey &&
		    !uuid_equal(&reg->hostid, send_hostid)) {
			uuid_copy(&hostid, &reg->hostid);
			if (abort)
				nvmet_pr_set_ctrl_to_abort(req, &hostid);
			nvmet_pr_unregister_one(pr, reg);
			nvmet_pr_registration_preempted(pr, &hostid);
		}
	}
}

static void nvmet_pr_unreg_all_others(struct nvmet_req *req,
				      uuid_t *send_hostid,
				      bool abort)
{
	struct nvmet_pr_registrant *reg, *tmp;
	struct nvmet_pr *pr = &req->ns->pr;
	uuid_t hostid;

	list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) {
		if (!uuid_equal(&reg->hostid, send_hostid)) {
			uuid_copy(&hostid, &reg->hostid);
			if (abort)
				nvmet_pr_set_ctrl_to_abort(req, &hostid);
			nvmet_pr_unregister_one(pr, reg);
			nvmet_pr_registration_preempted(pr, &hostid);
		}
	}
}

static void nvmet_pr_update_holder_rtype(struct nvmet_pr_registrant *reg,
					 void *attr)
{
	u8 new_rtype = *(u8 *)attr;

	reg->rtype = new_rtype;
}

static u16 nvmet_pr_preempt(struct nvmet_req *req,
			    struct nvmet_pr_registrant *reg,
			    u8 rtype,
			    struct nvmet_pr_acquire_data *d,
			    bool abort)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvmet_pr *pr = &req->ns->pr;
	struct nvmet_pr_registrant *holder;
	enum nvme_pr_type original_rtype;
	u64 prkey = le64_to_cpu(d->prkey);
	u16 status;

	holder = rcu_dereference_protected(pr->holder, 1);
	if (!holder)
		return nvmet_pr_unreg_all_host_by_prkey(req, prkey,
					&ctrl->hostid, abort);

	original_rtype = holder->rtype;
	if (original_rtype == NVME_PR_WRITE_EXCLUSIVE_ALL_REGS ||
	    original_rtype == NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS) {
		if (!prkey) {
			/*
			 * To prevent possible access from other hosts and to
			 * avoid terminating the new holder, set the new
			 * holder first, before unregistering.
			 */
			nvmet_pr_set_new_holder(pr, rtype, reg);
			nvmet_pr_unreg_all_others(req, &ctrl->hostid, abort);
			return NVME_SC_SUCCESS;
		}
		return nvmet_pr_unreg_all_host_by_prkey(req, prkey,
				&ctrl->hostid, abort);
	}

	if (holder == reg) {
		status = nvmet_pr_update_reg_attr(pr, holder,
				nvmet_pr_update_holder_rtype, &rtype);
		if (!status && original_rtype != rtype)
			nvmet_pr_resv_released(pr, &reg->hostid);
		return status;
	}

	if (prkey == holder->rkey) {
		/*
		 * Same as above: set the new holder first.
		 */
		nvmet_pr_set_new_holder(pr, rtype, reg);
		nvmet_pr_unreg_all_others_by_prkey(req, prkey, &ctrl->hostid,
						abort);
		if (original_rtype != rtype)
			nvmet_pr_resv_released(pr, &reg->hostid);
		return NVME_SC_SUCCESS;
	}

	if (prkey)
		return nvmet_pr_unreg_all_host_by_prkey(req, prkey,
					&ctrl->hostid, abort);
	return NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
}

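/*
 * Work item queued by nvmet_execute_pr_acquire() for Preempt and Abort. It
 * runs from nvmet_wq with pr->pr_sem still held, waits for the killed
 * per-controller refs to drain, resurrects them, and only then releases the
 * semaphore and completes the request.
 */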
static void nvmet_pr_do_abort(struct work_struct *w)
{
	struct nvmet_req *req = container_of(w, struct nvmet_req, r.abort_work);
	struct nvmet_pr_per_ctrl_ref *pc_ref;
	struct nvmet_ns *ns = req->ns;
	unsigned long idx;

	/*
	 * The target does not support command abort, so just wait for the
	 * per-controller reference of each aborted host to drop to zero.
	 */
	xa_for_each(&ns->pr_per_ctrl_refs, idx, pc_ref) {
		if (percpu_ref_is_dying(&pc_ref->ref)) {
			wait_for_completion(&pc_ref->free_done);
			reinit_completion(&pc_ref->confirm_done);
			reinit_completion(&pc_ref->free_done);
			percpu_ref_resurrect(&pc_ref->ref);
		}
	}

	up(&ns->pr.pr_sem);
	nvmet_req_complete(req, NVME_SC_SUCCESS);
}

static u16 __nvmet_execute_pr_acquire(struct nvmet_req *req,
				      struct nvmet_pr_registrant *reg,
				      u8 acquire_act,
				      u8 rtype,
				      struct nvmet_pr_acquire_data *d)
{
	u16 status;

	switch (acquire_act) {
	case NVME_PR_ACQUIRE_ACT_ACQUIRE:
		status = nvmet_pr_acquire(req, reg, rtype);
		goto out;
	case NVME_PR_ACQUIRE_ACT_PREEMPT:
		status = nvmet_pr_preempt(req, reg, rtype, d, false);
		goto inc_gen;
	case NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT:
		status = nvmet_pr_preempt(req, reg, rtype, d, true);
		goto inc_gen;
	default:
		req->error_loc = offsetof(struct nvme_common_command, cdw10);
		status = NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
		goto out;
	}
inc_gen:
	if (!status)
		atomic_inc(&req->ns->pr.generation);
out:
	return status;
}

static void nvmet_execute_pr_acquire(struct nvmet_req *req)
{
	u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
	bool ignore_key = nvmet_pr_parse_ignore_key(cdw10);
	/* Reservation type, bit 15:08 */
	u8 rtype = (u8)((cdw10 >> 8) & 0xff);
	/* Reservation acquire action, bit 02:00 */
	u8 acquire_act = cdw10 & 0x07;
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvmet_pr_acquire_data *d = NULL;
	struct nvmet_pr *pr = &req->ns->pr;
	struct nvmet_pr_registrant *reg;
	u16 status = NVME_SC_SUCCESS;

	if (ignore_key ||
	    rtype < NVME_PR_WRITE_EXCLUSIVE ||
	    rtype > NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS) {
		status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
		goto out;
	}

	d = kmalloc(sizeof(*d), GFP_KERNEL);
	if (!d) {
		status = NVME_SC_INTERNAL;
		goto out;
	}

	status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d));
	if (status)
		goto free_data;

	status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
	down(&pr->pr_sem);
	list_for_each_entry_rcu(reg, &pr->registrant_list, entry) {
		if (uuid_equal(&reg->hostid, &ctrl->hostid) &&
		    reg->rkey == le64_to_cpu(d->crkey)) {
			status = __nvmet_execute_pr_acquire(req, reg,
					acquire_act, rtype, d);
			break;
		}
	}

	if (!status && acquire_act == NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT) {
		kfree(d);
		INIT_WORK(&req->r.abort_work, nvmet_pr_do_abort);
		queue_work(nvmet_wq, &req->r.abort_work);
		return;
	}

	up(&pr->pr_sem);

free_data:
	kfree(d);
out:
	nvmet_req_complete(req, status);
}

static u16 nvmet_pr_release(struct nvmet_req *req,
			    struct nvmet_pr_registrant *reg,
			    u8 rtype)
{
	struct nvmet_pr *pr = &req->ns->pr;
	struct nvmet_pr_registrant *holder;
	u8 original_rtype;

	holder = rcu_dereference_protected(pr->holder, 1);
	if (!holder || reg != holder)
		return NVME_SC_SUCCESS;

	original_rtype = holder->rtype;
	if (original_rtype != rtype)
		return NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;

	rcu_assign_pointer(pr->holder, NULL);

	if (original_rtype != NVME_PR_WRITE_EXCLUSIVE &&
	    original_rtype != NVME_PR_EXCLUSIVE_ACCESS)
		nvmet_pr_resv_released(pr, &reg->hostid);

	return NVME_SC_SUCCESS;
}

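/*
 * Clear: drop the reservation and remove every registrant. Each registrant
 * other than the sending host gets a Reservation Preempted notification, and
 * the generation counter is bumped.
 */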
static void nvmet_pr_clear(struct nvmet_req *req)
{
	struct nvmet_pr_registrant *reg, *tmp;
	struct nvmet_pr *pr = &req->ns->pr;

	rcu_assign_pointer(pr->holder, NULL);

	list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) {
		list_del_rcu(&reg->entry);
		if (!uuid_equal(&req->sq->ctrl->hostid, &reg->hostid))
			nvmet_pr_resv_preempted(pr, &reg->hostid);
		kfree_rcu(reg, rcu);
	}

	atomic_inc(&pr->generation);
}

static u16 __nvmet_execute_pr_release(struct nvmet_req *req,
				      struct nvmet_pr_registrant *reg,
				      u8 release_act, u8 rtype)
{
	switch (release_act) {
	case NVME_PR_RELEASE_ACT_RELEASE:
		return nvmet_pr_release(req, reg, rtype);
	case NVME_PR_RELEASE_ACT_CLEAR:
		nvmet_pr_clear(req);
		return NVME_SC_SUCCESS;
	default:
		req->error_loc = offsetof(struct nvme_common_command, cdw10);
		return NVME_SC_INVALID_OPCODE | NVME_STATUS_DNR;
	}
}

static void nvmet_execute_pr_release(struct nvmet_req *req)
{
	u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
	bool ignore_key = nvmet_pr_parse_ignore_key(cdw10);
	u8 rtype = (u8)((cdw10 >> 8) & 0xff); /* Reservation type, bit 15:08 */
	u8 release_act = cdw10 & 0x07; /* Reservation release action, bit 02:00 */
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvmet_pr *pr = &req->ns->pr;
	struct nvmet_pr_release_data *d;
	struct nvmet_pr_registrant *reg;
	u16 status;

	if (ignore_key) {
		status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
		goto out;
	}

	d = kmalloc(sizeof(*d), GFP_KERNEL);
	if (!d) {
		status = NVME_SC_INTERNAL;
		goto out;
	}

	status = nvmet_copy_from_sgl(req, 0, d, sizeof(*d));
	if (status)
		goto free_data;

	status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
	down(&pr->pr_sem);
	list_for_each_entry_rcu(reg, &pr->registrant_list, entry) {
		if (uuid_equal(&reg->hostid, &ctrl->hostid) &&
		    reg->rkey == le64_to_cpu(d->crkey)) {
			status = __nvmet_execute_pr_release(req, reg,
					release_act, rtype);
			break;
		}
	}
	up(&pr->pr_sem);
free_data:
	kfree(d);
out:
	nvmet_req_complete(req, status);
}

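/*
 * Reservation Report: only the extended data structure (EDS set in CDW11) is
 * supported, because nvmet host IDs are 128-bit UUIDs. regctl always reports
 * the total number of registrants, even when the host buffer described by
 * CDW10 is too small to return all of their descriptors.
 */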
static void nvmet_execute_pr_report(struct nvmet_req *req)
{
	u32 cdw11 = le32_to_cpu(req->cmd->common.cdw11);
	u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10);
	u32 num_bytes = 4 * (cdw10 + 1); /* cdw10 is the number of dwords */
	u8 eds = cdw11 & 1; /* Extended data structure, bit 00 */
	struct nvme_registered_ctrl_ext *ctrl_eds;
	struct nvme_reservation_status_ext *data;
	struct nvmet_pr *pr = &req->ns->pr;
	struct nvmet_pr_registrant *holder;
	struct nvmet_pr_registrant *reg;
	u16 num_ctrls = 0;
	u16 status;
	u8 rtype;

	/* The nvmet host ID (uuid_t) is 128 bits wide. */
	if (!eds) {
		req->error_loc = offsetof(struct nvme_common_command, cdw11);
		status = NVME_SC_HOST_ID_INCONSIST | NVME_STATUS_DNR;
		goto out;
	}

	if (num_bytes < sizeof(struct nvme_reservation_status_ext)) {
		req->error_loc = offsetof(struct nvme_common_command, cdw10);
		status = NVME_SC_INVALID_FIELD | NVME_STATUS_DNR;
		goto out;
	}

	data = kmalloc(num_bytes, GFP_KERNEL);
	if (!data) {
		status = NVME_SC_INTERNAL;
		goto out;
	}
	memset(data, 0, num_bytes);
	data->gen = cpu_to_le32(atomic_read(&pr->generation));
	data->ptpls = 0;
	ctrl_eds = data->regctl_eds;

	rcu_read_lock();
	holder = rcu_dereference(pr->holder);
	rtype = holder ? holder->rtype : 0;
	data->rtype = rtype;

	list_for_each_entry_rcu(reg, &pr->registrant_list, entry) {
		num_ctrls++;
		/*
		 * Keep iterating to count all registrants, even once the
		 * buffer is full.
		 */
		if (((void *)ctrl_eds + sizeof(*ctrl_eds)) >
		    ((void *)data + num_bytes))
			continue;
		/*
		 * Dynamic controllers are reported with cntlid 0xffff.
		 */
		ctrl_eds->cntlid = cpu_to_le16(NVME_CNTLID_DYNAMIC);
		if (rtype == NVME_PR_WRITE_EXCLUSIVE_ALL_REGS ||
		    rtype == NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS)
			ctrl_eds->rcsts = 1;
		if (reg == holder)
			ctrl_eds->rcsts = 1;
		uuid_copy((uuid_t *)&ctrl_eds->hostid, &reg->hostid);
		ctrl_eds->rkey = cpu_to_le64(reg->rkey);
		ctrl_eds++;
	}
	rcu_read_unlock();

	put_unaligned_le16(num_ctrls, data->regctl);
	status = nvmet_copy_to_sgl(req, 0, data, num_bytes);
	kfree(data);
out:
	nvmet_req_complete(req, status);
}

u16 nvmet_parse_pr_cmd(struct nvmet_req *req)
{
	struct nvme_command *cmd = req->cmd;

	switch (cmd->common.opcode) {
	case nvme_cmd_resv_register:
		req->execute = nvmet_execute_pr_register;
		break;
	case nvme_cmd_resv_acquire:
		req->execute = nvmet_execute_pr_acquire;
		break;
	case nvme_cmd_resv_release:
		req->execute = nvmet_execute_pr_release;
		break;
	case nvme_cmd_resv_report:
		req->execute = nvmet_execute_pr_report;
		break;
	default:
		return 1;
	}
	return NVME_SC_SUCCESS;
}

static bool nvmet_is_req_write_cmd_group(struct nvmet_req *req)
{
	u8 opcode = req->cmd->common.opcode;

	if (req->sq->qid) {
		switch (opcode) {
		case nvme_cmd_flush:
		case nvme_cmd_write:
		case nvme_cmd_write_zeroes:
		case nvme_cmd_dsm:
		case nvme_cmd_zone_append:
		case nvme_cmd_zone_mgmt_send:
			return true;
		default:
			return false;
		}
	}
	return false;
}

static bool nvmet_is_req_read_cmd_group(struct nvmet_req *req)
{
	u8 opcode = req->cmd->common.opcode;

	if (req->sq->qid) {
		switch (opcode) {
		case nvme_cmd_read:
		case nvme_cmd_zone_mgmt_recv:
			return true;
		default:
			return false;
		}
	}
	return false;
}

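/*
 * Check a command against the current reservation, if any:
 * - Write Exclusive: non-holders may not issue write-group commands.
 * - Exclusive Access: non-holders may not issue read- or write-group commands.
 * - The Registrants Only / All Registrants variants apply the same
 *   restrictions only to hosts that are not registered.
 * Reservation commands themselves are validated by their execute handlers.
 */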
u16 nvmet_pr_check_cmd_access(struct nvmet_req *req)
{
	struct nvmet_ctrl *ctrl = req->sq->ctrl;
	struct nvmet_pr_registrant *holder;
	struct nvmet_ns *ns = req->ns;
	struct nvmet_pr *pr = &ns->pr;
	u16 status = NVME_SC_SUCCESS;

	rcu_read_lock();
	holder = rcu_dereference(pr->holder);
	if (!holder)
		goto unlock;
	if (uuid_equal(&ctrl->hostid, &holder->hostid))
		goto unlock;

	/*
	 * The reservation command group is checked by its execute handlers,
	 * so allow it here.
	 */
	switch (holder->rtype) {
	case NVME_PR_WRITE_EXCLUSIVE:
		if (nvmet_is_req_write_cmd_group(req))
			status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
		break;
	case NVME_PR_EXCLUSIVE_ACCESS:
		if (nvmet_is_req_read_cmd_group(req) ||
		    nvmet_is_req_write_cmd_group(req))
			status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
		break;
	case NVME_PR_WRITE_EXCLUSIVE_REG_ONLY:
	case NVME_PR_WRITE_EXCLUSIVE_ALL_REGS:
		if (nvmet_is_req_write_cmd_group(req) &&
		    !nvmet_pr_find_registrant(pr, &ctrl->hostid))
			status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
		break;
	case NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY:
	case NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS:
		if ((nvmet_is_req_read_cmd_group(req) ||
		    nvmet_is_req_write_cmd_group(req)) &&
		    !nvmet_pr_find_registrant(pr, &ctrl->hostid))
			status = NVME_SC_RESERVATION_CONFLICT | NVME_STATUS_DNR;
		break;
	default:
		pr_warn("the reservation type is set wrong, type:%d\n",
			holder->rtype);
		break;
	}

unlock:
	rcu_read_unlock();
	if (status)
		req->error_loc = offsetof(struct nvme_common_command, opcode);
	return status;
}

u16 nvmet_pr_get_ns_pc_ref(struct nvmet_req *req)
{
	struct nvmet_pr_per_ctrl_ref *pc_ref;

	pc_ref = xa_load(&req->ns->pr_per_ctrl_refs,
			req->sq->ctrl->cntlid);
	if (unlikely(!percpu_ref_tryget_live(&pc_ref->ref)))
		return NVME_SC_INTERNAL;
	req->pc_ref = pc_ref;
	return NVME_SC_SUCCESS;
}

static void nvmet_pr_ctrl_ns_all_cmds_done(struct percpu_ref *ref)
{
	struct nvmet_pr_per_ctrl_ref *pc_ref =
		container_of(ref, struct nvmet_pr_per_ctrl_ref, ref);

	complete(&pc_ref->free_done);
}

static int nvmet_pr_alloc_and_insert_pc_ref(struct nvmet_ns *ns,
					    unsigned long idx,
					    uuid_t *hostid)
{
	struct nvmet_pr_per_ctrl_ref *pc_ref;
	int ret;

	pc_ref = kmalloc(sizeof(*pc_ref), GFP_ATOMIC);
	if (!pc_ref)
		return -ENOMEM;

	ret = percpu_ref_init(&pc_ref->ref, nvmet_pr_ctrl_ns_all_cmds_done,
			PERCPU_REF_ALLOW_REINIT, GFP_KERNEL);
	if (ret)
		goto free;

	init_completion(&pc_ref->free_done);
	init_completion(&pc_ref->confirm_done);
	uuid_copy(&pc_ref->hostid, hostid);

	ret = xa_insert(&ns->pr_per_ctrl_refs, idx, pc_ref, GFP_KERNEL);
	if (ret)
		goto exit;
	return ret;
exit:
	percpu_ref_exit(&pc_ref->ref);
free:
	kfree(pc_ref);
	return ret;
}

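/*
 * Per-controller refs are created in two places: here, when a controller
 * connects, for every PR-enabled namespace, and in nvmet_pr_init_ns() when a
 * namespace is enabled, for every existing controller. They are torn down in
 * nvmet_ctrl_destroy_pr() and nvmet_pr_exit_ns() respectively.
 */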
int nvmet_ctrl_init_pr(struct nvmet_ctrl *ctrl)
{
	struct nvmet_subsys *subsys = ctrl->subsys;
	struct nvmet_pr_per_ctrl_ref *pc_ref;
	struct nvmet_ns *ns = NULL;
	unsigned long idx;
	int ret;

	ctrl->pr_log_mgr.counter = 0;
	ctrl->pr_log_mgr.lost_count = 0;
	mutex_init(&ctrl->pr_log_mgr.lock);
	INIT_KFIFO(ctrl->pr_log_mgr.log_queue);

	/*
	 * We hold the subsys lock here. If a namespace is not in
	 * subsys->namespaces it cannot be enabled, and nvmet_pr_init_ns()
	 * has not been called for it (see nvmet_ns_enable()), so checking
	 * ns->pr.enable is sufficient.
	 */
	xa_for_each(&subsys->namespaces, idx, ns) {
		if (ns->pr.enable) {
			ret = nvmet_pr_alloc_and_insert_pc_ref(ns, ctrl->cntlid,
							&ctrl->hostid);
			if (ret)
				goto free_per_ctrl_refs;
		}
	}
	return 0;

free_per_ctrl_refs:
	xa_for_each(&subsys->namespaces, idx, ns) {
		if (ns->pr.enable) {
			pc_ref = xa_erase(&ns->pr_per_ctrl_refs, ctrl->cntlid);
			if (pc_ref)
				percpu_ref_exit(&pc_ref->ref);
			kfree(pc_ref);
		}
	}
	return ret;
}

void nvmet_ctrl_destroy_pr(struct nvmet_ctrl *ctrl)
{
	struct nvmet_pr_per_ctrl_ref *pc_ref;
	struct nvmet_ns *ns;
	unsigned long idx;

	kfifo_free(&ctrl->pr_log_mgr.log_queue);
	mutex_destroy(&ctrl->pr_log_mgr.lock);

	xa_for_each(&ctrl->subsys->namespaces, idx, ns) {
		if (ns->pr.enable) {
			pc_ref = xa_erase(&ns->pr_per_ctrl_refs, ctrl->cntlid);
			if (pc_ref)
				percpu_ref_exit(&pc_ref->ref);
			kfree(pc_ref);
		}
	}
}

int nvmet_pr_init_ns(struct nvmet_ns *ns)
{
	struct nvmet_subsys *subsys = ns->subsys;
	struct nvmet_pr_per_ctrl_ref *pc_ref;
	struct nvmet_ctrl *ctrl = NULL;
	unsigned long idx;
	int ret;

	ns->pr.holder = NULL;
	atomic_set(&ns->pr.generation, 0);
	sema_init(&ns->pr.pr_sem, 1);
	INIT_LIST_HEAD(&ns->pr.registrant_list);
	ns->pr.notify_mask = 0;

	xa_init(&ns->pr_per_ctrl_refs);

	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
		ret = nvmet_pr_alloc_and_insert_pc_ref(ns, ctrl->cntlid,
						&ctrl->hostid);
		if (ret)
			goto free_per_ctrl_refs;
	}
	return 0;

free_per_ctrl_refs:
	xa_for_each(&ns->pr_per_ctrl_refs, idx, pc_ref) {
		xa_erase(&ns->pr_per_ctrl_refs, idx);
		percpu_ref_exit(&pc_ref->ref);
		kfree(pc_ref);
	}
	return ret;
}

void nvmet_pr_exit_ns(struct nvmet_ns *ns)
{
	struct nvmet_pr_registrant *reg, *tmp;
	struct nvmet_pr_per_ctrl_ref *pc_ref;
	struct nvmet_pr *pr = &ns->pr;
	unsigned long idx;

	list_for_each_entry_safe(reg, tmp, &pr->registrant_list, entry) {
		list_del(&reg->entry);
		kfree(reg);
	}

	xa_for_each(&ns->pr_per_ctrl_refs, idx, pc_ref) {
		/*
		 * No commands are outstanding on this namespace, so pc_ref
		 * can be freed safely.
		 */
		pc_ref = xa_erase(&ns->pr_per_ctrl_refs, idx);
		percpu_ref_exit(&pc_ref->ref);
		kfree(pc_ref);
	}

	xa_destroy(&ns->pr_per_ctrl_refs);
}