/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (c) 2011-2014, Intel Corporation.
 */

#ifndef _NVME_H
#define _NVME_H

#include <linux/nvme.h>
#include <linux/cdev.h>
#include <linux/pci.h>
#include <linux/kref.h>
#include <linux/blk-mq.h>
#include <linux/lightnvm.h>
#include <linux/sed-opal.h>
#include <linux/fault-inject.h>
#include <linux/rcupdate.h>
#include <linux/wait.h>

#include <trace/events/block.h>

extern unsigned int nvme_io_timeout;
#define NVME_IO_TIMEOUT	(nvme_io_timeout * HZ)

extern unsigned int admin_timeout;
#define ADMIN_TIMEOUT	(admin_timeout * HZ)

#define NVME_DEFAULT_KATO	5
#define NVME_KATO_GRACE		10

extern struct workqueue_struct *nvme_wq;
extern struct workqueue_struct *nvme_reset_wq;
extern struct workqueue_struct *nvme_delete_wq;

enum {
	NVME_NS_LBA		= 0,
	NVME_NS_LIGHTNVM	= 1,
};

/*
 * List of workarounds for devices that require behavior not specified in
 * the standard.
 */
enum nvme_quirks {
	/*
	 * Prefers I/O aligned to a stripe size specified in a vendor
	 * specific Identify field.
	 */
	NVME_QUIRK_STRIPE_SIZE			= (1 << 0),

	/*
	 * The controller doesn't handle Identify values other than 0 or 1
	 * correctly.
	 */
	NVME_QUIRK_IDENTIFY_CNS			= (1 << 1),

	/*
	 * The controller deterministically returns 0's on reads to
	 * logical blocks that deallocate was called on.
	 */
	NVME_QUIRK_DEALLOCATE_ZEROES		= (1 << 2),

	/*
	 * The controller needs a delay before it starts checking the device
	 * readiness, which is done by reading the NVME_CSTS_RDY bit.
	 */
	NVME_QUIRK_DELAY_BEFORE_CHK_RDY		= (1 << 3),

	/*
	 * APST should not be used.
	 */
	NVME_QUIRK_NO_APST			= (1 << 4),

	/*
	 * The deepest sleep state should not be used.
	 */
	NVME_QUIRK_NO_DEEPEST_PS		= (1 << 5),

	/*
	 * Supports the LightNVM command set if indicated in vs[1].
	 */
	NVME_QUIRK_LIGHTNVM			= (1 << 6),

	/*
	 * Set MEDIUM priority on SQ creation.
	 */
	NVME_QUIRK_MEDIUM_PRIO_SQ		= (1 << 7),

	/*
	 * Ignore device provided subnqn.
	 */
	NVME_QUIRK_IGNORE_DEV_SUBNQN		= (1 << 8),

	/*
	 * Broken Write Zeroes.
	 */
	NVME_QUIRK_DISABLE_WRITE_ZEROES		= (1 << 9),

	/*
	 * Force simple suspend/resume path.
	 */
	NVME_QUIRK_SIMPLE_SUSPEND		= (1 << 10),

	/*
	 * Use only one interrupt vector for all queues.
	 */
	NVME_QUIRK_SINGLE_VECTOR		= (1 << 11),

	/*
	 * Use non-standard 128-byte SQEs.
	 */
	NVME_QUIRK_128_BYTES_SQES		= (1 << 12),

	/*
	 * Prevent tag overlap between queues.
	 */
	NVME_QUIRK_SHARED_TAGS			= (1 << 13),
};

/*
 * Common request structure for NVMe passthrough.  All drivers must have
 * this structure as the first member of their request-private data.
 */
struct nvme_request {
	struct nvme_command	*cmd;
	union nvme_result	result;
	u8			retries;
	u8			flags;
	u16			status;
	struct nvme_ctrl	*ctrl;
};

/*
 * Mark a bio as coming in through the mpath node.
 */
#define REQ_NVME_MPATH		REQ_DRV

enum {
	NVME_REQ_CANCELLED	= (1 << 0),
	NVME_REQ_USERCMD	= (1 << 1),
};

static inline struct nvme_request *nvme_req(struct request *req)
{
	return blk_mq_rq_to_pdu(req);
}

static inline u16 nvme_req_qid(struct request *req)
{
	if (!req->rq_disk)
		return 0;
	return blk_mq_unique_tag_to_hwq(blk_mq_unique_tag(req)) + 1;
}
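/*
 * Illustrative sketch only (names are hypothetical, not part of this
 * header): a transport driver embeds struct nvme_request at the start of
 * its per-request PDU so that nvme_req() can recover it from the generic
 * struct request, e.g.:
 *
 *	struct foo_transport_request {
 *		struct nvme_request	req;	(must be the first member)
 *		struct scatterlist	*sgl;
 *	};
 *
 *	struct foo_transport_request *r = blk_mq_rq_to_pdu(rq);
 *	nvme_req(rq)->result = cqe->result;
 */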
/* The below value is the specific amount of delay needed before checking
 * readiness in case of the PCI_DEVICE(0x1c58, 0x0003), which needs the
 * NVME_QUIRK_DELAY_BEFORE_CHK_RDY quirk enabled. The value (in ms) was
 * found empirically.
 */
#define NVME_QUIRK_DELAY_AMOUNT		2300

enum nvme_ctrl_state {
	NVME_CTRL_NEW,
	NVME_CTRL_LIVE,
	NVME_CTRL_RESETTING,
	NVME_CTRL_CONNECTING,
	NVME_CTRL_DELETING,
	NVME_CTRL_DEAD,
};

struct nvme_fault_inject {
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
	struct fault_attr attr;
	struct dentry *parent;
	bool dont_retry;	/* DNR, do not retry */
	u16 status;		/* status code */
#endif
};

struct nvme_ctrl {
	bool comp_seen;
	enum nvme_ctrl_state state;
	bool identified;
	spinlock_t lock;
	struct mutex scan_lock;
	const struct nvme_ctrl_ops *ops;
	struct request_queue *admin_q;
	struct request_queue *connect_q;
	struct request_queue *fabrics_q;
	struct device *dev;
	int instance;
	int numa_node;
	struct blk_mq_tag_set *tagset;
	struct blk_mq_tag_set *admin_tagset;
	struct list_head namespaces;
	struct rw_semaphore namespaces_rwsem;
	struct device ctrl_device;
	struct device *device;	/* char device */
	struct cdev cdev;
	struct work_struct reset_work;
	struct work_struct delete_work;
	wait_queue_head_t state_wq;

	struct nvme_subsystem *subsys;
	struct list_head subsys_entry;

	struct opal_dev *opal_dev;

	char name[12];
	u16 cntlid;

	u32 ctrl_config;
	u16 mtfa;
	u32 queue_count;

	u64 cap;
	u32 page_size;
	u32 max_hw_sectors;
	u32 max_segments;
	u16 crdt[3];
	u16 oncs;
	u16 oacs;
	u16 nssa;
	u16 nr_streams;
	u16 sqsize;
	u32 max_namespaces;
	atomic_t abort_limit;
	u8 vwc;
	u32 vs;
	u32 sgls;
	u16 kas;
	u8 npss;
	u8 apsta;
	u32 oaes;
	u32 aen_result;
	u32 ctratt;
	unsigned int shutdown_timeout;
	unsigned int kato;
	bool subsystem;
	unsigned long quirks;
	struct nvme_id_power_state psd[32];
	struct nvme_effects_log *effects;
	struct work_struct scan_work;
	struct work_struct async_event_work;
	struct delayed_work ka_work;
	struct nvme_command ka_cmd;
	struct work_struct fw_act_work;
	unsigned long events;

#ifdef CONFIG_NVME_MULTIPATH
	/* asymmetric namespace access: */
	u8 anacap;
	u8 anatt;
	u32 anagrpmax;
	u32 nanagrpid;
	struct mutex ana_lock;
	struct nvme_ana_rsp_hdr *ana_log_buf;
	size_t ana_log_size;
	struct timer_list anatt_timer;
	struct work_struct ana_work;
#endif

	/* Power saving configuration */
	u64 ps_max_latency_us;
	bool apst_enabled;

	/* PCIe only: */
	u32 hmpre;
	u32 hmmin;
	u32 hmminds;
	u16 hmmaxd;

	/* Fabrics only */
	u32 ioccsz;
	u32 iorcsz;
	u16 icdoff;
	u16 maxcmd;
	int nr_reconnects;
	struct nvmf_ctrl_options *opts;

	struct page *discard_page;
	unsigned long discard_page_busy;

	struct nvme_fault_inject fault_inject;
};
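/*
 * Hedged example of how the quirk bits end up in ctrl->quirks (the device
 * ID below is made up): a transport attaches quirks when it creates the
 * controller, e.g. from the driver_data of a PCI ID table entry, and the
 * driver then tests them at runtime:
 *
 *	{ PCI_DEVICE(0x1234, 0x5678),
 *		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
 *
 *	if (ctrl->quirks & NVME_QUIRK_DELAY_BEFORE_CHK_RDY)
 *		msleep(NVME_QUIRK_DELAY_AMOUNT);
 */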
enum nvme_iopolicy {
	NVME_IOPOLICY_NUMA,
	NVME_IOPOLICY_RR,
};

struct nvme_subsystem {
	int instance;
	struct device dev;
	/*
	 * Because we unregister the device on the last put we need
	 * a separate refcount.
	 */
	struct kref ref;
	struct list_head entry;
	struct mutex lock;
	struct list_head ctrls;
	struct list_head nsheads;
	char subnqn[NVMF_NQN_SIZE];
	char serial[20];
	char model[40];
	char firmware_rev[8];
	u8 cmic;
	u16 vendor_id;
	u16 awupf;	/* 0's based awupf value. */
	struct ida ns_ida;
#ifdef CONFIG_NVME_MULTIPATH
	enum nvme_iopolicy iopolicy;
#endif
};

/*
 * Container structure for unique namespace identifiers.
 */
struct nvme_ns_ids {
	u8 eui64[8];
	u8 nguid[16];
	uuid_t uuid;
};

/*
 * Anchor structure for namespaces.  There is one for each namespace in a
 * NVMe subsystem that any of our controllers can see, and the namespace
 * structure for each controller is chained off of it.  For private
 * namespaces there is a 1:1 relation to our namespace structures, that is
 * ->list only ever has a single entry for private namespaces.
 */
struct nvme_ns_head {
	struct list_head list;
	struct srcu_struct srcu;
	struct nvme_subsystem *subsys;
	unsigned ns_id;
	struct nvme_ns_ids ids;
	struct list_head entry;
	struct kref ref;
	int instance;
#ifdef CONFIG_NVME_MULTIPATH
	struct gendisk *disk;
	struct bio_list requeue_list;
	spinlock_t requeue_lock;
	struct work_struct requeue_work;
	struct mutex lock;
	struct nvme_ns __rcu *current_path[];
#endif
};

struct nvme_ns {
	struct list_head list;

	struct nvme_ctrl *ctrl;
	struct request_queue *queue;
	struct gendisk *disk;
#ifdef CONFIG_NVME_MULTIPATH
	enum nvme_ana_state ana_state;
	u32 ana_grpid;
#endif
	struct list_head siblings;
	struct nvm_dev *ndev;
	struct kref kref;
	struct nvme_ns_head *head;

	int lba_shift;
	u16 ms;
	u16 sgs;
	u32 sws;
	bool ext;
	u8 pi_type;
	unsigned long flags;
#define NVME_NS_REMOVING	0
#define NVME_NS_DEAD		1
#define NVME_NS_ANA_PENDING	2
	u16 noiob;

	struct nvme_fault_inject fault_inject;

};

struct nvme_ctrl_ops {
	const char *name;
	struct module *module;
	unsigned int flags;
#define NVME_F_FABRICS			(1 << 0)
#define NVME_F_METADATA_SUPPORTED	(1 << 1)
#define NVME_F_PCI_P2PDMA		(1 << 2)
	int (*reg_read32)(struct nvme_ctrl *ctrl, u32 off, u32 *val);
	int (*reg_write32)(struct nvme_ctrl *ctrl, u32 off, u32 val);
	int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
	void (*free_ctrl)(struct nvme_ctrl *ctrl);
	void (*submit_async_event)(struct nvme_ctrl *ctrl);
	void (*delete_ctrl)(struct nvme_ctrl *ctrl);
	int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
};

#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj,
			    const char *dev_name);
void nvme_fault_inject_fini(struct nvme_fault_inject *fault_inject);
void nvme_should_fail(struct request *req);
#else
static inline void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj,
					  const char *dev_name)
{
}
static inline void nvme_fault_inject_fini(struct nvme_fault_inject *fault_inj)
{
}
static inline void nvme_should_fail(struct request *req) {}
#endif
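/*
 * Minimal sketch of how a transport might fill in nvme_ctrl_ops; the names
 * below are hypothetical.  A memory-mapped controller would typically back
 * the reg_* callbacks with readl()/writel() on its BAR:
 *
 *	static int foo_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val)
 *	{
 *		*val = readl(to_foo_dev(ctrl)->bar + off);
 *		return 0;
 *	}
 *
 *	static const struct nvme_ctrl_ops foo_ctrl_ops = {
 *		.name		= "foo",
 *		.module		= THIS_MODULE,
 *		.flags		= NVME_F_METADATA_SUPPORTED,
 *		.reg_read32	= foo_reg_read32,
 *		(remaining callbacks filled in the same way)
 *	};
 */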
static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
{
	if (!ctrl->subsystem)
		return -ENOTTY;
	return ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65);
}

static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector)
{
	return (sector >> (ns->lba_shift - 9));
}

static inline void nvme_end_request(struct request *req, __le16 status,
		union nvme_result result)
{
	struct nvme_request *rq = nvme_req(req);

	rq->status = le16_to_cpu(status) >> 1;
	rq->result = result;
	/* inject error when permitted by fault injection framework */
	nvme_should_fail(req);
	blk_mq_complete_request(req);
}

static inline void nvme_get_ctrl(struct nvme_ctrl *ctrl)
{
	get_device(ctrl->device);
}

static inline void nvme_put_ctrl(struct nvme_ctrl *ctrl)
{
	put_device(ctrl->device);
}

void nvme_complete_rq(struct request *req);
bool nvme_cancel_request(struct request *req, void *data, bool reserved);
bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
		enum nvme_ctrl_state new_state);
bool nvme_wait_reset(struct nvme_ctrl *ctrl);
int nvme_disable_ctrl(struct nvme_ctrl *ctrl);
int nvme_enable_ctrl(struct nvme_ctrl *ctrl);
int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
		const struct nvme_ctrl_ops *ops, unsigned long quirks);
void nvme_uninit_ctrl(struct nvme_ctrl *ctrl);
void nvme_start_ctrl(struct nvme_ctrl *ctrl);
void nvme_stop_ctrl(struct nvme_ctrl *ctrl);
void nvme_put_ctrl(struct nvme_ctrl *ctrl);
int nvme_init_identify(struct nvme_ctrl *ctrl);

void nvme_remove_namespaces(struct nvme_ctrl *ctrl);

int nvme_sec_submit(void *data, u16 spsp, u8 secp, void *buffer, size_t len,
		bool send);

void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status,
		volatile union nvme_result *res);

void nvme_stop_queues(struct nvme_ctrl *ctrl);
void nvme_start_queues(struct nvme_ctrl *ctrl);
void nvme_kill_queues(struct nvme_ctrl *ctrl);
void nvme_sync_queues(struct nvme_ctrl *ctrl);
void nvme_unfreeze(struct nvme_ctrl *ctrl);
void nvme_wait_freeze(struct nvme_ctrl *ctrl);
void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
void nvme_start_freeze(struct nvme_ctrl *ctrl);

#define NVME_QID_ANY -1
struct request *nvme_alloc_request(struct request_queue *q,
		struct nvme_command *cmd, blk_mq_req_flags_t flags, int qid);
void nvme_cleanup_cmd(struct request *req);
blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
		struct nvme_command *cmd);
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
		void *buf, unsigned bufflen);
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
		union nvme_result *result, void *buffer, unsigned bufflen,
		unsigned timeout, int qid, int at_head,
		blk_mq_req_flags_t flags, bool poll);
int nvme_set_features(struct nvme_ctrl *dev, unsigned int fid,
		unsigned int dword11, void *buffer, size_t buflen,
		u32 *result);
int nvme_get_features(struct nvme_ctrl *dev, unsigned int fid,
		unsigned int dword11, void *buffer, size_t buflen,
		u32 *result);
int nvme_set_queue_count(struct nvme_ctrl *ctrl, int *count);
void nvme_stop_keep_alive(struct nvme_ctrl *ctrl);
int nvme_reset_ctrl(struct nvme_ctrl *ctrl);
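/*
 * Illustrative use of the synchronous command helpers above (allocation and
 * error handling trimmed); this mirrors how core code issues an Identify
 * Controller command on the admin queue:
 *
 *	struct nvme_command c = { };
 *
 *	c.identify.opcode = nvme_admin_identify;
 *	c.identify.cns = NVME_ID_CNS_CTRL;
 *	error = nvme_submit_sync_cmd(ctrl->admin_q, &c, id,
 *				     sizeof(struct nvme_id_ctrl));
 */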
int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl);
int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
int nvme_delete_ctrl(struct nvme_ctrl *ctrl);

int nvme_get_log(struct nvme_ctrl *ctrl, u32 nsid, u8 log_page, u8 lsp,
		void *log, size_t size, u64 offset);

extern const struct attribute_group *nvme_ns_id_attr_groups[];
extern const struct block_device_operations nvme_ns_head_ops;
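/*
 * Hedged example of reading a log page with nvme_get_log(); this is roughly
 * how a caller would fetch the SMART / Health Information log for all
 * namespaces (error handling omitted):
 *
 *	struct nvme_smart_log *log = kzalloc(sizeof(*log), GFP_KERNEL);
 *
 *	if (log)
 *		nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_SMART, 0,
 *			     log, sizeof(*log), 0);
 */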
#ifdef CONFIG_NVME_MULTIPATH
static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
{
	return ctrl->ana_log_buf != NULL;
}

void nvme_mpath_unfreeze(struct nvme_subsystem *subsys);
void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys);
void nvme_mpath_start_freeze(struct nvme_subsystem *subsys);
void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
			struct nvme_ctrl *ctrl, int *flags);
void nvme_failover_req(struct request *req);
void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl);
int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head);
void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id);
void nvme_mpath_remove_disk(struct nvme_ns_head *head);
int nvme_mpath_init(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id);
void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
void nvme_mpath_stop(struct nvme_ctrl *ctrl);
bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
struct nvme_ns *nvme_find_path(struct nvme_ns_head *head);

static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
{
	struct nvme_ns_head *head = ns->head;

	if (head->disk && list_empty(&head->list))
		kblockd_schedule_work(&head->requeue_work);
}

static inline void nvme_trace_bio_complete(struct request *req,
	blk_status_t status)
{
	struct nvme_ns *ns = req->q->queuedata;

	if (req->cmd_flags & REQ_NVME_MPATH)
		trace_block_bio_complete(ns->head->disk->queue,
					 req->bio, status);
}

extern struct device_attribute dev_attr_ana_grpid;
extern struct device_attribute dev_attr_ana_state;
extern struct device_attribute subsys_attr_iopolicy;

#else
static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
{
	return false;
}
/*
 * Without the multipath code enabled, multiple controllers per subsystem are
 * visible as devices and thus we cannot use the subsystem instance.
 */
static inline void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
				      struct nvme_ctrl *ctrl, int *flags)
{
	sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
}

static inline void nvme_failover_req(struct request *req)
{
}
static inline void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
{
}
static inline int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl,
		struct nvme_ns_head *head)
{
	return 0;
}
static inline void nvme_mpath_add_disk(struct nvme_ns *ns,
		struct nvme_id_ns *id)
{
}
static inline void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
}
static inline bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
	return false;
}
static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
{
}
static inline void nvme_mpath_check_last_path(struct nvme_ns *ns)
{
}
static inline void nvme_trace_bio_complete(struct request *req,
	blk_status_t status)
{
}
static inline int nvme_mpath_init(struct nvme_ctrl *ctrl,
		struct nvme_id_ctrl *id)
{
	if (ctrl->subsys->cmic & (1 << 3))
		dev_warn(ctrl->device,
"Please enable CONFIG_NVME_MULTIPATH for full support of multi-port devices.\n");
	return 0;
}
static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
{
}
static inline void nvme_mpath_stop(struct nvme_ctrl *ctrl)
{
}
static inline void nvme_mpath_unfreeze(struct nvme_subsystem *subsys)
{
}
static inline void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys)
{
}
static inline void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
{
}
#endif /* CONFIG_NVME_MULTIPATH */

#ifdef CONFIG_NVM
int nvme_nvm_register(struct nvme_ns *ns, char *disk_name, int node);
void nvme_nvm_unregister(struct nvme_ns *ns);
extern const struct attribute_group nvme_nvm_attr_group;
int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd, unsigned long arg);
#else
static inline int nvme_nvm_register(struct nvme_ns *ns, char *disk_name,
				    int node)
{
	return 0;
}

static inline void nvme_nvm_unregister(struct nvme_ns *ns) {};
static inline int nvme_nvm_ioctl(struct nvme_ns *ns, unsigned int cmd,
				 unsigned long arg)
{
	return -ENOTTY;
}
#endif /* CONFIG_NVM */

static inline struct nvme_ns *nvme_get_ns_from_dev(struct device *dev)
{
	return dev_to_disk(dev)->private_data;
}

#endif /* _NVME_H */