core.c (6d5a763c303bc9d78b17361d30b692ba2facf9b4) vs. core.c (400b6a7b13a3fd71cff087139ce45dd1e5fff444)
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * NVM Express device driver
4 * Copyright (c) 2011-2014, Intel Corporation.
5 */
6
7#include <linux/blkdev.h>
8#include <linux/blk-mq.h>

--- 102 unchanged lines hidden ---

111 revalidate_disk(ns->disk);
112}
113
114static void nvme_queue_scan(struct nvme_ctrl *ctrl)
115{
116 /*
 117 * Only queue new scan work when admin and IO queues are both alive
118 */
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * NVM Express device driver
4 * Copyright (c) 2011-2014, Intel Corporation.
5 */
6
7#include <linux/blkdev.h>
8#include <linux/blk-mq.h>

--- 102 unchanged lines hidden ---

111 revalidate_disk(ns->disk);
112}
113
114static void nvme_queue_scan(struct nvme_ctrl *ctrl)
115{
116 /*
 117 * Only queue new scan work when admin and IO queues are both alive
118 */
119 if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset)
119 if (ctrl->state == NVME_CTRL_LIVE)
120 queue_work(nvme_wq, &ctrl->scan_work);
121}
122
120 queue_work(nvme_wq, &ctrl->scan_work);
121}
122
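The two versions of the gate in nvme_queue_scan() above reflect a difference in the controller state machine: one side of this diff still has an NVME_CTRL_ADMIN_ONLY state for controllers with only a usable admin queue, while the other drops it and checks for a live tagset instead. A sketch of the state enum as it would appear in nvme.h on the side that keeps ADMIN_ONLY (the declaration itself is not part of this hunk):

	enum nvme_ctrl_state {
		NVME_CTRL_NEW,
		NVME_CTRL_LIVE,
		NVME_CTRL_ADMIN_ONLY,	/* absent on the other side of the diff */
		NVME_CTRL_RESETTING,
		NVME_CTRL_CONNECTING,
		NVME_CTRL_DELETING,
		NVME_CTRL_DEAD,
	};

Without ADMIN_ONLY, "admin queue only" is expressed as a LIVE controller whose ctrl->tagset was never allocated, which is exactly what the extra && ctrl->tagset test guards against.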
123/*
124 * Use this function to proceed with scheduling reset_work for a controller
125 * that had previously been set to the resetting state. This is intended for
126 * code paths that can't be interrupted by other reset attempts. A hot removal
127 * may prevent this from succeeding.
128 */
129int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
130{
131 if (ctrl->state != NVME_CTRL_RESETTING)
132 return -EBUSY;
133 if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
134 return -EBUSY;
135 return 0;
136}
137EXPORT_SYMBOL_GPL(nvme_try_sched_reset);
138
139int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
140{
141 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
142 return -EBUSY;
143 if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
144 return -EBUSY;
145 return 0;
146}
147EXPORT_SYMBOL_GPL(nvme_reset_ctrl);
148
149int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
150{
151 int ret;
152
153 ret = nvme_reset_ctrl(ctrl);
154 if (!ret) {
155 flush_work(&ctrl->reset_work);
123int nvme_reset_ctrl(struct nvme_ctrl *ctrl)
124{
125 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
126 return -EBUSY;
127 if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
128 return -EBUSY;
129 return 0;
130}
131EXPORT_SYMBOL_GPL(nvme_reset_ctrl);
132
133int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
134{
135 int ret;
136
137 ret = nvme_reset_ctrl(ctrl);
138 if (!ret) {
139 flush_work(&ctrl->reset_work);
156 if (ctrl->state != NVME_CTRL_LIVE)
140 if (ctrl->state != NVME_CTRL_LIVE &&
141 ctrl->state != NVME_CTRL_ADMIN_ONLY)
157 ret = -ENETRESET;
158 }
159
160 return ret;
161}
162EXPORT_SYMBOL_GPL(nvme_reset_ctrl_sync);
163
164static void nvme_do_delete_ctrl(struct nvme_ctrl *ctrl)

--- 113 unchanged lines hidden ---

278}
279
280void nvme_complete_rq(struct request *req)
281{
282 blk_status_t status = nvme_error_status(nvme_req(req)->status);
283
284 trace_nvme_complete_rq(req);
285
142 ret = -ENETRESET;
143 }
144
145 return ret;
146}
147EXPORT_SYMBOL_GPL(nvme_reset_ctrl_sync);
148
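For context, the usual caller of nvme_reset_ctrl_sync() is the sysfs reset handler further down in this file (hidden here); a hedged sketch of its shape:

	static ssize_t nvme_sysfs_reset(struct device *dev,
					struct device_attribute *attr,
					const char *buf, size_t count)
	{
		struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
		int ret;

		/*
		 * -EBUSY: the state change or queue_work() failed;
		 * -ENETRESET: the reset ran but the controller did not
		 * come back to an operational state.
		 */
		ret = nvme_reset_ctrl_sync(ctrl);
		if (ret < 0)
			return ret;
		return count;
	}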
149static void nvme_do_delete_ctrl(struct nvme_ctrl *ctrl)

--- 113 unchanged lines hidden ---

263}
264
265void nvme_complete_rq(struct request *req)
266{
267 blk_status_t status = nvme_error_status(nvme_req(req)->status);
268
269 trace_nvme_complete_rq(req);
270
271 nvme_cleanup_cmd(req);
272
286 if (nvme_req(req)->ctrl->kas)
287 nvme_req(req)->ctrl->comp_seen = true;
288
289 if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) {
290 if ((req->cmd_flags & REQ_NVME_MPATH) &&
291 blk_path_error(status)) {
292 nvme_failover_req(req);
293 return;

--- 14 unchanged lines hidden ---

308{
309 dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device,
310 "Cancelling I/O %d", req->tag);
311
312 /* don't abort one completed request */
313 if (blk_mq_request_completed(req))
314 return true;
315
273 if (nvme_req(req)->ctrl->kas)
274 nvme_req(req)->ctrl->comp_seen = true;
275
276 if (unlikely(status != BLK_STS_OK && nvme_req_needs_retry(req))) {
277 if ((req->cmd_flags & REQ_NVME_MPATH) &&
278 blk_path_error(status)) {
279 nvme_failover_req(req);
280 return;

--- 14 unchanged lines hidden ---

295{
296 dev_dbg_ratelimited(((struct nvme_ctrl *) data)->device,
297 "Cancelling I/O %d", req->tag);
298
299 /* don't abort one completed request */
300 if (blk_mq_request_completed(req))
301 return true;
302
316 nvme_req(req)->status = NVME_SC_HOST_PATH_ERROR;
303 nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD;
317 blk_mq_complete_request(req);
318 return true;
319}
320EXPORT_SYMBOL_GPL(nvme_cancel_request);
321
322bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
323 enum nvme_ctrl_state new_state)
324{
325 enum nvme_ctrl_state old_state;
326 unsigned long flags;
327 bool changed = false;
328
329 spin_lock_irqsave(&ctrl->lock, flags);
330
331 old_state = ctrl->state;
332 switch (new_state) {
304 blk_mq_complete_request(req);
305 return true;
306}
307EXPORT_SYMBOL_GPL(nvme_cancel_request);
308
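nvme_cancel_request() is shaped as a blk-mq busy-tag iterator callback: the transports walk every outstanding request and complete it with a host-side status (NVME_SC_HOST_PATH_ERROR on one side of the diff, NVME_SC_HOST_ABORTED_CMD on the other). A hedged usage sketch, with the call sites assumed rather than taken from this hunk:

	/* Tear down everything still in flight on a dying controller. */
	blk_mq_tagset_busy_iter(ctrl->tagset, nvme_cancel_request, ctrl);
	blk_mq_tagset_busy_iter(ctrl->admin_tagset, nvme_cancel_request, ctrl);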
309bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
310 enum nvme_ctrl_state new_state)
311{
312 enum nvme_ctrl_state old_state;
313 unsigned long flags;
314 bool changed = false;
315
316 spin_lock_irqsave(&ctrl->lock, flags);
317
318 old_state = ctrl->state;
319 switch (new_state) {
320 case NVME_CTRL_ADMIN_ONLY:
321 switch (old_state) {
322 case NVME_CTRL_CONNECTING:
323 changed = true;
324 /* FALLTHRU */
325 default:
326 break;
327 }
328 break;
333 case NVME_CTRL_LIVE:
334 switch (old_state) {
335 case NVME_CTRL_NEW:
336 case NVME_CTRL_RESETTING:
337 case NVME_CTRL_CONNECTING:
338 changed = true;
339 /* FALLTHRU */
340 default:
341 break;
342 }
343 break;
344 case NVME_CTRL_RESETTING:
345 switch (old_state) {
346 case NVME_CTRL_NEW:
347 case NVME_CTRL_LIVE:
329 case NVME_CTRL_LIVE:
330 switch (old_state) {
331 case NVME_CTRL_NEW:
332 case NVME_CTRL_RESETTING:
333 case NVME_CTRL_CONNECTING:
334 changed = true;
335 /* FALLTHRU */
336 default:
337 break;
338 }
339 break;
340 case NVME_CTRL_RESETTING:
341 switch (old_state) {
342 case NVME_CTRL_NEW:
343 case NVME_CTRL_LIVE:
344 case NVME_CTRL_ADMIN_ONLY:
348 changed = true;
349 /* FALLTHRU */
350 default:
351 break;
352 }
353 break;
354 case NVME_CTRL_CONNECTING:
355 switch (old_state) {
356 case NVME_CTRL_NEW:
357 case NVME_CTRL_RESETTING:
358 changed = true;
359 /* FALLTHRU */
360 default:
361 break;
362 }
363 break;
364 case NVME_CTRL_DELETING:
365 switch (old_state) {
366 case NVME_CTRL_LIVE:
345 changed = true;
346 /* FALLTHRU */
347 default:
348 break;
349 }
350 break;
351 case NVME_CTRL_CONNECTING:
352 switch (old_state) {
353 case NVME_CTRL_NEW:
354 case NVME_CTRL_RESETTING:
355 changed = true;
356 /* FALLTHRU */
357 default:
358 break;
359 }
360 break;
361 case NVME_CTRL_DELETING:
362 switch (old_state) {
363 case NVME_CTRL_LIVE:
364 case NVME_CTRL_ADMIN_ONLY:
367 case NVME_CTRL_RESETTING:
368 case NVME_CTRL_CONNECTING:
369 changed = true;
370 /* FALLTHRU */
371 default:
372 break;
373 }
374 break;

--- 5 unchanged lines hidden ---

380 default:
381 break;
382 }
383 break;
384 default:
385 break;
386 }
387
365 case NVME_CTRL_RESETTING:
366 case NVME_CTRL_CONNECTING:
367 changed = true;
368 /* FALLTHRU */
369 default:
370 break;
371 }
372 break;

--- 5 unchanged lines hidden ---

378 default:
379 break;
380 }
381 break;
382 default:
383 break;
384 }
385
388 if (changed) {
386 if (changed)
389 ctrl->state = new_state;
387 ctrl->state = new_state;
390 wake_up_all(&ctrl->state_wq);
391 }
392
393 spin_unlock_irqrestore(&ctrl->lock, flags);
394 if (changed && ctrl->state == NVME_CTRL_LIVE)
395 nvme_kick_requeue_lists(ctrl);
396 return changed;
397}
398EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
399
388
389 spin_unlock_irqrestore(&ctrl->lock, flags);
390 if (changed && ctrl->state == NVME_CTRL_LIVE)
391 nvme_kick_requeue_lists(ctrl);
392 return changed;
393}
394EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
395
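Read off the switch statement above, the accepted transitions are summarized below; the bracketed entries only exist on the side of the diff that still has NVME_CTRL_ADMIN_ONLY, and the transitions into NVME_CTRL_DEAD sit in the hidden lines:

	/*
	 * target state    accepted current states
	 * ------------    -----------------------
	 * ADMIN_ONLY      CONNECTING                          [one side only]
	 * LIVE            NEW, RESETTING, CONNECTING
	 * RESETTING       NEW, LIVE [, ADMIN_ONLY]
	 * CONNECTING      NEW, RESETTING
	 * DELETING        LIVE, RESETTING, CONNECTING [, ADMIN_ONLY]
	 * DEAD            (handled in the lines hidden above)
	 */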
400/*
401 * Returns true for sink states that can't ever transition back to live.
402 */
403static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
404{
405 switch (ctrl->state) {
406 case NVME_CTRL_NEW:
407 case NVME_CTRL_LIVE:
408 case NVME_CTRL_RESETTING:
409 case NVME_CTRL_CONNECTING:
410 return false;
411 case NVME_CTRL_DELETING:
412 case NVME_CTRL_DEAD:
413 return true;
414 default:
415 WARN_ONCE(1, "Unhandled ctrl state:%d", ctrl->state);
416 return true;
417 }
418}
419
420/*
421 * Waits for the controller state to be resetting, or returns false if it is
422 * not possible to ever transition to that state.
423 */
424bool nvme_wait_reset(struct nvme_ctrl *ctrl)
425{
426 wait_event(ctrl->state_wq,
427 nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) ||
428 nvme_state_terminal(ctrl));
429 return ctrl->state == NVME_CTRL_RESETTING;
430}
431EXPORT_SYMBOL_GPL(nvme_wait_reset);
432
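nvme_wait_reset() pairs with nvme_try_sched_reset() further up: a recovery path that must not be raced by other reset attempts first parks the state machine in RESETTING, performs its teardown, then schedules the actual reset work. A hypothetical sketch (function name and call sites assumed, not from this file):

	static void example_recover_ctrl(struct nvme_ctrl *ctrl)
	{
		if (!nvme_wait_reset(ctrl))
			return;		/* controller is deleting or dead */

		/* ... quiesce queues / tear down transport state here ... */

		if (nvme_try_sched_reset(ctrl))
			dev_warn(ctrl->device, "reset could not be scheduled\n");
	}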
433static void nvme_free_ns_head(struct kref *ref)
434{
435 struct nvme_ns_head *head =
436 container_of(ref, struct nvme_ns_head, ref);
437
438 nvme_mpath_remove_disk(head);
439 ida_simple_remove(&head->subsys->ns_ida, head->instance);
440 list_del_init(&head->entry);

--- 180 unchanged lines hidden ---

621 */
622 if (test_and_set_bit_lock(0, &ns->ctrl->discard_page_busy))
623 return BLK_STS_RESOURCE;
624
625 range = page_address(ns->ctrl->discard_page);
626 }
627
628 __rq_for_each_bio(bio, req) {
396static void nvme_free_ns_head(struct kref *ref)
397{
398 struct nvme_ns_head *head =
399 container_of(ref, struct nvme_ns_head, ref);
400
401 nvme_mpath_remove_disk(head);
402 ida_simple_remove(&head->subsys->ns_ida, head->instance);
403 list_del_init(&head->entry);

--- 180 unchanged lines hidden ---

584 */
585 if (test_and_set_bit_lock(0, &ns->ctrl->discard_page_busy))
586 return BLK_STS_RESOURCE;
587
588 range = page_address(ns->ctrl->discard_page);
589 }
590
591 __rq_for_each_bio(bio, req) {
629 u64 slba = nvme_block_nr(ns, bio->bi_iter.bi_sector);
592 u64 slba = nvme_sect_to_lba(ns, bio->bi_iter.bi_sector);
630 u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
631
632 if (n < segments) {
633 range[n].cattr = cpu_to_le32(0);
634 range[n].nlb = cpu_to_le32(nlb);
635 range[n].slba = cpu_to_le64(slba);
636 }
637 n++;

--- 24 unchanged lines hidden ---

662 struct request *req, struct nvme_command *cmnd)
663{
664 if (ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
665 return nvme_setup_discard(ns, req, cmnd);
666
667 cmnd->write_zeroes.opcode = nvme_cmd_write_zeroes;
668 cmnd->write_zeroes.nsid = cpu_to_le32(ns->head->ns_id);
669 cmnd->write_zeroes.slba =
593 u32 nlb = bio->bi_iter.bi_size >> ns->lba_shift;
594
595 if (n < segments) {
596 range[n].cattr = cpu_to_le32(0);
597 range[n].nlb = cpu_to_le32(nlb);
598 range[n].slba = cpu_to_le64(slba);
599 }
600 n++;

--- 24 unchanged lines hidden ---

625 struct request *req, struct nvme_command *cmnd)
626{
627 if (ns->ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
628 return nvme_setup_discard(ns, req, cmnd);
629
630 cmnd->write_zeroes.opcode = nvme_cmd_write_zeroes;
631 cmnd->write_zeroes.nsid = cpu_to_le32(ns->head->ns_id);
632 cmnd->write_zeroes.slba =
670 cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
633 cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
671 cmnd->write_zeroes.length =
672 cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
673 cmnd->write_zeroes.control = 0;
674 return BLK_STS_OK;
675}
676
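The slba assignments in the surrounding hunks differ only in the conversion helper they call: one side uses nvme_block_nr(), the other nvme_sect_to_lba() and its inverse nvme_lba_to_sect(). All of them translate between 512-byte block layer sectors and the namespace's logical blocks. Assuming the usual nvme.h definitions, the sect/lba helpers are roughly:

	static inline u64 nvme_sect_to_lba(struct nvme_ns *ns, sector_t sector)
	{
		return sector >> (ns->lba_shift - SECTOR_SHIFT);  /* SECTOR_SHIFT == 9 */
	}

	static inline sector_t nvme_lba_to_sect(struct nvme_ns *ns, u64 lba)
	{
		return lba << (ns->lba_shift - SECTOR_SHIFT);
	}

For example, with a 4 KiB LBA format (lba_shift = 12), block layer sector 80 maps to LBA 10.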
677static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
678 struct request *req, struct nvme_command *cmnd)

--- 7 unchanged lines hidden ---

686 if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
687 control |= NVME_RW_LR;
688
689 if (req->cmd_flags & REQ_RAHEAD)
690 dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
691
692 cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
693 cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
634 cmnd->write_zeroes.length =
635 cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
636 cmnd->write_zeroes.control = 0;
637 return BLK_STS_OK;
638}
639
640static inline blk_status_t nvme_setup_rw(struct nvme_ns *ns,
641 struct request *req, struct nvme_command *cmnd)

--- 7 unchanged lines hidden ---

649 if (req->cmd_flags & (REQ_FAILFAST_DEV | REQ_RAHEAD))
650 control |= NVME_RW_LR;
651
652 if (req->cmd_flags & REQ_RAHEAD)
653 dsmgmt |= NVME_RW_DSM_FREQ_PREFETCH;
654
655 cmnd->rw.opcode = (rq_data_dir(req) ? nvme_cmd_write : nvme_cmd_read);
656 cmnd->rw.nsid = cpu_to_le32(ns->head->ns_id);
694 cmnd->rw.slba = cpu_to_le64(nvme_block_nr(ns, blk_rq_pos(req)));
657 cmnd->rw.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
695 cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
696
697 if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
698 nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);
699
700 if (ns->ms) {
701 /*
 702 * If formatted with metadata, the block layer always provides a

--- 637 unchanged lines hidden ---

1340{
1341 struct nvme_ns *ns;
1342
1343 down_read(&ctrl->namespaces_rwsem);
1344 list_for_each_entry(ns, &ctrl->namespaces, list)
1345 if (ns->disk && nvme_revalidate_disk(ns->disk))
1346 nvme_set_queue_dying(ns);
1347 up_read(&ctrl->namespaces_rwsem);
658 cmnd->rw.length = cpu_to_le16((blk_rq_bytes(req) >> ns->lba_shift) - 1);
659
660 if (req_op(req) == REQ_OP_WRITE && ctrl->nr_streams)
661 nvme_assign_write_stream(ctrl, req, &control, &dsmgmt);
662
663 if (ns->ms) {
664 /*
 665 * If formatted with metadata, the block layer always provides a

--- 637 unchanged lines hidden ---

1303{
1304 struct nvme_ns *ns;
1305
1306 down_read(&ctrl->namespaces_rwsem);
1307 list_for_each_entry(ns, &ctrl->namespaces, list)
1308 if (ns->disk && nvme_revalidate_disk(ns->disk))
1309 nvme_set_queue_dying(ns);
1310 up_read(&ctrl->namespaces_rwsem);
1311
1312 nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
1348}
1349
1350static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
1351{
1352 /*
1353 * Revalidate LBA changes prior to unfreezing. This is necessary to
1354 * prevent memory corruption if a logical block size was changed by
1355 * this command.
1356 */
1357 if (effects & NVME_CMD_EFFECTS_LBCC)
1358 nvme_update_formats(ctrl);
1359 if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
1360 nvme_unfreeze(ctrl);
1361 nvme_mpath_unfreeze(ctrl->subsys);
1362 mutex_unlock(&ctrl->subsys->lock);
1313}
1314
1315static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
1316{
1317 /*
1318 * Revalidate LBA changes prior to unfreezing. This is necessary to
1319 * prevent memory corruption if a logical block size was changed by
1320 * this command.
1321 */
1322 if (effects & NVME_CMD_EFFECTS_LBCC)
1323 nvme_update_formats(ctrl);
1324 if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
1325 nvme_unfreeze(ctrl);
1326 nvme_mpath_unfreeze(ctrl->subsys);
1327 mutex_unlock(&ctrl->subsys->lock);
1363 nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
1364 mutex_unlock(&ctrl->scan_lock);
1365 }
1366 if (effects & NVME_CMD_EFFECTS_CCC)
1367 nvme_init_identify(ctrl);
1368 if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC))
1369 nvme_queue_scan(ctrl);
1370}
1371

--- 270 unchanged lines hidden ---

1642#else
1643static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
1644{
1645}
1646#endif /* CONFIG_BLK_DEV_INTEGRITY */
1647
1648static void nvme_set_chunk_size(struct nvme_ns *ns)
1649{
1328 mutex_unlock(&ctrl->scan_lock);
1329 }
1330 if (effects & NVME_CMD_EFFECTS_CCC)
1331 nvme_init_identify(ctrl);
1332 if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC))
1333 nvme_queue_scan(ctrl);
1334}
1335
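The unlock sequence in nvme_passthru_end() above unwinds a start-side helper that takes ctrl->scan_lock, then ctrl->subsys->lock, and freezes the multipath node and namespace queues before a passthrough command with LBA-content or command-submission effects is issued. A hedged sketch of that counterpart, with the helper names inferred from the unfreeze calls above rather than taken from this hunk:

	static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
				       u8 opcode)
	{
		u32 effects = nvme_command_effects(ctrl, ns, opcode);

		if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
			mutex_lock(&ctrl->scan_lock);
			mutex_lock(&ctrl->subsys->lock);
			nvme_mpath_start_freeze(ctrl->subsys);
			nvme_mpath_wait_freeze(ctrl->subsys);
			nvme_start_freeze(ctrl);
			nvme_wait_freeze(ctrl);
		}
		return effects;
	}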

--- 270 unchanged lines hidden ---

1606#else
1607static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type)
1608{
1609}
1610#endif /* CONFIG_BLK_DEV_INTEGRITY */
1611
1612static void nvme_set_chunk_size(struct nvme_ns *ns)
1613{
1650 u32 chunk_size = (((u32)ns->noiob) << (ns->lba_shift - 9));
1614 u32 chunk_size = nvme_lba_to_sect(ns, ns->noiob);
1651 blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
1652}
1653
1654static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
1655{
1656 struct nvme_ctrl *ctrl = ns->ctrl;
1657 struct request_queue *queue = disk->queue;
1658 u32 size = queue_logical_block_size(queue);

--- 20 unchanged lines hidden ---

1679 blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
1680
1681 if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
1682 blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
1683}
1684
1685static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
1686{
1615 blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size));
1616}
1617
1618static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
1619{
1620 struct nvme_ctrl *ctrl = ns->ctrl;
1621 struct request_queue *queue = disk->queue;
1622 u32 size = queue_logical_block_size(queue);

--- 20 unchanged lines hidden ---

1643 blk_queue_max_discard_segments(queue, NVME_DSM_MAX_RANGES);
1644
1645 if (ctrl->quirks & NVME_QUIRK_DEALLOCATE_ZEROES)
1646 blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
1647}
1648
1649static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
1650{
1687 u32 max_sectors;
1688 unsigned short bs = 1 << ns->lba_shift;
1651 u64 max_blocks;
1689
1690 if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) ||
1691 (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
1692 return;
1693 /*
1694 * Even though NVMe spec explicitly states that MDTS is not
1695 * applicable to the write-zeroes:- "The restriction does not apply to
1696 * commands that do not transfer data between the host and the
 1697 * controller (e.g., Write Uncorrectable or Write Zeroes command).".
1698 * In order to be more cautious use controller's max_hw_sectors value
1699 * to configure the maximum sectors for the write-zeroes which is
1700 * configured based on the controller's MDTS field in the
1701 * nvme_init_identify() if available.
1702 */
1703 if (ns->ctrl->max_hw_sectors == UINT_MAX)
1652
1653 if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) ||
1654 (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
1655 return;
1656 /*
1657 * Even though NVMe spec explicitly states that MDTS is not
1658 * applicable to the write-zeroes:- "The restriction does not apply to
1659 * commands that do not transfer data between the host and the
 1660 * controller (e.g., Write Uncorrectable or Write Zeroes command).".
1661 * In order to be more cautious use controller's max_hw_sectors value
1662 * to configure the maximum sectors for the write-zeroes which is
1663 * configured based on the controller's MDTS field in the
1664 * nvme_init_identify() if available.
1665 */
1666 if (ns->ctrl->max_hw_sectors == UINT_MAX)
1704 max_sectors = ((u32)(USHRT_MAX + 1) * bs) >> 9;
1667 max_blocks = (u64)USHRT_MAX + 1;
1705 else
1668 else
1706 max_sectors = ((u32)(ns->ctrl->max_hw_sectors + 1) * bs) >> 9;
1669 max_blocks = ns->ctrl->max_hw_sectors + 1;
1707
1670
1708 blk_queue_max_write_zeroes_sectors(disk->queue, max_sectors);
1671 blk_queue_max_write_zeroes_sectors(disk->queue,
1672 nvme_lba_to_sect(ns, max_blocks));
1709}
1710
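The two forms in nvme_config_write_zeroes() above compute the same limit, since (max + 1) * bs >> 9 equals (max + 1) << (lba_shift - 9), but carrying the count as a u64 block count keeps the intermediate from overflowing 32 bits. A worked example, assuming a 4 KiB LBA format and a large MDTS:

	/*
	 * lba_shift = 12, bs = 4096, max_hw_sectors = 1 << 20:
	 *
	 *   u32 path:  ((u32)(max_hw_sectors + 1) * bs) >> 9
	 *              multiplies to 0x100001000, which wraps in 32 bits
	 *              before the shift ever happens;
	 *   u64 path:  nvme_lba_to_sect(ns, (u64)max_hw_sectors + 1)
	 *              = 0x100001ULL << 3 = 0x800008 sectors, exact.
	 */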
1711static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
1712 struct nvme_id_ns *id, struct nvme_ns_ids *ids)
1713{
1714 int ret = 0;
1715
1716 memset(ids, 0, sizeof(*ids));

--- 26 unchanged lines hidden ---

1743 return uuid_equal(&a->uuid, &b->uuid) &&
1744 memcmp(&a->nguid, &b->nguid, sizeof(a->nguid)) == 0 &&
1745 memcmp(&a->eui64, &b->eui64, sizeof(a->eui64)) == 0;
1746}
1747
1748static void nvme_update_disk_info(struct gendisk *disk,
1749 struct nvme_ns *ns, struct nvme_id_ns *id)
1750{
1673}
1674
1675static int nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid,
1676 struct nvme_id_ns *id, struct nvme_ns_ids *ids)
1677{
1678 int ret = 0;
1679
1680 memset(ids, 0, sizeof(*ids));

--- 26 unchanged lines hidden ---

1707 return uuid_equal(&a->uuid, &b->uuid) &&
1708 memcmp(&a->nguid, &b->nguid, sizeof(a->nguid)) == 0 &&
1709 memcmp(&a->eui64, &b->eui64, sizeof(a->eui64)) == 0;
1710}
1711
1712static void nvme_update_disk_info(struct gendisk *disk,
1713 struct nvme_ns *ns, struct nvme_id_ns *id)
1714{
1751 sector_t capacity = le64_to_cpu(id->nsze) << (ns->lba_shift - 9);
1715 sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze));
1752 unsigned short bs = 1 << ns->lba_shift;
1753 u32 atomic_bs, phys_bs, io_opt;
1754
1755 if (ns->lba_shift > PAGE_SHIFT) {
1756 /* unsupported block size, set capacity to 0 later */
1757 bs = (1 << 9);
1758 }
1759 blk_mq_freeze_queue(disk->queue);

--- 1031 unchanged lines hidden ---

2791 ctrl->crdt[0] = le16_to_cpu(id->crdt1);
2792 ctrl->crdt[1] = le16_to_cpu(id->crdt2);
2793 ctrl->crdt[2] = le16_to_cpu(id->crdt3);
2794
2795 ctrl->oacs = le16_to_cpu(id->oacs);
2796 ctrl->oncs = le16_to_cpu(id->oncs);
2797 ctrl->mtfa = le16_to_cpu(id->mtfa);
2798 ctrl->oaes = le32_to_cpu(id->oaes);
1716 unsigned short bs = 1 << ns->lba_shift;
1717 u32 atomic_bs, phys_bs, io_opt;
1718
1719 if (ns->lba_shift > PAGE_SHIFT) {
1720 /* unsupported block size, set capacity to 0 later */
1721 bs = (1 << 9);
1722 }
1723 blk_mq_freeze_queue(disk->queue);

--- 1031 unchanged lines hidden ---

2755 ctrl->crdt[0] = le16_to_cpu(id->crdt1);
2756 ctrl->crdt[1] = le16_to_cpu(id->crdt2);
2757 ctrl->crdt[2] = le16_to_cpu(id->crdt3);
2758
2759 ctrl->oacs = le16_to_cpu(id->oacs);
2760 ctrl->oncs = le16_to_cpu(id->oncs);
2761 ctrl->mtfa = le16_to_cpu(id->mtfa);
2762 ctrl->oaes = le32_to_cpu(id->oaes);
2763 ctrl->wctemp = le16_to_cpu(id->wctemp);
2764 ctrl->cctemp = le16_to_cpu(id->cctemp);
2765
2799 atomic_set(&ctrl->abort_limit, id->acl + 1);
2800 ctrl->vwc = id->vwc;
2801 if (id->mdts)
2802 max_hw_sectors = 1 << (id->mdts + page_shift - 9);
2803 else
2804 max_hw_sectors = UINT_MAX;
2805 ctrl->max_hw_sectors =
2806 min_not_zero(ctrl->max_hw_sectors, max_hw_sectors);

--- 83 unchanged lines hidden ---

2890 ret = nvme_configure_directives(ctrl);
2891 if (ret < 0)
2892 return ret;
2893
2894 ret = nvme_configure_acre(ctrl);
2895 if (ret < 0)
2896 return ret;
2897
2766 atomic_set(&ctrl->abort_limit, id->acl + 1);
2767 ctrl->vwc = id->vwc;
2768 if (id->mdts)
2769 max_hw_sectors = 1 << (id->mdts + page_shift - 9);
2770 else
2771 max_hw_sectors = UINT_MAX;
2772 ctrl->max_hw_sectors =
2773 min_not_zero(ctrl->max_hw_sectors, max_hw_sectors);

--- 83 unchanged lines hidden ---

2857 ret = nvme_configure_directives(ctrl);
2858 if (ret < 0)
2859 return ret;
2860
2861 ret = nvme_configure_acre(ctrl);
2862 if (ret < 0)
2863 return ret;
2864
2865 if (!ctrl->identified)
2866 nvme_hwmon_init(ctrl);
2867
2898 ctrl->identified = true;
2899
2900 return 0;
2901
2902out_free:
2903 kfree(id);
2904 return ret;
2905}
2906EXPORT_SYMBOL_GPL(nvme_init_identify);
2907
2908static int nvme_dev_open(struct inode *inode, struct file *file)
2909{
2910 struct nvme_ctrl *ctrl =
2911 container_of(inode->i_cdev, struct nvme_ctrl, cdev);
2912
2913 switch (ctrl->state) {
2914 case NVME_CTRL_LIVE:
2868 ctrl->identified = true;
2869
2870 return 0;
2871
2872out_free:
2873 kfree(id);
2874 return ret;
2875}
2876EXPORT_SYMBOL_GPL(nvme_init_identify);
2877
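The wctemp/cctemp fields parsed above are the warning and critical composite temperature thresholds, reported in Kelvin, and nvme_hwmon_init() (run once per controller, hence the !ctrl->identified guard) is what exposes them via hwmon. A hedged sketch of the unit conversion such a consumer needs, with a hypothetical helper name:

	static long nvme_kelvin_to_millicelsius(long kelvin)
	{
		return kelvin * 1000 - 273150;	/* 0 K == -273.15 degrees C */
	}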
2878static int nvme_dev_open(struct inode *inode, struct file *file)
2879{
2880 struct nvme_ctrl *ctrl =
2881 container_of(inode->i_cdev, struct nvme_ctrl, cdev);
2882
2883 switch (ctrl->state) {
2884 case NVME_CTRL_LIVE:
2885 case NVME_CTRL_ADMIN_ONLY:
2915 break;
2916 default:
2917 return -EWOULDBLOCK;
2918 }
2919
2920 file->private_data = ctrl;
2921 return 0;
2922}

--- 277 unchanged lines hidden ---

3200static ssize_t nvme_sysfs_show_state(struct device *dev,
3201 struct device_attribute *attr,
3202 char *buf)
3203{
3204 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
3205 static const char *const state_name[] = {
3206 [NVME_CTRL_NEW] = "new",
3207 [NVME_CTRL_LIVE] = "live",
2886 break;
2887 default:
2888 return -EWOULDBLOCK;
2889 }
2890
2891 file->private_data = ctrl;
2892 return 0;
2893}

--- 277 unchanged lines hidden ---

3171static ssize_t nvme_sysfs_show_state(struct device *dev,
3172 struct device_attribute *attr,
3173 char *buf)
3174{
3175 struct nvme_ctrl *ctrl = dev_get_drvdata(dev);
3176 static const char *const state_name[] = {
3177 [NVME_CTRL_NEW] = "new",
3178 [NVME_CTRL_LIVE] = "live",
3179 [NVME_CTRL_ADMIN_ONLY] = "only-admin",
3208 [NVME_CTRL_RESETTING] = "resetting",
3209 [NVME_CTRL_CONNECTING] = "connecting",
3210 [NVME_CTRL_DELETING] = "deleting",
3211 [NVME_CTRL_DEAD] = "dead",
3212 };
3213
3214 if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) &&
3215 state_name[ctrl->state])

--- 494 unchanged lines hidden ---

3710
3711static void nvme_scan_work(struct work_struct *work)
3712{
3713 struct nvme_ctrl *ctrl =
3714 container_of(work, struct nvme_ctrl, scan_work);
3715 struct nvme_id_ctrl *id;
3716 unsigned nn;
3717
3180 [NVME_CTRL_RESETTING] = "resetting",
3181 [NVME_CTRL_CONNECTING] = "connecting",
3182 [NVME_CTRL_DELETING] = "deleting",
3183 [NVME_CTRL_DEAD] = "dead",
3184 };
3185
3186 if ((unsigned)ctrl->state < ARRAY_SIZE(state_name) &&
3187 state_name[ctrl->state])

--- 494 unchanged lines hidden ---

3682
3683static void nvme_scan_work(struct work_struct *work)
3684{
3685 struct nvme_ctrl *ctrl =
3686 container_of(work, struct nvme_ctrl, scan_work);
3687 struct nvme_id_ctrl *id;
3688 unsigned nn;
3689
 3718 /* No tagset on a live ctrl means IO queues could not be created */
3719 if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
3690 if (ctrl->state != NVME_CTRL_LIVE)
3720 return;
3721
3691 return;
3692
3693 WARN_ON_ONCE(!ctrl->tagset);
3694
3722 if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
3723 dev_info(ctrl->device, "rescanning namespaces.\n");
3724 nvme_clear_changed_ns_log(ctrl);
3725 }
3726
3727 if (nvme_identify_ctrl(ctrl, &id))
3728 return;
3729

--- 144 unchanged lines hidden ---

3874 fw_act_timeout = jiffies +
3875 msecs_to_jiffies(admin_timeout * 1000);
3876
3877 nvme_stop_queues(ctrl);
3878 while (nvme_ctrl_pp_status(ctrl)) {
3879 if (time_after(jiffies, fw_act_timeout)) {
3880 dev_warn(ctrl->device,
3881 "Fw activation timeout, reset controller\n");
3695 if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
3696 dev_info(ctrl->device, "rescanning namespaces.\n");
3697 nvme_clear_changed_ns_log(ctrl);
3698 }
3699
3700 if (nvme_identify_ctrl(ctrl, &id))
3701 return;
3702

--- 144 unchanged lines hidden ---

3847 fw_act_timeout = jiffies +
3848 msecs_to_jiffies(admin_timeout * 1000);
3849
3850 nvme_stop_queues(ctrl);
3851 while (nvme_ctrl_pp_status(ctrl)) {
3852 if (time_after(jiffies, fw_act_timeout)) {
3853 dev_warn(ctrl->device,
3854 "Fw activation timeout, reset controller\n");
3882 nvme_try_sched_reset(ctrl);
3883 return;
3855 nvme_reset_ctrl(ctrl);
3856 break;
3884 }
3885 msleep(100);
3886 }
3887
3857 }
3858 msleep(100);
3859 }
3860
3888 if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE))
3861 if (ctrl->state != NVME_CTRL_LIVE)
3889 return;
3890
3891 nvme_start_queues(ctrl);
3892 /* read FW slot information to clear the AER */
3893 nvme_get_fw_slot_info(ctrl);
3894}
3895
3896static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
3897{
3898 u32 aer_notice_type = (result & 0xff00) >> 8;
3899
3900 trace_nvme_async_event(ctrl, aer_notice_type);
3901
3902 switch (aer_notice_type) {
3903 case NVME_AER_NOTICE_NS_CHANGED:
3904 set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events);
3905 nvme_queue_scan(ctrl);
3906 break;
3907 case NVME_AER_NOTICE_FW_ACT_STARTING:
3862 return;
3863
3864 nvme_start_queues(ctrl);
3865 /* read FW slot information to clear the AER */
3866 nvme_get_fw_slot_info(ctrl);
3867}
3868
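Rough timing of the activation wait loop above, assuming the admin_timeout module parameter is at its usual default of 60 seconds (the hidden lines may instead derive the deadline from ctrl->mtfa when the controller reports one):

	/*
	 * fw_act_timeout = jiffies + msecs_to_jiffies(60 * 1000);
	 * the loop then polls the controller's "processing paused" status
	 * every 100 ms, i.e. up to roughly 600 iterations, before giving
	 * up and falling back to a full controller reset.
	 */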
3869static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
3870{
3871 u32 aer_notice_type = (result & 0xff00) >> 8;
3872
3873 trace_nvme_async_event(ctrl, aer_notice_type);
3874
3875 switch (aer_notice_type) {
3876 case NVME_AER_NOTICE_NS_CHANGED:
3877 set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events);
3878 nvme_queue_scan(ctrl);
3879 break;
3880 case NVME_AER_NOTICE_FW_ACT_STARTING:
3908 /*
3909 * We are (ab)using the RESETTING state to prevent subsequent
3910 * recovery actions from interfering with the controller's
3911 * firmware activation.
3912 */
3913 if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING))
3914 queue_work(nvme_wq, &ctrl->fw_act_work);
3881 queue_work(nvme_wq, &ctrl->fw_act_work);
3915 break;
3916#ifdef CONFIG_NVME_MULTIPATH
3917 case NVME_AER_NOTICE_ANA:
3918 if (!ctrl->ana_log_buf)
3919 break;
3920 queue_work(nvme_wq, &ctrl->ana_work);
3921 break;
3922#endif

--- 106 unchanged lines hidden ---

4029 init_rwsem(&ctrl->namespaces_rwsem);
4030 ctrl->dev = dev;
4031 ctrl->ops = ops;
4032 ctrl->quirks = quirks;
4033 INIT_WORK(&ctrl->scan_work, nvme_scan_work);
4034 INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
4035 INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
4036 INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
3882 break;
3883#ifdef CONFIG_NVME_MULTIPATH
3884 case NVME_AER_NOTICE_ANA:
3885 if (!ctrl->ana_log_buf)
3886 break;
3887 queue_work(nvme_wq, &ctrl->ana_work);
3888 break;
3889#endif
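For reference, the AEN completion dword decoded above is laid out per the NVMe specification (the field positions come from the spec, not from this hunk):

	/*
	 * result (completion queue entry dword 0):
	 *   bits  2:0   asynchronous event type  (0x2 = Notice)
	 *   bits 15:8   event information        -> aer_notice_type above
	 *   bits 23:16  associated log page id
	 *
	 * e.g. a Namespace Attribute Changed notice arrives roughly as
	 * result = 0x00040002, so (result & 0xff00) >> 8 == 0x00
	 * == NVME_AER_NOTICE_NS_CHANGED.
	 */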

--- 106 unchanged lines hidden ---

3996 init_rwsem(&ctrl->namespaces_rwsem);
3997 ctrl->dev = dev;
3998 ctrl->ops = ops;
3999 ctrl->quirks = quirks;
4000 INIT_WORK(&ctrl->scan_work, nvme_scan_work);
4001 INIT_WORK(&ctrl->async_event_work, nvme_async_event_work);
4002 INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work);
4003 INIT_WORK(&ctrl->delete_work, nvme_delete_ctrl_work);
4037 init_waitqueue_head(&ctrl->state_wq);
4038
4039 INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
4040 memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
4041 ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
4042
4043 BUILD_BUG_ON(NVME_DSM_MAX_RANGES * sizeof(struct nvme_dsm_range) >
4044 PAGE_SIZE);
4045 ctrl->discard_page = alloc_page(GFP_KERNEL);

--- 250 unchanged lines hidden ---
4004
4005 INIT_DELAYED_WORK(&ctrl->ka_work, nvme_keep_alive_work);
4006 memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
4007 ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
4008
4009 BUILD_BUG_ON(NVME_DSM_MAX_RANGES * sizeof(struct nvme_dsm_range) >
4010 PAGE_SIZE);
4011 ctrl->discard_page = alloc_page(GFP_KERNEL);

--- 250 unchanged lines hidden ---
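Finally, the arithmetic behind the BUILD_BUG_ON() guarding the preallocated discard page in the hunk above, assuming the usual definitions of NVME_DSM_MAX_RANGES (256) and struct nvme_dsm_range (a __le32 cattr, __le32 nlb and __le64 slba, i.e. 16 bytes):

	/*
	 * 256 ranges * 16 bytes = 4096 bytes: one discard_page holds a
	 * maximally sized DSM range list on a 4 KiB PAGE_SIZE system, and
	 * the BUILD_BUG_ON trips at compile time if either definition grows.
	 */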