xref: /linux/drivers/nvme/host/zns.c (revision c79c3c34f75d72a066e292b10aa50fc758c97c89)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2020 Western Digital Corporation or its affiliates.
4  */
5 
6 #include <linux/blkdev.h>
7 #include <linux/vmalloc.h>
8 #include "nvme.h"
9 
/*
 * Re-validate the zone layout of the namespace's disk with the block
 * layer after a namespace update.  Thin wrapper so callers in core.c do
 * not need to deal with the block layer zoned API directly.
 */
int nvme_revalidate_zones(struct nvme_ns *ns)
{
	return blk_revalidate_disk_zones(ns->disk, NULL);
}
14 
15 static int nvme_set_max_append(struct nvme_ctrl *ctrl)
16 {
17 	struct nvme_command c = { };
18 	struct nvme_id_ctrl_zns *id;
19 	int status;
20 
21 	id = kzalloc(sizeof(*id), GFP_KERNEL);
22 	if (!id)
23 		return -ENOMEM;
24 
25 	c.identify.opcode = nvme_admin_identify;
26 	c.identify.cns = NVME_ID_CNS_CS_CTRL;
27 	c.identify.csi = NVME_CSI_ZNS;
28 
29 	status = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
30 	if (status) {
31 		kfree(id);
32 		return status;
33 	}
34 
35 	if (id->zasl)
36 		ctrl->max_zone_append = 1 << (id->zasl + 3);
37 	else
38 		ctrl->max_zone_append = ctrl->max_hw_sectors;
39 	kfree(id);
40 	return 0;
41 }
42 
/*
 * Validate and apply the zoned configuration of a namespace for the
 * given LBA format index @lbaf, and program the block layer zoned
 * queue limits accordingly.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -EINVAL if the
 * device configuration is unsupported, or the status of a failed
 * identify command.
 */
int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
{
	struct nvme_effects_log *log = ns->head->effects;
	struct request_queue *q = ns->queue;
	struct nvme_command c = { };
	struct nvme_id_ns_zns *id;
	int status;

	/* Driver requires zone append support */
	if ((le32_to_cpu(log->iocs[nvme_cmd_zone_append]) &
			NVME_CMD_EFFECTS_CSUPP)) {
		/* Append is supported: lift any force-RO we imposed earlier */
		if (test_and_clear_bit(NVME_NS_FORCE_RO, &ns->flags))
			dev_warn(ns->ctrl->device,
				 "Zone Append supported for zoned namespace:%d. Remove read-only mode\n",
				 ns->head->ns_id);
	} else {
		/* No append support: allow reads only, but keep the ns usable */
		set_bit(NVME_NS_FORCE_RO, &ns->flags);
		dev_warn(ns->ctrl->device,
			 "Zone Append not supported for zoned namespace:%d. Forcing to read-only mode\n",
			 ns->head->ns_id);
	}

	/* Lazily query controller append limit for the first zoned namespace */
	if (!ns->ctrl->max_zone_append) {
		status = nvme_set_max_append(ns->ctrl);
		if (status)
			return status;
	}

	id = kzalloc(sizeof(*id), GFP_KERNEL);
	if (!id)
		return -ENOMEM;

	/* Identify Namespace, ZNS command-set specific data structure */
	c.identify.opcode = nvme_admin_identify;
	c.identify.nsid = cpu_to_le32(ns->head->ns_id);
	c.identify.cns = NVME_ID_CNS_CS_NS;
	c.identify.csi = NVME_CSI_ZNS;

	status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, id, sizeof(*id));
	if (status)
		goto free_data;

	/*
	 * We currently do not handle devices requiring any of the zoned
	 * operation characteristics.
	 */
	if (id->zoc) {
		dev_warn(ns->ctrl->device,
			"zone operations:%x not supported for namespace:%u\n",
			le16_to_cpu(id->zoc), ns->head->ns_id);
		status = -EINVAL;
		goto free_data;
	}

	/* Zone size in 512B sectors; block layer requires a power of two */
	ns->zsze = nvme_lba_to_sect(ns, le64_to_cpu(id->lbafe[lbaf].zsze));
	if (!is_power_of_2(ns->zsze)) {
		dev_warn(ns->ctrl->device,
			"invalid zone size:%llu for namespace:%u\n",
			ns->zsze, ns->head->ns_id);
		status = -EINVAL;
		goto free_data;
	}

	/* Host-managed model; export zone resource limits to the block layer */
	blk_queue_set_zoned(ns->disk, BLK_ZONED_HM);
	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
	blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1);
	blk_queue_max_active_zones(q, le32_to_cpu(id->mar) + 1);
	blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);
free_data:
	kfree(id);
	return status;
}
115 
116 static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
117 					  unsigned int nr_zones, size_t *buflen)
118 {
119 	struct request_queue *q = ns->disk->queue;
120 	size_t bufsize;
121 	void *buf;
122 
123 	const size_t min_bufsize = sizeof(struct nvme_zone_report) +
124 				   sizeof(struct nvme_zone_descriptor);
125 
126 	nr_zones = min_t(unsigned int, nr_zones,
127 			 get_capacity(ns->disk) >> ilog2(ns->zsze));
128 
129 	bufsize = sizeof(struct nvme_zone_report) +
130 		nr_zones * sizeof(struct nvme_zone_descriptor);
131 	bufsize = min_t(size_t, bufsize,
132 			queue_max_hw_sectors(q) << SECTOR_SHIFT);
133 	bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
134 
135 	while (bufsize >= min_bufsize) {
136 		buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
137 		if (buf) {
138 			*buflen = bufsize;
139 			return buf;
140 		}
141 		bufsize >>= 1;
142 	}
143 	return NULL;
144 }
145 
146 static int nvme_zone_parse_entry(struct nvme_ns *ns,
147 				 struct nvme_zone_descriptor *entry,
148 				 unsigned int idx, report_zones_cb cb,
149 				 void *data)
150 {
151 	struct blk_zone zone = { };
152 
153 	if ((entry->zt & 0xf) != NVME_ZONE_TYPE_SEQWRITE_REQ) {
154 		dev_err(ns->ctrl->device, "invalid zone type %#x\n",
155 				entry->zt);
156 		return -EINVAL;
157 	}
158 
159 	zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
160 	zone.cond = entry->zs >> 4;
161 	zone.len = ns->zsze;
162 	zone.capacity = nvme_lba_to_sect(ns, le64_to_cpu(entry->zcap));
163 	zone.start = nvme_lba_to_sect(ns, le64_to_cpu(entry->zslba));
164 	zone.wp = nvme_lba_to_sect(ns, le64_to_cpu(entry->wp));
165 
166 	return cb(&zone, idx, data);
167 }
168 
/*
 * Report up to @nr_zones zones starting at (the zone containing)
 * @sector by issuing Zone Management Receive commands, invoking @cb for
 * every returned descriptor.  Returns the number of zones reported, or
 * a negative errno on failure (including -EINVAL if the device returned
 * no zones at all).
 */
static int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
			unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct nvme_zone_report *report;
	struct nvme_command c = { };
	int ret, zone_idx = 0;
	unsigned int nz, i;
	size_t buflen;

	report = nvme_zns_alloc_report_buffer(ns, nr_zones, &buflen);
	if (!report)
		return -ENOMEM;

	/* Partial report: device only returns zones it can fit in buflen */
	c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
	c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
	c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
	c.zmr.zra = NVME_ZRA_ZONE_REPORT;
	c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
	c.zmr.pr = NVME_REPORT_ZONE_PARTIAL;

	/* Round down to the zone start (zsze is a power of two) */
	sector &= ~(ns->zsze - 1);
	while (zone_idx < nr_zones && sector < get_capacity(ns->disk)) {
		memset(report, 0, buflen);

		c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns, sector));
		ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen);
		if (ret) {
			/* Map positive NVMe status codes to a generic errno */
			if (ret > 0)
				ret = -EIO;
			goto out_free;
		}

		nz = min((unsigned int)le64_to_cpu(report->nr_zones), nr_zones);
		if (!nz)
			break;

		for (i = 0; i < nz && zone_idx < nr_zones; i++) {
			ret = nvme_zone_parse_entry(ns, &report->entries[i],
						    zone_idx, cb, data);
			if (ret)
				goto out_free;
			zone_idx++;
		}

		/* Advance past the zones covered by this report */
		sector += ns->zsze * nz;
	}

	if (zone_idx > 0)
		ret = zone_idx;
	else
		ret = -EINVAL;
out_free:
	kvfree(report);
	return ret;
}
224 
225 int nvme_report_zones(struct gendisk *disk, sector_t sector,
226 		      unsigned int nr_zones, report_zones_cb cb, void *data)
227 {
228 	struct nvme_ns_head *head = NULL;
229 	struct nvme_ns *ns;
230 	int srcu_idx, ret;
231 
232 	ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
233 	if (unlikely(!ns))
234 		return -EWOULDBLOCK;
235 
236 	if (ns->head->ids.csi == NVME_CSI_ZNS)
237 		ret = nvme_ns_report_zones(ns, sector, nr_zones, cb, data);
238 	else
239 		ret = -EINVAL;
240 	nvme_put_ns_from_disk(head, srcu_idx);
241 
242 	return ret;
243 }
244 
245 blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
246 		struct nvme_command *c, enum nvme_zone_mgmt_action action)
247 {
248 	c->zms.opcode = nvme_cmd_zone_mgmt_send;
249 	c->zms.nsid = cpu_to_le32(ns->head->ns_id);
250 	c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
251 	c->zms.zsa = action;
252 
253 	if (req_op(req) == REQ_OP_ZONE_RESET_ALL)
254 		c->zms.select_all = 1;
255 
256 	return BLK_STS_OK;
257 }
258