xref: /linux/drivers/nvme/host/zns.c (revision 0ddd7eaffa644baa78e247bbd220ab7195b1eed6)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2020 Western Digital Corporation or its affiliates.
4  */
5 
6 #include <linux/blkdev.h>
7 #include <linux/vmalloc.h>
8 #include "nvme.h"
9 
10 int nvme_revalidate_zones(struct nvme_ns *ns)
11 {
12 	struct request_queue *q = ns->queue;
13 	int ret;
14 
15 	ret = blk_revalidate_disk_zones(ns->disk, NULL);
16 	if (!ret)
17 		blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);
18 	return ret;
19 }
20 
21 static int nvme_set_max_append(struct nvme_ctrl *ctrl)
22 {
23 	struct nvme_command c = { };
24 	struct nvme_id_ctrl_zns *id;
25 	int status;
26 
27 	id = kzalloc(sizeof(*id), GFP_KERNEL);
28 	if (!id)
29 		return -ENOMEM;
30 
31 	c.identify.opcode = nvme_admin_identify;
32 	c.identify.cns = NVME_ID_CNS_CS_CTRL;
33 	c.identify.csi = NVME_CSI_ZNS;
34 
35 	status = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
36 	if (status) {
37 		kfree(id);
38 		return status;
39 	}
40 
41 	if (id->zasl)
42 		ctrl->max_zone_append = 1 << (id->zasl + 3);
43 	else
44 		ctrl->max_zone_append = ctrl->max_hw_sectors;
45 	kfree(id);
46 	return 0;
47 }
48 
49 int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
50 {
51 	struct nvme_effects_log *log = ns->head->effects;
52 	struct request_queue *q = ns->queue;
53 	struct nvme_command c = { };
54 	struct nvme_id_ns_zns *id;
55 	int status;
56 
57 	/* Driver requires zone append support */
58 	if ((le32_to_cpu(log->iocs[nvme_cmd_zone_append]) &
59 			NVME_CMD_EFFECTS_CSUPP)) {
60 		if (test_and_clear_bit(NVME_NS_FORCE_RO, &ns->flags))
61 			dev_warn(ns->ctrl->device,
62 				 "Zone Append supported for zoned namespace:%d. Remove read-only mode\n",
63 				 ns->head->ns_id);
64 	} else {
65 		set_bit(NVME_NS_FORCE_RO, &ns->flags);
66 		dev_warn(ns->ctrl->device,
67 			 "Zone Append not supported for zoned namespace:%d. Forcing to read-only mode\n",
68 			 ns->head->ns_id);
69 	}
70 
71 	/* Lazily query controller append limit for the first zoned namespace */
72 	if (!ns->ctrl->max_zone_append) {
73 		status = nvme_set_max_append(ns->ctrl);
74 		if (status)
75 			return status;
76 	}
77 
78 	id = kzalloc(sizeof(*id), GFP_KERNEL);
79 	if (!id)
80 		return -ENOMEM;
81 
82 	c.identify.opcode = nvme_admin_identify;
83 	c.identify.nsid = cpu_to_le32(ns->head->ns_id);
84 	c.identify.cns = NVME_ID_CNS_CS_NS;
85 	c.identify.csi = NVME_CSI_ZNS;
86 
87 	status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, id, sizeof(*id));
88 	if (status)
89 		goto free_data;
90 
91 	/*
92 	 * We currently do not handle devices requiring any of the zoned
93 	 * operation characteristics.
94 	 */
95 	if (id->zoc) {
96 		dev_warn(ns->ctrl->device,
97 			"zone operations:%x not supported for namespace:%u\n",
98 			le16_to_cpu(id->zoc), ns->head->ns_id);
99 		status = -ENODEV;
100 		goto free_data;
101 	}
102 
103 	ns->zsze = nvme_lba_to_sect(ns, le64_to_cpu(id->lbafe[lbaf].zsze));
104 	if (!is_power_of_2(ns->zsze)) {
105 		dev_warn(ns->ctrl->device,
106 			"invalid zone size:%llu for namespace:%u\n",
107 			ns->zsze, ns->head->ns_id);
108 		status = -ENODEV;
109 		goto free_data;
110 	}
111 
112 	blk_queue_set_zoned(ns->disk, BLK_ZONED_HM);
113 	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
114 	blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1);
115 	blk_queue_max_active_zones(q, le32_to_cpu(id->mar) + 1);
116 free_data:
117 	kfree(id);
118 	return status;
119 }
120 
121 static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
122 					  unsigned int nr_zones, size_t *buflen)
123 {
124 	struct request_queue *q = ns->disk->queue;
125 	size_t bufsize;
126 	void *buf;
127 
128 	const size_t min_bufsize = sizeof(struct nvme_zone_report) +
129 				   sizeof(struct nvme_zone_descriptor);
130 
131 	nr_zones = min_t(unsigned int, nr_zones,
132 			 get_capacity(ns->disk) >> ilog2(ns->zsze));
133 
134 	bufsize = sizeof(struct nvme_zone_report) +
135 		nr_zones * sizeof(struct nvme_zone_descriptor);
136 	bufsize = min_t(size_t, bufsize,
137 			queue_max_hw_sectors(q) << SECTOR_SHIFT);
138 	bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
139 
140 	while (bufsize >= min_bufsize) {
141 		buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
142 		if (buf) {
143 			*buflen = bufsize;
144 			return buf;
145 		}
146 		bufsize >>= 1;
147 	}
148 	return NULL;
149 }
150 
151 static int nvme_zone_parse_entry(struct nvme_ns *ns,
152 				 struct nvme_zone_descriptor *entry,
153 				 unsigned int idx, report_zones_cb cb,
154 				 void *data)
155 {
156 	struct blk_zone zone = { };
157 
158 	if ((entry->zt & 0xf) != NVME_ZONE_TYPE_SEQWRITE_REQ) {
159 		dev_err(ns->ctrl->device, "invalid zone type %#x\n",
160 				entry->zt);
161 		return -EINVAL;
162 	}
163 
164 	zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
165 	zone.cond = entry->zs >> 4;
166 	zone.len = ns->zsze;
167 	zone.capacity = nvme_lba_to_sect(ns, le64_to_cpu(entry->zcap));
168 	zone.start = nvme_lba_to_sect(ns, le64_to_cpu(entry->zslba));
169 	zone.wp = nvme_lba_to_sect(ns, le64_to_cpu(entry->wp));
170 
171 	return cb(&zone, idx, data);
172 }
173 
174 static int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
175 			unsigned int nr_zones, report_zones_cb cb, void *data)
176 {
177 	struct nvme_zone_report *report;
178 	struct nvme_command c = { };
179 	int ret, zone_idx = 0;
180 	unsigned int nz, i;
181 	size_t buflen;
182 
183 	report = nvme_zns_alloc_report_buffer(ns, nr_zones, &buflen);
184 	if (!report)
185 		return -ENOMEM;
186 
187 	c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
188 	c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
189 	c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
190 	c.zmr.zra = NVME_ZRA_ZONE_REPORT;
191 	c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
192 	c.zmr.pr = NVME_REPORT_ZONE_PARTIAL;
193 
194 	sector &= ~(ns->zsze - 1);
195 	while (zone_idx < nr_zones && sector < get_capacity(ns->disk)) {
196 		memset(report, 0, buflen);
197 
198 		c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns, sector));
199 		ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen);
200 		if (ret) {
201 			if (ret > 0)
202 				ret = -EIO;
203 			goto out_free;
204 		}
205 
206 		nz = min((unsigned int)le64_to_cpu(report->nr_zones), nr_zones);
207 		if (!nz)
208 			break;
209 
210 		for (i = 0; i < nz && zone_idx < nr_zones; i++) {
211 			ret = nvme_zone_parse_entry(ns, &report->entries[i],
212 						    zone_idx, cb, data);
213 			if (ret)
214 				goto out_free;
215 			zone_idx++;
216 		}
217 
218 		sector += ns->zsze * nz;
219 	}
220 
221 	if (zone_idx > 0)
222 		ret = zone_idx;
223 	else
224 		ret = -EINVAL;
225 out_free:
226 	kvfree(report);
227 	return ret;
228 }
229 
230 int nvme_report_zones(struct gendisk *disk, sector_t sector,
231 		      unsigned int nr_zones, report_zones_cb cb, void *data)
232 {
233 	struct nvme_ns_head *head = NULL;
234 	struct nvme_ns *ns;
235 	int srcu_idx, ret;
236 
237 	ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
238 	if (unlikely(!ns))
239 		return -EWOULDBLOCK;
240 
241 	if (ns->head->ids.csi == NVME_CSI_ZNS)
242 		ret = nvme_ns_report_zones(ns, sector, nr_zones, cb, data);
243 	else
244 		ret = -EINVAL;
245 	nvme_put_ns_from_disk(head, srcu_idx);
246 
247 	return ret;
248 }
249 
250 blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
251 		struct nvme_command *c, enum nvme_zone_mgmt_action action)
252 {
253 	c->zms.opcode = nvme_cmd_zone_mgmt_send;
254 	c->zms.nsid = cpu_to_le32(ns->head->ns_id);
255 	c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
256 	c->zms.zsa = action;
257 
258 	if (req_op(req) == REQ_OP_ZONE_RESET_ALL)
259 		c->zms.select_all = 1;
260 
261 	return BLK_STS_OK;
262 }
263