xref: /linux/drivers/nvme/host/zns.c (revision 9abd613a85af72fa560e49d9a0acc5b872840c72)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2020 Western Digital Corporation or its affiliates.
4  */
5 
6 #include <linux/blkdev.h>
7 #include <linux/vmalloc.h>
8 #include "nvme.h"
9 
10 static int nvme_set_max_append(struct nvme_ctrl *ctrl)
11 {
12 	struct nvme_command c = { };
13 	struct nvme_id_ctrl_zns *id;
14 	int status;
15 
16 	id = kzalloc(sizeof(*id), GFP_KERNEL);
17 	if (!id)
18 		return -ENOMEM;
19 
20 	c.identify.opcode = nvme_admin_identify;
21 	c.identify.cns = NVME_ID_CNS_CS_CTRL;
22 	c.identify.csi = NVME_CSI_ZNS;
23 
24 	status = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
25 	if (status) {
26 		kfree(id);
27 		return status;
28 	}
29 
30 	if (id->zasl)
31 		ctrl->max_zone_append = 1 << (id->zasl + 3);
32 	else
33 		ctrl->max_zone_append = ctrl->max_hw_sectors;
34 	kfree(id);
35 	return 0;
36 }
37 
38 int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf,
39 		struct queue_limits *lim)
40 {
41 	struct nvme_effects_log *log = ns->head->effects;
42 	struct nvme_command c = { };
43 	struct nvme_id_ns_zns *id;
44 	int status;
45 
46 	/* Driver requires zone append support */
47 	if ((le32_to_cpu(log->iocs[nvme_cmd_zone_append]) &
48 			NVME_CMD_EFFECTS_CSUPP)) {
49 		if (test_and_clear_bit(NVME_NS_FORCE_RO, &ns->flags))
50 			dev_warn(ns->ctrl->device,
51 				 "Zone Append supported for zoned namespace:%d. Remove read-only mode\n",
52 				 ns->head->ns_id);
53 	} else {
54 		set_bit(NVME_NS_FORCE_RO, &ns->flags);
55 		dev_warn(ns->ctrl->device,
56 			 "Zone Append not supported for zoned namespace:%d. Forcing to read-only mode\n",
57 			 ns->head->ns_id);
58 	}
59 
60 	/* Lazily query controller append limit for the first zoned namespace */
61 	if (!ns->ctrl->max_zone_append) {
62 		status = nvme_set_max_append(ns->ctrl);
63 		if (status)
64 			return status;
65 	}
66 
67 	id = kzalloc(sizeof(*id), GFP_KERNEL);
68 	if (!id)
69 		return -ENOMEM;
70 
71 	c.identify.opcode = nvme_admin_identify;
72 	c.identify.nsid = cpu_to_le32(ns->head->ns_id);
73 	c.identify.cns = NVME_ID_CNS_CS_NS;
74 	c.identify.csi = NVME_CSI_ZNS;
75 
76 	status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, id, sizeof(*id));
77 	if (status)
78 		goto free_data;
79 
80 	/*
81 	 * We currently do not handle devices requiring any of the zoned
82 	 * operation characteristics.
83 	 */
84 	if (id->zoc) {
85 		dev_warn(ns->ctrl->device,
86 			"zone operations:%x not supported for namespace:%u\n",
87 			le16_to_cpu(id->zoc), ns->head->ns_id);
88 		status = -ENODEV;
89 		goto free_data;
90 	}
91 
92 	ns->head->zsze =
93 		nvme_lba_to_sect(ns->head, le64_to_cpu(id->lbafe[lbaf].zsze));
94 	if (!is_power_of_2(ns->head->zsze)) {
95 		dev_warn(ns->ctrl->device,
96 			"invalid zone size:%llu for namespace:%u\n",
97 			ns->head->zsze, ns->head->ns_id);
98 		status = -ENODEV;
99 		goto free_data;
100 	}
101 
102 	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue);
103 	lim->zoned = 1;
104 	lim->max_open_zones = le32_to_cpu(id->mor) + 1;
105 	lim->max_active_zones = le32_to_cpu(id->mar) + 1;
106 	lim->chunk_sectors = ns->head->zsze;
107 	lim->max_zone_append_sectors = ns->ctrl->max_zone_append;
108 free_data:
109 	kfree(id);
110 	return status;
111 }
112 
113 static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
114 					  unsigned int nr_zones, size_t *buflen)
115 {
116 	struct request_queue *q = ns->disk->queue;
117 	size_t bufsize;
118 	void *buf;
119 
120 	const size_t min_bufsize = sizeof(struct nvme_zone_report) +
121 				   sizeof(struct nvme_zone_descriptor);
122 
123 	nr_zones = min_t(unsigned int, nr_zones,
124 			 get_capacity(ns->disk) >> ilog2(ns->head->zsze));
125 
126 	bufsize = sizeof(struct nvme_zone_report) +
127 		nr_zones * sizeof(struct nvme_zone_descriptor);
128 	bufsize = min_t(size_t, bufsize,
129 			queue_max_hw_sectors(q) << SECTOR_SHIFT);
130 	bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
131 
132 	while (bufsize >= min_bufsize) {
133 		buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
134 		if (buf) {
135 			*buflen = bufsize;
136 			return buf;
137 		}
138 		bufsize >>= 1;
139 	}
140 	return NULL;
141 }
142 
143 static int nvme_zone_parse_entry(struct nvme_ctrl *ctrl,
144 				 struct nvme_ns_head *head,
145 				 struct nvme_zone_descriptor *entry,
146 				 unsigned int idx, report_zones_cb cb,
147 				 void *data)
148 {
149 	struct blk_zone zone = { };
150 
151 	if ((entry->zt & 0xf) != NVME_ZONE_TYPE_SEQWRITE_REQ) {
152 		dev_err(ctrl->device, "invalid zone type %#x\n",
153 				entry->zt);
154 		return -EINVAL;
155 	}
156 
157 	zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
158 	zone.cond = entry->zs >> 4;
159 	zone.len = head->zsze;
160 	zone.capacity = nvme_lba_to_sect(head, le64_to_cpu(entry->zcap));
161 	zone.start = nvme_lba_to_sect(head, le64_to_cpu(entry->zslba));
162 	if (zone.cond == BLK_ZONE_COND_FULL)
163 		zone.wp = zone.start + zone.len;
164 	else
165 		zone.wp = nvme_lba_to_sect(head, le64_to_cpu(entry->wp));
166 
167 	return cb(&zone, idx, data);
168 }
169 
170 int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
171 		unsigned int nr_zones, report_zones_cb cb, void *data)
172 {
173 	struct nvme_zone_report *report;
174 	struct nvme_command c = { };
175 	int ret, zone_idx = 0;
176 	unsigned int nz, i;
177 	size_t buflen;
178 
179 	if (ns->head->ids.csi != NVME_CSI_ZNS)
180 		return -EINVAL;
181 
182 	report = nvme_zns_alloc_report_buffer(ns, nr_zones, &buflen);
183 	if (!report)
184 		return -ENOMEM;
185 
186 	c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
187 	c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
188 	c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
189 	c.zmr.zra = NVME_ZRA_ZONE_REPORT;
190 	c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
191 	c.zmr.pr = NVME_REPORT_ZONE_PARTIAL;
192 
193 	sector &= ~(ns->head->zsze - 1);
194 	while (zone_idx < nr_zones && sector < get_capacity(ns->disk)) {
195 		memset(report, 0, buflen);
196 
197 		c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns->head, sector));
198 		ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen);
199 		if (ret) {
200 			if (ret > 0)
201 				ret = -EIO;
202 			goto out_free;
203 		}
204 
205 		nz = min((unsigned int)le64_to_cpu(report->nr_zones), nr_zones);
206 		if (!nz)
207 			break;
208 
209 		for (i = 0; i < nz && zone_idx < nr_zones; i++) {
210 			ret = nvme_zone_parse_entry(ns->ctrl, ns->head,
211 						    &report->entries[i],
212 						    zone_idx, cb, data);
213 			if (ret)
214 				goto out_free;
215 			zone_idx++;
216 		}
217 
218 		sector += ns->head->zsze * nz;
219 	}
220 
221 	if (zone_idx > 0)
222 		ret = zone_idx;
223 	else
224 		ret = -EINVAL;
225 out_free:
226 	kvfree(report);
227 	return ret;
228 }
229 
230 blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
231 		struct nvme_command *c, enum nvme_zone_mgmt_action action)
232 {
233 	memset(c, 0, sizeof(*c));
234 
235 	c->zms.opcode = nvme_cmd_zone_mgmt_send;
236 	c->zms.nsid = cpu_to_le32(ns->head->ns_id);
237 	c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns->head, blk_rq_pos(req)));
238 	c->zms.zsa = action;
239 
240 	if (req_op(req) == REQ_OP_ZONE_RESET_ALL)
241 		c->zms.select_all = 1;
242 
243 	return BLK_STS_OK;
244 }
245