1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Copyright (C) 2020 Western Digital Corporation or its affiliates.
4 */
5
6 #include <linux/blkdev.h>
7 #include <linux/vmalloc.h>
8 #include "nvme.h"
9
nvme_set_max_append(struct nvme_ctrl * ctrl)10 static int nvme_set_max_append(struct nvme_ctrl *ctrl)
11 {
12 struct nvme_command c = { };
13 struct nvme_id_ctrl_zns *id;
14 int status;
15
16 id = kzalloc(sizeof(*id), GFP_KERNEL);
17 if (!id)
18 return -ENOMEM;
19
20 c.identify.opcode = nvme_admin_identify;
21 c.identify.cns = NVME_ID_CNS_CS_CTRL;
22 c.identify.csi = NVME_CSI_ZNS;
23
24 status = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
25 if (status) {
26 kfree(id);
27 return status;
28 }
29
30 if (id->zasl)
31 ctrl->max_zone_append = 1 << (id->zasl + 3);
32 else
33 ctrl->max_zone_append = ctrl->max_hw_sectors;
34 kfree(id);
35 return 0;
36 }
37
nvme_query_zone_info(struct nvme_ns * ns,unsigned lbaf,struct nvme_zone_info * zi)38 int nvme_query_zone_info(struct nvme_ns *ns, unsigned lbaf,
39 struct nvme_zone_info *zi)
40 {
41 struct nvme_effects_log *log = ns->head->effects;
42 struct nvme_command c = { };
43 struct nvme_id_ns_zns *id;
44 int status;
45
46 /* Driver requires zone append support */
47 if ((le32_to_cpu(log->iocs[nvme_cmd_zone_append]) &
48 NVME_CMD_EFFECTS_CSUPP)) {
49 if (test_and_clear_bit(NVME_NS_FORCE_RO, &ns->flags))
50 dev_warn(ns->ctrl->device,
51 "Zone Append supported for zoned namespace:%d. Remove read-only mode\n",
52 ns->head->ns_id);
53 } else {
54 set_bit(NVME_NS_FORCE_RO, &ns->flags);
55 dev_warn(ns->ctrl->device,
56 "Zone Append not supported for zoned namespace:%d. Forcing to read-only mode\n",
57 ns->head->ns_id);
58 }
59
60 /* Lazily query controller append limit for the first zoned namespace */
61 if (!ns->ctrl->max_zone_append) {
62 status = nvme_set_max_append(ns->ctrl);
63 if (status)
64 return status;
65 }
66
67 id = kzalloc(sizeof(*id), GFP_KERNEL);
68 if (!id)
69 return -ENOMEM;
70
71 c.identify.opcode = nvme_admin_identify;
72 c.identify.nsid = cpu_to_le32(ns->head->ns_id);
73 c.identify.cns = NVME_ID_CNS_CS_NS;
74 c.identify.csi = NVME_CSI_ZNS;
75
76 status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, id, sizeof(*id));
77 if (status)
78 goto free_data;
79
80 /*
81 * We currently do not handle devices requiring any of the zoned
82 * operation characteristics.
83 */
84 if (id->zoc) {
85 dev_warn(ns->ctrl->device,
86 "zone operations:%x not supported for namespace:%u\n",
87 le16_to_cpu(id->zoc), ns->head->ns_id);
88 status = -ENODEV;
89 goto free_data;
90 }
91
92 zi->zone_size = le64_to_cpu(id->lbafe[lbaf].zsze);
93 if (!is_power_of_2(zi->zone_size)) {
94 dev_warn(ns->ctrl->device,
95 "invalid zone size: %llu for namespace: %u\n",
96 zi->zone_size, ns->head->ns_id);
97 status = -ENODEV;
98 goto free_data;
99 }
100 zi->max_open_zones = le32_to_cpu(id->mor) + 1;
101 zi->max_active_zones = le32_to_cpu(id->mar) + 1;
102
103 free_data:
104 kfree(id);
105 return status;
106 }
107
nvme_update_zone_info(struct nvme_ns * ns,struct queue_limits * lim,struct nvme_zone_info * zi)108 void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim,
109 struct nvme_zone_info *zi)
110 {
111 lim->features |= BLK_FEAT_ZONED;
112 lim->max_open_zones = zi->max_open_zones;
113 lim->max_active_zones = zi->max_active_zones;
114 lim->max_hw_zone_append_sectors = ns->ctrl->max_zone_append;
115 lim->chunk_sectors = ns->head->zsze =
116 nvme_lba_to_sect(ns->head, zi->zone_size);
117 }
118
nvme_zns_alloc_report_buffer(struct nvme_ns * ns,unsigned int nr_zones,size_t * buflen)119 static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
120 unsigned int nr_zones, size_t *buflen)
121 {
122 struct request_queue *q = ns->disk->queue;
123 size_t bufsize;
124 void *buf;
125
126 const size_t min_bufsize = sizeof(struct nvme_zone_report) +
127 sizeof(struct nvme_zone_descriptor);
128
129 nr_zones = min_t(unsigned int, nr_zones,
130 get_capacity(ns->disk) >> ilog2(ns->head->zsze));
131
132 bufsize = sizeof(struct nvme_zone_report) +
133 nr_zones * sizeof(struct nvme_zone_descriptor);
134 bufsize = min_t(size_t, bufsize,
135 queue_max_hw_sectors(q) << SECTOR_SHIFT);
136 bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
137
138 while (bufsize >= min_bufsize) {
139 buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
140 if (buf) {
141 *buflen = bufsize;
142 return buf;
143 }
144 bufsize >>= 1;
145 }
146 return NULL;
147 }
148
/*
 * Convert one NVMe zone descriptor into a struct blk_zone and pass it to @cb.
 *
 * @ns:    namespace the descriptor belongs to
 * @entry: zone descriptor as returned by the device
 * @idx:   zone index forwarded to @cb
 * @cb:    report callback
 * @data:  opaque pointer forwarded to @cb
 *
 * Returns -EINVAL if the descriptor's zone type is not sequential-write
 * required, otherwise the return value of @cb.
 */
static int nvme_zone_parse_entry(struct nvme_ns *ns,
				 struct nvme_zone_descriptor *entry,
				 unsigned int idx, report_zones_cb cb,
				 void *data)
{
	struct nvme_ns_head *head = ns->head;
	struct blk_zone blkz = { };

	/* Only the low nibble of zt is compared against the zone type */
	if ((entry->zt & 0xf) != NVME_ZONE_TYPE_SEQWRITE_REQ) {
		dev_err(ns->ctrl->device, "invalid zone type %#x\n", entry->zt);
		return -EINVAL;
	}

	blkz.start = nvme_lba_to_sect(head, le64_to_cpu(entry->zslba));
	blkz.len = head->zsze;
	blkz.capacity = nvme_lba_to_sect(head, le64_to_cpu(entry->zcap));
	blkz.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
	/* The zone condition is taken from the high nibble of zs */
	blkz.cond = entry->zs >> 4;

	/* A full zone reports its write pointer at the end of the zone */
	blkz.wp = (blkz.cond == BLK_ZONE_COND_FULL) ?
			blkz.start + blkz.len :
			nvme_lba_to_sect(head, le64_to_cpu(entry->wp));

	return cb(&blkz, idx, data);
}
174
/*
 * Report up to @nr_zones zones of @ns, starting with the zone that contains
 * @sector, invoking @cb once per zone.
 *
 * Zone Management Receive commands are issued repeatedly with a buffer from
 * nvme_zns_alloc_report_buffer() until @nr_zones zones have been reported,
 * the end of the disk is reached, or the device returns an empty report.
 *
 * Returns the number of zones reported (> 0) on success. Errors:
 *   -EINVAL  the namespace is not a ZNS namespace, or no zone was reported
 *   -ENOMEM  the report buffer could not be allocated
 *   -EIO     the command completed with a positive (NVMe) status
 *   other    negative value from nvme_submit_sync_cmd(), or the non-zero
 *            return of @cb / nvme_zone_parse_entry()
 */
int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
		unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct nvme_zone_report *report;
	struct nvme_command c = { };
	int ret, zone_idx = 0;
	unsigned int nz, max_nz, i;
	size_t buflen;

	if (ns->head->ids.csi != NVME_CSI_ZNS)
		return -EINVAL;

	report = nvme_zns_alloc_report_buffer(ns, nr_zones, &buflen);
	if (!report)
		return -ENOMEM;

	/*
	 * Number of descriptors that physically fit in the buffer. The
	 * allocator never returns less than header + one descriptor, so this
	 * is at least 1.
	 */
	max_nz = (buflen - sizeof(*report)) / sizeof(report->entries[0]);

	c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
	c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
	c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
	c.zmr.zra = NVME_ZRA_ZONE_REPORT;
	c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
	c.zmr.pr = NVME_REPORT_ZONE_PARTIAL;

	/*
	 * Round down to the start of the containing zone; this relies on the
	 * zone size being a power of two.
	 */
	sector &= ~(ns->head->zsze - 1);
	while (zone_idx < nr_zones && sector < get_capacity(ns->disk)) {
		memset(report, 0, buflen);

		c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns->head, sector));
		ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen);
		if (ret) {
			/* Positive values are NVMe status codes, not errnos */
			if (ret > 0)
				ret = -EIO;
			goto out_free;
		}

		nz = min((unsigned int)le64_to_cpu(report->nr_zones), nr_zones);
		/*
		 * Never trust the device-reported count beyond what the
		 * buffer can hold, otherwise entries[] would be read past
		 * the end of the allocation.
		 */
		nz = min(nz, max_nz);
		if (!nz)
			break;

		for (i = 0; i < nz && zone_idx < nr_zones; i++) {
			ret = nvme_zone_parse_entry(ns, &report->entries[i],
						    zone_idx, cb, data);
			if (ret)
				goto out_free;
			zone_idx++;
		}

		sector += ns->head->zsze * nz;
	}

	if (zone_idx > 0)
		ret = zone_idx;
	else
		ret = -EINVAL;
out_free:
	kvfree(report);
	return ret;
}
233
nvme_setup_zone_mgmt_send(struct nvme_ns * ns,struct request * req,struct nvme_command * c,enum nvme_zone_mgmt_action action)234 blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
235 struct nvme_command *c, enum nvme_zone_mgmt_action action)
236 {
237 memset(c, 0, sizeof(*c));
238
239 c->zms.opcode = nvme_cmd_zone_mgmt_send;
240 c->zms.nsid = cpu_to_le32(ns->head->ns_id);
241 c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns->head, blk_rq_pos(req)));
242 c->zms.zsa = action;
243
244 if (req_op(req) == REQ_OP_ZONE_RESET_ALL)
245 c->zms.select_all = 1;
246
247 return BLK_STS_OK;
248 }
249