1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2024 Oxide Computer Company
14 */
15
16 /*
17 * This file deals with all the knowledge related to supported, standard NVMe
18 * features as well as validation of other requests related to features. While
19 * there are vendor-specific features, we currently don't support issuing them
20 * to the kernel.
21 *
22 * Like other parts of the common NVMe logic, we have two different sets of data
23 * tables to help us drive validation:
24 *
25 * 1) We have a list of fields that are supported in the kernel ioctl interface
26 * and libnvme for features. There are some fields like allowing a specification
27 * via UUID which are not currently supported. The field tables are split up
28 * among get and set features because they are somewhat different in terms of
29 * what they allow (i.e. set features may use cdw12, cdw13, cdw15, etc.) and
30 * because the kernel doesn't support issuing set features from userland today.
31 *
32 * 2) We have a table of NVMe specified required and optional features. This
33 * table has dynamic properties related to whether things are supported and the
34 * set of fields that are usable because some aspects of this change with the
35 * specification version (e.g. the temperature threshold feature had no input
36 * argument in cdw11 in NVMe 1.0).
37 */
38
39 #include "nvme_common.h"
40
41 #include <sys/sysmacros.h>
42 #ifdef _KERNEL
43 #include <sys/sunddi.h>
44 #include <sys/stdint.h>
45 #else
46 #include <stdio.h>
47 #include <inttypes.h>
48 #endif
49
50 static bool
nvme_get_feat_supported_sel(const nvme_field_info_t * field,const nvme_valid_ctrl_data_t * data,char * msg,size_t msglen)51 nvme_get_feat_supported_sel(const nvme_field_info_t *field,
52 const nvme_valid_ctrl_data_t *data, char *msg, size_t msglen)
53 {
54 if (data->vcd_id->id_oncs.on_save != 0) {
55 return (true);
56 }
57
58 (void) snprintf(msg, msglen, "controller does not support field %s "
59 "(%s): missing extended data support in Log Page Attributes (LPA)",
60 field->nlfi_human, field->nlfi_spec);
61 return (false);
62 }
63
64 /*
65 * An astute observer will note that there is no instance for the DPTR here.
66 * While a buffer is required for this command, the common code does not
67 * validate buffers. In other pieces we use a length as a proxy for checking the
68 * buffer; however, there is no length argument here. The buffer is expected by
69 * the controller to be of sufficient size. This is validated by the kernel in
70 * nvme_validate_get_feature().
71 */
72 const nvme_field_info_t nvme_get_feat_fields[] = {
73 [NVME_GET_FEAT_REQ_FIELD_FID] = {
74 .nlfi_vers = &nvme_vers_1v0,
75 .nlfi_max_size = NVME_FEAT_MAX_FID,
76 .nlfi_spec = "fid",
77 .nlfi_human = "feature identifier",
78 .nlfi_def_req = true,
79 .nlfi_def_allow = true
80 },
81 [NVME_GET_FEAT_REQ_FIELD_SEL] = {
82 .nlfi_vers = &nvme_vers_1v1,
83 .nlfi_sup = nvme_get_feat_supported_sel,
84 .nlfi_max_size = NVME_FEAT_MAX_SEL,
85 .nlfi_spec = "sel",
86 .nlfi_human = "select",
87 /*
88 * Because this field was introduced in NVMe 1.1 and because
89 * most of the time we want to assume folks are looking for the
90 * current value, we end up opting to make this a non-required
91 * field and default to getting the current value.
92 */
93 .nlfi_def_req = false,
94 .nlfi_def_allow = true
95 },
96 [NVME_GET_FEAT_REQ_FIELD_CDW11] = {
97 .nlfi_vers = &nvme_vers_1v0,
98 .nlfi_max_size = UINT32_MAX,
99 .nlfi_spec = "cdw11",
100 .nlfi_human = "control dword 11",
101 /*
102 * While this isn't required by default, we will end up setting
103 * it as required based on the specifics of the feature and its
104 * version.
105 */
106 .nlfi_def_req = false,
107 .nlfi_def_allow = true
108 },
109 [NVME_GET_FEAT_REQ_FIELD_NSID] = {
110 .nlfi_vers = &nvme_vers_1v0,
111 .nlfi_valid = nvme_field_valid_nsid,
112 .nlfi_spec = "nsid",
113 .nlfi_human = "namespace ID",
114 .nlfi_def_req = false,
115 .nlfi_def_allow = true
116 }
117 };
118
119 size_t nvme_get_feat_nfields = ARRAY_SIZE(nvme_get_feat_fields);
120
121 static bool
nvme_feat_write_cache_sup(const nvme_valid_ctrl_data_t * data,const nvme_feat_info_t * feat)122 nvme_feat_write_cache_sup(const nvme_valid_ctrl_data_t *data,
123 const nvme_feat_info_t *feat)
124 {
125 return (data->vcd_id->id_vwc.vwc_present != 0);
126 }
127
128 static bool
nvme_feat_apst_sup(const nvme_valid_ctrl_data_t * data,const nvme_feat_info_t * feat)129 nvme_feat_apst_sup(const nvme_valid_ctrl_data_t *data,
130 const nvme_feat_info_t *feat)
131 {
132 return (data->vcd_id->id_apsta.ap_sup != 0);
133 }
134
135 /*
136 * Note, many of these short names come from the history of nvmeadm(8). If you
137 * wish to change them, then you must figure out a way to make sure we can still
138 * honor the original names. Most fields here try to use a value of 0 as
139 * reasonable default so if something's not specified we'll get a reasonable
140 * value. For example, NVME_FEAT_MANDATORY, NVME_FEAT_CSI_NONE, etc. all have a
141 * value of zero so when that field isn't present we get something reasonable.
142 * This leads us to generally define fields that are exceptions to the norm
143 * (e.g. when a feature is specific to the NVM feature set).
144 */
145 const nvme_feat_info_t nvme_std_feats[] = { {
146 .nfeat_short = "arb",
147 .nfeat_spec = "Arbitration",
148 .nfeat_fid = NVME_FEAT_ARBITRATION,
149 .nfeat_vers = &nvme_vers_1v0,
150 .nfeat_kind = NVME_FEAT_MANDATORY,
151 .nfeat_scope = NVME_FEAT_SCOPE_CTRL,
152 .nfeat_in_set = NVME_SET_FEAT_F_CDW11,
153 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0
154 }, {
155 .nfeat_short = "pm",
156 .nfeat_spec = "Power Management",
157 .nfeat_fid = NVME_FEAT_POWER_MGMT,
158 .nfeat_vers = &nvme_vers_1v0,
159 .nfeat_kind = NVME_FEAT_MANDATORY,
160 .nfeat_scope = NVME_FEAT_SCOPE_CTRL,
161 .nfeat_in_set = NVME_SET_FEAT_F_CDW11,
162 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0
163 }, {
164 .nfeat_short = "range",
165 .nfeat_spec = "LBA Range Type",
166 .nfeat_fid = NVME_FEAT_LBA_RANGE,
167 .nfeat_vers = &nvme_vers_1v0,
168 .nfeat_kind = NVME_FEAT_OPTIONAL,
169 .nfeat_scope = NVME_FEAT_SCOPE_NS,
170 .nfeat_csi = NVME_FEAT_CSI_NVM,
171 .nfeat_in_get = NVME_GET_FEAT_F_NSID | NVME_GET_FEAT_F_DATA,
172 .nfeat_in_set = NVME_SET_FEAT_F_NSID | NVME_SET_FEAT_F_CDW11 |
173 NVME_SET_FEAT_F_DATA,
174 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0 | NVME_FEAT_OUTPUT_DATA,
175 .nfeat_len = NVME_LBA_RANGE_BUFSIZE
176 }, {
177 .nfeat_short = "temp",
178 .nfeat_spec = "Temperature Threshold",
179 .nfeat_fid = NVME_FEAT_TEMPERATURE,
180 .nfeat_vers = &nvme_vers_1v0,
181 .nfeat_kind = NVME_FEAT_MANDATORY,
182 .nfeat_scope = NVME_FEAT_SCOPE_CTRL,
183 /*
184 * In NVMe 1.0 and NVMe 1.1, there was only a single temperature sensor
185 * that the spec defined and was present in the threshold feature.
186 * However, starting in NVMe 1.2, this was changed so that a sensor was
187 * required to be specified in NVMe 1.2 to identify the sensor. As such
188 * we always end up saying that this is required.
189 */
190 .nfeat_in_get = NVME_GET_FEAT_F_CDW11,
191 .nfeat_in_set = NVME_SET_FEAT_F_CDW11,
192 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0
193 }, {
194 .nfeat_short = "errrec",
195 .nfeat_spec = "Error Recovery",
196 .nfeat_fid = NVME_FEAT_ERROR,
197 .nfeat_vers = &nvme_vers_1v0,
198 .nfeat_csi = NVME_FEAT_CSI_NVM,
199 .nfeat_kind = NVME_FEAT_MANDATORY,
200 /*
201 * The scope of this feature has a bit of a complicated history.
202 * Originally we always got this on the controller and that works for
203 * most NVMe 1.0-1.2 devices. The introduction of both namespace
204 * management and of the DULBE option which is namespace specific, made
205 * this more nuanced. The NVMe 1.4 specification makes it clear that
206 * this is namespace specific; however, if we ask for this feature on
207 * many NVMe 1.3 devices with namespace support and some NVMe 1.2, it'll
208 * generate an error about missing namespace information. Unfortunately
209 * namespace management is not a good proxy for this as for example the
210 * Samsung 980 Pro is an NVMe 1.3 device without namespace management
211 * and it will error with invalid namespace if we specify zeros.
212 *
213 * However, most devices that we've surveyed will always answer a GET
214 * FEATURES request with a namespace specified. Therefore, given the
215 * changes that have happened, for now we're going to phrase it scoped
216 * to a namespace and requiring a namespace ID.
217 */
218 .nfeat_scope = NVME_FEAT_SCOPE_NS,
219 .nfeat_in_get = NVME_GET_FEAT_F_NSID,
220 .nfeat_in_set = NVME_SET_FEAT_F_NSID | NVME_SET_FEAT_F_CDW11,
221 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0
222 }, {
223 .nfeat_short = "cache",
224 .nfeat_spec = "Volatile Write Cache",
225 .nfeat_fid = NVME_FEAT_WRITE_CACHE,
226 .nfeat_sup_func = nvme_feat_write_cache_sup,
227 .nfeat_vers = &nvme_vers_1v0,
228 .nfeat_kind = NVME_FEAT_OPTIONAL,
229 .nfeat_scope = NVME_FEAT_SCOPE_CTRL,
230 .nfeat_in_set = NVME_SET_FEAT_F_CDW11,
231 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0
232 }, {
233 .nfeat_short = "queues",
234 .nfeat_spec = "Number of Queues",
235 .nfeat_fid = NVME_FEAT_NQUEUES,
236 .nfeat_vers = &nvme_vers_1v0,
237 .nfeat_kind = NVME_FEAT_MANDATORY,
238 .nfeat_scope = NVME_FEAT_SCOPE_CTRL,
239 .nfeat_in_set = NVME_SET_FEAT_F_CDW11,
240 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0
241 }, {
242 /*
243 * The interrupt coalescing and the interrupt vector configuration
244 * features are required for all PCIe controllers; however, they are not
245 * supported for other types of controllers. As we only support NVMe
246 * PCIe controllers with this library right now we don't do anything
247 * special to denote that. If we do, we will probably want to create an
248 * optional function for determining the kind of feature and leverage
249 * the existing nfeat_sup_func.
250 */
251 .nfeat_short = "coalescing",
252 .nfeat_spec = "Interrupt Coalescing",
253 .nfeat_fid = NVME_FEAT_INTR_COAL,
254 .nfeat_vers = &nvme_vers_1v0,
255 .nfeat_kind = NVME_FEAT_MANDATORY,
256 .nfeat_scope = NVME_FEAT_SCOPE_CTRL,
257 .nfeat_in_set = NVME_SET_FEAT_F_CDW11,
258 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0
259 }, {
260 .nfeat_short = "vector",
261 .nfeat_spec = "Interrupt Vector Configuration",
262 .nfeat_fid = NVME_FEAT_INTR_VECT,
263 .nfeat_vers = &nvme_vers_1v0,
264 .nfeat_kind = NVME_FEAT_MANDATORY,
265 .nfeat_scope = NVME_FEAT_SCOPE_CTRL,
266 .nfeat_in_get = NVME_GET_FEAT_F_CDW11,
267 .nfeat_in_set = NVME_SET_FEAT_F_CDW11,
268 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0
269 }, {
270 .nfeat_short = "atomicity",
271 .nfeat_spec = "Write Atomicity",
272 .nfeat_fid = NVME_FEAT_WRITE_ATOM,
273 .nfeat_vers = &nvme_vers_1v0,
274 .nfeat_kind = NVME_FEAT_MANDATORY,
275 .nfeat_scope = NVME_FEAT_SCOPE_CTRL,
276 .nfeat_in_set = NVME_SET_FEAT_F_CDW11,
277 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0
278 }, {
279 .nfeat_short = "event",
280 .nfeat_spec = "Asynchronous Event Configuration",
281 .nfeat_fid = NVME_FEAT_ASYNC_EVENT,
282 .nfeat_vers = &nvme_vers_1v0,
283 .nfeat_kind = NVME_FEAT_MANDATORY,
284 .nfeat_scope = NVME_FEAT_SCOPE_CTRL,
285 .nfeat_in_set = NVME_SET_FEAT_F_CDW11,
286 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0
287 }, {
288 .nfeat_short = "apst",
289 .nfeat_spec = "Autonomous Power State Transition",
290 .nfeat_fid = NVME_FEAT_AUTO_PST,
291 .nfeat_vers = &nvme_vers_1v1,
292 .nfeat_sup_func = nvme_feat_apst_sup,
293 .nfeat_kind = NVME_FEAT_OPTIONAL,
294 .nfeat_scope = NVME_FEAT_SCOPE_CTRL,
295 .nfeat_in_get = NVME_GET_FEAT_F_DATA,
296 .nfeat_in_set = NVME_SET_FEAT_F_CDW11 | NVME_SET_FEAT_F_DATA,
297 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0 | NVME_FEAT_OUTPUT_DATA,
298 .nfeat_len = NVME_AUTO_PST_BUFSIZE
299 }, {
300 .nfeat_short = "progress",
301 .nfeat_spec = "Software Progress Marker",
302 .nfeat_fid = NVME_FEAT_PROGRESS,
303 .nfeat_vers = &nvme_vers_1v0,
304 .nfeat_kind = NVME_FEAT_OPTIONAL,
305 .nfeat_scope = NVME_FEAT_SCOPE_CTRL,
306 .nfeat_in_set = NVME_SET_FEAT_F_CDW11,
307 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0
308 } };
309
310 size_t nvme_std_nfeats = ARRAY_SIZE(nvme_std_feats);
311
312 /*
313 * Now it's time to answer the only hard question here: is this feature actually
314 * supported by the controller. Prior to NVMe 2.x and the Feature Identifiers
315 * Supported and Effects page, we have to use a heuristic for this. Our
316 * heuristics rules are as follows:
317 *
318 * 1) If this is a vendor-specific feature that we have identified is present on
319 * this controller based on a datasheet, we assume it's present.
320 *
321 * 2) If the feature was introduced in an NVMe spec version newer than our
322 * controller, then it's clearly unsupported.
323 *
324 * 3) If it is a mandatory feature, we have the right controller type, and we
325 * are past the minimum version, then this is supported.
326 *
327 * 4) If the feature is optional and has an explicit feature bit that indicates
328 * whether it's present or not, then we can use that to determine if it's
329 * implemented or not.
330 *
331 * Otherwise we must conclude that we don't know.
332 */
333 nvme_feat_impl_t
nvme_feat_supported(const nvme_feat_info_t * info,const nvme_valid_ctrl_data_t * data)334 nvme_feat_supported(const nvme_feat_info_t *info,
335 const nvme_valid_ctrl_data_t *data)
336 {
337 if (info->nfeat_kind == NVME_FEAT_VENDOR_SPECIFIC) {
338 return (NVME_FEAT_IMPL_SUPPORTED);
339 }
340
341 if (info->nfeat_vers != NULL &&
342 !nvme_vers_atleast(data->vcd_vers, info->nfeat_vers)) {
343 return (NVME_FEAT_IMPL_UNSUPPORTED);
344 }
345
346 if (info->nfeat_kind == NVME_FEAT_MANDATORY) {
347 ASSERT3P(info->nfeat_sup_func, ==, NULL);
348 return (NVME_FEAT_IMPL_SUPPORTED);
349 }
350
351 if (info->nfeat_sup_func != NULL) {
352 if (info->nfeat_sup_func(data, info)) {
353 return (NVME_FEAT_IMPL_SUPPORTED);
354 }
355
356 return (NVME_FEAT_IMPL_UNSUPPORTED);
357 }
358
359 return (NVME_FEAT_IMPL_UNKNOWN);
360 }
361