xref: /illumos-gate/usr/src/common/nvme/nvme_feature.c (revision 2833423dc59f4c35fe4713dbb942950c82df0437)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2024 Oxide Computer Company
14  */
15 
16 /*
17  * This file deals with all the knowledge related to supported, standard NVMe
18  * features as well as validation of other requests related to features. While
19  * there are vendor-specific features, we currently don't support issuing them
20  * to the kernel.
21  *
22  * Like other parts of the common NVMe logic, we have two different sets of data
23  * tables to help us drive validation:
24  *
25  * 1) We have a list of fields that are supported in the kernel ioctl interface
26  * and libnvme for features. There are some fields like allowing a specification
27  * via UUID which are not currently supported. The field tables are split up
28  * among get and set features because they are somewhat different in terms of
29  * what they allow (i.e. set features may use cdw12, cdw13, cdw15, etc.) and
30  * because the kernel doesn't support issuing set features from userland today.
31  *
32  * 2) We have a table of NVMe specified required and optional features. This
33  * table has dynamic properties related to whether things are supported and the
34  * set of fields that are usable because some aspects of this change with the
35  * specification version (e.g. the temperature threshold feature had no input
36  * argument in cdw11 in NVMe 1.0).
37  */
38 
39 #include "nvme_common.h"
40 
41 #include <sys/sysmacros.h>
42 #ifdef	_KERNEL
43 #include <sys/sunddi.h>
44 #include <sys/stdint.h>
45 #else
46 #include <stdio.h>
47 #include <inttypes.h>
48 #endif
49 
50 static bool
51 nvme_get_feat_supported_sel(const nvme_field_info_t *field,
52     const nvme_valid_ctrl_data_t *data, char *msg, size_t msglen)
53 {
54 	if (data->vcd_id->id_oncs.on_save != 0) {
55 		return (true);
56 	}
57 
58 	(void) snprintf(msg, msglen, "controller does not support field %s "
59 	    "(%s): missing extended data support in Log Page Attributes (LPA)",
60 	    field->nlfi_human, field->nlfi_spec);
61 	return (false);
62 }
63 
64 const nvme_field_info_t nvme_get_feat_fields[] = {
65 	[NVME_GET_FEAT_REQ_FIELD_FID] = {
66 		.nlfi_vers = &nvme_vers_1v0,
67 		.nlfi_max_size = NVME_FEAT_MAX_FID,
68 		.nlfi_spec = "fid",
69 		.nlfi_human = "feature identifier",
70 		.nlfi_def_req = true,
71 		.nlfi_def_allow = true
72 	},
73 	[NVME_GET_FEAT_REQ_FIELD_SEL] = {
74 		.nlfi_vers = &nvme_vers_1v1,
75 		.nlfi_sup = nvme_get_feat_supported_sel,
76 		.nlfi_max_size = NVME_FEAT_MAX_SEL,
77 		.nlfi_spec = "sel",
78 		.nlfi_human = "select",
79 		/*
80 		 * Because this field was introduced in NVMe 1.1 and because
81 		 * most of the time we want to assume folks are looking for the
82 		 * current value, we end up opting to make this a non-required
83 		 * field and default to getting the current value.
84 		 */
85 		.nlfi_def_req = false,
86 		.nlfi_def_allow = true
87 	},
88 	[NVME_GET_FEAT_REQ_FIELD_CDW11] = {
89 		.nlfi_vers = &nvme_vers_1v0,
90 		.nlfi_max_size = UINT32_MAX,
91 		.nlfi_spec = "cdw11",
92 		.nlfi_human = "control dword 11",
93 		/*
94 		 * While this isn't required by default, we will end up setting
95 		 * it as required based on the specifics of the feature and its
96 		 * version.
97 		 */
98 		.nlfi_def_req = false,
99 		.nlfi_def_allow = true
100 	},
101 	[NVME_GET_FEAT_REQ_FIELD_NSID] = {
102 		.nlfi_vers = &nvme_vers_1v0,
103 		.nlfi_valid = nvme_field_valid_nsid,
104 		.nlfi_spec = "nsid",
105 		.nlfi_human = "namespace ID",
106 		.nlfi_def_req = false,
107 		.nlfi_def_allow = true
108 	}
109 };
110 
111 size_t nvme_get_feat_nfields = ARRAY_SIZE(nvme_get_feat_fields);
112 
113 static bool
114 nvme_feat_write_cache_sup(const nvme_valid_ctrl_data_t *data,
115     const nvme_feat_info_t *feat)
116 {
117 	return (data->vcd_id->id_vwc.vwc_present != 0);
118 }
119 
120 static bool
121 nvme_feat_apst_sup(const nvme_valid_ctrl_data_t *data,
122     const nvme_feat_info_t *feat)
123 {
124 	return (data->vcd_id->id_apsta.ap_sup != 0);
125 }
126 
127 /*
128  * Note, many of these short names come from the history of nvmeadm(8). If you
129  * wish to change them, then you must figure out a way to make sure we can still
130  * honor the original names. Most fields here try to use a value of 0 as a
131  * reasonable default so if something's not specified we'll get a reasonable
132  * value. For example, NVME_FEAT_MANDATORY, NVME_FEAT_CSI_NONE, etc. all have a
133  * value of zero so when that field isn't present we get something reasonable.
134  * This leads us to generally define fields that are exceptions to the norm
135  * (e.g. when a feature is specific to the NVM feature set).
136  */
137 const nvme_feat_info_t nvme_std_feats[] = { {
138 	.nfeat_short = "arb",
139 	.nfeat_spec = "Arbitration",
140 	.nfeat_fid = NVME_FEAT_ARBITRATION,
141 	.nfeat_vers = &nvme_vers_1v0,
142 	.nfeat_kind = NVME_FEAT_MANDATORY,
143 	.nfeat_scope = NVME_FEAT_SCOPE_CTRL,
144 	.nfeat_in_set = NVME_SET_FEAT_F_CDW11,
145 	.nfeat_out_get = NVME_FEAT_OUTPUT_CDW0
146 }, {
147 	.nfeat_short = "pm",
148 	.nfeat_spec = "Power Management",
149 	.nfeat_fid = NVME_FEAT_POWER_MGMT,
150 	.nfeat_vers = &nvme_vers_1v0,
151 	.nfeat_kind = NVME_FEAT_MANDATORY,
152 	.nfeat_scope = NVME_FEAT_SCOPE_CTRL,
153 	.nfeat_in_set = NVME_SET_FEAT_F_CDW11,
154 	.nfeat_out_get = NVME_FEAT_OUTPUT_CDW0
155 }, {
156 	.nfeat_short = "range",
157 	.nfeat_spec = "LBA Range Type",
158 	.nfeat_fid = NVME_FEAT_LBA_RANGE,
159 	.nfeat_vers = &nvme_vers_1v0,
160 	.nfeat_kind = NVME_FEAT_OPTIONAL,
161 	.nfeat_scope = NVME_FEAT_SCOPE_NS,
162 	.nfeat_csi = NVME_FEAT_CSI_NVM,
163 	.nfeat_in_get = NVME_GET_FEAT_F_NSID | NVME_GET_FEAT_F_DATA,
164 	.nfeat_in_set = NVME_SET_FEAT_F_NSID | NVME_SET_FEAT_F_CDW11 |
165 	    NVME_SET_FEAT_F_DATA,
166 	.nfeat_out_get = NVME_FEAT_OUTPUT_CDW0 | NVME_FEAT_OUTPUT_DATA,
167 	.nfeat_len = NVME_LBA_RANGE_BUFSIZE
168 }, {
169 	.nfeat_short = "temp",
170 	.nfeat_spec = "Temperature Threshold",
171 	.nfeat_fid = NVME_FEAT_TEMPERATURE,
172 	.nfeat_vers = &nvme_vers_1v0,
173 	.nfeat_kind = NVME_FEAT_MANDATORY,
174 	.nfeat_scope = NVME_FEAT_SCOPE_CTRL,
175 	/*
176 	 * In NVMe 1.0 and NVMe 1.1, there was only a single temperature sensor
177 	 * that the spec defined and was present in the threshold feature.
178 	 * However, starting in NVMe 1.2, this was changed so that a sensor was
179 	 * required to be specified in NVMe 1.2 to identify the sensor. As such
180 	 * we always end up saying that this is required.
181 	 */
182 	.nfeat_in_get = NVME_GET_FEAT_F_CDW11,
183 	.nfeat_in_set = NVME_SET_FEAT_F_CDW11,
184 	.nfeat_out_get = NVME_FEAT_OUTPUT_CDW0
185 }, {
186 	.nfeat_short = "errrec",
187 	.nfeat_spec = "Error Recovery",
188 	.nfeat_fid = NVME_FEAT_ERROR,
189 	.nfeat_vers = &nvme_vers_1v0,
190 	.nfeat_csi = NVME_FEAT_CSI_NVM,
191 	.nfeat_kind = NVME_FEAT_MANDATORY,
192 	/*
193 	 * The scope of this feature has a bit of a complicated history.
194 	 * Originally we always got this on the controller and that works for
195 	 * most NVMe 1.0-1.2 devices. The introduction of both namespace
196 	 * management and of the DULBE option which is namespace specific, made
197 	 * this more nuanced. The NVMe 1.4 specification makes it clear that
198 	 * this is namespace specific; however, if we ask for this feature on
199 	 * many NVMe 1.3 devices with namespace support and some NVMe 1.2, it'll
200 	 * generate an error about missing namespace information. Unfortunately
201 	 * namespace management is not a good proxy for this as for example the
202 	 * Samsung 980 Pro is an NVMe 1.3 device without namespace management
203 	 * and it will error with invalid namespace if we specify zeros.
204 	 *
205 	 * However, most devices that we've surveyed will always answer a GET
206 	 * FEATURES request with a namespace specified. Therefore, given the
207 	 * changes that have happened, for now we're going to phrase it scoped
208 	 * to a namespace and requiring a namespace ID.
209 	 */
210 	.nfeat_scope = NVME_FEAT_SCOPE_NS,
211 	.nfeat_in_get = NVME_GET_FEAT_F_NSID,
212 	.nfeat_in_set = NVME_SET_FEAT_F_NSID | NVME_SET_FEAT_F_CDW11,
213 	.nfeat_out_get = NVME_FEAT_OUTPUT_CDW0
214 }, {
215 	.nfeat_short = "cache",
216 	.nfeat_spec = "Volatile Write Cache",
217 	.nfeat_fid = NVME_FEAT_WRITE_CACHE,
218 	.nfeat_sup_func = nvme_feat_write_cache_sup,
219 	.nfeat_vers = &nvme_vers_1v0,
220 	.nfeat_kind = NVME_FEAT_OPTIONAL,
221 	.nfeat_scope = NVME_FEAT_SCOPE_CTRL,
222 	.nfeat_in_set = NVME_SET_FEAT_F_CDW11,
223 	.nfeat_out_get = NVME_FEAT_OUTPUT_CDW0
224 }, {
225 	.nfeat_short = "queues",
226 	.nfeat_spec = "Number of Queues",
227 	.nfeat_fid = NVME_FEAT_NQUEUES,
228 	.nfeat_vers = &nvme_vers_1v0,
229 	.nfeat_kind = NVME_FEAT_MANDATORY,
230 	.nfeat_scope = NVME_FEAT_SCOPE_CTRL,
231 	.nfeat_in_set = NVME_SET_FEAT_F_CDW11,
232 	.nfeat_out_get = NVME_FEAT_OUTPUT_CDW0
233 }, {
234 	/*
235 	 * The interrupt coalescing and the interrupt vector configuration
236 	 * features are required for all PCIe controllers; however, they are not
237 	 * supported for other types of controllers. As we only support NVMe
238 	 * PCIe controllers with this library right now we don't do anything
239 	 * special to denote that. If we do, we will probably want to create an
240 	 * optional function for determining the kind of feature and leverage
241 	 * the existing nfeat_sup_func.
242 	 */
243 	.nfeat_short = "coalescing",
244 	.nfeat_spec = "Interrupt Coalescing",
245 	.nfeat_fid = NVME_FEAT_INTR_COAL,
246 	.nfeat_vers = &nvme_vers_1v0,
247 	.nfeat_kind = NVME_FEAT_MANDATORY,
248 	.nfeat_scope = NVME_FEAT_SCOPE_CTRL,
249 	.nfeat_in_set = NVME_SET_FEAT_F_CDW11,
250 	.nfeat_out_get = NVME_FEAT_OUTPUT_CDW0
251 }, {
252 	.nfeat_short = "vector",
253 	.nfeat_spec = "Interrupt Vector Configuration",
254 	.nfeat_fid = NVME_FEAT_INTR_VECT,
255 	.nfeat_vers = &nvme_vers_1v0,
256 	.nfeat_kind = NVME_FEAT_MANDATORY,
257 	.nfeat_scope = NVME_FEAT_SCOPE_CTRL,
258 	.nfeat_in_get = NVME_GET_FEAT_F_CDW11,
259 	.nfeat_in_set = NVME_SET_FEAT_F_CDW11,
260 	.nfeat_out_get = NVME_FEAT_OUTPUT_CDW0
261 }, {
262 	.nfeat_short = "atomicity",
263 	.nfeat_spec = "Write Atomicity",
264 	.nfeat_fid = NVME_FEAT_WRITE_ATOM,
265 	.nfeat_vers = &nvme_vers_1v0,
266 	.nfeat_kind = NVME_FEAT_MANDATORY,
267 	.nfeat_scope = NVME_FEAT_SCOPE_CTRL,
268 	.nfeat_in_set = NVME_SET_FEAT_F_CDW11,
269 	.nfeat_out_get = NVME_FEAT_OUTPUT_CDW0
270 }, {
271 	.nfeat_short = "event",
272 	.nfeat_spec = "Asynchronous Event Configuration",
273 	.nfeat_fid = NVME_FEAT_ASYNC_EVENT,
274 	.nfeat_vers = &nvme_vers_1v0,
275 	.nfeat_kind = NVME_FEAT_MANDATORY,
276 	.nfeat_scope = NVME_FEAT_SCOPE_CTRL,
277 	.nfeat_in_set = NVME_SET_FEAT_F_CDW11,
278 	.nfeat_out_get = NVME_FEAT_OUTPUT_CDW0
279 }, {
280 	.nfeat_short = "apst",
281 	.nfeat_spec = "Autonomous Power State Transition",
282 	.nfeat_fid = NVME_FEAT_AUTO_PST,
283 	.nfeat_vers = &nvme_vers_1v1,
284 	.nfeat_sup_func = nvme_feat_apst_sup,
285 	.nfeat_kind = NVME_FEAT_OPTIONAL,
286 	.nfeat_scope = NVME_FEAT_SCOPE_CTRL,
287 	.nfeat_in_get = NVME_GET_FEAT_F_DATA,
288 	.nfeat_in_set = NVME_SET_FEAT_F_CDW11 | NVME_SET_FEAT_F_DATA,
289 	.nfeat_out_get = NVME_FEAT_OUTPUT_CDW0 | NVME_FEAT_OUTPUT_DATA,
290 	.nfeat_len = NVME_AUTO_PST_BUFSIZE
291 }, {
292 	.nfeat_short = "progress",
293 	.nfeat_spec = "Software Progress Marker",
294 	.nfeat_fid = NVME_FEAT_PROGRESS,
295 	.nfeat_vers = &nvme_vers_1v0,
296 	.nfeat_kind = NVME_FEAT_OPTIONAL,
297 	.nfeat_scope = NVME_FEAT_SCOPE_CTRL,
298 	.nfeat_in_set = NVME_SET_FEAT_F_CDW11,
299 	.nfeat_out_get = NVME_FEAT_OUTPUT_CDW0
300 } };
301 
302 size_t nvme_std_nfeats = ARRAY_SIZE(nvme_std_feats);
303 
304 /*
305  * Now it's time to answer the only hard question here: is this feature actually
306  * supported by the controller. Prior to NVMe 2.x and the Feature Identifiers
307  * Supported and Effects page, we have to use a heuristic for this. Our
308  * heuristic rules are as follows:
309  *
310  * 1) If this is a vendor-specific feature that we have identified is present on
311  * this controller based on a datasheet, we assume it's present.
312  *
313  * 2) If the feature was introduced in an NVMe spec version newer than our
314  * controller, then it's clearly unsupported.
315  *
316  * 3) If it is a mandatory feature, we have the right controller type, and we
317  * are past the minimum version, then this is supported.
318  *
319  * 4) If the feature is optional and has an explicit feature bit that indicates
320  * whether it's present or not, then we can use that to determine if it's
321  * implemented or not.
322  *
323  * Otherwise we must conclude that we don't know.
324  */
325 nvme_feat_impl_t
326 nvme_feat_supported(const nvme_feat_info_t *info,
327     const nvme_valid_ctrl_data_t *data)
328 {
329 	if (info->nfeat_kind == NVME_FEAT_VENDOR_SPECIFIC) {
330 		return (NVME_FEAT_IMPL_SUPPORTED);
331 	}
332 
333 	if (info->nfeat_vers != NULL &&
334 	    !nvme_vers_atleast(data->vcd_vers, info->nfeat_vers)) {
335 		return (NVME_FEAT_IMPL_UNSUPPORTED);
336 	}
337 
338 	if (info->nfeat_kind == NVME_FEAT_MANDATORY) {
339 		ASSERT3P(info->nfeat_sup_func, ==, NULL);
340 		return (NVME_FEAT_IMPL_SUPPORTED);
341 	}
342 
343 	if (info->nfeat_sup_func != NULL) {
344 		if (info->nfeat_sup_func(data, info)) {
345 			return (NVME_FEAT_IMPL_SUPPORTED);
346 		}
347 
348 		return (NVME_FEAT_IMPL_UNSUPPORTED);
349 	}
350 
351 	return (NVME_FEAT_IMPL_UNKNOWN);
352 }
353