1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2024 Oxide Computer Company 14 */ 15 16 /* 17 * This file deals with all the knowledge related to supported, standard NVMe 18 * features as well as validation of other requests related to features. While 19 * there are vendor-specific features, we currently don't support issuing them 20 * to the kernel. 21 * 22 * Like other parts of the common NVMe logic, we have two different sets of data 23 * tables to help us drive validation: 24 * 25 * 1) We have a list of fields that are supported in the kernel ioctl interface 26 * and libnvme for features. There are some fields like allowing a specification 27 * via UUID which are not currently supported. The field tables are split up 28 * among get and set features because they are somewhat different in terms of 29 * what they allow (i.e. set features may use cdw12, cdw13, cdw15, etc.) and 30 * because the kernel doesn't support issuing set features from userland today. 31 * 32 * 2) We have a table of NVMe specified required and optional features. This 33 * table has dynamic properties related to whether things are supported and the 34 * set of fields that are usable because some aspects of this change with the 35 * specification version (e.g. the temperature threshold feature had no input 36 * argument in cdw11 in NVMe 1.0). 37 */ 38 39 #include "nvme_common.h" 40 41 #include <sys/sysmacros.h> 42 #ifdef _KERNEL 43 #include <sys/sunddi.h> 44 #include <sys/stdint.h> 45 #else 46 #include <stdio.h> 47 #include <inttypes.h> 48 #endif 49 50 static bool 51 nvme_get_feat_supported_sel(const nvme_field_info_t *field, 52 const nvme_valid_ctrl_data_t *data, char *msg, size_t msglen) 53 { 54 if (data->vcd_id->id_oncs.on_save != 0) { 55 return (true); 56 } 57 58 (void) snprintf(msg, msglen, "controller does not support field %s " 59 "(%s): missing extended data support in Log Page Attributes (LPA)", 60 field->nlfi_human, field->nlfi_spec); 61 return (false); 62 } 63 64 /* 65 * An astute observer will note that there is no instance for the DPTR here. 66 * While a buffer is required for this command, the common code does not 67 * validate buffers. In other pieces we use a length as a proxy for checking the 68 * buffer; however, there is no length argument here. The buffer is expected by 69 * the controller to be of sufficient size. This is validated by the kernel in 70 * nvme_validate_get_feature(). 71 */ 72 const nvme_field_info_t nvme_get_feat_fields[] = { 73 [NVME_GET_FEAT_REQ_FIELD_FID] = { 74 .nlfi_vers = &nvme_vers_1v0, 75 .nlfi_max_size = NVME_FEAT_MAX_FID, 76 .nlfi_spec = "fid", 77 .nlfi_human = "feature identifier", 78 .nlfi_def_req = true, 79 .nlfi_def_allow = true 80 }, 81 [NVME_GET_FEAT_REQ_FIELD_SEL] = { 82 .nlfi_vers = &nvme_vers_1v1, 83 .nlfi_sup = nvme_get_feat_supported_sel, 84 .nlfi_max_size = NVME_FEAT_MAX_SEL, 85 .nlfi_spec = "sel", 86 .nlfi_human = "select", 87 /* 88 * Because this field was introduced in NVMe 1.1 and because 89 * most of the time we want to assume folks are looking for the 90 * current value, we end up opting to make this a non-required 91 * field and default to getting the current value. 92 */ 93 .nlfi_def_req = false, 94 .nlfi_def_allow = true 95 }, 96 [NVME_GET_FEAT_REQ_FIELD_CDW11] = { 97 .nlfi_vers = &nvme_vers_1v0, 98 .nlfi_max_size = UINT32_MAX, 99 .nlfi_spec = "cdw11", 100 .nlfi_human = "control dword 11", 101 /* 102 * While this isn't required by default, we will end up setting 103 * it as required based on the specifics of the feature and its 104 * version. 105 */ 106 .nlfi_def_req = false, 107 .nlfi_def_allow = true 108 }, 109 [NVME_GET_FEAT_REQ_FIELD_NSID] = { 110 .nlfi_vers = &nvme_vers_1v0, 111 .nlfi_valid = nvme_field_valid_nsid, 112 .nlfi_spec = "nsid", 113 .nlfi_human = "namespace ID", 114 .nlfi_def_req = false, 115 .nlfi_def_allow = true 116 } 117 }; 118 119 size_t nvme_get_feat_nfields = ARRAY_SIZE(nvme_get_feat_fields); 120 121 static bool 122 nvme_feat_write_cache_sup(const nvme_valid_ctrl_data_t *data, 123 const nvme_feat_info_t *feat) 124 { 125 return (data->vcd_id->id_vwc.vwc_present != 0); 126 } 127 128 static bool 129 nvme_feat_apst_sup(const nvme_valid_ctrl_data_t *data, 130 const nvme_feat_info_t *feat) 131 { 132 return (data->vcd_id->id_apsta.ap_sup != 0); 133 } 134 135 /* 136 * Note, many of these short names come from the history of nvmeadm(8). If you 137 * wish to change them, then you must figure out a way to make sure we can still 138 * honor the original names. Most fields here try to use a value of 0 as 139 * reasonable default so if something's not specified we'll get a reasonable 140 * value. For example, NVME_FEAT_MANDATORY, NVME_FEAT_CSI_NONE, etc. all have a 141 * value of zero so when that field isn't present we get something reasonable. 142 * This leads us to generally define fields that are exceptions to the norm 143 * (e.g. when a feature is specific to the NVM feature set). 144 */ 145 const nvme_feat_info_t nvme_std_feats[] = { { 146 .nfeat_short = "arb", 147 .nfeat_spec = "Arbitration", 148 .nfeat_fid = NVME_FEAT_ARBITRATION, 149 .nfeat_vers = &nvme_vers_1v0, 150 .nfeat_kind = NVME_FEAT_MANDATORY, 151 .nfeat_scope = NVME_FEAT_SCOPE_CTRL, 152 .nfeat_in_set = NVME_SET_FEAT_F_CDW11, 153 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0 154 }, { 155 .nfeat_short = "pm", 156 .nfeat_spec = "Power Management", 157 .nfeat_fid = NVME_FEAT_POWER_MGMT, 158 .nfeat_vers = &nvme_vers_1v0, 159 .nfeat_kind = NVME_FEAT_MANDATORY, 160 .nfeat_scope = NVME_FEAT_SCOPE_CTRL, 161 .nfeat_in_set = NVME_SET_FEAT_F_CDW11, 162 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0 163 }, { 164 .nfeat_short = "range", 165 .nfeat_spec = "LBA Range Type", 166 .nfeat_fid = NVME_FEAT_LBA_RANGE, 167 .nfeat_vers = &nvme_vers_1v0, 168 .nfeat_kind = NVME_FEAT_OPTIONAL, 169 .nfeat_scope = NVME_FEAT_SCOPE_NS, 170 .nfeat_csi = NVME_FEAT_CSI_NVM, 171 .nfeat_in_get = NVME_GET_FEAT_F_NSID | NVME_GET_FEAT_F_DATA, 172 .nfeat_in_set = NVME_SET_FEAT_F_NSID | NVME_SET_FEAT_F_CDW11 | 173 NVME_SET_FEAT_F_DATA, 174 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0 | NVME_FEAT_OUTPUT_DATA, 175 .nfeat_len = NVME_LBA_RANGE_BUFSIZE 176 }, { 177 .nfeat_short = "temp", 178 .nfeat_spec = "Temperature Threshold", 179 .nfeat_fid = NVME_FEAT_TEMPERATURE, 180 .nfeat_vers = &nvme_vers_1v0, 181 .nfeat_kind = NVME_FEAT_MANDATORY, 182 .nfeat_scope = NVME_FEAT_SCOPE_CTRL, 183 /* 184 * In NVMe 1.0 and NVMe 1.1, there was only a single temperature sensor 185 * that the spec defined and was present in the threshold feature. 186 * However, starting in NVMe 1.2, this was changed so that a sensor was 187 * required to be specified in NVMe 1.2 to identify the sensor. As such 188 * we always end up saying that this is required. 189 */ 190 .nfeat_in_get = NVME_GET_FEAT_F_CDW11, 191 .nfeat_in_set = NVME_SET_FEAT_F_CDW11, 192 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0 193 }, { 194 .nfeat_short = "errrec", 195 .nfeat_spec = "Error Recovery", 196 .nfeat_fid = NVME_FEAT_ERROR, 197 .nfeat_vers = &nvme_vers_1v0, 198 .nfeat_csi = NVME_FEAT_CSI_NVM, 199 .nfeat_kind = NVME_FEAT_MANDATORY, 200 /* 201 * The scope of this feature has a bit of a complicated history. 202 * Originally we always got this on the controller and that works for 203 * most NVMe 1.0-1.2 devices. The introduction of both namespace 204 * management and of the DULBE option which is namespace specific, made 205 * this more nuanced. The NVMe 1.4 specification makes it clear that 206 * this is namespace specific; however, if we ask for this feature on 207 * many NVMe 1.3 devices with namespace support and some NVMe 1.2, it'll 208 * generate an error about missing namespace information. Unfortunately 209 * namespace management is not a good proxy for this as for example the 210 * Samsung 980 Pro is an NVMe 1.3 device without namespace management 211 * and it will error with invalid namespace if we specify zeros. 212 * 213 * However, most devices that we've surveyed will always answer a GET 214 * FEATURES request with a namespace specified. Therefore, given the 215 * changes that have happened, for now we're going to phrase it scoped 216 * to a namespace and requiring a namespace ID. 217 */ 218 .nfeat_scope = NVME_FEAT_SCOPE_NS, 219 .nfeat_in_get = NVME_GET_FEAT_F_NSID, 220 .nfeat_in_set = NVME_SET_FEAT_F_NSID | NVME_SET_FEAT_F_CDW11, 221 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0 222 }, { 223 .nfeat_short = "cache", 224 .nfeat_spec = "Volatile Write Cache", 225 .nfeat_fid = NVME_FEAT_WRITE_CACHE, 226 .nfeat_sup_func = nvme_feat_write_cache_sup, 227 .nfeat_vers = &nvme_vers_1v0, 228 .nfeat_kind = NVME_FEAT_OPTIONAL, 229 .nfeat_scope = NVME_FEAT_SCOPE_CTRL, 230 .nfeat_in_set = NVME_SET_FEAT_F_CDW11, 231 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0 232 }, { 233 .nfeat_short = "queues", 234 .nfeat_spec = "Number of Queues", 235 .nfeat_fid = NVME_FEAT_NQUEUES, 236 .nfeat_vers = &nvme_vers_1v0, 237 .nfeat_kind = NVME_FEAT_MANDATORY, 238 .nfeat_scope = NVME_FEAT_SCOPE_CTRL, 239 .nfeat_in_set = NVME_SET_FEAT_F_CDW11, 240 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0 241 }, { 242 /* 243 * The interrupt coalescing and the interrupt vector configuration 244 * features are required for all PCIe controllers; however, they are not 245 * supported for other types of controllers. As we only support NVMe 246 * PCIe controllers with this library right now we don't do anything 247 * special to denote that. If we do, we will probably want to create an 248 * optional function for determining the kind of feature and leverage 249 * the existing nfeat_sup_func. 250 */ 251 .nfeat_short = "coalescing", 252 .nfeat_spec = "Interrupt Coalescing", 253 .nfeat_fid = NVME_FEAT_INTR_COAL, 254 .nfeat_vers = &nvme_vers_1v0, 255 .nfeat_kind = NVME_FEAT_MANDATORY, 256 .nfeat_scope = NVME_FEAT_SCOPE_CTRL, 257 .nfeat_in_set = NVME_SET_FEAT_F_CDW11, 258 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0 259 }, { 260 .nfeat_short = "vector", 261 .nfeat_spec = "Interrupt Vector Configuration", 262 .nfeat_fid = NVME_FEAT_INTR_VECT, 263 .nfeat_vers = &nvme_vers_1v0, 264 .nfeat_kind = NVME_FEAT_MANDATORY, 265 .nfeat_scope = NVME_FEAT_SCOPE_CTRL, 266 .nfeat_in_get = NVME_GET_FEAT_F_CDW11, 267 .nfeat_in_set = NVME_SET_FEAT_F_CDW11, 268 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0 269 }, { 270 .nfeat_short = "atomicity", 271 .nfeat_spec = "Write Atomicity", 272 .nfeat_fid = NVME_FEAT_WRITE_ATOM, 273 .nfeat_vers = &nvme_vers_1v0, 274 .nfeat_kind = NVME_FEAT_MANDATORY, 275 .nfeat_scope = NVME_FEAT_SCOPE_CTRL, 276 .nfeat_in_set = NVME_SET_FEAT_F_CDW11, 277 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0 278 }, { 279 .nfeat_short = "event", 280 .nfeat_spec = "Asynchronous Event Configuration", 281 .nfeat_fid = NVME_FEAT_ASYNC_EVENT, 282 .nfeat_vers = &nvme_vers_1v0, 283 .nfeat_kind = NVME_FEAT_MANDATORY, 284 .nfeat_scope = NVME_FEAT_SCOPE_CTRL, 285 .nfeat_in_set = NVME_SET_FEAT_F_CDW11, 286 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0 287 }, { 288 .nfeat_short = "apst", 289 .nfeat_spec = "Autonomous Power State Transition", 290 .nfeat_fid = NVME_FEAT_AUTO_PST, 291 .nfeat_vers = &nvme_vers_1v1, 292 .nfeat_sup_func = nvme_feat_apst_sup, 293 .nfeat_kind = NVME_FEAT_OPTIONAL, 294 .nfeat_scope = NVME_FEAT_SCOPE_CTRL, 295 .nfeat_in_get = NVME_GET_FEAT_F_DATA, 296 .nfeat_in_set = NVME_SET_FEAT_F_CDW11 | NVME_SET_FEAT_F_DATA, 297 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0 | NVME_FEAT_OUTPUT_DATA, 298 .nfeat_len = NVME_AUTO_PST_BUFSIZE 299 }, { 300 .nfeat_short = "progress", 301 .nfeat_spec = "Software Progress Marker", 302 .nfeat_fid = NVME_FEAT_PROGRESS, 303 .nfeat_vers = &nvme_vers_1v0, 304 .nfeat_kind = NVME_FEAT_OPTIONAL, 305 .nfeat_scope = NVME_FEAT_SCOPE_CTRL, 306 .nfeat_in_set = NVME_SET_FEAT_F_CDW11, 307 .nfeat_out_get = NVME_FEAT_OUTPUT_CDW0 308 } }; 309 310 size_t nvme_std_nfeats = ARRAY_SIZE(nvme_std_feats); 311 312 /* 313 * Now it's time to answer the only hard question here: is this feature actually 314 * supported by the controller. Prior to NVMe 2.x and the Feature Identifiers 315 * Supported and Effects page, we have to use a heuristic for this. Our 316 * heuristics rules are as follows: 317 * 318 * 1) If this is a vendor-specific feature that we have identified is present on 319 * this controller based on a datasheet, we assume it's present. 320 * 321 * 2) If the feature was introduced in an NVMe spec version newer than our 322 * controller, then it's clearly unsupported. 323 * 324 * 3) If it is a mandatory feature, we have the right controller type, and we 325 * are past the minimum version, then this is supported. 326 * 327 * 4) If the feature is optional and has an explicit feature bit that indicates 328 * whether it's present or not, then we can use that to determine if it's 329 * implemented or not. 330 * 331 * Otherwise we must conclude that we don't know. 332 */ 333 nvme_feat_impl_t 334 nvme_feat_supported(const nvme_feat_info_t *info, 335 const nvme_valid_ctrl_data_t *data) 336 { 337 if (info->nfeat_kind == NVME_FEAT_VENDOR_SPECIFIC) { 338 return (NVME_FEAT_IMPL_SUPPORTED); 339 } 340 341 if (info->nfeat_vers != NULL && 342 !nvme_vers_atleast(data->vcd_vers, info->nfeat_vers)) { 343 return (NVME_FEAT_IMPL_UNSUPPORTED); 344 } 345 346 if (info->nfeat_kind == NVME_FEAT_MANDATORY) { 347 ASSERT3P(info->nfeat_sup_func, ==, NULL); 348 return (NVME_FEAT_IMPL_SUPPORTED); 349 } 350 351 if (info->nfeat_sup_func != NULL) { 352 if (info->nfeat_sup_func(data, info)) { 353 return (NVME_FEAT_IMPL_SUPPORTED); 354 } 355 356 return (NVME_FEAT_IMPL_UNSUPPORTED); 357 } 358 359 return (NVME_FEAT_IMPL_UNKNOWN); 360 } 361