1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2026 Oxide Computer Company
14 */
15
16 /*
17 * NVMe Vendor Unique Command related functions.
18 */
19
20 #include <getopt.h>
21 #include <err.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <sys/stat.h>
25 #include <fcntl.h>
26 #include <unistd.h>
27 #include <sys/debug.h>
28 #include <sys/sysmacros.h>
29
30 #include "nvmeadm.h"
31
32 /*
33 * We choose 60 seconds as a reasonable enough default VUC timeout. This is a
34 * fairly arbitrary selection but should be good enough for most non-formatting
35 * related commands.
36 */
37 #define NVMEADM_VUC_TO_DEFAULT 60
38
39 /*
40 * We need some upper bound on how much data we'll read in and zero. The kernel
41 * may change its maximum that it'll allow. It'd probably be smart of us to ask
 * what the max is to help reduce hardcoding. For now we use 32 MiB, which is
 * 2x its current value. Lengths must be a multiple of 4 bytes because we need
 * 4 byte alignment.
44 */
45 #define NVMEADM_VUC_LEN_MAX (32 * 1024 * 1024)
46 #define NVMEADM_VUC_LEN_ALIGN 4
47
48 typedef struct nvmeadm_vuc {
49 uint8_t vuc_opc;
50 uint32_t vuc_nsid;
51 uint32_t vuc_cdw12;
52 uint32_t vuc_cdw13;
53 uint32_t vuc_cdw14;
54 uint32_t vuc_cdw15;
55 uint32_t vuc_timeout;
56 uint32_t vuc_dlen;
57 const char *vuc_input;
58 const char *vuc_output;
59 nvme_lock_level_t vuc_lock;
60 nvme_vuc_disc_impact_t vuc_impact;
61 } nvmeadm_vuc_t;
62
63 nvme_vuc_disc_t *
nvmeadm_vuc_init(const nvme_process_arg_t * npa,const char * name)64 nvmeadm_vuc_init(const nvme_process_arg_t *npa, const char *name)
65 {
66 nvme_vuc_disc_t *vuc;
67 nvme_vuc_disc_lock_t lock;
68
69 if (!nvme_vuc_discover_by_name(npa->npa_ctrl, name, 0, &vuc)) {
70 nvmeadm_fatal(npa, "%s does not support operation %s: device "
71 "does not support vendor unique command %s", npa->npa_name,
72 npa->npa_cmd->c_name, name);
73 }
74
75 lock = nvme_vuc_disc_lock(vuc);
76 switch (lock) {
77 case NVME_VUC_DISC_LOCK_NONE:
78 break;
79 case NVME_VUC_DISC_LOCK_READ:
80 nvmeadm_excl(npa, NVME_LOCK_L_READ);
81 break;
82 case NVME_VUC_DISC_LOCK_WRITE:
83 nvmeadm_excl(npa, NVME_LOCK_L_WRITE);
84 break;
85 }
86
87 return (vuc);
88 }
89
90 void
nvmeadm_vuc_fini(const nvme_process_arg_t * npa,nvme_vuc_disc_t * vuc)91 nvmeadm_vuc_fini(const nvme_process_arg_t *npa, nvme_vuc_disc_t *vuc)
92 {
93 if (nvme_vuc_disc_lock(vuc) != NVME_VUC_DISC_LOCK_NONE) {
94 if (npa->npa_ns != NULL) {
95 nvme_ns_unlock(npa->npa_ns);
96 } else if (npa->npa_ctrl != NULL) {
97 nvme_ctrl_unlock(npa->npa_ctrl);
98 }
99 }
100
101 nvme_vuc_disc_free(vuc);
102 }
103
/*
 * Print the usage synopsis for the vendor-cmd operation to stderr. 'c_name'
 * is the command name that leads the synopsis line.
 */
void
usage_vendor_cmd(const char *c_name)
{
	(void) fprintf(stderr,
	    "%s -O opcode [-n nsid] [--cdw12 cdw12] "
	    "[--cdw13 cdw13]\n\t [--cdw14 cdw14] [--cdw15 cdw15] "
	    "[-l length [-i file | -o file]]\n\t [-L lock] [-I impact] "
	    "[-t timeout] <ctl>[/<ns>]\n\n",
	    c_name);

	/* The description has no conversions, so emit it verbatim. */
	(void) fputs(" Run a vendor-specific command against a "
	    "device\n", stderr);
}
114
115 /*
116 * Most folks reasonably expect short options for all long options. We do have
117 * these here for the various --cdw arguments, but there are no good short
118 * options here depending on what we want to do. These are different from the
119 * Linux nvme-cli, so we would have preferred not to have them at all, but the
120 * mappings in that tool are not very usable either, e.g. --cdw12 is 6. When
121 * we're doing documentation: usage statements, manuals, overviews, prefer the
122 * --cdw form.
123 */
/*
 * Long option table for vendor-cmd. Each entry takes a required argument and
 * maps to the short option character that optparse_vendor_cmd() switches on.
 * The final all-zero entry terminates the table.
 */
static const struct option vendor_cmd_lopts[] = {
	{ .name = "opcode", .has_arg = required_argument, .flag = NULL,
	    .val = 'O' },
	{ .name = "nsid", .has_arg = required_argument, .flag = NULL,
	    .val = 'n' },
	{ .name = "cdw12", .has_arg = required_argument, .flag = NULL,
	    .val = '2' },
	{ .name = "cdw13", .has_arg = required_argument, .flag = NULL,
	    .val = '3' },
	{ .name = "cdw14", .has_arg = required_argument, .flag = NULL,
	    .val = '4' },
	{ .name = "cdw15", .has_arg = required_argument, .flag = NULL,
	    .val = '5' },
	{ .name = "length", .has_arg = required_argument, .flag = NULL,
	    .val = 'l' },
	{ .name = "input", .has_arg = required_argument, .flag = NULL,
	    .val = 'i' },
	{ .name = "output", .has_arg = required_argument, .flag = NULL,
	    .val = 'o' },
	{ .name = "lock", .has_arg = required_argument, .flag = NULL,
	    .val = 'L' },
	{ .name = "impact", .has_arg = required_argument, .flag = NULL,
	    .val = 'I' },
	{ .name = "timeout", .has_arg = required_argument, .flag = NULL,
	    .val = 't' },
	{ .name = NULL, .has_arg = 0, .flag = NULL, .val = 0 }
};
139
/*
 * Parse the option argument 'raw' as an integer in the inclusive range
 * [min, max] and return it. 'field' names the option in the error message.
 * On a parse or range failure we exit through errx() rather than returning.
 */
static long long
optparse_vendor_cmd_ui(const char *raw, const char *field, uint64_t min,
    uint64_t max)
{
	const char *why;
	const long long val = strtonumx(raw, min, max, &why, 0);

	if (why != NULL) {
		errx(-1, "failed to parse %s: value %s is %s: valid values "
		    "are in the range [0x%" PRIx64 ", 0x%" PRIx64 "]", field,
		    raw, why, min, max);
	}

	return (val);
}
156
157 void
optparse_vendor_cmd(nvme_process_arg_t * npa)158 optparse_vendor_cmd(nvme_process_arg_t *npa)
159 {
160 int c;
161 nvmeadm_vuc_t *vuc;
162
163 if ((vuc = calloc(1, sizeof (nvmeadm_vuc_t))) == NULL) {
164 err(-1, "failed to allocate memory for option tracking");
165 }
166 vuc->vuc_timeout = NVMEADM_VUC_TO_DEFAULT;
167
168 /*
169 * Normally we can reset optind to 0 to make sure that we can account
170 * for the fact that we've modified our arguments. Unfortunately
171 * getopt_long() tries to detect this as a case where some tools have
172 * used it as a way to ask for option processing to be reset and thus
173 * skip our first argument. As such we cheat a bit with the arguments we
174 * pass.
175 */
176 while ((c = getopt_long(npa->npa_argc + 1, npa->npa_argv - 1,
177 ":O:n:l:i:I:o:L:t:2:3:4:5:", vendor_cmd_lopts, NULL)) != -1) {
178 char *last;
179
180 switch (c) {
181 case 'O':
182 vuc->vuc_opc = (uint8_t)optparse_vendor_cmd_ui(optarg,
183 "opcode", NVME_PASSTHRU_MIN_ADMIN_OPC,
184 NVME_PASSTHRU_MAX_ADMIN_OPC);
185 break;
186 case 'n':
187 /*
188 * We don't use NVME_NSID_MIN here because we want to
189 * allow the invalid nsid 0 to be specified for this
190 * field.
191 */
192 vuc->vuc_nsid = (uint8_t)optparse_vendor_cmd_ui(optarg,
193 "opcode", 0, NVME_NSID_BCAST);
194 break;
195 case 'l':
196 vuc->vuc_dlen = (uint32_t)optparse_vendor_cmd_ui(optarg,
197 "length", 0, NVMEADM_VUC_LEN_MAX);
198 if (vuc->vuc_dlen % NVMEADM_VUC_LEN_ALIGN != 0) {
199 errx(-1, "invalid data length %u: must be a "
200 "multiple of 4 bytes", vuc->vuc_dlen);
201 }
202 break;
203 case 'i':
204 vuc->vuc_input = optarg;
205 break;
206 case 'o':
207 vuc->vuc_output = optarg;
208 break;
209 case 'L':
210 if (strcmp(optarg, "read") == 0) {
211 vuc->vuc_lock = NVME_LOCK_L_READ;
212 } else if (strcmp(optarg, "write") == 0) {
213 vuc->vuc_lock = NVME_LOCK_L_WRITE;
214 } else {
215 errx(-1, "invalid lock value %s: valid values "
216 "are 'read' or 'write'", optarg);
217 }
218 break;
219 case 'I':
220 for (char *s = strtok_r(optarg, ",", &last); s != NULL;
221 s = strtok_r(NULL, ",", &last)) {
222 if (strcmp(s, "data") == 0) {
223 vuc->vuc_impact |=
224 NVME_VUC_DISC_IMPACT_DATA;
225 } else if (strcmp(s, "namespace") == 0) {
226 vuc->vuc_impact |=
227 NVME_VUC_DISC_IMPACT_NS;
228 } else {
229 errx(-1, "invalid impact string: %s",
230 s);
231 }
232 }
233 break;
234 case 't':
235 /* This will be further constrained by libnvme */
236 vuc->vuc_timeout = (uint32_t)optparse_vendor_cmd_ui(
237 optarg, "timeout", 1, UINT32_MAX);
238 break;
239 case '2':
240 vuc->vuc_cdw12 = (uint32_t)optparse_vendor_cmd_ui(
241 optarg, "cdw12", 0, UINT32_MAX);
242 break;
243 case '3':
244 vuc->vuc_cdw13 = (uint32_t)optparse_vendor_cmd_ui(
245 optarg, "cdw13", 0, UINT32_MAX);
246 break;
247 case '4':
248 vuc->vuc_cdw14 = (uint32_t)optparse_vendor_cmd_ui(
249 optarg, "cdw14", 0, UINT32_MAX);
250 break;
251 case '5':
252 vuc->vuc_cdw15 = (uint32_t)optparse_vendor_cmd_ui(
253 optarg, "cdw15", 0, UINT32_MAX);
254 break;
255 case '?':
256 errx(-1, "unknown option: -%c", optopt);
257 case ':':
258 errx(-1, "option -%c requires an argument", optopt);
259 break;
260 }
261 }
262
263 /*
264 * Undo our optind lies.
265 */
266 optind--;
267
268 if (vuc->vuc_opc == 0) {
269 errx(-1, "missing required command opcode");
270 }
271
272 if (vuc->vuc_input != NULL && vuc->vuc_output != NULL) {
273 errx(-1, "cannot specify both an input file (-i) and an output "
274 "file (-o)");
275 }
276
277 if ((vuc->vuc_input != NULL || vuc->vuc_output != NULL) &&
278 vuc->vuc_dlen == 0) {
279 errx(-1, "asked to transfer data (-%c) but missing required "
280 "data length (-l)", vuc->vuc_input != NULL ? 'i' : 'o');
281 }
282
283 if (vuc->vuc_input == NULL && vuc->vuc_output == NULL &&
284 vuc->vuc_dlen != 0) {
285 errx(-1, "%u bytes of data transfer requested (-l), but no "
286 "input (-i) or output (-o) specified", vuc->vuc_dlen);
287 }
288
289 /*
290 * Only check if the namespace id matches if the user specified a
291 * namespace.
292 */
293 if (npa->npa_ns != NULL) {
294 uint32_t nsid = nvme_ns_info_nsid(npa->npa_ns_info);
295 if (vuc->vuc_nsid != 0 && vuc->vuc_nsid != nsid) {
296 errx(-1, "Requested namespace id (-n) %u does not "
297 "match the nsid of %s (%u): either remove the "
298 "-n argument or specify just a controller",
299 vuc->vuc_nsid, npa->npa_name, nsid);
300 }
301
302 vuc->vuc_nsid = nsid;
303 }
304
305 npa->npa_cmd_arg = vuc;
306 }
307
308 int
do_vendor_cmd(const nvme_process_arg_t * npa)309 do_vendor_cmd(const nvme_process_arg_t *npa)
310 {
311 const nvmeadm_vuc_t *vuc = npa->npa_cmd_arg;
312 uint8_t *buf = NULL;
313 nvme_vuc_req_t *req;
314 int ofd = -1;
315
316 /*
317 * Verify we can get a request. This is effectively our is this
318 * supported check.
319 */
320 if (!nvme_vuc_req_init(npa->npa_ctrl, &req)) {
321 nvmeadm_fatal(npa, "failed to initialize vendor unique "
322 "request");
323 }
324
325 if (vuc->vuc_dlen > 0) {
326 if ((buf = calloc(sizeof (uint8_t), vuc->vuc_dlen)) == NULL) {
327 nvmeadm_fatal(npa, "failed to allocate 0x%x byte "
328 "request data buffer", vuc->vuc_dlen);
329 }
330
331 /*
332 * If we have an input file, then we want to read data from it
333 * until we either hit EOF or we read sufficient bytes from it
334 * to fill our buffer. Anything we don't will be zero filled,
335 * which was already taken care of by using calloc.
336 */
337 if (vuc->vuc_input != NULL) {
338 int ifd = open(vuc->vuc_input, O_RDONLY);
339 if (ifd < 0) {
340 err(EXIT_FAILURE, "failed to open input file "
341 "%s", vuc->vuc_input);
342 }
343
344 size_t rem = vuc->vuc_dlen, off = 0;
345 while (rem > 0) {
346 size_t toread = MIN(16 * 1024, rem);
347 ssize_t ret = read(ifd, buf + off, toread);
348 if (ret < 0) {
349 nvmeadm_fatal(npa, "failed to read %zu "
350 "bytes at offset %zu from %s",
351 toread, off, vuc->vuc_input);
352 } else if (ret == 0) {
353 break;
354 }
355
356 rem -= (size_t)ret;
357 off += (size_t)ret;
358 }
359
360 VERIFY0(close(ifd));
361 } else if (vuc->vuc_output != NULL) {
362 ofd = open(vuc->vuc_output, O_RDWR | O_TRUNC | O_CREAT,
363 0644);
364 if (ofd < 0) {
365 err(-1, "failed to open output file %s",
366 vuc->vuc_output);
367 }
368 }
369 }
370
371 if (!nvme_vuc_req_set_opcode(req, vuc->vuc_opc) ||
372 !nvme_vuc_req_set_nsid(req, vuc->vuc_nsid) ||
373 !nvme_vuc_req_set_timeout(req, vuc->vuc_timeout) ||
374 !nvme_vuc_req_set_cdw12(req, vuc->vuc_cdw12) ||
375 !nvme_vuc_req_set_cdw13(req, vuc->vuc_cdw13) ||
376 !nvme_vuc_req_set_cdw14(req, vuc->vuc_cdw14) ||
377 !nvme_vuc_req_set_cdw15(req, vuc->vuc_cdw15) ||
378 !nvme_vuc_req_set_impact(req, vuc->vuc_impact)) {
379 nvmeadm_fatal(npa, "failed to set request fields");
380 }
381
382 if (vuc->vuc_input != NULL) {
383 if (!nvme_vuc_req_set_input(req, buf, vuc->vuc_dlen)) {
384 nvmeadm_fatal(npa, "failed to set input buffer");
385 }
386 } else if (vuc->vuc_output != NULL) {
387 if (!nvme_vuc_req_set_output(req, buf, vuc->vuc_dlen)) {
388 nvmeadm_fatal(npa, "failed to set output buffer");
389 }
390 }
391
392 if (vuc->vuc_lock != 0) {
393 nvmeadm_excl(npa, vuc->vuc_lock);
394 }
395
396 if (!nvme_vuc_req_exec(req)) {
397 nvmeadm_fatal(npa, "failed to execute request");
398 }
399
400 uint32_t cdw0;
401 if (nvme_vuc_req_get_cdw0(req, &cdw0)) {
402 (void) printf("Request cdw0: 0x%x\n", cdw0);
403 }
404
405 /*
406 * Remove the lock manually. npa->npa_excl isn't set, so we need to
407 * manually take care of this.
408 */
409 if (vuc->vuc_lock != 0) {
410 if (npa->npa_ns != NULL) {
411 nvme_ns_unlock(npa->npa_ns);
412 } else {
413 nvme_ctrl_unlock(npa->npa_ctrl);
414 }
415 }
416
417 if (vuc->vuc_output != NULL) {
418 size_t rem = vuc->vuc_dlen, off = 0;
419 while (rem > 0) {
420 size_t towrite = MIN(16 * 1024, rem);
421 ssize_t ret = write(ofd, buf + off, towrite);
422
423 if (ret < 0) {
424 nvmeadm_fatal(npa, "failed to write %zu bytes "
425 "of output data at offset %zu to %s",
426 towrite, off, vuc->vuc_output);
427 }
428
429 rem -= towrite;
430 off += towrite;
431 }
432 }
433
434 if (ofd >= 0) {
435 VERIFY0(close(ofd));
436 }
437 nvme_vuc_req_fini(req);
438 free(buf);
439 return (0);
440 }
441