1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2025 Oxide Computer Company
14 */
15
16 /*
17 * NVMe Namespace Management Commands
18 */
19
20 #include <err.h>
21 #include <string.h>
22 #include <sys/sysmacros.h>
23
24 #include "nvmeadm.h"
25
/*
 * Parse a string holding an unsigned integer with an optional, single
 * character, power of 2 unit suffix into a uint64_t. The number itself is
 * parsed by strtoull() with a base of 0, so decimal, octal (leading 0), and
 * hexadecimal (leading 0x) forms are all accepted. The suffix is matched
 * without regard to case and is one of B (bytes), K (2^10), M (2^20),
 * G (2^30), T (2^40), or P (2^50).
 *
 * We stop allowing suffixes at PiB as we're trying to fit this into a uint64_t
 * and there aren't really many valid values of EiB. In the future when we have
 * devices with such large capacities, we should change this to return a
 * uint128_t style value, as that may make more sense once larger block sizes
 * are involved. When we do that, we should probably also figure out how we
 * want to commonize this function across the tree.
 *
 * 'str' is the text to parse. 'desc' is a human-readable name for the value
 * that is only used in error messages. Every failure is fatal: negative
 * values, strings without digits, unknown or multi-character suffixes, and
 * results that would overflow a uint64_t all end in err() or errx().
 */
static uint64_t
nvmeadm_parse_units(const char *str, const char *desc)
{
	/* Ordered by magnitude: the suffix at index N scales by 1024^N. */
	static const char units[] = { 'B', 'K', 'M', 'G', 'T', 'P' };
	const size_t nunits = sizeof (units) / sizeof (units[0]);
	const uint64_t max = UINT64_MAX / 1024;
	unsigned long long l;
	char *eptr;

	/*
	 * strtoull() accepts a leading minus sign and negates the result in
	 * unsigned arithmetic, which would quietly turn "-1" into an enormous
	 * value. Reject that up front. The set passed to strspn() is the
	 * white space that isspace() recognizes in the C locale, which is what
	 * strtoull() skips before the sign.
	 */
	if (str[strspn(str, " \t\n\v\f\r")] == '-') {
		errx(-1, "failed to parse %s: %s: value may not be negative",
		    desc, str);
	}

	errno = 0;
	l = strtoull(str, &eptr, 0);
	if (errno != 0) {
		err(-1, "failed to parse %s: %s", desc, str);
	}

	/*
	 * Setting errno when nothing could be converted is optional, so also
	 * check whether any characters were consumed. Without this an empty
	 * string or a bare suffix such as "K" would be treated as zero.
	 */
	if (eptr == str) {
		errx(-1, "failed to parse %s: %s: no digits found", desc, str);
	}

	if (*eptr == '\0') {
		return ((uint64_t)l);
	}

	/* Only a single trailing character may follow the digits. */
	if (eptr[1] != '\0') {
		errx(-1, "failed to parse %s unit suffix: %s", desc, eptr);
	}

	for (size_t i = 0; i < nunits; i++) {
		if (strncasecmp(eptr, &units[i], 1) != 0) {
			continue;
		}

		/*
		 * Multiply by 1024 once per step of magnitude, checking before
		 * each multiplication that the result still fits.
		 */
		for (size_t scale = i; scale > 0; scale--) {
			if (l > max) {
				errx(-1, "%s value %s would overflow a "
				    "uint64_t", desc, str);
			}

			l *= 1024;
		}

		return ((uint64_t)l);
	}

	errx(-1, "invalid %s unit suffix: %s", desc, eptr);
}
77
/*
 * Today create-namespace takes a limited number of arguments. Here is how we
 * expect it to continue to change over time:
 *
 * 1) First, we have a limited number of short options that we support. If we
 * ever support long options, these should match the NVMe name for the option,
 * e.g. --nsze, --ncap, --nmic, etc.
 *
 * 2) Today we require that this operates only when the namespace is already
 * detached from a controller. If we want to change this behavior then we should
 * add something such as a [-R] flag to indicate that it should take all the
 * other steps necessary recursively.
 *
 * 3) Most other options have a default that indicates that they're unused or
 * similar. This allows us to add additional option arguments. Some of these may
 * end up with aliases for the default case, e.g. -t nvm for the default NVM
 * CSI.
 *
 * 4) We only support specifying the size of a namespace in bytes today. If we
 * want to change this we should add a flag like a -B that specifies that all
 * sizes are in units of the logical block.
 *
 * usage_create_ns() writes the synopsis and a short description of the
 * sub-command to stderr. 'c_name' is the sub-command name and is substituted
 * at the start of the synopsis line.
 */
void
usage_create_ns(const char *c_name)
{
	(void) fprintf(stderr, "%s -f flbas | -b block-size [-c cap] "
	    "[-n nmic]\n\t [-t type] <ctl> <size>\n\n"
	    " Create a new namespace on the specified controller of the "
	    "requested size. The\n size is specified in bytes and may use "
	    "any suffix such as B (bytes), K\n (kibibytes, 2^10), M "
	    "(mebibytes, 2^20), G (gibibytes, 2^30), T (tebibytes,\n 2^40), "
	    "etc. The size must be a multiple of the selected block size. The\n"
	    " controller may impose additional alignment constraints.\n",
	    c_name);
}
113
114 void
optparse_create_ns(nvme_process_arg_t * npa)115 optparse_create_ns(nvme_process_arg_t *npa)
116 {
117 int c;
118 nvmeadm_create_ns_t *ncn;
119 const char *nmic = NULL, *type = NULL, *cap = NULL;
120 const char *bs = NULL, *flbas = NULL;
121
122 if ((ncn = calloc(1, sizeof (nvmeadm_create_ns_t))) == NULL) {
123 err(-1, "failed to allocate memory to track create-namespace "
124 "information");
125 }
126
127 npa->npa_cmd_arg = ncn;
128
129 while ((c = getopt(npa->npa_argc, npa->npa_argv, ":b:c:f:n:t:")) !=
130 -1) {
131 switch (c) {
132 case 'b':
133 bs = optarg;
134 break;
135 case 'c':
136 cap = optarg;
137 break;
138 case 'f':
139 flbas = optarg;
140 break;
141 case 'n':
142 nmic = optarg;
143 break;
144 case 't':
145 type = optarg;
146 break;
147 case '?':
148 errx(-1, "unknown option: -%c", optopt);
149 case ':':
150 errx(-1, "option -%c requires an argument", optopt);
151 }
152 }
153
154 if (flbas != NULL && bs != NULL) {
155 errx(-1, "only one of -b and -f may be specified");
156 }
157
158 if (flbas == NULL && bs == NULL) {
159 errx(-1, "at least one of -b and -f must be specified");
160 }
161
162 if (flbas != NULL) {
163 const char *err;
164 ncn->ncn_use_flbas = B_TRUE;
165 ncn->ncn_lba = strtonumx(flbas, 0, NVME_MAX_LBAF - 1, &err, 0);
166 if (err != NULL) {
167 errx(-1, "failed to parse formatted LBA index: %s is "
168 "%s, valid values are between 0 and %u",
169 flbas, err, NVME_MAX_LBAF - 1);
170 }
171 }
172
173 if (bs != NULL) {
174 ncn->ncn_use_flbas = B_FALSE;
175 ncn->ncn_lba = nvmeadm_parse_units(bs, "block-size");
176 }
177
178 if (cap != NULL) {
179 ncn->ncn_cap = nvmeadm_parse_units(cap, "block-size");
180 } else {
181 ncn->ncn_cap = UINT64_MAX;
182 }
183
184 if (type != NULL) {
185 if (strcasecmp(type, "nvm") == 0) {
186 ncn->ncn_csi = NVME_CSI_NVM;
187 } else if (strcasecmp(type, "kv") == 0) {
188 ncn->ncn_csi = NVME_CSI_KV;
189 } else if (strcasecmp(type, "zns") == 0) {
190 ncn->ncn_csi = NVME_CSI_ZNS;
191 } else {
192 errx(-1, "unknown CSI type string: '%s'; valid values "
193 "are 'nvm', 'kv', and 'zns'", type);
194 }
195 } else {
196 ncn->ncn_csi = NVME_CSI_NVM;
197 }
198
199 if (nmic != NULL) {
200 if (strcasecmp(nmic, "none") == 0) {
201 ncn->ncn_nmic = NVME_NS_NMIC_T_NONE;
202 } else if (strcasecmp(nmic, "shared") == 0) {
203 ncn->ncn_nmic = NVME_NS_NMIC_T_SHARED;
204 } else {
205 errx(-1, "unknown nmic string: '%s'; valid values are "
206 "'none' and 'shared'", nmic);
207 }
208 } else {
209 ncn->ncn_nmic = NVME_NS_NMIC_T_NONE;
210 }
211
212 if (npa->npa_argc - optind > 2) {
213 errx(-1, "%s passed extraneous arguments starting with %s",
214 npa->npa_cmd->c_name, npa->npa_argv[optind + 2]);
215 } else if (npa->npa_argc - optind != 2) {
216 errx(-1, "missing required size parameter");
217 }
218
219 ncn->ncn_size = nvmeadm_parse_units(npa->npa_argv[optind + 1],
220 "namespace size");
221 if (cap == NULL) {
222 ncn->ncn_cap = ncn->ncn_size;
223 }
224 }
225
226 static const nvme_nvm_lba_fmt_t *
do_create_ns_find_lba(const nvme_process_arg_t * npa,const nvmeadm_create_ns_t * ncn)227 do_create_ns_find_lba(const nvme_process_arg_t *npa,
228 const nvmeadm_create_ns_t *ncn)
229 {
230 const uint32_t nfmts = nvme_ctrl_info_nformats(npa->npa_ctrl_info);
231 const nvme_nvm_lba_fmt_t *best = NULL;
232 uint32_t best_rp = UINT32_MAX;
233
234 for (size_t i = 0; i < nfmts; i++) {
235 const nvme_nvm_lba_fmt_t *fmt;
236 uint32_t rp;
237
238 if (!nvme_ctrl_info_format(npa->npa_ctrl_info, i, &fmt)) {
239 continue;
240 }
241
242 if (nvme_nvm_lba_fmt_meta_size(fmt) != 0)
243 continue;
244
245 if (nvme_nvm_lba_fmt_data_size(fmt) != ncn->ncn_lba)
246 continue;
247
248 rp = nvme_nvm_lba_fmt_rel_perf(fmt);
249 if (rp < best_rp) {
250 best_rp = rp;
251 best = fmt;
252 }
253 }
254
255 if (best == NULL) {
256 errx(-1, "failed to find an LBA format with %u byte block size",
257 ncn->ncn_lba);
258 }
259
260 return (best);
261 }
262
263 int
do_create_ns(const nvme_process_arg_t * npa)264 do_create_ns(const nvme_process_arg_t *npa)
265 {
266 const nvmeadm_create_ns_t *ncn = npa->npa_cmd_arg;
267 nvme_ns_create_req_t *req;
268 const nvme_nvm_lba_fmt_t *lba;
269 uint32_t nsid, flbas, ds;
270 uint64_t size;
271
272 if (npa->npa_ns != NULL) {
273 errx(-1, "%s cannot be used on namespaces",
274 npa->npa_cmd->c_name);
275 }
276
277 /*
278 * This should have been checked above.
279 */
280 if (npa->npa_argc > 1) {
281 errx(-1, "%s passed extraneous arguments starting with %s",
282 npa->npa_cmd->c_name, npa->npa_argv[1]);
283 }
284
285 /*
286 * If we were given a block size rather than the formatted LBA size, go
287 * deal with converting that now.
288 */
289 if (!ncn->ncn_use_flbas) {
290 lba = do_create_ns_find_lba(npa, ncn);
291 } else {
292 if (!nvme_ctrl_info_format(npa->npa_ctrl_info, ncn->ncn_lba,
293 &lba)) {
294 nvmeadm_fatal(npa, "failed to look up LBA format index "
295 "%u", ncn->ncn_lba);
296 }
297 }
298
299 if (!nvme_ns_create_req_init_by_csi(npa->npa_ctrl, ncn->ncn_csi,
300 &req)) {
301 nvmeadm_fatal(npa, "failed to initialize namespace create "
302 "request");
303 }
304
305 ds = nvme_nvm_lba_fmt_data_size(lba);
306 flbas = nvme_nvm_lba_fmt_id(lba);
307 if (!nvme_ns_create_req_set_flbas(req, flbas)) {
308 nvmeadm_fatal(npa, "failed to set namespace create request "
309 "formatted LBA index to %u", flbas);
310 }
311
312 if (ncn->ncn_size % ds != 0) {
313 nvmeadm_fatal(npa, "requested namespace size 0x%lx is not a "
314 "multiple of the requested LBA block size (0x%x)",
315 ncn->ncn_size, ds);
316 }
317 size = ncn->ncn_size / ds;
318 if (!nvme_ns_create_req_set_nsze(req, size)) {
319 nvmeadm_fatal(npa, "failed to set namespace create request "
320 "namespace size to 0x%lx", size);
321 }
322
323 if (ncn->ncn_cap % ds != 0) {
324 nvmeadm_fatal(npa, "requested namespace capacity 0x%lx is not "
325 "a multiple of the requested LBA block size (0x%x)",
326 ncn->ncn_cap, ds);
327 }
328 size = ncn->ncn_cap/ ds;
329 if (!nvme_ns_create_req_set_ncap(req, size)) {
330 nvmeadm_fatal(npa, "failed to set namespace create request "
331 "namespace capacity to 0x%lx", size);
332 }
333
334 if (!nvme_ns_create_req_set_nmic(req, ncn->ncn_nmic)) {
335 nvmeadm_fatal(npa, "failed to set namespace multipath I/O and "
336 "sharing capabilities to 0x%x", ncn->ncn_nmic);
337 }
338
339 if (!nvme_ns_create_req_exec(req)) {
340 nvmeadm_fatal(npa, "failed to execute namespace create "
341 "request");
342 }
343
344 if (!nvme_ns_create_req_get_nsid(req, &nsid)) {
345 nvmeadm_fatal(npa, "Failed to retrieve the new namespace ID");
346 }
347
348 nvme_ns_create_req_fini(req);
349
350 (void) printf("created namespace %s/%u\n", npa->npa_ctrl_name, nsid);
351 return (EXIT_SUCCESS);
352 }
353
/*
 * Write the synopsis and description of the delete-namespace sub-command to
 * stderr. 'c_name' is the sub-command name shown at the start of the synopsis.
 */
void
usage_delete_ns(const char *c_name)
{
	(void) fprintf(stderr,
	    "%s <ctl>/<ns>\n\n"
	    " Delete the specified namespace. It must be first detached "
	    "from all\n controllers. Controllers can be detached from a "
	    "namespace with the\n detach-namespace sub-command.\n",
	    c_name);
}
362
363 int
do_delete_ns(const nvme_process_arg_t * npa)364 do_delete_ns(const nvme_process_arg_t *npa)
365 {
366 nvme_ns_delete_req_t *req;
367
368 if (npa->npa_ns == NULL) {
369 errx(-1, "%s cannot be used on controllers",
370 npa->npa_cmd->c_name);
371 }
372
373 if (npa->npa_argc > 0) {
374 errx(-1, "%s passed extraneous arguments starting with %s",
375 npa->npa_cmd->c_name, npa->npa_argv[0]);
376 }
377
378 if (!nvme_ns_delete_req_init(npa->npa_ctrl, &req)) {
379 nvmeadm_fatal(npa, "failed to initialize namespace delete "
380 "request");
381 }
382
383 const uint32_t nsid = nvme_ns_info_nsid(npa->npa_ns_info);
384 if (!nvme_ns_delete_req_set_nsid(req, nsid)) {
385 nvmeadm_fatal(npa, "failed to set namespace delete request "
386 "namespace ID to 0x%x", nsid);
387 }
388
389 if (!nvme_ns_delete_req_exec(req)) {
390 nvmeadm_fatal(npa, "failed to execute namespace delete "
391 "request");
392 }
393
394 nvme_ns_delete_req_fini(req);
395 return (EXIT_SUCCESS);
396 }
397
/*
 * Both attach-namespace and detach-namespace currently only attach the
 * namespace to, or detach it from, the current controller in the system. In
 * the future we should probably grow an argument, likely -c or -C, that
 * provides an explicit controller list in the form of either IDs or device
 * names.
 *
 * usage_attach_ns() writes the synopsis and description of the
 * attach-namespace sub-command to stderr. 'c_name' is the sub-command name
 * shown at the start of the synopsis.
 */
void
usage_attach_ns(const char *c_name)
{
	(void) fprintf(stderr,
	    "%s <ctl>/<ns>\n"
	    "\n Attach the specified namespace to the current "
	    "controller.\n",
	    c_name);
}
412
/*
 * Write the synopsis and description of the detach-namespace sub-command to
 * stderr. 'c_name' is the sub-command name shown at the start of the synopsis.
 */
void
usage_detach_ns(const char *c_name)
{
	(void) fprintf(stderr,
	    "%s <ctl>/<ns>\n\n"
	    " Detach the specified namespace from its current controller. "
	    "The namespace\n must have its blkdev instances detached with "
	    "the detach sub-command.\n",
	    c_name);
}
421
422 static int
do_attach_ns_common(const nvme_process_arg_t * npa,uint32_t sel)423 do_attach_ns_common(const nvme_process_arg_t *npa, uint32_t sel)
424 {
425 const char *desc = sel == NVME_NS_ATTACH_CTRL_ATTACH ? "attach" :
426 "detach";
427 nvme_ns_attach_req_t *req;
428
429 if (npa->npa_ns == NULL) {
430 errx(-1, "%s cannot be used on controllers",
431 npa->npa_cmd->c_name);
432 }
433
434 if (npa->npa_argc > 0) {
435 errx(-1, "%s passed extraneous arguments starting with %s",
436 npa->npa_cmd->c_name, npa->npa_argv[0]);
437 }
438
439 if (!nvme_ns_attach_req_init_by_sel(npa->npa_ctrl, sel, &req)) {
440 nvmeadm_fatal(npa, "failed to initialize controller "
441 "%s request for %s", desc, npa->npa_name);
442 }
443
444 const uint32_t nsid = nvme_ns_info_nsid(npa->npa_ns_info);
445 if (!nvme_ns_attach_req_set_nsid(req, nsid)) {
446 nvmeadm_fatal(npa, "failed to set namespace to %s to %u",
447 desc, nsid);
448 }
449
450 if (!nvme_ns_attach_req_set_ctrlid_self(req)) {
451 nvmeadm_fatal(npa, "failed to set controller to %s for %s",
452 desc, npa->npa_name);
453 }
454
455 if (!nvme_ns_attach_req_exec(req)) {
456 nvmeadm_fatal(npa, "failed to execute controller %s request",
457 desc);
458 }
459
460 nvme_ns_attach_req_fini(req);
461 return (EXIT_SUCCESS);
462 }
463
464 int
do_attach_ns(const nvme_process_arg_t * npa)465 do_attach_ns(const nvme_process_arg_t *npa)
466 {
467 return (do_attach_ns_common(npa, NVME_NS_ATTACH_CTRL_ATTACH));
468 }
469
470 int
do_detach_ns(const nvme_process_arg_t * npa)471 do_detach_ns(const nvme_process_arg_t *npa)
472 {
473 return (do_attach_ns_common(npa, NVME_NS_ATTACH_CTRL_DETACH));
474 }
475