xref: /illumos-gate/usr/src/test/nvme-tests/tests/libnvme/format.c (revision f5f0964ce91892f7482efc86903b0ec7c7b6ba66)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2025 Oxide Computer Company
14  */
15 
16 /*
17  * Test basic features around formatting namespaces and secure erase. In
18  * particular we want to make sure that we can do the following:
19  *
20  *  - Format a single namespace
21  *  - Broadcast format all active namespaces
22  *  - Secure erase (whether broadcast or serially)
23  *
24  * We create two 1 GiB namespaces that we use for this. The namespace size
25  * hopefully keeps format and secure erase timing reasonable. We end up writing
26  * a message to sector 0 of each namespace to try to verify data was actually
27  * erased.
28  *
29  * This test starts from the device-empty profile so we can control the size and
30  * space of namespaces.
31  */
32 
33 #include <err.h>
34 #include <stdlib.h>
35 #include <stdio.h>
36 #include <string.h>
37 #include <libdevinfo.h>
38 #include <fcntl.h>
39 #include <unistd.h>
40 
41 #include "libnvme_test_common.h"
42 
/*
 * The namespaces this test operates on: NNSIDS consecutive namespace IDs
 * starting at NSID_BASE.
 */
#define	NSID_BASE	1
#define	NNSIDS		2

/*
 * Because we use the raw block device, our size needs to be a multiple of both
 * of the sector sizes we use (4k, 512), which gives us a 4k buffer.
 */
#define	FMT_BUF_SIZE	4096

/*
 * The message that is left at the start of each namespace. The entry for a
 * given namespace is looked up as format_msgs[nsid - 1], so each namespace
 * carries a distinct message.
 */
static const char *format_msgs[NNSIDS] = {
	"Those Who Fight Further",
	"Those Who Deny the Dawn"
};
56 
57 /*
58  * Check the results of a format / erase namespace and verify that our old data
59  * is gone. Write new data.
60  */
61 static bool
format_io_verify(int fd,uint32_t nsid)62 format_io_verify(int fd, uint32_t nsid)
63 {
64 	uint8_t buf[FMT_BUF_SIZE];
65 	const char *msg = format_msgs[nsid - 1];
66 	size_t msglen = strlen(msg) + 1;
67 
68 	if (pread(fd, buf, sizeof (buf), 0) != sizeof (buf)) {
69 		warn("TEST FAILED: failed to read from nsid %u", nsid);
70 		return (false);
71 	}
72 
73 	/*
74 	 * These contents should never match our message.
75 	 */
76 	if (memcmp(buf, msg, msglen) != 0) {
77 		(void) printf("TEST PASSED: namespace %u data successfully "
78 		    "cleared\n", nsid);
79 	} else {
80 		warnx("TEST FAILED: nsid %u data was not successfully erased",
81 		    nsid);
82 		(void) printf("Unexpected data: found\n");
83 		for (size_t i = 0; i < msglen; i++) {
84 			(void) printf("buf[%u]: %02x\n", i, buf[i]);
85 		}
86 
87 		return (false);
88 	}
89 
90 	(void) memset(buf, 0x77, sizeof (buf));
91 	(void) memcpy(buf, msg, msglen);
92 	if (pwrite(fd, buf, sizeof (buf), 0) != sizeof (buf)) {
93 		warnx("TEST FAILED: failed to write updated buffer to nsid "
94 		    "%u", nsid);
95 		return (false);
96 	}
97 
98 	if (fsync(fd) != 0) {
99 		warn("TEST FAILED: failed to synchronize raw device write "
100 		    "to ns %u", nsid);
101 	}
102 
103 	(void) memset(buf, 0, sizeof (buf));
104 	if (pread(fd, buf, sizeof (buf), 0) != sizeof (buf)) {
105 		warnx("TEST FAILED: failed to read back data written to %u",
106 		    nsid);
107 		return (false);
108 	}
109 
110 	if (memcmp(buf, msg, msglen) != 0) {
111 		warnx("TEST FAILED: did not get back data written to nsid %u",
112 		    nsid);
113 		(void) printf("Mismatched data: found/expected\n");
114 		for (size_t i = 0; i < msglen; i++) {
115 			(void) printf("buf[%u]: %02x/%02x\n", i, buf[i],
116 			    msg[i]);
117 		}
118 		return (false);
119 	}
120 
121 	(void) printf("TEST PASSED: successfully wrote message to nsid %u\n",
122 	    nsid);
123 	return (true);
124 }
125 
126 /*
127  * Used after another namespace has been formatted to check that the other is
128  * still okay and its data hasn't been overwritten.
129  */
130 static bool
format_io_check(int fd,uint32_t nsid)131 format_io_check(int fd, uint32_t nsid)
132 {
133 	uint8_t buf[FMT_BUF_SIZE];
134 	const char *msg = format_msgs[nsid - 1];
135 	size_t msglen = strlen(msg) + 1;
136 
137 	if (pread(fd, buf, sizeof (buf), 0) != sizeof (buf)) {
138 		warnx("TEST FAILED: failed to read back data on nsid %u",
139 		    nsid);
140 		return (false);
141 	}
142 
143 	if (memcmp(buf, msg, msglen) != 0) {
144 		warnx("TEST FAILED: data on nsid %u changed after format of "
145 		    "other namespace", nsid);
146 		(void) printf("Mismatched data: found/expected\n");
147 		for (size_t i = 0; i < msglen; i++) {
148 			(void) printf("buf[%u]: %02x/%02x\n", i, buf[i],
149 			    msg[i]);
150 		}
151 		return (false);
152 	}
153 
154 	(void) printf("TEST PASSED: verified prior message on nsid %u\n",
155 	    nsid);
156 	return (true);
157 }
158 
159 /*
160  * Get the file descripto that corresponds to the raw whole disk device which is
161  * generally s2 or 'c,raw'.
162  */
163 static int
format_blkdev_fd(const char * bd_addr)164 format_blkdev_fd(const char *bd_addr)
165 {
166 	int fd = -1;
167 	di_node_t root;
168 
169 	root = di_init("/", DINFOCPYALL);
170 	if (root == DI_NODE_NIL) {
171 		warnx("failed to take devinfo snapshot");
172 		return (-1);
173 	}
174 
175 	for (di_node_t n = di_drv_first_node("blkdev", root); n != DI_NODE_NIL;
176 	    n = di_drv_next_node(n)) {
177 		char *devfs, path[PATH_MAX];
178 		const char *addr = di_bus_addr(n);
179 
180 		if (addr == NULL) {
181 			continue;
182 		}
183 
184 		if (strcmp(bd_addr, addr) != 0)
185 			continue;
186 
187 		devfs = di_devfs_path(n);
188 		if (devfs == NULL) {
189 			warn("failed to get devfs path for blkdev %s", bd_addr);
190 			goto out;
191 		}
192 
193 		if (snprintf(path, sizeof (path), "/devices/%s:c,raw", devfs) >=
194 		    sizeof (path)) {
195 			di_devfs_path_free(devfs);
196 			warnx("Construction of blkdev %s minor path exceeded "
197 			    "internal buffer", bd_addr);
198 			goto out;
199 		}
200 
201 		/*
202 		 * We need to use O_NDELAY here to convince the system that it's
203 		 * okay that there isn't valid CMLB information yet, which is
204 		 * fine because we're trashing this device.
205 		 */
206 		di_devfs_path_free(devfs);
207 		fd = open(path, O_RDWR | O_NDELAY);
208 		if (fd < 0) {
209 			warn("failed to open %s", path);
210 			goto out;
211 		}
212 	}
213 
214 	if (fd == -1) {
215 		warnx("failed to find di_node_t that matches %s", bd_addr);
216 	}
217 
218 out:
219 	di_fini(root);
220 	return (fd);
221 }
222 
223 /*
224  * Leave a message in sector 0 of each device that we can later verify is there
225  * or not.
226  */
227 static bool
format_nsid_io(nvme_ctrl_t * ctrl,uint32_t nsid,bool (* cb)(int,uint32_t))228 format_nsid_io(nvme_ctrl_t *ctrl, uint32_t nsid, bool (*cb)(int, uint32_t))
229 {
230 	int fd;
231 	nvme_ns_info_t *ns = NULL;
232 	const char *bd_addr;
233 	bool ret = false;
234 
235 	if (!libnvme_test_setup_ns(ctrl, NVME_NS_DISC_F_BLKDEV, nsid,
236 	    UINT32_MAX)) {
237 		libnvme_test_ctrl_warn(ctrl, "failed to attach blkdev to "
238 		    "nsid %u", nsid);
239 		return (false);
240 	}
241 
242 	if (!nvme_ctrl_ns_info_snap(ctrl, nsid, &ns)) {
243 		libnvme_test_ctrl_warn(ctrl, "failed to take namespace %u "
244 		    "info snapshot", nsid);
245 		goto out;
246 	}
247 
248 	if (!nvme_ns_info_bd_addr(ns, &bd_addr)) {
249 		libnvme_test_ctrl_warn(ctrl, "failed to get blkdev address "
250 		    "for namespace %u", nsid);
251 		goto out;
252 	}
253 
254 	if ((fd = format_blkdev_fd(bd_addr)) < 0) {
255 		warnx("TEST FAILED: failed to acquire blkdev fd for nsid %u "
256 		    "to write data", nsid);
257 		goto out;
258 	}
259 
260 	ret = cb(fd, nsid);
261 	VERIFY0(close(fd));
262 
263 out:
264 	if (!libnvme_test_setup_ns(ctrl, NVME_NS_DISC_F_NOT_IGNORED, nsid,
265 	    UINT32_MAX)) {
266 		libnvme_test_ctrl_warn(ctrl, "failed to detach blkdev from "
267 		    "nsid %u", nsid);
268 		ret = false;
269 	}
270 
271 	nvme_ns_info_free(ns);
272 	return (ret);
273 }
274 
275 /*
276  * Verify that a given namespace has the expected LBA format.
277  */
278 static bool
format_check_lbaf(nvme_ctrl_t * ctrl,uint32_t nsid,uint32_t lbaf)279 format_check_lbaf(nvme_ctrl_t *ctrl, uint32_t nsid, uint32_t lbaf)
280 {
281 	bool ret = false;
282 	nvme_ns_info_t *info;
283 	const nvme_nvm_lba_fmt_t *lba;
284 
285 	if (!nvme_ctrl_ns_info_snap(ctrl, nsid, &info)) {
286 		libnvme_test_ctrl_warn(ctrl, "failed to take namespace %u "
287 		    "info snapshot", nsid);
288 		return (false);
289 	}
290 
291 	if (!nvme_ns_info_curformat(info, &lba)) {
292 		libnvme_test_ctrl_warn(ctrl, "failed to get namespace %u "
293 		    "current lba format", nsid);
294 		goto out;
295 	}
296 
297 	if (nvme_nvm_lba_fmt_id(lba) == lbaf) {
298 		(void) printf("TEST PASSED: Succesfully formatted namespace %u "
299 		    "to format %u (0x%" PRIx64 ")\n", nsid, lbaf,
300 		    nvme_nvm_lba_fmt_data_size(lba));
301 		ret = true;
302 	} else {
303 		warnx("TEST FAILED: Formatted namespace %u ended up with LBA "
304 		    "format %u (0x%" PRIx64 " bytes), not %u", nsid,
305 		    nvme_nvm_lba_fmt_id(lba), nvme_nvm_lba_fmt_data_size(lba),
306 		    lbaf);
307 	}
308 
309 out:
310 	nvme_ns_info_free(info);
311 	return (ret);
312 }
313 
314 static bool
format_ns(nvme_ctrl_t * ctrl,uint32_t nsid,uint32_t ses,uint32_t lbaf)315 format_ns(nvme_ctrl_t *ctrl, uint32_t nsid, uint32_t ses, uint32_t lbaf)
316 {
317 	bool ret = true;
318 	nvme_format_req_t *req;
319 
320 	if (!nvme_format_req_init(ctrl, &req)) {
321 		libnvme_test_ctrl_warn(ctrl, "failed to initialize format "
322 		    "request");
323 		ret = false;
324 		goto done;
325 	}
326 
327 	if (!nvme_format_req_set_lbaf(req, lbaf)) {
328 		libnvme_test_ctrl_warn(ctrl, "failed to set format lbaf to "
329 		    "0x%x", lbaf);
330 		ret = false;
331 		goto done;
332 	}
333 
334 	if (!nvme_format_req_set_ses(req, ses)) {
335 		libnvme_test_ctrl_warn(ctrl, "failed to set format ses to 0x%x",
336 		    ses);
337 		ret = false;
338 		goto done;
339 	}
340 
341 	if (!nvme_format_req_set_nsid(req, nsid)) {
342 		libnvme_test_ctrl_warn(ctrl, "failed to set format nsid to "
343 		    "0x%x", nsid);
344 		ret = false;
345 		goto done;
346 	}
347 
348 	if (!nvme_format_req_exec(req)) {
349 		libnvme_test_ctrl_warn(ctrl, "failed to execute format "
350 		    "namespace for nsid %u", nsid);
351 		ret = false;
352 		goto done;
353 	}
354 
355 	for (uint32_t i = 0; i < NNSIDS; i++) {
356 		if (nsid == NVME_NSID_BCAST || nsid == (NSID_BASE + i)) {
357 			if (!format_check_lbaf(ctrl, NSID_BASE + i, lbaf)) {
358 				ret = false;
359 			}
360 		}
361 	}
362 
363 done:
364 	nvme_format_req_fini(req);
365 	return (ret);
366 }
367 
/*
 * Test driver. Takes the controller write lock, finds the 512 byte and 4K LBA
 * formats, and then walks through a fixed sequence of namespace creation,
 * format, and secure erase steps, checking the data in each namespace after
 * every step. Failures of individual steps are recorded in ret and the
 * sequence continues; failures to set up (lock, snapshot, LBA format lookup,
 * namespace creation) are fatal.
 */
int
main(void)
{
	int ret = EXIT_SUCCESS;
	nvme_t *nvme;
	nvme_ctrl_t *ctrl;
	nvme_ctrl_info_t *info;
	uint32_t lbaf_4k, lbaf_512, ses;
	const nvme_identify_ctrl_t *id;

	libnvme_test_init(&nvme, &ctrl);
	if (!nvme_ctrl_lock(ctrl, NVME_LOCK_L_WRITE, NVME_LOCK_F_DONT_BLOCK)) {
		libnvme_test_ctrl_fatal(ctrl, "failed to obtain write lock");
	}

	if (!nvme_ctrl_info_snap(ctrl, &info)) {
		libnvme_test_ctrl_fatal(ctrl, "failed to get info snapshot");
	}

	if (!libnvme_test_lbaf(info, 4096, &lbaf_4k)) {
		errx(EXIT_FAILURE, "failed to find 4K LBA format, cannot "
		    "continue");
	}

	if (!libnvme_test_lbaf(info, 512, &lbaf_512)) {
		errx(EXIT_FAILURE, "failed to find 512 byte LBA format, cannot "
		    "continue");
	}
	(void) printf("LBA indexes: 512/4k %u/%u\n", lbaf_512, lbaf_4k);

	/*
	 * Start by creating a single 512 byte namespace. We only create a
	 * single one for now because we expect that many devices don't like
	 * having namespaces with different LBA formats despite indicating in
	 * the format NVM attributes that namespaces are independent.
	 */
	if (!libnvme_test_setup_ns(ctrl, NVME_NS_DISC_F_ACTIVE, 1, lbaf_512)) {
		libnvme_test_ctrl_fatal(ctrl, "failed to create initial "
		    "namespaces");
	}

	/*
	 * Because the namespace was created as part of this (we assume we
	 * started from the empty device-reset profile), nothing should be here.
	 */
	if (!format_nsid_io(ctrl, 1, format_io_verify))
		ret = EXIT_FAILURE;

	/*
	 * Format it to itself and make sure that data is gone.
	 */
	if (!format_ns(ctrl, 1, NVME_FRMT_SES_NONE, lbaf_512))
		ret = EXIT_FAILURE;

	if (!format_nsid_io(ctrl, 1, format_io_verify))
		ret = EXIT_FAILURE;

	/*
	 * Transform it to 4K now.
	 */
	if (!format_ns(ctrl, 1, NVME_FRMT_SES_NONE, lbaf_4k))
		ret = EXIT_FAILURE;
	if (!format_nsid_io(ctrl, 1, format_io_verify))
		ret = EXIT_FAILURE;

	/*
	 * Now create a second namespace. At this point we are constrained to
	 * 4k.
	 */
	if (!libnvme_test_setup_ns(ctrl, NVME_NS_DISC_F_ACTIVE, 2, lbaf_4k)) {
		libnvme_test_ctrl_fatal(ctrl, "failed to create second "
		    "namespace");
	}

	/*
	 * The new namespace should have none of our data and creating it
	 * should not have disturbed what we wrote to namespace 1.
	 */
	if (!format_nsid_io(ctrl, 2, format_io_verify))
		ret = EXIT_FAILURE;
	if (!format_nsid_io(ctrl, 1, format_io_check))
		ret = EXIT_FAILURE;

	/*
	 * Now reformat 2 and make sure its data is gone and 1's data is intact.
	 */
	if (!format_ns(ctrl, 2, NVME_FRMT_SES_NONE, lbaf_4k))
		ret = EXIT_FAILURE;
	if (!format_nsid_io(ctrl, 1, format_io_check))
		ret = EXIT_FAILURE;
	if (!format_nsid_io(ctrl, 2, format_io_verify))
		ret = EXIT_FAILURE;

	/*
	 * Perform a broadcast format back to 512.
	 */
	if (!format_ns(ctrl, NVME_NSID_BCAST, NVME_FRMT_SES_NONE, lbaf_512))
		ret = EXIT_FAILURE;

	if (!format_nsid_io(ctrl, 1, format_io_verify))
		ret = EXIT_FAILURE;
	if (!format_nsid_io(ctrl, 2, format_io_verify))
		ret = EXIT_FAILURE;

	/*
	 * All devices that support format in theory support secure-erase. Check
	 * to see if they support cryptographic secure erase as that should
	 * speed things up due to per-key usage. Secure erase may only work
	 * globally or operate per-namespace. Regardless of this, we assume that
	 * if we're changing the format, that has to be done globally.
	 */
	id = nvme_ctrl_info_identify(info);
	if (id->id_fna.fn_crypt_erase != 0) {
		ses = NVME_FRMT_SES_CRYPTO;
	} else {
		ses = NVME_FRMT_SES_USER;
	}

	if (!format_ns(ctrl, NVME_NSID_BCAST, ses, lbaf_4k))
		ret = EXIT_FAILURE;
	if (!format_nsid_io(ctrl, 1, format_io_verify))
		ret = EXIT_FAILURE;
	if (!format_nsid_io(ctrl, 2, format_io_verify))
		ret = EXIT_FAILURE;

	/*
	 * Erase each namespace on its own and check that the other one keeps
	 * its data.
	 *
	 * NOTE(review): presumably fn_sec_erase == 0 means that a secure erase
	 * applies to a single namespace rather than to all of them -- confirm
	 * against the definition of the format NVM attributes.
	 */
	if (id->id_fna.fn_sec_erase == 0) {
		if (!format_ns(ctrl, 1, ses, lbaf_4k))
			ret = EXIT_FAILURE;
		if (!format_nsid_io(ctrl, 2, format_io_check))
			ret = EXIT_FAILURE;
		if (!format_nsid_io(ctrl, 1, format_io_verify))
			ret = EXIT_FAILURE;

		if (!format_ns(ctrl, 2, ses, lbaf_4k))
			ret = EXIT_FAILURE;
		if (!format_nsid_io(ctrl, 2, format_io_verify))
			ret = EXIT_FAILURE;
		if (!format_nsid_io(ctrl, 1, format_io_check))
			ret = EXIT_FAILURE;
	}

	/*
	 * Release everything we acquired above: the controller information
	 * snapshot, the write lock, the controller, and the library handle.
	 */
	nvme_ctrl_info_free(info);
	nvme_ctrl_unlock(ctrl);
	nvme_ctrl_fini(ctrl);
	nvme_fini(nvme);

	if (ret == EXIT_SUCCESS) {
		(void) printf("All tests passed successfully\n");
	}

	return (ret);
}
516