xref: /illumos-gate/usr/src/cmd/nvmeadm/nvmeadm_wdc.c (revision 8119dad84d6416f13557b0ba8e2aaf9064cbcfd3)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2024 Oxide Computer Company
14  */
15 
16 /*
17  * WDC vendor-specific commands
18  */
19 
20 #include <err.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include <fcntl.h>
24 #include <unistd.h>
25 #include <sys/sysmacros.h>
26 #include <stdbool.h>
27 #include <endian.h>
28 #include <sys/nvme/wdc.h>
29 
30 #include "nvmeadm.h"
31 
32 /*
33  * This is the default chunk size that we'll read the e6 log in. This generally
34  * should fit within the maximum transfer size for a device. If we wanted to
35  * improve this, we could expose what the kernel's maximum transfer size is for
36  * a device and then use that as a larger upper bound. Currently the value is 64
37  * KiB.
38  */
39 #define	E6_BUFSIZE	0x10000
40 
/*
 * Command-specific arguments for the e6 dump command. The structure is
 * allocated and filled in by optparse_wdc_e6dump() and consumed by
 * do_wdc_e6dump().
 */
typedef struct nvmeadm_e6_dump {
	/* Path of the output file, taken from the argument to -o. */
	const char *e6_output;
} nvmeadm_e6_dump_t;
44 
/*
 * Command-specific arguments for the resize command. The structure is
 * allocated and filled in by optparse_wdc_resize() and consumed by
 * do_wdc_resize().
 */
typedef struct nvmeadm_wdc_resize {
	/* Set when -g is given: report the current size instead of resizing. */
	bool wr_query;
	/* Size given with -s, in GB (not GiB). Zero means -s was not given. */
	uint32_t wr_set;
} nvmeadm_wdc_resize_t;
49 
/*
 * Print the usage message for the e6 dump command to stderr. The synopsis
 * line is prefixed with c_name.
 */
void
usage_wdc_e6dump(const char *c_name)
{
	(void) fprintf(stderr,
	    "%s -o output <ctl>\n"
	    "\n"
	    "  Dump WDC e6 diagnostic log from a device.\n",
	    c_name);
}
56 
57 void
58 optparse_wdc_e6dump(nvme_process_arg_t *npa)
59 {
60 	int c;
61 	nvmeadm_e6_dump_t *e6;
62 
63 	if ((e6 = calloc(1, sizeof (nvmeadm_e6_dump_t))) == NULL) {
64 		err(-1, "failed to allocate memory for e6 options structure");
65 	}
66 
67 	while ((c = getopt(npa->npa_argc, npa->npa_argv, ":o:")) != -1) {
68 		switch (c) {
69 		case 'o':
70 			e6->e6_output = optarg;
71 			break;
72 		case '?':
73 			errx(-1, "unknown option: -%c", optopt);
74 		case ':':
75 			errx(-1, "option -%c requires an argument", optopt);
76 		}
77 	}
78 
79 	if (e6->e6_output == NULL) {
80 		errx(-1, "missing required e6dump output file, specify with "
81 		    "-o");
82 	}
83 
84 	npa->npa_cmd_arg = e6;
85 }
86 
87 static void
88 wdc_e6_read(const nvme_process_arg_t *npa, nvme_wdc_e6_req_t *req,
89     uint64_t off, void *buf, size_t len)
90 {
91 	if (!nvme_wdc_e6_req_set_offset(req, off)) {
92 		nvmeadm_fatal(npa, "failed to set e6 request offset to 0x%"
93 		    PRIx64, off);
94 	}
95 
96 	if (!nvme_wdc_e6_req_set_output(req, buf, len)) {
97 		nvmeadm_fatal(npa, "failed to set e6 request output buffer");
98 	}
99 
100 	if (!nvme_wdc_e6_req_exec(req)) {
101 		nvmeadm_fatal(npa, "failed to issue e6 request for %zu bytes "
102 		    "at offset 0x%" PRIx64, len, off);
103 	}
104 }
105 
/*
 * Write out len bytes of e6 data from buf to the file descriptor fd, looping
 * until everything has been written so that short writes are handled. Because
 * our read from the device has already been constrained by size, we don't
 * bother further chunking up the write out to a file.
 *
 * Any write failure is fatal and exits through err(). The buffer is only ever
 * read, so we walk it with a const-qualified cursor rather than casting the
 * qualifier away.
 */
static void
wdc_e6_write(int fd, const void *buf, size_t len)
{
	const uint8_t *cur = buf;

	while (len > 0) {
		ssize_t ret = write(fd, cur, len);
		if (ret < 0) {
			/*
			 * We explicitly allow a signal that interrupts us to
			 * lead to a failure assuming someone has more likely
			 * than not issued a SIGINT or similar.
			 */
			err(-1, "failed to write e6 data to output file");
		}

		cur += (size_t)ret;
		len -= (size_t)ret;
	}
}
132 
133 int
134 do_wdc_e6dump(const nvme_process_arg_t *npa)
135 {
136 	int ofd;
137 	nvmeadm_e6_dump_t *e6 = npa->npa_cmd_arg;
138 	nvme_vuc_disc_t *vuc;
139 	void *buf;
140 	nvme_wdc_e6_req_t *req;
141 	const wdc_e6_header_t *header;
142 	uint64_t len, off;
143 
144 	vuc = nvmeadm_vuc_init(npa, npa->npa_cmd->c_name);
145 
146 	ofd = open(e6->e6_output, O_RDWR | O_CREAT | O_TRUNC, 0644);
147 	if (ofd < 0) {
148 		err(-1, "failed to open file %s", e6->e6_output);
149 	}
150 
151 	if ((buf = calloc(1, E6_BUFSIZE)) == NULL) {
152 		err(-1, "failed to allocate 0x%x bytes for E6 transfer buffer",
153 		    E6_BUFSIZE);
154 	}
155 
156 	if (!nvme_wdc_e6_req_init(npa->npa_ctrl, &req)) {
157 		nvmeadm_fatal(npa, "failed to initialize e6 request");
158 	}
159 
160 	/*
161 	 * Begin by reading the header to determine the actual size. Note, as
162 	 * far as we can tell, the size of the header is included in the size we
163 	 * get.
164 	 */
165 	wdc_e6_read(npa, req, 0, buf, sizeof (wdc_e6_header_t));
166 	header = buf;
167 	len = be32toh(header->e6_size_be);
168 
169 	if (len == UINT32_MAX) {
170 		errx(-1, "e6 header size 0x%" PRIx64 " looks like an invalid "
171 		    "PCI read, aborting", len);
172 	}
173 
174 	if ((len % 4) != 0) {
175 		warnx("e6 header size 0x%zx is not 4 byte aligned, but "
176 		    "firmware claims it always will be, rounding up", len);
177 		len = P2ROUNDUP(len, 4);
178 	}
179 
180 	if (len < sizeof (wdc_e6_header_t)) {
181 		errx(-1, "e6 header size is too small, 0x%zx bytes does not "
182 		    "even cover the header", len);
183 	}
184 	wdc_e6_write(ofd, buf, sizeof (wdc_e6_header_t));
185 
186 	/*
187 	 * Account for the fact that we already read the header.
188 	 */
189 	off = sizeof (wdc_e6_header_t);
190 	len -= off;
191 	while (len > 0) {
192 		uint32_t toread = MIN(len, E6_BUFSIZE);
193 		wdc_e6_read(npa, req, off, buf, toread);
194 		wdc_e6_write(ofd, buf, toread);
195 
196 		off += toread;
197 		len -= toread;
198 	}
199 
200 	nvme_wdc_e6_req_fini(req);
201 	VERIFY0(close(ofd));
202 	nvmeadm_vuc_fini(npa, vuc);
203 
204 	return (0);
205 }
206 
/*
 * Print the usage message for the resize command to stderr. The synopsis line
 * is prefixed with c_name.
 */
void
usage_wdc_resize(const char *c_name)
{
	(void) fprintf(stderr,
	    "%s -s size | -g <ctl>\n"
	    "\n"
	    "  Resize a device to a new overall capacity in GB (not GiB) "
	    "or get its\n"
	    "  current size. Resizing will cause all data and namespaces "
	    "to be lost.\n",
	    c_name);
}
216 
217 void
218 optparse_wdc_resize(nvme_process_arg_t *npa)
219 {
220 	int c;
221 	nvmeadm_wdc_resize_t *resize;
222 
223 	if ((resize = calloc(1, sizeof (nvmeadm_wdc_resize_t))) == NULL) {
224 		err(-1, "failed to allocate memory for resize options "
225 		    "structure");
226 	}
227 
228 	while ((c = getopt(npa->npa_argc, npa->npa_argv, ":gs:")) != -1) {
229 		const char *err;
230 
231 		switch (c) {
232 		case 'g':
233 			resize->wr_query = true;
234 			break;
235 		case 's':
236 			/*
237 			 * The size to set is in GB (not GiB). While WDC
238 			 * recommends specific size points depending on the
239 			 * drives initial capacity, we allow the user to set
240 			 * what they expect and will allow the command to
241 			 * succeed or fail as per the controller's whims. It
242 			 * would be better if we looked at the device and
243 			 * determined its underlying capacity and figured out
244 			 * what points made sense, but it's not clear on the
245 			 * best way to do that across a few different
246 			 * generations of WDC products.
247 			 */
248 			resize->wr_set = (uint32_t)strtonumx(optarg, 1,
249 			    UINT16_MAX, &err, 0);
250 			if (err != NULL) {
251 				errx(-1, "failed to parse resize size %s:"
252 				    "value is %s", optarg, err);
253 			}
254 			break;
255 		case '?':
256 			errx(-1, "unknown option: -%c", optopt);
257 		case ':':
258 			errx(-1, "option -%c requires an argument", optopt);
259 		}
260 	}
261 
262 	if (resize->wr_query && resize->wr_set != 0) {
263 		errx(-1, "only one of -g and -s may be specified");
264 	}
265 
266 	if (!resize->wr_query && resize->wr_set == 0) {
267 		errx(-1, "one of -g and -s must be specified");
268 	}
269 
270 	npa->npa_cmd_arg = resize;
271 }
272 
273 int
274 do_wdc_resize(const nvme_process_arg_t *npa)
275 {
276 	nvmeadm_wdc_resize_t *resize = npa->npa_cmd_arg;
277 	nvme_vuc_disc_t *vuc;
278 
279 	vuc = nvmeadm_vuc_init(npa, npa->npa_cmd->c_name);
280 
281 	/*
282 	 * The VUC for this generally recommends exclusive access. If this
283 	 * becomes problematic for folks issuing this query, then we should
284 	 * break the query into a separate VUC entry that we should discover
285 	 * instead.
286 	 */
287 	if (resize->wr_query) {
288 		uint32_t val;
289 
290 		if (!nvme_wdc_resize_get(npa->npa_ctrl, &val)) {
291 			nvmeadm_fatal(npa, "failed to query current WDC "
292 			    "device capacity");
293 		}
294 
295 		(void) printf("%u\n", val);
296 		nvmeadm_vuc_fini(npa, vuc);
297 		return (0);
298 	}
299 
300 	if (!nvme_wdc_resize_set(npa->npa_ctrl, resize->wr_set)) {
301 		nvmeadm_fatal(npa, "failed to resize device to %u",
302 		    resize->wr_set);
303 	}
304 
305 	(void) printf("%s resized to %u GB\n", npa->npa_name, resize->wr_set);
306 	nvmeadm_vuc_fini(npa, vuc);
307 
308 	return (0);
309 }
310 
311 int
312 do_wdc_inject_assert(const nvme_process_arg_t *npa)
313 {
314 	nvme_vuc_disc_t *vuc;
315 
316 	if (npa->npa_argc > 0) {
317 		errx(-1, "%s passed extraneous arguments starting with %s",
318 		    npa->npa_cmd->c_name, npa->npa_argv[0]);
319 	}
320 
321 	vuc = nvmeadm_vuc_init(npa, npa->npa_cmd->c_name);
322 
323 	if (!nvme_wdc_assert_inject(npa->npa_ctrl)) {
324 		nvmeadm_fatal(npa, "failed to inject assertion");
325 	}
326 
327 	nvmeadm_vuc_fini(npa, vuc);
328 	return (0);
329 }
330 
331 int
332 do_wdc_clear_assert(const nvme_process_arg_t *npa)
333 {
334 	nvme_vuc_disc_t *vuc;
335 
336 	if (npa->npa_argc > 0) {
337 		errx(-1, "%s passed extraneous arguments starting with %s",
338 		    npa->npa_cmd->c_name, npa->npa_argv[0]);
339 	}
340 
341 	vuc = nvmeadm_vuc_init(npa, npa->npa_cmd->c_name);
342 
343 	if (!nvme_wdc_assert_clear(npa->npa_ctrl)) {
344 		nvmeadm_fatal(npa, "failed to clear assertion");
345 	}
346 
347 	nvmeadm_vuc_fini(npa, vuc);
348 	return (0);
349 }
350 
/*
 * Print the usage message for the assertion clearing command to stderr. The
 * synopsis line is prefixed with c_name.
 */
void
usage_wdc_clear_assert(const char *c_name)
{
	(void) fprintf(stderr,
	    "%s <ctl>\n"
	    "\n"
	    "  Clear an internal device assertion.\n",
	    c_name);
}
357 
/*
 * Print the usage message for the assertion injection command to stderr,
 * including its warning against casual use. The synopsis line is prefixed
 * with c_name.
 */
void
usage_wdc_inject_assert(const char *c_name)
{
	(void) fprintf(stderr,
	    "%s <ctl>\n"
	    "\n"
	    "  Inject a device assertion. This will cause the device "
	    "to pause\n"
	    "  execution of commands and create an internal fault. This "
	    "should\n"
	    "  not be used unless directed as part of a troubleshooting "
	    "exercise.\n"
	    "  If in doubt, do not use this!\n",
	    c_name);
}
368