1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2024 Oxide Computer Company
14 */
15
16 /*
17 * WDC vendor-specific commands
18 */
19
20 #include <err.h>
21 #include <sys/types.h>
22 #include <sys/stat.h>
23 #include <fcntl.h>
24 #include <unistd.h>
25 #include <sys/sysmacros.h>
26 #include <stdbool.h>
27 #include <endian.h>
28 #include <sys/nvme/wdc.h>
29
30 #include "nvmeadm.h"
31
/*
 * This is the default chunk size that we'll read the e6 log in. This generally
 * should fit within the maximum transfer size for a device. If we wanted to
 * improve this, we could expose what the kernel's maximum transfer size is for
 * a device and then use that as a larger upper bound. Currently the value is 64
 * KiB.
 *
 * do_wdc_e6dump() uses this both as the size of its transfer buffer and as the
 * cap on each individual read from the device.
 */
#define	E6_BUFSIZE	0x10000
40
/*
 * Options for the e6dump command. This is allocated and filled in by
 * optparse_wdc_e6dump() and read back by do_wdc_e6dump() via npa_cmd_arg.
 */
typedef struct nvmeadm_e6_dump {
	/* Argument of -o: path of the file the e6 log is written to. */
	const char *e6_output;
} nvmeadm_e6_dump_t;
44
/*
 * Options for the resize command. optparse_wdc_resize() rejects command lines
 * where both or neither of these are in effect.
 */
typedef struct nvmeadm_wdc_resize {
	/* Set when -g is given: query the capacity instead of changing it. */
	bool wr_query;
	/* Argument of -s, in GB. Zero means that -s was not given. */
	uint32_t wr_set;
} nvmeadm_wdc_resize_t;
49
/*
 * Emit the usage text for the e6dump command on stderr. c_name is the command
 * name that is printed at the start of the synopsis line.
 */
void
usage_wdc_e6dump(const char *c_name)
{
	(void) fprintf(stderr,
	    "%s -o output <ctl>\n"
	    "\n"
	    " Dump WDC e6 diagnostic log from a device.\n",
	    c_name);
}
56
57 void
optparse_wdc_e6dump(nvme_process_arg_t * npa)58 optparse_wdc_e6dump(nvme_process_arg_t *npa)
59 {
60 int c;
61 nvmeadm_e6_dump_t *e6;
62
63 if ((e6 = calloc(1, sizeof (nvmeadm_e6_dump_t))) == NULL) {
64 err(-1, "failed to allocate memory for e6 options structure");
65 }
66
67 while ((c = getopt(npa->npa_argc, npa->npa_argv, ":o:")) != -1) {
68 switch (c) {
69 case 'o':
70 e6->e6_output = optarg;
71 break;
72 case '?':
73 errx(-1, "unknown option: -%c", optopt);
74 case ':':
75 errx(-1, "option -%c requires an argument", optopt);
76 }
77 }
78
79 if (e6->e6_output == NULL) {
80 errx(-1, "missing required e6dump output file, specify with "
81 "-o");
82 }
83
84 npa->npa_cmd_arg = e6;
85 }
86
87 static void
wdc_e6_read(const nvme_process_arg_t * npa,nvme_wdc_e6_req_t * req,uint64_t off,void * buf,size_t len)88 wdc_e6_read(const nvme_process_arg_t *npa, nvme_wdc_e6_req_t *req,
89 uint64_t off, void *buf, size_t len)
90 {
91 if (!nvme_wdc_e6_req_set_offset(req, off)) {
92 nvmeadm_fatal(npa, "failed to set e6 request offset to 0x%"
93 PRIx64, off);
94 }
95
96 if (!nvme_wdc_e6_req_set_output(req, buf, len)) {
97 nvmeadm_fatal(npa, "failed to set e6 request output buffer");
98 }
99
100 if (!nvme_wdc_e6_req_exec(req)) {
101 nvmeadm_fatal(npa, "failed to issue e6 request for %zu bytes "
102 "at offset 0x%" PRIx64, len, off);
103 }
104 }
105
/*
 * Write len bytes starting at buf to the file descriptor fd, retrying after
 * short writes until everything has been written. The amount of data handed
 * to us is already bounded by the size of a single device read, so it is
 * passed to write(2) in one piece rather than being chunked any further.
 */
static void
wdc_e6_write(int fd, const void *buf, size_t len)
{
	const unsigned char *cur = buf;

	while (len > 0) {
		ssize_t nwritten = write(fd, cur, len);

		/*
		 * Every failure is fatal, including being interrupted by a
		 * signal: the most likely cause of that is someone having sent
		 * a SIGINT or similar, so we don't retry.
		 */
		if (nwritten < 0) {
			err(-1, "failed to write e6 data to output file");
		}

		cur += (size_t)nwritten;
		len -= (size_t)nwritten;
	}
}
132
133 int
do_wdc_e6dump(const nvme_process_arg_t * npa)134 do_wdc_e6dump(const nvme_process_arg_t *npa)
135 {
136 int ofd;
137 nvmeadm_e6_dump_t *e6 = npa->npa_cmd_arg;
138 nvme_vuc_disc_t *vuc;
139 void *buf;
140 nvme_wdc_e6_req_t *req;
141 const wdc_e6_header_t *header;
142 uint64_t len, off;
143
144 vuc = nvmeadm_vuc_init(npa, npa->npa_cmd->c_name);
145
146 ofd = open(e6->e6_output, O_RDWR | O_CREAT | O_TRUNC, 0644);
147 if (ofd < 0) {
148 err(-1, "failed to open file %s", e6->e6_output);
149 }
150
151 if ((buf = calloc(1, E6_BUFSIZE)) == NULL) {
152 err(-1, "failed to allocate 0x%x bytes for E6 transfer buffer",
153 E6_BUFSIZE);
154 }
155
156 if (!nvme_wdc_e6_req_init(npa->npa_ctrl, &req)) {
157 nvmeadm_fatal(npa, "failed to initialize e6 request");
158 }
159
160 /*
161 * Begin by reading the header to determine the actual size. Note, as
162 * far as we can tell, the size of the header is included in the size we
163 * get.
164 */
165 wdc_e6_read(npa, req, 0, buf, sizeof (wdc_e6_header_t));
166 header = buf;
167 len = be32toh(header->e6_size_be);
168
169 if (len == UINT32_MAX) {
170 errx(-1, "e6 header size 0x%" PRIx64 " looks like an invalid "
171 "PCI read, aborting", len);
172 }
173
174 if ((len % 4) != 0) {
175 warnx("e6 header size 0x%zx is not 4 byte aligned, but "
176 "firmware claims it always will be, rounding up", len);
177 len = P2ROUNDUP(len, 4);
178 }
179
180 if (len < sizeof (wdc_e6_header_t)) {
181 errx(-1, "e6 header size is too small, 0x%zx bytes does not "
182 "even cover the header", len);
183 }
184 wdc_e6_write(ofd, buf, sizeof (wdc_e6_header_t));
185
186 /*
187 * Account for the fact that we already read the header.
188 */
189 off = sizeof (wdc_e6_header_t);
190 len -= off;
191 while (len > 0) {
192 uint32_t toread = MIN(len, E6_BUFSIZE);
193 wdc_e6_read(npa, req, off, buf, toread);
194 wdc_e6_write(ofd, buf, toread);
195
196 off += toread;
197 len -= toread;
198 }
199
200 nvme_wdc_e6_req_fini(req);
201 VERIFY0(close(ofd));
202 nvmeadm_vuc_fini(npa, vuc);
203
204 return (0);
205 }
206
/*
 * Emit the usage text for the resize command on stderr. c_name is the command
 * name that is printed at the start of the synopsis line.
 */
void
usage_wdc_resize(const char *c_name)
{
	(void) fprintf(stderr,
	    "%s -s size | -g <ctl>\n"
	    "\n"
	    " Resize a device to a new overall capacity in GB (not GiB) "
	    "or get its\n"
	    " current size. Resizing will cause all data and namespaces "
	    "to be lost.\n",
	    c_name);
}
216
217 void
optparse_wdc_resize(nvme_process_arg_t * npa)218 optparse_wdc_resize(nvme_process_arg_t *npa)
219 {
220 int c;
221 nvmeadm_wdc_resize_t *resize;
222
223 if ((resize = calloc(1, sizeof (nvmeadm_wdc_resize_t))) == NULL) {
224 err(-1, "failed to allocate memory for resize options "
225 "structure");
226 }
227
228 while ((c = getopt(npa->npa_argc, npa->npa_argv, ":gs:")) != -1) {
229 const char *err;
230
231 switch (c) {
232 case 'g':
233 resize->wr_query = true;
234 break;
235 case 's':
236 /*
237 * The size to set is in GB (not GiB). While WDC
238 * recommends specific size points depending on the
239 * drives initial capacity, we allow the user to set
240 * what they expect and will allow the command to
241 * succeed or fail as per the controller's whims. It
242 * would be better if we looked at the device and
243 * determined its underlying capacity and figured out
244 * what points made sense, but it's not clear on the
245 * best way to do that across a few different
246 * generations of WDC products.
247 */
248 resize->wr_set = (uint32_t)strtonumx(optarg, 1,
249 UINT16_MAX, &err, 0);
250 if (err != NULL) {
251 errx(-1, "failed to parse resize size %s:"
252 "value is %s", optarg, err);
253 }
254 break;
255 case '?':
256 errx(-1, "unknown option: -%c", optopt);
257 case ':':
258 errx(-1, "option -%c requires an argument", optopt);
259 }
260 }
261
262 if (resize->wr_query && resize->wr_set != 0) {
263 errx(-1, "only one of -g and -s may be specified");
264 }
265
266 if (!resize->wr_query && resize->wr_set == 0) {
267 errx(-1, "one of -g and -s must be specified");
268 }
269
270 npa->npa_cmd_arg = resize;
271 }
272
273 int
do_wdc_resize(const nvme_process_arg_t * npa)274 do_wdc_resize(const nvme_process_arg_t *npa)
275 {
276 nvmeadm_wdc_resize_t *resize = npa->npa_cmd_arg;
277 nvme_vuc_disc_t *vuc;
278
279 vuc = nvmeadm_vuc_init(npa, npa->npa_cmd->c_name);
280
281 /*
282 * The VUC for this generally recommends exclusive access. If this
283 * becomes problematic for folks issuing this query, then we should
284 * break the query into a separate VUC entry that we should discover
285 * instead.
286 */
287 if (resize->wr_query) {
288 uint32_t val;
289
290 if (!nvme_wdc_resize_get(npa->npa_ctrl, &val)) {
291 nvmeadm_fatal(npa, "failed to query current WDC "
292 "device capacity");
293 }
294
295 (void) printf("%u\n", val);
296 nvmeadm_vuc_fini(npa, vuc);
297 return (0);
298 }
299
300 if (!nvme_wdc_resize_set(npa->npa_ctrl, resize->wr_set)) {
301 nvmeadm_fatal(npa, "failed to resize device to %u",
302 resize->wr_set);
303 }
304
305 (void) printf("%s resized to %u GB\n", npa->npa_name, resize->wr_set);
306 nvmeadm_vuc_fini(npa, vuc);
307
308 return (0);
309 }
310
311 int
do_wdc_inject_assert(const nvme_process_arg_t * npa)312 do_wdc_inject_assert(const nvme_process_arg_t *npa)
313 {
314 nvme_vuc_disc_t *vuc;
315
316 if (npa->npa_argc > 0) {
317 errx(-1, "%s passed extraneous arguments starting with %s",
318 npa->npa_cmd->c_name, npa->npa_argv[0]);
319 }
320
321 vuc = nvmeadm_vuc_init(npa, npa->npa_cmd->c_name);
322
323 if (!nvme_wdc_assert_inject(npa->npa_ctrl)) {
324 nvmeadm_fatal(npa, "failed to inject assertion");
325 }
326
327 nvmeadm_vuc_fini(npa, vuc);
328 return (0);
329 }
330
331 int
do_wdc_clear_assert(const nvme_process_arg_t * npa)332 do_wdc_clear_assert(const nvme_process_arg_t *npa)
333 {
334 nvme_vuc_disc_t *vuc;
335
336 if (npa->npa_argc > 0) {
337 errx(-1, "%s passed extraneous arguments starting with %s",
338 npa->npa_cmd->c_name, npa->npa_argv[0]);
339 }
340
341 vuc = nvmeadm_vuc_init(npa, npa->npa_cmd->c_name);
342
343 if (!nvme_wdc_assert_clear(npa->npa_ctrl)) {
344 nvmeadm_fatal(npa, "failed to clear assertion");
345 }
346
347 nvmeadm_vuc_fini(npa, vuc);
348 return (0);
349 }
350
/*
 * Emit the usage text for the assertion clearing command on stderr. c_name is
 * the command name that is printed at the start of the synopsis line.
 */
void
usage_wdc_clear_assert(const char *c_name)
{
	(void) fprintf(stderr,
	    "%s <ctl>\n"
	    "\n"
	    " Clear an internal device assertion.\n",
	    c_name);
}
357
/*
 * Emit the usage text for the assertion injection command on stderr. c_name
 * is the command name that is printed at the start of the synopsis line.
 */
void
usage_wdc_inject_assert(const char *c_name)
{
	(void) fprintf(stderr,
	    "%s <ctl>\n"
	    "\n"
	    " Inject a device assertion. This will cause the device to pause\n"
	    " execution of commands and create an internal fault. This should\n"
	    " not be used unless directed as part of a troubleshooting "
	    "exercise.\n"
	    " If in doubt, do not use this!\n",
	    c_name);
}
368