xref: /illumos-gate/usr/src/cmd/nvmeadm/nvmeadm_telemetry.c (revision 7b0d41e2e6f9dd04a09d33692a556b359dab9847)
1*7b0d41e2SRobert Mustacchi /*
2*7b0d41e2SRobert Mustacchi  * This file and its contents are supplied under the terms of the
3*7b0d41e2SRobert Mustacchi  * Common Development and Distribution License ("CDDL"), version 1.0.
4*7b0d41e2SRobert Mustacchi  * You may only use this file in accordance with the terms of version
5*7b0d41e2SRobert Mustacchi  * 1.0 of the CDDL.
6*7b0d41e2SRobert Mustacchi  *
7*7b0d41e2SRobert Mustacchi  * A full copy of the text of the CDDL should have accompanied this
8*7b0d41e2SRobert Mustacchi  * source.  A copy of the CDDL is also available via the Internet at
9*7b0d41e2SRobert Mustacchi  * http://www.illumos.org/license/CDDL.
10*7b0d41e2SRobert Mustacchi  */
11*7b0d41e2SRobert Mustacchi 
12*7b0d41e2SRobert Mustacchi /*
13*7b0d41e2SRobert Mustacchi  * Copyright 2025 Oxide Computer Company
14*7b0d41e2SRobert Mustacchi  */
15*7b0d41e2SRobert Mustacchi 
16*7b0d41e2SRobert Mustacchi /*
17*7b0d41e2SRobert Mustacchi  * Logic to fetch and save an instance of the telemetry log page. The telemetry
18*7b0d41e2SRobert Mustacchi  * log page consists of a 512-byte header followed by a number of data blocks.
19*7b0d41e2SRobert Mustacchi  * The number of data blocks is indicated by the controller.
20*7b0d41e2SRobert Mustacchi  *
21*7b0d41e2SRobert Mustacchi  * Telemetry may either be host-initiated or device-initiated. When the
22*7b0d41e2SRobert Mustacchi  * telemetry is host-initiated, the host specifies when to create the telemetry
23*7b0d41e2SRobert Mustacchi  * using a flag in the log-specific parameter field (lsp). Whenever this is a 1,
24*7b0d41e2SRobert Mustacchi  * then this data is created again. When telemetry is device-initiated, which
25*7b0d41e2SRobert Mustacchi  * uses a different log page, then the data persists as long as the retain async
26*7b0d41e2SRobert Mustacchi  * event flag is specified.
27*7b0d41e2SRobert Mustacchi  *
28*7b0d41e2SRobert Mustacchi  * In the telemetry header there are two things that we need to pay attention
29*7b0d41e2SRobert Mustacchi  * to:
30*7b0d41e2SRobert Mustacchi  *
31*7b0d41e2SRobert Mustacchi  * 1. There are up to four indicators for the number of telemetry blocks that
32*7b0d41e2SRobert Mustacchi  *    could exist. These are meant to be indicators of short, medium, and long.
33*7b0d41e2SRobert Mustacchi  *    The 4th one requires the kernel to opt into it with a specific set
34*7b0d41e2SRobert Mustacchi  *    features command. We basically always try to get the largest amount that
35*7b0d41e2SRobert Mustacchi  *    exists in the header.
36*7b0d41e2SRobert Mustacchi  *
37*7b0d41e2SRobert Mustacchi  * 2. There are a series of generation numbers that exist. We need to ensure
38*7b0d41e2SRobert Mustacchi  *    that these generation numbers are the same across everything that we find.
39*7b0d41e2SRobert Mustacchi  *    So basically we read this initially at the start and then read it again at
40*7b0d41e2SRobert Mustacchi  *    the end. If the values differ, then we throw an error or would otherwise
41*7b0d41e2SRobert Mustacchi  *    have to start over. There are separate generation numbers for
42*7b0d41e2SRobert Mustacchi  *    host-initiated and controller-initiated telemetry.
43*7b0d41e2SRobert Mustacchi  *
44*7b0d41e2SRobert Mustacchi  * The telemetry file may have a large number of blocks so we split this up into
 * multiple requests of up to 64 KiB each (TELEM_BLKSIZE). We do not assume
46*7b0d41e2SRobert Mustacchi  * that we will get the log all in one go. As such, we also will not assume that
47*7b0d41e2SRobert Mustacchi  * we can buffer the entire log page in memory and will always write it out.
48*7b0d41e2SRobert Mustacchi  * This means that the user will be required to use the output file option.
49*7b0d41e2SRobert Mustacchi  */
50*7b0d41e2SRobert Mustacchi 
51*7b0d41e2SRobert Mustacchi #include <sys/types.h>
52*7b0d41e2SRobert Mustacchi #include <sys/stat.h>
53*7b0d41e2SRobert Mustacchi #include <fcntl.h>
54*7b0d41e2SRobert Mustacchi #include <err.h>
55*7b0d41e2SRobert Mustacchi #include <unistd.h>
56*7b0d41e2SRobert Mustacchi #include <sys/sysmacros.h>
57*7b0d41e2SRobert Mustacchi 
58*7b0d41e2SRobert Mustacchi #include "nvmeadm.h"
59*7b0d41e2SRobert Mustacchi 
/*
 * Size in bytes of the interim data buffer that do_get_logpage_telemetry()
 * allocates, and therefore the upper bound on each individual log page read
 * that it issues after the header. We use a 64 KiB buffer here as that's
 * usually within a device's maximum payload.
 */
#define	TELEM_BLKSIZE	(64 * 1024)
65*7b0d41e2SRobert Mustacchi 
66*7b0d41e2SRobert Mustacchi static void
telemetry_read(const nvme_process_arg_t * npa,nvme_log_req_t * req,void * buf,size_t len,uint64_t off)67*7b0d41e2SRobert Mustacchi telemetry_read(const nvme_process_arg_t *npa, nvme_log_req_t *req, void *buf,
68*7b0d41e2SRobert Mustacchi     size_t len, uint64_t off)
69*7b0d41e2SRobert Mustacchi {
70*7b0d41e2SRobert Mustacchi 	if (!nvme_log_req_set_output(req, buf, len)) {
71*7b0d41e2SRobert Mustacchi 		nvmeadm_fatal(npa, "failed to set output buffer");
72*7b0d41e2SRobert Mustacchi 	}
73*7b0d41e2SRobert Mustacchi 
74*7b0d41e2SRobert Mustacchi 	if (!nvme_log_req_set_offset(req, off)) {
75*7b0d41e2SRobert Mustacchi 		nvmeadm_fatal(npa, "failed to set offset to 0x%lx", off);
76*7b0d41e2SRobert Mustacchi 	}
77*7b0d41e2SRobert Mustacchi 
78*7b0d41e2SRobert Mustacchi 	if (!nvme_log_req_exec(req)) {
79*7b0d41e2SRobert Mustacchi 		nvmeadm_fatal(npa, "failed to read %zu bytes at 0x%lx", len,
80*7b0d41e2SRobert Mustacchi 		    off);
81*7b0d41e2SRobert Mustacchi 	}
82*7b0d41e2SRobert Mustacchi }
83*7b0d41e2SRobert Mustacchi 
/*
 * Write exactly len bytes from buf to the file descriptor ofd.
 *
 * write(2) is allowed to accept fewer bytes than requested, so we loop until
 * everything has been consumed. Any failure from write(2) is fatal: err()
 * prints the errno-derived message and exits with EXIT_FAILURE.
 */
static void
telemetry_write(int ofd, const void *buf, size_t len)
{
	/*
	 * Progress is tracked in exactly one place: 'cur' always points at the
	 * next unwritten byte and 'len' is always the number of bytes still
	 * outstanding. Keeping a separate offset *and* shrinking len, then
	 * writing 'len - off' bytes, counts every short write twice and
	 * eventually underflows the size passed to write(2). Using a byte
	 * pointer also avoids arithmetic on a void pointer, which is not
	 * standard C.
	 */
	const uint8_t *cur = buf;

	while (len > 0) {
		ssize_t ret = write(ofd, cur, len);
		if (ret < 0) {
			err(EXIT_FAILURE, "failed to write to log telemetry "
			    "output file");
		}

		cur += (size_t)ret;
		len -= (size_t)ret;
	}
}
100*7b0d41e2SRobert Mustacchi 
101*7b0d41e2SRobert Mustacchi int
do_get_logpage_telemetry(const nvme_process_arg_t * npa,const nvme_log_disc_t * disc,nvme_log_req_t * req)102*7b0d41e2SRobert Mustacchi do_get_logpage_telemetry(const nvme_process_arg_t *npa,
103*7b0d41e2SRobert Mustacchi     const nvme_log_disc_t *disc, nvme_log_req_t *req)
104*7b0d41e2SRobert Mustacchi {
105*7b0d41e2SRobert Mustacchi 	int ofd;
106*7b0d41e2SRobert Mustacchi 	const nvmeadm_get_logpage_t *log = npa->npa_cmd_arg;
107*7b0d41e2SRobert Mustacchi 	void *buf;
108*7b0d41e2SRobert Mustacchi 	nvme_telemetry_log_t hdr;
109*7b0d41e2SRobert Mustacchi 	uint64_t len;
110*7b0d41e2SRobert Mustacchi 
111*7b0d41e2SRobert Mustacchi 	if (log->ngl_output == NULL) {
112*7b0d41e2SRobert Mustacchi 		errx(-1, "log page %s requires specifying an output file",
113*7b0d41e2SRobert Mustacchi 		    nvme_log_disc_name(disc));
114*7b0d41e2SRobert Mustacchi 	}
115*7b0d41e2SRobert Mustacchi 
116*7b0d41e2SRobert Mustacchi 	ofd = open(log->ngl_output, O_WRONLY | O_TRUNC | O_CREAT, 0644);
117*7b0d41e2SRobert Mustacchi 	if (ofd < 0) {
118*7b0d41e2SRobert Mustacchi 		err(-1, "failed to create output file %s", log->ngl_output);
119*7b0d41e2SRobert Mustacchi 	}
120*7b0d41e2SRobert Mustacchi 
121*7b0d41e2SRobert Mustacchi 	buf = calloc(TELEM_BLKSIZE, sizeof (uint8_t));
122*7b0d41e2SRobert Mustacchi 	if (buf == NULL) {
123*7b0d41e2SRobert Mustacchi 		err(-1, "failed to allocate %u bytes for interim data buffer",
124*7b0d41e2SRobert Mustacchi 		    TELEM_BLKSIZE);
125*7b0d41e2SRobert Mustacchi 	}
126*7b0d41e2SRobert Mustacchi 
127*7b0d41e2SRobert Mustacchi 	/*
128*7b0d41e2SRobert Mustacchi 	 * First create a new request and read the first 512-bytes.
129*7b0d41e2SRobert Mustacchi 	 */
130*7b0d41e2SRobert Mustacchi 	if (!nvme_log_req_set_lsp(req, NVME_TELMCTRL_LSP_CTHID)) {
131*7b0d41e2SRobert Mustacchi 		nvmeadm_fatal(npa, "failed to set lsp to create host "
132*7b0d41e2SRobert Mustacchi 		    "telemetry");
133*7b0d41e2SRobert Mustacchi 	}
134*7b0d41e2SRobert Mustacchi 
135*7b0d41e2SRobert Mustacchi 	telemetry_read(npa, req, &hdr, sizeof (hdr), 0);
136*7b0d41e2SRobert Mustacchi 	telemetry_write(ofd, &hdr, sizeof (hdr));
137*7b0d41e2SRobert Mustacchi 
138*7b0d41e2SRobert Mustacchi 	/*
139*7b0d41e2SRobert Mustacchi 	 * Clear the request to create telemetry for the rest of our operation.
140*7b0d41e2SRobert Mustacchi 	 */
141*7b0d41e2SRobert Mustacchi 	if (!nvme_log_req_set_lsp(req, 0)) {
142*7b0d41e2SRobert Mustacchi 		nvmeadm_fatal(npa, "failed to set lsp to create host "
143*7b0d41e2SRobert Mustacchi 		    "telemetry");
144*7b0d41e2SRobert Mustacchi 	}
145*7b0d41e2SRobert Mustacchi 
146*7b0d41e2SRobert Mustacchi 	if (!nvme_log_disc_calc_size(disc, &len, &hdr, sizeof (hdr))) {
147*7b0d41e2SRobert Mustacchi 		errx(-1, "failed to determine full %s log length",
148*7b0d41e2SRobert Mustacchi 		    npa->npa_argv[0]);
149*7b0d41e2SRobert Mustacchi 	}
150*7b0d41e2SRobert Mustacchi 
151*7b0d41e2SRobert Mustacchi 	size_t off = sizeof (hdr);
152*7b0d41e2SRobert Mustacchi 	while (off < len) {
153*7b0d41e2SRobert Mustacchi 		size_t to_read = MIN(len - off, TELEM_BLKSIZE);
154*7b0d41e2SRobert Mustacchi 		telemetry_read(npa, req, buf, to_read, off);
155*7b0d41e2SRobert Mustacchi 		telemetry_write(ofd, buf, to_read);
156*7b0d41e2SRobert Mustacchi 		off += to_read;
157*7b0d41e2SRobert Mustacchi 	}
158*7b0d41e2SRobert Mustacchi 
159*7b0d41e2SRobert Mustacchi 	telemetry_read(npa, req, buf, sizeof (hdr), 0);
160*7b0d41e2SRobert Mustacchi 	const nvme_telemetry_log_t *final = (const nvme_telemetry_log_t *)buf;
161*7b0d41e2SRobert Mustacchi 	if (hdr.ntl_thdgn != final->ntl_thdgn) {
162*7b0d41e2SRobert Mustacchi 		errx(-1, "log telemetry generation changed: originally was "
163*7b0d41e2SRobert Mustacchi 		    "0x%x, ended with 0x%x", hdr.ntl_thdgn, final->ntl_thdgn);
164*7b0d41e2SRobert Mustacchi 	}
165*7b0d41e2SRobert Mustacchi 
166*7b0d41e2SRobert Mustacchi 	free(buf);
167*7b0d41e2SRobert Mustacchi 	(void) close(ofd);
168*7b0d41e2SRobert Mustacchi 	return (0);
169*7b0d41e2SRobert Mustacchi }
170