1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2025 Oxide Computer Company 14 */ 15 16 /* 17 * Logic to fetch and save an instance of the telemetry log page. The telemetry 18 * log page consists of a 512-byte header followed by a number of data blocks. 19 * The number of data blocks is indicated by the controller. 20 * 21 * Telemetry may either be host-initiated or device-initiated. When the 22 * telemetry is host-initiated, the host specifies when to create the telemetry 23 * using a flag in the log-specific parameter field (lsp). Whenever this is a 1, 24 * then this data is created again. When telemetry is device-initiated, which 25 * uses a different log page, then the data persists as long as the retain async 26 * event flag is specified. 27 * 28 * In the telemetry header there are two things that we need to pay attention 29 * to: 30 * 31 * 1. There are up to four indicators for the number of telemetry blocks that 32 * could exist. These are meant to be indicators of short, medium, and long. 33 * The 4th one requires the kernel to opt into it with a specific set 34 * features command. We basically always try to get the largest amount that 35 * exists in the header. 36 * 37 * 2. There are a series of generation numbers that exist. We need to ensure 38 * that these generation numbers are the same across everything that we find. 39 * So basically we read this initially at the start and then read it again at 40 * the end. If the values differ, then we throw an error or would otherwise 41 * have to start over. There are separate generation numbers for 42 * host-initiated and controller-initiated telemetry. 43 * 44 * The telemetry file may have a large number of blocks so we split this up into 45 * multiple requests of up to 1 MiB (our default maximum size). We do not assume 46 * that we will get the log all in one go. As such, we also will not assume that 47 * we can buffer the entire log page in memory and will always write it out. 48 * This means that the user will be required to use the output file option. 49 */ 50 51 #include <sys/types.h> 52 #include <sys/stat.h> 53 #include <fcntl.h> 54 #include <err.h> 55 #include <unistd.h> 56 #include <sys/sysmacros.h> 57 58 #include "nvmeadm.h" 59 60 /* 61 * We use a 64 KiB buffer here as that's usually within a device's maximum 62 * payload. 63 */ 64 #define TELEM_BLKSIZE (64 * 1024) 65 66 static void 67 telemetry_read(const nvme_process_arg_t *npa, nvme_log_req_t *req, void *buf, 68 size_t len, uint64_t off) 69 { 70 if (!nvme_log_req_set_output(req, buf, len)) { 71 nvmeadm_fatal(npa, "failed to set output buffer"); 72 } 73 74 if (!nvme_log_req_set_offset(req, off)) { 75 nvmeadm_fatal(npa, "failed to set offset to 0x%lx", off); 76 } 77 78 if (!nvme_log_req_exec(req)) { 79 nvmeadm_fatal(npa, "failed to read %zu bytes at 0x%lx", len, 80 off); 81 } 82 } 83 84 static void 85 telemetry_write(int ofd, const void *buf, size_t len) 86 { 87 size_t off = 0; 88 89 while (len > 0) { 90 ssize_t ret = write(ofd, buf + off, len - off); 91 if (ret < 0) { 92 err(EXIT_FAILURE, "failed to write to log telemetry " 93 "output file"); 94 } 95 96 off += (size_t)ret; 97 len -= (size_t)ret; 98 } 99 } 100 101 int 102 do_get_logpage_telemetry(const nvme_process_arg_t *npa, 103 const nvme_log_disc_t *disc, nvme_log_req_t *req) 104 { 105 int ofd; 106 const nvmeadm_get_logpage_t *log = npa->npa_cmd_arg; 107 void *buf; 108 nvme_telemetry_log_t hdr; 109 uint64_t len; 110 111 if (log->ngl_output == NULL) { 112 errx(-1, "log page %s requires specifying an output file", 113 nvme_log_disc_name(disc)); 114 } 115 116 ofd = open(log->ngl_output, O_WRONLY | O_TRUNC | O_CREAT, 0644); 117 if (ofd < 0) { 118 err(-1, "failed to create output file %s", log->ngl_output); 119 } 120 121 buf = calloc(TELEM_BLKSIZE, sizeof (uint8_t)); 122 if (buf == NULL) { 123 err(-1, "failed to allocate %u bytes for interim data buffer", 124 TELEM_BLKSIZE); 125 } 126 127 /* 128 * First create a new request and read the first 512-bytes. 129 */ 130 if (!nvme_log_req_set_lsp(req, NVME_TELMCTRL_LSP_CTHID)) { 131 nvmeadm_fatal(npa, "failed to set lsp to create host " 132 "telemetry"); 133 } 134 135 telemetry_read(npa, req, &hdr, sizeof (hdr), 0); 136 telemetry_write(ofd, &hdr, sizeof (hdr)); 137 138 /* 139 * Clear the request to create telemetry for the rest of our operation. 140 */ 141 if (!nvme_log_req_set_lsp(req, 0)) { 142 nvmeadm_fatal(npa, "failed to set lsp to create host " 143 "telemetry"); 144 } 145 146 if (!nvme_log_disc_calc_size(disc, &len, &hdr, sizeof (hdr))) { 147 errx(-1, "failed to determine full %s log length", 148 npa->npa_argv[0]); 149 } 150 151 size_t off = sizeof (hdr); 152 while (off < len) { 153 size_t to_read = MIN(len - off, TELEM_BLKSIZE); 154 telemetry_read(npa, req, buf, to_read, off); 155 telemetry_write(ofd, buf, to_read); 156 off += to_read; 157 } 158 159 telemetry_read(npa, req, buf, sizeof (hdr), 0); 160 const nvme_telemetry_log_t *final = (const nvme_telemetry_log_t *)buf; 161 if (hdr.ntl_thdgn != final->ntl_thdgn) { 162 errx(-1, "log telemetry generation changed: originally was " 163 "0x%x, ended with 0x%x", hdr.ntl_thdgn, final->ntl_thdgn); 164 } 165 166 free(buf); 167 (void) close(ofd); 168 return (0); 169 } 170