xref: /illumos-gate/usr/src/cmd/nvmeadm/nvmeadm_micron.c (revision a3f161ae396d35a44727aa9d23cf0889e44e6eff)
1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright 2026 Oxide Computer Company
14  */
15 
16 /*
17  * Deal with Micron-specific logs.
18  */
19 
20 #include <err.h>
21 #include <string.h>
22 #include <sys/stddef.h>
23 #include <sys/sysmacros.h>
24 #include <sys/nvme/micron.h>
25 
26 #include "nvmeadm.h"
27 
28 /*
29  * Synthetic identifiers for these logs to deal with the changes that we have
30  * found in here over time. See micron_vul_ext_smart_getvers() for more.
31  */
enum {
	MICRON_GEN_73XX = 1,
	MICRON_GEN_74XX = 2
};
34 
/*
 * Expand to the designated initializers for a field's length and offset, both
 * taken from the mes_<f> member of micron_vul_ext_smart_t.
 */
#define	MICRON_F_SMART(f)						\
	.nf_len = sizeof (((micron_vul_ext_smart_t *)NULL)->mes_##f),	\
	.nf_off = offsetof(micron_vul_ext_smart_t, mes_##f)
38 
39 static const nvmeadm_field_bit_t micron_vul_ext_smart_wpr_bits[] = { {
40 	.nfb_lowbit = 0, .nfb_hibit = 0,
41 	.nfb_short = "dramue",
42 	.nfb_desc = "DRAM Double Bit Error",
43 	.nfb_type = NVMEADM_FT_STRMAP,
44 	.nfb_strs = { "did not occur", "occurred" }
45 }, {
46 	.nfb_lowbit = 1, .nfb_hibit = 1,
47 	.nfb_short = "spare",
48 	.nfb_desc = "Low Remaining Spare Block Count",
49 	.nfb_type = NVMEADM_FT_STRMAP,
50 	.nfb_strs = { "did not occur", "occurred" }
51 }, {
52 	.nfb_lowbit = 2, .nfb_hibit = 2,
53 	.nfb_short = "cap",
54 	.nfb_desc = "Power Holdup Capacitor Failure",
55 	.nfb_type = NVMEADM_FT_STRMAP,
56 	.nfb_strs = { "did not occur", "occurred" }
57 }, {
58 	.nfb_lowbit = 3, .nfb_hibit = 3,
59 	.nfb_short = "nvram",
60 	.nfb_desc = "NVRAM Checksum Failure",
61 	.nfb_type = NVMEADM_FT_STRMAP,
62 	.nfb_strs = { "did not occur", "occurred" }
63 }, {
64 	.nfb_lowbit = 4, .nfb_hibit = 4,
65 	.nfb_short = "daor",
66 	.nfb_desc = "DRAM Address Out of Range",
67 	.nfb_type = NVMEADM_FT_STRMAP,
68 	.nfb_strs = { "did not occur", "occurred" }
69 }, {
70 	.nfb_lowbit = 5, .nfb_hibit = 5,
71 	.nfb_short = "temp",
72 	.nfb_desc = "Overtemp Shutdown",
73 	.nfb_type = NVMEADM_FT_STRMAP,
74 	.nfb_strs = { "did not occur", "occurred" }
75 } };
76 
77 static const nvmeadm_field_t micron_vul_ext_smart_fields[] = { {
78 	MICRON_F_SMART(gbb),
79 	.nf_short = "gbb",
80 	.nf_desc = "Grown Bad Block Count",
81 	.nf_type = NVMEADM_FT_HEX
82 }, {
83 	MICRON_F_SMART(max_erase),
84 	.nf_short = "mec",
85 	.nf_desc = "Per-Block Max Erase Count",
86 	.nf_type = NVMEADM_FT_HEX
87 }, {
88 	MICRON_F_SMART(power_on),
89 	.nf_short = "pon",
90 	.nf_desc = "Power-on",
91 	.nf_type = NVMEADM_FT_UNIT,
92 	.nf_addend = { .nfa_unit = "min" }
93 }, {
94 	MICRON_F_SMART(wp_reason),
95 	.nf_short = "wpr",
96 	.nf_desc = "Write Protect Reason",
97 	NVMEADM_F_BITS(micron_vul_ext_smart_wpr_bits)
98 }, {
99 	MICRON_F_SMART(cap),
100 	.nf_short = "cap",
101 	.nf_desc = "Device Capacity",
102 	.nf_type = NVMEADM_FT_BYTES
103 }, {
104 	MICRON_F_SMART(erase_count),
105 	.nf_short = "tec",
106 	.nf_desc = "Total Erase Count",
107 	.nf_type = NVMEADM_FT_HEX
108 }, {
109 	MICRON_F_SMART(use_rate),
110 	.nf_short = "use",
111 	.nf_desc = "Lifetime Use Rate",
112 	.nf_type = NVMEADM_FT_HEX
113 }, {
114 	MICRON_F_SMART(erase_fail),
115 	.nf_short = "efc",
116 	.nf_desc = "Erase Fail Count",
117 	.nf_rev = MICRON_GEN_74XX,
118 	.nf_type = NVMEADM_FT_HEX
119 }, {
120 	MICRON_F_SMART(uecc),
121 	.nf_short = "uecc",
122 	.nf_desc = "Reported Uncorrectable ECC Errors",
123 	.nf_rev = MICRON_GEN_74XX,
124 	.nf_type = NVMEADM_FT_HEX
125 }, {
126 	MICRON_F_SMART(prog_fail),
127 	.nf_short = "pfc",
128 	.nf_desc = "Program Fail Count",
129 	.nf_rev = MICRON_GEN_74XX,
130 	.nf_type = NVMEADM_FT_HEX
131 }, {
132 	MICRON_F_SMART(read_bytes),
133 	.nf_short = "read",
134 	.nf_desc = "Total Bytes Read",
135 	.nf_rev = MICRON_GEN_74XX,
136 	.nf_type = NVMEADM_FT_BYTES
137 }, {
138 	MICRON_F_SMART(write_bytes),
139 	.nf_short = "write",
140 	.nf_desc = "Total Bytes Written",
141 	.nf_rev = MICRON_GEN_74XX,
142 	.nf_type = NVMEADM_FT_BYTES
143 }, {
144 	MICRON_F_SMART(trans_size),
145 	.nf_short = "tus",
146 	.nf_desc = "Translation Unit Size",
147 	.nf_type = NVMEADM_FT_BYTES
148 }, {
149 	MICRON_F_SMART(bs_total),
150 	.nf_short = "tbs",
151 	.nf_desc = "Total Block Stripe Count for User Data",
152 	.nf_type = NVMEADM_FT_HEX
153 }, {
154 	MICRON_F_SMART(bs_free),
155 	.nf_short = "fbs",
156 	.nf_desc = "Free Block Stripe Count for User Data",
157 	.nf_type = NVMEADM_FT_HEX
158 }, {
159 	MICRON_F_SMART(bs_cap),
160 	.nf_short = "bss",
161 	.nf_desc = "Block Stripe Size",
162 	.nf_type = NVMEADM_FT_BYTES
163 }, {
164 	MICRON_F_SMART(user_erase_min),
165 	.nf_short = "ubemin",
166 	.nf_desc = "Minimum User Block Erase Count",
167 	.nf_type = NVMEADM_FT_HEX
168 }, {
169 	MICRON_F_SMART(user_erase_avg),
170 	.nf_short = "ubeavg",
171 	.nf_desc = "Average User Block Erase Count",
172 	.nf_type = NVMEADM_FT_HEX
173 }, {
174 	MICRON_F_SMART(user_erase_max),
175 	.nf_short = "ubemax",
176 	.nf_desc = "Maximum User Block Erase Count",
177 	.nf_type = NVMEADM_FT_HEX
178 } };
179 
180 /*
181  * The 73xx series and 74xx series have some different entries in these log
182  * pages. There is no good way to determine this in the log. Instead we use a
183  * crude but reasonable heuristic. The 74xx series added a pair of counters for
184  * total bytes read and written. If these are zero, then we know we're on the
185  * 73xx assuming it's playing by its reserved rules. We've also seen some cases
186  * where the 73xx parts will return all 1s for the reserved fields, so we check
187  * that too.
188  */
189 static uint32_t
micron_vul_ext_smart_getvers(const void * data,size_t len)190 micron_vul_ext_smart_getvers(const void *data, size_t len)
191 {
192 	const uint8_t zero[16] = { 0 };
193 	uint8_t ones[16];
194 
195 	if (len < sizeof (micron_vul_ext_smart_t)) {
196 		errx(-1, "cannot parse revision information, found 0x%zx "
197 		    "bytes, need at least 0x%zx", len,
198 		    sizeof (micron_vul_ext_smart_t));
199 	}
200 
201 	(void) memset(ones, 0xff, sizeof (ones));
202 	const micron_vul_ext_smart_t *log = data;
203 	if (memcmp(zero, log->mes_read_bytes, sizeof (zero)) == 0 &&
204 	    memcmp(zero, log->mes_write_bytes, sizeof (zero)) == 0) {
205 		return (MICRON_GEN_73XX);
206 	}
207 
208 	if (memcmp(ones, log->mes_read_bytes, sizeof (ones)) == 0 &&
209 	    memcmp(ones, log->mes_write_bytes, sizeof (ones)) == 0) {
210 		return (MICRON_GEN_73XX);
211 	}
212 
213 	return (MICRON_GEN_74XX);
214 }
215 
216 const nvmeadm_log_field_info_t micron_vul_extsmart_field_info = {
217 	.nlfi_log = "micron/extsmart",
218 	.nlfi_fields = micron_vul_ext_smart_fields,
219 	.nlfi_nfields = ARRAY_SIZE(micron_vul_ext_smart_fields),
220 	.nlfi_min = sizeof (micron_vul_ext_smart_t),
221 	.nlfi_getrev = micron_vul_ext_smart_getvers
222 };
223