xref: /linux/drivers/platform/x86/intel/ifs/ifs.h (revision 156010ed9c2ac1e9df6c11b1f688cf8a6e0152e6)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /* Copyright(c) 2022 Intel Corporation. */
3 
4 #ifndef _IFS_H_
5 #define _IFS_H_
6 
7 /**
8  * DOC: In-Field Scan
9  *
10  * =============
11  * In-Field Scan
12  * =============
13  *
14  * Introduction
15  * ------------
16  *
17  * In Field Scan (IFS) is a hardware feature to run circuit level tests on
18  * a CPU core to detect problems that are not caught by parity or ECC checks.
19  * Future CPUs will support more than one type of test which will show up
20  * with a new platform-device instance-id, for now only .0 is exposed.
21  *
22  *
23  * IFS Image
24  * ---------
25  *
26  * Intel provides a firmware file containing the scan tests via
27  * github [#f1]_.  Similar to microcode there is a separate file for each
28  * family-model-stepping.
29  *
30  * IFS Image Loading
31  * -----------------
32  *
33  * The driver loads the tests into memory reserved by BIOS local to each CPU
34  * socket in a two step process using writes to MSRs to first load the
35  * SHA hashes for the test. Then the tests themselves. Status MSRs provide
36  * feedback on the success/failure of these steps.
37  *
38  * The test files are kept in a fixed location: /lib/firmware/intel/ifs_0/
39  * For example, if there are 3 test files, they would be named in the following
40  * fashion:
41  * ff-mm-ss-01.scan
42  * ff-mm-ss-02.scan
43  * ff-mm-ss-03.scan
44  * (where ff refers to family, mm indicates model and ss indicates stepping)
45  *
46  * A different test file can be loaded by writing the numerical portion
47  * (e.g. 1, 2 or 3 in the above scenario) into the current_batch file.
48  * To load ff-mm-ss-02.scan, the following command can be used::
49  *
50  *   # echo 2 > /sys/devices/virtual/misc/intel_ifs_0/current_batch
51  *
52  * The above file can also be read to know the currently loaded image.
53  *
54  * Running tests
55  * -------------
56  *
57  * Tests are run by the driver synchronizing execution of all threads on a
58  * core and then writing to the ACTIVATE_SCAN MSR on all threads. Instruction
59  * execution continues when:
60  *
61  * 1) All tests have completed.
62  * 2) Execution was interrupted.
63  * 3) A test detected a problem.
64  *
65  * Note that ALL THREADS ON THE CORE ARE EFFECTIVELY OFFLINE FOR THE
66  * DURATION OF THE TEST. This can be up to 200 milliseconds. If the system
67  * is running latency sensitive applications that cannot tolerate an
68  * interruption of this magnitude, the system administrator must arrange
69  * to migrate those applications to other cores before running a core test.
70  * It may also be necessary to redirect interrupts to other CPUs.
71  *
72  * In all cases reading the SCAN_STATUS MSR provides details on what
73  * happened. The driver makes the value of this MSR visible to applications
74  * via the "details" file (see below). Interrupted tests may be restarted.
75  *
76  * The IFS driver provides sysfs interfaces via /sys/devices/virtual/misc/intel_ifs_0/
77  * to control execution:
78  *
79  * Test a specific core::
80  *
81  *   # echo <cpu#> > /sys/devices/virtual/misc/intel_ifs_0/run_test
82  *
83  * When HT is enabled any of the sibling cpu# can be specified to test
84  * its corresponding physical core. Since the tests are per physical core,
85  * the result of testing any thread is the same. All siblings must be online
86  * to run a core test. It is only necessary to test one thread.
87  *
88  * For example, to test the core corresponding to cpu5::
89  *
90  *   # echo 5 > /sys/devices/virtual/misc/intel_ifs_0/run_test
91  *
92  * Results of the last test are provided in /sys::
93  *
94  *   $ cat /sys/devices/virtual/misc/intel_ifs_0/status
95  *   pass
96  *
97  * Status can be one of pass, fail, untested
98  *
99  * Additional details of the last test are provided by the details file::
100  *
101  *   $ cat /sys/devices/virtual/misc/intel_ifs_0/details
102  *   0x8081
103  *
104  * The details file reports the hex value of the SCAN_STATUS MSR.
105  * Hardware defined error codes are documented in volume 4 of the Intel
106  * Software Developer's Manual but the error_code field may contain one of
107  * the following driver defined software codes:
108  *
109  * +------+--------------------+
110  * | 0xFD | Software timeout   |
111  * +------+--------------------+
112  * | 0xFE | Partial completion |
113  * +------+--------------------+
114  *
115  * Driver design choices
116  * ---------------------
117  *
118  * 1) The ACTIVATE_SCAN MSR allows for running any consecutive subrange of
119  * available tests. But the driver always tries to run all tests and only
120  * uses the subrange feature to restart an interrupted test.
121  *
122  * 2) Hardware allows for some number of cores to be tested in parallel.
123  * The driver does not make use of this, it only tests one core at a time.
124  *
125  * .. [#f1] https://github.com/intel/TBD
126  */
127 #include <linux/device.h>
128 #include <linux/miscdevice.h>
129 
/*
 * MSR addresses used by the IFS driver. Images are loaded in two steps
 * (hashes first, then the test chunks), each with a status MSR; tests are
 * started through ACTIVATE_SCAN and their outcome is read from SCAN_STATUS.
 */
#define MSR_COPY_SCAN_HASHES			0x000002c2
#define MSR_SCAN_HASHES_STATUS			0x000002c3

#define MSR_AUTHENTICATE_AND_COPY_CHUNK		0x000002c4
#define MSR_CHUNKS_AUTHENTICATION_STATUS	0x000002c5

#define MSR_ACTIVATE_SCAN			0x000002c6
#define MSR_SCAN_STATUS				0x000002c7

/*
 * Coarse test result codes (untested / pass / fail).
 * NOTE(review): presumably the values stored in struct ifs_data.status —
 * confirm against the users of that field.
 */
#define SCAN_NOT_TESTED				0
#define SCAN_TEST_PASS				1
#define SCAN_TEST_FAIL				2
139 
140 /* MSR_SCAN_HASHES_STATUS bit fields */
141 union ifs_scan_hashes_status {
142 	u64	data;
143 	struct {
144 		u32	chunk_size	:16;
145 		u32	num_chunks	:8;
146 		u32	rsvd1		:8;
147 		u32	error_code	:8;
148 		u32	rsvd2		:11;
149 		u32	max_core_limit	:12;
150 		u32	valid		:1;
151 	};
152 };
153 
154 /* MSR_CHUNKS_AUTH_STATUS bit fields */
155 union ifs_chunks_auth_status {
156 	u64	data;
157 	struct {
158 		u32	valid_chunks	:8;
159 		u32	total_chunks	:8;
160 		u32	rsvd1		:16;
161 		u32	error_code	:8;
162 		u32	rsvd2		:24;
163 	};
164 };
165 
166 /* MSR_ACTIVATE_SCAN bit fields */
167 union ifs_scan {
168 	u64	data;
169 	struct {
170 		u32	start	:8;
171 		u32	stop	:8;
172 		u32	rsvd	:16;
173 		u32	delay	:31;
174 		u32	sigmce	:1;
175 	};
176 };
177 
178 /* MSR_SCAN_STATUS bit fields */
179 union ifs_status {
180 	u64	data;
181 	struct {
182 		u32	chunk_num		:8;
183 		u32	chunk_stop_index	:8;
184 		u32	rsvd1			:16;
185 		u32	error_code		:8;
186 		u32	rsvd2			:22;
187 		u32	control_error		:1;
188 		u32	signature_error		:1;
189 	};
190 };
191 
/*
 * Error codes filled in by the driver itself (software defined), as opposed
 * to the hardware defined codes:
 *
 *   IFS_SW_TIMEOUT            - the test timed out before completing all
 *                               the chunks.
 *   IFS_SW_PARTIAL_COMPLETION - not all scan chunks were executed; the
 *                               maximum number of forward progress retries
 *                               was exceeded.
 */
#define IFS_SW_TIMEOUT				0xFD
#define IFS_SW_PARTIAL_COMPLETION		0xFE
199 
200 /**
201  * struct ifs_data - attributes related to intel IFS driver
202  * @integrity_cap_bit: MSR_INTEGRITY_CAPS bit enumerating this test
203  * @loaded_version: stores the currently loaded ifs image version.
204  * @pkg_auth: array of bool storing per package auth status
205  * @loaded: If a valid test binary has been loaded into the memory
206  * @loading_error: Error occurred on another CPU while loading image
207  * @valid_chunks: number of chunks which could be validated.
208  * @status: it holds simple status pass/fail/untested
209  * @scan_details: opaque scan status code from h/w
210  * @cur_batch: number indicating the currently loaded test file
211  * @test_num: number indicating the test type
212  */
213 struct ifs_data {
214 	int	integrity_cap_bit;
215 	bool	*pkg_auth;
216 	int	loaded_version;
217 	bool	loaded;
218 	bool	loading_error;
219 	int	valid_chunks;
220 	int	status;
221 	u64	scan_details;
222 	u32	cur_batch;
223 	int	test_num;
224 };
225 
226 struct ifs_work {
227 	struct work_struct w;
228 	struct device *dev;
229 };
230 
231 struct ifs_device {
232 	struct ifs_data data;
233 	struct miscdevice misc;
234 };
235 
236 static inline struct ifs_data *ifs_get_data(struct device *dev)
237 {
238 	struct miscdevice *m = dev_get_drvdata(dev);
239 	struct ifs_device *d = container_of(m, struct ifs_device, misc);
240 
241 	return &d->data;
242 }
243 
/*
 * Driver entry points declared here and defined outside this header.
 * NOTE(review): the int returns presumably follow the 0 / negative errno
 * convention — confirm against the definitions.
 */
int ifs_load_firmware(struct device *dev);
int do_core_test(int cpu, struct device *dev);
const struct attribute_group **ifs_get_groups(void);
247 
248 #endif
249