xref: /linux/drivers/platform/x86/intel/ifs/runtest.c (revision 42b16d3ac371a2fac9b6f08fd75f23f34ba3955a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright(c) 2022 Intel Corporation. */
3 
4 #include <linux/cpu.h>
5 #include <linux/delay.h>
6 #include <linux/fs.h>
7 #include <linux/nmi.h>
8 #include <linux/slab.h>
9 #include <linux/stop_machine.h>
10 
11 #include "ifs.h"
12 
/*
 * Note: all code and data in this file are protected by
 * ifs_sem. On HT systems all threads on a core will
 * execute together, but only the first thread on the
 * core will update the results of the test.
 */
19 
20 #define CREATE_TRACE_POINTS
21 #include <trace/events/intel_ifs.h>
22 
/*
 * Upper bound on consecutive attempts that make no forward progress
 * (same chunk for the scan test, same program index for SBAF) before
 * the test loop gives up.
 */
#define MAX_IFS_RETRIES		5
25 
26 struct run_params {
27 	struct ifs_data *ifsd;
28 	union ifs_scan *activate;
29 	union ifs_status status;
30 };
31 
32 struct sbaf_run_params {
33 	struct ifs_data *ifsd;
34 	int *retry_cnt;
35 	union ifs_sbaf *activate;
36 	union ifs_sbaf_status status;
37 };
38 
/*
 * How long, in TSC cycles, a logical CPU waits inside the activation
 * WRMSR for the other logical CPU of the core to arrive. Used as the
 * "delay" field of both the scan and the SBAF activation values.
 */
#define IFS_THREAD_WAIT		100000
44 
/*
 * Values of the error_code field of the scan status. They double as
 * indices into scan_test_status[] and drive the retry decision in
 * can_restart().
 */
enum ifs_status_err_code {
	IFS_NO_ERROR				= 0x0,
	IFS_OTHER_THREAD_COULD_NOT_JOIN		= 0x1,
	IFS_INTERRUPTED_BEFORE_RENDEZVOUS	= 0x2,
	IFS_POWER_MGMT_INADEQUATE_FOR_SCAN	= 0x3,
	IFS_INVALID_CHUNK_RANGE			= 0x4,
	IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS	= 0x5,
	IFS_CORE_NOT_CAPABLE_CURRENTLY		= 0x6,
	IFS_UNASSIGNED_ERROR_CODE		= 0x7,
	IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT	= 0x8,
	IFS_INTERRUPTED_DURING_EXECUTION	= 0x9,
	IFS_UNASSIGNED_ERROR_CODE_0xA		= 0xA,
	IFS_CORRUPTED_CHUNK			= 0xB,
};
59 
60 static const char * const scan_test_status[] = {
61 	[IFS_NO_ERROR] = "SCAN no error",
62 	[IFS_OTHER_THREAD_COULD_NOT_JOIN] = "Other thread could not join.",
63 	[IFS_INTERRUPTED_BEFORE_RENDEZVOUS] = "Interrupt occurred prior to SCAN coordination.",
64 	[IFS_POWER_MGMT_INADEQUATE_FOR_SCAN] =
65 	"Core Abort SCAN Response due to power management condition.",
66 	[IFS_INVALID_CHUNK_RANGE] = "Non valid chunks in the range",
67 	[IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS] = "Mismatch in arguments between threads T0/T1.",
68 	[IFS_CORE_NOT_CAPABLE_CURRENTLY] = "Core not capable of performing SCAN currently",
69 	[IFS_UNASSIGNED_ERROR_CODE] = "Unassigned error code 0x7",
70 	[IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT] =
71 	"Exceeded number of Logical Processors (LP) allowed to run Scan-At-Field concurrently",
72 	[IFS_INTERRUPTED_DURING_EXECUTION] = "Interrupt occurred prior to SCAN start",
73 	[IFS_UNASSIGNED_ERROR_CODE_0xA] = "Unassigned error code 0xA",
74 	[IFS_CORRUPTED_CHUNK] = "Scan operation aborted due to corrupted image. Try reloading",
75 };
76 
message_not_tested(struct device * dev,int cpu,union ifs_status status)77 static void message_not_tested(struct device *dev, int cpu, union ifs_status status)
78 {
79 	struct ifs_data *ifsd = ifs_get_data(dev);
80 
81 	/*
82 	 * control_error is set when the microcode runs into a problem
83 	 * loading the image from the reserved BIOS memory, or it has
84 	 * been corrupted. Reloading the image may fix this issue.
85 	 */
86 	if (status.control_error) {
87 		dev_warn(dev, "CPU(s) %*pbl: Scan controller error. Batch: %02x version: 0x%x\n",
88 			 cpumask_pr_args(cpu_smt_mask(cpu)), ifsd->cur_batch, ifsd->loaded_version);
89 		return;
90 	}
91 
92 	if (status.error_code < ARRAY_SIZE(scan_test_status)) {
93 		dev_info(dev, "CPU(s) %*pbl: SCAN operation did not start. %s\n",
94 			 cpumask_pr_args(cpu_smt_mask(cpu)),
95 			 scan_test_status[status.error_code]);
96 	} else if (status.error_code == IFS_SW_TIMEOUT) {
97 		dev_info(dev, "CPU(s) %*pbl: software timeout during scan\n",
98 			 cpumask_pr_args(cpu_smt_mask(cpu)));
99 	} else if (status.error_code == IFS_SW_PARTIAL_COMPLETION) {
100 		dev_info(dev, "CPU(s) %*pbl: %s\n",
101 			 cpumask_pr_args(cpu_smt_mask(cpu)),
102 			 "Not all scan chunks were executed. Maximum forward progress retries exceeded");
103 	} else {
104 		dev_info(dev, "CPU(s) %*pbl: SCAN unknown status %llx\n",
105 			 cpumask_pr_args(cpu_smt_mask(cpu)), status.data);
106 	}
107 }
108 
message_fail(struct device * dev,int cpu,union ifs_status status)109 static void message_fail(struct device *dev, int cpu, union ifs_status status)
110 {
111 	struct ifs_data *ifsd = ifs_get_data(dev);
112 
113 	/*
114 	 * signature_error is set when the output from the scan chains does not
115 	 * match the expected signature. This might be a transient problem (e.g.
116 	 * due to a bit flip from an alpha particle or neutron). If the problem
117 	 * repeats on a subsequent test, then it indicates an actual problem in
118 	 * the core being tested.
119 	 */
120 	if (status.signature_error) {
121 		dev_err(dev, "CPU(s) %*pbl: test signature incorrect. Batch: %02x version: 0x%x\n",
122 			cpumask_pr_args(cpu_smt_mask(cpu)), ifsd->cur_batch, ifsd->loaded_version);
123 	}
124 }
125 
can_restart(union ifs_status status)126 static bool can_restart(union ifs_status status)
127 {
128 	enum ifs_status_err_code err_code = status.error_code;
129 
130 	/* Signature for chunk is bad, or scan test failed */
131 	if (status.signature_error || status.control_error)
132 		return false;
133 
134 	switch (err_code) {
135 	case IFS_NO_ERROR:
136 	case IFS_OTHER_THREAD_COULD_NOT_JOIN:
137 	case IFS_INTERRUPTED_BEFORE_RENDEZVOUS:
138 	case IFS_POWER_MGMT_INADEQUATE_FOR_SCAN:
139 	case IFS_EXCEED_NUMBER_OF_THREADS_CONCURRENT:
140 	case IFS_INTERRUPTED_DURING_EXECUTION:
141 		return true;
142 	case IFS_INVALID_CHUNK_RANGE:
143 	case IFS_MISMATCH_ARGUMENTS_BETWEEN_THREADS:
144 	case IFS_CORE_NOT_CAPABLE_CURRENTLY:
145 	case IFS_UNASSIGNED_ERROR_CODE:
146 	case IFS_UNASSIGNED_ERROR_CODE_0xA:
147 	case IFS_CORRUPTED_CHUNK:
148 		break;
149 	}
150 	return false;
151 }
152 
153 #define SPINUNIT 100 /* 100 nsec */
154 static atomic_t array_cpus_in;
155 static atomic_t scan_cpus_in;
156 static atomic_t sbaf_cpus_in;
157 
158 /*
159  * Simplified cpu sibling rendezvous loop based on microcode loader __wait_for_cpus()
160  */
wait_for_sibling_cpu(atomic_t * t,long long timeout)161 static void wait_for_sibling_cpu(atomic_t *t, long long timeout)
162 {
163 	int cpu = smp_processor_id();
164 	const struct cpumask *smt_mask = cpu_smt_mask(cpu);
165 	int all_cpus = cpumask_weight(smt_mask);
166 
167 	atomic_inc(t);
168 	while (atomic_read(t) < all_cpus) {
169 		if (timeout < SPINUNIT)
170 			return;
171 		ndelay(SPINUNIT);
172 		timeout -= SPINUNIT;
173 		touch_nmi_watchdog();
174 	}
175 }
176 
177 /*
178  * Execute the scan. Called "simultaneously" on all threads of a core
179  * at high priority using the stop_cpus mechanism.
180  */
doscan(void * data)181 static int doscan(void *data)
182 {
183 	int cpu = smp_processor_id(), start, stop;
184 	struct run_params *params = data;
185 	union ifs_status status;
186 	struct ifs_data *ifsd;
187 	int first;
188 
189 	ifsd = params->ifsd;
190 
191 	if (ifsd->generation) {
192 		start = params->activate->gen2.start;
193 		stop = params->activate->gen2.stop;
194 	} else {
195 		start = params->activate->gen0.start;
196 		stop = params->activate->gen0.stop;
197 	}
198 
199 	/* Only the first logical CPU on a core reports result */
200 	first = cpumask_first(cpu_smt_mask(cpu));
201 
202 	wait_for_sibling_cpu(&scan_cpus_in, NSEC_PER_SEC);
203 
204 	/*
205 	 * This WRMSR will wait for other HT threads to also write
206 	 * to this MSR (at most for activate.delay cycles). Then it
207 	 * starts scan of each requested chunk. The core scan happens
208 	 * during the "execution" of the WRMSR. This instruction can
209 	 * take up to 200 milliseconds (in the case where all chunks
210 	 * are processed in a single pass) before it retires.
211 	 */
212 	wrmsrl(MSR_ACTIVATE_SCAN, params->activate->data);
213 	rdmsrl(MSR_SCAN_STATUS, status.data);
214 
215 	trace_ifs_status(ifsd->cur_batch, start, stop, status.data);
216 
217 	/* Pass back the result of the scan */
218 	if (cpu == first)
219 		params->status = status;
220 
221 	return 0;
222 }
223 
224 /*
225  * Use stop_core_cpuslocked() to synchronize writing to MSR_ACTIVATE_SCAN
226  * on all threads of the core to be tested. Loop if necessary to complete
227  * run of all chunks. Include some defensive tests to make sure forward
228  * progress is made, and that the whole test completes in a reasonable time.
229  */
ifs_test_core(int cpu,struct device * dev)230 static void ifs_test_core(int cpu, struct device *dev)
231 {
232 	union ifs_status status = {};
233 	union ifs_scan activate;
234 	unsigned long timeout;
235 	struct ifs_data *ifsd;
236 	int to_start, to_stop;
237 	int status_chunk;
238 	struct run_params params;
239 	int retries;
240 
241 	ifsd = ifs_get_data(dev);
242 
243 	activate.gen0.rsvd = 0;
244 	activate.delay = IFS_THREAD_WAIT;
245 	activate.sigmce = 0;
246 	to_start = 0;
247 	to_stop = ifsd->valid_chunks - 1;
248 
249 	params.ifsd = ifs_get_data(dev);
250 
251 	if (ifsd->generation) {
252 		activate.gen2.start = to_start;
253 		activate.gen2.stop = to_stop;
254 	} else {
255 		activate.gen0.start = to_start;
256 		activate.gen0.stop = to_stop;
257 	}
258 
259 	timeout = jiffies + HZ / 2;
260 	retries = MAX_IFS_RETRIES;
261 
262 	while (to_start <= to_stop) {
263 		if (time_after(jiffies, timeout)) {
264 			status.error_code = IFS_SW_TIMEOUT;
265 			break;
266 		}
267 
268 		params.activate = &activate;
269 		atomic_set(&scan_cpus_in, 0);
270 		stop_core_cpuslocked(cpu, doscan, &params);
271 
272 		status = params.status;
273 
274 		/* Some cases can be retried, give up for others */
275 		if (!can_restart(status))
276 			break;
277 
278 		status_chunk = ifsd->generation ? status.gen2.chunk_num : status.gen0.chunk_num;
279 		if (status_chunk == to_start) {
280 			/* Check for forward progress */
281 			if (--retries == 0) {
282 				if (status.error_code == IFS_NO_ERROR)
283 					status.error_code = IFS_SW_PARTIAL_COMPLETION;
284 				break;
285 			}
286 		} else {
287 			retries = MAX_IFS_RETRIES;
288 			if (ifsd->generation)
289 				activate.gen2.start = status_chunk;
290 			else
291 				activate.gen0.start = status_chunk;
292 			to_start = status_chunk;
293 		}
294 	}
295 
296 	/* Update status for this core */
297 	ifsd->scan_details = status.data;
298 
299 	if (status.signature_error) {
300 		ifsd->status = SCAN_TEST_FAIL;
301 		message_fail(dev, cpu, status);
302 	} else if (status.control_error || status.error_code) {
303 		ifsd->status = SCAN_NOT_TESTED;
304 		message_not_tested(dev, cpu, status);
305 	} else {
306 		ifsd->status = SCAN_TEST_PASS;
307 	}
308 }
309 
do_array_test(void * data)310 static int do_array_test(void *data)
311 {
312 	union ifs_array *command = data;
313 	int cpu = smp_processor_id();
314 	int first;
315 
316 	wait_for_sibling_cpu(&array_cpus_in, NSEC_PER_SEC);
317 
318 	/*
319 	 * Only one logical CPU on a core needs to trigger the Array test via MSR write.
320 	 */
321 	first = cpumask_first(cpu_smt_mask(cpu));
322 
323 	if (cpu == first) {
324 		wrmsrl(MSR_ARRAY_BIST, command->data);
325 		/* Pass back the result of the test */
326 		rdmsrl(MSR_ARRAY_BIST, command->data);
327 	}
328 
329 	return 0;
330 }
331 
ifs_array_test_core(int cpu,struct device * dev)332 static void ifs_array_test_core(int cpu, struct device *dev)
333 {
334 	union ifs_array command = {};
335 	bool timed_out = false;
336 	struct ifs_data *ifsd;
337 	unsigned long timeout;
338 
339 	ifsd = ifs_get_data(dev);
340 
341 	command.array_bitmask = ~0U;
342 	timeout = jiffies + HZ / 2;
343 
344 	do {
345 		if (time_after(jiffies, timeout)) {
346 			timed_out = true;
347 			break;
348 		}
349 		atomic_set(&array_cpus_in, 0);
350 		stop_core_cpuslocked(cpu, do_array_test, &command);
351 
352 		if (command.ctrl_result)
353 			break;
354 	} while (command.array_bitmask);
355 
356 	ifsd->scan_details = command.data;
357 
358 	if (command.ctrl_result)
359 		ifsd->status = SCAN_TEST_FAIL;
360 	else if (timed_out || command.array_bitmask)
361 		ifsd->status = SCAN_NOT_TESTED;
362 	else
363 		ifsd->status = SCAN_TEST_PASS;
364 }
365 
/* Value written to MSR_ARRAY_TRIGGER by do_array_test_gen1() */
#define ARRAY_GEN1_TEST_ALL_ARRAYS	0x0ULL
/* Bit of the MSR_ARRAY_STATUS value that marks the test as failed */
#define ARRAY_GEN1_STATUS_FAIL		0x1ULL
368 
do_array_test_gen1(void * status)369 static int do_array_test_gen1(void *status)
370 {
371 	int cpu = smp_processor_id();
372 	int first;
373 
374 	first = cpumask_first(cpu_smt_mask(cpu));
375 
376 	if (cpu == first) {
377 		wrmsrl(MSR_ARRAY_TRIGGER, ARRAY_GEN1_TEST_ALL_ARRAYS);
378 		rdmsrl(MSR_ARRAY_STATUS, *((u64 *)status));
379 	}
380 
381 	return 0;
382 }
383 
ifs_array_test_gen1(int cpu,struct device * dev)384 static void ifs_array_test_gen1(int cpu, struct device *dev)
385 {
386 	struct ifs_data *ifsd = ifs_get_data(dev);
387 	u64 status = 0;
388 
389 	stop_core_cpuslocked(cpu, do_array_test_gen1, &status);
390 	ifsd->scan_details = status;
391 
392 	if (status & ARRAY_GEN1_STATUS_FAIL)
393 		ifsd->status = SCAN_TEST_FAIL;
394 	else
395 		ifsd->status = SCAN_TEST_PASS;
396 }
397 
/* Values of the sbaf_status field of the SBAF status */
#define SBAF_STATUS_PASS			0
#define SBAF_STATUS_SIGN_FAIL			1	/* signature check failed */
#define SBAF_STATUS_INTR			2
#define SBAF_STATUS_TEST_FAIL			3	/* test did not reach its end */
402 
/*
 * Values of the error_code field of the SBAF status. They double as
 * indices into sbaf_test_status[] and drive the retry decision in
 * sbaf_can_restart().
 */
enum sbaf_status_err_code {
	IFS_SBAF_NO_ERROR				= 0x0,
	IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN		= 0x1,
	IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS		= 0x2,
	IFS_SBAF_UNASSIGNED_ERROR_CODE3			= 0x3,
	IFS_SBAF_INVALID_BUNDLE_INDEX			= 0x4,
	IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS		= 0x5,
	IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY		= 0x6,
	IFS_SBAF_UNASSIGNED_ERROR_CODE7			= 0x7,
	IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT	= 0x8,
	IFS_SBAF_INTERRUPTED_DURING_EXECUTION		= 0x9,
	IFS_SBAF_INVALID_PROGRAM_INDEX			= 0xA,
	IFS_SBAF_CORRUPTED_CHUNK			= 0xB,
	IFS_SBAF_DID_NOT_START				= 0xC,
};
418 
419 static const char * const sbaf_test_status[] = {
420 	[IFS_SBAF_NO_ERROR] = "SBAF no error",
421 	[IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN] = "Other thread could not join.",
422 	[IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS] = "Interrupt occurred prior to SBAF coordination.",
423 	[IFS_SBAF_UNASSIGNED_ERROR_CODE3] = "Unassigned error code 0x3",
424 	[IFS_SBAF_INVALID_BUNDLE_INDEX] = "Non-valid sbaf bundles. Reload test image",
425 	[IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS] = "Mismatch in arguments between threads T0/T1.",
426 	[IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY] = "Core not capable of performing SBAF currently",
427 	[IFS_SBAF_UNASSIGNED_ERROR_CODE7] = "Unassigned error code 0x7",
428 	[IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT] = "Exceeded number of Logical Processors (LP) allowed to run Scan-At-Field concurrently",
429 	[IFS_SBAF_INTERRUPTED_DURING_EXECUTION] = "Interrupt occurred prior to SBAF start",
430 	[IFS_SBAF_INVALID_PROGRAM_INDEX] = "SBAF program index not valid",
431 	[IFS_SBAF_CORRUPTED_CHUNK] = "SBAF operation aborted due to corrupted chunk",
432 	[IFS_SBAF_DID_NOT_START] = "SBAF operation did not start",
433 };
434 
/*
 * Log the reason why the SBAF test on the core containing @cpu ended in
 * the "not tested" state.
 *
 * @dev:         IFS device, used for logging
 * @cpu:         logical CPU whose SMT siblings are named in the message
 * @status_data: raw SBAF status, including any software defined error code
 */
static void sbaf_message_not_tested(struct device *dev, int cpu, u64 status_data)
{
	union ifs_sbaf_status status = (union ifs_sbaf_status)status_data;
	const struct cpumask *siblings = cpu_smt_mask(cpu);

	/* Error codes that have an entry in the message table */
	if (status.error_code < ARRAY_SIZE(sbaf_test_status)) {
		dev_info(dev, "CPU(s) %*pbl: SBAF operation did not start. %s\n",
			 cpumask_pr_args(siblings),
			 sbaf_test_status[status.error_code]);
		return;
	}

	/* Codes assigned by the test loop in this driver */
	if (status.error_code == IFS_SW_TIMEOUT) {
		dev_info(dev, "CPU(s) %*pbl: software timeout during scan\n",
			 cpumask_pr_args(siblings));
		return;
	}

	if (status.error_code == IFS_SW_PARTIAL_COMPLETION) {
		dev_info(dev, "CPU(s) %*pbl: %s\n",
			 cpumask_pr_args(siblings),
			 "Not all SBAF bundles executed. Maximum forward progress retries exceeded");
		return;
	}

	dev_info(dev, "CPU(s) %*pbl: SBAF unknown status %llx\n",
		 cpumask_pr_args(siblings), status.data);
}
455 
sbaf_message_fail(struct device * dev,int cpu,union ifs_sbaf_status status)456 static void sbaf_message_fail(struct device *dev, int cpu, union ifs_sbaf_status status)
457 {
458 	/* Failed signature check is set when SBAF signature did not match the expected value */
459 	if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL) {
460 		dev_err(dev, "CPU(s) %*pbl: Failed signature check\n",
461 			cpumask_pr_args(cpu_smt_mask(cpu)));
462 	}
463 
464 	/* Failed to reach end of test */
465 	if (status.sbaf_status == SBAF_STATUS_TEST_FAIL) {
466 		dev_err(dev, "CPU(s) %*pbl: Failed to complete test\n",
467 			cpumask_pr_args(cpu_smt_mask(cpu)));
468 	}
469 }
470 
sbaf_bundle_completed(union ifs_sbaf_status status)471 static bool sbaf_bundle_completed(union ifs_sbaf_status status)
472 {
473 	return !(status.sbaf_status || status.error_code);
474 }
475 
sbaf_can_restart(union ifs_sbaf_status status)476 static bool sbaf_can_restart(union ifs_sbaf_status status)
477 {
478 	enum sbaf_status_err_code err_code = status.error_code;
479 
480 	/* Signature for chunk is bad, or scan test failed */
481 	if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL ||
482 	    status.sbaf_status == SBAF_STATUS_TEST_FAIL)
483 		return false;
484 
485 	switch (err_code) {
486 	case IFS_SBAF_NO_ERROR:
487 	case IFS_SBAF_OTHER_THREAD_COULD_NOT_JOIN:
488 	case IFS_SBAF_INTERRUPTED_BEFORE_RENDEZVOUS:
489 	case IFS_SBAF_EXCEED_NUMBER_OF_THREADS_CONCURRENT:
490 	case IFS_SBAF_INTERRUPTED_DURING_EXECUTION:
491 		return true;
492 	case IFS_SBAF_UNASSIGNED_ERROR_CODE3:
493 	case IFS_SBAF_INVALID_BUNDLE_INDEX:
494 	case IFS_SBAF_MISMATCH_ARGS_BETWEEN_THREADS:
495 	case IFS_SBAF_CORE_NOT_CAPABLE_CURRENTLY:
496 	case IFS_SBAF_UNASSIGNED_ERROR_CODE7:
497 	case IFS_SBAF_INVALID_PROGRAM_INDEX:
498 	case IFS_SBAF_CORRUPTED_CHUNK:
499 	case IFS_SBAF_DID_NOT_START:
500 		break;
501 	}
502 	return false;
503 }
504 
505 /*
506  * Execute the SBAF test. Called "simultaneously" on all threads of a core
507  * at high priority using the stop_cpus mechanism.
508  */
dosbaf(void * data)509 static int dosbaf(void *data)
510 {
511 	struct sbaf_run_params *run_params = data;
512 	int cpu = smp_processor_id();
513 	union ifs_sbaf_status status;
514 	struct ifs_data *ifsd;
515 	int first;
516 
517 	ifsd = run_params->ifsd;
518 
519 	/* Only the first logical CPU on a core reports result */
520 	first = cpumask_first(cpu_smt_mask(cpu));
521 	wait_for_sibling_cpu(&sbaf_cpus_in, NSEC_PER_SEC);
522 
523 	/*
524 	 * This WRMSR will wait for other HT threads to also write
525 	 * to this MSR (at most for activate.delay cycles). Then it
526 	 * starts scan of each requested bundle. The core test happens
527 	 * during the "execution" of the WRMSR.
528 	 */
529 	wrmsrl(MSR_ACTIVATE_SBAF, run_params->activate->data);
530 	rdmsrl(MSR_SBAF_STATUS, status.data);
531 	trace_ifs_sbaf(ifsd->cur_batch, *run_params->activate, status);
532 
533 	/* Pass back the result of the test */
534 	if (cpu == first)
535 		run_params->status = status;
536 
537 	return 0;
538 }
539 
ifs_sbaf_test_core(int cpu,struct device * dev)540 static void ifs_sbaf_test_core(int cpu, struct device *dev)
541 {
542 	struct sbaf_run_params run_params;
543 	union ifs_sbaf_status status = {};
544 	union ifs_sbaf activate;
545 	unsigned long timeout;
546 	struct ifs_data *ifsd;
547 	int stop_bundle;
548 	int retries;
549 
550 	ifsd = ifs_get_data(dev);
551 
552 	activate.data = 0;
553 	activate.delay = IFS_THREAD_WAIT;
554 
555 	timeout = jiffies + 2 * HZ;
556 	retries = MAX_IFS_RETRIES;
557 	activate.bundle_idx = 0;
558 	stop_bundle = ifsd->max_bundle;
559 
560 	while (activate.bundle_idx <= stop_bundle) {
561 		if (time_after(jiffies, timeout)) {
562 			status.error_code = IFS_SW_TIMEOUT;
563 			break;
564 		}
565 
566 		atomic_set(&sbaf_cpus_in, 0);
567 
568 		run_params.ifsd = ifsd;
569 		run_params.activate = &activate;
570 		run_params.retry_cnt = &retries;
571 		stop_core_cpuslocked(cpu, dosbaf, &run_params);
572 
573 		status = run_params.status;
574 
575 		if (sbaf_bundle_completed(status)) {
576 			activate.bundle_idx = status.bundle_idx + 1;
577 			activate.pgm_idx = 0;
578 			retries = MAX_IFS_RETRIES;
579 			continue;
580 		}
581 
582 		/* Some cases can be retried, give up for others */
583 		if (!sbaf_can_restart(status))
584 			break;
585 
586 		if (status.pgm_idx == activate.pgm_idx) {
587 			/* If no progress retry */
588 			if (--retries == 0) {
589 				if (status.error_code == IFS_NO_ERROR)
590 					status.error_code = IFS_SW_PARTIAL_COMPLETION;
591 				break;
592 			}
593 		} else {
594 			/* if some progress, more pgms remaining in bundle, reset retries */
595 			retries = MAX_IFS_RETRIES;
596 			activate.bundle_idx = status.bundle_idx;
597 			activate.pgm_idx = status.pgm_idx;
598 		}
599 	}
600 
601 	/* Update status for this core */
602 	ifsd->scan_details = status.data;
603 
604 	if (status.sbaf_status == SBAF_STATUS_SIGN_FAIL ||
605 	    status.sbaf_status == SBAF_STATUS_TEST_FAIL) {
606 		ifsd->status = SCAN_TEST_FAIL;
607 		sbaf_message_fail(dev, cpu, status);
608 	} else if (status.error_code || status.sbaf_status == SBAF_STATUS_INTR ||
609 		   (activate.bundle_idx < stop_bundle)) {
610 		ifsd->status = SCAN_NOT_TESTED;
611 		sbaf_message_not_tested(dev, cpu, status.data);
612 	} else {
613 		ifsd->status = SCAN_TEST_PASS;
614 	}
615 }
616 
617 /*
618  * Initiate per core test. It wakes up work queue threads on the target cpu and
619  * its sibling cpu. Once all sibling threads wake up, the scan test gets executed and
620  * wait for all sibling threads to finish the scan test.
621  */
do_core_test(int cpu,struct device * dev)622 int do_core_test(int cpu, struct device *dev)
623 {
624 	const struct ifs_test_caps *test = ifs_get_test_caps(dev);
625 	struct ifs_data *ifsd = ifs_get_data(dev);
626 	int ret = 0;
627 
628 	/* Prevent CPUs from being taken offline during the scan test */
629 	cpus_read_lock();
630 
631 	if (!cpu_online(cpu)) {
632 		dev_info(dev, "cannot test on the offline cpu %d\n", cpu);
633 		ret = -EINVAL;
634 		goto out;
635 	}
636 
637 	switch (test->test_num) {
638 	case IFS_TYPE_SAF:
639 		if (!ifsd->loaded)
640 			ret = -EPERM;
641 		else
642 			ifs_test_core(cpu, dev);
643 		break;
644 	case IFS_TYPE_ARRAY_BIST:
645 		if (ifsd->array_gen == ARRAY_GEN0)
646 			ifs_array_test_core(cpu, dev);
647 		else
648 			ifs_array_test_gen1(cpu, dev);
649 		break;
650 	case IFS_TYPE_SBAF:
651 		if (!ifsd->loaded)
652 			ret = -EPERM;
653 		else
654 			ifs_sbaf_test_core(cpu, dev);
655 		break;
656 	default:
657 		ret = -EINVAL;
658 	}
659 out:
660 	cpus_read_unlock();
661 	return ret;
662 }
663