1 // SPDX-License-Identifier: MIT
2 /*
3 * Copyright © 2022 Intel Corporation
4 */
5
6 #include "gt/intel_gt_print.h"
7 #include "selftests/igt_spinner.h"
8 #include "selftests/igt_reset.h"
9 #include "selftests/intel_scheduler_helpers.h"
10 #include "gt/intel_engine_heartbeat.h"
11 #include "gem/selftests/mock_context.h"
12
13 #define BEAT_INTERVAL 100
14
/*
 * Create and submit an empty kernel request on @engine.
 *
 * On success the request is returned with an extra reference taken before it
 * is added, so the caller must drop it with i915_request_put(). On failure the
 * ERR_PTR from intel_engine_create_kernel_request() is returned unchanged.
 */
static struct i915_request *nop_request(struct intel_engine_cs *engine)
{
	struct i915_request *nop = intel_engine_create_kernel_request(engine);

	if (!IS_ERR(nop)) {
		/* Take our own reference before handing the request over. */
		i915_request_get(nop);
		i915_request_add(nop);
	}

	return nop;
}
28
intel_hang_guc(void * arg)29 static int intel_hang_guc(void *arg)
30 {
31 struct intel_gt *gt = arg;
32 int ret = 0;
33 struct i915_gem_context *ctx;
34 struct intel_context *ce;
35 struct igt_spinner spin;
36 struct i915_request *rq;
37 intel_wakeref_t wakeref;
38 struct i915_gpu_error *global = >->i915->gpu_error;
39 struct intel_engine_cs *engine = intel_selftest_find_any_engine(gt);
40 unsigned int reset_count;
41 u32 guc_status;
42 u32 old_beat;
43
44 if (!engine)
45 return 0;
46
47 ctx = kernel_context(gt->i915, NULL);
48 if (IS_ERR(ctx)) {
49 gt_err(gt, "Failed get kernel context: %pe\n", ctx);
50 return PTR_ERR(ctx);
51 }
52
53 wakeref = intel_runtime_pm_get(gt->uncore->rpm);
54
55 ce = intel_context_create(engine);
56 if (IS_ERR(ce)) {
57 ret = PTR_ERR(ce);
58 gt_err(gt, "Failed to create spinner request: %pe\n", ce);
59 goto err;
60 }
61
62 reset_count = i915_reset_count(global);
63
64 old_beat = engine->props.heartbeat_interval_ms;
65 ret = intel_engine_set_heartbeat(engine, BEAT_INTERVAL);
66 if (ret) {
67 gt_err(gt, "Failed to boost heatbeat interval: %pe\n", ERR_PTR(ret));
68 goto err;
69 }
70
71 ret = igt_spinner_init(&spin, engine->gt);
72 if (ret) {
73 gt_err(gt, "Failed to create spinner: %pe\n", ERR_PTR(ret));
74 goto err;
75 }
76
77 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
78 intel_context_put(ce);
79 if (IS_ERR(rq)) {
80 ret = PTR_ERR(rq);
81 gt_err(gt, "Failed to create spinner request: %pe\n", rq);
82 goto err_spin;
83 }
84
85 ret = request_add_spin(rq, &spin);
86 if (ret) {
87 i915_request_put(rq);
88 gt_err(gt, "Failed to add Spinner request: %pe\n", ERR_PTR(ret));
89 goto err_spin;
90 }
91
92 ret = intel_reset_guc(gt);
93 if (ret) {
94 i915_request_put(rq);
95 gt_err(gt, "Failed to reset GuC: %pe\n", ERR_PTR(ret));
96 goto err_spin;
97 }
98
99 guc_status = intel_uncore_read(gt->uncore, GUC_STATUS);
100 if (!(guc_status & GS_MIA_IN_RESET)) {
101 i915_request_put(rq);
102 gt_err(gt, "Failed to reset GuC: status = 0x%08X\n", guc_status);
103 ret = -EIO;
104 goto err_spin;
105 }
106
107 /* Wait for the heartbeat to cause a reset */
108 ret = intel_selftest_wait_for_rq(rq);
109 i915_request_put(rq);
110 if (ret) {
111 gt_err(gt, "Request failed to complete: %pe\n", ERR_PTR(ret));
112 goto err_spin;
113 }
114
115 if (i915_reset_count(global) == reset_count) {
116 gt_err(gt, "Failed to record a GPU reset\n");
117 ret = -EINVAL;
118 goto err_spin;
119 }
120
121 err_spin:
122 igt_spinner_end(&spin);
123 igt_spinner_fini(&spin);
124 intel_engine_set_heartbeat(engine, old_beat);
125
126 if (ret == 0) {
127 rq = nop_request(engine);
128 if (IS_ERR(rq)) {
129 ret = PTR_ERR(rq);
130 goto err;
131 }
132
133 ret = intel_selftest_wait_for_rq(rq);
134 i915_request_put(rq);
135 if (ret) {
136 gt_err(gt, "No-op failed to complete: %pe\n", ERR_PTR(ret));
137 goto err;
138 }
139 }
140
141 err:
142 intel_runtime_pm_put(gt->uncore->rpm, wakeref);
143 kernel_context_close(ctx);
144
145 return ret;
146 }
147
intel_guc_hang_check(struct drm_i915_private * i915)148 int intel_guc_hang_check(struct drm_i915_private *i915)
149 {
150 static const struct i915_subtest tests[] = {
151 SUBTEST(intel_hang_guc),
152 };
153 struct intel_gt *gt = to_gt(i915);
154
155 if (intel_gt_is_wedged(gt))
156 return 0;
157
158 if (!intel_uc_uses_guc_submission(>->uc))
159 return 0;
160
161 return intel_gt_live_subtests(tests, gt);
162 }
163