/*
 * Copyright 2021 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
22 #include "runl.h"
23 #include "cgrp.h"
24 #include "chan.h"
25 #include "chid.h"
26 #include "priv.h"
27 #include "runq.h"
28
29 #include <core/gpuobj.h>
30 #include <subdev/timer.h>
31 #include <subdev/top.h>
32
33 static struct nvkm_cgrp *
nvkm_engn_cgrp_get(struct nvkm_engn * engn,unsigned long * pirqflags)34 nvkm_engn_cgrp_get(struct nvkm_engn *engn, unsigned long *pirqflags)
35 {
36 struct nvkm_cgrp *cgrp = NULL;
37 struct nvkm_chan *chan;
38 bool cgid;
39 int id;
40
41 id = engn->func->cxid(engn, &cgid);
42 if (id < 0)
43 return NULL;
44
45 if (!cgid) {
46 chan = nvkm_runl_chan_get_chid(engn->runl, id, pirqflags);
47 if (chan)
48 cgrp = chan->cgrp;
49 } else {
50 cgrp = nvkm_runl_cgrp_get_cgid(engn->runl, id, pirqflags);
51 }
52
53 WARN_ON(!cgrp);
54 return cgrp;
55 }
56
57 static void
nvkm_runl_rc(struct nvkm_runl * runl)58 nvkm_runl_rc(struct nvkm_runl *runl)
59 {
60 struct nvkm_fifo *fifo = runl->fifo;
61 struct nvkm_cgrp *cgrp, *gtmp;
62 struct nvkm_chan *chan, *ctmp;
63 struct nvkm_engn *engn;
64 unsigned long flags;
65 int rc, state, i;
66 bool reset;
67
68 /* Runlist is blocked before scheduling recovery - fetch count. */
69 BUG_ON(!mutex_is_locked(&runl->mutex));
70 rc = atomic_xchg(&runl->rc_pending, 0);
71 if (!rc)
72 return;
73
74 /* Look for channel groups flagged for RC. */
75 nvkm_runl_foreach_cgrp_safe(cgrp, gtmp, runl) {
76 state = atomic_cmpxchg(&cgrp->rc, NVKM_CGRP_RC_PENDING, NVKM_CGRP_RC_RUNNING);
77 if (state == NVKM_CGRP_RC_PENDING) {
78 /* Disable all channels in them, and remove from runlist. */
79 nvkm_cgrp_foreach_chan_safe(chan, ctmp, cgrp) {
80 nvkm_chan_error(chan, false);
81 nvkm_chan_remove_locked(chan);
82 }
83 }
84 }
85
86 /* On GPUs with runlist preempt, wait for PBDMA(s) servicing runlist to go idle. */
87 if (runl->func->preempt) {
88 for (i = 0; i < runl->runq_nr; i++) {
89 struct nvkm_runq *runq = runl->runq[i];
90
91 if (runq) {
92 nvkm_msec(fifo->engine.subdev.device, 2000,
93 if (runq->func->idle(runq))
94 break;
95 );
96 }
97 }
98 }
99
100 /* Look for engines that are still on flagged channel groups - reset them. */
101 nvkm_runl_foreach_engn_cond(engn, runl, engn->func->cxid) {
102 cgrp = nvkm_engn_cgrp_get(engn, &flags);
103 if (!cgrp) {
104 ENGN_DEBUG(engn, "cxid not valid");
105 continue;
106 }
107
108 reset = atomic_read(&cgrp->rc) == NVKM_CGRP_RC_RUNNING;
109 nvkm_cgrp_put(&cgrp, flags);
110 if (!reset) {
111 ENGN_DEBUG(engn, "cxid not in recovery");
112 continue;
113 }
114
115 ENGN_DEBUG(engn, "resetting...");
116 /*TODO: can we do something less of a potential catastrophe on failure? */
117 WARN_ON(nvkm_engine_reset(engn->engine));
118 }
119
120 /* Submit runlist update, and clear any remaining exception state. */
121 runl->func->update(runl);
122 if (runl->func->fault_clear)
123 runl->func->fault_clear(runl);
124
125 /* Unblock runlist processing. */
126 while (rc--)
127 nvkm_runl_allow(runl);
128 runl->func->wait(runl);
129 }
130
131 static void
nvkm_runl_rc_runl(struct nvkm_runl * runl)132 nvkm_runl_rc_runl(struct nvkm_runl *runl)
133 {
134 RUNL_ERROR(runl, "rc scheduled");
135
136 nvkm_runl_block(runl);
137 if (runl->func->preempt)
138 runl->func->preempt(runl);
139
140 atomic_inc(&runl->rc_pending);
141 schedule_work(&runl->work);
142 }
143
144 void
nvkm_runl_rc_cgrp(struct nvkm_cgrp * cgrp)145 nvkm_runl_rc_cgrp(struct nvkm_cgrp *cgrp)
146 {
147 if (atomic_cmpxchg(&cgrp->rc, NVKM_CGRP_RC_NONE, NVKM_CGRP_RC_PENDING) != NVKM_CGRP_RC_NONE)
148 return;
149
150 CGRP_ERROR(cgrp, "rc scheduled");
151 nvkm_runl_rc_runl(cgrp->runl);
152 }
153
/*
 * Schedule recovery for whichever channel group is currently on an engine.
 *
 * If the engine does not report a valid channel/group, recovery is skipped.
 * Note that the runl argument is not referenced by this function; the
 * runlist used is the one the located channel group belongs to.
 */
void
nvkm_runl_rc_engn(struct nvkm_runl *runl, struct nvkm_engn *engn)
{
	struct nvkm_cgrp *cgrp;
	unsigned long irqflags;

	/* Lookup channel group currently on engine. */
	cgrp = nvkm_engn_cgrp_get(engn, &irqflags);
	if (cgrp) {
		nvkm_runl_rc_cgrp(cgrp);
		nvkm_cgrp_put(&cgrp, irqflags);
	} else {
		ENGN_DEBUG(engn, "rc skipped, not on channel");
	}
}
170
171 static void
nvkm_runl_work(struct work_struct * work)172 nvkm_runl_work(struct work_struct *work)
173 {
174 struct nvkm_runl *runl = container_of(work, typeof(*runl), work);
175
176 mutex_lock(&runl->mutex);
177 nvkm_runl_rc(runl);
178 mutex_unlock(&runl->mutex);
179
180 }
181
182 struct nvkm_chan *
nvkm_runl_chan_get_inst(struct nvkm_runl * runl,u64 inst,unsigned long * pirqflags)183 nvkm_runl_chan_get_inst(struct nvkm_runl *runl, u64 inst, unsigned long *pirqflags)
184 {
185 struct nvkm_chid *chid = runl->chid;
186 struct nvkm_chan *chan;
187 unsigned long flags;
188 int id;
189
190 spin_lock_irqsave(&chid->lock, flags);
191 for_each_set_bit(id, chid->used, chid->nr) {
192 chan = chid->data[id];
193 if (likely(chan)) {
194 if (chan->inst->addr == inst) {
195 spin_lock(&chan->cgrp->lock);
196 *pirqflags = flags;
197 spin_unlock(&chid->lock);
198 return chan;
199 }
200 }
201 }
202 spin_unlock_irqrestore(&chid->lock, flags);
203 return NULL;
204 }
205
206 struct nvkm_chan *
nvkm_runl_chan_get_chid(struct nvkm_runl * runl,int id,unsigned long * pirqflags)207 nvkm_runl_chan_get_chid(struct nvkm_runl *runl, int id, unsigned long *pirqflags)
208 {
209 struct nvkm_chid *chid = runl->chid;
210 struct nvkm_chan *chan;
211 unsigned long flags;
212
213 spin_lock_irqsave(&chid->lock, flags);
214 if (!WARN_ON(id >= chid->nr)) {
215 chan = chid->data[id];
216 if (likely(chan)) {
217 spin_lock(&chan->cgrp->lock);
218 *pirqflags = flags;
219 spin_unlock(&chid->lock);
220 return chan;
221 }
222 }
223 spin_unlock_irqrestore(&chid->lock, flags);
224 return NULL;
225 }
226
227 struct nvkm_cgrp *
nvkm_runl_cgrp_get_cgid(struct nvkm_runl * runl,int id,unsigned long * pirqflags)228 nvkm_runl_cgrp_get_cgid(struct nvkm_runl *runl, int id, unsigned long *pirqflags)
229 {
230 struct nvkm_chid *cgid = runl->cgid;
231 struct nvkm_cgrp *cgrp;
232 unsigned long flags;
233
234 spin_lock_irqsave(&cgid->lock, flags);
235 if (!WARN_ON(id >= cgid->nr)) {
236 cgrp = cgid->data[id];
237 if (likely(cgrp)) {
238 spin_lock(&cgrp->lock);
239 *pirqflags = flags;
240 spin_unlock(&cgid->lock);
241 return cgrp;
242 }
243 }
244 spin_unlock_irqrestore(&cgid->lock, flags);
245 return NULL;
246 }
247
248 int
nvkm_runl_preempt_wait(struct nvkm_runl * runl)249 nvkm_runl_preempt_wait(struct nvkm_runl *runl)
250 {
251 return nvkm_msec(runl->fifo->engine.subdev.device, runl->fifo->timeout.chan_msec,
252 if (!runl->func->preempt_pending(runl))
253 break;
254
255 nvkm_runl_rc(runl);
256 usleep_range(1, 2);
257 ) < 0 ? -ETIMEDOUT : 0;
258 }
259
260 bool
nvkm_runl_update_pending(struct nvkm_runl * runl)261 nvkm_runl_update_pending(struct nvkm_runl *runl)
262 {
263 if (!runl->func->pending(runl))
264 return false;
265
266 nvkm_runl_rc(runl);
267 return true;
268 }
269
270 void
nvkm_runl_update_locked(struct nvkm_runl * runl,bool wait)271 nvkm_runl_update_locked(struct nvkm_runl *runl, bool wait)
272 {
273 if (atomic_xchg(&runl->changed, 0) && runl->func->update) {
274 runl->func->update(runl);
275 if (wait)
276 runl->func->wait(runl);
277 }
278 }
279
280 void
nvkm_runl_allow(struct nvkm_runl * runl)281 nvkm_runl_allow(struct nvkm_runl *runl)
282 {
283 struct nvkm_fifo *fifo = runl->fifo;
284 unsigned long flags;
285
286 spin_lock_irqsave(&fifo->lock, flags);
287 if (!--runl->blocked) {
288 RUNL_TRACE(runl, "running");
289 runl->func->allow(runl, ~0);
290 }
291 spin_unlock_irqrestore(&fifo->lock, flags);
292 }
293
294 void
nvkm_runl_block(struct nvkm_runl * runl)295 nvkm_runl_block(struct nvkm_runl *runl)
296 {
297 struct nvkm_fifo *fifo = runl->fifo;
298 unsigned long flags;
299
300 spin_lock_irqsave(&fifo->lock, flags);
301 if (!runl->blocked++) {
302 RUNL_TRACE(runl, "stopped");
303 runl->func->block(runl, ~0);
304 }
305 spin_unlock_irqrestore(&fifo->lock, flags);
306 }
307
308 void
nvkm_runl_fini(struct nvkm_runl * runl)309 nvkm_runl_fini(struct nvkm_runl *runl)
310 {
311 if (runl->func->fini)
312 runl->func->fini(runl);
313
314 flush_work(&runl->work);
315 }
316
317 void
nvkm_runl_del(struct nvkm_runl * runl)318 nvkm_runl_del(struct nvkm_runl *runl)
319 {
320 struct nvkm_engn *engn, *engt;
321
322 nvkm_memory_unref(&runl->mem);
323
324 list_for_each_entry_safe(engn, engt, &runl->engns, head) {
325 list_del(&engn->head);
326 kfree(engn);
327 }
328
329 nvkm_chid_unref(&runl->chid);
330 nvkm_chid_unref(&runl->cgid);
331
332 list_del(&runl->head);
333 mutex_destroy(&runl->mutex);
334 kfree(runl);
335 }
336
337 struct nvkm_engn *
nvkm_runl_add(struct nvkm_runl * runl,int engi,const struct nvkm_engn_func * func,enum nvkm_subdev_type type,int inst)338 nvkm_runl_add(struct nvkm_runl *runl, int engi, const struct nvkm_engn_func *func,
339 enum nvkm_subdev_type type, int inst)
340 {
341 struct nvkm_fifo *fifo = runl->fifo;
342 struct nvkm_device *device = fifo->engine.subdev.device;
343 struct nvkm_engine *engine;
344 struct nvkm_engn *engn;
345
346 engine = nvkm_device_engine(device, type, inst);
347 if (!engine) {
348 RUNL_DEBUG(runl, "engn %d.%d[%s] not found", engi, inst, nvkm_subdev_type[type]);
349 return NULL;
350 }
351
352 if (!(engn = kzalloc(sizeof(*engn), GFP_KERNEL)))
353 return NULL;
354
355 engn->func = func;
356 engn->runl = runl;
357 engn->id = engi;
358 engn->engine = engine;
359 engn->fault = -1;
360 list_add_tail(&engn->head, &runl->engns);
361
362 /* Lookup MMU engine ID for fault handling. */
363 if (device->top)
364 engn->fault = nvkm_top_fault_id(device, engine->subdev.type, engine->subdev.inst);
365
366 if (engn->fault < 0 && fifo->func->mmu_fault) {
367 const struct nvkm_enum *map = fifo->func->mmu_fault->engine;
368
369 while (map->name) {
370 if (map->data2 == engine->subdev.type && map->inst == engine->subdev.inst) {
371 engn->fault = map->value;
372 break;
373 }
374 map++;
375 }
376 }
377
378 return engn;
379 }
380
381 struct nvkm_runl *
nvkm_runl_get(struct nvkm_fifo * fifo,int runi,u32 addr)382 nvkm_runl_get(struct nvkm_fifo *fifo, int runi, u32 addr)
383 {
384 struct nvkm_runl *runl;
385
386 nvkm_runl_foreach(runl, fifo) {
387 if ((runi >= 0 && runl->id == runi) || (runi < 0 && runl->addr == addr))
388 return runl;
389 }
390
391 return NULL;
392 }
393
394 struct nvkm_runl *
nvkm_runl_new(struct nvkm_fifo * fifo,int runi,u32 addr,int id_nr)395 nvkm_runl_new(struct nvkm_fifo *fifo, int runi, u32 addr, int id_nr)
396 {
397 struct nvkm_subdev *subdev = &fifo->engine.subdev;
398 struct nvkm_runl *runl;
399 int ret;
400
401 if (!(runl = kzalloc(sizeof(*runl), GFP_KERNEL)))
402 return ERR_PTR(-ENOMEM);
403
404 runl->func = fifo->func->runl;
405 runl->fifo = fifo;
406 runl->id = runi;
407 runl->addr = addr;
408 INIT_LIST_HEAD(&runl->engns);
409 INIT_LIST_HEAD(&runl->cgrps);
410 atomic_set(&runl->changed, 0);
411 mutex_init(&runl->mutex);
412 INIT_WORK(&runl->work, nvkm_runl_work);
413 atomic_set(&runl->rc_triggered, 0);
414 atomic_set(&runl->rc_pending, 0);
415 list_add_tail(&runl->head, &fifo->runls);
416
417 if (!fifo->chid) {
418 if ((ret = nvkm_chid_new(&nvkm_chan_event, subdev, id_nr, 0, id_nr, &runl->cgid)) ||
419 (ret = nvkm_chid_new(&nvkm_chan_event, subdev, id_nr, 0, id_nr, &runl->chid))) {
420 RUNL_ERROR(runl, "cgid/chid: %d", ret);
421 nvkm_runl_del(runl);
422 return ERR_PTR(ret);
423 }
424 } else {
425 runl->cgid = nvkm_chid_ref(fifo->cgid);
426 runl->chid = nvkm_chid_ref(fifo->chid);
427 }
428
429 return runl;
430 }
431