xref: /linux/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c (revision c0e297dc61f8d4453e07afbea1fa8d0e67cd4a34)
1 /*
2  * Copyright 2012 Red Hat Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Ben Skeggs
23  */
24 #include "nv50.h"
25 
26 #include <core/client.h>
27 #include <core/handle.h>
28 #include <engine/fifo.h>
29 #include <subdev/timer.h>
30 
/*
 * Driver-private state of the NV50-family PGRAPH engine.  Engine/subdev
 * pointers handed to the callbacks in this file are cast straight to this
 * type, so the nvkm_gr base has to stay the first member.
 */
31 struct nv50_gr {
32 	struct nvkm_gr base;
33 	spinlock_t lock;	/* held by g84_gr_tlb_flush() for the whole flush sequence */
34 	u32 size;		/* set by nv50_grctx_init(); passed as the size of each new context */
35 };
36 
/*
 * Per-channel PGRAPH context object.  It carries no NV50-specific fields of
 * its own; nv50_gr_context_ctor() allocates it and fills it via
 * nv50_grctx_fill().
 */
37 struct nv50_gr_chan {
38 	struct nvkm_gr_chan base;
39 };
40 
41 static u64
42 nv50_gr_units(struct nvkm_gr *gr)
43 {
44 	return nv_rd32(gr, 0x1540);
45 }
46 
47 /*******************************************************************************
48  * Graphics object classes
49  ******************************************************************************/
50 
51 static int
52 nv50_gr_object_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
53 		    struct nvkm_oclass *oclass, void *data, u32 size,
54 		    struct nvkm_object **pobject)
55 {
56 	struct nvkm_gpuobj *obj;
57 	int ret;
58 
59 	ret = nvkm_gpuobj_create(parent, engine, oclass, 0, parent,
60 				 16, 16, 0, &obj);
61 	*pobject = nv_object(obj);
62 	if (ret)
63 		return ret;
64 
65 	nv_wo32(obj, 0x00, nv_mclass(obj));
66 	nv_wo32(obj, 0x04, 0x00000000);
67 	nv_wo32(obj, 0x08, 0x00000000);
68 	nv_wo32(obj, 0x0c, 0x00000000);
69 	return 0;
70 }
71 
72 static struct nvkm_ofuncs
73 nv50_gr_ofuncs = {
74 	.ctor = nv50_gr_object_ctor,
75 	.dtor = _nvkm_gpuobj_dtor,
76 	.init = _nvkm_gpuobj_init,
77 	.fini = _nvkm_gpuobj_fini,
78 	.rd32 = _nvkm_gpuobj_rd32,
79 	.wr32 = _nvkm_gpuobj_wr32,
80 };
81 
82 static struct nvkm_oclass
83 nv50_gr_sclass[] = {
84 	{ 0x0030, &nv50_gr_ofuncs },
85 	{ 0x502d, &nv50_gr_ofuncs },
86 	{ 0x5039, &nv50_gr_ofuncs },
87 	{ 0x5097, &nv50_gr_ofuncs },
88 	{ 0x50c0, &nv50_gr_ofuncs },
89 	{}
90 };
91 
92 static struct nvkm_oclass
93 g84_gr_sclass[] = {
94 	{ 0x0030, &nv50_gr_ofuncs },
95 	{ 0x502d, &nv50_gr_ofuncs },
96 	{ 0x5039, &nv50_gr_ofuncs },
97 	{ 0x50c0, &nv50_gr_ofuncs },
98 	{ 0x8297, &nv50_gr_ofuncs },
99 	{}
100 };
101 
102 static struct nvkm_oclass
103 gt200_gr_sclass[] = {
104 	{ 0x0030, &nv50_gr_ofuncs },
105 	{ 0x502d, &nv50_gr_ofuncs },
106 	{ 0x5039, &nv50_gr_ofuncs },
107 	{ 0x50c0, &nv50_gr_ofuncs },
108 	{ 0x8397, &nv50_gr_ofuncs },
109 	{}
110 };
111 
112 static struct nvkm_oclass
113 gt215_gr_sclass[] = {
114 	{ 0x0030, &nv50_gr_ofuncs },
115 	{ 0x502d, &nv50_gr_ofuncs },
116 	{ 0x5039, &nv50_gr_ofuncs },
117 	{ 0x50c0, &nv50_gr_ofuncs },
118 	{ 0x8597, &nv50_gr_ofuncs },
119 	{ 0x85c0, &nv50_gr_ofuncs },
120 	{}
121 };
122 
123 static struct nvkm_oclass
124 mcp89_gr_sclass[] = {
125 	{ 0x0030, &nv50_gr_ofuncs },
126 	{ 0x502d, &nv50_gr_ofuncs },
127 	{ 0x5039, &nv50_gr_ofuncs },
128 	{ 0x50c0, &nv50_gr_ofuncs },
129 	{ 0x85c0, &nv50_gr_ofuncs },
130 	{ 0x8697, &nv50_gr_ofuncs },
131 	{}
132 };
133 
134 /*******************************************************************************
135  * PGRAPH context
136  ******************************************************************************/
137 
138 static int
139 nv50_gr_context_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
140 		     struct nvkm_oclass *oclass, void *data, u32 size,
141 		     struct nvkm_object **pobject)
142 {
143 	struct nv50_gr *gr = (void *)engine;
144 	struct nv50_gr_chan *chan;
145 	int ret;
146 
147 	ret = nvkm_gr_context_create(parent, engine, oclass, NULL, gr->size,
148 				     0, NVOBJ_FLAG_ZERO_ALLOC, &chan);
149 	*pobject = nv_object(chan);
150 	if (ret)
151 		return ret;
152 
153 	nv50_grctx_fill(nv_device(gr), nv_gpuobj(chan));
154 	return 0;
155 }
156 
157 static struct nvkm_oclass
158 nv50_gr_cclass = {
159 	.handle = NV_ENGCTX(GR, 0x50),
160 	.ofuncs = &(struct nvkm_ofuncs) {
161 		.ctor = nv50_gr_context_ctor,
162 		.dtor = _nvkm_gr_context_dtor,
163 		.init = _nvkm_gr_context_init,
164 		.fini = _nvkm_gr_context_fini,
165 		.rd32 = _nvkm_gr_context_rd32,
166 		.wr32 = _nvkm_gr_context_wr32,
167 	},
168 };
169 
170 /*******************************************************************************
171  * PGRAPH engine/subdev functions
172  ******************************************************************************/
173 
/*
 * Names for the individual bits of the PGRAPH status word.
 * g84_gr_tlb_flush() decodes the value it reads from register 0x400700 with
 * this table when its idle wait times out.
 */
174 static const struct nvkm_bitfield nv50_gr_status[] = {
175 	{ 0x00000001, "BUSY" }, /* set when any bit is set */
176 	{ 0x00000002, "DISPATCH" },
177 	{ 0x00000004, "UNK2" },
178 	{ 0x00000008, "UNK3" },
179 	{ 0x00000010, "UNK4" },
180 	{ 0x00000020, "UNK5" },
181 	{ 0x00000040, "M2MF" },
182 	{ 0x00000080, "UNK7" },
183 	{ 0x00000100, "CTXPROG" },
184 	{ 0x00000200, "VFETCH" },
185 	{ 0x00000400, "CCACHE_PREGEOM" },
186 	{ 0x00000800, "STRMOUT_VATTR_POSTGEOM" },
187 	{ 0x00001000, "VCLIP" },
188 	{ 0x00002000, "RATTR_APLANE" },
189 	{ 0x00004000, "TRAST" },
190 	{ 0x00008000, "CLIPID" },
191 	{ 0x00010000, "ZCULL" },
192 	{ 0x00020000, "ENG2D" },
193 	{ 0x00040000, "RMASK" },
194 	{ 0x00080000, "TPC_RAST" },
195 	{ 0x00100000, "TPC_PROP" },
196 	{ 0x00200000, "TPC_TEX" },
197 	{ 0x00400000, "TPC_GEOM" },
198 	{ 0x00800000, "TPC_MP" },
199 	{ 0x01000000, "ROP" },
200 	{}	/* end of table */
201 };
202 
203 static const char *const nv50_gr_vstatus_0[] = {
204 	"VFETCH", "CCACHE", "PREGEOM", "POSTGEOM", "VATTR", "STRMOUT", "VCLIP",
205 	NULL
206 };
207 
208 static const char *const nv50_gr_vstatus_1[] = {
209 	"TPC_RAST", "TPC_PROP", "TPC_TEX", "TPC_GEOM", "TPC_MP", NULL
210 };
211 
212 static const char *const nv50_gr_vstatus_2[] = {
213 	"RATTR", "APLANE", "TRAST", "CLIPID", "ZCULL", "ENG2D", "RMASK",
214 	"ROP", NULL
215 };
216 
217 static void
218 nvkm_gr_vstatus_print(struct nv50_gr *gr, int r,
219 		       const char *const units[], u32 status)
220 {
221 	int i;
222 
223 	nv_error(gr, "PGRAPH_VSTATUS%d: 0x%08x", r, status);
224 
225 	for (i = 0; units[i] && status; i++) {
226 		if ((status & 7) == 1)
227 			pr_cont(" %s", units[i]);
228 		status >>= 3;
229 	}
230 	if (status)
231 		pr_cont(" (invalid: 0x%x)", status);
232 	pr_cont("\n");
233 }
234 
235 static int
236 g84_gr_tlb_flush(struct nvkm_engine *engine)
237 {
238 	struct nvkm_timer *tmr = nvkm_timer(engine);
239 	struct nv50_gr *gr = (void *)engine;
240 	bool idle, timeout = false;
241 	unsigned long flags;
242 	u64 start;
243 	u32 tmp;
244 
245 	spin_lock_irqsave(&gr->lock, flags);
246 	nv_mask(gr, 0x400500, 0x00000001, 0x00000000);
247 
248 	start = tmr->read(tmr);
249 	do {
250 		idle = true;
251 
252 		for (tmp = nv_rd32(gr, 0x400380); tmp && idle; tmp >>= 3) {
253 			if ((tmp & 7) == 1)
254 				idle = false;
255 		}
256 
257 		for (tmp = nv_rd32(gr, 0x400384); tmp && idle; tmp >>= 3) {
258 			if ((tmp & 7) == 1)
259 				idle = false;
260 		}
261 
262 		for (tmp = nv_rd32(gr, 0x400388); tmp && idle; tmp >>= 3) {
263 			if ((tmp & 7) == 1)
264 				idle = false;
265 		}
266 	} while (!idle &&
267 		 !(timeout = tmr->read(tmr) - start > 2000000000));
268 
269 	if (timeout) {
270 		nv_error(gr, "PGRAPH TLB flush idle timeout fail\n");
271 
272 		tmp = nv_rd32(gr, 0x400700);
273 		nv_error(gr, "PGRAPH_STATUS  : 0x%08x", tmp);
274 		nvkm_bitfield_print(nv50_gr_status, tmp);
275 		pr_cont("\n");
276 
277 		nvkm_gr_vstatus_print(gr, 0, nv50_gr_vstatus_0,
278 				       nv_rd32(gr, 0x400380));
279 		nvkm_gr_vstatus_print(gr, 1, nv50_gr_vstatus_1,
280 				       nv_rd32(gr, 0x400384));
281 		nvkm_gr_vstatus_print(gr, 2, nv50_gr_vstatus_2,
282 				       nv_rd32(gr, 0x400388));
283 	}
284 
285 
286 	nv_wr32(gr, 0x100c80, 0x00000001);
287 	if (!nv_wait(gr, 0x100c80, 0x00000001, 0x00000000))
288 		nv_error(gr, "vm flush timeout\n");
289 	nv_mask(gr, 0x400500, 0x00000001, 0x00000001);
290 	spin_unlock_irqrestore(&gr->lock, flags);
291 	return timeout ? -EBUSY : 0;
292 }
293 
/*
 * Names for the bits of the per-MP status word that nv50_gr_mp_trap() reads
 * at offset 0x14 from the MP's register base.
 */
294 static const struct nvkm_bitfield nv50_mp_exec_errors[] = {
295 	{ 0x01, "STACK_UNDERFLOW" },
296 	{ 0x02, "STACK_MISMATCH" },
297 	{ 0x04, "QUADON_ACTIVE" },
298 	{ 0x08, "TIMEOUT" },
299 	{ 0x10, "INVALID_OPCODE" },
300 	{ 0x20, "PM_OVERFLOW" },
301 	{ 0x40, "BREAKPOINT" },
302 	{}	/* end of table */
303 };
304 
/*
 * Names for the per-TP ustatus bits of an MP trap (type 7 in
 * nv50_gr_tp_trap()).  The MP0/MP1/MP2 bits (mask 0x04030000) are handed to
 * nv50_gr_mp_trap() and cleared before the remainder is printed with this
 * table.
 */
305 static const struct nvkm_bitfield nv50_mpc_traps[] = {
306 	{ 0x0000001, "LOCAL_LIMIT_READ" },
307 	{ 0x0000010, "LOCAL_LIMIT_WRITE" },
308 	{ 0x0000040, "STACK_LIMIT" },
309 	{ 0x0000100, "GLOBAL_LIMIT_READ" },
310 	{ 0x0001000, "GLOBAL_LIMIT_WRITE" },
311 	{ 0x0010000, "MP0" },
312 	{ 0x0020000, "MP1" },
313 	{ 0x0040000, "GLOBAL_LIMIT_RED" },
314 	{ 0x0400000, "GLOBAL_LIMIT_ATOM" },
315 	{ 0x4000000, "MP2" },
316 	{}	/* end of table */
317 };
318 
/*
 * Names for the per-TP ustatus bits of a texture trap (type 6 in
 * nv50_gr_tp_trap()).  Bit 0 deliberately has an empty name.
 */
319 static const struct nvkm_bitfield nv50_tex_traps[] = {
320 	{ 0x00000001, "" }, /* any bit set? */
321 	{ 0x00000002, "FAULT" },
322 	{ 0x00000004, "STORAGE_TYPE_MISMATCH" },
323 	{ 0x00000008, "LINEAR_MISMATCH" },
324 	{ 0x00000020, "WRONG_MEMTYPE" },
325 	{}	/* end of table */
326 };
327 
/*
 * Bit names for the unit status words decoded in nv50_gr_trap_handler().
 * Each table belongs to the register named next to it; the handler masks
 * the value with 0x7fffffff before printing.
 */

/* M2MF ustatus, register 0x406800 */
328 static const struct nvkm_bitfield nv50_gr_trap_m2mf[] = {
329 	{ 0x00000001, "NOTIFY" },
330 	{ 0x00000002, "IN" },
331 	{ 0x00000004, "OUT" },
332 	{}
333 };
334 
/* VFETCH ustatus, register 0x400c04 */
335 static const struct nvkm_bitfield nv50_gr_trap_vfetch[] = {
336 	{ 0x00000001, "FAULT" },
337 	{}
338 };
339 
/* STRMOUT ustatus, register 0x401800 */
340 static const struct nvkm_bitfield nv50_gr_trap_strmout[] = {
341 	{ 0x00000001, "FAULT" },
342 	{}
343 };
344 
/* CCACHE ustatus, register 0x405018 */
345 static const struct nvkm_bitfield nv50_gr_trap_ccache[] = {
346 	{ 0x00000001, "FAULT" },
347 	{}
348 };
349 
/*
 * Names for the DATA_ERROR codes.  nv50_gr_intr() looks up the value it
 * reads from register 0x400110 here whenever interrupt bit 0x00100000 is
 * being reported.  The third field of every entry is NULL.
 *
 * NOTE(review): the table is not static, so it is presumably referenced from
 * another file -- confirm before changing its linkage or name.
 */
350 /* There must be a *lot* of these. Will take some time to gather them up. */
351 const struct nvkm_enum nv50_data_error_names[] = {
352 	{ 0x00000003, "INVALID_OPERATION", NULL },
353 	{ 0x00000004, "INVALID_VALUE", NULL },
354 	{ 0x00000005, "INVALID_ENUM", NULL },
355 	{ 0x00000008, "INVALID_OBJECT", NULL },
356 	{ 0x00000009, "READ_ONLY_OBJECT", NULL },
357 	{ 0x0000000a, "SUPERVISOR_OBJECT", NULL },
358 	{ 0x0000000b, "INVALID_ADDRESS_ALIGNMENT", NULL },
359 	{ 0x0000000c, "INVALID_BITFIELD", NULL },
360 	{ 0x0000000d, "BEGIN_END_ACTIVE", NULL },
361 	{ 0x0000000e, "SEMANTIC_COLOR_BACK_OVER_LIMIT", NULL },
362 	{ 0x0000000f, "VIEWPORT_ID_NEEDS_GP", NULL },
363 	{ 0x00000010, "RT_DOUBLE_BIND", NULL },
364 	{ 0x00000011, "RT_TYPES_MISMATCH", NULL },
365 	{ 0x00000012, "RT_LINEAR_WITH_ZETA", NULL },
366 	{ 0x00000015, "FP_TOO_FEW_REGS", NULL },
367 	{ 0x00000016, "ZETA_FORMAT_CSAA_MISMATCH", NULL },
368 	{ 0x00000017, "RT_LINEAR_WITH_MSAA", NULL },
369 	{ 0x00000018, "FP_INTERPOLANT_START_OVER_LIMIT", NULL },
370 	{ 0x00000019, "SEMANTIC_LAYER_OVER_LIMIT", NULL },
371 	{ 0x0000001a, "RT_INVALID_ALIGNMENT", NULL },
372 	{ 0x0000001b, "SAMPLER_OVER_LIMIT", NULL },
373 	{ 0x0000001c, "TEXTURE_OVER_LIMIT", NULL },
374 	{ 0x0000001e, "GP_TOO_MANY_OUTPUTS", NULL },
375 	{ 0x0000001f, "RT_BPP128_WITH_MS8", NULL },
376 	{ 0x00000021, "Z_OUT_OF_BOUNDS", NULL },
377 	{ 0x00000023, "XY_OUT_OF_BOUNDS", NULL },
378 	{ 0x00000024, "VP_ZERO_INPUTS", NULL },
379 	{ 0x00000027, "CP_MORE_PARAMS_THAN_SHARED", NULL },
380 	{ 0x00000028, "CP_NO_REG_SPACE_STRIPED", NULL },
381 	{ 0x00000029, "CP_NO_REG_SPACE_PACKED", NULL },
382 	{ 0x0000002a, "CP_NOT_ENOUGH_WARPS", NULL },
383 	{ 0x0000002b, "CP_BLOCK_SIZE_MISMATCH", NULL },
384 	{ 0x0000002c, "CP_NOT_ENOUGH_LOCAL_WARPS", NULL },
385 	{ 0x0000002d, "CP_NOT_ENOUGH_STACK_WARPS", NULL },
386 	{ 0x0000002e, "CP_NO_BLOCKDIM_LATCH", NULL },
387 	{ 0x00000031, "ENG2D_FORMAT_MISMATCH", NULL },
388 	{ 0x0000003f, "PRIMITIVE_ID_NEEDS_GP", NULL },
389 	{ 0x00000044, "SEMANTIC_VIEWPORT_OVER_LIMIT", NULL },
390 	{ 0x00000045, "SEMANTIC_COLOR_FRONT_OVER_LIMIT", NULL },
391 	{ 0x00000046, "LAYER_ID_NEEDS_GP", NULL },
392 	{ 0x00000047, "SEMANTIC_CLIP_OVER_LIMIT", NULL },
393 	{ 0x00000048, "SEMANTIC_PTSZ_OVER_LIMIT", NULL },
394 	{}	/* end of table */
395 };
396 
/*
 * Names for the bits of the PGRAPH interrupt status word (register
 * 0x400100) as printed by nv50_gr_intr().  DATA_ERROR and TRAP are listed
 * for completeness: nv50_gr_intr() reports those two in detail itself and
 * strips them from the mask before it prints with this table.
 */
397 static const struct nvkm_bitfield nv50_gr_intr_name[] = {
398 	{ 0x00000001, "NOTIFY" },
399 	{ 0x00000002, "COMPUTE_QUERY" },
400 	{ 0x00000010, "ILLEGAL_MTHD" },
401 	{ 0x00000020, "ILLEGAL_CLASS" },
402 	{ 0x00000040, "DOUBLE_NOTIFY" },
403 	{ 0x00001000, "CONTEXT_SWITCH" },
404 	{ 0x00010000, "BUFFER_NOTIFY" },
405 	{ 0x00100000, "DATA_ERROR" },
406 	{ 0x00200000, "TRAP" },
407 	{ 0x01000000, "SINGLE_STEP" },
408 	{}	/* end of table */
409 };
410 
/*
 * Names for the per-TP ustatus bits of a PROP trap.  nv50_gr_prop_trap()
 * reports CUDA_FAULT (0x00000080) on its own and clears that bit, then
 * prints whatever is left with this table.
 */
411 static const struct nvkm_bitfield nv50_gr_trap_prop[] = {
412 	{ 0x00000004, "SURF_WIDTH_OVERRUN" },
413 	{ 0x00000008, "SURF_HEIGHT_OVERRUN" },
414 	{ 0x00000010, "DST2D_FAULT" },
415 	{ 0x00000020, "ZETA_FAULT" },
416 	{ 0x00000040, "RT_FAULT" },
417 	{ 0x00000080, "CUDA_FAULT" },
418 	{ 0x00000100, "DST2D_STORAGE_TYPE_MISMATCH" },
419 	{ 0x00000200, "ZETA_STORAGE_TYPE_MISMATCH" },
420 	{ 0x00000400, "RT_STORAGE_TYPE_MISMATCH" },
421 	{ 0x00000800, "DST2D_LINEAR_MISMATCH" },
422 	{ 0x00001000, "RT_LINEAR_MISMATCH" },
423 	{}	/* end of table */
424 };
425 
426 static void
427 nv50_gr_prop_trap(struct nv50_gr *gr,
428 		    u32 ustatus_addr, u32 ustatus, u32 tp)
429 {
430 	u32 e0c = nv_rd32(gr, ustatus_addr + 0x04);
431 	u32 e10 = nv_rd32(gr, ustatus_addr + 0x08);
432 	u32 e14 = nv_rd32(gr, ustatus_addr + 0x0c);
433 	u32 e18 = nv_rd32(gr, ustatus_addr + 0x10);
434 	u32 e1c = nv_rd32(gr, ustatus_addr + 0x14);
435 	u32 e20 = nv_rd32(gr, ustatus_addr + 0x18);
436 	u32 e24 = nv_rd32(gr, ustatus_addr + 0x1c);
437 
438 	/* CUDA memory: l[], g[] or stack. */
439 	if (ustatus & 0x00000080) {
440 		if (e18 & 0x80000000) {
441 			/* g[] read fault? */
442 			nv_error(gr, "TRAP_PROP - TP %d - CUDA_FAULT - Global read fault at address %02x%08x\n",
443 					 tp, e14, e10 | ((e18 >> 24) & 0x1f));
444 			e18 &= ~0x1f000000;
445 		} else if (e18 & 0xc) {
446 			/* g[] write fault? */
447 			nv_error(gr, "TRAP_PROP - TP %d - CUDA_FAULT - Global write fault at address %02x%08x\n",
448 				 tp, e14, e10 | ((e18 >> 7) & 0x1f));
449 			e18 &= ~0x00000f80;
450 		} else {
451 			nv_error(gr, "TRAP_PROP - TP %d - Unknown CUDA fault at address %02x%08x\n",
452 				 tp, e14, e10);
453 		}
454 		ustatus &= ~0x00000080;
455 	}
456 	if (ustatus) {
457 		nv_error(gr, "TRAP_PROP - TP %d -", tp);
458 		nvkm_bitfield_print(nv50_gr_trap_prop, ustatus);
459 		pr_cont(" - Address %02x%08x\n", e14, e10);
460 	}
461 	nv_error(gr, "TRAP_PROP - TP %d - e0c: %08x, e18: %08x, e1c: %08x, e20: %08x, e24: %08x\n",
462 		 tp, e0c, e18, e1c, e20, e24);
463 }
464 
465 static void
466 nv50_gr_mp_trap(struct nv50_gr *gr, int tpid, int display)
467 {
468 	u32 units = nv_rd32(gr, 0x1540);
469 	u32 addr, mp10, status, pc, oplow, ophigh;
470 	int i;
471 	int mps = 0;
472 	for (i = 0; i < 4; i++) {
473 		if (!(units & 1 << (i+24)))
474 			continue;
475 		if (nv_device(gr)->chipset < 0xa0)
476 			addr = 0x408200 + (tpid << 12) + (i << 7);
477 		else
478 			addr = 0x408100 + (tpid << 11) + (i << 7);
479 		mp10 = nv_rd32(gr, addr + 0x10);
480 		status = nv_rd32(gr, addr + 0x14);
481 		if (!status)
482 			continue;
483 		if (display) {
484 			nv_rd32(gr, addr + 0x20);
485 			pc = nv_rd32(gr, addr + 0x24);
486 			oplow = nv_rd32(gr, addr + 0x70);
487 			ophigh = nv_rd32(gr, addr + 0x74);
488 			nv_error(gr, "TRAP_MP_EXEC - "
489 					"TP %d MP %d:", tpid, i);
490 			nvkm_bitfield_print(nv50_mp_exec_errors, status);
491 			pr_cont(" at %06x warp %d, opcode %08x %08x\n",
492 					pc&0xffffff, pc >> 24,
493 					oplow, ophigh);
494 		}
495 		nv_wr32(gr, addr + 0x10, mp10);
496 		nv_wr32(gr, addr + 0x14, 0);
497 		mps++;
498 	}
499 	if (!mps && display)
500 		nv_error(gr, "TRAP_MP_EXEC - TP %d: "
501 				"No MPs claiming errors?\n", tpid);
502 }
503 
504 static void
505 nv50_gr_tp_trap(struct nv50_gr *gr, int type, u32 ustatus_old,
506 		  u32 ustatus_new, int display, const char *name)
507 {
508 	int tps = 0;
509 	u32 units = nv_rd32(gr, 0x1540);
510 	int i, r;
511 	u32 ustatus_addr, ustatus;
512 	for (i = 0; i < 16; i++) {
513 		if (!(units & (1 << i)))
514 			continue;
515 		if (nv_device(gr)->chipset < 0xa0)
516 			ustatus_addr = ustatus_old + (i << 12);
517 		else
518 			ustatus_addr = ustatus_new + (i << 11);
519 		ustatus = nv_rd32(gr, ustatus_addr) & 0x7fffffff;
520 		if (!ustatus)
521 			continue;
522 		tps++;
523 		switch (type) {
524 		case 6: /* texture error... unknown for now */
525 			if (display) {
526 				nv_error(gr, "magic set %d:\n", i);
527 				for (r = ustatus_addr + 4; r <= ustatus_addr + 0x10; r += 4)
528 					nv_error(gr, "\t0x%08x: 0x%08x\n", r,
529 						nv_rd32(gr, r));
530 				if (ustatus) {
531 					nv_error(gr, "%s - TP%d:", name, i);
532 					nvkm_bitfield_print(nv50_tex_traps,
533 							       ustatus);
534 					pr_cont("\n");
535 					ustatus = 0;
536 				}
537 			}
538 			break;
539 		case 7: /* MP error */
540 			if (ustatus & 0x04030000) {
541 				nv50_gr_mp_trap(gr, i, display);
542 				ustatus &= ~0x04030000;
543 			}
544 			if (ustatus && display) {
545 				nv_error(gr, "%s - TP%d:", name, i);
546 				nvkm_bitfield_print(nv50_mpc_traps, ustatus);
547 				pr_cont("\n");
548 				ustatus = 0;
549 			}
550 			break;
551 		case 8: /* PROP error */
552 			if (display)
553 				nv50_gr_prop_trap(
554 						gr, ustatus_addr, ustatus, i);
555 			ustatus = 0;
556 			break;
557 		}
558 		if (ustatus) {
559 			if (display)
560 				nv_error(gr, "%s - TP%d: Unhandled ustatus 0x%08x\n", name, i, ustatus);
561 		}
562 		nv_wr32(gr, ustatus_addr, 0xc0000000);
563 	}
564 
565 	if (!tps && display)
566 		nv_warn(gr, "%s - No TPs claiming errors?\n", name);
567 }
568 
569 static int
570 nv50_gr_trap_handler(struct nv50_gr *gr, u32 display,
571 		     int chid, u64 inst, struct nvkm_object *engctx)
572 {
573 	u32 status = nv_rd32(gr, 0x400108);
574 	u32 ustatus;
575 
576 	if (!status && display) {
577 		nv_error(gr, "TRAP: no units reporting traps?\n");
578 		return 1;
579 	}
580 
581 	/* DISPATCH: Relays commands to other units and handles NOTIFY,
582 	 * COND, QUERY. If you get a trap from it, the command is still stuck
583 	 * in DISPATCH and you need to do something about it. */
584 	if (status & 0x001) {
585 		ustatus = nv_rd32(gr, 0x400804) & 0x7fffffff;
586 		if (!ustatus && display) {
587 			nv_error(gr, "TRAP_DISPATCH - no ustatus?\n");
588 		}
589 
590 		nv_wr32(gr, 0x400500, 0x00000000);
591 
592 		/* Known to be triggered by screwed up NOTIFY and COND... */
593 		if (ustatus & 0x00000001) {
594 			u32 addr = nv_rd32(gr, 0x400808);
595 			u32 subc = (addr & 0x00070000) >> 16;
596 			u32 mthd = (addr & 0x00001ffc);
597 			u32 datal = nv_rd32(gr, 0x40080c);
598 			u32 datah = nv_rd32(gr, 0x400810);
599 			u32 class = nv_rd32(gr, 0x400814);
600 			u32 r848 = nv_rd32(gr, 0x400848);
601 
602 			nv_error(gr, "TRAP DISPATCH_FAULT\n");
603 			if (display && (addr & 0x80000000)) {
604 				nv_error(gr,
605 					 "ch %d [0x%010llx %s] subc %d class 0x%04x mthd 0x%04x data 0x%08x%08x 400808 0x%08x 400848 0x%08x\n",
606 					 chid, inst,
607 					 nvkm_client_name(engctx), subc,
608 					 class, mthd, datah, datal, addr, r848);
609 			} else
610 			if (display) {
611 				nv_error(gr, "no stuck command?\n");
612 			}
613 
614 			nv_wr32(gr, 0x400808, 0);
615 			nv_wr32(gr, 0x4008e8, nv_rd32(gr, 0x4008e8) & 3);
616 			nv_wr32(gr, 0x400848, 0);
617 			ustatus &= ~0x00000001;
618 		}
619 
620 		if (ustatus & 0x00000002) {
621 			u32 addr = nv_rd32(gr, 0x40084c);
622 			u32 subc = (addr & 0x00070000) >> 16;
623 			u32 mthd = (addr & 0x00001ffc);
624 			u32 data = nv_rd32(gr, 0x40085c);
625 			u32 class = nv_rd32(gr, 0x400814);
626 
627 			nv_error(gr, "TRAP DISPATCH_QUERY\n");
628 			if (display && (addr & 0x80000000)) {
629 				nv_error(gr,
630 					 "ch %d [0x%010llx %s] subc %d class 0x%04x mthd 0x%04x data 0x%08x 40084c 0x%08x\n",
631 					 chid, inst,
632 					 nvkm_client_name(engctx), subc,
633 					 class, mthd, data, addr);
634 			} else
635 			if (display) {
636 				nv_error(gr, "no stuck command?\n");
637 			}
638 
639 			nv_wr32(gr, 0x40084c, 0);
640 			ustatus &= ~0x00000002;
641 		}
642 
643 		if (ustatus && display) {
644 			nv_error(gr, "TRAP_DISPATCH (unknown "
645 				      "0x%08x)\n", ustatus);
646 		}
647 
648 		nv_wr32(gr, 0x400804, 0xc0000000);
649 		nv_wr32(gr, 0x400108, 0x001);
650 		status &= ~0x001;
651 		if (!status)
652 			return 0;
653 	}
654 
655 	/* M2MF: Memory to memory copy engine. */
656 	if (status & 0x002) {
657 		u32 ustatus = nv_rd32(gr, 0x406800) & 0x7fffffff;
658 		if (display) {
659 			nv_error(gr, "TRAP_M2MF");
660 			nvkm_bitfield_print(nv50_gr_trap_m2mf, ustatus);
661 			pr_cont("\n");
662 			nv_error(gr, "TRAP_M2MF %08x %08x %08x %08x\n",
663 				nv_rd32(gr, 0x406804), nv_rd32(gr, 0x406808),
664 				nv_rd32(gr, 0x40680c), nv_rd32(gr, 0x406810));
665 
666 		}
667 
668 		/* No sane way found yet -- just reset the bugger. */
669 		nv_wr32(gr, 0x400040, 2);
670 		nv_wr32(gr, 0x400040, 0);
671 		nv_wr32(gr, 0x406800, 0xc0000000);
672 		nv_wr32(gr, 0x400108, 0x002);
673 		status &= ~0x002;
674 	}
675 
676 	/* VFETCH: Fetches data from vertex buffers. */
677 	if (status & 0x004) {
678 		u32 ustatus = nv_rd32(gr, 0x400c04) & 0x7fffffff;
679 		if (display) {
680 			nv_error(gr, "TRAP_VFETCH");
681 			nvkm_bitfield_print(nv50_gr_trap_vfetch, ustatus);
682 			pr_cont("\n");
683 			nv_error(gr, "TRAP_VFETCH %08x %08x %08x %08x\n",
684 				nv_rd32(gr, 0x400c00), nv_rd32(gr, 0x400c08),
685 				nv_rd32(gr, 0x400c0c), nv_rd32(gr, 0x400c10));
686 		}
687 
688 		nv_wr32(gr, 0x400c04, 0xc0000000);
689 		nv_wr32(gr, 0x400108, 0x004);
690 		status &= ~0x004;
691 	}
692 
693 	/* STRMOUT: DirectX streamout / OpenGL transform feedback. */
694 	if (status & 0x008) {
695 		ustatus = nv_rd32(gr, 0x401800) & 0x7fffffff;
696 		if (display) {
697 			nv_error(gr, "TRAP_STRMOUT");
698 			nvkm_bitfield_print(nv50_gr_trap_strmout, ustatus);
699 			pr_cont("\n");
700 			nv_error(gr, "TRAP_STRMOUT %08x %08x %08x %08x\n",
701 				nv_rd32(gr, 0x401804), nv_rd32(gr, 0x401808),
702 				nv_rd32(gr, 0x40180c), nv_rd32(gr, 0x401810));
703 
704 		}
705 
706 		/* No sane way found yet -- just reset the bugger. */
707 		nv_wr32(gr, 0x400040, 0x80);
708 		nv_wr32(gr, 0x400040, 0);
709 		nv_wr32(gr, 0x401800, 0xc0000000);
710 		nv_wr32(gr, 0x400108, 0x008);
711 		status &= ~0x008;
712 	}
713 
714 	/* CCACHE: Handles code and c[] caches and fills them. */
715 	if (status & 0x010) {
716 		ustatus = nv_rd32(gr, 0x405018) & 0x7fffffff;
717 		if (display) {
718 			nv_error(gr, "TRAP_CCACHE");
719 			nvkm_bitfield_print(nv50_gr_trap_ccache, ustatus);
720 			pr_cont("\n");
721 			nv_error(gr, "TRAP_CCACHE %08x %08x %08x %08x"
722 				     " %08x %08x %08x\n",
723 				nv_rd32(gr, 0x405000), nv_rd32(gr, 0x405004),
724 				nv_rd32(gr, 0x405008), nv_rd32(gr, 0x40500c),
725 				nv_rd32(gr, 0x405010), nv_rd32(gr, 0x405014),
726 				nv_rd32(gr, 0x40501c));
727 
728 		}
729 
730 		nv_wr32(gr, 0x405018, 0xc0000000);
731 		nv_wr32(gr, 0x400108, 0x010);
732 		status &= ~0x010;
733 	}
734 
735 	/* Unknown, not seen yet... 0x402000 is the only trap status reg
736 	 * remaining, so try to handle it anyway. Perhaps related to that
737 	 * unknown DMA slot on tesla? */
738 	if (status & 0x20) {
739 		ustatus = nv_rd32(gr, 0x402000) & 0x7fffffff;
740 		if (display)
741 			nv_error(gr, "TRAP_UNKC04 0x%08x\n", ustatus);
742 		nv_wr32(gr, 0x402000, 0xc0000000);
743 		/* no status modifiction on purpose */
744 	}
745 
746 	/* TEXTURE: CUDA texturing units */
747 	if (status & 0x040) {
748 		nv50_gr_tp_trap(gr, 6, 0x408900, 0x408600, display,
749 				    "TRAP_TEXTURE");
750 		nv_wr32(gr, 0x400108, 0x040);
751 		status &= ~0x040;
752 	}
753 
754 	/* MP: CUDA execution engines. */
755 	if (status & 0x080) {
756 		nv50_gr_tp_trap(gr, 7, 0x408314, 0x40831c, display,
757 				    "TRAP_MP");
758 		nv_wr32(gr, 0x400108, 0x080);
759 		status &= ~0x080;
760 	}
761 
762 	/* PROP:  Handles TP-initiated uncached memory accesses:
763 	 * l[], g[], stack, 2d surfaces, render targets. */
764 	if (status & 0x100) {
765 		nv50_gr_tp_trap(gr, 8, 0x408e08, 0x408708, display,
766 				    "TRAP_PROP");
767 		nv_wr32(gr, 0x400108, 0x100);
768 		status &= ~0x100;
769 	}
770 
771 	if (status) {
772 		if (display)
773 			nv_error(gr, "TRAP: unknown 0x%08x\n", status);
774 		nv_wr32(gr, 0x400108, status);
775 	}
776 
777 	return 1;
778 }
779 
780 static void
781 nv50_gr_intr(struct nvkm_subdev *subdev)
782 {
783 	struct nvkm_fifo *fifo = nvkm_fifo(subdev);
784 	struct nvkm_engine *engine = nv_engine(subdev);
785 	struct nvkm_object *engctx;
786 	struct nvkm_handle *handle = NULL;
787 	struct nv50_gr *gr = (void *)subdev;
788 	u32 stat = nv_rd32(gr, 0x400100);
789 	u32 inst = nv_rd32(gr, 0x40032c) & 0x0fffffff;
790 	u32 addr = nv_rd32(gr, 0x400704);
791 	u32 subc = (addr & 0x00070000) >> 16;
792 	u32 mthd = (addr & 0x00001ffc);
793 	u32 data = nv_rd32(gr, 0x400708);
794 	u32 class = nv_rd32(gr, 0x400814);
795 	u32 show = stat, show_bitfield = stat;
796 	int chid;
797 
798 	engctx = nvkm_engctx_get(engine, inst);
799 	chid   = fifo->chid(fifo, engctx);
800 
801 	if (stat & 0x00000010) {
802 		handle = nvkm_handle_get_class(engctx, class);
803 		if (handle && !nv_call(handle->object, mthd, data))
804 			show &= ~0x00000010;
805 		nvkm_handle_put(handle);
806 	}
807 
808 	if (show & 0x00100000) {
809 		u32 ecode = nv_rd32(gr, 0x400110);
810 		nv_error(gr, "DATA_ERROR ");
811 		nvkm_enum_print(nv50_data_error_names, ecode);
812 		pr_cont("\n");
813 		show_bitfield &= ~0x00100000;
814 	}
815 
816 	if (stat & 0x00200000) {
817 		if (!nv50_gr_trap_handler(gr, show, chid, (u64)inst << 12,
818 					  engctx))
819 			show &= ~0x00200000;
820 		show_bitfield &= ~0x00200000;
821 	}
822 
823 	nv_wr32(gr, 0x400100, stat);
824 	nv_wr32(gr, 0x400500, 0x00010001);
825 
826 	if (show) {
827 		show &= show_bitfield;
828 		if (show) {
829 			nv_error(gr, "%s", "");
830 			nvkm_bitfield_print(nv50_gr_intr_name, show);
831 			pr_cont("\n");
832 		}
833 		nv_error(gr,
834 			 "ch %d [0x%010llx %s] subc %d class 0x%04x mthd 0x%04x data 0x%08x\n",
835 			 chid, (u64)inst << 12, nvkm_client_name(engctx),
836 			 subc, class, mthd, data);
837 	}
838 
839 	if (nv_rd32(gr, 0x400824) & (1 << 31))
840 		nv_wr32(gr, 0x400824, nv_rd32(gr, 0x400824) & ~(1 << 31));
841 
842 	nvkm_engctx_put(engctx);
843 }
844 
845 static int
846 nv50_gr_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
847 	     struct nvkm_oclass *oclass, void *data, u32 size,
848 	     struct nvkm_object **pobject)
849 {
850 	struct nv50_gr *gr;
851 	int ret;
852 
853 	ret = nvkm_gr_create(parent, engine, oclass, true, &gr);
854 	*pobject = nv_object(gr);
855 	if (ret)
856 		return ret;
857 
858 	nv_subdev(gr)->unit = 0x00201000;
859 	nv_subdev(gr)->intr = nv50_gr_intr;
860 	nv_engine(gr)->cclass = &nv50_gr_cclass;
861 
862 	gr->base.units = nv50_gr_units;
863 
864 	switch (nv_device(gr)->chipset) {
865 	case 0x50:
866 		nv_engine(gr)->sclass = nv50_gr_sclass;
867 		break;
868 	case 0x84:
869 	case 0x86:
870 	case 0x92:
871 	case 0x94:
872 	case 0x96:
873 	case 0x98:
874 		nv_engine(gr)->sclass = g84_gr_sclass;
875 		break;
876 	case 0xa0:
877 	case 0xaa:
878 	case 0xac:
879 		nv_engine(gr)->sclass = gt200_gr_sclass;
880 		break;
881 	case 0xa3:
882 	case 0xa5:
883 	case 0xa8:
884 		nv_engine(gr)->sclass = gt215_gr_sclass;
885 		break;
886 	case 0xaf:
887 		nv_engine(gr)->sclass = mcp89_gr_sclass;
888 		break;
889 
890 	}
891 
892 	/* unfortunate hw bug workaround... */
893 	if (nv_device(gr)->chipset != 0x50 &&
894 	    nv_device(gr)->chipset != 0xac)
895 		nv_engine(gr)->tlb_flush = g84_gr_tlb_flush;
896 
897 	spin_lock_init(&gr->lock);
898 	return 0;
899 }
900 
901 static int
902 nv50_gr_init(struct nvkm_object *object)
903 {
904 	struct nv50_gr *gr = (void *)object;
905 	int ret, units, i;
906 
907 	ret = nvkm_gr_init(&gr->base);
908 	if (ret)
909 		return ret;
910 
911 	/* NV_PGRAPH_DEBUG_3_HW_CTX_SWITCH_ENABLED */
912 	nv_wr32(gr, 0x40008c, 0x00000004);
913 
914 	/* reset/enable traps and interrupts */
915 	nv_wr32(gr, 0x400804, 0xc0000000);
916 	nv_wr32(gr, 0x406800, 0xc0000000);
917 	nv_wr32(gr, 0x400c04, 0xc0000000);
918 	nv_wr32(gr, 0x401800, 0xc0000000);
919 	nv_wr32(gr, 0x405018, 0xc0000000);
920 	nv_wr32(gr, 0x402000, 0xc0000000);
921 
922 	units = nv_rd32(gr, 0x001540);
923 	for (i = 0; i < 16; i++) {
924 		if (!(units & (1 << i)))
925 			continue;
926 
927 		if (nv_device(gr)->chipset < 0xa0) {
928 			nv_wr32(gr, 0x408900 + (i << 12), 0xc0000000);
929 			nv_wr32(gr, 0x408e08 + (i << 12), 0xc0000000);
930 			nv_wr32(gr, 0x408314 + (i << 12), 0xc0000000);
931 		} else {
932 			nv_wr32(gr, 0x408600 + (i << 11), 0xc0000000);
933 			nv_wr32(gr, 0x408708 + (i << 11), 0xc0000000);
934 			nv_wr32(gr, 0x40831c + (i << 11), 0xc0000000);
935 		}
936 	}
937 
938 	nv_wr32(gr, 0x400108, 0xffffffff);
939 	nv_wr32(gr, 0x400138, 0xffffffff);
940 	nv_wr32(gr, 0x400100, 0xffffffff);
941 	nv_wr32(gr, 0x40013c, 0xffffffff);
942 	nv_wr32(gr, 0x400500, 0x00010001);
943 
944 	/* upload context program, initialise ctxctl defaults */
945 	ret = nv50_grctx_init(nv_device(gr), &gr->size);
946 	if (ret)
947 		return ret;
948 
949 	nv_wr32(gr, 0x400824, 0x00000000);
950 	nv_wr32(gr, 0x400828, 0x00000000);
951 	nv_wr32(gr, 0x40082c, 0x00000000);
952 	nv_wr32(gr, 0x400830, 0x00000000);
953 	nv_wr32(gr, 0x40032c, 0x00000000);
954 	nv_wr32(gr, 0x400330, 0x00000000);
955 
956 	/* some unknown zcull magic */
957 	switch (nv_device(gr)->chipset & 0xf0) {
958 	case 0x50:
959 	case 0x80:
960 	case 0x90:
961 		nv_wr32(gr, 0x402ca8, 0x00000800);
962 		break;
963 	case 0xa0:
964 	default:
965 		if (nv_device(gr)->chipset == 0xa0 ||
966 		    nv_device(gr)->chipset == 0xaa ||
967 		    nv_device(gr)->chipset == 0xac) {
968 			nv_wr32(gr, 0x402ca8, 0x00000802);
969 		} else {
970 			nv_wr32(gr, 0x402cc0, 0x00000000);
971 			nv_wr32(gr, 0x402ca8, 0x00000002);
972 		}
973 
974 		break;
975 	}
976 
977 	/* zero out zcull regions */
978 	for (i = 0; i < 8; i++) {
979 		nv_wr32(gr, 0x402c20 + (i * 0x10), 0x00000000);
980 		nv_wr32(gr, 0x402c24 + (i * 0x10), 0x00000000);
981 		nv_wr32(gr, 0x402c28 + (i * 0x10), 0x00000000);
982 		nv_wr32(gr, 0x402c2c + (i * 0x10), 0x00000000);
983 	}
984 	return 0;
985 }
986 
987 struct nvkm_oclass
988 nv50_gr_oclass = {
989 	.handle = NV_ENGINE(GR, 0x50),
990 	.ofuncs = &(struct nvkm_ofuncs) {
991 		.ctor = nv50_gr_ctor,
992 		.dtor = _nvkm_gr_dtor,
993 		.init = nv50_gr_init,
994 		.fini = _nvkm_gr_fini,
995 	},
996 };
997