xref: /linux/drivers/gpu/drm/radeon/cik.c (revision 4d7696f1b05f4aeb586c74868fe3da2731daca4b)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35 
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
45 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
46 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
47 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
48 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
49 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
50 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
51 MODULE_FIRMWARE("radeon/KABINI_me.bin");
52 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
53 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
54 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
55 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
56 
57 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
58 extern void r600_ih_ring_fini(struct radeon_device *rdev);
59 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
60 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
61 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
62 extern void sumo_rlc_fini(struct radeon_device *rdev);
63 extern int sumo_rlc_init(struct radeon_device *rdev);
64 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
65 extern void si_rlc_reset(struct radeon_device *rdev);
66 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
67 extern int cik_sdma_resume(struct radeon_device *rdev);
68 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
69 extern void cik_sdma_fini(struct radeon_device *rdev);
70 extern void cik_sdma_vm_set_page(struct radeon_device *rdev,
71 				 struct radeon_ib *ib,
72 				 uint64_t pe,
73 				 uint64_t addr, unsigned count,
74 				 uint32_t incr, uint32_t flags);
75 static void cik_rlc_stop(struct radeon_device *rdev);
76 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
77 static void cik_program_aspm(struct radeon_device *rdev);
78 static void cik_init_pg(struct radeon_device *rdev);
79 static void cik_init_cg(struct radeon_device *rdev);
80 
81 /* get temperature in millidegrees */
82 int ci_get_temp(struct radeon_device *rdev)
83 {
84 	u32 temp;
85 	int actual_temp = 0;
86 
87 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
88 		CTF_TEMP_SHIFT;
89 
90 	if (temp & 0x200)
91 		actual_temp = 255;
92 	else
93 		actual_temp = temp & 0x1ff;
94 
95 	actual_temp = actual_temp * 1000;
96 
97 	return actual_temp;
98 }
99 
100 /* get temperature in millidegrees */
101 int kv_get_temp(struct radeon_device *rdev)
102 {
103 	u32 temp;
104 	int actual_temp = 0;
105 
106 	temp = RREG32_SMC(0xC0300E0C);
107 
108 	if (temp)
109 		actual_temp = (temp / 8) - 49;
110 	else
111 		actual_temp = 0;
112 
113 	actual_temp = actual_temp * 1000;
114 
115 	return actual_temp;
116 }
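/*
 * Worked example (illustrative only, not part of the driver): a raw
 * reading of 496 from the 0xC0300E0C sensor register above decodes to
 * 496 / 8 - 49 = 13 degC, which kv_get_temp() returns as 13000
 * millidegrees.  ci_get_temp() instead reads the 9-bit CTF field and
 * clamps readings with bit 0x200 set to 255 degC before scaling.
 */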
117 
118 /*
119  * Indirect registers accessor
120  */
121 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
122 {
123 	u32 r;
124 
125 	WREG32(PCIE_INDEX, reg);
126 	(void)RREG32(PCIE_INDEX);
127 	r = RREG32(PCIE_DATA);
128 	return r;
129 }
130 
131 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
132 {
133 	WREG32(PCIE_INDEX, reg);
134 	(void)RREG32(PCIE_INDEX);
135 	WREG32(PCIE_DATA, v);
136 	(void)RREG32(PCIE_DATA);
137 }
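/*
 * Illustrative helper (hypothetical, not part of this file): a
 * read-modify-write built on the PCIE index/data accessor pair above,
 * showing how the indirect port is typically used.
 */
static inline void cik_pciep_rmw(struct radeon_device *rdev, u32 reg,
				 u32 clr, u32 set)
{
	u32 tmp = cik_pciep_rreg(rdev, reg);

	tmp &= ~clr;	/* clear the requested bits */
	tmp |= set;	/* then set the new ones */
	cik_pciep_wreg(rdev, reg, tmp);
}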
138 
139 static const u32 spectre_rlc_save_restore_register_list[] =
140 {
141 	(0x0e00 << 16) | (0xc12c >> 2),
142 	0x00000000,
143 	(0x0e00 << 16) | (0xc140 >> 2),
144 	0x00000000,
145 	(0x0e00 << 16) | (0xc150 >> 2),
146 	0x00000000,
147 	(0x0e00 << 16) | (0xc15c >> 2),
148 	0x00000000,
149 	(0x0e00 << 16) | (0xc168 >> 2),
150 	0x00000000,
151 	(0x0e00 << 16) | (0xc170 >> 2),
152 	0x00000000,
153 	(0x0e00 << 16) | (0xc178 >> 2),
154 	0x00000000,
155 	(0x0e00 << 16) | (0xc204 >> 2),
156 	0x00000000,
157 	(0x0e00 << 16) | (0xc2b4 >> 2),
158 	0x00000000,
159 	(0x0e00 << 16) | (0xc2b8 >> 2),
160 	0x00000000,
161 	(0x0e00 << 16) | (0xc2bc >> 2),
162 	0x00000000,
163 	(0x0e00 << 16) | (0xc2c0 >> 2),
164 	0x00000000,
165 	(0x0e00 << 16) | (0x8228 >> 2),
166 	0x00000000,
167 	(0x0e00 << 16) | (0x829c >> 2),
168 	0x00000000,
169 	(0x0e00 << 16) | (0x869c >> 2),
170 	0x00000000,
171 	(0x0600 << 16) | (0x98f4 >> 2),
172 	0x00000000,
173 	(0x0e00 << 16) | (0x98f8 >> 2),
174 	0x00000000,
175 	(0x0e00 << 16) | (0x9900 >> 2),
176 	0x00000000,
177 	(0x0e00 << 16) | (0xc260 >> 2),
178 	0x00000000,
179 	(0x0e00 << 16) | (0x90e8 >> 2),
180 	0x00000000,
181 	(0x0e00 << 16) | (0x3c000 >> 2),
182 	0x00000000,
183 	(0x0e00 << 16) | (0x3c00c >> 2),
184 	0x00000000,
185 	(0x0e00 << 16) | (0x8c1c >> 2),
186 	0x00000000,
187 	(0x0e00 << 16) | (0x9700 >> 2),
188 	0x00000000,
189 	(0x0e00 << 16) | (0xcd20 >> 2),
190 	0x00000000,
191 	(0x4e00 << 16) | (0xcd20 >> 2),
192 	0x00000000,
193 	(0x5e00 << 16) | (0xcd20 >> 2),
194 	0x00000000,
195 	(0x6e00 << 16) | (0xcd20 >> 2),
196 	0x00000000,
197 	(0x7e00 << 16) | (0xcd20 >> 2),
198 	0x00000000,
199 	(0x8e00 << 16) | (0xcd20 >> 2),
200 	0x00000000,
201 	(0x9e00 << 16) | (0xcd20 >> 2),
202 	0x00000000,
203 	(0xae00 << 16) | (0xcd20 >> 2),
204 	0x00000000,
205 	(0xbe00 << 16) | (0xcd20 >> 2),
206 	0x00000000,
207 	(0x0e00 << 16) | (0x89bc >> 2),
208 	0x00000000,
209 	(0x0e00 << 16) | (0x8900 >> 2),
210 	0x00000000,
211 	0x3,
212 	(0x0e00 << 16) | (0xc130 >> 2),
213 	0x00000000,
214 	(0x0e00 << 16) | (0xc134 >> 2),
215 	0x00000000,
216 	(0x0e00 << 16) | (0xc1fc >> 2),
217 	0x00000000,
218 	(0x0e00 << 16) | (0xc208 >> 2),
219 	0x00000000,
220 	(0x0e00 << 16) | (0xc264 >> 2),
221 	0x00000000,
222 	(0x0e00 << 16) | (0xc268 >> 2),
223 	0x00000000,
224 	(0x0e00 << 16) | (0xc26c >> 2),
225 	0x00000000,
226 	(0x0e00 << 16) | (0xc270 >> 2),
227 	0x00000000,
228 	(0x0e00 << 16) | (0xc274 >> 2),
229 	0x00000000,
230 	(0x0e00 << 16) | (0xc278 >> 2),
231 	0x00000000,
232 	(0x0e00 << 16) | (0xc27c >> 2),
233 	0x00000000,
234 	(0x0e00 << 16) | (0xc280 >> 2),
235 	0x00000000,
236 	(0x0e00 << 16) | (0xc284 >> 2),
237 	0x00000000,
238 	(0x0e00 << 16) | (0xc288 >> 2),
239 	0x00000000,
240 	(0x0e00 << 16) | (0xc28c >> 2),
241 	0x00000000,
242 	(0x0e00 << 16) | (0xc290 >> 2),
243 	0x00000000,
244 	(0x0e00 << 16) | (0xc294 >> 2),
245 	0x00000000,
246 	(0x0e00 << 16) | (0xc298 >> 2),
247 	0x00000000,
248 	(0x0e00 << 16) | (0xc29c >> 2),
249 	0x00000000,
250 	(0x0e00 << 16) | (0xc2a0 >> 2),
251 	0x00000000,
252 	(0x0e00 << 16) | (0xc2a4 >> 2),
253 	0x00000000,
254 	(0x0e00 << 16) | (0xc2a8 >> 2),
255 	0x00000000,
256 	(0x0e00 << 16) | (0xc2ac  >> 2),
257 	0x00000000,
258 	(0x0e00 << 16) | (0xc2b0 >> 2),
259 	0x00000000,
260 	(0x0e00 << 16) | (0x301d0 >> 2),
261 	0x00000000,
262 	(0x0e00 << 16) | (0x30238 >> 2),
263 	0x00000000,
264 	(0x0e00 << 16) | (0x30250 >> 2),
265 	0x00000000,
266 	(0x0e00 << 16) | (0x30254 >> 2),
267 	0x00000000,
268 	(0x0e00 << 16) | (0x30258 >> 2),
269 	0x00000000,
270 	(0x0e00 << 16) | (0x3025c >> 2),
271 	0x00000000,
272 	(0x4e00 << 16) | (0xc900 >> 2),
273 	0x00000000,
274 	(0x5e00 << 16) | (0xc900 >> 2),
275 	0x00000000,
276 	(0x6e00 << 16) | (0xc900 >> 2),
277 	0x00000000,
278 	(0x7e00 << 16) | (0xc900 >> 2),
279 	0x00000000,
280 	(0x8e00 << 16) | (0xc900 >> 2),
281 	0x00000000,
282 	(0x9e00 << 16) | (0xc900 >> 2),
283 	0x00000000,
284 	(0xae00 << 16) | (0xc900 >> 2),
285 	0x00000000,
286 	(0xbe00 << 16) | (0xc900 >> 2),
287 	0x00000000,
288 	(0x4e00 << 16) | (0xc904 >> 2),
289 	0x00000000,
290 	(0x5e00 << 16) | (0xc904 >> 2),
291 	0x00000000,
292 	(0x6e00 << 16) | (0xc904 >> 2),
293 	0x00000000,
294 	(0x7e00 << 16) | (0xc904 >> 2),
295 	0x00000000,
296 	(0x8e00 << 16) | (0xc904 >> 2),
297 	0x00000000,
298 	(0x9e00 << 16) | (0xc904 >> 2),
299 	0x00000000,
300 	(0xae00 << 16) | (0xc904 >> 2),
301 	0x00000000,
302 	(0xbe00 << 16) | (0xc904 >> 2),
303 	0x00000000,
304 	(0x4e00 << 16) | (0xc908 >> 2),
305 	0x00000000,
306 	(0x5e00 << 16) | (0xc908 >> 2),
307 	0x00000000,
308 	(0x6e00 << 16) | (0xc908 >> 2),
309 	0x00000000,
310 	(0x7e00 << 16) | (0xc908 >> 2),
311 	0x00000000,
312 	(0x8e00 << 16) | (0xc908 >> 2),
313 	0x00000000,
314 	(0x9e00 << 16) | (0xc908 >> 2),
315 	0x00000000,
316 	(0xae00 << 16) | (0xc908 >> 2),
317 	0x00000000,
318 	(0xbe00 << 16) | (0xc908 >> 2),
319 	0x00000000,
320 	(0x4e00 << 16) | (0xc90c >> 2),
321 	0x00000000,
322 	(0x5e00 << 16) | (0xc90c >> 2),
323 	0x00000000,
324 	(0x6e00 << 16) | (0xc90c >> 2),
325 	0x00000000,
326 	(0x7e00 << 16) | (0xc90c >> 2),
327 	0x00000000,
328 	(0x8e00 << 16) | (0xc90c >> 2),
329 	0x00000000,
330 	(0x9e00 << 16) | (0xc90c >> 2),
331 	0x00000000,
332 	(0xae00 << 16) | (0xc90c >> 2),
333 	0x00000000,
334 	(0xbe00 << 16) | (0xc90c >> 2),
335 	0x00000000,
336 	(0x4e00 << 16) | (0xc910 >> 2),
337 	0x00000000,
338 	(0x5e00 << 16) | (0xc910 >> 2),
339 	0x00000000,
340 	(0x6e00 << 16) | (0xc910 >> 2),
341 	0x00000000,
342 	(0x7e00 << 16) | (0xc910 >> 2),
343 	0x00000000,
344 	(0x8e00 << 16) | (0xc910 >> 2),
345 	0x00000000,
346 	(0x9e00 << 16) | (0xc910 >> 2),
347 	0x00000000,
348 	(0xae00 << 16) | (0xc910 >> 2),
349 	0x00000000,
350 	(0xbe00 << 16) | (0xc910 >> 2),
351 	0x00000000,
352 	(0x0e00 << 16) | (0xc99c >> 2),
353 	0x00000000,
354 	(0x0e00 << 16) | (0x9834 >> 2),
355 	0x00000000,
356 	(0x0000 << 16) | (0x30f00 >> 2),
357 	0x00000000,
358 	(0x0001 << 16) | (0x30f00 >> 2),
359 	0x00000000,
360 	(0x0000 << 16) | (0x30f04 >> 2),
361 	0x00000000,
362 	(0x0001 << 16) | (0x30f04 >> 2),
363 	0x00000000,
364 	(0x0000 << 16) | (0x30f08 >> 2),
365 	0x00000000,
366 	(0x0001 << 16) | (0x30f08 >> 2),
367 	0x00000000,
368 	(0x0000 << 16) | (0x30f0c >> 2),
369 	0x00000000,
370 	(0x0001 << 16) | (0x30f0c >> 2),
371 	0x00000000,
372 	(0x0600 << 16) | (0x9b7c >> 2),
373 	0x00000000,
374 	(0x0e00 << 16) | (0x8a14 >> 2),
375 	0x00000000,
376 	(0x0e00 << 16) | (0x8a18 >> 2),
377 	0x00000000,
378 	(0x0600 << 16) | (0x30a00 >> 2),
379 	0x00000000,
380 	(0x0e00 << 16) | (0x8bf0 >> 2),
381 	0x00000000,
382 	(0x0e00 << 16) | (0x8bcc >> 2),
383 	0x00000000,
384 	(0x0e00 << 16) | (0x8b24 >> 2),
385 	0x00000000,
386 	(0x0e00 << 16) | (0x30a04 >> 2),
387 	0x00000000,
388 	(0x0600 << 16) | (0x30a10 >> 2),
389 	0x00000000,
390 	(0x0600 << 16) | (0x30a14 >> 2),
391 	0x00000000,
392 	(0x0600 << 16) | (0x30a18 >> 2),
393 	0x00000000,
394 	(0x0600 << 16) | (0x30a2c >> 2),
395 	0x00000000,
396 	(0x0e00 << 16) | (0xc700 >> 2),
397 	0x00000000,
398 	(0x0e00 << 16) | (0xc704 >> 2),
399 	0x00000000,
400 	(0x0e00 << 16) | (0xc708 >> 2),
401 	0x00000000,
402 	(0x0e00 << 16) | (0xc768 >> 2),
403 	0x00000000,
404 	(0x0400 << 16) | (0xc770 >> 2),
405 	0x00000000,
406 	(0x0400 << 16) | (0xc774 >> 2),
407 	0x00000000,
408 	(0x0400 << 16) | (0xc778 >> 2),
409 	0x00000000,
410 	(0x0400 << 16) | (0xc77c >> 2),
411 	0x00000000,
412 	(0x0400 << 16) | (0xc780 >> 2),
413 	0x00000000,
414 	(0x0400 << 16) | (0xc784 >> 2),
415 	0x00000000,
416 	(0x0400 << 16) | (0xc788 >> 2),
417 	0x00000000,
418 	(0x0400 << 16) | (0xc78c >> 2),
419 	0x00000000,
420 	(0x0400 << 16) | (0xc798 >> 2),
421 	0x00000000,
422 	(0x0400 << 16) | (0xc79c >> 2),
423 	0x00000000,
424 	(0x0400 << 16) | (0xc7a0 >> 2),
425 	0x00000000,
426 	(0x0400 << 16) | (0xc7a4 >> 2),
427 	0x00000000,
428 	(0x0400 << 16) | (0xc7a8 >> 2),
429 	0x00000000,
430 	(0x0400 << 16) | (0xc7ac >> 2),
431 	0x00000000,
432 	(0x0400 << 16) | (0xc7b0 >> 2),
433 	0x00000000,
434 	(0x0400 << 16) | (0xc7b4 >> 2),
435 	0x00000000,
436 	(0x0e00 << 16) | (0x9100 >> 2),
437 	0x00000000,
438 	(0x0e00 << 16) | (0x3c010 >> 2),
439 	0x00000000,
440 	(0x0e00 << 16) | (0x92a8 >> 2),
441 	0x00000000,
442 	(0x0e00 << 16) | (0x92ac >> 2),
443 	0x00000000,
444 	(0x0e00 << 16) | (0x92b4 >> 2),
445 	0x00000000,
446 	(0x0e00 << 16) | (0x92b8 >> 2),
447 	0x00000000,
448 	(0x0e00 << 16) | (0x92bc >> 2),
449 	0x00000000,
450 	(0x0e00 << 16) | (0x92c0 >> 2),
451 	0x00000000,
452 	(0x0e00 << 16) | (0x92c4 >> 2),
453 	0x00000000,
454 	(0x0e00 << 16) | (0x92c8 >> 2),
455 	0x00000000,
456 	(0x0e00 << 16) | (0x92cc >> 2),
457 	0x00000000,
458 	(0x0e00 << 16) | (0x92d0 >> 2),
459 	0x00000000,
460 	(0x0e00 << 16) | (0x8c00 >> 2),
461 	0x00000000,
462 	(0x0e00 << 16) | (0x8c04 >> 2),
463 	0x00000000,
464 	(0x0e00 << 16) | (0x8c20 >> 2),
465 	0x00000000,
466 	(0x0e00 << 16) | (0x8c38 >> 2),
467 	0x00000000,
468 	(0x0e00 << 16) | (0x8c3c >> 2),
469 	0x00000000,
470 	(0x0e00 << 16) | (0xae00 >> 2),
471 	0x00000000,
472 	(0x0e00 << 16) | (0x9604 >> 2),
473 	0x00000000,
474 	(0x0e00 << 16) | (0xac08 >> 2),
475 	0x00000000,
476 	(0x0e00 << 16) | (0xac0c >> 2),
477 	0x00000000,
478 	(0x0e00 << 16) | (0xac10 >> 2),
479 	0x00000000,
480 	(0x0e00 << 16) | (0xac14 >> 2),
481 	0x00000000,
482 	(0x0e00 << 16) | (0xac58 >> 2),
483 	0x00000000,
484 	(0x0e00 << 16) | (0xac68 >> 2),
485 	0x00000000,
486 	(0x0e00 << 16) | (0xac6c >> 2),
487 	0x00000000,
488 	(0x0e00 << 16) | (0xac70 >> 2),
489 	0x00000000,
490 	(0x0e00 << 16) | (0xac74 >> 2),
491 	0x00000000,
492 	(0x0e00 << 16) | (0xac78 >> 2),
493 	0x00000000,
494 	(0x0e00 << 16) | (0xac7c >> 2),
495 	0x00000000,
496 	(0x0e00 << 16) | (0xac80 >> 2),
497 	0x00000000,
498 	(0x0e00 << 16) | (0xac84 >> 2),
499 	0x00000000,
500 	(0x0e00 << 16) | (0xac88 >> 2),
501 	0x00000000,
502 	(0x0e00 << 16) | (0xac8c >> 2),
503 	0x00000000,
504 	(0x0e00 << 16) | (0x970c >> 2),
505 	0x00000000,
506 	(0x0e00 << 16) | (0x9714 >> 2),
507 	0x00000000,
508 	(0x0e00 << 16) | (0x9718 >> 2),
509 	0x00000000,
510 	(0x0e00 << 16) | (0x971c >> 2),
511 	0x00000000,
512 	(0x0e00 << 16) | (0x31068 >> 2),
513 	0x00000000,
514 	(0x4e00 << 16) | (0x31068 >> 2),
515 	0x00000000,
516 	(0x5e00 << 16) | (0x31068 >> 2),
517 	0x00000000,
518 	(0x6e00 << 16) | (0x31068 >> 2),
519 	0x00000000,
520 	(0x7e00 << 16) | (0x31068 >> 2),
521 	0x00000000,
522 	(0x8e00 << 16) | (0x31068 >> 2),
523 	0x00000000,
524 	(0x9e00 << 16) | (0x31068 >> 2),
525 	0x00000000,
526 	(0xae00 << 16) | (0x31068 >> 2),
527 	0x00000000,
528 	(0xbe00 << 16) | (0x31068 >> 2),
529 	0x00000000,
530 	(0x0e00 << 16) | (0xcd10 >> 2),
531 	0x00000000,
532 	(0x0e00 << 16) | (0xcd14 >> 2),
533 	0x00000000,
534 	(0x0e00 << 16) | (0x88b0 >> 2),
535 	0x00000000,
536 	(0x0e00 << 16) | (0x88b4 >> 2),
537 	0x00000000,
538 	(0x0e00 << 16) | (0x88b8 >> 2),
539 	0x00000000,
540 	(0x0e00 << 16) | (0x88bc >> 2),
541 	0x00000000,
542 	(0x0400 << 16) | (0x89c0 >> 2),
543 	0x00000000,
544 	(0x0e00 << 16) | (0x88c4 >> 2),
545 	0x00000000,
546 	(0x0e00 << 16) | (0x88c8 >> 2),
547 	0x00000000,
548 	(0x0e00 << 16) | (0x88d0 >> 2),
549 	0x00000000,
550 	(0x0e00 << 16) | (0x88d4 >> 2),
551 	0x00000000,
552 	(0x0e00 << 16) | (0x88d8 >> 2),
553 	0x00000000,
554 	(0x0e00 << 16) | (0x8980 >> 2),
555 	0x00000000,
556 	(0x0e00 << 16) | (0x30938 >> 2),
557 	0x00000000,
558 	(0x0e00 << 16) | (0x3093c >> 2),
559 	0x00000000,
560 	(0x0e00 << 16) | (0x30940 >> 2),
561 	0x00000000,
562 	(0x0e00 << 16) | (0x89a0 >> 2),
563 	0x00000000,
564 	(0x0e00 << 16) | (0x30900 >> 2),
565 	0x00000000,
566 	(0x0e00 << 16) | (0x30904 >> 2),
567 	0x00000000,
568 	(0x0e00 << 16) | (0x89b4 >> 2),
569 	0x00000000,
570 	(0x0e00 << 16) | (0x3c210 >> 2),
571 	0x00000000,
572 	(0x0e00 << 16) | (0x3c214 >> 2),
573 	0x00000000,
574 	(0x0e00 << 16) | (0x3c218 >> 2),
575 	0x00000000,
576 	(0x0e00 << 16) | (0x8904 >> 2),
577 	0x00000000,
578 	0x5,
579 	(0x0e00 << 16) | (0x8c28 >> 2),
580 	(0x0e00 << 16) | (0x8c2c >> 2),
581 	(0x0e00 << 16) | (0x8c30 >> 2),
582 	(0x0e00 << 16) | (0x8c34 >> 2),
583 	(0x0e00 << 16) | (0x9600 >> 2),
584 };
585 
586 static const u32 kalindi_rlc_save_restore_register_list[] =
587 {
588 	(0x0e00 << 16) | (0xc12c >> 2),
589 	0x00000000,
590 	(0x0e00 << 16) | (0xc140 >> 2),
591 	0x00000000,
592 	(0x0e00 << 16) | (0xc150 >> 2),
593 	0x00000000,
594 	(0x0e00 << 16) | (0xc15c >> 2),
595 	0x00000000,
596 	(0x0e00 << 16) | (0xc168 >> 2),
597 	0x00000000,
598 	(0x0e00 << 16) | (0xc170 >> 2),
599 	0x00000000,
600 	(0x0e00 << 16) | (0xc204 >> 2),
601 	0x00000000,
602 	(0x0e00 << 16) | (0xc2b4 >> 2),
603 	0x00000000,
604 	(0x0e00 << 16) | (0xc2b8 >> 2),
605 	0x00000000,
606 	(0x0e00 << 16) | (0xc2bc >> 2),
607 	0x00000000,
608 	(0x0e00 << 16) | (0xc2c0 >> 2),
609 	0x00000000,
610 	(0x0e00 << 16) | (0x8228 >> 2),
611 	0x00000000,
612 	(0x0e00 << 16) | (0x829c >> 2),
613 	0x00000000,
614 	(0x0e00 << 16) | (0x869c >> 2),
615 	0x00000000,
616 	(0x0600 << 16) | (0x98f4 >> 2),
617 	0x00000000,
618 	(0x0e00 << 16) | (0x98f8 >> 2),
619 	0x00000000,
620 	(0x0e00 << 16) | (0x9900 >> 2),
621 	0x00000000,
622 	(0x0e00 << 16) | (0xc260 >> 2),
623 	0x00000000,
624 	(0x0e00 << 16) | (0x90e8 >> 2),
625 	0x00000000,
626 	(0x0e00 << 16) | (0x3c000 >> 2),
627 	0x00000000,
628 	(0x0e00 << 16) | (0x3c00c >> 2),
629 	0x00000000,
630 	(0x0e00 << 16) | (0x8c1c >> 2),
631 	0x00000000,
632 	(0x0e00 << 16) | (0x9700 >> 2),
633 	0x00000000,
634 	(0x0e00 << 16) | (0xcd20 >> 2),
635 	0x00000000,
636 	(0x4e00 << 16) | (0xcd20 >> 2),
637 	0x00000000,
638 	(0x5e00 << 16) | (0xcd20 >> 2),
639 	0x00000000,
640 	(0x6e00 << 16) | (0xcd20 >> 2),
641 	0x00000000,
642 	(0x7e00 << 16) | (0xcd20 >> 2),
643 	0x00000000,
644 	(0x0e00 << 16) | (0x89bc >> 2),
645 	0x00000000,
646 	(0x0e00 << 16) | (0x8900 >> 2),
647 	0x00000000,
648 	0x3,
649 	(0x0e00 << 16) | (0xc130 >> 2),
650 	0x00000000,
651 	(0x0e00 << 16) | (0xc134 >> 2),
652 	0x00000000,
653 	(0x0e00 << 16) | (0xc1fc >> 2),
654 	0x00000000,
655 	(0x0e00 << 16) | (0xc208 >> 2),
656 	0x00000000,
657 	(0x0e00 << 16) | (0xc264 >> 2),
658 	0x00000000,
659 	(0x0e00 << 16) | (0xc268 >> 2),
660 	0x00000000,
661 	(0x0e00 << 16) | (0xc26c >> 2),
662 	0x00000000,
663 	(0x0e00 << 16) | (0xc270 >> 2),
664 	0x00000000,
665 	(0x0e00 << 16) | (0xc274 >> 2),
666 	0x00000000,
667 	(0x0e00 << 16) | (0xc28c >> 2),
668 	0x00000000,
669 	(0x0e00 << 16) | (0xc290 >> 2),
670 	0x00000000,
671 	(0x0e00 << 16) | (0xc294 >> 2),
672 	0x00000000,
673 	(0x0e00 << 16) | (0xc298 >> 2),
674 	0x00000000,
675 	(0x0e00 << 16) | (0xc2a0 >> 2),
676 	0x00000000,
677 	(0x0e00 << 16) | (0xc2a4 >> 2),
678 	0x00000000,
679 	(0x0e00 << 16) | (0xc2a8 >> 2),
680 	0x00000000,
681 	(0x0e00 << 16) | (0xc2ac >> 2),
682 	0x00000000,
683 	(0x0e00 << 16) | (0x301d0 >> 2),
684 	0x00000000,
685 	(0x0e00 << 16) | (0x30238 >> 2),
686 	0x00000000,
687 	(0x0e00 << 16) | (0x30250 >> 2),
688 	0x00000000,
689 	(0x0e00 << 16) | (0x30254 >> 2),
690 	0x00000000,
691 	(0x0e00 << 16) | (0x30258 >> 2),
692 	0x00000000,
693 	(0x0e00 << 16) | (0x3025c >> 2),
694 	0x00000000,
695 	(0x4e00 << 16) | (0xc900 >> 2),
696 	0x00000000,
697 	(0x5e00 << 16) | (0xc900 >> 2),
698 	0x00000000,
699 	(0x6e00 << 16) | (0xc900 >> 2),
700 	0x00000000,
701 	(0x7e00 << 16) | (0xc900 >> 2),
702 	0x00000000,
703 	(0x4e00 << 16) | (0xc904 >> 2),
704 	0x00000000,
705 	(0x5e00 << 16) | (0xc904 >> 2),
706 	0x00000000,
707 	(0x6e00 << 16) | (0xc904 >> 2),
708 	0x00000000,
709 	(0x7e00 << 16) | (0xc904 >> 2),
710 	0x00000000,
711 	(0x4e00 << 16) | (0xc908 >> 2),
712 	0x00000000,
713 	(0x5e00 << 16) | (0xc908 >> 2),
714 	0x00000000,
715 	(0x6e00 << 16) | (0xc908 >> 2),
716 	0x00000000,
717 	(0x7e00 << 16) | (0xc908 >> 2),
718 	0x00000000,
719 	(0x4e00 << 16) | (0xc90c >> 2),
720 	0x00000000,
721 	(0x5e00 << 16) | (0xc90c >> 2),
722 	0x00000000,
723 	(0x6e00 << 16) | (0xc90c >> 2),
724 	0x00000000,
725 	(0x7e00 << 16) | (0xc90c >> 2),
726 	0x00000000,
727 	(0x4e00 << 16) | (0xc910 >> 2),
728 	0x00000000,
729 	(0x5e00 << 16) | (0xc910 >> 2),
730 	0x00000000,
731 	(0x6e00 << 16) | (0xc910 >> 2),
732 	0x00000000,
733 	(0x7e00 << 16) | (0xc910 >> 2),
734 	0x00000000,
735 	(0x0e00 << 16) | (0xc99c >> 2),
736 	0x00000000,
737 	(0x0e00 << 16) | (0x9834 >> 2),
738 	0x00000000,
739 	(0x0000 << 16) | (0x30f00 >> 2),
740 	0x00000000,
741 	(0x0000 << 16) | (0x30f04 >> 2),
742 	0x00000000,
743 	(0x0000 << 16) | (0x30f08 >> 2),
744 	0x00000000,
745 	(0x0000 << 16) | (0x30f0c >> 2),
746 	0x00000000,
747 	(0x0600 << 16) | (0x9b7c >> 2),
748 	0x00000000,
749 	(0x0e00 << 16) | (0x8a14 >> 2),
750 	0x00000000,
751 	(0x0e00 << 16) | (0x8a18 >> 2),
752 	0x00000000,
753 	(0x0600 << 16) | (0x30a00 >> 2),
754 	0x00000000,
755 	(0x0e00 << 16) | (0x8bf0 >> 2),
756 	0x00000000,
757 	(0x0e00 << 16) | (0x8bcc >> 2),
758 	0x00000000,
759 	(0x0e00 << 16) | (0x8b24 >> 2),
760 	0x00000000,
761 	(0x0e00 << 16) | (0x30a04 >> 2),
762 	0x00000000,
763 	(0x0600 << 16) | (0x30a10 >> 2),
764 	0x00000000,
765 	(0x0600 << 16) | (0x30a14 >> 2),
766 	0x00000000,
767 	(0x0600 << 16) | (0x30a18 >> 2),
768 	0x00000000,
769 	(0x0600 << 16) | (0x30a2c >> 2),
770 	0x00000000,
771 	(0x0e00 << 16) | (0xc700 >> 2),
772 	0x00000000,
773 	(0x0e00 << 16) | (0xc704 >> 2),
774 	0x00000000,
775 	(0x0e00 << 16) | (0xc708 >> 2),
776 	0x00000000,
777 	(0x0e00 << 16) | (0xc768 >> 2),
778 	0x00000000,
779 	(0x0400 << 16) | (0xc770 >> 2),
780 	0x00000000,
781 	(0x0400 << 16) | (0xc774 >> 2),
782 	0x00000000,
783 	(0x0400 << 16) | (0xc798 >> 2),
784 	0x00000000,
785 	(0x0400 << 16) | (0xc79c >> 2),
786 	0x00000000,
787 	(0x0e00 << 16) | (0x9100 >> 2),
788 	0x00000000,
789 	(0x0e00 << 16) | (0x3c010 >> 2),
790 	0x00000000,
791 	(0x0e00 << 16) | (0x8c00 >> 2),
792 	0x00000000,
793 	(0x0e00 << 16) | (0x8c04 >> 2),
794 	0x00000000,
795 	(0x0e00 << 16) | (0x8c20 >> 2),
796 	0x00000000,
797 	(0x0e00 << 16) | (0x8c38 >> 2),
798 	0x00000000,
799 	(0x0e00 << 16) | (0x8c3c >> 2),
800 	0x00000000,
801 	(0x0e00 << 16) | (0xae00 >> 2),
802 	0x00000000,
803 	(0x0e00 << 16) | (0x9604 >> 2),
804 	0x00000000,
805 	(0x0e00 << 16) | (0xac08 >> 2),
806 	0x00000000,
807 	(0x0e00 << 16) | (0xac0c >> 2),
808 	0x00000000,
809 	(0x0e00 << 16) | (0xac10 >> 2),
810 	0x00000000,
811 	(0x0e00 << 16) | (0xac14 >> 2),
812 	0x00000000,
813 	(0x0e00 << 16) | (0xac58 >> 2),
814 	0x00000000,
815 	(0x0e00 << 16) | (0xac68 >> 2),
816 	0x00000000,
817 	(0x0e00 << 16) | (0xac6c >> 2),
818 	0x00000000,
819 	(0x0e00 << 16) | (0xac70 >> 2),
820 	0x00000000,
821 	(0x0e00 << 16) | (0xac74 >> 2),
822 	0x00000000,
823 	(0x0e00 << 16) | (0xac78 >> 2),
824 	0x00000000,
825 	(0x0e00 << 16) | (0xac7c >> 2),
826 	0x00000000,
827 	(0x0e00 << 16) | (0xac80 >> 2),
828 	0x00000000,
829 	(0x0e00 << 16) | (0xac84 >> 2),
830 	0x00000000,
831 	(0x0e00 << 16) | (0xac88 >> 2),
832 	0x00000000,
833 	(0x0e00 << 16) | (0xac8c >> 2),
834 	0x00000000,
835 	(0x0e00 << 16) | (0x970c >> 2),
836 	0x00000000,
837 	(0x0e00 << 16) | (0x9714 >> 2),
838 	0x00000000,
839 	(0x0e00 << 16) | (0x9718 >> 2),
840 	0x00000000,
841 	(0x0e00 << 16) | (0x971c >> 2),
842 	0x00000000,
843 	(0x0e00 << 16) | (0x31068 >> 2),
844 	0x00000000,
845 	(0x4e00 << 16) | (0x31068 >> 2),
846 	0x00000000,
847 	(0x5e00 << 16) | (0x31068 >> 2),
848 	0x00000000,
849 	(0x6e00 << 16) | (0x31068 >> 2),
850 	0x00000000,
851 	(0x7e00 << 16) | (0x31068 >> 2),
852 	0x00000000,
853 	(0x0e00 << 16) | (0xcd10 >> 2),
854 	0x00000000,
855 	(0x0e00 << 16) | (0xcd14 >> 2),
856 	0x00000000,
857 	(0x0e00 << 16) | (0x88b0 >> 2),
858 	0x00000000,
859 	(0x0e00 << 16) | (0x88b4 >> 2),
860 	0x00000000,
861 	(0x0e00 << 16) | (0x88b8 >> 2),
862 	0x00000000,
863 	(0x0e00 << 16) | (0x88bc >> 2),
864 	0x00000000,
865 	(0x0400 << 16) | (0x89c0 >> 2),
866 	0x00000000,
867 	(0x0e00 << 16) | (0x88c4 >> 2),
868 	0x00000000,
869 	(0x0e00 << 16) | (0x88c8 >> 2),
870 	0x00000000,
871 	(0x0e00 << 16) | (0x88d0 >> 2),
872 	0x00000000,
873 	(0x0e00 << 16) | (0x88d4 >> 2),
874 	0x00000000,
875 	(0x0e00 << 16) | (0x88d8 >> 2),
876 	0x00000000,
877 	(0x0e00 << 16) | (0x8980 >> 2),
878 	0x00000000,
879 	(0x0e00 << 16) | (0x30938 >> 2),
880 	0x00000000,
881 	(0x0e00 << 16) | (0x3093c >> 2),
882 	0x00000000,
883 	(0x0e00 << 16) | (0x30940 >> 2),
884 	0x00000000,
885 	(0x0e00 << 16) | (0x89a0 >> 2),
886 	0x00000000,
887 	(0x0e00 << 16) | (0x30900 >> 2),
888 	0x00000000,
889 	(0x0e00 << 16) | (0x30904 >> 2),
890 	0x00000000,
891 	(0x0e00 << 16) | (0x89b4 >> 2),
892 	0x00000000,
893 	(0x0e00 << 16) | (0x3e1fc >> 2),
894 	0x00000000,
895 	(0x0e00 << 16) | (0x3c210 >> 2),
896 	0x00000000,
897 	(0x0e00 << 16) | (0x3c214 >> 2),
898 	0x00000000,
899 	(0x0e00 << 16) | (0x3c218 >> 2),
900 	0x00000000,
901 	(0x0e00 << 16) | (0x8904 >> 2),
902 	0x00000000,
903 	0x5,
904 	(0x0e00 << 16) | (0x8c28 >> 2),
905 	(0x0e00 << 16) | (0x8c2c >> 2),
906 	(0x0e00 << 16) | (0x8c30 >> 2),
907 	(0x0e00 << 16) | (0x8c34 >> 2),
908 	(0x0e00 << 16) | (0x9600 >> 2),
909 };
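/*
 * Note on the two lists above: a typical entry is a pair -- a selector
 * in the high 16 bits and a dword register offset in the low 16 bits
 * (hence the ">> 2" on each offset) -- followed by a 0x00000000 value
 * slot.  The bare 0x3 and 0x5 values are list-control markers whose
 * meaning is defined by the RLC ucode (the final 0x5, for example,
 * precedes exactly five offsets with no value slots).  The driver side
 * appears to just copy the list into the RLC save/restore buffer (see
 * sumo_rlc_init()); it never decodes the entries itself.  Hypothetical
 * decode helpers, for illustration only:
 */
static inline u32 rlc_list_byte_offset(u32 entry)
{
	return (entry & 0xffff) << 2;	/* undo the ">> 2" used above */
}

static inline u32 rlc_list_selector(u32 entry)
{
	return entry >> 16;	/* instance/select field; RLC-defined */
}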
910 
911 static const u32 bonaire_golden_spm_registers[] =
912 {
913 	0x30800, 0xe0ffffff, 0xe0000000
914 };
915 
916 static const u32 bonaire_golden_common_registers[] =
917 {
918 	0xc770, 0xffffffff, 0x00000800,
919 	0xc774, 0xffffffff, 0x00000800,
920 	0xc798, 0xffffffff, 0x00007fbf,
921 	0xc79c, 0xffffffff, 0x00007faf
922 };
923 
924 static const u32 bonaire_golden_registers[] =
925 {
926 	0x3354, 0x00000333, 0x00000333,
927 	0x3350, 0x000c0fc0, 0x00040200,
928 	0x9a10, 0x00010000, 0x00058208,
929 	0x3c000, 0xffff1fff, 0x00140000,
930 	0x3c200, 0xfdfc0fff, 0x00000100,
931 	0x3c234, 0x40000000, 0x40000200,
932 	0x9830, 0xffffffff, 0x00000000,
933 	0x9834, 0xf00fffff, 0x00000400,
934 	0x9838, 0x0002021c, 0x00020200,
935 	0xc78, 0x00000080, 0x00000000,
936 	0x5bb0, 0x000000f0, 0x00000070,
937 	0x5bc0, 0xf0311fff, 0x80300000,
938 	0x98f8, 0x73773777, 0x12010001,
939 	0x350c, 0x00810000, 0x408af000,
940 	0x7030, 0x31000111, 0x00000011,
941 	0x2f48, 0x73773777, 0x12010001,
942 	0x220c, 0x00007fb6, 0x0021a1b1,
943 	0x2210, 0x00007fb6, 0x002021b1,
944 	0x2180, 0x00007fb6, 0x00002191,
945 	0x2218, 0x00007fb6, 0x002121b1,
946 	0x221c, 0x00007fb6, 0x002021b1,
947 	0x21dc, 0x00007fb6, 0x00002191,
948 	0x21e0, 0x00007fb6, 0x00002191,
949 	0x3628, 0x0000003f, 0x0000000a,
950 	0x362c, 0x0000003f, 0x0000000a,
951 	0x2ae4, 0x00073ffe, 0x000022a2,
952 	0x240c, 0x000007ff, 0x00000000,
953 	0x8a14, 0xf000003f, 0x00000007,
954 	0x8bf0, 0x00002001, 0x00000001,
955 	0x8b24, 0xffffffff, 0x00ffffff,
956 	0x30a04, 0x0000ff0f, 0x00000000,
957 	0x28a4c, 0x07ffffff, 0x06000000,
958 	0x4d8, 0x00000fff, 0x00000100,
959 	0x3e78, 0x00000001, 0x00000002,
960 	0x9100, 0x03000000, 0x0362c688,
961 	0x8c00, 0x000000ff, 0x00000001,
962 	0xe40, 0x00001fff, 0x00001fff,
963 	0x9060, 0x0000007f, 0x00000020,
964 	0x9508, 0x00010000, 0x00010000,
965 	0xac14, 0x000003ff, 0x000000f3,
966 	0xac0c, 0xffffffff, 0x00001032
967 };
968 
969 static const u32 bonaire_mgcg_cgcg_init[] =
970 {
971 	0xc420, 0xffffffff, 0xfffffffc,
972 	0x30800, 0xffffffff, 0xe0000000,
973 	0x3c2a0, 0xffffffff, 0x00000100,
974 	0x3c208, 0xffffffff, 0x00000100,
975 	0x3c2c0, 0xffffffff, 0xc0000100,
976 	0x3c2c8, 0xffffffff, 0xc0000100,
977 	0x3c2c4, 0xffffffff, 0xc0000100,
978 	0x55e4, 0xffffffff, 0x00600100,
979 	0x3c280, 0xffffffff, 0x00000100,
980 	0x3c214, 0xffffffff, 0x06000100,
981 	0x3c220, 0xffffffff, 0x00000100,
982 	0x3c218, 0xffffffff, 0x06000100,
983 	0x3c204, 0xffffffff, 0x00000100,
984 	0x3c2e0, 0xffffffff, 0x00000100,
985 	0x3c224, 0xffffffff, 0x00000100,
986 	0x3c200, 0xffffffff, 0x00000100,
987 	0x3c230, 0xffffffff, 0x00000100,
988 	0x3c234, 0xffffffff, 0x00000100,
989 	0x3c250, 0xffffffff, 0x00000100,
990 	0x3c254, 0xffffffff, 0x00000100,
991 	0x3c258, 0xffffffff, 0x00000100,
992 	0x3c25c, 0xffffffff, 0x00000100,
993 	0x3c260, 0xffffffff, 0x00000100,
994 	0x3c27c, 0xffffffff, 0x00000100,
995 	0x3c278, 0xffffffff, 0x00000100,
996 	0x3c210, 0xffffffff, 0x06000100,
997 	0x3c290, 0xffffffff, 0x00000100,
998 	0x3c274, 0xffffffff, 0x00000100,
999 	0x3c2b4, 0xffffffff, 0x00000100,
1000 	0x3c2b0, 0xffffffff, 0x00000100,
1001 	0x3c270, 0xffffffff, 0x00000100,
1002 	0x30800, 0xffffffff, 0xe0000000,
1003 	0x3c020, 0xffffffff, 0x00010000,
1004 	0x3c024, 0xffffffff, 0x00030002,
1005 	0x3c028, 0xffffffff, 0x00040007,
1006 	0x3c02c, 0xffffffff, 0x00060005,
1007 	0x3c030, 0xffffffff, 0x00090008,
1008 	0x3c034, 0xffffffff, 0x00010000,
1009 	0x3c038, 0xffffffff, 0x00030002,
1010 	0x3c03c, 0xffffffff, 0x00040007,
1011 	0x3c040, 0xffffffff, 0x00060005,
1012 	0x3c044, 0xffffffff, 0x00090008,
1013 	0x3c048, 0xffffffff, 0x00010000,
1014 	0x3c04c, 0xffffffff, 0x00030002,
1015 	0x3c050, 0xffffffff, 0x00040007,
1016 	0x3c054, 0xffffffff, 0x00060005,
1017 	0x3c058, 0xffffffff, 0x00090008,
1018 	0x3c05c, 0xffffffff, 0x00010000,
1019 	0x3c060, 0xffffffff, 0x00030002,
1020 	0x3c064, 0xffffffff, 0x00040007,
1021 	0x3c068, 0xffffffff, 0x00060005,
1022 	0x3c06c, 0xffffffff, 0x00090008,
1023 	0x3c070, 0xffffffff, 0x00010000,
1024 	0x3c074, 0xffffffff, 0x00030002,
1025 	0x3c078, 0xffffffff, 0x00040007,
1026 	0x3c07c, 0xffffffff, 0x00060005,
1027 	0x3c080, 0xffffffff, 0x00090008,
1028 	0x3c084, 0xffffffff, 0x00010000,
1029 	0x3c088, 0xffffffff, 0x00030002,
1030 	0x3c08c, 0xffffffff, 0x00040007,
1031 	0x3c090, 0xffffffff, 0x00060005,
1032 	0x3c094, 0xffffffff, 0x00090008,
1033 	0x3c098, 0xffffffff, 0x00010000,
1034 	0x3c09c, 0xffffffff, 0x00030002,
1035 	0x3c0a0, 0xffffffff, 0x00040007,
1036 	0x3c0a4, 0xffffffff, 0x00060005,
1037 	0x3c0a8, 0xffffffff, 0x00090008,
1038 	0x3c000, 0xffffffff, 0x96e00200,
1039 	0x8708, 0xffffffff, 0x00900100,
1040 	0xc424, 0xffffffff, 0x0020003f,
1041 	0x38, 0xffffffff, 0x0140001c,
1042 	0x3c, 0x000f0000, 0x000f0000,
1043 	0x220, 0xffffffff, 0xC060000C,
1044 	0x224, 0xc0000fff, 0x00000100,
1045 	0xf90, 0xffffffff, 0x00000100,
1046 	0xf98, 0x00000101, 0x00000000,
1047 	0x20a8, 0xffffffff, 0x00000104,
1048 	0x55e4, 0xff000fff, 0x00000100,
1049 	0x30cc, 0xc0000fff, 0x00000104,
1050 	0xc1e4, 0x00000001, 0x00000001,
1051 	0xd00c, 0xff000ff0, 0x00000100,
1052 	0xd80c, 0xff000ff0, 0x00000100
1053 };
1054 
1055 static const u32 spectre_golden_spm_registers[] =
1056 {
1057 	0x30800, 0xe0ffffff, 0xe0000000
1058 };
1059 
1060 static const u32 spectre_golden_common_registers[] =
1061 {
1062 	0xc770, 0xffffffff, 0x00000800,
1063 	0xc774, 0xffffffff, 0x00000800,
1064 	0xc798, 0xffffffff, 0x00007fbf,
1065 	0xc79c, 0xffffffff, 0x00007faf
1066 };
1067 
1068 static const u32 spectre_golden_registers[] =
1069 {
1070 	0x3c000, 0xffff1fff, 0x96940200,
1071 	0x3c00c, 0xffff0001, 0xff000000,
1072 	0x3c200, 0xfffc0fff, 0x00000100,
1073 	0x6ed8, 0x00010101, 0x00010000,
1074 	0x9834, 0xf00fffff, 0x00000400,
1075 	0x9838, 0xfffffffc, 0x00020200,
1076 	0x5bb0, 0x000000f0, 0x00000070,
1077 	0x5bc0, 0xf0311fff, 0x80300000,
1078 	0x98f8, 0x73773777, 0x12010001,
1079 	0x9b7c, 0x00ff0000, 0x00fc0000,
1080 	0x2f48, 0x73773777, 0x12010001,
1081 	0x8a14, 0xf000003f, 0x00000007,
1082 	0x8b24, 0xffffffff, 0x00ffffff,
1083 	0x28350, 0x3f3f3fff, 0x00000082,
1084 	0x28355, 0x0000003f, 0x00000000,
1085 	0x3e78, 0x00000001, 0x00000002,
1086 	0x913c, 0xffff03df, 0x00000004,
1087 	0xc768, 0x00000008, 0x00000008,
1088 	0x8c00, 0x000008ff, 0x00000800,
1089 	0x9508, 0x00010000, 0x00010000,
1090 	0xac0c, 0xffffffff, 0x54763210,
1091 	0x214f8, 0x01ff01ff, 0x00000002,
1092 	0x21498, 0x007ff800, 0x00200000,
1093 	0x2015c, 0xffffffff, 0x00000f40,
1094 	0x30934, 0xffffffff, 0x00000001
1095 };
1096 
1097 static const u32 spectre_mgcg_cgcg_init[] =
1098 {
1099 	0xc420, 0xffffffff, 0xfffffffc,
1100 	0x30800, 0xffffffff, 0xe0000000,
1101 	0x3c2a0, 0xffffffff, 0x00000100,
1102 	0x3c208, 0xffffffff, 0x00000100,
1103 	0x3c2c0, 0xffffffff, 0x00000100,
1104 	0x3c2c8, 0xffffffff, 0x00000100,
1105 	0x3c2c4, 0xffffffff, 0x00000100,
1106 	0x55e4, 0xffffffff, 0x00600100,
1107 	0x3c280, 0xffffffff, 0x00000100,
1108 	0x3c214, 0xffffffff, 0x06000100,
1109 	0x3c220, 0xffffffff, 0x00000100,
1110 	0x3c218, 0xffffffff, 0x06000100,
1111 	0x3c204, 0xffffffff, 0x00000100,
1112 	0x3c2e0, 0xffffffff, 0x00000100,
1113 	0x3c224, 0xffffffff, 0x00000100,
1114 	0x3c200, 0xffffffff, 0x00000100,
1115 	0x3c230, 0xffffffff, 0x00000100,
1116 	0x3c234, 0xffffffff, 0x00000100,
1117 	0x3c250, 0xffffffff, 0x00000100,
1118 	0x3c254, 0xffffffff, 0x00000100,
1119 	0x3c258, 0xffffffff, 0x00000100,
1120 	0x3c25c, 0xffffffff, 0x00000100,
1121 	0x3c260, 0xffffffff, 0x00000100,
1122 	0x3c27c, 0xffffffff, 0x00000100,
1123 	0x3c278, 0xffffffff, 0x00000100,
1124 	0x3c210, 0xffffffff, 0x06000100,
1125 	0x3c290, 0xffffffff, 0x00000100,
1126 	0x3c274, 0xffffffff, 0x00000100,
1127 	0x3c2b4, 0xffffffff, 0x00000100,
1128 	0x3c2b0, 0xffffffff, 0x00000100,
1129 	0x3c270, 0xffffffff, 0x00000100,
1130 	0x30800, 0xffffffff, 0xe0000000,
1131 	0x3c020, 0xffffffff, 0x00010000,
1132 	0x3c024, 0xffffffff, 0x00030002,
1133 	0x3c028, 0xffffffff, 0x00040007,
1134 	0x3c02c, 0xffffffff, 0x00060005,
1135 	0x3c030, 0xffffffff, 0x00090008,
1136 	0x3c034, 0xffffffff, 0x00010000,
1137 	0x3c038, 0xffffffff, 0x00030002,
1138 	0x3c03c, 0xffffffff, 0x00040007,
1139 	0x3c040, 0xffffffff, 0x00060005,
1140 	0x3c044, 0xffffffff, 0x00090008,
1141 	0x3c048, 0xffffffff, 0x00010000,
1142 	0x3c04c, 0xffffffff, 0x00030002,
1143 	0x3c050, 0xffffffff, 0x00040007,
1144 	0x3c054, 0xffffffff, 0x00060005,
1145 	0x3c058, 0xffffffff, 0x00090008,
1146 	0x3c05c, 0xffffffff, 0x00010000,
1147 	0x3c060, 0xffffffff, 0x00030002,
1148 	0x3c064, 0xffffffff, 0x00040007,
1149 	0x3c068, 0xffffffff, 0x00060005,
1150 	0x3c06c, 0xffffffff, 0x00090008,
1151 	0x3c070, 0xffffffff, 0x00010000,
1152 	0x3c074, 0xffffffff, 0x00030002,
1153 	0x3c078, 0xffffffff, 0x00040007,
1154 	0x3c07c, 0xffffffff, 0x00060005,
1155 	0x3c080, 0xffffffff, 0x00090008,
1156 	0x3c084, 0xffffffff, 0x00010000,
1157 	0x3c088, 0xffffffff, 0x00030002,
1158 	0x3c08c, 0xffffffff, 0x00040007,
1159 	0x3c090, 0xffffffff, 0x00060005,
1160 	0x3c094, 0xffffffff, 0x00090008,
1161 	0x3c098, 0xffffffff, 0x00010000,
1162 	0x3c09c, 0xffffffff, 0x00030002,
1163 	0x3c0a0, 0xffffffff, 0x00040007,
1164 	0x3c0a4, 0xffffffff, 0x00060005,
1165 	0x3c0a8, 0xffffffff, 0x00090008,
1166 	0x3c0ac, 0xffffffff, 0x00010000,
1167 	0x3c0b0, 0xffffffff, 0x00030002,
1168 	0x3c0b4, 0xffffffff, 0x00040007,
1169 	0x3c0b8, 0xffffffff, 0x00060005,
1170 	0x3c0bc, 0xffffffff, 0x00090008,
1171 	0x3c000, 0xffffffff, 0x96e00200,
1172 	0x8708, 0xffffffff, 0x00900100,
1173 	0xc424, 0xffffffff, 0x0020003f,
1174 	0x38, 0xffffffff, 0x0140001c,
1175 	0x3c, 0x000f0000, 0x000f0000,
1176 	0x220, 0xffffffff, 0xC060000C,
1177 	0x224, 0xc0000fff, 0x00000100,
1178 	0xf90, 0xffffffff, 0x00000100,
1179 	0xf98, 0x00000101, 0x00000000,
1180 	0x20a8, 0xffffffff, 0x00000104,
1181 	0x55e4, 0xff000fff, 0x00000100,
1182 	0x30cc, 0xc0000fff, 0x00000104,
1183 	0xc1e4, 0x00000001, 0x00000001,
1184 	0xd00c, 0xff000ff0, 0x00000100,
1185 	0xd80c, 0xff000ff0, 0x00000100
1186 };
1187 
1188 static const u32 kalindi_golden_spm_registers[] =
1189 {
1190 	0x30800, 0xe0ffffff, 0xe0000000
1191 };
1192 
1193 static const u32 kalindi_golden_common_registers[] =
1194 {
1195 	0xc770, 0xffffffff, 0x00000800,
1196 	0xc774, 0xffffffff, 0x00000800,
1197 	0xc798, 0xffffffff, 0x00007fbf,
1198 	0xc79c, 0xffffffff, 0x00007faf
1199 };
1200 
1201 static const u32 kalindi_golden_registers[] =
1202 {
1203 	0x3c000, 0xffffdfff, 0x6e944040,
1204 	0x55e4, 0xff607fff, 0xfc000100,
1205 	0x3c220, 0xff000fff, 0x00000100,
1206 	0x3c224, 0xff000fff, 0x00000100,
1207 	0x3c200, 0xfffc0fff, 0x00000100,
1208 	0x6ed8, 0x00010101, 0x00010000,
1209 	0x9830, 0xffffffff, 0x00000000,
1210 	0x9834, 0xf00fffff, 0x00000400,
1211 	0x5bb0, 0x000000f0, 0x00000070,
1212 	0x5bc0, 0xf0311fff, 0x80300000,
1213 	0x98f8, 0x73773777, 0x12010001,
1214 	0x98fc, 0xffffffff, 0x00000010,
1215 	0x9b7c, 0x00ff0000, 0x00fc0000,
1216 	0x8030, 0x00001f0f, 0x0000100a,
1217 	0x2f48, 0x73773777, 0x12010001,
1218 	0x2408, 0x000fffff, 0x000c007f,
1219 	0x8a14, 0xf000003f, 0x00000007,
1220 	0x8b24, 0x3fff3fff, 0x00ffcfff,
1221 	0x30a04, 0x0000ff0f, 0x00000000,
1222 	0x28a4c, 0x07ffffff, 0x06000000,
1223 	0x4d8, 0x00000fff, 0x00000100,
1224 	0x3e78, 0x00000001, 0x00000002,
1225 	0xc768, 0x00000008, 0x00000008,
1226 	0x8c00, 0x000000ff, 0x00000003,
1227 	0x214f8, 0x01ff01ff, 0x00000002,
1228 	0x21498, 0x007ff800, 0x00200000,
1229 	0x2015c, 0xffffffff, 0x00000f40,
1230 	0x88c4, 0x001f3ae3, 0x00000082,
1231 	0x88d4, 0x0000001f, 0x00000010,
1232 	0x30934, 0xffffffff, 0x00000000
1233 };
1234 
1235 static const u32 kalindi_mgcg_cgcg_init[] =
1236 {
1237 	0xc420, 0xffffffff, 0xfffffffc,
1238 	0x30800, 0xffffffff, 0xe0000000,
1239 	0x3c2a0, 0xffffffff, 0x00000100,
1240 	0x3c208, 0xffffffff, 0x00000100,
1241 	0x3c2c0, 0xffffffff, 0x00000100,
1242 	0x3c2c8, 0xffffffff, 0x00000100,
1243 	0x3c2c4, 0xffffffff, 0x00000100,
1244 	0x55e4, 0xffffffff, 0x00600100,
1245 	0x3c280, 0xffffffff, 0x00000100,
1246 	0x3c214, 0xffffffff, 0x06000100,
1247 	0x3c220, 0xffffffff, 0x00000100,
1248 	0x3c218, 0xffffffff, 0x06000100,
1249 	0x3c204, 0xffffffff, 0x00000100,
1250 	0x3c2e0, 0xffffffff, 0x00000100,
1251 	0x3c224, 0xffffffff, 0x00000100,
1252 	0x3c200, 0xffffffff, 0x00000100,
1253 	0x3c230, 0xffffffff, 0x00000100,
1254 	0x3c234, 0xffffffff, 0x00000100,
1255 	0x3c250, 0xffffffff, 0x00000100,
1256 	0x3c254, 0xffffffff, 0x00000100,
1257 	0x3c258, 0xffffffff, 0x00000100,
1258 	0x3c25c, 0xffffffff, 0x00000100,
1259 	0x3c260, 0xffffffff, 0x00000100,
1260 	0x3c27c, 0xffffffff, 0x00000100,
1261 	0x3c278, 0xffffffff, 0x00000100,
1262 	0x3c210, 0xffffffff, 0x06000100,
1263 	0x3c290, 0xffffffff, 0x00000100,
1264 	0x3c274, 0xffffffff, 0x00000100,
1265 	0x3c2b4, 0xffffffff, 0x00000100,
1266 	0x3c2b0, 0xffffffff, 0x00000100,
1267 	0x3c270, 0xffffffff, 0x00000100,
1268 	0x30800, 0xffffffff, 0xe0000000,
1269 	0x3c020, 0xffffffff, 0x00010000,
1270 	0x3c024, 0xffffffff, 0x00030002,
1271 	0x3c028, 0xffffffff, 0x00040007,
1272 	0x3c02c, 0xffffffff, 0x00060005,
1273 	0x3c030, 0xffffffff, 0x00090008,
1274 	0x3c034, 0xffffffff, 0x00010000,
1275 	0x3c038, 0xffffffff, 0x00030002,
1276 	0x3c03c, 0xffffffff, 0x00040007,
1277 	0x3c040, 0xffffffff, 0x00060005,
1278 	0x3c044, 0xffffffff, 0x00090008,
1279 	0x3c000, 0xffffffff, 0x96e00200,
1280 	0x8708, 0xffffffff, 0x00900100,
1281 	0xc424, 0xffffffff, 0x0020003f,
1282 	0x38, 0xffffffff, 0x0140001c,
1283 	0x3c, 0x000f0000, 0x000f0000,
1284 	0x220, 0xffffffff, 0xC060000C,
1285 	0x224, 0xc0000fff, 0x00000100,
1286 	0x20a8, 0xffffffff, 0x00000104,
1287 	0x55e4, 0xff000fff, 0x00000100,
1288 	0x30cc, 0xc0000fff, 0x00000104,
1289 	0xc1e4, 0x00000001, 0x00000001,
1290 	0xd00c, 0xff000ff0, 0x00000100,
1291 	0xd80c, 0xff000ff0, 0x00000100
1292 };
1293 
1294 static void cik_init_golden_registers(struct radeon_device *rdev)
1295 {
1296 	switch (rdev->family) {
1297 	case CHIP_BONAIRE:
1298 		radeon_program_register_sequence(rdev,
1299 						 bonaire_mgcg_cgcg_init,
1300 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1301 		radeon_program_register_sequence(rdev,
1302 						 bonaire_golden_registers,
1303 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1304 		radeon_program_register_sequence(rdev,
1305 						 bonaire_golden_common_registers,
1306 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1307 		radeon_program_register_sequence(rdev,
1308 						 bonaire_golden_spm_registers,
1309 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1310 		break;
1311 	case CHIP_KABINI:
1312 		radeon_program_register_sequence(rdev,
1313 						 kalindi_mgcg_cgcg_init,
1314 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1315 		radeon_program_register_sequence(rdev,
1316 						 kalindi_golden_registers,
1317 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1318 		radeon_program_register_sequence(rdev,
1319 						 kalindi_golden_common_registers,
1320 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1321 		radeon_program_register_sequence(rdev,
1322 						 kalindi_golden_spm_registers,
1323 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1324 		break;
1325 	case CHIP_KAVERI:
1326 		radeon_program_register_sequence(rdev,
1327 						 spectre_mgcg_cgcg_init,
1328 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1329 		radeon_program_register_sequence(rdev,
1330 						 spectre_golden_registers,
1331 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1332 		radeon_program_register_sequence(rdev,
1333 						 spectre_golden_common_registers,
1334 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1335 		radeon_program_register_sequence(rdev,
1336 						 spectre_golden_spm_registers,
1337 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1338 		break;
1339 	default:
1340 		break;
1341 	}
1342 }
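/*
 * For reference, radeon_program_register_sequence() (implemented in
 * radeon_device.c) consumes the tables above as (offset, and_mask,
 * or_mask) triplets.  A sketch of that logic, assuming the upstream
 * helper:
 *
 *	for (i = 0; i < array_size; i += 3) {
 *		reg = registers[i + 0];
 *		and_mask = registers[i + 1];
 *		or_mask = registers[i + 2];
 *		if (and_mask == 0xffffffff)
 *			tmp = or_mask;
 *		else
 *			tmp = (RREG32(reg) & ~and_mask) | or_mask;
 *		WREG32(reg, tmp);
 *	}
 */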
1343 
1344 /**
1345  * cik_get_xclk - get the xclk
1346  *
1347  * @rdev: radeon_device pointer
1348  *
1349  * Returns the reference clock used by the gfx engine
1350  * (CIK).
1351  */
1352 u32 cik_get_xclk(struct radeon_device *rdev)
1353 {
1354 	u32 reference_clock = rdev->clock.spll.reference_freq;
1355 
1356 	if (rdev->flags & RADEON_IS_IGP) {
1357 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1358 			return reference_clock / 2;
1359 	} else {
1360 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1361 			return reference_clock / 4;
1362 	}
1363 	return reference_clock;
1364 }
1365 
1366 /**
1367  * cik_mm_rdoorbell - read a doorbell dword
1368  *
1369  * @rdev: radeon_device pointer
1370  * @offset: byte offset into the aperture
1371  *
1372  * Returns the value in the doorbell aperture at the
1373  * requested offset (CIK).
1374  */
1375 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
1376 {
1377 	if (offset < rdev->doorbell.size) {
1378 		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
1379 	} else {
1380 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
1381 		return 0;
1382 	}
1383 }
1384 
1385 /**
1386  * cik_mm_wdoorbell - write a doorbell dword
1387  *
1388  * @rdev: radeon_device pointer
1389  * @offset: byte offset into the aperture
1390  * @v: value to write
1391  *
1392  * Writes @v to the doorbell aperture at the
1393  * requested offset (CIK).
1394  */
1395 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
1396 {
1397 	if (offset < rdev->doorbell.size) {
1398 		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
1399 	} else {
1400 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
1401 	}
1402 }
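/*
 * Usage sketch (ring field names assumed, not taken from this file):
 * compute rings bump their write pointer through the doorbell aperture
 * rather than an MMIO register, along the lines of:
 *
 *	cik_mm_wdoorbell(rdev, ring->doorbell_offset, ring->wptr);
 */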
1403 
1404 #define BONAIRE_IO_MC_REGS_SIZE 36
1405 
1406 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1407 {
1408 	{0x00000070, 0x04400000},
1409 	{0x00000071, 0x80c01803},
1410 	{0x00000072, 0x00004004},
1411 	{0x00000073, 0x00000100},
1412 	{0x00000074, 0x00ff0000},
1413 	{0x00000075, 0x34000000},
1414 	{0x00000076, 0x08000014},
1415 	{0x00000077, 0x00cc08ec},
1416 	{0x00000078, 0x00000400},
1417 	{0x00000079, 0x00000000},
1418 	{0x0000007a, 0x04090000},
1419 	{0x0000007c, 0x00000000},
1420 	{0x0000007e, 0x4408a8e8},
1421 	{0x0000007f, 0x00000304},
1422 	{0x00000080, 0x00000000},
1423 	{0x00000082, 0x00000001},
1424 	{0x00000083, 0x00000002},
1425 	{0x00000084, 0xf3e4f400},
1426 	{0x00000085, 0x052024e3},
1427 	{0x00000087, 0x00000000},
1428 	{0x00000088, 0x01000000},
1429 	{0x0000008a, 0x1c0a0000},
1430 	{0x0000008b, 0xff010000},
1431 	{0x0000008d, 0xffffefff},
1432 	{0x0000008e, 0xfff3efff},
1433 	{0x0000008f, 0xfff3efbf},
1434 	{0x00000092, 0xf7ffffff},
1435 	{0x00000093, 0xffffff7f},
1436 	{0x00000095, 0x00101101},
1437 	{0x00000096, 0x00000fff},
1438 	{0x00000097, 0x00116fff},
1439 	{0x00000098, 0x60010000},
1440 	{0x00000099, 0x10010000},
1441 	{0x0000009a, 0x00006000},
1442 	{0x0000009b, 0x00001000},
1443 	{0x0000009f, 0x00b48000}
1444 };
1445 
1446 /**
1447  * cik_srbm_select - select specific register instances
1448  *
1449  * @rdev: radeon_device pointer
1450  * @me: selected ME (micro engine)
1451  * @pipe: pipe
1452  * @queue: queue
1453  * @vmid: VMID
1454  *
1455  * Switches the currently active register instances.  Some
1456  * registers are instanced per VMID, others are instanced per
1457  * me/pipe/queue combination.
1458  */
1459 static void cik_srbm_select(struct radeon_device *rdev,
1460 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1461 {
1462 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1463 			     MEID(me & 0x3) |
1464 			     VMID(vmid & 0xf) |
1465 			     QUEUEID(queue & 0x7));
1466 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1467 }
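/*
 * Usage sketch: callers bracket accesses to instanced registers with a
 * select/deselect pair, serialized by rdev->srbm_mutex (as done
 * elsewhere in this driver):
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, vmid);
 *	... touch per-instance registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */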
1468 
1469 /* ucode loading */
1470 /**
1471  * ci_mc_load_microcode - load MC ucode into the hw
1472  *
1473  * @rdev: radeon_device pointer
1474  *
1475  * Load the GDDR MC ucode into the hw (CIK).
1476  * Returns 0 on success, error on failure.
1477  */
1478 static int ci_mc_load_microcode(struct radeon_device *rdev)
1479 {
1480 	const __be32 *fw_data;
1481 	u32 running;
1482 	u32 *io_mc_regs;
1483 	int i, ucode_size, regs_size;
1484 
1485 	if (!rdev->mc_fw)
1486 		return -EINVAL;
1487 
1488 	switch (rdev->family) {
1489 	case CHIP_BONAIRE:
1490 	default:
1491 		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1492 		ucode_size = CIK_MC_UCODE_SIZE;
1493 		regs_size = BONAIRE_IO_MC_REGS_SIZE;
1494 		break;
1495 	}
1496 
1497 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1498 
1499 	if (running == 0) {
1505 		/* reset the engine and set to writable */
1506 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1507 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1508 
1509 		/* load mc io regs */
1510 		for (i = 0; i < regs_size; i++) {
1511 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1512 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1513 		}
1514 		/* load the MC ucode */
1515 		fw_data = (const __be32 *)rdev->mc_fw->data;
1516 		for (i = 0; i < ucode_size; i++)
1517 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1518 
1519 		/* put the engine back into the active state */
1520 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1521 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1522 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1523 
1524 		/* wait for training to complete */
1525 		for (i = 0; i < rdev->usec_timeout; i++) {
1526 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1527 				break;
1528 			udelay(1);
1529 		}
1530 		for (i = 0; i < rdev->usec_timeout; i++) {
1531 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1532 				break;
1533 			udelay(1);
1534 		}
1538 	}
1539 
1540 	return 0;
1541 }
1542 
1543 /**
1544  * cik_init_microcode - load ucode images from disk
1545  *
1546  * @rdev: radeon_device pointer
1547  *
1548  * Use the firmware interface to load the ucode images into
1549  * the driver (not loaded into hw).
1550  * Returns 0 on success, error on failure.
1551  */
1552 static int cik_init_microcode(struct radeon_device *rdev)
1553 {
1554 	const char *chip_name;
1555 	size_t pfp_req_size, me_req_size, ce_req_size,
1556 		mec_req_size, rlc_req_size, mc_req_size,
1557 		sdma_req_size, smc_req_size;
1558 	char fw_name[30];
1559 	int err;
1560 
1561 	DRM_DEBUG("\n");
1562 
1563 	switch (rdev->family) {
1564 	case CHIP_BONAIRE:
1565 		chip_name = "BONAIRE";
1566 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1567 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1568 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1569 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1570 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1571 		mc_req_size = CIK_MC_UCODE_SIZE * 4;
1572 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1573 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1574 		break;
1575 	case CHIP_KAVERI:
1576 		chip_name = "KAVERI";
1577 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1578 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1579 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1580 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1581 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1582 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1583 		break;
1584 	case CHIP_KABINI:
1585 		chip_name = "KABINI";
1586 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1587 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1588 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1589 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1590 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1591 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1592 		break;
1593 	default: BUG();
1594 	}
1595 
1596 	DRM_INFO("Loading %s Microcode\n", chip_name);
1597 
1598 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1599 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1600 	if (err)
1601 		goto out;
1602 	if (rdev->pfp_fw->size != pfp_req_size) {
1603 		printk(KERN_ERR
1604 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1605 		       rdev->pfp_fw->size, fw_name);
1606 		err = -EINVAL;
1607 		goto out;
1608 	}
1609 
1610 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1611 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1612 	if (err)
1613 		goto out;
1614 	if (rdev->me_fw->size != me_req_size) {
1615 		printk(KERN_ERR
1616 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1617 		       rdev->me_fw->size, fw_name);
1618 		err = -EINVAL;
1619 	}
1620 
1621 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1622 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1623 	if (err)
1624 		goto out;
1625 	if (rdev->ce_fw->size != ce_req_size) {
1626 		printk(KERN_ERR
1627 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1628 		       rdev->ce_fw->size, fw_name);
1629 		err = -EINVAL;
1630 	}
1631 
1632 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1633 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1634 	if (err)
1635 		goto out;
1636 	if (rdev->mec_fw->size != mec_req_size) {
1637 		printk(KERN_ERR
1638 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1639 		       rdev->mec_fw->size, fw_name);
1640 		err = -EINVAL;
1641 	}
1642 
1643 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1644 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1645 	if (err)
1646 		goto out;
1647 	if (rdev->rlc_fw->size != rlc_req_size) {
1648 		printk(KERN_ERR
1649 		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1650 		       rdev->rlc_fw->size, fw_name);
1651 		err = -EINVAL;
1652 	}
1653 
1654 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1655 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1656 	if (err)
1657 		goto out;
1658 	if (rdev->sdma_fw->size != sdma_req_size) {
1659 		printk(KERN_ERR
1660 		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1661 		       rdev->sdma_fw->size, fw_name);
1662 		err = -EINVAL;
1663 	}
1664 
1665 	/* No SMC, MC ucode on APUs */
1666 	if (!(rdev->flags & RADEON_IS_IGP)) {
1667 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1668 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1669 		if (err)
1670 			goto out;
1671 		if (rdev->mc_fw->size != mc_req_size) {
1672 			printk(KERN_ERR
1673 			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1674 			       rdev->mc_fw->size, fw_name);
1675 			err = -EINVAL;
1676 		}
1677 
1678 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1679 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1680 		if (err) {
1681 			printk(KERN_ERR
1682 			       "smc: error loading firmware \"%s\"\n",
1683 			       fw_name);
1684 			release_firmware(rdev->smc_fw);
1685 			rdev->smc_fw = NULL;
1686 		} else if (rdev->smc_fw->size != smc_req_size) {
1687 			printk(KERN_ERR
1688 			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1689 			       rdev->smc_fw->size, fw_name);
1690 			err = -EINVAL;
1691 		}
1692 	}
1693 
1694 out:
1695 	if (err) {
1696 		if (err != -EINVAL)
1697 			printk(KERN_ERR
1698 			       "cik_cp: Failed to load firmware \"%s\"\n",
1699 			       fw_name);
1700 		release_firmware(rdev->pfp_fw);
1701 		rdev->pfp_fw = NULL;
1702 		release_firmware(rdev->me_fw);
1703 		rdev->me_fw = NULL;
1704 		release_firmware(rdev->ce_fw);
1705 		rdev->ce_fw = NULL;
1706 		release_firmware(rdev->rlc_fw);
1707 		rdev->rlc_fw = NULL;
1708 		release_firmware(rdev->mc_fw);
1709 		rdev->mc_fw = NULL;
1710 		release_firmware(rdev->smc_fw);
1711 		rdev->smc_fw = NULL;
1712 	}
1713 	return err;
1714 }
1715 
1716 /*
1717  * Core functions
1718  */
1719 /**
1720  * cik_tiling_mode_table_init - init the hw tiling table
1721  *
1722  * @rdev: radeon_device pointer
1723  *
1724  * Starting with SI, the tiling setup is done globally in a
1725  * set of 32 tiling modes.  Rather than selecting each set of
1726  * parameters per surface as on older asics, we just select
1727  * which index in the tiling table we want to use, and the
1728  * surface uses those parameters (CIK).
1729  */
1730 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1731 {
1732 	const u32 num_tile_mode_states = 32;
1733 	const u32 num_secondary_tile_mode_states = 16;
1734 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1735 	u32 num_pipe_configs;
1736 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
1737 		rdev->config.cik.max_shader_engines;
1738 
1739 	switch (rdev->config.cik.mem_row_size_in_kb) {
1740 	case 1:
1741 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1742 		break;
1743 	case 2:
1744 	default:
1745 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1746 		break;
1747 	case 4:
1748 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1749 		break;
1750 	}
1751 
1752 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
1753 	if (num_pipe_configs > 8)
1754 		num_pipe_configs = 8; /* ??? */
1755 
1756 	if (num_pipe_configs == 8) {
1757 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1758 			switch (reg_offset) {
1759 			case 0:
1760 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1761 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1762 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1763 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1764 				break;
1765 			case 1:
1766 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1767 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1768 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1769 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1770 				break;
1771 			case 2:
1772 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1773 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1774 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1775 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1776 				break;
1777 			case 3:
1778 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1779 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1780 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1781 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1782 				break;
1783 			case 4:
1784 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1785 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1786 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1787 						 TILE_SPLIT(split_equal_to_row_size));
1788 				break;
1789 			case 5:
1790 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1791 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1792 				break;
1793 			case 6:
1794 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1795 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1796 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1797 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1798 				break;
1799 			case 7:
1800 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1801 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1802 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1803 						 TILE_SPLIT(split_equal_to_row_size));
1804 				break;
1805 			case 8:
1806 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1807 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
1808 				break;
1809 			case 9:
1810 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1811 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1812 				break;
1813 			case 10:
1814 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1815 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1816 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1817 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1818 				break;
1819 			case 11:
1820 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1821 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1822 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1823 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1824 				break;
1825 			case 12:
1826 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1827 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1828 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1829 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1830 				break;
1831 			case 13:
1832 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1833 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1834 				break;
1835 			case 14:
1836 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1837 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1838 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1839 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1840 				break;
1841 			case 16:
1842 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1843 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1844 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1845 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1846 				break;
1847 			case 17:
1848 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1849 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1850 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1851 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1852 				break;
1853 			case 27:
1854 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1855 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1856 				break;
1857 			case 28:
1858 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1859 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1860 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1861 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1862 				break;
1863 			case 29:
1864 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1865 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1866 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1867 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1868 				break;
1869 			case 30:
1870 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1871 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1872 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1873 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1874 				break;
1875 			default:
1876 				gb_tile_moden = 0;
1877 				break;
1878 			}
1879 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1880 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1881 		}
1882 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1883 			switch (reg_offset) {
1884 			case 0:
1885 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1886 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1887 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1888 						 NUM_BANKS(ADDR_SURF_16_BANK));
1889 				break;
1890 			case 1:
1891 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1892 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1893 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1894 						 NUM_BANKS(ADDR_SURF_16_BANK));
1895 				break;
1896 			case 2:
1897 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1898 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1899 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1900 						 NUM_BANKS(ADDR_SURF_16_BANK));
1901 				break;
1902 			case 3:
1903 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1904 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1905 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1906 						 NUM_BANKS(ADDR_SURF_16_BANK));
1907 				break;
1908 			case 4:
1909 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1910 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1911 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1912 						 NUM_BANKS(ADDR_SURF_8_BANK));
1913 				break;
1914 			case 5:
1915 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1916 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1917 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1918 						 NUM_BANKS(ADDR_SURF_4_BANK));
1919 				break;
1920 			case 6:
1921 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1922 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1923 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1924 						 NUM_BANKS(ADDR_SURF_2_BANK));
1925 				break;
1926 			case 8:
1927 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1928 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1929 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1930 						 NUM_BANKS(ADDR_SURF_16_BANK));
1931 				break;
1932 			case 9:
1933 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1934 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1935 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1936 						 NUM_BANKS(ADDR_SURF_16_BANK));
1937 				break;
1938 			case 10:
1939 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1940 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1941 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1942 						 NUM_BANKS(ADDR_SURF_16_BANK));
1943 				break;
1944 			case 11:
1945 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1946 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1947 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1948 						 NUM_BANKS(ADDR_SURF_16_BANK));
1949 				break;
1950 			case 12:
1951 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1952 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1953 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1954 						 NUM_BANKS(ADDR_SURF_8_BANK));
1955 				break;
1956 			case 13:
1957 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1958 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1959 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1960 						 NUM_BANKS(ADDR_SURF_4_BANK));
1961 				break;
1962 			case 14:
1963 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1964 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1965 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1966 						 NUM_BANKS(ADDR_SURF_2_BANK));
1967 				break;
1968 			default:
1969 				gb_tile_moden = 0;
1970 				break;
1971 			}
1972 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1973 		}
1974 	} else if (num_pipe_configs == 4) {
1975 		if (num_rbs == 4) {
1976 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1977 				switch (reg_offset) {
1978 				case 0:
1979 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1980 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1981 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1982 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1983 					break;
1984 				case 1:
1985 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1986 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1987 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1988 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1989 					break;
1990 				case 2:
1991 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1992 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1993 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1994 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1995 					break;
1996 				case 3:
1997 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1998 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1999 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2000 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2001 					break;
2002 				case 4:
2003 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2004 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2005 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2006 							 TILE_SPLIT(split_equal_to_row_size));
2007 					break;
2008 				case 5:
2009 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2010 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2011 					break;
2012 				case 6:
2013 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2014 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2015 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2016 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2017 					break;
2018 				case 7:
2019 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2020 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2021 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2022 							 TILE_SPLIT(split_equal_to_row_size));
2023 					break;
2024 				case 8:
2025 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2026 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2027 					break;
2028 				case 9:
2029 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2030 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2031 					break;
2032 				case 10:
2033 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2034 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2035 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2036 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2037 					break;
2038 				case 11:
2039 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2040 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2041 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2042 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2043 					break;
2044 				case 12:
2045 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2046 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2047 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2048 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2049 					break;
2050 				case 13:
2051 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2052 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2053 					break;
2054 				case 14:
2055 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2056 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2057 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2058 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2059 					break;
2060 				case 16:
2061 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2062 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2063 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2064 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2065 					break;
2066 				case 17:
2067 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2068 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2069 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2070 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2071 					break;
2072 				case 27:
2073 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2074 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2075 					break;
2076 				case 28:
2077 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2078 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2079 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2080 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2081 					break;
2082 				case 29:
2083 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2084 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2085 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2086 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2087 					break;
2088 				case 30:
2089 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2090 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2091 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2092 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2093 					break;
2094 				default:
2095 					gb_tile_moden = 0;
2096 					break;
2097 				}
2098 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2099 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2100 			}
2101 		} else if (num_rbs < 4) {
2102 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2103 				switch (reg_offset) {
2104 				case 0:
2105 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2106 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2107 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2108 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2109 					break;
2110 				case 1:
2111 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2112 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2113 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2114 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2115 					break;
2116 				case 2:
2117 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2118 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2119 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2120 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2121 					break;
2122 				case 3:
2123 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2124 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2125 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2126 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2127 					break;
2128 				case 4:
2129 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2130 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2131 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2132 							 TILE_SPLIT(split_equal_to_row_size));
2133 					break;
2134 				case 5:
2135 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2136 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2137 					break;
2138 				case 6:
2139 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2140 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2141 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2142 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2143 					break;
2144 				case 7:
2145 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2146 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2147 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2148 							 TILE_SPLIT(split_equal_to_row_size));
2149 					break;
2150 				case 8:
2151 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2152 							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2153 					break;
2154 				case 9:
2155 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2156 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2157 					break;
2158 				case 10:
2159 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2160 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2161 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2162 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2163 					break;
2164 				case 11:
2165 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2166 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2167 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2168 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2169 					break;
2170 				case 12:
2171 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2172 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2173 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2174 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2175 					break;
2176 				case 13:
2177 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2178 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2179 					break;
2180 				case 14:
2181 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2182 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2183 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2184 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2185 					break;
2186 				case 16:
2187 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2188 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2189 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2190 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2191 					break;
2192 				case 17:
2193 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2194 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2195 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2196 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2197 					break;
2198 				case 27:
2199 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2200 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2201 					break;
2202 				case 28:
2203 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2204 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2205 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2206 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2207 					break;
2208 				case 29:
2209 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2210 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2211 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2212 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2213 					break;
2214 				case 30:
2215 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2216 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2217 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2218 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2219 					break;
2220 				default:
2221 					gb_tile_moden = 0;
2222 					break;
2223 				}
2224 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2225 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2226 			}
2227 		}
2228 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2229 			switch (reg_offset) {
2230 			case 0:
2231 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2232 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2233 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2234 						 NUM_BANKS(ADDR_SURF_16_BANK));
2235 				break;
2236 			case 1:
2237 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2239 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2240 						 NUM_BANKS(ADDR_SURF_16_BANK));
2241 				break;
2242 			case 2:
2243 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2244 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2245 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2246 						 NUM_BANKS(ADDR_SURF_16_BANK));
2247 				break;
2248 			case 3:
2249 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2250 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2251 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2252 						 NUM_BANKS(ADDR_SURF_16_BANK));
2253 				break;
2254 			case 4:
2255 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2256 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2257 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258 						 NUM_BANKS(ADDR_SURF_16_BANK));
2259 				break;
2260 			case 5:
2261 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2263 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2264 						 NUM_BANKS(ADDR_SURF_8_BANK));
2265 				break;
2266 			case 6:
2267 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2269 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2270 						 NUM_BANKS(ADDR_SURF_4_BANK));
2271 				break;
2272 			case 8:
2273 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2274 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2275 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2276 						 NUM_BANKS(ADDR_SURF_16_BANK));
2277 				break;
2278 			case 9:
2279 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2280 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2281 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2282 						 NUM_BANKS(ADDR_SURF_16_BANK));
2283 				break;
2284 			case 10:
2285 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2286 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2287 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2288 						 NUM_BANKS(ADDR_SURF_16_BANK));
2289 				break;
2290 			case 11:
2291 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2292 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2293 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2294 						 NUM_BANKS(ADDR_SURF_16_BANK));
2295 				break;
2296 			case 12:
2297 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2298 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2299 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2300 						 NUM_BANKS(ADDR_SURF_16_BANK));
2301 				break;
2302 			case 13:
2303 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2305 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2306 						 NUM_BANKS(ADDR_SURF_8_BANK));
2307 				break;
2308 			case 14:
2309 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2310 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2311 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2312 						 NUM_BANKS(ADDR_SURF_4_BANK));
2313 				break;
2314 			default:
2315 				gb_tile_moden = 0;
2316 				break;
2317 			}
2318 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2319 		}
2320 	} else if (num_pipe_configs == 2) {
2321 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2322 			switch (reg_offset) {
2323 			case 0:
2324 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2325 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2326 						 PIPE_CONFIG(ADDR_SURF_P2) |
2327 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2328 				break;
2329 			case 1:
2330 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2331 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2332 						 PIPE_CONFIG(ADDR_SURF_P2) |
2333 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2334 				break;
2335 			case 2:
2336 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2337 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2338 						 PIPE_CONFIG(ADDR_SURF_P2) |
2339 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2340 				break;
2341 			case 3:
2342 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2343 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2344 						 PIPE_CONFIG(ADDR_SURF_P2) |
2345 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2346 				break;
2347 			case 4:
2348 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2349 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2350 						 PIPE_CONFIG(ADDR_SURF_P2) |
2351 						 TILE_SPLIT(split_equal_to_row_size));
2352 				break;
2353 			case 5:
2354 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2355 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2356 				break;
2357 			case 6:
2358 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2359 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2360 						 PIPE_CONFIG(ADDR_SURF_P2) |
2361 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2362 				break;
2363 			case 7:
2364 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2365 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2366 						 PIPE_CONFIG(ADDR_SURF_P2) |
2367 						 TILE_SPLIT(split_equal_to_row_size));
2368 				break;
2369 			case 8:
2370 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2371 				break;
2372 			case 9:
2373 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2374 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2375 				break;
2376 			case 10:
2377 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2378 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2379 						 PIPE_CONFIG(ADDR_SURF_P2) |
2380 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2381 				break;
2382 			case 11:
2383 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2384 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2385 						 PIPE_CONFIG(ADDR_SURF_P2) |
2386 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2387 				break;
2388 			case 12:
2389 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2390 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2391 						 PIPE_CONFIG(ADDR_SURF_P2) |
2392 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2393 				break;
2394 			case 13:
2395 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2396 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2397 				break;
2398 			case 14:
2399 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2400 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2401 						 PIPE_CONFIG(ADDR_SURF_P2) |
2402 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2403 				break;
2404 			case 16:
2405 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2406 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2407 						 PIPE_CONFIG(ADDR_SURF_P2) |
2408 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2409 				break;
2410 			case 17:
2411 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2412 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2413 						 PIPE_CONFIG(ADDR_SURF_P2) |
2414 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2415 				break;
2416 			case 27:
2417 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2418 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2419 				break;
2420 			case 28:
2421 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2422 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2423 						 PIPE_CONFIG(ADDR_SURF_P2) |
2424 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2425 				break;
2426 			case 29:
2427 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2428 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2429 						 PIPE_CONFIG(ADDR_SURF_P2) |
2430 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2431 				break;
2432 			case 30:
2433 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2434 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2435 						 PIPE_CONFIG(ADDR_SURF_P2) |
2436 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2437 				break;
2438 			default:
2439 				gb_tile_moden = 0;
2440 				break;
2441 			}
2442 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2443 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2444 		}
2445 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2446 			switch (reg_offset) {
2447 			case 0:
2448 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2449 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2450 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2451 						 NUM_BANKS(ADDR_SURF_16_BANK));
2452 				break;
2453 			case 1:
2454 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2455 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2456 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2457 						 NUM_BANKS(ADDR_SURF_16_BANK));
2458 				break;
2459 			case 2:
2460 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2461 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2462 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2463 						 NUM_BANKS(ADDR_SURF_16_BANK));
2464 				break;
2465 			case 3:
2466 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2467 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2468 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2469 						 NUM_BANKS(ADDR_SURF_16_BANK));
2470 				break;
2471 			case 4:
2472 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2473 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2474 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2475 						 NUM_BANKS(ADDR_SURF_16_BANK));
2476 				break;
2477 			case 5:
2478 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2479 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2480 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2481 						 NUM_BANKS(ADDR_SURF_16_BANK));
2482 				break;
2483 			case 6:
2484 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2485 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2486 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2487 						 NUM_BANKS(ADDR_SURF_8_BANK));
2488 				break;
2489 			case 8:
2490 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2491 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2492 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2493 						 NUM_BANKS(ADDR_SURF_16_BANK));
2494 				break;
2495 			case 9:
2496 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2497 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2498 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2499 						 NUM_BANKS(ADDR_SURF_16_BANK));
2500 				break;
2501 			case 10:
2502 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2503 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2504 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2505 						 NUM_BANKS(ADDR_SURF_16_BANK));
2506 				break;
2507 			case 11:
2508 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2509 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2510 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2511 						 NUM_BANKS(ADDR_SURF_16_BANK));
2512 				break;
2513 			case 12:
2514 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2515 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2516 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2517 						 NUM_BANKS(ADDR_SURF_16_BANK));
2518 				break;
2519 			case 13:
2520 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2521 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2522 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2523 						 NUM_BANKS(ADDR_SURF_16_BANK));
2524 				break;
2525 			case 14:
2526 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2527 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2528 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2529 						 NUM_BANKS(ADDR_SURF_8_BANK));
2530 				break;
2531 			default:
2532 				gb_tile_moden = 0;
2533 				break;
2534 			}
2535 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2536 		}
2537 	} else
2538 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
2539 }
2540 
2541 /**
2542  * cik_select_se_sh - select which SE, SH to address
2543  *
2544  * @rdev: radeon_device pointer
2545  * @se_num: shader engine to address
2546  * @sh_num: sh block to address
2547  *
2548  * Select which SE, SH combinations to address. Certain
2549  * registers are instanced per SE or SH.  0xffffffff means
2550  * broadcast to all SEs or SHs (CIK).
2551  */
2552 static void cik_select_se_sh(struct radeon_device *rdev,
2553 			     u32 se_num, u32 sh_num)
2554 {
2555 	u32 data = INSTANCE_BROADCAST_WRITES;
2556 
2557 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2558 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2559 	else if (se_num == 0xffffffff)
2560 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2561 	else if (sh_num == 0xffffffff)
2562 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2563 	else
2564 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2565 	WREG32(GRBM_GFX_INDEX, data);
2566 }
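/*
 * Typical usage (see cik_setup_rb() below): select one SE/SH pair,
 * access the instanced registers, then restore broadcast mode so
 * subsequent writes reach every instance:
 *
 *	cik_select_se_sh(rdev, se, sh);
 *	... read/write per-SE/SH registers ...
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 */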
2567 
2568 /**
2569  * cik_create_bitmask - create a bitmask
2570  *
2571  * @bit_width: length of the mask
2572  *
2573  * create a variable length bit mask (CIK).
2574  * Returns the bitmask.
2575  */
2576 static u32 cik_create_bitmask(u32 bit_width)
2577 {
2578 	u32 i, mask = 0;
2579 
2580 	for (i = 0; i < bit_width; i++) {
2581 		mask <<= 1;
2582 		mask |= 1;
2583 	}
2584 	return mask;
2585 }
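/*
 * For the widths used here this is equivalent to the closed form
 * (1U << bit_width) - 1, e.g. cik_create_bitmask(4) == 0xf.
 */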
2586 
2587 /**
2588  * cik_get_rb_disabled - calculate which RBs are disabled
2589  *
2590  * @rdev: radeon_device pointer
2591  * @max_rb_num: max RBs (render backends) for the asic
2592  * @se_num: number of SEs (shader engines) for the asic
2593  * @sh_per_se: number of SH blocks per SE for the asic
2594  *
2595  * Calculates the bitmask of disabled RBs (CIK).
2596  * Returns the disabled RB bitmask.
2597  */
2598 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
2599 			      u32 max_rb_num, u32 se_num,
2600 			      u32 sh_per_se)
2601 {
2602 	u32 data, mask;
2603 
2604 	data = RREG32(CC_RB_BACKEND_DISABLE);
2605 	if (data & 1)
2606 		data &= BACKEND_DISABLE_MASK;
2607 	else
2608 		data = 0;
2609 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2610 
2611 	data >>= BACKEND_DISABLE_SHIFT;
2612 
2613 	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
2614 
2615 	return data & mask;
2616 }
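/*
 * The mask width above is the number of RBs per SH (max_rb_num
 * divided by the SE count and then by the SHs per SE), so only bits
 * that can correspond to real backends in this SH survive.
 */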
2617 
2618 /**
2619  * cik_setup_rb - setup the RBs on the asic
2620  *
2621  * @rdev: radeon_device pointer
2622  * @se_num: number of SEs (shader engines) for the asic
2623  * @sh_per_se: number of SH blocks per SE for the asic
2624  * @max_rb_num: max RBs (render backends) for the asic
2625  *
2626  * Configures per-SE/SH RB registers (CIK).
2627  */
2628 static void cik_setup_rb(struct radeon_device *rdev,
2629 			 u32 se_num, u32 sh_per_se,
2630 			 u32 max_rb_num)
2631 {
2632 	int i, j;
2633 	u32 data, mask;
2634 	u32 disabled_rbs = 0;
2635 	u32 enabled_rbs = 0;
2636 
2637 	for (i = 0; i < se_num; i++) {
2638 		for (j = 0; j < sh_per_se; j++) {
2639 			cik_select_se_sh(rdev, i, j);
2640 			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
2641 			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
2642 		}
2643 	}
2644 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2645 
2646 	mask = 1;
2647 	for (i = 0; i < max_rb_num; i++) {
2648 		if (!(disabled_rbs & mask))
2649 			enabled_rbs |= mask;
2650 		mask <<= 1;
2651 	}
2652 
2653 	for (i = 0; i < se_num; i++) {
2654 		cik_select_se_sh(rdev, i, 0xffffffff);
2655 		data = 0;
2656 		for (j = 0; j < sh_per_se; j++) {
2657 			switch (enabled_rbs & 3) {
2658 			case 1:
2659 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
2660 				break;
2661 			case 2:
2662 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
2663 				break;
2664 			case 3:
2665 			default:
2666 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
2667 				break;
2668 			}
2669 			enabled_rbs >>= 2;
2670 		}
2671 		WREG32(PA_SC_RASTER_CONFIG, data);
2672 	}
2673 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2674 }
2675 
2676 /**
2677  * cik_gpu_init - setup the 3D engine
2678  *
2679  * @rdev: radeon_device pointer
2680  *
2681  * Configures the 3D engine and tiling configuration
2682  * registers so that the 3D engine is usable.
2683  */
2684 static void cik_gpu_init(struct radeon_device *rdev)
2685 {
2686 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
2687 	u32 mc_shared_chmap, mc_arb_ramcfg;
2688 	u32 hdp_host_path_cntl;
2689 	u32 tmp;
2690 	int i, j;
2691 
2692 	switch (rdev->family) {
2693 	case CHIP_BONAIRE:
2694 		rdev->config.cik.max_shader_engines = 2;
2695 		rdev->config.cik.max_tile_pipes = 4;
2696 		rdev->config.cik.max_cu_per_sh = 7;
2697 		rdev->config.cik.max_sh_per_se = 1;
2698 		rdev->config.cik.max_backends_per_se = 2;
2699 		rdev->config.cik.max_texture_channel_caches = 4;
2700 		rdev->config.cik.max_gprs = 256;
2701 		rdev->config.cik.max_gs_threads = 32;
2702 		rdev->config.cik.max_hw_contexts = 8;
2703 
2704 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2705 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2706 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2707 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2708 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2709 		break;
2710 	case CHIP_KAVERI:
2711 		rdev->config.cik.max_shader_engines = 1;
2712 		rdev->config.cik.max_tile_pipes = 4;
2713 		if ((rdev->pdev->device == 0x1304) ||
2714 		    (rdev->pdev->device == 0x1305) ||
2715 		    (rdev->pdev->device == 0x130C) ||
2716 		    (rdev->pdev->device == 0x130F) ||
2717 		    (rdev->pdev->device == 0x1310) ||
2718 		    (rdev->pdev->device == 0x1311) ||
2719 		    (rdev->pdev->device == 0x131C)) {
2720 			rdev->config.cik.max_cu_per_sh = 8;
2721 			rdev->config.cik.max_backends_per_se = 2;
2722 		} else if ((rdev->pdev->device == 0x1309) ||
2723 			   (rdev->pdev->device == 0x130A) ||
2724 			   (rdev->pdev->device == 0x130D) ||
2725 			   (rdev->pdev->device == 0x1313)) {
2726 			rdev->config.cik.max_cu_per_sh = 6;
2727 			rdev->config.cik.max_backends_per_se = 2;
2728 		} else if ((rdev->pdev->device == 0x1306) ||
2729 			   (rdev->pdev->device == 0x1307) ||
2730 			   (rdev->pdev->device == 0x130B) ||
2731 			   (rdev->pdev->device == 0x130E) ||
2732 			   (rdev->pdev->device == 0x1315) ||
2733 			   (rdev->pdev->device == 0x131B)) {
2734 			rdev->config.cik.max_cu_per_sh = 4;
2735 			rdev->config.cik.max_backends_per_se = 1;
2736 		} else {
2737 			rdev->config.cik.max_cu_per_sh = 3;
2738 			rdev->config.cik.max_backends_per_se = 1;
2739 		}
2740 		rdev->config.cik.max_sh_per_se = 1;
2741 		rdev->config.cik.max_texture_channel_caches = 4;
2742 		rdev->config.cik.max_gprs = 256;
2743 		rdev->config.cik.max_gs_threads = 16;
2744 		rdev->config.cik.max_hw_contexts = 8;
2745 
2746 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2747 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2748 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2749 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2750 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2751 		break;
2752 	case CHIP_KABINI:
2753 	default:
2754 		rdev->config.cik.max_shader_engines = 1;
2755 		rdev->config.cik.max_tile_pipes = 2;
2756 		rdev->config.cik.max_cu_per_sh = 2;
2757 		rdev->config.cik.max_sh_per_se = 1;
2758 		rdev->config.cik.max_backends_per_se = 1;
2759 		rdev->config.cik.max_texture_channel_caches = 2;
2760 		rdev->config.cik.max_gprs = 256;
2761 		rdev->config.cik.max_gs_threads = 16;
2762 		rdev->config.cik.max_hw_contexts = 8;
2763 
2764 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
2765 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
2766 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
2767 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
2768 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
2769 		break;
2770 	}
2771 
2772 	/* Initialize HDP */
2773 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
2774 		WREG32((0x2c14 + j), 0x00000000);
2775 		WREG32((0x2c18 + j), 0x00000000);
2776 		WREG32((0x2c1c + j), 0x00000000);
2777 		WREG32((0x2c20 + j), 0x00000000);
2778 		WREG32((0x2c24 + j), 0x00000000);
2779 	}
2780 
2781 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
2782 
2783 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
2784 
2785 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
2786 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
2787 
2788 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
2789 	rdev->config.cik.mem_max_burst_length_bytes = 256;
2790 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
2791 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
2792 	if (rdev->config.cik.mem_row_size_in_kb > 4)
2793 		rdev->config.cik.mem_row_size_in_kb = 4;
2794 	/* XXX use MC settings? */
2795 	rdev->config.cik.shader_engine_tile_size = 32;
2796 	rdev->config.cik.num_gpus = 1;
2797 	rdev->config.cik.multi_gpu_tile_size = 64;
2798 
2799 	/* fix up row size */
2800 	gb_addr_config &= ~ROW_SIZE_MASK;
2801 	switch (rdev->config.cik.mem_row_size_in_kb) {
2802 	case 1:
2803 	default:
2804 		gb_addr_config |= ROW_SIZE(0);
2805 		break;
2806 	case 2:
2807 		gb_addr_config |= ROW_SIZE(1);
2808 		break;
2809 	case 4:
2810 		gb_addr_config |= ROW_SIZE(2);
2811 		break;
2812 	}
2813 
2814 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
2815 	 * not have bank info, so create a custom tiling dword.
2816 	 * bits 3:0   num_pipes
2817 	 * bits 7:4   num_banks
2818 	 * bits 11:8  group_size
2819 	 * bits 15:12 row_size
2820 	 */
2821 	rdev->config.cik.tile_config = 0;
2822 	switch (rdev->config.cik.num_tile_pipes) {
2823 	case 1:
2824 		rdev->config.cik.tile_config |= (0 << 0);
2825 		break;
2826 	case 2:
2827 		rdev->config.cik.tile_config |= (1 << 0);
2828 		break;
2829 	case 4:
2830 		rdev->config.cik.tile_config |= (2 << 0);
2831 		break;
2832 	case 8:
2833 	default:
2834 		/* XXX what about 12? */
2835 		rdev->config.cik.tile_config |= (3 << 0);
2836 		break;
2837 	}
2838 	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
2839 		rdev->config.cik.tile_config |= 1 << 4;
2840 	else
2841 		rdev->config.cik.tile_config |= 0 << 4;
2842 	rdev->config.cik.tile_config |=
2843 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
2844 	rdev->config.cik.tile_config |=
2845 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
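	/* Worked example (illustrative values): with 4 tile pipes (2 << 0),
	 * more than one bank reported in MC_ARB_RAMCFG (1 << 4), a pipe
	 * interleave field of 0 and a row size field of 1 (2KB rows),
	 * tile_config ends up as 0x2 | 0x10 | 0x1000 = 0x1012.
	 */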
2846 
2847 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
2848 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
2849 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
2850 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
2851 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
2852 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
2853 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
2854 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
2855 
2856 	cik_tiling_mode_table_init(rdev);
2857 
2858 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2859 		     rdev->config.cik.max_sh_per_se,
2860 		     rdev->config.cik.max_backends_per_se);
2861 
2862 	/* set HW defaults for 3D engine */
2863 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2864 
2865 	WREG32(SX_DEBUG_1, 0x20);
2866 
2867 	WREG32(TA_CNTL_AUX, 0x00010000);
2868 
2869 	tmp = RREG32(SPI_CONFIG_CNTL);
2870 	tmp |= 0x03000000;
2871 	WREG32(SPI_CONFIG_CNTL, tmp);
2872 
2873 	WREG32(SQ_CONFIG, 1);
2874 
2875 	WREG32(DB_DEBUG, 0);
2876 
2877 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2878 	tmp |= 0x00000400;
2879 	WREG32(DB_DEBUG2, tmp);
2880 
2881 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2882 	tmp |= 0x00020200;
2883 	WREG32(DB_DEBUG3, tmp);
2884 
2885 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2886 	tmp |= 0x00018208;
2887 	WREG32(CB_HW_CONTROL, tmp);
2888 
2889 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2890 
2891 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2892 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2893 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2894 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2895 
2896 	WREG32(VGT_NUM_INSTANCES, 1);
2897 
2898 	WREG32(CP_PERFMON_CNTL, 0);
2899 
2900 	WREG32(SQ_CONFIG, 0);
2901 
2902 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2903 					  FORCE_EOV_MAX_REZ_CNT(255)));
2904 
2905 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2906 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
2907 
2908 	WREG32(VGT_GS_VERTEX_REUSE, 16);
2909 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2910 
2911 	tmp = RREG32(HDP_MISC_CNTL);
2912 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2913 	WREG32(HDP_MISC_CNTL, tmp);
2914 
2915 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2916 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2917 
2918 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2919 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2920 
2921 	udelay(50);
2922 }
2923 
2924 /*
2925  * GPU scratch register helper functions.
2926  */
2927 /**
2928  * cik_scratch_init - setup driver info for CP scratch regs
2929  *
2930  * @rdev: radeon_device pointer
2931  *
2932  * Set up the number and offset of the CP scratch registers.
2933  * NOTE: use of CP scratch registers is a legacy interface and
2934  * is not used by default on newer asics (r6xx+).  On newer asics,
2935  * memory buffers are used for fences rather than scratch regs.
2936  */
2937 static void cik_scratch_init(struct radeon_device *rdev)
2938 {
2939 	int i;
2940 
2941 	rdev->scratch.num_reg = 7;
2942 	rdev->scratch.reg_base = SCRATCH_REG0;
2943 	for (i = 0; i < rdev->scratch.num_reg; i++) {
2944 		rdev->scratch.free[i] = true;
2945 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2946 	}
2947 }
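/*
 * With the setup above, rdev->scratch.reg[0] is SCRATCH_REG0 and
 * rdev->scratch.reg[6] is SCRATCH_REG0 + 24: seven consecutive 32-bit
 * registers available to the driver.
 */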
2948 
2949 /**
2950  * cik_ring_test - basic gfx ring test
2951  *
2952  * @rdev: radeon_device pointer
2953  * @ring: radeon_ring structure holding ring information
2954  *
2955  * Allocate a scratch register and write to it using the gfx ring (CIK).
2956  * Provides a basic gfx ring test to verify that the ring is working.
2957  * Used by cik_cp_gfx_resume().
2958  * Returns 0 on success, error on failure.
2959  */
2960 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2961 {
2962 	uint32_t scratch;
2963 	uint32_t tmp = 0;
2964 	unsigned i;
2965 	int r;
2966 
2967 	r = radeon_scratch_get(rdev, &scratch);
2968 	if (r) {
2969 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2970 		return r;
2971 	}
2972 	WREG32(scratch, 0xCAFEDEAD);
2973 	r = radeon_ring_lock(rdev, ring, 3);
2974 	if (r) {
2975 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2976 		radeon_scratch_free(rdev, scratch);
2977 		return r;
2978 	}
2979 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2980 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2981 	radeon_ring_write(ring, 0xDEADBEEF);
2982 	radeon_ring_unlock_commit(rdev, ring);
2983 
2984 	for (i = 0; i < rdev->usec_timeout; i++) {
2985 		tmp = RREG32(scratch);
2986 		if (tmp == 0xDEADBEEF)
2987 			break;
2988 		DRM_UDELAY(1);
2989 	}
2990 	if (i < rdev->usec_timeout) {
2991 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2992 	} else {
2993 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2994 			  ring->idx, scratch, tmp);
2995 		r = -EINVAL;
2996 	}
2997 	radeon_scratch_free(rdev, scratch);
2998 	return r;
2999 }
3000 
3001 /**
3002  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3003  *
3004  * @rdev: radeon_device pointer
3005  * @fence: radeon fence object
3006  *
3007  * Emits a fence sequence number on the gfx ring and flushes
3008  * GPU caches.
3009  */
3010 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3011 			     struct radeon_fence *fence)
3012 {
3013 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3014 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3015 
3016 	/* EVENT_WRITE_EOP - flush caches, send int */
3017 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3018 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3019 				 EOP_TC_ACTION_EN |
3020 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3021 				 EVENT_INDEX(5)));
3022 	radeon_ring_write(ring, addr & 0xfffffffc);
3023 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3024 	radeon_ring_write(ring, fence->seq);
3025 	radeon_ring_write(ring, 0);
3026 	/* HDP flush */
3027 	/* We should be using the new WAIT_REG_MEM special op packet here
3028 	 * but it causes the CP to hang
3029 	 */
3030 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3031 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3032 				 WRITE_DATA_DST_SEL(0)));
3033 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3034 	radeon_ring_write(ring, 0);
3035 	radeon_ring_write(ring, 0);
3036 }
3037 
3038 /**
3039  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3040  *
3041  * @rdev: radeon_device pointer
3042  * @fence: radeon fence object
3043  *
3044  * Emits a fence sequence number on the compute ring and flushes
3045  * GPU caches.
3046  */
3047 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3048 				 struct radeon_fence *fence)
3049 {
3050 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3051 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3052 
3053 	/* RELEASE_MEM - flush caches, send int */
3054 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3055 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3056 				 EOP_TC_ACTION_EN |
3057 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3058 				 EVENT_INDEX(5)));
3059 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3060 	radeon_ring_write(ring, addr & 0xfffffffc);
3061 	radeon_ring_write(ring, upper_32_bits(addr));
3062 	radeon_ring_write(ring, fence->seq);
3063 	radeon_ring_write(ring, 0);
3064 	/* HDP flush */
3065 	/* We should be using the new WAIT_REG_MEM special op packet here
3066 	 * but it causes the CP to hang
3067 	 */
3068 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3069 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
3070 				 WRITE_DATA_DST_SEL(0)));
3071 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
3072 	radeon_ring_write(ring, 0);
3073 	radeon_ring_write(ring, 0);
3074 }
3075 
3076 void cik_semaphore_ring_emit(struct radeon_device *rdev,
3077 			     struct radeon_ring *ring,
3078 			     struct radeon_semaphore *semaphore,
3079 			     bool emit_wait)
3080 {
3081 	uint64_t addr = semaphore->gpu_addr;
3082 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3083 
3084 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3085 	radeon_ring_write(ring, addr & 0xffffffff);
3086 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3087 }
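/*
 * Semaphores are used in pairs: one ring emits the packet with
 * emit_wait = false (signal) and the other emits the same gpu_addr
 * with emit_wait = true, stalling the waiter until the signalling
 * ring has passed that point.  The sel field encodes which side
 * this packet represents.
 */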
3088 
3089 /*
3090  * IB stuff
3091  */
3092 /**
3093  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3094  *
3095  * @rdev: radeon_device pointer
3096  * @ib: radeon indirect buffer object
3097  *
3098  * Emits a DE (drawing engine) or CE (constant engine) IB
3099  * on the gfx ring.  IBs are usually generated by userspace
3100  * acceleration drivers and submitted to the kernel for
3101  * scheduling on the ring.  This function schedules the IB
3102  * on the gfx ring for execution by the GPU.
3103  */
3104 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3105 {
3106 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3107 	u32 header, control = INDIRECT_BUFFER_VALID;
3108 
3109 	if (ib->is_const_ib) {
3110 		/* set switch buffer packet before const IB */
3111 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3112 		radeon_ring_write(ring, 0);
3113 
3114 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3115 	} else {
3116 		u32 next_rptr;
3117 		if (ring->rptr_save_reg) {
3118 			next_rptr = ring->wptr + 3 + 4;
3119 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3120 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3121 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3122 			radeon_ring_write(ring, next_rptr);
3123 		} else if (rdev->wb.enabled) {
3124 			next_rptr = ring->wptr + 5 + 4;
3125 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3126 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3127 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3128 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3129 			radeon_ring_write(ring, next_rptr);
3130 		}
3131 
3132 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3133 	}
3134 
3135 	control |= ib->length_dw |
3136 		(ib->vm ? (ib->vm->id << 24) : 0);
3137 
3138 	radeon_ring_write(ring, header);
3139 	radeon_ring_write(ring,
3140 #ifdef __BIG_ENDIAN
3141 			  (2 << 0) |
3142 #endif
3143 			  (ib->gpu_addr & 0xFFFFFFFC));
3144 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3145 	radeon_ring_write(ring, control);
3146 }
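/*
 * The control word assembled above packs the IB length in dwords
 * into the low bits and, when the IB runs under a VM, the VMID into
 * the bits starting at 24; kernel IBs run with VMID 0.
 */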
3147 
3148 /**
3149  * cik_ib_test - basic gfx ring IB test
3150  *
3151  * @rdev: radeon_device pointer
3152  * @ring: radeon_ring structure holding ring information
3153  *
3154  * Allocate an IB and execute it on the gfx ring (CIK).
3155  * Provides a basic gfx ring test to verify that IBs are working.
3156  * Returns 0 on success, error on failure.
3157  */
3158 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3159 {
3160 	struct radeon_ib ib;
3161 	uint32_t scratch;
3162 	uint32_t tmp = 0;
3163 	unsigned i;
3164 	int r;
3165 
3166 	r = radeon_scratch_get(rdev, &scratch);
3167 	if (r) {
3168 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3169 		return r;
3170 	}
3171 	WREG32(scratch, 0xCAFEDEAD);
3172 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3173 	if (r) {
3174 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
3175 		return r;
3176 	}
3177 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3178 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3179 	ib.ptr[2] = 0xDEADBEEF;
3180 	ib.length_dw = 3;
3181 	r = radeon_ib_schedule(rdev, &ib, NULL);
3182 	if (r) {
3183 		radeon_scratch_free(rdev, scratch);
3184 		radeon_ib_free(rdev, &ib);
3185 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3186 		return r;
3187 	}
3188 	r = radeon_fence_wait(ib.fence, false);
3189 	if (r) {
3190 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
3191 		return r;
3192 	}
3193 	for (i = 0; i < rdev->usec_timeout; i++) {
3194 		tmp = RREG32(scratch);
3195 		if (tmp == 0xDEADBEEF)
3196 			break;
3197 		DRM_UDELAY(1);
3198 	}
3199 	if (i < rdev->usec_timeout) {
3200 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3201 	} else {
3202 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3203 			  scratch, tmp);
3204 		r = -EINVAL;
3205 	}
3206 	radeon_scratch_free(rdev, scratch);
3207 	radeon_ib_free(rdev, &ib);
3208 	return r;
3209 }
3210 
3211 /*
3212  * CP.
3213  * On CIK, gfx and compute now have independent command processors.
3214  *
3215  * GFX
3216  * Gfx consists of a single ring and can process both gfx jobs and
3217  * compute jobs.  The gfx CP consists of three microengines (ME):
3218  * PFP - Pre-Fetch Parser
3219  * ME - Micro Engine
3220  * CE - Constant Engine
3221  * The PFP and ME make up what is considered the Drawing Engine (DE).
3222  * The CE is an asynchronous engine used for updating buffer descriptors
3223  * used by the DE so that they can be loaded into cache in parallel
3224  * while the DE is processing state update packets.
3225  *
3226  * Compute
3227  * The compute CP consists of two microengines (ME):
3228  * MEC1 - Compute MicroEngine 1
3229  * MEC2 - Compute MicroEngine 2
3230  * Each MEC supports 4 compute pipes and each pipe supports 8 queues, for 64 queues in total.
3231  * The queues are exposed to userspace and are programmed directly
3232  * by the compute runtime.
3233  */
3234 /**
3235  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3236  *
3237  * @rdev: radeon_device pointer
3238  * @enable: enable or disable the MEs
3239  *
3240  * Halts or unhalts the gfx MEs.
3241  */
3242 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3243 {
3244 	if (enable)
3245 		WREG32(CP_ME_CNTL, 0);
3246 	else {
3247 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3248 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3249 	}
3250 	udelay(50);
3251 }
3252 
3253 /**
3254  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3255  *
3256  * @rdev: radeon_device pointer
3257  *
3258  * Loads the gfx PFP, ME, and CE ucode.
3259  * Returns 0 for success, -EINVAL if the ucode is not available.
3260  */
3261 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3262 {
3263 	const __be32 *fw_data;
3264 	int i;
3265 
3266 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3267 		return -EINVAL;
3268 
3269 	cik_cp_gfx_enable(rdev, false);
3270 
3271 	/* PFP */
3272 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3273 	WREG32(CP_PFP_UCODE_ADDR, 0);
3274 	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3275 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3276 	WREG32(CP_PFP_UCODE_ADDR, 0);
3277 
3278 	/* CE */
3279 	fw_data = (const __be32 *)rdev->ce_fw->data;
3280 	WREG32(CP_CE_UCODE_ADDR, 0);
3281 	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3282 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3283 	WREG32(CP_CE_UCODE_ADDR, 0);
3284 
3285 	/* ME */
3286 	fw_data = (const __be32 *)rdev->me_fw->data;
3287 	WREG32(CP_ME_RAM_WADDR, 0);
3288 	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3289 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3290 	WREG32(CP_ME_RAM_WADDR, 0);
3291 
3292 	WREG32(CP_PFP_UCODE_ADDR, 0);
3293 	WREG32(CP_CE_UCODE_ADDR, 0);
3294 	WREG32(CP_ME_RAM_WADDR, 0);
3295 	WREG32(CP_ME_RAM_RADDR, 0);
3296 	return 0;
3297 }
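/*
 * Note the load pattern above: each ucode address register is reset
 * to 0, the big-endian words are streamed through the matching data
 * register (the write address advances as words are written), and the
 * address registers are cleared again before the MEs are restarted.
 */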
3298 
3299 /**
3300  * cik_cp_gfx_start - start the gfx ring
3301  *
3302  * @rdev: radeon_device pointer
3303  *
3304  * Enables the ring and loads the clear state context and other
3305  * packets required to init the ring.
3306  * Returns 0 for success, error for failure.
3307  */
3308 static int cik_cp_gfx_start(struct radeon_device *rdev)
3309 {
3310 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3311 	int r, i;
3312 
3313 	/* init the CP */
3314 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3315 	WREG32(CP_ENDIAN_SWAP, 0);
3316 	WREG32(CP_DEVICE_ID, 1);
3317 
3318 	cik_cp_gfx_enable(rdev, true);
3319 
3320 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3321 	if (r) {
3322 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3323 		return r;
3324 	}
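	/*
	 * The "+ 17" above is the fixed packet overhead emitted below, on
	 * top of the clear state itself: SET_BASE (4 dwords), two
	 * PREAMBLE_CNTL packets (2 each), CONTEXT_CONTROL (3),
	 * CLEAR_STATE (2) and SET_CONTEXT_REG (4): 4+2+3+2+2+4 = 17.
	 */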
3325 
3326 	/* init the CE partitions.  CE only used for gfx on CIK */
3327 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3328 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3329 	radeon_ring_write(ring, 0xc000);
3330 	radeon_ring_write(ring, 0xc000);
3331 
3332 	/* setup clear context state */
3333 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3334 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3335 
3336 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3337 	radeon_ring_write(ring, 0x80000000);
3338 	radeon_ring_write(ring, 0x80000000);
3339 
3340 	for (i = 0; i < cik_default_size; i++)
3341 		radeon_ring_write(ring, cik_default_state[i]);
3342 
3343 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3344 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3345 
3346 	/* set clear context state */
3347 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3348 	radeon_ring_write(ring, 0);
3349 
3350 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3351 	radeon_ring_write(ring, 0x00000316);
3352 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3353 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3354 
3355 	radeon_ring_unlock_commit(rdev, ring);
3356 
3357 	return 0;
3358 }
3359 
3360 /**
3361  * cik_cp_gfx_fini - stop the gfx ring
3362  *
3363  * @rdev: radeon_device pointer
3364  *
3365  * Stop the gfx ring and tear down the driver ring
3366  * info.
3367  */
3368 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3369 {
3370 	cik_cp_gfx_enable(rdev, false);
3371 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3372 }
3373 
3374 /**
3375  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3376  *
3377  * @rdev: radeon_device pointer
3378  *
3379  * Program the location and size of the gfx ring buffer
3380  * and test it to make sure it's working.
3381  * Returns 0 for success, error for failure.
3382  */
3383 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3384 {
3385 	struct radeon_ring *ring;
3386 	u32 tmp;
3387 	u32 rb_bufsz;
3388 	u64 rb_addr;
3389 	int r;
3390 
3391 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3392 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3393 
3394 	/* Set the write pointer delay */
3395 	WREG32(CP_RB_WPTR_DELAY, 0);
3396 
3397 	/* set the RB to use vmid 0 */
3398 	WREG32(CP_RB_VMID, 0);
3399 
3400 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3401 
3402 	/* ring 0 - compute and gfx */
3403 	/* Set ring buffer size */
3404 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3405 	rb_bufsz = order_base_2(ring->ring_size / 8);
3406 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3407 #ifdef __BIG_ENDIAN
3408 	tmp |= BUF_SWAP_32BIT;
3409 #endif
3410 	WREG32(CP_RB0_CNTL, tmp);
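	/*
	 * Worked example of the encoding above, assuming a 1 MiB ring and
	 * the usual RADEON_GPU_PAGE_SIZE of 4096: rb_bufsz =
	 * order_base_2(0x100000 / 8) = 17, the rptr block size is
	 * order_base_2(4096 / 8) = 9, so tmp = (9 << 8) | 17 = 0x911
	 * before the optional endian-swap bit.
	 */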
3411 
3412 	/* Initialize the ring buffer's read and write pointers */
3413 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3414 	ring->wptr = 0;
3415 	WREG32(CP_RB0_WPTR, ring->wptr);
3416 
3417 	/* set the wb address whether it's enabled or not */
3418 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3419 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3420 
3421 	/* scratch register shadowing is no longer supported */
3422 	WREG32(SCRATCH_UMSK, 0);
3423 
3424 	if (!rdev->wb.enabled)
3425 		tmp |= RB_NO_UPDATE;
3426 
3427 	mdelay(1);
3428 	WREG32(CP_RB0_CNTL, tmp);
3429 
3430 	rb_addr = ring->gpu_addr >> 8;
3431 	WREG32(CP_RB0_BASE, rb_addr);
3432 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
3433 
3434 	ring->rptr = RREG32(CP_RB0_RPTR);
3435 
3436 	/* start the ring */
3437 	cik_cp_gfx_start(rdev);
3438 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3439 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3440 	if (r) {
3441 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3442 		return r;
3443 	}
3444 	return 0;
3445 }
3446 
3447 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
3448 			      struct radeon_ring *ring)
3449 {
3450 	u32 rptr;
3451
3454 	if (rdev->wb.enabled) {
3455 		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
3456 	} else {
3457 		mutex_lock(&rdev->srbm_mutex);
3458 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3459 		rptr = RREG32(CP_HQD_PQ_RPTR);
3460 		cik_srbm_select(rdev, 0, 0, 0, 0);
3461 		mutex_unlock(&rdev->srbm_mutex);
3462 	}
3463 
3464 	return rptr;
3465 }
3466 
3467 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
3468 			      struct radeon_ring *ring)
3469 {
3470 	u32 wptr;
3471 
3472 	if (rdev->wb.enabled) {
3473 		wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
3474 	} else {
3475 		mutex_lock(&rdev->srbm_mutex);
3476 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
3477 		wptr = RREG32(CP_HQD_PQ_WPTR);
3478 		cik_srbm_select(rdev, 0, 0, 0, 0);
3479 		mutex_unlock(&rdev->srbm_mutex);
3480 	}
3481 
3482 	return wptr;
3483 }
3484 
3485 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
3486 			       struct radeon_ring *ring)
3487 {
3488 	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(ring->wptr);
3489 	WDOORBELL32(ring->doorbell_offset, ring->wptr);
3490 }
3491 
3492 /**
3493  * cik_cp_compute_enable - enable/disable the compute CP MEs
3494  *
3495  * @rdev: radeon_device pointer
3496  * @enable: enable or disable the MEs
3497  *
3498  * Halts or unhalts the compute MEs.
3499  */
3500 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
3501 {
3502 	if (enable)
3503 		WREG32(CP_MEC_CNTL, 0);
3504 	else
3505 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
3506 	udelay(50);
3507 }
3508 
3509 /**
3510  * cik_cp_compute_load_microcode - load the compute CP ME ucode
3511  *
3512  * @rdev: radeon_device pointer
3513  *
3514  * Loads the compute MEC1&2 ucode.
3515  * Returns 0 for success, -EINVAL if the ucode is not available.
3516  */
3517 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
3518 {
3519 	const __be32 *fw_data;
3520 	int i;
3521 
3522 	if (!rdev->mec_fw)
3523 		return -EINVAL;
3524 
3525 	cik_cp_compute_enable(rdev, false);
3526 
3527 	/* MEC1 */
3528 	fw_data = (const __be32 *)rdev->mec_fw->data;
3529 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3530 	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3531 		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
3532 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
3533 
3534 	if (rdev->family == CHIP_KAVERI) {
3535 		/* MEC2 */
3536 		fw_data = (const __be32 *)rdev->mec_fw->data;
3537 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3538 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
3539 			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
3540 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
3541 	}
3542 
3543 	return 0;
3544 }
3545 
3546 /**
3547  * cik_cp_compute_start - start the compute queues
3548  *
3549  * @rdev: radeon_device pointer
3550  *
3551  * Enable the compute queues.
3552  * Returns 0 for success, error for failure.
3553  */
3554 static int cik_cp_compute_start(struct radeon_device *rdev)
3555 {
3556 	cik_cp_compute_enable(rdev, true);
3557 
3558 	return 0;
3559 }
3560 
3561 /**
3562  * cik_cp_compute_fini - stop the compute queues
3563  *
3564  * @rdev: radeon_device pointer
3565  *
3566  * Stop the compute queues and tear down the driver queue
3567  * info.
3568  */
3569 static void cik_cp_compute_fini(struct radeon_device *rdev)
3570 {
3571 	int i, idx, r;
3572 
3573 	cik_cp_compute_enable(rdev, false);
3574 
3575 	for (i = 0; i < 2; i++) {
3576 		if (i == 0)
3577 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
3578 		else
3579 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
3580 
3581 		if (rdev->ring[idx].mqd_obj) {
3582 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3583 			if (unlikely(r != 0))
3584 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
3585 
3586 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
3587 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3588 
3589 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
3590 			rdev->ring[idx].mqd_obj = NULL;
3591 		}
3592 	}
3593 }
3594 
3595 static void cik_mec_fini(struct radeon_device *rdev)
3596 {
3597 	int r;
3598 
3599 	if (rdev->mec.hpd_eop_obj) {
3600 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3601 		if (unlikely(r != 0))
3602 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
3603 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
3604 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3605 
3606 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
3607 		rdev->mec.hpd_eop_obj = NULL;
3608 	}
3609 }
3610 
3611 #define MEC_HPD_SIZE 2048
3612 
3613 static int cik_mec_init(struct radeon_device *rdev)
3614 {
3615 	int r;
3616 	u32 *hpd;
3617 
3618 	/*
3619 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
3620 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
3621 	 */
3622 	if (rdev->family == CHIP_KAVERI)
3623 		rdev->mec.num_mec = 2;
3624 	else
3625 		rdev->mec.num_mec = 1;
3626 	rdev->mec.num_pipe = 4;
3627 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
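	/* e.g. KAVERI: 2 * 4 * 8 = 64 queues, and the EOP buffer below is
	 * 2 MECs * 4 pipes * MEC_HPD_SIZE * 2 = 32 KiB; BONAIRE/KABINI
	 * need half of each.
	 */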
3628 
3629 	if (rdev->mec.hpd_eop_obj == NULL) {
3630 		r = radeon_bo_create(rdev,
3631 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
3632 				     PAGE_SIZE, true,
3633 				     RADEON_GEM_DOMAIN_GTT, NULL,
3634 				     &rdev->mec.hpd_eop_obj);
3635 		if (r) {
3636 			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
3637 			return r;
3638 		}
3639 	}
3640 
3641 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
3642 	if (unlikely(r != 0)) {
3643 		cik_mec_fini(rdev);
3644 		return r;
3645 	}
3646 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
3647 			  &rdev->mec.hpd_eop_gpu_addr);
3648 	if (r) {
3649 		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
3650 		cik_mec_fini(rdev);
3651 		return r;
3652 	}
3653 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
3654 	if (r) {
3655 		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
3656 		cik_mec_fini(rdev);
3657 		return r;
3658 	}
3659 
3660 	/* clear memory.  Not sure if this is required or not */
3661 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
3662 
3663 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
3664 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
3665 
3666 	return 0;
3667 }
3668 
3669 struct hqd_registers
3670 {
3671 	u32 cp_mqd_base_addr;
3672 	u32 cp_mqd_base_addr_hi;
3673 	u32 cp_hqd_active;
3674 	u32 cp_hqd_vmid;
3675 	u32 cp_hqd_persistent_state;
3676 	u32 cp_hqd_pipe_priority;
3677 	u32 cp_hqd_queue_priority;
3678 	u32 cp_hqd_quantum;
3679 	u32 cp_hqd_pq_base;
3680 	u32 cp_hqd_pq_base_hi;
3681 	u32 cp_hqd_pq_rptr;
3682 	u32 cp_hqd_pq_rptr_report_addr;
3683 	u32 cp_hqd_pq_rptr_report_addr_hi;
3684 	u32 cp_hqd_pq_wptr_poll_addr;
3685 	u32 cp_hqd_pq_wptr_poll_addr_hi;
3686 	u32 cp_hqd_pq_doorbell_control;
3687 	u32 cp_hqd_pq_wptr;
3688 	u32 cp_hqd_pq_control;
3689 	u32 cp_hqd_ib_base_addr;
3690 	u32 cp_hqd_ib_base_addr_hi;
3691 	u32 cp_hqd_ib_rptr;
3692 	u32 cp_hqd_ib_control;
3693 	u32 cp_hqd_iq_timer;
3694 	u32 cp_hqd_iq_rptr;
3695 	u32 cp_hqd_dequeue_request;
3696 	u32 cp_hqd_dma_offload;
3697 	u32 cp_hqd_sema_cmd;
3698 	u32 cp_hqd_msg_type;
3699 	u32 cp_hqd_atomic0_preop_lo;
3700 	u32 cp_hqd_atomic0_preop_hi;
3701 	u32 cp_hqd_atomic1_preop_lo;
3702 	u32 cp_hqd_atomic1_preop_hi;
3703 	u32 cp_hqd_hq_scheduler0;
3704 	u32 cp_hqd_hq_scheduler1;
3705 	u32 cp_mqd_control;
3706 };
3707 
3708 struct bonaire_mqd
3709 {
3710 	u32 header;
3711 	u32 dispatch_initiator;
3712 	u32 dimensions[3];
3713 	u32 start_idx[3];
3714 	u32 num_threads[3];
3715 	u32 pipeline_stat_enable;
3716 	u32 perf_counter_enable;
3717 	u32 pgm[2];
3718 	u32 tba[2];
3719 	u32 tma[2];
3720 	u32 pgm_rsrc[2];
3721 	u32 vmid;
3722 	u32 resource_limits;
3723 	u32 static_thread_mgmt01[2];
3724 	u32 tmp_ring_size;
3725 	u32 static_thread_mgmt23[2];
3726 	u32 restart[3];
3727 	u32 thread_trace_enable;
3728 	u32 reserved1;
3729 	u32 user_data[16];
3730 	u32 vgtcs_invoke_count[2];
3731 	struct hqd_registers queue_state;
3732 	u32 dequeue_cntr;
3733 	u32 interrupt_queue[64];
3734 };
3735 
3736 /**
3737  * cik_cp_compute_resume - setup the compute queue registers
3738  *
3739  * @rdev: radeon_device pointer
3740  *
3741  * Program the compute queues and test them to make sure they
3742  * are working.
3743  * Returns 0 for success, error for failure.
3744  */
3745 static int cik_cp_compute_resume(struct radeon_device *rdev)
3746 {
3747 	int r, i, j, idx;
3748 	u32 tmp;
3749 	bool use_doorbell = true;
3750 	u64 hqd_gpu_addr;
3751 	u64 mqd_gpu_addr;
3752 	u64 eop_gpu_addr;
3753 	u64 wb_gpu_addr;
3754 	u32 *buf;
3755 	struct bonaire_mqd *mqd;
3756 
3757 	r = cik_cp_compute_start(rdev);
3758 	if (r)
3759 		return r;
3760 
3761 	/* fix up chicken bits */
3762 	tmp = RREG32(CP_CPF_DEBUG);
3763 	tmp |= (1 << 23);
3764 	WREG32(CP_CPF_DEBUG, tmp);
3765 
3766 	/* init the pipes */
3767 	mutex_lock(&rdev->srbm_mutex);
3768 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
3769 		int me = (i < 4) ? 1 : 2;
3770 		int pipe = (i < 4) ? i : (i - 4);
3771 
3772 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
3773 
3774 		cik_srbm_select(rdev, me, pipe, 0, 0);
3775 
3776 		/* write the EOP addr */
3777 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
3778 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
3779 
3780 		/* set the VMID assigned */
3781 		WREG32(CP_HPD_EOP_VMID, 0);
3782 
3783 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
3784 		tmp = RREG32(CP_HPD_EOP_CONTROL);
3785 		tmp &= ~EOP_SIZE_MASK;
3786 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
3787 		WREG32(CP_HPD_EOP_CONTROL, tmp);
3788 	}
3789 	cik_srbm_select(rdev, 0, 0, 0, 0);
3790 	mutex_unlock(&rdev->srbm_mutex);
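	/*
	 * Sanity check of the loop above: pipes are numbered linearly
	 * across MECs, so i = 5 selects me = 2, pipe = 1.  The EOP size
	 * written is order_base_2(2048 / 8) = 8, which the hardware
	 * reads back as 2^(8+1) = 512 dwords = 2048 bytes, i.e. exactly
	 * MEC_HPD_SIZE.
	 */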
3791 
3792 	/* init the queues.  Just two for now. */
3793 	for (i = 0; i < 2; i++) {
3794 		if (i == 0)
3795 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
3796 		else
3797 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
3798 
3799 		if (rdev->ring[idx].mqd_obj == NULL) {
3800 			r = radeon_bo_create(rdev,
3801 					     sizeof(struct bonaire_mqd),
3802 					     PAGE_SIZE, true,
3803 					     RADEON_GEM_DOMAIN_GTT, NULL,
3804 					     &rdev->ring[idx].mqd_obj);
3805 			if (r) {
3806 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
3807 				return r;
3808 			}
3809 		}
3810 
3811 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
3812 		if (unlikely(r != 0)) {
3813 			cik_cp_compute_fini(rdev);
3814 			return r;
3815 		}
3816 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
3817 				  &mqd_gpu_addr);
3818 		if (r) {
3819 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
3820 			cik_cp_compute_fini(rdev);
3821 			return r;
3822 		}
3823 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
3824 		if (r) {
3825 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
3826 			cik_cp_compute_fini(rdev);
3827 			return r;
3828 		}
3829 
3830 		/* doorbell offset */
3831 		rdev->ring[idx].doorbell_offset =
3832 			(rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
3833 
3834 		/* init the mqd struct */
3835 		memset(buf, 0, sizeof(struct bonaire_mqd));
3836 
3837 		mqd = (struct bonaire_mqd *)buf;
3838 		mqd->header = 0xC0310800;
3839 		mqd->static_thread_mgmt01[0] = 0xffffffff;
3840 		mqd->static_thread_mgmt01[1] = 0xffffffff;
3841 		mqd->static_thread_mgmt23[0] = 0xffffffff;
3842 		mqd->static_thread_mgmt23[1] = 0xffffffff;
3843 
3844 		mutex_lock(&rdev->srbm_mutex);
3845 		cik_srbm_select(rdev, rdev->ring[idx].me,
3846 				rdev->ring[idx].pipe,
3847 				rdev->ring[idx].queue, 0);
3848 
3849 		/* disable wptr polling */
3850 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
3851 		tmp &= ~WPTR_POLL_EN;
3852 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
3853 
3854 		/* enable doorbell? */
3855 		mqd->queue_state.cp_hqd_pq_doorbell_control =
3856 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3857 		if (use_doorbell)
3858 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3859 		else
3860 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3861 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3862 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
3863 
3864 		/* disable the queue if it's active */
3865 		mqd->queue_state.cp_hqd_dequeue_request = 0;
3866 		mqd->queue_state.cp_hqd_pq_rptr = 0;
3867 		mqd->queue_state.cp_hqd_pq_wptr = 0;
3868 		if (RREG32(CP_HQD_ACTIVE) & 1) {
3869 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3870 			for (j = 0; j < rdev->usec_timeout; j++) {
3871 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
3872 					break;
3873 				udelay(1);
3874 			}
3875 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3876 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3877 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3878 		}
3879 
3880 		/* set the pointer to the MQD */
3881 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3882 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3883 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3884 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3885 		/* set MQD vmid to 0 */
3886 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3887 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3888 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3889 
3890 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3891 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3892 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3893 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3894 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3895 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3896 
3897 		/* set up the HQD, this is similar to CP_RB0_CNTL */
3898 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3899 		mqd->queue_state.cp_hqd_pq_control &=
3900 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3901 
3902 		mqd->queue_state.cp_hqd_pq_control |=
3903 			order_base_2(rdev->ring[idx].ring_size / 8);
3904 		mqd->queue_state.cp_hqd_pq_control |=
3905 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
3906 #ifdef __BIG_ENDIAN
3907 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3908 #endif
3909 		mqd->queue_state.cp_hqd_pq_control &=
3910 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3911 		mqd->queue_state.cp_hqd_pq_control |=
3912 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3913 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3914 
3915 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3916 		if (i == 0)
3917 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3918 		else
3919 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3920 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3921 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3922 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3923 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3924 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3925 
3926 		/* set the wb address whether it's enabled or not */
3927 		if (i == 0)
3928 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3929 		else
3930 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3931 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3932 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3933 			upper_32_bits(wb_gpu_addr) & 0xffff;
3934 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3935 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3936 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3937 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3938 
3939 		/* enable the doorbell if requested */
3940 		if (use_doorbell) {
3941 			mqd->queue_state.cp_hqd_pq_doorbell_control =
3942 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3943 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3944 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
3945 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3946 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3947 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
3948 				~(DOORBELL_SOURCE | DOORBELL_HIT);
3949 
3950 		} else {
3951 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3952 		}
3953 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3954 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
3955 
3956 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3957 		rdev->ring[idx].wptr = 0;
3958 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3959 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3960 		rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3961 		mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3962 
3963 		/* set the vmid for the queue */
3964 		mqd->queue_state.cp_hqd_vmid = 0;
3965 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3966 
3967 		/* activate the queue */
3968 		mqd->queue_state.cp_hqd_active = 1;
3969 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3970 
3971 		cik_srbm_select(rdev, 0, 0, 0, 0);
3972 		mutex_unlock(&rdev->srbm_mutex);
3973 
3974 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3975 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3976 
3977 		rdev->ring[idx].ready = true;
3978 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3979 		if (r)
3980 			rdev->ring[idx].ready = false;
3981 	}
3982 
3983 	return 0;
3984 }
3985 
3986 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3987 {
3988 	cik_cp_gfx_enable(rdev, enable);
3989 	cik_cp_compute_enable(rdev, enable);
3990 }
3991 
3992 static int cik_cp_load_microcode(struct radeon_device *rdev)
3993 {
3994 	int r;
3995 
3996 	r = cik_cp_gfx_load_microcode(rdev);
3997 	if (r)
3998 		return r;
3999 	r = cik_cp_compute_load_microcode(rdev);
4000 	if (r)
4001 		return r;
4002 
4003 	return 0;
4004 }
4005 
4006 static void cik_cp_fini(struct radeon_device *rdev)
4007 {
4008 	cik_cp_gfx_fini(rdev);
4009 	cik_cp_compute_fini(rdev);
4010 }
4011 
4012 static int cik_cp_resume(struct radeon_device *rdev)
4013 {
4014 	int r;
4015 
4016 	r = cik_cp_load_microcode(rdev);
4017 	if (r)
4018 		return r;
4019 
4020 	r = cik_cp_gfx_resume(rdev);
4021 	if (r)
4022 		return r;
4023 	r = cik_cp_compute_resume(rdev);
4024 	if (r)
4025 		return r;
4026 
4027 	return 0;
4028 }
4029 
4030 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4031 {
4032 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4033 		RREG32(GRBM_STATUS));
4034 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4035 		RREG32(GRBM_STATUS2));
4036 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4037 		RREG32(GRBM_STATUS_SE0));
4038 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4039 		RREG32(GRBM_STATUS_SE1));
4040 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4041 		RREG32(GRBM_STATUS_SE2));
4042 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4043 		RREG32(GRBM_STATUS_SE3));
4044 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4045 		RREG32(SRBM_STATUS));
4046 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4047 		RREG32(SRBM_STATUS2));
4048 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4049 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4050 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4051 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4052 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4053 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4054 		 RREG32(CP_STALLED_STAT1));
4055 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4056 		 RREG32(CP_STALLED_STAT2));
4057 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4058 		 RREG32(CP_STALLED_STAT3));
4059 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4060 		 RREG32(CP_CPF_BUSY_STAT));
4061 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4062 		 RREG32(CP_CPF_STALLED_STAT1));
4063 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4064 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4065 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4066 		 RREG32(CP_CPC_STALLED_STAT1));
4067 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4068 }
4069 
4070 /**
4071  * cik_gpu_check_soft_reset - check which blocks are busy
4072  *
4073  * @rdev: radeon_device pointer
4074  *
4075  * Check which blocks are busy and return the relevant reset
4076  * mask to be used by cik_gpu_soft_reset().
4077  * Returns a mask of the blocks to be reset.
4078  */
4079 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4080 {
4081 	u32 reset_mask = 0;
4082 	u32 tmp;
4083 
4084 	/* GRBM_STATUS */
4085 	tmp = RREG32(GRBM_STATUS);
4086 	if (tmp & (PA_BUSY | SC_BUSY |
4087 		   BCI_BUSY | SX_BUSY |
4088 		   TA_BUSY | VGT_BUSY |
4089 		   DB_BUSY | CB_BUSY |
4090 		   GDS_BUSY | SPI_BUSY |
4091 		   IA_BUSY | IA_BUSY_NO_DMA))
4092 		reset_mask |= RADEON_RESET_GFX;
4093 
4094 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4095 		reset_mask |= RADEON_RESET_CP;
4096 
4097 	/* GRBM_STATUS2 */
4098 	tmp = RREG32(GRBM_STATUS2);
4099 	if (tmp & RLC_BUSY)
4100 		reset_mask |= RADEON_RESET_RLC;
4101 
4102 	/* SDMA0_STATUS_REG */
4103 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4104 	if (!(tmp & SDMA_IDLE))
4105 		reset_mask |= RADEON_RESET_DMA;
4106 
4107 	/* SDMA1_STATUS_REG */
4108 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4109 	if (!(tmp & SDMA_IDLE))
4110 		reset_mask |= RADEON_RESET_DMA1;
4111 
4112 	/* SRBM_STATUS2 */
4113 	tmp = RREG32(SRBM_STATUS2);
4114 	if (tmp & SDMA_BUSY)
4115 		reset_mask |= RADEON_RESET_DMA;
4116 
4117 	if (tmp & SDMA1_BUSY)
4118 		reset_mask |= RADEON_RESET_DMA1;
4119 
4120 	/* SRBM_STATUS */
4121 	tmp = RREG32(SRBM_STATUS);
4122 
4123 	if (tmp & IH_BUSY)
4124 		reset_mask |= RADEON_RESET_IH;
4125 
4126 	if (tmp & SEM_BUSY)
4127 		reset_mask |= RADEON_RESET_SEM;
4128 
4129 	if (tmp & GRBM_RQ_PENDING)
4130 		reset_mask |= RADEON_RESET_GRBM;
4131 
4132 	if (tmp & VMC_BUSY)
4133 		reset_mask |= RADEON_RESET_VMC;
4134 
4135 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4136 		   MCC_BUSY | MCD_BUSY))
4137 		reset_mask |= RADEON_RESET_MC;
4138 
4139 	if (evergreen_is_display_hung(rdev))
4140 		reset_mask |= RADEON_RESET_DISPLAY;
4141 
4142 	/* Skip MC reset as it's most likely not hung, just busy */
4143 	if (reset_mask & RADEON_RESET_MC) {
4144 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4145 		reset_mask &= ~RADEON_RESET_MC;
4146 	}
4147 
4148 	return reset_mask;
4149 }
4150 
4151 /**
4152  * cik_gpu_soft_reset - soft reset GPU
4153  *
4154  * @rdev: radeon_device pointer
4155  * @reset_mask: mask of which blocks to reset
4156  *
4157  * Soft reset the blocks specified in @reset_mask.
4158  */
4159 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4160 {
4161 	struct evergreen_mc_save save;
4162 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4163 	u32 tmp;
4164 
4165 	if (reset_mask == 0)
4166 		return;
4167 
4168 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4169 
4170 	cik_print_gpu_status_regs(rdev);
4171 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4172 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4173 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4174 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4175 
4176 	/* stop the rlc */
4177 	cik_rlc_stop(rdev);
4178 
4179 	/* Disable GFX parsing/prefetching */
4180 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4181 
4182 	/* Disable MEC parsing/prefetching */
4183 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4184 
4185 	if (reset_mask & RADEON_RESET_DMA) {
4186 		/* sdma0 */
4187 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4188 		tmp |= SDMA_HALT;
4189 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4190 	}
4191 	if (reset_mask & RADEON_RESET_DMA1) {
4192 		/* sdma1 */
4193 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4194 		tmp |= SDMA_HALT;
4195 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4196 	}
4197 
4198 	evergreen_mc_stop(rdev, &save);
4199 	if (evergreen_mc_wait_for_idle(rdev)) {
4200 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4201 	}
4202 
4203 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4204 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4205 
4206 	if (reset_mask & RADEON_RESET_CP) {
4207 		grbm_soft_reset |= SOFT_RESET_CP;
4208 
4209 		srbm_soft_reset |= SOFT_RESET_GRBM;
4210 	}
4211 
4212 	if (reset_mask & RADEON_RESET_DMA)
4213 		srbm_soft_reset |= SOFT_RESET_SDMA;
4214 
4215 	if (reset_mask & RADEON_RESET_DMA1)
4216 		srbm_soft_reset |= SOFT_RESET_SDMA1;
4217 
4218 	if (reset_mask & RADEON_RESET_DISPLAY)
4219 		srbm_soft_reset |= SOFT_RESET_DC;
4220 
4221 	if (reset_mask & RADEON_RESET_RLC)
4222 		grbm_soft_reset |= SOFT_RESET_RLC;
4223 
4224 	if (reset_mask & RADEON_RESET_SEM)
4225 		srbm_soft_reset |= SOFT_RESET_SEM;
4226 
4227 	if (reset_mask & RADEON_RESET_IH)
4228 		srbm_soft_reset |= SOFT_RESET_IH;
4229 
4230 	if (reset_mask & RADEON_RESET_GRBM)
4231 		srbm_soft_reset |= SOFT_RESET_GRBM;
4232 
4233 	if (reset_mask & RADEON_RESET_VMC)
4234 		srbm_soft_reset |= SOFT_RESET_VMC;
4235 
4236 	if (!(rdev->flags & RADEON_IS_IGP)) {
4237 		if (reset_mask & RADEON_RESET_MC)
4238 			srbm_soft_reset |= SOFT_RESET_MC;
4239 	}
4240 
4241 	if (grbm_soft_reset) {
4242 		tmp = RREG32(GRBM_SOFT_RESET);
4243 		tmp |= grbm_soft_reset;
4244 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4245 		WREG32(GRBM_SOFT_RESET, tmp);
4246 		tmp = RREG32(GRBM_SOFT_RESET);
4247 
4248 		udelay(50);
4249 
4250 		tmp &= ~grbm_soft_reset;
4251 		WREG32(GRBM_SOFT_RESET, tmp);
4252 		tmp = RREG32(GRBM_SOFT_RESET);
4253 	}
4254 
4255 	if (srbm_soft_reset) {
4256 		tmp = RREG32(SRBM_SOFT_RESET);
4257 		tmp |= srbm_soft_reset;
4258 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4259 		WREG32(SRBM_SOFT_RESET, tmp);
4260 		tmp = RREG32(SRBM_SOFT_RESET);
4261 
4262 		udelay(50);
4263 
4264 		tmp &= ~srbm_soft_reset;
4265 		WREG32(SRBM_SOFT_RESET, tmp);
4266 		tmp = RREG32(SRBM_SOFT_RESET);
4267 	}
4268 
4269 	/* Wait a little for things to settle down */
4270 	udelay(50);
4271 
4272 	evergreen_mc_resume(rdev, &save);
4273 	udelay(50);
4274 
4275 	cik_print_gpu_status_regs(rdev);
4276 }
4277 
4278 /**
4279  * cik_asic_reset - soft reset GPU
4280  *
4281  * @rdev: radeon_device pointer
4282  *
4283  * Look up which blocks are hung and attempt
4284  * to reset them.
4285  * Returns 0 for success.
4286  */
4287 int cik_asic_reset(struct radeon_device *rdev)
4288 {
4289 	u32 reset_mask;
4290 
4291 	reset_mask = cik_gpu_check_soft_reset(rdev);
4292 
4293 	if (reset_mask)
4294 		r600_set_bios_scratch_engine_hung(rdev, true);
4295 
4296 	cik_gpu_soft_reset(rdev, reset_mask);
4297 
4298 	reset_mask = cik_gpu_check_soft_reset(rdev);
4299 
4300 	if (!reset_mask)
4301 		r600_set_bios_scratch_engine_hung(rdev, false);
4302 
4303 	return 0;
4304 }
4305 
4306 /**
4307  * cik_gfx_is_lockup - check if the 3D engine is locked up
4308  *
4309  * @rdev: radeon_device pointer
4310  * @ring: radeon_ring structure holding ring information
4311  *
4312  * Check if the 3D engine is locked up (CIK).
4313  * Returns true if the engine is locked, false if not.
4314  */
4315 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4316 {
4317 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4318 
4319 	if (!(reset_mask & (RADEON_RESET_GFX |
4320 			    RADEON_RESET_COMPUTE |
4321 			    RADEON_RESET_CP))) {
4322 		radeon_ring_lockup_update(ring);
4323 		return false;
4324 	}
4325 	/* force CP activities */
4326 	radeon_ring_force_activity(rdev, ring);
4327 	return radeon_ring_test_lockup(rdev, ring);
4328 }
4329 
4330 /* MC */
4331 /**
4332  * cik_mc_program - program the GPU memory controller
4333  *
4334  * @rdev: radeon_device pointer
4335  *
4336  * Set the location of vram, gart, and AGP in the GPU's
4337  * physical address space (CIK).
4338  */
4339 static void cik_mc_program(struct radeon_device *rdev)
4340 {
4341 	struct evergreen_mc_save save;
4342 	u32 tmp;
4343 	int i, j;
4344 
4345 	/* Initialize HDP */
4346 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4347 		WREG32((0x2c14 + j), 0x00000000);
4348 		WREG32((0x2c18 + j), 0x00000000);
4349 		WREG32((0x2c1c + j), 0x00000000);
4350 		WREG32((0x2c20 + j), 0x00000000);
4351 		WREG32((0x2c24 + j), 0x00000000);
4352 	}
4353 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4354 
4355 	evergreen_mc_stop(rdev, &save);
4356 	if (radeon_mc_wait_for_idle(rdev)) {
4357 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4358 	}
4359 	/* Lockout access through VGA aperture*/
4360 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4361 	/* Update configuration */
4362 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4363 	       rdev->mc.vram_start >> 12);
4364 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4365 	       rdev->mc.vram_end >> 12);
4366 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4367 	       rdev->vram_scratch.gpu_addr >> 12);
4368 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4369 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4370 	WREG32(MC_VM_FB_LOCATION, tmp);
4371 	/* XXX double check these! */
4372 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4373 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4374 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4375 	WREG32(MC_VM_AGP_BASE, 0);
4376 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4377 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4378 	if (radeon_mc_wait_for_idle(rdev)) {
4379 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4380 	}
4381 	evergreen_mc_resume(rdev, &save);
4382 	/* we need to own VRAM, so turn off the VGA renderer here
4383 	 * to stop it overwriting our objects */
4384 	rv515_vga_render_disable(rdev);
4385 }
4386 
4387 /**
4388  * cik_mc_init - initialize the memory controller driver params
4389  *
4390  * @rdev: radeon_device pointer
4391  *
4392  * Look up the amount of vram, vram width, and decide how to place
4393  * vram and gart within the GPU's physical address space (CIK).
4394  * Returns 0 for success.
4395  */
4396 static int cik_mc_init(struct radeon_device *rdev)
4397 {
4398 	u32 tmp;
4399 	int chansize, numchan;
4400 
4401 	/* Get VRAM information */
4402 	rdev->mc.vram_is_ddr = true;
4403 	tmp = RREG32(MC_ARB_RAMCFG);
4404 	if (tmp & CHANSIZE_MASK) {
4405 		chansize = 64;
4406 	} else {
4407 		chansize = 32;
4408 	}
4409 	tmp = RREG32(MC_SHARED_CHMAP);
4410 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4411 	case 0:
4412 	default:
4413 		numchan = 1;
4414 		break;
4415 	case 1:
4416 		numchan = 2;
4417 		break;
4418 	case 2:
4419 		numchan = 4;
4420 		break;
4421 	case 3:
4422 		numchan = 8;
4423 		break;
4424 	case 4:
4425 		numchan = 3;
4426 		break;
4427 	case 5:
4428 		numchan = 6;
4429 		break;
4430 	case 6:
4431 		numchan = 10;
4432 		break;
4433 	case 7:
4434 		numchan = 12;
4435 		break;
4436 	case 8:
4437 		numchan = 16;
4438 		break;
4439 	}
4440 	rdev->mc.vram_width = numchan * chansize;
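	/* e.g. NOOFCHAN = 3 with 64-bit channels decodes to 8 channels,
	 * i.e. a 512-bit memory interface.
	 */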
4441 	/* Could aper size report 0 ? */
4442 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4443 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4444 	/* size in MB on CIK */
4445 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4446 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4447 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4448 	si_vram_gtt_location(rdev, &rdev->mc);
4449 	radeon_update_bandwidth_info(rdev);
4450 
4451 	return 0;
4452 }
4453 
4454 /*
4455  * GART
4456  * VMID 0 maps the physical GPU addresses used by the kernel.
4457  * VMIDs 1-15 are used for userspace clients and are handled
4458  * by the radeon vm/hsa code.
4459  */
4460 /**
4461  * cik_pcie_gart_tlb_flush - gart tlb flush callback
4462  *
4463  * @rdev: radeon_device pointer
4464  *
4465  * Flush the TLB for the VMID 0 page table (CIK).
4466  */
4467 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4468 {
4469 	/* flush hdp cache */
4470 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4471 
4472 	/* bits 0-15 are the VM contexts0-15 */
4473 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
4474 }
4475 
4476 /**
4477  * cik_pcie_gart_enable - gart enable
4478  *
4479  * @rdev: radeon_device pointer
4480  *
4481  * This sets up the TLBs, programs the page tables for VMID0,
4482  * sets up the hw for VMIDs 1-15 which are allocated on
4483  * demand, and sets up the global locations for the LDS, GDS,
4484  * and GPUVM for FSA64 clients (CIK).
4485  * Returns 0 for success, errors for failure.
4486  */
4487 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4488 {
4489 	int r, i;
4490 
4491 	if (rdev->gart.robj == NULL) {
4492 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4493 		return -EINVAL;
4494 	}
4495 	r = radeon_gart_table_vram_pin(rdev);
4496 	if (r)
4497 		return r;
4498 	radeon_gart_restore(rdev);
4499 	/* Setup TLB control */
4500 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4501 	       (0xA << 7) |
4502 	       ENABLE_L1_TLB |
4503 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4504 	       ENABLE_ADVANCED_DRIVER_MODEL |
4505 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4506 	/* Setup L2 cache */
4507 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4508 	       ENABLE_L2_FRAGMENT_PROCESSING |
4509 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4510 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4511 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4512 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4513 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4514 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4515 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4516 	/* setup context0 */
4517 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4518 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4519 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4520 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4521 			(u32)(rdev->dummy_page.addr >> 12));
4522 	WREG32(VM_CONTEXT0_CNTL2, 0);
4523 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4524 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4525 
4526 	WREG32(0x15D4, 0);
4527 	WREG32(0x15D8, 0);
4528 	WREG32(0x15DC, 0);
4529 
4530 	/* empty context1-15 */
4531 	/* FIXME: start with a 4GB VM space; once 2-level page tables
4532 	 * are in use, switch to the full VM address space
4533 	 */
4534 	/* set vm size, must be a multiple of 4 */
4535 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4536 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4537 	for (i = 1; i < 16; i++) {
4538 		if (i < 8)
4539 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4540 			       rdev->gart.table_addr >> 12);
4541 		else
4542 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4543 			       rdev->gart.table_addr >> 12);
4544 	}
4545 
4546 	/* enable context1-15 */
4547 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4548 	       (u32)(rdev->dummy_page.addr >> 12));
4549 	WREG32(VM_CONTEXT1_CNTL2, 4);
4550 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4551 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4552 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4553 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4554 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4555 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4556 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4557 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4558 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4559 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4560 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4561 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4562 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4563 
4564 	/* TC cache setup ??? */
4565 	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4566 	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4567 	WREG32(TC_CFG_L1_STORE_POLICY, 0);
4568 
4569 	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4570 	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4571 	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4572 	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4573 	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4574 
4575 	WREG32(TC_CFG_L1_VOLATILE, 0);
4576 	WREG32(TC_CFG_L2_VOLATILE, 0);
4577 
4578 	if (rdev->family == CHIP_KAVERI) {
4579 		u32 tmp = RREG32(CHUB_CONTROL);
4580 		tmp &= ~BYPASS_VM;
4581 		WREG32(CHUB_CONTROL, tmp);
4582 	}
4583 
4584 	/* XXX SH_MEM regs */
4585 	/* where to put LDS, scratch, GPUVM in FSA64 space */
4586 	mutex_lock(&rdev->srbm_mutex);
4587 	for (i = 0; i < 16; i++) {
4588 		cik_srbm_select(rdev, 0, 0, 0, i);
4589 		/* CP and shaders */
4590 		WREG32(SH_MEM_CONFIG, 0);
4591 		WREG32(SH_MEM_APE1_BASE, 1);
4592 		WREG32(SH_MEM_APE1_LIMIT, 0);
4593 		WREG32(SH_MEM_BASES, 0);
4594 		/* SDMA GFX */
4595 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4596 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4597 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4598 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4599 		/* XXX SDMA RLC - todo */
4600 	}
4601 	cik_srbm_select(rdev, 0, 0, 0, 0);
4602 	mutex_unlock(&rdev->srbm_mutex);
4603 
4604 	cik_pcie_gart_tlb_flush(rdev);
4605 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4606 		 (unsigned)(rdev->mc.gtt_size >> 20),
4607 		 (unsigned long long)rdev->gart.table_addr);
4608 	rdev->gart.ready = true;
4609 	return 0;
4610 }
4611 
4612 /**
4613  * cik_pcie_gart_disable - gart disable
4614  *
4615  * @rdev: radeon_device pointer
4616  *
4617  * This disables all VM page tables (CIK).
4618  */
4619 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4620 {
4621 	/* Disable all tables */
4622 	WREG32(VM_CONTEXT0_CNTL, 0);
4623 	WREG32(VM_CONTEXT1_CNTL, 0);
4624 	/* Setup TLB control */
4625 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4626 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4627 	/* Setup L2 cache */
4628 	WREG32(VM_L2_CNTL,
4629 	       ENABLE_L2_FRAGMENT_PROCESSING |
4630 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4631 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4632 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4633 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4634 	WREG32(VM_L2_CNTL2, 0);
4635 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4636 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4637 	radeon_gart_table_vram_unpin(rdev);
4638 }
4639 
4640 /**
4641  * cik_pcie_gart_fini - vm fini callback
4642  *
4643  * @rdev: radeon_device pointer
4644  *
4645  * Tears down the driver GART/VM setup (CIK).
4646  */
4647 static void cik_pcie_gart_fini(struct radeon_device *rdev)
4648 {
4649 	cik_pcie_gart_disable(rdev);
4650 	radeon_gart_table_vram_free(rdev);
4651 	radeon_gart_fini(rdev);
4652 }
4653 
4654 /* vm parser */
4655 /**
4656  * cik_ib_parse - vm ib_parse callback
4657  *
4658  * @rdev: radeon_device pointer
4659  * @ib: indirect buffer pointer
4660  *
4661  * CIK uses hw IB checking so this is a nop (CIK).
4662  */
4663 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4664 {
4665 	return 0;
4666 }
4667 
4668 /*
4669  * vm
4670  * VMID 0 maps the physical GPU addresses used by the kernel.
4671  * VMIDs 1-15 are used for userspace clients and are handled
4672  * by the radeon vm/hsa code.
4673  */
4674 /**
4675  * cik_vm_init - cik vm init callback
4676  *
4677  * @rdev: radeon_device pointer
4678  *
4679  * Inits cik specific vm parameters (number of VMs, base of vram for
4680  * VMIDs 1-15) (CIK).
4681  * Returns 0 for success.
4682  */
4683 int cik_vm_init(struct radeon_device *rdev)
4684 {
4685 	/* number of VMs */
4686 	rdev->vm_manager.nvm = 16;
4687 	/* base offset of vram pages */
4688 	if (rdev->flags & RADEON_IS_IGP) {
4689 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
4690 		tmp <<= 22;
4691 		rdev->vm_manager.vram_base_offset = tmp;
4692 	} else
4693 		rdev->vm_manager.vram_base_offset = 0;
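	/*
	 * The shift by 22 means MC_VM_FB_OFFSET counts 4 MiB units; e.g.
	 * a register value of 0x100 would place the IGP's carve-out at a
	 * 1 GiB physical offset.
	 */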
4694 
4695 	return 0;
4696 }
4697 
4698 /**
4699  * cik_vm_fini - cik vm fini callback
4700  *
4701  * @rdev: radeon_device pointer
4702  *
4703  * Tear down any asic specific VM setup (CIK).
4704  */
4705 void cik_vm_fini(struct radeon_device *rdev)
4706 {
4707 }
4708 
4709 /**
4710  * cik_vm_decode_fault - print human readable fault info
4711  *
4712  * @rdev: radeon_device pointer
4713  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4714  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4715  *
4716  * Print human readable fault information (CIK).
4717  */
4718 static void cik_vm_decode_fault(struct radeon_device *rdev,
4719 				u32 status, u32 addr, u32 mc_client)
4720 {
4721 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4722 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4723 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4724 	char *block = (char *)&mc_client;
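	/*
	 * The cast above treats the 32-bit mc_client register value as
	 * four ASCII characters naming the offending block.  Purely
	 * illustrative (not a real client ID): on little-endian,
	 * 0x36314443 holds the bytes 'C' 'D' '1' '6'.
	 */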
4725 
4726 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4727 	       protections, vmid, addr,
4728 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4729 	       block, mc_id);
4730 }
4731 
4732 /**
4733  * cik_vm_flush - cik vm flush using the CP
4734  *
4735  * @rdev: radeon_device pointer
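 * @ridx: index of the ring used to emit the flush
 * @vm: radeon_vm pointer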
4736  *
4737  * Update the page table base and flush the VM TLB
4738  * using the CP (CIK).
4739  */
4740 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4741 {
4742 	struct radeon_ring *ring = &rdev->ring[ridx];
4743 
4744 	if (vm == NULL)
4745 		return;
4746 
4747 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4748 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4749 				 WRITE_DATA_DST_SEL(0)));
4750 	if (vm->id < 8) {
4751 		radeon_ring_write(ring,
4752 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4753 	} else {
4754 		radeon_ring_write(ring,
4755 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4756 	}
4757 	radeon_ring_write(ring, 0);
4758 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4759 
4760 	/* update SH_MEM_* regs */
4761 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4762 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4763 				 WRITE_DATA_DST_SEL(0)));
4764 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4765 	radeon_ring_write(ring, 0);
4766 	radeon_ring_write(ring, VMID(vm->id));
4767 
4768 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4769 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4770 				 WRITE_DATA_DST_SEL(0)));
4771 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
4772 	radeon_ring_write(ring, 0);
4773 
4774 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4775 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4776 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4777 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4778 
4779 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4780 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4781 				 WRITE_DATA_DST_SEL(0)));
4782 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4783 	radeon_ring_write(ring, 0);
4784 	radeon_ring_write(ring, VMID(0));
4785 
4786 	/* HDP flush */
4787 	/* We should be using the WAIT_REG_MEM packet here like in
4788 	 * cik_fence_ring_emit(), but it causes the CP to hang in this
4789 	 * context...
4790 	 */
4791 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4792 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4793 				 WRITE_DATA_DST_SEL(0)));
4794 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4795 	radeon_ring_write(ring, 0);
4796 	radeon_ring_write(ring, 0);
4797 
4798 	/* bits 0-15 are the VM contexts0-15 */
4799 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4800 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4801 				 WRITE_DATA_DST_SEL(0)));
4802 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4803 	radeon_ring_write(ring, 0);
4804 	radeon_ring_write(ring, 1 << vm->id);
4805 
4806 	/* compute doesn't have PFP */
4807 	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4808 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4809 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4810 		radeon_ring_write(ring, 0x0);
4811 	}
4812 }
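/*
 * Every WRITE_DATA burst in the flush above has the same shape: a
 * control dword (engine select, register destination), the target
 * register's dword offset (byte address >> 2), an upper-address dword
 * of 0, then the payload.  The last burst, for example, writes
 * 1 << vm->id to VM_INVALIDATE_REQUEST so only this VM's TLB entries
 * are invalidated.
 */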
4813 
4814 /**
4815  * cik_vm_set_page - update the page tables using sDMA
4816  *
4817  * @rdev: radeon_device pointer
4818  * @ib: indirect buffer to fill with commands
4819  * @pe: addr of the page entry
4820  * @addr: dst addr to write into pe
4821  * @count: number of page entries to update
4822  * @incr: increase next addr by incr bytes
4823  * @flags: access flags
4824  *
4825  * Update the page tables using CP or sDMA (CIK).
4826  */
4827 void cik_vm_set_page(struct radeon_device *rdev,
4828 		     struct radeon_ib *ib,
4829 		     uint64_t pe,
4830 		     uint64_t addr, unsigned count,
4831 		     uint32_t incr, uint32_t flags)
4832 {
4833 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4834 	uint64_t value;
4835 	unsigned ndw;
4836 
4837 	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4838 		/* CP */
4839 		while (count) {
4840 			ndw = 2 + count * 2;
4841 			if (ndw > 0x3FFE)
4842 				ndw = 0x3FFE;
4843 
4844 			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4845 			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4846 						    WRITE_DATA_DST_SEL(1));
4847 			ib->ptr[ib->length_dw++] = pe;
4848 			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4849 			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4850 				if (flags & RADEON_VM_PAGE_SYSTEM) {
4851 					value = radeon_vm_map_gart(rdev, addr);
4852 					value &= 0xFFFFFFFFFFFFF000ULL;
4853 				} else if (flags & RADEON_VM_PAGE_VALID) {
4854 					value = addr;
4855 				} else {
4856 					value = 0;
4857 				}
4858 				addr += incr;
4859 				value |= r600_flags;
4860 				ib->ptr[ib->length_dw++] = value;
4861 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4862 			}
4863 		}
4864 	} else {
4865 		/* DMA */
4866 		cik_sdma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
4867 	}
4868 }
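/*
 * Capacity check for the CP path above: ndw = 2 + count * 2 is capped
 * at 0x3FFE, so one WRITE_DATA packet updates at most
 * (0x3FFE - 2) / 2 = 8190 page table entries (two dwords each); larger
 * requests simply loop and emit further packets.
 */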
4869 
4870 /*
4871  * RLC
4872  * The RLC is a multi-purpose microengine that handles a
4873  * variety of functions, the most important of which is
4874  * the interrupt controller.
4875  */
4876 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
4877 					  bool enable)
4878 {
4879 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
4880 
4881 	if (enable)
4882 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4883 	else
4884 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4885 	WREG32(CP_INT_CNTL_RING0, tmp);
4886 }
4887 
4888 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
4889 {
4890 	u32 tmp;
4891 
4892 	tmp = RREG32(RLC_LB_CNTL);
4893 	if (enable)
4894 		tmp |= LOAD_BALANCE_ENABLE;
4895 	else
4896 		tmp &= ~LOAD_BALANCE_ENABLE;
4897 	WREG32(RLC_LB_CNTL, tmp);
4898 }
4899 
4900 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
4901 {
4902 	u32 i, j, k;
4903 	u32 mask;
4904 
4905 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4906 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4907 			cik_select_se_sh(rdev, i, j);
4908 			for (k = 0; k < rdev->usec_timeout; k++) {
4909 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4910 					break;
4911 				udelay(1);
4912 			}
4913 		}
4914 	}
4915 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4916 
4917 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4918 	for (k = 0; k < rdev->usec_timeout; k++) {
4919 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4920 			break;
4921 		udelay(1);
4922 	}
4923 }
4924 
4925 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
4926 {
4927 	u32 tmp;
4928 
4929 	tmp = RREG32(RLC_CNTL);
4930 	if (tmp != rlc)
4931 		WREG32(RLC_CNTL, rlc);
4932 }
4933 
4934 static u32 cik_halt_rlc(struct radeon_device *rdev)
4935 {
4936 	u32 data, orig;
4937 
4938 	orig = data = RREG32(RLC_CNTL);
4939 
4940 	if (data & RLC_ENABLE) {
4941 		u32 i;
4942 
4943 		data &= ~RLC_ENABLE;
4944 		WREG32(RLC_CNTL, data);
4945 
4946 		for (i = 0; i < rdev->usec_timeout; i++) {
4947 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
4948 				break;
4949 			udelay(1);
4950 		}
4951 
4952 		cik_wait_for_rlc_serdes(rdev);
4953 	}
4954 
4955 	return orig;
4956 }
4957 
4958 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
4959 {
4960 	u32 tmp, i, mask;
4961 
4962 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
4963 	WREG32(RLC_GPR_REG2, tmp);
4964 
4965 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
4966 	for (i = 0; i < rdev->usec_timeout; i++) {
4967 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
4968 			break;
4969 		udelay(1);
4970 	}
4971 
4972 	for (i = 0; i < rdev->usec_timeout; i++) {
4973 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
4974 			break;
4975 		udelay(1);
4976 	}
4977 }
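/*
 * The safe-mode entry above is a request/acknowledge handshake: post
 * REQ with MSG_ENTER_RLC_SAFE_MODE, wait until RLC_GPM_STAT reports
 * both GFX_POWER_STATUS and GFX_CLOCK_STATUS, then wait for the RLC to
 * clear REQ as its ack.  cik_exit_rlc_safe_mode() below just posts the
 * exit message without polling.
 */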
4978 
4979 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
4980 {
4981 	u32 tmp;
4982 
4983 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
4984 	WREG32(RLC_GPR_REG2, tmp);
4985 }
4986 
4987 /**
4988  * cik_rlc_stop - stop the RLC ME
4989  *
4990  * @rdev: radeon_device pointer
4991  *
4992  * Halt the RLC ME (MicroEngine) (CIK).
4993  */
4994 static void cik_rlc_stop(struct radeon_device *rdev)
4995 {
4996 	WREG32(RLC_CNTL, 0);
4997 
4998 	cik_enable_gui_idle_interrupt(rdev, false);
4999 
5000 	cik_wait_for_rlc_serdes(rdev);
5001 }
5002 
5003 /**
5004  * cik_rlc_start - start the RLC ME
5005  *
5006  * @rdev: radeon_device pointer
5007  *
5008  * Unhalt the RLC ME (MicroEngine) (CIK).
5009  */
5010 static void cik_rlc_start(struct radeon_device *rdev)
5011 {
5012 	WREG32(RLC_CNTL, RLC_ENABLE);
5013 
5014 	cik_enable_gui_idle_interrupt(rdev, true);
5015 
5016 	udelay(50);
5017 }
5018 
5019 /**
5020  * cik_rlc_resume - setup the RLC hw
5021  *
5022  * @rdev: radeon_device pointer
5023  *
5024  * Initialize the RLC registers, load the ucode,
5025  * and start the RLC (CIK).
5026  * Returns 0 for success, -EINVAL if the ucode is not available.
5027  */
5028 static int cik_rlc_resume(struct radeon_device *rdev)
5029 {
5030 	u32 i, size, tmp;
5031 	const __be32 *fw_data;
5032 
5033 	if (!rdev->rlc_fw)
5034 		return -EINVAL;
5035 
5036 	switch (rdev->family) {
5037 	case CHIP_BONAIRE:
5038 	default:
5039 		size = BONAIRE_RLC_UCODE_SIZE;
5040 		break;
5041 	case CHIP_KAVERI:
5042 		size = KV_RLC_UCODE_SIZE;
5043 		break;
5044 	case CHIP_KABINI:
5045 		size = KB_RLC_UCODE_SIZE;
5046 		break;
5047 	}
5048 
5049 	cik_rlc_stop(rdev);
5050 
5051 	/* disable CG */
5052 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5053 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5054 
5055 	si_rlc_reset(rdev);
5056 
5057 	cik_init_pg(rdev);
5058 
5059 	cik_init_cg(rdev);
5060 
5061 	WREG32(RLC_LB_CNTR_INIT, 0);
5062 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5063 
5064 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5065 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5066 	WREG32(RLC_LB_PARAMS, 0x00600408);
5067 	WREG32(RLC_LB_CNTL, 0x80000004);
5068 
5069 	WREG32(RLC_MC_CNTL, 0);
5070 	WREG32(RLC_UCODE_CNTL, 0);
5071 
5072 	fw_data = (const __be32 *)rdev->rlc_fw->data;
5073 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5074 	for (i = 0; i < size; i++)
5075 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5076 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5077 
5078 	/* XXX - find out what chips support lbpw */
5079 	cik_enable_lbpw(rdev, false);
5080 
5081 	if (rdev->family == CHIP_BONAIRE)
5082 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5083 
5084 	cik_rlc_start(rdev);
5085 
5086 	return 0;
5087 }
5088 
5089 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5090 {
5091 	u32 data, orig, tmp, tmp2;
5092 
5093 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5094 
5095 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5096 		cik_enable_gui_idle_interrupt(rdev, true);
5097 
5098 		tmp = cik_halt_rlc(rdev);
5099 
5100 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5101 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5102 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5103 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5104 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
5105 
5106 		cik_update_rlc(rdev, tmp);
5107 
5108 		data |= CGCG_EN | CGLS_EN;
5109 	} else {
5110 		cik_enable_gui_idle_interrupt(rdev, false);
5111 
5112 		RREG32(CB_CGTT_SCLK_CTRL);
5113 		RREG32(CB_CGTT_SCLK_CTRL);
5114 		RREG32(CB_CGTT_SCLK_CTRL);
5115 		RREG32(CB_CGTT_SCLK_CTRL);
5116 
5117 		data &= ~(CGCG_EN | CGLS_EN);
5118 	}
5119 
5120 	if (orig != data)
5121 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5122 
5123 }
5124 
5125 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5126 {
5127 	u32 data, orig, tmp = 0;
5128 
5129 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5130 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5131 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5132 				orig = data = RREG32(CP_MEM_SLP_CNTL);
5133 				data |= CP_MEM_LS_EN;
5134 				if (orig != data)
5135 					WREG32(CP_MEM_SLP_CNTL, data);
5136 			}
5137 		}
5138 
5139 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5140 		data &= 0xfffffffd;
5141 		if (orig != data)
5142 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5143 
5144 		tmp = cik_halt_rlc(rdev);
5145 
5146 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5147 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5148 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5149 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5150 		WREG32(RLC_SERDES_WR_CTRL, data);
5151 
5152 		cik_update_rlc(rdev, tmp);
5153 
5154 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5155 			orig = data = RREG32(CGTS_SM_CTRL_REG);
5156 			data &= ~SM_MODE_MASK;
5157 			data |= SM_MODE(0x2);
5158 			data |= SM_MODE_ENABLE;
5159 			data &= ~CGTS_OVERRIDE;
5160 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5161 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5162 				data &= ~CGTS_LS_OVERRIDE;
5163 			data &= ~ON_MONITOR_ADD_MASK;
5164 			data |= ON_MONITOR_ADD_EN;
5165 			data |= ON_MONITOR_ADD(0x96);
5166 			if (orig != data)
5167 				WREG32(CGTS_SM_CTRL_REG, data);
5168 		}
5169 	} else {
5170 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5171 		data |= 0x00000002;
5172 		if (orig != data)
5173 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5174 
5175 		data = RREG32(RLC_MEM_SLP_CNTL);
5176 		if (data & RLC_MEM_LS_EN) {
5177 			data &= ~RLC_MEM_LS_EN;
5178 			WREG32(RLC_MEM_SLP_CNTL, data);
5179 		}
5180 
5181 		data = RREG32(CP_MEM_SLP_CNTL);
5182 		if (data & CP_MEM_LS_EN) {
5183 			data &= ~CP_MEM_LS_EN;
5184 			WREG32(CP_MEM_SLP_CNTL, data);
5185 		}
5186 
5187 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5188 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5189 		if (orig != data)
5190 			WREG32(CGTS_SM_CTRL_REG, data);
5191 
5192 		tmp = cik_halt_rlc(rdev);
5193 
5194 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5195 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5196 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5197 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5198 		WREG32(RLC_SERDES_WR_CTRL, data);
5199 
5200 		cik_update_rlc(rdev, tmp);
5201 	}
5202 }
5203 
5204 static const u32 mc_cg_registers[] =
5205 {
5206 	MC_HUB_MISC_HUB_CG,
5207 	MC_HUB_MISC_SIP_CG,
5208 	MC_HUB_MISC_VM_CG,
5209 	MC_XPB_CLK_GAT,
5210 	ATC_MISC_CG,
5211 	MC_CITF_MISC_WR_CG,
5212 	MC_CITF_MISC_RD_CG,
5213 	MC_CITF_MISC_VM_CG,
5214 	VM_L2_CG,
5215 };
5216 
5217 static void cik_enable_mc_ls(struct radeon_device *rdev,
5218 			     bool enable)
5219 {
5220 	int i;
5221 	u32 orig, data;
5222 
5223 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5224 		orig = data = RREG32(mc_cg_registers[i]);
5225 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5226 			data |= MC_LS_ENABLE;
5227 		else
5228 			data &= ~MC_LS_ENABLE;
5229 		if (data != orig)
5230 			WREG32(mc_cg_registers[i], data);
5231 	}
5232 }
5233 
5234 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5235 			       bool enable)
5236 {
5237 	int i;
5238 	u32 orig, data;
5239 
5240 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5241 		orig = data = RREG32(mc_cg_registers[i]);
5242 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5243 			data |= MC_CG_ENABLE;
5244 		else
5245 			data &= ~MC_CG_ENABLE;
5246 		if (data != orig)
5247 			WREG32(mc_cg_registers[i], data);
5248 	}
5249 }
5250 
5251 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5252 				 bool enable)
5253 {
5254 	u32 orig, data;
5255 
5256 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5257 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5258 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5259 	} else {
5260 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5261 		data |= 0xff000000;
5262 		if (data != orig)
5263 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5264 
5265 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5266 		data |= 0xff000000;
5267 		if (data != orig)
5268 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5269 	}
5270 }
5271 
5272 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5273 				 bool enable)
5274 {
5275 	u32 orig, data;
5276 
5277 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
5278 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5279 		data |= 0x100;
5280 		if (orig != data)
5281 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5282 
5283 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5284 		data |= 0x100;
5285 		if (orig != data)
5286 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5287 	} else {
5288 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
5289 		data &= ~0x100;
5290 		if (orig != data)
5291 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
5292 
5293 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
5294 		data &= ~0x100;
5295 		if (orig != data)
5296 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
5297 	}
5298 }
5299 
5300 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
5301 				bool enable)
5302 {
5303 	u32 orig, data;
5304 
5305 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5306 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5307 		data = 0xfff;
5308 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5309 
5310 		orig = data = RREG32(UVD_CGC_CTRL);
5311 		data |= DCM;
5312 		if (orig != data)
5313 			WREG32(UVD_CGC_CTRL, data);
5314 	} else {
5315 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5316 		data &= ~0xfff;
5317 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
5318 
5319 		orig = data = RREG32(UVD_CGC_CTRL);
5320 		data &= ~DCM;
5321 		if (orig != data)
5322 			WREG32(UVD_CGC_CTRL, data);
5323 	}
5324 }
5325 
5326 static void cik_enable_bif_mgls(struct radeon_device *rdev,
5327 			       bool enable)
5328 {
5329 	u32 orig, data;
5330 
5331 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
5332 
5333 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5334 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5335 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5336 	else
5337 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5338 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5339 
5340 	if (orig != data)
5341 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
5342 }
5343 
5344 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
5345 				bool enable)
5346 {
5347 	u32 orig, data;
5348 
5349 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5350 
5351 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5352 		data &= ~CLOCK_GATING_DIS;
5353 	else
5354 		data |= CLOCK_GATING_DIS;
5355 
5356 	if (orig != data)
5357 		WREG32(HDP_HOST_PATH_CNTL, data);
5358 }
5359 
5360 static void cik_enable_hdp_ls(struct radeon_device *rdev,
5361 			      bool enable)
5362 {
5363 	u32 orig, data;
5364 
5365 	orig = data = RREG32(HDP_MEM_POWER_LS);
5366 
5367 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5368 		data |= HDP_LS_ENABLE;
5369 	else
5370 		data &= ~HDP_LS_ENABLE;
5371 
5372 	if (orig != data)
5373 		WREG32(HDP_MEM_POWER_LS, data);
5374 }
5375 
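/**
 * cik_update_cg - enable/disable clock gating for a set of blocks
 *
 * @rdev: radeon_device pointer
 * @block: RADEON_CG_BLOCK_* mask selecting the IP blocks to update
 * @enable: enable/disable the clock gating
 *
 * Enable or disable clock gating for the selected IP blocks (CIK).
 * For GFX, MGCG must be enabled before CGCG and disabled after it.
 */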
5376 void cik_update_cg(struct radeon_device *rdev,
5377 		   u32 block, bool enable)
5378 {
5379 	if (block & RADEON_CG_BLOCK_GFX) {
5380 		/* order matters! */
5381 		if (enable) {
5382 			cik_enable_mgcg(rdev, true);
5383 			cik_enable_cgcg(rdev, true);
5384 		} else {
5385 			cik_enable_cgcg(rdev, false);
5386 			cik_enable_mgcg(rdev, false);
5387 		}
5388 	}
5389 
5390 	if (block & RADEON_CG_BLOCK_MC) {
5391 		if (!(rdev->flags & RADEON_IS_IGP)) {
5392 			cik_enable_mc_mgcg(rdev, enable);
5393 			cik_enable_mc_ls(rdev, enable);
5394 		}
5395 	}
5396 
5397 	if (block & RADEON_CG_BLOCK_SDMA) {
5398 		cik_enable_sdma_mgcg(rdev, enable);
5399 		cik_enable_sdma_mgls(rdev, enable);
5400 	}
5401 
5402 	if (block & RADEON_CG_BLOCK_BIF) {
5403 		cik_enable_bif_mgls(rdev, enable);
5404 	}
5405 
5406 	if (block & RADEON_CG_BLOCK_UVD) {
5407 		if (rdev->has_uvd)
5408 			cik_enable_uvd_mgcg(rdev, enable);
5409 	}
5410 
5411 	if (block & RADEON_CG_BLOCK_HDP) {
5412 		cik_enable_hdp_mgcg(rdev, enable);
5413 		cik_enable_hdp_ls(rdev, enable);
5414 	}
5415 }
5416 
5417 static void cik_init_cg(struct radeon_device *rdev)
5418 {
5419 
5420 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
5421 
5422 	if (rdev->has_uvd)
5423 		si_init_uvd_internal_cg(rdev);
5424 
5425 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5426 			     RADEON_CG_BLOCK_SDMA |
5427 			     RADEON_CG_BLOCK_BIF |
5428 			     RADEON_CG_BLOCK_UVD |
5429 			     RADEON_CG_BLOCK_HDP), true);
5430 }
5431 
5432 static void cik_fini_cg(struct radeon_device *rdev)
5433 {
5434 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
5435 			     RADEON_CG_BLOCK_SDMA |
5436 			     RADEON_CG_BLOCK_BIF |
5437 			     RADEON_CG_BLOCK_UVD |
5438 			     RADEON_CG_BLOCK_HDP), false);
5439 
5440 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
5441 }
5442 
5443 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
5444 					  bool enable)
5445 {
5446 	u32 data, orig;
5447 
5448 	orig = data = RREG32(RLC_PG_CNTL);
5449 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5450 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5451 	else
5452 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
5453 	if (orig != data)
5454 		WREG32(RLC_PG_CNTL, data);
5455 }
5456 
5457 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
5458 					  bool enable)
5459 {
5460 	u32 data, orig;
5461 
5462 	orig = data = RREG32(RLC_PG_CNTL);
5463 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
5464 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5465 	else
5466 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
5467 	if (orig != data)
5468 		WREG32(RLC_PG_CNTL, data);
5469 }
5470 
5471 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
5472 {
5473 	u32 data, orig;
5474 
5475 	orig = data = RREG32(RLC_PG_CNTL);
5476 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
5477 		data &= ~DISABLE_CP_PG;
5478 	else
5479 		data |= DISABLE_CP_PG;
5480 	if (orig != data)
5481 		WREG32(RLC_PG_CNTL, data);
5482 }
5483 
5484 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
5485 {
5486 	u32 data, orig;
5487 
5488 	orig = data = RREG32(RLC_PG_CNTL);
5489 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
5490 		data &= ~DISABLE_GDS_PG;
5491 	else
5492 		data |= DISABLE_GDS_PG;
5493 	if (orig != data)
5494 		WREG32(RLC_PG_CNTL, data);
5495 }
5496 
5497 #define CP_ME_TABLE_SIZE    96
5498 #define CP_ME_TABLE_OFFSET  2048
5499 #define CP_MEC_TABLE_OFFSET 4096
5500 
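/**
 * cik_init_cp_pg_table - populate the CP power gating table
 *
 * @rdev: radeon_device pointer
 *
 * Copy the power gating tables embedded in the CE, PFP, ME and MEC
 * microcode images into the RLC's CP table buffer (CIK).
 */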
5501 void cik_init_cp_pg_table(struct radeon_device *rdev)
5502 {
5503 	const __be32 *fw_data;
5504 	volatile u32 *dst_ptr;
5505 	int me, i, max_me = 4;
5506 	u32 bo_offset = 0;
5507 	u32 table_offset;
5508 
5509 	if (rdev->family == CHIP_KAVERI)
5510 		max_me = 5;
5511 
5512 	if (rdev->rlc.cp_table_ptr == NULL)
5513 		return;
5514 
5515 	/* write the cp table buffer */
5516 	dst_ptr = rdev->rlc.cp_table_ptr;
5517 	for (me = 0; me < max_me; me++) {
5518 		if (me == 0) {
5519 			fw_data = (const __be32 *)rdev->ce_fw->data;
5520 			table_offset = CP_ME_TABLE_OFFSET;
5521 		} else if (me == 1) {
5522 			fw_data = (const __be32 *)rdev->pfp_fw->data;
5523 			table_offset = CP_ME_TABLE_OFFSET;
5524 		} else if (me == 2) {
5525 			fw_data = (const __be32 *)rdev->me_fw->data;
5526 			table_offset = CP_ME_TABLE_OFFSET;
5527 		} else {
5528 			fw_data = (const __be32 *)rdev->mec_fw->data;
5529 			table_offset = CP_MEC_TABLE_OFFSET;
5530 		}
5531 
5532 		for (i = 0; i < CP_ME_TABLE_SIZE; i++) {
5533 			dst_ptr[bo_offset + i] = be32_to_cpu(fw_data[table_offset + i]);
5534 		}
5535 		bo_offset += CP_ME_TABLE_SIZE;
5536 	}
5537 }
5538 
5539 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
5540 				bool enable)
5541 {
5542 	u32 data, orig;
5543 
5544 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG)) {
5545 		orig = data = RREG32(RLC_PG_CNTL);
5546 		data |= GFX_PG_ENABLE;
5547 		if (orig != data)
5548 			WREG32(RLC_PG_CNTL, data);
5549 
5550 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
5551 		data |= AUTO_PG_EN;
5552 		if (orig != data)
5553 			WREG32(RLC_AUTO_PG_CTRL, data);
5554 	} else {
5555 		orig = data = RREG32(RLC_PG_CNTL);
5556 		data &= ~GFX_PG_ENABLE;
5557 		if (orig != data)
5558 			WREG32(RLC_PG_CNTL, data);
5559 
5560 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
5561 		data &= ~AUTO_PG_EN;
5562 		if (orig != data)
5563 			WREG32(RLC_AUTO_PG_CTRL, data);
5564 
5565 		data = RREG32(DB_RENDER_CONTROL);
5566 	}
5567 }
5568 
5569 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5570 {
5571 	u32 mask = 0, tmp, tmp1;
5572 	int i;
5573 
5574 	cik_select_se_sh(rdev, se, sh);
5575 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5576 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5577 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5578 
5579 	tmp &= 0xffff0000;
5580 
5581 	tmp |= tmp1;
5582 	tmp >>= 16;
5583 
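	/* build a mask with one bit per possible CU: (1 << max_cu_per_sh) - 1 */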
5584 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
5585 		mask <<= 1;
5586 		mask |= 1;
5587 	}
5588 
5589 	return (~tmp) & mask;
5590 }
5591 
5592 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
5593 {
5594 	u32 i, j, k, active_cu_number = 0;
5595 	u32 mask, counter, cu_bitmap;
5596 	u32 tmp = 0;
5597 
5598 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5599 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5600 			mask = 1;
5601 			cu_bitmap = 0;
5602 			counter = 0;
5603 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
5604 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
5605 					if (counter < 2)
5606 						cu_bitmap |= mask;
5607 					counter++;
5608 				}
5609 				mask <<= 1;
5610 			}
5611 
5612 			active_cu_number += counter;
5613 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5614 		}
5615 	}
5616 
5617 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5618 
5619 	tmp = RREG32(RLC_MAX_PG_CU);
5620 	tmp &= ~MAX_PU_CU_MASK;
5621 	tmp |= MAX_PU_CU(active_cu_number);
5622 	WREG32(RLC_MAX_PG_CU, tmp);
5623 }
5624 
5625 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
5626 				       bool enable)
5627 {
5628 	u32 data, orig;
5629 
5630 	orig = data = RREG32(RLC_PG_CNTL);
5631 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
5632 		data |= STATIC_PER_CU_PG_ENABLE;
5633 	else
5634 		data &= ~STATIC_PER_CU_PG_ENABLE;
5635 	if (orig != data)
5636 		WREG32(RLC_PG_CNTL, data);
5637 }
5638 
5639 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
5640 					bool enable)
5641 {
5642 	u32 data, orig;
5643 
5644 	orig = data = RREG32(RLC_PG_CNTL);
5645 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
5646 		data |= DYN_PER_CU_PG_ENABLE;
5647 	else
5648 		data &= ~DYN_PER_CU_PG_ENABLE;
5649 	if (orig != data)
5650 		WREG32(RLC_PG_CNTL, data);
5651 }
5652 
5653 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
5654 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
5655 
5656 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
5657 {
5658 	u32 data, orig;
5659 	u32 i;
5660 
5661 	if (rdev->rlc.cs_data) {
5662 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5663 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
5664 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
5665 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
5666 	} else {
5667 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
5668 		for (i = 0; i < 3; i++)
5669 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
5670 	}
5671 	if (rdev->rlc.reg_list) {
5672 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
5673 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
5674 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
5675 	}
5676 
5677 	orig = data = RREG32(RLC_PG_CNTL);
5678 	data |= GFX_PG_SRC;
5679 	if (orig != data)
5680 		WREG32(RLC_PG_CNTL, data);
5681 
5682 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5683 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
5684 
5685 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
5686 	data &= ~IDLE_POLL_COUNT_MASK;
5687 	data |= IDLE_POLL_COUNT(0x60);
5688 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
5689 
5690 	data = 0x10101010;
5691 	WREG32(RLC_PG_DELAY, data);
5692 
5693 	data = RREG32(RLC_PG_DELAY_2);
5694 	data &= ~0xff;
5695 	data |= 0x3;
5696 	WREG32(RLC_PG_DELAY_2, data);
5697 
5698 	data = RREG32(RLC_AUTO_PG_CTRL);
5699 	data &= ~GRBM_REG_SGIT_MASK;
5700 	data |= GRBM_REG_SGIT(0x700);
5701 	WREG32(RLC_AUTO_PG_CTRL, data);
5702 
5703 }
5704 
5705 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
5706 {
5707 	cik_enable_gfx_cgpg(rdev, enable);
5708 	cik_enable_gfx_static_mgpg(rdev, enable);
5709 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
5710 }
5711 
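/**
 * cik_get_csb_size - get the size of the clear state buffer
 *
 * @rdev: radeon_device pointer
 *
 * Walk the clear state section/extent lists and count the number
 * of dwords needed for the clear state buffer (CIK).
 * Returns the size in dwords, or 0 if no clear state data exists.
 */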
5712 u32 cik_get_csb_size(struct radeon_device *rdev)
5713 {
5714 	u32 count = 0;
5715 	const struct cs_section_def *sect = NULL;
5716 	const struct cs_extent_def *ext = NULL;
5717 
5718 	if (rdev->rlc.cs_data == NULL)
5719 		return 0;
5720 
5721 	/* begin clear state */
5722 	count += 2;
5723 	/* context control state */
5724 	count += 3;
5725 
5726 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5727 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5728 			if (sect->id == SECT_CONTEXT)
5729 				count += 2 + ext->reg_count;
5730 			else
5731 				return 0;
5732 		}
5733 	}
5734 	/* pa_sc_raster_config/pa_sc_raster_config1 */
5735 	count += 4;
5736 	/* end clear state */
5737 	count += 2;
5738 	/* clear state */
5739 	count += 2;
5740 
5741 	return count;
5742 }
5743 
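/**
 * cik_get_csb_buffer - fill the clear state buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: clear state buffer to fill
 *
 * Build the PM4 packets that load the clear state from the
 * section/extent lists; sized by cik_get_csb_size() (CIK).
 */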
5744 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5745 {
5746 	u32 count = 0, i;
5747 	const struct cs_section_def *sect = NULL;
5748 	const struct cs_extent_def *ext = NULL;
5749 
5750 	if (rdev->rlc.cs_data == NULL)
5751 		return;
5752 	if (buffer == NULL)
5753 		return;
5754 
5755 	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
5756 	buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;
5757 
5758 	buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
5759 	buffer[count++] = 0x80000000;
5760 	buffer[count++] = 0x80000000;
5761 
5762 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5763 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5764 			if (sect->id == SECT_CONTEXT) {
5765 				buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
5766 				buffer[count++] = ext->reg_index - 0xa000;
5767 				for (i = 0; i < ext->reg_count; i++)
5768 					buffer[count++] = ext->extent[i];
5769 			} else {
5770 				return;
5771 			}
5772 		}
5773 	}
5774 
5775 	buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 2);
5776 	buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
5777 	switch (rdev->family) {
5778 	case CHIP_BONAIRE:
5779 		buffer[count++] = 0x16000012;
5780 		buffer[count++] = 0x00000000;
5781 		break;
5782 	case CHIP_KAVERI:
5783 		buffer[count++] = 0x00000000; /* XXX */
5784 		buffer[count++] = 0x00000000;
5785 		break;
5786 	case CHIP_KABINI:
5787 		buffer[count++] = 0x00000000; /* XXX */
5788 		buffer[count++] = 0x00000000;
5789 		break;
5790 	default:
5791 		buffer[count++] = 0x00000000;
5792 		buffer[count++] = 0x00000000;
5793 		break;
5794 	}
5795 
5796 	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
5797 	buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;
5798 
5799 	buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
5800 	buffer[count++] = 0;
5801 }
5802 
5803 static void cik_init_pg(struct radeon_device *rdev)
5804 {
5805 	if (rdev->pg_flags) {
5806 		cik_enable_sck_slowdown_on_pu(rdev, true);
5807 		cik_enable_sck_slowdown_on_pd(rdev, true);
5808 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) {
5809 			cik_init_gfx_cgpg(rdev);
5810 			cik_enable_cp_pg(rdev, true);
5811 			cik_enable_gds_pg(rdev, true);
5812 		}
5813 		cik_init_ao_cu_mask(rdev);
5814 		cik_update_gfx_pg(rdev, true);
5815 	}
5816 }
5817 
5818 static void cik_fini_pg(struct radeon_device *rdev)
5819 {
5820 	if (rdev->pg_flags) {
5821 		cik_update_gfx_pg(rdev, false);
5822 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) {
5823 			cik_enable_cp_pg(rdev, false);
5824 			cik_enable_gds_pg(rdev, false);
5825 		}
5826 	}
5827 }
5828 
5829 /*
5830  * Interrupts
5831  * Starting with r6xx, interrupts are handled via a ring buffer.
5832  * Ring buffers are areas of GPU accessible memory that the GPU
5833  * writes interrupt vectors into and the host reads vectors out of.
5834  * There is a rptr (read pointer) that determines where the
5835  * host is currently reading, and a wptr (write pointer)
5836  * which determines where the GPU has written.  When the
5837  * pointers are equal, the ring is idle.  When the GPU
5838  * writes vectors to the ring buffer, it increments the
5839  * wptr.  When there is an interrupt, the host then starts
5840  * fetching vectors and processing them until the pointers are
5841  * equal again, at which point it updates the rptr.
5842  */
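
/*
 * A minimal sketch of the scheme described above (illustrative only, not
 * part of the driver): the host consumes 16-byte vectors between its rptr
 * and the GPU's wptr, then publishes the new rptr so the hardware knows
 * the ring has been drained.  process_one_vector() is a placeholder.
 */
#if 0	/* example only */
static void ih_consume_example(struct radeon_device *rdev, u32 wptr)
{
	u32 rptr = rdev->ih.rptr;

	while (rptr != wptr) {
		process_one_vector(&rdev->ih.ring[rptr / 4]);
		rptr = (rptr + 16) & rdev->ih.ptr_mask;	/* entries are 16 bytes */
	}
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rptr);	/* rptr == wptr again: ring is idle */
}
#endif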
5843 
5844 /**
5845  * cik_enable_interrupts - Enable the interrupt ring buffer
5846  *
5847  * @rdev: radeon_device pointer
5848  *
5849  * Enable the interrupt ring buffer (CIK).
5850  */
5851 static void cik_enable_interrupts(struct radeon_device *rdev)
5852 {
5853 	u32 ih_cntl = RREG32(IH_CNTL);
5854 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5855 
5856 	ih_cntl |= ENABLE_INTR;
5857 	ih_rb_cntl |= IH_RB_ENABLE;
5858 	WREG32(IH_CNTL, ih_cntl);
5859 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5860 	rdev->ih.enabled = true;
5861 }
5862 
5863 /**
5864  * cik_disable_interrupts - Disable the interrupt ring buffer
5865  *
5866  * @rdev: radeon_device pointer
5867  *
5868  * Disable the interrupt ring buffer (CIK).
5869  */
5870 static void cik_disable_interrupts(struct radeon_device *rdev)
5871 {
5872 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5873 	u32 ih_cntl = RREG32(IH_CNTL);
5874 
5875 	ih_rb_cntl &= ~IH_RB_ENABLE;
5876 	ih_cntl &= ~ENABLE_INTR;
5877 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5878 	WREG32(IH_CNTL, ih_cntl);
5879 	/* set rptr, wptr to 0 */
5880 	WREG32(IH_RB_RPTR, 0);
5881 	WREG32(IH_RB_WPTR, 0);
5882 	rdev->ih.enabled = false;
5883 	rdev->ih.rptr = 0;
5884 }
5885 
5886 /**
5887  * cik_disable_interrupt_state - Disable all interrupt sources
5888  *
5889  * @rdev: radeon_device pointer
5890  *
5891  * Clear all interrupt enable bits used by the driver (CIK).
5892  */
5893 static void cik_disable_interrupt_state(struct radeon_device *rdev)
5894 {
5895 	u32 tmp;
5896 
5897 	/* gfx ring */
5898 	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5899 	/* sdma */
5900 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5901 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5902 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5903 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5904 	/* compute queues */
5905 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
5906 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
5907 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
5908 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
5909 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
5910 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
5911 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
5912 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
5913 	/* grbm */
5914 	WREG32(GRBM_INT_CNTL, 0);
5915 	/* vline/vblank, etc. */
5916 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5917 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5918 	if (rdev->num_crtc >= 4) {
5919 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5920 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5921 	}
5922 	if (rdev->num_crtc >= 6) {
5923 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5924 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5925 	}
5926 
5927 	/* dac hotplug */
5928 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5929 
5930 	/* digital hotplug */
5931 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5932 	WREG32(DC_HPD1_INT_CONTROL, tmp);
5933 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5934 	WREG32(DC_HPD2_INT_CONTROL, tmp);
5935 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5936 	WREG32(DC_HPD3_INT_CONTROL, tmp);
5937 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5938 	WREG32(DC_HPD4_INT_CONTROL, tmp);
5939 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5940 	WREG32(DC_HPD5_INT_CONTROL, tmp);
5941 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5942 	WREG32(DC_HPD6_INT_CONTROL, tmp);
5943 
5944 }
5945 
5946 /**
5947  * cik_irq_init - init and enable the interrupt ring
5948  *
5949  * @rdev: radeon_device pointer
5950  *
5951  * Allocate a ring buffer for the interrupt controller,
5952  * enable the RLC, disable interrupts, set up the IH
5953  * ring buffer and enable it (CIK).
5954  * Called at device load and resume.
5955  * Returns 0 for success, errors for failure.
5956  */
5957 static int cik_irq_init(struct radeon_device *rdev)
5958 {
5959 	int ret = 0;
5960 	int rb_bufsz;
5961 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5962 
5963 	/* allocate ring */
5964 	ret = r600_ih_ring_alloc(rdev);
5965 	if (ret)
5966 		return ret;
5967 
5968 	/* disable irqs */
5969 	cik_disable_interrupts(rdev);
5970 
5971 	/* init rlc */
5972 	ret = cik_rlc_resume(rdev);
5973 	if (ret) {
5974 		r600_ih_ring_fini(rdev);
5975 		return ret;
5976 	}
5977 
5978 	/* setup interrupt control */
5979 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
5980 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5981 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
5982 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5983 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5984 	 */
5985 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5986 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5987 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5988 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
5989 
5990 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5991 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
5992 
5993 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5994 		      IH_WPTR_OVERFLOW_CLEAR |
5995 		      (rb_bufsz << 1));
5996 
5997 	if (rdev->wb.enabled)
5998 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5999 
6000 	/* set the writeback address whether it's enabled or not */
6001 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6002 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6003 
6004 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6005 
6006 	/* set rptr, wptr to 0 */
6007 	WREG32(IH_RB_RPTR, 0);
6008 	WREG32(IH_RB_WPTR, 0);
6009 
6010 	/* Default settings for IH_CNTL (disabled at first) */
6011 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6012 	/* RPTR_REARM only works if MSIs are enabled */
6013 	if (rdev->msi_enabled)
6014 		ih_cntl |= RPTR_REARM;
6015 	WREG32(IH_CNTL, ih_cntl);
6016 
6017 	/* force the active interrupt state to all disabled */
6018 	cik_disable_interrupt_state(rdev);
6019 
6020 	pci_set_master(rdev->pdev);
6021 
6022 	/* enable irqs */
6023 	cik_enable_interrupts(rdev);
6024 
6025 	return ret;
6026 }
6027 
6028 /**
6029  * cik_irq_set - enable/disable interrupt sources
6030  *
6031  * @rdev: radeon_device pointer
6032  *
6033  * Enable interrupt sources on the GPU (vblanks, hpd,
6034  * etc.) (CIK).
6035  * Returns 0 for success, errors for failure.
6036  */
6037 int cik_irq_set(struct radeon_device *rdev)
6038 {
6039 	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
6040 		PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6041 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6042 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6043 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6044 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6045 	u32 grbm_int_cntl = 0;
6046 	u32 dma_cntl, dma_cntl1;
6047 	u32 thermal_int;
6048 
6049 	if (!rdev->irq.installed) {
6050 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6051 		return -EINVAL;
6052 	}
6053 	/* don't enable anything if the ih is disabled */
6054 	if (!rdev->ih.enabled) {
6055 		cik_disable_interrupts(rdev);
6056 		/* force the active interrupt state to all disabled */
6057 		cik_disable_interrupt_state(rdev);
6058 		return 0;
6059 	}
6060 
6061 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6062 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6063 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6064 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6065 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6066 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6067 
6068 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6069 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6070 
6071 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6072 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6073 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6074 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6075 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6076 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6077 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6078 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6079 
6080 	if (rdev->flags & RADEON_IS_IGP)
6081 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6082 			~(THERM_INTH_MASK | THERM_INTL_MASK);
6083 	else
6084 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6085 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6086 
6087 	/* enable CP interrupts on all rings */
6088 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6089 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
6090 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6091 	}
6092 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6093 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6094 		DRM_DEBUG("cik_irq_set: sw int cp1\n");
6095 		if (ring->me == 1) {
6096 			switch (ring->pipe) {
6097 			case 0:
6098 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6099 				break;
6100 			case 1:
6101 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6102 				break;
6103 			case 2:
6104 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6105 				break;
6106 			case 3:
6107 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6108 				break;
6109 			default:
6110 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6111 				break;
6112 			}
6113 		} else if (ring->me == 2) {
6114 			switch (ring->pipe) {
6115 			case 0:
6116 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6117 				break;
6118 			case 1:
6119 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6120 				break;
6121 			case 2:
6122 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6123 				break;
6124 			case 3:
6125 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6126 				break;
6127 			default:
6128 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6129 				break;
6130 			}
6131 		} else {
6132 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
6133 		}
6134 	}
6135 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6136 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6137 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
6138 		if (ring->me == 1) {
6139 			switch (ring->pipe) {
6140 			case 0:
6141 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6142 				break;
6143 			case 1:
6144 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6145 				break;
6146 			case 2:
6147 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6148 				break;
6149 			case 3:
6150 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
6151 				break;
6152 			default:
6153 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6154 				break;
6155 			}
6156 		} else if (ring->me == 2) {
6157 			switch (ring->pipe) {
6158 			case 0:
6159 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6160 				break;
6161 			case 1:
6162 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6163 				break;
6164 			case 2:
6165 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6166 				break;
6167 			case 3:
6168 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
6169 				break;
6170 			default:
6171 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6172 				break;
6173 			}
6174 		} else {
6175 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
6176 		}
6177 	}
6178 
6179 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6180 		DRM_DEBUG("cik_irq_set: sw int dma\n");
6181 		dma_cntl |= TRAP_ENABLE;
6182 	}
6183 
6184 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6185 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
6186 		dma_cntl1 |= TRAP_ENABLE;
6187 	}
6188 
6189 	if (rdev->irq.crtc_vblank_int[0] ||
6190 	    atomic_read(&rdev->irq.pflip[0])) {
6191 		DRM_DEBUG("cik_irq_set: vblank 0\n");
6192 		crtc1 |= VBLANK_INTERRUPT_MASK;
6193 	}
6194 	if (rdev->irq.crtc_vblank_int[1] ||
6195 	    atomic_read(&rdev->irq.pflip[1])) {
6196 		DRM_DEBUG("cik_irq_set: vblank 1\n");
6197 		crtc2 |= VBLANK_INTERRUPT_MASK;
6198 	}
6199 	if (rdev->irq.crtc_vblank_int[2] ||
6200 	    atomic_read(&rdev->irq.pflip[2])) {
6201 		DRM_DEBUG("cik_irq_set: vblank 2\n");
6202 		crtc3 |= VBLANK_INTERRUPT_MASK;
6203 	}
6204 	if (rdev->irq.crtc_vblank_int[3] ||
6205 	    atomic_read(&rdev->irq.pflip[3])) {
6206 		DRM_DEBUG("cik_irq_set: vblank 3\n");
6207 		crtc4 |= VBLANK_INTERRUPT_MASK;
6208 	}
6209 	if (rdev->irq.crtc_vblank_int[4] ||
6210 	    atomic_read(&rdev->irq.pflip[4])) {
6211 		DRM_DEBUG("cik_irq_set: vblank 4\n");
6212 		crtc5 |= VBLANK_INTERRUPT_MASK;
6213 	}
6214 	if (rdev->irq.crtc_vblank_int[5] ||
6215 	    atomic_read(&rdev->irq.pflip[5])) {
6216 		DRM_DEBUG("cik_irq_set: vblank 5\n");
6217 		crtc6 |= VBLANK_INTERRUPT_MASK;
6218 	}
6219 	if (rdev->irq.hpd[0]) {
6220 		DRM_DEBUG("cik_irq_set: hpd 1\n");
6221 		hpd1 |= DC_HPDx_INT_EN;
6222 	}
6223 	if (rdev->irq.hpd[1]) {
6224 		DRM_DEBUG("cik_irq_set: hpd 2\n");
6225 		hpd2 |= DC_HPDx_INT_EN;
6226 	}
6227 	if (rdev->irq.hpd[2]) {
6228 		DRM_DEBUG("cik_irq_set: hpd 3\n");
6229 		hpd3 |= DC_HPDx_INT_EN;
6230 	}
6231 	if (rdev->irq.hpd[3]) {
6232 		DRM_DEBUG("cik_irq_set: hpd 4\n");
6233 		hpd4 |= DC_HPDx_INT_EN;
6234 	}
6235 	if (rdev->irq.hpd[4]) {
6236 		DRM_DEBUG("cik_irq_set: hpd 5\n");
6237 		hpd5 |= DC_HPDx_INT_EN;
6238 	}
6239 	if (rdev->irq.hpd[5]) {
6240 		DRM_DEBUG("cik_irq_set: hpd 6\n");
6241 		hpd6 |= DC_HPDx_INT_EN;
6242 	}
6243 
6244 	if (rdev->irq.dpm_thermal) {
6245 		DRM_DEBUG("dpm thermal\n");
6246 		if (rdev->flags & RADEON_IS_IGP)
6247 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6248 		else
6249 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6250 	}
6251 
6252 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6253 
6254 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6255 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6256 
6257 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6258 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6259 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6260 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
6261 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
6262 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
6263 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
6264 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
6265 
6266 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6267 
6268 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6269 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6270 	if (rdev->num_crtc >= 4) {
6271 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6272 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6273 	}
6274 	if (rdev->num_crtc >= 6) {
6275 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6276 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6277 	}
6278 
6279 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
6280 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
6281 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
6282 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
6283 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
6284 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
6285 
6286 	if (rdev->flags & RADEON_IS_IGP)
6287 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
6288 	else
6289 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
6290 
6291 	return 0;
6292 }
6293 
6294 /**
6295  * cik_irq_ack - ack interrupt sources
6296  *
6297  * @rdev: radeon_device pointer
6298  *
6299  * Ack interrupt sources on the GPU (vblanks, hpd,
6300  * etc.) (CIK).  Certain interrupt sources are sw
6301  * generated and do not require an explicit ack.
6302  */
6303 static inline void cik_irq_ack(struct radeon_device *rdev)
6304 {
6305 	u32 tmp;
6306 
6307 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6308 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6309 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6310 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6311 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6312 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6313 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
6314 
6315 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
6316 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6317 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
6318 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6319 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6320 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6321 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6322 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6323 
6324 	if (rdev->num_crtc >= 4) {
6325 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6326 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6327 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6328 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6329 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6330 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6331 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6332 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6333 	}
6334 
6335 	if (rdev->num_crtc >= 6) {
6336 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6337 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6338 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6339 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6340 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6341 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6342 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6343 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6344 	}
6345 
6346 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6347 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6348 		tmp |= DC_HPDx_INT_ACK;
6349 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6350 	}
6351 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6352 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6353 		tmp |= DC_HPDx_INT_ACK;
6354 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6355 	}
6356 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6357 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6358 		tmp |= DC_HPDx_INT_ACK;
6359 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6360 	}
6361 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6362 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6363 		tmp |= DC_HPDx_INT_ACK;
6364 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6365 	}
6366 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6367 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6368 		tmp |= DC_HPDx_INT_ACK;
6369 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6370 	}
6371 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6372 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6373 		tmp |= DC_HPDx_INT_ACK;
6374 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6375 	}
6376 }
6377 
6378 /**
6379  * cik_irq_disable - disable interrupts
6380  *
6381  * @rdev: radeon_device pointer
6382  *
6383  * Disable interrupts on the hw (CIK).
6384  */
6385 static void cik_irq_disable(struct radeon_device *rdev)
6386 {
6387 	cik_disable_interrupts(rdev);
6388 	/* Wait and acknowledge irq */
6389 	mdelay(1);
6390 	cik_irq_ack(rdev);
6391 	cik_disable_interrupt_state(rdev);
6392 }
6393 
6394 /**
6395  * cik_irq_suspend - disable interrupts for suspend
6396  *
6397  * @rdev: radeon_device pointer
6398  *
6399  * Disable interrupts and stop the RLC (CIK).
6400  * Used for suspend.
6401  */
6402 static void cik_irq_suspend(struct radeon_device *rdev)
6403 {
6404 	cik_irq_disable(rdev);
6405 	cik_rlc_stop(rdev);
6406 }
6407 
6408 /**
6409  * cik_irq_fini - tear down interrupt support
6410  *
6411  * @rdev: radeon_device pointer
6412  *
6413  * Disable interrupts on the hw and free the IH ring
6414  * buffer (CIK).
6415  * Used for driver unload.
6416  */
6417 static void cik_irq_fini(struct radeon_device *rdev)
6418 {
6419 	cik_irq_suspend(rdev);
6420 	r600_ih_ring_fini(rdev);
6421 }
6422 
6423 /**
6424  * cik_get_ih_wptr - get the IH ring buffer wptr
6425  *
6426  * @rdev: radeon_device pointer
6427  *
6428  * Get the IH ring buffer wptr from either the register
6429  * or the writeback memory buffer (CIK).  Also check for
6430  * ring buffer overflow and deal with it.
6431  * Used by cik_irq_process().
6432  * Returns the value of the wptr.
6433  */
6434 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
6435 {
6436 	u32 wptr, tmp;
6437 
6438 	if (rdev->wb.enabled)
6439 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6440 	else
6441 		wptr = RREG32(IH_RB_WPTR);
6442 
6443 	if (wptr & RB_OVERFLOW) {
6444 		/* When a ring buffer overflow happens, start parsing interrupts
6445 		 * from the last vector that was not overwritten (wptr + 16).
6446 		 * Hopefully this lets us catch up.
6447 		 */
6448 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
6449 			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6450 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6451 		tmp = RREG32(IH_RB_CNTL);
6452 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6453 		WREG32(IH_RB_CNTL, tmp);
6454 	}
6455 	return (wptr & rdev->ih.ptr_mask);
6456 }
6457 
6458 /* CIK IV Ring
6459  * Each IV ring entry is 128 bits:
6460  * [7:0]    - interrupt source id
6461  * [31:8]   - reserved
6462  * [59:32]  - interrupt source data
6463  * [63:60]  - reserved
6464  * [71:64]  - RINGID
6465  *            CP:
6466  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
6467  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
6468  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
6469  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
6470  *            PIPE_ID - ME0 0=3D
6471  *                    - ME1&2 compute dispatcher (4 pipes each)
6472  *            SDMA:
6473  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
6474  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
6475  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
6476  * [79:72]  - VMID
6477  * [95:80]  - PASID
6478  * [127:96] - reserved
6479  */
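
/*
 * Illustrative only (not part of the driver): unpacking the first three
 * dwords of an IV ring entry per the layout above, matching the decode
 * in cik_irq_process() below ('dw' is a placeholder for a pointer to a
 * CPU-endian copy of the entry).
 */
#if 0	/* example only */
	u32 src_id   = dw[0] & 0xff;		/* [7:0]   source id   */
	u32 src_data = dw[1] & 0xfffffff;	/* [59:32] source data */
	u32 ring_id  = dw[2] & 0xff;		/* [71:64] RINGID      */
	u32 vm_id    = (dw[2] >> 8) & 0xff;	/* [79:72] VMID        */
	u32 pasid    = (dw[2] >> 16) & 0xffff;	/* [95:80] PASID       */
#endif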
6480 /**
6481  * cik_irq_process - interrupt handler
6482  *
6483  * @rdev: radeon_device pointer
6484  *
6485  * Interrupt handler (CIK).  Walk the IH ring,
6486  * ack interrupts and schedule work to handle
6487  * interrupt events.
6488  * Returns irq process return code.
6489  */
6490 int cik_irq_process(struct radeon_device *rdev)
6491 {
6492 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6493 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6494 	u32 wptr;
6495 	u32 rptr;
6496 	u32 src_id, src_data, ring_id;
6497 	u8 me_id, pipe_id, queue_id;
6498 	u32 ring_index;
6499 	bool queue_hotplug = false;
6500 	bool queue_reset = false;
6501 	u32 addr, status, mc_client;
6502 	bool queue_thermal = false;
6503 
6504 	if (!rdev->ih.enabled || rdev->shutdown)
6505 		return IRQ_NONE;
6506 
6507 	wptr = cik_get_ih_wptr(rdev);
6508 
6509 restart_ih:
6510 	/* is somebody else already processing irqs? */
6511 	if (atomic_xchg(&rdev->ih.lock, 1))
6512 		return IRQ_NONE;
6513 
6514 	rptr = rdev->ih.rptr;
6515 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6516 
6517 	/* Order reading of wptr vs. reading of IH ring data */
6518 	rmb();
6519 
6520 	/* display interrupts */
6521 	cik_irq_ack(rdev);
6522 
6523 	while (rptr != wptr) {
6524 		/* wptr/rptr are in bytes! */
6525 		ring_index = rptr / 4;
6526 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6527 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6528 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6529 
6530 		switch (src_id) {
6531 		case 1: /* D1 vblank/vline */
6532 			switch (src_data) {
6533 			case 0: /* D1 vblank */
6534 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
6535 					if (rdev->irq.crtc_vblank_int[0]) {
6536 						drm_handle_vblank(rdev->ddev, 0);
6537 						rdev->pm.vblank_sync = true;
6538 						wake_up(&rdev->irq.vblank_queue);
6539 					}
6540 					if (atomic_read(&rdev->irq.pflip[0]))
6541 						radeon_crtc_handle_flip(rdev, 0);
6542 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6543 					DRM_DEBUG("IH: D1 vblank\n");
6544 				}
6545 				break;
6546 			case 1: /* D1 vline */
6547 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
6548 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6549 					DRM_DEBUG("IH: D1 vline\n");
6550 				}
6551 				break;
6552 			default:
6553 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6554 				break;
6555 			}
6556 			break;
6557 		case 2: /* D2 vblank/vline */
6558 			switch (src_data) {
6559 			case 0: /* D2 vblank */
6560 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
6561 					if (rdev->irq.crtc_vblank_int[1]) {
6562 						drm_handle_vblank(rdev->ddev, 1);
6563 						rdev->pm.vblank_sync = true;
6564 						wake_up(&rdev->irq.vblank_queue);
6565 					}
6566 					if (atomic_read(&rdev->irq.pflip[1]))
6567 						radeon_crtc_handle_flip(rdev, 1);
6568 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6569 					DRM_DEBUG("IH: D2 vblank\n");
6570 				}
6571 				break;
6572 			case 1: /* D2 vline */
6573 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
6574 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6575 					DRM_DEBUG("IH: D2 vline\n");
6576 				}
6577 				break;
6578 			default:
6579 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6580 				break;
6581 			}
6582 			break;
6583 		case 3: /* D3 vblank/vline */
6584 			switch (src_data) {
6585 			case 0: /* D3 vblank */
6586 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
6587 					if (rdev->irq.crtc_vblank_int[2]) {
6588 						drm_handle_vblank(rdev->ddev, 2);
6589 						rdev->pm.vblank_sync = true;
6590 						wake_up(&rdev->irq.vblank_queue);
6591 					}
6592 					if (atomic_read(&rdev->irq.pflip[2]))
6593 						radeon_crtc_handle_flip(rdev, 2);
6594 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6595 					DRM_DEBUG("IH: D3 vblank\n");
6596 				}
6597 				break;
6598 			case 1: /* D3 vline */
6599 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
6600 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6601 					DRM_DEBUG("IH: D3 vline\n");
6602 				}
6603 				break;
6604 			default:
6605 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6606 				break;
6607 			}
6608 			break;
6609 		case 4: /* D4 vblank/vline */
6610 			switch (src_data) {
6611 			case 0: /* D4 vblank */
6612 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
6613 					if (rdev->irq.crtc_vblank_int[3]) {
6614 						drm_handle_vblank(rdev->ddev, 3);
6615 						rdev->pm.vblank_sync = true;
6616 						wake_up(&rdev->irq.vblank_queue);
6617 					}
6618 					if (atomic_read(&rdev->irq.pflip[3]))
6619 						radeon_crtc_handle_flip(rdev, 3);
6620 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6621 					DRM_DEBUG("IH: D4 vblank\n");
6622 				}
6623 				break;
6624 			case 1: /* D4 vline */
6625 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
6626 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6627 					DRM_DEBUG("IH: D4 vline\n");
6628 				}
6629 				break;
6630 			default:
6631 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6632 				break;
6633 			}
6634 			break;
6635 		case 5: /* D5 vblank/vline */
6636 			switch (src_data) {
6637 			case 0: /* D5 vblank */
6638 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
6639 					if (rdev->irq.crtc_vblank_int[4]) {
6640 						drm_handle_vblank(rdev->ddev, 4);
6641 						rdev->pm.vblank_sync = true;
6642 						wake_up(&rdev->irq.vblank_queue);
6643 					}
6644 					if (atomic_read(&rdev->irq.pflip[4]))
6645 						radeon_crtc_handle_flip(rdev, 4);
6646 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6647 					DRM_DEBUG("IH: D5 vblank\n");
6648 				}
6649 				break;
6650 			case 1: /* D5 vline */
6651 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
6652 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6653 					DRM_DEBUG("IH: D5 vline\n");
6654 				}
6655 				break;
6656 			default:
6657 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6658 				break;
6659 			}
6660 			break;
6661 		case 6: /* D6 vblank/vline */
6662 			switch (src_data) {
6663 			case 0: /* D6 vblank */
6664 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
6665 					if (rdev->irq.crtc_vblank_int[5]) {
6666 						drm_handle_vblank(rdev->ddev, 5);
6667 						rdev->pm.vblank_sync = true;
6668 						wake_up(&rdev->irq.vblank_queue);
6669 					}
6670 					if (atomic_read(&rdev->irq.pflip[5]))
6671 						radeon_crtc_handle_flip(rdev, 5);
6672 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6673 					DRM_DEBUG("IH: D6 vblank\n");
6674 				}
6675 				break;
6676 			case 1: /* D6 vline */
6677 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
6678 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6679 					DRM_DEBUG("IH: D6 vline\n");
6680 				}
6681 				break;
6682 			default:
6683 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6684 				break;
6685 			}
6686 			break;
6687 		case 42: /* HPD hotplug */
6688 			switch (src_data) {
6689 			case 0:
6690 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
6691 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
6692 					queue_hotplug = true;
6693 					DRM_DEBUG("IH: HPD1\n");
6694 				}
6695 				break;
6696 			case 1:
6697 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
6698 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6699 					queue_hotplug = true;
6700 					DRM_DEBUG("IH: HPD2\n");
6701 				}
6702 				break;
6703 			case 2:
6704 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6705 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6706 					queue_hotplug = true;
6707 					DRM_DEBUG("IH: HPD3\n");
6708 				}
6709 				break;
6710 			case 3:
6711 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6712 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6713 					queue_hotplug = true;
6714 					DRM_DEBUG("IH: HPD4\n");
6715 				}
6716 				break;
6717 			case 4:
6718 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6719 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6720 					queue_hotplug = true;
6721 					DRM_DEBUG("IH: HPD5\n");
6722 				}
6723 				break;
6724 			case 5:
6725 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6726 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6727 					queue_hotplug = true;
6728 					DRM_DEBUG("IH: HPD6\n");
6729 				}
6730 				break;
6731 			default:
6732 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6733 				break;
6734 			}
6735 			break;
6736 		case 124: /* UVD */
6737 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6738 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6739 			break;
6740 		case 146:
6741 		case 147:
6742 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6743 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6744 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
6745 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6746 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6747 				addr);
6748 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6749 				status);
6750 			cik_vm_decode_fault(rdev, status, addr, mc_client);
6751 			/* reset addr and status */
6752 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6753 			break;
6754 		case 176: /* GFX RB CP_INT */
6755 		case 177: /* GFX IB CP_INT */
6756 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6757 			break;
6758 		case 181: /* CP EOP event */
6759 			DRM_DEBUG("IH: CP EOP\n");
6760 			/* XXX check the bitfield order! */
6761 			me_id = (ring_id & 0x60) >> 5;
6762 			pipe_id = (ring_id & 0x18) >> 3;
6763 			queue_id = (ring_id & 0x7) >> 0;
6764 			switch (me_id) {
6765 			case 0:
6766 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6767 				break;
6768 			case 1:
6769 			case 2:
6770 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
6771 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6772 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
6773 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6774 				break;
6775 			}
6776 			break;
6777 		case 184: /* CP Privileged reg access */
6778 			DRM_ERROR("Illegal register access in command stream\n");
6779 			/* XXX check the bitfield order! */
6780 			me_id = (ring_id & 0x60) >> 5;
6781 			pipe_id = (ring_id & 0x18) >> 3;
6782 			queue_id = (ring_id & 0x7) >> 0;
6783 			switch (me_id) {
6784 			case 0:
6785 				/* This results in a full GPU reset, but all we need to do is soft
6786 				 * reset the CP for gfx
6787 				 */
6788 				queue_reset = true;
6789 				break;
6790 			case 1:
6791 				/* XXX compute */
6792 				queue_reset = true;
6793 				break;
6794 			case 2:
6795 				/* XXX compute */
6796 				queue_reset = true;
6797 				break;
6798 			}
6799 			break;
6800 		case 185: /* CP Privileged inst */
6801 			DRM_ERROR("Illegal instruction in command stream\n");
6802 			/* XXX check the bitfield order! */
6803 			me_id = (ring_id & 0x60) >> 5;
6804 			pipe_id = (ring_id & 0x18) >> 3;
6805 			queue_id = (ring_id & 0x7) >> 0;
6806 			switch (me_id) {
6807 			case 0:
6808 				/* This results in a full GPU reset, but all we need to do is soft
6809 				 * reset the CP for gfx
6810 				 */
6811 				queue_reset = true;
6812 				break;
6813 			case 1:
6814 				/* XXX compute */
6815 				queue_reset = true;
6816 				break;
6817 			case 2:
6818 				/* XXX compute */
6819 				queue_reset = true;
6820 				break;
6821 			}
6822 			break;
6823 		case 224: /* SDMA trap event */
6824 			/* XXX check the bitfield order! */
6825 			me_id = (ring_id & 0x3) >> 0;
6826 			queue_id = (ring_id & 0xc) >> 2;
6827 			DRM_DEBUG("IH: SDMA trap\n");
6828 			switch (me_id) {
6829 			case 0:
6830 				switch (queue_id) {
6831 				case 0:
6832 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6833 					break;
6834 				case 1:
6835 					/* XXX compute */
6836 					break;
6837 				case 2:
6838 					/* XXX compute */
6839 					break;
6840 				}
6841 				break;
6842 			case 1:
6843 				switch (queue_id) {
6844 				case 0:
6845 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6846 					break;
6847 				case 1:
6848 					/* XXX compute */
6849 					break;
6850 				case 2:
6851 					/* XXX compute */
6852 					break;
6853 				}
6854 				break;
6855 			}
6856 			break;
6857 		case 230: /* thermal low to high */
6858 			DRM_DEBUG("IH: thermal low to high\n");
6859 			rdev->pm.dpm.thermal.high_to_low = false;
6860 			queue_thermal = true;
6861 			break;
6862 		case 231: /* thermal high to low */
6863 			DRM_DEBUG("IH: thermal high to low\n");
6864 			rdev->pm.dpm.thermal.high_to_low = true;
6865 			queue_thermal = true;
6866 			break;
6867 		case 233: /* GUI IDLE */
6868 			DRM_DEBUG("IH: GUI idle\n");
6869 			break;
6870 		case 241: /* SDMA Privileged inst */
6871 		case 247: /* SDMA Privileged inst */
6872 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
6873 			/* XXX check the bitfield order! */
6874 			me_id = (ring_id & 0x3) >> 0;
6875 			queue_id = (ring_id & 0xc) >> 2;
6876 			switch (me_id) {
6877 			case 0:
6878 				switch (queue_id) {
6879 				case 0:
6880 					queue_reset = true;
6881 					break;
6882 				case 1:
6883 					/* XXX compute */
6884 					queue_reset = true;
6885 					break;
6886 				case 2:
6887 					/* XXX compute */
6888 					queue_reset = true;
6889 					break;
6890 				}
6891 				break;
6892 			case 1:
6893 				switch (queue_id) {
6894 				case 0:
6895 					queue_reset = true;
6896 					break;
6897 				case 1:
6898 					/* XXX compute */
6899 					queue_reset = true;
6900 					break;
6901 				case 2:
6902 					/* XXX compute */
6903 					queue_reset = true;
6904 					break;
6905 				}
6906 				break;
6907 			}
6908 			break;
6909 		default:
6910 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6911 			break;
6912 		}
6913 
6914 		/* wptr/rptr are in bytes; each IH ring entry is 16 bytes (4 dwords) */
6915 		rptr += 16;
6916 		rptr &= rdev->ih.ptr_mask;
6917 	}
6918 	if (queue_hotplug)
6919 		schedule_work(&rdev->hotplug_work);
6920 	if (queue_reset)
6921 		schedule_work(&rdev->reset_work);
6922 	if (queue_thermal)
6923 		schedule_work(&rdev->pm.dpm.thermal.work);
6924 	rdev->ih.rptr = rptr;
6925 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
6926 	atomic_set(&rdev->ih.lock, 0);
6927 
6928 	/* make sure wptr hasn't changed while processing */
6929 	wptr = cik_get_ih_wptr(rdev);
6930 	if (wptr != rptr)
6931 		goto restart_ih;
6932 
6933 	return IRQ_HANDLED;
6934 }
6935 
6936 /*
6937  * startup/shutdown callbacks
6938  */
6939 /**
6940  * cik_startup - program the asic to a functional state
6941  *
6942  * @rdev: radeon_device pointer
6943  *
6944  * Programs the asic to a functional state (CIK).
6945  * Called by cik_init() and cik_resume().
6946  * Returns 0 for success, error for failure.
6947  */
6948 static int cik_startup(struct radeon_device *rdev)
6949 {
6950 	struct radeon_ring *ring;
6951 	int r;
6952 
6953 	/* enable pcie gen2/3 link */
6954 	cik_pcie_gen3_enable(rdev);
6955 	/* enable aspm */
6956 	cik_program_aspm(rdev);
6957 
6958 	/* scratch needs to be initialized before MC */
6959 	r = r600_vram_scratch_init(rdev);
6960 	if (r)
6961 		return r;
6962 
6963 	cik_mc_program(rdev);
6964 
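	/* Note the asymmetry below: mc_fw is only checked (and loaded) for
	 * discrete parts; on IGPs the memory controller is presumably set up
	 * by the system firmware, so no MC ucode is required.
	 */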
6965 	if (rdev->flags & RADEON_IS_IGP) {
6966 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6967 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
6968 			r = cik_init_microcode(rdev);
6969 			if (r) {
6970 				DRM_ERROR("Failed to load firmware!\n");
6971 				return r;
6972 			}
6973 		}
6974 	} else {
6975 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6976 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
6977 		    !rdev->mc_fw) {
6978 			r = cik_init_microcode(rdev);
6979 			if (r) {
6980 				DRM_ERROR("Failed to load firmware!\n");
6981 				return r;
6982 			}
6983 		}
6984 
6985 		r = ci_mc_load_microcode(rdev);
6986 		if (r) {
6987 			DRM_ERROR("Failed to load MC firmware!\n");
6988 			return r;
6989 		}
6990 	}
6991 
6992 	r = cik_pcie_gart_enable(rdev);
6993 	if (r)
6994 		return r;
6995 	cik_gpu_init(rdev);
6996 
6997 	/* allocate rlc buffers */
6998 	if (rdev->flags & RADEON_IS_IGP) {
6999 		if (rdev->family == CHIP_KAVERI) {
7000 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7001 			rdev->rlc.reg_list_size =
7002 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7003 		} else {
7004 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7005 			rdev->rlc.reg_list_size =
7006 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7007 		}
7008 	}
7009 	rdev->rlc.cs_data = ci_cs_data;
7010 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7011 	r = sumo_rlc_init(rdev);
7012 	if (r) {
7013 		DRM_ERROR("Failed to init rlc BOs!\n");
7014 		return r;
7015 	}
7016 
7017 	/* allocate wb buffer */
7018 	r = radeon_wb_init(rdev);
7019 	if (r)
7020 		return r;
7021 
7022 	/* allocate mec buffers */
7023 	r = cik_mec_init(rdev);
7024 	if (r) {
7025 		DRM_ERROR("Failed to init MEC BOs!\n");
7026 		return r;
7027 	}
7028 
7029 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7030 	if (r) {
7031 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7032 		return r;
7033 	}
7034 
7035 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7036 	if (r) {
7037 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7038 		return r;
7039 	}
7040 
7041 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7042 	if (r) {
7043 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7044 		return r;
7045 	}
7046 
7047 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7048 	if (r) {
7049 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7050 		return r;
7051 	}
7052 
7053 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7054 	if (r) {
7055 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7056 		return r;
7057 	}
7058 
7059 	r = radeon_uvd_resume(rdev);
7060 	if (!r) {
7061 		r = uvd_v4_2_resume(rdev);
7062 		if (!r) {
7063 			r = radeon_fence_driver_start_ring(rdev,
7064 							   R600_RING_TYPE_UVD_INDEX);
7065 			if (r)
7066 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7067 		}
7068 	}
7069 	if (r)
7070 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7071 
7072 	/* Enable IRQ */
7073 	if (!rdev->irq.installed) {
7074 		r = radeon_irq_kms_init(rdev);
7075 		if (r)
7076 			return r;
7077 	}
7078 
7079 	r = cik_irq_init(rdev);
7080 	if (r) {
7081 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
7082 		radeon_irq_kms_fini(rdev);
7083 		return r;
7084 	}
7085 	cik_irq_set(rdev);
7086 
7087 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7088 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7089 			     CP_RB0_RPTR, CP_RB0_WPTR,
7090 			     RADEON_CP_PACKET2);
7091 	if (r)
7092 		return r;
7093 
7094 	/* set up the compute queues */
7095 	/* type-2 packets are deprecated on MEC, use type-3 instead */
7096 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7097 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7098 			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7099 			     PACKET3(PACKET3_NOP, 0x3FFF));
7100 	if (r)
7101 		return r;
7102 	ring->me = 1; /* first MEC */
7103 	ring->pipe = 0; /* first pipe */
7104 	ring->queue = 0; /* first queue */
7105 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7106 
7107 	/* type-2 packets are deprecated on MEC, use type-3 instead */
7108 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7109 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7110 			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
7111 			     PACKET3(PACKET3_NOP, 0x3FFF));
7112 	if (r)
7113 		return r;
7114 	/* dGPUs only have 1 MEC */
7115 	ring->me = 1; /* first MEC */
7116 	ring->pipe = 0; /* first pipe */
7117 	ring->queue = 1; /* second queue */
7118 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7119 
7120 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7121 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7122 			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
7123 			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
7124 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7125 	if (r)
7126 		return r;
7127 
7128 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7129 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7130 			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
7131 			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
7132 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7133 	if (r)
7134 		return r;
7135 
7136 	r = cik_cp_resume(rdev);
7137 	if (r)
7138 		return r;
7139 
7140 	r = cik_sdma_resume(rdev);
7141 	if (r)
7142 		return r;
7143 
7144 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7145 	if (ring->ring_size) {
7146 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7147 				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
7148 				     RADEON_CP_PACKET2);
7149 		if (!r)
7150 			r = uvd_v1_0_init(rdev);
7151 		if (r)
7152 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7153 	}
7154 
7155 	r = radeon_ib_pool_init(rdev);
7156 	if (r) {
7157 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7158 		return r;
7159 	}
7160 
7161 	r = radeon_vm_manager_init(rdev);
7162 	if (r) {
7163 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7164 		return r;
7165 	}
7166 
7167 	r = dce6_audio_init(rdev);
7168 	if (r)
7169 		return r;
7170 
7171 	return 0;
7172 }
7173 
7174 /**
7175  * cik_resume - resume the asic to a functional state
7176  *
7177  * @rdev: radeon_device pointer
7178  *
7179  * Programs the asic to a functional state (CIK).
7180  * Called at resume.
7181  * Returns 0 for success, error for failure.
7182  */
7183 int cik_resume(struct radeon_device *rdev)
7184 {
7185 	int r;
7186 
7187 	/* post card */
7188 	atom_asic_init(rdev->mode_info.atom_context);
7189 
7190 	/* init golden registers */
7191 	cik_init_golden_registers(rdev);
7192 
7193 	rdev->accel_working = true;
7194 	r = cik_startup(rdev);
7195 	if (r) {
7196 		DRM_ERROR("cik startup failed on resume\n");
7197 		rdev->accel_working = false;
7198 		return r;
7199 	}
7200 
7201 	return r;
7203 }
7204 
7205 /**
7206  * cik_suspend - suspend the asic
7207  *
7208  * @rdev: radeon_device pointer
7209  *
7210  * Bring the chip into a state suitable for suspend (CIK).
7211  * Called at suspend.
7212  * Returns 0 for success.
7213  */
7214 int cik_suspend(struct radeon_device *rdev)
7215 {
7216 	dce6_audio_fini(rdev);
7217 	radeon_vm_manager_fini(rdev);
7218 	cik_cp_enable(rdev, false);
7219 	cik_sdma_enable(rdev, false);
7220 	uvd_v1_0_fini(rdev);
7221 	radeon_uvd_suspend(rdev);
7222 	cik_fini_pg(rdev);
7223 	cik_fini_cg(rdev);
7224 	cik_irq_suspend(rdev);
7225 	radeon_wb_disable(rdev);
7226 	cik_pcie_gart_disable(rdev);
7227 	return 0;
7228 }
7229 
7230 /* The plan is to move initialization into this function and use
7231  * helper functions so that radeon_device_init does pretty much
7232  * nothing more than call asic-specific functions. This should
7233  * also allow us to remove a bunch of callbacks, like
7234  * vram_info.
7235  */
7236 /**
7237  * cik_init - asic specific driver and hw init
7238  *
7239  * @rdev: radeon_device pointer
7240  *
7241  * Setup asic specific driver variables and program the hw
7242  * to a functional state (CIK).
7243  * Called at driver startup.
7244  * Returns 0 for success, errors for failure.
7245  */
7246 int cik_init(struct radeon_device *rdev)
7247 {
7248 	struct radeon_ring *ring;
7249 	int r;
7250 
7251 	/* Read BIOS */
7252 	if (!radeon_get_bios(rdev)) {
7253 		if (ASIC_IS_AVIVO(rdev))
7254 			return -EINVAL;
7255 	}
7256 	/* Must be an ATOMBIOS */
7257 	if (!rdev->is_atom_bios) {
7258 		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
7259 		return -EINVAL;
7260 	}
7261 	r = radeon_atombios_init(rdev);
7262 	if (r)
7263 		return r;
7264 
7265 	/* Post card if necessary */
7266 	if (!radeon_card_posted(rdev)) {
7267 		if (!rdev->bios) {
7268 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7269 			return -EINVAL;
7270 		}
7271 		DRM_INFO("GPU not posted. posting now...\n");
7272 		atom_asic_init(rdev->mode_info.atom_context);
7273 	}
7274 	/* init golden registers */
7275 	cik_init_golden_registers(rdev);
7276 	/* Initialize scratch registers */
7277 	cik_scratch_init(rdev);
7278 	/* Initialize surface registers */
7279 	radeon_surface_init(rdev);
7280 	/* Initialize clocks */
7281 	radeon_get_clock_info(rdev->ddev);
7282 
7283 	/* Fence driver */
7284 	r = radeon_fence_driver_init(rdev);
7285 	if (r)
7286 		return r;
7287 
7288 	/* initialize memory controller */
7289 	r = cik_mc_init(rdev);
7290 	if (r)
7291 		return r;
7292 	/* Memory manager */
7293 	r = radeon_bo_init(rdev);
7294 	if (r)
7295 		return r;
7296 
7297 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7298 	ring->ring_obj = NULL;
7299 	r600_ring_init(rdev, ring, 1024 * 1024);
7300 
7301 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7302 	ring->ring_obj = NULL;
7303 	r600_ring_init(rdev, ring, 1024 * 1024);
7304 	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7305 	if (r)
7306 		return r;
7307 
7308 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7309 	ring->ring_obj = NULL;
7310 	r600_ring_init(rdev, ring, 1024 * 1024);
7311 	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
7312 	if (r)
7313 		return r;
7314 
7315 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7316 	ring->ring_obj = NULL;
7317 	r600_ring_init(rdev, ring, 256 * 1024);
7318 
7319 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7320 	ring->ring_obj = NULL;
7321 	r600_ring_init(rdev, ring, 256 * 1024);
7322 
7323 	r = radeon_uvd_init(rdev);
7324 	if (!r) {
7325 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7326 		ring->ring_obj = NULL;
7327 		r600_ring_init(rdev, ring, 4096);
7328 	}
7329 
7330 	rdev->ih.ring_obj = NULL;
7331 	r600_ih_ring_init(rdev, 64 * 1024);
7332 
7333 	r = r600_pcie_gart_init(rdev);
7334 	if (r)
7335 		return r;
7336 
7337 	rdev->accel_working = true;
7338 	r = cik_startup(rdev);
7339 	if (r) {
7340 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7341 		cik_cp_fini(rdev);
7342 		cik_sdma_fini(rdev);
7343 		cik_irq_fini(rdev);
7344 		sumo_rlc_fini(rdev);
7345 		cik_mec_fini(rdev);
7346 		radeon_wb_fini(rdev);
7347 		radeon_ib_pool_fini(rdev);
7348 		radeon_vm_manager_fini(rdev);
7349 		radeon_irq_kms_fini(rdev);
7350 		cik_pcie_gart_fini(rdev);
7351 		rdev->accel_working = false;
7352 	}
7353 
7354 	/* Don't start up if the MC ucode is missing.
7355 	 * The default clocks and voltages before the MC ucode
7356 	 * is loaded are not sufficient for advanced operations.
7357 	 */
7358 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
7359 		DRM_ERROR("radeon: MC ucode required for CIK+.\n");
7360 		return -EINVAL;
7361 	}
7362 
7363 	return 0;
7364 }
7365 
7366 /**
7367  * cik_fini - asic specific driver and hw fini
7368  *
7369  * @rdev: radeon_device pointer
7370  *
7371  * Tear down the asic specific driver variables and program the hw
7372  * to an idle state (CIK).
7373  * Called at driver unload.
7374  */
7375 void cik_fini(struct radeon_device *rdev)
7376 {
7377 	cik_cp_fini(rdev);
7378 	cik_sdma_fini(rdev);
7379 	cik_fini_pg(rdev);
7380 	cik_fini_cg(rdev);
7381 	cik_irq_fini(rdev);
7382 	sumo_rlc_fini(rdev);
7383 	cik_mec_fini(rdev);
7384 	radeon_wb_fini(rdev);
7385 	radeon_vm_manager_fini(rdev);
7386 	radeon_ib_pool_fini(rdev);
7387 	radeon_irq_kms_fini(rdev);
7388 	uvd_v1_0_fini(rdev);
7389 	radeon_uvd_fini(rdev);
7390 	cik_pcie_gart_fini(rdev);
7391 	r600_vram_scratch_fini(rdev);
7392 	radeon_gem_fini(rdev);
7393 	radeon_fence_driver_fini(rdev);
7394 	radeon_bo_fini(rdev);
7395 	radeon_atombios_fini(rdev);
7396 	kfree(rdev->bios);
7397 	rdev->bios = NULL;
7398 }
7399 
7400 /* display watermark setup */
7401 /**
7402  * dce8_line_buffer_adjust - Set up the line buffer
7403  *
7404  * @rdev: radeon_device pointer
7405  * @radeon_crtc: the selected display controller
7406  * @mode: the current display mode on the selected display
7407  * controller
7408  *
7409  * Set up the line buffer allocation for
7410  * the selected display controller (CIK).
7411  * Returns the line buffer size in pixels.
7412  */
7413 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
7414 				   struct radeon_crtc *radeon_crtc,
7415 				   struct drm_display_mode *mode)
7416 {
7417 	u32 tmp, buffer_alloc, i;
7418 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
7419 	/*
7420 	 * Line Buffer Setup
7421 	 * There are 6 line buffers, one for each display controller.
7422 	 * There are 3 partitions per LB. Select the number of partitions
7423 	 * to enable based on the display width.  For display widths larger
7424 	 * than 4096, you need to use 2 display controllers and combine
7425 	 * them using the stereo blender.
7426 	 */
7427 	if (radeon_crtc->base.enabled && mode) {
7428 		if (mode->crtc_hdisplay < 1920) {
7429 			tmp = 1;
7430 			buffer_alloc = 2;
7431 		} else if (mode->crtc_hdisplay < 2560) {
7432 			tmp = 2;
7433 			buffer_alloc = 2;
7434 		} else if (mode->crtc_hdisplay < 4096) {
7435 			tmp = 0;
7436 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7437 		} else {
7438 			DRM_DEBUG_KMS("Mode too big for LB!\n");
7439 			tmp = 0;
7440 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
7441 		}
7442 	} else {
7443 		tmp = 1;
7444 		buffer_alloc = 0;
7445 	}
7446 
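	/* tmp is the LB_MEMORY_CONFIG partition select programmed below; it
	 * maps back to the usable width returned at the end of this function
	 * (0 -> 4096, 1 -> 1920, 2 -> 2560 pixels), with the lb size being
	 * that width times 2 lines.
	 */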
7447 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
7448 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
7449 
7450 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
7451 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
7452 	for (i = 0; i < rdev->usec_timeout; i++) {
7453 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
7454 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
7455 			break;
7456 		udelay(1);
7457 	}
7458 
7459 	if (radeon_crtc->base.enabled && mode) {
7460 		switch (tmp) {
7461 		case 0:
7462 		default:
7463 			return 4096 * 2;
7464 		case 1:
7465 			return 1920 * 2;
7466 		case 2:
7467 			return 2560 * 2;
7468 		}
7469 	}
7470 
7471 	/* controller not enabled, so no lb used */
7472 	return 0;
7473 }
7474 
7475 /**
7476  * cik_get_number_of_dram_channels - get the number of dram channels
7477  *
7478  * @rdev: radeon_device pointer
7479  *
7480  * Look up the number of video ram channels (CIK).
7481  * Used for display watermark bandwidth calculations
7482  * Returns the number of dram channels
7483  */
7484 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
7485 {
7486 	u32 tmp = RREG32(MC_SHARED_CHMAP);
7487 
7488 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
7489 	case 0:
7490 	default:
7491 		return 1;
7492 	case 1:
7493 		return 2;
7494 	case 2:
7495 		return 4;
7496 	case 3:
7497 		return 8;
7498 	case 4:
7499 		return 3;
7500 	case 5:
7501 		return 6;
7502 	case 6:
7503 		return 10;
7504 	case 7:
7505 		return 12;
7506 	case 8:
7507 		return 16;
7508 	}
7509 }
7510 
7511 struct dce8_wm_params {
7512 	u32 dram_channels; /* number of dram channels */
7513 	u32 yclk;          /* bandwidth per dram data pin in kHz */
7514 	u32 sclk;          /* engine clock in kHz */
7515 	u32 disp_clk;      /* display clock in kHz */
7516 	u32 src_width;     /* viewport width */
7517 	u32 active_time;   /* active display time in ns */
7518 	u32 blank_time;    /* blank time in ns */
7519 	bool interlaced;   /* mode is interlaced */
7520 	fixed20_12 vsc;    /* vertical scale ratio */
7521 	u32 num_heads;     /* number of active crtcs */
7522 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
7523 	u32 lb_size;       /* line buffer allocated to pipe */
7524 	u32 vtaps;         /* vertical scaler taps */
7525 };
7526 
7527 /**
7528  * dce8_dram_bandwidth - get the dram bandwidth
7529  *
7530  * @wm: watermark calculation data
7531  *
7532  * Calculate the raw dram bandwidth (CIK).
7533  * Used for display watermark bandwidth calculations
7534  * Returns the dram bandwidth in MBytes/s
7535  */
7536 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
7537 {
7538 	/* Calculate raw DRAM Bandwidth */
7539 	fixed20_12 dram_efficiency; /* 0.7 */
7540 	fixed20_12 yclk, dram_channels, bandwidth;
7541 	fixed20_12 a;
7542 
7543 	a.full = dfixed_const(1000);
7544 	yclk.full = dfixed_const(wm->yclk);
7545 	yclk.full = dfixed_div(yclk, a);
7546 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
7547 	a.full = dfixed_const(10);
7548 	dram_efficiency.full = dfixed_const(7);
7549 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
7550 	bandwidth.full = dfixed_mul(dram_channels, yclk);
7551 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
7552 
7553 	return dfixed_trunc(bandwidth);
7554 }
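/* In plain form, with yclk in kHz and 4 bytes per channel as above:
 *   dram_bandwidth (MB/s) = (yclk / 1000) * dram_channels * 4 * 0.7
 * e.g. yclk = 800000 kHz on 2 channels: 800 * 8 * 0.7 = 4480 MB/s.
 */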
7555 
7556 /**
7557  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
7558  *
7559  * @wm: watermark calculation data
7560  *
7561  * Calculate the dram bandwidth used for display (CIK).
7562  * Used for display watermark bandwidth calculations
7563  * Returns the dram bandwidth for display in MBytes/s
7564  */
7565 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7566 {
7567 	/* Calculate DRAM Bandwidth and the part allocated to display. */
7568 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
7569 	fixed20_12 yclk, dram_channels, bandwidth;
7570 	fixed20_12 a;
7571 
7572 	a.full = dfixed_const(1000);
7573 	yclk.full = dfixed_const(wm->yclk);
7574 	yclk.full = dfixed_div(yclk, a);
7575 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
7576 	a.full = dfixed_const(10);
7577 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
7578 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
7579 	bandwidth.full = dfixed_mul(dram_channels, yclk);
7580 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
7581 
7582 	return dfixed_trunc(bandwidth);
7583 }
7584 
7585 /**
7586  * dce8_data_return_bandwidth - get the data return bandwidth
7587  *
7588  * @wm: watermark calculation data
7589  *
7590  * Calculate the data return bandwidth used for display (CIK).
7591  * Used for display watermark bandwidth calculations
7592  * Returns the data return bandwidth in MBytes/s
7593  */
7594 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
7595 {
7596 	/* Calculate the display Data return Bandwidth */
7597 	fixed20_12 return_efficiency; /* 0.8 */
7598 	fixed20_12 sclk, bandwidth;
7599 	fixed20_12 a;
7600 
7601 	a.full = dfixed_const(1000);
7602 	sclk.full = dfixed_const(wm->sclk);
7603 	sclk.full = dfixed_div(sclk, a);
7604 	a.full = dfixed_const(10);
7605 	return_efficiency.full = dfixed_const(8);
7606 	return_efficiency.full = dfixed_div(return_efficiency, a);
7607 	a.full = dfixed_const(32);
7608 	bandwidth.full = dfixed_mul(a, sclk);
7609 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
7610 
7611 	return dfixed_trunc(bandwidth);
7612 }
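/* In plain form, with sclk in kHz and a 32-byte return path as above:
 *   data_return_bandwidth (MB/s) = (sclk / 1000) * 32 * 0.8
 */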
7613 
7614 /**
7615  * dce8_dmif_request_bandwidth - get the dmif bandwidth
7616  *
7617  * @wm: watermark calculation data
7618  *
7619  * Calculate the dmif bandwidth used for display (CIK).
7620  * Used for display watermark bandwidth calculations
7621  * Returns the dmif bandwidth in MBytes/s
7622  */
7623 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
7624 {
7625 	/* Calculate the DMIF Request Bandwidth */
7626 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
7627 	fixed20_12 disp_clk, bandwidth;
7628 	fixed20_12 a, b;
7629 
7630 	a.full = dfixed_const(1000);
7631 	disp_clk.full = dfixed_const(wm->disp_clk);
7632 	disp_clk.full = dfixed_div(disp_clk, a);
7633 	a.full = dfixed_const(32);
7634 	b.full = dfixed_mul(a, disp_clk);
7635 
7636 	a.full = dfixed_const(10);
7637 	disp_clk_request_efficiency.full = dfixed_const(8);
7638 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
7639 
7640 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
7641 
7642 	return dfixed_trunc(bandwidth);
7643 }
7644 
7645 /**
7646  * dce8_available_bandwidth - get the min available bandwidth
7647  *
7648  * @wm: watermark calculation data
7649  *
7650  * Calculate the min available bandwidth used for display (CIK).
7651  * Used for display watermark bandwidth calculations
7652  * Returns the min available bandwidth in MBytes/s
7653  */
7654 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
7655 {
7656 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
7657 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
7658 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
7659 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
7660 
7661 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
7662 }
7663 
7664 /**
7665  * dce8_average_bandwidth - get the average available bandwidth
7666  *
7667  * @wm: watermark calculation data
7668  *
7669  * Calculate the average available bandwidth used for display (CIK).
7670  * Used for display watermark bandwidth calculations
7671  * Returns the average available bandwidth in MBytes/s
7672  */
7673 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
7674 {
7675 	/* Calculate the display mode Average Bandwidth.
7676 	 * DisplayMode should contain the source and destination dimensions,
7677 	 * timing, etc.
7678 	 */
7679 	fixed20_12 bpp;
7680 	fixed20_12 line_time;
7681 	fixed20_12 src_width;
7682 	fixed20_12 bandwidth;
7683 	fixed20_12 a;
7684 
7685 	a.full = dfixed_const(1000);
7686 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
7687 	line_time.full = dfixed_div(line_time, a);
7688 	bpp.full = dfixed_const(wm->bytes_per_pixel);
7689 	src_width.full = dfixed_const(wm->src_width);
7690 	bandwidth.full = dfixed_mul(src_width, bpp);
7691 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
7692 	bandwidth.full = dfixed_div(bandwidth, line_time);
7693 
7694 	return dfixed_trunc(bandwidth);
7695 }
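/* In plain form, with active/blank times in ns:
 *   average_bandwidth (MB/s) = src_width * bytes_per_pixel * vsc
 *                              / ((active_time + blank_time) / 1000)
 */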
7696 
7697 /**
7698  * dce8_latency_watermark - get the latency watermark
7699  *
7700  * @wm: watermark calculation data
7701  *
7702  * Calculate the latency watermark (CIK).
7703  * Used for display watermark bandwidth calculations
7704  * Returns the latency watermark in ns
7705  */
7706 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
7707 {
7708 	/* First calculate the latency in ns */
7709 	u32 mc_latency = 2000; /* 2000 ns. */
7710 	u32 available_bandwidth = dce8_available_bandwidth(wm);
7711 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
7712 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
7713 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
7714 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
7715 		(wm->num_heads * cursor_line_pair_return_time);
7716 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
7717 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
7718 	u32 tmp, dmif_size = 12288;
7719 	fixed20_12 a, b, c;
7720 
7721 	if (wm->num_heads == 0)
7722 		return 0;
7723 
7724 	a.full = dfixed_const(2);
7725 	b.full = dfixed_const(1);
7726 	if ((wm->vsc.full > a.full) ||
7727 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
7728 	    (wm->vtaps >= 5) ||
7729 	    ((wm->vsc.full >= a.full) && wm->interlaced))
7730 		max_src_lines_per_dst_line = 4;
7731 	else
7732 		max_src_lines_per_dst_line = 2;
7733 
7734 	a.full = dfixed_const(available_bandwidth);
7735 	b.full = dfixed_const(wm->num_heads);
7736 	a.full = dfixed_div(a, b);
7737 
7738 	b.full = dfixed_const(mc_latency + 512);
7739 	c.full = dfixed_const(wm->disp_clk);
7740 	b.full = dfixed_div(b, c);
7741 
7742 	c.full = dfixed_const(dmif_size);
7743 	b.full = dfixed_div(c, b);
7744 
7745 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
7746 
7747 	b.full = dfixed_const(1000);
7748 	c.full = dfixed_const(wm->disp_clk);
7749 	b.full = dfixed_div(c, b);
7750 	c.full = dfixed_const(wm->bytes_per_pixel);
7751 	b.full = dfixed_mul(b, c);
7752 
7753 	lb_fill_bw = min(tmp, dfixed_trunc(b));
7754 
7755 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
7756 	b.full = dfixed_const(1000);
7757 	c.full = dfixed_const(lb_fill_bw);
7758 	b.full = dfixed_div(c, b);
7759 	a.full = dfixed_div(a, b);
7760 	line_fill_time = dfixed_trunc(a);
7761 
7762 	if (line_fill_time < wm->active_time)
7763 		return latency;
7764 	else
7765 		return latency + (line_fill_time - wm->active_time);
7767 }
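/* Summary of the calculation above:
 *   latency = mc_latency + dc_latency + other_heads_data_return_time
 * where the other heads charge (num_heads + 1) worst-case chunk returns
 * (512 * 8 bytes each) plus num_heads cursor line pair returns (128 * 4
 * bytes each), all in ns at the available bandwidth.  If the line
 * buffer cannot be refilled within the active display time, the
 * shortfall is added on top.
 */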
7768 
7769 /**
7770  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
7771  * average and available dram bandwidth
7772  *
7773  * @wm: watermark calculation data
7774  *
7775  * Check if the display average bandwidth fits in the display
7776  * dram bandwidth (CIK).
7777  * Used for display watermark bandwidth calculations
7778  * Returns true if the display fits, false if not.
7779  */
7780 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
7781 {
7782 	if (dce8_average_bandwidth(wm) <=
7783 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
7784 		return true;
7785 	else
7786 		return false;
7787 }
7788 
7789 /**
7790  * dce8_average_bandwidth_vs_available_bandwidth - check
7791  * average and available bandwidth
7792  *
7793  * @wm: watermark calculation data
7794  *
7795  * Check if the display average bandwidth fits in the display
7796  * available bandwidth (CIK).
7797  * Used for display watermark bandwidth calculations
7798  * Returns true if the display fits, false if not.
7799  */
7800 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
7801 {
7802 	if (dce8_average_bandwidth(wm) <=
7803 	    (dce8_available_bandwidth(wm) / wm->num_heads))
7804 		return true;
7805 	else
7806 		return false;
7807 }
7808 
7809 /**
7810  * dce8_check_latency_hiding - check latency hiding
7811  *
7812  * @wm: watermark calculation data
7813  *
7814  * Check latency hiding (CIK).
7815  * Used for display watermark bandwidth calculations
7816  * Returns true if the display fits, false if not.
7817  */
7818 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
7819 {
7820 	u32 lb_partitions = wm->lb_size / wm->src_width;
7821 	u32 line_time = wm->active_time + wm->blank_time;
7822 	u32 latency_tolerant_lines;
7823 	u32 latency_hiding;
7824 	fixed20_12 a;
7825 
7826 	a.full = dfixed_const(1);
7827 	if (wm->vsc.full > a.full)
7828 		latency_tolerant_lines = 1;
7829 	else {
7830 		if (lb_partitions <= (wm->vtaps + 1))
7831 			latency_tolerant_lines = 1;
7832 		else
7833 			latency_tolerant_lines = 2;
7834 	}
7835 
7836 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
7837 
7838 	if (dce8_latency_watermark(wm) <= latency_hiding)
7839 		return true;
7840 	else
7841 		return false;
7842 }
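/* i.e. the display "fits" when the latency watermark above can be hidden
 * by the lines the line buffer holds (latency_tolerant_lines * line_time)
 * plus the blank interval.
 */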
7843 
7844 /**
7845  * dce8_program_watermarks - program display watermarks
7846  *
7847  * @rdev: radeon_device pointer
7848  * @radeon_crtc: the selected display controller
7849  * @lb_size: line buffer size
7850  * @num_heads: number of display controllers in use
7851  *
7852  * Calculate and program the display watermarks for the
7853  * selected display controller (CIK).
7854  */
7855 static void dce8_program_watermarks(struct radeon_device *rdev,
7856 				    struct radeon_crtc *radeon_crtc,
7857 				    u32 lb_size, u32 num_heads)
7858 {
7859 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
7860 	struct dce8_wm_params wm_low, wm_high;
7861 	u32 pixel_period;
7862 	u32 line_time = 0;
7863 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
7864 	u32 tmp, wm_mask;
7865 
7866 	if (radeon_crtc->base.enabled && num_heads && mode) {
7867 		pixel_period = 1000000 / (u32)mode->clock;
7868 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
7869 
7870 		/* watermark for high clocks */
7871 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7872 		    rdev->pm.dpm_enabled) {
7873 			wm_high.yclk =
7874 				radeon_dpm_get_mclk(rdev, false) * 10;
7875 			wm_high.sclk =
7876 				radeon_dpm_get_sclk(rdev, false) * 10;
7877 		} else {
7878 			wm_high.yclk = rdev->pm.current_mclk * 10;
7879 			wm_high.sclk = rdev->pm.current_sclk * 10;
7880 		}
7881 
7882 		wm_high.disp_clk = mode->clock;
7883 		wm_high.src_width = mode->crtc_hdisplay;
7884 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
7885 		wm_high.blank_time = line_time - wm_high.active_time;
7886 		wm_high.interlaced = false;
7887 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7888 			wm_high.interlaced = true;
7889 		wm_high.vsc = radeon_crtc->vsc;
7890 		wm_high.vtaps = 1;
7891 		if (radeon_crtc->rmx_type != RMX_OFF)
7892 			wm_high.vtaps = 2;
7893 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
7894 		wm_high.lb_size = lb_size;
7895 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
7896 		wm_high.num_heads = num_heads;
7897 
7898 		/* set for high clocks */
7899 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
7900 
7901 		/* possibly force display priority to high */
7902 		/* should really do this at mode validation time... */
7903 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
7904 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
7905 		    !dce8_check_latency_hiding(&wm_high) ||
7906 		    (rdev->disp_priority == 2)) {
7907 			DRM_DEBUG_KMS("force priority to high\n");
7908 		}
7909 
7910 		/* watermark for low clocks */
7911 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
7912 		    rdev->pm.dpm_enabled) {
7913 			wm_low.yclk =
7914 				radeon_dpm_get_mclk(rdev, true) * 10;
7915 			wm_low.sclk =
7916 				radeon_dpm_get_sclk(rdev, true) * 10;
7917 		} else {
7918 			wm_low.yclk = rdev->pm.current_mclk * 10;
7919 			wm_low.sclk = rdev->pm.current_sclk * 10;
7920 		}
7921 
7922 		wm_low.disp_clk = mode->clock;
7923 		wm_low.src_width = mode->crtc_hdisplay;
7924 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
7925 		wm_low.blank_time = line_time - wm_low.active_time;
7926 		wm_low.interlaced = false;
7927 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
7928 			wm_low.interlaced = true;
7929 		wm_low.vsc = radeon_crtc->vsc;
7930 		wm_low.vtaps = 1;
7931 		if (radeon_crtc->rmx_type != RMX_OFF)
7932 			wm_low.vtaps = 2;
7933 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
7934 		wm_low.lb_size = lb_size;
7935 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
7936 		wm_low.num_heads = num_heads;
7937 
7938 		/* set for low clocks */
7939 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
7940 
7941 		/* possibly force display priority to high */
7942 		/* should really do this at mode validation time... */
7943 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
7944 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
7945 		    !dce8_check_latency_hiding(&wm_low) ||
7946 		    (rdev->disp_priority == 2)) {
7947 			DRM_DEBUG_KMS("force priority to high\n");
7948 		}
7949 	}
7950 
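	/* Two watermark sets are programmed below: set A carries the
	 * high-clock values and set B the low-clock values, presumably so
	 * DPM can pick the matching set when it changes clocks (the raw
	 * values are also saved for DPM at the end of this function).
	 */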
7951 	/* select wm A */
7952 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7953 	tmp = wm_mask;
7954 	tmp &= ~LATENCY_WATERMARK_MASK(3);
7955 	tmp |= LATENCY_WATERMARK_MASK(1);
7956 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7957 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7958 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
7959 		LATENCY_HIGH_WATERMARK(line_time)));
7960 	/* select wm B */
7961 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
7962 	tmp &= ~LATENCY_WATERMARK_MASK(3);
7963 	tmp |= LATENCY_WATERMARK_MASK(2);
7964 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
7965 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
7966 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
7967 		LATENCY_HIGH_WATERMARK(line_time)));
7968 	/* restore original selection */
7969 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
7970 
7971 	/* save values for DPM */
7972 	radeon_crtc->line_time = line_time;
7973 	radeon_crtc->wm_high = latency_watermark_a;
7974 	radeon_crtc->wm_low = latency_watermark_b;
7975 }
7976 
7977 /**
7978  * dce8_bandwidth_update - program display watermarks
7979  *
7980  * @rdev: radeon_device pointer
7981  *
7982  * Calculate and program the display watermarks and line
7983  * buffer allocation (CIK).
7984  */
7985 void dce8_bandwidth_update(struct radeon_device *rdev)
7986 {
7987 	struct drm_display_mode *mode = NULL;
7988 	u32 num_heads = 0, lb_size;
7989 	int i;
7990 
7991 	radeon_update_display_priority(rdev);
7992 
7993 	for (i = 0; i < rdev->num_crtc; i++) {
7994 		if (rdev->mode_info.crtcs[i]->base.enabled)
7995 			num_heads++;
7996 	}
7997 	for (i = 0; i < rdev->num_crtc; i++) {
7998 		mode = &rdev->mode_info.crtcs[i]->base.mode;
7999 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8000 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8001 	}
8002 }
8003 
8004 /**
8005  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8006  *
8007  * @rdev: radeon_device pointer
8008  *
8009  * Fetches a GPU clock counter snapshot (CIK).
8010  * Returns the 64 bit clock counter snapshot.
8011  */
8012 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8013 {
8014 	uint64_t clock;
8015 
8016 	mutex_lock(&rdev->gpu_clock_mutex);
8017 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8018 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8019 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8020 	mutex_unlock(&rdev->gpu_clock_mutex);
8021 	return clock;
8022 }
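/* Minimal usage sketch (illustrative only, not called anywhere): the
 * counter is free-running, so elapsed GPU clocks can be measured as
 *
 *	uint64_t t0 = cik_get_gpu_clock_counter(rdev);
 *	... run some work ...
 *	uint64_t t1 = cik_get_gpu_clock_counter(rdev);
 *	uint64_t elapsed = t1 - t0;
 */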
8023 
8024 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8025                               u32 cntl_reg, u32 status_reg)
8026 {
8027 	int r, i;
8028 	struct atom_clock_dividers dividers;
8029 	uint32_t tmp;
8030 
8031 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8032 					   clock, false, &dividers);
8033 	if (r)
8034 		return r;
8035 
8036 	tmp = RREG32_SMC(cntl_reg);
8037 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8038 	tmp |= dividers.post_divider;
8039 	WREG32_SMC(cntl_reg, tmp);
8040 
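	/* poll for the status bit to confirm the new divider took effect;
	 * 100 iterations * 10 ms gives a 1 second timeout
	 */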
8041 	for (i = 0; i < 100; i++) {
8042 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
8043 			break;
8044 		mdelay(10);
8045 	}
8046 	if (i == 100)
8047 		return -ETIMEDOUT;
8048 
8049 	return 0;
8050 }
8051 
8052 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8053 {
8054 	int r = 0;
8055 
8056 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8057 	if (r)
8058 		return r;
8059 
8060 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8061 	return r;
8062 }
8063 
8064 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8065 {
8066 	struct pci_dev *root = rdev->pdev->bus->self;
8067 	int bridge_pos, gpu_pos;
8068 	u32 speed_cntl, mask, current_data_rate;
8069 	int ret, i;
8070 	u16 tmp16;
8071 
8072 	if (radeon_pcie_gen2 == 0)
8073 		return;
8074 
8075 	if (rdev->flags & RADEON_IS_IGP)
8076 		return;
8077 
8078 	if (!(rdev->flags & RADEON_IS_PCIE))
8079 		return;
8080 
8081 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8082 	if (ret != 0)
8083 		return;
8084 
8085 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8086 		return;
8087 
8088 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8089 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8090 		LC_CURRENT_DATA_RATE_SHIFT;
8091 	if (mask & DRM_PCIE_SPEED_80) {
8092 		if (current_data_rate == 2) {
8093 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8094 			return;
8095 		}
8096 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8097 	} else if (mask & DRM_PCIE_SPEED_50) {
8098 		if (current_data_rate == 1) {
8099 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8100 			return;
8101 		}
8102 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
8103 	}
8104 
8105 	bridge_pos = pci_pcie_cap(root);
8106 	if (!bridge_pos)
8107 		return;
8108 
8109 	gpu_pos = pci_pcie_cap(rdev->pdev);
8110 	if (!gpu_pos)
8111 		return;
8112 
8113 	if (mask & DRM_PCIE_SPEED_80) {
8114 		/* re-try equalization if gen3 is not already enabled */
8115 		if (current_data_rate != 2) {
8116 			u16 bridge_cfg, gpu_cfg;
8117 			u16 bridge_cfg2, gpu_cfg2;
8118 			u32 max_lw, current_lw, tmp;
8119 
8120 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8121 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8122 
8123 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8124 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8125 
8126 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8127 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8128 
8129 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8130 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8131 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8132 
8133 			if (current_lw < max_lw) {
8134 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8135 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
8136 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8137 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8138 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8139 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
8140 				}
8141 			}
8142 
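			/* retry the equalization sequence up to 10 times
			 * (stopping early if transactions are pending):
			 * quiesce the link, request a redo of EQ, then
			 * restore the saved LNKCTL/LNKCTL2 bits on both
			 * ends of the link
			 */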
8143 			for (i = 0; i < 10; i++) {
8144 				/* check status */
8145 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8146 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8147 					break;
8148 
8149 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8150 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8151 
8152 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8153 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
8154 
8155 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8156 				tmp |= LC_SET_QUIESCE;
8157 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8158 
8159 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8160 				tmp |= LC_REDO_EQ;
8161 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8162 
8163 				mdelay(100);
8164 
8165 				/* linkctl */
8166 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8167 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8168 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8169 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8170 
8171 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8172 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8173 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8174 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8175 
8176 				/* linkctl2 */
8177 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8178 				tmp16 &= ~((1 << 4) | (7 << 9));
8179 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8180 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8181 
8182 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8183 				tmp16 &= ~((1 << 4) | (7 << 9));
8184 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8185 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8186 
8187 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8188 				tmp &= ~LC_SET_QUIESCE;
8189 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8190 			}
8191 		}
8192 	}
8193 
8194 	/* set the link speed */
8195 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
8196 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
8197 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8198 
8199 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8200 	tmp16 &= ~0xf;
8201 	if (mask & DRM_PCIE_SPEED_80)
8202 		tmp16 |= 3; /* gen3 */
8203 	else if (mask & DRM_PCIE_SPEED_50)
8204 		tmp16 |= 2; /* gen2 */
8205 	else
8206 		tmp16 |= 1; /* gen1 */
8207 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8208 
8209 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8210 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
8211 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
8212 
8213 	for (i = 0; i < rdev->usec_timeout; i++) {
8214 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8215 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
8216 			break;
8217 		udelay(1);
8218 	}
8219 }
8220 
8221 static void cik_program_aspm(struct radeon_device *rdev)
8222 {
8223 	u32 data, orig;
8224 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
8225 	bool disable_clkreq = false;
8226 
8227 	if (radeon_aspm == 0)
8228 		return;
8229 
8230 	/* XXX double check IGPs */
8231 	if (rdev->flags & RADEON_IS_IGP)
8232 		return;
8233 
8234 	if (!(rdev->flags & RADEON_IS_PCIE))
8235 		return;
8236 
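	/* the pattern below is read-modify-write: each register is read,
	 * masked and updated, and only written back when the value actually
	 * changed
	 */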
8237 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8238 	data &= ~LC_XMIT_N_FTS_MASK;
8239 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
8240 	if (orig != data)
8241 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
8242 
8243 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
8244 	data |= LC_GO_TO_RECOVERY;
8245 	if (orig != data)
8246 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
8247 
8248 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
8249 	data |= P_IGNORE_EDB_ERR;
8250 	if (orig != data)
8251 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
8252 
8253 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8254 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
8255 	data |= LC_PMI_TO_L1_DIS;
8256 	if (!disable_l0s)
8257 		data |= LC_L0S_INACTIVITY(7);
8258 
8259 	if (!disable_l1) {
8260 		data |= LC_L1_INACTIVITY(7);
8261 		data &= ~LC_PMI_TO_L1_DIS;
8262 		if (orig != data)
8263 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8264 
8265 		if (!disable_plloff_in_l1) {
8266 			bool clk_req_support;
8267 
8268 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
8269 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8270 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8271 			if (orig != data)
8272 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
8273 
8274 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
8275 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8276 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8277 			if (orig != data)
8278 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
8279 
8280 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
8281 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
8282 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
8283 			if (orig != data)
8284 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
8285 
8286 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
8287 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
8288 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
8289 			if (orig != data)
8290 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
8291 
8292 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8293 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
8294 			data |= LC_DYN_LANES_PWR_STATE(3);
8295 			if (orig != data)
8296 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
8297 
8298 			if (!disable_clkreq) {
8299 				struct pci_dev *root = rdev->pdev->bus->self;
8300 				u32 lnkcap;
8301 
8302 				clk_req_support = false;
8303 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
8304 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
8305 					clk_req_support = true;
8306 			} else {
8307 				clk_req_support = false;
8308 			}
8309 
8310 			if (clk_req_support) {
8311 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
8312 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
8313 				if (orig != data)
8314 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
8315 
8316 				orig = data = RREG32_SMC(THM_CLK_CNTL);
8317 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
8318 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
8319 				if (orig != data)
8320 					WREG32_SMC(THM_CLK_CNTL, data);
8321 
8322 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
8323 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
8324 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
8325 				if (orig != data)
8326 					WREG32_SMC(MISC_CLK_CTRL, data);
8327 
8328 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
8329 				data &= ~BCLK_AS_XCLK;
8330 				if (orig != data)
8331 					WREG32_SMC(CG_CLKPIN_CNTL, data);
8332 
8333 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
8334 				data &= ~FORCE_BIF_REFCLK_EN;
8335 				if (orig != data)
8336 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
8337 
8338 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
8339 				data &= ~MPLL_CLKOUT_SEL_MASK;
8340 				data |= MPLL_CLKOUT_SEL(4);
8341 				if (orig != data)
8342 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
8343 			}
8344 		}
8345 	} else {
8346 		if (orig != data)
8347 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8348 	}
8349 
8350 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
8351 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
8352 	if (orig != data)
8353 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
8354 
8355 	if (!disable_l0s) {
8356 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
8357 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
8358 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8359 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
8360 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
8361 				data &= ~LC_L0S_INACTIVITY_MASK;
8362 				if (orig != data)
8363 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
8364 			}
8365 		}
8366 	}
8367 }
8368