/*
 * Copyright 2012 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include "drmP.h"
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include "cikd.h"
#include "atom.h"
#include "cik_blit_shaders.h"
#include "radeon_ucode.h"
#include "clearstate_ci.h"
#include "radeon_kfd.h"

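/*
 * Two firmware naming conventions are requested below: the upper-case
 * names appear to be the legacy single-blob images, while the lower-case
 * names are the newer images with full headers, preferred when
 * rdev->new_fw is set (see ci_mc_load_microcode() below).
 */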
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");

MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");

MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");

extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);

/**
 * cik_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register
 */
int cik_get_allowed_info_register(struct radeon_device *rdev,
				  u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case GRBM_STATUS_SE2:
	case GRBM_STATUS_SE3:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
	case UVD_STATUS:
	/* TODO VCE */
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}
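
/*
 * Usage sketch (illustrative only, not wired into the driver): how a
 * caller such as the info-ioctl path is expected to use the whitelist
 * above.  The helper name below is hypothetical.
 */
static void __maybe_unused cik_info_register_example(struct radeon_device *rdev)
{
	u32 val;

	/* whitelisted registers read back their live value ... */
	if (!cik_get_allowed_info_register(rdev, GRBM_STATUS, &val))
		DRM_INFO("GRBM_STATUS = 0x%08x\n", val);
	/* ... anything off the whitelist is rejected with -EINVAL */
}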

/*
 * Indirect register accessors
 */
u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, reg);
	r = RREG32(CIK_DIDT_IND_DATA);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
	return r;
}

void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
	WREG32(CIK_DIDT_IND_INDEX, reg);
	WREG32(CIK_DIDT_IND_DATA, v);
	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
}
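
/*
 * Usage sketch (illustrative only): the DIDT block is reached through an
 * index/data register pair, so each access must hold rdev->didt_idx_lock,
 * which the accessors above take internally.  Note that because each
 * accessor locks individually, the read-modify-write below is not atomic
 * as a whole; a real caller would need higher-level serialization.
 */
static void __maybe_unused cik_didt_rmw_example(struct radeon_device *rdev,
						u32 reg, u32 clear, u32 set)
{
	u32 tmp = cik_didt_rreg(rdev, reg);

	tmp &= ~clear;
	tmp |= set;
	cik_didt_wreg(rdev, reg, tmp);
}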

/* get temperature in millidegrees */
int ci_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
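
/*
 * Worked example for ci_get_temp(): a raw CTF_TEMP field of 0x45 (69)
 * has bit 9 clear, so it is reported as 69 * 1000 = 69000 millidegrees
 * Celsius; any reading with bit 9 set is clamped to 255 C.
 */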

/* get temperature in millidegrees */
int kv_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = RREG32_SMC(0xC0300E0C);

	if (temp)
		actual_temp = (temp / 8) - 49;
	else
		actual_temp = 0;

	actual_temp = actual_temp * 1000;

	return actual_temp;
}
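
/*
 * Worked example for kv_get_temp(): a raw SMC reading of 776 yields
 * (776 / 8) - 49 = 48, reported as 48000 millidegrees Celsius; a zero
 * reading is reported as 0.
 */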

/*
 * Indirect register accessors (PCIe port registers)
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}

void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
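
/*
 * The (void)RREG32() read-backs above flush the posted register writes:
 * the index write is committed before the data register is touched, and
 * on the write side the data write has landed before the lock is dropped.
 */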
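
/*
 * RLC save/restore register lists.  Each entry appears to pack a
 * GRBM-style SE/SH/instance select into the upper 16 bits and the
 * register offset in dwords into the lower 16, followed by a value
 * placeholder; the bare words (0x3, 0x5) look like run lengths for the
 * register block that follows.  The lists are handed to the RLC via
 * sumo_rlc_init() (declared above).
 */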
267 static const u32 spectre_rlc_save_restore_register_list[] =
268 {
269 	(0x0e00 << 16) | (0xc12c >> 2),
270 	0x00000000,
271 	(0x0e00 << 16) | (0xc140 >> 2),
272 	0x00000000,
273 	(0x0e00 << 16) | (0xc150 >> 2),
274 	0x00000000,
275 	(0x0e00 << 16) | (0xc15c >> 2),
276 	0x00000000,
277 	(0x0e00 << 16) | (0xc168 >> 2),
278 	0x00000000,
279 	(0x0e00 << 16) | (0xc170 >> 2),
280 	0x00000000,
281 	(0x0e00 << 16) | (0xc178 >> 2),
282 	0x00000000,
283 	(0x0e00 << 16) | (0xc204 >> 2),
284 	0x00000000,
285 	(0x0e00 << 16) | (0xc2b4 >> 2),
286 	0x00000000,
287 	(0x0e00 << 16) | (0xc2b8 >> 2),
288 	0x00000000,
289 	(0x0e00 << 16) | (0xc2bc >> 2),
290 	0x00000000,
291 	(0x0e00 << 16) | (0xc2c0 >> 2),
292 	0x00000000,
293 	(0x0e00 << 16) | (0x8228 >> 2),
294 	0x00000000,
295 	(0x0e00 << 16) | (0x829c >> 2),
296 	0x00000000,
297 	(0x0e00 << 16) | (0x869c >> 2),
298 	0x00000000,
299 	(0x0600 << 16) | (0x98f4 >> 2),
300 	0x00000000,
301 	(0x0e00 << 16) | (0x98f8 >> 2),
302 	0x00000000,
303 	(0x0e00 << 16) | (0x9900 >> 2),
304 	0x00000000,
305 	(0x0e00 << 16) | (0xc260 >> 2),
306 	0x00000000,
307 	(0x0e00 << 16) | (0x90e8 >> 2),
308 	0x00000000,
309 	(0x0e00 << 16) | (0x3c000 >> 2),
310 	0x00000000,
311 	(0x0e00 << 16) | (0x3c00c >> 2),
312 	0x00000000,
313 	(0x0e00 << 16) | (0x8c1c >> 2),
314 	0x00000000,
315 	(0x0e00 << 16) | (0x9700 >> 2),
316 	0x00000000,
317 	(0x0e00 << 16) | (0xcd20 >> 2),
318 	0x00000000,
319 	(0x4e00 << 16) | (0xcd20 >> 2),
320 	0x00000000,
321 	(0x5e00 << 16) | (0xcd20 >> 2),
322 	0x00000000,
323 	(0x6e00 << 16) | (0xcd20 >> 2),
324 	0x00000000,
325 	(0x7e00 << 16) | (0xcd20 >> 2),
326 	0x00000000,
327 	(0x8e00 << 16) | (0xcd20 >> 2),
328 	0x00000000,
329 	(0x9e00 << 16) | (0xcd20 >> 2),
330 	0x00000000,
331 	(0xae00 << 16) | (0xcd20 >> 2),
332 	0x00000000,
333 	(0xbe00 << 16) | (0xcd20 >> 2),
334 	0x00000000,
335 	(0x0e00 << 16) | (0x89bc >> 2),
336 	0x00000000,
337 	(0x0e00 << 16) | (0x8900 >> 2),
338 	0x00000000,
339 	0x3,
340 	(0x0e00 << 16) | (0xc130 >> 2),
341 	0x00000000,
342 	(0x0e00 << 16) | (0xc134 >> 2),
343 	0x00000000,
344 	(0x0e00 << 16) | (0xc1fc >> 2),
345 	0x00000000,
346 	(0x0e00 << 16) | (0xc208 >> 2),
347 	0x00000000,
348 	(0x0e00 << 16) | (0xc264 >> 2),
349 	0x00000000,
350 	(0x0e00 << 16) | (0xc268 >> 2),
351 	0x00000000,
352 	(0x0e00 << 16) | (0xc26c >> 2),
353 	0x00000000,
354 	(0x0e00 << 16) | (0xc270 >> 2),
355 	0x00000000,
356 	(0x0e00 << 16) | (0xc274 >> 2),
357 	0x00000000,
358 	(0x0e00 << 16) | (0xc278 >> 2),
359 	0x00000000,
360 	(0x0e00 << 16) | (0xc27c >> 2),
361 	0x00000000,
362 	(0x0e00 << 16) | (0xc280 >> 2),
363 	0x00000000,
364 	(0x0e00 << 16) | (0xc284 >> 2),
365 	0x00000000,
366 	(0x0e00 << 16) | (0xc288 >> 2),
367 	0x00000000,
368 	(0x0e00 << 16) | (0xc28c >> 2),
369 	0x00000000,
370 	(0x0e00 << 16) | (0xc290 >> 2),
371 	0x00000000,
372 	(0x0e00 << 16) | (0xc294 >> 2),
373 	0x00000000,
374 	(0x0e00 << 16) | (0xc298 >> 2),
375 	0x00000000,
376 	(0x0e00 << 16) | (0xc29c >> 2),
377 	0x00000000,
378 	(0x0e00 << 16) | (0xc2a0 >> 2),
379 	0x00000000,
380 	(0x0e00 << 16) | (0xc2a4 >> 2),
381 	0x00000000,
382 	(0x0e00 << 16) | (0xc2a8 >> 2),
383 	0x00000000,
384 	(0x0e00 << 16) | (0xc2ac  >> 2),
385 	0x00000000,
386 	(0x0e00 << 16) | (0xc2b0 >> 2),
387 	0x00000000,
388 	(0x0e00 << 16) | (0x301d0 >> 2),
389 	0x00000000,
390 	(0x0e00 << 16) | (0x30238 >> 2),
391 	0x00000000,
392 	(0x0e00 << 16) | (0x30250 >> 2),
393 	0x00000000,
394 	(0x0e00 << 16) | (0x30254 >> 2),
395 	0x00000000,
396 	(0x0e00 << 16) | (0x30258 >> 2),
397 	0x00000000,
398 	(0x0e00 << 16) | (0x3025c >> 2),
399 	0x00000000,
400 	(0x4e00 << 16) | (0xc900 >> 2),
401 	0x00000000,
402 	(0x5e00 << 16) | (0xc900 >> 2),
403 	0x00000000,
404 	(0x6e00 << 16) | (0xc900 >> 2),
405 	0x00000000,
406 	(0x7e00 << 16) | (0xc900 >> 2),
407 	0x00000000,
408 	(0x8e00 << 16) | (0xc900 >> 2),
409 	0x00000000,
410 	(0x9e00 << 16) | (0xc900 >> 2),
411 	0x00000000,
412 	(0xae00 << 16) | (0xc900 >> 2),
413 	0x00000000,
414 	(0xbe00 << 16) | (0xc900 >> 2),
415 	0x00000000,
416 	(0x4e00 << 16) | (0xc904 >> 2),
417 	0x00000000,
418 	(0x5e00 << 16) | (0xc904 >> 2),
419 	0x00000000,
420 	(0x6e00 << 16) | (0xc904 >> 2),
421 	0x00000000,
422 	(0x7e00 << 16) | (0xc904 >> 2),
423 	0x00000000,
424 	(0x8e00 << 16) | (0xc904 >> 2),
425 	0x00000000,
426 	(0x9e00 << 16) | (0xc904 >> 2),
427 	0x00000000,
428 	(0xae00 << 16) | (0xc904 >> 2),
429 	0x00000000,
430 	(0xbe00 << 16) | (0xc904 >> 2),
431 	0x00000000,
432 	(0x4e00 << 16) | (0xc908 >> 2),
433 	0x00000000,
434 	(0x5e00 << 16) | (0xc908 >> 2),
435 	0x00000000,
436 	(0x6e00 << 16) | (0xc908 >> 2),
437 	0x00000000,
438 	(0x7e00 << 16) | (0xc908 >> 2),
439 	0x00000000,
440 	(0x8e00 << 16) | (0xc908 >> 2),
441 	0x00000000,
442 	(0x9e00 << 16) | (0xc908 >> 2),
443 	0x00000000,
444 	(0xae00 << 16) | (0xc908 >> 2),
445 	0x00000000,
446 	(0xbe00 << 16) | (0xc908 >> 2),
447 	0x00000000,
448 	(0x4e00 << 16) | (0xc90c >> 2),
449 	0x00000000,
450 	(0x5e00 << 16) | (0xc90c >> 2),
451 	0x00000000,
452 	(0x6e00 << 16) | (0xc90c >> 2),
453 	0x00000000,
454 	(0x7e00 << 16) | (0xc90c >> 2),
455 	0x00000000,
456 	(0x8e00 << 16) | (0xc90c >> 2),
457 	0x00000000,
458 	(0x9e00 << 16) | (0xc90c >> 2),
459 	0x00000000,
460 	(0xae00 << 16) | (0xc90c >> 2),
461 	0x00000000,
462 	(0xbe00 << 16) | (0xc90c >> 2),
463 	0x00000000,
464 	(0x4e00 << 16) | (0xc910 >> 2),
465 	0x00000000,
466 	(0x5e00 << 16) | (0xc910 >> 2),
467 	0x00000000,
468 	(0x6e00 << 16) | (0xc910 >> 2),
469 	0x00000000,
470 	(0x7e00 << 16) | (0xc910 >> 2),
471 	0x00000000,
472 	(0x8e00 << 16) | (0xc910 >> 2),
473 	0x00000000,
474 	(0x9e00 << 16) | (0xc910 >> 2),
475 	0x00000000,
476 	(0xae00 << 16) | (0xc910 >> 2),
477 	0x00000000,
478 	(0xbe00 << 16) | (0xc910 >> 2),
479 	0x00000000,
480 	(0x0e00 << 16) | (0xc99c >> 2),
481 	0x00000000,
482 	(0x0e00 << 16) | (0x9834 >> 2),
483 	0x00000000,
484 	(0x0000 << 16) | (0x30f00 >> 2),
485 	0x00000000,
486 	(0x0001 << 16) | (0x30f00 >> 2),
487 	0x00000000,
488 	(0x0000 << 16) | (0x30f04 >> 2),
489 	0x00000000,
490 	(0x0001 << 16) | (0x30f04 >> 2),
491 	0x00000000,
492 	(0x0000 << 16) | (0x30f08 >> 2),
493 	0x00000000,
494 	(0x0001 << 16) | (0x30f08 >> 2),
495 	0x00000000,
496 	(0x0000 << 16) | (0x30f0c >> 2),
497 	0x00000000,
498 	(0x0001 << 16) | (0x30f0c >> 2),
499 	0x00000000,
500 	(0x0600 << 16) | (0x9b7c >> 2),
501 	0x00000000,
502 	(0x0e00 << 16) | (0x8a14 >> 2),
503 	0x00000000,
504 	(0x0e00 << 16) | (0x8a18 >> 2),
505 	0x00000000,
506 	(0x0600 << 16) | (0x30a00 >> 2),
507 	0x00000000,
508 	(0x0e00 << 16) | (0x8bf0 >> 2),
509 	0x00000000,
510 	(0x0e00 << 16) | (0x8bcc >> 2),
511 	0x00000000,
512 	(0x0e00 << 16) | (0x8b24 >> 2),
513 	0x00000000,
514 	(0x0e00 << 16) | (0x30a04 >> 2),
515 	0x00000000,
516 	(0x0600 << 16) | (0x30a10 >> 2),
517 	0x00000000,
518 	(0x0600 << 16) | (0x30a14 >> 2),
519 	0x00000000,
520 	(0x0600 << 16) | (0x30a18 >> 2),
521 	0x00000000,
522 	(0x0600 << 16) | (0x30a2c >> 2),
523 	0x00000000,
524 	(0x0e00 << 16) | (0xc700 >> 2),
525 	0x00000000,
526 	(0x0e00 << 16) | (0xc704 >> 2),
527 	0x00000000,
528 	(0x0e00 << 16) | (0xc708 >> 2),
529 	0x00000000,
530 	(0x0e00 << 16) | (0xc768 >> 2),
531 	0x00000000,
532 	(0x0400 << 16) | (0xc770 >> 2),
533 	0x00000000,
534 	(0x0400 << 16) | (0xc774 >> 2),
535 	0x00000000,
536 	(0x0400 << 16) | (0xc778 >> 2),
537 	0x00000000,
538 	(0x0400 << 16) | (0xc77c >> 2),
539 	0x00000000,
540 	(0x0400 << 16) | (0xc780 >> 2),
541 	0x00000000,
542 	(0x0400 << 16) | (0xc784 >> 2),
543 	0x00000000,
544 	(0x0400 << 16) | (0xc788 >> 2),
545 	0x00000000,
546 	(0x0400 << 16) | (0xc78c >> 2),
547 	0x00000000,
548 	(0x0400 << 16) | (0xc798 >> 2),
549 	0x00000000,
550 	(0x0400 << 16) | (0xc79c >> 2),
551 	0x00000000,
552 	(0x0400 << 16) | (0xc7a0 >> 2),
553 	0x00000000,
554 	(0x0400 << 16) | (0xc7a4 >> 2),
555 	0x00000000,
556 	(0x0400 << 16) | (0xc7a8 >> 2),
557 	0x00000000,
558 	(0x0400 << 16) | (0xc7ac >> 2),
559 	0x00000000,
560 	(0x0400 << 16) | (0xc7b0 >> 2),
561 	0x00000000,
562 	(0x0400 << 16) | (0xc7b4 >> 2),
563 	0x00000000,
564 	(0x0e00 << 16) | (0x9100 >> 2),
565 	0x00000000,
566 	(0x0e00 << 16) | (0x3c010 >> 2),
567 	0x00000000,
568 	(0x0e00 << 16) | (0x92a8 >> 2),
569 	0x00000000,
570 	(0x0e00 << 16) | (0x92ac >> 2),
571 	0x00000000,
572 	(0x0e00 << 16) | (0x92b4 >> 2),
573 	0x00000000,
574 	(0x0e00 << 16) | (0x92b8 >> 2),
575 	0x00000000,
576 	(0x0e00 << 16) | (0x92bc >> 2),
577 	0x00000000,
578 	(0x0e00 << 16) | (0x92c0 >> 2),
579 	0x00000000,
580 	(0x0e00 << 16) | (0x92c4 >> 2),
581 	0x00000000,
582 	(0x0e00 << 16) | (0x92c8 >> 2),
583 	0x00000000,
584 	(0x0e00 << 16) | (0x92cc >> 2),
585 	0x00000000,
586 	(0x0e00 << 16) | (0x92d0 >> 2),
587 	0x00000000,
588 	(0x0e00 << 16) | (0x8c00 >> 2),
589 	0x00000000,
590 	(0x0e00 << 16) | (0x8c04 >> 2),
591 	0x00000000,
592 	(0x0e00 << 16) | (0x8c20 >> 2),
593 	0x00000000,
594 	(0x0e00 << 16) | (0x8c38 >> 2),
595 	0x00000000,
596 	(0x0e00 << 16) | (0x8c3c >> 2),
597 	0x00000000,
598 	(0x0e00 << 16) | (0xae00 >> 2),
599 	0x00000000,
600 	(0x0e00 << 16) | (0x9604 >> 2),
601 	0x00000000,
602 	(0x0e00 << 16) | (0xac08 >> 2),
603 	0x00000000,
604 	(0x0e00 << 16) | (0xac0c >> 2),
605 	0x00000000,
606 	(0x0e00 << 16) | (0xac10 >> 2),
607 	0x00000000,
608 	(0x0e00 << 16) | (0xac14 >> 2),
609 	0x00000000,
610 	(0x0e00 << 16) | (0xac58 >> 2),
611 	0x00000000,
612 	(0x0e00 << 16) | (0xac68 >> 2),
613 	0x00000000,
614 	(0x0e00 << 16) | (0xac6c >> 2),
615 	0x00000000,
616 	(0x0e00 << 16) | (0xac70 >> 2),
617 	0x00000000,
618 	(0x0e00 << 16) | (0xac74 >> 2),
619 	0x00000000,
620 	(0x0e00 << 16) | (0xac78 >> 2),
621 	0x00000000,
622 	(0x0e00 << 16) | (0xac7c >> 2),
623 	0x00000000,
624 	(0x0e00 << 16) | (0xac80 >> 2),
625 	0x00000000,
626 	(0x0e00 << 16) | (0xac84 >> 2),
627 	0x00000000,
628 	(0x0e00 << 16) | (0xac88 >> 2),
629 	0x00000000,
630 	(0x0e00 << 16) | (0xac8c >> 2),
631 	0x00000000,
632 	(0x0e00 << 16) | (0x970c >> 2),
633 	0x00000000,
634 	(0x0e00 << 16) | (0x9714 >> 2),
635 	0x00000000,
636 	(0x0e00 << 16) | (0x9718 >> 2),
637 	0x00000000,
638 	(0x0e00 << 16) | (0x971c >> 2),
639 	0x00000000,
640 	(0x0e00 << 16) | (0x31068 >> 2),
641 	0x00000000,
642 	(0x4e00 << 16) | (0x31068 >> 2),
643 	0x00000000,
644 	(0x5e00 << 16) | (0x31068 >> 2),
645 	0x00000000,
646 	(0x6e00 << 16) | (0x31068 >> 2),
647 	0x00000000,
648 	(0x7e00 << 16) | (0x31068 >> 2),
649 	0x00000000,
650 	(0x8e00 << 16) | (0x31068 >> 2),
651 	0x00000000,
652 	(0x9e00 << 16) | (0x31068 >> 2),
653 	0x00000000,
654 	(0xae00 << 16) | (0x31068 >> 2),
655 	0x00000000,
656 	(0xbe00 << 16) | (0x31068 >> 2),
657 	0x00000000,
658 	(0x0e00 << 16) | (0xcd10 >> 2),
659 	0x00000000,
660 	(0x0e00 << 16) | (0xcd14 >> 2),
661 	0x00000000,
662 	(0x0e00 << 16) | (0x88b0 >> 2),
663 	0x00000000,
664 	(0x0e00 << 16) | (0x88b4 >> 2),
665 	0x00000000,
666 	(0x0e00 << 16) | (0x88b8 >> 2),
667 	0x00000000,
668 	(0x0e00 << 16) | (0x88bc >> 2),
669 	0x00000000,
670 	(0x0400 << 16) | (0x89c0 >> 2),
671 	0x00000000,
672 	(0x0e00 << 16) | (0x88c4 >> 2),
673 	0x00000000,
674 	(0x0e00 << 16) | (0x88c8 >> 2),
675 	0x00000000,
676 	(0x0e00 << 16) | (0x88d0 >> 2),
677 	0x00000000,
678 	(0x0e00 << 16) | (0x88d4 >> 2),
679 	0x00000000,
680 	(0x0e00 << 16) | (0x88d8 >> 2),
681 	0x00000000,
682 	(0x0e00 << 16) | (0x8980 >> 2),
683 	0x00000000,
684 	(0x0e00 << 16) | (0x30938 >> 2),
685 	0x00000000,
686 	(0x0e00 << 16) | (0x3093c >> 2),
687 	0x00000000,
688 	(0x0e00 << 16) | (0x30940 >> 2),
689 	0x00000000,
690 	(0x0e00 << 16) | (0x89a0 >> 2),
691 	0x00000000,
692 	(0x0e00 << 16) | (0x30900 >> 2),
693 	0x00000000,
694 	(0x0e00 << 16) | (0x30904 >> 2),
695 	0x00000000,
696 	(0x0e00 << 16) | (0x89b4 >> 2),
697 	0x00000000,
698 	(0x0e00 << 16) | (0x3c210 >> 2),
699 	0x00000000,
700 	(0x0e00 << 16) | (0x3c214 >> 2),
701 	0x00000000,
702 	(0x0e00 << 16) | (0x3c218 >> 2),
703 	0x00000000,
704 	(0x0e00 << 16) | (0x8904 >> 2),
705 	0x00000000,
706 	0x5,
707 	(0x0e00 << 16) | (0x8c28 >> 2),
708 	(0x0e00 << 16) | (0x8c2c >> 2),
709 	(0x0e00 << 16) | (0x8c30 >> 2),
710 	(0x0e00 << 16) | (0x8c34 >> 2),
711 	(0x0e00 << 16) | (0x9600 >> 2),
712 };
713 
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};

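/*
 * "Golden" register tables: (offset, and_mask, or_value) triplets applied
 * by radeon_program_register_sequence().  A sketch of how one triplet is
 * applied, per that helper in radeon_device.c:
 *
 *	if (and_mask == 0xffffffff)
 *		tmp = or_value;
 *	else
 *		tmp = (RREG32(offset) & ~and_mask) | or_value;
 *	WREG32(offset, tmp);
 */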
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};

static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};

static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};

static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};

static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};

static void cik_init_golden_registers(struct radeon_device *rdev)
{
	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
	mutex_lock(&rdev->grbm_idx_mutex);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
	mutex_unlock(&rdev->grbm_idx_mutex);
}

/**
 * cik_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (CIK).
 */
u32 cik_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;

	if (rdev->flags & RADEON_IS_IGP) {
		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
			return reference_clock / 2;
	} else {
		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
			return reference_clock / 4;
	}
	return reference_clock;
}
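
/*
 * Worked example (assuming the usual 10 kHz units of the radeon clock
 * code): with reference_freq == 10000 (100 MHz), an IGP with
 * GPU_COUNTER_CLK set reports 5000 (50 MHz), while a dGPU with
 * XTALIN_DIVIDE set reports 2500 (25 MHz).
 */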

/**
 * cik_mm_rdoorbell - read a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 *
 * Returns the value in the doorbell aperture at the
 * requested doorbell index (CIK).
 */
u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
{
	if (index < rdev->doorbell.num_doorbells) {
		return readl(rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
		return 0;
	}
}

/**
 * cik_mm_wdoorbell - write a doorbell dword
 *
 * @rdev: radeon_device pointer
 * @index: doorbell index
 * @v: value to write
 *
 * Writes @v to the doorbell aperture at the
 * requested doorbell index (CIK).
 */
void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
{
	if (index < rdev->doorbell.num_doorbells) {
		writel(v, rdev->doorbell.ptr + index);
	} else {
		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
	}
}
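
/*
 * Usage sketch (illustrative only): rings that use doorbells (compute,
 * SDMA) kick the hardware by writing their new write pointer through
 * this aperture after updating the ring.
 */
static void __maybe_unused cik_ring_doorbell_example(struct radeon_device *rdev,
						     struct radeon_ring *ring)
{
	/* out-of-range indices are rejected with a DRM_ERROR inside */
	cik_mm_wdoorbell(rdev, ring->doorbell_index, ring->wptr);
}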
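
/*
 * The tables below are {index, data} pairs that ci_mc_load_microcode()
 * programs through MC_SEQ_IO_DEBUG_INDEX / MC_SEQ_IO_DEBUG_DATA while
 * the MC firmware is being loaded.
 */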
1765 #define BONAIRE_IO_MC_REGS_SIZE 36
1766 
1767 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
1768 {
1769 	{0x00000070, 0x04400000},
1770 	{0x00000071, 0x80c01803},
1771 	{0x00000072, 0x00004004},
1772 	{0x00000073, 0x00000100},
1773 	{0x00000074, 0x00ff0000},
1774 	{0x00000075, 0x34000000},
1775 	{0x00000076, 0x08000014},
1776 	{0x00000077, 0x00cc08ec},
1777 	{0x00000078, 0x00000400},
1778 	{0x00000079, 0x00000000},
1779 	{0x0000007a, 0x04090000},
1780 	{0x0000007c, 0x00000000},
1781 	{0x0000007e, 0x4408a8e8},
1782 	{0x0000007f, 0x00000304},
1783 	{0x00000080, 0x00000000},
1784 	{0x00000082, 0x00000001},
1785 	{0x00000083, 0x00000002},
1786 	{0x00000084, 0xf3e4f400},
1787 	{0x00000085, 0x052024e3},
1788 	{0x00000087, 0x00000000},
1789 	{0x00000088, 0x01000000},
1790 	{0x0000008a, 0x1c0a0000},
1791 	{0x0000008b, 0xff010000},
1792 	{0x0000008d, 0xffffefff},
1793 	{0x0000008e, 0xfff3efff},
1794 	{0x0000008f, 0xfff3efbf},
1795 	{0x00000092, 0xf7ffffff},
1796 	{0x00000093, 0xffffff7f},
1797 	{0x00000095, 0x00101101},
1798 	{0x00000096, 0x00000fff},
1799 	{0x00000097, 0x00116fff},
1800 	{0x00000098, 0x60010000},
1801 	{0x00000099, 0x10010000},
1802 	{0x0000009a, 0x00006000},
1803 	{0x0000009b, 0x00001000},
1804 	{0x0000009f, 0x00b48000}
1805 };
1806 
1807 #define HAWAII_IO_MC_REGS_SIZE 22
1808 
1809 static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
1810 {
1811 	{0x0000007d, 0x40000000},
1812 	{0x0000007e, 0x40180304},
1813 	{0x0000007f, 0x0000ff00},
1814 	{0x00000081, 0x00000000},
1815 	{0x00000083, 0x00000800},
1816 	{0x00000086, 0x00000000},
1817 	{0x00000087, 0x00000100},
1818 	{0x00000088, 0x00020100},
1819 	{0x00000089, 0x00000000},
1820 	{0x0000008b, 0x00040000},
1821 	{0x0000008c, 0x00000100},
1822 	{0x0000008e, 0xff010000},
1823 	{0x00000090, 0xffffefff},
1824 	{0x00000091, 0xfff3efff},
1825 	{0x00000092, 0xfff3efbf},
1826 	{0x00000093, 0xf7ffffff},
1827 	{0x00000094, 0xffffff7f},
1828 	{0x00000095, 0x00000fff},
1829 	{0x00000096, 0x00116fff},
1830 	{0x00000097, 0x60010000},
1831 	{0x00000098, 0x10010000},
1832 	{0x0000009f, 0x00c79000}
1833 };
1834 
1836 /**
1837  * cik_srbm_select - select specific register instances
1838  *
1839  * @rdev: radeon_device pointer
1840  * @me: selected ME (micro engine)
1841  * @pipe: pipe
1842  * @queue: queue
1843  * @vmid: VMID
1844  *
 * Switches the currently active register instances.  Some
1846  * registers are instanced per VMID, others are instanced per
1847  * me/pipe/queue combination.
1848  */
1849 static void cik_srbm_select(struct radeon_device *rdev,
1850 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1851 {
1852 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1853 			     MEID(me & 0x3) |
1854 			     VMID(vmid & 0xf) |
1855 			     QUEUEID(queue & 0x7));
1856 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1857 }
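
/*
 * Minimal usage sketch (callers in this file follow this pattern, holding
 * rdev->srbm_mutex around the selection):
 *
 *	mutex_lock(&rdev->srbm_mutex);
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program the per-queue/per-vmid registers ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *	mutex_unlock(&rdev->srbm_mutex);
 */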
1858 
1859 /* ucode loading */
1860 /**
1861  * ci_mc_load_microcode - load MC ucode into the hw
1862  *
1863  * @rdev: radeon_device pointer
1864  *
1865  * Load the GDDR MC ucode into the hw (CIK).
1866  * Returns 0 on success, error on failure.
1867  */
1868 int ci_mc_load_microcode(struct radeon_device *rdev)
1869 {
1870 	const __be32 *fw_data = NULL;
1871 	const __le32 *new_fw_data = NULL;
	u32 running, tmp;
1873 	u32 *io_mc_regs = NULL;
1874 	const __le32 *new_io_mc_regs = NULL;
1875 	int i, regs_size, ucode_size;
1876 
1877 	if (!rdev->mc_fw)
1878 		return -EINVAL;
1879 
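	/*
	 * New-style firmware carries a header describing the io-debug
	 * register table and the ucode payload; legacy firmware is a raw
	 * big-endian blob used with the static register tables above.
	 */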
1880 	if (rdev->new_fw) {
1881 		const struct mc_firmware_header_v1_0 *hdr =
1882 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1883 
1884 		radeon_ucode_print_mc_hdr(&hdr->header);
1885 
1886 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1887 		new_io_mc_regs = (const __le32 *)
1888 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1889 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1890 		new_fw_data = (const __le32 *)
1891 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1892 	} else {
1893 		ucode_size = rdev->mc_fw->size / 4;
1894 
1895 		switch (rdev->family) {
1896 		case CHIP_BONAIRE:
1897 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1898 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1899 			break;
1900 		case CHIP_HAWAII:
1901 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1902 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1903 			break;
1904 		default:
1905 			return -EINVAL;
1906 		}
1907 		fw_data = (const __be32 *)rdev->mc_fw->data;
1908 	}
1909 
1910 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1911 
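	/* skip the load if the MC engine is already running */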
1912 	if (running == 0) {
1918 		/* reset the engine and set to writable */
1919 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1920 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1921 
1922 		/* load mc io regs */
1923 		for (i = 0; i < regs_size; i++) {
1924 			if (rdev->new_fw) {
1925 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1926 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1927 			} else {
1928 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1929 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1930 			}
1931 		}
1932 
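		/* extra init for one specific board, keyed on PCI device id and MC_SEQ_MISC0 */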
1933 		tmp = RREG32(MC_SEQ_MISC0);
1934 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1935 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1936 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1937 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1938 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1939 		}
1940 
1941 		/* load the MC ucode */
1942 		for (i = 0; i < ucode_size; i++) {
1943 			if (rdev->new_fw)
1944 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1945 			else
1946 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1947 		}
1948 
1949 		/* put the engine back into the active state */
1950 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1951 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1952 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1953 
1954 		/* wait for training to complete */
1955 		for (i = 0; i < rdev->usec_timeout; i++) {
1956 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1957 				break;
1958 			udelay(1);
1959 		}
1960 		for (i = 0; i < rdev->usec_timeout; i++) {
1961 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1962 				break;
1963 			udelay(1);
1964 		}
1968 	}
1969 
1970 	return 0;
1971 }
1972 
1973 /**
1974  * cik_init_microcode - load ucode images from disk
1975  *
1976  * @rdev: radeon_device pointer
1977  *
1978  * Use the firmware interface to load the ucode images into
1979  * the driver (not loaded into hw).
1980  * Returns 0 on success, error on failure.
1981  */
1982 static int cik_init_microcode(struct radeon_device *rdev)
1983 {
1984 	const char *chip_name;
1985 	const char *new_chip_name;
1986 	size_t pfp_req_size, me_req_size, ce_req_size,
1987 		mec_req_size, rlc_req_size, mc_req_size = 0,
1988 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1989 	char fw_name[30];
1990 	int new_fw = 0;
1991 	int err;
1992 	int num_fw;
1993 
1994 	DRM_DEBUG("\n");
1995 
1996 	switch (rdev->family) {
1997 	case CHIP_BONAIRE:
1998 		chip_name = "BONAIRE";
1999 		new_chip_name = "bonaire";
2000 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2001 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2002 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2003 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2004 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2005 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2006 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2007 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2008 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2009 		num_fw = 8;
2010 		break;
2011 	case CHIP_HAWAII:
2012 		chip_name = "HAWAII";
2013 		new_chip_name = "hawaii";
2014 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2016 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2019 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2020 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2021 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2022 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2023 		num_fw = 8;
2024 		break;
2025 	case CHIP_KAVERI:
2026 		chip_name = "KAVERI";
2027 		new_chip_name = "kaveri";
2028 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2029 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2030 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2031 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2032 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2033 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2034 		num_fw = 7;
2035 		break;
2036 	case CHIP_KABINI:
2037 		chip_name = "KABINI";
2038 		new_chip_name = "kabini";
2039 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2040 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2041 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2042 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2043 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2044 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2045 		num_fw = 6;
2046 		break;
2047 	case CHIP_MULLINS:
2048 		chip_name = "MULLINS";
2049 		new_chip_name = "mullins";
2050 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2051 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2052 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2053 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2054 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2055 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2056 		num_fw = 6;
2057 		break;
	default:
		BUG();
2059 	}
2060 
2061 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2062 
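	/*
	 * For each block: try the new-style (lowercase) image first and
	 * validate it; otherwise fall back to the legacy (uppercase) image,
	 * which can only be checked by size.
	 */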
2063 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2064 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2065 	if (err) {
2066 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2067 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2068 		if (err)
2069 			goto out;
2070 		if (rdev->pfp_fw->size != pfp_req_size) {
2071 			printk(KERN_ERR
2072 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2073 			       rdev->pfp_fw->size, fw_name);
2074 			err = -EINVAL;
2075 			goto out;
2076 		}
2077 	} else {
2078 		err = radeon_ucode_validate(rdev->pfp_fw);
2079 		if (err) {
2080 			printk(KERN_ERR
2081 			       "cik_fw: validation failed for firmware \"%s\"\n",
2082 			       fw_name);
2083 			goto out;
2084 		} else {
2085 			new_fw++;
2086 		}
2087 	}
2088 
2089 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2090 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2091 	if (err) {
2092 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2093 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2094 		if (err)
2095 			goto out;
2096 		if (rdev->me_fw->size != me_req_size) {
2097 			printk(KERN_ERR
2098 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099 			       rdev->me_fw->size, fw_name);
			err = -EINVAL;
			goto out;
2101 		}
2102 	} else {
2103 		err = radeon_ucode_validate(rdev->me_fw);
2104 		if (err) {
2105 			printk(KERN_ERR
2106 			       "cik_fw: validation failed for firmware \"%s\"\n",
2107 			       fw_name);
2108 			goto out;
2109 		} else {
2110 			new_fw++;
2111 		}
2112 	}
2113 
2114 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116 	if (err) {
2117 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119 		if (err)
2120 			goto out;
2121 		if (rdev->ce_fw->size != ce_req_size) {
2122 			printk(KERN_ERR
2123 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2124 			       rdev->ce_fw->size, fw_name);
			err = -EINVAL;
			goto out;
2126 		}
2127 	} else {
2128 		err = radeon_ucode_validate(rdev->ce_fw);
2129 		if (err) {
2130 			printk(KERN_ERR
2131 			       "cik_fw: validation failed for firmware \"%s\"\n",
2132 			       fw_name);
2133 			goto out;
2134 		} else {
2135 			new_fw++;
2136 		}
2137 	}
2138 
2139 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2140 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2141 	if (err) {
2142 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2143 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2144 		if (err)
2145 			goto out;
2146 		if (rdev->mec_fw->size != mec_req_size) {
2147 			printk(KERN_ERR
2148 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2149 			       rdev->mec_fw->size, fw_name);
			err = -EINVAL;
			goto out;
2151 		}
2152 	} else {
2153 		err = radeon_ucode_validate(rdev->mec_fw);
2154 		if (err) {
2155 			printk(KERN_ERR
2156 			       "cik_fw: validation failed for firmware \"%s\"\n",
2157 			       fw_name);
2158 			goto out;
2159 		} else {
2160 			new_fw++;
2161 		}
2162 	}
2163 
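	/* MEC2 exists only as new-style firmware, and only on KAVERI */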
2164 	if (rdev->family == CHIP_KAVERI) {
2165 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		err = radeon_ucode_validate(rdev->mec2_fw);
		if (err)
			goto out;
		new_fw++;
2177 	}
2178 
2179 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2180 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2181 	if (err) {
2182 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2183 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2184 		if (err)
2185 			goto out;
2186 		if (rdev->rlc_fw->size != rlc_req_size) {
2187 			printk(KERN_ERR
2188 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2189 			       rdev->rlc_fw->size, fw_name);
			err = -EINVAL;
			goto out;
2191 		}
2192 	} else {
2193 		err = radeon_ucode_validate(rdev->rlc_fw);
2194 		if (err) {
2195 			printk(KERN_ERR
2196 			       "cik_fw: validation failed for firmware \"%s\"\n",
2197 			       fw_name);
2198 			goto out;
2199 		} else {
2200 			new_fw++;
2201 		}
2202 	}
2203 
2204 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2205 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2206 	if (err) {
2207 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2208 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2209 		if (err)
2210 			goto out;
2211 		if (rdev->sdma_fw->size != sdma_req_size) {
2212 			printk(KERN_ERR
2213 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2214 			       rdev->sdma_fw->size, fw_name);
			err = -EINVAL;
			goto out;
2216 		}
2217 	} else {
2218 		err = radeon_ucode_validate(rdev->sdma_fw);
2219 		if (err) {
2220 			printk(KERN_ERR
2221 			       "cik_fw: validation failed for firmware \"%s\"\n",
2222 			       fw_name);
2223 			goto out;
2224 		} else {
2225 			new_fw++;
2226 		}
2227 	}
2228 
2229 	/* No SMC, MC ucode on APUs */
2230 	if (!(rdev->flags & RADEON_IS_IGP)) {
2231 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2232 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2233 		if (err) {
2234 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2235 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2236 			if (err) {
2237 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2238 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2239 				if (err)
2240 					goto out;
2241 			}
			if ((rdev->mc_fw->size != mc_req_size) &&
			    (rdev->mc_fw->size != mc2_req_size)) {
				printk(KERN_ERR
				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
				       rdev->mc_fw->size, fw_name);
				err = -EINVAL;
				goto out;
			}
2249 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2250 		} else {
2251 			err = radeon_ucode_validate(rdev->mc_fw);
2252 			if (err) {
2253 				printk(KERN_ERR
2254 				       "cik_fw: validation failed for firmware \"%s\"\n",
2255 				       fw_name);
2256 				goto out;
2257 			} else {
2258 				new_fw++;
2259 			}
2260 		}
2261 
2262 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2263 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2264 		if (err) {
2265 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2266 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2267 			if (err) {
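				/* SMC firmware is optional; keep going without it */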
2268 				printk(KERN_ERR
2269 				       "smc: error loading firmware \"%s\"\n",
2270 				       fw_name);
2271 				release_firmware(rdev->smc_fw);
2272 				rdev->smc_fw = NULL;
2273 				err = 0;
2274 			} else if (rdev->smc_fw->size != smc_req_size) {
2275 				printk(KERN_ERR
2276 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2277 				       rdev->smc_fw->size, fw_name);
2278 				err = -EINVAL;
2279 			}
2280 		} else {
2281 			err = radeon_ucode_validate(rdev->smc_fw);
2282 			if (err) {
2283 				printk(KERN_ERR
2284 				       "cik_fw: validation failed for firmware \"%s\"\n",
2285 				       fw_name);
2286 				goto out;
2287 			} else {
2288 				new_fw++;
2289 			}
2290 		}
2291 	}
2292 
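	/* either every image is new-style or none is; a mix cannot be loaded */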
2293 	if (new_fw == 0) {
2294 		rdev->new_fw = false;
2295 	} else if (new_fw < num_fw) {
2296 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2297 		err = -EINVAL;
2298 	} else {
2299 		rdev->new_fw = true;
2300 	}
2301 
2302 out:
2303 	if (err) {
2304 		if (err != -EINVAL)
2305 			printk(KERN_ERR
2306 			       "cik_cp: Failed to load firmware \"%s\"\n",
2307 			       fw_name);
2308 		release_firmware(rdev->pfp_fw);
2309 		rdev->pfp_fw = NULL;
2310 		release_firmware(rdev->me_fw);
2311 		rdev->me_fw = NULL;
2312 		release_firmware(rdev->ce_fw);
2313 		rdev->ce_fw = NULL;
2314 		release_firmware(rdev->mec_fw);
2315 		rdev->mec_fw = NULL;
2316 		release_firmware(rdev->mec2_fw);
2317 		rdev->mec2_fw = NULL;
2318 		release_firmware(rdev->rlc_fw);
2319 		rdev->rlc_fw = NULL;
2320 		release_firmware(rdev->sdma_fw);
2321 		rdev->sdma_fw = NULL;
2322 		release_firmware(rdev->mc_fw);
2323 		rdev->mc_fw = NULL;
2324 		release_firmware(rdev->smc_fw);
2325 		rdev->smc_fw = NULL;
2326 	}
2327 	return err;
2328 }
2329 
2330 /*
2331  * Core functions
2332  */
2333 /**
2334  * cik_tiling_mode_table_init - init the hw tiling table
2335  *
2336  * @rdev: radeon_device pointer
2337  *
2338  * Starting with SI, the tiling setup is done globally in a
2339  * set of 32 tiling modes.  Rather than selecting each set of
2340  * parameters per surface as on older asics, we just select
2341  * which index in the tiling table we want to use, and the
2342  * surface uses those parameters (CIK).
2343  */
2344 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2345 {
2346 	const u32 num_tile_mode_states = 32;
2347 	const u32 num_secondary_tile_mode_states = 16;
2348 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2349 	u32 num_pipe_configs;
2350 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2351 		rdev->config.cik.max_shader_engines;
2352 
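	/* pick the tile split used by modes that split at the DRAM row boundary */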
2353 	switch (rdev->config.cik.mem_row_size_in_kb) {
2354 	case 1:
2355 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2356 		break;
2357 	case 2:
2358 	default:
2359 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2360 		break;
2361 	case 4:
2362 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2363 		break;
2364 	}
2365 
2366 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
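	/* anything wider than 8 pipes uses the 16-pipe table (e.g. hawaii) */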
2367 	if (num_pipe_configs > 8)
2368 		num_pipe_configs = 16;
2369 
2370 	if (num_pipe_configs == 16) {
2371 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2372 			switch (reg_offset) {
2373 			case 0:
2374 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2378 				break;
2379 			case 1:
2380 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2381 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2382 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2384 				break;
2385 			case 2:
2386 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2388 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2390 				break;
2391 			case 3:
2392 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2393 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2394 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2396 				break;
2397 			case 4:
2398 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2399 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2400 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401 						 TILE_SPLIT(split_equal_to_row_size));
2402 				break;
2403 			case 5:
2404 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2405 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2407 				break;
2408 			case 6:
2409 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2410 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2411 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2412 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2413 				break;
2414 			case 7:
2415 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2416 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2417 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418 						 TILE_SPLIT(split_equal_to_row_size));
2419 				break;
2420 			case 8:
2421 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2422 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2423 				break;
2424 			case 9:
2425 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2426 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2428 				break;
2429 			case 10:
2430 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2432 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434 				break;
2435 			case 11:
2436 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2438 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2439 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 				break;
2441 			case 12:
2442 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2443 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2444 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2445 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2446 				break;
2447 			case 13:
2448 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2451 				break;
2452 			case 14:
2453 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2455 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2457 				break;
2458 			case 16:
2459 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2460 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2461 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2462 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463 				break;
2464 			case 17:
2465 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2466 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2467 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2468 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2469 				break;
2470 			case 27:
2471 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2472 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2473 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2474 				break;
2475 			case 28:
2476 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2477 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2478 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2479 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2480 				break;
2481 			case 29:
2482 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2483 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2484 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2485 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2486 				break;
2487 			case 30:
2488 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2489 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2490 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2491 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2492 				break;
2493 			default:
2494 				gb_tile_moden = 0;
2495 				break;
2496 			}
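			/* cache the value so userspace can query it (info ioctl) */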
2497 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2498 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2499 		}
2500 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2501 			switch (reg_offset) {
2502 			case 0:
2503 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2505 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2506 						 NUM_BANKS(ADDR_SURF_16_BANK));
2507 				break;
2508 			case 1:
2509 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2511 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2512 						 NUM_BANKS(ADDR_SURF_16_BANK));
2513 				break;
2514 			case 2:
2515 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2518 						 NUM_BANKS(ADDR_SURF_16_BANK));
2519 				break;
2520 			case 3:
2521 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524 						 NUM_BANKS(ADDR_SURF_16_BANK));
2525 				break;
2526 			case 4:
2527 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2530 						 NUM_BANKS(ADDR_SURF_8_BANK));
2531 				break;
2532 			case 5:
2533 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2535 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2536 						 NUM_BANKS(ADDR_SURF_4_BANK));
2537 				break;
2538 			case 6:
2539 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2541 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2542 						 NUM_BANKS(ADDR_SURF_2_BANK));
2543 				break;
2544 			case 8:
2545 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2547 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2548 						 NUM_BANKS(ADDR_SURF_16_BANK));
2549 				break;
2550 			case 9:
2551 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2553 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2554 						 NUM_BANKS(ADDR_SURF_16_BANK));
2555 				break;
2556 			case 10:
2557 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2559 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2560 						 NUM_BANKS(ADDR_SURF_16_BANK));
2561 				break;
2562 			case 11:
2563 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2565 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2566 						 NUM_BANKS(ADDR_SURF_8_BANK));
2567 				break;
2568 			case 12:
2569 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2571 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2572 						 NUM_BANKS(ADDR_SURF_4_BANK));
2573 				break;
2574 			case 13:
2575 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2577 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2578 						 NUM_BANKS(ADDR_SURF_2_BANK));
2579 				break;
2580 			case 14:
2581 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2583 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2584 						 NUM_BANKS(ADDR_SURF_2_BANK));
2585 				break;
2586 			default:
2587 				gb_tile_moden = 0;
2588 				break;
2589 			}
2590 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2591 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2592 		}
2593 	} else if (num_pipe_configs == 8) {
2594 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2595 			switch (reg_offset) {
2596 			case 0:
2597 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2598 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2599 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2601 				break;
2602 			case 1:
2603 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2605 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2607 				break;
2608 			case 2:
2609 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2610 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2611 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2613 				break;
2614 			case 3:
2615 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2617 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2618 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2619 				break;
2620 			case 4:
2621 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2623 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624 						 TILE_SPLIT(split_equal_to_row_size));
2625 				break;
2626 			case 5:
2627 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2628 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2629 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2630 				break;
2631 			case 6:
2632 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2633 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2634 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2635 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2636 				break;
2637 			case 7:
2638 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2639 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2640 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641 						 TILE_SPLIT(split_equal_to_row_size));
2642 				break;
2643 			case 8:
2644 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2645 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2646 				break;
2647 			case 9:
2648 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2649 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2650 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2651 				break;
2652 			case 10:
2653 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2655 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2656 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657 				break;
2658 			case 11:
2659 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2660 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2661 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2662 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2663 				break;
2664 			case 12:
2665 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2666 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2667 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2668 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2669 				break;
2670 			case 13:
2671 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2672 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2673 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2674 				break;
2675 			case 14:
2676 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2678 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2680 				break;
2681 			case 16:
2682 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2683 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2684 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2685 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2686 				break;
2687 			case 17:
2688 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2689 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2690 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2691 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692 				break;
2693 			case 27:
2694 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2695 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2696 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2697 				break;
2698 			case 28:
2699 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2700 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2701 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2702 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703 				break;
2704 			case 29:
2705 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2706 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2707 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2708 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2709 				break;
2710 			case 30:
2711 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2712 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2713 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2714 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715 				break;
2716 			default:
2717 				gb_tile_moden = 0;
2718 				break;
2719 			}
2720 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2721 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2722 		}
2723 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2724 			switch (reg_offset) {
2725 			case 0:
2726 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2728 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2729 						 NUM_BANKS(ADDR_SURF_16_BANK));
2730 				break;
2731 			case 1:
2732 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2734 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2735 						 NUM_BANKS(ADDR_SURF_16_BANK));
2736 				break;
2737 			case 2:
2738 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2740 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2741 						 NUM_BANKS(ADDR_SURF_16_BANK));
2742 				break;
2743 			case 3:
2744 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2746 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2747 						 NUM_BANKS(ADDR_SURF_16_BANK));
2748 				break;
2749 			case 4:
2750 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2752 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2753 						 NUM_BANKS(ADDR_SURF_8_BANK));
2754 				break;
2755 			case 5:
2756 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2759 						 NUM_BANKS(ADDR_SURF_4_BANK));
2760 				break;
2761 			case 6:
2762 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2765 						 NUM_BANKS(ADDR_SURF_2_BANK));
2766 				break;
2767 			case 8:
2768 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2770 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2771 						 NUM_BANKS(ADDR_SURF_16_BANK));
2772 				break;
2773 			case 9:
2774 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2775 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2776 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2777 						 NUM_BANKS(ADDR_SURF_16_BANK));
2778 				break;
2779 			case 10:
2780 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2782 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2783 						 NUM_BANKS(ADDR_SURF_16_BANK));
2784 				break;
2785 			case 11:
2786 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2788 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2789 						 NUM_BANKS(ADDR_SURF_16_BANK));
2790 				break;
2791 			case 12:
2792 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2794 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2795 						 NUM_BANKS(ADDR_SURF_8_BANK));
2796 				break;
2797 			case 13:
2798 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2800 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2801 						 NUM_BANKS(ADDR_SURF_4_BANK));
2802 				break;
2803 			case 14:
2804 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2806 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2807 						 NUM_BANKS(ADDR_SURF_2_BANK));
2808 				break;
2809 			default:
2810 				gb_tile_moden = 0;
2811 				break;
2812 			}
2813 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2814 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2815 		}
2816 	} else if (num_pipe_configs == 4) {
2817 		if (num_rbs == 4) {
2818 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2819 				switch (reg_offset) {
2820 				case 0:
2821 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2822 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2823 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2824 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2825 					break;
2826 				case 1:
2827 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2828 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2829 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2830 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2831 					break;
2832 				case 2:
2833 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2834 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2835 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2836 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2837 					break;
2838 				case 3:
2839 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2840 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2841 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2842 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2843 					break;
2844 				case 4:
2845 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2846 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2847 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2848 							 TILE_SPLIT(split_equal_to_row_size));
2849 					break;
2850 				case 5:
2851 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2852 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2854 					break;
2855 				case 6:
2856 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2857 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2858 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2859 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2860 					break;
2861 				case 7:
2862 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2863 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2864 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865 							 TILE_SPLIT(split_equal_to_row_size));
2866 					break;
2867 				case 8:
2868 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2869 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2870 					break;
2871 				case 9:
2872 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2873 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2874 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2875 					break;
2876 				case 10:
2877 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2879 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2880 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2881 					break;
2882 				case 11:
2883 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2884 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2885 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2886 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2887 					break;
2888 				case 12:
2889 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2890 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2891 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2892 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2893 					break;
2894 				case 13:
2895 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2896 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2897 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2898 					break;
2899 				case 14:
2900 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2902 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2903 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2904 					break;
2905 				case 16:
2906 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2907 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2908 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2909 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2910 					break;
2911 				case 17:
2912 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2913 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2914 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2915 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916 					break;
2917 				case 27:
2918 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2919 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2920 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2921 					break;
2922 				case 28:
2923 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2924 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2925 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927 					break;
2928 				case 29:
2929 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2931 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2932 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2933 					break;
2934 				case 30:
2935 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2936 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2937 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2938 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939 					break;
2940 				default:
2941 					gb_tile_moden = 0;
2942 					break;
2943 				}
2944 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2945 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2946 			}
2947 		} else if (num_rbs < 4) {
2948 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2949 				switch (reg_offset) {
2950 				case 0:
2951 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2952 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2953 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2954 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2955 					break;
2956 				case 1:
2957 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2958 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2959 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2960 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2961 					break;
2962 				case 2:
2963 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2964 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2965 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2966 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2967 					break;
2968 				case 3:
2969 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2970 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2971 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2972 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2973 					break;
2974 				case 4:
2975 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2976 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2977 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2978 							 TILE_SPLIT(split_equal_to_row_size));
2979 					break;
2980 				case 5:
2981 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2982 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2983 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2984 					break;
2985 				case 6:
2986 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2987 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2988 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2989 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2990 					break;
2991 				case 7:
2992 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2993 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2994 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2995 							 TILE_SPLIT(split_equal_to_row_size));
2996 					break;
2997 				case 8:
2998 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
							 PIPE_CONFIG(ADDR_SURF_P4_8x16));
3000 					break;
3001 				case 9:
3002 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3003 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3004 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
3005 					break;
3006 				case 10:
3007 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3008 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3009 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3010 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3011 					break;
3012 				case 11:
3013 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3014 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3015 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3016 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017 					break;
3018 				case 12:
3019 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3020 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3021 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3022 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3023 					break;
3024 				case 13:
3025 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3026 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3027 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3028 					break;
3029 				case 14:
3030 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3031 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3032 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3033 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3034 					break;
3035 				case 16:
3036 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3037 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3038 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3039 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3040 					break;
3041 				case 17:
3042 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3043 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3044 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3045 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3046 					break;
3047 				case 27:
3048 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3049 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3050 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
3051 					break;
3052 				case 28:
3053 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3054 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3055 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3056 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3057 					break;
3058 				case 29:
3059 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3060 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3061 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3062 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3063 					break;
3064 				case 30:
3065 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3066 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3067 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3068 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3069 					break;
3070 				default:
3071 					gb_tile_moden = 0;
3072 					break;
3073 				}
3074 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3075 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3076 			}
3077 		}
3078 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3079 			switch (reg_offset) {
3080 			case 0:
3081 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3083 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3084 						 NUM_BANKS(ADDR_SURF_16_BANK));
3085 				break;
3086 			case 1:
3087 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3088 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3089 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3090 						 NUM_BANKS(ADDR_SURF_16_BANK));
3091 				break;
3092 			case 2:
3093 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3095 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3096 						 NUM_BANKS(ADDR_SURF_16_BANK));
3097 				break;
3098 			case 3:
3099 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3100 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3101 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3102 						 NUM_BANKS(ADDR_SURF_16_BANK));
3103 				break;
3104 			case 4:
3105 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3106 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3107 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3108 						 NUM_BANKS(ADDR_SURF_16_BANK));
3109 				break;
3110 			case 5:
3111 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3112 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3113 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3114 						 NUM_BANKS(ADDR_SURF_8_BANK));
3115 				break;
3116 			case 6:
3117 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3118 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3119 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3120 						 NUM_BANKS(ADDR_SURF_4_BANK));
3121 				break;
3122 			case 8:
3123 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3124 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3125 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3126 						 NUM_BANKS(ADDR_SURF_16_BANK));
3127 				break;
3128 			case 9:
3129 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3130 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3131 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3132 						 NUM_BANKS(ADDR_SURF_16_BANK));
3133 				break;
3134 			case 10:
3135 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3136 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3137 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3138 						 NUM_BANKS(ADDR_SURF_16_BANK));
3139 				break;
3140 			case 11:
3141 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3142 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3143 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3144 						 NUM_BANKS(ADDR_SURF_16_BANK));
3145 				break;
3146 			case 12:
3147 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3149 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3150 						 NUM_BANKS(ADDR_SURF_16_BANK));
3151 				break;
3152 			case 13:
3153 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3154 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3155 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3156 						 NUM_BANKS(ADDR_SURF_8_BANK));
3157 				break;
3158 			case 14:
3159 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3160 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3161 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3162 						 NUM_BANKS(ADDR_SURF_4_BANK));
3163 				break;
3164 			default:
3165 				gb_tile_moden = 0;
3166 				break;
3167 			}
3168 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3169 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3170 		}
3171 	} else if (num_pipe_configs == 2) {
3172 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3173 			switch (reg_offset) {
3174 			case 0:
3175 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3176 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3177 						 PIPE_CONFIG(ADDR_SURF_P2) |
3178 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3179 				break;
3180 			case 1:
3181 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3182 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3183 						 PIPE_CONFIG(ADDR_SURF_P2) |
3184 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3185 				break;
3186 			case 2:
3187 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3188 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3189 						 PIPE_CONFIG(ADDR_SURF_P2) |
3190 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3191 				break;
3192 			case 3:
3193 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3194 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3195 						 PIPE_CONFIG(ADDR_SURF_P2) |
3196 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3197 				break;
3198 			case 4:
3199 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3200 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3201 						 PIPE_CONFIG(ADDR_SURF_P2) |
3202 						 TILE_SPLIT(split_equal_to_row_size));
3203 				break;
3204 			case 5:
3205 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3206 						 PIPE_CONFIG(ADDR_SURF_P2) |
3207 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3208 				break;
3209 			case 6:
3210 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3211 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3212 						 PIPE_CONFIG(ADDR_SURF_P2) |
3213 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3214 				break;
3215 			case 7:
3216 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3217 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3218 						 PIPE_CONFIG(ADDR_SURF_P2) |
3219 						 TILE_SPLIT(split_equal_to_row_size));
3220 				break;
3221 			case 8:
				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
						 PIPE_CONFIG(ADDR_SURF_P2));
3224 				break;
3225 			case 9:
3226 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3227 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3228 						 PIPE_CONFIG(ADDR_SURF_P2));
3229 				break;
3230 			case 10:
3231 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3232 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3233 						 PIPE_CONFIG(ADDR_SURF_P2) |
3234 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3235 				break;
3236 			case 11:
3237 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3238 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3239 						 PIPE_CONFIG(ADDR_SURF_P2) |
3240 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3241 				break;
3242 			case 12:
3243 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3244 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3245 						 PIPE_CONFIG(ADDR_SURF_P2) |
3246 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3247 				break;
3248 			case 13:
3249 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3250 						 PIPE_CONFIG(ADDR_SURF_P2) |
3251 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3252 				break;
3253 			case 14:
3254 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3255 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3256 						 PIPE_CONFIG(ADDR_SURF_P2) |
3257 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3258 				break;
3259 			case 16:
3260 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3261 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3262 						 PIPE_CONFIG(ADDR_SURF_P2) |
3263 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3264 				break;
3265 			case 17:
3266 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3267 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3268 						 PIPE_CONFIG(ADDR_SURF_P2) |
3269 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3270 				break;
3271 			case 27:
3272 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3273 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3274 						 PIPE_CONFIG(ADDR_SURF_P2));
3275 				break;
3276 			case 28:
3277 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3278 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3279 						 PIPE_CONFIG(ADDR_SURF_P2) |
3280 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3281 				break;
3282 			case 29:
3283 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3284 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3285 						 PIPE_CONFIG(ADDR_SURF_P2) |
3286 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3287 				break;
3288 			case 30:
3289 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3290 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3291 						 PIPE_CONFIG(ADDR_SURF_P2) |
3292 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3293 				break;
3294 			default:
3295 				gb_tile_moden = 0;
3296 				break;
3297 			}
3298 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3299 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3300 		}
3301 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3302 			switch (reg_offset) {
3303 			case 0:
3304 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3305 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3306 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307 						 NUM_BANKS(ADDR_SURF_16_BANK));
3308 				break;
3309 			case 1:
3310 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3311 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3312 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3313 						 NUM_BANKS(ADDR_SURF_16_BANK));
3314 				break;
3315 			case 2:
3316 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3318 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3319 						 NUM_BANKS(ADDR_SURF_16_BANK));
3320 				break;
3321 			case 3:
3322 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3323 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3324 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3325 						 NUM_BANKS(ADDR_SURF_16_BANK));
3326 				break;
3327 			case 4:
3328 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3329 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3330 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3331 						 NUM_BANKS(ADDR_SURF_16_BANK));
3332 				break;
3333 			case 5:
3334 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3335 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3336 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3337 						 NUM_BANKS(ADDR_SURF_16_BANK));
3338 				break;
3339 			case 6:
3340 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3341 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3342 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3343 						 NUM_BANKS(ADDR_SURF_8_BANK));
3344 				break;
3345 			case 8:
3346 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3347 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3348 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3349 						 NUM_BANKS(ADDR_SURF_16_BANK));
3350 				break;
3351 			case 9:
3352 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3353 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3354 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3355 						 NUM_BANKS(ADDR_SURF_16_BANK));
3356 				break;
3357 			case 10:
3358 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3359 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3360 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3361 						 NUM_BANKS(ADDR_SURF_16_BANK));
3362 				break;
3363 			case 11:
3364 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3365 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3366 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3367 						 NUM_BANKS(ADDR_SURF_16_BANK));
3368 				break;
3369 			case 12:
3370 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3371 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3372 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3373 						 NUM_BANKS(ADDR_SURF_16_BANK));
3374 				break;
3375 			case 13:
3376 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3378 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3379 						 NUM_BANKS(ADDR_SURF_16_BANK));
3380 				break;
3381 			case 14:
3382 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3383 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3384 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3385 						 NUM_BANKS(ADDR_SURF_8_BANK));
3386 				break;
3387 			default:
3388 				gb_tile_moden = 0;
3389 				break;
3390 			}
3391 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3392 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3393 		}
3394 	} else
3395 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3396 }
3397 
3398 /**
3399  * cik_select_se_sh - select which SE, SH to address
3400  *
3401  * @rdev: radeon_device pointer
3402  * @se_num: shader engine to address
3403  * @sh_num: sh block to address
3404  *
3405  * Select which SE, SH combinations to address. Certain
3406  * registers are instanced per SE or SH.  0xffffffff means
3407  * broadcast to all SEs or SHs (CIK).
3408  */
3409 static void cik_select_se_sh(struct radeon_device *rdev,
3410 			     u32 se_num, u32 sh_num)
3411 {
3412 	u32 data = INSTANCE_BROADCAST_WRITES;
3413 
3414 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3415 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3416 	else if (se_num == 0xffffffff)
3417 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3418 	else if (sh_num == 0xffffffff)
3419 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3420 	else
3421 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3422 	WREG32(GRBM_GFX_INDEX, data);
3423 }
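
/*
 * Usage sketch (mirrors cik_setup_rb() below): per-SE/SH register
 * accesses are bracketed by a broadcast restore and serialized with
 * grbm_idx_mutex, e.g.
 *
 *	mutex_lock(&rdev->grbm_idx_mutex);
 *	cik_select_se_sh(rdev, se, 0xffffffff);
 *	data = RREG32(CC_RB_BACKEND_DISABLE);
 *	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
 *	mutex_unlock(&rdev->grbm_idx_mutex);
 */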
3424 
3425 /**
3426  * cik_create_bitmask - create a bitmask
3427  *
3428  * @bit_width: length of the mask
3429  *
3430  * create a variable length bit mask (CIK).
3431  * Returns the bitmask.
3432  */
3433 static u32 cik_create_bitmask(u32 bit_width)
3434 {
3435 	u32 i, mask = 0;
3436 
3437 	for (i = 0; i < bit_width; i++) {
3438 		mask <<= 1;
3439 		mask |= 1;
3440 	}
3441 	return mask;
3442 }
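
/*
 * Example: cik_create_bitmask(4) == 0xf and cik_create_bitmask(2) == 0x3;
 * the loop is equivalent to (1 << bit_width) - 1 for bit_width < 32.
 */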
3443 
3444 /**
3445  * cik_get_rb_disabled - computes the mask of disabled RBs
3446  *
3447  * @rdev: radeon_device pointer
3448  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3449  * @sh_per_se: number of SH blocks per SE for the asic
3451  *
3452  * Calculates the bitmask of disabled RBs (CIK).
3453  * Returns the disabled RB bitmask.
3454  */
3455 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3456 			      u32 max_rb_num_per_se,
3457 			      u32 sh_per_se)
3458 {
3459 	u32 data, mask;
3460 
3461 	data = RREG32(CC_RB_BACKEND_DISABLE);
3462 	if (data & 1)
3463 		data &= BACKEND_DISABLE_MASK;
3464 	else
3465 		data = 0;
3466 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3467 
3468 	data >>= BACKEND_DISABLE_SHIFT;
3469 
3470 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3471 
3472 	return data & mask;
3473 }
3474 
3475 /**
3476  * cik_setup_rb - setup the RBs on the asic
3477  *
3478  * @rdev: radeon_device pointer
3479  * @se_num: number of SEs (shader engines) for the asic
3480  * @sh_per_se: number of SH blocks per SE for the asic
3481  * @max_rb_num_per_se: max RBs (render backends) per SE for the asic
3482  *
3483  * Configures per-SE/SH RB registers (CIK).
3484  */
3485 static void cik_setup_rb(struct radeon_device *rdev,
3486 			 u32 se_num, u32 sh_per_se,
3487 			 u32 max_rb_num_per_se)
3488 {
3489 	int i, j;
3490 	u32 data, mask;
3491 	u32 disabled_rbs = 0;
3492 	u32 enabled_rbs = 0;
3493 
3494 	mutex_lock(&rdev->grbm_idx_mutex);
3495 	for (i = 0; i < se_num; i++) {
3496 		for (j = 0; j < sh_per_se; j++) {
3497 			cik_select_se_sh(rdev, i, j);
3498 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3499 			if (rdev->family == CHIP_HAWAII)
3500 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3501 			else
3502 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3503 		}
3504 	}
3505 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3506 	mutex_unlock(&rdev->grbm_idx_mutex);
3507 
3508 	mask = 1;
3509 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3510 		if (!(disabled_rbs & mask))
3511 			enabled_rbs |= mask;
3512 		mask <<= 1;
3513 	}
3514 
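	/*
	 * Worked example: with 2 SEs x 2 RBs/SE and RB 1 fused off
	 * (disabled_rbs == 0x2), the loop above yields enabled_rbs == 0xd.
	 */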
3515 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3516 
3517 	mutex_lock(&rdev->grbm_idx_mutex);
3518 	for (i = 0; i < se_num; i++) {
3519 		cik_select_se_sh(rdev, i, 0xffffffff);
3520 		data = 0;
3521 		for (j = 0; j < sh_per_se; j++) {
3522 			switch (enabled_rbs & 3) {
3523 			case 0:
3524 				if (j == 0)
3525 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3526 				else
3527 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3528 				break;
3529 			case 1:
3530 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3531 				break;
3532 			case 2:
3533 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3534 				break;
3535 			case 3:
3536 			default:
3537 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3538 				break;
3539 			}
3540 			enabled_rbs >>= 2;
3541 		}
3542 		WREG32(PA_SC_RASTER_CONFIG, data);
3543 	}
3544 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3545 	mutex_unlock(&rdev->grbm_idx_mutex);
3546 }
3547 
3548 /**
3549  * cik_gpu_init - setup the 3D engine
3550  *
3551  * @rdev: radeon_device pointer
3552  *
3553  * Configures the 3D engine and tiling configuration
3554  * registers so that the 3D engine is usable.
3555  */
3556 static void cik_gpu_init(struct radeon_device *rdev)
3557 {
3558 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3559 	u32 mc_shared_chmap, mc_arb_ramcfg;
3560 	u32 hdp_host_path_cntl;
3561 	u32 tmp;
3562 	int i, j;
3563 
3564 	switch (rdev->family) {
3565 	case CHIP_BONAIRE:
3566 		rdev->config.cik.max_shader_engines = 2;
3567 		rdev->config.cik.max_tile_pipes = 4;
3568 		rdev->config.cik.max_cu_per_sh = 7;
3569 		rdev->config.cik.max_sh_per_se = 1;
3570 		rdev->config.cik.max_backends_per_se = 2;
3571 		rdev->config.cik.max_texture_channel_caches = 4;
3572 		rdev->config.cik.max_gprs = 256;
3573 		rdev->config.cik.max_gs_threads = 32;
3574 		rdev->config.cik.max_hw_contexts = 8;
3575 
3576 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3577 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3578 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3579 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3580 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3581 		break;
3582 	case CHIP_HAWAII:
3583 		rdev->config.cik.max_shader_engines = 4;
3584 		rdev->config.cik.max_tile_pipes = 16;
3585 		rdev->config.cik.max_cu_per_sh = 11;
3586 		rdev->config.cik.max_sh_per_se = 1;
3587 		rdev->config.cik.max_backends_per_se = 4;
3588 		rdev->config.cik.max_texture_channel_caches = 16;
3589 		rdev->config.cik.max_gprs = 256;
3590 		rdev->config.cik.max_gs_threads = 32;
3591 		rdev->config.cik.max_hw_contexts = 8;
3592 
3593 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3594 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3595 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3596 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3597 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3598 		break;
3599 	case CHIP_KAVERI:
3600 		rdev->config.cik.max_shader_engines = 1;
3601 		rdev->config.cik.max_tile_pipes = 4;
3602 		if ((rdev->pdev->device == 0x1304) ||
3603 		    (rdev->pdev->device == 0x1305) ||
3604 		    (rdev->pdev->device == 0x130C) ||
3605 		    (rdev->pdev->device == 0x130F) ||
3606 		    (rdev->pdev->device == 0x1310) ||
3607 		    (rdev->pdev->device == 0x1311) ||
3608 		    (rdev->pdev->device == 0x131C)) {
3609 			rdev->config.cik.max_cu_per_sh = 8;
3610 			rdev->config.cik.max_backends_per_se = 2;
3611 		} else if ((rdev->pdev->device == 0x1309) ||
3612 			   (rdev->pdev->device == 0x130A) ||
3613 			   (rdev->pdev->device == 0x130D) ||
3614 			   (rdev->pdev->device == 0x1313) ||
3615 			   (rdev->pdev->device == 0x131D)) {
3616 			rdev->config.cik.max_cu_per_sh = 6;
3617 			rdev->config.cik.max_backends_per_se = 2;
3618 		} else if ((rdev->pdev->device == 0x1306) ||
3619 			   (rdev->pdev->device == 0x1307) ||
3620 			   (rdev->pdev->device == 0x130B) ||
3621 			   (rdev->pdev->device == 0x130E) ||
3622 			   (rdev->pdev->device == 0x1315) ||
3623 			   (rdev->pdev->device == 0x1318) ||
3624 			   (rdev->pdev->device == 0x131B)) {
3625 			rdev->config.cik.max_cu_per_sh = 4;
3626 			rdev->config.cik.max_backends_per_se = 1;
3627 		} else {
3628 			rdev->config.cik.max_cu_per_sh = 3;
3629 			rdev->config.cik.max_backends_per_se = 1;
3630 		}
3631 		rdev->config.cik.max_sh_per_se = 1;
3632 		rdev->config.cik.max_texture_channel_caches = 4;
3633 		rdev->config.cik.max_gprs = 256;
3634 		rdev->config.cik.max_gs_threads = 16;
3635 		rdev->config.cik.max_hw_contexts = 8;
3636 
3637 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3638 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3639 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3640 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3641 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3642 		break;
3643 	case CHIP_KABINI:
3644 	case CHIP_MULLINS:
3645 	default:
3646 		rdev->config.cik.max_shader_engines = 1;
3647 		rdev->config.cik.max_tile_pipes = 2;
3648 		rdev->config.cik.max_cu_per_sh = 2;
3649 		rdev->config.cik.max_sh_per_se = 1;
3650 		rdev->config.cik.max_backends_per_se = 1;
3651 		rdev->config.cik.max_texture_channel_caches = 2;
3652 		rdev->config.cik.max_gprs = 256;
3653 		rdev->config.cik.max_gs_threads = 16;
3654 		rdev->config.cik.max_hw_contexts = 8;
3655 
3656 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3657 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3658 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3659 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3660 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3661 		break;
3662 	}
3663 
3664 	/* Initialize HDP */
3665 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3666 		WREG32((0x2c14 + j), 0x00000000);
3667 		WREG32((0x2c18 + j), 0x00000000);
3668 		WREG32((0x2c1c + j), 0x00000000);
3669 		WREG32((0x2c20 + j), 0x00000000);
3670 		WREG32((0x2c24 + j), 0x00000000);
3671 	}
3672 
3673 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3674 	WREG32(SRBM_INT_CNTL, 0x1);
3675 	WREG32(SRBM_INT_ACK, 0x1);
3676 
3677 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3678 
3679 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3680 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3681 
3682 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3683 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3684 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3685 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3686 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3687 		rdev->config.cik.mem_row_size_in_kb = 4;
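	/*
	 * e.g. NOOFCOLS == 0 gives 4 * 2^8 bytes = 1 KB rows; anything
	 * larger than 4 KB is clamped just above.
	 */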
3688 	/* XXX use MC settings? */
3689 	rdev->config.cik.shader_engine_tile_size = 32;
3690 	rdev->config.cik.num_gpus = 1;
3691 	rdev->config.cik.multi_gpu_tile_size = 64;
3692 
3693 	/* fix up row size */
3694 	gb_addr_config &= ~ROW_SIZE_MASK;
3695 	switch (rdev->config.cik.mem_row_size_in_kb) {
3696 	case 1:
3697 	default:
3698 		gb_addr_config |= ROW_SIZE(0);
3699 		break;
3700 	case 2:
3701 		gb_addr_config |= ROW_SIZE(1);
3702 		break;
3703 	case 4:
3704 		gb_addr_config |= ROW_SIZE(2);
3705 		break;
3706 	}
3707 
3708 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3709 	 * not have bank info, so create a custom tiling dword.
3710 	 * bits 3:0   num_pipes
3711 	 * bits 7:4   num_banks
3712 	 * bits 11:8  group_size
3713 	 * bits 15:12 row_size
3714 	 */
3715 	rdev->config.cik.tile_config = 0;
3716 	switch (rdev->config.cik.num_tile_pipes) {
3717 	case 1:
3718 		rdev->config.cik.tile_config |= (0 << 0);
3719 		break;
3720 	case 2:
3721 		rdev->config.cik.tile_config |= (1 << 0);
3722 		break;
3723 	case 4:
3724 		rdev->config.cik.tile_config |= (2 << 0);
3725 		break;
3726 	case 8:
3727 	default:
3728 		/* XXX what about 12? */
3729 		rdev->config.cik.tile_config |= (3 << 0);
3730 		break;
3731 	}
3732 	rdev->config.cik.tile_config |=
3733 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3734 	rdev->config.cik.tile_config |=
3735 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3736 	rdev->config.cik.tile_config |=
3737 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
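
	/*
	 * Decoding sketch for the layout above: userspace recovers
	 * num_pipes as 1 << (tile_config & 0xf), while the num_banks,
	 * group_size and row_size fields are the raw register encodings
	 * copied from MC_ARB_RAMCFG and GB_ADDR_CONFIG.
	 */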
3738 
3739 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3740 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3741 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3742 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3743 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3744 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3745 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3746 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3747 
3748 	cik_tiling_mode_table_init(rdev);
3749 
3750 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3751 		     rdev->config.cik.max_sh_per_se,
3752 		     rdev->config.cik.max_backends_per_se);
3753 
3754 	rdev->config.cik.active_cus = 0;
3755 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3756 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3757 			rdev->config.cik.active_cus +=
3758 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3759 		}
3760 	}
3761 
3762 	/* set HW defaults for 3D engine */
3763 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3764 
3765 	mutex_lock(&rdev->grbm_idx_mutex);
3766 	/*
3767 	 * make sure that the following register writes are broadcast
3768 	 * to all the shaders
3769 	 */
3770 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3771 	WREG32(SX_DEBUG_1, 0x20);
3772 
3773 	WREG32(TA_CNTL_AUX, 0x00010000);
3774 
3775 	tmp = RREG32(SPI_CONFIG_CNTL);
3776 	tmp |= 0x03000000;
3777 	WREG32(SPI_CONFIG_CNTL, tmp);
3778 
3779 	WREG32(SQ_CONFIG, 1);
3780 
3781 	WREG32(DB_DEBUG, 0);
3782 
3783 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3784 	tmp |= 0x00000400;
3785 	WREG32(DB_DEBUG2, tmp);
3786 
3787 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3788 	tmp |= 0x00020200;
3789 	WREG32(DB_DEBUG3, tmp);
3790 
3791 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3792 	tmp |= 0x00018208;
3793 	WREG32(CB_HW_CONTROL, tmp);
3794 
3795 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3796 
3797 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3798 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3799 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3800 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3801 
3802 	WREG32(VGT_NUM_INSTANCES, 1);
3803 
3804 	WREG32(CP_PERFMON_CNTL, 0);
3805 
3806 	WREG32(SQ_CONFIG, 0);
3807 
3808 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3809 					  FORCE_EOV_MAX_REZ_CNT(255)));
3810 
3811 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3812 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3813 
3814 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3815 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3816 
3817 	tmp = RREG32(HDP_MISC_CNTL);
3818 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3819 	WREG32(HDP_MISC_CNTL, tmp);
3820 
3821 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3822 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3823 
3824 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3825 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3826 	mutex_unlock(&rdev->grbm_idx_mutex);
3827 
3828 	udelay(50);
3829 }
3830 
3831 /*
3832  * GPU scratch register helper functions.
3833  */
3834 /**
3835  * cik_scratch_init - setup driver info for CP scratch regs
3836  *
3837  * @rdev: radeon_device pointer
3838  *
3839  * Set up the number and offset of the CP scratch registers.
3840  * NOTE: use of CP scratch registers is a legacy interface and
3841  * is not used by default on newer asics (r6xx+).  On newer asics,
3842  * memory buffers are used for fences rather than scratch regs.
3843  */
3844 static void cik_scratch_init(struct radeon_device *rdev)
3845 {
3846 	int i;
3847 
3848 	rdev->scratch.num_reg = 7;
3849 	rdev->scratch.reg_base = SCRATCH_REG0;
3850 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3851 		rdev->scratch.free[i] = true;
3852 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3853 	}
3854 }
3855 
3856 /**
3857  * cik_ring_test - basic gfx ring test
3858  *
3859  * @rdev: radeon_device pointer
3860  * @ring: radeon_ring structure holding ring information
3861  *
3862  * Allocate a scratch register and write to it using the gfx ring (CIK).
3863  * Provides a basic gfx ring test to verify that the ring is working.
3864  * Used by cik_cp_gfx_resume().
3865  * Returns 0 on success, error on failure.
3866  */
3867 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3868 {
3869 	uint32_t scratch;
3870 	uint32_t tmp = 0;
3871 	unsigned i;
3872 	int r;
3873 
3874 	r = radeon_scratch_get(rdev, &scratch);
3875 	if (r) {
3876 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3877 		return r;
3878 	}
3879 	WREG32(scratch, 0xCAFEDEAD);
3880 	r = radeon_ring_lock(rdev, ring, 3);
3881 	if (r) {
3882 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3883 		radeon_scratch_free(rdev, scratch);
3884 		return r;
3885 	}
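	/* SET_UCONFIG_REG takes a dword offset relative to
	 * PACKET3_SET_UCONFIG_REG_START, hence the subtract and >> 2 below.
	 */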
3886 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3887 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3888 	radeon_ring_write(ring, 0xDEADBEEF);
3889 	radeon_ring_unlock_commit(rdev, ring, false);
3890 
3891 	for (i = 0; i < rdev->usec_timeout; i++) {
3892 		tmp = RREG32(scratch);
3893 		if (tmp == 0xDEADBEEF)
3894 			break;
3895 		DRM_UDELAY(1);
3896 	}
3897 	if (i < rdev->usec_timeout) {
3898 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3899 	} else {
3900 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3901 			  ring->idx, scratch, tmp);
3902 		r = -EINVAL;
3903 	}
3904 	radeon_scratch_free(rdev, scratch);
3905 	return r;
3906 }
3907 
3908 /**
3909  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3910  *
3911  * @rdev: radeon_device pointer
3912  * @ridx: radeon ring index
3913  *
3914  * Emits an hdp flush on the cp.
3915  */
3916 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3917 				       int ridx)
3918 {
3919 	struct radeon_ring *ring = &rdev->ring[ridx];
3920 	u32 ref_and_mask;
3921 
3922 	switch (ring->idx) {
3923 	case CAYMAN_RING_TYPE_CP1_INDEX:
3924 	case CAYMAN_RING_TYPE_CP2_INDEX:
3925 	default:
3926 		switch (ring->me) {
3927 		case 0:
3928 			ref_and_mask = CP2 << ring->pipe;
3929 			break;
3930 		case 1:
3931 			ref_and_mask = CP6 << ring->pipe;
3932 			break;
3933 		default:
3934 			return;
3935 		}
3936 		break;
3937 	case RADEON_RING_TYPE_GFX_INDEX:
3938 		ref_and_mask = CP0;
3939 		break;
3940 	}
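
	/*
	 * Mapping sketch: GPU_HDP_FLUSH_REQ/DONE carry one bit per client.
	 * The gfx ring uses CP0; compute pipes get their own bits, CP2
	 * upward for ring->me == 0 and CP6 upward for ring->me == 1, hence
	 * the "<< ring->pipe" shifts above.
	 */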
3941 
3942 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3943 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3944 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3945 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3946 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3947 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3948 	radeon_ring_write(ring, ref_and_mask);
3949 	radeon_ring_write(ring, ref_and_mask);
3950 	radeon_ring_write(ring, 0x20); /* poll interval */
3951 }
3952 
3953 /**
3954  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3955  *
3956  * @rdev: radeon_device pointer
3957  * @fence: radeon fence object
3958  *
3959  * Emits a fence sequence number on the gfx ring and flushes
3960  * GPU caches.
3961  */
3962 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3963 			     struct radeon_fence *fence)
3964 {
3965 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3966 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3967 
3968 	/* Workaround for cache flush problems. First send a dummy EOP
3969 	 * event down the pipe with seq one below.
3970 	 */
3971 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3972 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3973 				 EOP_TC_ACTION_EN |
3974 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3975 				 EVENT_INDEX(5)));
3976 	radeon_ring_write(ring, addr & 0xfffffffc);
3977 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
3978 				DATA_SEL(1) | INT_SEL(0));
3979 	radeon_ring_write(ring, fence->seq - 1);
3980 	radeon_ring_write(ring, 0);
3981 
3982 	/* Then send the real EOP event down the pipe. */
3983 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3984 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3985 				 EOP_TC_ACTION_EN |
3986 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3987 				 EVENT_INDEX(5)));
3988 	radeon_ring_write(ring, addr & 0xfffffffc);
3989 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3990 	radeon_ring_write(ring, fence->seq);
3991 	radeon_ring_write(ring, 0);
3992 }
3993 
3994 /**
3995  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3996  *
3997  * @rdev: radeon_device pointer
3998  * @fence: radeon fence object
3999  *
4000  * Emits a fence sequence number on the compute ring and flushes
4001  * GPU caches.
4002  */
4003 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
4004 				 struct radeon_fence *fence)
4005 {
4006 	struct radeon_ring *ring = &rdev->ring[fence->ring];
4007 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
4008 
4009 	/* RELEASE_MEM - flush caches, send int */
4010 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
4011 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
4012 				 EOP_TC_ACTION_EN |
4013 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
4014 				 EVENT_INDEX(5)));
4015 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
4016 	radeon_ring_write(ring, addr & 0xfffffffc);
4017 	radeon_ring_write(ring, upper_32_bits(addr));
4018 	radeon_ring_write(ring, fence->seq);
4019 	radeon_ring_write(ring, 0);
4020 }
4021 
4022 /**
4023  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
4024  *
4025  * @rdev: radeon_device pointer
4026  * @ring: radeon ring buffer object
4027  * @semaphore: radeon semaphore object
4028  * @emit_wait: Is this a semaphore wait?
4029  *
4030  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4031  * from running ahead of semaphore waits.
4032  */
4033 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
4034 			     struct radeon_ring *ring,
4035 			     struct radeon_semaphore *semaphore,
4036 			     bool emit_wait)
4037 {
4038 	uint64_t addr = semaphore->gpu_addr;
4039 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4040 
4041 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
4042 	radeon_ring_write(ring, lower_32_bits(addr));
4043 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
4044 
4045 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
4046 		/* Prevent the PFP from running ahead of the semaphore wait */
4047 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4048 		radeon_ring_write(ring, 0x0);
4049 	}
4050 
4051 	return true;
4052 }
4053 
4054 /**
4055  * cik_copy_cpdma - copy pages using the CP DMA engine
4056  *
4057  * @rdev: radeon_device pointer
4058  * @src_offset: src GPU address
4059  * @dst_offset: dst GPU address
4060  * @num_gpu_pages: number of GPU pages to xfer
4061  * @resv: reservation object to sync to
4062  *
4063  * Copy GPU pages using the CP DMA engine (CIK+).
4064  * Used by the radeon ttm implementation to move pages if
4065  * registered as the asic copy callback.
4066  */
4067 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
4068 				    uint64_t src_offset, uint64_t dst_offset,
4069 				    unsigned num_gpu_pages,
4070 				    struct reservation_object *resv)
4071 {
4072 	struct radeon_fence *fence;
4073 	struct radeon_sync sync;
4074 	int ring_index = rdev->asic->copy.blit_ring_index;
4075 	struct radeon_ring *ring = &rdev->ring[ring_index];
4076 	u32 size_in_bytes, cur_size_in_bytes, control;
4077 	int i, num_loops;
4078 	int r = 0;
4079 
4080 	radeon_sync_create(&sync);
4081 
4082 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4083 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
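	/* Each DMA_DATA packet moves at most 0x1fffff bytes, so e.g. a
	 * 4 MiB copy is split into 3 loops.
	 */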
4084 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4085 	if (r) {
4086 		DRM_ERROR("radeon: moving bo (%d).\n", r);
4087 		radeon_sync_free(rdev, &sync, NULL);
4088 		return ERR_PTR(r);
4089 	}
4090 
4091 	radeon_sync_resv(rdev, &sync, resv, false);
4092 	radeon_sync_rings(rdev, &sync, ring->idx);
4093 
4094 	for (i = 0; i < num_loops; i++) {
4095 		cur_size_in_bytes = size_in_bytes;
4096 		if (cur_size_in_bytes > 0x1fffff)
4097 			cur_size_in_bytes = 0x1fffff;
4098 		size_in_bytes -= cur_size_in_bytes;
4099 		control = 0;
4100 		if (size_in_bytes == 0)
4101 			control |= PACKET3_DMA_DATA_CP_SYNC;
4102 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4103 		radeon_ring_write(ring, control);
4104 		radeon_ring_write(ring, lower_32_bits(src_offset));
4105 		radeon_ring_write(ring, upper_32_bits(src_offset));
4106 		radeon_ring_write(ring, lower_32_bits(dst_offset));
4107 		radeon_ring_write(ring, upper_32_bits(dst_offset));
4108 		radeon_ring_write(ring, cur_size_in_bytes);
4109 		src_offset += cur_size_in_bytes;
4110 		dst_offset += cur_size_in_bytes;
4111 	}
4112 
4113 	r = radeon_fence_emit(rdev, &fence, ring->idx);
4114 	if (r) {
4115 		radeon_ring_unlock_undo(rdev, ring);
4116 		radeon_sync_free(rdev, &sync, NULL);
4117 		return ERR_PTR(r);
4118 	}
4119 
4120 	radeon_ring_unlock_commit(rdev, ring, false);
4121 	radeon_sync_free(rdev, &sync, fence);
4122 
4123 	return fence;
4124 }
4125 
4126 /*
4127  * IB stuff
4128  */
4129 /**
4130  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4131  *
4132  * @rdev: radeon_device pointer
4133  * @ib: radeon indirect buffer object
4134  *
4135  * Emits a DE (drawing engine) or CE (constant engine) IB
4136  * on the gfx ring.  IBs are usually generated by userspace
4137  * acceleration drivers and submitted to the kernel for
4138  * scheduling on the ring.  This function schedules the IB
4139  * on the gfx ring for execution by the GPU.
4140  */
4141 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
4142 {
4143 	struct radeon_ring *ring = &rdev->ring[ib->ring];
4144 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
4145 	u32 header, control = INDIRECT_BUFFER_VALID;
4146 
4147 	if (ib->is_const_ib) {
4148 		/* set switch buffer packet before const IB */
4149 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
4150 		radeon_ring_write(ring, 0);
4151 
4152 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
4153 	} else {
4154 		u32 next_rptr;
4155 		if (ring->rptr_save_reg) {
4156 			next_rptr = ring->wptr + 3 + 4;
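			/* 3 dwords for the SET_UCONFIG_REG packet below plus
			 * 4 for the INDIRECT_BUFFER packet emitted at the
			 * end of this function.
			 */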
4157 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
4158 			radeon_ring_write(ring, ((ring->rptr_save_reg -
4159 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
4160 			radeon_ring_write(ring, next_rptr);
4161 		} else if (rdev->wb.enabled) {
4162 			next_rptr = ring->wptr + 5 + 4;
4163 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4164 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
4165 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
4166 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
4167 			radeon_ring_write(ring, next_rptr);
4168 		}
4169 
4170 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
4171 	}
4172 
4173 	control |= ib->length_dw | (vm_id << 24);
4174 
4175 	radeon_ring_write(ring, header);
4176 	radeon_ring_write(ring, (ib->gpu_addr & 0xFFFFFFFC));
4177 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
4178 	radeon_ring_write(ring, control);
4179 }
4180 
4181 /**
4182  * cik_ib_test - basic gfx ring IB test
4183  *
4184  * @rdev: radeon_device pointer
4185  * @ring: radeon_ring structure holding ring information
4186  *
4187  * Allocate an IB and execute it on the gfx ring (CIK).
4188  * Provides a basic gfx ring test to verify that IBs are working.
4189  * Returns 0 on success, error on failure.
4190  */
4191 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4192 {
4193 	struct radeon_ib ib;
4194 	uint32_t scratch;
4195 	uint32_t tmp = 0;
4196 	unsigned i;
4197 	int r;
4198 
4199 	r = radeon_scratch_get(rdev, &scratch);
4200 	if (r) {
4201 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4202 		return r;
4203 	}
4204 	WREG32(scratch, 0xCAFEDEAD);
4205 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4206 	if (r) {
4207 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4208 		radeon_scratch_free(rdev, scratch);
4209 		return r;
4210 	}
4211 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4212 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4213 	ib.ptr[2] = 0xDEADBEEF;
4214 	ib.length_dw = 3;
4215 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
4216 	if (r) {
4217 		radeon_scratch_free(rdev, scratch);
4218 		radeon_ib_free(rdev, &ib);
4219 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4220 		return r;
4221 	}
4222 	r = radeon_fence_wait(ib.fence, false);
4223 	if (r) {
4224 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4225 		radeon_scratch_free(rdev, scratch);
4226 		radeon_ib_free(rdev, &ib);
4227 		return r;
4228 	}
4229 	for (i = 0; i < rdev->usec_timeout; i++) {
4230 		tmp = RREG32(scratch);
4231 		if (tmp == 0xDEADBEEF)
4232 			break;
4233 		DRM_UDELAY(1);
4234 	}
4235 	if (i < rdev->usec_timeout) {
4236 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4237 	} else {
4238 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4239 			  scratch, tmp);
4240 		r = -EINVAL;
4241 	}
4242 	radeon_scratch_free(rdev, scratch);
4243 	radeon_ib_free(rdev, &ib);
4244 	return r;
4245 }
4246 
4247 /*
4248  * CP.
4249  * On CIK, gfx and compute now have independent command processors.
4250  *
4251  * GFX
4252  * Gfx consists of a single ring and can process both gfx jobs and
4253  * compute jobs.  The gfx CP consists of three microengines (ME):
4254  * PFP - Pre-Fetch Parser
4255  * ME - Micro Engine
4256  * CE - Constant Engine
4257  * The PFP and ME make up what is considered the Drawing Engine (DE).
4258  * The CE is an asynchronous engine used for updating buffer descriptors
4259  * used by the DE so that they can be loaded into cache in parallel
4260  * while the DE is processing state update packets.
4261  *
4262  * Compute
4263  * The compute CP consists of two microengines (ME):
4264  * MEC1 - Compute MicroEngine 1
4265  * MEC2 - Compute MicroEngine 2
4266  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4267  * The queues are exposed to userspace and are programmed directly
4268  * by the compute runtime.
4269  */
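/*
 * e.g. on KV that is 2 MECs x 4 pipes x 8 queues = 64 compute queues
 * alongside the single gfx ring (see cik_mec_init() below).
 */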
4270 /**
4271  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4272  *
4273  * @rdev: radeon_device pointer
4274  * @enable: enable or disable the MEs
4275  *
4276  * Halts or unhalts the gfx MEs.
4277  */
4278 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4279 {
4280 	if (enable)
4281 		WREG32(CP_ME_CNTL, 0);
4282 	else {
4283 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4284 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4285 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4286 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4287 	}
4288 	udelay(50);
4289 }
4290 
4291 /**
4292  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4293  *
4294  * @rdev: radeon_device pointer
4295  *
4296  * Loads the gfx PFP, ME, and CE ucode.
4297  * Returns 0 for success, -EINVAL if the ucode is not available.
4298  */
4299 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4300 {
4301 	int i;
4302 
4303 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4304 		return -EINVAL;
4305 
4306 	cik_cp_gfx_enable(rdev, false);
4307 
4308 	if (rdev->new_fw) {
4309 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4310 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4311 		const struct gfx_firmware_header_v1_0 *ce_hdr =
4312 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4313 		const struct gfx_firmware_header_v1_0 *me_hdr =
4314 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4315 		const __le32 *fw_data;
4316 		u32 fw_size;
4317 
4318 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4319 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4320 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4321 
4322 		/* PFP */
4323 		fw_data = (const __le32 *)
4324 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4325 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4326 		WREG32(CP_PFP_UCODE_ADDR, 0);
4327 		for (i = 0; i < fw_size; i++)
4328 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4329 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4330 
4331 		/* CE */
4332 		fw_data = (const __le32 *)
4333 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4334 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4335 		WREG32(CP_CE_UCODE_ADDR, 0);
4336 		for (i = 0; i < fw_size; i++)
4337 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4338 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4339 
4340 		/* ME */
4341 		fw_data = (const __le32 *)
4342 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4343 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4344 		WREG32(CP_ME_RAM_WADDR, 0);
4345 		for (i = 0; i < fw_size; i++)
4346 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4347 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4348 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4349 	} else {
4350 		const __be32 *fw_data;
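
		/* Legacy images are raw big-endian ucode words with no
		 * header, hence be32_to_cpup() and the fixed CIK_*_UCODE_SIZE
		 * counts below.
		 */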
4351 
4352 		/* PFP */
4353 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4354 		WREG32(CP_PFP_UCODE_ADDR, 0);
4355 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4356 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4357 		WREG32(CP_PFP_UCODE_ADDR, 0);
4358 
4359 		/* CE */
4360 		fw_data = (const __be32 *)rdev->ce_fw->data;
4361 		WREG32(CP_CE_UCODE_ADDR, 0);
4362 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4363 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4364 		WREG32(CP_CE_UCODE_ADDR, 0);
4365 
4366 		/* ME */
4367 		fw_data = (const __be32 *)rdev->me_fw->data;
4368 		WREG32(CP_ME_RAM_WADDR, 0);
4369 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4370 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4371 		WREG32(CP_ME_RAM_WADDR, 0);
4372 	}
4373 
4374 	return 0;
4375 }
4376 
4377 /**
4378  * cik_cp_gfx_start - start the gfx ring
4379  *
4380  * @rdev: radeon_device pointer
4381  *
4382  * Enables the ring and loads the clear state context and other
4383  * packets required to init the ring.
4384  * Returns 0 for success, error for failure.
4385  */
4386 static int cik_cp_gfx_start(struct radeon_device *rdev)
4387 {
4388 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4389 	int r, i;
4390 
4391 	/* init the CP */
4392 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
4393 	WREG32(CP_ENDIAN_SWAP, 0);
4394 	WREG32(CP_DEVICE_ID, 1);
4395 
4396 	cik_cp_gfx_enable(rdev, true);
4397 
4398 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
4399 	if (r) {
4400 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
4401 		return r;
4402 	}
4403 
4404 	/* init the CE partitions.  CE only used for gfx on CIK */
4405 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
4406 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
4407 	radeon_ring_write(ring, 0x8000);
4408 	radeon_ring_write(ring, 0x8000);
4409 
4410 	/* setup clear context state */
4411 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4412 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
4413 
4414 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
4415 	radeon_ring_write(ring, 0x80000000);
4416 	radeon_ring_write(ring, 0x80000000);
4417 
4418 	for (i = 0; i < cik_default_size; i++)
4419 		radeon_ring_write(ring, cik_default_state[i]);
4420 
4421 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
4422 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
4423 
4424 	/* set clear context state */
4425 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
4426 	radeon_ring_write(ring, 0);
4427 
4428 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
4429 	radeon_ring_write(ring, 0x00000316);
4430 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
4431 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
4432 
4433 	radeon_ring_unlock_commit(rdev, ring, false);
4434 
4435 	return 0;
4436 }
4437 
4438 /**
4439  * cik_cp_gfx_fini - stop the gfx ring
4440  *
4441  * @rdev: radeon_device pointer
4442  *
4443  * Stop the gfx ring and tear down the driver ring
4444  * info.
4445  */
4446 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4447 {
4448 	cik_cp_gfx_enable(rdev, false);
4449 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4450 }
4451 
4452 /**
4453  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4454  *
4455  * @rdev: radeon_device pointer
4456  *
4457  * Program the location and size of the gfx ring buffer
4458  * and test it to make sure it's working.
4459  * Returns 0 for success, error for failure.
4460  */
4461 static int cik_cp_gfx_resume(struct radeon_device *rdev)
4462 {
4463 	struct radeon_ring *ring;
4464 	u32 tmp;
4465 	u32 rb_bufsz;
4466 	u64 rb_addr;
4467 	int r;
4468 
4469 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
4470 	if (rdev->family != CHIP_HAWAII)
4471 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
4472 
4473 	/* Set the write pointer delay */
4474 	WREG32(CP_RB_WPTR_DELAY, 0);
4475 
4476 	/* set the RB to use vmid 0 */
4477 	WREG32(CP_RB_VMID, 0);
4478 
4479 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4480 
4481 	/* ring 0 - compute and gfx */
4482 	/* Set ring buffer size */
4483 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4484 	rb_bufsz = order_base_2(ring->ring_size / 8);
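	/* rb_bufsz is log2 of the ring size in qwords; e.g. a 1 MiB ring
	 * gives rb_bufsz = 17.
	 */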
4485 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4486 #ifdef __BIG_ENDIAN
4487 	tmp |= BUF_SWAP_32BIT;
4488 #endif
4489 	WREG32(CP_RB0_CNTL, tmp);
4490 
4491 	/* Initialize the ring buffer's read and write pointers */
4492 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4493 	ring->wptr = 0;
4494 	WREG32(CP_RB0_WPTR, ring->wptr);
4495 
4496 	/* set the wb address whether it's enabled or not */
4497 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4498 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4499 
4500 	/* scratch register shadowing is no longer supported */
4501 	WREG32(SCRATCH_UMSK, 0);
4502 
4503 	if (!rdev->wb.enabled)
4504 		tmp |= RB_NO_UPDATE;
4505 
4506 	mdelay(1);
4507 	WREG32(CP_RB0_CNTL, tmp);
4508 
4509 	rb_addr = ring->gpu_addr >> 8;
4510 	WREG32(CP_RB0_BASE, rb_addr);
4511 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4512 
4513 	/* start the ring */
4514 	cik_cp_gfx_start(rdev);
4515 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4516 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4517 	if (r) {
4518 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4519 		return r;
4520 	}
4521 
4522 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4523 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4524 
4525 	return 0;
4526 }
4527 
4528 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4529 		     struct radeon_ring *ring)
4530 {
4531 	u32 rptr;
4532 
4533 	if (rdev->wb.enabled)
4534 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4535 	else
4536 		rptr = RREG32(CP_RB0_RPTR);
4537 
4538 	return rptr;
4539 }
4540 
4541 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4542 		     struct radeon_ring *ring)
4543 {
4544 	u32 wptr;
4545 
4546 	wptr = RREG32(CP_RB0_WPTR);
4547 
4548 	return wptr;
4549 }
4550 
4551 void cik_gfx_set_wptr(struct radeon_device *rdev,
4552 		      struct radeon_ring *ring)
4553 {
4554 	WREG32(CP_RB0_WPTR, ring->wptr);
4555 	(void)RREG32(CP_RB0_WPTR);
4556 }
4557 
4558 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4559 			 struct radeon_ring *ring)
4560 {
4561 	u32 rptr;
4562 
4563 	if (rdev->wb.enabled) {
4564 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4565 	} else {
4566 		mutex_lock(&rdev->srbm_mutex);
4567 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4568 		rptr = RREG32(CP_HQD_PQ_RPTR);
4569 		cik_srbm_select(rdev, 0, 0, 0, 0);
4570 		mutex_unlock(&rdev->srbm_mutex);
4571 	}
4572 
4573 	return rptr;
4574 }
4575 
4576 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4577 			 struct radeon_ring *ring)
4578 {
4579 	u32 wptr;
4580 
4581 	if (rdev->wb.enabled) {
4582 		/* XXX check if swapping is necessary on BE */
4583 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4584 	} else {
4585 		mutex_lock(&rdev->srbm_mutex);
4586 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4587 		wptr = RREG32(CP_HQD_PQ_WPTR);
4588 		cik_srbm_select(rdev, 0, 0, 0, 0);
4589 		mutex_unlock(&rdev->srbm_mutex);
4590 	}
4591 
4592 	return wptr;
4593 }
4594 
4595 void cik_compute_set_wptr(struct radeon_device *rdev,
4596 			  struct radeon_ring *ring)
4597 {
4598 	/* XXX check if swapping is necessary on BE */
4599 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4600 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4601 }
4602 
4603 static void cik_compute_stop(struct radeon_device *rdev,
4604 			     struct radeon_ring *ring)
4605 {
4606 	u32 j, tmp;
4607 
4608 	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4609 	/* Disable wptr polling. */
4610 	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4611 	tmp &= ~WPTR_POLL_EN;
4612 	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4613 	/* Disable HQD. */
4614 	if (RREG32(CP_HQD_ACTIVE) & 1) {
4615 		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4616 		for (j = 0; j < rdev->usec_timeout; j++) {
4617 			if (!(RREG32(CP_HQD_ACTIVE) & 1))
4618 				break;
4619 			udelay(1);
4620 		}
4621 		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
4622 		WREG32(CP_HQD_PQ_RPTR, 0);
4623 		WREG32(CP_HQD_PQ_WPTR, 0);
4624 	}
4625 	cik_srbm_select(rdev, 0, 0, 0, 0);
4626 }
4627 
4628 /**
4629  * cik_cp_compute_enable - enable/disable the compute CP MEs
4630  *
4631  * @rdev: radeon_device pointer
4632  * @enable: enable or disable the MEs
4633  *
4634  * Halts or unhalts the compute MEs.
4635  */
4636 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4637 {
4638 	if (enable)
4639 		WREG32(CP_MEC_CNTL, 0);
4640 	else {
4641 		/*
4642 		 * To make hibernation reliable we need to clear compute ring
4643 		 * configuration before halting the compute ring.
4644 		 */
4645 		mutex_lock(&rdev->srbm_mutex);
4646 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
4647 		cik_compute_stop(rdev, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
4648 		mutex_unlock(&rdev->srbm_mutex);
4649 
4650 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4651 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4652 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4653 	}
4654 	udelay(50);
4655 }
4656 
4657 /**
4658  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4659  *
4660  * @rdev: radeon_device pointer
4661  *
4662  * Loads the compute MEC1&2 ucode.
4663  * Returns 0 for success, -EINVAL if the ucode is not available.
4664  */
4665 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4666 {
4667 	int i;
4668 
4669 	if (!rdev->mec_fw)
4670 		return -EINVAL;
4671 
4672 	cik_cp_compute_enable(rdev, false);
4673 
4674 	if (rdev->new_fw) {
4675 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4676 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4677 		const __le32 *fw_data;
4678 		u32 fw_size;
4679 
4680 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4681 
4682 		/* MEC1 */
4683 		fw_data = (const __le32 *)
4684 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4685 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4686 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4687 		for (i = 0; i < fw_size; i++)
4688 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4689 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4690 
4691 		/* MEC2 */
4692 		if (rdev->family == CHIP_KAVERI) {
4693 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4694 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4695 
4696 			fw_data = (const __le32 *)
4697 				(rdev->mec2_fw->data +
4698 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4699 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4700 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4701 			for (i = 0; i < fw_size; i++)
4702 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4703 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4704 		}
4705 	} else {
4706 		const __be32 *fw_data;
4707 
4708 		/* MEC1 */
4709 		fw_data = (const __be32 *)rdev->mec_fw->data;
4710 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4711 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4712 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4713 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4714 
4715 		if (rdev->family == CHIP_KAVERI) {
4716 			/* MEC2 */
4717 			fw_data = (const __be32 *)rdev->mec_fw->data;
4718 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4719 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4720 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4721 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4722 		}
4723 	}
4724 
4725 	return 0;
4726 }
4727 
4728 /**
4729  * cik_cp_compute_start - start the compute queues
4730  *
4731  * @rdev: radeon_device pointer
4732  *
4733  * Enable the compute queues.
4734  * Returns 0 for success, error for failure.
4735  */
4736 static int cik_cp_compute_start(struct radeon_device *rdev)
4737 {
4738 	cik_cp_compute_enable(rdev, true);
4739 
4740 	return 0;
4741 }
4742 
4743 /**
4744  * cik_cp_compute_fini - stop the compute queues
4745  *
4746  * @rdev: radeon_device pointer
4747  *
4748  * Stop the compute queues and tear down the driver queue
4749  * info.
4750  */
4751 static void cik_cp_compute_fini(struct radeon_device *rdev)
4752 {
4753 	int i, idx, r;
4754 
4755 	cik_cp_compute_enable(rdev, false);
4756 
4757 	for (i = 0; i < 2; i++) {
4758 		if (i == 0)
4759 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4760 		else
4761 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4762 
4763 		if (rdev->ring[idx].mqd_obj) {
4764 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4765 			if (unlikely(r != 0))
4766 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4767 
4768 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4769 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4770 
4771 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4772 			rdev->ring[idx].mqd_obj = NULL;
4773 		}
4774 	}
4775 }
4776 
4777 static void cik_mec_fini(struct radeon_device *rdev)
4778 {
4779 	int r;
4780 
4781 	if (rdev->mec.hpd_eop_obj) {
4782 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4783 		if (unlikely(r != 0))
4784 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4785 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4786 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4787 
4788 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4789 		rdev->mec.hpd_eop_obj = NULL;
4790 	}
4791 }
4792 
4793 #define MEC_HPD_SIZE 2048
4794 
4795 static int cik_mec_init(struct radeon_device *rdev)
4796 {
4797 	int r;
4798 	u32 *hpd;
4799 
4800 	/*
4801 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4802 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4803 	 * Nonetheless, we assign only 1 pipe because all other pipes will
4804 	 * be handled by KFD
4805 	 */
4806 	rdev->mec.num_mec = 1;
4807 	rdev->mec.num_pipe = 1;
4808 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4809 
4810 	if (rdev->mec.hpd_eop_obj == NULL) {
4811 		r = radeon_bo_create(rdev,
4812 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4813 				     PAGE_SIZE, true,
4814 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4815 				     &rdev->mec.hpd_eop_obj);
4816 		if (r) {
4817 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4818 			return r;
4819 		}
4820 	}
4821 
4822 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4823 	if (unlikely(r != 0)) {
4824 		cik_mec_fini(rdev);
4825 		return r;
4826 	}
4827 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4828 			  &rdev->mec.hpd_eop_gpu_addr);
4829 	if (r) {
4830 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4831 		cik_mec_fini(rdev);
4832 		return r;
4833 	}
4834 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4835 	if (r) {
4836 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4837 		cik_mec_fini(rdev);
4838 		return r;
4839 	}
4840 
4841 	/* clear memory.  Not sure if this is required or not */
4842 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4843 
4844 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4845 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4846 
4847 	return 0;
4848 }
4849 
4850 struct hqd_registers {
4852 	u32 cp_mqd_base_addr;
4853 	u32 cp_mqd_base_addr_hi;
4854 	u32 cp_hqd_active;
4855 	u32 cp_hqd_vmid;
4856 	u32 cp_hqd_persistent_state;
4857 	u32 cp_hqd_pipe_priority;
4858 	u32 cp_hqd_queue_priority;
4859 	u32 cp_hqd_quantum;
4860 	u32 cp_hqd_pq_base;
4861 	u32 cp_hqd_pq_base_hi;
4862 	u32 cp_hqd_pq_rptr;
4863 	u32 cp_hqd_pq_rptr_report_addr;
4864 	u32 cp_hqd_pq_rptr_report_addr_hi;
4865 	u32 cp_hqd_pq_wptr_poll_addr;
4866 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4867 	u32 cp_hqd_pq_doorbell_control;
4868 	u32 cp_hqd_pq_wptr;
4869 	u32 cp_hqd_pq_control;
4870 	u32 cp_hqd_ib_base_addr;
4871 	u32 cp_hqd_ib_base_addr_hi;
4872 	u32 cp_hqd_ib_rptr;
4873 	u32 cp_hqd_ib_control;
4874 	u32 cp_hqd_iq_timer;
4875 	u32 cp_hqd_iq_rptr;
4876 	u32 cp_hqd_dequeue_request;
4877 	u32 cp_hqd_dma_offload;
4878 	u32 cp_hqd_sema_cmd;
4879 	u32 cp_hqd_msg_type;
4880 	u32 cp_hqd_atomic0_preop_lo;
4881 	u32 cp_hqd_atomic0_preop_hi;
4882 	u32 cp_hqd_atomic1_preop_lo;
4883 	u32 cp_hqd_atomic1_preop_hi;
4884 	u32 cp_hqd_hq_scheduler0;
4885 	u32 cp_hqd_hq_scheduler1;
4886 	u32 cp_mqd_control;
4887 };
4888 
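/*
 * MQD (memory queue descriptor): the layout the CP reads from memory to
 * load a compute queue's state; queue_state mirrors the CP_HQD_ and
 * CP_MQD_ registers captured in struct hqd_registers above.
 */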
4889 struct bonaire_mqd {
4891 	u32 header;
4892 	u32 dispatch_initiator;
4893 	u32 dimensions[3];
4894 	u32 start_idx[3];
4895 	u32 num_threads[3];
4896 	u32 pipeline_stat_enable;
4897 	u32 perf_counter_enable;
4898 	u32 pgm[2];
4899 	u32 tba[2];
4900 	u32 tma[2];
4901 	u32 pgm_rsrc[2];
4902 	u32 vmid;
4903 	u32 resource_limits;
4904 	u32 static_thread_mgmt01[2];
4905 	u32 tmp_ring_size;
4906 	u32 static_thread_mgmt23[2];
4907 	u32 restart[3];
4908 	u32 thread_trace_enable;
4909 	u32 reserved1;
4910 	u32 user_data[16];
4911 	u32 vgtcs_invoke_count[2];
4912 	struct hqd_registers queue_state;
4913 	u32 dequeue_cntr;
4914 	u32 interrupt_queue[64];
4915 };
4916 
4917 /**
4918  * cik_cp_compute_resume - setup the compute queue registers
4919  *
4920  * @rdev: radeon_device pointer
4921  *
4922  * Program the compute queues and test them to make sure they
4923  * are working.
4924  * Returns 0 for success, error for failure.
4925  */
4926 static int cik_cp_compute_resume(struct radeon_device *rdev)
4927 {
4928 	int r, i, j, idx;
4929 	u32 tmp;
4930 	bool use_doorbell = true;
4931 	u64 hqd_gpu_addr;
4932 	u64 mqd_gpu_addr;
4933 	u64 eop_gpu_addr;
4934 	u64 wb_gpu_addr;
4935 	u32 *buf;
4936 	struct bonaire_mqd *mqd;
4937 
4938 	r = cik_cp_compute_start(rdev);
4939 	if (r)
4940 		return r;
4941 
4942 	/* fix up chicken bits */
4943 	tmp = RREG32(CP_CPF_DEBUG);
4944 	tmp |= (1 << 23);
4945 	WREG32(CP_CPF_DEBUG, tmp);
4946 
4947 	/* init the pipes */
4948 	mutex_lock(&rdev->srbm_mutex);
4949 
4950 	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4951 
4952 	cik_srbm_select(rdev, 0, 0, 0, 0);
4953 
4954 	/* write the EOP addr */
4955 	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4956 	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
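	/* the EOP base address registers are in 256-byte units, hence the shift by 8 */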
4957 
4958 	/* set the VMID assigned */
4959 	WREG32(CP_HPD_EOP_VMID, 0);
4960 
4961 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4962 	tmp = RREG32(CP_HPD_EOP_CONTROL);
4963 	tmp &= ~EOP_SIZE_MASK;
4964 	tmp |= order_base_2(MEC_HPD_SIZE / 8);
4965 	WREG32(CP_HPD_EOP_CONTROL, tmp);
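	/* e.g. assuming MEC_HPD_SIZE is 2048 bytes: order_base_2(2048 / 8) = 8,
	 * and 2^(8+1) = 512 dwords = 2048 bytes
	 */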
4966 
4967 	mutex_unlock(&rdev->srbm_mutex);
4968 
4969 	/* init the queues.  Just two for now. */
4970 	for (i = 0; i < 2; i++) {
4971 		if (i == 0)
4972 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4973 		else
4974 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4975 
4976 		if (rdev->ring[idx].mqd_obj == NULL) {
4977 			r = radeon_bo_create(rdev,
4978 					     sizeof(struct bonaire_mqd),
4979 					     PAGE_SIZE, true,
4980 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4981 					     NULL, &rdev->ring[idx].mqd_obj);
4982 			if (r) {
4983 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4984 				return r;
4985 			}
4986 		}
4987 
4988 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4989 		if (unlikely(r != 0)) {
4990 			cik_cp_compute_fini(rdev);
4991 			return r;
4992 		}
4993 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4994 				  &mqd_gpu_addr);
4995 		if (r) {
4996 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4997 			cik_cp_compute_fini(rdev);
4998 			return r;
4999 		}
5000 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
5001 		if (r) {
5002 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
5003 			cik_cp_compute_fini(rdev);
5004 			return r;
5005 		}
5006 
5007 		/* init the mqd struct */
5008 		memset(buf, 0, sizeof(struct bonaire_mqd));
5009 
5010 		mqd = (struct bonaire_mqd *)buf;
5011 		mqd->header = 0xC0310800;
5012 		mqd->static_thread_mgmt01[0] = 0xffffffff;
5013 		mqd->static_thread_mgmt01[1] = 0xffffffff;
5014 		mqd->static_thread_mgmt23[0] = 0xffffffff;
5015 		mqd->static_thread_mgmt23[1] = 0xffffffff;
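		/* 0xffffffff presumably enables all CUs for this queue
		 * (the static per-SE thread/CU enable masks)
		 */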
5016 
5017 		mutex_lock(&rdev->srbm_mutex);
5018 		cik_srbm_select(rdev, rdev->ring[idx].me,
5019 				rdev->ring[idx].pipe,
5020 				rdev->ring[idx].queue, 0);
5021 
5022 		/* disable wptr polling */
5023 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
5024 		tmp &= ~WPTR_POLL_EN;
5025 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
5026 
5027 		/* enable doorbell? */
5028 		mqd->queue_state.cp_hqd_pq_doorbell_control =
5029 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5030 		if (use_doorbell)
5031 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5032 		else
5033 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
5034 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5035 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
5036 
5037 		/* disable the queue if it's active */
5038 		mqd->queue_state.cp_hqd_dequeue_request = 0;
5039 		mqd->queue_state.cp_hqd_pq_rptr = 0;
5040 		mqd->queue_state.cp_hqd_pq_wptr = 0;
5041 		if (RREG32(CP_HQD_ACTIVE) & 1) {
5042 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
5043 			for (j = 0; j < rdev->usec_timeout; j++) {
5044 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
5045 					break;
5046 				udelay(1);
5047 			}
5048 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
5049 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
5050 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5051 		}
5052 
5053 		/* set the pointer to the MQD */
5054 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
5055 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
5056 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
5057 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
5058 		/* set MQD vmid to 0 */
5059 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
5060 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
5061 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
5062 
5063 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
5064 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
5065 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
5066 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
5067 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
5068 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
5069 
5070 		/* set up the HQD, this is similar to CP_RB0_CNTL */
5071 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
5072 		mqd->queue_state.cp_hqd_pq_control &=
5073 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
5074 
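		/* like EOP_SIZE above, a QUEUE_SIZE field value of n selects a
		 * queue of 2^(n+1) dwords; ring_size is in bytes, so n =
		 * order_base_2(ring_size / 8).  RPTR_BLOCK_SIZE (<< 8)
		 * appears to use the same encoding, here one GPU page.
		 */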
5075 		mqd->queue_state.cp_hqd_pq_control |=
5076 			order_base_2(rdev->ring[idx].ring_size / 8);
5077 		mqd->queue_state.cp_hqd_pq_control |=
5078 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
5079 #ifdef __BIG_ENDIAN
5080 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
5081 #endif
5082 		mqd->queue_state.cp_hqd_pq_control &=
5083 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
5084 		mqd->queue_state.cp_hqd_pq_control |=
5085 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
5086 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
5087 
5088 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
5089 		if (i == 0)
5090 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
5091 		else
5092 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
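		/* the low word must be dword aligned; the hi register only
		 * holds bits 47:32 of the address
		 */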
5093 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
5094 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
5095 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
5096 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
5097 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
5098 
5099 		/* set the wb address whether it's enabled or not */
5100 		if (i == 0)
5101 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
5102 		else
5103 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
5104 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
5105 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5106 			upper_32_bits(wb_gpu_addr) & 0xffff;
5107 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5108 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5109 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5110 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5111 
5112 		/* enable the doorbell if requested */
5113 		if (use_doorbell) {
5114 			mqd->queue_state.cp_hqd_pq_doorbell_control =
5115 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5116 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5117 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
5118 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5119 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5120 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
5121 				~(DOORBELL_SOURCE | DOORBELL_HIT);
5122 
5123 		} else {
5124 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5125 		}
5126 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5127 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
5128 
5129 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5130 		rdev->ring[idx].wptr = 0;
5131 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5132 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5133 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5134 
5135 		/* set the vmid for the queue */
5136 		mqd->queue_state.cp_hqd_vmid = 0;
5137 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5138 
5139 		/* activate the queue */
5140 		mqd->queue_state.cp_hqd_active = 1;
5141 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5142 
5143 		cik_srbm_select(rdev, 0, 0, 0, 0);
5144 		mutex_unlock(&rdev->srbm_mutex);
5145 
5146 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5147 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5148 
5149 		rdev->ring[idx].ready = true;
5150 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5151 		if (r)
5152 			rdev->ring[idx].ready = false;
5153 	}
5154 
5155 	return 0;
5156 }
5157 
5158 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
5159 {
5160 	cik_cp_gfx_enable(rdev, enable);
5161 	cik_cp_compute_enable(rdev, enable);
5162 }
5163 
5164 static int cik_cp_load_microcode(struct radeon_device *rdev)
5165 {
5166 	int r;
5167 
5168 	r = cik_cp_gfx_load_microcode(rdev);
5169 	if (r)
5170 		return r;
5171 	r = cik_cp_compute_load_microcode(rdev);
5172 	if (r)
5173 		return r;
5174 
5175 	return 0;
5176 }
5177 
5178 static void cik_cp_fini(struct radeon_device *rdev)
5179 {
5180 	cik_cp_gfx_fini(rdev);
5181 	cik_cp_compute_fini(rdev);
5182 }
5183 
5184 static int cik_cp_resume(struct radeon_device *rdev)
5185 {
5186 	int r;
5187 
5188 	cik_enable_gui_idle_interrupt(rdev, false);
5189 
5190 	r = cik_cp_load_microcode(rdev);
5191 	if (r)
5192 		return r;
5193 
5194 	r = cik_cp_gfx_resume(rdev);
5195 	if (r)
5196 		return r;
5197 	r = cik_cp_compute_resume(rdev);
5198 	if (r)
5199 		return r;
5200 
5201 	cik_enable_gui_idle_interrupt(rdev, true);
5202 
5203 	return 0;
5204 }
5205 
5206 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
5207 {
5208 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
5209 		RREG32(GRBM_STATUS));
5210 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
5211 		RREG32(GRBM_STATUS2));
5212 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
5213 		RREG32(GRBM_STATUS_SE0));
5214 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
5215 		RREG32(GRBM_STATUS_SE1));
5216 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
5217 		RREG32(GRBM_STATUS_SE2));
5218 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
5219 		RREG32(GRBM_STATUS_SE3));
5220 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
5221 		RREG32(SRBM_STATUS));
5222 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
5223 		RREG32(SRBM_STATUS2));
5224 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
5225 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
5226 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
5227 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
5228 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
5229 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
5230 		 RREG32(CP_STALLED_STAT1));
5231 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
5232 		 RREG32(CP_STALLED_STAT2));
5233 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
5234 		 RREG32(CP_STALLED_STAT3));
5235 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
5236 		 RREG32(CP_CPF_BUSY_STAT));
5237 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
5238 		 RREG32(CP_CPF_STALLED_STAT1));
5239 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
5240 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
5241 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
5242 		 RREG32(CP_CPC_STALLED_STAT1));
5243 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
5244 }
5245 
5246 /**
5247  * cik_gpu_check_soft_reset - check which blocks are busy
5248  *
5249  * @rdev: radeon_device pointer
5250  *
5251  * Check which blocks are busy and return the relevant reset
5252  * mask to be used by cik_gpu_soft_reset().
5253  * Returns a mask of the blocks to be reset.
5254  */
5255 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5256 {
5257 	u32 reset_mask = 0;
5258 	u32 tmp;
5259 
5260 	/* GRBM_STATUS */
5261 	tmp = RREG32(GRBM_STATUS);
5262 	if (tmp & (PA_BUSY | SC_BUSY |
5263 		   BCI_BUSY | SX_BUSY |
5264 		   TA_BUSY | VGT_BUSY |
5265 		   DB_BUSY | CB_BUSY |
5266 		   GDS_BUSY | SPI_BUSY |
5267 		   IA_BUSY | IA_BUSY_NO_DMA))
5268 		reset_mask |= RADEON_RESET_GFX;
5269 
5270 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5271 		reset_mask |= RADEON_RESET_CP;
5272 
5273 	/* GRBM_STATUS2 */
5274 	tmp = RREG32(GRBM_STATUS2);
5275 	if (tmp & RLC_BUSY)
5276 		reset_mask |= RADEON_RESET_RLC;
5277 
5278 	/* SDMA0_STATUS_REG */
5279 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5280 	if (!(tmp & SDMA_IDLE))
5281 		reset_mask |= RADEON_RESET_DMA;
5282 
5283 	/* SDMA1_STATUS_REG */
5284 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5285 	if (!(tmp & SDMA_IDLE))
5286 		reset_mask |= RADEON_RESET_DMA1;
5287 
5288 	/* SRBM_STATUS2 */
5289 	tmp = RREG32(SRBM_STATUS2);
5290 	if (tmp & SDMA_BUSY)
5291 		reset_mask |= RADEON_RESET_DMA;
5292 
5293 	if (tmp & SDMA1_BUSY)
5294 		reset_mask |= RADEON_RESET_DMA1;
5295 
5296 	/* SRBM_STATUS */
5297 	tmp = RREG32(SRBM_STATUS);
5298 
5299 	if (tmp & IH_BUSY)
5300 		reset_mask |= RADEON_RESET_IH;
5301 
5302 	if (tmp & SEM_BUSY)
5303 		reset_mask |= RADEON_RESET_SEM;
5304 
5305 	if (tmp & GRBM_RQ_PENDING)
5306 		reset_mask |= RADEON_RESET_GRBM;
5307 
5308 	if (tmp & VMC_BUSY)
5309 		reset_mask |= RADEON_RESET_VMC;
5310 
5311 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5312 		   MCC_BUSY | MCD_BUSY))
5313 		reset_mask |= RADEON_RESET_MC;
5314 
5315 	if (evergreen_is_display_hung(rdev))
5316 		reset_mask |= RADEON_RESET_DISPLAY;
5317 
5318 	/* Skip MC reset as it's most likely not hung, just busy */
5319 	if (reset_mask & RADEON_RESET_MC) {
5320 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5321 		reset_mask &= ~RADEON_RESET_MC;
5322 	}
5323 
5324 	return reset_mask;
5325 }
5326 
5327 /**
5328  * cik_gpu_soft_reset - soft reset GPU
5329  *
5330  * @rdev: radeon_device pointer
5331  * @reset_mask: mask of which blocks to reset
5332  *
5333  * Soft reset the blocks specified in @reset_mask.
5334  */
5335 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5336 {
5337 	struct evergreen_mc_save save;
5338 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5339 	u32 tmp;
5340 
5341 	if (reset_mask == 0)
5342 		return;
5343 
5344 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5345 
5346 	cik_print_gpu_status_regs(rdev);
5347 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5348 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5349 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5350 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5351 
5352 	/* disable CG/PG */
5353 	cik_fini_pg(rdev);
5354 	cik_fini_cg(rdev);
5355 
5356 	/* stop the rlc */
5357 	cik_rlc_stop(rdev);
5358 
5359 	/* Disable GFX parsing/prefetching */
5360 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5361 
5362 	/* Disable MEC parsing/prefetching */
5363 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5364 
5365 	if (reset_mask & RADEON_RESET_DMA) {
5366 		/* sdma0 */
5367 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5368 		tmp |= SDMA_HALT;
5369 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5370 	}
5371 	if (reset_mask & RADEON_RESET_DMA1) {
5372 		/* sdma1 */
5373 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5374 		tmp |= SDMA_HALT;
5375 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5376 	}
5377 
5378 	evergreen_mc_stop(rdev, &save);
5379 	if (evergreen_mc_wait_for_idle(rdev)) {
5380 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5381 	}
5382 
5383 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5384 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5385 
5386 	if (reset_mask & RADEON_RESET_CP) {
5387 		grbm_soft_reset |= SOFT_RESET_CP;
5388 
5389 		srbm_soft_reset |= SOFT_RESET_GRBM;
5390 	}
5391 
5392 	if (reset_mask & RADEON_RESET_DMA)
5393 		srbm_soft_reset |= SOFT_RESET_SDMA;
5394 
5395 	if (reset_mask & RADEON_RESET_DMA1)
5396 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5397 
5398 	if (reset_mask & RADEON_RESET_DISPLAY)
5399 		srbm_soft_reset |= SOFT_RESET_DC;
5400 
5401 	if (reset_mask & RADEON_RESET_RLC)
5402 		grbm_soft_reset |= SOFT_RESET_RLC;
5403 
5404 	if (reset_mask & RADEON_RESET_SEM)
5405 		srbm_soft_reset |= SOFT_RESET_SEM;
5406 
5407 	if (reset_mask & RADEON_RESET_IH)
5408 		srbm_soft_reset |= SOFT_RESET_IH;
5409 
5410 	if (reset_mask & RADEON_RESET_GRBM)
5411 		srbm_soft_reset |= SOFT_RESET_GRBM;
5412 
5413 	if (reset_mask & RADEON_RESET_VMC)
5414 		srbm_soft_reset |= SOFT_RESET_VMC;
5415 
5416 	if (!(rdev->flags & RADEON_IS_IGP)) {
5417 		if (reset_mask & RADEON_RESET_MC)
5418 			srbm_soft_reset |= SOFT_RESET_MC;
5419 	}
5420 
5421 	if (grbm_soft_reset) {
5422 		tmp = RREG32(GRBM_SOFT_RESET);
5423 		tmp |= grbm_soft_reset;
5424 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5425 		WREG32(GRBM_SOFT_RESET, tmp);
5426 		tmp = RREG32(GRBM_SOFT_RESET);
5427 
5428 		udelay(50);
5429 
5430 		tmp &= ~grbm_soft_reset;
5431 		WREG32(GRBM_SOFT_RESET, tmp);
5432 		tmp = RREG32(GRBM_SOFT_RESET);
5433 	}
5434 
5435 	if (srbm_soft_reset) {
5436 		tmp = RREG32(SRBM_SOFT_RESET);
5437 		tmp |= srbm_soft_reset;
5438 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5439 		WREG32(SRBM_SOFT_RESET, tmp);
5440 		tmp = RREG32(SRBM_SOFT_RESET);
5441 
5442 		udelay(50);
5443 
5444 		tmp &= ~srbm_soft_reset;
5445 		WREG32(SRBM_SOFT_RESET, tmp);
5446 		tmp = RREG32(SRBM_SOFT_RESET);
5447 	}
5448 
5449 	/* Wait a little for things to settle down */
5450 	udelay(50);
5451 
5452 	evergreen_mc_resume(rdev, &save);
5453 	udelay(50);
5454 
5455 	cik_print_gpu_status_regs(rdev);
5456 }
5457 
5458 struct kv_reset_save_regs {
5459 	u32 gmcon_reng_execute;
5460 	u32 gmcon_misc;
5461 	u32 gmcon_misc3;
5462 };
5463 
5464 static void kv_save_regs_for_reset(struct radeon_device *rdev,
5465 				   struct kv_reset_save_regs *save)
5466 {
5467 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
5468 	save->gmcon_misc = RREG32(GMCON_MISC);
5469 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
5470 
5471 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
5472 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
5473 						STCTRL_STUTTER_EN));
5474 }
5475 
5476 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5477 				      struct kv_reset_save_regs *save)
5478 {
5479 	int i;
5480 
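	/* Replay the MC power-gating FSM programming sequence: each
	 * GMCON_PGFSM_CONFIG write selects a PGFSM block and the
	 * GMCON_PGFSM_WRITE writes that follow load its state.  The
	 * values are a fixed sequence from AMD's KV reset procedure.
	 */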
5481 	WREG32(GMCON_PGFSM_WRITE, 0);
5482 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5483 
5484 	for (i = 0; i < 5; i++)
5485 		WREG32(GMCON_PGFSM_WRITE, 0);
5486 
5487 	WREG32(GMCON_PGFSM_WRITE, 0);
5488 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5489 
5490 	for (i = 0; i < 5; i++)
5491 		WREG32(GMCON_PGFSM_WRITE, 0);
5492 
5493 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5494 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5495 
5496 	for (i = 0; i < 5; i++)
5497 		WREG32(GMCON_PGFSM_WRITE, 0);
5498 
5499 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5500 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5501 
5502 	for (i = 0; i < 5; i++)
5503 		WREG32(GMCON_PGFSM_WRITE, 0);
5504 
5505 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5506 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5507 
5508 	for (i = 0; i < 5; i++)
5509 		WREG32(GMCON_PGFSM_WRITE, 0);
5510 
5511 	WREG32(GMCON_PGFSM_WRITE, 0);
5512 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5513 
5514 	for (i = 0; i < 5; i++)
5515 		WREG32(GMCON_PGFSM_WRITE, 0);
5516 
5517 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5518 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5519 
5520 	for (i = 0; i < 5; i++)
5521 		WREG32(GMCON_PGFSM_WRITE, 0);
5522 
5523 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5524 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5525 
5526 	for (i = 0; i < 5; i++)
5527 		WREG32(GMCON_PGFSM_WRITE, 0);
5528 
5529 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5530 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5531 
5532 	for (i = 0; i < 5; i++)
5533 		WREG32(GMCON_PGFSM_WRITE, 0);
5534 
5535 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5536 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5537 
5538 	for (i = 0; i < 5; i++)
5539 		WREG32(GMCON_PGFSM_WRITE, 0);
5540 
5541 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5542 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5543 
5544 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5545 	WREG32(GMCON_MISC, save->gmcon_misc);
5546 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5547 }
5548 
5549 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5550 {
5551 	struct evergreen_mc_save save;
5552 	struct kv_reset_save_regs kv_save = { 0 };
5553 	u32 tmp, i;
5554 
5555 	dev_info(rdev->dev, "GPU pci config reset\n");
5556 
5557 	/* disable dpm? */
5558 
5559 	/* disable cg/pg */
5560 	cik_fini_pg(rdev);
5561 	cik_fini_cg(rdev);
5562 
5563 	/* Disable GFX parsing/prefetching */
5564 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5565 
5566 	/* Disable MEC parsing/prefetching */
5567 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5568 
5569 	/* sdma0 */
5570 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5571 	tmp |= SDMA_HALT;
5572 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5573 	/* sdma1 */
5574 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5575 	tmp |= SDMA_HALT;
5576 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5577 	/* XXX other engines? */
5578 
5579 	/* halt the rlc, disable cp internal ints */
5580 	cik_rlc_stop(rdev);
5581 
5582 	udelay(50);
5583 
5584 	/* disable mem access */
5585 	evergreen_mc_stop(rdev, &save);
5586 	if (evergreen_mc_wait_for_idle(rdev)) {
5587 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5588 	}
5589 
5590 	if (rdev->flags & RADEON_IS_IGP)
5591 		kv_save_regs_for_reset(rdev, &kv_save);
5592 
5593 	/* disable BM */
5594 	pci_clear_master(rdev->pdev);
5595 	/* reset */
5596 	radeon_pci_config_reset(rdev);
5597 
5598 	udelay(100);
5599 
5600 	/* wait for asic to come out of reset */
5601 	for (i = 0; i < rdev->usec_timeout; i++) {
5602 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5603 			break;
5604 		udelay(1);
5605 	}
5606 
5607 	/* does asic init need to be run first??? */
5608 	if (rdev->flags & RADEON_IS_IGP)
5609 		kv_restore_regs_for_reset(rdev, &kv_save);
5610 }
5611 
5612 /**
5613  * cik_asic_reset - soft reset GPU
5614  *
5615  * @rdev: radeon_device pointer
5616  *
5617  * Look up which blocks are hung and attempt
5618  * to reset them.
5619  * Returns 0 for success.
5620  */
5621 int cik_asic_reset(struct radeon_device *rdev)
5622 {
5623 	u32 reset_mask;
5624 
5625 	reset_mask = cik_gpu_check_soft_reset(rdev);
5626 
5627 	if (reset_mask)
5628 		r600_set_bios_scratch_engine_hung(rdev, true);
5629 
5630 	/* try soft reset */
5631 	cik_gpu_soft_reset(rdev, reset_mask);
5632 
5633 	reset_mask = cik_gpu_check_soft_reset(rdev);
5634 
5635 	/* try pci config reset */
5636 	if (reset_mask && radeon_hard_reset)
5637 		cik_gpu_pci_config_reset(rdev);
5638 
5639 	reset_mask = cik_gpu_check_soft_reset(rdev);
5640 
5641 	if (!reset_mask)
5642 		r600_set_bios_scratch_engine_hung(rdev, false);
5643 
5644 	return 0;
5645 }
5646 
5647 /**
5648  * cik_gfx_is_lockup - check if the 3D engine is locked up
5649  *
5650  * @rdev: radeon_device pointer
5651  * @ring: radeon_ring structure holding ring information
5652  *
5653  * Check if the 3D engine is locked up (CIK).
5654  * Returns true if the engine is locked, false if not.
5655  */
5656 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5657 {
5658 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5659 
5660 	if (!(reset_mask & (RADEON_RESET_GFX |
5661 			    RADEON_RESET_COMPUTE |
5662 			    RADEON_RESET_CP))) {
5663 		radeon_ring_lockup_update(rdev, ring);
5664 		return false;
5665 	}
5666 	return radeon_ring_test_lockup(rdev, ring);
5667 }
5668 
5669 /* MC */
5670 /**
5671  * cik_mc_program - program the GPU memory controller
5672  *
5673  * @rdev: radeon_device pointer
5674  *
5675  * Set the location of vram, gart, and AGP in the GPU's
5676  * physical address space (CIK).
5677  */
5678 static void cik_mc_program(struct radeon_device *rdev)
5679 {
5680 	struct evergreen_mc_save save;
5681 	u32 tmp;
5682 	int i, j;
5683 
5684 	/* Initialize HDP */
5685 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5686 		WREG32((0x2c14 + j), 0x00000000);
5687 		WREG32((0x2c18 + j), 0x00000000);
5688 		WREG32((0x2c1c + j), 0x00000000);
5689 		WREG32((0x2c20 + j), 0x00000000);
5690 		WREG32((0x2c24 + j), 0x00000000);
5691 	}
5692 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5693 
5694 	evergreen_mc_stop(rdev, &save);
5695 	if (radeon_mc_wait_for_idle(rdev)) {
5696 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5697 	}
5698 	/* Lock out access through the VGA aperture */
5699 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5700 	/* Update configuration */
5701 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5702 	       rdev->mc.vram_start >> 12);
5703 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5704 	       rdev->mc.vram_end >> 12);
5705 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5706 	       rdev->vram_scratch.gpu_addr >> 12);
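	/* FB_LOCATION packs the VRAM range in 16MB units: top in bits 31:16, base in bits 15:0 */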
5707 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5708 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5709 	WREG32(MC_VM_FB_LOCATION, tmp);
5710 	/* XXX double check these! */
5711 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5712 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5713 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5714 	WREG32(MC_VM_AGP_BASE, 0);
5715 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5716 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5717 	if (radeon_mc_wait_for_idle(rdev)) {
5718 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
5719 	}
5720 	evergreen_mc_resume(rdev, &save);
5721 	/* we need to own VRAM, so turn off the VGA renderer here
5722 	 * to stop it overwriting our objects */
5723 	rv515_vga_render_disable(rdev);
5724 }
5725 
5726 /**
5727  * cik_mc_init - initialize the memory controller driver params
5728  *
5729  * @rdev: radeon_device pointer
5730  *
5731  * Look up the amount of vram, vram width, and decide how to place
5732  * vram and gart within the GPU's physical address space (CIK).
5733  * Returns 0 for success.
5734  */
5735 static int cik_mc_init(struct radeon_device *rdev)
5736 {
5737 	u32 tmp;
5738 	int chansize, numchan;
5739 
5740 	/* Get VRAM information */
5741 	rdev->mc.vram_is_ddr = true;
5742 	tmp = RREG32(MC_ARB_RAMCFG);
5743 	if (tmp & CHANSIZE_MASK) {
5744 		chansize = 64;
5745 	} else {
5746 		chansize = 32;
5747 	}
5748 	tmp = RREG32(MC_SHARED_CHMAP);
5749 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5750 	case 0:
5751 	default:
5752 		numchan = 1;
5753 		break;
5754 	case 1:
5755 		numchan = 2;
5756 		break;
5757 	case 2:
5758 		numchan = 4;
5759 		break;
5760 	case 3:
5761 		numchan = 8;
5762 		break;
5763 	case 4:
5764 		numchan = 3;
5765 		break;
5766 	case 5:
5767 		numchan = 6;
5768 		break;
5769 	case 6:
5770 		numchan = 10;
5771 		break;
5772 	case 7:
5773 		numchan = 12;
5774 		break;
5775 	case 8:
5776 		numchan = 16;
5777 		break;
5778 	}
5779 	rdev->mc.vram_width = numchan * chansize;
5780 	/* Could the aperture size report 0? */
5781 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5782 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5783 	/* size in MB on CIK */
5784 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5785 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5786 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5787 	si_vram_gtt_location(rdev, &rdev->mc);
5788 	radeon_update_bandwidth_info(rdev);
5789 
5790 	return 0;
5791 }
5792 
5793 /*
5794  * GART
5795  * VMID 0 is the physical GPU addresses as used by the kernel.
5796  * VMID 0 holds the physical GPU addresses used by the kernel.
5797  * by the radeon vm/hsa code.
5798  */
5799 /**
5800  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5801  *
5802  * @rdev: radeon_device pointer
5803  *
5804  * Flush the TLB for the VMID 0 page table (CIK).
5805  */
5806 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5807 {
5808 	/* flush hdp cache */
5809 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5810 
5811 	/* bits 0-15 are the VM contexts 0-15 */
5812 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5813 }
5814 
5815 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5816 {
5817 	int i;
5818 	uint32_t sh_mem_bases, sh_mem_config;
5819 
5820 	sh_mem_bases = 0x6000 | 0x6000 << 16;
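	/* per the GFX7 SH_MEM_BASES layout, bits 15:0 hold the private
	 * aperture base and bits 31:16 the shared aperture base, here
	 * both set to 0x6000 for the compute VMIDs
	 */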
5821 	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5822 	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5823 
5824 	mutex_lock(&rdev->srbm_mutex);
5825 	for (i = 8; i < 16; i++) {
5826 		cik_srbm_select(rdev, 0, 0, 0, i);
5827 		/* CP and shaders */
5828 		WREG32(SH_MEM_CONFIG, sh_mem_config);
5829 		WREG32(SH_MEM_APE1_BASE, 1);
5830 		WREG32(SH_MEM_APE1_LIMIT, 0);
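		/* base > limit leaves the APE1 aperture disabled */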
5831 		WREG32(SH_MEM_BASES, sh_mem_bases);
5832 	}
5833 	cik_srbm_select(rdev, 0, 0, 0, 0);
5834 	mutex_unlock(&rdev->srbm_mutex);
5835 }
5836 
5837 /**
5838  * cik_pcie_gart_enable - gart enable
5839  *
5840  * @rdev: radeon_device pointer
5841  *
5842  * This sets up the TLBs, programs the page tables for VMID0,
5843  * sets up the hw for VMIDs 1-15 which are allocated on
5844  * demand, and sets up the global locations for the LDS, GDS,
5845  * and GPUVM for FSA64 clients (CIK).
5846  * Returns 0 for success, errors for failure.
5847  */
5848 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5849 {
5850 	int r, i;
5851 
5852 	if (rdev->gart.robj == NULL) {
5853 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5854 		return -EINVAL;
5855 	}
5856 	r = radeon_gart_table_vram_pin(rdev);
5857 	if (r)
5858 		return r;
5859 	/* Setup TLB control */
5860 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5861 	       (0xA << 7) |
5862 	       ENABLE_L1_TLB |
5863 	       ENABLE_L1_FRAGMENT_PROCESSING |
5864 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5865 	       ENABLE_ADVANCED_DRIVER_MODEL |
5866 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5867 	/* Setup L2 cache */
5868 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5869 	       ENABLE_L2_FRAGMENT_PROCESSING |
5870 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5871 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5872 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5873 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5874 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5875 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5876 	       BANK_SELECT(4) |
5877 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5878 	/* setup context0 */
5879 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5880 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5881 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5882 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5883 			(u32)(rdev->dummy_page.addr >> 12));
5884 	WREG32(VM_CONTEXT0_CNTL2, 0);
5885 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5886 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5887 
5888 	WREG32(0x15D4, 0);
5889 	WREG32(0x15D8, 0);
5890 	WREG32(0x15DC, 0);
5891 
5892 	/* restore context1-15 */
5893 	/* set vm size, must be a multiple of 4 */
5894 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5895 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5896 	for (i = 1; i < 16; i++) {
5897 		if (i < 8)
5898 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5899 			       rdev->vm_manager.saved_table_addr[i]);
5900 		else
5901 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5902 			       rdev->vm_manager.saved_table_addr[i]);
5903 	}
5904 
5905 	/* enable context1-15 */
5906 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5907 	       (u32)(rdev->dummy_page.addr >> 12));
5908 	WREG32(VM_CONTEXT1_CNTL2, 4);
5909 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5910 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5911 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5912 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5913 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5914 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5915 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5916 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5917 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5918 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5919 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5920 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5921 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5922 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5923 
5924 	if (rdev->family == CHIP_KAVERI) {
5925 		u32 tmp = RREG32(CHUB_CONTROL);
5926 		tmp &= ~BYPASS_VM;
5927 		WREG32(CHUB_CONTROL, tmp);
5928 	}
5929 
5930 	/* XXX SH_MEM regs */
5931 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5932 	mutex_lock(&rdev->srbm_mutex);
5933 	for (i = 0; i < 16; i++) {
5934 		cik_srbm_select(rdev, 0, 0, 0, i);
5935 		/* CP and shaders */
5936 		WREG32(SH_MEM_CONFIG, 0);
5937 		WREG32(SH_MEM_APE1_BASE, 1);
5938 		WREG32(SH_MEM_APE1_LIMIT, 0);
5939 		WREG32(SH_MEM_BASES, 0);
5940 		/* SDMA GFX */
5941 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5942 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5943 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5944 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5945 		/* XXX SDMA RLC - todo */
5946 	}
5947 	cik_srbm_select(rdev, 0, 0, 0, 0);
5948 	mutex_unlock(&rdev->srbm_mutex);
5949 
5950 	cik_pcie_init_compute_vmid(rdev);
5951 
5952 	cik_pcie_gart_tlb_flush(rdev);
5953 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5954 		 (unsigned)(rdev->mc.gtt_size >> 20),
5955 		 (unsigned long long)rdev->gart.table_addr);
5956 	rdev->gart.ready = true;
5957 	return 0;
5958 }
5959 
5960 /**
5961  * cik_pcie_gart_disable - gart disable
5962  *
5963  * @rdev: radeon_device pointer
5964  *
5965  * This disables all VM page tables (CIK).
5966  */
5967 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5968 {
5969 	unsigned i;
5970 
5971 	for (i = 1; i < 16; ++i) {
5972 		uint32_t reg;
5973 		if (i < 8)
5974 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5975 		else
5976 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5977 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5978 	}
5979 
5980 	/* Disable all tables */
5981 	WREG32(VM_CONTEXT0_CNTL, 0);
5982 	WREG32(VM_CONTEXT1_CNTL, 0);
5983 	/* Setup TLB control */
5984 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5985 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5986 	/* Setup L2 cache */
5987 	WREG32(VM_L2_CNTL,
5988 	       ENABLE_L2_FRAGMENT_PROCESSING |
5989 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5990 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5991 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5992 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5993 	WREG32(VM_L2_CNTL2, 0);
5994 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5995 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5996 	radeon_gart_table_vram_unpin(rdev);
5997 }
5998 
5999 /**
6000  * cik_pcie_gart_fini - vm fini callback
6001  *
6002  * @rdev: radeon_device pointer
6003  *
6004  * Tears down the driver GART/VM setup (CIK).
6005  */
6006 static void cik_pcie_gart_fini(struct radeon_device *rdev)
6007 {
6008 	cik_pcie_gart_disable(rdev);
6009 	radeon_gart_table_vram_free(rdev);
6010 	radeon_gart_fini(rdev);
6011 }
6012 
6013 /* vm parser */
6014 /**
6015  * cik_ib_parse - vm ib_parse callback
6016  *
6017  * @rdev: radeon_device pointer
6018  * @ib: indirect buffer pointer
6019  *
6020  * CIK uses hw IB checking so this is a nop (CIK).
6021  */
6022 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
6023 {
6024 	return 0;
6025 }
6026 
6027 /*
6028  * vm
6029  * VMID 0 is the physical GPU addresses as used by the kernel.
6030  * VMID 0 holds the physical GPU addresses used by the kernel.
6031  * by the radeon vm/hsa code.
6032  */
6033 /**
6034  * cik_vm_init - cik vm init callback
6035  *
6036  * @rdev: radeon_device pointer
6037  *
6038  * Inits cik specific vm parameters (number of VMs, base of vram for
6039  * VMIDs 1-15) (CIK).
6040  * Returns 0 for success.
6041  */
6042 int cik_vm_init(struct radeon_device *rdev)
6043 {
6044 	/*
6045 	 * number of VMs
6046 	 * VMID 0 is reserved for System
6047 	 * radeon graphics/compute will use VMIDs 1-7
6048 	 * amdkfd will use VMIDs 8-15
6049 	 */
6050 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
6051 	/* base offset of vram pages */
6052 	if (rdev->flags & RADEON_IS_IGP) {
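		/* MC_VM_FB_OFFSET is in 4MB units, hence the shift by 22 */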
6053 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
6054 		tmp <<= 22;
6055 		rdev->vm_manager.vram_base_offset = tmp;
6056 	} else {
6057 		rdev->vm_manager.vram_base_offset = 0;
	}
6058 
6059 	return 0;
6060 }
6061 
6062 /**
6063  * cik_vm_fini - cik vm fini callback
6064  *
6065  * @rdev: radeon_device pointer
6066  *
6067  * Tear down any asic specific VM setup (CIK).
6068  */
6069 void cik_vm_fini(struct radeon_device *rdev)
6070 {
6071 }
6072 
6073 /**
6074  * cik_vm_decode_fault - print human readable fault info
6075  *
6076  * @rdev: radeon_device pointer
6077  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
6078  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
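 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value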
6079  *
6080  * Print human readable fault information (CIK).
6081  */
6082 static void cik_vm_decode_fault(struct radeon_device *rdev,
6083 				u32 status, u32 addr, u32 mc_client)
6084 {
6085 	u32 mc_id;
6086 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
6087 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
6088 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
6089 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
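	/* mc_client packs a four-character ASCII block name, most significant byte first */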
6090 
6091 	if (rdev->family == CHIP_HAWAII)
6092 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6093 	else
6094 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6095 
6096 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
6097 	       protections, vmid, addr,
6098 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
6099 	       block, mc_client, mc_id);
6100 }
6101 
6102 /**
6103  * cik_vm_flush - cik vm flush using the CP
6104  *
6105  * @rdev: radeon_device pointer
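 * @ring: radeon_ring structure holding ring information
 * @vm_id: VMID of the page tables to flush
 * @pd_addr: physical address of the page directory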
6106  *
6107  * Update the page table base and flush the VM TLB
6108  * using the CP (CIK).
6109  */
6110 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
6111 		  unsigned vm_id, uint64_t pd_addr)
6112 {
6113 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
6114 
6115 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6116 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6117 				 WRITE_DATA_DST_SEL(0)));
6118 	if (vm_id < 8) {
6119 		radeon_ring_write(ring,
6120 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
6121 	} else {
6122 		radeon_ring_write(ring,
6123 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
6124 	}
6125 	radeon_ring_write(ring, 0);
6126 	radeon_ring_write(ring, pd_addr >> 12);
6127 
6128 	/* update SH_MEM_* regs */
6129 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6130 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6131 				 WRITE_DATA_DST_SEL(0)));
6132 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6133 	radeon_ring_write(ring, 0);
6134 	radeon_ring_write(ring, VMID(vm_id));
6135 
6136 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6137 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6138 				 WRITE_DATA_DST_SEL(0)));
6139 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
6140 	radeon_ring_write(ring, 0);
6141 
6142 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6143 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6144 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6145 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6146 
6147 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6148 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6149 				 WRITE_DATA_DST_SEL(0)));
6150 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6151 	radeon_ring_write(ring, 0);
6152 	radeon_ring_write(ring, VMID(0));
6153 
6154 	/* HDP flush */
6155 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6156 
6157 	/* bits 0-15 are the VM contexts0-15 */
6158 	/* bits 0-15 are the VM contexts 0-15 */
6159 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6160 				 WRITE_DATA_DST_SEL(0)));
6161 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6162 	radeon_ring_write(ring, 0);
6163 	radeon_ring_write(ring, 1 << vm_id);
6164 
6165 	/* wait for the invalidate to complete */
6166 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6167 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6168 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6169 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6170 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6171 	radeon_ring_write(ring, 0);
6172 	radeon_ring_write(ring, 0); /* ref */
6173 	radeon_ring_write(ring, 0); /* mask */
6174 	radeon_ring_write(ring, 0x20); /* poll interval */
6175 
6176 	/* compute doesn't have PFP */
6177 	if (usepfp) {
6178 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6179 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6180 		radeon_ring_write(ring, 0x0);
6181 	}
6182 }
6183 
6184 /*
6185  * RLC
6186  * The RLC is a multi-purpose microengine that handles a
6187  * variety of functions, the most important of which is
6188  * the interrupt controller.
6189  */
6190 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6191 					  bool enable)
6192 {
6193 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6194 
6195 	if (enable)
6196 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6197 	else
6198 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6199 	WREG32(CP_INT_CNTL_RING0, tmp);
6200 }
6201 
6202 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6203 {
6204 	u32 tmp;
6205 
6206 	tmp = RREG32(RLC_LB_CNTL);
6207 	if (enable)
6208 		tmp |= LOAD_BALANCE_ENABLE;
6209 	else
6210 		tmp &= ~LOAD_BALANCE_ENABLE;
6211 	WREG32(RLC_LB_CNTL, tmp);
6212 }
6213 
6214 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
6215 {
6216 	u32 i, j, k;
6217 	u32 mask;
6218 
6219 	mutex_lock(&rdev->grbm_idx_mutex);
6220 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6221 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6222 			cik_select_se_sh(rdev, i, j);
6223 			for (k = 0; k < rdev->usec_timeout; k++) {
6224 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
6225 					break;
6226 				udelay(1);
6227 			}
6228 		}
6229 	}
6230 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6231 	mutex_unlock(&rdev->grbm_idx_mutex);
6232 
6233 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
6234 	for (k = 0; k < rdev->usec_timeout; k++) {
6235 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
6236 			break;
6237 		udelay(1);
6238 	}
6239 }
6240 
6241 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6242 {
6243 	u32 tmp;
6244 
6245 	tmp = RREG32(RLC_CNTL);
6246 	if (tmp != rlc)
6247 		WREG32(RLC_CNTL, rlc);
6248 }
6249 
6250 static u32 cik_halt_rlc(struct radeon_device *rdev)
6251 {
6252 	u32 data, orig;
6253 
6254 	orig = data = RREG32(RLC_CNTL);
6255 
6256 	if (data & RLC_ENABLE) {
6257 		u32 i;
6258 
6259 		data &= ~RLC_ENABLE;
6260 		WREG32(RLC_CNTL, data);
6261 
6262 		for (i = 0; i < rdev->usec_timeout; i++) {
6263 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
6264 				break;
6265 			udelay(1);
6266 		}
6267 
6268 		cik_wait_for_rlc_serdes(rdev);
6269 	}
6270 
6271 	return orig;
6272 }
6273 
6274 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
6275 {
6276 	u32 tmp, i, mask;
6277 
6278 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
6279 	WREG32(RLC_GPR_REG2, tmp);
6280 
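	/* wait until the RLC reports both gfx power and clock status */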
6281 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
6282 	for (i = 0; i < rdev->usec_timeout; i++) {
6283 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
6284 			break;
6285 		udelay(1);
6286 	}
6287 
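	/* then wait for the RLC to acknowledge by clearing the REQ bit */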
6288 	for (i = 0; i < rdev->usec_timeout; i++) {
6289 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
6290 			break;
6291 		udelay(1);
6292 	}
6293 }
6294 
6295 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6296 {
6297 	u32 tmp;
6298 
6299 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6300 	WREG32(RLC_GPR_REG2, tmp);
6301 }
6302 
6303 /**
6304  * cik_rlc_stop - stop the RLC ME
6305  *
6306  * @rdev: radeon_device pointer
6307  *
6308  * Halt the RLC ME (MicroEngine) (CIK).
6309  */
6310 static void cik_rlc_stop(struct radeon_device *rdev)
6311 {
6312 	WREG32(RLC_CNTL, 0);
6313 
6314 	cik_enable_gui_idle_interrupt(rdev, false);
6315 
6316 	cik_wait_for_rlc_serdes(rdev);
6317 }
6318 
6319 /**
6320  * cik_rlc_start - start the RLC ME
6321  *
6322  * @rdev: radeon_device pointer
6323  *
6324  * Unhalt the RLC ME (MicroEngine) (CIK).
6325  */
6326 static void cik_rlc_start(struct radeon_device *rdev)
6327 {
6328 	WREG32(RLC_CNTL, RLC_ENABLE);
6329 
6330 	cik_enable_gui_idle_interrupt(rdev, true);
6331 
6332 	udelay(50);
6333 }
6334 
6335 /**
6336  * cik_rlc_resume - setup the RLC hw
6337  *
6338  * @rdev: radeon_device pointer
6339  *
6340  * Initialize the RLC registers, load the ucode,
6341  * and start the RLC (CIK).
6342  * Returns 0 for success, -EINVAL if the ucode is not available.
6343  */
6344 static int cik_rlc_resume(struct radeon_device *rdev)
6345 {
6346 	u32 i, size, tmp;
6347 
6348 	if (!rdev->rlc_fw)
6349 		return -EINVAL;
6350 
6351 	cik_rlc_stop(rdev);
6352 
6353 	/* disable CG */
6354 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6355 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6356 
6357 	si_rlc_reset(rdev);
6358 
6359 	cik_init_pg(rdev);
6360 
6361 	cik_init_cg(rdev);
6362 
6363 	WREG32(RLC_LB_CNTR_INIT, 0);
6364 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6365 
6366 	mutex_lock(&rdev->grbm_idx_mutex);
6367 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6368 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6369 	WREG32(RLC_LB_PARAMS, 0x00600408);
6370 	WREG32(RLC_LB_CNTL, 0x80000004);
6371 	mutex_unlock(&rdev->grbm_idx_mutex);
6372 
6373 	WREG32(RLC_MC_CNTL, 0);
6374 	WREG32(RLC_UCODE_CNTL, 0);
6375 
6376 	if (rdev->new_fw) {
6377 		const struct rlc_firmware_header_v1_0 *hdr =
6378 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6379 		const __le32 *fw_data = (const __le32 *)
6380 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6381 
6382 		radeon_ucode_print_rlc_hdr(&hdr->header);
6383 
6384 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6385 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6386 		for (i = 0; i < size; i++)
6387 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6388 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6389 	} else {
6390 		const __be32 *fw_data;
6391 
6392 		switch (rdev->family) {
6393 		case CHIP_BONAIRE:
6394 		case CHIP_HAWAII:
6395 		default:
6396 			size = BONAIRE_RLC_UCODE_SIZE;
6397 			break;
6398 		case CHIP_KAVERI:
6399 			size = KV_RLC_UCODE_SIZE;
6400 			break;
6401 		case CHIP_KABINI:
6402 			size = KB_RLC_UCODE_SIZE;
6403 			break;
6404 		case CHIP_MULLINS:
6405 			size = ML_RLC_UCODE_SIZE;
6406 			break;
6407 		}
6408 
6409 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6410 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6411 		for (i = 0; i < size; i++)
6412 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6413 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6414 	}
6415 
6416 	/* XXX - find out what chips support lbpw */
6417 	cik_enable_lbpw(rdev, false);
6418 
6419 	if (rdev->family == CHIP_BONAIRE)
6420 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6421 
6422 	cik_rlc_start(rdev);
6423 
6424 	return 0;
6425 }
6426 
6427 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
6428 {
6429 	u32 data, orig, tmp, tmp2;
6430 
6431 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
6432 
6433 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
6434 		cik_enable_gui_idle_interrupt(rdev, true);
6435 
6436 		tmp = cik_halt_rlc(rdev);
6437 
6438 		mutex_lock(&rdev->grbm_idx_mutex);
6439 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6440 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6441 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6442 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
6443 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
6444 		mutex_unlock(&rdev->grbm_idx_mutex);
6445 
6446 		cik_update_rlc(rdev, tmp);
6447 
6448 		data |= CGCG_EN | CGLS_EN;
6449 	} else {
6450 		cik_enable_gui_idle_interrupt(rdev, false);
6451 
6452 		RREG32(CB_CGTT_SCLK_CTRL);
6453 		RREG32(CB_CGTT_SCLK_CTRL);
6454 		RREG32(CB_CGTT_SCLK_CTRL);
6455 		RREG32(CB_CGTT_SCLK_CTRL);
6456 
6457 		data &= ~(CGCG_EN | CGLS_EN);
6458 	}
6459 
6460 	if (orig != data)
6461 		WREG32(RLC_CGCG_CGLS_CTRL, data);
6462 
6463 }
6464 
6465 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
6466 {
6467 	u32 data, orig, tmp = 0;
6468 
6469 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
6470 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
6471 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
6472 				orig = data = RREG32(CP_MEM_SLP_CNTL);
6473 				data |= CP_MEM_LS_EN;
6474 				if (orig != data)
6475 					WREG32(CP_MEM_SLP_CNTL, data);
6476 			}
6477 		}
6478 
6479 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6480 		data |= 0x00000001;
6481 		data &= 0xfffffffd;
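		/* i.e. set bit 0 and clear bit 1 of the MGCG override */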
6482 		if (orig != data)
6483 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6484 
6485 		tmp = cik_halt_rlc(rdev);
6486 
6487 		mutex_lock(&rdev->grbm_idx_mutex);
6488 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6489 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6490 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6491 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
6492 		WREG32(RLC_SERDES_WR_CTRL, data);
6493 		mutex_unlock(&rdev->grbm_idx_mutex);
6494 
6495 		cik_update_rlc(rdev, tmp);
6496 
6497 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
6498 			orig = data = RREG32(CGTS_SM_CTRL_REG);
6499 			data &= ~SM_MODE_MASK;
6500 			data |= SM_MODE(0x2);
6501 			data |= SM_MODE_ENABLE;
6502 			data &= ~CGTS_OVERRIDE;
6503 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
6504 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
6505 				data &= ~CGTS_LS_OVERRIDE;
6506 			data &= ~ON_MONITOR_ADD_MASK;
6507 			data |= ON_MONITOR_ADD_EN;
6508 			data |= ON_MONITOR_ADD(0x96);
6509 			if (orig != data)
6510 				WREG32(CGTS_SM_CTRL_REG, data);
6511 		}
6512 	} else {
6513 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
6514 		data |= 0x00000003;
6515 		if (orig != data)
6516 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
6517 
6518 		data = RREG32(RLC_MEM_SLP_CNTL);
6519 		if (data & RLC_MEM_LS_EN) {
6520 			data &= ~RLC_MEM_LS_EN;
6521 			WREG32(RLC_MEM_SLP_CNTL, data);
6522 		}
6523 
6524 		data = RREG32(CP_MEM_SLP_CNTL);
6525 		if (data & CP_MEM_LS_EN) {
6526 			data &= ~CP_MEM_LS_EN;
6527 			WREG32(CP_MEM_SLP_CNTL, data);
6528 		}
6529 
6530 		orig = data = RREG32(CGTS_SM_CTRL_REG);
6531 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
6532 		if (orig != data)
6533 			WREG32(CGTS_SM_CTRL_REG, data);
6534 
6535 		tmp = cik_halt_rlc(rdev);
6536 
6537 		mutex_lock(&rdev->grbm_idx_mutex);
6538 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6539 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
6540 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
6541 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
6542 		WREG32(RLC_SERDES_WR_CTRL, data);
6543 		mutex_unlock(&rdev->grbm_idx_mutex);
6544 
6545 		cik_update_rlc(rdev, tmp);
6546 	}
6547 }
6548 
6549 static const u32 mc_cg_registers[] =
6550 {
6551 	MC_HUB_MISC_HUB_CG,
6552 	MC_HUB_MISC_SIP_CG,
6553 	MC_HUB_MISC_VM_CG,
6554 	MC_XPB_CLK_GAT,
6555 	ATC_MISC_CG,
6556 	MC_CITF_MISC_WR_CG,
6557 	MC_CITF_MISC_RD_CG,
6558 	MC_CITF_MISC_VM_CG,
6559 	VM_L2_CG,
6560 };
6561 
6562 static void cik_enable_mc_ls(struct radeon_device *rdev,
6563 			     bool enable)
6564 {
6565 	int i;
6566 	u32 orig, data;
6567 
6568 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6569 		orig = data = RREG32(mc_cg_registers[i]);
6570 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6571 			data |= MC_LS_ENABLE;
6572 		else
6573 			data &= ~MC_LS_ENABLE;
6574 		if (data != orig)
6575 			WREG32(mc_cg_registers[i], data);
6576 	}
6577 }
6578 
6579 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6580 			       bool enable)
6581 {
6582 	int i;
6583 	u32 orig, data;
6584 
6585 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6586 		orig = data = RREG32(mc_cg_registers[i]);
6587 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6588 			data |= MC_CG_ENABLE;
6589 		else
6590 			data &= ~MC_CG_ENABLE;
6591 		if (data != orig)
6592 			WREG32(mc_cg_registers[i], data);
6593 	}
6594 }
6595 
6596 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6597 				 bool enable)
6598 {
6599 	u32 orig, data;
6600 
6601 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6602 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6603 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
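		/* 0x100 with the 0xff000000 override bits clear enables dynamic
		 * clock gating; the disable path below sets those override bits
		 */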
6604 	} else {
6605 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6606 		data |= 0xff000000;
6607 		if (data != orig)
6608 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6609 
6610 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6611 		data |= 0xff000000;
6612 		if (data != orig)
6613 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6614 	}
6615 }
6616 
6617 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6618 				 bool enable)
6619 {
6620 	u32 orig, data;
6621 
6622 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6623 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6624 		data |= 0x100;
6625 		if (orig != data)
6626 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6627 
6628 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6629 		data |= 0x100;
6630 		if (orig != data)
6631 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6632 	} else {
6633 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6634 		data &= ~0x100;
6635 		if (orig != data)
6636 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6637 
6638 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6639 		data &= ~0x100;
6640 		if (orig != data)
6641 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6642 	}
6643 }
6644 
6645 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6646 				bool enable)
6647 {
6648 	u32 orig, data;
6649 
6650 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6651 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6652 		data = 0xfff;
6653 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6654 
6655 		orig = data = RREG32(UVD_CGC_CTRL);
6656 		data |= DCM;
6657 		if (orig != data)
6658 			WREG32(UVD_CGC_CTRL, data);
6659 	} else {
6660 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6661 		data &= ~0xfff;
6662 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6663 
6664 		orig = data = RREG32(UVD_CGC_CTRL);
6665 		data &= ~DCM;
6666 		if (orig != data)
6667 			WREG32(UVD_CGC_CTRL, data);
6668 	}
6669 }
6670 
6671 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6672 			       bool enable)
6673 {
6674 	u32 orig, data;
6675 
6676 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6677 
6678 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6679 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6680 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6681 	else
6682 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6683 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6684 
6685 	if (orig != data)
6686 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6687 }
6688 
6689 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6690 				bool enable)
6691 {
6692 	u32 orig, data;
6693 
6694 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6695 
6696 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6697 		data &= ~CLOCK_GATING_DIS;
6698 	else
6699 		data |= CLOCK_GATING_DIS;
6700 
6701 	if (orig != data)
6702 		WREG32(HDP_HOST_PATH_CNTL, data);
6703 }
6704 
6705 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6706 			      bool enable)
6707 {
6708 	u32 orig, data;
6709 
6710 	orig = data = RREG32(HDP_MEM_POWER_LS);
6711 
6712 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6713 		data |= HDP_LS_ENABLE;
6714 	else
6715 		data &= ~HDP_LS_ENABLE;
6716 
6717 	if (orig != data)
6718 		WREG32(HDP_MEM_POWER_LS, data);
6719 }
6720 
6721 void cik_update_cg(struct radeon_device *rdev,
6722 		   u32 block, bool enable)
6723 {
6724 
6725 	if (block & RADEON_CG_BLOCK_GFX) {
6726 		cik_enable_gui_idle_interrupt(rdev, false);
6727 		/* order matters! */
6728 		if (enable) {
6729 			cik_enable_mgcg(rdev, true);
6730 			cik_enable_cgcg(rdev, true);
6731 		} else {
6732 			cik_enable_cgcg(rdev, false);
6733 			cik_enable_mgcg(rdev, false);
6734 		}
6735 		cik_enable_gui_idle_interrupt(rdev, true);
6736 	}
6737 
6738 	if (block & RADEON_CG_BLOCK_MC) {
6739 		if (!(rdev->flags & RADEON_IS_IGP)) {
6740 			cik_enable_mc_mgcg(rdev, enable);
6741 			cik_enable_mc_ls(rdev, enable);
6742 		}
6743 	}
6744 
6745 	if (block & RADEON_CG_BLOCK_SDMA) {
6746 		cik_enable_sdma_mgcg(rdev, enable);
6747 		cik_enable_sdma_mgls(rdev, enable);
6748 	}
6749 
6750 	if (block & RADEON_CG_BLOCK_BIF) {
6751 		cik_enable_bif_mgls(rdev, enable);
6752 	}
6753 
6754 	if (block & RADEON_CG_BLOCK_UVD) {
6755 		if (rdev->has_uvd)
6756 			cik_enable_uvd_mgcg(rdev, enable);
6757 	}
6758 
6759 	if (block & RADEON_CG_BLOCK_HDP) {
6760 		cik_enable_hdp_mgcg(rdev, enable);
6761 		cik_enable_hdp_ls(rdev, enable);
6762 	}
6763 
6764 	if (block & RADEON_CG_BLOCK_VCE) {
6765 		vce_v2_0_enable_mgcg(rdev, enable);
6766 	}
6767 }
6768 
6769 static void cik_init_cg(struct radeon_device *rdev)
6770 {
6771 
6772 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6773 
6774 	if (rdev->has_uvd)
6775 		si_init_uvd_internal_cg(rdev);
6776 
6777 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6778 			     RADEON_CG_BLOCK_SDMA |
6779 			     RADEON_CG_BLOCK_BIF |
6780 			     RADEON_CG_BLOCK_UVD |
6781 			     RADEON_CG_BLOCK_HDP), true);
6782 }
6783 
6784 static void cik_fini_cg(struct radeon_device *rdev)
6785 {
6786 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6787 			     RADEON_CG_BLOCK_SDMA |
6788 			     RADEON_CG_BLOCK_BIF |
6789 			     RADEON_CG_BLOCK_UVD |
6790 			     RADEON_CG_BLOCK_HDP), false);
6791 
6792 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6793 }
6794 
6795 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6796 					  bool enable)
6797 {
6798 	u32 data, orig;
6799 
6800 	orig = data = RREG32(RLC_PG_CNTL);
6801 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6802 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6803 	else
6804 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6805 	if (orig != data)
6806 		WREG32(RLC_PG_CNTL, data);
6807 }
6808 
6809 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6810 					  bool enable)
6811 {
6812 	u32 data, orig;
6813 
6814 	orig = data = RREG32(RLC_PG_CNTL);
6815 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6816 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6817 	else
6818 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6819 	if (orig != data)
6820 		WREG32(RLC_PG_CNTL, data);
6821 }
6822 
6823 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6824 {
6825 	u32 data, orig;
6826 
6827 	orig = data = RREG32(RLC_PG_CNTL);
6828 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6829 		data &= ~DISABLE_CP_PG;
6830 	else
6831 		data |= DISABLE_CP_PG;
6832 	if (orig != data)
6833 		WREG32(RLC_PG_CNTL, data);
6834 }
6835 
6836 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6837 {
6838 	u32 data, orig;
6839 
6840 	orig = data = RREG32(RLC_PG_CNTL);
6841 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6842 		data &= ~DISABLE_GDS_PG;
6843 	else
6844 		data |= DISABLE_GDS_PG;
6845 	if (orig != data)
6846 		WREG32(RLC_PG_CNTL, data);
6847 }
6848 
6849 #define CP_ME_TABLE_SIZE    96
6850 #define CP_ME_TABLE_OFFSET  2048
6851 #define CP_MEC_TABLE_OFFSET 4096
6852 
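/*
 * Note on the offsets above: legacy (big-endian) firmware images keep the
 * RLC jump table for each CP block at a fixed dword offset with a fixed
 * size, while new-style (little-endian) images describe it per ucode via
 * the jt_offset/jt_size fields of gfx_firmware_header_v1_0, which
 * cik_init_cp_pg_table() reads from each firmware header below.
 */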
6853 void cik_init_cp_pg_table(struct radeon_device *rdev)
6854 {
6855 	volatile u32 *dst_ptr;
6856 	int me, i, max_me = 4;
6857 	u32 bo_offset = 0;
6858 	u32 table_offset, table_size;
6859 
6860 	if (rdev->family == CHIP_KAVERI)
6861 		max_me = 5;
6862 
6863 	if (rdev->rlc.cp_table_ptr == NULL)
6864 		return;
6865 
6866 	/* write the cp table buffer */
6867 	dst_ptr = rdev->rlc.cp_table_ptr;
6868 	for (me = 0; me < max_me; me++) {
6869 		if (rdev->new_fw) {
6870 			const __le32 *fw_data;
6871 			const struct gfx_firmware_header_v1_0 *hdr;
6872 
6873 			if (me == 0) {
6874 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6875 				fw_data = (const __le32 *)
6876 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6877 				table_offset = le32_to_cpu(hdr->jt_offset);
6878 				table_size = le32_to_cpu(hdr->jt_size);
6879 			} else if (me == 1) {
6880 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6881 				fw_data = (const __le32 *)
6882 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6883 				table_offset = le32_to_cpu(hdr->jt_offset);
6884 				table_size = le32_to_cpu(hdr->jt_size);
6885 			} else if (me == 2) {
6886 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6887 				fw_data = (const __le32 *)
6888 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6889 				table_offset = le32_to_cpu(hdr->jt_offset);
6890 				table_size = le32_to_cpu(hdr->jt_size);
6891 			} else if (me == 3) {
6892 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6893 				fw_data = (const __le32 *)
6894 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6895 				table_offset = le32_to_cpu(hdr->jt_offset);
6896 				table_size = le32_to_cpu(hdr->jt_size);
6897 			} else {
6898 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6899 				fw_data = (const __le32 *)
6900 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6901 				table_offset = le32_to_cpu(hdr->jt_offset);
6902 				table_size = le32_to_cpu(hdr->jt_size);
6903 			}
6904 
6905 			for (i = 0; i < table_size; i++) {
6906 				dst_ptr[bo_offset + i] =
6907 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6908 			}
6909 			bo_offset += table_size;
6910 		} else {
6911 			const __be32 *fw_data;
6912 			table_size = CP_ME_TABLE_SIZE;
6913 
6914 			if (me == 0) {
6915 				fw_data = (const __be32 *)rdev->ce_fw->data;
6916 				table_offset = CP_ME_TABLE_OFFSET;
6917 			} else if (me == 1) {
6918 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6919 				table_offset = CP_ME_TABLE_OFFSET;
6920 			} else if (me == 2) {
6921 				fw_data = (const __be32 *)rdev->me_fw->data;
6922 				table_offset = CP_ME_TABLE_OFFSET;
6923 			} else {
6924 				fw_data = (const __be32 *)rdev->mec_fw->data;
6925 				table_offset = CP_MEC_TABLE_OFFSET;
6926 			}
6927 
6928 			for (i = 0; i < table_size; i++) {
6929 				dst_ptr[bo_offset + i] =
6930 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6931 			}
6932 			bo_offset += table_size;
6933 		}
6934 	}
6935 }
6936 
6937 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6938 				bool enable)
6939 {
6940 	u32 data, orig;
6941 
6942 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6943 		orig = data = RREG32(RLC_PG_CNTL);
6944 		data |= GFX_PG_ENABLE;
6945 		if (orig != data)
6946 			WREG32(RLC_PG_CNTL, data);
6947 
6948 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6949 		data |= AUTO_PG_EN;
6950 		if (orig != data)
6951 			WREG32(RLC_AUTO_PG_CTRL, data);
6952 	} else {
6953 		orig = data = RREG32(RLC_PG_CNTL);
6954 		data &= ~GFX_PG_ENABLE;
6955 		if (orig != data)
6956 			WREG32(RLC_PG_CNTL, data);
6957 
6958 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6959 		data &= ~AUTO_PG_EN;
6960 		if (orig != data)
6961 			WREG32(RLC_AUTO_PG_CTRL, data);
6962 
6963 		data = RREG32(DB_RENDER_CONTROL);
6964 	}
6965 }
6966 
6967 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6968 {
6969 	u32 mask = 0, tmp, tmp1;
6970 	int i;
6971 
6972 	mutex_lock(&rdev->grbm_idx_mutex);
6973 	cik_select_se_sh(rdev, se, sh);
6974 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6975 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6976 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6977 	mutex_unlock(&rdev->grbm_idx_mutex);
6978 
6979 	tmp &= 0xffff0000;
6980 
6981 	tmp |= tmp1;
6982 	tmp >>= 16;
6983 
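	/* build an all-ones mask of max_cu_per_sh bits, i.e. (1 << n) - 1;
	 * the register fields read above mark inactive CUs, so the
	 * (~tmp) & mask below yields the active-CU bitmap for this SE/SH
	 */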
6984 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i++) {
6985 		mask <<= 1;
6986 		mask |= 1;
6987 	}
6988 
6989 	return (~tmp) & mask;
6990 }
6991 
6992 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6993 {
6994 	u32 i, j, k, active_cu_number = 0;
6995 	u32 mask, counter, cu_bitmap;
6996 	u32 tmp = 0;
6997 
6998 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6999 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
7000 			mask = 1;
7001 			cu_bitmap = 0;
7002 			counter = 0;
7003 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
7004 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
7005 					if (counter < 2)
7006 						cu_bitmap |= mask;
7007 					counter++;
7008 				}
7009 				mask <<= 1;
7010 			}
7011 
7012 			active_cu_number += counter;
7013 			tmp |= (cu_bitmap << (i * 16 + j * 8));
7014 		}
7015 	}
7016 
7017 	WREG32(RLC_PG_AO_CU_MASK, tmp);
7018 
7019 	tmp = RREG32(RLC_MAX_PG_CU);
7020 	tmp &= ~MAX_PU_CU_MASK;
7021 	tmp |= MAX_PU_CU(active_cu_number);
7022 	WREG32(RLC_MAX_PG_CU, tmp);
7023 }
7024 
7025 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
7026 				       bool enable)
7027 {
7028 	u32 data, orig;
7029 
7030 	orig = data = RREG32(RLC_PG_CNTL);
7031 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
7032 		data |= STATIC_PER_CU_PG_ENABLE;
7033 	else
7034 		data &= ~STATIC_PER_CU_PG_ENABLE;
7035 	if (orig != data)
7036 		WREG32(RLC_PG_CNTL, data);
7037 }
7038 
7039 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
7040 					bool enable)
7041 {
7042 	u32 data, orig;
7043 
7044 	orig = data = RREG32(RLC_PG_CNTL);
7045 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
7046 		data |= DYN_PER_CU_PG_ENABLE;
7047 	else
7048 		data &= ~DYN_PER_CU_PG_ENABLE;
7049 	if (orig != data)
7050 		WREG32(RLC_PG_CNTL, data);
7051 }
7052 
7053 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
7054 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
7055 
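/*
 * RLC_GPM_SCRATCH_ADDR/RLC_GPM_SCRATCH_DATA act as an indexed write port:
 * cik_init_gfx_cgpg() below programs the scratch address once (using the
 * offsets above) and then streams consecutive dwords through the DATA
 * register, which appears to auto-increment the index.
 */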
7056 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
7057 {
7058 	u32 data, orig;
7059 	u32 i;
7060 
7061 	if (rdev->rlc.cs_data) {
7062 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
7063 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
7064 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
7065 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
7066 	} else {
7067 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
7068 		for (i = 0; i < 3; i++)
7069 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
7070 	}
7071 	if (rdev->rlc.reg_list) {
7072 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
7073 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
7074 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
7075 	}
7076 
7077 	orig = data = RREG32(RLC_PG_CNTL);
7078 	data |= GFX_PG_SRC;
7079 	if (orig != data)
7080 		WREG32(RLC_PG_CNTL, data);
7081 
7082 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
7083 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
7084 
7085 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
7086 	data &= ~IDLE_POLL_COUNT_MASK;
7087 	data |= IDLE_POLL_COUNT(0x60);
7088 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
7089 
7090 	data = 0x10101010;
7091 	WREG32(RLC_PG_DELAY, data);
7092 
7093 	data = RREG32(RLC_PG_DELAY_2);
7094 	data &= ~0xff;
7095 	data |= 0x3;
7096 	WREG32(RLC_PG_DELAY_2, data);
7097 
7098 	data = RREG32(RLC_AUTO_PG_CTRL);
7099 	data &= ~GRBM_REG_SGIT_MASK;
7100 	data |= GRBM_REG_SGIT(0x700);
7101 	WREG32(RLC_AUTO_PG_CTRL, data);
7102 
7103 }
7104 
7105 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
7106 {
7107 	cik_enable_gfx_cgpg(rdev, enable);
7108 	cik_enable_gfx_static_mgpg(rdev, enable);
7109 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
7110 }
7111 
7112 u32 cik_get_csb_size(struct radeon_device *rdev)
7113 {
7114 	u32 count = 0;
7115 	const struct cs_section_def *sect = NULL;
7116 	const struct cs_extent_def *ext = NULL;
7117 
7118 	if (rdev->rlc.cs_data == NULL)
7119 		return 0;
7120 
7121 	/* begin clear state */
7122 	count += 2;
7123 	/* context control state */
7124 	count += 3;
7125 
7126 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7127 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7128 			if (sect->id == SECT_CONTEXT)
7129 				count += 2 + ext->reg_count;
7130 			else
7131 				return 0;
7132 		}
7133 	}
7134 	/* pa_sc_raster_config/pa_sc_raster_config1 */
7135 	count += 4;
7136 	/* end clear state */
7137 	count += 2;
7138 	/* clear state */
7139 	count += 2;
7140 
7141 	return count;
7142 }
7143 
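/*
 * The dword budget computed by cik_get_csb_size() must stay in lockstep
 * with the packets emitted here: 2 dwords for each PREAMBLE_CNTL pair,
 * 3 for CONTEXT_CONTROL, 2 + reg_count per SET_CONTEXT_REG extent,
 * 4 for the pa_sc_raster_config pair and 2 for CLEAR_STATE.
 */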
7144 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
7145 {
7146 	u32 count = 0, i;
7147 	const struct cs_section_def *sect = NULL;
7148 	const struct cs_extent_def *ext = NULL;
7149 
7150 	if (rdev->rlc.cs_data == NULL)
7151 		return;
7152 	if (buffer == NULL)
7153 		return;
7154 
7155 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7156 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
7157 
7158 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
7159 	buffer[count++] = cpu_to_le32(0x80000000);
7160 	buffer[count++] = cpu_to_le32(0x80000000);
7161 
7162 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7163 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7164 			if (sect->id == SECT_CONTEXT) {
7165 				buffer[count++] =
7166 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
7167 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
7168 				for (i = 0; i < ext->reg_count; i++)
7169 					buffer[count++] = cpu_to_le32(ext->extent[i]);
7170 			} else {
7171 				return;
7172 			}
7173 		}
7174 	}
7175 
7176 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
7177 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
7178 	switch (rdev->family) {
7179 	case CHIP_BONAIRE:
7180 		buffer[count++] = cpu_to_le32(0x16000012);
7181 		buffer[count++] = cpu_to_le32(0x00000000);
7182 		break;
7183 	case CHIP_KAVERI:
7184 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7185 		buffer[count++] = cpu_to_le32(0x00000000);
7186 		break;
7187 	case CHIP_KABINI:
7188 	case CHIP_MULLINS:
7189 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
7190 		buffer[count++] = cpu_to_le32(0x00000000);
7191 		break;
7192 	case CHIP_HAWAII:
7193 		buffer[count++] = cpu_to_le32(0x3a00161a);
7194 		buffer[count++] = cpu_to_le32(0x0000002e);
7195 		break;
7196 	default:
7197 		buffer[count++] = cpu_to_le32(0x00000000);
7198 		buffer[count++] = cpu_to_le32(0x00000000);
7199 		break;
7200 	}
7201 
7202 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
7203 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
7204 
7205 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
7206 	buffer[count++] = cpu_to_le32(0);
7207 }
7208 
7209 static void cik_init_pg(struct radeon_device *rdev)
7210 {
7211 	if (rdev->pg_flags) {
7212 		cik_enable_sck_slowdown_on_pu(rdev, true);
7213 		cik_enable_sck_slowdown_on_pd(rdev, true);
7214 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7215 			cik_init_gfx_cgpg(rdev);
7216 			cik_enable_cp_pg(rdev, true);
7217 			cik_enable_gds_pg(rdev, true);
7218 		}
7219 		cik_init_ao_cu_mask(rdev);
7220 		cik_update_gfx_pg(rdev, true);
7221 	}
7222 }
7223 
7224 static void cik_fini_pg(struct radeon_device *rdev)
7225 {
7226 	if (rdev->pg_flags) {
7227 		cik_update_gfx_pg(rdev, false);
7228 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
7229 			cik_enable_cp_pg(rdev, false);
7230 			cik_enable_gds_pg(rdev, false);
7231 		}
7232 	}
7233 }
7234 
7235 /*
7236  * Interrupts
7237  * Starting with r6xx, interrupts are handled via a ring buffer.
7238  * Ring buffers are areas of GPU accessible memory that the GPU
7239  * writes interrupt vectors into and the host reads vectors out of.
7240  * There is a rptr (read pointer) that determines where the
7241  * host is currently reading, and a wptr (write pointer)
7242  * which determines where the GPU has written.  When the
7243  * pointers are equal, the ring is idle.  When the GPU
7244  * writes vectors to the ring buffer, it increments the
7245  * wptr.  When there is an interrupt, the host then starts
7246  * fetching vectors and processing them until the pointers are
7247  * equal again at which point it updates the rptr.
7248  */
7249 
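/*
 * Illustrative sketch (hypothetical helper, not part of the driver): with
 * the byte-based rptr/wptr described above and a power-of-two ring size,
 * the number of pending 16-byte interrupt vectors is just the masked
 * pointer difference.
 */
static inline u32 cik_ih_pending_sketch(u32 wptr, u32 rptr, u32 ptr_mask)
{
	return ((wptr - rptr) & ptr_mask) / 16; /* 16 bytes per IV entry */
}
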
7250 /**
7251  * cik_enable_interrupts - Enable the interrupt ring buffer
7252  *
7253  * @rdev: radeon_device pointer
7254  *
7255  * Enable the interrupt ring buffer (CIK).
7256  */
7257 static void cik_enable_interrupts(struct radeon_device *rdev)
7258 {
7259 	u32 ih_cntl = RREG32(IH_CNTL);
7260 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7261 
7262 	ih_cntl |= ENABLE_INTR;
7263 	ih_rb_cntl |= IH_RB_ENABLE;
7264 	WREG32(IH_CNTL, ih_cntl);
7265 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7266 	rdev->ih.enabled = true;
7267 }
7268 
7269 /**
7270  * cik_disable_interrupts - Disable the interrupt ring buffer
7271  *
7272  * @rdev: radeon_device pointer
7273  *
7274  * Disable the interrupt ring buffer (CIK).
7275  */
7276 static void cik_disable_interrupts(struct radeon_device *rdev)
7277 {
7278 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7279 	u32 ih_cntl = RREG32(IH_CNTL);
7280 
7281 	ih_rb_cntl &= ~IH_RB_ENABLE;
7282 	ih_cntl &= ~ENABLE_INTR;
7283 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7284 	WREG32(IH_CNTL, ih_cntl);
7285 	/* set rptr, wptr to 0 */
7286 	WREG32(IH_RB_RPTR, 0);
7287 	WREG32(IH_RB_WPTR, 0);
7288 	rdev->ih.enabled = false;
7289 	rdev->ih.rptr = 0;
7290 }
7291 
7292 /**
7293  * cik_disable_interrupt_state - Disable all interrupt sources
7294  *
7295  * @rdev: radeon_device pointer
7296  *
7297  * Clear all interrupt enable bits used by the driver (CIK).
7298  */
7299 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7300 {
7301 	u32 tmp;
7302 
7303 	/* gfx ring */
7304 	tmp = RREG32(CP_INT_CNTL_RING0) &
7305 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7306 	WREG32(CP_INT_CNTL_RING0, tmp);
7307 	/* sdma */
7308 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7309 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7310 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7311 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7312 	/* compute queues */
7313 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7314 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7315 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7316 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7317 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7318 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7319 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7320 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7321 	/* grbm */
7322 	WREG32(GRBM_INT_CNTL, 0);
7323 	/* SRBM */
7324 	WREG32(SRBM_INT_CNTL, 0);
7325 	/* vline/vblank, etc. */
7326 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7327 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7328 	if (rdev->num_crtc >= 4) {
7329 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7330 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7331 	}
7332 	if (rdev->num_crtc >= 6) {
7333 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7334 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7335 	}
7336 	/* pflip */
7337 	if (rdev->num_crtc >= 2) {
7338 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7339 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7340 	}
7341 	if (rdev->num_crtc >= 4) {
7342 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7343 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7344 	}
7345 	if (rdev->num_crtc >= 6) {
7346 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7347 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7348 	}
7349 
7350 	/* dac hotplug */
7351 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7352 
7353 	/* digital hotplug */
7354 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7355 	WREG32(DC_HPD1_INT_CONTROL, tmp);
7356 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7357 	WREG32(DC_HPD2_INT_CONTROL, tmp);
7358 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7359 	WREG32(DC_HPD3_INT_CONTROL, tmp);
7360 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7361 	WREG32(DC_HPD4_INT_CONTROL, tmp);
7362 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7363 	WREG32(DC_HPD5_INT_CONTROL, tmp);
7364 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7365 	WREG32(DC_HPD6_INT_CONTROL, tmp);
7366 
7367 }
7368 
7369 /**
7370  * cik_irq_init - init and enable the interrupt ring
7371  *
7372  * @rdev: radeon_device pointer
7373  *
7374  * Allocate a ring buffer for the interrupt controller,
7375  * disable interrupts, initialize the RLC, set up the
7376  * IH ring buffer and enable it (CIK).
7377  * Called at device load and resume.
7378  * Returns 0 for success, errors for failure.
7379  */
7380 static int cik_irq_init(struct radeon_device *rdev)
7381 {
7382 	int ret = 0;
7383 	int rb_bufsz;
7384 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7385 
7386 	/* allocate ring */
7387 	ret = r600_ih_ring_alloc(rdev);
7388 	if (ret)
7389 		return ret;
7390 
7391 	/* disable irqs */
7392 	cik_disable_interrupts(rdev);
7393 
7394 	/* init rlc */
7395 	ret = cik_rlc_resume(rdev);
7396 	if (ret) {
7397 		r600_ih_ring_fini(rdev);
7398 		return ret;
7399 	}
7400 
7401 	/* setup interrupt control */
7402 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7403 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7404 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7405 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7406 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7407 	 */
7408 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7409 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7410 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7411 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7412 
7413 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7414 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7415 
7416 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7417 		      IH_WPTR_OVERFLOW_CLEAR |
7418 		      (rb_bufsz << 1));
7419 
7420 	if (rdev->wb.enabled)
7421 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7422 
7423 	/* set the writeback address whether it's enabled or not */
7424 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7425 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7426 
7427 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7428 
7429 	/* set rptr, wptr to 0 */
7430 	WREG32(IH_RB_RPTR, 0);
7431 	WREG32(IH_RB_WPTR, 0);
7432 
7433 	/* Default settings for IH_CNTL (disabled at first) */
7434 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7435 	/* RPTR_REARM only works if msi's are enabled */
7436 	if (rdev->msi_enabled)
7437 		ih_cntl |= RPTR_REARM;
7438 	WREG32(IH_CNTL, ih_cntl);
7439 
7440 	/* force the active interrupt state to all disabled */
7441 	cik_disable_interrupt_state(rdev);
7442 
7443 	pci_set_master(rdev->pdev);
7444 
7445 	/* enable irqs */
7446 	cik_enable_interrupts(rdev);
7447 
7448 	return ret;
7449 }
7450 
7451 /**
7452  * cik_irq_set - enable/disable interrupt sources
7453  *
7454  * @rdev: radeon_device pointer
7455  *
7456  * Enable interrupt sources on the GPU (vblanks, hpd,
7457  * etc.) (CIK).
7458  * Returns 0 for success, errors for failure.
7459  */
7460 int cik_irq_set(struct radeon_device *rdev)
7461 {
7462 	u32 cp_int_cntl;
7463 	u32 cp_m1p0;
7464 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7465 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7466 	u32 grbm_int_cntl = 0;
7467 	u32 dma_cntl, dma_cntl1;
7468 
7469 	if (!rdev->irq.installed) {
7470 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7471 		return -EINVAL;
7472 	}
7473 	/* don't enable anything if the ih is disabled */
7474 	if (!rdev->ih.enabled) {
7475 		cik_disable_interrupts(rdev);
7476 		/* force the active interrupt state to all disabled */
7477 		cik_disable_interrupt_state(rdev);
7478 		return 0;
7479 	}
7480 
7481 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7482 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7483 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7484 
7485 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7486 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7487 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7488 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7489 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7490 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7491 
7492 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7493 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7494 
7495 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7496 
7497 	/* enable CP interrupts on all rings */
7498 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7499 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7500 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7501 	}
7502 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7503 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7504 		DRM_DEBUG("cik_irq_set: sw int cp1\n");
7505 		if (ring->me == 1) {
7506 			switch (ring->pipe) {
7507 			case 0:
7508 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7509 				break;
7510 			default:
7511 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7512 				break;
7513 			}
7514 		} else {
7515 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
7516 		}
7517 	}
7518 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7519 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7520 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
7521 		if (ring->me == 1) {
7522 			switch (ring->pipe) {
7523 			case 0:
7524 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7525 				break;
7526 			default:
7527 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7528 				break;
7529 			}
7530 		} else {
7531 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
7532 		}
7533 	}
7534 
7535 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7536 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7537 		dma_cntl |= TRAP_ENABLE;
7538 	}
7539 
7540 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7541 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7542 		dma_cntl1 |= TRAP_ENABLE;
7543 	}
7544 
7545 	if (rdev->irq.crtc_vblank_int[0] ||
7546 	    atomic_read(&rdev->irq.pflip[0])) {
7547 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7548 		crtc1 |= VBLANK_INTERRUPT_MASK;
7549 	}
7550 	if (rdev->irq.crtc_vblank_int[1] ||
7551 	    atomic_read(&rdev->irq.pflip[1])) {
7552 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7553 		crtc2 |= VBLANK_INTERRUPT_MASK;
7554 	}
7555 	if (rdev->irq.crtc_vblank_int[2] ||
7556 	    atomic_read(&rdev->irq.pflip[2])) {
7557 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7558 		crtc3 |= VBLANK_INTERRUPT_MASK;
7559 	}
7560 	if (rdev->irq.crtc_vblank_int[3] ||
7561 	    atomic_read(&rdev->irq.pflip[3])) {
7562 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7563 		crtc4 |= VBLANK_INTERRUPT_MASK;
7564 	}
7565 	if (rdev->irq.crtc_vblank_int[4] ||
7566 	    atomic_read(&rdev->irq.pflip[4])) {
7567 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7568 		crtc5 |= VBLANK_INTERRUPT_MASK;
7569 	}
7570 	if (rdev->irq.crtc_vblank_int[5] ||
7571 	    atomic_read(&rdev->irq.pflip[5])) {
7572 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7573 		crtc6 |= VBLANK_INTERRUPT_MASK;
7574 	}
7575 	if (rdev->irq.hpd[0]) {
7576 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7577 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7578 	}
7579 	if (rdev->irq.hpd[1]) {
7580 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7581 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7582 	}
7583 	if (rdev->irq.hpd[2]) {
7584 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7585 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7586 	}
7587 	if (rdev->irq.hpd[3]) {
7588 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7589 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7590 	}
7591 	if (rdev->irq.hpd[4]) {
7592 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7593 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7594 	}
7595 	if (rdev->irq.hpd[5]) {
7596 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7597 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7598 	}
7599 
7600 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7601 
7602 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7603 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7604 
7605 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7606 
7607 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7608 
7609 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7610 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7611 	if (rdev->num_crtc >= 4) {
7612 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7613 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7614 	}
7615 	if (rdev->num_crtc >= 6) {
7616 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7617 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7618 	}
7619 
7620 	if (rdev->num_crtc >= 2) {
7621 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7622 		       GRPH_PFLIP_INT_MASK);
7623 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7624 		       GRPH_PFLIP_INT_MASK);
7625 	}
7626 	if (rdev->num_crtc >= 4) {
7627 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7628 		       GRPH_PFLIP_INT_MASK);
7629 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7630 		       GRPH_PFLIP_INT_MASK);
7631 	}
7632 	if (rdev->num_crtc >= 6) {
7633 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7634 		       GRPH_PFLIP_INT_MASK);
7635 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7636 		       GRPH_PFLIP_INT_MASK);
7637 	}
7638 
7639 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7640 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7641 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7642 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7643 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7644 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7645 
7646 	/* posting read */
7647 	RREG32(SRBM_STATUS);
7648 
7649 	return 0;
7650 }
7651 
7652 /**
7653  * cik_irq_ack - ack interrupt sources
7654  *
7655  * @rdev: radeon_device pointer
7656  *
7657  * Ack interrupt sources on the GPU (vblanks, hpd,
7658  * etc.) (CIK).  Certain interrupt sources are sw
7659  * generated and do not require an explicit ack.
7660  */
7661 static inline void cik_irq_ack(struct radeon_device *rdev)
7662 {
7663 	u32 tmp;
7664 
7665 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7666 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7667 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7668 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7669 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7670 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7671 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7672 
7673 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7674 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7675 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7676 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7677 	if (rdev->num_crtc >= 4) {
7678 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7679 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7680 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7681 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7682 	}
7683 	if (rdev->num_crtc >= 6) {
7684 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7685 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7686 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7687 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7688 	}
7689 
7690 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7691 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7692 		       GRPH_PFLIP_INT_CLEAR);
7693 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7694 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7695 		       GRPH_PFLIP_INT_CLEAR);
7696 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7697 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7698 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7699 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7700 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7701 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7702 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7703 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7704 
7705 	if (rdev->num_crtc >= 4) {
7706 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7707 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7708 			       GRPH_PFLIP_INT_CLEAR);
7709 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7710 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7711 			       GRPH_PFLIP_INT_CLEAR);
7712 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7713 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7714 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7715 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7716 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7717 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7718 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7719 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7720 	}
7721 
7722 	if (rdev->num_crtc >= 6) {
7723 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7724 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7725 			       GRPH_PFLIP_INT_CLEAR);
7726 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7727 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7728 			       GRPH_PFLIP_INT_CLEAR);
7729 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7730 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7731 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7732 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7733 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7734 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7735 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7736 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7737 	}
7738 
7739 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7740 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7741 		tmp |= DC_HPDx_INT_ACK;
7742 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7743 	}
7744 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7745 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7746 		tmp |= DC_HPDx_INT_ACK;
7747 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7748 	}
7749 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7750 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7751 		tmp |= DC_HPDx_INT_ACK;
7752 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7753 	}
7754 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7755 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7756 		tmp |= DC_HPDx_INT_ACK;
7757 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7758 	}
7759 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7760 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7761 		tmp |= DC_HPDx_INT_ACK;
7762 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7763 	}
7764 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7765 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7766 		tmp |= DC_HPDx_INT_ACK;
7767 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7768 	}
7769 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7770 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7771 		tmp |= DC_HPDx_RX_INT_ACK;
7772 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7773 	}
7774 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7775 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7776 		tmp |= DC_HPDx_RX_INT_ACK;
7777 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7778 	}
7779 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7780 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7781 		tmp |= DC_HPDx_RX_INT_ACK;
7782 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7783 	}
7784 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7785 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7786 		tmp |= DC_HPDx_RX_INT_ACK;
7787 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7788 	}
7789 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7790 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7791 		tmp |= DC_HPDx_RX_INT_ACK;
7792 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7793 	}
7794 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7795 		tmp = RREG32(DC_HPD6_INT_CONTROL);
7796 		tmp |= DC_HPDx_RX_INT_ACK;
7797 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7798 	}
7799 }
7800 
7801 /**
7802  * cik_irq_disable - disable interrupts
7803  *
7804  * @rdev: radeon_device pointer
7805  *
7806  * Disable interrupts on the hw (CIK).
7807  */
7808 static void cik_irq_disable(struct radeon_device *rdev)
7809 {
7810 	cik_disable_interrupts(rdev);
7811 	/* Wait and acknowledge irq */
7812 	mdelay(1);
7813 	cik_irq_ack(rdev);
7814 	cik_disable_interrupt_state(rdev);
7815 }
7816 
7817 /**
7818  * cik_irq_suspend - disable interrupts for suspend
7819  *
7820  * @rdev: radeon_device pointer
7821  *
7822  * Disable interrupts and stop the RLC (CIK).
7823  * Used for suspend.
7824  */
7825 static void cik_irq_suspend(struct radeon_device *rdev)
7826 {
7827 	cik_irq_disable(rdev);
7828 	cik_rlc_stop(rdev);
7829 }
7830 
7831 /**
7832  * cik_irq_fini - tear down interrupt support
7833  *
7834  * @rdev: radeon_device pointer
7835  *
7836  * Disable interrupts on the hw and free the IH ring
7837  * buffer (CIK).
7838  * Used for driver unload.
7839  */
7840 static void cik_irq_fini(struct radeon_device *rdev)
7841 {
7842 	cik_irq_suspend(rdev);
7843 	r600_ih_ring_fini(rdev);
7844 }
7845 
7846 /**
7847  * cik_get_ih_wptr - get the IH ring buffer wptr
7848  *
7849  * @rdev: radeon_device pointer
7850  *
7851  * Get the IH ring buffer wptr from either the register
7852  * or the writeback memory buffer (CIK).  Also check for
7853  * ring buffer overflow and deal with it.
7854  * Used by cik_irq_process().
7855  * Returns the value of the wptr.
7856  */
7857 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7858 {
7859 	u32 wptr, tmp;
7860 
7861 	if (rdev->wb.enabled)
7862 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7863 	else
7864 		wptr = RREG32(IH_RB_WPTR);
7865 
7866 	if (wptr & RB_OVERFLOW) {
7867 		wptr &= ~RB_OVERFLOW;
7868 		/* When a ring buffer overflow happens, start parsing interrupts
7869 		 * from the last vector that was not overwritten (wptr + 16).
7870 		 * Hopefully this allows us to catch up.
7871 		 */
7872 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7873 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7874 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7875 		tmp = RREG32(IH_RB_CNTL);
7876 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7877 		WREG32(IH_RB_CNTL, tmp);
7878 	}
7879 	return (wptr & rdev->ih.ptr_mask);
7880 }
7881 
7882 /* CIK IV Ring
7883  * Each IV ring entry is 128 bits:
7884  * [7:0]    - interrupt source id
7885  * [31:8]   - reserved
7886  * [59:32]  - interrupt source data
7887  * [63:60]  - reserved
7888  * [71:64]  - RINGID
7889  *            CP:
7890  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7891  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7892  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7893  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7894  *            PIPE_ID - ME0 0=3D
7895  *                    - ME1&2 compute dispatcher (4 pipes each)
7896  *            SDMA:
7897  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7898  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7899  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7900  * [79:72]  - VMID
7901  * [95:80]  - PASID
7902  * [127:96] - reserved
7903  */
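
/*
 * Hedged example decoding a single IV entry per the layout above.  This
 * helper is illustrative only; cik_irq_process() below open-codes the
 * same shifts and masks on the little-endian ring words.
 */
static inline void cik_iv_decode_sketch(const u32 *dw, u8 *src_id,
					u32 *src_data, u8 *ring_id,
					u8 *vmid, u16 *pasid)
{
	*src_id   = le32_to_cpu(dw[0]) & 0xff;           /* [7:0]   */
	*src_data = le32_to_cpu(dw[1]) & 0xfffffff;      /* [59:32] */
	*ring_id  = le32_to_cpu(dw[2]) & 0xff;           /* [71:64] */
	*vmid     = (le32_to_cpu(dw[2]) >> 8) & 0xff;    /* [79:72] */
	*pasid    = (le32_to_cpu(dw[2]) >> 16) & 0xffff; /* [95:80] */
}
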
7904 /**
7905  * cik_irq_process - interrupt handler
7906  *
7907  * @rdev: radeon_device pointer
7908  *
7909  * Interrupt handler (CIK).  Walk the IH ring,
7910  * ack interrupts and schedule work to handle
7911  * interrupt events.
7912  * Returns irq process return code.
7913  */
7914 int cik_irq_process(struct radeon_device *rdev)
7915 {
7916 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7917 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7918 	u32 wptr;
7919 	u32 rptr;
7920 	u32 src_id, src_data, ring_id;
7921 	u8 me_id, pipe_id, queue_id;
7922 	u32 ring_index;
7923 	bool queue_hotplug = false;
7924 	bool queue_dp = false;
7925 	bool queue_reset = false;
7926 	u32 addr, status, mc_client;
7927 	bool queue_thermal = false;
7928 
7929 	if (!rdev->ih.enabled || rdev->shutdown)
7930 		return IRQ_NONE;
7931 
7932 	wptr = cik_get_ih_wptr(rdev);
7933 
7934 restart_ih:
7935 	/* is somebody else already processing irqs? */
7936 	if (atomic_xchg(&rdev->ih.lock, 1))
7937 		return IRQ_NONE;
7938 
7939 	rptr = rdev->ih.rptr;
7940 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7941 
7942 	/* Order reading of wptr vs. reading of IH ring data */
7943 	rmb();
7944 
7945 	/* display interrupts */
7946 	cik_irq_ack(rdev);
7947 
7948 	while (rptr != wptr) {
7949 		/* wptr/rptr are in bytes! */
7950 		ring_index = rptr / 4;
7951 
7952 		radeon_kfd_interrupt(rdev,
7953 				(const void *) &rdev->ih.ring[ring_index]);
7954 
7955 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7956 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7957 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7958 
7959 		switch (src_id) {
7960 		case 1: /* D1 vblank/vline */
7961 			switch (src_data) {
7962 			case 0: /* D1 vblank */
7963 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT))
7964 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7965 
7966 				if (rdev->irq.crtc_vblank_int[0]) {
7967 					drm_handle_vblank(rdev->ddev, 0);
7968 					rdev->pm.vblank_sync = true;
7969 					wake_up(&rdev->irq.vblank_queue);
7970 				}
7971 				if (atomic_read(&rdev->irq.pflip[0]))
7972 					radeon_crtc_handle_vblank(rdev, 0);
7973 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7974 				DRM_DEBUG("IH: D1 vblank\n");
7975 
7976 				break;
7977 			case 1: /* D1 vline */
7978 				if (!(rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT))
7979 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7980 
7981 				rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7982 				DRM_DEBUG("IH: D1 vline\n");
7983 
7984 				break;
7985 			default:
7986 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7987 				break;
7988 			}
7989 			break;
7990 		case 2: /* D2 vblank/vline */
7991 			switch (src_data) {
7992 			case 0: /* D2 vblank */
7993 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
7994 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
7995 
7996 				if (rdev->irq.crtc_vblank_int[1]) {
7997 					drm_handle_vblank(rdev->ddev, 1);
7998 					rdev->pm.vblank_sync = true;
7999 					wake_up(&rdev->irq.vblank_queue);
8000 				}
8001 				if (atomic_read(&rdev->irq.pflip[1]))
8002 					radeon_crtc_handle_vblank(rdev, 1);
8003 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
8004 				DRM_DEBUG("IH: D2 vblank\n");
8005 
8006 				break;
8007 			case 1: /* D2 vline */
8008 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT))
8009 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8010 
8011 				rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
8012 				DRM_DEBUG("IH: D2 vline\n");
8013 
8014 				break;
8015 			default:
8016 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8017 				break;
8018 			}
8019 			break;
8020 		case 3: /* D3 vblank/vline */
8021 			switch (src_data) {
8022 			case 0: /* D3 vblank */
8023 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
8024 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8025 
8026 				if (rdev->irq.crtc_vblank_int[2]) {
8027 					drm_handle_vblank(rdev->ddev, 2);
8028 					rdev->pm.vblank_sync = true;
8029 					wake_up(&rdev->irq.vblank_queue);
8030 				}
8031 				if (atomic_read(&rdev->irq.pflip[2]))
8032 					radeon_crtc_handle_vblank(rdev, 2);
8033 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
8034 				DRM_DEBUG("IH: D3 vblank\n");
8035 
8036 				break;
8037 			case 1: /* D3 vline */
8038 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
8039 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8040 
8041 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
8042 				DRM_DEBUG("IH: D3 vline\n");
8043 
8044 				break;
8045 			default:
8046 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8047 				break;
8048 			}
8049 			break;
8050 		case 4: /* D4 vblank/vline */
8051 			switch (src_data) {
8052 			case 0: /* D4 vblank */
8053 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
8054 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8055 
8056 				if (rdev->irq.crtc_vblank_int[3]) {
8057 					drm_handle_vblank(rdev->ddev, 3);
8058 					rdev->pm.vblank_sync = true;
8059 					wake_up(&rdev->irq.vblank_queue);
8060 				}
8061 				if (atomic_read(&rdev->irq.pflip[3]))
8062 					radeon_crtc_handle_vblank(rdev, 3);
8063 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
8064 				DRM_DEBUG("IH: D4 vblank\n");
8065 
8066 				break;
8067 			case 1: /* D4 vline */
8068 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
8069 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8070 
8071 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
8072 				DRM_DEBUG("IH: D4 vline\n");
8073 
8074 				break;
8075 			default:
8076 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8077 				break;
8078 			}
8079 			break;
8080 		case 5: /* D5 vblank/vline */
8081 			switch (src_data) {
8082 			case 0: /* D5 vblank */
8083 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
8084 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8085 
8086 				if (rdev->irq.crtc_vblank_int[4]) {
8087 					drm_handle_vblank(rdev->ddev, 4);
8088 					rdev->pm.vblank_sync = true;
8089 					wake_up(&rdev->irq.vblank_queue);
8090 				}
8091 				if (atomic_read(&rdev->irq.pflip[4]))
8092 					radeon_crtc_handle_vblank(rdev, 4);
8093 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
8094 				DRM_DEBUG("IH: D5 vblank\n");
8095 
8096 				break;
8097 			case 1: /* D5 vline */
8098 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
8099 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8100 
8101 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
8102 				DRM_DEBUG("IH: D5 vline\n");
8103 
8104 				break;
8105 			default:
8106 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8107 				break;
8108 			}
8109 			break;
8110 		case 6: /* D6 vblank/vline */
8111 			switch (src_data) {
8112 			case 0: /* D6 vblank */
8113 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
8114 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8115 
8116 				if (rdev->irq.crtc_vblank_int[5]) {
8117 					drm_handle_vblank(rdev->ddev, 5);
8118 					rdev->pm.vblank_sync = true;
8119 					wake_up(&rdev->irq.vblank_queue);
8120 				}
8121 				if (atomic_read(&rdev->irq.pflip[5]))
8122 					radeon_crtc_handle_vblank(rdev, 5);
8123 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
8124 				DRM_DEBUG("IH: D6 vblank\n");
8125 
8126 				break;
8127 			case 1: /* D6 vline */
8128 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
8129 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8130 
8131 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
8132 				DRM_DEBUG("IH: D6 vline\n");
8133 
8134 				break;
8135 			default:
8136 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8137 				break;
8138 			}
8139 			break;
8140 		case 8: /* D1 page flip */
8141 		case 10: /* D2 page flip */
8142 		case 12: /* D3 page flip */
8143 		case 14: /* D4 page flip */
8144 		case 16: /* D5 page flip */
8145 		case 18: /* D6 page flip */
8146 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8147 			if (radeon_use_pflipirq > 0)
8148 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8149 			break;
8150 		case 42: /* HPD hotplug */
8151 			switch (src_data) {
8152 			case 0:
8153 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT))
8154 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8155 
8156 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8157 				queue_hotplug = true;
8158 				DRM_DEBUG("IH: HPD1\n");
8159 
8160 				break;
8161 			case 1:
8162 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT))
8163 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8164 
8165 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8166 				queue_hotplug = true;
8167 				DRM_DEBUG("IH: HPD2\n");
8168 
8169 				break;
8170 			case 2:
8171 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT))
8172 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8173 
8174 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8175 				queue_hotplug = true;
8176 				DRM_DEBUG("IH: HPD3\n");
8177 
8178 				break;
8179 			case 3:
8180 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT))
8181 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8182 
8183 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8184 				queue_hotplug = true;
8185 				DRM_DEBUG("IH: HPD4\n");
8186 
8187 				break;
8188 			case 4:
8189 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT))
8190 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8191 
8192 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8193 				queue_hotplug = true;
8194 				DRM_DEBUG("IH: HPD5\n");
8195 
8196 				break;
8197 			case 5:
8198 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT))
8199 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8200 
8201 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8202 				queue_hotplug = true;
8203 				DRM_DEBUG("IH: HPD6\n");
8204 
8205 				break;
8206 			case 6:
8207 				if (!(rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT))
8208 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8209 
8210 				rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
8211 				queue_dp = true;
8212 				DRM_DEBUG("IH: HPD_RX 1\n");
8213 
8214 				break;
8215 			case 7:
8216 				if (!(rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT))
8217 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8218 
8219 				rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
8220 				queue_dp = true;
8221 				DRM_DEBUG("IH: HPD_RX 2\n");
8222 
8223 				break;
8224 			case 8:
8225 				if (!(rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
8226 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8227 
8228 				rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
8229 				queue_dp = true;
8230 				DRM_DEBUG("IH: HPD_RX 3\n");
8231 
8232 				break;
8233 			case 9:
8234 				if (!(rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
8235 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8236 
8237 				rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
8238 				queue_dp = true;
8239 				DRM_DEBUG("IH: HPD_RX 4\n");
8240 
8241 				break;
8242 			case 10:
8243 				if (!(rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
8244 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8245 
8246 				rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
8247 				queue_dp = true;
8248 				DRM_DEBUG("IH: HPD_RX 5\n");
8249 
8250 				break;
8251 			case 11:
8252 				if (!(rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
8253 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
8254 
8255 				rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
8256 				queue_dp = true;
8257 				DRM_DEBUG("IH: HPD_RX 6\n");
8258 
8259 				break;
8260 			default:
8261 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8262 				break;
8263 			}
8264 			break;
8265 		case 96:
8266 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8267 			WREG32(SRBM_INT_ACK, 0x1);
8268 			break;
8269 		case 124: /* UVD */
8270 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8271 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8272 			break;
8273 		case 146:
8274 		case 147:
8275 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8276 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8277 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8278 			/* reset addr and status */
8279 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8280 			if (addr == 0x0 && status == 0x0)
8281 				break;
8282 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8283 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8284 				addr);
8285 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8286 				status);
8287 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8288 			break;
8289 		case 167: /* VCE */
8290 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8291 			switch (src_data) {
8292 			case 0:
8293 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8294 				break;
8295 			case 1:
8296 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8297 				break;
8298 			default:
8299 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8300 				break;
8301 			}
8302 			break;
8303 		case 176: /* GFX RB CP_INT */
8304 		case 177: /* GFX IB CP_INT */
8305 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8306 			break;
8307 		case 181: /* CP EOP event */
8308 			DRM_DEBUG("IH: CP EOP\n");
8309 			/* XXX check the bitfield order! */
8310 			me_id = (ring_id & 0x60) >> 5;
8311 			pipe_id = (ring_id & 0x18) >> 3;
8312 			queue_id = (ring_id & 0x7) >> 0;
8313 			switch (me_id) {
8314 			case 0:
8315 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8316 				break;
8317 			case 1:
8318 			case 2:
8319 			if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
8320 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8321 			if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
8322 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8323 				break;
8324 			}
8325 			break;
8326 		case 184: /* CP Privileged reg access */
8327 			DRM_ERROR("Illegal register access in command stream\n");
8328 			/* XXX check the bitfield order! */
8329 			me_id = (ring_id & 0x60) >> 5;
8330 			pipe_id = (ring_id & 0x18) >> 3;
8331 			queue_id = (ring_id & 0x7) >> 0;
8332 			switch (me_id) {
8333 			case 0:
8334 				/* This results in a full GPU reset, but all we need to do is soft
8335 				 * reset the CP for gfx
8336 				 */
8337 				queue_reset = true;
8338 				break;
8339 			case 1:
8340 				/* XXX compute */
8341 				queue_reset = true;
8342 				break;
8343 			case 2:
8344 				/* XXX compute */
8345 				queue_reset = true;
8346 				break;
8347 			}
8348 			break;
8349 		case 185: /* CP Privileged inst */
8350 			DRM_ERROR("Illegal instruction in command stream\n");
8351 			/* XXX check the bitfield order! */
8352 			me_id = (ring_id & 0x60) >> 5;
8353 			pipe_id = (ring_id & 0x18) >> 3;
8354 			queue_id = (ring_id & 0x7) >> 0;
8355 			switch (me_id) {
8356 			case 0:
8357 				/* This results in a full GPU reset, but all we need to do is soft
8358 				 * reset the CP for gfx
8359 				 */
8360 				queue_reset = true;
8361 				break;
8362 			case 1:
8363 				/* XXX compute */
8364 				queue_reset = true;
8365 				break;
8366 			case 2:
8367 				/* XXX compute */
8368 				queue_reset = true;
8369 				break;
8370 			}
8371 			break;
8372 		case 224: /* SDMA trap event */
8373 			/* XXX check the bitfield order! */
8374 			me_id = (ring_id & 0x3) >> 0;
8375 			queue_id = (ring_id & 0xc) >> 2;
8376 			DRM_DEBUG("IH: SDMA trap\n");
8377 			switch (me_id) {
8378 			case 0:
8379 				switch (queue_id) {
8380 				case 0:
8381 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8382 					break;
8383 				case 1:
8384 					/* XXX compute */
8385 					break;
8386 				case 2:
8387 					/* XXX compute */
8388 					break;
8389 				}
8390 				break;
8391 			case 1:
8392 				switch (queue_id) {
8393 				case 0:
8394 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8395 					break;
8396 				case 1:
8397 					/* XXX compute */
8398 					break;
8399 				case 2:
8400 					/* XXX compute */
8401 					break;
8402 				}
8403 				break;
8404 			}
8405 			break;
8406 		case 230: /* thermal low to high */
8407 			DRM_DEBUG("IH: thermal low to high\n");
8408 			rdev->pm.dpm.thermal.high_to_low = false;
8409 			queue_thermal = true;
8410 			break;
8411 		case 231: /* thermal high to low */
8412 			DRM_DEBUG("IH: thermal high to low\n");
8413 			rdev->pm.dpm.thermal.high_to_low = true;
8414 			queue_thermal = true;
8415 			break;
8416 		case 233: /* GUI IDLE */
8417 			DRM_DEBUG("IH: GUI idle\n");
8418 			break;
8419 		case 241: /* SDMA Privileged inst */
8420 		case 247: /* SDMA Privileged inst */
8421 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8422 			/* XXX check the bitfield order! */
8423 			me_id = (ring_id & 0x3) >> 0;
8424 			queue_id = (ring_id & 0xc) >> 2;
8425 			switch (me_id) {
8426 			case 0:
8427 				switch (queue_id) {
8428 				case 0:
8429 					queue_reset = true;
8430 					break;
8431 				case 1:
8432 					/* XXX compute */
8433 					queue_reset = true;
8434 					break;
8435 				case 2:
8436 					/* XXX compute */
8437 					queue_reset = true;
8438 					break;
8439 				}
8440 				break;
8441 			case 1:
8442 				switch (queue_id) {
8443 				case 0:
8444 					queue_reset = true;
8445 					break;
8446 				case 1:
8447 					/* XXX compute */
8448 					queue_reset = true;
8449 					break;
8450 				case 2:
8451 					/* XXX compute */
8452 					queue_reset = true;
8453 					break;
8454 				}
8455 				break;
8456 			}
8457 			break;
8458 		default:
8459 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8460 			break;
8461 		}
8462 
8463 		/* wptr/rptr are in bytes! */
8464 		rptr += 16;
8465 		rptr &= rdev->ih.ptr_mask;
8466 		WREG32(IH_RB_RPTR, rptr);
8467 	}
8468 	if (queue_dp)
8469 		schedule_work(&rdev->dp_work);
8470 	if (queue_hotplug)
8471 		schedule_delayed_work(&rdev->hotplug_work, 0);
8472 	if (queue_reset) {
8473 		rdev->needs_reset = true;
8474 		wake_up_all(&rdev->fence_queue);
8475 	}
8476 	if (queue_thermal)
8477 		schedule_work(&rdev->pm.dpm.thermal.work);
8478 	rdev->ih.rptr = rptr;
8479 	atomic_set(&rdev->ih.lock, 0);
8480 
8481 	/* make sure wptr hasn't changed while processing */
8482 	wptr = cik_get_ih_wptr(rdev);
8483 	if (wptr != rptr)
8484 		goto restart_ih;
8485 
8486 	return IRQ_HANDLED;
8487 }
8488 
8489 /*
8490  * startup/shutdown callbacks
8491  */
8492 /**
8493  * cik_startup - program the asic to a functional state
8494  *
8495  * @rdev: radeon_device pointer
8496  *
8497  * Programs the asic to a functional state (CIK).
8498  * Called by cik_init() and cik_resume().
8499  * Returns 0 for success, error for failure.
8500  */
8501 static int cik_startup(struct radeon_device *rdev)
8502 {
8503 	struct radeon_ring *ring;
8504 	u32 nop;
8505 	int r;
8506 
8507 	/* enable pcie gen2/3 link */
8508 	cik_pcie_gen3_enable(rdev);
8509 	/* enable aspm */
8510 	cik_program_aspm(rdev);
8511 
8512 	/* scratch needs to be initialized before MC */
8513 	r = r600_vram_scratch_init(rdev);
8514 	if (r)
8515 		return r;
8516 
8517 	cik_mc_program(rdev);
8518 
8519 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8520 		r = ci_mc_load_microcode(rdev);
8521 		if (r) {
8522 			DRM_ERROR("Failed to load MC firmware!\n");
8523 			return r;
8524 		}
8525 	}
8526 
8527 	r = cik_pcie_gart_enable(rdev);
8528 	if (r)
8529 		return r;
8530 	cik_gpu_init(rdev);
8531 
8532 	/* allocate rlc buffers */
8533 	if (rdev->flags & RADEON_IS_IGP) {
8534 		if (rdev->family == CHIP_KAVERI) {
8535 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8536 			rdev->rlc.reg_list_size =
8537 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8538 		} else {
8539 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8540 			rdev->rlc.reg_list_size =
8541 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8542 		}
8543 	}
8544 	rdev->rlc.cs_data = ci_cs_data;
8545 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8546 	r = sumo_rlc_init(rdev);
8547 	if (r) {
8548 		DRM_ERROR("Failed to init rlc BOs!\n");
8549 		return r;
8550 	}
8551 
8552 	/* allocate wb buffer */
8553 	r = radeon_wb_init(rdev);
8554 	if (r)
8555 		return r;
8556 
8557 	/* allocate mec buffers */
8558 	r = cik_mec_init(rdev);
8559 	if (r) {
8560 		DRM_ERROR("Failed to init MEC BOs!\n");
8561 		return r;
8562 	}
8563 
8564 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8565 	if (r) {
8566 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8567 		return r;
8568 	}
8569 
8570 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8571 	if (r) {
8572 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8573 		return r;
8574 	}
8575 
8576 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8577 	if (r) {
8578 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8579 		return r;
8580 	}
8581 
8582 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8583 	if (r) {
8584 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8585 		return r;
8586 	}
8587 
8588 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8589 	if (r) {
8590 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8591 		return r;
8592 	}
8593 
8594 	r = radeon_uvd_resume(rdev);
8595 	if (!r) {
8596 		r = uvd_v4_2_resume(rdev);
8597 		if (!r) {
8598 			r = radeon_fence_driver_start_ring(rdev,
8599 							   R600_RING_TYPE_UVD_INDEX);
8600 			if (r)
8601 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8602 		}
8603 	}
8604 	if (r)
8605 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8606 
8607 	r = radeon_vce_resume(rdev);
8608 	if (!r) {
8609 		r = vce_v2_0_resume(rdev);
8610 		if (!r)
8611 			r = radeon_fence_driver_start_ring(rdev,
8612 							   TN_RING_TYPE_VCE1_INDEX);
8613 		if (!r)
8614 			r = radeon_fence_driver_start_ring(rdev,
8615 							   TN_RING_TYPE_VCE2_INDEX);
8616 	}
8617 	if (r) {
8618 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8619 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8620 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8621 	}
8622 
8623 	/* Enable IRQ */
8624 	if (!rdev->irq.installed) {
8625 		r = radeon_irq_kms_init(rdev);
8626 		if (r)
8627 			return r;
8628 	}
8629 
8630 	r = cik_irq_init(rdev);
8631 	if (r) {
8632 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8633 		radeon_irq_kms_fini(rdev);
8634 		return r;
8635 	}
8636 	cik_irq_set(rdev);
8637 
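	/* the original Hawaii firmware apparently mishandles type-3 NOP
	 * packets on the GFX ring, so fall back to the legacy type-2 NOP
	 * there; newer firmware and the other CIK parts use type-3
	 */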
8638 	if (rdev->family == CHIP_HAWAII) {
8639 		if (rdev->new_fw)
8640 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8641 		else
8642 			nop = RADEON_CP_PACKET2;
8643 	} else {
8644 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8645 	}
8646 
8647 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8648 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8649 			     nop);
8650 	if (r)
8651 		return r;
8652 
8653 	/* set up the compute queues */
8654 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8655 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8656 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8657 			     nop);
8658 	if (r)
8659 		return r;
8660 	ring->me = 1; /* first MEC */
8661 	ring->pipe = 0; /* first pipe */
8662 	ring->queue = 0; /* first queue */
8663 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8664 
8665 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8666 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8667 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8668 			     nop);
8669 	if (r)
8670 		return r;
	/* dGPUs only have 1 MEC */
8672 	ring->me = 1; /* first MEC */
8673 	ring->pipe = 0; /* first pipe */
8674 	ring->queue = 1; /* second queue */
8675 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8676 
8677 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8678 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8679 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8680 	if (r)
8681 		return r;
8682 
8683 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8684 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8685 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8686 	if (r)
8687 		return r;
8688 
8689 	r = cik_cp_resume(rdev);
8690 	if (r)
8691 		return r;
8692 
8693 	r = cik_sdma_resume(rdev);
8694 	if (r)
8695 		return r;
8696 
8697 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8698 	if (ring->ring_size) {
8699 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8700 				     RADEON_CP_PACKET2);
8701 		if (!r)
8702 			r = uvd_v1_0_init(rdev);
8703 		if (r)
8704 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8705 	}
8706 
8707 	r = -ENOENT;
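	/* -ENOENT is a sentinel here: it survives if neither VCE ring is
	 * present, so vce_v1_0_init() is skipped without logging an error
	 */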
8708 
8709 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8710 	if (ring->ring_size)
8711 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8712 				     VCE_CMD_NO_OP);
8713 
8714 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8715 	if (ring->ring_size)
8716 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8717 				     VCE_CMD_NO_OP);
8718 
8719 	if (!r)
8720 		r = vce_v1_0_init(rdev);
8721 	else if (r != -ENOENT)
8722 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8723 
8724 	r = radeon_ib_pool_init(rdev);
8725 	if (r) {
8726 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8727 		return r;
8728 	}
8729 
8730 	r = radeon_vm_manager_init(rdev);
8731 	if (r) {
8732 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8733 		return r;
8734 	}
8735 
8736 	r = radeon_audio_init(rdev);
8737 	if (r)
8738 		return r;
8739 
8740 	r = radeon_kfd_resume(rdev);
8741 	if (r)
8742 		return r;
8743 
8744 	return 0;
8745 }
8746 
8747 /**
8748  * cik_resume - resume the asic to a functional state
8749  *
8750  * @rdev: radeon_device pointer
8751  *
8752  * Programs the asic to a functional state (CIK).
8753  * Called at resume.
8754  * Returns 0 for success, error for failure.
8755  */
8756 int cik_resume(struct radeon_device *rdev)
8757 {
8758 	int r;
8759 
8760 	/* post card */
8761 	atom_asic_init(rdev->mode_info.atom_context);
8762 
8763 	/* init golden registers */
8764 	cik_init_golden_registers(rdev);
8765 
8766 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8767 		radeon_pm_resume(rdev);
8768 
8769 	rdev->accel_working = true;
8770 	r = cik_startup(rdev);
8771 	if (r) {
8772 		DRM_ERROR("cik startup failed on resume\n");
8773 		rdev->accel_working = false;
8774 		return r;
8775 	}
8776 
	return r;
}
8780 
8781 /**
8782  * cik_suspend - suspend the asic
8783  *
8784  * @rdev: radeon_device pointer
8785  *
8786  * Bring the chip into a state suitable for suspend (CIK).
8787  * Called at suspend.
8788  * Returns 0 for success.
8789  */
8790 int cik_suspend(struct radeon_device *rdev)
8791 {
8792 	radeon_kfd_suspend(rdev);
8793 	radeon_pm_suspend(rdev);
8794 	radeon_audio_fini(rdev);
8795 	radeon_vm_manager_fini(rdev);
8796 	cik_cp_enable(rdev, false);
8797 	cik_sdma_enable(rdev, false);
8798 	uvd_v1_0_fini(rdev);
8799 	radeon_uvd_suspend(rdev);
8800 	radeon_vce_suspend(rdev);
8801 	cik_fini_pg(rdev);
8802 	cik_fini_cg(rdev);
8803 	cik_irq_suspend(rdev);
8804 	radeon_wb_disable(rdev);
8805 	cik_pcie_gart_disable(rdev);
8806 	return 0;
8807 }
8808 
/* The plan is to move initialization into this function and use
 * helper functions so that radeon_device_init does little more than
 * call the asic-specific functions. This should also allow us to
 * remove a bunch of callbacks like vram_info.
 */
8815 /**
8816  * cik_init - asic specific driver and hw init
8817  *
8818  * @rdev: radeon_device pointer
8819  *
8820  * Setup asic specific driver variables and program the hw
8821  * to a functional state (CIK).
8822  * Called at driver startup.
8823  * Returns 0 for success, errors for failure.
8824  */
8825 int cik_init(struct radeon_device *rdev)
8826 {
8827 	struct radeon_ring *ring;
8828 	int r;
8829 
8830 	/* Read BIOS */
8831 	if (!radeon_get_bios(rdev)) {
8832 		if (ASIC_IS_AVIVO(rdev))
8833 			return -EINVAL;
8834 	}
8835 	/* Must be an ATOMBIOS */
8836 	if (!rdev->is_atom_bios) {
8837 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8838 		return -EINVAL;
8839 	}
8840 	r = radeon_atombios_init(rdev);
8841 	if (r)
8842 		return r;
8843 
8844 	/* Post card if necessary */
8845 	if (!radeon_card_posted(rdev)) {
8846 		if (!rdev->bios) {
8847 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8848 			return -EINVAL;
8849 		}
8850 		DRM_INFO("GPU not posted. posting now...\n");
8851 		atom_asic_init(rdev->mode_info.atom_context);
8852 	}
8853 	/* init golden registers */
8854 	cik_init_golden_registers(rdev);
8855 	/* Initialize scratch registers */
8856 	cik_scratch_init(rdev);
8857 	/* Initialize surface registers */
8858 	radeon_surface_init(rdev);
8859 	/* Initialize clocks */
8860 	radeon_get_clock_info(rdev->ddev);
8861 
8862 	/* Fence driver */
8863 	r = radeon_fence_driver_init(rdev);
8864 	if (r)
8865 		return r;
8866 
8867 	/* initialize memory controller */
8868 	r = cik_mc_init(rdev);
8869 	if (r)
8870 		return r;
8871 	/* Memory manager */
8872 	r = radeon_bo_init(rdev);
8873 	if (r)
8874 		return r;
8875 
8876 	if (rdev->flags & RADEON_IS_IGP) {
8877 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8878 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8879 			r = cik_init_microcode(rdev);
8880 			if (r) {
8881 				DRM_ERROR("Failed to load firmware!\n");
8882 				return r;
8883 			}
8884 		}
8885 	} else {
8886 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8887 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8888 		    !rdev->mc_fw) {
8889 			r = cik_init_microcode(rdev);
8890 			if (r) {
8891 				DRM_ERROR("Failed to load firmware!\n");
8892 				return r;
8893 			}
8894 		}
8895 	}
8896 
8897 	/* Initialize power management */
8898 	radeon_pm_init(rdev);
8899 
8900 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8901 	ring->ring_obj = NULL;
8902 	r600_ring_init(rdev, ring, 1024 * 1024);
8903 
8904 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8905 	ring->ring_obj = NULL;
8906 	r600_ring_init(rdev, ring, 1024 * 1024);
8907 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8908 	if (r)
8909 		return r;
8910 
8911 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8912 	ring->ring_obj = NULL;
8913 	r600_ring_init(rdev, ring, 1024 * 1024);
8914 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8915 	if (r)
8916 		return r;
8917 
8918 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8919 	ring->ring_obj = NULL;
8920 	r600_ring_init(rdev, ring, 256 * 1024);
8921 
8922 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8923 	ring->ring_obj = NULL;
8924 	r600_ring_init(rdev, ring, 256 * 1024);
8925 
8926 	r = radeon_uvd_init(rdev);
8927 	if (!r) {
8928 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8929 		ring->ring_obj = NULL;
8930 		r600_ring_init(rdev, ring, 4096);
8931 	}
8932 
8933 	r = radeon_vce_init(rdev);
8934 	if (!r) {
8935 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8936 		ring->ring_obj = NULL;
8937 		r600_ring_init(rdev, ring, 4096);
8938 
8939 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8940 		ring->ring_obj = NULL;
8941 		r600_ring_init(rdev, ring, 4096);
8942 	}
8943 
8944 	rdev->ih.ring_obj = NULL;
8945 	r600_ih_ring_init(rdev, 64 * 1024);
8946 
8947 	r = r600_pcie_gart_init(rdev);
8948 	if (r)
8949 		return r;
8950 
8951 	rdev->accel_working = true;
8952 	r = cik_startup(rdev);
8953 	if (r) {
8954 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8955 		cik_cp_fini(rdev);
8956 		cik_sdma_fini(rdev);
8957 		cik_irq_fini(rdev);
8958 		sumo_rlc_fini(rdev);
8959 		cik_mec_fini(rdev);
8960 		radeon_wb_fini(rdev);
8961 		radeon_ib_pool_fini(rdev);
8962 		radeon_vm_manager_fini(rdev);
8963 		radeon_irq_kms_fini(rdev);
8964 		cik_pcie_gart_fini(rdev);
8965 		rdev->accel_working = false;
8966 	}
8967 
	/* Don't start up if the MC ucode is missing.
	 * The default clocks and voltages before the MC ucode
	 * is loaded are not sufficient for advanced operations.
	 */
8972 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8973 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8974 		return -EINVAL;
8975 	}
8976 
8977 	return 0;
8978 }
8979 
8980 /**
8981  * cik_fini - asic specific driver and hw fini
8982  *
8983  * @rdev: radeon_device pointer
8984  *
8985  * Tear down the asic specific driver variables and program the hw
8986  * to an idle state (CIK).
8987  * Called at driver unload.
8988  */
8989 void cik_fini(struct radeon_device *rdev)
8990 {
8991 	radeon_pm_fini(rdev);
8992 	cik_cp_fini(rdev);
8993 	cik_sdma_fini(rdev);
8994 	cik_fini_pg(rdev);
8995 	cik_fini_cg(rdev);
8996 	cik_irq_fini(rdev);
8997 	sumo_rlc_fini(rdev);
8998 	cik_mec_fini(rdev);
8999 	radeon_wb_fini(rdev);
9000 	radeon_vm_manager_fini(rdev);
9001 	radeon_ib_pool_fini(rdev);
9002 	radeon_irq_kms_fini(rdev);
9003 	uvd_v1_0_fini(rdev);
9004 	radeon_uvd_fini(rdev);
9005 	radeon_vce_fini(rdev);
9006 	cik_pcie_gart_fini(rdev);
9007 	r600_vram_scratch_fini(rdev);
9008 	radeon_gem_fini(rdev);
9009 	radeon_fence_driver_fini(rdev);
9010 	radeon_bo_fini(rdev);
9011 	radeon_atombios_fini(rdev);
9012 	kfree(rdev->bios);
9013 	rdev->bios = NULL;
9014 }
9015 
9016 void dce8_program_fmt(struct drm_encoder *encoder)
9017 {
9018 	struct drm_device *dev = encoder->dev;
9019 	struct radeon_device *rdev = dev->dev_private;
9020 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
9021 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
9022 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
9023 	int bpc = 0;
9024 	u32 tmp = 0;
9025 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
9026 
9027 	if (connector) {
9028 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
9029 		bpc = radeon_get_monitor_bpc(connector);
9030 		dither = radeon_connector->dither;
9031 	}
9032 
9033 	/* LVDS/eDP FMT is set up by atom */
9034 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
9035 		return;
9036 
9037 	/* not needed for analog */
9038 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
9039 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
9040 		return;
9041 
9042 	if (bpc == 0)
9043 		return;
9044 
9045 	switch (bpc) {
9046 	case 6:
9047 		if (dither == RADEON_FMT_DITHER_ENABLE)
9048 			/* XXX sort out optimal dither settings */
9049 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9050 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
9051 		else
9052 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
9053 		break;
9054 	case 8:
9055 		if (dither == RADEON_FMT_DITHER_ENABLE)
9056 			/* XXX sort out optimal dither settings */
9057 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9058 				FMT_RGB_RANDOM_ENABLE |
9059 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
9060 		else
9061 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
9062 		break;
9063 	case 10:
9064 		if (dither == RADEON_FMT_DITHER_ENABLE)
9065 			/* XXX sort out optimal dither settings */
9066 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9067 				FMT_RGB_RANDOM_ENABLE |
9068 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
9069 		else
9070 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
9071 		break;
9072 	default:
9073 		/* not needed */
9074 		break;
9075 	}
9076 
9077 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
9078 }
9079 
9080 /* display watermark setup */
9081 /**
9082  * dce8_line_buffer_adjust - Set up the line buffer
9083  *
9084  * @rdev: radeon_device pointer
9085  * @radeon_crtc: the selected display controller
9086  * @mode: the current display mode on the selected display
9087  * controller
9088  *
 * Set up the line buffer allocation for
9090  * the selected display controller (CIK).
9091  * Returns the line buffer size in pixels.
9092  */
9093 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
9094 				   struct radeon_crtc *radeon_crtc,
9095 				   struct drm_display_mode *mode)
9096 {
9097 	u32 tmp, buffer_alloc, i;
9098 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
9099 	/*
9100 	 * Line Buffer Setup
9101 	 * There are 6 line buffers, one for each display controllers.
9102 	 * There are 3 partitions per LB. Select the number of partitions
9103 	 * to enable based on the display width.  For display widths larger
9104 	 * than 4096, you need use to use 2 display controllers and combine
9105 	 * them using the stereo blender.
9106 	 */
9107 	if (radeon_crtc->base.enabled && mode) {
9108 		if (mode->crtc_hdisplay < 1920) {
9109 			tmp = 1;
9110 			buffer_alloc = 2;
9111 		} else if (mode->crtc_hdisplay < 2560) {
9112 			tmp = 2;
9113 			buffer_alloc = 2;
9114 		} else if (mode->crtc_hdisplay < 4096) {
9115 			tmp = 0;
9116 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9117 		} else {
9118 			DRM_DEBUG_KMS("Mode too big for LB!\n");
9119 			tmp = 0;
9120 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9121 		}
9122 	} else {
9123 		tmp = 1;
9124 		buffer_alloc = 0;
9125 	}
9126 
9127 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
9128 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
9129 
9130 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
9131 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
9132 	for (i = 0; i < rdev->usec_timeout; i++) {
9133 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
9134 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
9135 			break;
9136 		udelay(1);
9137 	}
9138 
9139 	if (radeon_crtc->base.enabled && mode) {
9140 		switch (tmp) {
9141 		case 0:
9142 		default:
9143 			return 4096 * 2;
9144 		case 1:
9145 			return 1920 * 2;
9146 		case 2:
9147 			return 2560 * 2;
9148 		}
9149 	}
9150 
9151 	/* controller not enabled, so no lb used */
9152 	return 0;
9153 }
9154 
9155 /**
9156  * cik_get_number_of_dram_channels - get the number of dram channels
9157  *
9158  * @rdev: radeon_device pointer
9159  *
9160  * Look up the number of video ram channels (CIK).
9161  * Used for display watermark bandwidth calculations
9162  * Returns the number of dram channels
9163  */
9164 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
9165 {
9166 	u32 tmp = RREG32(MC_SHARED_CHMAP);
9167 
9168 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
9169 	case 0:
9170 	default:
9171 		return 1;
9172 	case 1:
9173 		return 2;
9174 	case 2:
9175 		return 4;
9176 	case 3:
9177 		return 8;
9178 	case 4:
9179 		return 3;
9180 	case 5:
9181 		return 6;
9182 	case 6:
9183 		return 10;
9184 	case 7:
9185 		return 12;
9186 	case 8:
9187 		return 16;
9188 	}
9189 }
9190 
9191 struct dce8_wm_params {
9192 	u32 dram_channels; /* number of dram channels */
9193 	u32 yclk;          /* bandwidth per dram data pin in kHz */
9194 	u32 sclk;          /* engine clock in kHz */
9195 	u32 disp_clk;      /* display clock in kHz */
9196 	u32 src_width;     /* viewport width */
9197 	u32 active_time;   /* active display time in ns */
9198 	u32 blank_time;    /* blank time in ns */
9199 	bool interlaced;    /* mode is interlaced */
9200 	fixed20_12 vsc;    /* vertical scale ratio */
9201 	u32 num_heads;     /* number of active crtcs */
9202 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
9203 	u32 lb_size;       /* line buffer allocated to pipe */
9204 	u32 vtaps;         /* vertical scaler taps */
9205 };
9206 
9207 /**
9208  * dce8_dram_bandwidth - get the dram bandwidth
9209  *
9210  * @wm: watermark calculation data
9211  *
9212  * Calculate the raw dram bandwidth (CIK).
9213  * Used for display watermark bandwidth calculations
9214  * Returns the dram bandwidth in MBytes/s
9215  */
9216 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9217 {
9218 	/* Calculate raw DRAM Bandwidth */
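	/* Roughly: bandwidth (MB/s) ~= yclk(kHz)/1000 * dram_channels * 4 bytes
	 * per channel * 0.7 DRAM efficiency, computed in fixed20_12 since the
	 * kernel avoids floating point.
	 */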
9219 	fixed20_12 dram_efficiency; /* 0.7 */
9220 	fixed20_12 yclk, dram_channels, bandwidth;
9221 	fixed20_12 a;
9222 
9223 	a.full = dfixed_const(1000);
9224 	yclk.full = dfixed_const(wm->yclk);
9225 	yclk.full = dfixed_div(yclk, a);
9226 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9227 	a.full = dfixed_const(10);
9228 	dram_efficiency.full = dfixed_const(7);
9229 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
9230 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9231 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9232 
9233 	return dfixed_trunc(bandwidth);
9234 }
9235 
9236 /**
9237  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9238  *
9239  * @wm: watermark calculation data
9240  *
9241  * Calculate the dram bandwidth used for display (CIK).
9242  * Used for display watermark bandwidth calculations
9243  * Returns the dram bandwidth for display in MBytes/s
9244  */
9245 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9246 {
9247 	/* Calculate DRAM Bandwidth and the part allocated to display. */
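	/* Same formula as dce8_dram_bandwidth() but with a 0.3 display
	 * allocation factor instead of the 0.7 efficiency, i.e. the worst
	 * case share of DRAM bandwidth granted to the display.
	 */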
9248 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9249 	fixed20_12 yclk, dram_channels, bandwidth;
9250 	fixed20_12 a;
9251 
9252 	a.full = dfixed_const(1000);
9253 	yclk.full = dfixed_const(wm->yclk);
9254 	yclk.full = dfixed_div(yclk, a);
9255 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9256 	a.full = dfixed_const(10);
	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
9258 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9259 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9260 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9261 
9262 	return dfixed_trunc(bandwidth);
9263 }
9264 
9265 /**
9266  * dce8_data_return_bandwidth - get the data return bandwidth
9267  *
9268  * @wm: watermark calculation data
9269  *
9270  * Calculate the data return bandwidth used for display (CIK).
9271  * Used for display watermark bandwidth calculations
9272  * Returns the data return bandwidth in MBytes/s
9273  */
9274 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9275 {
9276 	/* Calculate the display Data return Bandwidth */
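	/* Roughly: bandwidth (MB/s) ~= sclk(kHz)/1000 * 32 bytes * 0.8
	 * return efficiency.
	 */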
9277 	fixed20_12 return_efficiency; /* 0.8 */
9278 	fixed20_12 sclk, bandwidth;
9279 	fixed20_12 a;
9280 
9281 	a.full = dfixed_const(1000);
9282 	sclk.full = dfixed_const(wm->sclk);
9283 	sclk.full = dfixed_div(sclk, a);
9284 	a.full = dfixed_const(10);
9285 	return_efficiency.full = dfixed_const(8);
9286 	return_efficiency.full = dfixed_div(return_efficiency, a);
9287 	a.full = dfixed_const(32);
9288 	bandwidth.full = dfixed_mul(a, sclk);
9289 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9290 
9291 	return dfixed_trunc(bandwidth);
9292 }
9293 
9294 /**
9295  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9296  *
9297  * @wm: watermark calculation data
9298  *
9299  * Calculate the dmif bandwidth used for display (CIK).
9300  * Used for display watermark bandwidth calculations
9301  * Returns the dmif bandwidth in MBytes/s
9302  */
9303 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9304 {
9305 	/* Calculate the DMIF Request Bandwidth */
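	/* Roughly: bandwidth (MB/s) ~= disp_clk(kHz)/1000 * 32 bytes * 0.8
	 * request efficiency.
	 */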
9306 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9307 	fixed20_12 disp_clk, bandwidth;
9308 	fixed20_12 a, b;
9309 
9310 	a.full = dfixed_const(1000);
9311 	disp_clk.full = dfixed_const(wm->disp_clk);
9312 	disp_clk.full = dfixed_div(disp_clk, a);
9313 	a.full = dfixed_const(32);
9314 	b.full = dfixed_mul(a, disp_clk);
9315 
9316 	a.full = dfixed_const(10);
9317 	disp_clk_request_efficiency.full = dfixed_const(8);
9318 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9319 
9320 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9321 
9322 	return dfixed_trunc(bandwidth);
9323 }
9324 
9325 /**
9326  * dce8_available_bandwidth - get the min available bandwidth
9327  *
9328  * @wm: watermark calculation data
9329  *
9330  * Calculate the min available bandwidth used for display (CIK).
9331  * Used for display watermark bandwidth calculations
9332  * Returns the min available bandwidth in MBytes/s
9333  */
9334 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9335 {
	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
9337 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9338 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9339 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9340 
9341 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9342 }
9343 
9344 /**
9345  * dce8_average_bandwidth - get the average available bandwidth
9346  *
9347  * @wm: watermark calculation data
9348  *
9349  * Calculate the average available bandwidth used for display (CIK).
9350  * Used for display watermark bandwidth calculations
9351  * Returns the average available bandwidth in MBytes/s
9352  */
9353 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9354 {
9355 	/* Calculate the display mode Average Bandwidth
9356 	 * DisplayMode should contain the source and destination dimensions,
9357 	 * timing, etc.
9358 	 */
9359 	fixed20_12 bpp;
9360 	fixed20_12 line_time;
9361 	fixed20_12 src_width;
9362 	fixed20_12 bandwidth;
9363 	fixed20_12 a;
9364 
9365 	a.full = dfixed_const(1000);
9366 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9367 	line_time.full = dfixed_div(line_time, a);
9368 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9369 	src_width.full = dfixed_const(wm->src_width);
9370 	bandwidth.full = dfixed_mul(src_width, bpp);
9371 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9372 	bandwidth.full = dfixed_div(bandwidth, line_time);
9373 
9374 	return dfixed_trunc(bandwidth);
9375 }
9376 
9377 /**
9378  * dce8_latency_watermark - get the latency watermark
9379  *
9380  * @wm: watermark calculation data
9381  *
9382  * Calculate the latency watermark (CIK).
9383  * Used for display watermark bandwidth calculations
9384  * Returns the latency watermark in ns
9385  */
9386 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9387 {
9388 	/* First calculate the latency in ns */
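	/* Total latency = MC latency + time the return path is kept busy by
	 * the other heads + display pipe latency; any shortfall in refilling
	 * the line buffer during the active period is added on top below.
	 */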
9389 	u32 mc_latency = 2000; /* 2000 ns. */
9390 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9391 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9392 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9393 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9394 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9395 		(wm->num_heads * cursor_line_pair_return_time);
9396 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9397 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9398 	u32 tmp, dmif_size = 12288;
9399 	fixed20_12 a, b, c;
9400 
9401 	if (wm->num_heads == 0)
9402 		return 0;
9403 
9404 	a.full = dfixed_const(2);
9405 	b.full = dfixed_const(1);
9406 	if ((wm->vsc.full > a.full) ||
9407 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9408 	    (wm->vtaps >= 5) ||
9409 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9410 		max_src_lines_per_dst_line = 4;
9411 	else
9412 		max_src_lines_per_dst_line = 2;
9413 
9414 	a.full = dfixed_const(available_bandwidth);
9415 	b.full = dfixed_const(wm->num_heads);
9416 	a.full = dfixed_div(a, b);
9417 
9418 	b.full = dfixed_const(mc_latency + 512);
9419 	c.full = dfixed_const(wm->disp_clk);
9420 	b.full = dfixed_div(b, c);
9421 
9422 	c.full = dfixed_const(dmif_size);
9423 	b.full = dfixed_div(c, b);
9424 
9425 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9426 
9427 	b.full = dfixed_const(1000);
9428 	c.full = dfixed_const(wm->disp_clk);
9429 	b.full = dfixed_div(c, b);
9430 	c.full = dfixed_const(wm->bytes_per_pixel);
9431 	b.full = dfixed_mul(b, c);
9432 
9433 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9434 
9435 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9436 	b.full = dfixed_const(1000);
9437 	c.full = dfixed_const(lb_fill_bw);
9438 	b.full = dfixed_div(c, b);
9439 	a.full = dfixed_div(a, b);
9440 	line_fill_time = dfixed_trunc(a);
9441 
9442 	if (line_fill_time < wm->active_time)
9443 		return latency;
9444 	else
		return latency + (line_fill_time - wm->active_time);
}
9448 
9449 /**
9450  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9451  * average and available dram bandwidth
9452  *
9453  * @wm: watermark calculation data
9454  *
9455  * Check if the display average bandwidth fits in the display
9456  * dram bandwidth (CIK).
9457  * Used for display watermark bandwidth calculations
9458  * Returns true if the display fits, false if not.
9459  */
9460 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9461 {
9462 	if (dce8_average_bandwidth(wm) <=
9463 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9464 		return true;
9465 	else
9466 		return false;
9467 }
9468 
9469 /**
9470  * dce8_average_bandwidth_vs_available_bandwidth - check
9471  * average and available bandwidth
9472  *
9473  * @wm: watermark calculation data
9474  *
9475  * Check if the display average bandwidth fits in the display
9476  * available bandwidth (CIK).
9477  * Used for display watermark bandwidth calculations
9478  * Returns true if the display fits, false if not.
9479  */
9480 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9481 {
9482 	if (dce8_average_bandwidth(wm) <=
9483 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9484 		return true;
9485 	else
9486 		return false;
9487 }
9488 
9489 /**
9490  * dce8_check_latency_hiding - check latency hiding
9491  *
9492  * @wm: watermark calculation data
9493  *
9494  * Check latency hiding (CIK).
9495  * Used for display watermark bandwidth calculations
9496  * Returns true if the display fits, false if not.
9497  */
9498 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9499 {
9500 	u32 lb_partitions = wm->lb_size / wm->src_width;
9501 	u32 line_time = wm->active_time + wm->blank_time;
9502 	u32 latency_tolerant_lines;
9503 	u32 latency_hiding;
9504 	fixed20_12 a;
9505 
9506 	a.full = dfixed_const(1);
9507 	if (wm->vsc.full > a.full)
9508 		latency_tolerant_lines = 1;
9509 	else {
9510 		if (lb_partitions <= (wm->vtaps + 1))
9511 			latency_tolerant_lines = 1;
9512 		else
9513 			latency_tolerant_lines = 2;
9514 	}
9515 
9516 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9517 
9518 	if (dce8_latency_watermark(wm) <= latency_hiding)
9519 		return true;
9520 	else
9521 		return false;
9522 }
9523 
9524 /**
9525  * dce8_program_watermarks - program display watermarks
9526  *
9527  * @rdev: radeon_device pointer
9528  * @radeon_crtc: the selected display controller
9529  * @lb_size: line buffer size
9530  * @num_heads: number of display controllers in use
9531  *
9532  * Calculate and program the display watermarks for the
9533  * selected display controller (CIK).
9534  */
9535 static void dce8_program_watermarks(struct radeon_device *rdev,
9536 				    struct radeon_crtc *radeon_crtc,
9537 				    u32 lb_size, u32 num_heads)
9538 {
9539 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9540 	struct dce8_wm_params wm_low, wm_high;
9541 	u32 pixel_period;
9542 	u32 line_time = 0;
9543 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9544 	u32 tmp, wm_mask;
9545 
9546 	if (radeon_crtc->base.enabled && num_heads && mode) {
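		/* mode->clock is in kHz, so 1000000 / clock gives the pixel
		 * period in ns
		 */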
9547 		pixel_period = 1000000 / (u32)mode->clock;
9548 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9549 
9550 		/* watermark for high clocks */
9551 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9552 		    rdev->pm.dpm_enabled) {
9553 			wm_high.yclk =
9554 				radeon_dpm_get_mclk(rdev, false) * 10;
9555 			wm_high.sclk =
9556 				radeon_dpm_get_sclk(rdev, false) * 10;
9557 		} else {
9558 			wm_high.yclk = rdev->pm.current_mclk * 10;
9559 			wm_high.sclk = rdev->pm.current_sclk * 10;
9560 		}
9561 
9562 		wm_high.disp_clk = mode->clock;
9563 		wm_high.src_width = mode->crtc_hdisplay;
9564 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9565 		wm_high.blank_time = line_time - wm_high.active_time;
9566 		wm_high.interlaced = false;
9567 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9568 			wm_high.interlaced = true;
9569 		wm_high.vsc = radeon_crtc->vsc;
9570 		wm_high.vtaps = 1;
9571 		if (radeon_crtc->rmx_type != RMX_OFF)
9572 			wm_high.vtaps = 2;
9573 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9574 		wm_high.lb_size = lb_size;
9575 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9576 		wm_high.num_heads = num_heads;
9577 
9578 		/* set for high clocks */
9579 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9580 
9581 		/* possibly force display priority to high */
9582 		/* should really do this at mode validation time... */
9583 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9584 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9585 		    !dce8_check_latency_hiding(&wm_high) ||
9586 		    (rdev->disp_priority == 2)) {
9587 			DRM_DEBUG_KMS("force priority to high\n");
9588 		}
9589 
9590 		/* watermark for low clocks */
9591 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9592 		    rdev->pm.dpm_enabled) {
9593 			wm_low.yclk =
9594 				radeon_dpm_get_mclk(rdev, true) * 10;
9595 			wm_low.sclk =
9596 				radeon_dpm_get_sclk(rdev, true) * 10;
9597 		} else {
9598 			wm_low.yclk = rdev->pm.current_mclk * 10;
9599 			wm_low.sclk = rdev->pm.current_sclk * 10;
9600 		}
9601 
9602 		wm_low.disp_clk = mode->clock;
9603 		wm_low.src_width = mode->crtc_hdisplay;
9604 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9605 		wm_low.blank_time = line_time - wm_low.active_time;
9606 		wm_low.interlaced = false;
9607 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9608 			wm_low.interlaced = true;
9609 		wm_low.vsc = radeon_crtc->vsc;
9610 		wm_low.vtaps = 1;
9611 		if (radeon_crtc->rmx_type != RMX_OFF)
9612 			wm_low.vtaps = 2;
9613 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9614 		wm_low.lb_size = lb_size;
9615 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9616 		wm_low.num_heads = num_heads;
9617 
9618 		/* set for low clocks */
9619 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9620 
9621 		/* possibly force display priority to high */
9622 		/* should really do this at mode validation time... */
9623 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9624 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9625 		    !dce8_check_latency_hiding(&wm_low) ||
9626 		    (rdev->disp_priority == 2)) {
9627 			DRM_DEBUG_KMS("force priority to high\n");
9628 		}
9629 
9630 		/* Save number of lines the linebuffer leads before the scanout */
9631 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
9632 	}
9633 
9634 	/* select wm A */
9635 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9636 	tmp = wm_mask;
9637 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9638 	tmp |= LATENCY_WATERMARK_MASK(1);
9639 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9640 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9641 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9642 		LATENCY_HIGH_WATERMARK(line_time)));
9643 	/* select wm B */
9644 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9645 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9646 	tmp |= LATENCY_WATERMARK_MASK(2);
9647 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9648 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9649 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9650 		LATENCY_HIGH_WATERMARK(line_time)));
9651 	/* restore original selection */
9652 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9653 
9654 	/* save values for DPM */
9655 	radeon_crtc->line_time = line_time;
9656 	radeon_crtc->wm_high = latency_watermark_a;
9657 	radeon_crtc->wm_low = latency_watermark_b;
9658 }
9659 
9660 /**
9661  * dce8_bandwidth_update - program display watermarks
9662  *
9663  * @rdev: radeon_device pointer
9664  *
9665  * Calculate and program the display watermarks and line
9666  * buffer allocation (CIK).
9667  */
9668 void dce8_bandwidth_update(struct radeon_device *rdev)
9669 {
9670 	struct drm_display_mode *mode = NULL;
9671 	u32 num_heads = 0, lb_size;
9672 	int i;
9673 
9674 	if (!rdev->mode_info.mode_config_initialized)
9675 		return;
9676 
9677 	radeon_update_display_priority(rdev);
9678 
9679 	for (i = 0; i < rdev->num_crtc; i++) {
9680 		if (rdev->mode_info.crtcs[i]->base.enabled)
9681 			num_heads++;
9682 	}
9683 	for (i = 0; i < rdev->num_crtc; i++) {
9684 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9685 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9686 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9687 	}
9688 }
9689 
9690 /**
9691  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9692  *
9693  * @rdev: radeon_device pointer
9694  *
 * Fetches a GPU clock counter snapshot (CIK).
9696  * Returns the 64 bit clock counter snapshot.
9697  */
9698 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9699 {
9700 	uint64_t clock;
9701 
9702 	mutex_lock(&rdev->gpu_clock_mutex);
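	/* writing the capture register presumably latches both halves of the
	 * free-running counter so the LSB/MSB reads below are coherent
	 */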
9703 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9704 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9705 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9706 	mutex_unlock(&rdev->gpu_clock_mutex);
9707 	return clock;
9708 }
9709 
9710 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9711                               u32 cntl_reg, u32 status_reg)
9712 {
9713 	int r, i;
9714 	struct atom_clock_dividers dividers;
9715 	uint32_t tmp;
9716 
9717 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9718 					   clock, false, &dividers);
9719 	if (r)
9720 		return r;
9721 
9722 	tmp = RREG32_SMC(cntl_reg);
9723 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9724 	tmp |= dividers.post_divider;
9725 	WREG32_SMC(cntl_reg, tmp);
9726 
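	/* wait up to ~1 second (100 * 10 ms) for the new divider to take effect */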
9727 	for (i = 0; i < 100; i++) {
9728 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9729 			break;
9730 		mdelay(10);
9731 	}
9732 	if (i == 100)
9733 		return -ETIMEDOUT;
9734 
9735 	return 0;
9736 }
9737 
9738 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9739 {
9740 	int r = 0;
9741 
9742 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9743 	if (r)
9744 		return r;
9745 
9746 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9747 	return r;
9748 }
9749 
9750 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9751 {
9752 	int r, i;
9753 	struct atom_clock_dividers dividers;
9754 	u32 tmp;
9755 
9756 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9757 					   ecclk, false, &dividers);
9758 	if (r)
9759 		return r;
9760 
9761 	for (i = 0; i < 100; i++) {
9762 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9763 			break;
9764 		mdelay(10);
9765 	}
9766 	if (i == 100)
9767 		return -ETIMEDOUT;
9768 
9769 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9770 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9771 	tmp |= dividers.post_divider;
9772 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9773 
9774 	for (i = 0; i < 100; i++) {
9775 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9776 			break;
9777 		mdelay(10);
9778 	}
9779 	if (i == 100)
9780 		return -ETIMEDOUT;
9781 
9782 	return 0;
9783 }
9784 
9785 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
9786 {
9787 	struct pci_dev *root = rdev->pdev->bus->self;
9788 	int bridge_pos, gpu_pos;
9789 	u32 speed_cntl, mask, current_data_rate;
9790 	int ret, i;
9791 	u16 tmp16;
9792 
9793 	if (pci_is_root_bus(rdev->pdev->bus))
9794 		return;
9795 
9796 	if (radeon_pcie_gen2 == 0)
9797 		return;
9798 
9799 	if (rdev->flags & RADEON_IS_IGP)
9800 		return;
9801 
9802 	if (!(rdev->flags & RADEON_IS_PCIE))
9803 		return;
9804 
9805 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
9806 	if (ret != 0)
9807 		return;
9808 
9809 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
9810 		return;
9811 
9812 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9813 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
9814 		LC_CURRENT_DATA_RATE_SHIFT;
9815 	if (mask & DRM_PCIE_SPEED_80) {
9816 		if (current_data_rate == 2) {
9817 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
9818 			return;
9819 		}
9820 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
9821 	} else if (mask & DRM_PCIE_SPEED_50) {
9822 		if (current_data_rate == 1) {
9823 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
9824 			return;
9825 		}
9826 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
9827 	}
9828 
9829 	bridge_pos = pci_pcie_cap(root);
9830 	if (!bridge_pos)
9831 		return;
9832 
9833 	gpu_pos = pci_pcie_cap(rdev->pdev);
9834 	if (!gpu_pos)
9835 		return;
9836 
9837 	if (mask & DRM_PCIE_SPEED_80) {
9838 		/* re-try equalization if gen3 is not already enabled */
9839 		if (current_data_rate != 2) {
9840 			u16 bridge_cfg, gpu_cfg;
9841 			u16 bridge_cfg2, gpu_cfg2;
9842 			u32 max_lw, current_lw, tmp;
9843 
9844 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9845 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9846 
9847 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
9848 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9849 
9850 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
9851 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9852 
9853 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9854 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
9855 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
9856 
9857 			if (current_lw < max_lw) {
9858 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9859 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
9860 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
9861 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
9862 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
9863 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
9864 				}
9865 			}
9866 
9867 			for (i = 0; i < 10; i++) {
9868 				/* check status */
9869 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
9870 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
9871 					break;
9872 
9873 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
9874 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
9875 
9876 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
9877 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
9878 
9879 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9880 				tmp |= LC_SET_QUIESCE;
9881 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9882 
9883 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9884 				tmp |= LC_REDO_EQ;
9885 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9886 
9887 				mdelay(100);
9888 
9889 				/* linkctl */
9890 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
9891 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9892 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
9893 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
9894 
9895 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
9896 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
9897 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
9898 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
9899 
9900 				/* linkctl2 */
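				/* (1 << 4) | (7 << 9) appears to cover the
				 * compliance-test fields of LNKCTL2; restore
				 * those bits from the saved configs
				 */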
9901 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
9902 				tmp16 &= ~((1 << 4) | (7 << 9));
9903 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
9904 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
9905 
9906 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9907 				tmp16 &= ~((1 << 4) | (7 << 9));
9908 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
9909 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9910 
9911 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
9912 				tmp &= ~LC_SET_QUIESCE;
9913 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
9914 			}
9915 		}
9916 	}
9917 
9918 	/* set the link speed */
9919 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9920 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9921 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9922 
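	/* LNKCTL2[3:0] is the Target Link Speed field:
	 * 1 = 2.5GT/s, 2 = 5.0GT/s, 3 = 8.0GT/s
	 */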
9923 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9924 	tmp16 &= ~0xf;
9925 	if (mask & DRM_PCIE_SPEED_80)
9926 		tmp16 |= 3; /* gen3 */
9927 	else if (mask & DRM_PCIE_SPEED_50)
9928 		tmp16 |= 2; /* gen2 */
9929 	else
9930 		tmp16 |= 1; /* gen1 */
9931 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9932 
9933 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9934 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9935 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9936 
9937 	for (i = 0; i < rdev->usec_timeout; i++) {
9938 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9939 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9940 			break;
9941 		udelay(1);
9942 	}
9943 }
9944 
9945 static void cik_program_aspm(struct radeon_device *rdev)
9946 {
9947 	u32 data, orig;
9948 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9949 	bool disable_clkreq = false;
9950 
9951 	if (radeon_aspm == 0)
9952 		return;
9953 
9954 	/* XXX double check IGPs */
9955 	if (rdev->flags & RADEON_IS_IGP)
9956 		return;
9957 
9958 	if (!(rdev->flags & RADEON_IS_PCIE))
9959 		return;
9960 
9961 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9962 	data &= ~LC_XMIT_N_FTS_MASK;
9963 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9964 	if (orig != data)
9965 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9966 
9967 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9968 	data |= LC_GO_TO_RECOVERY;
9969 	if (orig != data)
9970 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9971 
9972 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9973 	data |= P_IGNORE_EDB_ERR;
9974 	if (orig != data)
9975 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9976 
9977 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9978 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9979 	data |= LC_PMI_TO_L1_DIS;
9980 	if (!disable_l0s)
9981 		data |= LC_L0S_INACTIVITY(7);
9982 
9983 	if (!disable_l1) {
9984 		data |= LC_L1_INACTIVITY(7);
9985 		data &= ~LC_PMI_TO_L1_DIS;
9986 		if (orig != data)
9987 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9988 
9989 		if (!disable_plloff_in_l1) {
9990 			bool clk_req_support;
9991 
9992 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9993 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9994 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9995 			if (orig != data)
9996 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9997 
9998 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9999 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
10000 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
10001 			if (orig != data)
10002 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
10003 
10004 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
10005 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
10006 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
10007 			if (orig != data)
10008 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
10009 
10010 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
10011 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
10012 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
10013 			if (orig != data)
10014 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
10015 
10016 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
10017 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
10018 			data |= LC_DYN_LANES_PWR_STATE(3);
10019 			if (orig != data)
10020 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
10021 
10022 			if (!disable_clkreq &&
10023 			    !pci_is_root_bus(rdev->pdev->bus)) {
10024 				struct pci_dev *root = rdev->pdev->bus->self;
10025 				u32 lnkcap;
10026 
10027 				clk_req_support = false;
10028 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
10029 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
10030 					clk_req_support = true;
10031 			} else {
10032 				clk_req_support = false;
10033 			}
10034 
10035 			if (clk_req_support) {
10036 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
10037 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
10038 				if (orig != data)
10039 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
10040 
10041 				orig = data = RREG32_SMC(THM_CLK_CNTL);
10042 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
10043 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
10044 				if (orig != data)
10045 					WREG32_SMC(THM_CLK_CNTL, data);
10046 
10047 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
10048 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
10049 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
10050 				if (orig != data)
10051 					WREG32_SMC(MISC_CLK_CTRL, data);
10052 
10053 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
10054 				data &= ~BCLK_AS_XCLK;
10055 				if (orig != data)
10056 					WREG32_SMC(CG_CLKPIN_CNTL, data);
10057 
10058 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
10059 				data &= ~FORCE_BIF_REFCLK_EN;
10060 				if (orig != data)
10061 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
10062 
10063 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
10064 				data &= ~MPLL_CLKOUT_SEL_MASK;
10065 				data |= MPLL_CLKOUT_SEL(4);
10066 				if (orig != data)
10067 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
10068 			}
10069 		}
10070 	} else {
10071 		if (orig != data)
10072 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
10073 	}
10074 
10075 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
10076 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
10077 	if (orig != data)
10078 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
10079 
10080 	if (!disable_l0s) {
10081 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
10083 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
10084 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
10085 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
10086 				data &= ~LC_L0S_INACTIVITY_MASK;
10087 				if (orig != data)
10088 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
10089 			}
10090 		}
10091 	}
10092 }
10093