xref: /linux/drivers/gpu/drm/radeon/cik.c (revision 005438a8eef063495ac059d128eea71b58de50e5)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37 
38 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
45 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
46 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
47 
48 MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
49 MODULE_FIRMWARE("radeon/bonaire_me.bin");
50 MODULE_FIRMWARE("radeon/bonaire_ce.bin");
51 MODULE_FIRMWARE("radeon/bonaire_mec.bin");
52 MODULE_FIRMWARE("radeon/bonaire_mc.bin");
53 MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
54 MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
55 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
56 
57 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
58 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
59 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
60 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
61 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
62 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
63 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
64 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
65 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
66 
67 MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
68 MODULE_FIRMWARE("radeon/hawaii_me.bin");
69 MODULE_FIRMWARE("radeon/hawaii_ce.bin");
70 MODULE_FIRMWARE("radeon/hawaii_mec.bin");
71 MODULE_FIRMWARE("radeon/hawaii_mc.bin");
72 MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
73 MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
74 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
75 
76 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
77 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
78 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
79 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
80 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
81 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
82 
83 MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
84 MODULE_FIRMWARE("radeon/kaveri_me.bin");
85 MODULE_FIRMWARE("radeon/kaveri_ce.bin");
86 MODULE_FIRMWARE("radeon/kaveri_mec.bin");
87 MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
88 MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
89 MODULE_FIRMWARE("radeon/kaveri_sdma.bin");
90 
91 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
92 MODULE_FIRMWARE("radeon/KABINI_me.bin");
93 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
94 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
95 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
96 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
97 
98 MODULE_FIRMWARE("radeon/kabini_pfp.bin");
99 MODULE_FIRMWARE("radeon/kabini_me.bin");
100 MODULE_FIRMWARE("radeon/kabini_ce.bin");
101 MODULE_FIRMWARE("radeon/kabini_mec.bin");
102 MODULE_FIRMWARE("radeon/kabini_rlc.bin");
103 MODULE_FIRMWARE("radeon/kabini_sdma.bin");
104 
105 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
106 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
107 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
108 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
109 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
110 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
111 
112 MODULE_FIRMWARE("radeon/mullins_pfp.bin");
113 MODULE_FIRMWARE("radeon/mullins_me.bin");
114 MODULE_FIRMWARE("radeon/mullins_ce.bin");
115 MODULE_FIRMWARE("radeon/mullins_mec.bin");
116 MODULE_FIRMWARE("radeon/mullins_rlc.bin");
117 MODULE_FIRMWARE("radeon/mullins_sdma.bin");
118 
119 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
120 extern void r600_ih_ring_fini(struct radeon_device *rdev);
121 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
122 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
123 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
124 extern void sumo_rlc_fini(struct radeon_device *rdev);
125 extern int sumo_rlc_init(struct radeon_device *rdev);
126 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
127 extern void si_rlc_reset(struct radeon_device *rdev);
128 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
129 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
130 extern int cik_sdma_resume(struct radeon_device *rdev);
131 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
132 extern void cik_sdma_fini(struct radeon_device *rdev);
133 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
134 static void cik_rlc_stop(struct radeon_device *rdev);
135 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
136 static void cik_program_aspm(struct radeon_device *rdev);
137 static void cik_init_pg(struct radeon_device *rdev);
138 static void cik_init_cg(struct radeon_device *rdev);
139 static void cik_fini_pg(struct radeon_device *rdev);
140 static void cik_fini_cg(struct radeon_device *rdev);
141 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
142 					  bool enable);
143 
144 /**
145  * cik_get_allowed_info_register - fetch the register for the info ioctl
146  *
147  * @rdev: radeon_device pointer
148  * @reg: register offset in bytes
149  * @val: register value
150  *
151  * Returns 0 for success or -EINVAL for an invalid register
152  *
153  */
154 int cik_get_allowed_info_register(struct radeon_device *rdev,
155 				  u32 reg, u32 *val)
156 {
157 	switch (reg) {
158 	case GRBM_STATUS:
159 	case GRBM_STATUS2:
160 	case GRBM_STATUS_SE0:
161 	case GRBM_STATUS_SE1:
162 	case GRBM_STATUS_SE2:
163 	case GRBM_STATUS_SE3:
164 	case SRBM_STATUS:
165 	case SRBM_STATUS2:
166 	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
167 	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
168 	case UVD_STATUS:
169 	/* TODO VCE */
170 		*val = RREG32(reg);
171 		return 0;
172 	default:
173 		return -EINVAL;
174 	}
175 }
176 
177 /*
178  * Indirect registers accessor
179  */
180 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
181 {
182 	unsigned long flags;
183 	u32 r;
184 
185 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
186 	WREG32(CIK_DIDT_IND_INDEX, (reg));
187 	r = RREG32(CIK_DIDT_IND_DATA);
188 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
189 	return r;
190 }
191 
192 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
193 {
194 	unsigned long flags;
195 
196 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
197 	WREG32(CIK_DIDT_IND_INDEX, (reg));
198 	WREG32(CIK_DIDT_IND_DATA, (v));
199 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
200 }
201 
202 /* get temperature in millidegrees */
203 int ci_get_temp(struct radeon_device *rdev)
204 {
205 	u32 temp;
206 	int actual_temp = 0;
207 
208 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
209 		CTF_TEMP_SHIFT;
210 
211 	if (temp & 0x200)
212 		actual_temp = 255;
213 	else
214 		actual_temp = temp & 0x1ff;
215 
216 	actual_temp = actual_temp * 1000;
217 
218 	return actual_temp;
219 }
220 
221 /* get temperature in millidegrees */
222 int kv_get_temp(struct radeon_device *rdev)
223 {
224 	u32 temp;
225 	int actual_temp = 0;
226 
227 	temp = RREG32_SMC(0xC0300E0C);
228 
229 	if (temp)
230 		actual_temp = (temp / 8) - 49;
231 	else
232 		actual_temp = 0;
233 
234 	actual_temp = actual_temp * 1000;
235 
236 	return actual_temp;
237 }
238 
239 /*
240  * Indirect registers accessor
241  */
242 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
243 {
244 	unsigned long flags;
245 	u32 r;
246 
247 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
248 	WREG32(PCIE_INDEX, reg);
249 	(void)RREG32(PCIE_INDEX);
250 	r = RREG32(PCIE_DATA);
251 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
252 	return r;
253 }
254 
255 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
256 {
257 	unsigned long flags;
258 
259 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
260 	WREG32(PCIE_INDEX, reg);
261 	(void)RREG32(PCIE_INDEX);
262 	WREG32(PCIE_DATA, v);
263 	(void)RREG32(PCIE_DATA);
264 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
265 }
266 
267 static const u32 spectre_rlc_save_restore_register_list[] =
268 {
269 	(0x0e00 << 16) | (0xc12c >> 2),
270 	0x00000000,
271 	(0x0e00 << 16) | (0xc140 >> 2),
272 	0x00000000,
273 	(0x0e00 << 16) | (0xc150 >> 2),
274 	0x00000000,
275 	(0x0e00 << 16) | (0xc15c >> 2),
276 	0x00000000,
277 	(0x0e00 << 16) | (0xc168 >> 2),
278 	0x00000000,
279 	(0x0e00 << 16) | (0xc170 >> 2),
280 	0x00000000,
281 	(0x0e00 << 16) | (0xc178 >> 2),
282 	0x00000000,
283 	(0x0e00 << 16) | (0xc204 >> 2),
284 	0x00000000,
285 	(0x0e00 << 16) | (0xc2b4 >> 2),
286 	0x00000000,
287 	(0x0e00 << 16) | (0xc2b8 >> 2),
288 	0x00000000,
289 	(0x0e00 << 16) | (0xc2bc >> 2),
290 	0x00000000,
291 	(0x0e00 << 16) | (0xc2c0 >> 2),
292 	0x00000000,
293 	(0x0e00 << 16) | (0x8228 >> 2),
294 	0x00000000,
295 	(0x0e00 << 16) | (0x829c >> 2),
296 	0x00000000,
297 	(0x0e00 << 16) | (0x869c >> 2),
298 	0x00000000,
299 	(0x0600 << 16) | (0x98f4 >> 2),
300 	0x00000000,
301 	(0x0e00 << 16) | (0x98f8 >> 2),
302 	0x00000000,
303 	(0x0e00 << 16) | (0x9900 >> 2),
304 	0x00000000,
305 	(0x0e00 << 16) | (0xc260 >> 2),
306 	0x00000000,
307 	(0x0e00 << 16) | (0x90e8 >> 2),
308 	0x00000000,
309 	(0x0e00 << 16) | (0x3c000 >> 2),
310 	0x00000000,
311 	(0x0e00 << 16) | (0x3c00c >> 2),
312 	0x00000000,
313 	(0x0e00 << 16) | (0x8c1c >> 2),
314 	0x00000000,
315 	(0x0e00 << 16) | (0x9700 >> 2),
316 	0x00000000,
317 	(0x0e00 << 16) | (0xcd20 >> 2),
318 	0x00000000,
319 	(0x4e00 << 16) | (0xcd20 >> 2),
320 	0x00000000,
321 	(0x5e00 << 16) | (0xcd20 >> 2),
322 	0x00000000,
323 	(0x6e00 << 16) | (0xcd20 >> 2),
324 	0x00000000,
325 	(0x7e00 << 16) | (0xcd20 >> 2),
326 	0x00000000,
327 	(0x8e00 << 16) | (0xcd20 >> 2),
328 	0x00000000,
329 	(0x9e00 << 16) | (0xcd20 >> 2),
330 	0x00000000,
331 	(0xae00 << 16) | (0xcd20 >> 2),
332 	0x00000000,
333 	(0xbe00 << 16) | (0xcd20 >> 2),
334 	0x00000000,
335 	(0x0e00 << 16) | (0x89bc >> 2),
336 	0x00000000,
337 	(0x0e00 << 16) | (0x8900 >> 2),
338 	0x00000000,
339 	0x3,
340 	(0x0e00 << 16) | (0xc130 >> 2),
341 	0x00000000,
342 	(0x0e00 << 16) | (0xc134 >> 2),
343 	0x00000000,
344 	(0x0e00 << 16) | (0xc1fc >> 2),
345 	0x00000000,
346 	(0x0e00 << 16) | (0xc208 >> 2),
347 	0x00000000,
348 	(0x0e00 << 16) | (0xc264 >> 2),
349 	0x00000000,
350 	(0x0e00 << 16) | (0xc268 >> 2),
351 	0x00000000,
352 	(0x0e00 << 16) | (0xc26c >> 2),
353 	0x00000000,
354 	(0x0e00 << 16) | (0xc270 >> 2),
355 	0x00000000,
356 	(0x0e00 << 16) | (0xc274 >> 2),
357 	0x00000000,
358 	(0x0e00 << 16) | (0xc278 >> 2),
359 	0x00000000,
360 	(0x0e00 << 16) | (0xc27c >> 2),
361 	0x00000000,
362 	(0x0e00 << 16) | (0xc280 >> 2),
363 	0x00000000,
364 	(0x0e00 << 16) | (0xc284 >> 2),
365 	0x00000000,
366 	(0x0e00 << 16) | (0xc288 >> 2),
367 	0x00000000,
368 	(0x0e00 << 16) | (0xc28c >> 2),
369 	0x00000000,
370 	(0x0e00 << 16) | (0xc290 >> 2),
371 	0x00000000,
372 	(0x0e00 << 16) | (0xc294 >> 2),
373 	0x00000000,
374 	(0x0e00 << 16) | (0xc298 >> 2),
375 	0x00000000,
376 	(0x0e00 << 16) | (0xc29c >> 2),
377 	0x00000000,
378 	(0x0e00 << 16) | (0xc2a0 >> 2),
379 	0x00000000,
380 	(0x0e00 << 16) | (0xc2a4 >> 2),
381 	0x00000000,
382 	(0x0e00 << 16) | (0xc2a8 >> 2),
383 	0x00000000,
384 	(0x0e00 << 16) | (0xc2ac  >> 2),
385 	0x00000000,
386 	(0x0e00 << 16) | (0xc2b0 >> 2),
387 	0x00000000,
388 	(0x0e00 << 16) | (0x301d0 >> 2),
389 	0x00000000,
390 	(0x0e00 << 16) | (0x30238 >> 2),
391 	0x00000000,
392 	(0x0e00 << 16) | (0x30250 >> 2),
393 	0x00000000,
394 	(0x0e00 << 16) | (0x30254 >> 2),
395 	0x00000000,
396 	(0x0e00 << 16) | (0x30258 >> 2),
397 	0x00000000,
398 	(0x0e00 << 16) | (0x3025c >> 2),
399 	0x00000000,
400 	(0x4e00 << 16) | (0xc900 >> 2),
401 	0x00000000,
402 	(0x5e00 << 16) | (0xc900 >> 2),
403 	0x00000000,
404 	(0x6e00 << 16) | (0xc900 >> 2),
405 	0x00000000,
406 	(0x7e00 << 16) | (0xc900 >> 2),
407 	0x00000000,
408 	(0x8e00 << 16) | (0xc900 >> 2),
409 	0x00000000,
410 	(0x9e00 << 16) | (0xc900 >> 2),
411 	0x00000000,
412 	(0xae00 << 16) | (0xc900 >> 2),
413 	0x00000000,
414 	(0xbe00 << 16) | (0xc900 >> 2),
415 	0x00000000,
416 	(0x4e00 << 16) | (0xc904 >> 2),
417 	0x00000000,
418 	(0x5e00 << 16) | (0xc904 >> 2),
419 	0x00000000,
420 	(0x6e00 << 16) | (0xc904 >> 2),
421 	0x00000000,
422 	(0x7e00 << 16) | (0xc904 >> 2),
423 	0x00000000,
424 	(0x8e00 << 16) | (0xc904 >> 2),
425 	0x00000000,
426 	(0x9e00 << 16) | (0xc904 >> 2),
427 	0x00000000,
428 	(0xae00 << 16) | (0xc904 >> 2),
429 	0x00000000,
430 	(0xbe00 << 16) | (0xc904 >> 2),
431 	0x00000000,
432 	(0x4e00 << 16) | (0xc908 >> 2),
433 	0x00000000,
434 	(0x5e00 << 16) | (0xc908 >> 2),
435 	0x00000000,
436 	(0x6e00 << 16) | (0xc908 >> 2),
437 	0x00000000,
438 	(0x7e00 << 16) | (0xc908 >> 2),
439 	0x00000000,
440 	(0x8e00 << 16) | (0xc908 >> 2),
441 	0x00000000,
442 	(0x9e00 << 16) | (0xc908 >> 2),
443 	0x00000000,
444 	(0xae00 << 16) | (0xc908 >> 2),
445 	0x00000000,
446 	(0xbe00 << 16) | (0xc908 >> 2),
447 	0x00000000,
448 	(0x4e00 << 16) | (0xc90c >> 2),
449 	0x00000000,
450 	(0x5e00 << 16) | (0xc90c >> 2),
451 	0x00000000,
452 	(0x6e00 << 16) | (0xc90c >> 2),
453 	0x00000000,
454 	(0x7e00 << 16) | (0xc90c >> 2),
455 	0x00000000,
456 	(0x8e00 << 16) | (0xc90c >> 2),
457 	0x00000000,
458 	(0x9e00 << 16) | (0xc90c >> 2),
459 	0x00000000,
460 	(0xae00 << 16) | (0xc90c >> 2),
461 	0x00000000,
462 	(0xbe00 << 16) | (0xc90c >> 2),
463 	0x00000000,
464 	(0x4e00 << 16) | (0xc910 >> 2),
465 	0x00000000,
466 	(0x5e00 << 16) | (0xc910 >> 2),
467 	0x00000000,
468 	(0x6e00 << 16) | (0xc910 >> 2),
469 	0x00000000,
470 	(0x7e00 << 16) | (0xc910 >> 2),
471 	0x00000000,
472 	(0x8e00 << 16) | (0xc910 >> 2),
473 	0x00000000,
474 	(0x9e00 << 16) | (0xc910 >> 2),
475 	0x00000000,
476 	(0xae00 << 16) | (0xc910 >> 2),
477 	0x00000000,
478 	(0xbe00 << 16) | (0xc910 >> 2),
479 	0x00000000,
480 	(0x0e00 << 16) | (0xc99c >> 2),
481 	0x00000000,
482 	(0x0e00 << 16) | (0x9834 >> 2),
483 	0x00000000,
484 	(0x0000 << 16) | (0x30f00 >> 2),
485 	0x00000000,
486 	(0x0001 << 16) | (0x30f00 >> 2),
487 	0x00000000,
488 	(0x0000 << 16) | (0x30f04 >> 2),
489 	0x00000000,
490 	(0x0001 << 16) | (0x30f04 >> 2),
491 	0x00000000,
492 	(0x0000 << 16) | (0x30f08 >> 2),
493 	0x00000000,
494 	(0x0001 << 16) | (0x30f08 >> 2),
495 	0x00000000,
496 	(0x0000 << 16) | (0x30f0c >> 2),
497 	0x00000000,
498 	(0x0001 << 16) | (0x30f0c >> 2),
499 	0x00000000,
500 	(0x0600 << 16) | (0x9b7c >> 2),
501 	0x00000000,
502 	(0x0e00 << 16) | (0x8a14 >> 2),
503 	0x00000000,
504 	(0x0e00 << 16) | (0x8a18 >> 2),
505 	0x00000000,
506 	(0x0600 << 16) | (0x30a00 >> 2),
507 	0x00000000,
508 	(0x0e00 << 16) | (0x8bf0 >> 2),
509 	0x00000000,
510 	(0x0e00 << 16) | (0x8bcc >> 2),
511 	0x00000000,
512 	(0x0e00 << 16) | (0x8b24 >> 2),
513 	0x00000000,
514 	(0x0e00 << 16) | (0x30a04 >> 2),
515 	0x00000000,
516 	(0x0600 << 16) | (0x30a10 >> 2),
517 	0x00000000,
518 	(0x0600 << 16) | (0x30a14 >> 2),
519 	0x00000000,
520 	(0x0600 << 16) | (0x30a18 >> 2),
521 	0x00000000,
522 	(0x0600 << 16) | (0x30a2c >> 2),
523 	0x00000000,
524 	(0x0e00 << 16) | (0xc700 >> 2),
525 	0x00000000,
526 	(0x0e00 << 16) | (0xc704 >> 2),
527 	0x00000000,
528 	(0x0e00 << 16) | (0xc708 >> 2),
529 	0x00000000,
530 	(0x0e00 << 16) | (0xc768 >> 2),
531 	0x00000000,
532 	(0x0400 << 16) | (0xc770 >> 2),
533 	0x00000000,
534 	(0x0400 << 16) | (0xc774 >> 2),
535 	0x00000000,
536 	(0x0400 << 16) | (0xc778 >> 2),
537 	0x00000000,
538 	(0x0400 << 16) | (0xc77c >> 2),
539 	0x00000000,
540 	(0x0400 << 16) | (0xc780 >> 2),
541 	0x00000000,
542 	(0x0400 << 16) | (0xc784 >> 2),
543 	0x00000000,
544 	(0x0400 << 16) | (0xc788 >> 2),
545 	0x00000000,
546 	(0x0400 << 16) | (0xc78c >> 2),
547 	0x00000000,
548 	(0x0400 << 16) | (0xc798 >> 2),
549 	0x00000000,
550 	(0x0400 << 16) | (0xc79c >> 2),
551 	0x00000000,
552 	(0x0400 << 16) | (0xc7a0 >> 2),
553 	0x00000000,
554 	(0x0400 << 16) | (0xc7a4 >> 2),
555 	0x00000000,
556 	(0x0400 << 16) | (0xc7a8 >> 2),
557 	0x00000000,
558 	(0x0400 << 16) | (0xc7ac >> 2),
559 	0x00000000,
560 	(0x0400 << 16) | (0xc7b0 >> 2),
561 	0x00000000,
562 	(0x0400 << 16) | (0xc7b4 >> 2),
563 	0x00000000,
564 	(0x0e00 << 16) | (0x9100 >> 2),
565 	0x00000000,
566 	(0x0e00 << 16) | (0x3c010 >> 2),
567 	0x00000000,
568 	(0x0e00 << 16) | (0x92a8 >> 2),
569 	0x00000000,
570 	(0x0e00 << 16) | (0x92ac >> 2),
571 	0x00000000,
572 	(0x0e00 << 16) | (0x92b4 >> 2),
573 	0x00000000,
574 	(0x0e00 << 16) | (0x92b8 >> 2),
575 	0x00000000,
576 	(0x0e00 << 16) | (0x92bc >> 2),
577 	0x00000000,
578 	(0x0e00 << 16) | (0x92c0 >> 2),
579 	0x00000000,
580 	(0x0e00 << 16) | (0x92c4 >> 2),
581 	0x00000000,
582 	(0x0e00 << 16) | (0x92c8 >> 2),
583 	0x00000000,
584 	(0x0e00 << 16) | (0x92cc >> 2),
585 	0x00000000,
586 	(0x0e00 << 16) | (0x92d0 >> 2),
587 	0x00000000,
588 	(0x0e00 << 16) | (0x8c00 >> 2),
589 	0x00000000,
590 	(0x0e00 << 16) | (0x8c04 >> 2),
591 	0x00000000,
592 	(0x0e00 << 16) | (0x8c20 >> 2),
593 	0x00000000,
594 	(0x0e00 << 16) | (0x8c38 >> 2),
595 	0x00000000,
596 	(0x0e00 << 16) | (0x8c3c >> 2),
597 	0x00000000,
598 	(0x0e00 << 16) | (0xae00 >> 2),
599 	0x00000000,
600 	(0x0e00 << 16) | (0x9604 >> 2),
601 	0x00000000,
602 	(0x0e00 << 16) | (0xac08 >> 2),
603 	0x00000000,
604 	(0x0e00 << 16) | (0xac0c >> 2),
605 	0x00000000,
606 	(0x0e00 << 16) | (0xac10 >> 2),
607 	0x00000000,
608 	(0x0e00 << 16) | (0xac14 >> 2),
609 	0x00000000,
610 	(0x0e00 << 16) | (0xac58 >> 2),
611 	0x00000000,
612 	(0x0e00 << 16) | (0xac68 >> 2),
613 	0x00000000,
614 	(0x0e00 << 16) | (0xac6c >> 2),
615 	0x00000000,
616 	(0x0e00 << 16) | (0xac70 >> 2),
617 	0x00000000,
618 	(0x0e00 << 16) | (0xac74 >> 2),
619 	0x00000000,
620 	(0x0e00 << 16) | (0xac78 >> 2),
621 	0x00000000,
622 	(0x0e00 << 16) | (0xac7c >> 2),
623 	0x00000000,
624 	(0x0e00 << 16) | (0xac80 >> 2),
625 	0x00000000,
626 	(0x0e00 << 16) | (0xac84 >> 2),
627 	0x00000000,
628 	(0x0e00 << 16) | (0xac88 >> 2),
629 	0x00000000,
630 	(0x0e00 << 16) | (0xac8c >> 2),
631 	0x00000000,
632 	(0x0e00 << 16) | (0x970c >> 2),
633 	0x00000000,
634 	(0x0e00 << 16) | (0x9714 >> 2),
635 	0x00000000,
636 	(0x0e00 << 16) | (0x9718 >> 2),
637 	0x00000000,
638 	(0x0e00 << 16) | (0x971c >> 2),
639 	0x00000000,
640 	(0x0e00 << 16) | (0x31068 >> 2),
641 	0x00000000,
642 	(0x4e00 << 16) | (0x31068 >> 2),
643 	0x00000000,
644 	(0x5e00 << 16) | (0x31068 >> 2),
645 	0x00000000,
646 	(0x6e00 << 16) | (0x31068 >> 2),
647 	0x00000000,
648 	(0x7e00 << 16) | (0x31068 >> 2),
649 	0x00000000,
650 	(0x8e00 << 16) | (0x31068 >> 2),
651 	0x00000000,
652 	(0x9e00 << 16) | (0x31068 >> 2),
653 	0x00000000,
654 	(0xae00 << 16) | (0x31068 >> 2),
655 	0x00000000,
656 	(0xbe00 << 16) | (0x31068 >> 2),
657 	0x00000000,
658 	(0x0e00 << 16) | (0xcd10 >> 2),
659 	0x00000000,
660 	(0x0e00 << 16) | (0xcd14 >> 2),
661 	0x00000000,
662 	(0x0e00 << 16) | (0x88b0 >> 2),
663 	0x00000000,
664 	(0x0e00 << 16) | (0x88b4 >> 2),
665 	0x00000000,
666 	(0x0e00 << 16) | (0x88b8 >> 2),
667 	0x00000000,
668 	(0x0e00 << 16) | (0x88bc >> 2),
669 	0x00000000,
670 	(0x0400 << 16) | (0x89c0 >> 2),
671 	0x00000000,
672 	(0x0e00 << 16) | (0x88c4 >> 2),
673 	0x00000000,
674 	(0x0e00 << 16) | (0x88c8 >> 2),
675 	0x00000000,
676 	(0x0e00 << 16) | (0x88d0 >> 2),
677 	0x00000000,
678 	(0x0e00 << 16) | (0x88d4 >> 2),
679 	0x00000000,
680 	(0x0e00 << 16) | (0x88d8 >> 2),
681 	0x00000000,
682 	(0x0e00 << 16) | (0x8980 >> 2),
683 	0x00000000,
684 	(0x0e00 << 16) | (0x30938 >> 2),
685 	0x00000000,
686 	(0x0e00 << 16) | (0x3093c >> 2),
687 	0x00000000,
688 	(0x0e00 << 16) | (0x30940 >> 2),
689 	0x00000000,
690 	(0x0e00 << 16) | (0x89a0 >> 2),
691 	0x00000000,
692 	(0x0e00 << 16) | (0x30900 >> 2),
693 	0x00000000,
694 	(0x0e00 << 16) | (0x30904 >> 2),
695 	0x00000000,
696 	(0x0e00 << 16) | (0x89b4 >> 2),
697 	0x00000000,
698 	(0x0e00 << 16) | (0x3c210 >> 2),
699 	0x00000000,
700 	(0x0e00 << 16) | (0x3c214 >> 2),
701 	0x00000000,
702 	(0x0e00 << 16) | (0x3c218 >> 2),
703 	0x00000000,
704 	(0x0e00 << 16) | (0x8904 >> 2),
705 	0x00000000,
706 	0x5,
707 	(0x0e00 << 16) | (0x8c28 >> 2),
708 	(0x0e00 << 16) | (0x8c2c >> 2),
709 	(0x0e00 << 16) | (0x8c30 >> 2),
710 	(0x0e00 << 16) | (0x8c34 >> 2),
711 	(0x0e00 << 16) | (0x9600 >> 2),
712 };
713 
714 static const u32 kalindi_rlc_save_restore_register_list[] =
715 {
716 	(0x0e00 << 16) | (0xc12c >> 2),
717 	0x00000000,
718 	(0x0e00 << 16) | (0xc140 >> 2),
719 	0x00000000,
720 	(0x0e00 << 16) | (0xc150 >> 2),
721 	0x00000000,
722 	(0x0e00 << 16) | (0xc15c >> 2),
723 	0x00000000,
724 	(0x0e00 << 16) | (0xc168 >> 2),
725 	0x00000000,
726 	(0x0e00 << 16) | (0xc170 >> 2),
727 	0x00000000,
728 	(0x0e00 << 16) | (0xc204 >> 2),
729 	0x00000000,
730 	(0x0e00 << 16) | (0xc2b4 >> 2),
731 	0x00000000,
732 	(0x0e00 << 16) | (0xc2b8 >> 2),
733 	0x00000000,
734 	(0x0e00 << 16) | (0xc2bc >> 2),
735 	0x00000000,
736 	(0x0e00 << 16) | (0xc2c0 >> 2),
737 	0x00000000,
738 	(0x0e00 << 16) | (0x8228 >> 2),
739 	0x00000000,
740 	(0x0e00 << 16) | (0x829c >> 2),
741 	0x00000000,
742 	(0x0e00 << 16) | (0x869c >> 2),
743 	0x00000000,
744 	(0x0600 << 16) | (0x98f4 >> 2),
745 	0x00000000,
746 	(0x0e00 << 16) | (0x98f8 >> 2),
747 	0x00000000,
748 	(0x0e00 << 16) | (0x9900 >> 2),
749 	0x00000000,
750 	(0x0e00 << 16) | (0xc260 >> 2),
751 	0x00000000,
752 	(0x0e00 << 16) | (0x90e8 >> 2),
753 	0x00000000,
754 	(0x0e00 << 16) | (0x3c000 >> 2),
755 	0x00000000,
756 	(0x0e00 << 16) | (0x3c00c >> 2),
757 	0x00000000,
758 	(0x0e00 << 16) | (0x8c1c >> 2),
759 	0x00000000,
760 	(0x0e00 << 16) | (0x9700 >> 2),
761 	0x00000000,
762 	(0x0e00 << 16) | (0xcd20 >> 2),
763 	0x00000000,
764 	(0x4e00 << 16) | (0xcd20 >> 2),
765 	0x00000000,
766 	(0x5e00 << 16) | (0xcd20 >> 2),
767 	0x00000000,
768 	(0x6e00 << 16) | (0xcd20 >> 2),
769 	0x00000000,
770 	(0x7e00 << 16) | (0xcd20 >> 2),
771 	0x00000000,
772 	(0x0e00 << 16) | (0x89bc >> 2),
773 	0x00000000,
774 	(0x0e00 << 16) | (0x8900 >> 2),
775 	0x00000000,
776 	0x3,
777 	(0x0e00 << 16) | (0xc130 >> 2),
778 	0x00000000,
779 	(0x0e00 << 16) | (0xc134 >> 2),
780 	0x00000000,
781 	(0x0e00 << 16) | (0xc1fc >> 2),
782 	0x00000000,
783 	(0x0e00 << 16) | (0xc208 >> 2),
784 	0x00000000,
785 	(0x0e00 << 16) | (0xc264 >> 2),
786 	0x00000000,
787 	(0x0e00 << 16) | (0xc268 >> 2),
788 	0x00000000,
789 	(0x0e00 << 16) | (0xc26c >> 2),
790 	0x00000000,
791 	(0x0e00 << 16) | (0xc270 >> 2),
792 	0x00000000,
793 	(0x0e00 << 16) | (0xc274 >> 2),
794 	0x00000000,
795 	(0x0e00 << 16) | (0xc28c >> 2),
796 	0x00000000,
797 	(0x0e00 << 16) | (0xc290 >> 2),
798 	0x00000000,
799 	(0x0e00 << 16) | (0xc294 >> 2),
800 	0x00000000,
801 	(0x0e00 << 16) | (0xc298 >> 2),
802 	0x00000000,
803 	(0x0e00 << 16) | (0xc2a0 >> 2),
804 	0x00000000,
805 	(0x0e00 << 16) | (0xc2a4 >> 2),
806 	0x00000000,
807 	(0x0e00 << 16) | (0xc2a8 >> 2),
808 	0x00000000,
809 	(0x0e00 << 16) | (0xc2ac >> 2),
810 	0x00000000,
811 	(0x0e00 << 16) | (0x301d0 >> 2),
812 	0x00000000,
813 	(0x0e00 << 16) | (0x30238 >> 2),
814 	0x00000000,
815 	(0x0e00 << 16) | (0x30250 >> 2),
816 	0x00000000,
817 	(0x0e00 << 16) | (0x30254 >> 2),
818 	0x00000000,
819 	(0x0e00 << 16) | (0x30258 >> 2),
820 	0x00000000,
821 	(0x0e00 << 16) | (0x3025c >> 2),
822 	0x00000000,
823 	(0x4e00 << 16) | (0xc900 >> 2),
824 	0x00000000,
825 	(0x5e00 << 16) | (0xc900 >> 2),
826 	0x00000000,
827 	(0x6e00 << 16) | (0xc900 >> 2),
828 	0x00000000,
829 	(0x7e00 << 16) | (0xc900 >> 2),
830 	0x00000000,
831 	(0x4e00 << 16) | (0xc904 >> 2),
832 	0x00000000,
833 	(0x5e00 << 16) | (0xc904 >> 2),
834 	0x00000000,
835 	(0x6e00 << 16) | (0xc904 >> 2),
836 	0x00000000,
837 	(0x7e00 << 16) | (0xc904 >> 2),
838 	0x00000000,
839 	(0x4e00 << 16) | (0xc908 >> 2),
840 	0x00000000,
841 	(0x5e00 << 16) | (0xc908 >> 2),
842 	0x00000000,
843 	(0x6e00 << 16) | (0xc908 >> 2),
844 	0x00000000,
845 	(0x7e00 << 16) | (0xc908 >> 2),
846 	0x00000000,
847 	(0x4e00 << 16) | (0xc90c >> 2),
848 	0x00000000,
849 	(0x5e00 << 16) | (0xc90c >> 2),
850 	0x00000000,
851 	(0x6e00 << 16) | (0xc90c >> 2),
852 	0x00000000,
853 	(0x7e00 << 16) | (0xc90c >> 2),
854 	0x00000000,
855 	(0x4e00 << 16) | (0xc910 >> 2),
856 	0x00000000,
857 	(0x5e00 << 16) | (0xc910 >> 2),
858 	0x00000000,
859 	(0x6e00 << 16) | (0xc910 >> 2),
860 	0x00000000,
861 	(0x7e00 << 16) | (0xc910 >> 2),
862 	0x00000000,
863 	(0x0e00 << 16) | (0xc99c >> 2),
864 	0x00000000,
865 	(0x0e00 << 16) | (0x9834 >> 2),
866 	0x00000000,
867 	(0x0000 << 16) | (0x30f00 >> 2),
868 	0x00000000,
869 	(0x0000 << 16) | (0x30f04 >> 2),
870 	0x00000000,
871 	(0x0000 << 16) | (0x30f08 >> 2),
872 	0x00000000,
873 	(0x0000 << 16) | (0x30f0c >> 2),
874 	0x00000000,
875 	(0x0600 << 16) | (0x9b7c >> 2),
876 	0x00000000,
877 	(0x0e00 << 16) | (0x8a14 >> 2),
878 	0x00000000,
879 	(0x0e00 << 16) | (0x8a18 >> 2),
880 	0x00000000,
881 	(0x0600 << 16) | (0x30a00 >> 2),
882 	0x00000000,
883 	(0x0e00 << 16) | (0x8bf0 >> 2),
884 	0x00000000,
885 	(0x0e00 << 16) | (0x8bcc >> 2),
886 	0x00000000,
887 	(0x0e00 << 16) | (0x8b24 >> 2),
888 	0x00000000,
889 	(0x0e00 << 16) | (0x30a04 >> 2),
890 	0x00000000,
891 	(0x0600 << 16) | (0x30a10 >> 2),
892 	0x00000000,
893 	(0x0600 << 16) | (0x30a14 >> 2),
894 	0x00000000,
895 	(0x0600 << 16) | (0x30a18 >> 2),
896 	0x00000000,
897 	(0x0600 << 16) | (0x30a2c >> 2),
898 	0x00000000,
899 	(0x0e00 << 16) | (0xc700 >> 2),
900 	0x00000000,
901 	(0x0e00 << 16) | (0xc704 >> 2),
902 	0x00000000,
903 	(0x0e00 << 16) | (0xc708 >> 2),
904 	0x00000000,
905 	(0x0e00 << 16) | (0xc768 >> 2),
906 	0x00000000,
907 	(0x0400 << 16) | (0xc770 >> 2),
908 	0x00000000,
909 	(0x0400 << 16) | (0xc774 >> 2),
910 	0x00000000,
911 	(0x0400 << 16) | (0xc798 >> 2),
912 	0x00000000,
913 	(0x0400 << 16) | (0xc79c >> 2),
914 	0x00000000,
915 	(0x0e00 << 16) | (0x9100 >> 2),
916 	0x00000000,
917 	(0x0e00 << 16) | (0x3c010 >> 2),
918 	0x00000000,
919 	(0x0e00 << 16) | (0x8c00 >> 2),
920 	0x00000000,
921 	(0x0e00 << 16) | (0x8c04 >> 2),
922 	0x00000000,
923 	(0x0e00 << 16) | (0x8c20 >> 2),
924 	0x00000000,
925 	(0x0e00 << 16) | (0x8c38 >> 2),
926 	0x00000000,
927 	(0x0e00 << 16) | (0x8c3c >> 2),
928 	0x00000000,
929 	(0x0e00 << 16) | (0xae00 >> 2),
930 	0x00000000,
931 	(0x0e00 << 16) | (0x9604 >> 2),
932 	0x00000000,
933 	(0x0e00 << 16) | (0xac08 >> 2),
934 	0x00000000,
935 	(0x0e00 << 16) | (0xac0c >> 2),
936 	0x00000000,
937 	(0x0e00 << 16) | (0xac10 >> 2),
938 	0x00000000,
939 	(0x0e00 << 16) | (0xac14 >> 2),
940 	0x00000000,
941 	(0x0e00 << 16) | (0xac58 >> 2),
942 	0x00000000,
943 	(0x0e00 << 16) | (0xac68 >> 2),
944 	0x00000000,
945 	(0x0e00 << 16) | (0xac6c >> 2),
946 	0x00000000,
947 	(0x0e00 << 16) | (0xac70 >> 2),
948 	0x00000000,
949 	(0x0e00 << 16) | (0xac74 >> 2),
950 	0x00000000,
951 	(0x0e00 << 16) | (0xac78 >> 2),
952 	0x00000000,
953 	(0x0e00 << 16) | (0xac7c >> 2),
954 	0x00000000,
955 	(0x0e00 << 16) | (0xac80 >> 2),
956 	0x00000000,
957 	(0x0e00 << 16) | (0xac84 >> 2),
958 	0x00000000,
959 	(0x0e00 << 16) | (0xac88 >> 2),
960 	0x00000000,
961 	(0x0e00 << 16) | (0xac8c >> 2),
962 	0x00000000,
963 	(0x0e00 << 16) | (0x970c >> 2),
964 	0x00000000,
965 	(0x0e00 << 16) | (0x9714 >> 2),
966 	0x00000000,
967 	(0x0e00 << 16) | (0x9718 >> 2),
968 	0x00000000,
969 	(0x0e00 << 16) | (0x971c >> 2),
970 	0x00000000,
971 	(0x0e00 << 16) | (0x31068 >> 2),
972 	0x00000000,
973 	(0x4e00 << 16) | (0x31068 >> 2),
974 	0x00000000,
975 	(0x5e00 << 16) | (0x31068 >> 2),
976 	0x00000000,
977 	(0x6e00 << 16) | (0x31068 >> 2),
978 	0x00000000,
979 	(0x7e00 << 16) | (0x31068 >> 2),
980 	0x00000000,
981 	(0x0e00 << 16) | (0xcd10 >> 2),
982 	0x00000000,
983 	(0x0e00 << 16) | (0xcd14 >> 2),
984 	0x00000000,
985 	(0x0e00 << 16) | (0x88b0 >> 2),
986 	0x00000000,
987 	(0x0e00 << 16) | (0x88b4 >> 2),
988 	0x00000000,
989 	(0x0e00 << 16) | (0x88b8 >> 2),
990 	0x00000000,
991 	(0x0e00 << 16) | (0x88bc >> 2),
992 	0x00000000,
993 	(0x0400 << 16) | (0x89c0 >> 2),
994 	0x00000000,
995 	(0x0e00 << 16) | (0x88c4 >> 2),
996 	0x00000000,
997 	(0x0e00 << 16) | (0x88c8 >> 2),
998 	0x00000000,
999 	(0x0e00 << 16) | (0x88d0 >> 2),
1000 	0x00000000,
1001 	(0x0e00 << 16) | (0x88d4 >> 2),
1002 	0x00000000,
1003 	(0x0e00 << 16) | (0x88d8 >> 2),
1004 	0x00000000,
1005 	(0x0e00 << 16) | (0x8980 >> 2),
1006 	0x00000000,
1007 	(0x0e00 << 16) | (0x30938 >> 2),
1008 	0x00000000,
1009 	(0x0e00 << 16) | (0x3093c >> 2),
1010 	0x00000000,
1011 	(0x0e00 << 16) | (0x30940 >> 2),
1012 	0x00000000,
1013 	(0x0e00 << 16) | (0x89a0 >> 2),
1014 	0x00000000,
1015 	(0x0e00 << 16) | (0x30900 >> 2),
1016 	0x00000000,
1017 	(0x0e00 << 16) | (0x30904 >> 2),
1018 	0x00000000,
1019 	(0x0e00 << 16) | (0x89b4 >> 2),
1020 	0x00000000,
1021 	(0x0e00 << 16) | (0x3e1fc >> 2),
1022 	0x00000000,
1023 	(0x0e00 << 16) | (0x3c210 >> 2),
1024 	0x00000000,
1025 	(0x0e00 << 16) | (0x3c214 >> 2),
1026 	0x00000000,
1027 	(0x0e00 << 16) | (0x3c218 >> 2),
1028 	0x00000000,
1029 	(0x0e00 << 16) | (0x8904 >> 2),
1030 	0x00000000,
1031 	0x5,
1032 	(0x0e00 << 16) | (0x8c28 >> 2),
1033 	(0x0e00 << 16) | (0x8c2c >> 2),
1034 	(0x0e00 << 16) | (0x8c30 >> 2),
1035 	(0x0e00 << 16) | (0x8c34 >> 2),
1036 	(0x0e00 << 16) | (0x9600 >> 2),
1037 };
1038 
/* Bonaire golden SPM settings.
 * Entries are {register byte offset, and-mask, or-value} triples;
 * presumably consumed by radeon_program_register_sequence() -- confirm.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1043 
/* Bonaire golden common settings.
 * Entries are {register byte offset, and-mask, or-value} triples;
 * presumably consumed by radeon_program_register_sequence() -- confirm.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1051 
/* Bonaire golden register settings.
 * Entries are {register byte offset, and-mask, or-value} triples;
 * presumably consumed by radeon_program_register_sequence() -- confirm.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1096 
/* Bonaire clockgating init sequence (name suggests MGCG/CGCG setup);
 * {offset, and-mask, or-value} triplets programmed before the golden
 * registers in cik_init_golden_registers().
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1182 
/* Spectre (Kaveri) SPM golden settings ({offset, mask, value} triplets). */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

/* Spectre golden settings common to the CIK variants. */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

/* Spectre ASIC-specific golden register settings. */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1224 
/* Spectre (Kaveri) clockgating init sequence; same triplet format as the
 * bonaire table but with additional 0x3c0ac-0x3c0bc entries.
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1315 
/* Kalindi (Kabini) SPM golden settings ({offset, mask, value} triplets). */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

/* Kalindi golden settings common to the CIK variants; also reused for
 * Mullins in cik_init_golden_registers().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};

/* Kalindi ASIC-specific golden register settings. */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1362 
/* Kalindi (Kabini/Mullins) clockgating init sequence; shorter than the
 * discrete-GPU tables (fewer 0x3c0xx entries).
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1421 
/* Hawaii SPM golden settings ({offset, mask, value} triplets). */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};

/* Hawaii common golden settings; note these differ from the shared
 * 0xc77x table used by the other CIK parts.
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};

/* Hawaii ASIC-specific golden register settings. */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1475 
/* Hawaii clockgating init sequence; the longest CIK table, extending the
 * 0x3c0xx pattern up through 0x3c0f8 plus Hawaii-only entries (0xc318 etc.).
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1586 
/* Godavari (Mullins) ASIC-specific golden register settings
 * ({offset, mask, value} triplets).
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): 0x98302 is a five-hex-digit, non-dword-aligned offset;
	 * every sibling table uses 0x9834 with the same mask/value pair —
	 * looks like a typo for 0x9834, verify against the register spec. */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1622 
1623 
1624 static void cik_init_golden_registers(struct radeon_device *rdev)
1625 {
1626 	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
1627 	mutex_lock(&rdev->grbm_idx_mutex);
1628 	switch (rdev->family) {
1629 	case CHIP_BONAIRE:
1630 		radeon_program_register_sequence(rdev,
1631 						 bonaire_mgcg_cgcg_init,
1632 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1633 		radeon_program_register_sequence(rdev,
1634 						 bonaire_golden_registers,
1635 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1636 		radeon_program_register_sequence(rdev,
1637 						 bonaire_golden_common_registers,
1638 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1639 		radeon_program_register_sequence(rdev,
1640 						 bonaire_golden_spm_registers,
1641 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1642 		break;
1643 	case CHIP_KABINI:
1644 		radeon_program_register_sequence(rdev,
1645 						 kalindi_mgcg_cgcg_init,
1646 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1647 		radeon_program_register_sequence(rdev,
1648 						 kalindi_golden_registers,
1649 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1650 		radeon_program_register_sequence(rdev,
1651 						 kalindi_golden_common_registers,
1652 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1653 		radeon_program_register_sequence(rdev,
1654 						 kalindi_golden_spm_registers,
1655 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1656 		break;
1657 	case CHIP_MULLINS:
1658 		radeon_program_register_sequence(rdev,
1659 						 kalindi_mgcg_cgcg_init,
1660 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1661 		radeon_program_register_sequence(rdev,
1662 						 godavari_golden_registers,
1663 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1664 		radeon_program_register_sequence(rdev,
1665 						 kalindi_golden_common_registers,
1666 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1667 		radeon_program_register_sequence(rdev,
1668 						 kalindi_golden_spm_registers,
1669 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1670 		break;
1671 	case CHIP_KAVERI:
1672 		radeon_program_register_sequence(rdev,
1673 						 spectre_mgcg_cgcg_init,
1674 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1675 		radeon_program_register_sequence(rdev,
1676 						 spectre_golden_registers,
1677 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1678 		radeon_program_register_sequence(rdev,
1679 						 spectre_golden_common_registers,
1680 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1681 		radeon_program_register_sequence(rdev,
1682 						 spectre_golden_spm_registers,
1683 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1684 		break;
1685 	case CHIP_HAWAII:
1686 		radeon_program_register_sequence(rdev,
1687 						 hawaii_mgcg_cgcg_init,
1688 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1689 		radeon_program_register_sequence(rdev,
1690 						 hawaii_golden_registers,
1691 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1692 		radeon_program_register_sequence(rdev,
1693 						 hawaii_golden_common_registers,
1694 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1695 		radeon_program_register_sequence(rdev,
1696 						 hawaii_golden_spm_registers,
1697 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1698 		break;
1699 	default:
1700 		break;
1701 	}
1702 	mutex_unlock(&rdev->grbm_idx_mutex);
1703 }
1704 
1705 /**
1706  * cik_get_xclk - get the xclk
1707  *
1708  * @rdev: radeon_device pointer
1709  *
1710  * Returns the reference clock used by the gfx engine
1711  * (CIK).
1712  */
1713 u32 cik_get_xclk(struct radeon_device *rdev)
1714 {
1715         u32 reference_clock = rdev->clock.spll.reference_freq;
1716 
1717 	if (rdev->flags & RADEON_IS_IGP) {
1718 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1719 			return reference_clock / 2;
1720 	} else {
1721 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1722 			return reference_clock / 4;
1723 	}
1724 	return reference_clock;
1725 }
1726 
1727 /**
1728  * cik_mm_rdoorbell - read a doorbell dword
1729  *
1730  * @rdev: radeon_device pointer
1731  * @index: doorbell index
1732  *
1733  * Returns the value in the doorbell aperture at the
1734  * requested doorbell index (CIK).
1735  */
1736 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1737 {
1738 	if (index < rdev->doorbell.num_doorbells) {
1739 		return readl(rdev->doorbell.ptr + index);
1740 	} else {
1741 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1742 		return 0;
1743 	}
1744 }
1745 
1746 /**
1747  * cik_mm_wdoorbell - write a doorbell dword
1748  *
1749  * @rdev: radeon_device pointer
1750  * @index: doorbell index
1751  * @v: value to write
1752  *
1753  * Writes @v to the doorbell aperture at the
1754  * requested doorbell index (CIK).
1755  */
1756 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1757 {
1758 	if (index < rdev->doorbell.num_doorbells) {
1759 		writel(v, rdev->doorbell.ptr + index);
1760 	} else {
1761 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1762 	}
1763 }
1764 
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs, written
 * by ci_mc_load_microcode() before loading the legacy (non-new_fw) MC ucode.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1806 
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs, written
 * by ci_mc_load_microcode() before loading the legacy (non-new_fw) MC ucode.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1834 
1835 
1836 /**
1837  * cik_srbm_select - select specific register instances
1838  *
1839  * @rdev: radeon_device pointer
1840  * @me: selected ME (micro engine)
1841  * @pipe: pipe
1842  * @queue: queue
1843  * @vmid: VMID
1844  *
1845  * Switches the currently active registers instances.  Some
1846  * registers are instanced per VMID, others are instanced per
1847  * me/pipe/queue combination.
1848  */
1849 static void cik_srbm_select(struct radeon_device *rdev,
1850 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1851 {
1852 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1853 			     MEID(me & 0x3) |
1854 			     VMID(vmid & 0xf) |
1855 			     QUEUEID(queue & 0x7));
1856 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1857 }
1858 
1859 /* ucode loading */
1860 /**
1861  * ci_mc_load_microcode - load MC ucode into the hw
1862  *
1863  * @rdev: radeon_device pointer
1864  *
1865  * Load the GDDR MC ucode into the hw (CIK).
1866  * Returns 0 on success, error on failure.
1867  */
1868 int ci_mc_load_microcode(struct radeon_device *rdev)
1869 {
1870 	const __be32 *fw_data = NULL;
1871 	const __le32 *new_fw_data = NULL;
1872 	u32 running, blackout = 0, tmp;
1873 	u32 *io_mc_regs = NULL;
1874 	const __le32 *new_io_mc_regs = NULL;
1875 	int i, regs_size, ucode_size;
1876 
1877 	if (!rdev->mc_fw)
1878 		return -EINVAL;
1879 
1880 	if (rdev->new_fw) {
1881 		const struct mc_firmware_header_v1_0 *hdr =
1882 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1883 
1884 		radeon_ucode_print_mc_hdr(&hdr->header);
1885 
1886 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1887 		new_io_mc_regs = (const __le32 *)
1888 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
1889 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1890 		new_fw_data = (const __le32 *)
1891 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1892 	} else {
1893 		ucode_size = rdev->mc_fw->size / 4;
1894 
1895 		switch (rdev->family) {
1896 		case CHIP_BONAIRE:
1897 			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1898 			regs_size = BONAIRE_IO_MC_REGS_SIZE;
1899 			break;
1900 		case CHIP_HAWAII:
1901 			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1902 			regs_size = HAWAII_IO_MC_REGS_SIZE;
1903 			break;
1904 		default:
1905 			return -EINVAL;
1906 		}
1907 		fw_data = (const __be32 *)rdev->mc_fw->data;
1908 	}
1909 
1910 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1911 
1912 	if (running == 0) {
1913 		if (running) {
1914 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1915 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1916 		}
1917 
1918 		/* reset the engine and set to writable */
1919 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1920 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1921 
1922 		/* load mc io regs */
1923 		for (i = 0; i < regs_size; i++) {
1924 			if (rdev->new_fw) {
1925 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1926 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1927 			} else {
1928 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1929 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1930 			}
1931 		}
1932 
1933 		tmp = RREG32(MC_SEQ_MISC0);
1934 		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
1935 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
1936 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
1937 			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
1938 			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
1939 		}
1940 
1941 		/* load the MC ucode */
1942 		for (i = 0; i < ucode_size; i++) {
1943 			if (rdev->new_fw)
1944 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1945 			else
1946 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1947 		}
1948 
1949 		/* put the engine back into the active state */
1950 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1951 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1952 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1953 
1954 		/* wait for training to complete */
1955 		for (i = 0; i < rdev->usec_timeout; i++) {
1956 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1957 				break;
1958 			udelay(1);
1959 		}
1960 		for (i = 0; i < rdev->usec_timeout; i++) {
1961 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1962 				break;
1963 			udelay(1);
1964 		}
1965 
1966 		if (running)
1967 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1968 	}
1969 
1970 	return 0;
1971 }
1972 
1973 /**
1974  * cik_init_microcode - load ucode images from disk
1975  *
1976  * @rdev: radeon_device pointer
1977  *
1978  * Use the firmware interface to load the ucode images into
1979  * the driver (not loaded into hw).
1980  * Returns 0 on success, error on failure.
1981  */
1982 static int cik_init_microcode(struct radeon_device *rdev)
1983 {
1984 	const char *chip_name;
1985 	const char *new_chip_name;
1986 	size_t pfp_req_size, me_req_size, ce_req_size,
1987 		mec_req_size, rlc_req_size, mc_req_size = 0,
1988 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1989 	char fw_name[30];
1990 	int new_fw = 0;
1991 	int err;
1992 	int num_fw;
1993 
1994 	DRM_DEBUG("\n");
1995 
1996 	switch (rdev->family) {
1997 	case CHIP_BONAIRE:
1998 		chip_name = "BONAIRE";
1999 		new_chip_name = "bonaire";
2000 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2001 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2002 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2003 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2004 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2005 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2006 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2007 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2008 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2009 		num_fw = 8;
2010 		break;
2011 	case CHIP_HAWAII:
2012 		chip_name = "HAWAII";
2013 		new_chip_name = "hawaii";
2014 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2016 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2019 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2020 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2021 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2022 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2023 		num_fw = 8;
2024 		break;
2025 	case CHIP_KAVERI:
2026 		chip_name = "KAVERI";
2027 		new_chip_name = "kaveri";
2028 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2029 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2030 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2031 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2032 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2033 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2034 		num_fw = 7;
2035 		break;
2036 	case CHIP_KABINI:
2037 		chip_name = "KABINI";
2038 		new_chip_name = "kabini";
2039 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2040 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2041 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2042 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2043 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2044 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2045 		num_fw = 6;
2046 		break;
2047 	case CHIP_MULLINS:
2048 		chip_name = "MULLINS";
2049 		new_chip_name = "mullins";
2050 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2051 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2052 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2053 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2054 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2055 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2056 		num_fw = 6;
2057 		break;
2058 	default: BUG();
2059 	}
2060 
2061 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2062 
2063 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2064 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2065 	if (err) {
2066 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2067 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2068 		if (err)
2069 			goto out;
2070 		if (rdev->pfp_fw->size != pfp_req_size) {
2071 			printk(KERN_ERR
2072 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2073 			       rdev->pfp_fw->size, fw_name);
2074 			err = -EINVAL;
2075 			goto out;
2076 		}
2077 	} else {
2078 		err = radeon_ucode_validate(rdev->pfp_fw);
2079 		if (err) {
2080 			printk(KERN_ERR
2081 			       "cik_fw: validation failed for firmware \"%s\"\n",
2082 			       fw_name);
2083 			goto out;
2084 		} else {
2085 			new_fw++;
2086 		}
2087 	}
2088 
2089 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2090 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2091 	if (err) {
2092 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2093 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2094 		if (err)
2095 			goto out;
2096 		if (rdev->me_fw->size != me_req_size) {
2097 			printk(KERN_ERR
2098 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099 			       rdev->me_fw->size, fw_name);
2100 			err = -EINVAL;
2101 		}
2102 	} else {
2103 		err = radeon_ucode_validate(rdev->me_fw);
2104 		if (err) {
2105 			printk(KERN_ERR
2106 			       "cik_fw: validation failed for firmware \"%s\"\n",
2107 			       fw_name);
2108 			goto out;
2109 		} else {
2110 			new_fw++;
2111 		}
2112 	}
2113 
2114 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116 	if (err) {
2117 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119 		if (err)
2120 			goto out;
2121 		if (rdev->ce_fw->size != ce_req_size) {
2122 			printk(KERN_ERR
2123 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2124 			       rdev->ce_fw->size, fw_name);
2125 			err = -EINVAL;
2126 		}
2127 	} else {
2128 		err = radeon_ucode_validate(rdev->ce_fw);
2129 		if (err) {
2130 			printk(KERN_ERR
2131 			       "cik_fw: validation failed for firmware \"%s\"\n",
2132 			       fw_name);
2133 			goto out;
2134 		} else {
2135 			new_fw++;
2136 		}
2137 	}
2138 
2139 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2140 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2141 	if (err) {
2142 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2143 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2144 		if (err)
2145 			goto out;
2146 		if (rdev->mec_fw->size != mec_req_size) {
2147 			printk(KERN_ERR
2148 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2149 			       rdev->mec_fw->size, fw_name);
2150 			err = -EINVAL;
2151 		}
2152 	} else {
2153 		err = radeon_ucode_validate(rdev->mec_fw);
2154 		if (err) {
2155 			printk(KERN_ERR
2156 			       "cik_fw: validation failed for firmware \"%s\"\n",
2157 			       fw_name);
2158 			goto out;
2159 		} else {
2160 			new_fw++;
2161 		}
2162 	}
2163 
2164 	if (rdev->family == CHIP_KAVERI) {
2165 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2166 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2167 		if (err) {
2168 			goto out;
2169 		} else {
2170 			err = radeon_ucode_validate(rdev->mec2_fw);
2171 			if (err) {
2172 				goto out;
2173 			} else {
2174 				new_fw++;
2175 			}
2176 		}
2177 	}
2178 
2179 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2180 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2181 	if (err) {
2182 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2183 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2184 		if (err)
2185 			goto out;
2186 		if (rdev->rlc_fw->size != rlc_req_size) {
2187 			printk(KERN_ERR
2188 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2189 			       rdev->rlc_fw->size, fw_name);
2190 			err = -EINVAL;
2191 		}
2192 	} else {
2193 		err = radeon_ucode_validate(rdev->rlc_fw);
2194 		if (err) {
2195 			printk(KERN_ERR
2196 			       "cik_fw: validation failed for firmware \"%s\"\n",
2197 			       fw_name);
2198 			goto out;
2199 		} else {
2200 			new_fw++;
2201 		}
2202 	}
2203 
2204 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2205 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2206 	if (err) {
2207 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2208 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2209 		if (err)
2210 			goto out;
2211 		if (rdev->sdma_fw->size != sdma_req_size) {
2212 			printk(KERN_ERR
2213 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2214 			       rdev->sdma_fw->size, fw_name);
2215 			err = -EINVAL;
2216 		}
2217 	} else {
2218 		err = radeon_ucode_validate(rdev->sdma_fw);
2219 		if (err) {
2220 			printk(KERN_ERR
2221 			       "cik_fw: validation failed for firmware \"%s\"\n",
2222 			       fw_name);
2223 			goto out;
2224 		} else {
2225 			new_fw++;
2226 		}
2227 	}
2228 
2229 	/* No SMC, MC ucode on APUs */
2230 	if (!(rdev->flags & RADEON_IS_IGP)) {
2231 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2232 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2233 		if (err) {
2234 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2235 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2236 			if (err) {
2237 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2238 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2239 				if (err)
2240 					goto out;
2241 			}
2242 			if ((rdev->mc_fw->size != mc_req_size) &&
2243 			    (rdev->mc_fw->size != mc2_req_size)){
2244 				printk(KERN_ERR
2245 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2246 				       rdev->mc_fw->size, fw_name);
2247 				err = -EINVAL;
2248 			}
2249 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2250 		} else {
2251 			err = radeon_ucode_validate(rdev->mc_fw);
2252 			if (err) {
2253 				printk(KERN_ERR
2254 				       "cik_fw: validation failed for firmware \"%s\"\n",
2255 				       fw_name);
2256 				goto out;
2257 			} else {
2258 				new_fw++;
2259 			}
2260 		}
2261 
2262 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2263 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2264 		if (err) {
2265 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2266 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2267 			if (err) {
2268 				printk(KERN_ERR
2269 				       "smc: error loading firmware \"%s\"\n",
2270 				       fw_name);
2271 				release_firmware(rdev->smc_fw);
2272 				rdev->smc_fw = NULL;
2273 				err = 0;
2274 			} else if (rdev->smc_fw->size != smc_req_size) {
2275 				printk(KERN_ERR
2276 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2277 				       rdev->smc_fw->size, fw_name);
2278 				err = -EINVAL;
2279 			}
2280 		} else {
2281 			err = radeon_ucode_validate(rdev->smc_fw);
2282 			if (err) {
2283 				printk(KERN_ERR
2284 				       "cik_fw: validation failed for firmware \"%s\"\n",
2285 				       fw_name);
2286 				goto out;
2287 			} else {
2288 				new_fw++;
2289 			}
2290 		}
2291 	}
2292 
2293 	if (new_fw == 0) {
2294 		rdev->new_fw = false;
2295 	} else if (new_fw < num_fw) {
2296 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2297 		err = -EINVAL;
2298 	} else {
2299 		rdev->new_fw = true;
2300 	}
2301 
2302 out:
2303 	if (err) {
2304 		if (err != -EINVAL)
2305 			printk(KERN_ERR
2306 			       "cik_cp: Failed to load firmware \"%s\"\n",
2307 			       fw_name);
2308 		release_firmware(rdev->pfp_fw);
2309 		rdev->pfp_fw = NULL;
2310 		release_firmware(rdev->me_fw);
2311 		rdev->me_fw = NULL;
2312 		release_firmware(rdev->ce_fw);
2313 		rdev->ce_fw = NULL;
2314 		release_firmware(rdev->mec_fw);
2315 		rdev->mec_fw = NULL;
2316 		release_firmware(rdev->mec2_fw);
2317 		rdev->mec2_fw = NULL;
2318 		release_firmware(rdev->rlc_fw);
2319 		rdev->rlc_fw = NULL;
2320 		release_firmware(rdev->sdma_fw);
2321 		rdev->sdma_fw = NULL;
2322 		release_firmware(rdev->mc_fw);
2323 		rdev->mc_fw = NULL;
2324 		release_firmware(rdev->smc_fw);
2325 		rdev->smc_fw = NULL;
2326 	}
2327 	return err;
2328 }
2329 
2330 /*
2331  * Core functions
2332  */
2333 /**
2334  * cik_tiling_mode_table_init - init the hw tiling table
2335  *
2336  * @rdev: radeon_device pointer
2337  *
2338  * Starting with SI, the tiling setup is done globally in a
2339  * set of 32 tiling modes.  Rather than selecting each set of
2340  * parameters per surface as on older asics, we just select
2341  * which index in the tiling table we want to use, and the
2342  * surface uses those parameters (CIK).
2343  */
2344 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2345 {
2346 	const u32 num_tile_mode_states = 32;
2347 	const u32 num_secondary_tile_mode_states = 16;
2348 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2349 	u32 num_pipe_configs;
2350 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2351 		rdev->config.cik.max_shader_engines;
2352 
2353 	switch (rdev->config.cik.mem_row_size_in_kb) {
2354 	case 1:
2355 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2356 		break;
2357 	case 2:
2358 	default:
2359 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2360 		break;
2361 	case 4:
2362 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2363 		break;
2364 	}
2365 
2366 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2367 	if (num_pipe_configs > 8)
2368 		num_pipe_configs = 16;
2369 
2370 	if (num_pipe_configs == 16) {
2371 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2372 			switch (reg_offset) {
2373 			case 0:
2374 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2378 				break;
2379 			case 1:
2380 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2381 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2382 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2384 				break;
2385 			case 2:
2386 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2388 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2390 				break;
2391 			case 3:
2392 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2393 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2394 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2396 				break;
2397 			case 4:
2398 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2399 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2400 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401 						 TILE_SPLIT(split_equal_to_row_size));
2402 				break;
2403 			case 5:
2404 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2405 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2407 				break;
2408 			case 6:
2409 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2410 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2411 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2412 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2413 				break;
2414 			case 7:
2415 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2416 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2417 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418 						 TILE_SPLIT(split_equal_to_row_size));
2419 				break;
2420 			case 8:
2421 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2422 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2423 				break;
2424 			case 9:
2425 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2426 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2428 				break;
2429 			case 10:
2430 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2432 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434 				break;
2435 			case 11:
2436 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2438 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2439 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 				break;
2441 			case 12:
2442 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2443 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2444 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2445 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2446 				break;
2447 			case 13:
2448 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2451 				break;
2452 			case 14:
2453 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2455 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2457 				break;
2458 			case 16:
2459 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2460 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2461 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2462 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463 				break;
2464 			case 17:
2465 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2466 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2467 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2468 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2469 				break;
2470 			case 27:
2471 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2472 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2473 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2474 				break;
2475 			case 28:
2476 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2477 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2478 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2479 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2480 				break;
2481 			case 29:
2482 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2483 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2484 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2485 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2486 				break;
2487 			case 30:
2488 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2489 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2490 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2491 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2492 				break;
2493 			default:
2494 				gb_tile_moden = 0;
2495 				break;
2496 			}
2497 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2498 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2499 		}
2500 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2501 			switch (reg_offset) {
2502 			case 0:
2503 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2505 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2506 						 NUM_BANKS(ADDR_SURF_16_BANK));
2507 				break;
2508 			case 1:
2509 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2511 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2512 						 NUM_BANKS(ADDR_SURF_16_BANK));
2513 				break;
2514 			case 2:
2515 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2518 						 NUM_BANKS(ADDR_SURF_16_BANK));
2519 				break;
2520 			case 3:
2521 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524 						 NUM_BANKS(ADDR_SURF_16_BANK));
2525 				break;
2526 			case 4:
2527 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2530 						 NUM_BANKS(ADDR_SURF_8_BANK));
2531 				break;
2532 			case 5:
2533 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2535 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2536 						 NUM_BANKS(ADDR_SURF_4_BANK));
2537 				break;
2538 			case 6:
2539 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2541 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2542 						 NUM_BANKS(ADDR_SURF_2_BANK));
2543 				break;
2544 			case 8:
2545 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2547 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2548 						 NUM_BANKS(ADDR_SURF_16_BANK));
2549 				break;
2550 			case 9:
2551 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2553 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2554 						 NUM_BANKS(ADDR_SURF_16_BANK));
2555 				break;
2556 			case 10:
2557 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2559 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2560 						 NUM_BANKS(ADDR_SURF_16_BANK));
2561 				break;
2562 			case 11:
2563 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2565 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2566 						 NUM_BANKS(ADDR_SURF_8_BANK));
2567 				break;
2568 			case 12:
2569 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2571 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2572 						 NUM_BANKS(ADDR_SURF_4_BANK));
2573 				break;
2574 			case 13:
2575 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2577 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2578 						 NUM_BANKS(ADDR_SURF_2_BANK));
2579 				break;
2580 			case 14:
2581 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2583 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2584 						 NUM_BANKS(ADDR_SURF_2_BANK));
2585 				break;
2586 			default:
2587 				gb_tile_moden = 0;
2588 				break;
2589 			}
2590 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2591 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2592 		}
2593 	} else if (num_pipe_configs == 8) {
2594 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2595 			switch (reg_offset) {
2596 			case 0:
2597 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2598 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2599 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2601 				break;
2602 			case 1:
2603 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2605 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2607 				break;
2608 			case 2:
2609 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2610 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2611 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2613 				break;
2614 			case 3:
2615 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2617 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2618 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2619 				break;
2620 			case 4:
2621 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2623 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624 						 TILE_SPLIT(split_equal_to_row_size));
2625 				break;
2626 			case 5:
2627 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2628 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2629 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2630 				break;
2631 			case 6:
2632 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2633 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2634 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2635 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2636 				break;
2637 			case 7:
2638 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2639 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2640 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641 						 TILE_SPLIT(split_equal_to_row_size));
2642 				break;
2643 			case 8:
2644 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2645 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2646 				break;
2647 			case 9:
2648 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2649 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2650 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2651 				break;
2652 			case 10:
2653 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2655 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2656 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657 				break;
2658 			case 11:
2659 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2660 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2661 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2662 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2663 				break;
2664 			case 12:
2665 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2666 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2667 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2668 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2669 				break;
2670 			case 13:
2671 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2672 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2673 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2674 				break;
2675 			case 14:
2676 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2678 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2680 				break;
2681 			case 16:
2682 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2683 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2684 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2685 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2686 				break;
2687 			case 17:
2688 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2689 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2690 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2691 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692 				break;
2693 			case 27:
2694 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2695 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2696 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2697 				break;
2698 			case 28:
2699 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2700 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2701 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2702 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703 				break;
2704 			case 29:
2705 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2706 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2707 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2708 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2709 				break;
2710 			case 30:
2711 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2712 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2713 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2714 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715 				break;
2716 			default:
2717 				gb_tile_moden = 0;
2718 				break;
2719 			}
2720 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2721 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2722 		}
2723 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2724 			switch (reg_offset) {
2725 			case 0:
2726 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2728 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2729 						 NUM_BANKS(ADDR_SURF_16_BANK));
2730 				break;
2731 			case 1:
2732 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2734 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2735 						 NUM_BANKS(ADDR_SURF_16_BANK));
2736 				break;
2737 			case 2:
2738 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2740 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2741 						 NUM_BANKS(ADDR_SURF_16_BANK));
2742 				break;
2743 			case 3:
2744 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2746 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2747 						 NUM_BANKS(ADDR_SURF_16_BANK));
2748 				break;
2749 			case 4:
2750 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2752 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2753 						 NUM_BANKS(ADDR_SURF_8_BANK));
2754 				break;
2755 			case 5:
2756 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2759 						 NUM_BANKS(ADDR_SURF_4_BANK));
2760 				break;
2761 			case 6:
2762 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2765 						 NUM_BANKS(ADDR_SURF_2_BANK));
2766 				break;
2767 			case 8:
2768 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2770 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2771 						 NUM_BANKS(ADDR_SURF_16_BANK));
2772 				break;
2773 			case 9:
2774 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2775 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2776 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2777 						 NUM_BANKS(ADDR_SURF_16_BANK));
2778 				break;
2779 			case 10:
2780 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2782 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2783 						 NUM_BANKS(ADDR_SURF_16_BANK));
2784 				break;
2785 			case 11:
2786 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2788 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2789 						 NUM_BANKS(ADDR_SURF_16_BANK));
2790 				break;
2791 			case 12:
2792 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2794 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2795 						 NUM_BANKS(ADDR_SURF_8_BANK));
2796 				break;
2797 			case 13:
2798 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2800 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2801 						 NUM_BANKS(ADDR_SURF_4_BANK));
2802 				break;
2803 			case 14:
2804 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2806 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2807 						 NUM_BANKS(ADDR_SURF_2_BANK));
2808 				break;
2809 			default:
2810 				gb_tile_moden = 0;
2811 				break;
2812 			}
2813 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2814 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2815 		}
2816 	} else if (num_pipe_configs == 4) {
2817 		if (num_rbs == 4) {
2818 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2819 				switch (reg_offset) {
2820 				case 0:
2821 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2822 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2823 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2824 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2825 					break;
2826 				case 1:
2827 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2828 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2829 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2830 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2831 					break;
2832 				case 2:
2833 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2834 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2835 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2836 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2837 					break;
2838 				case 3:
2839 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2840 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2841 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2842 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2843 					break;
2844 				case 4:
2845 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2846 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2847 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2848 							 TILE_SPLIT(split_equal_to_row_size));
2849 					break;
2850 				case 5:
2851 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2852 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2854 					break;
2855 				case 6:
2856 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2857 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2858 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2859 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2860 					break;
2861 				case 7:
2862 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2863 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2864 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865 							 TILE_SPLIT(split_equal_to_row_size));
2866 					break;
2867 				case 8:
2868 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2869 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2870 					break;
2871 				case 9:
2872 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2873 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2874 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2875 					break;
2876 				case 10:
2877 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2879 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2880 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2881 					break;
2882 				case 11:
2883 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2884 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2885 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2886 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2887 					break;
2888 				case 12:
2889 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2890 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2891 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2892 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2893 					break;
2894 				case 13:
2895 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2896 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2897 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2898 					break;
2899 				case 14:
2900 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2902 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2903 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2904 					break;
2905 				case 16:
2906 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2907 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2908 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2909 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2910 					break;
2911 				case 17:
2912 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2913 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2914 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2915 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916 					break;
2917 				case 27:
2918 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2919 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2920 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2921 					break;
2922 				case 28:
2923 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2924 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2925 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927 					break;
2928 				case 29:
2929 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2931 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2932 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2933 					break;
2934 				case 30:
2935 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2936 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2937 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2938 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939 					break;
2940 				default:
2941 					gb_tile_moden = 0;
2942 					break;
2943 				}
2944 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2945 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2946 			}
2947 		} else if (num_rbs < 4) {
2948 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2949 				switch (reg_offset) {
2950 				case 0:
2951 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2952 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2953 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2954 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2955 					break;
2956 				case 1:
2957 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2958 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2959 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2960 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2961 					break;
2962 				case 2:
2963 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2964 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2965 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2966 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2967 					break;
2968 				case 3:
2969 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2970 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2971 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2972 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2973 					break;
2974 				case 4:
2975 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2976 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2977 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2978 							 TILE_SPLIT(split_equal_to_row_size));
2979 					break;
2980 				case 5:
2981 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2982 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2983 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2984 					break;
2985 				case 6:
2986 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2987 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2988 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2989 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2990 					break;
2991 				case 7:
2992 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2993 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2994 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2995 							 TILE_SPLIT(split_equal_to_row_size));
2996 					break;
2997 				case 8:
2998 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2999 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
3000 					break;
3001 				case 9:
3002 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3003 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3004 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
3005 					break;
3006 				case 10:
3007 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3008 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3009 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3010 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3011 					break;
3012 				case 11:
3013 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3014 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3015 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3016 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017 					break;
3018 				case 12:
3019 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3020 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3021 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3022 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3023 					break;
3024 				case 13:
3025 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3026 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3027 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3028 					break;
3029 				case 14:
3030 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3031 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3032 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3033 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3034 					break;
3035 				case 16:
3036 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3037 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3038 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3039 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3040 					break;
3041 				case 17:
3042 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3043 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3044 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3045 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3046 					break;
3047 				case 27:
3048 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3049 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3050 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
3051 					break;
3052 				case 28:
3053 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3054 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3055 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3056 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3057 					break;
3058 				case 29:
3059 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3060 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3061 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3062 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3063 					break;
3064 				case 30:
3065 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3066 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3067 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3068 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3069 					break;
3070 				default:
3071 					gb_tile_moden = 0;
3072 					break;
3073 				}
3074 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3075 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3076 			}
3077 		}
3078 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3079 			switch (reg_offset) {
3080 			case 0:
3081 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3083 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3084 						 NUM_BANKS(ADDR_SURF_16_BANK));
3085 				break;
3086 			case 1:
3087 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3088 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3089 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3090 						 NUM_BANKS(ADDR_SURF_16_BANK));
3091 				break;
3092 			case 2:
3093 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3095 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3096 						 NUM_BANKS(ADDR_SURF_16_BANK));
3097 				break;
3098 			case 3:
3099 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3100 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3101 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3102 						 NUM_BANKS(ADDR_SURF_16_BANK));
3103 				break;
3104 			case 4:
3105 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3106 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3107 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3108 						 NUM_BANKS(ADDR_SURF_16_BANK));
3109 				break;
3110 			case 5:
3111 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3112 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3113 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3114 						 NUM_BANKS(ADDR_SURF_8_BANK));
3115 				break;
3116 			case 6:
3117 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3118 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3119 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3120 						 NUM_BANKS(ADDR_SURF_4_BANK));
3121 				break;
3122 			case 8:
3123 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3124 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3125 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3126 						 NUM_BANKS(ADDR_SURF_16_BANK));
3127 				break;
3128 			case 9:
3129 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3130 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3131 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3132 						 NUM_BANKS(ADDR_SURF_16_BANK));
3133 				break;
3134 			case 10:
3135 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3136 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3137 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3138 						 NUM_BANKS(ADDR_SURF_16_BANK));
3139 				break;
3140 			case 11:
3141 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3142 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3143 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3144 						 NUM_BANKS(ADDR_SURF_16_BANK));
3145 				break;
3146 			case 12:
3147 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3149 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3150 						 NUM_BANKS(ADDR_SURF_16_BANK));
3151 				break;
3152 			case 13:
3153 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3154 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3155 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3156 						 NUM_BANKS(ADDR_SURF_8_BANK));
3157 				break;
3158 			case 14:
3159 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3160 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3161 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3162 						 NUM_BANKS(ADDR_SURF_4_BANK));
3163 				break;
3164 			default:
3165 				gb_tile_moden = 0;
3166 				break;
3167 			}
3168 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3169 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3170 		}
3171 	} else if (num_pipe_configs == 2) {
3172 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3173 			switch (reg_offset) {
3174 			case 0:
3175 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3176 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3177 						 PIPE_CONFIG(ADDR_SURF_P2) |
3178 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3179 				break;
3180 			case 1:
3181 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3182 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3183 						 PIPE_CONFIG(ADDR_SURF_P2) |
3184 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3185 				break;
3186 			case 2:
3187 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3188 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3189 						 PIPE_CONFIG(ADDR_SURF_P2) |
3190 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3191 				break;
3192 			case 3:
3193 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3194 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3195 						 PIPE_CONFIG(ADDR_SURF_P2) |
3196 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3197 				break;
3198 			case 4:
3199 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3200 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3201 						 PIPE_CONFIG(ADDR_SURF_P2) |
3202 						 TILE_SPLIT(split_equal_to_row_size));
3203 				break;
3204 			case 5:
3205 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3206 						 PIPE_CONFIG(ADDR_SURF_P2) |
3207 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3208 				break;
3209 			case 6:
3210 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3211 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3212 						 PIPE_CONFIG(ADDR_SURF_P2) |
3213 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3214 				break;
3215 			case 7:
3216 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3217 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3218 						 PIPE_CONFIG(ADDR_SURF_P2) |
3219 						 TILE_SPLIT(split_equal_to_row_size));
3220 				break;
3221 			case 8:
3222 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3223 						PIPE_CONFIG(ADDR_SURF_P2);
3224 				break;
3225 			case 9:
3226 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3227 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3228 						 PIPE_CONFIG(ADDR_SURF_P2));
3229 				break;
3230 			case 10:
3231 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3232 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3233 						 PIPE_CONFIG(ADDR_SURF_P2) |
3234 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3235 				break;
3236 			case 11:
3237 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3238 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3239 						 PIPE_CONFIG(ADDR_SURF_P2) |
3240 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3241 				break;
3242 			case 12:
3243 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3244 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3245 						 PIPE_CONFIG(ADDR_SURF_P2) |
3246 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3247 				break;
3248 			case 13:
3249 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3250 						 PIPE_CONFIG(ADDR_SURF_P2) |
3251 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3252 				break;
3253 			case 14:
3254 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3255 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3256 						 PIPE_CONFIG(ADDR_SURF_P2) |
3257 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3258 				break;
3259 			case 16:
3260 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3261 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3262 						 PIPE_CONFIG(ADDR_SURF_P2) |
3263 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3264 				break;
3265 			case 17:
3266 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3267 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3268 						 PIPE_CONFIG(ADDR_SURF_P2) |
3269 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3270 				break;
3271 			case 27:
3272 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3273 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3274 						 PIPE_CONFIG(ADDR_SURF_P2));
3275 				break;
3276 			case 28:
3277 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3278 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3279 						 PIPE_CONFIG(ADDR_SURF_P2) |
3280 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3281 				break;
3282 			case 29:
3283 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3284 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3285 						 PIPE_CONFIG(ADDR_SURF_P2) |
3286 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3287 				break;
3288 			case 30:
3289 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3290 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3291 						 PIPE_CONFIG(ADDR_SURF_P2) |
3292 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3293 				break;
3294 			default:
3295 				gb_tile_moden = 0;
3296 				break;
3297 			}
3298 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3299 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3300 		}
3301 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3302 			switch (reg_offset) {
3303 			case 0:
3304 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3305 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3306 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307 						 NUM_BANKS(ADDR_SURF_16_BANK));
3308 				break;
3309 			case 1:
3310 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3311 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3312 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3313 						 NUM_BANKS(ADDR_SURF_16_BANK));
3314 				break;
3315 			case 2:
3316 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3318 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3319 						 NUM_BANKS(ADDR_SURF_16_BANK));
3320 				break;
3321 			case 3:
3322 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3323 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3324 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3325 						 NUM_BANKS(ADDR_SURF_16_BANK));
3326 				break;
3327 			case 4:
3328 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3329 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3330 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3331 						 NUM_BANKS(ADDR_SURF_16_BANK));
3332 				break;
3333 			case 5:
3334 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3335 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3336 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3337 						 NUM_BANKS(ADDR_SURF_16_BANK));
3338 				break;
3339 			case 6:
3340 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3341 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3342 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3343 						 NUM_BANKS(ADDR_SURF_8_BANK));
3344 				break;
3345 			case 8:
3346 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3347 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3348 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3349 						 NUM_BANKS(ADDR_SURF_16_BANK));
3350 				break;
3351 			case 9:
3352 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3353 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3354 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3355 						 NUM_BANKS(ADDR_SURF_16_BANK));
3356 				break;
3357 			case 10:
3358 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3359 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3360 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3361 						 NUM_BANKS(ADDR_SURF_16_BANK));
3362 				break;
3363 			case 11:
3364 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3365 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3366 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3367 						 NUM_BANKS(ADDR_SURF_16_BANK));
3368 				break;
3369 			case 12:
3370 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3371 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3372 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3373 						 NUM_BANKS(ADDR_SURF_16_BANK));
3374 				break;
3375 			case 13:
3376 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3378 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3379 						 NUM_BANKS(ADDR_SURF_16_BANK));
3380 				break;
3381 			case 14:
3382 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3383 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3384 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3385 						 NUM_BANKS(ADDR_SURF_8_BANK));
3386 				break;
3387 			default:
3388 				gb_tile_moden = 0;
3389 				break;
3390 			}
3391 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3392 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3393 		}
3394 	} else
3395 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3396 }
3397 
3398 /**
3399  * cik_select_se_sh - select which SE, SH to address
3400  *
3401  * @rdev: radeon_device pointer
3402  * @se_num: shader engine to address
3403  * @sh_num: sh block to address
3404  *
3405  * Select which SE, SH combinations to address. Certain
3406  * registers are instanced per SE or SH.  0xffffffff means
3407  * broadcast to all SEs or SHs (CIK).
3408  */
3409 static void cik_select_se_sh(struct radeon_device *rdev,
3410 			     u32 se_num, u32 sh_num)
3411 {
3412 	u32 data = INSTANCE_BROADCAST_WRITES;
3413 
3414 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3415 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3416 	else if (se_num == 0xffffffff)
3417 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3418 	else if (sh_num == 0xffffffff)
3419 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3420 	else
3421 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3422 	WREG32(GRBM_GFX_INDEX, data);
3423 }
3424 
3425 /**
3426  * cik_create_bitmask - create a bitmask
3427  *
3428  * @bit_width: length of the mask
3429  *
3430  * create a variable length bit mask (CIK).
3431  * Returns the bitmask.
3432  */
3433 static u32 cik_create_bitmask(u32 bit_width)
3434 {
3435 	u32 i, mask = 0;
3436 
3437 	for (i = 0; i < bit_width; i++) {
3438 		mask <<= 1;
3439 		mask |= 1;
3440 	}
3441 	return mask;
3442 }
3443 
3444 /**
3445  * cik_get_rb_disabled - computes the mask of disabled RBs
3446  *
3447  * @rdev: radeon_device pointer
3448  * @max_rb_num: max RBs (render backends) for the asic
3449  * @se_num: number of SEs (shader engines) for the asic
3450  * @sh_per_se: number of SH blocks per SE for the asic
3451  *
3452  * Calculates the bitmask of disabled RBs (CIK).
3453  * Returns the disabled RB bitmask.
3454  */
3455 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3456 			      u32 max_rb_num_per_se,
3457 			      u32 sh_per_se)
3458 {
3459 	u32 data, mask;
3460 
3461 	data = RREG32(CC_RB_BACKEND_DISABLE);
3462 	if (data & 1)
3463 		data &= BACKEND_DISABLE_MASK;
3464 	else
3465 		data = 0;
3466 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3467 
3468 	data >>= BACKEND_DISABLE_SHIFT;
3469 
3470 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3471 
3472 	return data & mask;
3473 }
3474 
3475 /**
3476  * cik_setup_rb - setup the RBs on the asic
3477  *
3478  * @rdev: radeon_device pointer
3479  * @se_num: number of SEs (shader engines) for the asic
3480  * @sh_per_se: number of SH blocks per SE for the asic
3481  * @max_rb_num: max RBs (render backends) for the asic
3482  *
3483  * Configures per-SE/SH RB registers (CIK).
3484  */
3485 static void cik_setup_rb(struct radeon_device *rdev,
3486 			 u32 se_num, u32 sh_per_se,
3487 			 u32 max_rb_num_per_se)
3488 {
3489 	int i, j;
3490 	u32 data, mask;
3491 	u32 disabled_rbs = 0;
3492 	u32 enabled_rbs = 0;
3493 
3494 	mutex_lock(&rdev->grbm_idx_mutex);
3495 	for (i = 0; i < se_num; i++) {
3496 		for (j = 0; j < sh_per_se; j++) {
3497 			cik_select_se_sh(rdev, i, j);
3498 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3499 			if (rdev->family == CHIP_HAWAII)
3500 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3501 			else
3502 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3503 		}
3504 	}
3505 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3506 	mutex_unlock(&rdev->grbm_idx_mutex);
3507 
3508 	mask = 1;
3509 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3510 		if (!(disabled_rbs & mask))
3511 			enabled_rbs |= mask;
3512 		mask <<= 1;
3513 	}
3514 
3515 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3516 
3517 	mutex_lock(&rdev->grbm_idx_mutex);
3518 	for (i = 0; i < se_num; i++) {
3519 		cik_select_se_sh(rdev, i, 0xffffffff);
3520 		data = 0;
3521 		for (j = 0; j < sh_per_se; j++) {
3522 			switch (enabled_rbs & 3) {
3523 			case 0:
3524 				if (j == 0)
3525 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3526 				else
3527 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3528 				break;
3529 			case 1:
3530 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3531 				break;
3532 			case 2:
3533 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3534 				break;
3535 			case 3:
3536 			default:
3537 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3538 				break;
3539 			}
3540 			enabled_rbs >>= 2;
3541 		}
3542 		WREG32(PA_SC_RASTER_CONFIG, data);
3543 	}
3544 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3545 	mutex_unlock(&rdev->grbm_idx_mutex);
3546 }
3547 
3548 /**
3549  * cik_gpu_init - setup the 3D engine
3550  *
3551  * @rdev: radeon_device pointer
3552  *
3553  * Configures the 3D engine and tiling configuration
3554  * registers so that the 3D engine is usable.
3555  */
3556 static void cik_gpu_init(struct radeon_device *rdev)
3557 {
3558 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3559 	u32 mc_shared_chmap, mc_arb_ramcfg;
3560 	u32 hdp_host_path_cntl;
3561 	u32 tmp;
3562 	int i, j;
3563 
3564 	switch (rdev->family) {
3565 	case CHIP_BONAIRE:
3566 		rdev->config.cik.max_shader_engines = 2;
3567 		rdev->config.cik.max_tile_pipes = 4;
3568 		rdev->config.cik.max_cu_per_sh = 7;
3569 		rdev->config.cik.max_sh_per_se = 1;
3570 		rdev->config.cik.max_backends_per_se = 2;
3571 		rdev->config.cik.max_texture_channel_caches = 4;
3572 		rdev->config.cik.max_gprs = 256;
3573 		rdev->config.cik.max_gs_threads = 32;
3574 		rdev->config.cik.max_hw_contexts = 8;
3575 
3576 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3577 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3578 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3579 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3580 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3581 		break;
3582 	case CHIP_HAWAII:
3583 		rdev->config.cik.max_shader_engines = 4;
3584 		rdev->config.cik.max_tile_pipes = 16;
3585 		rdev->config.cik.max_cu_per_sh = 11;
3586 		rdev->config.cik.max_sh_per_se = 1;
3587 		rdev->config.cik.max_backends_per_se = 4;
3588 		rdev->config.cik.max_texture_channel_caches = 16;
3589 		rdev->config.cik.max_gprs = 256;
3590 		rdev->config.cik.max_gs_threads = 32;
3591 		rdev->config.cik.max_hw_contexts = 8;
3592 
3593 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3594 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3595 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3596 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3597 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3598 		break;
3599 	case CHIP_KAVERI:
3600 		rdev->config.cik.max_shader_engines = 1;
3601 		rdev->config.cik.max_tile_pipes = 4;
3602 		if ((rdev->pdev->device == 0x1304) ||
3603 		    (rdev->pdev->device == 0x1305) ||
3604 		    (rdev->pdev->device == 0x130C) ||
3605 		    (rdev->pdev->device == 0x130F) ||
3606 		    (rdev->pdev->device == 0x1310) ||
3607 		    (rdev->pdev->device == 0x1311) ||
3608 		    (rdev->pdev->device == 0x131C)) {
3609 			rdev->config.cik.max_cu_per_sh = 8;
3610 			rdev->config.cik.max_backends_per_se = 2;
3611 		} else if ((rdev->pdev->device == 0x1309) ||
3612 			   (rdev->pdev->device == 0x130A) ||
3613 			   (rdev->pdev->device == 0x130D) ||
3614 			   (rdev->pdev->device == 0x1313) ||
3615 			   (rdev->pdev->device == 0x131D)) {
3616 			rdev->config.cik.max_cu_per_sh = 6;
3617 			rdev->config.cik.max_backends_per_se = 2;
3618 		} else if ((rdev->pdev->device == 0x1306) ||
3619 			   (rdev->pdev->device == 0x1307) ||
3620 			   (rdev->pdev->device == 0x130B) ||
3621 			   (rdev->pdev->device == 0x130E) ||
3622 			   (rdev->pdev->device == 0x1315) ||
3623 			   (rdev->pdev->device == 0x1318) ||
3624 			   (rdev->pdev->device == 0x131B)) {
3625 			rdev->config.cik.max_cu_per_sh = 4;
3626 			rdev->config.cik.max_backends_per_se = 1;
3627 		} else {
3628 			rdev->config.cik.max_cu_per_sh = 3;
3629 			rdev->config.cik.max_backends_per_se = 1;
3630 		}
3631 		rdev->config.cik.max_sh_per_se = 1;
3632 		rdev->config.cik.max_texture_channel_caches = 4;
3633 		rdev->config.cik.max_gprs = 256;
3634 		rdev->config.cik.max_gs_threads = 16;
3635 		rdev->config.cik.max_hw_contexts = 8;
3636 
3637 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3638 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3639 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3640 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3641 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3642 		break;
3643 	case CHIP_KABINI:
3644 	case CHIP_MULLINS:
3645 	default:
3646 		rdev->config.cik.max_shader_engines = 1;
3647 		rdev->config.cik.max_tile_pipes = 2;
3648 		rdev->config.cik.max_cu_per_sh = 2;
3649 		rdev->config.cik.max_sh_per_se = 1;
3650 		rdev->config.cik.max_backends_per_se = 1;
3651 		rdev->config.cik.max_texture_channel_caches = 2;
3652 		rdev->config.cik.max_gprs = 256;
3653 		rdev->config.cik.max_gs_threads = 16;
3654 		rdev->config.cik.max_hw_contexts = 8;
3655 
3656 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3657 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3658 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3659 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3660 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3661 		break;
3662 	}
3663 
3664 	/* Initialize HDP */
3665 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3666 		WREG32((0x2c14 + j), 0x00000000);
3667 		WREG32((0x2c18 + j), 0x00000000);
3668 		WREG32((0x2c1c + j), 0x00000000);
3669 		WREG32((0x2c20 + j), 0x00000000);
3670 		WREG32((0x2c24 + j), 0x00000000);
3671 	}
3672 
3673 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3674 	WREG32(SRBM_INT_CNTL, 0x1);
3675 	WREG32(SRBM_INT_ACK, 0x1);
3676 
3677 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3678 
3679 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3680 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3681 
3682 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3683 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3684 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3685 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3686 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3687 		rdev->config.cik.mem_row_size_in_kb = 4;
3688 	/* XXX use MC settings? */
3689 	rdev->config.cik.shader_engine_tile_size = 32;
3690 	rdev->config.cik.num_gpus = 1;
3691 	rdev->config.cik.multi_gpu_tile_size = 64;
3692 
3693 	/* fix up row size */
3694 	gb_addr_config &= ~ROW_SIZE_MASK;
3695 	switch (rdev->config.cik.mem_row_size_in_kb) {
3696 	case 1:
3697 	default:
3698 		gb_addr_config |= ROW_SIZE(0);
3699 		break;
3700 	case 2:
3701 		gb_addr_config |= ROW_SIZE(1);
3702 		break;
3703 	case 4:
3704 		gb_addr_config |= ROW_SIZE(2);
3705 		break;
3706 	}
3707 
3708 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3709 	 * not have bank info, so create a custom tiling dword.
3710 	 * bits 3:0   num_pipes
3711 	 * bits 7:4   num_banks
3712 	 * bits 11:8  group_size
3713 	 * bits 15:12 row_size
3714 	 */
3715 	rdev->config.cik.tile_config = 0;
3716 	switch (rdev->config.cik.num_tile_pipes) {
3717 	case 1:
3718 		rdev->config.cik.tile_config |= (0 << 0);
3719 		break;
3720 	case 2:
3721 		rdev->config.cik.tile_config |= (1 << 0);
3722 		break;
3723 	case 4:
3724 		rdev->config.cik.tile_config |= (2 << 0);
3725 		break;
3726 	case 8:
3727 	default:
3728 		/* XXX what about 12? */
3729 		rdev->config.cik.tile_config |= (3 << 0);
3730 		break;
3731 	}
3732 	rdev->config.cik.tile_config |=
3733 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3734 	rdev->config.cik.tile_config |=
3735 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3736 	rdev->config.cik.tile_config |=
3737 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3738 
3739 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3740 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3741 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3742 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3743 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3744 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3745 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3746 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3747 
3748 	cik_tiling_mode_table_init(rdev);
3749 
3750 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3751 		     rdev->config.cik.max_sh_per_se,
3752 		     rdev->config.cik.max_backends_per_se);
3753 
3754 	rdev->config.cik.active_cus = 0;
3755 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3756 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3757 			rdev->config.cik.active_cus +=
3758 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3759 		}
3760 	}
3761 
3762 	/* set HW defaults for 3D engine */
3763 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3764 
3765 	mutex_lock(&rdev->grbm_idx_mutex);
3766 	/*
3767 	 * making sure that the following register writes will be broadcasted
3768 	 * to all the shaders
3769 	 */
3770 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3771 	WREG32(SX_DEBUG_1, 0x20);
3772 
3773 	WREG32(TA_CNTL_AUX, 0x00010000);
3774 
3775 	tmp = RREG32(SPI_CONFIG_CNTL);
3776 	tmp |= 0x03000000;
3777 	WREG32(SPI_CONFIG_CNTL, tmp);
3778 
3779 	WREG32(SQ_CONFIG, 1);
3780 
3781 	WREG32(DB_DEBUG, 0);
3782 
3783 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3784 	tmp |= 0x00000400;
3785 	WREG32(DB_DEBUG2, tmp);
3786 
3787 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3788 	tmp |= 0x00020200;
3789 	WREG32(DB_DEBUG3, tmp);
3790 
3791 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3792 	tmp |= 0x00018208;
3793 	WREG32(CB_HW_CONTROL, tmp);
3794 
3795 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3796 
3797 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3798 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3799 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3800 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3801 
3802 	WREG32(VGT_NUM_INSTANCES, 1);
3803 
3804 	WREG32(CP_PERFMON_CNTL, 0);
3805 
3806 	WREG32(SQ_CONFIG, 0);
3807 
3808 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3809 					  FORCE_EOV_MAX_REZ_CNT(255)));
3810 
3811 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3812 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3813 
3814 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3815 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3816 
3817 	tmp = RREG32(HDP_MISC_CNTL);
3818 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3819 	WREG32(HDP_MISC_CNTL, tmp);
3820 
3821 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3822 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3823 
3824 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3825 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3826 	mutex_unlock(&rdev->grbm_idx_mutex);
3827 
3828 	udelay(50);
3829 }
3830 
3831 /*
3832  * GPU scratch registers helpers function.
3833  */
3834 /**
3835  * cik_scratch_init - setup driver info for CP scratch regs
3836  *
3837  * @rdev: radeon_device pointer
3838  *
3839  * Set up the number and offset of the CP scratch registers.
3840  * NOTE: use of CP scratch registers is a legacy inferface and
3841  * is not used by default on newer asics (r6xx+).  On newer asics,
3842  * memory buffers are used for fences rather than scratch regs.
3843  */
3844 static void cik_scratch_init(struct radeon_device *rdev)
3845 {
3846 	int i;
3847 
3848 	rdev->scratch.num_reg = 7;
3849 	rdev->scratch.reg_base = SCRATCH_REG0;
3850 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3851 		rdev->scratch.free[i] = true;
3852 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3853 	}
3854 }
3855 
3856 /**
3857  * cik_ring_test - basic gfx ring test
3858  *
3859  * @rdev: radeon_device pointer
3860  * @ring: radeon_ring structure holding ring information
3861  *
3862  * Allocate a scratch register and write to it using the gfx ring (CIK).
3863  * Provides a basic gfx ring test to verify that the ring is working.
3864  * Used by cik_cp_gfx_resume();
3865  * Returns 0 on success, error on failure.
3866  */
int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* borrow a CP scratch register to use as the test target */
	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed with a sentinel so we can tell whether the ring write landed */
	WREG32(scratch, 0xCAFEDEAD);
	/* the test packet below is exactly 3 dwords */
	r = radeon_ring_lock(rdev, ring, 3);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* ask the CP to write 0xDEADBEEF into the scratch register */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
	radeon_ring_write(ring, 0xDEADBEEF);
	radeon_ring_unlock_commit(rdev, ring, false);

	/* poll until the CP write shows up or we hit the usec timeout */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
	} else {
		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
			  ring->idx, scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	return r;
}
3907 
3908 /**
3909  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3910  *
3911  * @rdev: radeon_device pointer
3912  * @ridx: radeon ring index
3913  *
3914  * Emits an hdp flush on the cp.
3915  */
static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
				       int ridx)
{
	struct radeon_ring *ring = &rdev->ring[ridx];
	u32 ref_and_mask;

	/* pick the GPU_HDP_FLUSH_* bit that corresponds to this ring's
	 * CP client: gfx uses CP0, compute rings use a per-ME/per-pipe bit */
	switch (ring->idx) {
	case CAYMAN_RING_TYPE_CP1_INDEX:
	case CAYMAN_RING_TYPE_CP2_INDEX:
	default:
		switch (ring->me) {
		case 0:
			ref_and_mask = CP2 << ring->pipe;
			break;
		case 1:
			ref_and_mask = CP6 << ring->pipe;
			break;
		default:
			/* unknown ME: nothing sensible to emit */
			return;
		}
		break;
	case RADEON_RING_TYPE_GFX_INDEX:
		ref_and_mask = CP0;
		break;
	}

	/* WAIT_REG_MEM in write/wait/write mode: request an HDP flush and
	 * poll GPU_HDP_FLUSH_DONE until our client's bit matches */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, ref_and_mask);
	radeon_ring_write(ring, 0x20); /* poll interval */
}
3952 
3953 /**
3954  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3955  *
3956  * @rdev: radeon_device pointer
3957  * @fence: radeon fence object
3958  *
3959  * Emits a fence sequnce number on the gfx ring and flushes
3960  * GPU caches.
3961  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* INT_SEL(0): dummy event writes the data but raises no interrupt */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0));
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	/* DATA_SEL(1): write the 32-bit seq; INT_SEL(2): interrupt on write */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3993 
3994 /**
3995  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3996  *
3997  * @rdev: radeon_device pointer
3998  * @fence: radeon fence object
3999  *
4000  * Emits a fence sequnce number on the compute ring and flushes
4001  * GPU caches.
4002  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* DATA_SEL(1): write 32-bit seq; INT_SEL(2): interrupt on write */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	/* 64-bit fence address; low bits must be dword-aligned */
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
4021 
4022 /**
4023  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
4024  *
4025  * @rdev: radeon_device pointer
4026  * @ring: radeon ring buffer object
4027  * @semaphore: radeon semaphore object
4028  * @emit_wait: Is this a sempahore wait?
4029  *
4030  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4031  * from running ahead of semaphore waits.
4032  */
4033 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
4034 			     struct radeon_ring *ring,
4035 			     struct radeon_semaphore *semaphore,
4036 			     bool emit_wait)
4037 {
4038 	uint64_t addr = semaphore->gpu_addr;
4039 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4040 
4041 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
4042 	radeon_ring_write(ring, lower_32_bits(addr));
4043 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
4044 
4045 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
4046 		/* Prevent the PFP from running ahead of the semaphore wait */
4047 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4048 		radeon_ring_write(ring, 0x0);
4049 	}
4050 
4051 	return true;
4052 }
4053 
4054 /**
4055  * cik_copy_cpdma - copy pages using the CP DMA engine
4056  *
4057  * @rdev: radeon_device pointer
4058  * @src_offset: src GPU address
4059  * @dst_offset: dst GPU address
4060  * @num_gpu_pages: number of GPU pages to xfer
4061  * @resv: reservation object to sync to
4062  *
4063  * Copy GPU paging using the CP DMA engine (CIK+).
4064  * Used by the radeon ttm implementation to move pages if
4065  * registered as the asic copy callback.
4066  */
struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
				    uint64_t src_offset, uint64_t dst_offset,
				    unsigned num_gpu_pages,
				    struct reservation_object *resv)
{
	struct radeon_fence *fence;
	struct radeon_sync sync;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	radeon_sync_create(&sync);

	/* a single DMA_DATA packet can move at most 0x1fffff bytes,
	 * so split the copy into that many chunks */
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	/* 7 dwords per DMA_DATA packet, plus room for sync/fence packets */
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	/* wait for any fences attached to the reservation object first */
	radeon_sync_resv(rdev, &sync, resv, false);
	radeon_sync_rings(rdev, &sync, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* CP_SYNC on the final chunk so later packets see the data */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	/* fence marks completion of the whole copy */
	r = radeon_fence_emit(rdev, &fence, ring->idx);
	if (r) {
		radeon_ring_unlock_undo(rdev, ring);
		radeon_sync_free(rdev, &sync, NULL);
		return ERR_PTR(r);
	}

	radeon_ring_unlock_commit(rdev, ring, false);
	radeon_sync_free(rdev, &sync, fence);

	return fence;
}
4125 
4126 /*
4127  * IB stuff
4128  */
4129 /**
4130  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4131  *
4132  * @rdev: radeon_device pointer
4133  * @ib: radeon indirect buffer object
4134  *
4135  * Emits an DE (drawing engine) or CE (constant engine) IB
4136  * on the gfx ring.  IBs are usually generated by userspace
4137  * acceleration drivers and submitted to the kernel for
4138  * sheduling on the ring.  This function schedules the IB
4139  * on the gfx ring for execution by the GPU.
4140  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* +3 for the SET_UCONFIG_REG packet below,
			 * +4 for the IB packet emitted at the end */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* +5 for the WRITE_DATA packet, +4 for the IB packet */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB size in dwords plus the VM id to execute it under */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
4184 
4185 /**
4186  * cik_ib_test - basic gfx ring IB test
4187  *
4188  * @rdev: radeon_device pointer
4189  * @ring: radeon_ring structure holding ring information
4190  *
4191  * Allocate an IB and execute it on the gfx ring (CIK).
4192  * Provides a basic gfx ring test to verify that IBs are working.
4193  * Returns 0 on success, error on failure.
4194  */
int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
{
	struct radeon_ib ib;
	uint32_t scratch;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* borrow a CP scratch register as the test target */
	r = radeon_scratch_get(rdev, &scratch);
	if (r) {
		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
		return r;
	}
	/* seed with a sentinel so we can tell whether the IB ran */
	WREG32(scratch, 0xCAFEDEAD);
	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
	if (r) {
		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		return r;
	}
	/* single packet: write 0xDEADBEEF into the scratch register */
	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
	ib.ptr[2] = 0xDEADBEEF;
	ib.length_dw = 3;
	r = radeon_ib_schedule(rdev, &ib, NULL, false);
	if (r) {
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
		return r;
	}
	/* block until the IB's fence signals */
	r = radeon_fence_wait(ib.fence, false);
	if (r) {
		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
		return r;
	}
	/* the fence can signal slightly before the write lands; poll */
	for (i = 0; i < rdev->usec_timeout; i++) {
		tmp = RREG32(scratch);
		if (tmp == 0xDEADBEEF)
			break;
		DRM_UDELAY(1);
	}
	if (i < rdev->usec_timeout) {
		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
	} else {
		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
			  scratch, tmp);
		r = -EINVAL;
	}
	radeon_scratch_free(rdev, scratch);
	radeon_ib_free(rdev, &ib);
	return r;
}
4250 
4251 /*
4252  * CP.
4253  * On CIK, gfx and compute now have independant command processors.
4254  *
4255  * GFX
4256  * Gfx consists of a single ring and can process both gfx jobs and
4257  * compute jobs.  The gfx CP consists of three microengines (ME):
4258  * PFP - Pre-Fetch Parser
4259  * ME - Micro Engine
4260  * CE - Constant Engine
4261  * The PFP and ME make up what is considered the Drawing Engine (DE).
4262  * The CE is an asynchronous engine used for updating buffer desciptors
4263  * used by the DE so that they can be loaded into cache in parallel
4264  * while the DE is processing state update packets.
4265  *
4266  * Compute
4267  * The compute CP consists of two microengines (ME):
4268  * MEC1 - Compute MicroEngine 1
4269  * MEC2 - Compute MicroEngine 2
4270  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4271  * The queues are exposed to userspace and are programmed directly
4272  * by the compute runtime.
4273  */
4274 /**
4275  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4276  *
4277  * @rdev: radeon_device pointer
4278  * @enable: enable or disable the MEs
4279  *
4280  * Halts or unhalts the gfx MEs.
4281  */
4282 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4283 {
4284 	if (enable)
4285 		WREG32(CP_ME_CNTL, 0);
4286 	else {
4287 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4288 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4289 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4290 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4291 	}
4292 	udelay(50);
4293 }
4294 
4295 /**
4296  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4297  *
4298  * @rdev: radeon_device pointer
4299  *
4300  * Loads the gfx PFP, ME, and CE ucode.
4301  * Returns 0 for success, -EINVAL if the ucode is not available.
4302  */
4303 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4304 {
4305 	int i;
4306 
4307 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4308 		return -EINVAL;
4309 
4310 	cik_cp_gfx_enable(rdev, false);
4311 
4312 	if (rdev->new_fw) {
4313 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4314 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4315 		const struct gfx_firmware_header_v1_0 *ce_hdr =
4316 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4317 		const struct gfx_firmware_header_v1_0 *me_hdr =
4318 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4319 		const __le32 *fw_data;
4320 		u32 fw_size;
4321 
4322 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4323 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4324 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4325 
4326 		/* PFP */
4327 		fw_data = (const __le32 *)
4328 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4329 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4330 		WREG32(CP_PFP_UCODE_ADDR, 0);
4331 		for (i = 0; i < fw_size; i++)
4332 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4333 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4334 
4335 		/* CE */
4336 		fw_data = (const __le32 *)
4337 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4338 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4339 		WREG32(CP_CE_UCODE_ADDR, 0);
4340 		for (i = 0; i < fw_size; i++)
4341 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4342 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4343 
4344 		/* ME */
4345 		fw_data = (const __be32 *)
4346 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4347 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4348 		WREG32(CP_ME_RAM_WADDR, 0);
4349 		for (i = 0; i < fw_size; i++)
4350 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4351 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4352 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4353 	} else {
4354 		const __be32 *fw_data;
4355 
4356 		/* PFP */
4357 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4358 		WREG32(CP_PFP_UCODE_ADDR, 0);
4359 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4360 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4361 		WREG32(CP_PFP_UCODE_ADDR, 0);
4362 
4363 		/* CE */
4364 		fw_data = (const __be32 *)rdev->ce_fw->data;
4365 		WREG32(CP_CE_UCODE_ADDR, 0);
4366 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4367 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4368 		WREG32(CP_CE_UCODE_ADDR, 0);
4369 
4370 		/* ME */
4371 		fw_data = (const __be32 *)rdev->me_fw->data;
4372 		WREG32(CP_ME_RAM_WADDR, 0);
4373 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4374 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4375 		WREG32(CP_ME_RAM_WADDR, 0);
4376 	}
4377 
4378 	return 0;
4379 }
4380 
4381 /**
4382  * cik_cp_gfx_start - start the gfx ring
4383  *
4384  * @rdev: radeon_device pointer
4385  *
4386  * Enables the ring and loads the clear state context and other
4387  * packets required to init the ring.
4388  * Returns 0 for success, error for failure.
4389  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* clear-state table plus 17 dwords of fixed packets below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the golden register state (clearstate_ci.h) */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4441 
4442 /**
4443  * cik_cp_gfx_fini - stop the gfx ring
4444  *
4445  * @rdev: radeon_device pointer
4446  *
4447  * Stop the gfx ring and tear down the driver ring
4448  * info.
4449  */
4450 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4451 {
4452 	cik_cp_gfx_enable(rdev, false);
4453 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4454 }
4455 
4456 /**
4457  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4458  *
4459  * @rdev: radeon_device pointer
4460  *
4461  * Program the location and size of the gfx ring buffer
4462  * and test it to make sure it's working.
4463  * Returns 0 for success, error for failure.
4464  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	/* RB_RPTR_WR_ENA lets us force the read pointer back to zero */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address wether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	/* let the forced rptr settle before dropping RB_RPTR_WR_ENA */
	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	/* 256-byte aligned ring base address */
	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* gfx ring is back: expose the full VRAM size for copies again */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4531 
4532 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4533 		     struct radeon_ring *ring)
4534 {
4535 	u32 rptr;
4536 
4537 	if (rdev->wb.enabled)
4538 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4539 	else
4540 		rptr = RREG32(CP_RB0_RPTR);
4541 
4542 	return rptr;
4543 }
4544 
4545 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4546 		     struct radeon_ring *ring)
4547 {
4548 	u32 wptr;
4549 
4550 	wptr = RREG32(CP_RB0_WPTR);
4551 
4552 	return wptr;
4553 }
4554 
4555 void cik_gfx_set_wptr(struct radeon_device *rdev,
4556 		      struct radeon_ring *ring)
4557 {
4558 	WREG32(CP_RB0_WPTR, ring->wptr);
4559 	(void)RREG32(CP_RB0_WPTR);
4560 }
4561 
4562 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4563 			 struct radeon_ring *ring)
4564 {
4565 	u32 rptr;
4566 
4567 	if (rdev->wb.enabled) {
4568 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4569 	} else {
4570 		mutex_lock(&rdev->srbm_mutex);
4571 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4572 		rptr = RREG32(CP_HQD_PQ_RPTR);
4573 		cik_srbm_select(rdev, 0, 0, 0, 0);
4574 		mutex_unlock(&rdev->srbm_mutex);
4575 	}
4576 
4577 	return rptr;
4578 }
4579 
4580 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4581 			 struct radeon_ring *ring)
4582 {
4583 	u32 wptr;
4584 
4585 	if (rdev->wb.enabled) {
4586 		/* XXX check if swapping is necessary on BE */
4587 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4588 	} else {
4589 		mutex_lock(&rdev->srbm_mutex);
4590 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4591 		wptr = RREG32(CP_HQD_PQ_WPTR);
4592 		cik_srbm_select(rdev, 0, 0, 0, 0);
4593 		mutex_unlock(&rdev->srbm_mutex);
4594 	}
4595 
4596 	return wptr;
4597 }
4598 
/* Commit a compute ring's write pointer: mirror it into the writeback
 * slot, then ring the queue's doorbell to notify the CP. */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4606 
/* Cleanly deactivate one compute hardware queue (HQD).
 * Caller must hold rdev->srbm_mutex (see cik_cp_compute_enable); this
 * function selects the ring's SRBM bank and restores bank 0 on return. */
static void cik_compute_stop(struct radeon_device *rdev,
			     struct radeon_ring *ring)
{
	u32 j, tmp;

	cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
	/* Disable wptr polling. */
	tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
	tmp &= ~WPTR_POLL_EN;
	WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
	/* Disable HQD. */
	if (RREG32(CP_HQD_ACTIVE) & 1) {
		/* request a dequeue, then busy-wait up to usec_timeout
		 * microseconds for the queue to report inactive */
		WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
		for (j = 0; j < rdev->usec_timeout; j++) {
			if (!(RREG32(CP_HQD_ACTIVE) & 1))
				break;
			udelay(1);
		}
		/* clear the request and reset the queue pointers */
		WREG32(CP_HQD_DEQUEUE_REQUEST, 0);
		WREG32(CP_HQD_PQ_RPTR, 0);
		WREG32(CP_HQD_PQ_WPTR, 0);
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
}
4631 
/**
 * cik_cp_compute_enable - enable/disable the compute CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halts or unhalts the compute MEs.  On disable, the two compute rings'
 * HQD state is torn down first (under srbm_mutex) and their ready flags
 * are cleared.
 */
static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
{
	if (enable)
		WREG32(CP_MEC_CNTL, 0);
	else {
		/*
		 * To make hibernation reliable we need to clear compute ring
		 * configuration before halting the compute ring.
		 */
		mutex_lock(&rdev->srbm_mutex);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
		cik_compute_stop(rdev,&rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
		mutex_unlock(&rdev->srbm_mutex);

		/* halt both MEC engines only after the queues are quiesced */
		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
	}
	/* let the halt/unhalt settle before the caller touches the CP */
	udelay(50);
}
4660 
4661 /**
4662  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4663  *
4664  * @rdev: radeon_device pointer
4665  *
4666  * Loads the compute MEC1&2 ucode.
4667  * Returns 0 for success, -EINVAL if the ucode is not available.
4668  */
4669 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4670 {
4671 	int i;
4672 
4673 	if (!rdev->mec_fw)
4674 		return -EINVAL;
4675 
4676 	cik_cp_compute_enable(rdev, false);
4677 
4678 	if (rdev->new_fw) {
4679 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4680 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4681 		const __le32 *fw_data;
4682 		u32 fw_size;
4683 
4684 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4685 
4686 		/* MEC1 */
4687 		fw_data = (const __le32 *)
4688 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4689 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4690 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4691 		for (i = 0; i < fw_size; i++)
4692 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4693 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4694 
4695 		/* MEC2 */
4696 		if (rdev->family == CHIP_KAVERI) {
4697 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4698 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4699 
4700 			fw_data = (const __le32 *)
4701 				(rdev->mec2_fw->data +
4702 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4703 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4704 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4705 			for (i = 0; i < fw_size; i++)
4706 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4707 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4708 		}
4709 	} else {
4710 		const __be32 *fw_data;
4711 
4712 		/* MEC1 */
4713 		fw_data = (const __be32 *)rdev->mec_fw->data;
4714 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4715 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4716 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4717 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4718 
4719 		if (rdev->family == CHIP_KAVERI) {
4720 			/* MEC2 */
4721 			fw_data = (const __be32 *)rdev->mec_fw->data;
4722 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4723 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4724 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4725 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4726 		}
4727 	}
4728 
4729 	return 0;
4730 }
4731 
4732 /**
4733  * cik_cp_compute_start - start the compute queues
4734  *
4735  * @rdev: radeon_device pointer
4736  *
4737  * Enable the compute queues.
4738  * Returns 0 for success, error for failure.
4739  */
4740 static int cik_cp_compute_start(struct radeon_device *rdev)
4741 {
4742 	cik_cp_compute_enable(rdev, true);
4743 
4744 	return 0;
4745 }
4746 
4747 /**
4748  * cik_cp_compute_fini - stop the compute queues
4749  *
4750  * @rdev: radeon_device pointer
4751  *
4752  * Stop the compute queues and tear down the driver queue
4753  * info.
4754  */
4755 static void cik_cp_compute_fini(struct radeon_device *rdev)
4756 {
4757 	int i, idx, r;
4758 
4759 	cik_cp_compute_enable(rdev, false);
4760 
4761 	for (i = 0; i < 2; i++) {
4762 		if (i == 0)
4763 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4764 		else
4765 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4766 
4767 		if (rdev->ring[idx].mqd_obj) {
4768 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4769 			if (unlikely(r != 0))
4770 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4771 
4772 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4773 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4774 
4775 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4776 			rdev->ring[idx].mqd_obj = NULL;
4777 		}
4778 	}
4779 }
4780 
4781 static void cik_mec_fini(struct radeon_device *rdev)
4782 {
4783 	int r;
4784 
4785 	if (rdev->mec.hpd_eop_obj) {
4786 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4787 		if (unlikely(r != 0))
4788 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4789 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4790 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4791 
4792 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4793 		rdev->mec.hpd_eop_obj = NULL;
4794 	}
4795 }
4796 
4797 #define MEC_HPD_SIZE 2048
4798 
/* Allocate, pin and zero the per-pipe HPD EOP buffer used by the MEC.
 * Returns 0 on success or a negative error code; on failure the partial
 * allocation is torn down via cik_mec_fini(). */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 * Nonetheless, we assign only 1 pipe because all other pipes will
	 * be handled by KFD
	 */
	rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 1;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		/* one MEC_HPD_SIZE*2 chunk per mec/pipe, GTT-resident */
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	/* map it so the CPU can zero-initialize the buffer */
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4853 
/* CPU-side image of one hardware queue's CP_MQD_*/CP_HQD_* register
 * state, embedded in the MQD.  The field names track the registers
 * written in cik_cp_compute_resume(); the layout presumably must match
 * what the MEC firmware expects — do not reorder fields. */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4892 
/* Memory Queue Descriptor (MQD) for Bonaire-class (CIK) compute queues.
 * One MQD per hardware queue lives in a GTT buffer object (allocated in
 * cik_cp_compute_resume()); queue_state carries the HQD register image.
 * Layout is presumably dictated by the MEC firmware — do not reorder. */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4920 
/**
 * cik_cp_compute_resume - setup the compute queue registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the compute queues and test them to make sure they
 * are working.  For each of the two kernel compute rings this
 * allocates (if needed), pins and maps an MQD buffer object,
 * fills in the HQD register image, programs the hardware queue
 * under srbm_mutex, then ring-tests it.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_resume(struct radeon_device *rdev)
{
	int r, i, j, idx;
	u32 tmp;
	bool use_doorbell = true;
	u64 hqd_gpu_addr;
	u64 mqd_gpu_addr;
	u64 eop_gpu_addr;
	u64 wb_gpu_addr;
	u32 *buf;
	struct bonaire_mqd *mqd;

	r = cik_cp_compute_start(rdev);
	if (r)
		return r;

	/* fix up chicken bits */
	tmp = RREG32(CP_CPF_DEBUG);
	tmp |= (1 << 23);
	WREG32(CP_CPF_DEBUG, tmp);

	/* init the pipes */
	mutex_lock(&rdev->srbm_mutex);

	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;

	cik_srbm_select(rdev, 0, 0, 0, 0);

	/* write the EOP addr */
	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);

	/* set the VMID assigned */
	WREG32(CP_HPD_EOP_VMID, 0);

	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
	tmp = RREG32(CP_HPD_EOP_CONTROL);
	tmp &= ~EOP_SIZE_MASK;
	tmp |= order_base_2(MEC_HPD_SIZE / 8);
	WREG32(CP_HPD_EOP_CONTROL, tmp);

	mutex_unlock(&rdev->srbm_mutex);

	/* init the queues.  Just two for now. */
	for (i = 0; i < 2; i++) {
		if (i == 0)
			idx = CAYMAN_RING_TYPE_CP1_INDEX;
		else
			idx = CAYMAN_RING_TYPE_CP2_INDEX;

		/* lazily allocate the MQD bo for this queue */
		if (rdev->ring[idx].mqd_obj == NULL) {
			r = radeon_bo_create(rdev,
					     sizeof(struct bonaire_mqd),
					     PAGE_SIZE, true,
					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
					     NULL, &rdev->ring[idx].mqd_obj);
			if (r) {
				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
				return r;
			}
		}

		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
		if (unlikely(r != 0)) {
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
				  &mqd_gpu_addr);
		if (r) {
			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}
		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
		if (r) {
			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
			cik_cp_compute_fini(rdev);
			return r;
		}

		/* init the mqd struct */
		memset(buf, 0, sizeof(struct bonaire_mqd));

		mqd = (struct bonaire_mqd *)buf;
		mqd->header = 0xC0310800;
		mqd->static_thread_mgmt01[0] = 0xffffffff;
		mqd->static_thread_mgmt01[1] = 0xffffffff;
		mqd->static_thread_mgmt23[0] = 0xffffffff;
		mqd->static_thread_mgmt23[1] = 0xffffffff;

		/* program this queue's HQD registers under the SRBM bank lock */
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, rdev->ring[idx].me,
				rdev->ring[idx].pipe,
				rdev->ring[idx].queue, 0);

		/* disable wptr polling */
		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
		tmp &= ~WPTR_POLL_EN;
		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);

		/* enable doorbell? */
		mqd->queue_state.cp_hqd_pq_doorbell_control =
			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
		if (use_doorbell)
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
		else
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* disable the queue if it's active */
		mqd->queue_state.cp_hqd_dequeue_request = 0;
		mqd->queue_state.cp_hqd_pq_rptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr= 0;
		if (RREG32(CP_HQD_ACTIVE) & 1) {
			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
			for (j = 0; j < rdev->usec_timeout; j++) {
				if (!(RREG32(CP_HQD_ACTIVE) & 1))
					break;
				udelay(1);
			}
			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		}

		/* set the pointer to the MQD */
		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
		/* set MQD vmid to 0 */
		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);

		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);

		/* set up the HQD, this is similar to CP_RB0_CNTL */
		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
		mqd->queue_state.cp_hqd_pq_control &=
			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);

		mqd->queue_state.cp_hqd_pq_control |=
			order_base_2(rdev->ring[idx].ring_size / 8);
		mqd->queue_state.cp_hqd_pq_control |=
			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
#ifdef __BIG_ENDIAN
		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
#endif
		mqd->queue_state.cp_hqd_pq_control &=
			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
		mqd->queue_state.cp_hqd_pq_control |=
			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);

		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);

		/* set the wb address whether it's enabled or not */
		if (i == 0)
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
		else
			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
			upper_32_bits(wb_gpu_addr) & 0xffff;
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);

		/* enable the doorbell if requested */
		if (use_doorbell) {
			mqd->queue_state.cp_hqd_pq_doorbell_control =
				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
			mqd->queue_state.cp_hqd_pq_doorbell_control |=
				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
			mqd->queue_state.cp_hqd_pq_doorbell_control &=
				~(DOORBELL_SOURCE | DOORBELL_HIT);

		} else {
			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
		}
		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
		       mqd->queue_state.cp_hqd_pq_doorbell_control);

		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
		rdev->ring[idx].wptr = 0;
		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

		/* set the vmid for the queue */
		mqd->queue_state.cp_hqd_vmid = 0;
		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);

		/* activate the queue */
		mqd->queue_state.cp_hqd_active = 1;
		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);

		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);

		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);

		/* mark ready, then verify with a ring test */
		rdev->ring[idx].ready = true;
		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
		if (r)
			rdev->ring[idx].ready = false;
	}

	return 0;
}
5161 
/* Enable or disable both the gfx and compute command processors. */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
5167 
/* Load the gfx CP ucode, then the compute MEC ucode.
 * Returns 0 on success or the first failing loader's error code. */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (r)
		return r;

	return cik_cp_compute_load_microcode(rdev);
}
5181 
/* Tear down both the gfx and compute command processors. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
5187 
/* Bring up both command processors: load ucode, resume the gfx and
 * compute rings, with the GUI idle interrupt masked during the bring-up.
 * Returns 0 on success or the first failing step's error code. */
static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	/* keep the GUI idle interrupt quiet while the rings come up */
	cik_enable_gui_idle_interrupt(rdev, false);

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	cik_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
5209 
/* Dump the main GRBM/SRBM/SDMA/CP status registers to the kernel log;
 * used for diagnostics before and after a GPU soft reset. */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
5249 
/**
 * cik_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Check which blocks are busy and return the relevant reset
 * mask to be used by cik_gpu_soft_reset().
 * Returns a mask of the blocks to be reset.
 */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS: graphics pipeline blocks */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS: system blocks (IH, semaphores, GRBM, VM, MC) */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
5330 
/**
 * cik_gpu_soft_reset - soft reset GPU
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of which blocks to reset
 *
 * Soft reset the blocks specified in @reset_mask.  Sequence: dump
 * status, quiesce CG/PG/RLC/CP/SDMA, stop the MC, pulse the GRBM and
 * SRBM soft-reset bits, then resume the MC.
 */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* stop memory traffic before pulsing the reset bits */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	/* pulse the GRBM reset bits: set, wait, clear (reads post the writes) */
	if (grbm_soft_reset) {
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	/* pulse the SRBM reset bits the same way */
	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	cik_print_gpu_status_regs(rdev);
}
5461 
/* GMCON register state saved across a pci config reset on KV/IGP parts;
 * filled by kv_save_regs_for_reset(), replayed by kv_restore_regs_for_reset(). */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5467 
/* Save the GMCON registers into @save and quiesce the memory controller
 * render engine before a pci config reset on KV/IGP parts. */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	/* stash current state so it can be restored after the reset */
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	/* stop RENG execution on power-up/reg-update and disable stutter */
	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5479 
5480 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5481 				      struct kv_reset_save_regs *save)
5482 {
5483 	int i;
5484 
5485 	WREG32(GMCON_PGFSM_WRITE, 0);
5486 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5487 
5488 	for (i = 0; i < 5; i++)
5489 		WREG32(GMCON_PGFSM_WRITE, 0);
5490 
5491 	WREG32(GMCON_PGFSM_WRITE, 0);
5492 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5493 
5494 	for (i = 0; i < 5; i++)
5495 		WREG32(GMCON_PGFSM_WRITE, 0);
5496 
5497 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5498 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5499 
5500 	for (i = 0; i < 5; i++)
5501 		WREG32(GMCON_PGFSM_WRITE, 0);
5502 
5503 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5504 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5505 
5506 	for (i = 0; i < 5; i++)
5507 		WREG32(GMCON_PGFSM_WRITE, 0);
5508 
5509 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5510 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5511 
5512 	for (i = 0; i < 5; i++)
5513 		WREG32(GMCON_PGFSM_WRITE, 0);
5514 
5515 	WREG32(GMCON_PGFSM_WRITE, 0);
5516 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5517 
5518 	for (i = 0; i < 5; i++)
5519 		WREG32(GMCON_PGFSM_WRITE, 0);
5520 
5521 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5522 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5523 
5524 	for (i = 0; i < 5; i++)
5525 		WREG32(GMCON_PGFSM_WRITE, 0);
5526 
5527 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5528 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5529 
5530 	for (i = 0; i < 5; i++)
5531 		WREG32(GMCON_PGFSM_WRITE, 0);
5532 
5533 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5534 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5535 
5536 	for (i = 0; i < 5; i++)
5537 		WREG32(GMCON_PGFSM_WRITE, 0);
5538 
5539 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5540 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5541 
5542 	for (i = 0; i < 5; i++)
5543 		WREG32(GMCON_PGFSM_WRITE, 0);
5544 
5545 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5546 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5547 
5548 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5549 	WREG32(GMCON_MISC, save->gmcon_misc);
5550 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5551 }
5552 
/* Hard reset via PCI config space: quiesce every engine and the MC,
 * save IGP-specific GMCON state, pulse the pci config reset, then wait
 * for the ASIC to come back and restore the saved state. */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* IGP parts lose GMCON state over the reset; save it first */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads as
	 * all-ones while the device is still in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5615 
5616 /**
5617  * cik_asic_reset - soft reset GPU
5618  *
5619  * @rdev: radeon_device pointer
5620  *
5621  * Look up which blocks are hung and attempt
5622  * to reset them.
5623  * Returns 0 for success.
5624  */
5625 int cik_asic_reset(struct radeon_device *rdev)
5626 {
5627 	u32 reset_mask;
5628 
5629 	reset_mask = cik_gpu_check_soft_reset(rdev);
5630 
5631 	if (reset_mask)
5632 		r600_set_bios_scratch_engine_hung(rdev, true);
5633 
5634 	/* try soft reset */
5635 	cik_gpu_soft_reset(rdev, reset_mask);
5636 
5637 	reset_mask = cik_gpu_check_soft_reset(rdev);
5638 
5639 	/* try pci config reset */
5640 	if (reset_mask && radeon_hard_reset)
5641 		cik_gpu_pci_config_reset(rdev);
5642 
5643 	reset_mask = cik_gpu_check_soft_reset(rdev);
5644 
5645 	if (!reset_mask)
5646 		r600_set_bios_scratch_engine_hung(rdev, false);
5647 
5648 	return 0;
5649 }
5650 
5651 /**
5652  * cik_gfx_is_lockup - check if the 3D engine is locked up
5653  *
5654  * @rdev: radeon_device pointer
5655  * @ring: radeon_ring structure holding ring information
5656  *
5657  * Check if the 3D engine is locked up (CIK).
5658  * Returns true if the engine is locked, false if not.
5659  */
5660 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5661 {
5662 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5663 
5664 	if (!(reset_mask & (RADEON_RESET_GFX |
5665 			    RADEON_RESET_COMPUTE |
5666 			    RADEON_RESET_CP))) {
5667 		radeon_ring_lockup_update(rdev, ring);
5668 		return false;
5669 	}
5670 	return radeon_ring_test_lockup(rdev, ring);
5671 }
5672 
/* MC */
/**
 * cik_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram, gart, and AGP in the GPU's
 * physical address space (CIK).
 */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* stop memory traffic while the aperture registers are updated */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: high 16 bits hold vram_end >> 24,
	 * low 16 bits hold vram_start >> 24 */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5729 
5730 /**
5731  * cik_mc_init - initialize the memory controller driver params
5732  *
5733  * @rdev: radeon_device pointer
5734  *
5735  * Look up the amount of vram, vram width, and decide how to place
5736  * vram and gart within the GPU's physical address space (CIK).
5737  * Returns 0 for success.
5738  */
5739 static int cik_mc_init(struct radeon_device *rdev)
5740 {
5741 	u32 tmp;
5742 	int chansize, numchan;
5743 
5744 	/* Get VRAM informations */
5745 	rdev->mc.vram_is_ddr = true;
5746 	tmp = RREG32(MC_ARB_RAMCFG);
5747 	if (tmp & CHANSIZE_MASK) {
5748 		chansize = 64;
5749 	} else {
5750 		chansize = 32;
5751 	}
5752 	tmp = RREG32(MC_SHARED_CHMAP);
5753 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5754 	case 0:
5755 	default:
5756 		numchan = 1;
5757 		break;
5758 	case 1:
5759 		numchan = 2;
5760 		break;
5761 	case 2:
5762 		numchan = 4;
5763 		break;
5764 	case 3:
5765 		numchan = 8;
5766 		break;
5767 	case 4:
5768 		numchan = 3;
5769 		break;
5770 	case 5:
5771 		numchan = 6;
5772 		break;
5773 	case 6:
5774 		numchan = 10;
5775 		break;
5776 	case 7:
5777 		numchan = 12;
5778 		break;
5779 	case 8:
5780 		numchan = 16;
5781 		break;
5782 	}
5783 	rdev->mc.vram_width = numchan * chansize;
5784 	/* Could aper size report 0 ? */
5785 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5786 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5787 	/* size in MB on si */
5788 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5789 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5790 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5791 	si_vram_gtt_location(rdev, &rdev->mc);
5792 	radeon_update_bandwidth_info(rdev);
5793 
5794 	return 0;
5795 }
5796 
5797 /*
5798  * GART
5799  * VMID 0 is the physical GPU addresses as used by the kernel.
5800  * VMIDs 1-15 are used for userspace clients and are handled
5801  * by the radeon vm/hsa code.
5802  */
5803 /**
5804  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5805  *
5806  * @rdev: radeon_device pointer
5807  *
5808  * Flush the TLB for the VMID 0 page table (CIK).
5809  */
5810 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5811 {
5812 	/* flush hdp cache */
5813 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5814 
5815 	/* bits 0-15 are the VM contexts0-15 */
5816 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5817 }
5818 
5819 static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
5820 {
5821 	int i;
5822 	uint32_t sh_mem_bases, sh_mem_config;
5823 
5824 	sh_mem_bases = 0x6000 | 0x6000 << 16;
5825 	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
5826 	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);
5827 
5828 	mutex_lock(&rdev->srbm_mutex);
5829 	for (i = 8; i < 16; i++) {
5830 		cik_srbm_select(rdev, 0, 0, 0, i);
5831 		/* CP and shaders */
5832 		WREG32(SH_MEM_CONFIG, sh_mem_config);
5833 		WREG32(SH_MEM_APE1_BASE, 1);
5834 		WREG32(SH_MEM_APE1_LIMIT, 0);
5835 		WREG32(SH_MEM_BASES, sh_mem_bases);
5836 	}
5837 	cik_srbm_select(rdev, 0, 0, 0, 0);
5838 	mutex_unlock(&rdev->srbm_mutex);
5839 }
5840 
5841 /**
5842  * cik_pcie_gart_enable - gart enable
5843  *
5844  * @rdev: radeon_device pointer
5845  *
5846  * This sets up the TLBs, programs the page tables for VMID0,
5847  * sets up the hw for VMIDs 1-15 which are allocated on
5848  * demand, and sets up the global locations for the LDS, GDS,
5849  * and GPUVM for FSA64 clients (CIK).
5850  * Returns 0 for success, errors for failure.
5851  */
5852 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5853 {
5854 	int r, i;
5855 
5856 	if (rdev->gart.robj == NULL) {
5857 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5858 		return -EINVAL;
5859 	}
5860 	r = radeon_gart_table_vram_pin(rdev);
5861 	if (r)
5862 		return r;
5863 	/* Setup TLB control */
5864 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5865 	       (0xA << 7) |
5866 	       ENABLE_L1_TLB |
5867 	       ENABLE_L1_FRAGMENT_PROCESSING |
5868 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5869 	       ENABLE_ADVANCED_DRIVER_MODEL |
5870 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5871 	/* Setup L2 cache */
5872 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5873 	       ENABLE_L2_FRAGMENT_PROCESSING |
5874 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5875 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5876 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5877 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5878 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5879 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5880 	       BANK_SELECT(4) |
5881 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
5882 	/* setup context0 */
5883 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5884 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5885 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5886 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5887 			(u32)(rdev->dummy_page.addr >> 12));
5888 	WREG32(VM_CONTEXT0_CNTL2, 0);
5889 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5890 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5891 
5892 	WREG32(0x15D4, 0);
5893 	WREG32(0x15D8, 0);
5894 	WREG32(0x15DC, 0);
5895 
5896 	/* restore context1-15 */
5897 	/* set vm size, must be a multiple of 4 */
5898 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5899 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
5900 	for (i = 1; i < 16; i++) {
5901 		if (i < 8)
5902 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5903 			       rdev->vm_manager.saved_table_addr[i]);
5904 		else
5905 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5906 			       rdev->vm_manager.saved_table_addr[i]);
5907 	}
5908 
5909 	/* enable context1-15 */
5910 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5911 	       (u32)(rdev->dummy_page.addr >> 12));
5912 	WREG32(VM_CONTEXT1_CNTL2, 4);
5913 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5914 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
5915 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5916 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5917 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5918 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5919 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5920 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5921 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5922 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5923 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5924 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5925 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5926 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5927 
5928 	if (rdev->family == CHIP_KAVERI) {
5929 		u32 tmp = RREG32(CHUB_CONTROL);
5930 		tmp &= ~BYPASS_VM;
5931 		WREG32(CHUB_CONTROL, tmp);
5932 	}
5933 
5934 	/* XXX SH_MEM regs */
5935 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5936 	mutex_lock(&rdev->srbm_mutex);
5937 	for (i = 0; i < 16; i++) {
5938 		cik_srbm_select(rdev, 0, 0, 0, i);
5939 		/* CP and shaders */
5940 		WREG32(SH_MEM_CONFIG, 0);
5941 		WREG32(SH_MEM_APE1_BASE, 1);
5942 		WREG32(SH_MEM_APE1_LIMIT, 0);
5943 		WREG32(SH_MEM_BASES, 0);
5944 		/* SDMA GFX */
5945 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5946 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5947 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5948 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5949 		/* XXX SDMA RLC - todo */
5950 	}
5951 	cik_srbm_select(rdev, 0, 0, 0, 0);
5952 	mutex_unlock(&rdev->srbm_mutex);
5953 
5954 	cik_pcie_init_compute_vmid(rdev);
5955 
5956 	cik_pcie_gart_tlb_flush(rdev);
5957 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5958 		 (unsigned)(rdev->mc.gtt_size >> 20),
5959 		 (unsigned long long)rdev->gart.table_addr);
5960 	rdev->gart.ready = true;
5961 	return 0;
5962 }
5963 
5964 /**
5965  * cik_pcie_gart_disable - gart disable
5966  *
5967  * @rdev: radeon_device pointer
5968  *
5969  * This disables all VM page table (CIK).
5970  */
5971 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5972 {
5973 	unsigned i;
5974 
5975 	for (i = 1; i < 16; ++i) {
5976 		uint32_t reg;
5977 		if (i < 8)
5978 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
5979 		else
5980 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
5981 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
5982 	}
5983 
5984 	/* Disable all tables */
5985 	WREG32(VM_CONTEXT0_CNTL, 0);
5986 	WREG32(VM_CONTEXT1_CNTL, 0);
5987 	/* Setup TLB control */
5988 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5989 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5990 	/* Setup L2 cache */
5991 	WREG32(VM_L2_CNTL,
5992 	       ENABLE_L2_FRAGMENT_PROCESSING |
5993 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5994 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5995 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5996 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5997 	WREG32(VM_L2_CNTL2, 0);
5998 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5999 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
6000 	radeon_gart_table_vram_unpin(rdev);
6001 }
6002 
6003 /**
6004  * cik_pcie_gart_fini - vm fini callback
6005  *
6006  * @rdev: radeon_device pointer
6007  *
6008  * Tears down the driver GART/VM setup (CIK).
6009  */
6010 static void cik_pcie_gart_fini(struct radeon_device *rdev)
6011 {
6012 	cik_pcie_gart_disable(rdev);
6013 	radeon_gart_table_vram_free(rdev);
6014 	radeon_gart_fini(rdev);
6015 }
6016 
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer pointer
 *
 * CIK uses hw IB checking so this is a nop (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
6030 
6031 /*
6032  * vm
6033  * VMID 0 is the physical GPU addresses as used by the kernel.
6034  * VMIDs 1-15 are used for userspace clients and are handled
6035  * by the radeon vm/hsa code.
6036  */
6037 /**
6038  * cik_vm_init - cik vm init callback
6039  *
6040  * @rdev: radeon_device pointer
6041  *
6042  * Inits cik specific vm parameters (number of VMs, base of vram for
6043  * VMIDs 1-15) (CIK).
6044  * Returns 0 for success.
6045  */
6046 int cik_vm_init(struct radeon_device *rdev)
6047 {
6048 	/*
6049 	 * number of VMs
6050 	 * VMID 0 is reserved for System
6051 	 * radeon graphics/compute will use VMIDs 1-7
6052 	 * amdkfd will use VMIDs 8-15
6053 	 */
6054 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
6055 	/* base offset of vram pages */
6056 	if (rdev->flags & RADEON_IS_IGP) {
6057 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
6058 		tmp <<= 22;
6059 		rdev->vm_manager.vram_base_offset = tmp;
6060 	} else
6061 		rdev->vm_manager.vram_base_offset = 0;
6062 
6063 	return 0;
6064 }
6065 
6066 /**
6067  * cik_vm_fini - cik vm fini callback
6068  *
6069  * @rdev: radeon_device pointer
6070  *
6071  * Tear down any asic specific VM setup (CIK).
6072  */
6073 void cik_vm_fini(struct radeon_device *rdev)
6074 {
6075 }
6076 
6077 /**
6078  * cik_vm_decode_fault - print human readable fault info
6079  *
6080  * @rdev: radeon_device pointer
6081  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
6082  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
6083  *
6084  * Print human readable fault information (CIK).
6085  */
6086 static void cik_vm_decode_fault(struct radeon_device *rdev,
6087 				u32 status, u32 addr, u32 mc_client)
6088 {
6089 	u32 mc_id;
6090 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
6091 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
6092 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
6093 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
6094 
6095 	if (rdev->family == CHIP_HAWAII)
6096 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6097 	else
6098 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6099 
6100 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
6101 	       protections, vmid, addr,
6102 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
6103 	       block, mc_client, mc_id);
6104 }
6105 
6106 /**
6107  * cik_vm_flush - cik vm flush using the CP
6108  *
6109  * @rdev: radeon_device pointer
6110  *
6111  * Update the page table base and flush the VM TLB
6112  * using the CP (CIK).
6113  */
6114 void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
6115 		  unsigned vm_id, uint64_t pd_addr)
6116 {
6117 	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);
6118 
6119 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6120 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6121 				 WRITE_DATA_DST_SEL(0)));
6122 	if (vm_id < 8) {
6123 		radeon_ring_write(ring,
6124 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
6125 	} else {
6126 		radeon_ring_write(ring,
6127 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
6128 	}
6129 	radeon_ring_write(ring, 0);
6130 	radeon_ring_write(ring, pd_addr >> 12);
6131 
6132 	/* update SH_MEM_* regs */
6133 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6134 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6135 				 WRITE_DATA_DST_SEL(0)));
6136 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6137 	radeon_ring_write(ring, 0);
6138 	radeon_ring_write(ring, VMID(vm_id));
6139 
6140 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
6141 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6142 				 WRITE_DATA_DST_SEL(0)));
6143 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
6144 	radeon_ring_write(ring, 0);
6145 
6146 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
6147 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
6148 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
6149 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
6150 
6151 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6152 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6153 				 WRITE_DATA_DST_SEL(0)));
6154 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
6155 	radeon_ring_write(ring, 0);
6156 	radeon_ring_write(ring, VMID(0));
6157 
6158 	/* HDP flush */
6159 	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);
6160 
6161 	/* bits 0-15 are the VM contexts0-15 */
6162 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
6163 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
6164 				 WRITE_DATA_DST_SEL(0)));
6165 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6166 	radeon_ring_write(ring, 0);
6167 	radeon_ring_write(ring, 1 << vm_id);
6168 
6169 	/* wait for the invalidate to complete */
6170 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
6171 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
6172 				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
6173 				 WAIT_REG_MEM_ENGINE(0))); /* me */
6174 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
6175 	radeon_ring_write(ring, 0);
6176 	radeon_ring_write(ring, 0); /* ref */
6177 	radeon_ring_write(ring, 0); /* mask */
6178 	radeon_ring_write(ring, 0x20); /* poll interval */
6179 
6180 	/* compute doesn't have PFP */
6181 	if (usepfp) {
6182 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
6183 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
6184 		radeon_ring_write(ring, 0x0);
6185 	}
6186 }
6187 
6188 /*
6189  * RLC
6190  * The RLC is a multi-purpose microengine that handles a
6191  * variety of functions, the most important of which is
6192  * the interrupt controller.
6193  */
6194 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6195 					  bool enable)
6196 {
6197 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6198 
6199 	if (enable)
6200 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6201 	else
6202 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6203 	WREG32(CP_INT_CNTL_RING0, tmp);
6204 }
6205 
6206 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6207 {
6208 	u32 tmp;
6209 
6210 	tmp = RREG32(RLC_LB_CNTL);
6211 	if (enable)
6212 		tmp |= LOAD_BALANCE_ENABLE;
6213 	else
6214 		tmp &= ~LOAD_BALANCE_ENABLE;
6215 	WREG32(RLC_LB_CNTL, tmp);
6216 }
6217 
/* Poll (bounded by rdev->usec_timeout) until the RLC serdes master
 * busy bits clear: first the CU masters per SE/SH, then the non-CU
 * masters.
 */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast SE/SH selection */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
6244 
6245 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6246 {
6247 	u32 tmp;
6248 
6249 	tmp = RREG32(RLC_CNTL);
6250 	if (tmp != rlc)
6251 		WREG32(RLC_CNTL, rlc);
6252 }
6253 
/* Disable the RLC (if enabled) and wait for it to go idle.
 * Returns the previous RLC_CNTL value so the caller can restore it
 * later via cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		/* wait (bounded) for the RLC GPM block to go idle */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
6277 
/* Request the RLC to enter safe mode, then wait (bounded by
 * rdev->usec_timeout) for the GFX power/clock status bits and for the
 * request bit to clear.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to clear the request bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
6298 
6299 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6300 {
6301 	u32 tmp;
6302 
6303 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6304 	WREG32(RLC_GPR_REG2, tmp);
6305 }
6306 
6307 /**
6308  * cik_rlc_stop - stop the RLC ME
6309  *
6310  * @rdev: radeon_device pointer
6311  *
6312  * Halt the RLC ME (MicroEngine) (CIK).
6313  */
6314 static void cik_rlc_stop(struct radeon_device *rdev)
6315 {
6316 	WREG32(RLC_CNTL, 0);
6317 
6318 	cik_enable_gui_idle_interrupt(rdev, false);
6319 
6320 	cik_wait_for_rlc_serdes(rdev);
6321 }
6322 
6323 /**
6324  * cik_rlc_start - start the RLC ME
6325  *
6326  * @rdev: radeon_device pointer
6327  *
6328  * Unhalt the RLC ME (MicroEngine) (CIK).
6329  */
6330 static void cik_rlc_start(struct radeon_device *rdev)
6331 {
6332 	WREG32(RLC_CNTL, RLC_ENABLE);
6333 
6334 	cik_enable_gui_idle_interrupt(rdev, true);
6335 
6336 	udelay(50);
6337 }
6338 
6339 /**
6340  * cik_rlc_resume - setup the RLC hw
6341  *
6342  * @rdev: radeon_device pointer
6343  *
6344  * Initialize the RLC registers, load the ucode,
6345  * and start the RLC (CIK).
6346  * Returns 0 for success, -EINVAL if the ucode is not available.
6347  */
6348 static int cik_rlc_resume(struct radeon_device *rdev)
6349 {
6350 	u32 i, size, tmp;
6351 
6352 	if (!rdev->rlc_fw)
6353 		return -EINVAL;
6354 
6355 	cik_rlc_stop(rdev);
6356 
6357 	/* disable CG */
6358 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
6359 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
6360 
6361 	si_rlc_reset(rdev);
6362 
6363 	cik_init_pg(rdev);
6364 
6365 	cik_init_cg(rdev);
6366 
6367 	WREG32(RLC_LB_CNTR_INIT, 0);
6368 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
6369 
6370 	mutex_lock(&rdev->grbm_idx_mutex);
6371 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6372 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
6373 	WREG32(RLC_LB_PARAMS, 0x00600408);
6374 	WREG32(RLC_LB_CNTL, 0x80000004);
6375 	mutex_unlock(&rdev->grbm_idx_mutex);
6376 
6377 	WREG32(RLC_MC_CNTL, 0);
6378 	WREG32(RLC_UCODE_CNTL, 0);
6379 
6380 	if (rdev->new_fw) {
6381 		const struct rlc_firmware_header_v1_0 *hdr =
6382 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
6383 		const __le32 *fw_data = (const __le32 *)
6384 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6385 
6386 		radeon_ucode_print_rlc_hdr(&hdr->header);
6387 
6388 		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
6389 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6390 		for (i = 0; i < size; i++)
6391 			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
6392 		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
6393 	} else {
6394 		const __be32 *fw_data;
6395 
6396 		switch (rdev->family) {
6397 		case CHIP_BONAIRE:
6398 		case CHIP_HAWAII:
6399 		default:
6400 			size = BONAIRE_RLC_UCODE_SIZE;
6401 			break;
6402 		case CHIP_KAVERI:
6403 			size = KV_RLC_UCODE_SIZE;
6404 			break;
6405 		case CHIP_KABINI:
6406 			size = KB_RLC_UCODE_SIZE;
6407 			break;
6408 		case CHIP_MULLINS:
6409 			size = ML_RLC_UCODE_SIZE;
6410 			break;
6411 		}
6412 
6413 		fw_data = (const __be32 *)rdev->rlc_fw->data;
6414 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6415 		for (i = 0; i < size; i++)
6416 			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
6417 		WREG32(RLC_GPM_UCODE_ADDR, 0);
6418 	}
6419 
6420 	/* XXX - find out what chips support lbpw */
6421 	cik_enable_lbpw(rdev, false);
6422 
6423 	if (rdev->family == CHIP_BONAIRE)
6424 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
6425 
6426 	cik_rlc_start(rdev);
6427 
6428 	return 0;
6429 }
6430 
/* Enable/disable GFX coarse grain clock gating (CGCG) and clock
 * gating light sleep (CGLS).
 */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while reprogramming the serdes */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&rdev->grbm_idx_mutex);

		/* restore the saved RLC_CNTL value */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* repeated reads — NOTE(review): presumably a settling
		 * workaround; purpose not documented here */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6468 
/* Enable/disable GFX medium grain clock gating (MGCG) along with the
 * related CP/RLC memory light sleep and CGTS settings.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* enable CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* set bit 0, clear bit 1 */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while reprogramming the serdes */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		/* restore the saved RLC_CNTL value */
		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* take RLC memory out of light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* take CP memory out of light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* halt the RLC while reprogramming the serdes */
		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);
	}
}
6552 
/* Registers carrying the per-block MC clock gating / light sleep
 * enable bits, toggled by cik_enable_mc_ls() and cik_enable_mc_mgcg().
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6565 
6566 static void cik_enable_mc_ls(struct radeon_device *rdev,
6567 			     bool enable)
6568 {
6569 	int i;
6570 	u32 orig, data;
6571 
6572 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6573 		orig = data = RREG32(mc_cg_registers[i]);
6574 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6575 			data |= MC_LS_ENABLE;
6576 		else
6577 			data &= ~MC_LS_ENABLE;
6578 		if (data != orig)
6579 			WREG32(mc_cg_registers[i], data);
6580 	}
6581 }
6582 
6583 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6584 			       bool enable)
6585 {
6586 	int i;
6587 	u32 orig, data;
6588 
6589 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6590 		orig = data = RREG32(mc_cg_registers[i]);
6591 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6592 			data |= MC_CG_ENABLE;
6593 		else
6594 			data &= ~MC_CG_ENABLE;
6595 		if (data != orig)
6596 			WREG32(mc_cg_registers[i], data);
6597 	}
6598 }
6599 
6600 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6601 				 bool enable)
6602 {
6603 	u32 orig, data;
6604 
6605 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6606 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6607 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6608 	} else {
6609 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6610 		data |= 0xff000000;
6611 		if (data != orig)
6612 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6613 
6614 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6615 		data |= 0xff000000;
6616 		if (data != orig)
6617 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6618 	}
6619 }
6620 
6621 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6622 				 bool enable)
6623 {
6624 	u32 orig, data;
6625 
6626 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6627 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6628 		data |= 0x100;
6629 		if (orig != data)
6630 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6631 
6632 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6633 		data |= 0x100;
6634 		if (orig != data)
6635 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6636 	} else {
6637 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6638 		data &= ~0x100;
6639 		if (orig != data)
6640 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6641 
6642 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6643 		data &= ~0x100;
6644 		if (orig != data)
6645 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6646 	}
6647 }
6648 
6649 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6650 				bool enable)
6651 {
6652 	u32 orig, data;
6653 
6654 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6655 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6656 		data = 0xfff;
6657 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6658 
6659 		orig = data = RREG32(UVD_CGC_CTRL);
6660 		data |= DCM;
6661 		if (orig != data)
6662 			WREG32(UVD_CGC_CTRL, data);
6663 	} else {
6664 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6665 		data &= ~0xfff;
6666 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6667 
6668 		orig = data = RREG32(UVD_CGC_CTRL);
6669 		data &= ~DCM;
6670 		if (orig != data)
6671 			WREG32(UVD_CGC_CTRL, data);
6672 	}
6673 }
6674 
6675 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6676 			       bool enable)
6677 {
6678 	u32 orig, data;
6679 
6680 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6681 
6682 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6683 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6684 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6685 	else
6686 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6687 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6688 
6689 	if (orig != data)
6690 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6691 }
6692 
6693 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6694 				bool enable)
6695 {
6696 	u32 orig, data;
6697 
6698 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6699 
6700 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6701 		data &= ~CLOCK_GATING_DIS;
6702 	else
6703 		data |= CLOCK_GATING_DIS;
6704 
6705 	if (orig != data)
6706 		WREG32(HDP_HOST_PATH_CNTL, data);
6707 }
6708 
6709 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6710 			      bool enable)
6711 {
6712 	u32 orig, data;
6713 
6714 	orig = data = RREG32(HDP_MEM_POWER_LS);
6715 
6716 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6717 		data |= HDP_LS_ENABLE;
6718 	else
6719 		data &= ~HDP_LS_ENABLE;
6720 
6721 	if (orig != data)
6722 		WREG32(HDP_MEM_POWER_LS, data);
6723 }
6724 
/**
 * cik_update_cg - enable/disable clock gating for a set of IP blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable or disable clock gating
 *
 * Dispatches to the per-block CG helpers.  GFX MGCG/CGCG must be
 * sequenced (MGCG before CGCG on enable, the reverse on disable), and the
 * GUI idle interrupt is masked while the GFX gating state is changed.
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC clock gating is only touched on discrete parts */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6772 
/* Enable clock gating at init time: GFX first, then UVD internal CG
 * (if present), then the remaining blocks in one batch. */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6787 
/* Disable clock gating at teardown, in the reverse order of cik_init_cg():
 * the non-GFX blocks first, GFX last. */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6798 
6799 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6800 					  bool enable)
6801 {
6802 	u32 data, orig;
6803 
6804 	orig = data = RREG32(RLC_PG_CNTL);
6805 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6806 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6807 	else
6808 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6809 	if (orig != data)
6810 		WREG32(RLC_PG_CNTL, data);
6811 }
6812 
6813 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6814 					  bool enable)
6815 {
6816 	u32 data, orig;
6817 
6818 	orig = data = RREG32(RLC_PG_CNTL);
6819 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6820 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6821 	else
6822 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6823 	if (orig != data)
6824 		WREG32(RLC_PG_CNTL, data);
6825 }
6826 
6827 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6828 {
6829 	u32 data, orig;
6830 
6831 	orig = data = RREG32(RLC_PG_CNTL);
6832 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6833 		data &= ~DISABLE_CP_PG;
6834 	else
6835 		data |= DISABLE_CP_PG;
6836 	if (orig != data)
6837 		WREG32(RLC_PG_CNTL, data);
6838 }
6839 
6840 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6841 {
6842 	u32 data, orig;
6843 
6844 	orig = data = RREG32(RLC_PG_CNTL);
6845 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6846 		data &= ~DISABLE_GDS_PG;
6847 	else
6848 		data |= DISABLE_GDS_PG;
6849 	if (orig != data)
6850 		WREG32(RLC_PG_CNTL, data);
6851 }
6852 
/* Jump-table size/offsets (in dwords/bytes) within legacy CP microcode
 * images; new-style firmware carries these in its header instead. */
#define CP_ME_TABLE_SIZE    96
#define CP_ME_TABLE_OFFSET  2048
#define CP_MEC_TABLE_OFFSET 4096

/**
 * cik_init_cp_pg_table - copy the CP jump tables into the RLC cp_table BO
 *
 * @rdev: radeon_device pointer
 *
 * For each CP microengine (CE, PFP, ME, MEC, and MEC2 on Kaveri) copy the
 * firmware jump table into the RLC cp_table buffer so the RLC can restore
 * CP state after power gating.  New-style firmware provides the table
 * location in its header; legacy firmware uses the fixed offsets above.
 * No-op if the cp_table buffer has not been mapped.
 */
void cik_init_cp_pg_table(struct radeon_device *rdev)
{
	volatile u32 *dst_ptr;
	int me, i, max_me = 4;
	u32 bo_offset = 0;
	u32 table_offset, table_size;

	/* Kaveri also has a second MEC engine (MEC2) */
	if (rdev->family == CHIP_KAVERI)
		max_me = 5;

	if (rdev->rlc.cp_table_ptr == NULL)
		return;

	/* write the cp table buffer */
	dst_ptr = rdev->rlc.cp_table_ptr;
	for (me = 0; me < max_me; me++) {
		if (rdev->new_fw) {
			/* new firmware: table location comes from the header */
			const __le32 *fw_data;
			const struct gfx_firmware_header_v1_0 *hdr;

			if (me == 0) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
				fw_data = (const __le32 *)
					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 1) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
				fw_data = (const __le32 *)
					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 2) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
				fw_data = (const __le32 *)
					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else if (me == 3) {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			} else {
				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
				fw_data = (const __le32 *)
					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
				table_offset = le32_to_cpu(hdr->jt_offset);
				table_size = le32_to_cpu(hdr->jt_size);
			}

			/* firmware dwords are little-endian; write them out LE */
			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		} else {
			/* legacy firmware: big-endian dwords at fixed offsets */
			const __be32 *fw_data;
			table_size = CP_ME_TABLE_SIZE;

			if (me == 0) {
				fw_data = (const __be32 *)rdev->ce_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 1) {
				fw_data = (const __be32 *)rdev->pfp_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else if (me == 2) {
				fw_data = (const __be32 *)rdev->me_fw->data;
				table_offset = CP_ME_TABLE_OFFSET;
			} else {
				fw_data = (const __be32 *)rdev->mec_fw->data;
				table_offset = CP_MEC_TABLE_OFFSET;
			}

			for (i = 0; i < table_size; i ++) {
				dst_ptr[bo_offset + i] =
					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
			}
			bo_offset += table_size;
		}
	}
}
6940 
/* Toggle GFX coarse-grain power gating: GFX_PG_ENABLE in RLC_PG_CNTL plus
 * AUTO_PG_EN in RLC_AUTO_PG_CTRL, each written back only on change. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): result is discarded — presumably a posting
		 * read to make sure the disable took effect; confirm before
		 * removing. */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6970 
6971 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6972 {
6973 	u32 mask = 0, tmp, tmp1;
6974 	int i;
6975 
6976 	mutex_lock(&rdev->grbm_idx_mutex);
6977 	cik_select_se_sh(rdev, se, sh);
6978 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6979 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6980 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6981 	mutex_unlock(&rdev->grbm_idx_mutex);
6982 
6983 	tmp &= 0xffff0000;
6984 
6985 	tmp |= tmp1;
6986 	tmp >>= 16;
6987 
6988 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6989 		mask <<= 1;
6990 		mask |= 1;
6991 	}
6992 
6993 	return (~tmp) & mask;
6994 }
6995 
/* Program the RLC always-on CU mask: for every SE/SH pair mark up to two
 * active CUs as always-on, and set the PG CU budget to the total number of
 * active CUs found. */
static void cik_init_ao_cu_mask(struct radeon_device *rdev)
{
	u32 i, j, k, active_cu_number = 0;
	u32 mask, counter, cu_bitmap;
	u32 tmp = 0;

	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			mask = 1;
			cu_bitmap = 0;
			counter = 0;
			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
					/* keep at most 2 CUs per SH always-on */
					if (counter < 2)
						cu_bitmap |= mask;
					counter ++;
				}
				mask <<= 1;
			}

			active_cu_number += counter;
			/* pack the per-SH bitmap into its 8-bit slot */
			tmp |= (cu_bitmap << (i * 16 + j * 8));
		}
	}

	WREG32(RLC_PG_AO_CU_MASK, tmp);

	tmp = RREG32(RLC_MAX_PG_CU);
	tmp &= ~MAX_PU_CU_MASK;
	tmp |= MAX_PU_CU(active_cu_number);
	WREG32(RLC_MAX_PG_CU, tmp);
}
7028 
7029 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
7030 				       bool enable)
7031 {
7032 	u32 data, orig;
7033 
7034 	orig = data = RREG32(RLC_PG_CNTL);
7035 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
7036 		data |= STATIC_PER_CU_PG_ENABLE;
7037 	else
7038 		data &= ~STATIC_PER_CU_PG_ENABLE;
7039 	if (orig != data)
7040 		WREG32(RLC_PG_CNTL, data);
7041 }
7042 
7043 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
7044 					bool enable)
7045 {
7046 	u32 data, orig;
7047 
7048 	orig = data = RREG32(RLC_PG_CNTL);
7049 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
7050 		data |= DYN_PER_CU_PG_ENABLE;
7051 	else
7052 		data &= ~DYN_PER_CU_PG_ENABLE;
7053 	if (orig != data)
7054 		WREG32(RLC_PG_CNTL, data);
7055 }
7056 
/* RLC GPM scratch offsets for the save/restore list and the clear-state
 * descriptor */
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/* One-time GFX power-gating setup: point the RLC at the clear-state
 * buffer, the register save/restore list, and the CP table, then program
 * the PG delay/idle-poll parameters. */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* write the clear-state descriptor: hi addr, lo addr, size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear-state data: zero out the descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the register save/restore list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	/* 256-byte-aligned GPU addresses of the SR buffer and CP table */
	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
7108 
/* Enable/disable all GFX power-gating modes (coarse-grain, static and
 * dynamic per-CU) together. */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
7115 
7116 u32 cik_get_csb_size(struct radeon_device *rdev)
7117 {
7118 	u32 count = 0;
7119 	const struct cs_section_def *sect = NULL;
7120 	const struct cs_extent_def *ext = NULL;
7121 
7122 	if (rdev->rlc.cs_data == NULL)
7123 		return 0;
7124 
7125 	/* begin clear state */
7126 	count += 2;
7127 	/* context control state */
7128 	count += 3;
7129 
7130 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7131 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7132 			if (sect->id == SECT_CONTEXT)
7133 				count += 2 + ext->reg_count;
7134 			else
7135 				return 0;
7136 		}
7137 	}
7138 	/* pa_sc_raster_config/pa_sc_raster_config1 */
7139 	count += 4;
7140 	/* end clear state */
7141 	count += 2;
7142 	/* clear state */
7143 	count += 2;
7144 
7145 	return count;
7146 }
7147 
/**
 * cik_get_csb_buffer - fill the clear-state buffer with PM4 packets
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer (little-endian dwords)
 *
 * Emits the clear-state packet stream: preamble begin, context control,
 * one SET_CONTEXT_REG per extent, the per-family PA_SC_RASTER_CONFIG pair,
 * preamble end, and CLEAR_STATE.  The dword count must match
 * cik_get_csb_size() exactly.
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				/* reg_index is a dword address; 0xa000 is the
				 * context register space base */
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				return;
			}
		}
	}

	/* per-family raster configuration */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
7212 
/* Enable power gating at init time according to rdev->pg_flags: slowdown
 * bits first, then (for GFX PG) the RLC buffers and CP/GDS gating, the
 * always-on CU mask, and finally the GFX PG modes themselves. */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
7227 
/* Disable power gating at teardown, reversing cik_init_pg(): GFX PG modes
 * first, then CP and GDS gating. */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
7238 
7239 /*
7240  * Interrupts
7241  * Starting with r6xx, interrupts are handled via a ring buffer.
7242  * Ring buffers are areas of GPU accessible memory that the GPU
7243  * writes interrupt vectors into and the host reads vectors out of.
7244  * There is a rptr (read pointer) that determines where the
7245  * host is currently reading, and a wptr (write pointer)
7246  * which determines where the GPU has written.  When the
7247  * pointers are equal, the ring is idle.  When the GPU
7248  * writes vectors to the ring buffer, it increments the
7249  * wptr.  When there is an interrupt, the host then starts
7250  * fetching commands and processing them until the pointers are
7251  * equal again at which point it updates the rptr.
7252  */
7253 
7254 /**
7255  * cik_enable_interrupts - Enable the interrupt ring buffer
7256  *
7257  * @rdev: radeon_device pointer
7258  *
7259  * Enable the interrupt ring buffer (CIK).
7260  */
7261 static void cik_enable_interrupts(struct radeon_device *rdev)
7262 {
7263 	u32 ih_cntl = RREG32(IH_CNTL);
7264 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7265 
7266 	ih_cntl |= ENABLE_INTR;
7267 	ih_rb_cntl |= IH_RB_ENABLE;
7268 	WREG32(IH_CNTL, ih_cntl);
7269 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7270 	rdev->ih.enabled = true;
7271 }
7272 
7273 /**
7274  * cik_disable_interrupts - Disable the interrupt ring buffer
7275  *
7276  * @rdev: radeon_device pointer
7277  *
7278  * Disable the interrupt ring buffer (CIK).
7279  */
7280 static void cik_disable_interrupts(struct radeon_device *rdev)
7281 {
7282 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7283 	u32 ih_cntl = RREG32(IH_CNTL);
7284 
7285 	ih_rb_cntl &= ~IH_RB_ENABLE;
7286 	ih_cntl &= ~ENABLE_INTR;
7287 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7288 	WREG32(IH_CNTL, ih_cntl);
7289 	/* set rptr, wptr to 0 */
7290 	WREG32(IH_RB_RPTR, 0);
7291 	WREG32(IH_RB_WPTR, 0);
7292 	rdev->ih.enabled = false;
7293 	rdev->ih.rptr = 0;
7294 }
7295 
7296 /**
7297  * cik_disable_interrupt_state - Disable all interrupt sources
7298  *
7299  * @rdev: radeon_device pointer
7300  *
7301  * Clear all interrupt enable bits used by the driver (CIK).
7302  */
7303 static void cik_disable_interrupt_state(struct radeon_device *rdev)
7304 {
7305 	u32 tmp;
7306 
7307 	/* gfx ring */
7308 	tmp = RREG32(CP_INT_CNTL_RING0) &
7309 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7310 	WREG32(CP_INT_CNTL_RING0, tmp);
7311 	/* sdma */
7312 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7313 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
7314 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7315 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
7316 	/* compute queues */
7317 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
7318 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
7319 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
7320 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
7321 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
7322 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
7323 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
7324 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
7325 	/* grbm */
7326 	WREG32(GRBM_INT_CNTL, 0);
7327 	/* SRBM */
7328 	WREG32(SRBM_INT_CNTL, 0);
7329 	/* vline/vblank, etc. */
7330 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7331 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7332 	if (rdev->num_crtc >= 4) {
7333 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7334 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7335 	}
7336 	if (rdev->num_crtc >= 6) {
7337 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7338 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7339 	}
7340 	/* pflip */
7341 	if (rdev->num_crtc >= 2) {
7342 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
7343 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
7344 	}
7345 	if (rdev->num_crtc >= 4) {
7346 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
7347 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
7348 	}
7349 	if (rdev->num_crtc >= 6) {
7350 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
7351 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
7352 	}
7353 
7354 	/* dac hotplug */
7355 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
7356 
7357 	/* digital hotplug */
7358 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7359 	WREG32(DC_HPD1_INT_CONTROL, tmp);
7360 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7361 	WREG32(DC_HPD2_INT_CONTROL, tmp);
7362 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7363 	WREG32(DC_HPD3_INT_CONTROL, tmp);
7364 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7365 	WREG32(DC_HPD4_INT_CONTROL, tmp);
7366 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7367 	WREG32(DC_HPD5_INT_CONTROL, tmp);
7368 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
7369 	WREG32(DC_HPD6_INT_CONTROL, tmp);
7370 
7371 }
7372 
7373 /**
7374  * cik_irq_init - init and enable the interrupt ring
7375  *
7376  * @rdev: radeon_device pointer
7377  *
7378  * Allocate a ring buffer for the interrupt controller,
7379  * enable the RLC, disable interrupts, enable the IH
7380  * ring buffer and enable it (CIK).
7381  * Called at device load and reume.
7382  * Returns 0 for success, errors for failure.
7383  */
7384 static int cik_irq_init(struct radeon_device *rdev)
7385 {
7386 	int ret = 0;
7387 	int rb_bufsz;
7388 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7389 
7390 	/* allocate ring */
7391 	ret = r600_ih_ring_alloc(rdev);
7392 	if (ret)
7393 		return ret;
7394 
7395 	/* disable irqs */
7396 	cik_disable_interrupts(rdev);
7397 
7398 	/* init rlc */
7399 	ret = cik_rlc_resume(rdev);
7400 	if (ret) {
7401 		r600_ih_ring_fini(rdev);
7402 		return ret;
7403 	}
7404 
7405 	/* setup interrupt control */
7406 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7407 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7408 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7409 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7410 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7411 	 */
7412 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7413 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7414 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7415 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7416 
7417 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7418 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7419 
7420 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7421 		      IH_WPTR_OVERFLOW_CLEAR |
7422 		      (rb_bufsz << 1));
7423 
7424 	if (rdev->wb.enabled)
7425 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7426 
7427 	/* set the writeback address whether it's enabled or not */
7428 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7429 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7430 
7431 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7432 
7433 	/* set rptr, wptr to 0 */
7434 	WREG32(IH_RB_RPTR, 0);
7435 	WREG32(IH_RB_WPTR, 0);
7436 
7437 	/* Default settings for IH_CNTL (disabled at first) */
7438 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7439 	/* RPTR_REARM only works if msi's are enabled */
7440 	if (rdev->msi_enabled)
7441 		ih_cntl |= RPTR_REARM;
7442 	WREG32(IH_CNTL, ih_cntl);
7443 
7444 	/* force the active interrupt state to all disabled */
7445 	cik_disable_interrupt_state(rdev);
7446 
7447 	pci_set_master(rdev->pdev);
7448 
7449 	/* enable irqs */
7450 	cik_enable_interrupts(rdev);
7451 
7452 	return ret;
7453 }
7454 
7455 /**
7456  * cik_irq_set - enable/disable interrupt sources
7457  *
7458  * @rdev: radeon_device pointer
7459  *
7460  * Enable interrupt sources on the GPU (vblanks, hpd,
7461  * etc.) (CIK).
7462  * Returns 0 for success, errors for failure.
7463  */
7464 int cik_irq_set(struct radeon_device *rdev)
7465 {
7466 	u32 cp_int_cntl;
7467 	u32 cp_m1p0;
7468 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
7469 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
7470 	u32 grbm_int_cntl = 0;
7471 	u32 dma_cntl, dma_cntl1;
7472 
7473 	if (!rdev->irq.installed) {
7474 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
7475 		return -EINVAL;
7476 	}
7477 	/* don't enable anything if the ih is disabled */
7478 	if (!rdev->ih.enabled) {
7479 		cik_disable_interrupts(rdev);
7480 		/* force the active interrupt state to all disabled */
7481 		cik_disable_interrupt_state(rdev);
7482 		return 0;
7483 	}
7484 
7485 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
7486 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
7487 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
7488 
7489 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7490 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7491 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7492 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7493 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7494 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
7495 
7496 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
7497 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
7498 
7499 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
7500 
7501 	/* enable CP interrupts on all rings */
7502 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
7503 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
7504 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
7505 	}
7506 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
7507 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7508 		DRM_DEBUG("si_irq_set: sw int cp1\n");
7509 		if (ring->me == 1) {
7510 			switch (ring->pipe) {
7511 			case 0:
7512 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7513 				break;
7514 			default:
7515 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7516 				break;
7517 			}
7518 		} else {
7519 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7520 		}
7521 	}
7522 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7523 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7524 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7525 		if (ring->me == 1) {
7526 			switch (ring->pipe) {
7527 			case 0:
7528 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7529 				break;
7530 			default:
7531 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7532 				break;
7533 			}
7534 		} else {
7535 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7536 		}
7537 	}
7538 
7539 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7540 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7541 		dma_cntl |= TRAP_ENABLE;
7542 	}
7543 
7544 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7545 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7546 		dma_cntl1 |= TRAP_ENABLE;
7547 	}
7548 
7549 	if (rdev->irq.crtc_vblank_int[0] ||
7550 	    atomic_read(&rdev->irq.pflip[0])) {
7551 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7552 		crtc1 |= VBLANK_INTERRUPT_MASK;
7553 	}
7554 	if (rdev->irq.crtc_vblank_int[1] ||
7555 	    atomic_read(&rdev->irq.pflip[1])) {
7556 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7557 		crtc2 |= VBLANK_INTERRUPT_MASK;
7558 	}
7559 	if (rdev->irq.crtc_vblank_int[2] ||
7560 	    atomic_read(&rdev->irq.pflip[2])) {
7561 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7562 		crtc3 |= VBLANK_INTERRUPT_MASK;
7563 	}
7564 	if (rdev->irq.crtc_vblank_int[3] ||
7565 	    atomic_read(&rdev->irq.pflip[3])) {
7566 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7567 		crtc4 |= VBLANK_INTERRUPT_MASK;
7568 	}
7569 	if (rdev->irq.crtc_vblank_int[4] ||
7570 	    atomic_read(&rdev->irq.pflip[4])) {
7571 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7572 		crtc5 |= VBLANK_INTERRUPT_MASK;
7573 	}
7574 	if (rdev->irq.crtc_vblank_int[5] ||
7575 	    atomic_read(&rdev->irq.pflip[5])) {
7576 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7577 		crtc6 |= VBLANK_INTERRUPT_MASK;
7578 	}
7579 	if (rdev->irq.hpd[0]) {
7580 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7581 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7582 	}
7583 	if (rdev->irq.hpd[1]) {
7584 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7585 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7586 	}
7587 	if (rdev->irq.hpd[2]) {
7588 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7589 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7590 	}
7591 	if (rdev->irq.hpd[3]) {
7592 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7593 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7594 	}
7595 	if (rdev->irq.hpd[4]) {
7596 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7597 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7598 	}
7599 	if (rdev->irq.hpd[5]) {
7600 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7601 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
7602 	}
7603 
7604 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7605 
7606 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7607 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7608 
7609 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7610 
7611 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7612 
7613 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7614 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7615 	if (rdev->num_crtc >= 4) {
7616 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7617 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7618 	}
7619 	if (rdev->num_crtc >= 6) {
7620 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7621 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7622 	}
7623 
7624 	if (rdev->num_crtc >= 2) {
7625 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7626 		       GRPH_PFLIP_INT_MASK);
7627 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7628 		       GRPH_PFLIP_INT_MASK);
7629 	}
7630 	if (rdev->num_crtc >= 4) {
7631 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7632 		       GRPH_PFLIP_INT_MASK);
7633 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7634 		       GRPH_PFLIP_INT_MASK);
7635 	}
7636 	if (rdev->num_crtc >= 6) {
7637 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7638 		       GRPH_PFLIP_INT_MASK);
7639 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7640 		       GRPH_PFLIP_INT_MASK);
7641 	}
7642 
7643 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7644 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7645 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7646 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7647 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7648 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7649 
7650 	/* posting read */
7651 	RREG32(SRBM_STATUS);
7652 
7653 	return 0;
7654 }
7655 
7656 /**
7657  * cik_irq_ack - ack interrupt sources
7658  *
7659  * @rdev: radeon_device pointer
7660  *
7661  * Ack interrupt sources on the GPU (vblanks, hpd,
7662  * etc.) (CIK).  Certain interrupts sources are sw
7663  * generated and do not require an explicit ack.
7664  */
7665 static inline void cik_irq_ack(struct radeon_device *rdev)
7666 {
7667 	u32 tmp;
7668 
7669 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7670 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7671 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7672 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7673 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7674 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7675 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7676 
7677 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7678 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7679 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7680 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7681 	if (rdev->num_crtc >= 4) {
7682 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7683 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7684 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7685 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7686 	}
7687 	if (rdev->num_crtc >= 6) {
7688 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7689 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7690 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7691 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7692 	}
7693 
7694 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7695 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7696 		       GRPH_PFLIP_INT_CLEAR);
7697 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7698 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7699 		       GRPH_PFLIP_INT_CLEAR);
7700 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7701 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7702 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7703 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7704 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7705 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7706 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7707 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7708 
7709 	if (rdev->num_crtc >= 4) {
7710 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7711 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7712 			       GRPH_PFLIP_INT_CLEAR);
7713 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7714 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7715 			       GRPH_PFLIP_INT_CLEAR);
7716 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7717 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7718 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7719 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7720 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7721 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7722 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7723 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7724 	}
7725 
7726 	if (rdev->num_crtc >= 6) {
7727 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7728 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7729 			       GRPH_PFLIP_INT_CLEAR);
7730 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7731 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7732 			       GRPH_PFLIP_INT_CLEAR);
7733 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7734 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7735 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7736 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7737 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7738 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7739 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7740 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7741 	}
7742 
7743 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7744 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7745 		tmp |= DC_HPDx_INT_ACK;
7746 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7747 	}
7748 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7749 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7750 		tmp |= DC_HPDx_INT_ACK;
7751 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7752 	}
7753 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7754 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7755 		tmp |= DC_HPDx_INT_ACK;
7756 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7757 	}
7758 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7759 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7760 		tmp |= DC_HPDx_INT_ACK;
7761 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7762 	}
7763 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7764 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7765 		tmp |= DC_HPDx_INT_ACK;
7766 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7767 	}
7768 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7769 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7770 		tmp |= DC_HPDx_INT_ACK;
7771 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7772 	}
7773 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7774 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7775 		tmp |= DC_HPDx_RX_INT_ACK;
7776 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7777 	}
7778 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7779 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7780 		tmp |= DC_HPDx_RX_INT_ACK;
7781 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7782 	}
7783 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7784 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7785 		tmp |= DC_HPDx_RX_INT_ACK;
7786 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7787 	}
7788 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7789 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7790 		tmp |= DC_HPDx_RX_INT_ACK;
7791 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7792 	}
7793 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7794 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7795 		tmp |= DC_HPDx_RX_INT_ACK;
7796 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7797 	}
7798 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7799 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7800 		tmp |= DC_HPDx_RX_INT_ACK;
7801 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7802 	}
7803 }
7804 
7805 /**
7806  * cik_irq_disable - disable interrupts
7807  *
7808  * @rdev: radeon_device pointer
7809  *
7810  * Disable interrupts on the hw (CIK).
7811  */
7812 static void cik_irq_disable(struct radeon_device *rdev)
7813 {
7814 	cik_disable_interrupts(rdev);
7815 	/* Wait and acknowledge irq */
7816 	mdelay(1);
7817 	cik_irq_ack(rdev);
7818 	cik_disable_interrupt_state(rdev);
7819 }
7820 
7821 /**
7822  * cik_irq_disable - disable interrupts for suspend
7823  *
7824  * @rdev: radeon_device pointer
7825  *
7826  * Disable interrupts and stop the RLC (CIK).
7827  * Used for suspend.
7828  */
7829 static void cik_irq_suspend(struct radeon_device *rdev)
7830 {
7831 	cik_irq_disable(rdev);
7832 	cik_rlc_stop(rdev);
7833 }
7834 
7835 /**
7836  * cik_irq_fini - tear down interrupt support
7837  *
7838  * @rdev: radeon_device pointer
7839  *
7840  * Disable interrupts on the hw and free the IH ring
7841  * buffer (CIK).
7842  * Used for driver unload.
7843  */
7844 static void cik_irq_fini(struct radeon_device *rdev)
7845 {
7846 	cik_irq_suspend(rdev);
7847 	r600_ih_ring_fini(rdev);
7848 }
7849 
7850 /**
7851  * cik_get_ih_wptr - get the IH ring buffer wptr
7852  *
7853  * @rdev: radeon_device pointer
7854  *
7855  * Get the IH ring buffer wptr from either the register
7856  * or the writeback memory buffer (CIK).  Also check for
7857  * ring buffer overflow and deal with it.
7858  * Used by cik_irq_process().
7859  * Returns the value of the wptr.
7860  */
7861 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7862 {
7863 	u32 wptr, tmp;
7864 
7865 	if (rdev->wb.enabled)
7866 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7867 	else
7868 		wptr = RREG32(IH_RB_WPTR);
7869 
7870 	if (wptr & RB_OVERFLOW) {
7871 		wptr &= ~RB_OVERFLOW;
7872 		/* When a ring buffer overflow happen start parsing interrupt
7873 		 * from the last not overwritten vector (wptr + 16). Hopefully
7874 		 * this should allow us to catchup.
7875 		 */
7876 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
7877 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
7878 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7879 		tmp = RREG32(IH_RB_CNTL);
7880 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7881 		WREG32(IH_RB_CNTL, tmp);
7882 	}
7883 	return (wptr & rdev->ih.ptr_mask);
7884 }
7885 
7886 /*        CIK IV Ring
7887  * Each IV ring entry is 128 bits:
7888  * [7:0]    - interrupt source id
7889  * [31:8]   - reserved
7890  * [59:32]  - interrupt source data
7891  * [63:60]  - reserved
7892  * [71:64]  - RINGID
7893  *            CP:
7894  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7895  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7896  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7897  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7898  *            PIPE_ID - ME0 0=3D
7899  *                    - ME1&2 compute dispatcher (4 pipes each)
7900  *            SDMA:
7901  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7902  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7903  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7904  * [79:72]  - VMID
7905  * [95:80]  - PASID
7906  * [127:96] - reserved
7907  */
7908 /**
7909  * cik_irq_process - interrupt handler
7910  *
7911  * @rdev: radeon_device pointer
7912  *
7913  * Interrupt hander (CIK).  Walk the IH ring,
7914  * ack interrupts and schedule work to handle
7915  * interrupt events.
7916  * Returns irq process return code.
7917  */
7918 int cik_irq_process(struct radeon_device *rdev)
7919 {
7920 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7921 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7922 	u32 wptr;
7923 	u32 rptr;
7924 	u32 src_id, src_data, ring_id;
7925 	u8 me_id, pipe_id, queue_id;
7926 	u32 ring_index;
7927 	bool queue_hotplug = false;
7928 	bool queue_dp = false;
7929 	bool queue_reset = false;
7930 	u32 addr, status, mc_client;
7931 	bool queue_thermal = false;
7932 
7933 	if (!rdev->ih.enabled || rdev->shutdown)
7934 		return IRQ_NONE;
7935 
7936 	wptr = cik_get_ih_wptr(rdev);
7937 
7938 restart_ih:
7939 	/* is somebody else already processing irqs? */
7940 	if (atomic_xchg(&rdev->ih.lock, 1))
7941 		return IRQ_NONE;
7942 
7943 	rptr = rdev->ih.rptr;
7944 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7945 
7946 	/* Order reading of wptr vs. reading of IH ring data */
7947 	rmb();
7948 
7949 	/* display interrupts */
7950 	cik_irq_ack(rdev);
7951 
7952 	while (rptr != wptr) {
7953 		/* wptr/rptr are in bytes! */
7954 		ring_index = rptr / 4;
7955 
7956 		radeon_kfd_interrupt(rdev,
7957 				(const void *) &rdev->ih.ring[ring_index]);
7958 
7959 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7960 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7961 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7962 
7963 		switch (src_id) {
7964 		case 1: /* D1 vblank/vline */
7965 			switch (src_data) {
7966 			case 0: /* D1 vblank */
7967 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7968 					if (rdev->irq.crtc_vblank_int[0]) {
7969 						drm_handle_vblank(rdev->ddev, 0);
7970 						rdev->pm.vblank_sync = true;
7971 						wake_up(&rdev->irq.vblank_queue);
7972 					}
7973 					if (atomic_read(&rdev->irq.pflip[0]))
7974 						radeon_crtc_handle_vblank(rdev, 0);
7975 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7976 					DRM_DEBUG("IH: D1 vblank\n");
7977 				}
7978 				break;
7979 			case 1: /* D1 vline */
7980 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7981 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7982 					DRM_DEBUG("IH: D1 vline\n");
7983 				}
7984 				break;
7985 			default:
7986 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7987 				break;
7988 			}
7989 			break;
7990 		case 2: /* D2 vblank/vline */
7991 			switch (src_data) {
7992 			case 0: /* D2 vblank */
7993 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7994 					if (rdev->irq.crtc_vblank_int[1]) {
7995 						drm_handle_vblank(rdev->ddev, 1);
7996 						rdev->pm.vblank_sync = true;
7997 						wake_up(&rdev->irq.vblank_queue);
7998 					}
7999 					if (atomic_read(&rdev->irq.pflip[1]))
8000 						radeon_crtc_handle_vblank(rdev, 1);
8001 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
8002 					DRM_DEBUG("IH: D2 vblank\n");
8003 				}
8004 				break;
8005 			case 1: /* D2 vline */
8006 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
8007 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
8008 					DRM_DEBUG("IH: D2 vline\n");
8009 				}
8010 				break;
8011 			default:
8012 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8013 				break;
8014 			}
8015 			break;
8016 		case 3: /* D3 vblank/vline */
8017 			switch (src_data) {
8018 			case 0: /* D3 vblank */
8019 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
8020 					if (rdev->irq.crtc_vblank_int[2]) {
8021 						drm_handle_vblank(rdev->ddev, 2);
8022 						rdev->pm.vblank_sync = true;
8023 						wake_up(&rdev->irq.vblank_queue);
8024 					}
8025 					if (atomic_read(&rdev->irq.pflip[2]))
8026 						radeon_crtc_handle_vblank(rdev, 2);
8027 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
8028 					DRM_DEBUG("IH: D3 vblank\n");
8029 				}
8030 				break;
8031 			case 1: /* D3 vline */
8032 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
8033 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
8034 					DRM_DEBUG("IH: D3 vline\n");
8035 				}
8036 				break;
8037 			default:
8038 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8039 				break;
8040 			}
8041 			break;
8042 		case 4: /* D4 vblank/vline */
8043 			switch (src_data) {
8044 			case 0: /* D4 vblank */
8045 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
8046 					if (rdev->irq.crtc_vblank_int[3]) {
8047 						drm_handle_vblank(rdev->ddev, 3);
8048 						rdev->pm.vblank_sync = true;
8049 						wake_up(&rdev->irq.vblank_queue);
8050 					}
8051 					if (atomic_read(&rdev->irq.pflip[3]))
8052 						radeon_crtc_handle_vblank(rdev, 3);
8053 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
8054 					DRM_DEBUG("IH: D4 vblank\n");
8055 				}
8056 				break;
8057 			case 1: /* D4 vline */
8058 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
8059 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
8060 					DRM_DEBUG("IH: D4 vline\n");
8061 				}
8062 				break;
8063 			default:
8064 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8065 				break;
8066 			}
8067 			break;
8068 		case 5: /* D5 vblank/vline */
8069 			switch (src_data) {
8070 			case 0: /* D5 vblank */
8071 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
8072 					if (rdev->irq.crtc_vblank_int[4]) {
8073 						drm_handle_vblank(rdev->ddev, 4);
8074 						rdev->pm.vblank_sync = true;
8075 						wake_up(&rdev->irq.vblank_queue);
8076 					}
8077 					if (atomic_read(&rdev->irq.pflip[4]))
8078 						radeon_crtc_handle_vblank(rdev, 4);
8079 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
8080 					DRM_DEBUG("IH: D5 vblank\n");
8081 				}
8082 				break;
8083 			case 1: /* D5 vline */
8084 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
8085 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
8086 					DRM_DEBUG("IH: D5 vline\n");
8087 				}
8088 				break;
8089 			default:
8090 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8091 				break;
8092 			}
8093 			break;
8094 		case 6: /* D6 vblank/vline */
8095 			switch (src_data) {
8096 			case 0: /* D6 vblank */
8097 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
8098 					if (rdev->irq.crtc_vblank_int[5]) {
8099 						drm_handle_vblank(rdev->ddev, 5);
8100 						rdev->pm.vblank_sync = true;
8101 						wake_up(&rdev->irq.vblank_queue);
8102 					}
8103 					if (atomic_read(&rdev->irq.pflip[5]))
8104 						radeon_crtc_handle_vblank(rdev, 5);
8105 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
8106 					DRM_DEBUG("IH: D6 vblank\n");
8107 				}
8108 				break;
8109 			case 1: /* D6 vline */
8110 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
8111 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
8112 					DRM_DEBUG("IH: D6 vline\n");
8113 				}
8114 				break;
8115 			default:
8116 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8117 				break;
8118 			}
8119 			break;
8120 		case 8: /* D1 page flip */
8121 		case 10: /* D2 page flip */
8122 		case 12: /* D3 page flip */
8123 		case 14: /* D4 page flip */
8124 		case 16: /* D5 page flip */
8125 		case 18: /* D6 page flip */
8126 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8127 			if (radeon_use_pflipirq > 0)
8128 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8129 			break;
8130 		case 42: /* HPD hotplug */
8131 			switch (src_data) {
8132 			case 0:
8133 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
8134 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8135 					queue_hotplug = true;
8136 					DRM_DEBUG("IH: HPD1\n");
8137 				}
8138 				break;
8139 			case 1:
8140 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
8141 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8142 					queue_hotplug = true;
8143 					DRM_DEBUG("IH: HPD2\n");
8144 				}
8145 				break;
8146 			case 2:
8147 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
8148 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8149 					queue_hotplug = true;
8150 					DRM_DEBUG("IH: HPD3\n");
8151 				}
8152 				break;
8153 			case 3:
8154 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
8155 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8156 					queue_hotplug = true;
8157 					DRM_DEBUG("IH: HPD4\n");
8158 				}
8159 				break;
8160 			case 4:
8161 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8162 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8163 					queue_hotplug = true;
8164 					DRM_DEBUG("IH: HPD5\n");
8165 				}
8166 				break;
8167 			case 5:
8168 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8169 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8170 					queue_hotplug = true;
8171 					DRM_DEBUG("IH: HPD6\n");
8172 				}
8173 				break;
8174 			case 6:
8175 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
8176 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
8177 					queue_dp = true;
8178 					DRM_DEBUG("IH: HPD_RX 1\n");
8179 				}
8180 				break;
8181 			case 7:
8182 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
8183 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
8184 					queue_dp = true;
8185 					DRM_DEBUG("IH: HPD_RX 2\n");
8186 				}
8187 				break;
8188 			case 8:
8189 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
8190 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
8191 					queue_dp = true;
8192 					DRM_DEBUG("IH: HPD_RX 3\n");
8193 				}
8194 				break;
8195 			case 9:
8196 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
8197 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
8198 					queue_dp = true;
8199 					DRM_DEBUG("IH: HPD_RX 4\n");
8200 				}
8201 				break;
8202 			case 10:
8203 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
8204 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
8205 					queue_dp = true;
8206 					DRM_DEBUG("IH: HPD_RX 5\n");
8207 				}
8208 				break;
8209 			case 11:
8210 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
8211 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
8212 					queue_dp = true;
8213 					DRM_DEBUG("IH: HPD_RX 6\n");
8214 				}
8215 				break;
8216 			default:
8217 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8218 				break;
8219 			}
8220 			break;
8221 		case 96:
8222 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8223 			WREG32(SRBM_INT_ACK, 0x1);
8224 			break;
8225 		case 124: /* UVD */
8226 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8227 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8228 			break;
8229 		case 146:
8230 		case 147:
8231 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8232 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8233 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8234 			/* reset addr and status */
8235 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8236 			if (addr == 0x0 && status == 0x0)
8237 				break;
8238 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8239 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8240 				addr);
8241 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8242 				status);
8243 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8244 			break;
8245 		case 167: /* VCE */
8246 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8247 			switch (src_data) {
8248 			case 0:
8249 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8250 				break;
8251 			case 1:
8252 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8253 				break;
8254 			default:
8255 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8256 				break;
8257 			}
8258 			break;
8259 		case 176: /* GFX RB CP_INT */
8260 		case 177: /* GFX IB CP_INT */
8261 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8262 			break;
8263 		case 181: /* CP EOP event */
8264 			DRM_DEBUG("IH: CP EOP\n");
8265 			/* XXX check the bitfield order! */
8266 			me_id = (ring_id & 0x60) >> 5;
8267 			pipe_id = (ring_id & 0x18) >> 3;
8268 			queue_id = (ring_id & 0x7) >> 0;
8269 			switch (me_id) {
8270 			case 0:
8271 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8272 				break;
8273 			case 1:
8274 			case 2:
8275 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8276 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8277 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8278 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8279 				break;
8280 			}
8281 			break;
8282 		case 184: /* CP Privileged reg access */
8283 			DRM_ERROR("Illegal register access in command stream\n");
8284 			/* XXX check the bitfield order! */
8285 			me_id = (ring_id & 0x60) >> 5;
8286 			pipe_id = (ring_id & 0x18) >> 3;
8287 			queue_id = (ring_id & 0x7) >> 0;
8288 			switch (me_id) {
8289 			case 0:
8290 				/* This results in a full GPU reset, but all we need to do is soft
8291 				 * reset the CP for gfx
8292 				 */
8293 				queue_reset = true;
8294 				break;
8295 			case 1:
8296 				/* XXX compute */
8297 				queue_reset = true;
8298 				break;
8299 			case 2:
8300 				/* XXX compute */
8301 				queue_reset = true;
8302 				break;
8303 			}
8304 			break;
8305 		case 185: /* CP Privileged inst */
8306 			DRM_ERROR("Illegal instruction in command stream\n");
8307 			/* XXX check the bitfield order! */
8308 			me_id = (ring_id & 0x60) >> 5;
8309 			pipe_id = (ring_id & 0x18) >> 3;
8310 			queue_id = (ring_id & 0x7) >> 0;
8311 			switch (me_id) {
8312 			case 0:
8313 				/* This results in a full GPU reset, but all we need to do is soft
8314 				 * reset the CP for gfx
8315 				 */
8316 				queue_reset = true;
8317 				break;
8318 			case 1:
8319 				/* XXX compute */
8320 				queue_reset = true;
8321 				break;
8322 			case 2:
8323 				/* XXX compute */
8324 				queue_reset = true;
8325 				break;
8326 			}
8327 			break;
8328 		case 224: /* SDMA trap event */
8329 			/* XXX check the bitfield order! */
8330 			me_id = (ring_id & 0x3) >> 0;
8331 			queue_id = (ring_id & 0xc) >> 2;
8332 			DRM_DEBUG("IH: SDMA trap\n");
8333 			switch (me_id) {
8334 			case 0:
8335 				switch (queue_id) {
8336 				case 0:
8337 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8338 					break;
8339 				case 1:
8340 					/* XXX compute */
8341 					break;
8342 				case 2:
8343 					/* XXX compute */
8344 					break;
8345 				}
8346 				break;
8347 			case 1:
8348 				switch (queue_id) {
8349 				case 0:
8350 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8351 					break;
8352 				case 1:
8353 					/* XXX compute */
8354 					break;
8355 				case 2:
8356 					/* XXX compute */
8357 					break;
8358 				}
8359 				break;
8360 			}
8361 			break;
8362 		case 230: /* thermal low to high */
8363 			DRM_DEBUG("IH: thermal low to high\n");
8364 			rdev->pm.dpm.thermal.high_to_low = false;
8365 			queue_thermal = true;
8366 			break;
8367 		case 231: /* thermal high to low */
8368 			DRM_DEBUG("IH: thermal high to low\n");
8369 			rdev->pm.dpm.thermal.high_to_low = true;
8370 			queue_thermal = true;
8371 			break;
8372 		case 233: /* GUI IDLE */
8373 			DRM_DEBUG("IH: GUI idle\n");
8374 			break;
8375 		case 241: /* SDMA Privileged inst */
8376 		case 247: /* SDMA Privileged inst */
8377 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8378 			/* XXX check the bitfield order! */
8379 			me_id = (ring_id & 0x3) >> 0;
8380 			queue_id = (ring_id & 0xc) >> 2;
8381 			switch (me_id) {
8382 			case 0:
8383 				switch (queue_id) {
8384 				case 0:
8385 					queue_reset = true;
8386 					break;
8387 				case 1:
8388 					/* XXX compute */
8389 					queue_reset = true;
8390 					break;
8391 				case 2:
8392 					/* XXX compute */
8393 					queue_reset = true;
8394 					break;
8395 				}
8396 				break;
8397 			case 1:
8398 				switch (queue_id) {
8399 				case 0:
8400 					queue_reset = true;
8401 					break;
8402 				case 1:
8403 					/* XXX compute */
8404 					queue_reset = true;
8405 					break;
8406 				case 2:
8407 					/* XXX compute */
8408 					queue_reset = true;
8409 					break;
8410 				}
8411 				break;
8412 			}
8413 			break;
8414 		default:
8415 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8416 			break;
8417 		}
8418 
8419 		/* wptr/rptr are in bytes! */
8420 		rptr += 16;
8421 		rptr &= rdev->ih.ptr_mask;
8422 		WREG32(IH_RB_RPTR, rptr);
8423 	}
8424 	if (queue_dp)
8425 		schedule_work(&rdev->dp_work);
8426 	if (queue_hotplug)
8427 		schedule_work(&rdev->hotplug_work);
8428 	if (queue_reset) {
8429 		rdev->needs_reset = true;
8430 		wake_up_all(&rdev->fence_queue);
8431 	}
8432 	if (queue_thermal)
8433 		schedule_work(&rdev->pm.dpm.thermal.work);
8434 	rdev->ih.rptr = rptr;
8435 	atomic_set(&rdev->ih.lock, 0);
8436 
8437 	/* make sure wptr hasn't changed while processing */
8438 	wptr = cik_get_ih_wptr(rdev);
8439 	if (wptr != rptr)
8440 		goto restart_ih;
8441 
8442 	return IRQ_HANDLED;
8443 }
8444 
8445 /*
8446  * startup/shutdown callbacks
8447  */
8448 /**
8449  * cik_startup - program the asic to a functional state
8450  *
8451  * @rdev: radeon_device pointer
8452  *
8453  * Programs the asic to a functional state (CIK).
8454  * Called by cik_init() and cik_resume().
8455  * Returns 0 for success, error for failure.
8456  */
8457 static int cik_startup(struct radeon_device *rdev)
8458 {
8459 	struct radeon_ring *ring;
8460 	u32 nop;
8461 	int r;
8462 
8463 	/* enable pcie gen2/3 link */
8464 	cik_pcie_gen3_enable(rdev);
8465 	/* enable aspm */
8466 	cik_program_aspm(rdev);
8467 
8468 	/* scratch needs to be initialized before MC */
8469 	r = r600_vram_scratch_init(rdev);
8470 	if (r)
8471 		return r;
8472 
8473 	cik_mc_program(rdev);
8474 
8475 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8476 		r = ci_mc_load_microcode(rdev);
8477 		if (r) {
8478 			DRM_ERROR("Failed to load MC firmware!\n");
8479 			return r;
8480 		}
8481 	}
8482 
8483 	r = cik_pcie_gart_enable(rdev);
8484 	if (r)
8485 		return r;
8486 	cik_gpu_init(rdev);
8487 
8488 	/* allocate rlc buffers */
8489 	if (rdev->flags & RADEON_IS_IGP) {
8490 		if (rdev->family == CHIP_KAVERI) {
8491 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8492 			rdev->rlc.reg_list_size =
8493 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8494 		} else {
8495 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8496 			rdev->rlc.reg_list_size =
8497 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8498 		}
8499 	}
8500 	rdev->rlc.cs_data = ci_cs_data;
8501 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8502 	r = sumo_rlc_init(rdev);
8503 	if (r) {
8504 		DRM_ERROR("Failed to init rlc BOs!\n");
8505 		return r;
8506 	}
8507 
8508 	/* allocate wb buffer */
8509 	r = radeon_wb_init(rdev);
8510 	if (r)
8511 		return r;
8512 
8513 	/* allocate mec buffers */
8514 	r = cik_mec_init(rdev);
8515 	if (r) {
8516 		DRM_ERROR("Failed to init MEC BOs!\n");
8517 		return r;
8518 	}
8519 
8520 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8521 	if (r) {
8522 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8523 		return r;
8524 	}
8525 
8526 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8527 	if (r) {
8528 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8529 		return r;
8530 	}
8531 
8532 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8533 	if (r) {
8534 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8535 		return r;
8536 	}
8537 
8538 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8539 	if (r) {
8540 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8541 		return r;
8542 	}
8543 
8544 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8545 	if (r) {
8546 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8547 		return r;
8548 	}
8549 
8550 	r = radeon_uvd_resume(rdev);
8551 	if (!r) {
8552 		r = uvd_v4_2_resume(rdev);
8553 		if (!r) {
8554 			r = radeon_fence_driver_start_ring(rdev,
8555 							   R600_RING_TYPE_UVD_INDEX);
8556 			if (r)
8557 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8558 		}
8559 	}
8560 	if (r)
8561 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8562 
8563 	r = radeon_vce_resume(rdev);
8564 	if (!r) {
8565 		r = vce_v2_0_resume(rdev);
8566 		if (!r)
8567 			r = radeon_fence_driver_start_ring(rdev,
8568 							   TN_RING_TYPE_VCE1_INDEX);
8569 		if (!r)
8570 			r = radeon_fence_driver_start_ring(rdev,
8571 							   TN_RING_TYPE_VCE2_INDEX);
8572 	}
8573 	if (r) {
8574 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8575 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8576 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8577 	}
8578 
8579 	/* Enable IRQ */
8580 	if (!rdev->irq.installed) {
8581 		r = radeon_irq_kms_init(rdev);
8582 		if (r)
8583 			return r;
8584 	}
8585 
8586 	r = cik_irq_init(rdev);
8587 	if (r) {
8588 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8589 		radeon_irq_kms_fini(rdev);
8590 		return r;
8591 	}
8592 	cik_irq_set(rdev);
8593 
8594 	if (rdev->family == CHIP_HAWAII) {
8595 		if (rdev->new_fw)
8596 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8597 		else
8598 			nop = RADEON_CP_PACKET2;
8599 	} else {
8600 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8601 	}
8602 
8603 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8604 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8605 			     nop);
8606 	if (r)
8607 		return r;
8608 
8609 	/* set up the compute queues */
8610 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8611 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8612 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8613 			     nop);
8614 	if (r)
8615 		return r;
8616 	ring->me = 1; /* first MEC */
8617 	ring->pipe = 0; /* first pipe */
8618 	ring->queue = 0; /* first queue */
8619 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8620 
8621 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8622 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8623 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8624 			     nop);
8625 	if (r)
8626 		return r;
8627 	/* dGPU only have 1 MEC */
8628 	ring->me = 1; /* first MEC */
8629 	ring->pipe = 0; /* first pipe */
8630 	ring->queue = 1; /* second queue */
8631 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8632 
8633 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8634 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8635 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8636 	if (r)
8637 		return r;
8638 
8639 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8640 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8641 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8642 	if (r)
8643 		return r;
8644 
8645 	r = cik_cp_resume(rdev);
8646 	if (r)
8647 		return r;
8648 
8649 	r = cik_sdma_resume(rdev);
8650 	if (r)
8651 		return r;
8652 
8653 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8654 	if (ring->ring_size) {
8655 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8656 				     RADEON_CP_PACKET2);
8657 		if (!r)
8658 			r = uvd_v1_0_init(rdev);
8659 		if (r)
8660 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8661 	}
8662 
8663 	r = -ENOENT;
8664 
8665 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8666 	if (ring->ring_size)
8667 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8668 				     VCE_CMD_NO_OP);
8669 
8670 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8671 	if (ring->ring_size)
8672 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8673 				     VCE_CMD_NO_OP);
8674 
8675 	if (!r)
8676 		r = vce_v1_0_init(rdev);
8677 	else if (r != -ENOENT)
8678 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8679 
8680 	r = radeon_ib_pool_init(rdev);
8681 	if (r) {
8682 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8683 		return r;
8684 	}
8685 
8686 	r = radeon_vm_manager_init(rdev);
8687 	if (r) {
8688 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8689 		return r;
8690 	}
8691 
8692 	r = radeon_audio_init(rdev);
8693 	if (r)
8694 		return r;
8695 
8696 	r = radeon_kfd_resume(rdev);
8697 	if (r)
8698 		return r;
8699 
8700 	return 0;
8701 }
8702 
8703 /**
8704  * cik_resume - resume the asic to a functional state
8705  *
8706  * @rdev: radeon_device pointer
8707  *
8708  * Programs the asic to a functional state (CIK).
8709  * Called at resume.
8710  * Returns 0 for success, error for failure.
8711  */
8712 int cik_resume(struct radeon_device *rdev)
8713 {
8714 	int r;
8715 
8716 	/* post card */
8717 	atom_asic_init(rdev->mode_info.atom_context);
8718 
8719 	/* init golden registers */
8720 	cik_init_golden_registers(rdev);
8721 
8722 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8723 		radeon_pm_resume(rdev);
8724 
8725 	rdev->accel_working = true;
8726 	r = cik_startup(rdev);
8727 	if (r) {
8728 		DRM_ERROR("cik startup failed on resume\n");
8729 		rdev->accel_working = false;
8730 		return r;
8731 	}
8732 
8733 	return r;
8734 
8735 }
8736 
8737 /**
8738  * cik_suspend - suspend the asic
8739  *
8740  * @rdev: radeon_device pointer
8741  *
8742  * Bring the chip into a state suitable for suspend (CIK).
8743  * Called at suspend.
8744  * Returns 0 for success.
8745  */
8746 int cik_suspend(struct radeon_device *rdev)
8747 {
8748 	radeon_kfd_suspend(rdev);
8749 	radeon_pm_suspend(rdev);
8750 	radeon_audio_fini(rdev);
8751 	radeon_vm_manager_fini(rdev);
8752 	cik_cp_enable(rdev, false);
8753 	cik_sdma_enable(rdev, false);
8754 	uvd_v1_0_fini(rdev);
8755 	radeon_uvd_suspend(rdev);
8756 	radeon_vce_suspend(rdev);
8757 	cik_fini_pg(rdev);
8758 	cik_fini_cg(rdev);
8759 	cik_irq_suspend(rdev);
8760 	radeon_wb_disable(rdev);
8761 	cik_pcie_gart_disable(rdev);
8762 	return 0;
8763 }
8764 
8765 /* Plan is to move initialization in that function and use
8766  * helper function so that radeon_device_init pretty much
8767  * do nothing more than calling asic specific function. This
8768  * should also allow to remove a bunch of callback function
8769  * like vram_info.
8770  */
8771 /**
8772  * cik_init - asic specific driver and hw init
8773  *
8774  * @rdev: radeon_device pointer
8775  *
8776  * Setup asic specific driver variables and program the hw
8777  * to a functional state (CIK).
8778  * Called at driver startup.
8779  * Returns 0 for success, errors for failure.
8780  */
8781 int cik_init(struct radeon_device *rdev)
8782 {
8783 	struct radeon_ring *ring;
8784 	int r;
8785 
8786 	/* Read BIOS */
8787 	if (!radeon_get_bios(rdev)) {
8788 		if (ASIC_IS_AVIVO(rdev))
8789 			return -EINVAL;
8790 	}
8791 	/* Must be an ATOMBIOS */
8792 	if (!rdev->is_atom_bios) {
8793 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8794 		return -EINVAL;
8795 	}
8796 	r = radeon_atombios_init(rdev);
8797 	if (r)
8798 		return r;
8799 
8800 	/* Post card if necessary */
8801 	if (!radeon_card_posted(rdev)) {
8802 		if (!rdev->bios) {
8803 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8804 			return -EINVAL;
8805 		}
8806 		DRM_INFO("GPU not posted. posting now...\n");
8807 		atom_asic_init(rdev->mode_info.atom_context);
8808 	}
8809 	/* init golden registers */
8810 	cik_init_golden_registers(rdev);
8811 	/* Initialize scratch registers */
8812 	cik_scratch_init(rdev);
8813 	/* Initialize surface registers */
8814 	radeon_surface_init(rdev);
8815 	/* Initialize clocks */
8816 	radeon_get_clock_info(rdev->ddev);
8817 
8818 	/* Fence driver */
8819 	r = radeon_fence_driver_init(rdev);
8820 	if (r)
8821 		return r;
8822 
8823 	/* initialize memory controller */
8824 	r = cik_mc_init(rdev);
8825 	if (r)
8826 		return r;
8827 	/* Memory manager */
8828 	r = radeon_bo_init(rdev);
8829 	if (r)
8830 		return r;
8831 
8832 	if (rdev->flags & RADEON_IS_IGP) {
8833 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8834 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8835 			r = cik_init_microcode(rdev);
8836 			if (r) {
8837 				DRM_ERROR("Failed to load firmware!\n");
8838 				return r;
8839 			}
8840 		}
8841 	} else {
8842 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8843 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8844 		    !rdev->mc_fw) {
8845 			r = cik_init_microcode(rdev);
8846 			if (r) {
8847 				DRM_ERROR("Failed to load firmware!\n");
8848 				return r;
8849 			}
8850 		}
8851 	}
8852 
8853 	/* Initialize power management */
8854 	radeon_pm_init(rdev);
8855 
8856 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8857 	ring->ring_obj = NULL;
8858 	r600_ring_init(rdev, ring, 1024 * 1024);
8859 
8860 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8861 	ring->ring_obj = NULL;
8862 	r600_ring_init(rdev, ring, 1024 * 1024);
8863 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8864 	if (r)
8865 		return r;
8866 
8867 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8868 	ring->ring_obj = NULL;
8869 	r600_ring_init(rdev, ring, 1024 * 1024);
8870 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8871 	if (r)
8872 		return r;
8873 
8874 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8875 	ring->ring_obj = NULL;
8876 	r600_ring_init(rdev, ring, 256 * 1024);
8877 
8878 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8879 	ring->ring_obj = NULL;
8880 	r600_ring_init(rdev, ring, 256 * 1024);
8881 
8882 	r = radeon_uvd_init(rdev);
8883 	if (!r) {
8884 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8885 		ring->ring_obj = NULL;
8886 		r600_ring_init(rdev, ring, 4096);
8887 	}
8888 
8889 	r = radeon_vce_init(rdev);
8890 	if (!r) {
8891 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8892 		ring->ring_obj = NULL;
8893 		r600_ring_init(rdev, ring, 4096);
8894 
8895 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8896 		ring->ring_obj = NULL;
8897 		r600_ring_init(rdev, ring, 4096);
8898 	}
8899 
8900 	rdev->ih.ring_obj = NULL;
8901 	r600_ih_ring_init(rdev, 64 * 1024);
8902 
8903 	r = r600_pcie_gart_init(rdev);
8904 	if (r)
8905 		return r;
8906 
8907 	rdev->accel_working = true;
8908 	r = cik_startup(rdev);
8909 	if (r) {
8910 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8911 		cik_cp_fini(rdev);
8912 		cik_sdma_fini(rdev);
8913 		cik_irq_fini(rdev);
8914 		sumo_rlc_fini(rdev);
8915 		cik_mec_fini(rdev);
8916 		radeon_wb_fini(rdev);
8917 		radeon_ib_pool_fini(rdev);
8918 		radeon_vm_manager_fini(rdev);
8919 		radeon_irq_kms_fini(rdev);
8920 		cik_pcie_gart_fini(rdev);
8921 		rdev->accel_working = false;
8922 	}
8923 
8924 	/* Don't start up if the MC ucode is missing.
8925 	 * The default clocks and voltages before the MC ucode
8926 	 * is loaded are not suffient for advanced operations.
8927 	 */
8928 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8929 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8930 		return -EINVAL;
8931 	}
8932 
8933 	return 0;
8934 }
8935 
8936 /**
8937  * cik_fini - asic specific driver and hw fini
8938  *
8939  * @rdev: radeon_device pointer
8940  *
8941  * Tear down the asic specific driver variables and program the hw
8942  * to an idle state (CIK).
8943  * Called at driver unload.
8944  */
8945 void cik_fini(struct radeon_device *rdev)
8946 {
8947 	radeon_pm_fini(rdev);
8948 	cik_cp_fini(rdev);
8949 	cik_sdma_fini(rdev);
8950 	cik_fini_pg(rdev);
8951 	cik_fini_cg(rdev);
8952 	cik_irq_fini(rdev);
8953 	sumo_rlc_fini(rdev);
8954 	cik_mec_fini(rdev);
8955 	radeon_wb_fini(rdev);
8956 	radeon_vm_manager_fini(rdev);
8957 	radeon_ib_pool_fini(rdev);
8958 	radeon_irq_kms_fini(rdev);
8959 	uvd_v1_0_fini(rdev);
8960 	radeon_uvd_fini(rdev);
8961 	radeon_vce_fini(rdev);
8962 	cik_pcie_gart_fini(rdev);
8963 	r600_vram_scratch_fini(rdev);
8964 	radeon_gem_fini(rdev);
8965 	radeon_fence_driver_fini(rdev);
8966 	radeon_bo_fini(rdev);
8967 	radeon_atombios_fini(rdev);
8968 	kfree(rdev->bios);
8969 	rdev->bios = NULL;
8970 }
8971 
8972 void dce8_program_fmt(struct drm_encoder *encoder)
8973 {
8974 	struct drm_device *dev = encoder->dev;
8975 	struct radeon_device *rdev = dev->dev_private;
8976 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8977 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8978 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8979 	int bpc = 0;
8980 	u32 tmp = 0;
8981 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8982 
8983 	if (connector) {
8984 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8985 		bpc = radeon_get_monitor_bpc(connector);
8986 		dither = radeon_connector->dither;
8987 	}
8988 
8989 	/* LVDS/eDP FMT is set up by atom */
8990 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8991 		return;
8992 
8993 	/* not needed for analog */
8994 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8995 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8996 		return;
8997 
8998 	if (bpc == 0)
8999 		return;
9000 
9001 	switch (bpc) {
9002 	case 6:
9003 		if (dither == RADEON_FMT_DITHER_ENABLE)
9004 			/* XXX sort out optimal dither settings */
9005 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9006 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
9007 		else
9008 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
9009 		break;
9010 	case 8:
9011 		if (dither == RADEON_FMT_DITHER_ENABLE)
9012 			/* XXX sort out optimal dither settings */
9013 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9014 				FMT_RGB_RANDOM_ENABLE |
9015 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
9016 		else
9017 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
9018 		break;
9019 	case 10:
9020 		if (dither == RADEON_FMT_DITHER_ENABLE)
9021 			/* XXX sort out optimal dither settings */
9022 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
9023 				FMT_RGB_RANDOM_ENABLE |
9024 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
9025 		else
9026 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
9027 		break;
9028 	default:
9029 		/* not needed */
9030 		break;
9031 	}
9032 
9033 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
9034 }
9035 
9036 /* display watermark setup */
9037 /**
9038  * dce8_line_buffer_adjust - Set up the line buffer
9039  *
9040  * @rdev: radeon_device pointer
9041  * @radeon_crtc: the selected display controller
9042  * @mode: the current display mode on the selected display
9043  * controller
9044  *
9045  * Setup up the line buffer allocation for
9046  * the selected display controller (CIK).
9047  * Returns the line buffer size in pixels.
9048  */
9049 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
9050 				   struct radeon_crtc *radeon_crtc,
9051 				   struct drm_display_mode *mode)
9052 {
9053 	u32 tmp, buffer_alloc, i;
9054 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
9055 	/*
9056 	 * Line Buffer Setup
9057 	 * There are 6 line buffers, one for each display controllers.
9058 	 * There are 3 partitions per LB. Select the number of partitions
9059 	 * to enable based on the display width.  For display widths larger
9060 	 * than 4096, you need use to use 2 display controllers and combine
9061 	 * them using the stereo blender.
9062 	 */
9063 	if (radeon_crtc->base.enabled && mode) {
9064 		if (mode->crtc_hdisplay < 1920) {
9065 			tmp = 1;
9066 			buffer_alloc = 2;
9067 		} else if (mode->crtc_hdisplay < 2560) {
9068 			tmp = 2;
9069 			buffer_alloc = 2;
9070 		} else if (mode->crtc_hdisplay < 4096) {
9071 			tmp = 0;
9072 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9073 		} else {
9074 			DRM_DEBUG_KMS("Mode too big for LB!\n");
9075 			tmp = 0;
9076 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9077 		}
9078 	} else {
9079 		tmp = 1;
9080 		buffer_alloc = 0;
9081 	}
9082 
9083 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
9084 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
9085 
9086 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
9087 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
9088 	for (i = 0; i < rdev->usec_timeout; i++) {
9089 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
9090 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
9091 			break;
9092 		udelay(1);
9093 	}
9094 
9095 	if (radeon_crtc->base.enabled && mode) {
9096 		switch (tmp) {
9097 		case 0:
9098 		default:
9099 			return 4096 * 2;
9100 		case 1:
9101 			return 1920 * 2;
9102 		case 2:
9103 			return 2560 * 2;
9104 		}
9105 	}
9106 
9107 	/* controller not enabled, so no lb used */
9108 	return 0;
9109 }
9110 
9111 /**
9112  * cik_get_number_of_dram_channels - get the number of dram channels
9113  *
9114  * @rdev: radeon_device pointer
9115  *
9116  * Look up the number of video ram channels (CIK).
9117  * Used for display watermark bandwidth calculations
9118  * Returns the number of dram channels
9119  */
9120 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
9121 {
9122 	u32 tmp = RREG32(MC_SHARED_CHMAP);
9123 
9124 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
9125 	case 0:
9126 	default:
9127 		return 1;
9128 	case 1:
9129 		return 2;
9130 	case 2:
9131 		return 4;
9132 	case 3:
9133 		return 8;
9134 	case 4:
9135 		return 3;
9136 	case 5:
9137 		return 6;
9138 	case 6:
9139 		return 10;
9140 	case 7:
9141 		return 12;
9142 	case 8:
9143 		return 16;
9144 	}
9145 }
9146 
/* input parameters for the dce8 display watermark calculations below */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
9162 
9163 /**
9164  * dce8_dram_bandwidth - get the dram bandwidth
9165  *
9166  * @wm: watermark calculation data
9167  *
9168  * Calculate the raw dram bandwidth (CIK).
9169  * Used for display watermark bandwidth calculations
9170  * Returns the dram bandwidth in MBytes/s
9171  */
9172 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9173 {
9174 	/* Calculate raw DRAM Bandwidth */
9175 	fixed20_12 dram_efficiency; /* 0.7 */
9176 	fixed20_12 yclk, dram_channels, bandwidth;
9177 	fixed20_12 a;
9178 
9179 	a.full = dfixed_const(1000);
9180 	yclk.full = dfixed_const(wm->yclk);
9181 	yclk.full = dfixed_div(yclk, a);
9182 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9183 	a.full = dfixed_const(10);
9184 	dram_efficiency.full = dfixed_const(7);
9185 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
9186 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9187 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9188 
9189 	return dfixed_trunc(bandwidth);
9190 }
9191 
9192 /**
9193  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9194  *
9195  * @wm: watermark calculation data
9196  *
9197  * Calculate the dram bandwidth used for display (CIK).
9198  * Used for display watermark bandwidth calculations
9199  * Returns the dram bandwidth for display in MBytes/s
9200  */
9201 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9202 {
9203 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9204 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9205 	fixed20_12 yclk, dram_channels, bandwidth;
9206 	fixed20_12 a;
9207 
9208 	a.full = dfixed_const(1000);
9209 	yclk.full = dfixed_const(wm->yclk);
9210 	yclk.full = dfixed_div(yclk, a);
9211 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9212 	a.full = dfixed_const(10);
9213 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9214 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9215 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9216 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9217 
9218 	return dfixed_trunc(bandwidth);
9219 }
9220 
9221 /**
9222  * dce8_data_return_bandwidth - get the data return bandwidth
9223  *
9224  * @wm: watermark calculation data
9225  *
9226  * Calculate the data return bandwidth used for display (CIK).
9227  * Used for display watermark bandwidth calculations
9228  * Returns the data return bandwidth in MBytes/s
9229  */
9230 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9231 {
9232 	/* Calculate the display Data return Bandwidth */
9233 	fixed20_12 return_efficiency; /* 0.8 */
9234 	fixed20_12 sclk, bandwidth;
9235 	fixed20_12 a;
9236 
9237 	a.full = dfixed_const(1000);
9238 	sclk.full = dfixed_const(wm->sclk);
9239 	sclk.full = dfixed_div(sclk, a);
9240 	a.full = dfixed_const(10);
9241 	return_efficiency.full = dfixed_const(8);
9242 	return_efficiency.full = dfixed_div(return_efficiency, a);
9243 	a.full = dfixed_const(32);
9244 	bandwidth.full = dfixed_mul(a, sclk);
9245 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9246 
9247 	return dfixed_trunc(bandwidth);
9248 }
9249 
9250 /**
9251  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9252  *
9253  * @wm: watermark calculation data
9254  *
9255  * Calculate the dmif bandwidth used for display (CIK).
9256  * Used for display watermark bandwidth calculations
9257  * Returns the dmif bandwidth in MBytes/s
9258  */
9259 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9260 {
9261 	/* Calculate the DMIF Request Bandwidth */
9262 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9263 	fixed20_12 disp_clk, bandwidth;
9264 	fixed20_12 a, b;
9265 
9266 	a.full = dfixed_const(1000);
9267 	disp_clk.full = dfixed_const(wm->disp_clk);
9268 	disp_clk.full = dfixed_div(disp_clk, a);
9269 	a.full = dfixed_const(32);
9270 	b.full = dfixed_mul(a, disp_clk);
9271 
9272 	a.full = dfixed_const(10);
9273 	disp_clk_request_efficiency.full = dfixed_const(8);
9274 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9275 
9276 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9277 
9278 	return dfixed_trunc(bandwidth);
9279 }
9280 
9281 /**
9282  * dce8_available_bandwidth - get the min available bandwidth
9283  *
9284  * @wm: watermark calculation data
9285  *
9286  * Calculate the min available bandwidth used for display (CIK).
9287  * Used for display watermark bandwidth calculations
9288  * Returns the min available bandwidth in MBytes/s
9289  */
9290 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9291 {
9292 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9293 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9294 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9295 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9296 
9297 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9298 }
9299 
9300 /**
9301  * dce8_average_bandwidth - get the average available bandwidth
9302  *
9303  * @wm: watermark calculation data
9304  *
9305  * Calculate the average available bandwidth used for display (CIK).
9306  * Used for display watermark bandwidth calculations
9307  * Returns the average available bandwidth in MBytes/s
9308  */
9309 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9310 {
9311 	/* Calculate the display mode Average Bandwidth
9312 	 * DisplayMode should contain the source and destination dimensions,
9313 	 * timing, etc.
9314 	 */
9315 	fixed20_12 bpp;
9316 	fixed20_12 line_time;
9317 	fixed20_12 src_width;
9318 	fixed20_12 bandwidth;
9319 	fixed20_12 a;
9320 
9321 	a.full = dfixed_const(1000);
9322 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9323 	line_time.full = dfixed_div(line_time, a);
9324 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9325 	src_width.full = dfixed_const(wm->src_width);
9326 	bandwidth.full = dfixed_mul(src_width, bpp);
9327 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9328 	bandwidth.full = dfixed_div(bandwidth, line_time);
9329 
9330 	return dfixed_trunc(bandwidth);
9331 }
9332 
9333 /**
9334  * dce8_latency_watermark - get the latency watermark
9335  *
9336  * @wm: watermark calculation data
9337  *
9338  * Calculate the latency watermark (CIK).
9339  * Used for display watermark bandwidth calculations
9340  * Returns the latency watermark in ns
9341  */
9342 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9343 {
9344 	/* First calculate the latency in ns */
9345 	u32 mc_latency = 2000; /* 2000 ns. */
9346 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9347 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9348 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9349 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9350 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9351 		(wm->num_heads * cursor_line_pair_return_time);
9352 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9353 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9354 	u32 tmp, dmif_size = 12288;
9355 	fixed20_12 a, b, c;
9356 
9357 	if (wm->num_heads == 0)
9358 		return 0;
9359 
9360 	a.full = dfixed_const(2);
9361 	b.full = dfixed_const(1);
9362 	if ((wm->vsc.full > a.full) ||
9363 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9364 	    (wm->vtaps >= 5) ||
9365 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9366 		max_src_lines_per_dst_line = 4;
9367 	else
9368 		max_src_lines_per_dst_line = 2;
9369 
9370 	a.full = dfixed_const(available_bandwidth);
9371 	b.full = dfixed_const(wm->num_heads);
9372 	a.full = dfixed_div(a, b);
9373 
9374 	b.full = dfixed_const(mc_latency + 512);
9375 	c.full = dfixed_const(wm->disp_clk);
9376 	b.full = dfixed_div(b, c);
9377 
9378 	c.full = dfixed_const(dmif_size);
9379 	b.full = dfixed_div(c, b);
9380 
9381 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9382 
9383 	b.full = dfixed_const(1000);
9384 	c.full = dfixed_const(wm->disp_clk);
9385 	b.full = dfixed_div(c, b);
9386 	c.full = dfixed_const(wm->bytes_per_pixel);
9387 	b.full = dfixed_mul(b, c);
9388 
9389 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9390 
9391 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9392 	b.full = dfixed_const(1000);
9393 	c.full = dfixed_const(lb_fill_bw);
9394 	b.full = dfixed_div(c, b);
9395 	a.full = dfixed_div(a, b);
9396 	line_fill_time = dfixed_trunc(a);
9397 
9398 	if (line_fill_time < wm->active_time)
9399 		return latency;
9400 	else
9401 		return latency + (line_fill_time - wm->active_time);
9402 
9403 }
9404 
9405 /**
9406  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9407  * average and available dram bandwidth
9408  *
9409  * @wm: watermark calculation data
9410  *
9411  * Check if the display average bandwidth fits in the display
9412  * dram bandwidth (CIK).
9413  * Used for display watermark bandwidth calculations
9414  * Returns true if the display fits, false if not.
9415  */
9416 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9417 {
9418 	if (dce8_average_bandwidth(wm) <=
9419 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9420 		return true;
9421 	else
9422 		return false;
9423 }
9424 
9425 /**
9426  * dce8_average_bandwidth_vs_available_bandwidth - check
9427  * average and available bandwidth
9428  *
9429  * @wm: watermark calculation data
9430  *
9431  * Check if the display average bandwidth fits in the display
9432  * available bandwidth (CIK).
9433  * Used for display watermark bandwidth calculations
9434  * Returns true if the display fits, false if not.
9435  */
9436 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9437 {
9438 	if (dce8_average_bandwidth(wm) <=
9439 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9440 		return true;
9441 	else
9442 		return false;
9443 }
9444 
9445 /**
9446  * dce8_check_latency_hiding - check latency hiding
9447  *
9448  * @wm: watermark calculation data
9449  *
9450  * Check latency hiding (CIK).
9451  * Used for display watermark bandwidth calculations
9452  * Returns true if the display fits, false if not.
9453  */
9454 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9455 {
9456 	u32 lb_partitions = wm->lb_size / wm->src_width;
9457 	u32 line_time = wm->active_time + wm->blank_time;
9458 	u32 latency_tolerant_lines;
9459 	u32 latency_hiding;
9460 	fixed20_12 a;
9461 
9462 	a.full = dfixed_const(1);
9463 	if (wm->vsc.full > a.full)
9464 		latency_tolerant_lines = 1;
9465 	else {
9466 		if (lb_partitions <= (wm->vtaps + 1))
9467 			latency_tolerant_lines = 1;
9468 		else
9469 			latency_tolerant_lines = 2;
9470 	}
9471 
9472 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9473 
9474 	if (dce8_latency_watermark(wm) <= latency_hiding)
9475 		return true;
9476 	else
9477 		return false;
9478 }
9479 
9480 /**
9481  * dce8_program_watermarks - program display watermarks
9482  *
9483  * @rdev: radeon_device pointer
9484  * @radeon_crtc: the selected display controller
9485  * @lb_size: line buffer size
9486  * @num_heads: number of display controllers in use
9487  *
9488  * Calculate and program the display watermarks for the
9489  * selected display controller (CIK).
9490  */
9491 static void dce8_program_watermarks(struct radeon_device *rdev,
9492 				    struct radeon_crtc *radeon_crtc,
9493 				    u32 lb_size, u32 num_heads)
9494 {
9495 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
9496 	struct dce8_wm_params wm_low, wm_high;
9497 	u32 pixel_period;
9498 	u32 line_time = 0;
9499 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
9500 	u32 tmp, wm_mask;
9501 
9502 	if (radeon_crtc->base.enabled && num_heads && mode) {
9503 		pixel_period = 1000000 / (u32)mode->clock;
9504 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
9505 
9506 		/* watermark for high clocks */
9507 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9508 		    rdev->pm.dpm_enabled) {
9509 			wm_high.yclk =
9510 				radeon_dpm_get_mclk(rdev, false) * 10;
9511 			wm_high.sclk =
9512 				radeon_dpm_get_sclk(rdev, false) * 10;
9513 		} else {
9514 			wm_high.yclk = rdev->pm.current_mclk * 10;
9515 			wm_high.sclk = rdev->pm.current_sclk * 10;
9516 		}
9517 
9518 		wm_high.disp_clk = mode->clock;
9519 		wm_high.src_width = mode->crtc_hdisplay;
9520 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
9521 		wm_high.blank_time = line_time - wm_high.active_time;
9522 		wm_high.interlaced = false;
9523 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9524 			wm_high.interlaced = true;
9525 		wm_high.vsc = radeon_crtc->vsc;
9526 		wm_high.vtaps = 1;
9527 		if (radeon_crtc->rmx_type != RMX_OFF)
9528 			wm_high.vtaps = 2;
9529 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
9530 		wm_high.lb_size = lb_size;
9531 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
9532 		wm_high.num_heads = num_heads;
9533 
9534 		/* set for high clocks */
9535 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
9536 
9537 		/* possibly force display priority to high */
9538 		/* should really do this at mode validation time... */
9539 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
9540 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
9541 		    !dce8_check_latency_hiding(&wm_high) ||
9542 		    (rdev->disp_priority == 2)) {
9543 			DRM_DEBUG_KMS("force priority to high\n");
9544 		}
9545 
9546 		/* watermark for low clocks */
9547 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
9548 		    rdev->pm.dpm_enabled) {
9549 			wm_low.yclk =
9550 				radeon_dpm_get_mclk(rdev, true) * 10;
9551 			wm_low.sclk =
9552 				radeon_dpm_get_sclk(rdev, true) * 10;
9553 		} else {
9554 			wm_low.yclk = rdev->pm.current_mclk * 10;
9555 			wm_low.sclk = rdev->pm.current_sclk * 10;
9556 		}
9557 
9558 		wm_low.disp_clk = mode->clock;
9559 		wm_low.src_width = mode->crtc_hdisplay;
9560 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
9561 		wm_low.blank_time = line_time - wm_low.active_time;
9562 		wm_low.interlaced = false;
9563 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
9564 			wm_low.interlaced = true;
9565 		wm_low.vsc = radeon_crtc->vsc;
9566 		wm_low.vtaps = 1;
9567 		if (radeon_crtc->rmx_type != RMX_OFF)
9568 			wm_low.vtaps = 2;
9569 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
9570 		wm_low.lb_size = lb_size;
9571 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
9572 		wm_low.num_heads = num_heads;
9573 
9574 		/* set for low clocks */
9575 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9576 
9577 		/* possibly force display priority to high */
9578 		/* should really do this at mode validation time... */
9579 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9580 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9581 		    !dce8_check_latency_hiding(&wm_low) ||
9582 		    (rdev->disp_priority == 2)) {
9583 			DRM_DEBUG_KMS("force priority to high\n");
9584 		}
9585 	}
9586 
9587 	/* select wm A */
9588 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9589 	tmp = wm_mask;
9590 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9591 	tmp |= LATENCY_WATERMARK_MASK(1);
9592 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9593 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9594 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9595 		LATENCY_HIGH_WATERMARK(line_time)));
9596 	/* select wm B */
9597 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9598 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9599 	tmp |= LATENCY_WATERMARK_MASK(2);
9600 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9601 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9602 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9603 		LATENCY_HIGH_WATERMARK(line_time)));
9604 	/* restore original selection */
9605 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9606 
9607 	/* save values for DPM */
9608 	radeon_crtc->line_time = line_time;
9609 	radeon_crtc->wm_high = latency_watermark_a;
9610 	radeon_crtc->wm_low = latency_watermark_b;
9611 }
9612 
9613 /**
9614  * dce8_bandwidth_update - program display watermarks
9615  *
9616  * @rdev: radeon_device pointer
9617  *
9618  * Calculate and program the display watermarks and line
9619  * buffer allocation (CIK).
9620  */
9621 void dce8_bandwidth_update(struct radeon_device *rdev)
9622 {
9623 	struct drm_display_mode *mode = NULL;
9624 	u32 num_heads = 0, lb_size;
9625 	int i;
9626 
9627 	if (!rdev->mode_info.mode_config_initialized)
9628 		return;
9629 
9630 	radeon_update_display_priority(rdev);
9631 
9632 	for (i = 0; i < rdev->num_crtc; i++) {
9633 		if (rdev->mode_info.crtcs[i]->base.enabled)
9634 			num_heads++;
9635 	}
9636 	for (i = 0; i < rdev->num_crtc; i++) {
9637 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9638 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9639 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9640 	}
9641 }
9642 
9643 /**
9644  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9645  *
9646  * @rdev: radeon_device pointer
9647  *
9648  * Fetches a GPU clock counter snapshot (SI).
9649  * Returns the 64 bit clock counter snapshot.
9650  */
9651 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9652 {
9653 	uint64_t clock;
9654 
9655 	mutex_lock(&rdev->gpu_clock_mutex);
9656 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9657 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9658 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9659 	mutex_unlock(&rdev->gpu_clock_mutex);
9660 	return clock;
9661 }
9662 
9663 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9664                               u32 cntl_reg, u32 status_reg)
9665 {
9666 	int r, i;
9667 	struct atom_clock_dividers dividers;
9668 	uint32_t tmp;
9669 
9670 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9671 					   clock, false, &dividers);
9672 	if (r)
9673 		return r;
9674 
9675 	tmp = RREG32_SMC(cntl_reg);
9676 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9677 	tmp |= dividers.post_divider;
9678 	WREG32_SMC(cntl_reg, tmp);
9679 
9680 	for (i = 0; i < 100; i++) {
9681 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9682 			break;
9683 		mdelay(10);
9684 	}
9685 	if (i == 100)
9686 		return -ETIMEDOUT;
9687 
9688 	return 0;
9689 }
9690 
9691 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9692 {
9693 	int r = 0;
9694 
9695 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9696 	if (r)
9697 		return r;
9698 
9699 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9700 	return r;
9701 }
9702 
9703 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9704 {
9705 	int r, i;
9706 	struct atom_clock_dividers dividers;
9707 	u32 tmp;
9708 
9709 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9710 					   ecclk, false, &dividers);
9711 	if (r)
9712 		return r;
9713 
9714 	for (i = 0; i < 100; i++) {
9715 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9716 			break;
9717 		mdelay(10);
9718 	}
9719 	if (i == 100)
9720 		return -ETIMEDOUT;
9721 
9722 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9723 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9724 	tmp |= dividers.post_divider;
9725 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9726 
9727 	for (i = 0; i < 100; i++) {
9728 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9729 			break;
9730 		mdelay(10);
9731 	}
9732 	if (i == 100)
9733 		return -ETIMEDOUT;
9734 
9735 	return 0;
9736 }
9737 
/**
 * cik_pcie_gen3_enable - switch the PCIe link to gen2/gen3 speed
 *
 * @rdev: radeon_device pointer
 *
 * Bump the link to the fastest speed the root port advertises
 * (gen3, falling back to gen2).  For gen3 the equalization
 * sequence is retried with hw-autonomous width control masked
 * on both ends of the link.  No-op for IGPs, non-PCIE parts,
 * root-bus devices, or when radeon.pcie_gen2=0.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	/* module parameter override */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if only gen1 is supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	/* current_data_rate: 1 = gen2, 2 = gen3 */
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* PCIe capability offsets; the raw config accesses below are
	 * relative to these.  NOTE(review): the kernel's
	 * pcie_capability_*() accessors would be preferable -- confirm */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save the HAWD (hw autonomous width disable) state of
			 * both ends so it can be restored after each pass */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* renegotiate up to the widest detected width first */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retry passes */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then kick a redo of
				 * equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				/* restore the saved HAWD bit on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* restore compliance-related fields
				 * (bit 4, bits 9..11) on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed in LNKCTL2 (low nibble) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hw to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9897 
/**
 * cik_program_aspm - configure PCIe ASPM power savings
 *
 * @rdev: radeon_device pointer
 *
 * Programs the PCIe link controller for active state power
 * management: L0s/L1 inactivity timers, PLL power-down in L1,
 * dynamic lane power states and, when the upstream bridge
 * supports clock power management, the bypass clock selects.
 * No-op for IGPs, non-PCIE parts, or when radeon.aspm=0.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* module parameter override */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the number of transmitted fast training sequences */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* build the L0s/L1 inactivity settings; only written below */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PLLs to power down in L1 on both PIFs */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* only usable when the upstream bridge advertises
			 * clock power management in its link capabilities */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch the monitor clock selects */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				/* don't source XCLK from BCLK */
				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: just commit the L0s/PMI settings */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	/* enable memory light sleep in the BIF */
	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				/* lanes reversed on both sides: clear the L0s
				 * inactivity timer again */
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
10046