xref: /linux/drivers/gpu/drm/radeon/cik.c (revision 3e44c471a2dab210f7e9b1e5f7d4d54d52df59eb)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "radeon_audio.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 #include "radeon_ucode.h"
35 #include "clearstate_ci.h"
36 #include "radeon_kfd.h"
37 
/*
 * Firmware image declarations so udev/modprobe can pull them in.
 * Each ASIC is listed twice: the UPPERCASE names are the legacy ucode
 * images, the lowercase names are the newer ucode images.  Note the
 * sets differ slightly (e.g. BONAIRE has mc2 while bonaire does not,
 * and kaveri adds mec2).
 */

/* Bonaire - legacy ucode names */
MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");

/* Bonaire - new ucode names */
MODULE_FIRMWARE("radeon/bonaire_pfp.bin");
MODULE_FIRMWARE("radeon/bonaire_me.bin");
MODULE_FIRMWARE("radeon/bonaire_ce.bin");
MODULE_FIRMWARE("radeon/bonaire_mec.bin");
MODULE_FIRMWARE("radeon/bonaire_mc.bin");
MODULE_FIRMWARE("radeon/bonaire_rlc.bin");
MODULE_FIRMWARE("radeon/bonaire_sdma.bin");
MODULE_FIRMWARE("radeon/bonaire_smc.bin");

/* Hawaii - legacy ucode names */
MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
MODULE_FIRMWARE("radeon/HAWAII_me.bin");
MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
MODULE_FIRMWARE("radeon/HAWAII_smc.bin");

/* Hawaii - new ucode names */
MODULE_FIRMWARE("radeon/hawaii_pfp.bin");
MODULE_FIRMWARE("radeon/hawaii_me.bin");
MODULE_FIRMWARE("radeon/hawaii_ce.bin");
MODULE_FIRMWARE("radeon/hawaii_mec.bin");
MODULE_FIRMWARE("radeon/hawaii_mc.bin");
MODULE_FIRMWARE("radeon/hawaii_rlc.bin");
MODULE_FIRMWARE("radeon/hawaii_sdma.bin");
MODULE_FIRMWARE("radeon/hawaii_smc.bin");

/* Kaveri (APU, no mc/smc) - legacy ucode names */
MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
MODULE_FIRMWARE("radeon/KAVERI_me.bin");
MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");

/* Kaveri - new ucode names (adds mec2) */
MODULE_FIRMWARE("radeon/kaveri_pfp.bin");
MODULE_FIRMWARE("radeon/kaveri_me.bin");
MODULE_FIRMWARE("radeon/kaveri_ce.bin");
MODULE_FIRMWARE("radeon/kaveri_mec.bin");
MODULE_FIRMWARE("radeon/kaveri_mec2.bin");
MODULE_FIRMWARE("radeon/kaveri_rlc.bin");
MODULE_FIRMWARE("radeon/kaveri_sdma.bin");

/* Kabini (APU) - legacy ucode names */
MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
MODULE_FIRMWARE("radeon/KABINI_me.bin");
MODULE_FIRMWARE("radeon/KABINI_ce.bin");
MODULE_FIRMWARE("radeon/KABINI_mec.bin");
MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
MODULE_FIRMWARE("radeon/KABINI_sdma.bin");

/* Kabini - new ucode names */
MODULE_FIRMWARE("radeon/kabini_pfp.bin");
MODULE_FIRMWARE("radeon/kabini_me.bin");
MODULE_FIRMWARE("radeon/kabini_ce.bin");
MODULE_FIRMWARE("radeon/kabini_mec.bin");
MODULE_FIRMWARE("radeon/kabini_rlc.bin");
MODULE_FIRMWARE("radeon/kabini_sdma.bin");

/* Mullins (APU) - legacy ucode names */
MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
MODULE_FIRMWARE("radeon/MULLINS_me.bin");
MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");

/* Mullins - new ucode names */
MODULE_FIRMWARE("radeon/mullins_pfp.bin");
MODULE_FIRMWARE("radeon/mullins_me.bin");
MODULE_FIRMWARE("radeon/mullins_ce.bin");
MODULE_FIRMWARE("radeon/mullins_mec.bin");
MODULE_FIRMWARE("radeon/mullins_rlc.bin");
MODULE_FIRMWARE("radeon/mullins_sdma.bin");
118 
/* Helpers shared with older-ASIC code (defined in r600.c / evergreen.c /
 * si.c / sumo-era files / vce_v2_0.c). */
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
extern void si_rlc_reset(struct radeon_device *rdev);
extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
/* Forward declarations for functions defined later in this file. */
static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
extern int cik_sdma_resume(struct radeon_device *rdev);
extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
extern void cik_sdma_fini(struct radeon_device *rdev);
extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
static void cik_rlc_stop(struct radeon_device *rdev);
static void cik_pcie_gen3_enable(struct radeon_device *rdev);
static void cik_program_aspm(struct radeon_device *rdev);
static void cik_init_pg(struct radeon_device *rdev);
static void cik_init_cg(struct radeon_device *rdev);
static void cik_fini_pg(struct radeon_device *rdev);
static void cik_fini_cg(struct radeon_device *rdev);
static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
					  bool enable);
143 
144 /**
145  * cik_get_allowed_info_register - fetch the register for the info ioctl
146  *
147  * @rdev: radeon_device pointer
148  * @reg: register offset in bytes
149  * @val: register value
150  *
151  * Returns 0 for success or -EINVAL for an invalid register
152  *
153  */
154 int cik_get_allowed_info_register(struct radeon_device *rdev,
155 				  u32 reg, u32 *val)
156 {
157 	switch (reg) {
158 	case GRBM_STATUS:
159 	case GRBM_STATUS2:
160 	case GRBM_STATUS_SE0:
161 	case GRBM_STATUS_SE1:
162 	case GRBM_STATUS_SE2:
163 	case GRBM_STATUS_SE3:
164 	case SRBM_STATUS:
165 	case SRBM_STATUS2:
166 	case (SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET):
167 	case (SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET):
168 	case UVD_STATUS:
169 	/* TODO VCE */
170 		*val = RREG32(reg);
171 		return 0;
172 	default:
173 		return -EINVAL;
174 	}
175 }
176 
177 /*
178  * Indirect registers accessor
179  */
180 u32 cik_didt_rreg(struct radeon_device *rdev, u32 reg)
181 {
182 	unsigned long flags;
183 	u32 r;
184 
185 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
186 	WREG32(CIK_DIDT_IND_INDEX, (reg));
187 	r = RREG32(CIK_DIDT_IND_DATA);
188 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
189 	return r;
190 }
191 
192 void cik_didt_wreg(struct radeon_device *rdev, u32 reg, u32 v)
193 {
194 	unsigned long flags;
195 
196 	spin_lock_irqsave(&rdev->didt_idx_lock, flags);
197 	WREG32(CIK_DIDT_IND_INDEX, (reg));
198 	WREG32(CIK_DIDT_IND_DATA, (v));
199 	spin_unlock_irqrestore(&rdev->didt_idx_lock, flags);
200 }
201 
202 /* get temperature in millidegrees */
203 int ci_get_temp(struct radeon_device *rdev)
204 {
205 	u32 temp;
206 	int actual_temp = 0;
207 
208 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
209 		CTF_TEMP_SHIFT;
210 
211 	if (temp & 0x200)
212 		actual_temp = 255;
213 	else
214 		actual_temp = temp & 0x1ff;
215 
216 	actual_temp = actual_temp * 1000;
217 
218 	return actual_temp;
219 }
220 
221 /* get temperature in millidegrees */
222 int kv_get_temp(struct radeon_device *rdev)
223 {
224 	u32 temp;
225 	int actual_temp = 0;
226 
227 	temp = RREG32_SMC(0xC0300E0C);
228 
229 	if (temp)
230 		actual_temp = (temp / 8) - 49;
231 	else
232 		actual_temp = 0;
233 
234 	actual_temp = actual_temp * 1000;
235 
236 	return actual_temp;
237 }
238 
239 /*
240  * Indirect registers accessor
241  */
242 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
243 {
244 	unsigned long flags;
245 	u32 r;
246 
247 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
248 	WREG32(PCIE_INDEX, reg);
249 	(void)RREG32(PCIE_INDEX);
250 	r = RREG32(PCIE_DATA);
251 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
252 	return r;
253 }
254 
255 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
256 {
257 	unsigned long flags;
258 
259 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
260 	WREG32(PCIE_INDEX, reg);
261 	(void)RREG32(PCIE_INDEX);
262 	WREG32(PCIE_DATA, v);
263 	(void)RREG32(PCIE_DATA);
264 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
265 }
266 
/*
 * RLC save/restore register list for Spectre (Kaveri GFX).
 *
 * NOTE(review): format appears to be pairs of
 *   ((flags << 16) | (register byte offset >> 2)), <reserved data word>
 * where the high half looks like broadcast/SE-SH selection flags
 * (0x4e00..0xbe00 entries repeat the same offset per shader engine).
 * Bare values such as 0x3 and 0x5 below appear to delimit sections of
 * the list consumed by the RLC ucode -- confirm against the RLC
 * save/restore programming in this file before relying on this.
 * Do not reorder or edit entries without matching RLC ucode.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	/* same offset, one entry per SE/SH selection */
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	/* NOTE(review): bare count -- presumably starts a new section */
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	/* NOTE(review): bare count -- final section has no data words */
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
713 
/*
 * RLC save/restore register list for Kalindi (Kabini/Mullins GFX).
 *
 * Same apparent format as spectre_rlc_save_restore_register_list above:
 * ((flags << 16) | (register byte offset >> 2)) followed by a reserved
 * data word, with bare counts (0x3, 0x5) appearing to delimit sections.
 * Kalindi has fewer shader engines, hence fewer 0x?e00-flagged
 * per-SE duplicates than the Spectre list.  NOTE(review): format
 * inferred from the data shape -- confirm against the RLC programming
 * code before relying on it.  Do not reorder or edit entries without
 * matching RLC ucode.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	/* NOTE(review): bare count -- presumably starts a new section */
	0x3,
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	/* NOTE(review): bare count -- final section has no data words */
	0x5,
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
1038 
/*
 * Golden (recommended power-on) register settings for Bonaire SPM.
 * NOTE(review): appears to be { offset, mask, value } triplets applied
 * at init time (bits in mask cleared, then value OR'd in) -- confirm
 * against the register-sequence programming helper that consumes these
 * tables.
 */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1043 
/*
 * Golden register settings common to Bonaire boards.
 * Same apparent { offset, mask, value } triplet format as the SPM
 * table above.
 */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1051 
/*
 * Golden register settings for Bonaire.
 * Same apparent { offset, mask, value } triplet format as the tables
 * above.  Values come from AMD's recommended hardware initialization
 * sequence; do not tweak individual entries without hardware docs.
 */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
1096 
/* Bonaire medium-grain / coarse-grain clock-gating init sequence:
 * {offset, mask, value} triplets applied in cik_init_golden_registers().
 */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1182 
/* Spectre (Kaveri) SPM "golden" settings: {offset, mask, value} triplets. */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1187 
/* Spectre (Kaveri) common "golden" settings: {offset, mask, value} triplets. */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1195 
/* Spectre (Kaveri) ASIC-specific "golden" register fixups:
 * {offset, mask, value} triplets applied in cik_init_golden_registers().
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1224 
/* Spectre (Kaveri) clock-gating init sequence: {offset, mask, value}
 * triplets applied in cik_init_golden_registers().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1315 
/* Kalindi (Kabini/Mullins) SPM "golden" settings: {offset, mask, value}. */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1320 
/* Kalindi common "golden" settings: {offset, mask, value} triplets. */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1328 
/* Kalindi (Kabini) ASIC-specific "golden" register fixups:
 * {offset, mask, value} triplets applied in cik_init_golden_registers().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1362 
/* Kalindi clock-gating init sequence (shared by Kabini and Mullins):
 * {offset, mask, value} triplets applied in cik_init_golden_registers().
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1421 
/* Hawaii SPM "golden" settings: {offset, mask, value} triplets. */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1426 
/* Hawaii common "golden" settings: {offset, mask, value} triplets. */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1435 
/* Hawaii ASIC-specific "golden" register fixups: {offset, mask, value}
 * triplets applied in cik_init_golden_registers().
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1475 
/* Hawaii clock-gating init sequence: {offset, mask, value} triplets
 * applied in cik_init_golden_registers().
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1586 
/* Godavari (Mullins) ASIC-specific "golden" register fixups:
 * {offset, mask, value} triplets applied in cik_init_golden_registers().
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): 0x98302 looks like a typo for 0x9834 — every
	 * sibling table pairs 0x9834 with this exact mask/value; confirm
	 * against the hardware register list.
	 */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1622 
1623 
1624 static void cik_init_golden_registers(struct radeon_device *rdev)
1625 {
1626 	/* Some of the registers might be dependent on GRBM_GFX_INDEX */
1627 	mutex_lock(&rdev->grbm_idx_mutex);
1628 	switch (rdev->family) {
1629 	case CHIP_BONAIRE:
1630 		radeon_program_register_sequence(rdev,
1631 						 bonaire_mgcg_cgcg_init,
1632 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1633 		radeon_program_register_sequence(rdev,
1634 						 bonaire_golden_registers,
1635 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1636 		radeon_program_register_sequence(rdev,
1637 						 bonaire_golden_common_registers,
1638 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1639 		radeon_program_register_sequence(rdev,
1640 						 bonaire_golden_spm_registers,
1641 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1642 		break;
1643 	case CHIP_KABINI:
1644 		radeon_program_register_sequence(rdev,
1645 						 kalindi_mgcg_cgcg_init,
1646 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1647 		radeon_program_register_sequence(rdev,
1648 						 kalindi_golden_registers,
1649 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1650 		radeon_program_register_sequence(rdev,
1651 						 kalindi_golden_common_registers,
1652 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1653 		radeon_program_register_sequence(rdev,
1654 						 kalindi_golden_spm_registers,
1655 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1656 		break;
1657 	case CHIP_MULLINS:
1658 		radeon_program_register_sequence(rdev,
1659 						 kalindi_mgcg_cgcg_init,
1660 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1661 		radeon_program_register_sequence(rdev,
1662 						 godavari_golden_registers,
1663 						 (const u32)ARRAY_SIZE(godavari_golden_registers));
1664 		radeon_program_register_sequence(rdev,
1665 						 kalindi_golden_common_registers,
1666 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1667 		radeon_program_register_sequence(rdev,
1668 						 kalindi_golden_spm_registers,
1669 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1670 		break;
1671 	case CHIP_KAVERI:
1672 		radeon_program_register_sequence(rdev,
1673 						 spectre_mgcg_cgcg_init,
1674 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1675 		radeon_program_register_sequence(rdev,
1676 						 spectre_golden_registers,
1677 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1678 		radeon_program_register_sequence(rdev,
1679 						 spectre_golden_common_registers,
1680 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1681 		radeon_program_register_sequence(rdev,
1682 						 spectre_golden_spm_registers,
1683 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1684 		break;
1685 	case CHIP_HAWAII:
1686 		radeon_program_register_sequence(rdev,
1687 						 hawaii_mgcg_cgcg_init,
1688 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1689 		radeon_program_register_sequence(rdev,
1690 						 hawaii_golden_registers,
1691 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1692 		radeon_program_register_sequence(rdev,
1693 						 hawaii_golden_common_registers,
1694 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1695 		radeon_program_register_sequence(rdev,
1696 						 hawaii_golden_spm_registers,
1697 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1698 		break;
1699 	default:
1700 		break;
1701 	}
1702 	mutex_unlock(&rdev->grbm_idx_mutex);
1703 }
1704 
1705 /**
1706  * cik_get_xclk - get the xclk
1707  *
1708  * @rdev: radeon_device pointer
1709  *
1710  * Returns the reference clock used by the gfx engine
1711  * (CIK).
1712  */
1713 u32 cik_get_xclk(struct radeon_device *rdev)
1714 {
1715         u32 reference_clock = rdev->clock.spll.reference_freq;
1716 
1717 	if (rdev->flags & RADEON_IS_IGP) {
1718 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1719 			return reference_clock / 2;
1720 	} else {
1721 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1722 			return reference_clock / 4;
1723 	}
1724 	return reference_clock;
1725 }
1726 
1727 /**
1728  * cik_mm_rdoorbell - read a doorbell dword
1729  *
1730  * @rdev: radeon_device pointer
1731  * @index: doorbell index
1732  *
1733  * Returns the value in the doorbell aperture at the
1734  * requested doorbell index (CIK).
1735  */
1736 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1737 {
1738 	if (index < rdev->doorbell.num_doorbells) {
1739 		return readl(rdev->doorbell.ptr + index);
1740 	} else {
1741 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1742 		return 0;
1743 	}
1744 }
1745 
1746 /**
1747  * cik_mm_wdoorbell - write a doorbell dword
1748  *
1749  * @rdev: radeon_device pointer
1750  * @index: doorbell index
1751  * @v: value to write
1752  *
1753  * Writes @v to the doorbell aperture at the
1754  * requested doorbell index (CIK).
1755  */
1756 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1757 {
1758 	if (index < rdev->doorbell.num_doorbells) {
1759 		writel(v, rdev->doorbell.ptr + index);
1760 	} else {
1761 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1762 	}
1763 }
1764 
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC IO debug register pairs: {MC_SEQ_IO_DEBUG_INDEX value,
 * MC_SEQ_IO_DEBUG_DATA value}, written by ci_mc_load_microcode() when
 * the legacy (non-new_fw) MC firmware image is used.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1806 
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC IO debug register pairs: {MC_SEQ_IO_DEBUG_INDEX value,
 * MC_SEQ_IO_DEBUG_DATA value}, written by ci_mc_load_microcode() when
 * the legacy (non-new_fw) MC firmware image is used.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1834 
1835 
1836 /**
1837  * cik_srbm_select - select specific register instances
1838  *
1839  * @rdev: radeon_device pointer
1840  * @me: selected ME (micro engine)
1841  * @pipe: pipe
1842  * @queue: queue
1843  * @vmid: VMID
1844  *
1845  * Switches the currently active registers instances.  Some
1846  * registers are instanced per VMID, others are instanced per
1847  * me/pipe/queue combination.
1848  */
1849 static void cik_srbm_select(struct radeon_device *rdev,
1850 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1851 {
1852 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1853 			     MEID(me & 0x3) |
1854 			     VMID(vmid & 0xf) |
1855 			     QUEUEID(queue & 0x7));
1856 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1857 }
1858 
1859 /* ucode loading */
1860 /**
1861  * ci_mc_load_microcode - load MC ucode into the hw
1862  *
1863  * @rdev: radeon_device pointer
1864  *
1865  * Load the GDDR MC ucode into the hw (CIK).
1866  * Returns 0 on success, error on failure.
1867  */
int ci_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running, blackout = 0, tmp;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	/* nothing to do without an MC firmware image */
	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		/* new-style firmware: io-debug pairs and ucode are located
		 * via offsets in the firmware header, little-endian
		 */
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);

		/* io_debug region holds {index, data} pairs of 4 bytes each */
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		/* legacy firmware: big-endian blob, io-debug pairs come from
		 * the per-ASIC static tables above
		 */
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_BONAIRE:
			io_mc_regs = (u32 *)&bonaire_io_mc_regs;
			regs_size = BONAIRE_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAWAII:
			io_mc_regs = (u32 *)&hawaii_io_mc_regs;
			regs_size = HAWAII_IO_MC_REGS_SIZE;
			break;
		default:
			return -EINVAL;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	/* only (re)load the ucode when the MC engine is not running */
	if (running == 0) {
		/* NOTE(review): this inner branch (and the blackout restore
		 * below) can never execute — running is 0 inside this block.
		 * Dead code; confirm intent before removing.
		 */
		if (running) {
			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
		}

		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}

		/* extra io-debug writes for device 0x6649 with this
		 * MC_SEQ_MISC0 revision pattern
		 */
		tmp = RREG32(MC_SEQ_MISC0);
		if ((rdev->pdev->device == 0x6649) && ((tmp & 0xff00) == 0x5600)) {
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 5);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x00000023);
			WREG32(MC_SEQ_IO_DEBUG_INDEX, 9);
			WREG32(MC_SEQ_IO_DEBUG_DATA, 0x000001f0);
		}

		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}

		/* NOTE(review): dead code — see note above. */
		if (running)
			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
	}

	return 0;
}
1972 
1973 /**
1974  * cik_init_microcode - load ucode images from disk
1975  *
1976  * @rdev: radeon_device pointer
1977  *
1978  * Use the firmware interface to load the ucode images into
1979  * the driver (not loaded into hw).
1980  * Returns 0 on success, error on failure.
1981  */
1982 static int cik_init_microcode(struct radeon_device *rdev)
1983 {
1984 	const char *chip_name;
1985 	const char *new_chip_name;
1986 	size_t pfp_req_size, me_req_size, ce_req_size,
1987 		mec_req_size, rlc_req_size, mc_req_size = 0,
1988 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1989 	char fw_name[30];
1990 	int new_fw = 0;
1991 	int err;
1992 	int num_fw;
1993 
1994 	DRM_DEBUG("\n");
1995 
1996 	switch (rdev->family) {
1997 	case CHIP_BONAIRE:
1998 		chip_name = "BONAIRE";
1999 		new_chip_name = "bonaire";
2000 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2001 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2002 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2003 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2004 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2005 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
2006 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
2007 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2008 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
2009 		num_fw = 8;
2010 		break;
2011 	case CHIP_HAWAII:
2012 		chip_name = "HAWAII";
2013 		new_chip_name = "hawaii";
2014 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2015 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2016 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2017 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2018 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
2019 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
2020 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
2021 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2022 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
2023 		num_fw = 8;
2024 		break;
2025 	case CHIP_KAVERI:
2026 		chip_name = "KAVERI";
2027 		new_chip_name = "kaveri";
2028 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2029 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2030 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2031 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2032 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
2033 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2034 		num_fw = 7;
2035 		break;
2036 	case CHIP_KABINI:
2037 		chip_name = "KABINI";
2038 		new_chip_name = "kabini";
2039 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2040 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2041 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2042 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2043 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
2044 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2045 		num_fw = 6;
2046 		break;
2047 	case CHIP_MULLINS:
2048 		chip_name = "MULLINS";
2049 		new_chip_name = "mullins";
2050 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
2051 		me_req_size = CIK_ME_UCODE_SIZE * 4;
2052 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
2053 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
2054 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
2055 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
2056 		num_fw = 6;
2057 		break;
2058 	default: BUG();
2059 	}
2060 
2061 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
2062 
2063 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
2064 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2065 	if (err) {
2066 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
2067 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
2068 		if (err)
2069 			goto out;
2070 		if (rdev->pfp_fw->size != pfp_req_size) {
2071 			printk(KERN_ERR
2072 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2073 			       rdev->pfp_fw->size, fw_name);
2074 			err = -EINVAL;
2075 			goto out;
2076 		}
2077 	} else {
2078 		err = radeon_ucode_validate(rdev->pfp_fw);
2079 		if (err) {
2080 			printk(KERN_ERR
2081 			       "cik_fw: validation failed for firmware \"%s\"\n",
2082 			       fw_name);
2083 			goto out;
2084 		} else {
2085 			new_fw++;
2086 		}
2087 	}
2088 
2089 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
2090 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2091 	if (err) {
2092 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
2093 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
2094 		if (err)
2095 			goto out;
2096 		if (rdev->me_fw->size != me_req_size) {
2097 			printk(KERN_ERR
2098 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2099 			       rdev->me_fw->size, fw_name);
2100 			err = -EINVAL;
2101 		}
2102 	} else {
2103 		err = radeon_ucode_validate(rdev->me_fw);
2104 		if (err) {
2105 			printk(KERN_ERR
2106 			       "cik_fw: validation failed for firmware \"%s\"\n",
2107 			       fw_name);
2108 			goto out;
2109 		} else {
2110 			new_fw++;
2111 		}
2112 	}
2113 
2114 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
2115 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2116 	if (err) {
2117 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
2118 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
2119 		if (err)
2120 			goto out;
2121 		if (rdev->ce_fw->size != ce_req_size) {
2122 			printk(KERN_ERR
2123 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2124 			       rdev->ce_fw->size, fw_name);
2125 			err = -EINVAL;
2126 		}
2127 	} else {
2128 		err = radeon_ucode_validate(rdev->ce_fw);
2129 		if (err) {
2130 			printk(KERN_ERR
2131 			       "cik_fw: validation failed for firmware \"%s\"\n",
2132 			       fw_name);
2133 			goto out;
2134 		} else {
2135 			new_fw++;
2136 		}
2137 	}
2138 
2139 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", new_chip_name);
2140 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2141 	if (err) {
2142 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
2143 		err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
2144 		if (err)
2145 			goto out;
2146 		if (rdev->mec_fw->size != mec_req_size) {
2147 			printk(KERN_ERR
2148 			       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
2149 			       rdev->mec_fw->size, fw_name);
2150 			err = -EINVAL;
2151 		}
2152 	} else {
2153 		err = radeon_ucode_validate(rdev->mec_fw);
2154 		if (err) {
2155 			printk(KERN_ERR
2156 			       "cik_fw: validation failed for firmware \"%s\"\n",
2157 			       fw_name);
2158 			goto out;
2159 		} else {
2160 			new_fw++;
2161 		}
2162 	}
2163 
2164 	if (rdev->family == CHIP_KAVERI) {
2165 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec2.bin", new_chip_name);
2166 		err = request_firmware(&rdev->mec2_fw, fw_name, rdev->dev);
2167 		if (err) {
2168 			goto out;
2169 		} else {
2170 			err = radeon_ucode_validate(rdev->mec2_fw);
2171 			if (err) {
2172 				goto out;
2173 			} else {
2174 				new_fw++;
2175 			}
2176 		}
2177 	}
2178 
2179 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
2180 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2181 	if (err) {
2182 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
2183 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
2184 		if (err)
2185 			goto out;
2186 		if (rdev->rlc_fw->size != rlc_req_size) {
2187 			printk(KERN_ERR
2188 			       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
2189 			       rdev->rlc_fw->size, fw_name);
2190 			err = -EINVAL;
2191 		}
2192 	} else {
2193 		err = radeon_ucode_validate(rdev->rlc_fw);
2194 		if (err) {
2195 			printk(KERN_ERR
2196 			       "cik_fw: validation failed for firmware \"%s\"\n",
2197 			       fw_name);
2198 			goto out;
2199 		} else {
2200 			new_fw++;
2201 		}
2202 	}
2203 
2204 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", new_chip_name);
2205 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2206 	if (err) {
2207 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
2208 		err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
2209 		if (err)
2210 			goto out;
2211 		if (rdev->sdma_fw->size != sdma_req_size) {
2212 			printk(KERN_ERR
2213 			       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
2214 			       rdev->sdma_fw->size, fw_name);
2215 			err = -EINVAL;
2216 		}
2217 	} else {
2218 		err = radeon_ucode_validate(rdev->sdma_fw);
2219 		if (err) {
2220 			printk(KERN_ERR
2221 			       "cik_fw: validation failed for firmware \"%s\"\n",
2222 			       fw_name);
2223 			goto out;
2224 		} else {
2225 			new_fw++;
2226 		}
2227 	}
2228 
2229 	/* No SMC, MC ucode on APUs */
2230 	if (!(rdev->flags & RADEON_IS_IGP)) {
2231 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
2232 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2233 		if (err) {
2234 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
2235 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2236 			if (err) {
2237 				snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
2238 				err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
2239 				if (err)
2240 					goto out;
2241 			}
2242 			if ((rdev->mc_fw->size != mc_req_size) &&
2243 			    (rdev->mc_fw->size != mc2_req_size)){
2244 				printk(KERN_ERR
2245 				       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
2246 				       rdev->mc_fw->size, fw_name);
2247 				err = -EINVAL;
2248 			}
2249 			DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
2250 		} else {
2251 			err = radeon_ucode_validate(rdev->mc_fw);
2252 			if (err) {
2253 				printk(KERN_ERR
2254 				       "cik_fw: validation failed for firmware \"%s\"\n",
2255 				       fw_name);
2256 				goto out;
2257 			} else {
2258 				new_fw++;
2259 			}
2260 		}
2261 
2262 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
2263 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2264 		if (err) {
2265 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
2266 			err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
2267 			if (err) {
2268 				printk(KERN_ERR
2269 				       "smc: error loading firmware \"%s\"\n",
2270 				       fw_name);
2271 				release_firmware(rdev->smc_fw);
2272 				rdev->smc_fw = NULL;
2273 				err = 0;
2274 			} else if (rdev->smc_fw->size != smc_req_size) {
2275 				printk(KERN_ERR
2276 				       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2277 				       rdev->smc_fw->size, fw_name);
2278 				err = -EINVAL;
2279 			}
2280 		} else {
2281 			err = radeon_ucode_validate(rdev->smc_fw);
2282 			if (err) {
2283 				printk(KERN_ERR
2284 				       "cik_fw: validation failed for firmware \"%s\"\n",
2285 				       fw_name);
2286 				goto out;
2287 			} else {
2288 				new_fw++;
2289 			}
2290 		}
2291 	}
2292 
2293 	if (new_fw == 0) {
2294 		rdev->new_fw = false;
2295 	} else if (new_fw < num_fw) {
2296 		printk(KERN_ERR "ci_fw: mixing new and old firmware!\n");
2297 		err = -EINVAL;
2298 	} else {
2299 		rdev->new_fw = true;
2300 	}
2301 
2302 out:
2303 	if (err) {
2304 		if (err != -EINVAL)
2305 			printk(KERN_ERR
2306 			       "cik_cp: Failed to load firmware \"%s\"\n",
2307 			       fw_name);
2308 		release_firmware(rdev->pfp_fw);
2309 		rdev->pfp_fw = NULL;
2310 		release_firmware(rdev->me_fw);
2311 		rdev->me_fw = NULL;
2312 		release_firmware(rdev->ce_fw);
2313 		rdev->ce_fw = NULL;
2314 		release_firmware(rdev->mec_fw);
2315 		rdev->mec_fw = NULL;
2316 		release_firmware(rdev->mec2_fw);
2317 		rdev->mec2_fw = NULL;
2318 		release_firmware(rdev->rlc_fw);
2319 		rdev->rlc_fw = NULL;
2320 		release_firmware(rdev->sdma_fw);
2321 		rdev->sdma_fw = NULL;
2322 		release_firmware(rdev->mc_fw);
2323 		rdev->mc_fw = NULL;
2324 		release_firmware(rdev->smc_fw);
2325 		rdev->smc_fw = NULL;
2326 	}
2327 	return err;
2328 }
2329 
2330 /*
2331  * Core functions
2332  */
2333 /**
2334  * cik_tiling_mode_table_init - init the hw tiling table
2335  *
2336  * @rdev: radeon_device pointer
2337  *
2338  * Starting with SI, the tiling setup is done globally in a
2339  * set of 32 tiling modes.  Rather than selecting each set of
2340  * parameters per surface as on older asics, we just select
2341  * which index in the tiling table we want to use, and the
2342  * surface uses those parameters (CIK).
2343  */
2344 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2345 {
2346 	const u32 num_tile_mode_states = 32;
2347 	const u32 num_secondary_tile_mode_states = 16;
2348 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2349 	u32 num_pipe_configs;
2350 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2351 		rdev->config.cik.max_shader_engines;
2352 
2353 	switch (rdev->config.cik.mem_row_size_in_kb) {
2354 	case 1:
2355 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2356 		break;
2357 	case 2:
2358 	default:
2359 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2360 		break;
2361 	case 4:
2362 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2363 		break;
2364 	}
2365 
2366 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2367 	if (num_pipe_configs > 8)
2368 		num_pipe_configs = 16;
2369 
2370 	if (num_pipe_configs == 16) {
2371 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2372 			switch (reg_offset) {
2373 			case 0:
2374 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2375 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2376 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2377 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2378 				break;
2379 			case 1:
2380 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2381 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2382 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2383 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2384 				break;
2385 			case 2:
2386 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2387 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2388 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2389 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2390 				break;
2391 			case 3:
2392 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2393 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2394 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2395 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2396 				break;
2397 			case 4:
2398 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2399 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2400 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2401 						 TILE_SPLIT(split_equal_to_row_size));
2402 				break;
2403 			case 5:
2404 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2405 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2406 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2407 				break;
2408 			case 6:
2409 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2410 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2411 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2412 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2413 				break;
2414 			case 7:
2415 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2416 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2417 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2418 						 TILE_SPLIT(split_equal_to_row_size));
2419 				break;
2420 			case 8:
2421 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2422 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2423 				break;
2424 			case 9:
2425 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2426 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2427 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2428 				break;
2429 			case 10:
2430 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2431 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2432 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2433 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2434 				break;
2435 			case 11:
2436 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2437 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2438 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2439 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2440 				break;
2441 			case 12:
2442 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2443 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2444 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2445 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2446 				break;
2447 			case 13:
2448 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2449 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2450 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2451 				break;
2452 			case 14:
2453 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2454 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2455 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2456 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2457 				break;
2458 			case 16:
2459 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2460 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2461 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2462 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2463 				break;
2464 			case 17:
2465 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2466 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2467 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2468 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2469 				break;
2470 			case 27:
2471 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2472 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2473 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2474 				break;
2475 			case 28:
2476 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2477 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2478 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2479 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2480 				break;
2481 			case 29:
2482 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2483 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2484 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2485 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2486 				break;
2487 			case 30:
2488 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2489 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2490 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2491 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2492 				break;
2493 			default:
2494 				gb_tile_moden = 0;
2495 				break;
2496 			}
2497 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2498 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2499 		}
2500 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2501 			switch (reg_offset) {
2502 			case 0:
2503 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2505 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2506 						 NUM_BANKS(ADDR_SURF_16_BANK));
2507 				break;
2508 			case 1:
2509 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2510 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2511 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2512 						 NUM_BANKS(ADDR_SURF_16_BANK));
2513 				break;
2514 			case 2:
2515 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2517 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2518 						 NUM_BANKS(ADDR_SURF_16_BANK));
2519 				break;
2520 			case 3:
2521 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2523 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2524 						 NUM_BANKS(ADDR_SURF_16_BANK));
2525 				break;
2526 			case 4:
2527 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2528 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2529 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2530 						 NUM_BANKS(ADDR_SURF_8_BANK));
2531 				break;
2532 			case 5:
2533 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2535 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2536 						 NUM_BANKS(ADDR_SURF_4_BANK));
2537 				break;
2538 			case 6:
2539 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2541 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2542 						 NUM_BANKS(ADDR_SURF_2_BANK));
2543 				break;
2544 			case 8:
2545 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2547 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2548 						 NUM_BANKS(ADDR_SURF_16_BANK));
2549 				break;
2550 			case 9:
2551 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2553 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2554 						 NUM_BANKS(ADDR_SURF_16_BANK));
2555 				break;
2556 			case 10:
2557 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2559 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2560 						 NUM_BANKS(ADDR_SURF_16_BANK));
2561 				break;
2562 			case 11:
2563 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2564 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2565 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2566 						 NUM_BANKS(ADDR_SURF_8_BANK));
2567 				break;
2568 			case 12:
2569 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2571 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2572 						 NUM_BANKS(ADDR_SURF_4_BANK));
2573 				break;
2574 			case 13:
2575 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2577 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2578 						 NUM_BANKS(ADDR_SURF_2_BANK));
2579 				break;
2580 			case 14:
2581 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2582 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2583 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2584 						 NUM_BANKS(ADDR_SURF_2_BANK));
2585 				break;
2586 			default:
2587 				gb_tile_moden = 0;
2588 				break;
2589 			}
2590 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2591 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2592 		}
2593 	} else if (num_pipe_configs == 8) {
2594 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2595 			switch (reg_offset) {
2596 			case 0:
2597 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2598 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2599 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2600 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2601 				break;
2602 			case 1:
2603 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2605 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2606 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2607 				break;
2608 			case 2:
2609 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2610 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2611 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2612 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2613 				break;
2614 			case 3:
2615 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2617 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2618 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2619 				break;
2620 			case 4:
2621 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2622 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2623 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2624 						 TILE_SPLIT(split_equal_to_row_size));
2625 				break;
2626 			case 5:
2627 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2628 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2629 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2630 				break;
2631 			case 6:
2632 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2633 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2634 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2635 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2636 				break;
2637 			case 7:
2638 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2639 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2640 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2641 						 TILE_SPLIT(split_equal_to_row_size));
2642 				break;
2643 			case 8:
2644 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2645 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2646 				break;
2647 			case 9:
2648 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2649 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2650 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2651 				break;
2652 			case 10:
2653 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2654 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2655 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2656 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2657 				break;
2658 			case 11:
2659 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2660 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2661 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2662 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2663 				break;
2664 			case 12:
2665 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2666 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2667 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2668 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2669 				break;
2670 			case 13:
2671 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2672 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2673 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2674 				break;
2675 			case 14:
2676 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2678 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2679 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2680 				break;
2681 			case 16:
2682 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2683 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2684 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2685 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2686 				break;
2687 			case 17:
2688 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2689 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2690 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2691 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2692 				break;
2693 			case 27:
2694 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2695 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2696 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2697 				break;
2698 			case 28:
2699 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2700 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2701 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2702 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2703 				break;
2704 			case 29:
2705 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2706 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2707 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2708 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2709 				break;
2710 			case 30:
2711 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2712 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2713 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2714 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2715 				break;
2716 			default:
2717 				gb_tile_moden = 0;
2718 				break;
2719 			}
2720 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2721 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2722 		}
2723 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2724 			switch (reg_offset) {
2725 			case 0:
2726 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2728 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2729 						 NUM_BANKS(ADDR_SURF_16_BANK));
2730 				break;
2731 			case 1:
2732 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2733 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2734 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2735 						 NUM_BANKS(ADDR_SURF_16_BANK));
2736 				break;
2737 			case 2:
2738 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2739 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2740 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2741 						 NUM_BANKS(ADDR_SURF_16_BANK));
2742 				break;
2743 			case 3:
2744 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2746 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2747 						 NUM_BANKS(ADDR_SURF_16_BANK));
2748 				break;
2749 			case 4:
2750 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2752 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2753 						 NUM_BANKS(ADDR_SURF_8_BANK));
2754 				break;
2755 			case 5:
2756 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2759 						 NUM_BANKS(ADDR_SURF_4_BANK));
2760 				break;
2761 			case 6:
2762 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2765 						 NUM_BANKS(ADDR_SURF_2_BANK));
2766 				break;
2767 			case 8:
2768 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2770 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2771 						 NUM_BANKS(ADDR_SURF_16_BANK));
2772 				break;
2773 			case 9:
2774 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2775 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2776 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2777 						 NUM_BANKS(ADDR_SURF_16_BANK));
2778 				break;
2779 			case 10:
2780 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2781 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2782 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2783 						 NUM_BANKS(ADDR_SURF_16_BANK));
2784 				break;
2785 			case 11:
2786 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2787 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2788 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2789 						 NUM_BANKS(ADDR_SURF_16_BANK));
2790 				break;
2791 			case 12:
2792 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2793 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2794 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2795 						 NUM_BANKS(ADDR_SURF_8_BANK));
2796 				break;
2797 			case 13:
2798 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2799 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2800 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2801 						 NUM_BANKS(ADDR_SURF_4_BANK));
2802 				break;
2803 			case 14:
2804 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2805 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2806 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2807 						 NUM_BANKS(ADDR_SURF_2_BANK));
2808 				break;
2809 			default:
2810 				gb_tile_moden = 0;
2811 				break;
2812 			}
2813 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2814 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2815 		}
2816 	} else if (num_pipe_configs == 4) {
2817 		if (num_rbs == 4) {
2818 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2819 				switch (reg_offset) {
2820 				case 0:
2821 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2822 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2823 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2824 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2825 					break;
2826 				case 1:
2827 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2828 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2829 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2830 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2831 					break;
2832 				case 2:
2833 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2834 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2835 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2836 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2837 					break;
2838 				case 3:
2839 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2840 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2841 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2842 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2843 					break;
2844 				case 4:
2845 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2846 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2847 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2848 							 TILE_SPLIT(split_equal_to_row_size));
2849 					break;
2850 				case 5:
2851 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2852 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2853 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2854 					break;
2855 				case 6:
2856 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2857 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2858 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2859 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2860 					break;
2861 				case 7:
2862 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2863 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2864 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2865 							 TILE_SPLIT(split_equal_to_row_size));
2866 					break;
2867 				case 8:
2868 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2869 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2870 					break;
2871 				case 9:
2872 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2873 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2874 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2875 					break;
2876 				case 10:
2877 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2878 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2879 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2880 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2881 					break;
2882 				case 11:
2883 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2884 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2885 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2886 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2887 					break;
2888 				case 12:
2889 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2890 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2891 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2892 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2893 					break;
2894 				case 13:
2895 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2896 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2897 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2898 					break;
2899 				case 14:
2900 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2902 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2903 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2904 					break;
2905 				case 16:
2906 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2907 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2908 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2909 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2910 					break;
2911 				case 17:
2912 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2913 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2914 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2915 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2916 					break;
2917 				case 27:
2918 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2919 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2920 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2921 					break;
2922 				case 28:
2923 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2924 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2925 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2926 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2927 					break;
2928 				case 29:
2929 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2930 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2931 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2932 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2933 					break;
2934 				case 30:
2935 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2936 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2937 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2938 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2939 					break;
2940 				default:
2941 					gb_tile_moden = 0;
2942 					break;
2943 				}
2944 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2945 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2946 			}
2947 		} else if (num_rbs < 4) {
2948 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2949 				switch (reg_offset) {
2950 				case 0:
2951 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2952 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2953 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2954 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2955 					break;
2956 				case 1:
2957 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2958 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2959 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2960 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2961 					break;
2962 				case 2:
2963 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2964 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2965 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2966 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2967 					break;
2968 				case 3:
2969 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2970 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2971 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2972 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2973 					break;
2974 				case 4:
2975 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2976 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2977 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2978 							 TILE_SPLIT(split_equal_to_row_size));
2979 					break;
2980 				case 5:
2981 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2982 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2983 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2984 					break;
2985 				case 6:
2986 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2987 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2988 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2989 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2990 					break;
2991 				case 7:
2992 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2993 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2994 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2995 							 TILE_SPLIT(split_equal_to_row_size));
2996 					break;
2997 				case 8:
2998 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2999 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
3000 					break;
3001 				case 9:
3002 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3003 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3004 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
3005 					break;
3006 				case 10:
3007 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3008 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3009 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3010 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3011 					break;
3012 				case 11:
3013 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3014 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3015 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3016 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3017 					break;
3018 				case 12:
3019 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3020 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3021 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3022 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3023 					break;
3024 				case 13:
3025 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3026 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3027 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3028 					break;
3029 				case 14:
3030 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3031 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3032 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3033 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3034 					break;
3035 				case 16:
3036 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3037 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3038 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3039 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3040 					break;
3041 				case 17:
3042 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3043 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3044 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3045 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3046 					break;
3047 				case 27:
3048 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3049 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3050 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
3051 					break;
3052 				case 28:
3053 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3054 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3055 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3056 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3057 					break;
3058 				case 29:
3059 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3060 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3061 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3062 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3063 					break;
3064 				case 30:
3065 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3066 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3067 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
3068 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3069 					break;
3070 				default:
3071 					gb_tile_moden = 0;
3072 					break;
3073 				}
3074 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3075 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3076 			}
3077 		}
3078 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3079 			switch (reg_offset) {
3080 			case 0:
3081 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3082 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3083 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3084 						 NUM_BANKS(ADDR_SURF_16_BANK));
3085 				break;
3086 			case 1:
3087 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3088 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3089 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3090 						 NUM_BANKS(ADDR_SURF_16_BANK));
3091 				break;
3092 			case 2:
3093 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3094 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3095 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3096 						 NUM_BANKS(ADDR_SURF_16_BANK));
3097 				break;
3098 			case 3:
3099 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3100 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3101 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3102 						 NUM_BANKS(ADDR_SURF_16_BANK));
3103 				break;
3104 			case 4:
3105 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3106 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3107 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3108 						 NUM_BANKS(ADDR_SURF_16_BANK));
3109 				break;
3110 			case 5:
3111 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3112 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3113 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3114 						 NUM_BANKS(ADDR_SURF_8_BANK));
3115 				break;
3116 			case 6:
3117 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3118 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3119 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3120 						 NUM_BANKS(ADDR_SURF_4_BANK));
3121 				break;
3122 			case 8:
3123 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3124 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3125 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3126 						 NUM_BANKS(ADDR_SURF_16_BANK));
3127 				break;
3128 			case 9:
3129 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3130 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3131 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3132 						 NUM_BANKS(ADDR_SURF_16_BANK));
3133 				break;
3134 			case 10:
3135 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3136 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3137 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3138 						 NUM_BANKS(ADDR_SURF_16_BANK));
3139 				break;
3140 			case 11:
3141 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3142 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3143 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3144 						 NUM_BANKS(ADDR_SURF_16_BANK));
3145 				break;
3146 			case 12:
3147 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3148 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3149 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3150 						 NUM_BANKS(ADDR_SURF_16_BANK));
3151 				break;
3152 			case 13:
3153 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3154 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3155 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3156 						 NUM_BANKS(ADDR_SURF_8_BANK));
3157 				break;
3158 			case 14:
3159 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3160 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3161 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
3162 						 NUM_BANKS(ADDR_SURF_4_BANK));
3163 				break;
3164 			default:
3165 				gb_tile_moden = 0;
3166 				break;
3167 			}
3168 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3169 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3170 		}
3171 	} else if (num_pipe_configs == 2) {
3172 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
3173 			switch (reg_offset) {
3174 			case 0:
3175 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3176 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3177 						 PIPE_CONFIG(ADDR_SURF_P2) |
3178 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
3179 				break;
3180 			case 1:
3181 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3182 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3183 						 PIPE_CONFIG(ADDR_SURF_P2) |
3184 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
3185 				break;
3186 			case 2:
3187 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3188 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3189 						 PIPE_CONFIG(ADDR_SURF_P2) |
3190 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3191 				break;
3192 			case 3:
3193 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3194 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3195 						 PIPE_CONFIG(ADDR_SURF_P2) |
3196 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
3197 				break;
3198 			case 4:
3199 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3200 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3201 						 PIPE_CONFIG(ADDR_SURF_P2) |
3202 						 TILE_SPLIT(split_equal_to_row_size));
3203 				break;
3204 			case 5:
3205 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3206 						 PIPE_CONFIG(ADDR_SURF_P2) |
3207 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
3208 				break;
3209 			case 6:
3210 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3211 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3212 						 PIPE_CONFIG(ADDR_SURF_P2) |
3213 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
3214 				break;
3215 			case 7:
3216 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3217 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
3218 						 PIPE_CONFIG(ADDR_SURF_P2) |
3219 						 TILE_SPLIT(split_equal_to_row_size));
3220 				break;
3221 			case 8:
3222 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
3223 						PIPE_CONFIG(ADDR_SURF_P2);
3224 				break;
3225 			case 9:
3226 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3227 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3228 						 PIPE_CONFIG(ADDR_SURF_P2));
3229 				break;
3230 			case 10:
3231 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3232 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3233 						 PIPE_CONFIG(ADDR_SURF_P2) |
3234 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3235 				break;
3236 			case 11:
3237 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3238 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3239 						 PIPE_CONFIG(ADDR_SURF_P2) |
3240 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3241 				break;
3242 			case 12:
3243 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3244 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
3245 						 PIPE_CONFIG(ADDR_SURF_P2) |
3246 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3247 				break;
3248 			case 13:
3249 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3250 						 PIPE_CONFIG(ADDR_SURF_P2) |
3251 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
3252 				break;
3253 			case 14:
3254 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
3255 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3256 						 PIPE_CONFIG(ADDR_SURF_P2) |
3257 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3258 				break;
3259 			case 16:
3260 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3261 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3262 						 PIPE_CONFIG(ADDR_SURF_P2) |
3263 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3264 				break;
3265 			case 17:
3266 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3267 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
3268 						 PIPE_CONFIG(ADDR_SURF_P2) |
3269 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3270 				break;
3271 			case 27:
3272 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
3273 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3274 						 PIPE_CONFIG(ADDR_SURF_P2));
3275 				break;
3276 			case 28:
3277 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3278 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3279 						 PIPE_CONFIG(ADDR_SURF_P2) |
3280 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3281 				break;
3282 			case 29:
3283 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
3284 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3285 						 PIPE_CONFIG(ADDR_SURF_P2) |
3286 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3287 				break;
3288 			case 30:
3289 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
3290 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
3291 						 PIPE_CONFIG(ADDR_SURF_P2) |
3292 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
3293 				break;
3294 			default:
3295 				gb_tile_moden = 0;
3296 				break;
3297 			}
3298 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3299 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3300 		}
3301 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3302 			switch (reg_offset) {
3303 			case 0:
3304 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3305 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3306 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3307 						 NUM_BANKS(ADDR_SURF_16_BANK));
3308 				break;
3309 			case 1:
3310 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3311 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3312 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3313 						 NUM_BANKS(ADDR_SURF_16_BANK));
3314 				break;
3315 			case 2:
3316 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3317 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3318 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3319 						 NUM_BANKS(ADDR_SURF_16_BANK));
3320 				break;
3321 			case 3:
3322 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3323 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3324 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3325 						 NUM_BANKS(ADDR_SURF_16_BANK));
3326 				break;
3327 			case 4:
3328 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3329 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3330 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3331 						 NUM_BANKS(ADDR_SURF_16_BANK));
3332 				break;
3333 			case 5:
3334 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3335 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3336 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3337 						 NUM_BANKS(ADDR_SURF_16_BANK));
3338 				break;
3339 			case 6:
3340 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3341 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3342 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3343 						 NUM_BANKS(ADDR_SURF_8_BANK));
3344 				break;
3345 			case 8:
3346 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3347 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3348 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3349 						 NUM_BANKS(ADDR_SURF_16_BANK));
3350 				break;
3351 			case 9:
3352 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3353 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3354 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3355 						 NUM_BANKS(ADDR_SURF_16_BANK));
3356 				break;
3357 			case 10:
3358 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3359 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3360 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3361 						 NUM_BANKS(ADDR_SURF_16_BANK));
3362 				break;
3363 			case 11:
3364 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3365 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3366 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3367 						 NUM_BANKS(ADDR_SURF_16_BANK));
3368 				break;
3369 			case 12:
3370 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3371 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3372 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3373 						 NUM_BANKS(ADDR_SURF_16_BANK));
3374 				break;
3375 			case 13:
3376 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3377 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3378 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3379 						 NUM_BANKS(ADDR_SURF_16_BANK));
3380 				break;
3381 			case 14:
3382 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3383 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3384 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3385 						 NUM_BANKS(ADDR_SURF_8_BANK));
3386 				break;
3387 			default:
3388 				gb_tile_moden = 0;
3389 				break;
3390 			}
3391 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3392 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3393 		}
3394 	} else
3395 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3396 }
3397 
3398 /**
3399  * cik_select_se_sh - select which SE, SH to address
3400  *
3401  * @rdev: radeon_device pointer
3402  * @se_num: shader engine to address
3403  * @sh_num: sh block to address
3404  *
3405  * Select which SE, SH combinations to address. Certain
3406  * registers are instanced per SE or SH.  0xffffffff means
3407  * broadcast to all SEs or SHs (CIK).
3408  */
3409 static void cik_select_se_sh(struct radeon_device *rdev,
3410 			     u32 se_num, u32 sh_num)
3411 {
3412 	u32 data = INSTANCE_BROADCAST_WRITES;
3413 
3414 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3415 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3416 	else if (se_num == 0xffffffff)
3417 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3418 	else if (sh_num == 0xffffffff)
3419 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3420 	else
3421 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3422 	WREG32(GRBM_GFX_INDEX, data);
3423 }
3424 
3425 /**
3426  * cik_create_bitmask - create a bitmask
3427  *
3428  * @bit_width: length of the mask
3429  *
3430  * create a variable length bit mask (CIK).
3431  * Returns the bitmask.
3432  */
3433 static u32 cik_create_bitmask(u32 bit_width)
3434 {
3435 	u32 i, mask = 0;
3436 
3437 	for (i = 0; i < bit_width; i++) {
3438 		mask <<= 1;
3439 		mask |= 1;
3440 	}
3441 	return mask;
3442 }
3443 
3444 /**
3445  * cik_get_rb_disabled - computes the mask of disabled RBs
3446  *
3447  * @rdev: radeon_device pointer
3448  * @max_rb_num: max RBs (render backends) for the asic
3449  * @se_num: number of SEs (shader engines) for the asic
3450  * @sh_per_se: number of SH blocks per SE for the asic
3451  *
3452  * Calculates the bitmask of disabled RBs (CIK).
3453  * Returns the disabled RB bitmask.
3454  */
3455 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3456 			      u32 max_rb_num_per_se,
3457 			      u32 sh_per_se)
3458 {
3459 	u32 data, mask;
3460 
3461 	data = RREG32(CC_RB_BACKEND_DISABLE);
3462 	if (data & 1)
3463 		data &= BACKEND_DISABLE_MASK;
3464 	else
3465 		data = 0;
3466 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3467 
3468 	data >>= BACKEND_DISABLE_SHIFT;
3469 
3470 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3471 
3472 	return data & mask;
3473 }
3474 
3475 /**
3476  * cik_setup_rb - setup the RBs on the asic
3477  *
3478  * @rdev: radeon_device pointer
3479  * @se_num: number of SEs (shader engines) for the asic
3480  * @sh_per_se: number of SH blocks per SE for the asic
3481  * @max_rb_num: max RBs (render backends) for the asic
3482  *
3483  * Configures per-SE/SH RB registers (CIK).
3484  */
3485 static void cik_setup_rb(struct radeon_device *rdev,
3486 			 u32 se_num, u32 sh_per_se,
3487 			 u32 max_rb_num_per_se)
3488 {
3489 	int i, j;
3490 	u32 data, mask;
3491 	u32 disabled_rbs = 0;
3492 	u32 enabled_rbs = 0;
3493 
3494 	mutex_lock(&rdev->grbm_idx_mutex);
3495 	for (i = 0; i < se_num; i++) {
3496 		for (j = 0; j < sh_per_se; j++) {
3497 			cik_select_se_sh(rdev, i, j);
3498 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3499 			if (rdev->family == CHIP_HAWAII)
3500 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3501 			else
3502 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3503 		}
3504 	}
3505 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3506 	mutex_unlock(&rdev->grbm_idx_mutex);
3507 
3508 	mask = 1;
3509 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3510 		if (!(disabled_rbs & mask))
3511 			enabled_rbs |= mask;
3512 		mask <<= 1;
3513 	}
3514 
3515 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3516 
3517 	mutex_lock(&rdev->grbm_idx_mutex);
3518 	for (i = 0; i < se_num; i++) {
3519 		cik_select_se_sh(rdev, i, 0xffffffff);
3520 		data = 0;
3521 		for (j = 0; j < sh_per_se; j++) {
3522 			switch (enabled_rbs & 3) {
3523 			case 0:
3524 				if (j == 0)
3525 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3526 				else
3527 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3528 				break;
3529 			case 1:
3530 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3531 				break;
3532 			case 2:
3533 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3534 				break;
3535 			case 3:
3536 			default:
3537 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3538 				break;
3539 			}
3540 			enabled_rbs >>= 2;
3541 		}
3542 		WREG32(PA_SC_RASTER_CONFIG, data);
3543 	}
3544 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3545 	mutex_unlock(&rdev->grbm_idx_mutex);
3546 }
3547 
3548 /**
3549  * cik_gpu_init - setup the 3D engine
3550  *
3551  * @rdev: radeon_device pointer
3552  *
3553  * Configures the 3D engine and tiling configuration
3554  * registers so that the 3D engine is usable.
3555  */
3556 static void cik_gpu_init(struct radeon_device *rdev)
3557 {
3558 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3559 	u32 mc_shared_chmap, mc_arb_ramcfg;
3560 	u32 hdp_host_path_cntl;
3561 	u32 tmp;
3562 	int i, j;
3563 
3564 	switch (rdev->family) {
3565 	case CHIP_BONAIRE:
3566 		rdev->config.cik.max_shader_engines = 2;
3567 		rdev->config.cik.max_tile_pipes = 4;
3568 		rdev->config.cik.max_cu_per_sh = 7;
3569 		rdev->config.cik.max_sh_per_se = 1;
3570 		rdev->config.cik.max_backends_per_se = 2;
3571 		rdev->config.cik.max_texture_channel_caches = 4;
3572 		rdev->config.cik.max_gprs = 256;
3573 		rdev->config.cik.max_gs_threads = 32;
3574 		rdev->config.cik.max_hw_contexts = 8;
3575 
3576 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3577 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3578 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3579 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3580 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3581 		break;
3582 	case CHIP_HAWAII:
3583 		rdev->config.cik.max_shader_engines = 4;
3584 		rdev->config.cik.max_tile_pipes = 16;
3585 		rdev->config.cik.max_cu_per_sh = 11;
3586 		rdev->config.cik.max_sh_per_se = 1;
3587 		rdev->config.cik.max_backends_per_se = 4;
3588 		rdev->config.cik.max_texture_channel_caches = 16;
3589 		rdev->config.cik.max_gprs = 256;
3590 		rdev->config.cik.max_gs_threads = 32;
3591 		rdev->config.cik.max_hw_contexts = 8;
3592 
3593 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3594 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3595 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3596 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3597 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3598 		break;
3599 	case CHIP_KAVERI:
3600 		rdev->config.cik.max_shader_engines = 1;
3601 		rdev->config.cik.max_tile_pipes = 4;
3602 		if ((rdev->pdev->device == 0x1304) ||
3603 		    (rdev->pdev->device == 0x1305) ||
3604 		    (rdev->pdev->device == 0x130C) ||
3605 		    (rdev->pdev->device == 0x130F) ||
3606 		    (rdev->pdev->device == 0x1310) ||
3607 		    (rdev->pdev->device == 0x1311) ||
3608 		    (rdev->pdev->device == 0x131C)) {
3609 			rdev->config.cik.max_cu_per_sh = 8;
3610 			rdev->config.cik.max_backends_per_se = 2;
3611 		} else if ((rdev->pdev->device == 0x1309) ||
3612 			   (rdev->pdev->device == 0x130A) ||
3613 			   (rdev->pdev->device == 0x130D) ||
3614 			   (rdev->pdev->device == 0x1313) ||
3615 			   (rdev->pdev->device == 0x131D)) {
3616 			rdev->config.cik.max_cu_per_sh = 6;
3617 			rdev->config.cik.max_backends_per_se = 2;
3618 		} else if ((rdev->pdev->device == 0x1306) ||
3619 			   (rdev->pdev->device == 0x1307) ||
3620 			   (rdev->pdev->device == 0x130B) ||
3621 			   (rdev->pdev->device == 0x130E) ||
3622 			   (rdev->pdev->device == 0x1315) ||
3623 			   (rdev->pdev->device == 0x1318) ||
3624 			   (rdev->pdev->device == 0x131B)) {
3625 			rdev->config.cik.max_cu_per_sh = 4;
3626 			rdev->config.cik.max_backends_per_se = 1;
3627 		} else {
3628 			rdev->config.cik.max_cu_per_sh = 3;
3629 			rdev->config.cik.max_backends_per_se = 1;
3630 		}
3631 		rdev->config.cik.max_sh_per_se = 1;
3632 		rdev->config.cik.max_texture_channel_caches = 4;
3633 		rdev->config.cik.max_gprs = 256;
3634 		rdev->config.cik.max_gs_threads = 16;
3635 		rdev->config.cik.max_hw_contexts = 8;
3636 
3637 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3638 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3639 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3640 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3641 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3642 		break;
3643 	case CHIP_KABINI:
3644 	case CHIP_MULLINS:
3645 	default:
3646 		rdev->config.cik.max_shader_engines = 1;
3647 		rdev->config.cik.max_tile_pipes = 2;
3648 		rdev->config.cik.max_cu_per_sh = 2;
3649 		rdev->config.cik.max_sh_per_se = 1;
3650 		rdev->config.cik.max_backends_per_se = 1;
3651 		rdev->config.cik.max_texture_channel_caches = 2;
3652 		rdev->config.cik.max_gprs = 256;
3653 		rdev->config.cik.max_gs_threads = 16;
3654 		rdev->config.cik.max_hw_contexts = 8;
3655 
3656 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3657 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3658 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3659 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3660 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3661 		break;
3662 	}
3663 
3664 	/* Initialize HDP */
3665 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3666 		WREG32((0x2c14 + j), 0x00000000);
3667 		WREG32((0x2c18 + j), 0x00000000);
3668 		WREG32((0x2c1c + j), 0x00000000);
3669 		WREG32((0x2c20 + j), 0x00000000);
3670 		WREG32((0x2c24 + j), 0x00000000);
3671 	}
3672 
3673 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3674 	WREG32(SRBM_INT_CNTL, 0x1);
3675 	WREG32(SRBM_INT_ACK, 0x1);
3676 
3677 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3678 
3679 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3680 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3681 
3682 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3683 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3684 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3685 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3686 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3687 		rdev->config.cik.mem_row_size_in_kb = 4;
3688 	/* XXX use MC settings? */
3689 	rdev->config.cik.shader_engine_tile_size = 32;
3690 	rdev->config.cik.num_gpus = 1;
3691 	rdev->config.cik.multi_gpu_tile_size = 64;
3692 
3693 	/* fix up row size */
3694 	gb_addr_config &= ~ROW_SIZE_MASK;
3695 	switch (rdev->config.cik.mem_row_size_in_kb) {
3696 	case 1:
3697 	default:
3698 		gb_addr_config |= ROW_SIZE(0);
3699 		break;
3700 	case 2:
3701 		gb_addr_config |= ROW_SIZE(1);
3702 		break;
3703 	case 4:
3704 		gb_addr_config |= ROW_SIZE(2);
3705 		break;
3706 	}
3707 
3708 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3709 	 * not have bank info, so create a custom tiling dword.
3710 	 * bits 3:0   num_pipes
3711 	 * bits 7:4   num_banks
3712 	 * bits 11:8  group_size
3713 	 * bits 15:12 row_size
3714 	 */
3715 	rdev->config.cik.tile_config = 0;
3716 	switch (rdev->config.cik.num_tile_pipes) {
3717 	case 1:
3718 		rdev->config.cik.tile_config |= (0 << 0);
3719 		break;
3720 	case 2:
3721 		rdev->config.cik.tile_config |= (1 << 0);
3722 		break;
3723 	case 4:
3724 		rdev->config.cik.tile_config |= (2 << 0);
3725 		break;
3726 	case 8:
3727 	default:
3728 		/* XXX what about 12? */
3729 		rdev->config.cik.tile_config |= (3 << 0);
3730 		break;
3731 	}
3732 	rdev->config.cik.tile_config |=
3733 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3734 	rdev->config.cik.tile_config |=
3735 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3736 	rdev->config.cik.tile_config |=
3737 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3738 
3739 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3740 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3741 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3742 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3743 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3744 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3745 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3746 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3747 
3748 	cik_tiling_mode_table_init(rdev);
3749 
3750 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3751 		     rdev->config.cik.max_sh_per_se,
3752 		     rdev->config.cik.max_backends_per_se);
3753 
3754 	rdev->config.cik.active_cus = 0;
3755 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3756 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3757 			rdev->config.cik.active_cus +=
3758 				hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3759 		}
3760 	}
3761 
3762 	/* set HW defaults for 3D engine */
3763 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3764 
3765 	mutex_lock(&rdev->grbm_idx_mutex);
3766 	/*
3767 	 * making sure that the following register writes will be broadcasted
3768 	 * to all the shaders
3769 	 */
3770 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3771 	WREG32(SX_DEBUG_1, 0x20);
3772 
3773 	WREG32(TA_CNTL_AUX, 0x00010000);
3774 
3775 	tmp = RREG32(SPI_CONFIG_CNTL);
3776 	tmp |= 0x03000000;
3777 	WREG32(SPI_CONFIG_CNTL, tmp);
3778 
3779 	WREG32(SQ_CONFIG, 1);
3780 
3781 	WREG32(DB_DEBUG, 0);
3782 
3783 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3784 	tmp |= 0x00000400;
3785 	WREG32(DB_DEBUG2, tmp);
3786 
3787 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3788 	tmp |= 0x00020200;
3789 	WREG32(DB_DEBUG3, tmp);
3790 
3791 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3792 	tmp |= 0x00018208;
3793 	WREG32(CB_HW_CONTROL, tmp);
3794 
3795 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3796 
3797 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3798 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3799 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3800 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3801 
3802 	WREG32(VGT_NUM_INSTANCES, 1);
3803 
3804 	WREG32(CP_PERFMON_CNTL, 0);
3805 
3806 	WREG32(SQ_CONFIG, 0);
3807 
3808 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3809 					  FORCE_EOV_MAX_REZ_CNT(255)));
3810 
3811 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3812 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3813 
3814 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3815 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3816 
3817 	tmp = RREG32(HDP_MISC_CNTL);
3818 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3819 	WREG32(HDP_MISC_CNTL, tmp);
3820 
3821 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3822 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3823 
3824 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3825 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3826 	mutex_unlock(&rdev->grbm_idx_mutex);
3827 
3828 	udelay(50);
3829 }
3830 
3831 /*
3832  * GPU scratch registers helpers function.
3833  */
3834 /**
3835  * cik_scratch_init - setup driver info for CP scratch regs
3836  *
3837  * @rdev: radeon_device pointer
3838  *
3839  * Set up the number and offset of the CP scratch registers.
3840  * NOTE: use of CP scratch registers is a legacy inferface and
3841  * is not used by default on newer asics (r6xx+).  On newer asics,
3842  * memory buffers are used for fences rather than scratch regs.
3843  */
3844 static void cik_scratch_init(struct radeon_device *rdev)
3845 {
3846 	int i;
3847 
3848 	rdev->scratch.num_reg = 7;
3849 	rdev->scratch.reg_base = SCRATCH_REG0;
3850 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3851 		rdev->scratch.free[i] = true;
3852 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3853 	}
3854 }
3855 
3856 /**
3857  * cik_ring_test - basic gfx ring test
3858  *
3859  * @rdev: radeon_device pointer
3860  * @ring: radeon_ring structure holding ring information
3861  *
3862  * Allocate a scratch register and write to it using the gfx ring (CIK).
3863  * Provides a basic gfx ring test to verify that the ring is working.
3864  * Used by cik_cp_gfx_resume();
3865  * Returns 0 on success, error on failure.
3866  */
3867 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3868 {
3869 	uint32_t scratch;
3870 	uint32_t tmp = 0;
3871 	unsigned i;
3872 	int r;
3873 
3874 	r = radeon_scratch_get(rdev, &scratch);
3875 	if (r) {
3876 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3877 		return r;
3878 	}
3879 	WREG32(scratch, 0xCAFEDEAD);
3880 	r = radeon_ring_lock(rdev, ring, 3);
3881 	if (r) {
3882 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3883 		radeon_scratch_free(rdev, scratch);
3884 		return r;
3885 	}
3886 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3887 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3888 	radeon_ring_write(ring, 0xDEADBEEF);
3889 	radeon_ring_unlock_commit(rdev, ring, false);
3890 
3891 	for (i = 0; i < rdev->usec_timeout; i++) {
3892 		tmp = RREG32(scratch);
3893 		if (tmp == 0xDEADBEEF)
3894 			break;
3895 		DRM_UDELAY(1);
3896 	}
3897 	if (i < rdev->usec_timeout) {
3898 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3899 	} else {
3900 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3901 			  ring->idx, scratch, tmp);
3902 		r = -EINVAL;
3903 	}
3904 	radeon_scratch_free(rdev, scratch);
3905 	return r;
3906 }
3907 
3908 /**
3909  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3910  *
3911  * @rdev: radeon_device pointer
3912  * @ridx: radeon ring index
3913  *
3914  * Emits an hdp flush on the cp.
3915  */
3916 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3917 				       int ridx)
3918 {
3919 	struct radeon_ring *ring = &rdev->ring[ridx];
3920 	u32 ref_and_mask;
3921 
3922 	switch (ring->idx) {
3923 	case CAYMAN_RING_TYPE_CP1_INDEX:
3924 	case CAYMAN_RING_TYPE_CP2_INDEX:
3925 	default:
3926 		switch (ring->me) {
3927 		case 0:
3928 			ref_and_mask = CP2 << ring->pipe;
3929 			break;
3930 		case 1:
3931 			ref_and_mask = CP6 << ring->pipe;
3932 			break;
3933 		default:
3934 			return;
3935 		}
3936 		break;
3937 	case RADEON_RING_TYPE_GFX_INDEX:
3938 		ref_and_mask = CP0;
3939 		break;
3940 	}
3941 
3942 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3943 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3944 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3945 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3946 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3947 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3948 	radeon_ring_write(ring, ref_and_mask);
3949 	radeon_ring_write(ring, ref_and_mask);
3950 	radeon_ring_write(ring, 0x20); /* poll interval */
3951 }
3952 
3953 /**
3954  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3955  *
3956  * @rdev: radeon_device pointer
3957  * @fence: radeon fence object
3958  *
3959  * Emits a fence sequnce number on the gfx ring and flushes
3960  * GPU caches.
3961  */
void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
			     struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address of this ring's fence slot */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* Workaround for cache flush problems. First send a dummy EOP
	 * event down the pipe with seq one below.
	 */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc); /* dword-aligned fence address */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) |
				DATA_SEL(1) | INT_SEL(0)); /* INT_SEL(0): no interrupt for dummy */
	radeon_ring_write(ring, fence->seq - 1);
	radeon_ring_write(ring, 0);

	/* Then send the real EOP event down the pipe. */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2)); /* INT_SEL(2): raise interrupt this time */
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3993 
3994 /**
3995  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3996  *
3997  * @rdev: radeon_device pointer
3998  * @fence: radeon fence object
3999  *
4000  * Emits a fence sequnce number on the compute ring and flushes
4001  * GPU caches.
4002  */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	/* GPU address of this ring's fence slot */
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	radeon_ring_write(ring, addr & 0xfffffffc); /* dword-aligned fence address */
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq); /* fence sequence number payload */
	radeon_ring_write(ring, 0);
}
4021 
4022 /**
4023  * cik_semaphore_ring_emit - emit a semaphore on the CP ring
4024  *
4025  * @rdev: radeon_device pointer
4026  * @ring: radeon ring buffer object
4027  * @semaphore: radeon semaphore object
4028  * @emit_wait: Is this a sempahore wait?
4029  *
4030  * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
4031  * from running ahead of semaphore waits.
4032  */
4033 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
4034 			     struct radeon_ring *ring,
4035 			     struct radeon_semaphore *semaphore,
4036 			     bool emit_wait)
4037 {
4038 	uint64_t addr = semaphore->gpu_addr;
4039 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
4040 
4041 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
4042 	radeon_ring_write(ring, lower_32_bits(addr));
4043 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
4044 
4045 	if (emit_wait && ring->idx == RADEON_RING_TYPE_GFX_INDEX) {
4046 		/* Prevent the PFP from running ahead of the semaphore wait */
4047 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4048 		radeon_ring_write(ring, 0x0);
4049 	}
4050 
4051 	return true;
4052 }
4053 
4054 /**
4055  * cik_copy_cpdma - copy pages using the CP DMA engine
4056  *
4057  * @rdev: radeon_device pointer
4058  * @src_offset: src GPU address
4059  * @dst_offset: dst GPU address
4060  * @num_gpu_pages: number of GPU pages to xfer
4061  * @resv: reservation object to sync to
4062  *
4063  * Copy GPU paging using the CP DMA engine (CIK+).
4064  * Used by the radeon ttm implementation to move pages if
4065  * registered as the asic copy callback.
4066  */
4067 struct radeon_fence *cik_copy_cpdma(struct radeon_device *rdev,
4068 				    uint64_t src_offset, uint64_t dst_offset,
4069 				    unsigned num_gpu_pages,
4070 				    struct reservation_object *resv)
4071 {
4072 	struct radeon_fence *fence;
4073 	struct radeon_sync sync;
4074 	int ring_index = rdev->asic->copy.blit_ring_index;
4075 	struct radeon_ring *ring = &rdev->ring[ring_index];
4076 	u32 size_in_bytes, cur_size_in_bytes, control;
4077 	int i, num_loops;
4078 	int r = 0;
4079 
4080 	radeon_sync_create(&sync);
4081 
4082 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
4083 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
4084 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
4085 	if (r) {
4086 		DRM_ERROR("radeon: moving bo (%d).\n", r);
4087 		radeon_sync_free(rdev, &sync, NULL);
4088 		return ERR_PTR(r);
4089 	}
4090 
4091 	radeon_sync_resv(rdev, &sync, resv, false);
4092 	radeon_sync_rings(rdev, &sync, ring->idx);
4093 
4094 	for (i = 0; i < num_loops; i++) {
4095 		cur_size_in_bytes = size_in_bytes;
4096 		if (cur_size_in_bytes > 0x1fffff)
4097 			cur_size_in_bytes = 0x1fffff;
4098 		size_in_bytes -= cur_size_in_bytes;
4099 		control = 0;
4100 		if (size_in_bytes == 0)
4101 			control |= PACKET3_DMA_DATA_CP_SYNC;
4102 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
4103 		radeon_ring_write(ring, control);
4104 		radeon_ring_write(ring, lower_32_bits(src_offset));
4105 		radeon_ring_write(ring, upper_32_bits(src_offset));
4106 		radeon_ring_write(ring, lower_32_bits(dst_offset));
4107 		radeon_ring_write(ring, upper_32_bits(dst_offset));
4108 		radeon_ring_write(ring, cur_size_in_bytes);
4109 		src_offset += cur_size_in_bytes;
4110 		dst_offset += cur_size_in_bytes;
4111 	}
4112 
4113 	r = radeon_fence_emit(rdev, &fence, ring->idx);
4114 	if (r) {
4115 		radeon_ring_unlock_undo(rdev, ring);
4116 		radeon_sync_free(rdev, &sync, NULL);
4117 		return ERR_PTR(r);
4118 	}
4119 
4120 	radeon_ring_unlock_commit(rdev, ring, false);
4121 	radeon_sync_free(rdev, &sync, fence);
4122 
4123 	return fence;
4124 }
4125 
4126 /*
4127  * IB stuff
4128  */
4129 /**
4130  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
4131  *
4132  * @rdev: radeon_device pointer
4133  * @ib: radeon indirect buffer object
4134  *
4135  * Emits an DE (drawing engine) or CE (constant engine) IB
4136  * on the gfx ring.  IBs are usually generated by userspace
4137  * acceleration drivers and submitted to the kernel for
4138  * sheduling on the ring.  This function schedules the IB
4139  * on the gfx ring for execution by the GPU.
4140  */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	/* no VM attached -> use vmid 0 */
	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for the SET_UCONFIG_REG write below plus
			 * 4 dwords for the INDIRECT_BUFFER packet at the end
			 */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for the WRITE_DATA packet below plus
			 * 4 dwords for the INDIRECT_BUFFER packet at the end
			 */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* length in dwords, vmid in bits 31:24 */
	control |= ib->length_dw | (vm_id << 24);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
4184 
4185 /**
4186  * cik_ib_test - basic gfx ring IB test
4187  *
4188  * @rdev: radeon_device pointer
4189  * @ring: radeon_ring structure holding ring information
4190  *
4191  * Allocate an IB and execute it on the gfx ring (CIK).
4192  * Provides a basic gfx ring test to verify that IBs are working.
4193  * Returns 0 on success, error on failure.
4194  */
4195 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
4196 {
4197 	struct radeon_ib ib;
4198 	uint32_t scratch;
4199 	uint32_t tmp = 0;
4200 	unsigned i;
4201 	int r;
4202 
4203 	r = radeon_scratch_get(rdev, &scratch);
4204 	if (r) {
4205 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
4206 		return r;
4207 	}
4208 	WREG32(scratch, 0xCAFEDEAD);
4209 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
4210 	if (r) {
4211 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
4212 		radeon_scratch_free(rdev, scratch);
4213 		return r;
4214 	}
4215 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
4216 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
4217 	ib.ptr[2] = 0xDEADBEEF;
4218 	ib.length_dw = 3;
4219 	r = radeon_ib_schedule(rdev, &ib, NULL, false);
4220 	if (r) {
4221 		radeon_scratch_free(rdev, scratch);
4222 		radeon_ib_free(rdev, &ib);
4223 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
4224 		return r;
4225 	}
4226 	r = radeon_fence_wait(ib.fence, false);
4227 	if (r) {
4228 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
4229 		radeon_scratch_free(rdev, scratch);
4230 		radeon_ib_free(rdev, &ib);
4231 		return r;
4232 	}
4233 	for (i = 0; i < rdev->usec_timeout; i++) {
4234 		tmp = RREG32(scratch);
4235 		if (tmp == 0xDEADBEEF)
4236 			break;
4237 		DRM_UDELAY(1);
4238 	}
4239 	if (i < rdev->usec_timeout) {
4240 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
4241 	} else {
4242 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
4243 			  scratch, tmp);
4244 		r = -EINVAL;
4245 	}
4246 	radeon_scratch_free(rdev, scratch);
4247 	radeon_ib_free(rdev, &ib);
4248 	return r;
4249 }
4250 
4251 /*
4252  * CP.
4253  * On CIK, gfx and compute now have independant command processors.
4254  *
4255  * GFX
4256  * Gfx consists of a single ring and can process both gfx jobs and
4257  * compute jobs.  The gfx CP consists of three microengines (ME):
4258  * PFP - Pre-Fetch Parser
4259  * ME - Micro Engine
4260  * CE - Constant Engine
4261  * The PFP and ME make up what is considered the Drawing Engine (DE).
4262  * The CE is an asynchronous engine used for updating buffer desciptors
4263  * used by the DE so that they can be loaded into cache in parallel
4264  * while the DE is processing state update packets.
4265  *
4266  * Compute
4267  * The compute CP consists of two microengines (ME):
4268  * MEC1 - Compute MicroEngine 1
4269  * MEC2 - Compute MicroEngine 2
4270  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
4271  * The queues are exposed to userspace and are programmed directly
4272  * by the compute runtime.
4273  */
4274 /**
4275  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
4276  *
4277  * @rdev: radeon_device pointer
4278  * @enable: enable or disable the MEs
4279  *
4280  * Halts or unhalts the gfx MEs.
4281  */
4282 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
4283 {
4284 	if (enable)
4285 		WREG32(CP_ME_CNTL, 0);
4286 	else {
4287 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4288 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
4289 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
4290 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4291 	}
4292 	udelay(50);
4293 }
4294 
4295 /**
4296  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
4297  *
4298  * @rdev: radeon_device pointer
4299  *
4300  * Loads the gfx PFP, ME, and CE ucode.
4301  * Returns 0 for success, -EINVAL if the ucode is not available.
4302  */
4303 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
4304 {
4305 	int i;
4306 
4307 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
4308 		return -EINVAL;
4309 
4310 	cik_cp_gfx_enable(rdev, false);
4311 
4312 	if (rdev->new_fw) {
4313 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
4314 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
4315 		const struct gfx_firmware_header_v1_0 *ce_hdr =
4316 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
4317 		const struct gfx_firmware_header_v1_0 *me_hdr =
4318 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
4319 		const __le32 *fw_data;
4320 		u32 fw_size;
4321 
4322 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
4323 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
4324 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
4325 
4326 		/* PFP */
4327 		fw_data = (const __le32 *)
4328 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
4329 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
4330 		WREG32(CP_PFP_UCODE_ADDR, 0);
4331 		for (i = 0; i < fw_size; i++)
4332 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
4333 		WREG32(CP_PFP_UCODE_ADDR, le32_to_cpu(pfp_hdr->header.ucode_version));
4334 
4335 		/* CE */
4336 		fw_data = (const __le32 *)
4337 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
4338 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
4339 		WREG32(CP_CE_UCODE_ADDR, 0);
4340 		for (i = 0; i < fw_size; i++)
4341 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
4342 		WREG32(CP_CE_UCODE_ADDR, le32_to_cpu(ce_hdr->header.ucode_version));
4343 
4344 		/* ME */
4345 		fw_data = (const __be32 *)
4346 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
4347 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
4348 		WREG32(CP_ME_RAM_WADDR, 0);
4349 		for (i = 0; i < fw_size; i++)
4350 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
4351 		WREG32(CP_ME_RAM_WADDR, le32_to_cpu(me_hdr->header.ucode_version));
4352 		WREG32(CP_ME_RAM_RADDR, le32_to_cpu(me_hdr->header.ucode_version));
4353 	} else {
4354 		const __be32 *fw_data;
4355 
4356 		/* PFP */
4357 		fw_data = (const __be32 *)rdev->pfp_fw->data;
4358 		WREG32(CP_PFP_UCODE_ADDR, 0);
4359 		for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
4360 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
4361 		WREG32(CP_PFP_UCODE_ADDR, 0);
4362 
4363 		/* CE */
4364 		fw_data = (const __be32 *)rdev->ce_fw->data;
4365 		WREG32(CP_CE_UCODE_ADDR, 0);
4366 		for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
4367 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
4368 		WREG32(CP_CE_UCODE_ADDR, 0);
4369 
4370 		/* ME */
4371 		fw_data = (const __be32 *)rdev->me_fw->data;
4372 		WREG32(CP_ME_RAM_WADDR, 0);
4373 		for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
4374 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
4375 		WREG32(CP_ME_RAM_WADDR, 0);
4376 	}
4377 
4378 	return 0;
4379 }
4380 
4381 /**
4382  * cik_cp_gfx_start - start the gfx ring
4383  *
4384  * @rdev: radeon_device pointer
4385  *
4386  * Enables the ring and loads the clear state context and other
4387  * packets required to init the ring.
4388  * Returns 0 for success, error for failure.
4389  */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* clear state buffer plus 17 dwords of init packets emitted below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	radeon_ring_write(ring, 0x8000);
	radeon_ring_write(ring, 0x8000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* stream the golden default state into the preamble */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring, false);

	return 0;
}
4441 
4442 /**
4443  * cik_cp_gfx_fini - stop the gfx ring
4444  *
4445  * @rdev: radeon_device pointer
4446  *
4447  * Stop the gfx ring and tear down the driver ring
4448  * info.
4449  */
static void cik_cp_gfx_fini(struct radeon_device *rdev)
{
	/* halt the gfx MEs before tearing down the ring object */
	cik_cp_gfx_enable(rdev, false);
	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
}
4455 
4456 /**
4457  * cik_cp_gfx_resume - setup the gfx ring buffer registers
4458  *
4459  * @rdev: radeon_device pointer
4460  *
4461  * Program the location and size of the gfx ring buffer
4462  * and test it to make sure it's working.
4463  * Returns 0 for success, error for failure.
4464  */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8); /* size field is log2 of qwords */
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address wether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE; /* no writeback buffer available */

	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp); /* also drops RB_RPTR_WR_ENA again */

	rb_addr = ring->gpu_addr >> 8; /* base is programmed in 256-byte units */
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* gfx ring can move bos again, so expose the full VRAM size */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4531 
4532 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4533 		     struct radeon_ring *ring)
4534 {
4535 	u32 rptr;
4536 
4537 	if (rdev->wb.enabled)
4538 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4539 	else
4540 		rptr = RREG32(CP_RB0_RPTR);
4541 
4542 	return rptr;
4543 }
4544 
4545 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4546 		     struct radeon_ring *ring)
4547 {
4548 	u32 wptr;
4549 
4550 	wptr = RREG32(CP_RB0_WPTR);
4551 
4552 	return wptr;
4553 }
4554 
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	/* read back, presumably to flush the posted register write — confirm */
	(void)RREG32(CP_RB0_WPTR);
}
4561 
4562 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4563 			 struct radeon_ring *ring)
4564 {
4565 	u32 rptr;
4566 
4567 	if (rdev->wb.enabled) {
4568 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4569 	} else {
4570 		mutex_lock(&rdev->srbm_mutex);
4571 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4572 		rptr = RREG32(CP_HQD_PQ_RPTR);
4573 		cik_srbm_select(rdev, 0, 0, 0, 0);
4574 		mutex_unlock(&rdev->srbm_mutex);
4575 	}
4576 
4577 	return rptr;
4578 }
4579 
/**
 * cik_compute_get_wptr - get the current write pointer of a compute ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Returns the write pointer from the writeback buffer when writeback is
 * enabled.  Otherwise it reads CP_HQD_PQ_WPTR under srbm_mutex with
 * this ring's me/pipe/queue selected (selection restored to 0,0,0,0
 * before unlocking).
 */
u32 cik_compute_get_wptr(struct radeon_device *rdev,
			 struct radeon_ring *ring)
{
	u32 wptr;

	if (rdev->wb.enabled) {
		/* XXX check if swapping is necessary on BE */
		wptr = rdev->wb.wb[ring->wptr_offs/4];
	} else {
		mutex_lock(&rdev->srbm_mutex);
		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
		wptr = RREG32(CP_HQD_PQ_WPTR);
		cik_srbm_select(rdev, 0, 0, 0, 0);
		mutex_unlock(&rdev->srbm_mutex);
	}

	return wptr;
}
4598 
/**
 * cik_compute_set_wptr - commit a compute ring write pointer
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 *
 * Mirrors the new write pointer into the writeback buffer (which the
 * CP polls when wptr polling is enabled) and rings the queue's
 * doorbell to notify the MEC.
 */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4606 
4607 /**
4608  * cik_cp_compute_enable - enable/disable the compute CP MEs
4609  *
4610  * @rdev: radeon_device pointer
4611  * @enable: enable or disable the MEs
4612  *
4613  * Halts or unhalts the compute MEs.
4614  */
4615 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4616 {
4617 	if (enable)
4618 		WREG32(CP_MEC_CNTL, 0);
4619 	else {
4620 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4621 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4622 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4623 	}
4624 	udelay(50);
4625 }
4626 
4627 /**
4628  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4629  *
4630  * @rdev: radeon_device pointer
4631  *
4632  * Loads the compute MEC1&2 ucode.
4633  * Returns 0 for success, -EINVAL if the ucode is not available.
4634  */
4635 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4636 {
4637 	int i;
4638 
4639 	if (!rdev->mec_fw)
4640 		return -EINVAL;
4641 
4642 	cik_cp_compute_enable(rdev, false);
4643 
4644 	if (rdev->new_fw) {
4645 		const struct gfx_firmware_header_v1_0 *mec_hdr =
4646 			(const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
4647 		const __le32 *fw_data;
4648 		u32 fw_size;
4649 
4650 		radeon_ucode_print_gfx_hdr(&mec_hdr->header);
4651 
4652 		/* MEC1 */
4653 		fw_data = (const __le32 *)
4654 			(rdev->mec_fw->data + le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes));
4655 		fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4;
4656 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4657 		for (i = 0; i < fw_size; i++)
4658 			WREG32(CP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data++));
4659 		WREG32(CP_MEC_ME1_UCODE_ADDR, le32_to_cpu(mec_hdr->header.ucode_version));
4660 
4661 		/* MEC2 */
4662 		if (rdev->family == CHIP_KAVERI) {
4663 			const struct gfx_firmware_header_v1_0 *mec2_hdr =
4664 				(const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
4665 
4666 			fw_data = (const __le32 *)
4667 				(rdev->mec2_fw->data +
4668 				 le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes));
4669 			fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4;
4670 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4671 			for (i = 0; i < fw_size; i++)
4672 				WREG32(CP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data++));
4673 			WREG32(CP_MEC_ME2_UCODE_ADDR, le32_to_cpu(mec2_hdr->header.ucode_version));
4674 		}
4675 	} else {
4676 		const __be32 *fw_data;
4677 
4678 		/* MEC1 */
4679 		fw_data = (const __be32 *)rdev->mec_fw->data;
4680 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4681 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4682 			WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4683 		WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4684 
4685 		if (rdev->family == CHIP_KAVERI) {
4686 			/* MEC2 */
4687 			fw_data = (const __be32 *)rdev->mec_fw->data;
4688 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4689 			for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4690 				WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4691 			WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4692 		}
4693 	}
4694 
4695 	return 0;
4696 }
4697 
4698 /**
4699  * cik_cp_compute_start - start the compute queues
4700  *
4701  * @rdev: radeon_device pointer
4702  *
4703  * Enable the compute queues.
4704  * Returns 0 for success, error for failure.
4705  */
4706 static int cik_cp_compute_start(struct radeon_device *rdev)
4707 {
4708 	cik_cp_compute_enable(rdev, true);
4709 
4710 	return 0;
4711 }
4712 
4713 /**
4714  * cik_cp_compute_fini - stop the compute queues
4715  *
4716  * @rdev: radeon_device pointer
4717  *
4718  * Stop the compute queues and tear down the driver queue
4719  * info.
4720  */
4721 static void cik_cp_compute_fini(struct radeon_device *rdev)
4722 {
4723 	int i, idx, r;
4724 
4725 	cik_cp_compute_enable(rdev, false);
4726 
4727 	for (i = 0; i < 2; i++) {
4728 		if (i == 0)
4729 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4730 		else
4731 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4732 
4733 		if (rdev->ring[idx].mqd_obj) {
4734 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4735 			if (unlikely(r != 0))
4736 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4737 
4738 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4739 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4740 
4741 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4742 			rdev->ring[idx].mqd_obj = NULL;
4743 		}
4744 	}
4745 }
4746 
/**
 * cik_mec_fini - tear down the MEC HPD EOP buffer
 *
 * @rdev: radeon_device pointer
 *
 * Unpins and frees the HPD EOP buffer object allocated by
 * cik_mec_init().  Safe to call when the buffer was never
 * allocated.  A failed reserve is only warned about; teardown
 * proceeds anyway.
 */
static void cik_mec_fini(struct radeon_device *rdev)
{
	int r;

	if (rdev->mec.hpd_eop_obj) {
		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
		if (unlikely(r != 0))
			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
		rdev->mec.hpd_eop_obj = NULL;
	}
}
4762 
4763 #define MEC_HPD_SIZE 2048
4764 
4765 static int cik_mec_init(struct radeon_device *rdev)
4766 {
4767 	int r;
4768 	u32 *hpd;
4769 
4770 	/*
4771 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4772 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4773 	 * Nonetheless, we assign only 1 pipe because all other pipes will
4774 	 * be handled by KFD
4775 	 */
4776 	rdev->mec.num_mec = 1;
4777 	rdev->mec.num_pipe = 1;
4778 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4779 
4780 	if (rdev->mec.hpd_eop_obj == NULL) {
4781 		r = radeon_bo_create(rdev,
4782 				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4783 				     PAGE_SIZE, true,
4784 				     RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL,
4785 				     &rdev->mec.hpd_eop_obj);
4786 		if (r) {
4787 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4788 			return r;
4789 		}
4790 	}
4791 
4792 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4793 	if (unlikely(r != 0)) {
4794 		cik_mec_fini(rdev);
4795 		return r;
4796 	}
4797 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4798 			  &rdev->mec.hpd_eop_gpu_addr);
4799 	if (r) {
4800 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4801 		cik_mec_fini(rdev);
4802 		return r;
4803 	}
4804 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4805 	if (r) {
4806 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4807 		cik_mec_fini(rdev);
4808 		return r;
4809 	}
4810 
4811 	/* clear memory.  Not sure if this is required or not */
4812 	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4813 
4814 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4815 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4816 
4817 	return 0;
4818 }
4819 
/* CPU-side shadow of the per-queue compute hardware queue descriptor
 * (HQD) register block (CP_MQD_* / CP_HQD_* registers).  Embedded in
 * struct bonaire_mqd and programmed register-by-register in
 * cik_cp_compute_resume(); the field order mirrors the register
 * layout.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4858 
/* Memory queue descriptor (MQD) for a CIK compute queue.  Lives in a
 * GTT buffer object (one per compute ring) whose GPU address is
 * programmed into CP_MQD_BASE_ADDR in cik_cp_compute_resume().
 * queue_state holds the shadowed HQD register values; the layout of
 * the whole struct is dictated by the hardware/firmware and must not
 * be reordered.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4886 
4887 /**
4888  * cik_cp_compute_resume - setup the compute queue registers
4889  *
4890  * @rdev: radeon_device pointer
4891  *
4892  * Program the compute queues and test them to make sure they
4893  * are working.
4894  * Returns 0 for success, error for failure.
4895  */
4896 static int cik_cp_compute_resume(struct radeon_device *rdev)
4897 {
4898 	int r, i, j, idx;
4899 	u32 tmp;
4900 	bool use_doorbell = true;
4901 	u64 hqd_gpu_addr;
4902 	u64 mqd_gpu_addr;
4903 	u64 eop_gpu_addr;
4904 	u64 wb_gpu_addr;
4905 	u32 *buf;
4906 	struct bonaire_mqd *mqd;
4907 
4908 	r = cik_cp_compute_start(rdev);
4909 	if (r)
4910 		return r;
4911 
4912 	/* fix up chicken bits */
4913 	tmp = RREG32(CP_CPF_DEBUG);
4914 	tmp |= (1 << 23);
4915 	WREG32(CP_CPF_DEBUG, tmp);
4916 
4917 	/* init the pipes */
4918 	mutex_lock(&rdev->srbm_mutex);
4919 
4920 	eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr;
4921 
4922 	cik_srbm_select(rdev, 0, 0, 0, 0);
4923 
4924 	/* write the EOP addr */
4925 	WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4926 	WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4927 
4928 	/* set the VMID assigned */
4929 	WREG32(CP_HPD_EOP_VMID, 0);
4930 
4931 	/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4932 	tmp = RREG32(CP_HPD_EOP_CONTROL);
4933 	tmp &= ~EOP_SIZE_MASK;
4934 	tmp |= order_base_2(MEC_HPD_SIZE / 8);
4935 	WREG32(CP_HPD_EOP_CONTROL, tmp);
4936 
4937 	mutex_unlock(&rdev->srbm_mutex);
4938 
4939 	/* init the queues.  Just two for now. */
4940 	for (i = 0; i < 2; i++) {
4941 		if (i == 0)
4942 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4943 		else
4944 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4945 
4946 		if (rdev->ring[idx].mqd_obj == NULL) {
4947 			r = radeon_bo_create(rdev,
4948 					     sizeof(struct bonaire_mqd),
4949 					     PAGE_SIZE, true,
4950 					     RADEON_GEM_DOMAIN_GTT, 0, NULL,
4951 					     NULL, &rdev->ring[idx].mqd_obj);
4952 			if (r) {
4953 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4954 				return r;
4955 			}
4956 		}
4957 
4958 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4959 		if (unlikely(r != 0)) {
4960 			cik_cp_compute_fini(rdev);
4961 			return r;
4962 		}
4963 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4964 				  &mqd_gpu_addr);
4965 		if (r) {
4966 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4967 			cik_cp_compute_fini(rdev);
4968 			return r;
4969 		}
4970 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4971 		if (r) {
4972 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4973 			cik_cp_compute_fini(rdev);
4974 			return r;
4975 		}
4976 
4977 		/* init the mqd struct */
4978 		memset(buf, 0, sizeof(struct bonaire_mqd));
4979 
4980 		mqd = (struct bonaire_mqd *)buf;
4981 		mqd->header = 0xC0310800;
4982 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4983 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4984 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4985 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4986 
4987 		mutex_lock(&rdev->srbm_mutex);
4988 		cik_srbm_select(rdev, rdev->ring[idx].me,
4989 				rdev->ring[idx].pipe,
4990 				rdev->ring[idx].queue, 0);
4991 
4992 		/* disable wptr polling */
4993 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4994 		tmp &= ~WPTR_POLL_EN;
4995 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4996 
4997 		/* enable doorbell? */
4998 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4999 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5000 		if (use_doorbell)
5001 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5002 		else
5003 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
5004 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5005 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
5006 
5007 		/* disable the queue if it's active */
5008 		mqd->queue_state.cp_hqd_dequeue_request = 0;
5009 		mqd->queue_state.cp_hqd_pq_rptr = 0;
5010 		mqd->queue_state.cp_hqd_pq_wptr= 0;
5011 		if (RREG32(CP_HQD_ACTIVE) & 1) {
5012 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
5013 			for (j = 0; j < rdev->usec_timeout; j++) {
5014 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
5015 					break;
5016 				udelay(1);
5017 			}
5018 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
5019 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
5020 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5021 		}
5022 
5023 		/* set the pointer to the MQD */
5024 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
5025 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
5026 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
5027 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
5028 		/* set MQD vmid to 0 */
5029 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
5030 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
5031 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
5032 
5033 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
5034 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
5035 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
5036 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
5037 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
5038 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
5039 
5040 		/* set up the HQD, this is similar to CP_RB0_CNTL */
5041 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
5042 		mqd->queue_state.cp_hqd_pq_control &=
5043 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
5044 
5045 		mqd->queue_state.cp_hqd_pq_control |=
5046 			order_base_2(rdev->ring[idx].ring_size / 8);
5047 		mqd->queue_state.cp_hqd_pq_control |=
5048 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
5049 #ifdef __BIG_ENDIAN
5050 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
5051 #endif
5052 		mqd->queue_state.cp_hqd_pq_control &=
5053 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
5054 		mqd->queue_state.cp_hqd_pq_control |=
5055 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
5056 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
5057 
5058 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
5059 		if (i == 0)
5060 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
5061 		else
5062 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
5063 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
5064 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
5065 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
5066 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
5067 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
5068 
5069 		/* set the wb address wether it's enabled or not */
5070 		if (i == 0)
5071 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
5072 		else
5073 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
5074 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
5075 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
5076 			upper_32_bits(wb_gpu_addr) & 0xffff;
5077 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
5078 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
5079 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
5080 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
5081 
5082 		/* enable the doorbell if requested */
5083 		if (use_doorbell) {
5084 			mqd->queue_state.cp_hqd_pq_doorbell_control =
5085 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
5086 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
5087 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
5088 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
5089 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
5090 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
5091 				~(DOORBELL_SOURCE | DOORBELL_HIT);
5092 
5093 		} else {
5094 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
5095 		}
5096 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
5097 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
5098 
5099 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
5100 		rdev->ring[idx].wptr = 0;
5101 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
5102 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
5103 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
5104 
5105 		/* set the vmid for the queue */
5106 		mqd->queue_state.cp_hqd_vmid = 0;
5107 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
5108 
5109 		/* activate the queue */
5110 		mqd->queue_state.cp_hqd_active = 1;
5111 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
5112 
5113 		cik_srbm_select(rdev, 0, 0, 0, 0);
5114 		mutex_unlock(&rdev->srbm_mutex);
5115 
5116 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
5117 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
5118 
5119 		rdev->ring[idx].ready = true;
5120 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
5121 		if (r)
5122 			rdev->ring[idx].ready = false;
5123 	}
5124 
5125 	return 0;
5126 }
5127 
/**
 * cik_cp_enable - enable/disable both the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the command processors
 *
 * Convenience wrapper that applies the same enable state to the
 * gfx CP and the compute MECs.
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
5133 
/**
 * cik_cp_load_microcode - load gfx and compute CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the gfx CP ucode first, then the compute MEC ucode.
 * Returns 0 on success, or the first error encountered.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int r;

	r = cik_cp_gfx_load_microcode(rdev);
	if (!r)
		r = cik_cp_compute_load_microcode(rdev);

	return r;
}
5147 
/**
 * cik_cp_fini - tear down the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 *
 * Stops both command processors and frees their driver-side state.
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
5153 
/**
 * cik_cp_resume - bring up the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 *
 * Loads the CP microcode and resumes the gfx ring and then the
 * compute queues.  The GUI idle interrupt is disabled for the
 * duration of the bring-up and re-enabled on success.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_resume(struct radeon_device *rdev)
{
	int r;

	cik_enable_gui_idle_interrupt(rdev, false);

	r = cik_cp_load_microcode(rdev);
	if (r)
		return r;

	r = cik_cp_gfx_resume(rdev);
	if (r)
		return r;
	r = cik_cp_compute_resume(rdev);
	if (r)
		return r;

	cik_enable_gui_idle_interrupt(rdev, true);

	return 0;
}
5175 
/**
 * cik_print_gpu_status_regs - dump GPU status registers
 *
 * @rdev: radeon_device pointer
 *
 * Logs the GRBM/SRBM/SDMA/CP status registers to aid in debugging
 * GPU hangs; called before and after a soft reset.
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
5215 
5216 /**
5217  * cik_gpu_check_soft_reset - check which blocks are busy
5218  *
5219  * @rdev: radeon_device pointer
5220  *
5221  * Check which blocks are busy and return the relevant reset
5222  * mask to be used by cik_gpu_soft_reset().
5223  * Returns a mask of the blocks to be reset.
5224  */
5225 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
5226 {
5227 	u32 reset_mask = 0;
5228 	u32 tmp;
5229 
5230 	/* GRBM_STATUS */
5231 	tmp = RREG32(GRBM_STATUS);
5232 	if (tmp & (PA_BUSY | SC_BUSY |
5233 		   BCI_BUSY | SX_BUSY |
5234 		   TA_BUSY | VGT_BUSY |
5235 		   DB_BUSY | CB_BUSY |
5236 		   GDS_BUSY | SPI_BUSY |
5237 		   IA_BUSY | IA_BUSY_NO_DMA))
5238 		reset_mask |= RADEON_RESET_GFX;
5239 
5240 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
5241 		reset_mask |= RADEON_RESET_CP;
5242 
5243 	/* GRBM_STATUS2 */
5244 	tmp = RREG32(GRBM_STATUS2);
5245 	if (tmp & RLC_BUSY)
5246 		reset_mask |= RADEON_RESET_RLC;
5247 
5248 	/* SDMA0_STATUS_REG */
5249 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
5250 	if (!(tmp & SDMA_IDLE))
5251 		reset_mask |= RADEON_RESET_DMA;
5252 
5253 	/* SDMA1_STATUS_REG */
5254 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
5255 	if (!(tmp & SDMA_IDLE))
5256 		reset_mask |= RADEON_RESET_DMA1;
5257 
5258 	/* SRBM_STATUS2 */
5259 	tmp = RREG32(SRBM_STATUS2);
5260 	if (tmp & SDMA_BUSY)
5261 		reset_mask |= RADEON_RESET_DMA;
5262 
5263 	if (tmp & SDMA1_BUSY)
5264 		reset_mask |= RADEON_RESET_DMA1;
5265 
5266 	/* SRBM_STATUS */
5267 	tmp = RREG32(SRBM_STATUS);
5268 
5269 	if (tmp & IH_BUSY)
5270 		reset_mask |= RADEON_RESET_IH;
5271 
5272 	if (tmp & SEM_BUSY)
5273 		reset_mask |= RADEON_RESET_SEM;
5274 
5275 	if (tmp & GRBM_RQ_PENDING)
5276 		reset_mask |= RADEON_RESET_GRBM;
5277 
5278 	if (tmp & VMC_BUSY)
5279 		reset_mask |= RADEON_RESET_VMC;
5280 
5281 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
5282 		   MCC_BUSY | MCD_BUSY))
5283 		reset_mask |= RADEON_RESET_MC;
5284 
5285 	if (evergreen_is_display_hung(rdev))
5286 		reset_mask |= RADEON_RESET_DISPLAY;
5287 
5288 	/* Skip MC reset as it's mostly likely not hung, just busy */
5289 	if (reset_mask & RADEON_RESET_MC) {
5290 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
5291 		reset_mask &= ~RADEON_RESET_MC;
5292 	}
5293 
5294 	return reset_mask;
5295 }
5296 
5297 /**
5298  * cik_gpu_soft_reset - soft reset GPU
5299  *
5300  * @rdev: radeon_device pointer
5301  * @reset_mask: mask of which blocks to reset
5302  *
5303  * Soft reset the blocks specified in @reset_mask.
5304  */
5305 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
5306 {
5307 	struct evergreen_mc_save save;
5308 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
5309 	u32 tmp;
5310 
5311 	if (reset_mask == 0)
5312 		return;
5313 
5314 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
5315 
5316 	cik_print_gpu_status_regs(rdev);
5317 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5318 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5319 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5320 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5321 
5322 	/* disable CG/PG */
5323 	cik_fini_pg(rdev);
5324 	cik_fini_cg(rdev);
5325 
5326 	/* stop the rlc */
5327 	cik_rlc_stop(rdev);
5328 
5329 	/* Disable GFX parsing/prefetching */
5330 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5331 
5332 	/* Disable MEC parsing/prefetching */
5333 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5334 
5335 	if (reset_mask & RADEON_RESET_DMA) {
5336 		/* sdma0 */
5337 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5338 		tmp |= SDMA_HALT;
5339 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5340 	}
5341 	if (reset_mask & RADEON_RESET_DMA1) {
5342 		/* sdma1 */
5343 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5344 		tmp |= SDMA_HALT;
5345 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5346 	}
5347 
5348 	evergreen_mc_stop(rdev, &save);
5349 	if (evergreen_mc_wait_for_idle(rdev)) {
5350 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5351 	}
5352 
5353 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
5354 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
5355 
5356 	if (reset_mask & RADEON_RESET_CP) {
5357 		grbm_soft_reset |= SOFT_RESET_CP;
5358 
5359 		srbm_soft_reset |= SOFT_RESET_GRBM;
5360 	}
5361 
5362 	if (reset_mask & RADEON_RESET_DMA)
5363 		srbm_soft_reset |= SOFT_RESET_SDMA;
5364 
5365 	if (reset_mask & RADEON_RESET_DMA1)
5366 		srbm_soft_reset |= SOFT_RESET_SDMA1;
5367 
5368 	if (reset_mask & RADEON_RESET_DISPLAY)
5369 		srbm_soft_reset |= SOFT_RESET_DC;
5370 
5371 	if (reset_mask & RADEON_RESET_RLC)
5372 		grbm_soft_reset |= SOFT_RESET_RLC;
5373 
5374 	if (reset_mask & RADEON_RESET_SEM)
5375 		srbm_soft_reset |= SOFT_RESET_SEM;
5376 
5377 	if (reset_mask & RADEON_RESET_IH)
5378 		srbm_soft_reset |= SOFT_RESET_IH;
5379 
5380 	if (reset_mask & RADEON_RESET_GRBM)
5381 		srbm_soft_reset |= SOFT_RESET_GRBM;
5382 
5383 	if (reset_mask & RADEON_RESET_VMC)
5384 		srbm_soft_reset |= SOFT_RESET_VMC;
5385 
5386 	if (!(rdev->flags & RADEON_IS_IGP)) {
5387 		if (reset_mask & RADEON_RESET_MC)
5388 			srbm_soft_reset |= SOFT_RESET_MC;
5389 	}
5390 
5391 	if (grbm_soft_reset) {
5392 		tmp = RREG32(GRBM_SOFT_RESET);
5393 		tmp |= grbm_soft_reset;
5394 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
5395 		WREG32(GRBM_SOFT_RESET, tmp);
5396 		tmp = RREG32(GRBM_SOFT_RESET);
5397 
5398 		udelay(50);
5399 
5400 		tmp &= ~grbm_soft_reset;
5401 		WREG32(GRBM_SOFT_RESET, tmp);
5402 		tmp = RREG32(GRBM_SOFT_RESET);
5403 	}
5404 
5405 	if (srbm_soft_reset) {
5406 		tmp = RREG32(SRBM_SOFT_RESET);
5407 		tmp |= srbm_soft_reset;
5408 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
5409 		WREG32(SRBM_SOFT_RESET, tmp);
5410 		tmp = RREG32(SRBM_SOFT_RESET);
5411 
5412 		udelay(50);
5413 
5414 		tmp &= ~srbm_soft_reset;
5415 		WREG32(SRBM_SOFT_RESET, tmp);
5416 		tmp = RREG32(SRBM_SOFT_RESET);
5417 	}
5418 
5419 	/* Wait a little for things to settle down */
5420 	udelay(50);
5421 
5422 	evergreen_mc_resume(rdev, &save);
5423 	udelay(50);
5424 
5425 	cik_print_gpu_status_regs(rdev);
5426 }
5427 
/* GMCON register values saved across a PCI config reset on KV/KB APUs;
 * filled by kv_save_regs_for_reset() and replayed by
 * kv_restore_regs_for_reset(). */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5433 
/**
 * kv_save_regs_for_reset - save GMCON registers before reset (KV/KB)
 *
 * @rdev: radeon_device pointer
 * @save: storage for the saved register values
 *
 * Saves GMCON_RENG_EXECUTE, GMCON_MISC and GMCON_MISC3, then disables
 * the render-engine auto-execute and stutter features so the memory
 * controller is quiescent for the reset.  Restored afterwards by
 * kv_restore_regs_for_reset().
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5445 
5446 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5447 				      struct kv_reset_save_regs *save)
5448 {
5449 	int i;
5450 
5451 	WREG32(GMCON_PGFSM_WRITE, 0);
5452 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5453 
5454 	for (i = 0; i < 5; i++)
5455 		WREG32(GMCON_PGFSM_WRITE, 0);
5456 
5457 	WREG32(GMCON_PGFSM_WRITE, 0);
5458 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5459 
5460 	for (i = 0; i < 5; i++)
5461 		WREG32(GMCON_PGFSM_WRITE, 0);
5462 
5463 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5464 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5465 
5466 	for (i = 0; i < 5; i++)
5467 		WREG32(GMCON_PGFSM_WRITE, 0);
5468 
5469 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5470 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5471 
5472 	for (i = 0; i < 5; i++)
5473 		WREG32(GMCON_PGFSM_WRITE, 0);
5474 
5475 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5476 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5477 
5478 	for (i = 0; i < 5; i++)
5479 		WREG32(GMCON_PGFSM_WRITE, 0);
5480 
5481 	WREG32(GMCON_PGFSM_WRITE, 0);
5482 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5483 
5484 	for (i = 0; i < 5; i++)
5485 		WREG32(GMCON_PGFSM_WRITE, 0);
5486 
5487 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5488 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5489 
5490 	for (i = 0; i < 5; i++)
5491 		WREG32(GMCON_PGFSM_WRITE, 0);
5492 
5493 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5494 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5495 
5496 	for (i = 0; i < 5; i++)
5497 		WREG32(GMCON_PGFSM_WRITE, 0);
5498 
5499 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5500 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5501 
5502 	for (i = 0; i < 5; i++)
5503 		WREG32(GMCON_PGFSM_WRITE, 0);
5504 
5505 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5506 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5507 
5508 	for (i = 0; i < 5; i++)
5509 		WREG32(GMCON_PGFSM_WRITE, 0);
5510 
5511 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5512 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5513 
5514 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5515 	WREG32(GMCON_MISC, save->gmcon_misc);
5516 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5517 }
5518 
/**
 * cik_gpu_pci_config_reset - reset the asic via the pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Heavier-weight fallback when soft reset fails: quiesce the engines
 * and memory controller, save GMCON state on APUs, disable bus
 * mastering, trigger a PCI config reset, then poll CONFIG_MEMSIZE
 * (bounded by usec_timeout) until the asic responds again and restore
 * the APU GMCON state.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	/* APUs lose GMCON state across the reset; save it for restore */
	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset; CONFIG_MEMSIZE reads
	 * 0xffffffff while the asic is unresponsive */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5581 
5582 /**
5583  * cik_asic_reset - soft reset GPU
5584  *
5585  * @rdev: radeon_device pointer
5586  *
5587  * Look up which blocks are hung and attempt
5588  * to reset them.
5589  * Returns 0 for success.
5590  */
5591 int cik_asic_reset(struct radeon_device *rdev)
5592 {
5593 	u32 reset_mask;
5594 
5595 	reset_mask = cik_gpu_check_soft_reset(rdev);
5596 
5597 	if (reset_mask)
5598 		r600_set_bios_scratch_engine_hung(rdev, true);
5599 
5600 	/* try soft reset */
5601 	cik_gpu_soft_reset(rdev, reset_mask);
5602 
5603 	reset_mask = cik_gpu_check_soft_reset(rdev);
5604 
5605 	/* try pci config reset */
5606 	if (reset_mask && radeon_hard_reset)
5607 		cik_gpu_pci_config_reset(rdev);
5608 
5609 	reset_mask = cik_gpu_check_soft_reset(rdev);
5610 
5611 	if (!reset_mask)
5612 		r600_set_bios_scratch_engine_hung(rdev, false);
5613 
5614 	return 0;
5615 }
5616 
5617 /**
5618  * cik_gfx_is_lockup - check if the 3D engine is locked up
5619  *
5620  * @rdev: radeon_device pointer
5621  * @ring: radeon_ring structure holding ring information
5622  *
5623  * Check if the 3D engine is locked up (CIK).
5624  * Returns true if the engine is locked, false if not.
5625  */
5626 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5627 {
5628 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5629 
5630 	if (!(reset_mask & (RADEON_RESET_GFX |
5631 			    RADEON_RESET_COMPUTE |
5632 			    RADEON_RESET_CP))) {
5633 		radeon_ring_lockup_update(rdev, ring);
5634 		return false;
5635 	}
5636 	return radeon_ring_test_lockup(rdev, ring);
5637 }
5638 
5639 /* MC */
5640 /**
5641  * cik_mc_program - program the GPU memory controller
5642  *
5643  * @rdev: radeon_device pointer
5644  *
5645  * Set the location of vram, gart, and AGP in the GPU's
5646  * physical address space (CIK).
5647  */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* halt display/MC traffic while the aperture is reprogrammed */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	/* unmapped system-aperture accesses go to the scratch page */
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end in the high 16 bits, start in the low 16 */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* AGP is unused on CIK: program an empty (top < bottom) range */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5695 
5696 /**
5697  * cik_mc_init - initialize the memory controller driver params
5698  *
5699  * @rdev: radeon_device pointer
5700  *
5701  * Look up the amount of vram, vram width, and decide how to place
5702  * vram and gart within the GPU's physical address space (CIK).
5703  * Returns 0 for success.
5704  */
5705 static int cik_mc_init(struct radeon_device *rdev)
5706 {
5707 	u32 tmp;
5708 	int chansize, numchan;
5709 
5710 	/* Get VRAM informations */
5711 	rdev->mc.vram_is_ddr = true;
5712 	tmp = RREG32(MC_ARB_RAMCFG);
5713 	if (tmp & CHANSIZE_MASK) {
5714 		chansize = 64;
5715 	} else {
5716 		chansize = 32;
5717 	}
5718 	tmp = RREG32(MC_SHARED_CHMAP);
5719 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5720 	case 0:
5721 	default:
5722 		numchan = 1;
5723 		break;
5724 	case 1:
5725 		numchan = 2;
5726 		break;
5727 	case 2:
5728 		numchan = 4;
5729 		break;
5730 	case 3:
5731 		numchan = 8;
5732 		break;
5733 	case 4:
5734 		numchan = 3;
5735 		break;
5736 	case 5:
5737 		numchan = 6;
5738 		break;
5739 	case 6:
5740 		numchan = 10;
5741 		break;
5742 	case 7:
5743 		numchan = 12;
5744 		break;
5745 	case 8:
5746 		numchan = 16;
5747 		break;
5748 	}
5749 	rdev->mc.vram_width = numchan * chansize;
5750 	/* Could aper size report 0 ? */
5751 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5752 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5753 	/* size in MB on si */
5754 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5755 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5756 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5757 	si_vram_gtt_location(rdev, &rdev->mc);
5758 	radeon_update_bandwidth_info(rdev);
5759 
5760 	return 0;
5761 }
5762 
5763 /*
5764  * GART
5765  * VMID 0 is the physical GPU addresses as used by the kernel.
5766  * VMIDs 1-15 are used for userspace clients and are handled
5767  * by the radeon vm/hsa code.
5768  */
5769 /**
5770  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5771  *
5772  * @rdev: radeon_device pointer
5773  *
5774  * Flush the TLB for the VMID 0 page table (CIK).
5775  */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15 */
	/* 0x1 invalidates VM context 0 (the kernel GART mapping) only */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5784 
/* Program the SH_MEM aperture registers for the compute VMIDs (8-15),
 * which are the VMIDs reserved for amdkfd.
 */
static void cik_pcie_init_compute_vmid(struct radeon_device *rdev)
{
	int i;
	uint32_t sh_mem_bases, sh_mem_config;

	sh_mem_bases = 0x6000 | 0x6000 << 16;
	sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED);
	sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED);

	/* srbm_mutex serializes SRBM VMID selection */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 8; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, sh_mem_config);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, sh_mem_bases);
	}
	/* restore the default VMID 0 selection */
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);
}
5806 
5807 /**
5808  * cik_pcie_gart_enable - gart enable
5809  *
5810  * @rdev: radeon_device pointer
5811  *
5812  * This sets up the TLBs, programs the page tables for VMID0,
5813  * sets up the hw for VMIDs 1-15 which are allocated on
5814  * demand, and sets up the global locations for the LDS, GDS,
5815  * and GPUVM for FSA64 clients (CIK).
5816  * Returns 0 for success, errors for failure.
5817  */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0 */
	/* context0 covers the GTT range and maps through the GART table */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* restore context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
	/* contexts 1-7 and 8-15 live in two separate register banks */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->vm_manager.saved_table_addr[i]);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->vm_manager.saved_table_addr[i]);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_init_compute_vmid(rdev);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5929 
5930 /**
5931  * cik_pcie_gart_disable - gart disable
5932  *
5933  * @rdev: radeon_device pointer
5934  *
 * This disables all VM page tables (CIK).
5936  */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	unsigned i;

	/* save the per-VMID page table bases so gart_enable can restore them */
	for (i = 1; i < 16; ++i) {
		uint32_t reg;
		if (i < 8)
			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
		else
			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
	}

	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
5968 
5969 /**
5970  * cik_pcie_gart_fini - vm fini callback
5971  *
5972  * @rdev: radeon_device pointer
5973  *
5974  * Tears down the driver GART/VM setup (CIK).
5975  */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* disable the page tables, then free the GART table BO and state */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5982 
5983 /* vm parser */
5984 /**
5985  * cik_ib_parse - vm ib_parse callback
5986  *
5987  * @rdev: radeon_device pointer
5988  * @ib: indirect buffer pointer
5989  *
5990  * CIK uses hw IB checking so this is a nop (CIK).
5991  */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* intentionally a nop: CIK relies on hw IB validation */
	return 0;
}
5996 
5997 /*
5998  * vm
5999  * VMID 0 is the physical GPU addresses as used by the kernel.
6000  * VMIDs 1-15 are used for userspace clients and are handled
6001  * by the radeon vm/hsa code.
6002  */
6003 /**
6004  * cik_vm_init - cik vm init callback
6005  *
6006  * @rdev: radeon_device pointer
6007  *
6008  * Inits cik specific vm parameters (number of VMs, base of vram for
6009  * VMIDs 1-15) (CIK).
6010  * Returns 0 for success.
6011  */
6012 int cik_vm_init(struct radeon_device *rdev)
6013 {
6014 	/*
6015 	 * number of VMs
6016 	 * VMID 0 is reserved for System
6017 	 * radeon graphics/compute will use VMIDs 1-7
6018 	 * amdkfd will use VMIDs 8-15
6019 	 */
6020 	rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS;
6021 	/* base offset of vram pages */
6022 	if (rdev->flags & RADEON_IS_IGP) {
6023 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
6024 		tmp <<= 22;
6025 		rdev->vm_manager.vram_base_offset = tmp;
6026 	} else
6027 		rdev->vm_manager.vram_base_offset = 0;
6028 
6029 	return 0;
6030 }
6031 
6032 /**
6033  * cik_vm_fini - cik vm fini callback
6034  *
6035  * @rdev: radeon_device pointer
6036  *
6037  * Tear down any asic specific VM setup (CIK).
6038  */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* nothing to do: cik_vm_init() allocates no resources */
}
6042 
6043 /**
6044  * cik_vm_decode_fault - print human readable fault info
6045  *
6046  * @rdev: radeon_device pointer
6047  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
 * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
 * @mc_client: VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT register value
 *
6050  * Print human readable fault information (CIK).
6051  */
6052 static void cik_vm_decode_fault(struct radeon_device *rdev,
6053 				u32 status, u32 addr, u32 mc_client)
6054 {
6055 	u32 mc_id;
6056 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
6057 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
6058 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
6059 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
6060 
6061 	if (rdev->family == CHIP_HAWAII)
6062 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6063 	else
6064 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
6065 
6066 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
6067 	       protections, vmid, addr,
6068 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
6069 	       block, mc_client, mc_id);
6070 }
6071 
6072 /**
6073  * cik_vm_flush - cik vm flush using the CP
6074  *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring pointer
 * @vm_id: VMID whose TLB should be flushed
 * @pd_addr: new page directory base address
6076  *
6077  * Update the page table base and flush the VM TLB
6078  * using the CP (CIK).
6079  */
void cik_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
		  unsigned vm_id, uint64_t pd_addr)
{
	/* only the GFX ring has a PFP; compute rings use the ME engine */
	int usepfp = (ring->idx == RADEON_RING_TYPE_GFX_INDEX);

	/* write the new page directory base for this VMID */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	/* VMIDs 1-7 and 8-15 use two separate register banks */
	if (vm_id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, pd_addr >> 12);

	/* update SH_MEM_* regs */
	/* select the target VMID in SRBM so the writes below hit its regs */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(vm_id));

	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SH_MEM_BASES >> 2);
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */

	/* switch SRBM back to VMID 0 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, VMID(0));

	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, ring->idx);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm_id);

	/* wait for the invalidate to complete */
	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(0) | /* wait */
				 WAIT_REG_MEM_FUNCTION(0) |  /* always */
				 WAIT_REG_MEM_ENGINE(0))); /* me */
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0); /* ref */
	radeon_ring_write(ring, 0); /* mask */
	radeon_ring_write(ring, 0x20); /* poll interval */

	/* compute doesn't have PFP */
	if (usepfp) {
		/* sync PFP to ME, otherwise we might get invalid PFP reads */
		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
		radeon_ring_write(ring, 0x0);
	}
}
6153 
6154 /*
6155  * RLC
6156  * The RLC is a multi-purpose microengine that handles a
6157  * variety of functions, the most important of which is
6158  * the interrupt controller.
6159  */
6160 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
6161 					  bool enable)
6162 {
6163 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
6164 
6165 	if (enable)
6166 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6167 	else
6168 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6169 	WREG32(CP_INT_CNTL_RING0, tmp);
6170 }
6171 
6172 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
6173 {
6174 	u32 tmp;
6175 
6176 	tmp = RREG32(RLC_LB_CNTL);
6177 	if (enable)
6178 		tmp |= LOAD_BALANCE_ENABLE;
6179 	else
6180 		tmp &= ~LOAD_BALANCE_ENABLE;
6181 	WREG32(RLC_LB_CNTL, tmp);
6182 }
6183 
/* Wait for the RLC serdes masters to become idle across all SEs/SHs. */
static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
{
	u32 i, j, k;
	u32 mask;

	/* per-SE/SH: wait for the CU master serdes to go idle */
	mutex_lock(&rdev->grbm_idx_mutex);
	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
			cik_select_se_sh(rdev, i, j);
			for (k = 0; k < rdev->usec_timeout; k++) {
				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
					break;
				udelay(1);
			}
		}
	}
	/* restore broadcast mode */
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	mutex_unlock(&rdev->grbm_idx_mutex);

	/* then wait for the non-CU masters (SE/GC/TC) to go idle */
	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
	for (k = 0; k < rdev->usec_timeout; k++) {
		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
			break;
		udelay(1);
	}
}
6210 
6211 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
6212 {
6213 	u32 tmp;
6214 
6215 	tmp = RREG32(RLC_CNTL);
6216 	if (tmp != rlc)
6217 		WREG32(RLC_CNTL, rlc);
6218 }
6219 
/* Halt the RLC if it is running and wait for it to go idle.
 * Returns the original RLC_CNTL value so the caller can restore it
 * later via cik_update_rlc().
 */
static u32 cik_halt_rlc(struct radeon_device *rdev)
{
	u32 data, orig;

	orig = data = RREG32(RLC_CNTL);

	if (data & RLC_ENABLE) {
		u32 i;

		/* disable the RLC and wait for the GPM block to go idle */
		data &= ~RLC_ENABLE;
		WREG32(RLC_CNTL, data);

		for (i = 0; i < rdev->usec_timeout; i++) {
			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
				break;
			udelay(1);
		}

		cik_wait_for_rlc_serdes(rdev);
	}

	return orig;
}
6243 
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	/* request safe-mode entry from the RLC */
	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait until GFX power and clocks are reported up */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to ack the request (REQ bit cleared) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
6264 
6265 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
6266 {
6267 	u32 tmp;
6268 
6269 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
6270 	WREG32(RLC_GPR_REG2, tmp);
6271 }
6272 
6273 /**
6274  * cik_rlc_stop - stop the RLC ME
6275  *
6276  * @rdev: radeon_device pointer
6277  *
6278  * Halt the RLC ME (MicroEngine) (CIK).
6279  */
static void cik_rlc_stop(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, 0);

	cik_enable_gui_idle_interrupt(rdev, false);

	/* make sure all serdes masters are idle before returning */
	cik_wait_for_rlc_serdes(rdev);
}
6288 
6289 /**
6290  * cik_rlc_start - start the RLC ME
6291  *
6292  * @rdev: radeon_device pointer
6293  *
6294  * Unhalt the RLC ME (MicroEngine) (CIK).
6295  */
static void cik_rlc_start(struct radeon_device *rdev)
{
	WREG32(RLC_CNTL, RLC_ENABLE);

	cik_enable_gui_idle_interrupt(rdev, true);

	/* give the RLC some time to come up */
	udelay(50);
}
6304 
6305 /**
6306  * cik_rlc_resume - setup the RLC hw
6307  *
6308  * @rdev: radeon_device pointer
6309  *
6310  * Initialize the RLC registers, load the ucode,
6311  * and start the RLC (CIK).
6312  * Returns 0 for success, -EINVAL if the ucode is not available.
6313  */
static int cik_rlc_resume(struct radeon_device *rdev)
{
	u32 i, size, tmp;

	if (!rdev->rlc_fw)
		return -EINVAL;

	cik_rlc_stop(rdev);

	/* disable CG */
	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
	WREG32(RLC_CGCG_CGLS_CTRL, tmp);

	si_rlc_reset(rdev);

	cik_init_pg(rdev);

	cik_init_cg(rdev);

	/* load-balance counter window */
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_CNTR_MAX, 0x00008000);

	/* program load-balance params in broadcast mode */
	mutex_lock(&rdev->grbm_idx_mutex);
	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
	WREG32(RLC_LB_PARAMS, 0x00600408);
	WREG32(RLC_LB_CNTL, 0x80000004);
	mutex_unlock(&rdev->grbm_idx_mutex);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	if (rdev->new_fw) {
		/* new-style firmware: little-endian words behind a header */
		const struct rlc_firmware_header_v1_0 *hdr =
			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
		const __le32 *fw_data = (const __le32 *)
			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));

		radeon_ucode_print_rlc_hdr(&hdr->header);

		size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, le32_to_cpu(hdr->header.ucode_version));
	} else {
		/* legacy firmware: headerless big-endian words, fixed sizes */
		const __be32 *fw_data;

		switch (rdev->family) {
		case CHIP_BONAIRE:
		case CHIP_HAWAII:
		default:
			size = BONAIRE_RLC_UCODE_SIZE;
			break;
		case CHIP_KAVERI:
			size = KV_RLC_UCODE_SIZE;
			break;
		case CHIP_KABINI:
			size = KB_RLC_UCODE_SIZE;
			break;
		case CHIP_MULLINS:
			size = ML_RLC_UCODE_SIZE;
			break;
		}

		fw_data = (const __be32 *)rdev->rlc_fw->data;
		WREG32(RLC_GPM_UCODE_ADDR, 0);
		for (i = 0; i < size; i++)
			WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(RLC_GPM_UCODE_ADDR, 0);
	}

	/* XXX - find out what chips support lbpw */
	cik_enable_lbpw(rdev, false);

	if (rdev->family == CHIP_BONAIRE)
		WREG32(RLC_DRIVER_DMA_STATUS, 0);

	cik_rlc_start(rdev);

	return 0;
}
6396 
/* Enable/disable coarse-grain clock gating (CGCG/CGLS). */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		tmp = cik_halt_rlc(rdev);

		/* broadcast the serdes write to all SEs/SHs/CUs */
		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);
		mutex_unlock(&rdev->grbm_idx_mutex);

		/* restore the saved RLC_CNTL state */
		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): repeated posting reads look like a settling
		 * delay before disabling CGCG - confirm before changing */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
6434 
/* Enable/disable medium-grain clock gating (MGCG) and the related
 * memory light-sleep features.
 */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* enable CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000001;
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = cik_halt_rlc(rdev);

		/* broadcast the serdes write to all SEs/SHs/CUs */
		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		/* restore the saved RLC_CNTL state */
		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* force the MGCG overrides on */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* disable RLC memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* disable CP memory light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = cik_halt_rlc(rdev);

		mutex_lock(&rdev->grbm_idx_mutex);
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);
		mutex_unlock(&rdev->grbm_idx_mutex);

		cik_update_rlc(rdev, tmp);
	}
}
6518 
/* MC clock-gating control registers toggled by the MGCG/LS helpers below */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6531 
6532 static void cik_enable_mc_ls(struct radeon_device *rdev,
6533 			     bool enable)
6534 {
6535 	int i;
6536 	u32 orig, data;
6537 
6538 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6539 		orig = data = RREG32(mc_cg_registers[i]);
6540 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6541 			data |= MC_LS_ENABLE;
6542 		else
6543 			data &= ~MC_LS_ENABLE;
6544 		if (data != orig)
6545 			WREG32(mc_cg_registers[i], data);
6546 	}
6547 }
6548 
6549 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6550 			       bool enable)
6551 {
6552 	int i;
6553 	u32 orig, data;
6554 
6555 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6556 		orig = data = RREG32(mc_cg_registers[i]);
6557 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6558 			data |= MC_CG_ENABLE;
6559 		else
6560 			data &= ~MC_CG_ENABLE;
6561 		if (data != orig)
6562 			WREG32(mc_cg_registers[i], data);
6563 	}
6564 }
6565 
6566 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
6567 				 bool enable)
6568 {
6569 	u32 orig, data;
6570 
6571 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
6572 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
6573 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
6574 	} else {
6575 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
6576 		data |= 0xff000000;
6577 		if (data != orig)
6578 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
6579 
6580 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
6581 		data |= 0xff000000;
6582 		if (data != orig)
6583 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
6584 	}
6585 }
6586 
6587 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6588 				 bool enable)
6589 {
6590 	u32 orig, data;
6591 
6592 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6593 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6594 		data |= 0x100;
6595 		if (orig != data)
6596 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6597 
6598 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6599 		data |= 0x100;
6600 		if (orig != data)
6601 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6602 	} else {
6603 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6604 		data &= ~0x100;
6605 		if (orig != data)
6606 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6607 
6608 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6609 		data &= ~0x100;
6610 		if (orig != data)
6611 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6612 	}
6613 }
6614 
/* Toggle UVD medium grain clock gating via the UVD context registers. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		/* NOTE(review): the value read above is discarded by the plain
		 * assignment below.  The disable path clears these same 0xfff
		 * bits, so "data |= 0xfff" may have been intended -- confirm
		 * against the UVD register documentation before changing.
		 */
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		/* set the DCM bit, writing only if it actually changes */
		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		/* clear the mem-ctrl gating bits and the DCM bit */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6640 
6641 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6642 			       bool enable)
6643 {
6644 	u32 orig, data;
6645 
6646 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6647 
6648 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6649 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6650 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6651 	else
6652 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6653 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6654 
6655 	if (orig != data)
6656 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6657 }
6658 
6659 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6660 				bool enable)
6661 {
6662 	u32 orig, data;
6663 
6664 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6665 
6666 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6667 		data &= ~CLOCK_GATING_DIS;
6668 	else
6669 		data |= CLOCK_GATING_DIS;
6670 
6671 	if (orig != data)
6672 		WREG32(HDP_HOST_PATH_CNTL, data);
6673 }
6674 
6675 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6676 			      bool enable)
6677 {
6678 	u32 orig, data;
6679 
6680 	orig = data = RREG32(HDP_MEM_POWER_LS);
6681 
6682 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6683 		data |= HDP_LS_ENABLE;
6684 	else
6685 		data &= ~HDP_LS_ENABLE;
6686 
6687 	if (orig != data)
6688 		WREG32(HDP_MEM_POWER_LS, data);
6689 }
6690 
/**
 * cik_update_cg - enable/disable clock gating for the requested blocks
 * @rdev: radeon_device pointer
 * @block: mask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable (true) or disable (false) clock gating
 *
 * Dispatches to the per-block helpers.  For GFX the gui idle interrupt
 * is masked around the update and MGCG/CGCG are sequenced depending on
 * direction (enable: MGCG then CGCG; disable: CGCG then MGCG).
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC gating is only touched on discrete parts */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6738 
/* Enable clock gating at init: GFX first, then UVD internal CG,
 * then the remaining blocks in one batch.
 */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6753 
/* Disable clock gating at teardown, mirroring cik_init_cg in reverse
 * order: the non-GFX blocks first, GFX last.
 */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6764 
6765 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6766 					  bool enable)
6767 {
6768 	u32 data, orig;
6769 
6770 	orig = data = RREG32(RLC_PG_CNTL);
6771 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6772 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6773 	else
6774 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6775 	if (orig != data)
6776 		WREG32(RLC_PG_CNTL, data);
6777 }
6778 
6779 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6780 					  bool enable)
6781 {
6782 	u32 data, orig;
6783 
6784 	orig = data = RREG32(RLC_PG_CNTL);
6785 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6786 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6787 	else
6788 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6789 	if (orig != data)
6790 		WREG32(RLC_PG_CNTL, data);
6791 }
6792 
6793 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6794 {
6795 	u32 data, orig;
6796 
6797 	orig = data = RREG32(RLC_PG_CNTL);
6798 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6799 		data &= ~DISABLE_CP_PG;
6800 	else
6801 		data |= DISABLE_CP_PG;
6802 	if (orig != data)
6803 		WREG32(RLC_PG_CNTL, data);
6804 }
6805 
6806 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6807 {
6808 	u32 data, orig;
6809 
6810 	orig = data = RREG32(RLC_PG_CNTL);
6811 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6812 		data &= ~DISABLE_GDS_PG;
6813 	else
6814 		data |= DISABLE_GDS_PG;
6815 	if (orig != data)
6816 		WREG32(RLC_PG_CNTL, data);
6817 }
6818 
6819 #define CP_ME_TABLE_SIZE    96
6820 #define CP_ME_TABLE_OFFSET  2048
6821 #define CP_MEC_TABLE_OFFSET 4096
6822 
6823 void cik_init_cp_pg_table(struct radeon_device *rdev)
6824 {
6825 	volatile u32 *dst_ptr;
6826 	int me, i, max_me = 4;
6827 	u32 bo_offset = 0;
6828 	u32 table_offset, table_size;
6829 
6830 	if (rdev->family == CHIP_KAVERI)
6831 		max_me = 5;
6832 
6833 	if (rdev->rlc.cp_table_ptr == NULL)
6834 		return;
6835 
6836 	/* write the cp table buffer */
6837 	dst_ptr = rdev->rlc.cp_table_ptr;
6838 	for (me = 0; me < max_me; me++) {
6839 		if (rdev->new_fw) {
6840 			const __le32 *fw_data;
6841 			const struct gfx_firmware_header_v1_0 *hdr;
6842 
6843 			if (me == 0) {
6844 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
6845 				fw_data = (const __le32 *)
6846 					(rdev->ce_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6847 				table_offset = le32_to_cpu(hdr->jt_offset);
6848 				table_size = le32_to_cpu(hdr->jt_size);
6849 			} else if (me == 1) {
6850 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
6851 				fw_data = (const __le32 *)
6852 					(rdev->pfp_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6853 				table_offset = le32_to_cpu(hdr->jt_offset);
6854 				table_size = le32_to_cpu(hdr->jt_size);
6855 			} else if (me == 2) {
6856 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
6857 				fw_data = (const __le32 *)
6858 					(rdev->me_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6859 				table_offset = le32_to_cpu(hdr->jt_offset);
6860 				table_size = le32_to_cpu(hdr->jt_size);
6861 			} else if (me == 3) {
6862 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec_fw->data;
6863 				fw_data = (const __le32 *)
6864 					(rdev->mec_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6865 				table_offset = le32_to_cpu(hdr->jt_offset);
6866 				table_size = le32_to_cpu(hdr->jt_size);
6867 			} else {
6868 				hdr = (const struct gfx_firmware_header_v1_0 *)rdev->mec2_fw->data;
6869 				fw_data = (const __le32 *)
6870 					(rdev->mec2_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
6871 				table_offset = le32_to_cpu(hdr->jt_offset);
6872 				table_size = le32_to_cpu(hdr->jt_size);
6873 			}
6874 
6875 			for (i = 0; i < table_size; i ++) {
6876 				dst_ptr[bo_offset + i] =
6877 					cpu_to_le32(le32_to_cpu(fw_data[table_offset + i]));
6878 			}
6879 			bo_offset += table_size;
6880 		} else {
6881 			const __be32 *fw_data;
6882 			table_size = CP_ME_TABLE_SIZE;
6883 
6884 			if (me == 0) {
6885 				fw_data = (const __be32 *)rdev->ce_fw->data;
6886 				table_offset = CP_ME_TABLE_OFFSET;
6887 			} else if (me == 1) {
6888 				fw_data = (const __be32 *)rdev->pfp_fw->data;
6889 				table_offset = CP_ME_TABLE_OFFSET;
6890 			} else if (me == 2) {
6891 				fw_data = (const __be32 *)rdev->me_fw->data;
6892 				table_offset = CP_ME_TABLE_OFFSET;
6893 			} else {
6894 				fw_data = (const __be32 *)rdev->mec_fw->data;
6895 				table_offset = CP_MEC_TABLE_OFFSET;
6896 			}
6897 
6898 			for (i = 0; i < table_size; i ++) {
6899 				dst_ptr[bo_offset + i] =
6900 					cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6901 			}
6902 			bo_offset += table_size;
6903 		}
6904 	}
6905 }
6906 
/* Toggle coarse grain gfx power gating: GFX_PG_ENABLE in RLC_PG_CNTL
 * and AUTO_PG_EN in RLC_AUTO_PG_CTRL, written only when changed.
 */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): read result is unused; presumably a posting
		 * read to flush the disable sequence -- confirm.
		 */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6936 
6937 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6938 {
6939 	u32 mask = 0, tmp, tmp1;
6940 	int i;
6941 
6942 	mutex_lock(&rdev->grbm_idx_mutex);
6943 	cik_select_se_sh(rdev, se, sh);
6944 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6945 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6946 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6947 	mutex_unlock(&rdev->grbm_idx_mutex);
6948 
6949 	tmp &= 0xffff0000;
6950 
6951 	tmp |= tmp1;
6952 	tmp >>= 16;
6953 
6954 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6955 		mask <<= 1;
6956 		mask |= 1;
6957 	}
6958 
6959 	return (~tmp) & mask;
6960 }
6961 
6962 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6963 {
6964 	u32 i, j, k, active_cu_number = 0;
6965 	u32 mask, counter, cu_bitmap;
6966 	u32 tmp = 0;
6967 
6968 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6969 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6970 			mask = 1;
6971 			cu_bitmap = 0;
6972 			counter = 0;
6973 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6974 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6975 					if (counter < 2)
6976 						cu_bitmap |= mask;
6977 					counter ++;
6978 				}
6979 				mask <<= 1;
6980 			}
6981 
6982 			active_cu_number += counter;
6983 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6984 		}
6985 	}
6986 
6987 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6988 
6989 	tmp = RREG32(RLC_MAX_PG_CU);
6990 	tmp &= ~MAX_PU_CU_MASK;
6991 	tmp |= MAX_PU_CU(active_cu_number);
6992 	WREG32(RLC_MAX_PG_CU, tmp);
6993 }
6994 
6995 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6996 				       bool enable)
6997 {
6998 	u32 data, orig;
6999 
7000 	orig = data = RREG32(RLC_PG_CNTL);
7001 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
7002 		data |= STATIC_PER_CU_PG_ENABLE;
7003 	else
7004 		data &= ~STATIC_PER_CU_PG_ENABLE;
7005 	if (orig != data)
7006 		WREG32(RLC_PG_CNTL, data);
7007 }
7008 
7009 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
7010 					bool enable)
7011 {
7012 	u32 data, orig;
7013 
7014 	orig = data = RREG32(RLC_PG_CNTL);
7015 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
7016 		data |= DYN_PER_CU_PG_ENABLE;
7017 	else
7018 		data &= ~DYN_PER_CU_PG_ENABLE;
7019 	if (orig != data)
7020 		WREG32(RLC_PG_CNTL, data);
7021 }
7022 
7023 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
7024 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
7025 
/* One-time setup for gfx power gating: point the RLC at the clear-state
 * descriptor and save/restore list in its scratch space, program the
 * save/restore and CP table buffer addresses, and set the PG delay and
 * idle-threshold parameters.
 */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* clear-state descriptor: address (hi, lo) then size */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear-state buffer: zero out the descriptor */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
7074 
/* Enable/disable all gfx power gating modes: coarse grain plus static
 * and dynamic per-CU gating.
 */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
7081 
7082 u32 cik_get_csb_size(struct radeon_device *rdev)
7083 {
7084 	u32 count = 0;
7085 	const struct cs_section_def *sect = NULL;
7086 	const struct cs_extent_def *ext = NULL;
7087 
7088 	if (rdev->rlc.cs_data == NULL)
7089 		return 0;
7090 
7091 	/* begin clear state */
7092 	count += 2;
7093 	/* context control state */
7094 	count += 3;
7095 
7096 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
7097 		for (ext = sect->section; ext->extent != NULL; ++ext) {
7098 			if (sect->id == SECT_CONTEXT)
7099 				count += 2 + ext->reg_count;
7100 			else
7101 				return 0;
7102 		}
7103 	}
7104 	/* pa_sc_raster_config/pa_sc_raster_config1 */
7105 	count += 4;
7106 	/* end clear state */
7107 	count += 2;
7108 	/* clear state */
7109 	count += 2;
7110 
7111 	return count;
7112 }
7113 
/* Fill @buffer with the PM4 clear-state packet stream whose length is
 * computed by cik_get_csb_size(); the two must stay in sync.
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	/* context control */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* emit every SECT_CONTEXT extent as a SET_CONTEXT_REG packet */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* only context sections are supported */
				return;
			}
		}
	}

	/* per-asic pa_sc_raster_config/pa_sc_raster_config1 values */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
7178 
/* Enable power gating at init, gated on rdev->pg_flags: SMU clock
 * slowdown always, CP/GDS gating and the gfx cgpg setup only when
 * gfx power gating is supported.
 */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
7193 
/* Disable power gating at teardown.  Note: the SMU clock slowdown
 * enabled in cik_init_pg is deliberately left in place.
 */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
7204 
7205 /*
7206  * Interrupts
7207  * Starting with r6xx, interrupts are handled via a ring buffer.
7208  * Ring buffers are areas of GPU accessible memory that the GPU
7209  * writes interrupt vectors into and the host reads vectors out of.
7210  * There is a rptr (read pointer) that determines where the
7211  * host is currently reading, and a wptr (write pointer)
7212  * which determines where the GPU has written.  When the
7213  * pointers are equal, the ring is idle.  When the GPU
7214  * writes vectors to the ring buffer, it increments the
7215  * wptr.  When there is an interrupt, the host then starts
7216  * fetching commands and processing them until the pointers are
7217  * equal again at which point it updates the rptr.
7218  */
7219 
7220 /**
7221  * cik_enable_interrupts - Enable the interrupt ring buffer
7222  *
7223  * @rdev: radeon_device pointer
7224  *
7225  * Enable the interrupt ring buffer (CIK).
7226  */
7227 static void cik_enable_interrupts(struct radeon_device *rdev)
7228 {
7229 	u32 ih_cntl = RREG32(IH_CNTL);
7230 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7231 
7232 	ih_cntl |= ENABLE_INTR;
7233 	ih_rb_cntl |= IH_RB_ENABLE;
7234 	WREG32(IH_CNTL, ih_cntl);
7235 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7236 	rdev->ih.enabled = true;
7237 }
7238 
7239 /**
7240  * cik_disable_interrupts - Disable the interrupt ring buffer
7241  *
7242  * @rdev: radeon_device pointer
7243  *
7244  * Disable the interrupt ring buffer (CIK).
7245  */
7246 static void cik_disable_interrupts(struct radeon_device *rdev)
7247 {
7248 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
7249 	u32 ih_cntl = RREG32(IH_CNTL);
7250 
7251 	ih_rb_cntl &= ~IH_RB_ENABLE;
7252 	ih_cntl &= ~ENABLE_INTR;
7253 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7254 	WREG32(IH_CNTL, ih_cntl);
7255 	/* set rptr, wptr to 0 */
7256 	WREG32(IH_RB_RPTR, 0);
7257 	WREG32(IH_RB_WPTR, 0);
7258 	rdev->ih.enabled = false;
7259 	rdev->ih.rptr = 0;
7260 }
7261 
7262 /**
7263  * cik_disable_interrupt_state - Disable all interrupt sources
7264  *
7265  * @rdev: radeon_device pointer
7266  *
7267  * Clear all interrupt enable bits used by the driver (CIK).
7268  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring: keep only the context busy/empty enables */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* SRBM */
	WREG32(SRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug: clear enables but preserve the polarity bit */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
7338 
7339 /**
7340  * cik_irq_init - init and enable the interrupt ring
7341  *
7342  * @rdev: radeon_device pointer
7343  *
7344  * Allocate a ring buffer for the interrupt controller,
7345  * enable the RLC, disable interrupts, enable the IH
7346  * ring buffer and enable it (CIK).
7347  * Called at device load and reume.
7348  * Returns 0 for success, errors for failure.
7349  */
7350 static int cik_irq_init(struct radeon_device *rdev)
7351 {
7352 	int ret = 0;
7353 	int rb_bufsz;
7354 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
7355 
7356 	/* allocate ring */
7357 	ret = r600_ih_ring_alloc(rdev);
7358 	if (ret)
7359 		return ret;
7360 
7361 	/* disable irqs */
7362 	cik_disable_interrupts(rdev);
7363 
7364 	/* init rlc */
7365 	ret = cik_rlc_resume(rdev);
7366 	if (ret) {
7367 		r600_ih_ring_fini(rdev);
7368 		return ret;
7369 	}
7370 
7371 	/* setup interrupt control */
7372 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
7373 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
7374 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
7375 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
7376 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
7377 	 */
7378 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
7379 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
7380 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
7381 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
7382 
7383 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
7384 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
7385 
7386 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
7387 		      IH_WPTR_OVERFLOW_CLEAR |
7388 		      (rb_bufsz << 1));
7389 
7390 	if (rdev->wb.enabled)
7391 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
7392 
7393 	/* set the writeback address whether it's enabled or not */
7394 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
7395 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
7396 
7397 	WREG32(IH_RB_CNTL, ih_rb_cntl);
7398 
7399 	/* set rptr, wptr to 0 */
7400 	WREG32(IH_RB_RPTR, 0);
7401 	WREG32(IH_RB_WPTR, 0);
7402 
7403 	/* Default settings for IH_CNTL (disabled at first) */
7404 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
7405 	/* RPTR_REARM only works if msi's are enabled */
7406 	if (rdev->msi_enabled)
7407 		ih_cntl |= RPTR_REARM;
7408 	WREG32(IH_CNTL, ih_cntl);
7409 
7410 	/* force the active interrupt state to all disabled */
7411 	cik_disable_interrupt_state(rdev);
7412 
7413 	pci_set_master(rdev->pdev);
7414 
7415 	/* enable irqs */
7416 	cik_enable_interrupts(rdev);
7417 
7418 	return ret;
7419 }
7420 
7421 /**
7422  * cik_irq_set - enable/disable interrupt sources
7423  *
7424  * @rdev: radeon_device pointer
7425  *
7426  * Enable interrupt sources on the GPU (vblanks, hpd,
7427  * etc.) (CIK).
7428  * Returns 0 for success, errors for failure.
7429  */
int cik_irq_set(struct radeon_device *rdev)
{
	u32 cp_int_cntl;
	u32 cp_m1p0;
	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
	u32 grbm_int_cntl = 0;
	u32 dma_cntl, dma_cntl1;

	if (!rdev->irq.installed) {
		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
		return -EINVAL;
	}
	/* don't enable anything if the ih is disabled */
	if (!rdev->ih.enabled) {
		cik_disable_interrupts(rdev);
		/* force the active interrupt state to all disabled */
		cik_disable_interrupt_state(rdev);
		return 0;
	}

	/* read current state, clearing the bits we may set below */
	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;

	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);

	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;

	/* NOTE: only ME1 pipe0 is programmed here; the driver only places
	 * compute rings on that pipe (see the switch statements below)
	 */
	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;

	/* enable CP interrupts on all rings */
	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int gfx\n");
		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
		DRM_DEBUG("si_irq_set: sw int cp1\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
		}
	}
	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
		DRM_DEBUG("si_irq_set: sw int cp2\n");
		if (ring->me == 1) {
			switch (ring->pipe) {
			case 0:
				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
				break;
			default:
				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
				break;
			}
		} else {
			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
		}
	}

	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma\n");
		dma_cntl |= TRAP_ENABLE;
	}

	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
		DRM_DEBUG("cik_irq_set: sw int dma1\n");
		dma_cntl1 |= TRAP_ENABLE;
	}

	/* vblank interrupts per crtc */
	if (rdev->irq.crtc_vblank_int[0] ||
	    atomic_read(&rdev->irq.pflip[0])) {
		DRM_DEBUG("cik_irq_set: vblank 0\n");
		crtc1 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[1] ||
	    atomic_read(&rdev->irq.pflip[1])) {
		DRM_DEBUG("cik_irq_set: vblank 1\n");
		crtc2 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[2] ||
	    atomic_read(&rdev->irq.pflip[2])) {
		DRM_DEBUG("cik_irq_set: vblank 2\n");
		crtc3 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[3] ||
	    atomic_read(&rdev->irq.pflip[3])) {
		DRM_DEBUG("cik_irq_set: vblank 3\n");
		crtc4 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[4] ||
	    atomic_read(&rdev->irq.pflip[4])) {
		DRM_DEBUG("cik_irq_set: vblank 4\n");
		crtc5 |= VBLANK_INTERRUPT_MASK;
	}
	if (rdev->irq.crtc_vblank_int[5] ||
	    atomic_read(&rdev->irq.pflip[5])) {
		DRM_DEBUG("cik_irq_set: vblank 5\n");
		crtc6 |= VBLANK_INTERRUPT_MASK;
	}
	/* hotplug interrupts per connector */
	if (rdev->irq.hpd[0]) {
		DRM_DEBUG("cik_irq_set: hpd 1\n");
		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[1]) {
		DRM_DEBUG("cik_irq_set: hpd 2\n");
		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[2]) {
		DRM_DEBUG("cik_irq_set: hpd 3\n");
		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[3]) {
		DRM_DEBUG("cik_irq_set: hpd 4\n");
		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[4]) {
		DRM_DEBUG("cik_irq_set: hpd 5\n");
		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}
	if (rdev->irq.hpd[5]) {
		DRM_DEBUG("cik_irq_set: hpd 6\n");
		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
	}

	/* write everything back */
	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);

	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);

	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);

	WREG32(GRBM_INT_CNTL, grbm_int_cntl);

	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
	}

	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
		       GRPH_PFLIP_INT_MASK);
	}

	WREG32(DC_HPD1_INT_CONTROL, hpd1);
	WREG32(DC_HPD2_INT_CONTROL, hpd2);
	WREG32(DC_HPD3_INT_CONTROL, hpd3);
	WREG32(DC_HPD4_INT_CONTROL, hpd4);
	WREG32(DC_HPD5_INT_CONTROL, hpd5);
	WREG32(DC_HPD6_INT_CONTROL, hpd6);

	/* posting read */
	RREG32(SRBM_STATUS);

	return 0;
}
7621 
7622 /**
7623  * cik_irq_ack - ack interrupt sources
7624  *
7625  * @rdev: radeon_device pointer
7626  *
7627  * Ack interrupt sources on the GPU (vblanks, hpd,
7628  * etc.) (CIK).  Certain interrupts sources are sw
7629  * generated and do not require an explicit ack.
7630  */
7631 static inline void cik_irq_ack(struct radeon_device *rdev)
7632 {
7633 	u32 tmp;
7634 
7635 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7636 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7637 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7638 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7639 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7640 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7641 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7642 
7643 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7644 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7645 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7646 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7647 	if (rdev->num_crtc >= 4) {
7648 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7649 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7650 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7651 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7652 	}
7653 	if (rdev->num_crtc >= 6) {
7654 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7655 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7656 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7657 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7658 	}
7659 
7660 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7661 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7662 		       GRPH_PFLIP_INT_CLEAR);
7663 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7664 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7665 		       GRPH_PFLIP_INT_CLEAR);
7666 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7667 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7668 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7669 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7670 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7671 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7672 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7673 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7674 
7675 	if (rdev->num_crtc >= 4) {
7676 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7677 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7678 			       GRPH_PFLIP_INT_CLEAR);
7679 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7680 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7681 			       GRPH_PFLIP_INT_CLEAR);
7682 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7683 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7684 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7685 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7686 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7687 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7688 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7689 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7690 	}
7691 
7692 	if (rdev->num_crtc >= 6) {
7693 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7694 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7695 			       GRPH_PFLIP_INT_CLEAR);
7696 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7697 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7698 			       GRPH_PFLIP_INT_CLEAR);
7699 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7700 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7701 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7702 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7703 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7704 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7705 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7706 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7707 	}
7708 
7709 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7710 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7711 		tmp |= DC_HPDx_INT_ACK;
7712 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7713 	}
7714 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7715 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7716 		tmp |= DC_HPDx_INT_ACK;
7717 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7718 	}
7719 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7720 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7721 		tmp |= DC_HPDx_INT_ACK;
7722 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7723 	}
7724 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7725 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7726 		tmp |= DC_HPDx_INT_ACK;
7727 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7728 	}
7729 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7730 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7731 		tmp |= DC_HPDx_INT_ACK;
7732 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7733 	}
7734 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7735 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7736 		tmp |= DC_HPDx_INT_ACK;
7737 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7738 	}
7739 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
7740 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7741 		tmp |= DC_HPDx_RX_INT_ACK;
7742 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7743 	}
7744 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
7745 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7746 		tmp |= DC_HPDx_RX_INT_ACK;
7747 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7748 	}
7749 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
7750 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7751 		tmp |= DC_HPDx_RX_INT_ACK;
7752 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7753 	}
7754 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
7755 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7756 		tmp |= DC_HPDx_RX_INT_ACK;
7757 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7758 	}
7759 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
7760 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7761 		tmp |= DC_HPDx_RX_INT_ACK;
7762 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7763 	}
7764 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
7765 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7766 		tmp |= DC_HPDx_RX_INT_ACK;
7767 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7768 	}
7769 }
7770 
7771 /**
7772  * cik_irq_disable - disable interrupts
7773  *
7774  * @rdev: radeon_device pointer
7775  *
7776  * Disable interrupts on the hw (CIK).
7777  */
static void cik_irq_disable(struct radeon_device *rdev)
{
	/* turn off interrupt delivery first */
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	/* then mask the individual interrupt sources */
	cik_disable_interrupt_state(rdev);
}
7786 
7787 /**
 * cik_irq_suspend - disable interrupts for suspend
7789  *
7790  * @rdev: radeon_device pointer
7791  *
7792  * Disable interrupts and stop the RLC (CIK).
7793  * Used for suspend.
7794  */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* disable and ack all interrupts ... */
	cik_irq_disable(rdev);
	/* ... and also stop the RLC for suspend */
	cik_rlc_stop(rdev);
}
7800 
7801 /**
7802  * cik_irq_fini - tear down interrupt support
7803  *
7804  * @rdev: radeon_device pointer
7805  *
7806  * Disable interrupts on the hw and free the IH ring
7807  * buffer (CIK).
7808  * Used for driver unload.
7809  */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw (disable irqs, stop the RLC) before freeing the ring */
	cik_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
7815 
7816 /**
7817  * cik_get_ih_wptr - get the IH ring buffer wptr
7818  *
7819  * @rdev: radeon_device pointer
7820  *
7821  * Get the IH ring buffer wptr from either the register
7822  * or the writeback memory buffer (CIK).  Also check for
7823  * ring buffer overflow and deal with it.
7824  * Used by cik_irq_process().
7825  * Returns the value of the wptr.
7826  */
static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
{
	u32 wptr, tmp;

	/* prefer the writeback copy of the wptr when writeback is enabled,
	 * otherwise fall back to an MMIO register read */
	if (rdev->wb.enabled)
		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
	else
		wptr = RREG32(IH_RB_WPTR);

	if (wptr & RB_OVERFLOW) {
		wptr &= ~RB_OVERFLOW;
		/* When a ring buffer overflow happen start parsing interrupt
		 * from the last not overwritten vector (wptr + 16). Hopefully
		 * this should allow us to catchup.
		 */
		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
		/* clear the overflow flag in the ring control register */
		tmp = RREG32(IH_RB_CNTL);
		tmp |= IH_WPTR_OVERFLOW_CLEAR;
		WREG32(IH_RB_CNTL, tmp);
	}
	/* wptr is a byte offset wrapped to the ring size */
	return (wptr & rdev->ih.ptr_mask);
}
7851 
7852 /*        CIK IV Ring
7853  * Each IV ring entry is 128 bits:
7854  * [7:0]    - interrupt source id
7855  * [31:8]   - reserved
7856  * [59:32]  - interrupt source data
7857  * [63:60]  - reserved
7858  * [71:64]  - RINGID
7859  *            CP:
7860  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7861  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7862  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7863  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7864  *            PIPE_ID - ME0 0=3D
7865  *                    - ME1&2 compute dispatcher (4 pipes each)
7866  *            SDMA:
7867  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7868  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7869  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7870  * [79:72]  - VMID
7871  * [95:80]  - PASID
7872  * [127:96] - reserved
7873  */
7874 /**
7875  * cik_irq_process - interrupt handler
7876  *
7877  * @rdev: radeon_device pointer
7878  *
 * Interrupt handler (CIK).  Walk the IH ring,
7880  * ack interrupts and schedule work to handle
7881  * interrupt events.
7882  * Returns irq process return code.
7883  */
7884 int cik_irq_process(struct radeon_device *rdev)
7885 {
7886 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7887 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7888 	u32 wptr;
7889 	u32 rptr;
7890 	u32 src_id, src_data, ring_id;
7891 	u8 me_id, pipe_id, queue_id;
7892 	u32 ring_index;
7893 	bool queue_hotplug = false;
7894 	bool queue_dp = false;
7895 	bool queue_reset = false;
7896 	u32 addr, status, mc_client;
7897 	bool queue_thermal = false;
7898 
7899 	if (!rdev->ih.enabled || rdev->shutdown)
7900 		return IRQ_NONE;
7901 
7902 	wptr = cik_get_ih_wptr(rdev);
7903 
7904 restart_ih:
7905 	/* is somebody else already processing irqs? */
7906 	if (atomic_xchg(&rdev->ih.lock, 1))
7907 		return IRQ_NONE;
7908 
7909 	rptr = rdev->ih.rptr;
7910 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7911 
7912 	/* Order reading of wptr vs. reading of IH ring data */
7913 	rmb();
7914 
7915 	/* display interrupts */
7916 	cik_irq_ack(rdev);
7917 
7918 	while (rptr != wptr) {
7919 		/* wptr/rptr are in bytes! */
7920 		ring_index = rptr / 4;
7921 
7922 		radeon_kfd_interrupt(rdev,
7923 				(const void *) &rdev->ih.ring[ring_index]);
7924 
7925 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7926 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7927 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7928 
7929 		switch (src_id) {
7930 		case 1: /* D1 vblank/vline */
7931 			switch (src_data) {
7932 			case 0: /* D1 vblank */
7933 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7934 					if (rdev->irq.crtc_vblank_int[0]) {
7935 						drm_handle_vblank(rdev->ddev, 0);
7936 						rdev->pm.vblank_sync = true;
7937 						wake_up(&rdev->irq.vblank_queue);
7938 					}
7939 					if (atomic_read(&rdev->irq.pflip[0]))
7940 						radeon_crtc_handle_vblank(rdev, 0);
7941 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7942 					DRM_DEBUG("IH: D1 vblank\n");
7943 				}
7944 				break;
7945 			case 1: /* D1 vline */
7946 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7947 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7948 					DRM_DEBUG("IH: D1 vline\n");
7949 				}
7950 				break;
7951 			default:
7952 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7953 				break;
7954 			}
7955 			break;
7956 		case 2: /* D2 vblank/vline */
7957 			switch (src_data) {
7958 			case 0: /* D2 vblank */
7959 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7960 					if (rdev->irq.crtc_vblank_int[1]) {
7961 						drm_handle_vblank(rdev->ddev, 1);
7962 						rdev->pm.vblank_sync = true;
7963 						wake_up(&rdev->irq.vblank_queue);
7964 					}
7965 					if (atomic_read(&rdev->irq.pflip[1]))
7966 						radeon_crtc_handle_vblank(rdev, 1);
7967 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7968 					DRM_DEBUG("IH: D2 vblank\n");
7969 				}
7970 				break;
7971 			case 1: /* D2 vline */
7972 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7973 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7974 					DRM_DEBUG("IH: D2 vline\n");
7975 				}
7976 				break;
7977 			default:
7978 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7979 				break;
7980 			}
7981 			break;
7982 		case 3: /* D3 vblank/vline */
7983 			switch (src_data) {
7984 			case 0: /* D3 vblank */
7985 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7986 					if (rdev->irq.crtc_vblank_int[2]) {
7987 						drm_handle_vblank(rdev->ddev, 2);
7988 						rdev->pm.vblank_sync = true;
7989 						wake_up(&rdev->irq.vblank_queue);
7990 					}
7991 					if (atomic_read(&rdev->irq.pflip[2]))
7992 						radeon_crtc_handle_vblank(rdev, 2);
7993 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7994 					DRM_DEBUG("IH: D3 vblank\n");
7995 				}
7996 				break;
7997 			case 1: /* D3 vline */
7998 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7999 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
8000 					DRM_DEBUG("IH: D3 vline\n");
8001 				}
8002 				break;
8003 			default:
8004 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8005 				break;
8006 			}
8007 			break;
8008 		case 4: /* D4 vblank/vline */
8009 			switch (src_data) {
8010 			case 0: /* D4 vblank */
8011 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
8012 					if (rdev->irq.crtc_vblank_int[3]) {
8013 						drm_handle_vblank(rdev->ddev, 3);
8014 						rdev->pm.vblank_sync = true;
8015 						wake_up(&rdev->irq.vblank_queue);
8016 					}
8017 					if (atomic_read(&rdev->irq.pflip[3]))
8018 						radeon_crtc_handle_vblank(rdev, 3);
8019 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
8020 					DRM_DEBUG("IH: D4 vblank\n");
8021 				}
8022 				break;
8023 			case 1: /* D4 vline */
8024 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
8025 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
8026 					DRM_DEBUG("IH: D4 vline\n");
8027 				}
8028 				break;
8029 			default:
8030 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8031 				break;
8032 			}
8033 			break;
8034 		case 5: /* D5 vblank/vline */
8035 			switch (src_data) {
8036 			case 0: /* D5 vblank */
8037 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
8038 					if (rdev->irq.crtc_vblank_int[4]) {
8039 						drm_handle_vblank(rdev->ddev, 4);
8040 						rdev->pm.vblank_sync = true;
8041 						wake_up(&rdev->irq.vblank_queue);
8042 					}
8043 					if (atomic_read(&rdev->irq.pflip[4]))
8044 						radeon_crtc_handle_vblank(rdev, 4);
8045 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
8046 					DRM_DEBUG("IH: D5 vblank\n");
8047 				}
8048 				break;
8049 			case 1: /* D5 vline */
8050 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
8051 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
8052 					DRM_DEBUG("IH: D5 vline\n");
8053 				}
8054 				break;
8055 			default:
8056 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8057 				break;
8058 			}
8059 			break;
8060 		case 6: /* D6 vblank/vline */
8061 			switch (src_data) {
8062 			case 0: /* D6 vblank */
8063 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
8064 					if (rdev->irq.crtc_vblank_int[5]) {
8065 						drm_handle_vblank(rdev->ddev, 5);
8066 						rdev->pm.vblank_sync = true;
8067 						wake_up(&rdev->irq.vblank_queue);
8068 					}
8069 					if (atomic_read(&rdev->irq.pflip[5]))
8070 						radeon_crtc_handle_vblank(rdev, 5);
8071 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
8072 					DRM_DEBUG("IH: D6 vblank\n");
8073 				}
8074 				break;
8075 			case 1: /* D6 vline */
8076 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
8077 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
8078 					DRM_DEBUG("IH: D6 vline\n");
8079 				}
8080 				break;
8081 			default:
8082 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8083 				break;
8084 			}
8085 			break;
8086 		case 8: /* D1 page flip */
8087 		case 10: /* D2 page flip */
8088 		case 12: /* D3 page flip */
8089 		case 14: /* D4 page flip */
8090 		case 16: /* D5 page flip */
8091 		case 18: /* D6 page flip */
8092 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
8093 			if (radeon_use_pflipirq > 0)
8094 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
8095 			break;
8096 		case 42: /* HPD hotplug */
8097 			switch (src_data) {
8098 			case 0:
8099 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
8100 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
8101 					queue_hotplug = true;
8102 					DRM_DEBUG("IH: HPD1\n");
8103 				}
8104 				break;
8105 			case 1:
8106 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
8107 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
8108 					queue_hotplug = true;
8109 					DRM_DEBUG("IH: HPD2\n");
8110 				}
8111 				break;
8112 			case 2:
8113 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
8114 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
8115 					queue_hotplug = true;
8116 					DRM_DEBUG("IH: HPD3\n");
8117 				}
8118 				break;
8119 			case 3:
8120 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
8121 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
8122 					queue_hotplug = true;
8123 					DRM_DEBUG("IH: HPD4\n");
8124 				}
8125 				break;
8126 			case 4:
8127 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
8128 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
8129 					queue_hotplug = true;
8130 					DRM_DEBUG("IH: HPD5\n");
8131 				}
8132 				break;
8133 			case 5:
8134 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
8135 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
8136 					queue_hotplug = true;
8137 					DRM_DEBUG("IH: HPD6\n");
8138 				}
8139 				break;
8140 			case 6:
8141 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_RX_INTERRUPT) {
8142 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_RX_INTERRUPT;
8143 					queue_dp = true;
8144 					DRM_DEBUG("IH: HPD_RX 1\n");
8145 				}
8146 				break;
8147 			case 7:
8148 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
8149 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
8150 					queue_dp = true;
8151 					DRM_DEBUG("IH: HPD_RX 2\n");
8152 				}
8153 				break;
8154 			case 8:
8155 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
8156 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
8157 					queue_dp = true;
8158 					DRM_DEBUG("IH: HPD_RX 3\n");
8159 				}
8160 				break;
8161 			case 9:
8162 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
8163 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
8164 					queue_dp = true;
8165 					DRM_DEBUG("IH: HPD_RX 4\n");
8166 				}
8167 				break;
8168 			case 10:
8169 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
8170 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
8171 					queue_dp = true;
8172 					DRM_DEBUG("IH: HPD_RX 5\n");
8173 				}
8174 				break;
8175 			case 11:
8176 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
8177 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
8178 					queue_dp = true;
8179 					DRM_DEBUG("IH: HPD_RX 6\n");
8180 				}
8181 				break;
8182 			default:
8183 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8184 				break;
8185 			}
8186 			break;
8187 		case 96:
8188 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
8189 			WREG32(SRBM_INT_ACK, 0x1);
8190 			break;
8191 		case 124: /* UVD */
8192 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
8193 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
8194 			break;
8195 		case 146:
8196 		case 147:
8197 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
8198 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
8199 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
8200 			/* reset addr and status */
8201 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
8202 			if (addr == 0x0 && status == 0x0)
8203 				break;
8204 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
8205 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
8206 				addr);
8207 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
8208 				status);
8209 			cik_vm_decode_fault(rdev, status, addr, mc_client);
8210 			break;
8211 		case 167: /* VCE */
8212 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
8213 			switch (src_data) {
8214 			case 0:
8215 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
8216 				break;
8217 			case 1:
8218 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
8219 				break;
8220 			default:
8221 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
8222 				break;
8223 			}
8224 			break;
8225 		case 176: /* GFX RB CP_INT */
8226 		case 177: /* GFX IB CP_INT */
8227 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8228 			break;
8229 		case 181: /* CP EOP event */
8230 			DRM_DEBUG("IH: CP EOP\n");
8231 			/* XXX check the bitfield order! */
8232 			me_id = (ring_id & 0x60) >> 5;
8233 			pipe_id = (ring_id & 0x18) >> 3;
8234 			queue_id = (ring_id & 0x7) >> 0;
8235 			switch (me_id) {
8236 			case 0:
8237 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
8238 				break;
8239 			case 1:
8240 			case 2:
8241 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
8242 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8243 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
8244 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8245 				break;
8246 			}
8247 			break;
8248 		case 184: /* CP Privileged reg access */
8249 			DRM_ERROR("Illegal register access in command stream\n");
8250 			/* XXX check the bitfield order! */
8251 			me_id = (ring_id & 0x60) >> 5;
8252 			pipe_id = (ring_id & 0x18) >> 3;
8253 			queue_id = (ring_id & 0x7) >> 0;
8254 			switch (me_id) {
8255 			case 0:
8256 				/* This results in a full GPU reset, but all we need to do is soft
8257 				 * reset the CP for gfx
8258 				 */
8259 				queue_reset = true;
8260 				break;
8261 			case 1:
8262 				/* XXX compute */
8263 				queue_reset = true;
8264 				break;
8265 			case 2:
8266 				/* XXX compute */
8267 				queue_reset = true;
8268 				break;
8269 			}
8270 			break;
8271 		case 185: /* CP Privileged inst */
8272 			DRM_ERROR("Illegal instruction in command stream\n");
8273 			/* XXX check the bitfield order! */
8274 			me_id = (ring_id & 0x60) >> 5;
8275 			pipe_id = (ring_id & 0x18) >> 3;
8276 			queue_id = (ring_id & 0x7) >> 0;
8277 			switch (me_id) {
8278 			case 0:
8279 				/* This results in a full GPU reset, but all we need to do is soft
8280 				 * reset the CP for gfx
8281 				 */
8282 				queue_reset = true;
8283 				break;
8284 			case 1:
8285 				/* XXX compute */
8286 				queue_reset = true;
8287 				break;
8288 			case 2:
8289 				/* XXX compute */
8290 				queue_reset = true;
8291 				break;
8292 			}
8293 			break;
8294 		case 224: /* SDMA trap event */
8295 			/* XXX check the bitfield order! */
8296 			me_id = (ring_id & 0x3) >> 0;
8297 			queue_id = (ring_id & 0xc) >> 2;
8298 			DRM_DEBUG("IH: SDMA trap\n");
8299 			switch (me_id) {
8300 			case 0:
8301 				switch (queue_id) {
8302 				case 0:
8303 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
8304 					break;
8305 				case 1:
8306 					/* XXX compute */
8307 					break;
8308 				case 2:
8309 					/* XXX compute */
8310 					break;
8311 				}
8312 				break;
8313 			case 1:
8314 				switch (queue_id) {
8315 				case 0:
8316 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8317 					break;
8318 				case 1:
8319 					/* XXX compute */
8320 					break;
8321 				case 2:
8322 					/* XXX compute */
8323 					break;
8324 				}
8325 				break;
8326 			}
8327 			break;
8328 		case 230: /* thermal low to high */
8329 			DRM_DEBUG("IH: thermal low to high\n");
8330 			rdev->pm.dpm.thermal.high_to_low = false;
8331 			queue_thermal = true;
8332 			break;
8333 		case 231: /* thermal high to low */
8334 			DRM_DEBUG("IH: thermal high to low\n");
8335 			rdev->pm.dpm.thermal.high_to_low = true;
8336 			queue_thermal = true;
8337 			break;
8338 		case 233: /* GUI IDLE */
8339 			DRM_DEBUG("IH: GUI idle\n");
8340 			break;
8341 		case 241: /* SDMA Privileged inst */
8342 		case 247: /* SDMA Privileged inst */
8343 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
8344 			/* XXX check the bitfield order! */
8345 			me_id = (ring_id & 0x3) >> 0;
8346 			queue_id = (ring_id & 0xc) >> 2;
8347 			switch (me_id) {
8348 			case 0:
8349 				switch (queue_id) {
8350 				case 0:
8351 					queue_reset = true;
8352 					break;
8353 				case 1:
8354 					/* XXX compute */
8355 					queue_reset = true;
8356 					break;
8357 				case 2:
8358 					/* XXX compute */
8359 					queue_reset = true;
8360 					break;
8361 				}
8362 				break;
8363 			case 1:
8364 				switch (queue_id) {
8365 				case 0:
8366 					queue_reset = true;
8367 					break;
8368 				case 1:
8369 					/* XXX compute */
8370 					queue_reset = true;
8371 					break;
8372 				case 2:
8373 					/* XXX compute */
8374 					queue_reset = true;
8375 					break;
8376 				}
8377 				break;
8378 			}
8379 			break;
8380 		default:
8381 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
8382 			break;
8383 		}
8384 
8385 		/* wptr/rptr are in bytes! */
8386 		rptr += 16;
8387 		rptr &= rdev->ih.ptr_mask;
8388 		WREG32(IH_RB_RPTR, rptr);
8389 	}
8390 	if (queue_dp)
8391 		schedule_work(&rdev->dp_work);
8392 	if (queue_hotplug)
8393 		schedule_work(&rdev->hotplug_work);
8394 	if (queue_reset) {
8395 		rdev->needs_reset = true;
8396 		wake_up_all(&rdev->fence_queue);
8397 	}
8398 	if (queue_thermal)
8399 		schedule_work(&rdev->pm.dpm.thermal.work);
8400 	rdev->ih.rptr = rptr;
8401 	atomic_set(&rdev->ih.lock, 0);
8402 
8403 	/* make sure wptr hasn't changed while processing */
8404 	wptr = cik_get_ih_wptr(rdev);
8405 	if (wptr != rptr)
8406 		goto restart_ih;
8407 
8408 	return IRQ_HANDLED;
8409 }
8410 
8411 /*
8412  * startup/shutdown callbacks
8413  */
8414 /**
8415  * cik_startup - program the asic to a functional state
8416  *
8417  * @rdev: radeon_device pointer
8418  *
8419  * Programs the asic to a functional state (CIK).
8420  * Called by cik_init() and cik_resume().
8421  * Returns 0 for success, error for failure.
8422  */
8423 static int cik_startup(struct radeon_device *rdev)
8424 {
8425 	struct radeon_ring *ring;
8426 	u32 nop;
8427 	int r;
8428 
8429 	/* enable pcie gen2/3 link */
8430 	cik_pcie_gen3_enable(rdev);
8431 	/* enable aspm */
8432 	cik_program_aspm(rdev);
8433 
8434 	/* scratch needs to be initialized before MC */
8435 	r = r600_vram_scratch_init(rdev);
8436 	if (r)
8437 		return r;
8438 
8439 	cik_mc_program(rdev);
8440 
8441 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
8442 		r = ci_mc_load_microcode(rdev);
8443 		if (r) {
8444 			DRM_ERROR("Failed to load MC firmware!\n");
8445 			return r;
8446 		}
8447 	}
8448 
8449 	r = cik_pcie_gart_enable(rdev);
8450 	if (r)
8451 		return r;
8452 	cik_gpu_init(rdev);
8453 
8454 	/* allocate rlc buffers */
8455 	if (rdev->flags & RADEON_IS_IGP) {
8456 		if (rdev->family == CHIP_KAVERI) {
8457 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
8458 			rdev->rlc.reg_list_size =
8459 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
8460 		} else {
8461 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
8462 			rdev->rlc.reg_list_size =
8463 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
8464 		}
8465 	}
8466 	rdev->rlc.cs_data = ci_cs_data;
8467 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
8468 	r = sumo_rlc_init(rdev);
8469 	if (r) {
8470 		DRM_ERROR("Failed to init rlc BOs!\n");
8471 		return r;
8472 	}
8473 
8474 	/* allocate wb buffer */
8475 	r = radeon_wb_init(rdev);
8476 	if (r)
8477 		return r;
8478 
8479 	/* allocate mec buffers */
8480 	r = cik_mec_init(rdev);
8481 	if (r) {
8482 		DRM_ERROR("Failed to init MEC BOs!\n");
8483 		return r;
8484 	}
8485 
8486 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
8487 	if (r) {
8488 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8489 		return r;
8490 	}
8491 
8492 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
8493 	if (r) {
8494 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8495 		return r;
8496 	}
8497 
8498 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
8499 	if (r) {
8500 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
8501 		return r;
8502 	}
8503 
8504 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
8505 	if (r) {
8506 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8507 		return r;
8508 	}
8509 
8510 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
8511 	if (r) {
8512 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
8513 		return r;
8514 	}
8515 
8516 	r = radeon_uvd_resume(rdev);
8517 	if (!r) {
8518 		r = uvd_v4_2_resume(rdev);
8519 		if (!r) {
8520 			r = radeon_fence_driver_start_ring(rdev,
8521 							   R600_RING_TYPE_UVD_INDEX);
8522 			if (r)
8523 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
8524 		}
8525 	}
8526 	if (r)
8527 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
8528 
8529 	r = radeon_vce_resume(rdev);
8530 	if (!r) {
8531 		r = vce_v2_0_resume(rdev);
8532 		if (!r)
8533 			r = radeon_fence_driver_start_ring(rdev,
8534 							   TN_RING_TYPE_VCE1_INDEX);
8535 		if (!r)
8536 			r = radeon_fence_driver_start_ring(rdev,
8537 							   TN_RING_TYPE_VCE2_INDEX);
8538 	}
8539 	if (r) {
8540 		dev_err(rdev->dev, "VCE init error (%d).\n", r);
8541 		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
8542 		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
8543 	}
8544 
8545 	/* Enable IRQ */
8546 	if (!rdev->irq.installed) {
8547 		r = radeon_irq_kms_init(rdev);
8548 		if (r)
8549 			return r;
8550 	}
8551 
8552 	r = cik_irq_init(rdev);
8553 	if (r) {
8554 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
8555 		radeon_irq_kms_fini(rdev);
8556 		return r;
8557 	}
8558 	cik_irq_set(rdev);
8559 
8560 	if (rdev->family == CHIP_HAWAII) {
8561 		if (rdev->new_fw)
8562 			nop = PACKET3(PACKET3_NOP, 0x3FFF);
8563 		else
8564 			nop = RADEON_CP_PACKET2;
8565 	} else {
8566 		nop = PACKET3(PACKET3_NOP, 0x3FFF);
8567 	}
8568 
8569 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8570 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
8571 			     nop);
8572 	if (r)
8573 		return r;
8574 
8575 	/* set up the compute queues */
8576 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8577 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8578 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
8579 			     nop);
8580 	if (r)
8581 		return r;
8582 	ring->me = 1; /* first MEC */
8583 	ring->pipe = 0; /* first pipe */
8584 	ring->queue = 0; /* first queue */
8585 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
8586 
8587 	/* type-2 packets are deprecated on MEC, use type-3 instead */
8588 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8589 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
8590 			     nop);
8591 	if (r)
8592 		return r;
8593 	/* dGPU only have 1 MEC */
8594 	ring->me = 1; /* first MEC */
8595 	ring->pipe = 0; /* first pipe */
8596 	ring->queue = 1; /* second queue */
8597 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
8598 
8599 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8600 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
8601 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8602 	if (r)
8603 		return r;
8604 
8605 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8606 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
8607 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
8608 	if (r)
8609 		return r;
8610 
8611 	r = cik_cp_resume(rdev);
8612 	if (r)
8613 		return r;
8614 
8615 	r = cik_sdma_resume(rdev);
8616 	if (r)
8617 		return r;
8618 
8619 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8620 	if (ring->ring_size) {
8621 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8622 				     RADEON_CP_PACKET2);
8623 		if (!r)
8624 			r = uvd_v1_0_init(rdev);
8625 		if (r)
8626 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
8627 	}
8628 
8629 	r = -ENOENT;
8630 
8631 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8632 	if (ring->ring_size)
8633 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8634 				     VCE_CMD_NO_OP);
8635 
8636 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8637 	if (ring->ring_size)
8638 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
8639 				     VCE_CMD_NO_OP);
8640 
8641 	if (!r)
8642 		r = vce_v1_0_init(rdev);
8643 	else if (r != -ENOENT)
8644 		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
8645 
8646 	r = radeon_ib_pool_init(rdev);
8647 	if (r) {
8648 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
8649 		return r;
8650 	}
8651 
8652 	r = radeon_vm_manager_init(rdev);
8653 	if (r) {
8654 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
8655 		return r;
8656 	}
8657 
8658 	r = radeon_audio_init(rdev);
8659 	if (r)
8660 		return r;
8661 
8662 	r = radeon_kfd_resume(rdev);
8663 	if (r)
8664 		return r;
8665 
8666 	return 0;
8667 }
8668 
8669 /**
8670  * cik_resume - resume the asic to a functional state
8671  *
8672  * @rdev: radeon_device pointer
8673  *
8674  * Programs the asic to a functional state (CIK).
8675  * Called at resume.
8676  * Returns 0 for success, error for failure.
8677  */
8678 int cik_resume(struct radeon_device *rdev)
8679 {
8680 	int r;
8681 
8682 	/* post card */
8683 	atom_asic_init(rdev->mode_info.atom_context);
8684 
8685 	/* init golden registers */
8686 	cik_init_golden_registers(rdev);
8687 
8688 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8689 		radeon_pm_resume(rdev);
8690 
8691 	rdev->accel_working = true;
8692 	r = cik_startup(rdev);
8693 	if (r) {
8694 		DRM_ERROR("cik startup failed on resume\n");
8695 		rdev->accel_working = false;
8696 		return r;
8697 	}
8698 
8699 	return r;
8700 
8701 }
8702 
8703 /**
8704  * cik_suspend - suspend the asic
8705  *
8706  * @rdev: radeon_device pointer
8707  *
8708  * Bring the chip into a state suitable for suspend (CIK).
8709  * Called at suspend.
8710  * Returns 0 for success.
8711  */
8712 int cik_suspend(struct radeon_device *rdev)
8713 {
8714 	radeon_kfd_suspend(rdev);
8715 	radeon_pm_suspend(rdev);
8716 	radeon_audio_fini(rdev);
8717 	radeon_vm_manager_fini(rdev);
8718 	cik_cp_enable(rdev, false);
8719 	cik_sdma_enable(rdev, false);
8720 	uvd_v1_0_fini(rdev);
8721 	radeon_uvd_suspend(rdev);
8722 	radeon_vce_suspend(rdev);
8723 	cik_fini_pg(rdev);
8724 	cik_fini_cg(rdev);
8725 	cik_irq_suspend(rdev);
8726 	radeon_wb_disable(rdev);
8727 	cik_pcie_gart_disable(rdev);
8728 	return 0;
8729 }
8730 
8731 /* Plan is to move initialization in that function and use
8732  * helper function so that radeon_device_init pretty much
8733  * do nothing more than calling asic specific function. This
8734  * should also allow to remove a bunch of callback function
8735  * like vram_info.
8736  */
8737 /**
8738  * cik_init - asic specific driver and hw init
8739  *
8740  * @rdev: radeon_device pointer
8741  *
8742  * Setup asic specific driver variables and program the hw
8743  * to a functional state (CIK).
8744  * Called at driver startup.
8745  * Returns 0 for success, errors for failure.
8746  */
8747 int cik_init(struct radeon_device *rdev)
8748 {
8749 	struct radeon_ring *ring;
8750 	int r;
8751 
8752 	/* Read BIOS */
8753 	if (!radeon_get_bios(rdev)) {
8754 		if (ASIC_IS_AVIVO(rdev))
8755 			return -EINVAL;
8756 	}
8757 	/* Must be an ATOMBIOS */
8758 	if (!rdev->is_atom_bios) {
8759 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8760 		return -EINVAL;
8761 	}
8762 	r = radeon_atombios_init(rdev);
8763 	if (r)
8764 		return r;
8765 
8766 	/* Post card if necessary */
8767 	if (!radeon_card_posted(rdev)) {
8768 		if (!rdev->bios) {
8769 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8770 			return -EINVAL;
8771 		}
8772 		DRM_INFO("GPU not posted. posting now...\n");
8773 		atom_asic_init(rdev->mode_info.atom_context);
8774 	}
8775 	/* init golden registers */
8776 	cik_init_golden_registers(rdev);
8777 	/* Initialize scratch registers */
8778 	cik_scratch_init(rdev);
8779 	/* Initialize surface registers */
8780 	radeon_surface_init(rdev);
8781 	/* Initialize clocks */
8782 	radeon_get_clock_info(rdev->ddev);
8783 
8784 	/* Fence driver */
8785 	r = radeon_fence_driver_init(rdev);
8786 	if (r)
8787 		return r;
8788 
8789 	/* initialize memory controller */
8790 	r = cik_mc_init(rdev);
8791 	if (r)
8792 		return r;
8793 	/* Memory manager */
8794 	r = radeon_bo_init(rdev);
8795 	if (r)
8796 		return r;
8797 
8798 	if (rdev->flags & RADEON_IS_IGP) {
8799 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8800 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8801 			r = cik_init_microcode(rdev);
8802 			if (r) {
8803 				DRM_ERROR("Failed to load firmware!\n");
8804 				return r;
8805 			}
8806 		}
8807 	} else {
8808 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8809 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8810 		    !rdev->mc_fw) {
8811 			r = cik_init_microcode(rdev);
8812 			if (r) {
8813 				DRM_ERROR("Failed to load firmware!\n");
8814 				return r;
8815 			}
8816 		}
8817 	}
8818 
8819 	/* Initialize power management */
8820 	radeon_pm_init(rdev);
8821 
8822 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8823 	ring->ring_obj = NULL;
8824 	r600_ring_init(rdev, ring, 1024 * 1024);
8825 
8826 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8827 	ring->ring_obj = NULL;
8828 	r600_ring_init(rdev, ring, 1024 * 1024);
8829 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8830 	if (r)
8831 		return r;
8832 
8833 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8834 	ring->ring_obj = NULL;
8835 	r600_ring_init(rdev, ring, 1024 * 1024);
8836 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8837 	if (r)
8838 		return r;
8839 
8840 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8841 	ring->ring_obj = NULL;
8842 	r600_ring_init(rdev, ring, 256 * 1024);
8843 
8844 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8845 	ring->ring_obj = NULL;
8846 	r600_ring_init(rdev, ring, 256 * 1024);
8847 
8848 	r = radeon_uvd_init(rdev);
8849 	if (!r) {
8850 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8851 		ring->ring_obj = NULL;
8852 		r600_ring_init(rdev, ring, 4096);
8853 	}
8854 
8855 	r = radeon_vce_init(rdev);
8856 	if (!r) {
8857 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8858 		ring->ring_obj = NULL;
8859 		r600_ring_init(rdev, ring, 4096);
8860 
8861 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8862 		ring->ring_obj = NULL;
8863 		r600_ring_init(rdev, ring, 4096);
8864 	}
8865 
8866 	rdev->ih.ring_obj = NULL;
8867 	r600_ih_ring_init(rdev, 64 * 1024);
8868 
8869 	r = r600_pcie_gart_init(rdev);
8870 	if (r)
8871 		return r;
8872 
8873 	rdev->accel_working = true;
8874 	r = cik_startup(rdev);
8875 	if (r) {
8876 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8877 		cik_cp_fini(rdev);
8878 		cik_sdma_fini(rdev);
8879 		cik_irq_fini(rdev);
8880 		sumo_rlc_fini(rdev);
8881 		cik_mec_fini(rdev);
8882 		radeon_wb_fini(rdev);
8883 		radeon_ib_pool_fini(rdev);
8884 		radeon_vm_manager_fini(rdev);
8885 		radeon_irq_kms_fini(rdev);
8886 		cik_pcie_gart_fini(rdev);
8887 		rdev->accel_working = false;
8888 	}
8889 
8890 	/* Don't start up if the MC ucode is missing.
8891 	 * The default clocks and voltages before the MC ucode
8892 	 * is loaded are not suffient for advanced operations.
8893 	 */
8894 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8895 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8896 		return -EINVAL;
8897 	}
8898 
8899 	return 0;
8900 }
8901 
8902 /**
8903  * cik_fini - asic specific driver and hw fini
8904  *
8905  * @rdev: radeon_device pointer
8906  *
8907  * Tear down the asic specific driver variables and program the hw
8908  * to an idle state (CIK).
8909  * Called at driver unload.
8910  */
8911 void cik_fini(struct radeon_device *rdev)
8912 {
8913 	radeon_pm_fini(rdev);
8914 	cik_cp_fini(rdev);
8915 	cik_sdma_fini(rdev);
8916 	cik_fini_pg(rdev);
8917 	cik_fini_cg(rdev);
8918 	cik_irq_fini(rdev);
8919 	sumo_rlc_fini(rdev);
8920 	cik_mec_fini(rdev);
8921 	radeon_wb_fini(rdev);
8922 	radeon_vm_manager_fini(rdev);
8923 	radeon_ib_pool_fini(rdev);
8924 	radeon_irq_kms_fini(rdev);
8925 	uvd_v1_0_fini(rdev);
8926 	radeon_uvd_fini(rdev);
8927 	radeon_vce_fini(rdev);
8928 	cik_pcie_gart_fini(rdev);
8929 	r600_vram_scratch_fini(rdev);
8930 	radeon_gem_fini(rdev);
8931 	radeon_fence_driver_fini(rdev);
8932 	radeon_bo_fini(rdev);
8933 	radeon_atombios_fini(rdev);
8934 	kfree(rdev->bios);
8935 	rdev->bios = NULL;
8936 }
8937 
8938 void dce8_program_fmt(struct drm_encoder *encoder)
8939 {
8940 	struct drm_device *dev = encoder->dev;
8941 	struct radeon_device *rdev = dev->dev_private;
8942 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8943 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8944 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8945 	int bpc = 0;
8946 	u32 tmp = 0;
8947 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8948 
8949 	if (connector) {
8950 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8951 		bpc = radeon_get_monitor_bpc(connector);
8952 		dither = radeon_connector->dither;
8953 	}
8954 
8955 	/* LVDS/eDP FMT is set up by atom */
8956 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8957 		return;
8958 
8959 	/* not needed for analog */
8960 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8961 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8962 		return;
8963 
8964 	if (bpc == 0)
8965 		return;
8966 
8967 	switch (bpc) {
8968 	case 6:
8969 		if (dither == RADEON_FMT_DITHER_ENABLE)
8970 			/* XXX sort out optimal dither settings */
8971 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8972 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8973 		else
8974 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8975 		break;
8976 	case 8:
8977 		if (dither == RADEON_FMT_DITHER_ENABLE)
8978 			/* XXX sort out optimal dither settings */
8979 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8980 				FMT_RGB_RANDOM_ENABLE |
8981 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8982 		else
8983 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8984 		break;
8985 	case 10:
8986 		if (dither == RADEON_FMT_DITHER_ENABLE)
8987 			/* XXX sort out optimal dither settings */
8988 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8989 				FMT_RGB_RANDOM_ENABLE |
8990 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8991 		else
8992 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8993 		break;
8994 	default:
8995 		/* not needed */
8996 		break;
8997 	}
8998 
8999 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
9000 }
9001 
9002 /* display watermark setup */
9003 /**
9004  * dce8_line_buffer_adjust - Set up the line buffer
9005  *
9006  * @rdev: radeon_device pointer
9007  * @radeon_crtc: the selected display controller
9008  * @mode: the current display mode on the selected display
9009  * controller
9010  *
9011  * Setup up the line buffer allocation for
9012  * the selected display controller (CIK).
9013  * Returns the line buffer size in pixels.
9014  */
9015 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
9016 				   struct radeon_crtc *radeon_crtc,
9017 				   struct drm_display_mode *mode)
9018 {
9019 	u32 tmp, buffer_alloc, i;
9020 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
9021 	/*
9022 	 * Line Buffer Setup
9023 	 * There are 6 line buffers, one for each display controllers.
9024 	 * There are 3 partitions per LB. Select the number of partitions
9025 	 * to enable based on the display width.  For display widths larger
9026 	 * than 4096, you need use to use 2 display controllers and combine
9027 	 * them using the stereo blender.
9028 	 */
9029 	if (radeon_crtc->base.enabled && mode) {
9030 		if (mode->crtc_hdisplay < 1920) {
9031 			tmp = 1;
9032 			buffer_alloc = 2;
9033 		} else if (mode->crtc_hdisplay < 2560) {
9034 			tmp = 2;
9035 			buffer_alloc = 2;
9036 		} else if (mode->crtc_hdisplay < 4096) {
9037 			tmp = 0;
9038 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9039 		} else {
9040 			DRM_DEBUG_KMS("Mode too big for LB!\n");
9041 			tmp = 0;
9042 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
9043 		}
9044 	} else {
9045 		tmp = 1;
9046 		buffer_alloc = 0;
9047 	}
9048 
9049 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
9050 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
9051 
9052 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
9053 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
9054 	for (i = 0; i < rdev->usec_timeout; i++) {
9055 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
9056 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
9057 			break;
9058 		udelay(1);
9059 	}
9060 
9061 	if (radeon_crtc->base.enabled && mode) {
9062 		switch (tmp) {
9063 		case 0:
9064 		default:
9065 			return 4096 * 2;
9066 		case 1:
9067 			return 1920 * 2;
9068 		case 2:
9069 			return 2560 * 2;
9070 		}
9071 	}
9072 
9073 	/* controller not enabled, so no lb used */
9074 	return 0;
9075 }
9076 
9077 /**
9078  * cik_get_number_of_dram_channels - get the number of dram channels
9079  *
9080  * @rdev: radeon_device pointer
9081  *
9082  * Look up the number of video ram channels (CIK).
9083  * Used for display watermark bandwidth calculations
9084  * Returns the number of dram channels
9085  */
9086 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
9087 {
9088 	u32 tmp = RREG32(MC_SHARED_CHMAP);
9089 
9090 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
9091 	case 0:
9092 	default:
9093 		return 1;
9094 	case 1:
9095 		return 2;
9096 	case 2:
9097 		return 4;
9098 	case 3:
9099 		return 8;
9100 	case 4:
9101 		return 3;
9102 	case 5:
9103 		return 6;
9104 	case 6:
9105 		return 10;
9106 	case 7:
9107 		return 12;
9108 	case 8:
9109 		return 16;
9110 	}
9111 }
9112 
/* Input parameters for the DCE8 display watermark calculations below. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
9128 
9129 /**
9130  * dce8_dram_bandwidth - get the dram bandwidth
9131  *
9132  * @wm: watermark calculation data
9133  *
9134  * Calculate the raw dram bandwidth (CIK).
9135  * Used for display watermark bandwidth calculations
9136  * Returns the dram bandwidth in MBytes/s
9137  */
9138 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
9139 {
9140 	/* Calculate raw DRAM Bandwidth */
9141 	fixed20_12 dram_efficiency; /* 0.7 */
9142 	fixed20_12 yclk, dram_channels, bandwidth;
9143 	fixed20_12 a;
9144 
9145 	a.full = dfixed_const(1000);
9146 	yclk.full = dfixed_const(wm->yclk);
9147 	yclk.full = dfixed_div(yclk, a);
9148 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9149 	a.full = dfixed_const(10);
9150 	dram_efficiency.full = dfixed_const(7);
9151 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
9152 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9153 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
9154 
9155 	return dfixed_trunc(bandwidth);
9156 }
9157 
9158 /**
9159  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
9160  *
9161  * @wm: watermark calculation data
9162  *
9163  * Calculate the dram bandwidth used for display (CIK).
9164  * Used for display watermark bandwidth calculations
9165  * Returns the dram bandwidth for display in MBytes/s
9166  */
9167 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9168 {
9169 	/* Calculate DRAM Bandwidth and the part allocated to display. */
9170 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
9171 	fixed20_12 yclk, dram_channels, bandwidth;
9172 	fixed20_12 a;
9173 
9174 	a.full = dfixed_const(1000);
9175 	yclk.full = dfixed_const(wm->yclk);
9176 	yclk.full = dfixed_div(yclk, a);
9177 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
9178 	a.full = dfixed_const(10);
9179 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
9180 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
9181 	bandwidth.full = dfixed_mul(dram_channels, yclk);
9182 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
9183 
9184 	return dfixed_trunc(bandwidth);
9185 }
9186 
9187 /**
9188  * dce8_data_return_bandwidth - get the data return bandwidth
9189  *
9190  * @wm: watermark calculation data
9191  *
9192  * Calculate the data return bandwidth used for display (CIK).
9193  * Used for display watermark bandwidth calculations
9194  * Returns the data return bandwidth in MBytes/s
9195  */
9196 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
9197 {
9198 	/* Calculate the display Data return Bandwidth */
9199 	fixed20_12 return_efficiency; /* 0.8 */
9200 	fixed20_12 sclk, bandwidth;
9201 	fixed20_12 a;
9202 
9203 	a.full = dfixed_const(1000);
9204 	sclk.full = dfixed_const(wm->sclk);
9205 	sclk.full = dfixed_div(sclk, a);
9206 	a.full = dfixed_const(10);
9207 	return_efficiency.full = dfixed_const(8);
9208 	return_efficiency.full = dfixed_div(return_efficiency, a);
9209 	a.full = dfixed_const(32);
9210 	bandwidth.full = dfixed_mul(a, sclk);
9211 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
9212 
9213 	return dfixed_trunc(bandwidth);
9214 }
9215 
9216 /**
9217  * dce8_dmif_request_bandwidth - get the dmif bandwidth
9218  *
9219  * @wm: watermark calculation data
9220  *
9221  * Calculate the dmif bandwidth used for display (CIK).
9222  * Used for display watermark bandwidth calculations
9223  * Returns the dmif bandwidth in MBytes/s
9224  */
9225 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
9226 {
9227 	/* Calculate the DMIF Request Bandwidth */
9228 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
9229 	fixed20_12 disp_clk, bandwidth;
9230 	fixed20_12 a, b;
9231 
9232 	a.full = dfixed_const(1000);
9233 	disp_clk.full = dfixed_const(wm->disp_clk);
9234 	disp_clk.full = dfixed_div(disp_clk, a);
9235 	a.full = dfixed_const(32);
9236 	b.full = dfixed_mul(a, disp_clk);
9237 
9238 	a.full = dfixed_const(10);
9239 	disp_clk_request_efficiency.full = dfixed_const(8);
9240 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
9241 
9242 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
9243 
9244 	return dfixed_trunc(bandwidth);
9245 }
9246 
9247 /**
9248  * dce8_available_bandwidth - get the min available bandwidth
9249  *
9250  * @wm: watermark calculation data
9251  *
9252  * Calculate the min available bandwidth used for display (CIK).
9253  * Used for display watermark bandwidth calculations
9254  * Returns the min available bandwidth in MBytes/s
9255  */
9256 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
9257 {
9258 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
9259 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
9260 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
9261 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
9262 
9263 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
9264 }
9265 
9266 /**
9267  * dce8_average_bandwidth - get the average available bandwidth
9268  *
9269  * @wm: watermark calculation data
9270  *
9271  * Calculate the average available bandwidth used for display (CIK).
9272  * Used for display watermark bandwidth calculations
9273  * Returns the average available bandwidth in MBytes/s
9274  */
9275 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
9276 {
9277 	/* Calculate the display mode Average Bandwidth
9278 	 * DisplayMode should contain the source and destination dimensions,
9279 	 * timing, etc.
9280 	 */
9281 	fixed20_12 bpp;
9282 	fixed20_12 line_time;
9283 	fixed20_12 src_width;
9284 	fixed20_12 bandwidth;
9285 	fixed20_12 a;
9286 
9287 	a.full = dfixed_const(1000);
9288 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
9289 	line_time.full = dfixed_div(line_time, a);
9290 	bpp.full = dfixed_const(wm->bytes_per_pixel);
9291 	src_width.full = dfixed_const(wm->src_width);
9292 	bandwidth.full = dfixed_mul(src_width, bpp);
9293 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
9294 	bandwidth.full = dfixed_div(bandwidth, line_time);
9295 
9296 	return dfixed_trunc(bandwidth);
9297 }
9298 
9299 /**
9300  * dce8_latency_watermark - get the latency watermark
9301  *
9302  * @wm: watermark calculation data
9303  *
9304  * Calculate the latency watermark (CIK).
9305  * Used for display watermark bandwidth calculations
9306  * Returns the latency watermark in ns
9307  */
9308 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
9309 {
9310 	/* First calculate the latency in ns */
9311 	u32 mc_latency = 2000; /* 2000 ns. */
9312 	u32 available_bandwidth = dce8_available_bandwidth(wm);
9313 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
9314 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
9315 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
9316 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
9317 		(wm->num_heads * cursor_line_pair_return_time);
9318 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
9319 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
9320 	u32 tmp, dmif_size = 12288;
9321 	fixed20_12 a, b, c;
9322 
9323 	if (wm->num_heads == 0)
9324 		return 0;
9325 
9326 	a.full = dfixed_const(2);
9327 	b.full = dfixed_const(1);
9328 	if ((wm->vsc.full > a.full) ||
9329 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
9330 	    (wm->vtaps >= 5) ||
9331 	    ((wm->vsc.full >= a.full) && wm->interlaced))
9332 		max_src_lines_per_dst_line = 4;
9333 	else
9334 		max_src_lines_per_dst_line = 2;
9335 
9336 	a.full = dfixed_const(available_bandwidth);
9337 	b.full = dfixed_const(wm->num_heads);
9338 	a.full = dfixed_div(a, b);
9339 
9340 	b.full = dfixed_const(mc_latency + 512);
9341 	c.full = dfixed_const(wm->disp_clk);
9342 	b.full = dfixed_div(b, c);
9343 
9344 	c.full = dfixed_const(dmif_size);
9345 	b.full = dfixed_div(c, b);
9346 
9347 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
9348 
9349 	b.full = dfixed_const(1000);
9350 	c.full = dfixed_const(wm->disp_clk);
9351 	b.full = dfixed_div(c, b);
9352 	c.full = dfixed_const(wm->bytes_per_pixel);
9353 	b.full = dfixed_mul(b, c);
9354 
9355 	lb_fill_bw = min(tmp, dfixed_trunc(b));
9356 
9357 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
9358 	b.full = dfixed_const(1000);
9359 	c.full = dfixed_const(lb_fill_bw);
9360 	b.full = dfixed_div(c, b);
9361 	a.full = dfixed_div(a, b);
9362 	line_fill_time = dfixed_trunc(a);
9363 
9364 	if (line_fill_time < wm->active_time)
9365 		return latency;
9366 	else
9367 		return latency + (line_fill_time - wm->active_time);
9368 
9369 }
9370 
9371 /**
9372  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
9373  * average and available dram bandwidth
9374  *
9375  * @wm: watermark calculation data
9376  *
9377  * Check if the display average bandwidth fits in the display
9378  * dram bandwidth (CIK).
9379  * Used for display watermark bandwidth calculations
9380  * Returns true if the display fits, false if not.
9381  */
9382 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
9383 {
9384 	if (dce8_average_bandwidth(wm) <=
9385 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
9386 		return true;
9387 	else
9388 		return false;
9389 }
9390 
9391 /**
9392  * dce8_average_bandwidth_vs_available_bandwidth - check
9393  * average and available bandwidth
9394  *
9395  * @wm: watermark calculation data
9396  *
9397  * Check if the display average bandwidth fits in the display
9398  * available bandwidth (CIK).
9399  * Used for display watermark bandwidth calculations
9400  * Returns true if the display fits, false if not.
9401  */
9402 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
9403 {
9404 	if (dce8_average_bandwidth(wm) <=
9405 	    (dce8_available_bandwidth(wm) / wm->num_heads))
9406 		return true;
9407 	else
9408 		return false;
9409 }
9410 
9411 /**
9412  * dce8_check_latency_hiding - check latency hiding
9413  *
9414  * @wm: watermark calculation data
9415  *
9416  * Check latency hiding (CIK).
9417  * Used for display watermark bandwidth calculations
9418  * Returns true if the display fits, false if not.
9419  */
9420 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
9421 {
9422 	u32 lb_partitions = wm->lb_size / wm->src_width;
9423 	u32 line_time = wm->active_time + wm->blank_time;
9424 	u32 latency_tolerant_lines;
9425 	u32 latency_hiding;
9426 	fixed20_12 a;
9427 
9428 	a.full = dfixed_const(1);
9429 	if (wm->vsc.full > a.full)
9430 		latency_tolerant_lines = 1;
9431 	else {
9432 		if (lb_partitions <= (wm->vtaps + 1))
9433 			latency_tolerant_lines = 1;
9434 		else
9435 			latency_tolerant_lines = 2;
9436 	}
9437 
9438 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
9439 
9440 	if (dce8_latency_watermark(wm) <= latency_hiding)
9441 		return true;
9442 	else
9443 		return false;
9444 }
9445 
9446 /**
9447  * dce8_program_watermarks - program display watermarks
9448  *
9449  * @rdev: radeon_device pointer
9450  * @radeon_crtc: the selected display controller
9451  * @lb_size: line buffer size
9452  * @num_heads: number of display controllers in use
9453  *
9454  * Calculate and program the display watermarks for the
9455  * selected display controller (CIK).
9456  */
static void dce8_program_watermarks(struct radeon_device *rdev,
				    struct radeon_crtc *radeon_crtc,
				    u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce8_wm_params wm_low, wm_high;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 tmp, wm_mask;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		/* mode->clock is in kHz, so this is the pixel period in ns */
		pixel_period = 1000000 / (u32)mode->clock;
		/* line time in ns, clamped to the 16-bit hw watermark field */
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			/* DPM active: use the highest dpm levels (low=false) */
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			/* no DPM: only the current static clocks are known */
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		/* NOTE(review): if active_time ever exceeded the clamped
		 * line_time this subtraction would wrap; in practice line
		 * times stay well below 65535 ns -- confirm for exotic modes
		 */
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		/* scaling requires a second vertical tap */
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_high.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce8_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
		    rdev->pm.dpm_enabled) {
			/* DPM active: use the lowest dpm levels (low=true) */
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		/* everything but yclk/sclk mirrors the wm_high setup above */
		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
		wm_low.num_heads = num_heads;

		/* set for low clocks */
		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce8_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
		}
	}

	/* select wm A */
	/* save the original mask selection so it can be restored below */
	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp = wm_mask;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
9578 
9579 /**
9580  * dce8_bandwidth_update - program display watermarks
9581  *
9582  * @rdev: radeon_device pointer
9583  *
9584  * Calculate and program the display watermarks and line
9585  * buffer allocation (CIK).
9586  */
9587 void dce8_bandwidth_update(struct radeon_device *rdev)
9588 {
9589 	struct drm_display_mode *mode = NULL;
9590 	u32 num_heads = 0, lb_size;
9591 	int i;
9592 
9593 	if (!rdev->mode_info.mode_config_initialized)
9594 		return;
9595 
9596 	radeon_update_display_priority(rdev);
9597 
9598 	for (i = 0; i < rdev->num_crtc; i++) {
9599 		if (rdev->mode_info.crtcs[i]->base.enabled)
9600 			num_heads++;
9601 	}
9602 	for (i = 0; i < rdev->num_crtc; i++) {
9603 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9604 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9605 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9606 	}
9607 }
9608 
9609 /**
9610  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9611  *
9612  * @rdev: radeon_device pointer
9613  *
9614  * Fetches a GPU clock counter snapshot (SI).
9615  * Returns the 64 bit clock counter snapshot.
9616  */
9617 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9618 {
9619 	uint64_t clock;
9620 
9621 	mutex_lock(&rdev->gpu_clock_mutex);
9622 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9623 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9624 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9625 	mutex_unlock(&rdev->gpu_clock_mutex);
9626 	return clock;
9627 }
9628 
9629 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9630                               u32 cntl_reg, u32 status_reg)
9631 {
9632 	int r, i;
9633 	struct atom_clock_dividers dividers;
9634 	uint32_t tmp;
9635 
9636 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9637 					   clock, false, &dividers);
9638 	if (r)
9639 		return r;
9640 
9641 	tmp = RREG32_SMC(cntl_reg);
9642 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9643 	tmp |= dividers.post_divider;
9644 	WREG32_SMC(cntl_reg, tmp);
9645 
9646 	for (i = 0; i < 100; i++) {
9647 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9648 			break;
9649 		mdelay(10);
9650 	}
9651 	if (i == 100)
9652 		return -ETIMEDOUT;
9653 
9654 	return 0;
9655 }
9656 
9657 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9658 {
9659 	int r = 0;
9660 
9661 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9662 	if (r)
9663 		return r;
9664 
9665 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9666 	return r;
9667 }
9668 
9669 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9670 {
9671 	int r, i;
9672 	struct atom_clock_dividers dividers;
9673 	u32 tmp;
9674 
9675 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9676 					   ecclk, false, &dividers);
9677 	if (r)
9678 		return r;
9679 
9680 	for (i = 0; i < 100; i++) {
9681 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9682 			break;
9683 		mdelay(10);
9684 	}
9685 	if (i == 100)
9686 		return -ETIMEDOUT;
9687 
9688 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9689 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9690 	tmp |= dividers.post_divider;
9691 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9692 
9693 	for (i = 0; i < 100; i++) {
9694 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9695 			break;
9696 		mdelay(10);
9697 	}
9698 	if (i == 100)
9699 		return -ETIMEDOUT;
9700 
9701 	return 0;
9702 }
9703 
/**
 * cik_pcie_gen3_enable - retrain the PCIE link at gen2/gen3 speeds
 *
 * @rdev: radeon_device pointer
 *
 * If the upstream port supports 5.0 or 8.0 GT/s, program the target
 * link speed and force a retrain.  For a gen3 target the hardware
 * equalization sequence is re-run first.  Silently returns for IGPs,
 * non-PCIE parts, devices on the root bus, when radeon.pcie_gen2=0,
 * or when the link is already at the requested rate.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (pci_is_root_bus(rdev->pdev->bus))
		return;

	/* feature disabled on the command line */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if only gen1 (2.5 GT/s) is supported upstream */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	/* current rate encoding: 0 = gen1, 1 = gen2, 2 = gen3 */
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* need the PCI Express capability offsets of both link partners */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save both LNKCTL values so the HAWD (hw autonomous
			 * width disable) bits can be restored after each retry
			 */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back to its maximum detected width
			 * before retraining, if renegotiation is supported
			 */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* equalization retry loop, up to 10 attempts */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then kick off a fresh
				 * equalization pass
				 */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				/* restore the saved HAWD bits on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* restore Enter Compliance (bit 4) and Transmit
				 * Margin (bits 11:9) from the saved values
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* LNKCTL2 Target Link Speed field: 1 = 2.5, 2 = 5.0, 3 = 8.0 GT/s */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* trigger the speed change */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hw to clear the initiate bit (change complete) */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9863 
/**
 * cik_program_aspm - configure PCIE ASPM related registers
 *
 * @rdev: radeon_device pointer
 *
 * Programs the link controller for L0s/L1 entry and PLL power-down in
 * L1, and, when the root port advertises clock power management,
 * programs a set of SMC clock-control registers as well.  Skipped for
 * IGPs, non-PCIE parts, or when radeon.aspm=0.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* hard-coded policy knobs: all features currently left enabled */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the transmitted N_FTS value (fast training sequence
	 * count) -- each register below is only written back if changed
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* build the L0s/L1 inactivity configuration; only written out
	 * below once the final value is known
	 */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the lane PLLs to power down in the OFF and
			 * TXS2 power states, for both PIF blocks
			 */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ# is only usable if the root port supports
			 * clock power management (LNKCAP CLKPM bit)
			 */
			if (!disable_clkreq &&
			    !pci_is_root_bus(rdev->pdev->bus)) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch various blocks to alternate clock
				 * sources so the reference clock can stop --
				 * NOTE(review): exact source meanings are
				 * per AMD SMC docs, not visible here
				 */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: write out the L0s-only configuration */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	if (!disable_l0s) {
		/* if the N_FTS field reads back saturated and the link is
		 * reversed in both directions, drop the L0s inactivity
		 * setting programmed above
		 */
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
10012