xref: /linux/drivers/gpu/drm/radeon/cik.c (revision 988b0c541ed8b1c633c4d4df7169010635942e18)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35 
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_mc2.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
44 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
45 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
46 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
47 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
48 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
49 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
50 MODULE_FIRMWARE("radeon/HAWAII_mc2.bin");
51 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
52 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
53 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
54 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
55 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
56 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
58 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
59 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
60 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
61 MODULE_FIRMWARE("radeon/KABINI_me.bin");
62 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
63 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
64 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
65 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
66 MODULE_FIRMWARE("radeon/MULLINS_pfp.bin");
67 MODULE_FIRMWARE("radeon/MULLINS_me.bin");
68 MODULE_FIRMWARE("radeon/MULLINS_ce.bin");
69 MODULE_FIRMWARE("radeon/MULLINS_mec.bin");
70 MODULE_FIRMWARE("radeon/MULLINS_rlc.bin");
71 MODULE_FIRMWARE("radeon/MULLINS_sdma.bin");
72 
73 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
74 extern void r600_ih_ring_fini(struct radeon_device *rdev);
75 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
76 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
77 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
78 extern void sumo_rlc_fini(struct radeon_device *rdev);
79 extern int sumo_rlc_init(struct radeon_device *rdev);
80 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
81 extern void si_rlc_reset(struct radeon_device *rdev);
82 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
83 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
84 extern int cik_sdma_resume(struct radeon_device *rdev);
85 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
86 extern void cik_sdma_fini(struct radeon_device *rdev);
87 extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
88 static void cik_rlc_stop(struct radeon_device *rdev);
89 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
90 static void cik_program_aspm(struct radeon_device *rdev);
91 static void cik_init_pg(struct radeon_device *rdev);
92 static void cik_init_cg(struct radeon_device *rdev);
93 static void cik_fini_pg(struct radeon_device *rdev);
94 static void cik_fini_cg(struct radeon_device *rdev);
95 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
96 					  bool enable);
97 
98 /* get temperature in millidegrees */
99 int ci_get_temp(struct radeon_device *rdev)
100 {
101 	u32 temp;
102 	int actual_temp = 0;
103 
104 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
105 		CTF_TEMP_SHIFT;
106 
107 	if (temp & 0x200)
108 		actual_temp = 255;
109 	else
110 		actual_temp = temp & 0x1ff;
111 
112 	actual_temp = actual_temp * 1000;
113 
114 	return actual_temp;
115 }
116 
117 /* get temperature in millidegrees */
118 int kv_get_temp(struct radeon_device *rdev)
119 {
120 	u32 temp;
121 	int actual_temp = 0;
122 
123 	temp = RREG32_SMC(0xC0300E0C);
124 
125 	if (temp)
126 		actual_temp = (temp / 8) - 49;
127 	else
128 		actual_temp = 0;
129 
130 	actual_temp = actual_temp * 1000;
131 
132 	return actual_temp;
133 }
134 
135 /*
136  * Indirect registers accessor
137  */
/**
 * cik_pciep_rreg - read a PCIE port indirect register
 * @rdev: radeon_device pointer
 * @reg: PCIE port register offset
 *
 * Reads @reg through the PCIE_INDEX/PCIE_DATA indirect register pair.
 * The index/data sequence is serialized by pciep_idx_lock (irqsave) so
 * concurrent accessors cannot interleave their index and data accesses.
 *
 * Returns the register value.
 */
u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
{
	unsigned long flags;
	u32 r;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* readback, presumably to flush the posted index write */
	r = RREG32(PCIE_DATA);
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
	return r;
}
150 
/**
 * cik_pciep_wreg - write a PCIE port indirect register
 * @rdev: radeon_device pointer
 * @reg: PCIE port register offset
 * @v: value to write
 *
 * Writes @v to @reg through the PCIE_INDEX/PCIE_DATA indirect register
 * pair, holding pciep_idx_lock (irqsave) across the whole sequence so
 * concurrent accessors cannot interleave.
 */
void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
{
	unsigned long flags;

	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
	WREG32(PCIE_INDEX, reg);
	(void)RREG32(PCIE_INDEX);	/* readback, presumably to flush the posted index write */
	WREG32(PCIE_DATA, v);
	(void)RREG32(PCIE_DATA);	/* readback, presumably to flush the posted data write */
	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
}
162 
/*
 * RLC save/restore register list for Spectre (Kaveri) GFX.
 * Most of the table is pairs of:
 *   (select << 16) | (register byte offset >> 2), followed by 0x00000000.
 * The high-half "select" values (0x0e00, 0x4e00, ... 0xbe00, 0x0600,
 * 0x0400, 0x0000/0x0001) and the meaning of the bare 0x3 / 0x5 section
 * markers are defined by the RLC microcode's save/restore list format.
 * NOTE(review): format is consumed by the RLC firmware loader elsewhere
 * in this file — verify against that code before editing entries.
 */
static const u32 spectre_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc178 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,	/* section marker — see format note above */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc278 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc27c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc280 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc284 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc288 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc29c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac  >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xae00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0001 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc778 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc77c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc780 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc784 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc788 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc78c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a4 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7a8 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7ac >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b0 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc7b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92cc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x92d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x8e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x9e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xae00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0xbe00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,	/* section marker — see format note above */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
609 
/*
 * RLC save/restore register list for Kalindi (Kabini/Mullins) GFX.
 * Same layout as spectre_rlc_save_restore_register_list above:
 * pairs of (select << 16) | (register byte offset >> 2) followed by
 * 0x00000000, with bare 0x3 / 0x5 section markers whose meaning is
 * defined by the RLC microcode's save/restore list format.
 * NOTE(review): verify the format against the RLC setup code in this
 * file before editing entries.
 */
static const u32 kalindi_rlc_save_restore_register_list[] =
{
	(0x0e00 << 16) | (0xc12c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc140 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc150 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc15c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc168 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc170 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc204 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8228 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x829c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x869c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc260 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c000 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c00c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xcd20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8900 >> 2),
	0x00000000,
	0x3,	/* section marker — see format note above */
	(0x0e00 << 16) | (0xc130 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc134 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc208 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc264 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc268 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc26c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc270 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc274 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc28c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc290 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc294 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc298 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2a8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc2ac >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x301d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30238 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30250 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30254 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30258 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3025c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc900 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc904 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc908 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc90c >> 2),
	0x00000000,
	(0x4e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0xc910 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc99c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f00 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f04 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f08 >> 2),
	0x00000000,
	(0x0000 << 16) | (0x30f0c >> 2),
	0x00000000,
	(0x0600 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bf0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30a04 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a10 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a14 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a18 >> 2),
	0x00000000,
	(0x0600 << 16) | (0x30a2c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc700 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc704 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc708 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xc768 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc770 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc774 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc798 >> 2),
	0x00000000,
	(0x0400 << 16) | (0xc79c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c010 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c38 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8c3c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9604 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac58 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac68 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac6c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac70 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac74 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac78 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac7c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac80 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac84 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac88 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xac8c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x970c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9714 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x9718 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x971c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x4e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x5e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x6e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x7e00 << 16) | (0x31068 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd10 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0xcd14 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88b8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88bc >> 2),
	0x00000000,
	(0x0400 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x88d8 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8980 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30938 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3093c >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30940 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89a0 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30900 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x30904 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x89b4 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3e1fc >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c210 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c214 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x3c218 >> 2),
	0x00000000,
	(0x0e00 << 16) | (0x8904 >> 2),
	0x00000000,
	0x5,	/* section marker — see format note above */
	(0x0e00 << 16) | (0x8c28 >> 2),
	(0x0e00 << 16) | (0x8c2c >> 2),
	(0x0e00 << 16) | (0x8c30 >> 2),
	(0x0e00 << 16) | (0x8c34 >> 2),
	(0x0e00 << 16) | (0x9600 >> 2),
};
934 
/* Golden (power-on tuning) SPM register settings for Bonaire.
 * NOTE(review): presumably consumed as {offset, mask, value} triples by
 * the register-sequence programming helper — confirm against its caller. */
static const u32 bonaire_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
939 
/* Golden register settings common to all Bonaire variants.
 * NOTE(review): presumably consumed as {offset, mask, value} triples by
 * the register-sequence programming helper — confirm against its caller. */
static const u32 bonaire_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
947 
/* Golden register settings for Bonaire.
 * NOTE(review): presumably consumed as {offset, mask, value} triples by
 * the register-sequence programming helper — confirm against its caller. */
static const u32 bonaire_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,
	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032
};
992 
/* Medium-grain / coarse-grain clockgating (MGCG/CGCG) init sequence for
 * Bonaire.
 * NOTE(review): presumably consumed as {offset, mask, value} triples by
 * the register-sequence programming helper — confirm against its caller. */
static const u32 bonaire_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1078 
/* Spectre (Kaveri) SPM golden settings: {offset, mask, value} triplets
 * applied via radeon_program_register_sequence().
 */
static const u32 spectre_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1083 
/* Spectre (Kaveri) common golden settings: {offset, mask, value} triplets
 * applied via radeon_program_register_sequence().
 */
static const u32 spectre_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1091 
/* Spectre (Kaveri) golden register settings: {offset, mask, value} triplets
 * applied via radeon_program_register_sequence().
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1120 
/* Spectre (Kaveri) MGCG/CGCG clock-gating init sequence: {offset, mask, value}
 * triplets applied via radeon_program_register_sequence().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1211 
/* Kalindi (Kabini/Mullins) SPM golden settings: {offset, mask, value} triplets
 * applied via radeon_program_register_sequence().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1216 
/* Kalindi (Kabini/Mullins) common golden settings: {offset, mask, value}
 * triplets applied via radeon_program_register_sequence().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1224 
/* Kalindi (Kabini) golden register settings: {offset, mask, value} triplets
 * applied via radeon_program_register_sequence().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1258 
/* Kalindi (Kabini/Mullins) MGCG/CGCG clock-gating init sequence:
 * {offset, mask, value} triplets applied via
 * radeon_program_register_sequence().
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1317 
/* Hawaii SPM golden settings: {offset, mask, value} triplets applied via
 * radeon_program_register_sequence().
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1322 
/* Hawaii common golden settings: {offset, mask, value} triplets applied via
 * radeon_program_register_sequence().
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1331 
/* Hawaii golden register settings: {offset, mask, value} triplets applied via
 * radeon_program_register_sequence().
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1371 
/* Hawaii MGCG/CGCG clock-gating init sequence: {offset, mask, value} triplets
 * applied via radeon_program_register_sequence().
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1482 
/* Godavari (Mullins) golden register settings: {offset, mask, value} triplets
 * applied via radeon_program_register_sequence().
 */
static const u32 godavari_golden_registers[] =
{
	0x55e4, 0xff607fff, 0xfc000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	/* NOTE(review): 0x98302 is not dword-aligned; the kalindi/spectre tables
	 * use 0x9834 with the same mask/value here — possible typo, verify
	 * against the register spec before changing.
	 */
	0x98302, 0xf00fffff, 0x00000400,
	0x6130, 0xffffffff, 0x00010000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ff0fff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0xd014, 0x00010000, 0x00810001,
	0xd814, 0x00010000, 0x00810001,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000001,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1518 
1519 
/**
 * cik_init_golden_registers - program the "golden" register settings
 *
 * @rdev: radeon_device pointer
 *
 * Applies the per-family clock gating (mgcg_cgcg_init) and golden
 * register tables defined above via radeon_program_register_sequence().
 * The tables are applied in a fixed order: clock gating, golden
 * registers, common registers, then SPM registers.  Families not
 * listed are left untouched.  Note that Mullins shares the kalindi
 * clock gating/common/SPM tables but uses its own (godavari) golden
 * register table.
 */
static void cik_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_BONAIRE:
		radeon_program_register_sequence(rdev,
						 bonaire_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_common_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 bonaire_golden_spm_registers,
						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
		break;
	case CHIP_KABINI:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_MULLINS:
		radeon_program_register_sequence(rdev,
						 kalindi_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 godavari_golden_registers,
						 (const u32)ARRAY_SIZE(godavari_golden_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_common_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 kalindi_golden_spm_registers,
						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
		break;
	case CHIP_KAVERI:
		radeon_program_register_sequence(rdev,
						 spectre_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 spectre_golden_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_common_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 spectre_golden_spm_registers,
						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
		break;
	case CHIP_HAWAII:
		radeon_program_register_sequence(rdev,
						 hawaii_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_common_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
		radeon_program_register_sequence(rdev,
						 hawaii_golden_spm_registers,
						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
		break;
	default:
		break;
	}
}
1597 
1598 /**
1599  * cik_get_xclk - get the xclk
1600  *
1601  * @rdev: radeon_device pointer
1602  *
1603  * Returns the reference clock used by the gfx engine
1604  * (CIK).
1605  */
1606 u32 cik_get_xclk(struct radeon_device *rdev)
1607 {
1608         u32 reference_clock = rdev->clock.spll.reference_freq;
1609 
1610 	if (rdev->flags & RADEON_IS_IGP) {
1611 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1612 			return reference_clock / 2;
1613 	} else {
1614 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1615 			return reference_clock / 4;
1616 	}
1617 	return reference_clock;
1618 }
1619 
1620 /**
1621  * cik_mm_rdoorbell - read a doorbell dword
1622  *
1623  * @rdev: radeon_device pointer
1624  * @index: doorbell index
1625  *
1626  * Returns the value in the doorbell aperture at the
1627  * requested doorbell index (CIK).
1628  */
1629 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1630 {
1631 	if (index < rdev->doorbell.num_doorbells) {
1632 		return readl(rdev->doorbell.ptr + index);
1633 	} else {
1634 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1635 		return 0;
1636 	}
1637 }
1638 
1639 /**
1640  * cik_mm_wdoorbell - write a doorbell dword
1641  *
1642  * @rdev: radeon_device pointer
1643  * @index: doorbell index
1644  * @v: value to write
1645  *
1646  * Writes @v to the doorbell aperture at the
1647  * requested doorbell index (CIK).
1648  */
1649 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1650 {
1651 	if (index < rdev->doorbell.num_doorbells) {
1652 		writel(v, rdev->doorbell.ptr + index);
1653 	} else {
1654 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1655 	}
1656 }
1657 
#define BONAIRE_IO_MC_REGS_SIZE 36

/* Bonaire MC IO debug register settings written as
 * {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs by
 * ci_mc_load_microcode() before the MC ucode is uploaded.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1699 
#define HAWAII_IO_MC_REGS_SIZE 22

/* Hawaii MC IO debug register settings written as
 * {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pairs by
 * ci_mc_load_microcode() before the MC ucode is uploaded.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1727 
1728 
1729 /**
1730  * cik_srbm_select - select specific register instances
1731  *
1732  * @rdev: radeon_device pointer
1733  * @me: selected ME (micro engine)
1734  * @pipe: pipe
1735  * @queue: queue
1736  * @vmid: VMID
1737  *
1738  * Switches the currently active registers instances.  Some
1739  * registers are instanced per VMID, others are instanced per
1740  * me/pipe/queue combination.
1741  */
1742 static void cik_srbm_select(struct radeon_device *rdev,
1743 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1744 {
1745 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1746 			     MEID(me & 0x3) |
1747 			     VMID(vmid & 0xf) |
1748 			     QUEUEID(queue & 0x7));
1749 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1750 }
1751 
1752 /* ucode loading */
1753 /**
1754  * ci_mc_load_microcode - load MC ucode into the hw
1755  *
1756  * @rdev: radeon_device pointer
1757  *
1758  * Load the GDDR MC ucode into the hw (CIK).
1759  * Returns 0 on success, error on failure.
1760  */
1761 int ci_mc_load_microcode(struct radeon_device *rdev)
1762 {
1763 	const __be32 *fw_data;
1764 	u32 running, blackout = 0;
1765 	u32 *io_mc_regs;
1766 	int i, regs_size, ucode_size;
1767 
1768 	if (!rdev->mc_fw)
1769 		return -EINVAL;
1770 
1771 	ucode_size = rdev->mc_fw->size / 4;
1772 
1773 	switch (rdev->family) {
1774 	case CHIP_BONAIRE:
1775 		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1776 		regs_size = BONAIRE_IO_MC_REGS_SIZE;
1777 		break;
1778 	case CHIP_HAWAII:
1779 		io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1780 		regs_size = HAWAII_IO_MC_REGS_SIZE;
1781 		break;
1782 	default:
1783 		return -EINVAL;
1784 	}
1785 
1786 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1787 
1788 	if (running == 0) {
1789 		if (running) {
1790 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1791 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1792 		}
1793 
1794 		/* reset the engine and set to writable */
1795 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1796 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1797 
1798 		/* load mc io regs */
1799 		for (i = 0; i < regs_size; i++) {
1800 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1801 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1802 		}
1803 		/* load the MC ucode */
1804 		fw_data = (const __be32 *)rdev->mc_fw->data;
1805 		for (i = 0; i < ucode_size; i++)
1806 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1807 
1808 		/* put the engine back into the active state */
1809 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1810 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1811 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1812 
1813 		/* wait for training to complete */
1814 		for (i = 0; i < rdev->usec_timeout; i++) {
1815 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1816 				break;
1817 			udelay(1);
1818 		}
1819 		for (i = 0; i < rdev->usec_timeout; i++) {
1820 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1821 				break;
1822 			udelay(1);
1823 		}
1824 
1825 		if (running)
1826 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1827 	}
1828 
1829 	return 0;
1830 }
1831 
1832 /**
1833  * cik_init_microcode - load ucode images from disk
1834  *
1835  * @rdev: radeon_device pointer
1836  *
1837  * Use the firmware interface to load the ucode images into
1838  * the driver (not loaded into hw).
1839  * Returns 0 on success, error on failure.
1840  */
1841 static int cik_init_microcode(struct radeon_device *rdev)
1842 {
1843 	const char *chip_name;
1844 	size_t pfp_req_size, me_req_size, ce_req_size,
1845 		mec_req_size, rlc_req_size, mc_req_size = 0,
1846 		sdma_req_size, smc_req_size = 0, mc2_req_size = 0;
1847 	char fw_name[30];
1848 	int err;
1849 
1850 	DRM_DEBUG("\n");
1851 
1852 	switch (rdev->family) {
1853 	case CHIP_BONAIRE:
1854 		chip_name = "BONAIRE";
1855 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1856 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1857 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1858 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1859 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1860 		mc_req_size = BONAIRE_MC_UCODE_SIZE * 4;
1861 		mc2_req_size = BONAIRE_MC2_UCODE_SIZE * 4;
1862 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1863 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1864 		break;
1865 	case CHIP_HAWAII:
1866 		chip_name = "HAWAII";
1867 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1868 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1869 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1870 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1871 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1872 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1873 		mc2_req_size = HAWAII_MC2_UCODE_SIZE * 4;
1874 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1875 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1876 		break;
1877 	case CHIP_KAVERI:
1878 		chip_name = "KAVERI";
1879 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1880 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1881 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1882 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1883 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1884 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1885 		break;
1886 	case CHIP_KABINI:
1887 		chip_name = "KABINI";
1888 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1889 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1890 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1891 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1892 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1893 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1894 		break;
1895 	case CHIP_MULLINS:
1896 		chip_name = "MULLINS";
1897 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1898 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1899 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1900 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1901 		rlc_req_size = ML_RLC_UCODE_SIZE * 4;
1902 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1903 		break;
1904 	default: BUG();
1905 	}
1906 
1907 	DRM_INFO("Loading %s Microcode\n", chip_name);
1908 
1909 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1910 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1911 	if (err)
1912 		goto out;
1913 	if (rdev->pfp_fw->size != pfp_req_size) {
1914 		printk(KERN_ERR
1915 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1916 		       rdev->pfp_fw->size, fw_name);
1917 		err = -EINVAL;
1918 		goto out;
1919 	}
1920 
1921 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1922 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1923 	if (err)
1924 		goto out;
1925 	if (rdev->me_fw->size != me_req_size) {
1926 		printk(KERN_ERR
1927 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1928 		       rdev->me_fw->size, fw_name);
1929 		err = -EINVAL;
1930 	}
1931 
1932 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1933 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1934 	if (err)
1935 		goto out;
1936 	if (rdev->ce_fw->size != ce_req_size) {
1937 		printk(KERN_ERR
1938 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1939 		       rdev->ce_fw->size, fw_name);
1940 		err = -EINVAL;
1941 	}
1942 
1943 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1944 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1945 	if (err)
1946 		goto out;
1947 	if (rdev->mec_fw->size != mec_req_size) {
1948 		printk(KERN_ERR
1949 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1950 		       rdev->mec_fw->size, fw_name);
1951 		err = -EINVAL;
1952 	}
1953 
1954 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1955 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1956 	if (err)
1957 		goto out;
1958 	if (rdev->rlc_fw->size != rlc_req_size) {
1959 		printk(KERN_ERR
1960 		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1961 		       rdev->rlc_fw->size, fw_name);
1962 		err = -EINVAL;
1963 	}
1964 
1965 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1966 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1967 	if (err)
1968 		goto out;
1969 	if (rdev->sdma_fw->size != sdma_req_size) {
1970 		printk(KERN_ERR
1971 		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1972 		       rdev->sdma_fw->size, fw_name);
1973 		err = -EINVAL;
1974 	}
1975 
1976 	/* No SMC, MC ucode on APUs */
1977 	if (!(rdev->flags & RADEON_IS_IGP)) {
1978 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1979 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1980 		if (err) {
1981 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1982 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1983 			if (err)
1984 				goto out;
1985 		}
1986 		if ((rdev->mc_fw->size != mc_req_size) &&
1987 		    (rdev->mc_fw->size != mc2_req_size)){
1988 			printk(KERN_ERR
1989 			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1990 			       rdev->mc_fw->size, fw_name);
1991 			err = -EINVAL;
1992 		}
1993 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1994 
1995 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1996 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1997 		if (err) {
1998 			printk(KERN_ERR
1999 			       "smc: error loading firmware \"%s\"\n",
2000 			       fw_name);
2001 			release_firmware(rdev->smc_fw);
2002 			rdev->smc_fw = NULL;
2003 			err = 0;
2004 		} else if (rdev->smc_fw->size != smc_req_size) {
2005 			printk(KERN_ERR
2006 			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
2007 			       rdev->smc_fw->size, fw_name);
2008 			err = -EINVAL;
2009 		}
2010 	}
2011 
2012 out:
2013 	if (err) {
2014 		if (err != -EINVAL)
2015 			printk(KERN_ERR
2016 			       "cik_cp: Failed to load firmware \"%s\"\n",
2017 			       fw_name);
2018 		release_firmware(rdev->pfp_fw);
2019 		rdev->pfp_fw = NULL;
2020 		release_firmware(rdev->me_fw);
2021 		rdev->me_fw = NULL;
2022 		release_firmware(rdev->ce_fw);
2023 		rdev->ce_fw = NULL;
2024 		release_firmware(rdev->rlc_fw);
2025 		rdev->rlc_fw = NULL;
2026 		release_firmware(rdev->mc_fw);
2027 		rdev->mc_fw = NULL;
2028 		release_firmware(rdev->smc_fw);
2029 		rdev->smc_fw = NULL;
2030 	}
2031 	return err;
2032 }
2033 
2034 /*
2035  * Core functions
2036  */
2037 /**
2038  * cik_tiling_mode_table_init - init the hw tiling table
2039  *
2040  * @rdev: radeon_device pointer
2041  *
2042  * Starting with SI, the tiling setup is done globally in a
2043  * set of 32 tiling modes.  Rather than selecting each set of
2044  * parameters per surface as on older asics, we just select
2045  * which index in the tiling table we want to use, and the
2046  * surface uses those parameters (CIK).
2047  */
2048 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
2049 {
2050 	const u32 num_tile_mode_states = 32;
2051 	const u32 num_secondary_tile_mode_states = 16;
2052 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2053 	u32 num_pipe_configs;
2054 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
2055 		rdev->config.cik.max_shader_engines;
2056 
2057 	switch (rdev->config.cik.mem_row_size_in_kb) {
2058 	case 1:
2059 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2060 		break;
2061 	case 2:
2062 	default:
2063 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2064 		break;
2065 	case 4:
2066 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2067 		break;
2068 	}
2069 
2070 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
2071 	if (num_pipe_configs > 8)
2072 		num_pipe_configs = 16;
2073 
2074 	if (num_pipe_configs == 16) {
2075 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2076 			switch (reg_offset) {
2077 			case 0:
2078 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2079 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2080 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2081 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2082 				break;
2083 			case 1:
2084 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2085 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2086 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2087 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2088 				break;
2089 			case 2:
2090 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2091 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2092 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2093 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2094 				break;
2095 			case 3:
2096 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2097 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2098 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2099 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2100 				break;
2101 			case 4:
2102 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2103 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2104 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2105 						 TILE_SPLIT(split_equal_to_row_size));
2106 				break;
2107 			case 5:
2108 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2109 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2110 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2111 				break;
2112 			case 6:
2113 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2114 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2115 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2116 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2117 				break;
2118 			case 7:
2119 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2120 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2121 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2122 						 TILE_SPLIT(split_equal_to_row_size));
2123 				break;
2124 			case 8:
2125 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2126 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2127 				break;
2128 			case 9:
2129 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2130 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2131 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2132 				break;
2133 			case 10:
2134 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2135 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2136 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2137 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2138 				break;
2139 			case 11:
2140 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2141 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2142 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2143 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2144 				break;
2145 			case 12:
2146 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2147 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2148 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2149 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2150 				break;
2151 			case 13:
2152 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2153 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2154 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2155 				break;
2156 			case 14:
2157 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2158 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2159 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2160 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2161 				break;
2162 			case 16:
2163 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2164 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2165 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2166 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2167 				break;
2168 			case 17:
2169 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2170 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2171 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2172 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2173 				break;
2174 			case 27:
2175 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2176 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2177 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2178 				break;
2179 			case 28:
2180 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2181 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2182 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2183 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2184 				break;
2185 			case 29:
2186 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2187 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2188 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2189 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2190 				break;
2191 			case 30:
2192 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2193 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2194 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2195 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2196 				break;
2197 			default:
2198 				gb_tile_moden = 0;
2199 				break;
2200 			}
2201 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2202 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2203 		}
2204 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2205 			switch (reg_offset) {
2206 			case 0:
2207 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2208 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2209 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2210 						 NUM_BANKS(ADDR_SURF_16_BANK));
2211 				break;
2212 			case 1:
2213 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2214 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2215 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2216 						 NUM_BANKS(ADDR_SURF_16_BANK));
2217 				break;
2218 			case 2:
2219 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2220 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2221 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2222 						 NUM_BANKS(ADDR_SURF_16_BANK));
2223 				break;
2224 			case 3:
2225 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2226 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2227 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2228 						 NUM_BANKS(ADDR_SURF_16_BANK));
2229 				break;
2230 			case 4:
2231 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2232 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2233 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2234 						 NUM_BANKS(ADDR_SURF_8_BANK));
2235 				break;
2236 			case 5:
2237 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2238 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2239 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2240 						 NUM_BANKS(ADDR_SURF_4_BANK));
2241 				break;
2242 			case 6:
2243 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2244 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2245 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2246 						 NUM_BANKS(ADDR_SURF_2_BANK));
2247 				break;
2248 			case 8:
2249 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2250 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2251 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2252 						 NUM_BANKS(ADDR_SURF_16_BANK));
2253 				break;
2254 			case 9:
2255 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2256 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2257 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2258 						 NUM_BANKS(ADDR_SURF_16_BANK));
2259 				break;
2260 			case 10:
2261 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2262 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2263 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2264 						 NUM_BANKS(ADDR_SURF_16_BANK));
2265 				break;
2266 			case 11:
2267 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2268 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2269 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2270 						 NUM_BANKS(ADDR_SURF_8_BANK));
2271 				break;
2272 			case 12:
2273 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2274 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2275 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2276 						 NUM_BANKS(ADDR_SURF_4_BANK));
2277 				break;
2278 			case 13:
2279 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2280 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2281 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2282 						 NUM_BANKS(ADDR_SURF_2_BANK));
2283 				break;
2284 			case 14:
2285 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2286 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2287 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2288 						 NUM_BANKS(ADDR_SURF_2_BANK));
2289 				break;
2290 			default:
2291 				gb_tile_moden = 0;
2292 				break;
2293 			}
2294 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2295 		}
2296 	} else if (num_pipe_configs == 8) {
2297 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2298 			switch (reg_offset) {
2299 			case 0:
2300 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2301 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2302 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2303 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2304 				break;
2305 			case 1:
2306 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2307 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2308 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2309 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2310 				break;
2311 			case 2:
2312 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2313 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2314 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2315 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2316 				break;
2317 			case 3:
2318 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2319 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2320 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2321 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2322 				break;
2323 			case 4:
2324 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2325 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2326 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2327 						 TILE_SPLIT(split_equal_to_row_size));
2328 				break;
2329 			case 5:
2330 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2331 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2332 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2333 				break;
2334 			case 6:
2335 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2336 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2337 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2338 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2339 				break;
2340 			case 7:
2341 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2342 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2343 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2344 						 TILE_SPLIT(split_equal_to_row_size));
2345 				break;
2346 			case 8:
2347 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2348 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2349 				break;
2350 			case 9:
2351 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2352 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2353 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2354 				break;
2355 			case 10:
2356 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2357 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2358 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2359 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2360 				break;
2361 			case 11:
2362 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2363 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2364 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2365 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2366 				break;
2367 			case 12:
2368 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2369 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2370 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2371 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2372 				break;
2373 			case 13:
2374 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2375 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2376 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2377 				break;
2378 			case 14:
2379 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2380 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2381 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2382 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2383 				break;
2384 			case 16:
2385 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2386 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2387 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2388 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2389 				break;
2390 			case 17:
2391 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2392 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2393 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2394 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2395 				break;
2396 			case 27:
2397 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2398 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2399 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2400 				break;
2401 			case 28:
2402 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2403 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2404 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2405 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2406 				break;
2407 			case 29:
2408 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2409 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2410 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2411 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2412 				break;
2413 			case 30:
2414 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2415 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2416 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2417 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2418 				break;
2419 			default:
2420 				gb_tile_moden = 0;
2421 				break;
2422 			}
2423 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2424 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2425 		}
2426 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2427 			switch (reg_offset) {
2428 			case 0:
2429 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2430 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2431 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2432 						 NUM_BANKS(ADDR_SURF_16_BANK));
2433 				break;
2434 			case 1:
2435 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2436 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2437 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2438 						 NUM_BANKS(ADDR_SURF_16_BANK));
2439 				break;
2440 			case 2:
2441 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2442 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2443 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2444 						 NUM_BANKS(ADDR_SURF_16_BANK));
2445 				break;
2446 			case 3:
2447 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2448 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2449 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2450 						 NUM_BANKS(ADDR_SURF_16_BANK));
2451 				break;
2452 			case 4:
2453 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2455 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2456 						 NUM_BANKS(ADDR_SURF_8_BANK));
2457 				break;
2458 			case 5:
2459 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2460 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2461 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2462 						 NUM_BANKS(ADDR_SURF_4_BANK));
2463 				break;
2464 			case 6:
2465 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2466 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2467 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2468 						 NUM_BANKS(ADDR_SURF_2_BANK));
2469 				break;
2470 			case 8:
2471 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2472 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2473 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2474 						 NUM_BANKS(ADDR_SURF_16_BANK));
2475 				break;
2476 			case 9:
2477 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2478 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2479 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2480 						 NUM_BANKS(ADDR_SURF_16_BANK));
2481 				break;
2482 			case 10:
2483 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2484 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2485 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2486 						 NUM_BANKS(ADDR_SURF_16_BANK));
2487 				break;
2488 			case 11:
2489 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2490 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2491 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2492 						 NUM_BANKS(ADDR_SURF_16_BANK));
2493 				break;
2494 			case 12:
2495 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2496 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2497 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2498 						 NUM_BANKS(ADDR_SURF_8_BANK));
2499 				break;
2500 			case 13:
2501 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2503 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2504 						 NUM_BANKS(ADDR_SURF_4_BANK));
2505 				break;
2506 			case 14:
2507 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2508 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2509 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2510 						 NUM_BANKS(ADDR_SURF_2_BANK));
2511 				break;
2512 			default:
2513 				gb_tile_moden = 0;
2514 				break;
2515 			}
2516 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2517 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2518 		}
2519 	} else if (num_pipe_configs == 4) {
2520 		if (num_rbs == 4) {
2521 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2522 				switch (reg_offset) {
2523 				case 0:
2524 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2525 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2526 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2527 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2528 					break;
2529 				case 1:
2530 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2531 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2532 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2533 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2534 					break;
2535 				case 2:
2536 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2537 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2538 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2539 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2540 					break;
2541 				case 3:
2542 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2543 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2544 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2545 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2546 					break;
2547 				case 4:
2548 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2549 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2550 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2551 							 TILE_SPLIT(split_equal_to_row_size));
2552 					break;
2553 				case 5:
2554 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2555 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2556 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2557 					break;
2558 				case 6:
2559 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2560 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2561 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2562 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2563 					break;
2564 				case 7:
2565 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2566 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2567 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2568 							 TILE_SPLIT(split_equal_to_row_size));
2569 					break;
2570 				case 8:
2571 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2572 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2573 					break;
2574 				case 9:
2575 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2576 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2577 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2578 					break;
2579 				case 10:
2580 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2581 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2582 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2583 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2584 					break;
2585 				case 11:
2586 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2587 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2588 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2589 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2590 					break;
2591 				case 12:
2592 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2593 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2594 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2595 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2596 					break;
2597 				case 13:
2598 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2599 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2600 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2601 					break;
2602 				case 14:
2603 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2604 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2605 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2606 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2607 					break;
2608 				case 16:
2609 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2610 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2611 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2612 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2613 					break;
2614 				case 17:
2615 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2616 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2617 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2618 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2619 					break;
2620 				case 27:
2621 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2622 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2623 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2624 					break;
2625 				case 28:
2626 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2627 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2628 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2629 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2630 					break;
2631 				case 29:
2632 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2633 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2634 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2635 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2636 					break;
2637 				case 30:
2638 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2639 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2640 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2641 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2642 					break;
2643 				default:
2644 					gb_tile_moden = 0;
2645 					break;
2646 				}
2647 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2648 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2649 			}
2650 		} else if (num_rbs < 4) {
2651 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2652 				switch (reg_offset) {
2653 				case 0:
2654 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2655 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2656 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2657 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2658 					break;
2659 				case 1:
2660 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2662 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2663 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2664 					break;
2665 				case 2:
2666 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2667 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2668 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2669 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2670 					break;
2671 				case 3:
2672 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2674 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2675 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2676 					break;
2677 				case 4:
2678 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2679 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2680 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2681 							 TILE_SPLIT(split_equal_to_row_size));
2682 					break;
2683 				case 5:
2684 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2685 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2686 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2687 					break;
2688 				case 6:
2689 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2690 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2691 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2692 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2693 					break;
2694 				case 7:
2695 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2696 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2697 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2698 							 TILE_SPLIT(split_equal_to_row_size));
2699 					break;
2700 				case 8:
2701 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2702 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2703 					break;
2704 				case 9:
2705 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2706 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2707 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2708 					break;
2709 				case 10:
2710 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2711 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2712 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2713 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2714 					break;
2715 				case 11:
2716 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2717 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2718 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2719 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2720 					break;
2721 				case 12:
2722 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2723 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2724 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2725 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2726 					break;
2727 				case 13:
2728 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2729 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2730 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2731 					break;
2732 				case 14:
2733 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2734 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2735 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2736 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2737 					break;
2738 				case 16:
2739 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2740 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2741 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2742 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2743 					break;
2744 				case 17:
2745 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2746 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2747 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2748 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2749 					break;
2750 				case 27:
2751 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2752 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2753 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2754 					break;
2755 				case 28:
2756 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2757 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2758 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2760 					break;
2761 				case 29:
2762 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2763 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2764 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2765 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2766 					break;
2767 				case 30:
2768 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2769 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2770 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2771 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2772 					break;
2773 				default:
2774 					gb_tile_moden = 0;
2775 					break;
2776 				}
2777 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2778 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2779 			}
2780 		}
2781 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2782 			switch (reg_offset) {
2783 			case 0:
2784 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2785 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2786 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2787 						 NUM_BANKS(ADDR_SURF_16_BANK));
2788 				break;
2789 			case 1:
2790 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2791 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2792 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2793 						 NUM_BANKS(ADDR_SURF_16_BANK));
2794 				break;
2795 			case 2:
2796 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2797 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2798 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2799 						 NUM_BANKS(ADDR_SURF_16_BANK));
2800 				break;
2801 			case 3:
2802 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2803 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2804 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2805 						 NUM_BANKS(ADDR_SURF_16_BANK));
2806 				break;
2807 			case 4:
2808 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2810 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2811 						 NUM_BANKS(ADDR_SURF_16_BANK));
2812 				break;
2813 			case 5:
2814 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2815 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2816 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2817 						 NUM_BANKS(ADDR_SURF_8_BANK));
2818 				break;
2819 			case 6:
2820 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2822 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2823 						 NUM_BANKS(ADDR_SURF_4_BANK));
2824 				break;
2825 			case 8:
2826 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2827 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2828 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2829 						 NUM_BANKS(ADDR_SURF_16_BANK));
2830 				break;
2831 			case 9:
2832 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2833 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2834 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2835 						 NUM_BANKS(ADDR_SURF_16_BANK));
2836 				break;
2837 			case 10:
2838 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2839 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2840 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2841 						 NUM_BANKS(ADDR_SURF_16_BANK));
2842 				break;
2843 			case 11:
2844 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2846 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2847 						 NUM_BANKS(ADDR_SURF_16_BANK));
2848 				break;
2849 			case 12:
2850 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2851 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2852 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2853 						 NUM_BANKS(ADDR_SURF_16_BANK));
2854 				break;
2855 			case 13:
2856 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2859 						 NUM_BANKS(ADDR_SURF_8_BANK));
2860 				break;
2861 			case 14:
2862 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2863 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2864 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2865 						 NUM_BANKS(ADDR_SURF_4_BANK));
2866 				break;
2867 			default:
2868 				gb_tile_moden = 0;
2869 				break;
2870 			}
2871 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2872 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2873 		}
2874 	} else if (num_pipe_configs == 2) {
2875 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2876 			switch (reg_offset) {
2877 			case 0:
2878 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2879 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2880 						 PIPE_CONFIG(ADDR_SURF_P2) |
2881 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2882 				break;
2883 			case 1:
2884 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2886 						 PIPE_CONFIG(ADDR_SURF_P2) |
2887 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2888 				break;
2889 			case 2:
2890 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2891 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2892 						 PIPE_CONFIG(ADDR_SURF_P2) |
2893 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2894 				break;
2895 			case 3:
2896 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2898 						 PIPE_CONFIG(ADDR_SURF_P2) |
2899 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2900 				break;
2901 			case 4:
2902 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2903 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2904 						 PIPE_CONFIG(ADDR_SURF_P2) |
2905 						 TILE_SPLIT(split_equal_to_row_size));
2906 				break;
2907 			case 5:
2908 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2909 						 PIPE_CONFIG(ADDR_SURF_P2) |
2910 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2911 				break;
2912 			case 6:
2913 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2914 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2915 						 PIPE_CONFIG(ADDR_SURF_P2) |
2916 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2917 				break;
2918 			case 7:
2919 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2920 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2921 						 PIPE_CONFIG(ADDR_SURF_P2) |
2922 						 TILE_SPLIT(split_equal_to_row_size));
2923 				break;
2924 			case 8:
2925 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2926 						PIPE_CONFIG(ADDR_SURF_P2);
2927 				break;
2928 			case 9:
2929 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2930 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2931 						 PIPE_CONFIG(ADDR_SURF_P2));
2932 				break;
2933 			case 10:
2934 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2935 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2936 						 PIPE_CONFIG(ADDR_SURF_P2) |
2937 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2938 				break;
2939 			case 11:
2940 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2941 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2942 						 PIPE_CONFIG(ADDR_SURF_P2) |
2943 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2944 				break;
2945 			case 12:
2946 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2947 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2948 						 PIPE_CONFIG(ADDR_SURF_P2) |
2949 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2950 				break;
2951 			case 13:
2952 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2953 						 PIPE_CONFIG(ADDR_SURF_P2) |
2954 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2955 				break;
2956 			case 14:
2957 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2958 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2959 						 PIPE_CONFIG(ADDR_SURF_P2) |
2960 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2961 				break;
2962 			case 16:
2963 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2964 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2965 						 PIPE_CONFIG(ADDR_SURF_P2) |
2966 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2967 				break;
2968 			case 17:
2969 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2970 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2971 						 PIPE_CONFIG(ADDR_SURF_P2) |
2972 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2973 				break;
2974 			case 27:
2975 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2976 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2977 						 PIPE_CONFIG(ADDR_SURF_P2));
2978 				break;
2979 			case 28:
2980 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2981 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2982 						 PIPE_CONFIG(ADDR_SURF_P2) |
2983 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2984 				break;
2985 			case 29:
2986 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2987 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2988 						 PIPE_CONFIG(ADDR_SURF_P2) |
2989 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2990 				break;
2991 			case 30:
2992 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2993 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2994 						 PIPE_CONFIG(ADDR_SURF_P2) |
2995 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2996 				break;
2997 			default:
2998 				gb_tile_moden = 0;
2999 				break;
3000 			}
3001 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
3002 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3003 		}
3004 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
3005 			switch (reg_offset) {
3006 			case 0:
3007 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3008 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3009 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3010 						 NUM_BANKS(ADDR_SURF_16_BANK));
3011 				break;
3012 			case 1:
3013 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3014 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3015 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3016 						 NUM_BANKS(ADDR_SURF_16_BANK));
3017 				break;
3018 			case 2:
3019 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3020 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3021 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3022 						 NUM_BANKS(ADDR_SURF_16_BANK));
3023 				break;
3024 			case 3:
3025 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3026 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3027 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3028 						 NUM_BANKS(ADDR_SURF_16_BANK));
3029 				break;
3030 			case 4:
3031 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3032 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3033 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3034 						 NUM_BANKS(ADDR_SURF_16_BANK));
3035 				break;
3036 			case 5:
3037 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3038 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3039 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3040 						 NUM_BANKS(ADDR_SURF_16_BANK));
3041 				break;
3042 			case 6:
3043 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3044 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3045 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3046 						 NUM_BANKS(ADDR_SURF_8_BANK));
3047 				break;
3048 			case 8:
3049 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3050 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
3051 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3052 						 NUM_BANKS(ADDR_SURF_16_BANK));
3053 				break;
3054 			case 9:
3055 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
3056 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3057 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3058 						 NUM_BANKS(ADDR_SURF_16_BANK));
3059 				break;
3060 			case 10:
3061 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3062 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
3063 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3064 						 NUM_BANKS(ADDR_SURF_16_BANK));
3065 				break;
3066 			case 11:
3067 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
3068 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3069 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3070 						 NUM_BANKS(ADDR_SURF_16_BANK));
3071 				break;
3072 			case 12:
3073 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3074 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
3075 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3076 						 NUM_BANKS(ADDR_SURF_16_BANK));
3077 				break;
3078 			case 13:
3079 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3080 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3081 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
3082 						 NUM_BANKS(ADDR_SURF_16_BANK));
3083 				break;
3084 			case 14:
3085 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
3086 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
3087 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
3088 						 NUM_BANKS(ADDR_SURF_8_BANK));
3089 				break;
3090 			default:
3091 				gb_tile_moden = 0;
3092 				break;
3093 			}
3094 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
3095 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
3096 		}
3097 	} else
3098 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3099 }
3100 
3101 /**
3102  * cik_select_se_sh - select which SE, SH to address
3103  *
3104  * @rdev: radeon_device pointer
3105  * @se_num: shader engine to address
3106  * @sh_num: sh block to address
3107  *
3108  * Select which SE, SH combinations to address. Certain
3109  * registers are instanced per SE or SH.  0xffffffff means
3110  * broadcast to all SEs or SHs (CIK).
3111  */
3112 static void cik_select_se_sh(struct radeon_device *rdev,
3113 			     u32 se_num, u32 sh_num)
3114 {
3115 	u32 data = INSTANCE_BROADCAST_WRITES;
3116 
3117 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3118 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3119 	else if (se_num == 0xffffffff)
3120 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3121 	else if (sh_num == 0xffffffff)
3122 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3123 	else
3124 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3125 	WREG32(GRBM_GFX_INDEX, data);
3126 }
3127 
3128 /**
3129  * cik_create_bitmask - create a bitmask
3130  *
3131  * @bit_width: length of the mask
3132  *
3133  * create a variable length bit mask (CIK).
3134  * Returns the bitmask.
3135  */
3136 static u32 cik_create_bitmask(u32 bit_width)
3137 {
3138 	u32 i, mask = 0;
3139 
3140 	for (i = 0; i < bit_width; i++) {
3141 		mask <<= 1;
3142 		mask |= 1;
3143 	}
3144 	return mask;
3145 }
3146 
3147 /**
3148  * cik_get_rb_disabled - computes the mask of disabled RBs
3149  *
3150  * @rdev: radeon_device pointer
3151  * @max_rb_num: max RBs (render backends) for the asic
3152  * @se_num: number of SEs (shader engines) for the asic
3153  * @sh_per_se: number of SH blocks per SE for the asic
3154  *
3155  * Calculates the bitmask of disabled RBs (CIK).
3156  * Returns the disabled RB bitmask.
3157  */
3158 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3159 			      u32 max_rb_num_per_se,
3160 			      u32 sh_per_se)
3161 {
3162 	u32 data, mask;
3163 
3164 	data = RREG32(CC_RB_BACKEND_DISABLE);
3165 	if (data & 1)
3166 		data &= BACKEND_DISABLE_MASK;
3167 	else
3168 		data = 0;
3169 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3170 
3171 	data >>= BACKEND_DISABLE_SHIFT;
3172 
3173 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3174 
3175 	return data & mask;
3176 }
3177 
3178 /**
3179  * cik_setup_rb - setup the RBs on the asic
3180  *
3181  * @rdev: radeon_device pointer
3182  * @se_num: number of SEs (shader engines) for the asic
3183  * @sh_per_se: number of SH blocks per SE for the asic
3184  * @max_rb_num: max RBs (render backends) for the asic
3185  *
3186  * Configures per-SE/SH RB registers (CIK).
3187  */
3188 static void cik_setup_rb(struct radeon_device *rdev,
3189 			 u32 se_num, u32 sh_per_se,
3190 			 u32 max_rb_num_per_se)
3191 {
3192 	int i, j;
3193 	u32 data, mask;
3194 	u32 disabled_rbs = 0;
3195 	u32 enabled_rbs = 0;
3196 
3197 	for (i = 0; i < se_num; i++) {
3198 		for (j = 0; j < sh_per_se; j++) {
3199 			cik_select_se_sh(rdev, i, j);
3200 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3201 			if (rdev->family == CHIP_HAWAII)
3202 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3203 			else
3204 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3205 		}
3206 	}
3207 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3208 
3209 	mask = 1;
3210 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3211 		if (!(disabled_rbs & mask))
3212 			enabled_rbs |= mask;
3213 		mask <<= 1;
3214 	}
3215 
3216 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3217 
3218 	for (i = 0; i < se_num; i++) {
3219 		cik_select_se_sh(rdev, i, 0xffffffff);
3220 		data = 0;
3221 		for (j = 0; j < sh_per_se; j++) {
3222 			switch (enabled_rbs & 3) {
3223 			case 0:
3224 				if (j == 0)
3225 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3226 				else
3227 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3228 				break;
3229 			case 1:
3230 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3231 				break;
3232 			case 2:
3233 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3234 				break;
3235 			case 3:
3236 			default:
3237 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3238 				break;
3239 			}
3240 			enabled_rbs >>= 2;
3241 		}
3242 		WREG32(PA_SC_RASTER_CONFIG, data);
3243 	}
3244 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3245 }
3246 
3247 /**
3248  * cik_gpu_init - setup the 3D engine
3249  *
3250  * @rdev: radeon_device pointer
3251  *
3252  * Configures the 3D engine and tiling configuration
3253  * registers so that the 3D engine is usable.
3254  */
3255 static void cik_gpu_init(struct radeon_device *rdev)
3256 {
3257 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3258 	u32 mc_shared_chmap, mc_arb_ramcfg;
3259 	u32 hdp_host_path_cntl;
3260 	u32 tmp;
3261 	int i, j, k;
3262 
3263 	switch (rdev->family) {
3264 	case CHIP_BONAIRE:
3265 		rdev->config.cik.max_shader_engines = 2;
3266 		rdev->config.cik.max_tile_pipes = 4;
3267 		rdev->config.cik.max_cu_per_sh = 7;
3268 		rdev->config.cik.max_sh_per_se = 1;
3269 		rdev->config.cik.max_backends_per_se = 2;
3270 		rdev->config.cik.max_texture_channel_caches = 4;
3271 		rdev->config.cik.max_gprs = 256;
3272 		rdev->config.cik.max_gs_threads = 32;
3273 		rdev->config.cik.max_hw_contexts = 8;
3274 
3275 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3276 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3277 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3278 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3279 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3280 		break;
3281 	case CHIP_HAWAII:
3282 		rdev->config.cik.max_shader_engines = 4;
3283 		rdev->config.cik.max_tile_pipes = 16;
3284 		rdev->config.cik.max_cu_per_sh = 11;
3285 		rdev->config.cik.max_sh_per_se = 1;
3286 		rdev->config.cik.max_backends_per_se = 4;
3287 		rdev->config.cik.max_texture_channel_caches = 16;
3288 		rdev->config.cik.max_gprs = 256;
3289 		rdev->config.cik.max_gs_threads = 32;
3290 		rdev->config.cik.max_hw_contexts = 8;
3291 
3292 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3293 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3294 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3295 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3296 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3297 		break;
3298 	case CHIP_KAVERI:
3299 		rdev->config.cik.max_shader_engines = 1;
3300 		rdev->config.cik.max_tile_pipes = 4;
3301 		if ((rdev->pdev->device == 0x1304) ||
3302 		    (rdev->pdev->device == 0x1305) ||
3303 		    (rdev->pdev->device == 0x130C) ||
3304 		    (rdev->pdev->device == 0x130F) ||
3305 		    (rdev->pdev->device == 0x1310) ||
3306 		    (rdev->pdev->device == 0x1311) ||
3307 		    (rdev->pdev->device == 0x131C)) {
3308 			rdev->config.cik.max_cu_per_sh = 8;
3309 			rdev->config.cik.max_backends_per_se = 2;
3310 		} else if ((rdev->pdev->device == 0x1309) ||
3311 			   (rdev->pdev->device == 0x130A) ||
3312 			   (rdev->pdev->device == 0x130D) ||
3313 			   (rdev->pdev->device == 0x1313) ||
3314 			   (rdev->pdev->device == 0x131D)) {
3315 			rdev->config.cik.max_cu_per_sh = 6;
3316 			rdev->config.cik.max_backends_per_se = 2;
3317 		} else if ((rdev->pdev->device == 0x1306) ||
3318 			   (rdev->pdev->device == 0x1307) ||
3319 			   (rdev->pdev->device == 0x130B) ||
3320 			   (rdev->pdev->device == 0x130E) ||
3321 			   (rdev->pdev->device == 0x1315) ||
3322 			   (rdev->pdev->device == 0x131B)) {
3323 			rdev->config.cik.max_cu_per_sh = 4;
3324 			rdev->config.cik.max_backends_per_se = 1;
3325 		} else {
3326 			rdev->config.cik.max_cu_per_sh = 3;
3327 			rdev->config.cik.max_backends_per_se = 1;
3328 		}
3329 		rdev->config.cik.max_sh_per_se = 1;
3330 		rdev->config.cik.max_texture_channel_caches = 4;
3331 		rdev->config.cik.max_gprs = 256;
3332 		rdev->config.cik.max_gs_threads = 16;
3333 		rdev->config.cik.max_hw_contexts = 8;
3334 
3335 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3336 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3337 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3338 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3339 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3340 		break;
3341 	case CHIP_KABINI:
3342 	case CHIP_MULLINS:
3343 	default:
3344 		rdev->config.cik.max_shader_engines = 1;
3345 		rdev->config.cik.max_tile_pipes = 2;
3346 		rdev->config.cik.max_cu_per_sh = 2;
3347 		rdev->config.cik.max_sh_per_se = 1;
3348 		rdev->config.cik.max_backends_per_se = 1;
3349 		rdev->config.cik.max_texture_channel_caches = 2;
3350 		rdev->config.cik.max_gprs = 256;
3351 		rdev->config.cik.max_gs_threads = 16;
3352 		rdev->config.cik.max_hw_contexts = 8;
3353 
3354 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3355 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3356 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3357 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3358 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3359 		break;
3360 	}
3361 
3362 	/* Initialize HDP */
3363 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3364 		WREG32((0x2c14 + j), 0x00000000);
3365 		WREG32((0x2c18 + j), 0x00000000);
3366 		WREG32((0x2c1c + j), 0x00000000);
3367 		WREG32((0x2c20 + j), 0x00000000);
3368 		WREG32((0x2c24 + j), 0x00000000);
3369 	}
3370 
3371 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3372 
3373 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3374 
3375 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3376 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3377 
3378 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3379 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3380 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3381 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3382 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3383 		rdev->config.cik.mem_row_size_in_kb = 4;
3384 	/* XXX use MC settings? */
3385 	rdev->config.cik.shader_engine_tile_size = 32;
3386 	rdev->config.cik.num_gpus = 1;
3387 	rdev->config.cik.multi_gpu_tile_size = 64;
3388 
3389 	/* fix up row size */
3390 	gb_addr_config &= ~ROW_SIZE_MASK;
3391 	switch (rdev->config.cik.mem_row_size_in_kb) {
3392 	case 1:
3393 	default:
3394 		gb_addr_config |= ROW_SIZE(0);
3395 		break;
3396 	case 2:
3397 		gb_addr_config |= ROW_SIZE(1);
3398 		break;
3399 	case 4:
3400 		gb_addr_config |= ROW_SIZE(2);
3401 		break;
3402 	}
3403 
3404 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3405 	 * not have bank info, so create a custom tiling dword.
3406 	 * bits 3:0   num_pipes
3407 	 * bits 7:4   num_banks
3408 	 * bits 11:8  group_size
3409 	 * bits 15:12 row_size
3410 	 */
3411 	rdev->config.cik.tile_config = 0;
3412 	switch (rdev->config.cik.num_tile_pipes) {
3413 	case 1:
3414 		rdev->config.cik.tile_config |= (0 << 0);
3415 		break;
3416 	case 2:
3417 		rdev->config.cik.tile_config |= (1 << 0);
3418 		break;
3419 	case 4:
3420 		rdev->config.cik.tile_config |= (2 << 0);
3421 		break;
3422 	case 8:
3423 	default:
3424 		/* XXX what about 12? */
3425 		rdev->config.cik.tile_config |= (3 << 0);
3426 		break;
3427 	}
3428 	rdev->config.cik.tile_config |=
3429 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3430 	rdev->config.cik.tile_config |=
3431 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3432 	rdev->config.cik.tile_config |=
3433 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3434 
3435 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3436 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3437 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3438 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3439 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3440 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3441 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3442 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3443 
3444 	cik_tiling_mode_table_init(rdev);
3445 
3446 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3447 		     rdev->config.cik.max_sh_per_se,
3448 		     rdev->config.cik.max_backends_per_se);
3449 
3450 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
3451 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
3452 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k++) {
3453 				rdev->config.cik.active_cus +=
3454 					hweight32(cik_get_cu_active_bitmap(rdev, i, j));
3455 			}
3456 		}
3457 	}
3458 
3459 	/* set HW defaults for 3D engine */
3460 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3461 
3462 	WREG32(SX_DEBUG_1, 0x20);
3463 
3464 	WREG32(TA_CNTL_AUX, 0x00010000);
3465 
3466 	tmp = RREG32(SPI_CONFIG_CNTL);
3467 	tmp |= 0x03000000;
3468 	WREG32(SPI_CONFIG_CNTL, tmp);
3469 
3470 	WREG32(SQ_CONFIG, 1);
3471 
3472 	WREG32(DB_DEBUG, 0);
3473 
3474 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3475 	tmp |= 0x00000400;
3476 	WREG32(DB_DEBUG2, tmp);
3477 
3478 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3479 	tmp |= 0x00020200;
3480 	WREG32(DB_DEBUG3, tmp);
3481 
3482 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3483 	tmp |= 0x00018208;
3484 	WREG32(CB_HW_CONTROL, tmp);
3485 
3486 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3487 
3488 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3489 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3490 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3491 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3492 
3493 	WREG32(VGT_NUM_INSTANCES, 1);
3494 
3495 	WREG32(CP_PERFMON_CNTL, 0);
3496 
3497 	WREG32(SQ_CONFIG, 0);
3498 
3499 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3500 					  FORCE_EOV_MAX_REZ_CNT(255)));
3501 
3502 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3503 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3504 
3505 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3506 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3507 
3508 	tmp = RREG32(HDP_MISC_CNTL);
3509 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3510 	WREG32(HDP_MISC_CNTL, tmp);
3511 
3512 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3513 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3514 
3515 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3516 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3517 
3518 	udelay(50);
3519 }
3520 
3521 /*
3522  * GPU scratch registers helpers function.
3523  */
3524 /**
3525  * cik_scratch_init - setup driver info for CP scratch regs
3526  *
3527  * @rdev: radeon_device pointer
3528  *
3529  * Set up the number and offset of the CP scratch registers.
3530  * NOTE: use of CP scratch registers is a legacy inferface and
3531  * is not used by default on newer asics (r6xx+).  On newer asics,
3532  * memory buffers are used for fences rather than scratch regs.
3533  */
3534 static void cik_scratch_init(struct radeon_device *rdev)
3535 {
3536 	int i;
3537 
3538 	rdev->scratch.num_reg = 7;
3539 	rdev->scratch.reg_base = SCRATCH_REG0;
3540 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3541 		rdev->scratch.free[i] = true;
3542 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3543 	}
3544 }
3545 
3546 /**
3547  * cik_ring_test - basic gfx ring test
3548  *
3549  * @rdev: radeon_device pointer
3550  * @ring: radeon_ring structure holding ring information
3551  *
3552  * Allocate a scratch register and write to it using the gfx ring (CIK).
3553  * Provides a basic gfx ring test to verify that the ring is working.
3554  * Used by cik_cp_gfx_resume();
3555  * Returns 0 on success, error on failure.
3556  */
3557 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3558 {
3559 	uint32_t scratch;
3560 	uint32_t tmp = 0;
3561 	unsigned i;
3562 	int r;
3563 
3564 	r = radeon_scratch_get(rdev, &scratch);
3565 	if (r) {
3566 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3567 		return r;
3568 	}
3569 	WREG32(scratch, 0xCAFEDEAD);
3570 	r = radeon_ring_lock(rdev, ring, 3);
3571 	if (r) {
3572 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3573 		radeon_scratch_free(rdev, scratch);
3574 		return r;
3575 	}
3576 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3577 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3578 	radeon_ring_write(ring, 0xDEADBEEF);
3579 	radeon_ring_unlock_commit(rdev, ring);
3580 
3581 	for (i = 0; i < rdev->usec_timeout; i++) {
3582 		tmp = RREG32(scratch);
3583 		if (tmp == 0xDEADBEEF)
3584 			break;
3585 		DRM_UDELAY(1);
3586 	}
3587 	if (i < rdev->usec_timeout) {
3588 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3589 	} else {
3590 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3591 			  ring->idx, scratch, tmp);
3592 		r = -EINVAL;
3593 	}
3594 	radeon_scratch_free(rdev, scratch);
3595 	return r;
3596 }
3597 
3598 /**
3599  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3600  *
3601  * @rdev: radeon_device pointer
3602  * @ridx: radeon ring index
3603  *
3604  * Emits an hdp flush on the cp.
3605  */
3606 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3607 				       int ridx)
3608 {
3609 	struct radeon_ring *ring = &rdev->ring[ridx];
3610 	u32 ref_and_mask;
3611 
3612 	switch (ring->idx) {
3613 	case CAYMAN_RING_TYPE_CP1_INDEX:
3614 	case CAYMAN_RING_TYPE_CP2_INDEX:
3615 	default:
3616 		switch (ring->me) {
3617 		case 0:
3618 			ref_and_mask = CP2 << ring->pipe;
3619 			break;
3620 		case 1:
3621 			ref_and_mask = CP6 << ring->pipe;
3622 			break;
3623 		default:
3624 			return;
3625 		}
3626 		break;
3627 	case RADEON_RING_TYPE_GFX_INDEX:
3628 		ref_and_mask = CP0;
3629 		break;
3630 	}
3631 
3632 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3633 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3634 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3635 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3636 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3637 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3638 	radeon_ring_write(ring, ref_and_mask);
3639 	radeon_ring_write(ring, ref_and_mask);
3640 	radeon_ring_write(ring, 0x20); /* poll interval */
3641 }
3642 
3643 /**
3644  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3645  *
3646  * @rdev: radeon_device pointer
3647  * @fence: radeon fence object
3648  *
3649  * Emits a fence sequnce number on the gfx ring and flushes
3650  * GPU caches.
3651  */
3652 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3653 			     struct radeon_fence *fence)
3654 {
3655 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3656 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3657 
3658 	/* EVENT_WRITE_EOP - flush caches, send int */
3659 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3660 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3661 				 EOP_TC_ACTION_EN |
3662 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3663 				 EVENT_INDEX(5)));
3664 	radeon_ring_write(ring, addr & 0xfffffffc);
3665 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3666 	radeon_ring_write(ring, fence->seq);
3667 	radeon_ring_write(ring, 0);
3668 	/* HDP flush */
3669 	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3670 }
3671 
/**
 * cik_fence_compute_ring_emit - emit a fence on the compute ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Emits a fence sequence number on the compute ring and flushes
 * GPU caches.  Compute queues use RELEASE_MEM rather than the
 * EVENT_WRITE_EOP packet the gfx ring uses.
 */
void cik_fence_compute_ring_emit(struct radeon_device *rdev,
				 struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* RELEASE_MEM - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
				 EOP_TC_ACTION_EN |
				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
				 EVENT_INDEX(5)));
	/* data select / int select encodings — see cikd.h */
	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
	/* fence address must be dword aligned; low 2 bits dropped */
	radeon_ring_write(ring, addr & 0xfffffffc);
	radeon_ring_write(ring, upper_32_bits(addr));
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
	/* HDP flush */
	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
}
3701 
3702 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3703 			     struct radeon_ring *ring,
3704 			     struct radeon_semaphore *semaphore,
3705 			     bool emit_wait)
3706 {
3707 	uint64_t addr = semaphore->gpu_addr;
3708 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3709 
3710 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3711 	radeon_ring_write(ring, lower_32_bits(addr));
3712 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3713 
3714 	return true;
3715 }
3716 
/**
 * cik_copy_cpdma - copy pages using the CP DMA engine
 *
 * @rdev: radeon_device pointer
 * @src_offset: src GPU address
 * @dst_offset: dst GPU address
 * @num_gpu_pages: number of GPU pages to xfer
 * @fence: radeon fence object
 *
 * Copy GPU paging using the CP DMA engine (CIK+).
 * Used by the radeon ttm implementation to move pages if
 * registered as the asic copy callback.
 */
int cik_copy_cpdma(struct radeon_device *rdev,
		   uint64_t src_offset, uint64_t dst_offset,
		   unsigned num_gpu_pages,
		   struct radeon_fence **fence)
{
	struct radeon_semaphore *sem = NULL;
	int ring_index = rdev->asic->copy.blit_ring_index;
	struct radeon_ring *ring = &rdev->ring[ring_index];
	u32 size_in_bytes, cur_size_in_bytes, control;
	int i, num_loops;
	int r = 0;

	r = radeon_semaphore_create(rdev, &sem);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		return r;
	}

	/* each DMA_DATA packet can move at most 0x1fffff bytes */
	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
	/* 7 dwords per DMA_DATA packet plus room for the semaphore/fence emits */
	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
	if (r) {
		DRM_ERROR("radeon: moving bo (%d).\n", r);
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	/* make the copy wait on the previous fence before starting */
	radeon_semaphore_sync_to(sem, *fence);
	radeon_semaphore_sync_rings(rdev, sem, ring->idx);

	for (i = 0; i < num_loops; i++) {
		cur_size_in_bytes = size_in_bytes;
		if (cur_size_in_bytes > 0x1fffff)
			cur_size_in_bytes = 0x1fffff;
		size_in_bytes -= cur_size_in_bytes;
		control = 0;
		/* only the final packet sets CP_SYNC */
		if (size_in_bytes == 0)
			control |= PACKET3_DMA_DATA_CP_SYNC;
		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
		radeon_ring_write(ring, control);
		radeon_ring_write(ring, lower_32_bits(src_offset));
		radeon_ring_write(ring, upper_32_bits(src_offset));
		radeon_ring_write(ring, lower_32_bits(dst_offset));
		radeon_ring_write(ring, upper_32_bits(dst_offset));
		radeon_ring_write(ring, cur_size_in_bytes);
		src_offset += cur_size_in_bytes;
		dst_offset += cur_size_in_bytes;
	}

	r = radeon_fence_emit(rdev, fence, ring->idx);
	if (r) {
		/* roll back everything written since radeon_ring_lock() */
		radeon_ring_unlock_undo(rdev, ring);
		radeon_semaphore_free(rdev, &sem, NULL);
		return r;
	}

	radeon_ring_unlock_commit(rdev, ring);
	radeon_semaphore_free(rdev, &sem, *fence);

	return r;
}
3791 
3792 /*
3793  * IB stuff
3794  */
/**
 * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emits a DE (drawing engine) or CE (constant engine) IB
 * on the gfx ring.  IBs are usually generated by userspace
 * acceleration drivers and submitted to the kernel for
 * scheduling on the ring.  This function schedules the IB
 * on the gfx ring for execution by the GPU.
 */
void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header, control = INDIRECT_BUFFER_VALID;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 dwords for the SET_UCONFIG_REG packet below plus
			 * 4 for the INDIRECT_BUFFER packet emitted at the end */
			next_rptr = ring->wptr + 3 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_UCONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			/* 5 dwords for the WRITE_DATA packet below plus
			 * 4 for the INDIRECT_BUFFER packet emitted at the end */
			next_rptr = ring->wptr + 5 + 4;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	/* IB length in dwords, plus the VM id it should execute under */
	control |= ib->length_dw |
		(ib->vm ? (ib->vm->id << 24) : 0);

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, control);
}
3850 
3851 /**
3852  * cik_ib_test - basic gfx ring IB test
3853  *
3854  * @rdev: radeon_device pointer
3855  * @ring: radeon_ring structure holding ring information
3856  *
3857  * Allocate an IB and execute it on the gfx ring (CIK).
3858  * Provides a basic gfx ring test to verify that IBs are working.
3859  * Returns 0 on success, error on failure.
3860  */
3861 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3862 {
3863 	struct radeon_ib ib;
3864 	uint32_t scratch;
3865 	uint32_t tmp = 0;
3866 	unsigned i;
3867 	int r;
3868 
3869 	r = radeon_scratch_get(rdev, &scratch);
3870 	if (r) {
3871 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3872 		return r;
3873 	}
3874 	WREG32(scratch, 0xCAFEDEAD);
3875 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3876 	if (r) {
3877 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3878 		radeon_scratch_free(rdev, scratch);
3879 		return r;
3880 	}
3881 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3882 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3883 	ib.ptr[2] = 0xDEADBEEF;
3884 	ib.length_dw = 3;
3885 	r = radeon_ib_schedule(rdev, &ib, NULL);
3886 	if (r) {
3887 		radeon_scratch_free(rdev, scratch);
3888 		radeon_ib_free(rdev, &ib);
3889 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3890 		return r;
3891 	}
3892 	r = radeon_fence_wait(ib.fence, false);
3893 	if (r) {
3894 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3895 		radeon_scratch_free(rdev, scratch);
3896 		radeon_ib_free(rdev, &ib);
3897 		return r;
3898 	}
3899 	for (i = 0; i < rdev->usec_timeout; i++) {
3900 		tmp = RREG32(scratch);
3901 		if (tmp == 0xDEADBEEF)
3902 			break;
3903 		DRM_UDELAY(1);
3904 	}
3905 	if (i < rdev->usec_timeout) {
3906 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3907 	} else {
3908 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3909 			  scratch, tmp);
3910 		r = -EINVAL;
3911 	}
3912 	radeon_scratch_free(rdev, scratch);
3913 	radeon_ib_free(rdev, &ib);
3914 	return r;
3915 }
3916 
/*
 * CP.
 * On CIK, gfx and compute now have independent command processors.
 *
 * GFX
 * Gfx consists of a single ring and can process both gfx jobs and
 * compute jobs.  The gfx CP consists of three microengines (ME):
 * PFP - Pre-Fetch Parser
 * ME - Micro Engine
 * CE - Constant Engine
 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
 * used by the DE so that they can be loaded into cache in parallel
 * while the DE is processing state update packets.
 *
 * Compute
 * The compute CP consists of two microengines (ME):
 * MEC1 - Compute MicroEngine 1
 * MEC2 - Compute MicroEngine 2
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
 * The queues are exposed to userspace and are programmed directly
 * by the compute runtime.
 */
3940 /**
3941  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3942  *
3943  * @rdev: radeon_device pointer
3944  * @enable: enable or disable the MEs
3945  *
3946  * Halts or unhalts the gfx MEs.
3947  */
3948 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3949 {
3950 	if (enable)
3951 		WREG32(CP_ME_CNTL, 0);
3952 	else {
3953 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3954 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3955 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3956 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3957 	}
3958 	udelay(50);
3959 }
3960 
3961 /**
3962  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3963  *
3964  * @rdev: radeon_device pointer
3965  *
3966  * Loads the gfx PFP, ME, and CE ucode.
3967  * Returns 0 for success, -EINVAL if the ucode is not available.
3968  */
3969 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3970 {
3971 	const __be32 *fw_data;
3972 	int i;
3973 
3974 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3975 		return -EINVAL;
3976 
3977 	cik_cp_gfx_enable(rdev, false);
3978 
3979 	/* PFP */
3980 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3981 	WREG32(CP_PFP_UCODE_ADDR, 0);
3982 	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3983 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3984 	WREG32(CP_PFP_UCODE_ADDR, 0);
3985 
3986 	/* CE */
3987 	fw_data = (const __be32 *)rdev->ce_fw->data;
3988 	WREG32(CP_CE_UCODE_ADDR, 0);
3989 	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3990 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3991 	WREG32(CP_CE_UCODE_ADDR, 0);
3992 
3993 	/* ME */
3994 	fw_data = (const __be32 *)rdev->me_fw->data;
3995 	WREG32(CP_ME_RAM_WADDR, 0);
3996 	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3997 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3998 	WREG32(CP_ME_RAM_WADDR, 0);
3999 
4000 	WREG32(CP_PFP_UCODE_ADDR, 0);
4001 	WREG32(CP_CE_UCODE_ADDR, 0);
4002 	WREG32(CP_ME_RAM_WADDR, 0);
4003 	WREG32(CP_ME_RAM_RADDR, 0);
4004 	return 0;
4005 }
4006 
/**
 * cik_cp_gfx_start - start the gfx ring
 *
 * @rdev: radeon_device pointer
 *
 * Enables the ring and loads the clear state context and other
 * packets required to init the ring.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_start(struct radeon_device *rdev)
{
	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	int r, i;

	/* init the CP */
	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
	WREG32(CP_ENDIAN_SWAP, 0);
	WREG32(CP_DEVICE_ID, 1);

	cik_cp_gfx_enable(rdev, true);

	/* room for the clear state image plus the fixed init packets below */
	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
	if (r) {
		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
		return r;
	}

	/* init the CE partitions.  CE only used for gfx on CIK */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
	/* NOTE(review): 0xc000/0xc000 partition sizes come from the AMD init
	 * sequence; confirm against the PM4 SET_BASE documentation */
	radeon_ring_write(ring, 0xc000);
	radeon_ring_write(ring, 0xc000);

	/* setup clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	radeon_ring_write(ring, 0x80000000);
	radeon_ring_write(ring, 0x80000000);

	/* emit the default golden state from clearstate_ci.h */
	for (i = 0; i < cik_default_size; i++)
		radeon_ring_write(ring, cik_default_state[i]);

	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* set clear context state */
	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
	radeon_ring_write(ring, 0);

	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	radeon_ring_write(ring, 0x00000316);
	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */

	radeon_ring_unlock_commit(rdev, ring);

	return 0;
}
4067 
4068 /**
4069  * cik_cp_gfx_fini - stop the gfx ring
4070  *
4071  * @rdev: radeon_device pointer
4072  *
4073  * Stop the gfx ring and tear down the driver ring
4074  * info.
4075  */
4076 static void cik_cp_gfx_fini(struct radeon_device *rdev)
4077 {
4078 	cik_cp_gfx_enable(rdev, false);
4079 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4080 }
4081 
/**
 * cik_cp_gfx_resume - setup the gfx ring buffer registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the location and size of the gfx ring buffer
 * and test it to make sure it's working.
 * Returns 0 for success, error for failure.
 */
static int cik_cp_gfx_resume(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	u32 tmp;
	u32 rb_bufsz;
	u64 rb_addr;
	int r;

	WREG32(CP_SEM_WAIT_TIMER, 0x0);
	if (rdev->family != CHIP_HAWAII)
		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);

	/* Set the write pointer delay */
	WREG32(CP_RB_WPTR_DELAY, 0);

	/* set the RB to use vmid 0 */
	WREG32(CP_RB_VMID, 0);

	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);

	/* ring 0 - compute and gfx */
	/* Set ring buffer size */
	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	rb_bufsz = order_base_2(ring->ring_size / 8);
	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
#ifdef __BIG_ENDIAN
	tmp |= BUF_SWAP_32BIT;
#endif
	WREG32(CP_RB0_CNTL, tmp);

	/* Initialize the ring buffer's read and write pointers */
	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
	ring->wptr = 0;
	WREG32(CP_RB0_WPTR, ring->wptr);

	/* set the wb address whether it's enabled or not */
	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);

	/* scratch register shadowing is no longer supported */
	WREG32(SCRATCH_UMSK, 0);

	if (!rdev->wb.enabled)
		tmp |= RB_NO_UPDATE;

	/* final CNTL write drops RB_RPTR_WR_ENA (and applies RB_NO_UPDATE
	 * when writeback is disabled) */
	mdelay(1);
	WREG32(CP_RB0_CNTL, tmp);

	rb_addr = ring->gpu_addr >> 8;
	WREG32(CP_RB0_BASE, rb_addr);
	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

	/* start the ring */
	cik_cp_gfx_start(rdev);
	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
	if (r) {
		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
		return r;
	}

	/* gfx ring is back, so the full VRAM size is usable for copies again */
	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);

	return 0;
}
4157 
4158 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4159 		     struct radeon_ring *ring)
4160 {
4161 	u32 rptr;
4162 
4163 	if (rdev->wb.enabled)
4164 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4165 	else
4166 		rptr = RREG32(CP_RB0_RPTR);
4167 
4168 	return rptr;
4169 }
4170 
4171 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4172 		     struct radeon_ring *ring)
4173 {
4174 	u32 wptr;
4175 
4176 	wptr = RREG32(CP_RB0_WPTR);
4177 
4178 	return wptr;
4179 }
4180 
/* Update the gfx ring write pointer in CP_RB0_WPTR.
 * The discarded read-back is presumably there to post the register
 * write immediately — NOTE(review): standard MMIO posting-read idiom;
 * confirm against the register access rules for this ASIC.
 */
void cik_gfx_set_wptr(struct radeon_device *rdev,
		      struct radeon_ring *ring)
{
	WREG32(CP_RB0_WPTR, ring->wptr);
	(void)RREG32(CP_RB0_WPTR);
}
4187 
4188 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4189 			 struct radeon_ring *ring)
4190 {
4191 	u32 rptr;
4192 
4193 	if (rdev->wb.enabled) {
4194 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4195 	} else {
4196 		mutex_lock(&rdev->srbm_mutex);
4197 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4198 		rptr = RREG32(CP_HQD_PQ_RPTR);
4199 		cik_srbm_select(rdev, 0, 0, 0, 0);
4200 		mutex_unlock(&rdev->srbm_mutex);
4201 	}
4202 
4203 	return rptr;
4204 }
4205 
4206 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4207 			 struct radeon_ring *ring)
4208 {
4209 	u32 wptr;
4210 
4211 	if (rdev->wb.enabled) {
4212 		/* XXX check if swapping is necessary on BE */
4213 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4214 	} else {
4215 		mutex_lock(&rdev->srbm_mutex);
4216 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4217 		wptr = RREG32(CP_HQD_PQ_WPTR);
4218 		cik_srbm_select(rdev, 0, 0, 0, 0);
4219 		mutex_unlock(&rdev->srbm_mutex);
4220 	}
4221 
4222 	return wptr;
4223 }
4224 
/* Update the write pointer of a compute ring: mirror it into the
 * writeback buffer, then ring the queue's doorbell with the new value.
 */
void cik_compute_set_wptr(struct radeon_device *rdev,
			  struct radeon_ring *ring)
{
	/* XXX check if swapping is necessary on BE */
	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
	WDOORBELL32(ring->doorbell_index, ring->wptr);
}
4232 
4233 /**
4234  * cik_cp_compute_enable - enable/disable the compute CP MEs
4235  *
4236  * @rdev: radeon_device pointer
4237  * @enable: enable or disable the MEs
4238  *
4239  * Halts or unhalts the compute MEs.
4240  */
4241 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4242 {
4243 	if (enable)
4244 		WREG32(CP_MEC_CNTL, 0);
4245 	else {
4246 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4247 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
4248 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
4249 	}
4250 	udelay(50);
4251 }
4252 
/**
 * cik_cp_compute_load_microcode - load the compute CP ME ucode
 *
 * @rdev: radeon_device pointer
 *
 * Loads the compute MEC1&2 ucode.  MEC2 exists only on Kaveri and is
 * loaded from the same firmware image as MEC1.
 * Returns 0 for success, -EINVAL if the ucode is not available.
 */
static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data;
	int i;

	if (!rdev->mec_fw)
		return -EINVAL;

	cik_cp_compute_enable(rdev, false);

	/* MEC1 */
	fw_data = (const __be32 *)rdev->mec_fw->data;
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);

	if (rdev->family == CHIP_KAVERI) {
		/* MEC2 */
		fw_data = (const __be32 *)rdev->mec_fw->data;
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
	}

	return 0;
}
4289 
/**
 * cik_cp_compute_start - start the compute queues
 *
 * @rdev: radeon_device pointer
 *
 * Enable the compute queues by unhalting the MEC microengines;
 * per-queue setup is done separately (see cik_cp_compute_resume()).
 * Returns 0 for success, error for failure.
 */
static int cik_cp_compute_start(struct radeon_device *rdev)
{
	cik_cp_compute_enable(rdev, true);

	return 0;
}
4304 
4305 /**
4306  * cik_cp_compute_fini - stop the compute queues
4307  *
4308  * @rdev: radeon_device pointer
4309  *
4310  * Stop the compute queues and tear down the driver queue
4311  * info.
4312  */
4313 static void cik_cp_compute_fini(struct radeon_device *rdev)
4314 {
4315 	int i, idx, r;
4316 
4317 	cik_cp_compute_enable(rdev, false);
4318 
4319 	for (i = 0; i < 2; i++) {
4320 		if (i == 0)
4321 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4322 		else
4323 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4324 
4325 		if (rdev->ring[idx].mqd_obj) {
4326 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4327 			if (unlikely(r != 0))
4328 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4329 
4330 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4331 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4332 
4333 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4334 			rdev->ring[idx].mqd_obj = NULL;
4335 		}
4336 	}
4337 }
4338 
4339 static void cik_mec_fini(struct radeon_device *rdev)
4340 {
4341 	int r;
4342 
4343 	if (rdev->mec.hpd_eop_obj) {
4344 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4345 		if (unlikely(r != 0))
4346 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4347 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4348 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4349 
4350 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4351 		rdev->mec.hpd_eop_obj = NULL;
4352 	}
4353 }
4354 
4355 #define MEC_HPD_SIZE 2048
4356 
/* Allocate, pin and zero the HPD EOP buffer shared by all MEC pipes.
 * Returns 0 on success, negative error code on failure.
 */
static int cik_mec_init(struct radeon_device *rdev)
{
	int r;
	u32 *hpd;

	/*
	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
	 */
	if (rdev->family == CHIP_KAVERI)
		rdev->mec.num_mec = 2;
	else
		rdev->mec.num_mec = 1;
	rdev->mec.num_pipe = 4;
	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;

	if (rdev->mec.hpd_eop_obj == NULL) {
		/* NOTE(review): the "* 2" over-allocation factor is not
		 * explained here; the same factor is used for the memset and
		 * the per-pipe addressing in cik_cp_compute_resume() */
		r = radeon_bo_create(rdev,
				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
				     PAGE_SIZE, true,
				     RADEON_GEM_DOMAIN_GTT, NULL,
				     &rdev->mec.hpd_eop_obj);
		if (r) {
			/* NOTE(review): message says "HDP" but this is the HPD EOP bo */
			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
			return r;
		}
	}

	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
	if (unlikely(r != 0)) {
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
			  &rdev->mec.hpd_eop_gpu_addr);
	if (r) {
		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}
	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
	if (r) {
		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
		cik_mec_fini(rdev);
		return r;
	}

	/* clear memory.  Not sure if this is required or not */
	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);

	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);

	return 0;
}
4412 
/* Saved register state of one hardware queue descriptor (HQD).
 * Field names mirror the CP_HQD_xxx / CP_MQD_xxx registers that
 * cik_cp_compute_resume() programs from this structure.
 */
struct hqd_registers
{
	u32 cp_mqd_base_addr;
	u32 cp_mqd_base_addr_hi;
	u32 cp_hqd_active;
	u32 cp_hqd_vmid;
	u32 cp_hqd_persistent_state;
	u32 cp_hqd_pipe_priority;
	u32 cp_hqd_queue_priority;
	u32 cp_hqd_quantum;
	u32 cp_hqd_pq_base;
	u32 cp_hqd_pq_base_hi;
	u32 cp_hqd_pq_rptr;
	u32 cp_hqd_pq_rptr_report_addr;
	u32 cp_hqd_pq_rptr_report_addr_hi;
	u32 cp_hqd_pq_wptr_poll_addr;
	u32 cp_hqd_pq_wptr_poll_addr_hi;
	u32 cp_hqd_pq_doorbell_control;
	u32 cp_hqd_pq_wptr;
	u32 cp_hqd_pq_control;
	u32 cp_hqd_ib_base_addr;
	u32 cp_hqd_ib_base_addr_hi;
	u32 cp_hqd_ib_rptr;
	u32 cp_hqd_ib_control;
	u32 cp_hqd_iq_timer;
	u32 cp_hqd_iq_rptr;
	u32 cp_hqd_dequeue_request;
	u32 cp_hqd_dma_offload;
	u32 cp_hqd_sema_cmd;
	u32 cp_hqd_msg_type;
	u32 cp_hqd_atomic0_preop_lo;
	u32 cp_hqd_atomic0_preop_hi;
	u32 cp_hqd_atomic1_preop_lo;
	u32 cp_hqd_atomic1_preop_hi;
	u32 cp_hqd_hq_scheduler0;
	u32 cp_hqd_hq_scheduler1;
	u32 cp_mqd_control;
};
4451 
/* Memory queue descriptor (MQD) for a Bonaire-class compute queue.
 * An instance lives in a GTT buffer object; its GPU address is handed
 * to the CP via CP_MQD_BASE_ADDR in cik_cp_compute_resume().
 * queue_state holds the HQD register image for the queue.
 */
struct bonaire_mqd
{
	u32 header;
	u32 dispatch_initiator;
	u32 dimensions[3];
	u32 start_idx[3];
	u32 num_threads[3];
	u32 pipeline_stat_enable;
	u32 perf_counter_enable;
	u32 pgm[2];
	u32 tba[2];
	u32 tma[2];
	u32 pgm_rsrc[2];
	u32 vmid;
	u32 resource_limits;
	u32 static_thread_mgmt01[2];
	u32 tmp_ring_size;
	u32 static_thread_mgmt23[2];
	u32 restart[3];
	u32 thread_trace_enable;
	u32 reserved1;
	u32 user_data[16];
	u32 vgtcs_invoke_count[2];
	struct hqd_registers queue_state;
	u32 dequeue_cntr;
	u32 interrupt_queue[64];
};
4479 
4480 /**
4481  * cik_cp_compute_resume - setup the compute queue registers
4482  *
4483  * @rdev: radeon_device pointer
4484  *
4485  * Program the compute queues and test them to make sure they
4486  * are working.
4487  * Returns 0 for success, error for failure.
4488  */
4489 static int cik_cp_compute_resume(struct radeon_device *rdev)
4490 {
4491 	int r, i, idx;
4492 	u32 tmp;
4493 	bool use_doorbell = true;
4494 	u64 hqd_gpu_addr;
4495 	u64 mqd_gpu_addr;
4496 	u64 eop_gpu_addr;
4497 	u64 wb_gpu_addr;
4498 	u32 *buf;
4499 	struct bonaire_mqd *mqd;
4500 
4501 	r = cik_cp_compute_start(rdev);
4502 	if (r)
4503 		return r;
4504 
4505 	/* fix up chicken bits */
4506 	tmp = RREG32(CP_CPF_DEBUG);
4507 	tmp |= (1 << 23);
4508 	WREG32(CP_CPF_DEBUG, tmp);
4509 
4510 	/* init the pipes */
4511 	mutex_lock(&rdev->srbm_mutex);
4512 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4513 		int me = (i < 4) ? 1 : 2;
4514 		int pipe = (i < 4) ? i : (i - 4);
4515 
4516 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4517 
4518 		cik_srbm_select(rdev, me, pipe, 0, 0);
4519 
4520 		/* write the EOP addr */
4521 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4522 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4523 
4524 		/* set the VMID assigned */
4525 		WREG32(CP_HPD_EOP_VMID, 0);
4526 
4527 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4528 		tmp = RREG32(CP_HPD_EOP_CONTROL);
4529 		tmp &= ~EOP_SIZE_MASK;
4530 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4531 		WREG32(CP_HPD_EOP_CONTROL, tmp);
4532 	}
4533 	cik_srbm_select(rdev, 0, 0, 0, 0);
4534 	mutex_unlock(&rdev->srbm_mutex);
4535 
4536 	/* init the queues.  Just two for now. */
4537 	for (i = 0; i < 2; i++) {
4538 		if (i == 0)
4539 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4540 		else
4541 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4542 
4543 		if (rdev->ring[idx].mqd_obj == NULL) {
4544 			r = radeon_bo_create(rdev,
4545 					     sizeof(struct bonaire_mqd),
4546 					     PAGE_SIZE, true,
4547 					     RADEON_GEM_DOMAIN_GTT, NULL,
4548 					     &rdev->ring[idx].mqd_obj);
4549 			if (r) {
4550 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4551 				return r;
4552 			}
4553 		}
4554 
4555 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4556 		if (unlikely(r != 0)) {
4557 			cik_cp_compute_fini(rdev);
4558 			return r;
4559 		}
4560 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4561 				  &mqd_gpu_addr);
4562 		if (r) {
4563 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4564 			cik_cp_compute_fini(rdev);
4565 			return r;
4566 		}
4567 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4568 		if (r) {
4569 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4570 			cik_cp_compute_fini(rdev);
4571 			return r;
4572 		}
4573 
4574 		/* init the mqd struct */
4575 		memset(buf, 0, sizeof(struct bonaire_mqd));
4576 
4577 		mqd = (struct bonaire_mqd *)buf;
4578 		mqd->header = 0xC0310800;
4579 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4580 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4581 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4582 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4583 
4584 		mutex_lock(&rdev->srbm_mutex);
4585 		cik_srbm_select(rdev, rdev->ring[idx].me,
4586 				rdev->ring[idx].pipe,
4587 				rdev->ring[idx].queue, 0);
4588 
4589 		/* disable wptr polling */
4590 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4591 		tmp &= ~WPTR_POLL_EN;
4592 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4593 
4594 		/* enable doorbell? */
4595 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4596 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4597 		if (use_doorbell)
4598 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4599 		else
4600 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4601 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4602 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4603 
4604 		/* disable the queue if it's active */
4605 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4606 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4607 		mqd->queue_state.cp_hqd_pq_wptr= 0;
4608 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4609 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4610 			for (i = 0; i < rdev->usec_timeout; i++) {
4611 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4612 					break;
4613 				udelay(1);
4614 			}
4615 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4616 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4617 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4618 		}
4619 
4620 		/* set the pointer to the MQD */
4621 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4622 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4623 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4624 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4625 		/* set MQD vmid to 0 */
4626 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4627 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4628 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4629 
4630 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4631 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4632 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4633 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4634 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4635 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4636 
4637 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4638 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4639 		mqd->queue_state.cp_hqd_pq_control &=
4640 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4641 
4642 		mqd->queue_state.cp_hqd_pq_control |=
4643 			order_base_2(rdev->ring[idx].ring_size / 8);
4644 		mqd->queue_state.cp_hqd_pq_control |=
4645 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4646 #ifdef __BIG_ENDIAN
4647 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4648 #endif
4649 		mqd->queue_state.cp_hqd_pq_control &=
4650 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4651 		mqd->queue_state.cp_hqd_pq_control |=
4652 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4653 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4654 
4655 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4656 		if (i == 0)
4657 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4658 		else
4659 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4660 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4661 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4662 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4663 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4664 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4665 
4666 		/* set the wb address wether it's enabled or not */
4667 		if (i == 0)
4668 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4669 		else
4670 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4671 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4672 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4673 			upper_32_bits(wb_gpu_addr) & 0xffff;
4674 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4675 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4676 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4677 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4678 
4679 		/* enable the doorbell if requested */
4680 		if (use_doorbell) {
4681 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4682 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4683 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4684 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4685 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4686 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4687 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4688 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4689 
4690 		} else {
4691 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4692 		}
4693 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4694 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4695 
4696 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4697 		rdev->ring[idx].wptr = 0;
4698 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4699 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4700 		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);
4701 
4702 		/* set the vmid for the queue */
4703 		mqd->queue_state.cp_hqd_vmid = 0;
4704 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4705 
4706 		/* activate the queue */
4707 		mqd->queue_state.cp_hqd_active = 1;
4708 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4709 
4710 		cik_srbm_select(rdev, 0, 0, 0, 0);
4711 		mutex_unlock(&rdev->srbm_mutex);
4712 
4713 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4714 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4715 
4716 		rdev->ring[idx].ready = true;
4717 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4718 		if (r)
4719 			rdev->ring[idx].ready = false;
4720 	}
4721 
4722 	return 0;
4723 }
4724 
/**
 * cik_cp_enable - enable/disable both the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 * @enable: true = enable, false = disable
 *
 * Convenience wrapper that toggles the gfx command processor and the
 * compute command processor together (CIK).
 */
static void cik_cp_enable(struct radeon_device *rdev, bool enable)
{
	cik_cp_gfx_enable(rdev, enable);
	cik_cp_compute_enable(rdev, enable);
}
4730 
/**
 * cik_cp_load_microcode - load the gfx and compute CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Load the gfx CP ucode first, then the compute MEC ucode.
 * Returns 0 on success, error code from the first failing loader.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int ret;

	ret = cik_cp_gfx_load_microcode(rdev);
	if (ret)
		return ret;

	return cik_cp_compute_load_microcode(rdev);
}
4744 
/**
 * cik_cp_fini - tear down the gfx and compute CPs
 *
 * @rdev: radeon_device pointer
 *
 * Tears down the gfx ring, then the compute queues (CIK).
 */
static void cik_cp_fini(struct radeon_device *rdev)
{
	cik_cp_gfx_fini(rdev);
	cik_cp_compute_fini(rdev);
}
4750 
4751 static int cik_cp_resume(struct radeon_device *rdev)
4752 {
4753 	int r;
4754 
4755 	cik_enable_gui_idle_interrupt(rdev, false);
4756 
4757 	r = cik_cp_load_microcode(rdev);
4758 	if (r)
4759 		return r;
4760 
4761 	r = cik_cp_gfx_resume(rdev);
4762 	if (r)
4763 		return r;
4764 	r = cik_cp_compute_resume(rdev);
4765 	if (r)
4766 		return r;
4767 
4768 	cik_enable_gui_idle_interrupt(rdev, true);
4769 
4770 	return 0;
4771 }
4772 
/**
 * cik_print_gpu_status_regs - dump GPU status registers
 *
 * @rdev: radeon_device pointer
 *
 * Dump the GRBM/SRBM, SDMA and CP status registers to the kernel
 * log to help diagnose GPU hangs (CIK).
 */
static void cik_print_gpu_status_regs(struct radeon_device *rdev)
{
	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
		RREG32(GRBM_STATUS));
	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
		RREG32(GRBM_STATUS2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
		RREG32(GRBM_STATUS_SE0));
	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
		RREG32(GRBM_STATUS_SE1));
	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
		RREG32(GRBM_STATUS_SE2));
	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
		RREG32(GRBM_STATUS_SE3));
	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
		RREG32(SRBM_STATUS));
	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
		RREG32(SRBM_STATUS2));
	/* both SDMA engines share the register layout, offset selects engine */
	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT2));
	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
		 RREG32(CP_STALLED_STAT3));
	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
		 RREG32(CP_CPF_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPF_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
		 RREG32(CP_CPC_STALLED_STAT1));
	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
}
4812 
4813 /**
4814  * cik_gpu_check_soft_reset - check which blocks are busy
4815  *
4816  * @rdev: radeon_device pointer
4817  *
4818  * Check which blocks are busy and return the relevant reset
4819  * mask to be used by cik_gpu_soft_reset().
4820  * Returns a mask of the blocks to be reset.
4821  */
u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	/* any of the 3D pipeline blocks busy -> reset gfx */
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & RLC_BUSY)
		reset_mask |= RADEON_RESET_RLC;

	/* SDMA0_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* SDMA1_STATUS_REG */
	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
	if (!(tmp & SDMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & SDMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & SDMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* Skip MC reset as it's most likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
4893 
4894 /**
4895  * cik_gpu_soft_reset - soft reset GPU
4896  *
4897  * @rdev: radeon_device pointer
4898  * @reset_mask: mask of which blocks to reset
4899  *
4900  * Soft reset the blocks specified in @reset_mask.
4901  */
static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	/* dump hang state before touching anything */
	cik_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* disable CG/PG */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* stop the rlc */
	cik_rlc_stop(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* sdma0 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* sdma1 */
		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
		tmp |= SDMA_HALT;
		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	}

	/* blackout memory accesses while the blocks are reset */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the block reset mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_SDMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_SDMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	/* APUs don't have a separate MC to reset */
	if (!(rdev->flags & RADEON_IS_IGP)) {
		if (reset_mask & RADEON_RESET_MC)
			srbm_soft_reset |= SOFT_RESET_MC;
	}

	if (grbm_soft_reset) {
		/* assert reset, read back to post the write, wait, deassert */
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	/* dump status again so the before/after can be compared */
	cik_print_gpu_status_regs(rdev);
}
5024 
/* GMCON register contents saved across a pci config reset on KV/KB APUs */
struct kv_reset_save_regs {
	u32 gmcon_reng_execute;
	u32 gmcon_misc;
	u32 gmcon_misc3;
};
5030 
/**
 * kv_save_regs_for_reset - save GMCON registers before an asic reset
 *
 * @rdev: radeon_device pointer
 * @save: storage for the saved register contents
 *
 * Save the GMCON render engine and misc registers, then disable the
 * save/restore engine triggers and stutter mode so they don't run
 * while the asic is being reset.  The saved values are put back by
 * kv_restore_regs_for_reset().  Used on KV/KB APUs.
 */
static void kv_save_regs_for_reset(struct radeon_device *rdev,
				   struct kv_reset_save_regs *save)
{
	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
	save->gmcon_misc = RREG32(GMCON_MISC);
	save->gmcon_misc3 = RREG32(GMCON_MISC3);

	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
						STCTRL_STUTTER_EN));
}
5042 
5043 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
5044 				      struct kv_reset_save_regs *save)
5045 {
5046 	int i;
5047 
5048 	WREG32(GMCON_PGFSM_WRITE, 0);
5049 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
5050 
5051 	for (i = 0; i < 5; i++)
5052 		WREG32(GMCON_PGFSM_WRITE, 0);
5053 
5054 	WREG32(GMCON_PGFSM_WRITE, 0);
5055 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
5056 
5057 	for (i = 0; i < 5; i++)
5058 		WREG32(GMCON_PGFSM_WRITE, 0);
5059 
5060 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
5061 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
5062 
5063 	for (i = 0; i < 5; i++)
5064 		WREG32(GMCON_PGFSM_WRITE, 0);
5065 
5066 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
5067 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
5068 
5069 	for (i = 0; i < 5; i++)
5070 		WREG32(GMCON_PGFSM_WRITE, 0);
5071 
5072 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
5073 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
5074 
5075 	for (i = 0; i < 5; i++)
5076 		WREG32(GMCON_PGFSM_WRITE, 0);
5077 
5078 	WREG32(GMCON_PGFSM_WRITE, 0);
5079 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
5080 
5081 	for (i = 0; i < 5; i++)
5082 		WREG32(GMCON_PGFSM_WRITE, 0);
5083 
5084 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
5085 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
5086 
5087 	for (i = 0; i < 5; i++)
5088 		WREG32(GMCON_PGFSM_WRITE, 0);
5089 
5090 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
5091 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
5092 
5093 	for (i = 0; i < 5; i++)
5094 		WREG32(GMCON_PGFSM_WRITE, 0);
5095 
5096 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
5097 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
5098 
5099 	for (i = 0; i < 5; i++)
5100 		WREG32(GMCON_PGFSM_WRITE, 0);
5101 
5102 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
5103 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
5104 
5105 	for (i = 0; i < 5; i++)
5106 		WREG32(GMCON_PGFSM_WRITE, 0);
5107 
5108 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
5109 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5110 
5111 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5112 	WREG32(GMCON_MISC, save->gmcon_misc);
5113 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5114 }
5115 
/**
 * cik_gpu_pci_config_reset - reset the asic via a pci config reset
 *
 * @rdev: radeon_device pointer
 *
 * Quiesce the CP, MEC, SDMA engines and the memory controller, then
 * reset the whole asic through pci config space and wait for it to
 * come back (CIK).  On APUs the GMCON registers are saved/restored
 * around the reset.  Used as a bigger hammer when soft reset fails.
 */
static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	struct kv_reset_save_regs kv_save = { 0 };
	u32 tmp, i;

	dev_info(rdev->dev, "GPU pci config reset\n");

	/* disable dpm? */

	/* disable cg/pg */
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);

	/* Disable GFX parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	/* Disable MEC parsing/prefetching */
	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);

	/* sdma0 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	/* sdma1 */
	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
	tmp |= SDMA_HALT;
	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* XXX other engines? */

	/* halt the rlc, disable cp internal ints */
	cik_rlc_stop(rdev);

	udelay(50);

	/* disable mem access */
	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
	}

	if (rdev->flags & RADEON_IS_IGP)
		kv_save_regs_for_reset(rdev, &kv_save);

	/* disable BM */
	pci_clear_master(rdev->pdev);
	/* reset */
	radeon_pci_config_reset(rdev);

	udelay(100);

	/* wait for asic to come out of reset */
	/* CONFIG_MEMSIZE reads all-ones while the asic is in reset */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
			break;
		udelay(1);
	}

	/* does asic init need to be run first??? */
	if (rdev->flags & RADEON_IS_IGP)
		kv_restore_regs_for_reset(rdev, &kv_save);
}
5178 
5179 /**
5180  * cik_asic_reset - soft reset GPU
5181  *
5182  * @rdev: radeon_device pointer
5183  *
5184  * Look up which blocks are hung and attempt
5185  * to reset them.
5186  * Returns 0 for success.
5187  */
int cik_asic_reset(struct radeon_device *rdev)
{
	u32 reset_mask;

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* let the bios know the engine is hung via the scratch regs */
	if (reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, true);

	/* try soft reset */
	cik_gpu_soft_reset(rdev, reset_mask);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* try pci config reset */
	if (reset_mask && radeon_hard_reset)
		cik_gpu_pci_config_reset(rdev);

	reset_mask = cik_gpu_check_soft_reset(rdev);

	/* clear the hung flag only if everything came back idle */
	if (!reset_mask)
		r600_set_bios_scratch_engine_hung(rdev, false);

	return 0;
}
5213 
5214 /**
5215  * cik_gfx_is_lockup - check if the 3D engine is locked up
5216  *
5217  * @rdev: radeon_device pointer
5218  * @ring: radeon_ring structure holding ring information
5219  *
5220  * Check if the 3D engine is locked up (CIK).
5221  * Returns true if the engine is locked, false if not.
5222  */
5223 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5224 {
5225 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5226 
5227 	if (!(reset_mask & (RADEON_RESET_GFX |
5228 			    RADEON_RESET_COMPUTE |
5229 			    RADEON_RESET_CP))) {
5230 		radeon_ring_lockup_update(rdev, ring);
5231 		return false;
5232 	}
5233 	return radeon_ring_test_lockup(rdev, ring);
5234 }
5235 
5236 /* MC */
5237 /**
5238  * cik_mc_program - program the GPU memory controller
5239  *
5240  * @rdev: radeon_device pointer
5241  *
5242  * Set the location of vram, gart, and AGP in the GPU's
5243  * physical address space (CIK).
5244  */
static void cik_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	/* blackout memory clients while the apertures are reprogrammed */
	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	/* Lockout access through VGA aperture*/
	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB location: end in the high half-word, start in the low */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* no AGP on CIK, collapse the AGP aperture */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	/* we need to own VRAM, so turn off the VGA renderer here
	 * to stop it overwriting our objects */
	rv515_vga_render_disable(rdev);
}
5292 
5293 /**
5294  * cik_mc_init - initialize the memory controller driver params
5295  *
5296  * @rdev: radeon_device pointer
5297  *
5298  * Look up the amount of vram, vram width, and decide how to place
5299  * vram and gart within the GPU's physical address space (CIK).
5300  * Returns 0 for success.
5301  */
5302 static int cik_mc_init(struct radeon_device *rdev)
5303 {
5304 	u32 tmp;
5305 	int chansize, numchan;
5306 
5307 	/* Get VRAM informations */
5308 	rdev->mc.vram_is_ddr = true;
5309 	tmp = RREG32(MC_ARB_RAMCFG);
5310 	if (tmp & CHANSIZE_MASK) {
5311 		chansize = 64;
5312 	} else {
5313 		chansize = 32;
5314 	}
5315 	tmp = RREG32(MC_SHARED_CHMAP);
5316 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5317 	case 0:
5318 	default:
5319 		numchan = 1;
5320 		break;
5321 	case 1:
5322 		numchan = 2;
5323 		break;
5324 	case 2:
5325 		numchan = 4;
5326 		break;
5327 	case 3:
5328 		numchan = 8;
5329 		break;
5330 	case 4:
5331 		numchan = 3;
5332 		break;
5333 	case 5:
5334 		numchan = 6;
5335 		break;
5336 	case 6:
5337 		numchan = 10;
5338 		break;
5339 	case 7:
5340 		numchan = 12;
5341 		break;
5342 	case 8:
5343 		numchan = 16;
5344 		break;
5345 	}
5346 	rdev->mc.vram_width = numchan * chansize;
5347 	/* Could aper size report 0 ? */
5348 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5349 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5350 	/* size in MB on si */
5351 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5352 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5353 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5354 	si_vram_gtt_location(rdev, &rdev->mc);
5355 	radeon_update_bandwidth_info(rdev);
5356 
5357 	return 0;
5358 }
5359 
5360 /*
5361  * GART
5362  * VMID 0 is the physical GPU addresses as used by the kernel.
5363  * VMIDs 1-15 are used for userspace clients and are handled
5364  * by the radeon vm/hsa code.
5365  */
5366 /**
5367  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5368  *
5369  * @rdev: radeon_device pointer
5370  *
5371  * Flush the TLB for the VMID 0 page table (CIK).
5372  */
void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
{
	/* flush hdp cache */
	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);

	/* bits 0-15 are the VM contexts0-15; bit 0 = VMID 0 (kernel GART) */
	WREG32(VM_INVALIDATE_REQUEST, 0x1);
}
5381 
5382 /**
5383  * cik_pcie_gart_enable - gart enable
5384  *
5385  * @rdev: radeon_device pointer
5386  *
5387  * This sets up the TLBs, programs the page tables for VMID0,
5388  * sets up the hw for VMIDs 1-15 which are allocated on
5389  * demand, and sets up the global locations for the LDS, GDS,
5390  * and GPUVM for FSA64 clients (CIK).
5391  * Returns 0 for success, errors for failure.
5392  */
static int cik_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       ENABLE_L1_FRAGMENT_PROCESSING |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       BANK_SELECT(4) |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
	/* setup context0: the kernel's GART mapping, covers the GTT range */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	/* faults in context0 are redirected to the dummy page */
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented registers, zeroed here — presumably
	 * VM related; kept as-is, verify against the register spec */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* FIXME start with 4G, once using 2 level pt switch to full
	 * vm size space
	 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* contexts 0-7 and 8-15 have separate base-address register banks */
	for (i = 1; i < 16; i++) {
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	if (rdev->family == CHIP_KAVERI) {
		u32 tmp = RREG32(CHUB_CONTROL);
		tmp &= ~BYPASS_VM;
		WREG32(CHUB_CONTROL, tmp);
	}

	/* XXX SH_MEM regs */
	/* where to put LDS, scratch, GPUVM in FSA64 space */
	mutex_lock(&rdev->srbm_mutex);
	for (i = 0; i < 16; i++) {
		cik_srbm_select(rdev, 0, 0, 0, i);
		/* CP and shaders */
		WREG32(SH_MEM_CONFIG, 0);
		WREG32(SH_MEM_APE1_BASE, 1);
		WREG32(SH_MEM_APE1_LIMIT, 0);
		WREG32(SH_MEM_BASES, 0);
		/* SDMA GFX */
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
		/* XXX SDMA RLC - todo */
	}
	cik_srbm_select(rdev, 0, 0, 0, 0);
	mutex_unlock(&rdev->srbm_mutex);

	cik_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
5506 
5507 /**
5508  * cik_pcie_gart_disable - gart disable
5509  *
5510  * @rdev: radeon_device pointer
5511  *
5512  * This disables all VM page table (CIK).
5513  */
static void cik_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control: L1 TLB left disabled, system access passes through */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache: ENABLE_L2_CACHE deliberately not set here */
	WREG32(VM_L2_CNTL,
	       ENABLE_L2_FRAGMENT_PROCESSING |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
	radeon_gart_table_vram_unpin(rdev);
}
5534 
5535 /**
5536  * cik_pcie_gart_fini - vm fini callback
5537  *
5538  * @rdev: radeon_device pointer
5539  *
5540  * Tears down the driver GART/VM setup (CIK).
5541  */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	/* disable the hw, then free the table and driver state */
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5548 
5549 /* vm parser */
5550 /**
5551  * cik_ib_parse - vm ib_parse callback
5552  *
5553  * @rdev: radeon_device pointer
5554  * @ib: indirect buffer pointer
5555  *
5556  * CIK uses hw IB checking so this is a nop (CIK).
5557  */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	/* IB validation is handled in hardware on CIK, nothing to do */
	return 0;
}
5562 
5563 /*
5564  * vm
5565  * VMID 0 is the physical GPU addresses as used by the kernel.
5566  * VMIDs 1-15 are used for userspace clients and are handled
5567  * by the radeon vm/hsa code.
5568  */
5569 /**
5570  * cik_vm_init - cik vm init callback
5571  *
5572  * @rdev: radeon_device pointer
5573  *
5574  * Inits cik specific vm parameters (number of VMs, base of vram for
5575  * VMIDs 1-15) (CIK).
5576  * Returns 0 for success.
5577  */
5578 int cik_vm_init(struct radeon_device *rdev)
5579 {
5580 	/* number of VMs */
5581 	rdev->vm_manager.nvm = 16;
5582 	/* base offset of vram pages */
5583 	if (rdev->flags & RADEON_IS_IGP) {
5584 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5585 		tmp <<= 22;
5586 		rdev->vm_manager.vram_base_offset = tmp;
5587 	} else
5588 		rdev->vm_manager.vram_base_offset = 0;
5589 
5590 	return 0;
5591 }
5592 
5593 /**
5594  * cik_vm_fini - cik vm fini callback
5595  *
5596  * @rdev: radeon_device pointer
5597  *
5598  * Tear down any asic specific VM setup (CIK).
5599  */
void cik_vm_fini(struct radeon_device *rdev)
{
	/* no asic specific VM state to tear down on CIK */
}
5603 
5604 /**
5605  * cik_vm_decode_fault - print human readable fault info
5606  *
5607  * @rdev: radeon_device pointer
5608  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5609  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5610  *
5611  * Print human readable fault information (CIK).
5612  */
static void cik_vm_decode_fault(struct radeon_device *rdev,
				u32 status, u32 addr, u32 mc_client)
{
	u32 mc_id;
	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
	/* mc_client packs a four character block name, MSB first */
	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };

	/* Hawaii uses a wider client-id field than the other CIK parts */
	if (rdev->family == CHIP_HAWAII)
		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
	else
		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;

	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
	       protections, vmid, addr,
	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
	       block, mc_client, mc_id);
}
5632 
5633 /**
5634  * cik_vm_flush - cik vm flush using the CP
5635  *
5636  * @rdev: radeon_device pointer
5637  *
5638  * Update the page table base and flush the VM TLB
5639  * using the CP (CIK).
5640  */
5641 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5642 {
5643 	struct radeon_ring *ring = &rdev->ring[ridx];
5644 
5645 	if (vm == NULL)
5646 		return;
5647 
5648 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5649 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5650 				 WRITE_DATA_DST_SEL(0)));
5651 	if (vm->id < 8) {
5652 		radeon_ring_write(ring,
5653 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5654 	} else {
5655 		radeon_ring_write(ring,
5656 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5657 	}
5658 	radeon_ring_write(ring, 0);
5659 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5660 
5661 	/* update SH_MEM_* regs */
5662 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5663 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5664 				 WRITE_DATA_DST_SEL(0)));
5665 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5666 	radeon_ring_write(ring, 0);
5667 	radeon_ring_write(ring, VMID(vm->id));
5668 
5669 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5670 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5671 				 WRITE_DATA_DST_SEL(0)));
5672 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5673 	radeon_ring_write(ring, 0);
5674 
5675 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5676 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5677 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5678 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5679 
5680 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5681 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5682 				 WRITE_DATA_DST_SEL(0)));
5683 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5684 	radeon_ring_write(ring, 0);
5685 	radeon_ring_write(ring, VMID(0));
5686 
5687 	/* HDP flush */
5688 	cik_hdp_flush_cp_ring_emit(rdev, ridx);
5689 
5690 	/* bits 0-15 are the VM contexts0-15 */
5691 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5692 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5693 				 WRITE_DATA_DST_SEL(0)));
5694 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5695 	radeon_ring_write(ring, 0);
5696 	radeon_ring_write(ring, 1 << vm->id);
5697 
5698 	/* compute doesn't have PFP */
5699 	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5700 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5701 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5702 		radeon_ring_write(ring, 0x0);
5703 	}
5704 }
5705 
5706 /*
5707  * RLC
5708  * The RLC is a multi-purpose microengine that handles a
5709  * variety of functions, the most important of which is
5710  * the interrupt controller.
5711  */
5712 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5713 					  bool enable)
5714 {
5715 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5716 
5717 	if (enable)
5718 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5719 	else
5720 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5721 	WREG32(CP_INT_CNTL_RING0, tmp);
5722 }
5723 
5724 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5725 {
5726 	u32 tmp;
5727 
5728 	tmp = RREG32(RLC_LB_CNTL);
5729 	if (enable)
5730 		tmp |= LOAD_BALANCE_ENABLE;
5731 	else
5732 		tmp &= ~LOAD_BALANCE_ENABLE;
5733 	WREG32(RLC_LB_CNTL, tmp);
5734 }
5735 
5736 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5737 {
5738 	u32 i, j, k;
5739 	u32 mask;
5740 
5741 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5742 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5743 			cik_select_se_sh(rdev, i, j);
5744 			for (k = 0; k < rdev->usec_timeout; k++) {
5745 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5746 					break;
5747 				udelay(1);
5748 			}
5749 		}
5750 	}
5751 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5752 
5753 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5754 	for (k = 0; k < rdev->usec_timeout; k++) {
5755 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5756 			break;
5757 		udelay(1);
5758 	}
5759 }
5760 
5761 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5762 {
5763 	u32 tmp;
5764 
5765 	tmp = RREG32(RLC_CNTL);
5766 	if (tmp != rlc)
5767 		WREG32(RLC_CNTL, rlc);
5768 }
5769 
5770 static u32 cik_halt_rlc(struct radeon_device *rdev)
5771 {
5772 	u32 data, orig;
5773 
5774 	orig = data = RREG32(RLC_CNTL);
5775 
5776 	if (data & RLC_ENABLE) {
5777 		u32 i;
5778 
5779 		data &= ~RLC_ENABLE;
5780 		WREG32(RLC_CNTL, data);
5781 
5782 		for (i = 0; i < rdev->usec_timeout; i++) {
5783 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5784 				break;
5785 			udelay(1);
5786 		}
5787 
5788 		cik_wait_for_rlc_serdes(rdev);
5789 	}
5790 
5791 	return orig;
5792 }
5793 
/**
 * cik_enter_rlc_safe_mode - ask the RLC to enter safe mode
 *
 * @rdev: radeon_device pointer
 *
 * Posts the ENTER_RLC_SAFE_MODE message via RLC_GPR_REG2 and busy-waits
 * (bounded by rdev->usec_timeout) first for the gfx power/clock status
 * bits and then for the RLC to clear the request bit.
 */
void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
{
	u32 tmp, i, mask;

	/* post the request */
	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
	WREG32(RLC_GPR_REG2, tmp);

	/* wait for both gfx power and clock status bits to assert */
	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
			break;
		udelay(1);
	}

	/* wait for the RLC to acknowledge by clearing REQ */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
			break;
		udelay(1);
	}
}
5814 
5815 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5816 {
5817 	u32 tmp;
5818 
5819 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5820 	WREG32(RLC_GPR_REG2, tmp);
5821 }
5822 
5823 /**
5824  * cik_rlc_stop - stop the RLC ME
5825  *
5826  * @rdev: radeon_device pointer
5827  *
5828  * Halt the RLC ME (MicroEngine) (CIK).
5829  */
5830 static void cik_rlc_stop(struct radeon_device *rdev)
5831 {
5832 	WREG32(RLC_CNTL, 0);
5833 
5834 	cik_enable_gui_idle_interrupt(rdev, false);
5835 
5836 	cik_wait_for_rlc_serdes(rdev);
5837 }
5838 
5839 /**
5840  * cik_rlc_start - start the RLC ME
5841  *
5842  * @rdev: radeon_device pointer
5843  *
5844  * Unhalt the RLC ME (MicroEngine) (CIK).
5845  */
5846 static void cik_rlc_start(struct radeon_device *rdev)
5847 {
5848 	WREG32(RLC_CNTL, RLC_ENABLE);
5849 
5850 	cik_enable_gui_idle_interrupt(rdev, true);
5851 
5852 	udelay(50);
5853 }
5854 
5855 /**
5856  * cik_rlc_resume - setup the RLC hw
5857  *
5858  * @rdev: radeon_device pointer
5859  *
5860  * Initialize the RLC registers, load the ucode,
5861  * and start the RLC (CIK).
5862  * Returns 0 for success, -EINVAL if the ucode is not available.
5863  */
5864 static int cik_rlc_resume(struct radeon_device *rdev)
5865 {
5866 	u32 i, size, tmp;
5867 	const __be32 *fw_data;
5868 
5869 	if (!rdev->rlc_fw)
5870 		return -EINVAL;
5871 
5872 	switch (rdev->family) {
5873 	case CHIP_BONAIRE:
5874 	case CHIP_HAWAII:
5875 	default:
5876 		size = BONAIRE_RLC_UCODE_SIZE;
5877 		break;
5878 	case CHIP_KAVERI:
5879 		size = KV_RLC_UCODE_SIZE;
5880 		break;
5881 	case CHIP_KABINI:
5882 		size = KB_RLC_UCODE_SIZE;
5883 		break;
5884 	case CHIP_MULLINS:
5885 		size = ML_RLC_UCODE_SIZE;
5886 		break;
5887 	}
5888 
5889 	cik_rlc_stop(rdev);
5890 
5891 	/* disable CG */
5892 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5893 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5894 
5895 	si_rlc_reset(rdev);
5896 
5897 	cik_init_pg(rdev);
5898 
5899 	cik_init_cg(rdev);
5900 
5901 	WREG32(RLC_LB_CNTR_INIT, 0);
5902 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5903 
5904 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5905 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5906 	WREG32(RLC_LB_PARAMS, 0x00600408);
5907 	WREG32(RLC_LB_CNTL, 0x80000004);
5908 
5909 	WREG32(RLC_MC_CNTL, 0);
5910 	WREG32(RLC_UCODE_CNTL, 0);
5911 
5912 	fw_data = (const __be32 *)rdev->rlc_fw->data;
5913 		WREG32(RLC_GPM_UCODE_ADDR, 0);
5914 	for (i = 0; i < size; i++)
5915 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5916 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5917 
5918 	/* XXX - find out what chips support lbpw */
5919 	cik_enable_lbpw(rdev, false);
5920 
5921 	if (rdev->family == CHIP_BONAIRE)
5922 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5923 
5924 	cik_rlc_start(rdev);
5925 
5926 	return 0;
5927 }
5928 
/* Enable/disable gfx coarse-grained clock gating (CGCG) and
 * coarse-grained light sleep (CGLS). */
static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp, tmp2;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		cik_enable_gui_idle_interrupt(rdev, true);

		/* halt the RLC while reprogramming the serdes, restore after */
		tmp = cik_halt_rlc(rdev);

		/* broadcast the serdes write to every SE/SH */
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
		WREG32(RLC_SERDES_WR_CTRL, tmp2);

		cik_update_rlc(rdev, tmp);

		data |= CGCG_EN | CGLS_EN;
	} else {
		cik_enable_gui_idle_interrupt(rdev, false);

		/* NOTE(review): four back-to-back reads with results discarded —
		 * presumably dummy reads to let the CB sclk gating settle;
		 * confirm before removing */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);

}
5964 
/* Enable/disable gfx medium-grained clock gating (MGCG) and the
 * related memory light sleep / CGTS options, per rdev->cg_flags. */
static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
				/* CP memory light sleep */
				orig = data = RREG32(CP_MEM_SLP_CNTL);
				data |= CP_MEM_LS_EN;
				if (orig != data)
					WREG32(CP_MEM_SLP_CNTL, data);
			}
		}

		/* clear bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xfffffffd;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* halt the RLC while reprogramming the serdes, restore after */
		tmp = cik_halt_rlc(rdev);

		/* broadcast the serdes write to every SE/SH */
		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);

		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
			/* CGTS (texture clock gating) SM configuration */
			orig = data = RREG32(CGTS_SM_CTRL_REG);
			data &= ~SM_MODE_MASK;
			data |= SM_MODE(0x2);
			data |= SM_MODE_ENABLE;
			data &= ~CGTS_OVERRIDE;
			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
				data &= ~CGTS_LS_OVERRIDE;
			data &= ~ON_MONITOR_ADD_MASK;
			data |= ON_MONITOR_ADD_EN;
			data |= ON_MONITOR_ADD(0x96);
			if (orig != data)
				WREG32(CGTS_SM_CTRL_REG, data);
		}
	} else {
		/* set bit 1 of the MGCG override */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000002;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off RLC memory light sleep */
		data = RREG32(RLC_MEM_SLP_CNTL);
		if (data & RLC_MEM_LS_EN) {
			data &= ~RLC_MEM_LS_EN;
			WREG32(RLC_MEM_SLP_CNTL, data);
		}

		/* turn off CP memory light sleep */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* override CGTS */
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* halt the RLC while reprogramming the serdes, restore after */
		tmp = cik_halt_rlc(rdev);

		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
		WREG32(RLC_SERDES_WR_CTRL, data);

		cik_update_rlc(rdev, tmp);
	}
}
6043 
/* MC (memory controller) registers that carry both the MGCG and LS
 * enable bits; iterated by cik_enable_mc_ls()/cik_enable_mc_mgcg(). */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
6056 
6057 static void cik_enable_mc_ls(struct radeon_device *rdev,
6058 			     bool enable)
6059 {
6060 	int i;
6061 	u32 orig, data;
6062 
6063 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6064 		orig = data = RREG32(mc_cg_registers[i]);
6065 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
6066 			data |= MC_LS_ENABLE;
6067 		else
6068 			data &= ~MC_LS_ENABLE;
6069 		if (data != orig)
6070 			WREG32(mc_cg_registers[i], data);
6071 	}
6072 }
6073 
6074 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
6075 			       bool enable)
6076 {
6077 	int i;
6078 	u32 orig, data;
6079 
6080 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
6081 		orig = data = RREG32(mc_cg_registers[i]);
6082 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
6083 			data |= MC_CG_ENABLE;
6084 		else
6085 			data &= ~MC_CG_ENABLE;
6086 		if (data != orig)
6087 			WREG32(mc_cg_registers[i], data);
6088 	}
6089 }
6090 
/* Toggle SDMA medium-grained clock gating on both SDMA engines. */
static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
				 bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
		/* enable: write the clock-control word directly to both engines
		 * (0x00000100 — bit meanings not visible here; TODO confirm
		 * against the SDMA register spec) */
		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
	} else {
		/* disable: set the top byte (0xff000000) on each engine —
		 * presumably clock-gating override bits; verify */
		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
		data |= 0xff000000;
		if (data != orig)
			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);

		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
		data |= 0xff000000;
		if (data != orig)
			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
	}
}
6111 
6112 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
6113 				 bool enable)
6114 {
6115 	u32 orig, data;
6116 
6117 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6118 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6119 		data |= 0x100;
6120 		if (orig != data)
6121 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6122 
6123 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6124 		data |= 0x100;
6125 		if (orig != data)
6126 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6127 	} else {
6128 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6129 		data &= ~0x100;
6130 		if (orig != data)
6131 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6132 
6133 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6134 		data &= ~0x100;
6135 		if (orig != data)
6136 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6137 	}
6138 }
6139 
/* Toggle UVD medium-grained clock gating. */
static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
				bool enable)
{
	u32 orig, data;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* NOTE(review): this read is dead — data is overwritten on the
		 * next line.  The disable path does data &= ~0xfff, so a
		 * read-modify-write (data |= 0xfff) may have been intended;
		 * confirm against UVD docs before changing. */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data = 0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		/* turn on dynamic clocking */
		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	} else {
		/* clear the low 12 gating bits */
		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		data &= ~0xfff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);
	}
}
6165 
6166 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6167 			       bool enable)
6168 {
6169 	u32 orig, data;
6170 
6171 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6172 
6173 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6174 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6175 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6176 	else
6177 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6178 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6179 
6180 	if (orig != data)
6181 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6182 }
6183 
6184 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6185 				bool enable)
6186 {
6187 	u32 orig, data;
6188 
6189 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6190 
6191 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6192 		data &= ~CLOCK_GATING_DIS;
6193 	else
6194 		data |= CLOCK_GATING_DIS;
6195 
6196 	if (orig != data)
6197 		WREG32(HDP_HOST_PATH_CNTL, data);
6198 }
6199 
6200 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6201 			      bool enable)
6202 {
6203 	u32 orig, data;
6204 
6205 	orig = data = RREG32(HDP_MEM_POWER_LS);
6206 
6207 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6208 		data |= HDP_LS_ENABLE;
6209 	else
6210 		data &= ~HDP_LS_ENABLE;
6211 
6212 	if (orig != data)
6213 		WREG32(HDP_MEM_POWER_LS, data);
6214 }
6215 
/**
 * cik_update_cg - enable/disable clock gating for a set of IP blocks
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable (true) or disable (false) clock gating
 */
void cik_update_cg(struct radeon_device *rdev,
		   u32 block, bool enable)
{

	if (block & RADEON_CG_BLOCK_GFX) {
		cik_enable_gui_idle_interrupt(rdev, false);
		/* order matters! */
		if (enable) {
			cik_enable_mgcg(rdev, true);
			cik_enable_cgcg(rdev, true);
		} else {
			cik_enable_cgcg(rdev, false);
			cik_enable_mgcg(rdev, false);
		}
		cik_enable_gui_idle_interrupt(rdev, true);
	}

	if (block & RADEON_CG_BLOCK_MC) {
		/* MC gating is only touched on dGPUs */
		if (!(rdev->flags & RADEON_IS_IGP)) {
			cik_enable_mc_mgcg(rdev, enable);
			cik_enable_mc_ls(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		cik_enable_sdma_mgcg(rdev, enable);
		cik_enable_sdma_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		cik_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		if (rdev->has_uvd)
			cik_enable_uvd_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		cik_enable_hdp_mgcg(rdev, enable);
		cik_enable_hdp_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_VCE) {
		vce_v2_0_enable_mgcg(rdev, enable);
	}
}
6263 
/* Enable clock gating on all supported blocks (gfx first, then the rest). */
static void cik_init_cg(struct radeon_device *rdev)
{

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);

	if (rdev->has_uvd)
		si_init_uvd_internal_cg(rdev);

	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), true);
}
6278 
/* Disable clock gating on all blocks, in the reverse order of cik_init_cg(). */
static void cik_fini_cg(struct radeon_device *rdev)
{
	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
			     RADEON_CG_BLOCK_SDMA |
			     RADEON_CG_BLOCK_BIF |
			     RADEON_CG_BLOCK_UVD |
			     RADEON_CG_BLOCK_HDP), false);

	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
}
6289 
6290 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6291 					  bool enable)
6292 {
6293 	u32 data, orig;
6294 
6295 	orig = data = RREG32(RLC_PG_CNTL);
6296 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6297 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6298 	else
6299 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6300 	if (orig != data)
6301 		WREG32(RLC_PG_CNTL, data);
6302 }
6303 
6304 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6305 					  bool enable)
6306 {
6307 	u32 data, orig;
6308 
6309 	orig = data = RREG32(RLC_PG_CNTL);
6310 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6311 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6312 	else
6313 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6314 	if (orig != data)
6315 		WREG32(RLC_PG_CNTL, data);
6316 }
6317 
6318 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6319 {
6320 	u32 data, orig;
6321 
6322 	orig = data = RREG32(RLC_PG_CNTL);
6323 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6324 		data &= ~DISABLE_CP_PG;
6325 	else
6326 		data |= DISABLE_CP_PG;
6327 	if (orig != data)
6328 		WREG32(RLC_PG_CNTL, data);
6329 }
6330 
6331 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6332 {
6333 	u32 data, orig;
6334 
6335 	orig = data = RREG32(RLC_PG_CNTL);
6336 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6337 		data &= ~DISABLE_GDS_PG;
6338 	else
6339 		data |= DISABLE_GDS_PG;
6340 	if (orig != data)
6341 		WREG32(RLC_PG_CNTL, data);
6342 }
6343 
6344 #define CP_ME_TABLE_SIZE    96
6345 #define CP_ME_TABLE_OFFSET  2048
6346 #define CP_MEC_TABLE_OFFSET 4096
6347 
6348 void cik_init_cp_pg_table(struct radeon_device *rdev)
6349 {
6350 	const __be32 *fw_data;
6351 	volatile u32 *dst_ptr;
6352 	int me, i, max_me = 4;
6353 	u32 bo_offset = 0;
6354 	u32 table_offset;
6355 
6356 	if (rdev->family == CHIP_KAVERI)
6357 		max_me = 5;
6358 
6359 	if (rdev->rlc.cp_table_ptr == NULL)
6360 		return;
6361 
6362 	/* write the cp table buffer */
6363 	dst_ptr = rdev->rlc.cp_table_ptr;
6364 	for (me = 0; me < max_me; me++) {
6365 		if (me == 0) {
6366 			fw_data = (const __be32 *)rdev->ce_fw->data;
6367 			table_offset = CP_ME_TABLE_OFFSET;
6368 		} else if (me == 1) {
6369 			fw_data = (const __be32 *)rdev->pfp_fw->data;
6370 			table_offset = CP_ME_TABLE_OFFSET;
6371 		} else if (me == 2) {
6372 			fw_data = (const __be32 *)rdev->me_fw->data;
6373 			table_offset = CP_ME_TABLE_OFFSET;
6374 		} else {
6375 			fw_data = (const __be32 *)rdev->mec_fw->data;
6376 			table_offset = CP_MEC_TABLE_OFFSET;
6377 		}
6378 
6379 		for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
6380 			dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6381 		}
6382 		bo_offset += CP_ME_TABLE_SIZE;
6383 	}
6384 }
6385 
/* Toggle gfx coarse-grained powergating plus the RLC auto-powergate bit. */
static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
				bool enable)
{
	u32 data, orig;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
		orig = data = RREG32(RLC_PG_CNTL);
		data |= GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data |= AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);
	} else {
		orig = data = RREG32(RLC_PG_CNTL);
		data &= ~GFX_PG_ENABLE;
		if (orig != data)
			WREG32(RLC_PG_CNTL, data);

		orig = data = RREG32(RLC_AUTO_PG_CTRL);
		data &= ~AUTO_PG_EN;
		if (orig != data)
			WREG32(RLC_AUTO_PG_CTRL, data);

		/* NOTE(review): read result is discarded — presumably a dummy
		 * read to wake the gfx block after disabling PG; confirm
		 * before removing */
		data = RREG32(DB_RENDER_CONTROL);
	}
}
6415 
6416 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6417 {
6418 	u32 mask = 0, tmp, tmp1;
6419 	int i;
6420 
6421 	cik_select_se_sh(rdev, se, sh);
6422 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6423 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6424 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6425 
6426 	tmp &= 0xffff0000;
6427 
6428 	tmp |= tmp1;
6429 	tmp >>= 16;
6430 
6431 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6432 		mask <<= 1;
6433 		mask |= 1;
6434 	}
6435 
6436 	return (~tmp) & mask;
6437 }
6438 
6439 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6440 {
6441 	u32 i, j, k, active_cu_number = 0;
6442 	u32 mask, counter, cu_bitmap;
6443 	u32 tmp = 0;
6444 
6445 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6446 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6447 			mask = 1;
6448 			cu_bitmap = 0;
6449 			counter = 0;
6450 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6451 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6452 					if (counter < 2)
6453 						cu_bitmap |= mask;
6454 					counter ++;
6455 				}
6456 				mask <<= 1;
6457 			}
6458 
6459 			active_cu_number += counter;
6460 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6461 		}
6462 	}
6463 
6464 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6465 
6466 	tmp = RREG32(RLC_MAX_PG_CU);
6467 	tmp &= ~MAX_PU_CU_MASK;
6468 	tmp |= MAX_PU_CU(active_cu_number);
6469 	WREG32(RLC_MAX_PG_CU, tmp);
6470 }
6471 
6472 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6473 				       bool enable)
6474 {
6475 	u32 data, orig;
6476 
6477 	orig = data = RREG32(RLC_PG_CNTL);
6478 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6479 		data |= STATIC_PER_CU_PG_ENABLE;
6480 	else
6481 		data &= ~STATIC_PER_CU_PG_ENABLE;
6482 	if (orig != data)
6483 		WREG32(RLC_PG_CNTL, data);
6484 }
6485 
6486 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6487 					bool enable)
6488 {
6489 	u32 data, orig;
6490 
6491 	orig = data = RREG32(RLC_PG_CNTL);
6492 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6493 		data |= DYN_PER_CU_PG_ENABLE;
6494 	else
6495 		data &= ~DYN_PER_CU_PG_ENABLE;
6496 	if (orig != data)
6497 		WREG32(RLC_PG_CNTL, data);
6498 }
6499 
#define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
#define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D

/* Initialize the RLC scratch memory and registers needed for gfx
 * coarse-grained powergating: clear-state descriptor, save/restore
 * register list, base addresses, and the auto-powergate timing. */
static void cik_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 data, orig;
	u32 i;

	if (rdev->rlc.cs_data) {
		/* write the clear-state descriptor (addr hi/lo + size) */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
	} else {
		/* no clear-state buffer: zero the three descriptor words */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
		for (i = 0; i < 3; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, 0);
	}
	if (rdev->rlc.reg_list) {
		/* upload the save/restore register list */
		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
		for (i = 0; i < rdev->rlc.reg_list_size; i++)
			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
	}

	orig = data = RREG32(RLC_PG_CNTL);
	data |= GFX_PG_SRC;
	if (orig != data)
		WREG32(RLC_PG_CNTL, data);

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);

	data = RREG32(CP_RB_WPTR_POLL_CNTL);
	data &= ~IDLE_POLL_COUNT_MASK;
	data |= IDLE_POLL_COUNT(0x60);
	WREG32(CP_RB_WPTR_POLL_CNTL, data);

	data = 0x10101010;
	WREG32(RLC_PG_DELAY, data);

	data = RREG32(RLC_PG_DELAY_2);
	data &= ~0xff;
	data |= 0x3;
	WREG32(RLC_PG_DELAY_2, data);

	/* set the save/restore gfx idle threshold */
	data = RREG32(RLC_AUTO_PG_CTRL);
	data &= ~GRBM_REG_SGIT_MASK;
	data |= GRBM_REG_SGIT(0x700);
	WREG32(RLC_AUTO_PG_CTRL, data);

}
6551 
/* Toggle all three gfx powergating variants together. */
static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
{
	cik_enable_gfx_cgpg(rdev, enable);
	cik_enable_gfx_static_mgpg(rdev, enable);
	cik_enable_gfx_dynamic_mgpg(rdev, enable);
}
6558 
6559 u32 cik_get_csb_size(struct radeon_device *rdev)
6560 {
6561 	u32 count = 0;
6562 	const struct cs_section_def *sect = NULL;
6563 	const struct cs_extent_def *ext = NULL;
6564 
6565 	if (rdev->rlc.cs_data == NULL)
6566 		return 0;
6567 
6568 	/* begin clear state */
6569 	count += 2;
6570 	/* context control state */
6571 	count += 3;
6572 
6573 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6574 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6575 			if (sect->id == SECT_CONTEXT)
6576 				count += 2 + ext->reg_count;
6577 			else
6578 				return 0;
6579 		}
6580 	}
6581 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6582 	count += 4;
6583 	/* end clear state */
6584 	count += 2;
6585 	/* clear state */
6586 	count += 2;
6587 
6588 	return count;
6589 }
6590 
/**
 * cik_get_csb_buffer - fill in the clear state buffer
 *
 * @rdev: radeon_device pointer
 * @buffer: destination buffer; must hold at least cik_get_csb_size() dwords
 *
 * Emits the PM4 packet stream the RLC replays to establish clear state:
 * preamble begin, context control, the SECT_CONTEXT register extents,
 * the per-asic raster config, preamble end, and a CLEAR_STATE packet.
 * All dwords are written little-endian.
 */
void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);

	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
	buffer[count++] = cpu_to_le32(0x80000000);
	buffer[count++] = cpu_to_le32(0x80000000);

	/* emit each context register extent as a SET_CONTEXT_REG packet */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] =
					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = cpu_to_le32(ext->extent[i]);
			} else {
				/* sizes computed by cik_get_csb_size() only
				 * cover context sections; bail out */
				return;
			}
		}
	}

	/* pa_sc_raster_config/pa_sc_raster_config1, per asic */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
	switch (rdev->family) {
	case CHIP_BONAIRE:
		buffer[count++] = cpu_to_le32(0x16000012);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KAVERI:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_KABINI:
	case CHIP_MULLINS:
		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	case CHIP_HAWAII:
		buffer[count++] = cpu_to_le32(0x3a00161a);
		buffer[count++] = cpu_to_le32(0x0000002e);
		break;
	default:
		buffer[count++] = cpu_to_le32(0x00000000);
		buffer[count++] = cpu_to_le32(0x00000000);
		break;
	}

	/* end clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);

	/* clear state */
	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
	buffer[count++] = cpu_to_le32(0);
}
6655 
/* Enable the powergating features advertised in rdev->pg_flags. */
static void cik_init_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_enable_sck_slowdown_on_pu(rdev, true);
		cik_enable_sck_slowdown_on_pd(rdev, true);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_init_gfx_cgpg(rdev);
			cik_enable_cp_pg(rdev, true);
			cik_enable_gds_pg(rdev, true);
		}
		cik_init_ao_cu_mask(rdev);
		cik_update_gfx_pg(rdev, true);
	}
}
6670 
/* Disable powergating, reversing the order used by cik_init_pg(). */
static void cik_fini_pg(struct radeon_device *rdev)
{
	if (rdev->pg_flags) {
		cik_update_gfx_pg(rdev, false);
		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
			cik_enable_cp_pg(rdev, false);
			cik_enable_gds_pg(rdev, false);
		}
	}
}
6681 
6682 /*
6683  * Interrupts
6684  * Starting with r6xx, interrupts are handled via a ring buffer.
6685  * Ring buffers are areas of GPU accessible memory that the GPU
6686  * writes interrupt vectors into and the host reads vectors out of.
6687  * There is a rptr (read pointer) that determines where the
6688  * host is currently reading, and a wptr (write pointer)
6689  * which determines where the GPU has written.  When the
6690  * pointers are equal, the ring is idle.  When the GPU
6691  * writes vectors to the ring buffer, it increments the
6692  * wptr.  When there is an interrupt, the host then starts
6693  * fetching commands and processing them until the pointers are
6694  * equal again at which point it updates the rptr.
6695  */
6696 
6697 /**
6698  * cik_enable_interrupts - Enable the interrupt ring buffer
6699  *
6700  * @rdev: radeon_device pointer
6701  *
6702  * Enable the interrupt ring buffer (CIK).
6703  */
6704 static void cik_enable_interrupts(struct radeon_device *rdev)
6705 {
6706 	u32 ih_cntl = RREG32(IH_CNTL);
6707 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6708 
6709 	ih_cntl |= ENABLE_INTR;
6710 	ih_rb_cntl |= IH_RB_ENABLE;
6711 	WREG32(IH_CNTL, ih_cntl);
6712 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6713 	rdev->ih.enabled = true;
6714 }
6715 
6716 /**
6717  * cik_disable_interrupts - Disable the interrupt ring buffer
6718  *
6719  * @rdev: radeon_device pointer
6720  *
6721  * Disable the interrupt ring buffer (CIK).
6722  */
6723 static void cik_disable_interrupts(struct radeon_device *rdev)
6724 {
6725 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6726 	u32 ih_cntl = RREG32(IH_CNTL);
6727 
6728 	ih_rb_cntl &= ~IH_RB_ENABLE;
6729 	ih_cntl &= ~ENABLE_INTR;
6730 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6731 	WREG32(IH_CNTL, ih_cntl);
6732 	/* set rptr, wptr to 0 */
6733 	WREG32(IH_RB_RPTR, 0);
6734 	WREG32(IH_RB_WPTR, 0);
6735 	rdev->ih.enabled = false;
6736 	rdev->ih.rptr = 0;
6737 }
6738 
6739 /**
6740  * cik_disable_interrupt_state - Disable all interrupt sources
6741  *
6742  * @rdev: radeon_device pointer
6743  *
6744  * Clear all interrupt enable bits used by the driver (CIK).
6745  */
static void cik_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	/* gfx ring */
	/* keep only the context busy/empty enables; every other gfx ring
	 * interrupt enable bit is cleared
	 */
	tmp = RREG32(CP_INT_CNTL_RING0) &
		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING0, tmp);
	/* sdma */
	/* clear the trap enable on both SDMA engines */
	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
	/* compute queues */
	/* both compute MEs, all four pipes each */
	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
	/* grbm */
	WREG32(GRBM_INT_CNTL, 0);
	/* vline/vblank, etc. */
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	if (rdev->num_crtc >= 4) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}
	/* pflip */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* dac hotplug */
	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);

	/* digital hotplug */
	/* keep only the polarity bit so the configured sense survives */
	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD1_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD2_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD3_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD4_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD5_INT_CONTROL, tmp);
	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
	WREG32(DC_HPD6_INT_CONTROL, tmp);

}
6813 
6814 /**
6815  * cik_irq_init - init and enable the interrupt ring
6816  *
6817  * @rdev: radeon_device pointer
6818  *
6819  * Allocate a ring buffer for the interrupt controller,
6820  * enable the RLC, disable interrupts, enable the IH
6821  * ring buffer and enable it (CIK).
 * Called at device load and resume.
6823  * Returns 0 for success, errors for failure.
6824  */
static int cik_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	cik_disable_interrupts(rdev);

	/* init rlc */
	ret = cik_rlc_resume(rdev);
	if (ret) {
		/* RLC failed to come up; release the IH ring we allocated */
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* XXX this should actually be a bus address, not an MC address. same on older asics */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the ring size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	cik_enable_interrupts(rdev);

	return ret;
}
6895 
6896 /**
6897  * cik_irq_set - enable/disable interrupt sources
6898  *
6899  * @rdev: radeon_device pointer
6900  *
6901  * Enable interrupt sources on the GPU (vblanks, hpd,
6902  * etc.) (CIK).
6903  * Returns 0 for success, errors for failure.
6904  */
6905 int cik_irq_set(struct radeon_device *rdev)
6906 {
6907 	u32 cp_int_cntl;
6908 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6909 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6910 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6911 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6912 	u32 grbm_int_cntl = 0;
6913 	u32 dma_cntl, dma_cntl1;
6914 	u32 thermal_int;
6915 
6916 	if (!rdev->irq.installed) {
6917 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6918 		return -EINVAL;
6919 	}
6920 	/* don't enable anything if the ih is disabled */
6921 	if (!rdev->ih.enabled) {
6922 		cik_disable_interrupts(rdev);
6923 		/* force the active interrupt state to all disabled */
6924 		cik_disable_interrupt_state(rdev);
6925 		return 0;
6926 	}
6927 
6928 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6929 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6930 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6931 
6932 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6933 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6934 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6935 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6936 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6937 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6938 
6939 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6940 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6941 
6942 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6943 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6944 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6945 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6946 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6947 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6948 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6949 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6950 
6951 	if (rdev->flags & RADEON_IS_IGP)
6952 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6953 			~(THERM_INTH_MASK | THERM_INTL_MASK);
6954 	else
6955 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6956 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6957 
6958 	/* enable CP interrupts on all rings */
6959 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6960 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
6961 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6962 	}
6963 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6964 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6965 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6966 		if (ring->me == 1) {
6967 			switch (ring->pipe) {
6968 			case 0:
6969 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6970 				break;
6971 			case 1:
6972 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6973 				break;
6974 			case 2:
6975 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6976 				break;
6977 			case 3:
6978 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6979 				break;
6980 			default:
6981 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6982 				break;
6983 			}
6984 		} else if (ring->me == 2) {
6985 			switch (ring->pipe) {
6986 			case 0:
6987 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6988 				break;
6989 			case 1:
6990 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6991 				break;
6992 			case 2:
6993 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6994 				break;
6995 			case 3:
6996 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6997 				break;
6998 			default:
6999 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
7000 				break;
7001 			}
7002 		} else {
7003 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
7004 		}
7005 	}
7006 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
7007 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7008 		DRM_DEBUG("si_irq_set: sw int cp2\n");
7009 		if (ring->me == 1) {
7010 			switch (ring->pipe) {
7011 			case 0:
7012 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
7013 				break;
7014 			case 1:
7015 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
7016 				break;
7017 			case 2:
7018 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7019 				break;
7020 			case 3:
7021 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
7022 				break;
7023 			default:
7024 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7025 				break;
7026 			}
7027 		} else if (ring->me == 2) {
7028 			switch (ring->pipe) {
7029 			case 0:
7030 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
7031 				break;
7032 			case 1:
7033 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
7034 				break;
7035 			case 2:
7036 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7037 				break;
7038 			case 3:
7039 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
7040 				break;
7041 			default:
7042 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
7043 				break;
7044 			}
7045 		} else {
7046 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
7047 		}
7048 	}
7049 
7050 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
7051 		DRM_DEBUG("cik_irq_set: sw int dma\n");
7052 		dma_cntl |= TRAP_ENABLE;
7053 	}
7054 
7055 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
7056 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
7057 		dma_cntl1 |= TRAP_ENABLE;
7058 	}
7059 
7060 	if (rdev->irq.crtc_vblank_int[0] ||
7061 	    atomic_read(&rdev->irq.pflip[0])) {
7062 		DRM_DEBUG("cik_irq_set: vblank 0\n");
7063 		crtc1 |= VBLANK_INTERRUPT_MASK;
7064 	}
7065 	if (rdev->irq.crtc_vblank_int[1] ||
7066 	    atomic_read(&rdev->irq.pflip[1])) {
7067 		DRM_DEBUG("cik_irq_set: vblank 1\n");
7068 		crtc2 |= VBLANK_INTERRUPT_MASK;
7069 	}
7070 	if (rdev->irq.crtc_vblank_int[2] ||
7071 	    atomic_read(&rdev->irq.pflip[2])) {
7072 		DRM_DEBUG("cik_irq_set: vblank 2\n");
7073 		crtc3 |= VBLANK_INTERRUPT_MASK;
7074 	}
7075 	if (rdev->irq.crtc_vblank_int[3] ||
7076 	    atomic_read(&rdev->irq.pflip[3])) {
7077 		DRM_DEBUG("cik_irq_set: vblank 3\n");
7078 		crtc4 |= VBLANK_INTERRUPT_MASK;
7079 	}
7080 	if (rdev->irq.crtc_vblank_int[4] ||
7081 	    atomic_read(&rdev->irq.pflip[4])) {
7082 		DRM_DEBUG("cik_irq_set: vblank 4\n");
7083 		crtc5 |= VBLANK_INTERRUPT_MASK;
7084 	}
7085 	if (rdev->irq.crtc_vblank_int[5] ||
7086 	    atomic_read(&rdev->irq.pflip[5])) {
7087 		DRM_DEBUG("cik_irq_set: vblank 5\n");
7088 		crtc6 |= VBLANK_INTERRUPT_MASK;
7089 	}
7090 	if (rdev->irq.hpd[0]) {
7091 		DRM_DEBUG("cik_irq_set: hpd 1\n");
7092 		hpd1 |= DC_HPDx_INT_EN;
7093 	}
7094 	if (rdev->irq.hpd[1]) {
7095 		DRM_DEBUG("cik_irq_set: hpd 2\n");
7096 		hpd2 |= DC_HPDx_INT_EN;
7097 	}
7098 	if (rdev->irq.hpd[2]) {
7099 		DRM_DEBUG("cik_irq_set: hpd 3\n");
7100 		hpd3 |= DC_HPDx_INT_EN;
7101 	}
7102 	if (rdev->irq.hpd[3]) {
7103 		DRM_DEBUG("cik_irq_set: hpd 4\n");
7104 		hpd4 |= DC_HPDx_INT_EN;
7105 	}
7106 	if (rdev->irq.hpd[4]) {
7107 		DRM_DEBUG("cik_irq_set: hpd 5\n");
7108 		hpd5 |= DC_HPDx_INT_EN;
7109 	}
7110 	if (rdev->irq.hpd[5]) {
7111 		DRM_DEBUG("cik_irq_set: hpd 6\n");
7112 		hpd6 |= DC_HPDx_INT_EN;
7113 	}
7114 
7115 	if (rdev->irq.dpm_thermal) {
7116 		DRM_DEBUG("dpm thermal\n");
7117 		if (rdev->flags & RADEON_IS_IGP)
7118 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
7119 		else
7120 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
7121 	}
7122 
7123 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
7124 
7125 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
7126 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
7127 
7128 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
7129 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
7130 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
7131 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7132 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7133 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7134 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7135 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7136 
7137 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7138 
7139 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7140 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7141 	if (rdev->num_crtc >= 4) {
7142 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7143 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7144 	}
7145 	if (rdev->num_crtc >= 6) {
7146 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7147 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7148 	}
7149 
7150 	if (rdev->num_crtc >= 2) {
7151 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
7152 		       GRPH_PFLIP_INT_MASK);
7153 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
7154 		       GRPH_PFLIP_INT_MASK);
7155 	}
7156 	if (rdev->num_crtc >= 4) {
7157 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
7158 		       GRPH_PFLIP_INT_MASK);
7159 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
7160 		       GRPH_PFLIP_INT_MASK);
7161 	}
7162 	if (rdev->num_crtc >= 6) {
7163 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
7164 		       GRPH_PFLIP_INT_MASK);
7165 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
7166 		       GRPH_PFLIP_INT_MASK);
7167 	}
7168 
7169 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7170 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7171 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7172 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7173 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7174 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7175 
7176 	if (rdev->flags & RADEON_IS_IGP)
7177 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7178 	else
7179 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
7180 
7181 	return 0;
7182 }
7183 
7184 /**
7185  * cik_irq_ack - ack interrupt sources
7186  *
7187  * @rdev: radeon_device pointer
7188  *
7189  * Ack interrupt sources on the GPU (vblanks, hpd,
 * etc.) (CIK).  Certain interrupt sources are sw
7191  * generated and do not require an explicit ack.
7192  */
7193 static inline void cik_irq_ack(struct radeon_device *rdev)
7194 {
7195 	u32 tmp;
7196 
7197 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7198 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7199 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7200 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7201 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7202 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7203 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7204 
7205 	rdev->irq.stat_regs.cik.d1grph_int = RREG32(GRPH_INT_STATUS +
7206 		EVERGREEN_CRTC0_REGISTER_OFFSET);
7207 	rdev->irq.stat_regs.cik.d2grph_int = RREG32(GRPH_INT_STATUS +
7208 		EVERGREEN_CRTC1_REGISTER_OFFSET);
7209 	if (rdev->num_crtc >= 4) {
7210 		rdev->irq.stat_regs.cik.d3grph_int = RREG32(GRPH_INT_STATUS +
7211 			EVERGREEN_CRTC2_REGISTER_OFFSET);
7212 		rdev->irq.stat_regs.cik.d4grph_int = RREG32(GRPH_INT_STATUS +
7213 			EVERGREEN_CRTC3_REGISTER_OFFSET);
7214 	}
7215 	if (rdev->num_crtc >= 6) {
7216 		rdev->irq.stat_regs.cik.d5grph_int = RREG32(GRPH_INT_STATUS +
7217 			EVERGREEN_CRTC4_REGISTER_OFFSET);
7218 		rdev->irq.stat_regs.cik.d6grph_int = RREG32(GRPH_INT_STATUS +
7219 			EVERGREEN_CRTC5_REGISTER_OFFSET);
7220 	}
7221 
7222 	if (rdev->irq.stat_regs.cik.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
7223 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET,
7224 		       GRPH_PFLIP_INT_CLEAR);
7225 	if (rdev->irq.stat_regs.cik.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
7226 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET,
7227 		       GRPH_PFLIP_INT_CLEAR);
7228 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7229 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7230 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7231 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7232 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7233 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7234 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7235 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7236 
7237 	if (rdev->num_crtc >= 4) {
7238 		if (rdev->irq.stat_regs.cik.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
7239 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET,
7240 			       GRPH_PFLIP_INT_CLEAR);
7241 		if (rdev->irq.stat_regs.cik.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
7242 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET,
7243 			       GRPH_PFLIP_INT_CLEAR);
7244 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7245 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7246 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7247 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7248 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7249 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7250 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7251 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7252 	}
7253 
7254 	if (rdev->num_crtc >= 6) {
7255 		if (rdev->irq.stat_regs.cik.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
7256 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET,
7257 			       GRPH_PFLIP_INT_CLEAR);
7258 		if (rdev->irq.stat_regs.cik.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
7259 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET,
7260 			       GRPH_PFLIP_INT_CLEAR);
7261 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7262 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7263 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7264 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7265 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7266 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7267 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7268 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7269 	}
7270 
7271 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7272 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7273 		tmp |= DC_HPDx_INT_ACK;
7274 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7275 	}
7276 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7277 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7278 		tmp |= DC_HPDx_INT_ACK;
7279 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7280 	}
7281 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7282 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7283 		tmp |= DC_HPDx_INT_ACK;
7284 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7285 	}
7286 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7287 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7288 		tmp |= DC_HPDx_INT_ACK;
7289 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7290 	}
7291 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7292 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7293 		tmp |= DC_HPDx_INT_ACK;
7294 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7295 	}
7296 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7297 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7298 		tmp |= DC_HPDx_INT_ACK;
7299 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7300 	}
7301 }
7302 
7303 /**
7304  * cik_irq_disable - disable interrupts
7305  *
7306  * @rdev: radeon_device pointer
7307  *
7308  * Disable interrupts on the hw (CIK).
7309  */
static void cik_irq_disable(struct radeon_device *rdev)
{
	/* stop the IH ring buffer first so no new vectors are written */
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	/* then clear every interrupt source enable used by the driver */
	cik_disable_interrupt_state(rdev);
}
7318 
7319 /**
 * cik_irq_suspend - disable interrupts for suspend
7321  *
7322  * @rdev: radeon_device pointer
7323  *
7324  * Disable interrupts and stop the RLC (CIK).
7325  * Used for suspend.
7326  */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* disable all interrupt sources and the IH ring */
	cik_irq_disable(rdev);
	/* stop the RLC as well before going down */
	cik_rlc_stop(rdev);
}
7332 
7333 /**
7334  * cik_irq_fini - tear down interrupt support
7335  *
7336  * @rdev: radeon_device pointer
7337  *
7338  * Disable interrupts on the hw and free the IH ring
7339  * buffer (CIK).
7340  * Used for driver unload.
7341  */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* quiesce the hw exactly as for suspend */
	cik_irq_suspend(rdev);
	/* then release the IH ring buffer memory */
	r600_ih_ring_fini(rdev);
}
7347 
7348 /**
7349  * cik_get_ih_wptr - get the IH ring buffer wptr
7350  *
7351  * @rdev: radeon_device pointer
7352  *
7353  * Get the IH ring buffer wptr from either the register
7354  * or the writeback memory buffer (CIK).  Also check for
7355  * ring buffer overflow and deal with it.
7356  * Used by cik_irq_process().
7357  * Returns the value of the wptr.
7358  */
7359 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7360 {
7361 	u32 wptr, tmp;
7362 
7363 	if (rdev->wb.enabled)
7364 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7365 	else
7366 		wptr = RREG32(IH_RB_WPTR);
7367 
7368 	if (wptr & RB_OVERFLOW) {
7369 		/* When a ring buffer overflow happen start parsing interrupt
7370 		 * from the last not overwritten vector (wptr + 16). Hopefully
7371 		 * this should allow us to catchup.
7372 		 */
7373 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7374 			wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
7375 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7376 		tmp = RREG32(IH_RB_CNTL);
7377 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7378 		WREG32(IH_RB_CNTL, tmp);
7379 	}
7380 	return (wptr & rdev->ih.ptr_mask);
7381 }
7382 
7383 /*        CIK IV Ring
7384  * Each IV ring entry is 128 bits:
7385  * [7:0]    - interrupt source id
7386  * [31:8]   - reserved
7387  * [59:32]  - interrupt source data
7388  * [63:60]  - reserved
7389  * [71:64]  - RINGID
7390  *            CP:
7391  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7392  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7393  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7394  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7395  *            PIPE_ID - ME0 0=3D
7396  *                    - ME1&2 compute dispatcher (4 pipes each)
7397  *            SDMA:
7398  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7399  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7400  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7401  * [79:72]  - VMID
7402  * [95:80]  - PASID
7403  * [127:96] - reserved
7404  */
7405 /**
7406  * cik_irq_process - interrupt handler
7407  *
7408  * @rdev: radeon_device pointer
7409  *
 * Interrupt handler (CIK).  Walk the IH ring,
7411  * ack interrupts and schedule work to handle
7412  * interrupt events.
7413  * Returns irq process return code.
7414  */
7415 int cik_irq_process(struct radeon_device *rdev)
7416 {
7417 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7418 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7419 	u32 wptr;
7420 	u32 rptr;
7421 	u32 src_id, src_data, ring_id;
7422 	u8 me_id, pipe_id, queue_id;
7423 	u32 ring_index;
7424 	bool queue_hotplug = false;
7425 	bool queue_reset = false;
7426 	u32 addr, status, mc_client;
7427 	bool queue_thermal = false;
7428 
7429 	if (!rdev->ih.enabled || rdev->shutdown)
7430 		return IRQ_NONE;
7431 
7432 	wptr = cik_get_ih_wptr(rdev);
7433 
7434 restart_ih:
7435 	/* is somebody else already processing irqs? */
7436 	if (atomic_xchg(&rdev->ih.lock, 1))
7437 		return IRQ_NONE;
7438 
7439 	rptr = rdev->ih.rptr;
7440 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7441 
7442 	/* Order reading of wptr vs. reading of IH ring data */
7443 	rmb();
7444 
7445 	/* display interrupts */
7446 	cik_irq_ack(rdev);
7447 
7448 	while (rptr != wptr) {
7449 		/* wptr/rptr are in bytes! */
7450 		ring_index = rptr / 4;
7451 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7452 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7453 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7454 
7455 		switch (src_id) {
7456 		case 1: /* D1 vblank/vline */
7457 			switch (src_data) {
7458 			case 0: /* D1 vblank */
7459 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7460 					if (rdev->irq.crtc_vblank_int[0]) {
7461 						drm_handle_vblank(rdev->ddev, 0);
7462 						rdev->pm.vblank_sync = true;
7463 						wake_up(&rdev->irq.vblank_queue);
7464 					}
7465 					if (atomic_read(&rdev->irq.pflip[0]))
7466 						radeon_crtc_handle_vblank(rdev, 0);
7467 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7468 					DRM_DEBUG("IH: D1 vblank\n");
7469 				}
7470 				break;
7471 			case 1: /* D1 vline */
7472 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7473 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7474 					DRM_DEBUG("IH: D1 vline\n");
7475 				}
7476 				break;
7477 			default:
7478 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7479 				break;
7480 			}
7481 			break;
7482 		case 2: /* D2 vblank/vline */
7483 			switch (src_data) {
7484 			case 0: /* D2 vblank */
7485 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7486 					if (rdev->irq.crtc_vblank_int[1]) {
7487 						drm_handle_vblank(rdev->ddev, 1);
7488 						rdev->pm.vblank_sync = true;
7489 						wake_up(&rdev->irq.vblank_queue);
7490 					}
7491 					if (atomic_read(&rdev->irq.pflip[1]))
7492 						radeon_crtc_handle_vblank(rdev, 1);
7493 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7494 					DRM_DEBUG("IH: D2 vblank\n");
7495 				}
7496 				break;
7497 			case 1: /* D2 vline */
7498 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7499 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7500 					DRM_DEBUG("IH: D2 vline\n");
7501 				}
7502 				break;
7503 			default:
7504 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7505 				break;
7506 			}
7507 			break;
7508 		case 3: /* D3 vblank/vline */
7509 			switch (src_data) {
7510 			case 0: /* D3 vblank */
7511 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7512 					if (rdev->irq.crtc_vblank_int[2]) {
7513 						drm_handle_vblank(rdev->ddev, 2);
7514 						rdev->pm.vblank_sync = true;
7515 						wake_up(&rdev->irq.vblank_queue);
7516 					}
7517 					if (atomic_read(&rdev->irq.pflip[2]))
7518 						radeon_crtc_handle_vblank(rdev, 2);
7519 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7520 					DRM_DEBUG("IH: D3 vblank\n");
7521 				}
7522 				break;
7523 			case 1: /* D3 vline */
7524 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7525 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7526 					DRM_DEBUG("IH: D3 vline\n");
7527 				}
7528 				break;
7529 			default:
7530 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7531 				break;
7532 			}
7533 			break;
7534 		case 4: /* D4 vblank/vline */
7535 			switch (src_data) {
7536 			case 0: /* D4 vblank */
7537 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7538 					if (rdev->irq.crtc_vblank_int[3]) {
7539 						drm_handle_vblank(rdev->ddev, 3);
7540 						rdev->pm.vblank_sync = true;
7541 						wake_up(&rdev->irq.vblank_queue);
7542 					}
7543 					if (atomic_read(&rdev->irq.pflip[3]))
7544 						radeon_crtc_handle_vblank(rdev, 3);
7545 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7546 					DRM_DEBUG("IH: D4 vblank\n");
7547 				}
7548 				break;
7549 			case 1: /* D4 vline */
7550 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7551 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7552 					DRM_DEBUG("IH: D4 vline\n");
7553 				}
7554 				break;
7555 			default:
7556 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7557 				break;
7558 			}
7559 			break;
7560 		case 5: /* D5 vblank/vline */
7561 			switch (src_data) {
7562 			case 0: /* D5 vblank */
7563 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7564 					if (rdev->irq.crtc_vblank_int[4]) {
7565 						drm_handle_vblank(rdev->ddev, 4);
7566 						rdev->pm.vblank_sync = true;
7567 						wake_up(&rdev->irq.vblank_queue);
7568 					}
7569 					if (atomic_read(&rdev->irq.pflip[4]))
7570 						radeon_crtc_handle_vblank(rdev, 4);
7571 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7572 					DRM_DEBUG("IH: D5 vblank\n");
7573 				}
7574 				break;
7575 			case 1: /* D5 vline */
7576 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7577 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7578 					DRM_DEBUG("IH: D5 vline\n");
7579 				}
7580 				break;
7581 			default:
7582 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7583 				break;
7584 			}
7585 			break;
7586 		case 6: /* D6 vblank/vline */
7587 			switch (src_data) {
7588 			case 0: /* D6 vblank */
7589 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7590 					if (rdev->irq.crtc_vblank_int[5]) {
7591 						drm_handle_vblank(rdev->ddev, 5);
7592 						rdev->pm.vblank_sync = true;
7593 						wake_up(&rdev->irq.vblank_queue);
7594 					}
7595 					if (atomic_read(&rdev->irq.pflip[5]))
7596 						radeon_crtc_handle_vblank(rdev, 5);
7597 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7598 					DRM_DEBUG("IH: D6 vblank\n");
7599 				}
7600 				break;
7601 			case 1: /* D6 vline */
7602 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7603 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7604 					DRM_DEBUG("IH: D6 vline\n");
7605 				}
7606 				break;
7607 			default:
7608 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7609 				break;
7610 			}
7611 			break;
7612 		case 8: /* D1 page flip */
7613 		case 10: /* D2 page flip */
7614 		case 12: /* D3 page flip */
7615 		case 14: /* D4 page flip */
7616 		case 16: /* D5 page flip */
7617 		case 18: /* D6 page flip */
7618 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
7619 			radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
7620 			break;
7621 		case 42: /* HPD hotplug */
7622 			switch (src_data) {
7623 			case 0:
7624 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7625 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7626 					queue_hotplug = true;
7627 					DRM_DEBUG("IH: HPD1\n");
7628 				}
7629 				break;
7630 			case 1:
7631 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7632 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7633 					queue_hotplug = true;
7634 					DRM_DEBUG("IH: HPD2\n");
7635 				}
7636 				break;
7637 			case 2:
7638 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7639 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7640 					queue_hotplug = true;
7641 					DRM_DEBUG("IH: HPD3\n");
7642 				}
7643 				break;
7644 			case 3:
7645 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7646 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7647 					queue_hotplug = true;
7648 					DRM_DEBUG("IH: HPD4\n");
7649 				}
7650 				break;
7651 			case 4:
7652 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7653 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7654 					queue_hotplug = true;
7655 					DRM_DEBUG("IH: HPD5\n");
7656 				}
7657 				break;
7658 			case 5:
7659 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7660 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7661 					queue_hotplug = true;
7662 					DRM_DEBUG("IH: HPD6\n");
7663 				}
7664 				break;
7665 			default:
7666 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7667 				break;
7668 			}
7669 			break;
7670 		case 124: /* UVD */
7671 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7672 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7673 			break;
7674 		case 146:
7675 		case 147:
7676 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7677 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7678 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7679 			/* reset addr and status */
7680 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7681 			if (addr == 0x0 && status == 0x0)
7682 				break;
7683 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7684 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7685 				addr);
7686 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7687 				status);
7688 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7689 			break;
7690 		case 167: /* VCE */
7691 			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
7692 			switch (src_data) {
7693 			case 0:
7694 				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
7695 				break;
7696 			case 1:
7697 				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
7698 				break;
7699 			default:
7700 				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
7701 				break;
7702 			}
7703 			break;
7704 		case 176: /* GFX RB CP_INT */
7705 		case 177: /* GFX IB CP_INT */
7706 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7707 			break;
7708 		case 181: /* CP EOP event */
7709 			DRM_DEBUG("IH: CP EOP\n");
7710 			/* XXX check the bitfield order! */
7711 			me_id = (ring_id & 0x60) >> 5;
7712 			pipe_id = (ring_id & 0x18) >> 3;
7713 			queue_id = (ring_id & 0x7) >> 0;
7714 			switch (me_id) {
7715 			case 0:
7716 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7717 				break;
7718 			case 1:
7719 			case 2:
7720 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7721 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7722 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7723 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7724 				break;
7725 			}
7726 			break;
7727 		case 184: /* CP Privileged reg access */
7728 			DRM_ERROR("Illegal register access in command stream\n");
7729 			/* XXX check the bitfield order! */
7730 			me_id = (ring_id & 0x60) >> 5;
7731 			pipe_id = (ring_id & 0x18) >> 3;
7732 			queue_id = (ring_id & 0x7) >> 0;
7733 			switch (me_id) {
7734 			case 0:
7735 				/* This results in a full GPU reset, but all we need to do is soft
7736 				 * reset the CP for gfx
7737 				 */
7738 				queue_reset = true;
7739 				break;
7740 			case 1:
7741 				/* XXX compute */
7742 				queue_reset = true;
7743 				break;
7744 			case 2:
7745 				/* XXX compute */
7746 				queue_reset = true;
7747 				break;
7748 			}
7749 			break;
7750 		case 185: /* CP Privileged inst */
7751 			DRM_ERROR("Illegal instruction in command stream\n");
7752 			/* XXX check the bitfield order! */
7753 			me_id = (ring_id & 0x60) >> 5;
7754 			pipe_id = (ring_id & 0x18) >> 3;
7755 			queue_id = (ring_id & 0x7) >> 0;
7756 			switch (me_id) {
7757 			case 0:
7758 				/* This results in a full GPU reset, but all we need to do is soft
7759 				 * reset the CP for gfx
7760 				 */
7761 				queue_reset = true;
7762 				break;
7763 			case 1:
7764 				/* XXX compute */
7765 				queue_reset = true;
7766 				break;
7767 			case 2:
7768 				/* XXX compute */
7769 				queue_reset = true;
7770 				break;
7771 			}
7772 			break;
7773 		case 224: /* SDMA trap event */
7774 			/* XXX check the bitfield order! */
7775 			me_id = (ring_id & 0x3) >> 0;
7776 			queue_id = (ring_id & 0xc) >> 2;
7777 			DRM_DEBUG("IH: SDMA trap\n");
7778 			switch (me_id) {
7779 			case 0:
7780 				switch (queue_id) {
7781 				case 0:
7782 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7783 					break;
7784 				case 1:
7785 					/* XXX compute */
7786 					break;
7787 				case 2:
7788 					/* XXX compute */
7789 					break;
7790 				}
7791 				break;
7792 			case 1:
7793 				switch (queue_id) {
7794 				case 0:
7795 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7796 					break;
7797 				case 1:
7798 					/* XXX compute */
7799 					break;
7800 				case 2:
7801 					/* XXX compute */
7802 					break;
7803 				}
7804 				break;
7805 			}
7806 			break;
7807 		case 230: /* thermal low to high */
7808 			DRM_DEBUG("IH: thermal low to high\n");
7809 			rdev->pm.dpm.thermal.high_to_low = false;
7810 			queue_thermal = true;
7811 			break;
7812 		case 231: /* thermal high to low */
7813 			DRM_DEBUG("IH: thermal high to low\n");
7814 			rdev->pm.dpm.thermal.high_to_low = true;
7815 			queue_thermal = true;
7816 			break;
7817 		case 233: /* GUI IDLE */
7818 			DRM_DEBUG("IH: GUI idle\n");
7819 			break;
7820 		case 241: /* SDMA Privileged inst */
7821 		case 247: /* SDMA Privileged inst */
7822 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
7823 			/* XXX check the bitfield order! */
7824 			me_id = (ring_id & 0x3) >> 0;
7825 			queue_id = (ring_id & 0xc) >> 2;
7826 			switch (me_id) {
7827 			case 0:
7828 				switch (queue_id) {
7829 				case 0:
7830 					queue_reset = true;
7831 					break;
7832 				case 1:
7833 					/* XXX compute */
7834 					queue_reset = true;
7835 					break;
7836 				case 2:
7837 					/* XXX compute */
7838 					queue_reset = true;
7839 					break;
7840 				}
7841 				break;
7842 			case 1:
7843 				switch (queue_id) {
7844 				case 0:
7845 					queue_reset = true;
7846 					break;
7847 				case 1:
7848 					/* XXX compute */
7849 					queue_reset = true;
7850 					break;
7851 				case 2:
7852 					/* XXX compute */
7853 					queue_reset = true;
7854 					break;
7855 				}
7856 				break;
7857 			}
7858 			break;
7859 		default:
7860 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7861 			break;
7862 		}
7863 
7864 		/* wptr/rptr are in bytes! */
7865 		rptr += 16;
7866 		rptr &= rdev->ih.ptr_mask;
7867 	}
7868 	if (queue_hotplug)
7869 		schedule_work(&rdev->hotplug_work);
7870 	if (queue_reset)
7871 		schedule_work(&rdev->reset_work);
7872 	if (queue_thermal)
7873 		schedule_work(&rdev->pm.dpm.thermal.work);
7874 	rdev->ih.rptr = rptr;
7875 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
7876 	atomic_set(&rdev->ih.lock, 0);
7877 
7878 	/* make sure wptr hasn't changed while processing */
7879 	wptr = cik_get_ih_wptr(rdev);
7880 	if (wptr != rptr)
7881 		goto restart_ih;
7882 
7883 	return IRQ_HANDLED;
7884 }
7885 
7886 /*
7887  * startup/shutdown callbacks
7888  */
7889 /**
7890  * cik_startup - program the asic to a functional state
7891  *
7892  * @rdev: radeon_device pointer
7893  *
7894  * Programs the asic to a functional state (CIK).
7895  * Called by cik_init() and cik_resume().
7896  * Returns 0 for success, error for failure.
7897  */
static int cik_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	cik_pcie_gen3_enable(rdev);
	/* enable aspm */
	cik_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	cik_mc_program(rdev);

	/* dGPU only: load the MC firmware here when dpm has not been
	 * enabled (the dpm path presumably handles it itself — IGPs have
	 * no MC firmware at all, see cik_init()).
	 */
	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
		r = ci_mc_load_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load MC firmware!\n");
			return r;
		}
	}

	r = cik_pcie_gart_enable(rdev);
	if (r)
		return r;
	cik_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->flags & RADEON_IS_IGP) {
		/* pick the save/restore register list matching the IGP variant */
		if (rdev->family == CHIP_KAVERI) {
			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
		} else {
			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
			rdev->rlc.reg_list_size =
				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
		}
	}
	rdev->rlc.cs_data = ci_cs_data;
	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* allocate mec buffers */
	r = cik_mec_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init MEC BOs!\n");
		return r;
	}

	/* fence rings must be started before the CP/SDMA rings are set up */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD failure is not fatal: the ring is simply disabled below */
	r = radeon_uvd_resume(rdev);
	if (!r) {
		r = uvd_v4_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
	}
	if (r)
		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

	/* VCE failure is likewise non-fatal: both VCE rings get disabled */
	r = radeon_vce_resume(rdev);
	if (!r) {
		r = vce_v2_0_resume(rdev);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE1_INDEX);
		if (!r)
			r = radeon_fence_driver_start_ring(rdev,
							   TN_RING_TYPE_VCE2_INDEX);
	}
	if (r) {
		dev_err(rdev->dev, "VCE init error (%d).\n", r);
		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = cik_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	cik_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;

	/* set up the compute queues */
	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 0; /* first queue */
	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;

	/* type-2 packets are deprecated on MEC, use type-3 instead */
	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     PACKET3(PACKET3_NOP, 0x3FFF));
	if (r)
		return r;
	/* dGPU only have 1 MEC */
	ring->me = 1; /* first MEC */
	ring->pipe = 0; /* first pipe */
	ring->queue = 1; /* second queue */
	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
	if (r)
		return r;

	r = cik_cp_resume(rdev);
	if (r)
		return r;

	r = cik_sdma_resume(rdev);
	if (r)
		return r;

	/* UVD/VCE rings are only initialized if their resume above succeeded
	 * (ring_size stays 0 otherwise); failures here are logged, not fatal.
	 */
	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
	if (ring->ring_size) {
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     RADEON_CP_PACKET2);
		if (!r)
			r = uvd_v1_0_init(rdev);
		if (r)
			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
	}

	/* -ENOENT means neither VCE ring was set up; the else-if below then
	 * skips both vce_v1_0_init() and the error message.
	 */
	r = -ENOENT;

	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
	if (ring->ring_size)
		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
				     VCE_CMD_NO_OP);

	if (!r)
		r = vce_v1_0_init(rdev);
	else if (r != -ENOENT)
		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
8129 
8130 /**
8131  * cik_resume - resume the asic to a functional state
8132  *
8133  * @rdev: radeon_device pointer
8134  *
8135  * Programs the asic to a functional state (CIK).
8136  * Called at resume.
8137  * Returns 0 for success, error for failure.
8138  */
8139 int cik_resume(struct radeon_device *rdev)
8140 {
8141 	int r;
8142 
8143 	/* post card */
8144 	atom_asic_init(rdev->mode_info.atom_context);
8145 
8146 	/* init golden registers */
8147 	cik_init_golden_registers(rdev);
8148 
8149 	if (rdev->pm.pm_method == PM_METHOD_DPM)
8150 		radeon_pm_resume(rdev);
8151 
8152 	rdev->accel_working = true;
8153 	r = cik_startup(rdev);
8154 	if (r) {
8155 		DRM_ERROR("cik startup failed on resume\n");
8156 		rdev->accel_working = false;
8157 		return r;
8158 	}
8159 
8160 	return r;
8161 
8162 }
8163 
8164 /**
8165  * cik_suspend - suspend the asic
8166  *
8167  * @rdev: radeon_device pointer
8168  *
8169  * Bring the chip into a state suitable for suspend (CIK).
8170  * Called at suspend.
8171  * Returns 0 for success.
8172  */
int cik_suspend(struct radeon_device *rdev)
{
	/* Quiesce the chip for suspend.  The order below matters: pm, audio
	 * and the VM manager are stopped first, then the engines are halted,
	 * and IRQs, writeback and GART are taken down last.
	 */
	radeon_pm_suspend(rdev);
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	cik_cp_enable(rdev, false);	/* halt the CP rings */
	cik_sdma_enable(rdev, false);	/* halt the SDMA engines */
	uvd_v1_0_fini(rdev);
	radeon_uvd_suspend(rdev);
	radeon_vce_suspend(rdev);
	cik_fini_pg(rdev);	/* pg teardown */
	cik_fini_cg(rdev);	/* cg teardown */
	cik_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	cik_pcie_gart_disable(rdev);
	return 0;	/* always succeeds */
}
8190 
8191 /* Plan is to move initialization in that function and use
8192  * helper function so that radeon_device_init pretty much
8193  * do nothing more than calling asic specific function. This
8194  * should also allow to remove a bunch of callback function
8195  * like vram_info.
8196  */
8197 /**
8198  * cik_init - asic specific driver and hw init
8199  *
8200  * @rdev: radeon_device pointer
8201  *
8202  * Setup asic specific driver variables and program the hw
8203  * to a functional state (CIK).
8204  * Called at driver startup.
8205  * Returns 0 for success, errors for failure.
8206  */
8207 int cik_init(struct radeon_device *rdev)
8208 {
8209 	struct radeon_ring *ring;
8210 	int r;
8211 
8212 	/* Read BIOS */
8213 	if (!radeon_get_bios(rdev)) {
8214 		if (ASIC_IS_AVIVO(rdev))
8215 			return -EINVAL;
8216 	}
8217 	/* Must be an ATOMBIOS */
8218 	if (!rdev->is_atom_bios) {
8219 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
8220 		return -EINVAL;
8221 	}
8222 	r = radeon_atombios_init(rdev);
8223 	if (r)
8224 		return r;
8225 
8226 	/* Post card if necessary */
8227 	if (!radeon_card_posted(rdev)) {
8228 		if (!rdev->bios) {
8229 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
8230 			return -EINVAL;
8231 		}
8232 		DRM_INFO("GPU not posted. posting now...\n");
8233 		atom_asic_init(rdev->mode_info.atom_context);
8234 	}
8235 	/* init golden registers */
8236 	cik_init_golden_registers(rdev);
8237 	/* Initialize scratch registers */
8238 	cik_scratch_init(rdev);
8239 	/* Initialize surface registers */
8240 	radeon_surface_init(rdev);
8241 	/* Initialize clocks */
8242 	radeon_get_clock_info(rdev->ddev);
8243 
8244 	/* Fence driver */
8245 	r = radeon_fence_driver_init(rdev);
8246 	if (r)
8247 		return r;
8248 
8249 	/* initialize memory controller */
8250 	r = cik_mc_init(rdev);
8251 	if (r)
8252 		return r;
8253 	/* Memory manager */
8254 	r = radeon_bo_init(rdev);
8255 	if (r)
8256 		return r;
8257 
8258 	if (rdev->flags & RADEON_IS_IGP) {
8259 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8260 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8261 			r = cik_init_microcode(rdev);
8262 			if (r) {
8263 				DRM_ERROR("Failed to load firmware!\n");
8264 				return r;
8265 			}
8266 		}
8267 	} else {
8268 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8269 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8270 		    !rdev->mc_fw) {
8271 			r = cik_init_microcode(rdev);
8272 			if (r) {
8273 				DRM_ERROR("Failed to load firmware!\n");
8274 				return r;
8275 			}
8276 		}
8277 	}
8278 
8279 	/* Initialize power management */
8280 	radeon_pm_init(rdev);
8281 
8282 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8283 	ring->ring_obj = NULL;
8284 	r600_ring_init(rdev, ring, 1024 * 1024);
8285 
8286 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8287 	ring->ring_obj = NULL;
8288 	r600_ring_init(rdev, ring, 1024 * 1024);
8289 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8290 	if (r)
8291 		return r;
8292 
8293 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8294 	ring->ring_obj = NULL;
8295 	r600_ring_init(rdev, ring, 1024 * 1024);
8296 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8297 	if (r)
8298 		return r;
8299 
8300 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8301 	ring->ring_obj = NULL;
8302 	r600_ring_init(rdev, ring, 256 * 1024);
8303 
8304 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8305 	ring->ring_obj = NULL;
8306 	r600_ring_init(rdev, ring, 256 * 1024);
8307 
8308 	r = radeon_uvd_init(rdev);
8309 	if (!r) {
8310 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8311 		ring->ring_obj = NULL;
8312 		r600_ring_init(rdev, ring, 4096);
8313 	}
8314 
8315 	r = radeon_vce_init(rdev);
8316 	if (!r) {
8317 		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
8318 		ring->ring_obj = NULL;
8319 		r600_ring_init(rdev, ring, 4096);
8320 
8321 		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
8322 		ring->ring_obj = NULL;
8323 		r600_ring_init(rdev, ring, 4096);
8324 	}
8325 
8326 	rdev->ih.ring_obj = NULL;
8327 	r600_ih_ring_init(rdev, 64 * 1024);
8328 
8329 	r = r600_pcie_gart_init(rdev);
8330 	if (r)
8331 		return r;
8332 
8333 	rdev->accel_working = true;
8334 	r = cik_startup(rdev);
8335 	if (r) {
8336 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8337 		cik_cp_fini(rdev);
8338 		cik_sdma_fini(rdev);
8339 		cik_irq_fini(rdev);
8340 		sumo_rlc_fini(rdev);
8341 		cik_mec_fini(rdev);
8342 		radeon_wb_fini(rdev);
8343 		radeon_ib_pool_fini(rdev);
8344 		radeon_vm_manager_fini(rdev);
8345 		radeon_irq_kms_fini(rdev);
8346 		cik_pcie_gart_fini(rdev);
8347 		rdev->accel_working = false;
8348 	}
8349 
8350 	/* Don't start up if the MC ucode is missing.
8351 	 * The default clocks and voltages before the MC ucode
8352 	 * is loaded are not suffient for advanced operations.
8353 	 */
8354 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8355 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8356 		return -EINVAL;
8357 	}
8358 
8359 	return 0;
8360 }
8361 
8362 /**
8363  * cik_fini - asic specific driver and hw fini
8364  *
8365  * @rdev: radeon_device pointer
8366  *
8367  * Tear down the asic specific driver variables and program the hw
8368  * to an idle state (CIK).
8369  * Called at driver unload.
8370  */
void cik_fini(struct radeon_device *rdev)
{
	/* tear down in roughly the reverse order of cik_init()/cik_startup() */
	radeon_pm_fini(rdev);
	cik_cp_fini(rdev);
	cik_sdma_fini(rdev);
	cik_fini_pg(rdev);
	cik_fini_cg(rdev);
	cik_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	cik_mec_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	uvd_v1_0_fini(rdev);
	radeon_uvd_fini(rdev);
	radeon_vce_fini(rdev);
	cik_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;	/* avoid a dangling pointer after free */
}
8397 
8398 void dce8_program_fmt(struct drm_encoder *encoder)
8399 {
8400 	struct drm_device *dev = encoder->dev;
8401 	struct radeon_device *rdev = dev->dev_private;
8402 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8403 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8404 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8405 	int bpc = 0;
8406 	u32 tmp = 0;
8407 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8408 
8409 	if (connector) {
8410 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8411 		bpc = radeon_get_monitor_bpc(connector);
8412 		dither = radeon_connector->dither;
8413 	}
8414 
8415 	/* LVDS/eDP FMT is set up by atom */
8416 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8417 		return;
8418 
8419 	/* not needed for analog */
8420 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8421 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8422 		return;
8423 
8424 	if (bpc == 0)
8425 		return;
8426 
8427 	switch (bpc) {
8428 	case 6:
8429 		if (dither == RADEON_FMT_DITHER_ENABLE)
8430 			/* XXX sort out optimal dither settings */
8431 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8432 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8433 		else
8434 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8435 		break;
8436 	case 8:
8437 		if (dither == RADEON_FMT_DITHER_ENABLE)
8438 			/* XXX sort out optimal dither settings */
8439 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8440 				FMT_RGB_RANDOM_ENABLE |
8441 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8442 		else
8443 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8444 		break;
8445 	case 10:
8446 		if (dither == RADEON_FMT_DITHER_ENABLE)
8447 			/* XXX sort out optimal dither settings */
8448 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8449 				FMT_RGB_RANDOM_ENABLE |
8450 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8451 		else
8452 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8453 		break;
8454 	default:
8455 		/* not needed */
8456 		break;
8457 	}
8458 
8459 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8460 }
8461 
8462 /* display watermark setup */
8463 /**
8464  * dce8_line_buffer_adjust - Set up the line buffer
8465  *
8466  * @rdev: radeon_device pointer
8467  * @radeon_crtc: the selected display controller
8468  * @mode: the current display mode on the selected display
8469  * controller
8470  *
8471  * Setup up the line buffer allocation for
8472  * the selected display controller (CIK).
8473  * Returns the line buffer size in pixels.
8474  */
8475 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8476 				   struct radeon_crtc *radeon_crtc,
8477 				   struct drm_display_mode *mode)
8478 {
8479 	u32 tmp, buffer_alloc, i;
8480 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8481 	/*
8482 	 * Line Buffer Setup
8483 	 * There are 6 line buffers, one for each display controllers.
8484 	 * There are 3 partitions per LB. Select the number of partitions
8485 	 * to enable based on the display width.  For display widths larger
8486 	 * than 4096, you need use to use 2 display controllers and combine
8487 	 * them using the stereo blender.
8488 	 */
8489 	if (radeon_crtc->base.enabled && mode) {
8490 		if (mode->crtc_hdisplay < 1920) {
8491 			tmp = 1;
8492 			buffer_alloc = 2;
8493 		} else if (mode->crtc_hdisplay < 2560) {
8494 			tmp = 2;
8495 			buffer_alloc = 2;
8496 		} else if (mode->crtc_hdisplay < 4096) {
8497 			tmp = 0;
8498 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8499 		} else {
8500 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8501 			tmp = 0;
8502 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8503 		}
8504 	} else {
8505 		tmp = 1;
8506 		buffer_alloc = 0;
8507 	}
8508 
8509 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8510 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8511 
8512 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8513 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8514 	for (i = 0; i < rdev->usec_timeout; i++) {
8515 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8516 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8517 			break;
8518 		udelay(1);
8519 	}
8520 
8521 	if (radeon_crtc->base.enabled && mode) {
8522 		switch (tmp) {
8523 		case 0:
8524 		default:
8525 			return 4096 * 2;
8526 		case 1:
8527 			return 1920 * 2;
8528 		case 2:
8529 			return 2560 * 2;
8530 		}
8531 	}
8532 
8533 	/* controller not enabled, so no lb used */
8534 	return 0;
8535 }
8536 
8537 /**
8538  * cik_get_number_of_dram_channels - get the number of dram channels
8539  *
8540  * @rdev: radeon_device pointer
8541  *
8542  * Look up the number of video ram channels (CIK).
8543  * Used for display watermark bandwidth calculations
8544  * Returns the number of dram channels
8545  */
8546 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8547 {
8548 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8549 
8550 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8551 	case 0:
8552 	default:
8553 		return 1;
8554 	case 1:
8555 		return 2;
8556 	case 2:
8557 		return 4;
8558 	case 3:
8559 		return 8;
8560 	case 4:
8561 		return 3;
8562 	case 5:
8563 		return 6;
8564 	case 6:
8565 		return 10;
8566 	case 7:
8567 		return 12;
8568 	case 8:
8569 		return 16;
8570 	}
8571 }
8572 
/* Aggregated inputs for the DCE8 display watermark calculations below. */
struct dce8_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
8588 
8589 /**
8590  * dce8_dram_bandwidth - get the dram bandwidth
8591  *
8592  * @wm: watermark calculation data
8593  *
8594  * Calculate the raw dram bandwidth (CIK).
8595  * Used for display watermark bandwidth calculations
8596  * Returns the dram bandwidth in MBytes/s
8597  */
8598 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8599 {
8600 	/* Calculate raw DRAM Bandwidth */
8601 	fixed20_12 dram_efficiency; /* 0.7 */
8602 	fixed20_12 yclk, dram_channels, bandwidth;
8603 	fixed20_12 a;
8604 
8605 	a.full = dfixed_const(1000);
8606 	yclk.full = dfixed_const(wm->yclk);
8607 	yclk.full = dfixed_div(yclk, a);
8608 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8609 	a.full = dfixed_const(10);
8610 	dram_efficiency.full = dfixed_const(7);
8611 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8612 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8613 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8614 
8615 	return dfixed_trunc(bandwidth);
8616 }
8617 
8618 /**
8619  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8620  *
8621  * @wm: watermark calculation data
8622  *
8623  * Calculate the dram bandwidth used for display (CIK).
8624  * Used for display watermark bandwidth calculations
8625  * Returns the dram bandwidth for display in MBytes/s
8626  */
8627 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8628 {
8629 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8630 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8631 	fixed20_12 yclk, dram_channels, bandwidth;
8632 	fixed20_12 a;
8633 
8634 	a.full = dfixed_const(1000);
8635 	yclk.full = dfixed_const(wm->yclk);
8636 	yclk.full = dfixed_div(yclk, a);
8637 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8638 	a.full = dfixed_const(10);
8639 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8640 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8641 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8642 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8643 
8644 	return dfixed_trunc(bandwidth);
8645 }
8646 
8647 /**
8648  * dce8_data_return_bandwidth - get the data return bandwidth
8649  *
8650  * @wm: watermark calculation data
8651  *
8652  * Calculate the data return bandwidth used for display (CIK).
8653  * Used for display watermark bandwidth calculations
8654  * Returns the data return bandwidth in MBytes/s
8655  */
8656 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8657 {
8658 	/* Calculate the display Data return Bandwidth */
8659 	fixed20_12 return_efficiency; /* 0.8 */
8660 	fixed20_12 sclk, bandwidth;
8661 	fixed20_12 a;
8662 
8663 	a.full = dfixed_const(1000);
8664 	sclk.full = dfixed_const(wm->sclk);
8665 	sclk.full = dfixed_div(sclk, a);
8666 	a.full = dfixed_const(10);
8667 	return_efficiency.full = dfixed_const(8);
8668 	return_efficiency.full = dfixed_div(return_efficiency, a);
8669 	a.full = dfixed_const(32);
8670 	bandwidth.full = dfixed_mul(a, sclk);
8671 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8672 
8673 	return dfixed_trunc(bandwidth);
8674 }
8675 
8676 /**
8677  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8678  *
8679  * @wm: watermark calculation data
8680  *
8681  * Calculate the dmif bandwidth used for display (CIK).
8682  * Used for display watermark bandwidth calculations
8683  * Returns the dmif bandwidth in MBytes/s
8684  */
8685 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8686 {
8687 	/* Calculate the DMIF Request Bandwidth */
8688 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8689 	fixed20_12 disp_clk, bandwidth;
8690 	fixed20_12 a, b;
8691 
8692 	a.full = dfixed_const(1000);
8693 	disp_clk.full = dfixed_const(wm->disp_clk);
8694 	disp_clk.full = dfixed_div(disp_clk, a);
8695 	a.full = dfixed_const(32);
8696 	b.full = dfixed_mul(a, disp_clk);
8697 
8698 	a.full = dfixed_const(10);
8699 	disp_clk_request_efficiency.full = dfixed_const(8);
8700 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8701 
8702 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8703 
8704 	return dfixed_trunc(bandwidth);
8705 }
8706 
8707 /**
8708  * dce8_available_bandwidth - get the min available bandwidth
8709  *
8710  * @wm: watermark calculation data
8711  *
8712  * Calculate the min available bandwidth used for display (CIK).
8713  * Used for display watermark bandwidth calculations
8714  * Returns the min available bandwidth in MBytes/s
8715  */
8716 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8717 {
8718 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8719 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8720 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8721 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8722 
8723 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8724 }
8725 
8726 /**
8727  * dce8_average_bandwidth - get the average available bandwidth
8728  *
8729  * @wm: watermark calculation data
8730  *
8731  * Calculate the average available bandwidth used for display (CIK).
8732  * Used for display watermark bandwidth calculations
8733  * Returns the average available bandwidth in MBytes/s
8734  */
8735 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8736 {
8737 	/* Calculate the display mode Average Bandwidth
8738 	 * DisplayMode should contain the source and destination dimensions,
8739 	 * timing, etc.
8740 	 */
8741 	fixed20_12 bpp;
8742 	fixed20_12 line_time;
8743 	fixed20_12 src_width;
8744 	fixed20_12 bandwidth;
8745 	fixed20_12 a;
8746 
8747 	a.full = dfixed_const(1000);
8748 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8749 	line_time.full = dfixed_div(line_time, a);
8750 	bpp.full = dfixed_const(wm->bytes_per_pixel);
8751 	src_width.full = dfixed_const(wm->src_width);
8752 	bandwidth.full = dfixed_mul(src_width, bpp);
8753 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8754 	bandwidth.full = dfixed_div(bandwidth, line_time);
8755 
8756 	return dfixed_trunc(bandwidth);
8757 }
8758 
8759 /**
8760  * dce8_latency_watermark - get the latency watermark
8761  *
8762  * @wm: watermark calculation data
8763  *
8764  * Calculate the latency watermark (CIK).
8765  * Used for display watermark bandwidth calculations
8766  * Returns the latency watermark in ns
8767  */
8768 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8769 {
8770 	/* First calculate the latency in ns */
8771 	u32 mc_latency = 2000; /* 2000 ns. */
8772 	u32 available_bandwidth = dce8_available_bandwidth(wm);
8773 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8774 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8775 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8776 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8777 		(wm->num_heads * cursor_line_pair_return_time);
8778 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8779 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8780 	u32 tmp, dmif_size = 12288;
8781 	fixed20_12 a, b, c;
8782 
8783 	if (wm->num_heads == 0)
8784 		return 0;
8785 
8786 	a.full = dfixed_const(2);
8787 	b.full = dfixed_const(1);
8788 	if ((wm->vsc.full > a.full) ||
8789 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8790 	    (wm->vtaps >= 5) ||
8791 	    ((wm->vsc.full >= a.full) && wm->interlaced))
8792 		max_src_lines_per_dst_line = 4;
8793 	else
8794 		max_src_lines_per_dst_line = 2;
8795 
8796 	a.full = dfixed_const(available_bandwidth);
8797 	b.full = dfixed_const(wm->num_heads);
8798 	a.full = dfixed_div(a, b);
8799 
8800 	b.full = dfixed_const(mc_latency + 512);
8801 	c.full = dfixed_const(wm->disp_clk);
8802 	b.full = dfixed_div(b, c);
8803 
8804 	c.full = dfixed_const(dmif_size);
8805 	b.full = dfixed_div(c, b);
8806 
8807 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8808 
8809 	b.full = dfixed_const(1000);
8810 	c.full = dfixed_const(wm->disp_clk);
8811 	b.full = dfixed_div(c, b);
8812 	c.full = dfixed_const(wm->bytes_per_pixel);
8813 	b.full = dfixed_mul(b, c);
8814 
8815 	lb_fill_bw = min(tmp, dfixed_trunc(b));
8816 
8817 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8818 	b.full = dfixed_const(1000);
8819 	c.full = dfixed_const(lb_fill_bw);
8820 	b.full = dfixed_div(c, b);
8821 	a.full = dfixed_div(a, b);
8822 	line_fill_time = dfixed_trunc(a);
8823 
8824 	if (line_fill_time < wm->active_time)
8825 		return latency;
8826 	else
8827 		return latency + (line_fill_time - wm->active_time);
8828 
8829 }
8830 
8831 /**
8832  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8833  * average and available dram bandwidth
8834  *
8835  * @wm: watermark calculation data
8836  *
8837  * Check if the display average bandwidth fits in the display
8838  * dram bandwidth (CIK).
8839  * Used for display watermark bandwidth calculations
8840  * Returns true if the display fits, false if not.
8841  */
8842 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8843 {
8844 	if (dce8_average_bandwidth(wm) <=
8845 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8846 		return true;
8847 	else
8848 		return false;
8849 }
8850 
8851 /**
8852  * dce8_average_bandwidth_vs_available_bandwidth - check
8853  * average and available bandwidth
8854  *
8855  * @wm: watermark calculation data
8856  *
8857  * Check if the display average bandwidth fits in the display
8858  * available bandwidth (CIK).
8859  * Used for display watermark bandwidth calculations
8860  * Returns true if the display fits, false if not.
8861  */
8862 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8863 {
8864 	if (dce8_average_bandwidth(wm) <=
8865 	    (dce8_available_bandwidth(wm) / wm->num_heads))
8866 		return true;
8867 	else
8868 		return false;
8869 }
8870 
8871 /**
8872  * dce8_check_latency_hiding - check latency hiding
8873  *
8874  * @wm: watermark calculation data
8875  *
8876  * Check latency hiding (CIK).
8877  * Used for display watermark bandwidth calculations
8878  * Returns true if the display fits, false if not.
8879  */
8880 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8881 {
8882 	u32 lb_partitions = wm->lb_size / wm->src_width;
8883 	u32 line_time = wm->active_time + wm->blank_time;
8884 	u32 latency_tolerant_lines;
8885 	u32 latency_hiding;
8886 	fixed20_12 a;
8887 
8888 	a.full = dfixed_const(1);
8889 	if (wm->vsc.full > a.full)
8890 		latency_tolerant_lines = 1;
8891 	else {
8892 		if (lb_partitions <= (wm->vtaps + 1))
8893 			latency_tolerant_lines = 1;
8894 		else
8895 			latency_tolerant_lines = 2;
8896 	}
8897 
8898 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8899 
8900 	if (dce8_latency_watermark(wm) <= latency_hiding)
8901 		return true;
8902 	else
8903 		return false;
8904 }
8905 
8906 /**
8907  * dce8_program_watermarks - program display watermarks
8908  *
8909  * @rdev: radeon_device pointer
8910  * @radeon_crtc: the selected display controller
8911  * @lb_size: line buffer size
8912  * @num_heads: number of display controllers in use
8913  *
8914  * Calculate and program the display watermarks for the
8915  * selected display controller (CIK).
8916  */
8917 static void dce8_program_watermarks(struct radeon_device *rdev,
8918 				    struct radeon_crtc *radeon_crtc,
8919 				    u32 lb_size, u32 num_heads)
8920 {
8921 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
8922 	struct dce8_wm_params wm_low, wm_high;
8923 	u32 pixel_period;
8924 	u32 line_time = 0;
8925 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
8926 	u32 tmp, wm_mask;
8927 
8928 	if (radeon_crtc->base.enabled && num_heads && mode) {
8929 		pixel_period = 1000000 / (u32)mode->clock;
8930 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8931 
8932 		/* watermark for high clocks */
8933 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8934 		    rdev->pm.dpm_enabled) {
8935 			wm_high.yclk =
8936 				radeon_dpm_get_mclk(rdev, false) * 10;
8937 			wm_high.sclk =
8938 				radeon_dpm_get_sclk(rdev, false) * 10;
8939 		} else {
8940 			wm_high.yclk = rdev->pm.current_mclk * 10;
8941 			wm_high.sclk = rdev->pm.current_sclk * 10;
8942 		}
8943 
8944 		wm_high.disp_clk = mode->clock;
8945 		wm_high.src_width = mode->crtc_hdisplay;
8946 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8947 		wm_high.blank_time = line_time - wm_high.active_time;
8948 		wm_high.interlaced = false;
8949 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8950 			wm_high.interlaced = true;
8951 		wm_high.vsc = radeon_crtc->vsc;
8952 		wm_high.vtaps = 1;
8953 		if (radeon_crtc->rmx_type != RMX_OFF)
8954 			wm_high.vtaps = 2;
8955 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8956 		wm_high.lb_size = lb_size;
8957 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8958 		wm_high.num_heads = num_heads;
8959 
8960 		/* set for high clocks */
8961 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8962 
8963 		/* possibly force display priority to high */
8964 		/* should really do this at mode validation time... */
8965 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8966 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8967 		    !dce8_check_latency_hiding(&wm_high) ||
8968 		    (rdev->disp_priority == 2)) {
8969 			DRM_DEBUG_KMS("force priority to high\n");
8970 		}
8971 
8972 		/* watermark for low clocks */
8973 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8974 		    rdev->pm.dpm_enabled) {
8975 			wm_low.yclk =
8976 				radeon_dpm_get_mclk(rdev, true) * 10;
8977 			wm_low.sclk =
8978 				radeon_dpm_get_sclk(rdev, true) * 10;
8979 		} else {
8980 			wm_low.yclk = rdev->pm.current_mclk * 10;
8981 			wm_low.sclk = rdev->pm.current_sclk * 10;
8982 		}
8983 
8984 		wm_low.disp_clk = mode->clock;
8985 		wm_low.src_width = mode->crtc_hdisplay;
8986 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8987 		wm_low.blank_time = line_time - wm_low.active_time;
8988 		wm_low.interlaced = false;
8989 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8990 			wm_low.interlaced = true;
8991 		wm_low.vsc = radeon_crtc->vsc;
8992 		wm_low.vtaps = 1;
8993 		if (radeon_crtc->rmx_type != RMX_OFF)
8994 			wm_low.vtaps = 2;
8995 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8996 		wm_low.lb_size = lb_size;
8997 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8998 		wm_low.num_heads = num_heads;
8999 
9000 		/* set for low clocks */
9001 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
9002 
9003 		/* possibly force display priority to high */
9004 		/* should really do this at mode validation time... */
9005 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
9006 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
9007 		    !dce8_check_latency_hiding(&wm_low) ||
9008 		    (rdev->disp_priority == 2)) {
9009 			DRM_DEBUG_KMS("force priority to high\n");
9010 		}
9011 	}
9012 
9013 	/* select wm A */
9014 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9015 	tmp = wm_mask;
9016 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9017 	tmp |= LATENCY_WATERMARK_MASK(1);
9018 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9019 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9020 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
9021 		LATENCY_HIGH_WATERMARK(line_time)));
9022 	/* select wm B */
9023 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
9024 	tmp &= ~LATENCY_WATERMARK_MASK(3);
9025 	tmp |= LATENCY_WATERMARK_MASK(2);
9026 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
9027 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
9028 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
9029 		LATENCY_HIGH_WATERMARK(line_time)));
9030 	/* restore original selection */
9031 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
9032 
9033 	/* save values for DPM */
9034 	radeon_crtc->line_time = line_time;
9035 	radeon_crtc->wm_high = latency_watermark_a;
9036 	radeon_crtc->wm_low = latency_watermark_b;
9037 }
9038 
9039 /**
9040  * dce8_bandwidth_update - program display watermarks
9041  *
9042  * @rdev: radeon_device pointer
9043  *
9044  * Calculate and program the display watermarks and line
9045  * buffer allocation (CIK).
9046  */
9047 void dce8_bandwidth_update(struct radeon_device *rdev)
9048 {
9049 	struct drm_display_mode *mode = NULL;
9050 	u32 num_heads = 0, lb_size;
9051 	int i;
9052 
9053 	radeon_update_display_priority(rdev);
9054 
9055 	for (i = 0; i < rdev->num_crtc; i++) {
9056 		if (rdev->mode_info.crtcs[i]->base.enabled)
9057 			num_heads++;
9058 	}
9059 	for (i = 0; i < rdev->num_crtc; i++) {
9060 		mode = &rdev->mode_info.crtcs[i]->base.mode;
9061 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
9062 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
9063 	}
9064 }
9065 
9066 /**
9067  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
9068  *
9069  * @rdev: radeon_device pointer
9070  *
9071  * Fetches a GPU clock counter snapshot (SI).
9072  * Returns the 64 bit clock counter snapshot.
9073  */
9074 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
9075 {
9076 	uint64_t clock;
9077 
9078 	mutex_lock(&rdev->gpu_clock_mutex);
9079 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
9080 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
9081 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
9082 	mutex_unlock(&rdev->gpu_clock_mutex);
9083 	return clock;
9084 }
9085 
9086 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
9087                               u32 cntl_reg, u32 status_reg)
9088 {
9089 	int r, i;
9090 	struct atom_clock_dividers dividers;
9091 	uint32_t tmp;
9092 
9093 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9094 					   clock, false, &dividers);
9095 	if (r)
9096 		return r;
9097 
9098 	tmp = RREG32_SMC(cntl_reg);
9099 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
9100 	tmp |= dividers.post_divider;
9101 	WREG32_SMC(cntl_reg, tmp);
9102 
9103 	for (i = 0; i < 100; i++) {
9104 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
9105 			break;
9106 		mdelay(10);
9107 	}
9108 	if (i == 100)
9109 		return -ETIMEDOUT;
9110 
9111 	return 0;
9112 }
9113 
9114 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
9115 {
9116 	int r = 0;
9117 
9118 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
9119 	if (r)
9120 		return r;
9121 
9122 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
9123 	return r;
9124 }
9125 
9126 int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
9127 {
9128 	int r, i;
9129 	struct atom_clock_dividers dividers;
9130 	u32 tmp;
9131 
9132 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
9133 					   ecclk, false, &dividers);
9134 	if (r)
9135 		return r;
9136 
9137 	for (i = 0; i < 100; i++) {
9138 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9139 			break;
9140 		mdelay(10);
9141 	}
9142 	if (i == 100)
9143 		return -ETIMEDOUT;
9144 
9145 	tmp = RREG32_SMC(CG_ECLK_CNTL);
9146 	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
9147 	tmp |= dividers.post_divider;
9148 	WREG32_SMC(CG_ECLK_CNTL, tmp);
9149 
9150 	for (i = 0; i < 100; i++) {
9151 		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
9152 			break;
9153 		mdelay(10);
9154 	}
9155 	if (i == 100)
9156 		return -ETIMEDOUT;
9157 
9158 	return 0;
9159 }
9160 
/**
 * cik_pcie_gen3_enable - attempt to raise the pcie link speed
 *
 * @rdev: radeon_device pointer
 *
 * Retrain the PCIE link to gen2 or gen3 speeds when supported,
 * based on the speed cap mask reported by the drm pcie helper.
 * No-op for IGPs, non-PCIE parts, or when radeon.pcie_gen2=0.
 */
static void cik_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	/* module param gate: radeon.pcie_gen2=0 disables speed changes */
	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	/* nothing to do if only gen1 is supported */
	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* skip if the link is already at the highest supported rate
	 * (current_data_rate: 0 = gen1, 1 = gen2, 2 = gen3)
	 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* both the root port and the gpu need a pcie capability */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save current link control, then force HAWD on both ends */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link back to the maximum detected width first */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			/* up to 10 equalization retries */
			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the link, then redo equalization */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				/* restore the saved HAWD bits on both ends */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2 */
				/* restore the saved LNKCTL2 fields covered by the mask */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed in LNKCTL2 (low 4 bits) */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	/* kick off the speed change and wait for the hw to clear the bit */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
9317 
/**
 * cik_program_aspm - program pcie ASPM related settings
 *
 * @rdev: radeon_device pointer
 *
 * Configure the PCIE L0s/L1 power states, PLL powerdown in L1,
 * and CLKREQ based clocking (CIK).  No-op for IGPs, non-PCIE
 * parts, or when radeon.aspm=0.
 */
static void cik_program_aspm(struct radeon_device *rdev)
{
	u32 data, orig;
	/* local policy knobs; all features enabled by default */
	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
	bool disable_clkreq = false;

	/* module param gate: radeon.aspm=0 disables all of this */
	if (radeon_aspm == 0)
		return;

	/* XXX double check IGPs */
	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* override the N_FTS value used when exiting L0s */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
	data &= ~LC_XMIT_N_FTS_MASK;
	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);

	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
	data |= LC_GO_TO_RECOVERY;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);

	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
	data |= P_IGNORE_EDB_ERR;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_P_CNTL, data);

	/* set up L0s/L1 inactivity timers according to policy */
	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
	data |= LC_PMI_TO_L1_DIS;
	if (!disable_l0s)
		data |= LC_L0S_INACTIVITY(7);

	if (!disable_l1) {
		data |= LC_L1_INACTIVITY(7);
		data &= ~LC_PMI_TO_L1_DIS;
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);

		if (!disable_plloff_in_l1) {
			bool clk_req_support;

			/* allow the PIF to power down the PLLs in L1 / TXS2 */
			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);

			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
			if (orig != data)
				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);

			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
			data |= LC_DYN_LANES_PWR_STATE(3);
			if (orig != data)
				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);

			/* CLKREQ is usable only if the root port supports clock PM */
			if (!disable_clkreq) {
				struct pci_dev *root = rdev->pdev->bus->self;
				u32 lnkcap;

				clk_req_support = false;
				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
					clk_req_support = true;
			} else {
				clk_req_support = false;
			}

			if (clk_req_support) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);

				/* switch various internal clocks to alternate sources
				 * so the refclk can be gated
				 */
				orig = data = RREG32_SMC(THM_CLK_CNTL);
				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
				if (orig != data)
					WREG32_SMC(THM_CLK_CNTL, data);

				orig = data = RREG32_SMC(MISC_CLK_CTRL);
				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
				if (orig != data)
					WREG32_SMC(MISC_CLK_CTRL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
				data &= ~BCLK_AS_XCLK;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL, data);

				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
				data &= ~FORCE_BIF_REFCLK_EN;
				if (orig != data)
					WREG32_SMC(CG_CLKPIN_CNTL_2, data);

				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
				data &= ~MPLL_CLKOUT_SEL_MASK;
				data |= MPLL_CLKOUT_SEL(4);
				if (orig != data)
					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
			}
		}
	} else {
		/* L1 disabled: just commit the L0s/PMI settings */
		if (orig != data)
			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
	}

	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
	if (orig != data)
		WREG32_PCIE_PORT(PCIE_CNTL2, data);

	/* L0s only makes sense when both link partners transmit/receive it */
	if (!disable_l0s) {
		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
				data &= ~LC_L0S_INACTIVITY_MASK;
				if (orig != data)
					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
			}
		}
	}
}
9465