xref: /linux/drivers/gpu/drm/radeon/cik.c (revision d91517839e5d95adc0cf4b28caa7af62a71de526)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include "drmP.h"
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include "cikd.h"
31 #include "atom.h"
32 #include "cik_blit_shaders.h"
33 #include "radeon_ucode.h"
34 #include "clearstate_ci.h"
35 
36 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
37 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
38 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
39 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
40 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
41 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
42 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
43 MODULE_FIRMWARE("radeon/BONAIRE_smc.bin");
44 MODULE_FIRMWARE("radeon/HAWAII_pfp.bin");
45 MODULE_FIRMWARE("radeon/HAWAII_me.bin");
46 MODULE_FIRMWARE("radeon/HAWAII_ce.bin");
47 MODULE_FIRMWARE("radeon/HAWAII_mec.bin");
48 MODULE_FIRMWARE("radeon/HAWAII_mc.bin");
49 MODULE_FIRMWARE("radeon/HAWAII_rlc.bin");
50 MODULE_FIRMWARE("radeon/HAWAII_sdma.bin");
51 MODULE_FIRMWARE("radeon/HAWAII_smc.bin");
52 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
53 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
54 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
55 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
56 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
57 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
58 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
59 MODULE_FIRMWARE("radeon/KABINI_me.bin");
60 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
61 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
62 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
63 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
64 
65 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
66 extern void r600_ih_ring_fini(struct radeon_device *rdev);
67 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
68 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
69 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
70 extern void sumo_rlc_fini(struct radeon_device *rdev);
71 extern int sumo_rlc_init(struct radeon_device *rdev);
72 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
73 extern void si_rlc_reset(struct radeon_device *rdev);
74 extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
75 extern int cik_sdma_resume(struct radeon_device *rdev);
76 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
77 extern void cik_sdma_fini(struct radeon_device *rdev);
78 static void cik_rlc_stop(struct radeon_device *rdev);
79 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
80 static void cik_program_aspm(struct radeon_device *rdev);
81 static void cik_init_pg(struct radeon_device *rdev);
82 static void cik_init_cg(struct radeon_device *rdev);
83 static void cik_fini_pg(struct radeon_device *rdev);
84 static void cik_fini_cg(struct radeon_device *rdev);
85 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
86 					  bool enable);
87 
88 /* get temperature in millidegrees */
89 int ci_get_temp(struct radeon_device *rdev)
90 {
91 	u32 temp;
92 	int actual_temp = 0;
93 
94 	temp = (RREG32_SMC(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
95 		CTF_TEMP_SHIFT;
96 
97 	if (temp & 0x200)
98 		actual_temp = 255;
99 	else
100 		actual_temp = temp & 0x1ff;
101 
102 	actual_temp = actual_temp * 1000;
103 
104 	return actual_temp;
105 }
106 
107 /* get temperature in millidegrees */
108 int kv_get_temp(struct radeon_device *rdev)
109 {
110 	u32 temp;
111 	int actual_temp = 0;
112 
113 	temp = RREG32_SMC(0xC0300E0C);
114 
115 	if (temp)
116 		actual_temp = (temp / 8) - 49;
117 	else
118 		actual_temp = 0;
119 
120 	actual_temp = actual_temp * 1000;
121 
122 	return actual_temp;
123 }
124 
125 /*
126  * Indirect registers accessor
127  */
128 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
129 {
130 	unsigned long flags;
131 	u32 r;
132 
133 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
134 	WREG32(PCIE_INDEX, reg);
135 	(void)RREG32(PCIE_INDEX);
136 	r = RREG32(PCIE_DATA);
137 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
138 	return r;
139 }
140 
141 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
142 {
143 	unsigned long flags;
144 
145 	spin_lock_irqsave(&rdev->pciep_idx_lock, flags);
146 	WREG32(PCIE_INDEX, reg);
147 	(void)RREG32(PCIE_INDEX);
148 	WREG32(PCIE_DATA, v);
149 	(void)RREG32(PCIE_DATA);
150 	spin_unlock_irqrestore(&rdev->pciep_idx_lock, flags);
151 }
152 
/*
 * RLC save/restore register list (spectre).
 *
 * Layout, as visible in the table itself:
 *   - a run of (packed register word, 0x00000000) pairs,
 *   - a bare 0x3 word,
 *   - a second run of (packed register word, 0x00000000) pairs,
 *   - a bare 0x5 word followed by five packed register words that have
 *     no trailing zero word.
 *
 * A packed register word carries a 16-bit selector in bits 31:16 and the
 * register byte offset divided by four in the low bits.
 *
 * NOTE(review): the meaning of the selector values and of the 0x3 / 0x5
 * words is defined by whatever consumes this list, which is not visible
 * here -- confirm before adding or removing entries.
 *
 * RLC_SR_REG() packs on u32 operands.  Selectors with bit 15 set
 * (0x8e00, 0x9e00, 0xae00, 0xbe00) would otherwise be plain int constants
 * left-shifted into the sign bit, which ISO C leaves undefined.  The
 * resulting table contents are unchanged.
 */
#define RLC_SR_REG(sel, off)	(((u32)(sel) << 16) | ((u32)(off) >> 2))

static const u32 spectre_rlc_save_restore_register_list[] =
{
	RLC_SR_REG(0x0e00, 0xc12c), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc140), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc150), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc15c), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc168), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc170), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc178), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc204), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc2b4), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc2b8), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc2bc), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc2c0), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8228), 0x00000000,
	RLC_SR_REG(0x0e00, 0x829c), 0x00000000,
	RLC_SR_REG(0x0e00, 0x869c), 0x00000000,
	RLC_SR_REG(0x0600, 0x98f4), 0x00000000,
	RLC_SR_REG(0x0e00, 0x98f8), 0x00000000,
	RLC_SR_REG(0x0e00, 0x9900), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc260), 0x00000000,
	RLC_SR_REG(0x0e00, 0x90e8), 0x00000000,
	RLC_SR_REG(0x0e00, 0x3c000), 0x00000000,
	RLC_SR_REG(0x0e00, 0x3c00c), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8c1c), 0x00000000,
	RLC_SR_REG(0x0e00, 0x9700), 0x00000000,
	RLC_SR_REG(0x0e00, 0xcd20), 0x00000000,
	RLC_SR_REG(0x4e00, 0xcd20), 0x00000000,
	RLC_SR_REG(0x5e00, 0xcd20), 0x00000000,
	RLC_SR_REG(0x6e00, 0xcd20), 0x00000000,
	RLC_SR_REG(0x7e00, 0xcd20), 0x00000000,
	RLC_SR_REG(0x8e00, 0xcd20), 0x00000000,
	RLC_SR_REG(0x9e00, 0xcd20), 0x00000000,
	RLC_SR_REG(0xae00, 0xcd20), 0x00000000,
	RLC_SR_REG(0xbe00, 0xcd20), 0x00000000,
	RLC_SR_REG(0x0e00, 0x89bc), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8900), 0x00000000,

	/* bare word between the first and second run of pairs */
	0x3,

	RLC_SR_REG(0x0e00, 0xc130), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc134), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc1fc), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc208), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc264), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc268), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc26c), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc270), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc274), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc278), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc27c), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc280), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc284), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc288), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc28c), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc290), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc294), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc298), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc29c), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc2a0), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc2a4), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc2a8), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc2ac), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc2b0), 0x00000000,
	RLC_SR_REG(0x0e00, 0x301d0), 0x00000000,
	RLC_SR_REG(0x0e00, 0x30238), 0x00000000,
	RLC_SR_REG(0x0e00, 0x30250), 0x00000000,
	RLC_SR_REG(0x0e00, 0x30254), 0x00000000,
	RLC_SR_REG(0x0e00, 0x30258), 0x00000000,
	RLC_SR_REG(0x0e00, 0x3025c), 0x00000000,
	RLC_SR_REG(0x4e00, 0xc900), 0x00000000,
	RLC_SR_REG(0x5e00, 0xc900), 0x00000000,
	RLC_SR_REG(0x6e00, 0xc900), 0x00000000,
	RLC_SR_REG(0x7e00, 0xc900), 0x00000000,
	RLC_SR_REG(0x8e00, 0xc900), 0x00000000,
	RLC_SR_REG(0x9e00, 0xc900), 0x00000000,
	RLC_SR_REG(0xae00, 0xc900), 0x00000000,
	RLC_SR_REG(0xbe00, 0xc900), 0x00000000,
	RLC_SR_REG(0x4e00, 0xc904), 0x00000000,
	RLC_SR_REG(0x5e00, 0xc904), 0x00000000,
	RLC_SR_REG(0x6e00, 0xc904), 0x00000000,
	RLC_SR_REG(0x7e00, 0xc904), 0x00000000,
	RLC_SR_REG(0x8e00, 0xc904), 0x00000000,
	RLC_SR_REG(0x9e00, 0xc904), 0x00000000,
	RLC_SR_REG(0xae00, 0xc904), 0x00000000,
	RLC_SR_REG(0xbe00, 0xc904), 0x00000000,
	RLC_SR_REG(0x4e00, 0xc908), 0x00000000,
	RLC_SR_REG(0x5e00, 0xc908), 0x00000000,
	RLC_SR_REG(0x6e00, 0xc908), 0x00000000,
	RLC_SR_REG(0x7e00, 0xc908), 0x00000000,
	RLC_SR_REG(0x8e00, 0xc908), 0x00000000,
	RLC_SR_REG(0x9e00, 0xc908), 0x00000000,
	RLC_SR_REG(0xae00, 0xc908), 0x00000000,
	RLC_SR_REG(0xbe00, 0xc908), 0x00000000,
	RLC_SR_REG(0x4e00, 0xc90c), 0x00000000,
	RLC_SR_REG(0x5e00, 0xc90c), 0x00000000,
	RLC_SR_REG(0x6e00, 0xc90c), 0x00000000,
	RLC_SR_REG(0x7e00, 0xc90c), 0x00000000,
	RLC_SR_REG(0x8e00, 0xc90c), 0x00000000,
	RLC_SR_REG(0x9e00, 0xc90c), 0x00000000,
	RLC_SR_REG(0xae00, 0xc90c), 0x00000000,
	RLC_SR_REG(0xbe00, 0xc90c), 0x00000000,
	RLC_SR_REG(0x4e00, 0xc910), 0x00000000,
	RLC_SR_REG(0x5e00, 0xc910), 0x00000000,
	RLC_SR_REG(0x6e00, 0xc910), 0x00000000,
	RLC_SR_REG(0x7e00, 0xc910), 0x00000000,
	RLC_SR_REG(0x8e00, 0xc910), 0x00000000,
	RLC_SR_REG(0x9e00, 0xc910), 0x00000000,
	RLC_SR_REG(0xae00, 0xc910), 0x00000000,
	RLC_SR_REG(0xbe00, 0xc910), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc99c), 0x00000000,
	RLC_SR_REG(0x0e00, 0x9834), 0x00000000,
	RLC_SR_REG(0x0000, 0x30f00), 0x00000000,
	RLC_SR_REG(0x0001, 0x30f00), 0x00000000,
	RLC_SR_REG(0x0000, 0x30f04), 0x00000000,
	RLC_SR_REG(0x0001, 0x30f04), 0x00000000,
	RLC_SR_REG(0x0000, 0x30f08), 0x00000000,
	RLC_SR_REG(0x0001, 0x30f08), 0x00000000,
	RLC_SR_REG(0x0000, 0x30f0c), 0x00000000,
	RLC_SR_REG(0x0001, 0x30f0c), 0x00000000,
	RLC_SR_REG(0x0600, 0x9b7c), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8a14), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8a18), 0x00000000,
	RLC_SR_REG(0x0600, 0x30a00), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8bf0), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8bcc), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8b24), 0x00000000,
	RLC_SR_REG(0x0e00, 0x30a04), 0x00000000,
	RLC_SR_REG(0x0600, 0x30a10), 0x00000000,
	RLC_SR_REG(0x0600, 0x30a14), 0x00000000,
	RLC_SR_REG(0x0600, 0x30a18), 0x00000000,
	RLC_SR_REG(0x0600, 0x30a2c), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc700), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc704), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc708), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc768), 0x00000000,
	RLC_SR_REG(0x0400, 0xc770), 0x00000000,
	RLC_SR_REG(0x0400, 0xc774), 0x00000000,
	RLC_SR_REG(0x0400, 0xc778), 0x00000000,
	RLC_SR_REG(0x0400, 0xc77c), 0x00000000,
	RLC_SR_REG(0x0400, 0xc780), 0x00000000,
	RLC_SR_REG(0x0400, 0xc784), 0x00000000,
	RLC_SR_REG(0x0400, 0xc788), 0x00000000,
	RLC_SR_REG(0x0400, 0xc78c), 0x00000000,
	RLC_SR_REG(0x0400, 0xc798), 0x00000000,
	RLC_SR_REG(0x0400, 0xc79c), 0x00000000,
	RLC_SR_REG(0x0400, 0xc7a0), 0x00000000,
	RLC_SR_REG(0x0400, 0xc7a4), 0x00000000,
	RLC_SR_REG(0x0400, 0xc7a8), 0x00000000,
	RLC_SR_REG(0x0400, 0xc7ac), 0x00000000,
	RLC_SR_REG(0x0400, 0xc7b0), 0x00000000,
	RLC_SR_REG(0x0400, 0xc7b4), 0x00000000,
	RLC_SR_REG(0x0e00, 0x9100), 0x00000000,
	RLC_SR_REG(0x0e00, 0x3c010), 0x00000000,
	RLC_SR_REG(0x0e00, 0x92a8), 0x00000000,
	RLC_SR_REG(0x0e00, 0x92ac), 0x00000000,
	RLC_SR_REG(0x0e00, 0x92b4), 0x00000000,
	RLC_SR_REG(0x0e00, 0x92b8), 0x00000000,
	RLC_SR_REG(0x0e00, 0x92bc), 0x00000000,
	RLC_SR_REG(0x0e00, 0x92c0), 0x00000000,
	RLC_SR_REG(0x0e00, 0x92c4), 0x00000000,
	RLC_SR_REG(0x0e00, 0x92c8), 0x00000000,
	RLC_SR_REG(0x0e00, 0x92cc), 0x00000000,
	RLC_SR_REG(0x0e00, 0x92d0), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8c00), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8c04), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8c20), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8c38), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8c3c), 0x00000000,
	RLC_SR_REG(0x0e00, 0xae00), 0x00000000,
	RLC_SR_REG(0x0e00, 0x9604), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac08), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac0c), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac10), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac14), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac58), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac68), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac6c), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac70), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac74), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac78), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac7c), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac80), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac84), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac88), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac8c), 0x00000000,
	RLC_SR_REG(0x0e00, 0x970c), 0x00000000,
	RLC_SR_REG(0x0e00, 0x9714), 0x00000000,
	RLC_SR_REG(0x0e00, 0x9718), 0x00000000,
	RLC_SR_REG(0x0e00, 0x971c), 0x00000000,
	RLC_SR_REG(0x0e00, 0x31068), 0x00000000,
	RLC_SR_REG(0x4e00, 0x31068), 0x00000000,
	RLC_SR_REG(0x5e00, 0x31068), 0x00000000,
	RLC_SR_REG(0x6e00, 0x31068), 0x00000000,
	RLC_SR_REG(0x7e00, 0x31068), 0x00000000,
	RLC_SR_REG(0x8e00, 0x31068), 0x00000000,
	RLC_SR_REG(0x9e00, 0x31068), 0x00000000,
	RLC_SR_REG(0xae00, 0x31068), 0x00000000,
	RLC_SR_REG(0xbe00, 0x31068), 0x00000000,
	RLC_SR_REG(0x0e00, 0xcd10), 0x00000000,
	RLC_SR_REG(0x0e00, 0xcd14), 0x00000000,
	RLC_SR_REG(0x0e00, 0x88b0), 0x00000000,
	RLC_SR_REG(0x0e00, 0x88b4), 0x00000000,
	RLC_SR_REG(0x0e00, 0x88b8), 0x00000000,
	RLC_SR_REG(0x0e00, 0x88bc), 0x00000000,
	RLC_SR_REG(0x0400, 0x89c0), 0x00000000,
	RLC_SR_REG(0x0e00, 0x88c4), 0x00000000,
	RLC_SR_REG(0x0e00, 0x88c8), 0x00000000,
	RLC_SR_REG(0x0e00, 0x88d0), 0x00000000,
	RLC_SR_REG(0x0e00, 0x88d4), 0x00000000,
	RLC_SR_REG(0x0e00, 0x88d8), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8980), 0x00000000,
	RLC_SR_REG(0x0e00, 0x30938), 0x00000000,
	RLC_SR_REG(0x0e00, 0x3093c), 0x00000000,
	RLC_SR_REG(0x0e00, 0x30940), 0x00000000,
	RLC_SR_REG(0x0e00, 0x89a0), 0x00000000,
	RLC_SR_REG(0x0e00, 0x30900), 0x00000000,
	RLC_SR_REG(0x0e00, 0x30904), 0x00000000,
	RLC_SR_REG(0x0e00, 0x89b4), 0x00000000,
	RLC_SR_REG(0x0e00, 0x3c210), 0x00000000,
	RLC_SR_REG(0x0e00, 0x3c214), 0x00000000,
	RLC_SR_REG(0x0e00, 0x3c218), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8904), 0x00000000,

	/* bare word, then five register words without a trailing zero */
	0x5,
	RLC_SR_REG(0x0e00, 0x8c28),
	RLC_SR_REG(0x0e00, 0x8c2c),
	RLC_SR_REG(0x0e00, 0x8c30),
	RLC_SR_REG(0x0e00, 0x8c34),
	RLC_SR_REG(0x0e00, 0x9600),
};

#undef RLC_SR_REG
599 
/*
 * RLC save/restore register list (kalindi).
 *
 * Layout, as visible in the table itself:
 *   - a run of (packed register word, 0x00000000) pairs,
 *   - a bare 0x3 word,
 *   - a second run of (packed register word, 0x00000000) pairs,
 *   - a bare 0x5 word followed by five packed register words that have
 *     no trailing zero word.
 *
 * A packed register word carries a 16-bit selector in bits 31:16 and the
 * register byte offset divided by four in the low bits.
 *
 * NOTE(review): the meaning of the selector values and of the 0x3 / 0x5
 * words is defined by whatever consumes this list, which is not visible
 * here -- confirm before adding or removing entries.
 */
#define RLC_SR_REG(sel, off)	(((u32)(sel) << 16) | ((u32)(off) >> 2))

static const u32 kalindi_rlc_save_restore_register_list[] =
{
	RLC_SR_REG(0x0e00, 0xc12c), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc140), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc150), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc15c), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc168), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc170), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc204), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc2b4), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc2b8), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc2bc), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc2c0), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8228), 0x00000000,
	RLC_SR_REG(0x0e00, 0x829c), 0x00000000,
	RLC_SR_REG(0x0e00, 0x869c), 0x00000000,
	RLC_SR_REG(0x0600, 0x98f4), 0x00000000,
	RLC_SR_REG(0x0e00, 0x98f8), 0x00000000,
	RLC_SR_REG(0x0e00, 0x9900), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc260), 0x00000000,
	RLC_SR_REG(0x0e00, 0x90e8), 0x00000000,
	RLC_SR_REG(0x0e00, 0x3c000), 0x00000000,
	RLC_SR_REG(0x0e00, 0x3c00c), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8c1c), 0x00000000,
	RLC_SR_REG(0x0e00, 0x9700), 0x00000000,
	RLC_SR_REG(0x0e00, 0xcd20), 0x00000000,
	RLC_SR_REG(0x4e00, 0xcd20), 0x00000000,
	RLC_SR_REG(0x5e00, 0xcd20), 0x00000000,
	RLC_SR_REG(0x6e00, 0xcd20), 0x00000000,
	RLC_SR_REG(0x7e00, 0xcd20), 0x00000000,
	RLC_SR_REG(0x0e00, 0x89bc), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8900), 0x00000000,

	/* bare word between the first and second run of pairs */
	0x3,

	RLC_SR_REG(0x0e00, 0xc130), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc134), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc1fc), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc208), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc264), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc268), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc26c), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc270), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc274), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc28c), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc290), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc294), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc298), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc2a0), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc2a4), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc2a8), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc2ac), 0x00000000,
	RLC_SR_REG(0x0e00, 0x301d0), 0x00000000,
	RLC_SR_REG(0x0e00, 0x30238), 0x00000000,
	RLC_SR_REG(0x0e00, 0x30250), 0x00000000,
	RLC_SR_REG(0x0e00, 0x30254), 0x00000000,
	RLC_SR_REG(0x0e00, 0x30258), 0x00000000,
	RLC_SR_REG(0x0e00, 0x3025c), 0x00000000,
	RLC_SR_REG(0x4e00, 0xc900), 0x00000000,
	RLC_SR_REG(0x5e00, 0xc900), 0x00000000,
	RLC_SR_REG(0x6e00, 0xc900), 0x00000000,
	RLC_SR_REG(0x7e00, 0xc900), 0x00000000,
	RLC_SR_REG(0x4e00, 0xc904), 0x00000000,
	RLC_SR_REG(0x5e00, 0xc904), 0x00000000,
	RLC_SR_REG(0x6e00, 0xc904), 0x00000000,
	RLC_SR_REG(0x7e00, 0xc904), 0x00000000,
	RLC_SR_REG(0x4e00, 0xc908), 0x00000000,
	RLC_SR_REG(0x5e00, 0xc908), 0x00000000,
	RLC_SR_REG(0x6e00, 0xc908), 0x00000000,
	RLC_SR_REG(0x7e00, 0xc908), 0x00000000,
	RLC_SR_REG(0x4e00, 0xc90c), 0x00000000,
	RLC_SR_REG(0x5e00, 0xc90c), 0x00000000,
	RLC_SR_REG(0x6e00, 0xc90c), 0x00000000,
	RLC_SR_REG(0x7e00, 0xc90c), 0x00000000,
	RLC_SR_REG(0x4e00, 0xc910), 0x00000000,
	RLC_SR_REG(0x5e00, 0xc910), 0x00000000,
	RLC_SR_REG(0x6e00, 0xc910), 0x00000000,
	RLC_SR_REG(0x7e00, 0xc910), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc99c), 0x00000000,
	RLC_SR_REG(0x0e00, 0x9834), 0x00000000,
	RLC_SR_REG(0x0000, 0x30f00), 0x00000000,
	RLC_SR_REG(0x0000, 0x30f04), 0x00000000,
	RLC_SR_REG(0x0000, 0x30f08), 0x00000000,
	RLC_SR_REG(0x0000, 0x30f0c), 0x00000000,
	RLC_SR_REG(0x0600, 0x9b7c), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8a14), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8a18), 0x00000000,
	RLC_SR_REG(0x0600, 0x30a00), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8bf0), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8bcc), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8b24), 0x00000000,
	RLC_SR_REG(0x0e00, 0x30a04), 0x00000000,
	RLC_SR_REG(0x0600, 0x30a10), 0x00000000,
	RLC_SR_REG(0x0600, 0x30a14), 0x00000000,
	RLC_SR_REG(0x0600, 0x30a18), 0x00000000,
	RLC_SR_REG(0x0600, 0x30a2c), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc700), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc704), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc708), 0x00000000,
	RLC_SR_REG(0x0e00, 0xc768), 0x00000000,
	RLC_SR_REG(0x0400, 0xc770), 0x00000000,
	RLC_SR_REG(0x0400, 0xc774), 0x00000000,
	RLC_SR_REG(0x0400, 0xc798), 0x00000000,
	RLC_SR_REG(0x0400, 0xc79c), 0x00000000,
	RLC_SR_REG(0x0e00, 0x9100), 0x00000000,
	RLC_SR_REG(0x0e00, 0x3c010), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8c00), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8c04), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8c20), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8c38), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8c3c), 0x00000000,
	RLC_SR_REG(0x0e00, 0xae00), 0x00000000,
	RLC_SR_REG(0x0e00, 0x9604), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac08), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac0c), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac10), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac14), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac58), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac68), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac6c), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac70), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac74), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac78), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac7c), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac80), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac84), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac88), 0x00000000,
	RLC_SR_REG(0x0e00, 0xac8c), 0x00000000,
	RLC_SR_REG(0x0e00, 0x970c), 0x00000000,
	RLC_SR_REG(0x0e00, 0x9714), 0x00000000,
	RLC_SR_REG(0x0e00, 0x9718), 0x00000000,
	RLC_SR_REG(0x0e00, 0x971c), 0x00000000,
	RLC_SR_REG(0x0e00, 0x31068), 0x00000000,
	RLC_SR_REG(0x4e00, 0x31068), 0x00000000,
	RLC_SR_REG(0x5e00, 0x31068), 0x00000000,
	RLC_SR_REG(0x6e00, 0x31068), 0x00000000,
	RLC_SR_REG(0x7e00, 0x31068), 0x00000000,
	RLC_SR_REG(0x0e00, 0xcd10), 0x00000000,
	RLC_SR_REG(0x0e00, 0xcd14), 0x00000000,
	RLC_SR_REG(0x0e00, 0x88b0), 0x00000000,
	RLC_SR_REG(0x0e00, 0x88b4), 0x00000000,
	RLC_SR_REG(0x0e00, 0x88b8), 0x00000000,
	RLC_SR_REG(0x0e00, 0x88bc), 0x00000000,
	RLC_SR_REG(0x0400, 0x89c0), 0x00000000,
	RLC_SR_REG(0x0e00, 0x88c4), 0x00000000,
	RLC_SR_REG(0x0e00, 0x88c8), 0x00000000,
	RLC_SR_REG(0x0e00, 0x88d0), 0x00000000,
	RLC_SR_REG(0x0e00, 0x88d4), 0x00000000,
	RLC_SR_REG(0x0e00, 0x88d8), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8980), 0x00000000,
	RLC_SR_REG(0x0e00, 0x30938), 0x00000000,
	RLC_SR_REG(0x0e00, 0x3093c), 0x00000000,
	RLC_SR_REG(0x0e00, 0x30940), 0x00000000,
	RLC_SR_REG(0x0e00, 0x89a0), 0x00000000,
	RLC_SR_REG(0x0e00, 0x30900), 0x00000000,
	RLC_SR_REG(0x0e00, 0x30904), 0x00000000,
	RLC_SR_REG(0x0e00, 0x89b4), 0x00000000,
	RLC_SR_REG(0x0e00, 0x3e1fc), 0x00000000,
	RLC_SR_REG(0x0e00, 0x3c210), 0x00000000,
	RLC_SR_REG(0x0e00, 0x3c214), 0x00000000,
	RLC_SR_REG(0x0e00, 0x3c218), 0x00000000,
	RLC_SR_REG(0x0e00, 0x8904), 0x00000000,

	/* bare word, then five register words without a trailing zero */
	0x5,
	RLC_SR_REG(0x0e00, 0x8c28),
	RLC_SR_REG(0x0e00, 0x8c2c),
	RLC_SR_REG(0x0e00, 0x8c30),
	RLC_SR_REG(0x0e00, 0x8c34),
	RLC_SR_REG(0x0e00, 0x9600),
};

#undef RLC_SR_REG
924 
/*
 * Bonaire "golden" SPM register table: a single three-word row.
 * NOTE(review): rows are presumably (register offset, mask, value) for a
 * register-programming helper that is not visible here -- confirm.
 */
static const u32 bonaire_golden_spm_registers[] = {
	0x30800, 0xe0ffffff, 0xe0000000,
};
929 
/*
 * Bonaire "golden" common register table: four three-word rows.
 * NOTE(review): rows are presumably (register offset, mask, value) for a
 * register-programming helper that is not visible here -- confirm.
 */
static const u32 bonaire_golden_common_registers[] = {
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,

	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf,
};
937 
/*
 * Bonaire "golden" register table: 41 three-word rows.
 * NOTE(review): rows are presumably (register offset, mask, value) for a
 * register-programming helper that is not visible here -- confirm.
 */
static const u32 bonaire_golden_registers[] = {
	0x3354, 0x00000333, 0x00000333,
	0x3350, 0x000c0fc0, 0x00040200,
	0x9a10, 0x00010000, 0x00058208,

	0x3c000, 0xffff1fff, 0x00140000,
	0x3c200, 0xfdfc0fff, 0x00000100,
	0x3c234, 0x40000000, 0x40000200,

	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,

	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,

	/* seven rows sharing the mask 0x00007fb6 */
	0x220c, 0x00007fb6, 0x0021a1b1,
	0x2210, 0x00007fb6, 0x002021b1,
	0x2180, 0x00007fb6, 0x00002191,
	0x2218, 0x00007fb6, 0x002121b1,
	0x221c, 0x00007fb6, 0x002021b1,
	0x21dc, 0x00007fb6, 0x00002191,
	0x21e0, 0x00007fb6, 0x00002191,

	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,

	0x8a14, 0xf000003f, 0x00000007,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0x9100, 0x03000000, 0x0362c688,
	0x8c00, 0x000000ff, 0x00000001,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac0c, 0xffffffff, 0x00001032,
};
982 
/*
 * Bonaire MGCG/CGCG init table: 82 three-word rows.
 * NOTE(review): rows are presumably (register offset, mask, value) for a
 * register-programming helper that is not visible here -- confirm.
 * Offsets 0x30800 and 0x55e4 each appear in two rows; row order is kept
 * exactly as it was.
 */
static const u32 bonaire_mgcg_cgcg_init[] = {
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,

	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0xc0000100,
	0x3c2c8, 0xffffffff, 0xc0000100,
	0x3c2c4, 0xffffffff, 0xc0000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,

	/*
	 * 0x3c020..0x3c0a8: the same five values repeated over seven
	 * consecutive groups of five registers.
	 */
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,

	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,

	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,

	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,

	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,

	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,

	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,

	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100,
};
1068 
/*
 * Spectre "golden" SPM register table: a single three-word row.
 * NOTE(review): rows are presumably (register offset, mask, value) for a
 * register-programming helper that is not visible here -- confirm.
 */
static const u32 spectre_golden_spm_registers[] = {
	0x30800, 0xe0ffffff, 0xe0000000,
};
1073 
/*
 * Spectre "golden" common register table: four three-word rows.
 * NOTE(review): rows are presumably (register offset, mask, value) for a
 * register-programming helper that is not visible here -- confirm.
 */
static const u32 spectre_golden_common_registers[] = {
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,

	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf,
};
1081 
/*
 * Second table cik_init_golden_registers() hands to
 * radeon_program_register_sequence() for CHIP_KAVERI.
 * Three u32s per row; presumably (register offset, mask, value) --
 * confirm against radeon_program_register_sequence().
 *
 * The first column of every row is a multiple of 4; the row that
 * follows 0x28350 used to read 0x28355, which broke that pattern and
 * is corrected to 0x28354 here.
 */
static const u32 spectre_golden_registers[] =
{
	0x3c000, 0xffff1fff, 0x96940200,
	0x3c00c, 0xffff0001, 0xff000000,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0xfffffffc, 0x00020200,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x2f48, 0x73773777, 0x12010001,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x28354, 0x0000003f, 0x00000000,
	0x3e78, 0x00000001, 0x00000002,
	0x913c, 0xffff03df, 0x00000004,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000008ff, 0x00000800,
	0x9508, 0x00010000, 0x00010000,
	0xac0c, 0xffffffff, 0x54763210,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x30934, 0xffffffff, 0x00000001
};
1110 
/*
 * First table cik_init_golden_registers() hands to
 * radeon_program_register_sequence() for CHIP_KAVERI.
 * Three u32s per row; presumably (register offset, mask, value) --
 * confirm against radeon_program_register_sequence().
 */
static const u32 spectre_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1201 
/*
 * Last of the four tables cik_init_golden_registers() hands to
 * radeon_program_register_sequence() for CHIP_KABINI.
 * Three u32s per row; presumably (register offset, mask, value) --
 * confirm against radeon_program_register_sequence().
 */
static const u32 kalindi_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1206 
/*
 * Third table cik_init_golden_registers() hands to
 * radeon_program_register_sequence() for CHIP_KABINI.
 * Three u32s per row; presumably (register offset, mask, value) --
 * confirm against radeon_program_register_sequence().
 */
static const u32 kalindi_golden_common_registers[] =
{
	0xc770, 0xffffffff, 0x00000800,
	0xc774, 0xffffffff, 0x00000800,
	0xc798, 0xffffffff, 0x00007fbf,
	0xc79c, 0xffffffff, 0x00007faf
};
1214 
/*
 * Second table cik_init_golden_registers() hands to
 * radeon_program_register_sequence() for CHIP_KABINI.
 * Three u32s per row; presumably (register offset, mask, value) --
 * confirm against radeon_program_register_sequence().
 */
static const u32 kalindi_golden_registers[] =
{
	0x3c000, 0xffffdfff, 0x6e944040,
	0x55e4, 0xff607fff, 0xfc000100,
	0x3c220, 0xff000fff, 0x00000100,
	0x3c224, 0xff000fff, 0x00000100,
	0x3c200, 0xfffc0fff, 0x00000100,
	0x6ed8, 0x00010101, 0x00010000,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x98f8, 0x73773777, 0x12010001,
	0x98fc, 0xffffffff, 0x00000010,
	0x9b7c, 0x00ff0000, 0x00fc0000,
	0x8030, 0x00001f0f, 0x0000100a,
	0x2f48, 0x73773777, 0x12010001,
	0x2408, 0x000fffff, 0x000c007f,
	0x8a14, 0xf000003f, 0x00000007,
	0x8b24, 0x3fff3fff, 0x00ffcfff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x4d8, 0x00000fff, 0x00000100,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0x8c00, 0x000000ff, 0x00000003,
	0x214f8, 0x01ff01ff, 0x00000002,
	0x21498, 0x007ff800, 0x00200000,
	0x2015c, 0xffffffff, 0x00000f40,
	0x88c4, 0x001f3ae3, 0x00000082,
	0x88d4, 0x0000001f, 0x00000010,
	0x30934, 0xffffffff, 0x00000000
};
1248 
/*
 * First table cik_init_golden_registers() hands to
 * radeon_program_register_sequence() for CHIP_KABINI.
 * Three u32s per row; presumably (register offset, mask, value) --
 * confirm against radeon_program_register_sequence().
 */
static const u32 kalindi_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffc,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00600100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c000, 0xffffffff, 0x96e00200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xC060000C,
	0x224, 0xc0000fff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1307 
/*
 * Last of the four tables cik_init_golden_registers() hands to
 * radeon_program_register_sequence() for CHIP_HAWAII.
 * Three u32s per row; presumably (register offset, mask, value) --
 * confirm against radeon_program_register_sequence().
 */
static const u32 hawaii_golden_spm_registers[] =
{
	0x30800, 0xe0ffffff, 0xe0000000
};
1312 
/*
 * Third table cik_init_golden_registers() hands to
 * radeon_program_register_sequence() for CHIP_HAWAII.
 * Three u32s per row; presumably (register offset, mask, value) --
 * confirm against radeon_program_register_sequence().
 */
static const u32 hawaii_golden_common_registers[] =
{
	0x30800, 0xffffffff, 0xe0000000,
	0x28350, 0xffffffff, 0x3a00161a,
	0x28354, 0xffffffff, 0x0000002e,
	0x9a10, 0xffffffff, 0x00018208,
	0x98f8, 0xffffffff, 0x12011003
};
1321 
/*
 * Second table cik_init_golden_registers() hands to
 * radeon_program_register_sequence() for CHIP_HAWAII.
 * Three u32s per row; presumably (register offset, mask, value) --
 * confirm against radeon_program_register_sequence().
 */
static const u32 hawaii_golden_registers[] =
{
	0x3354, 0x00000333, 0x00000333,
	0x9a10, 0x00010000, 0x00058208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0xf0311fff, 0x80300000,
	0x350c, 0x00810000, 0x408af000,
	0x7030, 0x31000111, 0x00000011,
	0x2f48, 0x73773777, 0x12010001,
	0x2120, 0x0000007f, 0x0000001b,
	0x21dc, 0x00007fb6, 0x00002191,
	0x3628, 0x0000003f, 0x0000000a,
	0x362c, 0x0000003f, 0x0000000a,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8bf0, 0x00002001, 0x00000001,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x30a04, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x06000000,
	0x3e78, 0x00000001, 0x00000002,
	0xc768, 0x00000008, 0x00000008,
	0xc770, 0x00000f00, 0x00000800,
	0xc774, 0x00000f00, 0x00000800,
	0xc798, 0x00ffffff, 0x00ff7fbf,
	0xc79c, 0x00ffffff, 0x00ff7faf,
	0x8c00, 0x000000ff, 0x00000800,
	0xe40, 0x00001fff, 0x00001fff,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xae00, 0x00100000, 0x000ff07c,
	0xac14, 0x000003ff, 0x0000000f,
	0xac10, 0xffffffff, 0x7564fdec,
	0xac0c, 0xffffffff, 0x3120b9a8,
	0xac08, 0x20000000, 0x0f9c0000
};
1361 
/*
 * First table cik_init_golden_registers() hands to
 * radeon_program_register_sequence() for CHIP_HAWAII.
 * Three u32s per row; presumably (register offset, mask, value) --
 * confirm against radeon_program_register_sequence().
 */
static const u32 hawaii_mgcg_cgcg_init[] =
{
	0xc420, 0xffffffff, 0xfffffffd,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c2a0, 0xffffffff, 0x00000100,
	0x3c208, 0xffffffff, 0x00000100,
	0x3c2c0, 0xffffffff, 0x00000100,
	0x3c2c8, 0xffffffff, 0x00000100,
	0x3c2c4, 0xffffffff, 0x00000100,
	0x55e4, 0xffffffff, 0x00200100,
	0x3c280, 0xffffffff, 0x00000100,
	0x3c214, 0xffffffff, 0x06000100,
	0x3c220, 0xffffffff, 0x00000100,
	0x3c218, 0xffffffff, 0x06000100,
	0x3c204, 0xffffffff, 0x00000100,
	0x3c2e0, 0xffffffff, 0x00000100,
	0x3c224, 0xffffffff, 0x00000100,
	0x3c200, 0xffffffff, 0x00000100,
	0x3c230, 0xffffffff, 0x00000100,
	0x3c234, 0xffffffff, 0x00000100,
	0x3c250, 0xffffffff, 0x00000100,
	0x3c254, 0xffffffff, 0x00000100,
	0x3c258, 0xffffffff, 0x00000100,
	0x3c25c, 0xffffffff, 0x00000100,
	0x3c260, 0xffffffff, 0x00000100,
	0x3c27c, 0xffffffff, 0x00000100,
	0x3c278, 0xffffffff, 0x00000100,
	0x3c210, 0xffffffff, 0x06000100,
	0x3c290, 0xffffffff, 0x00000100,
	0x3c274, 0xffffffff, 0x00000100,
	0x3c2b4, 0xffffffff, 0x00000100,
	0x3c2b0, 0xffffffff, 0x00000100,
	0x3c270, 0xffffffff, 0x00000100,
	0x30800, 0xffffffff, 0xe0000000,
	0x3c020, 0xffffffff, 0x00010000,
	0x3c024, 0xffffffff, 0x00030002,
	0x3c028, 0xffffffff, 0x00040007,
	0x3c02c, 0xffffffff, 0x00060005,
	0x3c030, 0xffffffff, 0x00090008,
	0x3c034, 0xffffffff, 0x00010000,
	0x3c038, 0xffffffff, 0x00030002,
	0x3c03c, 0xffffffff, 0x00040007,
	0x3c040, 0xffffffff, 0x00060005,
	0x3c044, 0xffffffff, 0x00090008,
	0x3c048, 0xffffffff, 0x00010000,
	0x3c04c, 0xffffffff, 0x00030002,
	0x3c050, 0xffffffff, 0x00040007,
	0x3c054, 0xffffffff, 0x00060005,
	0x3c058, 0xffffffff, 0x00090008,
	0x3c05c, 0xffffffff, 0x00010000,
	0x3c060, 0xffffffff, 0x00030002,
	0x3c064, 0xffffffff, 0x00040007,
	0x3c068, 0xffffffff, 0x00060005,
	0x3c06c, 0xffffffff, 0x00090008,
	0x3c070, 0xffffffff, 0x00010000,
	0x3c074, 0xffffffff, 0x00030002,
	0x3c078, 0xffffffff, 0x00040007,
	0x3c07c, 0xffffffff, 0x00060005,
	0x3c080, 0xffffffff, 0x00090008,
	0x3c084, 0xffffffff, 0x00010000,
	0x3c088, 0xffffffff, 0x00030002,
	0x3c08c, 0xffffffff, 0x00040007,
	0x3c090, 0xffffffff, 0x00060005,
	0x3c094, 0xffffffff, 0x00090008,
	0x3c098, 0xffffffff, 0x00010000,
	0x3c09c, 0xffffffff, 0x00030002,
	0x3c0a0, 0xffffffff, 0x00040007,
	0x3c0a4, 0xffffffff, 0x00060005,
	0x3c0a8, 0xffffffff, 0x00090008,
	0x3c0ac, 0xffffffff, 0x00010000,
	0x3c0b0, 0xffffffff, 0x00030002,
	0x3c0b4, 0xffffffff, 0x00040007,
	0x3c0b8, 0xffffffff, 0x00060005,
	0x3c0bc, 0xffffffff, 0x00090008,
	0x3c0c0, 0xffffffff, 0x00010000,
	0x3c0c4, 0xffffffff, 0x00030002,
	0x3c0c8, 0xffffffff, 0x00040007,
	0x3c0cc, 0xffffffff, 0x00060005,
	0x3c0d0, 0xffffffff, 0x00090008,
	0x3c0d4, 0xffffffff, 0x00010000,
	0x3c0d8, 0xffffffff, 0x00030002,
	0x3c0dc, 0xffffffff, 0x00040007,
	0x3c0e0, 0xffffffff, 0x00060005,
	0x3c0e4, 0xffffffff, 0x00090008,
	0x3c0e8, 0xffffffff, 0x00010000,
	0x3c0ec, 0xffffffff, 0x00030002,
	0x3c0f0, 0xffffffff, 0x00040007,
	0x3c0f4, 0xffffffff, 0x00060005,
	0x3c0f8, 0xffffffff, 0x00090008,
	0xc318, 0xffffffff, 0x00020200,
	0x3350, 0xffffffff, 0x00000200,
	0x15c0, 0xffffffff, 0x00000400,
	0x55e8, 0xffffffff, 0x00000000,
	0x2f50, 0xffffffff, 0x00000902,
	0x3c000, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc424, 0xffffffff, 0x0020003f,
	0x38, 0xffffffff, 0x0140001c,
	0x3c, 0x000f0000, 0x000f0000,
	0x220, 0xffffffff, 0xc060000c,
	0x224, 0xc0000fff, 0x00000100,
	0xf90, 0xffffffff, 0x00000100,
	0xf98, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd00c, 0xff000ff0, 0x00000100,
	0xd80c, 0xff000ff0, 0x00000100
};
1472 
1473 static void cik_init_golden_registers(struct radeon_device *rdev)
1474 {
1475 	switch (rdev->family) {
1476 	case CHIP_BONAIRE:
1477 		radeon_program_register_sequence(rdev,
1478 						 bonaire_mgcg_cgcg_init,
1479 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
1480 		radeon_program_register_sequence(rdev,
1481 						 bonaire_golden_registers,
1482 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
1483 		radeon_program_register_sequence(rdev,
1484 						 bonaire_golden_common_registers,
1485 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
1486 		radeon_program_register_sequence(rdev,
1487 						 bonaire_golden_spm_registers,
1488 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
1489 		break;
1490 	case CHIP_KABINI:
1491 		radeon_program_register_sequence(rdev,
1492 						 kalindi_mgcg_cgcg_init,
1493 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
1494 		radeon_program_register_sequence(rdev,
1495 						 kalindi_golden_registers,
1496 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
1497 		radeon_program_register_sequence(rdev,
1498 						 kalindi_golden_common_registers,
1499 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
1500 		radeon_program_register_sequence(rdev,
1501 						 kalindi_golden_spm_registers,
1502 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
1503 		break;
1504 	case CHIP_KAVERI:
1505 		radeon_program_register_sequence(rdev,
1506 						 spectre_mgcg_cgcg_init,
1507 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
1508 		radeon_program_register_sequence(rdev,
1509 						 spectre_golden_registers,
1510 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
1511 		radeon_program_register_sequence(rdev,
1512 						 spectre_golden_common_registers,
1513 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
1514 		radeon_program_register_sequence(rdev,
1515 						 spectre_golden_spm_registers,
1516 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
1517 		break;
1518 	case CHIP_HAWAII:
1519 		radeon_program_register_sequence(rdev,
1520 						 hawaii_mgcg_cgcg_init,
1521 						 (const u32)ARRAY_SIZE(hawaii_mgcg_cgcg_init));
1522 		radeon_program_register_sequence(rdev,
1523 						 hawaii_golden_registers,
1524 						 (const u32)ARRAY_SIZE(hawaii_golden_registers));
1525 		radeon_program_register_sequence(rdev,
1526 						 hawaii_golden_common_registers,
1527 						 (const u32)ARRAY_SIZE(hawaii_golden_common_registers));
1528 		radeon_program_register_sequence(rdev,
1529 						 hawaii_golden_spm_registers,
1530 						 (const u32)ARRAY_SIZE(hawaii_golden_spm_registers));
1531 		break;
1532 	default:
1533 		break;
1534 	}
1535 }
1536 
1537 /**
1538  * cik_get_xclk - get the xclk
1539  *
1540  * @rdev: radeon_device pointer
1541  *
1542  * Returns the reference clock used by the gfx engine
1543  * (CIK).
1544  */
1545 u32 cik_get_xclk(struct radeon_device *rdev)
1546 {
1547         u32 reference_clock = rdev->clock.spll.reference_freq;
1548 
1549 	if (rdev->flags & RADEON_IS_IGP) {
1550 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
1551 			return reference_clock / 2;
1552 	} else {
1553 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1554 			return reference_clock / 4;
1555 	}
1556 	return reference_clock;
1557 }
1558 
1559 /**
1560  * cik_mm_rdoorbell - read a doorbell dword
1561  *
1562  * @rdev: radeon_device pointer
1563  * @index: doorbell index
1564  *
1565  * Returns the value in the doorbell aperture at the
1566  * requested doorbell index (CIK).
1567  */
1568 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 index)
1569 {
1570 	if (index < rdev->doorbell.num_doorbells) {
1571 		return readl(rdev->doorbell.ptr + index);
1572 	} else {
1573 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index);
1574 		return 0;
1575 	}
1576 }
1577 
1578 /**
1579  * cik_mm_wdoorbell - write a doorbell dword
1580  *
1581  * @rdev: radeon_device pointer
1582  * @index: doorbell index
1583  * @v: value to write
1584  *
1585  * Writes @v to the doorbell aperture at the
1586  * requested doorbell index (CIK).
1587  */
1588 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 index, u32 v)
1589 {
1590 	if (index < rdev->doorbell.num_doorbells) {
1591 		writel(v, rdev->doorbell.ptr + index);
1592 	} else {
1593 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index);
1594 	}
1595 }
1596 
/* Number of {index, data} rows in bonaire_io_mc_regs. */
#define BONAIRE_IO_MC_REGS_SIZE 36

/*
 * MC io register setup used by ci_mc_load_microcode() for CHIP_BONAIRE.
 * For each row, the first value is written to MC_SEQ_IO_DEBUG_INDEX and
 * the second to MC_SEQ_IO_DEBUG_DATA.
 */
static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
{
	{0x00000070, 0x04400000},
	{0x00000071, 0x80c01803},
	{0x00000072, 0x00004004},
	{0x00000073, 0x00000100},
	{0x00000074, 0x00ff0000},
	{0x00000075, 0x34000000},
	{0x00000076, 0x08000014},
	{0x00000077, 0x00cc08ec},
	{0x00000078, 0x00000400},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x04090000},
	{0x0000007c, 0x00000000},
	{0x0000007e, 0x4408a8e8},
	{0x0000007f, 0x00000304},
	{0x00000080, 0x00000000},
	{0x00000082, 0x00000001},
	{0x00000083, 0x00000002},
	{0x00000084, 0xf3e4f400},
	{0x00000085, 0x052024e3},
	{0x00000087, 0x00000000},
	{0x00000088, 0x01000000},
	{0x0000008a, 0x1c0a0000},
	{0x0000008b, 0xff010000},
	{0x0000008d, 0xffffefff},
	{0x0000008e, 0xfff3efff},
	{0x0000008f, 0xfff3efbf},
	{0x00000092, 0xf7ffffff},
	{0x00000093, 0xffffff7f},
	{0x00000095, 0x00101101},
	{0x00000096, 0x00000fff},
	{0x00000097, 0x00116fff},
	{0x00000098, 0x60010000},
	{0x00000099, 0x10010000},
	{0x0000009a, 0x00006000},
	{0x0000009b, 0x00001000},
	{0x0000009f, 0x00b48000}
};
1638 
/* Number of {index, data} rows in hawaii_io_mc_regs. */
#define HAWAII_IO_MC_REGS_SIZE 22

/*
 * MC io register setup used by ci_mc_load_microcode() for CHIP_HAWAII.
 * For each row, the first value is written to MC_SEQ_IO_DEBUG_INDEX and
 * the second to MC_SEQ_IO_DEBUG_DATA.
 */
static const u32 hawaii_io_mc_regs[HAWAII_IO_MC_REGS_SIZE][2] =
{
	{0x0000007d, 0x40000000},
	{0x0000007e, 0x40180304},
	{0x0000007f, 0x0000ff00},
	{0x00000081, 0x00000000},
	{0x00000083, 0x00000800},
	{0x00000086, 0x00000000},
	{0x00000087, 0x00000100},
	{0x00000088, 0x00020100},
	{0x00000089, 0x00000000},
	{0x0000008b, 0x00040000},
	{0x0000008c, 0x00000100},
	{0x0000008e, 0xff010000},
	{0x00000090, 0xffffefff},
	{0x00000091, 0xfff3efff},
	{0x00000092, 0xfff3efbf},
	{0x00000093, 0xf7ffffff},
	{0x00000094, 0xffffff7f},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x0000009f, 0x00c79000}
};
1666 
1667 
1668 /**
1669  * cik_srbm_select - select specific register instances
1670  *
1671  * @rdev: radeon_device pointer
1672  * @me: selected ME (micro engine)
1673  * @pipe: pipe
1674  * @queue: queue
1675  * @vmid: VMID
1676  *
1677  * Switches the currently active registers instances.  Some
1678  * registers are instanced per VMID, others are instanced per
1679  * me/pipe/queue combination.
1680  */
1681 static void cik_srbm_select(struct radeon_device *rdev,
1682 			    u32 me, u32 pipe, u32 queue, u32 vmid)
1683 {
1684 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
1685 			     MEID(me & 0x3) |
1686 			     VMID(vmid & 0xf) |
1687 			     QUEUEID(queue & 0x7));
1688 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
1689 }
1690 
1691 /* ucode loading */
1692 /**
1693  * ci_mc_load_microcode - load MC ucode into the hw
1694  *
1695  * @rdev: radeon_device pointer
1696  *
1697  * Load the GDDR MC ucode into the hw (CIK).
1698  * Returns 0 on success, error on failure.
1699  */
1700 int ci_mc_load_microcode(struct radeon_device *rdev)
1701 {
1702 	const __be32 *fw_data;
1703 	u32 running, blackout = 0;
1704 	u32 *io_mc_regs;
1705 	int i, ucode_size, regs_size;
1706 
1707 	if (!rdev->mc_fw)
1708 		return -EINVAL;
1709 
1710 	switch (rdev->family) {
1711 	case CHIP_BONAIRE:
1712 		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
1713 		ucode_size = CIK_MC_UCODE_SIZE;
1714 		regs_size = BONAIRE_IO_MC_REGS_SIZE;
1715 		break;
1716 	case CHIP_HAWAII:
1717 		io_mc_regs = (u32 *)&hawaii_io_mc_regs;
1718 		ucode_size = HAWAII_MC_UCODE_SIZE;
1719 		regs_size = HAWAII_IO_MC_REGS_SIZE;
1720 		break;
1721 	default:
1722 		return -EINVAL;
1723 	}
1724 
1725 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1726 
1727 	if (running == 0) {
1728 		if (running) {
1729 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1730 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1731 		}
1732 
1733 		/* reset the engine and set to writable */
1734 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1735 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1736 
1737 		/* load mc io regs */
1738 		for (i = 0; i < regs_size; i++) {
1739 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1740 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1741 		}
1742 		/* load the MC ucode */
1743 		fw_data = (const __be32 *)rdev->mc_fw->data;
1744 		for (i = 0; i < ucode_size; i++)
1745 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1746 
1747 		/* put the engine back into the active state */
1748 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1749 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1750 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1751 
1752 		/* wait for training to complete */
1753 		for (i = 0; i < rdev->usec_timeout; i++) {
1754 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1755 				break;
1756 			udelay(1);
1757 		}
1758 		for (i = 0; i < rdev->usec_timeout; i++) {
1759 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1760 				break;
1761 			udelay(1);
1762 		}
1763 
1764 		if (running)
1765 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1766 	}
1767 
1768 	return 0;
1769 }
1770 
1771 /**
1772  * cik_init_microcode - load ucode images from disk
1773  *
1774  * @rdev: radeon_device pointer
1775  *
1776  * Use the firmware interface to load the ucode images into
1777  * the driver (not loaded into hw).
1778  * Returns 0 on success, error on failure.
1779  */
1780 static int cik_init_microcode(struct radeon_device *rdev)
1781 {
1782 	const char *chip_name;
1783 	size_t pfp_req_size, me_req_size, ce_req_size,
1784 		mec_req_size, rlc_req_size, mc_req_size = 0,
1785 		sdma_req_size, smc_req_size = 0;
1786 	char fw_name[30];
1787 	int err;
1788 
1789 	DRM_DEBUG("\n");
1790 
1791 	switch (rdev->family) {
1792 	case CHIP_BONAIRE:
1793 		chip_name = "BONAIRE";
1794 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1795 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1796 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1797 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1798 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1799 		mc_req_size = CIK_MC_UCODE_SIZE * 4;
1800 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1801 		smc_req_size = ALIGN(BONAIRE_SMC_UCODE_SIZE, 4);
1802 		break;
1803 	case CHIP_HAWAII:
1804 		chip_name = "HAWAII";
1805 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1806 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1807 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1808 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1809 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
1810 		mc_req_size = HAWAII_MC_UCODE_SIZE * 4;
1811 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1812 		smc_req_size = ALIGN(HAWAII_SMC_UCODE_SIZE, 4);
1813 		break;
1814 	case CHIP_KAVERI:
1815 		chip_name = "KAVERI";
1816 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1817 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1818 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1819 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1820 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
1821 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1822 		break;
1823 	case CHIP_KABINI:
1824 		chip_name = "KABINI";
1825 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
1826 		me_req_size = CIK_ME_UCODE_SIZE * 4;
1827 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
1828 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
1829 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
1830 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
1831 		break;
1832 	default: BUG();
1833 	}
1834 
1835 	DRM_INFO("Loading %s Microcode\n", chip_name);
1836 
1837 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1838 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1839 	if (err)
1840 		goto out;
1841 	if (rdev->pfp_fw->size != pfp_req_size) {
1842 		printk(KERN_ERR
1843 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1844 		       rdev->pfp_fw->size, fw_name);
1845 		err = -EINVAL;
1846 		goto out;
1847 	}
1848 
1849 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1850 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1851 	if (err)
1852 		goto out;
1853 	if (rdev->me_fw->size != me_req_size) {
1854 		printk(KERN_ERR
1855 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1856 		       rdev->me_fw->size, fw_name);
1857 		err = -EINVAL;
1858 	}
1859 
1860 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1861 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1862 	if (err)
1863 		goto out;
1864 	if (rdev->ce_fw->size != ce_req_size) {
1865 		printk(KERN_ERR
1866 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1867 		       rdev->ce_fw->size, fw_name);
1868 		err = -EINVAL;
1869 	}
1870 
1871 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
1872 	err = request_firmware(&rdev->mec_fw, fw_name, rdev->dev);
1873 	if (err)
1874 		goto out;
1875 	if (rdev->mec_fw->size != mec_req_size) {
1876 		printk(KERN_ERR
1877 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
1878 		       rdev->mec_fw->size, fw_name);
1879 		err = -EINVAL;
1880 	}
1881 
1882 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1883 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1884 	if (err)
1885 		goto out;
1886 	if (rdev->rlc_fw->size != rlc_req_size) {
1887 		printk(KERN_ERR
1888 		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
1889 		       rdev->rlc_fw->size, fw_name);
1890 		err = -EINVAL;
1891 	}
1892 
1893 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
1894 	err = request_firmware(&rdev->sdma_fw, fw_name, rdev->dev);
1895 	if (err)
1896 		goto out;
1897 	if (rdev->sdma_fw->size != sdma_req_size) {
1898 		printk(KERN_ERR
1899 		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
1900 		       rdev->sdma_fw->size, fw_name);
1901 		err = -EINVAL;
1902 	}
1903 
1904 	/* No SMC, MC ucode on APUs */
1905 	if (!(rdev->flags & RADEON_IS_IGP)) {
1906 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1907 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1908 		if (err)
1909 			goto out;
1910 		if (rdev->mc_fw->size != mc_req_size) {
1911 			printk(KERN_ERR
1912 			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
1913 			       rdev->mc_fw->size, fw_name);
1914 			err = -EINVAL;
1915 		}
1916 
1917 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1918 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1919 		if (err) {
1920 			printk(KERN_ERR
1921 			       "smc: error loading firmware \"%s\"\n",
1922 			       fw_name);
1923 			release_firmware(rdev->smc_fw);
1924 			rdev->smc_fw = NULL;
1925 			err = 0;
1926 		} else if (rdev->smc_fw->size != smc_req_size) {
1927 			printk(KERN_ERR
1928 			       "cik_smc: Bogus length %zu in firmware \"%s\"\n",
1929 			       rdev->smc_fw->size, fw_name);
1930 			err = -EINVAL;
1931 		}
1932 	}
1933 
1934 out:
1935 	if (err) {
1936 		if (err != -EINVAL)
1937 			printk(KERN_ERR
1938 			       "cik_cp: Failed to load firmware \"%s\"\n",
1939 			       fw_name);
1940 		release_firmware(rdev->pfp_fw);
1941 		rdev->pfp_fw = NULL;
1942 		release_firmware(rdev->me_fw);
1943 		rdev->me_fw = NULL;
1944 		release_firmware(rdev->ce_fw);
1945 		rdev->ce_fw = NULL;
1946 		release_firmware(rdev->rlc_fw);
1947 		rdev->rlc_fw = NULL;
1948 		release_firmware(rdev->mc_fw);
1949 		rdev->mc_fw = NULL;
1950 		release_firmware(rdev->smc_fw);
1951 		rdev->smc_fw = NULL;
1952 	}
1953 	return err;
1954 }
1955 
1956 /*
1957  * Core functions
1958  */
1959 /**
1960  * cik_tiling_mode_table_init - init the hw tiling table
1961  *
1962  * @rdev: radeon_device pointer
1963  *
1964  * Starting with SI, the tiling setup is done globally in a
1965  * set of 32 tiling modes.  Rather than selecting each set of
1966  * parameters per surface as on older asics, we just select
1967  * which index in the tiling table we want to use, and the
1968  * surface uses those parameters (CIK).
1969  */
1970 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
1971 {
1972 	const u32 num_tile_mode_states = 32;
1973 	const u32 num_secondary_tile_mode_states = 16;
1974 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
1975 	u32 num_pipe_configs;
1976 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
1977 		rdev->config.cik.max_shader_engines;
1978 
1979 	switch (rdev->config.cik.mem_row_size_in_kb) {
1980 	case 1:
1981 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
1982 		break;
1983 	case 2:
1984 	default:
1985 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
1986 		break;
1987 	case 4:
1988 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
1989 		break;
1990 	}
1991 
1992 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
1993 	if (num_pipe_configs > 8)
1994 		num_pipe_configs = 16;
1995 
1996 	if (num_pipe_configs == 16) {
1997 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1998 			switch (reg_offset) {
1999 			case 0:
2000 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2001 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2002 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2003 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2004 				break;
2005 			case 1:
2006 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2007 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2008 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2009 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2010 				break;
2011 			case 2:
2012 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2013 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2014 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2015 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2016 				break;
2017 			case 3:
2018 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2019 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2020 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2021 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2022 				break;
2023 			case 4:
2024 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2025 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2026 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2027 						 TILE_SPLIT(split_equal_to_row_size));
2028 				break;
2029 			case 5:
2030 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2031 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2032 				break;
2033 			case 6:
2034 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2035 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2036 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2037 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2038 				break;
2039 			case 7:
2040 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2041 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2042 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2043 						 TILE_SPLIT(split_equal_to_row_size));
2044 				break;
2045 			case 8:
2046 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2047 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
2048 				break;
2049 			case 9:
2050 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2051 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2052 				break;
2053 			case 10:
2054 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2055 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2056 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2057 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2058 				break;
2059 			case 11:
2060 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2061 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2062 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2063 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2064 				break;
2065 			case 12:
2066 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2067 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2068 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2069 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2070 				break;
2071 			case 13:
2072 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2073 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2074 				break;
2075 			case 14:
2076 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2077 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2078 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2079 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2080 				break;
2081 			case 16:
2082 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2083 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2084 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2085 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2086 				break;
2087 			case 17:
2088 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2089 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2090 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2091 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2092 				break;
2093 			case 27:
2094 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2095 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2096 				break;
2097 			case 28:
2098 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2099 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2100 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2101 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2102 				break;
2103 			case 29:
2104 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2105 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2106 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_8x16) |
2107 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2108 				break;
2109 			case 30:
2110 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2111 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2112 						 PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
2113 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2114 				break;
2115 			default:
2116 				gb_tile_moden = 0;
2117 				break;
2118 			}
2119 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2120 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2121 		}
2122 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2123 			switch (reg_offset) {
2124 			case 0:
2125 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2126 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2127 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2128 						 NUM_BANKS(ADDR_SURF_16_BANK));
2129 				break;
2130 			case 1:
2131 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2132 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2133 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2134 						 NUM_BANKS(ADDR_SURF_16_BANK));
2135 				break;
2136 			case 2:
2137 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2138 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2139 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2140 						 NUM_BANKS(ADDR_SURF_16_BANK));
2141 				break;
2142 			case 3:
2143 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2144 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2145 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2146 						 NUM_BANKS(ADDR_SURF_16_BANK));
2147 				break;
2148 			case 4:
2149 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2150 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2151 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2152 						 NUM_BANKS(ADDR_SURF_8_BANK));
2153 				break;
2154 			case 5:
2155 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2156 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2157 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2158 						 NUM_BANKS(ADDR_SURF_4_BANK));
2159 				break;
2160 			case 6:
2161 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2162 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2163 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2164 						 NUM_BANKS(ADDR_SURF_2_BANK));
2165 				break;
2166 			case 8:
2167 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2168 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2169 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2170 						 NUM_BANKS(ADDR_SURF_16_BANK));
2171 				break;
2172 			case 9:
2173 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2174 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2175 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2176 						 NUM_BANKS(ADDR_SURF_16_BANK));
2177 				break;
2178 			case 10:
2179 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2180 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2181 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2182 						 NUM_BANKS(ADDR_SURF_16_BANK));
2183 				break;
2184 			case 11:
2185 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2186 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2187 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2188 						 NUM_BANKS(ADDR_SURF_8_BANK));
2189 				break;
2190 			case 12:
2191 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2192 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2193 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2194 						 NUM_BANKS(ADDR_SURF_4_BANK));
2195 				break;
2196 			case 13:
2197 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2198 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2199 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2200 						 NUM_BANKS(ADDR_SURF_2_BANK));
2201 				break;
2202 			case 14:
2203 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2204 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2205 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2206 						 NUM_BANKS(ADDR_SURF_2_BANK));
2207 				break;
2208 			default:
2209 				gb_tile_moden = 0;
2210 				break;
2211 			}
2212 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2213 		}
2214 	} else if (num_pipe_configs == 8) {
2215 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2216 			switch (reg_offset) {
2217 			case 0:
2218 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2219 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2220 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2221 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2222 				break;
2223 			case 1:
2224 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2225 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2226 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2227 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2228 				break;
2229 			case 2:
2230 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2231 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2232 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2233 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2234 				break;
2235 			case 3:
2236 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2237 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2238 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2239 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2240 				break;
2241 			case 4:
2242 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2243 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2244 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2245 						 TILE_SPLIT(split_equal_to_row_size));
2246 				break;
2247 			case 5:
2248 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2249 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2250 				break;
2251 			case 6:
2252 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2253 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2254 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2255 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2256 				break;
2257 			case 7:
2258 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2259 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2260 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2261 						 TILE_SPLIT(split_equal_to_row_size));
2262 				break;
2263 			case 8:
2264 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2265 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
2266 				break;
2267 			case 9:
2268 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2269 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2270 				break;
2271 			case 10:
2272 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2273 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2274 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2275 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2276 				break;
2277 			case 11:
2278 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2279 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2280 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2281 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2282 				break;
2283 			case 12:
2284 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2285 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2286 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2287 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2288 				break;
2289 			case 13:
2290 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2291 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2292 				break;
2293 			case 14:
2294 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2295 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2296 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2297 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2298 				break;
2299 			case 16:
2300 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2301 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2302 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2303 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2304 				break;
2305 			case 17:
2306 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2307 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2308 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2309 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2310 				break;
2311 			case 27:
2312 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2313 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2314 				break;
2315 			case 28:
2316 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2317 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2318 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2319 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2320 				break;
2321 			case 29:
2322 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2323 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2324 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2325 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2326 				break;
2327 			case 30:
2328 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2329 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2330 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
2331 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2332 				break;
2333 			default:
2334 				gb_tile_moden = 0;
2335 				break;
2336 			}
2337 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2338 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2339 		}
2340 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2341 			switch (reg_offset) {
2342 			case 0:
2343 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2345 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2346 						 NUM_BANKS(ADDR_SURF_16_BANK));
2347 				break;
2348 			case 1:
2349 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2350 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2351 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2352 						 NUM_BANKS(ADDR_SURF_16_BANK));
2353 				break;
2354 			case 2:
2355 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2356 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2357 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2358 						 NUM_BANKS(ADDR_SURF_16_BANK));
2359 				break;
2360 			case 3:
2361 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2362 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2363 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2364 						 NUM_BANKS(ADDR_SURF_16_BANK));
2365 				break;
2366 			case 4:
2367 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2368 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2369 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2370 						 NUM_BANKS(ADDR_SURF_8_BANK));
2371 				break;
2372 			case 5:
2373 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2374 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2375 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2376 						 NUM_BANKS(ADDR_SURF_4_BANK));
2377 				break;
2378 			case 6:
2379 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2380 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2381 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2382 						 NUM_BANKS(ADDR_SURF_2_BANK));
2383 				break;
2384 			case 8:
2385 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2386 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2387 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2388 						 NUM_BANKS(ADDR_SURF_16_BANK));
2389 				break;
2390 			case 9:
2391 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2392 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2393 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2394 						 NUM_BANKS(ADDR_SURF_16_BANK));
2395 				break;
2396 			case 10:
2397 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2398 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2399 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2400 						 NUM_BANKS(ADDR_SURF_16_BANK));
2401 				break;
2402 			case 11:
2403 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2405 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2406 						 NUM_BANKS(ADDR_SURF_16_BANK));
2407 				break;
2408 			case 12:
2409 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2410 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2411 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2412 						 NUM_BANKS(ADDR_SURF_8_BANK));
2413 				break;
2414 			case 13:
2415 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2416 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2417 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2418 						 NUM_BANKS(ADDR_SURF_4_BANK));
2419 				break;
2420 			case 14:
2421 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2422 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2423 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2424 						 NUM_BANKS(ADDR_SURF_2_BANK));
2425 				break;
2426 			default:
2427 				gb_tile_moden = 0;
2428 				break;
2429 			}
2430 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2431 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2432 		}
2433 	} else if (num_pipe_configs == 4) {
2434 		if (num_rbs == 4) {
2435 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2436 				switch (reg_offset) {
2437 				case 0:
2438 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2440 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2441 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2442 					break;
2443 				case 1:
2444 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2445 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2446 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2447 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2448 					break;
2449 				case 2:
2450 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2451 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2452 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2453 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2454 					break;
2455 				case 3:
2456 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2457 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2458 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2459 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2460 					break;
2461 				case 4:
2462 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2463 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2464 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2465 							 TILE_SPLIT(split_equal_to_row_size));
2466 					break;
2467 				case 5:
2468 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2469 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2470 					break;
2471 				case 6:
2472 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2473 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2474 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2475 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2476 					break;
2477 				case 7:
2478 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2479 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2480 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2481 							 TILE_SPLIT(split_equal_to_row_size));
2482 					break;
2483 				case 8:
2484 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2485 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
2486 					break;
2487 				case 9:
2488 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2489 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2490 					break;
2491 				case 10:
2492 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2493 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2494 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2495 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2496 					break;
2497 				case 11:
2498 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2499 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2500 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2501 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2502 					break;
2503 				case 12:
2504 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2505 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2506 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2507 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2508 					break;
2509 				case 13:
2510 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2511 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2512 					break;
2513 				case 14:
2514 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2515 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2516 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2517 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2518 					break;
2519 				case 16:
2520 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2521 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2522 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2523 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2524 					break;
2525 				case 17:
2526 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2527 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2528 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2529 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2530 					break;
2531 				case 27:
2532 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2533 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2534 					break;
2535 				case 28:
2536 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2537 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2538 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2539 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2540 					break;
2541 				case 29:
2542 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2543 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2544 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2545 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2546 					break;
2547 				case 30:
2548 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2549 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2550 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
2551 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2552 					break;
2553 				default:
2554 					gb_tile_moden = 0;
2555 					break;
2556 				}
2557 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2558 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2559 			}
2560 		} else if (num_rbs < 4) {
2561 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2562 				switch (reg_offset) {
2563 				case 0:
2564 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2565 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2566 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2567 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2568 					break;
2569 				case 1:
2570 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2571 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2572 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2573 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2574 					break;
2575 				case 2:
2576 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2577 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2578 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2579 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2580 					break;
2581 				case 3:
2582 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2583 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2584 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2585 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2586 					break;
2587 				case 4:
2588 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2590 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2591 							 TILE_SPLIT(split_equal_to_row_size));
2592 					break;
2593 				case 5:
2594 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2595 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2596 					break;
2597 				case 6:
2598 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2599 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2600 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2601 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2602 					break;
2603 				case 7:
2604 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2605 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2606 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2607 							 TILE_SPLIT(split_equal_to_row_size));
2608 					break;
2609 				case 8:
2610 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2611 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
2612 					break;
2613 				case 9:
2614 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2615 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2616 					break;
2617 				case 10:
2618 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2619 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2620 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2621 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2622 					break;
2623 				case 11:
2624 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2625 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2626 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2627 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2628 					break;
2629 				case 12:
2630 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2631 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2632 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2633 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2634 					break;
2635 				case 13:
2636 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2637 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2638 					break;
2639 				case 14:
2640 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2642 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2643 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2644 					break;
2645 				case 16:
2646 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2647 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2648 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2649 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2650 					break;
2651 				case 17:
2652 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2653 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2654 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2655 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2656 					break;
2657 				case 27:
2658 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2659 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2660 					break;
2661 				case 28:
2662 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2663 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2664 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2665 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2666 					break;
2667 				case 29:
2668 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2669 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2670 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2671 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2672 					break;
2673 				case 30:
2674 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2675 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2676 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2677 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2678 					break;
2679 				default:
2680 					gb_tile_moden = 0;
2681 					break;
2682 				}
2683 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2684 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2685 			}
2686 		}
2687 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2688 			switch (reg_offset) {
2689 			case 0:
2690 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2692 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2693 						 NUM_BANKS(ADDR_SURF_16_BANK));
2694 				break;
2695 			case 1:
2696 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2697 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2698 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2699 						 NUM_BANKS(ADDR_SURF_16_BANK));
2700 				break;
2701 			case 2:
2702 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2703 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2704 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2705 						 NUM_BANKS(ADDR_SURF_16_BANK));
2706 				break;
2707 			case 3:
2708 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2709 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2710 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2711 						 NUM_BANKS(ADDR_SURF_16_BANK));
2712 				break;
2713 			case 4:
2714 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2715 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2716 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2717 						 NUM_BANKS(ADDR_SURF_16_BANK));
2718 				break;
2719 			case 5:
2720 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2721 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2722 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2723 						 NUM_BANKS(ADDR_SURF_8_BANK));
2724 				break;
2725 			case 6:
2726 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2727 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2728 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2729 						 NUM_BANKS(ADDR_SURF_4_BANK));
2730 				break;
2731 			case 8:
2732 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2733 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2734 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2735 						 NUM_BANKS(ADDR_SURF_16_BANK));
2736 				break;
2737 			case 9:
2738 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2739 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2740 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2741 						 NUM_BANKS(ADDR_SURF_16_BANK));
2742 				break;
2743 			case 10:
2744 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2745 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2746 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2747 						 NUM_BANKS(ADDR_SURF_16_BANK));
2748 				break;
2749 			case 11:
2750 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2751 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2752 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2753 						 NUM_BANKS(ADDR_SURF_16_BANK));
2754 				break;
2755 			case 12:
2756 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2757 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2758 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2759 						 NUM_BANKS(ADDR_SURF_16_BANK));
2760 				break;
2761 			case 13:
2762 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2763 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2764 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2765 						 NUM_BANKS(ADDR_SURF_8_BANK));
2766 				break;
2767 			case 14:
2768 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2769 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2770 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
2771 						 NUM_BANKS(ADDR_SURF_4_BANK));
2772 				break;
2773 			default:
2774 				gb_tile_moden = 0;
2775 				break;
2776 			}
2777 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2778 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2779 		}
2780 	} else if (num_pipe_configs == 2) {
2781 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2782 			switch (reg_offset) {
2783 			case 0:
2784 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2785 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2786 						 PIPE_CONFIG(ADDR_SURF_P2) |
2787 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
2788 				break;
2789 			case 1:
2790 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2791 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2792 						 PIPE_CONFIG(ADDR_SURF_P2) |
2793 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
2794 				break;
2795 			case 2:
2796 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2797 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2798 						 PIPE_CONFIG(ADDR_SURF_P2) |
2799 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2800 				break;
2801 			case 3:
2802 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2803 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2804 						 PIPE_CONFIG(ADDR_SURF_P2) |
2805 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
2806 				break;
2807 			case 4:
2808 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2809 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2810 						 PIPE_CONFIG(ADDR_SURF_P2) |
2811 						 TILE_SPLIT(split_equal_to_row_size));
2812 				break;
2813 			case 5:
2814 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2815 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
2816 				break;
2817 			case 6:
2818 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2819 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2820 						 PIPE_CONFIG(ADDR_SURF_P2) |
2821 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
2822 				break;
2823 			case 7:
2824 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2825 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
2826 						 PIPE_CONFIG(ADDR_SURF_P2) |
2827 						 TILE_SPLIT(split_equal_to_row_size));
2828 				break;
2829 			case 8:
2830 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
2831 				break;
2832 			case 9:
2833 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2834 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
2835 				break;
2836 			case 10:
2837 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2838 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2839 						 PIPE_CONFIG(ADDR_SURF_P2) |
2840 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2841 				break;
2842 			case 11:
2843 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2844 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2845 						 PIPE_CONFIG(ADDR_SURF_P2) |
2846 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2847 				break;
2848 			case 12:
2849 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2850 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
2851 						 PIPE_CONFIG(ADDR_SURF_P2) |
2852 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2853 				break;
2854 			case 13:
2855 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2856 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
2857 				break;
2858 			case 14:
2859 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2860 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2861 						 PIPE_CONFIG(ADDR_SURF_P2) |
2862 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2863 				break;
2864 			case 16:
2865 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2866 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2867 						 PIPE_CONFIG(ADDR_SURF_P2) |
2868 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2869 				break;
2870 			case 17:
2871 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2872 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
2873 						 PIPE_CONFIG(ADDR_SURF_P2) |
2874 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2875 				break;
2876 			case 27:
2877 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2878 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
2879 				break;
2880 			case 28:
2881 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2882 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2883 						 PIPE_CONFIG(ADDR_SURF_P2) |
2884 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2885 				break;
2886 			case 29:
2887 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
2888 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2889 						 PIPE_CONFIG(ADDR_SURF_P2) |
2890 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2891 				break;
2892 			case 30:
2893 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
2894 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
2895 						 PIPE_CONFIG(ADDR_SURF_P2) |
2896 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
2897 				break;
2898 			default:
2899 				gb_tile_moden = 0;
2900 				break;
2901 			}
2902 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
2903 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2904 		}
2905 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
2906 			switch (reg_offset) {
2907 			case 0:
2908 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2909 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2910 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2911 						 NUM_BANKS(ADDR_SURF_16_BANK));
2912 				break;
2913 			case 1:
2914 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2915 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2916 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2917 						 NUM_BANKS(ADDR_SURF_16_BANK));
2918 				break;
2919 			case 2:
2920 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2921 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2922 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2923 						 NUM_BANKS(ADDR_SURF_16_BANK));
2924 				break;
2925 			case 3:
2926 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2927 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2928 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2929 						 NUM_BANKS(ADDR_SURF_16_BANK));
2930 				break;
2931 			case 4:
2932 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2933 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2934 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2935 						 NUM_BANKS(ADDR_SURF_16_BANK));
2936 				break;
2937 			case 5:
2938 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2939 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2940 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2941 						 NUM_BANKS(ADDR_SURF_16_BANK));
2942 				break;
2943 			case 6:
2944 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2945 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2946 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2947 						 NUM_BANKS(ADDR_SURF_8_BANK));
2948 				break;
2949 			case 8:
2950 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2951 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
2952 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2953 						 NUM_BANKS(ADDR_SURF_16_BANK));
2954 				break;
2955 			case 9:
2956 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
2957 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2958 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2959 						 NUM_BANKS(ADDR_SURF_16_BANK));
2960 				break;
2961 			case 10:
2962 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2963 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2964 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2965 						 NUM_BANKS(ADDR_SURF_16_BANK));
2966 				break;
2967 			case 11:
2968 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2969 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2970 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2971 						 NUM_BANKS(ADDR_SURF_16_BANK));
2972 				break;
2973 			case 12:
2974 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2975 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2976 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2977 						 NUM_BANKS(ADDR_SURF_16_BANK));
2978 				break;
2979 			case 13:
2980 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2981 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2982 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
2983 						 NUM_BANKS(ADDR_SURF_16_BANK));
2984 				break;
2985 			case 14:
2986 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2987 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2988 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
2989 						 NUM_BANKS(ADDR_SURF_8_BANK));
2990 				break;
2991 			default:
2992 				gb_tile_moden = 0;
2993 				break;
2994 			}
2995 			rdev->config.cik.macrotile_mode_array[reg_offset] = gb_tile_moden;
2996 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2997 		}
2998 	} else
2999 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
3000 }
3001 
3002 /**
3003  * cik_select_se_sh - select which SE, SH to address
3004  *
3005  * @rdev: radeon_device pointer
3006  * @se_num: shader engine to address
3007  * @sh_num: sh block to address
3008  *
3009  * Select which SE, SH combinations to address. Certain
3010  * registers are instanced per SE or SH.  0xffffffff means
3011  * broadcast to all SEs or SHs (CIK).
3012  */
3013 static void cik_select_se_sh(struct radeon_device *rdev,
3014 			     u32 se_num, u32 sh_num)
3015 {
3016 	u32 data = INSTANCE_BROADCAST_WRITES;
3017 
3018 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
3019 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
3020 	else if (se_num == 0xffffffff)
3021 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
3022 	else if (sh_num == 0xffffffff)
3023 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
3024 	else
3025 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
3026 	WREG32(GRBM_GFX_INDEX, data);
3027 }
3028 
/**
 * cik_create_bitmask - create a bitmask
 *
 * @bit_width: length of the mask
 *
 * Create a variable length bit mask with the low @bit_width bits
 * set (CIK).  Widths of 32 or more saturate to an all-ones mask.
 * Returns the bitmask.
 */
static u32 cik_create_bitmask(u32 bit_width)
{
	/* shifting a 32-bit value by 32 or more is undefined, saturate first */
	if (bit_width >= 32)
		return 0xffffffff;

	return ((u32)1 << bit_width) - 1;
}
3047 
3048 /**
3049  * cik_select_se_sh - select which SE, SH to address
3050  *
3051  * @rdev: radeon_device pointer
3052  * @max_rb_num: max RBs (render backends) for the asic
3053  * @se_num: number of SEs (shader engines) for the asic
3054  * @sh_per_se: number of SH blocks per SE for the asic
3055  *
3056  * Calculates the bitmask of disabled RBs (CIK).
3057  * Returns the disabled RB bitmask.
3058  */
3059 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
3060 			      u32 max_rb_num_per_se,
3061 			      u32 sh_per_se)
3062 {
3063 	u32 data, mask;
3064 
3065 	data = RREG32(CC_RB_BACKEND_DISABLE);
3066 	if (data & 1)
3067 		data &= BACKEND_DISABLE_MASK;
3068 	else
3069 		data = 0;
3070 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3071 
3072 	data >>= BACKEND_DISABLE_SHIFT;
3073 
3074 	mask = cik_create_bitmask(max_rb_num_per_se / sh_per_se);
3075 
3076 	return data & mask;
3077 }
3078 
3079 /**
3080  * cik_setup_rb - setup the RBs on the asic
3081  *
3082  * @rdev: radeon_device pointer
3083  * @se_num: number of SEs (shader engines) for the asic
3084  * @sh_per_se: number of SH blocks per SE for the asic
3085  * @max_rb_num: max RBs (render backends) for the asic
3086  *
3087  * Configures per-SE/SH RB registers (CIK).
3088  */
3089 static void cik_setup_rb(struct radeon_device *rdev,
3090 			 u32 se_num, u32 sh_per_se,
3091 			 u32 max_rb_num_per_se)
3092 {
3093 	int i, j;
3094 	u32 data, mask;
3095 	u32 disabled_rbs = 0;
3096 	u32 enabled_rbs = 0;
3097 
3098 	for (i = 0; i < se_num; i++) {
3099 		for (j = 0; j < sh_per_se; j++) {
3100 			cik_select_se_sh(rdev, i, j);
3101 			data = cik_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3102 			if (rdev->family == CHIP_HAWAII)
3103 				disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
3104 			else
3105 				disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
3106 		}
3107 	}
3108 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3109 
3110 	mask = 1;
3111 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3112 		if (!(disabled_rbs & mask))
3113 			enabled_rbs |= mask;
3114 		mask <<= 1;
3115 	}
3116 
3117 	rdev->config.cik.backend_enable_mask = enabled_rbs;
3118 
3119 	for (i = 0; i < se_num; i++) {
3120 		cik_select_se_sh(rdev, i, 0xffffffff);
3121 		data = 0;
3122 		for (j = 0; j < sh_per_se; j++) {
3123 			switch (enabled_rbs & 3) {
3124 			case 0:
3125 				if (j == 0)
3126 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_3);
3127 				else
3128 					data |= PKR_MAP(RASTER_CONFIG_RB_MAP_0);
3129 				break;
3130 			case 1:
3131 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3132 				break;
3133 			case 2:
3134 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3135 				break;
3136 			case 3:
3137 			default:
3138 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3139 				break;
3140 			}
3141 			enabled_rbs >>= 2;
3142 		}
3143 		WREG32(PA_SC_RASTER_CONFIG, data);
3144 	}
3145 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3146 }
3147 
3148 /**
3149  * cik_gpu_init - setup the 3D engine
3150  *
3151  * @rdev: radeon_device pointer
3152  *
3153  * Configures the 3D engine and tiling configuration
3154  * registers so that the 3D engine is usable.
3155  */
3156 static void cik_gpu_init(struct radeon_device *rdev)
3157 {
3158 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
3159 	u32 mc_shared_chmap, mc_arb_ramcfg;
3160 	u32 hdp_host_path_cntl;
3161 	u32 tmp;
3162 	int i, j;
3163 
3164 	switch (rdev->family) {
3165 	case CHIP_BONAIRE:
3166 		rdev->config.cik.max_shader_engines = 2;
3167 		rdev->config.cik.max_tile_pipes = 4;
3168 		rdev->config.cik.max_cu_per_sh = 7;
3169 		rdev->config.cik.max_sh_per_se = 1;
3170 		rdev->config.cik.max_backends_per_se = 2;
3171 		rdev->config.cik.max_texture_channel_caches = 4;
3172 		rdev->config.cik.max_gprs = 256;
3173 		rdev->config.cik.max_gs_threads = 32;
3174 		rdev->config.cik.max_hw_contexts = 8;
3175 
3176 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3177 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3178 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3179 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3180 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3181 		break;
3182 	case CHIP_HAWAII:
3183 		rdev->config.cik.max_shader_engines = 4;
3184 		rdev->config.cik.max_tile_pipes = 16;
3185 		rdev->config.cik.max_cu_per_sh = 11;
3186 		rdev->config.cik.max_sh_per_se = 1;
3187 		rdev->config.cik.max_backends_per_se = 4;
3188 		rdev->config.cik.max_texture_channel_caches = 16;
3189 		rdev->config.cik.max_gprs = 256;
3190 		rdev->config.cik.max_gs_threads = 32;
3191 		rdev->config.cik.max_hw_contexts = 8;
3192 
3193 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3194 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3195 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3196 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3197 		gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
3198 		break;
3199 	case CHIP_KAVERI:
3200 		rdev->config.cik.max_shader_engines = 1;
3201 		rdev->config.cik.max_tile_pipes = 4;
3202 		if ((rdev->pdev->device == 0x1304) ||
3203 		    (rdev->pdev->device == 0x1305) ||
3204 		    (rdev->pdev->device == 0x130C) ||
3205 		    (rdev->pdev->device == 0x130F) ||
3206 		    (rdev->pdev->device == 0x1310) ||
3207 		    (rdev->pdev->device == 0x1311) ||
3208 		    (rdev->pdev->device == 0x131C)) {
3209 			rdev->config.cik.max_cu_per_sh = 8;
3210 			rdev->config.cik.max_backends_per_se = 2;
3211 		} else if ((rdev->pdev->device == 0x1309) ||
3212 			   (rdev->pdev->device == 0x130A) ||
3213 			   (rdev->pdev->device == 0x130D) ||
3214 			   (rdev->pdev->device == 0x1313) ||
3215 			   (rdev->pdev->device == 0x131D)) {
3216 			rdev->config.cik.max_cu_per_sh = 6;
3217 			rdev->config.cik.max_backends_per_se = 2;
3218 		} else if ((rdev->pdev->device == 0x1306) ||
3219 			   (rdev->pdev->device == 0x1307) ||
3220 			   (rdev->pdev->device == 0x130B) ||
3221 			   (rdev->pdev->device == 0x130E) ||
3222 			   (rdev->pdev->device == 0x1315) ||
3223 			   (rdev->pdev->device == 0x131B)) {
3224 			rdev->config.cik.max_cu_per_sh = 4;
3225 			rdev->config.cik.max_backends_per_se = 1;
3226 		} else {
3227 			rdev->config.cik.max_cu_per_sh = 3;
3228 			rdev->config.cik.max_backends_per_se = 1;
3229 		}
3230 		rdev->config.cik.max_sh_per_se = 1;
3231 		rdev->config.cik.max_texture_channel_caches = 4;
3232 		rdev->config.cik.max_gprs = 256;
3233 		rdev->config.cik.max_gs_threads = 16;
3234 		rdev->config.cik.max_hw_contexts = 8;
3235 
3236 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3237 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3238 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3239 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3240 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3241 		break;
3242 	case CHIP_KABINI:
3243 	default:
3244 		rdev->config.cik.max_shader_engines = 1;
3245 		rdev->config.cik.max_tile_pipes = 2;
3246 		rdev->config.cik.max_cu_per_sh = 2;
3247 		rdev->config.cik.max_sh_per_se = 1;
3248 		rdev->config.cik.max_backends_per_se = 1;
3249 		rdev->config.cik.max_texture_channel_caches = 2;
3250 		rdev->config.cik.max_gprs = 256;
3251 		rdev->config.cik.max_gs_threads = 16;
3252 		rdev->config.cik.max_hw_contexts = 8;
3253 
3254 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
3255 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
3256 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
3257 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
3258 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
3259 		break;
3260 	}
3261 
3262 	/* Initialize HDP */
3263 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3264 		WREG32((0x2c14 + j), 0x00000000);
3265 		WREG32((0x2c18 + j), 0x00000000);
3266 		WREG32((0x2c1c + j), 0x00000000);
3267 		WREG32((0x2c20 + j), 0x00000000);
3268 		WREG32((0x2c24 + j), 0x00000000);
3269 	}
3270 
3271 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3272 
3273 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3274 
3275 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3276 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3277 
3278 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
3279 	rdev->config.cik.mem_max_burst_length_bytes = 256;
3280 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3281 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3282 	if (rdev->config.cik.mem_row_size_in_kb > 4)
3283 		rdev->config.cik.mem_row_size_in_kb = 4;
3284 	/* XXX use MC settings? */
3285 	rdev->config.cik.shader_engine_tile_size = 32;
3286 	rdev->config.cik.num_gpus = 1;
3287 	rdev->config.cik.multi_gpu_tile_size = 64;
3288 
3289 	/* fix up row size */
3290 	gb_addr_config &= ~ROW_SIZE_MASK;
3291 	switch (rdev->config.cik.mem_row_size_in_kb) {
3292 	case 1:
3293 	default:
3294 		gb_addr_config |= ROW_SIZE(0);
3295 		break;
3296 	case 2:
3297 		gb_addr_config |= ROW_SIZE(1);
3298 		break;
3299 	case 4:
3300 		gb_addr_config |= ROW_SIZE(2);
3301 		break;
3302 	}
3303 
3304 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3305 	 * not have bank info, so create a custom tiling dword.
3306 	 * bits 3:0   num_pipes
3307 	 * bits 7:4   num_banks
3308 	 * bits 11:8  group_size
3309 	 * bits 15:12 row_size
3310 	 */
3311 	rdev->config.cik.tile_config = 0;
3312 	switch (rdev->config.cik.num_tile_pipes) {
3313 	case 1:
3314 		rdev->config.cik.tile_config |= (0 << 0);
3315 		break;
3316 	case 2:
3317 		rdev->config.cik.tile_config |= (1 << 0);
3318 		break;
3319 	case 4:
3320 		rdev->config.cik.tile_config |= (2 << 0);
3321 		break;
3322 	case 8:
3323 	default:
3324 		/* XXX what about 12? */
3325 		rdev->config.cik.tile_config |= (3 << 0);
3326 		break;
3327 	}
3328 	rdev->config.cik.tile_config |=
3329 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
3330 	rdev->config.cik.tile_config |=
3331 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3332 	rdev->config.cik.tile_config |=
3333 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3334 
3335 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3336 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3337 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3338 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
3339 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
3340 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3341 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3342 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3343 
3344 	cik_tiling_mode_table_init(rdev);
3345 
3346 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
3347 		     rdev->config.cik.max_sh_per_se,
3348 		     rdev->config.cik.max_backends_per_se);
3349 
3350 	/* set HW defaults for 3D engine */
3351 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3352 
3353 	WREG32(SX_DEBUG_1, 0x20);
3354 
3355 	WREG32(TA_CNTL_AUX, 0x00010000);
3356 
3357 	tmp = RREG32(SPI_CONFIG_CNTL);
3358 	tmp |= 0x03000000;
3359 	WREG32(SPI_CONFIG_CNTL, tmp);
3360 
3361 	WREG32(SQ_CONFIG, 1);
3362 
3363 	WREG32(DB_DEBUG, 0);
3364 
3365 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
3366 	tmp |= 0x00000400;
3367 	WREG32(DB_DEBUG2, tmp);
3368 
3369 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
3370 	tmp |= 0x00020200;
3371 	WREG32(DB_DEBUG3, tmp);
3372 
3373 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
3374 	tmp |= 0x00018208;
3375 	WREG32(CB_HW_CONTROL, tmp);
3376 
3377 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3378 
3379 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
3380 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
3381 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
3382 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
3383 
3384 	WREG32(VGT_NUM_INSTANCES, 1);
3385 
3386 	WREG32(CP_PERFMON_CNTL, 0);
3387 
3388 	WREG32(SQ_CONFIG, 0);
3389 
3390 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3391 					  FORCE_EOV_MAX_REZ_CNT(255)));
3392 
3393 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3394 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3395 
3396 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3397 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3398 
3399 	tmp = RREG32(HDP_MISC_CNTL);
3400 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3401 	WREG32(HDP_MISC_CNTL, tmp);
3402 
3403 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3404 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3405 
3406 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3407 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
3408 
3409 	udelay(50);
3410 }
3411 
/*
 * GPU scratch register helper functions.
 */
3415 /**
3416  * cik_scratch_init - setup driver info for CP scratch regs
3417  *
3418  * @rdev: radeon_device pointer
3419  *
3420  * Set up the number and offset of the CP scratch registers.
3421  * NOTE: use of CP scratch registers is a legacy inferface and
3422  * is not used by default on newer asics (r6xx+).  On newer asics,
3423  * memory buffers are used for fences rather than scratch regs.
3424  */
3425 static void cik_scratch_init(struct radeon_device *rdev)
3426 {
3427 	int i;
3428 
3429 	rdev->scratch.num_reg = 7;
3430 	rdev->scratch.reg_base = SCRATCH_REG0;
3431 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3432 		rdev->scratch.free[i] = true;
3433 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3434 	}
3435 }
3436 
3437 /**
3438  * cik_ring_test - basic gfx ring test
3439  *
3440  * @rdev: radeon_device pointer
3441  * @ring: radeon_ring structure holding ring information
3442  *
3443  * Allocate a scratch register and write to it using the gfx ring (CIK).
3444  * Provides a basic gfx ring test to verify that the ring is working.
3445  * Used by cik_cp_gfx_resume();
3446  * Returns 0 on success, error on failure.
3447  */
3448 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
3449 {
3450 	uint32_t scratch;
3451 	uint32_t tmp = 0;
3452 	unsigned i;
3453 	int r;
3454 
3455 	r = radeon_scratch_get(rdev, &scratch);
3456 	if (r) {
3457 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
3458 		return r;
3459 	}
3460 	WREG32(scratch, 0xCAFEDEAD);
3461 	r = radeon_ring_lock(rdev, ring, 3);
3462 	if (r) {
3463 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
3464 		radeon_scratch_free(rdev, scratch);
3465 		return r;
3466 	}
3467 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3468 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
3469 	radeon_ring_write(ring, 0xDEADBEEF);
3470 	radeon_ring_unlock_commit(rdev, ring);
3471 
3472 	for (i = 0; i < rdev->usec_timeout; i++) {
3473 		tmp = RREG32(scratch);
3474 		if (tmp == 0xDEADBEEF)
3475 			break;
3476 		DRM_UDELAY(1);
3477 	}
3478 	if (i < rdev->usec_timeout) {
3479 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3480 	} else {
3481 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
3482 			  ring->idx, scratch, tmp);
3483 		r = -EINVAL;
3484 	}
3485 	radeon_scratch_free(rdev, scratch);
3486 	return r;
3487 }
3488 
3489 /**
3490  * cik_hdp_flush_cp_ring_emit - emit an hdp flush on the cp
3491  *
3492  * @rdev: radeon_device pointer
3493  * @ridx: radeon ring index
3494  *
3495  * Emits an hdp flush on the cp.
3496  */
3497 static void cik_hdp_flush_cp_ring_emit(struct radeon_device *rdev,
3498 				       int ridx)
3499 {
3500 	struct radeon_ring *ring = &rdev->ring[ridx];
3501 	u32 ref_and_mask;
3502 
3503 	switch (ring->idx) {
3504 	case CAYMAN_RING_TYPE_CP1_INDEX:
3505 	case CAYMAN_RING_TYPE_CP2_INDEX:
3506 	default:
3507 		switch (ring->me) {
3508 		case 0:
3509 			ref_and_mask = CP2 << ring->pipe;
3510 			break;
3511 		case 1:
3512 			ref_and_mask = CP6 << ring->pipe;
3513 			break;
3514 		default:
3515 			return;
3516 		}
3517 		break;
3518 	case RADEON_RING_TYPE_GFX_INDEX:
3519 		ref_and_mask = CP0;
3520 		break;
3521 	}
3522 
3523 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
3524 	radeon_ring_write(ring, (WAIT_REG_MEM_OPERATION(1) | /* write, wait, write */
3525 				 WAIT_REG_MEM_FUNCTION(3) |  /* == */
3526 				 WAIT_REG_MEM_ENGINE(1)));   /* pfp */
3527 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ >> 2);
3528 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE >> 2);
3529 	radeon_ring_write(ring, ref_and_mask);
3530 	radeon_ring_write(ring, ref_and_mask);
3531 	radeon_ring_write(ring, 0x20); /* poll interval */
3532 }
3533 
3534 /**
3535  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
3536  *
3537  * @rdev: radeon_device pointer
3538  * @fence: radeon fence object
3539  *
3540  * Emits a fence sequnce number on the gfx ring and flushes
3541  * GPU caches.
3542  */
3543 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
3544 			     struct radeon_fence *fence)
3545 {
3546 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3547 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3548 
3549 	/* EVENT_WRITE_EOP - flush caches, send int */
3550 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3551 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3552 				 EOP_TC_ACTION_EN |
3553 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3554 				 EVENT_INDEX(5)));
3555 	radeon_ring_write(ring, addr & 0xfffffffc);
3556 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
3557 	radeon_ring_write(ring, fence->seq);
3558 	radeon_ring_write(ring, 0);
3559 	/* HDP flush */
3560 	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3561 }
3562 
3563 /**
3564  * cik_fence_compute_ring_emit - emit a fence on the compute ring
3565  *
3566  * @rdev: radeon_device pointer
3567  * @fence: radeon fence object
3568  *
3569  * Emits a fence sequnce number on the compute ring and flushes
3570  * GPU caches.
3571  */
3572 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
3573 				 struct radeon_fence *fence)
3574 {
3575 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3576 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3577 
3578 	/* RELEASE_MEM - flush caches, send int */
3579 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
3580 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
3581 				 EOP_TC_ACTION_EN |
3582 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
3583 				 EVENT_INDEX(5)));
3584 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
3585 	radeon_ring_write(ring, addr & 0xfffffffc);
3586 	radeon_ring_write(ring, upper_32_bits(addr));
3587 	radeon_ring_write(ring, fence->seq);
3588 	radeon_ring_write(ring, 0);
3589 	/* HDP flush */
3590 	cik_hdp_flush_cp_ring_emit(rdev, fence->ring);
3591 }
3592 
3593 bool cik_semaphore_ring_emit(struct radeon_device *rdev,
3594 			     struct radeon_ring *ring,
3595 			     struct radeon_semaphore *semaphore,
3596 			     bool emit_wait)
3597 {
3598 	uint64_t addr = semaphore->gpu_addr;
3599 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
3600 
3601 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
3602 	radeon_ring_write(ring, addr & 0xffffffff);
3603 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
3604 
3605 	return true;
3606 }
3607 
3608 /**
3609  * cik_copy_cpdma - copy pages using the CP DMA engine
3610  *
3611  * @rdev: radeon_device pointer
3612  * @src_offset: src GPU address
3613  * @dst_offset: dst GPU address
3614  * @num_gpu_pages: number of GPU pages to xfer
3615  * @fence: radeon fence object
3616  *
3617  * Copy GPU paging using the CP DMA engine (CIK+).
3618  * Used by the radeon ttm implementation to move pages if
3619  * registered as the asic copy callback.
3620  */
3621 int cik_copy_cpdma(struct radeon_device *rdev,
3622 		   uint64_t src_offset, uint64_t dst_offset,
3623 		   unsigned num_gpu_pages,
3624 		   struct radeon_fence **fence)
3625 {
3626 	struct radeon_semaphore *sem = NULL;
3627 	int ring_index = rdev->asic->copy.blit_ring_index;
3628 	struct radeon_ring *ring = &rdev->ring[ring_index];
3629 	u32 size_in_bytes, cur_size_in_bytes, control;
3630 	int i, num_loops;
3631 	int r = 0;
3632 
3633 	r = radeon_semaphore_create(rdev, &sem);
3634 	if (r) {
3635 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3636 		return r;
3637 	}
3638 
3639 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3640 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3641 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 18);
3642 	if (r) {
3643 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3644 		radeon_semaphore_free(rdev, &sem, NULL);
3645 		return r;
3646 	}
3647 
3648 	radeon_semaphore_sync_to(sem, *fence);
3649 	radeon_semaphore_sync_rings(rdev, sem, ring->idx);
3650 
3651 	for (i = 0; i < num_loops; i++) {
3652 		cur_size_in_bytes = size_in_bytes;
3653 		if (cur_size_in_bytes > 0x1fffff)
3654 			cur_size_in_bytes = 0x1fffff;
3655 		size_in_bytes -= cur_size_in_bytes;
3656 		control = 0;
3657 		if (size_in_bytes == 0)
3658 			control |= PACKET3_DMA_DATA_CP_SYNC;
3659 		radeon_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5));
3660 		radeon_ring_write(ring, control);
3661 		radeon_ring_write(ring, lower_32_bits(src_offset));
3662 		radeon_ring_write(ring, upper_32_bits(src_offset));
3663 		radeon_ring_write(ring, lower_32_bits(dst_offset));
3664 		radeon_ring_write(ring, upper_32_bits(dst_offset));
3665 		radeon_ring_write(ring, cur_size_in_bytes);
3666 		src_offset += cur_size_in_bytes;
3667 		dst_offset += cur_size_in_bytes;
3668 	}
3669 
3670 	r = radeon_fence_emit(rdev, fence, ring->idx);
3671 	if (r) {
3672 		radeon_ring_unlock_undo(rdev, ring);
3673 		return r;
3674 	}
3675 
3676 	radeon_ring_unlock_commit(rdev, ring);
3677 	radeon_semaphore_free(rdev, &sem, *fence);
3678 
3679 	return r;
3680 }
3681 
3682 /*
3683  * IB stuff
3684  */
3685 /**
3686  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
3687  *
3688  * @rdev: radeon_device pointer
3689  * @ib: radeon indirect buffer object
3690  *
3691  * Emits an DE (drawing engine) or CE (constant engine) IB
3692  * on the gfx ring.  IBs are usually generated by userspace
3693  * acceleration drivers and submitted to the kernel for
3694  * sheduling on the ring.  This function schedules the IB
3695  * on the gfx ring for execution by the GPU.
3696  */
3697 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3698 {
3699 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3700 	u32 header, control = INDIRECT_BUFFER_VALID;
3701 
3702 	if (ib->is_const_ib) {
3703 		/* set switch buffer packet before const IB */
3704 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3705 		radeon_ring_write(ring, 0);
3706 
3707 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3708 	} else {
3709 		u32 next_rptr;
3710 		if (ring->rptr_save_reg) {
3711 			next_rptr = ring->wptr + 3 + 4;
3712 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
3713 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3714 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
3715 			radeon_ring_write(ring, next_rptr);
3716 		} else if (rdev->wb.enabled) {
3717 			next_rptr = ring->wptr + 5 + 4;
3718 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3719 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
3720 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3721 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3722 			radeon_ring_write(ring, next_rptr);
3723 		}
3724 
3725 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3726 	}
3727 
3728 	control |= ib->length_dw |
3729 		(ib->vm ? (ib->vm->id << 24) : 0);
3730 
3731 	radeon_ring_write(ring, header);
3732 	radeon_ring_write(ring,
3733 #ifdef __BIG_ENDIAN
3734 			  (2 << 0) |
3735 #endif
3736 			  (ib->gpu_addr & 0xFFFFFFFC));
3737 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3738 	radeon_ring_write(ring, control);
3739 }
3740 
3741 /**
3742  * cik_ib_test - basic gfx ring IB test
3743  *
3744  * @rdev: radeon_device pointer
3745  * @ring: radeon_ring structure holding ring information
3746  *
3747  * Allocate an IB and execute it on the gfx ring (CIK).
3748  * Provides a basic gfx ring test to verify that IBs are working.
3749  * Returns 0 on success, error on failure.
3750  */
3751 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3752 {
3753 	struct radeon_ib ib;
3754 	uint32_t scratch;
3755 	uint32_t tmp = 0;
3756 	unsigned i;
3757 	int r;
3758 
3759 	r = radeon_scratch_get(rdev, &scratch);
3760 	if (r) {
3761 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
3762 		return r;
3763 	}
3764 	WREG32(scratch, 0xCAFEDEAD);
3765 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3766 	if (r) {
3767 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3768 		radeon_scratch_free(rdev, scratch);
3769 		return r;
3770 	}
3771 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
3772 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
3773 	ib.ptr[2] = 0xDEADBEEF;
3774 	ib.length_dw = 3;
3775 	r = radeon_ib_schedule(rdev, &ib, NULL);
3776 	if (r) {
3777 		radeon_scratch_free(rdev, scratch);
3778 		radeon_ib_free(rdev, &ib);
3779 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3780 		return r;
3781 	}
3782 	r = radeon_fence_wait(ib.fence, false);
3783 	if (r) {
3784 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3785 		radeon_scratch_free(rdev, scratch);
3786 		radeon_ib_free(rdev, &ib);
3787 		return r;
3788 	}
3789 	for (i = 0; i < rdev->usec_timeout; i++) {
3790 		tmp = RREG32(scratch);
3791 		if (tmp == 0xDEADBEEF)
3792 			break;
3793 		DRM_UDELAY(1);
3794 	}
3795 	if (i < rdev->usec_timeout) {
3796 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3797 	} else {
3798 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
3799 			  scratch, tmp);
3800 		r = -EINVAL;
3801 	}
3802 	radeon_scratch_free(rdev, scratch);
3803 	radeon_ib_free(rdev, &ib);
3804 	return r;
3805 }
3806 
/*
 * CP.
 * On CIK, gfx and compute now have independent command processors.
 *
 * GFX
 * Gfx consists of a single ring and can process both gfx jobs and
 * compute jobs.  The gfx CP consists of three microengines (ME):
 * PFP - Pre-Fetch Parser
 * ME - Micro Engine
 * CE - Constant Engine
 * The PFP and ME make up what is considered the Drawing Engine (DE).
 * The CE is an asynchronous engine used for updating buffer descriptors
 * used by the DE so that they can be loaded into cache in parallel
 * while the DE is processing state update packets.
 *
 * Compute
 * The compute CP consists of two microengines (ME):
 * MEC1 - Compute MicroEngine 1
 * MEC2 - Compute MicroEngine 2
 * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
 * The queues are exposed to userspace and are programmed directly
 * by the compute runtime.
 */
3830 /**
3831  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
3832  *
3833  * @rdev: radeon_device pointer
3834  * @enable: enable or disable the MEs
3835  *
3836  * Halts or unhalts the gfx MEs.
3837  */
3838 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
3839 {
3840 	if (enable)
3841 		WREG32(CP_ME_CNTL, 0);
3842 	else {
3843 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3844 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3845 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3846 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3847 	}
3848 	udelay(50);
3849 }
3850 
3851 /**
3852  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
3853  *
3854  * @rdev: radeon_device pointer
3855  *
3856  * Loads the gfx PFP, ME, and CE ucode.
3857  * Returns 0 for success, -EINVAL if the ucode is not available.
3858  */
3859 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
3860 {
3861 	const __be32 *fw_data;
3862 	int i;
3863 
3864 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3865 		return -EINVAL;
3866 
3867 	cik_cp_gfx_enable(rdev, false);
3868 
3869 	/* PFP */
3870 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3871 	WREG32(CP_PFP_UCODE_ADDR, 0);
3872 	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
3873 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3874 	WREG32(CP_PFP_UCODE_ADDR, 0);
3875 
3876 	/* CE */
3877 	fw_data = (const __be32 *)rdev->ce_fw->data;
3878 	WREG32(CP_CE_UCODE_ADDR, 0);
3879 	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
3880 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3881 	WREG32(CP_CE_UCODE_ADDR, 0);
3882 
3883 	/* ME */
3884 	fw_data = (const __be32 *)rdev->me_fw->data;
3885 	WREG32(CP_ME_RAM_WADDR, 0);
3886 	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
3887 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3888 	WREG32(CP_ME_RAM_WADDR, 0);
3889 
3890 	WREG32(CP_PFP_UCODE_ADDR, 0);
3891 	WREG32(CP_CE_UCODE_ADDR, 0);
3892 	WREG32(CP_ME_RAM_WADDR, 0);
3893 	WREG32(CP_ME_RAM_RADDR, 0);
3894 	return 0;
3895 }
3896 
3897 /**
3898  * cik_cp_gfx_start - start the gfx ring
3899  *
3900  * @rdev: radeon_device pointer
3901  *
3902  * Enables the ring and loads the clear state context and other
3903  * packets required to init the ring.
3904  * Returns 0 for success, error for failure.
3905  */
3906 static int cik_cp_gfx_start(struct radeon_device *rdev)
3907 {
3908 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3909 	int r, i;
3910 
3911 	/* init the CP */
3912 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
3913 	WREG32(CP_ENDIAN_SWAP, 0);
3914 	WREG32(CP_DEVICE_ID, 1);
3915 
3916 	cik_cp_gfx_enable(rdev, true);
3917 
3918 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
3919 	if (r) {
3920 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3921 		return r;
3922 	}
3923 
3924 	/* init the CE partitions.  CE only used for gfx on CIK */
3925 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3926 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3927 	radeon_ring_write(ring, 0xc000);
3928 	radeon_ring_write(ring, 0xc000);
3929 
3930 	/* setup clear context state */
3931 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3932 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3933 
3934 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
3935 	radeon_ring_write(ring, 0x80000000);
3936 	radeon_ring_write(ring, 0x80000000);
3937 
3938 	for (i = 0; i < cik_default_size; i++)
3939 		radeon_ring_write(ring, cik_default_state[i]);
3940 
3941 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3942 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3943 
3944 	/* set clear context state */
3945 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3946 	radeon_ring_write(ring, 0);
3947 
3948 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3949 	radeon_ring_write(ring, 0x00000316);
3950 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3951 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3952 
3953 	radeon_ring_unlock_commit(rdev, ring);
3954 
3955 	return 0;
3956 }
3957 
3958 /**
3959  * cik_cp_gfx_fini - stop the gfx ring
3960  *
3961  * @rdev: radeon_device pointer
3962  *
3963  * Stop the gfx ring and tear down the driver ring
3964  * info.
3965  */
3966 static void cik_cp_gfx_fini(struct radeon_device *rdev)
3967 {
3968 	cik_cp_gfx_enable(rdev, false);
3969 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3970 }
3971 
3972 /**
3973  * cik_cp_gfx_resume - setup the gfx ring buffer registers
3974  *
3975  * @rdev: radeon_device pointer
3976  *
3977  * Program the location and size of the gfx ring buffer
3978  * and test it to make sure it's working.
3979  * Returns 0 for success, error for failure.
3980  */
3981 static int cik_cp_gfx_resume(struct radeon_device *rdev)
3982 {
3983 	struct radeon_ring *ring;
3984 	u32 tmp;
3985 	u32 rb_bufsz;
3986 	u64 rb_addr;
3987 	int r;
3988 
3989 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3990 	if (rdev->family != CHIP_HAWAII)
3991 		WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3992 
3993 	/* Set the write pointer delay */
3994 	WREG32(CP_RB_WPTR_DELAY, 0);
3995 
3996 	/* set the RB to use vmid 0 */
3997 	WREG32(CP_RB_VMID, 0);
3998 
3999 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
4000 
4001 	/* ring 0 - compute and gfx */
4002 	/* Set ring buffer size */
4003 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
4004 	rb_bufsz = order_base_2(ring->ring_size / 8);
4005 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
4006 #ifdef __BIG_ENDIAN
4007 	tmp |= BUF_SWAP_32BIT;
4008 #endif
4009 	WREG32(CP_RB0_CNTL, tmp);
4010 
4011 	/* Initialize the ring buffer's read and write pointers */
4012 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
4013 	ring->wptr = 0;
4014 	WREG32(CP_RB0_WPTR, ring->wptr);
4015 
4016 	/* set the wb address wether it's enabled or not */
4017 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
4018 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
4019 
4020 	/* scratch register shadowing is no longer supported */
4021 	WREG32(SCRATCH_UMSK, 0);
4022 
4023 	if (!rdev->wb.enabled)
4024 		tmp |= RB_NO_UPDATE;
4025 
4026 	mdelay(1);
4027 	WREG32(CP_RB0_CNTL, tmp);
4028 
4029 	rb_addr = ring->gpu_addr >> 8;
4030 	WREG32(CP_RB0_BASE, rb_addr);
4031 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
4032 
4033 	ring->rptr = RREG32(CP_RB0_RPTR);
4034 
4035 	/* start the ring */
4036 	cik_cp_gfx_start(rdev);
4037 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
4038 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
4039 	if (r) {
4040 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
4041 		return r;
4042 	}
4043 
4044 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
4045 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
4046 
4047 	return 0;
4048 }
4049 
4050 u32 cik_gfx_get_rptr(struct radeon_device *rdev,
4051 		     struct radeon_ring *ring)
4052 {
4053 	u32 rptr;
4054 
4055 	if (rdev->wb.enabled)
4056 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4057 	else
4058 		rptr = RREG32(CP_RB0_RPTR);
4059 
4060 	return rptr;
4061 }
4062 
4063 u32 cik_gfx_get_wptr(struct radeon_device *rdev,
4064 		     struct radeon_ring *ring)
4065 {
4066 	u32 wptr;
4067 
4068 	wptr = RREG32(CP_RB0_WPTR);
4069 
4070 	return wptr;
4071 }
4072 
4073 void cik_gfx_set_wptr(struct radeon_device *rdev,
4074 		      struct radeon_ring *ring)
4075 {
4076 	WREG32(CP_RB0_WPTR, ring->wptr);
4077 	(void)RREG32(CP_RB0_WPTR);
4078 }
4079 
4080 u32 cik_compute_get_rptr(struct radeon_device *rdev,
4081 			 struct radeon_ring *ring)
4082 {
4083 	u32 rptr;
4084 
4085 	if (rdev->wb.enabled) {
4086 		rptr = rdev->wb.wb[ring->rptr_offs/4];
4087 	} else {
4088 		mutex_lock(&rdev->srbm_mutex);
4089 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4090 		rptr = RREG32(CP_HQD_PQ_RPTR);
4091 		cik_srbm_select(rdev, 0, 0, 0, 0);
4092 		mutex_unlock(&rdev->srbm_mutex);
4093 	}
4094 
4095 	return rptr;
4096 }
4097 
4098 u32 cik_compute_get_wptr(struct radeon_device *rdev,
4099 			 struct radeon_ring *ring)
4100 {
4101 	u32 wptr;
4102 
4103 	if (rdev->wb.enabled) {
4104 		/* XXX check if swapping is necessary on BE */
4105 		wptr = rdev->wb.wb[ring->wptr_offs/4];
4106 	} else {
4107 		mutex_lock(&rdev->srbm_mutex);
4108 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
4109 		wptr = RREG32(CP_HQD_PQ_WPTR);
4110 		cik_srbm_select(rdev, 0, 0, 0, 0);
4111 		mutex_unlock(&rdev->srbm_mutex);
4112 	}
4113 
4114 	return wptr;
4115 }
4116 
4117 void cik_compute_set_wptr(struct radeon_device *rdev,
4118 			  struct radeon_ring *ring)
4119 {
4120 	/* XXX check if swapping is necessary on BE */
4121 	rdev->wb.wb[ring->wptr_offs/4] = ring->wptr;
4122 	WDOORBELL32(ring->doorbell_index, ring->wptr);
4123 }
4124 
4125 /**
4126  * cik_cp_compute_enable - enable/disable the compute CP MEs
4127  *
4128  * @rdev: radeon_device pointer
4129  * @enable: enable or disable the MEs
4130  *
4131  * Halts or unhalts the compute MEs.
4132  */
4133 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
4134 {
4135 	if (enable)
4136 		WREG32(CP_MEC_CNTL, 0);
4137 	else
4138 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
4139 	udelay(50);
4140 }
4141 
4142 /**
4143  * cik_cp_compute_load_microcode - load the compute CP ME ucode
4144  *
4145  * @rdev: radeon_device pointer
4146  *
4147  * Loads the compute MEC1&2 ucode.
4148  * Returns 0 for success, -EINVAL if the ucode is not available.
4149  */
4150 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
4151 {
4152 	const __be32 *fw_data;
4153 	int i;
4154 
4155 	if (!rdev->mec_fw)
4156 		return -EINVAL;
4157 
4158 	cik_cp_compute_enable(rdev, false);
4159 
4160 	/* MEC1 */
4161 	fw_data = (const __be32 *)rdev->mec_fw->data;
4162 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4163 	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4164 		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
4165 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
4166 
4167 	if (rdev->family == CHIP_KAVERI) {
4168 		/* MEC2 */
4169 		fw_data = (const __be32 *)rdev->mec_fw->data;
4170 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4171 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
4172 			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
4173 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
4174 	}
4175 
4176 	return 0;
4177 }
4178 
4179 /**
4180  * cik_cp_compute_start - start the compute queues
4181  *
4182  * @rdev: radeon_device pointer
4183  *
4184  * Enable the compute queues.
4185  * Returns 0 for success, error for failure.
4186  */
4187 static int cik_cp_compute_start(struct radeon_device *rdev)
4188 {
4189 	cik_cp_compute_enable(rdev, true);
4190 
4191 	return 0;
4192 }
4193 
4194 /**
4195  * cik_cp_compute_fini - stop the compute queues
4196  *
4197  * @rdev: radeon_device pointer
4198  *
4199  * Stop the compute queues and tear down the driver queue
4200  * info.
4201  */
4202 static void cik_cp_compute_fini(struct radeon_device *rdev)
4203 {
4204 	int i, idx, r;
4205 
4206 	cik_cp_compute_enable(rdev, false);
4207 
4208 	for (i = 0; i < 2; i++) {
4209 		if (i == 0)
4210 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4211 		else
4212 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4213 
4214 		if (rdev->ring[idx].mqd_obj) {
4215 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4216 			if (unlikely(r != 0))
4217 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
4218 
4219 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
4220 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4221 
4222 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
4223 			rdev->ring[idx].mqd_obj = NULL;
4224 		}
4225 	}
4226 }
4227 
4228 static void cik_mec_fini(struct radeon_device *rdev)
4229 {
4230 	int r;
4231 
4232 	if (rdev->mec.hpd_eop_obj) {
4233 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4234 		if (unlikely(r != 0))
4235 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
4236 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
4237 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4238 
4239 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
4240 		rdev->mec.hpd_eop_obj = NULL;
4241 	}
4242 }
4243 
4244 #define MEC_HPD_SIZE 2048
4245 
4246 static int cik_mec_init(struct radeon_device *rdev)
4247 {
4248 	int r;
4249 	u32 *hpd;
4250 
4251 	/*
4252 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
4253 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
4254 	 */
4255 	if (rdev->family == CHIP_KAVERI)
4256 		rdev->mec.num_mec = 2;
4257 	else
4258 		rdev->mec.num_mec = 1;
4259 	rdev->mec.num_pipe = 4;
4260 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
4261 
4262 	if (rdev->mec.hpd_eop_obj == NULL) {
4263 		r = radeon_bo_create(rdev,
4264 				     rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
4265 				     PAGE_SIZE, true,
4266 				     RADEON_GEM_DOMAIN_GTT, NULL,
4267 				     &rdev->mec.hpd_eop_obj);
4268 		if (r) {
4269 			dev_warn(rdev->dev, "(%d) create HDP EOP bo failed\n", r);
4270 			return r;
4271 		}
4272 	}
4273 
4274 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
4275 	if (unlikely(r != 0)) {
4276 		cik_mec_fini(rdev);
4277 		return r;
4278 	}
4279 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
4280 			  &rdev->mec.hpd_eop_gpu_addr);
4281 	if (r) {
4282 		dev_warn(rdev->dev, "(%d) pin HDP EOP bo failed\n", r);
4283 		cik_mec_fini(rdev);
4284 		return r;
4285 	}
4286 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
4287 	if (r) {
4288 		dev_warn(rdev->dev, "(%d) map HDP EOP bo failed\n", r);
4289 		cik_mec_fini(rdev);
4290 		return r;
4291 	}
4292 
4293 	/* clear memory.  Not sure if this is required or not */
4294 	memset(hpd, 0, rdev->mec.num_mec *rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
4295 
4296 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
4297 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
4298 
4299 	return 0;
4300 }
4301 
4302 struct hqd_registers
4303 {
4304 	u32 cp_mqd_base_addr;
4305 	u32 cp_mqd_base_addr_hi;
4306 	u32 cp_hqd_active;
4307 	u32 cp_hqd_vmid;
4308 	u32 cp_hqd_persistent_state;
4309 	u32 cp_hqd_pipe_priority;
4310 	u32 cp_hqd_queue_priority;
4311 	u32 cp_hqd_quantum;
4312 	u32 cp_hqd_pq_base;
4313 	u32 cp_hqd_pq_base_hi;
4314 	u32 cp_hqd_pq_rptr;
4315 	u32 cp_hqd_pq_rptr_report_addr;
4316 	u32 cp_hqd_pq_rptr_report_addr_hi;
4317 	u32 cp_hqd_pq_wptr_poll_addr;
4318 	u32 cp_hqd_pq_wptr_poll_addr_hi;
4319 	u32 cp_hqd_pq_doorbell_control;
4320 	u32 cp_hqd_pq_wptr;
4321 	u32 cp_hqd_pq_control;
4322 	u32 cp_hqd_ib_base_addr;
4323 	u32 cp_hqd_ib_base_addr_hi;
4324 	u32 cp_hqd_ib_rptr;
4325 	u32 cp_hqd_ib_control;
4326 	u32 cp_hqd_iq_timer;
4327 	u32 cp_hqd_iq_rptr;
4328 	u32 cp_hqd_dequeue_request;
4329 	u32 cp_hqd_dma_offload;
4330 	u32 cp_hqd_sema_cmd;
4331 	u32 cp_hqd_msg_type;
4332 	u32 cp_hqd_atomic0_preop_lo;
4333 	u32 cp_hqd_atomic0_preop_hi;
4334 	u32 cp_hqd_atomic1_preop_lo;
4335 	u32 cp_hqd_atomic1_preop_hi;
4336 	u32 cp_hqd_hq_scheduler0;
4337 	u32 cp_hqd_hq_scheduler1;
4338 	u32 cp_mqd_control;
4339 };
4340 
4341 struct bonaire_mqd
4342 {
4343 	u32 header;
4344 	u32 dispatch_initiator;
4345 	u32 dimensions[3];
4346 	u32 start_idx[3];
4347 	u32 num_threads[3];
4348 	u32 pipeline_stat_enable;
4349 	u32 perf_counter_enable;
4350 	u32 pgm[2];
4351 	u32 tba[2];
4352 	u32 tma[2];
4353 	u32 pgm_rsrc[2];
4354 	u32 vmid;
4355 	u32 resource_limits;
4356 	u32 static_thread_mgmt01[2];
4357 	u32 tmp_ring_size;
4358 	u32 static_thread_mgmt23[2];
4359 	u32 restart[3];
4360 	u32 thread_trace_enable;
4361 	u32 reserved1;
4362 	u32 user_data[16];
4363 	u32 vgtcs_invoke_count[2];
4364 	struct hqd_registers queue_state;
4365 	u32 dequeue_cntr;
4366 	u32 interrupt_queue[64];
4367 };
4368 
4369 /**
4370  * cik_cp_compute_resume - setup the compute queue registers
4371  *
4372  * @rdev: radeon_device pointer
4373  *
4374  * Program the compute queues and test them to make sure they
4375  * are working.
4376  * Returns 0 for success, error for failure.
4377  */
4378 static int cik_cp_compute_resume(struct radeon_device *rdev)
4379 {
4380 	int r, i, idx;
4381 	u32 tmp;
4382 	bool use_doorbell = true;
4383 	u64 hqd_gpu_addr;
4384 	u64 mqd_gpu_addr;
4385 	u64 eop_gpu_addr;
4386 	u64 wb_gpu_addr;
4387 	u32 *buf;
4388 	struct bonaire_mqd *mqd;
4389 
4390 	r = cik_cp_compute_start(rdev);
4391 	if (r)
4392 		return r;
4393 
4394 	/* fix up chicken bits */
4395 	tmp = RREG32(CP_CPF_DEBUG);
4396 	tmp |= (1 << 23);
4397 	WREG32(CP_CPF_DEBUG, tmp);
4398 
4399 	/* init the pipes */
4400 	mutex_lock(&rdev->srbm_mutex);
4401 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
4402 		int me = (i < 4) ? 1 : 2;
4403 		int pipe = (i < 4) ? i : (i - 4);
4404 
4405 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
4406 
4407 		cik_srbm_select(rdev, me, pipe, 0, 0);
4408 
4409 		/* write the EOP addr */
4410 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
4411 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
4412 
4413 		/* set the VMID assigned */
4414 		WREG32(CP_HPD_EOP_VMID, 0);
4415 
4416 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
4417 		tmp = RREG32(CP_HPD_EOP_CONTROL);
4418 		tmp &= ~EOP_SIZE_MASK;
4419 		tmp |= order_base_2(MEC_HPD_SIZE / 8);
4420 		WREG32(CP_HPD_EOP_CONTROL, tmp);
4421 	}
4422 	cik_srbm_select(rdev, 0, 0, 0, 0);
4423 	mutex_unlock(&rdev->srbm_mutex);
4424 
4425 	/* init the queues.  Just two for now. */
4426 	for (i = 0; i < 2; i++) {
4427 		if (i == 0)
4428 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
4429 		else
4430 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
4431 
4432 		if (rdev->ring[idx].mqd_obj == NULL) {
4433 			r = radeon_bo_create(rdev,
4434 					     sizeof(struct bonaire_mqd),
4435 					     PAGE_SIZE, true,
4436 					     RADEON_GEM_DOMAIN_GTT, NULL,
4437 					     &rdev->ring[idx].mqd_obj);
4438 			if (r) {
4439 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
4440 				return r;
4441 			}
4442 		}
4443 
4444 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
4445 		if (unlikely(r != 0)) {
4446 			cik_cp_compute_fini(rdev);
4447 			return r;
4448 		}
4449 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
4450 				  &mqd_gpu_addr);
4451 		if (r) {
4452 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
4453 			cik_cp_compute_fini(rdev);
4454 			return r;
4455 		}
4456 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
4457 		if (r) {
4458 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
4459 			cik_cp_compute_fini(rdev);
4460 			return r;
4461 		}
4462 
4463 		/* init the mqd struct */
4464 		memset(buf, 0, sizeof(struct bonaire_mqd));
4465 
4466 		mqd = (struct bonaire_mqd *)buf;
4467 		mqd->header = 0xC0310800;
4468 		mqd->static_thread_mgmt01[0] = 0xffffffff;
4469 		mqd->static_thread_mgmt01[1] = 0xffffffff;
4470 		mqd->static_thread_mgmt23[0] = 0xffffffff;
4471 		mqd->static_thread_mgmt23[1] = 0xffffffff;
4472 
4473 		mutex_lock(&rdev->srbm_mutex);
4474 		cik_srbm_select(rdev, rdev->ring[idx].me,
4475 				rdev->ring[idx].pipe,
4476 				rdev->ring[idx].queue, 0);
4477 
4478 		/* disable wptr polling */
4479 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
4480 		tmp &= ~WPTR_POLL_EN;
4481 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
4482 
4483 		/* enable doorbell? */
4484 		mqd->queue_state.cp_hqd_pq_doorbell_control =
4485 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4486 		if (use_doorbell)
4487 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4488 		else
4489 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
4490 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4491 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4492 
4493 		/* disable the queue if it's active */
4494 		mqd->queue_state.cp_hqd_dequeue_request = 0;
4495 		mqd->queue_state.cp_hqd_pq_rptr = 0;
4496 		mqd->queue_state.cp_hqd_pq_wptr= 0;
4497 		if (RREG32(CP_HQD_ACTIVE) & 1) {
4498 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
4499 			for (i = 0; i < rdev->usec_timeout; i++) {
4500 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
4501 					break;
4502 				udelay(1);
4503 			}
4504 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
4505 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
4506 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4507 		}
4508 
4509 		/* set the pointer to the MQD */
4510 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
4511 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
4512 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
4513 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
4514 		/* set MQD vmid to 0 */
4515 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
4516 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
4517 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
4518 
4519 		/* set the pointer to the HQD, this is similar CP_RB0_BASE/_HI */
4520 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
4521 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
4522 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
4523 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
4524 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
4525 
4526 		/* set up the HQD, this is similar to CP_RB0_CNTL */
4527 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
4528 		mqd->queue_state.cp_hqd_pq_control &=
4529 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
4530 
4531 		mqd->queue_state.cp_hqd_pq_control |=
4532 			order_base_2(rdev->ring[idx].ring_size / 8);
4533 		mqd->queue_state.cp_hqd_pq_control |=
4534 			(order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8);
4535 #ifdef __BIG_ENDIAN
4536 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
4537 #endif
4538 		mqd->queue_state.cp_hqd_pq_control &=
4539 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
4540 		mqd->queue_state.cp_hqd_pq_control |=
4541 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
4542 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
4543 
4544 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
4545 		if (i == 0)
4546 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
4547 		else
4548 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
4549 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
4550 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
4551 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
4552 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
4553 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
4554 
4555 		/* set the wb address wether it's enabled or not */
4556 		if (i == 0)
4557 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
4558 		else
4559 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
4560 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
4561 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
4562 			upper_32_bits(wb_gpu_addr) & 0xffff;
4563 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
4564 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
4565 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
4566 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
4567 
4568 		/* enable the doorbell if requested */
4569 		if (use_doorbell) {
4570 			mqd->queue_state.cp_hqd_pq_doorbell_control =
4571 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
4572 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
4573 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
4574 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_index);
4575 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
4576 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
4577 				~(DOORBELL_SOURCE | DOORBELL_HIT);
4578 
4579 		} else {
4580 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
4581 		}
4582 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
4583 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
4584 
4585 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
4586 		rdev->ring[idx].wptr = 0;
4587 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
4588 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
4589 		rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
4590 		mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
4591 
4592 		/* set the vmid for the queue */
4593 		mqd->queue_state.cp_hqd_vmid = 0;
4594 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
4595 
4596 		/* activate the queue */
4597 		mqd->queue_state.cp_hqd_active = 1;
4598 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
4599 
4600 		cik_srbm_select(rdev, 0, 0, 0, 0);
4601 		mutex_unlock(&rdev->srbm_mutex);
4602 
4603 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
4604 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
4605 
4606 		rdev->ring[idx].ready = true;
4607 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
4608 		if (r)
4609 			rdev->ring[idx].ready = false;
4610 	}
4611 
4612 	return 0;
4613 }
4614 
4615 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
4616 {
4617 	cik_cp_gfx_enable(rdev, enable);
4618 	cik_cp_compute_enable(rdev, enable);
4619 }
4620 
/*
 * Load the gfx CP microcode, then the compute CP microcode.
 * Returns 0 on success or the error of the first loader that failed;
 * the compute loader is not attempted if the gfx loader fails.
 */
static int cik_cp_load_microcode(struct radeon_device *rdev)
{
	int err = cik_cp_gfx_load_microcode(rdev);

	if (err)
		return err;

	return cik_cp_compute_load_microcode(rdev);
}
4634 
/* Tear down both command processors: the gfx ring first, then compute. */
static void cik_cp_fini(struct radeon_device *rdev)
{
	/* gfx */
	cik_cp_gfx_fini(rdev);
	/* compute */
	cik_cp_compute_fini(rdev);
}
4640 
4641 static int cik_cp_resume(struct radeon_device *rdev)
4642 {
4643 	int r;
4644 
4645 	cik_enable_gui_idle_interrupt(rdev, false);
4646 
4647 	r = cik_cp_load_microcode(rdev);
4648 	if (r)
4649 		return r;
4650 
4651 	r = cik_cp_gfx_resume(rdev);
4652 	if (r)
4653 		return r;
4654 	r = cik_cp_compute_resume(rdev);
4655 	if (r)
4656 		return r;
4657 
4658 	cik_enable_gui_idle_interrupt(rdev, true);
4659 
4660 	return 0;
4661 }
4662 
4663 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
4664 {
4665 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
4666 		RREG32(GRBM_STATUS));
4667 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
4668 		RREG32(GRBM_STATUS2));
4669 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
4670 		RREG32(GRBM_STATUS_SE0));
4671 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
4672 		RREG32(GRBM_STATUS_SE1));
4673 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
4674 		RREG32(GRBM_STATUS_SE2));
4675 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
4676 		RREG32(GRBM_STATUS_SE3));
4677 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
4678 		RREG32(SRBM_STATUS));
4679 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
4680 		RREG32(SRBM_STATUS2));
4681 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
4682 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
4683 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
4684 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
4685 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
4686 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
4687 		 RREG32(CP_STALLED_STAT1));
4688 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
4689 		 RREG32(CP_STALLED_STAT2));
4690 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
4691 		 RREG32(CP_STALLED_STAT3));
4692 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
4693 		 RREG32(CP_CPF_BUSY_STAT));
4694 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
4695 		 RREG32(CP_CPF_STALLED_STAT1));
4696 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
4697 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
4698 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
4699 		 RREG32(CP_CPC_STALLED_STAT1));
4700 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
4701 }
4702 
4703 /**
4704  * cik_gpu_check_soft_reset - check which blocks are busy
4705  *
4706  * @rdev: radeon_device pointer
4707  *
4708  * Check which blocks are busy and return the relevant reset
4709  * mask to be used by cik_gpu_soft_reset().
4710  * Returns a mask of the blocks to be reset.
4711  */
4712 u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
4713 {
4714 	u32 reset_mask = 0;
4715 	u32 tmp;
4716 
4717 	/* GRBM_STATUS */
4718 	tmp = RREG32(GRBM_STATUS);
4719 	if (tmp & (PA_BUSY | SC_BUSY |
4720 		   BCI_BUSY | SX_BUSY |
4721 		   TA_BUSY | VGT_BUSY |
4722 		   DB_BUSY | CB_BUSY |
4723 		   GDS_BUSY | SPI_BUSY |
4724 		   IA_BUSY | IA_BUSY_NO_DMA))
4725 		reset_mask |= RADEON_RESET_GFX;
4726 
4727 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
4728 		reset_mask |= RADEON_RESET_CP;
4729 
4730 	/* GRBM_STATUS2 */
4731 	tmp = RREG32(GRBM_STATUS2);
4732 	if (tmp & RLC_BUSY)
4733 		reset_mask |= RADEON_RESET_RLC;
4734 
4735 	/* SDMA0_STATUS_REG */
4736 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
4737 	if (!(tmp & SDMA_IDLE))
4738 		reset_mask |= RADEON_RESET_DMA;
4739 
4740 	/* SDMA1_STATUS_REG */
4741 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
4742 	if (!(tmp & SDMA_IDLE))
4743 		reset_mask |= RADEON_RESET_DMA1;
4744 
4745 	/* SRBM_STATUS2 */
4746 	tmp = RREG32(SRBM_STATUS2);
4747 	if (tmp & SDMA_BUSY)
4748 		reset_mask |= RADEON_RESET_DMA;
4749 
4750 	if (tmp & SDMA1_BUSY)
4751 		reset_mask |= RADEON_RESET_DMA1;
4752 
4753 	/* SRBM_STATUS */
4754 	tmp = RREG32(SRBM_STATUS);
4755 
4756 	if (tmp & IH_BUSY)
4757 		reset_mask |= RADEON_RESET_IH;
4758 
4759 	if (tmp & SEM_BUSY)
4760 		reset_mask |= RADEON_RESET_SEM;
4761 
4762 	if (tmp & GRBM_RQ_PENDING)
4763 		reset_mask |= RADEON_RESET_GRBM;
4764 
4765 	if (tmp & VMC_BUSY)
4766 		reset_mask |= RADEON_RESET_VMC;
4767 
4768 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
4769 		   MCC_BUSY | MCD_BUSY))
4770 		reset_mask |= RADEON_RESET_MC;
4771 
4772 	if (evergreen_is_display_hung(rdev))
4773 		reset_mask |= RADEON_RESET_DISPLAY;
4774 
4775 	/* Skip MC reset as it's mostly likely not hung, just busy */
4776 	if (reset_mask & RADEON_RESET_MC) {
4777 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
4778 		reset_mask &= ~RADEON_RESET_MC;
4779 	}
4780 
4781 	return reset_mask;
4782 }
4783 
4784 /**
4785  * cik_gpu_soft_reset - soft reset GPU
4786  *
4787  * @rdev: radeon_device pointer
4788  * @reset_mask: mask of which blocks to reset
4789  *
4790  * Soft reset the blocks specified in @reset_mask.
4791  */
4792 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
4793 {
4794 	struct evergreen_mc_save save;
4795 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
4796 	u32 tmp;
4797 
4798 	if (reset_mask == 0)
4799 		return;
4800 
4801 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
4802 
4803 	cik_print_gpu_status_regs(rdev);
4804 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
4805 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
4806 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
4807 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
4808 
4809 	/* disable CG/PG */
4810 	cik_fini_pg(rdev);
4811 	cik_fini_cg(rdev);
4812 
4813 	/* stop the rlc */
4814 	cik_rlc_stop(rdev);
4815 
4816 	/* Disable GFX parsing/prefetching */
4817 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4818 
4819 	/* Disable MEC parsing/prefetching */
4820 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
4821 
4822 	if (reset_mask & RADEON_RESET_DMA) {
4823 		/* sdma0 */
4824 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
4825 		tmp |= SDMA_HALT;
4826 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4827 	}
4828 	if (reset_mask & RADEON_RESET_DMA1) {
4829 		/* sdma1 */
4830 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
4831 		tmp |= SDMA_HALT;
4832 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4833 	}
4834 
4835 	evergreen_mc_stop(rdev, &save);
4836 	if (evergreen_mc_wait_for_idle(rdev)) {
4837 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4838 	}
4839 
4840 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
4841 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
4842 
4843 	if (reset_mask & RADEON_RESET_CP) {
4844 		grbm_soft_reset |= SOFT_RESET_CP;
4845 
4846 		srbm_soft_reset |= SOFT_RESET_GRBM;
4847 	}
4848 
4849 	if (reset_mask & RADEON_RESET_DMA)
4850 		srbm_soft_reset |= SOFT_RESET_SDMA;
4851 
4852 	if (reset_mask & RADEON_RESET_DMA1)
4853 		srbm_soft_reset |= SOFT_RESET_SDMA1;
4854 
4855 	if (reset_mask & RADEON_RESET_DISPLAY)
4856 		srbm_soft_reset |= SOFT_RESET_DC;
4857 
4858 	if (reset_mask & RADEON_RESET_RLC)
4859 		grbm_soft_reset |= SOFT_RESET_RLC;
4860 
4861 	if (reset_mask & RADEON_RESET_SEM)
4862 		srbm_soft_reset |= SOFT_RESET_SEM;
4863 
4864 	if (reset_mask & RADEON_RESET_IH)
4865 		srbm_soft_reset |= SOFT_RESET_IH;
4866 
4867 	if (reset_mask & RADEON_RESET_GRBM)
4868 		srbm_soft_reset |= SOFT_RESET_GRBM;
4869 
4870 	if (reset_mask & RADEON_RESET_VMC)
4871 		srbm_soft_reset |= SOFT_RESET_VMC;
4872 
4873 	if (!(rdev->flags & RADEON_IS_IGP)) {
4874 		if (reset_mask & RADEON_RESET_MC)
4875 			srbm_soft_reset |= SOFT_RESET_MC;
4876 	}
4877 
4878 	if (grbm_soft_reset) {
4879 		tmp = RREG32(GRBM_SOFT_RESET);
4880 		tmp |= grbm_soft_reset;
4881 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
4882 		WREG32(GRBM_SOFT_RESET, tmp);
4883 		tmp = RREG32(GRBM_SOFT_RESET);
4884 
4885 		udelay(50);
4886 
4887 		tmp &= ~grbm_soft_reset;
4888 		WREG32(GRBM_SOFT_RESET, tmp);
4889 		tmp = RREG32(GRBM_SOFT_RESET);
4890 	}
4891 
4892 	if (srbm_soft_reset) {
4893 		tmp = RREG32(SRBM_SOFT_RESET);
4894 		tmp |= srbm_soft_reset;
4895 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
4896 		WREG32(SRBM_SOFT_RESET, tmp);
4897 		tmp = RREG32(SRBM_SOFT_RESET);
4898 
4899 		udelay(50);
4900 
4901 		tmp &= ~srbm_soft_reset;
4902 		WREG32(SRBM_SOFT_RESET, tmp);
4903 		tmp = RREG32(SRBM_SOFT_RESET);
4904 	}
4905 
4906 	/* Wait a little for things to settle down */
4907 	udelay(50);
4908 
4909 	evergreen_mc_resume(rdev, &save);
4910 	udelay(50);
4911 
4912 	cik_print_gpu_status_regs(rdev);
4913 }
4914 
4915 struct kv_reset_save_regs {
4916 	u32 gmcon_reng_execute;
4917 	u32 gmcon_misc;
4918 	u32 gmcon_misc3;
4919 };
4920 
4921 static void kv_save_regs_for_reset(struct radeon_device *rdev,
4922 				   struct kv_reset_save_regs *save)
4923 {
4924 	save->gmcon_reng_execute = RREG32(GMCON_RENG_EXECUTE);
4925 	save->gmcon_misc = RREG32(GMCON_MISC);
4926 	save->gmcon_misc3 = RREG32(GMCON_MISC3);
4927 
4928 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute & ~RENG_EXECUTE_ON_PWR_UP);
4929 	WREG32(GMCON_MISC, save->gmcon_misc & ~(RENG_EXECUTE_ON_REG_UPDATE |
4930 						STCTRL_STUTTER_EN));
4931 }
4932 
4933 static void kv_restore_regs_for_reset(struct radeon_device *rdev,
4934 				      struct kv_reset_save_regs *save)
4935 {
4936 	int i;
4937 
4938 	WREG32(GMCON_PGFSM_WRITE, 0);
4939 	WREG32(GMCON_PGFSM_CONFIG, 0x200010ff);
4940 
4941 	for (i = 0; i < 5; i++)
4942 		WREG32(GMCON_PGFSM_WRITE, 0);
4943 
4944 	WREG32(GMCON_PGFSM_WRITE, 0);
4945 	WREG32(GMCON_PGFSM_CONFIG, 0x300010ff);
4946 
4947 	for (i = 0; i < 5; i++)
4948 		WREG32(GMCON_PGFSM_WRITE, 0);
4949 
4950 	WREG32(GMCON_PGFSM_WRITE, 0x210000);
4951 	WREG32(GMCON_PGFSM_CONFIG, 0xa00010ff);
4952 
4953 	for (i = 0; i < 5; i++)
4954 		WREG32(GMCON_PGFSM_WRITE, 0);
4955 
4956 	WREG32(GMCON_PGFSM_WRITE, 0x21003);
4957 	WREG32(GMCON_PGFSM_CONFIG, 0xb00010ff);
4958 
4959 	for (i = 0; i < 5; i++)
4960 		WREG32(GMCON_PGFSM_WRITE, 0);
4961 
4962 	WREG32(GMCON_PGFSM_WRITE, 0x2b00);
4963 	WREG32(GMCON_PGFSM_CONFIG, 0xc00010ff);
4964 
4965 	for (i = 0; i < 5; i++)
4966 		WREG32(GMCON_PGFSM_WRITE, 0);
4967 
4968 	WREG32(GMCON_PGFSM_WRITE, 0);
4969 	WREG32(GMCON_PGFSM_CONFIG, 0xd00010ff);
4970 
4971 	for (i = 0; i < 5; i++)
4972 		WREG32(GMCON_PGFSM_WRITE, 0);
4973 
4974 	WREG32(GMCON_PGFSM_WRITE, 0x420000);
4975 	WREG32(GMCON_PGFSM_CONFIG, 0x100010ff);
4976 
4977 	for (i = 0; i < 5; i++)
4978 		WREG32(GMCON_PGFSM_WRITE, 0);
4979 
4980 	WREG32(GMCON_PGFSM_WRITE, 0x120202);
4981 	WREG32(GMCON_PGFSM_CONFIG, 0x500010ff);
4982 
4983 	for (i = 0; i < 5; i++)
4984 		WREG32(GMCON_PGFSM_WRITE, 0);
4985 
4986 	WREG32(GMCON_PGFSM_WRITE, 0x3e3e36);
4987 	WREG32(GMCON_PGFSM_CONFIG, 0x600010ff);
4988 
4989 	for (i = 0; i < 5; i++)
4990 		WREG32(GMCON_PGFSM_WRITE, 0);
4991 
4992 	WREG32(GMCON_PGFSM_WRITE, 0x373f3e);
4993 	WREG32(GMCON_PGFSM_CONFIG, 0x700010ff);
4994 
4995 	for (i = 0; i < 5; i++)
4996 		WREG32(GMCON_PGFSM_WRITE, 0);
4997 
4998 	WREG32(GMCON_PGFSM_WRITE, 0x3e1332);
4999 	WREG32(GMCON_PGFSM_CONFIG, 0xe00010ff);
5000 
5001 	WREG32(GMCON_MISC3, save->gmcon_misc3);
5002 	WREG32(GMCON_MISC, save->gmcon_misc);
5003 	WREG32(GMCON_RENG_EXECUTE, save->gmcon_reng_execute);
5004 }
5005 
5006 static void cik_gpu_pci_config_reset(struct radeon_device *rdev)
5007 {
5008 	struct evergreen_mc_save save;
5009 	struct kv_reset_save_regs kv_save = { 0 };
5010 	u32 tmp, i;
5011 
5012 	dev_info(rdev->dev, "GPU pci config reset\n");
5013 
5014 	/* disable dpm? */
5015 
5016 	/* disable cg/pg */
5017 	cik_fini_pg(rdev);
5018 	cik_fini_cg(rdev);
5019 
5020 	/* Disable GFX parsing/prefetching */
5021 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
5022 
5023 	/* Disable MEC parsing/prefetching */
5024 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
5025 
5026 	/* sdma0 */
5027 	tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
5028 	tmp |= SDMA_HALT;
5029 	WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
5030 	/* sdma1 */
5031 	tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
5032 	tmp |= SDMA_HALT;
5033 	WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
5034 	/* XXX other engines? */
5035 
5036 	/* halt the rlc, disable cp internal ints */
5037 	cik_rlc_stop(rdev);
5038 
5039 	udelay(50);
5040 
5041 	/* disable mem access */
5042 	evergreen_mc_stop(rdev, &save);
5043 	if (evergreen_mc_wait_for_idle(rdev)) {
5044 		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
5045 	}
5046 
5047 	if (rdev->flags & RADEON_IS_IGP)
5048 		kv_save_regs_for_reset(rdev, &kv_save);
5049 
5050 	/* disable BM */
5051 	pci_clear_master(rdev->pdev);
5052 	/* reset */
5053 	radeon_pci_config_reset(rdev);
5054 
5055 	udelay(100);
5056 
5057 	/* wait for asic to come out of reset */
5058 	for (i = 0; i < rdev->usec_timeout; i++) {
5059 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
5060 			break;
5061 		udelay(1);
5062 	}
5063 
5064 	/* does asic init need to be run first??? */
5065 	if (rdev->flags & RADEON_IS_IGP)
5066 		kv_restore_regs_for_reset(rdev, &kv_save);
5067 }
5068 
5069 /**
5070  * cik_asic_reset - soft reset GPU
5071  *
5072  * @rdev: radeon_device pointer
5073  *
5074  * Look up which blocks are hung and attempt
5075  * to reset them.
5076  * Returns 0 for success.
5077  */
5078 int cik_asic_reset(struct radeon_device *rdev)
5079 {
5080 	u32 reset_mask;
5081 
5082 	reset_mask = cik_gpu_check_soft_reset(rdev);
5083 
5084 	if (reset_mask)
5085 		r600_set_bios_scratch_engine_hung(rdev, true);
5086 
5087 	/* try soft reset */
5088 	cik_gpu_soft_reset(rdev, reset_mask);
5089 
5090 	reset_mask = cik_gpu_check_soft_reset(rdev);
5091 
5092 	/* try pci config reset */
5093 	if (reset_mask && radeon_hard_reset)
5094 		cik_gpu_pci_config_reset(rdev);
5095 
5096 	reset_mask = cik_gpu_check_soft_reset(rdev);
5097 
5098 	if (!reset_mask)
5099 		r600_set_bios_scratch_engine_hung(rdev, false);
5100 
5101 	return 0;
5102 }
5103 
5104 /**
5105  * cik_gfx_is_lockup - check if the 3D engine is locked up
5106  *
5107  * @rdev: radeon_device pointer
5108  * @ring: radeon_ring structure holding ring information
5109  *
5110  * Check if the 3D engine is locked up (CIK).
5111  * Returns true if the engine is locked, false if not.
5112  */
5113 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
5114 {
5115 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
5116 
5117 	if (!(reset_mask & (RADEON_RESET_GFX |
5118 			    RADEON_RESET_COMPUTE |
5119 			    RADEON_RESET_CP))) {
5120 		radeon_ring_lockup_update(ring);
5121 		return false;
5122 	}
5123 	/* force CP activities */
5124 	radeon_ring_force_activity(rdev, ring);
5125 	return radeon_ring_test_lockup(rdev, ring);
5126 }
5127 
5128 /* MC */
5129 /**
5130  * cik_mc_program - program the GPU memory controller
5131  *
5132  * @rdev: radeon_device pointer
5133  *
5134  * Set the location of vram, gart, and AGP in the GPU's
5135  * physical address space (CIK).
5136  */
5137 static void cik_mc_program(struct radeon_device *rdev)
5138 {
5139 	struct evergreen_mc_save save;
5140 	u32 tmp;
5141 	int i, j;
5142 
5143 	/* Initialize HDP */
5144 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
5145 		WREG32((0x2c14 + j), 0x00000000);
5146 		WREG32((0x2c18 + j), 0x00000000);
5147 		WREG32((0x2c1c + j), 0x00000000);
5148 		WREG32((0x2c20 + j), 0x00000000);
5149 		WREG32((0x2c24 + j), 0x00000000);
5150 	}
5151 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
5152 
5153 	evergreen_mc_stop(rdev, &save);
5154 	if (radeon_mc_wait_for_idle(rdev)) {
5155 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5156 	}
5157 	/* Lockout access through VGA aperture*/
5158 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
5159 	/* Update configuration */
5160 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
5161 	       rdev->mc.vram_start >> 12);
5162 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
5163 	       rdev->mc.vram_end >> 12);
5164 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
5165 	       rdev->vram_scratch.gpu_addr >> 12);
5166 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
5167 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
5168 	WREG32(MC_VM_FB_LOCATION, tmp);
5169 	/* XXX double check these! */
5170 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
5171 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
5172 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
5173 	WREG32(MC_VM_AGP_BASE, 0);
5174 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
5175 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
5176 	if (radeon_mc_wait_for_idle(rdev)) {
5177 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
5178 	}
5179 	evergreen_mc_resume(rdev, &save);
5180 	/* we need to own VRAM, so turn off the VGA renderer here
5181 	 * to stop it overwriting our objects */
5182 	rv515_vga_render_disable(rdev);
5183 }
5184 
5185 /**
5186  * cik_mc_init - initialize the memory controller driver params
5187  *
5188  * @rdev: radeon_device pointer
5189  *
5190  * Look up the amount of vram, vram width, and decide how to place
5191  * vram and gart within the GPU's physical address space (CIK).
5192  * Returns 0 for success.
5193  */
5194 static int cik_mc_init(struct radeon_device *rdev)
5195 {
5196 	u32 tmp;
5197 	int chansize, numchan;
5198 
5199 	/* Get VRAM informations */
5200 	rdev->mc.vram_is_ddr = true;
5201 	tmp = RREG32(MC_ARB_RAMCFG);
5202 	if (tmp & CHANSIZE_MASK) {
5203 		chansize = 64;
5204 	} else {
5205 		chansize = 32;
5206 	}
5207 	tmp = RREG32(MC_SHARED_CHMAP);
5208 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
5209 	case 0:
5210 	default:
5211 		numchan = 1;
5212 		break;
5213 	case 1:
5214 		numchan = 2;
5215 		break;
5216 	case 2:
5217 		numchan = 4;
5218 		break;
5219 	case 3:
5220 		numchan = 8;
5221 		break;
5222 	case 4:
5223 		numchan = 3;
5224 		break;
5225 	case 5:
5226 		numchan = 6;
5227 		break;
5228 	case 6:
5229 		numchan = 10;
5230 		break;
5231 	case 7:
5232 		numchan = 12;
5233 		break;
5234 	case 8:
5235 		numchan = 16;
5236 		break;
5237 	}
5238 	rdev->mc.vram_width = numchan * chansize;
5239 	/* Could aper size report 0 ? */
5240 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
5241 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
5242 	/* size in MB on si */
5243 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5244 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
5245 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
5246 	si_vram_gtt_location(rdev, &rdev->mc);
5247 	radeon_update_bandwidth_info(rdev);
5248 
5249 	return 0;
5250 }
5251 
5252 /*
5253  * GART
5254  * VMID 0 is the physical GPU addresses as used by the kernel.
5255  * VMIDs 1-15 are used for userspace clients and are handled
5256  * by the radeon vm/hsa code.
5257  */
5258 /**
5259  * cik_pcie_gart_tlb_flush - gart tlb flush callback
5260  *
5261  * @rdev: radeon_device pointer
5262  *
5263  * Flush the TLB for the VMID 0 page table (CIK).
5264  */
5265 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
5266 {
5267 	/* flush hdp cache */
5268 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
5269 
5270 	/* bits 0-15 are the VM contexts0-15 */
5271 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
5272 }
5273 
5274 /**
5275  * cik_pcie_gart_enable - gart enable
5276  *
5277  * @rdev: radeon_device pointer
5278  *
5279  * This sets up the TLBs, programs the page tables for VMID0,
5280  * sets up the hw for VMIDs 1-15 which are allocated on
5281  * demand, and sets up the global locations for the LDS, GDS,
5282  * and GPUVM for FSA64 clients (CIK).
5283  * Returns 0 for success, errors for failure.
5284  */
5285 static int cik_pcie_gart_enable(struct radeon_device *rdev)
5286 {
5287 	int r, i;
5288 
5289 	if (rdev->gart.robj == NULL) {
5290 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
5291 		return -EINVAL;
5292 	}
5293 	r = radeon_gart_table_vram_pin(rdev);
5294 	if (r)
5295 		return r;
5296 	radeon_gart_restore(rdev);
5297 	/* Setup TLB control */
5298 	WREG32(MC_VM_MX_L1_TLB_CNTL,
5299 	       (0xA << 7) |
5300 	       ENABLE_L1_TLB |
5301 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5302 	       ENABLE_ADVANCED_DRIVER_MODEL |
5303 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5304 	/* Setup L2 cache */
5305 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
5306 	       ENABLE_L2_FRAGMENT_PROCESSING |
5307 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5308 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5309 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5310 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5311 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
5312 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5313 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5314 	/* setup context0 */
5315 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
5316 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
5317 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
5318 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
5319 			(u32)(rdev->dummy_page.addr >> 12));
5320 	WREG32(VM_CONTEXT0_CNTL2, 0);
5321 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
5322 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
5323 
5324 	WREG32(0x15D4, 0);
5325 	WREG32(0x15D8, 0);
5326 	WREG32(0x15DC, 0);
5327 
5328 	/* empty context1-15 */
5329 	/* FIXME start with 4G, once using 2 level pt switch to full
5330 	 * vm size space
5331 	 */
5332 	/* set vm size, must be a multiple of 4 */
5333 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
5334 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
5335 	for (i = 1; i < 16; i++) {
5336 		if (i < 8)
5337 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
5338 			       rdev->gart.table_addr >> 12);
5339 		else
5340 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
5341 			       rdev->gart.table_addr >> 12);
5342 	}
5343 
5344 	/* enable context1-15 */
5345 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
5346 	       (u32)(rdev->dummy_page.addr >> 12));
5347 	WREG32(VM_CONTEXT1_CNTL2, 4);
5348 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
5349 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5350 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5351 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5352 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
5353 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
5354 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
5355 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
5356 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
5357 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
5358 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
5359 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
5360 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
5361 
5362 	if (rdev->family == CHIP_KAVERI) {
5363 		u32 tmp = RREG32(CHUB_CONTROL);
5364 		tmp &= ~BYPASS_VM;
5365 		WREG32(CHUB_CONTROL, tmp);
5366 	}
5367 
5368 	/* XXX SH_MEM regs */
5369 	/* where to put LDS, scratch, GPUVM in FSA64 space */
5370 	mutex_lock(&rdev->srbm_mutex);
5371 	for (i = 0; i < 16; i++) {
5372 		cik_srbm_select(rdev, 0, 0, 0, i);
5373 		/* CP and shaders */
5374 		WREG32(SH_MEM_CONFIG, 0);
5375 		WREG32(SH_MEM_APE1_BASE, 1);
5376 		WREG32(SH_MEM_APE1_LIMIT, 0);
5377 		WREG32(SH_MEM_BASES, 0);
5378 		/* SDMA GFX */
5379 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
5380 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
5381 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
5382 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
5383 		/* XXX SDMA RLC - todo */
5384 	}
5385 	cik_srbm_select(rdev, 0, 0, 0, 0);
5386 	mutex_unlock(&rdev->srbm_mutex);
5387 
5388 	cik_pcie_gart_tlb_flush(rdev);
5389 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
5390 		 (unsigned)(rdev->mc.gtt_size >> 20),
5391 		 (unsigned long long)rdev->gart.table_addr);
5392 	rdev->gart.ready = true;
5393 	return 0;
5394 }
5395 
5396 /**
5397  * cik_pcie_gart_disable - gart disable
5398  *
5399  * @rdev: radeon_device pointer
5400  *
5401  * This disables all VM page table (CIK).
5402  */
5403 static void cik_pcie_gart_disable(struct radeon_device *rdev)
5404 {
5405 	/* Disable all tables */
5406 	WREG32(VM_CONTEXT0_CNTL, 0);
5407 	WREG32(VM_CONTEXT1_CNTL, 0);
5408 	/* Setup TLB control */
5409 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
5410 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
5411 	/* Setup L2 cache */
5412 	WREG32(VM_L2_CNTL,
5413 	       ENABLE_L2_FRAGMENT_PROCESSING |
5414 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
5415 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
5416 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
5417 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
5418 	WREG32(VM_L2_CNTL2, 0);
5419 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
5420 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
5421 	radeon_gart_table_vram_unpin(rdev);
5422 }
5423 
/**
 * cik_pcie_gart_fini - vm fini callback
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART, frees the GART table in vram and then tears down
 * the common GART state (CIK).
 */
static void cik_pcie_gart_fini(struct radeon_device *rdev)
{
	cik_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
5437 
/* vm parser */
/**
 * cik_ib_parse - vm ib_parse callback
 *
 * @rdev: radeon_device pointer (unused)
 * @ib: indirect buffer pointer (unused)
 *
 * CIK uses hw IB checking, so there is nothing to validate in
 * software and this is a nop (CIK).
 * Always returns 0.
 */
int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	return 0;
}
5451 
5452 /*
5453  * vm
5454  * VMID 0 is the physical GPU addresses as used by the kernel.
5455  * VMIDs 1-15 are used for userspace clients and are handled
5456  * by the radeon vm/hsa code.
5457  */
5458 /**
5459  * cik_vm_init - cik vm init callback
5460  *
5461  * @rdev: radeon_device pointer
5462  *
5463  * Inits cik specific vm parameters (number of VMs, base of vram for
5464  * VMIDs 1-15) (CIK).
5465  * Returns 0 for success.
5466  */
5467 int cik_vm_init(struct radeon_device *rdev)
5468 {
5469 	/* number of VMs */
5470 	rdev->vm_manager.nvm = 16;
5471 	/* base offset of vram pages */
5472 	if (rdev->flags & RADEON_IS_IGP) {
5473 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
5474 		tmp <<= 22;
5475 		rdev->vm_manager.vram_base_offset = tmp;
5476 	} else
5477 		rdev->vm_manager.vram_base_offset = 0;
5478 
5479 	return 0;
5480 }
5481 
/**
 * cik_vm_fini - cik vm fini callback
 *
 * @rdev: radeon_device pointer (unused)
 *
 * Counterpart of cik_vm_init(); intentionally empty, as this function
 * has no asic specific VM state to tear down (CIK).
 */
void cik_vm_fini(struct radeon_device *rdev)
{
}
5492 
5493 /**
5494  * cik_vm_decode_fault - print human readable fault info
5495  *
5496  * @rdev: radeon_device pointer
5497  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
5498  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
5499  *
5500  * Print human readable fault information (CIK).
5501  */
5502 static void cik_vm_decode_fault(struct radeon_device *rdev,
5503 				u32 status, u32 addr, u32 mc_client)
5504 {
5505 	u32 mc_id;
5506 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
5507 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
5508 	char block[5] = { mc_client >> 24, (mc_client >> 16) & 0xff,
5509 		(mc_client >> 8) & 0xff, mc_client & 0xff, 0 };
5510 
5511 	if (rdev->family == CHIP_HAWAII)
5512 		mc_id = (status & HAWAII_MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5513 	else
5514 		mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
5515 
5516 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from '%s' (0x%08x) (%d)\n",
5517 	       protections, vmid, addr,
5518 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5519 	       block, mc_client, mc_id);
5520 }
5521 
5522 /**
5523  * cik_vm_flush - cik vm flush using the CP
5524  *
5525  * @rdev: radeon_device pointer
5526  *
5527  * Update the page table base and flush the VM TLB
5528  * using the CP (CIK).
5529  */
5530 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
5531 {
5532 	struct radeon_ring *ring = &rdev->ring[ridx];
5533 
5534 	if (vm == NULL)
5535 		return;
5536 
5537 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5538 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5539 				 WRITE_DATA_DST_SEL(0)));
5540 	if (vm->id < 8) {
5541 		radeon_ring_write(ring,
5542 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
5543 	} else {
5544 		radeon_ring_write(ring,
5545 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
5546 	}
5547 	radeon_ring_write(ring, 0);
5548 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
5549 
5550 	/* update SH_MEM_* regs */
5551 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5552 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5553 				 WRITE_DATA_DST_SEL(0)));
5554 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5555 	radeon_ring_write(ring, 0);
5556 	radeon_ring_write(ring, VMID(vm->id));
5557 
5558 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
5559 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5560 				 WRITE_DATA_DST_SEL(0)));
5561 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
5562 	radeon_ring_write(ring, 0);
5563 
5564 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
5565 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
5566 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
5567 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
5568 
5569 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5570 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5571 				 WRITE_DATA_DST_SEL(0)));
5572 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
5573 	radeon_ring_write(ring, 0);
5574 	radeon_ring_write(ring, VMID(0));
5575 
5576 	/* HDP flush */
5577 	cik_hdp_flush_cp_ring_emit(rdev, ridx);
5578 
5579 	/* bits 0-15 are the VM contexts0-15 */
5580 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5581 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
5582 				 WRITE_DATA_DST_SEL(0)));
5583 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5584 	radeon_ring_write(ring, 0);
5585 	radeon_ring_write(ring, 1 << vm->id);
5586 
5587 	/* compute doesn't have PFP */
5588 	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
5589 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
5590 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5591 		radeon_ring_write(ring, 0x0);
5592 	}
5593 }
5594 
5595 /*
5596  * RLC
5597  * The RLC is a multi-purpose microengine that handles a
5598  * variety of functions, the most important of which is
5599  * the interrupt controller.
5600  */
5601 static void cik_enable_gui_idle_interrupt(struct radeon_device *rdev,
5602 					  bool enable)
5603 {
5604 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5605 
5606 	if (enable)
5607 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5608 	else
5609 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5610 	WREG32(CP_INT_CNTL_RING0, tmp);
5611 }
5612 
5613 static void cik_enable_lbpw(struct radeon_device *rdev, bool enable)
5614 {
5615 	u32 tmp;
5616 
5617 	tmp = RREG32(RLC_LB_CNTL);
5618 	if (enable)
5619 		tmp |= LOAD_BALANCE_ENABLE;
5620 	else
5621 		tmp &= ~LOAD_BALANCE_ENABLE;
5622 	WREG32(RLC_LB_CNTL, tmp);
5623 }
5624 
5625 static void cik_wait_for_rlc_serdes(struct radeon_device *rdev)
5626 {
5627 	u32 i, j, k;
5628 	u32 mask;
5629 
5630 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
5631 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
5632 			cik_select_se_sh(rdev, i, j);
5633 			for (k = 0; k < rdev->usec_timeout; k++) {
5634 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
5635 					break;
5636 				udelay(1);
5637 			}
5638 		}
5639 	}
5640 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5641 
5642 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
5643 	for (k = 0; k < rdev->usec_timeout; k++) {
5644 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
5645 			break;
5646 		udelay(1);
5647 	}
5648 }
5649 
5650 static void cik_update_rlc(struct radeon_device *rdev, u32 rlc)
5651 {
5652 	u32 tmp;
5653 
5654 	tmp = RREG32(RLC_CNTL);
5655 	if (tmp != rlc)
5656 		WREG32(RLC_CNTL, rlc);
5657 }
5658 
5659 static u32 cik_halt_rlc(struct radeon_device *rdev)
5660 {
5661 	u32 data, orig;
5662 
5663 	orig = data = RREG32(RLC_CNTL);
5664 
5665 	if (data & RLC_ENABLE) {
5666 		u32 i;
5667 
5668 		data &= ~RLC_ENABLE;
5669 		WREG32(RLC_CNTL, data);
5670 
5671 		for (i = 0; i < rdev->usec_timeout; i++) {
5672 			if ((RREG32(RLC_GPM_STAT) & RLC_GPM_BUSY) == 0)
5673 				break;
5674 			udelay(1);
5675 		}
5676 
5677 		cik_wait_for_rlc_serdes(rdev);
5678 	}
5679 
5680 	return orig;
5681 }
5682 
5683 void cik_enter_rlc_safe_mode(struct radeon_device *rdev)
5684 {
5685 	u32 tmp, i, mask;
5686 
5687 	tmp = REQ | MESSAGE(MSG_ENTER_RLC_SAFE_MODE);
5688 	WREG32(RLC_GPR_REG2, tmp);
5689 
5690 	mask = GFX_POWER_STATUS | GFX_CLOCK_STATUS;
5691 	for (i = 0; i < rdev->usec_timeout; i++) {
5692 		if ((RREG32(RLC_GPM_STAT) & mask) == mask)
5693 			break;
5694 		udelay(1);
5695 	}
5696 
5697 	for (i = 0; i < rdev->usec_timeout; i++) {
5698 		if ((RREG32(RLC_GPR_REG2) & REQ) == 0)
5699 			break;
5700 		udelay(1);
5701 	}
5702 }
5703 
5704 void cik_exit_rlc_safe_mode(struct radeon_device *rdev)
5705 {
5706 	u32 tmp;
5707 
5708 	tmp = REQ | MESSAGE(MSG_EXIT_RLC_SAFE_MODE);
5709 	WREG32(RLC_GPR_REG2, tmp);
5710 }
5711 
5712 /**
5713  * cik_rlc_stop - stop the RLC ME
5714  *
5715  * @rdev: radeon_device pointer
5716  *
5717  * Halt the RLC ME (MicroEngine) (CIK).
5718  */
5719 static void cik_rlc_stop(struct radeon_device *rdev)
5720 {
5721 	WREG32(RLC_CNTL, 0);
5722 
5723 	cik_enable_gui_idle_interrupt(rdev, false);
5724 
5725 	cik_wait_for_rlc_serdes(rdev);
5726 }
5727 
5728 /**
5729  * cik_rlc_start - start the RLC ME
5730  *
5731  * @rdev: radeon_device pointer
5732  *
5733  * Unhalt the RLC ME (MicroEngine) (CIK).
5734  */
5735 static void cik_rlc_start(struct radeon_device *rdev)
5736 {
5737 	WREG32(RLC_CNTL, RLC_ENABLE);
5738 
5739 	cik_enable_gui_idle_interrupt(rdev, true);
5740 
5741 	udelay(50);
5742 }
5743 
5744 /**
5745  * cik_rlc_resume - setup the RLC hw
5746  *
5747  * @rdev: radeon_device pointer
5748  *
5749  * Initialize the RLC registers, load the ucode,
5750  * and start the RLC (CIK).
5751  * Returns 0 for success, -EINVAL if the ucode is not available.
5752  */
5753 static int cik_rlc_resume(struct radeon_device *rdev)
5754 {
5755 	u32 i, size, tmp;
5756 	const __be32 *fw_data;
5757 
5758 	if (!rdev->rlc_fw)
5759 		return -EINVAL;
5760 
5761 	switch (rdev->family) {
5762 	case CHIP_BONAIRE:
5763 	case CHIP_HAWAII:
5764 	default:
5765 		size = BONAIRE_RLC_UCODE_SIZE;
5766 		break;
5767 	case CHIP_KAVERI:
5768 		size = KV_RLC_UCODE_SIZE;
5769 		break;
5770 	case CHIP_KABINI:
5771 		size = KB_RLC_UCODE_SIZE;
5772 		break;
5773 	}
5774 
5775 	cik_rlc_stop(rdev);
5776 
5777 	/* disable CG */
5778 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
5779 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
5780 
5781 	si_rlc_reset(rdev);
5782 
5783 	cik_init_pg(rdev);
5784 
5785 	cik_init_cg(rdev);
5786 
5787 	WREG32(RLC_LB_CNTR_INIT, 0);
5788 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
5789 
5790 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5791 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5792 	WREG32(RLC_LB_PARAMS, 0x00600408);
5793 	WREG32(RLC_LB_CNTL, 0x80000004);
5794 
5795 	WREG32(RLC_MC_CNTL, 0);
5796 	WREG32(RLC_UCODE_CNTL, 0);
5797 
5798 	fw_data = (const __be32 *)rdev->rlc_fw->data;
5799 		WREG32(RLC_GPM_UCODE_ADDR, 0);
5800 	for (i = 0; i < size; i++)
5801 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
5802 	WREG32(RLC_GPM_UCODE_ADDR, 0);
5803 
5804 	/* XXX - find out what chips support lbpw */
5805 	cik_enable_lbpw(rdev, false);
5806 
5807 	if (rdev->family == CHIP_BONAIRE)
5808 		WREG32(RLC_DRIVER_DMA_STATUS, 0);
5809 
5810 	cik_rlc_start(rdev);
5811 
5812 	return 0;
5813 }
5814 
5815 static void cik_enable_cgcg(struct radeon_device *rdev, bool enable)
5816 {
5817 	u32 data, orig, tmp, tmp2;
5818 
5819 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5820 
5821 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5822 		cik_enable_gui_idle_interrupt(rdev, true);
5823 
5824 		tmp = cik_halt_rlc(rdev);
5825 
5826 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5827 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5828 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5829 		tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE;
5830 		WREG32(RLC_SERDES_WR_CTRL, tmp2);
5831 
5832 		cik_update_rlc(rdev, tmp);
5833 
5834 		data |= CGCG_EN | CGLS_EN;
5835 	} else {
5836 		cik_enable_gui_idle_interrupt(rdev, false);
5837 
5838 		RREG32(CB_CGTT_SCLK_CTRL);
5839 		RREG32(CB_CGTT_SCLK_CTRL);
5840 		RREG32(CB_CGTT_SCLK_CTRL);
5841 		RREG32(CB_CGTT_SCLK_CTRL);
5842 
5843 		data &= ~(CGCG_EN | CGLS_EN);
5844 	}
5845 
5846 	if (orig != data)
5847 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5848 
5849 }
5850 
5851 static void cik_enable_mgcg(struct radeon_device *rdev, bool enable)
5852 {
5853 	u32 data, orig, tmp = 0;
5854 
5855 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5856 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) {
5857 			if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5858 				orig = data = RREG32(CP_MEM_SLP_CNTL);
5859 				data |= CP_MEM_LS_EN;
5860 				if (orig != data)
5861 					WREG32(CP_MEM_SLP_CNTL, data);
5862 			}
5863 		}
5864 
5865 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5866 		data &= 0xfffffffd;
5867 		if (orig != data)
5868 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5869 
5870 		tmp = cik_halt_rlc(rdev);
5871 
5872 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5873 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5874 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5875 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_0;
5876 		WREG32(RLC_SERDES_WR_CTRL, data);
5877 
5878 		cik_update_rlc(rdev, tmp);
5879 
5880 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS) {
5881 			orig = data = RREG32(CGTS_SM_CTRL_REG);
5882 			data &= ~SM_MODE_MASK;
5883 			data |= SM_MODE(0x2);
5884 			data |= SM_MODE_ENABLE;
5885 			data &= ~CGTS_OVERRIDE;
5886 			if ((rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGLS) &&
5887 			    (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGTS_LS))
5888 				data &= ~CGTS_LS_OVERRIDE;
5889 			data &= ~ON_MONITOR_ADD_MASK;
5890 			data |= ON_MONITOR_ADD_EN;
5891 			data |= ON_MONITOR_ADD(0x96);
5892 			if (orig != data)
5893 				WREG32(CGTS_SM_CTRL_REG, data);
5894 		}
5895 	} else {
5896 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5897 		data |= 0x00000002;
5898 		if (orig != data)
5899 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5900 
5901 		data = RREG32(RLC_MEM_SLP_CNTL);
5902 		if (data & RLC_MEM_LS_EN) {
5903 			data &= ~RLC_MEM_LS_EN;
5904 			WREG32(RLC_MEM_SLP_CNTL, data);
5905 		}
5906 
5907 		data = RREG32(CP_MEM_SLP_CNTL);
5908 		if (data & CP_MEM_LS_EN) {
5909 			data &= ~CP_MEM_LS_EN;
5910 			WREG32(CP_MEM_SLP_CNTL, data);
5911 		}
5912 
5913 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5914 		data |= CGTS_OVERRIDE | CGTS_LS_OVERRIDE;
5915 		if (orig != data)
5916 			WREG32(CGTS_SM_CTRL_REG, data);
5917 
5918 		tmp = cik_halt_rlc(rdev);
5919 
5920 		cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5921 		WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
5922 		WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
5923 		data = BPM_ADDR_MASK | MGCG_OVERRIDE_1;
5924 		WREG32(RLC_SERDES_WR_CTRL, data);
5925 
5926 		cik_update_rlc(rdev, tmp);
5927 	}
5928 }
5929 
5930 static const u32 mc_cg_registers[] =
5931 {
5932 	MC_HUB_MISC_HUB_CG,
5933 	MC_HUB_MISC_SIP_CG,
5934 	MC_HUB_MISC_VM_CG,
5935 	MC_XPB_CLK_GAT,
5936 	ATC_MISC_CG,
5937 	MC_CITF_MISC_WR_CG,
5938 	MC_CITF_MISC_RD_CG,
5939 	MC_CITF_MISC_VM_CG,
5940 	VM_L2_CG,
5941 };
5942 
5943 static void cik_enable_mc_ls(struct radeon_device *rdev,
5944 			     bool enable)
5945 {
5946 	int i;
5947 	u32 orig, data;
5948 
5949 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5950 		orig = data = RREG32(mc_cg_registers[i]);
5951 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5952 			data |= MC_LS_ENABLE;
5953 		else
5954 			data &= ~MC_LS_ENABLE;
5955 		if (data != orig)
5956 			WREG32(mc_cg_registers[i], data);
5957 	}
5958 }
5959 
5960 static void cik_enable_mc_mgcg(struct radeon_device *rdev,
5961 			       bool enable)
5962 {
5963 	int i;
5964 	u32 orig, data;
5965 
5966 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5967 		orig = data = RREG32(mc_cg_registers[i]);
5968 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5969 			data |= MC_CG_ENABLE;
5970 		else
5971 			data &= ~MC_CG_ENABLE;
5972 		if (data != orig)
5973 			WREG32(mc_cg_registers[i], data);
5974 	}
5975 }
5976 
5977 static void cik_enable_sdma_mgcg(struct radeon_device *rdev,
5978 				 bool enable)
5979 {
5980 	u32 orig, data;
5981 
5982 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5983 		WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
5984 		WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
5985 	} else {
5986 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET);
5987 		data |= 0xff000000;
5988 		if (data != orig)
5989 			WREG32(SDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, data);
5990 
5991 		orig = data = RREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET);
5992 		data |= 0xff000000;
5993 		if (data != orig)
5994 			WREG32(SDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, data);
5995 	}
5996 }
5997 
5998 static void cik_enable_sdma_mgls(struct radeon_device *rdev,
5999 				 bool enable)
6000 {
6001 	u32 orig, data;
6002 
6003 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_LS)) {
6004 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6005 		data |= 0x100;
6006 		if (orig != data)
6007 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6008 
6009 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6010 		data |= 0x100;
6011 		if (orig != data)
6012 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6013 	} else {
6014 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
6015 		data &= ~0x100;
6016 		if (orig != data)
6017 			WREG32(SDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET, data);
6018 
6019 		orig = data = RREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET);
6020 		data &= ~0x100;
6021 		if (orig != data)
6022 			WREG32(SDMA0_POWER_CNTL + SDMA1_REGISTER_OFFSET, data);
6023 	}
6024 }
6025 
6026 static void cik_enable_uvd_mgcg(struct radeon_device *rdev,
6027 				bool enable)
6028 {
6029 	u32 orig, data;
6030 
6031 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
6032 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6033 		data = 0xfff;
6034 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6035 
6036 		orig = data = RREG32(UVD_CGC_CTRL);
6037 		data |= DCM;
6038 		if (orig != data)
6039 			WREG32(UVD_CGC_CTRL, data);
6040 	} else {
6041 		data = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
6042 		data &= ~0xfff;
6043 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, data);
6044 
6045 		orig = data = RREG32(UVD_CGC_CTRL);
6046 		data &= ~DCM;
6047 		if (orig != data)
6048 			WREG32(UVD_CGC_CTRL, data);
6049 	}
6050 }
6051 
6052 static void cik_enable_bif_mgls(struct radeon_device *rdev,
6053 			       bool enable)
6054 {
6055 	u32 orig, data;
6056 
6057 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
6058 
6059 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
6060 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
6061 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
6062 	else
6063 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
6064 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
6065 
6066 	if (orig != data)
6067 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
6068 }
6069 
6070 static void cik_enable_hdp_mgcg(struct radeon_device *rdev,
6071 				bool enable)
6072 {
6073 	u32 orig, data;
6074 
6075 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
6076 
6077 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
6078 		data &= ~CLOCK_GATING_DIS;
6079 	else
6080 		data |= CLOCK_GATING_DIS;
6081 
6082 	if (orig != data)
6083 		WREG32(HDP_HOST_PATH_CNTL, data);
6084 }
6085 
6086 static void cik_enable_hdp_ls(struct radeon_device *rdev,
6087 			      bool enable)
6088 {
6089 	u32 orig, data;
6090 
6091 	orig = data = RREG32(HDP_MEM_POWER_LS);
6092 
6093 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
6094 		data |= HDP_LS_ENABLE;
6095 	else
6096 		data &= ~HDP_LS_ENABLE;
6097 
6098 	if (orig != data)
6099 		WREG32(HDP_MEM_POWER_LS, data);
6100 }
6101 
6102 void cik_update_cg(struct radeon_device *rdev,
6103 		   u32 block, bool enable)
6104 {
6105 
6106 	if (block & RADEON_CG_BLOCK_GFX) {
6107 		cik_enable_gui_idle_interrupt(rdev, false);
6108 		/* order matters! */
6109 		if (enable) {
6110 			cik_enable_mgcg(rdev, true);
6111 			cik_enable_cgcg(rdev, true);
6112 		} else {
6113 			cik_enable_cgcg(rdev, false);
6114 			cik_enable_mgcg(rdev, false);
6115 		}
6116 		cik_enable_gui_idle_interrupt(rdev, true);
6117 	}
6118 
6119 	if (block & RADEON_CG_BLOCK_MC) {
6120 		if (!(rdev->flags & RADEON_IS_IGP)) {
6121 			cik_enable_mc_mgcg(rdev, enable);
6122 			cik_enable_mc_ls(rdev, enable);
6123 		}
6124 	}
6125 
6126 	if (block & RADEON_CG_BLOCK_SDMA) {
6127 		cik_enable_sdma_mgcg(rdev, enable);
6128 		cik_enable_sdma_mgls(rdev, enable);
6129 	}
6130 
6131 	if (block & RADEON_CG_BLOCK_BIF) {
6132 		cik_enable_bif_mgls(rdev, enable);
6133 	}
6134 
6135 	if (block & RADEON_CG_BLOCK_UVD) {
6136 		if (rdev->has_uvd)
6137 			cik_enable_uvd_mgcg(rdev, enable);
6138 	}
6139 
6140 	if (block & RADEON_CG_BLOCK_HDP) {
6141 		cik_enable_hdp_mgcg(rdev, enable);
6142 		cik_enable_hdp_ls(rdev, enable);
6143 	}
6144 }
6145 
6146 static void cik_init_cg(struct radeon_device *rdev)
6147 {
6148 
6149 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, true);
6150 
6151 	if (rdev->has_uvd)
6152 		si_init_uvd_internal_cg(rdev);
6153 
6154 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6155 			     RADEON_CG_BLOCK_SDMA |
6156 			     RADEON_CG_BLOCK_BIF |
6157 			     RADEON_CG_BLOCK_UVD |
6158 			     RADEON_CG_BLOCK_HDP), true);
6159 }
6160 
6161 static void cik_fini_cg(struct radeon_device *rdev)
6162 {
6163 	cik_update_cg(rdev, (RADEON_CG_BLOCK_MC |
6164 			     RADEON_CG_BLOCK_SDMA |
6165 			     RADEON_CG_BLOCK_BIF |
6166 			     RADEON_CG_BLOCK_UVD |
6167 			     RADEON_CG_BLOCK_HDP), false);
6168 
6169 	cik_update_cg(rdev, RADEON_CG_BLOCK_GFX, false);
6170 }
6171 
6172 static void cik_enable_sck_slowdown_on_pu(struct radeon_device *rdev,
6173 					  bool enable)
6174 {
6175 	u32 data, orig;
6176 
6177 	orig = data = RREG32(RLC_PG_CNTL);
6178 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6179 		data |= SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6180 	else
6181 		data &= ~SMU_CLK_SLOWDOWN_ON_PU_ENABLE;
6182 	if (orig != data)
6183 		WREG32(RLC_PG_CNTL, data);
6184 }
6185 
6186 static void cik_enable_sck_slowdown_on_pd(struct radeon_device *rdev,
6187 					  bool enable)
6188 {
6189 	u32 data, orig;
6190 
6191 	orig = data = RREG32(RLC_PG_CNTL);
6192 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_RLC_SMU_HS))
6193 		data |= SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6194 	else
6195 		data &= ~SMU_CLK_SLOWDOWN_ON_PD_ENABLE;
6196 	if (orig != data)
6197 		WREG32(RLC_PG_CNTL, data);
6198 }
6199 
6200 static void cik_enable_cp_pg(struct radeon_device *rdev, bool enable)
6201 {
6202 	u32 data, orig;
6203 
6204 	orig = data = RREG32(RLC_PG_CNTL);
6205 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_CP))
6206 		data &= ~DISABLE_CP_PG;
6207 	else
6208 		data |= DISABLE_CP_PG;
6209 	if (orig != data)
6210 		WREG32(RLC_PG_CNTL, data);
6211 }
6212 
6213 static void cik_enable_gds_pg(struct radeon_device *rdev, bool enable)
6214 {
6215 	u32 data, orig;
6216 
6217 	orig = data = RREG32(RLC_PG_CNTL);
6218 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GDS))
6219 		data &= ~DISABLE_GDS_PG;
6220 	else
6221 		data |= DISABLE_GDS_PG;
6222 	if (orig != data)
6223 		WREG32(RLC_PG_CNTL, data);
6224 }
6225 
6226 #define CP_ME_TABLE_SIZE    96
6227 #define CP_ME_TABLE_OFFSET  2048
6228 #define CP_MEC_TABLE_OFFSET 4096
6229 
6230 void cik_init_cp_pg_table(struct radeon_device *rdev)
6231 {
6232 	const __be32 *fw_data;
6233 	volatile u32 *dst_ptr;
6234 	int me, i, max_me = 4;
6235 	u32 bo_offset = 0;
6236 	u32 table_offset;
6237 
6238 	if (rdev->family == CHIP_KAVERI)
6239 		max_me = 5;
6240 
6241 	if (rdev->rlc.cp_table_ptr == NULL)
6242 		return;
6243 
6244 	/* write the cp table buffer */
6245 	dst_ptr = rdev->rlc.cp_table_ptr;
6246 	for (me = 0; me < max_me; me++) {
6247 		if (me == 0) {
6248 			fw_data = (const __be32 *)rdev->ce_fw->data;
6249 			table_offset = CP_ME_TABLE_OFFSET;
6250 		} else if (me == 1) {
6251 			fw_data = (const __be32 *)rdev->pfp_fw->data;
6252 			table_offset = CP_ME_TABLE_OFFSET;
6253 		} else if (me == 2) {
6254 			fw_data = (const __be32 *)rdev->me_fw->data;
6255 			table_offset = CP_ME_TABLE_OFFSET;
6256 		} else {
6257 			fw_data = (const __be32 *)rdev->mec_fw->data;
6258 			table_offset = CP_MEC_TABLE_OFFSET;
6259 		}
6260 
6261 		for (i = 0; i < CP_ME_TABLE_SIZE; i ++) {
6262 			dst_ptr[bo_offset + i] = cpu_to_le32(be32_to_cpu(fw_data[table_offset + i]));
6263 		}
6264 		bo_offset += CP_ME_TABLE_SIZE;
6265 	}
6266 }
6267 
6268 static void cik_enable_gfx_cgpg(struct radeon_device *rdev,
6269 				bool enable)
6270 {
6271 	u32 data, orig;
6272 
6273 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
6274 		orig = data = RREG32(RLC_PG_CNTL);
6275 		data |= GFX_PG_ENABLE;
6276 		if (orig != data)
6277 			WREG32(RLC_PG_CNTL, data);
6278 
6279 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6280 		data |= AUTO_PG_EN;
6281 		if (orig != data)
6282 			WREG32(RLC_AUTO_PG_CTRL, data);
6283 	} else {
6284 		orig = data = RREG32(RLC_PG_CNTL);
6285 		data &= ~GFX_PG_ENABLE;
6286 		if (orig != data)
6287 			WREG32(RLC_PG_CNTL, data);
6288 
6289 		orig = data = RREG32(RLC_AUTO_PG_CTRL);
6290 		data &= ~AUTO_PG_EN;
6291 		if (orig != data)
6292 			WREG32(RLC_AUTO_PG_CTRL, data);
6293 
6294 		data = RREG32(DB_RENDER_CONTROL);
6295 	}
6296 }
6297 
6298 static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
6299 {
6300 	u32 mask = 0, tmp, tmp1;
6301 	int i;
6302 
6303 	cik_select_se_sh(rdev, se, sh);
6304 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
6305 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
6306 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
6307 
6308 	tmp &= 0xffff0000;
6309 
6310 	tmp |= tmp1;
6311 	tmp >>= 16;
6312 
6313 	for (i = 0; i < rdev->config.cik.max_cu_per_sh; i ++) {
6314 		mask <<= 1;
6315 		mask |= 1;
6316 	}
6317 
6318 	return (~tmp) & mask;
6319 }
6320 
6321 static void cik_init_ao_cu_mask(struct radeon_device *rdev)
6322 {
6323 	u32 i, j, k, active_cu_number = 0;
6324 	u32 mask, counter, cu_bitmap;
6325 	u32 tmp = 0;
6326 
6327 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
6328 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
6329 			mask = 1;
6330 			cu_bitmap = 0;
6331 			counter = 0;
6332 			for (k = 0; k < rdev->config.cik.max_cu_per_sh; k ++) {
6333 				if (cik_get_cu_active_bitmap(rdev, i, j) & mask) {
6334 					if (counter < 2)
6335 						cu_bitmap |= mask;
6336 					counter ++;
6337 				}
6338 				mask <<= 1;
6339 			}
6340 
6341 			active_cu_number += counter;
6342 			tmp |= (cu_bitmap << (i * 16 + j * 8));
6343 		}
6344 	}
6345 
6346 	WREG32(RLC_PG_AO_CU_MASK, tmp);
6347 
6348 	tmp = RREG32(RLC_MAX_PG_CU);
6349 	tmp &= ~MAX_PU_CU_MASK;
6350 	tmp |= MAX_PU_CU(active_cu_number);
6351 	WREG32(RLC_MAX_PG_CU, tmp);
6352 }
6353 
6354 static void cik_enable_gfx_static_mgpg(struct radeon_device *rdev,
6355 				       bool enable)
6356 {
6357 	u32 data, orig;
6358 
6359 	orig = data = RREG32(RLC_PG_CNTL);
6360 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_SMG))
6361 		data |= STATIC_PER_CU_PG_ENABLE;
6362 	else
6363 		data &= ~STATIC_PER_CU_PG_ENABLE;
6364 	if (orig != data)
6365 		WREG32(RLC_PG_CNTL, data);
6366 }
6367 
6368 static void cik_enable_gfx_dynamic_mgpg(struct radeon_device *rdev,
6369 					bool enable)
6370 {
6371 	u32 data, orig;
6372 
6373 	orig = data = RREG32(RLC_PG_CNTL);
6374 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_DMG))
6375 		data |= DYN_PER_CU_PG_ENABLE;
6376 	else
6377 		data &= ~DYN_PER_CU_PG_ENABLE;
6378 	if (orig != data)
6379 		WREG32(RLC_PG_CNTL, data);
6380 }
6381 
6382 #define RLC_SAVE_AND_RESTORE_STARTING_OFFSET 0x90
6383 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
6384 
6385 static void cik_init_gfx_cgpg(struct radeon_device *rdev)
6386 {
6387 	u32 data, orig;
6388 	u32 i;
6389 
6390 	if (rdev->rlc.cs_data) {
6391 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6392 		WREG32(RLC_GPM_SCRATCH_DATA, upper_32_bits(rdev->rlc.clear_state_gpu_addr));
6393 		WREG32(RLC_GPM_SCRATCH_DATA, lower_32_bits(rdev->rlc.clear_state_gpu_addr));
6394 		WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.clear_state_size);
6395 	} else {
6396 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_CLEAR_STATE_DESCRIPTOR_OFFSET);
6397 		for (i = 0; i < 3; i++)
6398 			WREG32(RLC_GPM_SCRATCH_DATA, 0);
6399 	}
6400 	if (rdev->rlc.reg_list) {
6401 		WREG32(RLC_GPM_SCRATCH_ADDR, RLC_SAVE_AND_RESTORE_STARTING_OFFSET);
6402 		for (i = 0; i < rdev->rlc.reg_list_size; i++)
6403 			WREG32(RLC_GPM_SCRATCH_DATA, rdev->rlc.reg_list[i]);
6404 	}
6405 
6406 	orig = data = RREG32(RLC_PG_CNTL);
6407 	data |= GFX_PG_SRC;
6408 	if (orig != data)
6409 		WREG32(RLC_PG_CNTL, data);
6410 
6411 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
6412 	WREG32(RLC_CP_TABLE_RESTORE, rdev->rlc.cp_table_gpu_addr >> 8);
6413 
6414 	data = RREG32(CP_RB_WPTR_POLL_CNTL);
6415 	data &= ~IDLE_POLL_COUNT_MASK;
6416 	data |= IDLE_POLL_COUNT(0x60);
6417 	WREG32(CP_RB_WPTR_POLL_CNTL, data);
6418 
6419 	data = 0x10101010;
6420 	WREG32(RLC_PG_DELAY, data);
6421 
6422 	data = RREG32(RLC_PG_DELAY_2);
6423 	data &= ~0xff;
6424 	data |= 0x3;
6425 	WREG32(RLC_PG_DELAY_2, data);
6426 
6427 	data = RREG32(RLC_AUTO_PG_CTRL);
6428 	data &= ~GRBM_REG_SGIT_MASK;
6429 	data |= GRBM_REG_SGIT(0x700);
6430 	WREG32(RLC_AUTO_PG_CTRL, data);
6431 
6432 }
6433 
6434 static void cik_update_gfx_pg(struct radeon_device *rdev, bool enable)
6435 {
6436 	cik_enable_gfx_cgpg(rdev, enable);
6437 	cik_enable_gfx_static_mgpg(rdev, enable);
6438 	cik_enable_gfx_dynamic_mgpg(rdev, enable);
6439 }
6440 
6441 u32 cik_get_csb_size(struct radeon_device *rdev)
6442 {
6443 	u32 count = 0;
6444 	const struct cs_section_def *sect = NULL;
6445 	const struct cs_extent_def *ext = NULL;
6446 
6447 	if (rdev->rlc.cs_data == NULL)
6448 		return 0;
6449 
6450 	/* begin clear state */
6451 	count += 2;
6452 	/* context control state */
6453 	count += 3;
6454 
6455 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6456 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6457 			if (sect->id == SECT_CONTEXT)
6458 				count += 2 + ext->reg_count;
6459 			else
6460 				return 0;
6461 		}
6462 	}
6463 	/* pa_sc_raster_config/pa_sc_raster_config1 */
6464 	count += 4;
6465 	/* end clear state */
6466 	count += 2;
6467 	/* clear state */
6468 	count += 2;
6469 
6470 	return count;
6471 }
6472 
6473 void cik_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
6474 {
6475 	u32 count = 0, i;
6476 	const struct cs_section_def *sect = NULL;
6477 	const struct cs_extent_def *ext = NULL;
6478 
6479 	if (rdev->rlc.cs_data == NULL)
6480 		return;
6481 	if (buffer == NULL)
6482 		return;
6483 
6484 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6485 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
6486 
6487 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
6488 	buffer[count++] = cpu_to_le32(0x80000000);
6489 	buffer[count++] = cpu_to_le32(0x80000000);
6490 
6491 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
6492 		for (ext = sect->section; ext->extent != NULL; ++ext) {
6493 			if (sect->id == SECT_CONTEXT) {
6494 				buffer[count++] =
6495 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
6496 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
6497 				for (i = 0; i < ext->reg_count; i++)
6498 					buffer[count++] = cpu_to_le32(ext->extent[i]);
6499 			} else {
6500 				return;
6501 			}
6502 		}
6503 	}
6504 
6505 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 2));
6506 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
6507 	switch (rdev->family) {
6508 	case CHIP_BONAIRE:
6509 		buffer[count++] = cpu_to_le32(0x16000012);
6510 		buffer[count++] = cpu_to_le32(0x00000000);
6511 		break;
6512 	case CHIP_KAVERI:
6513 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6514 		buffer[count++] = cpu_to_le32(0x00000000);
6515 		break;
6516 	case CHIP_KABINI:
6517 		buffer[count++] = cpu_to_le32(0x00000000); /* XXX */
6518 		buffer[count++] = cpu_to_le32(0x00000000);
6519 		break;
6520 	case CHIP_HAWAII:
6521 		buffer[count++] = 0x3a00161a;
6522 		buffer[count++] = 0x0000002e;
6523 		break;
6524 	default:
6525 		buffer[count++] = cpu_to_le32(0x00000000);
6526 		buffer[count++] = cpu_to_le32(0x00000000);
6527 		break;
6528 	}
6529 
6530 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
6531 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
6532 
6533 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
6534 	buffer[count++] = cpu_to_le32(0);
6535 }
6536 
6537 static void cik_init_pg(struct radeon_device *rdev)
6538 {
6539 	if (rdev->pg_flags) {
6540 		cik_enable_sck_slowdown_on_pu(rdev, true);
6541 		cik_enable_sck_slowdown_on_pd(rdev, true);
6542 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6543 			cik_init_gfx_cgpg(rdev);
6544 			cik_enable_cp_pg(rdev, true);
6545 			cik_enable_gds_pg(rdev, true);
6546 		}
6547 		cik_init_ao_cu_mask(rdev);
6548 		cik_update_gfx_pg(rdev, true);
6549 	}
6550 }
6551 
6552 static void cik_fini_pg(struct radeon_device *rdev)
6553 {
6554 	if (rdev->pg_flags) {
6555 		cik_update_gfx_pg(rdev, false);
6556 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
6557 			cik_enable_cp_pg(rdev, false);
6558 			cik_enable_gds_pg(rdev, false);
6559 		}
6560 	}
6561 }
6562 
6563 /*
6564  * Interrupts
6565  * Starting with r6xx, interrupts are handled via a ring buffer.
6566  * Ring buffers are areas of GPU accessible memory that the GPU
6567  * writes interrupt vectors into and the host reads vectors out of.
6568  * There is a rptr (read pointer) that determines where the
6569  * host is currently reading, and a wptr (write pointer)
6570  * which determines where the GPU has written.  When the
6571  * pointers are equal, the ring is idle.  When the GPU
6572  * writes vectors to the ring buffer, it increments the
6573  * wptr.  When there is an interrupt, the host then starts
6574  * fetching commands and processing them until the pointers are
6575  * equal again at which point it updates the rptr.
6576  */
6577 
6578 /**
6579  * cik_enable_interrupts - Enable the interrupt ring buffer
6580  *
6581  * @rdev: radeon_device pointer
6582  *
6583  * Enable the interrupt ring buffer (CIK).
6584  */
6585 static void cik_enable_interrupts(struct radeon_device *rdev)
6586 {
6587 	u32 ih_cntl = RREG32(IH_CNTL);
6588 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6589 
6590 	ih_cntl |= ENABLE_INTR;
6591 	ih_rb_cntl |= IH_RB_ENABLE;
6592 	WREG32(IH_CNTL, ih_cntl);
6593 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6594 	rdev->ih.enabled = true;
6595 }
6596 
6597 /**
6598  * cik_disable_interrupts - Disable the interrupt ring buffer
6599  *
6600  * @rdev: radeon_device pointer
6601  *
6602  * Disable the interrupt ring buffer (CIK).
6603  */
6604 static void cik_disable_interrupts(struct radeon_device *rdev)
6605 {
6606 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
6607 	u32 ih_cntl = RREG32(IH_CNTL);
6608 
6609 	ih_rb_cntl &= ~IH_RB_ENABLE;
6610 	ih_cntl &= ~ENABLE_INTR;
6611 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6612 	WREG32(IH_CNTL, ih_cntl);
6613 	/* set rptr, wptr to 0 */
6614 	WREG32(IH_RB_RPTR, 0);
6615 	WREG32(IH_RB_WPTR, 0);
6616 	rdev->ih.enabled = false;
6617 	rdev->ih.rptr = 0;
6618 }
6619 
6620 /**
6621  * cik_disable_interrupt_state - Disable all interrupt sources
6622  *
6623  * @rdev: radeon_device pointer
6624  *
6625  * Clear all interrupt enable bits used by the driver (CIK).
6626  */
6627 static void cik_disable_interrupt_state(struct radeon_device *rdev)
6628 {
6629 	u32 tmp;
6630 
6631 	/* gfx ring */
6632 	tmp = RREG32(CP_INT_CNTL_RING0) &
6633 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6634 	WREG32(CP_INT_CNTL_RING0, tmp);
6635 	/* sdma */
6636 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6637 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
6638 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6639 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
6640 	/* compute queues */
6641 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
6642 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
6643 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
6644 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
6645 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
6646 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
6647 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
6648 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
6649 	/* grbm */
6650 	WREG32(GRBM_INT_CNTL, 0);
6651 	/* vline/vblank, etc. */
6652 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
6653 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
6654 	if (rdev->num_crtc >= 4) {
6655 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
6656 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
6657 	}
6658 	if (rdev->num_crtc >= 6) {
6659 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
6660 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
6661 	}
6662 
6663 	/* dac hotplug */
6664 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
6665 
6666 	/* digital hotplug */
6667 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6668 	WREG32(DC_HPD1_INT_CONTROL, tmp);
6669 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6670 	WREG32(DC_HPD2_INT_CONTROL, tmp);
6671 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6672 	WREG32(DC_HPD3_INT_CONTROL, tmp);
6673 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6674 	WREG32(DC_HPD4_INT_CONTROL, tmp);
6675 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6676 	WREG32(DC_HPD5_INT_CONTROL, tmp);
6677 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
6678 	WREG32(DC_HPD6_INT_CONTROL, tmp);
6679 
6680 }
6681 
6682 /**
6683  * cik_irq_init - init and enable the interrupt ring
6684  *
6685  * @rdev: radeon_device pointer
6686  *
6687  * Allocate a ring buffer for the interrupt controller,
6688  * enable the RLC, disable interrupts, enable the IH
6689  * ring buffer and enable it (CIK).
6690  * Called at device load and reume.
6691  * Returns 0 for success, errors for failure.
6692  */
6693 static int cik_irq_init(struct radeon_device *rdev)
6694 {
6695 	int ret = 0;
6696 	int rb_bufsz;
6697 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
6698 
6699 	/* allocate ring */
6700 	ret = r600_ih_ring_alloc(rdev);
6701 	if (ret)
6702 		return ret;
6703 
6704 	/* disable irqs */
6705 	cik_disable_interrupts(rdev);
6706 
6707 	/* init rlc */
6708 	ret = cik_rlc_resume(rdev);
6709 	if (ret) {
6710 		r600_ih_ring_fini(rdev);
6711 		return ret;
6712 	}
6713 
6714 	/* setup interrupt control */
6715 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
6716 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6717 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6718 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6719 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6720 	 */
6721 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6722 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6723 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6724 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6725 
6726 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6727 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6728 
6729 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6730 		      IH_WPTR_OVERFLOW_CLEAR |
6731 		      (rb_bufsz << 1));
6732 
6733 	if (rdev->wb.enabled)
6734 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6735 
6736 	/* set the writeback address whether it's enabled or not */
6737 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6738 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6739 
6740 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6741 
6742 	/* set rptr, wptr to 0 */
6743 	WREG32(IH_RB_RPTR, 0);
6744 	WREG32(IH_RB_WPTR, 0);
6745 
6746 	/* Default settings for IH_CNTL (disabled at first) */
6747 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6748 	/* RPTR_REARM only works if msi's are enabled */
6749 	if (rdev->msi_enabled)
6750 		ih_cntl |= RPTR_REARM;
6751 	WREG32(IH_CNTL, ih_cntl);
6752 
6753 	/* force the active interrupt state to all disabled */
6754 	cik_disable_interrupt_state(rdev);
6755 
6756 	pci_set_master(rdev->pdev);
6757 
6758 	/* enable irqs */
6759 	cik_enable_interrupts(rdev);
6760 
6761 	return ret;
6762 }
6763 
6764 /**
6765  * cik_irq_set - enable/disable interrupt sources
6766  *
6767  * @rdev: radeon_device pointer
6768  *
6769  * Enable interrupt sources on the GPU (vblanks, hpd,
6770  * etc.) (CIK).
6771  * Returns 0 for success, errors for failure.
6772  */
6773 int cik_irq_set(struct radeon_device *rdev)
6774 {
6775 	u32 cp_int_cntl;
6776 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
6777 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
6778 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6779 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
6780 	u32 grbm_int_cntl = 0;
6781 	u32 dma_cntl, dma_cntl1;
6782 	u32 thermal_int;
6783 
6784 	if (!rdev->irq.installed) {
6785 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6786 		return -EINVAL;
6787 	}
6788 	/* don't enable anything if the ih is disabled */
6789 	if (!rdev->ih.enabled) {
6790 		cik_disable_interrupts(rdev);
6791 		/* force the active interrupt state to all disabled */
6792 		cik_disable_interrupt_state(rdev);
6793 		return 0;
6794 	}
6795 
6796 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6797 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6798 	cp_int_cntl |= PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
6799 
6800 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
6801 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
6802 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
6803 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
6804 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
6805 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
6806 
6807 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6808 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6809 
6810 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6811 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6812 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6813 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6814 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6815 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6816 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6817 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
6818 
6819 	if (rdev->flags & RADEON_IS_IGP)
6820 		thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) &
6821 			~(THERM_INTH_MASK | THERM_INTL_MASK);
6822 	else
6823 		thermal_int = RREG32_SMC(CG_THERMAL_INT) &
6824 			~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6825 
6826 	/* enable CP interrupts on all rings */
6827 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6828 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
6829 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6830 	}
6831 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6832 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6833 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6834 		if (ring->me == 1) {
6835 			switch (ring->pipe) {
6836 			case 0:
6837 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6838 				break;
6839 			case 1:
6840 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6841 				break;
6842 			case 2:
6843 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6844 				break;
6845 			case 3:
6846 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6847 				break;
6848 			default:
6849 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6850 				break;
6851 			}
6852 		} else if (ring->me == 2) {
6853 			switch (ring->pipe) {
6854 			case 0:
6855 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6856 				break;
6857 			case 1:
6858 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6859 				break;
6860 			case 2:
6861 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6862 				break;
6863 			case 3:
6864 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6865 				break;
6866 			default:
6867 				DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
6868 				break;
6869 			}
6870 		} else {
6871 			DRM_DEBUG("si_irq_set: sw int cp1 invalid me %d\n", ring->me);
6872 		}
6873 	}
6874 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6875 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6876 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6877 		if (ring->me == 1) {
6878 			switch (ring->pipe) {
6879 			case 0:
6880 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
6881 				break;
6882 			case 1:
6883 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
6884 				break;
6885 			case 2:
6886 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6887 				break;
6888 			case 3:
6889 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
6890 				break;
6891 			default:
6892 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6893 				break;
6894 			}
6895 		} else if (ring->me == 2) {
6896 			switch (ring->pipe) {
6897 			case 0:
6898 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
6899 				break;
6900 			case 1:
6901 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
6902 				break;
6903 			case 2:
6904 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6905 				break;
6906 			case 3:
6907 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
6908 				break;
6909 			default:
6910 				DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
6911 				break;
6912 			}
6913 		} else {
6914 			DRM_DEBUG("si_irq_set: sw int cp2 invalid me %d\n", ring->me);
6915 		}
6916 	}
6917 
6918 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6919 		DRM_DEBUG("cik_irq_set: sw int dma\n");
6920 		dma_cntl |= TRAP_ENABLE;
6921 	}
6922 
6923 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6924 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
6925 		dma_cntl1 |= TRAP_ENABLE;
6926 	}
6927 
6928 	if (rdev->irq.crtc_vblank_int[0] ||
6929 	    atomic_read(&rdev->irq.pflip[0])) {
6930 		DRM_DEBUG("cik_irq_set: vblank 0\n");
6931 		crtc1 |= VBLANK_INTERRUPT_MASK;
6932 	}
6933 	if (rdev->irq.crtc_vblank_int[1] ||
6934 	    atomic_read(&rdev->irq.pflip[1])) {
6935 		DRM_DEBUG("cik_irq_set: vblank 1\n");
6936 		crtc2 |= VBLANK_INTERRUPT_MASK;
6937 	}
6938 	if (rdev->irq.crtc_vblank_int[2] ||
6939 	    atomic_read(&rdev->irq.pflip[2])) {
6940 		DRM_DEBUG("cik_irq_set: vblank 2\n");
6941 		crtc3 |= VBLANK_INTERRUPT_MASK;
6942 	}
6943 	if (rdev->irq.crtc_vblank_int[3] ||
6944 	    atomic_read(&rdev->irq.pflip[3])) {
6945 		DRM_DEBUG("cik_irq_set: vblank 3\n");
6946 		crtc4 |= VBLANK_INTERRUPT_MASK;
6947 	}
6948 	if (rdev->irq.crtc_vblank_int[4] ||
6949 	    atomic_read(&rdev->irq.pflip[4])) {
6950 		DRM_DEBUG("cik_irq_set: vblank 4\n");
6951 		crtc5 |= VBLANK_INTERRUPT_MASK;
6952 	}
6953 	if (rdev->irq.crtc_vblank_int[5] ||
6954 	    atomic_read(&rdev->irq.pflip[5])) {
6955 		DRM_DEBUG("cik_irq_set: vblank 5\n");
6956 		crtc6 |= VBLANK_INTERRUPT_MASK;
6957 	}
6958 	if (rdev->irq.hpd[0]) {
6959 		DRM_DEBUG("cik_irq_set: hpd 1\n");
6960 		hpd1 |= DC_HPDx_INT_EN;
6961 	}
6962 	if (rdev->irq.hpd[1]) {
6963 		DRM_DEBUG("cik_irq_set: hpd 2\n");
6964 		hpd2 |= DC_HPDx_INT_EN;
6965 	}
6966 	if (rdev->irq.hpd[2]) {
6967 		DRM_DEBUG("cik_irq_set: hpd 3\n");
6968 		hpd3 |= DC_HPDx_INT_EN;
6969 	}
6970 	if (rdev->irq.hpd[3]) {
6971 		DRM_DEBUG("cik_irq_set: hpd 4\n");
6972 		hpd4 |= DC_HPDx_INT_EN;
6973 	}
6974 	if (rdev->irq.hpd[4]) {
6975 		DRM_DEBUG("cik_irq_set: hpd 5\n");
6976 		hpd5 |= DC_HPDx_INT_EN;
6977 	}
6978 	if (rdev->irq.hpd[5]) {
6979 		DRM_DEBUG("cik_irq_set: hpd 6\n");
6980 		hpd6 |= DC_HPDx_INT_EN;
6981 	}
6982 
6983 	if (rdev->irq.dpm_thermal) {
6984 		DRM_DEBUG("dpm thermal\n");
6985 		if (rdev->flags & RADEON_IS_IGP)
6986 			thermal_int |= THERM_INTH_MASK | THERM_INTL_MASK;
6987 		else
6988 			thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6989 	}
6990 
6991 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6992 
6993 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
6994 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
6995 
6996 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
6997 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
6998 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
6999 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
7000 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
7001 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
7002 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
7003 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
7004 
7005 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
7006 
7007 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
7008 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
7009 	if (rdev->num_crtc >= 4) {
7010 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
7011 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
7012 	}
7013 	if (rdev->num_crtc >= 6) {
7014 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
7015 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
7016 	}
7017 
7018 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
7019 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
7020 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
7021 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
7022 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
7023 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
7024 
7025 	if (rdev->flags & RADEON_IS_IGP)
7026 		WREG32_SMC(CG_THERMAL_INT_CTRL, thermal_int);
7027 	else
7028 		WREG32_SMC(CG_THERMAL_INT, thermal_int);
7029 
7030 	return 0;
7031 }
7032 
7033 /**
7034  * cik_irq_ack - ack interrupt sources
7035  *
7036  * @rdev: radeon_device pointer
7037  *
7038  * Ack interrupt sources on the GPU (vblanks, hpd,
7039  * etc.) (CIK).  Certain interrupts sources are sw
7040  * generated and do not require an explicit ack.
7041  */
7042 static inline void cik_irq_ack(struct radeon_device *rdev)
7043 {
7044 	u32 tmp;
7045 
7046 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
7047 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
7048 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
7049 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
7050 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
7051 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
7052 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
7053 
7054 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
7055 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
7056 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
7057 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
7058 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
7059 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
7060 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
7061 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
7062 
7063 	if (rdev->num_crtc >= 4) {
7064 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
7065 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
7066 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
7067 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
7068 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
7069 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
7070 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
7071 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
7072 	}
7073 
7074 	if (rdev->num_crtc >= 6) {
7075 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
7076 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
7077 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
7078 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
7079 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
7080 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
7081 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
7082 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
7083 	}
7084 
7085 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7086 		tmp = RREG32(DC_HPD1_INT_CONTROL);
7087 		tmp |= DC_HPDx_INT_ACK;
7088 		WREG32(DC_HPD1_INT_CONTROL, tmp);
7089 	}
7090 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7091 		tmp = RREG32(DC_HPD2_INT_CONTROL);
7092 		tmp |= DC_HPDx_INT_ACK;
7093 		WREG32(DC_HPD2_INT_CONTROL, tmp);
7094 	}
7095 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7096 		tmp = RREG32(DC_HPD3_INT_CONTROL);
7097 		tmp |= DC_HPDx_INT_ACK;
7098 		WREG32(DC_HPD3_INT_CONTROL, tmp);
7099 	}
7100 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7101 		tmp = RREG32(DC_HPD4_INT_CONTROL);
7102 		tmp |= DC_HPDx_INT_ACK;
7103 		WREG32(DC_HPD4_INT_CONTROL, tmp);
7104 	}
7105 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7106 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7107 		tmp |= DC_HPDx_INT_ACK;
7108 		WREG32(DC_HPD5_INT_CONTROL, tmp);
7109 	}
7110 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7111 		tmp = RREG32(DC_HPD5_INT_CONTROL);
7112 		tmp |= DC_HPDx_INT_ACK;
7113 		WREG32(DC_HPD6_INT_CONTROL, tmp);
7114 	}
7115 }
7116 
/**
 * cik_irq_disable - disable interrupts
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw (CIK): turn interrupts off, wait
 * 1 ms, ack the display interrupt sources and force the interrupt
 * state to all disabled.
 */
static void cik_irq_disable(struct radeon_device *rdev)
{
	cik_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	cik_irq_ack(rdev);
	/* force the active interrupt state to all disabled */
	cik_disable_interrupt_state(rdev);
}
7132 
/**
 * cik_irq_suspend - disable interrupts for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts and stop the RLC (CIK).
 * Used for suspend.
 */
static void cik_irq_suspend(struct radeon_device *rdev)
{
	/* interrupts are disabled first, then the RLC is stopped */
	cik_irq_disable(rdev);
	cik_rlc_stop(rdev);
}
7146 
/**
 * cik_irq_fini - tear down interrupt support
 *
 * @rdev: radeon_device pointer
 *
 * Disable interrupts on the hw and free the IH ring
 * buffer (CIK).
 * Used for driver unload.
 */
static void cik_irq_fini(struct radeon_device *rdev)
{
	/* same sequence as suspend (irqs off, RLC stopped) ... */
	cik_irq_suspend(rdev);
	/* ... followed by tearing down the IH ring */
	r600_ih_ring_fini(rdev);
}
7161 
7162 /**
7163  * cik_get_ih_wptr - get the IH ring buffer wptr
7164  *
7165  * @rdev: radeon_device pointer
7166  *
7167  * Get the IH ring buffer wptr from either the register
7168  * or the writeback memory buffer (CIK).  Also check for
7169  * ring buffer overflow and deal with it.
7170  * Used by cik_irq_process().
7171  * Returns the value of the wptr.
7172  */
7173 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
7174 {
7175 	u32 wptr, tmp;
7176 
7177 	if (rdev->wb.enabled)
7178 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
7179 	else
7180 		wptr = RREG32(IH_RB_WPTR);
7181 
7182 	if (wptr & RB_OVERFLOW) {
7183 		/* When a ring buffer overflow happen start parsing interrupt
7184 		 * from the last not overwritten vector (wptr + 16). Hopefully
7185 		 * this should allow us to catchup.
7186 		 */
7187 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
7188 			wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
7189 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
7190 		tmp = RREG32(IH_RB_CNTL);
7191 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
7192 		WREG32(IH_RB_CNTL, tmp);
7193 	}
7194 	return (wptr & rdev->ih.ptr_mask);
7195 }
7196 
7197 /*        CIK IV Ring
7198  * Each IV ring entry is 128 bits:
7199  * [7:0]    - interrupt source id
7200  * [31:8]   - reserved
7201  * [59:32]  - interrupt source data
7202  * [63:60]  - reserved
7203  * [71:64]  - RINGID
7204  *            CP:
7205  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
7206  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
7207  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
7208  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
7209  *            PIPE_ID - ME0 0=3D
7210  *                    - ME1&2 compute dispatcher (4 pipes each)
7211  *            SDMA:
7212  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
7213  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
7214  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
7215  * [79:72]  - VMID
7216  * [95:80]  - PASID
7217  * [127:96] - reserved
7218  */
7219 /**
7220  * cik_irq_process - interrupt handler
7221  *
7222  * @rdev: radeon_device pointer
7223  *
7224  * Interrupt hander (CIK).  Walk the IH ring,
7225  * ack interrupts and schedule work to handle
7226  * interrupt events.
7227  * Returns irq process return code.
7228  */
7229 int cik_irq_process(struct radeon_device *rdev)
7230 {
7231 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7232 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7233 	u32 wptr;
7234 	u32 rptr;
7235 	u32 src_id, src_data, ring_id;
7236 	u8 me_id, pipe_id, queue_id;
7237 	u32 ring_index;
7238 	bool queue_hotplug = false;
7239 	bool queue_reset = false;
7240 	u32 addr, status, mc_client;
7241 	bool queue_thermal = false;
7242 
7243 	if (!rdev->ih.enabled || rdev->shutdown)
7244 		return IRQ_NONE;
7245 
7246 	wptr = cik_get_ih_wptr(rdev);
7247 
7248 restart_ih:
7249 	/* is somebody else already processing irqs? */
7250 	if (atomic_xchg(&rdev->ih.lock, 1))
7251 		return IRQ_NONE;
7252 
7253 	rptr = rdev->ih.rptr;
7254 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
7255 
7256 	/* Order reading of wptr vs. reading of IH ring data */
7257 	rmb();
7258 
7259 	/* display interrupts */
7260 	cik_irq_ack(rdev);
7261 
7262 	while (rptr != wptr) {
7263 		/* wptr/rptr are in bytes! */
7264 		ring_index = rptr / 4;
7265 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
7266 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
7267 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
7268 
7269 		switch (src_id) {
7270 		case 1: /* D1 vblank/vline */
7271 			switch (src_data) {
7272 			case 0: /* D1 vblank */
7273 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
7274 					if (rdev->irq.crtc_vblank_int[0]) {
7275 						drm_handle_vblank(rdev->ddev, 0);
7276 						rdev->pm.vblank_sync = true;
7277 						wake_up(&rdev->irq.vblank_queue);
7278 					}
7279 					if (atomic_read(&rdev->irq.pflip[0]))
7280 						radeon_crtc_handle_flip(rdev, 0);
7281 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
7282 					DRM_DEBUG("IH: D1 vblank\n");
7283 				}
7284 				break;
7285 			case 1: /* D1 vline */
7286 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
7287 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
7288 					DRM_DEBUG("IH: D1 vline\n");
7289 				}
7290 				break;
7291 			default:
7292 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7293 				break;
7294 			}
7295 			break;
7296 		case 2: /* D2 vblank/vline */
7297 			switch (src_data) {
7298 			case 0: /* D2 vblank */
7299 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
7300 					if (rdev->irq.crtc_vblank_int[1]) {
7301 						drm_handle_vblank(rdev->ddev, 1);
7302 						rdev->pm.vblank_sync = true;
7303 						wake_up(&rdev->irq.vblank_queue);
7304 					}
7305 					if (atomic_read(&rdev->irq.pflip[1]))
7306 						radeon_crtc_handle_flip(rdev, 1);
7307 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
7308 					DRM_DEBUG("IH: D2 vblank\n");
7309 				}
7310 				break;
7311 			case 1: /* D2 vline */
7312 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
7313 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
7314 					DRM_DEBUG("IH: D2 vline\n");
7315 				}
7316 				break;
7317 			default:
7318 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7319 				break;
7320 			}
7321 			break;
7322 		case 3: /* D3 vblank/vline */
7323 			switch (src_data) {
7324 			case 0: /* D3 vblank */
7325 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
7326 					if (rdev->irq.crtc_vblank_int[2]) {
7327 						drm_handle_vblank(rdev->ddev, 2);
7328 						rdev->pm.vblank_sync = true;
7329 						wake_up(&rdev->irq.vblank_queue);
7330 					}
7331 					if (atomic_read(&rdev->irq.pflip[2]))
7332 						radeon_crtc_handle_flip(rdev, 2);
7333 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
7334 					DRM_DEBUG("IH: D3 vblank\n");
7335 				}
7336 				break;
7337 			case 1: /* D3 vline */
7338 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
7339 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
7340 					DRM_DEBUG("IH: D3 vline\n");
7341 				}
7342 				break;
7343 			default:
7344 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7345 				break;
7346 			}
7347 			break;
7348 		case 4: /* D4 vblank/vline */
7349 			switch (src_data) {
7350 			case 0: /* D4 vblank */
7351 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
7352 					if (rdev->irq.crtc_vblank_int[3]) {
7353 						drm_handle_vblank(rdev->ddev, 3);
7354 						rdev->pm.vblank_sync = true;
7355 						wake_up(&rdev->irq.vblank_queue);
7356 					}
7357 					if (atomic_read(&rdev->irq.pflip[3]))
7358 						radeon_crtc_handle_flip(rdev, 3);
7359 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
7360 					DRM_DEBUG("IH: D4 vblank\n");
7361 				}
7362 				break;
7363 			case 1: /* D4 vline */
7364 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
7365 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
7366 					DRM_DEBUG("IH: D4 vline\n");
7367 				}
7368 				break;
7369 			default:
7370 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7371 				break;
7372 			}
7373 			break;
7374 		case 5: /* D5 vblank/vline */
7375 			switch (src_data) {
7376 			case 0: /* D5 vblank */
7377 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
7378 					if (rdev->irq.crtc_vblank_int[4]) {
7379 						drm_handle_vblank(rdev->ddev, 4);
7380 						rdev->pm.vblank_sync = true;
7381 						wake_up(&rdev->irq.vblank_queue);
7382 					}
7383 					if (atomic_read(&rdev->irq.pflip[4]))
7384 						radeon_crtc_handle_flip(rdev, 4);
7385 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
7386 					DRM_DEBUG("IH: D5 vblank\n");
7387 				}
7388 				break;
7389 			case 1: /* D5 vline */
7390 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
7391 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
7392 					DRM_DEBUG("IH: D5 vline\n");
7393 				}
7394 				break;
7395 			default:
7396 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7397 				break;
7398 			}
7399 			break;
7400 		case 6: /* D6 vblank/vline */
7401 			switch (src_data) {
7402 			case 0: /* D6 vblank */
7403 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
7404 					if (rdev->irq.crtc_vblank_int[5]) {
7405 						drm_handle_vblank(rdev->ddev, 5);
7406 						rdev->pm.vblank_sync = true;
7407 						wake_up(&rdev->irq.vblank_queue);
7408 					}
7409 					if (atomic_read(&rdev->irq.pflip[5]))
7410 						radeon_crtc_handle_flip(rdev, 5);
7411 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
7412 					DRM_DEBUG("IH: D6 vblank\n");
7413 				}
7414 				break;
7415 			case 1: /* D6 vline */
7416 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
7417 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
7418 					DRM_DEBUG("IH: D6 vline\n");
7419 				}
7420 				break;
7421 			default:
7422 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7423 				break;
7424 			}
7425 			break;
7426 		case 42: /* HPD hotplug */
7427 			switch (src_data) {
7428 			case 0:
7429 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
7430 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
7431 					queue_hotplug = true;
7432 					DRM_DEBUG("IH: HPD1\n");
7433 				}
7434 				break;
7435 			case 1:
7436 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
7437 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
7438 					queue_hotplug = true;
7439 					DRM_DEBUG("IH: HPD2\n");
7440 				}
7441 				break;
7442 			case 2:
7443 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
7444 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
7445 					queue_hotplug = true;
7446 					DRM_DEBUG("IH: HPD3\n");
7447 				}
7448 				break;
7449 			case 3:
7450 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
7451 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
7452 					queue_hotplug = true;
7453 					DRM_DEBUG("IH: HPD4\n");
7454 				}
7455 				break;
7456 			case 4:
7457 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
7458 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
7459 					queue_hotplug = true;
7460 					DRM_DEBUG("IH: HPD5\n");
7461 				}
7462 				break;
7463 			case 5:
7464 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
7465 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
7466 					queue_hotplug = true;
7467 					DRM_DEBUG("IH: HPD6\n");
7468 				}
7469 				break;
7470 			default:
7471 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7472 				break;
7473 			}
7474 			break;
7475 		case 124: /* UVD */
7476 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
7477 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
7478 			break;
7479 		case 146:
7480 		case 147:
7481 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
7482 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
7483 			mc_client = RREG32(VM_CONTEXT1_PROTECTION_FAULT_MCCLIENT);
7484 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
7485 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
7486 				addr);
7487 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
7488 				status);
7489 			cik_vm_decode_fault(rdev, status, addr, mc_client);
7490 			/* reset addr and status */
7491 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
7492 			break;
7493 		case 176: /* GFX RB CP_INT */
7494 		case 177: /* GFX IB CP_INT */
7495 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7496 			break;
7497 		case 181: /* CP EOP event */
7498 			DRM_DEBUG("IH: CP EOP\n");
7499 			/* XXX check the bitfield order! */
7500 			me_id = (ring_id & 0x60) >> 5;
7501 			pipe_id = (ring_id & 0x18) >> 3;
7502 			queue_id = (ring_id & 0x7) >> 0;
7503 			switch (me_id) {
7504 			case 0:
7505 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
7506 				break;
7507 			case 1:
7508 			case 2:
7509 				if ((cp1_ring->me == me_id) & (cp1_ring->pipe == pipe_id))
7510 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7511 				if ((cp2_ring->me == me_id) & (cp2_ring->pipe == pipe_id))
7512 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7513 				break;
7514 			}
7515 			break;
7516 		case 184: /* CP Privileged reg access */
7517 			DRM_ERROR("Illegal register access in command stream\n");
7518 			/* XXX check the bitfield order! */
7519 			me_id = (ring_id & 0x60) >> 5;
7520 			pipe_id = (ring_id & 0x18) >> 3;
7521 			queue_id = (ring_id & 0x7) >> 0;
7522 			switch (me_id) {
7523 			case 0:
7524 				/* This results in a full GPU reset, but all we need to do is soft
7525 				 * reset the CP for gfx
7526 				 */
7527 				queue_reset = true;
7528 				break;
7529 			case 1:
7530 				/* XXX compute */
7531 				queue_reset = true;
7532 				break;
7533 			case 2:
7534 				/* XXX compute */
7535 				queue_reset = true;
7536 				break;
7537 			}
7538 			break;
7539 		case 185: /* CP Privileged inst */
7540 			DRM_ERROR("Illegal instruction in command stream\n");
7541 			/* XXX check the bitfield order! */
7542 			me_id = (ring_id & 0x60) >> 5;
7543 			pipe_id = (ring_id & 0x18) >> 3;
7544 			queue_id = (ring_id & 0x7) >> 0;
7545 			switch (me_id) {
7546 			case 0:
7547 				/* This results in a full GPU reset, but all we need to do is soft
7548 				 * reset the CP for gfx
7549 				 */
7550 				queue_reset = true;
7551 				break;
7552 			case 1:
7553 				/* XXX compute */
7554 				queue_reset = true;
7555 				break;
7556 			case 2:
7557 				/* XXX compute */
7558 				queue_reset = true;
7559 				break;
7560 			}
7561 			break;
7562 		case 224: /* SDMA trap event */
7563 			/* XXX check the bitfield order! */
7564 			me_id = (ring_id & 0x3) >> 0;
7565 			queue_id = (ring_id & 0xc) >> 2;
7566 			DRM_DEBUG("IH: SDMA trap\n");
7567 			switch (me_id) {
7568 			case 0:
7569 				switch (queue_id) {
7570 				case 0:
7571 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
7572 					break;
7573 				case 1:
7574 					/* XXX compute */
7575 					break;
7576 				case 2:
7577 					/* XXX compute */
7578 					break;
7579 				}
7580 				break;
7581 			case 1:
7582 				switch (queue_id) {
7583 				case 0:
7584 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7585 					break;
7586 				case 1:
7587 					/* XXX compute */
7588 					break;
7589 				case 2:
7590 					/* XXX compute */
7591 					break;
7592 				}
7593 				break;
7594 			}
7595 			break;
7596 		case 230: /* thermal low to high */
7597 			DRM_DEBUG("IH: thermal low to high\n");
7598 			rdev->pm.dpm.thermal.high_to_low = false;
7599 			queue_thermal = true;
7600 			break;
7601 		case 231: /* thermal high to low */
7602 			DRM_DEBUG("IH: thermal high to low\n");
7603 			rdev->pm.dpm.thermal.high_to_low = true;
7604 			queue_thermal = true;
7605 			break;
7606 		case 233: /* GUI IDLE */
7607 			DRM_DEBUG("IH: GUI idle\n");
7608 			break;
7609 		case 241: /* SDMA Privileged inst */
7610 		case 247: /* SDMA Privileged inst */
7611 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
7612 			/* XXX check the bitfield order! */
7613 			me_id = (ring_id & 0x3) >> 0;
7614 			queue_id = (ring_id & 0xc) >> 2;
7615 			switch (me_id) {
7616 			case 0:
7617 				switch (queue_id) {
7618 				case 0:
7619 					queue_reset = true;
7620 					break;
7621 				case 1:
7622 					/* XXX compute */
7623 					queue_reset = true;
7624 					break;
7625 				case 2:
7626 					/* XXX compute */
7627 					queue_reset = true;
7628 					break;
7629 				}
7630 				break;
7631 			case 1:
7632 				switch (queue_id) {
7633 				case 0:
7634 					queue_reset = true;
7635 					break;
7636 				case 1:
7637 					/* XXX compute */
7638 					queue_reset = true;
7639 					break;
7640 				case 2:
7641 					/* XXX compute */
7642 					queue_reset = true;
7643 					break;
7644 				}
7645 				break;
7646 			}
7647 			break;
7648 		default:
7649 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
7650 			break;
7651 		}
7652 
7653 		/* wptr/rptr are in bytes! */
7654 		rptr += 16;
7655 		rptr &= rdev->ih.ptr_mask;
7656 	}
7657 	if (queue_hotplug)
7658 		schedule_work(&rdev->hotplug_work);
7659 	if (queue_reset)
7660 		schedule_work(&rdev->reset_work);
7661 	if (queue_thermal)
7662 		schedule_work(&rdev->pm.dpm.thermal.work);
7663 	rdev->ih.rptr = rptr;
7664 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
7665 	atomic_set(&rdev->ih.lock, 0);
7666 
7667 	/* make sure wptr hasn't changed while processing */
7668 	wptr = cik_get_ih_wptr(rdev);
7669 	if (wptr != rptr)
7670 		goto restart_ih;
7671 
7672 	return IRQ_HANDLED;
7673 }
7674 
7675 /*
7676  * startup/shutdown callbacks
7677  */
7678 /**
7679  * cik_startup - program the asic to a functional state
7680  *
7681  * @rdev: radeon_device pointer
7682  *
7683  * Programs the asic to a functional state (CIK).
7684  * Called by cik_init() and cik_resume().
7685  * Returns 0 for success, error for failure.
7686  */
7687 static int cik_startup(struct radeon_device *rdev)
7688 {
7689 	struct radeon_ring *ring;
7690 	int r;
7691 
7692 	/* enable pcie gen2/3 link */
7693 	cik_pcie_gen3_enable(rdev);
7694 	/* enable aspm */
7695 	cik_program_aspm(rdev);
7696 
7697 	/* scratch needs to be initialized before MC */
7698 	r = r600_vram_scratch_init(rdev);
7699 	if (r)
7700 		return r;
7701 
7702 	cik_mc_program(rdev);
7703 
7704 	if (!(rdev->flags & RADEON_IS_IGP) && !rdev->pm.dpm_enabled) {
7705 		r = ci_mc_load_microcode(rdev);
7706 		if (r) {
7707 			DRM_ERROR("Failed to load MC firmware!\n");
7708 			return r;
7709 		}
7710 	}
7711 
7712 	r = cik_pcie_gart_enable(rdev);
7713 	if (r)
7714 		return r;
7715 	cik_gpu_init(rdev);
7716 
7717 	/* allocate rlc buffers */
7718 	if (rdev->flags & RADEON_IS_IGP) {
7719 		if (rdev->family == CHIP_KAVERI) {
7720 			rdev->rlc.reg_list = spectre_rlc_save_restore_register_list;
7721 			rdev->rlc.reg_list_size =
7722 				(u32)ARRAY_SIZE(spectre_rlc_save_restore_register_list);
7723 		} else {
7724 			rdev->rlc.reg_list = kalindi_rlc_save_restore_register_list;
7725 			rdev->rlc.reg_list_size =
7726 				(u32)ARRAY_SIZE(kalindi_rlc_save_restore_register_list);
7727 		}
7728 	}
7729 	rdev->rlc.cs_data = ci_cs_data;
7730 	rdev->rlc.cp_table_size = CP_ME_TABLE_SIZE * 5 * 4;
7731 	r = sumo_rlc_init(rdev);
7732 	if (r) {
7733 		DRM_ERROR("Failed to init rlc BOs!\n");
7734 		return r;
7735 	}
7736 
7737 	/* allocate wb buffer */
7738 	r = radeon_wb_init(rdev);
7739 	if (r)
7740 		return r;
7741 
7742 	/* allocate mec buffers */
7743 	r = cik_mec_init(rdev);
7744 	if (r) {
7745 		DRM_ERROR("Failed to init MEC BOs!\n");
7746 		return r;
7747 	}
7748 
7749 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7750 	if (r) {
7751 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7752 		return r;
7753 	}
7754 
7755 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7756 	if (r) {
7757 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7758 		return r;
7759 	}
7760 
7761 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7762 	if (r) {
7763 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7764 		return r;
7765 	}
7766 
7767 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7768 	if (r) {
7769 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7770 		return r;
7771 	}
7772 
7773 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7774 	if (r) {
7775 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7776 		return r;
7777 	}
7778 
7779 	r = radeon_uvd_resume(rdev);
7780 	if (!r) {
7781 		r = uvd_v4_2_resume(rdev);
7782 		if (!r) {
7783 			r = radeon_fence_driver_start_ring(rdev,
7784 							   R600_RING_TYPE_UVD_INDEX);
7785 			if (r)
7786 				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
7787 		}
7788 	}
7789 	if (r)
7790 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
7791 
7792 	/* Enable IRQ */
7793 	if (!rdev->irq.installed) {
7794 		r = radeon_irq_kms_init(rdev);
7795 		if (r)
7796 			return r;
7797 	}
7798 
7799 	r = cik_irq_init(rdev);
7800 	if (r) {
7801 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
7802 		radeon_irq_kms_fini(rdev);
7803 		return r;
7804 	}
7805 	cik_irq_set(rdev);
7806 
7807 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7808 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7809 			     PACKET3(PACKET3_NOP, 0x3FFF));
7810 	if (r)
7811 		return r;
7812 
7813 	/* set up the compute queues */
7814 	/* type-2 packets are deprecated on MEC, use type-3 instead */
7815 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7816 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7817 			     PACKET3(PACKET3_NOP, 0x3FFF));
7818 	if (r)
7819 		return r;
7820 	ring->me = 1; /* first MEC */
7821 	ring->pipe = 0; /* first pipe */
7822 	ring->queue = 0; /* first queue */
7823 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
7824 
7825 	/* type-2 packets are deprecated on MEC, use type-3 instead */
7826 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7827 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7828 			     PACKET3(PACKET3_NOP, 0x3FFF));
7829 	if (r)
7830 		return r;
7831 	/* dGPU only have 1 MEC */
7832 	ring->me = 1; /* first MEC */
7833 	ring->pipe = 0; /* first pipe */
7834 	ring->queue = 1; /* second queue */
7835 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
7836 
7837 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7838 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7839 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7840 	if (r)
7841 		return r;
7842 
7843 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7844 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7845 			     SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
7846 	if (r)
7847 		return r;
7848 
7849 	r = cik_cp_resume(rdev);
7850 	if (r)
7851 		return r;
7852 
7853 	r = cik_sdma_resume(rdev);
7854 	if (r)
7855 		return r;
7856 
7857 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
7858 	if (ring->ring_size) {
7859 		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
7860 				     RADEON_CP_PACKET2);
7861 		if (!r)
7862 			r = uvd_v1_0_init(rdev);
7863 		if (r)
7864 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
7865 	}
7866 
7867 	r = radeon_ib_pool_init(rdev);
7868 	if (r) {
7869 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7870 		return r;
7871 	}
7872 
7873 	r = radeon_vm_manager_init(rdev);
7874 	if (r) {
7875 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7876 		return r;
7877 	}
7878 
7879 	r = dce6_audio_init(rdev);
7880 	if (r)
7881 		return r;
7882 
7883 	return 0;
7884 }
7885 
7886 /**
7887  * cik_resume - resume the asic to a functional state
7888  *
7889  * @rdev: radeon_device pointer
7890  *
7891  * Programs the asic to a functional state (CIK).
7892  * Called at resume.
7893  * Returns 0 for success, error for failure.
7894  */
7895 int cik_resume(struct radeon_device *rdev)
7896 {
7897 	int r;
7898 
7899 	/* post card */
7900 	atom_asic_init(rdev->mode_info.atom_context);
7901 
7902 	/* init golden registers */
7903 	cik_init_golden_registers(rdev);
7904 
7905 	radeon_pm_resume(rdev);
7906 
7907 	rdev->accel_working = true;
7908 	r = cik_startup(rdev);
7909 	if (r) {
7910 		DRM_ERROR("cik startup failed on resume\n");
7911 		rdev->accel_working = false;
7912 		return r;
7913 	}
7914 
7915 	return r;
7916 
7917 }
7918 
7919 /**
7920  * cik_suspend - suspend the asic
7921  *
7922  * @rdev: radeon_device pointer
7923  *
7924  * Bring the chip into a state suitable for suspend (CIK).
7925  * Called at suspend.
7926  * Returns 0 for success.
7927  */
7928 int cik_suspend(struct radeon_device *rdev)
7929 {
7930 	radeon_pm_suspend(rdev);
7931 	dce6_audio_fini(rdev);
7932 	radeon_vm_manager_fini(rdev);
7933 	cik_cp_enable(rdev, false);
7934 	cik_sdma_enable(rdev, false);
7935 	uvd_v1_0_fini(rdev);
7936 	radeon_uvd_suspend(rdev);
7937 	cik_fini_pg(rdev);
7938 	cik_fini_cg(rdev);
7939 	cik_irq_suspend(rdev);
7940 	radeon_wb_disable(rdev);
7941 	cik_pcie_gart_disable(rdev);
7942 	return 0;
7943 }
7944 
/* The plan is to move initialization into that function and use
 * helper functions, so that radeon_device_init does little more
 * than call the asic specific function. This should also allow
 * removing a number of callback functions such as vram_info.
 */
7951 /**
7952  * cik_init - asic specific driver and hw init
7953  *
7954  * @rdev: radeon_device pointer
7955  *
7956  * Setup asic specific driver variables and program the hw
7957  * to a functional state (CIK).
7958  * Called at driver startup.
7959  * Returns 0 for success, errors for failure.
7960  */
7961 int cik_init(struct radeon_device *rdev)
7962 {
7963 	struct radeon_ring *ring;
7964 	int r;
7965 
7966 	/* Read BIOS */
7967 	if (!radeon_get_bios(rdev)) {
7968 		if (ASIC_IS_AVIVO(rdev))
7969 			return -EINVAL;
7970 	}
7971 	/* Must be an ATOMBIOS */
7972 	if (!rdev->is_atom_bios) {
7973 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
7974 		return -EINVAL;
7975 	}
7976 	r = radeon_atombios_init(rdev);
7977 	if (r)
7978 		return r;
7979 
7980 	/* Post card if necessary */
7981 	if (!radeon_card_posted(rdev)) {
7982 		if (!rdev->bios) {
7983 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7984 			return -EINVAL;
7985 		}
7986 		DRM_INFO("GPU not posted. posting now...\n");
7987 		atom_asic_init(rdev->mode_info.atom_context);
7988 	}
7989 	/* init golden registers */
7990 	cik_init_golden_registers(rdev);
7991 	/* Initialize scratch registers */
7992 	cik_scratch_init(rdev);
7993 	/* Initialize surface registers */
7994 	radeon_surface_init(rdev);
7995 	/* Initialize clocks */
7996 	radeon_get_clock_info(rdev->ddev);
7997 
7998 	/* Fence driver */
7999 	r = radeon_fence_driver_init(rdev);
8000 	if (r)
8001 		return r;
8002 
8003 	/* initialize memory controller */
8004 	r = cik_mc_init(rdev);
8005 	if (r)
8006 		return r;
8007 	/* Memory manager */
8008 	r = radeon_bo_init(rdev);
8009 	if (r)
8010 		return r;
8011 
8012 	if (rdev->flags & RADEON_IS_IGP) {
8013 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8014 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
8015 			r = cik_init_microcode(rdev);
8016 			if (r) {
8017 				DRM_ERROR("Failed to load firmware!\n");
8018 				return r;
8019 			}
8020 		}
8021 	} else {
8022 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
8023 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
8024 		    !rdev->mc_fw) {
8025 			r = cik_init_microcode(rdev);
8026 			if (r) {
8027 				DRM_ERROR("Failed to load firmware!\n");
8028 				return r;
8029 			}
8030 		}
8031 	}
8032 
8033 	/* Initialize power management */
8034 	radeon_pm_init(rdev);
8035 
8036 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
8037 	ring->ring_obj = NULL;
8038 	r600_ring_init(rdev, ring, 1024 * 1024);
8039 
8040 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
8041 	ring->ring_obj = NULL;
8042 	r600_ring_init(rdev, ring, 1024 * 1024);
8043 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8044 	if (r)
8045 		return r;
8046 
8047 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
8048 	ring->ring_obj = NULL;
8049 	r600_ring_init(rdev, ring, 1024 * 1024);
8050 	r = radeon_doorbell_get(rdev, &ring->doorbell_index);
8051 	if (r)
8052 		return r;
8053 
8054 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
8055 	ring->ring_obj = NULL;
8056 	r600_ring_init(rdev, ring, 256 * 1024);
8057 
8058 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
8059 	ring->ring_obj = NULL;
8060 	r600_ring_init(rdev, ring, 256 * 1024);
8061 
8062 	r = radeon_uvd_init(rdev);
8063 	if (!r) {
8064 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
8065 		ring->ring_obj = NULL;
8066 		r600_ring_init(rdev, ring, 4096);
8067 	}
8068 
8069 	rdev->ih.ring_obj = NULL;
8070 	r600_ih_ring_init(rdev, 64 * 1024);
8071 
8072 	r = r600_pcie_gart_init(rdev);
8073 	if (r)
8074 		return r;
8075 
8076 	rdev->accel_working = true;
8077 	r = cik_startup(rdev);
8078 	if (r) {
8079 		dev_err(rdev->dev, "disabling GPU acceleration\n");
8080 		cik_cp_fini(rdev);
8081 		cik_sdma_fini(rdev);
8082 		cik_irq_fini(rdev);
8083 		sumo_rlc_fini(rdev);
8084 		cik_mec_fini(rdev);
8085 		radeon_wb_fini(rdev);
8086 		radeon_ib_pool_fini(rdev);
8087 		radeon_vm_manager_fini(rdev);
8088 		radeon_irq_kms_fini(rdev);
8089 		cik_pcie_gart_fini(rdev);
8090 		rdev->accel_working = false;
8091 	}
8092 
8093 	/* Don't start up if the MC ucode is missing.
8094 	 * The default clocks and voltages before the MC ucode
8095 	 * is loaded are not suffient for advanced operations.
8096 	 */
8097 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
8098 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
8099 		return -EINVAL;
8100 	}
8101 
8102 	return 0;
8103 }
8104 
8105 /**
8106  * cik_fini - asic specific driver and hw fini
8107  *
8108  * @rdev: radeon_device pointer
8109  *
8110  * Tear down the asic specific driver variables and program the hw
8111  * to an idle state (CIK).
8112  * Called at driver unload.
8113  */
8114 void cik_fini(struct radeon_device *rdev)
8115 {
8116 	radeon_pm_fini(rdev);
8117 	cik_cp_fini(rdev);
8118 	cik_sdma_fini(rdev);
8119 	cik_fini_pg(rdev);
8120 	cik_fini_cg(rdev);
8121 	cik_irq_fini(rdev);
8122 	sumo_rlc_fini(rdev);
8123 	cik_mec_fini(rdev);
8124 	radeon_wb_fini(rdev);
8125 	radeon_vm_manager_fini(rdev);
8126 	radeon_ib_pool_fini(rdev);
8127 	radeon_irq_kms_fini(rdev);
8128 	uvd_v1_0_fini(rdev);
8129 	radeon_uvd_fini(rdev);
8130 	cik_pcie_gart_fini(rdev);
8131 	r600_vram_scratch_fini(rdev);
8132 	radeon_gem_fini(rdev);
8133 	radeon_fence_driver_fini(rdev);
8134 	radeon_bo_fini(rdev);
8135 	radeon_atombios_fini(rdev);
8136 	kfree(rdev->bios);
8137 	rdev->bios = NULL;
8138 }
8139 
8140 void dce8_program_fmt(struct drm_encoder *encoder)
8141 {
8142 	struct drm_device *dev = encoder->dev;
8143 	struct radeon_device *rdev = dev->dev_private;
8144 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
8145 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
8146 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
8147 	int bpc = 0;
8148 	u32 tmp = 0;
8149 	enum radeon_connector_dither dither = RADEON_FMT_DITHER_DISABLE;
8150 
8151 	if (connector) {
8152 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
8153 		bpc = radeon_get_monitor_bpc(connector);
8154 		dither = radeon_connector->dither;
8155 	}
8156 
8157 	/* LVDS/eDP FMT is set up by atom */
8158 	if (radeon_encoder->devices & ATOM_DEVICE_LCD_SUPPORT)
8159 		return;
8160 
8161 	/* not needed for analog */
8162 	if ((radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC1) ||
8163 	    (radeon_encoder->encoder_id == ENCODER_OBJECT_ID_INTERNAL_KLDSCP_DAC2))
8164 		return;
8165 
8166 	if (bpc == 0)
8167 		return;
8168 
8169 	switch (bpc) {
8170 	case 6:
8171 		if (dither == RADEON_FMT_DITHER_ENABLE)
8172 			/* XXX sort out optimal dither settings */
8173 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8174 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(0));
8175 		else
8176 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(0));
8177 		break;
8178 	case 8:
8179 		if (dither == RADEON_FMT_DITHER_ENABLE)
8180 			/* XXX sort out optimal dither settings */
8181 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8182 				FMT_RGB_RANDOM_ENABLE |
8183 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(1));
8184 		else
8185 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(1));
8186 		break;
8187 	case 10:
8188 		if (dither == RADEON_FMT_DITHER_ENABLE)
8189 			/* XXX sort out optimal dither settings */
8190 			tmp |= (FMT_FRAME_RANDOM_ENABLE | FMT_HIGHPASS_RANDOM_ENABLE |
8191 				FMT_RGB_RANDOM_ENABLE |
8192 				FMT_SPATIAL_DITHER_EN | FMT_SPATIAL_DITHER_DEPTH(2));
8193 		else
8194 			tmp |= (FMT_TRUNCATE_EN | FMT_TRUNCATE_DEPTH(2));
8195 		break;
8196 	default:
8197 		/* not needed */
8198 		break;
8199 	}
8200 
8201 	WREG32(FMT_BIT_DEPTH_CONTROL + radeon_crtc->crtc_offset, tmp);
8202 }
8203 
8204 /* display watermark setup */
8205 /**
8206  * dce8_line_buffer_adjust - Set up the line buffer
8207  *
8208  * @rdev: radeon_device pointer
8209  * @radeon_crtc: the selected display controller
8210  * @mode: the current display mode on the selected display
8211  * controller
8212  *
8213  * Setup up the line buffer allocation for
8214  * the selected display controller (CIK).
8215  * Returns the line buffer size in pixels.
8216  */
8217 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
8218 				   struct radeon_crtc *radeon_crtc,
8219 				   struct drm_display_mode *mode)
8220 {
8221 	u32 tmp, buffer_alloc, i;
8222 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
8223 	/*
8224 	 * Line Buffer Setup
8225 	 * There are 6 line buffers, one for each display controllers.
8226 	 * There are 3 partitions per LB. Select the number of partitions
8227 	 * to enable based on the display width.  For display widths larger
8228 	 * than 4096, you need use to use 2 display controllers and combine
8229 	 * them using the stereo blender.
8230 	 */
8231 	if (radeon_crtc->base.enabled && mode) {
8232 		if (mode->crtc_hdisplay < 1920) {
8233 			tmp = 1;
8234 			buffer_alloc = 2;
8235 		} else if (mode->crtc_hdisplay < 2560) {
8236 			tmp = 2;
8237 			buffer_alloc = 2;
8238 		} else if (mode->crtc_hdisplay < 4096) {
8239 			tmp = 0;
8240 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8241 		} else {
8242 			DRM_DEBUG_KMS("Mode too big for LB!\n");
8243 			tmp = 0;
8244 			buffer_alloc = (rdev->flags & RADEON_IS_IGP) ? 2 : 4;
8245 		}
8246 	} else {
8247 		tmp = 1;
8248 		buffer_alloc = 0;
8249 	}
8250 
8251 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
8252 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
8253 
8254 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
8255 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
8256 	for (i = 0; i < rdev->usec_timeout; i++) {
8257 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
8258 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
8259 			break;
8260 		udelay(1);
8261 	}
8262 
8263 	if (radeon_crtc->base.enabled && mode) {
8264 		switch (tmp) {
8265 		case 0:
8266 		default:
8267 			return 4096 * 2;
8268 		case 1:
8269 			return 1920 * 2;
8270 		case 2:
8271 			return 2560 * 2;
8272 		}
8273 	}
8274 
8275 	/* controller not enabled, so no lb used */
8276 	return 0;
8277 }
8278 
8279 /**
8280  * cik_get_number_of_dram_channels - get the number of dram channels
8281  *
8282  * @rdev: radeon_device pointer
8283  *
8284  * Look up the number of video ram channels (CIK).
8285  * Used for display watermark bandwidth calculations
8286  * Returns the number of dram channels
8287  */
8288 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
8289 {
8290 	u32 tmp = RREG32(MC_SHARED_CHMAP);
8291 
8292 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
8293 	case 0:
8294 	default:
8295 		return 1;
8296 	case 1:
8297 		return 2;
8298 	case 2:
8299 		return 4;
8300 	case 3:
8301 		return 8;
8302 	case 4:
8303 		return 3;
8304 	case 5:
8305 		return 6;
8306 	case 6:
8307 		return 10;
8308 	case 7:
8309 		return 12;
8310 	case 8:
8311 		return 16;
8312 	}
8313 }
8314 
8315 struct dce8_wm_params {
8316 	u32 dram_channels; /* number of dram channels */
8317 	u32 yclk;          /* bandwidth per dram data pin in kHz */
8318 	u32 sclk;          /* engine clock in kHz */
8319 	u32 disp_clk;      /* display clock in kHz */
8320 	u32 src_width;     /* viewport width */
8321 	u32 active_time;   /* active display time in ns */
8322 	u32 blank_time;    /* blank time in ns */
8323 	bool interlaced;    /* mode is interlaced */
8324 	fixed20_12 vsc;    /* vertical scale ratio */
8325 	u32 num_heads;     /* number of active crtcs */
8326 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
8327 	u32 lb_size;       /* line buffer allocated to pipe */
8328 	u32 vtaps;         /* vertical scaler taps */
8329 };
8330 
8331 /**
8332  * dce8_dram_bandwidth - get the dram bandwidth
8333  *
8334  * @wm: watermark calculation data
8335  *
8336  * Calculate the raw dram bandwidth (CIK).
8337  * Used for display watermark bandwidth calculations
8338  * Returns the dram bandwidth in MBytes/s
8339  */
8340 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
8341 {
8342 	/* Calculate raw DRAM Bandwidth */
8343 	fixed20_12 dram_efficiency; /* 0.7 */
8344 	fixed20_12 yclk, dram_channels, bandwidth;
8345 	fixed20_12 a;
8346 
8347 	a.full = dfixed_const(1000);
8348 	yclk.full = dfixed_const(wm->yclk);
8349 	yclk.full = dfixed_div(yclk, a);
8350 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8351 	a.full = dfixed_const(10);
8352 	dram_efficiency.full = dfixed_const(7);
8353 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
8354 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8355 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
8356 
8357 	return dfixed_trunc(bandwidth);
8358 }
8359 
8360 /**
8361  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
8362  *
8363  * @wm: watermark calculation data
8364  *
8365  * Calculate the dram bandwidth used for display (CIK).
8366  * Used for display watermark bandwidth calculations
8367  * Returns the dram bandwidth for display in MBytes/s
8368  */
8369 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8370 {
8371 	/* Calculate DRAM Bandwidth and the part allocated to display. */
8372 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
8373 	fixed20_12 yclk, dram_channels, bandwidth;
8374 	fixed20_12 a;
8375 
8376 	a.full = dfixed_const(1000);
8377 	yclk.full = dfixed_const(wm->yclk);
8378 	yclk.full = dfixed_div(yclk, a);
8379 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
8380 	a.full = dfixed_const(10);
8381 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
8382 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
8383 	bandwidth.full = dfixed_mul(dram_channels, yclk);
8384 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
8385 
8386 	return dfixed_trunc(bandwidth);
8387 }
8388 
8389 /**
8390  * dce8_data_return_bandwidth - get the data return bandwidth
8391  *
8392  * @wm: watermark calculation data
8393  *
8394  * Calculate the data return bandwidth used for display (CIK).
8395  * Used for display watermark bandwidth calculations
8396  * Returns the data return bandwidth in MBytes/s
8397  */
8398 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
8399 {
8400 	/* Calculate the display Data return Bandwidth */
8401 	fixed20_12 return_efficiency; /* 0.8 */
8402 	fixed20_12 sclk, bandwidth;
8403 	fixed20_12 a;
8404 
8405 	a.full = dfixed_const(1000);
8406 	sclk.full = dfixed_const(wm->sclk);
8407 	sclk.full = dfixed_div(sclk, a);
8408 	a.full = dfixed_const(10);
8409 	return_efficiency.full = dfixed_const(8);
8410 	return_efficiency.full = dfixed_div(return_efficiency, a);
8411 	a.full = dfixed_const(32);
8412 	bandwidth.full = dfixed_mul(a, sclk);
8413 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
8414 
8415 	return dfixed_trunc(bandwidth);
8416 }
8417 
8418 /**
8419  * dce8_dmif_request_bandwidth - get the dmif bandwidth
8420  *
8421  * @wm: watermark calculation data
8422  *
8423  * Calculate the dmif bandwidth used for display (CIK).
8424  * Used for display watermark bandwidth calculations
8425  * Returns the dmif bandwidth in MBytes/s
8426  */
8427 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
8428 {
8429 	/* Calculate the DMIF Request Bandwidth */
8430 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
8431 	fixed20_12 disp_clk, bandwidth;
8432 	fixed20_12 a, b;
8433 
8434 	a.full = dfixed_const(1000);
8435 	disp_clk.full = dfixed_const(wm->disp_clk);
8436 	disp_clk.full = dfixed_div(disp_clk, a);
8437 	a.full = dfixed_const(32);
8438 	b.full = dfixed_mul(a, disp_clk);
8439 
8440 	a.full = dfixed_const(10);
8441 	disp_clk_request_efficiency.full = dfixed_const(8);
8442 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
8443 
8444 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
8445 
8446 	return dfixed_trunc(bandwidth);
8447 }
8448 
8449 /**
8450  * dce8_available_bandwidth - get the min available bandwidth
8451  *
8452  * @wm: watermark calculation data
8453  *
8454  * Calculate the min available bandwidth used for display (CIK).
8455  * Used for display watermark bandwidth calculations
8456  * Returns the min available bandwidth in MBytes/s
8457  */
8458 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
8459 {
8460 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
8461 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
8462 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
8463 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
8464 
8465 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
8466 }
8467 
8468 /**
8469  * dce8_average_bandwidth - get the average available bandwidth
8470  *
8471  * @wm: watermark calculation data
8472  *
8473  * Calculate the average available bandwidth used for display (CIK).
8474  * Used for display watermark bandwidth calculations
8475  * Returns the average available bandwidth in MBytes/s
8476  */
8477 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
8478 {
8479 	/* Calculate the display mode Average Bandwidth
8480 	 * DisplayMode should contain the source and destination dimensions,
8481 	 * timing, etc.
8482 	 */
8483 	fixed20_12 bpp;
8484 	fixed20_12 line_time;
8485 	fixed20_12 src_width;
8486 	fixed20_12 bandwidth;
8487 	fixed20_12 a;
8488 
8489 	a.full = dfixed_const(1000);
8490 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
8491 	line_time.full = dfixed_div(line_time, a);
8492 	bpp.full = dfixed_const(wm->bytes_per_pixel);
8493 	src_width.full = dfixed_const(wm->src_width);
8494 	bandwidth.full = dfixed_mul(src_width, bpp);
8495 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
8496 	bandwidth.full = dfixed_div(bandwidth, line_time);
8497 
8498 	return dfixed_trunc(bandwidth);
8499 }
8500 
8501 /**
8502  * dce8_latency_watermark - get the latency watermark
8503  *
8504  * @wm: watermark calculation data
8505  *
8506  * Calculate the latency watermark (CIK).
8507  * Used for display watermark bandwidth calculations
8508  * Returns the latency watermark in ns
8509  */
8510 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
8511 {
8512 	/* First calculate the latency in ns */
8513 	u32 mc_latency = 2000; /* 2000 ns. */
8514 	u32 available_bandwidth = dce8_available_bandwidth(wm);
8515 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
8516 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
8517 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
8518 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
8519 		(wm->num_heads * cursor_line_pair_return_time);
8520 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
8521 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
8522 	u32 tmp, dmif_size = 12288;
8523 	fixed20_12 a, b, c;
8524 
8525 	if (wm->num_heads == 0)
8526 		return 0;
8527 
8528 	a.full = dfixed_const(2);
8529 	b.full = dfixed_const(1);
8530 	if ((wm->vsc.full > a.full) ||
8531 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
8532 	    (wm->vtaps >= 5) ||
8533 	    ((wm->vsc.full >= a.full) && wm->interlaced))
8534 		max_src_lines_per_dst_line = 4;
8535 	else
8536 		max_src_lines_per_dst_line = 2;
8537 
8538 	a.full = dfixed_const(available_bandwidth);
8539 	b.full = dfixed_const(wm->num_heads);
8540 	a.full = dfixed_div(a, b);
8541 
8542 	b.full = dfixed_const(mc_latency + 512);
8543 	c.full = dfixed_const(wm->disp_clk);
8544 	b.full = dfixed_div(b, c);
8545 
8546 	c.full = dfixed_const(dmif_size);
8547 	b.full = dfixed_div(c, b);
8548 
8549 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
8550 
8551 	b.full = dfixed_const(1000);
8552 	c.full = dfixed_const(wm->disp_clk);
8553 	b.full = dfixed_div(c, b);
8554 	c.full = dfixed_const(wm->bytes_per_pixel);
8555 	b.full = dfixed_mul(b, c);
8556 
8557 	lb_fill_bw = min(tmp, dfixed_trunc(b));
8558 
8559 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
8560 	b.full = dfixed_const(1000);
8561 	c.full = dfixed_const(lb_fill_bw);
8562 	b.full = dfixed_div(c, b);
8563 	a.full = dfixed_div(a, b);
8564 	line_fill_time = dfixed_trunc(a);
8565 
8566 	if (line_fill_time < wm->active_time)
8567 		return latency;
8568 	else
8569 		return latency + (line_fill_time - wm->active_time);
8570 
8571 }
8572 
8573 /**
8574  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
8575  * average and available dram bandwidth
8576  *
8577  * @wm: watermark calculation data
8578  *
8579  * Check if the display average bandwidth fits in the display
8580  * dram bandwidth (CIK).
8581  * Used for display watermark bandwidth calculations
8582  * Returns true if the display fits, false if not.
8583  */
8584 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
8585 {
8586 	if (dce8_average_bandwidth(wm) <=
8587 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
8588 		return true;
8589 	else
8590 		return false;
8591 }
8592 
8593 /**
8594  * dce8_average_bandwidth_vs_available_bandwidth - check
8595  * average and available bandwidth
8596  *
8597  * @wm: watermark calculation data
8598  *
8599  * Check if the display average bandwidth fits in the display
8600  * available bandwidth (CIK).
8601  * Used for display watermark bandwidth calculations
8602  * Returns true if the display fits, false if not.
8603  */
8604 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
8605 {
8606 	if (dce8_average_bandwidth(wm) <=
8607 	    (dce8_available_bandwidth(wm) / wm->num_heads))
8608 		return true;
8609 	else
8610 		return false;
8611 }
8612 
8613 /**
8614  * dce8_check_latency_hiding - check latency hiding
8615  *
8616  * @wm: watermark calculation data
8617  *
8618  * Check latency hiding (CIK).
8619  * Used for display watermark bandwidth calculations
8620  * Returns true if the display fits, false if not.
8621  */
8622 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
8623 {
8624 	u32 lb_partitions = wm->lb_size / wm->src_width;
8625 	u32 line_time = wm->active_time + wm->blank_time;
8626 	u32 latency_tolerant_lines;
8627 	u32 latency_hiding;
8628 	fixed20_12 a;
8629 
8630 	a.full = dfixed_const(1);
8631 	if (wm->vsc.full > a.full)
8632 		latency_tolerant_lines = 1;
8633 	else {
8634 		if (lb_partitions <= (wm->vtaps + 1))
8635 			latency_tolerant_lines = 1;
8636 		else
8637 			latency_tolerant_lines = 2;
8638 	}
8639 
8640 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
8641 
8642 	if (dce8_latency_watermark(wm) <= latency_hiding)
8643 		return true;
8644 	else
8645 		return false;
8646 }
8647 
8648 /**
8649  * dce8_program_watermarks - program display watermarks
8650  *
8651  * @rdev: radeon_device pointer
8652  * @radeon_crtc: the selected display controller
8653  * @lb_size: line buffer size
8654  * @num_heads: number of display controllers in use
8655  *
8656  * Calculate and program the display watermarks for the
8657  * selected display controller (CIK).
8658  */
8659 static void dce8_program_watermarks(struct radeon_device *rdev,
8660 				    struct radeon_crtc *radeon_crtc,
8661 				    u32 lb_size, u32 num_heads)
8662 {
8663 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
8664 	struct dce8_wm_params wm_low, wm_high;
8665 	u32 pixel_period;
8666 	u32 line_time = 0;
8667 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
8668 	u32 tmp, wm_mask;
8669 
8670 	if (radeon_crtc->base.enabled && num_heads && mode) {
8671 		pixel_period = 1000000 / (u32)mode->clock;
8672 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
8673 
8674 		/* watermark for high clocks */
8675 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8676 		    rdev->pm.dpm_enabled) {
8677 			wm_high.yclk =
8678 				radeon_dpm_get_mclk(rdev, false) * 10;
8679 			wm_high.sclk =
8680 				radeon_dpm_get_sclk(rdev, false) * 10;
8681 		} else {
8682 			wm_high.yclk = rdev->pm.current_mclk * 10;
8683 			wm_high.sclk = rdev->pm.current_sclk * 10;
8684 		}
8685 
8686 		wm_high.disp_clk = mode->clock;
8687 		wm_high.src_width = mode->crtc_hdisplay;
8688 		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
8689 		wm_high.blank_time = line_time - wm_high.active_time;
8690 		wm_high.interlaced = false;
8691 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8692 			wm_high.interlaced = true;
8693 		wm_high.vsc = radeon_crtc->vsc;
8694 		wm_high.vtaps = 1;
8695 		if (radeon_crtc->rmx_type != RMX_OFF)
8696 			wm_high.vtaps = 2;
8697 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
8698 		wm_high.lb_size = lb_size;
8699 		wm_high.dram_channels = cik_get_number_of_dram_channels(rdev);
8700 		wm_high.num_heads = num_heads;
8701 
8702 		/* set for high clocks */
8703 		latency_watermark_a = min(dce8_latency_watermark(&wm_high), (u32)65535);
8704 
8705 		/* possibly force display priority to high */
8706 		/* should really do this at mode validation time... */
8707 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
8708 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_high) ||
8709 		    !dce8_check_latency_hiding(&wm_high) ||
8710 		    (rdev->disp_priority == 2)) {
8711 			DRM_DEBUG_KMS("force priority to high\n");
8712 		}
8713 
8714 		/* watermark for low clocks */
8715 		if ((rdev->pm.pm_method == PM_METHOD_DPM) &&
8716 		    rdev->pm.dpm_enabled) {
8717 			wm_low.yclk =
8718 				radeon_dpm_get_mclk(rdev, true) * 10;
8719 			wm_low.sclk =
8720 				radeon_dpm_get_sclk(rdev, true) * 10;
8721 		} else {
8722 			wm_low.yclk = rdev->pm.current_mclk * 10;
8723 			wm_low.sclk = rdev->pm.current_sclk * 10;
8724 		}
8725 
8726 		wm_low.disp_clk = mode->clock;
8727 		wm_low.src_width = mode->crtc_hdisplay;
8728 		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
8729 		wm_low.blank_time = line_time - wm_low.active_time;
8730 		wm_low.interlaced = false;
8731 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
8732 			wm_low.interlaced = true;
8733 		wm_low.vsc = radeon_crtc->vsc;
8734 		wm_low.vtaps = 1;
8735 		if (radeon_crtc->rmx_type != RMX_OFF)
8736 			wm_low.vtaps = 2;
8737 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
8738 		wm_low.lb_size = lb_size;
8739 		wm_low.dram_channels = cik_get_number_of_dram_channels(rdev);
8740 		wm_low.num_heads = num_heads;
8741 
8742 		/* set for low clocks */
8743 		latency_watermark_b = min(dce8_latency_watermark(&wm_low), (u32)65535);
8744 
8745 		/* possibly force display priority to high */
8746 		/* should really do this at mode validation time... */
8747 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
8748 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm_low) ||
8749 		    !dce8_check_latency_hiding(&wm_low) ||
8750 		    (rdev->disp_priority == 2)) {
8751 			DRM_DEBUG_KMS("force priority to high\n");
8752 		}
8753 	}
8754 
8755 	/* select wm A */
8756 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8757 	tmp = wm_mask;
8758 	tmp &= ~LATENCY_WATERMARK_MASK(3);
8759 	tmp |= LATENCY_WATERMARK_MASK(1);
8760 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8761 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8762 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
8763 		LATENCY_HIGH_WATERMARK(line_time)));
8764 	/* select wm B */
8765 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
8766 	tmp &= ~LATENCY_WATERMARK_MASK(3);
8767 	tmp |= LATENCY_WATERMARK_MASK(2);
8768 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
8769 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
8770 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
8771 		LATENCY_HIGH_WATERMARK(line_time)));
8772 	/* restore original selection */
8773 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
8774 
8775 	/* save values for DPM */
8776 	radeon_crtc->line_time = line_time;
8777 	radeon_crtc->wm_high = latency_watermark_a;
8778 	radeon_crtc->wm_low = latency_watermark_b;
8779 }
8780 
8781 /**
8782  * dce8_bandwidth_update - program display watermarks
8783  *
8784  * @rdev: radeon_device pointer
8785  *
8786  * Calculate and program the display watermarks and line
8787  * buffer allocation (CIK).
8788  */
8789 void dce8_bandwidth_update(struct radeon_device *rdev)
8790 {
8791 	struct drm_display_mode *mode = NULL;
8792 	u32 num_heads = 0, lb_size;
8793 	int i;
8794 
8795 	radeon_update_display_priority(rdev);
8796 
8797 	for (i = 0; i < rdev->num_crtc; i++) {
8798 		if (rdev->mode_info.crtcs[i]->base.enabled)
8799 			num_heads++;
8800 	}
8801 	for (i = 0; i < rdev->num_crtc; i++) {
8802 		mode = &rdev->mode_info.crtcs[i]->base.mode;
8803 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
8804 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
8805 	}
8806 }
8807 
8808 /**
8809  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
8810  *
8811  * @rdev: radeon_device pointer
8812  *
8813  * Fetches a GPU clock counter snapshot (SI).
8814  * Returns the 64 bit clock counter snapshot.
8815  */
8816 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
8817 {
8818 	uint64_t clock;
8819 
8820 	mutex_lock(&rdev->gpu_clock_mutex);
8821 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
8822 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
8823 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
8824 	mutex_unlock(&rdev->gpu_clock_mutex);
8825 	return clock;
8826 }
8827 
8828 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
8829                               u32 cntl_reg, u32 status_reg)
8830 {
8831 	int r, i;
8832 	struct atom_clock_dividers dividers;
8833 	uint32_t tmp;
8834 
8835 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
8836 					   clock, false, &dividers);
8837 	if (r)
8838 		return r;
8839 
8840 	tmp = RREG32_SMC(cntl_reg);
8841 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
8842 	tmp |= dividers.post_divider;
8843 	WREG32_SMC(cntl_reg, tmp);
8844 
8845 	for (i = 0; i < 100; i++) {
8846 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
8847 			break;
8848 		mdelay(10);
8849 	}
8850 	if (i == 100)
8851 		return -ETIMEDOUT;
8852 
8853 	return 0;
8854 }
8855 
8856 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
8857 {
8858 	int r = 0;
8859 
8860 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
8861 	if (r)
8862 		return r;
8863 
8864 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
8865 	return r;
8866 }
8867 
8868 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
8869 {
8870 	struct pci_dev *root = rdev->pdev->bus->self;
8871 	int bridge_pos, gpu_pos;
8872 	u32 speed_cntl, mask, current_data_rate;
8873 	int ret, i;
8874 	u16 tmp16;
8875 
8876 	if (radeon_pcie_gen2 == 0)
8877 		return;
8878 
8879 	if (rdev->flags & RADEON_IS_IGP)
8880 		return;
8881 
8882 	if (!(rdev->flags & RADEON_IS_PCIE))
8883 		return;
8884 
8885 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
8886 	if (ret != 0)
8887 		return;
8888 
8889 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
8890 		return;
8891 
8892 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
8893 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
8894 		LC_CURRENT_DATA_RATE_SHIFT;
8895 	if (mask & DRM_PCIE_SPEED_80) {
8896 		if (current_data_rate == 2) {
8897 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
8898 			return;
8899 		}
8900 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
8901 	} else if (mask & DRM_PCIE_SPEED_50) {
8902 		if (current_data_rate == 1) {
8903 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
8904 			return;
8905 		}
8906 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
8907 	}
8908 
8909 	bridge_pos = pci_pcie_cap(root);
8910 	if (!bridge_pos)
8911 		return;
8912 
8913 	gpu_pos = pci_pcie_cap(rdev->pdev);
8914 	if (!gpu_pos)
8915 		return;
8916 
8917 	if (mask & DRM_PCIE_SPEED_80) {
8918 		/* re-try equalization if gen3 is not already enabled */
8919 		if (current_data_rate != 2) {
8920 			u16 bridge_cfg, gpu_cfg;
8921 			u16 bridge_cfg2, gpu_cfg2;
8922 			u32 max_lw, current_lw, tmp;
8923 
8924 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8925 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8926 
8927 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
8928 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8929 
8930 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
8931 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8932 
8933 			tmp = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
8934 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
8935 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
8936 
8937 			if (current_lw < max_lw) {
8938 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
8939 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
8940 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
8941 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
8942 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
8943 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
8944 				}
8945 			}
8946 
8947 			for (i = 0; i < 10; i++) {
8948 				/* check status */
8949 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
8950 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
8951 					break;
8952 
8953 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
8954 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
8955 
8956 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
8957 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
8958 
8959 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8960 				tmp |= LC_SET_QUIESCE;
8961 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8962 
8963 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8964 				tmp |= LC_REDO_EQ;
8965 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8966 
8967 				mdelay(100);
8968 
8969 				/* linkctl */
8970 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
8971 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8972 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
8973 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
8974 
8975 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
8976 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
8977 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
8978 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
8979 
8980 				/* linkctl2 */
8981 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
8982 				tmp16 &= ~((1 << 4) | (7 << 9));
8983 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
8984 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
8985 
8986 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
8987 				tmp16 &= ~((1 << 4) | (7 << 9));
8988 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
8989 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
8990 
8991 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
8992 				tmp &= ~LC_SET_QUIESCE;
8993 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
8994 			}
8995 		}
8996 	}
8997 
8998 	/* set the link speed */
8999 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
9000 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
9001 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9002 
9003 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
9004 	tmp16 &= ~0xf;
9005 	if (mask & DRM_PCIE_SPEED_80)
9006 		tmp16 |= 3; /* gen3 */
9007 	else if (mask & DRM_PCIE_SPEED_50)
9008 		tmp16 |= 2; /* gen2 */
9009 	else
9010 		tmp16 |= 1; /* gen1 */
9011 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
9012 
9013 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9014 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
9015 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
9016 
9017 	for (i = 0; i < rdev->usec_timeout; i++) {
9018 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
9019 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
9020 			break;
9021 		udelay(1);
9022 	}
9023 }
9024 
9025 static void cik_program_aspm(struct radeon_device *rdev)
9026 {
9027 	u32 data, orig;
9028 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
9029 	bool disable_clkreq = false;
9030 
9031 	if (radeon_aspm == 0)
9032 		return;
9033 
9034 	/* XXX double check IGPs */
9035 	if (rdev->flags & RADEON_IS_IGP)
9036 		return;
9037 
9038 	if (!(rdev->flags & RADEON_IS_PCIE))
9039 		return;
9040 
9041 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9042 	data &= ~LC_XMIT_N_FTS_MASK;
9043 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
9044 	if (orig != data)
9045 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
9046 
9047 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
9048 	data |= LC_GO_TO_RECOVERY;
9049 	if (orig != data)
9050 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
9051 
9052 	orig = data = RREG32_PCIE_PORT(PCIE_P_CNTL);
9053 	data |= P_IGNORE_EDB_ERR;
9054 	if (orig != data)
9055 		WREG32_PCIE_PORT(PCIE_P_CNTL, data);
9056 
9057 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9058 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
9059 	data |= LC_PMI_TO_L1_DIS;
9060 	if (!disable_l0s)
9061 		data |= LC_L0S_INACTIVITY(7);
9062 
9063 	if (!disable_l1) {
9064 		data |= LC_L1_INACTIVITY(7);
9065 		data &= ~LC_PMI_TO_L1_DIS;
9066 		if (orig != data)
9067 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9068 
9069 		if (!disable_plloff_in_l1) {
9070 			bool clk_req_support;
9071 
9072 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0);
9073 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9074 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9075 			if (orig != data)
9076 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_0, data);
9077 
9078 			orig = data = RREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1);
9079 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9080 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9081 			if (orig != data)
9082 				WREG32_PCIE_PORT(PB0_PIF_PWRDOWN_1, data);
9083 
9084 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0);
9085 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
9086 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
9087 			if (orig != data)
9088 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_0, data);
9089 
9090 			orig = data = RREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1);
9091 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
9092 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
9093 			if (orig != data)
9094 				WREG32_PCIE_PORT(PB1_PIF_PWRDOWN_1, data);
9095 
9096 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
9097 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
9098 			data |= LC_DYN_LANES_PWR_STATE(3);
9099 			if (orig != data)
9100 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
9101 
9102 			if (!disable_clkreq) {
9103 				struct pci_dev *root = rdev->pdev->bus->self;
9104 				u32 lnkcap;
9105 
9106 				clk_req_support = false;
9107 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
9108 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
9109 					clk_req_support = true;
9110 			} else {
9111 				clk_req_support = false;
9112 			}
9113 
9114 			if (clk_req_support) {
9115 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
9116 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
9117 				if (orig != data)
9118 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
9119 
9120 				orig = data = RREG32_SMC(THM_CLK_CNTL);
9121 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
9122 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
9123 				if (orig != data)
9124 					WREG32_SMC(THM_CLK_CNTL, data);
9125 
9126 				orig = data = RREG32_SMC(MISC_CLK_CTRL);
9127 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
9128 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
9129 				if (orig != data)
9130 					WREG32_SMC(MISC_CLK_CTRL, data);
9131 
9132 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL);
9133 				data &= ~BCLK_AS_XCLK;
9134 				if (orig != data)
9135 					WREG32_SMC(CG_CLKPIN_CNTL, data);
9136 
9137 				orig = data = RREG32_SMC(CG_CLKPIN_CNTL_2);
9138 				data &= ~FORCE_BIF_REFCLK_EN;
9139 				if (orig != data)
9140 					WREG32_SMC(CG_CLKPIN_CNTL_2, data);
9141 
9142 				orig = data = RREG32_SMC(MPLL_BYPASSCLK_SEL);
9143 				data &= ~MPLL_CLKOUT_SEL_MASK;
9144 				data |= MPLL_CLKOUT_SEL(4);
9145 				if (orig != data)
9146 					WREG32_SMC(MPLL_BYPASSCLK_SEL, data);
9147 			}
9148 		}
9149 	} else {
9150 		if (orig != data)
9151 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9152 	}
9153 
9154 	orig = data = RREG32_PCIE_PORT(PCIE_CNTL2);
9155 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
9156 	if (orig != data)
9157 		WREG32_PCIE_PORT(PCIE_CNTL2, data);
9158 
9159 	if (!disable_l0s) {
9160 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
9161 		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
9162 			data = RREG32_PCIE_PORT(PCIE_LC_STATUS1);
9163 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
9164 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
9165 				data &= ~LC_L0S_INACTIVITY_MASK;
9166 				if (orig != data)
9167 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
9168 			}
9169 		}
9170 	}
9171 }
9172