/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include <drm/radeon_drm.h>
#include "sid.h"
#include "atom.h"
#include "si_blit_shaders.h"
#include "clearstate_si.h"
#include "radeon_ucode.h"


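/*
 * The upper-case firmware names below are the original (big-endian)
 * images; the lower-case names are the newer unified-header images,
 * including the "_k"/"banks_k_2" SMC variants used by specific board
 * revisions (see si_init_microcode()).
 */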
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");

MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
MODULE_FIRMWARE("radeon/tahiti_me.bin");
MODULE_FIRMWARE("radeon/tahiti_ce.bin");
MODULE_FIRMWARE("radeon/tahiti_mc.bin");
MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
MODULE_FIRMWARE("radeon/tahiti_smc.bin");

MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");

MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
MODULE_FIRMWARE("radeon/pitcairn_me.bin");
MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");

MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");

MODULE_FIRMWARE("radeon/verde_pfp.bin");
MODULE_FIRMWARE("radeon/verde_me.bin");
MODULE_FIRMWARE("radeon/verde_ce.bin");
MODULE_FIRMWARE("radeon/verde_mc.bin");
MODULE_FIRMWARE("radeon/verde_rlc.bin");
MODULE_FIRMWARE("radeon/verde_smc.bin");
MODULE_FIRMWARE("radeon/verde_k_smc.bin");

MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");

MODULE_FIRMWARE("radeon/oland_pfp.bin");
MODULE_FIRMWARE("radeon/oland_me.bin");
MODULE_FIRMWARE("radeon/oland_ce.bin");
MODULE_FIRMWARE("radeon/oland_mc.bin");
MODULE_FIRMWARE("radeon/oland_rlc.bin");
MODULE_FIRMWARE("radeon/oland_smc.bin");
MODULE_FIRMWARE("radeon/oland_k_smc.bin");

MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");

MODULE_FIRMWARE("radeon/hainan_pfp.bin");
MODULE_FIRMWARE("radeon/hainan_me.bin");
MODULE_FIRMWARE("radeon/hainan_ce.bin");
MODULE_FIRMWARE("radeon/hainan_mc.bin");
MODULE_FIRMWARE("radeon/hainan_rlc.bin");
MODULE_FIRMWARE("radeon/hainan_smc.bin");
MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");

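/* MC firmware for the 0x58 memory configuration (see si58_fw in si_init_microcode()) */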
MODULE_FIRMWARE("radeon/si58_mc.bin");

static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable);
static void si_init_pg(struct radeon_device *rdev);
static void si_init_cg(struct radeon_device *rdev);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);

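/*
 * RLC save/restore register list.  Each pair appears to encode an
 * instance/broadcast select in the upper 16 bits and a register dword
 * offset in the lower 16 bits, followed by a placeholder value.
 */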
static const u32 verde_rlc_save_restore_register_list[] =
{
	(0x8000 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x98f0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xe7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9150 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x897c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8d8c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac54 >> 2),
	0x00000000,
	0x3,
	(0x9c00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9910 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9914 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9918 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x991c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9920 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9924 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9928 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x992c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9930 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9934 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9938 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x993c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9940 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9944 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9948 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x994c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9950 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9954 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9958 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x995c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9960 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9964 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9968 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x996c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9970 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9974 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9978 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x997c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9980 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9984 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9988 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x998c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c08 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9060 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9364 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x913c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e50 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e58 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e5c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9508 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x950c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9494 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88cc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x89b0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9830 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9838 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9a10 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9874 >> 2),
	0x00000000,
	0x00000000
};

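/*
 * The "golden" register tables below are triples of
 * { register offset, and-mask, or-value } consumed by
 * radeon_program_register_sequence().
 */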
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};

static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};

static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};

static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};

static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};

static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};

static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

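/*
 * Power gating init sequence for Verde, in the same { offset, mask,
 * value } triple format; most entries look like index/data style
 * writes through the 0x3500/0x3504 pair.
 */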
static const u32 verde_pg_init[] =
{
	0x353c, 0xffffffff, 0x40000,
	0x3538, 0xffffffff, 0x200010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x7007,
	0x3538, 0xffffffff, 0x300010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x400000,
	0x3538, 0xffffffff, 0x100010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x120200,
	0x3538, 0xffffffff, 0x500010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x1e1e16,
	0x3538, 0xffffffff, 0x600010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x171f1e,
	0x3538, 0xffffffff, 0x700010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x3538, 0xffffffff, 0x9ff,
	0x3500, 0xffffffff, 0x0,
	0x3504, 0xffffffff, 0x10000800,
	0x3504, 0xffffffff, 0xf,
	0x3504, 0xffffffff, 0xf,
	0x3500, 0xffffffff, 0x4,
	0x3504, 0xffffffff, 0x1000051e,
	0x3504, 0xffffffff, 0xffff,
	0x3504, 0xffffffff, 0xffff,
	0x3500, 0xffffffff, 0x8,
	0x3504, 0xffffffff, 0x80500,
	0x3500, 0xffffffff, 0x12,
	0x3504, 0xffffffff, 0x9050c,
	0x3500, 0xffffffff, 0x1d,
	0x3504, 0xffffffff, 0xb052c,
	0x3500, 0xffffffff, 0x2a,
	0x3504, 0xffffffff, 0x1053e,
	0x3500, 0xffffffff, 0x2d,
	0x3504, 0xffffffff, 0x10546,
	0x3500, 0xffffffff, 0x30,
	0x3504, 0xffffffff, 0xa054e,
	0x3500, 0xffffffff, 0x3c,
	0x3504, 0xffffffff, 0x1055f,
	0x3500, 0xffffffff, 0x3f,
	0x3504, 0xffffffff, 0x10567,
	0x3500, 0xffffffff, 0x42,
	0x3504, 0xffffffff, 0x1056f,
	0x3500, 0xffffffff, 0x45,
	0x3504, 0xffffffff, 0x10572,
	0x3500, 0xffffffff, 0x48,
	0x3504, 0xffffffff, 0x20575,
	0x3500, 0xffffffff, 0x4c,
	0x3504, 0xffffffff, 0x190801,
	0x3500, 0xffffffff, 0x67,
	0x3504, 0xffffffff, 0x1082a,
	0x3500, 0xffffffff, 0x6a,
	0x3504, 0xffffffff, 0x1b082d,
	0x3500, 0xffffffff, 0x87,
	0x3504, 0xffffffff, 0x310851,
	0x3500, 0xffffffff, 0xba,
	0x3504, 0xffffffff, 0x891,
	0x3500, 0xffffffff, 0xbc,
	0x3504, 0xffffffff, 0x893,
	0x3500, 0xffffffff, 0xbe,
	0x3504, 0xffffffff, 0x20895,
	0x3500, 0xffffffff, 0xc2,
	0x3504, 0xffffffff, 0x20899,
	0x3500, 0xffffffff, 0xc6,
	0x3504, 0xffffffff, 0x2089d,
	0x3500, 0xffffffff, 0xca,
	0x3504, 0xffffffff, 0x8a1,
	0x3500, 0xffffffff, 0xcc,
	0x3504, 0xffffffff, 0x8a3,
	0x3500, 0xffffffff, 0xce,
	0x3504, 0xffffffff, 0x308a5,
	0x3500, 0xffffffff, 0xd3,
	0x3504, 0xffffffff, 0x6d08cd,
	0x3500, 0xffffffff, 0x142,
	0x3504, 0xffffffff, 0x2000095a,
	0x3504, 0xffffffff, 0x1,
	0x3500, 0xffffffff, 0x144,
	0x3504, 0xffffffff, 0x301f095b,
	0x3500, 0xffffffff, 0x165,
	0x3504, 0xffffffff, 0xc094d,
	0x3500, 0xffffffff, 0x173,
	0x3504, 0xffffffff, 0xf096d,
	0x3500, 0xffffffff, 0x184,
	0x3504, 0xffffffff, 0x15097f,
	0x3500, 0xffffffff, 0x19b,
	0x3504, 0xffffffff, 0xc0998,
	0x3500, 0xffffffff, 0x1a9,
	0x3504, 0xffffffff, 0x409a7,
	0x3500, 0xffffffff, 0x1af,
	0x3504, 0xffffffff, 0xcdc,
	0x3500, 0xffffffff, 0x1b1,
	0x3504, 0xffffffff, 0x800,
	0x3508, 0xffffffff, 0x6c9b2000,
	0x3510, 0xfc00, 0x2000,
	0x3544, 0xffffffff, 0xfc0,
	0x28d4, 0x00000100, 0x100
};

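/**
 * si_init_golden_registers - program the golden register settings
 *
 * @rdev: radeon_device pointer
 *
 * Programs the per-ASIC "golden" register tables above (SI).
 */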
static void si_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_TAHITI:
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers2,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
		break;
	case CHIP_PITCAIRN:
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
		break;
	case CHIP_VERDE:
		radeon_program_register_sequence(rdev,
						 verde_golden_registers,
						 (const u32)ARRAY_SIZE(verde_golden_registers));
		radeon_program_register_sequence(rdev,
						 verde_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 verde_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 verde_pg_init,
						 (const u32)ARRAY_SIZE(verde_pg_init));
		break;
	case CHIP_OLAND:
		radeon_program_register_sequence(rdev,
						 oland_golden_registers,
						 (const u32)ARRAY_SIZE(oland_golden_registers));
		radeon_program_register_sequence(rdev,
						 oland_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 oland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
		break;
	case CHIP_HAINAN:
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers,
						 (const u32)ARRAY_SIZE(hainan_golden_registers));
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers2,
						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
		radeon_program_register_sequence(rdev,
						 hainan_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
		break;
	default:
		break;
	}
}

/**
 * si_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register
 */
int si_get_allowed_info_register(struct radeon_device *rdev,
				 u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
	case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
	case UVD_STATUS:
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}

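/*
 * Assumption: these values follow the driver's usual 10 kHz clock units,
 * which would make PCIE_BUS_CLK 100 MHz and TCLK 10 MHz.
 */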
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)

/**
 * si_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (SI).
 */
u32 si_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;
	u32 tmp;

	tmp = RREG32(CG_CLKPIN_CNTL_2);
	if (tmp & MUX_TCLK_TO_XCLK)
		return TCLK;

	tmp = RREG32(CG_CLKPIN_CNTL);
	if (tmp & XTALIN_DIVIDE)
		return reference_clock / 4;

	return reference_clock;
}

/* get temperature in millidegrees */
int si_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

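	/* readings with bit 9 set are treated as out of range; clamp to the 255C ceiling */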
	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = (actual_temp * 1000);

	return actual_temp;
}

#define TAHITI_IO_MC_REGS_SIZE 36

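/*
 * Per-ASIC MC "io debug" settings used with the legacy MC firmware:
 * pairs of { MC_SEQ_IO_DEBUG index, data } written by
 * si_mc_load_microcode().
 */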
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};

static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};

static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};

static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};

static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};

/**
 * si_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (SI).
 * Returns 0 on success, error on failure.
 */
int si_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

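	/*
	 * New-style firmware images carry a header with little-endian
	 * payload offsets/sizes; the legacy images are raw big-endian words.
	 */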
	if (rdev->new_fw) {
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_TAHITI:
			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_PITCAIRN:
			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_VERDE:
		default:
			io_mc_regs = (u32 *)&verde_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_OLAND:
			io_mc_regs = (u32 *)&oland_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAINAN:
			io_mc_regs = (u32 *)&hainan_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}
		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}

static int si_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	const char *new_chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
	size_t smc_req_size, mc2_req_size;
	char fw_name[30];
	int err;
	int new_fw = 0;
	bool new_smc = false;
	bool si58_fw = false;
	bool banks2_fw = false;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_TAHITI:
		chip_name = "TAHITI";
		new_chip_name = "tahiti";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_PITCAIRN:
		chip_name = "PITCAIRN";
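		/*
		 * These board revisions (here and in the cases below)
		 * appear to need the updated "_k" SMC firmware images.
		 */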
		if ((rdev->pdev->revision == 0x81) &&
		    ((rdev->pdev->device == 0x6810) ||
		     (rdev->pdev->device == 0x6811)))
			new_smc = true;
		new_chip_name = "pitcairn";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_VERDE:
		chip_name = "VERDE";
		if (((rdev->pdev->device == 0x6820) &&
		     ((rdev->pdev->revision == 0x81) ||
		      (rdev->pdev->revision == 0x83))) ||
		    ((rdev->pdev->device == 0x6821) &&
		     ((rdev->pdev->revision == 0x83) ||
		      (rdev->pdev->revision == 0x87))) ||
		    ((rdev->pdev->revision == 0x87) &&
		     ((rdev->pdev->device == 0x6823) ||
		      (rdev->pdev->device == 0x682b))))
			new_smc = true;
		new_chip_name = "verde";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_OLAND:
		chip_name = "OLAND";
		if (((rdev->pdev->revision == 0x81) &&
		     ((rdev->pdev->device == 0x6600) ||
		      (rdev->pdev->device == 0x6604) ||
		      (rdev->pdev->device == 0x6605) ||
		      (rdev->pdev->device == 0x6610))) ||
		    ((rdev->pdev->revision == 0x83) &&
		     (rdev->pdev->device == 0x6610)))
			new_smc = true;
		new_chip_name = "oland";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_HAINAN:
		chip_name = "HAINAN";
1729 		if (((rdev->pdev->revision == 0x81) &&
1730 		     (rdev->pdev->device == 0x6660)) ||
1731 		    ((rdev->pdev->revision == 0x83) &&
1732 		     ((rdev->pdev->device == 0x6660) ||
1733 		      (rdev->pdev->device == 0x6663) ||
1734 		      (rdev->pdev->device == 0x6665) ||
1735 		      (rdev->pdev->device == 0x6667))))
1736 			new_smc = true;
1737 		else if ((rdev->pdev->revision == 0xc3) &&
1738 			 (rdev->pdev->device == 0x6665))
1739 			banks2_fw = true;
1740 		new_chip_name = "hainan";
1741 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1742 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1743 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1744 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1745 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1746 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1747 		break;
1748 	default: BUG();
1749 	}
1750 
1751 	/* this memory configuration requires special firmware */
1752 	if (((RREG32(MC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
1753 		si58_fw = true;
1754 
1755 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1756 
1757 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1758 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1759 	if (err) {
1760 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1761 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1762 		if (err)
1763 			goto out;
1764 		if (rdev->pfp_fw->size != pfp_req_size) {
1765 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1766 			       rdev->pfp_fw->size, fw_name);
1767 			err = -EINVAL;
1768 			goto out;
1769 		}
1770 	} else {
1771 		err = radeon_ucode_validate(rdev->pfp_fw);
1772 		if (err) {
1773 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1774 			       fw_name);
1775 			goto out;
1776 		} else {
1777 			new_fw++;
1778 		}
1779 	}
1780 
1781 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1782 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1783 	if (err) {
1784 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1785 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1786 		if (err)
1787 			goto out;
1788 		if (rdev->me_fw->size != me_req_size) {
1789 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1790 			       rdev->me_fw->size, fw_name);
1791 			err = -EINVAL;
			goto out;
1792 		}
1793 	} else {
1794 		err = radeon_ucode_validate(rdev->me_fw);
1795 		if (err) {
1796 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1797 			       fw_name);
1798 			goto out;
1799 		} else {
1800 			new_fw++;
1801 		}
1802 	}
1803 
1804 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1805 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1806 	if (err) {
1807 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1808 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1809 		if (err)
1810 			goto out;
1811 		if (rdev->ce_fw->size != ce_req_size) {
1812 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1813 			       rdev->ce_fw->size, fw_name);
1814 			err = -EINVAL;
			goto out;
1815 		}
1816 	} else {
1817 		err = radeon_ucode_validate(rdev->ce_fw);
1818 		if (err) {
1819 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1820 			       fw_name);
1821 			goto out;
1822 		} else {
1823 			new_fw++;
1824 		}
1825 	}
1826 
1827 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1828 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1829 	if (err) {
1830 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1831 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1832 		if (err)
1833 			goto out;
1834 		if (rdev->rlc_fw->size != rlc_req_size) {
1835 			pr_err("si_rlc: Bogus length %zu in firmware \"%s\"\n",
1836 			       rdev->rlc_fw->size, fw_name);
1837 			err = -EINVAL;
			goto out;
1838 		}
1839 	} else {
1840 		err = radeon_ucode_validate(rdev->rlc_fw);
1841 		if (err) {
1842 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1843 			       fw_name);
1844 			goto out;
1845 		} else {
1846 			new_fw++;
1847 		}
1848 	}
1849 
1850 	if (si58_fw)
1851 		snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin");
1852 	else
1853 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1854 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1855 	if (err) {
1856 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1857 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1858 		if (err) {
1859 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1860 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1861 			if (err)
1862 				goto out;
1863 		}
1864 		if ((rdev->mc_fw->size != mc_req_size) &&
1865 		    (rdev->mc_fw->size != mc2_req_size)) {
1866 			pr_err("si_mc: Bogus length %zu in firmware \"%s\"\n",
1867 			       rdev->mc_fw->size, fw_name);
1868 			err = -EINVAL;
			goto out;
1869 		}
1870 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1871 	} else {
1872 		err = radeon_ucode_validate(rdev->mc_fw);
1873 		if (err) {
1874 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1875 			       fw_name);
1876 			goto out;
1877 		} else {
1878 			new_fw++;
1879 		}
1880 	}
1881 
1882 	if (banks2_fw)
1883 		snprintf(fw_name, sizeof(fw_name), "radeon/banks_k_2_smc.bin");
1884 	else if (new_smc)
1885 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
1886 	else
1887 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1888 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1889 	if (err) {
1890 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1891 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1892 		if (err) {
1893 			pr_err("smc: error loading firmware \"%s\"\n", fw_name);
1894 			release_firmware(rdev->smc_fw);
1895 			rdev->smc_fw = NULL;
1896 			err = 0;
1897 		} else if (rdev->smc_fw->size != smc_req_size) {
1898 			pr_err("si_smc: Bogus length %zu in firmware \"%s\"\n",
1899 			       rdev->smc_fw->size, fw_name);
1900 			err = -EINVAL;
1901 		}
1902 	} else {
1903 		err = radeon_ucode_validate(rdev->smc_fw);
1904 		if (err) {
1905 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1906 			       fw_name);
1907 			goto out;
1908 		} else {
1909 			new_fw++;
1910 		}
1911 	}
1912 
1913 	if (new_fw == 0) {
1914 		rdev->new_fw = false;
1915 	} else if (new_fw < 6) {
1916 		pr_err("si_fw: mixing new and old firmware!\n");
1917 		err = -EINVAL;
1918 	} else {
1919 		rdev->new_fw = true;
1920 	}
1921 out:
1922 	if (err) {
1923 		if (err != -EINVAL)
1924 			pr_err("si_cp: Failed to load firmware \"%s\"\n",
1925 			       fw_name);
1926 		release_firmware(rdev->pfp_fw);
1927 		rdev->pfp_fw = NULL;
1928 		release_firmware(rdev->me_fw);
1929 		rdev->me_fw = NULL;
1930 		release_firmware(rdev->ce_fw);
1931 		rdev->ce_fw = NULL;
1932 		release_firmware(rdev->rlc_fw);
1933 		rdev->rlc_fw = NULL;
1934 		release_firmware(rdev->mc_fw);
1935 		rdev->mc_fw = NULL;
1936 		release_firmware(rdev->smc_fw);
1937 		rdev->smc_fw = NULL;
1938 	}
1939 	return err;
1940 }
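
/*
 * Illustrative sketch (not part of the driver): every ucode fetch in
 * si_init_microcode() follows the same "new lowercase name first, then
 * legacy uppercase name" fallback.  The helper name below is
 * hypothetical; the real function keeps the requests inline so each
 * legacy image can get its own size check and each new image its
 * radeon_ucode_validate() call.
 */
static inline int si_request_fw_with_fallback(struct radeon_device *rdev,
					      const struct firmware **fw,
					      const char *new_name,
					      const char *legacy_name,
					      const char *suffix)
{
	char fw_name[30];
	int err;

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_%s.bin", new_name, suffix);
	err = request_firmware(fw, fw_name, rdev->dev);
	if (!err)
		return 0;	/* new, headered image found */
	snprintf(fw_name, sizeof(fw_name), "radeon/%s_%s.bin", legacy_name, suffix);
	return request_firmware(fw, fw_name, rdev->dev);
}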
1941 
1942 /* watermark setup */
1943 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1944 				   struct radeon_crtc *radeon_crtc,
1945 				   struct drm_display_mode *mode,
1946 				   struct drm_display_mode *other_mode)
1947 {
1948 	u32 tmp, buffer_alloc, i;
1949 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1950 	/*
1951 	 * Line Buffer Setup
1952 	 * There are 3 line buffers, each one shared by 2 display controllers.
1953 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1954 	 * the display controllers.  The partitioning is done via one of four
1955 	 * preset allocations specified in bits 21:20; two are used here:
1956 	 *  0 - half lb
1957 	 *  2 - whole lb, other crtc must be disabled
1958 	 */
1959 	/* this can get tricky if we have two large displays on a paired group
1960 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1961 	 * non-linked crtcs for maximum line buffer allocation.
1962 	 */
1963 	if (radeon_crtc->base.enabled && mode) {
1964 		if (other_mode) {
1965 			tmp = 0; /* 1/2 */
1966 			buffer_alloc = 1;
1967 		} else {
1968 			tmp = 2; /* whole */
1969 			buffer_alloc = 2;
1970 		}
1971 	} else {
1972 		tmp = 0;
1973 		buffer_alloc = 0;
1974 	}
1975 
1976 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1977 	       DC_LB_MEMORY_CONFIG(tmp));
1978 
1979 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1980 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1981 	for (i = 0; i < rdev->usec_timeout; i++) {
1982 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1983 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
1984 			break;
1985 		udelay(1);
1986 	}
1987 
1988 	if (radeon_crtc->base.enabled && mode) {
1989 		switch (tmp) {
1990 		case 0:
1991 		default:
1992 			return 4096 * 2;
1993 		case 2:
1994 			return 8192 * 2;
1995 		}
1996 	}
1997 
1998 	/* controller not enabled, so no lb used */
1999 	return 0;
2000 }
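
/*
 * Worked example for the allocation above: with both crtcs of a pair
 * active, each one gets the half-lb preset (tmp = 0) and the function
 * returns 4096 * 2 = 8192 line buffer entries; with only one crtc of
 * the pair active, the whole-lb preset (tmp = 2) doubles that to
 * 8192 * 2 = 16384 entries.
 */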
2001 
2002 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2003 {
2004 	u32 tmp = RREG32(MC_SHARED_CHMAP);
2005 
2006 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2007 	case 0:
2008 	default:
2009 		return 1;
2010 	case 1:
2011 		return 2;
2012 	case 2:
2013 		return 4;
2014 	case 3:
2015 		return 8;
2016 	case 4:
2017 		return 3;
2018 	case 5:
2019 		return 6;
2020 	case 6:
2021 		return 10;
2022 	case 7:
2023 		return 12;
2024 	case 8:
2025 		return 16;
2026 	}
2027 }
2028 
2029 struct dce6_wm_params {
2030 	u32 dram_channels; /* number of dram channels */
2031 	u32 yclk;          /* bandwidth per dram data pin in kHz */
2032 	u32 sclk;          /* engine clock in kHz */
2033 	u32 disp_clk;      /* display clock in kHz */
2034 	u32 src_width;     /* viewport width */
2035 	u32 active_time;   /* active display time in ns */
2036 	u32 blank_time;    /* blank time in ns */
2037 	bool interlaced;    /* mode is interlaced */
2038 	fixed20_12 vsc;    /* vertical scale ratio */
2039 	u32 num_heads;     /* number of active crtcs */
2040 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2041 	u32 lb_size;       /* line buffer allocated to pipe */
2042 	u32 vtaps;         /* vertical scaler taps */
2043 };
2044 
2045 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2046 {
2047 	/* Calculate raw DRAM Bandwidth */
2048 	fixed20_12 dram_efficiency; /* 0.7 */
2049 	fixed20_12 yclk, dram_channels, bandwidth;
2050 	fixed20_12 a;
2051 
2052 	a.full = dfixed_const(1000);
2053 	yclk.full = dfixed_const(wm->yclk);
2054 	yclk.full = dfixed_div(yclk, a);
2055 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2056 	a.full = dfixed_const(10);
2057 	dram_efficiency.full = dfixed_const(7);
2058 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2059 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2060 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2061 
2062 	return dfixed_trunc(bandwidth);
2063 }
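
/*
 * Worked example (values assumed for illustration): with yclk =
 * 1000000 kHz (1 GHz effective) and 4 dram channels, the bus is
 * 4 * 4 = 16 bytes wide, so the raw bandwidth computed above is
 *   16 bytes * (1000000 / 1000) MHz * 0.7 = 11200 MB/s.
 */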
2064 
2065 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2066 {
2067 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2068 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2069 	fixed20_12 yclk, dram_channels, bandwidth;
2070 	fixed20_12 a;
2071 
2072 	a.full = dfixed_const(1000);
2073 	yclk.full = dfixed_const(wm->yclk);
2074 	yclk.full = dfixed_div(yclk, a);
2075 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2076 	a.full = dfixed_const(10);
2077 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
2078 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2079 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2080 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2081 
2082 	return dfixed_trunc(bandwidth);
2083 }
2084 
2085 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2086 {
2087 	/* Calculate the display Data return Bandwidth */
2088 	fixed20_12 return_efficiency; /* 0.8 */
2089 	fixed20_12 sclk, bandwidth;
2090 	fixed20_12 a;
2091 
2092 	a.full = dfixed_const(1000);
2093 	sclk.full = dfixed_const(wm->sclk);
2094 	sclk.full = dfixed_div(sclk, a);
2095 	a.full = dfixed_const(10);
2096 	return_efficiency.full = dfixed_const(8);
2097 	return_efficiency.full = dfixed_div(return_efficiency, a);
2098 	a.full = dfixed_const(32);
2099 	bandwidth.full = dfixed_mul(a, sclk);
2100 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2101 
2102 	return dfixed_trunc(bandwidth);
2103 }
2104 
2105 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2106 {
2107 	return 32;
2108 }
2109 
2110 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2111 {
2112 	/* Calculate the DMIF Request Bandwidth */
2113 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2114 	fixed20_12 disp_clk, sclk, bandwidth;
2115 	fixed20_12 a, b1, b2;
2116 	u32 min_bandwidth;
2117 
2118 	a.full = dfixed_const(1000);
2119 	disp_clk.full = dfixed_const(wm->disp_clk);
2120 	disp_clk.full = dfixed_div(disp_clk, a);
2121 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2122 	b1.full = dfixed_mul(a, disp_clk);
2123 
2124 	a.full = dfixed_const(1000);
2125 	sclk.full = dfixed_const(wm->sclk);
2126 	sclk.full = dfixed_div(sclk, a);
2127 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2128 	b2.full = dfixed_mul(a, sclk);
2129 
2130 	a.full = dfixed_const(10);
2131 	disp_clk_request_efficiency.full = dfixed_const(8);
2132 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2133 
2134 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2135 
2136 	a.full = dfixed_const(min_bandwidth);
2137 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2138 
2139 	return dfixed_trunc(bandwidth);
2140 }
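
/*
 * Worked example (values assumed for illustration): for disp_clk =
 * 148500 kHz and sclk = 800000 kHz, b1 = 16 * 148.5 = 2376 and
 * b2 = 32 * 800 = 25600, so the request bandwidth above is
 * min(2376, 25600) * 0.8 = ~1900 MB/s, limited by the display clock.
 */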
2141 
2142 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2143 {
2144 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
2145 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2146 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2147 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2148 
2149 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2150 }
2151 
2152 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2153 {
2154 	/* Calculate the display mode Average Bandwidth
2155 	 * DisplayMode should contain the source and destination dimensions,
2156 	 * timing, etc.
2157 	 */
2158 	fixed20_12 bpp;
2159 	fixed20_12 line_time;
2160 	fixed20_12 src_width;
2161 	fixed20_12 bandwidth;
2162 	fixed20_12 a;
2163 
2164 	a.full = dfixed_const(1000);
2165 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2166 	line_time.full = dfixed_div(line_time, a);
2167 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2168 	src_width.full = dfixed_const(wm->src_width);
2169 	bandwidth.full = dfixed_mul(src_width, bpp);
2170 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2171 	bandwidth.full = dfixed_div(bandwidth, line_time);
2172 
2173 	return dfixed_trunc(bandwidth);
2174 }
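
/*
 * Worked example (values assumed for illustration): a 1920-wide
 * source at 4 bytes per pixel with vsc = 1 and a 16000 ns line time
 * needs 1920 * 4 / (16000 / 1000) = 480 MB/s of average bandwidth.
 */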
2175 
2176 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2177 {
2178 	/* First calculate the latency in ns */
2179 	u32 mc_latency = 2000; /* 2000 ns. */
2180 	u32 available_bandwidth = dce6_available_bandwidth(wm);
2181 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2182 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2183 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2184 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2185 		(wm->num_heads * cursor_line_pair_return_time);
2186 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2187 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2188 	u32 tmp, dmif_size = 12288;
2189 	fixed20_12 a, b, c;
2190 
2191 	if (wm->num_heads == 0)
2192 		return 0;
2193 
2194 	a.full = dfixed_const(2);
2195 	b.full = dfixed_const(1);
2196 	if ((wm->vsc.full > a.full) ||
2197 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2198 	    (wm->vtaps >= 5) ||
2199 	    ((wm->vsc.full >= a.full) && wm->interlaced))
2200 		max_src_lines_per_dst_line = 4;
2201 	else
2202 		max_src_lines_per_dst_line = 2;
2203 
2204 	a.full = dfixed_const(available_bandwidth);
2205 	b.full = dfixed_const(wm->num_heads);
2206 	a.full = dfixed_div(a, b);
2207 	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
2208 	tmp = min(dfixed_trunc(a), tmp);
2209 
2210 	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
2211 
2212 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2213 	b.full = dfixed_const(1000);
2214 	c.full = dfixed_const(lb_fill_bw);
2215 	b.full = dfixed_div(c, b);
2216 	a.full = dfixed_div(a, b);
2217 	line_fill_time = dfixed_trunc(a);
2218 
2219 	if (line_fill_time < wm->active_time)
2220 		return latency;
2221 	else
2222 		return latency + (line_fill_time - wm->active_time);
2223 
2224 }
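
/*
 * Worked example (values assumed for illustration): with an available
 * bandwidth of 11200, worst_chunk_return_time = 512 * 8 * 1000 / 11200
 * = 365 ns and cursor_line_pair_return_time = 128 * 4 * 1000 / 11200
 * = 45 ns.  For two heads and disp_clk = 148500 kHz, the base latency
 * is 2000 + (3 * 365 + 2 * 45) + 40000000 / 148500 = 2000 + 1185 +
 * 269 = 3454 ns, before any line fill time penalty.
 */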
2225 
2226 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2227 {
2228 	if (dce6_average_bandwidth(wm) <=
2229 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2230 		return true;
2231 	else
2232 		return false;
2233 }
2234 
2235 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2236 {
2237 	if (dce6_average_bandwidth(wm) <=
2238 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2239 		return true;
2240 	else
2241 		return false;
2242 }
2243 
2244 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2245 {
2246 	u32 lb_partitions = wm->lb_size / wm->src_width;
2247 	u32 line_time = wm->active_time + wm->blank_time;
2248 	u32 latency_tolerant_lines;
2249 	u32 latency_hiding;
2250 	fixed20_12 a;
2251 
2252 	a.full = dfixed_const(1);
2253 	if (wm->vsc.full > a.full)
2254 		latency_tolerant_lines = 1;
2255 	else {
2256 		if (lb_partitions <= (wm->vtaps + 1))
2257 			latency_tolerant_lines = 1;
2258 		else
2259 			latency_tolerant_lines = 2;
2260 	}
2261 
2262 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2263 
2264 	if (dce6_latency_watermark(wm) <= latency_hiding)
2265 		return true;
2266 	else
2267 		return false;
2268 }
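
/*
 * Worked example (values assumed for illustration): with lb_size =
 * 8192, src_width = 1920 and vtaps = 1, lb_partitions = 4, which is
 * greater than vtaps + 1, so two lines of tolerance apply and the
 * watermark must stay below 2 * line_time + blank_time.
 */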
2269 
2270 static void dce6_program_watermarks(struct radeon_device *rdev,
2271 					 struct radeon_crtc *radeon_crtc,
2272 					 u32 lb_size, u32 num_heads)
2273 {
2274 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2275 	struct dce6_wm_params wm_low, wm_high;
2276 	u32 dram_channels;
2277 	u32 active_time;
2278 	u32 line_time = 0;
2279 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2280 	u32 priority_a_mark = 0, priority_b_mark = 0;
2281 	u32 priority_a_cnt = PRIORITY_OFF;
2282 	u32 priority_b_cnt = PRIORITY_OFF;
2283 	u32 tmp, arb_control3;
2284 	fixed20_12 a, b, c;
2285 
2286 	if (radeon_crtc->base.enabled && num_heads && mode) {
2287 		active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
2288 		line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
2289 		priority_a_cnt = 0;
2290 		priority_b_cnt = 0;
2291 
2292 		if (rdev->family == CHIP_ARUBA)
2293 			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2294 		else
2295 			dram_channels = si_get_number_of_dram_channels(rdev);
2296 
2297 		/* watermark for high clocks */
2298 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2299 			wm_high.yclk =
2300 				radeon_dpm_get_mclk(rdev, false) * 10;
2301 			wm_high.sclk =
2302 				radeon_dpm_get_sclk(rdev, false) * 10;
2303 		} else {
2304 			wm_high.yclk = rdev->pm.current_mclk * 10;
2305 			wm_high.sclk = rdev->pm.current_sclk * 10;
2306 		}
2307 
2308 		wm_high.disp_clk = mode->clock;
2309 		wm_high.src_width = mode->crtc_hdisplay;
2310 		wm_high.active_time = active_time;
2311 		wm_high.blank_time = line_time - wm_high.active_time;
2312 		wm_high.interlaced = false;
2313 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2314 			wm_high.interlaced = true;
2315 		wm_high.vsc = radeon_crtc->vsc;
2316 		wm_high.vtaps = 1;
2317 		if (radeon_crtc->rmx_type != RMX_OFF)
2318 			wm_high.vtaps = 2;
2319 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2320 		wm_high.lb_size = lb_size;
2321 		wm_high.dram_channels = dram_channels;
2322 		wm_high.num_heads = num_heads;
2323 
2324 		/* watermark for low clocks */
2325 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2326 			wm_low.yclk =
2327 				radeon_dpm_get_mclk(rdev, true) * 10;
2328 			wm_low.sclk =
2329 				radeon_dpm_get_sclk(rdev, true) * 10;
2330 		} else {
2331 			wm_low.yclk = rdev->pm.current_mclk * 10;
2332 			wm_low.sclk = rdev->pm.current_sclk * 10;
2333 		}
2334 
2335 		wm_low.disp_clk = mode->clock;
2336 		wm_low.src_width = mode->crtc_hdisplay;
2337 		wm_low.active_time = active_time;
2338 		wm_low.blank_time = line_time - wm_low.active_time;
2339 		wm_low.interlaced = false;
2340 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2341 			wm_low.interlaced = true;
2342 		wm_low.vsc = radeon_crtc->vsc;
2343 		wm_low.vtaps = 1;
2344 		if (radeon_crtc->rmx_type != RMX_OFF)
2345 			wm_low.vtaps = 2;
2346 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2347 		wm_low.lb_size = lb_size;
2348 		wm_low.dram_channels = dram_channels;
2349 		wm_low.num_heads = num_heads;
2350 
2351 		/* set for high clocks */
2352 		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2353 		/* set for low clocks */
2354 		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2355 
2356 		/* possibly force display priority to high */
2357 		/* should really do this at mode validation time... */
2358 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2359 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2360 		    !dce6_check_latency_hiding(&wm_high) ||
2361 		    (rdev->disp_priority == 2)) {
2362 			DRM_DEBUG_KMS("force priority to high\n");
2363 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2364 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2365 		}
2366 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2367 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2368 		    !dce6_check_latency_hiding(&wm_low) ||
2369 		    (rdev->disp_priority == 2)) {
2370 			DRM_DEBUG_KMS("force priority to high\n");
2371 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2372 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2373 		}
2374 
2375 		a.full = dfixed_const(1000);
2376 		b.full = dfixed_const(mode->clock);
2377 		b.full = dfixed_div(b, a);
2378 		c.full = dfixed_const(latency_watermark_a);
2379 		c.full = dfixed_mul(c, b);
2380 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2381 		c.full = dfixed_div(c, a);
2382 		a.full = dfixed_const(16);
2383 		c.full = dfixed_div(c, a);
2384 		priority_a_mark = dfixed_trunc(c);
2385 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2386 
2387 		a.full = dfixed_const(1000);
2388 		b.full = dfixed_const(mode->clock);
2389 		b.full = dfixed_div(b, a);
2390 		c.full = dfixed_const(latency_watermark_b);
2391 		c.full = dfixed_mul(c, b);
2392 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2393 		c.full = dfixed_div(c, a);
2394 		a.full = dfixed_const(16);
2395 		c.full = dfixed_div(c, a);
2396 		priority_b_mark = dfixed_trunc(c);
2397 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2398 
2399 		/* Save number of lines the linebuffer leads before the scanout */
2400 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2401 	}
2402 
2403 	/* select wm A */
2404 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2405 	tmp = arb_control3;
2406 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2407 	tmp |= LATENCY_WATERMARK_MASK(1);
2408 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2409 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2410 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2411 		LATENCY_HIGH_WATERMARK(line_time)));
2412 	/* select wm B */
2413 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2414 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2415 	tmp |= LATENCY_WATERMARK_MASK(2);
2416 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2417 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2418 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2419 		LATENCY_HIGH_WATERMARK(line_time)));
2420 	/* restore original selection */
2421 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2422 
2423 	/* write the priority marks */
2424 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2425 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2426 
2427 	/* save values for DPM */
2428 	radeon_crtc->line_time = line_time;
2429 	radeon_crtc->wm_high = latency_watermark_a;
2430 	radeon_crtc->wm_low = latency_watermark_b;
2431 }
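
/*
 * Note on the priority mark math above: the fixed point chain computes
 * watermark_ns * (mode->clock / 1000) * hsc / 1000 / 16, i.e. the
 * number of 16-pixel groups scanned out while the watermark latency
 * elapses.  For an assumed 3454 ns watermark at 148500 kHz with
 * hsc = 1 that is 3454 * 148.5 / 1000 / 16 = ~32.
 */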
2432 
2433 void dce6_bandwidth_update(struct radeon_device *rdev)
2434 {
2435 	struct drm_display_mode *mode0 = NULL;
2436 	struct drm_display_mode *mode1 = NULL;
2437 	u32 num_heads = 0, lb_size;
2438 	int i;
2439 
2440 	if (!rdev->mode_info.mode_config_initialized)
2441 		return;
2442 
2443 	radeon_update_display_priority(rdev);
2444 
2445 	for (i = 0; i < rdev->num_crtc; i++) {
2446 		if (rdev->mode_info.crtcs[i]->base.enabled)
2447 			num_heads++;
2448 	}
2449 	for (i = 0; i < rdev->num_crtc; i += 2) {
2450 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2451 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2452 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2453 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2454 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2455 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2456 	}
2457 }
2458 
2459 /*
2460  * Core functions
2461  */
2462 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2463 {
2464 	u32 *tile = rdev->config.si.tile_mode_array;
2465 	const u32 num_tile_mode_states =
2466 			ARRAY_SIZE(rdev->config.si.tile_mode_array);
2467 	u32 reg_offset, split_equal_to_row_size;
2468 
2469 	switch (rdev->config.si.mem_row_size_in_kb) {
2470 	case 1:
2471 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2472 		break;
2473 	case 2:
2474 	default:
2475 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2476 		break;
2477 	case 4:
2478 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2479 		break;
2480 	}
2481 
2482 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2483 		tile[reg_offset] = 0;
2484 
2485 	switch (rdev->family) {
2486 	case CHIP_TAHITI:
2487 	case CHIP_PITCAIRN:
2488 		/* non-AA compressed depth or any compressed stencil */
2489 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2490 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2491 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2492 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2493 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2494 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2495 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2496 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2497 		/* 2xAA/4xAA compressed depth only */
2498 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2499 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2500 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2501 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2502 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2503 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2504 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2505 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2506 		/* 8xAA compressed depth only */
2507 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2508 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2509 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2510 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2511 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2512 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2513 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2514 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2515 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2516 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2517 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2518 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2519 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2520 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2521 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2522 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2523 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2524 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2525 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2526 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2527 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2528 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2529 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2530 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2531 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2532 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2533 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2534 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2535 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2536 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2537 			   TILE_SPLIT(split_equal_to_row_size) |
2538 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2539 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2540 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2541 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2542 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2543 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2544 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2545 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2546 			   TILE_SPLIT(split_equal_to_row_size) |
2547 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2548 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2549 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2550 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2551 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2552 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2553 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2554 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2555 			   TILE_SPLIT(split_equal_to_row_size) |
2556 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2557 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2558 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2559 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2560 		/* 1D and 1D Array Surfaces */
2561 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2562 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2563 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2564 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2565 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2566 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2567 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2568 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2569 		/* Displayable maps. */
2570 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2571 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2572 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2573 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2574 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2575 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2577 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2578 		/* Display 8bpp. */
2579 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2580 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2581 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2582 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2583 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2584 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2585 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2586 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2587 		/* Display 16bpp. */
2588 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2589 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2590 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2591 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2592 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2593 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2594 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2595 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2596 		/* Display 32bpp. */
2597 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2598 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2599 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2600 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2601 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2602 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2603 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2604 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2605 		/* Thin. */
2606 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2607 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2608 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2609 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2610 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2611 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2612 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2613 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2614 		/* Thin 8 bpp. */
2615 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2616 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2617 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2618 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2619 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2620 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2621 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2622 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2623 		/* Thin 16 bpp. */
2624 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2625 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2626 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2627 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2628 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2629 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2630 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2631 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2632 		/* Thin 32 bpp. */
2633 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2634 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2635 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2636 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2637 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2638 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2639 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2640 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2641 		/* Thin 64 bpp. */
2642 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2643 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2644 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2645 			   TILE_SPLIT(split_equal_to_row_size) |
2646 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2647 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2648 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2649 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2650 		/* 8 bpp PRT. */
2651 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2652 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2653 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2654 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2655 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2656 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2657 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2658 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2659 		/* 16 bpp PRT */
2660 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2662 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2663 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2664 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2665 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2666 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2667 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2668 		/* 32 bpp PRT */
2669 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2670 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2671 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2672 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2673 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2674 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2675 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2676 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2677 		/* 64 bpp PRT */
2678 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2679 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2680 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2681 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2682 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2683 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2684 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2685 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2686 		/* 128 bpp PRT */
2687 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2688 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2689 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2690 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2691 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2692 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2693 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2694 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2695 
2696 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2697 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2698 		break;
2699 
2700 	case CHIP_VERDE:
2701 	case CHIP_OLAND:
2702 	case CHIP_HAINAN:
2703 		/* non-AA compressed depth or any compressed stencil */
2704 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2705 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2706 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2707 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2708 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2709 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2710 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2711 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2712 		/* 2xAA/4xAA compressed depth only */
2713 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2714 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2715 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2716 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2717 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2718 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2719 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2720 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2721 		/* 8xAA compressed depth only */
2722 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2723 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2724 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2725 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2726 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2727 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2728 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2729 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2730 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2731 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2732 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2733 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2734 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2735 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2736 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2737 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2738 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2739 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2740 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2741 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2742 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2743 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2744 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2745 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2746 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2747 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2748 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2749 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2750 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2751 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2752 			   TILE_SPLIT(split_equal_to_row_size) |
2753 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2754 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2755 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2756 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2757 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2758 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2759 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2760 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2761 			   TILE_SPLIT(split_equal_to_row_size) |
2762 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2763 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2764 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2765 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2766 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2767 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2768 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2769 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2770 			   TILE_SPLIT(split_equal_to_row_size) |
2771 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2772 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2773 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2774 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2775 		/* 1D and 1D Array Surfaces */
2776 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2777 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2778 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2779 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2780 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2781 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2782 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2783 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2784 		/* Displayable maps. */
2785 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2786 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2787 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2788 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2789 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2790 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2791 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2792 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2793 		/* Display 8bpp. */
2794 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2795 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2796 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2797 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2798 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2799 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2800 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2801 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2802 		/* Display 16bpp. */
2803 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2804 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2805 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2806 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2807 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2808 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2809 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2810 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2811 		/* Display 32bpp. */
2812 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2813 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2814 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2815 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2816 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2817 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2818 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2819 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2820 		/* Thin. */
2821 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2822 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2823 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2824 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2825 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2826 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2827 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2828 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2829 		/* Thin 8 bpp. */
2830 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2831 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2832 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2833 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2834 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2835 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2836 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2837 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2838 		/* Thin 16 bpp. */
2839 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2840 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2841 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2842 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2843 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2844 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2845 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2846 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2847 		/* Thin 32 bpp. */
2848 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2849 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2850 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2851 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2852 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2853 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2854 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2855 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2856 		/* Thin 64 bpp. */
2857 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2858 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2859 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2860 			   TILE_SPLIT(split_equal_to_row_size) |
2861 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2862 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2863 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2864 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2865 		/* 8 bpp PRT. */
2866 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2867 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2868 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2869 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2870 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2871 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2872 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2873 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2874 		/* 16 bpp PRT */
2875 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2876 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2877 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2878 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2879 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2880 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2881 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2882 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2883 		/* 32 bpp PRT */
2884 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2885 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2886 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2887 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2888 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2889 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2890 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2891 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2892 		/* 64 bpp PRT */
2893 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2894 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2895 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2896 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2897 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2898 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2899 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2900 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2901 		/* 128 bpp PRT */
2902 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2903 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2904 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2905 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2906 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2907 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2908 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2909 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2910 
2911 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2912 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2913 		break;
2914 
2915 	default:
2916 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2917 	}
2918 }
2919 
2920 static void si_select_se_sh(struct radeon_device *rdev,
2921 			    u32 se_num, u32 sh_num)
2922 {
2923 	u32 data = INSTANCE_BROADCAST_WRITES;
2924 
2925 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2926 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2927 	else if (se_num == 0xffffffff)
2928 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2929 	else if (sh_num == 0xffffffff)
2930 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2931 	else
2932 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2933 	WREG32(GRBM_GFX_INDEX, data);
2934 }
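
/*
 * Usage note: callers below select one (se, sh) pair to read that
 * shader array's status registers, then pass 0xffffffff for both
 * arguments to restore broadcast so later writes reach every shader
 * engine again.
 */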
2935 
2936 static u32 si_create_bitmask(u32 bit_width)
2937 {
2938 	u32 i, mask = 0;
2939 
2940 	for (i = 0; i < bit_width; i++) {
2941 		mask <<= 1;
2942 		mask |= 1;
2943 	}
2944 	return mask;
2945 }
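
/*
 * Illustrative sketch (not part of the driver): for bit_width < 32 the
 * loop above is equivalent to the usual closed form; the helper name
 * is hypothetical.
 */
static inline u32 si_create_bitmask_closed_form(u32 bit_width)
{
	/* (1u << 32) is undefined, so handle full width explicitly */
	return (bit_width >= 32) ? 0xffffffff : (1u << bit_width) - 1;
}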
2946 
2947 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2948 {
2949 	u32 data, mask;
2950 
2951 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2952 	if (data & 1)
2953 		data &= INACTIVE_CUS_MASK;
2954 	else
2955 		data = 0;
2956 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2957 
2958 	data >>= INACTIVE_CUS_SHIFT;
2959 
2960 	mask = si_create_bitmask(cu_per_sh);
2961 
2962 	return ~data & mask;
2963 }
2964 
2965 static void si_setup_spi(struct radeon_device *rdev,
2966 			 u32 se_num, u32 sh_per_se,
2967 			 u32 cu_per_sh)
2968 {
2969 	int i, j, k;
2970 	u32 data, mask, active_cu;
2971 
2972 	for (i = 0; i < se_num; i++) {
2973 		for (j = 0; j < sh_per_se; j++) {
2974 			si_select_se_sh(rdev, i, j);
2975 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2976 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2977 
2978 			for (k = 0; k < 16; k++) {
2979 				/* was "mask <<= k", which skipped CU bits */
2980 				mask = 1 << k;
2981 				if (active_cu & mask) {
2982 					data &= ~mask;
2983 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2984 					break;
2985 				}
2986 			}
2987 		}
2988 	}
2989 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2990 }
2991 
2992 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2993 			      u32 max_rb_num_per_se,
2994 			      u32 sh_per_se)
2995 {
2996 	u32 data, mask;
2997 
2998 	data = RREG32(CC_RB_BACKEND_DISABLE);
2999 	if (data & 1)
3000 		data &= BACKEND_DISABLE_MASK;
3001 	else
3002 		data = 0;
3003 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3004 
3005 	data >>= BACKEND_DISABLE_SHIFT;
3006 
3007 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3008 
3009 	return data & mask;
3010 }
3011 
3012 static void si_setup_rb(struct radeon_device *rdev,
3013 			u32 se_num, u32 sh_per_se,
3014 			u32 max_rb_num_per_se)
3015 {
3016 	int i, j;
3017 	u32 data, mask;
3018 	u32 disabled_rbs = 0;
3019 	u32 enabled_rbs = 0;
3020 
3021 	for (i = 0; i < se_num; i++) {
3022 		for (j = 0; j < sh_per_se; j++) {
3023 			si_select_se_sh(rdev, i, j);
3024 			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3025 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3026 		}
3027 	}
3028 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3029 
3030 	mask = 1;
3031 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3032 		if (!(disabled_rbs & mask))
3033 			enabled_rbs |= mask;
3034 		mask <<= 1;
3035 	}
3036 
3037 	rdev->config.si.backend_enable_mask = enabled_rbs;
3038 
3039 	for (i = 0; i < se_num; i++) {
3040 		si_select_se_sh(rdev, i, 0xffffffff);
3041 		data = 0;
3042 		for (j = 0; j < sh_per_se; j++) {
3043 			switch (enabled_rbs & 3) {
3044 			case 1:
3045 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3046 				break;
3047 			case 2:
3048 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3049 				break;
3050 			case 3:
3051 			default:
3052 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3053 				break;
3054 			}
3055 			enabled_rbs >>= 2;
3056 		}
3057 		WREG32(PA_SC_RASTER_CONFIG, data);
3058 	}
3059 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3060 }
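
/*
 * Worked example for the PA_SC_RASTER_CONFIG packing above: with
 * sh_per_se = 2 and enabled_rbs = 0xf, each shader array hits the
 * "both RBs enabled" case and contributes RASTER_CONFIG_RB_MAP_2 in
 * its own 2-bit field, consuming enabled_rbs two bits at a time.
 */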
3061 
3062 static void si_gpu_init(struct radeon_device *rdev)
3063 {
3064 	u32 gb_addr_config = 0;
3065 	u32 mc_shared_chmap, mc_arb_ramcfg;
3066 	u32 sx_debug_1;
3067 	u32 hdp_host_path_cntl;
3068 	u32 tmp;
3069 	int i, j;
3070 
3071 	switch (rdev->family) {
3072 	case CHIP_TAHITI:
3073 		rdev->config.si.max_shader_engines = 2;
3074 		rdev->config.si.max_tile_pipes = 12;
3075 		rdev->config.si.max_cu_per_sh = 8;
3076 		rdev->config.si.max_sh_per_se = 2;
3077 		rdev->config.si.max_backends_per_se = 4;
3078 		rdev->config.si.max_texture_channel_caches = 12;
3079 		rdev->config.si.max_gprs = 256;
3080 		rdev->config.si.max_gs_threads = 32;
3081 		rdev->config.si.max_hw_contexts = 8;
3082 
3083 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3084 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3085 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3086 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3087 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3088 		break;
3089 	case CHIP_PITCAIRN:
3090 		rdev->config.si.max_shader_engines = 2;
3091 		rdev->config.si.max_tile_pipes = 8;
3092 		rdev->config.si.max_cu_per_sh = 5;
3093 		rdev->config.si.max_sh_per_se = 2;
3094 		rdev->config.si.max_backends_per_se = 4;
3095 		rdev->config.si.max_texture_channel_caches = 8;
3096 		rdev->config.si.max_gprs = 256;
3097 		rdev->config.si.max_gs_threads = 32;
3098 		rdev->config.si.max_hw_contexts = 8;
3099 
3100 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3101 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3102 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3103 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3104 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3105 		break;
3106 	case CHIP_VERDE:
3107 	default:
3108 		rdev->config.si.max_shader_engines = 1;
3109 		rdev->config.si.max_tile_pipes = 4;
3110 		rdev->config.si.max_cu_per_sh = 5;
3111 		rdev->config.si.max_sh_per_se = 2;
3112 		rdev->config.si.max_backends_per_se = 4;
3113 		rdev->config.si.max_texture_channel_caches = 4;
3114 		rdev->config.si.max_gprs = 256;
3115 		rdev->config.si.max_gs_threads = 32;
3116 		rdev->config.si.max_hw_contexts = 8;
3117 
3118 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3119 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3120 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3121 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3122 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3123 		break;
3124 	case CHIP_OLAND:
3125 		rdev->config.si.max_shader_engines = 1;
3126 		rdev->config.si.max_tile_pipes = 4;
3127 		rdev->config.si.max_cu_per_sh = 6;
3128 		rdev->config.si.max_sh_per_se = 1;
3129 		rdev->config.si.max_backends_per_se = 2;
3130 		rdev->config.si.max_texture_channel_caches = 4;
3131 		rdev->config.si.max_gprs = 256;
3132 		rdev->config.si.max_gs_threads = 16;
3133 		rdev->config.si.max_hw_contexts = 8;
3134 
3135 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3136 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3137 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3138 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3139 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3140 		break;
3141 	case CHIP_HAINAN:
3142 		rdev->config.si.max_shader_engines = 1;
3143 		rdev->config.si.max_tile_pipes = 4;
3144 		rdev->config.si.max_cu_per_sh = 5;
3145 		rdev->config.si.max_sh_per_se = 1;
3146 		rdev->config.si.max_backends_per_se = 1;
3147 		rdev->config.si.max_texture_channel_caches = 2;
3148 		rdev->config.si.max_gprs = 256;
3149 		rdev->config.si.max_gs_threads = 16;
3150 		rdev->config.si.max_hw_contexts = 8;
3151 
3152 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3153 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3154 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3155 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3156 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3157 		break;
3158 	}
3159 
3160 	/* Initialize HDP */
3161 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3162 		WREG32((0x2c14 + j), 0x00000000);
3163 		WREG32((0x2c18 + j), 0x00000000);
3164 		WREG32((0x2c1c + j), 0x00000000);
3165 		WREG32((0x2c20 + j), 0x00000000);
3166 		WREG32((0x2c24 + j), 0x00000000);
3167 	}
3168 
3169 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3170 	WREG32(SRBM_INT_CNTL, 1);
3171 	WREG32(SRBM_INT_ACK, 1);
3172 
3173 	evergreen_fix_pci_max_read_req_size(rdev);
3174 
3175 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3176 
3177 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3178 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3179 
3180 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3181 	rdev->config.si.mem_max_burst_length_bytes = 256;
3182 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3183 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3184 	if (rdev->config.si.mem_row_size_in_kb > 4)
3185 		rdev->config.si.mem_row_size_in_kb = 4;
3186 	/* XXX use MC settings? */
3187 	rdev->config.si.shader_engine_tile_size = 32;
3188 	rdev->config.si.num_gpus = 1;
3189 	rdev->config.si.multi_gpu_tile_size = 64;
3190 
3191 	/* fix up row size */
3192 	gb_addr_config &= ~ROW_SIZE_MASK;
3193 	switch (rdev->config.si.mem_row_size_in_kb) {
3194 	case 1:
3195 	default:
3196 		gb_addr_config |= ROW_SIZE(0);
3197 		break;
3198 	case 2:
3199 		gb_addr_config |= ROW_SIZE(1);
3200 		break;
3201 	case 4:
3202 		gb_addr_config |= ROW_SIZE(2);
3203 		break;
3204 	}
3205 
3206 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3207 	 * not have bank info, so create a custom tiling dword.
3208 	 * bits 3:0   num_pipes
3209 	 * bits 7:4   num_banks
3210 	 * bits 11:8  group_size
3211 	 * bits 15:12 row_size
3212 	 */
3213 	rdev->config.si.tile_config = 0;
3214 	switch (rdev->config.si.num_tile_pipes) {
3215 	case 1:
3216 		rdev->config.si.tile_config |= (0 << 0);
3217 		break;
3218 	case 2:
3219 		rdev->config.si.tile_config |= (1 << 0);
3220 		break;
3221 	case 4:
3222 		rdev->config.si.tile_config |= (2 << 0);
3223 		break;
3224 	case 8:
3225 	default:
3226 		/* XXX what about 12? */
3227 		rdev->config.si.tile_config |= (3 << 0);
3228 		break;
3229 	}
3230 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3231 	case 0: /* four banks */
3232 		rdev->config.si.tile_config |= 0 << 4;
3233 		break;
3234 	case 1: /* eight banks */
3235 		rdev->config.si.tile_config |= 1 << 4;
3236 		break;
3237 	case 2: /* sixteen banks */
3238 	default:
3239 		rdev->config.si.tile_config |= 2 << 4;
3240 		break;
3241 	}
3242 	rdev->config.si.tile_config |=
3243 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3244 	rdev->config.si.tile_config |=
3245 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3246 
3247 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3248 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3249 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3250 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3251 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3252 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3253 	if (rdev->has_uvd) {
3254 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3255 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3256 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3257 	}
3258 
3259 	si_tiling_mode_table_init(rdev);
3260 
3261 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3262 		    rdev->config.si.max_sh_per_se,
3263 		    rdev->config.si.max_backends_per_se);
3264 
3265 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3266 		     rdev->config.si.max_sh_per_se,
3267 		     rdev->config.si.max_cu_per_sh);
3268 
3269 	rdev->config.si.active_cus = 0;
3270 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3271 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3272 			rdev->config.si.active_cus +=
3273 				hweight32(si_get_cu_active_bitmap(rdev, i, j));
3274 		}
3275 	}
3276 
3277 	/* set HW defaults for 3D engine */
3278 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3279 				     ROQ_IB2_START(0x2b)));
3280 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3281 
3282 	sx_debug_1 = RREG32(SX_DEBUG_1);
3283 	WREG32(SX_DEBUG_1, sx_debug_1);
3284 
3285 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3286 
3287 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3288 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3289 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3290 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3291 
3292 	WREG32(VGT_NUM_INSTANCES, 1);
3293 
3294 	WREG32(CP_PERFMON_CNTL, 0);
3295 
3296 	WREG32(SQ_CONFIG, 0);
3297 
3298 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3299 					  FORCE_EOV_MAX_REZ_CNT(255)));
3300 
3301 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3302 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3303 
3304 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3305 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3306 
3307 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3308 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3309 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3310 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3311 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3312 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3313 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3314 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3315 
3316 	tmp = RREG32(HDP_MISC_CNTL);
3317 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3318 	WREG32(HDP_MISC_CNTL, tmp);
3319 
3320 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3321 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3322 
3323 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3324 
3325 	udelay(50);
3326 }
3327 
3328 /*
3329  * GPU scratch registers helper functions.
3330  */
3331 static void si_scratch_init(struct radeon_device *rdev)
3332 {
3333 	int i;
3334 
3335 	rdev->scratch.num_reg = 7;
3336 	rdev->scratch.reg_base = SCRATCH_REG0;
3337 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3338 		rdev->scratch.free[i] = true;
3339 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3340 	}
3341 }
3342 
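/**
 * si_fence_ring_emit - emit a fence on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 *
 * Flush the GPU caches with SURFACE_SYNC, then emit an
 * EVENT_WRITE_EOP packet to write the fence sequence number
 * and trigger an interrupt (SI).
 */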
3343 void si_fence_ring_emit(struct radeon_device *rdev,
3344 			struct radeon_fence *fence)
3345 {
3346 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3347 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3348 
3349 	/* flush read cache over gart */
3350 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3351 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3352 	radeon_ring_write(ring, 0);
3353 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3354 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3355 			  PACKET3_TC_ACTION_ENA |
3356 			  PACKET3_SH_KCACHE_ACTION_ENA |
3357 			  PACKET3_SH_ICACHE_ACTION_ENA);
3358 	radeon_ring_write(ring, 0xFFFFFFFF);
3359 	radeon_ring_write(ring, 0);
3360 	radeon_ring_write(ring, 10); /* poll interval */
3361 	/* EVENT_WRITE_EOP - flush caches, send int */
3362 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3363 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3364 	radeon_ring_write(ring, lower_32_bits(addr));
3365 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3366 	radeon_ring_write(ring, fence->seq);
3367 	radeon_ring_write(ring, 0);
3368 }
3369 
3370 /*
3371  * IB stuff
3372  */
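/**
 * si_ring_ib_execute - emit an IB on the gfx ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Emit an INDIRECT_BUFFER packet (INDIRECT_BUFFER_CONST for
 * const IBs) to schedule the IB and, for non-const IBs, flush
 * the read caches for the IB's vm id (SI).
 */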
3373 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3374 {
3375 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3376 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3377 	u32 header;
3378 
3379 	if (ib->is_const_ib) {
3380 		/* set switch buffer packet before const IB */
3381 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3382 		radeon_ring_write(ring, 0);
3383 
3384 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3385 	} else {
3386 		u32 next_rptr;
3387 		if (ring->rptr_save_reg) {
3388 			next_rptr = ring->wptr + 3 + 4 + 8;
3389 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3390 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3391 						  PACKET3_SET_CONFIG_REG_START) >> 2));
3392 			radeon_ring_write(ring, next_rptr);
3393 		} else if (rdev->wb.enabled) {
3394 			next_rptr = ring->wptr + 5 + 4 + 8;
3395 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3396 			radeon_ring_write(ring, (1 << 8));
3397 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3398 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3399 			radeon_ring_write(ring, next_rptr);
3400 		}
3401 
3402 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3403 	}
3404 
3405 	radeon_ring_write(ring, header);
3406 	radeon_ring_write(ring,
3407 #ifdef __BIG_ENDIAN
3408 			  (2 << 0) |
3409 #endif
3410 			  (ib->gpu_addr & 0xFFFFFFFC));
3411 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3412 	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3413 
3414 	if (!ib->is_const_ib) {
3415 		/* flush read cache over gart for this vmid */
3416 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3417 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3418 		radeon_ring_write(ring, vm_id);
3419 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3420 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3421 				  PACKET3_TC_ACTION_ENA |
3422 				  PACKET3_SH_KCACHE_ACTION_ENA |
3423 				  PACKET3_SH_ICACHE_ACTION_ENA);
3424 		radeon_ring_write(ring, 0xFFFFFFFF);
3425 		radeon_ring_write(ring, 0);
3426 		radeon_ring_write(ring, 10); /* poll interval */
3427 	}
3428 }
3429 
3430 /*
3431  * CP.
3432  */
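/**
 * si_cp_enable - enable/disable the gfx CP MEs
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the MEs
 *
 * Halt or unhalt the PFP, ME and CE microengines; on disable,
 * also mark the three CP rings as not ready (SI).
 */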
3433 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3434 {
3435 	if (enable)
3436 		WREG32(CP_ME_CNTL, 0);
3437 	else {
3438 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3439 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3440 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3441 		WREG32(SCRATCH_UMSK, 0);
3442 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3443 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3444 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3445 	}
3446 	udelay(50);
3447 }
3448 
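/**
 * si_cp_load_microcode - load the gfx CP microcode
 *
 * @rdev: radeon_device pointer
 *
 * Halt the CP and load the PFP, CE and ME ucode images into
 * their instruction RAMs.
 * Returns 0 on success, -EINVAL if the firmware is not loaded (SI).
 */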
3449 static int si_cp_load_microcode(struct radeon_device *rdev)
3450 {
3451 	int i;
3452 
3453 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3454 		return -EINVAL;
3455 
3456 	si_cp_enable(rdev, false);
3457 
3458 	if (rdev->new_fw) {
3459 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3460 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3461 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3462 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3463 		const struct gfx_firmware_header_v1_0 *me_hdr =
3464 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3465 		const __le32 *fw_data;
3466 		u32 fw_size;
3467 
3468 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3469 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3470 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3471 
3472 		/* PFP */
3473 		fw_data = (const __le32 *)
3474 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3475 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3476 		WREG32(CP_PFP_UCODE_ADDR, 0);
3477 		for (i = 0; i < fw_size; i++)
3478 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3479 		WREG32(CP_PFP_UCODE_ADDR, 0);
3480 
3481 		/* CE */
3482 		fw_data = (const __le32 *)
3483 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3484 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3485 		WREG32(CP_CE_UCODE_ADDR, 0);
3486 		for (i = 0; i < fw_size; i++)
3487 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3488 		WREG32(CP_CE_UCODE_ADDR, 0);
3489 
3490 		/* ME */
3491 		fw_data = (const __le32 *)
3492 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3493 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3494 		WREG32(CP_ME_RAM_WADDR, 0);
3495 		for (i = 0; i < fw_size; i++)
3496 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3497 		WREG32(CP_ME_RAM_WADDR, 0);
3498 	} else {
3499 		const __be32 *fw_data;
3500 
3501 		/* PFP */
3502 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3503 		WREG32(CP_PFP_UCODE_ADDR, 0);
3504 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3505 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3506 		WREG32(CP_PFP_UCODE_ADDR, 0);
3507 
3508 		/* CE */
3509 		fw_data = (const __be32 *)rdev->ce_fw->data;
3510 		WREG32(CP_CE_UCODE_ADDR, 0);
3511 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3512 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3513 		WREG32(CP_CE_UCODE_ADDR, 0);
3514 
3515 		/* ME */
3516 		fw_data = (const __be32 *)rdev->me_fw->data;
3517 		WREG32(CP_ME_RAM_WADDR, 0);
3518 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3519 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3520 		WREG32(CP_ME_RAM_WADDR, 0);
3521 	}
3522 
3523 	WREG32(CP_PFP_UCODE_ADDR, 0);
3524 	WREG32(CP_CE_UCODE_ADDR, 0);
3525 	WREG32(CP_ME_RAM_WADDR, 0);
3526 	WREG32(CP_ME_RAM_RADDR, 0);
3527 	return 0;
3528 }
3529 
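/**
 * si_cp_start - initialize the gfx and compute rings
 *
 * @rdev: radeon_device pointer
 *
 * Run ME_INITIALIZE, set up the CE partitions, emit the clear
 * state buffer on the gfx ring and clear the compute context
 * state on the two compute rings.
 * Returns 0 on success, error on failure (SI).
 */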
3530 static int si_cp_start(struct radeon_device *rdev)
3531 {
3532 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3533 	int r, i;
3534 
3535 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3536 	if (r) {
3537 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3538 		return r;
3539 	}
3540 	/* init the CP */
3541 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3542 	radeon_ring_write(ring, 0x1);
3543 	radeon_ring_write(ring, 0x0);
3544 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3545 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3546 	radeon_ring_write(ring, 0);
3547 	radeon_ring_write(ring, 0);
3548 
3549 	/* init the CE partitions */
3550 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3551 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3552 	radeon_ring_write(ring, 0xc000);
3553 	radeon_ring_write(ring, 0xe000);
3554 	radeon_ring_unlock_commit(rdev, ring, false);
3555 
3556 	si_cp_enable(rdev, true);
3557 
3558 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3559 	if (r) {
3560 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3561 		return r;
3562 	}
3563 
3564 	/* setup clear context state */
3565 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3566 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3567 
3568 	for (i = 0; i < si_default_size; i++)
3569 		radeon_ring_write(ring, si_default_state[i]);
3570 
3571 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3572 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3573 
3574 	/* set clear context state */
3575 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3576 	radeon_ring_write(ring, 0);
3577 
3578 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3579 	radeon_ring_write(ring, 0x00000316);
3580 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3581 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3582 
3583 	radeon_ring_unlock_commit(rdev, ring, false);
3584 
3585 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3586 		ring = &rdev->ring[i];
3587 		r = radeon_ring_lock(rdev, ring, 2);
		if (r) {
			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
			return r;
		}
3588 
3589 		/* clear the compute context state */
3590 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3591 		radeon_ring_write(ring, 0);
3592 
3593 		radeon_ring_unlock_commit(rdev, ring, false);
3594 	}
3595 
3596 	return 0;
3597 }
3598 
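/**
 * si_cp_fini - tear down the CP rings
 *
 * @rdev: radeon_device pointer
 *
 * Halt the CP, then free the ring buffers and rptr scratch
 * registers for the gfx ring and both compute rings (SI).
 */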
3599 static void si_cp_fini(struct radeon_device *rdev)
3600 {
3601 	struct radeon_ring *ring;
3602 	si_cp_enable(rdev, false);
3603 
3604 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3605 	radeon_ring_fini(rdev, ring);
3606 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3607 
3608 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3609 	radeon_ring_fini(rdev, ring);
3610 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3611 
3612 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3613 	radeon_ring_fini(rdev, ring);
3614 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3615 }
3616 
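/**
 * si_cp_resume - set up and start the CP ring buffers
 *
 * @rdev: radeon_device pointer
 *
 * Program the ring buffer size, base address and read/write
 * pointers for the gfx ring and the two compute rings, start
 * the rings and run a ring test on each.
 * Returns 0 on success, error on failure (SI).
 */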
3617 static int si_cp_resume(struct radeon_device *rdev)
3618 {
3619 	struct radeon_ring *ring;
3620 	u32 tmp;
3621 	u32 rb_bufsz;
3622 	int r;
3623 
3624 	si_enable_gui_idle_interrupt(rdev, false);
3625 
3626 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3627 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3628 
3629 	/* Set the write pointer delay */
3630 	WREG32(CP_RB_WPTR_DELAY, 0);
3631 
3632 	WREG32(CP_DEBUG, 0);
3633 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3634 
3635 	/* ring 0 - compute and gfx */
3636 	/* Set ring buffer size */
3637 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3638 	rb_bufsz = order_base_2(ring->ring_size / 8);
3639 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3640 #ifdef __BIG_ENDIAN
3641 	tmp |= BUF_SWAP_32BIT;
3642 #endif
3643 	WREG32(CP_RB0_CNTL, tmp);
3644 
3645 	/* Initialize the ring buffer's read and write pointers */
3646 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3647 	ring->wptr = 0;
3648 	WREG32(CP_RB0_WPTR, ring->wptr);
3649 
3650 	/* set the wb address whether it's enabled or not */
3651 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3652 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3653 
3654 	if (rdev->wb.enabled)
3655 		WREG32(SCRATCH_UMSK, 0xff);
3656 	else {
3657 		tmp |= RB_NO_UPDATE;
3658 		WREG32(SCRATCH_UMSK, 0);
3659 	}
3660 
3661 	mdelay(1);
3662 	WREG32(CP_RB0_CNTL, tmp);
3663 
3664 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3665 
3666 	/* ring 1 - compute only */
3667 	/* Set ring buffer size */
3668 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3669 	rb_bufsz = order_base_2(ring->ring_size / 8);
3670 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3671 #ifdef __BIG_ENDIAN
3672 	tmp |= BUF_SWAP_32BIT;
3673 #endif
3674 	WREG32(CP_RB1_CNTL, tmp);
3675 
3676 	/* Initialize the ring buffer's read and write pointers */
3677 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3678 	ring->wptr = 0;
3679 	WREG32(CP_RB1_WPTR, ring->wptr);
3680 
3681 	/* set the wb address whether it's enabled or not */
3682 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3683 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3684 
3685 	mdelay(1);
3686 	WREG32(CP_RB1_CNTL, tmp);
3687 
3688 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3689 
3690 	/* ring 2 - compute only */
3691 	/* Set ring buffer size */
3692 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3693 	rb_bufsz = order_base_2(ring->ring_size / 8);
3694 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3695 #ifdef __BIG_ENDIAN
3696 	tmp |= BUF_SWAP_32BIT;
3697 #endif
3698 	WREG32(CP_RB2_CNTL, tmp);
3699 
3700 	/* Initialize the ring buffer's read and write pointers */
3701 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3702 	ring->wptr = 0;
3703 	WREG32(CP_RB2_WPTR, ring->wptr);
3704 
3705 	/* set the wb address whether it's enabled or not */
3706 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3707 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3708 
3709 	mdelay(1);
3710 	WREG32(CP_RB2_CNTL, tmp);
3711 
3712 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3713 
3714 	/* start the rings */
3715 	si_cp_start(rdev);
3716 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3717 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3718 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3719 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3720 	if (r) {
3721 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3722 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3723 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3724 		return r;
3725 	}
3726 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3727 	if (r) {
3728 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3729 	}
3730 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3731 	if (r) {
3732 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3733 	}
3734 
3735 	si_enable_gui_idle_interrupt(rdev, true);
3736 
3737 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3738 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3739 
3740 	return 0;
3741 }
3742 
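/**
 * si_gpu_check_soft_reset - check which blocks are busy
 *
 * @rdev: radeon_device pointer
 *
 * Read the various status registers and work out which blocks
 * (GFX, CP, RLC, DMA, IH, SEM, VMC, display, etc.) appear to be
 * busy or hung.
 * Returns a mask of the blocks to be reset (SI).
 */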
3743 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3744 {
3745 	u32 reset_mask = 0;
3746 	u32 tmp;
3747 
3748 	/* GRBM_STATUS */
3749 	tmp = RREG32(GRBM_STATUS);
3750 	if (tmp & (PA_BUSY | SC_BUSY |
3751 		   BCI_BUSY | SX_BUSY |
3752 		   TA_BUSY | VGT_BUSY |
3753 		   DB_BUSY | CB_BUSY |
3754 		   GDS_BUSY | SPI_BUSY |
3755 		   IA_BUSY | IA_BUSY_NO_DMA))
3756 		reset_mask |= RADEON_RESET_GFX;
3757 
3758 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3759 		   CP_BUSY | CP_COHERENCY_BUSY))
3760 		reset_mask |= RADEON_RESET_CP;
3761 
3762 	if (tmp & GRBM_EE_BUSY)
3763 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3764 
3765 	/* GRBM_STATUS2 */
3766 	tmp = RREG32(GRBM_STATUS2);
3767 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3768 		reset_mask |= RADEON_RESET_RLC;
3769 
3770 	/* DMA_STATUS_REG 0 */
3771 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3772 	if (!(tmp & DMA_IDLE))
3773 		reset_mask |= RADEON_RESET_DMA;
3774 
3775 	/* DMA_STATUS_REG 1 */
3776 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3777 	if (!(tmp & DMA_IDLE))
3778 		reset_mask |= RADEON_RESET_DMA1;
3779 
3780 	/* SRBM_STATUS2 */
3781 	tmp = RREG32(SRBM_STATUS2);
3782 	if (tmp & DMA_BUSY)
3783 		reset_mask |= RADEON_RESET_DMA;
3784 
3785 	if (tmp & DMA1_BUSY)
3786 		reset_mask |= RADEON_RESET_DMA1;
3787 
3788 	/* SRBM_STATUS */
3789 	tmp = RREG32(SRBM_STATUS);
3790 
3791 	if (tmp & IH_BUSY)
3792 		reset_mask |= RADEON_RESET_IH;
3793 
3794 	if (tmp & SEM_BUSY)
3795 		reset_mask |= RADEON_RESET_SEM;
3796 
3797 	if (tmp & GRBM_RQ_PENDING)
3798 		reset_mask |= RADEON_RESET_GRBM;
3799 
3800 	if (tmp & VMC_BUSY)
3801 		reset_mask |= RADEON_RESET_VMC;
3802 
3803 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3804 		   MCC_BUSY | MCD_BUSY))
3805 		reset_mask |= RADEON_RESET_MC;
3806 
3807 	if (evergreen_is_display_hung(rdev))
3808 		reset_mask |= RADEON_RESET_DISPLAY;
3809 
3810 	/* VM_L2_STATUS */
3811 	tmp = RREG32(VM_L2_STATUS);
3812 	if (tmp & L2_BUSY)
3813 		reset_mask |= RADEON_RESET_VMC;
3814 
3815 	/* Skip MC reset as it's most likely not hung, just busy */
3816 	if (reset_mask & RADEON_RESET_MC) {
3817 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3818 		reset_mask &= ~RADEON_RESET_MC;
3819 	}
3820 
3821 	return reset_mask;
3822 }
3823 
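/**
 * si_gpu_soft_reset - soft reset the requested blocks
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of blocks to reset
 *
 * Stop the rlc, CP and DMA engines and halt memory traffic,
 * then pulse the GRBM and SRBM soft reset bits for the blocks
 * selected in @reset_mask (SI).
 */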
3824 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3825 {
3826 	struct evergreen_mc_save save;
3827 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3828 	u32 tmp;
3829 
3830 	if (reset_mask == 0)
3831 		return;
3832 
3833 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3834 
3835 	evergreen_print_gpu_status_regs(rdev);
3836 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3837 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3838 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3839 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3840 
3841 	/* disable PG/CG */
3842 	si_fini_pg(rdev);
3843 	si_fini_cg(rdev);
3844 
3845 	/* stop the rlc */
3846 	si_rlc_stop(rdev);
3847 
3848 	/* Disable CP parsing/prefetching */
3849 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3850 
3851 	if (reset_mask & RADEON_RESET_DMA) {
3852 		/* dma0 */
3853 		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3854 		tmp &= ~DMA_RB_ENABLE;
3855 		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3856 	}
3857 	if (reset_mask & RADEON_RESET_DMA1) {
3858 		/* dma1 */
3859 		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3860 		tmp &= ~DMA_RB_ENABLE;
3861 		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3862 	}
3863 
3864 	udelay(50);
3865 
3866 	evergreen_mc_stop(rdev, &save);
3867 	if (evergreen_mc_wait_for_idle(rdev)) {
3868 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3869 	}
3870 
3871 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3872 		grbm_soft_reset = SOFT_RESET_CB |
3873 			SOFT_RESET_DB |
3874 			SOFT_RESET_GDS |
3875 			SOFT_RESET_PA |
3876 			SOFT_RESET_SC |
3877 			SOFT_RESET_BCI |
3878 			SOFT_RESET_SPI |
3879 			SOFT_RESET_SX |
3880 			SOFT_RESET_TC |
3881 			SOFT_RESET_TA |
3882 			SOFT_RESET_VGT |
3883 			SOFT_RESET_IA;
3884 	}
3885 
3886 	if (reset_mask & RADEON_RESET_CP) {
3887 		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3888 
3889 		srbm_soft_reset |= SOFT_RESET_GRBM;
3890 	}
3891 
3892 	if (reset_mask & RADEON_RESET_DMA)
3893 		srbm_soft_reset |= SOFT_RESET_DMA;
3894 
3895 	if (reset_mask & RADEON_RESET_DMA1)
3896 		srbm_soft_reset |= SOFT_RESET_DMA1;
3897 
3898 	if (reset_mask & RADEON_RESET_DISPLAY)
3899 		srbm_soft_reset |= SOFT_RESET_DC;
3900 
3901 	if (reset_mask & RADEON_RESET_RLC)
3902 		grbm_soft_reset |= SOFT_RESET_RLC;
3903 
3904 	if (reset_mask & RADEON_RESET_SEM)
3905 		srbm_soft_reset |= SOFT_RESET_SEM;
3906 
3907 	if (reset_mask & RADEON_RESET_IH)
3908 		srbm_soft_reset |= SOFT_RESET_IH;
3909 
3910 	if (reset_mask & RADEON_RESET_GRBM)
3911 		srbm_soft_reset |= SOFT_RESET_GRBM;
3912 
3913 	if (reset_mask & RADEON_RESET_VMC)
3914 		srbm_soft_reset |= SOFT_RESET_VMC;
3915 
3916 	if (reset_mask & RADEON_RESET_MC)
3917 		srbm_soft_reset |= SOFT_RESET_MC;
3918 
3919 	if (grbm_soft_reset) {
3920 		tmp = RREG32(GRBM_SOFT_RESET);
3921 		tmp |= grbm_soft_reset;
3922 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3923 		WREG32(GRBM_SOFT_RESET, tmp);
3924 		tmp = RREG32(GRBM_SOFT_RESET);
3925 
3926 		udelay(50);
3927 
3928 		tmp &= ~grbm_soft_reset;
3929 		WREG32(GRBM_SOFT_RESET, tmp);
3930 		tmp = RREG32(GRBM_SOFT_RESET);
3931 	}
3932 
3933 	if (srbm_soft_reset) {
3934 		tmp = RREG32(SRBM_SOFT_RESET);
3935 		tmp |= srbm_soft_reset;
3936 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3937 		WREG32(SRBM_SOFT_RESET, tmp);
3938 		tmp = RREG32(SRBM_SOFT_RESET);
3939 
3940 		udelay(50);
3941 
3942 		tmp &= ~srbm_soft_reset;
3943 		WREG32(SRBM_SOFT_RESET, tmp);
3944 		tmp = RREG32(SRBM_SOFT_RESET);
3945 	}
3946 
3947 	/* Wait a little for things to settle down */
3948 	udelay(50);
3949 
3950 	evergreen_mc_resume(rdev, &save);
3951 	udelay(50);
3952 
3953 	evergreen_print_gpu_status_regs(rdev);
3954 }
3955 
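/**
 * si_set_clk_bypass_mode - switch sclk/mclk to bypass
 *
 * @rdev: radeon_device pointer
 *
 * Put the SPLL into bypass and deselect the MPLL as the memory
 * clock source in preparation for a pci config reset (SI).
 */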
3956 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3957 {
3958 	u32 tmp, i;
3959 
3960 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3961 	tmp |= SPLL_BYPASS_EN;
3962 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3963 
3964 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3965 	tmp |= SPLL_CTLREQ_CHG;
3966 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3967 
3968 	for (i = 0; i < rdev->usec_timeout; i++) {
3969 		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3970 			break;
3971 		udelay(1);
3972 	}
3973 
3974 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3975 	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
3976 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3977 
3978 	tmp = RREG32(MPLL_CNTL_MODE);
3979 	tmp &= ~MPLL_MCLK_SEL;
3980 	WREG32(MPLL_CNTL_MODE, tmp);
3981 }
3982 
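/**
 * si_spll_powerdown - power down the SPLL
 *
 * @rdev: radeon_device pointer
 *
 * Take software control of the SPLL, then reset it and put it
 * to sleep in preparation for a pci config reset (SI).
 */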
3983 static void si_spll_powerdown(struct radeon_device *rdev)
3984 {
3985 	u32 tmp;
3986 
3987 	tmp = RREG32(SPLL_CNTL_MODE);
3988 	tmp |= SPLL_SW_DIR_CONTROL;
3989 	WREG32(SPLL_CNTL_MODE, tmp);
3990 
3991 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3992 	tmp |= SPLL_RESET;
3993 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3994 
3995 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3996 	tmp |= SPLL_SLEEP;
3997 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3998 
3999 	tmp = RREG32(SPLL_CNTL_MODE);
4000 	tmp &= ~SPLL_SW_DIR_CONTROL;
4001 	WREG32(SPLL_CNTL_MODE, tmp);
4002 }
4003 
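/**
 * si_gpu_pci_config_reset - reset the GPU via pci config space
 *
 * @rdev: radeon_device pointer
 *
 * Quiesce the engines and the MC, switch the clocks to bypass
 * and power down the SPLL, then reset the asic through pci
 * config space and wait for it to come back (SI).
 */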
4004 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4005 {
4006 	struct evergreen_mc_save save;
4007 	u32 tmp, i;
4008 
4009 	dev_info(rdev->dev, "GPU pci config reset\n");
4010 
4011 	/* disable dpm? */
4012 
4013 	/* disable cg/pg */
4014 	si_fini_pg(rdev);
4015 	si_fini_cg(rdev);
4016 
4017 	/* Disable CP parsing/prefetching */
4018 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4019 	/* dma0 */
4020 	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4021 	tmp &= ~DMA_RB_ENABLE;
4022 	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4023 	/* dma1 */
4024 	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4025 	tmp &= ~DMA_RB_ENABLE;
4026 	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4027 	/* XXX other engines? */
4028 
4029 	/* halt the rlc, disable cp internal ints */
4030 	si_rlc_stop(rdev);
4031 
4032 	udelay(50);
4033 
4034 	/* disable mem access */
4035 	evergreen_mc_stop(rdev, &save);
4036 	if (evergreen_mc_wait_for_idle(rdev)) {
4037 		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
4038 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4039 
4040 	/* set mclk/sclk to bypass */
4041 	si_set_clk_bypass_mode(rdev);
4042 	/* powerdown spll */
4043 	si_spll_powerdown(rdev);
4044 	/* disable BM */
4045 	pci_clear_master(rdev->pdev);
4046 	/* reset */
4047 	radeon_pci_config_reset(rdev);
4048 	/* wait for asic to come out of reset */
4049 	for (i = 0; i < rdev->usec_timeout; i++) {
4050 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4051 			break;
4052 		udelay(1);
4053 	}
4054 }
4055 
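/**
 * si_asic_reset - reset the asic
 *
 * @rdev: radeon_device pointer
 * @hard: force a pci config reset
 *
 * If @hard is set, do a pci config reset immediately; otherwise
 * try a soft reset of the hung blocks and fall back to a pci
 * config reset if blocks are still busy and hard resets are
 * enabled.
 * Returns 0 (SI).
 */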
4056 int si_asic_reset(struct radeon_device *rdev, bool hard)
4057 {
4058 	u32 reset_mask;
4059 
4060 	if (hard) {
4061 		si_gpu_pci_config_reset(rdev);
4062 		return 0;
4063 	}
4064 
4065 	reset_mask = si_gpu_check_soft_reset(rdev);
4066 
4067 	if (reset_mask)
4068 		r600_set_bios_scratch_engine_hung(rdev, true);
4069 
4070 	/* try soft reset */
4071 	si_gpu_soft_reset(rdev, reset_mask);
4072 
4073 	reset_mask = si_gpu_check_soft_reset(rdev);
4074 
4075 	/* try pci config reset */
4076 	if (reset_mask && radeon_hard_reset)
4077 		si_gpu_pci_config_reset(rdev);
4078 
4079 	reset_mask = si_gpu_check_soft_reset(rdev);
4080 
4081 	if (!reset_mask)
4082 		r600_set_bios_scratch_engine_hung(rdev, false);
4083 
4084 	return 0;
4085 }
4086 
4087 /**
4088  * si_gfx_is_lockup - Check if the GFX engine is locked up
4089  *
4090  * @rdev: radeon_device pointer
4091  * @ring: radeon_ring structure holding ring information
4092  *
4093  * Check if the GFX engine is locked up.
4094  * Returns true if the engine appears to be locked up, false if not.
4095  */
4096 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4097 {
4098 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4099 
4100 	if (!(reset_mask & (RADEON_RESET_GFX |
4101 			    RADEON_RESET_COMPUTE |
4102 			    RADEON_RESET_CP))) {
4103 		radeon_ring_lockup_update(rdev, ring);
4104 		return false;
4105 	}
4106 	return radeon_ring_test_lockup(rdev, ring);
4107 }
4108 
4109 /* MC */
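/**
 * si_mc_program - program the GPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Set the location of vram in the GPU's address space and
 * program the system aperture registers while the MC is
 * stopped (SI).
 */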
4110 static void si_mc_program(struct radeon_device *rdev)
4111 {
4112 	struct evergreen_mc_save save;
4113 	u32 tmp;
4114 	int i, j;
4115 
4116 	/* Initialize HDP */
4117 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4118 		WREG32((0x2c14 + j), 0x00000000);
4119 		WREG32((0x2c18 + j), 0x00000000);
4120 		WREG32((0x2c1c + j), 0x00000000);
4121 		WREG32((0x2c20 + j), 0x00000000);
4122 		WREG32((0x2c24 + j), 0x00000000);
4123 	}
4124 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4125 
4126 	evergreen_mc_stop(rdev, &save);
4127 	if (radeon_mc_wait_for_idle(rdev)) {
4128 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4129 	}
4130 	if (!ASIC_IS_NODCE(rdev))
4131 		/* Lock out access through the VGA aperture */
4132 		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4133 	/* Update configuration */
4134 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4135 	       rdev->mc.vram_start >> 12);
4136 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4137 	       rdev->mc.vram_end >> 12);
4138 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4139 	       rdev->vram_scratch.gpu_addr >> 12);
4140 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4141 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4142 	WREG32(MC_VM_FB_LOCATION, tmp);
4143 	/* XXX double check these! */
4144 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4145 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4146 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4147 	WREG32(MC_VM_AGP_BASE, 0);
4148 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4149 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4150 	if (radeon_mc_wait_for_idle(rdev)) {
4151 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4152 	}
4153 	evergreen_mc_resume(rdev, &save);
4154 	if (!ASIC_IS_NODCE(rdev)) {
4155 		/* we need to own VRAM, so turn off the VGA renderer here
4156 		 * to stop it overwriting our objects */
4157 		rv515_vga_render_disable(rdev);
4158 	}
4159 }
4160 
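/**
 * si_vram_gtt_location - place vram and gtt in the GPU address space
 *
 * @rdev: radeon_device pointer
 * @mc: radeon_mc structure holding the MC configuration
 *
 * Cap vram to leave room for at least 1024M of gtt, then let the
 * core place the vram and gtt apertures (SI).
 */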
4161 void si_vram_gtt_location(struct radeon_device *rdev,
4162 			  struct radeon_mc *mc)
4163 {
4164 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4165 		/* leave room for at least 1024M GTT */
4166 		dev_warn(rdev->dev, "limiting VRAM\n");
4167 		mc->real_vram_size = 0xFFC0000000ULL;
4168 		mc->mc_vram_size = 0xFFC0000000ULL;
4169 	}
4170 	radeon_vram_location(rdev, &rdev->mc, 0);
4171 	rdev->mc.gtt_base_align = 0;
4172 	radeon_gtt_location(rdev, mc);
4173 }
4174 
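/**
 * si_mc_init - initialize the memory controller driver state
 *
 * @rdev: radeon_device pointer
 *
 * Determine the vram width and size and the location of vram
 * and gtt in the GPU's address space.
 * Returns 0 (SI).
 */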
4175 static int si_mc_init(struct radeon_device *rdev)
4176 {
4177 	u32 tmp;
4178 	int chansize, numchan;
4179 
4180 	/* Get VRAM information */
4181 	rdev->mc.vram_is_ddr = true;
4182 	tmp = RREG32(MC_ARB_RAMCFG);
4183 	if (tmp & CHANSIZE_OVERRIDE) {
4184 		chansize = 16;
4185 	} else if (tmp & CHANSIZE_MASK) {
4186 		chansize = 64;
4187 	} else {
4188 		chansize = 32;
4189 	}
4190 	tmp = RREG32(MC_SHARED_CHMAP);
4191 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4192 	case 0:
4193 	default:
4194 		numchan = 1;
4195 		break;
4196 	case 1:
4197 		numchan = 2;
4198 		break;
4199 	case 2:
4200 		numchan = 4;
4201 		break;
4202 	case 3:
4203 		numchan = 8;
4204 		break;
4205 	case 4:
4206 		numchan = 3;
4207 		break;
4208 	case 5:
4209 		numchan = 6;
4210 		break;
4211 	case 6:
4212 		numchan = 10;
4213 		break;
4214 	case 7:
4215 		numchan = 12;
4216 		break;
4217 	case 8:
4218 		numchan = 16;
4219 		break;
4220 	}
4221 	rdev->mc.vram_width = numchan * chansize;
4222 	/* Could the aperture size report 0? */
4223 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4224 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4225 	/* size in MB on si */
4226 	tmp = RREG32(CONFIG_MEMSIZE);
4227 	/* some boards may have garbage in the upper 16 bits */
4228 	if (tmp & 0xffff0000) {
4229 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4230 		if (tmp & 0xffff)
4231 			tmp &= 0xffff;
4232 	}
4233 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4234 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4235 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4236 	si_vram_gtt_location(rdev, &rdev->mc);
4237 	radeon_update_bandwidth_info(rdev);
4238 
4239 	return 0;
4240 }
4241 
4242 /*
4243  * GART
4244  */
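/**
 * si_pcie_gart_tlb_flush - flush the GART TLBs
 *
 * @rdev: radeon_device pointer
 *
 * Flush the HDP cache and request a TLB invalidation for all
 * 16 VM contexts (SI).
 */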
4245 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4246 {
4247 	/* flush hdp cache */
4248 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4249 
4250 	/* bits 0-15 are the VM contexts0-15 */
4251 	WREG32(VM_INVALIDATE_REQUEST, 1);
4252 }
4253 
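/**
 * si_pcie_gart_enable - set up the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Program the TLB and L2 cache control registers, set up the
 * system context (context0) on the gart table and enable the
 * 15 VM contexts used for per-process address spaces.
 * Returns 0 on success, error on failure (SI).
 */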
4254 static int si_pcie_gart_enable(struct radeon_device *rdev)
4255 {
4256 	int r, i;
4257 
4258 	if (rdev->gart.robj == NULL) {
4259 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4260 		return -EINVAL;
4261 	}
4262 	r = radeon_gart_table_vram_pin(rdev);
4263 	if (r)
4264 		return r;
4265 	/* Setup TLB control */
4266 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4267 	       (0xA << 7) |
4268 	       ENABLE_L1_TLB |
4269 	       ENABLE_L1_FRAGMENT_PROCESSING |
4270 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4271 	       ENABLE_ADVANCED_DRIVER_MODEL |
4272 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4273 	/* Setup L2 cache */
4274 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4275 	       ENABLE_L2_FRAGMENT_PROCESSING |
4276 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4277 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4278 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4279 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4280 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4281 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4282 	       BANK_SELECT(4) |
4283 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4284 	/* setup context0 */
4285 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4286 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4287 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4288 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4289 			(u32)(rdev->dummy_page.addr >> 12));
4290 	WREG32(VM_CONTEXT0_CNTL2, 0);
4291 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4292 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4293 
4294 	WREG32(0x15D4, 0);
4295 	WREG32(0x15D8, 0);
4296 	WREG32(0x15DC, 0);
4297 
4298 	/* empty context1-15 */
4299 	/* set vm size, must be a multiple of 4 */
4300 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4301 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4302 	/* Assign the pt base to something valid for now; the pts used for
4303 	 * the VMs are determined by the application and setup and assigned
4304 	 * on the fly in the vm part of radeon_gart.c
4305 	 */
4306 	for (i = 1; i < 16; i++) {
4307 		if (i < 8)
4308 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4309 			       rdev->vm_manager.saved_table_addr[i]);
4310 		else
4311 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4312 			       rdev->vm_manager.saved_table_addr[i]);
4313 	}
4314 
4315 	/* enable context1-15 */
4316 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4317 	       (u32)(rdev->dummy_page.addr >> 12));
4318 	WREG32(VM_CONTEXT1_CNTL2, 4);
4319 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4320 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4321 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4322 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4323 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4324 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4325 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4326 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4327 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4328 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4329 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4330 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4331 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4332 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4333 
4334 	si_pcie_gart_tlb_flush(rdev);
4335 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4336 		 (unsigned)(rdev->mc.gtt_size >> 20),
4337 		 (unsigned long long)rdev->gart.table_addr);
4338 	rdev->gart.ready = true;
4339 	return 0;
4340 }
4341 
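/**
 * si_pcie_gart_disable - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Save the per-VM page table addresses, disable all VM contexts
 * and the TLB, and unpin the gart table (SI).
 */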
4342 static void si_pcie_gart_disable(struct radeon_device *rdev)
4343 {
4344 	unsigned i;
4345 
4346 	for (i = 1; i < 16; ++i) {
4347 		uint32_t reg;
4348 		if (i < 8)
4349 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4350 		else
4351 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4352 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4353 	}
4354 
4355 	/* Disable all tables */
4356 	WREG32(VM_CONTEXT0_CNTL, 0);
4357 	WREG32(VM_CONTEXT1_CNTL, 0);
4358 	/* Setup TLB control */
4359 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4360 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4361 	/* Setup L2 cache */
4362 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4363 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4364 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4365 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4366 	WREG32(VM_L2_CNTL2, 0);
4367 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4368 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4369 	radeon_gart_table_vram_unpin(rdev);
4370 }
4371 
4372 static void si_pcie_gart_fini(struct radeon_device *rdev)
4373 {
4374 	si_pcie_gart_disable(rdev);
4375 	radeon_gart_table_vram_free(rdev);
4376 	radeon_gart_fini(rdev);
4377 }
4378 
4379 /* vm parser */
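/**
 * si_vm_reg_valid - check if a register may be written from a VM IB
 *
 * @reg: register offset to check
 *
 * Context and shader registers are always allowed; anything else
 * must be on the small whitelist of config registers.
 * Returns true if the access is allowed (SI).
 */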
4380 static bool si_vm_reg_valid(u32 reg)
4381 {
4382 	/* context regs are fine */
4383 	if (reg >= 0x28000)
4384 		return true;
4385 
4386 	/* shader regs are also fine */
4387 	if (reg >= 0xB000 && reg < 0xC000)
4388 		return true;
4389 
4390 	/* check config regs */
4391 	switch (reg) {
4392 	case GRBM_GFX_INDEX:
4393 	case CP_STRMOUT_CNTL:
4394 	case VGT_VTX_VECT_EJECT_REG:
4395 	case VGT_CACHE_INVALIDATION:
4396 	case VGT_ESGS_RING_SIZE:
4397 	case VGT_GSVS_RING_SIZE:
4398 	case VGT_GS_VERTEX_REUSE:
4399 	case VGT_PRIMITIVE_TYPE:
4400 	case VGT_INDEX_TYPE:
4401 	case VGT_NUM_INDICES:
4402 	case VGT_NUM_INSTANCES:
4403 	case VGT_TF_RING_SIZE:
4404 	case VGT_HS_OFFCHIP_PARAM:
4405 	case VGT_TF_MEMORY_BASE:
4406 	case PA_CL_ENHANCE:
4407 	case PA_SU_LINE_STIPPLE_VALUE:
4408 	case PA_SC_LINE_STIPPLE_STATE:
4409 	case PA_SC_ENHANCE:
4410 	case SQC_CACHES:
4411 	case SPI_STATIC_THREAD_MGMT_1:
4412 	case SPI_STATIC_THREAD_MGMT_2:
4413 	case SPI_STATIC_THREAD_MGMT_3:
4414 	case SPI_PS_MAX_WAVE_ID:
4415 	case SPI_CONFIG_CNTL:
4416 	case SPI_CONFIG_CNTL_1:
4417 	case TA_CNTL_AUX:
4418 	case TA_CS_BC_BASE_ADDR:
4419 		return true;
4420 	default:
4421 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4422 		return false;
4423 	}
4424 }
4425 
4426 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4427 				  u32 *ib, struct radeon_cs_packet *pkt)
4428 {
4429 	switch (pkt->opcode) {
4430 	case PACKET3_NOP:
4431 	case PACKET3_SET_BASE:
4432 	case PACKET3_SET_CE_DE_COUNTERS:
4433 	case PACKET3_LOAD_CONST_RAM:
4434 	case PACKET3_WRITE_CONST_RAM:
4435 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4436 	case PACKET3_DUMP_CONST_RAM:
4437 	case PACKET3_INCREMENT_CE_COUNTER:
4438 	case PACKET3_WAIT_ON_DE_COUNTER:
4439 	case PACKET3_CE_WRITE:
4440 		break;
4441 	default:
4442 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4443 		return -EINVAL;
4444 	}
4445 	return 0;
4446 }
4447 
4448 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4449 {
4450 	u32 start_reg, reg, i;
4451 	u32 command = ib[idx + 4];
4452 	u32 info = ib[idx + 1];
4453 	u32 idx_value = ib[idx];
4454 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4455 		/* src address space is register */
4456 		if (((info & 0x60000000) >> 29) == 0) {
4457 			start_reg = idx_value << 2;
4458 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4459 				reg = start_reg;
4460 				if (!si_vm_reg_valid(reg)) {
4461 					DRM_ERROR("CP DMA Bad SRC register\n");
4462 					return -EINVAL;
4463 				}
4464 			} else {
4465 				for (i = 0; i < (command & 0x1fffff); i++) {
4466 					reg = start_reg + (4 * i);
4467 					if (!si_vm_reg_valid(reg)) {
4468 						DRM_ERROR("CP DMA Bad SRC register\n");
4469 						return -EINVAL;
4470 					}
4471 				}
4472 			}
4473 		}
4474 	}
4475 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4476 		/* dst address space is register */
4477 		if (((info & 0x00300000) >> 20) == 0) {
4478 			start_reg = ib[idx + 2];
4479 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4480 				reg = start_reg;
4481 				if (!si_vm_reg_valid(reg)) {
4482 					DRM_ERROR("CP DMA Bad DST register\n");
4483 					return -EINVAL;
4484 				}
4485 			} else {
4486 				for (i = 0; i < (command & 0x1fffff); i++) {
4487 					reg = start_reg + (4 * i);
4488 					if (!si_vm_reg_valid(reg)) {
4489 						DRM_ERROR("CP DMA Bad DST register\n");
4490 						return -EINVAL;
4491 					}
4492 				}
4493 			}
4494 		}
4495 	}
4496 	return 0;
4497 }
4498 
4499 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4500 				   u32 *ib, struct radeon_cs_packet *pkt)
4501 {
4502 	int r;
4503 	u32 idx = pkt->idx + 1;
4504 	u32 idx_value = ib[idx];
4505 	u32 start_reg, end_reg, reg, i;
4506 
4507 	switch (pkt->opcode) {
4508 	case PACKET3_NOP:
4509 	case PACKET3_SET_BASE:
4510 	case PACKET3_CLEAR_STATE:
4511 	case PACKET3_INDEX_BUFFER_SIZE:
4512 	case PACKET3_DISPATCH_DIRECT:
4513 	case PACKET3_DISPATCH_INDIRECT:
4514 	case PACKET3_ALLOC_GDS:
4515 	case PACKET3_WRITE_GDS_RAM:
4516 	case PACKET3_ATOMIC_GDS:
4517 	case PACKET3_ATOMIC:
4518 	case PACKET3_OCCLUSION_QUERY:
4519 	case PACKET3_SET_PREDICATION:
4520 	case PACKET3_COND_EXEC:
4521 	case PACKET3_PRED_EXEC:
4522 	case PACKET3_DRAW_INDIRECT:
4523 	case PACKET3_DRAW_INDEX_INDIRECT:
4524 	case PACKET3_INDEX_BASE:
4525 	case PACKET3_DRAW_INDEX_2:
4526 	case PACKET3_CONTEXT_CONTROL:
4527 	case PACKET3_INDEX_TYPE:
4528 	case PACKET3_DRAW_INDIRECT_MULTI:
4529 	case PACKET3_DRAW_INDEX_AUTO:
4530 	case PACKET3_DRAW_INDEX_IMMD:
4531 	case PACKET3_NUM_INSTANCES:
4532 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4533 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4534 	case PACKET3_DRAW_INDEX_OFFSET_2:
4535 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4536 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4537 	case PACKET3_MPEG_INDEX:
4538 	case PACKET3_WAIT_REG_MEM:
4539 	case PACKET3_MEM_WRITE:
4540 	case PACKET3_PFP_SYNC_ME:
4541 	case PACKET3_SURFACE_SYNC:
4542 	case PACKET3_EVENT_WRITE:
4543 	case PACKET3_EVENT_WRITE_EOP:
4544 	case PACKET3_EVENT_WRITE_EOS:
4545 	case PACKET3_SET_CONTEXT_REG:
4546 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4547 	case PACKET3_SET_SH_REG:
4548 	case PACKET3_SET_SH_REG_OFFSET:
4549 	case PACKET3_INCREMENT_DE_COUNTER:
4550 	case PACKET3_WAIT_ON_CE_COUNTER:
4551 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4552 	case PACKET3_ME_WRITE:
4553 		break;
4554 	case PACKET3_COPY_DATA:
4555 		if ((idx_value & 0xf00) == 0) {
4556 			reg = ib[idx + 3] * 4;
4557 			if (!si_vm_reg_valid(reg))
4558 				return -EINVAL;
4559 		}
4560 		break;
4561 	case PACKET3_WRITE_DATA:
4562 		if ((idx_value & 0xf00) == 0) {
4563 			start_reg = ib[idx + 1] * 4;
4564 			if (idx_value & 0x10000) {
4565 				if (!si_vm_reg_valid(start_reg))
4566 					return -EINVAL;
4567 			} else {
4568 				for (i = 0; i < (pkt->count - 2); i++) {
4569 					reg = start_reg + (4 * i);
4570 					if (!si_vm_reg_valid(reg))
4571 						return -EINVAL;
4572 				}
4573 			}
4574 		}
4575 		break;
4576 	case PACKET3_COND_WRITE:
4577 		if (idx_value & 0x100) {
4578 			reg = ib[idx + 5] * 4;
4579 			if (!si_vm_reg_valid(reg))
4580 				return -EINVAL;
4581 		}
4582 		break;
4583 	case PACKET3_COPY_DW:
4584 		if (idx_value & 0x2) {
4585 			reg = ib[idx + 3] * 4;
4586 			if (!si_vm_reg_valid(reg))
4587 				return -EINVAL;
4588 		}
4589 		break;
4590 	case PACKET3_SET_CONFIG_REG:
4591 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4592 		end_reg = 4 * pkt->count + start_reg - 4;
4593 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4594 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4595 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4596 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4597 			return -EINVAL;
4598 		}
4599 		for (i = 0; i < pkt->count; i++) {
4600 			reg = start_reg + (4 * i);
4601 			if (!si_vm_reg_valid(reg))
4602 				return -EINVAL;
4603 		}
4604 		break;
4605 	case PACKET3_CP_DMA:
4606 		r = si_vm_packet3_cp_dma_check(ib, idx);
4607 		if (r)
4608 			return r;
4609 		break;
4610 	default:
4611 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4612 		return -EINVAL;
4613 	}
4614 	return 0;
4615 }
4616 
4617 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4618 				       u32 *ib, struct radeon_cs_packet *pkt)
4619 {
4620 	int r;
4621 	u32 idx = pkt->idx + 1;
4622 	u32 idx_value = ib[idx];
4623 	u32 start_reg, reg, i;
4624 
4625 	switch (pkt->opcode) {
4626 	case PACKET3_NOP:
4627 	case PACKET3_SET_BASE:
4628 	case PACKET3_CLEAR_STATE:
4629 	case PACKET3_DISPATCH_DIRECT:
4630 	case PACKET3_DISPATCH_INDIRECT:
4631 	case PACKET3_ALLOC_GDS:
4632 	case PACKET3_WRITE_GDS_RAM:
4633 	case PACKET3_ATOMIC_GDS:
4634 	case PACKET3_ATOMIC:
4635 	case PACKET3_OCCLUSION_QUERY:
4636 	case PACKET3_SET_PREDICATION:
4637 	case PACKET3_COND_EXEC:
4638 	case PACKET3_PRED_EXEC:
4639 	case PACKET3_CONTEXT_CONTROL:
4640 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4641 	case PACKET3_WAIT_REG_MEM:
4642 	case PACKET3_MEM_WRITE:
4643 	case PACKET3_PFP_SYNC_ME:
4644 	case PACKET3_SURFACE_SYNC:
4645 	case PACKET3_EVENT_WRITE:
4646 	case PACKET3_EVENT_WRITE_EOP:
4647 	case PACKET3_EVENT_WRITE_EOS:
4648 	case PACKET3_SET_CONTEXT_REG:
4649 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4650 	case PACKET3_SET_SH_REG:
4651 	case PACKET3_SET_SH_REG_OFFSET:
4652 	case PACKET3_INCREMENT_DE_COUNTER:
4653 	case PACKET3_WAIT_ON_CE_COUNTER:
4654 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4655 	case PACKET3_ME_WRITE:
4656 		break;
4657 	case PACKET3_COPY_DATA:
4658 		if ((idx_value & 0xf00) == 0) {
4659 			reg = ib[idx + 3] * 4;
4660 			if (!si_vm_reg_valid(reg))
4661 				return -EINVAL;
4662 		}
4663 		break;
4664 	case PACKET3_WRITE_DATA:
4665 		if ((idx_value & 0xf00) == 0) {
4666 			start_reg = ib[idx + 1] * 4;
4667 			if (idx_value & 0x10000) {
4668 				if (!si_vm_reg_valid(start_reg))
4669 					return -EINVAL;
4670 			} else {
4671 				for (i = 0; i < (pkt->count - 2); i++) {
4672 					reg = start_reg + (4 * i);
4673 					if (!si_vm_reg_valid(reg))
4674 						return -EINVAL;
4675 				}
4676 			}
4677 		}
4678 		break;
4679 	case PACKET3_COND_WRITE:
4680 		if (idx_value & 0x100) {
4681 			reg = ib[idx + 5] * 4;
4682 			if (!si_vm_reg_valid(reg))
4683 				return -EINVAL;
4684 		}
4685 		break;
4686 	case PACKET3_COPY_DW:
4687 		if (idx_value & 0x2) {
4688 			reg = ib[idx + 3] * 4;
4689 			if (!si_vm_reg_valid(reg))
4690 				return -EINVAL;
4691 		}
4692 		break;
4693 	case PACKET3_CP_DMA:
4694 		r = si_vm_packet3_cp_dma_check(ib, idx);
4695 		if (r)
4696 			return r;
4697 		break;
4698 	default:
4699 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4700 		return -EINVAL;
4701 	}
4702 	return 0;
4703 }
4704 
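/**
 * si_ib_parse - validate an IB submitted to a VM ring
 *
 * @rdev: radeon_device pointer
 * @ib: radeon indirect buffer object
 *
 * Walk the packets in the IB and reject any packet type, opcode
 * or register access that is not allowed from userspace; on
 * error, dump the offending IB.
 * Returns 0 on success, -EINVAL on an invalid packet (SI).
 */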
4705 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4706 {
4707 	int ret = 0;
4708 	u32 idx = 0, i;
4709 	struct radeon_cs_packet pkt;
4710 
4711 	do {
4712 		pkt.idx = idx;
4713 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4714 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4715 		pkt.one_reg_wr = 0;
4716 		switch (pkt.type) {
4717 		case RADEON_PACKET_TYPE0:
4718 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4719 			ret = -EINVAL;
4720 			break;
4721 		case RADEON_PACKET_TYPE2:
4722 			idx += 1;
4723 			break;
4724 		case RADEON_PACKET_TYPE3:
4725 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4726 			if (ib->is_const_ib)
4727 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4728 			else {
4729 				switch (ib->ring) {
4730 				case RADEON_RING_TYPE_GFX_INDEX:
4731 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4732 					break;
4733 				case CAYMAN_RING_TYPE_CP1_INDEX:
4734 				case CAYMAN_RING_TYPE_CP2_INDEX:
4735 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4736 					break;
4737 				default:
4738 					dev_err(rdev->dev, "Non-PM4 ring %d!\n", ib->ring);
4739 					ret = -EINVAL;
4740 					break;
4741 				}
4742 			}
4743 			idx += pkt.count + 2;
4744 			break;
4745 		default:
4746 			dev_err(rdev->dev, "Unknown packet type %d!\n", pkt.type);
4747 			ret = -EINVAL;
4748 			break;
4749 		}
4750 		if (ret) {
4751 			for (i = 0; i < ib->length_dw; i++) {
4752 				if (i == idx)
4753 					printk("\t0x%08x <---\n", ib->ptr[i]);
4754 				else
4755 					printk("\t0x%08x\n", ib->ptr[i]);
4756 			}
4757 			break;
4758 		}
4759 	} while (idx < ib->length_dw);
4760 
4761 	return ret;
4762 }
4763 
4764 /*
4765  * vm
4766  */
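/**
 * si_vm_init - set up the vm manager
 *
 * @rdev: radeon_device pointer
 *
 * Set the number of VMs (16) and the vram base offset used by
 * the vm manager.
 * Returns 0 (SI).
 */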
4767 int si_vm_init(struct radeon_device *rdev)
4768 {
4769 	/* number of VMs */
4770 	rdev->vm_manager.nvm = 16;
4771 	/* base offset of vram pages */
4772 	rdev->vm_manager.vram_base_offset = 0;
4773 
4774 	return 0;
4775 }
4776 
4777 void si_vm_fini(struct radeon_device *rdev)
4778 {
4779 }
4780 
4781 /**
4782  * si_vm_decode_fault - print human readable fault info
4783  *
4784  * @rdev: radeon_device pointer
4785  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4786  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4787  *
4788  * Print human readable fault information (SI).
4789  */
4790 static void si_vm_decode_fault(struct radeon_device *rdev,
4791 			       u32 status, u32 addr)
4792 {
4793 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4794 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4795 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4796 	char *block;
4797 
4798 	if (rdev->family == CHIP_TAHITI) {
4799 		switch (mc_id) {
4800 		case 160:
4801 		case 144:
4802 		case 96:
4803 		case 80:
4804 		case 224:
4805 		case 208:
4806 		case 32:
4807 		case 16:
4808 			block = "CB";
4809 			break;
4810 		case 161:
4811 		case 145:
4812 		case 97:
4813 		case 81:
4814 		case 225:
4815 		case 209:
4816 		case 33:
4817 		case 17:
4818 			block = "CB_FMASK";
4819 			break;
4820 		case 162:
4821 		case 146:
4822 		case 98:
4823 		case 82:
4824 		case 226:
4825 		case 210:
4826 		case 34:
4827 		case 18:
4828 			block = "CB_CMASK";
4829 			break;
4830 		case 163:
4831 		case 147:
4832 		case 99:
4833 		case 83:
4834 		case 227:
4835 		case 211:
4836 		case 35:
4837 		case 19:
4838 			block = "CB_IMMED";
4839 			break;
4840 		case 164:
4841 		case 148:
4842 		case 100:
4843 		case 84:
4844 		case 228:
4845 		case 212:
4846 		case 36:
4847 		case 20:
4848 			block = "DB";
4849 			break;
4850 		case 165:
4851 		case 149:
4852 		case 101:
4853 		case 85:
4854 		case 229:
4855 		case 213:
4856 		case 37:
4857 		case 21:
4858 			block = "DB_HTILE";
4859 			break;
4860 		case 167:
4861 		case 151:
4862 		case 103:
4863 		case 87:
4864 		case 231:
4865 		case 215:
4866 		case 39:
4867 		case 23:
4868 			block = "DB_STEN";
4869 			break;
4870 		case 72:
4871 		case 68:
4872 		case 64:
4873 		case 8:
4874 		case 4:
4875 		case 0:
4876 		case 136:
4877 		case 132:
4878 		case 128:
4879 		case 200:
4880 		case 196:
4881 		case 192:
4882 			block = "TC";
4883 			break;
4884 		case 112:
4885 		case 48:
4886 			block = "CP";
4887 			break;
4888 		case 49:
4889 		case 177:
4890 		case 50:
4891 		case 178:
4892 			block = "SH";
4893 			break;
4894 		case 53:
4895 		case 190:
4896 			block = "VGT";
4897 			break;
4898 		case 117:
4899 			block = "IH";
4900 			break;
4901 		case 51:
4902 		case 115:
4903 			block = "RLC";
4904 			break;
4905 		case 119:
4906 		case 183:
4907 			block = "DMA0";
4908 			break;
4909 		case 61:
4910 			block = "DMA1";
4911 			break;
4912 		case 248:
4913 		case 120:
4914 			block = "HDP";
4915 			break;
4916 		default:
4917 			block = "unknown";
4918 			break;
4919 		}
4920 	} else {
4921 		switch (mc_id) {
4922 		case 32:
4923 		case 16:
4924 		case 96:
4925 		case 80:
4926 		case 160:
4927 		case 144:
4928 		case 224:
4929 		case 208:
4930 			block = "CB";
4931 			break;
4932 		case 33:
4933 		case 17:
4934 		case 97:
4935 		case 81:
4936 		case 161:
4937 		case 145:
4938 		case 225:
4939 		case 209:
4940 			block = "CB_FMASK";
4941 			break;
4942 		case 34:
4943 		case 18:
4944 		case 98:
4945 		case 82:
4946 		case 162:
4947 		case 146:
4948 		case 226:
4949 		case 210:
4950 			block = "CB_CMASK";
4951 			break;
4952 		case 35:
4953 		case 19:
4954 		case 99:
4955 		case 83:
4956 		case 163:
4957 		case 147:
4958 		case 227:
4959 		case 211:
4960 			block = "CB_IMMED";
4961 			break;
4962 		case 36:
4963 		case 20:
4964 		case 100:
4965 		case 84:
4966 		case 164:
4967 		case 148:
4968 		case 228:
4969 		case 212:
4970 			block = "DB";
4971 			break;
4972 		case 37:
4973 		case 21:
4974 		case 101:
4975 		case 85:
4976 		case 165:
4977 		case 149:
4978 		case 229:
4979 		case 213:
4980 			block = "DB_HTILE";
4981 			break;
4982 		case 39:
4983 		case 23:
4984 		case 103:
4985 		case 87:
4986 		case 167:
4987 		case 151:
4988 		case 231:
4989 		case 215:
4990 			block = "DB_STEN";
4991 			break;
4992 		case 72:
4993 		case 68:
4994 		case 8:
4995 		case 4:
4996 		case 136:
4997 		case 132:
4998 		case 200:
4999 		case 196:
5000 			block = "TC";
5001 			break;
5002 		case 112:
5003 		case 48:
5004 			block = "CP";
5005 			break;
5006 		case 49:
5007 		case 177:
5008 		case 50:
5009 		case 178:
5010 			block = "SH";
5011 			break;
5012 		case 53:
5013 			block = "VGT";
5014 			break;
5015 		case 117:
5016 			block = "IH";
5017 			break;
5018 		case 51:
5019 		case 115:
5020 			block = "RLC";
5021 			break;
5022 		case 119:
5023 		case 183:
5024 			block = "DMA0";
5025 			break;
5026 		case 61:
5027 			block = "DMA1";
5028 			break;
5029 		case 248:
5030 		case 120:
5031 			block = "HDP";
5032 			break;
5033 		default:
5034 			block = "unknown";
5035 			break;
5036 		}
5037 	}
5038 
5039 	printk(KERN_ERR "VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5040 	       protections, vmid, addr,
5041 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5042 	       block, mc_id);
5043 }
5044 
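/**
 * si_vm_flush - flush the TLB for a VM id via the gfx CP ring
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring pointer
 * @vm_id: VM id to flush
 * @pd_addr: page directory base address
 *
 * Update the page table base address for the requested VM id, flush the
 * HDP cache, request a TLB invalidate and wait for it to complete, then
 * sync the PFP back up to the ME (SI).
 */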
5045 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5046 		 unsigned vm_id, uint64_t pd_addr)
5047 {
5048 	/* write new base address */
5049 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5050 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5051 				 WRITE_DATA_DST_SEL(0)));
5052 
5053 	if (vm_id < 8) {
5054 		radeon_ring_write(ring,
5055 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5056 	} else {
5057 		radeon_ring_write(ring,
5058 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5059 	}
5060 	radeon_ring_write(ring, 0);
5061 	radeon_ring_write(ring, pd_addr >> 12);
5062 
5063 	/* flush hdp cache */
5064 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5065 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5066 				 WRITE_DATA_DST_SEL(0)));
5067 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5068 	radeon_ring_write(ring, 0);
5069 	radeon_ring_write(ring, 0x1);
5070 
5071 	/* bits 0-15 are the VM contexts 0-15 */
5072 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5073 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5074 				 WRITE_DATA_DST_SEL(0)));
5075 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5076 	radeon_ring_write(ring, 0);
5077 	radeon_ring_write(ring, 1 << vm_id);
5078 
5079 	/* wait for the invalidate to complete */
5080 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5081 	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5082 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5083 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5084 	radeon_ring_write(ring, 0);
5085 	radeon_ring_write(ring, 0); /* ref */
5086 	radeon_ring_write(ring, 0); /* mask */
5087 	radeon_ring_write(ring, 0x20); /* poll interval */
5088 
5089 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
5090 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5091 	radeon_ring_write(ring, 0x0);
5092 }
5093 
5094 /*
5095  *  Power and clock gating
5096  */
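/* Poll RLC_SERDES_MASTER_BUSY_0 and _1 until both read back zero or the
 * usec timeout expires.
 */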
5097 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5098 {
5099 	int i;
5100 
5101 	for (i = 0; i < rdev->usec_timeout; i++) {
5102 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5103 			break;
5104 		udelay(1);
5105 	}
5106 
5107 	for (i = 0; i < rdev->usec_timeout; i++) {
5108 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5109 			break;
5110 		udelay(1);
5111 	}
5112 }
5113 
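/* Enable or disable the CP context busy/empty (gui idle) interrupts.
 * When disabling, poll RLC_STAT until, of the bits checked, only
 * GFX_CLOCK_STATUS and GFX_POWER_STATUS remain set.
 */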
5114 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5115 					 bool enable)
5116 {
5117 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5118 	u32 mask;
5119 	int i;
5120 
5121 	if (enable)
5122 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5123 	else
5124 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5125 	WREG32(CP_INT_CNTL_RING0, tmp);
5126 
5127 	if (!enable) {
5128 		/* read a gfx register */
5129 		tmp = RREG32(DB_DEPTH_INFO);
5130 
5131 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5132 		for (i = 0; i < rdev->usec_timeout; i++) {
5133 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5134 				break;
5135 			udelay(1);
5136 		}
5137 	}
5138 }
5139 
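/* Configure UVD dynamic clock gating (DCM).  In sw mode, bits 11-30 of
 * UVD_CGC_CTRL are cleared and the dynamic dividers in UVD_CGC_CTRL2 are
 * enabled; in hw mode those bits are set and UVD_CGC_CTRL2 is cleared.
 */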
5140 static void si_set_uvd_dcm(struct radeon_device *rdev,
5141 			   bool sw_mode)
5142 {
5143 	u32 tmp, tmp2;
5144 
5145 	tmp = RREG32(UVD_CGC_CTRL);
5146 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5147 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5148 
5149 	if (sw_mode) {
5150 		tmp &= ~0x7ffff800;
5151 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5152 	} else {
5153 		tmp |= 0x7ffff800;
5154 		tmp2 = 0;
5155 	}
5156 
5157 	WREG32(UVD_CGC_CTRL, tmp);
5158 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5159 }
5160 
5161 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5162 {
5163 	bool hw_mode = true;
5164 
5165 	if (hw_mode) {
5166 		si_set_uvd_dcm(rdev, false);
5167 	} else {
5168 		u32 tmp = RREG32(UVD_CGC_CTRL);
5169 		tmp &= ~DCM;
5170 		WREG32(UVD_CGC_CTRL, tmp);
5171 	}
5172 }
5173 
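/* If the RLC is running, stop it and wait for its serdes to go idle.
 * Returns the previous RLC_CNTL value so the caller can restore it with
 * si_update_rlc().
 */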
5174 static u32 si_halt_rlc(struct radeon_device *rdev)
5175 {
5176 	u32 data, orig;
5177 
5178 	orig = data = RREG32(RLC_CNTL);
5179 
5180 	if (data & RLC_ENABLE) {
5181 		data &= ~RLC_ENABLE;
5182 		WREG32(RLC_CNTL, data);
5183 
5184 		si_wait_for_rlc_serdes(rdev);
5185 	}
5186 
5187 	return orig;
5188 }
5189 
5190 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5191 {
5192 	u32 tmp;
5193 
5194 	tmp = RREG32(RLC_CNTL);
5195 	if (tmp != rlc)
5196 		WREG32(RLC_CNTL, rlc);
5197 }
5198 
5199 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5200 {
5201 	u32 data, orig;
5202 
5203 	orig = data = RREG32(DMA_PG);
5204 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5205 		data |= PG_CNTL_ENABLE;
5206 	else
5207 		data &= ~PG_CNTL_ENABLE;
5208 	if (orig != data)
5209 		WREG32(DMA_PG, data);
5210 }
5211 
5212 static void si_init_dma_pg(struct radeon_device *rdev)
5213 {
5214 	u32 tmp;
5215 
5216 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5217 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5218 
5219 	for (tmp = 0; tmp < 5; tmp++)
5220 		WREG32(DMA_PGFSM_WRITE, 0);
5221 }
5222 
5223 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5224 			       bool enable)
5225 {
5226 	u32 tmp;
5227 
5228 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5229 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5230 		WREG32(RLC_TTOP_D, tmp);
5231 
5232 		tmp = RREG32(RLC_PG_CNTL);
5233 		tmp |= GFX_PG_ENABLE;
5234 		WREG32(RLC_PG_CNTL, tmp);
5235 
5236 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5237 		tmp |= AUTO_PG_EN;
5238 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5239 	} else {
5240 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5241 		tmp &= ~AUTO_PG_EN;
5242 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5243 
5244 		tmp = RREG32(DB_RENDER_CONTROL);
5245 	}
5246 }
5247 
5248 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5249 {
5250 	u32 tmp;
5251 
5252 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5253 
5254 	tmp = RREG32(RLC_PG_CNTL);
5255 	tmp |= GFX_PG_SRC;
5256 	WREG32(RLC_PG_CNTL, tmp);
5257 
5258 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5259 
5260 	tmp = RREG32(RLC_AUTO_PG_CTRL);
5261 
5262 	tmp &= ~GRBM_REG_SGIT_MASK;
5263 	tmp |= GRBM_REG_SGIT(0x700);
5264 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5265 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5266 }
5267 
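/* Return a bitmap of the active CUs for the given SE/SH, derived from
 * the disabled-CU fields of CC_GC_SHADER_ARRAY_CONFIG and
 * GC_USER_SHADER_ARRAY_CONFIG.
 */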
5268 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5269 {
5270 	u32 mask = 0, tmp, tmp1;
5271 	int i;
5272 
5273 	si_select_se_sh(rdev, se, sh);
5274 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5275 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5276 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5277 
5278 	tmp &= 0xffff0000;
5279 
5280 	tmp |= tmp1;
5281 	tmp >>= 16;
5282 
5283 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i++) {
5284 		mask <<= 1;
5285 		mask |= 1;
5286 	}
5287 
5288 	return (~tmp) & mask;
5289 }
5290 
5291 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5292 {
5293 	u32 i, j, k, active_cu_number = 0;
5294 	u32 mask, counter, cu_bitmap;
5295 	u32 tmp = 0;
5296 
5297 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5298 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5299 			mask = 1;
5300 			cu_bitmap = 0;
5301 			counter = 0;
5302 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5303 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5304 					if (counter < 2)
5305 						cu_bitmap |= mask;
5306 					counter++;
5307 				}
5308 				mask <<= 1;
5309 			}
5310 
5311 			active_cu_number += counter;
5312 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5313 		}
5314 	}
5315 
5316 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5317 
5318 	tmp = RREG32(RLC_MAX_PG_CU);
5319 	tmp &= ~MAX_PU_CU_MASK;
5320 	tmp |= MAX_PU_CU(active_cu_number);
5321 	WREG32(RLC_MAX_PG_CU, tmp);
5322 }
5323 
5324 static void si_enable_cgcg(struct radeon_device *rdev,
5325 			   bool enable)
5326 {
5327 	u32 data, orig, tmp;
5328 
5329 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5330 
5331 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5332 		si_enable_gui_idle_interrupt(rdev, true);
5333 
5334 		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5335 
5336 		tmp = si_halt_rlc(rdev);
5337 
5338 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5339 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5340 		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5341 
5342 		si_wait_for_rlc_serdes(rdev);
5343 
5344 		si_update_rlc(rdev, tmp);
5345 
5346 		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5347 
5348 		data |= CGCG_EN | CGLS_EN;
5349 	} else {
5350 		si_enable_gui_idle_interrupt(rdev, false);
5351 
5352 		RREG32(CB_CGTT_SCLK_CTRL);
5353 		RREG32(CB_CGTT_SCLK_CTRL);
5354 		RREG32(CB_CGTT_SCLK_CTRL);
5355 		RREG32(CB_CGTT_SCLK_CTRL);
5356 
5357 		data &= ~(CGCG_EN | CGLS_EN);
5358 	}
5359 
5360 	if (orig != data)
5361 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5362 }
5363 
5364 static void si_enable_mgcg(struct radeon_device *rdev,
5365 			   bool enable)
5366 {
5367 	u32 data, orig, tmp = 0;
5368 
5369 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5370 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5371 		data = 0x96940200;
5372 		if (orig != data)
5373 			WREG32(CGTS_SM_CTRL_REG, data);
5374 
5375 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5376 			orig = data = RREG32(CP_MEM_SLP_CNTL);
5377 			data |= CP_MEM_LS_EN;
5378 			if (orig != data)
5379 				WREG32(CP_MEM_SLP_CNTL, data);
5380 		}
5381 
5382 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5383 		data &= 0xffffffc0;
5384 		if (orig != data)
5385 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5386 
5387 		tmp = si_halt_rlc(rdev);
5388 
5389 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5390 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5391 		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5392 
5393 		si_update_rlc(rdev, tmp);
5394 	} else {
5395 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5396 		data |= 0x00000003;
5397 		if (orig != data)
5398 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5399 
5400 		data = RREG32(CP_MEM_SLP_CNTL);
5401 		if (data & CP_MEM_LS_EN) {
5402 			data &= ~CP_MEM_LS_EN;
5403 			WREG32(CP_MEM_SLP_CNTL, data);
5404 		}
5405 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5406 		data |= LS_OVERRIDE | OVERRIDE;
5407 		if (orig != data)
5408 			WREG32(CGTS_SM_CTRL_REG, data);
5409 
5410 		tmp = si_halt_rlc(rdev);
5411 
5412 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5413 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5414 		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5415 
5416 		si_update_rlc(rdev, tmp);
5417 	}
5418 }
5419 
5420 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5421 			       bool enable)
5422 {
5423 	u32 orig, data, tmp;
5424 
5425 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5426 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5427 		tmp |= 0x3fff;
5428 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5429 
5430 		orig = data = RREG32(UVD_CGC_CTRL);
5431 		data |= DCM;
5432 		if (orig != data)
5433 			WREG32(UVD_CGC_CTRL, data);
5434 
5435 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5436 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5437 	} else {
5438 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5439 		tmp &= ~0x3fff;
5440 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5441 
5442 		orig = data = RREG32(UVD_CGC_CTRL);
5443 		data &= ~DCM;
5444 		if (orig != data)
5445 			WREG32(UVD_CGC_CTRL, data);
5446 
5447 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5448 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5449 	}
5450 }
5451 
5452 static const u32 mc_cg_registers[] =
5453 {
5454 	MC_HUB_MISC_HUB_CG,
5455 	MC_HUB_MISC_SIP_CG,
5456 	MC_HUB_MISC_VM_CG,
5457 	MC_XPB_CLK_GAT,
5458 	ATC_MISC_CG,
5459 	MC_CITF_MISC_WR_CG,
5460 	MC_CITF_MISC_RD_CG,
5461 	MC_CITF_MISC_VM_CG,
5462 	VM_L2_CG,
5463 };
5464 
5465 static void si_enable_mc_ls(struct radeon_device *rdev,
5466 			    bool enable)
5467 {
5468 	int i;
5469 	u32 orig, data;
5470 
5471 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5472 		orig = data = RREG32(mc_cg_registers[i]);
5473 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5474 			data |= MC_LS_ENABLE;
5475 		else
5476 			data &= ~MC_LS_ENABLE;
5477 		if (data != orig)
5478 			WREG32(mc_cg_registers[i], data);
5479 	}
5480 }
5481 
5482 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5483 			       bool enable)
5484 {
5485 	int i;
5486 	u32 orig, data;
5487 
5488 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5489 		orig = data = RREG32(mc_cg_registers[i]);
5490 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5491 			data |= MC_CG_ENABLE;
5492 		else
5493 			data &= ~MC_CG_ENABLE;
5494 		if (data != orig)
5495 			WREG32(mc_cg_registers[i], data);
5496 	}
5497 }
5498 
5499 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5500 			       bool enable)
5501 {
5502 	u32 orig, data, offset;
5503 	int i;
5504 
5505 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5506 		for (i = 0; i < 2; i++) {
5507 			if (i == 0)
5508 				offset = DMA0_REGISTER_OFFSET;
5509 			else
5510 				offset = DMA1_REGISTER_OFFSET;
5511 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5512 			data &= ~MEM_POWER_OVERRIDE;
5513 			if (data != orig)
5514 				WREG32(DMA_POWER_CNTL + offset, data);
5515 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5516 		}
5517 	} else {
5518 		for (i = 0; i < 2; i++) {
5519 			if (i == 0)
5520 				offset = DMA0_REGISTER_OFFSET;
5521 			else
5522 				offset = DMA1_REGISTER_OFFSET;
5523 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5524 			data |= MEM_POWER_OVERRIDE;
5525 			if (data != orig)
5526 				WREG32(DMA_POWER_CNTL + offset, data);
5527 
5528 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5529 			data = 0xff000000;
5530 			if (data != orig)
5531 				WREG32(DMA_CLK_CTRL + offset, data);
5532 		}
5533 	}
5534 }
5535 
5536 static void si_enable_bif_mgls(struct radeon_device *rdev,
5537 			       bool enable)
5538 {
5539 	u32 orig, data;
5540 
5541 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5542 
5543 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5544 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5545 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5546 	else
5547 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5548 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5549 
5550 	if (orig != data)
5551 		WREG32_PCIE(PCIE_CNTL2, data);
5552 }
5553 
5554 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5555 			       bool enable)
5556 {
5557 	u32 orig, data;
5558 
5559 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5560 
5561 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5562 		data &= ~CLOCK_GATING_DIS;
5563 	else
5564 		data |= CLOCK_GATING_DIS;
5565 
5566 	if (orig != data)
5567 		WREG32(HDP_HOST_PATH_CNTL, data);
5568 }
5569 
5570 static void si_enable_hdp_ls(struct radeon_device *rdev,
5571 			     bool enable)
5572 {
5573 	u32 orig, data;
5574 
5575 	orig = data = RREG32(HDP_MEM_POWER_LS);
5576 
5577 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5578 		data |= HDP_LS_ENABLE;
5579 	else
5580 		data &= ~HDP_LS_ENABLE;
5581 
5582 	if (orig != data)
5583 		WREG32(HDP_MEM_POWER_LS, data);
5584 }
5585 
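/* Enable or disable clock gating for the requested blocks.  For GFX the
 * gui idle interrupt is masked around the update; on enable MGCG comes
 * up before CGCG, and the order is reversed on disable.
 */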
5586 static void si_update_cg(struct radeon_device *rdev,
5587 			 u32 block, bool enable)
5588 {
5589 	if (block & RADEON_CG_BLOCK_GFX) {
5590 		si_enable_gui_idle_interrupt(rdev, false);
5591 		/* order matters! */
5592 		if (enable) {
5593 			si_enable_mgcg(rdev, true);
5594 			si_enable_cgcg(rdev, true);
5595 		} else {
5596 			si_enable_cgcg(rdev, false);
5597 			si_enable_mgcg(rdev, false);
5598 		}
5599 		si_enable_gui_idle_interrupt(rdev, true);
5600 	}
5601 
5602 	if (block & RADEON_CG_BLOCK_MC) {
5603 		si_enable_mc_mgcg(rdev, enable);
5604 		si_enable_mc_ls(rdev, enable);
5605 	}
5606 
5607 	if (block & RADEON_CG_BLOCK_SDMA) {
5608 		si_enable_dma_mgcg(rdev, enable);
5609 	}
5610 
5611 	if (block & RADEON_CG_BLOCK_BIF) {
5612 		si_enable_bif_mgls(rdev, enable);
5613 	}
5614 
5615 	if (block & RADEON_CG_BLOCK_UVD) {
5616 		if (rdev->has_uvd) {
5617 			si_enable_uvd_mgcg(rdev, enable);
5618 		}
5619 	}
5620 
5621 	if (block & RADEON_CG_BLOCK_HDP) {
5622 		si_enable_hdp_mgcg(rdev, enable);
5623 		si_enable_hdp_ls(rdev, enable);
5624 	}
5625 }
5626 
5627 static void si_init_cg(struct radeon_device *rdev)
5628 {
5629 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5630 			    RADEON_CG_BLOCK_MC |
5631 			    RADEON_CG_BLOCK_SDMA |
5632 			    RADEON_CG_BLOCK_BIF |
5633 			    RADEON_CG_BLOCK_HDP), true);
5634 	if (rdev->has_uvd) {
5635 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5636 		si_init_uvd_internal_cg(rdev);
5637 	}
5638 }
5639 
5640 static void si_fini_cg(struct radeon_device *rdev)
5641 {
5642 	if (rdev->has_uvd) {
5643 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5644 	}
5645 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5646 			    RADEON_CG_BLOCK_MC |
5647 			    RADEON_CG_BLOCK_SDMA |
5648 			    RADEON_CG_BLOCK_BIF |
5649 			    RADEON_CG_BLOCK_HDP), false);
5650 }
5651 
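/**
 * si_get_csb_size - get the size of the clear state buffer
 *
 * @rdev: radeon_device pointer
 *
 * Walk the cs_data tables and return the number of dwords needed to hold
 * the clear state buffer filled in by si_get_csb_buffer() (SI).
 */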
5652 u32 si_get_csb_size(struct radeon_device *rdev)
5653 {
5654 	u32 count = 0;
5655 	const struct cs_section_def *sect = NULL;
5656 	const struct cs_extent_def *ext = NULL;
5657 
5658 	if (rdev->rlc.cs_data == NULL)
5659 		return 0;
5660 
5661 	/* begin clear state */
5662 	count += 2;
5663 	/* context control state */
5664 	count += 3;
5665 
5666 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5667 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5668 			if (sect->id == SECT_CONTEXT)
5669 				count += 2 + ext->reg_count;
5670 			else
5671 				return 0;
5672 		}
5673 	}
5674 	/* pa_sc_raster_config */
5675 	count += 3;
5676 	/* end clear state */
5677 	count += 2;
5678 	/* clear state */
5679 	count += 2;
5680 
5681 	return count;
5682 }
5683 
5684 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5685 {
5686 	u32 count = 0, i;
5687 	const struct cs_section_def *sect = NULL;
5688 	const struct cs_extent_def *ext = NULL;
5689 
5690 	if (rdev->rlc.cs_data == NULL)
5691 		return;
5692 	if (buffer == NULL)
5693 		return;
5694 
5695 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5696 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5697 
5698 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5699 	buffer[count++] = cpu_to_le32(0x80000000);
5700 	buffer[count++] = cpu_to_le32(0x80000000);
5701 
5702 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5703 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5704 			if (sect->id == SECT_CONTEXT) {
5705 				buffer[count++] =
5706 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5707 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5708 				for (i = 0; i < ext->reg_count; i++)
5709 					buffer[count++] = cpu_to_le32(ext->extent[i]);
5710 			} else {
5711 				return;
5712 			}
5713 		}
5714 	}
5715 
5716 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5717 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5718 	switch (rdev->family) {
5719 	case CHIP_TAHITI:
5720 	case CHIP_PITCAIRN:
5721 		buffer[count++] = cpu_to_le32(0x2a00126a);
5722 		break;
5723 	case CHIP_VERDE:
5724 		buffer[count++] = cpu_to_le32(0x0000124a);
5725 		break;
5726 	case CHIP_OLAND:
5727 		buffer[count++] = cpu_to_le32(0x00000082);
5728 		break;
5729 	case CHIP_HAINAN:
5730 		buffer[count++] = cpu_to_le32(0x00000000);
5731 		break;
5732 	default:
5733 		buffer[count++] = cpu_to_le32(0x00000000);
5734 		break;
5735 	}
5736 
5737 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5738 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5739 
5740 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5741 	buffer[count++] = cpu_to_le32(0);
5742 }
5743 
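/* Set up power gating: DMA PG, the always-on CU mask, and either the
 * full gfx CG/PG setup or just the RLC save/restore and clear state
 * base addresses when gfx PG is not supported.
 */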
5744 static void si_init_pg(struct radeon_device *rdev)
5745 {
5746 	if (rdev->pg_flags) {
5747 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5748 			si_init_dma_pg(rdev);
5749 		}
5750 		si_init_ao_cu_mask(rdev);
5751 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5752 			si_init_gfx_cgpg(rdev);
5753 		} else {
5754 			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5755 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5756 		}
5757 		si_enable_dma_pg(rdev, true);
5758 		si_enable_gfx_cgpg(rdev, true);
5759 	} else {
5760 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5761 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5762 	}
5763 }
5764 
5765 static void si_fini_pg(struct radeon_device *rdev)
5766 {
5767 	if (rdev->pg_flags) {
5768 		si_enable_dma_pg(rdev, false);
5769 		si_enable_gfx_cgpg(rdev, false);
5770 	}
5771 }
5772 
5773 /*
5774  * RLC
5775  */
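/* Pulse SOFT_RESET_RLC in GRBM_SOFT_RESET to soft reset the RLC. */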
5776 void si_rlc_reset(struct radeon_device *rdev)
5777 {
5778 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5779 
5780 	tmp |= SOFT_RESET_RLC;
5781 	WREG32(GRBM_SOFT_RESET, tmp);
5782 	udelay(50);
5783 	tmp &= ~SOFT_RESET_RLC;
5784 	WREG32(GRBM_SOFT_RESET, tmp);
5785 	udelay(50);
5786 }
5787 
5788 static void si_rlc_stop(struct radeon_device *rdev)
5789 {
5790 	WREG32(RLC_CNTL, 0);
5791 
5792 	si_enable_gui_idle_interrupt(rdev, false);
5793 
5794 	si_wait_for_rlc_serdes(rdev);
5795 }
5796 
5797 static void si_rlc_start(struct radeon_device *rdev)
5798 {
5799 	WREG32(RLC_CNTL, RLC_ENABLE);
5800 
5801 	si_enable_gui_idle_interrupt(rdev, true);
5802 
5803 	udelay(50);
5804 }
5805 
5806 static bool si_lbpw_supported(struct radeon_device *rdev)
5807 {
5808 	u32 tmp;
5809 
5810 	/* Enable LBPW only for DDR3 */
5811 	tmp = RREG32(MC_SEQ_MISC0);
5812 	if ((tmp & 0xF0000000) == 0xB0000000)
5813 		return true;
5814 	return false;
5815 }
5816 
5817 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5818 {
5819 	u32 tmp;
5820 
5821 	tmp = RREG32(RLC_LB_CNTL);
5822 	if (enable)
5823 		tmp |= LOAD_BALANCE_ENABLE;
5824 	else
5825 		tmp &= ~LOAD_BALANCE_ENABLE;
5826 	WREG32(RLC_LB_CNTL, tmp);
5827 
5828 	if (!enable) {
5829 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5830 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5831 	}
5832 }
5833 
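/* Stop and reset the RLC, set up power and clock gating, load the RLC
 * microcode (handling both the new and the legacy firmware layouts) and
 * start the RLC back up.
 */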
5834 static int si_rlc_resume(struct radeon_device *rdev)
5835 {
5836 	u32 i;
5837 
5838 	if (!rdev->rlc_fw)
5839 		return -EINVAL;
5840 
5841 	si_rlc_stop(rdev);
5842 
5843 	si_rlc_reset(rdev);
5844 
5845 	si_init_pg(rdev);
5846 
5847 	si_init_cg(rdev);
5848 
5849 	WREG32(RLC_RL_BASE, 0);
5850 	WREG32(RLC_RL_SIZE, 0);
5851 	WREG32(RLC_LB_CNTL, 0);
5852 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5853 	WREG32(RLC_LB_CNTR_INIT, 0);
5854 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5855 
5856 	WREG32(RLC_MC_CNTL, 0);
5857 	WREG32(RLC_UCODE_CNTL, 0);
5858 
5859 	if (rdev->new_fw) {
5860 		const struct rlc_firmware_header_v1_0 *hdr =
5861 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5862 		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5863 		const __le32 *fw_data = (const __le32 *)
5864 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5865 
5866 		radeon_ucode_print_rlc_hdr(&hdr->header);
5867 
5868 		for (i = 0; i < fw_size; i++) {
5869 			WREG32(RLC_UCODE_ADDR, i);
5870 			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5871 		}
5872 	} else {
5873 		const __be32 *fw_data =
5874 			(const __be32 *)rdev->rlc_fw->data;
5875 		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5876 			WREG32(RLC_UCODE_ADDR, i);
5877 			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5878 		}
5879 	}
5880 	WREG32(RLC_UCODE_ADDR, 0);
5881 
5882 	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5883 
5884 	si_rlc_start(rdev);
5885 
5886 	return 0;
5887 }
5888 
5889 static void si_enable_interrupts(struct radeon_device *rdev)
5890 {
5891 	u32 ih_cntl = RREG32(IH_CNTL);
5892 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5893 
5894 	ih_cntl |= ENABLE_INTR;
5895 	ih_rb_cntl |= IH_RB_ENABLE;
5896 	WREG32(IH_CNTL, ih_cntl);
5897 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5898 	rdev->ih.enabled = true;
5899 }
5900 
5901 static void si_disable_interrupts(struct radeon_device *rdev)
5902 {
5903 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5904 	u32 ih_cntl = RREG32(IH_CNTL);
5905 
5906 	ih_rb_cntl &= ~IH_RB_ENABLE;
5907 	ih_cntl &= ~ENABLE_INTR;
5908 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5909 	WREG32(IH_CNTL, ih_cntl);
5910 	/* set rptr, wptr to 0 */
5911 	WREG32(IH_RB_RPTR, 0);
5912 	WREG32(IH_RB_WPTR, 0);
5913 	rdev->ih.enabled = false;
5914 	rdev->ih.rptr = 0;
5915 }
5916 
5917 static void si_disable_interrupt_state(struct radeon_device *rdev)
5918 {
5919 	u32 tmp;
5920 
5921 	tmp = RREG32(CP_INT_CNTL_RING0) &
5922 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5923 	WREG32(CP_INT_CNTL_RING0, tmp);
5924 	WREG32(CP_INT_CNTL_RING1, 0);
5925 	WREG32(CP_INT_CNTL_RING2, 0);
5926 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5927 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5928 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5929 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5930 	WREG32(GRBM_INT_CNTL, 0);
5931 	WREG32(SRBM_INT_CNTL, 0);
5932 	if (rdev->num_crtc >= 2) {
5933 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5934 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5935 	}
5936 	if (rdev->num_crtc >= 4) {
5937 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5938 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5939 	}
5940 	if (rdev->num_crtc >= 6) {
5941 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5942 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5943 	}
5944 
5945 	if (rdev->num_crtc >= 2) {
5946 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5947 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5948 	}
5949 	if (rdev->num_crtc >= 4) {
5950 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5951 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5952 	}
5953 	if (rdev->num_crtc >= 6) {
5954 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5955 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5956 	}
5957 
5958 	if (!ASIC_IS_NODCE(rdev)) {
5959 		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5960 
5961 		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5962 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5963 		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5964 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5965 		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5966 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5967 		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5968 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5969 		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5970 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5971 		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5972 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5973 	}
5974 }
5975 
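/* Allocate the IH ring, bring up the RLC, then program the interrupt
 * control, IH ring buffer, writeback address and IH_CNTL defaults
 * before enabling interrupt delivery (SI).
 */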
5976 static int si_irq_init(struct radeon_device *rdev)
5977 {
5978 	int ret = 0;
5979 	int rb_bufsz;
5980 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5981 
5982 	/* allocate ring */
5983 	ret = r600_ih_ring_alloc(rdev);
5984 	if (ret)
5985 		return ret;
5986 
5987 	/* disable irqs */
5988 	si_disable_interrupts(rdev);
5989 
5990 	/* init rlc */
5991 	ret = si_rlc_resume(rdev);
5992 	if (ret) {
5993 		r600_ih_ring_fini(rdev);
5994 		return ret;
5995 	}
5996 
5997 	/* setup interrupt control */
5998 	/* set dummy read address to ring address */
5999 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6000 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6001 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
6002 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6003 	 */
6004 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6005 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6006 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6007 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6008 
6009 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6010 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6011 
6012 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6013 		      IH_WPTR_OVERFLOW_CLEAR |
6014 		      (rb_bufsz << 1));
6015 
6016 	if (rdev->wb.enabled)
6017 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6018 
6019 	/* set the writeback address whether it's enabled or not */
6020 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6021 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6022 
6023 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6024 
6025 	/* set rptr, wptr to 0 */
6026 	WREG32(IH_RB_RPTR, 0);
6027 	WREG32(IH_RB_WPTR, 0);
6028 
6029 	/* Default settings for IH_CNTL (disabled at first) */
6030 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6031 	/* RPTR_REARM only works if MSIs are enabled */
6032 	if (rdev->msi_enabled)
6033 		ih_cntl |= RPTR_REARM;
6034 	WREG32(IH_CNTL, ih_cntl);
6035 
6036 	/* force the active interrupt state to all disabled */
6037 	si_disable_interrupt_state(rdev);
6038 
6039 	pci_set_master(rdev->pdev);
6040 
6041 	/* enable irqs */
6042 	si_enable_interrupts(rdev);
6043 
6044 	return ret;
6045 }
6046 
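/**
 * si_irq_set - program the interrupt mask registers
 *
 * @rdev: radeon_device pointer
 *
 * Program the CP, DMA, CRTC vblank, pflip, HPD and thermal interrupt
 * enable bits according to the sources currently requested in rdev->irq,
 * then do a posting read (SI).
 */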
6047 int si_irq_set(struct radeon_device *rdev)
6048 {
6049 	u32 cp_int_cntl;
6050 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6051 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6052 	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
6053 	u32 grbm_int_cntl = 0;
6054 	u32 dma_cntl, dma_cntl1;
6055 	u32 thermal_int = 0;
6056 
6057 	if (!rdev->irq.installed) {
6058 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6059 		return -EINVAL;
6060 	}
6061 	/* don't enable anything if the ih is disabled */
6062 	if (!rdev->ih.enabled) {
6063 		si_disable_interrupts(rdev);
6064 		/* force the active interrupt state to all disabled */
6065 		si_disable_interrupt_state(rdev);
6066 		return 0;
6067 	}
6068 
6069 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6070 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6071 
6072 	if (!ASIC_IS_NODCE(rdev)) {
6073 		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6074 		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6075 		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6076 		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6077 		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6078 		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6079 	}
6080 
6081 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6082 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6083 
6084 	thermal_int = RREG32(CG_THERMAL_INT) &
6085 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6086 
6087 	/* enable CP interrupts on all rings */
6088 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6089 		DRM_DEBUG("si_irq_set: sw int gfx\n");
6090 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6091 	}
6092 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6093 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6094 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6095 	}
6096 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6097 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6098 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6099 	}
6100 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6101 		DRM_DEBUG("si_irq_set: sw int dma\n");
6102 		dma_cntl |= TRAP_ENABLE;
6103 	}
6104 
6105 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6106 		DRM_DEBUG("si_irq_set: sw int dma1\n");
6107 		dma_cntl1 |= TRAP_ENABLE;
6108 	}
6109 	if (rdev->irq.crtc_vblank_int[0] ||
6110 	    atomic_read(&rdev->irq.pflip[0])) {
6111 		DRM_DEBUG("si_irq_set: vblank 0\n");
6112 		crtc1 |= VBLANK_INT_MASK;
6113 	}
6114 	if (rdev->irq.crtc_vblank_int[1] ||
6115 	    atomic_read(&rdev->irq.pflip[1])) {
6116 		DRM_DEBUG("si_irq_set: vblank 1\n");
6117 		crtc2 |= VBLANK_INT_MASK;
6118 	}
6119 	if (rdev->irq.crtc_vblank_int[2] ||
6120 	    atomic_read(&rdev->irq.pflip[2])) {
6121 		DRM_DEBUG("si_irq_set: vblank 2\n");
6122 		crtc3 |= VBLANK_INT_MASK;
6123 	}
6124 	if (rdev->irq.crtc_vblank_int[3] ||
6125 	    atomic_read(&rdev->irq.pflip[3])) {
6126 		DRM_DEBUG("si_irq_set: vblank 3\n");
6127 		crtc4 |= VBLANK_INT_MASK;
6128 	}
6129 	if (rdev->irq.crtc_vblank_int[4] ||
6130 	    atomic_read(&rdev->irq.pflip[4])) {
6131 		DRM_DEBUG("si_irq_set: vblank 4\n");
6132 		crtc5 |= VBLANK_INT_MASK;
6133 	}
6134 	if (rdev->irq.crtc_vblank_int[5] ||
6135 	    atomic_read(&rdev->irq.pflip[5])) {
6136 		DRM_DEBUG("si_irq_set: vblank 5\n");
6137 		crtc6 |= VBLANK_INT_MASK;
6138 	}
6139 	if (rdev->irq.hpd[0]) {
6140 		DRM_DEBUG("si_irq_set: hpd 1\n");
6141 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6142 	}
6143 	if (rdev->irq.hpd[1]) {
6144 		DRM_DEBUG("si_irq_set: hpd 2\n");
6145 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6146 	}
6147 	if (rdev->irq.hpd[2]) {
6148 		DRM_DEBUG("si_irq_set: hpd 3\n");
6149 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6150 	}
6151 	if (rdev->irq.hpd[3]) {
6152 		DRM_DEBUG("si_irq_set: hpd 4\n");
6153 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6154 	}
6155 	if (rdev->irq.hpd[4]) {
6156 		DRM_DEBUG("si_irq_set: hpd 5\n");
6157 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6158 	}
6159 	if (rdev->irq.hpd[5]) {
6160 		DRM_DEBUG("si_irq_set: hpd 6\n");
6161 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6162 	}
6163 
6164 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6165 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6166 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6167 
6168 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6169 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6170 
6171 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6172 
6173 	if (rdev->irq.dpm_thermal) {
6174 		DRM_DEBUG("dpm thermal\n");
6175 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6176 	}
6177 
6178 	if (rdev->num_crtc >= 2) {
6179 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6180 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6181 	}
6182 	if (rdev->num_crtc >= 4) {
6183 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6184 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6185 	}
6186 	if (rdev->num_crtc >= 6) {
6187 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6188 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6189 	}
6190 
6191 	if (rdev->num_crtc >= 2) {
6192 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
6193 		       GRPH_PFLIP_INT_MASK);
6194 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
6195 		       GRPH_PFLIP_INT_MASK);
6196 	}
6197 	if (rdev->num_crtc >= 4) {
6198 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
6199 		       GRPH_PFLIP_INT_MASK);
6200 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
6201 		       GRPH_PFLIP_INT_MASK);
6202 	}
6203 	if (rdev->num_crtc >= 6) {
6204 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
6205 		       GRPH_PFLIP_INT_MASK);
6206 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
6207 		       GRPH_PFLIP_INT_MASK);
6208 	}
6209 
6210 	if (!ASIC_IS_NODCE(rdev)) {
6211 		WREG32(DC_HPD1_INT_CONTROL, hpd1);
6212 		WREG32(DC_HPD2_INT_CONTROL, hpd2);
6213 		WREG32(DC_HPD3_INT_CONTROL, hpd3);
6214 		WREG32(DC_HPD4_INT_CONTROL, hpd4);
6215 		WREG32(DC_HPD5_INT_CONTROL, hpd5);
6216 		WREG32(DC_HPD6_INT_CONTROL, hpd6);
6217 	}
6218 
6219 	WREG32(CG_THERMAL_INT, thermal_int);
6220 
6221 	/* posting read */
6222 	RREG32(SRBM_STATUS);
6223 
6224 	return 0;
6225 }
6226 
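/* Snapshot the display interrupt status registers into
 * rdev->irq.stat_regs and acknowledge any pending pflip, vblank, vline
 * and HPD/HPD_RX interrupts.
 */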
6227 static inline void si_irq_ack(struct radeon_device *rdev)
6228 {
6229 	u32 tmp;
6230 
6231 	if (ASIC_IS_NODCE(rdev))
6232 		return;
6233 
6234 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6235 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6236 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6237 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6238 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6239 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6240 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6241 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6242 	if (rdev->num_crtc >= 4) {
6243 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6244 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6245 	}
6246 	if (rdev->num_crtc >= 6) {
6247 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6248 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6249 	}
6250 
6251 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6252 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6253 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6254 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6255 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6256 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6257 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6258 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6259 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6260 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6261 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6262 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6263 
6264 	if (rdev->num_crtc >= 4) {
6265 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6266 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6267 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6268 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6269 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6270 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6271 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6272 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6273 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6274 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6275 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6276 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6277 	}
6278 
6279 	if (rdev->num_crtc >= 6) {
6280 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6281 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6282 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6283 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6284 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6285 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6286 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6287 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6288 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6289 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6290 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6291 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6292 	}
6293 
6294 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6295 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6296 		tmp |= DC_HPDx_INT_ACK;
6297 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6298 	}
6299 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6300 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6301 		tmp |= DC_HPDx_INT_ACK;
6302 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6303 	}
6304 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6305 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6306 		tmp |= DC_HPDx_INT_ACK;
6307 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6308 	}
6309 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6310 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6311 		tmp |= DC_HPDx_INT_ACK;
6312 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6313 	}
6314 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6315 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6316 		tmp |= DC_HPDx_INT_ACK;
6317 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6318 	}
6319 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6320 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6321 		tmp |= DC_HPDx_INT_ACK;
6322 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6323 	}
6324 
6325 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) {
6326 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6327 		tmp |= DC_HPDx_RX_INT_ACK;
6328 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6329 	}
6330 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
6331 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6332 		tmp |= DC_HPDx_RX_INT_ACK;
6333 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6334 	}
6335 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
6336 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6337 		tmp |= DC_HPDx_RX_INT_ACK;
6338 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6339 	}
6340 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
6341 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6342 		tmp |= DC_HPDx_RX_INT_ACK;
6343 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6344 	}
6345 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
6346 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6347 		tmp |= DC_HPDx_RX_INT_ACK;
6348 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6349 	}
6350 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
6351 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6352 		tmp |= DC_HPDx_RX_INT_ACK;
6353 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6354 	}
6355 }
6356 
6357 static void si_irq_disable(struct radeon_device *rdev)
6358 {
6359 	si_disable_interrupts(rdev);
6360 	/* Wait and acknowledge irq */
6361 	mdelay(1);
6362 	si_irq_ack(rdev);
6363 	si_disable_interrupt_state(rdev);
6364 }
6365 
6366 static void si_irq_suspend(struct radeon_device *rdev)
6367 {
6368 	si_irq_disable(rdev);
6369 	si_rlc_stop(rdev);
6370 }
6371 
6372 static void si_irq_fini(struct radeon_device *rdev)
6373 {
6374 	si_irq_suspend(rdev);
6375 	r600_ih_ring_fini(rdev);
6376 }
6377 
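/* Get the current IH ring write pointer, from the writeback buffer if
 * enabled, otherwise from the IH_RB_WPTR register.  On ring overflow,
 * advance the read pointer past the overwritten entries and clear the
 * overflow bit.
 */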
6378 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6379 {
6380 	u32 wptr, tmp;
6381 
6382 	if (rdev->wb.enabled)
6383 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6384 	else
6385 		wptr = RREG32(IH_RB_WPTR);
6386 
6387 	if (wptr & RB_OVERFLOW) {
6388 		wptr &= ~RB_OVERFLOW;
6389 		/* When a ring buffer overflow happens, start parsing interrupts
6390 		 * from the last vector that was not overwritten (wptr + 16).
6391 		 * Hopefully this allows us to catch up.
6392 		 */
6393 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6394 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6395 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6396 		tmp = RREG32(IH_RB_CNTL);
6397 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6398 		WREG32(IH_RB_CNTL, tmp);
6399 	}
6400 	return (wptr & rdev->ih.ptr_mask);
6401 }
6402 
6403 /*        SI IV Ring
6404  * Each IV ring entry is 128 bits:
6405  * [7:0]    - interrupt source id
6406  * [31:8]   - reserved
6407  * [59:32]  - interrupt source data
6408  * [63:60]  - reserved
6409  * [71:64]  - RINGID
6410  * [79:72]  - VMID
6411  * [127:80] - reserved
6412  */
6413 int si_irq_process(struct radeon_device *rdev)
6414 {
6415 	u32 wptr;
6416 	u32 rptr;
6417 	u32 src_id, src_data, ring_id;
6418 	u32 ring_index;
6419 	bool queue_hotplug = false;
6420 	bool queue_dp = false;
6421 	bool queue_thermal = false;
6422 	u32 status, addr;
6423 
6424 	if (!rdev->ih.enabled || rdev->shutdown)
6425 		return IRQ_NONE;
6426 
6427 	wptr = si_get_ih_wptr(rdev);
6428 
6429 restart_ih:
6430 	/* is somebody else already processing irqs? */
6431 	if (atomic_xchg(&rdev->ih.lock, 1))
6432 		return IRQ_NONE;
6433 
6434 	rptr = rdev->ih.rptr;
6435 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6436 
6437 	/* Order reading of wptr vs. reading of IH ring data */
6438 	rmb();
6439 
6440 	/* display interrupts */
6441 	si_irq_ack(rdev);
6442 
6443 	while (rptr != wptr) {
6444 		/* wptr/rptr are in bytes! */
6445 		ring_index = rptr / 4;
6446 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6447 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6448 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6449 
6450 		switch (src_id) {
6451 		case 1: /* D1 vblank/vline */
6452 			switch (src_data) {
6453 			case 0: /* D1 vblank */
6454 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT))
6455 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6456 
6457 				if (rdev->irq.crtc_vblank_int[0]) {
6458 					drm_handle_vblank(rdev->ddev, 0);
6459 					rdev->pm.vblank_sync = true;
6460 					wake_up(&rdev->irq.vblank_queue);
6461 				}
6462 				if (atomic_read(&rdev->irq.pflip[0]))
6463 					radeon_crtc_handle_vblank(rdev, 0);
6464 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6465 				DRM_DEBUG("IH: D1 vblank\n");
6466 
6467 				break;
6468 			case 1: /* D1 vline */
6469 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT))
6470 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6471 
6472 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6473 				DRM_DEBUG("IH: D1 vline\n");
6474 
6475 				break;
6476 			default:
6477 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6478 				break;
6479 			}
6480 			break;
6481 		case 2: /* D2 vblank/vline */
6482 			switch (src_data) {
6483 			case 0: /* D2 vblank */
6484 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
6485 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6486 
6487 				if (rdev->irq.crtc_vblank_int[1]) {
6488 					drm_handle_vblank(rdev->ddev, 1);
6489 					rdev->pm.vblank_sync = true;
6490 					wake_up(&rdev->irq.vblank_queue);
6491 				}
6492 				if (atomic_read(&rdev->irq.pflip[1]))
6493 					radeon_crtc_handle_vblank(rdev, 1);
6494 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6495 				DRM_DEBUG("IH: D2 vblank\n");
6496 
6497 				break;
6498 			case 1: /* D2 vline */
6499 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT))
6500 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6501 
6502 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6503 				DRM_DEBUG("IH: D2 vline\n");
6504 
6505 				break;
6506 			default:
6507 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6508 				break;
6509 			}
6510 			break;
6511 		case 3: /* D3 vblank/vline */
6512 			switch (src_data) {
6513 			case 0: /* D3 vblank */
6514 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
6515 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6516 
6517 				if (rdev->irq.crtc_vblank_int[2]) {
6518 					drm_handle_vblank(rdev->ddev, 2);
6519 					rdev->pm.vblank_sync = true;
6520 					wake_up(&rdev->irq.vblank_queue);
6521 				}
6522 				if (atomic_read(&rdev->irq.pflip[2]))
6523 					radeon_crtc_handle_vblank(rdev, 2);
6524 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6525 				DRM_DEBUG("IH: D3 vblank\n");
6526 
6527 				break;
6528 			case 1: /* D3 vline */
6529 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
6530 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6531 
6532 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6533 				DRM_DEBUG("IH: D3 vline\n");
6534 
6535 				break;
6536 			default:
6537 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6538 				break;
6539 			}
6540 			break;
6541 		case 4: /* D4 vblank/vline */
6542 			switch (src_data) {
6543 			case 0: /* D4 vblank */
6544 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
6545 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6546 
6547 				if (rdev->irq.crtc_vblank_int[3]) {
6548 					drm_handle_vblank(rdev->ddev, 3);
6549 					rdev->pm.vblank_sync = true;
6550 					wake_up(&rdev->irq.vblank_queue);
6551 				}
6552 				if (atomic_read(&rdev->irq.pflip[3]))
6553 					radeon_crtc_handle_vblank(rdev, 3);
6554 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6555 				DRM_DEBUG("IH: D4 vblank\n");
6556 
6557 				break;
6558 			case 1: /* D4 vline */
6559 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
6560 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6561 
6562 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6563 				DRM_DEBUG("IH: D4 vline\n");
6564 
6565 				break;
6566 			default:
6567 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6568 				break;
6569 			}
6570 			break;
6571 		case 5: /* D5 vblank/vline */
6572 			switch (src_data) {
6573 			case 0: /* D5 vblank */
6574 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
6575 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6576 
6577 				if (rdev->irq.crtc_vblank_int[4]) {
6578 					drm_handle_vblank(rdev->ddev, 4);
6579 					rdev->pm.vblank_sync = true;
6580 					wake_up(&rdev->irq.vblank_queue);
6581 				}
6582 				if (atomic_read(&rdev->irq.pflip[4]))
6583 					radeon_crtc_handle_vblank(rdev, 4);
6584 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6585 				DRM_DEBUG("IH: D5 vblank\n");
6586 
6587 				break;
6588 			case 1: /* D5 vline */
6589 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
6590 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6591 
6592 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6593 				DRM_DEBUG("IH: D5 vline\n");
6594 
6595 				break;
6596 			default:
6597 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6598 				break;
6599 			}
6600 			break;
6601 		case 6: /* D6 vblank/vline */
6602 			switch (src_data) {
6603 			case 0: /* D6 vblank */
6604 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
6605 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6606 
6607 				if (rdev->irq.crtc_vblank_int[5]) {
6608 					drm_handle_vblank(rdev->ddev, 5);
6609 					rdev->pm.vblank_sync = true;
6610 					wake_up(&rdev->irq.vblank_queue);
6611 				}
6612 				if (atomic_read(&rdev->irq.pflip[5]))
6613 					radeon_crtc_handle_vblank(rdev, 5);
6614 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6615 				DRM_DEBUG("IH: D6 vblank\n");
6616 
6617 				break;
6618 			case 1: /* D6 vline */
6619 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
6620 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6621 
6622 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6623 				DRM_DEBUG("IH: D6 vline\n");
6624 
6625 				break;
6626 			default:
6627 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6628 				break;
6629 			}
6630 			break;
6631 		case 8: /* D1 page flip */
6632 		case 10: /* D2 page flip */
6633 		case 12: /* D3 page flip */
6634 		case 14: /* D4 page flip */
6635 		case 16: /* D5 page flip */
6636 		case 18: /* D6 page flip */
6637 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6638 			if (radeon_use_pflipirq > 0)
6639 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6640 			break;
6641 		case 42: /* HPD hotplug */
6642 			switch (src_data) {
6643 			case 0:
6644 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT))
6645 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6646 
6647 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6648 				queue_hotplug = true;
6649 				DRM_DEBUG("IH: HPD1\n");
6650 
6651 				break;
6652 			case 1:
6653 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT))
6654 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6655 
6656 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6657 				queue_hotplug = true;
6658 				DRM_DEBUG("IH: HPD2\n");
6659 
6660 				break;
6661 			case 2:
6662 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT))
6663 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6664 
6665 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6666 				queue_hotplug = true;
6667 				DRM_DEBUG("IH: HPD3\n");
6668 
6669 				break;
6670 			case 3:
6671 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT))
6672 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6673 
6674 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6675 				queue_hotplug = true;
6676 				DRM_DEBUG("IH: HPD4\n");
6677 
6678 				break;
6679 			case 4:
6680 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT))
6681 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6682 
6683 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6684 				queue_hotplug = true;
6685 				DRM_DEBUG("IH: HPD5\n");
6686 
6687 				break;
6688 			case 5:
6689 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT))
6690 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6691 
6692 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6693 				queue_hotplug = true;
6694 				DRM_DEBUG("IH: HPD6\n");
6695 
6696 				break;
6697 			case 6:
6698 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT))
6699 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6700 
6701 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT;
6702 				queue_dp = true;
6703 				DRM_DEBUG("IH: HPD_RX 1\n");
6704 
6705 				break;
6706 			case 7:
6707 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT))
6708 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6709 
6710 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
6711 				queue_dp = true;
6712 				DRM_DEBUG("IH: HPD_RX 2\n");
6713 
6714 				break;
6715 			case 8:
6716 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
6717 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6718 
6719 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
6720 				queue_dp = true;
6721 				DRM_DEBUG("IH: HPD_RX 3\n");
6722 
6723 				break;
6724 			case 9:
6725 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
6726 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6727 
6728 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
6729 				queue_dp = true;
6730 				DRM_DEBUG("IH: HPD_RX 4\n");
6731 
6732 				break;
6733 			case 10:
6734 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
6735 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6736 
6737 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
6738 				queue_dp = true;
6739 				DRM_DEBUG("IH: HPD_RX 5\n");
6740 
6741 				break;
6742 			case 11:
6743 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
6744 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6745 
6746 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
6747 				queue_dp = true;
6748 				DRM_DEBUG("IH: HPD_RX 6\n");
6749 
6750 				break;
6751 			default:
6752 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6753 				break;
6754 			}
6755 			break;
6756 		case 96: /* SRBM read error */
6757 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6758 			WREG32(SRBM_INT_ACK, 0x1);
6759 			break;
6760 		case 124: /* UVD */
6761 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6762 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6763 			break;
6764 		case 146:
6765 		case 147:
6766 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6767 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6768 			/* reset addr and status */
6769 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6770 			if (addr == 0x0 && status == 0x0)
6771 				break;
6772 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6773 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6774 				addr);
6775 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6776 				status);
6777 			si_vm_decode_fault(rdev, status, addr);
6778 			break;
6779 		case 176: /* RINGID0 CP_INT */
6780 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6781 			break;
6782 		case 177: /* RINGID1 CP_INT */
6783 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6784 			break;
6785 		case 178: /* RINGID2 CP_INT */
6786 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6787 			break;
6788 		case 181: /* CP EOP event */
6789 			DRM_DEBUG("IH: CP EOP\n");
6790 			switch (ring_id) {
6791 			case 0:
6792 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6793 				break;
6794 			case 1:
6795 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6796 				break;
6797 			case 2:
6798 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6799 				break;
6800 			}
6801 			break;
6802 		case 224: /* DMA trap event */
6803 			DRM_DEBUG("IH: DMA trap\n");
6804 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6805 			break;
6806 		case 230: /* thermal low to high */
6807 			DRM_DEBUG("IH: thermal low to high\n");
6808 			rdev->pm.dpm.thermal.high_to_low = false;
6809 			queue_thermal = true;
6810 			break;
6811 		case 231: /* thermal high to low */
6812 			DRM_DEBUG("IH: thermal high to low\n");
6813 			rdev->pm.dpm.thermal.high_to_low = true;
6814 			queue_thermal = true;
6815 			break;
6816 		case 233: /* GUI IDLE */
6817 			DRM_DEBUG("IH: GUI idle\n");
6818 			break;
6819 		case 244: /* DMA1 trap event */
6820 			DRM_DEBUG("IH: DMA1 trap\n");
6821 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6822 			break;
6823 		default:
6824 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6825 			break;
6826 		}
6827 
6828 		/* wptr/rptr are in bytes! */
6829 		rptr += 16;
6830 		rptr &= rdev->ih.ptr_mask;
6831 		WREG32(IH_RB_RPTR, rptr);
6832 	}
6833 	if (queue_dp)
6834 		schedule_work(&rdev->dp_work);
6835 	if (queue_hotplug)
6836 		schedule_delayed_work(&rdev->hotplug_work, 0);
6837 	if (queue_thermal && rdev->pm.dpm_enabled)
6838 		schedule_work(&rdev->pm.dpm.thermal.work);
6839 	rdev->ih.rptr = rptr;
6840 	atomic_set(&rdev->ih.lock, 0);
6841 
6842 	/* make sure wptr hasn't changed while processing */
6843 	wptr = si_get_ih_wptr(rdev);
6844 	if (wptr != rptr)
6845 		goto restart_ih;
6846 
6847 	return IRQ_HANDLED;
6848 }
6849 
6850 /*
6851  * startup/shutdown callbacks
6852  */
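/**
 * si_uvd_init - init the UVD block
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the UVD block and set up the UVD ring (SI).
 * On failure, UVD support is disabled for this device, since
 * uvd_v2_2_resume() would only fail early anyway.
 */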
6853 static void si_uvd_init(struct radeon_device *rdev)
6854 {
6855 	int r;
6856 
6857 	if (!rdev->has_uvd)
6858 		return;
6859 
6860 	r = radeon_uvd_init(rdev);
6861 	if (r) {
6862 		dev_err(rdev->dev, "failed UVD init (%d).\n", r);
6863 		/*
6864 		 * At this point rdev->uvd.vcpu_bo is NULL, which trickles down
6865 		 * to uvd_v2_2_resume() failing early, so nothing happens there.
6866 		 * It is therefore pointless to go through that code, hence we
6867 		 * disable UVD here.
6868 		 */
6869 		rdev->has_uvd = 0;
6870 		return;
6871 	}
6872 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6873 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6874 }
6875 
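/**
 * si_uvd_start - start the UVD block
 *
 * @rdev: radeon_device pointer
 *
 * Resume the UVD block and start its fence driver (SI).
 * On failure, the UVD ring size is cleared so that
 * si_uvd_resume() skips the ring.
 */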
6876 static void si_uvd_start(struct radeon_device *rdev)
6877 {
6878 	int r;
6879 
6880 	if (!rdev->has_uvd)
6881 		return;
6882 
6883 	r = uvd_v2_2_resume(rdev);
6884 	if (r) {
6885 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6886 		goto error;
6887 	}
6888 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6889 	if (r) {
6890 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6891 		goto error;
6892 	}
6893 	return;
6894 
6895 error:
6896 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6897 }
6898 
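/**
 * si_uvd_resume - bring up the UVD ring
 *
 * @rdev: radeon_device pointer
 *
 * Initialize and start the UVD ring, if the block was
 * successfully started (SI).
 */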
6899 static void si_uvd_resume(struct radeon_device *rdev)
6900 {
6901 	struct radeon_ring *ring;
6902 	int r;
6903 
6904 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6905 		return;
6906 
6907 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6908 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6909 	if (r) {
6910 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6911 		return;
6912 	}
6913 	r = uvd_v1_0_init(rdev);
6914 	if (r) {
6915 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6916 		return;
6917 	}
6918 }
6919 
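/**
 * si_vce_init - init the VCE block
 *
 * @rdev: radeon_device pointer
 *
 * Initialize the VCE block and set up both VCE rings (SI).
 * On failure, VCE support is disabled for this device, since
 * si_vce_start() would only fail early anyway.
 */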
6920 static void si_vce_init(struct radeon_device *rdev)
6921 {
6922 	int r;
6923 
6924 	if (!rdev->has_vce)
6925 		return;
6926 
6927 	r = radeon_vce_init(rdev);
6928 	if (r) {
6929 		dev_err(rdev->dev, "failed VCE init (%d).\n", r);
6930 		/*
6931 		 * At this point rdev->vce.vcpu_bo is NULL, which trickles down
6932 		 * to si_vce_start() failing early, so nothing happens there.
6933 		 * It is therefore pointless to go through that code, hence we
6934 		 * disable VCE here.
6935 		 */
6936 		rdev->has_vce = 0;
6937 		return;
6938 	}
6939 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6940 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6941 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6942 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6943 }
6944 
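/**
 * si_vce_start - start the VCE block
 *
 * @rdev: radeon_device pointer
 *
 * Resume the VCE block and start the VCE1 and VCE2 fence
 * drivers (SI).  On failure, both VCE ring sizes are cleared
 * so that si_vce_resume() skips the rings.
 */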
6945 static void si_vce_start(struct radeon_device *rdev)
6946 {
6947 	int r;
6948 
6949 	if (!rdev->has_vce)
6950 		return;
6951 
6952 	r = radeon_vce_resume(rdev);
6953 	if (r) {
6954 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6955 		goto error;
6956 	}
6957 	r = vce_v1_0_resume(rdev);
6958 	if (r) {
6959 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6960 		goto error;
6961 	}
6962 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6963 	if (r) {
6964 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6965 		goto error;
6966 	}
6967 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6968 	if (r) {
6969 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6970 		goto error;
6971 	}
6972 	return;
6973 
6974 error:
6975 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6976 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6977 }
6978 
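/**
 * si_vce_resume - bring up the VCE rings
 *
 * @rdev: radeon_device pointer
 *
 * Initialize and start the VCE1 and VCE2 rings, if the block
 * was successfully started (SI).
 */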
6979 static void si_vce_resume(struct radeon_device *rdev)
6980 {
6981 	struct radeon_ring *ring;
6982 	int r;
6983 
6984 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
6985 		return;
6986 
6987 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
6988 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6989 	if (r) {
6990 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6991 		return;
6992 	}
6993 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
6994 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6995 	if (r) {
6996 		dev_err(rdev->dev, "failed initializing VCE2 ring (%d).\n", r);
6997 		return;
6998 	}
6999 	r = vce_v1_0_init(rdev);
7000 	if (r) {
7001 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
7002 		return;
7003 	}
7004 }
7005 
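/**
 * si_startup - program the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Bring up the MC, GART, RLC, writeback, rings, interrupts,
 * UVD/VCE, the IB pool, the VM manager and audio (SI).
 * Called at driver load (si_init) and resume (si_resume).
 * Returns 0 for success, error for failure.
 */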
7006 static int si_startup(struct radeon_device *rdev)
7007 {
7008 	struct radeon_ring *ring;
7009 	int r;
7010 
7011 	/* enable pcie gen2/3 link */
7012 	si_pcie_gen3_enable(rdev);
7013 	/* enable aspm */
7014 	si_program_aspm(rdev);
7015 
7016 	/* scratch needs to be initialized before MC */
7017 	r = r600_vram_scratch_init(rdev);
7018 	if (r)
7019 		return r;
7020 
7021 	si_mc_program(rdev);
7022 
7023 	if (!rdev->pm.dpm_enabled) {
7024 		r = si_mc_load_microcode(rdev);
7025 		if (r) {
7026 			DRM_ERROR("Failed to load MC firmware!\n");
7027 			return r;
7028 		}
7029 	}
7030 
7031 	r = si_pcie_gart_enable(rdev);
7032 	if (r)
7033 		return r;
7034 	si_gpu_init(rdev);
7035 
7036 	/* allocate rlc buffers */
7037 	if (rdev->family == CHIP_VERDE) {
7038 		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
7039 		rdev->rlc.reg_list_size =
7040 			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
7041 	}
7042 	rdev->rlc.cs_data = si_cs_data;
7043 	r = sumo_rlc_init(rdev);
7044 	if (r) {
7045 		DRM_ERROR("Failed to init rlc BOs!\n");
7046 		return r;
7047 	}
7048 
7049 	/* allocate wb buffer */
7050 	r = radeon_wb_init(rdev);
7051 	if (r)
7052 		return r;
7053 
7054 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7055 	if (r) {
7056 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7057 		return r;
7058 	}
7059 
7060 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7061 	if (r) {
7062 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7063 		return r;
7064 	}
7065 
7066 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7067 	if (r) {
7068 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7069 		return r;
7070 	}
7071 
7072 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7073 	if (r) {
7074 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7075 		return r;
7076 	}
7077 
7078 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7079 	if (r) {
7080 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7081 		return r;
7082 	}
7083 
7084 	si_uvd_start(rdev);
7085 	si_vce_start(rdev);
7086 
7087 	/* Enable IRQ */
7088 	if (!rdev->irq.installed) {
7089 		r = radeon_irq_kms_init(rdev);
7090 		if (r)
7091 			return r;
7092 	}
7093 
7094 	r = si_irq_init(rdev);
7095 	if (r) {
7096 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
7097 		radeon_irq_kms_fini(rdev);
7098 		return r;
7099 	}
7100 	si_irq_set(rdev);
7101 
7102 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7103 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7104 			     RADEON_CP_PACKET2);
7105 	if (r)
7106 		return r;
7107 
7108 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7109 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7110 			     RADEON_CP_PACKET2);
7111 	if (r)
7112 		return r;
7113 
7114 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7115 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7116 			     RADEON_CP_PACKET2);
7117 	if (r)
7118 		return r;
7119 
7120 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7121 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7122 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7123 	if (r)
7124 		return r;
7125 
7126 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7127 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7128 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7129 	if (r)
7130 		return r;
7131 
7132 	r = si_cp_load_microcode(rdev);
7133 	if (r)
7134 		return r;
7135 	r = si_cp_resume(rdev);
7136 	if (r)
7137 		return r;
7138 
7139 	r = cayman_dma_resume(rdev);
7140 	if (r)
7141 		return r;
7142 
7143 	si_uvd_resume(rdev);
7144 	si_vce_resume(rdev);
7145 
7146 	r = radeon_ib_pool_init(rdev);
7147 	if (r) {
7148 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7149 		return r;
7150 	}
7151 
7152 	r = radeon_vm_manager_init(rdev);
7153 	if (r) {
7154 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7155 		return r;
7156 	}
7157 
7158 	r = radeon_audio_init(rdev);
7159 	if (r)
7160 		return r;
7161 
7162 	return 0;
7163 }
7164 
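/**
 * si_resume - resume the asic
 *
 * @rdev: radeon_device pointer
 *
 * Re-post the card, re-init the golden registers, resume power
 * management and bring the asic back to a functional state (SI).
 * Returns 0 for success, error for failure.
 */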
7165 int si_resume(struct radeon_device *rdev)
7166 {
7167 	int r;
7168 
7169 	/* Do not reset the GPU before posting; on rv770 hw, unlike on r500 hw,
7170 	 * posting performs the tasks necessary to bring the GPU back into
7171 	 * good shape.
7172 	 */
7173 	/* post card */
7174 	atom_asic_init(rdev->mode_info.atom_context);
7175 
7176 	/* init golden registers */
7177 	si_init_golden_registers(rdev);
7178 
7179 	if (rdev->pm.pm_method == PM_METHOD_DPM)
7180 		radeon_pm_resume(rdev);
7181 
7182 	rdev->accel_working = true;
7183 	r = si_startup(rdev);
7184 	if (r) {
7185 		DRM_ERROR("si startup failed on resume\n");
7186 		rdev->accel_working = false;
7187 		return r;
7188 	}
7189 
7190 	return r;
7192 }
7193 
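/**
 * si_suspend - suspend the asic
 *
 * @rdev: radeon_device pointer
 *
 * Stop the CP and DMA engines, suspend UVD/VCE, tear down
 * PG/CG, interrupts, writeback and GART in preparation for
 * suspend (SI).
 * Returns 0 for success.
 */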
7194 int si_suspend(struct radeon_device *rdev)
7195 {
7196 	radeon_pm_suspend(rdev);
7197 	radeon_audio_fini(rdev);
7198 	radeon_vm_manager_fini(rdev);
7199 	si_cp_enable(rdev, false);
7200 	cayman_dma_stop(rdev);
7201 	if (rdev->has_uvd) {
7202 		uvd_v1_0_fini(rdev);
7203 		radeon_uvd_suspend(rdev);
7204 	}
7205 	if (rdev->has_vce)
7206 		radeon_vce_suspend(rdev);
7207 	si_fini_pg(rdev);
7208 	si_fini_cg(rdev);
7209 	si_irq_suspend(rdev);
7210 	radeon_wb_disable(rdev);
7211 	si_pcie_gart_disable(rdev);
7212 	return 0;
7213 }
7214 
7215 /* The plan is to move initialization into this function and to use
7216  * helper functions so that radeon_device_init does pretty much
7217  * nothing more than call ASIC-specific functions. This should
7218  * also allow us to remove a bunch of callbacks such as
7219  * vram_info.
7220  */
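/**
 * si_init - asic specific driver and hw init
 *
 * @rdev: radeon_device pointer
 *
 * Read the BIOS, post the card if necessary, init the MC, the
 * rings, UVD/VCE, the IH ring and GART, then start the asic (SI).
 * Called at driver load.
 * Returns 0 for success, error for failure.
 */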
7221 int si_init(struct radeon_device *rdev)
7222 {
7223 	struct radeon_ring *ring;
7224 	int r;
7225 
7226 	/* Read BIOS */
7227 	if (!radeon_get_bios(rdev)) {
7228 		if (ASIC_IS_AVIVO(rdev))
7229 			return -EINVAL;
7230 	}
7231 	/* Must be an ATOMBIOS */
7232 	if (!rdev->is_atom_bios) {
7233 		dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
7234 		return -EINVAL;
7235 	}
7236 	r = radeon_atombios_init(rdev);
7237 	if (r)
7238 		return r;
7239 
7240 	/* Post card if necessary */
7241 	if (!radeon_card_posted(rdev)) {
7242 		if (!rdev->bios) {
7243 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7244 			return -EINVAL;
7245 		}
7246 		DRM_INFO("GPU not posted. posting now...\n");
7247 		atom_asic_init(rdev->mode_info.atom_context);
7248 	}
7249 	/* init golden registers */
7250 	si_init_golden_registers(rdev);
7251 	/* Initialize scratch registers */
7252 	si_scratch_init(rdev);
7253 	/* Initialize surface registers */
7254 	radeon_surface_init(rdev);
7255 	/* Initialize clocks */
7256 	radeon_get_clock_info(rdev->ddev);
7257 
7258 	/* Fence driver */
7259 	r = radeon_fence_driver_init(rdev);
7260 	if (r)
7261 		return r;
7262 
7263 	/* initialize memory controller */
7264 	r = si_mc_init(rdev);
7265 	if (r)
7266 		return r;
7267 	/* Memory manager */
7268 	r = radeon_bo_init(rdev);
7269 	if (r)
7270 		return r;
7271 
7272 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7273 	    !rdev->rlc_fw || !rdev->mc_fw) {
7274 		r = si_init_microcode(rdev);
7275 		if (r) {
7276 			DRM_ERROR("Failed to load firmware!\n");
7277 			return r;
7278 		}
7279 	}
7280 
7281 	/* Initialize power management */
7282 	radeon_pm_init(rdev);
7283 
7284 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7285 	ring->ring_obj = NULL;
7286 	r600_ring_init(rdev, ring, 1024 * 1024);
7287 
7288 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7289 	ring->ring_obj = NULL;
7290 	r600_ring_init(rdev, ring, 1024 * 1024);
7291 
7292 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7293 	ring->ring_obj = NULL;
7294 	r600_ring_init(rdev, ring, 1024 * 1024);
7295 
7296 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7297 	ring->ring_obj = NULL;
7298 	r600_ring_init(rdev, ring, 64 * 1024);
7299 
7300 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7301 	ring->ring_obj = NULL;
7302 	r600_ring_init(rdev, ring, 64 * 1024);
7303 
7304 	si_uvd_init(rdev);
7305 	si_vce_init(rdev);
7306 
7307 	rdev->ih.ring_obj = NULL;
7308 	r600_ih_ring_init(rdev, 64 * 1024);
7309 
7310 	r = r600_pcie_gart_init(rdev);
7311 	if (r)
7312 		return r;
7313 
7314 	rdev->accel_working = true;
7315 	r = si_startup(rdev);
7316 	if (r) {
7317 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7318 		si_cp_fini(rdev);
7319 		cayman_dma_fini(rdev);
7320 		si_irq_fini(rdev);
7321 		sumo_rlc_fini(rdev);
7322 		radeon_wb_fini(rdev);
7323 		radeon_ib_pool_fini(rdev);
7324 		radeon_vm_manager_fini(rdev);
7325 		radeon_irq_kms_fini(rdev);
7326 		si_pcie_gart_fini(rdev);
7327 		rdev->accel_working = false;
7328 	}
7329 
7330 	/* Don't start up if the MC ucode is missing.
7331 	 * The default clocks and voltages before the MC ucode
7332 	 * is loaded are not sufficient for advanced operations.
7333 	 */
7334 	if (!rdev->mc_fw) {
7335 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
7336 		return -EINVAL;
7337 	}
7338 
7339 	return 0;
7340 }
7341 
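/**
 * si_fini - asic specific driver and hw fini
 *
 * @rdev: radeon_device pointer
 *
 * Tear down the asic: CP, DMA, PG/CG, interrupts, RLC,
 * writeback, UVD/VCE, GART, fence driver and BO manager (SI).
 * Called at driver unload.
 */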
7342 void si_fini(struct radeon_device *rdev)
7343 {
7344 	radeon_pm_fini(rdev);
7345 	si_cp_fini(rdev);
7346 	cayman_dma_fini(rdev);
7347 	si_fini_pg(rdev);
7348 	si_fini_cg(rdev);
7349 	si_irq_fini(rdev);
7350 	sumo_rlc_fini(rdev);
7351 	radeon_wb_fini(rdev);
7352 	radeon_vm_manager_fini(rdev);
7353 	radeon_ib_pool_fini(rdev);
7354 	radeon_irq_kms_fini(rdev);
7355 	if (rdev->has_uvd) {
7356 		uvd_v1_0_fini(rdev);
7357 		radeon_uvd_fini(rdev);
7358 	}
7359 	if (rdev->has_vce)
7360 		radeon_vce_fini(rdev);
7361 	si_pcie_gart_fini(rdev);
7362 	r600_vram_scratch_fini(rdev);
7363 	radeon_gem_fini(rdev);
7364 	radeon_fence_driver_fini(rdev);
7365 	radeon_bo_fini(rdev);
7366 	radeon_atombios_fini(rdev);
7367 	kfree(rdev->bios);
7368 	rdev->bios = NULL;
7369 }
7370 
7371 /**
7372  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7373  *
7374  * @rdev: radeon_device pointer
7375  *
7376  * Fetches a GPU clock counter snapshot (SI).
7377  * Returns the 64 bit clock counter snapshot.
7378  */
7379 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7380 {
7381 	uint64_t clock;
7382 
7383 	mutex_lock(&rdev->gpu_clock_mutex);
7384 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7385 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7386 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7387 	mutex_unlock(&rdev->gpu_clock_mutex);
7388 	return clock;
7389 }
7390 
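/**
 * si_set_uvd_clocks - program the UVD PLL (UPLL)
 *
 * @rdev: radeon_device pointer
 * @vclk: requested vclk frequency, 0 to keep the PLL bypassed
 * @dclk: requested dclk frequency, 0 to keep the PLL bypassed
 *
 * Bypass VCLK/DCLK with BCLK, compute the UPLL dividers for the
 * requested clocks and switch the PLL back from bypass to normal
 * mode (SI).
 * Returns 0 for success, error for failure.
 */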
7391 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7392 {
7393 	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
7394 	int r;
7395 
7396 	/* bypass vclk and dclk with bclk */
7397 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7398 		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7399 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7400 
7401 	/* put PLL in bypass mode */
7402 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7403 
7404 	if (!vclk || !dclk) {
7405 		/* keep the Bypass mode */
7406 		return 0;
7407 	}
7408 
7409 	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7410 					  16384, 0x03FFFFFF, 0, 128, 5,
7411 					  &fb_div, &vclk_div, &dclk_div);
7412 	if (r)
7413 		return r;
7414 
7415 	/* set RESET_ANTI_MUX to 0 */
7416 	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7417 
7418 	/* set VCO_MODE to 1 */
7419 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7420 
7421 	/* disable sleep mode */
7422 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7423 
7424 	/* deassert UPLL_RESET */
7425 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7426 
7427 	mdelay(1);
7428 
7429 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7430 	if (r)
7431 		return r;
7432 
7433 	/* assert UPLL_RESET again */
7434 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7435 
7436 	/* disable spread spectrum. */
7437 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7438 
7439 	/* set feedback divider */
7440 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7441 
7442 	/* set ref divider to 0 */
7443 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7444 
7445 	if (fb_div < 307200)
7446 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7447 	else
7448 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7449 
7450 	/* set PDIV_A and PDIV_B */
7451 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7452 		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7453 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7454 
7455 	/* give the PLL some time to settle */
7456 	mdelay(15);
7457 
7458 	/* deassert PLL_RESET */
7459 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7460 
7461 	mdelay(15);
7462 
7463 	/* switch from bypass mode to normal mode */
7464 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7465 
7466 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7467 	if (r)
7468 		return r;
7469 
7470 	/* switch VCLK and DCLK selection */
7471 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7472 		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7473 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7474 
7475 	mdelay(100);
7476 
7477 	return 0;
7478 }
7479 
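/**
 * si_pcie_gen3_enable - enable higher PCIE link speeds
 *
 * @rdev: radeon_device pointer
 *
 * Try to bring the PCIE link up to gen2 or gen3 speeds depending
 * on what the root port supports, including the gen3 equalization
 * retraining sequence (SI).  Disabled with radeon.pcie_gen2=0.
 */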
7480 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7481 {
7482 	struct pci_dev *root = rdev->pdev->bus->self;
7483 	int bridge_pos, gpu_pos;
7484 	u32 speed_cntl, mask, current_data_rate;
7485 	int ret, i;
7486 	u16 tmp16;
7487 
7488 	if (pci_is_root_bus(rdev->pdev->bus))
7489 		return;
7490 
7491 	if (radeon_pcie_gen2 == 0)
7492 		return;
7493 
7494 	if (rdev->flags & RADEON_IS_IGP)
7495 		return;
7496 
7497 	if (!(rdev->flags & RADEON_IS_PCIE))
7498 		return;
7499 
7500 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7501 	if (ret != 0)
7502 		return;
7503 
7504 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7505 		return;
7506 
7507 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7508 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7509 		LC_CURRENT_DATA_RATE_SHIFT;
7510 	if (mask & DRM_PCIE_SPEED_80) {
7511 		if (current_data_rate == 2) {
7512 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7513 			return;
7514 		}
7515 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7516 	} else if (mask & DRM_PCIE_SPEED_50) {
7517 		if (current_data_rate == 1) {
7518 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7519 			return;
7520 		}
7521 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7522 	}
7523 
7524 	bridge_pos = pci_pcie_cap(root);
7525 	if (!bridge_pos)
7526 		return;
7527 
7528 	gpu_pos = pci_pcie_cap(rdev->pdev);
7529 	if (!gpu_pos)
7530 		return;
7531 
7532 	if (mask & DRM_PCIE_SPEED_80) {
7533 		/* re-try equalization if gen3 is not already enabled */
7534 		if (current_data_rate != 2) {
7535 			u16 bridge_cfg, gpu_cfg;
7536 			u16 bridge_cfg2, gpu_cfg2;
7537 			u32 max_lw, current_lw, tmp;
7538 
7539 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7540 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7541 
7542 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7543 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7544 
7545 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7546 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7547 
7548 			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7549 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7550 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7551 
7552 			if (current_lw < max_lw) {
7553 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7554 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
7555 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7556 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7557 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7558 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7559 				}
7560 			}
7561 
7562 			for (i = 0; i < 10; i++) {
7563 				/* check status */
7564 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7565 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7566 					break;
7567 
7568 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7569 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7570 
7571 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7572 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7573 
7574 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7575 				tmp |= LC_SET_QUIESCE;
7576 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7577 
7578 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7579 				tmp |= LC_REDO_EQ;
7580 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7581 
7582 				mdelay(100);
7583 
7584 				/* linkctl */
7585 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7586 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7587 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7588 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7589 
7590 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7591 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7592 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7593 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7594 
7595 				/* linkctl2 */
7596 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7597 				tmp16 &= ~((1 << 4) | (7 << 9));
7598 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7599 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7600 
7601 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7602 				tmp16 &= ~((1 << 4) | (7 << 9));
7603 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7604 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7605 
7606 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7607 				tmp &= ~LC_SET_QUIESCE;
7608 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7609 			}
7610 		}
7611 	}
7612 
7613 	/* set the link speed */
7614 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7615 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7616 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7617 
7618 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7619 	tmp16 &= ~0xf;
7620 	if (mask & DRM_PCIE_SPEED_80)
7621 		tmp16 |= 3; /* gen3 */
7622 	else if (mask & DRM_PCIE_SPEED_50)
7623 		tmp16 |= 2; /* gen2 */
7624 	else
7625 		tmp16 |= 1; /* gen1 */
7626 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7627 
7628 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7629 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7630 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7631 
7632 	for (i = 0; i < rdev->usec_timeout; i++) {
7633 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7634 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7635 			break;
7636 		udelay(1);
7637 	}
7638 }
7639 
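/**
 * si_program_aspm - configure PCIE ASPM
 *
 * @rdev: radeon_device pointer
 *
 * Program the L0s/L1 inactivity timers, PLL powerdown in L1 and
 * the CLKREQ-dependent clock source selections (SI).  Honors the
 * radeon.aspm module parameter.
 */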
7640 static void si_program_aspm(struct radeon_device *rdev)
7641 {
7642 	u32 data, orig;
7643 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7644 	bool disable_clkreq = false;
7645 
7646 	if (radeon_aspm == 0)
7647 		return;
7648 
7649 	if (!(rdev->flags & RADEON_IS_PCIE))
7650 		return;
7651 
7652 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7653 	data &= ~LC_XMIT_N_FTS_MASK;
7654 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7655 	if (orig != data)
7656 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7657 
7658 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7659 	data |= LC_GO_TO_RECOVERY;
7660 	if (orig != data)
7661 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7662 
7663 	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7664 	data |= P_IGNORE_EDB_ERR;
7665 	if (orig != data)
7666 		WREG32_PCIE(PCIE_P_CNTL, data);
7667 
7668 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7669 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7670 	data |= LC_PMI_TO_L1_DIS;
7671 	if (!disable_l0s)
7672 		data |= LC_L0S_INACTIVITY(7);
7673 
7674 	if (!disable_l1) {
7675 		data |= LC_L1_INACTIVITY(7);
7676 		data &= ~LC_PMI_TO_L1_DIS;
7677 		if (orig != data)
7678 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7679 
7680 		if (!disable_plloff_in_l1) {
7681 			bool clk_req_support;
7682 
7683 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7684 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7685 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7686 			if (orig != data)
7687 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7688 
7689 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7690 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7691 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7692 			if (orig != data)
7693 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7694 
7695 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7696 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7697 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7698 			if (orig != data)
7699 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7700 
7701 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7702 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7703 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7704 			if (orig != data)
7705 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7706 
7707 			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7708 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7709 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7710 				if (orig != data)
7711 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7712 
7713 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7714 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7715 				if (orig != data)
7716 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7717 
7718 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7719 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7720 				if (orig != data)
7721 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7722 
7723 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7724 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7725 				if (orig != data)
7726 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7727 
7728 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7729 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7730 				if (orig != data)
7731 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7732 
7733 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7734 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7735 				if (orig != data)
7736 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7737 
7738 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7739 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7740 				if (orig != data)
7741 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7742 
7743 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7744 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7745 				if (orig != data)
7746 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7747 			}
7748 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7749 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7750 			data |= LC_DYN_LANES_PWR_STATE(3);
7751 			if (orig != data)
7752 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7753 
7754 			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7755 			data &= ~LS2_EXIT_TIME_MASK;
7756 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7757 				data |= LS2_EXIT_TIME(5);
7758 			if (orig != data)
7759 				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7760 
7761 			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7762 			data &= ~LS2_EXIT_TIME_MASK;
7763 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7764 				data |= LS2_EXIT_TIME(5);
7765 			if (orig != data)
7766 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7767 
7768 			if (!disable_clkreq &&
7769 			    !pci_is_root_bus(rdev->pdev->bus)) {
7770 				struct pci_dev *root = rdev->pdev->bus->self;
7771 				u32 lnkcap;
7772 
7773 				clk_req_support = false;
7774 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7775 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7776 					clk_req_support = true;
7777 			} else {
7778 				clk_req_support = false;
7779 			}
7780 
7781 			if (clk_req_support) {
7782 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7783 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7784 				if (orig != data)
7785 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7786 
7787 				orig = data = RREG32(THM_CLK_CNTL);
7788 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7789 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7790 				if (orig != data)
7791 					WREG32(THM_CLK_CNTL, data);
7792 
7793 				orig = data = RREG32(MISC_CLK_CNTL);
7794 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7795 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7796 				if (orig != data)
7797 					WREG32(MISC_CLK_CNTL, data);
7798 
7799 				orig = data = RREG32(CG_CLKPIN_CNTL);
7800 				data &= ~BCLK_AS_XCLK;
7801 				if (orig != data)
7802 					WREG32(CG_CLKPIN_CNTL, data);
7803 
7804 				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7805 				data &= ~FORCE_BIF_REFCLK_EN;
7806 				if (orig != data)
7807 					WREG32(CG_CLKPIN_CNTL_2, data);
7808 
7809 				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7810 				data &= ~MPLL_CLKOUT_SEL_MASK;
7811 				data |= MPLL_CLKOUT_SEL(4);
7812 				if (orig != data)
7813 					WREG32(MPLL_BYPASSCLK_SEL, data);
7814 
7815 				orig = data = RREG32(SPLL_CNTL_MODE);
7816 				data &= ~SPLL_REFCLK_SEL_MASK;
7817 				if (orig != data)
7818 					WREG32(SPLL_CNTL_MODE, data);
7819 			}
7820 		}
7821 	} else {
7822 		if (orig != data)
7823 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7824 	}
7825 
7826 	orig = data = RREG32_PCIE(PCIE_CNTL2);
7827 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7828 	if (orig != data)
7829 		WREG32_PCIE(PCIE_CNTL2, data);
7830 
7831 	if (!disable_l0s) {
7832 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7833 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7834 			data = RREG32_PCIE(PCIE_LC_STATUS1);
7835 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7836 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7837 				data &= ~LC_L0S_INACTIVITY_MASK;
7838 				if (orig != data)
7839 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7840 			}
7841 		}
7842 	}
7843 }
7844 
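/**
 * si_vce_send_vcepll_ctlreq - notify the VCEPLL of register updates
 *
 * @rdev: radeon_device pointer
 *
 * Toggle VCEPLL_CTLREQ and wait for CTLACK and CTLACK2 to be
 * asserted (SI).  The UPLL_CTL* masks are reused for the VCEPLL
 * register here.
 * Returns 0 for success, -ETIMEDOUT if the PLL did not acknowledge.
 */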
7845 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7846 {
7847 	unsigned i;
7848 
7849 	/* make sure VCEPLL_CTLREQ is deasserted */
7850 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7851 
7852 	mdelay(10);
7853 
7854 	/* assert UPLL_CTLREQ */
7855 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7856 
7857 	/* wait for CTLACK and CTLACK2 to get asserted */
7858 	for (i = 0; i < 100; ++i) {
7859 		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7860 		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7861 			break;
7862 		mdelay(10);
7863 	}
7864 
7865 	/* deassert UPLL_CTLREQ */
7866 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7867 
7868 	if (i == 100) {
7869 		DRM_ERROR("Timeout setting VCE clocks!\n");
7870 		return -ETIMEDOUT;
7871 	}
7872 
7873 	return 0;
7874 }
7875 
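/**
 * si_set_vce_clocks - program the VCE PLL (VCEPLL)
 *
 * @rdev: radeon_device pointer
 * @evclk: requested evclk frequency, 0 to put the PLL to sleep
 * @ecclk: requested ecclk frequency, 0 to put the PLL to sleep
 *
 * Bypass EVCLK/ECCLK with BCLK, compute the VCEPLL dividers for
 * the requested clocks and switch the PLL back from bypass to
 * normal mode (SI).
 * Returns 0 for success, error for failure.
 */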
7876 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
7877 {
7878 	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
7879 	int r;
7880 
7881 	/* bypass evclk and ecclk with bclk */
7882 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7883 		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
7884 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7885 
7886 	/* put PLL in bypass mode */
7887 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
7888 		     ~VCEPLL_BYPASS_EN_MASK);
7889 
7890 	if (!evclk || !ecclk) {
7891 		/* keep the Bypass mode, put PLL to sleep */
7892 		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7893 			     ~VCEPLL_SLEEP_MASK);
7894 		return 0;
7895 	}
7896 
7897 	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
7898 					  16384, 0x03FFFFFF, 0, 128, 5,
7899 					  &fb_div, &evclk_div, &ecclk_div);
7900 	if (r)
7901 		return r;
7902 
7903 	/* set RESET_ANTI_MUX to 0 */
7904 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7905 
7906 	/* set VCO_MODE to 1 */
7907 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
7908 		     ~VCEPLL_VCO_MODE_MASK);
7909 
7910 	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
7911 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7912 		     ~VCEPLL_SLEEP_MASK);
7913 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
7914 
7915 	/* deassert VCEPLL_RESET */
7916 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7917 
7918 	mdelay(1);
7919 
7920 	r = si_vce_send_vcepll_ctlreq(rdev);
7921 	if (r)
7922 		return r;
7923 
7924 	/* assert VCEPLL_RESET again */
7925 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
7926 
7927 	/* disable spread spectrum. */
7928 	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7929 
7930 	/* set feedback divider */
7931 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
7932 
7933 	/* set ref divider to 0 */
7934 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
7935 
7936 	/* set PDIV_A and PDIV_B */
7937 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7938 		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
7939 		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
7940 
7941 	/* give the PLL some time to settle */
7942 	mdelay(15);
7943 
7944 	/* deassert PLL_RESET */
7945 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7946 
7947 	mdelay(15);
7948 
7949 	/* switch from bypass mode to normal mode */
7950 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
7951 
7952 	r = si_vce_send_vcepll_ctlreq(rdev);
7953 	if (r)
7954 		return r;
7955 
7956 	/* switch EVCLK and ECCLK selection */
7957 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7958 		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
7959 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7960 
7961 	mdelay(100);
7962 
7963 	return 0;
7964 }
7965