xref: /linux/drivers/gpu/drm/radeon/si.c (revision da5b2ad1c2f18834cb1ce429e2e5a5cf5cbdf21b)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 
25 #include <linux/firmware.h>
26 #include <linux/module.h>
27 #include <linux/pci.h>
28 #include <linux/slab.h>
29 
30 #include <drm/drm_vblank.h>
31 #include <drm/radeon_drm.h>
32 
33 #include "atom.h"
34 #include "clearstate_si.h"
35 #include "evergreen.h"
36 #include "r600.h"
37 #include "radeon.h"
38 #include "radeon_asic.h"
39 #include "radeon_audio.h"
40 #include "radeon_ucode.h"
41 #include "si_blit_shaders.h"
42 #include "si.h"
43 #include "sid.h"
44 
45 
46 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
47 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
48 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
49 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
50 MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
51 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
52 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
53 
54 MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
55 MODULE_FIRMWARE("radeon/tahiti_me.bin");
56 MODULE_FIRMWARE("radeon/tahiti_ce.bin");
57 MODULE_FIRMWARE("radeon/tahiti_mc.bin");
58 MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
59 MODULE_FIRMWARE("radeon/tahiti_smc.bin");
60 
61 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
62 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
63 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
64 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
65 MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
66 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
67 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
68 
69 MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
70 MODULE_FIRMWARE("radeon/pitcairn_me.bin");
71 MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
72 MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
73 MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
74 MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
75 MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");
76 
77 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
78 MODULE_FIRMWARE("radeon/VERDE_me.bin");
79 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
80 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
81 MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
82 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
83 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
84 
85 MODULE_FIRMWARE("radeon/verde_pfp.bin");
86 MODULE_FIRMWARE("radeon/verde_me.bin");
87 MODULE_FIRMWARE("radeon/verde_ce.bin");
88 MODULE_FIRMWARE("radeon/verde_mc.bin");
89 MODULE_FIRMWARE("radeon/verde_rlc.bin");
90 MODULE_FIRMWARE("radeon/verde_smc.bin");
91 MODULE_FIRMWARE("radeon/verde_k_smc.bin");
92 
93 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
94 MODULE_FIRMWARE("radeon/OLAND_me.bin");
95 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
96 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
97 MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
98 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
99 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
100 
101 MODULE_FIRMWARE("radeon/oland_pfp.bin");
102 MODULE_FIRMWARE("radeon/oland_me.bin");
103 MODULE_FIRMWARE("radeon/oland_ce.bin");
104 MODULE_FIRMWARE("radeon/oland_mc.bin");
105 MODULE_FIRMWARE("radeon/oland_rlc.bin");
106 MODULE_FIRMWARE("radeon/oland_smc.bin");
107 MODULE_FIRMWARE("radeon/oland_k_smc.bin");
108 
109 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
110 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
111 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
112 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
113 MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
114 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
115 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
116 
117 MODULE_FIRMWARE("radeon/hainan_pfp.bin");
118 MODULE_FIRMWARE("radeon/hainan_me.bin");
119 MODULE_FIRMWARE("radeon/hainan_ce.bin");
120 MODULE_FIRMWARE("radeon/hainan_mc.bin");
121 MODULE_FIRMWARE("radeon/hainan_rlc.bin");
122 MODULE_FIRMWARE("radeon/hainan_smc.bin");
123 MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
124 MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");
125 
126 MODULE_FIRMWARE("radeon/si58_mc.bin");
127 
128 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
129 static void si_pcie_gen3_enable(struct radeon_device *rdev);
130 static void si_program_aspm(struct radeon_device *rdev);
131 extern void sumo_rlc_fini(struct radeon_device *rdev);
132 extern int sumo_rlc_init(struct radeon_device *rdev);
133 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
134 					 bool enable);
135 static void si_init_pg(struct radeon_device *rdev);
136 static void si_init_cg(struct radeon_device *rdev);
137 static void si_fini_pg(struct radeon_device *rdev);
138 static void si_fini_cg(struct radeon_device *rdev);
139 static void si_rlc_stop(struct radeon_device *rdev);
140 
141 static const u32 crtc_offsets[] = {
142 	EVERGREEN_CRTC0_REGISTER_OFFSET,
143 	EVERGREEN_CRTC1_REGISTER_OFFSET,
144 	EVERGREEN_CRTC2_REGISTER_OFFSET,
145 	EVERGREEN_CRTC3_REGISTER_OFFSET,
146 	EVERGREEN_CRTC4_REGISTER_OFFSET,
147 	EVERGREEN_CRTC5_REGISTER_OFFSET
148 };
149 
150 static const u32 si_disp_int_status[] = {
151 	DISP_INTERRUPT_STATUS,
152 	DISP_INTERRUPT_STATUS_CONTINUE,
153 	DISP_INTERRUPT_STATUS_CONTINUE2,
154 	DISP_INTERRUPT_STATUS_CONTINUE3,
155 	DISP_INTERRUPT_STATUS_CONTINUE4,
156 	DISP_INTERRUPT_STATUS_CONTINUE5
157 };
158 
/*
 * Address of the x-th DC_HPD register of each kind, computed from the
 * corresponding DC_HPD1_* register: index 0 yields the HPD1 register itself
 * and every further index adds 0xc.
 *
 * The argument is parenthesized so that an expression such as (i + 1) is
 * scaled as a whole instead of only its last operand.
 */
#define DC_HPDx_CONTROL(x)        (DC_HPD1_CONTROL     + ((x) * 0xc))
#define DC_HPDx_INT_CONTROL(x)    (DC_HPD1_INT_CONTROL + ((x) * 0xc))
#define DC_HPDx_INT_STATUS_REG(x) (DC_HPD1_INT_STATUS  + ((x) * 0xc))
162 
163 static const u32 verde_rlc_save_restore_register_list[] = {
164 	(0x8000 << 16) | (0x98f4 >> 2),
165 	0x00000000,
166 	(0x8040 << 16) | (0x98f4 >> 2),
167 	0x00000000,
168 	(0x8000 << 16) | (0xe80 >> 2),
169 	0x00000000,
170 	(0x8040 << 16) | (0xe80 >> 2),
171 	0x00000000,
172 	(0x8000 << 16) | (0x89bc >> 2),
173 	0x00000000,
174 	(0x8040 << 16) | (0x89bc >> 2),
175 	0x00000000,
176 	(0x8000 << 16) | (0x8c1c >> 2),
177 	0x00000000,
178 	(0x8040 << 16) | (0x8c1c >> 2),
179 	0x00000000,
180 	(0x9c00 << 16) | (0x98f0 >> 2),
181 	0x00000000,
182 	(0x9c00 << 16) | (0xe7c >> 2),
183 	0x00000000,
184 	(0x8000 << 16) | (0x9148 >> 2),
185 	0x00000000,
186 	(0x8040 << 16) | (0x9148 >> 2),
187 	0x00000000,
188 	(0x9c00 << 16) | (0x9150 >> 2),
189 	0x00000000,
190 	(0x9c00 << 16) | (0x897c >> 2),
191 	0x00000000,
192 	(0x9c00 << 16) | (0x8d8c >> 2),
193 	0x00000000,
194 	(0x9c00 << 16) | (0xac54 >> 2),
195 	0X00000000,
196 	0x3,
197 	(0x9c00 << 16) | (0x98f8 >> 2),
198 	0x00000000,
199 	(0x9c00 << 16) | (0x9910 >> 2),
200 	0x00000000,
201 	(0x9c00 << 16) | (0x9914 >> 2),
202 	0x00000000,
203 	(0x9c00 << 16) | (0x9918 >> 2),
204 	0x00000000,
205 	(0x9c00 << 16) | (0x991c >> 2),
206 	0x00000000,
207 	(0x9c00 << 16) | (0x9920 >> 2),
208 	0x00000000,
209 	(0x9c00 << 16) | (0x9924 >> 2),
210 	0x00000000,
211 	(0x9c00 << 16) | (0x9928 >> 2),
212 	0x00000000,
213 	(0x9c00 << 16) | (0x992c >> 2),
214 	0x00000000,
215 	(0x9c00 << 16) | (0x9930 >> 2),
216 	0x00000000,
217 	(0x9c00 << 16) | (0x9934 >> 2),
218 	0x00000000,
219 	(0x9c00 << 16) | (0x9938 >> 2),
220 	0x00000000,
221 	(0x9c00 << 16) | (0x993c >> 2),
222 	0x00000000,
223 	(0x9c00 << 16) | (0x9940 >> 2),
224 	0x00000000,
225 	(0x9c00 << 16) | (0x9944 >> 2),
226 	0x00000000,
227 	(0x9c00 << 16) | (0x9948 >> 2),
228 	0x00000000,
229 	(0x9c00 << 16) | (0x994c >> 2),
230 	0x00000000,
231 	(0x9c00 << 16) | (0x9950 >> 2),
232 	0x00000000,
233 	(0x9c00 << 16) | (0x9954 >> 2),
234 	0x00000000,
235 	(0x9c00 << 16) | (0x9958 >> 2),
236 	0x00000000,
237 	(0x9c00 << 16) | (0x995c >> 2),
238 	0x00000000,
239 	(0x9c00 << 16) | (0x9960 >> 2),
240 	0x00000000,
241 	(0x9c00 << 16) | (0x9964 >> 2),
242 	0x00000000,
243 	(0x9c00 << 16) | (0x9968 >> 2),
244 	0x00000000,
245 	(0x9c00 << 16) | (0x996c >> 2),
246 	0x00000000,
247 	(0x9c00 << 16) | (0x9970 >> 2),
248 	0x00000000,
249 	(0x9c00 << 16) | (0x9974 >> 2),
250 	0x00000000,
251 	(0x9c00 << 16) | (0x9978 >> 2),
252 	0x00000000,
253 	(0x9c00 << 16) | (0x997c >> 2),
254 	0x00000000,
255 	(0x9c00 << 16) | (0x9980 >> 2),
256 	0x00000000,
257 	(0x9c00 << 16) | (0x9984 >> 2),
258 	0x00000000,
259 	(0x9c00 << 16) | (0x9988 >> 2),
260 	0x00000000,
261 	(0x9c00 << 16) | (0x998c >> 2),
262 	0x00000000,
263 	(0x9c00 << 16) | (0x8c00 >> 2),
264 	0x00000000,
265 	(0x9c00 << 16) | (0x8c14 >> 2),
266 	0x00000000,
267 	(0x9c00 << 16) | (0x8c04 >> 2),
268 	0x00000000,
269 	(0x9c00 << 16) | (0x8c08 >> 2),
270 	0x00000000,
271 	(0x8000 << 16) | (0x9b7c >> 2),
272 	0x00000000,
273 	(0x8040 << 16) | (0x9b7c >> 2),
274 	0x00000000,
275 	(0x8000 << 16) | (0xe84 >> 2),
276 	0x00000000,
277 	(0x8040 << 16) | (0xe84 >> 2),
278 	0x00000000,
279 	(0x8000 << 16) | (0x89c0 >> 2),
280 	0x00000000,
281 	(0x8040 << 16) | (0x89c0 >> 2),
282 	0x00000000,
283 	(0x8000 << 16) | (0x914c >> 2),
284 	0x00000000,
285 	(0x8040 << 16) | (0x914c >> 2),
286 	0x00000000,
287 	(0x8000 << 16) | (0x8c20 >> 2),
288 	0x00000000,
289 	(0x8040 << 16) | (0x8c20 >> 2),
290 	0x00000000,
291 	(0x8000 << 16) | (0x9354 >> 2),
292 	0x00000000,
293 	(0x8040 << 16) | (0x9354 >> 2),
294 	0x00000000,
295 	(0x9c00 << 16) | (0x9060 >> 2),
296 	0x00000000,
297 	(0x9c00 << 16) | (0x9364 >> 2),
298 	0x00000000,
299 	(0x9c00 << 16) | (0x9100 >> 2),
300 	0x00000000,
301 	(0x9c00 << 16) | (0x913c >> 2),
302 	0x00000000,
303 	(0x8000 << 16) | (0x90e0 >> 2),
304 	0x00000000,
305 	(0x8000 << 16) | (0x90e4 >> 2),
306 	0x00000000,
307 	(0x8000 << 16) | (0x90e8 >> 2),
308 	0x00000000,
309 	(0x8040 << 16) | (0x90e0 >> 2),
310 	0x00000000,
311 	(0x8040 << 16) | (0x90e4 >> 2),
312 	0x00000000,
313 	(0x8040 << 16) | (0x90e8 >> 2),
314 	0x00000000,
315 	(0x9c00 << 16) | (0x8bcc >> 2),
316 	0x00000000,
317 	(0x9c00 << 16) | (0x8b24 >> 2),
318 	0x00000000,
319 	(0x9c00 << 16) | (0x88c4 >> 2),
320 	0x00000000,
321 	(0x9c00 << 16) | (0x8e50 >> 2),
322 	0x00000000,
323 	(0x9c00 << 16) | (0x8c0c >> 2),
324 	0x00000000,
325 	(0x9c00 << 16) | (0x8e58 >> 2),
326 	0x00000000,
327 	(0x9c00 << 16) | (0x8e5c >> 2),
328 	0x00000000,
329 	(0x9c00 << 16) | (0x9508 >> 2),
330 	0x00000000,
331 	(0x9c00 << 16) | (0x950c >> 2),
332 	0x00000000,
333 	(0x9c00 << 16) | (0x9494 >> 2),
334 	0x00000000,
335 	(0x9c00 << 16) | (0xac0c >> 2),
336 	0x00000000,
337 	(0x9c00 << 16) | (0xac10 >> 2),
338 	0x00000000,
339 	(0x9c00 << 16) | (0xac14 >> 2),
340 	0x00000000,
341 	(0x9c00 << 16) | (0xae00 >> 2),
342 	0x00000000,
343 	(0x9c00 << 16) | (0xac08 >> 2),
344 	0x00000000,
345 	(0x9c00 << 16) | (0x88d4 >> 2),
346 	0x00000000,
347 	(0x9c00 << 16) | (0x88c8 >> 2),
348 	0x00000000,
349 	(0x9c00 << 16) | (0x88cc >> 2),
350 	0x00000000,
351 	(0x9c00 << 16) | (0x89b0 >> 2),
352 	0x00000000,
353 	(0x9c00 << 16) | (0x8b10 >> 2),
354 	0x00000000,
355 	(0x9c00 << 16) | (0x8a14 >> 2),
356 	0x00000000,
357 	(0x9c00 << 16) | (0x9830 >> 2),
358 	0x00000000,
359 	(0x9c00 << 16) | (0x9834 >> 2),
360 	0x00000000,
361 	(0x9c00 << 16) | (0x9838 >> 2),
362 	0x00000000,
363 	(0x9c00 << 16) | (0x9a10 >> 2),
364 	0x00000000,
365 	(0x8000 << 16) | (0x9870 >> 2),
366 	0x00000000,
367 	(0x8000 << 16) | (0x9874 >> 2),
368 	0x00000000,
369 	(0x8001 << 16) | (0x9870 >> 2),
370 	0x00000000,
371 	(0x8001 << 16) | (0x9874 >> 2),
372 	0x00000000,
373 	(0x8040 << 16) | (0x9870 >> 2),
374 	0x00000000,
375 	(0x8040 << 16) | (0x9874 >> 2),
376 	0x00000000,
377 	(0x8041 << 16) | (0x9870 >> 2),
378 	0x00000000,
379 	(0x8041 << 16) | (0x9874 >> 2),
380 	0x00000000,
381 	0x00000000
382 };
383 
384 static const u32 tahiti_golden_rlc_registers[] = {
385 	0xc424, 0xffffffff, 0x00601005,
386 	0xc47c, 0xffffffff, 0x10104040,
387 	0xc488, 0xffffffff, 0x0100000a,
388 	0xc314, 0xffffffff, 0x00000800,
389 	0xc30c, 0xffffffff, 0x800000f4,
390 	0xf4a8, 0xffffffff, 0x00000000
391 };
392 
393 static const u32 tahiti_golden_registers[] = {
394 	0x9a10, 0x00010000, 0x00018208,
395 	0x9830, 0xffffffff, 0x00000000,
396 	0x9834, 0xf00fffff, 0x00000400,
397 	0x9838, 0x0002021c, 0x00020200,
398 	0xc78, 0x00000080, 0x00000000,
399 	0xd030, 0x000300c0, 0x00800040,
400 	0xd830, 0x000300c0, 0x00800040,
401 	0x5bb0, 0x000000f0, 0x00000070,
402 	0x5bc0, 0x00200000, 0x50100000,
403 	0x7030, 0x31000311, 0x00000011,
404 	0x277c, 0x00000003, 0x000007ff,
405 	0x240c, 0x000007ff, 0x00000000,
406 	0x8a14, 0xf000001f, 0x00000007,
407 	0x8b24, 0xffffffff, 0x00ffffff,
408 	0x8b10, 0x0000ff0f, 0x00000000,
409 	0x28a4c, 0x07ffffff, 0x4e000000,
410 	0x28350, 0x3f3f3fff, 0x2a00126a,
411 	0x30, 0x000000ff, 0x0040,
412 	0x34, 0x00000040, 0x00004040,
413 	0x9100, 0x07ffffff, 0x03000000,
414 	0x8e88, 0x01ff1f3f, 0x00000000,
415 	0x8e84, 0x01ff1f3f, 0x00000000,
416 	0x9060, 0x0000007f, 0x00000020,
417 	0x9508, 0x00010000, 0x00010000,
418 	0xac14, 0x00000200, 0x000002fb,
419 	0xac10, 0xffffffff, 0x0000543b,
420 	0xac0c, 0xffffffff, 0xa9210876,
421 	0x88d0, 0xffffffff, 0x000fff40,
422 	0x88d4, 0x0000001f, 0x00000010,
423 	0x1410, 0x20000000, 0x20fffed8,
424 	0x15c0, 0x000c0fc0, 0x000c0400
425 };
426 
427 static const u32 tahiti_golden_registers2[] = {
428 	0xc64, 0x00000001, 0x00000001
429 };
430 
431 static const u32 pitcairn_golden_rlc_registers[] = {
432 	0xc424, 0xffffffff, 0x00601004,
433 	0xc47c, 0xffffffff, 0x10102020,
434 	0xc488, 0xffffffff, 0x01000020,
435 	0xc314, 0xffffffff, 0x00000800,
436 	0xc30c, 0xffffffff, 0x800000a4
437 };
438 
439 static const u32 pitcairn_golden_registers[] = {
440 	0x9a10, 0x00010000, 0x00018208,
441 	0x9830, 0xffffffff, 0x00000000,
442 	0x9834, 0xf00fffff, 0x00000400,
443 	0x9838, 0x0002021c, 0x00020200,
444 	0xc78, 0x00000080, 0x00000000,
445 	0xd030, 0x000300c0, 0x00800040,
446 	0xd830, 0x000300c0, 0x00800040,
447 	0x5bb0, 0x000000f0, 0x00000070,
448 	0x5bc0, 0x00200000, 0x50100000,
449 	0x7030, 0x31000311, 0x00000011,
450 	0x2ae4, 0x00073ffe, 0x000022a2,
451 	0x240c, 0x000007ff, 0x00000000,
452 	0x8a14, 0xf000001f, 0x00000007,
453 	0x8b24, 0xffffffff, 0x00ffffff,
454 	0x8b10, 0x0000ff0f, 0x00000000,
455 	0x28a4c, 0x07ffffff, 0x4e000000,
456 	0x28350, 0x3f3f3fff, 0x2a00126a,
457 	0x30, 0x000000ff, 0x0040,
458 	0x34, 0x00000040, 0x00004040,
459 	0x9100, 0x07ffffff, 0x03000000,
460 	0x9060, 0x0000007f, 0x00000020,
461 	0x9508, 0x00010000, 0x00010000,
462 	0xac14, 0x000003ff, 0x000000f7,
463 	0xac10, 0xffffffff, 0x00000000,
464 	0xac0c, 0xffffffff, 0x32761054,
465 	0x88d4, 0x0000001f, 0x00000010,
466 	0x15c0, 0x000c0fc0, 0x000c0400
467 };
468 
469 static const u32 verde_golden_rlc_registers[] = {
470 	0xc424, 0xffffffff, 0x033f1005,
471 	0xc47c, 0xffffffff, 0x10808020,
472 	0xc488, 0xffffffff, 0x00800008,
473 	0xc314, 0xffffffff, 0x00001000,
474 	0xc30c, 0xffffffff, 0x80010014
475 };
476 
477 static const u32 verde_golden_registers[] = {
478 	0x9a10, 0x00010000, 0x00018208,
479 	0x9830, 0xffffffff, 0x00000000,
480 	0x9834, 0xf00fffff, 0x00000400,
481 	0x9838, 0x0002021c, 0x00020200,
482 	0xc78, 0x00000080, 0x00000000,
483 	0xd030, 0x000300c0, 0x00800040,
484 	0xd030, 0x000300c0, 0x00800040,
485 	0xd830, 0x000300c0, 0x00800040,
486 	0xd830, 0x000300c0, 0x00800040,
487 	0x5bb0, 0x000000f0, 0x00000070,
488 	0x5bc0, 0x00200000, 0x50100000,
489 	0x7030, 0x31000311, 0x00000011,
490 	0x2ae4, 0x00073ffe, 0x000022a2,
491 	0x2ae4, 0x00073ffe, 0x000022a2,
492 	0x2ae4, 0x00073ffe, 0x000022a2,
493 	0x240c, 0x000007ff, 0x00000000,
494 	0x240c, 0x000007ff, 0x00000000,
495 	0x240c, 0x000007ff, 0x00000000,
496 	0x8a14, 0xf000001f, 0x00000007,
497 	0x8a14, 0xf000001f, 0x00000007,
498 	0x8a14, 0xf000001f, 0x00000007,
499 	0x8b24, 0xffffffff, 0x00ffffff,
500 	0x8b10, 0x0000ff0f, 0x00000000,
501 	0x28a4c, 0x07ffffff, 0x4e000000,
502 	0x28350, 0x3f3f3fff, 0x0000124a,
503 	0x28350, 0x3f3f3fff, 0x0000124a,
504 	0x28350, 0x3f3f3fff, 0x0000124a,
505 	0x30, 0x000000ff, 0x0040,
506 	0x34, 0x00000040, 0x00004040,
507 	0x9100, 0x07ffffff, 0x03000000,
508 	0x9100, 0x07ffffff, 0x03000000,
509 	0x8e88, 0x01ff1f3f, 0x00000000,
510 	0x8e88, 0x01ff1f3f, 0x00000000,
511 	0x8e88, 0x01ff1f3f, 0x00000000,
512 	0x8e84, 0x01ff1f3f, 0x00000000,
513 	0x8e84, 0x01ff1f3f, 0x00000000,
514 	0x8e84, 0x01ff1f3f, 0x00000000,
515 	0x9060, 0x0000007f, 0x00000020,
516 	0x9508, 0x00010000, 0x00010000,
517 	0xac14, 0x000003ff, 0x00000003,
518 	0xac14, 0x000003ff, 0x00000003,
519 	0xac14, 0x000003ff, 0x00000003,
520 	0xac10, 0xffffffff, 0x00000000,
521 	0xac10, 0xffffffff, 0x00000000,
522 	0xac10, 0xffffffff, 0x00000000,
523 	0xac0c, 0xffffffff, 0x00001032,
524 	0xac0c, 0xffffffff, 0x00001032,
525 	0xac0c, 0xffffffff, 0x00001032,
526 	0x88d4, 0x0000001f, 0x00000010,
527 	0x88d4, 0x0000001f, 0x00000010,
528 	0x88d4, 0x0000001f, 0x00000010,
529 	0x15c0, 0x000c0fc0, 0x000c0400
530 };
531 
532 static const u32 oland_golden_rlc_registers[] = {
533 	0xc424, 0xffffffff, 0x00601005,
534 	0xc47c, 0xffffffff, 0x10104040,
535 	0xc488, 0xffffffff, 0x0100000a,
536 	0xc314, 0xffffffff, 0x00000800,
537 	0xc30c, 0xffffffff, 0x800000f4
538 };
539 
540 static const u32 oland_golden_registers[] = {
541 	0x9a10, 0x00010000, 0x00018208,
542 	0x9830, 0xffffffff, 0x00000000,
543 	0x9834, 0xf00fffff, 0x00000400,
544 	0x9838, 0x0002021c, 0x00020200,
545 	0xc78, 0x00000080, 0x00000000,
546 	0xd030, 0x000300c0, 0x00800040,
547 	0xd830, 0x000300c0, 0x00800040,
548 	0x5bb0, 0x000000f0, 0x00000070,
549 	0x5bc0, 0x00200000, 0x50100000,
550 	0x7030, 0x31000311, 0x00000011,
551 	0x2ae4, 0x00073ffe, 0x000022a2,
552 	0x240c, 0x000007ff, 0x00000000,
553 	0x8a14, 0xf000001f, 0x00000007,
554 	0x8b24, 0xffffffff, 0x00ffffff,
555 	0x8b10, 0x0000ff0f, 0x00000000,
556 	0x28a4c, 0x07ffffff, 0x4e000000,
557 	0x28350, 0x3f3f3fff, 0x00000082,
558 	0x30, 0x000000ff, 0x0040,
559 	0x34, 0x00000040, 0x00004040,
560 	0x9100, 0x07ffffff, 0x03000000,
561 	0x9060, 0x0000007f, 0x00000020,
562 	0x9508, 0x00010000, 0x00010000,
563 	0xac14, 0x000003ff, 0x000000f3,
564 	0xac10, 0xffffffff, 0x00000000,
565 	0xac0c, 0xffffffff, 0x00003210,
566 	0x88d4, 0x0000001f, 0x00000010,
567 	0x15c0, 0x000c0fc0, 0x000c0400
568 };
569 
570 static const u32 hainan_golden_registers[] = {
571 	0x9a10, 0x00010000, 0x00018208,
572 	0x9830, 0xffffffff, 0x00000000,
573 	0x9834, 0xf00fffff, 0x00000400,
574 	0x9838, 0x0002021c, 0x00020200,
575 	0xd0c0, 0xff000fff, 0x00000100,
576 	0xd030, 0x000300c0, 0x00800040,
577 	0xd8c0, 0xff000fff, 0x00000100,
578 	0xd830, 0x000300c0, 0x00800040,
579 	0x2ae4, 0x00073ffe, 0x000022a2,
580 	0x240c, 0x000007ff, 0x00000000,
581 	0x8a14, 0xf000001f, 0x00000007,
582 	0x8b24, 0xffffffff, 0x00ffffff,
583 	0x8b10, 0x0000ff0f, 0x00000000,
584 	0x28a4c, 0x07ffffff, 0x4e000000,
585 	0x28350, 0x3f3f3fff, 0x00000000,
586 	0x30, 0x000000ff, 0x0040,
587 	0x34, 0x00000040, 0x00004040,
588 	0x9100, 0x03e00000, 0x03600000,
589 	0x9060, 0x0000007f, 0x00000020,
590 	0x9508, 0x00010000, 0x00010000,
591 	0xac14, 0x000003ff, 0x000000f1,
592 	0xac10, 0xffffffff, 0x00000000,
593 	0xac0c, 0xffffffff, 0x00003210,
594 	0x88d4, 0x0000001f, 0x00000010,
595 	0x15c0, 0x000c0fc0, 0x000c0400
596 };
597 
598 static const u32 hainan_golden_registers2[] = {
599 	0x98f8, 0xffffffff, 0x02010001
600 };
601 
602 static const u32 tahiti_mgcg_cgcg_init[] = {
603 	0xc400, 0xffffffff, 0xfffffffc,
604 	0x802c, 0xffffffff, 0xe0000000,
605 	0x9a60, 0xffffffff, 0x00000100,
606 	0x92a4, 0xffffffff, 0x00000100,
607 	0xc164, 0xffffffff, 0x00000100,
608 	0x9774, 0xffffffff, 0x00000100,
609 	0x8984, 0xffffffff, 0x06000100,
610 	0x8a18, 0xffffffff, 0x00000100,
611 	0x92a0, 0xffffffff, 0x00000100,
612 	0xc380, 0xffffffff, 0x00000100,
613 	0x8b28, 0xffffffff, 0x00000100,
614 	0x9144, 0xffffffff, 0x00000100,
615 	0x8d88, 0xffffffff, 0x00000100,
616 	0x8d8c, 0xffffffff, 0x00000100,
617 	0x9030, 0xffffffff, 0x00000100,
618 	0x9034, 0xffffffff, 0x00000100,
619 	0x9038, 0xffffffff, 0x00000100,
620 	0x903c, 0xffffffff, 0x00000100,
621 	0xad80, 0xffffffff, 0x00000100,
622 	0xac54, 0xffffffff, 0x00000100,
623 	0x897c, 0xffffffff, 0x06000100,
624 	0x9868, 0xffffffff, 0x00000100,
625 	0x9510, 0xffffffff, 0x00000100,
626 	0xaf04, 0xffffffff, 0x00000100,
627 	0xae04, 0xffffffff, 0x00000100,
628 	0x949c, 0xffffffff, 0x00000100,
629 	0x802c, 0xffffffff, 0xe0000000,
630 	0x9160, 0xffffffff, 0x00010000,
631 	0x9164, 0xffffffff, 0x00030002,
632 	0x9168, 0xffffffff, 0x00040007,
633 	0x916c, 0xffffffff, 0x00060005,
634 	0x9170, 0xffffffff, 0x00090008,
635 	0x9174, 0xffffffff, 0x00020001,
636 	0x9178, 0xffffffff, 0x00040003,
637 	0x917c, 0xffffffff, 0x00000007,
638 	0x9180, 0xffffffff, 0x00060005,
639 	0x9184, 0xffffffff, 0x00090008,
640 	0x9188, 0xffffffff, 0x00030002,
641 	0x918c, 0xffffffff, 0x00050004,
642 	0x9190, 0xffffffff, 0x00000008,
643 	0x9194, 0xffffffff, 0x00070006,
644 	0x9198, 0xffffffff, 0x000a0009,
645 	0x919c, 0xffffffff, 0x00040003,
646 	0x91a0, 0xffffffff, 0x00060005,
647 	0x91a4, 0xffffffff, 0x00000009,
648 	0x91a8, 0xffffffff, 0x00080007,
649 	0x91ac, 0xffffffff, 0x000b000a,
650 	0x91b0, 0xffffffff, 0x00050004,
651 	0x91b4, 0xffffffff, 0x00070006,
652 	0x91b8, 0xffffffff, 0x0008000b,
653 	0x91bc, 0xffffffff, 0x000a0009,
654 	0x91c0, 0xffffffff, 0x000d000c,
655 	0x91c4, 0xffffffff, 0x00060005,
656 	0x91c8, 0xffffffff, 0x00080007,
657 	0x91cc, 0xffffffff, 0x0000000b,
658 	0x91d0, 0xffffffff, 0x000a0009,
659 	0x91d4, 0xffffffff, 0x000d000c,
660 	0x91d8, 0xffffffff, 0x00070006,
661 	0x91dc, 0xffffffff, 0x00090008,
662 	0x91e0, 0xffffffff, 0x0000000c,
663 	0x91e4, 0xffffffff, 0x000b000a,
664 	0x91e8, 0xffffffff, 0x000e000d,
665 	0x91ec, 0xffffffff, 0x00080007,
666 	0x91f0, 0xffffffff, 0x000a0009,
667 	0x91f4, 0xffffffff, 0x0000000d,
668 	0x91f8, 0xffffffff, 0x000c000b,
669 	0x91fc, 0xffffffff, 0x000f000e,
670 	0x9200, 0xffffffff, 0x00090008,
671 	0x9204, 0xffffffff, 0x000b000a,
672 	0x9208, 0xffffffff, 0x000c000f,
673 	0x920c, 0xffffffff, 0x000e000d,
674 	0x9210, 0xffffffff, 0x00110010,
675 	0x9214, 0xffffffff, 0x000a0009,
676 	0x9218, 0xffffffff, 0x000c000b,
677 	0x921c, 0xffffffff, 0x0000000f,
678 	0x9220, 0xffffffff, 0x000e000d,
679 	0x9224, 0xffffffff, 0x00110010,
680 	0x9228, 0xffffffff, 0x000b000a,
681 	0x922c, 0xffffffff, 0x000d000c,
682 	0x9230, 0xffffffff, 0x00000010,
683 	0x9234, 0xffffffff, 0x000f000e,
684 	0x9238, 0xffffffff, 0x00120011,
685 	0x923c, 0xffffffff, 0x000c000b,
686 	0x9240, 0xffffffff, 0x000e000d,
687 	0x9244, 0xffffffff, 0x00000011,
688 	0x9248, 0xffffffff, 0x0010000f,
689 	0x924c, 0xffffffff, 0x00130012,
690 	0x9250, 0xffffffff, 0x000d000c,
691 	0x9254, 0xffffffff, 0x000f000e,
692 	0x9258, 0xffffffff, 0x00100013,
693 	0x925c, 0xffffffff, 0x00120011,
694 	0x9260, 0xffffffff, 0x00150014,
695 	0x9264, 0xffffffff, 0x000e000d,
696 	0x9268, 0xffffffff, 0x0010000f,
697 	0x926c, 0xffffffff, 0x00000013,
698 	0x9270, 0xffffffff, 0x00120011,
699 	0x9274, 0xffffffff, 0x00150014,
700 	0x9278, 0xffffffff, 0x000f000e,
701 	0x927c, 0xffffffff, 0x00110010,
702 	0x9280, 0xffffffff, 0x00000014,
703 	0x9284, 0xffffffff, 0x00130012,
704 	0x9288, 0xffffffff, 0x00160015,
705 	0x928c, 0xffffffff, 0x0010000f,
706 	0x9290, 0xffffffff, 0x00120011,
707 	0x9294, 0xffffffff, 0x00000015,
708 	0x9298, 0xffffffff, 0x00140013,
709 	0x929c, 0xffffffff, 0x00170016,
710 	0x9150, 0xffffffff, 0x96940200,
711 	0x8708, 0xffffffff, 0x00900100,
712 	0xc478, 0xffffffff, 0x00000080,
713 	0xc404, 0xffffffff, 0x0020003f,
714 	0x30, 0xffffffff, 0x0000001c,
715 	0x34, 0x000f0000, 0x000f0000,
716 	0x160c, 0xffffffff, 0x00000100,
717 	0x1024, 0xffffffff, 0x00000100,
718 	0x102c, 0x00000101, 0x00000000,
719 	0x20a8, 0xffffffff, 0x00000104,
720 	0x264c, 0x000c0000, 0x000c0000,
721 	0x2648, 0x000c0000, 0x000c0000,
722 	0x55e4, 0xff000fff, 0x00000100,
723 	0x55e8, 0x00000001, 0x00000001,
724 	0x2f50, 0x00000001, 0x00000001,
725 	0x30cc, 0xc0000fff, 0x00000104,
726 	0xc1e4, 0x00000001, 0x00000001,
727 	0xd0c0, 0xfffffff0, 0x00000100,
728 	0xd8c0, 0xfffffff0, 0x00000100
729 };
730 
731 static const u32 pitcairn_mgcg_cgcg_init[] = {
732 	0xc400, 0xffffffff, 0xfffffffc,
733 	0x802c, 0xffffffff, 0xe0000000,
734 	0x9a60, 0xffffffff, 0x00000100,
735 	0x92a4, 0xffffffff, 0x00000100,
736 	0xc164, 0xffffffff, 0x00000100,
737 	0x9774, 0xffffffff, 0x00000100,
738 	0x8984, 0xffffffff, 0x06000100,
739 	0x8a18, 0xffffffff, 0x00000100,
740 	0x92a0, 0xffffffff, 0x00000100,
741 	0xc380, 0xffffffff, 0x00000100,
742 	0x8b28, 0xffffffff, 0x00000100,
743 	0x9144, 0xffffffff, 0x00000100,
744 	0x8d88, 0xffffffff, 0x00000100,
745 	0x8d8c, 0xffffffff, 0x00000100,
746 	0x9030, 0xffffffff, 0x00000100,
747 	0x9034, 0xffffffff, 0x00000100,
748 	0x9038, 0xffffffff, 0x00000100,
749 	0x903c, 0xffffffff, 0x00000100,
750 	0xad80, 0xffffffff, 0x00000100,
751 	0xac54, 0xffffffff, 0x00000100,
752 	0x897c, 0xffffffff, 0x06000100,
753 	0x9868, 0xffffffff, 0x00000100,
754 	0x9510, 0xffffffff, 0x00000100,
755 	0xaf04, 0xffffffff, 0x00000100,
756 	0xae04, 0xffffffff, 0x00000100,
757 	0x949c, 0xffffffff, 0x00000100,
758 	0x802c, 0xffffffff, 0xe0000000,
759 	0x9160, 0xffffffff, 0x00010000,
760 	0x9164, 0xffffffff, 0x00030002,
761 	0x9168, 0xffffffff, 0x00040007,
762 	0x916c, 0xffffffff, 0x00060005,
763 	0x9170, 0xffffffff, 0x00090008,
764 	0x9174, 0xffffffff, 0x00020001,
765 	0x9178, 0xffffffff, 0x00040003,
766 	0x917c, 0xffffffff, 0x00000007,
767 	0x9180, 0xffffffff, 0x00060005,
768 	0x9184, 0xffffffff, 0x00090008,
769 	0x9188, 0xffffffff, 0x00030002,
770 	0x918c, 0xffffffff, 0x00050004,
771 	0x9190, 0xffffffff, 0x00000008,
772 	0x9194, 0xffffffff, 0x00070006,
773 	0x9198, 0xffffffff, 0x000a0009,
774 	0x919c, 0xffffffff, 0x00040003,
775 	0x91a0, 0xffffffff, 0x00060005,
776 	0x91a4, 0xffffffff, 0x00000009,
777 	0x91a8, 0xffffffff, 0x00080007,
778 	0x91ac, 0xffffffff, 0x000b000a,
779 	0x91b0, 0xffffffff, 0x00050004,
780 	0x91b4, 0xffffffff, 0x00070006,
781 	0x91b8, 0xffffffff, 0x0008000b,
782 	0x91bc, 0xffffffff, 0x000a0009,
783 	0x91c0, 0xffffffff, 0x000d000c,
784 	0x9200, 0xffffffff, 0x00090008,
785 	0x9204, 0xffffffff, 0x000b000a,
786 	0x9208, 0xffffffff, 0x000c000f,
787 	0x920c, 0xffffffff, 0x000e000d,
788 	0x9210, 0xffffffff, 0x00110010,
789 	0x9214, 0xffffffff, 0x000a0009,
790 	0x9218, 0xffffffff, 0x000c000b,
791 	0x921c, 0xffffffff, 0x0000000f,
792 	0x9220, 0xffffffff, 0x000e000d,
793 	0x9224, 0xffffffff, 0x00110010,
794 	0x9228, 0xffffffff, 0x000b000a,
795 	0x922c, 0xffffffff, 0x000d000c,
796 	0x9230, 0xffffffff, 0x00000010,
797 	0x9234, 0xffffffff, 0x000f000e,
798 	0x9238, 0xffffffff, 0x00120011,
799 	0x923c, 0xffffffff, 0x000c000b,
800 	0x9240, 0xffffffff, 0x000e000d,
801 	0x9244, 0xffffffff, 0x00000011,
802 	0x9248, 0xffffffff, 0x0010000f,
803 	0x924c, 0xffffffff, 0x00130012,
804 	0x9250, 0xffffffff, 0x000d000c,
805 	0x9254, 0xffffffff, 0x000f000e,
806 	0x9258, 0xffffffff, 0x00100013,
807 	0x925c, 0xffffffff, 0x00120011,
808 	0x9260, 0xffffffff, 0x00150014,
809 	0x9150, 0xffffffff, 0x96940200,
810 	0x8708, 0xffffffff, 0x00900100,
811 	0xc478, 0xffffffff, 0x00000080,
812 	0xc404, 0xffffffff, 0x0020003f,
813 	0x30, 0xffffffff, 0x0000001c,
814 	0x34, 0x000f0000, 0x000f0000,
815 	0x160c, 0xffffffff, 0x00000100,
816 	0x1024, 0xffffffff, 0x00000100,
817 	0x102c, 0x00000101, 0x00000000,
818 	0x20a8, 0xffffffff, 0x00000104,
819 	0x55e4, 0xff000fff, 0x00000100,
820 	0x55e8, 0x00000001, 0x00000001,
821 	0x2f50, 0x00000001, 0x00000001,
822 	0x30cc, 0xc0000fff, 0x00000104,
823 	0xc1e4, 0x00000001, 0x00000001,
824 	0xd0c0, 0xfffffff0, 0x00000100,
825 	0xd8c0, 0xfffffff0, 0x00000100
826 };
827 
828 static const u32 verde_mgcg_cgcg_init[] = {
829 	0xc400, 0xffffffff, 0xfffffffc,
830 	0x802c, 0xffffffff, 0xe0000000,
831 	0x9a60, 0xffffffff, 0x00000100,
832 	0x92a4, 0xffffffff, 0x00000100,
833 	0xc164, 0xffffffff, 0x00000100,
834 	0x9774, 0xffffffff, 0x00000100,
835 	0x8984, 0xffffffff, 0x06000100,
836 	0x8a18, 0xffffffff, 0x00000100,
837 	0x92a0, 0xffffffff, 0x00000100,
838 	0xc380, 0xffffffff, 0x00000100,
839 	0x8b28, 0xffffffff, 0x00000100,
840 	0x9144, 0xffffffff, 0x00000100,
841 	0x8d88, 0xffffffff, 0x00000100,
842 	0x8d8c, 0xffffffff, 0x00000100,
843 	0x9030, 0xffffffff, 0x00000100,
844 	0x9034, 0xffffffff, 0x00000100,
845 	0x9038, 0xffffffff, 0x00000100,
846 	0x903c, 0xffffffff, 0x00000100,
847 	0xad80, 0xffffffff, 0x00000100,
848 	0xac54, 0xffffffff, 0x00000100,
849 	0x897c, 0xffffffff, 0x06000100,
850 	0x9868, 0xffffffff, 0x00000100,
851 	0x9510, 0xffffffff, 0x00000100,
852 	0xaf04, 0xffffffff, 0x00000100,
853 	0xae04, 0xffffffff, 0x00000100,
854 	0x949c, 0xffffffff, 0x00000100,
855 	0x802c, 0xffffffff, 0xe0000000,
856 	0x9160, 0xffffffff, 0x00010000,
857 	0x9164, 0xffffffff, 0x00030002,
858 	0x9168, 0xffffffff, 0x00040007,
859 	0x916c, 0xffffffff, 0x00060005,
860 	0x9170, 0xffffffff, 0x00090008,
861 	0x9174, 0xffffffff, 0x00020001,
862 	0x9178, 0xffffffff, 0x00040003,
863 	0x917c, 0xffffffff, 0x00000007,
864 	0x9180, 0xffffffff, 0x00060005,
865 	0x9184, 0xffffffff, 0x00090008,
866 	0x9188, 0xffffffff, 0x00030002,
867 	0x918c, 0xffffffff, 0x00050004,
868 	0x9190, 0xffffffff, 0x00000008,
869 	0x9194, 0xffffffff, 0x00070006,
870 	0x9198, 0xffffffff, 0x000a0009,
871 	0x919c, 0xffffffff, 0x00040003,
872 	0x91a0, 0xffffffff, 0x00060005,
873 	0x91a4, 0xffffffff, 0x00000009,
874 	0x91a8, 0xffffffff, 0x00080007,
875 	0x91ac, 0xffffffff, 0x000b000a,
876 	0x91b0, 0xffffffff, 0x00050004,
877 	0x91b4, 0xffffffff, 0x00070006,
878 	0x91b8, 0xffffffff, 0x0008000b,
879 	0x91bc, 0xffffffff, 0x000a0009,
880 	0x91c0, 0xffffffff, 0x000d000c,
881 	0x9200, 0xffffffff, 0x00090008,
882 	0x9204, 0xffffffff, 0x000b000a,
883 	0x9208, 0xffffffff, 0x000c000f,
884 	0x920c, 0xffffffff, 0x000e000d,
885 	0x9210, 0xffffffff, 0x00110010,
886 	0x9214, 0xffffffff, 0x000a0009,
887 	0x9218, 0xffffffff, 0x000c000b,
888 	0x921c, 0xffffffff, 0x0000000f,
889 	0x9220, 0xffffffff, 0x000e000d,
890 	0x9224, 0xffffffff, 0x00110010,
891 	0x9228, 0xffffffff, 0x000b000a,
892 	0x922c, 0xffffffff, 0x000d000c,
893 	0x9230, 0xffffffff, 0x00000010,
894 	0x9234, 0xffffffff, 0x000f000e,
895 	0x9238, 0xffffffff, 0x00120011,
896 	0x923c, 0xffffffff, 0x000c000b,
897 	0x9240, 0xffffffff, 0x000e000d,
898 	0x9244, 0xffffffff, 0x00000011,
899 	0x9248, 0xffffffff, 0x0010000f,
900 	0x924c, 0xffffffff, 0x00130012,
901 	0x9250, 0xffffffff, 0x000d000c,
902 	0x9254, 0xffffffff, 0x000f000e,
903 	0x9258, 0xffffffff, 0x00100013,
904 	0x925c, 0xffffffff, 0x00120011,
905 	0x9260, 0xffffffff, 0x00150014,
906 	0x9150, 0xffffffff, 0x96940200,
907 	0x8708, 0xffffffff, 0x00900100,
908 	0xc478, 0xffffffff, 0x00000080,
909 	0xc404, 0xffffffff, 0x0020003f,
910 	0x30, 0xffffffff, 0x0000001c,
911 	0x34, 0x000f0000, 0x000f0000,
912 	0x160c, 0xffffffff, 0x00000100,
913 	0x1024, 0xffffffff, 0x00000100,
914 	0x102c, 0x00000101, 0x00000000,
915 	0x20a8, 0xffffffff, 0x00000104,
916 	0x264c, 0x000c0000, 0x000c0000,
917 	0x2648, 0x000c0000, 0x000c0000,
918 	0x55e4, 0xff000fff, 0x00000100,
919 	0x55e8, 0x00000001, 0x00000001,
920 	0x2f50, 0x00000001, 0x00000001,
921 	0x30cc, 0xc0000fff, 0x00000104,
922 	0xc1e4, 0x00000001, 0x00000001,
923 	0xd0c0, 0xfffffff0, 0x00000100,
924 	0xd8c0, 0xfffffff0, 0x00000100
925 };
926 
/*
 * Register sequence handed to radeon_program_register_sequence() by
 * si_init_golden_registers() for CHIP_OLAND, after the Oland golden and
 * golden rlc tables.
 * NOTE(review): the rows read like {register offset, mask, value} triples;
 * confirm against radeon_program_register_sequence(), which is not visible
 * in this part of the file.
 */
927 static const u32 oland_mgcg_cgcg_init[] = {
928 	0xc400, 0xffffffff, 0xfffffffc,
929 	0x802c, 0xffffffff, 0xe0000000,
930 	0x9a60, 0xffffffff, 0x00000100,
931 	0x92a4, 0xffffffff, 0x00000100,
932 	0xc164, 0xffffffff, 0x00000100,
933 	0x9774, 0xffffffff, 0x00000100,
934 	0x8984, 0xffffffff, 0x06000100,
935 	0x8a18, 0xffffffff, 0x00000100,
936 	0x92a0, 0xffffffff, 0x00000100,
937 	0xc380, 0xffffffff, 0x00000100,
938 	0x8b28, 0xffffffff, 0x00000100,
939 	0x9144, 0xffffffff, 0x00000100,
940 	0x8d88, 0xffffffff, 0x00000100,
941 	0x8d8c, 0xffffffff, 0x00000100,
942 	0x9030, 0xffffffff, 0x00000100,
943 	0x9034, 0xffffffff, 0x00000100,
944 	0x9038, 0xffffffff, 0x00000100,
945 	0x903c, 0xffffffff, 0x00000100,
946 	0xad80, 0xffffffff, 0x00000100,
947 	0xac54, 0xffffffff, 0x00000100,
948 	0x897c, 0xffffffff, 0x06000100,
949 	0x9868, 0xffffffff, 0x00000100,
950 	0x9510, 0xffffffff, 0x00000100,
951 	0xaf04, 0xffffffff, 0x00000100,
952 	0xae04, 0xffffffff, 0x00000100,
953 	0x949c, 0xffffffff, 0x00000100,
954 	0x802c, 0xffffffff, 0xe0000000,
955 	0x9160, 0xffffffff, 0x00010000,
956 	0x9164, 0xffffffff, 0x00030002,
957 	0x9168, 0xffffffff, 0x00040007,
958 	0x916c, 0xffffffff, 0x00060005,
959 	0x9170, 0xffffffff, 0x00090008,
960 	0x9174, 0xffffffff, 0x00020001,
961 	0x9178, 0xffffffff, 0x00040003,
962 	0x917c, 0xffffffff, 0x00000007,
963 	0x9180, 0xffffffff, 0x00060005,
964 	0x9184, 0xffffffff, 0x00090008,
965 	0x9188, 0xffffffff, 0x00030002,
966 	0x918c, 0xffffffff, 0x00050004,
967 	0x9190, 0xffffffff, 0x00000008,
968 	0x9194, 0xffffffff, 0x00070006,
969 	0x9198, 0xffffffff, 0x000a0009,
970 	0x919c, 0xffffffff, 0x00040003,
971 	0x91a0, 0xffffffff, 0x00060005,
972 	0x91a4, 0xffffffff, 0x00000009,
973 	0x91a8, 0xffffffff, 0x00080007,
974 	0x91ac, 0xffffffff, 0x000b000a,
975 	0x91b0, 0xffffffff, 0x00050004,
976 	0x91b4, 0xffffffff, 0x00070006,
977 	0x91b8, 0xffffffff, 0x0008000b,
978 	0x91bc, 0xffffffff, 0x000a0009,
979 	0x91c0, 0xffffffff, 0x000d000c,
980 	0x91c4, 0xffffffff, 0x00060005,
981 	0x91c8, 0xffffffff, 0x00080007,
982 	0x91cc, 0xffffffff, 0x0000000b,
983 	0x91d0, 0xffffffff, 0x000a0009,
984 	0x91d4, 0xffffffff, 0x000d000c,
985 	0x9150, 0xffffffff, 0x96940200,
986 	0x8708, 0xffffffff, 0x00900100,
987 	0xc478, 0xffffffff, 0x00000080,
988 	0xc404, 0xffffffff, 0x0020003f,
989 	0x30, 0xffffffff, 0x0000001c,
990 	0x34, 0x000f0000, 0x000f0000,
991 	0x160c, 0xffffffff, 0x00000100,
992 	0x1024, 0xffffffff, 0x00000100,
993 	0x102c, 0x00000101, 0x00000000,
994 	0x20a8, 0xffffffff, 0x00000104,
995 	0x264c, 0x000c0000, 0x000c0000,
996 	0x2648, 0x000c0000, 0x000c0000,
997 	0x55e4, 0xff000fff, 0x00000100,
998 	0x55e8, 0x00000001, 0x00000001,
999 	0x2f50, 0x00000001, 0x00000001,
1000 	0x30cc, 0xc0000fff, 0x00000104,
1001 	0xc1e4, 0x00000001, 0x00000001,
1002 	0xd0c0, 0xfffffff0, 0x00000100,
1003 	0xd8c0, 0xfffffff0, 0x00000100
1004 };
1005 
/*
 * Register sequence handed to radeon_program_register_sequence() by
 * si_init_golden_registers() for CHIP_HAINAN, after the two Hainan golden
 * tables.  Compared with oland_mgcg_cgcg_init it carries no rows for
 * 0x102c, 0x55e4 and 0x55e8; the remaining rows appear to match.
 * NOTE(review): the rows read like {register offset, mask, value} triples;
 * confirm against radeon_program_register_sequence(), which is not visible
 * in this part of the file.
 */
1006 static const u32 hainan_mgcg_cgcg_init[] = {
1007 	0xc400, 0xffffffff, 0xfffffffc,
1008 	0x802c, 0xffffffff, 0xe0000000,
1009 	0x9a60, 0xffffffff, 0x00000100,
1010 	0x92a4, 0xffffffff, 0x00000100,
1011 	0xc164, 0xffffffff, 0x00000100,
1012 	0x9774, 0xffffffff, 0x00000100,
1013 	0x8984, 0xffffffff, 0x06000100,
1014 	0x8a18, 0xffffffff, 0x00000100,
1015 	0x92a0, 0xffffffff, 0x00000100,
1016 	0xc380, 0xffffffff, 0x00000100,
1017 	0x8b28, 0xffffffff, 0x00000100,
1018 	0x9144, 0xffffffff, 0x00000100,
1019 	0x8d88, 0xffffffff, 0x00000100,
1020 	0x8d8c, 0xffffffff, 0x00000100,
1021 	0x9030, 0xffffffff, 0x00000100,
1022 	0x9034, 0xffffffff, 0x00000100,
1023 	0x9038, 0xffffffff, 0x00000100,
1024 	0x903c, 0xffffffff, 0x00000100,
1025 	0xad80, 0xffffffff, 0x00000100,
1026 	0xac54, 0xffffffff, 0x00000100,
1027 	0x897c, 0xffffffff, 0x06000100,
1028 	0x9868, 0xffffffff, 0x00000100,
1029 	0x9510, 0xffffffff, 0x00000100,
1030 	0xaf04, 0xffffffff, 0x00000100,
1031 	0xae04, 0xffffffff, 0x00000100,
1032 	0x949c, 0xffffffff, 0x00000100,
1033 	0x802c, 0xffffffff, 0xe0000000,
1034 	0x9160, 0xffffffff, 0x00010000,
1035 	0x9164, 0xffffffff, 0x00030002,
1036 	0x9168, 0xffffffff, 0x00040007,
1037 	0x916c, 0xffffffff, 0x00060005,
1038 	0x9170, 0xffffffff, 0x00090008,
1039 	0x9174, 0xffffffff, 0x00020001,
1040 	0x9178, 0xffffffff, 0x00040003,
1041 	0x917c, 0xffffffff, 0x00000007,
1042 	0x9180, 0xffffffff, 0x00060005,
1043 	0x9184, 0xffffffff, 0x00090008,
1044 	0x9188, 0xffffffff, 0x00030002,
1045 	0x918c, 0xffffffff, 0x00050004,
1046 	0x9190, 0xffffffff, 0x00000008,
1047 	0x9194, 0xffffffff, 0x00070006,
1048 	0x9198, 0xffffffff, 0x000a0009,
1049 	0x919c, 0xffffffff, 0x00040003,
1050 	0x91a0, 0xffffffff, 0x00060005,
1051 	0x91a4, 0xffffffff, 0x00000009,
1052 	0x91a8, 0xffffffff, 0x00080007,
1053 	0x91ac, 0xffffffff, 0x000b000a,
1054 	0x91b0, 0xffffffff, 0x00050004,
1055 	0x91b4, 0xffffffff, 0x00070006,
1056 	0x91b8, 0xffffffff, 0x0008000b,
1057 	0x91bc, 0xffffffff, 0x000a0009,
1058 	0x91c0, 0xffffffff, 0x000d000c,
1059 	0x91c4, 0xffffffff, 0x00060005,
1060 	0x91c8, 0xffffffff, 0x00080007,
1061 	0x91cc, 0xffffffff, 0x0000000b,
1062 	0x91d0, 0xffffffff, 0x000a0009,
1063 	0x91d4, 0xffffffff, 0x000d000c,
1064 	0x9150, 0xffffffff, 0x96940200,
1065 	0x8708, 0xffffffff, 0x00900100,
1066 	0xc478, 0xffffffff, 0x00000080,
1067 	0xc404, 0xffffffff, 0x0020003f,
1068 	0x30, 0xffffffff, 0x0000001c,
1069 	0x34, 0x000f0000, 0x000f0000,
1070 	0x160c, 0xffffffff, 0x00000100,
1071 	0x1024, 0xffffffff, 0x00000100,
1072 	0x20a8, 0xffffffff, 0x00000104,
1073 	0x264c, 0x000c0000, 0x000c0000,
1074 	0x2648, 0x000c0000, 0x000c0000,
1075 	0x2f50, 0x00000001, 0x00000001,
1076 	0x30cc, 0xc0000fff, 0x00000104,
1077 	0xc1e4, 0x00000001, 0x00000001,
1078 	0xd0c0, 0xfffffff0, 0x00000100,
1079 	0xd8c0, 0xfffffff0, 0x00000100
1080 };
1081 
/*
 * Register sequence handed to radeon_program_register_sequence() by
 * si_init_golden_registers() as the last table for CHIP_VERDE.
 *
 * The table is only ever read, so it is const-qualified like every other
 * sequence table in this file.
 *
 * Many consecutive rows target the same offset (0x353c/0x3538 and
 * 0x3500/0x3504), so the row order must be preserved.
 * NOTE(review): the rows read like {register offset, mask, value} triples;
 * confirm against radeon_program_register_sequence(), which is not visible
 * in this part of the file.
 */
1082 static const u32 verde_pg_init[] = {
1083 	0x353c, 0xffffffff, 0x40000,
1084 	0x3538, 0xffffffff, 0x200010ff,
1085 	0x353c, 0xffffffff, 0x0,
1086 	0x353c, 0xffffffff, 0x0,
1087 	0x353c, 0xffffffff, 0x0,
1088 	0x353c, 0xffffffff, 0x0,
1089 	0x353c, 0xffffffff, 0x0,
1090 	0x353c, 0xffffffff, 0x7007,
1091 	0x3538, 0xffffffff, 0x300010ff,
1092 	0x353c, 0xffffffff, 0x0,
1093 	0x353c, 0xffffffff, 0x0,
1094 	0x353c, 0xffffffff, 0x0,
1095 	0x353c, 0xffffffff, 0x0,
1096 	0x353c, 0xffffffff, 0x0,
1097 	0x353c, 0xffffffff, 0x400000,
1098 	0x3538, 0xffffffff, 0x100010ff,
1099 	0x353c, 0xffffffff, 0x0,
1100 	0x353c, 0xffffffff, 0x0,
1101 	0x353c, 0xffffffff, 0x0,
1102 	0x353c, 0xffffffff, 0x0,
1103 	0x353c, 0xffffffff, 0x0,
1104 	0x353c, 0xffffffff, 0x120200,
1105 	0x3538, 0xffffffff, 0x500010ff,
1106 	0x353c, 0xffffffff, 0x0,
1107 	0x353c, 0xffffffff, 0x0,
1108 	0x353c, 0xffffffff, 0x0,
1109 	0x353c, 0xffffffff, 0x0,
1110 	0x353c, 0xffffffff, 0x0,
1111 	0x353c, 0xffffffff, 0x1e1e16,
1112 	0x3538, 0xffffffff, 0x600010ff,
1113 	0x353c, 0xffffffff, 0x0,
1114 	0x353c, 0xffffffff, 0x0,
1115 	0x353c, 0xffffffff, 0x0,
1116 	0x353c, 0xffffffff, 0x0,
1117 	0x353c, 0xffffffff, 0x0,
1118 	0x353c, 0xffffffff, 0x171f1e,
1119 	0x3538, 0xffffffff, 0x700010ff,
1120 	0x353c, 0xffffffff, 0x0,
1121 	0x353c, 0xffffffff, 0x0,
1122 	0x353c, 0xffffffff, 0x0,
1123 	0x353c, 0xffffffff, 0x0,
1124 	0x353c, 0xffffffff, 0x0,
1125 	0x353c, 0xffffffff, 0x0,
1126 	0x3538, 0xffffffff, 0x9ff,
1127 	0x3500, 0xffffffff, 0x0,
1128 	0x3504, 0xffffffff, 0x10000800,
1129 	0x3504, 0xffffffff, 0xf,
1130 	0x3504, 0xffffffff, 0xf,
1131 	0x3500, 0xffffffff, 0x4,
1132 	0x3504, 0xffffffff, 0x1000051e,
1133 	0x3504, 0xffffffff, 0xffff,
1134 	0x3504, 0xffffffff, 0xffff,
1135 	0x3500, 0xffffffff, 0x8,
1136 	0x3504, 0xffffffff, 0x80500,
1137 	0x3500, 0xffffffff, 0x12,
1138 	0x3504, 0xffffffff, 0x9050c,
1139 	0x3500, 0xffffffff, 0x1d,
1140 	0x3504, 0xffffffff, 0xb052c,
1141 	0x3500, 0xffffffff, 0x2a,
1142 	0x3504, 0xffffffff, 0x1053e,
1143 	0x3500, 0xffffffff, 0x2d,
1144 	0x3504, 0xffffffff, 0x10546,
1145 	0x3500, 0xffffffff, 0x30,
1146 	0x3504, 0xffffffff, 0xa054e,
1147 	0x3500, 0xffffffff, 0x3c,
1148 	0x3504, 0xffffffff, 0x1055f,
1149 	0x3500, 0xffffffff, 0x3f,
1150 	0x3504, 0xffffffff, 0x10567,
1151 	0x3500, 0xffffffff, 0x42,
1152 	0x3504, 0xffffffff, 0x1056f,
1153 	0x3500, 0xffffffff, 0x45,
1154 	0x3504, 0xffffffff, 0x10572,
1155 	0x3500, 0xffffffff, 0x48,
1156 	0x3504, 0xffffffff, 0x20575,
1157 	0x3500, 0xffffffff, 0x4c,
1158 	0x3504, 0xffffffff, 0x190801,
1159 	0x3500, 0xffffffff, 0x67,
1160 	0x3504, 0xffffffff, 0x1082a,
1161 	0x3500, 0xffffffff, 0x6a,
1162 	0x3504, 0xffffffff, 0x1b082d,
1163 	0x3500, 0xffffffff, 0x87,
1164 	0x3504, 0xffffffff, 0x310851,
1165 	0x3500, 0xffffffff, 0xba,
1166 	0x3504, 0xffffffff, 0x891,
1167 	0x3500, 0xffffffff, 0xbc,
1168 	0x3504, 0xffffffff, 0x893,
1169 	0x3500, 0xffffffff, 0xbe,
1170 	0x3504, 0xffffffff, 0x20895,
1171 	0x3500, 0xffffffff, 0xc2,
1172 	0x3504, 0xffffffff, 0x20899,
1173 	0x3500, 0xffffffff, 0xc6,
1174 	0x3504, 0xffffffff, 0x2089d,
1175 	0x3500, 0xffffffff, 0xca,
1176 	0x3504, 0xffffffff, 0x8a1,
1177 	0x3500, 0xffffffff, 0xcc,
1178 	0x3504, 0xffffffff, 0x8a3,
1179 	0x3500, 0xffffffff, 0xce,
1180 	0x3504, 0xffffffff, 0x308a5,
1181 	0x3500, 0xffffffff, 0xd3,
1182 	0x3504, 0xffffffff, 0x6d08cd,
1183 	0x3500, 0xffffffff, 0x142,
1184 	0x3504, 0xffffffff, 0x2000095a,
1185 	0x3504, 0xffffffff, 0x1,
1186 	0x3500, 0xffffffff, 0x144,
1187 	0x3504, 0xffffffff, 0x301f095b,
1188 	0x3500, 0xffffffff, 0x165,
1189 	0x3504, 0xffffffff, 0xc094d,
1190 	0x3500, 0xffffffff, 0x173,
1191 	0x3504, 0xffffffff, 0xf096d,
1192 	0x3500, 0xffffffff, 0x184,
1193 	0x3504, 0xffffffff, 0x15097f,
1194 	0x3500, 0xffffffff, 0x19b,
1195 	0x3504, 0xffffffff, 0xc0998,
1196 	0x3500, 0xffffffff, 0x1a9,
1197 	0x3504, 0xffffffff, 0x409a7,
1198 	0x3500, 0xffffffff, 0x1af,
1199 	0x3504, 0xffffffff, 0xcdc,
1200 	0x3500, 0xffffffff, 0x1b1,
1201 	0x3504, 0xffffffff, 0x800,
1202 	0x3508, 0xffffffff, 0x6c9b2000,
1203 	0x3510, 0xfc00, 0x2000,
1204 	0x3544, 0xffffffff, 0xfc0,
1205 	0x28d4, 0x00000100, 0x100
1206 };
1207 
1208 /**
1209  * si_init_golden_registers - program the golden register tables
1210  *
1211  * @rdev: radeon_device pointer
1212  *
1213  * Selects the tables matching rdev->family and hands each one, in the
1214  * order listed, to radeon_program_register_sequence() together with its
1215  * element count.  Tahiti and Verde use four tables, the other families
1216  * three.
1217  *
1218  * Any family not named below is left untouched.
1219  */
1220 static void si_init_golden_registers(struct radeon_device *rdev)
1221 {
1222 	switch (rdev->family) {
1223 	case CHIP_TAHITI:
1224 		/* golden, golden rlc, mgcg/cgcg init, golden2 */
1225 		radeon_program_register_sequence(rdev, tahiti_golden_registers,
1226 						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
1227 		radeon_program_register_sequence(rdev, tahiti_golden_rlc_registers,
1228 						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
1229 		radeon_program_register_sequence(rdev, tahiti_mgcg_cgcg_init,
1230 						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
1231 		radeon_program_register_sequence(rdev, tahiti_golden_registers2,
1232 						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
1233 		break;
1234 	case CHIP_PITCAIRN:
1235 		/* golden, golden rlc, mgcg/cgcg init */
1236 		radeon_program_register_sequence(rdev, pitcairn_golden_registers,
1237 						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
1238 		radeon_program_register_sequence(rdev, pitcairn_golden_rlc_registers,
1239 						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
1240 		radeon_program_register_sequence(rdev, pitcairn_mgcg_cgcg_init,
1241 						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
1242 		break;
1243 	case CHIP_VERDE:
1244 		/* golden, golden rlc, mgcg/cgcg init, pg init */
1245 		radeon_program_register_sequence(rdev, verde_golden_registers,
1246 						 (const u32)ARRAY_SIZE(verde_golden_registers));
1247 		radeon_program_register_sequence(rdev, verde_golden_rlc_registers,
1248 						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
1249 		radeon_program_register_sequence(rdev, verde_mgcg_cgcg_init,
1250 						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
1251 		radeon_program_register_sequence(rdev, verde_pg_init,
1252 						 (const u32)ARRAY_SIZE(verde_pg_init));
1253 		break;
1254 	case CHIP_OLAND:
1255 		/* golden, golden rlc, mgcg/cgcg init */
1256 		radeon_program_register_sequence(rdev, oland_golden_registers,
1257 						 (const u32)ARRAY_SIZE(oland_golden_registers));
1258 		radeon_program_register_sequence(rdev, oland_golden_rlc_registers,
1259 						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
1260 		radeon_program_register_sequence(rdev, oland_mgcg_cgcg_init,
1261 						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
1262 		break;
1263 	case CHIP_HAINAN:
1264 		/* golden, golden2, mgcg/cgcg init */
1265 		radeon_program_register_sequence(rdev, hainan_golden_registers,
1266 						 (const u32)ARRAY_SIZE(hainan_golden_registers));
1267 		radeon_program_register_sequence(rdev, hainan_golden_registers2,
1268 						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
1269 		radeon_program_register_sequence(rdev, hainan_mgcg_cgcg_init,
1270 						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
1271 		break;
1272 	default:
1273 		break;
1274 	}
1275 }
1276 
1277 /**
1278  * si_get_allowed_info_register - read a register for the info ioctl
1279  * @rdev: radeon_device pointer
1280  * @reg: register offset in bytes
1281  * @val: filled with the register value on success
1282  *
1283  * Returns 0 for an allowed register, -EINVAL (@val untouched) otherwise.
1284  */
1285 int si_get_allowed_info_register(struct radeon_device *rdev,
1286 				 u32 reg, u32 *val)
1287 {
1288 	switch (reg) {
1289 	case GRBM_STATUS:
1290 	case GRBM_STATUS2:
1291 	case GRBM_STATUS_SE0:
1292 	case GRBM_STATUS_SE1:
1293 	case SRBM_STATUS:
1294 	case SRBM_STATUS2:
1295 	case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
1296 	case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
1297 	case UVD_STATUS:
1298 		break;
1299 	default:
1300 		return -EINVAL;
1301 	}
1302 
1303 	*val = RREG32(reg);
1304 	return 0;
1305 }
1306 
1307 #define PCIE_BUS_CLK                10000
1308 #define TCLK                        (PCIE_BUS_CLK / 10)
1309 
1310 /**
1311  * si_get_xclk - report the xclk currently selected
1312  *
1313  * @rdev: radeon_device pointer
1314  *
1315  * Returns the reference clock used by the gfx engine (SI): TCLK when it is
1316  * muxed onto XCLK, else the SPLL reference, quartered if XTALIN is divided.
1317  */
1318 u32 si_get_xclk(struct radeon_device *rdev)
1319 {
1320 	u32 xclk;
1321 
1322 	/* MUX_TCLK_TO_XCLK set: TCLK is returned and nothing else is read */
1323 	if (RREG32(CG_CLKPIN_CNTL_2) & MUX_TCLK_TO_XCLK)
1324 		return TCLK;
1325 
1326 	/* XTALIN_DIVIDE set: the SPLL reference frequency is divided by 4 */
1327 	xclk = rdev->clock.spll.reference_freq;
1328 	if (RREG32(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
1329 		xclk /= 4;
1330 
1331 	return xclk;
1332 }
1333 
1334 /*
1335  * get temperature in millidegrees
1336  *
1337  * The CTF_TEMP field of CG_MULT_THERMAL_STATUS is taken as whole degrees
1338  * and multiplied by 1000 before it is returned.
1339  */
1340 int si_get_temp(struct radeon_device *rdev)
1341 {
1342 	u32 status = RREG32(CG_MULT_THERMAL_STATUS);
1343 	u32 ctf_temp = (status & CTF_TEMP_MASK) >> CTF_TEMP_SHIFT;
1344 
1345 	/* bit 9 set: report a fixed 255 degrees */
1346 	if (ctf_temp & 0x200)
1347 		return 255 * 1000;
1348 
1349 	/* otherwise only the low 9 bits are used */
1350 	return (int)(ctf_temp & 0x1ff) * 1000;
1351 }
1352 
/* Number of {index, data} rows in each of the *_io_mc_regs tables. */
1353 #define TAHITI_IO_MC_REGS_SIZE 36
1354 
/*
 * MC io register settings used by si_mc_load_microcode() for CHIP_TAHITI
 * when rdev->new_fw is not set: for every row the first word is written to
 * MC_SEQ_IO_DEBUG_INDEX and the second to MC_SEQ_IO_DEBUG_DATA.
 */
1355 static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1356 	{0x0000006f, 0x03044000},
1357 	{0x00000070, 0x0480c018},
1358 	{0x00000071, 0x00000040},
1359 	{0x00000072, 0x01000000},
1360 	{0x00000074, 0x000000ff},
1361 	{0x00000075, 0x00143400},
1362 	{0x00000076, 0x08ec0800},
1363 	{0x00000077, 0x040000cc},
1364 	{0x00000079, 0x00000000},
1365 	{0x0000007a, 0x21000409},
1366 	{0x0000007c, 0x00000000},
1367 	{0x0000007d, 0xe8000000},
1368 	{0x0000007e, 0x044408a8},
1369 	{0x0000007f, 0x00000003},
1370 	{0x00000080, 0x00000000},
1371 	{0x00000081, 0x01000000},
1372 	{0x00000082, 0x02000000},
1373 	{0x00000083, 0x00000000},
1374 	{0x00000084, 0xe3f3e4f4},
1375 	{0x00000085, 0x00052024},
1376 	{0x00000087, 0x00000000},
1377 	{0x00000088, 0x66036603},
1378 	{0x00000089, 0x01000000},
1379 	{0x0000008b, 0x1c0a0000},
1380 	{0x0000008c, 0xff010000},
1381 	{0x0000008e, 0xffffefff},
1382 	{0x0000008f, 0xfff3efff},
1383 	{0x00000090, 0xfff3efbf},
1384 	{0x00000094, 0x00101101},
1385 	{0x00000095, 0x00000fff},
1386 	{0x00000096, 0x00116fff},
1387 	{0x00000097, 0x60010000},
1388 	{0x00000098, 0x10010000},
1389 	{0x00000099, 0x00006000},
1390 	{0x0000009a, 0x00001000},
1391 	{0x0000009f, 0x00a77400}
1392 };
1393 
/*
 * MC io register settings used by si_mc_load_microcode() for CHIP_PITCAIRN
 * when rdev->new_fw is not set: for every row the first word is written to
 * MC_SEQ_IO_DEBUG_INDEX and the second to MC_SEQ_IO_DEBUG_DATA.
 * NOTE(review): appears to differ from tahiti_io_mc_regs only in the value
 * paired with index 0x9f - confirm before relying on it.
 */
1394 static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1395 	{0x0000006f, 0x03044000},
1396 	{0x00000070, 0x0480c018},
1397 	{0x00000071, 0x00000040},
1398 	{0x00000072, 0x01000000},
1399 	{0x00000074, 0x000000ff},
1400 	{0x00000075, 0x00143400},
1401 	{0x00000076, 0x08ec0800},
1402 	{0x00000077, 0x040000cc},
1403 	{0x00000079, 0x00000000},
1404 	{0x0000007a, 0x21000409},
1405 	{0x0000007c, 0x00000000},
1406 	{0x0000007d, 0xe8000000},
1407 	{0x0000007e, 0x044408a8},
1408 	{0x0000007f, 0x00000003},
1409 	{0x00000080, 0x00000000},
1410 	{0x00000081, 0x01000000},
1411 	{0x00000082, 0x02000000},
1412 	{0x00000083, 0x00000000},
1413 	{0x00000084, 0xe3f3e4f4},
1414 	{0x00000085, 0x00052024},
1415 	{0x00000087, 0x00000000},
1416 	{0x00000088, 0x66036603},
1417 	{0x00000089, 0x01000000},
1418 	{0x0000008b, 0x1c0a0000},
1419 	{0x0000008c, 0xff010000},
1420 	{0x0000008e, 0xffffefff},
1421 	{0x0000008f, 0xfff3efff},
1422 	{0x00000090, 0xfff3efbf},
1423 	{0x00000094, 0x00101101},
1424 	{0x00000095, 0x00000fff},
1425 	{0x00000096, 0x00116fff},
1426 	{0x00000097, 0x60010000},
1427 	{0x00000098, 0x10010000},
1428 	{0x00000099, 0x00006000},
1429 	{0x0000009a, 0x00001000},
1430 	{0x0000009f, 0x00a47400}
1431 };
1432 
/*
 * MC io register settings used by si_mc_load_microcode() when rdev->new_fw
 * is not set, for CHIP_VERDE and for any family without a case of its own
 * (the Verde case shares its body with the switch default): for every row
 * the first word is written to MC_SEQ_IO_DEBUG_INDEX and the second to
 * MC_SEQ_IO_DEBUG_DATA.
 */
1433 static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1434 	{0x0000006f, 0x03044000},
1435 	{0x00000070, 0x0480c018},
1436 	{0x00000071, 0x00000040},
1437 	{0x00000072, 0x01000000},
1438 	{0x00000074, 0x000000ff},
1439 	{0x00000075, 0x00143400},
1440 	{0x00000076, 0x08ec0800},
1441 	{0x00000077, 0x040000cc},
1442 	{0x00000079, 0x00000000},
1443 	{0x0000007a, 0x21000409},
1444 	{0x0000007c, 0x00000000},
1445 	{0x0000007d, 0xe8000000},
1446 	{0x0000007e, 0x044408a8},
1447 	{0x0000007f, 0x00000003},
1448 	{0x00000080, 0x00000000},
1449 	{0x00000081, 0x01000000},
1450 	{0x00000082, 0x02000000},
1451 	{0x00000083, 0x00000000},
1452 	{0x00000084, 0xe3f3e4f4},
1453 	{0x00000085, 0x00052024},
1454 	{0x00000087, 0x00000000},
1455 	{0x00000088, 0x66036603},
1456 	{0x00000089, 0x01000000},
1457 	{0x0000008b, 0x1c0a0000},
1458 	{0x0000008c, 0xff010000},
1459 	{0x0000008e, 0xffffefff},
1460 	{0x0000008f, 0xfff3efff},
1461 	{0x00000090, 0xfff3efbf},
1462 	{0x00000094, 0x00101101},
1463 	{0x00000095, 0x00000fff},
1464 	{0x00000096, 0x00116fff},
1465 	{0x00000097, 0x60010000},
1466 	{0x00000098, 0x10010000},
1467 	{0x00000099, 0x00006000},
1468 	{0x0000009a, 0x00001000},
1469 	{0x0000009f, 0x00a37400}
1470 };
1471 
/*
 * MC io register settings used by si_mc_load_microcode() for CHIP_OLAND
 * when rdev->new_fw is not set: for every row the first word is written to
 * MC_SEQ_IO_DEBUG_INDEX and the second to MC_SEQ_IO_DEBUG_DATA.
 * NOTE(review): appears to differ from tahiti_io_mc_regs only in the value
 * paired with index 0x9f - confirm before relying on it.
 */
1472 static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1473 	{0x0000006f, 0x03044000},
1474 	{0x00000070, 0x0480c018},
1475 	{0x00000071, 0x00000040},
1476 	{0x00000072, 0x01000000},
1477 	{0x00000074, 0x000000ff},
1478 	{0x00000075, 0x00143400},
1479 	{0x00000076, 0x08ec0800},
1480 	{0x00000077, 0x040000cc},
1481 	{0x00000079, 0x00000000},
1482 	{0x0000007a, 0x21000409},
1483 	{0x0000007c, 0x00000000},
1484 	{0x0000007d, 0xe8000000},
1485 	{0x0000007e, 0x044408a8},
1486 	{0x0000007f, 0x00000003},
1487 	{0x00000080, 0x00000000},
1488 	{0x00000081, 0x01000000},
1489 	{0x00000082, 0x02000000},
1490 	{0x00000083, 0x00000000},
1491 	{0x00000084, 0xe3f3e4f4},
1492 	{0x00000085, 0x00052024},
1493 	{0x00000087, 0x00000000},
1494 	{0x00000088, 0x66036603},
1495 	{0x00000089, 0x01000000},
1496 	{0x0000008b, 0x1c0a0000},
1497 	{0x0000008c, 0xff010000},
1498 	{0x0000008e, 0xffffefff},
1499 	{0x0000008f, 0xfff3efff},
1500 	{0x00000090, 0xfff3efbf},
1501 	{0x00000094, 0x00101101},
1502 	{0x00000095, 0x00000fff},
1503 	{0x00000096, 0x00116fff},
1504 	{0x00000097, 0x60010000},
1505 	{0x00000098, 0x10010000},
1506 	{0x00000099, 0x00006000},
1507 	{0x0000009a, 0x00001000},
1508 	{0x0000009f, 0x00a17730}
1509 };
1510 
/*
 * MC io register settings used by si_mc_load_microcode() for CHIP_HAINAN
 * when rdev->new_fw is not set: for every row the first word is written to
 * MC_SEQ_IO_DEBUG_INDEX and the second to MC_SEQ_IO_DEBUG_DATA.
 * NOTE(review): appears to differ from tahiti_io_mc_regs only in the value
 * paired with index 0x9f - confirm before relying on it.
 */
1511 static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
1512 	{0x0000006f, 0x03044000},
1513 	{0x00000070, 0x0480c018},
1514 	{0x00000071, 0x00000040},
1515 	{0x00000072, 0x01000000},
1516 	{0x00000074, 0x000000ff},
1517 	{0x00000075, 0x00143400},
1518 	{0x00000076, 0x08ec0800},
1519 	{0x00000077, 0x040000cc},
1520 	{0x00000079, 0x00000000},
1521 	{0x0000007a, 0x21000409},
1522 	{0x0000007c, 0x00000000},
1523 	{0x0000007d, 0xe8000000},
1524 	{0x0000007e, 0x044408a8},
1525 	{0x0000007f, 0x00000003},
1526 	{0x00000080, 0x00000000},
1527 	{0x00000081, 0x01000000},
1528 	{0x00000082, 0x02000000},
1529 	{0x00000083, 0x00000000},
1530 	{0x00000084, 0xe3f3e4f4},
1531 	{0x00000085, 0x00052024},
1532 	{0x00000087, 0x00000000},
1533 	{0x00000088, 0x66036603},
1534 	{0x00000089, 0x01000000},
1535 	{0x0000008b, 0x1c0a0000},
1536 	{0x0000008c, 0xff010000},
1537 	{0x0000008e, 0xffffefff},
1538 	{0x0000008f, 0xfff3efff},
1539 	{0x00000090, 0xfff3efbf},
1540 	{0x00000094, 0x00101101},
1541 	{0x00000095, 0x00000fff},
1542 	{0x00000096, 0x00116fff},
1543 	{0x00000097, 0x60010000},
1544 	{0x00000098, 0x10010000},
1545 	{0x00000099, 0x00006000},
1546 	{0x0000009a, 0x00001000},
1547 	{0x0000009f, 0x00a07730}
1548 };
1549 
1550 /* ucode loading */
/**
 * si_mc_load_microcode - load the MC io register settings and MC ucode
 *
 * @rdev: radeon_device pointer
 *
 * With rdev->new_fw set, the io debug {index, data} pairs and the ucode
 * are located through the mc_firmware_header_v1_0 at the start of
 * rdev->mc_fw and read as little-endian words.  Otherwise the whole image
 * is treated as big-endian ucode and the io pairs come from the built-in
 * per-family *_io_mc_regs tables (families without a case of their own
 * use the Verde table).
 *
 * Nothing is written when MC_SEQ_SUP_CNTL already has a RUN_MASK bit set.
 * The two training polls are bounded by rdev->usec_timeout iterations and
 * a timeout is not reported to the caller.
 *
 * Returns -EINVAL if no MC firmware has been loaded, 0 otherwise.
 */
1551 int si_mc_load_microcode(struct radeon_device *rdev)
1552 {
1553 	const __be32 *fw_data = NULL;
1554 	const __le32 *new_fw_data = NULL;
1555 	u32 running;
	/* the built-in tables are only read, so never drop their const */
1556 	const u32 *io_mc_regs = NULL;
1557 	const __le32 *new_io_mc_regs = NULL;
1558 	int i, regs_size, ucode_size;
1559 
1560 	if (!rdev->mc_fw)
1561 		return -EINVAL;
1562 
1563 	if (rdev->new_fw) {
1564 		const struct mc_firmware_header_v1_0 *hdr =
1565 			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;
1566 
1567 		radeon_ucode_print_mc_hdr(&hdr->header);
		/* one io debug entry is two 32-bit words: index, then data */
1568 		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
1569 		new_io_mc_regs = (const __le32 *)
1570 			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		/* ucode length counted in 32-bit words */
1571 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
1572 		new_fw_data = (const __le32 *)
1573 			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
1574 	} else {
		/* no header here: the whole image is ucode */
1575 		ucode_size = rdev->mc_fw->size / 4;
1576 
1577 		switch (rdev->family) {
1578 		case CHIP_TAHITI:
1579 			io_mc_regs = (const u32 *)&tahiti_io_mc_regs;
1580 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1581 			break;
1582 		case CHIP_PITCAIRN:
1583 			io_mc_regs = (const u32 *)&pitcairn_io_mc_regs;
1584 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1585 			break;
		/* Verde doubles as the fallback for families not listed here */
1586 		case CHIP_VERDE:
1587 		default:
1588 			io_mc_regs = (const u32 *)&verde_io_mc_regs;
1589 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1590 			break;
1591 		case CHIP_OLAND:
1592 			io_mc_regs = (const u32 *)&oland_io_mc_regs;
1593 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1594 			break;
1595 		case CHIP_HAINAN:
1596 			io_mc_regs = (const u32 *)&hainan_io_mc_regs;
1597 			regs_size = TAHITI_IO_MC_REGS_SIZE;
1598 			break;
1599 		}
1600 		fw_data = (const __be32 *)rdev->mc_fw->data;
1601 	}
1602 
1603 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1604 
	/* load only while no RUN_MASK bit is set */
1605 	if (running == 0) {
1606 		/* reset the engine and set to writable */
1607 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1608 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1609 
1610 		/* load mc io regs */
1611 		for (i = 0; i < regs_size; i++) {
1612 			if (rdev->new_fw) {
1613 				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
1614 				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
1615 			} else {
				/* row i of the table, flattened: index word, data word */
1616 				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1617 				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1618 			}
1619 		}
1620 		/* load the MC ucode */
1621 		for (i = 0; i < ucode_size; i++) {
1622 			if (rdev->new_fw)
1623 				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
1624 			else
1625 				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1626 		}
1627 
1628 		/* put the engine back into the active state */
1629 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1630 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1631 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1632 
1633 		/* wait for training to complete */
		/* TRAIN_DONE_D0 first, then TRAIN_DONE_D1; a timeout just falls through */
1634 		for (i = 0; i < rdev->usec_timeout; i++) {
1635 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1636 				break;
1637 			udelay(1);
1638 		}
1639 		for (i = 0; i < rdev->usec_timeout; i++) {
1640 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1641 				break;
1642 			udelay(1);
1643 		}
1644 	}
1645 
1646 	return 0;
1647 }
1648 
1649 static int si_init_microcode(struct radeon_device *rdev)
1650 {
1651 	const char *chip_name;
1652 	const char *new_chip_name;
1653 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1654 	size_t smc_req_size, mc2_req_size;
1655 	char fw_name[30];
1656 	int err;
1657 	int new_fw = 0;
1658 	bool new_smc = false;
1659 	bool si58_fw = false;
1660 	bool banks2_fw = false;
1661 
1662 	DRM_DEBUG("\n");
1663 
1664 	switch (rdev->family) {
1665 	case CHIP_TAHITI:
1666 		chip_name = "TAHITI";
1667 		new_chip_name = "tahiti";
1668 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1669 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1670 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1671 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1672 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1673 		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
1674 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1675 		break;
1676 	case CHIP_PITCAIRN:
1677 		chip_name = "PITCAIRN";
1678 		if ((rdev->pdev->revision == 0x81) &&
1679 		    ((rdev->pdev->device == 0x6810) ||
1680 		     (rdev->pdev->device == 0x6811)))
1681 			new_smc = true;
1682 		new_chip_name = "pitcairn";
1683 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1684 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1685 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1686 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1687 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1688 		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
1689 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1690 		break;
1691 	case CHIP_VERDE:
1692 		chip_name = "VERDE";
1693 		if (((rdev->pdev->device == 0x6820) &&
1694 		     ((rdev->pdev->revision == 0x81) ||
1695 		      (rdev->pdev->revision == 0x83))) ||
1696 		    ((rdev->pdev->device == 0x6821) &&
1697 		     ((rdev->pdev->revision == 0x83) ||
1698 		      (rdev->pdev->revision == 0x87))) ||
1699 		    ((rdev->pdev->revision == 0x87) &&
1700 		     ((rdev->pdev->device == 0x6823) ||
1701 		      (rdev->pdev->device == 0x682b))))
1702 			new_smc = true;
1703 		new_chip_name = "verde";
1704 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1705 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1706 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1707 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1708 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1709 		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
1710 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1711 		break;
1712 	case CHIP_OLAND:
1713 		chip_name = "OLAND";
1714 		if (((rdev->pdev->revision == 0x81) &&
1715 		     ((rdev->pdev->device == 0x6600) ||
1716 		      (rdev->pdev->device == 0x6604) ||
1717 		      (rdev->pdev->device == 0x6605) ||
1718 		      (rdev->pdev->device == 0x6610))) ||
1719 		    ((rdev->pdev->revision == 0x83) &&
1720 		     (rdev->pdev->device == 0x6610)))
1721 			new_smc = true;
1722 		new_chip_name = "oland";
1723 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1724 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1725 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1726 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1727 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1728 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1729 		break;
1730 	case CHIP_HAINAN:
1731 		chip_name = "HAINAN";
1732 		if (((rdev->pdev->revision == 0x81) &&
1733 		     (rdev->pdev->device == 0x6660)) ||
1734 		    ((rdev->pdev->revision == 0x83) &&
1735 		     ((rdev->pdev->device == 0x6660) ||
1736 		      (rdev->pdev->device == 0x6663) ||
1737 		      (rdev->pdev->device == 0x6665) ||
1738 		      (rdev->pdev->device == 0x6667))))
1739 			new_smc = true;
1740 		else if ((rdev->pdev->revision == 0xc3) &&
1741 			 (rdev->pdev->device == 0x6665))
1742 			banks2_fw = true;
1743 		new_chip_name = "hainan";
1744 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1745 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1746 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1747 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1748 		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
1749 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1750 		break;
1751 	default:
1752 		BUG();
1753 	}
1754 
1755 	/* this memory configuration requires special firmware */
1756 	if (((RREG32(MC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
1757 		si58_fw = true;
1758 
1759 	DRM_INFO("Loading %s Microcode\n", new_chip_name);
1760 
1761 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
1762 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1763 	if (err) {
1764 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1765 		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1766 		if (err)
1767 			goto out;
1768 		if (rdev->pfp_fw->size != pfp_req_size) {
1769 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1770 			       rdev->pfp_fw->size, fw_name);
1771 			err = -EINVAL;
1772 			goto out;
1773 		}
1774 	} else {
1775 		err = radeon_ucode_validate(rdev->pfp_fw);
1776 		if (err) {
1777 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1778 			       fw_name);
1779 			goto out;
1780 		} else {
1781 			new_fw++;
1782 		}
1783 	}
1784 
1785 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1786 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1787 	if (err) {
1788 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1789 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1790 		if (err)
1791 			goto out;
1792 		if (rdev->me_fw->size != me_req_size) {
1793 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1794 			       rdev->me_fw->size, fw_name);
1795 			err = -EINVAL;
1796 		}
1797 	} else {
1798 		err = radeon_ucode_validate(rdev->me_fw);
1799 		if (err) {
1800 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1801 			       fw_name);
1802 			goto out;
1803 		} else {
1804 			new_fw++;
1805 		}
1806 	}
1807 
1808 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1809 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1810 	if (err) {
1811 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1812 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1813 		if (err)
1814 			goto out;
1815 		if (rdev->ce_fw->size != ce_req_size) {
1816 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1817 			       rdev->ce_fw->size, fw_name);
1818 			err = -EINVAL;
1819 		}
1820 	} else {
1821 		err = radeon_ucode_validate(rdev->ce_fw);
1822 		if (err) {
1823 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1824 			       fw_name);
1825 			goto out;
1826 		} else {
1827 			new_fw++;
1828 		}
1829 	}
1830 
1831 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1832 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1833 	if (err) {
1834 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1835 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1836 		if (err)
1837 			goto out;
1838 		if (rdev->rlc_fw->size != rlc_req_size) {
1839 			pr_err("si_rlc: Bogus length %zu in firmware \"%s\"\n",
1840 			       rdev->rlc_fw->size, fw_name);
1841 			err = -EINVAL;
1842 		}
1843 	} else {
1844 		err = radeon_ucode_validate(rdev->rlc_fw);
1845 		if (err) {
1846 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1847 			       fw_name);
1848 			goto out;
1849 		} else {
1850 			new_fw++;
1851 		}
1852 	}
1853 
1854 	if (si58_fw)
1855 		snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin");
1856 	else
1857 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1858 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1859 	if (err) {
1860 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1861 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1862 		if (err) {
1863 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1864 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1865 			if (err)
1866 				goto out;
1867 		}
1868 		if ((rdev->mc_fw->size != mc_req_size) &&
1869 		    (rdev->mc_fw->size != mc2_req_size)) {
1870 			pr_err("si_mc: Bogus length %zu in firmware \"%s\"\n",
1871 			       rdev->mc_fw->size, fw_name);
1872 			err = -EINVAL;
1873 		}
1874 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1875 	} else {
1876 		err = radeon_ucode_validate(rdev->mc_fw);
1877 		if (err) {
1878 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1879 			       fw_name);
1880 			goto out;
1881 		} else {
1882 			new_fw++;
1883 		}
1884 	}
1885 
1886 	if (banks2_fw)
1887 		snprintf(fw_name, sizeof(fw_name), "radeon/banks_k_2_smc.bin");
1888 	else if (new_smc)
1889 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
1890 	else
1891 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1892 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1893 	if (err) {
1894 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1895 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1896 		if (err) {
1897 			pr_err("smc: error loading firmware \"%s\"\n", fw_name);
1898 			release_firmware(rdev->smc_fw);
1899 			rdev->smc_fw = NULL;
1900 			err = 0;
1901 		} else if (rdev->smc_fw->size != smc_req_size) {
1902 			pr_err("si_smc: Bogus length %zu in firmware \"%s\"\n",
1903 			       rdev->smc_fw->size, fw_name);
1904 			err = -EINVAL;
1905 		}
1906 	} else {
1907 		err = radeon_ucode_validate(rdev->smc_fw);
1908 		if (err) {
1909 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1910 			       fw_name);
1911 			goto out;
1912 		} else {
1913 			new_fw++;
1914 		}
1915 	}
1916 
1917 	if (new_fw == 0) {
1918 		rdev->new_fw = false;
1919 	} else if (new_fw < 6) {
1920 		pr_err("si_fw: mixing new and old firmware!\n");
1921 		err = -EINVAL;
1922 	} else {
1923 		rdev->new_fw = true;
1924 	}
1925 out:
1926 	if (err) {
1927 		if (err != -EINVAL)
1928 			pr_err("si_cp: Failed to load firmware \"%s\"\n",
1929 			       fw_name);
1930 		release_firmware(rdev->pfp_fw);
1931 		rdev->pfp_fw = NULL;
1932 		release_firmware(rdev->me_fw);
1933 		rdev->me_fw = NULL;
1934 		release_firmware(rdev->ce_fw);
1935 		rdev->ce_fw = NULL;
1936 		release_firmware(rdev->rlc_fw);
1937 		rdev->rlc_fw = NULL;
1938 		release_firmware(rdev->mc_fw);
1939 		rdev->mc_fw = NULL;
1940 		release_firmware(rdev->smc_fw);
1941 		rdev->smc_fw = NULL;
1942 	}
1943 	return err;
1944 }
1945 
1946 /* watermark setup */
1947 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1948 				   struct radeon_crtc *radeon_crtc,
1949 				   struct drm_display_mode *mode,
1950 				   struct drm_display_mode *other_mode)
1951 {
1952 	u32 tmp, buffer_alloc, i;
1953 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1954 	/*
1955 	 * Line Buffer Setup
1956 	 * There are 3 line buffers, each one shared by 2 display controllers.
1957 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1958 	 * the display controllers.  The paritioning is done via one of four
1959 	 * preset allocations specified in bits 21:20:
1960 	 *  0 - half lb
1961 	 *  2 - whole lb, other crtc must be disabled
1962 	 */
1963 	/* this can get tricky if we have two large displays on a paired group
1964 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1965 	 * non-linked crtcs for maximum line buffer allocation.
1966 	 */
1967 	if (radeon_crtc->base.enabled && mode) {
1968 		if (other_mode) {
1969 			tmp = 0; /* 1/2 */
1970 			buffer_alloc = 1;
1971 		} else {
1972 			tmp = 2; /* whole */
1973 			buffer_alloc = 2;
1974 		}
1975 	} else {
1976 		tmp = 0;
1977 		buffer_alloc = 0;
1978 	}
1979 
1980 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1981 	       DC_LB_MEMORY_CONFIG(tmp));
1982 
1983 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1984 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1985 	for (i = 0; i < rdev->usec_timeout; i++) {
1986 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1987 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
1988 			break;
1989 		udelay(1);
1990 	}
1991 
1992 	if (radeon_crtc->base.enabled && mode) {
1993 		switch (tmp) {
1994 		case 0:
1995 		default:
1996 			return 4096 * 2;
1997 		case 2:
1998 			return 8192 * 2;
1999 		}
2000 	}
2001 
2002 	/* controller not enabled, so no lb used */
2003 	return 0;
2004 }
2005 
2006 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2007 {
2008 	u32 tmp = RREG32(MC_SHARED_CHMAP);
2009 
2010 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2011 	case 0:
2012 	default:
2013 		return 1;
2014 	case 1:
2015 		return 2;
2016 	case 2:
2017 		return 4;
2018 	case 3:
2019 		return 8;
2020 	case 4:
2021 		return 3;
2022 	case 5:
2023 		return 6;
2024 	case 6:
2025 		return 10;
2026 	case 7:
2027 		return 12;
2028 	case 8:
2029 		return 16;
2030 	}
2031 }
2032 
2033 struct dce6_wm_params {
2034 	u32 dram_channels; /* number of dram channels */
2035 	u32 yclk;          /* bandwidth per dram data pin in kHz */
2036 	u32 sclk;          /* engine clock in kHz */
2037 	u32 disp_clk;      /* display clock in kHz */
2038 	u32 src_width;     /* viewport width */
2039 	u32 active_time;   /* active display time in ns */
2040 	u32 blank_time;    /* blank time in ns */
2041 	bool interlaced;    /* mode is interlaced */
2042 	fixed20_12 vsc;    /* vertical scale ratio */
2043 	u32 num_heads;     /* number of active crtcs */
2044 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2045 	u32 lb_size;       /* line buffer allocated to pipe */
2046 	u32 vtaps;         /* vertical scaler taps */
2047 };
2048 
2049 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2050 {
2051 	/* Calculate raw DRAM Bandwidth */
2052 	fixed20_12 dram_efficiency; /* 0.7 */
2053 	fixed20_12 yclk, dram_channels, bandwidth;
2054 	fixed20_12 a;
2055 
2056 	a.full = dfixed_const(1000);
2057 	yclk.full = dfixed_const(wm->yclk);
2058 	yclk.full = dfixed_div(yclk, a);
2059 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2060 	a.full = dfixed_const(10);
2061 	dram_efficiency.full = dfixed_const(7);
2062 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2063 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2064 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2065 
2066 	return dfixed_trunc(bandwidth);
2067 }
2068 
2069 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2070 {
2071 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2072 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2073 	fixed20_12 yclk, dram_channels, bandwidth;
2074 	fixed20_12 a;
2075 
2076 	a.full = dfixed_const(1000);
2077 	yclk.full = dfixed_const(wm->yclk);
2078 	yclk.full = dfixed_div(yclk, a);
2079 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2080 	a.full = dfixed_const(10);
2081 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
2082 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2083 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2084 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2085 
2086 	return dfixed_trunc(bandwidth);
2087 }
2088 
2089 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2090 {
2091 	/* Calculate the display Data return Bandwidth */
2092 	fixed20_12 return_efficiency; /* 0.8 */
2093 	fixed20_12 sclk, bandwidth;
2094 	fixed20_12 a;
2095 
2096 	a.full = dfixed_const(1000);
2097 	sclk.full = dfixed_const(wm->sclk);
2098 	sclk.full = dfixed_div(sclk, a);
2099 	a.full = dfixed_const(10);
2100 	return_efficiency.full = dfixed_const(8);
2101 	return_efficiency.full = dfixed_div(return_efficiency, a);
2102 	a.full = dfixed_const(32);
2103 	bandwidth.full = dfixed_mul(a, sclk);
2104 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2105 
2106 	return dfixed_trunc(bandwidth);
2107 }
2108 
2109 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2110 {
2111 	return 32;
2112 }
2113 
2114 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2115 {
2116 	/* Calculate the DMIF Request Bandwidth */
2117 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2118 	fixed20_12 disp_clk, sclk, bandwidth;
2119 	fixed20_12 a, b1, b2;
2120 	u32 min_bandwidth;
2121 
2122 	a.full = dfixed_const(1000);
2123 	disp_clk.full = dfixed_const(wm->disp_clk);
2124 	disp_clk.full = dfixed_div(disp_clk, a);
2125 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2126 	b1.full = dfixed_mul(a, disp_clk);
2127 
2128 	a.full = dfixed_const(1000);
2129 	sclk.full = dfixed_const(wm->sclk);
2130 	sclk.full = dfixed_div(sclk, a);
2131 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2132 	b2.full = dfixed_mul(a, sclk);
2133 
2134 	a.full = dfixed_const(10);
2135 	disp_clk_request_efficiency.full = dfixed_const(8);
2136 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2137 
2138 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2139 
2140 	a.full = dfixed_const(min_bandwidth);
2141 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2142 
2143 	return dfixed_trunc(bandwidth);
2144 }
2145 
2146 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2147 {
2148 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
2149 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2150 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2151 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2152 
2153 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2154 }
2155 
2156 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2157 {
2158 	/* Calculate the display mode Average Bandwidth
2159 	 * DisplayMode should contain the source and destination dimensions,
2160 	 * timing, etc.
2161 	 */
2162 	fixed20_12 bpp;
2163 	fixed20_12 line_time;
2164 	fixed20_12 src_width;
2165 	fixed20_12 bandwidth;
2166 	fixed20_12 a;
2167 
2168 	a.full = dfixed_const(1000);
2169 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2170 	line_time.full = dfixed_div(line_time, a);
2171 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2172 	src_width.full = dfixed_const(wm->src_width);
2173 	bandwidth.full = dfixed_mul(src_width, bpp);
2174 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2175 	bandwidth.full = dfixed_div(bandwidth, line_time);
2176 
2177 	return dfixed_trunc(bandwidth);
2178 }
2179 
2180 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2181 {
2182 	/* First calcualte the latency in ns */
2183 	u32 mc_latency = 2000; /* 2000 ns. */
2184 	u32 available_bandwidth = dce6_available_bandwidth(wm);
2185 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2186 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2187 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2188 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2189 		(wm->num_heads * cursor_line_pair_return_time);
2190 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2191 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2192 	u32 tmp, dmif_size = 12288;
2193 	fixed20_12 a, b, c;
2194 
2195 	if (wm->num_heads == 0)
2196 		return 0;
2197 
2198 	a.full = dfixed_const(2);
2199 	b.full = dfixed_const(1);
2200 	if ((wm->vsc.full > a.full) ||
2201 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2202 	    (wm->vtaps >= 5) ||
2203 	    ((wm->vsc.full >= a.full) && wm->interlaced))
2204 		max_src_lines_per_dst_line = 4;
2205 	else
2206 		max_src_lines_per_dst_line = 2;
2207 
2208 	a.full = dfixed_const(available_bandwidth);
2209 	b.full = dfixed_const(wm->num_heads);
2210 	a.full = dfixed_div(a, b);
2211 	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
2212 	tmp = min(dfixed_trunc(a), tmp);
2213 
2214 	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
2215 
2216 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2217 	b.full = dfixed_const(1000);
2218 	c.full = dfixed_const(lb_fill_bw);
2219 	b.full = dfixed_div(c, b);
2220 	a.full = dfixed_div(a, b);
2221 	line_fill_time = dfixed_trunc(a);
2222 
2223 	if (line_fill_time < wm->active_time)
2224 		return latency;
2225 	else
2226 		return latency + (line_fill_time - wm->active_time);
2227 
2228 }
2229 
2230 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2231 {
2232 	if (dce6_average_bandwidth(wm) <=
2233 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2234 		return true;
2235 	else
2236 		return false;
2237 };
2238 
2239 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2240 {
2241 	if (dce6_average_bandwidth(wm) <=
2242 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2243 		return true;
2244 	else
2245 		return false;
2246 };
2247 
2248 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2249 {
2250 	u32 lb_partitions = wm->lb_size / wm->src_width;
2251 	u32 line_time = wm->active_time + wm->blank_time;
2252 	u32 latency_tolerant_lines;
2253 	u32 latency_hiding;
2254 	fixed20_12 a;
2255 
2256 	a.full = dfixed_const(1);
2257 	if (wm->vsc.full > a.full)
2258 		latency_tolerant_lines = 1;
2259 	else {
2260 		if (lb_partitions <= (wm->vtaps + 1))
2261 			latency_tolerant_lines = 1;
2262 		else
2263 			latency_tolerant_lines = 2;
2264 	}
2265 
2266 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2267 
2268 	if (dce6_latency_watermark(wm) <= latency_hiding)
2269 		return true;
2270 	else
2271 		return false;
2272 }
2273 
2274 static void dce6_program_watermarks(struct radeon_device *rdev,
2275 					 struct radeon_crtc *radeon_crtc,
2276 					 u32 lb_size, u32 num_heads)
2277 {
2278 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2279 	struct dce6_wm_params wm_low, wm_high;
2280 	u32 dram_channels;
2281 	u32 active_time;
2282 	u32 line_time = 0;
2283 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2284 	u32 priority_a_mark = 0, priority_b_mark = 0;
2285 	u32 priority_a_cnt = PRIORITY_OFF;
2286 	u32 priority_b_cnt = PRIORITY_OFF;
2287 	u32 tmp, arb_control3;
2288 	fixed20_12 a, b, c;
2289 
2290 	if (radeon_crtc->base.enabled && num_heads && mode) {
2291 		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
2292 					    (u32)mode->clock);
2293 		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
2294 					  (u32)mode->clock);
2295 		line_time = min(line_time, (u32)65535);
2296 		priority_a_cnt = 0;
2297 		priority_b_cnt = 0;
2298 
2299 		if (rdev->family == CHIP_ARUBA)
2300 			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2301 		else
2302 			dram_channels = si_get_number_of_dram_channels(rdev);
2303 
2304 		/* watermark for high clocks */
2305 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2306 			wm_high.yclk =
2307 				radeon_dpm_get_mclk(rdev, false) * 10;
2308 			wm_high.sclk =
2309 				radeon_dpm_get_sclk(rdev, false) * 10;
2310 		} else {
2311 			wm_high.yclk = rdev->pm.current_mclk * 10;
2312 			wm_high.sclk = rdev->pm.current_sclk * 10;
2313 		}
2314 
2315 		wm_high.disp_clk = mode->clock;
2316 		wm_high.src_width = mode->crtc_hdisplay;
2317 		wm_high.active_time = active_time;
2318 		wm_high.blank_time = line_time - wm_high.active_time;
2319 		wm_high.interlaced = false;
2320 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2321 			wm_high.interlaced = true;
2322 		wm_high.vsc = radeon_crtc->vsc;
2323 		wm_high.vtaps = 1;
2324 		if (radeon_crtc->rmx_type != RMX_OFF)
2325 			wm_high.vtaps = 2;
2326 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2327 		wm_high.lb_size = lb_size;
2328 		wm_high.dram_channels = dram_channels;
2329 		wm_high.num_heads = num_heads;
2330 
2331 		/* watermark for low clocks */
2332 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2333 			wm_low.yclk =
2334 				radeon_dpm_get_mclk(rdev, true) * 10;
2335 			wm_low.sclk =
2336 				radeon_dpm_get_sclk(rdev, true) * 10;
2337 		} else {
2338 			wm_low.yclk = rdev->pm.current_mclk * 10;
2339 			wm_low.sclk = rdev->pm.current_sclk * 10;
2340 		}
2341 
2342 		wm_low.disp_clk = mode->clock;
2343 		wm_low.src_width = mode->crtc_hdisplay;
2344 		wm_low.active_time = active_time;
2345 		wm_low.blank_time = line_time - wm_low.active_time;
2346 		wm_low.interlaced = false;
2347 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2348 			wm_low.interlaced = true;
2349 		wm_low.vsc = radeon_crtc->vsc;
2350 		wm_low.vtaps = 1;
2351 		if (radeon_crtc->rmx_type != RMX_OFF)
2352 			wm_low.vtaps = 2;
2353 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2354 		wm_low.lb_size = lb_size;
2355 		wm_low.dram_channels = dram_channels;
2356 		wm_low.num_heads = num_heads;
2357 
2358 		/* set for high clocks */
2359 		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2360 		/* set for low clocks */
2361 		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2362 
2363 		/* possibly force display priority to high */
2364 		/* should really do this at mode validation time... */
2365 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2366 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2367 		    !dce6_check_latency_hiding(&wm_high) ||
2368 		    (rdev->disp_priority == 2)) {
2369 			DRM_DEBUG_KMS("force priority to high\n");
2370 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2371 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2372 		}
2373 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2374 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2375 		    !dce6_check_latency_hiding(&wm_low) ||
2376 		    (rdev->disp_priority == 2)) {
2377 			DRM_DEBUG_KMS("force priority to high\n");
2378 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2379 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2380 		}
2381 
2382 		a.full = dfixed_const(1000);
2383 		b.full = dfixed_const(mode->clock);
2384 		b.full = dfixed_div(b, a);
2385 		c.full = dfixed_const(latency_watermark_a);
2386 		c.full = dfixed_mul(c, b);
2387 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2388 		c.full = dfixed_div(c, a);
2389 		a.full = dfixed_const(16);
2390 		c.full = dfixed_div(c, a);
2391 		priority_a_mark = dfixed_trunc(c);
2392 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2393 
2394 		a.full = dfixed_const(1000);
2395 		b.full = dfixed_const(mode->clock);
2396 		b.full = dfixed_div(b, a);
2397 		c.full = dfixed_const(latency_watermark_b);
2398 		c.full = dfixed_mul(c, b);
2399 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2400 		c.full = dfixed_div(c, a);
2401 		a.full = dfixed_const(16);
2402 		c.full = dfixed_div(c, a);
2403 		priority_b_mark = dfixed_trunc(c);
2404 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2405 
2406 		/* Save number of lines the linebuffer leads before the scanout */
2407 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2408 	}
2409 
2410 	/* select wm A */
2411 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2412 	tmp = arb_control3;
2413 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2414 	tmp |= LATENCY_WATERMARK_MASK(1);
2415 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2416 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2417 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2418 		LATENCY_HIGH_WATERMARK(line_time)));
2419 	/* select wm B */
2420 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2421 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2422 	tmp |= LATENCY_WATERMARK_MASK(2);
2423 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2424 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2425 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2426 		LATENCY_HIGH_WATERMARK(line_time)));
2427 	/* restore original selection */
2428 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2429 
2430 	/* write the priority marks */
2431 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2432 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2433 
2434 	/* save values for DPM */
2435 	radeon_crtc->line_time = line_time;
2436 	radeon_crtc->wm_high = latency_watermark_a;
2437 	radeon_crtc->wm_low = latency_watermark_b;
2438 }
2439 
2440 void dce6_bandwidth_update(struct radeon_device *rdev)
2441 {
2442 	struct drm_display_mode *mode0 = NULL;
2443 	struct drm_display_mode *mode1 = NULL;
2444 	u32 num_heads = 0, lb_size;
2445 	int i;
2446 
2447 	if (!rdev->mode_info.mode_config_initialized)
2448 		return;
2449 
2450 	radeon_update_display_priority(rdev);
2451 
2452 	for (i = 0; i < rdev->num_crtc; i++) {
2453 		if (rdev->mode_info.crtcs[i]->base.enabled)
2454 			num_heads++;
2455 	}
2456 	for (i = 0; i < rdev->num_crtc; i += 2) {
2457 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2458 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2459 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2460 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2461 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2462 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2463 	}
2464 }
2465 
2466 /*
2467  * Core functions
2468  */
2469 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2470 {
2471 	u32 *tile = rdev->config.si.tile_mode_array;
2472 	const u32 num_tile_mode_states =
2473 			ARRAY_SIZE(rdev->config.si.tile_mode_array);
2474 	u32 reg_offset, split_equal_to_row_size;
2475 
2476 	switch (rdev->config.si.mem_row_size_in_kb) {
2477 	case 1:
2478 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2479 		break;
2480 	case 2:
2481 	default:
2482 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2483 		break;
2484 	case 4:
2485 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2486 		break;
2487 	}
2488 
2489 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2490 		tile[reg_offset] = 0;
2491 
2492 	switch(rdev->family) {
2493 	case CHIP_TAHITI:
2494 	case CHIP_PITCAIRN:
2495 		/* non-AA compressed depth or any compressed stencil */
2496 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2497 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2498 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2499 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2500 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2501 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2502 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2503 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2504 		/* 2xAA/4xAA compressed depth only */
2505 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2506 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2507 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2508 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2509 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2510 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2511 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2512 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2513 		/* 8xAA compressed depth only */
2514 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2515 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2516 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2517 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2518 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2519 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2520 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2521 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2522 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2523 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2524 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2525 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2526 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2527 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2528 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2529 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2530 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2531 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2532 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2533 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2534 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2535 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2536 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2537 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2538 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2539 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2540 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2541 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2542 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2543 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2544 			   TILE_SPLIT(split_equal_to_row_size) |
2545 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2546 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2547 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2548 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2549 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2550 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2551 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2552 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2553 			   TILE_SPLIT(split_equal_to_row_size) |
2554 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2555 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2557 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2558 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2559 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2560 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2561 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2562 			   TILE_SPLIT(split_equal_to_row_size) |
2563 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2564 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2565 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2566 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2567 		/* 1D and 1D Array Surfaces */
2568 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2569 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2570 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2571 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2572 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2573 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2574 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2575 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2576 		/* Displayable maps. */
2577 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2578 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2579 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2580 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2581 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2582 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2583 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2584 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2585 		/* Display 8bpp. */
2586 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2587 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2588 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2589 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2590 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2591 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2592 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2593 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2594 		/* Display 16bpp. */
2595 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2596 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2597 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2598 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2599 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2600 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2601 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2602 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2603 		/* Display 32bpp. */
2604 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2605 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2606 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2607 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2608 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2609 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2610 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2611 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2612 		/* Thin. */
2613 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2614 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2615 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2616 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2617 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2618 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2619 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2620 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2621 		/* Thin 8 bpp. */
2622 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2623 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2624 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2625 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2626 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2627 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2628 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2629 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2630 		/* Thin 16 bpp. */
2631 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2632 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2633 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2634 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2635 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2636 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2637 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2638 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2639 		/* Thin 32 bpp. */
2640 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2642 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2643 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2644 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2645 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2646 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2647 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2648 		/* Thin 64 bpp. */
2649 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2650 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2651 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2652 			   TILE_SPLIT(split_equal_to_row_size) |
2653 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2654 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2655 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2656 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2657 		/* 8 bpp PRT. */
2658 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2659 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2660 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2661 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2662 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2663 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2664 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2665 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2666 		/* 16 bpp PRT */
2667 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2668 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2669 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2670 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2671 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2672 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2673 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2674 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2675 		/* 32 bpp PRT */
2676 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2677 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2678 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2679 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2680 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2681 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2682 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2683 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2684 		/* 64 bpp PRT */
2685 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2686 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2687 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2688 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2689 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2690 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2691 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2692 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2693 		/* 128 bpp PRT */
2694 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2695 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2696 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2697 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2698 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2699 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2700 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2701 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2702 
2703 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2704 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2705 		break;
2706 
2707 	case CHIP_VERDE:
2708 	case CHIP_OLAND:
2709 	case CHIP_HAINAN:
2710 		/* non-AA compressed depth or any compressed stencil */
2711 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2712 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2713 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2714 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2715 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2716 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2717 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2718 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2719 		/* 2xAA/4xAA compressed depth only */
2720 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2721 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2722 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2723 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2724 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2725 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2726 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2727 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2728 		/* 8xAA compressed depth only */
2729 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2730 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2731 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2732 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2733 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2734 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2735 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2736 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2737 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2738 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2739 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2740 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2741 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2742 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2743 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2744 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2745 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2746 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2747 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2748 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2749 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2750 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2751 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2752 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2753 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2754 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2755 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2756 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2757 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2758 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2759 			   TILE_SPLIT(split_equal_to_row_size) |
2760 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2761 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2762 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2763 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2764 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2765 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2766 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2767 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2768 			   TILE_SPLIT(split_equal_to_row_size) |
2769 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2770 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2771 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2772 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2773 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2774 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2775 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2776 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2777 			   TILE_SPLIT(split_equal_to_row_size) |
2778 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2779 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2780 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2781 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2782 		/* 1D and 1D Array Surfaces */
2783 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2784 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2785 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2786 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2787 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2788 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2789 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2790 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2791 		/* Displayable maps. */
2792 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2793 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2794 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2795 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2796 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2797 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2798 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2799 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2800 		/* Display 8bpp. */
2801 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2802 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2803 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2804 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2805 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2806 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2807 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2808 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2809 		/* Display 16bpp. */
2810 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2811 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2812 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2813 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2814 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2815 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2816 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2817 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2818 		/* Display 32bpp. */
2819 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2820 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2821 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2822 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2823 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2824 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2825 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2826 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2827 		/* Thin. */
2828 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2829 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2830 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2831 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2832 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2833 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2834 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2835 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2836 		/* Thin 8 bpp. */
2837 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2838 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2839 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2840 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2841 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2842 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2843 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2844 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2845 		/* Thin 16 bpp. */
2846 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2847 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2848 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2849 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2850 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2851 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2852 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2853 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2854 		/* Thin 32 bpp. */
2855 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2856 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2857 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2858 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2859 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2860 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2861 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2862 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2863 		/* Thin 64 bpp. */
2864 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2865 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2866 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2867 			   TILE_SPLIT(split_equal_to_row_size) |
2868 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2869 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2870 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2871 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2872 		/* 8 bpp PRT. */
2873 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2874 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2875 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2876 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2877 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2878 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2879 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2880 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2881 		/* 16 bpp PRT */
2882 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2883 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2884 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2885 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2886 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2887 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2888 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2889 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2890 		/* 32 bpp PRT */
2891 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2892 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2893 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2894 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2895 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2896 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2897 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2898 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2899 		/* 64 bpp PRT */
2900 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2901 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2902 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2903 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2904 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2905 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2906 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2907 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2908 		/* 128 bpp PRT */
2909 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2910 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2911 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2912 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2913 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2914 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2915 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2916 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2917 
2918 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2919 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2920 		break;
2921 
2922 	default:
2923 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2924 	}
2925 }
2926 
2927 static void si_select_se_sh(struct radeon_device *rdev,
2928 			    u32 se_num, u32 sh_num)
2929 {
2930 	u32 data = INSTANCE_BROADCAST_WRITES;
2931 
2932 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2933 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2934 	else if (se_num == 0xffffffff)
2935 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2936 	else if (sh_num == 0xffffffff)
2937 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2938 	else
2939 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2940 	WREG32(GRBM_GFX_INDEX, data);
2941 }
2942 
/**
 * si_create_bitmask - build a mask with the low @bit_width bits set
 * @bit_width: number of low-order bits to set
 *
 * The mask is built one bit at a time rather than with a single shift so
 * that a width of 32 or more simply yields 0xffffffff.
 *
 * Returns the resulting mask (0 when @bit_width is 0).
 */
static u32 si_create_bitmask(u32 bit_width)
{
	u32 bits = 0;
	u32 remaining;

	for (remaining = bit_width; remaining != 0; remaining--)
		bits = (bits << 1) | 1;

	return bits;
}
2953 
2954 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2955 {
2956 	u32 data, mask;
2957 
2958 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2959 	if (data & 1)
2960 		data &= INACTIVE_CUS_MASK;
2961 	else
2962 		data = 0;
2963 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2964 
2965 	data >>= INACTIVE_CUS_SHIFT;
2966 
2967 	mask = si_create_bitmask(cu_per_sh);
2968 
2969 	return ~data & mask;
2970 }
2971 
2972 static void si_setup_spi(struct radeon_device *rdev,
2973 			 u32 se_num, u32 sh_per_se,
2974 			 u32 cu_per_sh)
2975 {
2976 	int i, j, k;
2977 	u32 data, mask, active_cu;
2978 
2979 	for (i = 0; i < se_num; i++) {
2980 		for (j = 0; j < sh_per_se; j++) {
2981 			si_select_se_sh(rdev, i, j);
2982 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2983 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2984 
2985 			mask = 1;
2986 			for (k = 0; k < 16; k++) {
2987 				mask <<= k;
2988 				if (active_cu & mask) {
2989 					data &= ~mask;
2990 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2991 					break;
2992 				}
2993 			}
2994 		}
2995 	}
2996 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2997 }
2998 
2999 static u32 si_get_rb_disabled(struct radeon_device *rdev,
3000 			      u32 max_rb_num_per_se,
3001 			      u32 sh_per_se)
3002 {
3003 	u32 data, mask;
3004 
3005 	data = RREG32(CC_RB_BACKEND_DISABLE);
3006 	if (data & 1)
3007 		data &= BACKEND_DISABLE_MASK;
3008 	else
3009 		data = 0;
3010 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3011 
3012 	data >>= BACKEND_DISABLE_SHIFT;
3013 
3014 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3015 
3016 	return data & mask;
3017 }
3018 
3019 static void si_setup_rb(struct radeon_device *rdev,
3020 			u32 se_num, u32 sh_per_se,
3021 			u32 max_rb_num_per_se)
3022 {
3023 	int i, j;
3024 	u32 data, mask;
3025 	u32 disabled_rbs = 0;
3026 	u32 enabled_rbs = 0;
3027 
3028 	for (i = 0; i < se_num; i++) {
3029 		for (j = 0; j < sh_per_se; j++) {
3030 			si_select_se_sh(rdev, i, j);
3031 			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3032 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3033 		}
3034 	}
3035 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3036 
3037 	mask = 1;
3038 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3039 		if (!(disabled_rbs & mask))
3040 			enabled_rbs |= mask;
3041 		mask <<= 1;
3042 	}
3043 
3044 	rdev->config.si.backend_enable_mask = enabled_rbs;
3045 
3046 	for (i = 0; i < se_num; i++) {
3047 		si_select_se_sh(rdev, i, 0xffffffff);
3048 		data = 0;
3049 		for (j = 0; j < sh_per_se; j++) {
3050 			switch (enabled_rbs & 3) {
3051 			case 1:
3052 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3053 				break;
3054 			case 2:
3055 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3056 				break;
3057 			case 3:
3058 			default:
3059 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3060 				break;
3061 			}
3062 			enabled_rbs >>= 2;
3063 		}
3064 		WREG32(PA_SC_RASTER_CONFIG, data);
3065 	}
3066 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3067 }
3068 
/**
 * si_gpu_init - fill in per-ASIC limits and program engine defaults
 * @rdev: radeon_device pointer
 *
 * Populates rdev->config.si with the per-family limits, derives the
 * memory row size and the custom tile_config dword from the memory
 * controller configuration, writes the resulting address config to the
 * blocks that take it, sets up the tiling table, render backends and SPI,
 * counts the active CUs and finally writes a set of default values to
 * the 3D engine registers.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	/*
	 * Per-family limits and golden GB_ADDR_CONFIG value.  Families not
	 * listed explicitly share the CHIP_VERDE settings via "default".
	 */
	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	/* zero five registers in each of 32 groups spaced 0x18 bytes apart */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
	WREG32(SRBM_INT_CNTL, 1);
	WREG32(SRBM_INT_ACK, 1);

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* MC_SHARED_CHMAP is read but its value is not used here */
	RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* row size in KB from the NOOFCOLS field, capped at 4 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	/* replace the golden ROW_SIZE field with one matching the row size above */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	/* group_size and row_size fields are copied out of gb_addr_config */
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* the same address config value goes to every block that takes one */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);

	/* total active CUs: sum of set bits in each SE/SH active bitmap */
	rdev->config.si.active_cus = 0;
	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
			rdev->config.si.active_cus +=
				hweight32(si_get_cu_active_bitmap(rdev, i, j));
		}
	}

	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* SX_DEBUG_1 is read and written back unchanged */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	/* FIFO sizes come from the per-family values chosen above */
	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* zero all CB perf counter select registers */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	/* HDP_HOST_PATH_CNTL is read and written back unchanged */
	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	udelay(50);
}
3334 
/*
 * GPU scratch register helper functions.
 */
3338 static void si_scratch_init(struct radeon_device *rdev)
3339 {
3340 	int i;
3341 
3342 	rdev->scratch.num_reg = 7;
3343 	rdev->scratch.reg_base = SCRATCH_REG0;
3344 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3345 		rdev->scratch.free[i] = true;
3346 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3347 	}
3348 }
3349 
3350 void si_fence_ring_emit(struct radeon_device *rdev,
3351 			struct radeon_fence *fence)
3352 {
3353 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3354 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3355 
3356 	/* flush read cache over gart */
3357 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3358 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3359 	radeon_ring_write(ring, 0);
3360 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3361 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3362 			  PACKET3_TC_ACTION_ENA |
3363 			  PACKET3_SH_KCACHE_ACTION_ENA |
3364 			  PACKET3_SH_ICACHE_ACTION_ENA);
3365 	radeon_ring_write(ring, 0xFFFFFFFF);
3366 	radeon_ring_write(ring, 0);
3367 	radeon_ring_write(ring, 10); /* poll interval */
3368 	/* EVENT_WRITE_EOP - flush caches, send int */
3369 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3370 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3371 	radeon_ring_write(ring, lower_32_bits(addr));
3372 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3373 	radeon_ring_write(ring, fence->seq);
3374 	radeon_ring_write(ring, 0);
3375 }
3376 
3377 /*
3378  * IB stuff
3379  */
3380 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3381 {
3382 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3383 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3384 	u32 header;
3385 
3386 	if (ib->is_const_ib) {
3387 		/* set switch buffer packet before const IB */
3388 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3389 		radeon_ring_write(ring, 0);
3390 
3391 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3392 	} else {
3393 		u32 next_rptr;
3394 		if (ring->rptr_save_reg) {
3395 			next_rptr = ring->wptr + 3 + 4 + 8;
3396 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3397 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3398 						  PACKET3_SET_CONFIG_REG_START) >> 2));
3399 			radeon_ring_write(ring, next_rptr);
3400 		} else if (rdev->wb.enabled) {
3401 			next_rptr = ring->wptr + 5 + 4 + 8;
3402 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3403 			radeon_ring_write(ring, (1 << 8));
3404 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3405 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3406 			radeon_ring_write(ring, next_rptr);
3407 		}
3408 
3409 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3410 	}
3411 
3412 	radeon_ring_write(ring, header);
3413 	radeon_ring_write(ring,
3414 #ifdef __BIG_ENDIAN
3415 			  (2 << 0) |
3416 #endif
3417 			  (ib->gpu_addr & 0xFFFFFFFC));
3418 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3419 	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3420 
3421 	if (!ib->is_const_ib) {
3422 		/* flush read cache over gart for this vmid */
3423 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3424 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3425 		radeon_ring_write(ring, vm_id);
3426 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3427 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3428 				  PACKET3_TC_ACTION_ENA |
3429 				  PACKET3_SH_KCACHE_ACTION_ENA |
3430 				  PACKET3_SH_ICACHE_ACTION_ENA);
3431 		radeon_ring_write(ring, 0xFFFFFFFF);
3432 		radeon_ring_write(ring, 0);
3433 		radeon_ring_write(ring, 10); /* poll interval */
3434 	}
3435 }
3436 
3437 /*
3438  * CP.
3439  */
3440 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3441 {
3442 	if (enable)
3443 		WREG32(CP_ME_CNTL, 0);
3444 	else {
3445 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3446 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3447 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3448 		WREG32(SCRATCH_UMSK, 0);
3449 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3450 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3451 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3452 	}
3453 	udelay(50);
3454 }
3455 
3456 static int si_cp_load_microcode(struct radeon_device *rdev)
3457 {
3458 	int i;
3459 
3460 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3461 		return -EINVAL;
3462 
3463 	si_cp_enable(rdev, false);
3464 
3465 	if (rdev->new_fw) {
3466 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3467 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3468 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3469 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3470 		const struct gfx_firmware_header_v1_0 *me_hdr =
3471 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3472 		const __le32 *fw_data;
3473 		u32 fw_size;
3474 
3475 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3476 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3477 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3478 
3479 		/* PFP */
3480 		fw_data = (const __le32 *)
3481 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3482 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3483 		WREG32(CP_PFP_UCODE_ADDR, 0);
3484 		for (i = 0; i < fw_size; i++)
3485 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3486 		WREG32(CP_PFP_UCODE_ADDR, 0);
3487 
3488 		/* CE */
3489 		fw_data = (const __le32 *)
3490 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3491 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3492 		WREG32(CP_CE_UCODE_ADDR, 0);
3493 		for (i = 0; i < fw_size; i++)
3494 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3495 		WREG32(CP_CE_UCODE_ADDR, 0);
3496 
3497 		/* ME */
3498 		fw_data = (const __be32 *)
3499 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3500 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3501 		WREG32(CP_ME_RAM_WADDR, 0);
3502 		for (i = 0; i < fw_size; i++)
3503 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3504 		WREG32(CP_ME_RAM_WADDR, 0);
3505 	} else {
3506 		const __be32 *fw_data;
3507 
3508 		/* PFP */
3509 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3510 		WREG32(CP_PFP_UCODE_ADDR, 0);
3511 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3512 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3513 		WREG32(CP_PFP_UCODE_ADDR, 0);
3514 
3515 		/* CE */
3516 		fw_data = (const __be32 *)rdev->ce_fw->data;
3517 		WREG32(CP_CE_UCODE_ADDR, 0);
3518 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3519 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3520 		WREG32(CP_CE_UCODE_ADDR, 0);
3521 
3522 		/* ME */
3523 		fw_data = (const __be32 *)rdev->me_fw->data;
3524 		WREG32(CP_ME_RAM_WADDR, 0);
3525 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3526 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3527 		WREG32(CP_ME_RAM_WADDR, 0);
3528 	}
3529 
3530 	WREG32(CP_PFP_UCODE_ADDR, 0);
3531 	WREG32(CP_CE_UCODE_ADDR, 0);
3532 	WREG32(CP_ME_RAM_WADDR, 0);
3533 	WREG32(CP_ME_RAM_RADDR, 0);
3534 	return 0;
3535 }
3536 
3537 static int si_cp_start(struct radeon_device *rdev)
3538 {
3539 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3540 	int r, i;
3541 
3542 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3543 	if (r) {
3544 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3545 		return r;
3546 	}
3547 	/* init the CP */
3548 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3549 	radeon_ring_write(ring, 0x1);
3550 	radeon_ring_write(ring, 0x0);
3551 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3552 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3553 	radeon_ring_write(ring, 0);
3554 	radeon_ring_write(ring, 0);
3555 
3556 	/* init the CE partitions */
3557 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3558 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3559 	radeon_ring_write(ring, 0xc000);
3560 	radeon_ring_write(ring, 0xe000);
3561 	radeon_ring_unlock_commit(rdev, ring, false);
3562 
3563 	si_cp_enable(rdev, true);
3564 
3565 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3566 	if (r) {
3567 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3568 		return r;
3569 	}
3570 
3571 	/* setup clear context state */
3572 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3573 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3574 
3575 	for (i = 0; i < si_default_size; i++)
3576 		radeon_ring_write(ring, si_default_state[i]);
3577 
3578 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3579 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3580 
3581 	/* set clear context state */
3582 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3583 	radeon_ring_write(ring, 0);
3584 
3585 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3586 	radeon_ring_write(ring, 0x00000316);
3587 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3588 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3589 
3590 	radeon_ring_unlock_commit(rdev, ring, false);
3591 
3592 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3593 		ring = &rdev->ring[i];
3594 		r = radeon_ring_lock(rdev, ring, 2);
3595 		if (r) {
3596 			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3597 			return r;
3598 		}
3599 
3600 		/* clear the compute context state */
3601 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3602 		radeon_ring_write(ring, 0);
3603 
3604 		radeon_ring_unlock_commit(rdev, ring, false);
3605 	}
3606 
3607 	return 0;
3608 }
3609 
3610 static void si_cp_fini(struct radeon_device *rdev)
3611 {
3612 	struct radeon_ring *ring;
3613 	si_cp_enable(rdev, false);
3614 
3615 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3616 	radeon_ring_fini(rdev, ring);
3617 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3618 
3619 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3620 	radeon_ring_fini(rdev, ring);
3621 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3622 
3623 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3624 	radeon_ring_fini(rdev, ring);
3625 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3626 }
3627 
3628 static int si_cp_resume(struct radeon_device *rdev)
3629 {
3630 	struct radeon_ring *ring;
3631 	u32 tmp;
3632 	u32 rb_bufsz;
3633 	int r;
3634 
3635 	si_enable_gui_idle_interrupt(rdev, false);
3636 
3637 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3638 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3639 
3640 	/* Set the write pointer delay */
3641 	WREG32(CP_RB_WPTR_DELAY, 0);
3642 
3643 	WREG32(CP_DEBUG, 0);
3644 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3645 
3646 	/* ring 0 - compute and gfx */
3647 	/* Set ring buffer size */
3648 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3649 	rb_bufsz = order_base_2(ring->ring_size / 8);
3650 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3651 #ifdef __BIG_ENDIAN
3652 	tmp |= BUF_SWAP_32BIT;
3653 #endif
3654 	WREG32(CP_RB0_CNTL, tmp);
3655 
3656 	/* Initialize the ring buffer's read and write pointers */
3657 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3658 	ring->wptr = 0;
3659 	WREG32(CP_RB0_WPTR, ring->wptr);
3660 
3661 	/* set the wb address whether it's enabled or not */
3662 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3663 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3664 
3665 	if (rdev->wb.enabled)
3666 		WREG32(SCRATCH_UMSK, 0xff);
3667 	else {
3668 		tmp |= RB_NO_UPDATE;
3669 		WREG32(SCRATCH_UMSK, 0);
3670 	}
3671 
3672 	mdelay(1);
3673 	WREG32(CP_RB0_CNTL, tmp);
3674 
3675 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3676 
3677 	/* ring1  - compute only */
3678 	/* Set ring buffer size */
3679 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3680 	rb_bufsz = order_base_2(ring->ring_size / 8);
3681 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3682 #ifdef __BIG_ENDIAN
3683 	tmp |= BUF_SWAP_32BIT;
3684 #endif
3685 	WREG32(CP_RB1_CNTL, tmp);
3686 
3687 	/* Initialize the ring buffer's read and write pointers */
3688 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3689 	ring->wptr = 0;
3690 	WREG32(CP_RB1_WPTR, ring->wptr);
3691 
3692 	/* set the wb address whether it's enabled or not */
3693 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3694 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3695 
3696 	mdelay(1);
3697 	WREG32(CP_RB1_CNTL, tmp);
3698 
3699 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3700 
3701 	/* ring2 - compute only */
3702 	/* Set ring buffer size */
3703 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3704 	rb_bufsz = order_base_2(ring->ring_size / 8);
3705 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3706 #ifdef __BIG_ENDIAN
3707 	tmp |= BUF_SWAP_32BIT;
3708 #endif
3709 	WREG32(CP_RB2_CNTL, tmp);
3710 
3711 	/* Initialize the ring buffer's read and write pointers */
3712 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3713 	ring->wptr = 0;
3714 	WREG32(CP_RB2_WPTR, ring->wptr);
3715 
3716 	/* set the wb address whether it's enabled or not */
3717 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3718 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3719 
3720 	mdelay(1);
3721 	WREG32(CP_RB2_CNTL, tmp);
3722 
3723 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3724 
3725 	/* start the rings */
3726 	si_cp_start(rdev);
3727 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3728 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3729 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3730 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3731 	if (r) {
3732 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3733 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3734 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3735 		return r;
3736 	}
3737 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3738 	if (r) {
3739 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3740 	}
3741 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3742 	if (r) {
3743 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3744 	}
3745 
3746 	si_enable_gui_idle_interrupt(rdev, true);
3747 
3748 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3749 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3750 
3751 	return 0;
3752 }
3753 
3754 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3755 {
3756 	u32 reset_mask = 0;
3757 	u32 tmp;
3758 
3759 	/* GRBM_STATUS */
3760 	tmp = RREG32(GRBM_STATUS);
3761 	if (tmp & (PA_BUSY | SC_BUSY |
3762 		   BCI_BUSY | SX_BUSY |
3763 		   TA_BUSY | VGT_BUSY |
3764 		   DB_BUSY | CB_BUSY |
3765 		   GDS_BUSY | SPI_BUSY |
3766 		   IA_BUSY | IA_BUSY_NO_DMA))
3767 		reset_mask |= RADEON_RESET_GFX;
3768 
3769 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3770 		   CP_BUSY | CP_COHERENCY_BUSY))
3771 		reset_mask |= RADEON_RESET_CP;
3772 
3773 	if (tmp & GRBM_EE_BUSY)
3774 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3775 
3776 	/* GRBM_STATUS2 */
3777 	tmp = RREG32(GRBM_STATUS2);
3778 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3779 		reset_mask |= RADEON_RESET_RLC;
3780 
3781 	/* DMA_STATUS_REG 0 */
3782 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3783 	if (!(tmp & DMA_IDLE))
3784 		reset_mask |= RADEON_RESET_DMA;
3785 
3786 	/* DMA_STATUS_REG 1 */
3787 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3788 	if (!(tmp & DMA_IDLE))
3789 		reset_mask |= RADEON_RESET_DMA1;
3790 
3791 	/* SRBM_STATUS2 */
3792 	tmp = RREG32(SRBM_STATUS2);
3793 	if (tmp & DMA_BUSY)
3794 		reset_mask |= RADEON_RESET_DMA;
3795 
3796 	if (tmp & DMA1_BUSY)
3797 		reset_mask |= RADEON_RESET_DMA1;
3798 
3799 	/* SRBM_STATUS */
3800 	tmp = RREG32(SRBM_STATUS);
3801 
3802 	if (tmp & IH_BUSY)
3803 		reset_mask |= RADEON_RESET_IH;
3804 
3805 	if (tmp & SEM_BUSY)
3806 		reset_mask |= RADEON_RESET_SEM;
3807 
3808 	if (tmp & GRBM_RQ_PENDING)
3809 		reset_mask |= RADEON_RESET_GRBM;
3810 
3811 	if (tmp & VMC_BUSY)
3812 		reset_mask |= RADEON_RESET_VMC;
3813 
3814 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3815 		   MCC_BUSY | MCD_BUSY))
3816 		reset_mask |= RADEON_RESET_MC;
3817 
3818 	if (evergreen_is_display_hung(rdev))
3819 		reset_mask |= RADEON_RESET_DISPLAY;
3820 
3821 	/* VM_L2_STATUS */
3822 	tmp = RREG32(VM_L2_STATUS);
3823 	if (tmp & L2_BUSY)
3824 		reset_mask |= RADEON_RESET_VMC;
3825 
3826 	/* Skip MC reset as it's mostly likely not hung, just busy */
3827 	if (reset_mask & RADEON_RESET_MC) {
3828 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3829 		reset_mask &= ~RADEON_RESET_MC;
3830 	}
3831 
3832 	return reset_mask;
3833 }
3834 
3835 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3836 {
3837 	struct evergreen_mc_save save;
3838 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3839 	u32 tmp;
3840 
3841 	if (reset_mask == 0)
3842 		return;
3843 
3844 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3845 
3846 	evergreen_print_gpu_status_regs(rdev);
3847 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3848 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3849 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3850 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3851 
3852 	/* disable PG/CG */
3853 	si_fini_pg(rdev);
3854 	si_fini_cg(rdev);
3855 
3856 	/* stop the rlc */
3857 	si_rlc_stop(rdev);
3858 
3859 	/* Disable CP parsing/prefetching */
3860 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3861 
3862 	if (reset_mask & RADEON_RESET_DMA) {
3863 		/* dma0 */
3864 		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3865 		tmp &= ~DMA_RB_ENABLE;
3866 		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3867 	}
3868 	if (reset_mask & RADEON_RESET_DMA1) {
3869 		/* dma1 */
3870 		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3871 		tmp &= ~DMA_RB_ENABLE;
3872 		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3873 	}
3874 
3875 	udelay(50);
3876 
3877 	evergreen_mc_stop(rdev, &save);
3878 	if (evergreen_mc_wait_for_idle(rdev)) {
3879 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
3880 	}
3881 
3882 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3883 		grbm_soft_reset = SOFT_RESET_CB |
3884 			SOFT_RESET_DB |
3885 			SOFT_RESET_GDS |
3886 			SOFT_RESET_PA |
3887 			SOFT_RESET_SC |
3888 			SOFT_RESET_BCI |
3889 			SOFT_RESET_SPI |
3890 			SOFT_RESET_SX |
3891 			SOFT_RESET_TC |
3892 			SOFT_RESET_TA |
3893 			SOFT_RESET_VGT |
3894 			SOFT_RESET_IA;
3895 	}
3896 
3897 	if (reset_mask & RADEON_RESET_CP) {
3898 		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3899 
3900 		srbm_soft_reset |= SOFT_RESET_GRBM;
3901 	}
3902 
3903 	if (reset_mask & RADEON_RESET_DMA)
3904 		srbm_soft_reset |= SOFT_RESET_DMA;
3905 
3906 	if (reset_mask & RADEON_RESET_DMA1)
3907 		srbm_soft_reset |= SOFT_RESET_DMA1;
3908 
3909 	if (reset_mask & RADEON_RESET_DISPLAY)
3910 		srbm_soft_reset |= SOFT_RESET_DC;
3911 
3912 	if (reset_mask & RADEON_RESET_RLC)
3913 		grbm_soft_reset |= SOFT_RESET_RLC;
3914 
3915 	if (reset_mask & RADEON_RESET_SEM)
3916 		srbm_soft_reset |= SOFT_RESET_SEM;
3917 
3918 	if (reset_mask & RADEON_RESET_IH)
3919 		srbm_soft_reset |= SOFT_RESET_IH;
3920 
3921 	if (reset_mask & RADEON_RESET_GRBM)
3922 		srbm_soft_reset |= SOFT_RESET_GRBM;
3923 
3924 	if (reset_mask & RADEON_RESET_VMC)
3925 		srbm_soft_reset |= SOFT_RESET_VMC;
3926 
3927 	if (reset_mask & RADEON_RESET_MC)
3928 		srbm_soft_reset |= SOFT_RESET_MC;
3929 
3930 	if (grbm_soft_reset) {
3931 		tmp = RREG32(GRBM_SOFT_RESET);
3932 		tmp |= grbm_soft_reset;
3933 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3934 		WREG32(GRBM_SOFT_RESET, tmp);
3935 		tmp = RREG32(GRBM_SOFT_RESET);
3936 
3937 		udelay(50);
3938 
3939 		tmp &= ~grbm_soft_reset;
3940 		WREG32(GRBM_SOFT_RESET, tmp);
3941 		tmp = RREG32(GRBM_SOFT_RESET);
3942 	}
3943 
3944 	if (srbm_soft_reset) {
3945 		tmp = RREG32(SRBM_SOFT_RESET);
3946 		tmp |= srbm_soft_reset;
3947 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3948 		WREG32(SRBM_SOFT_RESET, tmp);
3949 		tmp = RREG32(SRBM_SOFT_RESET);
3950 
3951 		udelay(50);
3952 
3953 		tmp &= ~srbm_soft_reset;
3954 		WREG32(SRBM_SOFT_RESET, tmp);
3955 		tmp = RREG32(SRBM_SOFT_RESET);
3956 	}
3957 
3958 	/* Wait a little for things to settle down */
3959 	udelay(50);
3960 
3961 	evergreen_mc_resume(rdev, &save);
3962 	udelay(50);
3963 
3964 	evergreen_print_gpu_status_regs(rdev);
3965 }
3966 
3967 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3968 {
3969 	u32 tmp, i;
3970 
3971 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3972 	tmp |= SPLL_BYPASS_EN;
3973 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3974 
3975 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3976 	tmp |= SPLL_CTLREQ_CHG;
3977 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3978 
3979 	for (i = 0; i < rdev->usec_timeout; i++) {
3980 		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3981 			break;
3982 		udelay(1);
3983 	}
3984 
3985 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3986 	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
3987 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3988 
3989 	tmp = RREG32(MPLL_CNTL_MODE);
3990 	tmp &= ~MPLL_MCLK_SEL;
3991 	WREG32(MPLL_CNTL_MODE, tmp);
3992 }
3993 
3994 static void si_spll_powerdown(struct radeon_device *rdev)
3995 {
3996 	u32 tmp;
3997 
3998 	tmp = RREG32(SPLL_CNTL_MODE);
3999 	tmp |= SPLL_SW_DIR_CONTROL;
4000 	WREG32(SPLL_CNTL_MODE, tmp);
4001 
4002 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4003 	tmp |= SPLL_RESET;
4004 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4005 
4006 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
4007 	tmp |= SPLL_SLEEP;
4008 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4009 
4010 	tmp = RREG32(SPLL_CNTL_MODE);
4011 	tmp &= ~SPLL_SW_DIR_CONTROL;
4012 	WREG32(SPLL_CNTL_MODE, tmp);
4013 }
4014 
4015 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4016 {
4017 	struct evergreen_mc_save save;
4018 	u32 tmp, i;
4019 
4020 	dev_info(rdev->dev, "GPU pci config reset\n");
4021 
4022 	/* disable dpm? */
4023 
4024 	/* disable cg/pg */
4025 	si_fini_pg(rdev);
4026 	si_fini_cg(rdev);
4027 
4028 	/* Disable CP parsing/prefetching */
4029 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4030 	/* dma0 */
4031 	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4032 	tmp &= ~DMA_RB_ENABLE;
4033 	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4034 	/* dma1 */
4035 	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4036 	tmp &= ~DMA_RB_ENABLE;
4037 	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4038 	/* XXX other engines? */
4039 
4040 	/* halt the rlc, disable cp internal ints */
4041 	si_rlc_stop(rdev);
4042 
4043 	udelay(50);
4044 
4045 	/* disable mem access */
4046 	evergreen_mc_stop(rdev, &save);
4047 	if (evergreen_mc_wait_for_idle(rdev)) {
4048 		dev_warn(rdev->dev, "Wait for MC idle timed out !\n");
4049 	}
4050 
4051 	/* set mclk/sclk to bypass */
4052 	si_set_clk_bypass_mode(rdev);
4053 	/* powerdown spll */
4054 	si_spll_powerdown(rdev);
4055 	/* disable BM */
4056 	pci_clear_master(rdev->pdev);
4057 	/* reset */
4058 	radeon_pci_config_reset(rdev);
4059 	/* wait for asic to come out of reset */
4060 	for (i = 0; i < rdev->usec_timeout; i++) {
4061 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4062 			break;
4063 		udelay(1);
4064 	}
4065 }
4066 
4067 int si_asic_reset(struct radeon_device *rdev, bool hard)
4068 {
4069 	u32 reset_mask;
4070 
4071 	if (hard) {
4072 		si_gpu_pci_config_reset(rdev);
4073 		return 0;
4074 	}
4075 
4076 	reset_mask = si_gpu_check_soft_reset(rdev);
4077 
4078 	if (reset_mask)
4079 		r600_set_bios_scratch_engine_hung(rdev, true);
4080 
4081 	/* try soft reset */
4082 	si_gpu_soft_reset(rdev, reset_mask);
4083 
4084 	reset_mask = si_gpu_check_soft_reset(rdev);
4085 
4086 	/* try pci config reset */
4087 	if (reset_mask && radeon_hard_reset)
4088 		si_gpu_pci_config_reset(rdev);
4089 
4090 	reset_mask = si_gpu_check_soft_reset(rdev);
4091 
4092 	if (!reset_mask)
4093 		r600_set_bios_scratch_engine_hung(rdev, false);
4094 
4095 	return 0;
4096 }
4097 
4098 /**
4099  * si_gfx_is_lockup - Check if the GFX engine is locked up
4100  *
4101  * @rdev: radeon_device pointer
4102  * @ring: radeon_ring structure holding ring information
4103  *
4104  * Check if the GFX engine is locked up.
4105  * Returns true if the engine appears to be locked up, false if not.
4106  */
4107 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4108 {
4109 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4110 
4111 	if (!(reset_mask & (RADEON_RESET_GFX |
4112 			    RADEON_RESET_COMPUTE |
4113 			    RADEON_RESET_CP))) {
4114 		radeon_ring_lockup_update(rdev, ring);
4115 		return false;
4116 	}
4117 	return radeon_ring_test_lockup(rdev, ring);
4118 }
4119 
4120 /* MC */
4121 static void si_mc_program(struct radeon_device *rdev)
4122 {
4123 	struct evergreen_mc_save save;
4124 	u32 tmp;
4125 	int i, j;
4126 
4127 	/* Initialize HDP */
4128 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4129 		WREG32((0x2c14 + j), 0x00000000);
4130 		WREG32((0x2c18 + j), 0x00000000);
4131 		WREG32((0x2c1c + j), 0x00000000);
4132 		WREG32((0x2c20 + j), 0x00000000);
4133 		WREG32((0x2c24 + j), 0x00000000);
4134 	}
4135 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4136 
4137 	evergreen_mc_stop(rdev, &save);
4138 	if (radeon_mc_wait_for_idle(rdev)) {
4139 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4140 	}
4141 	if (!ASIC_IS_NODCE(rdev))
4142 		/* Lockout access through VGA aperture*/
4143 		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4144 	/* Update configuration */
4145 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4146 	       rdev->mc.vram_start >> 12);
4147 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4148 	       rdev->mc.vram_end >> 12);
4149 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4150 	       rdev->vram_scratch.gpu_addr >> 12);
4151 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4152 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4153 	WREG32(MC_VM_FB_LOCATION, tmp);
4154 	/* XXX double check these! */
4155 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4156 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4157 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4158 	WREG32(MC_VM_AGP_BASE, 0);
4159 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4160 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4161 	if (radeon_mc_wait_for_idle(rdev)) {
4162 		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
4163 	}
4164 	evergreen_mc_resume(rdev, &save);
4165 	if (!ASIC_IS_NODCE(rdev)) {
4166 		/* we need to own VRAM, so turn off the VGA renderer here
4167 		 * to stop it overwriting our objects */
4168 		rv515_vga_render_disable(rdev);
4169 	}
4170 }
4171 
4172 void si_vram_gtt_location(struct radeon_device *rdev,
4173 			  struct radeon_mc *mc)
4174 {
4175 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4176 		/* leave room for at least 1024M GTT */
4177 		dev_warn(rdev->dev, "limiting VRAM\n");
4178 		mc->real_vram_size = 0xFFC0000000ULL;
4179 		mc->mc_vram_size = 0xFFC0000000ULL;
4180 	}
4181 	radeon_vram_location(rdev, &rdev->mc, 0);
4182 	rdev->mc.gtt_base_align = 0;
4183 	radeon_gtt_location(rdev, mc);
4184 }
4185 
4186 static int si_mc_init(struct radeon_device *rdev)
4187 {
4188 	u32 tmp;
4189 	int chansize, numchan;
4190 
4191 	/* Get VRAM informations */
4192 	rdev->mc.vram_is_ddr = true;
4193 	tmp = RREG32(MC_ARB_RAMCFG);
4194 	if (tmp & CHANSIZE_OVERRIDE) {
4195 		chansize = 16;
4196 	} else if (tmp & CHANSIZE_MASK) {
4197 		chansize = 64;
4198 	} else {
4199 		chansize = 32;
4200 	}
4201 	tmp = RREG32(MC_SHARED_CHMAP);
4202 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4203 	case 0:
4204 	default:
4205 		numchan = 1;
4206 		break;
4207 	case 1:
4208 		numchan = 2;
4209 		break;
4210 	case 2:
4211 		numchan = 4;
4212 		break;
4213 	case 3:
4214 		numchan = 8;
4215 		break;
4216 	case 4:
4217 		numchan = 3;
4218 		break;
4219 	case 5:
4220 		numchan = 6;
4221 		break;
4222 	case 6:
4223 		numchan = 10;
4224 		break;
4225 	case 7:
4226 		numchan = 12;
4227 		break;
4228 	case 8:
4229 		numchan = 16;
4230 		break;
4231 	}
4232 	rdev->mc.vram_width = numchan * chansize;
4233 	/* Could aper size report 0 ? */
4234 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4235 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4236 	/* size in MB on si */
4237 	tmp = RREG32(CONFIG_MEMSIZE);
4238 	/* some boards may have garbage in the upper 16 bits */
4239 	if (tmp & 0xffff0000) {
4240 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4241 		if (tmp & 0xffff)
4242 			tmp &= 0xffff;
4243 	}
4244 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4245 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4246 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4247 	si_vram_gtt_location(rdev, &rdev->mc);
4248 	radeon_update_bandwidth_info(rdev);
4249 
4250 	return 0;
4251 }
4252 
4253 /*
4254  * GART
4255  */
4256 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4257 {
4258 	/* flush hdp cache */
4259 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4260 
4261 	/* bits 0-15 are the VM contexts0-15 */
4262 	WREG32(VM_INVALIDATE_REQUEST, 1);
4263 }
4264 
4265 static int si_pcie_gart_enable(struct radeon_device *rdev)
4266 {
4267 	int r, i;
4268 
4269 	if (rdev->gart.robj == NULL) {
4270 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4271 		return -EINVAL;
4272 	}
4273 	r = radeon_gart_table_vram_pin(rdev);
4274 	if (r)
4275 		return r;
4276 	/* Setup TLB control */
4277 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4278 	       (0xA << 7) |
4279 	       ENABLE_L1_TLB |
4280 	       ENABLE_L1_FRAGMENT_PROCESSING |
4281 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4282 	       ENABLE_ADVANCED_DRIVER_MODEL |
4283 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4284 	/* Setup L2 cache */
4285 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4286 	       ENABLE_L2_FRAGMENT_PROCESSING |
4287 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4288 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4289 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4290 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4291 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4292 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4293 	       BANK_SELECT(4) |
4294 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
4295 	/* setup context0 */
4296 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4297 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4298 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4299 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4300 			(u32)(rdev->dummy_page.addr >> 12));
4301 	WREG32(VM_CONTEXT0_CNTL2, 0);
4302 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4303 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4304 
4305 	WREG32(0x15D4, 0);
4306 	WREG32(0x15D8, 0);
4307 	WREG32(0x15DC, 0);
4308 
4309 	/* empty context1-15 */
4310 	/* set vm size, must be a multiple of 4 */
4311 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4312 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4313 	/* Assign the pt base to something valid for now; the pts used for
4314 	 * the VMs are determined by the application and setup and assigned
4315 	 * on the fly in the vm part of radeon_gart.c
4316 	 */
4317 	for (i = 1; i < 16; i++) {
4318 		if (i < 8)
4319 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4320 			       rdev->vm_manager.saved_table_addr[i]);
4321 		else
4322 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4323 			       rdev->vm_manager.saved_table_addr[i]);
4324 	}
4325 
4326 	/* enable context1-15 */
4327 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4328 	       (u32)(rdev->dummy_page.addr >> 12));
4329 	WREG32(VM_CONTEXT1_CNTL2, 4);
4330 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4331 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4332 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4333 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4334 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4335 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4336 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4337 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4338 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4339 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4340 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4341 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4342 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4343 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4344 
4345 	si_pcie_gart_tlb_flush(rdev);
4346 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4347 		 (unsigned)(rdev->mc.gtt_size >> 20),
4348 		 (unsigned long long)rdev->gart.table_addr);
4349 	rdev->gart.ready = true;
4350 	return 0;
4351 }
4352 
4353 static void si_pcie_gart_disable(struct radeon_device *rdev)
4354 {
4355 	unsigned i;
4356 
4357 	for (i = 1; i < 16; ++i) {
4358 		uint32_t reg;
4359 		if (i < 8)
4360 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4361 		else
4362 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4363 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4364 	}
4365 
4366 	/* Disable all tables */
4367 	WREG32(VM_CONTEXT0_CNTL, 0);
4368 	WREG32(VM_CONTEXT1_CNTL, 0);
4369 	/* Setup TLB control */
4370 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4371 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4372 	/* Setup L2 cache */
4373 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4374 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4375 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4376 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4377 	WREG32(VM_L2_CNTL2, 0);
4378 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4379 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4380 	radeon_gart_table_vram_unpin(rdev);
4381 }
4382 
/**
 * si_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disables the GART, frees the GART table in VRAM and then finalizes the
 * common GART state, in that order.
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	/* hardware first, so the table is no longer in use when freed */
	si_pcie_gart_disable(rdev);

	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4389 
4390 /* vm parser */
4391 static bool si_vm_reg_valid(u32 reg)
4392 {
4393 	/* context regs are fine */
4394 	if (reg >= 0x28000)
4395 		return true;
4396 
4397 	/* shader regs are also fine */
4398 	if (reg >= 0xB000 && reg < 0xC000)
4399 		return true;
4400 
4401 	/* check config regs */
4402 	switch (reg) {
4403 	case GRBM_GFX_INDEX:
4404 	case CP_STRMOUT_CNTL:
4405 	case VGT_VTX_VECT_EJECT_REG:
4406 	case VGT_CACHE_INVALIDATION:
4407 	case VGT_ESGS_RING_SIZE:
4408 	case VGT_GSVS_RING_SIZE:
4409 	case VGT_GS_VERTEX_REUSE:
4410 	case VGT_PRIMITIVE_TYPE:
4411 	case VGT_INDEX_TYPE:
4412 	case VGT_NUM_INDICES:
4413 	case VGT_NUM_INSTANCES:
4414 	case VGT_TF_RING_SIZE:
4415 	case VGT_HS_OFFCHIP_PARAM:
4416 	case VGT_TF_MEMORY_BASE:
4417 	case PA_CL_ENHANCE:
4418 	case PA_SU_LINE_STIPPLE_VALUE:
4419 	case PA_SC_LINE_STIPPLE_STATE:
4420 	case PA_SC_ENHANCE:
4421 	case SQC_CACHES:
4422 	case SPI_STATIC_THREAD_MGMT_1:
4423 	case SPI_STATIC_THREAD_MGMT_2:
4424 	case SPI_STATIC_THREAD_MGMT_3:
4425 	case SPI_PS_MAX_WAVE_ID:
4426 	case SPI_CONFIG_CNTL:
4427 	case SPI_CONFIG_CNTL_1:
4428 	case TA_CNTL_AUX:
4429 	case TA_CS_BC_BASE_ADDR:
4430 		return true;
4431 	default:
4432 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4433 		return false;
4434 	}
4435 }
4436 
4437 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4438 				  u32 *ib, struct radeon_cs_packet *pkt)
4439 {
4440 	switch (pkt->opcode) {
4441 	case PACKET3_NOP:
4442 	case PACKET3_SET_BASE:
4443 	case PACKET3_SET_CE_DE_COUNTERS:
4444 	case PACKET3_LOAD_CONST_RAM:
4445 	case PACKET3_WRITE_CONST_RAM:
4446 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4447 	case PACKET3_DUMP_CONST_RAM:
4448 	case PACKET3_INCREMENT_CE_COUNTER:
4449 	case PACKET3_WAIT_ON_DE_COUNTER:
4450 	case PACKET3_CE_WRITE:
4451 		break;
4452 	default:
4453 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4454 		return -EINVAL;
4455 	}
4456 	return 0;
4457 }
4458 
4459 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4460 {
4461 	u32 start_reg, reg, i;
4462 	u32 command = ib[idx + 4];
4463 	u32 info = ib[idx + 1];
4464 	u32 idx_value = ib[idx];
4465 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4466 		/* src address space is register */
4467 		if (((info & 0x60000000) >> 29) == 0) {
4468 			start_reg = idx_value << 2;
4469 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4470 				reg = start_reg;
4471 				if (!si_vm_reg_valid(reg)) {
4472 					DRM_ERROR("CP DMA Bad SRC register\n");
4473 					return -EINVAL;
4474 				}
4475 			} else {
4476 				for (i = 0; i < (command & 0x1fffff); i++) {
4477 					reg = start_reg + (4 * i);
4478 					if (!si_vm_reg_valid(reg)) {
4479 						DRM_ERROR("CP DMA Bad SRC register\n");
4480 						return -EINVAL;
4481 					}
4482 				}
4483 			}
4484 		}
4485 	}
4486 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4487 		/* dst address space is register */
4488 		if (((info & 0x00300000) >> 20) == 0) {
4489 			start_reg = ib[idx + 2];
4490 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4491 				reg = start_reg;
4492 				if (!si_vm_reg_valid(reg)) {
4493 					DRM_ERROR("CP DMA Bad DST register\n");
4494 					return -EINVAL;
4495 				}
4496 			} else {
4497 				for (i = 0; i < (command & 0x1fffff); i++) {
4498 					reg = start_reg + (4 * i);
4499 					if (!si_vm_reg_valid(reg)) {
4500 						DRM_ERROR("CP DMA Bad DST register\n");
4501 						return -EINVAL;
4502 					}
4503 				}
4504 			}
4505 		}
4506 	}
4507 	return 0;
4508 }
4509 
4510 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4511 				   u32 *ib, struct radeon_cs_packet *pkt)
4512 {
4513 	int r;
4514 	u32 idx = pkt->idx + 1;
4515 	u32 idx_value = ib[idx];
4516 	u32 start_reg, end_reg, reg, i;
4517 
4518 	switch (pkt->opcode) {
4519 	case PACKET3_NOP:
4520 	case PACKET3_SET_BASE:
4521 	case PACKET3_CLEAR_STATE:
4522 	case PACKET3_INDEX_BUFFER_SIZE:
4523 	case PACKET3_DISPATCH_DIRECT:
4524 	case PACKET3_DISPATCH_INDIRECT:
4525 	case PACKET3_ALLOC_GDS:
4526 	case PACKET3_WRITE_GDS_RAM:
4527 	case PACKET3_ATOMIC_GDS:
4528 	case PACKET3_ATOMIC:
4529 	case PACKET3_OCCLUSION_QUERY:
4530 	case PACKET3_SET_PREDICATION:
4531 	case PACKET3_COND_EXEC:
4532 	case PACKET3_PRED_EXEC:
4533 	case PACKET3_DRAW_INDIRECT:
4534 	case PACKET3_DRAW_INDEX_INDIRECT:
4535 	case PACKET3_INDEX_BASE:
4536 	case PACKET3_DRAW_INDEX_2:
4537 	case PACKET3_CONTEXT_CONTROL:
4538 	case PACKET3_INDEX_TYPE:
4539 	case PACKET3_DRAW_INDIRECT_MULTI:
4540 	case PACKET3_DRAW_INDEX_AUTO:
4541 	case PACKET3_DRAW_INDEX_IMMD:
4542 	case PACKET3_NUM_INSTANCES:
4543 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4544 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4545 	case PACKET3_DRAW_INDEX_OFFSET_2:
4546 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4547 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4548 	case PACKET3_MPEG_INDEX:
4549 	case PACKET3_WAIT_REG_MEM:
4550 	case PACKET3_MEM_WRITE:
4551 	case PACKET3_PFP_SYNC_ME:
4552 	case PACKET3_SURFACE_SYNC:
4553 	case PACKET3_EVENT_WRITE:
4554 	case PACKET3_EVENT_WRITE_EOP:
4555 	case PACKET3_EVENT_WRITE_EOS:
4556 	case PACKET3_SET_CONTEXT_REG:
4557 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4558 	case PACKET3_SET_SH_REG:
4559 	case PACKET3_SET_SH_REG_OFFSET:
4560 	case PACKET3_INCREMENT_DE_COUNTER:
4561 	case PACKET3_WAIT_ON_CE_COUNTER:
4562 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4563 	case PACKET3_ME_WRITE:
4564 		break;
4565 	case PACKET3_COPY_DATA:
4566 		if ((idx_value & 0xf00) == 0) {
4567 			reg = ib[idx + 3] * 4;
4568 			if (!si_vm_reg_valid(reg))
4569 				return -EINVAL;
4570 		}
4571 		break;
4572 	case PACKET3_WRITE_DATA:
4573 		if ((idx_value & 0xf00) == 0) {
4574 			start_reg = ib[idx + 1] * 4;
4575 			if (idx_value & 0x10000) {
4576 				if (!si_vm_reg_valid(start_reg))
4577 					return -EINVAL;
4578 			} else {
4579 				for (i = 0; i < (pkt->count - 2); i++) {
4580 					reg = start_reg + (4 * i);
4581 					if (!si_vm_reg_valid(reg))
4582 						return -EINVAL;
4583 				}
4584 			}
4585 		}
4586 		break;
4587 	case PACKET3_COND_WRITE:
4588 		if (idx_value & 0x100) {
4589 			reg = ib[idx + 5] * 4;
4590 			if (!si_vm_reg_valid(reg))
4591 				return -EINVAL;
4592 		}
4593 		break;
4594 	case PACKET3_COPY_DW:
4595 		if (idx_value & 0x2) {
4596 			reg = ib[idx + 3] * 4;
4597 			if (!si_vm_reg_valid(reg))
4598 				return -EINVAL;
4599 		}
4600 		break;
4601 	case PACKET3_SET_CONFIG_REG:
4602 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4603 		end_reg = 4 * pkt->count + start_reg - 4;
4604 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4605 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4606 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4607 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4608 			return -EINVAL;
4609 		}
4610 		for (i = 0; i < pkt->count; i++) {
4611 			reg = start_reg + (4 * i);
4612 			if (!si_vm_reg_valid(reg))
4613 				return -EINVAL;
4614 		}
4615 		break;
4616 	case PACKET3_CP_DMA:
4617 		r = si_vm_packet3_cp_dma_check(ib, idx);
4618 		if (r)
4619 			return r;
4620 		break;
4621 	default:
4622 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4623 		return -EINVAL;
4624 	}
4625 	return 0;
4626 }
4627 
4628 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4629 				       u32 *ib, struct radeon_cs_packet *pkt)
4630 {
4631 	int r;
4632 	u32 idx = pkt->idx + 1;
4633 	u32 idx_value = ib[idx];
4634 	u32 start_reg, reg, i;
4635 
4636 	switch (pkt->opcode) {
4637 	case PACKET3_NOP:
4638 	case PACKET3_SET_BASE:
4639 	case PACKET3_CLEAR_STATE:
4640 	case PACKET3_DISPATCH_DIRECT:
4641 	case PACKET3_DISPATCH_INDIRECT:
4642 	case PACKET3_ALLOC_GDS:
4643 	case PACKET3_WRITE_GDS_RAM:
4644 	case PACKET3_ATOMIC_GDS:
4645 	case PACKET3_ATOMIC:
4646 	case PACKET3_OCCLUSION_QUERY:
4647 	case PACKET3_SET_PREDICATION:
4648 	case PACKET3_COND_EXEC:
4649 	case PACKET3_PRED_EXEC:
4650 	case PACKET3_CONTEXT_CONTROL:
4651 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4652 	case PACKET3_WAIT_REG_MEM:
4653 	case PACKET3_MEM_WRITE:
4654 	case PACKET3_PFP_SYNC_ME:
4655 	case PACKET3_SURFACE_SYNC:
4656 	case PACKET3_EVENT_WRITE:
4657 	case PACKET3_EVENT_WRITE_EOP:
4658 	case PACKET3_EVENT_WRITE_EOS:
4659 	case PACKET3_SET_CONTEXT_REG:
4660 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4661 	case PACKET3_SET_SH_REG:
4662 	case PACKET3_SET_SH_REG_OFFSET:
4663 	case PACKET3_INCREMENT_DE_COUNTER:
4664 	case PACKET3_WAIT_ON_CE_COUNTER:
4665 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4666 	case PACKET3_ME_WRITE:
4667 		break;
4668 	case PACKET3_COPY_DATA:
4669 		if ((idx_value & 0xf00) == 0) {
4670 			reg = ib[idx + 3] * 4;
4671 			if (!si_vm_reg_valid(reg))
4672 				return -EINVAL;
4673 		}
4674 		break;
4675 	case PACKET3_WRITE_DATA:
4676 		if ((idx_value & 0xf00) == 0) {
4677 			start_reg = ib[idx + 1] * 4;
4678 			if (idx_value & 0x10000) {
4679 				if (!si_vm_reg_valid(start_reg))
4680 					return -EINVAL;
4681 			} else {
4682 				for (i = 0; i < (pkt->count - 2); i++) {
4683 					reg = start_reg + (4 * i);
4684 					if (!si_vm_reg_valid(reg))
4685 						return -EINVAL;
4686 				}
4687 			}
4688 		}
4689 		break;
4690 	case PACKET3_COND_WRITE:
4691 		if (idx_value & 0x100) {
4692 			reg = ib[idx + 5] * 4;
4693 			if (!si_vm_reg_valid(reg))
4694 				return -EINVAL;
4695 		}
4696 		break;
4697 	case PACKET3_COPY_DW:
4698 		if (idx_value & 0x2) {
4699 			reg = ib[idx + 3] * 4;
4700 			if (!si_vm_reg_valid(reg))
4701 				return -EINVAL;
4702 		}
4703 		break;
4704 	case PACKET3_CP_DMA:
4705 		r = si_vm_packet3_cp_dma_check(ib, idx);
4706 		if (r)
4707 			return r;
4708 		break;
4709 	default:
4710 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4711 		return -EINVAL;
4712 	}
4713 	return 0;
4714 }
4715 
4716 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4717 {
4718 	int ret = 0;
4719 	u32 idx = 0, i;
4720 	struct radeon_cs_packet pkt;
4721 
4722 	do {
4723 		pkt.idx = idx;
4724 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4725 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4726 		pkt.one_reg_wr = 0;
4727 		switch (pkt.type) {
4728 		case RADEON_PACKET_TYPE0:
4729 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4730 			ret = -EINVAL;
4731 			break;
4732 		case RADEON_PACKET_TYPE2:
4733 			idx += 1;
4734 			break;
4735 		case RADEON_PACKET_TYPE3:
4736 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4737 			if (ib->is_const_ib)
4738 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4739 			else {
4740 				switch (ib->ring) {
4741 				case RADEON_RING_TYPE_GFX_INDEX:
4742 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4743 					break;
4744 				case CAYMAN_RING_TYPE_CP1_INDEX:
4745 				case CAYMAN_RING_TYPE_CP2_INDEX:
4746 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4747 					break;
4748 				default:
4749 					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4750 					ret = -EINVAL;
4751 					break;
4752 				}
4753 			}
4754 			idx += pkt.count + 2;
4755 			break;
4756 		default:
4757 			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4758 			ret = -EINVAL;
4759 			break;
4760 		}
4761 		if (ret) {
4762 			for (i = 0; i < ib->length_dw; i++) {
4763 				if (i == idx)
4764 					printk("\t0x%08x <---\n", ib->ptr[i]);
4765 				else
4766 					printk("\t0x%08x\n", ib->ptr[i]);
4767 			}
4768 			break;
4769 		}
4770 	} while (idx < ib->length_dw);
4771 
4772 	return ret;
4773 }
4774 
4775 /*
4776  * vm
4777  */
4778 int si_vm_init(struct radeon_device *rdev)
4779 {
4780 	/* number of VMs */
4781 	rdev->vm_manager.nvm = 16;
4782 	/* base offset of vram pages */
4783 	rdev->vm_manager.vram_base_offset = 0;
4784 
4785 	return 0;
4786 }
4787 
/* Counterpart of si_vm_init(); there is nothing to undo. */
void si_vm_fini(struct radeon_device *rdev)
{
	/* intentionally empty */
}
4791 
4792 /**
4793  * si_vm_decode_fault - print human readable fault info
4794  *
4795  * @rdev: radeon_device pointer
4796  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4797  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4798  *
4799  * Print human readable fault information (SI).
4800  */
4801 static void si_vm_decode_fault(struct radeon_device *rdev,
4802 			       u32 status, u32 addr)
4803 {
4804 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4805 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4806 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4807 	char *block;
4808 
4809 	if (rdev->family == CHIP_TAHITI) {
4810 		switch (mc_id) {
4811 		case 160:
4812 		case 144:
4813 		case 96:
4814 		case 80:
4815 		case 224:
4816 		case 208:
4817 		case 32:
4818 		case 16:
4819 			block = "CB";
4820 			break;
4821 		case 161:
4822 		case 145:
4823 		case 97:
4824 		case 81:
4825 		case 225:
4826 		case 209:
4827 		case 33:
4828 		case 17:
4829 			block = "CB_FMASK";
4830 			break;
4831 		case 162:
4832 		case 146:
4833 		case 98:
4834 		case 82:
4835 		case 226:
4836 		case 210:
4837 		case 34:
4838 		case 18:
4839 			block = "CB_CMASK";
4840 			break;
4841 		case 163:
4842 		case 147:
4843 		case 99:
4844 		case 83:
4845 		case 227:
4846 		case 211:
4847 		case 35:
4848 		case 19:
4849 			block = "CB_IMMED";
4850 			break;
4851 		case 164:
4852 		case 148:
4853 		case 100:
4854 		case 84:
4855 		case 228:
4856 		case 212:
4857 		case 36:
4858 		case 20:
4859 			block = "DB";
4860 			break;
4861 		case 165:
4862 		case 149:
4863 		case 101:
4864 		case 85:
4865 		case 229:
4866 		case 213:
4867 		case 37:
4868 		case 21:
4869 			block = "DB_HTILE";
4870 			break;
4871 		case 167:
4872 		case 151:
4873 		case 103:
4874 		case 87:
4875 		case 231:
4876 		case 215:
4877 		case 39:
4878 		case 23:
4879 			block = "DB_STEN";
4880 			break;
4881 		case 72:
4882 		case 68:
4883 		case 64:
4884 		case 8:
4885 		case 4:
4886 		case 0:
4887 		case 136:
4888 		case 132:
4889 		case 128:
4890 		case 200:
4891 		case 196:
4892 		case 192:
4893 			block = "TC";
4894 			break;
4895 		case 112:
4896 		case 48:
4897 			block = "CP";
4898 			break;
4899 		case 49:
4900 		case 177:
4901 		case 50:
4902 		case 178:
4903 			block = "SH";
4904 			break;
4905 		case 53:
4906 		case 190:
4907 			block = "VGT";
4908 			break;
4909 		case 117:
4910 			block = "IH";
4911 			break;
4912 		case 51:
4913 		case 115:
4914 			block = "RLC";
4915 			break;
4916 		case 119:
4917 		case 183:
4918 			block = "DMA0";
4919 			break;
4920 		case 61:
4921 			block = "DMA1";
4922 			break;
4923 		case 248:
4924 		case 120:
4925 			block = "HDP";
4926 			break;
4927 		default:
4928 			block = "unknown";
4929 			break;
4930 		}
4931 	} else {
4932 		switch (mc_id) {
4933 		case 32:
4934 		case 16:
4935 		case 96:
4936 		case 80:
4937 		case 160:
4938 		case 144:
4939 		case 224:
4940 		case 208:
4941 			block = "CB";
4942 			break;
4943 		case 33:
4944 		case 17:
4945 		case 97:
4946 		case 81:
4947 		case 161:
4948 		case 145:
4949 		case 225:
4950 		case 209:
4951 			block = "CB_FMASK";
4952 			break;
4953 		case 34:
4954 		case 18:
4955 		case 98:
4956 		case 82:
4957 		case 162:
4958 		case 146:
4959 		case 226:
4960 		case 210:
4961 			block = "CB_CMASK";
4962 			break;
4963 		case 35:
4964 		case 19:
4965 		case 99:
4966 		case 83:
4967 		case 163:
4968 		case 147:
4969 		case 227:
4970 		case 211:
4971 			block = "CB_IMMED";
4972 			break;
4973 		case 36:
4974 		case 20:
4975 		case 100:
4976 		case 84:
4977 		case 164:
4978 		case 148:
4979 		case 228:
4980 		case 212:
4981 			block = "DB";
4982 			break;
4983 		case 37:
4984 		case 21:
4985 		case 101:
4986 		case 85:
4987 		case 165:
4988 		case 149:
4989 		case 229:
4990 		case 213:
4991 			block = "DB_HTILE";
4992 			break;
4993 		case 39:
4994 		case 23:
4995 		case 103:
4996 		case 87:
4997 		case 167:
4998 		case 151:
4999 		case 231:
5000 		case 215:
5001 			block = "DB_STEN";
5002 			break;
5003 		case 72:
5004 		case 68:
5005 		case 8:
5006 		case 4:
5007 		case 136:
5008 		case 132:
5009 		case 200:
5010 		case 196:
5011 			block = "TC";
5012 			break;
5013 		case 112:
5014 		case 48:
5015 			block = "CP";
5016 			break;
5017 		case 49:
5018 		case 177:
5019 		case 50:
5020 		case 178:
5021 			block = "SH";
5022 			break;
5023 		case 53:
5024 			block = "VGT";
5025 			break;
5026 		case 117:
5027 			block = "IH";
5028 			break;
5029 		case 51:
5030 		case 115:
5031 			block = "RLC";
5032 			break;
5033 		case 119:
5034 		case 183:
5035 			block = "DMA0";
5036 			break;
5037 		case 61:
5038 			block = "DMA1";
5039 			break;
5040 		case 248:
5041 		case 120:
5042 			block = "HDP";
5043 			break;
5044 		default:
5045 			block = "unknown";
5046 			break;
5047 		}
5048 	}
5049 
5050 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5051 	       protections, vmid, addr,
5052 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5053 	       block, mc_id);
5054 }
5055 
5056 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5057 		 unsigned vm_id, uint64_t pd_addr)
5058 {
5059 	/* write new base address */
5060 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5061 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5062 				 WRITE_DATA_DST_SEL(0)));
5063 
5064 	if (vm_id < 8) {
5065 		radeon_ring_write(ring,
5066 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5067 	} else {
5068 		radeon_ring_write(ring,
5069 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5070 	}
5071 	radeon_ring_write(ring, 0);
5072 	radeon_ring_write(ring, pd_addr >> 12);
5073 
5074 	/* flush hdp cache */
5075 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5076 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5077 				 WRITE_DATA_DST_SEL(0)));
5078 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5079 	radeon_ring_write(ring, 0);
5080 	radeon_ring_write(ring, 0x1);
5081 
5082 	/* bits 0-15 are the VM contexts0-15 */
5083 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5084 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5085 				 WRITE_DATA_DST_SEL(0)));
5086 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5087 	radeon_ring_write(ring, 0);
5088 	radeon_ring_write(ring, 1 << vm_id);
5089 
5090 	/* wait for the invalidate to complete */
5091 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5092 	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5093 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5094 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5095 	radeon_ring_write(ring, 0);
5096 	radeon_ring_write(ring, 0); /* ref */
5097 	radeon_ring_write(ring, 0); /* mask */
5098 	radeon_ring_write(ring, 0x20); /* poll interval */
5099 
5100 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
5101 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5102 	radeon_ring_write(ring, 0x0);
5103 }
5104 
5105 /*
5106  *  Power and clock gating
5107  */
5108 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5109 {
5110 	int i;
5111 
5112 	for (i = 0; i < rdev->usec_timeout; i++) {
5113 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5114 			break;
5115 		udelay(1);
5116 	}
5117 
5118 	for (i = 0; i < rdev->usec_timeout; i++) {
5119 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5120 			break;
5121 		udelay(1);
5122 	}
5123 }
5124 
5125 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5126 					 bool enable)
5127 {
5128 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5129 	u32 mask;
5130 	int i;
5131 
5132 	if (enable)
5133 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5134 	else
5135 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5136 	WREG32(CP_INT_CNTL_RING0, tmp);
5137 
5138 	if (!enable) {
5139 		/* read a gfx register */
5140 		tmp = RREG32(DB_DEPTH_INFO);
5141 
5142 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5143 		for (i = 0; i < rdev->usec_timeout; i++) {
5144 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5145 				break;
5146 			udelay(1);
5147 		}
5148 	}
5149 }
5150 
5151 static void si_set_uvd_dcm(struct radeon_device *rdev,
5152 			   bool sw_mode)
5153 {
5154 	u32 tmp, tmp2;
5155 
5156 	tmp = RREG32(UVD_CGC_CTRL);
5157 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5158 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5159 
5160 	if (sw_mode) {
5161 		tmp &= ~0x7ffff800;
5162 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5163 	} else {
5164 		tmp |= 0x7ffff800;
5165 		tmp2 = 0;
5166 	}
5167 
5168 	WREG32(UVD_CGC_CTRL, tmp);
5169 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5170 }
5171 
5172 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5173 {
5174 	bool hw_mode = true;
5175 
5176 	if (hw_mode) {
5177 		si_set_uvd_dcm(rdev, false);
5178 	} else {
5179 		u32 tmp = RREG32(UVD_CGC_CTRL);
5180 		tmp &= ~DCM;
5181 		WREG32(UVD_CGC_CTRL, tmp);
5182 	}
5183 }
5184 
5185 static u32 si_halt_rlc(struct radeon_device *rdev)
5186 {
5187 	u32 data, orig;
5188 
5189 	orig = data = RREG32(RLC_CNTL);
5190 
5191 	if (data & RLC_ENABLE) {
5192 		data &= ~RLC_ENABLE;
5193 		WREG32(RLC_CNTL, data);
5194 
5195 		si_wait_for_rlc_serdes(rdev);
5196 	}
5197 
5198 	return orig;
5199 }
5200 
5201 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5202 {
5203 	u32 tmp;
5204 
5205 	tmp = RREG32(RLC_CNTL);
5206 	if (tmp != rlc)
5207 		WREG32(RLC_CNTL, rlc);
5208 }
5209 
5210 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5211 {
5212 	u32 data, orig;
5213 
5214 	orig = data = RREG32(DMA_PG);
5215 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5216 		data |= PG_CNTL_ENABLE;
5217 	else
5218 		data &= ~PG_CNTL_ENABLE;
5219 	if (orig != data)
5220 		WREG32(DMA_PG, data);
5221 }
5222 
5223 static void si_init_dma_pg(struct radeon_device *rdev)
5224 {
5225 	u32 tmp;
5226 
5227 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5228 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5229 
5230 	for (tmp = 0; tmp < 5; tmp++)
5231 		WREG32(DMA_PGFSM_WRITE, 0);
5232 }
5233 
5234 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5235 			       bool enable)
5236 {
5237 	u32 tmp;
5238 
5239 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5240 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5241 		WREG32(RLC_TTOP_D, tmp);
5242 
5243 		tmp = RREG32(RLC_PG_CNTL);
5244 		tmp |= GFX_PG_ENABLE;
5245 		WREG32(RLC_PG_CNTL, tmp);
5246 
5247 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5248 		tmp |= AUTO_PG_EN;
5249 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5250 	} else {
5251 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5252 		tmp &= ~AUTO_PG_EN;
5253 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5254 
5255 		tmp = RREG32(DB_RENDER_CONTROL);
5256 	}
5257 }
5258 
5259 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5260 {
5261 	u32 tmp;
5262 
5263 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5264 
5265 	tmp = RREG32(RLC_PG_CNTL);
5266 	tmp |= GFX_PG_SRC;
5267 	WREG32(RLC_PG_CNTL, tmp);
5268 
5269 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5270 
5271 	tmp = RREG32(RLC_AUTO_PG_CTRL);
5272 
5273 	tmp &= ~GRBM_REG_SGIT_MASK;
5274 	tmp |= GRBM_REG_SGIT(0x700);
5275 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5276 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5277 }
5278 
5279 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5280 {
5281 	u32 mask = 0, tmp, tmp1;
5282 	int i;
5283 
5284 	si_select_se_sh(rdev, se, sh);
5285 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5286 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5287 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5288 
5289 	tmp &= 0xffff0000;
5290 
5291 	tmp |= tmp1;
5292 	tmp >>= 16;
5293 
5294 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
5295 		mask <<= 1;
5296 		mask |= 1;
5297 	}
5298 
5299 	return (~tmp) & mask;
5300 }
5301 
5302 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5303 {
5304 	u32 i, j, k, active_cu_number = 0;
5305 	u32 mask, counter, cu_bitmap;
5306 	u32 tmp = 0;
5307 
5308 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5309 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5310 			mask = 1;
5311 			cu_bitmap = 0;
5312 			counter  = 0;
5313 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5314 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5315 					if (counter < 2)
5316 						cu_bitmap |= mask;
5317 					counter++;
5318 				}
5319 				mask <<= 1;
5320 			}
5321 
5322 			active_cu_number += counter;
5323 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5324 		}
5325 	}
5326 
5327 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5328 
5329 	tmp = RREG32(RLC_MAX_PG_CU);
5330 	tmp &= ~MAX_PU_CU_MASK;
5331 	tmp |= MAX_PU_CU(active_cu_number);
5332 	WREG32(RLC_MAX_PG_CU, tmp);
5333 }
5334 
5335 static void si_enable_cgcg(struct radeon_device *rdev,
5336 			   bool enable)
5337 {
5338 	u32 data, orig, tmp;
5339 
5340 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5341 
5342 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5343 		si_enable_gui_idle_interrupt(rdev, true);
5344 
5345 		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5346 
5347 		tmp = si_halt_rlc(rdev);
5348 
5349 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5350 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5351 		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5352 
5353 		si_wait_for_rlc_serdes(rdev);
5354 
5355 		si_update_rlc(rdev, tmp);
5356 
5357 		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5358 
5359 		data |= CGCG_EN | CGLS_EN;
5360 	} else {
5361 		si_enable_gui_idle_interrupt(rdev, false);
5362 
5363 		RREG32(CB_CGTT_SCLK_CTRL);
5364 		RREG32(CB_CGTT_SCLK_CTRL);
5365 		RREG32(CB_CGTT_SCLK_CTRL);
5366 		RREG32(CB_CGTT_SCLK_CTRL);
5367 
5368 		data &= ~(CGCG_EN | CGLS_EN);
5369 	}
5370 
5371 	if (orig != data)
5372 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5373 }
5374 
5375 static void si_enable_mgcg(struct radeon_device *rdev,
5376 			   bool enable)
5377 {
5378 	u32 data, orig, tmp = 0;
5379 
5380 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5381 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5382 		data = 0x96940200;
5383 		if (orig != data)
5384 			WREG32(CGTS_SM_CTRL_REG, data);
5385 
5386 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5387 			orig = data = RREG32(CP_MEM_SLP_CNTL);
5388 			data |= CP_MEM_LS_EN;
5389 			if (orig != data)
5390 				WREG32(CP_MEM_SLP_CNTL, data);
5391 		}
5392 
5393 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5394 		data &= 0xffffffc0;
5395 		if (orig != data)
5396 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5397 
5398 		tmp = si_halt_rlc(rdev);
5399 
5400 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5401 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5402 		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5403 
5404 		si_update_rlc(rdev, tmp);
5405 	} else {
5406 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5407 		data |= 0x00000003;
5408 		if (orig != data)
5409 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5410 
5411 		data = RREG32(CP_MEM_SLP_CNTL);
5412 		if (data & CP_MEM_LS_EN) {
5413 			data &= ~CP_MEM_LS_EN;
5414 			WREG32(CP_MEM_SLP_CNTL, data);
5415 		}
5416 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5417 		data |= LS_OVERRIDE | OVERRIDE;
5418 		if (orig != data)
5419 			WREG32(CGTS_SM_CTRL_REG, data);
5420 
5421 		tmp = si_halt_rlc(rdev);
5422 
5423 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5424 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5425 		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5426 
5427 		si_update_rlc(rdev, tmp);
5428 	}
5429 }
5430 
5431 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5432 			       bool enable)
5433 {
5434 	u32 orig, data, tmp;
5435 
5436 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5437 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5438 		tmp |= 0x3fff;
5439 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5440 
5441 		orig = data = RREG32(UVD_CGC_CTRL);
5442 		data |= DCM;
5443 		if (orig != data)
5444 			WREG32(UVD_CGC_CTRL, data);
5445 
5446 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5447 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5448 	} else {
5449 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5450 		tmp &= ~0x3fff;
5451 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5452 
5453 		orig = data = RREG32(UVD_CGC_CTRL);
5454 		data &= ~DCM;
5455 		if (orig != data)
5456 			WREG32(UVD_CGC_CTRL, data);
5457 
5458 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5459 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5460 	}
5461 }
5462 
5463 static const u32 mc_cg_registers[] =
5464 {
5465 	MC_HUB_MISC_HUB_CG,
5466 	MC_HUB_MISC_SIP_CG,
5467 	MC_HUB_MISC_VM_CG,
5468 	MC_XPB_CLK_GAT,
5469 	ATC_MISC_CG,
5470 	MC_CITF_MISC_WR_CG,
5471 	MC_CITF_MISC_RD_CG,
5472 	MC_CITF_MISC_VM_CG,
5473 	VM_L2_CG,
5474 };
5475 
5476 static void si_enable_mc_ls(struct radeon_device *rdev,
5477 			    bool enable)
5478 {
5479 	int i;
5480 	u32 orig, data;
5481 
5482 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5483 		orig = data = RREG32(mc_cg_registers[i]);
5484 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5485 			data |= MC_LS_ENABLE;
5486 		else
5487 			data &= ~MC_LS_ENABLE;
5488 		if (data != orig)
5489 			WREG32(mc_cg_registers[i], data);
5490 	}
5491 }
5492 
5493 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5494 			       bool enable)
5495 {
5496 	int i;
5497 	u32 orig, data;
5498 
5499 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5500 		orig = data = RREG32(mc_cg_registers[i]);
5501 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5502 			data |= MC_CG_ENABLE;
5503 		else
5504 			data &= ~MC_CG_ENABLE;
5505 		if (data != orig)
5506 			WREG32(mc_cg_registers[i], data);
5507 	}
5508 }
5509 
5510 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5511 			       bool enable)
5512 {
5513 	u32 orig, data, offset;
5514 	int i;
5515 
5516 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5517 		for (i = 0; i < 2; i++) {
5518 			if (i == 0)
5519 				offset = DMA0_REGISTER_OFFSET;
5520 			else
5521 				offset = DMA1_REGISTER_OFFSET;
5522 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5523 			data &= ~MEM_POWER_OVERRIDE;
5524 			if (data != orig)
5525 				WREG32(DMA_POWER_CNTL + offset, data);
5526 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5527 		}
5528 	} else {
5529 		for (i = 0; i < 2; i++) {
5530 			if (i == 0)
5531 				offset = DMA0_REGISTER_OFFSET;
5532 			else
5533 				offset = DMA1_REGISTER_OFFSET;
5534 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5535 			data |= MEM_POWER_OVERRIDE;
5536 			if (data != orig)
5537 				WREG32(DMA_POWER_CNTL + offset, data);
5538 
5539 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5540 			data = 0xff000000;
5541 			if (data != orig)
5542 				WREG32(DMA_CLK_CTRL + offset, data);
5543 		}
5544 	}
5545 }
5546 
5547 static void si_enable_bif_mgls(struct radeon_device *rdev,
5548 			       bool enable)
5549 {
5550 	u32 orig, data;
5551 
5552 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5553 
5554 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5555 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5556 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5557 	else
5558 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5559 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5560 
5561 	if (orig != data)
5562 		WREG32_PCIE(PCIE_CNTL2, data);
5563 }
5564 
5565 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5566 			       bool enable)
5567 {
5568 	u32 orig, data;
5569 
5570 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5571 
5572 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5573 		data &= ~CLOCK_GATING_DIS;
5574 	else
5575 		data |= CLOCK_GATING_DIS;
5576 
5577 	if (orig != data)
5578 		WREG32(HDP_HOST_PATH_CNTL, data);
5579 }
5580 
5581 static void si_enable_hdp_ls(struct radeon_device *rdev,
5582 			     bool enable)
5583 {
5584 	u32 orig, data;
5585 
5586 	orig = data = RREG32(HDP_MEM_POWER_LS);
5587 
5588 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5589 		data |= HDP_LS_ENABLE;
5590 	else
5591 		data &= ~HDP_LS_ENABLE;
5592 
5593 	if (orig != data)
5594 		WREG32(HDP_MEM_POWER_LS, data);
5595 }
5596 
5597 static void si_update_cg(struct radeon_device *rdev,
5598 			 u32 block, bool enable)
5599 {
5600 	if (block & RADEON_CG_BLOCK_GFX) {
5601 		si_enable_gui_idle_interrupt(rdev, false);
5602 		/* order matters! */
5603 		if (enable) {
5604 			si_enable_mgcg(rdev, true);
5605 			si_enable_cgcg(rdev, true);
5606 		} else {
5607 			si_enable_cgcg(rdev, false);
5608 			si_enable_mgcg(rdev, false);
5609 		}
5610 		si_enable_gui_idle_interrupt(rdev, true);
5611 	}
5612 
5613 	if (block & RADEON_CG_BLOCK_MC) {
5614 		si_enable_mc_mgcg(rdev, enable);
5615 		si_enable_mc_ls(rdev, enable);
5616 	}
5617 
5618 	if (block & RADEON_CG_BLOCK_SDMA) {
5619 		si_enable_dma_mgcg(rdev, enable);
5620 	}
5621 
5622 	if (block & RADEON_CG_BLOCK_BIF) {
5623 		si_enable_bif_mgls(rdev, enable);
5624 	}
5625 
5626 	if (block & RADEON_CG_BLOCK_UVD) {
5627 		if (rdev->has_uvd) {
5628 			si_enable_uvd_mgcg(rdev, enable);
5629 		}
5630 	}
5631 
5632 	if (block & RADEON_CG_BLOCK_HDP) {
5633 		si_enable_hdp_mgcg(rdev, enable);
5634 		si_enable_hdp_ls(rdev, enable);
5635 	}
5636 }
5637 
5638 static void si_init_cg(struct radeon_device *rdev)
5639 {
5640 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5641 			    RADEON_CG_BLOCK_MC |
5642 			    RADEON_CG_BLOCK_SDMA |
5643 			    RADEON_CG_BLOCK_BIF |
5644 			    RADEON_CG_BLOCK_HDP), true);
5645 	if (rdev->has_uvd) {
5646 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5647 		si_init_uvd_internal_cg(rdev);
5648 	}
5649 }
5650 
5651 static void si_fini_cg(struct radeon_device *rdev)
5652 {
5653 	if (rdev->has_uvd) {
5654 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5655 	}
5656 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5657 			    RADEON_CG_BLOCK_MC |
5658 			    RADEON_CG_BLOCK_SDMA |
5659 			    RADEON_CG_BLOCK_BIF |
5660 			    RADEON_CG_BLOCK_HDP), false);
5661 }
5662 
5663 u32 si_get_csb_size(struct radeon_device *rdev)
5664 {
5665 	u32 count = 0;
5666 	const struct cs_section_def *sect = NULL;
5667 	const struct cs_extent_def *ext = NULL;
5668 
5669 	if (rdev->rlc.cs_data == NULL)
5670 		return 0;
5671 
5672 	/* begin clear state */
5673 	count += 2;
5674 	/* context control state */
5675 	count += 3;
5676 
5677 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5678 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5679 			if (sect->id == SECT_CONTEXT)
5680 				count += 2 + ext->reg_count;
5681 			else
5682 				return 0;
5683 		}
5684 	}
5685 	/* pa_sc_raster_config */
5686 	count += 3;
5687 	/* end clear state */
5688 	count += 2;
5689 	/* clear state */
5690 	count += 2;
5691 
5692 	return count;
5693 }
5694 
5695 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5696 {
5697 	u32 count = 0, i;
5698 	const struct cs_section_def *sect = NULL;
5699 	const struct cs_extent_def *ext = NULL;
5700 
5701 	if (rdev->rlc.cs_data == NULL)
5702 		return;
5703 	if (buffer == NULL)
5704 		return;
5705 
5706 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5707 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5708 
5709 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5710 	buffer[count++] = cpu_to_le32(0x80000000);
5711 	buffer[count++] = cpu_to_le32(0x80000000);
5712 
5713 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5714 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5715 			if (sect->id == SECT_CONTEXT) {
5716 				buffer[count++] =
5717 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5718 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5719 				for (i = 0; i < ext->reg_count; i++)
5720 					buffer[count++] = cpu_to_le32(ext->extent[i]);
5721 			} else {
5722 				return;
5723 			}
5724 		}
5725 	}
5726 
5727 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5728 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5729 	switch (rdev->family) {
5730 	case CHIP_TAHITI:
5731 	case CHIP_PITCAIRN:
5732 		buffer[count++] = cpu_to_le32(0x2a00126a);
5733 		break;
5734 	case CHIP_VERDE:
5735 		buffer[count++] = cpu_to_le32(0x0000124a);
5736 		break;
5737 	case CHIP_OLAND:
5738 		buffer[count++] = cpu_to_le32(0x00000082);
5739 		break;
5740 	case CHIP_HAINAN:
5741 		buffer[count++] = cpu_to_le32(0x00000000);
5742 		break;
5743 	default:
5744 		buffer[count++] = cpu_to_le32(0x00000000);
5745 		break;
5746 	}
5747 
5748 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5749 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5750 
5751 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5752 	buffer[count++] = cpu_to_le32(0);
5753 }
5754 
5755 static void si_init_pg(struct radeon_device *rdev)
5756 {
5757 	if (rdev->pg_flags) {
5758 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5759 			si_init_dma_pg(rdev);
5760 		}
5761 		si_init_ao_cu_mask(rdev);
5762 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5763 			si_init_gfx_cgpg(rdev);
5764 		} else {
5765 			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5766 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5767 		}
5768 		si_enable_dma_pg(rdev, true);
5769 		si_enable_gfx_cgpg(rdev, true);
5770 	} else {
5771 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5772 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5773 	}
5774 }
5775 
5776 static void si_fini_pg(struct radeon_device *rdev)
5777 {
5778 	if (rdev->pg_flags) {
5779 		si_enable_dma_pg(rdev, false);
5780 		si_enable_gfx_cgpg(rdev, false);
5781 	}
5782 }
5783 
5784 /*
5785  * RLC
5786  */
5787 void si_rlc_reset(struct radeon_device *rdev)
5788 {
5789 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5790 
5791 	tmp |= SOFT_RESET_RLC;
5792 	WREG32(GRBM_SOFT_RESET, tmp);
5793 	udelay(50);
5794 	tmp &= ~SOFT_RESET_RLC;
5795 	WREG32(GRBM_SOFT_RESET, tmp);
5796 	udelay(50);
5797 }
5798 
5799 static void si_rlc_stop(struct radeon_device *rdev)
5800 {
5801 	WREG32(RLC_CNTL, 0);
5802 
5803 	si_enable_gui_idle_interrupt(rdev, false);
5804 
5805 	si_wait_for_rlc_serdes(rdev);
5806 }
5807 
5808 static void si_rlc_start(struct radeon_device *rdev)
5809 {
5810 	WREG32(RLC_CNTL, RLC_ENABLE);
5811 
5812 	si_enable_gui_idle_interrupt(rdev, true);
5813 
5814 	udelay(50);
5815 }
5816 
5817 static bool si_lbpw_supported(struct radeon_device *rdev)
5818 {
5819 	u32 tmp;
5820 
5821 	/* Enable LBPW only for DDR3 */
5822 	tmp = RREG32(MC_SEQ_MISC0);
5823 	if ((tmp & 0xF0000000) == 0xB0000000)
5824 		return true;
5825 	return false;
5826 }
5827 
5828 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5829 {
5830 	u32 tmp;
5831 
5832 	tmp = RREG32(RLC_LB_CNTL);
5833 	if (enable)
5834 		tmp |= LOAD_BALANCE_ENABLE;
5835 	else
5836 		tmp &= ~LOAD_BALANCE_ENABLE;
5837 	WREG32(RLC_LB_CNTL, tmp);
5838 
5839 	if (!enable) {
5840 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5841 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5842 	}
5843 }
5844 
5845 static int si_rlc_resume(struct radeon_device *rdev)
5846 {
5847 	u32 i;
5848 
5849 	if (!rdev->rlc_fw)
5850 		return -EINVAL;
5851 
5852 	si_rlc_stop(rdev);
5853 
5854 	si_rlc_reset(rdev);
5855 
5856 	si_init_pg(rdev);
5857 
5858 	si_init_cg(rdev);
5859 
5860 	WREG32(RLC_RL_BASE, 0);
5861 	WREG32(RLC_RL_SIZE, 0);
5862 	WREG32(RLC_LB_CNTL, 0);
5863 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5864 	WREG32(RLC_LB_CNTR_INIT, 0);
5865 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5866 
5867 	WREG32(RLC_MC_CNTL, 0);
5868 	WREG32(RLC_UCODE_CNTL, 0);
5869 
5870 	if (rdev->new_fw) {
5871 		const struct rlc_firmware_header_v1_0 *hdr =
5872 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5873 		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5874 		const __le32 *fw_data = (const __le32 *)
5875 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5876 
5877 		radeon_ucode_print_rlc_hdr(&hdr->header);
5878 
5879 		for (i = 0; i < fw_size; i++) {
5880 			WREG32(RLC_UCODE_ADDR, i);
5881 			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5882 		}
5883 	} else {
5884 		const __be32 *fw_data =
5885 			(const __be32 *)rdev->rlc_fw->data;
5886 		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5887 			WREG32(RLC_UCODE_ADDR, i);
5888 			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5889 		}
5890 	}
5891 	WREG32(RLC_UCODE_ADDR, 0);
5892 
5893 	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5894 
5895 	si_rlc_start(rdev);
5896 
5897 	return 0;
5898 }
5899 
5900 static void si_enable_interrupts(struct radeon_device *rdev)
5901 {
5902 	u32 ih_cntl = RREG32(IH_CNTL);
5903 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5904 
5905 	ih_cntl |= ENABLE_INTR;
5906 	ih_rb_cntl |= IH_RB_ENABLE;
5907 	WREG32(IH_CNTL, ih_cntl);
5908 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5909 	rdev->ih.enabled = true;
5910 }
5911 
5912 static void si_disable_interrupts(struct radeon_device *rdev)
5913 {
5914 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5915 	u32 ih_cntl = RREG32(IH_CNTL);
5916 
5917 	ih_rb_cntl &= ~IH_RB_ENABLE;
5918 	ih_cntl &= ~ENABLE_INTR;
5919 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5920 	WREG32(IH_CNTL, ih_cntl);
5921 	/* set rptr, wptr to 0 */
5922 	WREG32(IH_RB_RPTR, 0);
5923 	WREG32(IH_RB_WPTR, 0);
5924 	rdev->ih.enabled = false;
5925 	rdev->ih.rptr = 0;
5926 }
5927 
5928 static void si_disable_interrupt_state(struct radeon_device *rdev)
5929 {
5930 	int i;
5931 	u32 tmp;
5932 
5933 	tmp = RREG32(CP_INT_CNTL_RING0) &
5934 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5935 	WREG32(CP_INT_CNTL_RING0, tmp);
5936 	WREG32(CP_INT_CNTL_RING1, 0);
5937 	WREG32(CP_INT_CNTL_RING2, 0);
5938 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5939 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5940 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5941 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5942 	WREG32(GRBM_INT_CNTL, 0);
5943 	WREG32(SRBM_INT_CNTL, 0);
5944 	for (i = 0; i < rdev->num_crtc; i++)
5945 		WREG32(INT_MASK + crtc_offsets[i], 0);
5946 	for (i = 0; i < rdev->num_crtc; i++)
5947 		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], 0);
5948 
5949 	if (!ASIC_IS_NODCE(rdev)) {
5950 		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5951 
5952 		for (i = 0; i < 6; i++)
5953 			WREG32_AND(DC_HPDx_INT_CONTROL(i),
5954 				   DC_HPDx_INT_POLARITY);
5955 	}
5956 }
5957 
5958 static int si_irq_init(struct radeon_device *rdev)
5959 {
5960 	int ret = 0;
5961 	int rb_bufsz;
5962 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5963 
5964 	/* allocate ring */
5965 	ret = r600_ih_ring_alloc(rdev);
5966 	if (ret)
5967 		return ret;
5968 
5969 	/* disable irqs */
5970 	si_disable_interrupts(rdev);
5971 
5972 	/* init rlc */
5973 	ret = si_rlc_resume(rdev);
5974 	if (ret) {
5975 		r600_ih_ring_fini(rdev);
5976 		return ret;
5977 	}
5978 
5979 	/* setup interrupt control */
5980 	/* set dummy read address to dummy page address */
5981 	WREG32(INTERRUPT_CNTL2, rdev->dummy_page.addr >> 8);
5982 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
5983 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5984 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5985 	 */
5986 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5987 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5988 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5989 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
5990 
5991 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5992 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
5993 
5994 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5995 		      IH_WPTR_OVERFLOW_CLEAR |
5996 		      (rb_bufsz << 1));
5997 
5998 	if (rdev->wb.enabled)
5999 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6000 
6001 	/* set the writeback address whether it's enabled or not */
6002 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6003 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6004 
6005 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6006 
6007 	/* set rptr, wptr to 0 */
6008 	WREG32(IH_RB_RPTR, 0);
6009 	WREG32(IH_RB_WPTR, 0);
6010 
6011 	/* Default settings for IH_CNTL (disabled at first) */
6012 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6013 	/* RPTR_REARM only works if msi's are enabled */
6014 	if (rdev->msi_enabled)
6015 		ih_cntl |= RPTR_REARM;
6016 	WREG32(IH_CNTL, ih_cntl);
6017 
6018 	/* force the active interrupt state to all disabled */
6019 	si_disable_interrupt_state(rdev);
6020 
6021 	pci_set_master(rdev->pdev);
6022 
6023 	/* enable irqs */
6024 	si_enable_interrupts(rdev);
6025 
6026 	return ret;
6027 }
6028 
6029 /* The order we write back each register here is important */
6030 int si_irq_set(struct radeon_device *rdev)
6031 {
6032 	int i;
6033 	u32 cp_int_cntl;
6034 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6035 	u32 grbm_int_cntl = 0;
6036 	u32 dma_cntl, dma_cntl1;
6037 	u32 thermal_int = 0;
6038 
6039 	if (!rdev->irq.installed) {
6040 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6041 		return -EINVAL;
6042 	}
6043 	/* don't enable anything if the ih is disabled */
6044 	if (!rdev->ih.enabled) {
6045 		si_disable_interrupts(rdev);
6046 		/* force the active interrupt state to all disabled */
6047 		si_disable_interrupt_state(rdev);
6048 		return 0;
6049 	}
6050 
6051 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6052 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6053 
6054 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6055 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6056 
6057 	thermal_int = RREG32(CG_THERMAL_INT) &
6058 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6059 
6060 	/* enable CP interrupts on all rings */
6061 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6062 		DRM_DEBUG("si_irq_set: sw int gfx\n");
6063 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6064 	}
6065 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6066 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6067 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6068 	}
6069 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6070 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6071 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6072 	}
6073 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6074 		DRM_DEBUG("si_irq_set: sw int dma\n");
6075 		dma_cntl |= TRAP_ENABLE;
6076 	}
6077 
6078 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6079 		DRM_DEBUG("si_irq_set: sw int dma1\n");
6080 		dma_cntl1 |= TRAP_ENABLE;
6081 	}
6082 
6083 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6084 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6085 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6086 
6087 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6088 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6089 
6090 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6091 
6092 	if (rdev->irq.dpm_thermal) {
6093 		DRM_DEBUG("dpm thermal\n");
6094 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6095 	}
6096 
6097 	for (i = 0; i < rdev->num_crtc; i++) {
6098 		radeon_irq_kms_set_irq_n_enabled(
6099 		    rdev, INT_MASK + crtc_offsets[i], VBLANK_INT_MASK,
6100 		    rdev->irq.crtc_vblank_int[i] ||
6101 		    atomic_read(&rdev->irq.pflip[i]), "vblank", i);
6102 	}
6103 
6104 	for (i = 0; i < rdev->num_crtc; i++)
6105 		WREG32(GRPH_INT_CONTROL + crtc_offsets[i], GRPH_PFLIP_INT_MASK);
6106 
6107 	if (!ASIC_IS_NODCE(rdev)) {
6108 		for (i = 0; i < 6; i++) {
6109 			radeon_irq_kms_set_irq_n_enabled(
6110 			    rdev, DC_HPDx_INT_CONTROL(i),
6111 			    DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN,
6112 			    rdev->irq.hpd[i], "HPD", i);
6113 		}
6114 	}
6115 
6116 	WREG32(CG_THERMAL_INT, thermal_int);
6117 
6118 	/* posting read */
6119 	RREG32(SRBM_STATUS);
6120 
6121 	return 0;
6122 }
6123 
6124 /* The order we write back each register here is important */
6125 static inline void si_irq_ack(struct radeon_device *rdev)
6126 {
6127 	int i, j;
6128 	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
6129 	u32 *grph_int = rdev->irq.stat_regs.evergreen.grph_int;
6130 
6131 	if (ASIC_IS_NODCE(rdev))
6132 		return;
6133 
6134 	for (i = 0; i < 6; i++) {
6135 		disp_int[i] = RREG32(si_disp_int_status[i]);
6136 		if (i < rdev->num_crtc)
6137 			grph_int[i] = RREG32(GRPH_INT_STATUS + crtc_offsets[i]);
6138 	}
6139 
6140 	/* We write back each interrupt register in pairs of two */
6141 	for (i = 0; i < rdev->num_crtc; i += 2) {
6142 		for (j = i; j < (i + 2); j++) {
6143 			if (grph_int[j] & GRPH_PFLIP_INT_OCCURRED)
6144 				WREG32(GRPH_INT_STATUS + crtc_offsets[j],
6145 				       GRPH_PFLIP_INT_CLEAR);
6146 		}
6147 
6148 		for (j = i; j < (i + 2); j++) {
6149 			if (disp_int[j] & LB_D1_VBLANK_INTERRUPT)
6150 				WREG32(VBLANK_STATUS + crtc_offsets[j],
6151 				       VBLANK_ACK);
6152 			if (disp_int[j] & LB_D1_VLINE_INTERRUPT)
6153 				WREG32(VLINE_STATUS + crtc_offsets[j],
6154 				       VLINE_ACK);
6155 		}
6156 	}
6157 
6158 	for (i = 0; i < 6; i++) {
6159 		if (disp_int[i] & DC_HPD1_INTERRUPT)
6160 			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_INT_ACK);
6161 	}
6162 
6163 	for (i = 0; i < 6; i++) {
6164 		if (disp_int[i] & DC_HPD1_RX_INTERRUPT)
6165 			WREG32_OR(DC_HPDx_INT_CONTROL(i), DC_HPDx_RX_INT_ACK);
6166 	}
6167 }
6168 
/**
 * si_irq_disable - disable interrupts and flush what is pending
 *
 * @rdev: radeon_device pointer
 *
 * Turns the IH off, waits 1ms, acknowledges the display interrupts and
 * masks all interrupt sources.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);

	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);

	si_disable_interrupt_state(rdev);
}
6177 
/**
 * si_irq_suspend - disable interrupts and stop the RLC
 *
 * @rdev: radeon_device pointer
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	/* interrupts go down first, then the RLC */
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
6183 
/**
 * si_irq_fini - suspend interrupt handling and free the IH ring
 *
 * @rdev: radeon_device pointer
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	/* quiesce before the ring memory goes away */
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
6189 
6190 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6191 {
6192 	u32 wptr, tmp;
6193 
6194 	if (rdev->wb.enabled)
6195 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6196 	else
6197 		wptr = RREG32(IH_RB_WPTR);
6198 
6199 	if (wptr & RB_OVERFLOW) {
6200 		wptr &= ~RB_OVERFLOW;
6201 		/* When a ring buffer overflow happen start parsing interrupt
6202 		 * from the last not overwritten vector (wptr + 16). Hopefully
6203 		 * this should allow us to catchup.
6204 		 */
6205 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6206 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6207 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6208 		tmp = RREG32(IH_RB_CNTL);
6209 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6210 		WREG32(IH_RB_CNTL, tmp);
6211 	}
6212 	return (wptr & rdev->ih.ptr_mask);
6213 }
6214 
6215 /*        SI IV Ring
6216  * Each IV ring entry is 128 bits:
6217  * [7:0]    - interrupt source id
6218  * [31:8]   - reserved
6219  * [59:32]  - interrupt source data
6220  * [63:60]  - reserved
6221  * [71:64]  - RINGID
6222  * [79:72]  - VMID
6223  * [127:80] - reserved
6224  */
6225 int si_irq_process(struct radeon_device *rdev)
6226 {
6227 	u32 *disp_int = rdev->irq.stat_regs.evergreen.disp_int;
6228 	u32 crtc_idx, hpd_idx;
6229 	u32 mask;
6230 	u32 wptr;
6231 	u32 rptr;
6232 	u32 src_id, src_data, ring_id;
6233 	u32 ring_index;
6234 	bool queue_hotplug = false;
6235 	bool queue_dp = false;
6236 	bool queue_thermal = false;
6237 	u32 status, addr;
6238 	const char *event_name;
6239 
6240 	if (!rdev->ih.enabled || rdev->shutdown)
6241 		return IRQ_NONE;
6242 
6243 	wptr = si_get_ih_wptr(rdev);
6244 
6245 restart_ih:
6246 	/* is somebody else already processing irqs? */
6247 	if (atomic_xchg(&rdev->ih.lock, 1))
6248 		return IRQ_NONE;
6249 
6250 	rptr = rdev->ih.rptr;
6251 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6252 
6253 	/* Order reading of wptr vs. reading of IH ring data */
6254 	rmb();
6255 
6256 	/* display interrupts */
6257 	si_irq_ack(rdev);
6258 
6259 	while (rptr != wptr) {
6260 		/* wptr/rptr are in bytes! */
6261 		ring_index = rptr / 4;
6262 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6263 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6264 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6265 
6266 		switch (src_id) {
6267 		case 1: /* D1 vblank/vline */
6268 		case 2: /* D2 vblank/vline */
6269 		case 3: /* D3 vblank/vline */
6270 		case 4: /* D4 vblank/vline */
6271 		case 5: /* D5 vblank/vline */
6272 		case 6: /* D6 vblank/vline */
6273 			crtc_idx = src_id - 1;
6274 
6275 			if (src_data == 0) { /* vblank */
6276 				mask = LB_D1_VBLANK_INTERRUPT;
6277 				event_name = "vblank";
6278 
6279 				if (rdev->irq.crtc_vblank_int[crtc_idx]) {
6280 					drm_handle_vblank(rdev->ddev, crtc_idx);
6281 					rdev->pm.vblank_sync = true;
6282 					wake_up(&rdev->irq.vblank_queue);
6283 				}
6284 				if (atomic_read(&rdev->irq.pflip[crtc_idx])) {
6285 					radeon_crtc_handle_vblank(rdev,
6286 								  crtc_idx);
6287 				}
6288 
6289 			} else if (src_data == 1) { /* vline */
6290 				mask = LB_D1_VLINE_INTERRUPT;
6291 				event_name = "vline";
6292 			} else {
6293 				DRM_DEBUG("Unhandled interrupt: %d %d\n",
6294 					  src_id, src_data);
6295 				break;
6296 			}
6297 
6298 			if (!(disp_int[crtc_idx] & mask)) {
6299 				DRM_DEBUG("IH: D%d %s - IH event w/o asserted irq bit?\n",
6300 					  crtc_idx + 1, event_name);
6301 			}
6302 
6303 			disp_int[crtc_idx] &= ~mask;
6304 			DRM_DEBUG("IH: D%d %s\n", crtc_idx + 1, event_name);
6305 
6306 			break;
6307 		case 8: /* D1 page flip */
6308 		case 10: /* D2 page flip */
6309 		case 12: /* D3 page flip */
6310 		case 14: /* D4 page flip */
6311 		case 16: /* D5 page flip */
6312 		case 18: /* D6 page flip */
6313 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6314 			if (radeon_use_pflipirq > 0)
6315 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6316 			break;
6317 		case 42: /* HPD hotplug */
6318 			if (src_data <= 5) {
6319 				hpd_idx = src_data;
6320 				mask = DC_HPD1_INTERRUPT;
6321 				queue_hotplug = true;
6322 				event_name = "HPD";
6323 
6324 			} else if (src_data <= 11) {
6325 				hpd_idx = src_data - 6;
6326 				mask = DC_HPD1_RX_INTERRUPT;
6327 				queue_dp = true;
6328 				event_name = "HPD_RX";
6329 
6330 			} else {
6331 				DRM_DEBUG("Unhandled interrupt: %d %d\n",
6332 					  src_id, src_data);
6333 				break;
6334 			}
6335 
6336 			if (!(disp_int[hpd_idx] & mask))
6337 				DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6338 
6339 			disp_int[hpd_idx] &= ~mask;
6340 			DRM_DEBUG("IH: %s%d\n", event_name, hpd_idx + 1);
6341 			break;
6342 		case 96:
6343 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6344 			WREG32(SRBM_INT_ACK, 0x1);
6345 			break;
6346 		case 124: /* UVD */
6347 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6348 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6349 			break;
6350 		case 146:
6351 		case 147:
6352 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6353 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6354 			/* reset addr and status */
6355 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6356 			if (addr == 0x0 && status == 0x0)
6357 				break;
6358 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6359 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6360 				addr);
6361 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6362 				status);
6363 			si_vm_decode_fault(rdev, status, addr);
6364 			break;
6365 		case 176: /* RINGID0 CP_INT */
6366 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6367 			break;
6368 		case 177: /* RINGID1 CP_INT */
6369 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6370 			break;
6371 		case 178: /* RINGID2 CP_INT */
6372 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6373 			break;
6374 		case 181: /* CP EOP event */
6375 			DRM_DEBUG("IH: CP EOP\n");
6376 			switch (ring_id) {
6377 			case 0:
6378 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6379 				break;
6380 			case 1:
6381 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6382 				break;
6383 			case 2:
6384 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6385 				break;
6386 			}
6387 			break;
6388 		case 224: /* DMA trap event */
6389 			DRM_DEBUG("IH: DMA trap\n");
6390 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6391 			break;
6392 		case 230: /* thermal low to high */
6393 			DRM_DEBUG("IH: thermal low to high\n");
6394 			rdev->pm.dpm.thermal.high_to_low = false;
6395 			queue_thermal = true;
6396 			break;
6397 		case 231: /* thermal high to low */
6398 			DRM_DEBUG("IH: thermal high to low\n");
6399 			rdev->pm.dpm.thermal.high_to_low = true;
6400 			queue_thermal = true;
6401 			break;
6402 		case 233: /* GUI IDLE */
6403 			DRM_DEBUG("IH: GUI idle\n");
6404 			break;
6405 		case 244: /* DMA trap event */
6406 			DRM_DEBUG("IH: DMA1 trap\n");
6407 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6408 			break;
6409 		default:
6410 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6411 			break;
6412 		}
6413 
6414 		/* wptr/rptr are in bytes! */
6415 		rptr += 16;
6416 		rptr &= rdev->ih.ptr_mask;
6417 		WREG32(IH_RB_RPTR, rptr);
6418 	}
6419 	if (queue_dp)
6420 		schedule_work(&rdev->dp_work);
6421 	if (queue_hotplug)
6422 		schedule_delayed_work(&rdev->hotplug_work, 0);
6423 	if (queue_thermal && rdev->pm.dpm_enabled)
6424 		schedule_work(&rdev->pm.dpm.thermal.work);
6425 	rdev->ih.rptr = rptr;
6426 	atomic_set(&rdev->ih.lock, 0);
6427 
6428 	/* make sure wptr hasn't changed while processing */
6429 	wptr = si_get_ih_wptr(rdev);
6430 	if (wptr != rptr)
6431 		goto restart_ih;
6432 
6433 	return IRQ_HANDLED;
6434 }
6435 
6436 /*
6437  * startup/shutdown callbacks
6438  */
6439 static void si_uvd_init(struct radeon_device *rdev)
6440 {
6441 	int r;
6442 
6443 	if (!rdev->has_uvd)
6444 		return;
6445 
6446 	r = radeon_uvd_init(rdev);
6447 	if (r) {
6448 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6449 		/*
6450 		 * At this point rdev->uvd.vcpu_bo is NULL which trickles down
6451 		 * to early fails uvd_v2_2_resume() and thus nothing happens
6452 		 * there. So it is pointless to try to go through that code
6453 		 * hence why we disable uvd here.
6454 		 */
6455 		rdev->has_uvd = false;
6456 		return;
6457 	}
6458 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6459 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6460 }
6461 
6462 static void si_uvd_start(struct radeon_device *rdev)
6463 {
6464 	int r;
6465 
6466 	if (!rdev->has_uvd)
6467 		return;
6468 
6469 	r = uvd_v2_2_resume(rdev);
6470 	if (r) {
6471 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6472 		goto error;
6473 	}
6474 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6475 	if (r) {
6476 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6477 		goto error;
6478 	}
6479 	return;
6480 
6481 error:
6482 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6483 }
6484 
6485 static void si_uvd_resume(struct radeon_device *rdev)
6486 {
6487 	struct radeon_ring *ring;
6488 	int r;
6489 
6490 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6491 		return;
6492 
6493 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6494 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6495 	if (r) {
6496 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6497 		return;
6498 	}
6499 	r = uvd_v1_0_init(rdev);
6500 	if (r) {
6501 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6502 		return;
6503 	}
6504 }
6505 
6506 static void si_vce_init(struct radeon_device *rdev)
6507 {
6508 	int r;
6509 
6510 	if (!rdev->has_vce)
6511 		return;
6512 
6513 	r = radeon_vce_init(rdev);
6514 	if (r) {
6515 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6516 		/*
6517 		 * At this point rdev->vce.vcpu_bo is NULL which trickles down
6518 		 * to early fails si_vce_start() and thus nothing happens
6519 		 * there. So it is pointless to try to go through that code
6520 		 * hence why we disable vce here.
6521 		 */
6522 		rdev->has_vce = false;
6523 		return;
6524 	}
6525 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6526 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6527 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6528 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6529 }
6530 
6531 static void si_vce_start(struct radeon_device *rdev)
6532 {
6533 	int r;
6534 
6535 	if (!rdev->has_vce)
6536 		return;
6537 
6538 	r = radeon_vce_resume(rdev);
6539 	if (r) {
6540 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6541 		goto error;
6542 	}
6543 	r = vce_v1_0_resume(rdev);
6544 	if (r) {
6545 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6546 		goto error;
6547 	}
6548 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6549 	if (r) {
6550 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6551 		goto error;
6552 	}
6553 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6554 	if (r) {
6555 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6556 		goto error;
6557 	}
6558 	return;
6559 
6560 error:
6561 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6562 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6563 }
6564 
6565 static void si_vce_resume(struct radeon_device *rdev)
6566 {
6567 	struct radeon_ring *ring;
6568 	int r;
6569 
6570 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
6571 		return;
6572 
6573 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
6574 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6575 	if (r) {
6576 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6577 		return;
6578 	}
6579 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
6580 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6581 	if (r) {
6582 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6583 		return;
6584 	}
6585 	r = vce_v1_0_init(rdev);
6586 	if (r) {
6587 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
6588 		return;
6589 	}
6590 }
6591 
6592 static int si_startup(struct radeon_device *rdev)
6593 {
6594 	struct radeon_ring *ring;
6595 	int r;
6596 
6597 	/* enable pcie gen2/3 link */
6598 	si_pcie_gen3_enable(rdev);
6599 	/* enable aspm */
6600 	si_program_aspm(rdev);
6601 
6602 	/* scratch needs to be initialized before MC */
6603 	r = r600_vram_scratch_init(rdev);
6604 	if (r)
6605 		return r;
6606 
6607 	si_mc_program(rdev);
6608 
6609 	if (!rdev->pm.dpm_enabled) {
6610 		r = si_mc_load_microcode(rdev);
6611 		if (r) {
6612 			DRM_ERROR("Failed to load MC firmware!\n");
6613 			return r;
6614 		}
6615 	}
6616 
6617 	r = si_pcie_gart_enable(rdev);
6618 	if (r)
6619 		return r;
6620 	si_gpu_init(rdev);
6621 
6622 	/* allocate rlc buffers */
6623 	if (rdev->family == CHIP_VERDE) {
6624 		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
6625 		rdev->rlc.reg_list_size =
6626 			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
6627 	}
6628 	rdev->rlc.cs_data = si_cs_data;
6629 	r = sumo_rlc_init(rdev);
6630 	if (r) {
6631 		DRM_ERROR("Failed to init rlc BOs!\n");
6632 		return r;
6633 	}
6634 
6635 	/* allocate wb buffer */
6636 	r = radeon_wb_init(rdev);
6637 	if (r)
6638 		return r;
6639 
6640 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
6641 	if (r) {
6642 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6643 		return r;
6644 	}
6645 
6646 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6647 	if (r) {
6648 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6649 		return r;
6650 	}
6651 
6652 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6653 	if (r) {
6654 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6655 		return r;
6656 	}
6657 
6658 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6659 	if (r) {
6660 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6661 		return r;
6662 	}
6663 
6664 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6665 	if (r) {
6666 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6667 		return r;
6668 	}
6669 
6670 	si_uvd_start(rdev);
6671 	si_vce_start(rdev);
6672 
6673 	/* Enable IRQ */
6674 	if (!rdev->irq.installed) {
6675 		r = radeon_irq_kms_init(rdev);
6676 		if (r)
6677 			return r;
6678 	}
6679 
6680 	r = si_irq_init(rdev);
6681 	if (r) {
6682 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
6683 		radeon_irq_kms_fini(rdev);
6684 		return r;
6685 	}
6686 	si_irq_set(rdev);
6687 
6688 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6689 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6690 			     RADEON_CP_PACKET2);
6691 	if (r)
6692 		return r;
6693 
6694 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6695 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6696 			     RADEON_CP_PACKET2);
6697 	if (r)
6698 		return r;
6699 
6700 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6701 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6702 			     RADEON_CP_PACKET2);
6703 	if (r)
6704 		return r;
6705 
6706 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6707 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6708 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6709 	if (r)
6710 		return r;
6711 
6712 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6713 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6714 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
6715 	if (r)
6716 		return r;
6717 
6718 	r = si_cp_load_microcode(rdev);
6719 	if (r)
6720 		return r;
6721 	r = si_cp_resume(rdev);
6722 	if (r)
6723 		return r;
6724 
6725 	r = cayman_dma_resume(rdev);
6726 	if (r)
6727 		return r;
6728 
6729 	si_uvd_resume(rdev);
6730 	si_vce_resume(rdev);
6731 
6732 	r = radeon_ib_pool_init(rdev);
6733 	if (r) {
6734 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6735 		return r;
6736 	}
6737 
6738 	r = radeon_vm_manager_init(rdev);
6739 	if (r) {
6740 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6741 		return r;
6742 	}
6743 
6744 	r = radeon_audio_init(rdev);
6745 	if (r)
6746 		return r;
6747 
6748 	return 0;
6749 }
6750 
6751 int si_resume(struct radeon_device *rdev)
6752 {
6753 	int r;
6754 
6755 	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
6756 	 * posting will perform necessary task to bring back GPU into good
6757 	 * shape.
6758 	 */
6759 	/* post card */
6760 	atom_asic_init(rdev->mode_info.atom_context);
6761 
6762 	/* init golden registers */
6763 	si_init_golden_registers(rdev);
6764 
6765 	if (rdev->pm.pm_method == PM_METHOD_DPM)
6766 		radeon_pm_resume(rdev);
6767 
6768 	rdev->accel_working = true;
6769 	r = si_startup(rdev);
6770 	if (r) {
6771 		DRM_ERROR("si startup failed on resume\n");
6772 		rdev->accel_working = false;
6773 		return r;
6774 	}
6775 
6776 	return r;
6777 
6778 }
6779 
6780 int si_suspend(struct radeon_device *rdev)
6781 {
6782 	radeon_pm_suspend(rdev);
6783 	radeon_audio_fini(rdev);
6784 	radeon_vm_manager_fini(rdev);
6785 	si_cp_enable(rdev, false);
6786 	cayman_dma_stop(rdev);
6787 	if (rdev->has_uvd) {
6788 		radeon_uvd_suspend(rdev);
6789 		uvd_v1_0_fini(rdev);
6790 	}
6791 	if (rdev->has_vce)
6792 		radeon_vce_suspend(rdev);
6793 	si_fini_pg(rdev);
6794 	si_fini_cg(rdev);
6795 	si_irq_suspend(rdev);
6796 	radeon_wb_disable(rdev);
6797 	si_pcie_gart_disable(rdev);
6798 	return 0;
6799 }
6800 
6801 /* Plan is to move initialization in that function and use
6802  * helper function so that radeon_device_init pretty much
6803  * do nothing more than calling asic specific function. This
6804  * should also allow to remove a bunch of callback function
6805  * like vram_info.
6806  */
6807 int si_init(struct radeon_device *rdev)
6808 {
6809 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6810 	int r;
6811 
6812 	/* Read BIOS */
6813 	if (!radeon_get_bios(rdev)) {
6814 		if (ASIC_IS_AVIVO(rdev))
6815 			return -EINVAL;
6816 	}
6817 	/* Must be an ATOMBIOS */
6818 	if (!rdev->is_atom_bios) {
6819 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6820 		return -EINVAL;
6821 	}
6822 	r = radeon_atombios_init(rdev);
6823 	if (r)
6824 		return r;
6825 
6826 	/* Post card if necessary */
6827 	if (!radeon_card_posted(rdev)) {
6828 		if (!rdev->bios) {
6829 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6830 			return -EINVAL;
6831 		}
6832 		DRM_INFO("GPU not posted. posting now...\n");
6833 		atom_asic_init(rdev->mode_info.atom_context);
6834 	}
6835 	/* init golden registers */
6836 	si_init_golden_registers(rdev);
6837 	/* Initialize scratch registers */
6838 	si_scratch_init(rdev);
6839 	/* Initialize surface registers */
6840 	radeon_surface_init(rdev);
6841 	/* Initialize clocks */
6842 	radeon_get_clock_info(rdev->ddev);
6843 
6844 	/* Fence driver */
6845 	radeon_fence_driver_init(rdev);
6846 
6847 	/* initialize memory controller */
6848 	r = si_mc_init(rdev);
6849 	if (r)
6850 		return r;
6851 	/* Memory manager */
6852 	r = radeon_bo_init(rdev);
6853 	if (r)
6854 		return r;
6855 
6856 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
6857 	    !rdev->rlc_fw || !rdev->mc_fw) {
6858 		r = si_init_microcode(rdev);
6859 		if (r) {
6860 			DRM_ERROR("Failed to load firmware!\n");
6861 			return r;
6862 		}
6863 	}
6864 
6865 	/* Initialize power management */
6866 	radeon_pm_init(rdev);
6867 
6868 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6869 	ring->ring_obj = NULL;
6870 	r600_ring_init(rdev, ring, 1024 * 1024);
6871 
6872 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6873 	ring->ring_obj = NULL;
6874 	r600_ring_init(rdev, ring, 1024 * 1024);
6875 
6876 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6877 	ring->ring_obj = NULL;
6878 	r600_ring_init(rdev, ring, 1024 * 1024);
6879 
6880 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6881 	ring->ring_obj = NULL;
6882 	r600_ring_init(rdev, ring, 64 * 1024);
6883 
6884 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6885 	ring->ring_obj = NULL;
6886 	r600_ring_init(rdev, ring, 64 * 1024);
6887 
6888 	si_uvd_init(rdev);
6889 	si_vce_init(rdev);
6890 
6891 	rdev->ih.ring_obj = NULL;
6892 	r600_ih_ring_init(rdev, 64 * 1024);
6893 
6894 	r = r600_pcie_gart_init(rdev);
6895 	if (r)
6896 		return r;
6897 
6898 	rdev->accel_working = true;
6899 	r = si_startup(rdev);
6900 	if (r) {
6901 		dev_err(rdev->dev, "disabling GPU acceleration\n");
6902 		si_cp_fini(rdev);
6903 		cayman_dma_fini(rdev);
6904 		si_irq_fini(rdev);
6905 		sumo_rlc_fini(rdev);
6906 		radeon_wb_fini(rdev);
6907 		radeon_ib_pool_fini(rdev);
6908 		radeon_vm_manager_fini(rdev);
6909 		radeon_irq_kms_fini(rdev);
6910 		si_pcie_gart_fini(rdev);
6911 		rdev->accel_working = false;
6912 	}
6913 
6914 	/* Don't start up if the MC ucode is missing.
6915 	 * The default clocks and voltages before the MC ucode
6916 	 * is loaded are not suffient for advanced operations.
6917 	 */
6918 	if (!rdev->mc_fw) {
6919 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
6920 		return -EINVAL;
6921 	}
6922 
6923 	return 0;
6924 }
6925 
6926 void si_fini(struct radeon_device *rdev)
6927 {
6928 	radeon_pm_fini(rdev);
6929 	si_cp_fini(rdev);
6930 	cayman_dma_fini(rdev);
6931 	si_fini_pg(rdev);
6932 	si_fini_cg(rdev);
6933 	si_irq_fini(rdev);
6934 	sumo_rlc_fini(rdev);
6935 	radeon_wb_fini(rdev);
6936 	radeon_vm_manager_fini(rdev);
6937 	radeon_ib_pool_fini(rdev);
6938 	radeon_irq_kms_fini(rdev);
6939 	if (rdev->has_uvd) {
6940 		uvd_v1_0_fini(rdev);
6941 		radeon_uvd_fini(rdev);
6942 	}
6943 	if (rdev->has_vce)
6944 		radeon_vce_fini(rdev);
6945 	si_pcie_gart_fini(rdev);
6946 	r600_vram_scratch_fini(rdev);
6947 	radeon_gem_fini(rdev);
6948 	radeon_fence_driver_fini(rdev);
6949 	radeon_bo_fini(rdev);
6950 	radeon_atombios_fini(rdev);
6951 	kfree(rdev->bios);
6952 	rdev->bios = NULL;
6953 }
6954 
6955 /**
6956  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6957  *
6958  * @rdev: radeon_device pointer
6959  *
6960  * Fetches a GPU clock counter snapshot (SI).
6961  * Returns the 64 bit clock counter snapshot.
6962  */
6963 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6964 {
6965 	uint64_t clock;
6966 
6967 	mutex_lock(&rdev->gpu_clock_mutex);
6968 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6969 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6970 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6971 	mutex_unlock(&rdev->gpu_clock_mutex);
6972 	return clock;
6973 }
6974 
6975 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6976 {
6977 	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
6978 	int r;
6979 
6980 	/* bypass vclk and dclk with bclk */
6981 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
6982 		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
6983 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
6984 
6985 	/* put PLL in bypass mode */
6986 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
6987 
6988 	if (!vclk || !dclk) {
6989 		/* keep the Bypass mode */
6990 		return 0;
6991 	}
6992 
6993 	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
6994 					  16384, 0x03FFFFFF, 0, 128, 5,
6995 					  &fb_div, &vclk_div, &dclk_div);
6996 	if (r)
6997 		return r;
6998 
6999 	/* set RESET_ANTI_MUX to 0 */
7000 	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7001 
7002 	/* set VCO_MODE to 1 */
7003 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7004 
7005 	/* disable sleep mode */
7006 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7007 
7008 	/* deassert UPLL_RESET */
7009 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7010 
7011 	mdelay(1);
7012 
7013 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7014 	if (r)
7015 		return r;
7016 
7017 	/* assert UPLL_RESET again */
7018 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7019 
7020 	/* disable spread spectrum. */
7021 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7022 
7023 	/* set feedback divider */
7024 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7025 
7026 	/* set ref divider to 0 */
7027 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7028 
7029 	if (fb_div < 307200)
7030 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7031 	else
7032 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7033 
7034 	/* set PDIV_A and PDIV_B */
7035 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7036 		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7037 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7038 
7039 	/* give the PLL some time to settle */
7040 	mdelay(15);
7041 
7042 	/* deassert PLL_RESET */
7043 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7044 
7045 	mdelay(15);
7046 
7047 	/* switch from bypass mode to normal mode */
7048 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7049 
7050 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7051 	if (r)
7052 		return r;
7053 
7054 	/* switch VCLK and DCLK selection */
7055 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7056 		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7057 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7058 
7059 	mdelay(100);
7060 
7061 	return 0;
7062 }
7063 
7064 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7065 {
7066 	struct pci_dev *root = rdev->pdev->bus->self;
7067 	enum pci_bus_speed speed_cap;
7068 	u32 speed_cntl, current_data_rate;
7069 	int i;
7070 	u16 tmp16;
7071 
7072 	if (pci_is_root_bus(rdev->pdev->bus))
7073 		return;
7074 
7075 	if (radeon_pcie_gen2 == 0)
7076 		return;
7077 
7078 	if (rdev->flags & RADEON_IS_IGP)
7079 		return;
7080 
7081 	if (!(rdev->flags & RADEON_IS_PCIE))
7082 		return;
7083 
7084 	speed_cap = pcie_get_speed_cap(root);
7085 	if (speed_cap == PCI_SPEED_UNKNOWN)
7086 		return;
7087 
7088 	if ((speed_cap != PCIE_SPEED_8_0GT) &&
7089 	    (speed_cap != PCIE_SPEED_5_0GT))
7090 		return;
7091 
7092 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7093 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7094 		LC_CURRENT_DATA_RATE_SHIFT;
7095 	if (speed_cap == PCIE_SPEED_8_0GT) {
7096 		if (current_data_rate == 2) {
7097 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7098 			return;
7099 		}
7100 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7101 	} else if (speed_cap == PCIE_SPEED_5_0GT) {
7102 		if (current_data_rate == 1) {
7103 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7104 			return;
7105 		}
7106 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7107 	}
7108 
7109 	if (!pci_is_pcie(root) || !pci_is_pcie(rdev->pdev))
7110 		return;
7111 
7112 	if (speed_cap == PCIE_SPEED_8_0GT) {
7113 		/* re-try equalization if gen3 is not already enabled */
7114 		if (current_data_rate != 2) {
7115 			u16 bridge_cfg, gpu_cfg;
7116 			u16 bridge_cfg2, gpu_cfg2;
7117 			u32 max_lw, current_lw, tmp;
7118 
7119 			pcie_capability_set_word(root, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
7120 			pcie_capability_set_word(rdev->pdev, PCI_EXP_LNKCTL, PCI_EXP_LNKCTL_HAWD);
7121 
7122 			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7123 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7124 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7125 
7126 			if (current_lw < max_lw) {
7127 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7128 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
7129 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7130 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7131 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7132 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7133 				}
7134 			}
7135 
7136 			for (i = 0; i < 10; i++) {
7137 				/* check status */
7138 				pcie_capability_read_word(rdev->pdev,
7139 							  PCI_EXP_DEVSTA,
7140 							  &tmp16);
7141 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7142 					break;
7143 
7144 				pcie_capability_read_word(root, PCI_EXP_LNKCTL,
7145 							  &bridge_cfg);
7146 				pcie_capability_read_word(rdev->pdev,
7147 							  PCI_EXP_LNKCTL,
7148 							  &gpu_cfg);
7149 
7150 				pcie_capability_read_word(root, PCI_EXP_LNKCTL2,
7151 							  &bridge_cfg2);
7152 				pcie_capability_read_word(rdev->pdev,
7153 							  PCI_EXP_LNKCTL2,
7154 							  &gpu_cfg2);
7155 
7156 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7157 				tmp |= LC_SET_QUIESCE;
7158 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7159 
7160 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7161 				tmp |= LC_REDO_EQ;
7162 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7163 
7164 				msleep(100);
7165 
7166 				/* linkctl */
7167 				pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL,
7168 								   PCI_EXP_LNKCTL_HAWD,
7169 								   bridge_cfg &
7170 								   PCI_EXP_LNKCTL_HAWD);
7171 				pcie_capability_clear_and_set_word(rdev->pdev, PCI_EXP_LNKCTL,
7172 								   PCI_EXP_LNKCTL_HAWD,
7173 								   gpu_cfg &
7174 								   PCI_EXP_LNKCTL_HAWD);
7175 
7176 				/* linkctl2 */
7177 				pcie_capability_clear_and_set_word(root, PCI_EXP_LNKCTL2,
7178 								   PCI_EXP_LNKCTL2_ENTER_COMP |
7179 								   PCI_EXP_LNKCTL2_TX_MARGIN,
7180 								   bridge_cfg2 &
7181 								   (PCI_EXP_LNKCTL2_ENTER_COMP |
7182 								    PCI_EXP_LNKCTL2_TX_MARGIN));
7183 				pcie_capability_clear_and_set_word(rdev->pdev, PCI_EXP_LNKCTL2,
7184 								   PCI_EXP_LNKCTL2_ENTER_COMP |
7185 								   PCI_EXP_LNKCTL2_TX_MARGIN,
7186 								   gpu_cfg2 &
7187 								   (PCI_EXP_LNKCTL2_ENTER_COMP |
7188 								    PCI_EXP_LNKCTL2_TX_MARGIN));
7189 
7190 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7191 				tmp &= ~LC_SET_QUIESCE;
7192 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7193 			}
7194 		}
7195 	}
7196 
7197 	/* set the link speed */
7198 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7199 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7200 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7201 
7202 	tmp16 = 0;
7203 	if (speed_cap == PCIE_SPEED_8_0GT)
7204 		tmp16 |= PCI_EXP_LNKCTL2_TLS_8_0GT; /* gen3 */
7205 	else if (speed_cap == PCIE_SPEED_5_0GT)
7206 		tmp16 |= PCI_EXP_LNKCTL2_TLS_5_0GT; /* gen2 */
7207 	else
7208 		tmp16 |= PCI_EXP_LNKCTL2_TLS_2_5GT; /* gen1 */
7209 	pcie_capability_clear_and_set_word(rdev->pdev, PCI_EXP_LNKCTL2,
7210 					   PCI_EXP_LNKCTL2_TLS, tmp16);
7211 
7212 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7213 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7214 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7215 
7216 	for (i = 0; i < rdev->usec_timeout; i++) {
7217 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7218 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7219 			break;
7220 		udelay(1);
7221 	}
7222 }
7223 
7224 static void si_program_aspm(struct radeon_device *rdev)
7225 {
7226 	u32 data, orig;
7227 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7228 	bool disable_clkreq = false;
7229 
7230 	if (radeon_aspm == 0)
7231 		return;
7232 
7233 	if (!(rdev->flags & RADEON_IS_PCIE))
7234 		return;
7235 
7236 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7237 	data &= ~LC_XMIT_N_FTS_MASK;
7238 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7239 	if (orig != data)
7240 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7241 
7242 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7243 	data |= LC_GO_TO_RECOVERY;
7244 	if (orig != data)
7245 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7246 
7247 	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7248 	data |= P_IGNORE_EDB_ERR;
7249 	if (orig != data)
7250 		WREG32_PCIE(PCIE_P_CNTL, data);
7251 
7252 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7253 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7254 	data |= LC_PMI_TO_L1_DIS;
7255 	if (!disable_l0s)
7256 		data |= LC_L0S_INACTIVITY(7);
7257 
7258 	if (!disable_l1) {
7259 		data |= LC_L1_INACTIVITY(7);
7260 		data &= ~LC_PMI_TO_L1_DIS;
7261 		if (orig != data)
7262 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7263 
7264 		if (!disable_plloff_in_l1) {
7265 			bool clk_req_support;
7266 
7267 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7268 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7269 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7270 			if (orig != data)
7271 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7272 
7273 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7274 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7275 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7276 			if (orig != data)
7277 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7278 
7279 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7280 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7281 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7282 			if (orig != data)
7283 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7284 
7285 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7286 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7287 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7288 			if (orig != data)
7289 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7290 
7291 			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7292 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7293 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7294 				if (orig != data)
7295 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7296 
7297 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7298 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7299 				if (orig != data)
7300 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7301 
7302 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7303 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7304 				if (orig != data)
7305 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7306 
7307 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7308 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7309 				if (orig != data)
7310 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7311 
7312 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7313 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7314 				if (orig != data)
7315 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7316 
7317 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7318 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7319 				if (orig != data)
7320 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7321 
7322 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7323 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7324 				if (orig != data)
7325 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7326 
7327 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7328 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7329 				if (orig != data)
7330 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7331 			}
7332 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7333 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7334 			data |= LC_DYN_LANES_PWR_STATE(3);
7335 			if (orig != data)
7336 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7337 
7338 			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7339 			data &= ~LS2_EXIT_TIME_MASK;
7340 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7341 				data |= LS2_EXIT_TIME(5);
7342 			if (orig != data)
7343 				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7344 
7345 			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7346 			data &= ~LS2_EXIT_TIME_MASK;
7347 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7348 				data |= LS2_EXIT_TIME(5);
7349 			if (orig != data)
7350 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7351 
7352 			if (!disable_clkreq &&
7353 			    !pci_is_root_bus(rdev->pdev->bus)) {
7354 				struct pci_dev *root = rdev->pdev->bus->self;
7355 				u32 lnkcap;
7356 
7357 				clk_req_support = false;
7358 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7359 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7360 					clk_req_support = true;
7361 			} else {
7362 				clk_req_support = false;
7363 			}
7364 
7365 			if (clk_req_support) {
7366 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7367 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7368 				if (orig != data)
7369 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7370 
7371 				orig = data = RREG32(THM_CLK_CNTL);
7372 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7373 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7374 				if (orig != data)
7375 					WREG32(THM_CLK_CNTL, data);
7376 
7377 				orig = data = RREG32(MISC_CLK_CNTL);
7378 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7379 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7380 				if (orig != data)
7381 					WREG32(MISC_CLK_CNTL, data);
7382 
7383 				orig = data = RREG32(CG_CLKPIN_CNTL);
7384 				data &= ~BCLK_AS_XCLK;
7385 				if (orig != data)
7386 					WREG32(CG_CLKPIN_CNTL, data);
7387 
7388 				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7389 				data &= ~FORCE_BIF_REFCLK_EN;
7390 				if (orig != data)
7391 					WREG32(CG_CLKPIN_CNTL_2, data);
7392 
7393 				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7394 				data &= ~MPLL_CLKOUT_SEL_MASK;
7395 				data |= MPLL_CLKOUT_SEL(4);
7396 				if (orig != data)
7397 					WREG32(MPLL_BYPASSCLK_SEL, data);
7398 
7399 				orig = data = RREG32(SPLL_CNTL_MODE);
7400 				data &= ~SPLL_REFCLK_SEL_MASK;
7401 				if (orig != data)
7402 					WREG32(SPLL_CNTL_MODE, data);
7403 			}
7404 		}
7405 	} else {
7406 		if (orig != data)
7407 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7408 	}
7409 
7410 	orig = data = RREG32_PCIE(PCIE_CNTL2);
7411 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7412 	if (orig != data)
7413 		WREG32_PCIE(PCIE_CNTL2, data);
7414 
7415 	if (!disable_l0s) {
7416 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7417 		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7418 			data = RREG32_PCIE(PCIE_LC_STATUS1);
7419 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7420 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7421 				data &= ~LC_L0S_INACTIVITY_MASK;
7422 				if (orig != data)
7423 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7424 			}
7425 		}
7426 	}
7427 }
7428 
7429 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7430 {
7431 	unsigned i;
7432 
7433 	/* make sure VCEPLL_CTLREQ is deasserted */
7434 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7435 
7436 	mdelay(10);
7437 
7438 	/* assert UPLL_CTLREQ */
7439 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7440 
7441 	/* wait for CTLACK and CTLACK2 to get asserted */
7442 	for (i = 0; i < 100; ++i) {
7443 		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7444 		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7445 			break;
7446 		mdelay(10);
7447 	}
7448 
7449 	/* deassert UPLL_CTLREQ */
7450 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7451 
7452 	if (i == 100) {
7453 		DRM_ERROR("Timeout setting UVD clocks!\n");
7454 		return -ETIMEDOUT;
7455 	}
7456 
7457 	return 0;
7458 }
7459 
7460 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
7461 {
7462 	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
7463 	int r;
7464 
7465 	/* bypass evclk and ecclk with bclk */
7466 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7467 		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
7468 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7469 
7470 	/* put PLL in bypass mode */
7471 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
7472 		     ~VCEPLL_BYPASS_EN_MASK);
7473 
7474 	if (!evclk || !ecclk) {
7475 		/* keep the Bypass mode, put PLL to sleep */
7476 		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7477 			     ~VCEPLL_SLEEP_MASK);
7478 		return 0;
7479 	}
7480 
7481 	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
7482 					  16384, 0x03FFFFFF, 0, 128, 5,
7483 					  &fb_div, &evclk_div, &ecclk_div);
7484 	if (r)
7485 		return r;
7486 
7487 	/* set RESET_ANTI_MUX to 0 */
7488 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7489 
7490 	/* set VCO_MODE to 1 */
7491 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
7492 		     ~VCEPLL_VCO_MODE_MASK);
7493 
7494 	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
7495 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7496 		     ~VCEPLL_SLEEP_MASK);
7497 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
7498 
7499 	/* deassert VCEPLL_RESET */
7500 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7501 
7502 	mdelay(1);
7503 
7504 	r = si_vce_send_vcepll_ctlreq(rdev);
7505 	if (r)
7506 		return r;
7507 
7508 	/* assert VCEPLL_RESET again */
7509 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
7510 
7511 	/* disable spread spectrum. */
7512 	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7513 
7514 	/* set feedback divider */
7515 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
7516 
7517 	/* set ref divider to 0 */
7518 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
7519 
7520 	/* set PDIV_A and PDIV_B */
7521 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7522 		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
7523 		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
7524 
7525 	/* give the PLL some time to settle */
7526 	mdelay(15);
7527 
7528 	/* deassert PLL_RESET */
7529 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7530 
7531 	mdelay(15);
7532 
7533 	/* switch from bypass mode to normal mode */
7534 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
7535 
7536 	r = si_vce_send_vcepll_ctlreq(rdev);
7537 	if (r)
7538 		return r;
7539 
7540 	/* switch VCLK and DCLK selection */
7541 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7542 		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
7543 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7544 
7545 	mdelay(100);
7546 
7547 	return 0;
7548 }
7549