/*
 * Copyright 2011 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Alex Deucher
 */
#include <linux/firmware.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <drm/drmP.h>
#include "radeon.h"
#include "radeon_asic.h"
#include "radeon_audio.h"
#include <drm/radeon_drm.h>
#include "sid.h"
#include "atom.h"
#include "si_blit_shaders.h"
#include "clearstate_si.h"
#include "radeon_ucode.h"

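/*
 * Two firmware naming conventions appear below: the legacy all-caps
 * images are raw big-endian blobs, while the lower-case images carry
 * headers that are checked via radeon_ucode_validate().
 * si_init_microcode() requests the new-style name first and falls back
 * to the legacy one.
 */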
MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
MODULE_FIRMWARE("radeon/TAHITI_me.bin");
MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
MODULE_FIRMWARE("radeon/TAHITI_mc2.bin");
MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
MODULE_FIRMWARE("radeon/TAHITI_smc.bin");

MODULE_FIRMWARE("radeon/tahiti_pfp.bin");
MODULE_FIRMWARE("radeon/tahiti_me.bin");
MODULE_FIRMWARE("radeon/tahiti_ce.bin");
MODULE_FIRMWARE("radeon/tahiti_mc.bin");
MODULE_FIRMWARE("radeon/tahiti_rlc.bin");
MODULE_FIRMWARE("radeon/tahiti_smc.bin");

MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_mc2.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");

MODULE_FIRMWARE("radeon/pitcairn_pfp.bin");
MODULE_FIRMWARE("radeon/pitcairn_me.bin");
MODULE_FIRMWARE("radeon/pitcairn_ce.bin");
MODULE_FIRMWARE("radeon/pitcairn_mc.bin");
MODULE_FIRMWARE("radeon/pitcairn_rlc.bin");
MODULE_FIRMWARE("radeon/pitcairn_smc.bin");
MODULE_FIRMWARE("radeon/pitcairn_k_smc.bin");

MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
MODULE_FIRMWARE("radeon/VERDE_me.bin");
MODULE_FIRMWARE("radeon/VERDE_ce.bin");
MODULE_FIRMWARE("radeon/VERDE_mc.bin");
MODULE_FIRMWARE("radeon/VERDE_mc2.bin");
MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
MODULE_FIRMWARE("radeon/VERDE_smc.bin");

MODULE_FIRMWARE("radeon/verde_pfp.bin");
MODULE_FIRMWARE("radeon/verde_me.bin");
MODULE_FIRMWARE("radeon/verde_ce.bin");
MODULE_FIRMWARE("radeon/verde_mc.bin");
MODULE_FIRMWARE("radeon/verde_rlc.bin");
MODULE_FIRMWARE("radeon/verde_smc.bin");
MODULE_FIRMWARE("radeon/verde_k_smc.bin");

MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
MODULE_FIRMWARE("radeon/OLAND_me.bin");
MODULE_FIRMWARE("radeon/OLAND_ce.bin");
MODULE_FIRMWARE("radeon/OLAND_mc.bin");
MODULE_FIRMWARE("radeon/OLAND_mc2.bin");
MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
MODULE_FIRMWARE("radeon/OLAND_smc.bin");

MODULE_FIRMWARE("radeon/oland_pfp.bin");
MODULE_FIRMWARE("radeon/oland_me.bin");
MODULE_FIRMWARE("radeon/oland_ce.bin");
MODULE_FIRMWARE("radeon/oland_mc.bin");
MODULE_FIRMWARE("radeon/oland_rlc.bin");
MODULE_FIRMWARE("radeon/oland_smc.bin");
MODULE_FIRMWARE("radeon/oland_k_smc.bin");

MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
MODULE_FIRMWARE("radeon/HAINAN_me.bin");
MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
MODULE_FIRMWARE("radeon/HAINAN_mc2.bin");
MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
MODULE_FIRMWARE("radeon/HAINAN_smc.bin");

MODULE_FIRMWARE("radeon/hainan_pfp.bin");
MODULE_FIRMWARE("radeon/hainan_me.bin");
MODULE_FIRMWARE("radeon/hainan_ce.bin");
MODULE_FIRMWARE("radeon/hainan_mc.bin");
MODULE_FIRMWARE("radeon/hainan_rlc.bin");
MODULE_FIRMWARE("radeon/hainan_smc.bin");
MODULE_FIRMWARE("radeon/hainan_k_smc.bin");
MODULE_FIRMWARE("radeon/banks_k_2_smc.bin");

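/* special MC firmware for boards with the 0x58 memory configuration
 * (see the MC_SEQ_MISC0 check in si_init_microcode())
 */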
MODULE_FIRMWARE("radeon/si58_mc.bin");

static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh);
static void si_pcie_gen3_enable(struct radeon_device *rdev);
static void si_program_aspm(struct radeon_device *rdev);
extern void sumo_rlc_fini(struct radeon_device *rdev);
extern int sumo_rlc_init(struct radeon_device *rdev);
extern int r600_ih_ring_alloc(struct radeon_device *rdev);
extern void r600_ih_ring_fini(struct radeon_device *rdev);
extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
extern bool evergreen_is_display_hung(struct radeon_device *rdev);
static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
					 bool enable);
static void si_init_pg(struct radeon_device *rdev);
static void si_init_cg(struct radeon_device *rdev);
static void si_fini_pg(struct radeon_device *rdev);
static void si_fini_cg(struct radeon_device *rdev);
static void si_rlc_stop(struct radeon_device *rdev);

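/*
 * RLC save/restore register list for Verde.  The exact encoding is
 * consumed by the RLC itself; each odd entry below appears to pack a
 * select value in the upper 16 bits and a dword register offset in the
 * lower 16, followed by a data slot, and the list is copied verbatim
 * into the save/restore buffer by sumo_rlc_init().  The trailing
 * 0x00000000 terminates the list.
 */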
static const u32 verde_rlc_save_restore_register_list[] =
{
	(0x8000 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x98f4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe80 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89bc >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c1c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x98f0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xe7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9148 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9150 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x897c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8d8c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac54 >> 2),
	0x00000000,
	0x3,
	(0x9c00 << 16) | (0x98f8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9910 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9914 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9918 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x991c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9920 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9924 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9928 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x992c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9930 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9934 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9938 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x993c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9940 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9944 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9948 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x994c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9950 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9954 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9958 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x995c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9960 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9964 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9968 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x996c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9970 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9974 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9978 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x997c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9980 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9984 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9988 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x998c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c04 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c08 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9b7c >> 2),
	0x00000000,
	(0x8000 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8040 << 16) | (0xe84 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x89c0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8040 << 16) | (0x914c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x8c20 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9354 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9060 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9364 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9100 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x913c >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e0 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e4 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x90e8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8bcc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b24 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e50 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8c0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e58 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8e5c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9508 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x950c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9494 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac0c >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xae00 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0xac08 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88d4 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88c8 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x88cc >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x89b0 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8b10 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x8a14 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9830 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9834 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9838 >> 2),
	0x00000000,
	(0x9c00 << 16) | (0x9a10 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8000 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8001 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8040 << 16) | (0x9874 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9870 >> 2),
	0x00000000,
	(0x8041 << 16) | (0x9874 >> 2),
	0x00000000,
	0x00000000
};

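/*
 * The "golden" register tables below are flat arrays of
 * {offset, and_mask, or_value} triples.  radeon_program_register_sequence()
 * (radeon_device.c) applies each triple roughly as follows; this is an
 * illustrative sketch, not a redefinition of the helper:
 *
 *	if (and_mask == 0xffffffff) {
 *		tmp = or_value;
 *	} else {
 *		tmp = RREG32(reg);
 *		tmp &= ~and_mask;
 *		tmp |= or_value;
 *	}
 *	WREG32(reg, tmp);
 *
 * so the mask selects which bits of the register the value may touch.
 */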
static const u32 tahiti_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4,
	0xf4a8, 0xffffffff, 0x00000000
};

static const u32 tahiti_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x277c, 0x00000003, 0x000007ff,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x00000200, 0x000002fb,
	0xac10, 0xffffffff, 0x0000543b,
	0xac0c, 0xffffffff, 0xa9210876,
	0x88d0, 0xffffffff, 0x000fff40,
	0x88d4, 0x0000001f, 0x00000010,
	0x1410, 0x20000000, 0x20fffed8,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 tahiti_golden_registers2[] =
{
	0xc64, 0x00000001, 0x00000001
};

static const u32 pitcairn_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601004,
	0xc47c, 0xffffffff, 0x10102020,
	0xc488, 0xffffffff, 0x01000020,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000a4
};

static const u32 pitcairn_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x2a00126a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f7,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x32761054,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 verde_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x033f1005,
	0xc47c, 0xffffffff, 0x10808020,
	0xc488, 0xffffffff, 0x00800008,
	0xc314, 0xffffffff, 0x00001000,
	0xc30c, 0xffffffff, 0x80010014
};

static const u32 verde_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x28350, 0x3f3f3fff, 0x0000124a,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9100, 0x07ffffff, 0x03000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e88, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x8e84, 0x01ff1f3f, 0x00000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac14, 0x000003ff, 0x00000003,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0xac0c, 0xffffffff, 0x00001032,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 oland_golden_rlc_registers[] =
{
	0xc424, 0xffffffff, 0x00601005,
	0xc47c, 0xffffffff, 0x10104040,
	0xc488, 0xffffffff, 0x0100000a,
	0xc314, 0xffffffff, 0x00000800,
	0xc30c, 0xffffffff, 0x800000f4
};

static const u32 oland_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xc78, 0x00000080, 0x00000000,
	0xd030, 0x000300c0, 0x00800040,
	0xd830, 0x000300c0, 0x00800040,
	0x5bb0, 0x000000f0, 0x00000070,
	0x5bc0, 0x00200000, 0x50100000,
	0x7030, 0x31000311, 0x00000011,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000082,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x07ffffff, 0x03000000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f3,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers[] =
{
	0x9a10, 0x00010000, 0x00018208,
	0x9830, 0xffffffff, 0x00000000,
	0x9834, 0xf00fffff, 0x00000400,
	0x9838, 0x0002021c, 0x00020200,
	0xd0c0, 0xff000fff, 0x00000100,
	0xd030, 0x000300c0, 0x00800040,
	0xd8c0, 0xff000fff, 0x00000100,
	0xd830, 0x000300c0, 0x00800040,
	0x2ae4, 0x00073ffe, 0x000022a2,
	0x240c, 0x000007ff, 0x00000000,
	0x8a14, 0xf000001f, 0x00000007,
	0x8b24, 0xffffffff, 0x00ffffff,
	0x8b10, 0x0000ff0f, 0x00000000,
	0x28a4c, 0x07ffffff, 0x4e000000,
	0x28350, 0x3f3f3fff, 0x00000000,
	0x30, 0x000000ff, 0x0040,
	0x34, 0x00000040, 0x00004040,
	0x9100, 0x03e00000, 0x03600000,
	0x9060, 0x0000007f, 0x00000020,
	0x9508, 0x00010000, 0x00010000,
	0xac14, 0x000003ff, 0x000000f1,
	0xac10, 0xffffffff, 0x00000000,
	0xac0c, 0xffffffff, 0x00003210,
	0x88d4, 0x0000001f, 0x00000010,
	0x15c0, 0x000c0fc0, 0x000c0400
};

static const u32 hainan_golden_registers2[] =
{
	0x98f8, 0xffffffff, 0x02010001
};

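/*
 * Medium/coarse grain clock gating (MGCG/CGCG) init sequences, in the
 * same {offset, mask, value} triple format as the golden registers above.
 */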
static const u32 tahiti_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x91d8, 0xffffffff, 0x00070006,
	0x91dc, 0xffffffff, 0x00090008,
	0x91e0, 0xffffffff, 0x0000000c,
	0x91e4, 0xffffffff, 0x000b000a,
	0x91e8, 0xffffffff, 0x000e000d,
	0x91ec, 0xffffffff, 0x00080007,
	0x91f0, 0xffffffff, 0x000a0009,
	0x91f4, 0xffffffff, 0x0000000d,
	0x91f8, 0xffffffff, 0x000c000b,
	0x91fc, 0xffffffff, 0x000f000e,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9264, 0xffffffff, 0x000e000d,
	0x9268, 0xffffffff, 0x0010000f,
	0x926c, 0xffffffff, 0x00000013,
	0x9270, 0xffffffff, 0x00120011,
	0x9274, 0xffffffff, 0x00150014,
	0x9278, 0xffffffff, 0x000f000e,
	0x927c, 0xffffffff, 0x00110010,
	0x9280, 0xffffffff, 0x00000014,
	0x9284, 0xffffffff, 0x00130012,
	0x9288, 0xffffffff, 0x00160015,
	0x928c, 0xffffffff, 0x0010000f,
	0x9290, 0xffffffff, 0x00120011,
	0x9294, 0xffffffff, 0x00000015,
	0x9298, 0xffffffff, 0x00140013,
	0x929c, 0xffffffff, 0x00170016,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 pitcairn_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 verde_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x9200, 0xffffffff, 0x00090008,
	0x9204, 0xffffffff, 0x000b000a,
	0x9208, 0xffffffff, 0x000c000f,
	0x920c, 0xffffffff, 0x000e000d,
	0x9210, 0xffffffff, 0x00110010,
	0x9214, 0xffffffff, 0x000a0009,
	0x9218, 0xffffffff, 0x000c000b,
	0x921c, 0xffffffff, 0x0000000f,
	0x9220, 0xffffffff, 0x000e000d,
	0x9224, 0xffffffff, 0x00110010,
	0x9228, 0xffffffff, 0x000b000a,
	0x922c, 0xffffffff, 0x000d000c,
	0x9230, 0xffffffff, 0x00000010,
	0x9234, 0xffffffff, 0x000f000e,
	0x9238, 0xffffffff, 0x00120011,
	0x923c, 0xffffffff, 0x000c000b,
	0x9240, 0xffffffff, 0x000e000d,
	0x9244, 0xffffffff, 0x00000011,
	0x9248, 0xffffffff, 0x0010000f,
	0x924c, 0xffffffff, 0x00130012,
	0x9250, 0xffffffff, 0x000d000c,
	0x9254, 0xffffffff, 0x000f000e,
	0x9258, 0xffffffff, 0x00100013,
	0x925c, 0xffffffff, 0x00120011,
	0x9260, 0xffffffff, 0x00150014,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 oland_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x102c, 0x00000101, 0x00000000,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x55e4, 0xff000fff, 0x00000100,
	0x55e8, 0x00000001, 0x00000001,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};

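/*
 * Extra power-gating init sequence for Verde, programmed through
 * si_init_golden_registers() below in the same triple format.
 */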
static const u32 verde_pg_init[] =
{
	0x353c, 0xffffffff, 0x40000,
	0x3538, 0xffffffff, 0x200010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x7007,
	0x3538, 0xffffffff, 0x300010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x400000,
	0x3538, 0xffffffff, 0x100010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x120200,
	0x3538, 0xffffffff, 0x500010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x1e1e16,
	0x3538, 0xffffffff, 0x600010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x171f1e,
	0x3538, 0xffffffff, 0x700010ff,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x353c, 0xffffffff, 0x0,
	0x3538, 0xffffffff, 0x9ff,
	0x3500, 0xffffffff, 0x0,
	0x3504, 0xffffffff, 0x10000800,
	0x3504, 0xffffffff, 0xf,
	0x3504, 0xffffffff, 0xf,
	0x3500, 0xffffffff, 0x4,
	0x3504, 0xffffffff, 0x1000051e,
	0x3504, 0xffffffff, 0xffff,
	0x3504, 0xffffffff, 0xffff,
	0x3500, 0xffffffff, 0x8,
	0x3504, 0xffffffff, 0x80500,
	0x3500, 0xffffffff, 0x12,
	0x3504, 0xffffffff, 0x9050c,
	0x3500, 0xffffffff, 0x1d,
	0x3504, 0xffffffff, 0xb052c,
	0x3500, 0xffffffff, 0x2a,
	0x3504, 0xffffffff, 0x1053e,
	0x3500, 0xffffffff, 0x2d,
	0x3504, 0xffffffff, 0x10546,
	0x3500, 0xffffffff, 0x30,
	0x3504, 0xffffffff, 0xa054e,
	0x3500, 0xffffffff, 0x3c,
	0x3504, 0xffffffff, 0x1055f,
	0x3500, 0xffffffff, 0x3f,
	0x3504, 0xffffffff, 0x10567,
	0x3500, 0xffffffff, 0x42,
	0x3504, 0xffffffff, 0x1056f,
	0x3500, 0xffffffff, 0x45,
	0x3504, 0xffffffff, 0x10572,
	0x3500, 0xffffffff, 0x48,
	0x3504, 0xffffffff, 0x20575,
	0x3500, 0xffffffff, 0x4c,
	0x3504, 0xffffffff, 0x190801,
	0x3500, 0xffffffff, 0x67,
	0x3504, 0xffffffff, 0x1082a,
	0x3500, 0xffffffff, 0x6a,
	0x3504, 0xffffffff, 0x1b082d,
	0x3500, 0xffffffff, 0x87,
	0x3504, 0xffffffff, 0x310851,
	0x3500, 0xffffffff, 0xba,
	0x3504, 0xffffffff, 0x891,
	0x3500, 0xffffffff, 0xbc,
	0x3504, 0xffffffff, 0x893,
	0x3500, 0xffffffff, 0xbe,
	0x3504, 0xffffffff, 0x20895,
	0x3500, 0xffffffff, 0xc2,
	0x3504, 0xffffffff, 0x20899,
	0x3500, 0xffffffff, 0xc6,
	0x3504, 0xffffffff, 0x2089d,
	0x3500, 0xffffffff, 0xca,
	0x3504, 0xffffffff, 0x8a1,
	0x3500, 0xffffffff, 0xcc,
	0x3504, 0xffffffff, 0x8a3,
	0x3500, 0xffffffff, 0xce,
	0x3504, 0xffffffff, 0x308a5,
	0x3500, 0xffffffff, 0xd3,
	0x3504, 0xffffffff, 0x6d08cd,
	0x3500, 0xffffffff, 0x142,
	0x3504, 0xffffffff, 0x2000095a,
	0x3504, 0xffffffff, 0x1,
	0x3500, 0xffffffff, 0x144,
	0x3504, 0xffffffff, 0x301f095b,
	0x3500, 0xffffffff, 0x165,
	0x3504, 0xffffffff, 0xc094d,
	0x3500, 0xffffffff, 0x173,
	0x3504, 0xffffffff, 0xf096d,
	0x3500, 0xffffffff, 0x184,
	0x3504, 0xffffffff, 0x15097f,
	0x3500, 0xffffffff, 0x19b,
	0x3504, 0xffffffff, 0xc0998,
	0x3500, 0xffffffff, 0x1a9,
	0x3504, 0xffffffff, 0x409a7,
	0x3500, 0xffffffff, 0x1af,
	0x3504, 0xffffffff, 0xcdc,
	0x3500, 0xffffffff, 0x1b1,
	0x3504, 0xffffffff, 0x800,
	0x3508, 0xffffffff, 0x6c9b2000,
	0x3510, 0xfc00, 0x2000,
	0x3544, 0xffffffff, 0xfc0,
	0x28d4, 0x00000100, 0x100
};

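/**
 * si_init_golden_registers - program the chip-specific golden settings
 *
 * @rdev: radeon_device pointer
 *
 * Programs the per-ASIC register fixup tables defined above during
 * startup (SI).
 */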
static void si_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_TAHITI:
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers2,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
		break;
	case CHIP_PITCAIRN:
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
		break;
	case CHIP_VERDE:
		radeon_program_register_sequence(rdev,
						 verde_golden_registers,
						 (const u32)ARRAY_SIZE(verde_golden_registers));
		radeon_program_register_sequence(rdev,
						 verde_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 verde_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 verde_pg_init,
						 (const u32)ARRAY_SIZE(verde_pg_init));
		break;
	case CHIP_OLAND:
		radeon_program_register_sequence(rdev,
						 oland_golden_registers,
						 (const u32)ARRAY_SIZE(oland_golden_registers));
		radeon_program_register_sequence(rdev,
						 oland_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 oland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
		break;
	case CHIP_HAINAN:
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers,
						 (const u32)ARRAY_SIZE(hainan_golden_registers));
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers2,
						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
		radeon_program_register_sequence(rdev,
						 hainan_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
		break;
	default:
		break;
	}
}

/**
 * si_get_allowed_info_register - fetch the register for the info ioctl
 *
 * @rdev: radeon_device pointer
 * @reg: register offset in bytes
 * @val: register value
 *
 * Returns 0 for success or -EINVAL for an invalid register
 */
int si_get_allowed_info_register(struct radeon_device *rdev,
				 u32 reg, u32 *val)
{
	switch (reg) {
	case GRBM_STATUS:
	case GRBM_STATUS2:
	case GRBM_STATUS_SE0:
	case GRBM_STATUS_SE1:
	case SRBM_STATUS:
	case SRBM_STATUS2:
	case (DMA_STATUS_REG + DMA0_REGISTER_OFFSET):
	case (DMA_STATUS_REG + DMA1_REGISTER_OFFSET):
	case UVD_STATUS:
		*val = RREG32(reg);
		return 0;
	default:
		return -EINVAL;
	}
}

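/* PCIe reference clock and the TCLK fallback used by si_get_xclk();
 * values are assumed to be in the driver's 10 kHz clock units
 * (10000 = 100 MHz), matching rdev->clock.
 */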
#define PCIE_BUS_CLK                10000
#define TCLK                        (PCIE_BUS_CLK / 10)

/**
 * si_get_xclk - get the xclk
 *
 * @rdev: radeon_device pointer
 *
 * Returns the reference clock used by the gfx engine
 * (SI).
 */
u32 si_get_xclk(struct radeon_device *rdev)
{
	u32 reference_clock = rdev->clock.spll.reference_freq;
	u32 tmp;

	tmp = RREG32(CG_CLKPIN_CNTL_2);
	if (tmp & MUX_TCLK_TO_XCLK)
		return TCLK;

	tmp = RREG32(CG_CLKPIN_CNTL);
	if (tmp & XTALIN_DIVIDE)
		return reference_clock / 4;

	return reference_clock;
}

/* get temperature in millidegrees */
int si_get_temp(struct radeon_device *rdev)
{
	u32 temp;
	int actual_temp = 0;

	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
		CTF_TEMP_SHIFT;

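	/* bit 9 set appears to flag an out-of-range reading; clamp to 255C */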
	if (temp & 0x200)
		actual_temp = 255;
	else
		actual_temp = temp & 0x1ff;

	actual_temp = (actual_temp * 1000);

	return actual_temp;
}

#define TAHITI_IO_MC_REGS_SIZE 36

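/*
 * Per-ASIC MC "IO debug" settings for the legacy firmware path: each
 * row is an {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA} pair written
 * by si_mc_load_microcode() before the ucode itself is uploaded.
 */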
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};

static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};

static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};

static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};

static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};

/**
 * si_mc_load_microcode - load MC ucode into the hw
 *
 * @rdev: radeon_device pointer
 *
 * Load the GDDR MC ucode into the hw (SI).
 * Returns 0 on success, error on failure.
 */
int si_mc_load_microcode(struct radeon_device *rdev)
{
	const __be32 *fw_data = NULL;
	const __le32 *new_fw_data = NULL;
	u32 running;
	u32 *io_mc_regs = NULL;
	const __le32 *new_io_mc_regs = NULL;
	int i, regs_size, ucode_size;

	if (!rdev->mc_fw)
		return -EINVAL;

	if (rdev->new_fw) {
		const struct mc_firmware_header_v1_0 *hdr =
			(const struct mc_firmware_header_v1_0 *)rdev->mc_fw->data;

		radeon_ucode_print_mc_hdr(&hdr->header);
		regs_size = le32_to_cpu(hdr->io_debug_size_bytes) / (4 * 2);
		new_io_mc_regs = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->io_debug_array_offset_bytes));
		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
		new_fw_data = (const __le32 *)
			(rdev->mc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
	} else {
		ucode_size = rdev->mc_fw->size / 4;

		switch (rdev->family) {
		case CHIP_TAHITI:
			io_mc_regs = (u32 *)&tahiti_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_PITCAIRN:
			io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_VERDE:
		default:
			io_mc_regs = (u32 *)&verde_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_OLAND:
			io_mc_regs = (u32 *)&oland_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		case CHIP_HAINAN:
			io_mc_regs = (u32 *)&hainan_io_mc_regs;
			regs_size = TAHITI_IO_MC_REGS_SIZE;
			break;
		}
		fw_data = (const __be32 *)rdev->mc_fw->data;
	}

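	/* the MC sequencer may already be running (e.g. started by the
	 * vbios); only (re)load the ucode when it is halted
	 */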
	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;

	if (running == 0) {
		/* reset the engine and set to writable */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);

		/* load mc io regs */
		for (i = 0; i < regs_size; i++) {
			if (rdev->new_fw) {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, le32_to_cpup(new_io_mc_regs++));
				WREG32(MC_SEQ_IO_DEBUG_DATA, le32_to_cpup(new_io_mc_regs++));
			} else {
				WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
				WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
			}
		}
		/* load the MC ucode */
		for (i = 0; i < ucode_size; i++) {
			if (rdev->new_fw)
				WREG32(MC_SEQ_SUP_PGM, le32_to_cpup(new_fw_data++));
			else
				WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
		}

		/* put the engine back into the active state */
		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);

		/* wait for training to complete */
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
				break;
			udelay(1);
		}
		for (i = 0; i < rdev->usec_timeout; i++) {
			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
				break;
			udelay(1);
		}
	}

	return 0;
}

static int si_init_microcode(struct radeon_device *rdev)
{
	const char *chip_name;
	const char *new_chip_name;
	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
	size_t smc_req_size, mc2_req_size;
	char fw_name[30];
	int err;
	int new_fw = 0;
	bool new_smc = false;
	bool si58_fw = false;
	bool banks2_fw = false;

	DRM_DEBUG("\n");

	switch (rdev->family) {
	case CHIP_TAHITI:
		chip_name = "TAHITI";
		new_chip_name = "tahiti";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = TAHITI_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_PITCAIRN:
		chip_name = "PITCAIRN";
		if ((rdev->pdev->revision == 0x81) &&
		    ((rdev->pdev->device == 0x6810) ||
		     (rdev->pdev->device == 0x6811)))
			new_smc = true;
		new_chip_name = "pitcairn";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = PITCAIRN_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_VERDE:
		chip_name = "VERDE";
		if (((rdev->pdev->device == 0x6820) &&
		     ((rdev->pdev->revision == 0x81) ||
		      (rdev->pdev->revision == 0x83))) ||
		    ((rdev->pdev->device == 0x6821) &&
		     ((rdev->pdev->revision == 0x83) ||
		      (rdev->pdev->revision == 0x87))) ||
		    ((rdev->pdev->revision == 0x87) &&
		     ((rdev->pdev->device == 0x6823) ||
		      (rdev->pdev->device == 0x682b))))
			new_smc = true;
		new_chip_name = "verde";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = SI_MC_UCODE_SIZE * 4;
		mc2_req_size = VERDE_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_OLAND:
		chip_name = "OLAND";
		if (((rdev->pdev->revision == 0x81) &&
		     ((rdev->pdev->device == 0x6600) ||
		      (rdev->pdev->device == 0x6604) ||
		      (rdev->pdev->device == 0x6605) ||
		      (rdev->pdev->device == 0x6610))) ||
		    ((rdev->pdev->revision == 0x83) &&
		     (rdev->pdev->device == 0x6610)))
			new_smc = true;
		new_chip_name = "oland";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
		break;
	case CHIP_HAINAN:
		chip_name = "HAINAN";
		if (((rdev->pdev->revision == 0x81) &&
		     (rdev->pdev->device == 0x6660)) ||
		    ((rdev->pdev->revision == 0x83) &&
		     ((rdev->pdev->device == 0x6660) ||
		      (rdev->pdev->device == 0x6663) ||
		      (rdev->pdev->device == 0x6665) ||
		      (rdev->pdev->device == 0x6667))))
			new_smc = true;
		else if ((rdev->pdev->revision == 0xc3) &&
			 (rdev->pdev->device == 0x6665))
			banks2_fw = true;
		new_chip_name = "hainan";
		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
		me_req_size = SI_PM4_UCODE_SIZE * 4;
		ce_req_size = SI_CE_UCODE_SIZE * 4;
		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
		mc_req_size = mc2_req_size = OLAND_MC_UCODE_SIZE * 4;
		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
		break;
	default: BUG();
	}

	/* this memory configuration requires special firmware */
	if (((RREG32(MC_SEQ_MISC0) & 0xff000000) >> 24) == 0x58)
		si58_fw = true;

	DRM_INFO("Loading %s Microcode\n", new_chip_name);

	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", new_chip_name);
	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
	if (err) {
		snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
		err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
		if (err)
			goto out;
		if (rdev->pfp_fw->size != pfp_req_size) {
			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
			       rdev->pfp_fw->size, fw_name);
			err = -EINVAL;
			goto out;
		}
	} else {
		err = radeon_ucode_validate(rdev->pfp_fw);
		if (err) {
1773 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1774 			       fw_name);
1775 			goto out;
1776 		} else {
1777 			new_fw++;
1778 		}
1779 	}
1780 
1781 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", new_chip_name);
1782 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1783 	if (err) {
1784 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1785 		err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1786 		if (err)
1787 			goto out;
1788 		if (rdev->me_fw->size != me_req_size) {
1789 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1790 			       rdev->me_fw->size, fw_name);
1791 			err = -EINVAL;
1792 		}
1793 	} else {
1794 		err = radeon_ucode_validate(rdev->me_fw);
1795 		if (err) {
1796 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1797 			       fw_name);
1798 			goto out;
1799 		} else {
1800 			new_fw++;
1801 		}
1802 	}
1803 
1804 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", new_chip_name);
1805 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1806 	if (err) {
1807 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1808 		err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1809 		if (err)
1810 			goto out;
1811 		if (rdev->ce_fw->size != ce_req_size) {
1812 			pr_err("si_cp: Bogus length %zu in firmware \"%s\"\n",
1813 			       rdev->ce_fw->size, fw_name);
1814 			err = -EINVAL;
1815 		}
1816 	} else {
1817 		err = radeon_ucode_validate(rdev->ce_fw);
1818 		if (err) {
1819 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1820 			       fw_name);
1821 			goto out;
1822 		} else {
1823 			new_fw++;
1824 		}
1825 	}
1826 
1827 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", new_chip_name);
1828 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1829 	if (err) {
1830 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
1831 		err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1832 		if (err)
1833 			goto out;
1834 		if (rdev->rlc_fw->size != rlc_req_size) {
1835 			pr_err("si_rlc: Bogus length %zu in firmware \"%s\"\n",
1836 			       rdev->rlc_fw->size, fw_name);
1837 			err = -EINVAL;
1838 		}
1839 	} else {
1840 		err = radeon_ucode_validate(rdev->rlc_fw);
1841 		if (err) {
1842 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1843 			       fw_name);
1844 			goto out;
1845 		} else {
1846 			new_fw++;
1847 		}
1848 	}
1849 
1850 	if (si58_fw)
1851 		snprintf(fw_name, sizeof(fw_name), "radeon/si58_mc.bin");
1852 	else
1853 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", new_chip_name);
1854 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1855 	if (err) {
1856 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc2.bin", chip_name);
1857 		err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1858 		if (err) {
1859 			snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1860 			err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1861 			if (err)
1862 				goto out;
1863 		}
1864 		if ((rdev->mc_fw->size != mc_req_size) &&
1865 		    (rdev->mc_fw->size != mc2_req_size)) {
1866 			pr_err("si_mc: Bogus length %zu in firmware \"%s\"\n",
1867 			       rdev->mc_fw->size, fw_name);
1868 			err = -EINVAL;
1869 		}
1870 		DRM_INFO("%s: %zu bytes\n", fw_name, rdev->mc_fw->size);
1871 	} else {
1872 		err = radeon_ucode_validate(rdev->mc_fw);
1873 		if (err) {
1874 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1875 			       fw_name);
1876 			goto out;
1877 		} else {
1878 			new_fw++;
1879 		}
1880 	}
1881 
1882 	if (banks2_fw)
1883 		snprintf(fw_name, sizeof(fw_name), "radeon/banks_k_2_smc.bin");
1884 	else if (new_smc)
1885 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_k_smc.bin", new_chip_name);
1886 	else
1887 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", new_chip_name);
1888 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1889 	if (err) {
1890 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1891 		err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1892 		if (err) {
1893 			pr_err("smc: error loading firmware \"%s\"\n", fw_name);
1894 			release_firmware(rdev->smc_fw);
1895 			rdev->smc_fw = NULL;
1896 			err = 0;
1897 		} else if (rdev->smc_fw->size != smc_req_size) {
1898 			pr_err("si_smc: Bogus length %zu in firmware \"%s\"\n",
1899 			       rdev->smc_fw->size, fw_name);
1900 			err = -EINVAL;
1901 		}
1902 	} else {
1903 		err = radeon_ucode_validate(rdev->smc_fw);
1904 		if (err) {
1905 			pr_err("si_cp: validation failed for firmware \"%s\"\n",
1906 			       fw_name);
1907 			goto out;
1908 		} else {
1909 			new_fw++;
1910 		}
1911 	}
1912 
1913 	if (new_fw == 0) {
1914 		rdev->new_fw = false;
1915 	} else if (new_fw < 6) {
1916 		pr_err("si_fw: mixing new and old firmware!\n");
1917 		err = -EINVAL;
1918 	} else {
1919 		rdev->new_fw = true;
1920 	}
1921 out:
1922 	if (err) {
1923 		if (err != -EINVAL)
1924 			pr_err("si_cp: Failed to load firmware \"%s\"\n",
1925 			       fw_name);
1926 		release_firmware(rdev->pfp_fw);
1927 		rdev->pfp_fw = NULL;
1928 		release_firmware(rdev->me_fw);
1929 		rdev->me_fw = NULL;
1930 		release_firmware(rdev->ce_fw);
1931 		rdev->ce_fw = NULL;
1932 		release_firmware(rdev->rlc_fw);
1933 		rdev->rlc_fw = NULL;
1934 		release_firmware(rdev->mc_fw);
1935 		rdev->mc_fw = NULL;
1936 		release_firmware(rdev->smc_fw);
1937 		rdev->smc_fw = NULL;
1938 	}
1939 	return err;
1940 }
1941 
1942 /* watermark setup */
1943 static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
1944 				   struct radeon_crtc *radeon_crtc,
1945 				   struct drm_display_mode *mode,
1946 				   struct drm_display_mode *other_mode)
1947 {
1948 	u32 tmp, buffer_alloc, i;
1949 	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
1950 	/*
1951 	 * Line Buffer Setup
1952 	 * There are 3 line buffers, each one shared by 2 display controllers.
1953 	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
1954 	 * the display controllers.  The partitioning is done via one of four
1955 	 * preset allocations specified in bits 21:20; only two are used here:
1956 	 *  0 - half lb
1957 	 *  2 - whole lb, other crtc must be disabled
1958 	 */
1959 	/* this can get tricky if we have two large displays on a paired group
1960 	 * of crtcs.  Ideally for multiple large displays we'd assign them to
1961 	 * non-linked crtcs for maximum line buffer allocation.
1962 	 */
1963 	if (radeon_crtc->base.enabled && mode) {
1964 		if (other_mode) {
1965 			tmp = 0; /* 1/2 */
1966 			buffer_alloc = 1;
1967 		} else {
1968 			tmp = 2; /* whole */
1969 			buffer_alloc = 2;
1970 		}
1971 	} else {
1972 		tmp = 0;
1973 		buffer_alloc = 0;
1974 	}
1975 
1976 	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
1977 	       DC_LB_MEMORY_CONFIG(tmp));
1978 
1979 	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
1980 	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
1981 	for (i = 0; i < rdev->usec_timeout; i++) {
1982 		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
1983 		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
1984 			break;
1985 		udelay(1);
1986 	}
1987 
1988 	if (radeon_crtc->base.enabled && mode) {
1989 		switch (tmp) {
1990 		case 0:
1991 		default:
1992 			return 4096 * 2;
1993 		case 2:
1994 			return 8192 * 2;
1995 		}
1996 	}
1997 
1998 	/* controller not enabled, so no lb used */
1999 	return 0;
2000 }
2001 
2002 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
2003 {
2004 	u32 tmp = RREG32(MC_SHARED_CHMAP);
2005 
2006 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
2007 	case 0:
2008 	default:
2009 		return 1;
2010 	case 1:
2011 		return 2;
2012 	case 2:
2013 		return 4;
2014 	case 3:
2015 		return 8;
2016 	case 4:
2017 		return 3;
2018 	case 5:
2019 		return 6;
2020 	case 6:
2021 		return 10;
2022 	case 7:
2023 		return 12;
2024 	case 8:
2025 		return 16;
2026 	}
2027 }
2028 
2029 struct dce6_wm_params {
2030 	u32 dram_channels; /* number of dram channels */
2031 	u32 yclk;          /* bandwidth per dram data pin in kHz */
2032 	u32 sclk;          /* engine clock in kHz */
2033 	u32 disp_clk;      /* display clock in kHz */
2034 	u32 src_width;     /* viewport width */
2035 	u32 active_time;   /* active display time in ns */
2036 	u32 blank_time;    /* blank time in ns */
2037 	bool interlaced;    /* mode is interlaced */
2038 	fixed20_12 vsc;    /* vertical scale ratio */
2039 	u32 num_heads;     /* number of active crtcs */
2040 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
2041 	u32 lb_size;       /* line buffer allocated to pipe */
2042 	u32 vtaps;         /* vertical scaler taps */
2043 };
2044 
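/*
 * Illustrative example (numbers hypothetical): with yclk = 4000000 kHz
 * (a 4 GHz effective data rate) and 2 DRAM channels, the math below
 * gives (4000000 / 1000) * (2 * 4) * 0.7 = 22400 MB/s of raw bandwidth.
 */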
2045 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
2046 {
2047 	/* Calculate raw DRAM Bandwidth */
2048 	fixed20_12 dram_efficiency; /* 0.7 */
2049 	fixed20_12 yclk, dram_channels, bandwidth;
2050 	fixed20_12 a;
2051 
2052 	a.full = dfixed_const(1000);
2053 	yclk.full = dfixed_const(wm->yclk);
2054 	yclk.full = dfixed_div(yclk, a);
2055 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2056 	a.full = dfixed_const(10);
2057 	dram_efficiency.full = dfixed_const(7);
2058 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
2059 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2060 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
2061 
2062 	return dfixed_trunc(bandwidth);
2063 }
2064 
2065 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2066 {
2067 	/* Calculate DRAM Bandwidth and the part allocated to display. */
2068 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
2069 	fixed20_12 yclk, dram_channels, bandwidth;
2070 	fixed20_12 a;
2071 
2072 	a.full = dfixed_const(1000);
2073 	yclk.full = dfixed_const(wm->yclk);
2074 	yclk.full = dfixed_div(yclk, a);
2075 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
2076 	a.full = dfixed_const(10);
2077 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
2078 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
2079 	bandwidth.full = dfixed_mul(dram_channels, yclk);
2080 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
2081 
2082 	return dfixed_trunc(bandwidth);
2083 }
2084 
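/*
 * Illustrative example (numbers hypothetical): sclk = 800000 kHz gives
 * (800000 / 1000) * 32 * 0.8 = 20480 MB/s of data return bandwidth.
 */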
2085 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
2086 {
2087 	/* Calculate the display Data return Bandwidth */
2088 	fixed20_12 return_efficiency; /* 0.8 */
2089 	fixed20_12 sclk, bandwidth;
2090 	fixed20_12 a;
2091 
2092 	a.full = dfixed_const(1000);
2093 	sclk.full = dfixed_const(wm->sclk);
2094 	sclk.full = dfixed_div(sclk, a);
2095 	a.full = dfixed_const(10);
2096 	return_efficiency.full = dfixed_const(8);
2097 	return_efficiency.full = dfixed_div(return_efficiency, a);
2098 	a.full = dfixed_const(32);
2099 	bandwidth.full = dfixed_mul(a, sclk);
2100 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
2101 
2102 	return dfixed_trunc(bandwidth);
2103 }
2104 
2105 static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
2106 {
2107 	return 32;
2108 }
2109 
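/*
 * The DMIF request rate is bounded by both clocks: the display clock
 * moves half a request's worth of data per cycle and the engine clock
 * a full request per cycle.  The function below takes the smaller of
 * the two and applies the 0.8 request efficiency.
 */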
2110 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
2111 {
2112 	/* Calculate the DMIF Request Bandwidth */
2113 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
2114 	fixed20_12 disp_clk, sclk, bandwidth;
2115 	fixed20_12 a, b1, b2;
2116 	u32 min_bandwidth;
2117 
2118 	a.full = dfixed_const(1000);
2119 	disp_clk.full = dfixed_const(wm->disp_clk);
2120 	disp_clk.full = dfixed_div(disp_clk, a);
2121 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
2122 	b1.full = dfixed_mul(a, disp_clk);
2123 
2124 	a.full = dfixed_const(1000);
2125 	sclk.full = dfixed_const(wm->sclk);
2126 	sclk.full = dfixed_div(sclk, a);
2127 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
2128 	b2.full = dfixed_mul(a, sclk);
2129 
2130 	a.full = dfixed_const(10);
2131 	disp_clk_request_efficiency.full = dfixed_const(8);
2132 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
2133 
2134 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
2135 
2136 	a.full = dfixed_const(min_bandwidth);
2137 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
2138 
2139 	return dfixed_trunc(bandwidth);
2140 }
2141 
2142 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
2143 {
2144 	/* Calculate the available bandwidth; the display can use it temporarily but not on average. */
2145 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
2146 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
2147 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
2148 
2149 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
2150 }
2151 
2152 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
2153 {
2154 	/* Calculate the display mode Average Bandwidth
2155 	 * DisplayMode should contain the source and destination dimensions,
2156 	 * timing, etc.
2157 	 */
2158 	fixed20_12 bpp;
2159 	fixed20_12 line_time;
2160 	fixed20_12 src_width;
2161 	fixed20_12 bandwidth;
2162 	fixed20_12 a;
2163 
2164 	a.full = dfixed_const(1000);
2165 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
2166 	line_time.full = dfixed_div(line_time, a);
2167 	bpp.full = dfixed_const(wm->bytes_per_pixel);
2168 	src_width.full = dfixed_const(wm->src_width);
2169 	bandwidth.full = dfixed_mul(src_width, bpp);
2170 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
2171 	bandwidth.full = dfixed_div(bandwidth, line_time);
2172 
2173 	return dfixed_trunc(bandwidth);
2174 }
2175 
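/*
 * Sketch of the model below: the watermark is the worst-case latency
 * the pipe must hide - memory latency, plus the time the other heads
 * spend returning their data, plus the display pipe latency.  If
 * filling one line of the line buffer takes longer than the active
 * display time, the shortfall is added on top.
 */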
2176 static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
2177 {
2178 	/* First calculate the latency in ns */
2179 	u32 mc_latency = 2000; /* 2000 ns. */
2180 	u32 available_bandwidth = dce6_available_bandwidth(wm);
2181 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
2182 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
2183 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
2184 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
2185 		(wm->num_heads * cursor_line_pair_return_time);
2186 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
2187 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
2188 	u32 tmp, dmif_size = 12288;
2189 	fixed20_12 a, b, c;
2190 
2191 	if (wm->num_heads == 0)
2192 		return 0;
2193 
2194 	a.full = dfixed_const(2);
2195 	b.full = dfixed_const(1);
2196 	if ((wm->vsc.full > a.full) ||
2197 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
2198 	    (wm->vtaps >= 5) ||
2199 	    ((wm->vsc.full >= a.full) && wm->interlaced))
2200 		max_src_lines_per_dst_line = 4;
2201 	else
2202 		max_src_lines_per_dst_line = 2;
2203 
2204 	a.full = dfixed_const(available_bandwidth);
2205 	b.full = dfixed_const(wm->num_heads);
2206 	a.full = dfixed_div(a, b);
2207 	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
2208 	tmp = min(dfixed_trunc(a), tmp);
2209 
2210 	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
2211 
2212 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
2213 	b.full = dfixed_const(1000);
2214 	c.full = dfixed_const(lb_fill_bw);
2215 	b.full = dfixed_div(c, b);
2216 	a.full = dfixed_div(a, b);
2217 	line_fill_time = dfixed_trunc(a);
2218 
2219 	if (line_fill_time < wm->active_time)
2220 		return latency;
2221 	else
2222 		return latency + (line_fill_time - wm->active_time);
2224 }
2225 
2226 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2227 {
2228 	if (dce6_average_bandwidth(wm) <=
2229 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2230 		return true;
2231 	else
2232 		return false;
2233 }
2234 
2235 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2236 {
2237 	if (dce6_average_bandwidth(wm) <=
2238 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2239 		return true;
2240 	else
2241 		return false;
2242 }
2243 
2244 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2245 {
2246 	u32 lb_partitions = wm->lb_size / wm->src_width;
2247 	u32 line_time = wm->active_time + wm->blank_time;
2248 	u32 latency_tolerant_lines;
2249 	u32 latency_hiding;
2250 	fixed20_12 a;
2251 
2252 	a.full = dfixed_const(1);
2253 	if (wm->vsc.full > a.full)
2254 		latency_tolerant_lines = 1;
2255 	else {
2256 		if (lb_partitions <= (wm->vtaps + 1))
2257 			latency_tolerant_lines = 1;
2258 		else
2259 			latency_tolerant_lines = 2;
2260 	}
2261 
2262 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2263 
2264 	if (dce6_latency_watermark(wm) <= latency_hiding)
2265 		return true;
2266 	else
2267 		return false;
2268 }
2269 
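/*
 * Program watermarks A (high clocks) and B (low clocks) for one crtc,
 * forcing display priority high whenever the average-bandwidth or
 * latency-hiding checks fail for either set.
 */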
2270 static void dce6_program_watermarks(struct radeon_device *rdev,
2271 					 struct radeon_crtc *radeon_crtc,
2272 					 u32 lb_size, u32 num_heads)
2273 {
2274 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
2275 	struct dce6_wm_params wm_low, wm_high;
2276 	u32 dram_channels;
2277 	u32 active_time;
2278 	u32 line_time = 0;
2279 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
2280 	u32 priority_a_mark = 0, priority_b_mark = 0;
2281 	u32 priority_a_cnt = PRIORITY_OFF;
2282 	u32 priority_b_cnt = PRIORITY_OFF;
2283 	u32 tmp, arb_control3;
2284 	fixed20_12 a, b, c;
2285 
2286 	if (radeon_crtc->base.enabled && num_heads && mode) {
2287 		active_time = (u32) div_u64((u64)mode->crtc_hdisplay * 1000000,
2288 					    (u32)mode->clock);
2289 		line_time = (u32) div_u64((u64)mode->crtc_htotal * 1000000,
2290 					  (u32)mode->clock);
2291 		line_time = min(line_time, (u32)65535);
2292 		priority_a_cnt = 0;
2293 		priority_b_cnt = 0;
2294 
2295 		if (rdev->family == CHIP_ARUBA)
2296 			dram_channels = evergreen_get_number_of_dram_channels(rdev);
2297 		else
2298 			dram_channels = si_get_number_of_dram_channels(rdev);
2299 
2300 		/* watermark for high clocks */
2301 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2302 			wm_high.yclk =
2303 				radeon_dpm_get_mclk(rdev, false) * 10;
2304 			wm_high.sclk =
2305 				radeon_dpm_get_sclk(rdev, false) * 10;
2306 		} else {
2307 			wm_high.yclk = rdev->pm.current_mclk * 10;
2308 			wm_high.sclk = rdev->pm.current_sclk * 10;
2309 		}
2310 
2311 		wm_high.disp_clk = mode->clock;
2312 		wm_high.src_width = mode->crtc_hdisplay;
2313 		wm_high.active_time = active_time;
2314 		wm_high.blank_time = line_time - wm_high.active_time;
2315 		wm_high.interlaced = false;
2316 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2317 			wm_high.interlaced = true;
2318 		wm_high.vsc = radeon_crtc->vsc;
2319 		wm_high.vtaps = 1;
2320 		if (radeon_crtc->rmx_type != RMX_OFF)
2321 			wm_high.vtaps = 2;
2322 		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
2323 		wm_high.lb_size = lb_size;
2324 		wm_high.dram_channels = dram_channels;
2325 		wm_high.num_heads = num_heads;
2326 
2327 		/* watermark for low clocks */
2328 		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
2329 			wm_low.yclk =
2330 				radeon_dpm_get_mclk(rdev, true) * 10;
2331 			wm_low.sclk =
2332 				radeon_dpm_get_sclk(rdev, true) * 10;
2333 		} else {
2334 			wm_low.yclk = rdev->pm.current_mclk * 10;
2335 			wm_low.sclk = rdev->pm.current_sclk * 10;
2336 		}
2337 
2338 		wm_low.disp_clk = mode->clock;
2339 		wm_low.src_width = mode->crtc_hdisplay;
2340 		wm_low.active_time = active_time;
2341 		wm_low.blank_time = line_time - wm_low.active_time;
2342 		wm_low.interlaced = false;
2343 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
2344 			wm_low.interlaced = true;
2345 		wm_low.vsc = radeon_crtc->vsc;
2346 		wm_low.vtaps = 1;
2347 		if (radeon_crtc->rmx_type != RMX_OFF)
2348 			wm_low.vtaps = 2;
2349 		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
2350 		wm_low.lb_size = lb_size;
2351 		wm_low.dram_channels = dram_channels;
2352 		wm_low.num_heads = num_heads;
2353 
2354 		/* set for high clocks */
2355 		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
2356 		/* set for low clocks */
2357 		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);
2358 
2359 		/* possibly force display priority to high */
2360 		/* should really do this at mode validation time... */
2361 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
2362 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
2363 		    !dce6_check_latency_hiding(&wm_high) ||
2364 		    (rdev->disp_priority == 2)) {
2365 			DRM_DEBUG_KMS("force priority to high\n");
2366 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2367 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2368 		}
2369 		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
2370 		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
2371 		    !dce6_check_latency_hiding(&wm_low) ||
2372 		    (rdev->disp_priority == 2)) {
2373 			DRM_DEBUG_KMS("force priority to high\n");
2374 			priority_a_cnt |= PRIORITY_ALWAYS_ON;
2375 			priority_b_cnt |= PRIORITY_ALWAYS_ON;
2376 		}
2377 
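		/*
		 * Convert the latency watermarks (ns) into priority marks:
		 * wm * pixel clock (MHz) * hsc / 1000 is the number of
		 * pixels fetched during the latency window, programmed
		 * below in units of 16 pixels using 20.12 fixed-point math.
		 */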
2378 		a.full = dfixed_const(1000);
2379 		b.full = dfixed_const(mode->clock);
2380 		b.full = dfixed_div(b, a);
2381 		c.full = dfixed_const(latency_watermark_a);
2382 		c.full = dfixed_mul(c, b);
2383 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2384 		c.full = dfixed_div(c, a);
2385 		a.full = dfixed_const(16);
2386 		c.full = dfixed_div(c, a);
2387 		priority_a_mark = dfixed_trunc(c);
2388 		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;
2389 
2390 		a.full = dfixed_const(1000);
2391 		b.full = dfixed_const(mode->clock);
2392 		b.full = dfixed_div(b, a);
2393 		c.full = dfixed_const(latency_watermark_b);
2394 		c.full = dfixed_mul(c, b);
2395 		c.full = dfixed_mul(c, radeon_crtc->hsc);
2396 		c.full = dfixed_div(c, a);
2397 		a.full = dfixed_const(16);
2398 		c.full = dfixed_div(c, a);
2399 		priority_b_mark = dfixed_trunc(c);
2400 		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
2401 
2402 		/* Save the number of lines by which the line buffer leads the scanout */
2403 		radeon_crtc->lb_vblank_lead_lines = DIV_ROUND_UP(lb_size, mode->crtc_hdisplay);
2404 	}
2405 
2406 	/* select wm A */
2407 	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2408 	tmp = arb_control3;
2409 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2410 	tmp |= LATENCY_WATERMARK_MASK(1);
2411 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2412 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2413 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
2414 		LATENCY_HIGH_WATERMARK(line_time)));
2415 	/* select wm B */
2416 	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
2417 	tmp &= ~LATENCY_WATERMARK_MASK(3);
2418 	tmp |= LATENCY_WATERMARK_MASK(2);
2419 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
2420 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
2421 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
2422 		LATENCY_HIGH_WATERMARK(line_time)));
2423 	/* restore original selection */
2424 	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);
2425 
2426 	/* write the priority marks */
2427 	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
2428 	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);
2429 
2430 	/* save values for DPM */
2431 	radeon_crtc->line_time = line_time;
2432 	radeon_crtc->wm_high = latency_watermark_a;
2433 	radeon_crtc->wm_low = latency_watermark_b;
2434 }
2435 
2436 void dce6_bandwidth_update(struct radeon_device *rdev)
2437 {
2438 	struct drm_display_mode *mode0 = NULL;
2439 	struct drm_display_mode *mode1 = NULL;
2440 	u32 num_heads = 0, lb_size;
2441 	int i;
2442 
2443 	if (!rdev->mode_info.mode_config_initialized)
2444 		return;
2445 
2446 	radeon_update_display_priority(rdev);
2447 
2448 	for (i = 0; i < rdev->num_crtc; i++) {
2449 		if (rdev->mode_info.crtcs[i]->base.enabled)
2450 			num_heads++;
2451 	}
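	/* line buffers are shared by pairs of crtcs, so program them pairwise */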
2452 	for (i = 0; i < rdev->num_crtc; i += 2) {
2453 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2454 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2455 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2456 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2457 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2458 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2459 	}
2460 }
2461 
2462 /*
2463  * Core functions
2464  */
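/*
 * Fill the GB_TILE_MODE table: one dword per tile mode (depth, display,
 * thin and PRT variants).  Tahiti/Pitcairn use the 8-pipe configs
 * throughout; the smaller parts use 4-pipe configs except for the PRT
 * entries.
 */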
2465 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2466 {
2467 	u32 *tile = rdev->config.si.tile_mode_array;
2468 	const u32 num_tile_mode_states =
2469 			ARRAY_SIZE(rdev->config.si.tile_mode_array);
2470 	u32 reg_offset, split_equal_to_row_size;
2471 
2472 	switch (rdev->config.si.mem_row_size_in_kb) {
2473 	case 1:
2474 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2475 		break;
2476 	case 2:
2477 	default:
2478 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2479 		break;
2480 	case 4:
2481 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2482 		break;
2483 	}
2484 
2485 	for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2486 		tile[reg_offset] = 0;
2487 
2488 	switch (rdev->family) {
2489 	case CHIP_TAHITI:
2490 	case CHIP_PITCAIRN:
2491 		/* non-AA compressed depth or any compressed stencil */
2492 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2493 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2494 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2495 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2496 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2497 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2498 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2499 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2500 		/* 2xAA/4xAA compressed depth only */
2501 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2502 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2503 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2504 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2505 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2506 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2507 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2508 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2509 		/* 8xAA compressed depth only */
2510 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2511 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2512 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2513 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2514 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2515 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2517 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2518 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2519 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2520 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2521 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2522 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2523 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2524 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2525 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2526 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2527 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2528 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2529 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2530 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2531 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2532 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2533 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2534 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2535 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2536 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2537 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2538 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2539 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2540 			   TILE_SPLIT(split_equal_to_row_size) |
2541 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2542 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2543 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2544 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2545 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2546 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2547 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2548 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2549 			   TILE_SPLIT(split_equal_to_row_size) |
2550 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2551 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2552 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2553 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2554 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2555 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2556 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2557 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2558 			   TILE_SPLIT(split_equal_to_row_size) |
2559 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2560 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2561 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2562 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2563 		/* 1D and 1D Array Surfaces */
2564 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2565 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2566 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2567 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2568 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2569 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2570 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2571 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2572 		/* Displayable maps. */
2573 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2574 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2575 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2576 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2577 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2578 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2579 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2580 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2581 		/* Display 8bpp. */
2582 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2583 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2584 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2585 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2586 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2587 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2588 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2589 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2590 		/* Display 16bpp. */
2591 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2592 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2593 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2594 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2595 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2596 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2597 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2598 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2599 		/* Display 32bpp. */
2600 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2601 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2602 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2603 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2604 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2605 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2606 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2607 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2608 		/* Thin. */
2609 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2610 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2611 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2612 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2613 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2614 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2615 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2616 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2617 		/* Thin 8 bpp. */
2618 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2619 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2620 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2621 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2622 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2623 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2624 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2625 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2626 		/* Thin 16 bpp. */
2627 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2628 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2629 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2630 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2631 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2632 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2633 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2634 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2635 		/* Thin 32 bpp. */
2636 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2637 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2638 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2639 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2640 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2641 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2642 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2643 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2644 		/* Thin 64 bpp. */
2645 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2646 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2647 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2648 			   TILE_SPLIT(split_equal_to_row_size) |
2649 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2650 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2651 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2652 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2653 		/* 8 bpp PRT. */
2654 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2655 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2656 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2657 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2658 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2659 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2660 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2661 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2662 		/* 16 bpp PRT */
2663 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2664 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2665 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2666 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2667 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2668 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2669 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2670 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2671 		/* 32 bpp PRT */
2672 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2673 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2674 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2675 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2676 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2677 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2678 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2679 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2680 		/* 64 bpp PRT */
2681 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2682 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2683 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2684 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2685 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2686 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2687 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2688 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2689 		/* 128 bpp PRT */
2690 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2691 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2692 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2693 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2694 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2695 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2696 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2697 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2698 
2699 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2700 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2701 		break;
2702 
2703 	case CHIP_VERDE:
2704 	case CHIP_OLAND:
2705 	case CHIP_HAINAN:
2706 		/* non-AA compressed depth or any compressed stencil */
2707 		tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2708 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2709 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2710 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2711 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2712 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2713 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2714 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2715 		/* 2xAA/4xAA compressed depth only */
2716 		tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2717 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2718 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2719 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2720 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2721 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2722 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2723 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2724 		/* 8xAA compressed depth only */
2725 		tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2726 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2727 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2728 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2729 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2730 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2731 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2732 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2733 		/* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2734 		tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2735 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2736 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2737 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2738 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2739 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2740 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2741 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2742 		/* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2743 		tile[4] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2744 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2745 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2746 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2747 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2748 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2749 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2750 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2751 		/* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2752 		tile[5] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2753 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2754 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2755 			   TILE_SPLIT(split_equal_to_row_size) |
2756 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2757 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2758 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2759 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2760 		/* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2761 		tile[6] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2762 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2763 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2764 			   TILE_SPLIT(split_equal_to_row_size) |
2765 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2766 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2767 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2768 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2769 		/* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2770 		tile[7] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2771 			   MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2772 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2773 			   TILE_SPLIT(split_equal_to_row_size) |
2774 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2775 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2776 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2777 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2778 		/* 1D and 1D Array Surfaces */
2779 		tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2780 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2781 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2782 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2783 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2784 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2785 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2786 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2787 		/* Displayable maps. */
2788 		tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2789 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2790 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2791 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2792 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2793 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2794 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2795 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2796 		/* Display 8bpp. */
2797 		tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2798 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2799 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2800 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2801 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2802 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2803 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2804 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2805 		/* Display 16bpp. */
2806 		tile[11] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2807 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2808 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2809 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2810 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2811 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2812 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2813 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2814 		/* Display 32bpp. */
2815 		tile[12] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2816 			   MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2817 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2818 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2819 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2820 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2821 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2822 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2823 		/* Thin. */
2824 		tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2825 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2826 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2827 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2828 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2829 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2830 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2831 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2832 		/* Thin 8 bpp. */
2833 		tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2834 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2835 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2836 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2837 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2838 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2839 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2840 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2841 		/* Thin 16 bpp. */
2842 		tile[15] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2843 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2844 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2845 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2846 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2847 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2848 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2849 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2850 		/* Thin 32 bpp. */
2851 		tile[16] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2852 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2853 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2854 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2855 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2856 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2857 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2858 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2859 		/* Thin 64 bpp. */
2860 		tile[17] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2861 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2862 			   PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2863 			   TILE_SPLIT(split_equal_to_row_size) |
2864 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2865 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2866 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2867 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2868 		/* 8 bpp PRT. */
2869 		tile[21] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2870 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2871 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2872 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2873 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2874 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2875 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2876 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2877 		/* 16 bpp PRT */
2878 		tile[22] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2879 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2880 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2881 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2882 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2883 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2884 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2885 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2886 		/* 32 bpp PRT */
2887 		tile[23] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2888 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2889 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2890 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2891 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2892 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2893 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2894 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2895 		/* 64 bpp PRT */
2896 		tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2897 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2898 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2899 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2900 			   NUM_BANKS(ADDR_SURF_16_BANK) |
2901 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2902 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2903 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2904 		/* 128 bpp PRT */
2905 		tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2906 			   MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2907 			   PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2908 			   TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2909 			   NUM_BANKS(ADDR_SURF_8_BANK) |
2910 			   BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2911 			   BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2912 			   MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2913 
2914 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
2915 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), tile[reg_offset]);
2916 		break;
2917 
2918 	default:
2919 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2920 	}
2921 }
2922 
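/*
 * Point GRBM register accesses at a specific shader engine / shader
 * array, or broadcast to all of them when 0xffffffff is passed.
 */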
2923 static void si_select_se_sh(struct radeon_device *rdev,
2924 			    u32 se_num, u32 sh_num)
2925 {
2926 	u32 data = INSTANCE_BROADCAST_WRITES;
2927 
2928 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2929 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2930 	else if (se_num == 0xffffffff)
2931 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2932 	else if (sh_num == 0xffffffff)
2933 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2934 	else
2935 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2936 	WREG32(GRBM_GFX_INDEX, data);
2937 }
2938 
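/* equivalent to (1 << bit_width) - 1 for bit_width < 32, e.g. 5 -> 0x1f */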
2939 static u32 si_create_bitmask(u32 bit_width)
2940 {
2941 	u32 i, mask = 0;
2942 
2943 	for (i = 0; i < bit_width; i++) {
2944 		mask <<= 1;
2945 		mask |= 1;
2946 	}
2947 	return mask;
2948 }
2949 
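/*
 * The harvest registers publish a bitmask of *inactive* CUs: merge the
 * fused and user-disabled masks, then invert to get the CUs that are
 * actually available in one shader array.
 */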
2950 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2951 {
2952 	u32 data, mask;
2953 
2954 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2955 	if (data & 1)
2956 		data &= INACTIVE_CUS_MASK;
2957 	else
2958 		data = 0;
2959 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2960 
2961 	data >>= INACTIVE_CUS_SHIFT;
2962 
2963 	mask = si_create_bitmask(cu_per_sh);
2964 
2965 	return ~data & mask;
2966 }
2967 
2968 static void si_setup_spi(struct radeon_device *rdev,
2969 			 u32 se_num, u32 sh_per_se,
2970 			 u32 cu_per_sh)
2971 {
2972 	int i, j, k;
2973 	u32 data, mask, active_cu;
2974 
2975 	for (i = 0; i < se_num; i++) {
2976 		for (j = 0; j < sh_per_se; j++) {
2977 			si_select_se_sh(rdev, i, j);
2978 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2979 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2980 
2981 			/* test each CU bit in turn */
2982 			for (k = 0; k < 16; k++) {
2983 				mask = 1 << k;	/* was "mask <<= k", which skips bits */
2984 				if (active_cu & mask) {
2985 					data &= ~mask;
2986 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2987 					break;
2988 				}
2989 			}
2990 		}
2991 	}
2992 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2993 }
2994 
2995 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2996 			      u32 max_rb_num_per_se,
2997 			      u32 sh_per_se)
2998 {
2999 	u32 data, mask;
3000 
3001 	data = RREG32(CC_RB_BACKEND_DISABLE);
3002 	if (data & 1)
3003 		data &= BACKEND_DISABLE_MASK;
3004 	else
3005 		data = 0;
3006 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
3007 
3008 	data >>= BACKEND_DISABLE_SHIFT;
3009 
3010 	mask = si_create_bitmask(max_rb_num_per_se / sh_per_se);
3011 
3012 	return data & mask;
3013 }
3014 
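/*
 * Work out which render backends survived harvesting, cache the enabled
 * mask for userspace and program PA_SC_RASTER_CONFIG so rasterization
 * only targets the enabled RBs.
 */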
3015 static void si_setup_rb(struct radeon_device *rdev,
3016 			u32 se_num, u32 sh_per_se,
3017 			u32 max_rb_num_per_se)
3018 {
3019 	int i, j;
3020 	u32 data, mask;
3021 	u32 disabled_rbs = 0;
3022 	u32 enabled_rbs = 0;
3023 
3024 	for (i = 0; i < se_num; i++) {
3025 		for (j = 0; j < sh_per_se; j++) {
3026 			si_select_se_sh(rdev, i, j);
3027 			data = si_get_rb_disabled(rdev, max_rb_num_per_se, sh_per_se);
3028 			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
3029 		}
3030 	}
3031 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3032 
3033 	mask = 1;
3034 	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
3035 		if (!(disabled_rbs & mask))
3036 			enabled_rbs |= mask;
3037 		mask <<= 1;
3038 	}
3039 
3040 	rdev->config.si.backend_enable_mask = enabled_rbs;
3041 
3042 	for (i = 0; i < se_num; i++) {
3043 		si_select_se_sh(rdev, i, 0xffffffff);
3044 		data = 0;
3045 		for (j = 0; j < sh_per_se; j++) {
3046 			switch (enabled_rbs & 3) {
3047 			case 1:
3048 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
3049 				break;
3050 			case 2:
3051 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
3052 				break;
3053 			case 3:
3054 			default:
3055 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
3056 				break;
3057 			}
3058 			enabled_rbs >>= 2;
3059 		}
3060 		WREG32(PA_SC_RASTER_CONFIG, data);
3061 	}
3062 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
3063 }
3064 
3065 static void si_gpu_init(struct radeon_device *rdev)
3066 {
3067 	u32 gb_addr_config = 0;
3068 	u32 mc_shared_chmap, mc_arb_ramcfg;
3069 	u32 sx_debug_1;
3070 	u32 hdp_host_path_cntl;
3071 	u32 tmp;
3072 	int i, j;
3073 
3074 	switch (rdev->family) {
3075 	case CHIP_TAHITI:
3076 		rdev->config.si.max_shader_engines = 2;
3077 		rdev->config.si.max_tile_pipes = 12;
3078 		rdev->config.si.max_cu_per_sh = 8;
3079 		rdev->config.si.max_sh_per_se = 2;
3080 		rdev->config.si.max_backends_per_se = 4;
3081 		rdev->config.si.max_texture_channel_caches = 12;
3082 		rdev->config.si.max_gprs = 256;
3083 		rdev->config.si.max_gs_threads = 32;
3084 		rdev->config.si.max_hw_contexts = 8;
3085 
3086 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3087 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3088 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3089 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3090 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3091 		break;
3092 	case CHIP_PITCAIRN:
3093 		rdev->config.si.max_shader_engines = 2;
3094 		rdev->config.si.max_tile_pipes = 8;
3095 		rdev->config.si.max_cu_per_sh = 5;
3096 		rdev->config.si.max_sh_per_se = 2;
3097 		rdev->config.si.max_backends_per_se = 4;
3098 		rdev->config.si.max_texture_channel_caches = 8;
3099 		rdev->config.si.max_gprs = 256;
3100 		rdev->config.si.max_gs_threads = 32;
3101 		rdev->config.si.max_hw_contexts = 8;
3102 
3103 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3104 		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
3105 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3106 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3107 		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
3108 		break;
3109 	case CHIP_VERDE:
3110 	default:
3111 		rdev->config.si.max_shader_engines = 1;
3112 		rdev->config.si.max_tile_pipes = 4;
3113 		rdev->config.si.max_cu_per_sh = 5;
3114 		rdev->config.si.max_sh_per_se = 2;
3115 		rdev->config.si.max_backends_per_se = 4;
3116 		rdev->config.si.max_texture_channel_caches = 4;
3117 		rdev->config.si.max_gprs = 256;
3118 		rdev->config.si.max_gs_threads = 32;
3119 		rdev->config.si.max_hw_contexts = 8;
3120 
3121 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3122 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3123 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3124 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3125 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3126 		break;
3127 	case CHIP_OLAND:
3128 		rdev->config.si.max_shader_engines = 1;
3129 		rdev->config.si.max_tile_pipes = 4;
3130 		rdev->config.si.max_cu_per_sh = 6;
3131 		rdev->config.si.max_sh_per_se = 1;
3132 		rdev->config.si.max_backends_per_se = 2;
3133 		rdev->config.si.max_texture_channel_caches = 4;
3134 		rdev->config.si.max_gprs = 256;
3135 		rdev->config.si.max_gs_threads = 16;
3136 		rdev->config.si.max_hw_contexts = 8;
3137 
3138 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3139 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3140 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3141 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3142 		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
3143 		break;
3144 	case CHIP_HAINAN:
3145 		rdev->config.si.max_shader_engines = 1;
3146 		rdev->config.si.max_tile_pipes = 4;
3147 		rdev->config.si.max_cu_per_sh = 5;
3148 		rdev->config.si.max_sh_per_se = 1;
3149 		rdev->config.si.max_backends_per_se = 1;
3150 		rdev->config.si.max_texture_channel_caches = 2;
3151 		rdev->config.si.max_gprs = 256;
3152 		rdev->config.si.max_gs_threads = 16;
3153 		rdev->config.si.max_hw_contexts = 8;
3154 
3155 		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
3156 		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
3157 		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
3158 		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
3159 		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
3160 		break;
3161 	}
3162 
3163 	/* Initialize HDP */
3164 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
3165 		WREG32((0x2c14 + j), 0x00000000);
3166 		WREG32((0x2c18 + j), 0x00000000);
3167 		WREG32((0x2c1c + j), 0x00000000);
3168 		WREG32((0x2c20 + j), 0x00000000);
3169 		WREG32((0x2c24 + j), 0x00000000);
3170 	}
3171 
3172 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
3173 	WREG32(SRBM_INT_CNTL, 1);
3174 	WREG32(SRBM_INT_ACK, 1);
3175 
3176 	evergreen_fix_pci_max_read_req_size(rdev);
3177 
3178 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
3179 
3180 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
3181 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
3182 
3183 	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
3184 	rdev->config.si.mem_max_burst_length_bytes = 256;
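	/* row size = 4 bytes/column * 2^(8 + NOOFCOLS) columns;
	 * e.g. NOOFCOLS = 2 -> 4 * 1024 bytes = 4KB rows */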
3185 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
3186 	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
3187 	if (rdev->config.si.mem_row_size_in_kb > 4)
3188 		rdev->config.si.mem_row_size_in_kb = 4;
3189 	/* XXX use MC settings? */
3190 	rdev->config.si.shader_engine_tile_size = 32;
3191 	rdev->config.si.num_gpus = 1;
3192 	rdev->config.si.multi_gpu_tile_size = 64;
3193 
3194 	/* fix up row size */
3195 	gb_addr_config &= ~ROW_SIZE_MASK;
3196 	switch (rdev->config.si.mem_row_size_in_kb) {
3197 	case 1:
3198 	default:
3199 		gb_addr_config |= ROW_SIZE(0);
3200 		break;
3201 	case 2:
3202 		gb_addr_config |= ROW_SIZE(1);
3203 		break;
3204 	case 4:
3205 		gb_addr_config |= ROW_SIZE(2);
3206 		break;
3207 	}
3208 
3209 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
3210 	 * not have bank info, so create a custom tiling dword.
3211 	 * bits 3:0   num_pipes
3212 	 * bits 7:4   num_banks
3213 	 * bits 11:8  group_size
3214 	 * bits 15:12 row_size
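	 * e.g. 8 pipes (3), 16 banks (2 << 4) and 4KB rows (2 << 12)
	 * pack as 0x2023 plus the interleave field in bits 11:8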
3215 	 */
3216 	rdev->config.si.tile_config = 0;
3217 	switch (rdev->config.si.num_tile_pipes) {
3218 	case 1:
3219 		rdev->config.si.tile_config |= (0 << 0);
3220 		break;
3221 	case 2:
3222 		rdev->config.si.tile_config |= (1 << 0);
3223 		break;
3224 	case 4:
3225 		rdev->config.si.tile_config |= (2 << 0);
3226 		break;
3227 	case 8:
3228 	default:
3229 		/* XXX what about 12? */
3230 		rdev->config.si.tile_config |= (3 << 0);
3231 		break;
3232 	}
3233 	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
3234 	case 0: /* four banks */
3235 		rdev->config.si.tile_config |= 0 << 4;
3236 		break;
3237 	case 1: /* eight banks */
3238 		rdev->config.si.tile_config |= 1 << 4;
3239 		break;
3240 	case 2: /* sixteen banks */
3241 	default:
3242 		rdev->config.si.tile_config |= 2 << 4;
3243 		break;
3244 	}
3245 	rdev->config.si.tile_config |=
3246 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
3247 	rdev->config.si.tile_config |=
3248 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
3249 
3250 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
3251 	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
3252 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
3253 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
3254 	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
3255 	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
3256 	if (rdev->has_uvd) {
3257 		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
3258 		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
3259 		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
3260 	}
3261 
3262 	si_tiling_mode_table_init(rdev);
3263 
3264 	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
3265 		    rdev->config.si.max_sh_per_se,
3266 		    rdev->config.si.max_backends_per_se);
3267 
3268 	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
3269 		     rdev->config.si.max_sh_per_se,
3270 		     rdev->config.si.max_cu_per_sh);
3271 
3272 	rdev->config.si.active_cus = 0;
3273 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
3274 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
3275 			rdev->config.si.active_cus +=
3276 				hweight32(si_get_cu_active_bitmap(rdev, i, j));
3277 		}
3278 	}
3279 
3280 	/* set HW defaults for 3D engine */
3281 	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
3282 				     ROQ_IB2_START(0x2b)));
3283 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
3284 
3285 	sx_debug_1 = RREG32(SX_DEBUG_1);
3286 	WREG32(SX_DEBUG_1, sx_debug_1);
3287 
3288 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
3289 
3290 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
3291 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
3292 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
3293 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));
3294 
3295 	WREG32(VGT_NUM_INSTANCES, 1);
3296 
3297 	WREG32(CP_PERFMON_CNTL, 0);
3298 
3299 	WREG32(SQ_CONFIG, 0);
3300 
3301 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
3302 					  FORCE_EOV_MAX_REZ_CNT(255)));
3303 
3304 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
3305 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
3306 
3307 	WREG32(VGT_GS_VERTEX_REUSE, 16);
3308 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
3309 
3310 	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
3311 	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
3312 	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
3313 	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
3314 	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
3315 	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
3316 	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
3317 	WREG32(CB_PERFCOUNTER3_SELECT1, 0);
3318 
3319 	tmp = RREG32(HDP_MISC_CNTL);
3320 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
3321 	WREG32(HDP_MISC_CNTL, tmp);
3322 
3323 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
3324 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
3325 
3326 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
3327 
3328 	udelay(50);
3329 }
3330 
3331 /*
 * GPU scratch register helper functions.
3333  */
3334 static void si_scratch_init(struct radeon_device *rdev)
3335 {
3336 	int i;
3337 
3338 	rdev->scratch.num_reg = 7;
3339 	rdev->scratch.reg_base = SCRATCH_REG0;
3340 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3341 		rdev->scratch.free[i] = true;
3342 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3343 	}
3344 }
3345 
3346 void si_fence_ring_emit(struct radeon_device *rdev,
3347 			struct radeon_fence *fence)
3348 {
3349 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3350 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3351 
3352 	/* flush read cache over gart */
3353 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3354 	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3355 	radeon_ring_write(ring, 0);
3356 	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3357 	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3358 			  PACKET3_TC_ACTION_ENA |
3359 			  PACKET3_SH_KCACHE_ACTION_ENA |
3360 			  PACKET3_SH_ICACHE_ACTION_ENA);
3361 	radeon_ring_write(ring, 0xFFFFFFFF);
3362 	radeon_ring_write(ring, 0);
3363 	radeon_ring_write(ring, 10); /* poll interval */
3364 	/* EVENT_WRITE_EOP - flush caches, send int */
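	/* DATA_SEL(1) writes the 32-bit fence seq to addr, INT_SEL(2)
	 * raises the fence interrupt once the write is confirmed */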
3365 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
3366 	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
3367 	radeon_ring_write(ring, lower_32_bits(addr));
3368 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2));
3369 	radeon_ring_write(ring, fence->seq);
3370 	radeon_ring_write(ring, 0);
3371 }
3372 
3373 /*
3374  * IB stuff
3375  */
3376 void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
3377 {
3378 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3379 	unsigned vm_id = ib->vm ? ib->vm->ids[ib->ring].id : 0;
3380 	u32 header;
3381 
3382 	if (ib->is_const_ib) {
3383 		/* set switch buffer packet before const IB */
3384 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
3385 		radeon_ring_write(ring, 0);
3386 
3387 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
3388 	} else {
3389 		u32 next_rptr;
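		/* next_rptr must point past this writeback packet (3 or
		 * 5 dwords), the 4-dword INDIRECT_BUFFER and the 8-dword
		 * cache flush emitted below */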
3390 		if (ring->rptr_save_reg) {
3391 			next_rptr = ring->wptr + 3 + 4 + 8;
3392 			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3393 			radeon_ring_write(ring, ((ring->rptr_save_reg -
3394 						  PACKET3_SET_CONFIG_REG_START) >> 2));
3395 			radeon_ring_write(ring, next_rptr);
3396 		} else if (rdev->wb.enabled) {
3397 			next_rptr = ring->wptr + 5 + 4 + 8;
3398 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
3399 			radeon_ring_write(ring, (1 << 8));
3400 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3401 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr));
3402 			radeon_ring_write(ring, next_rptr);
3403 		}
3404 
3405 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
3406 	}
3407 
3408 	radeon_ring_write(ring, header);
3409 	radeon_ring_write(ring,
3410 #ifdef __BIG_ENDIAN
3411 			  (2 << 0) |
3412 #endif
3413 			  (ib->gpu_addr & 0xFFFFFFFC));
3414 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
3415 	radeon_ring_write(ring, ib->length_dw | (vm_id << 24));
3416 
3417 	if (!ib->is_const_ib) {
3418 		/* flush read cache over gart for this vmid */
3419 		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
3420 		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
3421 		radeon_ring_write(ring, vm_id);
3422 		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
3423 		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
3424 				  PACKET3_TC_ACTION_ENA |
3425 				  PACKET3_SH_KCACHE_ACTION_ENA |
3426 				  PACKET3_SH_ICACHE_ACTION_ENA);
3427 		radeon_ring_write(ring, 0xFFFFFFFF);
3428 		radeon_ring_write(ring, 0);
3429 		radeon_ring_write(ring, 10); /* poll interval */
3430 	}
3431 }
3432 
3433 /*
3434  * CP.
3435  */
3436 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3437 {
3438 	if (enable)
3439 		WREG32(CP_ME_CNTL, 0);
3440 	else {
3441 		if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3442 			radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3443 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3444 		WREG32(SCRATCH_UMSK, 0);
3445 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3446 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3447 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3448 	}
3449 	udelay(50);
3450 }
3451 
3452 static int si_cp_load_microcode(struct radeon_device *rdev)
3453 {
3454 	int i;
3455 
3456 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
3457 		return -EINVAL;
3458 
3459 	si_cp_enable(rdev, false);
3460 
3461 	if (rdev->new_fw) {
3462 		const struct gfx_firmware_header_v1_0 *pfp_hdr =
3463 			(const struct gfx_firmware_header_v1_0 *)rdev->pfp_fw->data;
3464 		const struct gfx_firmware_header_v1_0 *ce_hdr =
3465 			(const struct gfx_firmware_header_v1_0 *)rdev->ce_fw->data;
3466 		const struct gfx_firmware_header_v1_0 *me_hdr =
3467 			(const struct gfx_firmware_header_v1_0 *)rdev->me_fw->data;
3468 		const __le32 *fw_data;
3469 		u32 fw_size;
3470 
3471 		radeon_ucode_print_gfx_hdr(&pfp_hdr->header);
3472 		radeon_ucode_print_gfx_hdr(&ce_hdr->header);
3473 		radeon_ucode_print_gfx_hdr(&me_hdr->header);
3474 
3475 		/* PFP */
3476 		fw_data = (const __le32 *)
3477 			(rdev->pfp_fw->data + le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes));
3478 		fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4;
3479 		WREG32(CP_PFP_UCODE_ADDR, 0);
3480 		for (i = 0; i < fw_size; i++)
3481 			WREG32(CP_PFP_UCODE_DATA, le32_to_cpup(fw_data++));
3482 		WREG32(CP_PFP_UCODE_ADDR, 0);
3483 
3484 		/* CE */
3485 		fw_data = (const __le32 *)
3486 			(rdev->ce_fw->data + le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes));
3487 		fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4;
3488 		WREG32(CP_CE_UCODE_ADDR, 0);
3489 		for (i = 0; i < fw_size; i++)
3490 			WREG32(CP_CE_UCODE_DATA, le32_to_cpup(fw_data++));
3491 		WREG32(CP_CE_UCODE_ADDR, 0);
3492 
3493 		/* ME */
		fw_data = (const __le32 *)
3495 			(rdev->me_fw->data + le32_to_cpu(me_hdr->header.ucode_array_offset_bytes));
3496 		fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4;
3497 		WREG32(CP_ME_RAM_WADDR, 0);
3498 		for (i = 0; i < fw_size; i++)
3499 			WREG32(CP_ME_RAM_DATA, le32_to_cpup(fw_data++));
3500 		WREG32(CP_ME_RAM_WADDR, 0);
3501 	} else {
3502 		const __be32 *fw_data;
3503 
3504 		/* PFP */
3505 		fw_data = (const __be32 *)rdev->pfp_fw->data;
3506 		WREG32(CP_PFP_UCODE_ADDR, 0);
3507 		for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3508 			WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3509 		WREG32(CP_PFP_UCODE_ADDR, 0);
3510 
3511 		/* CE */
3512 		fw_data = (const __be32 *)rdev->ce_fw->data;
3513 		WREG32(CP_CE_UCODE_ADDR, 0);
3514 		for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3515 			WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3516 		WREG32(CP_CE_UCODE_ADDR, 0);
3517 
3518 		/* ME */
3519 		fw_data = (const __be32 *)rdev->me_fw->data;
3520 		WREG32(CP_ME_RAM_WADDR, 0);
3521 		for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3522 			WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3523 		WREG32(CP_ME_RAM_WADDR, 0);
3524 	}
3525 
3526 	WREG32(CP_PFP_UCODE_ADDR, 0);
3527 	WREG32(CP_CE_UCODE_ADDR, 0);
3528 	WREG32(CP_ME_RAM_WADDR, 0);
3529 	WREG32(CP_ME_RAM_RADDR, 0);
3530 	return 0;
3531 }
3532 
3533 static int si_cp_start(struct radeon_device *rdev)
3534 {
3535 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3536 	int r, i;
3537 
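	/* 7 dwords for the ME_INITIALIZE packet plus 4 for the CE
	 * partition SET_BASE */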
3538 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3539 	if (r) {
3540 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3541 		return r;
3542 	}
3543 	/* init the CP */
3544 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3545 	radeon_ring_write(ring, 0x1);
3546 	radeon_ring_write(ring, 0x0);
3547 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3548 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3549 	radeon_ring_write(ring, 0);
3550 	radeon_ring_write(ring, 0);
3551 
3552 	/* init the CE partitions */
3553 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3554 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3555 	radeon_ring_write(ring, 0xc000);
3556 	radeon_ring_write(ring, 0xe000);
3557 	radeon_ring_unlock_commit(rdev, ring, false);
3558 
3559 	si_cp_enable(rdev, true);
3560 
3561 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3562 	if (r) {
3563 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3564 		return r;
3565 	}
3566 
3567 	/* setup clear context state */
3568 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3569 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3570 
3571 	for (i = 0; i < si_default_size; i++)
3572 		radeon_ring_write(ring, si_default_state[i]);
3573 
3574 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3575 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3576 
3577 	/* set clear context state */
3578 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3579 	radeon_ring_write(ring, 0);
3580 
3581 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3582 	radeon_ring_write(ring, 0x00000316);
3583 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3584 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3585 
3586 	radeon_ring_unlock_commit(rdev, ring, false);
3587 
3588 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3589 		ring = &rdev->ring[i];
		r = radeon_ring_lock(rdev, ring, 2);
		if (r) {
			DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
			return r;
		}
3591 
3592 		/* clear the compute context state */
3593 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3594 		radeon_ring_write(ring, 0);
3595 
3596 		radeon_ring_unlock_commit(rdev, ring, false);
3597 	}
3598 
3599 	return 0;
3600 }
3601 
3602 static void si_cp_fini(struct radeon_device *rdev)
3603 {
3604 	struct radeon_ring *ring;
3605 	si_cp_enable(rdev, false);
3606 
3607 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3608 	radeon_ring_fini(rdev, ring);
3609 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3610 
3611 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3612 	radeon_ring_fini(rdev, ring);
3613 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3614 
3615 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3616 	radeon_ring_fini(rdev, ring);
3617 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3618 }
3619 
3620 static int si_cp_resume(struct radeon_device *rdev)
3621 {
3622 	struct radeon_ring *ring;
3623 	u32 tmp;
3624 	u32 rb_bufsz;
3625 	int r;
3626 
3627 	si_enable_gui_idle_interrupt(rdev, false);
3628 
3629 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3630 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3631 
3632 	/* Set the write pointer delay */
3633 	WREG32(CP_RB_WPTR_DELAY, 0);
3634 
3635 	WREG32(CP_DEBUG, 0);
3636 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3637 
3638 	/* ring 0 - compute and gfx */
3639 	/* Set ring buffer size */
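	/* RB_BUFSZ is log2 of the ring size in 8-byte qwords; the block
	 * size field above it controls how often the rptr is written
	 * back, here roughly once per GPU page */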
3640 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3641 	rb_bufsz = order_base_2(ring->ring_size / 8);
3642 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3643 #ifdef __BIG_ENDIAN
3644 	tmp |= BUF_SWAP_32BIT;
3645 #endif
3646 	WREG32(CP_RB0_CNTL, tmp);
3647 
3648 	/* Initialize the ring buffer's read and write pointers */
3649 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3650 	ring->wptr = 0;
3651 	WREG32(CP_RB0_WPTR, ring->wptr);
3652 
3653 	/* set the wb address whether it's enabled or not */
3654 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3655 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3656 
3657 	if (rdev->wb.enabled)
3658 		WREG32(SCRATCH_UMSK, 0xff);
3659 	else {
3660 		tmp |= RB_NO_UPDATE;
3661 		WREG32(SCRATCH_UMSK, 0);
3662 	}
3663 
3664 	mdelay(1);
3665 	WREG32(CP_RB0_CNTL, tmp);
3666 
3667 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3668 
	/* ring1 - compute only */
3670 	/* Set ring buffer size */
3671 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3672 	rb_bufsz = order_base_2(ring->ring_size / 8);
3673 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3674 #ifdef __BIG_ENDIAN
3675 	tmp |= BUF_SWAP_32BIT;
3676 #endif
3677 	WREG32(CP_RB1_CNTL, tmp);
3678 
3679 	/* Initialize the ring buffer's read and write pointers */
3680 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3681 	ring->wptr = 0;
3682 	WREG32(CP_RB1_WPTR, ring->wptr);
3683 
3684 	/* set the wb address whether it's enabled or not */
3685 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3686 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3687 
3688 	mdelay(1);
3689 	WREG32(CP_RB1_CNTL, tmp);
3690 
3691 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3692 
3693 	/* ring2 - compute only */
3694 	/* Set ring buffer size */
3695 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3696 	rb_bufsz = order_base_2(ring->ring_size / 8);
3697 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3698 #ifdef __BIG_ENDIAN
3699 	tmp |= BUF_SWAP_32BIT;
3700 #endif
3701 	WREG32(CP_RB2_CNTL, tmp);
3702 
3703 	/* Initialize the ring buffer's read and write pointers */
3704 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3705 	ring->wptr = 0;
3706 	WREG32(CP_RB2_WPTR, ring->wptr);
3707 
3708 	/* set the wb address whether it's enabled or not */
3709 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3710 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3711 
3712 	mdelay(1);
3713 	WREG32(CP_RB2_CNTL, tmp);
3714 
3715 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3716 
3717 	/* start the rings */
3718 	si_cp_start(rdev);
3719 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3720 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3721 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3722 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3723 	if (r) {
3724 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3725 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3726 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3727 		return r;
3728 	}
3729 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3730 	if (r) {
3731 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3732 	}
3733 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3734 	if (r) {
3735 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3736 	}
3737 
3738 	si_enable_gui_idle_interrupt(rdev, true);
3739 
3740 	if (rdev->asic->copy.copy_ring_index == RADEON_RING_TYPE_GFX_INDEX)
3741 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3742 
3743 	return 0;
3744 }
3745 
3746 u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
3747 {
3748 	u32 reset_mask = 0;
3749 	u32 tmp;
3750 
3751 	/* GRBM_STATUS */
3752 	tmp = RREG32(GRBM_STATUS);
3753 	if (tmp & (PA_BUSY | SC_BUSY |
3754 		   BCI_BUSY | SX_BUSY |
3755 		   TA_BUSY | VGT_BUSY |
3756 		   DB_BUSY | CB_BUSY |
3757 		   GDS_BUSY | SPI_BUSY |
3758 		   IA_BUSY | IA_BUSY_NO_DMA))
3759 		reset_mask |= RADEON_RESET_GFX;
3760 
3761 	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
3762 		   CP_BUSY | CP_COHERENCY_BUSY))
3763 		reset_mask |= RADEON_RESET_CP;
3764 
3765 	if (tmp & GRBM_EE_BUSY)
3766 		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;
3767 
3768 	/* GRBM_STATUS2 */
3769 	tmp = RREG32(GRBM_STATUS2);
3770 	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
3771 		reset_mask |= RADEON_RESET_RLC;
3772 
3773 	/* DMA_STATUS_REG 0 */
3774 	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
3775 	if (!(tmp & DMA_IDLE))
3776 		reset_mask |= RADEON_RESET_DMA;
3777 
3778 	/* DMA_STATUS_REG 1 */
3779 	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
3780 	if (!(tmp & DMA_IDLE))
3781 		reset_mask |= RADEON_RESET_DMA1;
3782 
3783 	/* SRBM_STATUS2 */
3784 	tmp = RREG32(SRBM_STATUS2);
3785 	if (tmp & DMA_BUSY)
3786 		reset_mask |= RADEON_RESET_DMA;
3787 
3788 	if (tmp & DMA1_BUSY)
3789 		reset_mask |= RADEON_RESET_DMA1;
3790 
3791 	/* SRBM_STATUS */
3792 	tmp = RREG32(SRBM_STATUS);
3793 
3794 	if (tmp & IH_BUSY)
3795 		reset_mask |= RADEON_RESET_IH;
3796 
3797 	if (tmp & SEM_BUSY)
3798 		reset_mask |= RADEON_RESET_SEM;
3799 
3800 	if (tmp & GRBM_RQ_PENDING)
3801 		reset_mask |= RADEON_RESET_GRBM;
3802 
3803 	if (tmp & VMC_BUSY)
3804 		reset_mask |= RADEON_RESET_VMC;
3805 
3806 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3807 		   MCC_BUSY | MCD_BUSY))
3808 		reset_mask |= RADEON_RESET_MC;
3809 
3810 	if (evergreen_is_display_hung(rdev))
3811 		reset_mask |= RADEON_RESET_DISPLAY;
3812 
3813 	/* VM_L2_STATUS */
3814 	tmp = RREG32(VM_L2_STATUS);
3815 	if (tmp & L2_BUSY)
3816 		reset_mask |= RADEON_RESET_VMC;
3817 
	/* Skip MC reset as it's most likely not hung, just busy */
3819 	if (reset_mask & RADEON_RESET_MC) {
3820 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3821 		reset_mask &= ~RADEON_RESET_MC;
3822 	}
3823 
3824 	return reset_mask;
3825 }
3826 
3827 static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3828 {
3829 	struct evergreen_mc_save save;
3830 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3831 	u32 tmp;
3832 
3833 	if (reset_mask == 0)
3834 		return;
3835 
3836 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3837 
3838 	evergreen_print_gpu_status_regs(rdev);
3839 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3840 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3841 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3842 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3843 
3844 	/* disable PG/CG */
3845 	si_fini_pg(rdev);
3846 	si_fini_cg(rdev);
3847 
3848 	/* stop the rlc */
3849 	si_rlc_stop(rdev);
3850 
3851 	/* Disable CP parsing/prefetching */
3852 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3853 
3854 	if (reset_mask & RADEON_RESET_DMA) {
3855 		/* dma0 */
3856 		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
3857 		tmp &= ~DMA_RB_ENABLE;
3858 		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
3859 	}
3860 	if (reset_mask & RADEON_RESET_DMA1) {
3861 		/* dma1 */
3862 		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
3863 		tmp &= ~DMA_RB_ENABLE;
3864 		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
3865 	}
3866 
3867 	udelay(50);
3868 
3869 	evergreen_mc_stop(rdev, &save);
3870 	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3872 	}
3873 
3874 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
3875 		grbm_soft_reset = SOFT_RESET_CB |
3876 			SOFT_RESET_DB |
3877 			SOFT_RESET_GDS |
3878 			SOFT_RESET_PA |
3879 			SOFT_RESET_SC |
3880 			SOFT_RESET_BCI |
3881 			SOFT_RESET_SPI |
3882 			SOFT_RESET_SX |
3883 			SOFT_RESET_TC |
3884 			SOFT_RESET_TA |
3885 			SOFT_RESET_VGT |
3886 			SOFT_RESET_IA;
3887 	}
3888 
3889 	if (reset_mask & RADEON_RESET_CP) {
3890 		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;
3891 
3892 		srbm_soft_reset |= SOFT_RESET_GRBM;
3893 	}
3894 
3895 	if (reset_mask & RADEON_RESET_DMA)
3896 		srbm_soft_reset |= SOFT_RESET_DMA;
3897 
3898 	if (reset_mask & RADEON_RESET_DMA1)
3899 		srbm_soft_reset |= SOFT_RESET_DMA1;
3900 
3901 	if (reset_mask & RADEON_RESET_DISPLAY)
3902 		srbm_soft_reset |= SOFT_RESET_DC;
3903 
3904 	if (reset_mask & RADEON_RESET_RLC)
3905 		grbm_soft_reset |= SOFT_RESET_RLC;
3906 
3907 	if (reset_mask & RADEON_RESET_SEM)
3908 		srbm_soft_reset |= SOFT_RESET_SEM;
3909 
3910 	if (reset_mask & RADEON_RESET_IH)
3911 		srbm_soft_reset |= SOFT_RESET_IH;
3912 
3913 	if (reset_mask & RADEON_RESET_GRBM)
3914 		srbm_soft_reset |= SOFT_RESET_GRBM;
3915 
3916 	if (reset_mask & RADEON_RESET_VMC)
3917 		srbm_soft_reset |= SOFT_RESET_VMC;
3918 
3919 	if (reset_mask & RADEON_RESET_MC)
3920 		srbm_soft_reset |= SOFT_RESET_MC;
3921 
3922 	if (grbm_soft_reset) {
3923 		tmp = RREG32(GRBM_SOFT_RESET);
3924 		tmp |= grbm_soft_reset;
3925 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3926 		WREG32(GRBM_SOFT_RESET, tmp);
3927 		tmp = RREG32(GRBM_SOFT_RESET);
3928 
3929 		udelay(50);
3930 
3931 		tmp &= ~grbm_soft_reset;
3932 		WREG32(GRBM_SOFT_RESET, tmp);
3933 		tmp = RREG32(GRBM_SOFT_RESET);
3934 	}
3935 
3936 	if (srbm_soft_reset) {
3937 		tmp = RREG32(SRBM_SOFT_RESET);
3938 		tmp |= srbm_soft_reset;
3939 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3940 		WREG32(SRBM_SOFT_RESET, tmp);
3941 		tmp = RREG32(SRBM_SOFT_RESET);
3942 
3943 		udelay(50);
3944 
3945 		tmp &= ~srbm_soft_reset;
3946 		WREG32(SRBM_SOFT_RESET, tmp);
3947 		tmp = RREG32(SRBM_SOFT_RESET);
3948 	}
3949 
3950 	/* Wait a little for things to settle down */
3951 	udelay(50);
3952 
3953 	evergreen_mc_resume(rdev, &save);
3954 	udelay(50);
3955 
3956 	evergreen_print_gpu_status_regs(rdev);
3957 }
3958 
3959 static void si_set_clk_bypass_mode(struct radeon_device *rdev)
3960 {
3961 	u32 tmp, i;
3962 
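	/* route SCLK and MCLK to their bypass sources so the SPLL/MPLL
	 * can be safely powered down before the PCI config reset */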
3963 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3964 	tmp |= SPLL_BYPASS_EN;
3965 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3966 
3967 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3968 	tmp |= SPLL_CTLREQ_CHG;
3969 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3970 
3971 	for (i = 0; i < rdev->usec_timeout; i++) {
3972 		if (RREG32(SPLL_STATUS) & SPLL_CHG_STATUS)
3973 			break;
3974 		udelay(1);
3975 	}
3976 
3977 	tmp = RREG32(CG_SPLL_FUNC_CNTL_2);
3978 	tmp &= ~(SPLL_CTLREQ_CHG | SCLK_MUX_UPDATE);
3979 	WREG32(CG_SPLL_FUNC_CNTL_2, tmp);
3980 
3981 	tmp = RREG32(MPLL_CNTL_MODE);
3982 	tmp &= ~MPLL_MCLK_SEL;
3983 	WREG32(MPLL_CNTL_MODE, tmp);
3984 }
3985 
3986 static void si_spll_powerdown(struct radeon_device *rdev)
3987 {
3988 	u32 tmp;
3989 
3990 	tmp = RREG32(SPLL_CNTL_MODE);
3991 	tmp |= SPLL_SW_DIR_CONTROL;
3992 	WREG32(SPLL_CNTL_MODE, tmp);
3993 
3994 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3995 	tmp |= SPLL_RESET;
3996 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
3997 
3998 	tmp = RREG32(CG_SPLL_FUNC_CNTL);
3999 	tmp |= SPLL_SLEEP;
4000 	WREG32(CG_SPLL_FUNC_CNTL, tmp);
4001 
4002 	tmp = RREG32(SPLL_CNTL_MODE);
4003 	tmp &= ~SPLL_SW_DIR_CONTROL;
4004 	WREG32(SPLL_CNTL_MODE, tmp);
4005 }
4006 
4007 static void si_gpu_pci_config_reset(struct radeon_device *rdev)
4008 {
4009 	struct evergreen_mc_save save;
4010 	u32 tmp, i;
4011 
4012 	dev_info(rdev->dev, "GPU pci config reset\n");
4013 
4014 	/* disable dpm? */
4015 
4016 	/* disable cg/pg */
4017 	si_fini_pg(rdev);
4018 	si_fini_cg(rdev);
4019 
4020 	/* Disable CP parsing/prefetching */
4021 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
4022 	/* dma0 */
4023 	tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
4024 	tmp &= ~DMA_RB_ENABLE;
4025 	WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
4026 	/* dma1 */
4027 	tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
4028 	tmp &= ~DMA_RB_ENABLE;
4029 	WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
4030 	/* XXX other engines? */
4031 
4032 	/* halt the rlc, disable cp internal ints */
4033 	si_rlc_stop(rdev);
4034 
4035 	udelay(50);
4036 
4037 	/* disable mem access */
4038 	evergreen_mc_stop(rdev, &save);
4039 	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4041 	}
4042 
4043 	/* set mclk/sclk to bypass */
4044 	si_set_clk_bypass_mode(rdev);
4045 	/* powerdown spll */
4046 	si_spll_powerdown(rdev);
4047 	/* disable BM */
4048 	pci_clear_master(rdev->pdev);
4049 	/* reset */
4050 	radeon_pci_config_reset(rdev);
4051 	/* wait for asic to come out of reset */
4052 	for (i = 0; i < rdev->usec_timeout; i++) {
4053 		if (RREG32(CONFIG_MEMSIZE) != 0xffffffff)
4054 			break;
4055 		udelay(1);
4056 	}
4057 }
4058 
4059 int si_asic_reset(struct radeon_device *rdev, bool hard)
4060 {
4061 	u32 reset_mask;
4062 
4063 	if (hard) {
4064 		si_gpu_pci_config_reset(rdev);
4065 		return 0;
4066 	}
4067 
4068 	reset_mask = si_gpu_check_soft_reset(rdev);
4069 
4070 	if (reset_mask)
4071 		r600_set_bios_scratch_engine_hung(rdev, true);
4072 
4073 	/* try soft reset */
4074 	si_gpu_soft_reset(rdev, reset_mask);
4075 
4076 	reset_mask = si_gpu_check_soft_reset(rdev);
4077 
4078 	/* try pci config reset */
4079 	if (reset_mask && radeon_hard_reset)
4080 		si_gpu_pci_config_reset(rdev);
4081 
4082 	reset_mask = si_gpu_check_soft_reset(rdev);
4083 
4084 	if (!reset_mask)
4085 		r600_set_bios_scratch_engine_hung(rdev, false);
4086 
4087 	return 0;
4088 }
4089 
4090 /**
4091  * si_gfx_is_lockup - Check if the GFX engine is locked up
4092  *
4093  * @rdev: radeon_device pointer
4094  * @ring: radeon_ring structure holding ring information
4095  *
4096  * Check if the GFX engine is locked up.
4097  * Returns true if the engine appears to be locked up, false if not.
4098  */
4099 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4100 {
4101 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
4102 
4103 	if (!(reset_mask & (RADEON_RESET_GFX |
4104 			    RADEON_RESET_COMPUTE |
4105 			    RADEON_RESET_CP))) {
4106 		radeon_ring_lockup_update(rdev, ring);
4107 		return false;
4108 	}
4109 	return radeon_ring_test_lockup(rdev, ring);
4110 }
4111 
4112 /* MC */
4113 static void si_mc_program(struct radeon_device *rdev)
4114 {
4115 	struct evergreen_mc_save save;
4116 	u32 tmp;
4117 	int i, j;
4118 
4119 	/* Initialize HDP */
4120 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4121 		WREG32((0x2c14 + j), 0x00000000);
4122 		WREG32((0x2c18 + j), 0x00000000);
4123 		WREG32((0x2c1c + j), 0x00000000);
4124 		WREG32((0x2c20 + j), 0x00000000);
4125 		WREG32((0x2c24 + j), 0x00000000);
4126 	}
4127 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4128 
4129 	evergreen_mc_stop(rdev, &save);
4130 	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4132 	}
4133 	if (!ASIC_IS_NODCE(rdev))
		/* Lock out access through VGA aperture */
4135 		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4136 	/* Update configuration */
4137 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4138 	       rdev->mc.vram_start >> 12);
4139 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4140 	       rdev->mc.vram_end >> 12);
4141 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4142 	       rdev->vram_scratch.gpu_addr >> 12);
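	/* MC_VM_FB_LOCATION packs base and top in 16MB units:
	 * base in bits 15:0, top in bits 31:16 */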
4143 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4144 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4145 	WREG32(MC_VM_FB_LOCATION, tmp);
4146 	/* XXX double check these! */
4147 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4148 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4149 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4150 	WREG32(MC_VM_AGP_BASE, 0);
4151 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4152 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4153 	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4155 	}
4156 	evergreen_mc_resume(rdev, &save);
4157 	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it from overwriting our objects */
4160 		rv515_vga_render_disable(rdev);
4161 	}
4162 }
4163 
4164 void si_vram_gtt_location(struct radeon_device *rdev,
4165 			  struct radeon_mc *mc)
4166 {
4167 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
4168 		/* leave room for at least 1024M GTT */
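		/* the MC address space is 40 bits (1TB);
		 * 0xFFC0000000 = 1TB - 1GB */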
4169 		dev_warn(rdev->dev, "limiting VRAM\n");
4170 		mc->real_vram_size = 0xFFC0000000ULL;
4171 		mc->mc_vram_size = 0xFFC0000000ULL;
4172 	}
4173 	radeon_vram_location(rdev, &rdev->mc, 0);
4174 	rdev->mc.gtt_base_align = 0;
4175 	radeon_gtt_location(rdev, mc);
4176 }
4177 
4178 static int si_mc_init(struct radeon_device *rdev)
4179 {
4180 	u32 tmp;
4181 	int chansize, numchan;
4182 
	/* Get VRAM information */
4184 	rdev->mc.vram_is_ddr = true;
4185 	tmp = RREG32(MC_ARB_RAMCFG);
4186 	if (tmp & CHANSIZE_OVERRIDE) {
4187 		chansize = 16;
4188 	} else if (tmp & CHANSIZE_MASK) {
4189 		chansize = 64;
4190 	} else {
4191 		chansize = 32;
4192 	}
4193 	tmp = RREG32(MC_SHARED_CHMAP);
4194 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4195 	case 0:
4196 	default:
4197 		numchan = 1;
4198 		break;
4199 	case 1:
4200 		numchan = 2;
4201 		break;
4202 	case 2:
4203 		numchan = 4;
4204 		break;
4205 	case 3:
4206 		numchan = 8;
4207 		break;
4208 	case 4:
4209 		numchan = 3;
4210 		break;
4211 	case 5:
4212 		numchan = 6;
4213 		break;
4214 	case 6:
4215 		numchan = 10;
4216 		break;
4217 	case 7:
4218 		numchan = 12;
4219 		break;
4220 	case 8:
4221 		numchan = 16;
4222 		break;
4223 	}
4224 	rdev->mc.vram_width = numchan * chansize;
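	/* e.g. Tahiti: 12 channels x 32 bits = 384-bit bus */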
	/* Could aper size report 0? */
4226 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4227 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4228 	/* size in MB on si */
4229 	tmp = RREG32(CONFIG_MEMSIZE);
4230 	/* some boards may have garbage in the upper 16 bits */
4231 	if (tmp & 0xffff0000) {
4232 		DRM_INFO("Probable bad vram size: 0x%08x\n", tmp);
4233 		if (tmp & 0xffff)
4234 			tmp &= 0xffff;
4235 	}
4236 	rdev->mc.mc_vram_size = tmp * 1024ULL * 1024ULL;
4237 	rdev->mc.real_vram_size = rdev->mc.mc_vram_size;
4238 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4239 	si_vram_gtt_location(rdev, &rdev->mc);
4240 	radeon_update_bandwidth_info(rdev);
4241 
4242 	return 0;
4243 }
4244 
4245 /*
4246  * GART
4247  */
4248 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
4249 {
4250 	/* flush hdp cache */
4251 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
4252 
	/* bits 0-15 are the VM contexts 0-15 */
4254 	WREG32(VM_INVALIDATE_REQUEST, 1);
4255 }
4256 
4257 static int si_pcie_gart_enable(struct radeon_device *rdev)
4258 {
4259 	int r, i;
4260 
4261 	if (rdev->gart.robj == NULL) {
4262 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4263 		return -EINVAL;
4264 	}
4265 	r = radeon_gart_table_vram_pin(rdev);
4266 	if (r)
4267 		return r;
4268 	/* Setup TLB control */
4269 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4270 	       (0xA << 7) |
4271 	       ENABLE_L1_TLB |
4272 	       ENABLE_L1_FRAGMENT_PROCESSING |
4273 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4274 	       ENABLE_ADVANCED_DRIVER_MODEL |
4275 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4276 	/* Setup L2 cache */
4277 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4278 	       ENABLE_L2_FRAGMENT_PROCESSING |
4279 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4280 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4281 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4282 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4283 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4284 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4285 	       BANK_SELECT(4) |
4286 	       L2_CACHE_BIGK_FRAGMENT_SIZE(4));
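	/* context0 maps the kernel's GTT aperture; contexts 1-15 are
	 * handed out as per-process VMs */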
4287 	/* setup context0 */
4288 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4289 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4290 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4291 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4292 			(u32)(rdev->dummy_page.addr >> 12));
4293 	WREG32(VM_CONTEXT0_CNTL2, 0);
4294 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4295 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4296 
4297 	WREG32(0x15D4, 0);
4298 	WREG32(0x15D8, 0);
4299 	WREG32(0x15DC, 0);
4300 
4301 	/* empty context1-15 */
4302 	/* set vm size, must be a multiple of 4 */
4303 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4304 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn - 1);
4305 	/* Assign the pt base to something valid for now; the pts used for
4306 	 * the VMs are determined by the application and setup and assigned
4307 	 * on the fly in the vm part of radeon_gart.c
4308 	 */
4309 	for (i = 1; i < 16; i++) {
4310 		if (i < 8)
4311 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4312 			       rdev->vm_manager.saved_table_addr[i]);
4313 		else
4314 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4315 			       rdev->vm_manager.saved_table_addr[i]);
4316 	}
4317 
4318 	/* enable context1-15 */
4319 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4320 	       (u32)(rdev->dummy_page.addr >> 12));
4321 	WREG32(VM_CONTEXT1_CNTL2, 4);
4322 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4323 				PAGE_TABLE_BLOCK_SIZE(radeon_vm_block_size - 9) |
4324 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4325 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4326 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4327 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4328 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4329 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4330 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4331 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4332 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4333 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4334 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4335 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4336 
4337 	si_pcie_gart_tlb_flush(rdev);
4338 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4339 		 (unsigned)(rdev->mc.gtt_size >> 20),
4340 		 (unsigned long long)rdev->gart.table_addr);
4341 	rdev->gart.ready = true;
4342 	return 0;
4343 }
4344 
4345 static void si_pcie_gart_disable(struct radeon_device *rdev)
4346 {
4347 	unsigned i;
4348 
4349 	for (i = 1; i < 16; ++i) {
4350 		uint32_t reg;
4351 		if (i < 8)
4352 			reg = VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2);
4353 		else
4354 			reg = VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2);
4355 		rdev->vm_manager.saved_table_addr[i] = RREG32(reg);
4356 	}
4357 
4358 	/* Disable all tables */
4359 	WREG32(VM_CONTEXT0_CNTL, 0);
4360 	WREG32(VM_CONTEXT1_CNTL, 0);
4361 	/* Setup TLB control */
4362 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4363 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4364 	/* Setup L2 cache */
4365 	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4366 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4367 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4368 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4369 	WREG32(VM_L2_CNTL2, 0);
4370 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4371 	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
4372 	radeon_gart_table_vram_unpin(rdev);
4373 }
4374 
4375 static void si_pcie_gart_fini(struct radeon_device *rdev)
4376 {
4377 	si_pcie_gart_disable(rdev);
4378 	radeon_gart_table_vram_free(rdev);
4379 	radeon_gart_fini(rdev);
4380 }
4381 
4382 /* vm parser */
4383 static bool si_vm_reg_valid(u32 reg)
4384 {
4385 	/* context regs are fine */
4386 	if (reg >= 0x28000)
4387 		return true;
4388 
4389 	/* shader regs are also fine */
4390 	if (reg >= 0xB000 && reg < 0xC000)
4391 		return true;
4392 
4393 	/* check config regs */
4394 	switch (reg) {
4395 	case GRBM_GFX_INDEX:
4396 	case CP_STRMOUT_CNTL:
4397 	case VGT_VTX_VECT_EJECT_REG:
4398 	case VGT_CACHE_INVALIDATION:
4399 	case VGT_ESGS_RING_SIZE:
4400 	case VGT_GSVS_RING_SIZE:
4401 	case VGT_GS_VERTEX_REUSE:
4402 	case VGT_PRIMITIVE_TYPE:
4403 	case VGT_INDEX_TYPE:
4404 	case VGT_NUM_INDICES:
4405 	case VGT_NUM_INSTANCES:
4406 	case VGT_TF_RING_SIZE:
4407 	case VGT_HS_OFFCHIP_PARAM:
4408 	case VGT_TF_MEMORY_BASE:
4409 	case PA_CL_ENHANCE:
4410 	case PA_SU_LINE_STIPPLE_VALUE:
4411 	case PA_SC_LINE_STIPPLE_STATE:
4412 	case PA_SC_ENHANCE:
4413 	case SQC_CACHES:
4414 	case SPI_STATIC_THREAD_MGMT_1:
4415 	case SPI_STATIC_THREAD_MGMT_2:
4416 	case SPI_STATIC_THREAD_MGMT_3:
4417 	case SPI_PS_MAX_WAVE_ID:
4418 	case SPI_CONFIG_CNTL:
4419 	case SPI_CONFIG_CNTL_1:
4420 	case TA_CNTL_AUX:
4421 	case TA_CS_BC_BASE_ADDR:
4422 		return true;
4423 	default:
4424 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4425 		return false;
4426 	}
4427 }
4428 
4429 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4430 				  u32 *ib, struct radeon_cs_packet *pkt)
4431 {
4432 	switch (pkt->opcode) {
4433 	case PACKET3_NOP:
4434 	case PACKET3_SET_BASE:
4435 	case PACKET3_SET_CE_DE_COUNTERS:
4436 	case PACKET3_LOAD_CONST_RAM:
4437 	case PACKET3_WRITE_CONST_RAM:
4438 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4439 	case PACKET3_DUMP_CONST_RAM:
4440 	case PACKET3_INCREMENT_CE_COUNTER:
4441 	case PACKET3_WAIT_ON_DE_COUNTER:
4442 	case PACKET3_CE_WRITE:
4443 		break;
4444 	default:
4445 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4446 		return -EINVAL;
4447 	}
4448 	return 0;
4449 }
4450 
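/* Validate a CP_DMA packet.  INFO (ib[idx + 1]) bits 30:29 and 21:20
 * select the src/dst address space (0 = register), COMMAND (ib[idx + 4])
 * carries the count and the SAS/DAS/SAIC/DAIC addressing flags.
 */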
4451 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4452 {
4453 	u32 start_reg, reg, i;
4454 	u32 command = ib[idx + 4];
4455 	u32 info = ib[idx + 1];
	u32 idx_value = ib[idx];

	if (command & PACKET3_CP_DMA_CMD_SAS) {
4458 		/* src address space is register */
4459 		if (((info & 0x60000000) >> 29) == 0) {
4460 			start_reg = idx_value << 2;
4461 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4462 				reg = start_reg;
4463 				if (!si_vm_reg_valid(reg)) {
4464 					DRM_ERROR("CP DMA Bad SRC register\n");
4465 					return -EINVAL;
4466 				}
4467 			} else {
4468 				for (i = 0; i < (command & 0x1fffff); i++) {
4469 					reg = start_reg + (4 * i);
4470 					if (!si_vm_reg_valid(reg)) {
4471 						DRM_ERROR("CP DMA Bad SRC register\n");
4472 						return -EINVAL;
4473 					}
4474 				}
4475 			}
4476 		}
4477 	}
4478 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4479 		/* dst address space is register */
4480 		if (((info & 0x00300000) >> 20) == 0) {
4481 			start_reg = ib[idx + 2];
4482 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4483 				reg = start_reg;
4484 				if (!si_vm_reg_valid(reg)) {
4485 					DRM_ERROR("CP DMA Bad DST register\n");
4486 					return -EINVAL;
4487 				}
4488 			} else {
4489 				for (i = 0; i < (command & 0x1fffff); i++) {
4490 					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg)) {
4492 						DRM_ERROR("CP DMA Bad DST register\n");
4493 						return -EINVAL;
4494 					}
4495 				}
4496 			}
4497 		}
4498 	}
4499 	return 0;
4500 }
4501 
4502 static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
4503 				   u32 *ib, struct radeon_cs_packet *pkt)
4504 {
4505 	int r;
4506 	u32 idx = pkt->idx + 1;
4507 	u32 idx_value = ib[idx];
4508 	u32 start_reg, end_reg, reg, i;
4509 
4510 	switch (pkt->opcode) {
4511 	case PACKET3_NOP:
4512 	case PACKET3_SET_BASE:
4513 	case PACKET3_CLEAR_STATE:
4514 	case PACKET3_INDEX_BUFFER_SIZE:
4515 	case PACKET3_DISPATCH_DIRECT:
4516 	case PACKET3_DISPATCH_INDIRECT:
4517 	case PACKET3_ALLOC_GDS:
4518 	case PACKET3_WRITE_GDS_RAM:
4519 	case PACKET3_ATOMIC_GDS:
4520 	case PACKET3_ATOMIC:
4521 	case PACKET3_OCCLUSION_QUERY:
4522 	case PACKET3_SET_PREDICATION:
4523 	case PACKET3_COND_EXEC:
4524 	case PACKET3_PRED_EXEC:
4525 	case PACKET3_DRAW_INDIRECT:
4526 	case PACKET3_DRAW_INDEX_INDIRECT:
4527 	case PACKET3_INDEX_BASE:
4528 	case PACKET3_DRAW_INDEX_2:
4529 	case PACKET3_CONTEXT_CONTROL:
4530 	case PACKET3_INDEX_TYPE:
4531 	case PACKET3_DRAW_INDIRECT_MULTI:
4532 	case PACKET3_DRAW_INDEX_AUTO:
4533 	case PACKET3_DRAW_INDEX_IMMD:
4534 	case PACKET3_NUM_INSTANCES:
4535 	case PACKET3_DRAW_INDEX_MULTI_AUTO:
4536 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4537 	case PACKET3_DRAW_INDEX_OFFSET_2:
4538 	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
4539 	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
4540 	case PACKET3_MPEG_INDEX:
4541 	case PACKET3_WAIT_REG_MEM:
4542 	case PACKET3_MEM_WRITE:
4543 	case PACKET3_PFP_SYNC_ME:
4544 	case PACKET3_SURFACE_SYNC:
4545 	case PACKET3_EVENT_WRITE:
4546 	case PACKET3_EVENT_WRITE_EOP:
4547 	case PACKET3_EVENT_WRITE_EOS:
4548 	case PACKET3_SET_CONTEXT_REG:
4549 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4550 	case PACKET3_SET_SH_REG:
4551 	case PACKET3_SET_SH_REG_OFFSET:
4552 	case PACKET3_INCREMENT_DE_COUNTER:
4553 	case PACKET3_WAIT_ON_CE_COUNTER:
4554 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4555 	case PACKET3_ME_WRITE:
4556 		break;
4557 	case PACKET3_COPY_DATA:
4558 		if ((idx_value & 0xf00) == 0) {
4559 			reg = ib[idx + 3] * 4;
4560 			if (!si_vm_reg_valid(reg))
4561 				return -EINVAL;
4562 		}
4563 		break;
4564 	case PACKET3_WRITE_DATA:
4565 		if ((idx_value & 0xf00) == 0) {
4566 			start_reg = ib[idx + 1] * 4;
4567 			if (idx_value & 0x10000) {
4568 				if (!si_vm_reg_valid(start_reg))
4569 					return -EINVAL;
4570 			} else {
4571 				for (i = 0; i < (pkt->count - 2); i++) {
4572 					reg = start_reg + (4 * i);
4573 					if (!si_vm_reg_valid(reg))
4574 						return -EINVAL;
4575 				}
4576 			}
4577 		}
4578 		break;
4579 	case PACKET3_COND_WRITE:
4580 		if (idx_value & 0x100) {
4581 			reg = ib[idx + 5] * 4;
4582 			if (!si_vm_reg_valid(reg))
4583 				return -EINVAL;
4584 		}
4585 		break;
4586 	case PACKET3_COPY_DW:
4587 		if (idx_value & 0x2) {
4588 			reg = ib[idx + 3] * 4;
4589 			if (!si_vm_reg_valid(reg))
4590 				return -EINVAL;
4591 		}
4592 		break;
4593 	case PACKET3_SET_CONFIG_REG:
4594 		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
4595 		end_reg = 4 * pkt->count + start_reg - 4;
4596 		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
4597 		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
4598 		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
4599 			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
4600 			return -EINVAL;
4601 		}
4602 		for (i = 0; i < pkt->count; i++) {
4603 			reg = start_reg + (4 * i);
4604 			if (!si_vm_reg_valid(reg))
4605 				return -EINVAL;
4606 		}
4607 		break;
4608 	case PACKET3_CP_DMA:
4609 		r = si_vm_packet3_cp_dma_check(ib, idx);
4610 		if (r)
4611 			return r;
4612 		break;
4613 	default:
4614 		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
4615 		return -EINVAL;
4616 	}
4617 	return 0;
4618 }
4619 
4620 static int si_vm_packet3_compute_check(struct radeon_device *rdev,
4621 				       u32 *ib, struct radeon_cs_packet *pkt)
4622 {
4623 	int r;
4624 	u32 idx = pkt->idx + 1;
4625 	u32 idx_value = ib[idx];
4626 	u32 start_reg, reg, i;
4627 
4628 	switch (pkt->opcode) {
4629 	case PACKET3_NOP:
4630 	case PACKET3_SET_BASE:
4631 	case PACKET3_CLEAR_STATE:
4632 	case PACKET3_DISPATCH_DIRECT:
4633 	case PACKET3_DISPATCH_INDIRECT:
4634 	case PACKET3_ALLOC_GDS:
4635 	case PACKET3_WRITE_GDS_RAM:
4636 	case PACKET3_ATOMIC_GDS:
4637 	case PACKET3_ATOMIC:
4638 	case PACKET3_OCCLUSION_QUERY:
4639 	case PACKET3_SET_PREDICATION:
4640 	case PACKET3_COND_EXEC:
4641 	case PACKET3_PRED_EXEC:
4642 	case PACKET3_CONTEXT_CONTROL:
4643 	case PACKET3_STRMOUT_BUFFER_UPDATE:
4644 	case PACKET3_WAIT_REG_MEM:
4645 	case PACKET3_MEM_WRITE:
4646 	case PACKET3_PFP_SYNC_ME:
4647 	case PACKET3_SURFACE_SYNC:
4648 	case PACKET3_EVENT_WRITE:
4649 	case PACKET3_EVENT_WRITE_EOP:
4650 	case PACKET3_EVENT_WRITE_EOS:
4651 	case PACKET3_SET_CONTEXT_REG:
4652 	case PACKET3_SET_CONTEXT_REG_INDIRECT:
4653 	case PACKET3_SET_SH_REG:
4654 	case PACKET3_SET_SH_REG_OFFSET:
4655 	case PACKET3_INCREMENT_DE_COUNTER:
4656 	case PACKET3_WAIT_ON_CE_COUNTER:
4657 	case PACKET3_WAIT_ON_AVAIL_BUFFER:
4658 	case PACKET3_ME_WRITE:
4659 		break;
4660 	case PACKET3_COPY_DATA:
4661 		if ((idx_value & 0xf00) == 0) {
4662 			reg = ib[idx + 3] * 4;
4663 			if (!si_vm_reg_valid(reg))
4664 				return -EINVAL;
4665 		}
4666 		break;
4667 	case PACKET3_WRITE_DATA:
4668 		if ((idx_value & 0xf00) == 0) {
4669 			start_reg = ib[idx + 1] * 4;
4670 			if (idx_value & 0x10000) {
4671 				if (!si_vm_reg_valid(start_reg))
4672 					return -EINVAL;
4673 			} else {
4674 				for (i = 0; i < (pkt->count - 2); i++) {
4675 					reg = start_reg + (4 * i);
4676 					if (!si_vm_reg_valid(reg))
4677 						return -EINVAL;
4678 				}
4679 			}
4680 		}
4681 		break;
4682 	case PACKET3_COND_WRITE:
4683 		if (idx_value & 0x100) {
4684 			reg = ib[idx + 5] * 4;
4685 			if (!si_vm_reg_valid(reg))
4686 				return -EINVAL;
4687 		}
4688 		break;
4689 	case PACKET3_COPY_DW:
4690 		if (idx_value & 0x2) {
4691 			reg = ib[idx + 3] * 4;
4692 			if (!si_vm_reg_valid(reg))
4693 				return -EINVAL;
4694 		}
4695 		break;
4696 	case PACKET3_CP_DMA:
4697 		r = si_vm_packet3_cp_dma_check(ib, idx);
4698 		if (r)
4699 			return r;
4700 		break;
4701 	default:
4702 		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
4703 		return -EINVAL;
4704 	}
4705 	return 0;
4706 }
4707 
4708 int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4709 {
4710 	int ret = 0;
4711 	u32 idx = 0, i;
4712 	struct radeon_cs_packet pkt;
4713 
4714 	do {
4715 		pkt.idx = idx;
4716 		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
4717 		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
4718 		pkt.one_reg_wr = 0;
4719 		switch (pkt.type) {
4720 		case RADEON_PACKET_TYPE0:
4721 			dev_err(rdev->dev, "Packet0 not allowed!\n");
4722 			ret = -EINVAL;
4723 			break;
4724 		case RADEON_PACKET_TYPE2:
4725 			idx += 1;
4726 			break;
4727 		case RADEON_PACKET_TYPE3:
4728 			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
4729 			if (ib->is_const_ib)
4730 				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
4731 			else {
4732 				switch (ib->ring) {
4733 				case RADEON_RING_TYPE_GFX_INDEX:
4734 					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
4735 					break;
4736 				case CAYMAN_RING_TYPE_CP1_INDEX:
4737 				case CAYMAN_RING_TYPE_CP2_INDEX:
4738 					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
4739 					break;
4740 				default:
4741 					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
4742 					ret = -EINVAL;
4743 					break;
4744 				}
4745 			}
4746 			idx += pkt.count + 2;
4747 			break;
4748 		default:
4749 			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
4750 			ret = -EINVAL;
4751 			break;
4752 		}
4753 		if (ret) {
4754 			for (i = 0; i < ib->length_dw; i++) {
4755 				if (i == idx)
4756 					printk("\t0x%08x <---\n", ib->ptr[i]);
4757 				else
4758 					printk("\t0x%08x\n", ib->ptr[i]);
4759 			}
4760 			break;
4761 		}
4762 	} while (idx < ib->length_dw);
4763 
4764 	return ret;
4765 }
4766 
4767 /*
4768  * vm
4769  */
4770 int si_vm_init(struct radeon_device *rdev)
4771 {
4772 	/* number of VMs */
4773 	rdev->vm_manager.nvm = 16;
4774 	/* base offset of vram pages */
4775 	rdev->vm_manager.vram_base_offset = 0;
4776 
4777 	return 0;
4778 }
4779 
4780 void si_vm_fini(struct radeon_device *rdev)
4781 {
4782 }
4783 
4784 /**
4785  * si_vm_decode_fault - print human readable fault info
4786  *
4787  * @rdev: radeon_device pointer
4788  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4789  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4790  *
4791  * Print human readable fault information (SI).
4792  */
4793 static void si_vm_decode_fault(struct radeon_device *rdev,
4794 			       u32 status, u32 addr)
4795 {
4796 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4797 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4798 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4799 	char *block;
4800 
4801 	if (rdev->family == CHIP_TAHITI) {
4802 		switch (mc_id) {
4803 		case 160:
4804 		case 144:
4805 		case 96:
4806 		case 80:
4807 		case 224:
4808 		case 208:
4809 		case 32:
4810 		case 16:
4811 			block = "CB";
4812 			break;
4813 		case 161:
4814 		case 145:
4815 		case 97:
4816 		case 81:
4817 		case 225:
4818 		case 209:
4819 		case 33:
4820 		case 17:
4821 			block = "CB_FMASK";
4822 			break;
4823 		case 162:
4824 		case 146:
4825 		case 98:
4826 		case 82:
4827 		case 226:
4828 		case 210:
4829 		case 34:
4830 		case 18:
4831 			block = "CB_CMASK";
4832 			break;
4833 		case 163:
4834 		case 147:
4835 		case 99:
4836 		case 83:
4837 		case 227:
4838 		case 211:
4839 		case 35:
4840 		case 19:
4841 			block = "CB_IMMED";
4842 			break;
4843 		case 164:
4844 		case 148:
4845 		case 100:
4846 		case 84:
4847 		case 228:
4848 		case 212:
4849 		case 36:
4850 		case 20:
4851 			block = "DB";
4852 			break;
4853 		case 165:
4854 		case 149:
4855 		case 101:
4856 		case 85:
4857 		case 229:
4858 		case 213:
4859 		case 37:
4860 		case 21:
4861 			block = "DB_HTILE";
4862 			break;
4863 		case 167:
4864 		case 151:
4865 		case 103:
4866 		case 87:
4867 		case 231:
4868 		case 215:
4869 		case 39:
4870 		case 23:
4871 			block = "DB_STEN";
4872 			break;
4873 		case 72:
4874 		case 68:
4875 		case 64:
4876 		case 8:
4877 		case 4:
4878 		case 0:
4879 		case 136:
4880 		case 132:
4881 		case 128:
4882 		case 200:
4883 		case 196:
4884 		case 192:
4885 			block = "TC";
4886 			break;
4887 		case 112:
4888 		case 48:
4889 			block = "CP";
4890 			break;
4891 		case 49:
4892 		case 177:
4893 		case 50:
4894 		case 178:
4895 			block = "SH";
4896 			break;
4897 		case 53:
4898 		case 190:
4899 			block = "VGT";
4900 			break;
4901 		case 117:
4902 			block = "IH";
4903 			break;
4904 		case 51:
4905 		case 115:
4906 			block = "RLC";
4907 			break;
4908 		case 119:
4909 		case 183:
4910 			block = "DMA0";
4911 			break;
4912 		case 61:
4913 			block = "DMA1";
4914 			break;
4915 		case 248:
4916 		case 120:
4917 			block = "HDP";
4918 			break;
4919 		default:
4920 			block = "unknown";
4921 			break;
4922 		}
4923 	} else {
4924 		switch (mc_id) {
4925 		case 32:
4926 		case 16:
4927 		case 96:
4928 		case 80:
4929 		case 160:
4930 		case 144:
4931 		case 224:
4932 		case 208:
4933 			block = "CB";
4934 			break;
4935 		case 33:
4936 		case 17:
4937 		case 97:
4938 		case 81:
4939 		case 161:
4940 		case 145:
4941 		case 225:
4942 		case 209:
4943 			block = "CB_FMASK";
4944 			break;
4945 		case 34:
4946 		case 18:
4947 		case 98:
4948 		case 82:
4949 		case 162:
4950 		case 146:
4951 		case 226:
4952 		case 210:
4953 			block = "CB_CMASK";
4954 			break;
4955 		case 35:
4956 		case 19:
4957 		case 99:
4958 		case 83:
4959 		case 163:
4960 		case 147:
4961 		case 227:
4962 		case 211:
4963 			block = "CB_IMMED";
4964 			break;
4965 		case 36:
4966 		case 20:
4967 		case 100:
4968 		case 84:
4969 		case 164:
4970 		case 148:
4971 		case 228:
4972 		case 212:
4973 			block = "DB";
4974 			break;
4975 		case 37:
4976 		case 21:
4977 		case 101:
4978 		case 85:
4979 		case 165:
4980 		case 149:
4981 		case 229:
4982 		case 213:
4983 			block = "DB_HTILE";
4984 			break;
4985 		case 39:
4986 		case 23:
4987 		case 103:
4988 		case 87:
4989 		case 167:
4990 		case 151:
4991 		case 231:
4992 		case 215:
4993 			block = "DB_STEN";
4994 			break;
4995 		case 72:
4996 		case 68:
4997 		case 8:
4998 		case 4:
4999 		case 136:
5000 		case 132:
5001 		case 200:
5002 		case 196:
5003 			block = "TC";
5004 			break;
5005 		case 112:
5006 		case 48:
5007 			block = "CP";
5008 			break;
5009 		case 49:
5010 		case 177:
5011 		case 50:
5012 		case 178:
5013 			block = "SH";
5014 			break;
5015 		case 53:
5016 			block = "VGT";
5017 			break;
5018 		case 117:
5019 			block = "IH";
5020 			break;
5021 		case 51:
5022 		case 115:
5023 			block = "RLC";
5024 			break;
5025 		case 119:
5026 		case 183:
5027 			block = "DMA0";
5028 			break;
5029 		case 61:
5030 			block = "DMA1";
5031 			break;
5032 		case 248:
5033 		case 120:
5034 			block = "HDP";
5035 			break;
5036 		default:
5037 			block = "unknown";
5038 			break;
5039 		}
5040 	}
5041 
5042 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
5043 	       protections, vmid, addr,
5044 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
5045 	       block, mc_id);
5046 }
5047 
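/**
 * si_vm_flush - flush the TLB for a specific VM via the GFX ring (SI)
 *
 * @rdev: radeon_device pointer
 * @ring: radeon_ring structure holding ring information
 * @vm_id: vm id to flush
 * @pd_addr: address of the page directory
 *
 * Update the page table base address for the requested VM, flush the
 * HDP cache, request a TLB invalidation for that VM and wait for the
 * invalidation to complete, then sync the PFP to the ME so that no
 * stale PFP reads survive the flush.
 */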
5048 void si_vm_flush(struct radeon_device *rdev, struct radeon_ring *ring,
5049 		 unsigned vm_id, uint64_t pd_addr)
5050 {
5051 	/* write new base address */
5052 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5053 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5054 				 WRITE_DATA_DST_SEL(0)));
5055 
5056 	if (vm_id < 8) {
5057 		radeon_ring_write(ring,
5058 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm_id << 2)) >> 2);
5059 	} else {
5060 		radeon_ring_write(ring,
5061 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm_id - 8) << 2)) >> 2);
5062 	}
5063 	radeon_ring_write(ring, 0);
5064 	radeon_ring_write(ring, pd_addr >> 12);
5065 
5066 	/* flush hdp cache */
5067 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5068 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5069 				 WRITE_DATA_DST_SEL(0)));
5070 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
5071 	radeon_ring_write(ring, 0);
5072 	radeon_ring_write(ring, 0x1);
5073 
5074 	/* bits 0-15 are the VM contexts 0-15 */
5075 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
5076 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
5077 				 WRITE_DATA_DST_SEL(0)));
5078 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5079 	radeon_ring_write(ring, 0);
5080 	radeon_ring_write(ring, 1 << vm_id);
5081 
5082 	/* wait for the invalidate to complete */
5083 	radeon_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
5084 	radeon_ring_write(ring, (WAIT_REG_MEM_FUNCTION(0) |  /* always */
5085 				 WAIT_REG_MEM_ENGINE(0))); /* me */
5086 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
5087 	radeon_ring_write(ring, 0);
5088 	radeon_ring_write(ring, 0); /* ref */
5089 	radeon_ring_write(ring, 0); /* mask */
5090 	radeon_ring_write(ring, 0x20); /* poll interval */
5091 
5092 	/* sync PFP to ME, otherwise we might get invalid PFP reads */
5093 	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
5094 	radeon_ring_write(ring, 0x0);
5095 }
5096 
5097 /*
5098  *  Power and clock gating
5099  */
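/**
 * si_wait_for_rlc_serdes - wait for the RLC serdes to go idle (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Poll the RLC_SERDES_MASTER_BUSY_0/1 registers until both read back
 * zero or the usec timeout expires.
 */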
5100 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
5101 {
5102 	int i;
5103 
5104 	for (i = 0; i < rdev->usec_timeout; i++) {
5105 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
5106 			break;
5107 		udelay(1);
5108 	}
5109 
5110 	for (i = 0; i < rdev->usec_timeout; i++) {
5111 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
5112 			break;
5113 		udelay(1);
5114 	}
5115 }
5116 
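/**
 * si_enable_gui_idle_interrupt - enable/disable the gui idle interrupt (SI)
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the interrupt
 *
 * Toggle the context busy/empty interrupt enables on ring 0.  When
 * disabling, also wait for the RLC to report the expected clock and
 * power status so no interrupt is left in flight.
 */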
5117 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
5118 					 bool enable)
5119 {
5120 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
5121 	u32 mask;
5122 	int i;
5123 
5124 	if (enable)
5125 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5126 	else
5127 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5128 	WREG32(CP_INT_CNTL_RING0, tmp);
5129 
5130 	if (!enable) {
5131 		/* read a gfx register */
5132 		tmp = RREG32(DB_DEPTH_INFO);
5133 
5134 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
5135 		for (i = 0; i < rdev->usec_timeout; i++) {
5136 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
5137 				break;
5138 			udelay(1);
5139 		}
5140 	}
5141 }
5142 
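/**
 * si_set_uvd_dcm - configure UVD dynamic clock mode (SI)
 *
 * @rdev: radeon_device pointer
 * @sw_mode: true to select software-controlled clocking
 *
 * Program UVD_CGC_CTRL and UVD_CGC_CTRL2 for either software or
 * hardware controlled dynamic clocking of the UVD block.
 */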
5143 static void si_set_uvd_dcm(struct radeon_device *rdev,
5144 			   bool sw_mode)
5145 {
5146 	u32 tmp, tmp2;
5147 
5148 	tmp = RREG32(UVD_CGC_CTRL);
5149 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
5150 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
5151 
5152 	if (sw_mode) {
5153 		tmp &= ~0x7ffff800;
5154 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
5155 	} else {
5156 		tmp |= 0x7ffff800;
5157 		tmp2 = 0;
5158 	}
5159 
5160 	WREG32(UVD_CGC_CTRL, tmp);
5161 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
5162 }
5163 
5164 void si_init_uvd_internal_cg(struct radeon_device *rdev)
5165 {
5166 	bool hw_mode = true;
5167 
5168 	if (hw_mode) {
5169 		si_set_uvd_dcm(rdev, false);
5170 	} else {
5171 		u32 tmp = RREG32(UVD_CGC_CTRL);
5172 		tmp &= ~DCM;
5173 		WREG32(UVD_CGC_CTRL, tmp);
5174 	}
5175 }
5176 
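/**
 * si_halt_rlc - disable the RLC (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Disable the RLC if it is currently enabled and wait for the serdes
 * to go idle.  Returns the original RLC_CNTL value so the caller can
 * restore it later with si_update_rlc().
 */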
5177 static u32 si_halt_rlc(struct radeon_device *rdev)
5178 {
5179 	u32 data, orig;
5180 
5181 	orig = data = RREG32(RLC_CNTL);
5182 
5183 	if (data & RLC_ENABLE) {
5184 		data &= ~RLC_ENABLE;
5185 		WREG32(RLC_CNTL, data);
5186 
5187 		si_wait_for_rlc_serdes(rdev);
5188 	}
5189 
5190 	return orig;
5191 }
5192 
5193 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
5194 {
5195 	u32 tmp;
5196 
5197 	tmp = RREG32(RLC_CNTL);
5198 	if (tmp != rlc)
5199 		WREG32(RLC_CNTL, rlc);
5200 }
5201 
5202 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
5203 {
5204 	u32 data, orig;
5205 
5206 	orig = data = RREG32(DMA_PG);
5207 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
5208 		data |= PG_CNTL_ENABLE;
5209 	else
5210 		data &= ~PG_CNTL_ENABLE;
5211 	if (orig != data)
5212 		WREG32(DMA_PG, data);
5213 }
5214 
5215 static void si_init_dma_pg(struct radeon_device *rdev)
5216 {
5217 	u32 tmp;
5218 
5219 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
5220 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
5221 
5222 	for (tmp = 0; tmp < 5; tmp++)
5223 		WREG32(DMA_PGFSM_WRITE, 0);
5224 }
5225 
5226 static void si_enable_gfx_cgpg(struct radeon_device *rdev,
5227 			       bool enable)
5228 {
5229 	u32 tmp;
5230 
5231 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG)) {
5232 		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
5233 		WREG32(RLC_TTOP_D, tmp);
5234 
5235 		tmp = RREG32(RLC_PG_CNTL);
5236 		tmp |= GFX_PG_ENABLE;
5237 		WREG32(RLC_PG_CNTL, tmp);
5238 
5239 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5240 		tmp |= AUTO_PG_EN;
5241 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5242 	} else {
5243 		tmp = RREG32(RLC_AUTO_PG_CTRL);
5244 		tmp &= ~AUTO_PG_EN;
5245 		WREG32(RLC_AUTO_PG_CTRL, tmp);
5246 
5247 		tmp = RREG32(DB_RENDER_CONTROL);
5248 	}
5249 }
5250 
5251 static void si_init_gfx_cgpg(struct radeon_device *rdev)
5252 {
5253 	u32 tmp;
5254 
5255 	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5256 
5257 	tmp = RREG32(RLC_PG_CNTL);
5258 	tmp |= GFX_PG_SRC;
5259 	WREG32(RLC_PG_CNTL, tmp);
5260 
5261 	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5262 
5263 	tmp = RREG32(RLC_AUTO_PG_CTRL);
5264 
5265 	tmp &= ~GRBM_REG_SGIT_MASK;
5266 	tmp |= GRBM_REG_SGIT(0x700);
5267 	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
5268 	WREG32(RLC_AUTO_PG_CTRL, tmp);
5269 }
5270 
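/**
 * si_get_cu_active_bitmap - get the bitmap of active CUs (SI)
 *
 * @rdev: radeon_device pointer
 * @se: shader engine to query
 * @sh: shader array to query
 *
 * Combine the hard and soft CU disable masks for the selected SE/SH
 * and return a bitmap with a bit set for every active compute unit.
 */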
5271 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
5272 {
5273 	u32 mask = 0, tmp, tmp1;
5274 	int i;
5275 
5276 	si_select_se_sh(rdev, se, sh);
5277 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
5278 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
5279 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5280 
5281 	tmp &= 0xffff0000;
5282 
5283 	tmp |= tmp1;
5284 	tmp >>= 16;
5285 
5286 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i++) {
5287 		mask <<= 1;
5288 		mask |= 1;
5289 	}
5290 
5291 	return (~tmp) & mask;
5292 }
5293 
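/**
 * si_init_ao_cu_mask - set up the always-on CU mask (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Pick up to two active CUs per shader array to keep always on, write
 * the resulting mask to RLC_PG_AO_CU_MASK and program the total active
 * CU count into RLC_MAX_PG_CU.
 */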
5294 static void si_init_ao_cu_mask(struct radeon_device *rdev)
5295 {
5296 	u32 i, j, k, active_cu_number = 0;
5297 	u32 mask, counter, cu_bitmap;
5298 	u32 tmp = 0;
5299 
5300 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
5301 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
5302 			mask = 1;
5303 			cu_bitmap = 0;
5304 			counter = 0;
5305 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
5306 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
5307 					if (counter < 2)
5308 						cu_bitmap |= mask;
5309 					counter++;
5310 				}
5311 				mask <<= 1;
5312 			}
5313 
5314 			active_cu_number += counter;
5315 			tmp |= (cu_bitmap << (i * 16 + j * 8));
5316 		}
5317 	}
5318 
5319 	WREG32(RLC_PG_AO_CU_MASK, tmp);
5320 
5321 	tmp = RREG32(RLC_MAX_PG_CU);
5322 	tmp &= ~MAX_PU_CU_MASK;
5323 	tmp |= MAX_PU_CU(active_cu_number);
5324 	WREG32(RLC_MAX_PG_CU, tmp);
5325 }
5326 
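/**
 * si_enable_cgcg - enable/disable coarse grain clock gating (SI)
 *
 * @rdev: radeon_device pointer
 * @enable: enable or disable the feature
 *
 * Enable or disable coarse grain clock gating (CGCG) and coarse grain
 * light sleep (CGLS) for the GFX block, reprogramming the RLC serdes
 * as part of the transition.
 */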
5327 static void si_enable_cgcg(struct radeon_device *rdev,
5328 			   bool enable)
5329 {
5330 	u32 data, orig, tmp;
5331 
5332 	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);
5333 
5334 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
5335 		si_enable_gui_idle_interrupt(rdev, true);
5336 
5337 		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);
5338 
5339 		tmp = si_halt_rlc(rdev);
5340 
5341 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5342 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5343 		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);
5344 
5345 		si_wait_for_rlc_serdes(rdev);
5346 
5347 		si_update_rlc(rdev, tmp);
5348 
5349 		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);
5350 
5351 		data |= CGCG_EN | CGLS_EN;
5352 	} else {
5353 		si_enable_gui_idle_interrupt(rdev, false);
5354 
5355 		RREG32(CB_CGTT_SCLK_CTRL);
5356 		RREG32(CB_CGTT_SCLK_CTRL);
5357 		RREG32(CB_CGTT_SCLK_CTRL);
5358 		RREG32(CB_CGTT_SCLK_CTRL);
5359 
5360 		data &= ~(CGCG_EN | CGLS_EN);
5361 	}
5362 
5363 	if (orig != data)
5364 		WREG32(RLC_CGCG_CGLS_CTRL, data);
5365 }
5366 
5367 static void si_enable_mgcg(struct radeon_device *rdev,
5368 			   bool enable)
5369 {
5370 	u32 data, orig, tmp = 0;
5371 
5372 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
5373 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5374 		data = 0x96940200;
5375 		if (orig != data)
5376 			WREG32(CGTS_SM_CTRL_REG, data);
5377 
5378 		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
5379 			orig = data = RREG32(CP_MEM_SLP_CNTL);
5380 			data |= CP_MEM_LS_EN;
5381 			if (orig != data)
5382 				WREG32(CP_MEM_SLP_CNTL, data);
5383 		}
5384 
5385 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5386 		data &= 0xffffffc0;
5387 		if (orig != data)
5388 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5389 
5390 		tmp = si_halt_rlc(rdev);
5391 
5392 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5393 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5394 		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);
5395 
5396 		si_update_rlc(rdev, tmp);
5397 	} else {
5398 		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
5399 		data |= 0x00000003;
5400 		if (orig != data)
5401 			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);
5402 
5403 		data = RREG32(CP_MEM_SLP_CNTL);
5404 		if (data & CP_MEM_LS_EN) {
5405 			data &= ~CP_MEM_LS_EN;
5406 			WREG32(CP_MEM_SLP_CNTL, data);
5407 		}
5408 		orig = data = RREG32(CGTS_SM_CTRL_REG);
5409 		data |= LS_OVERRIDE | OVERRIDE;
5410 		if (orig != data)
5411 			WREG32(CGTS_SM_CTRL_REG, data);
5412 
5413 		tmp = si_halt_rlc(rdev);
5414 
5415 		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
5416 		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
5417 		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);
5418 
5419 		si_update_rlc(rdev, tmp);
5420 	}
5421 }
5422 
5423 static void si_enable_uvd_mgcg(struct radeon_device *rdev,
5424 			       bool enable)
5425 {
5426 	u32 orig, data, tmp;
5427 
5428 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
5429 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5430 		tmp |= 0x3fff;
5431 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5432 
5433 		orig = data = RREG32(UVD_CGC_CTRL);
5434 		data |= DCM;
5435 		if (orig != data)
5436 			WREG32(UVD_CGC_CTRL, data);
5437 
5438 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
5439 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
5440 	} else {
5441 		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
5442 		tmp &= ~0x3fff;
5443 		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);
5444 
5445 		orig = data = RREG32(UVD_CGC_CTRL);
5446 		data &= ~DCM;
5447 		if (orig != data)
5448 			WREG32(UVD_CGC_CTRL, data);
5449 
5450 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
5451 		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
5452 	}
5453 }
5454 
5455 static const u32 mc_cg_registers[] =
5456 {
5457 	MC_HUB_MISC_HUB_CG,
5458 	MC_HUB_MISC_SIP_CG,
5459 	MC_HUB_MISC_VM_CG,
5460 	MC_XPB_CLK_GAT,
5461 	ATC_MISC_CG,
5462 	MC_CITF_MISC_WR_CG,
5463 	MC_CITF_MISC_RD_CG,
5464 	MC_CITF_MISC_VM_CG,
5465 	VM_L2_CG,
5466 };
5467 
5468 static void si_enable_mc_ls(struct radeon_device *rdev,
5469 			    bool enable)
5470 {
5471 	int i;
5472 	u32 orig, data;
5473 
5474 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5475 		orig = data = RREG32(mc_cg_registers[i]);
5476 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5477 			data |= MC_LS_ENABLE;
5478 		else
5479 			data &= ~MC_LS_ENABLE;
5480 		if (data != orig)
5481 			WREG32(mc_cg_registers[i], data);
5482 	}
5483 }
5484 
5485 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5486 			       bool enable)
5487 {
5488 	int i;
5489 	u32 orig, data;
5490 
5491 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5492 		orig = data = RREG32(mc_cg_registers[i]);
5493 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5494 			data |= MC_CG_ENABLE;
5495 		else
5496 			data &= ~MC_CG_ENABLE;
5497 		if (data != orig)
5498 			WREG32(mc_cg_registers[i], data);
5499 	}
5500 }
5501 
5502 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5503 			       bool enable)
5504 {
5505 	u32 orig, data, offset;
5506 	int i;
5507 
5508 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5509 		for (i = 0; i < 2; i++) {
5510 			if (i == 0)
5511 				offset = DMA0_REGISTER_OFFSET;
5512 			else
5513 				offset = DMA1_REGISTER_OFFSET;
5514 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5515 			data &= ~MEM_POWER_OVERRIDE;
5516 			if (data != orig)
5517 				WREG32(DMA_POWER_CNTL + offset, data);
5518 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5519 		}
5520 	} else {
5521 		for (i = 0; i < 2; i++) {
5522 			if (i == 0)
5523 				offset = DMA0_REGISTER_OFFSET;
5524 			else
5525 				offset = DMA1_REGISTER_OFFSET;
5526 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5527 			data |= MEM_POWER_OVERRIDE;
5528 			if (data != orig)
5529 				WREG32(DMA_POWER_CNTL + offset, data);
5530 
5531 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5532 			data = 0xff000000;
5533 			if (data != orig)
5534 				WREG32(DMA_CLK_CTRL + offset, data);
5535 		}
5536 	}
5537 }
5538 
5539 static void si_enable_bif_mgls(struct radeon_device *rdev,
5540 			       bool enable)
5541 {
5542 	u32 orig, data;
5543 
5544 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5545 
5546 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5547 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5548 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5549 	else
5550 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5551 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5552 
5553 	if (orig != data)
5554 		WREG32_PCIE(PCIE_CNTL2, data);
5555 }
5556 
5557 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5558 			       bool enable)
5559 {
5560 	u32 orig, data;
5561 
5562 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5563 
5564 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5565 		data &= ~CLOCK_GATING_DIS;
5566 	else
5567 		data |= CLOCK_GATING_DIS;
5568 
5569 	if (orig != data)
5570 		WREG32(HDP_HOST_PATH_CNTL, data);
5571 }
5572 
5573 static void si_enable_hdp_ls(struct radeon_device *rdev,
5574 			     bool enable)
5575 {
5576 	u32 orig, data;
5577 
5578 	orig = data = RREG32(HDP_MEM_POWER_LS);
5579 
5580 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5581 		data |= HDP_LS_ENABLE;
5582 	else
5583 		data &= ~HDP_LS_ENABLE;
5584 
5585 	if (orig != data)
5586 		WREG32(HDP_MEM_POWER_LS, data);
5587 }
5588 
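/**
 * si_update_cg - enable/disable clock gating for a set of blocks (SI)
 *
 * @rdev: radeon_device pointer
 * @block: bitmask of RADEON_CG_BLOCK_* blocks to update
 * @enable: enable or disable clock gating
 *
 * Dispatch to the per-block clock gating helpers.  For GFX, MGCG must
 * be enabled before CGCG and disabled after it, hence the ordering in
 * the enable and disable paths.
 */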
5589 static void si_update_cg(struct radeon_device *rdev,
5590 			 u32 block, bool enable)
5591 {
5592 	if (block & RADEON_CG_BLOCK_GFX) {
5593 		si_enable_gui_idle_interrupt(rdev, false);
5594 		/* order matters! */
5595 		if (enable) {
5596 			si_enable_mgcg(rdev, true);
5597 			si_enable_cgcg(rdev, true);
5598 		} else {
5599 			si_enable_cgcg(rdev, false);
5600 			si_enable_mgcg(rdev, false);
5601 		}
5602 		si_enable_gui_idle_interrupt(rdev, true);
5603 	}
5604 
5605 	if (block & RADEON_CG_BLOCK_MC) {
5606 		si_enable_mc_mgcg(rdev, enable);
5607 		si_enable_mc_ls(rdev, enable);
5608 	}
5609 
5610 	if (block & RADEON_CG_BLOCK_SDMA) {
5611 		si_enable_dma_mgcg(rdev, enable);
5612 	}
5613 
5614 	if (block & RADEON_CG_BLOCK_BIF) {
5615 		si_enable_bif_mgls(rdev, enable);
5616 	}
5617 
5618 	if (block & RADEON_CG_BLOCK_UVD) {
5619 		if (rdev->has_uvd) {
5620 			si_enable_uvd_mgcg(rdev, enable);
5621 		}
5622 	}
5623 
5624 	if (block & RADEON_CG_BLOCK_HDP) {
5625 		si_enable_hdp_mgcg(rdev, enable);
5626 		si_enable_hdp_ls(rdev, enable);
5627 	}
5628 }
5629 
5630 static void si_init_cg(struct radeon_device *rdev)
5631 {
5632 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5633 			    RADEON_CG_BLOCK_MC |
5634 			    RADEON_CG_BLOCK_SDMA |
5635 			    RADEON_CG_BLOCK_BIF |
5636 			    RADEON_CG_BLOCK_HDP), true);
5637 	if (rdev->has_uvd) {
5638 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5639 		si_init_uvd_internal_cg(rdev);
5640 	}
5641 }
5642 
5643 static void si_fini_cg(struct radeon_device *rdev)
5644 {
5645 	if (rdev->has_uvd) {
5646 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5647 	}
5648 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5649 			    RADEON_CG_BLOCK_MC |
5650 			    RADEON_CG_BLOCK_SDMA |
5651 			    RADEON_CG_BLOCK_BIF |
5652 			    RADEON_CG_BLOCK_HDP), false);
5653 }
5654 
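/**
 * si_get_csb_size - calculate the size of the clear state buffer (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Walk the clear state sections and add up the dwords needed for the
 * preamble, context control, the SECT_CONTEXT register extents,
 * pa_sc_raster_config and the trailing clear state packets.  Returns
 * the size in dwords, or 0 if there is no clear state data.
 */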
5655 u32 si_get_csb_size(struct radeon_device *rdev)
5656 {
5657 	u32 count = 0;
5658 	const struct cs_section_def *sect = NULL;
5659 	const struct cs_extent_def *ext = NULL;
5660 
5661 	if (rdev->rlc.cs_data == NULL)
5662 		return 0;
5663 
5664 	/* begin clear state */
5665 	count += 2;
5666 	/* context control state */
5667 	count += 3;
5668 
5669 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5670 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5671 			if (sect->id == SECT_CONTEXT)
5672 				count += 2 + ext->reg_count;
5673 			else
5674 				return 0;
5675 		}
5676 	}
5677 	/* pa_sc_raster_config */
5678 	count += 3;
5679 	/* end clear state */
5680 	count += 2;
5681 	/* clear state */
5682 	count += 2;
5683 
5684 	return count;
5685 }
5686 
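/**
 * si_get_csb_buffer - populate the clear state buffer (SI)
 *
 * @rdev: radeon_device pointer
 * @buffer: buffer to fill, sized via si_get_csb_size()
 *
 * Emit the clear state packet stream: a begin-clear-state preamble,
 * context control, the SECT_CONTEXT register extents, a per-asic
 * PA_SC_RASTER_CONFIG value, the end-clear-state preamble and a
 * final CLEAR_STATE packet.
 */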
5687 void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
5688 {
5689 	u32 count = 0, i;
5690 	const struct cs_section_def *sect = NULL;
5691 	const struct cs_extent_def *ext = NULL;
5692 
5693 	if (rdev->rlc.cs_data == NULL)
5694 		return;
5695 	if (buffer == NULL)
5696 		return;
5697 
5698 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5699 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
5700 
5701 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CONTEXT_CONTROL, 1));
5702 	buffer[count++] = cpu_to_le32(0x80000000);
5703 	buffer[count++] = cpu_to_le32(0x80000000);
5704 
5705 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5706 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5707 			if (sect->id == SECT_CONTEXT) {
5708 				buffer[count++] =
5709 					cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count));
5710 				buffer[count++] = cpu_to_le32(ext->reg_index - 0xa000);
5711 				for (i = 0; i < ext->reg_count; i++)
5712 					buffer[count++] = cpu_to_le32(ext->extent[i]);
5713 			} else {
5714 				return;
5715 			}
5716 		}
5717 	}
5718 
5719 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_SET_CONTEXT_REG, 1));
5720 	buffer[count++] = cpu_to_le32(PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START);
5721 	switch (rdev->family) {
5722 	case CHIP_TAHITI:
5723 	case CHIP_PITCAIRN:
5724 		buffer[count++] = cpu_to_le32(0x2a00126a);
5725 		break;
5726 	case CHIP_VERDE:
5727 		buffer[count++] = cpu_to_le32(0x0000124a);
5728 		break;
5729 	case CHIP_OLAND:
5730 		buffer[count++] = cpu_to_le32(0x00000082);
5731 		break;
5732 	case CHIP_HAINAN:
5733 		buffer[count++] = cpu_to_le32(0x00000000);
5734 		break;
5735 	default:
5736 		buffer[count++] = cpu_to_le32(0x00000000);
5737 		break;
5738 	}
5739 
5740 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_PREAMBLE_CNTL, 0));
5741 	buffer[count++] = cpu_to_le32(PACKET3_PREAMBLE_END_CLEAR_STATE);
5742 
5743 	buffer[count++] = cpu_to_le32(PACKET3(PACKET3_CLEAR_STATE, 0));
5744 	buffer[count++] = cpu_to_le32(0);
5745 }
5746 
5747 static void si_init_pg(struct radeon_device *rdev)
5748 {
5749 	if (rdev->pg_flags) {
5750 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5751 			si_init_dma_pg(rdev);
5752 		}
5753 		si_init_ao_cu_mask(rdev);
5754 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_PG) {
5755 			si_init_gfx_cgpg(rdev);
5756 		} else {
5757 			WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5758 			WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5759 		}
5760 		si_enable_dma_pg(rdev, true);
5761 		si_enable_gfx_cgpg(rdev, true);
5762 	} else {
5763 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5764 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5765 	}
5766 }
5767 
5768 static void si_fini_pg(struct radeon_device *rdev)
5769 {
5770 	if (rdev->pg_flags) {
5771 		si_enable_dma_pg(rdev, false);
5772 		si_enable_gfx_cgpg(rdev, false);
5773 	}
5774 }
5775 
5776 /*
5777  * RLC
5778  */
5779 void si_rlc_reset(struct radeon_device *rdev)
5780 {
5781 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5782 
5783 	tmp |= SOFT_RESET_RLC;
5784 	WREG32(GRBM_SOFT_RESET, tmp);
5785 	udelay(50);
5786 	tmp &= ~SOFT_RESET_RLC;
5787 	WREG32(GRBM_SOFT_RESET, tmp);
5788 	udelay(50);
5789 }
5790 
5791 static void si_rlc_stop(struct radeon_device *rdev)
5792 {
5793 	WREG32(RLC_CNTL, 0);
5794 
5795 	si_enable_gui_idle_interrupt(rdev, false);
5796 
5797 	si_wait_for_rlc_serdes(rdev);
5798 }
5799 
5800 static void si_rlc_start(struct radeon_device *rdev)
5801 {
5802 	WREG32(RLC_CNTL, RLC_ENABLE);
5803 
5804 	si_enable_gui_idle_interrupt(rdev, true);
5805 
5806 	udelay(50);
5807 }
5808 
5809 static bool si_lbpw_supported(struct radeon_device *rdev)
5810 {
5811 	u32 tmp;
5812 
5813 	/* Enable LBPW only for DDR3 */
5814 	tmp = RREG32(MC_SEQ_MISC0);
5815 	if ((tmp & 0xF0000000) == 0xB0000000)
5816 		return true;
5817 	return false;
5818 }
5819 
5820 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5821 {
5822 	u32 tmp;
5823 
5824 	tmp = RREG32(RLC_LB_CNTL);
5825 	if (enable)
5826 		tmp |= LOAD_BALANCE_ENABLE;
5827 	else
5828 		tmp &= ~LOAD_BALANCE_ENABLE;
5829 	WREG32(RLC_LB_CNTL, tmp);
5830 
5831 	if (!enable) {
5832 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5833 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5834 	}
5835 }
5836 
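/**
 * si_rlc_resume - load the RLC microcode and start the RLC (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Stop and reset the RLC, initialize power and clock gating, load the
 * RLC ucode (new or legacy firmware layout), configure load balancing
 * and start the RLC.  Returns 0 on success, -EINVAL if no RLC
 * firmware is loaded.
 */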
5837 static int si_rlc_resume(struct radeon_device *rdev)
5838 {
5839 	u32 i;
5840 
5841 	if (!rdev->rlc_fw)
5842 		return -EINVAL;
5843 
5844 	si_rlc_stop(rdev);
5845 
5846 	si_rlc_reset(rdev);
5847 
5848 	si_init_pg(rdev);
5849 
5850 	si_init_cg(rdev);
5851 
5852 	WREG32(RLC_RL_BASE, 0);
5853 	WREG32(RLC_RL_SIZE, 0);
5854 	WREG32(RLC_LB_CNTL, 0);
5855 	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
5856 	WREG32(RLC_LB_CNTR_INIT, 0);
5857 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
5858 
5859 	WREG32(RLC_MC_CNTL, 0);
5860 	WREG32(RLC_UCODE_CNTL, 0);
5861 
5862 	if (rdev->new_fw) {
5863 		const struct rlc_firmware_header_v1_0 *hdr =
5864 			(const struct rlc_firmware_header_v1_0 *)rdev->rlc_fw->data;
5865 		u32 fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4;
5866 		const __le32 *fw_data = (const __le32 *)
5867 			(rdev->rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
5868 
5869 		radeon_ucode_print_rlc_hdr(&hdr->header);
5870 
5871 		for (i = 0; i < fw_size; i++) {
5872 			WREG32(RLC_UCODE_ADDR, i);
5873 			WREG32(RLC_UCODE_DATA, le32_to_cpup(fw_data++));
5874 		}
5875 	} else {
5876 		const __be32 *fw_data =
5877 			(const __be32 *)rdev->rlc_fw->data;
5878 		for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
5879 			WREG32(RLC_UCODE_ADDR, i);
5880 			WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
5881 		}
5882 	}
5883 	WREG32(RLC_UCODE_ADDR, 0);
5884 
5885 	si_enable_lbpw(rdev, si_lbpw_supported(rdev));
5886 
5887 	si_rlc_start(rdev);
5888 
5889 	return 0;
5890 }
5891 
5892 static void si_enable_interrupts(struct radeon_device *rdev)
5893 {
5894 	u32 ih_cntl = RREG32(IH_CNTL);
5895 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5896 
5897 	ih_cntl |= ENABLE_INTR;
5898 	ih_rb_cntl |= IH_RB_ENABLE;
5899 	WREG32(IH_CNTL, ih_cntl);
5900 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5901 	rdev->ih.enabled = true;
5902 }
5903 
5904 static void si_disable_interrupts(struct radeon_device *rdev)
5905 {
5906 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5907 	u32 ih_cntl = RREG32(IH_CNTL);
5908 
5909 	ih_rb_cntl &= ~IH_RB_ENABLE;
5910 	ih_cntl &= ~ENABLE_INTR;
5911 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5912 	WREG32(IH_CNTL, ih_cntl);
5913 	/* set rptr, wptr to 0 */
5914 	WREG32(IH_RB_RPTR, 0);
5915 	WREG32(IH_RB_WPTR, 0);
5916 	rdev->ih.enabled = false;
5917 	rdev->ih.rptr = 0;
5918 }
5919 
5920 static void si_disable_interrupt_state(struct radeon_device *rdev)
5921 {
5922 	u32 tmp;
5923 
5924 	tmp = RREG32(CP_INT_CNTL_RING0) &
5925 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
5926 	WREG32(CP_INT_CNTL_RING0, tmp);
5927 	WREG32(CP_INT_CNTL_RING1, 0);
5928 	WREG32(CP_INT_CNTL_RING2, 0);
5929 	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5930 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
5931 	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5932 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
5933 	WREG32(GRBM_INT_CNTL, 0);
5934 	WREG32(SRBM_INT_CNTL, 0);
5935 	if (rdev->num_crtc >= 2) {
5936 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5937 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5938 	}
5939 	if (rdev->num_crtc >= 4) {
5940 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5941 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5942 	}
5943 	if (rdev->num_crtc >= 6) {
5944 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5945 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5946 	}
5947 
5948 	if (rdev->num_crtc >= 2) {
5949 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
5950 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
5951 	}
5952 	if (rdev->num_crtc >= 4) {
5953 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
5954 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
5955 	}
5956 	if (rdev->num_crtc >= 6) {
5957 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
5958 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
5959 	}
5960 
5961 	if (!ASIC_IS_NODCE(rdev)) {
5962 		WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
5963 
5964 		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5965 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5966 		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5967 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5968 		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5969 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5970 		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5971 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5972 		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5973 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5974 		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
5975 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5976 	}
5977 }
5978 
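/**
 * si_irq_init - initialize the interrupt handler ring (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Allocate the IH ring buffer, bring up the RLC, program the ring
 * base, size and writeback address, set the default IH control bits
 * and enable interrupts.  Returns 0 for success, error on failure.
 */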
5979 static int si_irq_init(struct radeon_device *rdev)
5980 {
5981 	int ret = 0;
5982 	int rb_bufsz;
5983 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
5984 
5985 	/* allocate ring */
5986 	ret = r600_ih_ring_alloc(rdev);
5987 	if (ret)
5988 		return ret;
5989 
5990 	/* disable irqs */
5991 	si_disable_interrupts(rdev);
5992 
5993 	/* init rlc */
5994 	ret = si_rlc_resume(rdev);
5995 	if (ret) {
5996 		r600_ih_ring_fini(rdev);
5997 		return ret;
5998 	}
5999 
6000 	/* setup interrupt control */
6001 	/* set dummy read address to ring address */
6002 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
6003 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
6004 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with MSI, enabled without MSI
6005 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
6006 	 */
6007 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
6008 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
6009 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
6010 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
6011 
6012 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
6013 	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);
6014 
6015 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
6016 		      IH_WPTR_OVERFLOW_CLEAR |
6017 		      (rb_bufsz << 1));
6018 
6019 	if (rdev->wb.enabled)
6020 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
6021 
6022 	/* set the writeback address whether it's enabled or not */
6023 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
6024 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
6025 
6026 	WREG32(IH_RB_CNTL, ih_rb_cntl);
6027 
6028 	/* set rptr, wptr to 0 */
6029 	WREG32(IH_RB_RPTR, 0);
6030 	WREG32(IH_RB_WPTR, 0);
6031 
6032 	/* Default settings for IH_CNTL (disabled at first) */
6033 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
6034 	/* RPTR_REARM only works if MSIs are enabled */
6035 	if (rdev->msi_enabled)
6036 		ih_cntl |= RPTR_REARM;
6037 	WREG32(IH_CNTL, ih_cntl);
6038 
6039 	/* force the active interrupt state to all disabled */
6040 	si_disable_interrupt_state(rdev);
6041 
6042 	pci_set_master(rdev->pdev);
6043 
6044 	/* enable irqs */
6045 	si_enable_interrupts(rdev);
6046 
6047 	return ret;
6048 }
6049 
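/**
 * si_irq_set - program the interrupt enables (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Build the enable masks for the CP rings, DMA engines, vblank, hpd
 * and thermal interrupts from the current radeon_irq state and write
 * them to the hardware.  Returns 0 for success, -EINVAL if no
 * interrupt handler is installed.
 */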
6050 int si_irq_set(struct radeon_device *rdev)
6051 {
6052 	u32 cp_int_cntl;
6053 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
6054 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
6055 	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
6056 	u32 grbm_int_cntl = 0;
6057 	u32 dma_cntl, dma_cntl1;
6058 	u32 thermal_int = 0;
6059 
6060 	if (!rdev->irq.installed) {
6061 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
6062 		return -EINVAL;
6063 	}
6064 	/* don't enable anything if the IH is disabled */
6065 	if (!rdev->ih.enabled) {
6066 		si_disable_interrupts(rdev);
6067 		/* force the active interrupt state to all disabled */
6068 		si_disable_interrupt_state(rdev);
6069 		return 0;
6070 	}
6071 
6072 	cp_int_cntl = RREG32(CP_INT_CNTL_RING0) &
6073 		(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
6074 
6075 	if (!ASIC_IS_NODCE(rdev)) {
6076 		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6077 		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6078 		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6079 		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6080 		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6081 		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~(DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN);
6082 	}
6083 
6084 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
6085 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
6086 
6087 	thermal_int = RREG32(CG_THERMAL_INT) &
6088 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
6089 
6090 	/* enable CP interrupts on all rings */
6091 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
6092 		DRM_DEBUG("si_irq_set: sw int gfx\n");
6093 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
6094 	}
6095 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
6096 		DRM_DEBUG("si_irq_set: sw int cp1\n");
6097 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
6098 	}
6099 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
6100 		DRM_DEBUG("si_irq_set: sw int cp2\n");
6101 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
6102 	}
6103 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
6104 		DRM_DEBUG("si_irq_set: sw int dma\n");
6105 		dma_cntl |= TRAP_ENABLE;
6106 	}
6107 
6108 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
6109 		DRM_DEBUG("si_irq_set: sw int dma1\n");
6110 		dma_cntl1 |= TRAP_ENABLE;
6111 	}
6112 	if (rdev->irq.crtc_vblank_int[0] ||
6113 	    atomic_read(&rdev->irq.pflip[0])) {
6114 		DRM_DEBUG("si_irq_set: vblank 0\n");
6115 		crtc1 |= VBLANK_INT_MASK;
6116 	}
6117 	if (rdev->irq.crtc_vblank_int[1] ||
6118 	    atomic_read(&rdev->irq.pflip[1])) {
6119 		DRM_DEBUG("si_irq_set: vblank 1\n");
6120 		crtc2 |= VBLANK_INT_MASK;
6121 	}
6122 	if (rdev->irq.crtc_vblank_int[2] ||
6123 	    atomic_read(&rdev->irq.pflip[2])) {
6124 		DRM_DEBUG("si_irq_set: vblank 2\n");
6125 		crtc3 |= VBLANK_INT_MASK;
6126 	}
6127 	if (rdev->irq.crtc_vblank_int[3] ||
6128 	    atomic_read(&rdev->irq.pflip[3])) {
6129 		DRM_DEBUG("si_irq_set: vblank 3\n");
6130 		crtc4 |= VBLANK_INT_MASK;
6131 	}
6132 	if (rdev->irq.crtc_vblank_int[4] ||
6133 	    atomic_read(&rdev->irq.pflip[4])) {
6134 		DRM_DEBUG("si_irq_set: vblank 4\n");
6135 		crtc5 |= VBLANK_INT_MASK;
6136 	}
6137 	if (rdev->irq.crtc_vblank_int[5] ||
6138 	    atomic_read(&rdev->irq.pflip[5])) {
6139 		DRM_DEBUG("si_irq_set: vblank 5\n");
6140 		crtc6 |= VBLANK_INT_MASK;
6141 	}
6142 	if (rdev->irq.hpd[0]) {
6143 		DRM_DEBUG("si_irq_set: hpd 1\n");
6144 		hpd1 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6145 	}
6146 	if (rdev->irq.hpd[1]) {
6147 		DRM_DEBUG("si_irq_set: hpd 2\n");
6148 		hpd2 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6149 	}
6150 	if (rdev->irq.hpd[2]) {
6151 		DRM_DEBUG("si_irq_set: hpd 3\n");
6152 		hpd3 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6153 	}
6154 	if (rdev->irq.hpd[3]) {
6155 		DRM_DEBUG("si_irq_set: hpd 4\n");
6156 		hpd4 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6157 	}
6158 	if (rdev->irq.hpd[4]) {
6159 		DRM_DEBUG("si_irq_set: hpd 5\n");
6160 		hpd5 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6161 	}
6162 	if (rdev->irq.hpd[5]) {
6163 		DRM_DEBUG("si_irq_set: hpd 6\n");
6164 		hpd6 |= DC_HPDx_INT_EN | DC_HPDx_RX_INT_EN;
6165 	}
6166 
6167 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
6168 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
6169 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
6170 
6171 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
6172 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
6173 
6174 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
6175 
6176 	if (rdev->irq.dpm_thermal) {
6177 		DRM_DEBUG("dpm thermal\n");
6178 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
6179 	}
6180 
6181 	if (rdev->num_crtc >= 2) {
6182 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
6183 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
6184 	}
6185 	if (rdev->num_crtc >= 4) {
6186 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
6187 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
6188 	}
6189 	if (rdev->num_crtc >= 6) {
6190 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
6191 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
6192 	}
6193 
6194 	if (rdev->num_crtc >= 2) {
6195 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET,
6196 		       GRPH_PFLIP_INT_MASK);
6197 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET,
6198 		       GRPH_PFLIP_INT_MASK);
6199 	}
6200 	if (rdev->num_crtc >= 4) {
6201 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET,
6202 		       GRPH_PFLIP_INT_MASK);
6203 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET,
6204 		       GRPH_PFLIP_INT_MASK);
6205 	}
6206 	if (rdev->num_crtc >= 6) {
6207 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET,
6208 		       GRPH_PFLIP_INT_MASK);
6209 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET,
6210 		       GRPH_PFLIP_INT_MASK);
6211 	}
6212 
6213 	if (!ASIC_IS_NODCE(rdev)) {
6214 		WREG32(DC_HPD1_INT_CONTROL, hpd1);
6215 		WREG32(DC_HPD2_INT_CONTROL, hpd2);
6216 		WREG32(DC_HPD3_INT_CONTROL, hpd3);
6217 		WREG32(DC_HPD4_INT_CONTROL, hpd4);
6218 		WREG32(DC_HPD5_INT_CONTROL, hpd5);
6219 		WREG32(DC_HPD6_INT_CONTROL, hpd6);
6220 	}
6221 
6222 	WREG32(CG_THERMAL_INT, thermal_int);
6223 
6224 	/* posting read */
6225 	RREG32(SRBM_STATUS);
6226 
6227 	return 0;
6228 }
6229 
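/**
 * si_irq_ack - acknowledge display interrupts (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Latch the display interrupt status registers into
 * rdev->irq.stat_regs and acknowledge any pending pflip, vblank,
 * vline, hpd and hpd-rx interrupts.
 */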
6230 static inline void si_irq_ack(struct radeon_device *rdev)
6231 {
6232 	u32 tmp;
6233 
6234 	if (ASIC_IS_NODCE(rdev))
6235 		return;
6236 
6237 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
6238 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
6239 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
6240 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
6241 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
6242 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
6243 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
6244 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
6245 	if (rdev->num_crtc >= 4) {
6246 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
6247 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
6248 	}
6249 	if (rdev->num_crtc >= 6) {
6250 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
6251 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
6252 	}
6253 
6254 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
6255 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6256 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
6257 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6258 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
6259 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
6260 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
6261 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
6262 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
6263 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
6264 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
6265 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
6266 
6267 	if (rdev->num_crtc >= 4) {
6268 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
6269 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6270 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
6271 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6272 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
6273 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
6274 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
6275 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
6276 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
6277 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
6278 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
6279 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
6280 	}
6281 
6282 	if (rdev->num_crtc >= 6) {
6283 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
6284 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6285 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
6286 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
6287 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
6288 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
6289 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
6290 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
6291 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
6292 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
6293 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
6294 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
6295 	}
6296 
6297 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
6298 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6299 		tmp |= DC_HPDx_INT_ACK;
6300 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6301 	}
6302 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
6303 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6304 		tmp |= DC_HPDx_INT_ACK;
6305 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6306 	}
6307 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
6308 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6309 		tmp |= DC_HPDx_INT_ACK;
6310 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6311 	}
6312 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
6313 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6314 		tmp |= DC_HPDx_INT_ACK;
6315 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6316 	}
6317 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
6318 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6319 		tmp |= DC_HPDx_INT_ACK;
6320 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6321 	}
6322 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
6323 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6324 		tmp |= DC_HPDx_INT_ACK;
6325 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6326 	}
6327 
6328 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT) {
6329 		tmp = RREG32(DC_HPD1_INT_CONTROL);
6330 		tmp |= DC_HPDx_RX_INT_ACK;
6331 		WREG32(DC_HPD1_INT_CONTROL, tmp);
6332 	}
6333 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT) {
6334 		tmp = RREG32(DC_HPD2_INT_CONTROL);
6335 		tmp |= DC_HPDx_RX_INT_ACK;
6336 		WREG32(DC_HPD2_INT_CONTROL, tmp);
6337 	}
6338 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT) {
6339 		tmp = RREG32(DC_HPD3_INT_CONTROL);
6340 		tmp |= DC_HPDx_RX_INT_ACK;
6341 		WREG32(DC_HPD3_INT_CONTROL, tmp);
6342 	}
6343 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT) {
6344 		tmp = RREG32(DC_HPD4_INT_CONTROL);
6345 		tmp |= DC_HPDx_RX_INT_ACK;
6346 		WREG32(DC_HPD4_INT_CONTROL, tmp);
6347 	}
6348 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT) {
6349 		tmp = RREG32(DC_HPD5_INT_CONTROL);
6350 		tmp |= DC_HPDx_RX_INT_ACK;
6351 		WREG32(DC_HPD5_INT_CONTROL, tmp);
6352 	}
6353 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT) {
6354 		tmp = RREG32(DC_HPD6_INT_CONTROL);
6355 		tmp |= DC_HPDx_RX_INT_ACK;
6356 		WREG32(DC_HPD6_INT_CONTROL, tmp);
6357 	}
6358 }
6359 
6360 static void si_irq_disable(struct radeon_device *rdev)
6361 {
6362 	si_disable_interrupts(rdev);
6363 	/* Wait and acknowledge irq */
6364 	mdelay(1);
6365 	si_irq_ack(rdev);
6366 	si_disable_interrupt_state(rdev);
6367 }
6368 
6369 static void si_irq_suspend(struct radeon_device *rdev)
6370 {
6371 	si_irq_disable(rdev);
6372 	si_rlc_stop(rdev);
6373 }
6374 
6375 static void si_irq_fini(struct radeon_device *rdev)
6376 {
6377 	si_irq_suspend(rdev);
6378 	r600_ih_ring_fini(rdev);
6379 }
6380 
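/**
 * si_get_ih_wptr - fetch the current IH ring write pointer (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Read the write pointer from the writeback page if enabled, otherwise
 * from the IH_RB_WPTR register, handling ring overflow by skipping
 * ahead and clearing the overflow flag.  Returns the masked wptr.
 */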
6381 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
6382 {
6383 	u32 wptr, tmp;
6384 
6385 	if (rdev->wb.enabled)
6386 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
6387 	else
6388 		wptr = RREG32(IH_RB_WPTR);
6389 
6390 	if (wptr & RB_OVERFLOW) {
6391 		wptr &= ~RB_OVERFLOW;
6392 		/* When a ring buffer overflow happens, start parsing interrupts
6393 		 * from the last non-overwritten vector (wptr + 16). Hopefully
6394 		 * this allows us to catch up.
6395 		 */
6396 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
6397 			 wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
6398 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
6399 		tmp = RREG32(IH_RB_CNTL);
6400 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
6401 		WREG32(IH_RB_CNTL, tmp);
6402 	}
6403 	return (wptr & rdev->ih.ptr_mask);
6404 }
6405 
6406 /* SI IV Ring
6407  * Each IV ring entry is 128 bits:
6408  * [7:0]    - interrupt source id
6409  * [31:8]   - reserved
6410  * [59:32]  - interrupt source data
6411  * [63:60]  - reserved
6412  * [71:64]  - RINGID
6413  * [79:72]  - VMID
6414  * [127:80] - reserved
6415  */
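/**
 * si_irq_process - process the pending interrupt vectors (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Walk the IH ring from rptr to wptr, decoding each 128-bit vector
 * into source id, source data and ring id, and dispatch to the
 * vblank, hotplug, VM fault, fence and thermal handlers.  Returns
 * IRQ_NONE when the IH is disabled or another caller already holds
 * the processing lock.
 */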
6416 int si_irq_process(struct radeon_device *rdev)
6417 {
6418 	u32 wptr;
6419 	u32 rptr;
6420 	u32 src_id, src_data, ring_id;
6421 	u32 ring_index;
6422 	bool queue_hotplug = false;
6423 	bool queue_dp = false;
6424 	bool queue_thermal = false;
6425 	u32 status, addr;
6426 
6427 	if (!rdev->ih.enabled || rdev->shutdown)
6428 		return IRQ_NONE;
6429 
6430 	wptr = si_get_ih_wptr(rdev);
6431 
6432 restart_ih:
6433 	/* is somebody else already processing irqs? */
6434 	if (atomic_xchg(&rdev->ih.lock, 1))
6435 		return IRQ_NONE;
6436 
6437 	rptr = rdev->ih.rptr;
6438 	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
6439 
6440 	/* Order reading of wptr vs. reading of IH ring data */
6441 	rmb();
6442 
6443 	/* display interrupts */
6444 	si_irq_ack(rdev);
6445 
6446 	while (rptr != wptr) {
6447 		/* wptr/rptr are in bytes! */
6448 		ring_index = rptr / 4;
6449 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
6450 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
6451 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
6452 
6453 		switch (src_id) {
6454 		case 1: /* D1 vblank/vline */
6455 			switch (src_data) {
6456 			case 0: /* D1 vblank */
6457 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT))
6458 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6459 
6460 				if (rdev->irq.crtc_vblank_int[0]) {
6461 					drm_handle_vblank(rdev->ddev, 0);
6462 					rdev->pm.vblank_sync = true;
6463 					wake_up(&rdev->irq.vblank_queue);
6464 				}
6465 				if (atomic_read(&rdev->irq.pflip[0]))
6466 					radeon_crtc_handle_vblank(rdev, 0);
6467 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
6468 				DRM_DEBUG("IH: D1 vblank\n");
6469 
6470 				break;
6471 			case 1: /* D1 vline */
6472 				if (!(rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT))
6473 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6474 
6475 				rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
6476 				DRM_DEBUG("IH: D1 vline\n");
6477 
6478 				break;
6479 			default:
6480 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6481 				break;
6482 			}
6483 			break;
6484 		case 2: /* D2 vblank/vline */
6485 			switch (src_data) {
6486 			case 0: /* D2 vblank */
6487 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT))
6488 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6489 
6490 				if (rdev->irq.crtc_vblank_int[1]) {
6491 					drm_handle_vblank(rdev->ddev, 1);
6492 					rdev->pm.vblank_sync = true;
6493 					wake_up(&rdev->irq.vblank_queue);
6494 				}
6495 				if (atomic_read(&rdev->irq.pflip[1]))
6496 					radeon_crtc_handle_vblank(rdev, 1);
6497 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
6498 				DRM_DEBUG("IH: D2 vblank\n");
6499 
6500 				break;
6501 			case 1: /* D2 vline */
6502 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT))
6503 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6504 
6505 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
6506 				DRM_DEBUG("IH: D2 vline\n");
6507 
6508 				break;
6509 			default:
6510 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6511 				break;
6512 			}
6513 			break;
6514 		case 3: /* D3 vblank/vline */
6515 			switch (src_data) {
6516 			case 0: /* D3 vblank */
6517 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT))
6518 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6519 
6520 				if (rdev->irq.crtc_vblank_int[2]) {
6521 					drm_handle_vblank(rdev->ddev, 2);
6522 					rdev->pm.vblank_sync = true;
6523 					wake_up(&rdev->irq.vblank_queue);
6524 				}
6525 				if (atomic_read(&rdev->irq.pflip[2]))
6526 					radeon_crtc_handle_vblank(rdev, 2);
6527 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
6528 				DRM_DEBUG("IH: D3 vblank\n");
6529 
6530 				break;
6531 			case 1: /* D3 vline */
6532 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT))
6533 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6534 
6535 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
6536 				DRM_DEBUG("IH: D3 vline\n");
6537 
6538 				break;
6539 			default:
6540 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6541 				break;
6542 			}
6543 			break;
6544 		case 4: /* D4 vblank/vline */
6545 			switch (src_data) {
6546 			case 0: /* D4 vblank */
6547 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT))
6548 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6549 
6550 				if (rdev->irq.crtc_vblank_int[3]) {
6551 					drm_handle_vblank(rdev->ddev, 3);
6552 					rdev->pm.vblank_sync = true;
6553 					wake_up(&rdev->irq.vblank_queue);
6554 				}
6555 				if (atomic_read(&rdev->irq.pflip[3]))
6556 					radeon_crtc_handle_vblank(rdev, 3);
6557 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
6558 				DRM_DEBUG("IH: D4 vblank\n");
6559 
6560 				break;
6561 			case 1: /* D4 vline */
6562 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT))
6563 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6564 
6565 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
6566 				DRM_DEBUG("IH: D4 vline\n");
6567 
6568 				break;
6569 			default:
6570 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6571 				break;
6572 			}
6573 			break;
6574 		case 5: /* D5 vblank/vline */
6575 			switch (src_data) {
6576 			case 0: /* D5 vblank */
6577 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT))
6578 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6579 
6580 				if (rdev->irq.crtc_vblank_int[4]) {
6581 					drm_handle_vblank(rdev->ddev, 4);
6582 					rdev->pm.vblank_sync = true;
6583 					wake_up(&rdev->irq.vblank_queue);
6584 				}
6585 				if (atomic_read(&rdev->irq.pflip[4]))
6586 					radeon_crtc_handle_vblank(rdev, 4);
6587 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
6588 				DRM_DEBUG("IH: D5 vblank\n");
6589 
6590 				break;
6591 			case 1: /* D5 vline */
6592 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT))
6593 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6594 
6595 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
6596 				DRM_DEBUG("IH: D5 vline\n");
6597 
6598 				break;
6599 			default:
6600 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6601 				break;
6602 			}
6603 			break;
6604 		case 6: /* D6 vblank/vline */
6605 			switch (src_data) {
6606 			case 0: /* D6 vblank */
6607 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT))
6608 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6609 
6610 				if (rdev->irq.crtc_vblank_int[5]) {
6611 					drm_handle_vblank(rdev->ddev, 5);
6612 					rdev->pm.vblank_sync = true;
6613 					wake_up(&rdev->irq.vblank_queue);
6614 				}
6615 				if (atomic_read(&rdev->irq.pflip[5]))
6616 					radeon_crtc_handle_vblank(rdev, 5);
6617 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
6618 				DRM_DEBUG("IH: D6 vblank\n");
6619 
6620 				break;
6621 			case 1: /* D6 vline */
6622 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT))
6623 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6624 
6625 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
6626 				DRM_DEBUG("IH: D6 vline\n");
6627 
6628 				break;
6629 			default:
6630 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6631 				break;
6632 			}
6633 			break;
6634 		case 8: /* D1 page flip */
6635 		case 10: /* D2 page flip */
6636 		case 12: /* D3 page flip */
6637 		case 14: /* D4 page flip */
6638 		case 16: /* D5 page flip */
6639 		case 18: /* D6 page flip */
6640 			DRM_DEBUG("IH: D%d flip\n", ((src_id - 8) >> 1) + 1);
6641 			if (radeon_use_pflipirq > 0)
6642 				radeon_crtc_handle_flip(rdev, (src_id - 8) >> 1);
6643 			break;
6644 		case 42: /* HPD hotplug */
6645 			switch (src_data) {
6646 			case 0:
6647 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT))
6648 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6649 
6650 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
6651 				queue_hotplug = true;
6652 				DRM_DEBUG("IH: HPD1\n");
6653 
6654 				break;
6655 			case 1:
6656 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT))
6657 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6658 
6659 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
6660 				queue_hotplug = true;
6661 				DRM_DEBUG("IH: HPD2\n");
6662 
6663 				break;
6664 			case 2:
6665 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT))
6666 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6667 
6668 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
6669 				queue_hotplug = true;
6670 				DRM_DEBUG("IH: HPD3\n");
6671 
6672 				break;
6673 			case 3:
6674 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT))
6675 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6676 
6677 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
6678 				queue_hotplug = true;
6679 				DRM_DEBUG("IH: HPD4\n");
6680 
6681 				break;
6682 			case 4:
6683 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT))
6684 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6685 
6686 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
6687 				queue_hotplug = true;
6688 				DRM_DEBUG("IH: HPD5\n");
6689 
6690 				break;
6691 			case 5:
6692 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT))
6693 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6694 
6695 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
6696 				queue_hotplug = true;
6697 				DRM_DEBUG("IH: HPD6\n");
6698 
6699 				break;
6700 			case 6:
6701 				if (!(rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_RX_INTERRUPT))
6702 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6703 
6704 				rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_RX_INTERRUPT;
6705 				queue_dp = true;
6706 				DRM_DEBUG("IH: HPD_RX 1\n");
6707 
6708 				break;
6709 			case 7:
6710 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_RX_INTERRUPT))
6711 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6712 
6713 				rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_RX_INTERRUPT;
6714 				queue_dp = true;
6715 				DRM_DEBUG("IH: HPD_RX 2\n");
6716 
6717 				break;
6718 			case 8:
6719 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_RX_INTERRUPT))
6720 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6721 
6722 				rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_RX_INTERRUPT;
6723 				queue_dp = true;
6724 				DRM_DEBUG("IH: HPD_RX 3\n");
6725 
6726 				break;
6727 			case 9:
6728 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_RX_INTERRUPT))
6729 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6730 
6731 				rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_RX_INTERRUPT;
6732 				queue_dp = true;
6733 				DRM_DEBUG("IH: HPD_RX 4\n");
6734 
6735 				break;
6736 			case 10:
6737 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_RX_INTERRUPT))
6738 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6739 
6740 				rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_RX_INTERRUPT;
6741 				queue_dp = true;
6742 				DRM_DEBUG("IH: HPD_RX 5\n");
6743 
6744 				break;
6745 			case 11:
6746 				if (!(rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_RX_INTERRUPT))
6747 					DRM_DEBUG("IH: IH event w/o asserted irq bit?\n");
6748 
6749 				rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_RX_INTERRUPT;
6750 				queue_dp = true;
6751 				DRM_DEBUG("IH: HPD_RX 6\n");
6752 
6753 				break;
6754 			default:
6755 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6756 				break;
6757 			}
6758 			break;
6759 		case 96:
6760 			DRM_ERROR("SRBM_READ_ERROR: 0x%x\n", RREG32(SRBM_READ_ERROR));
6761 			WREG32(SRBM_INT_ACK, 0x1);
6762 			break;
6763 		case 124: /* UVD */
6764 			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
6765 			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
6766 			break;
6767 		case 146:
6768 		case 147:
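			/* VM context1 protection fault: log, decode and ack it */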
6769 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
6770 			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
6771 			/* reset addr and status */
6772 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
6773 			if (addr == 0x0 && status == 0x0)
6774 				break;
6775 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
6776 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
6777 				addr);
6778 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
6779 				status);
6780 			si_vm_decode_fault(rdev, status, addr);
6781 			break;
6782 		case 176: /* RINGID0 CP_INT */
6783 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6784 			break;
6785 		case 177: /* RINGID1 CP_INT */
6786 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6787 			break;
6788 		case 178: /* RINGID2 CP_INT */
6789 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6790 			break;
6791 		case 181: /* CP EOP event */
6792 			DRM_DEBUG("IH: CP EOP\n");
6793 			switch (ring_id) {
6794 			case 0:
6795 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
6796 				break;
6797 			case 1:
6798 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6799 				break;
6800 			case 2:
6801 				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6802 				break;
6803 			}
6804 			break;
6805 		case 224: /* DMA trap event */
6806 			DRM_DEBUG("IH: DMA trap\n");
6807 			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
6808 			break;
6809 		case 230: /* thermal low to high */
6810 			DRM_DEBUG("IH: thermal low to high\n");
6811 			rdev->pm.dpm.thermal.high_to_low = false;
6812 			queue_thermal = true;
6813 			break;
6814 		case 231: /* thermal high to low */
6815 			DRM_DEBUG("IH: thermal high to low\n");
6816 			rdev->pm.dpm.thermal.high_to_low = true;
6817 			queue_thermal = true;
6818 			break;
6819 		case 233: /* GUI IDLE */
6820 			DRM_DEBUG("IH: GUI idle\n");
6821 			break;
6822 		case 244: /* DMA trap event */
6823 			DRM_DEBUG("IH: DMA1 trap\n");
6824 			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6825 			break;
6826 		default:
6827 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
6828 			break;
6829 		}
6830 
6831 		/* wptr/rptr are in bytes; each IH ring entry is 16 bytes (4 dwords) */
6832 		rptr += 16;
6833 		rptr &= rdev->ih.ptr_mask;
6834 		WREG32(IH_RB_RPTR, rptr);
6835 	}
6836 	if (queue_dp)
6837 		schedule_work(&rdev->dp_work);
6838 	if (queue_hotplug)
6839 		schedule_delayed_work(&rdev->hotplug_work, 0);
6840 	if (queue_thermal && rdev->pm.dpm_enabled)
6841 		schedule_work(&rdev->pm.dpm.thermal.work);
6842 	rdev->ih.rptr = rptr;
6843 	atomic_set(&rdev->ih.lock, 0);
6844 
6845 	/* make sure wptr hasn't changed while processing */
6846 	wptr = si_get_ih_wptr(rdev);
6847 	if (wptr != rptr)
6848 		goto restart_ih;
6849 
6850 	return IRQ_HANDLED;
6851 }
6852 
6853 /*
6854  * startup/shutdown callbacks
6855  */
6856 static void si_uvd_init(struct radeon_device *rdev)
6857 {
6858 	int r;
6859 
6860 	if (!rdev->has_uvd)
6861 		return;
6862 
6863 	r = radeon_uvd_init(rdev);
6864 	if (r) {
6865 		dev_err(rdev->dev, "failed UVD (%d) init.\n", r);
6866 		/*
6867 		 * At this point rdev->uvd.vcpu_bo is NULL, which makes
6868 		 * uvd_v2_2_resume() fail early and do nothing there. Going
6869 		 * through that code is therefore pointless, so disable
6870 		 * UVD here instead.
6871 		 */
6872 		rdev->has_uvd = 0;
6873 		return;
6874 	}
6875 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
6876 	r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
6877 }
6878 
6879 static void si_uvd_start(struct radeon_device *rdev)
6880 {
6881 	int r;
6882 
6883 	if (!rdev->has_uvd)
6884 		return;
6885 
6886 	r = uvd_v2_2_resume(rdev);
6887 	if (r) {
6888 		dev_err(rdev->dev, "failed UVD resume (%d).\n", r);
6889 		goto error;
6890 	}
6891 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
6892 	if (r) {
6893 		dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
6894 		goto error;
6895 	}
6896 	return;
6897 
6898 error:
6899 	rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6900 }
6901 
6902 static void si_uvd_resume(struct radeon_device *rdev)
6903 {
6904 	struct radeon_ring *ring;
6905 	int r;
6906 
6907 	if (!rdev->has_uvd || !rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size)
6908 		return;
6909 
6910 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6911 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, PACKET0(UVD_NO_OP, 0));
6912 	if (r) {
6913 		dev_err(rdev->dev, "failed initializing UVD ring (%d).\n", r);
6914 		return;
6915 	}
6916 	r = uvd_v1_0_init(rdev);
6917 	if (r) {
6918 		dev_err(rdev->dev, "failed initializing UVD (%d).\n", r);
6919 		return;
6920 	}
6921 }
6922 
6923 static void si_vce_init(struct radeon_device *rdev)
6924 {
6925 	int r;
6926 
6927 	if (!rdev->has_vce)
6928 		return;
6929 
6930 	r = radeon_vce_init(rdev);
6931 	if (r) {
6932 		dev_err(rdev->dev, "failed VCE (%d) init.\n", r);
6933 		/*
6934 		 * At this point rdev->vce.vcpu_bo is NULL, which makes
6935 		 * si_vce_start() fail early and do nothing there. Going
6936 		 * through that code is therefore pointless, so disable
6937 		 * VCE here instead.
6938 		 */
6939 		rdev->has_vce = 0;
6940 		return;
6941 	}
6942 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_obj = NULL;
6943 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE1_INDEX], 4096);
6944 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_obj = NULL;
6945 	r600_ring_init(rdev, &rdev->ring[TN_RING_TYPE_VCE2_INDEX], 4096);
6946 }
6947 
6948 static void si_vce_start(struct radeon_device *rdev)
6949 {
6950 	int r;
6951 
6952 	if (!rdev->has_vce)
6953 		return;
6954 
6955 	r = radeon_vce_resume(rdev);
6956 	if (r) {
6957 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6958 		goto error;
6959 	}
6960 	r = vce_v1_0_resume(rdev);
6961 	if (r) {
6962 		dev_err(rdev->dev, "failed VCE resume (%d).\n", r);
6963 		goto error;
6964 	}
6965 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE1_INDEX);
6966 	if (r) {
6967 		dev_err(rdev->dev, "failed initializing VCE1 fences (%d).\n", r);
6968 		goto error;
6969 	}
6970 	r = radeon_fence_driver_start_ring(rdev, TN_RING_TYPE_VCE2_INDEX);
6971 	if (r) {
6972 		dev_err(rdev->dev, "failed initializing VCE2 fences (%d).\n", r);
6973 		goto error;
6974 	}
6975 	return;
6976 
6977 error:
6978 	rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
6979 	rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
6980 }
6981 
6982 static void si_vce_resume(struct radeon_device *rdev)
6983 {
6984 	struct radeon_ring *ring;
6985 	int r;
6986 
6987 	if (!rdev->has_vce || !rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size)
6988 		return;
6989 
6990 	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
6991 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6992 	if (r) {
6993 		dev_err(rdev->dev, "failed initializing VCE1 ring (%d).\n", r);
6994 		return;
6995 	}
6996 	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
6997 	r = radeon_ring_init(rdev, ring, ring->ring_size, 0, VCE_CMD_NO_OP);
6998 	if (r) {
6999 		dev_err(rdev->dev, "failed initializing VCE2 ring (%d).\n", r);
7000 		return;
7001 	}
7002 	r = vce_v1_0_init(rdev);
7003 	if (r) {
7004 		dev_err(rdev->dev, "failed initializing VCE (%d).\n", r);
7005 		return;
7006 	}
7007 }
7008 
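/**
 * si_startup - bring the hardware up
 *
 * @rdev: radeon_device pointer
 *
 * Ordering matters here: VRAM scratch and the MC are programmed before
 * the GART is enabled, fences and IRQs are running before the rings are
 * initialized, and the IB pool and VM manager come up last, on top of
 * the live CP and DMA engines.
 */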
7009 static int si_startup(struct radeon_device *rdev)
7010 {
7011 	struct radeon_ring *ring;
7012 	int r;
7013 
7014 	/* enable pcie gen2/3 link */
7015 	si_pcie_gen3_enable(rdev);
7016 	/* enable aspm */
7017 	si_program_aspm(rdev);
7018 
7019 	/* scratch needs to be initialized before MC */
7020 	r = r600_vram_scratch_init(rdev);
7021 	if (r)
7022 		return r;
7023 
7024 	si_mc_program(rdev);
7025 
7026 	if (!rdev->pm.dpm_enabled) {
7027 		r = si_mc_load_microcode(rdev);
7028 		if (r) {
7029 			DRM_ERROR("Failed to load MC firmware!\n");
7030 			return r;
7031 		}
7032 	}
7033 
7034 	r = si_pcie_gart_enable(rdev);
7035 	if (r)
7036 		return r;
7037 	si_gpu_init(rdev);
7038 
7039 	/* allocate rlc buffers */
7040 	if (rdev->family == CHIP_VERDE) {
7041 		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
7042 		rdev->rlc.reg_list_size =
7043 			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
7044 	}
7045 	rdev->rlc.cs_data = si_cs_data;
7046 	r = sumo_rlc_init(rdev);
7047 	if (r) {
7048 		DRM_ERROR("Failed to init rlc BOs!\n");
7049 		return r;
7050 	}
7051 
7052 	/* allocate wb buffer */
7053 	r = radeon_wb_init(rdev);
7054 	if (r)
7055 		return r;
7056 
7057 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
7058 	if (r) {
7059 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7060 		return r;
7061 	}
7062 
7063 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
7064 	if (r) {
7065 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7066 		return r;
7067 	}
7068 
7069 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
7070 	if (r) {
7071 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
7072 		return r;
7073 	}
7074 
7075 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
7076 	if (r) {
7077 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7078 		return r;
7079 	}
7080 
7081 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
7082 	if (r) {
7083 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
7084 		return r;
7085 	}
7086 
7087 	si_uvd_start(rdev);
7088 	si_vce_start(rdev);
7089 
7090 	/* Enable IRQ */
7091 	if (!rdev->irq.installed) {
7092 		r = radeon_irq_kms_init(rdev);
7093 		if (r)
7094 			return r;
7095 	}
7096 
7097 	r = si_irq_init(rdev);
7098 	if (r) {
7099 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
7100 		radeon_irq_kms_fini(rdev);
7101 		return r;
7102 	}
7103 	si_irq_set(rdev);
7104 
7105 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7106 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
7107 			     RADEON_CP_PACKET2);
7108 	if (r)
7109 		return r;
7110 
7111 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7112 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
7113 			     RADEON_CP_PACKET2);
7114 	if (r)
7115 		return r;
7116 
7117 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7118 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
7119 			     RADEON_CP_PACKET2);
7120 	if (r)
7121 		return r;
7122 
7123 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7124 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
7125 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7126 	if (r)
7127 		return r;
7128 
7129 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7130 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
7131 			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
7132 	if (r)
7133 		return r;
7134 
7135 	r = si_cp_load_microcode(rdev);
7136 	if (r)
7137 		return r;
7138 	r = si_cp_resume(rdev);
7139 	if (r)
7140 		return r;
7141 
7142 	r = cayman_dma_resume(rdev);
7143 	if (r)
7144 		return r;
7145 
7146 	si_uvd_resume(rdev);
7147 	si_vce_resume(rdev);
7148 
7149 	r = radeon_ib_pool_init(rdev);
7150 	if (r) {
7151 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
7152 		return r;
7153 	}
7154 
7155 	r = radeon_vm_manager_init(rdev);
7156 	if (r) {
7157 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
7158 		return r;
7159 	}
7160 
7161 	r = radeon_audio_init(rdev);
7162 	if (r)
7163 		return r;
7164 
7165 	return 0;
7166 }
7167 
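/**
 * si_resume - resume from suspend
 *
 * @rdev: radeon_device pointer
 *
 * Re-posts the card via the ATOM init tables, restores the golden
 * registers, then restarts everything with si_startup().
 */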
7168 int si_resume(struct radeon_device *rdev)
7169 {
7170 	int r;
7171 
7172 	/* Do not reset the GPU before posting; on rv770 and newer hardware,
7173 	 * unlike r500, posting performs the tasks needed to bring the GPU
7174 	 * back into good shape.
7175 	 */
7176 	/* post card */
7177 	atom_asic_init(rdev->mode_info.atom_context);
7178 
7179 	/* init golden registers */
7180 	si_init_golden_registers(rdev);
7181 
7182 	if (rdev->pm.pm_method == PM_METHOD_DPM)
7183 		radeon_pm_resume(rdev);
7184 
7185 	rdev->accel_working = true;
7186 	r = si_startup(rdev);
7187 	if (r) {
7188 		DRM_ERROR("si startup failed on resume\n");
7189 		rdev->accel_working = false;
7190 		return r;
7191 	}
7192 
7193 	return r;
7195 }
7196 
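/**
 * si_suspend - tear the hardware down for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Stops the engines roughly in the reverse order of si_startup(): CP and
 * DMA first, then UVD/VCE, PG/CG, IRQs, writeback and finally the GART.
 */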
7197 int si_suspend(struct radeon_device *rdev)
7198 {
7199 	radeon_pm_suspend(rdev);
7200 	radeon_audio_fini(rdev);
7201 	radeon_vm_manager_fini(rdev);
7202 	si_cp_enable(rdev, false);
7203 	cayman_dma_stop(rdev);
7204 	if (rdev->has_uvd) {
7205 		uvd_v1_0_fini(rdev);
7206 		radeon_uvd_suspend(rdev);
7207 	}
7208 	if (rdev->has_vce)
7209 		radeon_vce_suspend(rdev);
7210 	si_fini_pg(rdev);
7211 	si_fini_cg(rdev);
7212 	si_irq_suspend(rdev);
7213 	radeon_wb_disable(rdev);
7214 	si_pcie_gart_disable(rdev);
7215 	return 0;
7216 }
7217 
7218 /* The plan is to move initialization into this function and to use
7219  * helper functions so that radeon_device_init does little more
7220  * than call the ASIC-specific functions. This should also allow
7221  * removing a number of callback functions, such as
7222  * vram_info.
7223  */
7224 int si_init(struct radeon_device *rdev)
7225 {
7226 	struct radeon_ring *ring;
7227 	int r;
7228 
7229 	/* Read BIOS */
7230 	if (!radeon_get_bios(rdev)) {
7231 		if (ASIC_IS_AVIVO(rdev))
7232 			return -EINVAL;
7233 	}
7234 	/* Must be an ATOMBIOS */
7235 	if (!rdev->is_atom_bios) {
7236 		dev_err(rdev->dev, "Expecting atombios for SI GPU\n");
7237 		return -EINVAL;
7238 	}
7239 	r = radeon_atombios_init(rdev);
7240 	if (r)
7241 		return r;
7242 
7243 	/* Post card if necessary */
7244 	if (!radeon_card_posted(rdev)) {
7245 		if (!rdev->bios) {
7246 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
7247 			return -EINVAL;
7248 		}
7249 		DRM_INFO("GPU not posted. Posting now...\n");
7250 		atom_asic_init(rdev->mode_info.atom_context);
7251 	}
7252 	/* init golden registers */
7253 	si_init_golden_registers(rdev);
7254 	/* Initialize scratch registers */
7255 	si_scratch_init(rdev);
7256 	/* Initialize surface registers */
7257 	radeon_surface_init(rdev);
7258 	/* Initialize clocks */
7259 	radeon_get_clock_info(rdev->ddev);
7260 
7261 	/* Fence driver */
7262 	r = radeon_fence_driver_init(rdev);
7263 	if (r)
7264 		return r;
7265 
7266 	/* initialize memory controller */
7267 	r = si_mc_init(rdev);
7268 	if (r)
7269 		return r;
7270 	/* Memory manager */
7271 	r = radeon_bo_init(rdev);
7272 	if (r)
7273 		return r;
7274 
7275 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
7276 	    !rdev->rlc_fw || !rdev->mc_fw) {
7277 		r = si_init_microcode(rdev);
7278 		if (r) {
7279 			DRM_ERROR("Failed to load firmware!\n");
7280 			return r;
7281 		}
7282 	}
7283 
7284 	/* Initialize power management */
7285 	radeon_pm_init(rdev);
7286 
7287 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
7288 	ring->ring_obj = NULL;
7289 	r600_ring_init(rdev, ring, 1024 * 1024);
7290 
7291 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
7292 	ring->ring_obj = NULL;
7293 	r600_ring_init(rdev, ring, 1024 * 1024);
7294 
7295 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
7296 	ring->ring_obj = NULL;
7297 	r600_ring_init(rdev, ring, 1024 * 1024);
7298 
7299 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
7300 	ring->ring_obj = NULL;
7301 	r600_ring_init(rdev, ring, 64 * 1024);
7302 
7303 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
7304 	ring->ring_obj = NULL;
7305 	r600_ring_init(rdev, ring, 64 * 1024);
7306 
7307 	si_uvd_init(rdev);
7308 	si_vce_init(rdev);
7309 
7310 	rdev->ih.ring_obj = NULL;
7311 	r600_ih_ring_init(rdev, 64 * 1024);
7312 
7313 	r = r600_pcie_gart_init(rdev);
7314 	if (r)
7315 		return r;
7316 
7317 	rdev->accel_working = true;
7318 	r = si_startup(rdev);
7319 	if (r) {
7320 		dev_err(rdev->dev, "disabling GPU acceleration\n");
7321 		si_cp_fini(rdev);
7322 		cayman_dma_fini(rdev);
7323 		si_irq_fini(rdev);
7324 		sumo_rlc_fini(rdev);
7325 		radeon_wb_fini(rdev);
7326 		radeon_ib_pool_fini(rdev);
7327 		radeon_vm_manager_fini(rdev);
7328 		radeon_irq_kms_fini(rdev);
7329 		si_pcie_gart_fini(rdev);
7330 		rdev->accel_working = false;
7331 	}
7332 
7333 	/* Don't start up if the MC ucode is missing.
7334 	 * The default clocks and voltages before the MC ucode
7335 	 * is loaded are not sufficient for advanced operations.
7336 	 */
7337 	if (!rdev->mc_fw) {
7338 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
7339 		return -EINVAL;
7340 	}
7341 
7342 	return 0;
7343 }
7344 
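/**
 * si_fini - final teardown
 *
 * @rdev: radeon_device pointer
 *
 * Tears down everything si_init() set up, engines and IRQs first,
 * memory managers and the BIOS copy last.
 */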
7345 void si_fini(struct radeon_device *rdev)
7346 {
7347 	radeon_pm_fini(rdev);
7348 	si_cp_fini(rdev);
7349 	cayman_dma_fini(rdev);
7350 	si_fini_pg(rdev);
7351 	si_fini_cg(rdev);
7352 	si_irq_fini(rdev);
7353 	sumo_rlc_fini(rdev);
7354 	radeon_wb_fini(rdev);
7355 	radeon_vm_manager_fini(rdev);
7356 	radeon_ib_pool_fini(rdev);
7357 	radeon_irq_kms_fini(rdev);
7358 	if (rdev->has_uvd) {
7359 		uvd_v1_0_fini(rdev);
7360 		radeon_uvd_fini(rdev);
7361 	}
7362 	if (rdev->has_vce)
7363 		radeon_vce_fini(rdev);
7364 	si_pcie_gart_fini(rdev);
7365 	r600_vram_scratch_fini(rdev);
7366 	radeon_gem_fini(rdev);
7367 	radeon_fence_driver_fini(rdev);
7368 	radeon_bo_fini(rdev);
7369 	radeon_atombios_fini(rdev);
7370 	kfree(rdev->bios);
7371 	rdev->bios = NULL;
7372 }
7373 
7374 /**
7375  * si_get_gpu_clock_counter - return GPU clock counter snapshot
7376  *
7377  * @rdev: radeon_device pointer
7378  *
7379  * Fetches a GPU clock counter snapshot (SI).
7380  * Returns the 64 bit clock counter snapshot.
7381  */
7382 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
7383 {
7384 	uint64_t clock;
7385 
7386 	mutex_lock(&rdev->gpu_clock_mutex);
7387 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
7388 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
7389 		((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
7390 	mutex_unlock(&rdev->gpu_clock_mutex);
7391 	return clock;
7392 }
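/*
 * Illustrative use only (not part of the driver): sampling the counter
 * twice measures elapsed GPU clocks; u64 arithmetic keeps the difference
 * correct even if the counter wraps between samples.
 *
 *	uint64_t t0 = si_get_gpu_clock_counter(rdev);
 *	... run some work ...
 *	uint64_t elapsed = si_get_gpu_clock_counter(rdev) - t0;
 */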
7393 
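/**
 * si_set_uvd_clocks - program the UVD PLL (UPLL)
 *
 * @rdev: radeon_device pointer
 * @vclk: requested video clock (0 leaves the PLL in bypass)
 * @dclk: requested decode clock (0 leaves the PLL in bypass)
 *
 * Switches VCLK/DCLK onto the bypass clock, programs the feedback and
 * post dividers computed by radeon_uvd_calc_upll_dividers(), waits for
 * the PLL to settle and lock, then selects the PLL outputs again.
 * Returns 0 on success, negative error code on failure.
 */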
7394 int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
7395 {
7396 	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
7397 	int r;
7398 
7399 	/* bypass vclk and dclk with bclk */
7400 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7401 		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
7402 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7403 
7404 	/* put PLL in bypass mode */
7405 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);
7406 
7407 	if (!vclk || !dclk) {
7408 		/* keep the Bypass mode */
7409 		return 0;
7410 	}
7411 
7412 	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
7413 					  16384, 0x03FFFFFF, 0, 128, 5,
7414 					  &fb_div, &vclk_div, &dclk_div);
7415 	if (r)
7416 		return r;
7417 
7418 	/* set RESET_ANTI_MUX to 0 */
7419 	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7420 
7421 	/* set VCO_MODE to 1 */
7422 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);
7423 
7424 	/* disable sleep mode */
7425 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);
7426 
7427 	/* deassert UPLL_RESET */
7428 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7429 
7430 	mdelay(1);
7431 
7432 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7433 	if (r)
7434 		return r;
7435 
7436 	/* assert UPLL_RESET again */
7437 	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
7438 
7439 	/* disable spread spectrum. */
7440 	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7441 
7442 	/* set feedback divider */
7443 	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);
7444 
7445 	/* set ref divider to 0 */
7446 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);
7447 
7448 	if (fb_div < 307200)
7449 		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
7450 	else
7451 		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);
7452 
7453 	/* set PDIV_A and PDIV_B */
7454 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7455 		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
7456 		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));
7457 
7458 	/* give the PLL some time to settle */
7459 	mdelay(15);
7460 
7461 	/* deassert PLL_RESET */
7462 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
7463 
7464 	mdelay(15);
7465 
7466 	/* switch from bypass mode to normal mode */
7467 	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
7468 
7469 	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
7470 	if (r)
7471 		return r;
7472 
7473 	/* switch VCLK and DCLK selection */
7474 	WREG32_P(CG_UPLL_FUNC_CNTL_2,
7475 		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
7476 		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
7477 
7478 	mdelay(100);
7479 
7480 	return 0;
7481 }
7482 
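/*
 * si_pcie_gen3_enable - attempt to bring the PCIe link up to gen2/gen3
 * speeds. If gen3 is supported but not yet active, the link is first
 * retrained with redone equalization; the target link speed is then set
 * in the GPU's LNKCTL2 and a software speed change is initiated.
 */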
7483 static void si_pcie_gen3_enable(struct radeon_device *rdev)
7484 {
7485 	struct pci_dev *root = rdev->pdev->bus->self;
7486 	int bridge_pos, gpu_pos;
7487 	u32 speed_cntl, mask, current_data_rate;
7488 	int ret, i;
7489 	u16 tmp16;
7490 
7491 	if (pci_is_root_bus(rdev->pdev->bus))
7492 		return;
7493 
7494 	if (radeon_pcie_gen2 == 0)
7495 		return;
7496 
7497 	if (rdev->flags & RADEON_IS_IGP)
7498 		return;
7499 
7500 	if (!(rdev->flags & RADEON_IS_PCIE))
7501 		return;
7502 
7503 	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
7504 	if (ret != 0)
7505 		return;
7506 
7507 	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
7508 		return;
7509 
7510 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7511 	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
7512 		LC_CURRENT_DATA_RATE_SHIFT;
7513 	if (mask & DRM_PCIE_SPEED_80) {
7514 		if (current_data_rate == 2) {
7515 			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
7516 			return;
7517 		}
7518 		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
7519 	} else if (mask & DRM_PCIE_SPEED_50) {
7520 		if (current_data_rate == 1) {
7521 			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
7522 			return;
7523 		}
7524 		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
7525 	}
7526 
7527 	bridge_pos = pci_pcie_cap(root);
7528 	if (!bridge_pos)
7529 		return;
7530 
7531 	gpu_pos = pci_pcie_cap(rdev->pdev);
7532 	if (!gpu_pos)
7533 		return;
7534 
7535 	if (mask & DRM_PCIE_SPEED_80) {
7536 		/* re-try equalization if gen3 is not already enabled */
7537 		if (current_data_rate != 2) {
7538 			u16 bridge_cfg, gpu_cfg;
7539 			u16 bridge_cfg2, gpu_cfg2;
7540 			u32 max_lw, current_lw, tmp;
7541 
7542 			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7543 			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7544 
7545 			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
7546 			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7547 
7548 			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
7549 			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7550 
7551 			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
7552 			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
7553 			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;
7554 
7555 			if (current_lw < max_lw) {
7556 				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7557 				if (tmp & LC_RENEGOTIATION_SUPPORT) {
7558 					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
7559 					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
7560 					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
7561 					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
7562 				}
7563 			}
7564 
7565 			for (i = 0; i < 10; i++) {
7566 				/* check status */
7567 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
7568 				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
7569 					break;
7570 
7571 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
7572 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);
7573 
7574 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
7575 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);
7576 
7577 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7578 				tmp |= LC_SET_QUIESCE;
7579 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7580 
7581 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7582 				tmp |= LC_REDO_EQ;
7583 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7584 
7585 				mdelay(100);
7586 
7587 				/* linkctl */
7588 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
7589 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7590 				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
7591 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);
7592 
7593 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
7594 				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
7595 				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
7596 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);
7597 
7598 				/* linkctl2: restore saved compliance/margin bits (bit 4, bits 11:9) */
7599 				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
7600 				tmp16 &= ~((1 << 4) | (7 << 9));
7601 				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
7602 				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);
7603 
7604 				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7605 				tmp16 &= ~((1 << 4) | (7 << 9));
7606 				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
7607 				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7608 
7609 				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
7610 				tmp &= ~LC_SET_QUIESCE;
7611 				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
7612 			}
7613 		}
7614 	}
7615 
7616 	/* set the link speed */
7617 	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
7618 	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
7619 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7620 
7621 	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
7622 	tmp16 &= ~0xf;
7623 	if (mask & DRM_PCIE_SPEED_80)
7624 		tmp16 |= 3; /* gen3 */
7625 	else if (mask & DRM_PCIE_SPEED_50)
7626 		tmp16 |= 2; /* gen2 */
7627 	else
7628 		tmp16 |= 1; /* gen1 */
7629 	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);
7630 
7631 	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7632 	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
7633 	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);
7634 
7635 	for (i = 0; i < rdev->usec_timeout; i++) {
7636 		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
7637 		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
7638 			break;
7639 		udelay(1);
7640 	}
7641 }
7642 
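/*
 * si_program_aspm - tune Active State Power Management. Adjusts the
 * L0s/L1 inactivity settings, PLL power-down behaviour in L1 and, when
 * CLKREQ# is supported by the root port, reconfigures several auxiliary
 * clock selects so deeper link power states can be used.
 */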
7643 static void si_program_aspm(struct radeon_device *rdev)
7644 {
7645 	u32 data, orig;
7646 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
7647 	bool disable_clkreq = false;
7648 
7649 	if (radeon_aspm == 0)
7650 		return;
7651 
7652 	if (!(rdev->flags & RADEON_IS_PCIE))
7653 		return;
7654 
7655 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7656 	data &= ~LC_XMIT_N_FTS_MASK;
7657 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7658 	if (orig != data)
7659 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7660 
7661 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7662 	data |= LC_GO_TO_RECOVERY;
7663 	if (orig != data)
7664 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7665 
7666 	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7667 	data |= P_IGNORE_EDB_ERR;
7668 	if (orig != data)
7669 		WREG32_PCIE(PCIE_P_CNTL, data);
7670 
7671 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7672 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7673 	data |= LC_PMI_TO_L1_DIS;
7674 	if (!disable_l0s)
7675 		data |= LC_L0S_INACTIVITY(7);
7676 
7677 	if (!disable_l1) {
7678 		data |= LC_L1_INACTIVITY(7);
7679 		data &= ~LC_PMI_TO_L1_DIS;
7680 		if (orig != data)
7681 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7682 
7683 		if (!disable_plloff_in_l1) {
7684 			bool clk_req_support;
7685 
7686 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7687 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7688 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7689 			if (orig != data)
7690 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7691 
7692 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7693 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7694 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7695 			if (orig != data)
7696 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7697 
7698 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7699 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7700 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7701 			if (orig != data)
7702 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7703 
7704 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7705 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7706 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7707 			if (orig != data)
7708 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7709 
7710 			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7711 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7712 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7713 				if (orig != data)
7714 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7715 
7716 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7717 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7718 				if (orig != data)
7719 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7720 
7721 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7722 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7723 				if (orig != data)
7724 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7725 
7726 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7727 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7728 				if (orig != data)
7729 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7730 
7731 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7732 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7733 				if (orig != data)
7734 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7735 
7736 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7737 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7738 				if (orig != data)
7739 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7740 
7741 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7742 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7743 				if (orig != data)
7744 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7745 
7746 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7747 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7748 				if (orig != data)
7749 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7750 			}
7751 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7752 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7753 			data |= LC_DYN_LANES_PWR_STATE(3);
7754 			if (orig != data)
7755 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7756 
7757 			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7758 			data &= ~LS2_EXIT_TIME_MASK;
7759 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7760 				data |= LS2_EXIT_TIME(5);
7761 			if (orig != data)
7762 				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7763 
7764 			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7765 			data &= ~LS2_EXIT_TIME_MASK;
7766 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7767 				data |= LS2_EXIT_TIME(5);
7768 			if (orig != data)
7769 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7770 
7771 			if (!disable_clkreq &&
7772 			    !pci_is_root_bus(rdev->pdev->bus)) {
7773 				struct pci_dev *root = rdev->pdev->bus->self;
7774 				u32 lnkcap;
7775 
7776 				clk_req_support = false;
7777 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7778 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7779 					clk_req_support = true;
7780 			} else {
7781 				clk_req_support = false;
7782 			}
7783 
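			/* CLKREQ# is usable: allow PLL power-down in L1 and
			 * L2/3 and reprogram the thermal, deep-sleep and
			 * bypass clock selects
			 */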
7784 			if (clk_req_support) {
7785 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7786 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7787 				if (orig != data)
7788 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7789 
7790 				orig = data = RREG32(THM_CLK_CNTL);
7791 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7792 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7793 				if (orig != data)
7794 					WREG32(THM_CLK_CNTL, data);
7795 
7796 				orig = data = RREG32(MISC_CLK_CNTL);
7797 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7798 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7799 				if (orig != data)
7800 					WREG32(MISC_CLK_CNTL, data);
7801 
7802 				orig = data = RREG32(CG_CLKPIN_CNTL);
7803 				data &= ~BCLK_AS_XCLK;
7804 				if (orig != data)
7805 					WREG32(CG_CLKPIN_CNTL, data);
7806 
7807 				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7808 				data &= ~FORCE_BIF_REFCLK_EN;
7809 				if (orig != data)
7810 					WREG32(CG_CLKPIN_CNTL_2, data);
7811 
7812 				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7813 				data &= ~MPLL_CLKOUT_SEL_MASK;
7814 				data |= MPLL_CLKOUT_SEL(4);
7815 				if (orig != data)
7816 					WREG32(MPLL_BYPASSCLK_SEL, data);
7817 
7818 				orig = data = RREG32(SPLL_CNTL_MODE);
7819 				data &= ~SPLL_REFCLK_SEL_MASK;
7820 				if (orig != data)
7821 					WREG32(SPLL_CNTL_MODE, data);
7822 			}
7823 		}
7824 	} else {
7825 		if (orig != data)
7826 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7827 	}
7828 
7829 	orig = data = RREG32_PCIE(PCIE_CNTL2);
7830 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7831 	if (orig != data)
7832 		WREG32_PCIE(PCIE_CNTL2, data);
7833 
7834 	if (!disable_l0s) {
7835 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7836 		if ((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7837 			data = RREG32_PCIE(PCIE_LC_STATUS1);
7838 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7839 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7840 				data &= ~LC_L0S_INACTIVITY_MASK;
7841 				if (orig != data)
7842 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7843 			}
7844 		}
7845 	}
7846 }
7847 
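/*
 * si_vce_send_vcepll_ctlreq - handshake a VCEPLL change: assert CTLREQ,
 * poll for both CTLACK bits, then deassert. Returns -ETIMEDOUT if the
 * acks never arrive.
 */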
7848 static int si_vce_send_vcepll_ctlreq(struct radeon_device *rdev)
7849 {
7850 	unsigned i;
7851 
7852 	/* make sure VCEPLL_CTLREQ is deasserted */
7853 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7854 
7855 	mdelay(10);
7856 
7857 	/* assert UPLL_CTLREQ */
7858 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, UPLL_CTLREQ_MASK, ~UPLL_CTLREQ_MASK);
7859 
7860 	/* wait for CTLACK and CTLACK2 to get asserted */
7861 	for (i = 0; i < 100; ++i) {
7862 		uint32_t mask = UPLL_CTLACK_MASK | UPLL_CTLACK2_MASK;
7863 		if ((RREG32_SMC(CG_VCEPLL_FUNC_CNTL) & mask) == mask)
7864 			break;
7865 		mdelay(10);
7866 	}
7867 
7868 	/* deassert UPLL_CTLREQ */
7869 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~UPLL_CTLREQ_MASK);
7870 
7871 	if (i == 100) {
7872 		DRM_ERROR("Timeout setting VCE clocks!\n");
7873 		return -ETIMEDOUT;
7874 	}
7875 
7876 	return 0;
7877 }
7878 
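/**
 * si_set_vce_clocks - program the VCE PLL (VCEPLL)
 *
 * @rdev: radeon_device pointer
 * @evclk: requested EVCLK (0 leaves the PLL asleep in bypass)
 * @ecclk: requested ECCLK (0 leaves the PLL asleep in bypass)
 *
 * Mirrors si_set_uvd_clocks() for the VCE PLL: bypass, program the
 * dividers, let the PLL settle, then switch EVCLK/ECCLK back to the
 * PLL outputs. Returns 0 on success, negative error code on failure.
 */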
7879 int si_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
7880 {
7881 	unsigned fb_div = 0, evclk_div = 0, ecclk_div = 0;
7882 	int r;
7883 
7884 	/* bypass evclk and ecclk with bclk */
7885 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7886 		     EVCLK_SRC_SEL(1) | ECCLK_SRC_SEL(1),
7887 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7888 
7889 	/* put PLL in bypass mode */
7890 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_BYPASS_EN_MASK,
7891 		     ~VCEPLL_BYPASS_EN_MASK);
7892 
7893 	if (!evclk || !ecclk) {
7894 		/* keep the Bypass mode, put PLL to sleep */
7895 		WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7896 			     ~VCEPLL_SLEEP_MASK);
7897 		return 0;
7898 	}
7899 
7900 	r = radeon_uvd_calc_upll_dividers(rdev, evclk, ecclk, 125000, 250000,
7901 					  16384, 0x03FFFFFF, 0, 128, 5,
7902 					  &fb_div, &evclk_div, &ecclk_div);
7903 	if (r)
7904 		return r;
7905 
7906 	/* set RESET_ANTI_MUX to 0 */
7907 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);
7908 
7909 	/* set VCO_MODE to 1 */
7910 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_VCO_MODE_MASK,
7911 		     ~VCEPLL_VCO_MODE_MASK);
7912 
7913 	/* toggle VCEPLL_SLEEP to 1 then back to 0 */
7914 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_SLEEP_MASK,
7915 		     ~VCEPLL_SLEEP_MASK);
7916 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_SLEEP_MASK);
7917 
7918 	/* deassert VCEPLL_RESET */
7919 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7920 
7921 	mdelay(1);
7922 
7923 	r = si_vce_send_vcepll_ctlreq(rdev);
7924 	if (r)
7925 		return r;
7926 
7927 	/* assert VCEPLL_RESET again */
7928 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, VCEPLL_RESET_MASK, ~VCEPLL_RESET_MASK);
7929 
7930 	/* disable spread spectrum. */
7931 	WREG32_SMC_P(CG_VCEPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);
7932 
7933 	/* set feedback divider */
7934 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_3, VCEPLL_FB_DIV(fb_div), ~VCEPLL_FB_DIV_MASK);
7935 
7936 	/* set ref divider to 0 */
7937 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_REF_DIV_MASK);
7938 
7939 	/* set PDIV_A and PDIV_B */
7940 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7941 		     VCEPLL_PDIV_A(evclk_div) | VCEPLL_PDIV_B(ecclk_div),
7942 		     ~(VCEPLL_PDIV_A_MASK | VCEPLL_PDIV_B_MASK));
7943 
7944 	/* give the PLL some time to settle */
7945 	mdelay(15);
7946 
7947 	/* deassert PLL_RESET */
7948 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_RESET_MASK);
7949 
7950 	mdelay(15);
7951 
7952 	/* switch from bypass mode to normal mode */
7953 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL, 0, ~VCEPLL_BYPASS_EN_MASK);
7954 
7955 	r = si_vce_send_vcepll_ctlreq(rdev);
7956 	if (r)
7957 		return r;
7958 
7959 	/* switch EVCLK and ECCLK selection */
7960 	WREG32_SMC_P(CG_VCEPLL_FUNC_CNTL_2,
7961 		     EVCLK_SRC_SEL(16) | ECCLK_SRC_SEL(16),
7962 		     ~(EVCLK_SRC_SEL_MASK | ECCLK_SRC_SEL_MASK));
7963 
7964 	mdelay(100);
7965 
7966 	return 0;
7967 }
7968