xref: /linux/drivers/gpu/drm/radeon/si.c (revision 4d7696f1b05f4aeb586c74868fe3da2731daca4b)
1 /*
2  * Copyright 2011 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/slab.h>
26 #include <linux/module.h>
27 #include <drm/drmP.h>
28 #include "radeon.h"
29 #include "radeon_asic.h"
30 #include <drm/radeon_drm.h>
31 #include "sid.h"
32 #include "atom.h"
33 #include "si_blit_shaders.h"
34 #include "clearstate_si.h"
35 #include "radeon_ucode.h"
36 
37 
38 MODULE_FIRMWARE("radeon/TAHITI_pfp.bin");
39 MODULE_FIRMWARE("radeon/TAHITI_me.bin");
40 MODULE_FIRMWARE("radeon/TAHITI_ce.bin");
41 MODULE_FIRMWARE("radeon/TAHITI_mc.bin");
42 MODULE_FIRMWARE("radeon/TAHITI_rlc.bin");
43 MODULE_FIRMWARE("radeon/TAHITI_smc.bin");
44 MODULE_FIRMWARE("radeon/PITCAIRN_pfp.bin");
45 MODULE_FIRMWARE("radeon/PITCAIRN_me.bin");
46 MODULE_FIRMWARE("radeon/PITCAIRN_ce.bin");
47 MODULE_FIRMWARE("radeon/PITCAIRN_mc.bin");
48 MODULE_FIRMWARE("radeon/PITCAIRN_rlc.bin");
49 MODULE_FIRMWARE("radeon/PITCAIRN_smc.bin");
50 MODULE_FIRMWARE("radeon/VERDE_pfp.bin");
51 MODULE_FIRMWARE("radeon/VERDE_me.bin");
52 MODULE_FIRMWARE("radeon/VERDE_ce.bin");
53 MODULE_FIRMWARE("radeon/VERDE_mc.bin");
54 MODULE_FIRMWARE("radeon/VERDE_rlc.bin");
55 MODULE_FIRMWARE("radeon/VERDE_smc.bin");
56 MODULE_FIRMWARE("radeon/OLAND_pfp.bin");
57 MODULE_FIRMWARE("radeon/OLAND_me.bin");
58 MODULE_FIRMWARE("radeon/OLAND_ce.bin");
59 MODULE_FIRMWARE("radeon/OLAND_mc.bin");
60 MODULE_FIRMWARE("radeon/OLAND_rlc.bin");
61 MODULE_FIRMWARE("radeon/OLAND_smc.bin");
62 MODULE_FIRMWARE("radeon/HAINAN_pfp.bin");
63 MODULE_FIRMWARE("radeon/HAINAN_me.bin");
64 MODULE_FIRMWARE("radeon/HAINAN_ce.bin");
65 MODULE_FIRMWARE("radeon/HAINAN_mc.bin");
66 MODULE_FIRMWARE("radeon/HAINAN_rlc.bin");
67 MODULE_FIRMWARE("radeon/HAINAN_smc.bin");
68 
69 static void si_pcie_gen3_enable(struct radeon_device *rdev);
70 static void si_program_aspm(struct radeon_device *rdev);
71 extern void sumo_rlc_fini(struct radeon_device *rdev);
72 extern int sumo_rlc_init(struct radeon_device *rdev);
73 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
74 extern void r600_ih_ring_fini(struct radeon_device *rdev);
75 extern void evergreen_fix_pci_max_read_req_size(struct radeon_device *rdev);
76 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
77 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
78 extern u32 evergreen_get_number_of_dram_channels(struct radeon_device *rdev);
79 extern void evergreen_print_gpu_status_regs(struct radeon_device *rdev);
80 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
81 extern void si_dma_vm_set_page(struct radeon_device *rdev,
82 			       struct radeon_ib *ib,
83 			       uint64_t pe,
84 			       uint64_t addr, unsigned count,
85 			       uint32_t incr, uint32_t flags);
86 
87 static const u32 verde_rlc_save_restore_register_list[] =
88 {
89 	(0x8000 << 16) | (0x98f4 >> 2),
90 	0x00000000,
91 	(0x8040 << 16) | (0x98f4 >> 2),
92 	0x00000000,
93 	(0x8000 << 16) | (0xe80 >> 2),
94 	0x00000000,
95 	(0x8040 << 16) | (0xe80 >> 2),
96 	0x00000000,
97 	(0x8000 << 16) | (0x89bc >> 2),
98 	0x00000000,
99 	(0x8040 << 16) | (0x89bc >> 2),
100 	0x00000000,
101 	(0x8000 << 16) | (0x8c1c >> 2),
102 	0x00000000,
103 	(0x8040 << 16) | (0x8c1c >> 2),
104 	0x00000000,
105 	(0x9c00 << 16) | (0x98f0 >> 2),
106 	0x00000000,
107 	(0x9c00 << 16) | (0xe7c >> 2),
108 	0x00000000,
109 	(0x8000 << 16) | (0x9148 >> 2),
110 	0x00000000,
111 	(0x8040 << 16) | (0x9148 >> 2),
112 	0x00000000,
113 	(0x9c00 << 16) | (0x9150 >> 2),
114 	0x00000000,
115 	(0x9c00 << 16) | (0x897c >> 2),
116 	0x00000000,
117 	(0x9c00 << 16) | (0x8d8c >> 2),
118 	0x00000000,
119 	(0x9c00 << 16) | (0xac54 >> 2),
120 	0X00000000,
121 	0x3,
122 	(0x9c00 << 16) | (0x98f8 >> 2),
123 	0x00000000,
124 	(0x9c00 << 16) | (0x9910 >> 2),
125 	0x00000000,
126 	(0x9c00 << 16) | (0x9914 >> 2),
127 	0x00000000,
128 	(0x9c00 << 16) | (0x9918 >> 2),
129 	0x00000000,
130 	(0x9c00 << 16) | (0x991c >> 2),
131 	0x00000000,
132 	(0x9c00 << 16) | (0x9920 >> 2),
133 	0x00000000,
134 	(0x9c00 << 16) | (0x9924 >> 2),
135 	0x00000000,
136 	(0x9c00 << 16) | (0x9928 >> 2),
137 	0x00000000,
138 	(0x9c00 << 16) | (0x992c >> 2),
139 	0x00000000,
140 	(0x9c00 << 16) | (0x9930 >> 2),
141 	0x00000000,
142 	(0x9c00 << 16) | (0x9934 >> 2),
143 	0x00000000,
144 	(0x9c00 << 16) | (0x9938 >> 2),
145 	0x00000000,
146 	(0x9c00 << 16) | (0x993c >> 2),
147 	0x00000000,
148 	(0x9c00 << 16) | (0x9940 >> 2),
149 	0x00000000,
150 	(0x9c00 << 16) | (0x9944 >> 2),
151 	0x00000000,
152 	(0x9c00 << 16) | (0x9948 >> 2),
153 	0x00000000,
154 	(0x9c00 << 16) | (0x994c >> 2),
155 	0x00000000,
156 	(0x9c00 << 16) | (0x9950 >> 2),
157 	0x00000000,
158 	(0x9c00 << 16) | (0x9954 >> 2),
159 	0x00000000,
160 	(0x9c00 << 16) | (0x9958 >> 2),
161 	0x00000000,
162 	(0x9c00 << 16) | (0x995c >> 2),
163 	0x00000000,
164 	(0x9c00 << 16) | (0x9960 >> 2),
165 	0x00000000,
166 	(0x9c00 << 16) | (0x9964 >> 2),
167 	0x00000000,
168 	(0x9c00 << 16) | (0x9968 >> 2),
169 	0x00000000,
170 	(0x9c00 << 16) | (0x996c >> 2),
171 	0x00000000,
172 	(0x9c00 << 16) | (0x9970 >> 2),
173 	0x00000000,
174 	(0x9c00 << 16) | (0x9974 >> 2),
175 	0x00000000,
176 	(0x9c00 << 16) | (0x9978 >> 2),
177 	0x00000000,
178 	(0x9c00 << 16) | (0x997c >> 2),
179 	0x00000000,
180 	(0x9c00 << 16) | (0x9980 >> 2),
181 	0x00000000,
182 	(0x9c00 << 16) | (0x9984 >> 2),
183 	0x00000000,
184 	(0x9c00 << 16) | (0x9988 >> 2),
185 	0x00000000,
186 	(0x9c00 << 16) | (0x998c >> 2),
187 	0x00000000,
188 	(0x9c00 << 16) | (0x8c00 >> 2),
189 	0x00000000,
190 	(0x9c00 << 16) | (0x8c14 >> 2),
191 	0x00000000,
192 	(0x9c00 << 16) | (0x8c04 >> 2),
193 	0x00000000,
194 	(0x9c00 << 16) | (0x8c08 >> 2),
195 	0x00000000,
196 	(0x8000 << 16) | (0x9b7c >> 2),
197 	0x00000000,
198 	(0x8040 << 16) | (0x9b7c >> 2),
199 	0x00000000,
200 	(0x8000 << 16) | (0xe84 >> 2),
201 	0x00000000,
202 	(0x8040 << 16) | (0xe84 >> 2),
203 	0x00000000,
204 	(0x8000 << 16) | (0x89c0 >> 2),
205 	0x00000000,
206 	(0x8040 << 16) | (0x89c0 >> 2),
207 	0x00000000,
208 	(0x8000 << 16) | (0x914c >> 2),
209 	0x00000000,
210 	(0x8040 << 16) | (0x914c >> 2),
211 	0x00000000,
212 	(0x8000 << 16) | (0x8c20 >> 2),
213 	0x00000000,
214 	(0x8040 << 16) | (0x8c20 >> 2),
215 	0x00000000,
216 	(0x8000 << 16) | (0x9354 >> 2),
217 	0x00000000,
218 	(0x8040 << 16) | (0x9354 >> 2),
219 	0x00000000,
220 	(0x9c00 << 16) | (0x9060 >> 2),
221 	0x00000000,
222 	(0x9c00 << 16) | (0x9364 >> 2),
223 	0x00000000,
224 	(0x9c00 << 16) | (0x9100 >> 2),
225 	0x00000000,
226 	(0x9c00 << 16) | (0x913c >> 2),
227 	0x00000000,
228 	(0x8000 << 16) | (0x90e0 >> 2),
229 	0x00000000,
230 	(0x8000 << 16) | (0x90e4 >> 2),
231 	0x00000000,
232 	(0x8000 << 16) | (0x90e8 >> 2),
233 	0x00000000,
234 	(0x8040 << 16) | (0x90e0 >> 2),
235 	0x00000000,
236 	(0x8040 << 16) | (0x90e4 >> 2),
237 	0x00000000,
238 	(0x8040 << 16) | (0x90e8 >> 2),
239 	0x00000000,
240 	(0x9c00 << 16) | (0x8bcc >> 2),
241 	0x00000000,
242 	(0x9c00 << 16) | (0x8b24 >> 2),
243 	0x00000000,
244 	(0x9c00 << 16) | (0x88c4 >> 2),
245 	0x00000000,
246 	(0x9c00 << 16) | (0x8e50 >> 2),
247 	0x00000000,
248 	(0x9c00 << 16) | (0x8c0c >> 2),
249 	0x00000000,
250 	(0x9c00 << 16) | (0x8e58 >> 2),
251 	0x00000000,
252 	(0x9c00 << 16) | (0x8e5c >> 2),
253 	0x00000000,
254 	(0x9c00 << 16) | (0x9508 >> 2),
255 	0x00000000,
256 	(0x9c00 << 16) | (0x950c >> 2),
257 	0x00000000,
258 	(0x9c00 << 16) | (0x9494 >> 2),
259 	0x00000000,
260 	(0x9c00 << 16) | (0xac0c >> 2),
261 	0x00000000,
262 	(0x9c00 << 16) | (0xac10 >> 2),
263 	0x00000000,
264 	(0x9c00 << 16) | (0xac14 >> 2),
265 	0x00000000,
266 	(0x9c00 << 16) | (0xae00 >> 2),
267 	0x00000000,
268 	(0x9c00 << 16) | (0xac08 >> 2),
269 	0x00000000,
270 	(0x9c00 << 16) | (0x88d4 >> 2),
271 	0x00000000,
272 	(0x9c00 << 16) | (0x88c8 >> 2),
273 	0x00000000,
274 	(0x9c00 << 16) | (0x88cc >> 2),
275 	0x00000000,
276 	(0x9c00 << 16) | (0x89b0 >> 2),
277 	0x00000000,
278 	(0x9c00 << 16) | (0x8b10 >> 2),
279 	0x00000000,
280 	(0x9c00 << 16) | (0x8a14 >> 2),
281 	0x00000000,
282 	(0x9c00 << 16) | (0x9830 >> 2),
283 	0x00000000,
284 	(0x9c00 << 16) | (0x9834 >> 2),
285 	0x00000000,
286 	(0x9c00 << 16) | (0x9838 >> 2),
287 	0x00000000,
288 	(0x9c00 << 16) | (0x9a10 >> 2),
289 	0x00000000,
290 	(0x8000 << 16) | (0x9870 >> 2),
291 	0x00000000,
292 	(0x8000 << 16) | (0x9874 >> 2),
293 	0x00000000,
294 	(0x8001 << 16) | (0x9870 >> 2),
295 	0x00000000,
296 	(0x8001 << 16) | (0x9874 >> 2),
297 	0x00000000,
298 	(0x8040 << 16) | (0x9870 >> 2),
299 	0x00000000,
300 	(0x8040 << 16) | (0x9874 >> 2),
301 	0x00000000,
302 	(0x8041 << 16) | (0x9870 >> 2),
303 	0x00000000,
304 	(0x8041 << 16) | (0x9874 >> 2),
305 	0x00000000,
306 	0x00000000
307 };
308 
309 static const u32 tahiti_golden_rlc_registers[] =
310 {
311 	0xc424, 0xffffffff, 0x00601005,
312 	0xc47c, 0xffffffff, 0x10104040,
313 	0xc488, 0xffffffff, 0x0100000a,
314 	0xc314, 0xffffffff, 0x00000800,
315 	0xc30c, 0xffffffff, 0x800000f4,
316 	0xf4a8, 0xffffffff, 0x00000000
317 };
318 
319 static const u32 tahiti_golden_registers[] =
320 {
321 	0x9a10, 0x00010000, 0x00018208,
322 	0x9830, 0xffffffff, 0x00000000,
323 	0x9834, 0xf00fffff, 0x00000400,
324 	0x9838, 0x0002021c, 0x00020200,
325 	0xc78, 0x00000080, 0x00000000,
326 	0xd030, 0x000300c0, 0x00800040,
327 	0xd830, 0x000300c0, 0x00800040,
328 	0x5bb0, 0x000000f0, 0x00000070,
329 	0x5bc0, 0x00200000, 0x50100000,
330 	0x7030, 0x31000311, 0x00000011,
331 	0x277c, 0x00000003, 0x000007ff,
332 	0x240c, 0x000007ff, 0x00000000,
333 	0x8a14, 0xf000001f, 0x00000007,
334 	0x8b24, 0xffffffff, 0x00ffffff,
335 	0x8b10, 0x0000ff0f, 0x00000000,
336 	0x28a4c, 0x07ffffff, 0x4e000000,
337 	0x28350, 0x3f3f3fff, 0x2a00126a,
338 	0x30, 0x000000ff, 0x0040,
339 	0x34, 0x00000040, 0x00004040,
340 	0x9100, 0x07ffffff, 0x03000000,
341 	0x8e88, 0x01ff1f3f, 0x00000000,
342 	0x8e84, 0x01ff1f3f, 0x00000000,
343 	0x9060, 0x0000007f, 0x00000020,
344 	0x9508, 0x00010000, 0x00010000,
345 	0xac14, 0x00000200, 0x000002fb,
346 	0xac10, 0xffffffff, 0x0000543b,
347 	0xac0c, 0xffffffff, 0xa9210876,
348 	0x88d0, 0xffffffff, 0x000fff40,
349 	0x88d4, 0x0000001f, 0x00000010,
350 	0x1410, 0x20000000, 0x20fffed8,
351 	0x15c0, 0x000c0fc0, 0x000c0400
352 };
353 
354 static const u32 tahiti_golden_registers2[] =
355 {
356 	0xc64, 0x00000001, 0x00000001
357 };
358 
359 static const u32 pitcairn_golden_rlc_registers[] =
360 {
361 	0xc424, 0xffffffff, 0x00601004,
362 	0xc47c, 0xffffffff, 0x10102020,
363 	0xc488, 0xffffffff, 0x01000020,
364 	0xc314, 0xffffffff, 0x00000800,
365 	0xc30c, 0xffffffff, 0x800000a4
366 };
367 
368 static const u32 pitcairn_golden_registers[] =
369 {
370 	0x9a10, 0x00010000, 0x00018208,
371 	0x9830, 0xffffffff, 0x00000000,
372 	0x9834, 0xf00fffff, 0x00000400,
373 	0x9838, 0x0002021c, 0x00020200,
374 	0xc78, 0x00000080, 0x00000000,
375 	0xd030, 0x000300c0, 0x00800040,
376 	0xd830, 0x000300c0, 0x00800040,
377 	0x5bb0, 0x000000f0, 0x00000070,
378 	0x5bc0, 0x00200000, 0x50100000,
379 	0x7030, 0x31000311, 0x00000011,
380 	0x2ae4, 0x00073ffe, 0x000022a2,
381 	0x240c, 0x000007ff, 0x00000000,
382 	0x8a14, 0xf000001f, 0x00000007,
383 	0x8b24, 0xffffffff, 0x00ffffff,
384 	0x8b10, 0x0000ff0f, 0x00000000,
385 	0x28a4c, 0x07ffffff, 0x4e000000,
386 	0x28350, 0x3f3f3fff, 0x2a00126a,
387 	0x30, 0x000000ff, 0x0040,
388 	0x34, 0x00000040, 0x00004040,
389 	0x9100, 0x07ffffff, 0x03000000,
390 	0x9060, 0x0000007f, 0x00000020,
391 	0x9508, 0x00010000, 0x00010000,
392 	0xac14, 0x000003ff, 0x000000f7,
393 	0xac10, 0xffffffff, 0x00000000,
394 	0xac0c, 0xffffffff, 0x32761054,
395 	0x88d4, 0x0000001f, 0x00000010,
396 	0x15c0, 0x000c0fc0, 0x000c0400
397 };
398 
399 static const u32 verde_golden_rlc_registers[] =
400 {
401 	0xc424, 0xffffffff, 0x033f1005,
402 	0xc47c, 0xffffffff, 0x10808020,
403 	0xc488, 0xffffffff, 0x00800008,
404 	0xc314, 0xffffffff, 0x00001000,
405 	0xc30c, 0xffffffff, 0x80010014
406 };
407 
408 static const u32 verde_golden_registers[] =
409 {
410 	0x9a10, 0x00010000, 0x00018208,
411 	0x9830, 0xffffffff, 0x00000000,
412 	0x9834, 0xf00fffff, 0x00000400,
413 	0x9838, 0x0002021c, 0x00020200,
414 	0xc78, 0x00000080, 0x00000000,
415 	0xd030, 0x000300c0, 0x00800040,
416 	0xd030, 0x000300c0, 0x00800040,
417 	0xd830, 0x000300c0, 0x00800040,
418 	0xd830, 0x000300c0, 0x00800040,
419 	0x5bb0, 0x000000f0, 0x00000070,
420 	0x5bc0, 0x00200000, 0x50100000,
421 	0x7030, 0x31000311, 0x00000011,
422 	0x2ae4, 0x00073ffe, 0x000022a2,
423 	0x2ae4, 0x00073ffe, 0x000022a2,
424 	0x2ae4, 0x00073ffe, 0x000022a2,
425 	0x240c, 0x000007ff, 0x00000000,
426 	0x240c, 0x000007ff, 0x00000000,
427 	0x240c, 0x000007ff, 0x00000000,
428 	0x8a14, 0xf000001f, 0x00000007,
429 	0x8a14, 0xf000001f, 0x00000007,
430 	0x8a14, 0xf000001f, 0x00000007,
431 	0x8b24, 0xffffffff, 0x00ffffff,
432 	0x8b10, 0x0000ff0f, 0x00000000,
433 	0x28a4c, 0x07ffffff, 0x4e000000,
434 	0x28350, 0x3f3f3fff, 0x0000124a,
435 	0x28350, 0x3f3f3fff, 0x0000124a,
436 	0x28350, 0x3f3f3fff, 0x0000124a,
437 	0x30, 0x000000ff, 0x0040,
438 	0x34, 0x00000040, 0x00004040,
439 	0x9100, 0x07ffffff, 0x03000000,
440 	0x9100, 0x07ffffff, 0x03000000,
441 	0x8e88, 0x01ff1f3f, 0x00000000,
442 	0x8e88, 0x01ff1f3f, 0x00000000,
443 	0x8e88, 0x01ff1f3f, 0x00000000,
444 	0x8e84, 0x01ff1f3f, 0x00000000,
445 	0x8e84, 0x01ff1f3f, 0x00000000,
446 	0x8e84, 0x01ff1f3f, 0x00000000,
447 	0x9060, 0x0000007f, 0x00000020,
448 	0x9508, 0x00010000, 0x00010000,
449 	0xac14, 0x000003ff, 0x00000003,
450 	0xac14, 0x000003ff, 0x00000003,
451 	0xac14, 0x000003ff, 0x00000003,
452 	0xac10, 0xffffffff, 0x00000000,
453 	0xac10, 0xffffffff, 0x00000000,
454 	0xac10, 0xffffffff, 0x00000000,
455 	0xac0c, 0xffffffff, 0x00001032,
456 	0xac0c, 0xffffffff, 0x00001032,
457 	0xac0c, 0xffffffff, 0x00001032,
458 	0x88d4, 0x0000001f, 0x00000010,
459 	0x88d4, 0x0000001f, 0x00000010,
460 	0x88d4, 0x0000001f, 0x00000010,
461 	0x15c0, 0x000c0fc0, 0x000c0400
462 };
463 
464 static const u32 oland_golden_rlc_registers[] =
465 {
466 	0xc424, 0xffffffff, 0x00601005,
467 	0xc47c, 0xffffffff, 0x10104040,
468 	0xc488, 0xffffffff, 0x0100000a,
469 	0xc314, 0xffffffff, 0x00000800,
470 	0xc30c, 0xffffffff, 0x800000f4
471 };
472 
473 static const u32 oland_golden_registers[] =
474 {
475 	0x9a10, 0x00010000, 0x00018208,
476 	0x9830, 0xffffffff, 0x00000000,
477 	0x9834, 0xf00fffff, 0x00000400,
478 	0x9838, 0x0002021c, 0x00020200,
479 	0xc78, 0x00000080, 0x00000000,
480 	0xd030, 0x000300c0, 0x00800040,
481 	0xd830, 0x000300c0, 0x00800040,
482 	0x5bb0, 0x000000f0, 0x00000070,
483 	0x5bc0, 0x00200000, 0x50100000,
484 	0x7030, 0x31000311, 0x00000011,
485 	0x2ae4, 0x00073ffe, 0x000022a2,
486 	0x240c, 0x000007ff, 0x00000000,
487 	0x8a14, 0xf000001f, 0x00000007,
488 	0x8b24, 0xffffffff, 0x00ffffff,
489 	0x8b10, 0x0000ff0f, 0x00000000,
490 	0x28a4c, 0x07ffffff, 0x4e000000,
491 	0x28350, 0x3f3f3fff, 0x00000082,
492 	0x30, 0x000000ff, 0x0040,
493 	0x34, 0x00000040, 0x00004040,
494 	0x9100, 0x07ffffff, 0x03000000,
495 	0x9060, 0x0000007f, 0x00000020,
496 	0x9508, 0x00010000, 0x00010000,
497 	0xac14, 0x000003ff, 0x000000f3,
498 	0xac10, 0xffffffff, 0x00000000,
499 	0xac0c, 0xffffffff, 0x00003210,
500 	0x88d4, 0x0000001f, 0x00000010,
501 	0x15c0, 0x000c0fc0, 0x000c0400
502 };
503 
504 static const u32 hainan_golden_registers[] =
505 {
506 	0x9a10, 0x00010000, 0x00018208,
507 	0x9830, 0xffffffff, 0x00000000,
508 	0x9834, 0xf00fffff, 0x00000400,
509 	0x9838, 0x0002021c, 0x00020200,
510 	0xd0c0, 0xff000fff, 0x00000100,
511 	0xd030, 0x000300c0, 0x00800040,
512 	0xd8c0, 0xff000fff, 0x00000100,
513 	0xd830, 0x000300c0, 0x00800040,
514 	0x2ae4, 0x00073ffe, 0x000022a2,
515 	0x240c, 0x000007ff, 0x00000000,
516 	0x8a14, 0xf000001f, 0x00000007,
517 	0x8b24, 0xffffffff, 0x00ffffff,
518 	0x8b10, 0x0000ff0f, 0x00000000,
519 	0x28a4c, 0x07ffffff, 0x4e000000,
520 	0x28350, 0x3f3f3fff, 0x00000000,
521 	0x30, 0x000000ff, 0x0040,
522 	0x34, 0x00000040, 0x00004040,
523 	0x9100, 0x03e00000, 0x03600000,
524 	0x9060, 0x0000007f, 0x00000020,
525 	0x9508, 0x00010000, 0x00010000,
526 	0xac14, 0x000003ff, 0x000000f1,
527 	0xac10, 0xffffffff, 0x00000000,
528 	0xac0c, 0xffffffff, 0x00003210,
529 	0x88d4, 0x0000001f, 0x00000010,
530 	0x15c0, 0x000c0fc0, 0x000c0400
531 };
532 
533 static const u32 hainan_golden_registers2[] =
534 {
535 	0x98f8, 0xffffffff, 0x02010001
536 };
537 
538 static const u32 tahiti_mgcg_cgcg_init[] =
539 {
540 	0xc400, 0xffffffff, 0xfffffffc,
541 	0x802c, 0xffffffff, 0xe0000000,
542 	0x9a60, 0xffffffff, 0x00000100,
543 	0x92a4, 0xffffffff, 0x00000100,
544 	0xc164, 0xffffffff, 0x00000100,
545 	0x9774, 0xffffffff, 0x00000100,
546 	0x8984, 0xffffffff, 0x06000100,
547 	0x8a18, 0xffffffff, 0x00000100,
548 	0x92a0, 0xffffffff, 0x00000100,
549 	0xc380, 0xffffffff, 0x00000100,
550 	0x8b28, 0xffffffff, 0x00000100,
551 	0x9144, 0xffffffff, 0x00000100,
552 	0x8d88, 0xffffffff, 0x00000100,
553 	0x8d8c, 0xffffffff, 0x00000100,
554 	0x9030, 0xffffffff, 0x00000100,
555 	0x9034, 0xffffffff, 0x00000100,
556 	0x9038, 0xffffffff, 0x00000100,
557 	0x903c, 0xffffffff, 0x00000100,
558 	0xad80, 0xffffffff, 0x00000100,
559 	0xac54, 0xffffffff, 0x00000100,
560 	0x897c, 0xffffffff, 0x06000100,
561 	0x9868, 0xffffffff, 0x00000100,
562 	0x9510, 0xffffffff, 0x00000100,
563 	0xaf04, 0xffffffff, 0x00000100,
564 	0xae04, 0xffffffff, 0x00000100,
565 	0x949c, 0xffffffff, 0x00000100,
566 	0x802c, 0xffffffff, 0xe0000000,
567 	0x9160, 0xffffffff, 0x00010000,
568 	0x9164, 0xffffffff, 0x00030002,
569 	0x9168, 0xffffffff, 0x00040007,
570 	0x916c, 0xffffffff, 0x00060005,
571 	0x9170, 0xffffffff, 0x00090008,
572 	0x9174, 0xffffffff, 0x00020001,
573 	0x9178, 0xffffffff, 0x00040003,
574 	0x917c, 0xffffffff, 0x00000007,
575 	0x9180, 0xffffffff, 0x00060005,
576 	0x9184, 0xffffffff, 0x00090008,
577 	0x9188, 0xffffffff, 0x00030002,
578 	0x918c, 0xffffffff, 0x00050004,
579 	0x9190, 0xffffffff, 0x00000008,
580 	0x9194, 0xffffffff, 0x00070006,
581 	0x9198, 0xffffffff, 0x000a0009,
582 	0x919c, 0xffffffff, 0x00040003,
583 	0x91a0, 0xffffffff, 0x00060005,
584 	0x91a4, 0xffffffff, 0x00000009,
585 	0x91a8, 0xffffffff, 0x00080007,
586 	0x91ac, 0xffffffff, 0x000b000a,
587 	0x91b0, 0xffffffff, 0x00050004,
588 	0x91b4, 0xffffffff, 0x00070006,
589 	0x91b8, 0xffffffff, 0x0008000b,
590 	0x91bc, 0xffffffff, 0x000a0009,
591 	0x91c0, 0xffffffff, 0x000d000c,
592 	0x91c4, 0xffffffff, 0x00060005,
593 	0x91c8, 0xffffffff, 0x00080007,
594 	0x91cc, 0xffffffff, 0x0000000b,
595 	0x91d0, 0xffffffff, 0x000a0009,
596 	0x91d4, 0xffffffff, 0x000d000c,
597 	0x91d8, 0xffffffff, 0x00070006,
598 	0x91dc, 0xffffffff, 0x00090008,
599 	0x91e0, 0xffffffff, 0x0000000c,
600 	0x91e4, 0xffffffff, 0x000b000a,
601 	0x91e8, 0xffffffff, 0x000e000d,
602 	0x91ec, 0xffffffff, 0x00080007,
603 	0x91f0, 0xffffffff, 0x000a0009,
604 	0x91f4, 0xffffffff, 0x0000000d,
605 	0x91f8, 0xffffffff, 0x000c000b,
606 	0x91fc, 0xffffffff, 0x000f000e,
607 	0x9200, 0xffffffff, 0x00090008,
608 	0x9204, 0xffffffff, 0x000b000a,
609 	0x9208, 0xffffffff, 0x000c000f,
610 	0x920c, 0xffffffff, 0x000e000d,
611 	0x9210, 0xffffffff, 0x00110010,
612 	0x9214, 0xffffffff, 0x000a0009,
613 	0x9218, 0xffffffff, 0x000c000b,
614 	0x921c, 0xffffffff, 0x0000000f,
615 	0x9220, 0xffffffff, 0x000e000d,
616 	0x9224, 0xffffffff, 0x00110010,
617 	0x9228, 0xffffffff, 0x000b000a,
618 	0x922c, 0xffffffff, 0x000d000c,
619 	0x9230, 0xffffffff, 0x00000010,
620 	0x9234, 0xffffffff, 0x000f000e,
621 	0x9238, 0xffffffff, 0x00120011,
622 	0x923c, 0xffffffff, 0x000c000b,
623 	0x9240, 0xffffffff, 0x000e000d,
624 	0x9244, 0xffffffff, 0x00000011,
625 	0x9248, 0xffffffff, 0x0010000f,
626 	0x924c, 0xffffffff, 0x00130012,
627 	0x9250, 0xffffffff, 0x000d000c,
628 	0x9254, 0xffffffff, 0x000f000e,
629 	0x9258, 0xffffffff, 0x00100013,
630 	0x925c, 0xffffffff, 0x00120011,
631 	0x9260, 0xffffffff, 0x00150014,
632 	0x9264, 0xffffffff, 0x000e000d,
633 	0x9268, 0xffffffff, 0x0010000f,
634 	0x926c, 0xffffffff, 0x00000013,
635 	0x9270, 0xffffffff, 0x00120011,
636 	0x9274, 0xffffffff, 0x00150014,
637 	0x9278, 0xffffffff, 0x000f000e,
638 	0x927c, 0xffffffff, 0x00110010,
639 	0x9280, 0xffffffff, 0x00000014,
640 	0x9284, 0xffffffff, 0x00130012,
641 	0x9288, 0xffffffff, 0x00160015,
642 	0x928c, 0xffffffff, 0x0010000f,
643 	0x9290, 0xffffffff, 0x00120011,
644 	0x9294, 0xffffffff, 0x00000015,
645 	0x9298, 0xffffffff, 0x00140013,
646 	0x929c, 0xffffffff, 0x00170016,
647 	0x9150, 0xffffffff, 0x96940200,
648 	0x8708, 0xffffffff, 0x00900100,
649 	0xc478, 0xffffffff, 0x00000080,
650 	0xc404, 0xffffffff, 0x0020003f,
651 	0x30, 0xffffffff, 0x0000001c,
652 	0x34, 0x000f0000, 0x000f0000,
653 	0x160c, 0xffffffff, 0x00000100,
654 	0x1024, 0xffffffff, 0x00000100,
655 	0x102c, 0x00000101, 0x00000000,
656 	0x20a8, 0xffffffff, 0x00000104,
657 	0x264c, 0x000c0000, 0x000c0000,
658 	0x2648, 0x000c0000, 0x000c0000,
659 	0x55e4, 0xff000fff, 0x00000100,
660 	0x55e8, 0x00000001, 0x00000001,
661 	0x2f50, 0x00000001, 0x00000001,
662 	0x30cc, 0xc0000fff, 0x00000104,
663 	0xc1e4, 0x00000001, 0x00000001,
664 	0xd0c0, 0xfffffff0, 0x00000100,
665 	0xd8c0, 0xfffffff0, 0x00000100
666 };
667 
668 static const u32 pitcairn_mgcg_cgcg_init[] =
669 {
670 	0xc400, 0xffffffff, 0xfffffffc,
671 	0x802c, 0xffffffff, 0xe0000000,
672 	0x9a60, 0xffffffff, 0x00000100,
673 	0x92a4, 0xffffffff, 0x00000100,
674 	0xc164, 0xffffffff, 0x00000100,
675 	0x9774, 0xffffffff, 0x00000100,
676 	0x8984, 0xffffffff, 0x06000100,
677 	0x8a18, 0xffffffff, 0x00000100,
678 	0x92a0, 0xffffffff, 0x00000100,
679 	0xc380, 0xffffffff, 0x00000100,
680 	0x8b28, 0xffffffff, 0x00000100,
681 	0x9144, 0xffffffff, 0x00000100,
682 	0x8d88, 0xffffffff, 0x00000100,
683 	0x8d8c, 0xffffffff, 0x00000100,
684 	0x9030, 0xffffffff, 0x00000100,
685 	0x9034, 0xffffffff, 0x00000100,
686 	0x9038, 0xffffffff, 0x00000100,
687 	0x903c, 0xffffffff, 0x00000100,
688 	0xad80, 0xffffffff, 0x00000100,
689 	0xac54, 0xffffffff, 0x00000100,
690 	0x897c, 0xffffffff, 0x06000100,
691 	0x9868, 0xffffffff, 0x00000100,
692 	0x9510, 0xffffffff, 0x00000100,
693 	0xaf04, 0xffffffff, 0x00000100,
694 	0xae04, 0xffffffff, 0x00000100,
695 	0x949c, 0xffffffff, 0x00000100,
696 	0x802c, 0xffffffff, 0xe0000000,
697 	0x9160, 0xffffffff, 0x00010000,
698 	0x9164, 0xffffffff, 0x00030002,
699 	0x9168, 0xffffffff, 0x00040007,
700 	0x916c, 0xffffffff, 0x00060005,
701 	0x9170, 0xffffffff, 0x00090008,
702 	0x9174, 0xffffffff, 0x00020001,
703 	0x9178, 0xffffffff, 0x00040003,
704 	0x917c, 0xffffffff, 0x00000007,
705 	0x9180, 0xffffffff, 0x00060005,
706 	0x9184, 0xffffffff, 0x00090008,
707 	0x9188, 0xffffffff, 0x00030002,
708 	0x918c, 0xffffffff, 0x00050004,
709 	0x9190, 0xffffffff, 0x00000008,
710 	0x9194, 0xffffffff, 0x00070006,
711 	0x9198, 0xffffffff, 0x000a0009,
712 	0x919c, 0xffffffff, 0x00040003,
713 	0x91a0, 0xffffffff, 0x00060005,
714 	0x91a4, 0xffffffff, 0x00000009,
715 	0x91a8, 0xffffffff, 0x00080007,
716 	0x91ac, 0xffffffff, 0x000b000a,
717 	0x91b0, 0xffffffff, 0x00050004,
718 	0x91b4, 0xffffffff, 0x00070006,
719 	0x91b8, 0xffffffff, 0x0008000b,
720 	0x91bc, 0xffffffff, 0x000a0009,
721 	0x91c0, 0xffffffff, 0x000d000c,
722 	0x9200, 0xffffffff, 0x00090008,
723 	0x9204, 0xffffffff, 0x000b000a,
724 	0x9208, 0xffffffff, 0x000c000f,
725 	0x920c, 0xffffffff, 0x000e000d,
726 	0x9210, 0xffffffff, 0x00110010,
727 	0x9214, 0xffffffff, 0x000a0009,
728 	0x9218, 0xffffffff, 0x000c000b,
729 	0x921c, 0xffffffff, 0x0000000f,
730 	0x9220, 0xffffffff, 0x000e000d,
731 	0x9224, 0xffffffff, 0x00110010,
732 	0x9228, 0xffffffff, 0x000b000a,
733 	0x922c, 0xffffffff, 0x000d000c,
734 	0x9230, 0xffffffff, 0x00000010,
735 	0x9234, 0xffffffff, 0x000f000e,
736 	0x9238, 0xffffffff, 0x00120011,
737 	0x923c, 0xffffffff, 0x000c000b,
738 	0x9240, 0xffffffff, 0x000e000d,
739 	0x9244, 0xffffffff, 0x00000011,
740 	0x9248, 0xffffffff, 0x0010000f,
741 	0x924c, 0xffffffff, 0x00130012,
742 	0x9250, 0xffffffff, 0x000d000c,
743 	0x9254, 0xffffffff, 0x000f000e,
744 	0x9258, 0xffffffff, 0x00100013,
745 	0x925c, 0xffffffff, 0x00120011,
746 	0x9260, 0xffffffff, 0x00150014,
747 	0x9150, 0xffffffff, 0x96940200,
748 	0x8708, 0xffffffff, 0x00900100,
749 	0xc478, 0xffffffff, 0x00000080,
750 	0xc404, 0xffffffff, 0x0020003f,
751 	0x30, 0xffffffff, 0x0000001c,
752 	0x34, 0x000f0000, 0x000f0000,
753 	0x160c, 0xffffffff, 0x00000100,
754 	0x1024, 0xffffffff, 0x00000100,
755 	0x102c, 0x00000101, 0x00000000,
756 	0x20a8, 0xffffffff, 0x00000104,
757 	0x55e4, 0xff000fff, 0x00000100,
758 	0x55e8, 0x00000001, 0x00000001,
759 	0x2f50, 0x00000001, 0x00000001,
760 	0x30cc, 0xc0000fff, 0x00000104,
761 	0xc1e4, 0x00000001, 0x00000001,
762 	0xd0c0, 0xfffffff0, 0x00000100,
763 	0xd8c0, 0xfffffff0, 0x00000100
764 };
765 
766 static const u32 verde_mgcg_cgcg_init[] =
767 {
768 	0xc400, 0xffffffff, 0xfffffffc,
769 	0x802c, 0xffffffff, 0xe0000000,
770 	0x9a60, 0xffffffff, 0x00000100,
771 	0x92a4, 0xffffffff, 0x00000100,
772 	0xc164, 0xffffffff, 0x00000100,
773 	0x9774, 0xffffffff, 0x00000100,
774 	0x8984, 0xffffffff, 0x06000100,
775 	0x8a18, 0xffffffff, 0x00000100,
776 	0x92a0, 0xffffffff, 0x00000100,
777 	0xc380, 0xffffffff, 0x00000100,
778 	0x8b28, 0xffffffff, 0x00000100,
779 	0x9144, 0xffffffff, 0x00000100,
780 	0x8d88, 0xffffffff, 0x00000100,
781 	0x8d8c, 0xffffffff, 0x00000100,
782 	0x9030, 0xffffffff, 0x00000100,
783 	0x9034, 0xffffffff, 0x00000100,
784 	0x9038, 0xffffffff, 0x00000100,
785 	0x903c, 0xffffffff, 0x00000100,
786 	0xad80, 0xffffffff, 0x00000100,
787 	0xac54, 0xffffffff, 0x00000100,
788 	0x897c, 0xffffffff, 0x06000100,
789 	0x9868, 0xffffffff, 0x00000100,
790 	0x9510, 0xffffffff, 0x00000100,
791 	0xaf04, 0xffffffff, 0x00000100,
792 	0xae04, 0xffffffff, 0x00000100,
793 	0x949c, 0xffffffff, 0x00000100,
794 	0x802c, 0xffffffff, 0xe0000000,
795 	0x9160, 0xffffffff, 0x00010000,
796 	0x9164, 0xffffffff, 0x00030002,
797 	0x9168, 0xffffffff, 0x00040007,
798 	0x916c, 0xffffffff, 0x00060005,
799 	0x9170, 0xffffffff, 0x00090008,
800 	0x9174, 0xffffffff, 0x00020001,
801 	0x9178, 0xffffffff, 0x00040003,
802 	0x917c, 0xffffffff, 0x00000007,
803 	0x9180, 0xffffffff, 0x00060005,
804 	0x9184, 0xffffffff, 0x00090008,
805 	0x9188, 0xffffffff, 0x00030002,
806 	0x918c, 0xffffffff, 0x00050004,
807 	0x9190, 0xffffffff, 0x00000008,
808 	0x9194, 0xffffffff, 0x00070006,
809 	0x9198, 0xffffffff, 0x000a0009,
810 	0x919c, 0xffffffff, 0x00040003,
811 	0x91a0, 0xffffffff, 0x00060005,
812 	0x91a4, 0xffffffff, 0x00000009,
813 	0x91a8, 0xffffffff, 0x00080007,
814 	0x91ac, 0xffffffff, 0x000b000a,
815 	0x91b0, 0xffffffff, 0x00050004,
816 	0x91b4, 0xffffffff, 0x00070006,
817 	0x91b8, 0xffffffff, 0x0008000b,
818 	0x91bc, 0xffffffff, 0x000a0009,
819 	0x91c0, 0xffffffff, 0x000d000c,
820 	0x9200, 0xffffffff, 0x00090008,
821 	0x9204, 0xffffffff, 0x000b000a,
822 	0x9208, 0xffffffff, 0x000c000f,
823 	0x920c, 0xffffffff, 0x000e000d,
824 	0x9210, 0xffffffff, 0x00110010,
825 	0x9214, 0xffffffff, 0x000a0009,
826 	0x9218, 0xffffffff, 0x000c000b,
827 	0x921c, 0xffffffff, 0x0000000f,
828 	0x9220, 0xffffffff, 0x000e000d,
829 	0x9224, 0xffffffff, 0x00110010,
830 	0x9228, 0xffffffff, 0x000b000a,
831 	0x922c, 0xffffffff, 0x000d000c,
832 	0x9230, 0xffffffff, 0x00000010,
833 	0x9234, 0xffffffff, 0x000f000e,
834 	0x9238, 0xffffffff, 0x00120011,
835 	0x923c, 0xffffffff, 0x000c000b,
836 	0x9240, 0xffffffff, 0x000e000d,
837 	0x9244, 0xffffffff, 0x00000011,
838 	0x9248, 0xffffffff, 0x0010000f,
839 	0x924c, 0xffffffff, 0x00130012,
840 	0x9250, 0xffffffff, 0x000d000c,
841 	0x9254, 0xffffffff, 0x000f000e,
842 	0x9258, 0xffffffff, 0x00100013,
843 	0x925c, 0xffffffff, 0x00120011,
844 	0x9260, 0xffffffff, 0x00150014,
845 	0x9150, 0xffffffff, 0x96940200,
846 	0x8708, 0xffffffff, 0x00900100,
847 	0xc478, 0xffffffff, 0x00000080,
848 	0xc404, 0xffffffff, 0x0020003f,
849 	0x30, 0xffffffff, 0x0000001c,
850 	0x34, 0x000f0000, 0x000f0000,
851 	0x160c, 0xffffffff, 0x00000100,
852 	0x1024, 0xffffffff, 0x00000100,
853 	0x102c, 0x00000101, 0x00000000,
854 	0x20a8, 0xffffffff, 0x00000104,
855 	0x264c, 0x000c0000, 0x000c0000,
856 	0x2648, 0x000c0000, 0x000c0000,
857 	0x55e4, 0xff000fff, 0x00000100,
858 	0x55e8, 0x00000001, 0x00000001,
859 	0x2f50, 0x00000001, 0x00000001,
860 	0x30cc, 0xc0000fff, 0x00000104,
861 	0xc1e4, 0x00000001, 0x00000001,
862 	0xd0c0, 0xfffffff0, 0x00000100,
863 	0xd8c0, 0xfffffff0, 0x00000100
864 };
865 
866 static const u32 oland_mgcg_cgcg_init[] =
867 {
868 	0xc400, 0xffffffff, 0xfffffffc,
869 	0x802c, 0xffffffff, 0xe0000000,
870 	0x9a60, 0xffffffff, 0x00000100,
871 	0x92a4, 0xffffffff, 0x00000100,
872 	0xc164, 0xffffffff, 0x00000100,
873 	0x9774, 0xffffffff, 0x00000100,
874 	0x8984, 0xffffffff, 0x06000100,
875 	0x8a18, 0xffffffff, 0x00000100,
876 	0x92a0, 0xffffffff, 0x00000100,
877 	0xc380, 0xffffffff, 0x00000100,
878 	0x8b28, 0xffffffff, 0x00000100,
879 	0x9144, 0xffffffff, 0x00000100,
880 	0x8d88, 0xffffffff, 0x00000100,
881 	0x8d8c, 0xffffffff, 0x00000100,
882 	0x9030, 0xffffffff, 0x00000100,
883 	0x9034, 0xffffffff, 0x00000100,
884 	0x9038, 0xffffffff, 0x00000100,
885 	0x903c, 0xffffffff, 0x00000100,
886 	0xad80, 0xffffffff, 0x00000100,
887 	0xac54, 0xffffffff, 0x00000100,
888 	0x897c, 0xffffffff, 0x06000100,
889 	0x9868, 0xffffffff, 0x00000100,
890 	0x9510, 0xffffffff, 0x00000100,
891 	0xaf04, 0xffffffff, 0x00000100,
892 	0xae04, 0xffffffff, 0x00000100,
893 	0x949c, 0xffffffff, 0x00000100,
894 	0x802c, 0xffffffff, 0xe0000000,
895 	0x9160, 0xffffffff, 0x00010000,
896 	0x9164, 0xffffffff, 0x00030002,
897 	0x9168, 0xffffffff, 0x00040007,
898 	0x916c, 0xffffffff, 0x00060005,
899 	0x9170, 0xffffffff, 0x00090008,
900 	0x9174, 0xffffffff, 0x00020001,
901 	0x9178, 0xffffffff, 0x00040003,
902 	0x917c, 0xffffffff, 0x00000007,
903 	0x9180, 0xffffffff, 0x00060005,
904 	0x9184, 0xffffffff, 0x00090008,
905 	0x9188, 0xffffffff, 0x00030002,
906 	0x918c, 0xffffffff, 0x00050004,
907 	0x9190, 0xffffffff, 0x00000008,
908 	0x9194, 0xffffffff, 0x00070006,
909 	0x9198, 0xffffffff, 0x000a0009,
910 	0x919c, 0xffffffff, 0x00040003,
911 	0x91a0, 0xffffffff, 0x00060005,
912 	0x91a4, 0xffffffff, 0x00000009,
913 	0x91a8, 0xffffffff, 0x00080007,
914 	0x91ac, 0xffffffff, 0x000b000a,
915 	0x91b0, 0xffffffff, 0x00050004,
916 	0x91b4, 0xffffffff, 0x00070006,
917 	0x91b8, 0xffffffff, 0x0008000b,
918 	0x91bc, 0xffffffff, 0x000a0009,
919 	0x91c0, 0xffffffff, 0x000d000c,
920 	0x91c4, 0xffffffff, 0x00060005,
921 	0x91c8, 0xffffffff, 0x00080007,
922 	0x91cc, 0xffffffff, 0x0000000b,
923 	0x91d0, 0xffffffff, 0x000a0009,
924 	0x91d4, 0xffffffff, 0x000d000c,
925 	0x9150, 0xffffffff, 0x96940200,
926 	0x8708, 0xffffffff, 0x00900100,
927 	0xc478, 0xffffffff, 0x00000080,
928 	0xc404, 0xffffffff, 0x0020003f,
929 	0x30, 0xffffffff, 0x0000001c,
930 	0x34, 0x000f0000, 0x000f0000,
931 	0x160c, 0xffffffff, 0x00000100,
932 	0x1024, 0xffffffff, 0x00000100,
933 	0x102c, 0x00000101, 0x00000000,
934 	0x20a8, 0xffffffff, 0x00000104,
935 	0x264c, 0x000c0000, 0x000c0000,
936 	0x2648, 0x000c0000, 0x000c0000,
937 	0x55e4, 0xff000fff, 0x00000100,
938 	0x55e8, 0x00000001, 0x00000001,
939 	0x2f50, 0x00000001, 0x00000001,
940 	0x30cc, 0xc0000fff, 0x00000104,
941 	0xc1e4, 0x00000001, 0x00000001,
942 	0xd0c0, 0xfffffff0, 0x00000100,
943 	0xd8c0, 0xfffffff0, 0x00000100
944 };
945 
/* Clock-gating init sequence for Hainan, applied by si_init_golden_registers().
 * Rows appear to be {offset, and_mask, or_value} triples consumed by
 * radeon_program_register_sequence() — TODO confirm layout against that helper.
 */
static const u32 hainan_mgcg_cgcg_init[] =
{
	0xc400, 0xffffffff, 0xfffffffc,
	0x802c, 0xffffffff, 0xe0000000,
	0x9a60, 0xffffffff, 0x00000100,
	0x92a4, 0xffffffff, 0x00000100,
	0xc164, 0xffffffff, 0x00000100,
	0x9774, 0xffffffff, 0x00000100,
	0x8984, 0xffffffff, 0x06000100,
	0x8a18, 0xffffffff, 0x00000100,
	0x92a0, 0xffffffff, 0x00000100,
	0xc380, 0xffffffff, 0x00000100,
	0x8b28, 0xffffffff, 0x00000100,
	0x9144, 0xffffffff, 0x00000100,
	0x8d88, 0xffffffff, 0x00000100,
	0x8d8c, 0xffffffff, 0x00000100,
	0x9030, 0xffffffff, 0x00000100,
	0x9034, 0xffffffff, 0x00000100,
	0x9038, 0xffffffff, 0x00000100,
	0x903c, 0xffffffff, 0x00000100,
	0xad80, 0xffffffff, 0x00000100,
	0xac54, 0xffffffff, 0x00000100,
	0x897c, 0xffffffff, 0x06000100,
	0x9868, 0xffffffff, 0x00000100,
	0x9510, 0xffffffff, 0x00000100,
	0xaf04, 0xffffffff, 0x00000100,
	0xae04, 0xffffffff, 0x00000100,
	0x949c, 0xffffffff, 0x00000100,
	0x802c, 0xffffffff, 0xe0000000,
	0x9160, 0xffffffff, 0x00010000,
	0x9164, 0xffffffff, 0x00030002,
	0x9168, 0xffffffff, 0x00040007,
	0x916c, 0xffffffff, 0x00060005,
	0x9170, 0xffffffff, 0x00090008,
	0x9174, 0xffffffff, 0x00020001,
	0x9178, 0xffffffff, 0x00040003,
	0x917c, 0xffffffff, 0x00000007,
	0x9180, 0xffffffff, 0x00060005,
	0x9184, 0xffffffff, 0x00090008,
	0x9188, 0xffffffff, 0x00030002,
	0x918c, 0xffffffff, 0x00050004,
	0x9190, 0xffffffff, 0x00000008,
	0x9194, 0xffffffff, 0x00070006,
	0x9198, 0xffffffff, 0x000a0009,
	0x919c, 0xffffffff, 0x00040003,
	0x91a0, 0xffffffff, 0x00060005,
	0x91a4, 0xffffffff, 0x00000009,
	0x91a8, 0xffffffff, 0x00080007,
	0x91ac, 0xffffffff, 0x000b000a,
	0x91b0, 0xffffffff, 0x00050004,
	0x91b4, 0xffffffff, 0x00070006,
	0x91b8, 0xffffffff, 0x0008000b,
	0x91bc, 0xffffffff, 0x000a0009,
	0x91c0, 0xffffffff, 0x000d000c,
	0x91c4, 0xffffffff, 0x00060005,
	0x91c8, 0xffffffff, 0x00080007,
	0x91cc, 0xffffffff, 0x0000000b,
	0x91d0, 0xffffffff, 0x000a0009,
	0x91d4, 0xffffffff, 0x000d000c,
	0x9150, 0xffffffff, 0x96940200,
	0x8708, 0xffffffff, 0x00900100,
	0xc478, 0xffffffff, 0x00000080,
	0xc404, 0xffffffff, 0x0020003f,
	0x30, 0xffffffff, 0x0000001c,
	0x34, 0x000f0000, 0x000f0000,
	0x160c, 0xffffffff, 0x00000100,
	0x1024, 0xffffffff, 0x00000100,
	0x20a8, 0xffffffff, 0x00000104,
	0x264c, 0x000c0000, 0x000c0000,
	0x2648, 0x000c0000, 0x000c0000,
	0x2f50, 0x00000001, 0x00000001,
	0x30cc, 0xc0000fff, 0x00000104,
	0xc1e4, 0x00000001, 0x00000001,
	0xd0c0, 0xfffffff0, 0x00000100,
	0xd8c0, 0xfffffff0, 0x00000100
};
1022 
1023 static u32 verde_pg_init[] =
1024 {
1025 	0x353c, 0xffffffff, 0x40000,
1026 	0x3538, 0xffffffff, 0x200010ff,
1027 	0x353c, 0xffffffff, 0x0,
1028 	0x353c, 0xffffffff, 0x0,
1029 	0x353c, 0xffffffff, 0x0,
1030 	0x353c, 0xffffffff, 0x0,
1031 	0x353c, 0xffffffff, 0x0,
1032 	0x353c, 0xffffffff, 0x7007,
1033 	0x3538, 0xffffffff, 0x300010ff,
1034 	0x353c, 0xffffffff, 0x0,
1035 	0x353c, 0xffffffff, 0x0,
1036 	0x353c, 0xffffffff, 0x0,
1037 	0x353c, 0xffffffff, 0x0,
1038 	0x353c, 0xffffffff, 0x0,
1039 	0x353c, 0xffffffff, 0x400000,
1040 	0x3538, 0xffffffff, 0x100010ff,
1041 	0x353c, 0xffffffff, 0x0,
1042 	0x353c, 0xffffffff, 0x0,
1043 	0x353c, 0xffffffff, 0x0,
1044 	0x353c, 0xffffffff, 0x0,
1045 	0x353c, 0xffffffff, 0x0,
1046 	0x353c, 0xffffffff, 0x120200,
1047 	0x3538, 0xffffffff, 0x500010ff,
1048 	0x353c, 0xffffffff, 0x0,
1049 	0x353c, 0xffffffff, 0x0,
1050 	0x353c, 0xffffffff, 0x0,
1051 	0x353c, 0xffffffff, 0x0,
1052 	0x353c, 0xffffffff, 0x0,
1053 	0x353c, 0xffffffff, 0x1e1e16,
1054 	0x3538, 0xffffffff, 0x600010ff,
1055 	0x353c, 0xffffffff, 0x0,
1056 	0x353c, 0xffffffff, 0x0,
1057 	0x353c, 0xffffffff, 0x0,
1058 	0x353c, 0xffffffff, 0x0,
1059 	0x353c, 0xffffffff, 0x0,
1060 	0x353c, 0xffffffff, 0x171f1e,
1061 	0x3538, 0xffffffff, 0x700010ff,
1062 	0x353c, 0xffffffff, 0x0,
1063 	0x353c, 0xffffffff, 0x0,
1064 	0x353c, 0xffffffff, 0x0,
1065 	0x353c, 0xffffffff, 0x0,
1066 	0x353c, 0xffffffff, 0x0,
1067 	0x353c, 0xffffffff, 0x0,
1068 	0x3538, 0xffffffff, 0x9ff,
1069 	0x3500, 0xffffffff, 0x0,
1070 	0x3504, 0xffffffff, 0x10000800,
1071 	0x3504, 0xffffffff, 0xf,
1072 	0x3504, 0xffffffff, 0xf,
1073 	0x3500, 0xffffffff, 0x4,
1074 	0x3504, 0xffffffff, 0x1000051e,
1075 	0x3504, 0xffffffff, 0xffff,
1076 	0x3504, 0xffffffff, 0xffff,
1077 	0x3500, 0xffffffff, 0x8,
1078 	0x3504, 0xffffffff, 0x80500,
1079 	0x3500, 0xffffffff, 0x12,
1080 	0x3504, 0xffffffff, 0x9050c,
1081 	0x3500, 0xffffffff, 0x1d,
1082 	0x3504, 0xffffffff, 0xb052c,
1083 	0x3500, 0xffffffff, 0x2a,
1084 	0x3504, 0xffffffff, 0x1053e,
1085 	0x3500, 0xffffffff, 0x2d,
1086 	0x3504, 0xffffffff, 0x10546,
1087 	0x3500, 0xffffffff, 0x30,
1088 	0x3504, 0xffffffff, 0xa054e,
1089 	0x3500, 0xffffffff, 0x3c,
1090 	0x3504, 0xffffffff, 0x1055f,
1091 	0x3500, 0xffffffff, 0x3f,
1092 	0x3504, 0xffffffff, 0x10567,
1093 	0x3500, 0xffffffff, 0x42,
1094 	0x3504, 0xffffffff, 0x1056f,
1095 	0x3500, 0xffffffff, 0x45,
1096 	0x3504, 0xffffffff, 0x10572,
1097 	0x3500, 0xffffffff, 0x48,
1098 	0x3504, 0xffffffff, 0x20575,
1099 	0x3500, 0xffffffff, 0x4c,
1100 	0x3504, 0xffffffff, 0x190801,
1101 	0x3500, 0xffffffff, 0x67,
1102 	0x3504, 0xffffffff, 0x1082a,
1103 	0x3500, 0xffffffff, 0x6a,
1104 	0x3504, 0xffffffff, 0x1b082d,
1105 	0x3500, 0xffffffff, 0x87,
1106 	0x3504, 0xffffffff, 0x310851,
1107 	0x3500, 0xffffffff, 0xba,
1108 	0x3504, 0xffffffff, 0x891,
1109 	0x3500, 0xffffffff, 0xbc,
1110 	0x3504, 0xffffffff, 0x893,
1111 	0x3500, 0xffffffff, 0xbe,
1112 	0x3504, 0xffffffff, 0x20895,
1113 	0x3500, 0xffffffff, 0xc2,
1114 	0x3504, 0xffffffff, 0x20899,
1115 	0x3500, 0xffffffff, 0xc6,
1116 	0x3504, 0xffffffff, 0x2089d,
1117 	0x3500, 0xffffffff, 0xca,
1118 	0x3504, 0xffffffff, 0x8a1,
1119 	0x3500, 0xffffffff, 0xcc,
1120 	0x3504, 0xffffffff, 0x8a3,
1121 	0x3500, 0xffffffff, 0xce,
1122 	0x3504, 0xffffffff, 0x308a5,
1123 	0x3500, 0xffffffff, 0xd3,
1124 	0x3504, 0xffffffff, 0x6d08cd,
1125 	0x3500, 0xffffffff, 0x142,
1126 	0x3504, 0xffffffff, 0x2000095a,
1127 	0x3504, 0xffffffff, 0x1,
1128 	0x3500, 0xffffffff, 0x144,
1129 	0x3504, 0xffffffff, 0x301f095b,
1130 	0x3500, 0xffffffff, 0x165,
1131 	0x3504, 0xffffffff, 0xc094d,
1132 	0x3500, 0xffffffff, 0x173,
1133 	0x3504, 0xffffffff, 0xf096d,
1134 	0x3500, 0xffffffff, 0x184,
1135 	0x3504, 0xffffffff, 0x15097f,
1136 	0x3500, 0xffffffff, 0x19b,
1137 	0x3504, 0xffffffff, 0xc0998,
1138 	0x3500, 0xffffffff, 0x1a9,
1139 	0x3504, 0xffffffff, 0x409a7,
1140 	0x3500, 0xffffffff, 0x1af,
1141 	0x3504, 0xffffffff, 0xcdc,
1142 	0x3500, 0xffffffff, 0x1b1,
1143 	0x3504, 0xffffffff, 0x800,
1144 	0x3508, 0xffffffff, 0x6c9b2000,
1145 	0x3510, 0xfc00, 0x2000,
1146 	0x3544, 0xffffffff, 0xfc0,
1147 	0x28d4, 0x00000100, 0x100
1148 };
1149 
/**
 * si_init_golden_registers - program the per-ASIC "golden" register settings
 *
 * @rdev: radeon_device pointer
 *
 * Applies the family-specific register sequences (base golden registers,
 * RLC registers, clock-gating init, and for Verde the PG init table) via
 * radeon_program_register_sequence().  Unknown families are left untouched.
 */
static void si_init_golden_registers(struct radeon_device *rdev)
{
	switch (rdev->family) {
	case CHIP_TAHITI:
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(tahiti_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 tahiti_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(tahiti_mgcg_cgcg_init));
		radeon_program_register_sequence(rdev,
						 tahiti_golden_registers2,
						 (const u32)ARRAY_SIZE(tahiti_golden_registers2));
		break;
	case CHIP_PITCAIRN:
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(pitcairn_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 pitcairn_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(pitcairn_mgcg_cgcg_init));
		break;
	case CHIP_VERDE:
		radeon_program_register_sequence(rdev,
						 verde_golden_registers,
						 (const u32)ARRAY_SIZE(verde_golden_registers));
		radeon_program_register_sequence(rdev,
						 verde_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(verde_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 verde_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(verde_mgcg_cgcg_init));
		/* Verde is the only SI part with a PG init sequence */
		radeon_program_register_sequence(rdev,
						 verde_pg_init,
						 (const u32)ARRAY_SIZE(verde_pg_init));
		break;
	case CHIP_OLAND:
		radeon_program_register_sequence(rdev,
						 oland_golden_registers,
						 (const u32)ARRAY_SIZE(oland_golden_registers));
		radeon_program_register_sequence(rdev,
						 oland_golden_rlc_registers,
						 (const u32)ARRAY_SIZE(oland_golden_rlc_registers));
		radeon_program_register_sequence(rdev,
						 oland_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(oland_mgcg_cgcg_init));
		break;
	case CHIP_HAINAN:
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers,
						 (const u32)ARRAY_SIZE(hainan_golden_registers));
		radeon_program_register_sequence(rdev,
						 hainan_golden_registers2,
						 (const u32)ARRAY_SIZE(hainan_golden_registers2));
		radeon_program_register_sequence(rdev,
						 hainan_mgcg_cgcg_init,
						 (const u32)ARRAY_SIZE(hainan_mgcg_cgcg_init));
		break;
	default:
		/* not an SI part; nothing to program */
		break;
	}
}
1218 
1219 #define PCIE_BUS_CLK                10000
1220 #define TCLK                        (PCIE_BUS_CLK / 10)
1221 
1222 /**
1223  * si_get_xclk - get the xclk
1224  *
1225  * @rdev: radeon_device pointer
1226  *
1227  * Returns the reference clock used by the gfx engine
1228  * (SI).
1229  */
1230 u32 si_get_xclk(struct radeon_device *rdev)
1231 {
1232         u32 reference_clock = rdev->clock.spll.reference_freq;
1233 	u32 tmp;
1234 
1235 	tmp = RREG32(CG_CLKPIN_CNTL_2);
1236 	if (tmp & MUX_TCLK_TO_XCLK)
1237 		return TCLK;
1238 
1239 	tmp = RREG32(CG_CLKPIN_CNTL);
1240 	if (tmp & XTALIN_DIVIDE)
1241 		return reference_clock / 4;
1242 
1243 	return reference_clock;
1244 }
1245 
1246 /* get temperature in millidegrees */
1247 int si_get_temp(struct radeon_device *rdev)
1248 {
1249 	u32 temp;
1250 	int actual_temp = 0;
1251 
1252 	temp = (RREG32(CG_MULT_THERMAL_STATUS) & CTF_TEMP_MASK) >>
1253 		CTF_TEMP_SHIFT;
1254 
1255 	if (temp & 0x200)
1256 		actual_temp = 255;
1257 	else
1258 		actual_temp = temp & 0x1ff;
1259 
1260 	actual_temp = (actual_temp * 1000);
1261 
1262 	return actual_temp;
1263 }
1264 
1265 #define TAHITI_IO_MC_REGS_SIZE 36
1266 
/* MC_SEQ_IO_DEBUG {index, data} pairs written by si_mc_load_microcode()
 * before loading the Tahiti MC ucode.
 */
static const u32 tahiti_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a77400}
};
1305 
/* MC_SEQ_IO_DEBUG {index, data} pairs written by si_mc_load_microcode()
 * before loading the Pitcairn MC ucode (differs from Tahiti only in the
 * final 0x9f entry).
 */
static const u32 pitcairn_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a47400}
};
1344 
/* MC_SEQ_IO_DEBUG {index, data} pairs written by si_mc_load_microcode()
 * before loading the Verde MC ucode (differs from Tahiti only in the
 * final 0x9f entry).
 */
static const u32 verde_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a37400}
};
1383 
/* MC_SEQ_IO_DEBUG {index, data} pairs written by si_mc_load_microcode()
 * before loading the Oland MC ucode (differs from Tahiti only in the
 * final 0x9f entry).
 */
static const u32 oland_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a17730}
};
1422 
/* MC_SEQ_IO_DEBUG {index, data} pairs written by si_mc_load_microcode()
 * before loading the Hainan MC ucode (differs from Tahiti only in the
 * final 0x9f entry).
 */
static const u32 hainan_io_mc_regs[TAHITI_IO_MC_REGS_SIZE][2] = {
	{0x0000006f, 0x03044000},
	{0x00000070, 0x0480c018},
	{0x00000071, 0x00000040},
	{0x00000072, 0x01000000},
	{0x00000074, 0x000000ff},
	{0x00000075, 0x00143400},
	{0x00000076, 0x08ec0800},
	{0x00000077, 0x040000cc},
	{0x00000079, 0x00000000},
	{0x0000007a, 0x21000409},
	{0x0000007c, 0x00000000},
	{0x0000007d, 0xe8000000},
	{0x0000007e, 0x044408a8},
	{0x0000007f, 0x00000003},
	{0x00000080, 0x00000000},
	{0x00000081, 0x01000000},
	{0x00000082, 0x02000000},
	{0x00000083, 0x00000000},
	{0x00000084, 0xe3f3e4f4},
	{0x00000085, 0x00052024},
	{0x00000087, 0x00000000},
	{0x00000088, 0x66036603},
	{0x00000089, 0x01000000},
	{0x0000008b, 0x1c0a0000},
	{0x0000008c, 0xff010000},
	{0x0000008e, 0xffffefff},
	{0x0000008f, 0xfff3efff},
	{0x00000090, 0xfff3efbf},
	{0x00000094, 0x00101101},
	{0x00000095, 0x00000fff},
	{0x00000096, 0x00116fff},
	{0x00000097, 0x60010000},
	{0x00000098, 0x10010000},
	{0x00000099, 0x00006000},
	{0x0000009a, 0x00001000},
	{0x0000009f, 0x00a07730}
};
1461 
1462 /* ucode loading */
1463 static int si_mc_load_microcode(struct radeon_device *rdev)
1464 {
1465 	const __be32 *fw_data;
1466 	u32 running, blackout = 0;
1467 	u32 *io_mc_regs;
1468 	int i, ucode_size, regs_size;
1469 
1470 	if (!rdev->mc_fw)
1471 		return -EINVAL;
1472 
1473 	switch (rdev->family) {
1474 	case CHIP_TAHITI:
1475 		io_mc_regs = (u32 *)&tahiti_io_mc_regs;
1476 		ucode_size = SI_MC_UCODE_SIZE;
1477 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1478 		break;
1479 	case CHIP_PITCAIRN:
1480 		io_mc_regs = (u32 *)&pitcairn_io_mc_regs;
1481 		ucode_size = SI_MC_UCODE_SIZE;
1482 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1483 		break;
1484 	case CHIP_VERDE:
1485 	default:
1486 		io_mc_regs = (u32 *)&verde_io_mc_regs;
1487 		ucode_size = SI_MC_UCODE_SIZE;
1488 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1489 		break;
1490 	case CHIP_OLAND:
1491 		io_mc_regs = (u32 *)&oland_io_mc_regs;
1492 		ucode_size = OLAND_MC_UCODE_SIZE;
1493 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1494 		break;
1495 	case CHIP_HAINAN:
1496 		io_mc_regs = (u32 *)&hainan_io_mc_regs;
1497 		ucode_size = OLAND_MC_UCODE_SIZE;
1498 		regs_size = TAHITI_IO_MC_REGS_SIZE;
1499 		break;
1500 	}
1501 
1502 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
1503 
1504 	if (running == 0) {
1505 		if (running) {
1506 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
1507 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
1508 		}
1509 
1510 		/* reset the engine and set to writable */
1511 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1512 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
1513 
1514 		/* load mc io regs */
1515 		for (i = 0; i < regs_size; i++) {
1516 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
1517 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
1518 		}
1519 		/* load the MC ucode */
1520 		fw_data = (const __be32 *)rdev->mc_fw->data;
1521 		for (i = 0; i < ucode_size; i++)
1522 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
1523 
1524 		/* put the engine back into the active state */
1525 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
1526 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
1527 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
1528 
1529 		/* wait for training to complete */
1530 		for (i = 0; i < rdev->usec_timeout; i++) {
1531 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
1532 				break;
1533 			udelay(1);
1534 		}
1535 		for (i = 0; i < rdev->usec_timeout; i++) {
1536 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
1537 				break;
1538 			udelay(1);
1539 		}
1540 
1541 		if (running)
1542 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
1543 	}
1544 
1545 	return 0;
1546 }
1547 
1548 static int si_init_microcode(struct radeon_device *rdev)
1549 {
1550 	const char *chip_name;
1551 	const char *rlc_chip_name;
1552 	size_t pfp_req_size, me_req_size, ce_req_size, rlc_req_size, mc_req_size;
1553 	size_t smc_req_size;
1554 	char fw_name[30];
1555 	int err;
1556 
1557 	DRM_DEBUG("\n");
1558 
1559 	switch (rdev->family) {
1560 	case CHIP_TAHITI:
1561 		chip_name = "TAHITI";
1562 		rlc_chip_name = "TAHITI";
1563 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1564 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1565 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1566 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1567 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1568 		smc_req_size = ALIGN(TAHITI_SMC_UCODE_SIZE, 4);
1569 		break;
1570 	case CHIP_PITCAIRN:
1571 		chip_name = "PITCAIRN";
1572 		rlc_chip_name = "PITCAIRN";
1573 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1574 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1575 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1576 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1577 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1578 		smc_req_size = ALIGN(PITCAIRN_SMC_UCODE_SIZE, 4);
1579 		break;
1580 	case CHIP_VERDE:
1581 		chip_name = "VERDE";
1582 		rlc_chip_name = "VERDE";
1583 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1584 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1585 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1586 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1587 		mc_req_size = SI_MC_UCODE_SIZE * 4;
1588 		smc_req_size = ALIGN(VERDE_SMC_UCODE_SIZE, 4);
1589 		break;
1590 	case CHIP_OLAND:
1591 		chip_name = "OLAND";
1592 		rlc_chip_name = "OLAND";
1593 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1594 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1595 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1596 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1597 		mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1598 		smc_req_size = ALIGN(OLAND_SMC_UCODE_SIZE, 4);
1599 		break;
1600 	case CHIP_HAINAN:
1601 		chip_name = "HAINAN";
1602 		rlc_chip_name = "HAINAN";
1603 		pfp_req_size = SI_PFP_UCODE_SIZE * 4;
1604 		me_req_size = SI_PM4_UCODE_SIZE * 4;
1605 		ce_req_size = SI_CE_UCODE_SIZE * 4;
1606 		rlc_req_size = SI_RLC_UCODE_SIZE * 4;
1607 		mc_req_size = OLAND_MC_UCODE_SIZE * 4;
1608 		smc_req_size = ALIGN(HAINAN_SMC_UCODE_SIZE, 4);
1609 		break;
1610 	default: BUG();
1611 	}
1612 
1613 	DRM_INFO("Loading %s Microcode\n", chip_name);
1614 
1615 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
1616 	err = request_firmware(&rdev->pfp_fw, fw_name, rdev->dev);
1617 	if (err)
1618 		goto out;
1619 	if (rdev->pfp_fw->size != pfp_req_size) {
1620 		printk(KERN_ERR
1621 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1622 		       rdev->pfp_fw->size, fw_name);
1623 		err = -EINVAL;
1624 		goto out;
1625 	}
1626 
1627 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
1628 	err = request_firmware(&rdev->me_fw, fw_name, rdev->dev);
1629 	if (err)
1630 		goto out;
1631 	if (rdev->me_fw->size != me_req_size) {
1632 		printk(KERN_ERR
1633 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1634 		       rdev->me_fw->size, fw_name);
1635 		err = -EINVAL;
1636 	}
1637 
1638 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
1639 	err = request_firmware(&rdev->ce_fw, fw_name, rdev->dev);
1640 	if (err)
1641 		goto out;
1642 	if (rdev->ce_fw->size != ce_req_size) {
1643 		printk(KERN_ERR
1644 		       "si_cp: Bogus length %zu in firmware \"%s\"\n",
1645 		       rdev->ce_fw->size, fw_name);
1646 		err = -EINVAL;
1647 	}
1648 
1649 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", rlc_chip_name);
1650 	err = request_firmware(&rdev->rlc_fw, fw_name, rdev->dev);
1651 	if (err)
1652 		goto out;
1653 	if (rdev->rlc_fw->size != rlc_req_size) {
1654 		printk(KERN_ERR
1655 		       "si_rlc: Bogus length %zu in firmware \"%s\"\n",
1656 		       rdev->rlc_fw->size, fw_name);
1657 		err = -EINVAL;
1658 	}
1659 
1660 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
1661 	err = request_firmware(&rdev->mc_fw, fw_name, rdev->dev);
1662 	if (err)
1663 		goto out;
1664 	if (rdev->mc_fw->size != mc_req_size) {
1665 		printk(KERN_ERR
1666 		       "si_mc: Bogus length %zu in firmware \"%s\"\n",
1667 		       rdev->mc_fw->size, fw_name);
1668 		err = -EINVAL;
1669 	}
1670 
1671 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_smc.bin", chip_name);
1672 	err = request_firmware(&rdev->smc_fw, fw_name, rdev->dev);
1673 	if (err) {
1674 		printk(KERN_ERR
1675 		       "smc: error loading firmware \"%s\"\n",
1676 		       fw_name);
1677 		release_firmware(rdev->smc_fw);
1678 		rdev->smc_fw = NULL;
1679 	} else if (rdev->smc_fw->size != smc_req_size) {
1680 		printk(KERN_ERR
1681 		       "si_smc: Bogus length %zu in firmware \"%s\"\n",
1682 		       rdev->smc_fw->size, fw_name);
1683 		err = -EINVAL;
1684 	}
1685 
1686 out:
1687 	if (err) {
1688 		if (err != -EINVAL)
1689 			printk(KERN_ERR
1690 			       "si_cp: Failed to load firmware \"%s\"\n",
1691 			       fw_name);
1692 		release_firmware(rdev->pfp_fw);
1693 		rdev->pfp_fw = NULL;
1694 		release_firmware(rdev->me_fw);
1695 		rdev->me_fw = NULL;
1696 		release_firmware(rdev->ce_fw);
1697 		rdev->ce_fw = NULL;
1698 		release_firmware(rdev->rlc_fw);
1699 		rdev->rlc_fw = NULL;
1700 		release_firmware(rdev->mc_fw);
1701 		rdev->mc_fw = NULL;
1702 		release_firmware(rdev->smc_fw);
1703 		rdev->smc_fw = NULL;
1704 	}
1705 	return err;
1706 }
1707 
/* watermark setup */
/**
 * dce6_line_buffer_adjust - configure the line buffer split for a crtc
 *
 * @rdev: radeon_device pointer
 * @radeon_crtc: the crtc whose line buffer share is being set
 * @mode: current mode on this crtc (NULL if the crtc is disabled)
 * @other_mode: mode on the crtc sharing this line buffer (NULL if none)
 *
 * Programs DC_LB_MEMORY_SPLIT (half vs. whole line buffer) and the DMIF
 * buffer allocation for this crtc's pipe, polls for the allocation to
 * complete, and returns this crtc's line buffer allocation (0 when the
 * crtc is disabled).
 */
static u32 dce6_line_buffer_adjust(struct radeon_device *rdev,
				   struct radeon_crtc *radeon_crtc,
				   struct drm_display_mode *mode,
				   struct drm_display_mode *other_mode)
{
	u32 tmp, buffer_alloc, i;
	u32 pipe_offset = radeon_crtc->crtc_id * 0x20;
	/*
	 * Line Buffer Setup
	 * There are 3 line buffers, each one shared by 2 display controllers.
	 * DC_LB_MEMORY_SPLIT controls how that line buffer is shared between
	 * the display controllers.  The paritioning is done via one of four
	 * preset allocations specified in bits 21:20:
	 *  0 - half lb
	 *  2 - whole lb, other crtc must be disabled
	 */
	/* this can get tricky if we have two large displays on a paired group
	 * of crtcs.  Ideally for multiple large displays we'd assign them to
	 * non-linked crtcs for maximum line buffer allocation.
	 */
	if (radeon_crtc->base.enabled && mode) {
		if (other_mode) {
			tmp = 0; /* 1/2 */
			buffer_alloc = 1;
		} else {
			tmp = 2; /* whole */
			buffer_alloc = 2;
		}
	} else {
		/* disabled crtc gets no line buffer or DMIF buffers */
		tmp = 0;
		buffer_alloc = 0;
	}

	WREG32(DC_LB_MEMORY_SPLIT + radeon_crtc->crtc_offset,
	       DC_LB_MEMORY_CONFIG(tmp));

	WREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset,
	       DMIF_BUFFERS_ALLOCATED(buffer_alloc));
	/* poll until the hw acknowledges the DMIF buffer allocation */
	for (i = 0; i < rdev->usec_timeout; i++) {
		if (RREG32(PIPE0_DMIF_BUFFER_CONTROL + pipe_offset) &
		    DMIF_BUFFERS_ALLOCATED_COMPLETED)
			break;
		udelay(1);
	}

	if (radeon_crtc->base.enabled && mode) {
		switch (tmp) {
		case 0:
		default:
			return 4096 * 2;
		case 2:
			return 8192 * 2;
		}
	}

	/* controller not enabled, so no lb used */
	return 0;
}
1767 
1768 static u32 si_get_number_of_dram_channels(struct radeon_device *rdev)
1769 {
1770 	u32 tmp = RREG32(MC_SHARED_CHMAP);
1771 
1772 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
1773 	case 0:
1774 	default:
1775 		return 1;
1776 	case 1:
1777 		return 2;
1778 	case 2:
1779 		return 4;
1780 	case 3:
1781 		return 8;
1782 	case 4:
1783 		return 3;
1784 	case 5:
1785 		return 6;
1786 	case 6:
1787 		return 10;
1788 	case 7:
1789 		return 12;
1790 	case 8:
1791 		return 16;
1792 	}
1793 }
1794 
/* Input parameters for the DCE6 display watermark/bandwidth calculations
 * implemented by the dce6_*_bandwidth() helpers below.
 */
struct dce6_wm_params {
	u32 dram_channels; /* number of dram channels */
	u32 yclk;          /* bandwidth per dram data pin in kHz */
	u32 sclk;          /* engine clock in kHz */
	u32 disp_clk;      /* display clock in kHz */
	u32 src_width;     /* viewport width */
	u32 active_time;   /* active display time in ns */
	u32 blank_time;    /* blank time in ns */
	bool interlaced;    /* mode is interlaced */
	fixed20_12 vsc;    /* vertical scale ratio */
	u32 num_heads;     /* number of active crtcs */
	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
	u32 lb_size;       /* line buffer allocated to pipe */
	u32 vtaps;         /* vertical scaler taps */
};
1810 
1811 static u32 dce6_dram_bandwidth(struct dce6_wm_params *wm)
1812 {
1813 	/* Calculate raw DRAM Bandwidth */
1814 	fixed20_12 dram_efficiency; /* 0.7 */
1815 	fixed20_12 yclk, dram_channels, bandwidth;
1816 	fixed20_12 a;
1817 
1818 	a.full = dfixed_const(1000);
1819 	yclk.full = dfixed_const(wm->yclk);
1820 	yclk.full = dfixed_div(yclk, a);
1821 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1822 	a.full = dfixed_const(10);
1823 	dram_efficiency.full = dfixed_const(7);
1824 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
1825 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1826 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
1827 
1828 	return dfixed_trunc(bandwidth);
1829 }
1830 
1831 static u32 dce6_dram_bandwidth_for_display(struct dce6_wm_params *wm)
1832 {
1833 	/* Calculate DRAM Bandwidth and the part allocated to display. */
1834 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
1835 	fixed20_12 yclk, dram_channels, bandwidth;
1836 	fixed20_12 a;
1837 
1838 	a.full = dfixed_const(1000);
1839 	yclk.full = dfixed_const(wm->yclk);
1840 	yclk.full = dfixed_div(yclk, a);
1841 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
1842 	a.full = dfixed_const(10);
1843 	disp_dram_allocation.full = dfixed_const(3); /* XXX worse case value 0.3 */
1844 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
1845 	bandwidth.full = dfixed_mul(dram_channels, yclk);
1846 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
1847 
1848 	return dfixed_trunc(bandwidth);
1849 }
1850 
1851 static u32 dce6_data_return_bandwidth(struct dce6_wm_params *wm)
1852 {
1853 	/* Calculate the display Data return Bandwidth */
1854 	fixed20_12 return_efficiency; /* 0.8 */
1855 	fixed20_12 sclk, bandwidth;
1856 	fixed20_12 a;
1857 
1858 	a.full = dfixed_const(1000);
1859 	sclk.full = dfixed_const(wm->sclk);
1860 	sclk.full = dfixed_div(sclk, a);
1861 	a.full = dfixed_const(10);
1862 	return_efficiency.full = dfixed_const(8);
1863 	return_efficiency.full = dfixed_div(return_efficiency, a);
1864 	a.full = dfixed_const(32);
1865 	bandwidth.full = dfixed_mul(a, sclk);
1866 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
1867 
1868 	return dfixed_trunc(bandwidth);
1869 }
1870 
/* DMIF request size is a constant 32 bytes; wm is unused but kept for
 * uniformity with the other dce6_* watermark helpers.
 */
static u32 dce6_get_dmif_bytes_per_request(struct dce6_wm_params *wm)
{
	return 32;
}
1875 
1876 static u32 dce6_dmif_request_bandwidth(struct dce6_wm_params *wm)
1877 {
1878 	/* Calculate the DMIF Request Bandwidth */
1879 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
1880 	fixed20_12 disp_clk, sclk, bandwidth;
1881 	fixed20_12 a, b1, b2;
1882 	u32 min_bandwidth;
1883 
1884 	a.full = dfixed_const(1000);
1885 	disp_clk.full = dfixed_const(wm->disp_clk);
1886 	disp_clk.full = dfixed_div(disp_clk, a);
1887 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm) / 2);
1888 	b1.full = dfixed_mul(a, disp_clk);
1889 
1890 	a.full = dfixed_const(1000);
1891 	sclk.full = dfixed_const(wm->sclk);
1892 	sclk.full = dfixed_div(sclk, a);
1893 	a.full = dfixed_const(dce6_get_dmif_bytes_per_request(wm));
1894 	b2.full = dfixed_mul(a, sclk);
1895 
1896 	a.full = dfixed_const(10);
1897 	disp_clk_request_efficiency.full = dfixed_const(8);
1898 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
1899 
1900 	min_bandwidth = min(dfixed_trunc(b1), dfixed_trunc(b2));
1901 
1902 	a.full = dfixed_const(min_bandwidth);
1903 	bandwidth.full = dfixed_mul(a, disp_clk_request_efficiency);
1904 
1905 	return dfixed_trunc(bandwidth);
1906 }
1907 
1908 static u32 dce6_available_bandwidth(struct dce6_wm_params *wm)
1909 {
1910 	/* Calculate the Available bandwidth. Display can use this temporarily but not in average. */
1911 	u32 dram_bandwidth = dce6_dram_bandwidth(wm);
1912 	u32 data_return_bandwidth = dce6_data_return_bandwidth(wm);
1913 	u32 dmif_req_bandwidth = dce6_dmif_request_bandwidth(wm);
1914 
1915 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
1916 }
1917 
1918 static u32 dce6_average_bandwidth(struct dce6_wm_params *wm)
1919 {
1920 	/* Calculate the display mode Average Bandwidth
1921 	 * DisplayMode should contain the source and destination dimensions,
1922 	 * timing, etc.
1923 	 */
1924 	fixed20_12 bpp;
1925 	fixed20_12 line_time;
1926 	fixed20_12 src_width;
1927 	fixed20_12 bandwidth;
1928 	fixed20_12 a;
1929 
1930 	a.full = dfixed_const(1000);
1931 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
1932 	line_time.full = dfixed_div(line_time, a);
1933 	bpp.full = dfixed_const(wm->bytes_per_pixel);
1934 	src_width.full = dfixed_const(wm->src_width);
1935 	bandwidth.full = dfixed_mul(src_width, bpp);
1936 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
1937 	bandwidth.full = dfixed_div(bandwidth, line_time);
1938 
1939 	return dfixed_trunc(bandwidth);
1940 }
1941 
static u32 dce6_latency_watermark(struct dce6_wm_params *wm)
{
	/* First calculate the latency in ns */
	u32 mc_latency = 2000; /* 2000 ns. */
	u32 available_bandwidth = dce6_available_bandwidth(wm);
	/* time for one 512-byte chunk to come back (x8 bits, x1000 for ns) */
	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
	/* time for a 128x4-byte cursor line pair to come back */
	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
	/* worst-case wait on the other heads' outstanding chunk and cursor requests */
	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
		(wm->num_heads * cursor_line_pair_return_time);
	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
	u32 tmp, dmif_size = 12288;
	fixed20_12 a, b, c;

	if (wm->num_heads == 0)
		return 0;

	/* heavy downscaling/filtering requires more source lines per output line */
	a.full = dfixed_const(2);
	b.full = dfixed_const(1);
	if ((wm->vsc.full > a.full) ||
	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
	    (wm->vtaps >= 5) ||
	    ((wm->vsc.full >= a.full) && wm->interlaced))
		max_src_lines_per_dst_line = 4;
	else
		max_src_lines_per_dst_line = 2;

	/* a = this head's share of the available bandwidth */
	a.full = dfixed_const(available_bandwidth);
	b.full = dfixed_const(wm->num_heads);
	a.full = dfixed_div(a, b);

	/* b = presumably the rate the dmif buffer can sustain over
	 * (mc_latency + 512) disp_clk cycles — NOTE(review): derivation
	 * not documented here, confirm against hw docs.
	 */
	b.full = dfixed_const(mc_latency + 512);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(b, c);

	c.full = dfixed_const(dmif_size);
	b.full = dfixed_div(c, b);

	tmp = min(dfixed_trunc(a), dfixed_trunc(b));

	/* b = bytes consumed per unit time at disp_clk */
	b.full = dfixed_const(1000);
	c.full = dfixed_const(wm->disp_clk);
	b.full = dfixed_div(c, b);
	c.full = dfixed_const(wm->bytes_per_pixel);
	b.full = dfixed_mul(b, c);

	/* line buffer fill rate is bounded by both limits above */
	lb_fill_bw = min(tmp, dfixed_trunc(b));

	/* time to fill the worst-case number of source lines at lb_fill_bw */
	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
	b.full = dfixed_const(1000);
	c.full = dfixed_const(lb_fill_bw);
	b.full = dfixed_div(c, b);
	a.full = dfixed_div(a, b);
	line_fill_time = dfixed_trunc(a);

	/* if the fill can't keep up with scanout, pad the watermark by the deficit */
	if (line_fill_time < wm->active_time)
		return latency;
	else
		return latency + (line_fill_time - wm->active_time);

}
2004 
2005 static bool dce6_average_bandwidth_vs_dram_bandwidth_for_display(struct dce6_wm_params *wm)
2006 {
2007 	if (dce6_average_bandwidth(wm) <=
2008 	    (dce6_dram_bandwidth_for_display(wm) / wm->num_heads))
2009 		return true;
2010 	else
2011 		return false;
2012 };
2013 
2014 static bool dce6_average_bandwidth_vs_available_bandwidth(struct dce6_wm_params *wm)
2015 {
2016 	if (dce6_average_bandwidth(wm) <=
2017 	    (dce6_available_bandwidth(wm) / wm->num_heads))
2018 		return true;
2019 	else
2020 		return false;
2021 };
2022 
2023 static bool dce6_check_latency_hiding(struct dce6_wm_params *wm)
2024 {
2025 	u32 lb_partitions = wm->lb_size / wm->src_width;
2026 	u32 line_time = wm->active_time + wm->blank_time;
2027 	u32 latency_tolerant_lines;
2028 	u32 latency_hiding;
2029 	fixed20_12 a;
2030 
2031 	a.full = dfixed_const(1);
2032 	if (wm->vsc.full > a.full)
2033 		latency_tolerant_lines = 1;
2034 	else {
2035 		if (lb_partitions <= (wm->vtaps + 1))
2036 			latency_tolerant_lines = 1;
2037 		else
2038 			latency_tolerant_lines = 2;
2039 	}
2040 
2041 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
2042 
2043 	if (dce6_latency_watermark(wm) <= latency_hiding)
2044 		return true;
2045 	else
2046 		return false;
2047 }
2048 
/* dce6_program_watermarks - program display watermarks for one crtc
 *
 * Builds high-clock and low-clock watermark parameter sets from the
 * current mode, line buffer allocation and number of active heads,
 * computes the latency watermarks and priority marks, and writes them
 * to the DPG arbitration/latency and PRIORITY_A/B registers.
 */
static void dce6_program_watermarks(struct radeon_device *rdev,
					 struct radeon_crtc *radeon_crtc,
					 u32 lb_size, u32 num_heads)
{
	struct drm_display_mode *mode = &radeon_crtc->base.mode;
	struct dce6_wm_params wm_low, wm_high;
	u32 dram_channels;
	u32 pixel_period;
	u32 line_time = 0;
	u32 latency_watermark_a = 0, latency_watermark_b = 0;
	u32 priority_a_mark = 0, priority_b_mark = 0;
	u32 priority_a_cnt = PRIORITY_OFF;
	u32 priority_b_cnt = PRIORITY_OFF;
	u32 tmp, arb_control3;
	fixed20_12 a, b, c;

	if (radeon_crtc->base.enabled && num_heads && mode) {
		pixel_period = 1000000 / (u32)mode->clock;
		/* line time is clamped to the 16-bit register field */
		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
		priority_a_cnt = 0;
		priority_b_cnt = 0;

		if (rdev->family == CHIP_ARUBA)
			dram_channels = evergreen_get_number_of_dram_channels(rdev);
		else
			dram_channels = si_get_number_of_dram_channels(rdev);

		/* watermark for high clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			/* dpm enabled: use the highest (low=false) dpm clocks */
			wm_high.yclk =
				radeon_dpm_get_mclk(rdev, false) * 10;
			wm_high.sclk =
				radeon_dpm_get_sclk(rdev, false) * 10;
		} else {
			wm_high.yclk = rdev->pm.current_mclk * 10;
			wm_high.sclk = rdev->pm.current_sclk * 10;
		}

		wm_high.disp_clk = mode->clock;
		wm_high.src_width = mode->crtc_hdisplay;
		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
		wm_high.blank_time = line_time - wm_high.active_time;
		wm_high.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_high.interlaced = true;
		wm_high.vsc = radeon_crtc->vsc;
		wm_high.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_high.vtaps = 2;
		wm_high.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_high.lb_size = lb_size;
		wm_high.dram_channels = dram_channels;
		wm_high.num_heads = num_heads;

		/* watermark for low clocks */
		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
			/* dpm enabled: use the lowest (low=true) dpm clocks */
			wm_low.yclk =
				radeon_dpm_get_mclk(rdev, true) * 10;
			wm_low.sclk =
				radeon_dpm_get_sclk(rdev, true) * 10;
		} else {
			wm_low.yclk = rdev->pm.current_mclk * 10;
			wm_low.sclk = rdev->pm.current_sclk * 10;
		}

		wm_low.disp_clk = mode->clock;
		wm_low.src_width = mode->crtc_hdisplay;
		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
		wm_low.blank_time = line_time - wm_low.active_time;
		wm_low.interlaced = false;
		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
			wm_low.interlaced = true;
		wm_low.vsc = radeon_crtc->vsc;
		wm_low.vtaps = 1;
		if (radeon_crtc->rmx_type != RMX_OFF)
			wm_low.vtaps = 2;
		wm_low.bytes_per_pixel = 4; /* XXX: get this from fb config */
		wm_low.lb_size = lb_size;
		wm_low.dram_channels = dram_channels;
		wm_low.num_heads = num_heads;

		/* set for high clocks */
		latency_watermark_a = min(dce6_latency_watermark(&wm_high), (u32)65535);
		/* set for low clocks */
		latency_watermark_b = min(dce6_latency_watermark(&wm_low), (u32)65535);

		/* possibly force display priority to high */
		/* should really do this at mode validation time... */
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_high) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_high) ||
		    !dce6_check_latency_hiding(&wm_high) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}
		if (!dce6_average_bandwidth_vs_dram_bandwidth_for_display(&wm_low) ||
		    !dce6_average_bandwidth_vs_available_bandwidth(&wm_low) ||
		    !dce6_check_latency_hiding(&wm_low) ||
		    (rdev->disp_priority == 2)) {
			DRM_DEBUG_KMS("force priority to high\n");
			priority_a_cnt |= PRIORITY_ALWAYS_ON;
			priority_b_cnt |= PRIORITY_ALWAYS_ON;
		}

		/* priority mark A: watermark_a * (clock/1000) * hsc / 1000 / 16 */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_a);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_a_mark = dfixed_trunc(c);
		priority_a_cnt |= priority_a_mark & PRIORITY_MARK_MASK;

		/* priority mark B: same formula with watermark_b */
		a.full = dfixed_const(1000);
		b.full = dfixed_const(mode->clock);
		b.full = dfixed_div(b, a);
		c.full = dfixed_const(latency_watermark_b);
		c.full = dfixed_mul(c, b);
		c.full = dfixed_mul(c, radeon_crtc->hsc);
		c.full = dfixed_div(c, a);
		a.full = dfixed_const(16);
		c.full = dfixed_div(c, a);
		priority_b_mark = dfixed_trunc(c);
		priority_b_cnt |= priority_b_mark & PRIORITY_MARK_MASK;
	}

	/* select wm A */
	arb_control3 = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp = arb_control3;
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(1);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* select wm B */
	tmp = RREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset);
	tmp &= ~LATENCY_WATERMARK_MASK(3);
	tmp |= LATENCY_WATERMARK_MASK(2);
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, tmp);
	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
		LATENCY_HIGH_WATERMARK(line_time)));
	/* restore original selection */
	WREG32(DPG_PIPE_ARBITRATION_CONTROL3 + radeon_crtc->crtc_offset, arb_control3);

	/* write the priority marks */
	WREG32(PRIORITY_A_CNT + radeon_crtc->crtc_offset, priority_a_cnt);
	WREG32(PRIORITY_B_CNT + radeon_crtc->crtc_offset, priority_b_cnt);

	/* save values for DPM */
	radeon_crtc->line_time = line_time;
	radeon_crtc->wm_high = latency_watermark_a;
	radeon_crtc->wm_low = latency_watermark_b;
}
2208 
2209 void dce6_bandwidth_update(struct radeon_device *rdev)
2210 {
2211 	struct drm_display_mode *mode0 = NULL;
2212 	struct drm_display_mode *mode1 = NULL;
2213 	u32 num_heads = 0, lb_size;
2214 	int i;
2215 
2216 	radeon_update_display_priority(rdev);
2217 
2218 	for (i = 0; i < rdev->num_crtc; i++) {
2219 		if (rdev->mode_info.crtcs[i]->base.enabled)
2220 			num_heads++;
2221 	}
2222 	for (i = 0; i < rdev->num_crtc; i += 2) {
2223 		mode0 = &rdev->mode_info.crtcs[i]->base.mode;
2224 		mode1 = &rdev->mode_info.crtcs[i+1]->base.mode;
2225 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode0, mode1);
2226 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
2227 		lb_size = dce6_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i+1], mode1, mode0);
2228 		dce6_program_watermarks(rdev, rdev->mode_info.crtcs[i+1], lb_size, num_heads);
2229 	}
2230 }
2231 
2232 /*
2233  * Core functions
2234  */
2235 static void si_tiling_mode_table_init(struct radeon_device *rdev)
2236 {
2237 	const u32 num_tile_mode_states = 32;
2238 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
2239 
2240 	switch (rdev->config.si.mem_row_size_in_kb) {
2241 	case 1:
2242 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
2243 		break;
2244 	case 2:
2245 	default:
2246 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
2247 		break;
2248 	case 4:
2249 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
2250 		break;
2251 	}
2252 
2253 	if ((rdev->family == CHIP_TAHITI) ||
2254 	    (rdev->family == CHIP_PITCAIRN)) {
2255 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2256 			switch (reg_offset) {
2257 			case 0:  /* non-AA compressed depth or any compressed stencil */
2258 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2259 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2260 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2261 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2262 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2263 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2264 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2265 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2266 				break;
2267 			case 1:  /* 2xAA/4xAA compressed depth only */
2268 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2269 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2270 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2271 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2272 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2273 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2274 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2275 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2276 				break;
2277 			case 2:  /* 8xAA compressed depth only */
2278 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2279 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2280 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2281 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2282 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2283 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2284 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2285 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2286 				break;
2287 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2288 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2289 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2290 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2291 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2292 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2293 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2294 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2295 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2296 				break;
2297 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2298 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2299 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2300 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2301 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2302 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2303 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2304 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2305 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2306 				break;
2307 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2308 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2309 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2310 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2311 						 TILE_SPLIT(split_equal_to_row_size) |
2312 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2313 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2314 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2315 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2316 				break;
2317 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2318 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2319 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2320 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2321 						 TILE_SPLIT(split_equal_to_row_size) |
2322 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2323 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2324 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2325 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2326 				break;
2327 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2328 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2329 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2330 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2331 						 TILE_SPLIT(split_equal_to_row_size) |
2332 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2333 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2334 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2335 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2336 				break;
2337 			case 8:  /* 1D and 1D Array Surfaces */
2338 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2339 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2340 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2341 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2342 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2343 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2344 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2345 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2346 				break;
2347 			case 9:  /* Displayable maps. */
2348 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2349 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2350 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2351 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2352 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2353 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2354 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2355 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2356 				break;
2357 			case 10:  /* Display 8bpp. */
2358 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2359 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2360 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2361 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2362 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2363 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2364 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2365 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2366 				break;
2367 			case 11:  /* Display 16bpp. */
2368 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2369 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2370 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2371 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2372 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2373 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2374 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2375 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2376 				break;
2377 			case 12:  /* Display 32bpp. */
2378 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2379 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2380 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2381 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2382 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2383 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2384 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2385 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2386 				break;
2387 			case 13:  /* Thin. */
2388 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2389 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2390 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2391 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2392 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2393 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2394 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2395 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2396 				break;
2397 			case 14:  /* Thin 8 bpp. */
2398 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2399 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2400 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2401 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2402 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2403 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2404 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2405 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2406 				break;
2407 			case 15:  /* Thin 16 bpp. */
2408 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2409 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2410 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2411 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2412 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2413 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2414 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2415 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2416 				break;
2417 			case 16:  /* Thin 32 bpp. */
2418 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2419 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2420 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2421 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2422 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2423 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2424 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2425 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2426 				break;
2427 			case 17:  /* Thin 64 bpp. */
2428 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2429 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2430 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2431 						 TILE_SPLIT(split_equal_to_row_size) |
2432 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2433 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2434 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2435 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2436 				break;
2437 			case 21:  /* 8 bpp PRT. */
2438 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2439 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2440 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2441 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2442 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2443 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2444 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2445 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2446 				break;
2447 			case 22:  /* 16 bpp PRT */
2448 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2449 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2450 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2451 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2452 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2453 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2454 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2455 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2456 				break;
2457 			case 23:  /* 32 bpp PRT */
2458 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2459 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2460 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2461 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2462 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2463 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2464 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2465 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2466 				break;
2467 			case 24:  /* 64 bpp PRT */
2468 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2469 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2470 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2471 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2472 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2473 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2474 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2475 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2476 				break;
2477 			case 25:  /* 128 bpp PRT */
2478 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2479 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2480 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2481 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2482 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2483 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2484 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2485 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2486 				break;
2487 			default:
2488 				gb_tile_moden = 0;
2489 				break;
2490 			}
2491 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2492 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2493 		}
2494 	} else if ((rdev->family == CHIP_VERDE) ||
2495 		   (rdev->family == CHIP_OLAND) ||
2496 		   (rdev->family == CHIP_HAINAN)) {
2497 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
2498 			switch (reg_offset) {
2499 			case 0:  /* non-AA compressed depth or any compressed stencil */
2500 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2501 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2502 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2503 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2504 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2505 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2506 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2507 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2508 				break;
2509 			case 1:  /* 2xAA/4xAA compressed depth only */
2510 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2511 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2512 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2513 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2514 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2515 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2516 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2517 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2518 				break;
2519 			case 2:  /* 8xAA compressed depth only */
2520 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2521 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2522 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2523 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2524 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2525 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2526 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2527 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2528 				break;
2529 			case 3:  /* 2xAA/4xAA compressed depth with stencil (for depth buffer) */
2530 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2531 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2532 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2533 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
2534 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2535 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2536 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2537 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2538 				break;
2539 			case 4:  /* Maps w/ a dimension less than the 2D macro-tile dimensions (for mipmapped depth textures) */
2540 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2541 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2542 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2543 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2544 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2545 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2546 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2547 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2548 				break;
2549 			case 5:  /* Uncompressed 16bpp depth - and stencil buffer allocated with it */
2550 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2551 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2552 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2553 						 TILE_SPLIT(split_equal_to_row_size) |
2554 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2555 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2556 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2557 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2558 				break;
2559 			case 6:  /* Uncompressed 32bpp depth - and stencil buffer allocated with it */
2560 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2561 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2562 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2563 						 TILE_SPLIT(split_equal_to_row_size) |
2564 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2565 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2566 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2567 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2568 				break;
2569 			case 7:  /* Uncompressed 8bpp stencil without depth (drivers typically do not use) */
2570 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2571 						 MICRO_TILE_MODE(ADDR_SURF_DEPTH_MICRO_TILING) |
2572 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2573 						 TILE_SPLIT(split_equal_to_row_size) |
2574 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2575 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2576 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2577 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2578 				break;
2579 			case 8:  /* 1D and 1D Array Surfaces */
2580 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
2581 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2582 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2583 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2584 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2585 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2586 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2587 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2588 				break;
2589 			case 9:  /* Displayable maps. */
2590 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2591 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2592 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2593 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2594 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2595 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2596 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2597 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2598 				break;
2599 			case 10:  /* Display 8bpp. */
2600 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2601 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2602 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2603 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2604 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2605 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2606 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2607 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2608 				break;
2609 			case 11:  /* Display 16bpp. */
2610 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2611 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2612 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2613 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2614 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2615 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2616 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2617 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2618 				break;
2619 			case 12:  /* Display 32bpp. */
2620 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2621 						 MICRO_TILE_MODE(ADDR_SURF_DISPLAY_MICRO_TILING) |
2622 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2623 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2624 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2625 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2626 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2627 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2628 				break;
2629 			case 13:  /* Thin. */
2630 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
2631 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2632 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2633 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
2634 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2635 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2636 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2637 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2638 				break;
2639 			case 14:  /* Thin 8 bpp. */
2640 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2641 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2642 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2643 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2644 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2645 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2646 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2647 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2648 				break;
2649 			case 15:  /* Thin 16 bpp. */
2650 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2651 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2652 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2653 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2654 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2655 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2656 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2657 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2658 				break;
2659 			case 16:  /* Thin 32 bpp. */
2660 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2661 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2662 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2663 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2664 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2665 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2666 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2667 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2668 				break;
2669 			case 17:  /* Thin 64 bpp. */
2670 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2671 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2672 						 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
2673 						 TILE_SPLIT(split_equal_to_row_size) |
2674 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2675 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2676 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2677 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2678 				break;
2679 			case 21:  /* 8 bpp PRT. */
2680 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2681 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2682 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2683 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2684 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2685 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
2686 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2687 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2688 				break;
2689 			case 22:  /* 16 bpp PRT */
2690 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2691 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2692 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2693 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2694 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2695 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2696 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
2697 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4));
2698 				break;
2699 			case 23:  /* 32 bpp PRT */
2700 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2701 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2702 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2703 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
2704 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2705 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2706 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
2707 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2708 				break;
2709 			case 24:  /* 64 bpp PRT */
2710 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2711 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2712 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2713 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
2714 						 NUM_BANKS(ADDR_SURF_16_BANK) |
2715 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2716 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2717 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2));
2718 				break;
2719 			case 25:  /* 128 bpp PRT */
2720 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
2721 						 MICRO_TILE_MODE(ADDR_SURF_THIN_MICRO_TILING) |
2722 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
2723 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_1KB) |
2724 						 NUM_BANKS(ADDR_SURF_8_BANK) |
2725 						 BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
2726 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
2727 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1));
2728 				break;
2729 			default:
2730 				gb_tile_moden = 0;
2731 				break;
2732 			}
2733 			rdev->config.si.tile_mode_array[reg_offset] = gb_tile_moden;
2734 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
2735 		}
2736 	} else
2737 		DRM_ERROR("unknown asic: 0x%x\n", rdev->family);
2738 }
2739 
2740 static void si_select_se_sh(struct radeon_device *rdev,
2741 			    u32 se_num, u32 sh_num)
2742 {
2743 	u32 data = INSTANCE_BROADCAST_WRITES;
2744 
2745 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
2746 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
2747 	else if (se_num == 0xffffffff)
2748 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
2749 	else if (sh_num == 0xffffffff)
2750 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
2751 	else
2752 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
2753 	WREG32(GRBM_GFX_INDEX, data);
2754 }
2755 
static u32 si_create_bitmask(u32 bit_width)
{
	/* Return a mask with the low bit_width bits set.  A width of 0
	 * yields an empty mask; widths of 32 or more saturate to all ones
	 * (matching the behavior of shifting ones in bit-by-bit).
	 */
	if (bit_width >= 32)
		return 0xffffffff;

	return (1u << bit_width) - 1;
}
2766 
2767 static u32 si_get_cu_enabled(struct radeon_device *rdev, u32 cu_per_sh)
2768 {
2769 	u32 data, mask;
2770 
2771 	data = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
2772 	if (data & 1)
2773 		data &= INACTIVE_CUS_MASK;
2774 	else
2775 		data = 0;
2776 	data |= RREG32(GC_USER_SHADER_ARRAY_CONFIG);
2777 
2778 	data >>= INACTIVE_CUS_SHIFT;
2779 
2780 	mask = si_create_bitmask(cu_per_sh);
2781 
2782 	return ~data & mask;
2783 }
2784 
2785 static void si_setup_spi(struct radeon_device *rdev,
2786 			 u32 se_num, u32 sh_per_se,
2787 			 u32 cu_per_sh)
2788 {
2789 	int i, j, k;
2790 	u32 data, mask, active_cu;
2791 
2792 	for (i = 0; i < se_num; i++) {
2793 		for (j = 0; j < sh_per_se; j++) {
2794 			si_select_se_sh(rdev, i, j);
2795 			data = RREG32(SPI_STATIC_THREAD_MGMT_3);
2796 			active_cu = si_get_cu_enabled(rdev, cu_per_sh);
2797 
2798 			mask = 1;
2799 			for (k = 0; k < 16; k++) {
2800 				mask <<= k;
2801 				if (active_cu & mask) {
2802 					data &= ~mask;
2803 					WREG32(SPI_STATIC_THREAD_MGMT_3, data);
2804 					break;
2805 				}
2806 			}
2807 		}
2808 	}
2809 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
2810 }
2811 
2812 static u32 si_get_rb_disabled(struct radeon_device *rdev,
2813 			      u32 max_rb_num, u32 se_num,
2814 			      u32 sh_per_se)
2815 {
2816 	u32 data, mask;
2817 
2818 	data = RREG32(CC_RB_BACKEND_DISABLE);
2819 	if (data & 1)
2820 		data &= BACKEND_DISABLE_MASK;
2821 	else
2822 		data = 0;
2823 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
2824 
2825 	data >>= BACKEND_DISABLE_SHIFT;
2826 
2827 	mask = si_create_bitmask(max_rb_num / se_num / sh_per_se);
2828 
2829 	return data & mask;
2830 }
2831 
/*
 * si_setup_rb - set up the render backend raster configuration
 * @rdev: radeon_device pointer
 * @se_num: number of shader engines
 * @sh_per_se: shader arrays per shader engine
 * @max_rb_num: total number of render backends
 *
 * Collects the per-SH disabled-RB masks, inverts them into an enabled-RB
 * mask, and programs PA_SC_RASTER_CONFIG per shader engine accordingly.
 */
static void si_setup_rb(struct radeon_device *rdev,
			u32 se_num, u32 sh_per_se,
			u32 max_rb_num)
{
	int i, j;
	u32 data, mask;
	u32 disabled_rbs = 0;
	u32 enabled_rbs = 0;

	/* Pack each SE/SH's disabled-RB bits into one word,
	 * TAHITI_RB_BITMAP_WIDTH_PER_SH bits per shader array.
	 */
	for (i = 0; i < se_num; i++) {
		for (j = 0; j < sh_per_se; j++) {
			si_select_se_sh(rdev, i, j);
			data = si_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
			disabled_rbs |= data << ((i * sh_per_se + j) * TAHITI_RB_BITMAP_WIDTH_PER_SH);
		}
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);

	/* Invert: set a bit for every RB that is NOT disabled. */
	mask = 1;
	for (i = 0; i < max_rb_num; i++) {
		if (!(disabled_rbs & mask))
			enabled_rbs |= mask;
		mask <<= 1;
	}

	/* Program the RB mapping per SE, consuming two enabled_rbs bits
	 * (one shader array's worth) per inner iteration.
	 */
	for (i = 0; i < se_num; i++) {
		si_select_se_sh(rdev, i, 0xffffffff);
		data = 0;
		for (j = 0; j < sh_per_se; j++) {
			switch (enabled_rbs & 3) {
			case 1: /* only the first RB of the pair is usable */
				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
				break;
			case 2: /* only the second RB of the pair is usable */
				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
				break;
			case 3: /* both RBs usable */
			default:
				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
				break;
			}
			enabled_rbs >>= 2;
		}
		WREG32(PA_SC_RASTER_CONFIG, data);
	}
	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
}
2879 
/*
 * si_gpu_init - per-family GFX block configuration for SI ASICs
 *
 * Fills rdev->config.si with the family's shader/backend limits, derives
 * the memory row size and tiling configuration from the MC strap
 * registers, programs the address-config registers across the GPU
 * blocks, sets up the render backends and SPI CU masks, and applies the
 * 3D engine HW defaults.
 */
static void si_gpu_init(struct radeon_device *rdev)
{
	u32 gb_addr_config = 0;
	u32 mc_shared_chmap, mc_arb_ramcfg;
	u32 sx_debug_1;
	u32 hdp_host_path_cntl;
	u32 tmp;
	int i, j;

	switch (rdev->family) {
	case CHIP_TAHITI:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 12;
		rdev->config.si.max_cu_per_sh = 8;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 12;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_PITCAIRN:
		rdev->config.si.max_shader_engines = 2;
		rdev->config.si.max_tile_pipes = 8;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 8;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x100;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = TAHITI_GB_ADDR_CONFIG_GOLDEN;
		break;
	/* Unrecognized families fall back to the Verde limits. */
	case CHIP_VERDE:
	default:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 2;
		rdev->config.si.max_backends_per_se = 4;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 32;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_OLAND:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 6;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 2;
		rdev->config.si.max_texture_channel_caches = 4;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = VERDE_GB_ADDR_CONFIG_GOLDEN;
		break;
	case CHIP_HAINAN:
		rdev->config.si.max_shader_engines = 1;
		rdev->config.si.max_tile_pipes = 4;
		rdev->config.si.max_cu_per_sh = 5;
		rdev->config.si.max_sh_per_se = 1;
		rdev->config.si.max_backends_per_se = 1;
		rdev->config.si.max_texture_channel_caches = 2;
		rdev->config.si.max_gprs = 256;
		rdev->config.si.max_gs_threads = 16;
		rdev->config.si.max_hw_contexts = 8;

		rdev->config.si.sc_prim_fifo_size_frontend = 0x20;
		rdev->config.si.sc_prim_fifo_size_backend = 0x40;
		rdev->config.si.sc_hiz_tile_fifo_size = 0x30;
		rdev->config.si.sc_earlyz_tile_fifo_size = 0x130;
		gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
		break;
	}

	/* Initialize HDP */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}

	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));

	evergreen_fix_pci_max_read_req_size(rdev);

	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);

	/* mc_shared_chmap is read but not otherwise used below */
	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);

	rdev->config.si.num_tile_pipes = rdev->config.si.max_tile_pipes;
	rdev->config.si.mem_max_burst_length_bytes = 256;
	/* row size = 4 bytes/col * 2^(8 + NOOFCOLS) columns, in KB,
	 * capped at 4KB.
	 */
	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
	rdev->config.si.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
	if (rdev->config.si.mem_row_size_in_kb > 4)
		rdev->config.si.mem_row_size_in_kb = 4;
	/* XXX use MC settings? */
	rdev->config.si.shader_engine_tile_size = 32;
	rdev->config.si.num_gpus = 1;
	rdev->config.si.multi_gpu_tile_size = 64;

	/* fix up row size */
	gb_addr_config &= ~ROW_SIZE_MASK;
	switch (rdev->config.si.mem_row_size_in_kb) {
	case 1:
	default:
		gb_addr_config |= ROW_SIZE(0);
		break;
	case 2:
		gb_addr_config |= ROW_SIZE(1);
		break;
	case 4:
		gb_addr_config |= ROW_SIZE(2);
		break;
	}

	/* setup tiling info dword.  gb_addr_config is not adequate since it does
	 * not have bank info, so create a custom tiling dword.
	 * bits 3:0   num_pipes
	 * bits 7:4   num_banks
	 * bits 11:8  group_size
	 * bits 15:12 row_size
	 */
	rdev->config.si.tile_config = 0;
	switch (rdev->config.si.num_tile_pipes) {
	case 1:
		rdev->config.si.tile_config |= (0 << 0);
		break;
	case 2:
		rdev->config.si.tile_config |= (1 << 0);
		break;
	case 4:
		rdev->config.si.tile_config |= (2 << 0);
		break;
	case 8:
	default:
		/* XXX what about 12? */
		rdev->config.si.tile_config |= (3 << 0);
		break;
	}
	switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
	case 0: /* four banks */
		rdev->config.si.tile_config |= 0 << 4;
		break;
	case 1: /* eight banks */
		rdev->config.si.tile_config |= 1 << 4;
		break;
	case 2: /* sixteen banks */
	default:
		rdev->config.si.tile_config |= 2 << 4;
		break;
	}
	rdev->config.si.tile_config |=
		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
	rdev->config.si.tile_config |=
		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;

	/* Broadcast the address config to every block that decodes
	 * tiled addresses.
	 */
	WREG32(GB_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CONFIG, gb_addr_config);
	WREG32(DMIF_ADDR_CALC, gb_addr_config);
	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA0_REGISTER_OFFSET, gb_addr_config);
	WREG32(DMA_TILING_CONFIG + DMA1_REGISTER_OFFSET, gb_addr_config);
	if (rdev->has_uvd) {
		WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
		WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
	}

	si_tiling_mode_table_init(rdev);

	si_setup_rb(rdev, rdev->config.si.max_shader_engines,
		    rdev->config.si.max_sh_per_se,
		    rdev->config.si.max_backends_per_se);

	si_setup_spi(rdev, rdev->config.si.max_shader_engines,
		     rdev->config.si.max_sh_per_se,
		     rdev->config.si.max_cu_per_sh);


	/* set HW defaults for 3D engine */
	WREG32(CP_QUEUE_THRESHOLDS, (ROQ_IB1_START(0x16) |
				     ROQ_IB2_START(0x2b)));
	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));

	/* read-modify-write with no modification: keep HW value */
	sx_debug_1 = RREG32(SX_DEBUG_1);
	WREG32(SX_DEBUG_1, sx_debug_1);

	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));

	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_frontend) |
				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.si.sc_prim_fifo_size_backend) |
				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.si.sc_hiz_tile_fifo_size) |
				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.si.sc_earlyz_tile_fifo_size)));

	WREG32(VGT_NUM_INSTANCES, 1);

	WREG32(CP_PERFMON_CNTL, 0);

	WREG32(SQ_CONFIG, 0);

	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
					  FORCE_EOV_MAX_REZ_CNT(255)));

	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
	       AUTO_INVLD_EN(ES_AND_GS_AUTO));

	WREG32(VGT_GS_VERTEX_REUSE, 16);
	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);

	/* disable all CB performance counters */
	WREG32(CB_PERFCOUNTER0_SELECT0, 0);
	WREG32(CB_PERFCOUNTER0_SELECT1, 0);
	WREG32(CB_PERFCOUNTER1_SELECT0, 0);
	WREG32(CB_PERFCOUNTER1_SELECT1, 0);
	WREG32(CB_PERFCOUNTER2_SELECT0, 0);
	WREG32(CB_PERFCOUNTER2_SELECT1, 0);
	WREG32(CB_PERFCOUNTER3_SELECT0, 0);
	WREG32(CB_PERFCOUNTER3_SELECT1, 0);

	tmp = RREG32(HDP_MISC_CNTL);
	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
	WREG32(HDP_MISC_CNTL, tmp);

	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);

	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));

	/* let the config settle */
	udelay(50);
}
3136 
3137 /*
3138  * GPU scratch registers helpers function.
3139  */
3140 static void si_scratch_init(struct radeon_device *rdev)
3141 {
3142 	int i;
3143 
3144 	rdev->scratch.num_reg = 7;
3145 	rdev->scratch.reg_base = SCRATCH_REG0;
3146 	for (i = 0; i < rdev->scratch.num_reg; i++) {
3147 		rdev->scratch.free[i] = true;
3148 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
3149 	}
3150 }
3151 
/*
 * si_fence_ring_emit - emit a fence on a CP ring
 * @rdev: radeon_device pointer
 * @fence: fence to emit
 *
 * Flushes the GPU read caches, then uses an EVENT_WRITE_EOP packet to
 * write the fence sequence number to the fence address and raise an
 * interrupt once the preceding work has completed.
 */
void si_fence_ring_emit(struct radeon_device *rdev,
			struct radeon_fence *fence)
{
	struct radeon_ring *ring = &rdev->ring[fence->ring];
	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;

	/* flush read cache over gart */
	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
	radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
	radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
			  PACKET3_TC_ACTION_ENA |
			  PACKET3_SH_KCACHE_ACTION_ENA |
			  PACKET3_SH_ICACHE_ACTION_ENA);
	radeon_ring_write(ring, 0xFFFFFFFF); /* presumably sync size: full range — confirm */
	radeon_ring_write(ring, 0);          /* presumably sync base — confirm */
	radeon_ring_write(ring, 10); /* poll interval */
	/* EVENT_WRITE_EOP - flush caches, send int */
	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
	radeon_ring_write(ring, EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5));
	radeon_ring_write(ring, addr & 0xffffffff); /* fence address, low 32 bits */
	radeon_ring_write(ring, (upper_32_bits(addr) & 0xff) | DATA_SEL(1) | INT_SEL(2)); /* addr hi + write seq + irq */
	radeon_ring_write(ring, fence->seq);
	radeon_ring_write(ring, 0);
}
3178 
3179 /*
3180  * IB stuff
3181  */
/*
 * si_ring_ib_execute - schedule an indirect buffer on a CP ring
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to execute
 *
 * Const IBs are preceded by a SWITCH_BUFFER packet and use the
 * INDIRECT_BUFFER_CONST opcode.  Normal IBs first record the ring's
 * next read pointer (to the rptr save register or via writeback) and
 * are followed by a GPU read-cache flush for the IB's VM id.
 */
void si_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
{
	struct radeon_ring *ring = &rdev->ring[ib->ring];
	u32 header;

	if (ib->is_const_ib) {
		/* set switch buffer packet before const IB */
		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
		radeon_ring_write(ring, 0);

		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
	} else {
		u32 next_rptr;
		if (ring->rptr_save_reg) {
			/* 3 + 4 + 8: presumably the dwords emitted below
			 * (save packet + IB packet + cache flush) — confirm.
			 */
			next_rptr = ring->wptr + 3 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
			radeon_ring_write(ring, ((ring->rptr_save_reg -
						  PACKET3_SET_CONFIG_REG_START) >> 2));
			radeon_ring_write(ring, next_rptr);
		} else if (rdev->wb.enabled) {
			next_rptr = ring->wptr + 5 + 4 + 8;
			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
			radeon_ring_write(ring, (1 << 8));
			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
			radeon_ring_write(ring, next_rptr);
		}

		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
	}

	radeon_ring_write(ring, header);
	radeon_ring_write(ring,
#ifdef __BIG_ENDIAN
			  (2 << 0) |
#endif
			  (ib->gpu_addr & 0xFFFFFFFC));
	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
	radeon_ring_write(ring, ib->length_dw |
			  (ib->vm ? (ib->vm->id << 24) : 0));

	if (!ib->is_const_ib) {
		/* flush read cache over gart for this vmid */
		radeon_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
		radeon_ring_write(ring, (CP_COHER_CNTL2 - PACKET3_SET_CONFIG_REG_START) >> 2);
		radeon_ring_write(ring, ib->vm ? ib->vm->id : 0);
		radeon_ring_write(ring, PACKET3(PACKET3_SURFACE_SYNC, 3));
		radeon_ring_write(ring, PACKET3_TCL1_ACTION_ENA |
				  PACKET3_TC_ACTION_ENA |
				  PACKET3_SH_KCACHE_ACTION_ENA |
				  PACKET3_SH_ICACHE_ACTION_ENA);
		radeon_ring_write(ring, 0xFFFFFFFF);
		radeon_ring_write(ring, 0);
		radeon_ring_write(ring, 10); /* poll interval */
	}
}
3238 
3239 /*
3240  * CP.
3241  */
3242 static void si_cp_enable(struct radeon_device *rdev, bool enable)
3243 {
3244 	if (enable)
3245 		WREG32(CP_ME_CNTL, 0);
3246 	else {
3247 		radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3248 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
3249 		WREG32(SCRATCH_UMSK, 0);
3250 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3251 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3252 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3253 	}
3254 	udelay(50);
3255 }
3256 
3257 static int si_cp_load_microcode(struct radeon_device *rdev)
3258 {
3259 	const __be32 *fw_data;
3260 	int i;
3261 
3262 	if (!rdev->me_fw || !rdev->pfp_fw)
3263 		return -EINVAL;
3264 
3265 	si_cp_enable(rdev, false);
3266 
3267 	/* PFP */
3268 	fw_data = (const __be32 *)rdev->pfp_fw->data;
3269 	WREG32(CP_PFP_UCODE_ADDR, 0);
3270 	for (i = 0; i < SI_PFP_UCODE_SIZE; i++)
3271 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
3272 	WREG32(CP_PFP_UCODE_ADDR, 0);
3273 
3274 	/* CE */
3275 	fw_data = (const __be32 *)rdev->ce_fw->data;
3276 	WREG32(CP_CE_UCODE_ADDR, 0);
3277 	for (i = 0; i < SI_CE_UCODE_SIZE; i++)
3278 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
3279 	WREG32(CP_CE_UCODE_ADDR, 0);
3280 
3281 	/* ME */
3282 	fw_data = (const __be32 *)rdev->me_fw->data;
3283 	WREG32(CP_ME_RAM_WADDR, 0);
3284 	for (i = 0; i < SI_PM4_UCODE_SIZE; i++)
3285 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
3286 	WREG32(CP_ME_RAM_WADDR, 0);
3287 
3288 	WREG32(CP_PFP_UCODE_ADDR, 0);
3289 	WREG32(CP_CE_UCODE_ADDR, 0);
3290 	WREG32(CP_ME_RAM_WADDR, 0);
3291 	WREG32(CP_ME_RAM_RADDR, 0);
3292 	return 0;
3293 }
3294 
3295 static int si_cp_start(struct radeon_device *rdev)
3296 {
3297 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3298 	int r, i;
3299 
3300 	r = radeon_ring_lock(rdev, ring, 7 + 4);
3301 	if (r) {
3302 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3303 		return r;
3304 	}
3305 	/* init the CP */
3306 	radeon_ring_write(ring, PACKET3(PACKET3_ME_INITIALIZE, 5));
3307 	radeon_ring_write(ring, 0x1);
3308 	radeon_ring_write(ring, 0x0);
3309 	radeon_ring_write(ring, rdev->config.si.max_hw_contexts - 1);
3310 	radeon_ring_write(ring, PACKET3_ME_INITIALIZE_DEVICE_ID(1));
3311 	radeon_ring_write(ring, 0);
3312 	radeon_ring_write(ring, 0);
3313 
3314 	/* init the CE partitions */
3315 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
3316 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
3317 	radeon_ring_write(ring, 0xc000);
3318 	radeon_ring_write(ring, 0xe000);
3319 	radeon_ring_unlock_commit(rdev, ring);
3320 
3321 	si_cp_enable(rdev, true);
3322 
3323 	r = radeon_ring_lock(rdev, ring, si_default_size + 10);
3324 	if (r) {
3325 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
3326 		return r;
3327 	}
3328 
3329 	/* setup clear context state */
3330 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3331 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
3332 
3333 	for (i = 0; i < si_default_size; i++)
3334 		radeon_ring_write(ring, si_default_state[i]);
3335 
3336 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
3337 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
3338 
3339 	/* set clear context state */
3340 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
3341 	radeon_ring_write(ring, 0);
3342 
3343 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
3344 	radeon_ring_write(ring, 0x00000316);
3345 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
3346 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
3347 
3348 	radeon_ring_unlock_commit(rdev, ring);
3349 
3350 	for (i = RADEON_RING_TYPE_GFX_INDEX; i <= CAYMAN_RING_TYPE_CP2_INDEX; ++i) {
3351 		ring = &rdev->ring[i];
3352 		r = radeon_ring_lock(rdev, ring, 2);
3353 
3354 		/* clear the compute context state */
3355 		radeon_ring_write(ring, PACKET3_COMPUTE(PACKET3_CLEAR_STATE, 0));
3356 		radeon_ring_write(ring, 0);
3357 
3358 		radeon_ring_unlock_commit(rdev, ring);
3359 	}
3360 
3361 	return 0;
3362 }
3363 
3364 static void si_cp_fini(struct radeon_device *rdev)
3365 {
3366 	struct radeon_ring *ring;
3367 	si_cp_enable(rdev, false);
3368 
3369 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3370 	radeon_ring_fini(rdev, ring);
3371 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3372 
3373 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3374 	radeon_ring_fini(rdev, ring);
3375 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3376 
3377 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3378 	radeon_ring_fini(rdev, ring);
3379 	radeon_scratch_free(rdev, ring->rptr_save_reg);
3380 }
3381 
3382 static int si_cp_resume(struct radeon_device *rdev)
3383 {
3384 	struct radeon_ring *ring;
3385 	u32 tmp;
3386 	u32 rb_bufsz;
3387 	int r;
3388 
3389 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
3390 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
3391 
3392 	/* Set the write pointer delay */
3393 	WREG32(CP_RB_WPTR_DELAY, 0);
3394 
3395 	WREG32(CP_DEBUG, 0);
3396 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
3397 
3398 	/* ring 0 - compute and gfx */
3399 	/* Set ring buffer size */
3400 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
3401 	rb_bufsz = order_base_2(ring->ring_size / 8);
3402 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3403 #ifdef __BIG_ENDIAN
3404 	tmp |= BUF_SWAP_32BIT;
3405 #endif
3406 	WREG32(CP_RB0_CNTL, tmp);
3407 
3408 	/* Initialize the ring buffer's read and write pointers */
3409 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
3410 	ring->wptr = 0;
3411 	WREG32(CP_RB0_WPTR, ring->wptr);
3412 
3413 	/* set the wb address whether it's enabled or not */
3414 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
3415 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
3416 
3417 	if (rdev->wb.enabled)
3418 		WREG32(SCRATCH_UMSK, 0xff);
3419 	else {
3420 		tmp |= RB_NO_UPDATE;
3421 		WREG32(SCRATCH_UMSK, 0);
3422 	}
3423 
3424 	mdelay(1);
3425 	WREG32(CP_RB0_CNTL, tmp);
3426 
3427 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);
3428 
3429 	ring->rptr = RREG32(CP_RB0_RPTR);
3430 
3431 	/* ring1  - compute only */
3432 	/* Set ring buffer size */
3433 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
3434 	rb_bufsz = order_base_2(ring->ring_size / 8);
3435 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3436 #ifdef __BIG_ENDIAN
3437 	tmp |= BUF_SWAP_32BIT;
3438 #endif
3439 	WREG32(CP_RB1_CNTL, tmp);
3440 
3441 	/* Initialize the ring buffer's read and write pointers */
3442 	WREG32(CP_RB1_CNTL, tmp | RB_RPTR_WR_ENA);
3443 	ring->wptr = 0;
3444 	WREG32(CP_RB1_WPTR, ring->wptr);
3445 
3446 	/* set the wb address whether it's enabled or not */
3447 	WREG32(CP_RB1_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFFFFFFFC);
3448 	WREG32(CP_RB1_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET) & 0xFF);
3449 
3450 	mdelay(1);
3451 	WREG32(CP_RB1_CNTL, tmp);
3452 
3453 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);
3454 
3455 	ring->rptr = RREG32(CP_RB1_RPTR);
3456 
3457 	/* ring2 - compute only */
3458 	/* Set ring buffer size */
3459 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
3460 	rb_bufsz = order_base_2(ring->ring_size / 8);
3461 	tmp = (order_base_2(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
3462 #ifdef __BIG_ENDIAN
3463 	tmp |= BUF_SWAP_32BIT;
3464 #endif
3465 	WREG32(CP_RB2_CNTL, tmp);
3466 
3467 	/* Initialize the ring buffer's read and write pointers */
3468 	WREG32(CP_RB2_CNTL, tmp | RB_RPTR_WR_ENA);
3469 	ring->wptr = 0;
3470 	WREG32(CP_RB2_WPTR, ring->wptr);
3471 
3472 	/* set the wb address whether it's enabled or not */
3473 	WREG32(CP_RB2_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFFFFFFFC);
3474 	WREG32(CP_RB2_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET) & 0xFF);
3475 
3476 	mdelay(1);
3477 	WREG32(CP_RB2_CNTL, tmp);
3478 
3479 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);
3480 
3481 	ring->rptr = RREG32(CP_RB2_RPTR);
3482 
3483 	/* start the rings */
3484 	si_cp_start(rdev);
3485 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
3486 	rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = true;
3487 	rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = true;
3488 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
3489 	if (r) {
3490 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
3491 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3492 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3493 		return r;
3494 	}
3495 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP1_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX]);
3496 	if (r) {
3497 		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
3498 	}
3499 	r = radeon_ring_test(rdev, CAYMAN_RING_TYPE_CP2_INDEX, &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX]);
3500 	if (r) {
3501 		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
3502 	}
3503 
3504 	return 0;
3505 }
3506 
/**
 * si_gpu_check_soft_reset - determine which GPU blocks need a soft reset
 * @rdev: radeon_device pointer
 *
 * Reads the GRBM, SRBM, DMA and VM status registers and accumulates a
 * RADEON_RESET_* bitmask of engines that appear busy or hung.  MC busy
 * is deliberately dropped from the final mask ("most likely not hung,
 * just busy").
 *
 * Returns the accumulated reset mask (0 means nothing needs resetting).
 */
u32 si_gpu_check_soft_reset(struct radeon_device *rdev)
{
	u32 reset_mask = 0;
	u32 tmp;

	/* GRBM_STATUS */
	tmp = RREG32(GRBM_STATUS);
	if (tmp & (PA_BUSY | SC_BUSY |
		   BCI_BUSY | SX_BUSY |
		   TA_BUSY | VGT_BUSY |
		   DB_BUSY | CB_BUSY |
		   GDS_BUSY | SPI_BUSY |
		   IA_BUSY | IA_BUSY_NO_DMA))
		reset_mask |= RADEON_RESET_GFX;

	if (tmp & (CF_RQ_PENDING | PF_RQ_PENDING |
		   CP_BUSY | CP_COHERENCY_BUSY))
		reset_mask |= RADEON_RESET_CP;

	if (tmp & GRBM_EE_BUSY)
		reset_mask |= RADEON_RESET_GRBM | RADEON_RESET_GFX | RADEON_RESET_CP;

	/* GRBM_STATUS2 */
	tmp = RREG32(GRBM_STATUS2);
	if (tmp & (RLC_RQ_PENDING | RLC_BUSY))
		reset_mask |= RADEON_RESET_RLC;

	/* DMA_STATUS_REG 0 */
	tmp = RREG32(DMA_STATUS_REG + DMA0_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA;

	/* DMA_STATUS_REG 1 */
	tmp = RREG32(DMA_STATUS_REG + DMA1_REGISTER_OFFSET);
	if (!(tmp & DMA_IDLE))
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS2 */
	tmp = RREG32(SRBM_STATUS2);
	if (tmp & DMA_BUSY)
		reset_mask |= RADEON_RESET_DMA;

	if (tmp & DMA1_BUSY)
		reset_mask |= RADEON_RESET_DMA1;

	/* SRBM_STATUS */
	tmp = RREG32(SRBM_STATUS);

	if (tmp & IH_BUSY)
		reset_mask |= RADEON_RESET_IH;

	if (tmp & SEM_BUSY)
		reset_mask |= RADEON_RESET_SEM;

	if (tmp & GRBM_RQ_PENDING)
		reset_mask |= RADEON_RESET_GRBM;

	if (tmp & VMC_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
		   MCC_BUSY | MCD_BUSY))
		reset_mask |= RADEON_RESET_MC;

	if (evergreen_is_display_hung(rdev))
		reset_mask |= RADEON_RESET_DISPLAY;

	/* VM_L2_STATUS */
	tmp = RREG32(VM_L2_STATUS);
	if (tmp & L2_BUSY)
		reset_mask |= RADEON_RESET_VMC;

	/* Skip MC reset as it's mostly likely not hung, just busy */
	if (reset_mask & RADEON_RESET_MC) {
		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
		reset_mask &= ~RADEON_RESET_MC;
	}

	return reset_mask;
}
3587 
/**
 * si_gpu_soft_reset - soft reset the requested blocks
 *
 * @rdev: radeon_device pointer
 * @reset_mask: mask of RADEON_RESET_* flags (from si_gpu_check_soft_reset())
 *
 * Halt the CP, stop the requested DMA ring buffers, quiesce the MC,
 * then pulse the GRBM/SRBM soft reset bits corresponding to @reset_mask
 * and resume the MC (SI).  Does nothing if @reset_mask is 0.
 * The order of operations here matters; do not reorder.
 */
static void si_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
{
	struct evergreen_mc_save save;
	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
	u32 tmp;

	if (reset_mask == 0)
		return;

	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);

	evergreen_print_gpu_status_regs(rdev);
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));

	/* Disable CP parsing/prefetching */
	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);

	if (reset_mask & RADEON_RESET_DMA) {
		/* dma0 */
		tmp = RREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA0_REGISTER_OFFSET, tmp);
	}
	if (reset_mask & RADEON_RESET_DMA1) {
		/* dma1 */
		tmp = RREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET);
		tmp &= ~DMA_RB_ENABLE;
		WREG32(DMA_RB_CNTL + DMA1_REGISTER_OFFSET, tmp);
	}

	/* let the halted engines settle before touching the MC */
	udelay(50);

	evergreen_mc_stop(rdev, &save);
	if (evergreen_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}

	/* translate the RADEON_RESET_* mask into GRBM/SRBM soft reset bits */
	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP)) {
		grbm_soft_reset = SOFT_RESET_CB |
			SOFT_RESET_DB |
			SOFT_RESET_GDS |
			SOFT_RESET_PA |
			SOFT_RESET_SC |
			SOFT_RESET_BCI |
			SOFT_RESET_SPI |
			SOFT_RESET_SX |
			SOFT_RESET_TC |
			SOFT_RESET_TA |
			SOFT_RESET_VGT |
			SOFT_RESET_IA;
	}

	if (reset_mask & RADEON_RESET_CP) {
		grbm_soft_reset |= SOFT_RESET_CP | SOFT_RESET_VGT;

		srbm_soft_reset |= SOFT_RESET_GRBM;
	}

	if (reset_mask & RADEON_RESET_DMA)
		srbm_soft_reset |= SOFT_RESET_DMA;

	if (reset_mask & RADEON_RESET_DMA1)
		srbm_soft_reset |= SOFT_RESET_DMA1;

	if (reset_mask & RADEON_RESET_DISPLAY)
		srbm_soft_reset |= SOFT_RESET_DC;

	if (reset_mask & RADEON_RESET_RLC)
		grbm_soft_reset |= SOFT_RESET_RLC;

	if (reset_mask & RADEON_RESET_SEM)
		srbm_soft_reset |= SOFT_RESET_SEM;

	if (reset_mask & RADEON_RESET_IH)
		srbm_soft_reset |= SOFT_RESET_IH;

	if (reset_mask & RADEON_RESET_GRBM)
		srbm_soft_reset |= SOFT_RESET_GRBM;

	if (reset_mask & RADEON_RESET_VMC)
		srbm_soft_reset |= SOFT_RESET_VMC;

	if (reset_mask & RADEON_RESET_MC)
		srbm_soft_reset |= SOFT_RESET_MC;

	if (grbm_soft_reset) {
		/* assert the reset bits, hold them for a bit, then release;
		 * the read-backs presumably flush the posted writes */
		tmp = RREG32(GRBM_SOFT_RESET);
		tmp |= grbm_soft_reset;
		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~grbm_soft_reset;
		WREG32(GRBM_SOFT_RESET, tmp);
		tmp = RREG32(GRBM_SOFT_RESET);
	}

	if (srbm_soft_reset) {
		/* same assert/hold/release dance for the SRBM side */
		tmp = RREG32(SRBM_SOFT_RESET);
		tmp |= srbm_soft_reset;
		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);

		udelay(50);

		tmp &= ~srbm_soft_reset;
		WREG32(SRBM_SOFT_RESET, tmp);
		tmp = RREG32(SRBM_SOFT_RESET);
	}

	/* Wait a little for things to settle down */
	udelay(50);

	evergreen_mc_resume(rdev, &save);
	udelay(50);

	evergreen_print_gpu_status_regs(rdev);
}
3712 
3713 int si_asic_reset(struct radeon_device *rdev)
3714 {
3715 	u32 reset_mask;
3716 
3717 	reset_mask = si_gpu_check_soft_reset(rdev);
3718 
3719 	if (reset_mask)
3720 		r600_set_bios_scratch_engine_hung(rdev, true);
3721 
3722 	si_gpu_soft_reset(rdev, reset_mask);
3723 
3724 	reset_mask = si_gpu_check_soft_reset(rdev);
3725 
3726 	if (!reset_mask)
3727 		r600_set_bios_scratch_engine_hung(rdev, false);
3728 
3729 	return 0;
3730 }
3731 
3732 /**
3733  * si_gfx_is_lockup - Check if the GFX engine is locked up
3734  *
3735  * @rdev: radeon_device pointer
3736  * @ring: radeon_ring structure holding ring information
3737  *
3738  * Check if the GFX engine is locked up.
3739  * Returns true if the engine appears to be locked up, false if not.
3740  */
3741 bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
3742 {
3743 	u32 reset_mask = si_gpu_check_soft_reset(rdev);
3744 
3745 	if (!(reset_mask & (RADEON_RESET_GFX |
3746 			    RADEON_RESET_COMPUTE |
3747 			    RADEON_RESET_CP))) {
3748 		radeon_ring_lockup_update(ring);
3749 		return false;
3750 	}
3751 	/* force CP activities */
3752 	radeon_ring_force_activity(rdev, ring);
3753 	return radeon_ring_test_lockup(rdev, ring);
3754 }
3755 
/* MC */
/**
 * si_mc_program - program the memory controller apertures
 *
 * @rdev: radeon_device pointer
 *
 * Clear the HDP registers, stop the MC, program the system/VRAM
 * aperture ranges and the framebuffer location, then restart the MC
 * and disable the VGA renderer (SI).
 */
static void si_mc_program(struct radeon_device *rdev)
{
	struct evergreen_mc_save save;
	u32 tmp;
	int i, j;

	/* Initialize HDP */
	/* NOTE(review): 0x2c14..0x2c24 look like per-surface HDP registers,
	 * 32 sets with a 0x18 stride — raw offsets, confirm against sid.h */
	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
		WREG32((0x2c14 + j), 0x00000000);
		WREG32((0x2c18 + j), 0x00000000);
		WREG32((0x2c1c + j), 0x00000000);
		WREG32((0x2c20 + j), 0x00000000);
		WREG32((0x2c24 + j), 0x00000000);
	}
	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);

	evergreen_mc_stop(rdev, &save);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	if (!ASIC_IS_NODCE(rdev))
		/* Lockout access through VGA aperture*/
		WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
	/* Update configuration */
	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
	       rdev->mc.vram_start >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
	       rdev->mc.vram_end >> 12);
	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
	       rdev->vram_scratch.gpu_addr >> 12);
	/* FB_LOCATION packs top (bits 31:16) and base (bits 15:0),
	 * both in 16MB units */
	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
	WREG32(MC_VM_FB_LOCATION, tmp);
	/* XXX double check these! */
	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
	/* effectively disable AGP: top/bottom below base */
	WREG32(MC_VM_AGP_BASE, 0);
	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
	if (radeon_mc_wait_for_idle(rdev)) {
		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
	}
	evergreen_mc_resume(rdev, &save);
	if (!ASIC_IS_NODCE(rdev)) {
		/* we need to own VRAM, so turn off the VGA renderer here
		 * to stop it overwriting our objects */
		rv515_vga_render_disable(rdev);
	}
}
3807 
3808 void si_vram_gtt_location(struct radeon_device *rdev,
3809 			  struct radeon_mc *mc)
3810 {
3811 	if (mc->mc_vram_size > 0xFFC0000000ULL) {
3812 		/* leave room for at least 1024M GTT */
3813 		dev_warn(rdev->dev, "limiting VRAM\n");
3814 		mc->real_vram_size = 0xFFC0000000ULL;
3815 		mc->mc_vram_size = 0xFFC0000000ULL;
3816 	}
3817 	radeon_vram_location(rdev, &rdev->mc, 0);
3818 	rdev->mc.gtt_base_align = 0;
3819 	radeon_gtt_location(rdev, mc);
3820 }
3821 
3822 static int si_mc_init(struct radeon_device *rdev)
3823 {
3824 	u32 tmp;
3825 	int chansize, numchan;
3826 
3827 	/* Get VRAM informations */
3828 	rdev->mc.vram_is_ddr = true;
3829 	tmp = RREG32(MC_ARB_RAMCFG);
3830 	if (tmp & CHANSIZE_OVERRIDE) {
3831 		chansize = 16;
3832 	} else if (tmp & CHANSIZE_MASK) {
3833 		chansize = 64;
3834 	} else {
3835 		chansize = 32;
3836 	}
3837 	tmp = RREG32(MC_SHARED_CHMAP);
3838 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
3839 	case 0:
3840 	default:
3841 		numchan = 1;
3842 		break;
3843 	case 1:
3844 		numchan = 2;
3845 		break;
3846 	case 2:
3847 		numchan = 4;
3848 		break;
3849 	case 3:
3850 		numchan = 8;
3851 		break;
3852 	case 4:
3853 		numchan = 3;
3854 		break;
3855 	case 5:
3856 		numchan = 6;
3857 		break;
3858 	case 6:
3859 		numchan = 10;
3860 		break;
3861 	case 7:
3862 		numchan = 12;
3863 		break;
3864 	case 8:
3865 		numchan = 16;
3866 		break;
3867 	}
3868 	rdev->mc.vram_width = numchan * chansize;
3869 	/* Could aper size report 0 ? */
3870 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
3871 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
3872 	/* size in MB on si */
3873 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3874 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024ULL * 1024ULL;
3875 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
3876 	si_vram_gtt_location(rdev, &rdev->mc);
3877 	radeon_update_bandwidth_info(rdev);
3878 
3879 	return 0;
3880 }
3881 
3882 /*
3883  * GART
3884  */
3885 void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
3886 {
3887 	/* flush hdp cache */
3888 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
3889 
3890 	/* bits 0-15 are the VM contexts0-15 */
3891 	WREG32(VM_INVALIDATE_REQUEST, 1);
3892 }
3893 
/**
 * si_pcie_gart_enable - set up and enable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Pin the GART page table in VRAM, program the L1 TLB and L2 cache,
 * set up VM context 0 to cover the GTT aperture and point contexts
 * 1-15 at the same table as placeholders, then flush the TLBs (SI).
 * Returns 0 on success, negative error code on failure.
 */
static int si_pcie_gart_enable(struct radeon_device *rdev)
{
	int r, i;

	if (rdev->gart.robj == NULL) {
		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
		return -EINVAL;
	}
	r = radeon_gart_table_vram_pin(rdev);
	if (r)
		return r;
	radeon_gart_restore(rdev);
	/* Setup TLB control */
	/* NOTE(review): (0xA << 7) is a raw field in MC_VM_MX_L1_TLB_CNTL —
	 * confirm its meaning against the register layout in sid.h */
	WREG32(MC_VM_MX_L1_TLB_CNTL,
	       (0xA << 7) |
	       ENABLE_L1_TLB |
	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       ENABLE_ADVANCED_DRIVER_MODEL |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	/* setup context0 */
	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
			(u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT0_CNTL2, 0);
	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));

	/* NOTE(review): undocumented raw register offsets — confirm */
	WREG32(0x15D4, 0);
	WREG32(0x15D8, 0);
	WREG32(0x15DC, 0);

	/* empty context1-15 */
	/* set vm size, must be a multiple of 4 */
	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
	/* Assign the pt base to something valid for now; the pts used for
	 * the VMs are determined by the application and setup and assigned
	 * on the fly in the vm part of radeon_gart.c
	 */
	for (i = 1; i < 16; i++) {
		/* contexts 0-7 and 8-15 have separate register banks */
		if (i < 8)
			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
			       rdev->gart.table_addr >> 12);
		else
			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
			       rdev->gart.table_addr >> 12);
	}

	/* enable context1-15 */
	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
	       (u32)(rdev->dummy_page.addr >> 12));
	WREG32(VM_CONTEXT1_CNTL2, 4);
	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);

	si_pcie_gart_tlb_flush(rdev);
	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
		 (unsigned)(rdev->mc.gtt_size >> 20),
		 (unsigned long long)rdev->gart.table_addr);
	rdev->gart.ready = true;
	return 0;
}
3978 
/**
 * si_pcie_gart_disable - disable the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disable all VM contexts, turn off the L1 TLB and L2 cache, and
 * unpin the GART page table (SI).
 */
static void si_pcie_gart_disable(struct radeon_device *rdev)
{
	/* Disable all tables */
	WREG32(VM_CONTEXT0_CNTL, 0);
	WREG32(VM_CONTEXT1_CNTL, 0);
	/* Setup TLB control */
	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
	/* Setup L2 cache */
	/* note: ENABLE_L2_CACHE deliberately left out here */
	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
	       EFFECTIVE_L2_QUEUE_SIZE(7) |
	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
	WREG32(VM_L2_CNTL2, 0);
	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
	radeon_gart_table_vram_unpin(rdev);
}
3997 
/**
 * si_pcie_gart_fini - tear down the PCIE GART
 *
 * @rdev: radeon_device pointer
 *
 * Disable the GART, free the page table and release the GART
 * bookkeeping (SI).
 */
static void si_pcie_gart_fini(struct radeon_device *rdev)
{
	si_pcie_gart_disable(rdev);
	radeon_gart_table_vram_free(rdev);
	radeon_gart_fini(rdev);
}
4004 
4005 /* vm parser */
4006 static bool si_vm_reg_valid(u32 reg)
4007 {
4008 	/* context regs are fine */
4009 	if (reg >= 0x28000)
4010 		return true;
4011 
4012 	/* check config regs */
4013 	switch (reg) {
4014 	case GRBM_GFX_INDEX:
4015 	case CP_STRMOUT_CNTL:
4016 	case VGT_VTX_VECT_EJECT_REG:
4017 	case VGT_CACHE_INVALIDATION:
4018 	case VGT_ESGS_RING_SIZE:
4019 	case VGT_GSVS_RING_SIZE:
4020 	case VGT_GS_VERTEX_REUSE:
4021 	case VGT_PRIMITIVE_TYPE:
4022 	case VGT_INDEX_TYPE:
4023 	case VGT_NUM_INDICES:
4024 	case VGT_NUM_INSTANCES:
4025 	case VGT_TF_RING_SIZE:
4026 	case VGT_HS_OFFCHIP_PARAM:
4027 	case VGT_TF_MEMORY_BASE:
4028 	case PA_CL_ENHANCE:
4029 	case PA_SU_LINE_STIPPLE_VALUE:
4030 	case PA_SC_LINE_STIPPLE_STATE:
4031 	case PA_SC_ENHANCE:
4032 	case SQC_CACHES:
4033 	case SPI_STATIC_THREAD_MGMT_1:
4034 	case SPI_STATIC_THREAD_MGMT_2:
4035 	case SPI_STATIC_THREAD_MGMT_3:
4036 	case SPI_PS_MAX_WAVE_ID:
4037 	case SPI_CONFIG_CNTL:
4038 	case SPI_CONFIG_CNTL_1:
4039 	case TA_CNTL_AUX:
4040 		return true;
4041 	default:
4042 		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
4043 		return false;
4044 	}
4045 }
4046 
4047 static int si_vm_packet3_ce_check(struct radeon_device *rdev,
4048 				  u32 *ib, struct radeon_cs_packet *pkt)
4049 {
4050 	switch (pkt->opcode) {
4051 	case PACKET3_NOP:
4052 	case PACKET3_SET_BASE:
4053 	case PACKET3_SET_CE_DE_COUNTERS:
4054 	case PACKET3_LOAD_CONST_RAM:
4055 	case PACKET3_WRITE_CONST_RAM:
4056 	case PACKET3_WRITE_CONST_RAM_OFFSET:
4057 	case PACKET3_DUMP_CONST_RAM:
4058 	case PACKET3_INCREMENT_CE_COUNTER:
4059 	case PACKET3_WAIT_ON_DE_COUNTER:
4060 	case PACKET3_CE_WRITE:
4061 		break;
4062 	default:
4063 		DRM_ERROR("Invalid CE packet3: 0x%x\n", pkt->opcode);
4064 		return -EINVAL;
4065 	}
4066 	return 0;
4067 }
4068 
4069 static int si_vm_packet3_cp_dma_check(u32 *ib, u32 idx)
4070 {
4071 	u32 start_reg, reg, i;
4072 	u32 command = ib[idx + 4];
4073 	u32 info = ib[idx + 1];
4074 	u32 idx_value = ib[idx];
4075 	if (command & PACKET3_CP_DMA_CMD_SAS) {
4076 		/* src address space is register */
4077 		if (((info & 0x60000000) >> 29) == 0) {
4078 			start_reg = idx_value << 2;
4079 			if (command & PACKET3_CP_DMA_CMD_SAIC) {
4080 				reg = start_reg;
4081 				if (!si_vm_reg_valid(reg)) {
4082 					DRM_ERROR("CP DMA Bad SRC register\n");
4083 					return -EINVAL;
4084 				}
4085 			} else {
4086 				for (i = 0; i < (command & 0x1fffff); i++) {
4087 					reg = start_reg + (4 * i);
4088 					if (!si_vm_reg_valid(reg)) {
4089 						DRM_ERROR("CP DMA Bad SRC register\n");
4090 						return -EINVAL;
4091 					}
4092 				}
4093 			}
4094 		}
4095 	}
4096 	if (command & PACKET3_CP_DMA_CMD_DAS) {
4097 		/* dst address space is register */
4098 		if (((info & 0x00300000) >> 20) == 0) {
4099 			start_reg = ib[idx + 2];
4100 			if (command & PACKET3_CP_DMA_CMD_DAIC) {
4101 				reg = start_reg;
4102 				if (!si_vm_reg_valid(reg)) {
4103 					DRM_ERROR("CP DMA Bad DST register\n");
4104 					return -EINVAL;
4105 				}
4106 			} else {
4107 				for (i = 0; i < (command & 0x1fffff); i++) {
4108 					reg = start_reg + (4 * i);
4109 				if (!si_vm_reg_valid(reg)) {
4110 						DRM_ERROR("CP DMA Bad DST register\n");
4111 						return -EINVAL;
4112 					}
4113 				}
4114 			}
4115 		}
4116 	}
4117 	return 0;
4118 }
4119 
/**
 * si_vm_packet3_gfx_check - validate a packet3 on the GFX ring
 *
 * @rdev: radeon_device pointer
 * @ib: the indirect buffer being checked
 * @pkt: decoded packet header
 *
 * Most opcodes are allowed as-is; packets that can write registers
 * (COPY_DATA, WRITE_DATA, COND_WRITE, COPY_DW, SET_CONFIG_REG, CP_DMA)
 * have their register operands checked against si_vm_reg_valid().
 * Returns 0 on success, -EINVAL on a disallowed opcode or register.
 */
static int si_vm_packet3_gfx_check(struct radeon_device *rdev,
				   u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes that need no operand validation */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDIRECT_MULTI:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_DRAW_INDEX_INDIRECT_MULTI:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* presumably DST_SEL == 0 means a mem-mapped register —
		 * confirm against the PM4 packet spec */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			/* bit 16 presumably selects one-register (no
			 * auto-increment) mode — confirm */
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		/* check every register in [start_reg, end_reg] stays inside
		 * the config-reg window and on the whitelist */
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid GFX packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4237 
/**
 * si_vm_packet3_compute_check - validate a packet3 on a compute ring
 *
 * @rdev: radeon_device pointer
 * @ib: the indirect buffer being checked
 * @pkt: decoded packet header
 *
 * Like si_vm_packet3_gfx_check() but with the draw-related opcodes and
 * SET_CONFIG_REG excluded (they are not valid on the compute rings).
 * Returns 0 on success, -EINVAL on a disallowed opcode or register.
 */
static int si_vm_packet3_compute_check(struct radeon_device *rdev,
				       u32 *ib, struct radeon_cs_packet *pkt)
{
	int r;
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, reg, i;

	switch (pkt->opcode) {
	/* opcodes that need no operand validation */
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_ALLOC_GDS:
	case PACKET3_WRITE_GDS_RAM:
	case PACKET3_ATOMIC_GDS:
	case PACKET3_ATOMIC:
	case PACKET3_OCCLUSION_QUERY:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_PFP_SYNC_ME:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_SH_REG:
	case PACKET3_SET_SH_REG_OFFSET:
	case PACKET3_INCREMENT_DE_COUNTER:
	case PACKET3_WAIT_ON_CE_COUNTER:
	case PACKET3_WAIT_ON_AVAIL_BUFFER:
	case PACKET3_ME_WRITE:
		break;
	case PACKET3_COPY_DATA:
		/* presumably DST_SEL == 0 means a mem-mapped register —
		 * confirm against the PM4 packet spec */
		if ((idx_value & 0xf00) == 0) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_WRITE_DATA:
		if ((idx_value & 0xf00) == 0) {
			start_reg = ib[idx + 1] * 4;
			if (idx_value & 0x10000) {
				if (!si_vm_reg_valid(start_reg))
					return -EINVAL;
			} else {
				for (i = 0; i < (pkt->count - 2); i++) {
					reg = start_reg + (4 * i);
					if (!si_vm_reg_valid(reg))
						return -EINVAL;
				}
			}
		}
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!si_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_CP_DMA:
		r = si_vm_packet3_cp_dma_check(ib, idx);
		if (r)
			return r;
		break;
	default:
		DRM_ERROR("Invalid Compute packet3: 0x%x\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}
4325 
/**
 * si_ib_parse - validate the packets of a VM IB (SI)
 *
 * @rdev: radeon_device pointer
 * @ib: indirect buffer to validate
 *
 * Walk the IB dword stream and dispatch each PM4 packet to the CE,
 * GFX or compute checker depending on @ib->is_const_ib and @ib->ring.
 * Type-0 packets and unknown packet types are rejected outright.
 * Returns 0 on success, -EINVAL on the first invalid packet.
 */
int si_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = RADEON_CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = RADEON_CP_PACKET_GET_COUNT(ib->ptr[idx]);
		pkt.one_reg_wr = 0;
		switch (pkt.type) {
		case RADEON_PACKET_TYPE0:
			/* register writes are not allowed from a VM IB */
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case RADEON_PACKET_TYPE2:
			/* type-2 is a one-dword filler packet */
			idx += 1;
			break;
		case RADEON_PACKET_TYPE3:
			pkt.opcode = RADEON_CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			if (ib->is_const_ib)
				ret = si_vm_packet3_ce_check(rdev, ib->ptr, &pkt);
			else {
				switch (ib->ring) {
				case RADEON_RING_TYPE_GFX_INDEX:
					ret = si_vm_packet3_gfx_check(rdev, ib->ptr, &pkt);
					break;
				case CAYMAN_RING_TYPE_CP1_INDEX:
				case CAYMAN_RING_TYPE_CP2_INDEX:
					ret = si_vm_packet3_compute_check(rdev, ib->ptr, &pkt);
					break;
				default:
					dev_err(rdev->dev, "Non-PM4 ring %d !\n", ib->ring);
					ret = -EINVAL;
					break;
				}
			}
			/* skip header + payload (count field is N-1 encoded,
			 * presumably — confirm against the PM4 spec) */
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
	} while (idx < ib->length_dw);

	return ret;
}
4377 
4378 /*
4379  * vm
4380  */
4381 int si_vm_init(struct radeon_device *rdev)
4382 {
4383 	/* number of VMs */
4384 	rdev->vm_manager.nvm = 16;
4385 	/* base offset of vram pages */
4386 	rdev->vm_manager.vram_base_offset = 0;
4387 
4388 	return 0;
4389 }
4390 
/**
 * si_vm_fini - tear down the VM manager (SI)
 *
 * @rdev: radeon_device pointer
 *
 * Nothing to do on SI; kept so the asic table has a symmetric hook.
 */
void si_vm_fini(struct radeon_device *rdev)
{
}
4394 
4395 /**
4396  * si_vm_decode_fault - print human readable fault info
4397  *
4398  * @rdev: radeon_device pointer
4399  * @status: VM_CONTEXT1_PROTECTION_FAULT_STATUS register value
4400  * @addr: VM_CONTEXT1_PROTECTION_FAULT_ADDR register value
4401  *
4402  * Print human readable fault information (SI).
4403  */
4404 static void si_vm_decode_fault(struct radeon_device *rdev,
4405 			       u32 status, u32 addr)
4406 {
4407 	u32 mc_id = (status & MEMORY_CLIENT_ID_MASK) >> MEMORY_CLIENT_ID_SHIFT;
4408 	u32 vmid = (status & FAULT_VMID_MASK) >> FAULT_VMID_SHIFT;
4409 	u32 protections = (status & PROTECTIONS_MASK) >> PROTECTIONS_SHIFT;
4410 	char *block;
4411 
4412 	if (rdev->family == CHIP_TAHITI) {
4413 		switch (mc_id) {
4414 		case 160:
4415 		case 144:
4416 		case 96:
4417 		case 80:
4418 		case 224:
4419 		case 208:
4420 		case 32:
4421 		case 16:
4422 			block = "CB";
4423 			break;
4424 		case 161:
4425 		case 145:
4426 		case 97:
4427 		case 81:
4428 		case 225:
4429 		case 209:
4430 		case 33:
4431 		case 17:
4432 			block = "CB_FMASK";
4433 			break;
4434 		case 162:
4435 		case 146:
4436 		case 98:
4437 		case 82:
4438 		case 226:
4439 		case 210:
4440 		case 34:
4441 		case 18:
4442 			block = "CB_CMASK";
4443 			break;
4444 		case 163:
4445 		case 147:
4446 		case 99:
4447 		case 83:
4448 		case 227:
4449 		case 211:
4450 		case 35:
4451 		case 19:
4452 			block = "CB_IMMED";
4453 			break;
4454 		case 164:
4455 		case 148:
4456 		case 100:
4457 		case 84:
4458 		case 228:
4459 		case 212:
4460 		case 36:
4461 		case 20:
4462 			block = "DB";
4463 			break;
4464 		case 165:
4465 		case 149:
4466 		case 101:
4467 		case 85:
4468 		case 229:
4469 		case 213:
4470 		case 37:
4471 		case 21:
4472 			block = "DB_HTILE";
4473 			break;
4474 		case 167:
4475 		case 151:
4476 		case 103:
4477 		case 87:
4478 		case 231:
4479 		case 215:
4480 		case 39:
4481 		case 23:
4482 			block = "DB_STEN";
4483 			break;
4484 		case 72:
4485 		case 68:
4486 		case 64:
4487 		case 8:
4488 		case 4:
4489 		case 0:
4490 		case 136:
4491 		case 132:
4492 		case 128:
4493 		case 200:
4494 		case 196:
4495 		case 192:
4496 			block = "TC";
4497 			break;
4498 		case 112:
4499 		case 48:
4500 			block = "CP";
4501 			break;
4502 		case 49:
4503 		case 177:
4504 		case 50:
4505 		case 178:
4506 			block = "SH";
4507 			break;
4508 		case 53:
4509 		case 190:
4510 			block = "VGT";
4511 			break;
4512 		case 117:
4513 			block = "IH";
4514 			break;
4515 		case 51:
4516 		case 115:
4517 			block = "RLC";
4518 			break;
4519 		case 119:
4520 		case 183:
4521 			block = "DMA0";
4522 			break;
4523 		case 61:
4524 			block = "DMA1";
4525 			break;
4526 		case 248:
4527 		case 120:
4528 			block = "HDP";
4529 			break;
4530 		default:
4531 			block = "unknown";
4532 			break;
4533 		}
4534 	} else {
4535 		switch (mc_id) {
4536 		case 32:
4537 		case 16:
4538 		case 96:
4539 		case 80:
4540 		case 160:
4541 		case 144:
4542 		case 224:
4543 		case 208:
4544 			block = "CB";
4545 			break;
4546 		case 33:
4547 		case 17:
4548 		case 97:
4549 		case 81:
4550 		case 161:
4551 		case 145:
4552 		case 225:
4553 		case 209:
4554 			block = "CB_FMASK";
4555 			break;
4556 		case 34:
4557 		case 18:
4558 		case 98:
4559 		case 82:
4560 		case 162:
4561 		case 146:
4562 		case 226:
4563 		case 210:
4564 			block = "CB_CMASK";
4565 			break;
4566 		case 35:
4567 		case 19:
4568 		case 99:
4569 		case 83:
4570 		case 163:
4571 		case 147:
4572 		case 227:
4573 		case 211:
4574 			block = "CB_IMMED";
4575 			break;
4576 		case 36:
4577 		case 20:
4578 		case 100:
4579 		case 84:
4580 		case 164:
4581 		case 148:
4582 		case 228:
4583 		case 212:
4584 			block = "DB";
4585 			break;
4586 		case 37:
4587 		case 21:
4588 		case 101:
4589 		case 85:
4590 		case 165:
4591 		case 149:
4592 		case 229:
4593 		case 213:
4594 			block = "DB_HTILE";
4595 			break;
4596 		case 39:
4597 		case 23:
4598 		case 103:
4599 		case 87:
4600 		case 167:
4601 		case 151:
4602 		case 231:
4603 		case 215:
4604 			block = "DB_STEN";
4605 			break;
4606 		case 72:
4607 		case 68:
4608 		case 8:
4609 		case 4:
4610 		case 136:
4611 		case 132:
4612 		case 200:
4613 		case 196:
4614 			block = "TC";
4615 			break;
4616 		case 112:
4617 		case 48:
4618 			block = "CP";
4619 			break;
4620 		case 49:
4621 		case 177:
4622 		case 50:
4623 		case 178:
4624 			block = "SH";
4625 			break;
4626 		case 53:
4627 			block = "VGT";
4628 			break;
4629 		case 117:
4630 			block = "IH";
4631 			break;
4632 		case 51:
4633 		case 115:
4634 			block = "RLC";
4635 			break;
4636 		case 119:
4637 		case 183:
4638 			block = "DMA0";
4639 			break;
4640 		case 61:
4641 			block = "DMA1";
4642 			break;
4643 		case 248:
4644 		case 120:
4645 			block = "HDP";
4646 			break;
4647 		default:
4648 			block = "unknown";
4649 			break;
4650 		}
4651 	}
4652 
4653 	printk("VM fault (0x%02x, vmid %d) at page %u, %s from %s (%d)\n",
4654 	       protections, vmid, addr,
4655 	       (status & MEMORY_CLIENT_RW_MASK) ? "write" : "read",
4656 	       block, mc_id);
4657 }
4658 
4659 /**
4660  * si_vm_set_page - update the page tables using the CP
4661  *
4662  * @rdev: radeon_device pointer
4663  * @ib: indirect buffer to fill with commands
4664  * @pe: addr of the page entry
4665  * @addr: dst addr to write into pe
4666  * @count: number of page entries to update
4667  * @incr: increase next addr by incr bytes
4668  * @flags: access flags
4669  *
4670  * Update the page tables using the CP (SI).
4671  */
4672 void si_vm_set_page(struct radeon_device *rdev,
4673 		    struct radeon_ib *ib,
4674 		    uint64_t pe,
4675 		    uint64_t addr, unsigned count,
4676 		    uint32_t incr, uint32_t flags)
4677 {
4678 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4679 	uint64_t value;
4680 	unsigned ndw;
4681 
4682 	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4683 		while (count) {
4684 			ndw = 2 + count * 2;
4685 			if (ndw > 0x3FFE)
4686 				ndw = 0x3FFE;
4687 
4688 			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4689 			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4690 					WRITE_DATA_DST_SEL(1));
4691 			ib->ptr[ib->length_dw++] = pe;
4692 			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4693 			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4694 				if (flags & RADEON_VM_PAGE_SYSTEM) {
4695 					value = radeon_vm_map_gart(rdev, addr);
4696 					value &= 0xFFFFFFFFFFFFF000ULL;
4697 				} else if (flags & RADEON_VM_PAGE_VALID) {
4698 					value = addr;
4699 				} else {
4700 					value = 0;
4701 				}
4702 				addr += incr;
4703 				value |= r600_flags;
4704 				ib->ptr[ib->length_dw++] = value;
4705 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4706 			}
4707 		}
4708 	} else {
4709 		/* DMA */
4710 		si_dma_vm_set_page(rdev, ib, pe, addr, count, incr, flags);
4711 	}
4712 }
4713 
/* Emit the packets that flush the VM TLB for @vm on ring @ridx:
 * update the page directory base, flush the HDP cache, request a
 * VM invalidate and finally sync PFP to ME.
 */
void si_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
{
	struct radeon_ring *ring = &rdev->ring[ridx];

	if (vm == NULL)
		return;

	/* write new base address */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));

	/* VM contexts 0-7 and 8-15 have separate base-address register banks */
	if (vm->id < 8) {
		radeon_ring_write(ring,
				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
	} else {
		radeon_ring_write(ring,
				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
	}
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);

	/* flush hdp cache */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 0x1);

	/* bits 0-15 are the VM contexts0-15 */
	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
				 WRITE_DATA_DST_SEL(0)));
	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
	radeon_ring_write(ring, 0);
	radeon_ring_write(ring, 1 << vm->id);

	/* sync PFP to ME, otherwise we might get invalid PFP reads */
	radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
	radeon_ring_write(ring, 0x0);
}
4756 
4757 /*
4758  *  Power and clock gating
4759  */
4760 static void si_wait_for_rlc_serdes(struct radeon_device *rdev)
4761 {
4762 	int i;
4763 
4764 	for (i = 0; i < rdev->usec_timeout; i++) {
4765 		if (RREG32(RLC_SERDES_MASTER_BUSY_0) == 0)
4766 			break;
4767 		udelay(1);
4768 	}
4769 
4770 	for (i = 0; i < rdev->usec_timeout; i++) {
4771 		if (RREG32(RLC_SERDES_MASTER_BUSY_1) == 0)
4772 			break;
4773 		udelay(1);
4774 	}
4775 }
4776 
4777 static void si_enable_gui_idle_interrupt(struct radeon_device *rdev,
4778 					 bool enable)
4779 {
4780 	u32 tmp = RREG32(CP_INT_CNTL_RING0);
4781 	u32 mask;
4782 	int i;
4783 
4784 	if (enable)
4785 		tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4786 	else
4787 		tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4788 	WREG32(CP_INT_CNTL_RING0, tmp);
4789 
4790 	if (!enable) {
4791 		/* read a gfx register */
4792 		tmp = RREG32(DB_DEPTH_INFO);
4793 
4794 		mask = RLC_BUSY_STATUS | GFX_POWER_STATUS | GFX_CLOCK_STATUS | GFX_LS_STATUS;
4795 		for (i = 0; i < rdev->usec_timeout; i++) {
4796 			if ((RREG32(RLC_STAT) & mask) == (GFX_CLOCK_STATUS | GFX_POWER_STATUS))
4797 				break;
4798 			udelay(1);
4799 		}
4800 	}
4801 }
4802 
4803 static void si_set_uvd_dcm(struct radeon_device *rdev,
4804 			   bool sw_mode)
4805 {
4806 	u32 tmp, tmp2;
4807 
4808 	tmp = RREG32(UVD_CGC_CTRL);
4809 	tmp &= ~(CLK_OD_MASK | CG_DT_MASK);
4810 	tmp |= DCM | CG_DT(1) | CLK_OD(4);
4811 
4812 	if (sw_mode) {
4813 		tmp &= ~0x7ffff800;
4814 		tmp2 = DYN_OR_EN | DYN_RR_EN | G_DIV_ID(7);
4815 	} else {
4816 		tmp |= 0x7ffff800;
4817 		tmp2 = 0;
4818 	}
4819 
4820 	WREG32(UVD_CGC_CTRL, tmp);
4821 	WREG32_UVD_CTX(UVD_CGC_CTRL2, tmp2);
4822 }
4823 
4824 void si_init_uvd_internal_cg(struct radeon_device *rdev)
4825 {
4826 	bool hw_mode = true;
4827 
4828 	if (hw_mode) {
4829 		si_set_uvd_dcm(rdev, false);
4830 	} else {
4831 		u32 tmp = RREG32(UVD_CGC_CTRL);
4832 		tmp &= ~DCM;
4833 		WREG32(UVD_CGC_CTRL, tmp);
4834 	}
4835 }
4836 
4837 static u32 si_halt_rlc(struct radeon_device *rdev)
4838 {
4839 	u32 data, orig;
4840 
4841 	orig = data = RREG32(RLC_CNTL);
4842 
4843 	if (data & RLC_ENABLE) {
4844 		data &= ~RLC_ENABLE;
4845 		WREG32(RLC_CNTL, data);
4846 
4847 		si_wait_for_rlc_serdes(rdev);
4848 	}
4849 
4850 	return orig;
4851 }
4852 
4853 static void si_update_rlc(struct radeon_device *rdev, u32 rlc)
4854 {
4855 	u32 tmp;
4856 
4857 	tmp = RREG32(RLC_CNTL);
4858 	if (tmp != rlc)
4859 		WREG32(RLC_CNTL, rlc);
4860 }
4861 
4862 static void si_enable_dma_pg(struct radeon_device *rdev, bool enable)
4863 {
4864 	u32 data, orig;
4865 
4866 	orig = data = RREG32(DMA_PG);
4867 	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA))
4868 		data |= PG_CNTL_ENABLE;
4869 	else
4870 		data &= ~PG_CNTL_ENABLE;
4871 	if (orig != data)
4872 		WREG32(DMA_PG, data);
4873 }
4874 
4875 static void si_init_dma_pg(struct radeon_device *rdev)
4876 {
4877 	u32 tmp;
4878 
4879 	WREG32(DMA_PGFSM_WRITE,  0x00002000);
4880 	WREG32(DMA_PGFSM_CONFIG, 0x100010ff);
4881 
4882 	for (tmp = 0; tmp < 5; tmp++)
4883 		WREG32(DMA_PGFSM_WRITE, 0);
4884 }
4885 
/* Enable or disable gfx clock/powergating via the RLC auto-powergate path. */
static void si_enable_gfx_cgpg(struct radeon_device *rdev,
			       bool enable)
{
	u32 tmp;

	if (enable && (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG)) {
		/* program the power up/down delay thresholds */
		tmp = RLC_PUD(0x10) | RLC_PDD(0x10) | RLC_TTPD(0x10) | RLC_MSD(0x10);
		WREG32(RLC_TTOP_D, tmp);

		tmp = RREG32(RLC_PG_CNTL);
		tmp |= GFX_PG_ENABLE;
		WREG32(RLC_PG_CNTL, tmp);

		/* let the RLC powergate automatically */
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp |= AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);
	} else {
		tmp = RREG32(RLC_AUTO_PG_CTRL);
		tmp &= ~AUTO_PG_EN;
		WREG32(RLC_AUTO_PG_CTRL, tmp);

		/* dummy read of a gfx register; presumably flushes the
		 * disable before returning - NOTE(review): confirm */
		tmp = RREG32(DB_RENDER_CONTROL);
	}
}
4910 
/* One-time setup for gfx clock/powergating: point the RLC at the
 * save/restore and clear-state buffers and program the auto-powergate
 * idle threshold.
 */
static void si_init_gfx_cgpg(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);

	/* save/restore buffer comes from the driver-allocated BO */
	tmp = RREG32(RLC_PG_CNTL);
	tmp |= GFX_PG_SRC;
	WREG32(RLC_PG_CNTL, tmp);

	WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);

	tmp = RREG32(RLC_AUTO_PG_CTRL);

	/* idle threshold before the RLC powergates the gfx block */
	tmp &= ~GRBM_REG_SGIT_MASK;
	tmp |= GRBM_REG_SGIT(0x700);
	tmp &= ~PG_AFTER_GRBM_REG_ST_MASK;
	WREG32(RLC_AUTO_PG_CTRL, tmp);
}
4930 
4931 static u32 si_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh)
4932 {
4933 	u32 mask = 0, tmp, tmp1;
4934 	int i;
4935 
4936 	si_select_se_sh(rdev, se, sh);
4937 	tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG);
4938 	tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG);
4939 	si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4940 
4941 	tmp &= 0xffff0000;
4942 
4943 	tmp |= tmp1;
4944 	tmp >>= 16;
4945 
4946 	for (i = 0; i < rdev->config.si.max_cu_per_sh; i ++) {
4947 		mask <<= 1;
4948 		mask |= 1;
4949 	}
4950 
4951 	return (~tmp) & mask;
4952 }
4953 
4954 static void si_init_ao_cu_mask(struct radeon_device *rdev)
4955 {
4956 	u32 i, j, k, active_cu_number = 0;
4957 	u32 mask, counter, cu_bitmap;
4958 	u32 tmp = 0;
4959 
4960 	for (i = 0; i < rdev->config.si.max_shader_engines; i++) {
4961 		for (j = 0; j < rdev->config.si.max_sh_per_se; j++) {
4962 			mask = 1;
4963 			cu_bitmap = 0;
4964 			counter  = 0;
4965 			for (k = 0; k < rdev->config.si.max_cu_per_sh; k++) {
4966 				if (si_get_cu_active_bitmap(rdev, i, j) & mask) {
4967 					if (counter < 2)
4968 						cu_bitmap |= mask;
4969 					counter++;
4970 				}
4971 				mask <<= 1;
4972 			}
4973 
4974 			active_cu_number += counter;
4975 			tmp |= (cu_bitmap << (i * 16 + j * 8));
4976 		}
4977 	}
4978 
4979 	WREG32(RLC_PG_AO_CU_MASK, tmp);
4980 
4981 	tmp = RREG32(RLC_MAX_PG_CU);
4982 	tmp &= ~MAX_PU_CU_MASK;
4983 	tmp |= MAX_PU_CU(active_cu_number);
4984 	WREG32(RLC_MAX_PG_CU, tmp);
4985 }
4986 
/* Enable or disable coarse-grained clockgating (CGCG/CGLS) for gfx.
 * The RLC must be halted and its serdes links programmed before the
 * gating bits are flipped; the sequence below is order-sensitive.
 */
static void si_enable_cgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp;

	orig = data = RREG32(RLC_CGCG_CGLS_CTRL);

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CGCG)) {
		si_enable_gui_idle_interrupt(rdev, true);

		WREG32(RLC_GCPM_GENERAL_3, 0x00000080);

		/* halt the RLC, broadcast to all serdes units, then restore */
		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00b000ff);

		si_wait_for_rlc_serdes(rdev);

		si_update_rlc(rdev, tmp);

		WREG32(RLC_SERDES_WR_CTRL, 0x007000ff);

		data |= CGCG_EN | CGLS_EN;
	} else {
		si_enable_gui_idle_interrupt(rdev, false);

		/* dummy reads - NOTE(review): likely settling reads before
		 * disabling gating; confirm against hw docs */
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);
		RREG32(CB_CGTT_SCLK_CTRL);

		data &= ~(CGCG_EN | CGLS_EN);
	}

	if (orig != data)
		WREG32(RLC_CGCG_CGLS_CTRL, data);
}
5026 
/* Enable or disable medium-grained clockgating (MGCG) for gfx, including
 * the optional CP memory light-sleep. The serdes writes must happen with
 * the RLC halted; the statement order below is deliberate.
 */
static void si_enable_mgcg(struct radeon_device *rdev,
			   bool enable)
{
	u32 data, orig, tmp = 0;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_MGCG)) {
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data = 0x96940200;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		/* CP memory light-sleep, if supported */
		if (rdev->cg_flags & RADEON_CG_SUPPORT_GFX_CP_LS) {
			orig = data = RREG32(CP_MEM_SLP_CNTL);
			data |= CP_MEM_LS_EN;
			if (orig != data)
				WREG32(CP_MEM_SLP_CNTL, data);
		}

		/* clear the MGCG override bits (low 6 bits) */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data &= 0xffffffc0;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		tmp = si_halt_rlc(rdev);

		/* broadcast the new gating state to all serdes units */
		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00d000ff);

		si_update_rlc(rdev, tmp);
	} else {
		/* force the override bits on to disable MGCG */
		orig = data = RREG32(RLC_CGTT_MGCG_OVERRIDE);
		data |= 0x00000003;
		if (orig != data)
			WREG32(RLC_CGTT_MGCG_OVERRIDE, data);

		/* turn off CP memory light-sleep if it was on */
		data = RREG32(CP_MEM_SLP_CNTL);
		if (data & CP_MEM_LS_EN) {
			data &= ~CP_MEM_LS_EN;
			WREG32(CP_MEM_SLP_CNTL, data);
		}
		orig = data = RREG32(CGTS_SM_CTRL_REG);
		data |= LS_OVERRIDE | OVERRIDE;
		if (orig != data)
			WREG32(CGTS_SM_CTRL_REG, data);

		tmp = si_halt_rlc(rdev);

		WREG32(RLC_SERDES_WR_MASTER_MASK_0, 0xffffffff);
		WREG32(RLC_SERDES_WR_MASTER_MASK_1, 0xffffffff);
		WREG32(RLC_SERDES_WR_CTRL, 0x00e000ff);

		si_update_rlc(rdev, tmp);
	}
}
5082 
/* Enable or disable medium-grained clockgating for the UVD block. */
static void si_enable_uvd_mgcg(struct radeon_device *rdev,
			       bool enable)
{
	u32 orig, data, tmp;

	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_UVD_MGCG)) {
		/* enable memory gating for all UVD memories */
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp |= 0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data |= DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		/* clear the SMC-side local gating overrides */
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0);
	} else {
		tmp = RREG32_UVD_CTX(UVD_CGC_MEM_CTRL);
		tmp &= ~0x3fff;
		WREG32_UVD_CTX(UVD_CGC_MEM_CTRL, tmp);

		orig = data = RREG32(UVD_CGC_CTRL);
		data &= ~DCM;
		if (orig != data)
			WREG32(UVD_CGC_CTRL, data);

		/* force all SMC-side local gating overrides on */
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_0, 0xffffffff);
		WREG32_SMC(SMC_CG_IND_START + CG_CGTT_LOCAL_1, 0xffffffff);
	}
}
5114 
/* MC/VM hub registers that each carry the MC_LS_ENABLE and MC_CG_ENABLE
 * clockgating control bits; iterated by si_enable_mc_ls() and
 * si_enable_mc_mgcg().
 */
static const u32 mc_cg_registers[] =
{
	MC_HUB_MISC_HUB_CG,
	MC_HUB_MISC_SIP_CG,
	MC_HUB_MISC_VM_CG,
	MC_XPB_CLK_GAT,
	ATC_MISC_CG,
	MC_CITF_MISC_WR_CG,
	MC_CITF_MISC_RD_CG,
	MC_CITF_MISC_VM_CG,
	VM_L2_CG,
};
5127 
5128 static void si_enable_mc_ls(struct radeon_device *rdev,
5129 			    bool enable)
5130 {
5131 	int i;
5132 	u32 orig, data;
5133 
5134 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5135 		orig = data = RREG32(mc_cg_registers[i]);
5136 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_LS))
5137 			data |= MC_LS_ENABLE;
5138 		else
5139 			data &= ~MC_LS_ENABLE;
5140 		if (data != orig)
5141 			WREG32(mc_cg_registers[i], data);
5142 	}
5143 }
5144 
5145 static void si_enable_mc_mgcg(struct radeon_device *rdev,
5146 			       bool enable)
5147 {
5148 	int i;
5149 	u32 orig, data;
5150 
5151 	for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
5152 		orig = data = RREG32(mc_cg_registers[i]);
5153 		if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_MC_MGCG))
5154 			data |= MC_CG_ENABLE;
5155 		else
5156 			data &= ~MC_CG_ENABLE;
5157 		if (data != orig)
5158 			WREG32(mc_cg_registers[i], data);
5159 	}
5160 }
5161 
5162 static void si_enable_dma_mgcg(struct radeon_device *rdev,
5163 			       bool enable)
5164 {
5165 	u32 orig, data, offset;
5166 	int i;
5167 
5168 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_SDMA_MGCG)) {
5169 		for (i = 0; i < 2; i++) {
5170 			if (i == 0)
5171 				offset = DMA0_REGISTER_OFFSET;
5172 			else
5173 				offset = DMA1_REGISTER_OFFSET;
5174 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5175 			data &= ~MEM_POWER_OVERRIDE;
5176 			if (data != orig)
5177 				WREG32(DMA_POWER_CNTL + offset, data);
5178 			WREG32(DMA_CLK_CTRL + offset, 0x00000100);
5179 		}
5180 	} else {
5181 		for (i = 0; i < 2; i++) {
5182 			if (i == 0)
5183 				offset = DMA0_REGISTER_OFFSET;
5184 			else
5185 				offset = DMA1_REGISTER_OFFSET;
5186 			orig = data = RREG32(DMA_POWER_CNTL + offset);
5187 			data |= MEM_POWER_OVERRIDE;
5188 			if (data != orig)
5189 				WREG32(DMA_POWER_CNTL + offset, data);
5190 
5191 			orig = data = RREG32(DMA_CLK_CTRL + offset);
5192 			data = 0xff000000;
5193 			if (data != orig)
5194 				WREG32(DMA_CLK_CTRL + offset, data);
5195 		}
5196 	}
5197 }
5198 
5199 static void si_enable_bif_mgls(struct radeon_device *rdev,
5200 			       bool enable)
5201 {
5202 	u32 orig, data;
5203 
5204 	orig = data = RREG32_PCIE(PCIE_CNTL2);
5205 
5206 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_BIF_LS))
5207 		data |= SLV_MEM_LS_EN | MST_MEM_LS_EN |
5208 			REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN;
5209 	else
5210 		data &= ~(SLV_MEM_LS_EN | MST_MEM_LS_EN |
5211 			  REPLAY_MEM_LS_EN | SLV_MEM_AGGRESSIVE_LS_EN);
5212 
5213 	if (orig != data)
5214 		WREG32_PCIE(PCIE_CNTL2, data);
5215 }
5216 
5217 static void si_enable_hdp_mgcg(struct radeon_device *rdev,
5218 			       bool enable)
5219 {
5220 	u32 orig, data;
5221 
5222 	orig = data = RREG32(HDP_HOST_PATH_CNTL);
5223 
5224 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_MGCG))
5225 		data &= ~CLOCK_GATING_DIS;
5226 	else
5227 		data |= CLOCK_GATING_DIS;
5228 
5229 	if (orig != data)
5230 		WREG32(HDP_HOST_PATH_CNTL, data);
5231 }
5232 
5233 static void si_enable_hdp_ls(struct radeon_device *rdev,
5234 			     bool enable)
5235 {
5236 	u32 orig, data;
5237 
5238 	orig = data = RREG32(HDP_MEM_POWER_LS);
5239 
5240 	if (enable && (rdev->cg_flags & RADEON_CG_SUPPORT_HDP_LS))
5241 		data |= HDP_LS_ENABLE;
5242 	else
5243 		data &= ~HDP_LS_ENABLE;
5244 
5245 	if (orig != data)
5246 		WREG32(HDP_MEM_POWER_LS, data);
5247 }
5248 
/* Enable or disable clockgating for the blocks selected in @block
 * (a mask of RADEON_CG_BLOCK_* values).
 */
void si_update_cg(struct radeon_device *rdev,
		  u32 block, bool enable)
{
	if (block & RADEON_CG_BLOCK_GFX) {
		/* order matters! MGCG before CGCG on enable,
		 * the reverse on disable */
		if (enable) {
			si_enable_mgcg(rdev, true);
			si_enable_cgcg(rdev, true);
		} else {
			si_enable_cgcg(rdev, false);
			si_enable_mgcg(rdev, false);
		}
	}

	if (block & RADEON_CG_BLOCK_MC) {
		si_enable_mc_mgcg(rdev, enable);
		si_enable_mc_ls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_SDMA) {
		si_enable_dma_mgcg(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_BIF) {
		si_enable_bif_mgls(rdev, enable);
	}

	if (block & RADEON_CG_BLOCK_UVD) {
		/* UVD gating only applies on parts that have the block */
		if (rdev->has_uvd) {
			si_enable_uvd_mgcg(rdev, enable);
		}
	}

	if (block & RADEON_CG_BLOCK_HDP) {
		si_enable_hdp_mgcg(rdev, enable);
		si_enable_hdp_ls(rdev, enable);
	}
}
5287 
5288 static void si_init_cg(struct radeon_device *rdev)
5289 {
5290 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5291 			    RADEON_CG_BLOCK_MC |
5292 			    RADEON_CG_BLOCK_SDMA |
5293 			    RADEON_CG_BLOCK_BIF |
5294 			    RADEON_CG_BLOCK_HDP), true);
5295 	if (rdev->has_uvd) {
5296 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, true);
5297 		si_init_uvd_internal_cg(rdev);
5298 	}
5299 }
5300 
5301 static void si_fini_cg(struct radeon_device *rdev)
5302 {
5303 	if (rdev->has_uvd) {
5304 		si_update_cg(rdev, RADEON_CG_BLOCK_UVD, false);
5305 	}
5306 	si_update_cg(rdev, (RADEON_CG_BLOCK_GFX |
5307 			    RADEON_CG_BLOCK_MC |
5308 			    RADEON_CG_BLOCK_SDMA |
5309 			    RADEON_CG_BLOCK_BIF |
5310 			    RADEON_CG_BLOCK_HDP), false);
5311 }
5312 
5313 u32 si_get_csb_size(struct radeon_device *rdev)
5314 {
5315 	u32 count = 0;
5316 	const struct cs_section_def *sect = NULL;
5317 	const struct cs_extent_def *ext = NULL;
5318 
5319 	if (rdev->rlc.cs_data == NULL)
5320 		return 0;
5321 
5322 	/* begin clear state */
5323 	count += 2;
5324 	/* context control state */
5325 	count += 3;
5326 
5327 	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
5328 		for (ext = sect->section; ext->extent != NULL; ++ext) {
5329 			if (sect->id == SECT_CONTEXT)
5330 				count += 2 + ext->reg_count;
5331 			else
5332 				return 0;
5333 		}
5334 	}
5335 	/* pa_sc_raster_config */
5336 	count += 3;
5337 	/* end clear state */
5338 	count += 2;
5339 	/* clear state */
5340 	count += 2;
5341 
5342 	return count;
5343 }
5344 
/* Fill @buffer with the clear-state packet stream consumed by the RLC.
 * The layout must match the dword count computed by si_get_csb_size().
 */
void si_get_csb_buffer(struct radeon_device *rdev, volatile u32 *buffer)
{
	u32 count = 0, i;
	const struct cs_section_def *sect = NULL;
	const struct cs_extent_def *ext = NULL;

	if (rdev->rlc.cs_data == NULL)
		return;
	if (buffer == NULL)
		return;

	/* begin clear state */
	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_BEGIN_CLEAR_STATE;

	/* context control */
	buffer[count++] = PACKET3(PACKET3_CONTEXT_CONTROL, 1);
	buffer[count++] = 0x80000000;
	buffer[count++] = 0x80000000;

	/* emit every SECT_CONTEXT extent as a SET_CONTEXT_REG packet */
	for (sect = rdev->rlc.cs_data; sect->section != NULL; ++sect) {
		for (ext = sect->section; ext->extent != NULL; ++ext) {
			if (sect->id == SECT_CONTEXT) {
				buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, ext->reg_count);
				buffer[count++] = ext->reg_index - 0xa000;
				for (i = 0; i < ext->reg_count; i++)
					buffer[count++] = ext->extent[i];
			} else {
				return;
			}
		}
	}

	/* pa_sc_raster_config: asic-specific default value */
	buffer[count++] = PACKET3(PACKET3_SET_CONTEXT_REG, 1);
	buffer[count++] = PA_SC_RASTER_CONFIG - PACKET3_SET_CONTEXT_REG_START;
	switch (rdev->family) {
	case CHIP_TAHITI:
	case CHIP_PITCAIRN:
		buffer[count++] = 0x2a00126a;
		break;
	case CHIP_VERDE:
		buffer[count++] = 0x0000124a;
		break;
	case CHIP_OLAND:
		buffer[count++] = 0x00000082;
		break;
	case CHIP_HAINAN:
		buffer[count++] = 0x00000000;
		break;
	default:
		buffer[count++] = 0x00000000;
		break;
	}

	/* end clear state */
	buffer[count++] = PACKET3(PACKET3_PREAMBLE_CNTL, 0);
	buffer[count++] = PACKET3_PREAMBLE_END_CLEAR_STATE;

	/* clear state */
	buffer[count++] = PACKET3(PACKET3_CLEAR_STATE, 0);
	buffer[count++] = 0;
}
5403 
5404 static void si_init_pg(struct radeon_device *rdev)
5405 {
5406 	if (rdev->pg_flags) {
5407 		if (rdev->pg_flags & RADEON_PG_SUPPORT_SDMA) {
5408 			si_init_dma_pg(rdev);
5409 		}
5410 		si_init_ao_cu_mask(rdev);
5411 		if (rdev->pg_flags & RADEON_PG_SUPPORT_GFX_CG) {
5412 			si_init_gfx_cgpg(rdev);
5413 		}
5414 		si_enable_dma_pg(rdev, true);
5415 		si_enable_gfx_cgpg(rdev, true);
5416 	} else {
5417 		WREG32(RLC_SAVE_AND_RESTORE_BASE, rdev->rlc.save_restore_gpu_addr >> 8);
5418 		WREG32(RLC_CLEAR_STATE_RESTORE_BASE, rdev->rlc.clear_state_gpu_addr >> 8);
5419 	}
5420 }
5421 
5422 static void si_fini_pg(struct radeon_device *rdev)
5423 {
5424 	if (rdev->pg_flags) {
5425 		si_enable_dma_pg(rdev, false);
5426 		si_enable_gfx_cgpg(rdev, false);
5427 	}
5428 }
5429 
5430 /*
5431  * RLC
5432  */
5433 void si_rlc_reset(struct radeon_device *rdev)
5434 {
5435 	u32 tmp = RREG32(GRBM_SOFT_RESET);
5436 
5437 	tmp |= SOFT_RESET_RLC;
5438 	WREG32(GRBM_SOFT_RESET, tmp);
5439 	udelay(50);
5440 	tmp &= ~SOFT_RESET_RLC;
5441 	WREG32(GRBM_SOFT_RESET, tmp);
5442 	udelay(50);
5443 }
5444 
5445 static void si_rlc_stop(struct radeon_device *rdev)
5446 {
5447 	WREG32(RLC_CNTL, 0);
5448 
5449 	si_enable_gui_idle_interrupt(rdev, false);
5450 
5451 	si_wait_for_rlc_serdes(rdev);
5452 }
5453 
5454 static void si_rlc_start(struct radeon_device *rdev)
5455 {
5456 	WREG32(RLC_CNTL, RLC_ENABLE);
5457 
5458 	si_enable_gui_idle_interrupt(rdev, true);
5459 
5460 	udelay(50);
5461 }
5462 
5463 static bool si_lbpw_supported(struct radeon_device *rdev)
5464 {
5465 	u32 tmp;
5466 
5467 	/* Enable LBPW only for DDR3 */
5468 	tmp = RREG32(MC_SEQ_MISC0);
5469 	if ((tmp & 0xF0000000) == 0xB0000000)
5470 		return true;
5471 	return false;
5472 }
5473 
5474 static void si_enable_lbpw(struct radeon_device *rdev, bool enable)
5475 {
5476 	u32 tmp;
5477 
5478 	tmp = RREG32(RLC_LB_CNTL);
5479 	if (enable)
5480 		tmp |= LOAD_BALANCE_ENABLE;
5481 	else
5482 		tmp &= ~LOAD_BALANCE_ENABLE;
5483 	WREG32(RLC_LB_CNTL, tmp);
5484 
5485 	if (!enable) {
5486 		si_select_se_sh(rdev, 0xffffffff, 0xffffffff);
5487 		WREG32(SPI_LB_CU_MASK, 0x00ff);
5488 	}
5489 }
5490 
/* Bring up the RLC: stop and reset it, initialize power/clock gating,
 * program its registers, upload the big-endian RLC microcode and start
 * it. Returns 0 on success or -EINVAL when no RLC firmware is loaded.
 */
static int si_rlc_resume(struct radeon_device *rdev)
{
	u32 i;
	const __be32 *fw_data;

	if (!rdev->rlc_fw)
		return -EINVAL;

	si_rlc_stop(rdev);

	si_rlc_reset(rdev);

	si_init_pg(rdev);

	si_init_cg(rdev);

	/* reset the ring/load-balance/counter state */
	WREG32(RLC_RL_BASE, 0);
	WREG32(RLC_RL_SIZE, 0);
	WREG32(RLC_LB_CNTL, 0);
	WREG32(RLC_LB_CNTR_MAX, 0xffffffff);
	WREG32(RLC_LB_CNTR_INIT, 0);
	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);

	WREG32(RLC_MC_CNTL, 0);
	WREG32(RLC_UCODE_CNTL, 0);

	/* upload the microcode one dword at a time (firmware image is
	 * stored big-endian) */
	fw_data = (const __be32 *)rdev->rlc_fw->data;
	for (i = 0; i < SI_RLC_UCODE_SIZE; i++) {
		WREG32(RLC_UCODE_ADDR, i);
		WREG32(RLC_UCODE_DATA, be32_to_cpup(fw_data++));
	}
	WREG32(RLC_UCODE_ADDR, 0);

	si_enable_lbpw(rdev, si_lbpw_supported(rdev));

	si_rlc_start(rdev);

	return 0;
}
5530 
5531 static void si_enable_interrupts(struct radeon_device *rdev)
5532 {
5533 	u32 ih_cntl = RREG32(IH_CNTL);
5534 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5535 
5536 	ih_cntl |= ENABLE_INTR;
5537 	ih_rb_cntl |= IH_RB_ENABLE;
5538 	WREG32(IH_CNTL, ih_cntl);
5539 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5540 	rdev->ih.enabled = true;
5541 }
5542 
5543 static void si_disable_interrupts(struct radeon_device *rdev)
5544 {
5545 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
5546 	u32 ih_cntl = RREG32(IH_CNTL);
5547 
5548 	ih_rb_cntl &= ~IH_RB_ENABLE;
5549 	ih_cntl &= ~ENABLE_INTR;
5550 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5551 	WREG32(IH_CNTL, ih_cntl);
5552 	/* set rptr, wptr to 0 */
5553 	WREG32(IH_RB_RPTR, 0);
5554 	WREG32(IH_RB_WPTR, 0);
5555 	rdev->ih.enabled = false;
5556 	rdev->ih.rptr = 0;
5557 }
5558 
/* Mask every interrupt source: CP rings, DMA engines, GRBM, all CRTC
 * vblank/pageflip sources and (on parts with display) the HPD pins.
 */
static void si_disable_interrupt_state(struct radeon_device *rdev)
{
	u32 tmp;

	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
	WREG32(CP_INT_CNTL_RING1, 0);
	WREG32(CP_INT_CNTL_RING2, 0);
	/* clear the DMA trap enables on both engines */
	tmp = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, tmp);
	tmp = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, tmp);
	WREG32(GRBM_INT_CNTL, 0);
	/* vblank interrupt masks, guarded by how many CRTCs the asic has */
	if (rdev->num_crtc >= 2) {
		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	/* pageflip interrupt masks */
	if (rdev->num_crtc >= 2) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 4) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
	}
	if (rdev->num_crtc >= 6) {
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
	}

	if (!ASIC_IS_NODCE(rdev)) {
		WREG32(DACA_AUTODETECT_INT_CONTROL, 0);

		/* mask HPD interrupts but preserve the polarity bits */
		tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD1_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD2_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD3_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD4_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD5_INT_CONTROL, tmp);
		tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
		WREG32(DC_HPD6_INT_CONTROL, tmp);
	}
}
5614 
/* One-time interrupt-handler (IH) bring-up: allocate the IH ring, load
 * the RLC, program the IH ring buffer and control registers, then
 * enable interrupts. Returns 0 on success or a negative errno.
 */
static int si_irq_init(struct radeon_device *rdev)
{
	int ret = 0;
	int rb_bufsz;
	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;

	/* allocate ring */
	ret = r600_ih_ring_alloc(rdev);
	if (ret)
		return ret;

	/* disable irqs */
	si_disable_interrupts(rdev);

	/* init rlc */
	ret = si_rlc_resume(rdev);
	if (ret) {
		r600_ih_ring_fini(rdev);
		return ret;
	}

	/* setup interrupt control */
	/* set dummy read address to ring address */
	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
	interrupt_cntl = RREG32(INTERRUPT_CNTL);
	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
	 */
	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
	WREG32(INTERRUPT_CNTL, interrupt_cntl);

	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
	/* ring size field is log2 of the size in dwords */
	rb_bufsz = order_base_2(rdev->ih.ring_size / 4);

	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
		      IH_WPTR_OVERFLOW_CLEAR |
		      (rb_bufsz << 1));

	if (rdev->wb.enabled)
		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;

	/* set the writeback address whether it's enabled or not */
	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);

	WREG32(IH_RB_CNTL, ih_rb_cntl);

	/* set rptr, wptr to 0 */
	WREG32(IH_RB_RPTR, 0);
	WREG32(IH_RB_WPTR, 0);

	/* Default settings for IH_CNTL (disabled at first) */
	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
	/* RPTR_REARM only works if msi's are enabled */
	if (rdev->msi_enabled)
		ih_cntl |= RPTR_REARM;
	WREG32(IH_CNTL, ih_cntl);

	/* force the active interrupt state to all disabled */
	si_disable_interrupt_state(rdev);

	pci_set_master(rdev->pdev);

	/* enable irqs */
	si_enable_interrupts(rdev);

	return ret;
}
5685 
5686 int si_irq_set(struct radeon_device *rdev)
5687 {
5688 	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE;
5689 	u32 cp_int_cntl1 = 0, cp_int_cntl2 = 0;
5690 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5691 	u32 hpd1 = 0, hpd2 = 0, hpd3 = 0, hpd4 = 0, hpd5 = 0, hpd6 = 0;
5692 	u32 grbm_int_cntl = 0;
5693 	u32 grph1 = 0, grph2 = 0, grph3 = 0, grph4 = 0, grph5 = 0, grph6 = 0;
5694 	u32 dma_cntl, dma_cntl1;
5695 	u32 thermal_int = 0;
5696 
5697 	if (!rdev->irq.installed) {
5698 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5699 		return -EINVAL;
5700 	}
5701 	/* don't enable anything if the ih is disabled */
5702 	if (!rdev->ih.enabled) {
5703 		si_disable_interrupts(rdev);
5704 		/* force the active interrupt state to all disabled */
5705 		si_disable_interrupt_state(rdev);
5706 		return 0;
5707 	}
5708 
5709 	if (!ASIC_IS_NODCE(rdev)) {
5710 		hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5711 		hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5712 		hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5713 		hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5714 		hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5715 		hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5716 	}
5717 
5718 	dma_cntl = RREG32(DMA_CNTL + DMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5719 	dma_cntl1 = RREG32(DMA_CNTL + DMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5720 
5721 	thermal_int = RREG32(CG_THERMAL_INT) &
5722 		~(THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW);
5723 
5724 	/* enable CP interrupts on all rings */
5725 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5726 		DRM_DEBUG("si_irq_set: sw int gfx\n");
5727 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5728 	}
5729 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5730 		DRM_DEBUG("si_irq_set: sw int cp1\n");
5731 		cp_int_cntl1 |= TIME_STAMP_INT_ENABLE;
5732 	}
5733 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5734 		DRM_DEBUG("si_irq_set: sw int cp2\n");
5735 		cp_int_cntl2 |= TIME_STAMP_INT_ENABLE;
5736 	}
5737 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5738 		DRM_DEBUG("si_irq_set: sw int dma\n");
5739 		dma_cntl |= TRAP_ENABLE;
5740 	}
5741 
5742 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5743 		DRM_DEBUG("si_irq_set: sw int dma1\n");
5744 		dma_cntl1 |= TRAP_ENABLE;
5745 	}
5746 	if (rdev->irq.crtc_vblank_int[0] ||
5747 	    atomic_read(&rdev->irq.pflip[0])) {
5748 		DRM_DEBUG("si_irq_set: vblank 0\n");
5749 		crtc1 |= VBLANK_INT_MASK;
5750 	}
5751 	if (rdev->irq.crtc_vblank_int[1] ||
5752 	    atomic_read(&rdev->irq.pflip[1])) {
5753 		DRM_DEBUG("si_irq_set: vblank 1\n");
5754 		crtc2 |= VBLANK_INT_MASK;
5755 	}
5756 	if (rdev->irq.crtc_vblank_int[2] ||
5757 	    atomic_read(&rdev->irq.pflip[2])) {
5758 		DRM_DEBUG("si_irq_set: vblank 2\n");
5759 		crtc3 |= VBLANK_INT_MASK;
5760 	}
5761 	if (rdev->irq.crtc_vblank_int[3] ||
5762 	    atomic_read(&rdev->irq.pflip[3])) {
5763 		DRM_DEBUG("si_irq_set: vblank 3\n");
5764 		crtc4 |= VBLANK_INT_MASK;
5765 	}
5766 	if (rdev->irq.crtc_vblank_int[4] ||
5767 	    atomic_read(&rdev->irq.pflip[4])) {
5768 		DRM_DEBUG("si_irq_set: vblank 4\n");
5769 		crtc5 |= VBLANK_INT_MASK;
5770 	}
5771 	if (rdev->irq.crtc_vblank_int[5] ||
5772 	    atomic_read(&rdev->irq.pflip[5])) {
5773 		DRM_DEBUG("si_irq_set: vblank 5\n");
5774 		crtc6 |= VBLANK_INT_MASK;
5775 	}
5776 	if (rdev->irq.hpd[0]) {
5777 		DRM_DEBUG("si_irq_set: hpd 1\n");
5778 		hpd1 |= DC_HPDx_INT_EN;
5779 	}
5780 	if (rdev->irq.hpd[1]) {
5781 		DRM_DEBUG("si_irq_set: hpd 2\n");
5782 		hpd2 |= DC_HPDx_INT_EN;
5783 	}
5784 	if (rdev->irq.hpd[2]) {
5785 		DRM_DEBUG("si_irq_set: hpd 3\n");
5786 		hpd3 |= DC_HPDx_INT_EN;
5787 	}
5788 	if (rdev->irq.hpd[3]) {
5789 		DRM_DEBUG("si_irq_set: hpd 4\n");
5790 		hpd4 |= DC_HPDx_INT_EN;
5791 	}
5792 	if (rdev->irq.hpd[4]) {
5793 		DRM_DEBUG("si_irq_set: hpd 5\n");
5794 		hpd5 |= DC_HPDx_INT_EN;
5795 	}
5796 	if (rdev->irq.hpd[5]) {
5797 		DRM_DEBUG("si_irq_set: hpd 6\n");
5798 		hpd6 |= DC_HPDx_INT_EN;
5799 	}
5800 
5801 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5802 	WREG32(CP_INT_CNTL_RING1, cp_int_cntl1);
5803 	WREG32(CP_INT_CNTL_RING2, cp_int_cntl2);
5804 
5805 	WREG32(DMA_CNTL + DMA0_REGISTER_OFFSET, dma_cntl);
5806 	WREG32(DMA_CNTL + DMA1_REGISTER_OFFSET, dma_cntl1);
5807 
5808 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5809 
5810 	if (rdev->irq.dpm_thermal) {
5811 		DRM_DEBUG("dpm thermal\n");
5812 		thermal_int |= THERM_INT_MASK_HIGH | THERM_INT_MASK_LOW;
5813 	}
5814 
5815 	if (rdev->num_crtc >= 2) {
5816 		WREG32(INT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5817 		WREG32(INT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5818 	}
5819 	if (rdev->num_crtc >= 4) {
5820 		WREG32(INT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5821 		WREG32(INT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5822 	}
5823 	if (rdev->num_crtc >= 6) {
5824 		WREG32(INT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5825 		WREG32(INT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5826 	}
5827 
5828 	if (rdev->num_crtc >= 2) {
5829 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, grph1);
5830 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, grph2);
5831 	}
5832 	if (rdev->num_crtc >= 4) {
5833 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, grph3);
5834 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, grph4);
5835 	}
5836 	if (rdev->num_crtc >= 6) {
5837 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, grph5);
5838 		WREG32(GRPH_INT_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, grph6);
5839 	}
5840 
5841 	if (!ASIC_IS_NODCE(rdev)) {
5842 		WREG32(DC_HPD1_INT_CONTROL, hpd1);
5843 		WREG32(DC_HPD2_INT_CONTROL, hpd2);
5844 		WREG32(DC_HPD3_INT_CONTROL, hpd3);
5845 		WREG32(DC_HPD4_INT_CONTROL, hpd4);
5846 		WREG32(DC_HPD5_INT_CONTROL, hpd5);
5847 		WREG32(DC_HPD6_INT_CONTROL, hpd6);
5848 	}
5849 
5850 	WREG32(CG_THERMAL_INT, thermal_int);
5851 
5852 	return 0;
5853 }
5854 
5855 static inline void si_irq_ack(struct radeon_device *rdev)
5856 {
5857 	u32 tmp;
5858 
5859 	if (ASIC_IS_NODCE(rdev))
5860 		return;
5861 
5862 	rdev->irq.stat_regs.evergreen.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5863 	rdev->irq.stat_regs.evergreen.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5864 	rdev->irq.stat_regs.evergreen.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5865 	rdev->irq.stat_regs.evergreen.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5866 	rdev->irq.stat_regs.evergreen.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5867 	rdev->irq.stat_regs.evergreen.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5868 	rdev->irq.stat_regs.evergreen.d1grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET);
5869 	rdev->irq.stat_regs.evergreen.d2grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET);
5870 	if (rdev->num_crtc >= 4) {
5871 		rdev->irq.stat_regs.evergreen.d3grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET);
5872 		rdev->irq.stat_regs.evergreen.d4grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET);
5873 	}
5874 	if (rdev->num_crtc >= 6) {
5875 		rdev->irq.stat_regs.evergreen.d5grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET);
5876 		rdev->irq.stat_regs.evergreen.d6grph_int = RREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET);
5877 	}
5878 
5879 	if (rdev->irq.stat_regs.evergreen.d1grph_int & GRPH_PFLIP_INT_OCCURRED)
5880 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5881 	if (rdev->irq.stat_regs.evergreen.d2grph_int & GRPH_PFLIP_INT_OCCURRED)
5882 		WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5883 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT)
5884 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5885 	if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT)
5886 		WREG32(VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5887 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5888 		WREG32(VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5889 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5890 		WREG32(VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5891 
5892 	if (rdev->num_crtc >= 4) {
5893 		if (rdev->irq.stat_regs.evergreen.d3grph_int & GRPH_PFLIP_INT_OCCURRED)
5894 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5895 		if (rdev->irq.stat_regs.evergreen.d4grph_int & GRPH_PFLIP_INT_OCCURRED)
5896 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5897 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5898 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5899 		if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5900 			WREG32(VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5901 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5902 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5903 		if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5904 			WREG32(VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5905 	}
5906 
5907 	if (rdev->num_crtc >= 6) {
5908 		if (rdev->irq.stat_regs.evergreen.d5grph_int & GRPH_PFLIP_INT_OCCURRED)
5909 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5910 		if (rdev->irq.stat_regs.evergreen.d6grph_int & GRPH_PFLIP_INT_OCCURRED)
5911 			WREG32(GRPH_INT_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, GRPH_PFLIP_INT_CLEAR);
5912 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5913 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5914 		if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5915 			WREG32(VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5916 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5917 			WREG32(VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5918 		if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5919 			WREG32(VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5920 	}
5921 
5922 	if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
5923 		tmp = RREG32(DC_HPD1_INT_CONTROL);
5924 		tmp |= DC_HPDx_INT_ACK;
5925 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5926 	}
5927 	if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
5928 		tmp = RREG32(DC_HPD2_INT_CONTROL);
5929 		tmp |= DC_HPDx_INT_ACK;
5930 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5931 	}
5932 	if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5933 		tmp = RREG32(DC_HPD3_INT_CONTROL);
5934 		tmp |= DC_HPDx_INT_ACK;
5935 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5936 	}
5937 	if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5938 		tmp = RREG32(DC_HPD4_INT_CONTROL);
5939 		tmp |= DC_HPDx_INT_ACK;
5940 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5941 	}
5942 	if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5943 		tmp = RREG32(DC_HPD5_INT_CONTROL);
5944 		tmp |= DC_HPDx_INT_ACK;
5945 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5946 	}
5947 	if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5948 		tmp = RREG32(DC_HPD5_INT_CONTROL);
5949 		tmp |= DC_HPDx_INT_ACK;
5950 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5951 	}
5952 }
5953 
/* Disable interrupt generation, then ack anything already latched so
 * no stale sources remain pending, and finally clear the per-source
 * enable state.  The ordering (disable -> delay -> ack -> reset
 * state) is deliberate; do not reorder.
 */
static void si_irq_disable(struct radeon_device *rdev)
{
	si_disable_interrupts(rdev);
	/* Wait and acknowledge irq */
	mdelay(1);
	si_irq_ack(rdev);
	si_disable_interrupt_state(rdev);
}
5962 
/* Quiesce interrupts for suspend: disable/ack all sources, then stop
 * the RLC so no further interrupt traffic is generated.
 */
static void si_irq_suspend(struct radeon_device *rdev)
{
	si_irq_disable(rdev);
	si_rlc_stop(rdev);
}
5968 
/* Tear down interrupt handling: quiesce the hardware first, then
 * free the IH ring buffer.
 */
static void si_irq_fini(struct radeon_device *rdev)
{
	si_irq_suspend(rdev);
	r600_ih_ring_fini(rdev);
}
5974 
5975 static inline u32 si_get_ih_wptr(struct radeon_device *rdev)
5976 {
5977 	u32 wptr, tmp;
5978 
5979 	if (rdev->wb.enabled)
5980 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5981 	else
5982 		wptr = RREG32(IH_RB_WPTR);
5983 
5984 	if (wptr & RB_OVERFLOW) {
5985 		/* When a ring buffer overflow happen start parsing interrupt
5986 		 * from the last not overwritten vector (wptr + 16). Hopefully
5987 		 * this should allow us to catchup.
5988 		 */
5989 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5990 			wptr, rdev->ih.rptr, (wptr + 16) + rdev->ih.ptr_mask);
5991 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5992 		tmp = RREG32(IH_RB_CNTL);
5993 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
5994 		WREG32(IH_RB_CNTL, tmp);
5995 	}
5996 	return (wptr & rdev->ih.ptr_mask);
5997 }
5998 
5999 /*        SI IV Ring
6000  * Each IV ring entry is 128 bits:
6001  * [7:0]    - interrupt source id
6002  * [31:8]   - reserved
6003  * [59:32]  - interrupt source data
6004  * [63:60]  - reserved
6005  * [71:64]  - RINGID
6006  * [79:72]  - VMID
6007  * [127:80] - reserved
6008  */
/**
 * si_irq_process - interrupt handler
 *
 * @rdev: radeon_device pointer
 *
 * Walk the IH ring from rptr to wptr and dispatch each 128-bit vector
 * by source id: display vblank/vline (src 1-6), hotplug (src 42),
 * VM protection faults (src 146/147), CP and DMA fence events, and
 * thermal events.  Hotplug and thermal handling are deferred to work
 * queues.  Re-reads wptr at the end and restarts if more vectors
 * arrived while processing.  Returns IRQ_HANDLED if this caller
 * acquired the IH lock, IRQ_NONE otherwise.
 */
int si_irq_process(struct radeon_device *rdev)
{
	u32 wptr;
	u32 rptr;
	u32 src_id, src_data, ring_id;
	u32 ring_index;
	bool queue_hotplug = false;
	bool queue_thermal = false;
	u32 status, addr;

	if (!rdev->ih.enabled || rdev->shutdown)
		return IRQ_NONE;

	wptr = si_get_ih_wptr(rdev);

restart_ih:
	/* is somebody else already processing irqs? */
	if (atomic_xchg(&rdev->ih.lock, 1))
		return IRQ_NONE;

	rptr = rdev->ih.rptr;
	DRM_DEBUG("si_irq_process start: rptr %d, wptr %d\n", rptr, wptr);

	/* Order reading of wptr vs. reading of IH ring data */
	rmb();

	/* display interrupts */
	si_irq_ack(rdev);

	while (rptr != wptr) {
		/* wptr/rptr are in bytes! */
		ring_index = rptr / 4;
		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;

		switch (src_id) {
		case 1: /* D1 vblank/vline */
			switch (src_data) {
			case 0: /* D1 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[0]) {
						drm_handle_vblank(rdev->ddev, 0);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[0]))
						radeon_crtc_handle_flip(rdev, 0);
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D1 vblank\n");
				}
				break;
			case 1: /* D1 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int & LB_D1_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~LB_D1_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D1 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 2: /* D2 vblank/vline */
			switch (src_data) {
			case 0: /* D2 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[1]) {
						drm_handle_vblank(rdev->ddev, 1);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[1]))
						radeon_crtc_handle_flip(rdev, 1);
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D2 vblank\n");
				}
				break;
			case 1: /* D2 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D2 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 3: /* D3 vblank/vline */
			switch (src_data) {
			case 0: /* D3 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[2]) {
						drm_handle_vblank(rdev->ddev, 2);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[2]))
						radeon_crtc_handle_flip(rdev, 2);
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D3 vblank\n");
				}
				break;
			case 1: /* D3 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D3 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 4: /* D4 vblank/vline */
			switch (src_data) {
			case 0: /* D4 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[3]) {
						drm_handle_vblank(rdev->ddev, 3);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[3]))
						radeon_crtc_handle_flip(rdev, 3);
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D4 vblank\n");
				}
				break;
			case 1: /* D4 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D4 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 5: /* D5 vblank/vline */
			switch (src_data) {
			case 0: /* D5 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[4]) {
						drm_handle_vblank(rdev->ddev, 4);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[4]))
						radeon_crtc_handle_flip(rdev, 4);
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D5 vblank\n");
				}
				break;
			case 1: /* D5 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D5 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 6: /* D6 vblank/vline */
			switch (src_data) {
			case 0: /* D6 vblank */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
					if (rdev->irq.crtc_vblank_int[5]) {
						drm_handle_vblank(rdev->ddev, 5);
						rdev->pm.vblank_sync = true;
						wake_up(&rdev->irq.vblank_queue);
					}
					if (atomic_read(&rdev->irq.pflip[5]))
						radeon_crtc_handle_flip(rdev, 5);
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
					DRM_DEBUG("IH: D6 vblank\n");
				}
				break;
			case 1: /* D6 vline */
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
					DRM_DEBUG("IH: D6 vline\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 42: /* HPD hotplug */
			switch (src_data) {
			case 0:
				if (rdev->irq.stat_regs.evergreen.disp_int & DC_HPD1_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int &= ~DC_HPD1_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD1\n");
				}
				break;
			case 1:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont & DC_HPD2_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont &= ~DC_HPD2_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD2\n");
				}
				break;
			case 2:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont2 & DC_HPD3_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD3\n");
				}
				break;
			case 3:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont3 & DC_HPD4_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD4\n");
				}
				break;
			case 4:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont4 & DC_HPD5_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD5\n");
				}
				break;
			case 5:
				if (rdev->irq.stat_regs.evergreen.disp_int_cont5 & DC_HPD6_INTERRUPT) {
					rdev->irq.stat_regs.evergreen.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
					queue_hotplug = true;
					DRM_DEBUG("IH: HPD6\n");
				}
				break;
			default:
				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
				break;
			}
			break;
		case 146: /* VM CONTEXT1 protection fault */
		case 147:
			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
			status = RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS);
			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
				addr);
			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
				status);
			si_vm_decode_fault(rdev, status, addr);
			/* reset addr and status */
			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
			break;
		case 176: /* RINGID0 CP_INT */
			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
			break;
		case 177: /* RINGID1 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
			break;
		case 178: /* RINGID2 CP_INT */
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
			break;
		case 181: /* CP EOP event */
			DRM_DEBUG("IH: CP EOP\n");
			switch (ring_id) {
			case 0:
				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
				break;
			case 1:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
				break;
			case 2:
				radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
				break;
			}
			break;
		case 224: /* DMA trap event */
			DRM_DEBUG("IH: DMA trap\n");
			radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
			break;
		case 230: /* thermal low to high */
			DRM_DEBUG("IH: thermal low to high\n");
			rdev->pm.dpm.thermal.high_to_low = false;
			queue_thermal = true;
			break;
		case 231: /* thermal high to low */
			DRM_DEBUG("IH: thermal high to low\n");
			rdev->pm.dpm.thermal.high_to_low = true;
			queue_thermal = true;
			break;
		case 233: /* GUI IDLE */
			DRM_DEBUG("IH: GUI idle\n");
			break;
		case 244: /* DMA trap event */
			DRM_DEBUG("IH: DMA1 trap\n");
			radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
			break;
		default:
			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
			break;
		}

		/* wptr/rptr are in bytes! */
		rptr += 16;
		rptr &= rdev->ih.ptr_mask;
	}
	if (queue_hotplug)
		schedule_work(&rdev->hotplug_work);
	if (queue_thermal && rdev->pm.dpm_enabled)
		schedule_work(&rdev->pm.dpm.thermal.work);
	rdev->ih.rptr = rptr;
	WREG32(IH_RB_RPTR, rdev->ih.rptr);
	atomic_set(&rdev->ih.lock, 0);

	/* make sure wptr hasn't changed while processing */
	wptr = si_get_ih_wptr(rdev);
	if (wptr != rptr)
		goto restart_ih;

	return IRQ_HANDLED;
}
6332 
6333 /*
6334  * startup/shutdown callbacks
6335  */
/**
 * si_startup - program the asic to a functional state
 *
 * @rdev: radeon_device pointer
 *
 * Shared by init and resume: enables the PCIE link, programs the MC,
 * loads firmware, enables GART, initializes the RLC, writeback and
 * fence infrastructure, interrupts, all CP/DMA/UVD rings, the IB
 * pool, the VM manager and audio.  The sequence is order-sensitive.
 * Returns 0 on success, negative error code on failure.
 */
static int si_startup(struct radeon_device *rdev)
{
	struct radeon_ring *ring;
	int r;

	/* enable pcie gen2/3 link */
	si_pcie_gen3_enable(rdev);
	/* enable aspm */
	si_program_aspm(rdev);

	/* scratch needs to be initialized before MC */
	r = r600_vram_scratch_init(rdev);
	if (r)
		return r;

	si_mc_program(rdev);

	/* load firmware images if any are missing */
	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
	    !rdev->rlc_fw || !rdev->mc_fw) {
		r = si_init_microcode(rdev);
		if (r) {
			DRM_ERROR("Failed to load firmware!\n");
			return r;
		}
	}

	r = si_mc_load_microcode(rdev);
	if (r) {
		DRM_ERROR("Failed to load MC firmware!\n");
		return r;
	}

	r = si_pcie_gart_enable(rdev);
	if (r)
		return r;
	si_gpu_init(rdev);

	/* allocate rlc buffers */
	if (rdev->family == CHIP_VERDE) {
		rdev->rlc.reg_list = verde_rlc_save_restore_register_list;
		rdev->rlc.reg_list_size =
			(u32)ARRAY_SIZE(verde_rlc_save_restore_register_list);
	}
	rdev->rlc.cs_data = si_cs_data;
	r = sumo_rlc_init(rdev);
	if (r) {
		DRM_ERROR("Failed to init rlc BOs!\n");
		return r;
	}

	/* allocate wb buffer */
	r = radeon_wb_init(rdev);
	if (r)
		return r;

	/* start fence handling on every ring (3x CP, 2x DMA, UVD) */
	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
	if (r) {
		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
		return r;
	}

	/* UVD failure is non-fatal: disable the ring and continue */
	if (rdev->has_uvd) {
		r = uvd_v2_2_resume(rdev);
		if (!r) {
			r = radeon_fence_driver_start_ring(rdev,
							   R600_RING_TYPE_UVD_INDEX);
			if (r)
				dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
		}
		if (r)
			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
	}

	/* Enable IRQ */
	if (!rdev->irq.installed) {
		r = radeon_irq_kms_init(rdev);
		if (r)
			return r;
	}

	r = si_irq_init(rdev);
	if (r) {
		DRM_ERROR("radeon: IH init failed (%d).\n", r);
		radeon_irq_kms_fini(rdev);
		return r;
	}
	si_irq_set(rdev);

	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
			     CP_RB0_RPTR, CP_RB0_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
			     CP_RB1_RPTR, CP_RB1_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
			     CP_RB2_RPTR, CP_RB2_WPTR,
			     RADEON_CP_PACKET2);
	if (r)
		return r;

	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA0_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA0_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
			     DMA_RB_RPTR + DMA1_REGISTER_OFFSET,
			     DMA_RB_WPTR + DMA1_REGISTER_OFFSET,
			     DMA_PACKET(DMA_PACKET_NOP, 0, 0, 0, 0));
	if (r)
		return r;

	r = si_cp_load_microcode(rdev);
	if (r)
		return r;
	r = si_cp_resume(rdev);
	if (r)
		return r;

	r = cayman_dma_resume(rdev);
	if (r)
		return r;

	/* ring_size == 0 means UVD was disabled above */
	if (rdev->has_uvd) {
		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
		if (ring->ring_size) {
			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
					     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
					     RADEON_CP_PACKET2);
			if (!r)
				r = uvd_v1_0_init(rdev);
			if (r)
				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
		}
	}

	r = radeon_ib_pool_init(rdev);
	if (r) {
		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
		return r;
	}

	r = radeon_vm_manager_init(rdev);
	if (r) {
		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
		return r;
	}

	r = dce6_audio_init(rdev);
	if (r)
		return r;

	return 0;
}
6527 
/**
 * si_resume - resume the asic after suspend
 *
 * @rdev: radeon_device pointer
 *
 * Re-post the card via the atombios init tables, restore the golden
 * register settings and run the full startup sequence.  Clears
 * accel_working on failure so userspace cannot submit work to a
 * half-initialized GPU.  Returns 0 on success, negative error code
 * on failure.
 */
int si_resume(struct radeon_device *rdev)
{
	int r;

	/* Do not reset GPU before posting, on rv770 hw unlike on r500 hw,
	 * posting will perform necessary task to bring back GPU into good
	 * shape.
	 */
	/* post card */
	atom_asic_init(rdev->mode_info.atom_context);

	/* init golden registers */
	si_init_golden_registers(rdev);

	rdev->accel_working = true;
	r = si_startup(rdev);
	if (r) {
		DRM_ERROR("si startup failed on resume\n");
		rdev->accel_working = false;
		return r;
	}

	return r;

}
6553 
/**
 * si_suspend - quiesce the asic for suspend
 *
 * @rdev: radeon_device pointer
 *
 * Stops audio, the VM manager, the CP and DMA engines and UVD, tears
 * down powergating/clockgating, disables interrupts, writeback and
 * GART.  Reverse-ordered with respect to si_startup().  Always
 * returns 0.
 */
int si_suspend(struct radeon_device *rdev)
{
	dce6_audio_fini(rdev);
	radeon_vm_manager_fini(rdev);
	si_cp_enable(rdev, false);
	cayman_dma_stop(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_suspend(rdev);
	}
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_suspend(rdev);
	radeon_wb_disable(rdev);
	si_pcie_gart_disable(rdev);
	return 0;
}
6571 
6572 /* Plan is to move initialization in that function and use
6573  * helper function so that radeon_device_init pretty much
6574  * do nothing more than calling asic specific function. This
6575  * should also allow to remove a bunch of callback function
6576  * like vram_info.
6577  */
6578 int si_init(struct radeon_device *rdev)
6579 {
6580 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6581 	int r;
6582 
6583 	/* Read BIOS */
6584 	if (!radeon_get_bios(rdev)) {
6585 		if (ASIC_IS_AVIVO(rdev))
6586 			return -EINVAL;
6587 	}
6588 	/* Must be an ATOMBIOS */
6589 	if (!rdev->is_atom_bios) {
6590 		dev_err(rdev->dev, "Expecting atombios for cayman GPU\n");
6591 		return -EINVAL;
6592 	}
6593 	r = radeon_atombios_init(rdev);
6594 	if (r)
6595 		return r;
6596 
6597 	/* Post card if necessary */
6598 	if (!radeon_card_posted(rdev)) {
6599 		if (!rdev->bios) {
6600 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6601 			return -EINVAL;
6602 		}
6603 		DRM_INFO("GPU not posted. posting now...\n");
6604 		atom_asic_init(rdev->mode_info.atom_context);
6605 	}
6606 	/* init golden registers */
6607 	si_init_golden_registers(rdev);
6608 	/* Initialize scratch registers */
6609 	si_scratch_init(rdev);
6610 	/* Initialize surface registers */
6611 	radeon_surface_init(rdev);
6612 	/* Initialize clocks */
6613 	radeon_get_clock_info(rdev->ddev);
6614 
6615 	/* Fence driver */
6616 	r = radeon_fence_driver_init(rdev);
6617 	if (r)
6618 		return r;
6619 
6620 	/* initialize memory controller */
6621 	r = si_mc_init(rdev);
6622 	if (r)
6623 		return r;
6624 	/* Memory manager */
6625 	r = radeon_bo_init(rdev);
6626 	if (r)
6627 		return r;
6628 
6629 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6630 	ring->ring_obj = NULL;
6631 	r600_ring_init(rdev, ring, 1024 * 1024);
6632 
6633 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6634 	ring->ring_obj = NULL;
6635 	r600_ring_init(rdev, ring, 1024 * 1024);
6636 
6637 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6638 	ring->ring_obj = NULL;
6639 	r600_ring_init(rdev, ring, 1024 * 1024);
6640 
6641 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6642 	ring->ring_obj = NULL;
6643 	r600_ring_init(rdev, ring, 64 * 1024);
6644 
6645 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6646 	ring->ring_obj = NULL;
6647 	r600_ring_init(rdev, ring, 64 * 1024);
6648 
6649 	if (rdev->has_uvd) {
6650 		r = radeon_uvd_init(rdev);
6651 		if (!r) {
6652 			ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6653 			ring->ring_obj = NULL;
6654 			r600_ring_init(rdev, ring, 4096);
6655 		}
6656 	}
6657 
6658 	rdev->ih.ring_obj = NULL;
6659 	r600_ih_ring_init(rdev, 64 * 1024);
6660 
6661 	r = r600_pcie_gart_init(rdev);
6662 	if (r)
6663 		return r;
6664 
6665 	rdev->accel_working = true;
6666 	r = si_startup(rdev);
6667 	if (r) {
6668 		dev_err(rdev->dev, "disabling GPU acceleration\n");
6669 		si_cp_fini(rdev);
6670 		cayman_dma_fini(rdev);
6671 		si_irq_fini(rdev);
6672 		sumo_rlc_fini(rdev);
6673 		radeon_wb_fini(rdev);
6674 		radeon_ib_pool_fini(rdev);
6675 		radeon_vm_manager_fini(rdev);
6676 		radeon_irq_kms_fini(rdev);
6677 		si_pcie_gart_fini(rdev);
6678 		rdev->accel_working = false;
6679 	}
6680 
6681 	/* Don't start up if the MC ucode is missing.
6682 	 * The default clocks and voltages before the MC ucode
6683 	 * is loaded are not suffient for advanced operations.
6684 	 */
6685 	if (!rdev->mc_fw) {
6686 		DRM_ERROR("radeon: MC ucode required for NI+.\n");
6687 		return -EINVAL;
6688 	}
6689 
6690 	return 0;
6691 }
6692 
/**
 * si_fini - asic specific driver and hw teardown
 *
 * @rdev: radeon_device pointer
 *
 * Mirrors si_init(): tears down engines, interrupts, the RLC,
 * writeback, VM manager, IB pool, UVD, GART, scratch memory, GEM,
 * fences, the buffer manager and atombios state, then frees the
 * cached BIOS image.
 */
void si_fini(struct radeon_device *rdev)
{
	si_cp_fini(rdev);
	cayman_dma_fini(rdev);
	si_fini_pg(rdev);
	si_fini_cg(rdev);
	si_irq_fini(rdev);
	sumo_rlc_fini(rdev);
	radeon_wb_fini(rdev);
	radeon_vm_manager_fini(rdev);
	radeon_ib_pool_fini(rdev);
	radeon_irq_kms_fini(rdev);
	if (rdev->has_uvd) {
		uvd_v1_0_fini(rdev);
		radeon_uvd_fini(rdev);
	}
	si_pcie_gart_fini(rdev);
	r600_vram_scratch_fini(rdev);
	radeon_gem_fini(rdev);
	radeon_fence_driver_fini(rdev);
	radeon_bo_fini(rdev);
	radeon_atombios_fini(rdev);
	kfree(rdev->bios);
	rdev->bios = NULL;
}
6718 
6719 /**
6720  * si_get_gpu_clock_counter - return GPU clock counter snapshot
6721  *
6722  * @rdev: radeon_device pointer
6723  *
6724  * Fetches a GPU clock counter snapshot (SI).
6725  * Returns the 64 bit clock counter snapshot.
6726  */
6727 uint64_t si_get_gpu_clock_counter(struct radeon_device *rdev)
6728 {
6729 	uint64_t clock;
6730 
6731 	mutex_lock(&rdev->gpu_clock_mutex);
6732 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6733 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6734 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6735 	mutex_unlock(&rdev->gpu_clock_mutex);
6736 	return clock;
6737 }
6738 
/**
 * si_set_uvd_clocks - program the UVD PLL for the requested clocks
 *
 * @rdev: radeon_device pointer
 * @vclk: requested UVD video clock; 0 leaves bypass on and sleeps the PLL
 * @dclk: requested UVD decode clock; 0 leaves bypass on and sleeps the PLL
 *
 * Switches VCLK/DCLK to the bypass source, reprograms the UPLL dividers
 * for the requested frequencies, hands each change to the UPLL via
 * radeon_uvd_send_upll_ctlreq(), then selects the PLL outputs again.
 * Returns 0 on success, negative error code on failure.
 */
int si_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
{
	unsigned fb_div = 0, vclk_div = 0, dclk_div = 0;
	int r;

	/* bypass vclk and dclk with bclk */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	/* put PLL in bypass mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~UPLL_BYPASS_EN_MASK);

	if (!vclk || !dclk) {
		/* keep the Bypass mode, put PLL to sleep */
		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
		return 0;
	}

	/* compute the feedback and post dividers for the requested clocks */
	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 125000, 250000,
					  16384, 0x03FFFFFF, 0, 128, 5,
					  &fb_div, &vclk_div, &dclk_div);
	if (r)
		return r;

	/* set RESET_ANTI_MUX to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL_5, 0, ~RESET_ANTI_MUX_MASK);

	/* set VCO_MODE to 1 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_VCO_MODE_MASK, ~UPLL_VCO_MODE_MASK);

	/* toggle UPLL_SLEEP to 1 then back to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_SLEEP_MASK);

	/* deassert UPLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(1);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* assert UPLL_RESET again */
	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);

	/* disable spread spectrum. */
	WREG32_P(CG_UPLL_SPREAD_SPECTRUM, 0, ~SSEN_MASK);

	/* set feedback divider */
	WREG32_P(CG_UPLL_FUNC_CNTL_3, UPLL_FB_DIV(fb_div), ~UPLL_FB_DIV_MASK);

	/* set ref divider to 0 */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_REF_DIV_MASK);

	/* ISPARE9 selection depends on the feedback divider range —
	 * NOTE(review): 307200 threshold per AMD programming sequence, verify
	 */
	if (fb_div < 307200)
		WREG32_P(CG_UPLL_FUNC_CNTL_4, 0, ~UPLL_SPARE_ISPARE9);
	else
		WREG32_P(CG_UPLL_FUNC_CNTL_4, UPLL_SPARE_ISPARE9, ~UPLL_SPARE_ISPARE9);

	/* set PDIV_A and PDIV_B */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		UPLL_PDIV_A(vclk_div) | UPLL_PDIV_B(dclk_div),
		~(UPLL_PDIV_A_MASK | UPLL_PDIV_B_MASK));

	/* give the PLL some time to settle */
	mdelay(15);

	/* deassert PLL_RESET */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);

	mdelay(15);

	/* switch from bypass mode to normal mode */
	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);

	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
	if (r)
		return r;

	/* switch VCLK and DCLK selection */
	WREG32_P(CG_UPLL_FUNC_CNTL_2,
		VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
		~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));

	mdelay(100);

	return 0;
}
6829 
/**
 * si_pcie_gen3_enable - attempt to raise the PCIE link speed
 *
 * @rdev: radeon_device pointer
 *
 * Retrains the link at gen2 or gen3 rate when the platform advertises
 * support (disable with radeon.pcie_gen2=0).  Gen3 requires redoing link
 * equalization in lockstep with the upstream bridge before the speed
 * change is initiated.  No-op on IGP and non-PCIE parts, or when the
 * link already runs at the highest supported rate.
 */
static void si_pcie_gen3_enable(struct radeon_device *rdev)
{
	struct pci_dev *root = rdev->pdev->bus->self;
	int bridge_pos, gpu_pos;
	u32 speed_cntl, mask, current_data_rate;
	int ret, i;
	u16 tmp16;

	if (radeon_pcie_gen2 == 0)
		return;

	if (rdev->flags & RADEON_IS_IGP)
		return;

	if (!(rdev->flags & RADEON_IS_PCIE))
		return;

	/* which speeds does the platform support? */
	ret = drm_pcie_get_speed_cap_mask(rdev->ddev, &mask);
	if (ret != 0)
		return;

	if (!(mask & (DRM_PCIE_SPEED_50 | DRM_PCIE_SPEED_80)))
		return;

	/* current rate: 0 = gen1, 1 = gen2, 2 = gen3 */
	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	current_data_rate = (speed_cntl & LC_CURRENT_DATA_RATE_MASK) >>
		LC_CURRENT_DATA_RATE_SHIFT;
	if (mask & DRM_PCIE_SPEED_80) {
		if (current_data_rate == 2) {
			DRM_INFO("PCIE gen 3 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 3 link speeds, disable with radeon.pcie_gen2=0\n");
	} else if (mask & DRM_PCIE_SPEED_50) {
		if (current_data_rate == 1) {
			DRM_INFO("PCIE gen 2 link speeds already enabled\n");
			return;
		}
		DRM_INFO("enabling PCIE gen 2 link speeds, disable with radeon.pcie_gen2=0\n");
	}

	/* both ends need a PCIE capability we can program */
	bridge_pos = pci_pcie_cap(root);
	if (!bridge_pos)
		return;

	gpu_pos = pci_pcie_cap(rdev->pdev);
	if (!gpu_pos)
		return;

	if (mask & DRM_PCIE_SPEED_80) {
		/* re-try equalization if gen3 is not already enabled */
		if (current_data_rate != 2) {
			u16 bridge_cfg, gpu_cfg;
			u16 bridge_cfg2, gpu_cfg2;
			u32 max_lw, current_lw, tmp;

			/* save LNKCTL on both ends so HAWD can be restored later */
			pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
			pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

			/* set HW autonomous width disable during the retrain */
			tmp16 = bridge_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

			tmp16 = gpu_cfg | PCI_EXP_LNKCTL_HAWD;
			pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

			tmp = RREG32_PCIE(PCIE_LC_STATUS1);
			max_lw = (tmp & LC_DETECTED_LINK_WIDTH_MASK) >> LC_DETECTED_LINK_WIDTH_SHIFT;
			current_lw = (tmp & LC_OPERATING_LINK_WIDTH_MASK) >> LC_OPERATING_LINK_WIDTH_SHIFT;

			/* widen the link to the max detected width first, if
			 * renegotiation is supported
			 */
			if (current_lw < max_lw) {
				tmp = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
				if (tmp & LC_RENEGOTIATION_SUPPORT) {
					tmp &= ~(LC_LINK_WIDTH_MASK | LC_UPCONFIGURE_DIS);
					tmp |= (max_lw << LC_LINK_WIDTH_SHIFT);
					tmp |= LC_UPCONFIGURE_SUPPORT | LC_RENEGOTIATE_EN | LC_RECONFIG_NOW;
					WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, tmp);
				}
			}

			for (i = 0; i < 10; i++) {
				/* check status */
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_DEVSTA, &tmp16);
				if (tmp16 & PCI_EXP_DEVSTA_TRPND)
					break;

				/* snapshot both ends before quiescing */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &bridge_cfg);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &gpu_cfg);

				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &bridge_cfg2);
				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &gpu_cfg2);

				/* quiesce the PHY and request an equalization redo */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp |= LC_REDO_EQ;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);

				mdelay(100);

				/* linkctl */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (bridge_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, &tmp16);
				tmp16 &= ~PCI_EXP_LNKCTL_HAWD;
				tmp16 |= (gpu_cfg & PCI_EXP_LNKCTL_HAWD);
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL, tmp16);

				/* linkctl2
				 * NOTE(review): raw mask covers LNKCTL2 bits 4
				 * and 11:9 — verify against the PCIe spec field
				 * names before touching
				 */
				pci_read_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (bridge_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(root, bridge_pos + PCI_EXP_LNKCTL2, tmp16);

				pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
				tmp16 &= ~((1 << 4) | (7 << 9));
				tmp16 |= (gpu_cfg2 & ((1 << 4) | (7 << 9)));
				pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

				/* release the PHY */
				tmp = RREG32_PCIE_PORT(PCIE_LC_CNTL4);
				tmp &= ~LC_SET_QUIESCE;
				WREG32_PCIE_PORT(PCIE_LC_CNTL4, tmp);
			}
		}
	}

	/* set the link speed */
	speed_cntl |= LC_FORCE_EN_SW_SPEED_CHANGE | LC_FORCE_DIS_HW_SPEED_CHANGE;
	speed_cntl &= ~LC_FORCE_DIS_SW_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* program the target link speed field (LNKCTL2[3:0]) on the GPU */
	pci_read_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, &tmp16);
	tmp16 &= ~0xf;
	if (mask & DRM_PCIE_SPEED_80)
		tmp16 |= 3; /* gen3 */
	else if (mask & DRM_PCIE_SPEED_50)
		tmp16 |= 2; /* gen2 */
	else
		tmp16 |= 1; /* gen1 */
	pci_write_config_word(rdev->pdev, gpu_pos + PCI_EXP_LNKCTL2, tmp16);

	speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
	speed_cntl |= LC_INITIATE_LINK_SPEED_CHANGE;
	WREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL, speed_cntl);

	/* wait for the hardware to clear the initiate bit */
	for (i = 0; i < rdev->usec_timeout; i++) {
		speed_cntl = RREG32_PCIE_PORT(PCIE_LC_SPEED_CNTL);
		if ((speed_cntl & LC_INITIATE_LINK_SPEED_CHANGE) == 0)
			break;
		udelay(1);
	}
}
6986 
6987 static void si_program_aspm(struct radeon_device *rdev)
6988 {
6989 	u32 data, orig;
6990 	bool disable_l0s = false, disable_l1 = false, disable_plloff_in_l1 = false;
6991 	bool disable_clkreq = false;
6992 
6993 	if (radeon_aspm == 0)
6994 		return;
6995 
6996 	if (!(rdev->flags & RADEON_IS_PCIE))
6997 		return;
6998 
6999 	orig = data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7000 	data &= ~LC_XMIT_N_FTS_MASK;
7001 	data |= LC_XMIT_N_FTS(0x24) | LC_XMIT_N_FTS_OVERRIDE_EN;
7002 	if (orig != data)
7003 		WREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL, data);
7004 
7005 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL3);
7006 	data |= LC_GO_TO_RECOVERY;
7007 	if (orig != data)
7008 		WREG32_PCIE_PORT(PCIE_LC_CNTL3, data);
7009 
7010 	orig = data = RREG32_PCIE(PCIE_P_CNTL);
7011 	data |= P_IGNORE_EDB_ERR;
7012 	if (orig != data)
7013 		WREG32_PCIE(PCIE_P_CNTL, data);
7014 
7015 	orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7016 	data &= ~(LC_L0S_INACTIVITY_MASK | LC_L1_INACTIVITY_MASK);
7017 	data |= LC_PMI_TO_L1_DIS;
7018 	if (!disable_l0s)
7019 		data |= LC_L0S_INACTIVITY(7);
7020 
7021 	if (!disable_l1) {
7022 		data |= LC_L1_INACTIVITY(7);
7023 		data &= ~LC_PMI_TO_L1_DIS;
7024 		if (orig != data)
7025 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7026 
7027 		if (!disable_plloff_in_l1) {
7028 			bool clk_req_support;
7029 
7030 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7031 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7032 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7033 			if (orig != data)
7034 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7035 
7036 			orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7037 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7038 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7039 			if (orig != data)
7040 				WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7041 
7042 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7043 			data &= ~(PLL_POWER_STATE_IN_OFF_0_MASK | PLL_POWER_STATE_IN_TXS2_0_MASK);
7044 			data |= PLL_POWER_STATE_IN_OFF_0(7) | PLL_POWER_STATE_IN_TXS2_0(7);
7045 			if (orig != data)
7046 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7047 
7048 			orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7049 			data &= ~(PLL_POWER_STATE_IN_OFF_1_MASK | PLL_POWER_STATE_IN_TXS2_1_MASK);
7050 			data |= PLL_POWER_STATE_IN_OFF_1(7) | PLL_POWER_STATE_IN_TXS2_1(7);
7051 			if (orig != data)
7052 				WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7053 
7054 			if ((rdev->family != CHIP_OLAND) && (rdev->family != CHIP_HAINAN)) {
7055 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0);
7056 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7057 				if (orig != data)
7058 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_0, data);
7059 
7060 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1);
7061 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7062 				if (orig != data)
7063 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_1, data);
7064 
7065 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2);
7066 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7067 				if (orig != data)
7068 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_2, data);
7069 
7070 				orig = data = RREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3);
7071 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7072 				if (orig != data)
7073 					WREG32_PIF_PHY0(PB0_PIF_PWRDOWN_3, data);
7074 
7075 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0);
7076 				data &= ~PLL_RAMP_UP_TIME_0_MASK;
7077 				if (orig != data)
7078 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_0, data);
7079 
7080 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1);
7081 				data &= ~PLL_RAMP_UP_TIME_1_MASK;
7082 				if (orig != data)
7083 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_1, data);
7084 
7085 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2);
7086 				data &= ~PLL_RAMP_UP_TIME_2_MASK;
7087 				if (orig != data)
7088 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_2, data);
7089 
7090 				orig = data = RREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3);
7091 				data &= ~PLL_RAMP_UP_TIME_3_MASK;
7092 				if (orig != data)
7093 					WREG32_PIF_PHY1(PB1_PIF_PWRDOWN_3, data);
7094 			}
7095 			orig = data = RREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL);
7096 			data &= ~LC_DYN_LANES_PWR_STATE_MASK;
7097 			data |= LC_DYN_LANES_PWR_STATE(3);
7098 			if (orig != data)
7099 				WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, data);
7100 
7101 			orig = data = RREG32_PIF_PHY0(PB0_PIF_CNTL);
7102 			data &= ~LS2_EXIT_TIME_MASK;
7103 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7104 				data |= LS2_EXIT_TIME(5);
7105 			if (orig != data)
7106 				WREG32_PIF_PHY0(PB0_PIF_CNTL, data);
7107 
7108 			orig = data = RREG32_PIF_PHY1(PB1_PIF_CNTL);
7109 			data &= ~LS2_EXIT_TIME_MASK;
7110 			if ((rdev->family == CHIP_OLAND) || (rdev->family == CHIP_HAINAN))
7111 				data |= LS2_EXIT_TIME(5);
7112 			if (orig != data)
7113 				WREG32_PIF_PHY1(PB1_PIF_CNTL, data);
7114 
7115 			if (!disable_clkreq) {
7116 				struct pci_dev *root = rdev->pdev->bus->self;
7117 				u32 lnkcap;
7118 
7119 				clk_req_support = false;
7120 				pcie_capability_read_dword(root, PCI_EXP_LNKCAP, &lnkcap);
7121 				if (lnkcap & PCI_EXP_LNKCAP_CLKPM)
7122 					clk_req_support = true;
7123 			} else {
7124 				clk_req_support = false;
7125 			}
7126 
7127 			if (clk_req_support) {
7128 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL2);
7129 				data |= LC_ALLOW_PDWN_IN_L1 | LC_ALLOW_PDWN_IN_L23;
7130 				if (orig != data)
7131 					WREG32_PCIE_PORT(PCIE_LC_CNTL2, data);
7132 
7133 				orig = data = RREG32(THM_CLK_CNTL);
7134 				data &= ~(CMON_CLK_SEL_MASK | TMON_CLK_SEL_MASK);
7135 				data |= CMON_CLK_SEL(1) | TMON_CLK_SEL(1);
7136 				if (orig != data)
7137 					WREG32(THM_CLK_CNTL, data);
7138 
7139 				orig = data = RREG32(MISC_CLK_CNTL);
7140 				data &= ~(DEEP_SLEEP_CLK_SEL_MASK | ZCLK_SEL_MASK);
7141 				data |= DEEP_SLEEP_CLK_SEL(1) | ZCLK_SEL(1);
7142 				if (orig != data)
7143 					WREG32(MISC_CLK_CNTL, data);
7144 
7145 				orig = data = RREG32(CG_CLKPIN_CNTL);
7146 				data &= ~BCLK_AS_XCLK;
7147 				if (orig != data)
7148 					WREG32(CG_CLKPIN_CNTL, data);
7149 
7150 				orig = data = RREG32(CG_CLKPIN_CNTL_2);
7151 				data &= ~FORCE_BIF_REFCLK_EN;
7152 				if (orig != data)
7153 					WREG32(CG_CLKPIN_CNTL_2, data);
7154 
7155 				orig = data = RREG32(MPLL_BYPASSCLK_SEL);
7156 				data &= ~MPLL_CLKOUT_SEL_MASK;
7157 				data |= MPLL_CLKOUT_SEL(4);
7158 				if (orig != data)
7159 					WREG32(MPLL_BYPASSCLK_SEL, data);
7160 
7161 				orig = data = RREG32(SPLL_CNTL_MODE);
7162 				data &= ~SPLL_REFCLK_SEL_MASK;
7163 				if (orig != data)
7164 					WREG32(SPLL_CNTL_MODE, data);
7165 			}
7166 		}
7167 	} else {
7168 		if (orig != data)
7169 			WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7170 	}
7171 
7172 	orig = data = RREG32_PCIE(PCIE_CNTL2);
7173 	data |= SLV_MEM_LS_EN | MST_MEM_LS_EN | REPLAY_MEM_LS_EN;
7174 	if (orig != data)
7175 		WREG32_PCIE(PCIE_CNTL2, data);
7176 
7177 	if (!disable_l0s) {
7178 		data = RREG32_PCIE_PORT(PCIE_LC_N_FTS_CNTL);
7179 		if((data & LC_N_FTS_MASK) == LC_N_FTS_MASK) {
7180 			data = RREG32_PCIE(PCIE_LC_STATUS1);
7181 			if ((data & LC_REVERSE_XMIT) && (data & LC_REVERSE_RCVR)) {
7182 				orig = data = RREG32_PCIE_PORT(PCIE_LC_CNTL);
7183 				data &= ~LC_L0S_INACTIVITY_MASK;
7184 				if (orig != data)
7185 					WREG32_PCIE_PORT(PCIE_LC_CNTL, data);
7186 			}
7187 		}
7188 	}
7189 }
7190