xref: /linux/drivers/gpu/drm/radeon/cik.c (revision ff5599816711d2e67da2d7561fd36ac48debd433)
1 /*
2  * Copyright 2012 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * Authors: Alex Deucher
23  */
24 #include <linux/firmware.h>
25 #include <linux/platform_device.h>
26 #include <linux/slab.h>
27 #include <linux/module.h>
28 #include "drmP.h"
29 #include "radeon.h"
30 #include "radeon_asic.h"
31 #include "cikd.h"
32 #include "atom.h"
33 #include "cik_blit_shaders.h"
34 
35 /* GFX */
36 #define CIK_PFP_UCODE_SIZE 2144
37 #define CIK_ME_UCODE_SIZE 2144
38 #define CIK_CE_UCODE_SIZE 2144
39 /* compute */
40 #define CIK_MEC_UCODE_SIZE 4192
41 /* interrupts */
42 #define BONAIRE_RLC_UCODE_SIZE 2048
43 #define KB_RLC_UCODE_SIZE 2560
44 #define KV_RLC_UCODE_SIZE 2560
45 /* gddr controller */
46 #define CIK_MC_UCODE_SIZE 7866
47 /* sdma */
48 #define CIK_SDMA_UCODE_SIZE 1050
49 #define CIK_SDMA_UCODE_VERSION 64
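/*
 * The *_UCODE_SIZE values above are counts of 32-bit dwords, not bytes:
 * cik_init_microcode() below validates each request_firmware() result
 * against SIZE * 4 bytes, and ci_mc_load_microcode() streams that many
 * big-endian dwords into the hardware.
 */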
50 
51 MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin");
52 MODULE_FIRMWARE("radeon/BONAIRE_me.bin");
53 MODULE_FIRMWARE("radeon/BONAIRE_ce.bin");
54 MODULE_FIRMWARE("radeon/BONAIRE_mec.bin");
55 MODULE_FIRMWARE("radeon/BONAIRE_mc.bin");
56 MODULE_FIRMWARE("radeon/BONAIRE_rlc.bin");
57 MODULE_FIRMWARE("radeon/BONAIRE_sdma.bin");
58 MODULE_FIRMWARE("radeon/KAVERI_pfp.bin");
59 MODULE_FIRMWARE("radeon/KAVERI_me.bin");
60 MODULE_FIRMWARE("radeon/KAVERI_ce.bin");
61 MODULE_FIRMWARE("radeon/KAVERI_mec.bin");
62 MODULE_FIRMWARE("radeon/KAVERI_rlc.bin");
63 MODULE_FIRMWARE("radeon/KAVERI_sdma.bin");
64 MODULE_FIRMWARE("radeon/KABINI_pfp.bin");
65 MODULE_FIRMWARE("radeon/KABINI_me.bin");
66 MODULE_FIRMWARE("radeon/KABINI_ce.bin");
67 MODULE_FIRMWARE("radeon/KABINI_mec.bin");
68 MODULE_FIRMWARE("radeon/KABINI_rlc.bin");
69 MODULE_FIRMWARE("radeon/KABINI_sdma.bin");
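/*
 * The MODULE_FIRMWARE() declarations above only advertise which blobs this
 * driver may request at runtime (so tooling such as initramfs generators
 * can bundle them); the actual loading happens later in
 * cik_init_microcode().
 */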
70 
71 extern int r600_ih_ring_alloc(struct radeon_device *rdev);
72 extern void r600_ih_ring_fini(struct radeon_device *rdev);
73 extern void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save);
74 extern void evergreen_mc_resume(struct radeon_device *rdev, struct evergreen_mc_save *save);
75 extern bool evergreen_is_display_hung(struct radeon_device *rdev);
76 extern void si_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc);
77 extern void si_rlc_fini(struct radeon_device *rdev);
78 extern int si_rlc_init(struct radeon_device *rdev);
79 static void cik_rlc_stop(struct radeon_device *rdev);
80 
81 /*
82  * Indirect registers accessor
83  */
84 u32 cik_pciep_rreg(struct radeon_device *rdev, u32 reg)
85 {
86 	u32 r;
87 
88 	WREG32(PCIE_INDEX, reg);
89 	(void)RREG32(PCIE_INDEX);
90 	r = RREG32(PCIE_DATA);
91 	return r;
92 }
93 
94 void cik_pciep_wreg(struct radeon_device *rdev, u32 reg, u32 v)
95 {
96 	WREG32(PCIE_INDEX, reg);
97 	(void)RREG32(PCIE_INDEX);
98 	WREG32(PCIE_DATA, v);
99 	(void)RREG32(PCIE_DATA);
100 }
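/*
 * The PCIE port registers are reached through an index/data pair rather
 * than a direct MMIO mapping: the offset is written to PCIE_INDEX, read
 * back once so the write is posted, and the payload then moves through
 * PCIE_DATA.  A minimal (hypothetical) read-modify-write using these
 * accessors would look like:
 *
 *	u32 tmp = cik_pciep_rreg(rdev, reg);
 *	tmp |= SOME_BIT;
 *	cik_pciep_wreg(rdev, reg, tmp);
 *
 * where SOME_BIT is a placeholder, not a define from cikd.h.
 */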
101 
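/*
 * The "golden register" tables below are {offset, mask, value} triplets
 * consumed by radeon_program_register_sequence() from
 * cik_init_golden_registers(): the helper clears the masked bits of each
 * register and ORs in the value (a mask of 0xffffffff simply writes the
 * value outright).  They hold chip-specific initial/tuning settings
 * applied once at init time.
 */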
102 static const u32 bonaire_golden_spm_registers[] =
103 {
104 	0x30800, 0xe0ffffff, 0xe0000000
105 };
106 
107 static const u32 bonaire_golden_common_registers[] =
108 {
109 	0xc770, 0xffffffff, 0x00000800,
110 	0xc774, 0xffffffff, 0x00000800,
111 	0xc798, 0xffffffff, 0x00007fbf,
112 	0xc79c, 0xffffffff, 0x00007faf
113 };
114 
115 static const u32 bonaire_golden_registers[] =
116 {
117 	0x3354, 0x00000333, 0x00000333,
118 	0x3350, 0x000c0fc0, 0x00040200,
119 	0x9a10, 0x00010000, 0x00058208,
120 	0x3c000, 0xffff1fff, 0x00140000,
121 	0x3c200, 0xfdfc0fff, 0x00000100,
122 	0x3c234, 0x40000000, 0x40000200,
123 	0x9830, 0xffffffff, 0x00000000,
124 	0x9834, 0xf00fffff, 0x00000400,
125 	0x9838, 0x0002021c, 0x00020200,
126 	0xc78, 0x00000080, 0x00000000,
127 	0x5bb0, 0x000000f0, 0x00000070,
128 	0x5bc0, 0xf0311fff, 0x80300000,
129 	0x98f8, 0x73773777, 0x12010001,
130 	0x350c, 0x00810000, 0x408af000,
131 	0x7030, 0x31000111, 0x00000011,
132 	0x2f48, 0x73773777, 0x12010001,
133 	0x220c, 0x00007fb6, 0x0021a1b1,
134 	0x2210, 0x00007fb6, 0x002021b1,
135 	0x2180, 0x00007fb6, 0x00002191,
136 	0x2218, 0x00007fb6, 0x002121b1,
137 	0x221c, 0x00007fb6, 0x002021b1,
138 	0x21dc, 0x00007fb6, 0x00002191,
139 	0x21e0, 0x00007fb6, 0x00002191,
140 	0x3628, 0x0000003f, 0x0000000a,
141 	0x362c, 0x0000003f, 0x0000000a,
142 	0x2ae4, 0x00073ffe, 0x000022a2,
143 	0x240c, 0x000007ff, 0x00000000,
144 	0x8a14, 0xf000003f, 0x00000007,
145 	0x8bf0, 0x00002001, 0x00000001,
146 	0x8b24, 0xffffffff, 0x00ffffff,
147 	0x30a04, 0x0000ff0f, 0x00000000,
148 	0x28a4c, 0x07ffffff, 0x06000000,
149 	0x4d8, 0x00000fff, 0x00000100,
150 	0x3e78, 0x00000001, 0x00000002,
151 	0x9100, 0x03000000, 0x0362c688,
152 	0x8c00, 0x000000ff, 0x00000001,
153 	0xe40, 0x00001fff, 0x00001fff,
154 	0x9060, 0x0000007f, 0x00000020,
155 	0x9508, 0x00010000, 0x00010000,
156 	0xac14, 0x000003ff, 0x000000f3,
157 	0xac0c, 0xffffffff, 0x00001032
158 };
159 
160 static const u32 bonaire_mgcg_cgcg_init[] =
161 {
162 	0xc420, 0xffffffff, 0xfffffffc,
163 	0x30800, 0xffffffff, 0xe0000000,
164 	0x3c2a0, 0xffffffff, 0x00000100,
165 	0x3c208, 0xffffffff, 0x00000100,
166 	0x3c2c0, 0xffffffff, 0xc0000100,
167 	0x3c2c8, 0xffffffff, 0xc0000100,
168 	0x3c2c4, 0xffffffff, 0xc0000100,
169 	0x55e4, 0xffffffff, 0x00600100,
170 	0x3c280, 0xffffffff, 0x00000100,
171 	0x3c214, 0xffffffff, 0x06000100,
172 	0x3c220, 0xffffffff, 0x00000100,
173 	0x3c218, 0xffffffff, 0x06000100,
174 	0x3c204, 0xffffffff, 0x00000100,
175 	0x3c2e0, 0xffffffff, 0x00000100,
176 	0x3c224, 0xffffffff, 0x00000100,
177 	0x3c200, 0xffffffff, 0x00000100,
178 	0x3c230, 0xffffffff, 0x00000100,
179 	0x3c234, 0xffffffff, 0x00000100,
180 	0x3c250, 0xffffffff, 0x00000100,
181 	0x3c254, 0xffffffff, 0x00000100,
182 	0x3c258, 0xffffffff, 0x00000100,
183 	0x3c25c, 0xffffffff, 0x00000100,
184 	0x3c260, 0xffffffff, 0x00000100,
185 	0x3c27c, 0xffffffff, 0x00000100,
186 	0x3c278, 0xffffffff, 0x00000100,
187 	0x3c210, 0xffffffff, 0x06000100,
188 	0x3c290, 0xffffffff, 0x00000100,
189 	0x3c274, 0xffffffff, 0x00000100,
190 	0x3c2b4, 0xffffffff, 0x00000100,
191 	0x3c2b0, 0xffffffff, 0x00000100,
192 	0x3c270, 0xffffffff, 0x00000100,
193 	0x30800, 0xffffffff, 0xe0000000,
194 	0x3c020, 0xffffffff, 0x00010000,
195 	0x3c024, 0xffffffff, 0x00030002,
196 	0x3c028, 0xffffffff, 0x00040007,
197 	0x3c02c, 0xffffffff, 0x00060005,
198 	0x3c030, 0xffffffff, 0x00090008,
199 	0x3c034, 0xffffffff, 0x00010000,
200 	0x3c038, 0xffffffff, 0x00030002,
201 	0x3c03c, 0xffffffff, 0x00040007,
202 	0x3c040, 0xffffffff, 0x00060005,
203 	0x3c044, 0xffffffff, 0x00090008,
204 	0x3c048, 0xffffffff, 0x00010000,
205 	0x3c04c, 0xffffffff, 0x00030002,
206 	0x3c050, 0xffffffff, 0x00040007,
207 	0x3c054, 0xffffffff, 0x00060005,
208 	0x3c058, 0xffffffff, 0x00090008,
209 	0x3c05c, 0xffffffff, 0x00010000,
210 	0x3c060, 0xffffffff, 0x00030002,
211 	0x3c064, 0xffffffff, 0x00040007,
212 	0x3c068, 0xffffffff, 0x00060005,
213 	0x3c06c, 0xffffffff, 0x00090008,
214 	0x3c070, 0xffffffff, 0x00010000,
215 	0x3c074, 0xffffffff, 0x00030002,
216 	0x3c078, 0xffffffff, 0x00040007,
217 	0x3c07c, 0xffffffff, 0x00060005,
218 	0x3c080, 0xffffffff, 0x00090008,
219 	0x3c084, 0xffffffff, 0x00010000,
220 	0x3c088, 0xffffffff, 0x00030002,
221 	0x3c08c, 0xffffffff, 0x00040007,
222 	0x3c090, 0xffffffff, 0x00060005,
223 	0x3c094, 0xffffffff, 0x00090008,
224 	0x3c098, 0xffffffff, 0x00010000,
225 	0x3c09c, 0xffffffff, 0x00030002,
226 	0x3c0a0, 0xffffffff, 0x00040007,
227 	0x3c0a4, 0xffffffff, 0x00060005,
228 	0x3c0a8, 0xffffffff, 0x00090008,
229 	0x3c000, 0xffffffff, 0x96e00200,
230 	0x8708, 0xffffffff, 0x00900100,
231 	0xc424, 0xffffffff, 0x0020003f,
232 	0x38, 0xffffffff, 0x0140001c,
233 	0x3c, 0x000f0000, 0x000f0000,
234 	0x220, 0xffffffff, 0xC060000C,
235 	0x224, 0xc0000fff, 0x00000100,
236 	0xf90, 0xffffffff, 0x00000100,
237 	0xf98, 0x00000101, 0x00000000,
238 	0x20a8, 0xffffffff, 0x00000104,
239 	0x55e4, 0xff000fff, 0x00000100,
240 	0x30cc, 0xc0000fff, 0x00000104,
241 	0xc1e4, 0x00000001, 0x00000001,
242 	0xd00c, 0xff000ff0, 0x00000100,
243 	0xd80c, 0xff000ff0, 0x00000100
244 };
245 
246 static const u32 spectre_golden_spm_registers[] =
247 {
248 	0x30800, 0xe0ffffff, 0xe0000000
249 };
250 
251 static const u32 spectre_golden_common_registers[] =
252 {
253 	0xc770, 0xffffffff, 0x00000800,
254 	0xc774, 0xffffffff, 0x00000800,
255 	0xc798, 0xffffffff, 0x00007fbf,
256 	0xc79c, 0xffffffff, 0x00007faf
257 };
258 
259 static const u32 spectre_golden_registers[] =
260 {
261 	0x3c000, 0xffff1fff, 0x96940200,
262 	0x3c00c, 0xffff0001, 0xff000000,
263 	0x3c200, 0xfffc0fff, 0x00000100,
264 	0x6ed8, 0x00010101, 0x00010000,
265 	0x9834, 0xf00fffff, 0x00000400,
266 	0x9838, 0xfffffffc, 0x00020200,
267 	0x5bb0, 0x000000f0, 0x00000070,
268 	0x5bc0, 0xf0311fff, 0x80300000,
269 	0x98f8, 0x73773777, 0x12010001,
270 	0x9b7c, 0x00ff0000, 0x00fc0000,
271 	0x2f48, 0x73773777, 0x12010001,
272 	0x8a14, 0xf000003f, 0x00000007,
273 	0x8b24, 0xffffffff, 0x00ffffff,
274 	0x28350, 0x3f3f3fff, 0x00000082,
275 	0x28355, 0x0000003f, 0x00000000,
276 	0x3e78, 0x00000001, 0x00000002,
277 	0x913c, 0xffff03df, 0x00000004,
278 	0xc768, 0x00000008, 0x00000008,
279 	0x8c00, 0x000008ff, 0x00000800,
280 	0x9508, 0x00010000, 0x00010000,
281 	0xac0c, 0xffffffff, 0x54763210,
282 	0x214f8, 0x01ff01ff, 0x00000002,
283 	0x21498, 0x007ff800, 0x00200000,
284 	0x2015c, 0xffffffff, 0x00000f40,
285 	0x30934, 0xffffffff, 0x00000001
286 };
287 
288 static const u32 spectre_mgcg_cgcg_init[] =
289 {
290 	0xc420, 0xffffffff, 0xfffffffc,
291 	0x30800, 0xffffffff, 0xe0000000,
292 	0x3c2a0, 0xffffffff, 0x00000100,
293 	0x3c208, 0xffffffff, 0x00000100,
294 	0x3c2c0, 0xffffffff, 0x00000100,
295 	0x3c2c8, 0xffffffff, 0x00000100,
296 	0x3c2c4, 0xffffffff, 0x00000100,
297 	0x55e4, 0xffffffff, 0x00600100,
298 	0x3c280, 0xffffffff, 0x00000100,
299 	0x3c214, 0xffffffff, 0x06000100,
300 	0x3c220, 0xffffffff, 0x00000100,
301 	0x3c218, 0xffffffff, 0x06000100,
302 	0x3c204, 0xffffffff, 0x00000100,
303 	0x3c2e0, 0xffffffff, 0x00000100,
304 	0x3c224, 0xffffffff, 0x00000100,
305 	0x3c200, 0xffffffff, 0x00000100,
306 	0x3c230, 0xffffffff, 0x00000100,
307 	0x3c234, 0xffffffff, 0x00000100,
308 	0x3c250, 0xffffffff, 0x00000100,
309 	0x3c254, 0xffffffff, 0x00000100,
310 	0x3c258, 0xffffffff, 0x00000100,
311 	0x3c25c, 0xffffffff, 0x00000100,
312 	0x3c260, 0xffffffff, 0x00000100,
313 	0x3c27c, 0xffffffff, 0x00000100,
314 	0x3c278, 0xffffffff, 0x00000100,
315 	0x3c210, 0xffffffff, 0x06000100,
316 	0x3c290, 0xffffffff, 0x00000100,
317 	0x3c274, 0xffffffff, 0x00000100,
318 	0x3c2b4, 0xffffffff, 0x00000100,
319 	0x3c2b0, 0xffffffff, 0x00000100,
320 	0x3c270, 0xffffffff, 0x00000100,
321 	0x30800, 0xffffffff, 0xe0000000,
322 	0x3c020, 0xffffffff, 0x00010000,
323 	0x3c024, 0xffffffff, 0x00030002,
324 	0x3c028, 0xffffffff, 0x00040007,
325 	0x3c02c, 0xffffffff, 0x00060005,
326 	0x3c030, 0xffffffff, 0x00090008,
327 	0x3c034, 0xffffffff, 0x00010000,
328 	0x3c038, 0xffffffff, 0x00030002,
329 	0x3c03c, 0xffffffff, 0x00040007,
330 	0x3c040, 0xffffffff, 0x00060005,
331 	0x3c044, 0xffffffff, 0x00090008,
332 	0x3c048, 0xffffffff, 0x00010000,
333 	0x3c04c, 0xffffffff, 0x00030002,
334 	0x3c050, 0xffffffff, 0x00040007,
335 	0x3c054, 0xffffffff, 0x00060005,
336 	0x3c058, 0xffffffff, 0x00090008,
337 	0x3c05c, 0xffffffff, 0x00010000,
338 	0x3c060, 0xffffffff, 0x00030002,
339 	0x3c064, 0xffffffff, 0x00040007,
340 	0x3c068, 0xffffffff, 0x00060005,
341 	0x3c06c, 0xffffffff, 0x00090008,
342 	0x3c070, 0xffffffff, 0x00010000,
343 	0x3c074, 0xffffffff, 0x00030002,
344 	0x3c078, 0xffffffff, 0x00040007,
345 	0x3c07c, 0xffffffff, 0x00060005,
346 	0x3c080, 0xffffffff, 0x00090008,
347 	0x3c084, 0xffffffff, 0x00010000,
348 	0x3c088, 0xffffffff, 0x00030002,
349 	0x3c08c, 0xffffffff, 0x00040007,
350 	0x3c090, 0xffffffff, 0x00060005,
351 	0x3c094, 0xffffffff, 0x00090008,
352 	0x3c098, 0xffffffff, 0x00010000,
353 	0x3c09c, 0xffffffff, 0x00030002,
354 	0x3c0a0, 0xffffffff, 0x00040007,
355 	0x3c0a4, 0xffffffff, 0x00060005,
356 	0x3c0a8, 0xffffffff, 0x00090008,
357 	0x3c0ac, 0xffffffff, 0x00010000,
358 	0x3c0b0, 0xffffffff, 0x00030002,
359 	0x3c0b4, 0xffffffff, 0x00040007,
360 	0x3c0b8, 0xffffffff, 0x00060005,
361 	0x3c0bc, 0xffffffff, 0x00090008,
362 	0x3c000, 0xffffffff, 0x96e00200,
363 	0x8708, 0xffffffff, 0x00900100,
364 	0xc424, 0xffffffff, 0x0020003f,
365 	0x38, 0xffffffff, 0x0140001c,
366 	0x3c, 0x000f0000, 0x000f0000,
367 	0x220, 0xffffffff, 0xC060000C,
368 	0x224, 0xc0000fff, 0x00000100,
369 	0xf90, 0xffffffff, 0x00000100,
370 	0xf98, 0x00000101, 0x00000000,
371 	0x20a8, 0xffffffff, 0x00000104,
372 	0x55e4, 0xff000fff, 0x00000100,
373 	0x30cc, 0xc0000fff, 0x00000104,
374 	0xc1e4, 0x00000001, 0x00000001,
375 	0xd00c, 0xff000ff0, 0x00000100,
376 	0xd80c, 0xff000ff0, 0x00000100
377 };
378 
379 static const u32 kalindi_golden_spm_registers[] =
380 {
381 	0x30800, 0xe0ffffff, 0xe0000000
382 };
383 
384 static const u32 kalindi_golden_common_registers[] =
385 {
386 	0xc770, 0xffffffff, 0x00000800,
387 	0xc774, 0xffffffff, 0x00000800,
388 	0xc798, 0xffffffff, 0x00007fbf,
389 	0xc79c, 0xffffffff, 0x00007faf
390 };
391 
392 static const u32 kalindi_golden_registers[] =
393 {
394 	0x3c000, 0xffffdfff, 0x6e944040,
395 	0x55e4, 0xff607fff, 0xfc000100,
396 	0x3c220, 0xff000fff, 0x00000100,
397 	0x3c224, 0xff000fff, 0x00000100,
398 	0x3c200, 0xfffc0fff, 0x00000100,
399 	0x6ed8, 0x00010101, 0x00010000,
400 	0x9830, 0xffffffff, 0x00000000,
401 	0x9834, 0xf00fffff, 0x00000400,
402 	0x5bb0, 0x000000f0, 0x00000070,
403 	0x5bc0, 0xf0311fff, 0x80300000,
404 	0x98f8, 0x73773777, 0x12010001,
405 	0x98fc, 0xffffffff, 0x00000010,
406 	0x9b7c, 0x00ff0000, 0x00fc0000,
407 	0x8030, 0x00001f0f, 0x0000100a,
408 	0x2f48, 0x73773777, 0x12010001,
409 	0x2408, 0x000fffff, 0x000c007f,
410 	0x8a14, 0xf000003f, 0x00000007,
411 	0x8b24, 0x3fff3fff, 0x00ffcfff,
412 	0x30a04, 0x0000ff0f, 0x00000000,
413 	0x28a4c, 0x07ffffff, 0x06000000,
414 	0x4d8, 0x00000fff, 0x00000100,
415 	0x3e78, 0x00000001, 0x00000002,
416 	0xc768, 0x00000008, 0x00000008,
417 	0x8c00, 0x000000ff, 0x00000003,
418 	0x214f8, 0x01ff01ff, 0x00000002,
419 	0x21498, 0x007ff800, 0x00200000,
420 	0x2015c, 0xffffffff, 0x00000f40,
421 	0x88c4, 0x001f3ae3, 0x00000082,
422 	0x88d4, 0x0000001f, 0x00000010,
423 	0x30934, 0xffffffff, 0x00000000
424 };
425 
426 static const u32 kalindi_mgcg_cgcg_init[] =
427 {
428 	0xc420, 0xffffffff, 0xfffffffc,
429 	0x30800, 0xffffffff, 0xe0000000,
430 	0x3c2a0, 0xffffffff, 0x00000100,
431 	0x3c208, 0xffffffff, 0x00000100,
432 	0x3c2c0, 0xffffffff, 0x00000100,
433 	0x3c2c8, 0xffffffff, 0x00000100,
434 	0x3c2c4, 0xffffffff, 0x00000100,
435 	0x55e4, 0xffffffff, 0x00600100,
436 	0x3c280, 0xffffffff, 0x00000100,
437 	0x3c214, 0xffffffff, 0x06000100,
438 	0x3c220, 0xffffffff, 0x00000100,
439 	0x3c218, 0xffffffff, 0x06000100,
440 	0x3c204, 0xffffffff, 0x00000100,
441 	0x3c2e0, 0xffffffff, 0x00000100,
442 	0x3c224, 0xffffffff, 0x00000100,
443 	0x3c200, 0xffffffff, 0x00000100,
444 	0x3c230, 0xffffffff, 0x00000100,
445 	0x3c234, 0xffffffff, 0x00000100,
446 	0x3c250, 0xffffffff, 0x00000100,
447 	0x3c254, 0xffffffff, 0x00000100,
448 	0x3c258, 0xffffffff, 0x00000100,
449 	0x3c25c, 0xffffffff, 0x00000100,
450 	0x3c260, 0xffffffff, 0x00000100,
451 	0x3c27c, 0xffffffff, 0x00000100,
452 	0x3c278, 0xffffffff, 0x00000100,
453 	0x3c210, 0xffffffff, 0x06000100,
454 	0x3c290, 0xffffffff, 0x00000100,
455 	0x3c274, 0xffffffff, 0x00000100,
456 	0x3c2b4, 0xffffffff, 0x00000100,
457 	0x3c2b0, 0xffffffff, 0x00000100,
458 	0x3c270, 0xffffffff, 0x00000100,
459 	0x30800, 0xffffffff, 0xe0000000,
460 	0x3c020, 0xffffffff, 0x00010000,
461 	0x3c024, 0xffffffff, 0x00030002,
462 	0x3c028, 0xffffffff, 0x00040007,
463 	0x3c02c, 0xffffffff, 0x00060005,
464 	0x3c030, 0xffffffff, 0x00090008,
465 	0x3c034, 0xffffffff, 0x00010000,
466 	0x3c038, 0xffffffff, 0x00030002,
467 	0x3c03c, 0xffffffff, 0x00040007,
468 	0x3c040, 0xffffffff, 0x00060005,
469 	0x3c044, 0xffffffff, 0x00090008,
470 	0x3c000, 0xffffffff, 0x96e00200,
471 	0x8708, 0xffffffff, 0x00900100,
472 	0xc424, 0xffffffff, 0x0020003f,
473 	0x38, 0xffffffff, 0x0140001c,
474 	0x3c, 0x000f0000, 0x000f0000,
475 	0x220, 0xffffffff, 0xC060000C,
476 	0x224, 0xc0000fff, 0x00000100,
477 	0x20a8, 0xffffffff, 0x00000104,
478 	0x55e4, 0xff000fff, 0x00000100,
479 	0x30cc, 0xc0000fff, 0x00000104,
480 	0xc1e4, 0x00000001, 0x00000001,
481 	0xd00c, 0xff000ff0, 0x00000100,
482 	0xd80c, 0xff000ff0, 0x00000100
483 };
484 
485 static void cik_init_golden_registers(struct radeon_device *rdev)
486 {
487 	switch (rdev->family) {
488 	case CHIP_BONAIRE:
489 		radeon_program_register_sequence(rdev,
490 						 bonaire_mgcg_cgcg_init,
491 						 (const u32)ARRAY_SIZE(bonaire_mgcg_cgcg_init));
492 		radeon_program_register_sequence(rdev,
493 						 bonaire_golden_registers,
494 						 (const u32)ARRAY_SIZE(bonaire_golden_registers));
495 		radeon_program_register_sequence(rdev,
496 						 bonaire_golden_common_registers,
497 						 (const u32)ARRAY_SIZE(bonaire_golden_common_registers));
498 		radeon_program_register_sequence(rdev,
499 						 bonaire_golden_spm_registers,
500 						 (const u32)ARRAY_SIZE(bonaire_golden_spm_registers));
501 		break;
502 	case CHIP_KABINI:
503 		radeon_program_register_sequence(rdev,
504 						 kalindi_mgcg_cgcg_init,
505 						 (const u32)ARRAY_SIZE(kalindi_mgcg_cgcg_init));
506 		radeon_program_register_sequence(rdev,
507 						 kalindi_golden_registers,
508 						 (const u32)ARRAY_SIZE(kalindi_golden_registers));
509 		radeon_program_register_sequence(rdev,
510 						 kalindi_golden_common_registers,
511 						 (const u32)ARRAY_SIZE(kalindi_golden_common_registers));
512 		radeon_program_register_sequence(rdev,
513 						 kalindi_golden_spm_registers,
514 						 (const u32)ARRAY_SIZE(kalindi_golden_spm_registers));
515 		break;
516 	case CHIP_KAVERI:
517 		radeon_program_register_sequence(rdev,
518 						 spectre_mgcg_cgcg_init,
519 						 (const u32)ARRAY_SIZE(spectre_mgcg_cgcg_init));
520 		radeon_program_register_sequence(rdev,
521 						 spectre_golden_registers,
522 						 (const u32)ARRAY_SIZE(spectre_golden_registers));
523 		radeon_program_register_sequence(rdev,
524 						 spectre_golden_common_registers,
525 						 (const u32)ARRAY_SIZE(spectre_golden_common_registers));
526 		radeon_program_register_sequence(rdev,
527 						 spectre_golden_spm_registers,
528 						 (const u32)ARRAY_SIZE(spectre_golden_spm_registers));
529 		break;
530 	default:
531 		break;
532 	}
533 }
534 
535 /**
536  * cik_get_xclk - get the xclk
537  *
538  * @rdev: radeon_device pointer
539  *
540  * Returns the reference clock used by the gfx engine
541  * (CIK).
542  */
543 u32 cik_get_xclk(struct radeon_device *rdev)
544 {
545 	u32 reference_clock = rdev->clock.spll.reference_freq;
546 
547 	if (rdev->flags & RADEON_IS_IGP) {
548 		if (RREG32_SMC(GENERAL_PWRMGT) & GPU_COUNTER_CLK)
549 			return reference_clock / 2;
550 	} else {
551 		if (RREG32_SMC(CG_CLKPIN_CNTL) & XTALIN_DIVIDE)
552 			return reference_clock / 4;
553 	}
554 	return reference_clock;
555 }
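/*
 * Example (purely illustrative numbers): with a nominal 100 MHz SPLL
 * reference on a dGPU that has XTALIN_DIVIDE set, the xclk reported here
 * is 25 MHz; on an APU with GPU_COUNTER_CLK set it would be half the
 * reference instead.  The real reference frequency comes from the ATOM
 * firmware info table.
 */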
556 
557 /**
558  * cik_mm_rdoorbell - read a doorbell dword
559  *
560  * @rdev: radeon_device pointer
561  * @offset: byte offset into the aperture
562  *
563  * Returns the value in the doorbell aperture at the
564  * requested offset (CIK).
565  */
566 u32 cik_mm_rdoorbell(struct radeon_device *rdev, u32 offset)
567 {
568 	if (offset < rdev->doorbell.size) {
569 		return readl(((void __iomem *)rdev->doorbell.ptr) + offset);
570 	} else {
571 		DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", offset);
572 		return 0;
573 	}
574 }
575 
576 /**
577  * cik_mm_wdoorbell - write a doorbell dword
578  *
579  * @rdev: radeon_device pointer
580  * @offset: byte offset into the aperture
581  * @v: value to write
582  *
583  * Writes @v to the doorbell aperture at the
584  * requested offset (CIK).
585  */
586 void cik_mm_wdoorbell(struct radeon_device *rdev, u32 offset, u32 v)
587 {
588 	if (offset < rdev->doorbell.size) {
589 		writel(v, ((void __iomem *)rdev->doorbell.ptr) + offset);
590 	} else {
591 		DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", offset);
592 	}
593 }
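/*
 * Rings that use doorbells (the compute and SDMA queues on CIK) kick the
 * hardware by writing their updated write pointer through this aperture,
 * e.g. something along the lines of:
 *
 *	cik_mm_wdoorbell(rdev, ring->doorbell_offset, ring->wptr);
 *
 * (the field name is illustrative; see the ring wptr helpers for the
 * actual call sites).
 */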
594 
595 #define BONAIRE_IO_MC_REGS_SIZE 36
596 
597 static const u32 bonaire_io_mc_regs[BONAIRE_IO_MC_REGS_SIZE][2] =
598 {
599 	{0x00000070, 0x04400000},
600 	{0x00000071, 0x80c01803},
601 	{0x00000072, 0x00004004},
602 	{0x00000073, 0x00000100},
603 	{0x00000074, 0x00ff0000},
604 	{0x00000075, 0x34000000},
605 	{0x00000076, 0x08000014},
606 	{0x00000077, 0x00cc08ec},
607 	{0x00000078, 0x00000400},
608 	{0x00000079, 0x00000000},
609 	{0x0000007a, 0x04090000},
610 	{0x0000007c, 0x00000000},
611 	{0x0000007e, 0x4408a8e8},
612 	{0x0000007f, 0x00000304},
613 	{0x00000080, 0x00000000},
614 	{0x00000082, 0x00000001},
615 	{0x00000083, 0x00000002},
616 	{0x00000084, 0xf3e4f400},
617 	{0x00000085, 0x052024e3},
618 	{0x00000087, 0x00000000},
619 	{0x00000088, 0x01000000},
620 	{0x0000008a, 0x1c0a0000},
621 	{0x0000008b, 0xff010000},
622 	{0x0000008d, 0xffffefff},
623 	{0x0000008e, 0xfff3efff},
624 	{0x0000008f, 0xfff3efbf},
625 	{0x00000092, 0xf7ffffff},
626 	{0x00000093, 0xffffff7f},
627 	{0x00000095, 0x00101101},
628 	{0x00000096, 0x00000fff},
629 	{0x00000097, 0x00116fff},
630 	{0x00000098, 0x60010000},
631 	{0x00000099, 0x10010000},
632 	{0x0000009a, 0x00006000},
633 	{0x0000009b, 0x00001000},
634 	{0x0000009f, 0x00b48000}
635 };
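/*
 * Each entry above is an {MC_SEQ_IO_DEBUG_INDEX, MC_SEQ_IO_DEBUG_DATA}
 * pair; ci_mc_load_microcode() below walks this list to program the MC IO
 * registers before the MC ucode itself is streamed in.
 */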
636 
637 /**
638  * cik_srbm_select - select specific register instances
639  *
640  * @rdev: radeon_device pointer
641  * @me: selected ME (micro engine)
642  * @pipe: pipe
643  * @queue: queue
644  * @vmid: VMID
645  *
646  * Switches the currently active register instances.  Some
647  * registers are instanced per VMID, others are instanced per
648  * me/pipe/queue combination.
649  */
650 static void cik_srbm_select(struct radeon_device *rdev,
651 			    u32 me, u32 pipe, u32 queue, u32 vmid)
652 {
653 	u32 srbm_gfx_cntl = (PIPEID(pipe & 0x3) |
654 			     MEID(me & 0x3) |
655 			     VMID(vmid & 0xf) |
656 			     QUEUEID(queue & 0x7));
657 	WREG32(SRBM_GFX_CNTL, srbm_gfx_cntl);
658 }
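/*
 * Illustrative usage when setting up one of the compute queues (the
 * CP_HQD_* registers are instanced per me/pipe/queue):
 *
 *	cik_srbm_select(rdev, me, pipe, queue, 0);
 *	... program the CP_HQD_* registers for that queue ...
 *	cik_srbm_select(rdev, 0, 0, 0, 0);
 *
 * SRBM_GFX_CNTL is global state, so callers are expected to serialize
 * around select/deselect pairs.
 */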
659 
660 /* ucode loading */
661 /**
662  * ci_mc_load_microcode - load MC ucode into the hw
663  *
664  * @rdev: radeon_device pointer
665  *
666  * Load the GDDR MC ucode into the hw (CIK).
667  * Returns 0 on success, error on failure.
668  */
669 static int ci_mc_load_microcode(struct radeon_device *rdev)
670 {
671 	const __be32 *fw_data;
672 	u32 running, blackout = 0;
673 	u32 *io_mc_regs;
674 	int i, ucode_size, regs_size;
675 
676 	if (!rdev->mc_fw)
677 		return -EINVAL;
678 
679 	switch (rdev->family) {
680 	case CHIP_BONAIRE:
681 	default:
682 		io_mc_regs = (u32 *)&bonaire_io_mc_regs;
683 		ucode_size = CIK_MC_UCODE_SIZE;
684 		regs_size = BONAIRE_IO_MC_REGS_SIZE;
685 		break;
686 	}
687 
688 	running = RREG32(MC_SEQ_SUP_CNTL) & RUN_MASK;
689 
690 	if (running == 0) {
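		/*
		 * Note: the inner "if (running)" branch below can never be
		 * taken since we only get here when running == 0, so the
		 * blackout save/restore around the ucode load is effectively
		 * a no-op in this version.
		 */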
691 		if (running) {
692 			blackout = RREG32(MC_SHARED_BLACKOUT_CNTL);
693 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout | 1);
694 		}
695 
696 		/* reset the engine and set to writable */
697 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
698 		WREG32(MC_SEQ_SUP_CNTL, 0x00000010);
699 
700 		/* load mc io regs */
701 		for (i = 0; i < regs_size; i++) {
702 			WREG32(MC_SEQ_IO_DEBUG_INDEX, io_mc_regs[(i << 1)]);
703 			WREG32(MC_SEQ_IO_DEBUG_DATA, io_mc_regs[(i << 1) + 1]);
704 		}
705 		/* load the MC ucode */
706 		fw_data = (const __be32 *)rdev->mc_fw->data;
707 		for (i = 0; i < ucode_size; i++)
708 			WREG32(MC_SEQ_SUP_PGM, be32_to_cpup(fw_data++));
709 
710 		/* put the engine back into the active state */
711 		WREG32(MC_SEQ_SUP_CNTL, 0x00000008);
712 		WREG32(MC_SEQ_SUP_CNTL, 0x00000004);
713 		WREG32(MC_SEQ_SUP_CNTL, 0x00000001);
714 
715 		/* wait for training to complete */
716 		for (i = 0; i < rdev->usec_timeout; i++) {
717 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D0)
718 				break;
719 			udelay(1);
720 		}
721 		for (i = 0; i < rdev->usec_timeout; i++) {
722 			if (RREG32(MC_SEQ_TRAIN_WAKEUP_CNTL) & TRAIN_DONE_D1)
723 				break;
724 			udelay(1);
725 		}
726 
727 		if (running)
728 			WREG32(MC_SHARED_BLACKOUT_CNTL, blackout);
729 	}
730 
731 	return 0;
732 }
733 
734 /**
735  * cik_init_microcode - load ucode images from disk
736  *
737  * @rdev: radeon_device pointer
738  *
739  * Use the firmware interface to load the ucode images into
740  * the driver (not loaded into hw).
741  * Returns 0 on success, error on failure.
742  */
743 static int cik_init_microcode(struct radeon_device *rdev)
744 {
745 	struct platform_device *pdev;
746 	const char *chip_name;
747 	size_t pfp_req_size, me_req_size, ce_req_size,
748 		mec_req_size, rlc_req_size, mc_req_size,
749 		sdma_req_size;
750 	char fw_name[30];
751 	int err;
752 
753 	DRM_DEBUG("\n");
754 
755 	pdev = platform_device_register_simple("radeon_cp", 0, NULL, 0);
756 	err = IS_ERR(pdev);
757 	if (err) {
758 		printk(KERN_ERR "radeon_cp: Failed to register firmware\n");
759 		return -EINVAL;
760 	}
761 
762 	switch (rdev->family) {
763 	case CHIP_BONAIRE:
764 		chip_name = "BONAIRE";
765 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
766 		me_req_size = CIK_ME_UCODE_SIZE * 4;
767 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
768 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
769 		rlc_req_size = BONAIRE_RLC_UCODE_SIZE * 4;
770 		mc_req_size = CIK_MC_UCODE_SIZE * 4;
771 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
772 		break;
773 	case CHIP_KAVERI:
774 		chip_name = "KAVERI";
775 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
776 		me_req_size = CIK_ME_UCODE_SIZE * 4;
777 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
778 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
779 		rlc_req_size = KV_RLC_UCODE_SIZE * 4;
780 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
781 		break;
782 	case CHIP_KABINI:
783 		chip_name = "KABINI";
784 		pfp_req_size = CIK_PFP_UCODE_SIZE * 4;
785 		me_req_size = CIK_ME_UCODE_SIZE * 4;
786 		ce_req_size = CIK_CE_UCODE_SIZE * 4;
787 		mec_req_size = CIK_MEC_UCODE_SIZE * 4;
788 		rlc_req_size = KB_RLC_UCODE_SIZE * 4;
789 		sdma_req_size = CIK_SDMA_UCODE_SIZE * 4;
790 		break;
791 	default: BUG();
792 	}
793 
794 	DRM_INFO("Loading %s Microcode\n", chip_name);
795 
796 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_pfp.bin", chip_name);
797 	err = request_firmware(&rdev->pfp_fw, fw_name, &pdev->dev);
798 	if (err)
799 		goto out;
800 	if (rdev->pfp_fw->size != pfp_req_size) {
801 		printk(KERN_ERR
802 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
803 		       rdev->pfp_fw->size, fw_name);
804 		err = -EINVAL;
805 		goto out;
806 	}
807 
808 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_me.bin", chip_name);
809 	err = request_firmware(&rdev->me_fw, fw_name, &pdev->dev);
810 	if (err)
811 		goto out;
812 	if (rdev->me_fw->size != me_req_size) {
813 		printk(KERN_ERR
814 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
815 		       rdev->me_fw->size, fw_name);
816 		err = -EINVAL;
817 	}
818 
819 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_ce.bin", chip_name);
820 	err = request_firmware(&rdev->ce_fw, fw_name, &pdev->dev);
821 	if (err)
822 		goto out;
823 	if (rdev->ce_fw->size != ce_req_size) {
824 		printk(KERN_ERR
825 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
826 		       rdev->ce_fw->size, fw_name);
827 		err = -EINVAL;
828 	}
829 
830 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_mec.bin", chip_name);
831 	err = request_firmware(&rdev->mec_fw, fw_name, &pdev->dev);
832 	if (err)
833 		goto out;
834 	if (rdev->mec_fw->size != mec_req_size) {
835 		printk(KERN_ERR
836 		       "cik_cp: Bogus length %zu in firmware \"%s\"\n",
837 		       rdev->mec_fw->size, fw_name);
838 		err = -EINVAL;
839 	}
840 
841 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_rlc.bin", chip_name);
842 	err = request_firmware(&rdev->rlc_fw, fw_name, &pdev->dev);
843 	if (err)
844 		goto out;
845 	if (rdev->rlc_fw->size != rlc_req_size) {
846 		printk(KERN_ERR
847 		       "cik_rlc: Bogus length %zu in firmware \"%s\"\n",
848 		       rdev->rlc_fw->size, fw_name);
849 		err = -EINVAL;
850 	}
851 
852 	snprintf(fw_name, sizeof(fw_name), "radeon/%s_sdma.bin", chip_name);
853 	err = request_firmware(&rdev->sdma_fw, fw_name, &pdev->dev);
854 	if (err)
855 		goto out;
856 	if (rdev->sdma_fw->size != sdma_req_size) {
857 		printk(KERN_ERR
858 		       "cik_sdma: Bogus length %zu in firmware \"%s\"\n",
859 		       rdev->sdma_fw->size, fw_name);
860 		err = -EINVAL;
861 	}
862 
863 	/* No MC ucode on APUs */
864 	if (!(rdev->flags & RADEON_IS_IGP)) {
865 		snprintf(fw_name, sizeof(fw_name), "radeon/%s_mc.bin", chip_name);
866 		err = request_firmware(&rdev->mc_fw, fw_name, &pdev->dev);
867 		if (err)
868 			goto out;
869 		if (rdev->mc_fw->size != mc_req_size) {
870 			printk(KERN_ERR
871 			       "cik_mc: Bogus length %zu in firmware \"%s\"\n",
872 			       rdev->mc_fw->size, fw_name);
873 			err = -EINVAL;
874 		}
875 	}
876 
877 out:
878 	platform_device_unregister(pdev);
879 
880 	if (err) {
881 		if (err != -EINVAL)
882 			printk(KERN_ERR
883 			       "cik_cp: Failed to load firmware \"%s\"\n",
884 			       fw_name);
885 		release_firmware(rdev->pfp_fw);
886 		rdev->pfp_fw = NULL;
887 		release_firmware(rdev->me_fw);
888 		rdev->me_fw = NULL;
889 		release_firmware(rdev->ce_fw);
890 		rdev->ce_fw = NULL;
891 		release_firmware(rdev->rlc_fw);
892 		rdev->rlc_fw = NULL;
893 		release_firmware(rdev->mc_fw);
894 		rdev->mc_fw = NULL;
895 	}
896 	return err;
897 }
898 
899 /*
900  * Core functions
901  */
902 /**
903  * cik_tiling_mode_table_init - init the hw tiling table
904  *
905  * @rdev: radeon_device pointer
906  *
907  * Starting with SI, the tiling setup is done globally in a
908  * set of 32 tiling modes.  Rather than selecting each set of
909  * parameters per surface as on older asics, we just select
910  * which index in the tiling table we want to use, and the
911  * surface uses those parameters (CIK).
912  */
913 static void cik_tiling_mode_table_init(struct radeon_device *rdev)
914 {
915 	const u32 num_tile_mode_states = 32;
916 	const u32 num_secondary_tile_mode_states = 16;
917 	u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
918 	u32 num_pipe_configs;
919 	u32 num_rbs = rdev->config.cik.max_backends_per_se *
920 		rdev->config.cik.max_shader_engines;
921 
922 	switch (rdev->config.cik.mem_row_size_in_kb) {
923 	case 1:
924 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_1KB;
925 		break;
926 	case 2:
927 	default:
928 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_2KB;
929 		break;
930 	case 4:
931 		split_equal_to_row_size = ADDR_SURF_TILE_SPLIT_4KB;
932 		break;
933 	}
934 
935 	num_pipe_configs = rdev->config.cik.max_tile_pipes;
936 	if (num_pipe_configs > 8)
937 		num_pipe_configs = 8; /* ??? */
938 
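	/*
	 * Each GB_TILE_MODE entry programmed below packs an array mode,
	 * micro tile mode, pipe config and tile/sample split into a single
	 * dword, while the GB_MACROTILE_MODE entries pack bank width/height,
	 * macro aspect and bank count.  The tile mode values are also cached
	 * in rdev->config.cik.tile_mode_array so user space can look them up
	 * by index instead of re-deriving the parameters per surface.
	 */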
939 	if (num_pipe_configs == 8) {
940 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
941 			switch (reg_offset) {
942 			case 0:
943 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
944 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
945 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
946 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
947 				break;
948 			case 1:
949 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
950 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
951 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
952 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
953 				break;
954 			case 2:
955 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
956 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
957 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
958 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
959 				break;
960 			case 3:
961 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
962 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
963 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
964 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
965 				break;
966 			case 4:
967 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
968 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
969 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
970 						 TILE_SPLIT(split_equal_to_row_size));
971 				break;
972 			case 5:
973 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
974 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
975 				break;
976 			case 6:
977 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
978 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
979 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
980 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
981 				break;
982 			case 7:
983 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
984 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
985 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
986 						 TILE_SPLIT(split_equal_to_row_size));
987 				break;
988 			case 8:
989 				gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
990 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16));
991 				break;
992 			case 9:
993 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
994 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
995 				break;
996 			case 10:
997 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
998 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
999 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1000 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1001 				break;
1002 			case 11:
1003 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1004 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1005 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1006 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1007 				break;
1008 			case 12:
1009 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1010 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1011 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1012 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1013 				break;
1014 			case 13:
1015 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1016 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1017 				break;
1018 			case 14:
1019 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1020 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1021 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1022 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1023 				break;
1024 			case 16:
1025 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1026 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1027 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1028 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1029 				break;
1030 			case 17:
1031 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1032 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1033 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1034 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1035 				break;
1036 			case 27:
1037 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1038 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1039 				break;
1040 			case 28:
1041 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1042 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1043 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1044 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1045 				break;
1046 			case 29:
1047 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1048 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1049 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_8x16) |
1050 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1051 				break;
1052 			case 30:
1053 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1054 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1055 						 PIPE_CONFIG(ADDR_SURF_P8_32x32_16x16) |
1056 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1057 				break;
1058 			default:
1059 				gb_tile_moden = 0;
1060 				break;
1061 			}
1062 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1063 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1064 		}
1065 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1066 			switch (reg_offset) {
1067 			case 0:
1068 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1069 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1070 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1071 						 NUM_BANKS(ADDR_SURF_16_BANK));
1072 				break;
1073 			case 1:
1074 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1075 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1076 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1077 						 NUM_BANKS(ADDR_SURF_16_BANK));
1078 				break;
1079 			case 2:
1080 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1081 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1082 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1083 						 NUM_BANKS(ADDR_SURF_16_BANK));
1084 				break;
1085 			case 3:
1086 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1087 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1088 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1089 						 NUM_BANKS(ADDR_SURF_16_BANK));
1090 				break;
1091 			case 4:
1092 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1093 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1094 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1095 						 NUM_BANKS(ADDR_SURF_8_BANK));
1096 				break;
1097 			case 5:
1098 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1099 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1100 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1101 						 NUM_BANKS(ADDR_SURF_4_BANK));
1102 				break;
1103 			case 6:
1104 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1105 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1106 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1107 						 NUM_BANKS(ADDR_SURF_2_BANK));
1108 				break;
1109 			case 8:
1110 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1111 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1112 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1113 						 NUM_BANKS(ADDR_SURF_16_BANK));
1114 				break;
1115 			case 9:
1116 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1117 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1118 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1119 						 NUM_BANKS(ADDR_SURF_16_BANK));
1120 				break;
1121 			case 10:
1122 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1123 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1124 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1125 						 NUM_BANKS(ADDR_SURF_16_BANK));
1126 				break;
1127 			case 11:
1128 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1129 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1130 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1131 						 NUM_BANKS(ADDR_SURF_16_BANK));
1132 				break;
1133 			case 12:
1134 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1135 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1136 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1137 						 NUM_BANKS(ADDR_SURF_8_BANK));
1138 				break;
1139 			case 13:
1140 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1141 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1142 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1143 						 NUM_BANKS(ADDR_SURF_4_BANK));
1144 				break;
1145 			case 14:
1146 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1147 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1148 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1149 						 NUM_BANKS(ADDR_SURF_2_BANK));
1150 				break;
1151 			default:
1152 				gb_tile_moden = 0;
1153 				break;
1154 			}
1155 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1156 		}
1157 	} else if (num_pipe_configs == 4) {
1158 		if (num_rbs == 4) {
1159 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1160 				switch (reg_offset) {
1161 				case 0:
1162 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1163 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1164 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1165 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1166 					break;
1167 				case 1:
1168 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1169 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1170 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1171 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1172 					break;
1173 				case 2:
1174 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1175 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1176 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1177 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1178 					break;
1179 				case 3:
1180 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1181 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1182 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1183 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1184 					break;
1185 				case 4:
1186 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1187 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1188 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1189 							 TILE_SPLIT(split_equal_to_row_size));
1190 					break;
1191 				case 5:
1192 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1193 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1194 					break;
1195 				case 6:
1196 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1197 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1198 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1199 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1200 					break;
1201 				case 7:
1202 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1203 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1204 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1205 							 TILE_SPLIT(split_equal_to_row_size));
1206 					break;
1207 				case 8:
1208 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1209 							 PIPE_CONFIG(ADDR_SURF_P4_16x16));
1210 					break;
1211 				case 9:
1212 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1213 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1214 					break;
1215 				case 10:
1216 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1217 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1218 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1219 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1220 					break;
1221 				case 11:
1222 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1223 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1224 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1225 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1226 					break;
1227 				case 12:
1228 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1229 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1230 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1231 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1232 					break;
1233 				case 13:
1234 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1235 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1236 					break;
1237 				case 14:
1238 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1239 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1240 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1241 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1242 					break;
1243 				case 16:
1244 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1245 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1246 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1247 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1248 					break;
1249 				case 17:
1250 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1251 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1252 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1253 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1254 					break;
1255 				case 27:
1256 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1257 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1258 					break;
1259 				case 28:
1260 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1261 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1262 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1263 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1264 					break;
1265 				case 29:
1266 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1267 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1268 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1269 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1270 					break;
1271 				case 30:
1272 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1273 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1274 							 PIPE_CONFIG(ADDR_SURF_P4_16x16) |
1275 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1276 					break;
1277 				default:
1278 					gb_tile_moden = 0;
1279 					break;
1280 				}
1281 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1282 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1283 			}
1284 		} else if (num_rbs < 4) {
1285 			for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1286 				switch (reg_offset) {
1287 				case 0:
1288 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1289 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1290 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1291 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1292 					break;
1293 				case 1:
1294 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1295 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1296 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1297 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1298 					break;
1299 				case 2:
1300 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1301 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1302 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1303 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1304 					break;
1305 				case 3:
1306 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1307 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1308 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1309 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1310 					break;
1311 				case 4:
1312 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1313 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1314 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1315 							 TILE_SPLIT(split_equal_to_row_size));
1316 					break;
1317 				case 5:
1318 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1319 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1320 					break;
1321 				case 6:
1322 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1323 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1324 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1325 							 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1326 					break;
1327 				case 7:
1328 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1329 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1330 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1331 							 TILE_SPLIT(split_equal_to_row_size));
1332 					break;
1333 				case 8:
1334 					gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
1335 						 PIPE_CONFIG(ADDR_SURF_P4_8x16));
1336 					break;
1337 				case 9:
1338 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1339 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1340 					break;
1341 				case 10:
1342 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1343 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1344 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1345 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1346 					break;
1347 				case 11:
1348 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1349 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1350 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1351 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1352 					break;
1353 				case 12:
1354 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1355 							 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1356 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1357 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1358 					break;
1359 				case 13:
1360 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1361 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1362 					break;
1363 				case 14:
1364 					gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1365 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1366 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1367 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1368 					break;
1369 				case 16:
1370 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1371 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1372 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1373 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1374 					break;
1375 				case 17:
1376 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1377 							 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1378 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1379 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1380 					break;
1381 				case 27:
1382 					gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1383 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1384 					break;
1385 				case 28:
1386 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1387 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1388 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1389 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1390 					break;
1391 				case 29:
1392 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1393 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1394 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1395 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1396 					break;
1397 				case 30:
1398 					gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1399 							 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1400 							 PIPE_CONFIG(ADDR_SURF_P4_8x16) |
1401 							 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1402 					break;
1403 				default:
1404 					gb_tile_moden = 0;
1405 					break;
1406 				}
1407 				rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1408 				WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1409 			}
1410 		}
1411 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1412 			switch (reg_offset) {
1413 			case 0:
1414 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1415 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1416 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1417 						 NUM_BANKS(ADDR_SURF_16_BANK));
1418 				break;
1419 			case 1:
1420 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1421 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1422 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1423 						 NUM_BANKS(ADDR_SURF_16_BANK));
1424 				break;
1425 			case 2:
1426 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1427 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1428 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1429 						 NUM_BANKS(ADDR_SURF_16_BANK));
1430 				break;
1431 			case 3:
1432 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1433 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1434 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1435 						 NUM_BANKS(ADDR_SURF_16_BANK));
1436 				break;
1437 			case 4:
1438 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1439 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1440 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1441 						 NUM_BANKS(ADDR_SURF_16_BANK));
1442 				break;
1443 			case 5:
1444 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1445 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1446 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1447 						 NUM_BANKS(ADDR_SURF_8_BANK));
1448 				break;
1449 			case 6:
1450 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1451 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1452 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1453 						 NUM_BANKS(ADDR_SURF_4_BANK));
1454 				break;
1455 			case 8:
1456 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1457 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1458 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1459 						 NUM_BANKS(ADDR_SURF_16_BANK));
1460 				break;
1461 			case 9:
1462 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1463 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1464 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1465 						 NUM_BANKS(ADDR_SURF_16_BANK));
1466 				break;
1467 			case 10:
1468 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1469 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1470 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1471 						 NUM_BANKS(ADDR_SURF_16_BANK));
1472 				break;
1473 			case 11:
1474 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1475 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1476 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1477 						 NUM_BANKS(ADDR_SURF_16_BANK));
1478 				break;
1479 			case 12:
1480 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1481 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1482 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1483 						 NUM_BANKS(ADDR_SURF_16_BANK));
1484 				break;
1485 			case 13:
1486 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1487 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1488 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1489 						 NUM_BANKS(ADDR_SURF_8_BANK));
1490 				break;
1491 			case 14:
1492 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1493 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1494 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
1495 						 NUM_BANKS(ADDR_SURF_4_BANK));
1496 				break;
1497 			default:
1498 				gb_tile_moden = 0;
1499 				break;
1500 			}
1501 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1502 		}
1503 	} else if (num_pipe_configs == 2) {
1504 		for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
1505 			switch (reg_offset) {
1506 			case 0:
1507 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1508 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1509 						 PIPE_CONFIG(ADDR_SURF_P2) |
1510 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B));
1511 				break;
1512 			case 1:
1513 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1514 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1515 						 PIPE_CONFIG(ADDR_SURF_P2) |
1516 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B));
1517 				break;
1518 			case 2:
1519 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1520 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1521 						 PIPE_CONFIG(ADDR_SURF_P2) |
1522 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1523 				break;
1524 			case 3:
1525 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1526 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1527 						 PIPE_CONFIG(ADDR_SURF_P2) |
1528 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B));
1529 				break;
1530 			case 4:
1531 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1532 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1533 						 PIPE_CONFIG(ADDR_SURF_P2) |
1534 						 TILE_SPLIT(split_equal_to_row_size));
1535 				break;
1536 			case 5:
1537 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1538 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
1539 				break;
1540 			case 6:
1541 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1542 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1543 						 PIPE_CONFIG(ADDR_SURF_P2) |
1544 						 TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B));
1545 				break;
1546 			case 7:
1547 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1548 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
1549 						 PIPE_CONFIG(ADDR_SURF_P2) |
1550 						 TILE_SPLIT(split_equal_to_row_size));
1551 				break;
1552 			case 8:
1553 				gb_tile_moden = ARRAY_MODE(ARRAY_LINEAR_ALIGNED);
1554 				break;
1555 			case 9:
1556 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1557 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
1558 				break;
1559 			case 10:
1560 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1561 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1562 						 PIPE_CONFIG(ADDR_SURF_P2) |
1563 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1564 				break;
1565 			case 11:
1566 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1567 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1568 						 PIPE_CONFIG(ADDR_SURF_P2) |
1569 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1570 				break;
1571 			case 12:
1572 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1573 						 MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
1574 						 PIPE_CONFIG(ADDR_SURF_P2) |
1575 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1576 				break;
1577 			case 13:
1578 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1579 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
1580 				break;
1581 			case 14:
1582 				gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
1583 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1584 						 PIPE_CONFIG(ADDR_SURF_P2) |
1585 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1586 				break;
1587 			case 16:
1588 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1589 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1590 						 PIPE_CONFIG(ADDR_SURF_P2) |
1591 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1592 				break;
1593 			case 17:
1594 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1595 						 MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
1596 						 PIPE_CONFIG(ADDR_SURF_P2) |
1597 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1598 				break;
1599 			case 27:
1600 				gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
1601 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
1602 				break;
1603 			case 28:
1604 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1605 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1606 						 PIPE_CONFIG(ADDR_SURF_P2) |
1607 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1608 				break;
1609 			case 29:
1610 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
1611 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1612 						 PIPE_CONFIG(ADDR_SURF_P2) |
1613 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1614 				break;
1615 			case 30:
1616 				gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
1617 						 MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
1618 						 PIPE_CONFIG(ADDR_SURF_P2) |
1619 						 SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
1620 				break;
1621 			default:
1622 				gb_tile_moden = 0;
1623 				break;
1624 			}
1625 			rdev->config.cik.tile_mode_array[reg_offset] = gb_tile_moden;
1626 			WREG32(GB_TILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1627 		}
1628 		for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
1629 			switch (reg_offset) {
1630 			case 0:
1631 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1632 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1633 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1634 						 NUM_BANKS(ADDR_SURF_16_BANK));
1635 				break;
1636 			case 1:
1637 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1638 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1639 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1640 						 NUM_BANKS(ADDR_SURF_16_BANK));
1641 				break;
1642 			case 2:
1643 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1644 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1645 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1646 						 NUM_BANKS(ADDR_SURF_16_BANK));
1647 				break;
1648 			case 3:
1649 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1650 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1651 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1652 						 NUM_BANKS(ADDR_SURF_16_BANK));
1653 				break;
1654 			case 4:
1655 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1656 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1657 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1658 						 NUM_BANKS(ADDR_SURF_16_BANK));
1659 				break;
1660 			case 5:
1661 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1662 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1663 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1664 						 NUM_BANKS(ADDR_SURF_16_BANK));
1665 				break;
1666 			case 6:
1667 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1668 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1669 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1670 						 NUM_BANKS(ADDR_SURF_8_BANK));
1671 				break;
1672 			case 8:
1673 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1674 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
1675 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1676 						 NUM_BANKS(ADDR_SURF_16_BANK));
1677 				break;
1678 			case 9:
1679 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
1680 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1681 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1682 						 NUM_BANKS(ADDR_SURF_16_BANK));
1683 				break;
1684 			case 10:
1685 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1686 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
1687 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1688 						 NUM_BANKS(ADDR_SURF_16_BANK));
1689 				break;
1690 			case 11:
1691 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
1692 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1693 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1694 						 NUM_BANKS(ADDR_SURF_16_BANK));
1695 				break;
1696 			case 12:
1697 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1698 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
1699 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1700 						 NUM_BANKS(ADDR_SURF_16_BANK));
1701 				break;
1702 			case 13:
1703 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1704 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1705 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
1706 						 NUM_BANKS(ADDR_SURF_16_BANK));
1707 				break;
1708 			case 14:
1709 				gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
1710 						 BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
1711 						 MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
1712 						 NUM_BANKS(ADDR_SURF_8_BANK));
1713 				break;
1714 			default:
1715 				gb_tile_moden = 0;
1716 				break;
1717 			}
1718 			WREG32(GB_MACROTILE_MODE0 + (reg_offset * 4), gb_tile_moden);
1719 		}
1720 	} else
1721 		DRM_ERROR("unknown num pipe config: 0x%x\n", num_pipe_configs);
1722 }
1723 
1724 /**
1725  * cik_select_se_sh - select which SE, SH to address
1726  *
1727  * @rdev: radeon_device pointer
1728  * @se_num: shader engine to address
1729  * @sh_num: sh block to address
1730  *
1731  * Select which SE, SH combinations to address. Certain
1732  * registers are instanced per SE or SH.  0xffffffff means
1733  * broadcast to all SEs or SHs (CIK).
1734  */
1735 static void cik_select_se_sh(struct radeon_device *rdev,
1736 			     u32 se_num, u32 sh_num)
1737 {
1738 	u32 data = INSTANCE_BROADCAST_WRITES;
1739 
1740 	if ((se_num == 0xffffffff) && (sh_num == 0xffffffff))
1741 		data |= SH_BROADCAST_WRITES | SE_BROADCAST_WRITES;
1742 	else if (se_num == 0xffffffff)
1743 		data |= SE_BROADCAST_WRITES | SH_INDEX(sh_num);
1744 	else if (sh_num == 0xffffffff)
1745 		data |= SH_BROADCAST_WRITES | SE_INDEX(se_num);
1746 	else
1747 		data |= SH_INDEX(sh_num) | SE_INDEX(se_num);
1748 	WREG32(GRBM_GFX_INDEX, data);
1749 }
1750 
1751 /**
1752  * cik_create_bitmask - create a bitmask
1753  *
1754  * @bit_width: length of the mask
1755  *
1756  * create a variable length bit mask (CIK).
1757  * Returns the bitmask.
1758  */
1759 static u32 cik_create_bitmask(u32 bit_width)
1760 {
1761 	u32 i, mask = 0;
1762 
1763 	for (i = 0; i < bit_width; i++) {
1764 		mask <<= 1;
1765 		mask |= 1;
1766 	}
1767 	return mask;
1768 }
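
/*
 * Usage note (illustrative, not called by the driver): for bit_width < 32
 * the loop above is equivalent to the closed form ((1u << bit_width) - 1),
 * e.g. cik_create_bitmask(4) == 0xf and cik_create_bitmask(2) == 0x3.
 */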
1769 
1770 /**
1771  * cik_get_rb_disabled - compute the mask of disabled RBs
1772  *
1773  * @rdev: radeon_device pointer
1774  * @max_rb_num: max RBs (render backends) for the asic
1775  * @se_num: number of SEs (shader engines) for the asic
1776  * @sh_per_se: number of SH blocks per SE for the asic
1777  *
1778  * Calculates the bitmask of disabled RBs (CIK).
1779  * Returns the disabled RB bitmask.
1780  */
1781 static u32 cik_get_rb_disabled(struct radeon_device *rdev,
1782 			      u32 max_rb_num, u32 se_num,
1783 			      u32 sh_per_se)
1784 {
1785 	u32 data, mask;
1786 
1787 	data = RREG32(CC_RB_BACKEND_DISABLE);
1788 	if (data & 1)
1789 		data &= BACKEND_DISABLE_MASK;
1790 	else
1791 		data = 0;
1792 	data |= RREG32(GC_USER_RB_BACKEND_DISABLE);
1793 
1794 	data >>= BACKEND_DISABLE_SHIFT;
1795 
1796 	mask = cik_create_bitmask(max_rb_num / se_num / sh_per_se);
1797 
1798 	return data & mask;
1799 }
1800 
1801 /**
1802  * cik_setup_rb - setup the RBs on the asic
1803  *
1804  * @rdev: radeon_device pointer
1805  * @se_num: number of SEs (shader engines) for the asic
1806  * @sh_per_se: number of SH blocks per SE for the asic
1807  * @max_rb_num: max RBs (render backends) for the asic
1808  *
1809  * Configures per-SE/SH RB registers (CIK).
1810  */
1811 static void cik_setup_rb(struct radeon_device *rdev,
1812 			 u32 se_num, u32 sh_per_se,
1813 			 u32 max_rb_num)
1814 {
1815 	int i, j;
1816 	u32 data, mask;
1817 	u32 disabled_rbs = 0;
1818 	u32 enabled_rbs = 0;
1819 
1820 	for (i = 0; i < se_num; i++) {
1821 		for (j = 0; j < sh_per_se; j++) {
1822 			cik_select_se_sh(rdev, i, j);
1823 			data = cik_get_rb_disabled(rdev, max_rb_num, se_num, sh_per_se);
1824 			disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
1825 		}
1826 	}
1827 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1828 
1829 	mask = 1;
1830 	for (i = 0; i < max_rb_num; i++) {
1831 		if (!(disabled_rbs & mask))
1832 			enabled_rbs |= mask;
1833 		mask <<= 1;
1834 	}
1835 
1836 	for (i = 0; i < se_num; i++) {
1837 		cik_select_se_sh(rdev, i, 0xffffffff);
1838 		data = 0;
1839 		for (j = 0; j < sh_per_se; j++) {
1840 			switch (enabled_rbs & 3) {
1841 			case 1:
1842 				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
1843 				break;
1844 			case 2:
1845 				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
1846 				break;
1847 			case 3:
1848 			default:
1849 				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
1850 				break;
1851 			}
1852 			enabled_rbs >>= 2;
1853 		}
1854 		WREG32(PA_SC_RASTER_CONFIG, data);
1855 	}
1856 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
1857 }
1858 
1859 /**
1860  * cik_gpu_init - setup the 3D engine
1861  *
1862  * @rdev: radeon_device pointer
1863  *
1864  * Configures the 3D engine and tiling configuration
1865  * registers so that the 3D engine is usable.
1866  */
1867 static void cik_gpu_init(struct radeon_device *rdev)
1868 {
1869 	u32 gb_addr_config = RREG32(GB_ADDR_CONFIG);
1870 	u32 mc_shared_chmap, mc_arb_ramcfg;
1871 	u32 hdp_host_path_cntl;
1872 	u32 tmp;
1873 	int i, j;
1874 
1875 	switch (rdev->family) {
1876 	case CHIP_BONAIRE:
1877 		rdev->config.cik.max_shader_engines = 2;
1878 		rdev->config.cik.max_tile_pipes = 4;
1879 		rdev->config.cik.max_cu_per_sh = 7;
1880 		rdev->config.cik.max_sh_per_se = 1;
1881 		rdev->config.cik.max_backends_per_se = 2;
1882 		rdev->config.cik.max_texture_channel_caches = 4;
1883 		rdev->config.cik.max_gprs = 256;
1884 		rdev->config.cik.max_gs_threads = 32;
1885 		rdev->config.cik.max_hw_contexts = 8;
1886 
1887 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1888 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1889 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1890 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1891 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1892 		break;
1893 	case CHIP_KAVERI:
1894 		/* TODO */
1895 		break;
1896 	case CHIP_KABINI:
1897 	default:
1898 		rdev->config.cik.max_shader_engines = 1;
1899 		rdev->config.cik.max_tile_pipes = 2;
1900 		rdev->config.cik.max_cu_per_sh = 2;
1901 		rdev->config.cik.max_sh_per_se = 1;
1902 		rdev->config.cik.max_backends_per_se = 1;
1903 		rdev->config.cik.max_texture_channel_caches = 2;
1904 		rdev->config.cik.max_gprs = 256;
1905 		rdev->config.cik.max_gs_threads = 16;
1906 		rdev->config.cik.max_hw_contexts = 8;
1907 
1908 		rdev->config.cik.sc_prim_fifo_size_frontend = 0x20;
1909 		rdev->config.cik.sc_prim_fifo_size_backend = 0x100;
1910 		rdev->config.cik.sc_hiz_tile_fifo_size = 0x30;
1911 		rdev->config.cik.sc_earlyz_tile_fifo_size = 0x130;
1912 		gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
1913 		break;
1914 	}
1915 
1916 	/* Initialize HDP */
1917 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
1918 		WREG32((0x2c14 + j), 0x00000000);
1919 		WREG32((0x2c18 + j), 0x00000000);
1920 		WREG32((0x2c1c + j), 0x00000000);
1921 		WREG32((0x2c20 + j), 0x00000000);
1922 		WREG32((0x2c24 + j), 0x00000000);
1923 	}
1924 
1925 	WREG32(GRBM_CNTL, GRBM_READ_TIMEOUT(0xff));
1926 
1927 	WREG32(BIF_FB_EN, FB_READ_EN | FB_WRITE_EN);
1928 
1929 	mc_shared_chmap = RREG32(MC_SHARED_CHMAP);
1930 	mc_arb_ramcfg = RREG32(MC_ARB_RAMCFG);
1931 
1932 	rdev->config.cik.num_tile_pipes = rdev->config.cik.max_tile_pipes;
1933 	rdev->config.cik.mem_max_burst_length_bytes = 256;
1934 	tmp = (mc_arb_ramcfg & NOOFCOLS_MASK) >> NOOFCOLS_SHIFT;
1935 	rdev->config.cik.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
1936 	if (rdev->config.cik.mem_row_size_in_kb > 4)
1937 		rdev->config.cik.mem_row_size_in_kb = 4;
1938 	/* XXX use MC settings? */
1939 	rdev->config.cik.shader_engine_tile_size = 32;
1940 	rdev->config.cik.num_gpus = 1;
1941 	rdev->config.cik.multi_gpu_tile_size = 64;
1942 
1943 	/* fix up row size */
1944 	gb_addr_config &= ~ROW_SIZE_MASK;
1945 	switch (rdev->config.cik.mem_row_size_in_kb) {
1946 	case 1:
1947 	default:
1948 		gb_addr_config |= ROW_SIZE(0);
1949 		break;
1950 	case 2:
1951 		gb_addr_config |= ROW_SIZE(1);
1952 		break;
1953 	case 4:
1954 		gb_addr_config |= ROW_SIZE(2);
1955 		break;
1956 	}
1957 
1958 	/* setup tiling info dword.  gb_addr_config is not adequate since it does
1959 	 * not have bank info, so create a custom tiling dword.
1960 	 * bits 3:0   num_pipes
1961 	 * bits 7:4   num_banks
1962 	 * bits 11:8  group_size
1963 	 * bits 15:12 row_size
1964 	 */
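	/* For example (illustrative only), userspace can unpack the fields
	 * again with:
	 *   num_pipes_field  =  tile_config        & 0xf;
	 *   num_banks_field  = (tile_config >> 4)  & 0xf;
	 *   group_size_field = (tile_config >> 8)  & 0xf;
	 *   row_size_field   = (tile_config >> 12) & 0xf;
	 * The field-to-value mapping follows the assignments below.
	 */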
1965 	rdev->config.cik.tile_config = 0;
1966 	switch (rdev->config.cik.num_tile_pipes) {
1967 	case 1:
1968 		rdev->config.cik.tile_config |= (0 << 0);
1969 		break;
1970 	case 2:
1971 		rdev->config.cik.tile_config |= (1 << 0);
1972 		break;
1973 	case 4:
1974 		rdev->config.cik.tile_config |= (2 << 0);
1975 		break;
1976 	case 8:
1977 	default:
1978 		/* XXX what about 12? */
1979 		rdev->config.cik.tile_config |= (3 << 0);
1980 		break;
1981 	}
1982 	if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
1983 		rdev->config.cik.tile_config |= 1 << 4;
1984 	else
1985 		rdev->config.cik.tile_config |= 0 << 4;
1986 	rdev->config.cik.tile_config |=
1987 		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
1988 	rdev->config.cik.tile_config |=
1989 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
1990 
1991 	WREG32(GB_ADDR_CONFIG, gb_addr_config);
1992 	WREG32(HDP_ADDR_CONFIG, gb_addr_config);
1993 	WREG32(DMIF_ADDR_CALC, gb_addr_config);
1994 	WREG32(SDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
1995 	WREG32(SDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
1996 	WREG32(UVD_UDEC_ADDR_CONFIG, gb_addr_config);
1997 	WREG32(UVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
1998 	WREG32(UVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
1999 
2000 	cik_tiling_mode_table_init(rdev);
2001 
2002 	cik_setup_rb(rdev, rdev->config.cik.max_shader_engines,
2003 		     rdev->config.cik.max_sh_per_se,
2004 		     rdev->config.cik.max_backends_per_se);
2005 
2006 	/* set HW defaults for 3D engine */
2007 	WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60));
2008 
2009 	WREG32(SX_DEBUG_1, 0x20);
2010 
2011 	WREG32(TA_CNTL_AUX, 0x00010000);
2012 
2013 	tmp = RREG32(SPI_CONFIG_CNTL);
2014 	tmp |= 0x03000000;
2015 	WREG32(SPI_CONFIG_CNTL, tmp);
2016 
2017 	WREG32(SQ_CONFIG, 1);
2018 
2019 	WREG32(DB_DEBUG, 0);
2020 
2021 	tmp = RREG32(DB_DEBUG2) & ~0xf00fffff;
2022 	tmp |= 0x00000400;
2023 	WREG32(DB_DEBUG2, tmp);
2024 
2025 	tmp = RREG32(DB_DEBUG3) & ~0x0002021c;
2026 	tmp |= 0x00020200;
2027 	WREG32(DB_DEBUG3, tmp);
2028 
2029 	tmp = RREG32(CB_HW_CONTROL) & ~0x00010000;
2030 	tmp |= 0x00018208;
2031 	WREG32(CB_HW_CONTROL, tmp);
2032 
2033 	WREG32(SPI_CONFIG_CNTL_1, VTX_DONE_DELAY(4));
2034 
2035 	WREG32(PA_SC_FIFO_SIZE, (SC_FRONTEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_frontend) |
2036 				 SC_BACKEND_PRIM_FIFO_SIZE(rdev->config.cik.sc_prim_fifo_size_backend) |
2037 				 SC_HIZ_TILE_FIFO_SIZE(rdev->config.cik.sc_hiz_tile_fifo_size) |
2038 				 SC_EARLYZ_TILE_FIFO_SIZE(rdev->config.cik.sc_earlyz_tile_fifo_size)));
2039 
2040 	WREG32(VGT_NUM_INSTANCES, 1);
2041 
2042 	WREG32(CP_PERFMON_CNTL, 0);
2043 
2044 	WREG32(SQ_CONFIG, 0);
2045 
2046 	WREG32(PA_SC_FORCE_EOV_MAX_CNTS, (FORCE_EOV_MAX_CLK_CNT(4095) |
2047 					  FORCE_EOV_MAX_REZ_CNT(255)));
2048 
2049 	WREG32(VGT_CACHE_INVALIDATION, CACHE_INVALIDATION(VC_AND_TC) |
2050 	       AUTO_INVLD_EN(ES_AND_GS_AUTO));
2051 
2052 	WREG32(VGT_GS_VERTEX_REUSE, 16);
2053 	WREG32(PA_SC_LINE_STIPPLE_STATE, 0);
2054 
2055 	tmp = RREG32(HDP_MISC_CNTL);
2056 	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
2057 	WREG32(HDP_MISC_CNTL, tmp);
2058 
2059 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
2060 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
2061 
2062 	WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3));
2063 	WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER);
2064 
2065 	udelay(50);
2066 }
2067 
2068 /*
2069  * GPU scratch registers helpers function.
2070  */
2071 /**
2072  * cik_scratch_init - setup driver info for CP scratch regs
2073  *
2074  * @rdev: radeon_device pointer
2075  *
2076  * Set up the number and offset of the CP scratch registers.
2077  * NOTE: use of CP scratch registers is a legacy interface and
2078  * is not used by default on newer asics (r6xx+).  On newer asics,
2079  * memory buffers are used for fences rather than scratch regs.
2080  */
2081 static void cik_scratch_init(struct radeon_device *rdev)
2082 {
2083 	int i;
2084 
2085 	rdev->scratch.num_reg = 7;
2086 	rdev->scratch.reg_base = SCRATCH_REG0;
2087 	for (i = 0; i < rdev->scratch.num_reg; i++) {
2088 		rdev->scratch.free[i] = true;
2089 		rdev->scratch.reg[i] = rdev->scratch.reg_base + (i * 4);
2090 	}
2091 }
2092 
2093 /**
2094  * cik_ring_test - basic gfx ring test
2095  *
2096  * @rdev: radeon_device pointer
2097  * @ring: radeon_ring structure holding ring information
2098  *
2099  * Allocate a scratch register and write to it using the gfx ring (CIK).
2100  * Provides a basic gfx ring test to verify that the ring is working.
2101  * Used by cik_cp_gfx_resume().
2102  * Returns 0 on success, error on failure.
2103  */
2104 int cik_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
2105 {
2106 	uint32_t scratch;
2107 	uint32_t tmp = 0;
2108 	unsigned i;
2109 	int r;
2110 
2111 	r = radeon_scratch_get(rdev, &scratch);
2112 	if (r) {
2113 		DRM_ERROR("radeon: cp failed to get scratch reg (%d).\n", r);
2114 		return r;
2115 	}
2116 	WREG32(scratch, 0xCAFEDEAD);
2117 	r = radeon_ring_lock(rdev, ring, 3);
2118 	if (r) {
2119 		DRM_ERROR("radeon: cp failed to lock ring %d (%d).\n", ring->idx, r);
2120 		radeon_scratch_free(rdev, scratch);
2121 		return r;
2122 	}
2123 	radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2124 	radeon_ring_write(ring, ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2));
2125 	radeon_ring_write(ring, 0xDEADBEEF);
2126 	radeon_ring_unlock_commit(rdev, ring);
2127 
2128 	for (i = 0; i < rdev->usec_timeout; i++) {
2129 		tmp = RREG32(scratch);
2130 		if (tmp == 0xDEADBEEF)
2131 			break;
2132 		DRM_UDELAY(1);
2133 	}
2134 	if (i < rdev->usec_timeout) {
2135 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
2136 	} else {
2137 		DRM_ERROR("radeon: ring %d test failed (scratch(0x%04X)=0x%08X)\n",
2138 			  ring->idx, scratch, tmp);
2139 		r = -EINVAL;
2140 	}
2141 	radeon_scratch_free(rdev, scratch);
2142 	return r;
2143 }
2144 
2145 /**
2146  * cik_fence_gfx_ring_emit - emit a fence on the gfx ring
2147  *
2148  * @rdev: radeon_device pointer
2149  * @fence: radeon fence object
2150  *
2151  * Emits a fence sequence number on the gfx ring and flushes
2152  * GPU caches.
2153  */
2154 void cik_fence_gfx_ring_emit(struct radeon_device *rdev,
2155 			     struct radeon_fence *fence)
2156 {
2157 	struct radeon_ring *ring = &rdev->ring[fence->ring];
2158 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2159 
2160 	/* EVENT_WRITE_EOP - flush caches, send int */
2161 	radeon_ring_write(ring, PACKET3(PACKET3_EVENT_WRITE_EOP, 4));
2162 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2163 				 EOP_TC_ACTION_EN |
2164 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2165 				 EVENT_INDEX(5)));
2166 	radeon_ring_write(ring, addr & 0xfffffffc);
2167 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | DATA_SEL(1) | INT_SEL(2));
2168 	radeon_ring_write(ring, fence->seq);
2169 	radeon_ring_write(ring, 0);
2170 	/* HDP flush */
2171 	/* We should be using the new WAIT_REG_MEM special op packet here
2172 	 * but it causes the CP to hang
2173 	 */
2174 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2175 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2176 				 WRITE_DATA_DST_SEL(0)));
2177 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2178 	radeon_ring_write(ring, 0);
2179 	radeon_ring_write(ring, 0);
2180 }
2181 
2182 /**
2183  * cik_fence_compute_ring_emit - emit a fence on the compute ring
2184  *
2185  * @rdev: radeon_device pointer
2186  * @fence: radeon fence object
2187  *
2188  * Emits a fence sequence number on the compute ring and flushes
2189  * GPU caches.
2190  */
2191 void cik_fence_compute_ring_emit(struct radeon_device *rdev,
2192 				 struct radeon_fence *fence)
2193 {
2194 	struct radeon_ring *ring = &rdev->ring[fence->ring];
2195 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
2196 
2197 	/* RELEASE_MEM - flush caches, send int */
2198 	radeon_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 5));
2199 	radeon_ring_write(ring, (EOP_TCL1_ACTION_EN |
2200 				 EOP_TC_ACTION_EN |
2201 				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
2202 				 EVENT_INDEX(5)));
2203 	radeon_ring_write(ring, DATA_SEL(1) | INT_SEL(2));
2204 	radeon_ring_write(ring, addr & 0xfffffffc);
2205 	radeon_ring_write(ring, upper_32_bits(addr));
2206 	radeon_ring_write(ring, fence->seq);
2207 	radeon_ring_write(ring, 0);
2208 	/* HDP flush */
2209 	/* We should be using the new WAIT_REG_MEM special op packet here
2210 	 * but it causes the CP to hang
2211 	 */
2212 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2213 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
2214 				 WRITE_DATA_DST_SEL(0)));
2215 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
2216 	radeon_ring_write(ring, 0);
2217 	radeon_ring_write(ring, 0);
2218 }
2219 
2220 void cik_semaphore_ring_emit(struct radeon_device *rdev,
2221 			     struct radeon_ring *ring,
2222 			     struct radeon_semaphore *semaphore,
2223 			     bool emit_wait)
2224 {
2225 	uint64_t addr = semaphore->gpu_addr;
2226 	unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
2227 
2228 	radeon_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
2229 	radeon_ring_write(ring, addr & 0xffffffff);
2230 	radeon_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
2231 }
2232 
2233 /*
2234  * IB stuff
2235  */
2236 /**
2237  * cik_ring_ib_execute - emit an IB (Indirect Buffer) on the gfx ring
2238  *
2239  * @rdev: radeon_device pointer
2240  * @ib: radeon indirect buffer object
2241  *
2242  * Emits a DE (drawing engine) or CE (constant engine) IB
2243  * on the gfx ring.  IBs are usually generated by userspace
2244  * acceleration drivers and submitted to the kernel for
2245  * sheduling on the ring.  This function schedules the IB
2246  * on the gfx ring for execution by the GPU.
2247  */
2248 void cik_ring_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
2249 {
2250 	struct radeon_ring *ring = &rdev->ring[ib->ring];
2251 	u32 header, control = INDIRECT_BUFFER_VALID;
2252 
2253 	if (ib->is_const_ib) {
2254 		/* set switch buffer packet before const IB */
2255 		radeon_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
2256 		radeon_ring_write(ring, 0);
2257 
2258 		header = PACKET3(PACKET3_INDIRECT_BUFFER_CONST, 2);
2259 	} else {
2260 		u32 next_rptr;
2261 		if (ring->rptr_save_reg) {
2262 			next_rptr = ring->wptr + 3 + 4;
2263 			radeon_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
2264 			radeon_ring_write(ring, ((ring->rptr_save_reg -
2265 						  PACKET3_SET_UCONFIG_REG_START) >> 2));
2266 			radeon_ring_write(ring, next_rptr);
2267 		} else if (rdev->wb.enabled) {
2268 			next_rptr = ring->wptr + 5 + 4;
2269 			radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
2270 			radeon_ring_write(ring, WRITE_DATA_DST_SEL(1));
2271 			radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
2272 			radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
2273 			radeon_ring_write(ring, next_rptr);
2274 		}
2275 
2276 		header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
2277 	}
2278 
2279 	control |= ib->length_dw |
2280 		(ib->vm ? (ib->vm->id << 24) : 0);
2281 
2282 	radeon_ring_write(ring, header);
2283 	radeon_ring_write(ring,
2284 #ifdef __BIG_ENDIAN
2285 			  (2 << 0) |
2286 #endif
2287 			  (ib->gpu_addr & 0xFFFFFFFC));
2288 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xFFFF);
2289 	radeon_ring_write(ring, control);
2290 }
2291 
2292 /**
2293  * cik_ib_test - basic gfx ring IB test
2294  *
2295  * @rdev: radeon_device pointer
2296  * @ring: radeon_ring structure holding ring information
2297  *
2298  * Allocate an IB and execute it on the gfx ring (CIK).
2299  * Provides a basic gfx ring test to verify that IBs are working.
2300  * Returns 0 on success, error on failure.
2301  */
2302 int cik_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
2303 {
2304 	struct radeon_ib ib;
2305 	uint32_t scratch;
2306 	uint32_t tmp = 0;
2307 	unsigned i;
2308 	int r;
2309 
2310 	r = radeon_scratch_get(rdev, &scratch);
2311 	if (r) {
2312 		DRM_ERROR("radeon: failed to get scratch reg (%d).\n", r);
2313 		return r;
2314 	}
2315 	WREG32(scratch, 0xCAFEDEAD);
2316 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
2317 	if (r) {
2318 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
2319 		return r;
2320 	}
2321 	ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1);
2322 	ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START) >> 2);
2323 	ib.ptr[2] = 0xDEADBEEF;
2324 	ib.length_dw = 3;
2325 	r = radeon_ib_schedule(rdev, &ib, NULL);
2326 	if (r) {
2327 		radeon_scratch_free(rdev, scratch);
2328 		radeon_ib_free(rdev, &ib);
2329 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
2330 		return r;
2331 	}
2332 	r = radeon_fence_wait(ib.fence, false);
2333 	if (r) {
2334 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
2335 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
		radeon_scratch_free(rdev, scratch);
		radeon_ib_free(rdev, &ib);
2336 		return r;
2337 	for (i = 0; i < rdev->usec_timeout; i++) {
2338 		tmp = RREG32(scratch);
2339 		if (tmp == 0xDEADBEEF)
2340 			break;
2341 		DRM_UDELAY(1);
2342 	}
2343 	if (i < rdev->usec_timeout) {
2344 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
2345 	} else {
2346 		DRM_ERROR("radeon: ib test failed (scratch(0x%04X)=0x%08X)\n",
2347 			  scratch, tmp);
2348 		r = -EINVAL;
2349 	}
2350 	radeon_scratch_free(rdev, scratch);
2351 	radeon_ib_free(rdev, &ib);
2352 	return r;
2353 }
2354 
2355 /*
2356  * CP.
2357  * On CIK, gfx and compute now have independent command processors.
2358  *
2359  * GFX
2360  * Gfx consists of a single ring and can process both gfx jobs and
2361  * compute jobs.  The gfx CP consists of three microengines (ME):
2362  * PFP - Pre-Fetch Parser
2363  * ME - Micro Engine
2364  * CE - Constant Engine
2365  * The PFP and ME make up what is considered the Drawing Engine (DE).
2366  * The CE is an asynchronous engine used for updating buffer descriptors
2367  * used by the DE so that they can be loaded into cache in parallel
2368  * while the DE is processing state update packets.
2369  *
2370  * Compute
2371  * The compute CP consists of two microengines (ME):
2372  * MEC1 - Compute MicroEngine 1
2373  * MEC2 - Compute MicroEngine 2
2374  * Each MEC supports 4 compute pipes and each pipe supports 8 queues.
2375  * The queues are exposed to userspace and are programmed directly
2376  * by the compute runtime.
2377  */
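
/*
 * Minimal sketch (illustrative helper, assumed; not used elsewhere in this
 * file): the number of user-visible compute queues follows directly from
 * the topology described above and matches the bookkeeping in cik_mec_init().
 */
static inline u32 cik_example_compute_queue_count(u32 num_mec)
{
	/* 4 pipes per MEC, 8 queues per pipe */
	return num_mec * 4 * 8; /* 64 on KV (2 MECs), 32 on CI/KB (1 MEC) */
}
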
2378 /**
2379  * cik_cp_gfx_enable - enable/disable the gfx CP MEs
2380  *
2381  * @rdev: radeon_device pointer
2382  * @enable: enable or disable the MEs
2383  *
2384  * Halts or unhalts the gfx MEs.
2385  */
2386 static void cik_cp_gfx_enable(struct radeon_device *rdev, bool enable)
2387 {
2388 	if (enable)
2389 		WREG32(CP_ME_CNTL, 0);
2390 	else {
2391 		WREG32(CP_ME_CNTL, (CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT));
2392 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2393 	}
2394 	udelay(50);
2395 }
2396 
2397 /**
2398  * cik_cp_gfx_load_microcode - load the gfx CP ME ucode
2399  *
2400  * @rdev: radeon_device pointer
2401  *
2402  * Loads the gfx PFP, ME, and CE ucode.
2403  * Returns 0 for success, -EINVAL if the ucode is not available.
2404  */
2405 static int cik_cp_gfx_load_microcode(struct radeon_device *rdev)
2406 {
2407 	const __be32 *fw_data;
2408 	int i;
2409 
2410 	if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw)
2411 		return -EINVAL;
2412 
2413 	cik_cp_gfx_enable(rdev, false);
2414 
2415 	/* PFP */
2416 	fw_data = (const __be32 *)rdev->pfp_fw->data;
2417 	WREG32(CP_PFP_UCODE_ADDR, 0);
2418 	for (i = 0; i < CIK_PFP_UCODE_SIZE; i++)
2419 		WREG32(CP_PFP_UCODE_DATA, be32_to_cpup(fw_data++));
2420 	WREG32(CP_PFP_UCODE_ADDR, 0);
2421 
2422 	/* CE */
2423 	fw_data = (const __be32 *)rdev->ce_fw->data;
2424 	WREG32(CP_CE_UCODE_ADDR, 0);
2425 	for (i = 0; i < CIK_CE_UCODE_SIZE; i++)
2426 		WREG32(CP_CE_UCODE_DATA, be32_to_cpup(fw_data++));
2427 	WREG32(CP_CE_UCODE_ADDR, 0);
2428 
2429 	/* ME */
2430 	fw_data = (const __be32 *)rdev->me_fw->data;
2431 	WREG32(CP_ME_RAM_WADDR, 0);
2432 	for (i = 0; i < CIK_ME_UCODE_SIZE; i++)
2433 		WREG32(CP_ME_RAM_DATA, be32_to_cpup(fw_data++));
2434 	WREG32(CP_ME_RAM_WADDR, 0);
2435 
2436 	WREG32(CP_PFP_UCODE_ADDR, 0);
2437 	WREG32(CP_CE_UCODE_ADDR, 0);
2438 	WREG32(CP_ME_RAM_WADDR, 0);
2439 	WREG32(CP_ME_RAM_RADDR, 0);
2440 	return 0;
2441 }
2442 
2443 /**
2444  * cik_cp_gfx_start - start the gfx ring
2445  *
2446  * @rdev: radeon_device pointer
2447  *
2448  * Enables the ring and loads the clear state context and other
2449  * packets required to init the ring.
2450  * Returns 0 for success, error for failure.
2451  */
2452 static int cik_cp_gfx_start(struct radeon_device *rdev)
2453 {
2454 	struct radeon_ring *ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2455 	int r, i;
2456 
2457 	/* init the CP */
2458 	WREG32(CP_MAX_CONTEXT, rdev->config.cik.max_hw_contexts - 1);
2459 	WREG32(CP_ENDIAN_SWAP, 0);
2460 	WREG32(CP_DEVICE_ID, 1);
2461 
2462 	cik_cp_gfx_enable(rdev, true);
2463 
2464 	r = radeon_ring_lock(rdev, ring, cik_default_size + 17);
2465 	if (r) {
2466 		DRM_ERROR("radeon: cp failed to lock ring (%d).\n", r);
2467 		return r;
2468 	}
2469 
2470 	/* init the CE partitions.  CE only used for gfx on CIK */
2471 	radeon_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
2472 	radeon_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
2473 	radeon_ring_write(ring, 0xc000);
2474 	radeon_ring_write(ring, 0xc000);
2475 
2476 	/* setup clear context state */
2477 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2478 	radeon_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
2479 
2480 	radeon_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
2481 	radeon_ring_write(ring, 0x80000000);
2482 	radeon_ring_write(ring, 0x80000000);
2483 
2484 	for (i = 0; i < cik_default_size; i++)
2485 		radeon_ring_write(ring, cik_default_state[i]);
2486 
2487 	radeon_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
2488 	radeon_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
2489 
2490 	/* set clear context state */
2491 	radeon_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
2492 	radeon_ring_write(ring, 0);
2493 
2494 	radeon_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
2495 	radeon_ring_write(ring, 0x00000316);
2496 	radeon_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
2497 	radeon_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
2498 
2499 	radeon_ring_unlock_commit(rdev, ring);
2500 
2501 	return 0;
2502 }
2503 
2504 /**
2505  * cik_cp_gfx_fini - stop the gfx ring
2506  *
2507  * @rdev: radeon_device pointer
2508  *
2509  * Stop the gfx ring and tear down the driver ring
2510  * info.
2511  */
2512 static void cik_cp_gfx_fini(struct radeon_device *rdev)
2513 {
2514 	cik_cp_gfx_enable(rdev, false);
2515 	radeon_ring_fini(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2516 }
2517 
2518 /**
2519  * cik_cp_gfx_resume - setup the gfx ring buffer registers
2520  *
2521  * @rdev: radeon_device pointer
2522  *
2523  * Program the location and size of the gfx ring buffer
2524  * and test it to make sure it's working.
2525  * Returns 0 for success, error for failure.
2526  */
2527 static int cik_cp_gfx_resume(struct radeon_device *rdev)
2528 {
2529 	struct radeon_ring *ring;
2530 	u32 tmp;
2531 	u32 rb_bufsz;
2532 	u64 rb_addr;
2533 	int r;
2534 
2535 	WREG32(CP_SEM_WAIT_TIMER, 0x0);
2536 	WREG32(CP_SEM_INCOMPLETE_TIMER_CNTL, 0x0);
2537 
2538 	/* Set the write pointer delay */
2539 	WREG32(CP_RB_WPTR_DELAY, 0);
2540 
2541 	/* set the RB to use vmid 0 */
2542 	WREG32(CP_RB_VMID, 0);
2543 
2544 	WREG32(SCRATCH_ADDR, ((rdev->wb.gpu_addr + RADEON_WB_SCRATCH_OFFSET) >> 8) & 0xFFFFFFFF);
2545 
2546 	/* ring 0 - compute and gfx */
2547 	/* Set ring buffer size */
2548 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
2549 	rb_bufsz = drm_order(ring->ring_size / 8);
2550 	tmp = (drm_order(RADEON_GPU_PAGE_SIZE/8) << 8) | rb_bufsz;
2551 #ifdef __BIG_ENDIAN
2552 	tmp |= BUF_SWAP_32BIT;
2553 #endif
2554 	WREG32(CP_RB0_CNTL, tmp);
2555 
2556 	/* Initialize the ring buffer's read and write pointers */
2557 	WREG32(CP_RB0_CNTL, tmp | RB_RPTR_WR_ENA);
2558 	ring->wptr = 0;
2559 	WREG32(CP_RB0_WPTR, ring->wptr);
2560 
2561 	/* set the wb address whether it's enabled or not */
2562 	WREG32(CP_RB0_RPTR_ADDR, (rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFFFFFFFC);
2563 	WREG32(CP_RB0_RPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + RADEON_WB_CP_RPTR_OFFSET) & 0xFF);
2564 
2565 	/* scratch register shadowing is no longer supported */
2566 	WREG32(SCRATCH_UMSK, 0);
2567 
2568 	if (!rdev->wb.enabled)
2569 		tmp |= RB_NO_UPDATE;
2570 
2571 	mdelay(1);
2572 	WREG32(CP_RB0_CNTL, tmp);
2573 
2574 	rb_addr = ring->gpu_addr >> 8;
2575 	WREG32(CP_RB0_BASE, rb_addr);
2576 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));
2577 
2578 	ring->rptr = RREG32(CP_RB0_RPTR);
2579 
2580 	/* start the ring */
2581 	cik_cp_gfx_start(rdev);
2582 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
2583 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX]);
2584 	if (r) {
2585 		rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = false;
2586 		return r;
2587 	}
2588 	return 0;
2589 }
2590 
2591 u32 cik_compute_ring_get_rptr(struct radeon_device *rdev,
2592 			      struct radeon_ring *ring)
2593 {
2594 	u32 rptr;
2595 
2598 	if (rdev->wb.enabled) {
2599 		rptr = le32_to_cpu(rdev->wb.wb[ring->rptr_offs/4]);
2600 	} else {
2601 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2602 		rptr = RREG32(CP_HQD_PQ_RPTR);
2603 		cik_srbm_select(rdev, 0, 0, 0, 0);
2604 	}
2605 	rptr = (rptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2606 
2607 	return rptr;
2608 }
2609 
2610 u32 cik_compute_ring_get_wptr(struct radeon_device *rdev,
2611 			      struct radeon_ring *ring)
2612 {
2613 	u32 wptr;
2614 
2615 	if (rdev->wb.enabled) {
2616 		wptr = le32_to_cpu(rdev->wb.wb[ring->wptr_offs/4]);
2617 	} else {
2618 		cik_srbm_select(rdev, ring->me, ring->pipe, ring->queue, 0);
2619 		wptr = RREG32(CP_HQD_PQ_WPTR);
2620 		cik_srbm_select(rdev, 0, 0, 0, 0);
2621 	}
2622 	wptr = (wptr & ring->ptr_reg_mask) >> ring->ptr_reg_shift;
2623 
2624 	return wptr;
2625 }
2626 
2627 void cik_compute_ring_set_wptr(struct radeon_device *rdev,
2628 			       struct radeon_ring *ring)
2629 {
2630 	u32 wptr = (ring->wptr << ring->ptr_reg_shift) & ring->ptr_reg_mask;
2631 
2632 	rdev->wb.wb[ring->wptr_offs/4] = cpu_to_le32(wptr);
2633 	WDOORBELL32(ring->doorbell_offset, wptr);
2634 }
2635 
2636 /**
2637  * cik_cp_compute_enable - enable/disable the compute CP MEs
2638  *
2639  * @rdev: radeon_device pointer
2640  * @enable: enable or disable the MEs
2641  *
2642  * Halts or unhalts the compute MEs.
2643  */
2644 static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
2645 {
2646 	if (enable)
2647 		WREG32(CP_MEC_CNTL, 0);
2648 	else
2649 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
2650 	udelay(50);
2651 }
2652 
2653 /**
2654  * cik_cp_compute_load_microcode - load the compute CP ME ucode
2655  *
2656  * @rdev: radeon_device pointer
2657  *
2658  * Loads the compute MEC1&2 ucode.
2659  * Returns 0 for success, -EINVAL if the ucode is not available.
2660  */
2661 static int cik_cp_compute_load_microcode(struct radeon_device *rdev)
2662 {
2663 	const __be32 *fw_data;
2664 	int i;
2665 
2666 	if (!rdev->mec_fw)
2667 		return -EINVAL;
2668 
2669 	cik_cp_compute_enable(rdev, false);
2670 
2671 	/* MEC1 */
2672 	fw_data = (const __be32 *)rdev->mec_fw->data;
2673 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2674 	for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2675 		WREG32(CP_MEC_ME1_UCODE_DATA, be32_to_cpup(fw_data++));
2676 	WREG32(CP_MEC_ME1_UCODE_ADDR, 0);
2677 
2678 	if (rdev->family == CHIP_KAVERI) {
2679 		/* MEC2 */
2680 		fw_data = (const __be32 *)rdev->mec_fw->data;
2681 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2682 		for (i = 0; i < CIK_MEC_UCODE_SIZE; i++)
2683 			WREG32(CP_MEC_ME2_UCODE_DATA, be32_to_cpup(fw_data++));
2684 		WREG32(CP_MEC_ME2_UCODE_ADDR, 0);
2685 	}
2686 
2687 	return 0;
2688 }
2689 
2690 /**
2691  * cik_cp_compute_start - start the compute queues
2692  *
2693  * @rdev: radeon_device pointer
2694  *
2695  * Enable the compute queues.
2696  * Returns 0 for success, error for failure.
2697  */
2698 static int cik_cp_compute_start(struct radeon_device *rdev)
2699 {
2700 	cik_cp_compute_enable(rdev, true);
2701 
2702 	return 0;
2703 }
2704 
2705 /**
2706  * cik_cp_compute_fini - stop the compute queues
2707  *
2708  * @rdev: radeon_device pointer
2709  *
2710  * Stop the compute queues and tear down the driver queue
2711  * info.
2712  */
2713 static void cik_cp_compute_fini(struct radeon_device *rdev)
2714 {
2715 	int i, idx, r;
2716 
2717 	cik_cp_compute_enable(rdev, false);
2718 
2719 	for (i = 0; i < 2; i++) {
2720 		if (i == 0)
2721 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
2722 		else
2723 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
2724 
2725 		if (rdev->ring[idx].mqd_obj) {
2726 			r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2727 			if (unlikely(r != 0))
2728 				dev_warn(rdev->dev, "(%d) reserve MQD bo failed\n", r);
2729 
2730 			radeon_bo_unpin(rdev->ring[idx].mqd_obj);
2731 			radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
2732 
2733 			radeon_bo_unref(&rdev->ring[idx].mqd_obj);
2734 			rdev->ring[idx].mqd_obj = NULL;
2735 		}
2736 	}
2737 }
2738 
2739 static void cik_mec_fini(struct radeon_device *rdev)
2740 {
2741 	int r;
2742 
2743 	if (rdev->mec.hpd_eop_obj) {
2744 		r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2745 		if (unlikely(r != 0))
2746 			dev_warn(rdev->dev, "(%d) reserve HPD EOP bo failed\n", r);
2747 		radeon_bo_unpin(rdev->mec.hpd_eop_obj);
2748 		radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2749 
2750 		radeon_bo_unref(&rdev->mec.hpd_eop_obj);
2751 		rdev->mec.hpd_eop_obj = NULL;
2752 	}
2753 }
2754 
2755 #define MEC_HPD_SIZE 2048
2756 
2757 static int cik_mec_init(struct radeon_device *rdev)
2758 {
2759 	int r;
2760 	u32 *hpd;
2761 
2762 	/*
2763 	 * KV:    2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total
2764 	 * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total
2765 	 */
2766 	if (rdev->family == CHIP_KAVERI)
2767 		rdev->mec.num_mec = 2;
2768 	else
2769 		rdev->mec.num_mec = 1;
2770 	rdev->mec.num_pipe = 4;
2771 	rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8;
2772 
2773 	if (rdev->mec.hpd_eop_obj == NULL) {
2774 		r = radeon_bo_create(rdev,
2775 				     rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2,
2776 				     PAGE_SIZE, true,
2777 				     RADEON_GEM_DOMAIN_GTT, NULL,
2778 				     &rdev->mec.hpd_eop_obj);
2779 		if (r) {
2780 			dev_warn(rdev->dev, "(%d) create HPD EOP bo failed\n", r);
2781 			return r;
2782 		}
2783 	}
2784 
2785 	r = radeon_bo_reserve(rdev->mec.hpd_eop_obj, false);
2786 	if (unlikely(r != 0)) {
2787 		cik_mec_fini(rdev);
2788 		return r;
2789 	}
2790 	r = radeon_bo_pin(rdev->mec.hpd_eop_obj, RADEON_GEM_DOMAIN_GTT,
2791 			  &rdev->mec.hpd_eop_gpu_addr);
2792 	if (r) {
2793 		dev_warn(rdev->dev, "(%d) pin HPD EOP bo failed\n", r);
2794 		cik_mec_fini(rdev);
2795 		return r;
2796 	}
2797 	r = radeon_bo_kmap(rdev->mec.hpd_eop_obj, (void **)&hpd);
2798 	if (r) {
2799 		dev_warn(rdev->dev, "(%d) map HPD EOP bo failed\n", r);
2800 		cik_mec_fini(rdev);
2801 		return r;
2802 	}
2803 
2804 	/* clear memory.  Not sure if this is required or not */
2805 	memset(hpd, 0, rdev->mec.num_mec * rdev->mec.num_pipe * MEC_HPD_SIZE * 2);
2806 
2807 	radeon_bo_kunmap(rdev->mec.hpd_eop_obj);
2808 	radeon_bo_unreserve(rdev->mec.hpd_eop_obj);
2809 
2810 	return 0;
2811 }
2812 
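/*
 * Driver-side copy of the per-queue CP_HQD_* (hardware queue descriptor)
 * register state.  cik_cp_compute_resume() fills this out and writes each
 * field to the corresponding register for the SRBM-selected queue.
 */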
2813 struct hqd_registers
2814 {
2815 	u32 cp_mqd_base_addr;
2816 	u32 cp_mqd_base_addr_hi;
2817 	u32 cp_hqd_active;
2818 	u32 cp_hqd_vmid;
2819 	u32 cp_hqd_persistent_state;
2820 	u32 cp_hqd_pipe_priority;
2821 	u32 cp_hqd_queue_priority;
2822 	u32 cp_hqd_quantum;
2823 	u32 cp_hqd_pq_base;
2824 	u32 cp_hqd_pq_base_hi;
2825 	u32 cp_hqd_pq_rptr;
2826 	u32 cp_hqd_pq_rptr_report_addr;
2827 	u32 cp_hqd_pq_rptr_report_addr_hi;
2828 	u32 cp_hqd_pq_wptr_poll_addr;
2829 	u32 cp_hqd_pq_wptr_poll_addr_hi;
2830 	u32 cp_hqd_pq_doorbell_control;
2831 	u32 cp_hqd_pq_wptr;
2832 	u32 cp_hqd_pq_control;
2833 	u32 cp_hqd_ib_base_addr;
2834 	u32 cp_hqd_ib_base_addr_hi;
2835 	u32 cp_hqd_ib_rptr;
2836 	u32 cp_hqd_ib_control;
2837 	u32 cp_hqd_iq_timer;
2838 	u32 cp_hqd_iq_rptr;
2839 	u32 cp_hqd_dequeue_request;
2840 	u32 cp_hqd_dma_offload;
2841 	u32 cp_hqd_sema_cmd;
2842 	u32 cp_hqd_msg_type;
2843 	u32 cp_hqd_atomic0_preop_lo;
2844 	u32 cp_hqd_atomic0_preop_hi;
2845 	u32 cp_hqd_atomic1_preop_lo;
2846 	u32 cp_hqd_atomic1_preop_hi;
2847 	u32 cp_hqd_hq_scheduler0;
2848 	u32 cp_hqd_hq_scheduler1;
2849 	u32 cp_mqd_control;
2850 };
2851 
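/*
 * Layout of the MQD (memory queue descriptor) backing each compute ring.
 * One of these lives in the GTT bo referenced by ring->mqd_obj and the
 * hardware is pointed at it via CP_MQD_BASE_ADDR/_HI.
 */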
2852 struct bonaire_mqd
2853 {
2854 	u32 header;
2855 	u32 dispatch_initiator;
2856 	u32 dimensions[3];
2857 	u32 start_idx[3];
2858 	u32 num_threads[3];
2859 	u32 pipeline_stat_enable;
2860 	u32 perf_counter_enable;
2861 	u32 pgm[2];
2862 	u32 tba[2];
2863 	u32 tma[2];
2864 	u32 pgm_rsrc[2];
2865 	u32 vmid;
2866 	u32 resource_limits;
2867 	u32 static_thread_mgmt01[2];
2868 	u32 tmp_ring_size;
2869 	u32 static_thread_mgmt23[2];
2870 	u32 restart[3];
2871 	u32 thread_trace_enable;
2872 	u32 reserved1;
2873 	u32 user_data[16];
2874 	u32 vgtcs_invoke_count[2];
2875 	struct hqd_registers queue_state;
2876 	u32 dequeue_cntr;
2877 	u32 interrupt_queue[64];
2878 };
2879 
2880 /**
2881  * cik_cp_compute_resume - setup the compute queue registers
2882  *
2883  * @rdev: radeon_device pointer
2884  *
2885  * Program the compute queues and test them to make sure they
2886  * are working.
2887  * Returns 0 for success, error for failure.
2888  */
2889 static int cik_cp_compute_resume(struct radeon_device *rdev)
2890 {
2891 	int r, i, j, idx;
2892 	u32 tmp;
2893 	bool use_doorbell = true;
2894 	u64 hqd_gpu_addr;
2895 	u64 mqd_gpu_addr;
2896 	u64 eop_gpu_addr;
2897 	u64 wb_gpu_addr;
2898 	u32 *buf;
2899 	struct bonaire_mqd *mqd;
2900 
2901 	r = cik_cp_compute_start(rdev);
2902 	if (r)
2903 		return r;
2904 
2905 	/* fix up chicken bits */
2906 	tmp = RREG32(CP_CPF_DEBUG);
2907 	tmp |= (1 << 23);
2908 	WREG32(CP_CPF_DEBUG, tmp);
2909 
2910 	/* init the pipes */
2911 	for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) {
2912 		int me = (i < 4) ? 1 : 2;
2913 		int pipe = (i < 4) ? i : (i - 4);
2914 
2915 		eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2);
2916 
2917 		cik_srbm_select(rdev, me, pipe, 0, 0);
2918 
2919 		/* write the EOP addr */
2920 		WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8);
2921 		WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8);
2922 
2923 		/* set the VMID assigned */
2924 		WREG32(CP_HPD_EOP_VMID, 0);
2925 
2926 		/* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */
2927 		tmp = RREG32(CP_HPD_EOP_CONTROL);
2928 		tmp &= ~EOP_SIZE_MASK;
2929 		tmp |= drm_order(MEC_HPD_SIZE / 8);
2930 		WREG32(CP_HPD_EOP_CONTROL, tmp);
2931 	}
2932 	cik_srbm_select(rdev, 0, 0, 0, 0);
2933 
2934 	/* init the queues.  Just two for now. */
2935 	for (i = 0; i < 2; i++) {
2936 		if (i == 0)
2937 			idx = CAYMAN_RING_TYPE_CP1_INDEX;
2938 		else
2939 			idx = CAYMAN_RING_TYPE_CP2_INDEX;
2940 
2941 		if (rdev->ring[idx].mqd_obj == NULL) {
2942 			r = radeon_bo_create(rdev,
2943 					     sizeof(struct bonaire_mqd),
2944 					     PAGE_SIZE, true,
2945 					     RADEON_GEM_DOMAIN_GTT, NULL,
2946 					     &rdev->ring[idx].mqd_obj);
2947 			if (r) {
2948 				dev_warn(rdev->dev, "(%d) create MQD bo failed\n", r);
2949 				return r;
2950 			}
2951 		}
2952 
2953 		r = radeon_bo_reserve(rdev->ring[idx].mqd_obj, false);
2954 		if (unlikely(r != 0)) {
2955 			cik_cp_compute_fini(rdev);
2956 			return r;
2957 		}
2958 		r = radeon_bo_pin(rdev->ring[idx].mqd_obj, RADEON_GEM_DOMAIN_GTT,
2959 				  &mqd_gpu_addr);
2960 		if (r) {
2961 			dev_warn(rdev->dev, "(%d) pin MQD bo failed\n", r);
2962 			cik_cp_compute_fini(rdev);
2963 			return r;
2964 		}
2965 		r = radeon_bo_kmap(rdev->ring[idx].mqd_obj, (void **)&buf);
2966 		if (r) {
2967 			dev_warn(rdev->dev, "(%d) map MQD bo failed\n", r);
2968 			cik_cp_compute_fini(rdev);
2969 			return r;
2970 		}
2971 
2972 		/* doorbell offset */
2973 		rdev->ring[idx].doorbell_offset =
2974 			(rdev->ring[idx].doorbell_page_num * PAGE_SIZE) + 0;
2975 
2976 		/* init the mqd struct */
2977 		memset(buf, 0, sizeof(struct bonaire_mqd));
2978 
2979 		mqd = (struct bonaire_mqd *)buf;
2980 		mqd->header = 0xC0310800;
2981 		mqd->static_thread_mgmt01[0] = 0xffffffff;
2982 		mqd->static_thread_mgmt01[1] = 0xffffffff;
2983 		mqd->static_thread_mgmt23[0] = 0xffffffff;
2984 		mqd->static_thread_mgmt23[1] = 0xffffffff;
2985 
2986 		cik_srbm_select(rdev, rdev->ring[idx].me,
2987 				rdev->ring[idx].pipe,
2988 				rdev->ring[idx].queue, 0);
2989 
2990 		/* disable wptr polling */
2991 		tmp = RREG32(CP_PQ_WPTR_POLL_CNTL);
2992 		tmp &= ~WPTR_POLL_EN;
2993 		WREG32(CP_PQ_WPTR_POLL_CNTL, tmp);
2994 
2995 		/* enable doorbell? */
2996 		mqd->queue_state.cp_hqd_pq_doorbell_control =
2997 			RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
2998 		if (use_doorbell)
2999 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3000 		else
3001 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_EN;
3002 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3003 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
3004 
3005 		/* disable the queue if it's active */
3006 		mqd->queue_state.cp_hqd_dequeue_request = 0;
3007 		mqd->queue_state.cp_hqd_pq_rptr = 0;
3008 		mqd->queue_state.cp_hqd_pq_wptr = 0;
3009 		if (RREG32(CP_HQD_ACTIVE) & 1) {
3010 			WREG32(CP_HQD_DEQUEUE_REQUEST, 1);
3011 			for (j = 0; j < rdev->usec_timeout; j++) {
3012 				if (!(RREG32(CP_HQD_ACTIVE) & 1))
3013 					break;
3014 				udelay(1);
3015 			}
3016 			WREG32(CP_HQD_DEQUEUE_REQUEST, mqd->queue_state.cp_hqd_dequeue_request);
3017 			WREG32(CP_HQD_PQ_RPTR, mqd->queue_state.cp_hqd_pq_rptr);
3018 			WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3019 		}
3020 
3021 		/* set the pointer to the MQD */
3022 		mqd->queue_state.cp_mqd_base_addr = mqd_gpu_addr & 0xfffffffc;
3023 		mqd->queue_state.cp_mqd_base_addr_hi = upper_32_bits(mqd_gpu_addr);
3024 		WREG32(CP_MQD_BASE_ADDR, mqd->queue_state.cp_mqd_base_addr);
3025 		WREG32(CP_MQD_BASE_ADDR_HI, mqd->queue_state.cp_mqd_base_addr_hi);
3026 		/* set MQD vmid to 0 */
3027 		mqd->queue_state.cp_mqd_control = RREG32(CP_MQD_CONTROL);
3028 		mqd->queue_state.cp_mqd_control &= ~MQD_VMID_MASK;
3029 		WREG32(CP_MQD_CONTROL, mqd->queue_state.cp_mqd_control);
3030 
3031 		/* set the pointer to the HQD, this is similar to CP_RB0_BASE/_HI */
3032 		hqd_gpu_addr = rdev->ring[idx].gpu_addr >> 8;
3033 		mqd->queue_state.cp_hqd_pq_base = hqd_gpu_addr;
3034 		mqd->queue_state.cp_hqd_pq_base_hi = upper_32_bits(hqd_gpu_addr);
3035 		WREG32(CP_HQD_PQ_BASE, mqd->queue_state.cp_hqd_pq_base);
3036 		WREG32(CP_HQD_PQ_BASE_HI, mqd->queue_state.cp_hqd_pq_base_hi);
3037 
3038 		/* set up the HQD, this is similar to CP_RB0_CNTL */
3039 		mqd->queue_state.cp_hqd_pq_control = RREG32(CP_HQD_PQ_CONTROL);
3040 		mqd->queue_state.cp_hqd_pq_control &=
3041 			~(QUEUE_SIZE_MASK | RPTR_BLOCK_SIZE_MASK);
3042 
3043 		mqd->queue_state.cp_hqd_pq_control |=
3044 			drm_order(rdev->ring[idx].ring_size / 8);
3045 		mqd->queue_state.cp_hqd_pq_control |=
3046 			(drm_order(RADEON_GPU_PAGE_SIZE/8) << 8);
3047 #ifdef __BIG_ENDIAN
3048 		mqd->queue_state.cp_hqd_pq_control |= BUF_SWAP_32BIT;
3049 #endif
3050 		mqd->queue_state.cp_hqd_pq_control &=
3051 			~(UNORD_DISPATCH | ROQ_PQ_IB_FLIP | PQ_VOLATILE);
3052 		mqd->queue_state.cp_hqd_pq_control |=
3053 			PRIV_STATE | KMD_QUEUE; /* assuming kernel queue control */
3054 		WREG32(CP_HQD_PQ_CONTROL, mqd->queue_state.cp_hqd_pq_control);
3055 
3056 		/* only used if CP_PQ_WPTR_POLL_CNTL.WPTR_POLL_EN=1 */
3057 		if (i == 0)
3058 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP1_WPTR_OFFSET;
3059 		else
3060 			wb_gpu_addr = rdev->wb.gpu_addr + CIK_WB_CP2_WPTR_OFFSET;
3061 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr = wb_gpu_addr & 0xfffffffc;
3062 		mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi = upper_32_bits(wb_gpu_addr) & 0xffff;
3063 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR, mqd->queue_state.cp_hqd_pq_wptr_poll_addr);
3064 		WREG32(CP_HQD_PQ_WPTR_POLL_ADDR_HI,
3065 		       mqd->queue_state.cp_hqd_pq_wptr_poll_addr_hi);
3066 
3067 		/* set the wb address whether it's enabled or not */
3068 		if (i == 0)
3069 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP1_RPTR_OFFSET;
3070 		else
3071 			wb_gpu_addr = rdev->wb.gpu_addr + RADEON_WB_CP2_RPTR_OFFSET;
3072 		mqd->queue_state.cp_hqd_pq_rptr_report_addr = wb_gpu_addr & 0xfffffffc;
3073 		mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi =
3074 			upper_32_bits(wb_gpu_addr) & 0xffff;
3075 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR,
3076 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr);
3077 		WREG32(CP_HQD_PQ_RPTR_REPORT_ADDR_HI,
3078 		       mqd->queue_state.cp_hqd_pq_rptr_report_addr_hi);
3079 
3080 		/* enable the doorbell if requested */
3081 		if (use_doorbell) {
3082 			mqd->queue_state.cp_hqd_pq_doorbell_control =
3083 				RREG32(CP_HQD_PQ_DOORBELL_CONTROL);
3084 			mqd->queue_state.cp_hqd_pq_doorbell_control &= ~DOORBELL_OFFSET_MASK;
3085 			mqd->queue_state.cp_hqd_pq_doorbell_control |=
3086 				DOORBELL_OFFSET(rdev->ring[idx].doorbell_offset / 4);
3087 			mqd->queue_state.cp_hqd_pq_doorbell_control |= DOORBELL_EN;
3088 			mqd->queue_state.cp_hqd_pq_doorbell_control &=
3089 				~(DOORBELL_SOURCE | DOORBELL_HIT);
3090 
3091 		} else {
3092 			mqd->queue_state.cp_hqd_pq_doorbell_control = 0;
3093 		}
3094 		WREG32(CP_HQD_PQ_DOORBELL_CONTROL,
3095 		       mqd->queue_state.cp_hqd_pq_doorbell_control);
3096 
3097 		/* read and write pointers, similar to CP_RB0_WPTR/_RPTR */
3098 		rdev->ring[idx].wptr = 0;
3099 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
3100 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
3101 		rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
3102 		mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
3103 
3104 		/* set the vmid for the queue */
3105 		mqd->queue_state.cp_hqd_vmid = 0;
3106 		WREG32(CP_HQD_VMID, mqd->queue_state.cp_hqd_vmid);
3107 
3108 		/* activate the queue */
3109 		mqd->queue_state.cp_hqd_active = 1;
3110 		WREG32(CP_HQD_ACTIVE, mqd->queue_state.cp_hqd_active);
3111 
3112 		cik_srbm_select(rdev, 0, 0, 0, 0);
3113 
3114 		radeon_bo_kunmap(rdev->ring[idx].mqd_obj);
3115 		radeon_bo_unreserve(rdev->ring[idx].mqd_obj);
3116 
3117 		rdev->ring[idx].ready = true;
3118 		r = radeon_ring_test(rdev, idx, &rdev->ring[idx]);
3119 		if (r)
3120 			rdev->ring[idx].ready = false;
3121 	}
3122 
3123 	return 0;
3124 }
3125 
3126 static void cik_cp_enable(struct radeon_device *rdev, bool enable)
3127 {
3128 	cik_cp_gfx_enable(rdev, enable);
3129 	cik_cp_compute_enable(rdev, enable);
3130 }
3131 
3132 static int cik_cp_load_microcode(struct radeon_device *rdev)
3133 {
3134 	int r;
3135 
3136 	r = cik_cp_gfx_load_microcode(rdev);
3137 	if (r)
3138 		return r;
3139 	r = cik_cp_compute_load_microcode(rdev);
3140 	if (r)
3141 		return r;
3142 
3143 	return 0;
3144 }
3145 
3146 static void cik_cp_fini(struct radeon_device *rdev)
3147 {
3148 	cik_cp_gfx_fini(rdev);
3149 	cik_cp_compute_fini(rdev);
3150 }
3151 
3152 static int cik_cp_resume(struct radeon_device *rdev)
3153 {
3154 	int r;
3155 
3156 	/* Reset all cp blocks */
3157 	WREG32(GRBM_SOFT_RESET, SOFT_RESET_CP);
3158 	RREG32(GRBM_SOFT_RESET);
3159 	mdelay(15);
3160 	WREG32(GRBM_SOFT_RESET, 0);
3161 	RREG32(GRBM_SOFT_RESET);
3162 
3163 	r = cik_cp_load_microcode(rdev);
3164 	if (r)
3165 		return r;
3166 
3167 	r = cik_cp_gfx_resume(rdev);
3168 	if (r)
3169 		return r;
3170 	r = cik_cp_compute_resume(rdev);
3171 	if (r)
3172 		return r;
3173 
3174 	return 0;
3175 }
3176 
3177 /*
3178  * sDMA - System DMA
3179  * Starting with CIK, the GPU has new asynchronous
3180  * DMA engines.  These engines are used for compute
3181  * and gfx.  There are two DMA engines (SDMA0, SDMA1)
3182  * and each one supports 1 ring buffer used for gfx
3183  * and 2 queues used for compute.
3184  *
3185  * The programming model is very similar to the CP
3186  * (ring buffer, IBs, etc.), but sDMA has its own
3187  * packet format that is different from the PM4 format
3188  * used by the CP. sDMA supports copying data, writing
3189  * embedded data, solid fills, and a number of other
3190  * things.  It also has support for tiling/detiling of
3191  * buffers.
3192  */
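
/*
 * Minimal sketch (illustrative helper, assumed; not used by the driver):
 * emit a single-dword linear WRITE packet using the sDMA packet format,
 * mirroring the next_rptr update done in cik_sdma_ring_ib_execute() below.
 * Assumes the ring was locked with room for the 5 dwords.
 */
static inline void cik_sdma_example_write_dw(struct radeon_ring *ring,
					     u64 gpu_addr, u32 value)
{
	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE,
					    SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
	radeon_ring_write(ring, gpu_addr & 0xfffffffc); /* dst addr lo */
	radeon_ring_write(ring, upper_32_bits(gpu_addr)); /* dst addr hi */
	radeon_ring_write(ring, 1); /* number of DWs to follow */
	radeon_ring_write(ring, value);
}
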
3193 /**
3194  * cik_sdma_ring_ib_execute - Schedule an IB on the DMA engine
3195  *
3196  * @rdev: radeon_device pointer
3197  * @ib: IB object to schedule
3198  *
3199  * Schedule an IB in the DMA ring (CIK).
3200  */
3201 void cik_sdma_ring_ib_execute(struct radeon_device *rdev,
3202 			      struct radeon_ib *ib)
3203 {
3204 	struct radeon_ring *ring = &rdev->ring[ib->ring];
3205 	u32 extra_bits = (ib->vm ? ib->vm->id : 0) & 0xf;
3206 
3207 	if (rdev->wb.enabled) {
3208 		u32 next_rptr = ring->wptr + 5;
3209 		while ((next_rptr & 7) != 4)
3210 			next_rptr++;
3211 		next_rptr += 4;
3212 		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3213 		radeon_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
3214 		radeon_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
3215 		radeon_ring_write(ring, 1); /* number of DWs to follow */
3216 		radeon_ring_write(ring, next_rptr);
3217 	}
3218 
3219 	/* IB packet must end on an 8 DW boundary */
3220 	while ((ring->wptr & 7) != 4)
3221 		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
3222 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits));
3223 	radeon_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */
3224 	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff);
3225 	radeon_ring_write(ring, ib->length_dw);
3226 
3227 }
3228 
3229 /**
3230  * cik_sdma_fence_ring_emit - emit a fence on the DMA ring
3231  *
3232  * @rdev: radeon_device pointer
3233  * @fence: radeon fence object
3234  *
3235  * Add a DMA fence packet to the ring to write
3236  * the fence seq number and DMA trap packet to generate
3237  * an interrupt if needed (CIK).
3238  */
3239 void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
3240 			      struct radeon_fence *fence)
3241 {
3242 	struct radeon_ring *ring = &rdev->ring[fence->ring];
3243 	u64 addr = rdev->fence_drv[fence->ring].gpu_addr;
3244 	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
3245 			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
3246 	u32 ref_and_mask;
3247 
3248 	if (fence->ring == R600_RING_TYPE_DMA_INDEX)
3249 		ref_and_mask = SDMA0;
3250 	else
3251 		ref_and_mask = SDMA1;
3252 
3253 	/* write the fence */
3254 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_FENCE, 0, 0));
3255 	radeon_ring_write(ring, addr & 0xffffffff);
3256 	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3257 	radeon_ring_write(ring, fence->seq);
3258 	/* generate an interrupt */
3259 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
3260 	/* flush HDP */
3261 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
3262 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
3263 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
3264 	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
3265 	radeon_ring_write(ring, ref_and_mask); /* MASK */
3266 	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
3267 }
3268 
3269 /**
3270  * cik_sdma_semaphore_ring_emit - emit a semaphore on the dma ring
3271  *
3272  * @rdev: radeon_device pointer
3273  * @ring: radeon_ring structure holding ring information
3274  * @semaphore: radeon semaphore object
3275  * @emit_wait: wait or signal semaphore
3276  *
3277  * Add a DMA semaphore packet to the ring to wait on or signal
3278  * other rings (CIK).
3279  */
3280 void cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
3281 				  struct radeon_ring *ring,
3282 				  struct radeon_semaphore *semaphore,
3283 				  bool emit_wait)
3284 {
3285 	u64 addr = semaphore->gpu_addr;
3286 	u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
3287 
3288 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
3289 	radeon_ring_write(ring, addr & 0xfffffff8);
3290 	radeon_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
3291 }
3292 
3293 /**
3294  * cik_sdma_gfx_stop - stop the gfx async dma engines
3295  *
3296  * @rdev: radeon_device pointer
3297  *
3298  * Stop the gfx async dma ring buffers (CIK).
3299  */
3300 static void cik_sdma_gfx_stop(struct radeon_device *rdev)
3301 {
3302 	u32 rb_cntl, reg_offset;
3303 	int i;
3304 
3305 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
3306 
3307 	for (i = 0; i < 2; i++) {
3308 		if (i == 0)
3309 			reg_offset = SDMA0_REGISTER_OFFSET;
3310 		else
3311 			reg_offset = SDMA1_REGISTER_OFFSET;
3312 		rb_cntl = RREG32(SDMA0_GFX_RB_CNTL + reg_offset);
3313 		rb_cntl &= ~SDMA_RB_ENABLE;
3314 		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3315 		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
3316 	}
3317 }
3318 
3319 /**
3320  * cik_sdma_rlc_stop - stop the compute async dma engines
3321  *
3322  * @rdev: radeon_device pointer
3323  *
3324  * Stop the compute async dma queues (CIK).
3325  */
3326 static void cik_sdma_rlc_stop(struct radeon_device *rdev)
3327 {
3328 	/* XXX todo */
3329 }
3330 
3331 /**
3332  * cik_sdma_enable - halt or unhalt the async dma engines
3333  *
3334  * @rdev: radeon_device pointer
3335  * @enable: enable/disable the DMA MEs.
3336  *
3337  * Halt or unhalt the async dma engines (CIK).
3338  */
3339 static void cik_sdma_enable(struct radeon_device *rdev, bool enable)
3340 {
3341 	u32 me_cntl, reg_offset;
3342 	int i;
3343 
3344 	for (i = 0; i < 2; i++) {
3345 		if (i == 0)
3346 			reg_offset = SDMA0_REGISTER_OFFSET;
3347 		else
3348 			reg_offset = SDMA1_REGISTER_OFFSET;
3349 		me_cntl = RREG32(SDMA0_ME_CNTL + reg_offset);
3350 		if (enable)
3351 			me_cntl &= ~SDMA_HALT;
3352 		else
3353 			me_cntl |= SDMA_HALT;
3354 		WREG32(SDMA0_ME_CNTL + reg_offset, me_cntl);
3355 	}
3356 }
3357 
3358 /**
3359  * cik_sdma_gfx_resume - setup and start the async dma engines
3360  *
3361  * @rdev: radeon_device pointer
3362  *
3363  * Set up the gfx DMA ring buffers and enable them (CIK).
3364  * Returns 0 for success, error for failure.
3365  */
3366 static int cik_sdma_gfx_resume(struct radeon_device *rdev)
3367 {
3368 	struct radeon_ring *ring;
3369 	u32 rb_cntl, ib_cntl;
3370 	u32 rb_bufsz;
3371 	u32 reg_offset, wb_offset;
3372 	int i, r;
3373 
3374 	for (i = 0; i < 2; i++) {
3375 		if (i == 0) {
3376 			ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
3377 			reg_offset = SDMA0_REGISTER_OFFSET;
3378 			wb_offset = R600_WB_DMA_RPTR_OFFSET;
3379 		} else {
3380 			ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
3381 			reg_offset = SDMA1_REGISTER_OFFSET;
3382 			wb_offset = CAYMAN_WB_DMA1_RPTR_OFFSET;
3383 		}
3384 
3385 		WREG32(SDMA0_SEM_INCOMPLETE_TIMER_CNTL + reg_offset, 0);
3386 		WREG32(SDMA0_SEM_WAIT_FAIL_TIMER_CNTL + reg_offset, 0);
3387 
3388 		/* Set ring buffer size in dwords */
3389 		rb_bufsz = drm_order(ring->ring_size / 4);
3390 		rb_cntl = rb_bufsz << 1;
3391 #ifdef __BIG_ENDIAN
3392 		rb_cntl |= SDMA_RB_SWAP_ENABLE | SDMA_RPTR_WRITEBACK_SWAP_ENABLE;
3393 #endif
3394 		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
3395 
3396 		/* Initialize the ring buffer's read and write pointers */
3397 		WREG32(SDMA0_GFX_RB_RPTR + reg_offset, 0);
3398 		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, 0);
3399 
3400 		/* set the wb address whether it's enabled or not */
3401 		WREG32(SDMA0_GFX_RB_RPTR_ADDR_HI + reg_offset,
3402 		       upper_32_bits(rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF);
3403 		WREG32(SDMA0_GFX_RB_RPTR_ADDR_LO + reg_offset,
3404 		       ((rdev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC));
3405 
3406 		if (rdev->wb.enabled)
3407 			rb_cntl |= SDMA_RPTR_WRITEBACK_ENABLE;
3408 
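		/* the ring base is a 256-byte-aligned address: bits 39:8 go
		 * in RB_BASE, bits 63:40 in RB_BASE_HI
		 */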
3409 		WREG32(SDMA0_GFX_RB_BASE + reg_offset, ring->gpu_addr >> 8);
3410 		WREG32(SDMA0_GFX_RB_BASE_HI + reg_offset, ring->gpu_addr >> 40);
3411 
3412 		ring->wptr = 0;
3413 		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);
3414 
3415 		ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
3416 
3417 		/* enable DMA RB */
3418 		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);
3419 
3420 		ib_cntl = SDMA_IB_ENABLE;
3421 #ifdef __BIG_ENDIAN
3422 		ib_cntl |= SDMA_IB_SWAP_ENABLE;
3423 #endif
3424 		/* enable DMA IBs */
3425 		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, ib_cntl);
3426 
3427 		ring->ready = true;
3428 
3429 		r = radeon_ring_test(rdev, ring->idx, ring);
3430 		if (r) {
3431 			ring->ready = false;
3432 			return r;
3433 		}
3434 	}
3435 
3436 	radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
3437 
3438 	return 0;
3439 }
3440 
3441 /**
3442  * cik_sdma_rlc_resume - setup and start the async dma engines
3443  *
3444  * @rdev: radeon_device pointer
3445  *
3446  * Set up the compute DMA queues and enable them (CIK).
3447  * Returns 0 for success, error for failure.
3448  */
3449 static int cik_sdma_rlc_resume(struct radeon_device *rdev)
3450 {
3451 	/* XXX todo */
3452 	return 0;
3453 }
3454 
3455 /**
3456  * cik_sdma_load_microcode - load the sDMA ME ucode
3457  *
3458  * @rdev: radeon_device pointer
3459  *
3460  * Loads the sDMA0/1 ucode.
3461  * Returns 0 for success, -EINVAL if the ucode is not available.
3462  */
3463 static int cik_sdma_load_microcode(struct radeon_device *rdev)
3464 {
3465 	const __be32 *fw_data;
3466 	int i;
3467 
3468 	if (!rdev->sdma_fw)
3469 		return -EINVAL;
3470 
3471 	/* stop the gfx rings and rlc compute queues */
3472 	cik_sdma_gfx_stop(rdev);
3473 	cik_sdma_rlc_stop(rdev);
3474 
3475 	/* halt the MEs */
3476 	cik_sdma_enable(rdev, false);
3477 
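	/* both engines are loaded from the same firmware image; the version
	 * dword is written immediately after the ucode words for each engine
	 */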
3478 	/* sdma0 */
3479 	fw_data = (const __be32 *)rdev->sdma_fw->data;
3480 	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3481 	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3482 		WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3483 	WREG32(SDMA0_UCODE_DATA + SDMA0_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3484 
3485 	/* sdma1 */
3486 	fw_data = (const __be32 *)rdev->sdma_fw->data;
3487 	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3488 	for (i = 0; i < CIK_SDMA_UCODE_SIZE; i++)
3489 		WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, be32_to_cpup(fw_data++));
3490 	WREG32(SDMA0_UCODE_DATA + SDMA1_REGISTER_OFFSET, CIK_SDMA_UCODE_VERSION);
3491 
3492 	WREG32(SDMA0_UCODE_ADDR + SDMA0_REGISTER_OFFSET, 0);
3493 	WREG32(SDMA0_UCODE_ADDR + SDMA1_REGISTER_OFFSET, 0);
3494 	return 0;
3495 }
3496 
3497 /**
3498  * cik_sdma_resume - setup and start the async dma engines
3499  *
3500  * @rdev: radeon_device pointer
3501  *
3502  * Set up the DMA engines and enable them (CIK).
3503  * Returns 0 for success, error for failure.
3504  */
3505 static int cik_sdma_resume(struct radeon_device *rdev)
3506 {
3507 	int r;
3508 
3509 	/* Reset dma */
3510 	WREG32(SRBM_SOFT_RESET, SOFT_RESET_SDMA | SOFT_RESET_SDMA1);
3511 	RREG32(SRBM_SOFT_RESET);
3512 	udelay(50);
3513 	WREG32(SRBM_SOFT_RESET, 0);
3514 	RREG32(SRBM_SOFT_RESET);
3515 
3516 	r = cik_sdma_load_microcode(rdev);
3517 	if (r)
3518 		return r;
3519 
3520 	/* unhalt the MEs */
3521 	cik_sdma_enable(rdev, true);
3522 
3523 	/* start the gfx rings and rlc compute queues */
3524 	r = cik_sdma_gfx_resume(rdev);
3525 	if (r)
3526 		return r;
3527 	r = cik_sdma_rlc_resume(rdev);
3528 	if (r)
3529 		return r;
3530 
3531 	return 0;
3532 }
3533 
3534 /**
3535  * cik_sdma_fini - tear down the async dma engines
3536  *
3537  * @rdev: radeon_device pointer
3538  *
3539  * Stop the async dma engines and free the rings (CIK).
3540  */
3541 static void cik_sdma_fini(struct radeon_device *rdev)
3542 {
3543 	/* stop the gfx rings and rlc compute queues */
3544 	cik_sdma_gfx_stop(rdev);
3545 	cik_sdma_rlc_stop(rdev);
3546 	/* halt the MEs */
3547 	cik_sdma_enable(rdev, false);
3548 	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
3549 	radeon_ring_fini(rdev, &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX]);
3550 	/* XXX - compute dma queue tear down */
3551 }
3552 
3553 /**
3554  * cik_copy_dma - copy pages using the DMA engine
3555  *
3556  * @rdev: radeon_device pointer
3557  * @src_offset: src GPU address
3558  * @dst_offset: dst GPU address
3559  * @num_gpu_pages: number of GPU pages to xfer
3560  * @fence: radeon fence object
3561  *
3562  * Copy GPU paging using the DMA engine (CIK).
3563  * Used by the radeon ttm implementation to move pages if
3564  * registered as the asic copy callback.
3565  */
3566 int cik_copy_dma(struct radeon_device *rdev,
3567 		 uint64_t src_offset, uint64_t dst_offset,
3568 		 unsigned num_gpu_pages,
3569 		 struct radeon_fence **fence)
3570 {
3571 	struct radeon_semaphore *sem = NULL;
3572 	int ring_index = rdev->asic->copy.dma_ring_index;
3573 	struct radeon_ring *ring = &rdev->ring[ring_index];
3574 	u32 size_in_bytes, cur_size_in_bytes;
3575 	int i, num_loops;
3576 	int r = 0;
3577 
3578 	r = radeon_semaphore_create(rdev, &sem);
3579 	if (r) {
3580 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3581 		return r;
3582 	}
3583 
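	/* each SDMA linear copy packet covers at most 0x1fffff bytes, so the
	 * transfer is split accordingly; 7 DW are emitted per copy packet and
	 * the extra 14 DW reserved below are headroom for the optional
	 * semaphore sync and the fence/trap emission
	 */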
3584 	size_in_bytes = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT);
3585 	num_loops = DIV_ROUND_UP(size_in_bytes, 0x1fffff);
3586 	r = radeon_ring_lock(rdev, ring, num_loops * 7 + 14);
3587 	if (r) {
3588 		DRM_ERROR("radeon: moving bo (%d).\n", r);
3589 		radeon_semaphore_free(rdev, &sem, NULL);
3590 		return r;
3591 	}
3592 
3593 	if (radeon_fence_need_sync(*fence, ring->idx)) {
3594 		radeon_semaphore_sync_rings(rdev, sem, (*fence)->ring,
3595 					    ring->idx);
3596 		radeon_fence_note_sync(*fence, ring->idx);
3597 	} else {
3598 		radeon_semaphore_free(rdev, &sem, NULL);
3599 	}
3600 
3601 	for (i = 0; i < num_loops; i++) {
3602 		cur_size_in_bytes = size_in_bytes;
3603 		if (cur_size_in_bytes > 0x1fffff)
3604 			cur_size_in_bytes = 0x1fffff;
3605 		size_in_bytes -= cur_size_in_bytes;
3606 		radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0));
3607 		radeon_ring_write(ring, cur_size_in_bytes);
3608 		radeon_ring_write(ring, 0); /* src/dst endian swap */
3609 		radeon_ring_write(ring, src_offset & 0xffffffff);
3610 		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xffffffff);
3611 		radeon_ring_write(ring, dst_offset & 0xfffffffc);
3612 		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xffffffff);
3613 		src_offset += cur_size_in_bytes;
3614 		dst_offset += cur_size_in_bytes;
3615 	}
3616 
3617 	r = radeon_fence_emit(rdev, fence, ring->idx);
3618 	if (r) {
3619 		radeon_ring_unlock_undo(rdev, ring);
3620 		return r;
3621 	}
3622 
3623 	radeon_ring_unlock_commit(rdev, ring);
3624 	radeon_semaphore_free(rdev, &sem, *fence);
3625 
3626 	return r;
3627 }
3628 
3629 /**
3630  * cik_sdma_ring_test - simple async dma engine test
3631  *
3632  * @rdev: radeon_device pointer
3633  * @ring: radeon_ring structure holding ring information
3634  *
3635  * Test the DMA engine by using it to write a
3636  * value to memory (CIK).
3637  * Returns 0 for success, error for failure.
3638  */
3639 int cik_sdma_ring_test(struct radeon_device *rdev,
3640 		       struct radeon_ring *ring)
3641 {
3642 	unsigned i;
3643 	int r;
3644 	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3645 	u32 tmp;
3646 
3647 	if (!ptr) {
3648 		DRM_ERROR("invalid vram scratch pointer\n");
3649 		return -EINVAL;
3650 	}
3651 
3652 	tmp = 0xCAFEDEAD;
3653 	writel(tmp, ptr);
3654 
3655 	r = radeon_ring_lock(rdev, ring, 4);
3656 	if (r) {
3657 		DRM_ERROR("radeon: dma failed to lock ring %d (%d).\n", ring->idx, r);
3658 		return r;
3659 	}
3660 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
3661 	radeon_ring_write(ring, rdev->vram_scratch.gpu_addr & 0xfffffffc);
3662 	radeon_ring_write(ring, upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff);
3663 	radeon_ring_write(ring, 1); /* number of DWs to follow */
3664 	radeon_ring_write(ring, 0xDEADBEEF);
3665 	radeon_ring_unlock_commit(rdev, ring);
3666 
3667 	for (i = 0; i < rdev->usec_timeout; i++) {
3668 		tmp = readl(ptr);
3669 		if (tmp == 0xDEADBEEF)
3670 			break;
3671 		DRM_UDELAY(1);
3672 	}
3673 
3674 	if (i < rdev->usec_timeout) {
3675 		DRM_INFO("ring test on %d succeeded in %d usecs\n", ring->idx, i);
3676 	} else {
3677 		DRM_ERROR("radeon: ring %d test failed (0x%08X)\n",
3678 			  ring->idx, tmp);
3679 		r = -EINVAL;
3680 	}
3681 	return r;
3682 }
3683 
3684 /**
3685  * cik_sdma_ib_test - test an IB on the DMA engine
3686  *
3687  * @rdev: radeon_device pointer
3688  * @ring: radeon_ring structure holding ring information
3689  *
3690  * Test a simple IB in the DMA ring (CIK).
3691  * Returns 0 on success, error on failure.
3692  */
3693 int cik_sdma_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
3694 {
3695 	struct radeon_ib ib;
3696 	unsigned i;
3697 	int r;
3698 	void __iomem *ptr = (void *)rdev->vram_scratch.ptr;
3699 	u32 tmp = 0;
3700 
3701 	if (!ptr) {
3702 		DRM_ERROR("invalid vram scratch pointer\n");
3703 		return -EINVAL;
3704 	}
3705 
3706 	tmp = 0xCAFEDEAD;
3707 	writel(tmp, ptr);
3708 
3709 	r = radeon_ib_get(rdev, ring->idx, &ib, NULL, 256);
3710 	if (r) {
3711 		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
3712 		return r;
3713 	}
3714 
3715 	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
3716 	ib.ptr[1] = rdev->vram_scratch.gpu_addr & 0xfffffffc;
3717 	ib.ptr[2] = upper_32_bits(rdev->vram_scratch.gpu_addr) & 0xffffffff;
3718 	ib.ptr[3] = 1;
3719 	ib.ptr[4] = 0xDEADBEEF;
3720 	ib.length_dw = 5;
3721 
3722 	r = radeon_ib_schedule(rdev, &ib, NULL);
3723 	if (r) {
3724 		radeon_ib_free(rdev, &ib);
3725 		DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
3726 		return r;
3727 	}
3728 	r = radeon_fence_wait(ib.fence, false);
3729 	if (r) {
3730 		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
3731 		return r;
3732 	}
3733 	for (i = 0; i < rdev->usec_timeout; i++) {
3734 		tmp = readl(ptr);
3735 		if (tmp == 0xDEADBEEF)
3736 			break;
3737 		DRM_UDELAY(1);
3738 	}
3739 	if (i < rdev->usec_timeout) {
3740 		DRM_INFO("ib test on ring %d succeeded in %u usecs\n", ib.fence->ring, i);
3741 	} else {
3742 		DRM_ERROR("radeon: ib test failed (0x%08X)\n", tmp);
3743 		r = -EINVAL;
3744 	}
3745 	radeon_ib_free(rdev, &ib);
3746 	return r;
3747 }
3748 
3750 static void cik_print_gpu_status_regs(struct radeon_device *rdev)
3751 {
3752 	dev_info(rdev->dev, "  GRBM_STATUS=0x%08X\n",
3753 		RREG32(GRBM_STATUS));
3754 	dev_info(rdev->dev, "  GRBM_STATUS2=0x%08X\n",
3755 		RREG32(GRBM_STATUS2));
3756 	dev_info(rdev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
3757 		RREG32(GRBM_STATUS_SE0));
3758 	dev_info(rdev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
3759 		RREG32(GRBM_STATUS_SE1));
3760 	dev_info(rdev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
3761 		RREG32(GRBM_STATUS_SE2));
3762 	dev_info(rdev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
3763 		RREG32(GRBM_STATUS_SE3));
3764 	dev_info(rdev->dev, "  SRBM_STATUS=0x%08X\n",
3765 		RREG32(SRBM_STATUS));
3766 	dev_info(rdev->dev, "  SRBM_STATUS2=0x%08X\n",
3767 		RREG32(SRBM_STATUS2));
3768 	dev_info(rdev->dev, "  SDMA0_STATUS_REG   = 0x%08X\n",
3769 		RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
3770 	dev_info(rdev->dev, "  SDMA1_STATUS_REG   = 0x%08X\n",
3771 		 RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
3772 	dev_info(rdev->dev, "  CP_STAT = 0x%08x\n", RREG32(CP_STAT));
3773 	dev_info(rdev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
3774 		 RREG32(CP_STALLED_STAT1));
3775 	dev_info(rdev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
3776 		 RREG32(CP_STALLED_STAT2));
3777 	dev_info(rdev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
3778 		 RREG32(CP_STALLED_STAT3));
3779 	dev_info(rdev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
3780 		 RREG32(CP_CPF_BUSY_STAT));
3781 	dev_info(rdev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
3782 		 RREG32(CP_CPF_STALLED_STAT1));
3783 	dev_info(rdev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(CP_CPF_STATUS));
3784 	dev_info(rdev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(CP_CPC_BUSY_STAT));
3785 	dev_info(rdev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
3786 		 RREG32(CP_CPC_STALLED_STAT1));
3787 	dev_info(rdev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(CP_CPC_STATUS));
3788 }
3789 
3790 /**
3791  * cik_gpu_check_soft_reset - check which blocks are busy
3792  *
3793  * @rdev: radeon_device pointer
3794  *
3795  * Check which blocks are busy and return the relevant reset
3796  * mask to be used by cik_gpu_soft_reset().
3797  * Returns a mask of the blocks to be reset.
3798  */
3799 static u32 cik_gpu_check_soft_reset(struct radeon_device *rdev)
3800 {
3801 	u32 reset_mask = 0;
3802 	u32 tmp;
3803 
3804 	/* GRBM_STATUS */
3805 	tmp = RREG32(GRBM_STATUS);
3806 	if (tmp & (PA_BUSY | SC_BUSY |
3807 		   BCI_BUSY | SX_BUSY |
3808 		   TA_BUSY | VGT_BUSY |
3809 		   DB_BUSY | CB_BUSY |
3810 		   GDS_BUSY | SPI_BUSY |
3811 		   IA_BUSY | IA_BUSY_NO_DMA))
3812 		reset_mask |= RADEON_RESET_GFX;
3813 
3814 	if (tmp & (CP_BUSY | CP_COHERENCY_BUSY))
3815 		reset_mask |= RADEON_RESET_CP;
3816 
3817 	/* GRBM_STATUS2 */
3818 	tmp = RREG32(GRBM_STATUS2);
3819 	if (tmp & RLC_BUSY)
3820 		reset_mask |= RADEON_RESET_RLC;
3821 
3822 	/* SDMA0_STATUS_REG */
3823 	tmp = RREG32(SDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
3824 	if (!(tmp & SDMA_IDLE))
3825 		reset_mask |= RADEON_RESET_DMA;
3826 
3827 	/* SDMA1_STATUS_REG */
3828 	tmp = RREG32(SDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
3829 	if (!(tmp & SDMA_IDLE))
3830 		reset_mask |= RADEON_RESET_DMA1;
3831 
3832 	/* SRBM_STATUS2 */
3833 	tmp = RREG32(SRBM_STATUS2);
3834 	if (tmp & SDMA_BUSY)
3835 		reset_mask |= RADEON_RESET_DMA;
3836 
3837 	if (tmp & SDMA1_BUSY)
3838 		reset_mask |= RADEON_RESET_DMA1;
3839 
3840 	/* SRBM_STATUS */
3841 	tmp = RREG32(SRBM_STATUS);
3842 
3843 	if (tmp & IH_BUSY)
3844 		reset_mask |= RADEON_RESET_IH;
3845 
3846 	if (tmp & SEM_BUSY)
3847 		reset_mask |= RADEON_RESET_SEM;
3848 
3849 	if (tmp & GRBM_RQ_PENDING)
3850 		reset_mask |= RADEON_RESET_GRBM;
3851 
3852 	if (tmp & VMC_BUSY)
3853 		reset_mask |= RADEON_RESET_VMC;
3854 
3855 	if (tmp & (MCB_BUSY | MCB_NON_DISPLAY_BUSY |
3856 		   MCC_BUSY | MCD_BUSY))
3857 		reset_mask |= RADEON_RESET_MC;
3858 
3859 	if (evergreen_is_display_hung(rdev))
3860 		reset_mask |= RADEON_RESET_DISPLAY;
3861 
3862 	/* Skip MC reset as it's most likely not hung, just busy */
3863 	if (reset_mask & RADEON_RESET_MC) {
3864 		DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
3865 		reset_mask &= ~RADEON_RESET_MC;
3866 	}
3867 
3868 	return reset_mask;
3869 }
3870 
3871 /**
3872  * cik_gpu_soft_reset - soft reset GPU
3873  *
3874  * @rdev: radeon_device pointer
3875  * @reset_mask: mask of which blocks to reset
3876  *
3877  * Soft reset the blocks specified in @reset_mask.
3878  */
3879 static void cik_gpu_soft_reset(struct radeon_device *rdev, u32 reset_mask)
3880 {
3881 	struct evergreen_mc_save save;
3882 	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
3883 	u32 tmp;
3884 
3885 	if (reset_mask == 0)
3886 		return;
3887 
3888 	dev_info(rdev->dev, "GPU softreset: 0x%08X\n", reset_mask);
3889 
3890 	cik_print_gpu_status_regs(rdev);
3891 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
3892 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
3893 	dev_info(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
3894 		 RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
3895 
3896 	/* stop the rlc */
3897 	cik_rlc_stop(rdev);
3898 
3899 	/* Disable GFX parsing/prefetching */
3900 	WREG32(CP_ME_CNTL, CP_ME_HALT | CP_PFP_HALT | CP_CE_HALT);
3901 
3902 	/* Disable MEC parsing/prefetching */
3903 	WREG32(CP_MEC_CNTL, MEC_ME1_HALT | MEC_ME2_HALT);
3904 
3905 	if (reset_mask & RADEON_RESET_DMA) {
3906 		/* sdma0 */
3907 		tmp = RREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET);
3908 		tmp |= SDMA_HALT;
3909 		WREG32(SDMA0_ME_CNTL + SDMA0_REGISTER_OFFSET, tmp);
3910 	}
3911 	if (reset_mask & RADEON_RESET_DMA1) {
3912 		/* sdma1 */
3913 		tmp = RREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET);
3914 		tmp |= SDMA_HALT;
3915 		WREG32(SDMA0_ME_CNTL + SDMA1_REGISTER_OFFSET, tmp);
3916 	}
3917 
3918 	evergreen_mc_stop(rdev, &save);
3919 	if (evergreen_mc_wait_for_idle(rdev)) {
3920 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
3921 	}
3922 
3923 	if (reset_mask & (RADEON_RESET_GFX | RADEON_RESET_COMPUTE | RADEON_RESET_CP))
3924 		grbm_soft_reset = SOFT_RESET_CP | SOFT_RESET_GFX;
3925 
3926 	if (reset_mask & RADEON_RESET_CP) {
3927 		grbm_soft_reset |= SOFT_RESET_CP;
3928 
3929 		srbm_soft_reset |= SOFT_RESET_GRBM;
3930 	}
3931 
3932 	if (reset_mask & RADEON_RESET_DMA)
3933 		srbm_soft_reset |= SOFT_RESET_SDMA;
3934 
3935 	if (reset_mask & RADEON_RESET_DMA1)
3936 		srbm_soft_reset |= SOFT_RESET_SDMA1;
3937 
3938 	if (reset_mask & RADEON_RESET_DISPLAY)
3939 		srbm_soft_reset |= SOFT_RESET_DC;
3940 
3941 	if (reset_mask & RADEON_RESET_RLC)
3942 		grbm_soft_reset |= SOFT_RESET_RLC;
3943 
3944 	if (reset_mask & RADEON_RESET_SEM)
3945 		srbm_soft_reset |= SOFT_RESET_SEM;
3946 
3947 	if (reset_mask & RADEON_RESET_IH)
3948 		srbm_soft_reset |= SOFT_RESET_IH;
3949 
3950 	if (reset_mask & RADEON_RESET_GRBM)
3951 		srbm_soft_reset |= SOFT_RESET_GRBM;
3952 
3953 	if (reset_mask & RADEON_RESET_VMC)
3954 		srbm_soft_reset |= SOFT_RESET_VMC;
3955 
3956 	if (!(rdev->flags & RADEON_IS_IGP)) {
3957 		if (reset_mask & RADEON_RESET_MC)
3958 			srbm_soft_reset |= SOFT_RESET_MC;
3959 	}
3960 
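	/* assert the GRBM-controlled block resets first, then the SRBM ones;
	 * each is held for ~50us and the register is read back so the write
	 * is posted before the reset is released
	 */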
3961 	if (grbm_soft_reset) {
3962 		tmp = RREG32(GRBM_SOFT_RESET);
3963 		tmp |= grbm_soft_reset;
3964 		dev_info(rdev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
3965 		WREG32(GRBM_SOFT_RESET, tmp);
3966 		tmp = RREG32(GRBM_SOFT_RESET);
3967 
3968 		udelay(50);
3969 
3970 		tmp &= ~grbm_soft_reset;
3971 		WREG32(GRBM_SOFT_RESET, tmp);
3972 		tmp = RREG32(GRBM_SOFT_RESET);
3973 	}
3974 
3975 	if (srbm_soft_reset) {
3976 		tmp = RREG32(SRBM_SOFT_RESET);
3977 		tmp |= srbm_soft_reset;
3978 		dev_info(rdev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
3979 		WREG32(SRBM_SOFT_RESET, tmp);
3980 		tmp = RREG32(SRBM_SOFT_RESET);
3981 
3982 		udelay(50);
3983 
3984 		tmp &= ~srbm_soft_reset;
3985 		WREG32(SRBM_SOFT_RESET, tmp);
3986 		tmp = RREG32(SRBM_SOFT_RESET);
3987 	}
3988 
3989 	/* Wait a little for things to settle down */
3990 	udelay(50);
3991 
3992 	evergreen_mc_resume(rdev, &save);
3993 	udelay(50);
3994 
3995 	cik_print_gpu_status_regs(rdev);
3996 }
3997 
3998 /**
3999  * cik_asic_reset - soft reset GPU
4000  *
4001  * @rdev: radeon_device pointer
4002  *
4003  * Look up which blocks are hung and attempt
4004  * to reset them.
4005  * Returns 0 for success.
4006  */
4007 int cik_asic_reset(struct radeon_device *rdev)
4008 {
4009 	u32 reset_mask;
4010 
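	/* determine which blocks are hung, flag the engine as hung in the
	 * BIOS scratch registers, soft reset those blocks, then clear the
	 * flag if everything reports idle afterwards
	 */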
4011 	reset_mask = cik_gpu_check_soft_reset(rdev);
4012 
4013 	if (reset_mask)
4014 		r600_set_bios_scratch_engine_hung(rdev, true);
4015 
4016 	cik_gpu_soft_reset(rdev, reset_mask);
4017 
4018 	reset_mask = cik_gpu_check_soft_reset(rdev);
4019 
4020 	if (!reset_mask)
4021 		r600_set_bios_scratch_engine_hung(rdev, false);
4022 
4023 	return 0;
4024 }
4025 
4026 /**
4027  * cik_gfx_is_lockup - check if the 3D engine is locked up
4028  *
4029  * @rdev: radeon_device pointer
4030  * @ring: radeon_ring structure holding ring information
4031  *
4032  * Check if the 3D engine is locked up (CIK).
4033  * Returns true if the engine is locked, false if not.
4034  */
4035 bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4036 {
4037 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4038 
4039 	if (!(reset_mask & (RADEON_RESET_GFX |
4040 			    RADEON_RESET_COMPUTE |
4041 			    RADEON_RESET_CP))) {
4042 		radeon_ring_lockup_update(ring);
4043 		return false;
4044 	}
4045 	/* force CP activities */
4046 	radeon_ring_force_activity(rdev, ring);
4047 	return radeon_ring_test_lockup(rdev, ring);
4048 }
4049 
4050 /**
4051  * cik_sdma_is_lockup - Check if the DMA engine is locked up
4052  *
4053  * @rdev: radeon_device pointer
4054  * @ring: radeon_ring structure holding ring information
4055  *
4056  * Check if the async DMA engine is locked up (CIK).
4057  * Returns true if the engine appears to be locked up, false if not.
4058  */
4059 bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
4060 {
4061 	u32 reset_mask = cik_gpu_check_soft_reset(rdev);
4062 	u32 mask;
4063 
4064 	if (ring->idx == R600_RING_TYPE_DMA_INDEX)
4065 		mask = RADEON_RESET_DMA;
4066 	else
4067 		mask = RADEON_RESET_DMA1;
4068 
4069 	if (!(reset_mask & mask)) {
4070 		radeon_ring_lockup_update(ring);
4071 		return false;
4072 	}
4073 	/* force ring activities */
4074 	radeon_ring_force_activity(rdev, ring);
4075 	return radeon_ring_test_lockup(rdev, ring);
4076 }
4077 
4078 /* MC */
4079 /**
4080  * cik_mc_program - program the GPU memory controller
4081  *
4082  * @rdev: radeon_device pointer
4083  *
4084  * Set the location of vram, gart, and AGP in the GPU's
4085  * physical address space (CIK).
4086  */
4087 static void cik_mc_program(struct radeon_device *rdev)
4088 {
4089 	struct evergreen_mc_save save;
4090 	u32 tmp;
4091 	int i, j;
4092 
4093 	/* Initialize HDP */
4094 	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
4095 		WREG32((0x2c14 + j), 0x00000000);
4096 		WREG32((0x2c18 + j), 0x00000000);
4097 		WREG32((0x2c1c + j), 0x00000000);
4098 		WREG32((0x2c20 + j), 0x00000000);
4099 		WREG32((0x2c24 + j), 0x00000000);
4100 	}
4101 	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
4102 
4103 	evergreen_mc_stop(rdev, &save);
4104 	if (radeon_mc_wait_for_idle(rdev)) {
4105 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4106 	}
4107 	/* Lockout access through VGA aperture*/
4108 	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
4109 	/* Update configuration */
4110 	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
4111 	       rdev->mc.vram_start >> 12);
4112 	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
4113 	       rdev->mc.vram_end >> 12);
4114 	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
4115 	       rdev->vram_scratch.gpu_addr >> 12);
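	/* FB_LOCATION packs the VRAM range as 16MB-granular start/end
	 * fields: end in bits 31:16, start in bits 15:0
	 */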
4116 	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
4117 	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
4118 	WREG32(MC_VM_FB_LOCATION, tmp);
4119 	/* XXX double check these! */
4120 	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
4121 	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
4122 	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
4123 	WREG32(MC_VM_AGP_BASE, 0);
4124 	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
4125 	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
4126 	if (radeon_mc_wait_for_idle(rdev)) {
4127 		dev_warn(rdev->dev, "Wait for MC idle timed out!\n");
4128 	}
4129 	evergreen_mc_resume(rdev, &save);
4130 	/* we need to own VRAM, so turn off the VGA renderer here
4131 	 * to stop it overwriting our objects */
4132 	rv515_vga_render_disable(rdev);
4133 }
4134 
4135 /**
4136  * cik_mc_init - initialize the memory controller driver params
4137  *
4138  * @rdev: radeon_device pointer
4139  *
4140  * Look up the amount of vram, vram width, and decide how to place
4141  * vram and gart within the GPU's physical address space (CIK).
4142  * Returns 0 for success.
4143  */
4144 static int cik_mc_init(struct radeon_device *rdev)
4145 {
4146 	u32 tmp;
4147 	int chansize, numchan;
4148 
4149 	/* Get VRAM information */
4150 	rdev->mc.vram_is_ddr = true;
4151 	tmp = RREG32(MC_ARB_RAMCFG);
4152 	if (tmp & CHANSIZE_MASK) {
4153 		chansize = 64;
4154 	} else {
4155 		chansize = 32;
4156 	}
4157 	tmp = RREG32(MC_SHARED_CHMAP);
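	/* NOOFCHAN is an encoded field, not a raw count; decode it into
	 * the actual number of memory channels
	 */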
4158 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
4159 	case 0:
4160 	default:
4161 		numchan = 1;
4162 		break;
4163 	case 1:
4164 		numchan = 2;
4165 		break;
4166 	case 2:
4167 		numchan = 4;
4168 		break;
4169 	case 3:
4170 		numchan = 8;
4171 		break;
4172 	case 4:
4173 		numchan = 3;
4174 		break;
4175 	case 5:
4176 		numchan = 6;
4177 		break;
4178 	case 6:
4179 		numchan = 10;
4180 		break;
4181 	case 7:
4182 		numchan = 12;
4183 		break;
4184 	case 8:
4185 		numchan = 16;
4186 		break;
4187 	}
4188 	rdev->mc.vram_width = numchan * chansize;
4189 	/* Could the aperture size report 0? */
4190 	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
4191 	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
4192 	/* size in MB */
4193 	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4194 	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
4195 	rdev->mc.visible_vram_size = rdev->mc.aper_size;
4196 	si_vram_gtt_location(rdev, &rdev->mc);
4197 	radeon_update_bandwidth_info(rdev);
4198 
4199 	return 0;
4200 }
4201 
4202 /*
4203  * GART
4204  * VMID 0 is the physical GPU addresses as used by the kernel.
4205  * VMIDs 1-15 are used for userspace clients and are handled
4206  * by the radeon vm/hsa code.
4207  */
4208 /**
4209  * cik_pcie_gart_tlb_flush - gart tlb flush callback
4210  *
4211  * @rdev: radeon_device pointer
4212  *
4213  * Flush the TLB for the VMID 0 page table (CIK).
4214  */
4215 void cik_pcie_gart_tlb_flush(struct radeon_device *rdev)
4216 {
4217 	/* flush hdp cache */
4218 	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0);
4219 
4220 	/* bits 0-15 are the VM contexts0-15 */
4221 	WREG32(VM_INVALIDATE_REQUEST, 0x1);
4222 }
4223 
4224 /**
4225  * cik_pcie_gart_enable - gart enable
4226  *
4227  * @rdev: radeon_device pointer
4228  *
4229  * This sets up the TLBs, programs the page tables for VMID0,
4230  * sets up the hw for VMIDs 1-15 which are allocated on
4231  * demand, and sets up the global locations for the LDS, GDS,
4232  * and GPUVM for FSA64 clients (CIK).
4233  * Returns 0 for success, errors for failure.
4234  */
4235 static int cik_pcie_gart_enable(struct radeon_device *rdev)
4236 {
4237 	int r, i;
4238 
4239 	if (rdev->gart.robj == NULL) {
4240 		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
4241 		return -EINVAL;
4242 	}
4243 	r = radeon_gart_table_vram_pin(rdev);
4244 	if (r)
4245 		return r;
4246 	radeon_gart_restore(rdev);
4247 	/* Setup TLB control */
4248 	WREG32(MC_VM_MX_L1_TLB_CNTL,
4249 	       (0xA << 7) |
4250 	       ENABLE_L1_TLB |
4251 	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4252 	       ENABLE_ADVANCED_DRIVER_MODEL |
4253 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4254 	/* Setup L2 cache */
4255 	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
4256 	       ENABLE_L2_FRAGMENT_PROCESSING |
4257 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4258 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4259 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4260 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4261 	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
4262 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4263 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4264 	/* setup context0 */
4265 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
4266 	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
4267 	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
4268 	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
4269 			(u32)(rdev->dummy_page.addr >> 12));
4270 	WREG32(VM_CONTEXT0_CNTL2, 0);
4271 	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
4272 				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
4273 
4274 	WREG32(0x15D4, 0);
4275 	WREG32(0x15D8, 0);
4276 	WREG32(0x15DC, 0);
4277 
4278 	/* empty context1-15 */
4279 	/* FIXME start with 4G, once using 2 level pt switch to full
4280 	 * vm size space
4281 	 */
4282 	/* set vm size, must be a multiple of 4 */
4283 	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
4284 	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
4285 	for (i = 1; i < 16; i++) {
4286 		if (i < 8)
4287 			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
4288 			       rdev->gart.table_addr >> 12);
4289 		else
4290 			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
4291 			       rdev->gart.table_addr >> 12);
4292 	}
4293 
4294 	/* enable context1-15 */
4295 	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
4296 	       (u32)(rdev->dummy_page.addr >> 12));
4297 	WREG32(VM_CONTEXT1_CNTL2, 4);
4298 	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(1) |
4299 				RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4300 				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4301 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4302 				DUMMY_PAGE_PROTECTION_FAULT_ENABLE_DEFAULT |
4303 				PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT |
4304 				PDE0_PROTECTION_FAULT_ENABLE_DEFAULT |
4305 				VALID_PROTECTION_FAULT_ENABLE_INTERRUPT |
4306 				VALID_PROTECTION_FAULT_ENABLE_DEFAULT |
4307 				READ_PROTECTION_FAULT_ENABLE_INTERRUPT |
4308 				READ_PROTECTION_FAULT_ENABLE_DEFAULT |
4309 				WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT |
4310 				WRITE_PROTECTION_FAULT_ENABLE_DEFAULT);
4311 
4312 	/* TC cache setup ??? */
4313 	WREG32(TC_CFG_L1_LOAD_POLICY0, 0);
4314 	WREG32(TC_CFG_L1_LOAD_POLICY1, 0);
4315 	WREG32(TC_CFG_L1_STORE_POLICY, 0);
4316 
4317 	WREG32(TC_CFG_L2_LOAD_POLICY0, 0);
4318 	WREG32(TC_CFG_L2_LOAD_POLICY1, 0);
4319 	WREG32(TC_CFG_L2_STORE_POLICY0, 0);
4320 	WREG32(TC_CFG_L2_STORE_POLICY1, 0);
4321 	WREG32(TC_CFG_L2_ATOMIC_POLICY, 0);
4322 
4323 	WREG32(TC_CFG_L1_VOLATILE, 0);
4324 	WREG32(TC_CFG_L2_VOLATILE, 0);
4325 
4326 	if (rdev->family == CHIP_KAVERI) {
4327 		u32 tmp = RREG32(CHUB_CONTROL);
4328 		tmp &= ~BYPASS_VM;
4329 		WREG32(CHUB_CONTROL, tmp);
4330 	}
4331 
4332 	/* XXX SH_MEM regs */
4333 	/* where to put LDS, scratch, GPUVM in FSA64 space */
4334 	for (i = 0; i < 16; i++) {
4335 		cik_srbm_select(rdev, 0, 0, 0, i);
4336 		/* CP and shaders */
4337 		WREG32(SH_MEM_CONFIG, 0);
4338 		WREG32(SH_MEM_APE1_BASE, 1);
4339 		WREG32(SH_MEM_APE1_LIMIT, 0);
4340 		WREG32(SH_MEM_BASES, 0);
4341 		/* SDMA GFX */
4342 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA0_REGISTER_OFFSET, 0);
4343 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA0_REGISTER_OFFSET, 0);
4344 		WREG32(SDMA0_GFX_VIRTUAL_ADDR + SDMA1_REGISTER_OFFSET, 0);
4345 		WREG32(SDMA0_GFX_APE1_CNTL + SDMA1_REGISTER_OFFSET, 0);
4346 		/* XXX SDMA RLC - todo */
4347 	}
4348 	cik_srbm_select(rdev, 0, 0, 0, 0);
4349 
4350 	cik_pcie_gart_tlb_flush(rdev);
4351 	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
4352 		 (unsigned)(rdev->mc.gtt_size >> 20),
4353 		 (unsigned long long)rdev->gart.table_addr);
4354 	rdev->gart.ready = true;
4355 	return 0;
4356 }
4357 
4358 /**
4359  * cik_pcie_gart_disable - gart disable
4360  *
4361  * @rdev: radeon_device pointer
4362  *
4363  * This disables all VM page tables (CIK).
4364  */
4365 static void cik_pcie_gart_disable(struct radeon_device *rdev)
4366 {
4367 	/* Disable all tables */
4368 	WREG32(VM_CONTEXT0_CNTL, 0);
4369 	WREG32(VM_CONTEXT1_CNTL, 0);
4370 	/* Setup TLB control */
4371 	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
4372 	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
4373 	/* Setup L2 cache */
4374 	WREG32(VM_L2_CNTL,
4375 	       ENABLE_L2_FRAGMENT_PROCESSING |
4376 	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
4377 	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
4378 	       EFFECTIVE_L2_QUEUE_SIZE(7) |
4379 	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
4380 	WREG32(VM_L2_CNTL2, 0);
4381 	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
4382 	       L2_CACHE_BIGK_FRAGMENT_SIZE(6));
4383 	radeon_gart_table_vram_unpin(rdev);
4384 }
4385 
4386 /**
4387  * cik_pcie_gart_fini - vm fini callback
4388  *
4389  * @rdev: radeon_device pointer
4390  *
4391  * Tears down the driver GART/VM setup (CIK).
4392  */
4393 static void cik_pcie_gart_fini(struct radeon_device *rdev)
4394 {
4395 	cik_pcie_gart_disable(rdev);
4396 	radeon_gart_table_vram_free(rdev);
4397 	radeon_gart_fini(rdev);
4398 }
4399 
4400 /* vm parser */
4401 /**
4402  * cik_ib_parse - vm ib_parse callback
4403  *
4404  * @rdev: radeon_device pointer
4405  * @ib: indirect buffer pointer
4406  *
4407  * CIK uses hw IB checking so this is a nop (CIK).
4408  */
4409 int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
4410 {
4411 	return 0;
4412 }
4413 
4414 /*
4415  * vm
4416  * VMID 0 is the physical GPU addresses as used by the kernel.
4417  * VMIDs 1-15 are used for userspace clients and are handled
4418  * by the radeon vm/hsa code.
4419  */
4420 /**
4421  * cik_vm_init - cik vm init callback
4422  *
4423  * @rdev: radeon_device pointer
4424  *
4425  * Inits cik specific vm parameters (number of VMs, base of vram for
4426  * VMIDs 1-15) (CIK).
4427  * Returns 0 for success.
4428  */
4429 int cik_vm_init(struct radeon_device *rdev)
4430 {
4431 	/* number of VMs */
4432 	rdev->vm_manager.nvm = 16;
4433 	/* base offset of vram pages */
4434 	if (rdev->flags & RADEON_IS_IGP) {
4435 		u64 tmp = RREG32(MC_VM_FB_OFFSET);
4436 		tmp <<= 22;
4437 		rdev->vm_manager.vram_base_offset = tmp;
4438 	} else
4439 		rdev->vm_manager.vram_base_offset = 0;
4440 
4441 	return 0;
4442 }
4443 
4444 /**
4445  * cik_vm_fini - cik vm fini callback
4446  *
4447  * @rdev: radeon_device pointer
4448  *
4449  * Tear down any asic specific VM setup (CIK).
4450  */
4451 void cik_vm_fini(struct radeon_device *rdev)
4452 {
4453 }
4454 
4455 /**
4456  * cik_vm_flush - cik vm flush using the CP
4457  *
4458  * @rdev: radeon_device pointer
4459  *
4460  * Update the page table base and flush the VM TLB
4461  * using the CP (CIK).
4462  */
4463 void cik_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4464 {
4465 	struct radeon_ring *ring = &rdev->ring[ridx];
4466 
4467 	if (vm == NULL)
4468 		return;
4469 
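	/* point the VM's page directory at its new location; contexts 0-7
	 * and 8-15 have their PAGE_TABLE_BASE_ADDR registers in separate
	 * banks, hence the split below
	 */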
4470 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4471 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4472 				 WRITE_DATA_DST_SEL(0)));
4473 	if (vm->id < 8) {
4474 		radeon_ring_write(ring,
4475 				  (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4476 	} else {
4477 		radeon_ring_write(ring,
4478 				  (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4479 	}
4480 	radeon_ring_write(ring, 0);
4481 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4482 
4483 	/* update SH_MEM_* regs */
4484 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4485 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4486 				 WRITE_DATA_DST_SEL(0)));
4487 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4488 	radeon_ring_write(ring, 0);
4489 	radeon_ring_write(ring, VMID(vm->id));
4490 
4491 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 6));
4492 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4493 				 WRITE_DATA_DST_SEL(0)));
4494 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
4495 	radeon_ring_write(ring, 0);
4496 
4497 	radeon_ring_write(ring, 0); /* SH_MEM_BASES */
4498 	radeon_ring_write(ring, 0); /* SH_MEM_CONFIG */
4499 	radeon_ring_write(ring, 1); /* SH_MEM_APE1_BASE */
4500 	radeon_ring_write(ring, 0); /* SH_MEM_APE1_LIMIT */
4501 
4502 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4503 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4504 				 WRITE_DATA_DST_SEL(0)));
4505 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4506 	radeon_ring_write(ring, 0);
4507 	radeon_ring_write(ring, VMID(0));
4508 
4509 	/* HDP flush */
4510 	/* We should be using the WAIT_REG_MEM packet here like in
4511 	 * cik_fence_ring_emit(), but it causes the CP to hang in this
4512 	 * context...
4513 	 */
4514 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4515 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4516 				 WRITE_DATA_DST_SEL(0)));
4517 	radeon_ring_write(ring, HDP_MEM_COHERENCY_FLUSH_CNTL >> 2);
4518 	radeon_ring_write(ring, 0);
4519 	radeon_ring_write(ring, 0);
4520 
4521 	/* bits 0-15 are the VM contexts0-15 */
4522 	radeon_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
4523 	radeon_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
4524 				 WRITE_DATA_DST_SEL(0)));
4525 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4526 	radeon_ring_write(ring, 0);
4527 	radeon_ring_write(ring, 1 << vm->id);
4528 
4529 	/* compute doesn't have PFP */
4530 	if (ridx == RADEON_RING_TYPE_GFX_INDEX) {
4531 		/* sync PFP to ME, otherwise we might get invalid PFP reads */
4532 		radeon_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
4533 		radeon_ring_write(ring, 0x0);
4534 	}
4535 }
4536 
4537 /**
4538  * cik_vm_set_page - update the page tables using the CP or sDMA
4539  *
4540  * @rdev: radeon_device pointer
4541  * @ib: indirect buffer to fill with commands
4542  * @pe: addr of the page entry
4543  * @addr: dst addr to write into pe
4544  * @count: number of page entries to update
4545  * @incr: increase next addr by incr bytes
4546  * @flags: access flags
4547  *
4548  * Update the page tables using CP or sDMA (CIK).
4549  */
4550 void cik_vm_set_page(struct radeon_device *rdev,
4551 		     struct radeon_ib *ib,
4552 		     uint64_t pe,
4553 		     uint64_t addr, unsigned count,
4554 		     uint32_t incr, uint32_t flags)
4555 {
4556 	uint32_t r600_flags = cayman_vm_page_flags(rdev, flags);
4557 	uint64_t value;
4558 	unsigned ndw;
4559 
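	/* three update paths: CP WRITE_DATA packets when the page table
	 * ring is the GFX ring, SDMA WRITE packets for non-contiguous
	 * system pages, and SDMA GENERATE_PTE_PDE packets for physically
	 * contiguous VRAM ranges
	 */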
4560 	if (rdev->asic->vm.pt_ring_index == RADEON_RING_TYPE_GFX_INDEX) {
4561 		/* CP */
4562 		while (count) {
4563 			ndw = 2 + count * 2;
4564 			if (ndw > 0x3FFE)
4565 				ndw = 0x3FFE;
4566 
4567 			ib->ptr[ib->length_dw++] = PACKET3(PACKET3_WRITE_DATA, ndw);
4568 			ib->ptr[ib->length_dw++] = (WRITE_DATA_ENGINE_SEL(0) |
4569 						    WRITE_DATA_DST_SEL(1));
4570 			ib->ptr[ib->length_dw++] = pe;
4571 			ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4572 			for (; ndw > 2; ndw -= 2, --count, pe += 8) {
4573 				if (flags & RADEON_VM_PAGE_SYSTEM) {
4574 					value = radeon_vm_map_gart(rdev, addr);
4575 					value &= 0xFFFFFFFFFFFFF000ULL;
4576 				} else if (flags & RADEON_VM_PAGE_VALID) {
4577 					value = addr;
4578 				} else {
4579 					value = 0;
4580 				}
4581 				addr += incr;
4582 				value |= r600_flags;
4583 				ib->ptr[ib->length_dw++] = value;
4584 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4585 			}
4586 		}
4587 	} else {
4588 		/* DMA */
4589 		if (flags & RADEON_VM_PAGE_SYSTEM) {
4590 			while (count) {
4591 				ndw = count * 2;
4592 				if (ndw > 0xFFFFE)
4593 					ndw = 0xFFFFE;
4594 
4595 				/* for non-physically contiguous pages (system) */
4596 				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
4597 				ib->ptr[ib->length_dw++] = pe;
4598 				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4599 				ib->ptr[ib->length_dw++] = ndw;
4600 				for (; ndw > 0; ndw -= 2, --count, pe += 8) {
4601 					if (flags & RADEON_VM_PAGE_SYSTEM) {
4602 						value = radeon_vm_map_gart(rdev, addr);
4603 						value &= 0xFFFFFFFFFFFFF000ULL;
4604 					} else if (flags & RADEON_VM_PAGE_VALID) {
4605 						value = addr;
4606 					} else {
4607 						value = 0;
4608 					}
4609 					addr += incr;
4610 					value |= r600_flags;
4611 					ib->ptr[ib->length_dw++] = value;
4612 					ib->ptr[ib->length_dw++] = upper_32_bits(value);
4613 				}
4614 			}
4615 		} else {
4616 			while (count) {
4617 				ndw = count;
4618 				if (ndw > 0x7FFFF)
4619 					ndw = 0x7FFFF;
4620 
4621 				if (flags & RADEON_VM_PAGE_VALID)
4622 					value = addr;
4623 				else
4624 					value = 0;
4625 				/* for physically contiguous pages (vram) */
4626 				ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_GENERATE_PTE_PDE, 0, 0);
4627 				ib->ptr[ib->length_dw++] = pe; /* dst addr */
4628 				ib->ptr[ib->length_dw++] = upper_32_bits(pe);
4629 				ib->ptr[ib->length_dw++] = r600_flags; /* mask */
4630 				ib->ptr[ib->length_dw++] = 0;
4631 				ib->ptr[ib->length_dw++] = value; /* value */
4632 				ib->ptr[ib->length_dw++] = upper_32_bits(value);
4633 				ib->ptr[ib->length_dw++] = incr; /* increment size */
4634 				ib->ptr[ib->length_dw++] = 0;
4635 				ib->ptr[ib->length_dw++] = ndw; /* number of entries */
4636 				pe += ndw * 8;
4637 				addr += ndw * incr;
4638 				count -= ndw;
4639 			}
4640 		}
4641 		while (ib->length_dw & 0x7)
4642 			ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
4643 	}
4644 }
4645 
4646 /**
4647  * cik_dma_vm_flush - cik vm flush using sDMA
4648  *
4649  * @rdev: radeon_device pointer
4650  *
4651  * Update the page table base and flush the VM TLB
4652  * using sDMA (CIK).
4653  */
4654 void cik_dma_vm_flush(struct radeon_device *rdev, int ridx, struct radeon_vm *vm)
4655 {
4656 	struct radeon_ring *ring = &rdev->ring[ridx];
4657 	u32 extra_bits = (SDMA_POLL_REG_MEM_EXTRA_OP(1) |
4658 			  SDMA_POLL_REG_MEM_EXTRA_FUNC(3)); /* == */
4659 	u32 ref_and_mask;
4660 
4661 	if (vm == NULL)
4662 		return;
4663 
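	/* sDMA programs the VM registers with SRBM_WRITE packets rather
	 * than the CP WRITE_DATA packets used in cik_vm_flush()
	 */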
4664 	if (ridx == R600_RING_TYPE_DMA_INDEX)
4665 		ref_and_mask = SDMA0;
4666 	else
4667 		ref_and_mask = SDMA1;
4668 
4669 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4670 	if (vm->id < 8) {
4671 		radeon_ring_write(ring, (VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2)) >> 2);
4672 	} else {
4673 		radeon_ring_write(ring, (VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2)) >> 2);
4674 	}
4675 	radeon_ring_write(ring, vm->pd_gpu_addr >> 12);
4676 
4677 	/* update SH_MEM_* regs */
4678 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4679 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4680 	radeon_ring_write(ring, VMID(vm->id));
4681 
4682 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4683 	radeon_ring_write(ring, SH_MEM_BASES >> 2);
4684 	radeon_ring_write(ring, 0);
4685 
4686 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4687 	radeon_ring_write(ring, SH_MEM_CONFIG >> 2);
4688 	radeon_ring_write(ring, 0);
4689 
4690 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4691 	radeon_ring_write(ring, SH_MEM_APE1_BASE >> 2);
4692 	radeon_ring_write(ring, 1);
4693 
4694 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4695 	radeon_ring_write(ring, SH_MEM_APE1_LIMIT >> 2);
4696 	radeon_ring_write(ring, 0);
4697 
4698 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4699 	radeon_ring_write(ring, SRBM_GFX_CNTL >> 2);
4700 	radeon_ring_write(ring, VMID(0));
4701 
4702 	/* flush HDP */
4703 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0, extra_bits));
4704 	radeon_ring_write(ring, GPU_HDP_FLUSH_DONE);
4705 	radeon_ring_write(ring, GPU_HDP_FLUSH_REQ);
4706 	radeon_ring_write(ring, ref_and_mask); /* REFERENCE */
4707 	radeon_ring_write(ring, ref_and_mask); /* MASK */
4708 	radeon_ring_write(ring, (4 << 16) | 10); /* RETRY_COUNT, POLL_INTERVAL */
4709 
4710 	/* flush TLB */
4711 	radeon_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
4712 	radeon_ring_write(ring, VM_INVALIDATE_REQUEST >> 2);
4713 	radeon_ring_write(ring, 1 << vm->id);
4714 }
4715 
4716 /*
4717  * RLC
4718  * The RLC is a multi-purpose microengine that handles a
4719  * variety of functions, the most important of which is
4720  * the interrupt controller.
4721  */
4722 /**
4723  * cik_rlc_stop - stop the RLC ME
4724  *
4725  * @rdev: radeon_device pointer
4726  *
4727  * Halt the RLC ME (MicroEngine) (CIK).
4728  */
4729 static void cik_rlc_stop(struct radeon_device *rdev)
4730 {
4731 	int i, j, k;
4732 	u32 mask, tmp;
4733 
4734 	tmp = RREG32(CP_INT_CNTL_RING0);
4735 	tmp &= ~(CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4736 	WREG32(CP_INT_CNTL_RING0, tmp);
4737 
4738 	RREG32(CB_CGTT_SCLK_CTRL);
4739 	RREG32(CB_CGTT_SCLK_CTRL);
4740 	RREG32(CB_CGTT_SCLK_CTRL);
4741 	RREG32(CB_CGTT_SCLK_CTRL);
4742 
4743 	tmp = RREG32(RLC_CGCG_CGLS_CTRL) & 0xfffffffc;
4744 	WREG32(RLC_CGCG_CGLS_CTRL, tmp);
4745 
4746 	WREG32(RLC_CNTL, 0);
4747 
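	/* wait for the per-SE/SH CU master serdes, then the non-CU masters,
	 * to go idle before treating the RLC as stopped
	 */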
4748 	for (i = 0; i < rdev->config.cik.max_shader_engines; i++) {
4749 		for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) {
4750 			cik_select_se_sh(rdev, i, j);
4751 			for (k = 0; k < rdev->usec_timeout; k++) {
4752 				if (RREG32(RLC_SERDES_CU_MASTER_BUSY) == 0)
4753 					break;
4754 				udelay(1);
4755 			}
4756 		}
4757 	}
4758 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4759 
4760 	mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY;
4761 	for (k = 0; k < rdev->usec_timeout; k++) {
4762 		if ((RREG32(RLC_SERDES_NONCU_MASTER_BUSY) & mask) == 0)
4763 			break;
4764 		udelay(1);
4765 	}
4766 }
4767 
4768 /**
4769  * cik_rlc_start - start the RLC ME
4770  *
4771  * @rdev: radeon_device pointer
4772  *
4773  * Unhalt the RLC ME (MicroEngine) (CIK).
4774  */
4775 static void cik_rlc_start(struct radeon_device *rdev)
4776 {
4777 	u32 tmp;
4778 
4779 	WREG32(RLC_CNTL, RLC_ENABLE);
4780 
4781 	tmp = RREG32(CP_INT_CNTL_RING0);
4782 	tmp |= (CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4783 	WREG32(CP_INT_CNTL_RING0, tmp);
4784 
4785 	udelay(50);
4786 }
4787 
4788 /**
4789  * cik_rlc_resume - setup the RLC hw
4790  *
4791  * @rdev: radeon_device pointer
4792  *
4793  * Initialize the RLC registers, load the ucode,
4794  * and start the RLC (CIK).
4795  * Returns 0 for success, -EINVAL if the ucode is not available.
4796  */
4797 static int cik_rlc_resume(struct radeon_device *rdev)
4798 {
4799 	u32 i, size;
4800 	u32 clear_state_info[3];
4801 	const __be32 *fw_data;
4802 
4803 	if (!rdev->rlc_fw)
4804 		return -EINVAL;
4805 
4806 	switch (rdev->family) {
4807 	case CHIP_BONAIRE:
4808 	default:
4809 		size = BONAIRE_RLC_UCODE_SIZE;
4810 		break;
4811 	case CHIP_KAVERI:
4812 		size = KV_RLC_UCODE_SIZE;
4813 		break;
4814 	case CHIP_KABINI:
4815 		size = KB_RLC_UCODE_SIZE;
4816 		break;
4817 	}
4818 
4819 	cik_rlc_stop(rdev);
4820 
4821 	WREG32(GRBM_SOFT_RESET, SOFT_RESET_RLC);
4822 	RREG32(GRBM_SOFT_RESET);
4823 	udelay(50);
4824 	WREG32(GRBM_SOFT_RESET, 0);
4825 	RREG32(GRBM_SOFT_RESET);
4826 	udelay(50);
4827 
4828 	WREG32(RLC_LB_CNTR_INIT, 0);
4829 	WREG32(RLC_LB_CNTR_MAX, 0x00008000);
4830 
4831 	cik_select_se_sh(rdev, 0xffffffff, 0xffffffff);
4832 	WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff);
4833 	WREG32(RLC_LB_PARAMS, 0x00600408);
4834 	WREG32(RLC_LB_CNTL, 0x80000004);
4835 
4836 	WREG32(RLC_MC_CNTL, 0);
4837 	WREG32(RLC_UCODE_CNTL, 0);
4838 
4839 	fw_data = (const __be32 *)rdev->rlc_fw->data;
4840 	WREG32(RLC_GPM_UCODE_ADDR, 0);
4841 	for (i = 0; i < size; i++)
4842 		WREG32(RLC_GPM_UCODE_DATA, be32_to_cpup(fw_data++));
4843 	WREG32(RLC_GPM_UCODE_ADDR, 0);
4844 
4845 	/* XXX */
4846 	clear_state_info[0] = 0;//upper_32_bits(rdev->rlc.save_restore_gpu_addr);
4847 	clear_state_info[1] = 0;//rdev->rlc.save_restore_gpu_addr;
4848 	clear_state_info[2] = 0;//cik_default_size;
4849 	WREG32(RLC_GPM_SCRATCH_ADDR, 0x3d);
4850 	for (i = 0; i < 3; i++)
4851 		WREG32(RLC_GPM_SCRATCH_DATA, clear_state_info[i]);
4852 	WREG32(RLC_DRIVER_DMA_STATUS, 0);
4853 
4854 	cik_rlc_start(rdev);
4855 
4856 	return 0;
4857 }
4858 
4859 /*
4860  * Interrupts
4861  * Starting with r6xx, interrupts are handled via a ring buffer.
4862  * Ring buffers are areas of GPU accessible memory that the GPU
4863  * writes interrupt vectors into and the host reads vectors out of.
4864  * There is a rptr (read pointer) that determines where the
4865  * host is currently reading, and a wptr (write pointer)
4866  * which determines where the GPU has written.  When the
4867  * pointers are equal, the ring is idle.  When the GPU
4868  * writes vectors to the ring buffer, it increments the
4869  * wptr.  When there is an interrupt, the host then starts
4870  * fetching vectors and processing them until the pointers are
4871  * equal again at which point it updates the rptr.
4872  */
4873 
4874 /**
4875  * cik_enable_interrupts - Enable the interrupt ring buffer
4876  *
4877  * @rdev: radeon_device pointer
4878  *
4879  * Enable the interrupt ring buffer (CIK).
4880  */
4881 static void cik_enable_interrupts(struct radeon_device *rdev)
4882 {
4883 	u32 ih_cntl = RREG32(IH_CNTL);
4884 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4885 
4886 	ih_cntl |= ENABLE_INTR;
4887 	ih_rb_cntl |= IH_RB_ENABLE;
4888 	WREG32(IH_CNTL, ih_cntl);
4889 	WREG32(IH_RB_CNTL, ih_rb_cntl);
4890 	rdev->ih.enabled = true;
4891 }
4892 
4893 /**
4894  * cik_disable_interrupts - Disable the interrupt ring buffer
4895  *
4896  * @rdev: radeon_device pointer
4897  *
4898  * Disable the interrupt ring buffer (CIK).
4899  */
4900 static void cik_disable_interrupts(struct radeon_device *rdev)
4901 {
4902 	u32 ih_rb_cntl = RREG32(IH_RB_CNTL);
4903 	u32 ih_cntl = RREG32(IH_CNTL);
4904 
4905 	ih_rb_cntl &= ~IH_RB_ENABLE;
4906 	ih_cntl &= ~ENABLE_INTR;
4907 	WREG32(IH_RB_CNTL, ih_rb_cntl);
4908 	WREG32(IH_CNTL, ih_cntl);
4909 	/* set rptr, wptr to 0 */
4910 	WREG32(IH_RB_RPTR, 0);
4911 	WREG32(IH_RB_WPTR, 0);
4912 	rdev->ih.enabled = false;
4913 	rdev->ih.rptr = 0;
4914 }
4915 
4916 /**
4917  * cik_disable_interrupt_state - Disable all interrupt sources
4918  *
4919  * @rdev: radeon_device pointer
4920  *
4921  * Clear all interrupt enable bits used by the driver (CIK).
4922  */
4923 static void cik_disable_interrupt_state(struct radeon_device *rdev)
4924 {
4925 	u32 tmp;
4926 
4927 	/* gfx ring */
4928 	WREG32(CP_INT_CNTL_RING0, CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE);
4929 	/* sdma */
4930 	tmp = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
4931 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, tmp);
4932 	tmp = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
4933 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, tmp);
4934 	/* compute queues */
4935 	WREG32(CP_ME1_PIPE0_INT_CNTL, 0);
4936 	WREG32(CP_ME1_PIPE1_INT_CNTL, 0);
4937 	WREG32(CP_ME1_PIPE2_INT_CNTL, 0);
4938 	WREG32(CP_ME1_PIPE3_INT_CNTL, 0);
4939 	WREG32(CP_ME2_PIPE0_INT_CNTL, 0);
4940 	WREG32(CP_ME2_PIPE1_INT_CNTL, 0);
4941 	WREG32(CP_ME2_PIPE2_INT_CNTL, 0);
4942 	WREG32(CP_ME2_PIPE3_INT_CNTL, 0);
4943 	/* grbm */
4944 	WREG32(GRBM_INT_CNTL, 0);
4945 	/* vline/vblank, etc. */
4946 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
4947 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
4948 	if (rdev->num_crtc >= 4) {
4949 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
4950 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
4951 	}
4952 	if (rdev->num_crtc >= 6) {
4953 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
4954 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
4955 	}
4956 
4957 	/* dac hotplug */
4958 	WREG32(DAC_AUTODETECT_INT_CONTROL, 0);
4959 
4960 	/* digital hotplug */
4961 	tmp = RREG32(DC_HPD1_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4962 	WREG32(DC_HPD1_INT_CONTROL, tmp);
4963 	tmp = RREG32(DC_HPD2_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4964 	WREG32(DC_HPD2_INT_CONTROL, tmp);
4965 	tmp = RREG32(DC_HPD3_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4966 	WREG32(DC_HPD3_INT_CONTROL, tmp);
4967 	tmp = RREG32(DC_HPD4_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4968 	WREG32(DC_HPD4_INT_CONTROL, tmp);
4969 	tmp = RREG32(DC_HPD5_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4970 	WREG32(DC_HPD5_INT_CONTROL, tmp);
4971 	tmp = RREG32(DC_HPD6_INT_CONTROL) & DC_HPDx_INT_POLARITY;
4972 	WREG32(DC_HPD6_INT_CONTROL, tmp);
4973 
4974 }
4975 
4976 /**
4977  * cik_irq_init - init and enable the interrupt ring
4978  *
4979  * @rdev: radeon_device pointer
4980  *
4981  * Allocate a ring buffer for the interrupt controller,
4982  * enable the RLC, disable interrupts, setup the IH
4983  * ring buffer and enable it (CIK).
4984  * Called at device load and resume.
4985  * Returns 0 for success, errors for failure.
4986  */
4987 static int cik_irq_init(struct radeon_device *rdev)
4988 {
4989 	int ret = 0;
4990 	int rb_bufsz;
4991 	u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
4992 
4993 	/* allocate ring */
4994 	ret = r600_ih_ring_alloc(rdev);
4995 	if (ret)
4996 		return ret;
4997 
4998 	/* disable irqs */
4999 	cik_disable_interrupts(rdev);
5000 
5001 	/* init rlc */
5002 	ret = cik_rlc_resume(rdev);
5003 	if (ret) {
5004 		r600_ih_ring_fini(rdev);
5005 		return ret;
5006 	}
5007 
5008 	/* setup interrupt control */
5009 	/* XXX this should actually be a bus address, not an MC address. same on older asics */
5010 	WREG32(INTERRUPT_CNTL2, rdev->ih.gpu_addr >> 8);
5011 	interrupt_cntl = RREG32(INTERRUPT_CNTL);
5012 	/* IH_DUMMY_RD_OVERRIDE=0 - dummy read disabled with msi, enabled without msi
5013 	 * IH_DUMMY_RD_OVERRIDE=1 - dummy read controlled by IH_DUMMY_RD_EN
5014 	 */
5015 	interrupt_cntl &= ~IH_DUMMY_RD_OVERRIDE;
5016 	/* IH_REQ_NONSNOOP_EN=1 if ring is in non-cacheable memory, e.g., vram */
5017 	interrupt_cntl &= ~IH_REQ_NONSNOOP_EN;
5018 	WREG32(INTERRUPT_CNTL, interrupt_cntl);
5019 
5020 	WREG32(IH_RB_BASE, rdev->ih.gpu_addr >> 8);
5021 	rb_bufsz = drm_order(rdev->ih.ring_size / 4);
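	/* rb_bufsz is the log2 (order) of the ring size in dwords; it is
	 * shifted into the ring size field of IH_RB_CNTL below, alongside
	 * the wptr overflow handling bits.
	 */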
5022 
5023 	ih_rb_cntl = (IH_WPTR_OVERFLOW_ENABLE |
5024 		      IH_WPTR_OVERFLOW_CLEAR |
5025 		      (rb_bufsz << 1));
5026 
5027 	if (rdev->wb.enabled)
5028 		ih_rb_cntl |= IH_WPTR_WRITEBACK_ENABLE;
5029 
5030 	/* set the writeback address whether it's enabled or not */
5031 	WREG32(IH_RB_WPTR_ADDR_LO, (rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFFFFFFFC);
5032 	WREG32(IH_RB_WPTR_ADDR_HI, upper_32_bits(rdev->wb.gpu_addr + R600_WB_IH_WPTR_OFFSET) & 0xFF);
5033 
5034 	WREG32(IH_RB_CNTL, ih_rb_cntl);
5035 
5036 	/* set rptr, wptr to 0 */
5037 	WREG32(IH_RB_RPTR, 0);
5038 	WREG32(IH_RB_WPTR, 0);
5039 
5040 	/* Default settings for IH_CNTL (disabled at first) */
5041 	ih_cntl = MC_WRREQ_CREDIT(0x10) | MC_WR_CLEAN_CNT(0x10) | MC_VMID(0);
5042 	/* RPTR_REARM only works if msi's are enabled */
5043 	if (rdev->msi_enabled)
5044 		ih_cntl |= RPTR_REARM;
5045 	WREG32(IH_CNTL, ih_cntl);
5046 
5047 	/* force the active interrupt state to all disabled */
5048 	cik_disable_interrupt_state(rdev);
5049 
5050 	pci_set_master(rdev->pdev);
5051 
5052 	/* enable irqs */
5053 	cik_enable_interrupts(rdev);
5054 
5055 	return ret;
5056 }
5057 
5058 /**
5059  * cik_irq_set - enable/disable interrupt sources
5060  *
5061  * @rdev: radeon_device pointer
5062  *
5063  * Enable interrupt sources on the GPU (vblanks, hpd,
5064  * etc.) (CIK).
5065  * Returns 0 for success, errors for failure.
5066  */
5067 int cik_irq_set(struct radeon_device *rdev)
5068 {
5069 	u32 cp_int_cntl = CNTX_BUSY_INT_ENABLE | CNTX_EMPTY_INT_ENABLE |
5070 		PRIV_INSTR_INT_ENABLE | PRIV_REG_INT_ENABLE;
5071 	u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3;
5072 	u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3;
5073 	u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0;
5074 	u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6;
5075 	u32 grbm_int_cntl = 0;
5076 	u32 dma_cntl, dma_cntl1;
5077 
5078 	if (!rdev->irq.installed) {
5079 		WARN(1, "Can't enable IRQ/MSI because no handler is installed\n");
5080 		return -EINVAL;
5081 	}
5082 	/* don't enable anything if the ih is disabled */
5083 	if (!rdev->ih.enabled) {
5084 		cik_disable_interrupts(rdev);
5085 		/* force the active interrupt state to all disabled */
5086 		cik_disable_interrupt_state(rdev);
5087 		return 0;
5088 	}
5089 
5090 	hpd1 = RREG32(DC_HPD1_INT_CONTROL) & ~DC_HPDx_INT_EN;
5091 	hpd2 = RREG32(DC_HPD2_INT_CONTROL) & ~DC_HPDx_INT_EN;
5092 	hpd3 = RREG32(DC_HPD3_INT_CONTROL) & ~DC_HPDx_INT_EN;
5093 	hpd4 = RREG32(DC_HPD4_INT_CONTROL) & ~DC_HPDx_INT_EN;
5094 	hpd5 = RREG32(DC_HPD5_INT_CONTROL) & ~DC_HPDx_INT_EN;
5095 	hpd6 = RREG32(DC_HPD6_INT_CONTROL) & ~DC_HPDx_INT_EN;
5096 
5097 	dma_cntl = RREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET) & ~TRAP_ENABLE;
5098 	dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE;
5099 
5100 	cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5101 	cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5102 	cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5103 	cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5104 	cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5105 	cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5106 	cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5107 	cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE;
5108 
5109 	/* enable CP interrupts on all rings */
5110 	if (atomic_read(&rdev->irq.ring_int[RADEON_RING_TYPE_GFX_INDEX])) {
5111 		DRM_DEBUG("cik_irq_set: sw int gfx\n");
5112 		cp_int_cntl |= TIME_STAMP_INT_ENABLE;
5113 	}
5114 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP1_INDEX])) {
5115 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5116 		DRM_DEBUG("cik_irq_set: sw int cp1\n");
5117 		if (ring->me == 1) {
5118 			switch (ring->pipe) {
5119 			case 0:
5120 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5121 				break;
5122 			case 1:
5123 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5124 				break;
5125 			case 2:
5126 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5127 				break;
5128 			case 3:
5129 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5130 				break;
5131 			default:
5132 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5133 				break;
5134 			}
5135 		} else if (ring->me == 2) {
5136 			switch (ring->pipe) {
5137 			case 0:
5138 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5139 				break;
5140 			case 1:
5141 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5142 				break;
5143 			case 2:
5144 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5145 				break;
5146 			case 3:
5147 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5148 				break;
5149 			default:
5150 				DRM_DEBUG("cik_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe);
5151 				break;
5152 			}
5153 		} else {
5154 			DRM_DEBUG("cik_irq_set: sw int cp1 invalid me %d\n", ring->me);
5155 		}
5156 	}
5157 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_CP2_INDEX])) {
5158 		struct radeon_ring *ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5159 		DRM_DEBUG("cik_irq_set: sw int cp2\n");
5160 		if (ring->me == 1) {
5161 			switch (ring->pipe) {
5162 			case 0:
5163 				cp_m1p0 |= TIME_STAMP_INT_ENABLE;
5164 				break;
5165 			case 1:
5166 				cp_m1p1 |= TIME_STAMP_INT_ENABLE;
5167 				break;
5168 			case 2:
5169 				cp_m1p2 |= TIME_STAMP_INT_ENABLE;
5170 				break;
5171 			case 3:
5172 				cp_m1p3 |= TIME_STAMP_INT_ENABLE;
5173 				break;
5174 			default:
5175 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5176 				break;
5177 			}
5178 		} else if (ring->me == 2) {
5179 			switch (ring->pipe) {
5180 			case 0:
5181 				cp_m2p0 |= TIME_STAMP_INT_ENABLE;
5182 				break;
5183 			case 1:
5184 				cp_m2p1 |= TIME_STAMP_INT_ENABLE;
5185 				break;
5186 			case 2:
5187 				cp_m2p2 |= TIME_STAMP_INT_ENABLE;
5188 				break;
5189 			case 3:
5190 				cp_m2p3 |= TIME_STAMP_INT_ENABLE;
5191 				break;
5192 			default:
5193 				DRM_DEBUG("cik_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe);
5194 				break;
5195 			}
5196 		} else {
5197 			DRM_DEBUG("cik_irq_set: sw int cp2 invalid me %d\n", ring->me);
5198 		}
5199 	}
5200 
5201 	if (atomic_read(&rdev->irq.ring_int[R600_RING_TYPE_DMA_INDEX])) {
5202 		DRM_DEBUG("cik_irq_set: sw int dma\n");
5203 		dma_cntl |= TRAP_ENABLE;
5204 	}
5205 
5206 	if (atomic_read(&rdev->irq.ring_int[CAYMAN_RING_TYPE_DMA1_INDEX])) {
5207 		DRM_DEBUG("cik_irq_set: sw int dma1\n");
5208 		dma_cntl1 |= TRAP_ENABLE;
5209 	}
5210 
5211 	if (rdev->irq.crtc_vblank_int[0] ||
5212 	    atomic_read(&rdev->irq.pflip[0])) {
5213 		DRM_DEBUG("cik_irq_set: vblank 0\n");
5214 		crtc1 |= VBLANK_INTERRUPT_MASK;
5215 	}
5216 	if (rdev->irq.crtc_vblank_int[1] ||
5217 	    atomic_read(&rdev->irq.pflip[1])) {
5218 		DRM_DEBUG("cik_irq_set: vblank 1\n");
5219 		crtc2 |= VBLANK_INTERRUPT_MASK;
5220 	}
5221 	if (rdev->irq.crtc_vblank_int[2] ||
5222 	    atomic_read(&rdev->irq.pflip[2])) {
5223 		DRM_DEBUG("cik_irq_set: vblank 2\n");
5224 		crtc3 |= VBLANK_INTERRUPT_MASK;
5225 	}
5226 	if (rdev->irq.crtc_vblank_int[3] ||
5227 	    atomic_read(&rdev->irq.pflip[3])) {
5228 		DRM_DEBUG("cik_irq_set: vblank 3\n");
5229 		crtc4 |= VBLANK_INTERRUPT_MASK;
5230 	}
5231 	if (rdev->irq.crtc_vblank_int[4] ||
5232 	    atomic_read(&rdev->irq.pflip[4])) {
5233 		DRM_DEBUG("cik_irq_set: vblank 4\n");
5234 		crtc5 |= VBLANK_INTERRUPT_MASK;
5235 	}
5236 	if (rdev->irq.crtc_vblank_int[5] ||
5237 	    atomic_read(&rdev->irq.pflip[5])) {
5238 		DRM_DEBUG("cik_irq_set: vblank 5\n");
5239 		crtc6 |= VBLANK_INTERRUPT_MASK;
5240 	}
5241 	if (rdev->irq.hpd[0]) {
5242 		DRM_DEBUG("cik_irq_set: hpd 1\n");
5243 		hpd1 |= DC_HPDx_INT_EN;
5244 	}
5245 	if (rdev->irq.hpd[1]) {
5246 		DRM_DEBUG("cik_irq_set: hpd 2\n");
5247 		hpd2 |= DC_HPDx_INT_EN;
5248 	}
5249 	if (rdev->irq.hpd[2]) {
5250 		DRM_DEBUG("cik_irq_set: hpd 3\n");
5251 		hpd3 |= DC_HPDx_INT_EN;
5252 	}
5253 	if (rdev->irq.hpd[3]) {
5254 		DRM_DEBUG("cik_irq_set: hpd 4\n");
5255 		hpd4 |= DC_HPDx_INT_EN;
5256 	}
5257 	if (rdev->irq.hpd[4]) {
5258 		DRM_DEBUG("cik_irq_set: hpd 5\n");
5259 		hpd5 |= DC_HPDx_INT_EN;
5260 	}
5261 	if (rdev->irq.hpd[5]) {
5262 		DRM_DEBUG("cik_irq_set: hpd 6\n");
5263 		hpd6 |= DC_HPDx_INT_EN;
5264 	}
5265 
5266 	WREG32(CP_INT_CNTL_RING0, cp_int_cntl);
5267 
5268 	WREG32(SDMA0_CNTL + SDMA0_REGISTER_OFFSET, dma_cntl);
5269 	WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1);
5270 
5271 	WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0);
5272 	WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1);
5273 	WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2);
5274 	WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3);
5275 	WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0);
5276 	WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1);
5277 	WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2);
5278 	WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3);
5279 
5280 	WREG32(GRBM_INT_CNTL, grbm_int_cntl);
5281 
5282 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC0_REGISTER_OFFSET, crtc1);
5283 	WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC1_REGISTER_OFFSET, crtc2);
5284 	if (rdev->num_crtc >= 4) {
5285 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC2_REGISTER_OFFSET, crtc3);
5286 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC3_REGISTER_OFFSET, crtc4);
5287 	}
5288 	if (rdev->num_crtc >= 6) {
5289 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC4_REGISTER_OFFSET, crtc5);
5290 		WREG32(LB_INTERRUPT_MASK + EVERGREEN_CRTC5_REGISTER_OFFSET, crtc6);
5291 	}
5292 
5293 	WREG32(DC_HPD1_INT_CONTROL, hpd1);
5294 	WREG32(DC_HPD2_INT_CONTROL, hpd2);
5295 	WREG32(DC_HPD3_INT_CONTROL, hpd3);
5296 	WREG32(DC_HPD4_INT_CONTROL, hpd4);
5297 	WREG32(DC_HPD5_INT_CONTROL, hpd5);
5298 	WREG32(DC_HPD6_INT_CONTROL, hpd6);
5299 
5300 	return 0;
5301 }
5302 
5303 /**
5304  * cik_irq_ack - ack interrupt sources
5305  *
5306  * @rdev: radeon_device pointer
5307  *
5308  * Ack interrupt sources on the GPU (vblanks, hpd,
5309  * etc.) (CIK).  Certain interrupt sources are sw
5310  * generated and do not require an explicit ack.
5311  */
5312 static inline void cik_irq_ack(struct radeon_device *rdev)
5313 {
5314 	u32 tmp;
5315 
5316 	rdev->irq.stat_regs.cik.disp_int = RREG32(DISP_INTERRUPT_STATUS);
5317 	rdev->irq.stat_regs.cik.disp_int_cont = RREG32(DISP_INTERRUPT_STATUS_CONTINUE);
5318 	rdev->irq.stat_regs.cik.disp_int_cont2 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE2);
5319 	rdev->irq.stat_regs.cik.disp_int_cont3 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE3);
5320 	rdev->irq.stat_regs.cik.disp_int_cont4 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE4);
5321 	rdev->irq.stat_regs.cik.disp_int_cont5 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE5);
5322 	rdev->irq.stat_regs.cik.disp_int_cont6 = RREG32(DISP_INTERRUPT_STATUS_CONTINUE6);
5323 
5324 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT)
5325 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VBLANK_ACK);
5326 	if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT)
5327 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC0_REGISTER_OFFSET, VLINE_ACK);
5328 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT)
5329 		WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VBLANK_ACK);
5330 	if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT)
5331 		WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC1_REGISTER_OFFSET, VLINE_ACK);
5332 
5333 	if (rdev->num_crtc >= 4) {
5334 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT)
5335 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VBLANK_ACK);
5336 		if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT)
5337 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC2_REGISTER_OFFSET, VLINE_ACK);
5338 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT)
5339 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VBLANK_ACK);
5340 		if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT)
5341 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC3_REGISTER_OFFSET, VLINE_ACK);
5342 	}
5343 
5344 	if (rdev->num_crtc >= 6) {
5345 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT)
5346 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VBLANK_ACK);
5347 		if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT)
5348 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC4_REGISTER_OFFSET, VLINE_ACK);
5349 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT)
5350 			WREG32(LB_VBLANK_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VBLANK_ACK);
5351 		if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT)
5352 			WREG32(LB_VLINE_STATUS + EVERGREEN_CRTC5_REGISTER_OFFSET, VLINE_ACK);
5353 	}
5354 
5355 	if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5356 		tmp = RREG32(DC_HPD1_INT_CONTROL);
5357 		tmp |= DC_HPDx_INT_ACK;
5358 		WREG32(DC_HPD1_INT_CONTROL, tmp);
5359 	}
5360 	if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5361 		tmp = RREG32(DC_HPD2_INT_CONTROL);
5362 		tmp |= DC_HPDx_INT_ACK;
5363 		WREG32(DC_HPD2_INT_CONTROL, tmp);
5364 	}
5365 	if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5366 		tmp = RREG32(DC_HPD3_INT_CONTROL);
5367 		tmp |= DC_HPDx_INT_ACK;
5368 		WREG32(DC_HPD3_INT_CONTROL, tmp);
5369 	}
5370 	if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5371 		tmp = RREG32(DC_HPD4_INT_CONTROL);
5372 		tmp |= DC_HPDx_INT_ACK;
5373 		WREG32(DC_HPD4_INT_CONTROL, tmp);
5374 	}
5375 	if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5376 		tmp = RREG32(DC_HPD5_INT_CONTROL);
5377 		tmp |= DC_HPDx_INT_ACK;
5378 		WREG32(DC_HPD5_INT_CONTROL, tmp);
5379 	}
5380 	if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5381 		tmp = RREG32(DC_HPD6_INT_CONTROL);
5382 		tmp |= DC_HPDx_INT_ACK;
5383 		WREG32(DC_HPD6_INT_CONTROL, tmp);
5384 	}
5385 }
5386 
5387 /**
5388  * cik_irq_disable - disable interrupts
5389  *
5390  * @rdev: radeon_device pointer
5391  *
5392  * Disable interrupts on the hw (CIK).
5393  */
5394 static void cik_irq_disable(struct radeon_device *rdev)
5395 {
5396 	cik_disable_interrupts(rdev);
5397 	/* Wait and acknowledge irq */
5398 	mdelay(1);
5399 	cik_irq_ack(rdev);
5400 	cik_disable_interrupt_state(rdev);
5401 }
5402 
5403 /**
5404  * cik_irq_suspend - disable interrupts for suspend
5405  *
5406  * @rdev: radeon_device pointer
5407  *
5408  * Disable interrupts and stop the RLC (CIK).
5409  * Used for suspend.
5410  */
5411 static void cik_irq_suspend(struct radeon_device *rdev)
5412 {
5413 	cik_irq_disable(rdev);
5414 	cik_rlc_stop(rdev);
5415 }
5416 
5417 /**
5418  * cik_irq_fini - tear down interrupt support
5419  *
5420  * @rdev: radeon_device pointer
5421  *
5422  * Disable interrupts on the hw and free the IH ring
5423  * buffer (CIK).
5424  * Used for driver unload.
5425  */
5426 static void cik_irq_fini(struct radeon_device *rdev)
5427 {
5428 	cik_irq_suspend(rdev);
5429 	r600_ih_ring_fini(rdev);
5430 }
5431 
5432 /**
5433  * cik_get_ih_wptr - get the IH ring buffer wptr
5434  *
5435  * @rdev: radeon_device pointer
5436  *
5437  * Get the IH ring buffer wptr from either the register
5438  * or the writeback memory buffer (CIK).  Also check for
5439  * ring buffer overflow and deal with it.
5440  * Used by cik_irq_process().
5441  * Returns the value of the wptr.
5442  */
5443 static inline u32 cik_get_ih_wptr(struct radeon_device *rdev)
5444 {
5445 	u32 wptr, tmp;
5446 
5447 	if (rdev->wb.enabled)
5448 		wptr = le32_to_cpu(rdev->wb.wb[R600_WB_IH_WPTR_OFFSET/4]);
5449 	else
5450 		wptr = RREG32(IH_RB_WPTR);
5451 
5452 	if (wptr & RB_OVERFLOW) {
5453 		/* When a ring buffer overflow happens, start parsing interrupts
5454 		 * from the last vector that was not overwritten, i.e. wptr + 16
5455 		 * (one 16-byte IV entry past wptr).  Hopefully this lets us catch up.
5456 		 */
5457 		dev_warn(rdev->dev, "IH ring buffer overflow (0x%08X, %d, %d)\n",
5458 			wptr, rdev->ih.rptr, (wptr + 16) & rdev->ih.ptr_mask);
5459 		rdev->ih.rptr = (wptr + 16) & rdev->ih.ptr_mask;
5460 		tmp = RREG32(IH_RB_CNTL);
5461 		tmp |= IH_WPTR_OVERFLOW_CLEAR;
5462 		WREG32(IH_RB_CNTL, tmp);
5463 	}
5464 	return (wptr & rdev->ih.ptr_mask);
5465 }
5466 
5467 /*        CIK IV Ring
5468  * Each IV ring entry is 128 bits:
5469  * [7:0]    - interrupt source id
5470  * [31:8]   - reserved
5471  * [59:32]  - interrupt source data
5472  * [63:60]  - reserved
5473  * [71:64]  - RINGID
5474  *            CP:
5475  *            ME_ID [1:0], PIPE_ID[1:0], QUEUE_ID[2:0]
5476  *            QUEUE_ID - for compute, which of the 8 queues owned by the dispatcher
5477  *                     - for gfx, hw shader state (0=PS...5=LS, 6=CS)
5478  *            ME_ID - 0 = gfx, 1 = first 4 CS pipes, 2 = second 4 CS pipes
5479  *            PIPE_ID - ME0 0=3D
5480  *                    - ME1&2 compute dispatcher (4 pipes each)
5481  *            SDMA:
5482  *            INSTANCE_ID [1:0], QUEUE_ID[1:0]
5483  *            INSTANCE_ID - 0 = sdma0, 1 = sdma1
5484  *            QUEUE_ID - 0 = gfx, 1 = rlc0, 2 = rlc1
5485  * [79:72]  - VMID
5486  * [95:80]  - PASID
5487  * [127:96] - reserved
5488  */
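/* Example decode (assuming the RINGID layout used by cik_irq_process()
 * below, i.e. ME_ID in bits [6:5], PIPE_ID in bits [4:3], QUEUE_ID in
 * bits [2:0]): a CP EOP entry with RINGID 0x28 would come from ME1,
 * pipe 1, queue 0.
 */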
5489 /**
5490  * cik_irq_process - interrupt handler
5491  *
5492  * @rdev: radeon_device pointer
5493  *
5494  * Interrupt handler (CIK).  Walk the IH ring,
5495  * ack interrupts and schedule work to handle
5496  * interrupt events.
5497  * Returns irq process return code.
5498  */
5499 int cik_irq_process(struct radeon_device *rdev)
5500 {
5501 	struct radeon_ring *cp1_ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
5502 	struct radeon_ring *cp2_ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
5503 	u32 wptr;
5504 	u32 rptr;
5505 	u32 src_id, src_data, ring_id;
5506 	u8 me_id, pipe_id, queue_id;
5507 	u32 ring_index;
5508 	bool queue_hotplug = false;
5509 	bool queue_reset = false;
5510 
5511 	if (!rdev->ih.enabled || rdev->shutdown)
5512 		return IRQ_NONE;
5513 
5514 	wptr = cik_get_ih_wptr(rdev);
5515 
5516 restart_ih:
5517 	/* is somebody else already processing irqs? */
5518 	if (atomic_xchg(&rdev->ih.lock, 1))
5519 		return IRQ_NONE;
5520 
5521 	rptr = rdev->ih.rptr;
5522 	DRM_DEBUG("cik_irq_process start: rptr %d, wptr %d\n", rptr, wptr);
5523 
5524 	/* Order reading of wptr vs. reading of IH ring data */
5525 	rmb();
5526 
5527 	/* display interrupts */
5528 	cik_irq_ack(rdev);
5529 
5530 	while (rptr != wptr) {
5531 		/* wptr/rptr are in bytes! */
5532 		ring_index = rptr / 4;
5533 		src_id =  le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff;
5534 		src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff;
5535 		ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff;
5536 
5537 		switch (src_id) {
5538 		case 1: /* D1 vblank/vline */
5539 			switch (src_data) {
5540 			case 0: /* D1 vblank */
5541 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VBLANK_INTERRUPT) {
5542 					if (rdev->irq.crtc_vblank_int[0]) {
5543 						drm_handle_vblank(rdev->ddev, 0);
5544 						rdev->pm.vblank_sync = true;
5545 						wake_up(&rdev->irq.vblank_queue);
5546 					}
5547 					if (atomic_read(&rdev->irq.pflip[0]))
5548 						radeon_crtc_handle_flip(rdev, 0);
5549 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VBLANK_INTERRUPT;
5550 					DRM_DEBUG("IH: D1 vblank\n");
5551 				}
5552 				break;
5553 			case 1: /* D1 vline */
5554 				if (rdev->irq.stat_regs.cik.disp_int & LB_D1_VLINE_INTERRUPT) {
5555 					rdev->irq.stat_regs.cik.disp_int &= ~LB_D1_VLINE_INTERRUPT;
5556 					DRM_DEBUG("IH: D1 vline\n");
5557 				}
5558 				break;
5559 			default:
5560 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5561 				break;
5562 			}
5563 			break;
5564 		case 2: /* D2 vblank/vline */
5565 			switch (src_data) {
5566 			case 0: /* D2 vblank */
5567 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VBLANK_INTERRUPT) {
5568 					if (rdev->irq.crtc_vblank_int[1]) {
5569 						drm_handle_vblank(rdev->ddev, 1);
5570 						rdev->pm.vblank_sync = true;
5571 						wake_up(&rdev->irq.vblank_queue);
5572 					}
5573 					if (atomic_read(&rdev->irq.pflip[1]))
5574 						radeon_crtc_handle_flip(rdev, 1);
5575 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VBLANK_INTERRUPT;
5576 					DRM_DEBUG("IH: D2 vblank\n");
5577 				}
5578 				break;
5579 			case 1: /* D2 vline */
5580 				if (rdev->irq.stat_regs.cik.disp_int_cont & LB_D2_VLINE_INTERRUPT) {
5581 					rdev->irq.stat_regs.cik.disp_int_cont &= ~LB_D2_VLINE_INTERRUPT;
5582 					DRM_DEBUG("IH: D2 vline\n");
5583 				}
5584 				break;
5585 			default:
5586 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5587 				break;
5588 			}
5589 			break;
5590 		case 3: /* D3 vblank/vline */
5591 			switch (src_data) {
5592 			case 0: /* D3 vblank */
5593 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VBLANK_INTERRUPT) {
5594 					if (rdev->irq.crtc_vblank_int[2]) {
5595 						drm_handle_vblank(rdev->ddev, 2);
5596 						rdev->pm.vblank_sync = true;
5597 						wake_up(&rdev->irq.vblank_queue);
5598 					}
5599 					if (atomic_read(&rdev->irq.pflip[2]))
5600 						radeon_crtc_handle_flip(rdev, 2);
5601 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VBLANK_INTERRUPT;
5602 					DRM_DEBUG("IH: D3 vblank\n");
5603 				}
5604 				break;
5605 			case 1: /* D3 vline */
5606 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & LB_D3_VLINE_INTERRUPT) {
5607 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~LB_D3_VLINE_INTERRUPT;
5608 					DRM_DEBUG("IH: D3 vline\n");
5609 				}
5610 				break;
5611 			default:
5612 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5613 				break;
5614 			}
5615 			break;
5616 		case 4: /* D4 vblank/vline */
5617 			switch (src_data) {
5618 			case 0: /* D4 vblank */
5619 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VBLANK_INTERRUPT) {
5620 					if (rdev->irq.crtc_vblank_int[3]) {
5621 						drm_handle_vblank(rdev->ddev, 3);
5622 						rdev->pm.vblank_sync = true;
5623 						wake_up(&rdev->irq.vblank_queue);
5624 					}
5625 					if (atomic_read(&rdev->irq.pflip[3]))
5626 						radeon_crtc_handle_flip(rdev, 3);
5627 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VBLANK_INTERRUPT;
5628 					DRM_DEBUG("IH: D4 vblank\n");
5629 				}
5630 				break;
5631 			case 1: /* D4 vline */
5632 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & LB_D4_VLINE_INTERRUPT) {
5633 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~LB_D4_VLINE_INTERRUPT;
5634 					DRM_DEBUG("IH: D4 vline\n");
5635 				}
5636 				break;
5637 			default:
5638 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5639 				break;
5640 			}
5641 			break;
5642 		case 5: /* D5 vblank/vline */
5643 			switch (src_data) {
5644 			case 0: /* D5 vblank */
5645 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VBLANK_INTERRUPT) {
5646 					if (rdev->irq.crtc_vblank_int[4]) {
5647 						drm_handle_vblank(rdev->ddev, 4);
5648 						rdev->pm.vblank_sync = true;
5649 						wake_up(&rdev->irq.vblank_queue);
5650 					}
5651 					if (atomic_read(&rdev->irq.pflip[4]))
5652 						radeon_crtc_handle_flip(rdev, 4);
5653 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VBLANK_INTERRUPT;
5654 					DRM_DEBUG("IH: D5 vblank\n");
5655 				}
5656 				break;
5657 			case 1: /* D5 vline */
5658 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & LB_D5_VLINE_INTERRUPT) {
5659 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~LB_D5_VLINE_INTERRUPT;
5660 					DRM_DEBUG("IH: D5 vline\n");
5661 				}
5662 				break;
5663 			default:
5664 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5665 				break;
5666 			}
5667 			break;
5668 		case 6: /* D6 vblank/vline */
5669 			switch (src_data) {
5670 			case 0: /* D6 vblank */
5671 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VBLANK_INTERRUPT) {
5672 					if (rdev->irq.crtc_vblank_int[5]) {
5673 						drm_handle_vblank(rdev->ddev, 5);
5674 						rdev->pm.vblank_sync = true;
5675 						wake_up(&rdev->irq.vblank_queue);
5676 					}
5677 					if (atomic_read(&rdev->irq.pflip[5]))
5678 						radeon_crtc_handle_flip(rdev, 5);
5679 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VBLANK_INTERRUPT;
5680 					DRM_DEBUG("IH: D6 vblank\n");
5681 				}
5682 				break;
5683 			case 1: /* D6 vline */
5684 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & LB_D6_VLINE_INTERRUPT) {
5685 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~LB_D6_VLINE_INTERRUPT;
5686 					DRM_DEBUG("IH: D6 vline\n");
5687 				}
5688 				break;
5689 			default:
5690 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5691 				break;
5692 			}
5693 			break;
5694 		case 42: /* HPD hotplug */
5695 			switch (src_data) {
5696 			case 0:
5697 				if (rdev->irq.stat_regs.cik.disp_int & DC_HPD1_INTERRUPT) {
5698 					rdev->irq.stat_regs.cik.disp_int &= ~DC_HPD1_INTERRUPT;
5699 					queue_hotplug = true;
5700 					DRM_DEBUG("IH: HPD1\n");
5701 				}
5702 				break;
5703 			case 1:
5704 				if (rdev->irq.stat_regs.cik.disp_int_cont & DC_HPD2_INTERRUPT) {
5705 					rdev->irq.stat_regs.cik.disp_int_cont &= ~DC_HPD2_INTERRUPT;
5706 					queue_hotplug = true;
5707 					DRM_DEBUG("IH: HPD2\n");
5708 				}
5709 				break;
5710 			case 2:
5711 				if (rdev->irq.stat_regs.cik.disp_int_cont2 & DC_HPD3_INTERRUPT) {
5712 					rdev->irq.stat_regs.cik.disp_int_cont2 &= ~DC_HPD3_INTERRUPT;
5713 					queue_hotplug = true;
5714 					DRM_DEBUG("IH: HPD3\n");
5715 				}
5716 				break;
5717 			case 3:
5718 				if (rdev->irq.stat_regs.cik.disp_int_cont3 & DC_HPD4_INTERRUPT) {
5719 					rdev->irq.stat_regs.cik.disp_int_cont3 &= ~DC_HPD4_INTERRUPT;
5720 					queue_hotplug = true;
5721 					DRM_DEBUG("IH: HPD4\n");
5722 				}
5723 				break;
5724 			case 4:
5725 				if (rdev->irq.stat_regs.cik.disp_int_cont4 & DC_HPD5_INTERRUPT) {
5726 					rdev->irq.stat_regs.cik.disp_int_cont4 &= ~DC_HPD5_INTERRUPT;
5727 					queue_hotplug = true;
5728 					DRM_DEBUG("IH: HPD5\n");
5729 				}
5730 				break;
5731 			case 5:
5732 				if (rdev->irq.stat_regs.cik.disp_int_cont5 & DC_HPD6_INTERRUPT) {
5733 					rdev->irq.stat_regs.cik.disp_int_cont5 &= ~DC_HPD6_INTERRUPT;
5734 					queue_hotplug = true;
5735 					DRM_DEBUG("IH: HPD6\n");
5736 				}
5737 				break;
5738 			default:
5739 				DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5740 				break;
5741 			}
5742 			break;
5743 		case 146:
5744 		case 147:
5745 			dev_err(rdev->dev, "GPU fault detected: %d 0x%08x\n", src_id, src_data);
5746 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_ADDR   0x%08X\n",
5747 				RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR));
5748 			dev_err(rdev->dev, "  VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
5749 				RREG32(VM_CONTEXT1_PROTECTION_FAULT_STATUS));
5750 			/* reset addr and status */
5751 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
5752 			break;
5753 		case 176: /* GFX RB CP_INT */
5754 		case 177: /* GFX IB CP_INT */
5755 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5756 			break;
5757 		case 181: /* CP EOP event */
5758 			DRM_DEBUG("IH: CP EOP\n");
5759 			/* XXX check the bitfield order! */
5760 			me_id = (ring_id & 0x60) >> 5;
5761 			pipe_id = (ring_id & 0x18) >> 3;
5762 			queue_id = (ring_id & 0x7) >> 0;
5763 			switch (me_id) {
5764 			case 0:
5765 				radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
5766 				break;
5767 			case 1:
5768 			case 2:
5769 				if ((cp1_ring->me == me_id) && (cp1_ring->pipe == pipe_id))
5770 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
5771 				if ((cp2_ring->me == me_id) && (cp2_ring->pipe == pipe_id))
5772 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
5773 				break;
5774 			}
5775 			break;
5776 		case 184: /* CP Privileged reg access */
5777 			DRM_ERROR("Illegal register access in command stream\n");
5778 			/* XXX check the bitfield order! */
5779 			me_id = (ring_id & 0x60) >> 5;
5780 			pipe_id = (ring_id & 0x18) >> 3;
5781 			queue_id = (ring_id & 0x7) >> 0;
5782 			switch (me_id) {
5783 			case 0:
5784 				/* This results in a full GPU reset, but all we need to do is soft
5785 				 * reset the CP for gfx
5786 				 */
5787 				queue_reset = true;
5788 				break;
5789 			case 1:
5790 				/* XXX compute */
5791 				queue_reset = true;
5792 				break;
5793 			case 2:
5794 				/* XXX compute */
5795 				queue_reset = true;
5796 				break;
5797 			}
5798 			break;
5799 		case 185: /* CP Privileged inst */
5800 			DRM_ERROR("Illegal instruction in command stream\n");
5801 			/* XXX check the bitfield order! */
5802 			me_id = (ring_id & 0x60) >> 5;
5803 			pipe_id = (ring_id & 0x18) >> 3;
5804 			queue_id = (ring_id & 0x7) >> 0;
5805 			switch (me_id) {
5806 			case 0:
5807 				/* This results in a full GPU reset, but all we need to do is soft
5808 				 * reset the CP for gfx
5809 				 */
5810 				queue_reset = true;
5811 				break;
5812 			case 1:
5813 				/* XXX compute */
5814 				queue_reset = true;
5815 				break;
5816 			case 2:
5817 				/* XXX compute */
5818 				queue_reset = true;
5819 				break;
5820 			}
5821 			break;
5822 		case 224: /* SDMA trap event */
5823 			/* XXX check the bitfield order! */
5824 			me_id = (ring_id & 0x3) >> 0;
5825 			queue_id = (ring_id & 0xc) >> 2;
5826 			DRM_DEBUG("IH: SDMA trap\n");
5827 			switch (me_id) {
5828 			case 0:
5829 				switch (queue_id) {
5830 				case 0:
5831 					radeon_fence_process(rdev, R600_RING_TYPE_DMA_INDEX);
5832 					break;
5833 				case 1:
5834 					/* XXX compute */
5835 					break;
5836 				case 2:
5837 					/* XXX compute */
5838 					break;
5839 				}
5840 				break;
5841 			case 1:
5842 				switch (queue_id) {
5843 				case 0:
5844 					radeon_fence_process(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
5845 					break;
5846 				case 1:
5847 					/* XXX compute */
5848 					break;
5849 				case 2:
5850 					/* XXX compute */
5851 					break;
5852 				}
5853 				break;
5854 			}
5855 			break;
5856 		case 241: /* SDMA Privileged inst */
5857 		case 247: /* SDMA Privileged inst */
5858 			DRM_ERROR("Illegal instruction in SDMA command stream\n");
5859 			/* XXX check the bitfield order! */
5860 			me_id = (ring_id & 0x3) >> 0;
5861 			queue_id = (ring_id & 0xc) >> 2;
5862 			switch (me_id) {
5863 			case 0:
5864 				switch (queue_id) {
5865 				case 0:
5866 					queue_reset = true;
5867 					break;
5868 				case 1:
5869 					/* XXX compute */
5870 					queue_reset = true;
5871 					break;
5872 				case 2:
5873 					/* XXX compute */
5874 					queue_reset = true;
5875 					break;
5876 				}
5877 				break;
5878 			case 1:
5879 				switch (queue_id) {
5880 				case 0:
5881 					queue_reset = true;
5882 					break;
5883 				case 1:
5884 					/* XXX compute */
5885 					queue_reset = true;
5886 					break;
5887 				case 2:
5888 					/* XXX compute */
5889 					queue_reset = true;
5890 					break;
5891 				}
5892 				break;
5893 			}
5894 			break;
5895 		case 233: /* GUI IDLE */
5896 			DRM_DEBUG("IH: GUI idle\n");
5897 			break;
5898 		default:
5899 			DRM_DEBUG("Unhandled interrupt: %d %d\n", src_id, src_data);
5900 			break;
5901 		}
5902 
5903 		/* wptr/rptr are in bytes! */
5904 		rptr += 16;
5905 		rptr &= rdev->ih.ptr_mask;
5906 	}
5907 	if (queue_hotplug)
5908 		schedule_work(&rdev->hotplug_work);
5909 	if (queue_reset)
5910 		schedule_work(&rdev->reset_work);
5911 	rdev->ih.rptr = rptr;
5912 	WREG32(IH_RB_RPTR, rdev->ih.rptr);
5913 	atomic_set(&rdev->ih.lock, 0);
5914 
5915 	/* make sure wptr hasn't changed while processing */
5916 	wptr = cik_get_ih_wptr(rdev);
5917 	if (wptr != rptr)
5918 		goto restart_ih;
5919 
5920 	return IRQ_HANDLED;
5921 }
5922 
5923 /*
5924  * startup/shutdown callbacks
5925  */
5926 /**
5927  * cik_startup - program the asic to a functional state
5928  *
5929  * @rdev: radeon_device pointer
5930  *
5931  * Programs the asic to a functional state (CIK).
5932  * Called by cik_init() and cik_resume().
5933  * Returns 0 for success, error for failure.
5934  */
5935 static int cik_startup(struct radeon_device *rdev)
5936 {
5937 	struct radeon_ring *ring;
5938 	int r;
5939 
5940 	if (rdev->flags & RADEON_IS_IGP) {
5941 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5942 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw) {
5943 			r = cik_init_microcode(rdev);
5944 			if (r) {
5945 				DRM_ERROR("Failed to load firmware!\n");
5946 				return r;
5947 			}
5948 		}
5949 	} else {
5950 		if (!rdev->me_fw || !rdev->pfp_fw || !rdev->ce_fw ||
5951 		    !rdev->mec_fw || !rdev->sdma_fw || !rdev->rlc_fw ||
5952 		    !rdev->mc_fw) {
5953 			r = cik_init_microcode(rdev);
5954 			if (r) {
5955 				DRM_ERROR("Failed to load firmware!\n");
5956 				return r;
5957 			}
5958 		}
5959 
5960 		r = ci_mc_load_microcode(rdev);
5961 		if (r) {
5962 			DRM_ERROR("Failed to load MC firmware!\n");
5963 			return r;
5964 		}
5965 	}
5966 
5967 	r = r600_vram_scratch_init(rdev);
5968 	if (r)
5969 		return r;
5970 
5971 	cik_mc_program(rdev);
5972 	r = cik_pcie_gart_enable(rdev);
5973 	if (r)
5974 		return r;
5975 	cik_gpu_init(rdev);
5976 
5977 	/* allocate rlc buffers */
5978 	r = si_rlc_init(rdev);
5979 	if (r) {
5980 		DRM_ERROR("Failed to init rlc BOs!\n");
5981 		return r;
5982 	}
5983 
5984 	/* allocate wb buffer */
5985 	r = radeon_wb_init(rdev);
5986 	if (r)
5987 		return r;
5988 
5989 	/* allocate mec buffers */
5990 	r = cik_mec_init(rdev);
5991 	if (r) {
5992 		DRM_ERROR("Failed to init MEC BOs!\n");
5993 		return r;
5994 	}
5995 
5996 	r = radeon_fence_driver_start_ring(rdev, RADEON_RING_TYPE_GFX_INDEX);
5997 	if (r) {
5998 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
5999 		return r;
6000 	}
6001 
6002 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP1_INDEX);
6003 	if (r) {
6004 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6005 		return r;
6006 	}
6007 
6008 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_CP2_INDEX);
6009 	if (r) {
6010 		dev_err(rdev->dev, "failed initializing CP fences (%d).\n", r);
6011 		return r;
6012 	}
6013 
6014 	r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_DMA_INDEX);
6015 	if (r) {
6016 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6017 		return r;
6018 	}
6019 
6020 	r = radeon_fence_driver_start_ring(rdev, CAYMAN_RING_TYPE_DMA1_INDEX);
6021 	if (r) {
6022 		dev_err(rdev->dev, "failed initializing DMA fences (%d).\n", r);
6023 		return r;
6024 	}
6025 
6026 	r = cik_uvd_resume(rdev);
6027 	if (!r) {
6028 		r = radeon_fence_driver_start_ring(rdev,
6029 						   R600_RING_TYPE_UVD_INDEX);
6030 		if (r)
6031 			dev_err(rdev->dev, "UVD fences init error (%d).\n", r);
6032 	}
6033 	if (r)
6034 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
6035 
6036 	/* Enable IRQ */
6037 	if (!rdev->irq.installed) {
6038 		r = radeon_irq_kms_init(rdev);
6039 		if (r)
6040 			return r;
6041 	}
6042 
6043 	r = cik_irq_init(rdev);
6044 	if (r) {
6045 		DRM_ERROR("radeon: IH init failed (%d).\n", r);
6046 		radeon_irq_kms_fini(rdev);
6047 		return r;
6048 	}
6049 	cik_irq_set(rdev);
6050 
6051 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6052 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP_RPTR_OFFSET,
6053 			     CP_RB0_RPTR, CP_RB0_WPTR,
6054 			     0, 0xfffff, RADEON_CP_PACKET2);
6055 	if (r)
6056 		return r;
6057 
6058 	/* set up the compute queues */
6059 	/* type-2 packets are deprecated on MEC, use type-3 instead */
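	/* hence the compute rings below pass PACKET3(PACKET3_NOP, 0x3FFF)
	 * as the nop filler to radeon_ring_init(), rather than the
	 * RADEON_CP_PACKET2 filler used for the gfx ring above.
	 */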
6060 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6061 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP1_RPTR_OFFSET,
6062 			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6063 			     0, 0xfffff, PACKET3(PACKET3_NOP, 0x3FFF));
6064 	if (r)
6065 		return r;
6066 	ring->me = 1; /* first MEC */
6067 	ring->pipe = 0; /* first pipe */
6068 	ring->queue = 0; /* first queue */
6069 	ring->wptr_offs = CIK_WB_CP1_WPTR_OFFSET;
6070 
6071 	/* type-2 packets are deprecated on MEC, use type-3 instead */
6072 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6073 	r = radeon_ring_init(rdev, ring, ring->ring_size, RADEON_WB_CP2_RPTR_OFFSET,
6074 			     CP_HQD_PQ_RPTR, CP_HQD_PQ_WPTR,
6075 			     0, 0xffffffff, PACKET3(PACKET3_NOP, 0x3FFF));
6076 	if (r)
6077 		return r;
6078 	/* dGPUs only have 1 MEC */
6079 	ring->me = 1; /* first MEC */
6080 	ring->pipe = 0; /* first pipe */
6081 	ring->queue = 1; /* second queue */
6082 	ring->wptr_offs = CIK_WB_CP2_WPTR_OFFSET;
6083 
6084 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6085 	r = radeon_ring_init(rdev, ring, ring->ring_size, R600_WB_DMA_RPTR_OFFSET,
6086 			     SDMA0_GFX_RB_RPTR + SDMA0_REGISTER_OFFSET,
6087 			     SDMA0_GFX_RB_WPTR + SDMA0_REGISTER_OFFSET,
6088 			     2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6089 	if (r)
6090 		return r;
6091 
6092 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6093 	r = radeon_ring_init(rdev, ring, ring->ring_size, CAYMAN_WB_DMA1_RPTR_OFFSET,
6094 			     SDMA0_GFX_RB_RPTR + SDMA1_REGISTER_OFFSET,
6095 			     SDMA0_GFX_RB_WPTR + SDMA1_REGISTER_OFFSET,
6096 			     2, 0xfffffffc, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0));
6097 	if (r)
6098 		return r;
6099 
6100 	r = cik_cp_resume(rdev);
6101 	if (r)
6102 		return r;
6103 
6104 	r = cik_sdma_resume(rdev);
6105 	if (r)
6106 		return r;
6107 
6108 	ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6109 	if (ring->ring_size) {
6110 		r = radeon_ring_init(rdev, ring, ring->ring_size,
6111 				     R600_WB_UVD_RPTR_OFFSET,
6112 				     UVD_RBC_RB_RPTR, UVD_RBC_RB_WPTR,
6113 				     0, 0xfffff, RADEON_CP_PACKET2);
6114 		if (!r)
6115 			r = r600_uvd_init(rdev);
6116 		if (r)
6117 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
6118 	}
6119 
6120 	r = radeon_ib_pool_init(rdev);
6121 	if (r) {
6122 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
6123 		return r;
6124 	}
6125 
6126 	r = radeon_vm_manager_init(rdev);
6127 	if (r) {
6128 		dev_err(rdev->dev, "vm manager initialization failed (%d).\n", r);
6129 		return r;
6130 	}
6131 
6132 	return 0;
6133 }
6134 
6135 /**
6136  * cik_resume - resume the asic to a functional state
6137  *
6138  * @rdev: radeon_device pointer
6139  *
6140  * Programs the asic to a functional state (CIK).
6141  * Called at resume.
6142  * Returns 0 for success, error for failure.
6143  */
6144 int cik_resume(struct radeon_device *rdev)
6145 {
6146 	int r;
6147 
6148 	/* post card */
6149 	atom_asic_init(rdev->mode_info.atom_context);
6150 
6151 	/* init golden registers */
6152 	cik_init_golden_registers(rdev);
6153 
6154 	rdev->accel_working = true;
6155 	r = cik_startup(rdev);
6156 	if (r) {
6157 		DRM_ERROR("cik startup failed on resume\n");
6158 		rdev->accel_working = false;
6159 		return r;
6160 	}
6161 
6162 	return r;
6163 
6164 }
6165 
6166 /**
6167  * cik_suspend - suspend the asic
6168  *
6169  * @rdev: radeon_device pointer
6170  *
6171  * Bring the chip into a state suitable for suspend (CIK).
6172  * Called at suspend.
6173  * Returns 0 for success.
6174  */
6175 int cik_suspend(struct radeon_device *rdev)
6176 {
6177 	radeon_vm_manager_fini(rdev);
6178 	cik_cp_enable(rdev, false);
6179 	cik_sdma_enable(rdev, false);
6180 	r600_uvd_rbc_stop(rdev);
6181 	radeon_uvd_suspend(rdev);
6182 	cik_irq_suspend(rdev);
6183 	radeon_wb_disable(rdev);
6184 	cik_pcie_gart_disable(rdev);
6185 	return 0;
6186 }
6187 
6188 /* The plan is to move initialization into this function and use
6189  * helper functions so that radeon_device_init does little more
6190  * than call asic specific functions.  This should also allow us
6191  * to remove a bunch of callback functions
6192  * like vram_info.
6193  */
6194 /**
6195  * cik_init - asic specific driver and hw init
6196  *
6197  * @rdev: radeon_device pointer
6198  *
6199  * Setup asic specific driver variables and program the hw
6200  * to a functional state (CIK).
6201  * Called at driver startup.
6202  * Returns 0 for success, errors for failure.
6203  */
6204 int cik_init(struct radeon_device *rdev)
6205 {
6206 	struct radeon_ring *ring;
6207 	int r;
6208 
6209 	/* Read BIOS */
6210 	if (!radeon_get_bios(rdev)) {
6211 		if (ASIC_IS_AVIVO(rdev))
6212 			return -EINVAL;
6213 	}
6214 	/* Must be an ATOMBIOS */
6215 	if (!rdev->is_atom_bios) {
6216 		dev_err(rdev->dev, "Expecting atombios for CIK GPU\n");
6217 		return -EINVAL;
6218 	}
6219 	r = radeon_atombios_init(rdev);
6220 	if (r)
6221 		return r;
6222 
6223 	/* Post card if necessary */
6224 	if (!radeon_card_posted(rdev)) {
6225 		if (!rdev->bios) {
6226 			dev_err(rdev->dev, "Card not posted and no BIOS - ignoring\n");
6227 			return -EINVAL;
6228 		}
6229 		DRM_INFO("GPU not posted. posting now...\n");
6230 		atom_asic_init(rdev->mode_info.atom_context);
6231 	}
6232 	/* init golden registers */
6233 	cik_init_golden_registers(rdev);
6234 	/* Initialize scratch registers */
6235 	cik_scratch_init(rdev);
6236 	/* Initialize surface registers */
6237 	radeon_surface_init(rdev);
6238 	/* Initialize clocks */
6239 	radeon_get_clock_info(rdev->ddev);
6240 
6241 	/* Fence driver */
6242 	r = radeon_fence_driver_init(rdev);
6243 	if (r)
6244 		return r;
6245 
6246 	/* initialize memory controller */
6247 	r = cik_mc_init(rdev);
6248 	if (r)
6249 		return r;
6250 	/* Memory manager */
6251 	r = radeon_bo_init(rdev);
6252 	if (r)
6253 		return r;
6254 
6255 	ring = &rdev->ring[RADEON_RING_TYPE_GFX_INDEX];
6256 	ring->ring_obj = NULL;
6257 	r600_ring_init(rdev, ring, 1024 * 1024);
6258 
6259 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
6260 	ring->ring_obj = NULL;
6261 	r600_ring_init(rdev, ring, 1024 * 1024);
6262 	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6263 	if (r)
6264 		return r;
6265 
6266 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
6267 	ring->ring_obj = NULL;
6268 	r600_ring_init(rdev, ring, 1024 * 1024);
6269 	r = radeon_doorbell_get(rdev, &ring->doorbell_page_num);
6270 	if (r)
6271 		return r;
6272 
6273 	ring = &rdev->ring[R600_RING_TYPE_DMA_INDEX];
6274 	ring->ring_obj = NULL;
6275 	r600_ring_init(rdev, ring, 256 * 1024);
6276 
6277 	ring = &rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX];
6278 	ring->ring_obj = NULL;
6279 	r600_ring_init(rdev, ring, 256 * 1024);
6280 
6281 	r = radeon_uvd_init(rdev);
6282 	if (!r) {
6283 		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
6284 		ring->ring_obj = NULL;
6285 		r600_ring_init(rdev, ring, 4096);
6286 	}
6287 
6288 	rdev->ih.ring_obj = NULL;
6289 	r600_ih_ring_init(rdev, 64 * 1024);
6290 
6291 	r = r600_pcie_gart_init(rdev);
6292 	if (r)
6293 		return r;
6294 
6295 	rdev->accel_working = true;
6296 	r = cik_startup(rdev);
6297 	if (r) {
6298 		dev_err(rdev->dev, "disabling GPU acceleration\n");
6299 		cik_cp_fini(rdev);
6300 		cik_sdma_fini(rdev);
6301 		cik_irq_fini(rdev);
6302 		si_rlc_fini(rdev);
6303 		cik_mec_fini(rdev);
6304 		radeon_wb_fini(rdev);
6305 		radeon_ib_pool_fini(rdev);
6306 		radeon_vm_manager_fini(rdev);
6307 		radeon_irq_kms_fini(rdev);
6308 		cik_pcie_gart_fini(rdev);
6309 		rdev->accel_working = false;
6310 	}
6311 
6312 	/* Don't start up if the MC ucode is missing.
6313 	 * The default clocks and voltages before the MC ucode
6314 	 * is loaded are not sufficient for advanced operations.
6315 	 */
6316 	if (!rdev->mc_fw && !(rdev->flags & RADEON_IS_IGP)) {
6317 		DRM_ERROR("radeon: MC ucode required for CIK dGPUs.\n");
6318 		return -EINVAL;
6319 	}
6320 
6321 	return 0;
6322 }
6323 
6324 /**
6325  * cik_fini - asic specific driver and hw fini
6326  *
6327  * @rdev: radeon_device pointer
6328  *
6329  * Tear down the asic specific driver variables and program the hw
6330  * to an idle state (CIK).
6331  * Called at driver unload.
6332  */
6333 void cik_fini(struct radeon_device *rdev)
6334 {
6335 	cik_cp_fini(rdev);
6336 	cik_sdma_fini(rdev);
6337 	cik_irq_fini(rdev);
6338 	si_rlc_fini(rdev);
6339 	cik_mec_fini(rdev);
6340 	radeon_wb_fini(rdev);
6341 	radeon_vm_manager_fini(rdev);
6342 	radeon_ib_pool_fini(rdev);
6343 	radeon_irq_kms_fini(rdev);
6344 	radeon_uvd_fini(rdev);
6345 	cik_pcie_gart_fini(rdev);
6346 	r600_vram_scratch_fini(rdev);
6347 	radeon_gem_fini(rdev);
6348 	radeon_fence_driver_fini(rdev);
6349 	radeon_bo_fini(rdev);
6350 	radeon_atombios_fini(rdev);
6351 	kfree(rdev->bios);
6352 	rdev->bios = NULL;
6353 }
6354 
6355 /* display watermark setup */
6356 /**
6357  * dce8_line_buffer_adjust - Set up the line buffer
6358  *
6359  * @rdev: radeon_device pointer
6360  * @radeon_crtc: the selected display controller
6361  * @mode: the current display mode on the selected display
6362  * controller
6363  *
6364  * Set up the line buffer allocation for
6365  * the selected display controller (CIK).
6366  * Returns the line buffer size in pixels.
6367  */
6368 static u32 dce8_line_buffer_adjust(struct radeon_device *rdev,
6369 				   struct radeon_crtc *radeon_crtc,
6370 				   struct drm_display_mode *mode)
6371 {
6372 	u32 tmp;
6373 
6374 	/*
6375 	 * Line Buffer Setup
6376 	 * There are 6 line buffers, one for each display controller.
6377 	 * There are 3 partitions per LB. Select the number of partitions
6378 	 * to enable based on the display width.  For display widths larger
6379 	 * than 4096, you need to use 2 display controllers and combine
6380 	 * them using the stereo blender.
6381 	 */
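	/* For example, a 1920-pixel-wide mode falls in the [1920, 2560)
	 * range below, so it selects LB_MEMORY_CONFIG(2) and the function
	 * reports 2560 * 2 pixels of line buffer to the caller.
	 */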
6382 	if (radeon_crtc->base.enabled && mode) {
6383 		if (mode->crtc_hdisplay < 1920)
6384 			tmp = 1;
6385 		else if (mode->crtc_hdisplay < 2560)
6386 			tmp = 2;
6387 		else if (mode->crtc_hdisplay < 4096)
6388 			tmp = 0;
6389 		else {
6390 			DRM_DEBUG_KMS("Mode too big for LB!\n");
6391 			tmp = 0;
6392 		}
6393 	} else
6394 		tmp = 1;
6395 
6396 	WREG32(LB_MEMORY_CTRL + radeon_crtc->crtc_offset,
6397 	       LB_MEMORY_CONFIG(tmp) | LB_MEMORY_SIZE(0x6B0));
6398 
6399 	if (radeon_crtc->base.enabled && mode) {
6400 		switch (tmp) {
6401 		case 0:
6402 		default:
6403 			return 4096 * 2;
6404 		case 1:
6405 			return 1920 * 2;
6406 		case 2:
6407 			return 2560 * 2;
6408 		}
6409 	}
6410 
6411 	/* controller not enabled, so no lb used */
6412 	return 0;
6413 }
6414 
6415 /**
6416  * cik_get_number_of_dram_channels - get the number of dram channels
6417  *
6418  * @rdev: radeon_device pointer
6419  *
6420  * Look up the number of video ram channels (CIK).
6421  * Used for display watermark bandwidth calculations
6422  * Returns the number of dram channels
6423  */
6424 static u32 cik_get_number_of_dram_channels(struct radeon_device *rdev)
6425 {
6426 	u32 tmp = RREG32(MC_SHARED_CHMAP);
6427 
6428 	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
6429 	case 0:
6430 	default:
6431 		return 1;
6432 	case 1:
6433 		return 2;
6434 	case 2:
6435 		return 4;
6436 	case 3:
6437 		return 8;
6438 	case 4:
6439 		return 3;
6440 	case 5:
6441 		return 6;
6442 	case 6:
6443 		return 10;
6444 	case 7:
6445 		return 12;
6446 	case 8:
6447 		return 16;
6448 	}
6449 }
6450 
6451 struct dce8_wm_params {
6452 	u32 dram_channels; /* number of dram channels */
6453 	u32 yclk;          /* bandwidth per dram data pin in kHz */
6454 	u32 sclk;          /* engine clock in kHz */
6455 	u32 disp_clk;      /* display clock in kHz */
6456 	u32 src_width;     /* viewport width */
6457 	u32 active_time;   /* active display time in ns */
6458 	u32 blank_time;    /* blank time in ns */
6459 	bool interlaced;    /* mode is interlaced */
6460 	fixed20_12 vsc;    /* vertical scale ratio */
6461 	u32 num_heads;     /* number of active crtcs */
6462 	u32 bytes_per_pixel; /* bytes per pixel display + overlay */
6463 	u32 lb_size;       /* line buffer allocated to pipe */
6464 	u32 vtaps;         /* vertical scaler taps */
6465 };
6466 
6467 /**
6468  * dce8_dram_bandwidth - get the dram bandwidth
6469  *
6470  * @wm: watermark calculation data
6471  *
6472  * Calculate the raw dram bandwidth (CIK).
6473  * Used for display watermark bandwidth calculations
6474  * Returns the dram bandwidth in MBytes/s
6475  */
6476 static u32 dce8_dram_bandwidth(struct dce8_wm_params *wm)
6477 {
6478 	/* Calculate raw DRAM Bandwidth */
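	/* In plain terms this computes, in MBytes/s:
	 *   (yclk / 1000) * (dram_channels * 4) * 0.7
	 * i.e. the per-pin clock in MHz times an assumed 4 bytes per
	 * channel, derated by the 0.7 DRAM efficiency factor.
	 */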
6479 	fixed20_12 dram_efficiency; /* 0.7 */
6480 	fixed20_12 yclk, dram_channels, bandwidth;
6481 	fixed20_12 a;
6482 
6483 	a.full = dfixed_const(1000);
6484 	yclk.full = dfixed_const(wm->yclk);
6485 	yclk.full = dfixed_div(yclk, a);
6486 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
6487 	a.full = dfixed_const(10);
6488 	dram_efficiency.full = dfixed_const(7);
6489 	dram_efficiency.full = dfixed_div(dram_efficiency, a);
6490 	bandwidth.full = dfixed_mul(dram_channels, yclk);
6491 	bandwidth.full = dfixed_mul(bandwidth, dram_efficiency);
6492 
6493 	return dfixed_trunc(bandwidth);
6494 }
6495 
6496 /**
6497  * dce8_dram_bandwidth_for_display - get the dram bandwidth for display
6498  *
6499  * @wm: watermark calculation data
6500  *
6501  * Calculate the dram bandwidth used for display (CIK).
6502  * Used for display watermark bandwidth calculations
6503  * Returns the dram bandwidth for display in MBytes/s
6504  */
6505 static u32 dce8_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6506 {
6507 	/* Calculate DRAM Bandwidth and the part allocated to display. */
6508 	fixed20_12 disp_dram_allocation; /* 0.3 to 0.7 */
6509 	fixed20_12 yclk, dram_channels, bandwidth;
6510 	fixed20_12 a;
6511 
6512 	a.full = dfixed_const(1000);
6513 	yclk.full = dfixed_const(wm->yclk);
6514 	yclk.full = dfixed_div(yclk, a);
6515 	dram_channels.full = dfixed_const(wm->dram_channels * 4);
6516 	a.full = dfixed_const(10);
6517 	disp_dram_allocation.full = dfixed_const(3); /* XXX worst case value 0.3 */
6518 	disp_dram_allocation.full = dfixed_div(disp_dram_allocation, a);
6519 	bandwidth.full = dfixed_mul(dram_channels, yclk);
6520 	bandwidth.full = dfixed_mul(bandwidth, disp_dram_allocation);
6521 
6522 	return dfixed_trunc(bandwidth);
6523 }
6524 
6525 /**
6526  * dce8_data_return_bandwidth - get the data return bandwidth
6527  *
6528  * @wm: watermark calculation data
6529  *
6530  * Calculate the data return bandwidth used for display (CIK).
6531  * Used for display watermark bandwidth calculations
6532  * Returns the data return bandwidth in MBytes/s
6533  */
6534 static u32 dce8_data_return_bandwidth(struct dce8_wm_params *wm)
6535 {
6536 	/* Calculate the display Data return Bandwidth */
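	/* Equivalent formula, in MBytes/s:
	 *   (sclk / 1000) * 32 * 0.8
	 * i.e. the engine clock in MHz times 32 bytes per return,
	 * derated by the 0.8 return efficiency factor.
	 */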
6537 	fixed20_12 return_efficiency; /* 0.8 */
6538 	fixed20_12 sclk, bandwidth;
6539 	fixed20_12 a;
6540 
6541 	a.full = dfixed_const(1000);
6542 	sclk.full = dfixed_const(wm->sclk);
6543 	sclk.full = dfixed_div(sclk, a);
6544 	a.full = dfixed_const(10);
6545 	return_efficiency.full = dfixed_const(8);
6546 	return_efficiency.full = dfixed_div(return_efficiency, a);
6547 	a.full = dfixed_const(32);
6548 	bandwidth.full = dfixed_mul(a, sclk);
6549 	bandwidth.full = dfixed_mul(bandwidth, return_efficiency);
6550 
6551 	return dfixed_trunc(bandwidth);
6552 }
6553 
6554 /**
6555  * dce8_dmif_request_bandwidth - get the dmif bandwidth
6556  *
6557  * @wm: watermark calculation data
6558  *
6559  * Calculate the dmif bandwidth used for display (CIK).
6560  * Used for display watermark bandwidth calculations
6561  * Returns the dmif bandwidth in MBytes/s
6562  */
6563 static u32 dce8_dmif_request_bandwidth(struct dce8_wm_params *wm)
6564 {
6565 	/* Calculate the DMIF Request Bandwidth */
6566 	fixed20_12 disp_clk_request_efficiency; /* 0.8 */
6567 	fixed20_12 disp_clk, bandwidth;
6568 	fixed20_12 a, b;
6569 
6570 	a.full = dfixed_const(1000);
6571 	disp_clk.full = dfixed_const(wm->disp_clk);
6572 	disp_clk.full = dfixed_div(disp_clk, a);
6573 	a.full = dfixed_const(32);
6574 	b.full = dfixed_mul(a, disp_clk);
6575 
6576 	a.full = dfixed_const(10);
6577 	disp_clk_request_efficiency.full = dfixed_const(8);
6578 	disp_clk_request_efficiency.full = dfixed_div(disp_clk_request_efficiency, a);
6579 
6580 	bandwidth.full = dfixed_mul(b, disp_clk_request_efficiency);
6581 
6582 	return dfixed_trunc(bandwidth);
6583 }
6584 
6585 /**
6586  * dce8_available_bandwidth - get the min available bandwidth
6587  *
6588  * @wm: watermark calculation data
6589  *
6590  * Calculate the min available bandwidth used for display (CIK).
6591  * Used for display watermark bandwidth calculations
6592  * Returns the min available bandwidth in MBytes/s
6593  */
6594 static u32 dce8_available_bandwidth(struct dce8_wm_params *wm)
6595 {
6596 	/* Calculate the Available bandwidth. Display can use this temporarily but not on average. */
6597 	u32 dram_bandwidth = dce8_dram_bandwidth(wm);
6598 	u32 data_return_bandwidth = dce8_data_return_bandwidth(wm);
6599 	u32 dmif_req_bandwidth = dce8_dmif_request_bandwidth(wm);
6600 
6601 	return min(dram_bandwidth, min(data_return_bandwidth, dmif_req_bandwidth));
6602 }
6603 
6604 /**
6605  * dce8_average_bandwidth - get the average available bandwidth
6606  *
6607  * @wm: watermark calculation data
6608  *
6609  * Calculate the average available bandwidth used for display (CIK).
6610  * Used for display watermark bandwidth calculations
6611  * Returns the average available bandwidth in MBytes/s
6612  */
6613 static u32 dce8_average_bandwidth(struct dce8_wm_params *wm)
6614 {
6615 	/* Calculate the display mode Average Bandwidth
6616 	 * DisplayMode should contain the source and destination dimensions,
6617 	 * timing, etc.
6618 	 */
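	/* This works out to, in MBytes/s:
	 *   src_width * bytes_per_pixel * vsc / line_time
	 * where line_time is active + blank time converted from ns to us.
	 */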
6619 	fixed20_12 bpp;
6620 	fixed20_12 line_time;
6621 	fixed20_12 src_width;
6622 	fixed20_12 bandwidth;
6623 	fixed20_12 a;
6624 
6625 	a.full = dfixed_const(1000);
6626 	line_time.full = dfixed_const(wm->active_time + wm->blank_time);
6627 	line_time.full = dfixed_div(line_time, a);
6628 	bpp.full = dfixed_const(wm->bytes_per_pixel);
6629 	src_width.full = dfixed_const(wm->src_width);
6630 	bandwidth.full = dfixed_mul(src_width, bpp);
6631 	bandwidth.full = dfixed_mul(bandwidth, wm->vsc);
6632 	bandwidth.full = dfixed_div(bandwidth, line_time);
6633 
6634 	return dfixed_trunc(bandwidth);
6635 }
6636 
6637 /**
6638  * dce8_latency_watermark - get the latency watermark
6639  *
6640  * @wm: watermark calculation data
6641  *
6642  * Calculate the latency watermark (CIK).
6643  * Used for display watermark bandwidth calculations
6644  * Returns the latency watermark in ns
6645  */
6646 static u32 dce8_latency_watermark(struct dce8_wm_params *wm)
6647 {
6648 	/* First calculate the latency in ns */
6649 	u32 mc_latency = 2000; /* 2000 ns. */
6650 	u32 available_bandwidth = dce8_available_bandwidth(wm);
6651 	u32 worst_chunk_return_time = (512 * 8 * 1000) / available_bandwidth;
6652 	u32 cursor_line_pair_return_time = (128 * 4 * 1000) / available_bandwidth;
6653 	u32 dc_latency = 40000000 / wm->disp_clk; /* dc pipe latency */
6654 	u32 other_heads_data_return_time = ((wm->num_heads + 1) * worst_chunk_return_time) +
6655 		(wm->num_heads * cursor_line_pair_return_time);
6656 	u32 latency = mc_latency + other_heads_data_return_time + dc_latency;
6657 	u32 max_src_lines_per_dst_line, lb_fill_bw, line_fill_time;
6658 	u32 tmp, dmif_size = 12288;
6659 	fixed20_12 a, b, c;
6660 
6661 	if (wm->num_heads == 0)
6662 		return 0;
6663 
6664 	a.full = dfixed_const(2);
6665 	b.full = dfixed_const(1);
6666 	if ((wm->vsc.full > a.full) ||
6667 	    ((wm->vsc.full > b.full) && (wm->vtaps >= 3)) ||
6668 	    (wm->vtaps >= 5) ||
6669 	    ((wm->vsc.full >= a.full) && wm->interlaced))
6670 		max_src_lines_per_dst_line = 4;
6671 	else
6672 		max_src_lines_per_dst_line = 2;
6673 
6674 	a.full = dfixed_const(available_bandwidth);
6675 	b.full = dfixed_const(wm->num_heads);
6676 	a.full = dfixed_div(a, b);
6677 
6678 	b.full = dfixed_const(mc_latency + 512);
6679 	c.full = dfixed_const(wm->disp_clk);
6680 	b.full = dfixed_div(b, c);
6681 
6682 	c.full = dfixed_const(dmif_size);
6683 	b.full = dfixed_div(c, b);
6684 
6685 	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
6686 
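	/* also cap the line buffer fill rate at disp_clk / 1000 * bytes_per_pixel (MB/s) */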
6687 	b.full = dfixed_const(1000);
6688 	c.full = dfixed_const(wm->disp_clk);
6689 	b.full = dfixed_div(c, b);
6690 	c.full = dfixed_const(wm->bytes_per_pixel);
6691 	b.full = dfixed_mul(b, c);
6692 
6693 	lb_fill_bw = min(tmp, dfixed_trunc(b));
6694 
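	/* line_fill_time (ns) =
	 *   1000 * max_src_lines_per_dst_line * src_width * bytes_per_pixel / lb_fill_bw
	 */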
6695 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
6696 	b.full = dfixed_const(1000);
6697 	c.full = dfixed_const(lb_fill_bw);
6698 	b.full = dfixed_div(c, b);
6699 	a.full = dfixed_div(a, b);
6700 	line_fill_time = dfixed_trunc(a);
6701 
6702 	if (line_fill_time < wm->active_time)
6703 		return latency;
6704 	else
6705 		return latency + (line_fill_time - wm->active_time);
6706 
6707 }
6708 
6709 /**
6710  * dce8_average_bandwidth_vs_dram_bandwidth_for_display - check
6711  * average bandwidth against the dram bandwidth available for display
6712  *
6713  * @wm: watermark calculation data
6714  *
6715  * Check if the display average bandwidth fits in the display
6716  * dram bandwidth (CIK).
6717  * Used for display watermark bandwidth calculations
6718  * Returns true if the display fits, false if not.
6719  */
6720 static bool dce8_average_bandwidth_vs_dram_bandwidth_for_display(struct dce8_wm_params *wm)
6721 {
6722 	if (dce8_average_bandwidth(wm) <=
6723 	    (dce8_dram_bandwidth_for_display(wm) / wm->num_heads))
6724 		return true;
6725 	else
6726 		return false;
6727 }
6728 
6729 /**
6730  * dce8_average_bandwidth_vs_available_bandwidth - check
6731  * average bandwidth against the available bandwidth
6732  *
6733  * @wm: watermark calculation data
6734  *
6735  * Check if the display average bandwidth fits in the display
6736  * available bandwidth (CIK).
6737  * Used for display watermark bandwidth calculations
6738  * Returns true if the display fits, false if not.
6739  */
6740 static bool dce8_average_bandwidth_vs_available_bandwidth(struct dce8_wm_params *wm)
6741 {
6742 	if (dce8_average_bandwidth(wm) <=
6743 	    (dce8_available_bandwidth(wm) / wm->num_heads))
6744 		return true;
6745 	else
6746 		return false;
6747 }
6748 
6749 /**
6750  * dce8_check_latency_hiding - check latency hiding
6751  *
6752  * @wm: watermark calculation data
6753  *
6754  * Check latency hiding (CIK).
6755  * Used for display watermark bandwidth calculations
6756  * Returns true if the display fits, false if not.
6757  */
6758 static bool dce8_check_latency_hiding(struct dce8_wm_params *wm)
6759 {
6760 	u32 lb_partitions = wm->lb_size / wm->src_width;
6761 	u32 line_time = wm->active_time + wm->blank_time;
6762 	u32 latency_tolerant_lines;
6763 	u32 latency_hiding;
6764 	fixed20_12 a;
6765 
6766 	a.full = dfixed_const(1);
6767 	if (wm->vsc.full > a.full)
6768 		latency_tolerant_lines = 1;
6769 	else {
6770 		if (lb_partitions <= (wm->vtaps + 1))
6771 			latency_tolerant_lines = 1;
6772 		else
6773 			latency_tolerant_lines = 2;
6774 	}
6775 
6776 	latency_hiding = (latency_tolerant_lines * line_time + wm->blank_time);
6777 
6778 	if (dce8_latency_watermark(wm) <= latency_hiding)
6779 		return true;
6780 	else
6781 		return false;
6782 }
6783 
6784 /**
6785  * dce8_program_watermarks - program display watermarks
6786  *
6787  * @rdev: radeon_device pointer
6788  * @radeon_crtc: the selected display controller
6789  * @lb_size: line buffer size
6790  * @num_heads: number of display controllers in use
6791  *
6792  * Calculate and program the display watermarks for the
6793  * selected display controller (CIK).
6794  */
6795 static void dce8_program_watermarks(struct radeon_device *rdev,
6796 				    struct radeon_crtc *radeon_crtc,
6797 				    u32 lb_size, u32 num_heads)
6798 {
6799 	struct drm_display_mode *mode = &radeon_crtc->base.mode;
6800 	struct dce8_wm_params wm;
6801 	u32 pixel_period;
6802 	u32 line_time = 0;
6803 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
6804 	u32 tmp, wm_mask;
6805 
6806 	if (radeon_crtc->base.enabled && num_heads && mode) {
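		/* mode->clock is in kHz, so pixel_period, active_time and
		 * blank_time below are in ns
		 */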
6807 		pixel_period = 1000000 / (u32)mode->clock;
6808 		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
6809 
6810 		wm.yclk = rdev->pm.current_mclk * 10;
6811 		wm.sclk = rdev->pm.current_sclk * 10;
6812 		wm.disp_clk = mode->clock;
6813 		wm.src_width = mode->crtc_hdisplay;
6814 		wm.active_time = mode->crtc_hdisplay * pixel_period;
6815 		wm.blank_time = line_time - wm.active_time;
6816 		wm.interlaced = false;
6817 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
6818 			wm.interlaced = true;
6819 		wm.vsc = radeon_crtc->vsc;
6820 		wm.vtaps = 1;
6821 		if (radeon_crtc->rmx_type != RMX_OFF)
6822 			wm.vtaps = 2;
6823 		wm.bytes_per_pixel = 4; /* XXX: get this from fb config */
6824 		wm.lb_size = lb_size;
6825 		wm.dram_channels = cik_get_number_of_dram_channels(rdev);
6826 		wm.num_heads = num_heads;
6827 
6828 		/* set for high clocks */
6829 		latency_watermark_a = min(dce8_latency_watermark(&wm), (u32)65535);
6830 		/* set for low clocks */
6831 		/* XXX: wm.yclk/wm.sclk should be set to the low clocks here; currently the wm A values are reused */
6832 		latency_watermark_b = min(dce8_latency_watermark(&wm), (u32)65535);
6833 
6834 		/* possibly force display priority to high */
6835 		/* should really do this at mode validation time... */
6836 		if (!dce8_average_bandwidth_vs_dram_bandwidth_for_display(&wm) ||
6837 		    !dce8_average_bandwidth_vs_available_bandwidth(&wm) ||
6838 		    !dce8_check_latency_hiding(&wm) ||
6839 		    (rdev->disp_priority == 2)) {
6840 			DRM_DEBUG_KMS("force priority to high\n");
6841 		}
6842 	}
6843 
6844 	/* select wm A */
6845 	wm_mask = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6846 	tmp = wm_mask;
6847 	tmp &= ~LATENCY_WATERMARK_MASK(3);
6848 	tmp |= LATENCY_WATERMARK_MASK(1);
6849 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6850 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6851 	       (LATENCY_LOW_WATERMARK(latency_watermark_a) |
6852 		LATENCY_HIGH_WATERMARK(line_time)));
6853 	/* select wm B */
6854 	tmp = RREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset);
6855 	tmp &= ~LATENCY_WATERMARK_MASK(3);
6856 	tmp |= LATENCY_WATERMARK_MASK(2);
6857 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, tmp);
6858 	WREG32(DPG_PIPE_LATENCY_CONTROL + radeon_crtc->crtc_offset,
6859 	       (LATENCY_LOW_WATERMARK(latency_watermark_b) |
6860 		LATENCY_HIGH_WATERMARK(line_time)));
6861 	/* restore original selection */
6862 	WREG32(DPG_WATERMARK_MASK_CONTROL + radeon_crtc->crtc_offset, wm_mask);
6863 }
6864 
6865 /**
6866  * dce8_bandwidth_update - program display watermarks
6867  *
6868  * @rdev: radeon_device pointer
6869  *
6870  * Calculate and program the display watermarks and line
6871  * buffer allocation (CIK).
6872  */
6873 void dce8_bandwidth_update(struct radeon_device *rdev)
6874 {
6875 	struct drm_display_mode *mode = NULL;
6876 	u32 num_heads = 0, lb_size;
6877 	int i;
6878 
6879 	radeon_update_display_priority(rdev);
6880 
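	/* count the number of active displays (heads) */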
6881 	for (i = 0; i < rdev->num_crtc; i++) {
6882 		if (rdev->mode_info.crtcs[i]->base.enabled)
6883 			num_heads++;
6884 	}
6885 	for (i = 0; i < rdev->num_crtc; i++) {
6886 		mode = &rdev->mode_info.crtcs[i]->base.mode;
6887 		lb_size = dce8_line_buffer_adjust(rdev, rdev->mode_info.crtcs[i], mode);
6888 		dce8_program_watermarks(rdev, rdev->mode_info.crtcs[i], lb_size, num_heads);
6889 	}
6890 }
6891 
6892 /**
6893  * cik_get_gpu_clock_counter - return GPU clock counter snapshot
6894  *
6895  * @rdev: radeon_device pointer
6896  *
6897  * Fetches a GPU clock counter snapshot (SI).
6898  * Returns the 64 bit clock counter snapshot.
6899  */
6900 uint64_t cik_get_gpu_clock_counter(struct radeon_device *rdev)
6901 {
6902 	uint64_t clock;
6903 
6904 	mutex_lock(&rdev->gpu_clock_mutex);
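	/* writing 1 latches the current GPU clock counter into the LSB/MSB
	 * registers so the two halves below are read consistently
	 */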
6905 	WREG32(RLC_CAPTURE_GPU_CLOCK_COUNT, 1);
6906 	clock = (uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_LSB) |
6907 	        ((uint64_t)RREG32(RLC_GPU_CLOCK_COUNT_MSB) << 32ULL);
6908 	mutex_unlock(&rdev->gpu_clock_mutex);
6909 	return clock;
6910 }
6911 
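/**
 * cik_set_uvd_clock - program one of the UVD clocks
 *
 * @rdev: radeon_device pointer
 * @clock: requested clock frequency
 * @cntl_reg: clock control SMC register offset
 * @status_reg: clock status SMC register offset
 *
 * Looks up the dividers for the requested frequency in the atom tables,
 * programs the post divider and waits for the clock to report ready.
 * Returns 0 for success, -ETIMEDOUT if the clock never becomes ready,
 * or the error from the divider lookup.
 */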
6912 static int cik_set_uvd_clock(struct radeon_device *rdev, u32 clock,
6913                               u32 cntl_reg, u32 status_reg)
6914 {
6915 	int r, i;
6916 	struct atom_clock_dividers dividers;
6917 	uint32_t tmp;
6918 
6919 	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
6920 					   clock, false, &dividers);
6921 	if (r)
6922 		return r;
6923 
6924 	tmp = RREG32_SMC(cntl_reg);
6925 	tmp &= ~(DCLK_DIR_CNTL_EN|DCLK_DIVIDER_MASK);
6926 	tmp |= dividers.post_divider;
6927 	WREG32_SMC(cntl_reg, tmp);
6928 
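	/* wait up to 1 second (100 * 10 ms) for the clock to report ready */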
6929 	for (i = 0; i < 100; i++) {
6930 		if (RREG32_SMC(status_reg) & DCLK_STATUS)
6931 			break;
6932 		mdelay(10);
6933 	}
6934 	if (i == 100)
6935 		return -ETIMEDOUT;
6936 
6937 	return 0;
6938 }
6939 
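/**
 * cik_set_uvd_clocks - set the UVD VCLK and DCLK
 *
 * @rdev: radeon_device pointer
 * @vclk: requested VCLK frequency
 * @dclk: requested DCLK frequency
 *
 * Programs VCLK and DCLK through cik_set_uvd_clock().
 * Returns 0 for success, error for failure.
 */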
6940 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
6941 {
6942 	int r = 0;
6943 
6944 	r = cik_set_uvd_clock(rdev, vclk, CG_VCLK_CNTL, CG_VCLK_STATUS);
6945 	if (r)
6946 		return r;
6947 
6948 	r = cik_set_uvd_clock(rdev, dclk, CG_DCLK_CNTL, CG_DCLK_STATUS);
6949 	return r;
6950 }
6951 
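/**
 * cik_uvd_resume - set up the UVD VCPU memory controller
 *
 * @rdev: radeon_device pointer
 *
 * Resumes the UVD firmware via radeon_uvd_resume(), then programs the
 * VCPU cache windows (firmware image, stack, heap) and the extended
 * address bits of the firmware location.
 * Returns 0 for success, error for failure.
 */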
6952 int cik_uvd_resume(struct radeon_device *rdev)
6953 {
6954 	uint64_t addr;
6955 	uint32_t size;
6956 	int r;
6957 
6958 	r = radeon_uvd_resume(rdev);
6959 	if (r)
6960 		return r;
6961 
6962 	/* program the VCPU memory controller bits 0-27 */
6963 	addr = rdev->uvd.gpu_addr >> 3;
6964 	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size + 4) >> 3;
6965 	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
6966 	WREG32(UVD_VCPU_CACHE_SIZE0, size);
6967 
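	/* the VCPU stack follows the firmware image */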
6968 	addr += size;
6969 	size = RADEON_UVD_STACK_SIZE >> 3;
6970 	WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
6971 	WREG32(UVD_VCPU_CACHE_SIZE1, size);
6972 
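	/* then the VCPU heap */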
6973 	addr += size;
6974 	size = RADEON_UVD_HEAP_SIZE >> 3;
6975 	WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
6976 	WREG32(UVD_VCPU_CACHE_SIZE2, size);
6977 
6978 	/* bits 28-31 */
6979 	addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
6980 	WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
6981 
6982 	/* bits 32-39 */
6983 	addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
6984 	WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
6985 
6986 	return 0;
6987 }
6988