1 /*
2 * Copyright 2019 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24 #include <linux/firmware.h>
25 #include "amdgpu.h"
26 #include "amdgpu_vcn.h"
27 #include "amdgpu_pm.h"
28 #include "amdgpu_cs.h"
29 #include "soc15.h"
30 #include "soc15d.h"
31 #include "vcn_v2_0.h"
32 #include "mmsch_v3_0.h"
33 #include "vcn_sw_ring.h"
34
35 #include "vcn/vcn_3_0_0_offset.h"
36 #include "vcn/vcn_3_0_0_sh_mask.h"
37 #include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"
38
39 #include <drm/drm_drv.h>
40
41 #define VCN_VID_SOC_ADDRESS_2_0 0x1fa00
42 #define VCN1_VID_SOC_ADDRESS_3_0 0x48200
43 #define VCN1_AON_SOC_ADDRESS_3_0 0x48000
44
45 #define mmUVD_CONTEXT_ID_INTERNAL_OFFSET 0x27
46 #define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET 0x0f
47 #define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET 0x10
48 #define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET 0x11
49 #define mmUVD_NO_OP_INTERNAL_OFFSET 0x29
50 #define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET 0x66
51 #define mmUVD_SCRATCH9_INTERNAL_OFFSET 0xc01d
52
53 #define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET 0x431
54 #define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET 0x3b4
55 #define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 0x3b5
56 #define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET 0x25c
57
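/*
 * VCN_INSTANCES_SIENNA_CICHLID is the instance count exposed under SR-IOV
 * (see early_init below); DEC_SW_RING_ENABLED stays false on VCN 3.0 and is
 * reported to firmware through fw_shared->sw_ring.is_enabled in sw_init.
 */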
58 #define VCN_INSTANCES_SIENNA_CICHLID 2
59 #define DEC_SW_RING_ENABLED FALSE
60
61 #define RDECODE_MSG_CREATE 0x00000000
62 #define RDECODE_MESSAGE_CREATE 0x00000001
63
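/*
 * Registers captured by the VCN register dump (registered via
 * amdgpu_vcn_reg_dump_init() in sw_init) to aid post-hang debugging.
 */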
64 static const struct amdgpu_hwip_reg_entry vcn_reg_list_3_0[] = {
65 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_POWER_STATUS),
66 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_STATUS),
67 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID),
68 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_CONTEXT_ID2),
69 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA0),
70 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_DATA1),
71 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_GPCOM_VCPU_CMD),
72 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI),
73 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO),
74 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI2),
75 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO2),
76 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI3),
77 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO3),
78 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_HI4),
79 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_BASE_LO4),
80 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR),
81 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR),
82 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR2),
83 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR2),
84 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR3),
85 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR3),
86 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_RPTR4),
87 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_WPTR4),
88 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE),
89 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE2),
90 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE3),
91 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_RB_SIZE4),
92 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_CONFIG),
93 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_PGFSM_STATUS),
94 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_CTL),
95 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_DATA),
96 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_LMA_MASK),
97 SOC15_REG_ENTRY_STR(VCN, 0, mmUVD_DPG_PAUSE)
98 };
99
100 static int amdgpu_ih_clientid_vcns[] = {
101 SOC15_IH_CLIENTID_VCN,
102 SOC15_IH_CLIENTID_VCN1
103 };
104
105 static int vcn_v3_0_start_sriov(struct amdgpu_device *adev);
106 static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev);
107 static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev);
108 static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev);
109 static int vcn_v3_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
110 enum amd_powergating_state state);
111 static int vcn_v3_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
112 struct dpg_pause_state *new_state);
113 static int vcn_v3_0_reset(struct amdgpu_vcn_inst *vinst);
114
115 static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring);
116 static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring);
117
118 /**
119 * vcn_v3_0_early_init - set function pointers and load microcode
120 *
121 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
122 *
123 * Set ring and irq function pointers
124 * Load microcode from filesystem
125 */
126 static int vcn_v3_0_early_init(struct amdgpu_ip_block *ip_block)

127 {
128 struct amdgpu_device *adev = ip_block->adev;
129 int i, r;
130
131 if (amdgpu_sriov_vf(adev)) {
132 adev->vcn.num_vcn_inst = VCN_INSTANCES_SIENNA_CICHLID;
133 adev->vcn.harvest_config = 0;
134 for (i = 0; i < adev->vcn.num_vcn_inst; i++)
135 adev->vcn.inst[i].num_enc_rings = 1;
136
137 } else {
138 if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
139 AMDGPU_VCN_HARVEST_VCN1))
140 /* both instances are harvested, disable the block */
141 return -ENOENT;
142
143 for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
144 if (amdgpu_ip_version(adev, UVD_HWIP, 0) ==
145 IP_VERSION(3, 0, 33))
146 adev->vcn.inst[i].num_enc_rings = 0;
147 else
148 adev->vcn.inst[i].num_enc_rings = 2;
149 }
150 }
151
152 vcn_v3_0_set_dec_ring_funcs(adev);
153 vcn_v3_0_set_enc_ring_funcs(adev);
154 vcn_v3_0_set_irq_funcs(adev);
155
156 for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
157 adev->vcn.inst[i].set_pg_state = vcn_v3_0_set_pg_state;
158
159 r = amdgpu_vcn_early_init(adev, i);
160 if (r)
161 return r;
162 }
163 return 0;
164 }
165
166 /**
167 * vcn_v3_0_sw_init - sw init for VCN block
168 *
169 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
170 *
171 * Load firmware and sw initialization
172 */
173 static int vcn_v3_0_sw_init(struct amdgpu_ip_block *ip_block)
174 {
175 struct amdgpu_ring *ring;
176 int i, j, r;
177 int vcn_doorbell_index = 0;
178 struct amdgpu_device *adev = ip_block->adev;
179
180 /*
181 * Note: doorbell assignment is fixed for SRIOV with multiple VCN engines
182 * Formula:
183 * vcn_db_base = adev->doorbell_index.vcn.vcn_ring0_1 << 1;
184 * dec_ring_i = vcn_db_base + i * (adev->vcn.num_enc_rings + 1)
185 * enc_ring_i,j = vcn_db_base + i * (adev->vcn.num_enc_rings + 1) + 1 + j
186 */
187 if (amdgpu_sriov_vf(adev)) {
188 vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1;
189 /* get DWORD offset */
190 vcn_doorbell_index = vcn_doorbell_index << 1;
191 }
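/*
 * Illustrative example of the formula above (hypothetical vcn_db_base of 16
 * and one encode ring per instance in the SR-IOV case): instance 0 uses
 * doorbells 16 (dec) and 17 (enc), instance 1 uses 18 (dec) and 19 (enc).
 */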
192
193 for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
194 struct amdgpu_fw_shared *fw_shared;
195
196 if (adev->vcn.harvest_config & (1 << i))
197 continue;
198
199 r = amdgpu_vcn_sw_init(adev, i);
200 if (r)
201 return r;
202
203 amdgpu_vcn_setup_ucode(adev, i);
204
205 r = amdgpu_vcn_resume(adev, i);
206 if (r)
207 return r;
208
209 adev->vcn.inst[i].internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
210 adev->vcn.inst[i].internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
211 adev->vcn.inst[i].internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
212 adev->vcn.inst[i].internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
213 adev->vcn.inst[i].internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
214 adev->vcn.inst[i].internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;
215
216 adev->vcn.inst[i].internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
217 adev->vcn.inst[i].external.scratch9 = SOC15_REG_OFFSET(VCN, i, mmUVD_SCRATCH9);
218 adev->vcn.inst[i].internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
219 adev->vcn.inst[i].external.data0 = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_DATA0);
220 adev->vcn.inst[i].internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
221 adev->vcn.inst[i].external.data1 = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_DATA1);
222 adev->vcn.inst[i].internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
223 adev->vcn.inst[i].external.cmd = SOC15_REG_OFFSET(VCN, i, mmUVD_GPCOM_VCPU_CMD);
224 adev->vcn.inst[i].internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
225 adev->vcn.inst[i].external.nop = SOC15_REG_OFFSET(VCN, i, mmUVD_NO_OP);
226
227 /* VCN DEC TRAP */
228 r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
229 VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[i].irq);
230 if (r)
231 return r;
232
233 atomic_set(&adev->vcn.inst[i].sched_score, 0);
234
235 ring = &adev->vcn.inst[i].ring_dec;
236 ring->use_doorbell = true;
237 if (amdgpu_sriov_vf(adev)) {
238 ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.inst[i].num_enc_rings + 1);
239 } else {
240 ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 8 * i;
241 }
242 ring->vm_hub = AMDGPU_MMHUB0(0);
243 sprintf(ring->name, "vcn_dec_%d", i);
244 r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
245 AMDGPU_RING_PRIO_DEFAULT,
246 &adev->vcn.inst[i].sched_score);
247 if (r)
248 return r;
249
250 for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) {
251 enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(j);
252
253 /* VCN ENC TRAP */
254 r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
255 j + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[i].irq);
256 if (r)
257 return r;
258
259 ring = &adev->vcn.inst[i].ring_enc[j];
260 ring->use_doorbell = true;
261 if (amdgpu_sriov_vf(adev)) {
262 ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.inst[i].num_enc_rings + 1) + 1 + j;
263 } else {
264 ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + j + 8 * i;
265 }
266 ring->vm_hub = AMDGPU_MMHUB0(0);
267 sprintf(ring->name, "vcn_enc_%d.%d", i, j);
268 r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[i].irq, 0,
269 hw_prio, &adev->vcn.inst[i].sched_score);
270 if (r)
271 return r;
272 }
273
274 fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
275 fw_shared->present_flag_0 |= cpu_to_le32(AMDGPU_VCN_SW_RING_FLAG) |
276 cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG) |
277 cpu_to_le32(AMDGPU_VCN_FW_SHARED_FLAG_0_RB);
278 fw_shared->sw_ring.is_enabled = cpu_to_le32(DEC_SW_RING_ENABLED);
279 fw_shared->present_flag_0 |= AMDGPU_VCN_SMU_VERSION_INFO_FLAG;
280 if (amdgpu_ip_version(adev, UVD_HWIP, 0) == IP_VERSION(3, 1, 2))
281 fw_shared->smu_interface_info.smu_interface_type = 2;
282 else if (amdgpu_ip_version(adev, UVD_HWIP, 0) ==
283 IP_VERSION(3, 1, 1))
284 fw_shared->smu_interface_info.smu_interface_type = 1;
285
286 if (amdgpu_vcnfw_log)
287 amdgpu_vcn_fwlog_init(&adev->vcn.inst[i]);
288
289 if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
290 adev->vcn.inst[i].pause_dpg_mode = vcn_v3_0_pause_dpg_mode;
291 adev->vcn.inst[i].reset = vcn_v3_0_reset;
292 }
293
294 adev->vcn.supported_reset =
295 amdgpu_get_soft_full_reset_mask(&adev->vcn.inst[0].ring_enc[0]);
296 if (!amdgpu_sriov_vf(adev))
297 adev->vcn.supported_reset |= AMDGPU_RESET_TYPE_PER_QUEUE;
298
299 if (amdgpu_sriov_vf(adev)) {
300 r = amdgpu_virt_alloc_mm_table(adev);
301 if (r)
302 return r;
303 }
304
305 r = amdgpu_vcn_reg_dump_init(adev, vcn_reg_list_3_0, ARRAY_SIZE(vcn_reg_list_3_0));
306 if (r)
307 return r;
308
309 r = amdgpu_vcn_sysfs_reset_mask_init(adev);
310 if (r)
311 return r;
312
313 return 0;
314 }
315
316 /**
317 * vcn_v3_0_sw_fini - sw fini for VCN block
318 *
319 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
320 *
321 * VCN suspend and free up sw allocation
322 */
323 static int vcn_v3_0_sw_fini(struct amdgpu_ip_block *ip_block)
324 {
325 struct amdgpu_device *adev = ip_block->adev;
326 int i, r, idx;
327
328 if (drm_dev_enter(adev_to_drm(adev), &idx)) {
329 for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
330 struct amdgpu_fw_shared *fw_shared;
331
332 if (adev->vcn.harvest_config & (1 << i))
333 continue;
334 fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
335 fw_shared->present_flag_0 = 0;
336 fw_shared->sw_ring.is_enabled = false;
337 }
338
339 drm_dev_exit(idx);
340 }
341
342 if (amdgpu_sriov_vf(adev))
343 amdgpu_virt_free_mm_table(adev);
344
345 amdgpu_vcn_sysfs_reset_mask_fini(adev);
346
347 for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
348 r = amdgpu_vcn_suspend(adev, i);
349 if (r)
350 return r;
351
352 amdgpu_vcn_sw_fini(adev, i);
353 }
354
355 return 0;
356 }
357
358 /**
359 * vcn_v3_0_hw_init - start and test VCN block
360 *
361 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
362 *
363 * Initialize the hardware, boot up the VCPU and do some testing
364 */
365 static int vcn_v3_0_hw_init(struct amdgpu_ip_block *ip_block)
366 {
367 struct amdgpu_device *adev = ip_block->adev;
368 struct amdgpu_ring *ring;
369 int i, j, r;
370
371 if (amdgpu_sriov_vf(adev)) {
372 r = vcn_v3_0_start_sriov(adev);
373 if (r)
374 return r;
375
376 /* initialize VCN dec and enc ring buffers */
377 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
378 if (adev->vcn.harvest_config & (1 << i))
379 continue;
380
381 ring = &adev->vcn.inst[i].ring_dec;
382 if (amdgpu_vcn_is_disabled_vcn(adev, VCN_DECODE_RING, i)) {
383 ring->sched.ready = false;
384 ring->no_scheduler = true;
385 dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name);
386 } else {
387 ring->wptr = 0;
388 ring->wptr_old = 0;
389 vcn_v3_0_dec_ring_set_wptr(ring);
390 ring->sched.ready = true;
391 }
392
393 for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) {
394 ring = &adev->vcn.inst[i].ring_enc[j];
395 if (amdgpu_vcn_is_disabled_vcn(adev, VCN_ENCODE_RING, i)) {
396 ring->sched.ready = false;
397 ring->no_scheduler = true;
398 dev_info(adev->dev, "ring %s is disabled by hypervisor\n", ring->name);
399 } else {
400 ring->wptr = 0;
401 ring->wptr_old = 0;
402 vcn_v3_0_enc_ring_set_wptr(ring);
403 ring->sched.ready = true;
404 }
405 }
406 }
407 } else {
408 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
409 if (adev->vcn.harvest_config & (1 << i))
410 continue;
411
412 ring = &adev->vcn.inst[i].ring_dec;
413
414 adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
415 ring->doorbell_index, i);
416
417 r = amdgpu_ring_test_helper(ring);
418 if (r)
419 return r;
420
421 for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) {
422 ring = &adev->vcn.inst[i].ring_enc[j];
423 r = amdgpu_ring_test_helper(ring);
424 if (r)
425 return r;
426 }
427 }
428 }
429
430 return 0;
431 }
432
433 /**
434 * vcn_v3_0_hw_fini - stop the hardware block
435 *
436 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
437 *
438 * Stop the VCN block, mark the rings as no longer ready
439 */
440 static int vcn_v3_0_hw_fini(struct amdgpu_ip_block *ip_block)
441 {
442 struct amdgpu_device *adev = ip_block->adev;
443 int i;
444
445 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
446 struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
447
448 if (adev->vcn.harvest_config & (1 << i))
449 continue;
450
451 cancel_delayed_work_sync(&vinst->idle_work);
452
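/*
 * On bare metal, gate the instance if DPG power gating is in use or
 * UVD_STATUS shows the VCPU is still active.
 */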
453 if (!amdgpu_sriov_vf(adev)) {
454 if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
455 (vinst->cur_state != AMD_PG_STATE_GATE &&
456 RREG32_SOC15(VCN, i, mmUVD_STATUS))) {
457 vinst->set_pg_state(vinst, AMD_PG_STATE_GATE);
458 }
459 }
460 }
461
462 return 0;
463 }
464
465 /**
466 * vcn_v3_0_suspend - suspend VCN block
467 *
468 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
469 *
470 * HW fini and suspend VCN block
471 */
472 static int vcn_v3_0_suspend(struct amdgpu_ip_block *ip_block)
473 {
474 struct amdgpu_device *adev = ip_block->adev;
475 int r, i;
476
477 r = vcn_v3_0_hw_fini(ip_block);
478 if (r)
479 return r;
480
481 for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
482 r = amdgpu_vcn_suspend(ip_block->adev, i);
483 if (r)
484 return r;
485 }
486
487 return 0;
488 }
489
490 /**
491 * vcn_v3_0_resume - resume VCN block
492 *
493 * @ip_block: Pointer to the amdgpu_ip_block for this hw instance.
494 *
495 * Resume firmware and hw init VCN block
496 */
497 static int vcn_v3_0_resume(struct amdgpu_ip_block *ip_block)
498 {
499 struct amdgpu_device *adev = ip_block->adev;
500 int r, i;
501
502 for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
503 r = amdgpu_vcn_resume(ip_block->adev, i);
504 if (r)
505 return r;
506 }
507
508 r = vcn_v3_0_hw_init(ip_block);
509
510 return r;
511 }
512
513 /**
514 * vcn_v3_0_mc_resume - memory controller programming
515 *
516 * @vinst: VCN instance
517 *
518 * Let the VCN memory controller know its offsets
519 */
520 static void vcn_v3_0_mc_resume(struct amdgpu_vcn_inst *vinst)
521 {
522 struct amdgpu_device *adev = vinst->adev;
523 int inst = vinst->inst;
524 uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst].fw->size + 4);
525 uint32_t offset;
526
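/*
 * When firmware is not PSP-loaded, the cache windows are laid out back to
 * back in the VCPU BO: [fw image][stack][context]; the fw_shared buffer is
 * mapped through the separate non-cached window.
 */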
527 /* cache window 0: fw */
528 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
529 WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
530 (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_lo));
531 WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
532 (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst].tmr_mc_addr_hi));
533 WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET0, 0);
534 offset = 0;
535 } else {
536 WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
537 lower_32_bits(adev->vcn.inst[inst].gpu_addr));
538 WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
539 upper_32_bits(adev->vcn.inst[inst].gpu_addr));
540 offset = size;
541 WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET0,
542 AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
543 }
544 WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE0, size);
545
546 /* cache window 1: stack */
547 WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
548 lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
549 WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
550 upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset));
551 WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET1, 0);
552 WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);
553
554 /* cache window 2: context */
555 WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
556 lower_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
557 WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
558 upper_32_bits(adev->vcn.inst[inst].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
559 WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_OFFSET2, 0);
560 WREG32_SOC15(VCN, inst, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);
561
562 /* non-cache window */
563 WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
564 lower_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
565 WREG32_SOC15(VCN, inst, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
566 upper_32_bits(adev->vcn.inst[inst].fw_shared.gpu_addr));
567 WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
568 WREG32_SOC15(VCN, inst, mmUVD_VCPU_NONCACHE_SIZE0,
569 AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
570 }
571
572 static void vcn_v3_0_mc_resume_dpg_mode(struct amdgpu_vcn_inst *vinst,
573 bool indirect)
574 {
575 struct amdgpu_device *adev = vinst->adev;
576 int inst_idx = vinst->inst;
577 uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[inst_idx].fw->size + 4);
578 uint32_t offset;
579
580 /* cache window 0: fw */
581 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
582 if (!indirect) {
583 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
584 VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
585 (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
586 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
587 VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
588 (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
589 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
590 VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
591 } else {
592 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
593 VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
594 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
595 VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
596 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
597 VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
598 }
599 offset = 0;
600 } else {
601 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
602 VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
603 lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
604 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
605 VCN, inst_idx, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
606 upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
607 offset = size;
608 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
609 VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET0),
610 AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
611 }
612
613 if (!indirect)
614 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
615 VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
616 else
617 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
618 VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);
619
620 /* cache window 1: stack */
621 if (!indirect) {
622 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
623 VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
624 lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
625 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
626 VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
627 upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
628 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
629 VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
630 } else {
631 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
632 VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
633 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
634 VCN, inst_idx, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
635 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
636 VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
637 }
638 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
639 VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);
640
641 /* cache window 2: context */
642 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
643 VCN, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
644 lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
645 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
646 VCN, inst_idx, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
647 upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
648 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
649 VCN, inst_idx, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
650 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
651 VCN, inst_idx, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);
652
653 /* non-cache window */
654 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
655 VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
656 lower_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
657 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
658 VCN, inst_idx, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
659 upper_32_bits(adev->vcn.inst[inst_idx].fw_shared.gpu_addr), 0, indirect);
660 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
661 VCN, inst_idx, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
662 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
663 VCN, inst_idx, mmUVD_VCPU_NONCACHE_SIZE0),
664 AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);
665
666 /* VCN global tiling registers */
667 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
668 UVD, inst_idx, mmUVD_GFX10_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
669 }
670
671 static void vcn_v3_0_disable_static_power_gating(struct amdgpu_vcn_inst *vinst)
672 {
673 struct amdgpu_device *adev = vinst->adev;
674 int inst = vinst->inst;
675 uint32_t data = 0;
676
677 if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
678 data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
679 | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
680 | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
681 | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
682 | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
683 | 2 << UVD_PGFSM_CONFIG__UVDIRL_PWR_CONFIG__SHIFT
684 | 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
685 | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
686 | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
687 | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
688 | 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
689 | 2 << UVD_PGFSM_CONFIG__UVDATD_PWR_CONFIG__SHIFT
690 | 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
691 | 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
692
693 WREG32_SOC15(VCN, inst, mmUVD_PGFSM_CONFIG, data);
694 SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_PGFSM_STATUS,
695 UVD_PGFSM_STATUS__UVDM_UVDU_UVDLM_PWR_ON_3_0, 0x3F3FFFFF);
696 } else {
697 data = (1 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
698 | 1 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
699 | 1 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
700 | 1 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
701 | 1 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
702 | 1 << UVD_PGFSM_CONFIG__UVDIRL_PWR_CONFIG__SHIFT
703 | 1 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
704 | 1 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
705 | 1 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
706 | 1 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
707 | 1 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
708 | 1 << UVD_PGFSM_CONFIG__UVDATD_PWR_CONFIG__SHIFT
709 | 1 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
710 | 1 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
711 WREG32_SOC15(VCN, inst, mmUVD_PGFSM_CONFIG, data);
712 SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_PGFSM_STATUS, 0, 0x3F3FFFFF);
713 }
714
715 data = RREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS);
716 data &= ~0x103;
717 if (adev->pg_flags & AMD_PG_SUPPORT_VCN)
718 data |= UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON |
719 UVD_POWER_STATUS__UVD_PG_EN_MASK;
720
721 WREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS, data);
722 }
723
724 static void vcn_v3_0_enable_static_power_gating(struct amdgpu_vcn_inst *vinst)
725 {
726 struct amdgpu_device *adev = vinst->adev;
727 int inst = vinst->inst;
728 uint32_t data;
729
730 if (adev->pg_flags & AMD_PG_SUPPORT_VCN) {
731 /* Before power off, this indicator has to be turned on */
732 data = RREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS);
733 data &= ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK;
734 data |= UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF;
735 WREG32_SOC15(VCN, inst, mmUVD_POWER_STATUS, data);
736
737 data = (2 << UVD_PGFSM_CONFIG__UVDM_PWR_CONFIG__SHIFT
738 | 2 << UVD_PGFSM_CONFIG__UVDU_PWR_CONFIG__SHIFT
739 | 2 << UVD_PGFSM_CONFIG__UVDF_PWR_CONFIG__SHIFT
740 | 2 << UVD_PGFSM_CONFIG__UVDC_PWR_CONFIG__SHIFT
741 | 2 << UVD_PGFSM_CONFIG__UVDB_PWR_CONFIG__SHIFT
742 | 2 << UVD_PGFSM_CONFIG__UVDIRL_PWR_CONFIG__SHIFT
743 | 2 << UVD_PGFSM_CONFIG__UVDLM_PWR_CONFIG__SHIFT
744 | 2 << UVD_PGFSM_CONFIG__UVDTD_PWR_CONFIG__SHIFT
745 | 2 << UVD_PGFSM_CONFIG__UVDTE_PWR_CONFIG__SHIFT
746 | 2 << UVD_PGFSM_CONFIG__UVDE_PWR_CONFIG__SHIFT
747 | 2 << UVD_PGFSM_CONFIG__UVDAB_PWR_CONFIG__SHIFT
748 | 2 << UVD_PGFSM_CONFIG__UVDATD_PWR_CONFIG__SHIFT
749 | 2 << UVD_PGFSM_CONFIG__UVDNA_PWR_CONFIG__SHIFT
750 | 2 << UVD_PGFSM_CONFIG__UVDNB_PWR_CONFIG__SHIFT);
751 WREG32_SOC15(VCN, inst, mmUVD_PGFSM_CONFIG, data);
752
753 data = (2 << UVD_PGFSM_STATUS__UVDM_PWR_STATUS__SHIFT
754 | 2 << UVD_PGFSM_STATUS__UVDU_PWR_STATUS__SHIFT
755 | 2 << UVD_PGFSM_STATUS__UVDF_PWR_STATUS__SHIFT
756 | 2 << UVD_PGFSM_STATUS__UVDC_PWR_STATUS__SHIFT
757 | 2 << UVD_PGFSM_STATUS__UVDB_PWR_STATUS__SHIFT
758 | 2 << UVD_PGFSM_STATUS__UVDIRL_PWR_STATUS__SHIFT
759 | 2 << UVD_PGFSM_STATUS__UVDLM_PWR_STATUS__SHIFT
760 | 2 << UVD_PGFSM_STATUS__UVDTD_PWR_STATUS__SHIFT
761 | 2 << UVD_PGFSM_STATUS__UVDTE_PWR_STATUS__SHIFT
762 | 2 << UVD_PGFSM_STATUS__UVDE_PWR_STATUS__SHIFT
763 | 2 << UVD_PGFSM_STATUS__UVDAB_PWR_STATUS__SHIFT
764 | 2 << UVD_PGFSM_STATUS__UVDATD_PWR_STATUS__SHIFT
765 | 2 << UVD_PGFSM_STATUS__UVDNA_PWR_STATUS__SHIFT
766 | 2 << UVD_PGFSM_STATUS__UVDNB_PWR_STATUS__SHIFT);
767 SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_PGFSM_STATUS, data, 0x3F3FFFFF);
768 }
769 }
770
771 /**
772 * vcn_v3_0_disable_clock_gating - disable VCN clock gating
773 *
774 * @vinst: Pointer to the VCN instance structure
775 *
776 * Disable clock gating for VCN block
777 */
778 static void vcn_v3_0_disable_clock_gating(struct amdgpu_vcn_inst *vinst)
779 {
780 struct amdgpu_device *adev = vinst->adev;
781 int inst = vinst->inst;
782 uint32_t data;
783
784 /* disable VCN CGC */
785 data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
786 if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
787 data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
788 else
789 data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
790 data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
791 data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
792 WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);
793
794 data = RREG32_SOC15(VCN, inst, mmUVD_CGC_GATE);
795 data &= ~(UVD_CGC_GATE__SYS_MASK
796 | UVD_CGC_GATE__UDEC_MASK
797 | UVD_CGC_GATE__MPEG2_MASK
798 | UVD_CGC_GATE__REGS_MASK
799 | UVD_CGC_GATE__RBC_MASK
800 | UVD_CGC_GATE__LMI_MC_MASK
801 | UVD_CGC_GATE__LMI_UMC_MASK
802 | UVD_CGC_GATE__IDCT_MASK
803 | UVD_CGC_GATE__MPRD_MASK
804 | UVD_CGC_GATE__MPC_MASK
805 | UVD_CGC_GATE__LBSI_MASK
806 | UVD_CGC_GATE__LRBBM_MASK
807 | UVD_CGC_GATE__UDEC_RE_MASK
808 | UVD_CGC_GATE__UDEC_CM_MASK
809 | UVD_CGC_GATE__UDEC_IT_MASK
810 | UVD_CGC_GATE__UDEC_DB_MASK
811 | UVD_CGC_GATE__UDEC_MP_MASK
812 | UVD_CGC_GATE__WCB_MASK
813 | UVD_CGC_GATE__VCPU_MASK
814 | UVD_CGC_GATE__MMSCH_MASK);
815
816 WREG32_SOC15(VCN, inst, mmUVD_CGC_GATE, data);
817
818 SOC15_WAIT_ON_RREG(VCN, inst, mmUVD_CGC_GATE, 0, 0xFFFFFFFF);
819
820 data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
821 data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
822 | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
823 | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
824 | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
825 | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
826 | UVD_CGC_CTRL__SYS_MODE_MASK
827 | UVD_CGC_CTRL__UDEC_MODE_MASK
828 | UVD_CGC_CTRL__MPEG2_MODE_MASK
829 | UVD_CGC_CTRL__REGS_MODE_MASK
830 | UVD_CGC_CTRL__RBC_MODE_MASK
831 | UVD_CGC_CTRL__LMI_MC_MODE_MASK
832 | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
833 | UVD_CGC_CTRL__IDCT_MODE_MASK
834 | UVD_CGC_CTRL__MPRD_MODE_MASK
835 | UVD_CGC_CTRL__MPC_MODE_MASK
836 | UVD_CGC_CTRL__LBSI_MODE_MASK
837 | UVD_CGC_CTRL__LRBBM_MODE_MASK
838 | UVD_CGC_CTRL__WCB_MODE_MASK
839 | UVD_CGC_CTRL__VCPU_MODE_MASK
840 | UVD_CGC_CTRL__MMSCH_MODE_MASK);
841 WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);
842
843 data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE);
844 data |= (UVD_SUVD_CGC_GATE__SRE_MASK
845 | UVD_SUVD_CGC_GATE__SIT_MASK
846 | UVD_SUVD_CGC_GATE__SMP_MASK
847 | UVD_SUVD_CGC_GATE__SCM_MASK
848 | UVD_SUVD_CGC_GATE__SDB_MASK
849 | UVD_SUVD_CGC_GATE__SRE_H264_MASK
850 | UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
851 | UVD_SUVD_CGC_GATE__SIT_H264_MASK
852 | UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
853 | UVD_SUVD_CGC_GATE__SCM_H264_MASK
854 | UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
855 | UVD_SUVD_CGC_GATE__SDB_H264_MASK
856 | UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
857 | UVD_SUVD_CGC_GATE__SCLR_MASK
858 | UVD_SUVD_CGC_GATE__ENT_MASK
859 | UVD_SUVD_CGC_GATE__IME_MASK
860 | UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
861 | UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
862 | UVD_SUVD_CGC_GATE__SITE_MASK
863 | UVD_SUVD_CGC_GATE__SRE_VP9_MASK
864 | UVD_SUVD_CGC_GATE__SCM_VP9_MASK
865 | UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
866 | UVD_SUVD_CGC_GATE__SDB_VP9_MASK
867 | UVD_SUVD_CGC_GATE__IME_HEVC_MASK
868 | UVD_SUVD_CGC_GATE__EFC_MASK
869 | UVD_SUVD_CGC_GATE__SAOE_MASK
870 | UVD_SUVD_CGC_GATE__SRE_AV1_MASK
871 | UVD_SUVD_CGC_GATE__FBC_PCLK_MASK
872 | UVD_SUVD_CGC_GATE__FBC_CCLK_MASK
873 | UVD_SUVD_CGC_GATE__SCM_AV1_MASK
874 | UVD_SUVD_CGC_GATE__SMPA_MASK);
875 WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE, data);
876
877 data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE2);
878 data |= (UVD_SUVD_CGC_GATE2__MPBE0_MASK
879 | UVD_SUVD_CGC_GATE2__MPBE1_MASK
880 | UVD_SUVD_CGC_GATE2__SIT_AV1_MASK
881 | UVD_SUVD_CGC_GATE2__SDB_AV1_MASK
882 | UVD_SUVD_CGC_GATE2__MPC1_MASK);
883 WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_GATE2, data);
884
885 data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL);
886 data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
887 | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
888 | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
889 | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
890 | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
891 | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
892 | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
893 | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
894 | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK
895 | UVD_SUVD_CGC_CTRL__EFC_MODE_MASK
896 | UVD_SUVD_CGC_CTRL__SAOE_MODE_MASK
897 | UVD_SUVD_CGC_CTRL__SMPA_MODE_MASK
898 | UVD_SUVD_CGC_CTRL__MPBE0_MODE_MASK
899 | UVD_SUVD_CGC_CTRL__MPBE1_MODE_MASK
900 | UVD_SUVD_CGC_CTRL__SIT_AV1_MODE_MASK
901 | UVD_SUVD_CGC_CTRL__SDB_AV1_MODE_MASK
902 | UVD_SUVD_CGC_CTRL__MPC1_MODE_MASK
903 | UVD_SUVD_CGC_CTRL__FBC_PCLK_MASK
904 | UVD_SUVD_CGC_CTRL__FBC_CCLK_MASK);
905 WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL, data);
906 }
907
908 static void vcn_v3_0_clock_gating_dpg_mode(struct amdgpu_vcn_inst *vinst,
909 uint8_t sram_sel,
910 uint8_t indirect)
911 {
912 struct amdgpu_device *adev = vinst->adev;
913 int inst_idx = vinst->inst;
914 uint32_t reg_data = 0;
915
916 /* enable sw clock gating control */
917 if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
918 reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
919 else
920 reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
921 reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
922 reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
923 reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
924 UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
925 UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
926 UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
927 UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
928 UVD_CGC_CTRL__SYS_MODE_MASK |
929 UVD_CGC_CTRL__UDEC_MODE_MASK |
930 UVD_CGC_CTRL__MPEG2_MODE_MASK |
931 UVD_CGC_CTRL__REGS_MODE_MASK |
932 UVD_CGC_CTRL__RBC_MODE_MASK |
933 UVD_CGC_CTRL__LMI_MC_MODE_MASK |
934 UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
935 UVD_CGC_CTRL__IDCT_MODE_MASK |
936 UVD_CGC_CTRL__MPRD_MODE_MASK |
937 UVD_CGC_CTRL__MPC_MODE_MASK |
938 UVD_CGC_CTRL__LBSI_MODE_MASK |
939 UVD_CGC_CTRL__LRBBM_MODE_MASK |
940 UVD_CGC_CTRL__WCB_MODE_MASK |
941 UVD_CGC_CTRL__VCPU_MODE_MASK |
942 UVD_CGC_CTRL__MMSCH_MODE_MASK);
943 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
944 VCN, inst_idx, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);
945
946 /* turn off clock gating */
947 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
948 VCN, inst_idx, mmUVD_CGC_GATE), 0, sram_sel, indirect);
949
950 /* turn on SUVD clock gating */
951 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
952 VCN, inst_idx, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);
953
954 /* turn on sw mode in UVD_SUVD_CGC_CTRL */
955 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
956 VCN, inst_idx, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
957 }
958
959 /**
960 * vcn_v3_0_enable_clock_gating - enable VCN clock gating
961 *
962 * @vinst: Pointer to the VCN instance structure
963 *
964 * Enable clock gating for VCN block
965 */
966 static void vcn_v3_0_enable_clock_gating(struct amdgpu_vcn_inst *vinst)
967 {
968 struct amdgpu_device *adev = vinst->adev;
969 int inst = vinst->inst;
970 uint32_t data;
971
972 /* enable VCN CGC */
973 data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
974 if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
975 data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
976 else
977 data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
978 data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
979 data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
980 WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);
981
982 data = RREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL);
983 data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
984 | UVD_CGC_CTRL__UDEC_CM_MODE_MASK
985 | UVD_CGC_CTRL__UDEC_IT_MODE_MASK
986 | UVD_CGC_CTRL__UDEC_DB_MODE_MASK
987 | UVD_CGC_CTRL__UDEC_MP_MODE_MASK
988 | UVD_CGC_CTRL__SYS_MODE_MASK
989 | UVD_CGC_CTRL__UDEC_MODE_MASK
990 | UVD_CGC_CTRL__MPEG2_MODE_MASK
991 | UVD_CGC_CTRL__REGS_MODE_MASK
992 | UVD_CGC_CTRL__RBC_MODE_MASK
993 | UVD_CGC_CTRL__LMI_MC_MODE_MASK
994 | UVD_CGC_CTRL__LMI_UMC_MODE_MASK
995 | UVD_CGC_CTRL__IDCT_MODE_MASK
996 | UVD_CGC_CTRL__MPRD_MODE_MASK
997 | UVD_CGC_CTRL__MPC_MODE_MASK
998 | UVD_CGC_CTRL__LBSI_MODE_MASK
999 | UVD_CGC_CTRL__LRBBM_MODE_MASK
1000 | UVD_CGC_CTRL__WCB_MODE_MASK
1001 | UVD_CGC_CTRL__VCPU_MODE_MASK
1002 | UVD_CGC_CTRL__MMSCH_MODE_MASK);
1003 WREG32_SOC15(VCN, inst, mmUVD_CGC_CTRL, data);
1004
1005 data = RREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL);
1006 data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
1007 | UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
1008 | UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
1009 | UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
1010 | UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
1011 | UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
1012 | UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
1013 | UVD_SUVD_CGC_CTRL__IME_MODE_MASK
1014 | UVD_SUVD_CGC_CTRL__SITE_MODE_MASK
1015 | UVD_SUVD_CGC_CTRL__EFC_MODE_MASK
1016 | UVD_SUVD_CGC_CTRL__SAOE_MODE_MASK
1017 | UVD_SUVD_CGC_CTRL__SMPA_MODE_MASK
1018 | UVD_SUVD_CGC_CTRL__MPBE0_MODE_MASK
1019 | UVD_SUVD_CGC_CTRL__MPBE1_MODE_MASK
1020 | UVD_SUVD_CGC_CTRL__SIT_AV1_MODE_MASK
1021 | UVD_SUVD_CGC_CTRL__SDB_AV1_MODE_MASK
1022 | UVD_SUVD_CGC_CTRL__MPC1_MODE_MASK
1023 | UVD_SUVD_CGC_CTRL__FBC_PCLK_MASK
1024 | UVD_SUVD_CGC_CTRL__FBC_CCLK_MASK);
1025 WREG32_SOC15(VCN, inst, mmUVD_SUVD_CGC_CTRL, data);
1026 }
1027
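/*
 * Bring up one instance in dynamic power gating (DPG) mode: disable the
 * register anti-hang mechanism, enable dynamic power gating, program the
 * VCPU, LMI and MPC either directly or through the indirect DPG SRAM path,
 * then initialize the decode ring buffer while DPG is stalled.
 */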
1028 static int vcn_v3_0_start_dpg_mode(struct amdgpu_vcn_inst *vinst, bool indirect)
1029 {
1030 struct amdgpu_device *adev = vinst->adev;
1031 int inst_idx = vinst->inst;
1032 struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
1033 struct amdgpu_ring *ring;
1034 uint32_t rb_bufsz, tmp;
1035 int ret;
1036
1037 /* disable register anti-hang mechanism */
1038 WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1,
1039 ~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1040 /* enable dynamic power gating mode */
1041 tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS);
1042 tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
1043 tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
1044 WREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS, tmp);
1045
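/*
 * In indirect mode the WREG32_SOC15_DPG_MODE() calls below are queued into
 * the DPG SRAM image and applied later via amdgpu_vcn_psp_update_sram().
 */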
1046 if (indirect)
1047 adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;
1048
1049 /* enable clock gating */
1050 vcn_v3_0_clock_gating_dpg_mode(vinst, 0, indirect);
1051
1052 /* enable VCPU clock */
1053 tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
1054 tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
1055 tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
1056 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
1057 VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
1058
1059 /* disable master interrupt */
1060 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
1061 VCN, inst_idx, mmUVD_MASTINT_EN), 0, 0, indirect);
1062
1063 /* setup mmUVD_LMI_CTRL */
1064 tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
1065 UVD_LMI_CTRL__REQ_MODE_MASK |
1066 UVD_LMI_CTRL__CRC_RESET_MASK |
1067 UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
1068 UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
1069 UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
1070 (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
1071 0x00100000L);
1072 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
1073 VCN, inst_idx, mmUVD_LMI_CTRL), tmp, 0, indirect);
1074
1075 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
1076 VCN, inst_idx, mmUVD_MPC_CNTL),
1077 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);
1078
1079 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
1080 VCN, inst_idx, mmUVD_MPC_SET_MUXA0),
1081 ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
1082 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
1083 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
1084 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);
1085
1086 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
1087 VCN, inst_idx, mmUVD_MPC_SET_MUXB0),
1088 ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
1089 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
1090 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
1091 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);
1092
1093 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
1094 VCN, inst_idx, mmUVD_MPC_SET_MUX),
1095 ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
1096 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
1097 (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);
1098
1099 vcn_v3_0_mc_resume_dpg_mode(vinst, indirect);
1100
1101 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
1102 VCN, inst_idx, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
1103 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
1104 VCN, inst_idx, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);
1105
1106 /* enable LMI MC and UMC channels */
1107 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
1108 VCN, inst_idx, mmUVD_LMI_CTRL2), 0, 0, indirect);
1109
1110 /* unblock VCPU register access */
1111 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
1112 VCN, inst_idx, mmUVD_RB_ARB_CTRL), 0, 0, indirect);
1113
1114 tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
1115 tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
1116 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
1117 VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
1118
1119 /* enable master interrupt */
1120 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
1121 VCN, inst_idx, mmUVD_MASTINT_EN),
1122 UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);
1123
1124 /* add nop to workaround PSP size check */
1125 WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
1126 VCN, inst_idx, mmUVD_VCPU_CNTL), tmp, 0, indirect);
1127
1128 if (indirect) {
1129 ret = amdgpu_vcn_psp_update_sram(adev, inst_idx, 0);
1130 if (ret) {
1131 dev_err(adev->dev, "vcn sram load failed %d\n", ret);
1132 return ret;
1133 }
1134 }
1135
1136 ring = &adev->vcn.inst[inst_idx].ring_dec;
1137 /* force RBC into idle state */
1138 rb_bufsz = order_base_2(ring->ring_size);
1139 tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
1140 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
1141 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
1142 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
1143 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
1144 WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_CNTL, tmp);
1145
1146 /* Stall DPG before WPTR/RPTR reset */
1147 WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
1148 UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
1149 ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
1150 fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
1151
1152 /* set the write pointer delay */
1153 WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);
1154
1155 /* set the wb address */
1156 WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR_ADDR,
1157 (upper_32_bits(ring->gpu_addr) >> 2));
1158
1159 /* program the RB_BASE for ring buffer */
1160 WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
1161 lower_32_bits(ring->gpu_addr));
1162 WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
1163 upper_32_bits(ring->gpu_addr));
1164
1165 /* Initialize the ring buffer's read and write pointers */
1166 WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, 0);
1167
1168 WREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2, 0);
1169
1170 ring->wptr = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR);
1171 WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,
1172 lower_32_bits(ring->wptr));
1173
1174 /* Reset FW shared memory RBC WPTR/RPTR */
1175 fw_shared->rb.rptr = 0;
1176 fw_shared->rb.wptr = lower_32_bits(ring->wptr);
1177
1178 /* resetting done, fw can check RB ring */
1179 fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
1180
1181 /* Unstall DPG */
1182 WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
1183 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
1184
1185 /* Keep one read-back to ensure all register writes have landed;
1186 * otherwise races may occur.
1187 */
1188 RREG32_SOC15(VCN, inst_idx, mmUVD_STATUS);
1189
1190 return 0;
1191 }
1192
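/*
 * Full (non-DPG) start of one VCN instance: raise the instance power/clock
 * via the SMU, take the DPG path when it is supported, otherwise disable
 * static power gating and clock gating, program LMI/MPC and the memory
 * controller windows, release the VCPU from reset, poll UVD_STATUS until the
 * firmware reports ready, and finally set up the decode and encode rings.
 */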
1193 static int vcn_v3_0_start(struct amdgpu_vcn_inst *vinst)
1194 {
1195 struct amdgpu_device *adev = vinst->adev;
1196 int i = vinst->inst;
1197 struct amdgpu_fw_shared *fw_shared;
1198 struct amdgpu_ring *ring;
1199 uint32_t rb_bufsz, tmp;
1200 int j, k, r;
1201
1202 if (adev->vcn.harvest_config & (1 << i))
1203 return 0;
1204
1205 if (adev->pm.dpm_enabled)
1206 amdgpu_dpm_enable_vcn(adev, true, i);
1207
1208 if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
1209 return vcn_v3_0_start_dpg_mode(vinst, vinst->indirect_sram);
1210
1211 /* disable VCN power gating */
1212 vcn_v3_0_disable_static_power_gating(vinst);
1213
1214 /* set VCN status busy */
1215 tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
1216 WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
1217
1218 /* SW clock gating */
1219 vcn_v3_0_disable_clock_gating(vinst);
1220
1221 /* enable VCPU clock */
1222 WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
1223 UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);
1224
1225 /* disable master interrupt */
1226 WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
1227 ~UVD_MASTINT_EN__VCPU_EN_MASK);
1228
1229 /* enable LMI MC and UMC channels */
1230 WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
1231 ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);
1232
1233 tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
1234 tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
1235 tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
1236 WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
1237
1238 /* setup mmUVD_LMI_CTRL */
1239 tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
1240 WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp |
1241 UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
1242 UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
1243 UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
1244 UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);
1245
1246 /* setup mmUVD_MPC_CNTL */
1247 tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
1248 tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
1249 tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
1250 WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);
1251
1252 /* setup UVD_MPC_SET_MUXA0 */
1253 WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
1254 ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
1255 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
1256 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
1257 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));
1258
1259 /* setup UVD_MPC_SET_MUXB0 */
1260 WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
1261 ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
1262 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
1263 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
1264 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));
1265
1266 /* setup mmUVD_MPC_SET_MUX */
1267 WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
1268 ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
1269 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
1270 (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
1271
1272 vcn_v3_0_mc_resume(vinst);
1273
1274 /* VCN global tiling registers */
1275 WREG32_SOC15(VCN, i, mmUVD_GFX10_ADDR_CONFIG,
1276 adev->gfx.config.gb_addr_config);
1277
1278 /* unblock VCPU register access */
1279 WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
1280 ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
1281
1282 /* release VCPU reset to boot */
1283 WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
1284 ~UVD_VCPU_CNTL__BLK_RST_MASK);
1285
1286 for (j = 0; j < 10; ++j) {
1287 uint32_t status;
1288
1289 for (k = 0; k < 100; ++k) {
1290 status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
1291 if (status & 2)
1292 break;
1293 mdelay(10);
1294 }
1295 r = 0;
1296 if (status & 2)
1297 break;
1298
1299 DRM_ERROR("VCN[%d] decode not responding, trying to reset the VCPU!!!\n", i);
1300 WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
1301 UVD_VCPU_CNTL__BLK_RST_MASK,
1302 ~UVD_VCPU_CNTL__BLK_RST_MASK);
1303 mdelay(10);
1304 WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
1305 ~UVD_VCPU_CNTL__BLK_RST_MASK);
1306
1307 mdelay(10);
1308 r = -1;
1309 }
1310
1311 if (r) {
1312 DRM_ERROR("VCN[%d] decode not responding, giving up!!!\n", i);
1313 return r;
1314 }
1315
1316 /* enable master interrupt */
1317 WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
1318 UVD_MASTINT_EN__VCPU_EN_MASK,
1319 ~UVD_MASTINT_EN__VCPU_EN_MASK);
1320
1321 /* clear the busy bit of VCN_STATUS */
1322 WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
1323 ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));
1324
1325 WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);
1326
1327 ring = &adev->vcn.inst[i].ring_dec;
1328 /* force RBC into idle state */
1329 rb_bufsz = order_base_2(ring->ring_size);
1330 tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
1331 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
1332 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
1333 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
1334 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
1335 WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);
1336
1337 fw_shared = adev->vcn.inst[i].fw_shared.cpu_addr;
1338 fw_shared->multi_queue.decode_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
1339
1340 /* program the RB_BASE for ring buffer */
1341 WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
1342 lower_32_bits(ring->gpu_addr));
1343 WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
1344 upper_32_bits(ring->gpu_addr));
1345
1346 /* Initialize the ring buffer's read and write pointers */
1347 WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);
1348
1349 WREG32_SOC15(VCN, i, mmUVD_SCRATCH2, 0);
1350 ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
1351 WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
1352 lower_32_bits(ring->wptr));
1353 fw_shared->rb.wptr = lower_32_bits(ring->wptr);
1354 fw_shared->multi_queue.decode_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
1355
1356 if (amdgpu_ip_version(adev, UVD_HWIP, 0) !=
1357 IP_VERSION(3, 0, 33)) {
1358 fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
1359 ring = &adev->vcn.inst[i].ring_enc[0];
1360 WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
1361 WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
1362 WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
1363 WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
1364 WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
1365 fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
1366
1367 fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
1368 ring = &adev->vcn.inst[i].ring_enc[1];
1369 WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
1370 WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
1371 WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
1372 WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
1373 WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
1374 fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
1375 }
1376
1377 /* Keep one read-back to ensure all register writes have landed;
1378 * otherwise races may occur.
1379 */
1380 RREG32_SOC15(VCN, i, mmUVD_STATUS);
1381
1382 return 0;
1383 }
1384
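/*
 * SR-IOV start path: build a per-instance MMSCH init table in the mm_table
 * buffer (register writes for the firmware location, cache windows and ring
 * buffers), hand its GPU address and size to the MMSCH, then kick off the
 * initialization through the VF mailbox and poll MMSCH_VF_MAILBOX_RESP.
 */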
1385 static int vcn_v3_0_start_sriov(struct amdgpu_device *adev)
1386 {
1387 int i, j;
1388 struct amdgpu_ring *ring;
1389 uint64_t cache_addr;
1390 uint64_t rb_addr;
1391 uint64_t ctx_addr;
1392 uint32_t param, resp, expected;
1393 uint32_t offset, cache_size;
1394 uint32_t tmp, timeout;
1395
1396 struct amdgpu_mm_table *table = &adev->virt.mm_table;
1397 uint32_t *table_loc;
1398 uint32_t table_size;
1399 uint32_t size, size_dw;
1400
1401 struct mmsch_v3_0_cmd_direct_write
1402 direct_wt = { {0} };
1403 struct mmsch_v3_0_cmd_direct_read_modify_write
1404 direct_rd_mod_wt = { {0} };
1405 struct mmsch_v3_0_cmd_end end = { {0} };
1406 struct mmsch_v3_0_init_header header;
1407
1408 direct_wt.cmd_header.command_type =
1409 MMSCH_COMMAND__DIRECT_REG_WRITE;
1410 direct_rd_mod_wt.cmd_header.command_type =
1411 MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
1412 end.cmd_header.command_type =
1413 MMSCH_COMMAND__END;
1414
1415 header.version = MMSCH_VERSION;
1416 header.total_size = sizeof(struct mmsch_v3_0_init_header) >> 2;
1417 for (i = 0; i < MMSCH_V3_0_VCN_INSTANCES; i++) {
1418 header.inst[i].init_status = 0;
1419 header.inst[i].table_offset = 0;
1420 header.inst[i].table_size = 0;
1421 }
1422
1423 table_loc = (uint32_t *)table->cpu_addr;
1424 table_loc += header.total_size;
1425 for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
1426 if (adev->vcn.harvest_config & (1 << i))
1427 continue;
1428
1429 table_size = 0;
1430
1431 MMSCH_V3_0_INSERT_DIRECT_RD_MOD_WT(SOC15_REG_OFFSET(VCN, i,
1432 mmUVD_STATUS),
1433 ~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);
1434
1435 cache_size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.inst[i].fw->size + 4);
1436
1437 if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
1438 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1439 mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
1440 adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
1441 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1442 mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
1443 adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
1444 offset = 0;
1445 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1446 mmUVD_VCPU_CACHE_OFFSET0),
1447 0);
1448 } else {
1449 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1450 mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
1451 lower_32_bits(adev->vcn.inst[i].gpu_addr));
1452 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1453 mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
1454 upper_32_bits(adev->vcn.inst[i].gpu_addr));
1455 offset = cache_size;
1456 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1457 mmUVD_VCPU_CACHE_OFFSET0),
1458 AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
1459 }
1460
1461 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1462 mmUVD_VCPU_CACHE_SIZE0),
1463 cache_size);
1464
1465 cache_addr = adev->vcn.inst[i].gpu_addr + offset;
1466 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1467 mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
1468 lower_32_bits(cache_addr));
1469 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1470 mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
1471 upper_32_bits(cache_addr));
1472 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1473 mmUVD_VCPU_CACHE_OFFSET1),
1474 0);
1475 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1476 mmUVD_VCPU_CACHE_SIZE1),
1477 AMDGPU_VCN_STACK_SIZE);
1478
1479 cache_addr = adev->vcn.inst[i].gpu_addr + offset +
1480 AMDGPU_VCN_STACK_SIZE;
1481 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1482 mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
1483 lower_32_bits(cache_addr));
1484 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1485 mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
1486 upper_32_bits(cache_addr));
1487 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1488 mmUVD_VCPU_CACHE_OFFSET2),
1489 0);
1490 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1491 mmUVD_VCPU_CACHE_SIZE2),
1492 AMDGPU_VCN_CONTEXT_SIZE);
1493
1494 for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) {
1495 ring = &adev->vcn.inst[i].ring_enc[j];
1496 ring->wptr = 0;
1497 rb_addr = ring->gpu_addr;
1498 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1499 mmUVD_RB_BASE_LO),
1500 lower_32_bits(rb_addr));
1501 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1502 mmUVD_RB_BASE_HI),
1503 upper_32_bits(rb_addr));
1504 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1505 mmUVD_RB_SIZE),
1506 ring->ring_size / 4);
1507 }
1508
1509 ring = &adev->vcn.inst[i].ring_dec;
1510 ring->wptr = 0;
1511 rb_addr = ring->gpu_addr;
1512 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1513 mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
1514 lower_32_bits(rb_addr));
1515 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1516 mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
1517 upper_32_bits(rb_addr));
1518 /* force RBC into idle state */
1519 tmp = order_base_2(ring->ring_size);
1520 tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, tmp);
1521 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
1522 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
1523 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
1524 tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
1525 MMSCH_V3_0_INSERT_DIRECT_WT(SOC15_REG_OFFSET(VCN, i,
1526 mmUVD_RBC_RB_CNTL),
1527 tmp);
1528
1529 /* add end packet */
1530 MMSCH_V3_0_INSERT_END();
1531
1532 		/* record this instance's table offset and size in the header */
1533 header.inst[i].init_status = 0;
1534 header.inst[i].table_offset = header.total_size;
1535 header.inst[i].table_size = table_size;
1536 header.total_size += table_size;
1537 }
1538
1539 /* Update init table header in memory */
1540 size = sizeof(struct mmsch_v3_0_init_header);
1541 table_loc = (uint32_t *)table->cpu_addr;
1542 memcpy((void *)table_loc, &header, size);
1543
1544 /* message MMSCH (in VCN[0]) to initialize this client
1545 * 1, write to mmsch_vf_ctx_addr_lo/hi register with GPU mc addr
1546 * of memory descriptor location
1547 */
1548 ctx_addr = table->gpu_addr;
1549 WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(ctx_addr));
1550 WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(ctx_addr));
1551
1552 /* 2, update vmid of descriptor */
1553 tmp = RREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID);
1554 tmp &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
1555 /* use domain0 for MM scheduler */
1556 tmp |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
1557 WREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID, tmp);
1558
1559 /* 3, notify mmsch about the size of this descriptor */
1560 size = header.total_size;
1561 WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_SIZE, size);
1562
1563 /* 4, set resp to zero */
1564 WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP, 0);
1565
1566 /* 5, kick off the initialization and wait until
1567 * MMSCH_VF_MAILBOX_RESP becomes non-zero
1568 */
1569 param = 0x10000001;
1570 WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_HOST, param);
1571 tmp = 0;
1572 timeout = 1000;
1573 resp = 0;
1574 expected = param + 1;
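	/* the loop below waits for MMSCH to echo param + 1 into the mailbox RESP register as its ack */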
1575 while (resp != expected) {
1576 resp = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP);
1577 if (resp == expected)
1578 break;
1579
1580 udelay(10);
1581 tmp = tmp + 10;
1582 if (tmp >= timeout) {
1583 DRM_ERROR("failed to init MMSCH. TIME-OUT after %d usec"\
1584 " waiting for mmMMSCH_VF_MAILBOX_RESP "\
1585 "(expected=0x%08x, readback=0x%08x)\n",
1586 tmp, expected, resp);
1587 return -EBUSY;
1588 }
1589 }
1590
1591 return 0;
1592 }
1593
1594 static int vcn_v3_0_stop_dpg_mode(struct amdgpu_vcn_inst *vinst)
1595 {
1596 struct amdgpu_device *adev = vinst->adev;
1597 int inst_idx = vinst->inst;
1598 struct dpg_pause_state state = {.fw_based = VCN_DPG_STATE__UNPAUSE};
1599 uint32_t tmp;
1600
1601 vcn_v3_0_pause_dpg_mode(vinst, &state);
1602
1603 /* Wait for power status to be 1 */
1604 SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
1605 UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1606
1607 /* wait for read ptr to be equal to write ptr */
1608 tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR);
1609 SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF);
1610
1611 tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2);
1612 SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF);
1613
1614 tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
1615 SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF);
1616
1617 SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
1618 UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1619
1620 /* disable dynamic power gating mode */
1621 WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0,
1622 ~UVD_POWER_STATUS__UVD_PG_MODE_MASK);
1623
1624 	/* Keep one read-back to ensure all register writes have completed;
1625 	 * otherwise later accesses may race with them.
1626 	 */
1627 RREG32_SOC15(VCN, inst_idx, mmUVD_STATUS);
1628
1629 return 0;
1630 }
1631
1632 static int vcn_v3_0_stop(struct amdgpu_vcn_inst *vinst)
1633 {
1634 struct amdgpu_device *adev = vinst->adev;
1635 int i = vinst->inst;
1636 uint32_t tmp;
1637 int r = 0;
1638
1639 if (adev->vcn.harvest_config & (1 << i))
1640 return 0;
1641
1642 if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
1643 r = vcn_v3_0_stop_dpg_mode(vinst);
1644 goto done;
1645 }
1646
1647 /* wait for vcn idle */
1648 r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
1649 if (r)
1650 goto done;
1651
1652 tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
1653 UVD_LMI_STATUS__READ_CLEAN_MASK |
1654 UVD_LMI_STATUS__WRITE_CLEAN_MASK |
1655 UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
1656 r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
1657 if (r)
1658 goto done;
1659
1660 /* disable LMI UMC channel */
1661 tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
1662 tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
1663 WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);
1664 tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
1665 UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
1666 r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
1667 if (r)
1668 goto done;
1669
1670 /* block VCPU register access */
1671 WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
1672 UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
1673 ~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);
1674
1675 /* reset VCPU */
1676 WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
1677 UVD_VCPU_CNTL__BLK_RST_MASK,
1678 ~UVD_VCPU_CNTL__BLK_RST_MASK);
1679
1680 /* disable VCPU clock */
1681 WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
1682 ~(UVD_VCPU_CNTL__CLK_EN_MASK));
1683
1684 /* apply soft reset */
1685 tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
1686 tmp |= UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK;
1687 WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
1688 tmp = RREG32_SOC15(VCN, i, mmUVD_SOFT_RESET);
1689 tmp |= UVD_SOFT_RESET__LMI_SOFT_RESET_MASK;
1690 WREG32_SOC15(VCN, i, mmUVD_SOFT_RESET, tmp);
1691
1692 /* clear status */
1693 WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);
1694
1695 /* apply HW clock gating */
1696 vcn_v3_0_enable_clock_gating(vinst);
1697
1698 /* enable VCN power gating */
1699 vcn_v3_0_enable_static_power_gating(vinst);
1700
1701 	/* Keep one read-back to ensure all register writes have completed;
1702 	 * otherwise later accesses may race with them.
1703 	 */
1704 RREG32_SOC15(VCN, i, mmUVD_STATUS);
1705
1706 done:
1707 if (adev->pm.dpm_enabled)
1708 amdgpu_dpm_enable_vcn(adev, false, i);
1709
1710 return r;
1711 }
1712
1713 static int vcn_v3_0_pause_dpg_mode(struct amdgpu_vcn_inst *vinst,
1714 struct dpg_pause_state *new_state)
1715 {
1716 struct amdgpu_device *adev = vinst->adev;
1717 int inst_idx = vinst->inst;
1718 struct amdgpu_fw_shared *fw_shared;
1719 struct amdgpu_ring *ring;
1720 uint32_t reg_data = 0;
1721 int ret_code;
1722
1723 /* pause/unpause if state is changed */
1724 if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
1725 DRM_DEBUG("dpg pause state changed %d -> %d",
1726 adev->vcn.inst[inst_idx].pause_state.fw_based, new_state->fw_based);
1727 reg_data = RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE) &
1728 (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
1729
1730 if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
1731 ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
1732 UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1733
1734 if (!ret_code) {
1735 /* pause DPG */
1736 reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
1737 WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
1738
1739 /* wait for ACK */
1740 SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_DPG_PAUSE,
1741 UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
1742 UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);
1743
1744 /* Stall DPG before WPTR/RPTR reset */
1745 WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
1746 UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
1747 ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
1748
1749 if (amdgpu_ip_version(adev, UVD_HWIP, 0) !=
1750 IP_VERSION(3, 0, 33)) {
1751 				/* Restore the ring registers */
1752 fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
1753 fw_shared->multi_queue.encode_generalpurpose_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
1754 ring = &adev->vcn.inst[inst_idx].ring_enc[0];
1755 ring->wptr = 0;
1756 WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
1757 WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
1758 WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
1759 WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
1760 WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
1761 fw_shared->multi_queue.encode_generalpurpose_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
1762
1763 fw_shared->multi_queue.encode_lowlatency_queue_mode |= cpu_to_le32(FW_QUEUE_RING_RESET);
1764 ring = &adev->vcn.inst[inst_idx].ring_enc[1];
1765 ring->wptr = 0;
1766 WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
1767 WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
1768 WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
1769 WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
1770 WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
1771 fw_shared->multi_queue.encode_lowlatency_queue_mode &= cpu_to_le32(~FW_QUEUE_RING_RESET);
1772
1773 				/* restore wptr/rptr with pointers saved in FW shared memory */
1774 WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, fw_shared->rb.rptr);
1775 WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR, fw_shared->rb.wptr);
1776 }
1777
1778 /* Unstall DPG */
1779 WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
1780 0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
1781
1782 SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS,
1783 UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
1784 }
1785 } else {
1786 /* unpause dpg, no need to wait */
1787 reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
1788 WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
1789 }
1790 adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
1791 }
1792
1793 return 0;
1794 }
1795
1796 /**
1797 * vcn_v3_0_dec_ring_get_rptr - get read pointer
1798 *
1799 * @ring: amdgpu_ring pointer
1800 *
1801 * Returns the current hardware read pointer
1802 */
1803 static uint64_t vcn_v3_0_dec_ring_get_rptr(struct amdgpu_ring *ring)
1804 {
1805 struct amdgpu_device *adev = ring->adev;
1806
1807 return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_RPTR);
1808 }
1809
1810 /**
1811 * vcn_v3_0_dec_ring_get_wptr - get write pointer
1812 *
1813 * @ring: amdgpu_ring pointer
1814 *
1815 * Returns the current hardware write pointer
1816 */
1817 static uint64_t vcn_v3_0_dec_ring_get_wptr(struct amdgpu_ring *ring)
1818 {
1819 struct amdgpu_device *adev = ring->adev;
1820
1821 if (ring->use_doorbell)
1822 return *ring->wptr_cpu_addr;
1823 else
1824 return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR);
1825 }
1826
1827 /**
1828 * vcn_v3_0_dec_ring_set_wptr - set write pointer
1829 *
1830 * @ring: amdgpu_ring pointer
1831 *
1832 * Commits the write pointer to the hardware
1833 */
1834 static void vcn_v3_0_dec_ring_set_wptr(struct amdgpu_ring *ring)
1835 {
1836 struct amdgpu_device *adev = ring->adev;
1837 struct amdgpu_fw_shared *fw_shared;
1838
1839 if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
1840 		/* whenever RBC_RB_WPTR is updated, save the wptr in shared rb.wptr and in SCRATCH2 */
1841 fw_shared = adev->vcn.inst[ring->me].fw_shared.cpu_addr;
1842 fw_shared->rb.wptr = lower_32_bits(ring->wptr);
1843 WREG32_SOC15(VCN, ring->me, mmUVD_SCRATCH2,
1844 lower_32_bits(ring->wptr));
1845 }
1846
1847 if (ring->use_doorbell) {
1848 *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
1849 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
1850 } else {
1851 WREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
1852 }
1853 }
1854
1855 static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = {
1856 .type = AMDGPU_RING_TYPE_VCN_DEC,
1857 .align_mask = 0x3f,
1858 .nop = VCN_DEC_SW_CMD_NO_OP,
1859 .secure_submission_supported = true,
1860 .get_rptr = vcn_v3_0_dec_ring_get_rptr,
1861 .get_wptr = vcn_v3_0_dec_ring_get_wptr,
1862 .set_wptr = vcn_v3_0_dec_ring_set_wptr,
1863 .emit_frame_size =
1864 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
1865 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
1866 VCN_SW_RING_EMIT_FRAME_SIZE,
1867 .emit_ib_size = 5, /* vcn_dec_sw_ring_emit_ib */
1868 .emit_ib = vcn_dec_sw_ring_emit_ib,
1869 .emit_fence = vcn_dec_sw_ring_emit_fence,
1870 .emit_vm_flush = vcn_dec_sw_ring_emit_vm_flush,
1871 .test_ring = amdgpu_vcn_dec_sw_ring_test_ring,
1872 	.test_ib = NULL, /* amdgpu_vcn_dec_sw_ring_test_ib */
1873 .insert_nop = amdgpu_ring_insert_nop,
1874 .insert_end = vcn_dec_sw_ring_insert_end,
1875 .pad_ib = amdgpu_ring_generic_pad_ib,
1876 .begin_use = amdgpu_vcn_ring_begin_use,
1877 .end_use = amdgpu_vcn_ring_end_use,
1878 .emit_wreg = vcn_dec_sw_ring_emit_wreg,
1879 .emit_reg_wait = vcn_dec_sw_ring_emit_reg_wait,
1880 .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
1881 };
1882
1883 static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p,
1884 struct amdgpu_job *job)
1885 {
1886 struct drm_gpu_scheduler **scheds;
1887 struct dma_fence *fence;
1888
1889 /* if VCN0 is harvested, we can't support AV1 */
1890 if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)
1891 return -EINVAL;
1892
1893 /* wait for all jobs to finish before switching to instance 0 */
1894 fence = amdgpu_ctx_get_fence(p->ctx, job->base.entity, ~0ull);
1895 if (fence) {
1896 dma_fence_wait(fence, false);
1897 dma_fence_put(fence);
1898 }
1899
1900 scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC]
1901 [AMDGPU_RING_PRIO_DEFAULT].sched;
1902 drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
1903 return 0;
1904 }
1905
1906 static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
1907 uint64_t addr)
1908 {
1909 struct ttm_operation_ctx ctx = { false, false };
1910 struct amdgpu_bo_va_mapping *map;
1911 uint32_t *msg, num_buffers;
1912 struct amdgpu_bo *bo;
1913 uint64_t start, end;
1914 unsigned int i;
1915 void *ptr;
1916 int r;
1917
1918 addr &= AMDGPU_GMC_HOLE_MASK;
1919 r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
1920 if (r) {
1921 DRM_ERROR("Can't find BO for addr 0x%08Lx\n", addr);
1922 return r;
1923 }
1924
1925 start = map->start * AMDGPU_GPU_PAGE_SIZE;
1926 end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
1927 if (addr & 0x7) {
1928 DRM_ERROR("VCN messages must be 8 byte aligned!\n");
1929 return -EINVAL;
1930 }
1931
1932 bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
1933 amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
1934 r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
1935 if (r) {
1936 DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
1937 return r;
1938 }
1939
1940 r = amdgpu_bo_kmap(bo, &ptr);
1941 if (r) {
1942 DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
1943 return r;
1944 }
1945
1946 msg = ptr + addr - start;
1947
1948 /* Check length */
1949 if (msg[1] > end - addr) {
1950 r = -EINVAL;
1951 goto out;
1952 }
1953
1954 if (msg[3] != RDECODE_MSG_CREATE)
1955 goto out;
1956
1957 num_buffers = msg[2];
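	/* walk the 4-dword buffer descriptors that follow the header; CREATE entries
	 * point at a create message whose first dword identifies the codec
	 */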
1958 for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {
1959 uint32_t offset, size, *create;
1960
1961 if (msg[0] != RDECODE_MESSAGE_CREATE)
1962 continue;
1963
1964 offset = msg[1];
1965 size = msg[2];
1966
1967 if (offset + size > end) {
1968 r = -EINVAL;
1969 goto out;
1970 }
1971
1972 create = ptr + addr + offset - start;
1973
1974 		/* H264, HEVC and VP9 can run on any instance */
1975 if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
1976 continue;
1977
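		/* anything else (e.g. AV1) has to be scheduled on instance 0 */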
1978 r = vcn_v3_0_limit_sched(p, job);
1979 if (r)
1980 goto out;
1981 }
1982
1983 out:
1984 amdgpu_bo_kunmap(bo);
1985 return r;
1986 }
1987
1988 static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
1989 struct amdgpu_job *job,
1990 struct amdgpu_ib *ib)
1991 {
1992 struct amdgpu_ring *ring = amdgpu_job_ring(job);
1993 uint32_t msg_lo = 0, msg_hi = 0;
1994 unsigned i;
1995 int r;
1996
1997 /* The first instance can decode anything */
1998 if (!ring->me)
1999 return 0;
2000
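	/* scan the IB for DATA0/DATA1/CMD register writes and reassemble the
	 * 64-bit message address before validating it
	 */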
2001 for (i = 0; i < ib->length_dw; i += 2) {
2002 uint32_t reg = amdgpu_ib_get_value(ib, i);
2003 uint32_t val = amdgpu_ib_get_value(ib, i + 1);
2004
2005 if (reg == PACKET0(p->adev->vcn.inst[ring->me].internal.data0, 0)) {
2006 msg_lo = val;
2007 } else if (reg == PACKET0(p->adev->vcn.inst[ring->me].internal.data1, 0)) {
2008 msg_hi = val;
2009 } else if (reg == PACKET0(p->adev->vcn.inst[ring->me].internal.cmd, 0) &&
2010 val == 0) {
2011 r = vcn_v3_0_dec_msg(p, job,
2012 ((u64)msg_hi) << 32 | msg_lo);
2013 if (r)
2014 return r;
2015 }
2016 }
2017 return 0;
2018 }
2019
2020 static const struct amdgpu_ring_funcs vcn_v3_0_dec_ring_vm_funcs = {
2021 .type = AMDGPU_RING_TYPE_VCN_DEC,
2022 .align_mask = 0xf,
2023 .secure_submission_supported = true,
2024 .get_rptr = vcn_v3_0_dec_ring_get_rptr,
2025 .get_wptr = vcn_v3_0_dec_ring_get_wptr,
2026 .set_wptr = vcn_v3_0_dec_ring_set_wptr,
2027 .patch_cs_in_place = vcn_v3_0_ring_patch_cs_in_place,
2028 .emit_frame_size =
2029 SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
2030 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
2031 8 + /* vcn_v2_0_dec_ring_emit_vm_flush */
2032 14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */
2033 6,
2034 .emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */
2035 .emit_ib = vcn_v2_0_dec_ring_emit_ib,
2036 .emit_fence = vcn_v2_0_dec_ring_emit_fence,
2037 .emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush,
2038 .test_ring = vcn_v2_0_dec_ring_test_ring,
2039 .test_ib = amdgpu_vcn_dec_ring_test_ib,
2040 .insert_nop = vcn_v2_0_dec_ring_insert_nop,
2041 .insert_start = vcn_v2_0_dec_ring_insert_start,
2042 .insert_end = vcn_v2_0_dec_ring_insert_end,
2043 .pad_ib = amdgpu_ring_generic_pad_ib,
2044 .begin_use = amdgpu_vcn_ring_begin_use,
2045 .end_use = amdgpu_vcn_ring_end_use,
2046 .emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
2047 .emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
2048 .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
2049 .reset = amdgpu_vcn_ring_reset,
2050 };
2051
2052 /**
2053 * vcn_v3_0_enc_ring_get_rptr - get enc read pointer
2054 *
2055 * @ring: amdgpu_ring pointer
2056 *
2057 * Returns the current hardware enc read pointer
2058 */
2059 static uint64_t vcn_v3_0_enc_ring_get_rptr(struct amdgpu_ring *ring)
2060 {
2061 struct amdgpu_device *adev = ring->adev;
2062
2063 if (ring == &adev->vcn.inst[ring->me].ring_enc[0])
2064 return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR);
2065 else
2066 return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR2);
2067 }
2068
2069 /**
2070 * vcn_v3_0_enc_ring_get_wptr - get enc write pointer
2071 *
2072 * @ring: amdgpu_ring pointer
2073 *
2074 * Returns the current hardware enc write pointer
2075 */
2076 static uint64_t vcn_v3_0_enc_ring_get_wptr(struct amdgpu_ring *ring)
2077 {
2078 struct amdgpu_device *adev = ring->adev;
2079
2080 if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
2081 if (ring->use_doorbell)
2082 return *ring->wptr_cpu_addr;
2083 else
2084 return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR);
2085 } else {
2086 if (ring->use_doorbell)
2087 return *ring->wptr_cpu_addr;
2088 else
2089 return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2);
2090 }
2091 }
2092
2093 /**
2094 * vcn_v3_0_enc_ring_set_wptr - set enc write pointer
2095 *
2096 * @ring: amdgpu_ring pointer
2097 *
2098 * Commits the enc write pointer to the hardware
2099 */
2100 static void vcn_v3_0_enc_ring_set_wptr(struct amdgpu_ring *ring)
2101 {
2102 struct amdgpu_device *adev = ring->adev;
2103
2104 if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
2105 if (ring->use_doorbell) {
2106 *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
2107 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
2108 } else {
2109 WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
2110 }
2111 } else {
2112 if (ring->use_doorbell) {
2113 *ring->wptr_cpu_addr = lower_32_bits(ring->wptr);
2114 WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
2115 } else {
2116 WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
2117 }
2118 }
2119 }
2120
2121 static const struct amdgpu_ring_funcs vcn_v3_0_enc_ring_vm_funcs = {
2122 .type = AMDGPU_RING_TYPE_VCN_ENC,
2123 .align_mask = 0x3f,
2124 .nop = VCN_ENC_CMD_NO_OP,
2125 .get_rptr = vcn_v3_0_enc_ring_get_rptr,
2126 .get_wptr = vcn_v3_0_enc_ring_get_wptr,
2127 .set_wptr = vcn_v3_0_enc_ring_set_wptr,
2128 .emit_frame_size =
2129 SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
2130 SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
2131 4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
2132 5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
2133 1, /* vcn_v2_0_enc_ring_insert_end */
2134 .emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
2135 .emit_ib = vcn_v2_0_enc_ring_emit_ib,
2136 .emit_fence = vcn_v2_0_enc_ring_emit_fence,
2137 .emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
2138 .test_ring = amdgpu_vcn_enc_ring_test_ring,
2139 .test_ib = amdgpu_vcn_enc_ring_test_ib,
2140 .insert_nop = amdgpu_ring_insert_nop,
2141 .insert_end = vcn_v2_0_enc_ring_insert_end,
2142 .pad_ib = amdgpu_ring_generic_pad_ib,
2143 .begin_use = amdgpu_vcn_ring_begin_use,
2144 .end_use = amdgpu_vcn_ring_end_use,
2145 .emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
2146 .emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
2147 .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
2148 .reset = amdgpu_vcn_ring_reset,
2149 };
2150
2151 static void vcn_v3_0_set_dec_ring_funcs(struct amdgpu_device *adev)
2152 {
2153 int i;
2154
2155 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
2156 if (adev->vcn.harvest_config & (1 << i))
2157 continue;
2158
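		/* DEC_SW_RING_ENABLED selects between the register-backed and software decode ring functions */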
2159 if (!DEC_SW_RING_ENABLED)
2160 adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_ring_vm_funcs;
2161 else
2162 adev->vcn.inst[i].ring_dec.funcs = &vcn_v3_0_dec_sw_ring_vm_funcs;
2163 adev->vcn.inst[i].ring_dec.me = i;
2164 }
2165 }
2166
2167 static void vcn_v3_0_set_enc_ring_funcs(struct amdgpu_device *adev)
2168 {
2169 int i, j;
2170
2171 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
2172 if (adev->vcn.harvest_config & (1 << i))
2173 continue;
2174
2175 for (j = 0; j < adev->vcn.inst[i].num_enc_rings; ++j) {
2176 adev->vcn.inst[i].ring_enc[j].funcs = &vcn_v3_0_enc_ring_vm_funcs;
2177 adev->vcn.inst[i].ring_enc[j].me = i;
2178 }
2179 }
2180 }
2181
2182 static int vcn_v3_0_reset(struct amdgpu_vcn_inst *vinst)
2183 {
2184 int r;
2185
2186 r = vcn_v3_0_stop(vinst);
2187 if (r)
2188 return r;
2189 vcn_v3_0_enable_clock_gating(vinst);
2190 vcn_v3_0_enable_static_power_gating(vinst);
2191 return vcn_v3_0_start(vinst);
2192 }
2193
2194 static bool vcn_v3_0_is_idle(struct amdgpu_ip_block *ip_block)
2195 {
2196 struct amdgpu_device *adev = ip_block->adev;
2197 int i, ret = 1;
2198
2199 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
2200 if (adev->vcn.harvest_config & (1 << i))
2201 continue;
2202
2203 ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE);
2204 }
2205
2206 return ret;
2207 }
2208
2209 static int vcn_v3_0_wait_for_idle(struct amdgpu_ip_block *ip_block)
2210 {
2211 struct amdgpu_device *adev = ip_block->adev;
2212 int i, ret = 0;
2213
2214 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
2215 if (adev->vcn.harvest_config & (1 << i))
2216 continue;
2217
2218 ret = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE,
2219 UVD_STATUS__IDLE);
2220 if (ret)
2221 return ret;
2222 }
2223
2224 return ret;
2225 }
2226
2227 static int vcn_v3_0_set_clockgating_state(struct amdgpu_ip_block *ip_block,
2228 enum amd_clockgating_state state)
2229 {
2230 struct amdgpu_device *adev = ip_block->adev;
2231 bool enable = state == AMD_CG_STATE_GATE;
2232 int i;
2233
2234 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
2235 struct amdgpu_vcn_inst *vinst = &adev->vcn.inst[i];
2236 if (adev->vcn.harvest_config & (1 << i))
2237 continue;
2238
2239 if (enable) {
2240 if (RREG32_SOC15(VCN, i, mmUVD_STATUS) != UVD_STATUS__IDLE)
2241 return -EBUSY;
2242 vcn_v3_0_enable_clock_gating(vinst);
2243 } else {
2244 vcn_v3_0_disable_clock_gating(vinst);
2245 }
2246 }
2247
2248 return 0;
2249 }
2250
2251 static int vcn_v3_0_set_pg_state(struct amdgpu_vcn_inst *vinst,
2252 enum amd_powergating_state state)
2253 {
2254 struct amdgpu_device *adev = vinst->adev;
2255 int ret = 0;
2256
2257 	/* Under SRIOV the guest must not control VCN power-gating;
2258 	 * the MMSCH firmware owns both power-gating and clock-gating,
2259 	 * so avoid touching CGC and PG here.
2260 	 */
2261 if (amdgpu_sriov_vf(adev)) {
2262 vinst->cur_state = AMD_PG_STATE_UNGATE;
2263 return 0;
2264 }
2265
2266 if (state == vinst->cur_state)
2267 return 0;
2268
2269 if (state == AMD_PG_STATE_GATE)
2270 ret = vcn_v3_0_stop(vinst);
2271 else
2272 ret = vcn_v3_0_start(vinst);
2273
2274 if (!ret)
2275 vinst->cur_state = state;
2276
2277 return ret;
2278 }
2279
2280 static int vcn_v3_0_set_interrupt_state(struct amdgpu_device *adev,
2281 struct amdgpu_irq_src *source,
2282 unsigned type,
2283 enum amdgpu_interrupt_state state)
2284 {
2285 return 0;
2286 }
2287
2288 static int vcn_v3_0_process_interrupt(struct amdgpu_device *adev,
2289 struct amdgpu_irq_src *source,
2290 struct amdgpu_iv_entry *entry)
2291 {
2292 uint32_t ip_instance;
2293
2294 switch (entry->client_id) {
2295 case SOC15_IH_CLIENTID_VCN:
2296 ip_instance = 0;
2297 break;
2298 case SOC15_IH_CLIENTID_VCN1:
2299 ip_instance = 1;
2300 break;
2301 default:
2302 DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
2303 return 0;
2304 }
2305
2306 DRM_DEBUG("IH: VCN TRAP\n");
2307
2308 switch (entry->src_id) {
2309 case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT:
2310 amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec);
2311 break;
2312 case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
2313 amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
2314 break;
2315 case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
2316 amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
2317 break;
2318 default:
2319 DRM_ERROR("Unhandled interrupt: %d %d\n",
2320 entry->src_id, entry->src_data[0]);
2321 break;
2322 }
2323
2324 return 0;
2325 }
2326
2327 static const struct amdgpu_irq_src_funcs vcn_v3_0_irq_funcs = {
2328 .set = vcn_v3_0_set_interrupt_state,
2329 .process = vcn_v3_0_process_interrupt,
2330 };
2331
2332 static void vcn_v3_0_set_irq_funcs(struct amdgpu_device *adev)
2333 {
2334 int i;
2335
2336 for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
2337 if (adev->vcn.harvest_config & (1 << i))
2338 continue;
2339
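		/* one interrupt type per encode ring plus one for the decode ring */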
2340 adev->vcn.inst[i].irq.num_types = adev->vcn.inst[i].num_enc_rings + 1;
2341 adev->vcn.inst[i].irq.funcs = &vcn_v3_0_irq_funcs;
2342 }
2343 }
2344
2345 static const struct amd_ip_funcs vcn_v3_0_ip_funcs = {
2346 .name = "vcn_v3_0",
2347 .early_init = vcn_v3_0_early_init,
2348 .sw_init = vcn_v3_0_sw_init,
2349 .sw_fini = vcn_v3_0_sw_fini,
2350 .hw_init = vcn_v3_0_hw_init,
2351 .hw_fini = vcn_v3_0_hw_fini,
2352 .suspend = vcn_v3_0_suspend,
2353 .resume = vcn_v3_0_resume,
2354 .is_idle = vcn_v3_0_is_idle,
2355 .wait_for_idle = vcn_v3_0_wait_for_idle,
2356 .set_clockgating_state = vcn_v3_0_set_clockgating_state,
2357 .set_powergating_state = vcn_set_powergating_state,
2358 .dump_ip_state = amdgpu_vcn_dump_ip_state,
2359 .print_ip_state = amdgpu_vcn_print_ip_state,
2360 };
2361
2362 const struct amdgpu_ip_block_version vcn_v3_0_ip_block = {
2363 .type = AMD_IP_BLOCK_TYPE_VCN,
2364 .major = 3,
2365 .minor = 0,
2366 .rev = 0,
2367 .funcs = &vcn_v3_0_ip_funcs,
2368 };
2369