1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
22 */
23
24 #include <linux/module.h>
25
26 #ifdef CONFIG_X86
27 #include <asm/hypervisor.h>
28 #endif
29
30 #include <drm/drm_drv.h>
31 #include <xen/xen.h>
32
33 #include "amdgpu.h"
34 #include "amdgpu_ras.h"
35 #include "amdgpu_reset.h"
36 #include "amdgpu_dpm.h"
37 #include "vi.h"
38 #include "soc15.h"
39 #include "nv.h"
40 #include "amdgpu_virt_ras_cmd.h"
41
/*
 * Fill one firmware entry of a vf2pf message: the ucode_info[] slot selected
 * by @ucode gets @ucode as its id and @ver as its version.
 *
 * Every argument is parenthesized so that expressions such as "&msg" or
 * "a | b" expand correctly. @vf2pf_info and @ucode are expanded more than
 * once, so they must be free of side effects.
 */
#define POPULATE_UCODE_INFO(vf2pf_info, ucode, ver) \
	do { \
		(vf2pf_info)->ucode_info[(ucode)].id = (ucode); \
		(vf2pf_info)->ucode_info[(ucode)].version = (ver); \
	} while (0)
47
48 #define mmRCC_CONFIG_MEMSIZE 0xde3
49
/*
 * Human-readable names for the dynamic critical-region tables, ending with a
 * "LAST" sentinel entry.
 * NOTE(review): presumably indexed by the AMD_SRIOV_MSG_*_TABLE_ID values, so
 * the order here must match that enumeration - confirm against its definition.
 */
const char *amdgpu_virt_dynamic_crit_table_name[] = {
	"IP DISCOVERY",
	"VBIOS IMG",
	"RAS TELEMETRY",
	"DATA EXCHANGE",
	"BAD PAGE INFO",
	"INIT HEADER",
	"LAST",
};
59
amdgpu_virt_mmio_blocked(struct amdgpu_device * adev)60 bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev)
61 {
62 /* By now all MMIO pages except mailbox are blocked */
63 /* if blocking is enabled in hypervisor. Choose the */
64 /* SCRATCH_REG0 to test. */
65 return RREG32_NO_KIQ(0xc040) == 0xffffffff;
66 }
67
amdgpu_virt_init_setting(struct amdgpu_device * adev)68 void amdgpu_virt_init_setting(struct amdgpu_device *adev)
69 {
70 struct drm_device *ddev = adev_to_drm(adev);
71
72 /* enable virtual display */
73 if (adev->asic_type != CHIP_ALDEBARAN &&
74 adev->asic_type != CHIP_ARCTURUS &&
75 ((adev->pdev->class >> 8) != PCI_CLASS_ACCELERATOR_PROCESSING)) {
76 if (adev->mode_info.num_crtc == 0)
77 adev->mode_info.num_crtc = 1;
78 adev->enable_virtual_display = true;
79 }
80 ddev->driver_features &= ~DRIVER_ATOMIC;
81 adev->cg_flags = 0;
82 adev->pg_flags = 0;
83
84 /* Reduce kcq number to 2 to reduce latency */
85 if (amdgpu_num_kcq == -1)
86 amdgpu_num_kcq = 2;
87 }
88
89 /**
90 * amdgpu_virt_request_full_gpu() - request full gpu access
91 * @adev: amdgpu device.
92 * @init: is driver init time.
93 * When start to init/fini driver, first need to request full gpu access.
94 * Return: Zero if request success, otherwise will return error.
95 */
amdgpu_virt_request_full_gpu(struct amdgpu_device * adev,bool init)96 int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init)
97 {
98 struct amdgpu_virt *virt = &adev->virt;
99 int r;
100
101 if (virt->ops && virt->ops->req_full_gpu) {
102 r = virt->ops->req_full_gpu(adev, init);
103 if (r) {
104 adev->no_hw_access = true;
105 return r;
106 }
107
108 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
109 }
110
111 return 0;
112 }
113
114 /**
115 * amdgpu_virt_release_full_gpu() - release full gpu access
116 * @adev: amdgpu device.
117 * @init: is driver init time.
118 * When finishing driver init/fini, need to release full gpu access.
119 * Return: Zero if release success, otherwise will returen error.
120 */
amdgpu_virt_release_full_gpu(struct amdgpu_device * adev,bool init)121 int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init)
122 {
123 struct amdgpu_virt *virt = &adev->virt;
124 int r;
125
126 if (virt->ops && virt->ops->rel_full_gpu) {
127 r = virt->ops->rel_full_gpu(adev, init);
128 if (r)
129 return r;
130
131 adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME;
132 }
133 return 0;
134 }
135
136 /**
137 * amdgpu_virt_reset_gpu() - reset gpu
138 * @adev: amdgpu device.
139 * Send reset command to GPU hypervisor to reset GPU that VM is using
140 * Return: Zero if reset success, otherwise will return error.
141 */
amdgpu_virt_reset_gpu(struct amdgpu_device * adev)142 int amdgpu_virt_reset_gpu(struct amdgpu_device *adev)
143 {
144 struct amdgpu_virt *virt = &adev->virt;
145 int r;
146
147 if (virt->ops && virt->ops->reset_gpu) {
148 r = virt->ops->reset_gpu(adev);
149 if (r)
150 return r;
151
152 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
153 }
154
155 return 0;
156 }
157
amdgpu_virt_request_init_data(struct amdgpu_device * adev)158 void amdgpu_virt_request_init_data(struct amdgpu_device *adev)
159 {
160 struct amdgpu_virt *virt = &adev->virt;
161
162 if (virt->ops && virt->ops->req_init_data)
163 virt->ops->req_init_data(adev);
164
165 if (adev->virt.req_init_data_ver > 0)
166 dev_info(adev->dev, "host supports REQ_INIT_DATA handshake of critical_region_version %d\n",
167 adev->virt.req_init_data_ver);
168 else
169 dev_warn(adev->dev, "host doesn't support REQ_INIT_DATA handshake\n");
170 }
171
172 /**
173 * amdgpu_virt_ready_to_reset() - send ready to reset to host
174 * @adev: amdgpu device.
175 * Send ready to reset message to GPU hypervisor to signal we have stopped GPU
176 * activity and is ready for host FLR
177 */
amdgpu_virt_ready_to_reset(struct amdgpu_device * adev)178 void amdgpu_virt_ready_to_reset(struct amdgpu_device *adev)
179 {
180 struct amdgpu_virt *virt = &adev->virt;
181
182 if (virt->ops && virt->ops->reset_gpu)
183 virt->ops->ready_to_reset(adev);
184 }
185
186 /**
187 * amdgpu_virt_wait_reset() - wait for reset gpu completed
188 * @adev: amdgpu device.
189 * Wait for GPU reset completed.
190 * Return: Zero if reset success, otherwise will return error.
191 */
amdgpu_virt_wait_reset(struct amdgpu_device * adev)192 int amdgpu_virt_wait_reset(struct amdgpu_device *adev)
193 {
194 struct amdgpu_virt *virt = &adev->virt;
195
196 if (!virt->ops || !virt->ops->wait_reset)
197 return -EINVAL;
198
199 return virt->ops->wait_reset(adev);
200 }
201
202 /**
203 * amdgpu_virt_alloc_mm_table() - alloc memory for mm table
204 * @adev: amdgpu device.
205 * MM table is used by UVD and VCE for its initialization
206 * Return: Zero if allocate success.
207 */
amdgpu_virt_alloc_mm_table(struct amdgpu_device * adev)208 int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev)
209 {
210 int r;
211
212 if (!amdgpu_sriov_vf(adev) || adev->virt.mm_table.gpu_addr)
213 return 0;
214
215 r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
216 AMDGPU_GEM_DOMAIN_VRAM |
217 AMDGPU_GEM_DOMAIN_GTT,
218 &adev->virt.mm_table.bo,
219 &adev->virt.mm_table.gpu_addr,
220 (void *)&adev->virt.mm_table.cpu_addr);
221 if (r) {
222 dev_err(adev->dev, "failed to alloc mm table and error = %d.\n", r);
223 return r;
224 }
225
226 memset((void *)adev->virt.mm_table.cpu_addr, 0, PAGE_SIZE);
227 dev_info(adev->dev, "MM table gpu addr = 0x%llx, cpu addr = %p.\n",
228 adev->virt.mm_table.gpu_addr,
229 adev->virt.mm_table.cpu_addr);
230 return 0;
231 }
232
233 /**
234 * amdgpu_virt_free_mm_table() - free mm table memory
235 * @adev: amdgpu device.
236 * Free MM table memory
237 */
amdgpu_virt_free_mm_table(struct amdgpu_device * adev)238 void amdgpu_virt_free_mm_table(struct amdgpu_device *adev)
239 {
240 if (!amdgpu_sriov_vf(adev) || !adev->virt.mm_table.gpu_addr)
241 return;
242
243 amdgpu_bo_free_kernel(&adev->virt.mm_table.bo,
244 &adev->virt.mm_table.gpu_addr,
245 (void *)&adev->virt.mm_table.cpu_addr);
246 adev->virt.mm_table.gpu_addr = 0;
247 }
248
249 /**
250 * amdgpu_virt_rcvd_ras_interrupt() - receive ras interrupt
251 * @adev: amdgpu device.
252 * Check whether host sent RAS error message
253 * Return: true if found, otherwise false
254 */
amdgpu_virt_rcvd_ras_interrupt(struct amdgpu_device * adev)255 bool amdgpu_virt_rcvd_ras_interrupt(struct amdgpu_device *adev)
256 {
257 struct amdgpu_virt *virt = &adev->virt;
258
259 if (!virt->ops || !virt->ops->rcvd_ras_intr)
260 return false;
261
262 return virt->ops->rcvd_ras_intr(adev);
263 }
264
265
/*
 * Compute the additive checksum of an SR-IOV message.
 *
 * @obj:      start of the message buffer.
 * @obj_size: number of bytes of @obj to sum.
 * @key:      initial value of the sum.
 * @checksum: checksum value currently stored inside the message; its bytes
 *            are subtracted again so the stored field does not contribute.
 *
 * Every byte is treated as unsigned. Arithmetic wraps modulo 2^32 (or
 * whatever the width of unsigned int is).
 *
 * Return: @key plus the sum of the message bytes minus the sum of the bytes
 * of @checksum.
 */
unsigned int amd_sriov_msg_checksum(void *obj,
				    unsigned long obj_size,
				    unsigned int key,
				    unsigned int checksum)
{
	const unsigned char *pos = obj;
	unsigned int ret = key;
	unsigned long i;

	/* calculate checksum */
	for (i = 0; i < obj_size; ++i)
		ret += pos[i];

	/* minus the checksum itself */
	pos = (const unsigned char *)&checksum;
	for (i = 0; i < sizeof(checksum); ++i)
		ret -= pos[i];

	return ret;
}
285
amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device * adev)286 static int amdgpu_virt_init_ras_err_handler_data(struct amdgpu_device *adev)
287 {
288 struct amdgpu_virt *virt = &adev->virt;
289 struct amdgpu_virt_ras_err_handler_data **data = &virt->virt_eh_data;
290 /* GPU will be marked bad on host if bp count more then 10,
291 * so alloc 512 is enough.
292 */
293 unsigned int align_space = 512;
294 void *bps = NULL;
295 struct amdgpu_bo **bps_bo = NULL;
296
297 *data = kmalloc_obj(struct amdgpu_virt_ras_err_handler_data);
298 if (!*data)
299 goto data_failure;
300
301 bps = kmalloc_objs(*(*data)->bps, align_space);
302 if (!bps)
303 goto bps_failure;
304
305 bps_bo = kmalloc_objs(*(*data)->bps_bo, align_space);
306 if (!bps_bo)
307 goto bps_bo_failure;
308
309 (*data)->bps = bps;
310 (*data)->bps_bo = bps_bo;
311 (*data)->count = 0;
312 (*data)->last_reserved = 0;
313
314 virt->ras_init_done = true;
315
316 return 0;
317
318 bps_bo_failure:
319 kfree(bps);
320 bps_failure:
321 kfree(*data);
322 data_failure:
323 return -ENOMEM;
324 }
325
amdgpu_virt_ras_release_bp(struct amdgpu_device * adev)326 static void amdgpu_virt_ras_release_bp(struct amdgpu_device *adev)
327 {
328 struct amdgpu_virt *virt = &adev->virt;
329 struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
330 struct amdgpu_bo *bo;
331 int i;
332
333 if (!data)
334 return;
335
336 for (i = data->last_reserved - 1; i >= 0; i--) {
337 bo = data->bps_bo[i];
338 if (bo) {
339 amdgpu_bo_free_kernel(&bo, NULL, NULL);
340 data->bps_bo[i] = bo;
341 }
342 data->last_reserved = i;
343 }
344 }
345
amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device * adev)346 void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev)
347 {
348 struct amdgpu_virt *virt = &adev->virt;
349 struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
350
351 virt->ras_init_done = false;
352
353 if (!data)
354 return;
355
356 amdgpu_virt_ras_release_bp(adev);
357
358 kfree(data->bps);
359 kfree(data->bps_bo);
360 kfree(data);
361 virt->virt_eh_data = NULL;
362 }
363
amdgpu_virt_ras_add_bps(struct amdgpu_device * adev,struct eeprom_table_record * bps,int pages)364 static void amdgpu_virt_ras_add_bps(struct amdgpu_device *adev,
365 struct eeprom_table_record *bps, int pages)
366 {
367 struct amdgpu_virt *virt = &adev->virt;
368 struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
369
370 if (!data)
371 return;
372
373 memcpy(&data->bps[data->count], bps, pages * sizeof(*data->bps));
374 data->count += pages;
375 }
376
amdgpu_virt_ras_reserve_bps(struct amdgpu_device * adev)377 static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
378 {
379 struct amdgpu_virt *virt = &adev->virt;
380 struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
381 struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
382 struct ttm_resource_manager *man = &mgr->manager;
383 struct amdgpu_bo *bo = NULL;
384 uint64_t bp;
385 int i;
386
387 if (!data)
388 return;
389
390 for (i = data->last_reserved; i < data->count; i++) {
391 bp = data->bps[i].retired_page;
392
393 /* There are two cases of reserve error should be ignored:
394 * 1) a ras bad page has been allocated (used by someone);
395 * 2) a ras bad page has been reserved (duplicate error injection
396 * for one page);
397 */
398 if (ttm_resource_manager_used(man)) {
399 amdgpu_vram_mgr_reserve_range(&adev->mman.vram_mgr,
400 bp << AMDGPU_GPU_PAGE_SHIFT,
401 AMDGPU_GPU_PAGE_SIZE);
402 data->bps_bo[i] = NULL;
403 } else {
404 if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
405 AMDGPU_GPU_PAGE_SIZE,
406 &bo, NULL))
407 dev_dbg(adev->dev,
408 "RAS WARN: reserve vram for retired page %llx fail\n",
409 bp);
410 data->bps_bo[i] = bo;
411 }
412 data->last_reserved = i + 1;
413 bo = NULL;
414 }
415 }
416
amdgpu_virt_ras_check_bad_page(struct amdgpu_device * adev,uint64_t retired_page)417 static bool amdgpu_virt_ras_check_bad_page(struct amdgpu_device *adev,
418 uint64_t retired_page)
419 {
420 struct amdgpu_virt *virt = &adev->virt;
421 struct amdgpu_virt_ras_err_handler_data *data = virt->virt_eh_data;
422 int i;
423
424 if (!data)
425 return true;
426
427 for (i = 0; i < data->count; i++)
428 if (retired_page == data->bps[i].retired_page)
429 return true;
430
431 return false;
432 }
433
/*
 * Import the bad-page block located at @bp_block_offset inside the mapped
 * VRAM usage region (fw_vram_usage_va if set, drv_vram_usage_va otherwise).
 *
 * The block is read as @bp_block_size / 8 64-bit retired-page values. Each
 * value not already recorded is appended to the bad-page list, after which
 * the pending entries are reserved.
 */
static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev,
				     uint64_t bp_block_offset, uint32_t bp_block_size)
{
	struct eeprom_table_record bp;
	void *vram_usage_va;
	uint64_t *entries;
	uint32_t bp_cnt;
	uint32_t bp_idx;

	if (adev->mman.fw_vram_usage_va)
		vram_usage_va = adev->mman.fw_vram_usage_va;
	else
		vram_usage_va = adev->mman.drv_vram_usage_va;

	memset(&bp, 0, sizeof(bp));

	/* An empty block carries no pages. */
	if (!bp_block_size)
		return;

	entries = (uint64_t *)(vram_usage_va + bp_block_offset);
	bp_cnt = bp_block_size / sizeof(uint64_t);

	for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) {
		bp.retired_page = entries[bp_idx];

		/* Skip pages that are already recorded. */
		if (amdgpu_virt_ras_check_bad_page(adev, bp.retired_page))
			continue;

		amdgpu_virt_ras_add_bps(adev, &bp, 1);
		amdgpu_virt_ras_reserve_bps(adev);
	}
}
465
amdgpu_virt_read_pf2vf_data(struct amdgpu_device * adev)466 static int amdgpu_virt_read_pf2vf_data(struct amdgpu_device *adev)
467 {
468 struct amd_sriov_msg_pf2vf_info_header *pf2vf_info = adev->virt.fw_reserve.p_pf2vf;
469 uint32_t checksum;
470 uint32_t checkval;
471
472 uint32_t i;
473 uint32_t tmp;
474
475 if (adev->virt.fw_reserve.p_pf2vf == NULL)
476 return -EINVAL;
477
478 if (pf2vf_info->size > 1024) {
479 dev_err(adev->dev, "invalid pf2vf message size: 0x%x\n", pf2vf_info->size);
480 return -EINVAL;
481 }
482
483 switch (pf2vf_info->version) {
484 case 1:
485 checksum = ((struct amdgim_pf2vf_info_v1 *)pf2vf_info)->checksum;
486 checkval = amd_sriov_msg_checksum(
487 adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size,
488 adev->virt.fw_reserve.checksum_key, checksum);
489 if (checksum != checkval) {
490 dev_err(adev->dev,
491 "invalid pf2vf message: header checksum=0x%x calculated checksum=0x%x\n",
492 checksum, checkval);
493 return -EINVAL;
494 }
495
496 adev->virt.gim_feature =
497 ((struct amdgim_pf2vf_info_v1 *)pf2vf_info)->feature_flags;
498 break;
499 case 2:
500 /* TODO: missing key, need to add it later */
501 checksum = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->checksum;
502 checkval = amd_sriov_msg_checksum(
503 adev->virt.fw_reserve.p_pf2vf, pf2vf_info->size,
504 0, checksum);
505 if (checksum != checkval) {
506 dev_err(adev->dev,
507 "invalid pf2vf message: header checksum=0x%x calculated checksum=0x%x\n",
508 checksum, checkval);
509 return -EINVAL;
510 }
511
512 adev->virt.vf2pf_update_interval_ms =
513 ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->vf2pf_update_interval_ms;
514 adev->virt.gim_feature =
515 ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->feature_flags.all;
516 adev->virt.reg_access =
517 ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->reg_access_flags.all;
518
519 adev->virt.decode_max_dimension_pixels = 0;
520 adev->virt.decode_max_frame_pixels = 0;
521 adev->virt.encode_max_dimension_pixels = 0;
522 adev->virt.encode_max_frame_pixels = 0;
523 adev->virt.is_mm_bw_enabled = false;
524 for (i = 0; i < AMD_SRIOV_MSG_RESERVE_VCN_INST; i++) {
525 tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].decode_max_dimension_pixels;
526 adev->virt.decode_max_dimension_pixels = max(tmp, adev->virt.decode_max_dimension_pixels);
527
528 tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].decode_max_frame_pixels;
529 adev->virt.decode_max_frame_pixels = max(tmp, adev->virt.decode_max_frame_pixels);
530
531 tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].encode_max_dimension_pixels;
532 adev->virt.encode_max_dimension_pixels = max(tmp, adev->virt.encode_max_dimension_pixels);
533
534 tmp = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->mm_bw_management[i].encode_max_frame_pixels;
535 adev->virt.encode_max_frame_pixels = max(tmp, adev->virt.encode_max_frame_pixels);
536 }
537 if ((adev->virt.decode_max_dimension_pixels > 0) || (adev->virt.encode_max_dimension_pixels > 0))
538 adev->virt.is_mm_bw_enabled = true;
539
540 adev->unique_id =
541 ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->uuid;
542 adev->virt.ras_en_caps.all = ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->ras_en_caps.all;
543 adev->virt.ras_telemetry_en_caps.all =
544 ((struct amd_sriov_msg_pf2vf_info *)pf2vf_info)->ras_telemetry_en_caps.all;
545 break;
546 default:
547 dev_err(adev->dev, "invalid pf2vf version: 0x%x\n", pf2vf_info->version);
548 return -EINVAL;
549 }
550
551 /* correct too large or too little interval value */
552 if (adev->virt.vf2pf_update_interval_ms < 200 || adev->virt.vf2pf_update_interval_ms > 10000)
553 adev->virt.vf2pf_update_interval_ms = 2000;
554
555 return 0;
556 }
557
amdgpu_virt_populate_vf2pf_ucode_info(struct amdgpu_device * adev)558 static void amdgpu_virt_populate_vf2pf_ucode_info(struct amdgpu_device *adev)
559 {
560 struct amd_sriov_msg_vf2pf_info *vf2pf_info;
561 vf2pf_info = (struct amd_sriov_msg_vf2pf_info *) adev->virt.fw_reserve.p_vf2pf;
562
563 if (adev->virt.fw_reserve.p_vf2pf == NULL)
564 return;
565
566 POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_VCE, adev->vce.fw_version);
567 POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_UVD, adev->uvd.fw_version);
568 POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MC, adev->gmc.fw_version);
569 POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ME, adev->gfx.me_fw_version);
570 POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_PFP, adev->gfx.pfp_fw_version);
571 POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_CE, adev->gfx.ce_fw_version);
572 POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC, adev->gfx.rlc_fw_version);
573 POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLC, adev->gfx.rlc_srlc_fw_version);
574 POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLG, adev->gfx.rlc_srlg_fw_version);
575 POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLS, adev->gfx.rlc_srls_fw_version);
576 POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC, adev->gfx.mec_fw_version);
577 POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC2, adev->gfx.mec2_fw_version);
578 POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SOS, adev->psp.sos.fw_version);
579 POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ASD,
580 adev->psp.asd_context.bin_desc.fw_version);
581 POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_RAS,
582 adev->psp.ras_context.context.bin_desc.fw_version);
583 POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_TA_XGMI,
584 adev->psp.xgmi_context.context.bin_desc.fw_version);
585 POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SMC, adev->pm.fw_version);
586 POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SDMA, adev->sdma.instance[0].fw_version);
587 POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SDMA2, adev->sdma.instance[1].fw_version);
588 POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_VCN, adev->vcn.fw_version);
589 POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_DMCU, adev->dm.dmcu_fw_version);
590 }
591
amdgpu_virt_write_vf2pf_data(struct amdgpu_device * adev)592 static int amdgpu_virt_write_vf2pf_data(struct amdgpu_device *adev)
593 {
594 struct amd_sriov_msg_vf2pf_info *vf2pf_info;
595
596 vf2pf_info = (struct amd_sriov_msg_vf2pf_info *) adev->virt.fw_reserve.p_vf2pf;
597
598 if (adev->virt.fw_reserve.p_vf2pf == NULL)
599 return -EINVAL;
600
601 memset(vf2pf_info, 0, sizeof(struct amd_sriov_msg_vf2pf_info));
602
603 vf2pf_info->header.size = sizeof(struct amd_sriov_msg_vf2pf_info);
604 vf2pf_info->header.version = AMD_SRIOV_MSG_FW_VRAM_VF2PF_VER;
605
606 #ifdef MODULE
607 if (THIS_MODULE->version != NULL)
608 strcpy(vf2pf_info->driver_version, THIS_MODULE->version);
609 else
610 #endif
611 strcpy(vf2pf_info->driver_version, "N/A");
612
613 vf2pf_info->pf2vf_version_required = 0; // no requirement, guest understands all
614 vf2pf_info->driver_cert = 0;
615 vf2pf_info->os_info.all = 0;
616
617 vf2pf_info->fb_usage = ttm_resource_manager_used(&adev->mman.vram_mgr.manager) ?
618 ttm_resource_manager_usage(&adev->mman.vram_mgr.manager) >> 20 : 0;
619 vf2pf_info->fb_vis_usage =
620 amdgpu_vram_mgr_vis_usage(&adev->mman.vram_mgr) >> 20;
621 vf2pf_info->fb_size = adev->gmc.real_vram_size >> 20;
622 vf2pf_info->fb_vis_size = adev->gmc.visible_vram_size >> 20;
623
624 amdgpu_virt_populate_vf2pf_ucode_info(adev);
625
626 /* TODO: read dynamic info */
627 vf2pf_info->gfx_usage = 0;
628 vf2pf_info->compute_usage = 0;
629 vf2pf_info->encode_usage = 0;
630 vf2pf_info->decode_usage = 0;
631
632 vf2pf_info->dummy_page_addr = (uint64_t)adev->dummy_page_addr;
633 if (amdgpu_sriov_is_mes_info_enable(adev)) {
634 vf2pf_info->mes_info_addr =
635 (uint64_t)(adev->mes.resource_1_gpu_addr[0] + AMDGPU_GPU_PAGE_SIZE);
636 vf2pf_info->mes_info_size =
637 adev->mes.resource_1[0]->tbo.base.size - AMDGPU_GPU_PAGE_SIZE;
638 }
639 vf2pf_info->checksum =
640 amd_sriov_msg_checksum(
641 vf2pf_info, sizeof(*vf2pf_info), 0, 0);
642
643 return 0;
644 }
645
amdgpu_virt_update_vf2pf_work_item(struct work_struct * work)646 static void amdgpu_virt_update_vf2pf_work_item(struct work_struct *work)
647 {
648 struct amdgpu_device *adev = container_of(work, struct amdgpu_device, virt.vf2pf_work.work);
649 int ret;
650
651 ret = amdgpu_virt_read_pf2vf_data(adev);
652 if (ret) {
653 adev->virt.vf2pf_update_retry_cnt++;
654
655 if ((amdgpu_virt_rcvd_ras_interrupt(adev) ||
656 adev->virt.vf2pf_update_retry_cnt >= AMDGPU_VF2PF_UPDATE_MAX_RETRY_LIMIT) &&
657 amdgpu_sriov_runtime(adev)) {
658
659 amdgpu_ras_set_fed(adev, true);
660 if (amdgpu_reset_domain_schedule(adev->reset_domain,
661 &adev->kfd.reset_work))
662 return;
663 else
664 dev_err(adev->dev, "Failed to queue work! at %s", __func__);
665 }
666
667 goto out;
668 }
669
670 adev->virt.vf2pf_update_retry_cnt = 0;
671 amdgpu_virt_write_vf2pf_data(adev);
672
673 out:
674 schedule_delayed_work(&(adev->virt.vf2pf_work), adev->virt.vf2pf_update_interval_ms);
675 }
676
/*
 * Copy the data-exchange region described by the dynamic critical-region
 * table out of VRAM into @pfvf_data.
 *
 * @pfvf_data must be large enough for the table entry's size (size_kb << 10
 * bytes).
 *
 * Return: Zero on success, -EINVAL when the region's offset or size is not
 * 4-byte aligned.
 */
static int amdgpu_virt_read_exchange_data_from_mem(struct amdgpu_device *adev, uint32_t *pfvf_data)
{
	uint32_t region_offset =
		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset;
	uint32_t region_bytes =
		adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10;

	dev_info(adev->dev,
		 "Got data exchange info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
		 region_offset, region_bytes);

	if (!(IS_ALIGNED(region_offset, 4) && IS_ALIGNED(region_bytes, 4))) {
		dev_err(adev->dev, "Data exchange data not aligned to 4 bytes\n");
		return -EINVAL;
	}

	/* Last argument false selects a read from VRAM into the buffer. */
	amdgpu_device_vram_access(adev, (uint64_t)region_offset, pfvf_data,
				  region_bytes, false);

	return 0;
}
700
amdgpu_virt_fini_data_exchange(struct amdgpu_device * adev)701 void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev)
702 {
703 if (adev->virt.vf2pf_update_interval_ms != 0) {
704 dev_info(adev->dev, "clean up the vf2pf work item\n");
705 cancel_delayed_work_sync(&adev->virt.vf2pf_work);
706 adev->virt.vf2pf_update_interval_ms = 0;
707 }
708 }
709
amdgpu_virt_init_data_exchange(struct amdgpu_device * adev)710 void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
711 {
712 uint32_t *pfvf_data = NULL;
713
714 adev->virt.fw_reserve.p_pf2vf = NULL;
715 adev->virt.fw_reserve.p_vf2pf = NULL;
716 adev->virt.vf2pf_update_interval_ms = 0;
717 adev->virt.vf2pf_update_retry_cnt = 0;
718
719 if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) {
720 dev_warn(adev->dev, "Currently fw_vram and drv_vram should not have values at the same time!");
721 } else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
722 /* go through this logic in ip_init and reset to init workqueue*/
723 amdgpu_virt_exchange_data(adev);
724
725 INIT_DELAYED_WORK(&adev->virt.vf2pf_work, amdgpu_virt_update_vf2pf_work_item);
726 schedule_delayed_work(&(adev->virt.vf2pf_work), msecs_to_jiffies(adev->virt.vf2pf_update_interval_ms));
727 } else if (adev->bios != NULL) {
728 /* got through this logic in early init stage to get necessary flags, e.g. rlcg_acc related*/
729 if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
730 pfvf_data =
731 kzalloc(adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb << 10,
732 GFP_KERNEL);
733 if (!pfvf_data) {
734 dev_err(adev->dev, "Failed to allocate memory for pfvf_data\n");
735 return;
736 }
737
738 if (amdgpu_virt_read_exchange_data_from_mem(adev, pfvf_data))
739 goto free_pfvf_data;
740
741 adev->virt.fw_reserve.p_pf2vf =
742 (struct amd_sriov_msg_pf2vf_info_header *)pfvf_data;
743
744 amdgpu_virt_read_pf2vf_data(adev);
745
746 free_pfvf_data:
747 kfree(pfvf_data);
748 pfvf_data = NULL;
749 adev->virt.fw_reserve.p_pf2vf = NULL;
750 } else {
751 adev->virt.fw_reserve.p_pf2vf =
752 (struct amd_sriov_msg_pf2vf_info_header *)
753 (adev->bios + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
754
755 amdgpu_virt_read_pf2vf_data(adev);
756 }
757 }
758 }
759
760
amdgpu_virt_exchange_data(struct amdgpu_device * adev)761 void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
762 {
763 uint64_t bp_block_offset = 0;
764 uint32_t bp_block_size = 0;
765 struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL;
766
767 if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
768 if (adev->mman.fw_vram_usage_va) {
769 if (adev->virt.req_init_data_ver == GPU_CRIT_REGION_V2) {
770 adev->virt.fw_reserve.p_pf2vf =
771 (struct amd_sriov_msg_pf2vf_info_header *)
772 (adev->mman.fw_vram_usage_va +
773 adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset);
774 adev->virt.fw_reserve.p_vf2pf =
775 (struct amd_sriov_msg_vf2pf_info_header *)
776 (adev->mman.fw_vram_usage_va +
777 adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset +
778 (AMD_SRIOV_MSG_SIZE_KB << 10));
779 adev->virt.fw_reserve.ras_telemetry =
780 (adev->mman.fw_vram_usage_va +
781 adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset);
782 } else {
783 adev->virt.fw_reserve.p_pf2vf =
784 (struct amd_sriov_msg_pf2vf_info_header *)
785 (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
786 adev->virt.fw_reserve.p_vf2pf =
787 (struct amd_sriov_msg_vf2pf_info_header *)
788 (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
789 adev->virt.fw_reserve.ras_telemetry =
790 (adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
791 }
792 } else if (adev->mman.drv_vram_usage_va) {
793 adev->virt.fw_reserve.p_pf2vf =
794 (struct amd_sriov_msg_pf2vf_info_header *)
795 (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB_V1 << 10));
796 adev->virt.fw_reserve.p_vf2pf =
797 (struct amd_sriov_msg_vf2pf_info_header *)
798 (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB_V1 << 10));
799 adev->virt.fw_reserve.ras_telemetry =
800 (adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_RAS_TELEMETRY_OFFSET_KB_V1 << 10));
801 }
802
803 amdgpu_virt_read_pf2vf_data(adev);
804 amdgpu_virt_write_vf2pf_data(adev);
805
806 /* bad page handling for version 2 */
807 if (adev->virt.fw_reserve.p_pf2vf->version == 2) {
808 pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf;
809
810 bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) |
811 ((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000);
812 bp_block_size = pf2vf_v2->bp_block_size;
813
814 if (bp_block_size && !adev->virt.ras_init_done)
815 amdgpu_virt_init_ras_err_handler_data(adev);
816
817 if (adev->virt.ras_init_done)
818 amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
819 }
820 }
821 }
822
amdgpu_virt_init_detect_asic(struct amdgpu_device * adev)823 static u32 amdgpu_virt_init_detect_asic(struct amdgpu_device *adev)
824 {
825 uint32_t reg;
826
827 switch (adev->asic_type) {
828 case CHIP_TONGA:
829 case CHIP_FIJI:
830 reg = RREG32(mmBIF_IOV_FUNC_IDENTIFIER);
831 break;
832 case CHIP_VEGA10:
833 case CHIP_VEGA20:
834 case CHIP_NAVI10:
835 case CHIP_NAVI12:
836 case CHIP_SIENNA_CICHLID:
837 case CHIP_ARCTURUS:
838 case CHIP_ALDEBARAN:
839 case CHIP_IP_DISCOVERY:
840 reg = RREG32(mmRCC_IOV_FUNC_IDENTIFIER);
841 break;
842 default: /* other chip doesn't support SRIOV */
843 reg = 0;
844 break;
845 }
846
847 if (reg & 1)
848 adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF;
849
850 if (reg & 0x80000000)
851 adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV;
852
853 if (!reg) {
854 /* passthrough mode exclus sriov mod */
855 if (is_virtual_machine() && !xen_initial_domain())
856 adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
857 }
858
859 return reg;
860 }
861
amdgpu_virt_init_req_data(struct amdgpu_device * adev,u32 reg)862 static bool amdgpu_virt_init_req_data(struct amdgpu_device *adev, u32 reg)
863 {
864 bool is_sriov = false;
865
866 /* we have the ability to check now */
867 if (amdgpu_sriov_vf(adev)) {
868 is_sriov = true;
869
870 switch (adev->asic_type) {
871 case CHIP_TONGA:
872 case CHIP_FIJI:
873 vi_set_virt_ops(adev);
874 break;
875 case CHIP_VEGA10:
876 soc15_set_virt_ops(adev);
877 #ifdef CONFIG_X86
878 /* not send GPU_INIT_DATA with MS_HYPERV*/
879 if (!hypervisor_is_type(X86_HYPER_MS_HYPERV))
880 #endif
881 /* send a dummy GPU_INIT_DATA request to host on vega10 */
882 amdgpu_virt_request_init_data(adev);
883 break;
884 case CHIP_VEGA20:
885 case CHIP_ARCTURUS:
886 case CHIP_ALDEBARAN:
887 soc15_set_virt_ops(adev);
888 break;
889 case CHIP_NAVI10:
890 case CHIP_NAVI12:
891 case CHIP_SIENNA_CICHLID:
892 case CHIP_IP_DISCOVERY:
893 nv_set_virt_ops(adev);
894 /* try send GPU_INIT_DATA request to host */
895 amdgpu_virt_request_init_data(adev);
896 break;
897 default: /* other chip doesn't support SRIOV */
898 is_sriov = false;
899 dev_err(adev->dev, "Unknown asic type: %d!\n", adev->asic_type);
900 break;
901 }
902 }
903
904 return is_sriov;
905 }
906
amdgpu_virt_init_ras(struct amdgpu_device * adev)907 static void amdgpu_virt_init_ras(struct amdgpu_device *adev)
908 {
909 ratelimit_state_init(&adev->virt.ras.ras_error_cnt_rs, 5 * HZ, 1);
910 ratelimit_state_init(&adev->virt.ras.ras_cper_dump_rs, 5 * HZ, 1);
911 ratelimit_state_init(&adev->virt.ras.ras_chk_criti_rs, 5 * HZ, 1);
912
913 ratelimit_set_flags(&adev->virt.ras.ras_error_cnt_rs,
914 RATELIMIT_MSG_ON_RELEASE);
915 ratelimit_set_flags(&adev->virt.ras.ras_cper_dump_rs,
916 RATELIMIT_MSG_ON_RELEASE);
917 ratelimit_set_flags(&adev->virt.ras.ras_chk_criti_rs,
918 RATELIMIT_MSG_ON_RELEASE);
919
920 mutex_init(&adev->virt.ras.ras_telemetry_mutex);
921 mutex_init(&adev->virt.access_req_mutex);
922
923 adev->virt.ras.cper_rptr = 0;
924 }
925
/**
 * amdgpu_virt_crit_region_calc_checksum - byte checksum over [buf_start, buf_end)
 * @buf_start: first byte to include
 * @buf_end: one past the last byte to include
 *
 * Adds up every byte in the range and returns the low 8 bits of
 * (0xffffffff - sum). An empty or inverted range yields 0.
 */
static uint8_t amdgpu_virt_crit_region_calc_checksum(uint8_t *buf_start, uint8_t *buf_end)
{
	uint32_t total = 0;
	uint8_t *cursor = buf_start;

	if (buf_start >= buf_end)
		return 0;

	while (cursor < buf_end) {
		total += *cursor;
		cursor++;
	}

	/* truncation to 8 bits is intentional: only the low byte is the checksum */
	return (uint8_t)(0xffffffff - total);
}
938
amdgpu_virt_init_critical_region(struct amdgpu_device * adev)939 int amdgpu_virt_init_critical_region(struct amdgpu_device *adev)
940 {
941 struct amd_sriov_msg_init_data_header *init_data_hdr = NULL;
942 u64 init_hdr_offset = adev->virt.init_data_header.offset;
943 u64 init_hdr_size = (u64)adev->virt.init_data_header.size_kb << 10; /* KB → bytes */
944 u64 vram_size;
945 u64 end;
946 int r = 0;
947 uint8_t checksum = 0;
948
949 /* Skip below init if critical region version != v2 */
950 if (adev->virt.req_init_data_ver != GPU_CRIT_REGION_V2)
951 return 0;
952
953 if (init_hdr_offset < 0) {
954 dev_err(adev->dev, "Invalid init header offset\n");
955 return -EINVAL;
956 }
957
958 vram_size = RREG32(mmRCC_CONFIG_MEMSIZE);
959 if (!vram_size || vram_size == U32_MAX)
960 return -EINVAL;
961 vram_size <<= 20;
962
963 if (check_add_overflow(init_hdr_offset, init_hdr_size, &end) || end > vram_size) {
964 dev_err(adev->dev, "init_data_header exceeds VRAM size, exiting\n");
965 return -EINVAL;
966 }
967
968 /* Allocate for init_data_hdr */
969 init_data_hdr = kzalloc_obj(struct amd_sriov_msg_init_data_header);
970 if (!init_data_hdr)
971 return -ENOMEM;
972
973 amdgpu_device_vram_access(adev, (uint64_t)init_hdr_offset, (uint32_t *)init_data_hdr,
974 sizeof(struct amd_sriov_msg_init_data_header), false);
975
976 /* Table validation */
977 if (strncmp(init_data_hdr->signature,
978 AMDGPU_SRIOV_CRIT_DATA_SIGNATURE,
979 AMDGPU_SRIOV_CRIT_DATA_SIG_LEN) != 0) {
980 dev_err(adev->dev, "Invalid init data signature: %.4s\n",
981 init_data_hdr->signature);
982 r = -EINVAL;
983 goto out;
984 }
985
986 checksum = amdgpu_virt_crit_region_calc_checksum(
987 (uint8_t *)&init_data_hdr->initdata_offset,
988 (uint8_t *)init_data_hdr +
989 sizeof(struct amd_sriov_msg_init_data_header));
990 if (checksum != init_data_hdr->checksum) {
991 dev_err(adev->dev, "Found unmatching checksum from calculation 0x%x and init_data 0x%x\n",
992 checksum, init_data_hdr->checksum);
993 r = -EINVAL;
994 goto out;
995 }
996
997 memset(&adev->virt.crit_regn, 0, sizeof(adev->virt.crit_regn));
998 memset(adev->virt.crit_regn_tbl, 0, sizeof(adev->virt.crit_regn_tbl));
999
1000 adev->virt.crit_regn.offset = init_data_hdr->initdata_offset;
1001 adev->virt.crit_regn.size_kb = init_data_hdr->initdata_size_in_kb;
1002
1003 /* Validation and initialization for each table entry */
1004 if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_IPD_TABLE_ID)) {
1005 if (!init_data_hdr->ip_discovery_size_in_kb ||
1006 init_data_hdr->ip_discovery_size_in_kb > DISCOVERY_TMR_SIZE) {
1007 dev_err(adev->dev, "Invalid %s size: 0x%x\n",
1008 amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_IPD_TABLE_ID],
1009 init_data_hdr->ip_discovery_size_in_kb);
1010 r = -EINVAL;
1011 goto out;
1012 }
1013
1014 adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].offset =
1015 init_data_hdr->ip_discovery_offset;
1016 adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb =
1017 init_data_hdr->ip_discovery_size_in_kb;
1018 }
1019
1020 if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID)) {
1021 if (!init_data_hdr->vbios_img_size_in_kb) {
1022 dev_err(adev->dev, "Invalid %s size: 0x%x\n",
1023 amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID],
1024 init_data_hdr->vbios_img_size_in_kb);
1025 r = -EINVAL;
1026 goto out;
1027 }
1028
1029 adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].offset =
1030 init_data_hdr->vbios_img_offset;
1031 adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_VBIOS_IMG_TABLE_ID].size_kb =
1032 init_data_hdr->vbios_img_size_in_kb;
1033 }
1034
1035 if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID)) {
1036 if (!init_data_hdr->ras_tele_info_size_in_kb) {
1037 dev_err(adev->dev, "Invalid %s size: 0x%x\n",
1038 amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID],
1039 init_data_hdr->ras_tele_info_size_in_kb);
1040 r = -EINVAL;
1041 goto out;
1042 }
1043
1044 adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].offset =
1045 init_data_hdr->ras_tele_info_offset;
1046 adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_RAS_TELEMETRY_TABLE_ID].size_kb =
1047 init_data_hdr->ras_tele_info_size_in_kb;
1048 }
1049
1050 if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID)) {
1051 if (!init_data_hdr->dataexchange_size_in_kb) {
1052 dev_err(adev->dev, "Invalid %s size: 0x%x\n",
1053 amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID],
1054 init_data_hdr->dataexchange_size_in_kb);
1055 r = -EINVAL;
1056 goto out;
1057 }
1058
1059 adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].offset =
1060 init_data_hdr->dataexchange_offset;
1061 adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_DATAEXCHANGE_TABLE_ID].size_kb =
1062 init_data_hdr->dataexchange_size_in_kb;
1063 }
1064
1065 if (IS_SRIOV_CRIT_REGN_ENTRY_VALID(init_data_hdr, AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID)) {
1066 if (!init_data_hdr->bad_page_size_in_kb) {
1067 dev_err(adev->dev, "Invalid %s size: 0x%x\n",
1068 amdgpu_virt_dynamic_crit_table_name[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID],
1069 init_data_hdr->bad_page_size_in_kb);
1070 r = -EINVAL;
1071 goto out;
1072 }
1073
1074 adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].offset =
1075 init_data_hdr->bad_page_info_offset;
1076 adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_BAD_PAGE_INFO_TABLE_ID].size_kb =
1077 init_data_hdr->bad_page_size_in_kb;
1078 }
1079
1080 /* Validation for critical region info */
1081 if (adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb > DISCOVERY_TMR_SIZE) {
1082 dev_err(adev->dev, "Invalid IP discovery size: 0x%x\n",
1083 adev->virt.crit_regn_tbl[AMD_SRIOV_MSG_IPD_TABLE_ID].size_kb);
1084 r = -EINVAL;
1085 goto out;
1086 }
1087
1088 /* reserved memory starts from crit region base offset with the size of 5MB */
1089 adev->mman.fw_vram_usage_start_offset = adev->virt.crit_regn.offset;
1090 adev->mman.fw_vram_usage_size = adev->virt.crit_regn.size_kb << 10;
1091 dev_info(adev->dev,
1092 "critical region v%d requested to reserve memory start at %08llx with %llu KB.\n",
1093 init_data_hdr->version,
1094 adev->mman.fw_vram_usage_start_offset,
1095 adev->mman.fw_vram_usage_size >> 10);
1096
1097 adev->virt.is_dynamic_crit_regn_enabled = true;
1098
1099 out:
1100 kfree(init_data_hdr);
1101 init_data_hdr = NULL;
1102
1103 return r;
1104 }
1105
/**
 * amdgpu_virt_get_dynamic_data_info - copy one dynamic critical region table
 * @adev: amdgpu device
 * @data_id: table id, used as an index into adev->virt.crit_regn_tbl
 * @binary: destination buffer
 * @size: in: capacity of @binary in bytes; out: number of bytes copied
 *
 * Reads the table described by crit_regn_tbl[@data_id] from VRAM into
 * @binary.
 *
 * Return: 0 on success, -EINVAL if @data_id is out of range, @binary or
 * @size is NULL, or *@size is smaller than the table.
 */
int amdgpu_virt_get_dynamic_data_info(struct amdgpu_device *adev,
			int data_id, uint8_t *binary, u32 *size)
{
	uint32_t data_offset = 0;
	uint32_t data_size = 0;

	/*
	 * Range-check the raw int: whether a negative value survives a
	 * conversion to the enum type depends on the enum's
	 * implementation-defined underlying type, and data_id is also used
	 * directly as an array index below.
	 */
	if (data_id < 0 || data_id >= AMD_SRIOV_MSG_MAX_TABLE_ID)
		return -EINVAL;

	data_offset = adev->virt.crit_regn_tbl[data_id].offset;
	data_size = adev->virt.crit_regn_tbl[data_id].size_kb << 10; /* KB -> bytes */

	/* Validate on input params */
	if (!binary || !size || *size < data_size)
		return -EINVAL;

	/* Proceed to copy the dynamic content */
	amdgpu_device_vram_access(adev,
			(uint64_t)data_offset, (uint32_t *)binary, data_size, false);
	*size = data_size;

	dev_dbg(adev->dev,
		"Got %s info from dynamic crit_region_table at offset 0x%x with size of 0x%x bytes.\n",
		amdgpu_virt_dynamic_crit_table_name[data_id], data_offset, data_size);

	return 0;
}
1134
amdgpu_virt_init(struct amdgpu_device * adev)1135 void amdgpu_virt_init(struct amdgpu_device *adev)
1136 {
1137 bool is_sriov = false;
1138 uint32_t reg = amdgpu_virt_init_detect_asic(adev);
1139
1140 is_sriov = amdgpu_virt_init_req_data(adev, reg);
1141
1142 if (is_sriov)
1143 amdgpu_virt_init_ras(adev);
1144 }
1145
amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device * adev)1146 static bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev)
1147 {
1148 return amdgpu_sriov_is_debug(adev) ? true : false;
1149 }
1150
amdgpu_virt_access_debugfs_is_kiq(struct amdgpu_device * adev)1151 static bool amdgpu_virt_access_debugfs_is_kiq(struct amdgpu_device *adev)
1152 {
1153 return amdgpu_sriov_is_normal(adev) ? true : false;
1154 }
1155
amdgpu_virt_enable_access_debugfs(struct amdgpu_device * adev)1156 int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev)
1157 {
1158 if (!amdgpu_sriov_vf(adev) ||
1159 amdgpu_virt_access_debugfs_is_kiq(adev))
1160 return 0;
1161
1162 if (amdgpu_virt_access_debugfs_is_mmio(adev))
1163 adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
1164 else
1165 return -EPERM;
1166
1167 return 0;
1168 }
1169
amdgpu_virt_disable_access_debugfs(struct amdgpu_device * adev)1170 void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev)
1171 {
1172 if (amdgpu_sriov_vf(adev))
1173 adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME;
1174 }
1175
amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device * adev)1176 enum amdgpu_sriov_vf_mode amdgpu_virt_get_sriov_vf_mode(struct amdgpu_device *adev)
1177 {
1178 enum amdgpu_sriov_vf_mode mode;
1179
1180 if (amdgpu_sriov_vf(adev)) {
1181 if (amdgpu_sriov_is_pp_one_vf(adev))
1182 mode = SRIOV_VF_MODE_ONE_VF;
1183 else
1184 mode = SRIOV_VF_MODE_MULTI_VF;
1185 } else {
1186 mode = SRIOV_VF_MODE_BARE_METAL;
1187 }
1188
1189 return mode;
1190 }
1191
amdgpu_virt_pre_reset(struct amdgpu_device * adev)1192 void amdgpu_virt_pre_reset(struct amdgpu_device *adev)
1193 {
1194 /* stop the data exchange thread */
1195 amdgpu_virt_fini_data_exchange(adev);
1196 amdgpu_dpm_set_mp1_state(adev, PP_MP1_STATE_FLR);
1197 }
1198
amdgpu_virt_post_reset(struct amdgpu_device * adev)1199 void amdgpu_virt_post_reset(struct amdgpu_device *adev)
1200 {
1201 if (amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(11, 0, 3)) {
1202 /* force set to GFXOFF state after reset,
1203 * to avoid some invalid operation before GC enable
1204 */
1205 adev->gfx.is_poweron = false;
1206 }
1207
1208 adev->mes.ring[0].sched.ready = false;
1209 }
1210
amdgpu_virt_fw_load_skip_check(struct amdgpu_device * adev,uint32_t ucode_id)1211 bool amdgpu_virt_fw_load_skip_check(struct amdgpu_device *adev, uint32_t ucode_id)
1212 {
1213 switch (amdgpu_ip_version(adev, MP0_HWIP, 0)) {
1214 case IP_VERSION(13, 0, 0):
1215 /* no vf autoload, white list */
1216 if (ucode_id == AMDGPU_UCODE_ID_VCN1 ||
1217 ucode_id == AMDGPU_UCODE_ID_VCN)
1218 return false;
1219 else
1220 return true;
1221 case IP_VERSION(11, 0, 9):
1222 case IP_VERSION(11, 0, 7):
1223 /* black list for CHIP_NAVI12 and CHIP_SIENNA_CICHLID */
1224 if (ucode_id == AMDGPU_UCODE_ID_RLC_G
1225 || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL
1226 || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM
1227 || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM
1228 || ucode_id == AMDGPU_UCODE_ID_SMC)
1229 return true;
1230 else
1231 return false;
1232 case IP_VERSION(13, 0, 10):
1233 /* white list */
1234 if (ucode_id == AMDGPU_UCODE_ID_CAP
1235 || ucode_id == AMDGPU_UCODE_ID_CP_RS64_PFP
1236 || ucode_id == AMDGPU_UCODE_ID_CP_RS64_ME
1237 || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC
1238 || ucode_id == AMDGPU_UCODE_ID_CP_RS64_PFP_P0_STACK
1239 || ucode_id == AMDGPU_UCODE_ID_CP_RS64_PFP_P1_STACK
1240 || ucode_id == AMDGPU_UCODE_ID_CP_RS64_ME_P0_STACK
1241 || ucode_id == AMDGPU_UCODE_ID_CP_RS64_ME_P1_STACK
1242 || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P0_STACK
1243 || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P1_STACK
1244 || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P2_STACK
1245 || ucode_id == AMDGPU_UCODE_ID_CP_RS64_MEC_P3_STACK
1246 || ucode_id == AMDGPU_UCODE_ID_CP_MES
1247 || ucode_id == AMDGPU_UCODE_ID_CP_MES_DATA
1248 || ucode_id == AMDGPU_UCODE_ID_CP_MES1
1249 || ucode_id == AMDGPU_UCODE_ID_CP_MES1_DATA
1250 || ucode_id == AMDGPU_UCODE_ID_VCN1
1251 || ucode_id == AMDGPU_UCODE_ID_VCN)
1252 return false;
1253 else
1254 return true;
1255 default:
1256 /* lagacy black list */
1257 if (ucode_id == AMDGPU_UCODE_ID_SDMA0
1258 || ucode_id == AMDGPU_UCODE_ID_SDMA1
1259 || ucode_id == AMDGPU_UCODE_ID_SDMA2
1260 || ucode_id == AMDGPU_UCODE_ID_SDMA3
1261 || ucode_id == AMDGPU_UCODE_ID_SDMA4
1262 || ucode_id == AMDGPU_UCODE_ID_SDMA5
1263 || ucode_id == AMDGPU_UCODE_ID_SDMA6
1264 || ucode_id == AMDGPU_UCODE_ID_SDMA7
1265 || ucode_id == AMDGPU_UCODE_ID_SDMA_RS64
1266 || ucode_id == AMDGPU_UCODE_ID_RLC_G
1267 || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL
1268 || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM
1269 || ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM
1270 || ucode_id == AMDGPU_UCODE_ID_SMC)
1271 return true;
1272 else
1273 return false;
1274 }
1275 }
1276
/**
 * amdgpu_virt_update_sriov_video_codec - apply adev->virt limits to codec tables
 * @adev: amdgpu device
 * @encode: encode codec table, may be NULL
 * @encode_array_size: number of entries in @encode
 * @decode: decode codec table, may be NULL
 * @decode_array_size: number of entries in @decode
 *
 * Does nothing unless adev->virt.is_mm_bw_enabled is set. Otherwise every
 * entry gets the max width and max pixels per frame stored in adev->virt;
 * max height is derived as pixels-per-frame / width, or 0 when the width is
 * not positive.
 */
void amdgpu_virt_update_sriov_video_codec(struct amdgpu_device *adev,
			struct amdgpu_video_codec_info *encode, uint32_t encode_array_size,
			struct amdgpu_video_codec_info *decode, uint32_t decode_array_size)
{
	struct amdgpu_video_codec_info *info;
	uint32_t idx;

	if (!adev->virt.is_mm_bw_enabled)
		return;

	for (idx = 0; encode && idx < encode_array_size; idx++) {
		info = &encode[idx];
		info->max_width = adev->virt.encode_max_dimension_pixels;
		info->max_pixels_per_frame = adev->virt.encode_max_frame_pixels;
		/* guard the division against a zero width */
		info->max_height = info->max_width > 0 ?
			info->max_pixels_per_frame / info->max_width : 0;
	}

	for (idx = 0; decode && idx < decode_array_size; idx++) {
		info = &decode[idx];
		info->max_width = adev->virt.decode_max_dimension_pixels;
		info->max_pixels_per_frame = adev->virt.decode_max_frame_pixels;
		/* guard the division against a zero width */
		info->max_height = info->max_width > 0 ?
			info->max_pixels_per_frame / info->max_width : 0;
	}
}
1308
/**
 * amdgpu_virt_get_rlcg_reg_access_flag - choose the RLCG access flag
 * @adev: amdgpu device
 * @acc_flags: AMDGPU_REGS_* access flags supplied by the caller
 * @hwip: hardware IP the register belongs to
 * @write: true for a write access, false for a read
 * @rlcg_flag: out: AMDGPU_RLCG_* flag; only written when true is returned
 *
 * Return: true if the access has to go through RLCG, false otherwise.
 */
bool amdgpu_virt_get_rlcg_reg_access_flag(struct amdgpu_device *adev,
					  u32 acc_flags, u32 hwip,
					  bool write, u32 *rlcg_flag)
{
	/* a write that the caller tagged for RLC */
	bool rlc_write = (acc_flags & AMDGPU_REGS_RLC) && write;

	switch (hwip) {
	case GC_HWIP:
		if (amdgpu_sriov_reg_indirect_gc(adev)) {
			*rlcg_flag = write ? AMDGPU_RLCG_GC_WRITE :
					     AMDGPU_RLCG_GC_READ;
			return true;
		}

		/*
		 * Only in the new version are AMDGPU_REGS_NO_KIQ and
		 * AMDGPU_REGS_RLC enabled simultaneously, so the legacy
		 * write path requires NO_KIQ to be clear.
		 */
		if (rlc_write && !(acc_flags & AMDGPU_REGS_NO_KIQ)) {
			*rlcg_flag = AMDGPU_RLCG_GC_WRITE_LEGACY;
			return true;
		}

		return false;
	case MMHUB_HWIP:
		if (!amdgpu_sriov_reg_indirect_mmhub(adev) || !rlc_write)
			return false;

		*rlcg_flag = AMDGPU_RLCG_MMHUB_WRITE;
		return true;
	default:
		return false;
	}
}
1341
/**
 * amdgpu_virt_rlcg_vfi_reg_rw - access a register through the RLCG VFi interface
 * @adev: amdgpu device
 * @offset: register offset in dwords (the log prints offset * 4 as byte address)
 * @v: value to write; ignored for reads
 * @flag: AMDGPU_RLCG_GC_WRITE or AMDGPU_RLCG_GC_READ
 * @xcc_id: index into adev->gfx.rlc.reg_access_ctrl
 *
 * Accesses to the GRBM control/index registers are not sent to hardware:
 * writes are cached in the per-XCC control block and reads return the cached
 * value. All other accesses program the VFi address/data/GRBM registers,
 * issue the command and poll the status register (up to 100 polls, 10 us
 * apart) under adev->virt.rlcg_reg_lock.
 *
 * Return: for a read that completes with DONE, the value read back; otherwise
 * the local data value (@v for writes, 0 for reads). 0 is also returned when
 * the interface is unavailable, @xcc_id is not in the XCC mask, hardware
 * access is being skipped, or @flag is not a supported access type.
 */
static u32 amdgpu_virt_rlcg_vfi_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id)
{
	uint32_t timeout = 100;
	uint32_t i;

	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
	/* MMIO cookies: keep the __iomem annotation so sparse can check readl/writel */
	void __iomem *vfi_cmd;
	void __iomem *vfi_stat;
	void __iomem *vfi_addr;
	void __iomem *vfi_data;
	void __iomem *vfi_grbm_cntl;
	void __iomem *vfi_grbm_idx;
	uint32_t cmd;
	uint32_t stat;
	uint32_t addr = offset;
	uint32_t data;
	uint32_t grbm_cntl_data;
	uint32_t grbm_idx_data;

	unsigned long flags;
	bool is_err = true;

	if (!adev->gfx.rlc.rlcg_reg_access_supported) {
		dev_err(adev->dev, "VFi interface is not available\n");
		return 0;
	}

	if (adev->gfx.xcc_mask && (((1 << xcc_id) & adev->gfx.xcc_mask) == 0)) {
		dev_err(adev->dev, "VFi invalid XCC, xcc_id=0x%x\n", xcc_id);
		return 0;
	}

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	/* register offsets in the control block are in dwords, hence the "4 *" */
	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[xcc_id];
	vfi_cmd  = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->vfi_cmd;
	vfi_stat = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->vfi_stat;
	vfi_addr = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->vfi_addr;
	vfi_data = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->vfi_data;
	vfi_grbm_cntl = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->vfi_grbm_cntl;
	vfi_grbm_idx  = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->vfi_grbm_idx;
	grbm_cntl_data = reg_access_ctrl->vfi_grbm_cntl_data;
	grbm_idx_data  = reg_access_ctrl->vfi_grbm_idx_data;

	if (flag == AMDGPU_RLCG_GC_WRITE) {
		data = v;
		cmd = AMDGPU_RLCG_VFI_CMD__WR;

		// the GRBM_GFX_CNTL and GRBM_GFX_INDEX are protected by mutex outside this call
		if (addr == reg_access_ctrl->grbm_cntl) {
			reg_access_ctrl->vfi_grbm_cntl_data = data;
			return 0;
		} else if (addr == reg_access_ctrl->grbm_idx) {
			reg_access_ctrl->vfi_grbm_idx_data = data;
			return 0;
		}

	} else if (flag == AMDGPU_RLCG_GC_READ) {
		data = 0;
		cmd = AMDGPU_RLCG_VFI_CMD__RD;

		// the GRBM_GFX_CNTL and GRBM_GFX_INDEX are protected by mutex outside this call
		if (addr == reg_access_ctrl->grbm_cntl)
			return grbm_cntl_data;
		else if (addr == reg_access_ctrl->grbm_idx)
			return grbm_idx_data;

	} else {
		dev_err(adev->dev, "VFi invalid access, flag=0x%x\n", flag);
		return 0;
	}

	spin_lock_irqsave(&adev->virt.rlcg_reg_lock, flags);

	/* stage the request: target address, payload and cached GRBM selection */
	writel(addr, vfi_addr);
	writel(data, vfi_data);
	writel(grbm_cntl_data, vfi_grbm_cntl);
	writel(grbm_idx_data, vfi_grbm_idx);

	/* mark the mailbox busy before issuing the command, then poll for a change */
	writel(AMDGPU_RLCG_VFI_STAT__BUSY, vfi_stat);
	writel(cmd, vfi_cmd);

	for (i = 0; i < timeout; i++) {
		stat = readl(vfi_stat);
		if (stat != AMDGPU_RLCG_VFI_STAT__BUSY)
			break;
		udelay(10);
	}

	switch (stat) {
	case AMDGPU_RLCG_VFI_STAT__DONE:
		is_err = false;
		if (cmd == AMDGPU_RLCG_VFI_CMD__RD)
			data = readl(vfi_data);
		break;
	case AMDGPU_RLCG_VFI_STAT__BUSY:
		dev_err(adev->dev, "VFi access timeout\n");
		break;
	case AMDGPU_RLCG_VFI_STAT__INV_CMD:
		dev_err(adev->dev, "VFi invalid command\n");
		break;
	case AMDGPU_RLCG_VFI_STAT__INV_ADDR:
		dev_err(adev->dev, "VFi invalid address\n");
		break;
	case AMDGPU_RLCG_VFI_STAT__ERR:
		dev_err(adev->dev, "VFi unknown error\n");
		break;
	default:
		dev_err(adev->dev, "VFi unknown status code\n");
		break;
	}

	spin_unlock_irqrestore(&adev->virt.rlcg_reg_lock, flags);

	if (is_err)
		dev_err(adev->dev, "VFi: [grbm_cntl=0x%x grbm_idx=0x%x] addr=0x%x (byte addr 0x%x), data=0x%x, cmd=0x%x\n",
			grbm_cntl_data, grbm_idx_data,
			addr, addr * 4, data, cmd);
	else
		dev_dbg(adev->dev, "VFi: [grbm_cntl=0x%x grbm_idx=0x%x] addr=0x%x (byte addr 0x%x), data=0x%x, cmd=0x%x\n",
			grbm_cntl_data, grbm_idx_data,
			addr, addr * 4, data, cmd);

	return data;
}
1468
/**
 * amdgpu_virt_rlcg_reg_rw - access a register indirectly through RLCG
 * @adev: amdgpu device
 * @offset: register offset in dwords
 * @v: value to write
 * @flag: AMDGPU_RLCG_* command flag, OR-ed into the scratch command register
 * @xcc_id: index into adev->gfx.rlc.reg_access_ctrl
 *
 * GC IP versions >= 12.1.0 are delegated to the VFi interface. Otherwise the
 * request is passed to RLCG through the scratch registers while holding
 * adev->virt.rlcg_reg_lock, polling up to 50000 times, 10 us apart, for
 * completion. Writes targeting the GRBM control/index registers are placed in
 * scratch_reg2/scratch_reg3 instead, plus a direct MMIO write for
 * AMDGPU_RLCG_GC_WRITE_LEGACY.
 *
 * Return: the content of scratch_reg0 after the operation, or 0 when RLCG
 * access is unavailable, @xcc_id is not in the XCC mask, or hardware access
 * is being skipped.
 */
u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 flag, u32 xcc_id)
{
	struct amdgpu_rlcg_reg_access_ctrl *reg_access_ctrl;
	uint32_t timeout = 50000;
	uint32_t i, tmp;
	uint32_t ret = 0;
	/* MMIO cookies: keep the __iomem annotation so sparse can check readl/writel */
	void __iomem *scratch_reg0;
	void __iomem *scratch_reg1;
	void __iomem *scratch_reg2;
	void __iomem *scratch_reg3;
	/* only assigned and used when the control block provides a spare_int offset */
	void __iomem *spare_int = NULL;
	unsigned long flags;

	if (amdgpu_ip_version(adev, GC_HWIP, 0) >= IP_VERSION(12, 1, 0))
		return amdgpu_virt_rlcg_vfi_reg_rw(adev, offset, v, flag, xcc_id);

	if (!adev->gfx.rlc.rlcg_reg_access_supported) {
		dev_err(adev->dev,
			"indirect registers access through rlcg is not available\n");
		return 0;
	}

	if (adev->gfx.xcc_mask && (((1 << xcc_id) & adev->gfx.xcc_mask) == 0)) {
		dev_err(adev->dev, "invalid xcc\n");
		return 0;
	}

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	/* register offsets in the control block are in dwords, hence the "4 *" */
	reg_access_ctrl = &adev->gfx.rlc.reg_access_ctrl[xcc_id];
	scratch_reg0 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg0;
	scratch_reg1 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg1;
	scratch_reg2 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg2;
	scratch_reg3 = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->scratch_reg3;

	spin_lock_irqsave(&adev->virt.rlcg_reg_lock, flags);

	if (reg_access_ctrl->spare_int)
		spare_int = (void __iomem *)adev->rmmio + 4 * reg_access_ctrl->spare_int;

	if (offset == reg_access_ctrl->grbm_cntl) {
		/* if the target reg offset is grbm_cntl, write to scratch_reg2 */
		writel(v, scratch_reg2);
		if (flag == AMDGPU_RLCG_GC_WRITE_LEGACY)
			writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
	} else if (offset == reg_access_ctrl->grbm_idx) {
		/* if the target reg offset is grbm_idx, write to scratch_reg3 */
		writel(v, scratch_reg3);
		if (flag == AMDGPU_RLCG_GC_WRITE_LEGACY)
			writel(v, ((void __iomem *)adev->rmmio) + (offset * 4));
	} else {
		/*
		 * SCRATCH_REG0 = read/write value
		 * SCRATCH_REG1[30:28] = command
		 * SCRATCH_REG1[19:0] = address in dword
		 * SCRATCH_REG1[27:24] = Error reporting
		 */
		writel(v, scratch_reg0);
		writel((offset | flag), scratch_reg1);
		if (reg_access_ctrl->spare_int)
			writel(1, spare_int);

		/* completion is signalled by the address field reading back as zero */
		for (i = 0; i < timeout; i++) {
			tmp = readl(scratch_reg1);
			if (!(tmp & AMDGPU_RLCG_SCRATCH1_ADDRESS_MASK))
				break;
			udelay(10);
		}

		tmp = readl(scratch_reg1);
		if (i >= timeout || (tmp & AMDGPU_RLCG_SCRATCH1_ERROR_MASK) != 0) {
			if (amdgpu_sriov_rlcg_error_report_enabled(adev)) {
				if (tmp & AMDGPU_RLCG_VFGATE_DISABLED) {
					dev_err(adev->dev,
						"vfgate is disabled, rlcg failed to program reg: 0x%05x\n", offset);
				} else if (tmp & AMDGPU_RLCG_WRONG_OPERATION_TYPE) {
					dev_err(adev->dev,
						"wrong operation type, rlcg failed to program reg: 0x%05x\n", offset);
				} else if (tmp & AMDGPU_RLCG_REG_NOT_IN_RANGE) {
					dev_err(adev->dev,
						"register is not in range, rlcg failed to program reg: 0x%05x\n", offset);
				} else {
					dev_err(adev->dev,
						"unknown error type, rlcg failed to program reg: 0x%05x\n", offset);
				}
			} else {
				dev_err(adev->dev,
					"timeout: rlcg faled to program reg: 0x%05x\n", offset);
			}
		}
	}

	ret = readl(scratch_reg0);

	spin_unlock_irqrestore(&adev->virt.rlcg_reg_lock, flags);

	return ret;
}
1568
/**
 * amdgpu_sriov_wreg - write a register on an SR-IOV device
 * @adev: amdgpu device
 * @offset: register offset
 * @value: value to write
 * @acc_flags: AMDGPU_REGS_* access flags
 * @hwip: hardware IP the register belongs to
 * @xcc_id: XCC instance, forwarded to the RLCG path
 *
 * Outside of SR-IOV runtime, registers that need it are written through
 * RLCG. Everything else uses WREG32_NO_KIQ() or WREG32(), depending on
 * AMDGPU_REGS_NO_KIQ. Nothing is written while hardware access is skipped.
 */
void amdgpu_sriov_wreg(struct amdgpu_device *adev,
		       u32 offset, u32 value,
		       u32 acc_flags, u32 hwip, u32 xcc_id)
{
	u32 rlcg_flag;

	if (amdgpu_device_skip_hw_access(adev))
		return;

	if (!amdgpu_sriov_runtime(adev) &&
	    amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip, true, &rlcg_flag)) {
		amdgpu_virt_rlcg_reg_rw(adev, offset, value, rlcg_flag, xcc_id);
	} else if (acc_flags & AMDGPU_REGS_NO_KIQ) {
		WREG32_NO_KIQ(offset, value);
	} else {
		WREG32(offset, value);
	}
}
1589
/**
 * amdgpu_sriov_rreg - read a register on an SR-IOV device
 * @adev: amdgpu device
 * @offset: register offset
 * @acc_flags: AMDGPU_REGS_* access flags
 * @hwip: hardware IP the register belongs to
 * @xcc_id: XCC instance, forwarded to the RLCG path
 *
 * Outside of SR-IOV runtime, registers that need it are read through RLCG.
 * Everything else uses RREG32_NO_KIQ() or RREG32(), depending on
 * AMDGPU_REGS_NO_KIQ.
 *
 * Return: the register value, or 0 while hardware access is skipped.
 */
u32 amdgpu_sriov_rreg(struct amdgpu_device *adev,
		      u32 offset, u32 acc_flags, u32 hwip, u32 xcc_id)
{
	u32 rlcg_flag;
	bool via_rlcg;

	if (amdgpu_device_skip_hw_access(adev))
		return 0;

	via_rlcg = !amdgpu_sriov_runtime(adev) &&
		   amdgpu_virt_get_rlcg_reg_access_flag(adev, acc_flags, hwip,
							false, &rlcg_flag);
	if (via_rlcg)
		return amdgpu_virt_rlcg_reg_rw(adev, offset, 0, rlcg_flag, xcc_id);

	return (acc_flags & AMDGPU_REGS_NO_KIQ) ? RREG32_NO_KIQ(offset) :
						  RREG32(offset);
}
1607
amdgpu_sriov_xnack_support(struct amdgpu_device * adev)1608 bool amdgpu_sriov_xnack_support(struct amdgpu_device *adev)
1609 {
1610 bool xnack_mode = true;
1611
1612 if (amdgpu_sriov_vf(adev) &&
1613 amdgpu_ip_version(adev, GC_HWIP, 0) == IP_VERSION(9, 4, 2))
1614 xnack_mode = false;
1615
1616 return xnack_mode;
1617 }
1618
amdgpu_virt_get_ras_capability(struct amdgpu_device * adev)1619 bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev)
1620 {
1621 struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
1622
1623 if (!amdgpu_sriov_ras_caps_en(adev))
1624 return false;
1625
1626 if (adev->virt.ras_en_caps.bits.block_umc)
1627 adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__UMC);
1628 if (adev->virt.ras_en_caps.bits.block_sdma)
1629 adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SDMA);
1630 if (adev->virt.ras_en_caps.bits.block_gfx)
1631 adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__GFX);
1632 if (adev->virt.ras_en_caps.bits.block_mmhub)
1633 adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MMHUB);
1634 if (adev->virt.ras_en_caps.bits.block_athub)
1635 adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__ATHUB);
1636 if (adev->virt.ras_en_caps.bits.block_pcie_bif)
1637 adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__PCIE_BIF);
1638 if (adev->virt.ras_en_caps.bits.block_hdp)
1639 adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__HDP);
1640 if (adev->virt.ras_en_caps.bits.block_xgmi_wafl)
1641 adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__XGMI_WAFL);
1642 if (adev->virt.ras_en_caps.bits.block_df)
1643 adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__DF);
1644 if (adev->virt.ras_en_caps.bits.block_smn)
1645 adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SMN);
1646 if (adev->virt.ras_en_caps.bits.block_sem)
1647 adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__SEM);
1648 if (adev->virt.ras_en_caps.bits.block_mp0)
1649 adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MP0);
1650 if (adev->virt.ras_en_caps.bits.block_mp1)
1651 adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MP1);
1652 if (adev->virt.ras_en_caps.bits.block_fuse)
1653 adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__FUSE);
1654 if (adev->virt.ras_en_caps.bits.block_mca)
1655 adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MCA);
1656 if (adev->virt.ras_en_caps.bits.block_vcn)
1657 adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__VCN);
1658 if (adev->virt.ras_en_caps.bits.block_jpeg)
1659 adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__JPEG);
1660 if (adev->virt.ras_en_caps.bits.block_ih)
1661 adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__IH);
1662 if (adev->virt.ras_en_caps.bits.block_mpio)
1663 adev->ras_hw_enabled |= BIT(AMDGPU_RAS_BLOCK__MPIO);
1664
1665 if (adev->virt.ras_en_caps.bits.poison_propogation_mode)
1666 con->poison_supported = true; /* Poison is handled by host */
1667
1668 if (adev->virt.ras_en_caps.bits.uniras_supported)
1669 amdgpu_virt_ras_set_remote_uniras(adev, true);
1670
1671 return true;
1672 }
1673
1674 static inline enum amd_sriov_ras_telemetry_gpu_block
amdgpu_ras_block_to_sriov(struct amdgpu_device * adev,enum amdgpu_ras_block block)1675 amdgpu_ras_block_to_sriov(struct amdgpu_device *adev, enum amdgpu_ras_block block) {
1676 switch (block) {
1677 case AMDGPU_RAS_BLOCK__UMC:
1678 return RAS_TELEMETRY_GPU_BLOCK_UMC;
1679 case AMDGPU_RAS_BLOCK__SDMA:
1680 return RAS_TELEMETRY_GPU_BLOCK_SDMA;
1681 case AMDGPU_RAS_BLOCK__GFX:
1682 return RAS_TELEMETRY_GPU_BLOCK_GFX;
1683 case AMDGPU_RAS_BLOCK__MMHUB:
1684 return RAS_TELEMETRY_GPU_BLOCK_MMHUB;
1685 case AMDGPU_RAS_BLOCK__ATHUB:
1686 return RAS_TELEMETRY_GPU_BLOCK_ATHUB;
1687 case AMDGPU_RAS_BLOCK__PCIE_BIF:
1688 return RAS_TELEMETRY_GPU_BLOCK_PCIE_BIF;
1689 case AMDGPU_RAS_BLOCK__HDP:
1690 return RAS_TELEMETRY_GPU_BLOCK_HDP;
1691 case AMDGPU_RAS_BLOCK__XGMI_WAFL:
1692 return RAS_TELEMETRY_GPU_BLOCK_XGMI_WAFL;
1693 case AMDGPU_RAS_BLOCK__DF:
1694 return RAS_TELEMETRY_GPU_BLOCK_DF;
1695 case AMDGPU_RAS_BLOCK__SMN:
1696 return RAS_TELEMETRY_GPU_BLOCK_SMN;
1697 case AMDGPU_RAS_BLOCK__SEM:
1698 return RAS_TELEMETRY_GPU_BLOCK_SEM;
1699 case AMDGPU_RAS_BLOCK__MP0:
1700 return RAS_TELEMETRY_GPU_BLOCK_MP0;
1701 case AMDGPU_RAS_BLOCK__MP1:
1702 return RAS_TELEMETRY_GPU_BLOCK_MP1;
1703 case AMDGPU_RAS_BLOCK__FUSE:
1704 return RAS_TELEMETRY_GPU_BLOCK_FUSE;
1705 case AMDGPU_RAS_BLOCK__MCA:
1706 return RAS_TELEMETRY_GPU_BLOCK_MCA;
1707 case AMDGPU_RAS_BLOCK__VCN:
1708 return RAS_TELEMETRY_GPU_BLOCK_VCN;
1709 case AMDGPU_RAS_BLOCK__JPEG:
1710 return RAS_TELEMETRY_GPU_BLOCK_JPEG;
1711 case AMDGPU_RAS_BLOCK__IH:
1712 return RAS_TELEMETRY_GPU_BLOCK_IH;
1713 case AMDGPU_RAS_BLOCK__MPIO:
1714 return RAS_TELEMETRY_GPU_BLOCK_MPIO;
1715 default:
1716 dev_warn(adev->dev, "Unsupported SRIOV RAS telemetry block 0x%x\n",
1717 block);
1718 return RAS_TELEMETRY_GPU_BLOCK_COUNT;
1719 }
1720 }
1721
amdgpu_virt_cache_host_error_counts(struct amdgpu_device * adev,struct amdsriov_ras_telemetry * host_telemetry)1722 static int amdgpu_virt_cache_host_error_counts(struct amdgpu_device *adev,
1723 struct amdsriov_ras_telemetry *host_telemetry)
1724 {
1725 struct amd_sriov_ras_telemetry_error_count *tmp = NULL;
1726 uint32_t checksum, used_size;
1727
1728 checksum = host_telemetry->header.checksum;
1729 used_size = host_telemetry->header.used_size;
1730
1731 if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
1732 return 0;
1733
1734 tmp = kmemdup(&host_telemetry->body.error_count, used_size, GFP_KERNEL);
1735 if (!tmp)
1736 return -ENOMEM;
1737
1738 if (checksum != amd_sriov_msg_checksum(tmp, used_size, 0, 0))
1739 goto out;
1740
1741 memcpy(&adev->virt.count_cache, tmp,
1742 min(used_size, sizeof(adev->virt.count_cache)));
1743 out:
1744 kfree(tmp);
1745
1746 return 0;
1747 }
1748
/**
 * amdgpu_virt_req_ras_err_count_internal - ask the host for fresh error counts
 * @adev: amdgpu device
 * @force_update: send the request even when the rate limiter says no
 *
 * Return: -EOPNOTSUPP when no req_ras_err_count op is installed, 0 otherwise
 * (including when the request was rate limited or failed).
 */
static int amdgpu_virt_req_ras_err_count_internal(struct amdgpu_device *adev, bool force_update)
{
	struct amdgpu_virt *virt = &adev->virt;
	bool within_limit;

	if (!virt->ops || !virt->ops->req_ras_err_count)
		return -EOPNOTSUPP;

	/*
	 * Host allows 15 ras telemetry requests per 60 seconds and ignores
	 * incoming guest messages after that. Rate limit the guest messages
	 * so the guest cannot DOS itself. The limiter is always consulted
	 * first, even when the update is forced.
	 */
	within_limit = __ratelimit(&virt->ras.ras_error_cnt_rs);
	if (!within_limit && !force_update)
		return 0;

	mutex_lock(&virt->ras.ras_telemetry_mutex);
	if (!virt->ops->req_ras_err_count(adev))
		amdgpu_virt_cache_host_error_counts(adev,
						    virt->fw_reserve.ras_telemetry);
	mutex_unlock(&virt->ras.ras_telemetry_mutex);

	return 0;
}
1770
1771 /* Bypass ACA interface and query ECC counts directly from host */
amdgpu_virt_req_ras_err_count(struct amdgpu_device * adev,enum amdgpu_ras_block block,struct ras_err_data * err_data)1772 int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block,
1773 struct ras_err_data *err_data)
1774 {
1775 enum amd_sriov_ras_telemetry_gpu_block sriov_block;
1776
1777 sriov_block = amdgpu_ras_block_to_sriov(adev, block);
1778
1779 if (sriov_block >= RAS_TELEMETRY_GPU_BLOCK_COUNT ||
1780 !amdgpu_sriov_ras_telemetry_block_en(adev, sriov_block))
1781 return -EOPNOTSUPP;
1782
1783 /* Host Access may be lost during reset, just return last cached data. */
1784 if (down_read_trylock(&adev->reset_domain->sem)) {
1785 amdgpu_virt_req_ras_err_count_internal(adev, false);
1786 up_read(&adev->reset_domain->sem);
1787 }
1788
1789 err_data->ue_count = adev->virt.count_cache.block[sriov_block].ue_count;
1790 err_data->ce_count = adev->virt.count_cache.block[sriov_block].ce_count;
1791 err_data->de_count = adev->virt.count_cache.block[sriov_block].de_count;
1792
1793 return 0;
1794 }
1795
1796 static int
amdgpu_virt_write_cpers_to_ring(struct amdgpu_device * adev,struct amdsriov_ras_telemetry * host_telemetry,u32 * more)1797 amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev,
1798 struct amdsriov_ras_telemetry *host_telemetry,
1799 u32 *more)
1800 {
1801 struct amd_sriov_ras_cper_dump *cper_dump = NULL;
1802 struct cper_hdr *entry = NULL;
1803 struct amdgpu_ring *ring = &adev->cper.ring_buf;
1804 uint32_t checksum, used_size, i;
1805 int ret = 0;
1806
1807 checksum = host_telemetry->header.checksum;
1808 used_size = host_telemetry->header.used_size;
1809
1810 if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
1811 return -EINVAL;
1812
1813 cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL);
1814 if (!cper_dump)
1815 return -ENOMEM;
1816
1817 if (checksum != amd_sriov_msg_checksum(cper_dump, used_size, 0, 0)) {
1818 ret = -EINVAL;
1819 goto out;
1820 }
1821
1822 *more = cper_dump->more;
1823
1824 if (cper_dump->wptr < adev->virt.ras.cper_rptr) {
1825 dev_warn(
1826 adev->dev,
1827 "guest specified rptr that was too high! guest rptr: 0x%llx, host rptr: 0x%llx\n",
1828 adev->virt.ras.cper_rptr, cper_dump->wptr);
1829
1830 adev->virt.ras.cper_rptr = cper_dump->wptr;
1831 goto out;
1832 }
1833
1834 entry = (struct cper_hdr *)&cper_dump->buf[0];
1835
1836 for (i = 0; i < cper_dump->count; i++) {
1837 amdgpu_cper_ring_write(ring, entry, entry->record_length);
1838 entry = (struct cper_hdr *)((char *)entry +
1839 entry->record_length);
1840 }
1841
1842 if (cper_dump->overflow_count)
1843 dev_warn(adev->dev,
1844 "host reported CPER overflow of 0x%llx entries!\n",
1845 cper_dump->overflow_count);
1846
1847 adev->virt.ras.cper_rptr = cper_dump->wptr;
1848 out:
1849 kfree(cper_dump);
1850
1851 return ret;
1852 }
1853
amdgpu_virt_req_ras_cper_dump_internal(struct amdgpu_device * adev)1854 static int amdgpu_virt_req_ras_cper_dump_internal(struct amdgpu_device *adev)
1855 {
1856 struct amdgpu_virt *virt = &adev->virt;
1857 int ret = 0;
1858 uint32_t more = 0;
1859
1860 if (!virt->ops || !virt->ops->req_ras_cper_dump)
1861 return -EOPNOTSUPP;
1862
1863 do {
1864 if (!virt->ops->req_ras_cper_dump(adev, virt->ras.cper_rptr))
1865 ret = amdgpu_virt_write_cpers_to_ring(
1866 adev, virt->fw_reserve.ras_telemetry, &more);
1867 else
1868 ret = 0;
1869 } while (more && !ret);
1870
1871 return ret;
1872 }
1873
/*
 * Fetch pending CPER records from the host into the guest CPER ring.
 *
 * @adev: amdgpu device
 * @force_update: fetch even when the rate limiter rejects the request
 *
 * The fetch is skipped, and 0 returned, when the request is rate limited and
 * not forced, or when the reset semaphore cannot be taken for reading.
 *
 * Returns -EOPNOTSUPP when CPER support is not enabled, otherwise the result
 * of the fetch or 0 when it was skipped.
 */
int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update)
{
	struct amdgpu_virt *virt = &adev->virt;
	int ret;

	if (!amdgpu_sriov_ras_cper_en(adev))
		return -EOPNOTSUPP;

	/* The rate limiter is evaluated first, also for forced requests. */
	if (!(__ratelimit(&virt->ras.ras_cper_dump_rs) || force_update))
		return 0;

	if (!down_read_trylock(&adev->reset_domain->sem))
		return 0;

	mutex_lock(&virt->ras.ras_telemetry_mutex);
	ret = amdgpu_virt_req_ras_cper_dump_internal(adev);
	mutex_unlock(&virt->ras.ras_telemetry_mutex);

	up_read(&adev->reset_domain->sem);

	return ret;
}
1892
amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device * adev)1893 int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev)
1894 {
1895 unsigned long ue_count, ce_count;
1896
1897 if (amdgpu_sriov_ras_telemetry_en(adev)) {
1898 amdgpu_virt_req_ras_err_count_internal(adev, true);
1899 amdgpu_ras_query_error_count(adev, &ce_count, &ue_count, NULL);
1900 }
1901
1902 return 0;
1903 }
1904
amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device * adev,enum amdgpu_ras_block block)1905 bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
1906 enum amdgpu_ras_block block)
1907 {
1908 enum amd_sriov_ras_telemetry_gpu_block sriov_block;
1909
1910 sriov_block = amdgpu_ras_block_to_sriov(adev, block);
1911
1912 if (sriov_block >= RAS_TELEMETRY_GPU_BLOCK_COUNT ||
1913 !amdgpu_sriov_ras_telemetry_block_en(adev, sriov_block))
1914 return false;
1915
1916 return true;
1917 }
1918
1919 /*
1920 * amdgpu_virt_request_bad_pages() - request bad pages
1921 * @adev: amdgpu device.
1922 * Send command to GPU hypervisor to write new bad pages into the shared PF2VF region
1923 */
amdgpu_virt_request_bad_pages(struct amdgpu_device * adev)1924 void amdgpu_virt_request_bad_pages(struct amdgpu_device *adev)
1925 {
1926 struct amdgpu_virt *virt = &adev->virt;
1927
1928 if (virt->ops && virt->ops->req_bad_pages)
1929 virt->ops->req_bad_pages(adev);
1930 }
1931
amdgpu_virt_cache_chk_criti_hit(struct amdgpu_device * adev,struct amdsriov_ras_telemetry * host_telemetry,bool * hit)1932 static int amdgpu_virt_cache_chk_criti_hit(struct amdgpu_device *adev,
1933 struct amdsriov_ras_telemetry *host_telemetry,
1934 bool *hit)
1935 {
1936 struct amd_sriov_ras_chk_criti *tmp = NULL;
1937 uint32_t checksum, used_size;
1938
1939 checksum = host_telemetry->header.checksum;
1940 used_size = host_telemetry->header.used_size;
1941
1942 if (used_size > (AMD_SRIOV_MSG_RAS_TELEMETRY_SIZE_KB_V1 << 10))
1943 return 0;
1944
1945 tmp = kmemdup(&host_telemetry->body.chk_criti, used_size, GFP_KERNEL);
1946 if (!tmp)
1947 return -ENOMEM;
1948
1949 if (checksum != amd_sriov_msg_checksum(tmp, used_size, 0, 0))
1950 goto out;
1951
1952 if (hit)
1953 *hit = tmp->hit ? true : false;
1954
1955 out:
1956 kfree(tmp);
1957
1958 return 0;
1959 }
1960
/*
 * Ask the host whether an address hits a critical region.
 *
 * @adev: amdgpu device
 * @addr: address handed to the host for the check
 * @hit: passed on to amdgpu_virt_cache_chk_criti_hit(), which stores the
 *       host's answer there when a valid reply is parsed
 *
 * Returns -EOPNOTSUPP when the virt ops do not provide req_ras_chk_criti,
 * -EPERM when the request is rate limited or the request to the host fails,
 * otherwise the result of amdgpu_virt_cache_chk_criti_hit().
 */
int amdgpu_virt_check_vf_critical_region(struct amdgpu_device *adev, u64 addr, bool *hit)
{
	struct amdgpu_virt *virt = &adev->virt;
	int r;

	if (!(virt->ops && virt->ops->req_ras_chk_criti))
		return -EOPNOTSUPP;

	/*
	 * The host accepts 15 RAS telemetry requests per 60 seconds and
	 * ignores guest messages beyond that, so guest requests are rate
	 * limited to keep the guest from locking itself out.
	 */
	if (!__ratelimit(&virt->ras.ras_chk_criti_rs))
		return -EPERM;

	mutex_lock(&virt->ras.ras_telemetry_mutex);
	if (virt->ops->req_ras_chk_criti(adev, addr))
		r = -EPERM;
	else
		r = amdgpu_virt_cache_chk_criti_hit(
			adev, virt->fw_reserve.ras_telemetry, hit);
	mutex_unlock(&virt->ras.ras_telemetry_mutex);

	return r;
}
1983
/*
 * Forward a remote RAS command to the req_remote_ras_cmd virt op.
 *
 * @adev: amdgpu device
 * @param1: first command parameter, passed through unchanged
 * @param2: second command parameter, passed through unchanged
 * @param3: third command parameter, passed through unchanged
 *
 * Returns -ENOENT when the virt ops do not provide req_remote_ras_cmd,
 * otherwise whatever the op returns.
 */
static int req_remote_ras_cmd(struct amdgpu_device *adev,
			      u32 param1, u32 param2, u32 param3)
{
	struct amdgpu_virt *virt = &adev->virt;

	if (!virt->ops || !virt->ops->req_remote_ras_cmd)
		return -ENOENT;

	return virt->ops->req_remote_ras_cmd(adev, param1, param2, param3);
}
1993
/*
 * Send a remote RAS command buffer to the host.
 *
 * @adev: amdgpu device
 * @buf: 64-bit buffer address, handed over as its lower and upper 32-bit
 *       halves (presumably a guest physical address - confirm with callers)
 * @buf_len: length passed along with the address
 *
 * The command is only sent while the reset semaphore can be taken for
 * reading.
 *
 * Returns -EIO when the reset semaphore cannot be taken, otherwise the
 * result of req_remote_ras_cmd().
 */
int amdgpu_virt_send_remote_ras_cmd(struct amdgpu_device *adev,
				    uint64_t buf, uint32_t buf_len)
{
	int ret;

	if (!down_read_trylock(&adev->reset_domain->sem))
		return -EIO;

	ret = req_remote_ras_cmd(adev, lower_32_bits(buf),
				 upper_32_bits(buf), buf_len);
	up_read(&adev->reset_domain->sem);

	return ret;
}
2008