Lines Matching defs:kfd
42 * kfd_locked is used to lock the kfd driver during suspend or reset
43 * once locked, the kfd driver will stop any further GPU execution.
61 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
63 static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
65 static int kfd_resume(struct kfd_node *kfd);
67 static void kfd_device_info_set_sdma_info(struct kfd_dev *kfd)
69 uint32_t sdma_version = amdgpu_ip_version(kfd->adev, SDMA0_HWIP, 0);
81 kfd->device_info.num_sdma_queues_per_engine = 2;
107 kfd->device_info.num_sdma_queues_per_engine = 8;
113 kfd->device_info.num_sdma_queues_per_engine = 8;
116 bitmap_zero(kfd->device_info.reserved_sdma_queues_bitmap, KFD_MAX_SDMA_QUEUES);
130 kfd->device_info.num_reserved_sdma_queues_per_engine = 2;
132 bitmap_set(kfd->device_info.reserved_sdma_queues_bitmap, 0,
133 kfd->adev->sdma.num_instances *
134 kfd->device_info.num_reserved_sdma_queues_per_engine);
141 static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
143 uint32_t gc_version = KFD_GC_VERSION(kfd);
154 kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
159 kfd->device_info.event_interrupt_class =
175 kfd->device_info.event_interrupt_class = &event_interrupt_class_v10;
186 kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
191 kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
196 kfd->device_info.event_interrupt_class = &event_interrupt_class_v9;
200 static void kfd_device_info_init(struct kfd_dev *kfd,
203 uint32_t gc_version = KFD_GC_VERSION(kfd);
204 uint32_t asic_type = kfd->adev->asic_type;
206 kfd->device_info.max_pasid_bits = 16;
207 kfd->device_info.max_no_of_hqd = 24;
208 kfd->device_info.num_of_watch_points = 4;
209 kfd->device_info.mqd_size_aligned = MQD_SIZE_ALIGNED;
210 kfd->device_info.gfx_target_version = gfx_target_version;
212 if (KFD_IS_SOC15(kfd)) {
213 kfd->device_info.doorbell_size = 8;
214 kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t);
215 kfd->device_info.supports_cwsr = true;
217 kfd_device_info_set_sdma_info(kfd);
219 kfd_device_info_set_event_interrupt_class(kfd);
224 kfd->device_info.no_atomic_fw_version = 14;
226 kfd->device_info.no_atomic_fw_version = 3;
228 kfd->device_info.no_atomic_fw_version = 92;
230 kfd->device_info.no_atomic_fw_version = 145;
234 kfd->device_info.needs_pci_atomics = true;
241 kfd->device_info.needs_pci_atomics = true;
242 kfd->device_info.no_atomic_fw_version = kfd->adev->gfx.rs64_enable ? 509 : 0;
244 kfd->device_info.needs_pci_atomics = true;
245 kfd->device_info.no_atomic_fw_version = 2090;
247 kfd->device_info.needs_pci_atomics = true;
250 kfd->device_info.doorbell_size = 4;
251 kfd->device_info.ih_ring_entry_size = 4 * sizeof(uint32_t);
252 kfd->device_info.event_interrupt_class = &event_interrupt_class_cik;
253 kfd->device_info.num_sdma_queues_per_engine = 2;
258 kfd->device_info.supports_cwsr = true;
261 kfd->device_info.needs_pci_atomics = true;
267 struct kfd_dev *kfd = NULL;
476 "GC IP %06x %s not supported in kfd\n",
480 dev_info(kfd_device, "%s %s not supported in kfd\n",
485 kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
486 if (!kfd)
489 kfd->adev = adev;
490 kfd_device_info_init(kfd, vf, gfx_target_version);
491 kfd->init_complete = false;
492 kfd->kfd2kgd = f2g;
493 atomic_set(&kfd->compute_profile, 0);
495 mutex_init(&kfd->doorbell_mutex);
497 ida_init(&kfd->doorbell_ida);
499 return kfd;
502 static void kfd_cwsr_init(struct kfd_dev *kfd)
504 if (cwsr_enable && kfd->device_info.supports_cwsr) {
505 if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) {
508 kfd->cwsr_isa = cwsr_trap_gfx8_hex;
509 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
510 } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) {
513 kfd->cwsr_isa = cwsr_trap_arcturus_hex;
514 kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex);
515 } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) {
518 kfd->cwsr_isa = cwsr_trap_aldebaran_hex;
519 kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex);
520 } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 3) ||
521 KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 4)) {
524 kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex;
525 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex);
526 } else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 5, 0)) {
528 kfd->cwsr_isa = cwsr_trap_gfx9_5_0_hex;
529 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_5_0_hex);
530 } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
533 kfd->cwsr_isa = cwsr_trap_gfx9_hex;
534 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
535 } else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) {
538 kfd->cwsr_isa = cwsr_trap_nv1x_hex;
539 kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
540 } else if (KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)) {
543 kfd->cwsr_isa = cwsr_trap_gfx10_hex;
544 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
545 } else if (KFD_GC_VERSION(kfd) < IP_VERSION(12, 0, 0)) {
549 kfd->cwsr_isa = cwsr_trap_gfx11_hex;
550 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex);
554 kfd->cwsr_isa = cwsr_trap_gfx12_hex;
555 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx12_hex);
558 kfd->cwsr_enabled = true;
565 struct kfd_dev *kfd = node->kfd;
573 && kfd->mec2_fw_version >= 0x81b3) ||
575 && kfd->mec2_fw_version >= 0x1b3) ||
577 && kfd->mec2_fw_version >= 0x30) ||
579 && kfd->mec2_fw_version >= 0x28) ||
585 && kfd->mec2_fw_version >= 0x6b) ||
653 static void kfd_cleanup_nodes(struct kfd_dev *kfd, unsigned int num_nodes)
663 flush_workqueue(kfd->ih_wq);
664 destroy_workqueue(kfd->ih_wq);
667 knode = kfd->nodes[i];
674 kfd->nodes[i] = NULL;
713 bool kgd2kfd_device_init(struct kfd_dev *kfd,
723 kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
725 kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
727 kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
729 kfd->shared_resources = *gpu_resources;
731 kfd->num_nodes = amdgpu_xcp_get_num_xcp(kfd->adev->xcp_mgr);
733 if (kfd->num_nodes == 0) {
736 kfd->adev->gfx.num_xcc_per_xcp);
744 kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->adev);
745 if (!kfd->pci_atomic_requested &&
746 kfd->device_info.needs_pci_atomics &&
747 (!kfd->device_info.no_atomic_fw_version ||
748 kfd->mec_fw_version < kfd->device_info.no_atomic_fw_version)) {
751 kfd->adev->pdev->vendor, kfd->adev->pdev->device,
752 kfd->mec_fw_version,
753 kfd->device_info.no_atomic_fw_version);
770 if (kfd->adev->xcp_mgr) {
771 partition_mode = amdgpu_xcp_query_partition_mode(kfd->adev->xcp_mgr,
774 kfd->num_nodes != 1) {
788 kfd->device_info.mqd_size_aligned;
794 map_process_packet_size = KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) ?
808 kfd->adev, size, &kfd->gtt_mem,
809 &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
818 if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
823 if (kfd_doorbell_init(kfd)) {
830 kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;
838 if (!kfd->hive_id && kfd->num_nodes > 1)
839 kfd->hive_id = pci_dev_id(kfd->adev->pdev);
841 kfd->noretry = kfd->adev->gmc.noretry;
843 kfd_cwsr_init(kfd);
846 kfd->num_nodes);
849 for (i = 0, xcp_idx = 0; i < kfd->num_nodes; i++) {
855 node->adev = kfd->adev;
856 node->kfd = kfd;
857 node->kfd2kgd = kfd->kfd2kgd;
859 node->xcp = amdgpu_get_next_xcp(kfd->adev->xcp_mgr, &xcp_idx);
867 (1U << NUM_XCC(kfd->adev->gfx.xcc_mask)) - 1;
877 kfd->num_nodes != 1) {
901 amdgpu_amdkfd_get_local_mem_info(kfd->adev,
904 if (kfd->adev->xcp_mgr)
915 kfd->nodes[i] = node;
918 svm_range_set_max_pages(kfd->adev);
920 kfd->init_complete = true;
921 dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor,
922 kfd->adev->pdev->device);
924 pr_debug("Starting kfd with the following scheduling policy %d\n",
931 kfd_cleanup_nodes(kfd, i);
932 kfd_doorbell_fini(kfd);
934 kfd_gtt_sa_fini(kfd);
936 amdgpu_amdkfd_free_gtt_mem(kfd->adev, &kfd->gtt_mem);
940 kfd->adev->pdev->vendor, kfd->adev->pdev->device);
942 return kfd->init_complete;
945 void kgd2kfd_device_exit(struct kfd_dev *kfd)
947 if (kfd->init_complete) {
949 kfd_cleanup_nodes(kfd, kfd->num_nodes);
951 kfd_doorbell_fini(kfd);
952 ida_destroy(&kfd->doorbell_ida);
953 kfd_gtt_sa_fini(kfd);
954 amdgpu_amdkfd_free_gtt_mem(kfd->adev, &kfd->gtt_mem);
957 kfree(kfd);
960 int kgd2kfd_pre_reset(struct kfd_dev *kfd,
966 if (!kfd->init_complete)
969 for (i = 0; i < kfd->num_nodes; i++) {
970 node = kfd->nodes[i];
974 kgd2kfd_suspend(kfd, true);
976 for (i = 0; i < kfd->num_nodes; i++)
977 kfd_signal_reset_event(kfd->nodes[i]);
988 int kgd2kfd_post_reset(struct kfd_dev *kfd)
994 if (!kfd->init_complete)
997 for (i = 0; i < kfd->num_nodes; i++) {
998 ret = kfd_resume(kfd->nodes[i]);
1007 for (i = 0; i < kfd->num_nodes; i++) {
1008 node = kfd->nodes[i];
1016 bool kfd_is_locked(struct kfd_dev *kfd)
1027 if (kfd)
1028 return kfd->kfd_dev_lock > 0;
1035 if (dev->kfd->kfd_dev_lock > 0)
1042 void kgd2kfd_suspend(struct kfd_dev *kfd, bool suspend_proc)
1047 if (!kfd->init_complete)
1051 kgd2kfd_suspend_process(kfd);
1053 for (i = 0; i < kfd->num_nodes; i++) {
1054 node = kfd->nodes[i];
1059 int kgd2kfd_resume(struct kfd_dev *kfd, bool resume_proc)
1063 if (!kfd->init_complete)
1066 for (i = 0; i < kfd->num_nodes; i++) {
1067 ret = kfd_resume(kfd->nodes[i]);
1073 ret = kgd2kfd_resume_process(kfd);
1078 void kgd2kfd_suspend_process(struct kfd_dev *kfd)
1080 if (!kfd->init_complete)
1090 int kgd2kfd_resume_process(struct kfd_dev *kfd)
1094 if (!kfd->init_complete)
1120 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
1127 if (!kfd->init_complete)
1130 if (kfd->device_info.ih_ring_entry_size > sizeof(patched_ihre)) {
1135 for (i = 0; i < kfd->num_nodes; i++) {
1136 node = kfd->nodes[i];
1144 queue_work(node->kfd->ih_wq, &node->interrupt_work);
1242 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
1252 kfd->gtt_sa_chunk_size = chunk_size;
1253 kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;
1255 kfd->gtt_sa_bitmap = bitmap_zalloc(kfd->gtt_sa_num_of_chunks,
1257 if (!kfd->gtt_sa_bitmap)
1261 kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);
1263 mutex_init(&kfd->gtt_sa_lock);
1268 static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
1270 mutex_destroy(&kfd->gtt_sa_lock);
1271 bitmap_free(kfd->gtt_sa_bitmap);
1292 struct kfd_dev *kfd = node->kfd;
1297 if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
1308 mutex_lock(&kfd->gtt_sa_lock);
1312 found = find_next_zero_bit(kfd->gtt_sa_bitmap,
1313 kfd->gtt_sa_num_of_chunks,
1319 if (found == kfd->gtt_sa_num_of_chunks)
1326 kfd->gtt_start_gpu_addr,
1328 kfd->gtt_sa_chunk_size);
1330 kfd->gtt_start_cpu_ptr,
1332 kfd->gtt_sa_chunk_size);
1338 if (size <= kfd->gtt_sa_chunk_size) {
1340 __set_bit(found, kfd->gtt_sa_bitmap);
1345 cur_size = size - kfd->gtt_sa_chunk_size;
1348 find_next_zero_bit(kfd->gtt_sa_bitmap,
1349 kfd->gtt_sa_num_of_chunks, ++found);
1363 if (found == kfd->gtt_sa_num_of_chunks)
1367 if (cur_size <= kfd->gtt_sa_chunk_size)
1370 cur_size -= kfd->gtt_sa_chunk_size;
1378 bitmap_set(kfd->gtt_sa_bitmap, (*mem_obj)->range_start,
1382 mutex_unlock(&kfd->gtt_sa_lock);
1387 mutex_unlock(&kfd->gtt_sa_lock);
1394 struct kfd_dev *kfd = node->kfd;
1403 mutex_lock(&kfd->gtt_sa_lock);
1406 bitmap_clear(kfd->gtt_sa_bitmap, mem_obj->range_start,
1409 mutex_unlock(&kfd->gtt_sa_lock);
1415 void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
1422 if (kfd)
1423 atomic_inc(&kfd->nodes[0]->sram_ecc_flag);
1428 if (atomic_inc_return(&node->kfd->compute_profile) == 1)
1434 int count = atomic_dec_return(&node->kfd->compute_profile);
1443 if (atomic_read(&node->kfd->compute_profile))
1448 void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
1455 if (kfd && kfd->init_complete)
1456 kfd_smi_event_update_thermal_throttling(kfd->nodes[0],
1469 return node->adev->sdma.num_instances/(int)node->kfd->num_nodes;
1471 return min(node->adev->sdma.num_instances/(int)node->kfd->num_nodes, 2);
1477 return node->adev->sdma.num_instances/(int)node->kfd->num_nodes -
1481 int kgd2kfd_check_and_lock_kfd(struct kfd_dev *kfd)
1488 if (hash_empty(kfd_processes_table) && !kfd_is_locked(kfd))
1491 /* fail under system reset/resume or while the kfd device is switching partitions. */
1492 if (kfd_is_locked(kfd)) {
1506 if (p->pdds[i]->dev->kfd != kfd)
1518 ++kfd->kfd_dev_lock;
1524 void kgd2kfd_unlock_kfd(struct kfd_dev *kfd)
1527 --kfd->kfd_dev_lock;
1531 int kgd2kfd_start_sched(struct kfd_dev *kfd, uint32_t node_id)
1536 if (!kfd->init_complete)
1539 if (node_id >= kfd->num_nodes) {
1540 dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n",
1541 node_id, kfd->num_nodes - 1);
1544 node = kfd->nodes[node_id];
1553 int kgd2kfd_stop_sched(struct kfd_dev *kfd, uint32_t node_id)
1557 if (!kfd->init_complete)
1560 if (node_id >= kfd->num_nodes) {
1561 dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n",
1562 node_id, kfd->num_nodes - 1);
1566 node = kfd->nodes[node_id];
1570 bool kgd2kfd_compute_active(struct kfd_dev *kfd, uint32_t node_id)
1574 if (!kfd->init_complete)
1577 if (node_id >= kfd->num_nodes) {
1578 dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n",
1579 node_id, kfd->num_nodes - 1);
1583 node = kfd->nodes[node_id];
1667 if (dev->kfd->shared_resources.enable_mes) {
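
The matches around kfd_is_locked(), kgd2kfd_check_and_lock_kfd(), kgd2kfd_unlock_kfd() and the kfd_locked comment near the top of the file show a counting device lock: kfd_dev_lock is raised during suspend, reset or partition switching and checked before new work is admitted. The code below is a minimal standalone userspace sketch of that pattern, not driver code: the struct, the pthread mutex and main() are invented here, and the real admission check also considers running processes and reset state (and the counter can nest), which this sketch collapses into a single "refuse while locked" rule.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_kfd_dev {
	pthread_mutex_t lock;	/* stands in for the driver's process-table mutex */
	int kfd_dev_lock;	/* > 0 means the device is locked */
};

static bool fake_kfd_is_locked(struct fake_kfd_dev *kfd)
{
	return kfd->kfd_dev_lock > 0;
}

/* Refuse new work while the device is locked; otherwise take the lock. */
static int fake_check_and_lock(struct fake_kfd_dev *kfd)
{
	int ret = 0;

	pthread_mutex_lock(&kfd->lock);
	if (fake_kfd_is_locked(kfd))
		ret = -1;	/* reset/resume or partition switch in flight */
	else
		++kfd->kfd_dev_lock;
	pthread_mutex_unlock(&kfd->lock);
	return ret;
}

static void fake_unlock(struct fake_kfd_dev *kfd)
{
	pthread_mutex_lock(&kfd->lock);
	--kfd->kfd_dev_lock;
	pthread_mutex_unlock(&kfd->lock);
}

int main(void)
{
	struct fake_kfd_dev dev;

	pthread_mutex_init(&dev.lock, NULL);
	dev.kfd_dev_lock = 0;

	if (fake_check_and_lock(&dev) == 0)
		printf("locked, kfd_dev_lock = %d\n", dev.kfd_dev_lock);
	if (fake_check_and_lock(&dev) != 0)
		printf("further locking refused while locked\n");
	fake_unlock(&dev);
	printf("unlocked, kfd_dev_lock = %d\n", dev.kfd_dev_lock);

	pthread_mutex_destroy(&dev.lock);
	return 0;
}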
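
The kgd2kfd_device_init() matches also show the PCIe-atomics gate: a device whose family needs PCIe atomics (needs_pci_atomics) is only skipped when the platform does not provide them (pci_atomic_requested is false) and the loaded MEC firmware is older than the first version known to tolerate their absence (no_atomic_fw_version, where 0 means no such firmware exists). The sketch below is a standalone restatement of that predicate; the struct and the sample values in main() are assumptions, with 145 taken from one of the no_atomic_fw_version assignments in the listing.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct atomics_info {
	bool pci_atomic_requested;	/* platform grants PCIe atomics */
	bool needs_pci_atomics;		/* ASIC family wants them */
	uint32_t no_atomic_fw_version;	/* first MEC fw that runs without them, 0 = none */
	uint32_t mec_fw_version;	/* MEC firmware actually loaded */
};

/* Mirrors the condition under which device init bails out in the listing. */
static bool must_skip_device(const struct atomics_info *i)
{
	return !i->pci_atomic_requested &&
	       i->needs_pci_atomics &&
	       (!i->no_atomic_fw_version ||
		i->mec_fw_version < i->no_atomic_fw_version);
}

int main(void)
{
	struct atomics_info old_fw = { false, true, 145, 120 };
	struct atomics_info new_fw = { false, true, 145, 150 };

	printf("old fw skipped: %d, new fw skipped: %d\n",
	       must_skip_device(&old_fw), must_skip_device(&new_fw));
	return 0;
}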
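
Finally, the kfd_gtt_sa_init() matches and the surrounding allocate/free lines (gtt_sa_chunk_size, gtt_sa_num_of_chunks, find_next_zero_bit, bitmap_set, bitmap_clear) show a simple sub-allocator: one GTT buffer is carved into fixed-size chunks, a bitmap records which chunks are in use, allocation finds a contiguous run of clear bits and freeing clears that run again. Here is a standalone sketch of that idea under stated assumptions: it replaces the kernel bitmap helpers with a byte-per-chunk array, drops the gtt_sa_lock mutex, and all names are invented for the sketch.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct sa_pool {
	unsigned int chunk_size;
	unsigned int num_chunks;
	unsigned char *bitmap;	/* one byte per chunk for simplicity */
};

static int sa_init(struct sa_pool *p, unsigned int buf_size, unsigned int chunk_size)
{
	p->chunk_size = chunk_size;
	p->num_chunks = buf_size / chunk_size;
	p->bitmap = calloc(p->num_chunks, 1);
	return p->bitmap ? 0 : -1;
}

/* Return the first chunk index of a free run long enough for `size`, or -1. */
static int sa_alloc(struct sa_pool *p, unsigned int size)
{
	unsigned int needed = (size + p->chunk_size - 1) / p->chunk_size;
	unsigned int run = 0;

	for (unsigned int i = 0; i < p->num_chunks; i++) {
		run = p->bitmap[i] ? 0 : run + 1;
		if (run == needed) {
			unsigned int start = i + 1 - needed;

			memset(&p->bitmap[start], 1, needed);
			return (int)start;
		}
	}
	return -1;	/* no contiguous run of free chunks */
}

static void sa_free(struct sa_pool *p, unsigned int start, unsigned int size)
{
	unsigned int chunks = (size + p->chunk_size - 1) / p->chunk_size;

	memset(&p->bitmap[start], 0, chunks);
}

int main(void)
{
	struct sa_pool pool;

	if (sa_init(&pool, 4096, 512))
		return 1;

	int a = sa_alloc(&pool, 1500);	/* needs 3 chunks */
	int b = sa_alloc(&pool, 300);	/* needs 1 chunk  */

	printf("a at chunk %d, b at chunk %d\n", a, b);
	sa_free(&pool, (unsigned int)a, 1500);
	free(pool.bitmap);
	return 0;
}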