Lines Matching +full:sub +full:- +full:engines
1 // SPDX-License-Identifier: GPL-2.0 OR MIT
3 * Copyright 2014-2022 Advanced Micro Devices, Inc.
44 * create process (open) will return -EAGAIN.
69 uint32_t sdma_version = amdgpu_ip_version(kfd->adev, SDMA0_HWIP, 0); in kfd_device_info_set_sdma_info()
81 kfd->device_info.num_sdma_queues_per_engine = 2; in kfd_device_info_set_sdma_info()
107 kfd->device_info.num_sdma_queues_per_engine = 8; in kfd_device_info_set_sdma_info()
113 kfd->device_info.num_sdma_queues_per_engine = 8; in kfd_device_info_set_sdma_info()
116 bitmap_zero(kfd->device_info.reserved_sdma_queues_bitmap, KFD_MAX_SDMA_QUEUES); in kfd_device_info_set_sdma_info()
130 kfd->device_info.num_reserved_sdma_queues_per_engine = 2; in kfd_device_info_set_sdma_info()
131 /* BIT(0)=engine-0 queue-0; BIT(1)=engine-1 queue-0; BIT(2)=engine-0 queue-1; ... */ in kfd_device_info_set_sdma_info()
132 bitmap_set(kfd->device_info.reserved_sdma_queues_bitmap, 0, in kfd_device_info_set_sdma_info()
133 kfd->adev->sdma.num_instances * in kfd_device_info_set_sdma_info()
134 kfd->device_info.num_reserved_sdma_queues_per_engine); in kfd_device_info_set_sdma_info()
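For illustration only (not driver code): the interleaved bit layout described in the comment above reduces to a simple index calculation, so the bitmap_set() call above reserves queues 0..num_reserved_sdma_queues_per_engine-1 on every engine. The helper name below is hypothetical.

static inline unsigned int reserved_sdma_bit(unsigned int engine,
					     unsigned int queue,
					     unsigned int num_engines)
{
	/* Interleaved layout from the comment above: engines vary fastest. */
	return queue * num_engines + engine;
}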
154 kfd->device_info.event_interrupt_class = &event_interrupt_class_v9; in kfd_device_info_set_event_interrupt_class()
159 kfd->device_info.event_interrupt_class = in kfd_device_info_set_event_interrupt_class()
175 kfd->device_info.event_interrupt_class = &event_interrupt_class_v10; in kfd_device_info_set_event_interrupt_class()
186 kfd->device_info.event_interrupt_class = &event_interrupt_class_v11; in kfd_device_info_set_event_interrupt_class()
191 kfd->device_info.event_interrupt_class = &event_interrupt_class_v11; in kfd_device_info_set_event_interrupt_class()
196 kfd->device_info.event_interrupt_class = &event_interrupt_class_v9; in kfd_device_info_set_event_interrupt_class()
204 uint32_t asic_type = kfd->adev->asic_type; in kfd_device_info_init()
206 kfd->device_info.max_pasid_bits = 16; in kfd_device_info_init()
207 kfd->device_info.max_no_of_hqd = 24; in kfd_device_info_init()
208 kfd->device_info.num_of_watch_points = 4; in kfd_device_info_init()
209 kfd->device_info.mqd_size_aligned = MQD_SIZE_ALIGNED; in kfd_device_info_init()
210 kfd->device_info.gfx_target_version = gfx_target_version; in kfd_device_info_init()
213 kfd->device_info.doorbell_size = 8; in kfd_device_info_init()
214 kfd->device_info.ih_ring_entry_size = 8 * sizeof(uint32_t); in kfd_device_info_init()
215 kfd->device_info.supports_cwsr = true; in kfd_device_info_init()
224 kfd->device_info.no_atomic_fw_version = 14; in kfd_device_info_init()
226 kfd->device_info.no_atomic_fw_version = 3; in kfd_device_info_init()
228 kfd->device_info.no_atomic_fw_version = 92; in kfd_device_info_init()
230 kfd->device_info.no_atomic_fw_version = 145; in kfd_device_info_init()
234 kfd->device_info.needs_pci_atomics = true; in kfd_device_info_init()
241 kfd->device_info.needs_pci_atomics = true; in kfd_device_info_init()
242 kfd->device_info.no_atomic_fw_version = kfd->adev->gfx.rs64_enable ? 509 : 0; in kfd_device_info_init()
244 kfd->device_info.needs_pci_atomics = true; in kfd_device_info_init()
245 kfd->device_info.no_atomic_fw_version = 2090; in kfd_device_info_init()
247 kfd->device_info.needs_pci_atomics = true; in kfd_device_info_init()
250 kfd->device_info.doorbell_size = 4; in kfd_device_info_init()
251 kfd->device_info.ih_ring_entry_size = 4 * sizeof(uint32_t); in kfd_device_info_init()
252 kfd->device_info.event_interrupt_class = &event_interrupt_class_cik; in kfd_device_info_init()
253 kfd->device_info.num_sdma_queues_per_engine = 2; in kfd_device_info_init()
258 kfd->device_info.supports_cwsr = true; in kfd_device_info_init()
261 kfd->device_info.needs_pci_atomics = true; in kfd_device_info_init()
271 switch (adev->asic_type) { in kgd2kfd_probe()
481 amdgpu_asic_name[adev->asic_type], vf ? "VF" : ""); in kgd2kfd_probe()
489 kfd->adev = adev; in kgd2kfd_probe()
491 kfd->init_complete = false; in kgd2kfd_probe()
492 kfd->kfd2kgd = f2g; in kgd2kfd_probe()
493 atomic_set(&kfd->compute_profile, 0); in kgd2kfd_probe()
495 mutex_init(&kfd->doorbell_mutex); in kgd2kfd_probe()
497 ida_init(&kfd->doorbell_ida); in kgd2kfd_probe()
498 atomic_set(&kfd->kfd_processes_count, 0); in kgd2kfd_probe()
505 if (cwsr_enable && kfd->device_info.supports_cwsr) { in kfd_cwsr_init()
509 kfd->cwsr_isa = cwsr_trap_gfx8_hex; in kfd_cwsr_init()
510 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex); in kfd_cwsr_init()
514 kfd->cwsr_isa = cwsr_trap_arcturus_hex; in kfd_cwsr_init()
515 kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex); in kfd_cwsr_init()
519 kfd->cwsr_isa = cwsr_trap_aldebaran_hex; in kfd_cwsr_init()
520 kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex); in kfd_cwsr_init()
525 kfd->cwsr_isa = cwsr_trap_gfx9_4_3_hex; in kfd_cwsr_init()
526 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_4_3_hex); in kfd_cwsr_init()
529 kfd->cwsr_isa = cwsr_trap_gfx9_5_0_hex; in kfd_cwsr_init()
530 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_5_0_hex); in kfd_cwsr_init()
534 kfd->cwsr_isa = cwsr_trap_gfx9_hex; in kfd_cwsr_init()
535 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex); in kfd_cwsr_init()
539 kfd->cwsr_isa = cwsr_trap_nv1x_hex; in kfd_cwsr_init()
540 kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex); in kfd_cwsr_init()
544 kfd->cwsr_isa = cwsr_trap_gfx10_hex; in kfd_cwsr_init()
545 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex); in kfd_cwsr_init()
550 kfd->cwsr_isa = cwsr_trap_gfx11_hex; in kfd_cwsr_init()
551 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx11_hex); in kfd_cwsr_init()
555 kfd->cwsr_isa = cwsr_trap_gfx12_hex; in kfd_cwsr_init()
556 kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx12_hex); in kfd_cwsr_init()
559 kfd->cwsr_enabled = true; in kfd_cwsr_init()
566 struct kfd_dev *kfd = node->kfd; in kfd_gws_init()
567 uint32_t mes_rev = node->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK; in kfd_gws_init()
569 if (node->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) in kfd_gws_init()
574 && kfd->mec2_fw_version >= 0x81b3) || in kfd_gws_init()
576 && kfd->mec2_fw_version >= 0x1b3) || in kfd_gws_init()
578 && kfd->mec2_fw_version >= 0x30) || in kfd_gws_init()
580 && kfd->mec2_fw_version >= 0x28) || in kfd_gws_init()
586 && kfd->mec2_fw_version >= 0x6b) || in kfd_gws_init()
592 node->adev->gds.gws_size = 64; in kfd_gws_init()
593 ret = amdgpu_amdkfd_alloc_gws(node->adev, in kfd_gws_init()
594 node->adev->gds.gws_size, &node->gws); in kfd_gws_init()
602 INIT_LIST_HEAD(&dev->smi_clients); in kfd_smi_init()
603 spin_lock_init(&dev->smi_lock); in kfd_smi_init()
608 int err = -1; in kfd_init_node()
615 node->dqm = device_queue_manager_init(node); in kfd_init_node()
616 if (!node->dqm) { in kfd_init_node()
623 node->adev->gds.gws_size); in kfd_init_node()
642 device_queue_manager_uninit(node->dqm); in kfd_init_node()
646 if (node->gws) in kfd_init_node()
647 amdgpu_amdkfd_free_gws(node->adev, node->gws); in kfd_init_node()
661 * work-queue items that will access interrupt_ring. New work items in kfd_cleanup_nodes()
664 flush_workqueue(kfd->ih_wq); in kfd_cleanup_nodes()
665 destroy_workqueue(kfd->ih_wq); in kfd_cleanup_nodes()
668 knode = kfd->nodes[i]; in kfd_cleanup_nodes()
669 device_queue_manager_uninit(knode->dqm); in kfd_cleanup_nodes()
672 if (knode->gws) in kfd_cleanup_nodes()
673 amdgpu_amdkfd_free_gws(knode->adev, knode->gws); in kfd_cleanup_nodes()
675 kfd->nodes[i] = NULL; in kfd_cleanup_nodes()
682 struct amdgpu_device *adev = node->adev; in kfd_setup_interrupt_bitmap()
683 uint32_t xcc_mask = node->xcc_mask; in kfd_setup_interrupt_bitmap()
689 * 1. Bits 0-15 - correspond to the NodeId field. in kfd_setup_interrupt_bitmap()
694 * 2. Bits 16-31 - unused. in kfd_setup_interrupt_bitmap()
701 * - the Node Id matches the corresponding bit set in in kfd_setup_interrupt_bitmap()
702 * Bits 0-15. in kfd_setup_interrupt_bitmap()
703 * - AND VMID reported in the interrupt lies within the in kfd_setup_interrupt_bitmap()
708 node->interrupt_bitmap |= (mapped_xcc % 2 ? 5 : 3) << (4 * (mapped_xcc / 2)); in kfd_setup_interrupt_bitmap()
711 node->interrupt_bitmap); in kfd_setup_interrupt_bitmap()
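A small userspace sketch (illustration only) that evaluates the same shift/mask expression as the assignment above, showing which NodeId bits each mapped XCC contributes to interrupt_bitmap:

#include <stdio.h>

int main(void)
{
	for (unsigned int mapped_xcc = 0; mapped_xcc < 4; mapped_xcc++) {
		unsigned int bits = (mapped_xcc % 2 ? 5 : 3) << (4 * (mapped_xcc / 2));

		printf("mapped_xcc %u -> 0x%02x\n", mapped_xcc, bits);
	}
	/* Prints 0x03, 0x05, 0x30, 0x50, i.e. NodeId bits {0,1}, {0,2}, {4,5}, {4,6}. */
	return 0;
}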
724 kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, in kgd2kfd_device_init()
726 kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, in kgd2kfd_device_init()
728 kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, in kgd2kfd_device_init()
730 kfd->shared_resources = *gpu_resources; in kgd2kfd_device_init()
732 kfd->num_nodes = amdgpu_xcp_get_num_xcp(kfd->adev->xcp_mgr); in kgd2kfd_device_init()
734 if (kfd->num_nodes == 0) { in kgd2kfd_device_init()
737 kfd->adev->gfx.num_xcc_per_xcp); in kgd2kfd_device_init()
742 * 32 and 64-bit requests are possible and must be in kgd2kfd_device_init()
745 kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->adev); in kgd2kfd_device_init()
746 if (!kfd->pci_atomic_requested && in kgd2kfd_device_init()
747 kfd->device_info.needs_pci_atomics && in kgd2kfd_device_init()
748 (!kfd->device_info.no_atomic_fw_version || in kgd2kfd_device_init()
749 kfd->mec_fw_version < kfd->device_info.no_atomic_fw_version)) { in kgd2kfd_device_init()
752 kfd->adev->pdev->vendor, kfd->adev->pdev->device, in kgd2kfd_device_init()
753 kfd->mec_fw_version, in kgd2kfd_device_init()
754 kfd->device_info.no_atomic_fw_version); in kgd2kfd_device_init()
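A standalone restatement (illustration only, hypothetical helper name) of the gating condition shown above: probing is rejected when PCIe atomics are unavailable, the ASIC needs them, and no MEC firmware version is known to cope without them (a no_atomic_fw_version of 0 reading as "no such firmware exists").

#include <stdbool.h>

/* Hypothetical helper mirroring the condition shown above. */
static bool reject_missing_pci_atomics(bool pci_atomics_supported,
				       bool needs_pci_atomics,
				       unsigned int mec_fw_version,
				       unsigned int no_atomic_fw_version)
{
	return !pci_atomics_supported && needs_pci_atomics &&
	       (!no_atomic_fw_version ||
		mec_fw_version < no_atomic_fw_version);
}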
758 first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1; in kgd2kfd_device_init()
759 last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1; in kgd2kfd_device_init()
760 vmid_num_kfd = last_vmid_kfd - first_vmid_kfd + 1; in kgd2kfd_device_init()
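A userspace check (illustration only) of the ffs()/fls() range derivation above, using compiler builtins in place of the kernel helpers and a hypothetical compute_vmid_bitmap of 0xff00 (VMIDs 8-15):

#include <assert.h>

static int ffs_u32(unsigned int x) { return x ? __builtin_ctz(x) + 1 : 0; }
static int fls_u32(unsigned int x) { return x ? 32 - __builtin_clz(x) : 0; }

int main(void)
{
	unsigned int compute_vmid_bitmap = 0xff00;	/* hypothetical example */
	int first = ffs_u32(compute_vmid_bitmap) - 1;	/* 8 */
	int last = fls_u32(compute_vmid_bitmap) - 1;	/* 15 */

	assert(first == 8 && last == 15 && last - first + 1 == 8);
	return 0;
}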
762 /* For multi-partition capable GPUs, we need special handling for VMIDs in kgd2kfd_device_init()
765 * Additionally, there are 13 VMIDs (3-15) available for KFD. To in kgd2kfd_device_init()
768 * If the VMID range changes for multi-partition capable GPUs, then in kgd2kfd_device_init()
771 if (kfd->adev->xcp_mgr) { in kgd2kfd_device_init()
772 partition_mode = amdgpu_xcp_query_partition_mode(kfd->adev->xcp_mgr, in kgd2kfd_device_init()
775 kfd->num_nodes != 1) { in kgd2kfd_device_init()
777 first_vmid_kfd = last_vmid_kfd + 1 - vmid_num_kfd*2; in kgd2kfd_device_init()
789 kfd->device_info.mqd_size_aligned; in kgd2kfd_device_init()
809 kfd->adev, size, &kfd->gtt_mem, in kgd2kfd_device_init()
810 &kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr, in kgd2kfd_device_init()
820 dev_err(kfd_device, "Error initializing gtt sub-allocator\n"); in kgd2kfd_device_init()
831 kfd->hive_id = kfd->adev->gmc.xgmi.hive_id; in kgd2kfd_device_init()
834 * For multi-partition capable GPUs, the KFD abstracts all partitions in kgd2kfd_device_init()
839 if (!kfd->hive_id && kfd->num_nodes > 1) in kgd2kfd_device_init()
840 kfd->hive_id = pci_dev_id(kfd->adev->pdev); in kgd2kfd_device_init()
842 kfd->noretry = kfd->adev->gmc.noretry; in kgd2kfd_device_init()
847 kfd->num_nodes); in kgd2kfd_device_init()
850 for (i = 0, xcp_idx = 0; i < kfd->num_nodes; i++) { in kgd2kfd_device_init()
855 node->node_id = i; in kgd2kfd_device_init()
856 node->adev = kfd->adev; in kgd2kfd_device_init()
857 node->kfd = kfd; in kgd2kfd_device_init()
858 node->kfd2kgd = kfd->kfd2kgd; in kgd2kfd_device_init()
859 node->vm_info.vmid_num_kfd = vmid_num_kfd; in kgd2kfd_device_init()
860 node->xcp = amdgpu_get_next_xcp(kfd->adev->xcp_mgr, &xcp_idx); in kgd2kfd_device_init()
862 if (node->xcp) { in kgd2kfd_device_init()
863 amdgpu_xcp_get_inst_details(node->xcp, AMDGPU_XCP_GFX, in kgd2kfd_device_init()
864 &node->xcc_mask); in kgd2kfd_device_init()
867 node->xcc_mask = in kgd2kfd_device_init()
868 (1U << NUM_XCC(kfd->adev->gfx.xcc_mask)) - 1; in kgd2kfd_device_init()
871 if (node->xcp) { in kgd2kfd_device_init()
873 node->node_id, node->xcp->mem_id, in kgd2kfd_device_init()
874 KFD_XCP_MEMORY_SIZE(node->adev, node->node_id) >> 20); in kgd2kfd_device_init()
878 kfd->num_nodes != 1) { in kgd2kfd_device_init()
879 /* For multi-partition capable GPUs and CPX mode, first in kgd2kfd_device_init()
880 * XCD gets VMID range 4-9 and second XCD gets VMID in kgd2kfd_device_init()
881 * range 10-15. in kgd2kfd_device_init()
884 node->vm_info.first_vmid_kfd = (i%2 == 0) ? in kgd2kfd_device_init()
887 node->vm_info.last_vmid_kfd = (i%2 == 0) ? in kgd2kfd_device_init()
888 last_vmid_kfd-vmid_num_kfd : in kgd2kfd_device_init()
890 node->compute_vmid_bitmap = in kgd2kfd_device_init()
891 ((0x1 << (node->vm_info.last_vmid_kfd + 1)) - 1) - in kgd2kfd_device_init()
892 ((0x1 << (node->vm_info.first_vmid_kfd)) - 1); in kgd2kfd_device_init()
894 node->vm_info.first_vmid_kfd = first_vmid_kfd; in kgd2kfd_device_init()
895 node->vm_info.last_vmid_kfd = last_vmid_kfd; in kgd2kfd_device_init()
896 node->compute_vmid_bitmap = in kgd2kfd_device_init()
897 gpu_resources->compute_vmid_bitmap; in kgd2kfd_device_init()
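A standalone check (illustration only) of the per-partition VMID-bitmap arithmetic above, using the ranges the comment gives for CPX mode (first XCD VMIDs 4-9, second XCD VMIDs 10-15):

#include <assert.h>

static unsigned int vmid_bitmap(unsigned int first_vmid, unsigned int last_vmid)
{
	return ((0x1 << (last_vmid + 1)) - 1) - ((0x1 << first_vmid) - 1);
}

int main(void)
{
	assert(vmid_bitmap(4, 9) == 0x03f0);	/* bits 4..9 */
	assert(vmid_bitmap(10, 15) == 0xfc00);	/* bits 10..15 */
	return 0;
}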
899 node->max_proc_per_quantum = max_proc_per_quantum; in kgd2kfd_device_init()
900 atomic_set(&node->sram_ecc_flag, 0); in kgd2kfd_device_init()
902 amdgpu_amdkfd_get_local_mem_info(kfd->adev, in kgd2kfd_device_init()
903 &node->local_mem_info, node->xcp); in kgd2kfd_device_init()
905 if (kfd->adev->xcp_mgr) in kgd2kfd_device_init()
914 spin_lock_init(&node->watch_points_lock); in kgd2kfd_device_init()
916 kfd->nodes[i] = node; in kgd2kfd_device_init()
919 svm_range_set_max_pages(kfd->adev); in kgd2kfd_device_init()
921 kfd->init_complete = true; in kgd2kfd_device_init()
922 dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor, in kgd2kfd_device_init()
923 kfd->adev->pdev->device); in kgd2kfd_device_init()
926 node->dqm->sched_policy); in kgd2kfd_device_init()
937 amdgpu_amdkfd_free_gtt_mem(kfd->adev, &kfd->gtt_mem); in kgd2kfd_device_init()
941 kfd->adev->pdev->vendor, kfd->adev->pdev->device); in kgd2kfd_device_init()
943 return kfd->init_complete; in kgd2kfd_device_init()
948 if (kfd->init_complete) { in kgd2kfd_device_exit()
950 kfd_cleanup_nodes(kfd, kfd->num_nodes); in kgd2kfd_device_exit()
953 ida_destroy(&kfd->doorbell_ida); in kgd2kfd_device_exit()
955 amdgpu_amdkfd_free_gtt_mem(kfd->adev, &kfd->gtt_mem); in kgd2kfd_device_exit()
967 if (!kfd->init_complete) in kgd2kfd_pre_reset()
970 for (i = 0; i < kfd->num_nodes; i++) { in kgd2kfd_pre_reset()
971 node = kfd->nodes[i]; in kgd2kfd_pre_reset()
977 for (i = 0; i < kfd->num_nodes; i++) in kgd2kfd_pre_reset()
978 kfd_signal_reset_event(kfd->nodes[i]); in kgd2kfd_pre_reset()
995 if (!kfd->init_complete) in kgd2kfd_post_reset()
998 for (i = 0; i < kfd->num_nodes; i++) { in kgd2kfd_post_reset()
999 ret = kfd_resume(kfd->nodes[i]); in kgd2kfd_post_reset()
1005 --kfd_locked; in kgd2kfd_post_reset()
1008 for (i = 0; i < kfd->num_nodes; i++) { in kgd2kfd_post_reset()
1009 node = kfd->nodes[i]; in kgd2kfd_post_reset()
1010 atomic_set(&node->sram_ecc_flag, 0); in kgd2kfd_post_reset()
1029 return kfd->kfd_dev_lock > 0; in kfd_is_locked()
1036 if (dev->kfd->kfd_dev_lock > 0) in kfd_is_locked()
1048 if (!kfd->init_complete) in kgd2kfd_suspend()
1054 for (i = 0; i < kfd->num_nodes; i++) { in kgd2kfd_suspend()
1055 node = kfd->nodes[i]; in kgd2kfd_suspend()
1056 node->dqm->ops.stop(node->dqm); in kgd2kfd_suspend()
1064 if (!kfd->init_complete) in kgd2kfd_resume()
1067 for (i = 0; i < kfd->num_nodes; i++) { in kgd2kfd_resume()
1068 ret = kfd_resume(kfd->nodes[i]); in kgd2kfd_resume()
1081 if (!kfd->init_complete) in kgd2kfd_suspend_process()
1095 if (!kfd->init_complete) in kgd2kfd_resume_process()
1099 if (--kfd_locked == 0) in kgd2kfd_resume_process()
1111 err = node->dqm->ops.start(node->dqm); in kfd_resume()
1115 node->adev->pdev->vendor, node->adev->pdev->device); in kfd_resume()
1128 if (!kfd->init_complete) in kgd2kfd_interrupt()
1131 if (kfd->device_info.ih_ring_entry_size > sizeof(patched_ihre)) { in kgd2kfd_interrupt()
1136 for (i = 0; i < kfd->num_nodes; i++) { in kgd2kfd_interrupt()
1139 * when kfd->nodes[i] is NULL in kgd2kfd_interrupt()

1141 if (kfd->nodes[i]) in kgd2kfd_interrupt()
1142 node = kfd->nodes[i]; in kgd2kfd_interrupt()
1146 spin_lock_irqsave(&node->interrupt_lock, flags); in kgd2kfd_interrupt()
1148 if (node->interrupts_active in kgd2kfd_interrupt()
1153 queue_work(node->kfd->ih_wq, &node->interrupt_work); in kgd2kfd_interrupt()
1154 spin_unlock_irqrestore(&node->interrupt_lock, flags); in kgd2kfd_interrupt()
1157 spin_unlock_irqrestore(&node->interrupt_lock, flags); in kgd2kfd_interrupt()
1173 return -ESRCH; in kgd2kfd_quiesce_mm()
1175 WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); in kgd2kfd_quiesce_mm()
1193 return -ESRCH; in kgd2kfd_resume_mm()
1201 /** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will
1217 return -EINVAL; in kgd2kfd_schedule_evict_and_restore_process()
1224 return -ENODEV; in kgd2kfd_schedule_evict_and_restore_process()
1226 if (fence->seqno == p->last_eviction_seqno) in kgd2kfd_schedule_evict_and_restore_process()
1229 p->last_eviction_seqno = fence->seqno; in kgd2kfd_schedule_evict_and_restore_process()
1234 active_time = get_jiffies_64() - p->last_restore_timestamp; in kgd2kfd_schedule_evict_and_restore_process()
1236 delay_jiffies -= active_time; in kgd2kfd_schedule_evict_and_restore_process()
1244 p->lead_thread->pid, delay_jiffies); in kgd2kfd_schedule_evict_and_restore_process()
1245 schedule_delayed_work(&p->eviction_work, delay_jiffies); in kgd2kfd_schedule_evict_and_restore_process()
1255 return -EINVAL; in kfd_gtt_sa_init()
1257 return -EINVAL; in kfd_gtt_sa_init()
1259 return -EINVAL; in kfd_gtt_sa_init()
1261 kfd->gtt_sa_chunk_size = chunk_size; in kfd_gtt_sa_init()
1262 kfd->gtt_sa_num_of_chunks = buf_size / chunk_size; in kfd_gtt_sa_init()
1264 kfd->gtt_sa_bitmap = bitmap_zalloc(kfd->gtt_sa_num_of_chunks, in kfd_gtt_sa_init()
1266 if (!kfd->gtt_sa_bitmap) in kfd_gtt_sa_init()
1267 return -ENOMEM; in kfd_gtt_sa_init()
1270 kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap); in kfd_gtt_sa_init()
1272 mutex_init(&kfd->gtt_sa_lock); in kfd_gtt_sa_init()
1279 mutex_destroy(&kfd->gtt_sa_lock); in kfd_gtt_sa_fini()
1280 bitmap_free(kfd->gtt_sa_bitmap); in kfd_gtt_sa_fini()
1301 struct kfd_dev *kfd = node->kfd; in kfd_gtt_sa_allocate()
1304 return -EINVAL; in kfd_gtt_sa_allocate()
1306 if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size) in kfd_gtt_sa_allocate()
1307 return -ENOMEM; in kfd_gtt_sa_allocate()
1311 return -ENOMEM; in kfd_gtt_sa_allocate()
1317 mutex_lock(&kfd->gtt_sa_lock); in kfd_gtt_sa_allocate()
1321 found = find_next_zero_bit(kfd->gtt_sa_bitmap, in kfd_gtt_sa_allocate()
1322 kfd->gtt_sa_num_of_chunks, in kfd_gtt_sa_allocate()
1328 if (found == kfd->gtt_sa_num_of_chunks) in kfd_gtt_sa_allocate()
1332 (*mem_obj)->range_start = found; in kfd_gtt_sa_allocate()
1333 (*mem_obj)->range_end = found; in kfd_gtt_sa_allocate()
1334 (*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr( in kfd_gtt_sa_allocate()
1335 kfd->gtt_start_gpu_addr, in kfd_gtt_sa_allocate()
1337 kfd->gtt_sa_chunk_size); in kfd_gtt_sa_allocate()
1338 (*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr( in kfd_gtt_sa_allocate()
1339 kfd->gtt_start_cpu_ptr, in kfd_gtt_sa_allocate()
1341 kfd->gtt_sa_chunk_size); in kfd_gtt_sa_allocate()
1344 (uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr); in kfd_gtt_sa_allocate()
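A minimal sketch of what the address calculation above amounts to, assuming the kfd_gtt_sa_calc_* helpers (not shown in this listing) simply offset the start address by chunk index times chunk size; both helper names below are hypothetical.

#include <stdint.h>

static inline uint64_t gtt_sa_gpu_addr(uint64_t start_gpu_addr,
					unsigned int chunk,
					unsigned int chunk_size)
{
	/* Assumed behaviour: linear sub-allocation from the GTT start address. */
	return start_gpu_addr + (uint64_t)chunk * chunk_size;
}

static inline void *gtt_sa_cpu_addr(void *start_cpu_ptr,
				    unsigned int chunk,
				    unsigned int chunk_size)
{
	return (uint8_t *)start_cpu_ptr + (uint64_t)chunk * chunk_size;
}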
1347 if (size <= kfd->gtt_sa_chunk_size) { in kfd_gtt_sa_allocate()
1349 __set_bit(found, kfd->gtt_sa_bitmap); in kfd_gtt_sa_allocate()
1354 cur_size = size - kfd->gtt_sa_chunk_size; in kfd_gtt_sa_allocate()
1356 (*mem_obj)->range_end = in kfd_gtt_sa_allocate()
1357 find_next_zero_bit(kfd->gtt_sa_bitmap, in kfd_gtt_sa_allocate()
1358 kfd->gtt_sa_num_of_chunks, ++found); in kfd_gtt_sa_allocate()
1364 if ((*mem_obj)->range_end != found) { in kfd_gtt_sa_allocate()
1372 if (found == kfd->gtt_sa_num_of_chunks) in kfd_gtt_sa_allocate()
1376 if (cur_size <= kfd->gtt_sa_chunk_size) in kfd_gtt_sa_allocate()
1379 cur_size -= kfd->gtt_sa_chunk_size; in kfd_gtt_sa_allocate()
1384 (*mem_obj)->range_start, (*mem_obj)->range_end); in kfd_gtt_sa_allocate()
1387 bitmap_set(kfd->gtt_sa_bitmap, (*mem_obj)->range_start, in kfd_gtt_sa_allocate()
1388 (*mem_obj)->range_end - (*mem_obj)->range_start + 1); in kfd_gtt_sa_allocate()
1391 mutex_unlock(&kfd->gtt_sa_lock); in kfd_gtt_sa_allocate()
1396 mutex_unlock(&kfd->gtt_sa_lock); in kfd_gtt_sa_allocate()
1398 return -ENOMEM; in kfd_gtt_sa_allocate()
1403 struct kfd_dev *kfd = node->kfd; in kfd_gtt_sa_free()
1410 mem_obj, mem_obj->range_start, mem_obj->range_end); in kfd_gtt_sa_free()
1412 mutex_lock(&kfd->gtt_sa_lock); in kfd_gtt_sa_free()
1415 bitmap_clear(kfd->gtt_sa_bitmap, mem_obj->range_start, in kfd_gtt_sa_free()
1416 mem_obj->range_end - mem_obj->range_start + 1); in kfd_gtt_sa_free()
1418 mutex_unlock(&kfd->gtt_sa_lock); in kfd_gtt_sa_free()
1432 atomic_inc(&kfd->nodes[0]->sram_ecc_flag); in kgd2kfd_set_sram_ecc_flag()
1437 if (atomic_inc_return(&node->kfd->compute_profile) == 1) in kfd_inc_compute_active()
1438 amdgpu_amdkfd_set_compute_idle(node->adev, false); in kfd_inc_compute_active()
1443 int count = atomic_dec_return(&node->kfd->compute_profile); in kfd_dec_compute_active()
1446 amdgpu_amdkfd_set_compute_idle(node->adev, true); in kfd_dec_compute_active()
1452 if (atomic_read(&node->kfd->compute_profile)) in kfd_compute_active()
1464 if (kfd && kfd->init_complete) in kgd2kfd_smi_event_throttle()
1465 kfd_smi_event_update_thermal_throttling(kfd->nodes[0], in kgd2kfd_smi_event_throttle()
1471 * When the device has more than two engines, we reserve two for PCIe to enable
1472 * full-duplex transfers; the rest are used as XGMI engines.
1476 /* If XGMI is not supported, all SDMA engines are PCIe */ in kfd_get_num_sdma_engines()
1477 if (!node->adev->gmc.xgmi.supported) in kfd_get_num_sdma_engines()
1478 return node->adev->sdma.num_instances/(int)node->kfd->num_nodes; in kfd_get_num_sdma_engines()
1480 return min(node->adev->sdma.num_instances/(int)node->kfd->num_nodes, 2); in kfd_get_num_sdma_engines()
1485 /* After the PCIe engines are reserved, the remaining engines are XGMI */ in kfd_get_num_xgmi_sdma_engines()
1486 return node->adev->sdma.num_instances/(int)node->kfd->num_nodes - in kfd_get_num_xgmi_sdma_engines()
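A userspace sketch (illustration only) of the engine split that the two helpers above and the comment describe, for a hypothetical device with 8 SDMA instances:

#include <stdio.h>

static void sdma_split(int num_instances, int num_nodes, int xgmi_supported)
{
	int per_node = num_instances / num_nodes;
	int pcie = xgmi_supported ? (per_node < 2 ? per_node : 2) : per_node;
	int xgmi = per_node - pcie;

	printf("%d instances, %d node(s): %d PCIe + %d XGMI engines per node\n",
	       num_instances, num_nodes, pcie, xgmi);
}

int main(void)
{
	sdma_split(8, 1, 1);	/* 2 PCIe + 6 XGMI */
	sdma_split(8, 4, 1);	/* 2 PCIe + 0 XGMI */
	sdma_split(2, 1, 0);	/* no XGMI support: 2 PCIe + 0 XGMI */
	return 0;
}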
1497 /* kfd_processes_count is per kfd_dev, return -EBUSY without in kgd2kfd_check_and_lock_kfd()
1500 if (!!atomic_read(&kfd->kfd_processes_count)) { in kgd2kfd_check_and_lock_kfd()
1502 r = -EBUSY; in kgd2kfd_check_and_lock_kfd()
1511 r = -EBUSY; in kgd2kfd_check_and_lock_kfd()
1523 for (i = 0; i < p->n_pdds; i++) { in kgd2kfd_check_and_lock_kfd()
1524 if (p->pdds[i]->dev->kfd != kfd) in kgd2kfd_check_and_lock_kfd()
1527 r = -EBUSY; in kgd2kfd_check_and_lock_kfd()
1536 ++kfd->kfd_dev_lock; in kgd2kfd_check_and_lock_kfd()
1545 --kfd->kfd_dev_lock; in kgd2kfd_unlock_kfd()
1554 if (!kfd->init_complete) in kgd2kfd_start_sched()
1557 if (node_id >= kfd->num_nodes) { in kgd2kfd_start_sched()
1558 dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n", in kgd2kfd_start_sched()
1559 node_id, kfd->num_nodes - 1); in kgd2kfd_start_sched()
1560 return -EINVAL; in kgd2kfd_start_sched()
1562 node = kfd->nodes[node_id]; in kgd2kfd_start_sched()
1564 ret = node->dqm->ops.unhalt(node->dqm); in kgd2kfd_start_sched()
1576 if (!kfd->init_complete) in kgd2kfd_start_sched_all_nodes()
1579 for (i = 0; i < kfd->num_nodes; i++) { in kgd2kfd_start_sched_all_nodes()
1580 node = kfd->nodes[i]; in kgd2kfd_start_sched_all_nodes()
1581 r = node->dqm->ops.unhalt(node->dqm); in kgd2kfd_start_sched_all_nodes()
1594 if (!kfd->init_complete) in kgd2kfd_stop_sched()
1597 if (node_id >= kfd->num_nodes) { in kgd2kfd_stop_sched()
1598 dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n", in kgd2kfd_stop_sched()
1599 node_id, kfd->num_nodes - 1); in kgd2kfd_stop_sched()
1600 return -EINVAL; in kgd2kfd_stop_sched()
1603 node = kfd->nodes[node_id]; in kgd2kfd_stop_sched()
1604 return node->dqm->ops.halt(node->dqm); in kgd2kfd_stop_sched()
1612 if (!kfd->init_complete) in kgd2kfd_stop_sched_all_nodes()
1615 for (i = 0; i < kfd->num_nodes; i++) { in kgd2kfd_stop_sched_all_nodes()
1616 node = kfd->nodes[i]; in kgd2kfd_stop_sched_all_nodes()
1617 r = node->dqm->ops.halt(node->dqm); in kgd2kfd_stop_sched_all_nodes()
1628 if (!kfd->init_complete) in kgd2kfd_compute_active()
1631 if (node_id >= kfd->num_nodes) { in kgd2kfd_compute_active()
1632 dev_warn(kfd->adev->dev, "Invalid node ID: %u exceeds %u\n", in kgd2kfd_compute_active()
1633 node_id, kfd->num_nodes - 1); in kgd2kfd_compute_active()
1637 node = kfd->nodes[node_id]; in kgd2kfd_compute_active()
1643 * kgd2kfd_vmfault_fast_path() - KFD vm page fault interrupt handling fast path for gmc v9
1648 * retry fault -
1663 * no-retry fault -
1670 * fast path - After kfd_signal_vm_fault_event, gmc_v9_0_process_interrupt drops the page fault
1672 * With the gdb debugger enabled, we need to convert the retry fault to a no-retry fault for
1676 * true - use the fast path to handle this fault
1677 * false - use the normal path to handle it
1685 if (entry->ih == &adev->irq.ih_soft || entry->ih == &adev->irq.ih1) { in kgd2kfd_vmfault_fast_path()
1686 p = kfd_lookup_process_by_pasid(entry->pasid, NULL); in kgd2kfd_vmfault_fast_path()
1690 if (p->gpu_page_fault && !p->debug_trap_enabled) { in kgd2kfd_vmfault_fast_path()
1691 if (retry_fault && adev->irq.retry_cam_enabled) { in kgd2kfd_vmfault_fast_path()
1692 cam_index = entry->src_data[2] & 0x3ff; in kgd2kfd_vmfault_fast_path()
1693 WDOORBELL32(adev->irq.retry_cam_doorbell_index, cam_index); in kgd2kfd_vmfault_fast_path()
1703 p->gpu_page_fault = true; in kgd2kfd_vmfault_fast_path()
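For completeness, a trivial restatement (illustration only, hypothetical helper) of the fast-path eligibility check above, as the kerneldoc describes it: repeated retry faults from a process that already has an outstanding GPU page fault and is not under the debugger are handled on the fast path.

#include <stdbool.h>

static inline bool vmfault_fast_path_eligible(bool gpu_page_fault,
					      bool debug_trap_enabled)
{
	return gpu_page_fault && !debug_trap_enabled;
}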
1716 if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) { in kfd_debugfs_hang_hws()
1718 return -EINVAL; in kfd_debugfs_hang_hws()
1721 if (dev->kfd->shared_resources.enable_mes) { in kfd_debugfs_hang_hws()
1722 dev_err(dev->adev->dev, "Inducing MES hang is not supported\n"); in kfd_debugfs_hang_hws()
1723 return -EINVAL; in kfd_debugfs_hang_hws()
1726 return dqm_debugfs_hang_hws(dev->dqm); in kfd_debugfs_hang_hws()