xref: /linux/drivers/gpu/drm/amd/amdkfd/cik_event_interrupt.c (revision 17e548405a81665fd14cee960db7d093d1396400)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include "kfd_priv.h"
24 #include "kfd_events.h"
25 #include "cik_int.h"
26 #include "amdgpu_amdkfd.h"
27 #include "kfd_smi_events.h"
28 
29 static bool cik_event_interrupt_isr(struct kfd_node *dev,
30 					const uint32_t *ih_ring_entry,
31 					uint32_t *patched_ihre,
32 					bool *patched_flag)
33 {
34 	const struct cik_ih_ring_entry *ihre =
35 			(const struct cik_ih_ring_entry *)ih_ring_entry;
36 	const struct kfd2kgd_calls *f2g = dev->kfd2kgd;
37 	unsigned int vmid;
38 	uint16_t pasid;
39 	bool ret;
40 
41 	/* This workaround is due to HW/FW limitation on Hawaii that
42 	 * VMID and PASID are not written into ih_ring_entry
43 	 */
44 	if ((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
45 		ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) &&
46 		dev->adev->asic_type == CHIP_HAWAII) {
47 		struct cik_ih_ring_entry *tmp_ihre =
48 			(struct cik_ih_ring_entry *)patched_ihre;
49 
50 		*patched_flag = true;
51 		*tmp_ihre = *ihre;
52 
53 		vmid = f2g->read_vmid_from_vmfault_reg(dev->adev);
54 		ret = f2g->get_atc_vmid_pasid_mapping_info(dev->adev, vmid, &pasid);
55 
56 		tmp_ihre->ring_id &= 0x000000ff;
57 		tmp_ihre->ring_id |= vmid << 8;
58 		tmp_ihre->ring_id |= pasid << 16;
59 
60 		return ret && (pasid != 0) &&
61 			vmid >= dev->vm_info.first_vmid_kfd &&
62 			vmid <= dev->vm_info.last_vmid_kfd;
63 	}
64 
65 	/* Only handle interrupts from KFD VMIDs */
66 	vmid  = (ihre->ring_id & 0x0000ff00) >> 8;
67 	if (vmid < dev->vm_info.first_vmid_kfd ||
68 	    vmid > dev->vm_info.last_vmid_kfd)
69 		return false;
70 
71 	/* If there is no valid PASID, it's likely a firmware bug */
72 	pasid = (ihre->ring_id & 0xffff0000) >> 16;
73 	if (WARN_ONCE(pasid == 0, "FW bug: No PASID in KFD interrupt"))
74 		return false;
75 
76 	/* Interrupt types we care about: various signals and faults.
77 	 * They will be forwarded to a work queue (see below).
78 	 */
79 	return ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE ||
80 		ihre->source_id == CIK_INTSRC_SDMA_TRAP ||
81 		ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG ||
82 		ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE ||
83 		((ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
84 		ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) &&
85 		!amdgpu_no_queue_eviction_on_vm_fault);
86 }
87 
88 static void cik_event_interrupt_wq(struct kfd_node *dev,
89 					const uint32_t *ih_ring_entry)
90 {
91 	const struct cik_ih_ring_entry *ihre =
92 			(const struct cik_ih_ring_entry *)ih_ring_entry;
93 	uint32_t context_id = ihre->data & 0xfffffff;
94 	unsigned int vmid  = (ihre->ring_id & 0x0000ff00) >> 8;
95 	u32 pasid = (ihre->ring_id & 0xffff0000) >> 16;
96 
97 	if (pasid == 0)
98 		return;
99 
100 	if (ihre->source_id == CIK_INTSRC_CP_END_OF_PIPE)
101 		kfd_signal_event_interrupt(pasid, context_id, 28);
102 	else if (ihre->source_id == CIK_INTSRC_SDMA_TRAP)
103 		kfd_signal_event_interrupt(pasid, context_id, 28);
104 	else if (ihre->source_id == CIK_INTSRC_SQ_INTERRUPT_MSG)
105 		kfd_signal_event_interrupt(pasid, context_id & 0xff, 8);
106 	else if (ihre->source_id == CIK_INTSRC_CP_BAD_OPCODE)
107 		kfd_signal_hw_exception_event(pasid);
108 	else if (ihre->source_id == CIK_INTSRC_GFX_PAGE_INV_FAULT ||
109 		ihre->source_id == CIK_INTSRC_GFX_MEM_PROT_FAULT) {
110 		struct kfd_process_device *pdd = NULL;
111 		struct kfd_vm_fault_info info;
112 		struct kfd_process *p;
113 
114 		kfd_smi_event_update_vmfault(dev, pasid);
115 		p = kfd_lookup_process_by_pasid(pasid, &pdd);
116 		if (!pdd)
117 			return;
118 
119 		kfd_evict_process_device(pdd);
120 
121 		memset(&info, 0, sizeof(info));
122 		amdgpu_amdkfd_gpuvm_get_vm_fault_info(dev->adev, &info);
123 		if (!info.page_addr && !info.status) {
124 			kfd_unref_process(p);
125 			return;
126 		}
127 
128 		if (info.vmid == vmid)
129 			kfd_signal_vm_fault_event(pdd, &info, NULL);
130 		else
131 			kfd_signal_vm_fault_event(pdd, &info, NULL);
132 
133 		kfd_unref_process(p);
134 	}
135 }
136 
137 const struct kfd_event_interrupt_class event_interrupt_class_cik = {
138 	.interrupt_isr = cik_event_interrupt_isr,
139 	.interrupt_wq = cik_event_interrupt_wq,
140 };
141