xref: /linux/drivers/gpu/drm/amd/amdkfd/kfd_device.c (revision 5e66e818e0358fe42704404580b70e1ffc7afb6a)
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/bsearch.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
#include "kfd_pm4_headers_aldebaran.h"
#include "cwsr_trap_handler.h"
#include "kfd_iommu.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
#include "kfd_migrate.h"
#include "amdgpu.h"

#define MQD_SIZE_ALIGNED 768

/*
 * kfd_locked is used to lock the kfd driver during suspend or reset.
 * Once locked, the kfd driver will stop any further GPU execution.
 * Process creation (open) will return -EAGAIN.
 */
static atomic_t kfd_locked = ATOMIC_INIT(0);
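
/*
 * Minimal usage sketch of how kfd_locked is honored (hypothetical caller,
 * for illustration only; the real check lives in the character device open
 * path):
 *
 *	static int example_open(struct inode *inode, struct file *filep)
 *	{
 *		if (kfd_is_locked())
 *			return -EAGAIN;
 *		...
 *	}
 */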

#ifdef CONFIG_DRM_AMDGPU_CIK
extern const struct kfd2kgd_calls gfx_v7_kfd2kgd;
#endif
extern const struct kfd2kgd_calls gfx_v8_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v9_kfd2kgd;
extern const struct kfd2kgd_calls arcturus_kfd2kgd;
extern const struct kfd2kgd_calls aldebaran_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd;

#ifdef KFD_SUPPORT_IOMMU_V2
static const struct kfd_device_info kaveri_device_info = {
	.asic_name = "kaveri",
	.gfx_target_version = 70000,
	.max_pasid_bits = 16,
	/* max num of queues for KV. TODO: should be a dynamic value */
	.max_no_of_hqd	= 24,
	.doorbell_size  = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = false,
	.needs_iommu_device = true,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info carrizo_device_info = {
	.asic_name = "carrizo",
	.gfx_target_version = 80001,
	.max_pasid_bits = 16,
	/* max num of queues for CZ. TODO: should be a dynamic value */
	.max_no_of_hqd	= 24,
	.doorbell_size  = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = true,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info raven_device_info = {
	.asic_name = "raven",
	.gfx_target_version = 90002,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = true,
	.needs_pci_atomics = true,
	.num_sdma_queues_per_engine = 2,
};
#endif

#ifdef CONFIG_DRM_AMDGPU_CIK
static const struct kfd_device_info hawaii_device_info = {
	.asic_name = "hawaii",
	.gfx_target_version = 70001,
	.max_pasid_bits = 16,
	/* max num of queues for Hawaii. TODO: should be a dynamic value */
	.max_no_of_hqd	= 24,
	.doorbell_size  = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = false,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 2,
};
#endif

static const struct kfd_device_info tonga_device_info = {
	.asic_name = "tonga",
	.gfx_target_version = 80002,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = false,
	.needs_iommu_device = false,
	.needs_pci_atomics = true,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info fiji_device_info = {
	.asic_name = "fiji",
	.gfx_target_version = 80003,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = true,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info fiji_vf_device_info = {
	.asic_name = "fiji",
	.gfx_target_version = 80003,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info polaris10_device_info = {
	.asic_name = "polaris10",
	.gfx_target_version = 80003,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = true,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info polaris10_vf_device_info = {
	.asic_name = "polaris10",
	.gfx_target_version = 80003,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info polaris11_device_info = {
	.asic_name = "polaris11",
	.gfx_target_version = 80003,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = true,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info polaris12_device_info = {
	.asic_name = "polaris12",
	.gfx_target_version = 80003,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = true,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vegam_device_info = {
	.asic_name = "vegam",
	.gfx_target_version = 80003,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = true,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vega10_device_info = {
	.asic_name = "vega10",
	.gfx_target_version = 90000,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vega10_vf_device_info = {
	.asic_name = "vega10",
	.gfx_target_version = 90000,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vega12_device_info = {
	.asic_name = "vega12",
	.gfx_target_version = 90004,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vega20_device_info = {
	.asic_name = "vega20",
	.gfx_target_version = 90006,
	.max_pasid_bits = 16,
	.max_no_of_hqd	= 24,
	.doorbell_size	= 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info arcturus_device_info = {
	.asic_name = "arcturus",
	.gfx_target_version = 90008,
	.max_pasid_bits = 16,
	.max_no_of_hqd	= 24,
	.doorbell_size	= 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info aldebaran_device_info = {
	.asic_name = "aldebaran",
	.gfx_target_version = 90010,
	.max_pasid_bits = 16,
	.max_no_of_hqd	= 24,
	.doorbell_size	= 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info renoir_device_info = {
	.asic_name = "renoir",
	.gfx_target_version = 90012,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info navi10_device_info = {
	.asic_name = "navi10",
	.gfx_target_version = 100100,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.no_atomic_fw_version = 145,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info navi12_device_info = {
	.asic_name = "navi12",
	.gfx_target_version = 100101,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.no_atomic_fw_version = 145,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info navi14_device_info = {
	.asic_name = "navi14",
	.gfx_target_version = 100102,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.no_atomic_fw_version = 145,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info sienna_cichlid_device_info = {
	.asic_name = "sienna_cichlid",
	.gfx_target_version = 100300,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.no_atomic_fw_version = 92,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info navy_flounder_device_info = {
	.asic_name = "navy_flounder",
	.gfx_target_version = 100301,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.no_atomic_fw_version = 92,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info vangogh_device_info = {
	.asic_name = "vangogh",
	.gfx_target_version = 100303,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.no_atomic_fw_version = 92,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info dimgrey_cavefish_device_info = {
	.asic_name = "dimgrey_cavefish",
	.gfx_target_version = 100302,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.no_atomic_fw_version = 92,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info beige_goby_device_info = {
	.asic_name = "beige_goby",
	.gfx_target_version = 100304,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.no_atomic_fw_version = 92,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info yellow_carp_device_info = {
	.asic_name = "yellow_carp",
	.gfx_target_version = 100305,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.no_atomic_fw_version = 92,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info cyan_skillfish_device_info = {
	.asic_name = "cyan_skillfish",
	.gfx_target_version = 100103,
	.max_pasid_bits = 16,
	.max_no_of_hqd  = 24,
	.doorbell_size  = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.num_sdma_queues_per_engine = 8,
};

static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
				unsigned int chunk_size);
static void kfd_gtt_sa_fini(struct kfd_dev *kfd);

static int kfd_resume(struct kfd_dev *kfd);

struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
{
	struct kfd_dev *kfd;
	const struct kfd_device_info *device_info;
	const struct kfd2kgd_calls *f2g;
	struct pci_dev *pdev = adev->pdev;

	switch (adev->asic_type) {
#ifdef KFD_SUPPORT_IOMMU_V2
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_KAVERI:
		if (vf)
			device_info = NULL;
		else
			device_info = &kaveri_device_info;
		f2g = &gfx_v7_kfd2kgd;
		break;
#endif
	case CHIP_CARRIZO:
		if (vf)
			device_info = NULL;
		else
			device_info = &carrizo_device_info;
		f2g = &gfx_v8_kfd2kgd;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_HAWAII:
		if (vf)
			device_info = NULL;
		else
			device_info = &hawaii_device_info;
		f2g = &gfx_v7_kfd2kgd;
		break;
#endif
	case CHIP_TONGA:
		if (vf)
			device_info = NULL;
		else
			device_info = &tonga_device_info;
		f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_FIJI:
		if (vf)
			device_info = &fiji_vf_device_info;
		else
			device_info = &fiji_device_info;
		f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_POLARIS10:
		if (vf)
			device_info = &polaris10_vf_device_info;
		else
			device_info = &polaris10_device_info;
		f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_POLARIS11:
		if (vf)
			device_info = NULL;
		else
			device_info = &polaris11_device_info;
		f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_POLARIS12:
		if (vf)
			device_info = NULL;
		else
			device_info = &polaris12_device_info;
		f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_VEGAM:
		if (vf)
			device_info = NULL;
		else
			device_info = &vegam_device_info;
		f2g = &gfx_v8_kfd2kgd;
		break;
	default:
		switch (adev->ip_versions[GC_HWIP][0]) {
		case IP_VERSION(9, 0, 1):
			if (vf)
				device_info = &vega10_vf_device_info;
			else
				device_info = &vega10_device_info;
			f2g = &gfx_v9_kfd2kgd;
			break;
#ifdef KFD_SUPPORT_IOMMU_V2
		case IP_VERSION(9, 1, 0):
		case IP_VERSION(9, 2, 2):
			if (vf)
				device_info = NULL;
			else
				device_info = &raven_device_info;
			f2g = &gfx_v9_kfd2kgd;
			break;
#endif
		case IP_VERSION(9, 2, 1):
			if (vf)
				device_info = NULL;
			else
				device_info = &vega12_device_info;
			f2g = &gfx_v9_kfd2kgd;
			break;
		case IP_VERSION(9, 3, 0):
			if (vf)
				device_info = NULL;
			else
				device_info = &renoir_device_info;
			f2g = &gfx_v9_kfd2kgd;
			break;
		case IP_VERSION(9, 4, 0):
			if (vf)
				device_info = NULL;
			else
				device_info = &vega20_device_info;
			f2g = &gfx_v9_kfd2kgd;
			break;
		case IP_VERSION(9, 4, 1):
			device_info = &arcturus_device_info;
			f2g = &arcturus_kfd2kgd;
			break;
		case IP_VERSION(9, 4, 2):
			device_info = &aldebaran_device_info;
			f2g = &aldebaran_kfd2kgd;
			break;
		case IP_VERSION(10, 1, 10):
			if (vf)
				device_info = NULL;
			else
				device_info = &navi10_device_info;
			f2g = &gfx_v10_kfd2kgd;
			break;
		case IP_VERSION(10, 1, 2):
			device_info = &navi12_device_info;
			f2g = &gfx_v10_kfd2kgd;
			break;
		case IP_VERSION(10, 1, 1):
			if (vf)
				device_info = NULL;
			else
				device_info = &navi14_device_info;
			f2g = &gfx_v10_kfd2kgd;
			break;
		case IP_VERSION(10, 1, 3):
			if (vf)
				device_info = NULL;
			else
				device_info = &cyan_skillfish_device_info;
			f2g = &gfx_v10_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 0):
			device_info = &sienna_cichlid_device_info;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 2):
			device_info = &navy_flounder_device_info;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 1):
			if (vf)
				device_info = NULL;
			else
				device_info = &vangogh_device_info;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 4):
			device_info = &dimgrey_cavefish_device_info;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 5):
			device_info = &beige_goby_device_info;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 3):
			if (vf)
				device_info = NULL;
			else
				device_info = &yellow_carp_device_info;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		default:
			return NULL;
		}
		break;
	}

	if (!device_info || !f2g) {
		if (adev->ip_versions[GC_HWIP][0])
			dev_err(kfd_device, "GC IP %06x %s not supported in kfd\n",
				adev->ip_versions[GC_HWIP][0], vf ? "VF" : "");
		else
			dev_err(kfd_device, "%s %s not supported in kfd\n",
				amdgpu_asic_name[adev->asic_type], vf ? "VF" : "");
		return NULL;
	}

	kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
	if (!kfd)
		return NULL;

	kfd->adev = adev;
	kfd->device_info = device_info;
	kfd->pdev = pdev;
	kfd->init_complete = false;
	kfd->kfd2kgd = f2g;
	atomic_set(&kfd->compute_profile, 0);

	mutex_init(&kfd->doorbell_mutex);
	memset(&kfd->doorbell_available_index, 0,
		sizeof(kfd->doorbell_available_index));

	atomic_set(&kfd->sram_ecc_flag, 0);

	ida_init(&kfd->doorbell_ida);

	return kfd;
}

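/*
 * Hypothetical usage sketch (for illustration only; the real calls are
 * made from the amdgpu side during device bring-up, see amdgpu_amdkfd.c):
 *
 *	adev->kfd.dev = kgd2kfd_probe(adev, amdgpu_sriov_vf(adev));
 *	if (adev->kfd.dev)
 *		kgd2kfd_device_init(adev->kfd.dev, adev_to_drm(adev),
 *				    &gpu_resources);
 */
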
static void kfd_cwsr_init(struct kfd_dev *kfd)
{
	if (cwsr_enable && kfd->device_info->supports_cwsr) {
		if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_gfx8_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
		} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_arcturus_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex);
		} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_aldebaran_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex);
		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_gfx9_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_nv1x_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
		} else {
			BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_gfx10_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
		}

		kfd->cwsr_enabled = true;
	}
}

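/*
 * Summary of the trap handler selection above, by GC IP version:
 *
 *	GC < 9.0.1        -> cwsr_trap_gfx8_hex
 *	GC == 9.4.1       -> cwsr_trap_arcturus_hex
 *	GC == 9.4.2       -> cwsr_trap_aldebaran_hex
 *	other GC < 10.1.1 -> cwsr_trap_gfx9_hex
 *	GC < 10.3.0       -> cwsr_trap_nv1x_hex
 *	everything else   -> cwsr_trap_gfx10_hex
 */
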
static int kfd_gws_init(struct kfd_dev *kfd)
{
	int ret = 0;

	if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
		return 0;

	if (hws_gws_support || (KFD_IS_SOC15(kfd) &&
		((KFD_GC_VERSION(kfd) == IP_VERSION(9, 0, 1)
			&& kfd->mec2_fw_version >= 0x81b3) ||
		(KFD_GC_VERSION(kfd) <= IP_VERSION(9, 4, 0)
			&& kfd->mec2_fw_version >= 0x1b3)  ||
		(KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)
			&& kfd->mec2_fw_version >= 0x30)   ||
		(KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)
			&& kfd->mec2_fw_version >= 0x28))))
		ret = amdgpu_amdkfd_alloc_gws(kfd->adev,
				kfd->adev->gds.gws_size, &kfd->gws);

	return ret;
}

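/*
 * The checks above encode the minimum MEC2 firmware versions that carry
 * GWS support on each GC IP version:
 *
 *	GC 9.0.1:    0x81b3
 *	GC <= 9.4.0: 0x1b3
 *	GC 9.4.1:    0x30
 *	GC 9.4.2:    0x28
 */
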
static void kfd_smi_init(struct kfd_dev *dev)
{
	INIT_LIST_HEAD(&dev->smi_clients);
	spin_lock_init(&dev->smi_lock);
}

bool kgd2kfd_device_init(struct kfd_dev *kfd,
			 struct drm_device *ddev,
			 const struct kgd2kfd_shared_resources *gpu_resources)
{
	unsigned int size, map_process_packet_size;

	kfd->ddev = ddev;
	kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
			KGD_ENGINE_MEC1);
	kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
			KGD_ENGINE_MEC2);
	kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
			KGD_ENGINE_SDMA1);
	kfd->shared_resources = *gpu_resources;

	kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
	kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
	kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
			- kfd->vm_info.first_vmid_kfd + 1;

	/* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
	 * 32 and 64-bit requests are possible and must be
	 * supported.
	 */
	kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->adev);
	if (!kfd->pci_atomic_requested &&
	    kfd->device_info->needs_pci_atomics &&
	    (!kfd->device_info->no_atomic_fw_version ||
	     kfd->mec_fw_version < kfd->device_info->no_atomic_fw_version)) {
		dev_info(kfd_device,
			 "skipped device %x:%x, PCI rejects atomics %d<%d\n",
			 kfd->pdev->vendor, kfd->pdev->device,
			 kfd->mec_fw_version,
			 kfd->device_info->no_atomic_fw_version);
		return false;
	}

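	/*
	 * Example from the device info tables above: Navi10 sets
	 * needs_pci_atomics and no_atomic_fw_version = 145, so on a bus
	 * without PCIe atomics support it is only usable with MEC firmware
	 * version 145 or newer.
	 */
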
	/* Verify module parameters regarding the number of mapped processes */
	if ((hws_max_conc_proc < 0)
			|| (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
		dev_err(kfd_device,
			"hws_max_conc_proc %d must be between 0 and %d, use %d instead\n",
			hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
			kfd->vm_info.vmid_num_kfd);
		kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
	} else
		kfd->max_proc_per_quantum = hws_max_conc_proc;

	/* calculate max size of mqds needed for queues */
	size = max_num_of_queues_per_device *
			kfd->device_info->mqd_size_aligned;

	/*
	 * calculate max size of runlist packet.
	 * There can be only 2 packets at once
	 */
	map_process_packet_size = KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) ?
				sizeof(struct pm4_mes_map_process_aldebaran) :
				sizeof(struct pm4_mes_map_process);
	size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size +
		max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
		+ sizeof(struct pm4_mes_runlist)) * 2;

	/* Add size of HIQ & DIQ */
	size += KFD_KERNEL_QUEUE_SIZE * 2;

	/* add another 512KB for all other allocations on gart (HPD, fences) */
	size += 512 * 1024;
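
	/*
	 * Worked example, assuming the default module parameter value
	 * max_num_of_queues_per_device == 4096: the MQD portion alone is
	 * 4096 * 768 bytes == 3 MiB, before the runlist packets, HIQ/DIQ
	 * and the extra 512KB are added.
	 */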

	if (amdgpu_amdkfd_alloc_gtt_mem(
			kfd->adev, size, &kfd->gtt_mem,
			&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
			false)) {
		dev_err(kfd_device, "Could not allocate %d bytes\n", size);
		goto alloc_gtt_mem_failure;
	}

	dev_info(kfd_device, "Allocated %d bytes on gart\n", size);

	/* Initialize GTT sa with 512 byte chunk size */
	if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
		dev_err(kfd_device, "Error initializing gtt sub-allocator\n");
		goto kfd_gtt_sa_init_error;
	}

	if (kfd_doorbell_init(kfd)) {
		dev_err(kfd_device,
			"Error initializing doorbell aperture\n");
		goto kfd_doorbell_error;
	}

	kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;

	kfd->noretry = kfd->adev->gmc.noretry;

	if (kfd_interrupt_init(kfd)) {
		dev_err(kfd_device, "Error initializing interrupts\n");
		goto kfd_interrupt_error;
	}

	kfd->dqm = device_queue_manager_init(kfd);
	if (!kfd->dqm) {
		dev_err(kfd_device, "Error initializing queue manager\n");
		goto device_queue_manager_error;
	}

	/* If supported on this device, allocate global GWS that is shared
	 * by all KFD processes
	 */
	if (kfd_gws_init(kfd)) {
		dev_err(kfd_device, "Could not allocate %d gws\n",
			kfd->adev->gds.gws_size);
		goto gws_error;
	}

	/* If CRAT is broken, IOMMU support won't be enabled */
	kfd_double_confirm_iommu_support(kfd);

	if (kfd_iommu_device_init(kfd)) {
		kfd->use_iommu_v2 = false;
		dev_err(kfd_device, "Error initializing iommuv2\n");
		goto device_iommu_error;
	}

	kfd_cwsr_init(kfd);

	svm_migrate_init(kfd->adev);

	if (kgd2kfd_resume_iommu(kfd))
		goto device_iommu_error;

	if (kfd_resume(kfd))
		goto kfd_resume_error;

	kfd->dbgmgr = NULL;

	if (kfd_topology_add_device(kfd)) {
		dev_err(kfd_device, "Error adding device to topology\n");
		goto kfd_topology_add_device_error;
	}

	kfd_smi_init(kfd);

	kfd->init_complete = true;
	dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
		 kfd->pdev->device);

	pr_debug("Starting kfd with the following scheduling policy %d\n",
		kfd->dqm->sched_policy);

	goto out;

kfd_topology_add_device_error:
kfd_resume_error:
device_iommu_error:
gws_error:
	device_queue_manager_uninit(kfd->dqm);
device_queue_manager_error:
	kfd_interrupt_exit(kfd);
kfd_interrupt_error:
	kfd_doorbell_fini(kfd);
kfd_doorbell_error:
	kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
	amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
alloc_gtt_mem_failure:
	if (kfd->gws)
		amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws);
	dev_err(kfd_device,
		"device %x:%x NOT added due to errors\n",
		kfd->pdev->vendor, kfd->pdev->device);
out:
	return kfd->init_complete;
}

void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
	if (kfd->init_complete) {
		device_queue_manager_uninit(kfd->dqm);
		kfd_interrupt_exit(kfd);
		kfd_topology_remove_device(kfd);
		kfd_doorbell_fini(kfd);
		ida_destroy(&kfd->doorbell_ida);
		kfd_gtt_sa_fini(kfd);
		amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
		if (kfd->gws)
			amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws);
	}

	kfree(kfd);
}

int kgd2kfd_pre_reset(struct kfd_dev *kfd)
{
	if (!kfd->init_complete)
		return 0;

	kfd_smi_event_update_gpu_reset(kfd, false);

	kfd->dqm->ops.pre_reset(kfd->dqm);

	kgd2kfd_suspend(kfd, false);

	kfd_signal_reset_event(kfd);
	return 0;
}

/*
 * FIXME: KFD won't be able to resume existing processes for now.
 * We will keep all existing processes in an evicted state and
 * wait for the processes to be terminated.
 */

int kgd2kfd_post_reset(struct kfd_dev *kfd)
{
	int ret;

	if (!kfd->init_complete)
		return 0;

	ret = kfd_resume(kfd);
	if (ret)
		return ret;
	atomic_dec(&kfd_locked);

	atomic_set(&kfd->sram_ecc_flag, 0);

	kfd_smi_event_update_gpu_reset(kfd, true);

	return 0;
}

bool kfd_is_locked(void)
{
	return (atomic_read(&kfd_locked) > 0);
}

void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
{
	if (!kfd->init_complete)
		return;

	/* for runtime suspend, skip locking kfd */
	if (!run_pm) {
		/* For first KFD device suspend all the KFD processes */
		if (atomic_inc_return(&kfd_locked) == 1)
			kfd_suspend_all_processes();
	}

	kfd->dqm->ops.stop(kfd->dqm);
	kfd_iommu_suspend(kfd);
}

int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
{
	int ret, count;

	if (!kfd->init_complete)
		return 0;

	ret = kfd_resume(kfd);
	if (ret)
		return ret;

	/* for runtime resume, skip unlocking kfd */
	if (!run_pm) {
		count = atomic_dec_return(&kfd_locked);
		WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
		if (count == 0)
			ret = kfd_resume_all_processes();
	}

	return ret;
}

int kgd2kfd_resume_iommu(struct kfd_dev *kfd)
{
	int err = 0;

	err = kfd_iommu_resume(kfd);
	if (err)
		dev_err(kfd_device,
			"Failed to resume IOMMU for device %x:%x\n",
			kfd->pdev->vendor, kfd->pdev->device);
	return err;
}

static int kfd_resume(struct kfd_dev *kfd)
{
	int err = 0;

	err = kfd->dqm->ops.start(kfd->dqm);
	if (err)
		dev_err(kfd_device,
			"Error starting queue manager for device %x:%x\n",
			kfd->pdev->vendor, kfd->pdev->device);

	return err;
}

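/*
 * Run the interrupt work on a CPU in the caller's NUMA node: pick the
 * next online CPU after the current one, wrapping back to the current
 * CPU if no other online CPU on this node is found.
 */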
static inline void kfd_queue_work(struct workqueue_struct *wq,
				  struct work_struct *work)
{
	int cpu, new_cpu;

	cpu = new_cpu = smp_processor_id();
	do {
		new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
		if (cpu_to_node(new_cpu) == numa_node_id())
			break;
	} while (cpu != new_cpu);

	queue_work_on(new_cpu, wq, work);
}

/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
	uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE];
	bool is_patched = false;
	unsigned long flags;

	if (!kfd->init_complete)
		return;

	if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) {
		dev_err_once(kfd_device, "Ring entry too small\n");
		return;
	}

	spin_lock_irqsave(&kfd->interrupt_lock, flags);

	if (kfd->interrupts_active
	    && interrupt_is_wanted(kfd, ih_ring_entry,
				   patched_ihre, &is_patched)
	    && enqueue_ih_ring_entry(kfd,
				     is_patched ? patched_ihre : ih_ring_entry))
		kfd_queue_work(kfd->ih_wq, &kfd->interrupt_work);

	spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
}

int kgd2kfd_quiesce_mm(struct mm_struct *mm)
{
	struct kfd_process *p;
	int r;

	/* Because we are called from arbitrary context (workqueue) as opposed
	 * to process context, kfd_process could attempt to exit while we are
	 * running so the lookup function increments the process ref count.
	 */
	p = kfd_lookup_process_by_mm(mm);
	if (!p)
		return -ESRCH;

	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
	r = kfd_process_evict_queues(p);

	kfd_unref_process(p);
	return r;
}

int kgd2kfd_resume_mm(struct mm_struct *mm)
{
	struct kfd_process *p;
	int r;

	/* Because we are called from arbitrary context (workqueue) as opposed
	 * to process context, kfd_process could attempt to exit while we are
	 * running so the lookup function increments the process ref count.
	 */
	p = kfd_lookup_process_by_mm(mm);
	if (!p)
		return -ESRCH;

	r = kfd_process_restore_queues(p);

	kfd_unref_process(p);
	return r;
}

/**
 * kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will
 *   prepare for safe eviction of KFD BOs that belong to the specified
 *   process.
 *
 * @mm: mm_struct that identifies the specified KFD process
 * @fence: eviction fence attached to KFD process BOs
 *
 */
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
					       struct dma_fence *fence)
{
	struct kfd_process *p;
	unsigned long active_time;
	unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS);

	if (!fence)
		return -EINVAL;

	if (dma_fence_is_signaled(fence))
		return 0;

	p = kfd_lookup_process_by_mm(mm);
	if (!p)
		return -ENODEV;

	if (fence->seqno == p->last_eviction_seqno)
		goto out;

	p->last_eviction_seqno = fence->seqno;

	/* Avoid KFD process starvation. Wait for at least
	 * PROCESS_ACTIVE_TIME_MS before evicting the process again
	 */
	active_time = get_jiffies_64() - p->last_restore_timestamp;
	if (delay_jiffies > active_time)
		delay_jiffies -= active_time;
	else
		delay_jiffies = 0;

	/* During process initialization eviction_work.dwork is initialized
	 * to kfd_evict_bo_worker
	 */
	WARN(debug_evictions, "Scheduling eviction of pid %d in %ld jiffies",
	     p->lead_thread->pid, delay_jiffies);
	schedule_delayed_work(&p->eviction_work, delay_jiffies);
out:
	kfd_unref_process(p);
	return 0;
}

static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
				unsigned int chunk_size)
{
	unsigned int num_of_longs;

	if (WARN_ON(buf_size < chunk_size))
		return -EINVAL;
	if (WARN_ON(buf_size == 0))
		return -EINVAL;
	if (WARN_ON(chunk_size == 0))
		return -EINVAL;

	kfd->gtt_sa_chunk_size = chunk_size;
	kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;

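	/* Equivalent to BITS_TO_LONGS(kfd->gtt_sa_num_of_chunks) */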
	num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) /
		BITS_PER_LONG;

	kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL);

	if (!kfd->gtt_sa_bitmap)
		return -ENOMEM;

	pr_debug("gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
			kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);

	mutex_init(&kfd->gtt_sa_lock);

	return 0;
}

static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
{
	mutex_destroy(&kfd->gtt_sa_lock);
	kfree(kfd->gtt_sa_bitmap);
}

static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
						unsigned int bit_num,
						unsigned int chunk_size)
{
	return start_addr + bit_num * chunk_size;
}

static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr,
						unsigned int bit_num,
						unsigned int chunk_size)
{
	return (uint32_t *) ((uint64_t) start_addr + bit_num * chunk_size);
}

int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
			struct kfd_mem_obj **mem_obj)
{
	unsigned int found, start_search, cur_size;

	if (size == 0)
		return -EINVAL;

	if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
		return -ENOMEM;

	*mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
	if (!(*mem_obj))
		return -ENOMEM;

	pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size);

	start_search = 0;

	mutex_lock(&kfd->gtt_sa_lock);

kfd_gtt_restart_search:
	/* Find the first chunk that is free */
	found = find_next_zero_bit(kfd->gtt_sa_bitmap,
					kfd->gtt_sa_num_of_chunks,
					start_search);

	pr_debug("Found = %d\n", found);

	/* If there wasn't any free chunk, bail out */
	if (found == kfd->gtt_sa_num_of_chunks)
		goto kfd_gtt_no_free_chunk;

	/* Update fields of mem_obj */
	(*mem_obj)->range_start = found;
	(*mem_obj)->range_end = found;
	(*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr(
					kfd->gtt_start_gpu_addr,
					found,
					kfd->gtt_sa_chunk_size);
	(*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr(
					kfd->gtt_start_cpu_ptr,
					found,
					kfd->gtt_sa_chunk_size);

	pr_debug("gpu_addr = %p, cpu_addr = %p\n",
			(uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr);

	/* If we need only one chunk, mark it as allocated and get out */
	if (size <= kfd->gtt_sa_chunk_size) {
		pr_debug("Single bit\n");
		set_bit(found, kfd->gtt_sa_bitmap);
		goto kfd_gtt_out;
	}

	/* Otherwise, try to see if we have enough contiguous chunks */
	cur_size = size - kfd->gtt_sa_chunk_size;
	do {
		(*mem_obj)->range_end =
			find_next_zero_bit(kfd->gtt_sa_bitmap,
					kfd->gtt_sa_num_of_chunks, ++found);
		/*
		 * If the next free chunk is not contiguous, then we need to
		 * restart our search from the last free chunk we found (which
		 * wasn't contiguous to the previous ones)
		 */
		if ((*mem_obj)->range_end != found) {
			start_search = found;
			goto kfd_gtt_restart_search;
		}

		/*
		 * If we reached the end of the buffer, bail out with an error
		 */
		if (found == kfd->gtt_sa_num_of_chunks)
			goto kfd_gtt_no_free_chunk;

		/* Check if we don't need another chunk */
		if (cur_size <= kfd->gtt_sa_chunk_size)
			cur_size = 0;
		else
			cur_size -= kfd->gtt_sa_chunk_size;

	} while (cur_size > 0);

	pr_debug("range_start = %d, range_end = %d\n",
		(*mem_obj)->range_start, (*mem_obj)->range_end);

	/* Mark the chunks as allocated */
	for (found = (*mem_obj)->range_start;
		found <= (*mem_obj)->range_end;
		found++)
		set_bit(found, kfd->gtt_sa_bitmap);

kfd_gtt_out:
	mutex_unlock(&kfd->gtt_sa_lock);
	return 0;

kfd_gtt_no_free_chunk:
	pr_debug("Allocation failed with mem_obj = %p\n", *mem_obj);
	mutex_unlock(&kfd->gtt_sa_lock);
	kfree(*mem_obj);
	return -ENOMEM;
}

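/*
 * Minimal usage sketch for the sub-allocator (hypothetical caller; the
 * real users live elsewhere in amdkfd):
 *
 *	struct kfd_mem_obj *mem_obj;
 *
 *	if (!kfd_gtt_sa_allocate(kfd, 2 * 512, &mem_obj)) {
 *		// write through mem_obj->cpu_ptr, hand mem_obj->gpu_addr
 *		// to the GPU, then release the chunks:
 *		kfd_gtt_sa_free(kfd, mem_obj);
 *	}
 */
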
int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
{
	unsigned int bit;

	/* Act like kfree when trying to free a NULL object */
	if (!mem_obj)
		return 0;

	pr_debug("Free mem_obj = %p, range_start = %d, range_end = %d\n",
			mem_obj, mem_obj->range_start, mem_obj->range_end);

	mutex_lock(&kfd->gtt_sa_lock);

	/* Mark the chunks as free */
	for (bit = mem_obj->range_start;
		bit <= mem_obj->range_end;
		bit++)
		clear_bit(bit, kfd->gtt_sa_bitmap);

	mutex_unlock(&kfd->gtt_sa_lock);

	kfree(mem_obj);
	return 0;
}

void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
{
	if (kfd)
		atomic_inc(&kfd->sram_ecc_flag);
}

void kfd_inc_compute_active(struct kfd_dev *kfd)
{
	if (atomic_inc_return(&kfd->compute_profile) == 1)
		amdgpu_amdkfd_set_compute_idle(kfd->adev, false);
}

void kfd_dec_compute_active(struct kfd_dev *kfd)
{
	int count = atomic_dec_return(&kfd->compute_profile);

	if (count == 0)
		amdgpu_amdkfd_set_compute_idle(kfd->adev, true);
	WARN_ONCE(count < 0, "Compute profile ref. count error");
}

void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
{
	if (kfd && kfd->init_complete)
		kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);
}

/* kfd_get_num_sdma_engines returns the number of PCIe-optimized SDMA engines
 * and kfd_get_num_xgmi_sdma_engines returns the number of XGMI SDMA engines.
 * When the device has more than two engines, we reserve two for PCIe to enable
 * full-duplex and the rest are used as XGMI.
 */
unsigned int kfd_get_num_sdma_engines(struct kfd_dev *kdev)
{
	/* If XGMI is not supported, all SDMA engines are PCIe */
	if (!kdev->adev->gmc.xgmi.supported)
		return kdev->adev->sdma.num_instances;

	return min(kdev->adev->sdma.num_instances, 2);
}

unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_dev *kdev)
{
	/* After reserving engines for PCIe, the rest are XGMI */
	return kdev->adev->sdma.num_instances - kfd_get_num_sdma_engines(kdev);
}
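
/*
 * Example: on a hypothetical device with 8 SDMA instances and XGMI
 * supported, kfd_get_num_sdma_engines() returns 2 and
 * kfd_get_num_xgmi_sdma_engines() returns the remaining 6; without XGMI
 * support all 8 engines are PCIe.
 */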

#if defined(CONFIG_DEBUG_FS)

/* This function will send a packet to the HIQ to hang the HWS,
 * which will trigger a GPU reset and bring the HWS back to a normal state
 */
int kfd_debugfs_hang_hws(struct kfd_dev *dev)
{
	if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) {
		pr_err("HWS is not enabled");
		return -EINVAL;
	}

	return dqm_debugfs_hang_hws(dev->dqm);
}

#endif