xref: /linux/drivers/gpu/drm/amd/amdkfd/kfd_device.c (revision db5b5c679e6cad2bb147337af6c378d278231b45)
1 /*
2  * Copyright 2014 Advanced Micro Devices, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20  * OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 #include <linux/bsearch.h>
24 #include <linux/pci.h>
25 #include <linux/slab.h>
26 #include "kfd_priv.h"
27 #include "kfd_device_queue_manager.h"
28 #include "kfd_pm4_headers_vi.h"
29 #include "kfd_pm4_headers_aldebaran.h"
30 #include "cwsr_trap_handler.h"
31 #include "kfd_iommu.h"
32 #include "amdgpu_amdkfd.h"
33 #include "kfd_smi_events.h"
34 #include "kfd_migrate.h"
35 #include "amdgpu.h"
36 
37 #define MQD_SIZE_ALIGNED 768
38 
39 /*
40  * kfd_locked is used to lock the KFD driver during suspend or reset.
41  * Once locked, the KFD driver will stop any further GPU execution.
42  * Creating a new process (open) will then return -EAGAIN.
43  */
44 static atomic_t kfd_locked = ATOMIC_INIT(0);
45 
46 #ifdef CONFIG_DRM_AMDGPU_CIK
47 extern const struct kfd2kgd_calls gfx_v7_kfd2kgd;
48 #endif
49 extern const struct kfd2kgd_calls gfx_v8_kfd2kgd;
50 extern const struct kfd2kgd_calls gfx_v9_kfd2kgd;
51 extern const struct kfd2kgd_calls arcturus_kfd2kgd;
52 extern const struct kfd2kgd_calls aldebaran_kfd2kgd;
53 extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
54 extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd;
55 
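/* Static per-ASIC KFD properties. Each kfd_device_info table below describes
 * one supported ASIC (and, where needed, its VF variant): PASID width,
 * HQD/doorbell/IH-ring sizing, the event interrupt class, CWSR and IOMMUv2
 * support, PCIe-atomics requirements and the SDMA engine/queue topology.
 * kgd2kfd_probe() picks the matching table for the probed device, or fails
 * if there is none.
 */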
56 #ifdef KFD_SUPPORT_IOMMU_V2
57 static const struct kfd_device_info kaveri_device_info = {
58 	.asic_name = "kaveri",
59 	.gfx_target_version = 70000,
60 	.max_pasid_bits = 16,
61 	/* max num of queues for KV. TODO: should be a dynamic value */
62 	.max_no_of_hqd	= 24,
63 	.doorbell_size  = 4,
64 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
65 	.event_interrupt_class = &event_interrupt_class_cik,
66 	.num_of_watch_points = 4,
67 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
68 	.supports_cwsr = false,
69 	.needs_iommu_device = true,
70 	.needs_pci_atomics = false,
71 	.num_sdma_engines = 2,
72 	.num_xgmi_sdma_engines = 0,
73 	.num_sdma_queues_per_engine = 2,
74 };
75 
76 static const struct kfd_device_info carrizo_device_info = {
77 	.asic_name = "carrizo",
78 	.gfx_target_version = 80001,
79 	.max_pasid_bits = 16,
80 	/* max num of queues for CZ. TODO: should be a dynamic value */
81 	.max_no_of_hqd	= 24,
82 	.doorbell_size  = 4,
83 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
84 	.event_interrupt_class = &event_interrupt_class_cik,
85 	.num_of_watch_points = 4,
86 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
87 	.supports_cwsr = true,
88 	.needs_iommu_device = true,
89 	.needs_pci_atomics = false,
90 	.num_sdma_engines = 2,
91 	.num_xgmi_sdma_engines = 0,
92 	.num_sdma_queues_per_engine = 2,
93 };
94 
95 static const struct kfd_device_info raven_device_info = {
96 	.asic_name = "raven",
97 	.gfx_target_version = 90002,
98 	.max_pasid_bits = 16,
99 	.max_no_of_hqd  = 24,
100 	.doorbell_size  = 8,
101 	.ih_ring_entry_size = 8 * sizeof(uint32_t),
102 	.event_interrupt_class = &event_interrupt_class_v9,
103 	.num_of_watch_points = 4,
104 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
105 	.supports_cwsr = true,
106 	.needs_iommu_device = true,
107 	.needs_pci_atomics = true,
108 	.num_sdma_engines = 1,
109 	.num_xgmi_sdma_engines = 0,
110 	.num_sdma_queues_per_engine = 2,
111 };
112 #endif
113 
114 #ifdef CONFIG_DRM_AMDGPU_CIK
115 static const struct kfd_device_info hawaii_device_info = {
116 	.asic_name = "hawaii",
117 	.gfx_target_version = 70001,
118 	.max_pasid_bits = 16,
119 	/* max num of queues for Hawaii. TODO: should be a dynamic value */
120 	.max_no_of_hqd	= 24,
121 	.doorbell_size  = 4,
122 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
123 	.event_interrupt_class = &event_interrupt_class_cik,
124 	.num_of_watch_points = 4,
125 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
126 	.supports_cwsr = false,
127 	.needs_iommu_device = false,
128 	.needs_pci_atomics = false,
129 	.num_sdma_engines = 2,
130 	.num_xgmi_sdma_engines = 0,
131 	.num_sdma_queues_per_engine = 2,
132 };
133 #endif
134 
135 static const struct kfd_device_info tonga_device_info = {
136 	.asic_name = "tonga",
137 	.gfx_target_version = 80002,
138 	.max_pasid_bits = 16,
139 	.max_no_of_hqd  = 24,
140 	.doorbell_size  = 4,
141 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
142 	.event_interrupt_class = &event_interrupt_class_cik,
143 	.num_of_watch_points = 4,
144 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
145 	.supports_cwsr = false,
146 	.needs_iommu_device = false,
147 	.needs_pci_atomics = true,
148 	.num_sdma_engines = 2,
149 	.num_xgmi_sdma_engines = 0,
150 	.num_sdma_queues_per_engine = 2,
151 };
152 
153 static const struct kfd_device_info fiji_device_info = {
154 	.asic_name = "fiji",
155 	.gfx_target_version = 80003,
156 	.max_pasid_bits = 16,
157 	.max_no_of_hqd  = 24,
158 	.doorbell_size  = 4,
159 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
160 	.event_interrupt_class = &event_interrupt_class_cik,
161 	.num_of_watch_points = 4,
162 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
163 	.supports_cwsr = true,
164 	.needs_iommu_device = false,
165 	.needs_pci_atomics = true,
166 	.num_sdma_engines = 2,
167 	.num_xgmi_sdma_engines = 0,
168 	.num_sdma_queues_per_engine = 2,
169 };
170 
171 static const struct kfd_device_info fiji_vf_device_info = {
172 	.asic_name = "fiji",
173 	.gfx_target_version = 80003,
174 	.max_pasid_bits = 16,
175 	.max_no_of_hqd  = 24,
176 	.doorbell_size  = 4,
177 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
178 	.event_interrupt_class = &event_interrupt_class_cik,
179 	.num_of_watch_points = 4,
180 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
181 	.supports_cwsr = true,
182 	.needs_iommu_device = false,
183 	.needs_pci_atomics = false,
184 	.num_sdma_engines = 2,
185 	.num_xgmi_sdma_engines = 0,
186 	.num_sdma_queues_per_engine = 2,
187 };
188 
189 
190 static const struct kfd_device_info polaris10_device_info = {
191 	.asic_name = "polaris10",
192 	.gfx_target_version = 80003,
193 	.max_pasid_bits = 16,
194 	.max_no_of_hqd  = 24,
195 	.doorbell_size  = 4,
196 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
197 	.event_interrupt_class = &event_interrupt_class_cik,
198 	.num_of_watch_points = 4,
199 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
200 	.supports_cwsr = true,
201 	.needs_iommu_device = false,
202 	.needs_pci_atomics = true,
203 	.num_sdma_engines = 2,
204 	.num_xgmi_sdma_engines = 0,
205 	.num_sdma_queues_per_engine = 2,
206 };
207 
208 static const struct kfd_device_info polaris10_vf_device_info = {
209 	.asic_name = "polaris10",
210 	.gfx_target_version = 80003,
211 	.max_pasid_bits = 16,
212 	.max_no_of_hqd  = 24,
213 	.doorbell_size  = 4,
214 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
215 	.event_interrupt_class = &event_interrupt_class_cik,
216 	.num_of_watch_points = 4,
217 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
218 	.supports_cwsr = true,
219 	.needs_iommu_device = false,
220 	.needs_pci_atomics = false,
221 	.num_sdma_engines = 2,
222 	.num_xgmi_sdma_engines = 0,
223 	.num_sdma_queues_per_engine = 2,
224 };
225 
226 static const struct kfd_device_info polaris11_device_info = {
227 	.asic_name = "polaris11",
228 	.gfx_target_version = 80003,
229 	.max_pasid_bits = 16,
230 	.max_no_of_hqd  = 24,
231 	.doorbell_size  = 4,
232 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
233 	.event_interrupt_class = &event_interrupt_class_cik,
234 	.num_of_watch_points = 4,
235 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
236 	.supports_cwsr = true,
237 	.needs_iommu_device = false,
238 	.needs_pci_atomics = true,
239 	.num_sdma_engines = 2,
240 	.num_xgmi_sdma_engines = 0,
241 	.num_sdma_queues_per_engine = 2,
242 };
243 
244 static const struct kfd_device_info polaris12_device_info = {
245 	.asic_name = "polaris12",
246 	.gfx_target_version = 80003,
247 	.max_pasid_bits = 16,
248 	.max_no_of_hqd  = 24,
249 	.doorbell_size  = 4,
250 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
251 	.event_interrupt_class = &event_interrupt_class_cik,
252 	.num_of_watch_points = 4,
253 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
254 	.supports_cwsr = true,
255 	.needs_iommu_device = false,
256 	.needs_pci_atomics = true,
257 	.num_sdma_engines = 2,
258 	.num_xgmi_sdma_engines = 0,
259 	.num_sdma_queues_per_engine = 2,
260 };
261 
262 static const struct kfd_device_info vegam_device_info = {
263 	.asic_name = "vegam",
264 	.gfx_target_version = 80003,
265 	.max_pasid_bits = 16,
266 	.max_no_of_hqd  = 24,
267 	.doorbell_size  = 4,
268 	.ih_ring_entry_size = 4 * sizeof(uint32_t),
269 	.event_interrupt_class = &event_interrupt_class_cik,
270 	.num_of_watch_points = 4,
271 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
272 	.supports_cwsr = true,
273 	.needs_iommu_device = false,
274 	.needs_pci_atomics = true,
275 	.num_sdma_engines = 2,
276 	.num_xgmi_sdma_engines = 0,
277 	.num_sdma_queues_per_engine = 2,
278 };
279 
280 static const struct kfd_device_info vega10_device_info = {
281 	.asic_name = "vega10",
282 	.gfx_target_version = 90000,
283 	.max_pasid_bits = 16,
284 	.max_no_of_hqd  = 24,
285 	.doorbell_size  = 8,
286 	.ih_ring_entry_size = 8 * sizeof(uint32_t),
287 	.event_interrupt_class = &event_interrupt_class_v9,
288 	.num_of_watch_points = 4,
289 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
290 	.supports_cwsr = true,
291 	.needs_iommu_device = false,
292 	.needs_pci_atomics = false,
293 	.num_sdma_engines = 2,
294 	.num_xgmi_sdma_engines = 0,
295 	.num_sdma_queues_per_engine = 2,
296 };
297 
298 static const struct kfd_device_info vega10_vf_device_info = {
299 	.asic_name = "vega10",
300 	.gfx_target_version = 90000,
301 	.max_pasid_bits = 16,
302 	.max_no_of_hqd  = 24,
303 	.doorbell_size  = 8,
304 	.ih_ring_entry_size = 8 * sizeof(uint32_t),
305 	.event_interrupt_class = &event_interrupt_class_v9,
306 	.num_of_watch_points = 4,
307 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
308 	.supports_cwsr = true,
309 	.needs_iommu_device = false,
310 	.needs_pci_atomics = false,
311 	.num_sdma_engines = 2,
312 	.num_xgmi_sdma_engines = 0,
313 	.num_sdma_queues_per_engine = 2,
314 };
315 
316 static const struct kfd_device_info vega12_device_info = {
317 	.asic_name = "vega12",
318 	.gfx_target_version = 90004,
319 	.max_pasid_bits = 16,
320 	.max_no_of_hqd  = 24,
321 	.doorbell_size  = 8,
322 	.ih_ring_entry_size = 8 * sizeof(uint32_t),
323 	.event_interrupt_class = &event_interrupt_class_v9,
324 	.num_of_watch_points = 4,
325 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
326 	.supports_cwsr = true,
327 	.needs_iommu_device = false,
328 	.needs_pci_atomics = false,
329 	.num_sdma_engines = 2,
330 	.num_xgmi_sdma_engines = 0,
331 	.num_sdma_queues_per_engine = 2,
332 };
333 
334 static const struct kfd_device_info vega20_device_info = {
335 	.asic_name = "vega20",
336 	.gfx_target_version = 90006,
337 	.max_pasid_bits = 16,
338 	.max_no_of_hqd	= 24,
339 	.doorbell_size	= 8,
340 	.ih_ring_entry_size = 8 * sizeof(uint32_t),
341 	.event_interrupt_class = &event_interrupt_class_v9,
342 	.num_of_watch_points = 4,
343 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
344 	.supports_cwsr = true,
345 	.needs_iommu_device = false,
346 	.needs_pci_atomics = false,
347 	.num_sdma_engines = 2,
348 	.num_xgmi_sdma_engines = 0,
349 	.num_sdma_queues_per_engine = 8,
350 };
351 
352 static const struct kfd_device_info arcturus_device_info = {
353 	.asic_name = "arcturus",
354 	.gfx_target_version = 90008,
355 	.max_pasid_bits = 16,
356 	.max_no_of_hqd	= 24,
357 	.doorbell_size	= 8,
358 	.ih_ring_entry_size = 8 * sizeof(uint32_t),
359 	.event_interrupt_class = &event_interrupt_class_v9,
360 	.num_of_watch_points = 4,
361 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
362 	.supports_cwsr = true,
363 	.needs_iommu_device = false,
364 	.needs_pci_atomics = false,
365 	.num_sdma_engines = 2,
366 	.num_xgmi_sdma_engines = 6,
367 	.num_sdma_queues_per_engine = 8,
368 };
369 
370 static const struct kfd_device_info aldebaran_device_info = {
371 	.asic_name = "aldebaran",
372 	.gfx_target_version = 90010,
373 	.max_pasid_bits = 16,
374 	.max_no_of_hqd	= 24,
375 	.doorbell_size	= 8,
376 	.ih_ring_entry_size = 8 * sizeof(uint32_t),
377 	.event_interrupt_class = &event_interrupt_class_v9,
378 	.num_of_watch_points = 4,
379 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
380 	.supports_cwsr = true,
381 	.needs_iommu_device = false,
382 	.needs_pci_atomics = false,
383 	.num_sdma_engines = 2,
384 	.num_xgmi_sdma_engines = 3,
385 	.num_sdma_queues_per_engine = 8,
386 };
387 
388 static const struct kfd_device_info renoir_device_info = {
389 	.asic_name = "renoir",
390 	.gfx_target_version = 90012,
391 	.max_pasid_bits = 16,
392 	.max_no_of_hqd  = 24,
393 	.doorbell_size  = 8,
394 	.ih_ring_entry_size = 8 * sizeof(uint32_t),
395 	.event_interrupt_class = &event_interrupt_class_v9,
396 	.num_of_watch_points = 4,
397 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
398 	.supports_cwsr = true,
399 	.needs_iommu_device = false,
400 	.needs_pci_atomics = false,
401 	.num_sdma_engines = 1,
402 	.num_xgmi_sdma_engines = 0,
403 	.num_sdma_queues_per_engine = 2,
404 };
405 
406 static const struct kfd_device_info navi10_device_info = {
407 	.asic_name = "navi10",
408 	.gfx_target_version = 100100,
409 	.max_pasid_bits = 16,
410 	.max_no_of_hqd  = 24,
411 	.doorbell_size  = 8,
412 	.ih_ring_entry_size = 8 * sizeof(uint32_t),
413 	.event_interrupt_class = &event_interrupt_class_v9,
414 	.num_of_watch_points = 4,
415 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
416 	.needs_iommu_device = false,
417 	.supports_cwsr = true,
418 	.needs_pci_atomics = true,
419 	.no_atomic_fw_version = 145,
420 	.num_sdma_engines = 2,
421 	.num_xgmi_sdma_engines = 0,
422 	.num_sdma_queues_per_engine = 8,
423 };
424 
425 static const struct kfd_device_info navi12_device_info = {
426 	.asic_name = "navi12",
427 	.gfx_target_version = 100101,
428 	.max_pasid_bits = 16,
429 	.max_no_of_hqd  = 24,
430 	.doorbell_size  = 8,
431 	.ih_ring_entry_size = 8 * sizeof(uint32_t),
432 	.event_interrupt_class = &event_interrupt_class_v9,
433 	.num_of_watch_points = 4,
434 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
435 	.needs_iommu_device = false,
436 	.supports_cwsr = true,
437 	.needs_pci_atomics = true,
438 	.no_atomic_fw_version = 145,
439 	.num_sdma_engines = 2,
440 	.num_xgmi_sdma_engines = 0,
441 	.num_sdma_queues_per_engine = 8,
442 };
443 
444 static const struct kfd_device_info navi14_device_info = {
445 	.asic_name = "navi14",
446 	.gfx_target_version = 100102,
447 	.max_pasid_bits = 16,
448 	.max_no_of_hqd  = 24,
449 	.doorbell_size  = 8,
450 	.ih_ring_entry_size = 8 * sizeof(uint32_t),
451 	.event_interrupt_class = &event_interrupt_class_v9,
452 	.num_of_watch_points = 4,
453 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
454 	.needs_iommu_device = false,
455 	.supports_cwsr = true,
456 	.needs_pci_atomics = true,
457 	.no_atomic_fw_version = 145,
458 	.num_sdma_engines = 2,
459 	.num_xgmi_sdma_engines = 0,
460 	.num_sdma_queues_per_engine = 8,
461 };
462 
463 static const struct kfd_device_info sienna_cichlid_device_info = {
464 	.asic_name = "sienna_cichlid",
465 	.gfx_target_version = 100300,
466 	.max_pasid_bits = 16,
467 	.max_no_of_hqd  = 24,
468 	.doorbell_size  = 8,
469 	.ih_ring_entry_size = 8 * sizeof(uint32_t),
470 	.event_interrupt_class = &event_interrupt_class_v9,
471 	.num_of_watch_points = 4,
472 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
473 	.needs_iommu_device = false,
474 	.supports_cwsr = true,
475 	.needs_pci_atomics = true,
476 	.no_atomic_fw_version = 92,
477 	.num_sdma_engines = 4,
478 	.num_xgmi_sdma_engines = 0,
479 	.num_sdma_queues_per_engine = 8,
480 };
481 
482 static const struct kfd_device_info navy_flounder_device_info = {
483 	.asic_name = "navy_flounder",
484 	.gfx_target_version = 100301,
485 	.max_pasid_bits = 16,
486 	.max_no_of_hqd  = 24,
487 	.doorbell_size  = 8,
488 	.ih_ring_entry_size = 8 * sizeof(uint32_t),
489 	.event_interrupt_class = &event_interrupt_class_v9,
490 	.num_of_watch_points = 4,
491 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
492 	.needs_iommu_device = false,
493 	.supports_cwsr = true,
494 	.needs_pci_atomics = true,
495 	.no_atomic_fw_version = 92,
496 	.num_sdma_engines = 2,
497 	.num_xgmi_sdma_engines = 0,
498 	.num_sdma_queues_per_engine = 8,
499 };
500 
501 static const struct kfd_device_info vangogh_device_info = {
502 	.asic_name = "vangogh",
503 	.gfx_target_version = 100303,
504 	.max_pasid_bits = 16,
505 	.max_no_of_hqd  = 24,
506 	.doorbell_size  = 8,
507 	.ih_ring_entry_size = 8 * sizeof(uint32_t),
508 	.event_interrupt_class = &event_interrupt_class_v9,
509 	.num_of_watch_points = 4,
510 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
511 	.needs_iommu_device = false,
512 	.supports_cwsr = true,
513 	.needs_pci_atomics = true,
514 	.no_atomic_fw_version = 92,
515 	.num_sdma_engines = 1,
516 	.num_xgmi_sdma_engines = 0,
517 	.num_sdma_queues_per_engine = 2,
518 };
519 
520 static const struct kfd_device_info dimgrey_cavefish_device_info = {
521 	.asic_name = "dimgrey_cavefish",
522 	.gfx_target_version = 100302,
523 	.max_pasid_bits = 16,
524 	.max_no_of_hqd  = 24,
525 	.doorbell_size  = 8,
526 	.ih_ring_entry_size = 8 * sizeof(uint32_t),
527 	.event_interrupt_class = &event_interrupt_class_v9,
528 	.num_of_watch_points = 4,
529 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
530 	.needs_iommu_device = false,
531 	.supports_cwsr = true,
532 	.needs_pci_atomics = true,
533 	.no_atomic_fw_version = 92,
534 	.num_sdma_engines = 2,
535 	.num_xgmi_sdma_engines = 0,
536 	.num_sdma_queues_per_engine = 8,
537 };
538 
539 static const struct kfd_device_info beige_goby_device_info = {
540 	.asic_name = "beige_goby",
541 	.gfx_target_version = 100304,
542 	.max_pasid_bits = 16,
543 	.max_no_of_hqd  = 24,
544 	.doorbell_size  = 8,
545 	.ih_ring_entry_size = 8 * sizeof(uint32_t),
546 	.event_interrupt_class = &event_interrupt_class_v9,
547 	.num_of_watch_points = 4,
548 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
549 	.needs_iommu_device = false,
550 	.supports_cwsr = true,
551 	.needs_pci_atomics = true,
552 	.no_atomic_fw_version = 92,
553 	.num_sdma_engines = 1,
554 	.num_xgmi_sdma_engines = 0,
555 	.num_sdma_queues_per_engine = 8,
556 };
557 
558 static const struct kfd_device_info yellow_carp_device_info = {
559 	.asic_name = "yellow_carp",
560 	.gfx_target_version = 100305,
561 	.max_pasid_bits = 16,
562 	.max_no_of_hqd  = 24,
563 	.doorbell_size  = 8,
564 	.ih_ring_entry_size = 8 * sizeof(uint32_t),
565 	.event_interrupt_class = &event_interrupt_class_v9,
566 	.num_of_watch_points = 4,
567 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
568 	.needs_iommu_device = false,
569 	.supports_cwsr = true,
570 	.needs_pci_atomics = true,
571 	.no_atomic_fw_version = 92,
572 	.num_sdma_engines = 1,
573 	.num_xgmi_sdma_engines = 0,
574 	.num_sdma_queues_per_engine = 2,
575 };
576 
577 static const struct kfd_device_info cyan_skillfish_device_info = {
578 	.asic_name = "cyan_skillfish",
579 	.gfx_target_version = 100103,
580 	.max_pasid_bits = 16,
581 	.max_no_of_hqd  = 24,
582 	.doorbell_size  = 8,
583 	.ih_ring_entry_size = 8 * sizeof(uint32_t),
584 	.event_interrupt_class = &event_interrupt_class_v9,
585 	.num_of_watch_points = 4,
586 	.mqd_size_aligned = MQD_SIZE_ALIGNED,
587 	.needs_iommu_device = false,
588 	.supports_cwsr = true,
589 	.needs_pci_atomics = true,
590 	.num_sdma_engines = 2,
591 	.num_xgmi_sdma_engines = 0,
592 	.num_sdma_queues_per_engine = 8,
593 };
594 
595 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
596 				unsigned int chunk_size);
597 static void kfd_gtt_sa_fini(struct kfd_dev *kfd);
598 
599 static int kfd_resume(struct kfd_dev *kfd);
600 
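/* Map the probed device to a kfd_device_info table and a set of kfd2kgd
 * callbacks. Pre-SOC15 parts are matched on adev->asic_type; everything
 * newer is matched on the discovered GC IP version. Returns a freshly
 * allocated, minimally initialized struct kfd_dev, or NULL if the device
 * (or its VF variant) is not supported by KFD.
 */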
601 struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
602 {
603 	struct kfd_dev *kfd;
604 	const struct kfd_device_info *device_info;
605 	const struct kfd2kgd_calls *f2g;
606 	struct pci_dev *pdev = adev->pdev;
607 
608 	switch (adev->asic_type) {
609 #ifdef KFD_SUPPORT_IOMMU_V2
610 #ifdef CONFIG_DRM_AMDGPU_CIK
611 	case CHIP_KAVERI:
612 		if (vf)
613 			device_info = NULL;
614 		else
615 			device_info = &kaveri_device_info;
616 		f2g = &gfx_v7_kfd2kgd;
617 		break;
618 #endif
619 	case CHIP_CARRIZO:
620 		if (vf)
621 			device_info = NULL;
622 		else
623 			device_info = &carrizo_device_info;
624 		f2g = &gfx_v8_kfd2kgd;
625 		break;
626 #endif
627 #ifdef CONFIG_DRM_AMDGPU_CIK
628 	case CHIP_HAWAII:
629 		if (vf)
630 			device_info = NULL;
631 		else
632 			device_info = &hawaii_device_info;
633 		f2g = &gfx_v7_kfd2kgd;
634 		break;
635 #endif
636 	case CHIP_TONGA:
637 		if (vf)
638 			device_info = NULL;
639 		else
640 			device_info = &tonga_device_info;
641 		f2g = &gfx_v8_kfd2kgd;
642 		break;
643 	case CHIP_FIJI:
644 		if (vf)
645 			device_info = &fiji_vf_device_info;
646 		else
647 			device_info = &fiji_device_info;
648 		f2g = &gfx_v8_kfd2kgd;
649 		break;
650 	case CHIP_POLARIS10:
651 		if (vf)
652 			device_info = &polaris10_vf_device_info;
653 		else
654 			device_info = &polaris10_device_info;
655 		f2g = &gfx_v8_kfd2kgd;
656 		break;
657 	case CHIP_POLARIS11:
658 		if (vf)
659 			device_info = NULL;
660 		else
661 			device_info = &polaris11_device_info;
662 		f2g = &gfx_v8_kfd2kgd;
663 		break;
664 	case CHIP_POLARIS12:
665 		if (vf)
666 			device_info = NULL;
667 		else
668 			device_info = &polaris12_device_info;
669 		f2g = &gfx_v8_kfd2kgd;
670 		break;
671 	case CHIP_VEGAM:
672 		if (vf)
673 			device_info = NULL;
674 		else
675 			device_info = &vegam_device_info;
676 		f2g = &gfx_v8_kfd2kgd;
677 		break;
678 	default:
679 		switch (adev->ip_versions[GC_HWIP][0]) {
680 		case IP_VERSION(9, 0, 1):
681 			if (vf)
682 				device_info = &vega10_vf_device_info;
683 			else
684 				device_info = &vega10_device_info;
685 			f2g = &gfx_v9_kfd2kgd;
686 			break;
687 #ifdef KFD_SUPPORT_IOMMU_V2
688 		case IP_VERSION(9, 1, 0):
689 		case IP_VERSION(9, 2, 2):
690 			if (vf)
691 				device_info = NULL;
692 			else
693 				device_info = &raven_device_info;
694 			f2g = &gfx_v9_kfd2kgd;
695 			break;
696 #endif
697 		case IP_VERSION(9, 2, 1):
698 			if (vf)
699 				device_info = NULL;
700 			else
701 				device_info = &vega12_device_info;
702 			f2g = &gfx_v9_kfd2kgd;
703 			break;
704 		case IP_VERSION(9, 3, 0):
705 			if (vf)
706 				device_info = NULL;
707 			else
708 				device_info = &renoir_device_info;
709 			f2g = &gfx_v9_kfd2kgd;
710 			break;
711 		case IP_VERSION(9, 4, 0):
712 			if (vf)
713 				device_info = NULL;
714 			else
715 				device_info = &vega20_device_info;
716 			f2g = &gfx_v9_kfd2kgd;
717 			break;
718 		case IP_VERSION(9, 4, 1):
719 			device_info = &arcturus_device_info;
720 			f2g = &arcturus_kfd2kgd;
721 			break;
722 		case IP_VERSION(9, 4, 2):
723 			device_info = &aldebaran_device_info;
724 			f2g = &aldebaran_kfd2kgd;
725 			break;
726 		case IP_VERSION(10, 1, 10):
727 			if (vf)
728 				device_info = NULL;
729 			else
730 				device_info = &navi10_device_info;
731 			f2g = &gfx_v10_kfd2kgd;
732 			break;
733 		case IP_VERSION(10, 1, 2):
734 			device_info = &navi12_device_info;
735 			f2g = &gfx_v10_kfd2kgd;
736 			break;
737 		case IP_VERSION(10, 1, 1):
738 			if (vf)
739 				device_info = NULL;
740 			else
741 				device_info = &navi14_device_info;
742 			f2g = &gfx_v10_kfd2kgd;
743 			break;
744 		case IP_VERSION(10, 1, 3):
745 			if (vf)
746 				device_info = NULL;
747 			else
748 				device_info = &cyan_skillfish_device_info;
749 			f2g = &gfx_v10_kfd2kgd;
750 			break;
751 		case IP_VERSION(10, 3, 0):
752 			device_info = &sienna_cichlid_device_info;
753 			f2g = &gfx_v10_3_kfd2kgd;
754 			break;
755 		case IP_VERSION(10, 3, 2):
756 			device_info = &navy_flounder_device_info;
757 			f2g = &gfx_v10_3_kfd2kgd;
758 			break;
759 		case IP_VERSION(10, 3, 1):
760 			if (vf)
761 				device_info = NULL;
762 			else
763 				device_info = &vangogh_device_info;
764 			f2g = &gfx_v10_3_kfd2kgd;
765 			break;
766 		case IP_VERSION(10, 3, 4):
767 			device_info = &dimgrey_cavefish_device_info;
768 			f2g = &gfx_v10_3_kfd2kgd;
769 			break;
770 		case IP_VERSION(10, 3, 5):
771 			device_info = &beige_goby_device_info;
772 			f2g = &gfx_v10_3_kfd2kgd;
773 			break;
774 		case IP_VERSION(10, 3, 3):
775 			if (vf)
776 				device_info = NULL;
777 			else
778 				device_info = &yellow_carp_device_info;
779 			f2g = &gfx_v10_3_kfd2kgd;
780 			break;
781 		default:
782 			return NULL;
783 		}
784 		break;
785 	}
786 
787 	if (!device_info || !f2g) {
788 		if (adev->ip_versions[GC_HWIP][0])
789 			dev_err(kfd_device, "GC IP %06x %s not supported in kfd\n",
790 				adev->ip_versions[GC_HWIP][0], vf ? "VF" : "");
791 		else
792 			dev_err(kfd_device, "%s %s not supported in kfd\n",
793 				amdgpu_asic_name[adev->asic_type], vf ? "VF" : "");
794 		return NULL;
795 	}
796 
797 	kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
798 	if (!kfd)
799 		return NULL;
800 
801 	kfd->adev = adev;
802 	kfd->device_info = device_info;
803 	kfd->pdev = pdev;
804 	kfd->init_complete = false;
805 	kfd->kfd2kgd = f2g;
806 	atomic_set(&kfd->compute_profile, 0);
807 
808 	mutex_init(&kfd->doorbell_mutex);
809 	memset(&kfd->doorbell_available_index, 0,
810 		sizeof(kfd->doorbell_available_index));
811 
812 	atomic_set(&kfd->sram_ecc_flag, 0);
813 
814 	ida_init(&kfd->doorbell_ida);
815 
816 	return kfd;
817 }
818 
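/* Select the CWSR (compute wave save/restore) trap handler image that
 * matches this device's GC IP version and publish it via kfd->cwsr_isa.
 * Each image must fit in a single page; the BUILD_BUG_ON()s enforce that
 * at compile time.
 */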
819 static void kfd_cwsr_init(struct kfd_dev *kfd)
820 {
821 	if (cwsr_enable && kfd->device_info->supports_cwsr) {
822 		if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) {
823 			BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
824 			kfd->cwsr_isa = cwsr_trap_gfx8_hex;
825 			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
826 		} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) {
827 			BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE);
828 			kfd->cwsr_isa = cwsr_trap_arcturus_hex;
829 			kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex);
830 		} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) {
831 			BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE);
832 			kfd->cwsr_isa = cwsr_trap_aldebaran_hex;
833 			kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex);
834 		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
835 			BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
836 			kfd->cwsr_isa = cwsr_trap_gfx9_hex;
837 			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
838 		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) {
839 			BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
840 			kfd->cwsr_isa = cwsr_trap_nv1x_hex;
841 			kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
842 		} else {
843 			BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
844 			kfd->cwsr_isa = cwsr_trap_gfx10_hex;
845 			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
846 		}
847 
848 		kfd->cwsr_enabled = true;
849 	}
850 }
851 
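/* Allocate the device's global GWS (global wave sync) pool. GWS is only set
 * up when the HW scheduler is in use and either the hws_gws_support module
 * parameter forces it on or the MEC2 firmware on this GC IP version is
 * recent enough.
 */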
852 static int kfd_gws_init(struct kfd_dev *kfd)
853 {
854 	int ret = 0;
855 
856 	if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
857 		return 0;
858 
859 	if (hws_gws_support || (KFD_IS_SOC15(kfd) &&
860 		((KFD_GC_VERSION(kfd) == IP_VERSION(9, 0, 1)
861 			&& kfd->mec2_fw_version >= 0x81b3) ||
862 		(KFD_GC_VERSION(kfd) <= IP_VERSION(9, 4, 0)
863 			&& kfd->mec2_fw_version >= 0x1b3)  ||
864 		(KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)
865 			&& kfd->mec2_fw_version >= 0x30)   ||
866 		(KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)
867 			&& kfd->mec2_fw_version >= 0x28))))
868 		ret = amdgpu_amdkfd_alloc_gws(kfd->adev,
869 				kfd->adev->gds.gws_size, &kfd->gws);
870 
871 	return ret;
872 }
873 
874 static void kfd_smi_init(struct kfd_dev *dev)
{
875 	INIT_LIST_HEAD(&dev->smi_clients);
876 	spin_lock_init(&dev->smi_lock);
877 }
878 
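/* Second-stage device init: cache the MEC/SDMA firmware versions, carve out
 * the compute VMID range, verify the PCIe-atomics requirement, size and
 * allocate the GTT region used for MQDs, runlists and kernel queues, then
 * bring up doorbells, interrupts, the device queue manager, GWS, IOMMUv2,
 * CWSR, SVM migration and topology. Returns true and sets
 * kfd->init_complete on success.
 */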
879 bool kgd2kfd_device_init(struct kfd_dev *kfd,
880 			 struct drm_device *ddev,
881 			 const struct kgd2kfd_shared_resources *gpu_resources)
882 {
883 	unsigned int size, map_process_packet_size;
884 
885 	kfd->ddev = ddev;
886 	kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
887 			KGD_ENGINE_MEC1);
888 	kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
889 			KGD_ENGINE_MEC2);
890 	kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
891 			KGD_ENGINE_SDMA1);
892 	kfd->shared_resources = *gpu_resources;
893 
894 	kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
895 	kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
896 	kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
897 			- kfd->vm_info.first_vmid_kfd + 1;
898 
899 	/* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
900 	 * 32 and 64-bit requests are possible and must be
901 	 * supported.
902 	 */
903 	kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->adev);
904 	if (!kfd->pci_atomic_requested &&
905 	    kfd->device_info->needs_pci_atomics &&
906 	    (!kfd->device_info->no_atomic_fw_version ||
907 	     kfd->mec_fw_version < kfd->device_info->no_atomic_fw_version)) {
908 		dev_info(kfd_device,
909 			 "skipped device %x:%x, PCI rejects atomics %d<%d\n",
910 			 kfd->pdev->vendor, kfd->pdev->device,
911 			 kfd->mec_fw_version,
912 			 kfd->device_info->no_atomic_fw_version);
913 		return false;
914 	}
915 
916 	/* Verify module parameter regarding the number of mapped processes */
917 	if ((hws_max_conc_proc < 0)
918 			|| (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
919 		dev_err(kfd_device,
920 			"hws_max_conc_proc %d must be between 0 and %d, use %d instead\n",
921 			hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
922 			kfd->vm_info.vmid_num_kfd);
923 		kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
924 	} else
925 		kfd->max_proc_per_quantum = hws_max_conc_proc;
926 
927 	/* calculate max size of mqds needed for queues */
928 	size = max_num_of_queues_per_device *
929 			kfd->device_info->mqd_size_aligned;
930 
931 	/*
932 	 * Calculate the max size of the runlist packet.
933 	 * There can be only 2 packets at once.
934 	 */
935 	map_process_packet_size = KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) ?
936 				sizeof(struct pm4_mes_map_process_aldebaran) :
937 				sizeof(struct pm4_mes_map_process);
938 	size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size +
939 		max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
940 		+ sizeof(struct pm4_mes_runlist)) * 2;
941 
942 	/* Add size of HIQ & DIQ */
943 	size += KFD_KERNEL_QUEUE_SIZE * 2;
944 
945 	/* add another 512KB for all other allocations on gart (HPD, fences) */
946 	size += 512 * 1024;
947 
948 	if (amdgpu_amdkfd_alloc_gtt_mem(
949 			kfd->adev, size, &kfd->gtt_mem,
950 			&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
951 			false)) {
952 		dev_err(kfd_device, "Could not allocate %d bytes\n", size);
953 		goto alloc_gtt_mem_failure;
954 	}
955 
956 	dev_info(kfd_device, "Allocated %d bytes on gart\n", size);
957 
958 	/* Initialize GTT sa with 512 byte chunk size */
959 	if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
960 		dev_err(kfd_device, "Error initializing gtt sub-allocator\n");
961 		goto kfd_gtt_sa_init_error;
962 	}
963 
964 	if (kfd_doorbell_init(kfd)) {
965 		dev_err(kfd_device,
966 			"Error initializing doorbell aperture\n");
967 		goto kfd_doorbell_error;
968 	}
969 
970 	kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;
971 
972 	kfd->noretry = kfd->adev->gmc.noretry;
973 
974 	if (kfd_interrupt_init(kfd)) {
975 		dev_err(kfd_device, "Error initializing interrupts\n");
976 		goto kfd_interrupt_error;
977 	}
978 
979 	kfd->dqm = device_queue_manager_init(kfd);
980 	if (!kfd->dqm) {
981 		dev_err(kfd_device, "Error initializing queue manager\n");
982 		goto device_queue_manager_error;
983 	}
984 
985 	/* If supported on this device, allocate global GWS that is shared
986 	 * by all KFD processes
987 	 */
988 	if (kfd_gws_init(kfd)) {
989 		dev_err(kfd_device, "Could not allocate %d gws\n",
990 			kfd->adev->gds.gws_size);
991 		goto gws_error;
992 	}
993 
994 	/* If the CRAT is broken, IOMMU support will not be enabled */
995 	kfd_double_confirm_iommu_support(kfd);
996 
997 	if (kfd_iommu_device_init(kfd)) {
998 		kfd->use_iommu_v2 = false;
999 		dev_err(kfd_device, "Error initializing iommuv2\n");
1000 		goto device_iommu_error;
1001 	}
1002 
1003 	kfd_cwsr_init(kfd);
1004 
1005 	svm_migrate_init(kfd->adev);
1006 
1007 	if (kgd2kfd_resume_iommu(kfd))
1008 		goto device_iommu_error;
1009 
1010 	if (kfd_resume(kfd))
1011 		goto kfd_resume_error;
1012 
1013 	kfd->dbgmgr = NULL;
1014 
1015 	if (kfd_topology_add_device(kfd)) {
1016 		dev_err(kfd_device, "Error adding device to topology\n");
1017 		goto kfd_topology_add_device_error;
1018 	}
1019 
1020 	kfd_smi_init(kfd);
1021 
1022 	kfd->init_complete = true;
1023 	dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
1024 		 kfd->pdev->device);
1025 
1026 	pr_debug("Starting kfd with the following scheduling policy %d\n",
1027 		kfd->dqm->sched_policy);
1028 
1029 	goto out;
1030 
1031 kfd_topology_add_device_error:
1032 kfd_resume_error:
1033 device_iommu_error:
1034 gws_error:
1035 	device_queue_manager_uninit(kfd->dqm);
1036 device_queue_manager_error:
1037 	kfd_interrupt_exit(kfd);
1038 kfd_interrupt_error:
1039 	kfd_doorbell_fini(kfd);
1040 kfd_doorbell_error:
1041 	kfd_gtt_sa_fini(kfd);
1042 kfd_gtt_sa_init_error:
1043 	amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
1044 alloc_gtt_mem_failure:
1045 	if (kfd->gws)
1046 		amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws);
1047 	dev_err(kfd_device,
1048 		"device %x:%x NOT added due to errors\n",
1049 		kfd->pdev->vendor, kfd->pdev->device);
1050 out:
1051 	return kfd->init_complete;
1052 }
1053 
1054 void kgd2kfd_device_exit(struct kfd_dev *kfd)
1055 {
1056 	if (kfd->init_complete) {
1057 		device_queue_manager_uninit(kfd->dqm);
1058 		kfd_interrupt_exit(kfd);
1059 		kfd_topology_remove_device(kfd);
1060 		kfd_doorbell_fini(kfd);
1061 		ida_destroy(&kfd->doorbell_ida);
1062 		kfd_gtt_sa_fini(kfd);
1063 		amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
1064 		if (kfd->gws)
1065 			amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws);
1066 	}
1067 
1068 	kfree(kfd);
1069 }
1070 
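/* Called by the amdgpu (KGD) side before a GPU reset: stop the device queue
 * manager, suspend KFD processing on this device and signal reset events to
 * user mode so processes can tell that their context was lost.
 */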
1071 int kgd2kfd_pre_reset(struct kfd_dev *kfd)
1072 {
1073 	if (!kfd->init_complete)
1074 		return 0;
1075 
1076 	kfd_smi_event_update_gpu_reset(kfd, false);
1077 
1078 	kfd->dqm->ops.pre_reset(kfd->dqm);
1079 
1080 	kgd2kfd_suspend(kfd, false);
1081 
1082 	kfd_signal_reset_event(kfd);
1083 	return 0;
1084 }
1085 
1086 /*
1087  * FIXME: KFD won't be able to resume existing processes for now.
1088  * We will keep all existing processes in an evicted state and
1089  * wait for the processes to be terminated.
1090  */
1091 
1092 int kgd2kfd_post_reset(struct kfd_dev *kfd)
1093 {
1094 	int ret;
1095 
1096 	if (!kfd->init_complete)
1097 		return 0;
1098 
1099 	ret = kfd_resume(kfd);
1100 	if (ret)
1101 		return ret;
1102 	atomic_dec(&kfd_locked);
1103 
1104 	atomic_set(&kfd->sram_ecc_flag, 0);
1105 
1106 	kfd_smi_event_update_gpu_reset(kfd, true);
1107 
1108 	return 0;
1109 }
1110 
1111 bool kfd_is_locked(void)
1112 {
1113 	return (atomic_read(&kfd_locked) > 0);
1114 }
1115 
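/* Suspend KFD on this device. Outside of runtime PM the kfd_locked count is
 * bumped and, when this is the first device to suspend, all KFD processes
 * are suspended; the queue manager and IOMMU are then stopped.
 * kgd2kfd_resume() reverses this and resumes the processes once the last
 * device has been resumed.
 */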
1116 void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
1117 {
1118 	if (!kfd->init_complete)
1119 		return;
1120 
1121 	/* for runtime suspend, skip locking kfd */
1122 	if (!run_pm) {
1123 		/* For first KFD device suspend all the KFD processes */
1124 		if (atomic_inc_return(&kfd_locked) == 1)
1125 			kfd_suspend_all_processes();
1126 	}
1127 
1128 	kfd->dqm->ops.stop(kfd->dqm);
1129 	kfd_iommu_suspend(kfd);
1130 }
1131 
1132 int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
1133 {
1134 	int ret, count;
1135 
1136 	if (!kfd->init_complete)
1137 		return 0;
1138 
1139 	ret = kfd_resume(kfd);
1140 	if (ret)
1141 		return ret;
1142 
1143 	/* for runtime resume, skip unlocking kfd */
1144 	if (!run_pm) {
1145 		count = atomic_dec_return(&kfd_locked);
1146 		WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
1147 		if (count == 0)
1148 			ret = kfd_resume_all_processes();
1149 	}
1150 
1151 	return ret;
1152 }
1153 
1154 int kgd2kfd_resume_iommu(struct kfd_dev *kfd)
1155 {
1156 	int err = 0;
1157 
1158 	err = kfd_iommu_resume(kfd);
1159 	if (err)
1160 		dev_err(kfd_device,
1161 			"Failed to resume IOMMU for device %x:%x\n",
1162 			kfd->pdev->vendor, kfd->pdev->device);
1163 	return err;
1164 }
1165 
1166 static int kfd_resume(struct kfd_dev *kfd)
1167 {
1168 	int err = 0;
1169 
1170 	err = kfd->dqm->ops.start(kfd->dqm);
1171 	if (err)
1172 		dev_err(kfd_device,
1173 			"Error starting queue manager for device %x:%x\n",
1174 			kfd->pdev->vendor, kfd->pdev->device);
1175 
1176 	return err;
1177 }
1178 
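/* Pick an online CPU on the current NUMA node (starting from the CPU after
 * the one we are running on) and queue the interrupt work there, so the
 * bottom half stays node-local without always hitting the interrupted CPU.
 */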
1179 static inline void kfd_queue_work(struct workqueue_struct *wq,
1180 				  struct work_struct *work)
1181 {
1182 	int cpu, new_cpu;
1183 
1184 	cpu = new_cpu = smp_processor_id();
1185 	do {
1186 		new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
1187 		if (cpu_to_node(new_cpu) == numa_node_id())
1188 			break;
1189 	} while (cpu != new_cpu);
1190 
1191 	queue_work_on(new_cpu, wq, work);
1192 }
1193 
1194 /* This is called directly from KGD at ISR. */
1195 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
1196 {
1197 	uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE];
1198 	bool is_patched = false;
1199 	unsigned long flags;
1200 
1201 	if (!kfd->init_complete)
1202 		return;
1203 
1204 	if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) {
1205 		dev_err_once(kfd_device, "Ring entry too small\n");
1206 		return;
1207 	}
1208 
1209 	spin_lock_irqsave(&kfd->interrupt_lock, flags);
1210 
1211 	if (kfd->interrupts_active
1212 	    && interrupt_is_wanted(kfd, ih_ring_entry,
1213 				   patched_ihre, &is_patched)
1214 	    && enqueue_ih_ring_entry(kfd,
1215 				     is_patched ? patched_ihre : ih_ring_entry))
1216 		kfd_queue_work(kfd->ih_wq, &kfd->interrupt_work);
1217 
1218 	spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
1219 }
1220 
1221 int kgd2kfd_quiesce_mm(struct mm_struct *mm)
1222 {
1223 	struct kfd_process *p;
1224 	int r;
1225 
1226 	/* Because we are called from arbitrary context (workqueue) as opposed
1227 	 * to process context, kfd_process could attempt to exit while we are
1228 	 * running, so the lookup function increments the process ref count.
1229 	 */
1230 	p = kfd_lookup_process_by_mm(mm);
1231 	if (!p)
1232 		return -ESRCH;
1233 
1234 	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
1235 	r = kfd_process_evict_queues(p);
1236 
1237 	kfd_unref_process(p);
1238 	return r;
1239 }
1240 
1241 int kgd2kfd_resume_mm(struct mm_struct *mm)
1242 {
1243 	struct kfd_process *p;
1244 	int r;
1245 
1246 	/* Because we are called from arbitrary context (workqueue) as opposed
1247 	 * to process context, kfd_process could attempt to exit while we are
1248 	 * running, so the lookup function increments the process ref count.
1249 	 */
1250 	p = kfd_lookup_process_by_mm(mm);
1251 	if (!p)
1252 		return -ESRCH;
1253 
1254 	r = kfd_process_restore_queues(p);
1255 
1256 	kfd_unref_process(p);
1257 	return r;
1258 }
1259 
1260 /**
 * kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will
1261  *   prepare for safe eviction of KFD BOs that belong to the specified
1262  *   process.
1263  *
1264  * @mm: mm_struct that identifies the specified KFD process
1265  * @fence: eviction fence attached to KFD process BOs
1266  *
1267  */
1268 int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
1269 					       struct dma_fence *fence)
1270 {
1271 	struct kfd_process *p;
1272 	unsigned long active_time;
1273 	unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS);
1274 
1275 	if (!fence)
1276 		return -EINVAL;
1277 
1278 	if (dma_fence_is_signaled(fence))
1279 		return 0;
1280 
1281 	p = kfd_lookup_process_by_mm(mm);
1282 	if (!p)
1283 		return -ENODEV;
1284 
1285 	if (fence->seqno == p->last_eviction_seqno)
1286 		goto out;
1287 
1288 	p->last_eviction_seqno = fence->seqno;
1289 
1290 	/* Avoid KFD process starvation. Wait for at least
1291 	 * PROCESS_ACTIVE_TIME_MS before evicting the process again
1292 	 */
1293 	active_time = get_jiffies_64() - p->last_restore_timestamp;
1294 	if (delay_jiffies > active_time)
1295 		delay_jiffies -= active_time;
1296 	else
1297 		delay_jiffies = 0;
1298 
1299 	/* During process initialization eviction_work.dwork is initialized
1300 	 * to kfd_evict_bo_worker
1301 	 */
1302 	WARN(debug_evictions, "Scheduling eviction of pid %d in %ld jiffies",
1303 	     p->lead_thread->pid, delay_jiffies);
1304 	schedule_delayed_work(&p->eviction_work, delay_jiffies);
1305 out:
1306 	kfd_unref_process(p);
1307 	return 0;
1308 }
1309 
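/* GTT sub-allocator: split the GTT buffer allocated in kgd2kfd_device_init()
 * into fixed-size chunks (512 bytes there) and track them in a bitmap.
 * kfd_gtt_sa_allocate()/kfd_gtt_sa_free() hand out and reclaim contiguous
 * runs of chunks for small per-queue objects such as MQDs.
 */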
1310 static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
1311 				unsigned int chunk_size)
1312 {
1313 	unsigned int num_of_longs;
1314 
1315 	if (WARN_ON(buf_size < chunk_size))
1316 		return -EINVAL;
1317 	if (WARN_ON(buf_size == 0))
1318 		return -EINVAL;
1319 	if (WARN_ON(chunk_size == 0))
1320 		return -EINVAL;
1321 
1322 	kfd->gtt_sa_chunk_size = chunk_size;
1323 	kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;
1324 
1325 	num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) /
1326 		BITS_PER_LONG;
1327 
1328 	kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL);
1329 
1330 	if (!kfd->gtt_sa_bitmap)
1331 		return -ENOMEM;
1332 
1333 	pr_debug("gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
1334 			kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);
1335 
1336 	mutex_init(&kfd->gtt_sa_lock);
1337 
1338 	return 0;
1339 
1340 }
1341 
1342 static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
1343 {
1344 	mutex_destroy(&kfd->gtt_sa_lock);
1345 	kfree(kfd->gtt_sa_bitmap);
1346 }
1347 
1348 static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
1349 						unsigned int bit_num,
1350 						unsigned int chunk_size)
1351 {
1352 	return start_addr + bit_num * chunk_size;
1353 }
1354 
1355 static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr,
1356 						unsigned int bit_num,
1357 						unsigned int chunk_size)
1358 {
1359 	return (uint32_t *) ((uint64_t) start_addr + bit_num * chunk_size);
1360 }
1361 
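/* First-fit allocation from the chunk bitmap: find a free chunk, then keep
 * extending the range while the following chunks are also free. If the run
 * is broken, the search restarts from the next free chunk; if the end of
 * the bitmap is reached without a large enough run, -ENOMEM is returned.
 */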
1362 int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
1363 			struct kfd_mem_obj **mem_obj)
1364 {
1365 	unsigned int found, start_search, cur_size;
1366 
1367 	if (size == 0)
1368 		return -EINVAL;
1369 
1370 	if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
1371 		return -ENOMEM;
1372 
1373 	*mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
1374 	if (!(*mem_obj))
1375 		return -ENOMEM;
1376 
1377 	pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size);
1378 
1379 	start_search = 0;
1380 
1381 	mutex_lock(&kfd->gtt_sa_lock);
1382 
1383 kfd_gtt_restart_search:
1384 	/* Find the first chunk that is free */
1385 	found = find_next_zero_bit(kfd->gtt_sa_bitmap,
1386 					kfd->gtt_sa_num_of_chunks,
1387 					start_search);
1388 
1389 	pr_debug("Found = %d\n", found);
1390 
1391 	/* If there wasn't any free chunk, bail out */
1392 	if (found == kfd->gtt_sa_num_of_chunks)
1393 		goto kfd_gtt_no_free_chunk;
1394 
1395 	/* Update fields of mem_obj */
1396 	(*mem_obj)->range_start = found;
1397 	(*mem_obj)->range_end = found;
1398 	(*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr(
1399 					kfd->gtt_start_gpu_addr,
1400 					found,
1401 					kfd->gtt_sa_chunk_size);
1402 	(*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr(
1403 					kfd->gtt_start_cpu_ptr,
1404 					found,
1405 					kfd->gtt_sa_chunk_size);
1406 
1407 	pr_debug("gpu_addr = %p, cpu_addr = %p\n",
1408 			(uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr);
1409 
1410 	/* If we need only one chunk, mark it as allocated and get out */
1411 	if (size <= kfd->gtt_sa_chunk_size) {
1412 		pr_debug("Single bit\n");
1413 		set_bit(found, kfd->gtt_sa_bitmap);
1414 		goto kfd_gtt_out;
1415 	}
1416 
1417 	/* Otherwise, try to see if we have enough contiguous chunks */
1418 	cur_size = size - kfd->gtt_sa_chunk_size;
1419 	do {
1420 		(*mem_obj)->range_end =
1421 			find_next_zero_bit(kfd->gtt_sa_bitmap,
1422 					kfd->gtt_sa_num_of_chunks, ++found);
1423 		/*
1424 		 * If the next free chunk is not contiguous then we need to
1425 		 * restart our search from the last free chunk we found (which
1426 		 * wasn't contiguous to the previous ones).
1427 		 */
1428 		if ((*mem_obj)->range_end != found) {
1429 			start_search = found;
1430 			goto kfd_gtt_restart_search;
1431 		}
1432 
1433 		/*
1434 		 * If we reached end of buffer, bail out with error
1435 		 */
1436 		if (found == kfd->gtt_sa_num_of_chunks)
1437 			goto kfd_gtt_no_free_chunk;
1438 
1439 		/* Check if we don't need another chunk */
1440 		if (cur_size <= kfd->gtt_sa_chunk_size)
1441 			cur_size = 0;
1442 		else
1443 			cur_size -= kfd->gtt_sa_chunk_size;
1444 
1445 	} while (cur_size > 0);
1446 
1447 	pr_debug("range_start = %d, range_end = %d\n",
1448 		(*mem_obj)->range_start, (*mem_obj)->range_end);
1449 
1450 	/* Mark the chunks as allocated */
1451 	for (found = (*mem_obj)->range_start;
1452 		found <= (*mem_obj)->range_end;
1453 		found++)
1454 		set_bit(found, kfd->gtt_sa_bitmap);
1455 
1456 kfd_gtt_out:
1457 	mutex_unlock(&kfd->gtt_sa_lock);
1458 	return 0;
1459 
1460 kfd_gtt_no_free_chunk:
1461 	pr_debug("Allocation failed with mem_obj = %p\n", *mem_obj);
1462 	mutex_unlock(&kfd->gtt_sa_lock);
1463 	kfree(*mem_obj);
1464 	return -ENOMEM;
1465 }
1466 
1467 int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
1468 {
1469 	unsigned int bit;
1470 
1471 	/* Act like kfree when trying to free a NULL object */
1472 	if (!mem_obj)
1473 		return 0;
1474 
1475 	pr_debug("Free mem_obj = %p, range_start = %d, range_end = %d\n",
1476 			mem_obj, mem_obj->range_start, mem_obj->range_end);
1477 
1478 	mutex_lock(&kfd->gtt_sa_lock);
1479 
1480 	/* Mark the chunks as free */
1481 	for (bit = mem_obj->range_start;
1482 		bit <= mem_obj->range_end;
1483 		bit++)
1484 		clear_bit(bit, kfd->gtt_sa_bitmap);
1485 
1486 	mutex_unlock(&kfd->gtt_sa_lock);
1487 
1488 	kfree(mem_obj);
1489 	return 0;
1490 }
1491 
1492 void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
1493 {
1494 	if (kfd)
1495 		atomic_inc(&kfd->sram_ecc_flag);
1496 }
1497 
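/* kfd->compute_profile counts users that need the compute power profile.
 * The first user switches the GPU out of the compute-idle profile and the
 * last one switches it back, via amdgpu_amdkfd_set_compute_idle().
 */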
1498 void kfd_inc_compute_active(struct kfd_dev *kfd)
1499 {
1500 	if (atomic_inc_return(&kfd->compute_profile) == 1)
1501 		amdgpu_amdkfd_set_compute_idle(kfd->adev, false);
1502 }
1503 
1504 void kfd_dec_compute_active(struct kfd_dev *kfd)
1505 {
1506 	int count = atomic_dec_return(&kfd->compute_profile);
1507 
1508 	if (count == 0)
1509 		amdgpu_amdkfd_set_compute_idle(kfd->adev, true);
1510 	WARN_ONCE(count < 0, "Compute profile ref. count error");
1511 }
1512 
1513 void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
1514 {
1515 	if (kfd && kfd->init_complete)
1516 		kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);
1517 }
1518 
1519 /* kfd_get_num_sdma_engines returns the number of PCIe-optimized SDMA engines
1520  * and kfd_get_num_xgmi_sdma_engines returns the number of XGMI SDMA engines.
1521  * When the device has more than two engines, we reserve two for PCIe to enable
1522  * full-duplex and the rest are used as XGMI.
1523  */
1524 unsigned int kfd_get_num_sdma_engines(struct kfd_dev *kdev)
1525 {
1526 	/* If XGMI is not supported, all SDMA engines are PCIe */
1527 	if (!kdev->adev->gmc.xgmi.supported)
1528 		return kdev->adev->sdma.num_instances;
1529 
1530 	return min(kdev->adev->sdma.num_instances, 2);
1531 }
1532 
1533 unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_dev *kdev)
1534 {
1535 	/* After reserving engines for PCIe, the rest are used for XGMI */
1536 	return kdev->adev->sdma.num_instances - kfd_get_num_sdma_engines(kdev);
1537 }
1538 
1539 #if defined(CONFIG_DEBUG_FS)
1540 
1541 /* This function will send a packet to the HIQ to hang the HWS,
1542  * which will trigger a GPU reset and bring the HWS back to a normal state
1543  */
1544 int kfd_debugfs_hang_hws(struct kfd_dev *dev)
1545 {
1546 	if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) {
1547 		pr_err("HWS is not enabled\n");
1548 		return -EINVAL;
1549 	}
1550 
1551 	return dqm_debugfs_hang_hws(dev->dqm);
1552 }
1553 
1554 #endif
1555