/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/bsearch.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_pm4_headers_vi.h"
#include "kfd_pm4_headers_aldebaran.h"
#include "cwsr_trap_handler.h"
#include "kfd_iommu.h"
#include "amdgpu_amdkfd.h"
#include "kfd_smi_events.h"
#include "kfd_migrate.h"
#include "amdgpu.h"

#define MQD_SIZE_ALIGNED 768

/*
 * kfd_locked is used to lock the kfd driver during suspend or reset.
 * Once locked, the kfd driver will stop any further GPU execution;
 * create process (open) will return -EAGAIN.
 */
static atomic_t kfd_locked = ATOMIC_INIT(0);

#ifdef CONFIG_DRM_AMDGPU_CIK
extern const struct kfd2kgd_calls gfx_v7_kfd2kgd;
#endif
extern const struct kfd2kgd_calls gfx_v8_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v9_kfd2kgd;
extern const struct kfd2kgd_calls arcturus_kfd2kgd;
extern const struct kfd2kgd_calls aldebaran_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_kfd2kgd;
extern const struct kfd2kgd_calls gfx_v10_3_kfd2kgd;

#ifdef KFD_SUPPORT_IOMMU_V2
static const struct kfd_device_info kaveri_device_info = {
	.asic_name = "kaveri",
	.gfx_target_version = 70000,
	.max_pasid_bits = 16,
	/* max num of queues for KV. TODO should be a dynamic value */
	.max_no_of_hqd = 24,
	.doorbell_size = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = false,
	.needs_iommu_device = true,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info carrizo_device_info = {
	.asic_name = "carrizo",
	.gfx_target_version = 80001,
	.max_pasid_bits = 16,
	/* max num of queues for CZ. TODO should be a dynamic value */
	.max_no_of_hqd = 24,
	.doorbell_size = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = true,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info raven_device_info = {
	.asic_name = "raven",
	.gfx_target_version = 90002,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = true,
	.needs_pci_atomics = true,
	.num_sdma_queues_per_engine = 2,
};
#endif

#ifdef CONFIG_DRM_AMDGPU_CIK
static const struct kfd_device_info hawaii_device_info = {
	.asic_name = "hawaii",
	.gfx_target_version = 70001,
	.max_pasid_bits = 16,
	/* max num of queues for KV. TODO should be a dynamic value */
	.max_no_of_hqd = 24,
	.doorbell_size = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = false,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 2,
};
#endif

static const struct kfd_device_info tonga_device_info = {
	.asic_name = "tonga",
	.gfx_target_version = 80002,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = false,
	.needs_iommu_device = false,
	.needs_pci_atomics = true,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info fiji_device_info = {
	.asic_name = "fiji",
	.gfx_target_version = 80003,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = true,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info fiji_vf_device_info = {
	.asic_name = "fiji",
	.gfx_target_version = 80003,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info polaris10_device_info = {
	.asic_name = "polaris10",
	.gfx_target_version = 80003,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = true,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info polaris10_vf_device_info = {
	.asic_name = "polaris10",
	.gfx_target_version = 80003,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info polaris11_device_info = {
	.asic_name = "polaris11",
	.gfx_target_version = 80003,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = true,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info polaris12_device_info = {
	.asic_name = "polaris12",
	.gfx_target_version = 80003,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = true,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vegam_device_info = {
	.asic_name = "vegam",
	.gfx_target_version = 80003,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 4,
	.ih_ring_entry_size = 4 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_cik,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = true,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vega10_device_info = {
	.asic_name = "vega10",
	.gfx_target_version = 90000,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vega10_vf_device_info = {
	.asic_name = "vega10",
	.gfx_target_version = 90000,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vega12_device_info = {
	.asic_name = "vega12",
	.gfx_target_version = 90004,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info vega20_device_info = {
	.asic_name = "vega20",
	.gfx_target_version = 90006,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info arcturus_device_info = {
	.asic_name = "arcturus",
	.gfx_target_version = 90008,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info aldebaran_device_info = {
	.asic_name = "aldebaran",
	.gfx_target_version = 90010,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info renoir_device_info = {
	.asic_name = "renoir",
	.gfx_target_version = 90012,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.supports_cwsr = true,
	.needs_iommu_device = false,
	.needs_pci_atomics = false,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info navi10_device_info = {
	.asic_name = "navi10",
	.gfx_target_version = 100100,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.no_atomic_fw_version = 145,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info navi12_device_info = {
	.asic_name = "navi12",
	.gfx_target_version = 100101,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.no_atomic_fw_version = 145,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info navi14_device_info = {
	.asic_name = "navi14",
	.gfx_target_version = 100102,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.no_atomic_fw_version = 145,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info sienna_cichlid_device_info = {
	.asic_name = "sienna_cichlid",
	.gfx_target_version = 100300,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.no_atomic_fw_version = 92,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info navy_flounder_device_info = {
	.asic_name = "navy_flounder",
	.gfx_target_version = 100301,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.no_atomic_fw_version = 92,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info vangogh_device_info = {
	.asic_name = "vangogh",
	.gfx_target_version = 100303,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.no_atomic_fw_version = 92,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info dimgrey_cavefish_device_info = {
	.asic_name = "dimgrey_cavefish",
	.gfx_target_version = 100302,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.no_atomic_fw_version = 92,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info beige_goby_device_info = {
	.asic_name = "beige_goby",
	.gfx_target_version = 100304,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.no_atomic_fw_version = 92,
	.num_sdma_queues_per_engine = 8,
};

static const struct kfd_device_info yellow_carp_device_info = {
	.asic_name = "yellow_carp",
	.gfx_target_version = 100305,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.no_atomic_fw_version = 92,
	.num_sdma_queues_per_engine = 2,
};

static const struct kfd_device_info cyan_skillfish_device_info = {
	.asic_name = "cyan_skillfish",
	.gfx_target_version = 100103,
	.max_pasid_bits = 16,
	.max_no_of_hqd = 24,
	.doorbell_size = 8,
	.ih_ring_entry_size = 8 * sizeof(uint32_t),
	.event_interrupt_class = &event_interrupt_class_v9,
	.num_of_watch_points = 4,
	.mqd_size_aligned = MQD_SIZE_ALIGNED,
	.needs_iommu_device = false,
	.supports_cwsr = true,
	.needs_pci_atomics = true,
	.num_sdma_queues_per_engine = 8,
};

static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
				unsigned int chunk_size);
static void kfd_gtt_sa_fini(struct kfd_dev *kfd);

static int kfd_resume(struct kfd_dev *kfd);

struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
{
	struct kfd_dev *kfd;
	const struct kfd_device_info *device_info;
	const struct kfd2kgd_calls *f2g;
	struct pci_dev *pdev = adev->pdev;

	switch (adev->asic_type) {
#ifdef KFD_SUPPORT_IOMMU_V2
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_KAVERI:
		if (vf)
			device_info = NULL;
		else
			device_info = &kaveri_device_info;
		f2g = &gfx_v7_kfd2kgd;
		break;
#endif
	case CHIP_CARRIZO:
		if (vf)
			device_info = NULL;
		else
			device_info = &carrizo_device_info;
		f2g = &gfx_v8_kfd2kgd;
		break;
#endif
#ifdef CONFIG_DRM_AMDGPU_CIK
	case CHIP_HAWAII:
		if (vf)
			device_info = NULL;
		else
			device_info = &hawaii_device_info;
		f2g = &gfx_v7_kfd2kgd;
		break;
#endif
	case CHIP_TONGA:
		if (vf)
			device_info = NULL;
		else
			device_info = &tonga_device_info;
		f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_FIJI:
		if (vf)
			device_info = &fiji_vf_device_info;
		else
			device_info = &fiji_device_info;
		f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_POLARIS10:
		if (vf)
			device_info = &polaris10_vf_device_info;
		else
			device_info = &polaris10_device_info;
		f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_POLARIS11:
		if (vf)
			device_info = NULL;
		else
			device_info = &polaris11_device_info;
		f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_POLARIS12:
		if (vf)
			device_info = NULL;
		else
			device_info = &polaris12_device_info;
		f2g = &gfx_v8_kfd2kgd;
		break;
	case CHIP_VEGAM:
		if (vf)
			device_info = NULL;
		else
			device_info = &vegam_device_info;
		f2g = &gfx_v8_kfd2kgd;
		break;
	default:
		switch (adev->ip_versions[GC_HWIP][0]) {
		case IP_VERSION(9, 0, 1):
			if (vf)
				device_info = &vega10_vf_device_info;
			else
				device_info = &vega10_device_info;
			f2g = &gfx_v9_kfd2kgd;
			break;
#ifdef KFD_SUPPORT_IOMMU_V2
		case IP_VERSION(9, 1, 0):
		case IP_VERSION(9, 2, 2):
			if (vf)
				device_info = NULL;
			else
				device_info = &raven_device_info;
			f2g = &gfx_v9_kfd2kgd;
			break;
#endif
		case IP_VERSION(9, 2, 1):
			if (vf)
				device_info = NULL;
			else
				device_info = &vega12_device_info;
			f2g = &gfx_v9_kfd2kgd;
			break;
		case IP_VERSION(9, 3, 0):
			if (vf)
				device_info = NULL;
			else
				device_info = &renoir_device_info;
			f2g = &gfx_v9_kfd2kgd;
			break;
		case IP_VERSION(9, 4, 0):
			if (vf)
				device_info = NULL;
			else
				device_info = &vega20_device_info;
			f2g = &gfx_v9_kfd2kgd;
			break;
		case IP_VERSION(9, 4, 1):
			device_info = &arcturus_device_info;
			f2g = &arcturus_kfd2kgd;
			break;
		case IP_VERSION(9, 4, 2):
			device_info = &aldebaran_device_info;
			f2g = &aldebaran_kfd2kgd;
			break;
		case IP_VERSION(10, 1, 10):
			if (vf)
				device_info = NULL;
			else
				device_info = &navi10_device_info;
			f2g = &gfx_v10_kfd2kgd;
			break;
		case IP_VERSION(10, 1, 2):
			device_info = &navi12_device_info;
			f2g = &gfx_v10_kfd2kgd;
			break;
		case IP_VERSION(10, 1, 1):
			if (vf)
				device_info = NULL;
			else
				device_info = &navi14_device_info;
			f2g = &gfx_v10_kfd2kgd;
			break;
		case IP_VERSION(10, 1, 3):
			if (vf)
				device_info = NULL;
			else
				device_info = &cyan_skillfish_device_info;
			f2g = &gfx_v10_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 0):
			device_info = &sienna_cichlid_device_info;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 2):
			device_info = &navy_flounder_device_info;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 1):
			if (vf)
				device_info = NULL;
			else
				device_info = &vangogh_device_info;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 4):
			device_info = &dimgrey_cavefish_device_info;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 5):
			device_info = &beige_goby_device_info;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		case IP_VERSION(10, 3, 3):
			if (vf)
				device_info = NULL;
			else
				device_info = &yellow_carp_device_info;
			f2g = &gfx_v10_3_kfd2kgd;
			break;
		default:
			return NULL;
		}
		break;
	}

	if (!device_info || !f2g) {
		if (adev->ip_versions[GC_HWIP][0])
			dev_err(kfd_device, "GC IP %06x %s not supported in kfd\n",
				adev->ip_versions[GC_HWIP][0], vf ? "VF" : "");
		else
			dev_err(kfd_device, "%s %s not supported in kfd\n",
				amdgpu_asic_name[adev->asic_type], vf ? "VF" : "");
		return NULL;
	}

	kfd = kzalloc(sizeof(*kfd), GFP_KERNEL);
	if (!kfd)
		return NULL;

	kfd->adev = adev;
	kfd->device_info = device_info;
	kfd->pdev = pdev;
	kfd->init_complete = false;
	kfd->kfd2kgd = f2g;
	atomic_set(&kfd->compute_profile, 0);

	mutex_init(&kfd->doorbell_mutex);
	memset(&kfd->doorbell_available_index, 0,
		sizeof(kfd->doorbell_available_index));

	atomic_set(&kfd->sram_ecc_flag, 0);

	ida_init(&kfd->doorbell_ida);

	return kfd;
}

static void kfd_cwsr_init(struct kfd_dev *kfd)
{
	if (cwsr_enable && kfd->device_info->supports_cwsr) {
		if (KFD_GC_VERSION(kfd) < IP_VERSION(9, 0, 1)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_gfx8_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx8_hex);
		} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_arcturus_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_arcturus_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_arcturus_hex);
		} else if (KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_aldebaran_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_aldebaran_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_aldebaran_hex);
		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 1, 1)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_gfx9_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_gfx9_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx9_hex);
		} else if (KFD_GC_VERSION(kfd) < IP_VERSION(10, 3, 0)) {
			BUILD_BUG_ON(sizeof(cwsr_trap_nv1x_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_nv1x_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_nv1x_hex);
		} else {
			BUILD_BUG_ON(sizeof(cwsr_trap_gfx10_hex) > PAGE_SIZE);
			kfd->cwsr_isa = cwsr_trap_gfx10_hex;
			kfd->cwsr_isa_size = sizeof(cwsr_trap_gfx10_hex);
		}

		kfd->cwsr_enabled = true;
	}
}

static int kfd_gws_init(struct kfd_dev *kfd)
{
	int ret = 0;

	if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
		return 0;

	if (hws_gws_support || (KFD_IS_SOC15(kfd) &&
		((KFD_GC_VERSION(kfd) == IP_VERSION(9, 0, 1)
			&& kfd->mec2_fw_version >= 0x81b3) ||
		(KFD_GC_VERSION(kfd) <= IP_VERSION(9, 4, 0)
			&& kfd->mec2_fw_version >= 0x1b3) ||
		(KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)
			&& kfd->mec2_fw_version >= 0x30) ||
		(KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)
			&& kfd->mec2_fw_version >= 0x28))))
		ret = amdgpu_amdkfd_alloc_gws(kfd->adev,
				kfd->adev->gds.gws_size, &kfd->gws);

	return ret;
}

static void kfd_smi_init(struct kfd_dev *dev)
{
	INIT_LIST_HEAD(&dev->smi_clients);
	spin_lock_init(&dev->smi_lock);
}

bool kgd2kfd_device_init(struct kfd_dev *kfd,
			 struct drm_device *ddev,
			 const struct kgd2kfd_shared_resources *gpu_resources)
{
	unsigned int size, map_process_packet_size;

	kfd->ddev = ddev;
	kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
			KGD_ENGINE_MEC1);
	kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
			KGD_ENGINE_MEC2);
	kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
			KGD_ENGINE_SDMA1);
	kfd->shared_resources = *gpu_resources;

	kfd->vm_info.first_vmid_kfd = ffs(gpu_resources->compute_vmid_bitmap)-1;
	kfd->vm_info.last_vmid_kfd = fls(gpu_resources->compute_vmid_bitmap)-1;
	kfd->vm_info.vmid_num_kfd = kfd->vm_info.last_vmid_kfd
			- kfd->vm_info.first_vmid_kfd + 1;

	/* Allow BIF to recode atomics to PCIe 3.0 AtomicOps.
	 * 32 and 64-bit requests are possible and must be
	 * supported.
	 */
	kfd->pci_atomic_requested = amdgpu_amdkfd_have_atomics_support(kfd->adev);
	if (!kfd->pci_atomic_requested &&
	    kfd->device_info->needs_pci_atomics &&
	    (!kfd->device_info->no_atomic_fw_version ||
	     kfd->mec_fw_version < kfd->device_info->no_atomic_fw_version)) {
		dev_info(kfd_device,
			 "skipped device %x:%x, PCI rejects atomics %d<%d\n",
			 kfd->pdev->vendor, kfd->pdev->device,
			 kfd->mec_fw_version,
			 kfd->device_info->no_atomic_fw_version);
		return false;
	}

	/* Verify module parameters regarding mapped process number */
	if ((hws_max_conc_proc < 0)
			|| (hws_max_conc_proc > kfd->vm_info.vmid_num_kfd)) {
		dev_err(kfd_device,
			"hws_max_conc_proc %d must be between 0 and %d, use %d instead\n",
			hws_max_conc_proc, kfd->vm_info.vmid_num_kfd,
			kfd->vm_info.vmid_num_kfd);
		kfd->max_proc_per_quantum = kfd->vm_info.vmid_num_kfd;
	} else
		kfd->max_proc_per_quantum = hws_max_conc_proc;

	/* calculate max size of mqds needed for queues */
	size = max_num_of_queues_per_device *
			kfd->device_info->mqd_size_aligned;

	/*
	 * calculate max size of runlist packet.
	 * There can be only 2 packets at once
	 */
	map_process_packet_size = KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2) ?
				sizeof(struct pm4_mes_map_process_aldebaran) :
				sizeof(struct pm4_mes_map_process);
	size += (KFD_MAX_NUM_OF_PROCESSES * map_process_packet_size +
		max_num_of_queues_per_device * sizeof(struct pm4_mes_map_queues)
		+ sizeof(struct pm4_mes_runlist)) * 2;

	/* Add size of HIQ & DIQ */
	size += KFD_KERNEL_QUEUE_SIZE * 2;

	/* add another 512KB for all other allocations on gart (HPD, fences) */
	size += 512 * 1024;

	if (amdgpu_amdkfd_alloc_gtt_mem(
			kfd->adev, size, &kfd->gtt_mem,
			&kfd->gtt_start_gpu_addr, &kfd->gtt_start_cpu_ptr,
			false)) {
		dev_err(kfd_device, "Could not allocate %d bytes\n", size);
		goto alloc_gtt_mem_failure;
	}

	dev_info(kfd_device, "Allocated %d bytes on gart\n", size);

	/* Initialize GTT sa with 512 byte chunk size */
	if (kfd_gtt_sa_init(kfd, size, 512) != 0) {
		dev_err(kfd_device, "Error initializing gtt sub-allocator\n");
		goto kfd_gtt_sa_init_error;
	}

	if (kfd_doorbell_init(kfd)) {
		dev_err(kfd_device,
			"Error initializing doorbell aperture\n");
		goto kfd_doorbell_error;
	}

	kfd->hive_id = kfd->adev->gmc.xgmi.hive_id;

	kfd->noretry = kfd->adev->gmc.noretry;

	if (kfd_interrupt_init(kfd)) {
		dev_err(kfd_device, "Error initializing interrupts\n");
		goto kfd_interrupt_error;
	}

	kfd->dqm = device_queue_manager_init(kfd);
	if (!kfd->dqm) {
		dev_err(kfd_device, "Error initializing queue manager\n");
		goto device_queue_manager_error;
	}

	/* If supported on this device, allocate global GWS that is shared
	 * by all KFD processes
	 */
	if (kfd_gws_init(kfd)) {
		dev_err(kfd_device, "Could not allocate %d gws\n",
			kfd->adev->gds.gws_size);
		goto gws_error;
	}

	/* If CRAT is broken, won't set iommu enabled */
	kfd_double_confirm_iommu_support(kfd);

	if (kfd_iommu_device_init(kfd)) {
		kfd->use_iommu_v2 = false;
		dev_err(kfd_device, "Error initializing iommuv2\n");
		goto device_iommu_error;
	}

	kfd_cwsr_init(kfd);

	svm_migrate_init(kfd->adev);
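
	/* Resume the IOMMU (if any) and start the device queue manager
	 * before the device is added to the topology and marked
	 * init_complete.
	 */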
	if (kgd2kfd_resume_iommu(kfd))
		goto device_iommu_error;

	if (kfd_resume(kfd))
		goto kfd_resume_error;

	kfd->dbgmgr = NULL;

	if (kfd_topology_add_device(kfd)) {
		dev_err(kfd_device, "Error adding device to topology\n");
		goto kfd_topology_add_device_error;
	}

	kfd_smi_init(kfd);

	kfd->init_complete = true;
	dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
		 kfd->pdev->device);

	pr_debug("Starting kfd with the following scheduling policy %d\n",
		kfd->dqm->sched_policy);

	goto out;

kfd_topology_add_device_error:
kfd_resume_error:
device_iommu_error:
gws_error:
	device_queue_manager_uninit(kfd->dqm);
device_queue_manager_error:
	kfd_interrupt_exit(kfd);
kfd_interrupt_error:
	kfd_doorbell_fini(kfd);
kfd_doorbell_error:
	kfd_gtt_sa_fini(kfd);
kfd_gtt_sa_init_error:
	amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
alloc_gtt_mem_failure:
	if (kfd->gws)
		amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws);
	dev_err(kfd_device,
		"device %x:%x NOT added due to errors\n",
		kfd->pdev->vendor, kfd->pdev->device);
out:
	return kfd->init_complete;
}

void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
	if (kfd->init_complete) {
		device_queue_manager_uninit(kfd->dqm);
		kfd_interrupt_exit(kfd);
		kfd_topology_remove_device(kfd);
		kfd_doorbell_fini(kfd);
		ida_destroy(&kfd->doorbell_ida);
		kfd_gtt_sa_fini(kfd);
		amdgpu_amdkfd_free_gtt_mem(kfd->adev, kfd->gtt_mem);
		if (kfd->gws)
			amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws);
	}

	kfree(kfd);
}

int kgd2kfd_pre_reset(struct kfd_dev *kfd)
{
	if (!kfd->init_complete)
		return 0;

	kfd_smi_event_update_gpu_reset(kfd, false);

	kfd->dqm->ops.pre_reset(kfd->dqm);

	kgd2kfd_suspend(kfd, false);

	kfd_signal_reset_event(kfd);
	return 0;
}

/*
 * Fixme: KFD won't be able to resume existing processes for now.
 * We will keep all existing processes in an evicted state and
 * wait for them to be terminated.
 */

int kgd2kfd_post_reset(struct kfd_dev *kfd)
{
	int ret;

	if (!kfd->init_complete)
		return 0;

	ret = kfd_resume(kfd);
	if (ret)
		return ret;
	atomic_dec(&kfd_locked);

	atomic_set(&kfd->sram_ecc_flag, 0);

	kfd_smi_event_update_gpu_reset(kfd, true);

	return 0;
}

bool kfd_is_locked(void)
{
	return (atomic_read(&kfd_locked) > 0);
}

void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm)
{
	if (!kfd->init_complete)
		return;

	/* for runtime suspend, skip locking kfd */
	if (!run_pm) {
		/* For first KFD device suspend all the KFD processes */
		if (atomic_inc_return(&kfd_locked) == 1)
			kfd_suspend_all_processes();
	}

	kfd->dqm->ops.stop(kfd->dqm);
	kfd_iommu_suspend(kfd);
}

int kgd2kfd_resume(struct kfd_dev *kfd, bool run_pm)
{
	int ret, count;

	if (!kfd->init_complete)
		return 0;

	ret = kfd_resume(kfd);
	if (ret)
		return ret;

	/* for runtime resume, skip unlocking kfd */
	if (!run_pm) {
		count = atomic_dec_return(&kfd_locked);
		WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
		if (count == 0)
			ret = kfd_resume_all_processes();
	}

	return ret;
}

int kgd2kfd_resume_iommu(struct kfd_dev *kfd)
{
	int err = 0;

	err = kfd_iommu_resume(kfd);
	if (err)
		dev_err(kfd_device,
			"Failed to resume IOMMU for device %x:%x\n",
			kfd->pdev->vendor, kfd->pdev->device);
	return err;
}

static int kfd_resume(struct kfd_dev *kfd)
{
	int err = 0;

	err = kfd->dqm->ops.start(kfd->dqm);
	if (err)
		dev_err(kfd_device,
			"Error starting queue manager for device %x:%x\n",
			kfd->pdev->vendor, kfd->pdev->device);

	return err;
}

static inline void kfd_queue_work(struct workqueue_struct *wq,
				  struct work_struct *work)
{
	int cpu, new_cpu;

	cpu = new_cpu = smp_processor_id();
	do {
		new_cpu = cpumask_next(new_cpu, cpu_online_mask) % nr_cpu_ids;
		if (cpu_to_node(new_cpu) == numa_node_id())
			break;
	} while (cpu != new_cpu);

	queue_work_on(new_cpu, wq, work);
}

/* This is called directly from KGD at ISR. */
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
	uint32_t patched_ihre[KFD_MAX_RING_ENTRY_SIZE];
	bool is_patched = false;
	unsigned long flags;

	if (!kfd->init_complete)
		return;

	if (kfd->device_info->ih_ring_entry_size > sizeof(patched_ihre)) {
		dev_err_once(kfd_device, "Ring entry too small\n");
		return;
	}

	spin_lock_irqsave(&kfd->interrupt_lock, flags);

	if (kfd->interrupts_active
	    && interrupt_is_wanted(kfd, ih_ring_entry,
				   patched_ihre, &is_patched)
	    && enqueue_ih_ring_entry(kfd,
				     is_patched ? patched_ihre : ih_ring_entry))
		kfd_queue_work(kfd->ih_wq, &kfd->interrupt_work);

	spin_unlock_irqrestore(&kfd->interrupt_lock, flags);
}

int kgd2kfd_quiesce_mm(struct mm_struct *mm)
{
	struct kfd_process *p;
	int r;

	/* Because we are called from arbitrary context (workqueue) as opposed
	 * to process context, kfd_process could attempt to exit while we are
	 * running so the lookup function increments the process ref count.
	 */
	p = kfd_lookup_process_by_mm(mm);
	if (!p)
		return -ESRCH;

	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
	r = kfd_process_evict_queues(p);

	kfd_unref_process(p);
	return r;
}

int kgd2kfd_resume_mm(struct mm_struct *mm)
{
	struct kfd_process *p;
	int r;

	/* Because we are called from arbitrary context (workqueue) as opposed
	 * to process context, kfd_process could attempt to exit while we are
	 * running so the lookup function increments the process ref count.
	 */
	p = kfd_lookup_process_by_mm(mm);
	if (!p)
		return -ESRCH;

	r = kfd_process_restore_queues(p);

	kfd_unref_process(p);
	return r;
}

/** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will
 *   prepare for safe eviction of KFD BOs that belong to the specified
 *   process.
 *
 * @mm: mm_struct that identifies the specified KFD process
 * @fence: eviction fence attached to KFD process BOs
 *
 */
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
					       struct dma_fence *fence)
{
	struct kfd_process *p;
	unsigned long active_time;
	unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS);

	if (!fence)
		return -EINVAL;

	if (dma_fence_is_signaled(fence))
		return 0;

	p = kfd_lookup_process_by_mm(mm);
	if (!p)
		return -ENODEV;

	if (fence->seqno == p->last_eviction_seqno)
		goto out;

	p->last_eviction_seqno = fence->seqno;

	/* Avoid KFD process starvation. Wait for at least
	 * PROCESS_ACTIVE_TIME_MS before evicting the process again
	 */
	active_time = get_jiffies_64() - p->last_restore_timestamp;
	if (delay_jiffies > active_time)
		delay_jiffies -= active_time;
	else
		delay_jiffies = 0;

	/* During process initialization eviction_work.dwork is initialized
	 * to kfd_evict_bo_worker
	 */
	WARN(debug_evictions, "Scheduling eviction of pid %d in %ld jiffies",
	     p->lead_thread->pid, delay_jiffies);
	schedule_delayed_work(&p->eviction_work, delay_jiffies);
out:
	kfd_unref_process(p);
	return 0;
}

static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
				unsigned int chunk_size)
{
	unsigned int num_of_longs;

	if (WARN_ON(buf_size < chunk_size))
		return -EINVAL;
	if (WARN_ON(buf_size == 0))
		return -EINVAL;
	if (WARN_ON(chunk_size == 0))
		return -EINVAL;

	kfd->gtt_sa_chunk_size = chunk_size;
	kfd->gtt_sa_num_of_chunks = buf_size / chunk_size;

	num_of_longs = (kfd->gtt_sa_num_of_chunks + BITS_PER_LONG - 1) /
		BITS_PER_LONG;

	kfd->gtt_sa_bitmap = kcalloc(num_of_longs, sizeof(long), GFP_KERNEL);

	if (!kfd->gtt_sa_bitmap)
		return -ENOMEM;

	pr_debug("gtt_sa_num_of_chunks = %d, gtt_sa_bitmap = %p\n",
			kfd->gtt_sa_num_of_chunks, kfd->gtt_sa_bitmap);

	mutex_init(&kfd->gtt_sa_lock);

	return 0;
}

static void kfd_gtt_sa_fini(struct kfd_dev *kfd)
{
	mutex_destroy(&kfd->gtt_sa_lock);
	kfree(kfd->gtt_sa_bitmap);
}

static inline uint64_t kfd_gtt_sa_calc_gpu_addr(uint64_t start_addr,
						unsigned int bit_num,
						unsigned int chunk_size)
{
	return start_addr + bit_num * chunk_size;
}

static inline uint32_t *kfd_gtt_sa_calc_cpu_addr(void *start_addr,
						unsigned int bit_num,
						unsigned int chunk_size)
{
	return (uint32_t *) ((uint64_t) start_addr + bit_num * chunk_size);
}

int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
			struct kfd_mem_obj **mem_obj)
{
	unsigned int found, start_search, cur_size;

	if (size == 0)
		return -EINVAL;

	if (size > kfd->gtt_sa_num_of_chunks * kfd->gtt_sa_chunk_size)
		return -ENOMEM;

	*mem_obj = kzalloc(sizeof(struct kfd_mem_obj), GFP_KERNEL);
	if (!(*mem_obj))
		return -ENOMEM;

	pr_debug("Allocated mem_obj = %p for size = %d\n", *mem_obj, size);

	start_search = 0;

	mutex_lock(&kfd->gtt_sa_lock);

kfd_gtt_restart_search:
	/* Find the first chunk that is free */
	found = find_next_zero_bit(kfd->gtt_sa_bitmap,
					kfd->gtt_sa_num_of_chunks,
					start_search);

	pr_debug("Found = %d\n", found);

	/* If there wasn't any free chunk, bail out */
	if (found == kfd->gtt_sa_num_of_chunks)
		goto kfd_gtt_no_free_chunk;

	/* Update fields of mem_obj */
	(*mem_obj)->range_start = found;
	(*mem_obj)->range_end = found;
	(*mem_obj)->gpu_addr = kfd_gtt_sa_calc_gpu_addr(
					kfd->gtt_start_gpu_addr,
					found,
					kfd->gtt_sa_chunk_size);
	(*mem_obj)->cpu_ptr = kfd_gtt_sa_calc_cpu_addr(
					kfd->gtt_start_cpu_ptr,
					found,
					kfd->gtt_sa_chunk_size);

	pr_debug("gpu_addr = %p, cpu_addr = %p\n",
			(uint64_t *) (*mem_obj)->gpu_addr, (*mem_obj)->cpu_ptr);

	/* If we need only one chunk, mark it as allocated and get out */
	if (size <= kfd->gtt_sa_chunk_size) {
		pr_debug("Single bit\n");
		set_bit(found, kfd->gtt_sa_bitmap);
		goto kfd_gtt_out;
	}

	/* Otherwise, try to see if we have enough contiguous chunks */
	cur_size = size - kfd->gtt_sa_chunk_size;
	do {
		(*mem_obj)->range_end =
			find_next_zero_bit(kfd->gtt_sa_bitmap,
					kfd->gtt_sa_num_of_chunks, ++found);
		/*
		 * If the next free chunk is not contiguous then we need to
		 * restart our search from the last free chunk we found (which
		 * wasn't contiguous to the previous ones)
		 */
		if ((*mem_obj)->range_end != found) {
			start_search = found;
			goto kfd_gtt_restart_search;
		}

		/*
		 * If we reached end of buffer, bail out with error
		 */
		if (found == kfd->gtt_sa_num_of_chunks)
			goto kfd_gtt_no_free_chunk;

		/* Check if we don't need another chunk */
		if (cur_size <= kfd->gtt_sa_chunk_size)
			cur_size = 0;
		else
			cur_size -= kfd->gtt_sa_chunk_size;

	} while (cur_size > 0);

	pr_debug("range_start = %d, range_end = %d\n",
		(*mem_obj)->range_start, (*mem_obj)->range_end);

	/* Mark the chunks as allocated */
	for (found = (*mem_obj)->range_start;
		found <= (*mem_obj)->range_end;
		found++)
		set_bit(found, kfd->gtt_sa_bitmap);

kfd_gtt_out:
	mutex_unlock(&kfd->gtt_sa_lock);
	return 0;

kfd_gtt_no_free_chunk:
	pr_debug("Allocation failed with mem_obj = %p\n", *mem_obj);
	mutex_unlock(&kfd->gtt_sa_lock);
	kfree(*mem_obj);
	return -ENOMEM;
}

int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj)
{
	unsigned int bit;

	/* Act like kfree when trying to free a NULL object */
	if (!mem_obj)
		return 0;

	pr_debug("Free mem_obj = %p, range_start = %d, range_end = %d\n",
			mem_obj, mem_obj->range_start, mem_obj->range_end);

	mutex_lock(&kfd->gtt_sa_lock);

	/* Mark the chunks as free */
	for (bit = mem_obj->range_start;
		bit <= mem_obj->range_end;
		bit++)
		clear_bit(bit, kfd->gtt_sa_bitmap);

	mutex_unlock(&kfd->gtt_sa_lock);

	kfree(mem_obj);
	return 0;
}

void kgd2kfd_set_sram_ecc_flag(struct kfd_dev *kfd)
{
	if (kfd)
		atomic_inc(&kfd->sram_ecc_flag);
}

void kfd_inc_compute_active(struct kfd_dev *kfd)
{
	if (atomic_inc_return(&kfd->compute_profile) == 1)
		amdgpu_amdkfd_set_compute_idle(kfd->adev, false);
}

void kfd_dec_compute_active(struct kfd_dev *kfd)
{
	int count = atomic_dec_return(&kfd->compute_profile);

	if (count == 0)
		amdgpu_amdkfd_set_compute_idle(kfd->adev, true);
	WARN_ONCE(count < 0, "Compute profile ref. count error");
}

void kgd2kfd_smi_event_throttle(struct kfd_dev *kfd, uint64_t throttle_bitmask)
{
	if (kfd && kfd->init_complete)
		kfd_smi_event_update_thermal_throttling(kfd, throttle_bitmask);
}

/* kfd_get_num_sdma_engines returns the number of PCIe-optimized SDMA engines
 * and kfd_get_num_xgmi_sdma_engines returns the number of XGMI SDMA engines.
 * When the device has more than two engines, we reserve two for PCIe to enable
 * full-duplex and the rest are used as XGMI.
 */
unsigned int kfd_get_num_sdma_engines(struct kfd_dev *kdev)
{
	/* If XGMI is not supported, all SDMA engines are PCIe */
	if (!kdev->adev->gmc.xgmi.supported)
		return kdev->adev->sdma.num_instances;

	return min(kdev->adev->sdma.num_instances, 2);
}

unsigned int kfd_get_num_xgmi_sdma_engines(struct kfd_dev *kdev)
{
	/* After reserving engines for PCIe, the rest are used as XGMI */
	return kdev->adev->sdma.num_instances - kfd_get_num_sdma_engines(kdev);
}

#if defined(CONFIG_DEBUG_FS)

/* This function will send a packet to HIQ to hang the HWS,
 * which will trigger a GPU reset and bring the HWS back to a normal state.
 */
int kfd_debugfs_hang_hws(struct kfd_dev *dev)
{
	if (dev->dqm->sched_policy != KFD_SCHED_POLICY_HWS) {
		pr_err("HWS is not enabled");
		return -EINVAL;
	}

	return dqm_debugfs_hang_hws(dev->dqm);
}

#endif