1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 */ 23 24 #include <linux/ratelimit.h> 25 #include <linux/printk.h> 26 #include <linux/slab.h> 27 #include <linux/list.h> 28 #include <linux/types.h> 29 #include <linux/bitops.h> 30 #include <linux/sched.h> 31 #include "kfd_priv.h" 32 #include "kfd_device_queue_manager.h" 33 #include "kfd_mqd_manager.h" 34 #include "cik_regs.h" 35 #include "kfd_kernel_queue.h" 36 #include "amdgpu_amdkfd.h" 37 38 /* Size of the per-pipe EOP queue */ 39 #define CIK_HPD_EOP_BYTES_LOG2 11 40 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2) 41 42 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, 43 unsigned int pasid, unsigned int vmid); 44 45 static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, 46 struct queue *q, 47 struct qcm_process_device *qpd); 48 49 static int execute_queues_cpsch(struct device_queue_manager *dqm, 50 enum kfd_unmap_queues_filter filter, 51 uint32_t filter_param); 52 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 53 enum kfd_unmap_queues_filter filter, 54 uint32_t filter_param); 55 56 static int map_queues_cpsch(struct device_queue_manager *dqm); 57 58 static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, 59 struct queue *q, 60 struct qcm_process_device *qpd); 61 62 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 63 struct queue *q); 64 65 static void kfd_process_hw_exception(struct work_struct *work); 66 67 static inline 68 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) 69 { 70 if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI) 71 return KFD_MQD_TYPE_SDMA; 72 return KFD_MQD_TYPE_CP; 73 } 74 75 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) 76 { 77 int i; 78 int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec 79 + pipe * dqm->dev->shared_resources.num_queue_per_pipe; 80 81 /* queue is available for KFD usage if bit is 1 */ 82 for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i) 83 if (test_bit(pipe_offset + i, 84 dqm->dev->shared_resources.queue_bitmap)) 85 return true; 86 return false; 87 } 88 89 unsigned int get_queues_num(struct device_queue_manager *dqm) 90 { 91 return bitmap_weight(dqm->dev->shared_resources.queue_bitmap, 92 KGD_MAX_QUEUES); 93 } 94 95 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm) 96 { 97 return 
		dqm->dev->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_sdma_engines;
}

static unsigned int get_num_xgmi_sdma_engines(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_xgmi_sdma_engines;
}

unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_sdma_engines
			* dqm->dev->device_info->num_sdma_queues_per_engine;
}

unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_xgmi_sdma_engines
			* dqm->dev->device_info->num_sdma_queues_per_engine;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
{
	struct kfd_dev *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
		/* On pre-SOC15 chips we need to use the queue ID to
		 * preserve the user mode ABI.
		 */
		q->doorbell_id = q->properties.queue_id;
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		   q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		/* For SDMA queues on SOC15 with 8-byte doorbell, use static
		 * doorbell assignments based on the engine and queue id.
		 * The doorbell index distance between RLC (2*i) and (2*i+1)
		 * for an SDMA engine is 512.
		 */
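		/* Worked example (illustrative only; the idx_offset value
		 * below is made up, and KFD_QUEUE_DOORBELL_MIRROR_OFFSET is
		 * taken to be the 512-doorbell distance described above):
		 * an SDMA queue with sdma_engine_id == 1, sdma_queue_id == 3
		 * and idx_offset[1] == 24 would get
		 *   doorbell_id = 24 + (3 & 1) * 512 + (3 >> 1) = 537,
		 * i.e. odd queue ids land in the mirrored (2*i+1) doorbell
		 * range and even ones in the base (2*i) range.
		 */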
		uint32_t *idx_offset =
			dev->shared_resources.sdma_doorbell_idx;

		q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
			+ (q->properties.sdma_queue_id & 1)
			* KFD_QUEUE_DOORBELL_MIRROR_OFFSET
			+ (q->properties.sdma_queue_id >> 1);
	} else {
		/* For CP queues on SOC15 reserve a free doorbell ID */
		unsigned int found;

		found = find_first_zero_bit(qpd->doorbell_bitmap,
					KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
		if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
			pr_debug("No doorbells available");
			return -EBUSY;
		}
		set_bit(found, qpd->doorbell_bitmap);
		q->doorbell_id = found;
	}

	q->properties.doorbell_off =
		kfd_doorbell_id_to_offset(dev, q->process,
					  q->doorbell_id);

	return 0;
}

static void deallocate_doorbell(struct qcm_process_device *qpd,
				struct queue *q)
{
	unsigned int old;
	struct kfd_dev *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		return;

	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
	WARN_ON(!old);
}

static int allocate_vmid(struct device_queue_manager *dqm,
			 struct qcm_process_device *qpd,
			 struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = ffs(dqm->vmid_bitmap) - 1;
	dqm->vmid_bitmap &= ~(1 << bit);

	allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
	pr_debug("vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	/* qpd->page_table_base is set earlier when register_process()
	 * is called, i.e. when the first queue is created.
219 */ 220 dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd, 221 qpd->vmid, 222 qpd->page_table_base); 223 /* invalidate the VM context after pasid and vmid mapping is set up */ 224 kfd_flush_tlb(qpd_to_pdd(qpd)); 225 226 return 0; 227 } 228 229 static int flush_texture_cache_nocpsch(struct kfd_dev *kdev, 230 struct qcm_process_device *qpd) 231 { 232 const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf; 233 int ret; 234 235 if (!qpd->ib_kaddr) 236 return -ENOMEM; 237 238 ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); 239 if (ret) 240 return ret; 241 242 return amdgpu_amdkfd_submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid, 243 qpd->ib_base, (uint32_t *)qpd->ib_kaddr, 244 pmf->release_mem_size / sizeof(uint32_t)); 245 } 246 247 static void deallocate_vmid(struct device_queue_manager *dqm, 248 struct qcm_process_device *qpd, 249 struct queue *q) 250 { 251 int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd; 252 253 /* On GFX v7, CP doesn't flush TC at dequeue */ 254 if (q->device->device_info->asic_family == CHIP_HAWAII) 255 if (flush_texture_cache_nocpsch(q->device, qpd)) 256 pr_err("Failed to flush TC\n"); 257 258 kfd_flush_tlb(qpd_to_pdd(qpd)); 259 260 /* Release the vmid mapping */ 261 set_pasid_vmid_mapping(dqm, 0, qpd->vmid); 262 263 dqm->vmid_bitmap |= (1 << bit); 264 qpd->vmid = 0; 265 q->properties.vmid = 0; 266 } 267 268 static int create_queue_nocpsch(struct device_queue_manager *dqm, 269 struct queue *q, 270 struct qcm_process_device *qpd) 271 { 272 int retval; 273 274 print_queue(q); 275 276 dqm_lock(dqm); 277 278 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 279 pr_warn("Can't create new usermode queue because %d queues were already created\n", 280 dqm->total_queue_count); 281 retval = -EPERM; 282 goto out_unlock; 283 } 284 285 if (list_empty(&qpd->queues_list)) { 286 retval = allocate_vmid(dqm, qpd, q); 287 if (retval) 288 goto out_unlock; 289 } 290 q->properties.vmid = qpd->vmid; 291 /* 292 * Eviction state logic: we only mark active queues as evicted 293 * to avoid the overhead of restoring inactive queues later 294 */ 295 if (qpd->evicted) 296 q->properties.is_evicted = (q->properties.queue_size > 0 && 297 q->properties.queue_percent > 0 && 298 q->properties.queue_address != 0); 299 300 q->properties.tba_addr = qpd->tba_addr; 301 q->properties.tma_addr = qpd->tma_addr; 302 303 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 304 retval = create_compute_queue_nocpsch(dqm, q, qpd); 305 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 306 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 307 retval = create_sdma_queue_nocpsch(dqm, q, qpd); 308 else 309 retval = -EINVAL; 310 311 if (retval) { 312 if (list_empty(&qpd->queues_list)) 313 deallocate_vmid(dqm, qpd, q); 314 goto out_unlock; 315 } 316 317 list_add(&q->list, &qpd->queues_list); 318 qpd->queue_count++; 319 if (q->properties.is_active) 320 dqm->queue_count++; 321 322 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 323 dqm->sdma_queue_count++; 324 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 325 dqm->xgmi_sdma_queue_count++; 326 327 /* 328 * Unconditionally increment this counter, regardless of the queue's 329 * type or whether the queue is active. 
330 */ 331 dqm->total_queue_count++; 332 pr_debug("Total of %d queues are accountable so far\n", 333 dqm->total_queue_count); 334 335 out_unlock: 336 dqm_unlock(dqm); 337 return retval; 338 } 339 340 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) 341 { 342 bool set; 343 int pipe, bit, i; 344 345 set = false; 346 347 for (pipe = dqm->next_pipe_to_allocate, i = 0; 348 i < get_pipes_per_mec(dqm); 349 pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) { 350 351 if (!is_pipe_enabled(dqm, 0, pipe)) 352 continue; 353 354 if (dqm->allocated_queues[pipe] != 0) { 355 bit = ffs(dqm->allocated_queues[pipe]) - 1; 356 dqm->allocated_queues[pipe] &= ~(1 << bit); 357 q->pipe = pipe; 358 q->queue = bit; 359 set = true; 360 break; 361 } 362 } 363 364 if (!set) 365 return -EBUSY; 366 367 pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue); 368 /* horizontal hqd allocation */ 369 dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm); 370 371 return 0; 372 } 373 374 static inline void deallocate_hqd(struct device_queue_manager *dqm, 375 struct queue *q) 376 { 377 dqm->allocated_queues[q->pipe] |= (1 << q->queue); 378 } 379 380 static int create_compute_queue_nocpsch(struct device_queue_manager *dqm, 381 struct queue *q, 382 struct qcm_process_device *qpd) 383 { 384 struct mqd_manager *mqd_mgr; 385 int retval; 386 387 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE]; 388 389 retval = allocate_hqd(dqm, q); 390 if (retval) 391 return retval; 392 393 retval = allocate_doorbell(qpd, q); 394 if (retval) 395 goto out_deallocate_hqd; 396 397 retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, 398 &q->gart_mqd_addr, &q->properties); 399 if (retval) 400 goto out_deallocate_doorbell; 401 402 pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", 403 q->pipe, q->queue); 404 405 dqm->dev->kfd2kgd->set_scratch_backing_va( 406 dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid); 407 408 if (!q->properties.is_active) 409 return 0; 410 411 if (WARN(q->process->mm != current->mm, 412 "should only run in user thread")) 413 retval = -EFAULT; 414 else 415 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue, 416 &q->properties, current->mm); 417 if (retval) 418 goto out_uninit_mqd; 419 420 return 0; 421 422 out_uninit_mqd: 423 mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 424 out_deallocate_doorbell: 425 deallocate_doorbell(qpd, q); 426 out_deallocate_hqd: 427 deallocate_hqd(dqm, q); 428 429 return retval; 430 } 431 432 /* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked 433 * to avoid asynchronized access 434 */ 435 static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, 436 struct qcm_process_device *qpd, 437 struct queue *q) 438 { 439 int retval; 440 struct mqd_manager *mqd_mgr; 441 442 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 443 q->properties.type)]; 444 445 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { 446 deallocate_hqd(dqm, q); 447 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 448 dqm->sdma_queue_count--; 449 deallocate_sdma_queue(dqm, q); 450 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 451 dqm->xgmi_sdma_queue_count--; 452 deallocate_sdma_queue(dqm, q); 453 } else { 454 pr_debug("q->properties.type %d is invalid\n", 455 q->properties.type); 456 return -EINVAL; 457 } 458 dqm->total_queue_count--; 459 460 deallocate_doorbell(qpd, q); 461 462 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 463 KFD_PREEMPT_TYPE_WAVEFRONT_RESET, 464 KFD_UNMAP_LATENCY_MS, 465 
q->pipe, q->queue); 466 if (retval == -ETIME) 467 qpd->reset_wavefronts = true; 468 469 mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 470 471 list_del(&q->list); 472 if (list_empty(&qpd->queues_list)) { 473 if (qpd->reset_wavefronts) { 474 pr_warn("Resetting wave fronts (nocpsch) on dev %p\n", 475 dqm->dev); 476 /* dbgdev_wave_reset_wavefronts has to be called before 477 * deallocate_vmid(), i.e. when vmid is still in use. 478 */ 479 dbgdev_wave_reset_wavefronts(dqm->dev, 480 qpd->pqm->process); 481 qpd->reset_wavefronts = false; 482 } 483 484 deallocate_vmid(dqm, qpd, q); 485 } 486 qpd->queue_count--; 487 if (q->properties.is_active) 488 dqm->queue_count--; 489 490 return retval; 491 } 492 493 static int destroy_queue_nocpsch(struct device_queue_manager *dqm, 494 struct qcm_process_device *qpd, 495 struct queue *q) 496 { 497 int retval; 498 499 dqm_lock(dqm); 500 retval = destroy_queue_nocpsch_locked(dqm, qpd, q); 501 dqm_unlock(dqm); 502 503 return retval; 504 } 505 506 static int update_queue(struct device_queue_manager *dqm, struct queue *q) 507 { 508 int retval; 509 struct mqd_manager *mqd_mgr; 510 struct kfd_process_device *pdd; 511 bool prev_active = false; 512 513 dqm_lock(dqm); 514 pdd = kfd_get_process_device_data(q->device, q->process); 515 if (!pdd) { 516 retval = -ENODEV; 517 goto out_unlock; 518 } 519 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 520 q->properties.type)]; 521 /* 522 * Eviction state logic: we only mark active queues as evicted 523 * to avoid the overhead of restoring inactive queues later 524 */ 525 if (pdd->qpd.evicted) 526 q->properties.is_evicted = (q->properties.queue_size > 0 && 527 q->properties.queue_percent > 0 && 528 q->properties.queue_address != 0); 529 530 /* Save previous activity state for counters */ 531 prev_active = q->properties.is_active; 532 533 /* Make sure the queue is unmapped before updating the MQD */ 534 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { 535 retval = unmap_queues_cpsch(dqm, 536 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 537 if (retval) { 538 pr_err("unmap queue failed\n"); 539 goto out_unlock; 540 } 541 } else if (prev_active && 542 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 543 q->properties.type == KFD_QUEUE_TYPE_SDMA || 544 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 545 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 546 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN, 547 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 548 if (retval) { 549 pr_err("destroy mqd failed\n"); 550 goto out_unlock; 551 } 552 } 553 554 retval = mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties); 555 556 /* 557 * check active state vs. the previous state and modify 558 * counter accordingly. map_queues_cpsch uses the 559 * dqm->queue_count to determine whether a new runlist must be 560 * uploaded. 
	 */
	if (q->properties.is_active && !prev_active)
		dqm->queue_count++;
	else if (!q->properties.is_active && prev_active)
		dqm->queue_count--;

	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = map_queues_cpsch(dqm);
	else if (q->properties.is_active &&
		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		if (WARN(q->process->mm != current->mm,
			 "should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
						   q->pipe, q->queue,
						   &q->properties, current->mm);
	}

out_unlock:
	dqm_unlock(dqm);
	return retval;
}

static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	int retval = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* Deactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval)
			goto out;
		dqm->queue_count--;
	}

out:
	dqm_unlock(dqm);
	return retval;
}

static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
				      struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	int retval = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* Deactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		dqm->queue_count--;
	}
	retval = execute_queues_cpsch(dqm,
				      qpd->is_debug ?
649 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : 650 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 651 652 out: 653 dqm_unlock(dqm); 654 return retval; 655 } 656 657 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, 658 struct qcm_process_device *qpd) 659 { 660 struct mm_struct *mm = NULL; 661 struct queue *q; 662 struct mqd_manager *mqd_mgr; 663 struct kfd_process_device *pdd; 664 uint64_t pd_base; 665 int retval = 0; 666 667 pdd = qpd_to_pdd(qpd); 668 /* Retrieve PD base */ 669 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm); 670 671 dqm_lock(dqm); 672 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 673 goto out; 674 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 675 qpd->evicted--; 676 goto out; 677 } 678 679 pr_info_ratelimited("Restoring PASID %u queues\n", 680 pdd->process->pasid); 681 682 /* Update PD Base in QPD */ 683 qpd->page_table_base = pd_base; 684 pr_debug("Updated PD address to 0x%llx\n", pd_base); 685 686 if (!list_empty(&qpd->queues_list)) { 687 dqm->dev->kfd2kgd->set_vm_context_page_table_base( 688 dqm->dev->kgd, 689 qpd->vmid, 690 qpd->page_table_base); 691 kfd_flush_tlb(pdd); 692 } 693 694 /* Take a safe reference to the mm_struct, which may otherwise 695 * disappear even while the kfd_process is still referenced. 696 */ 697 mm = get_task_mm(pdd->process->lead_thread); 698 if (!mm) { 699 retval = -EFAULT; 700 goto out; 701 } 702 703 /* activate all active queues on the qpd */ 704 list_for_each_entry(q, &qpd->queues_list, list) { 705 if (!q->properties.is_evicted) 706 continue; 707 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 708 q->properties.type)]; 709 q->properties.is_evicted = false; 710 q->properties.is_active = true; 711 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 712 q->queue, &q->properties, mm); 713 if (retval) 714 goto out; 715 dqm->queue_count++; 716 } 717 qpd->evicted = 0; 718 out: 719 if (mm) 720 mmput(mm); 721 dqm_unlock(dqm); 722 return retval; 723 } 724 725 static int restore_process_queues_cpsch(struct device_queue_manager *dqm, 726 struct qcm_process_device *qpd) 727 { 728 struct queue *q; 729 struct kfd_process_device *pdd; 730 uint64_t pd_base; 731 int retval = 0; 732 733 pdd = qpd_to_pdd(qpd); 734 /* Retrieve PD base */ 735 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm); 736 737 dqm_lock(dqm); 738 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 739 goto out; 740 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 741 qpd->evicted--; 742 goto out; 743 } 744 745 pr_info_ratelimited("Restoring PASID %u queues\n", 746 pdd->process->pasid); 747 748 /* Update PD Base in QPD */ 749 qpd->page_table_base = pd_base; 750 pr_debug("Updated PD address to 0x%llx\n", pd_base); 751 752 /* activate all active queues on the qpd */ 753 list_for_each_entry(q, &qpd->queues_list, list) { 754 if (!q->properties.is_evicted) 755 continue; 756 q->properties.is_evicted = false; 757 q->properties.is_active = true; 758 dqm->queue_count++; 759 } 760 retval = execute_queues_cpsch(dqm, 761 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 762 if (!retval) 763 qpd->evicted = 0; 764 out: 765 dqm_unlock(dqm); 766 return retval; 767 } 768 769 static int register_process(struct device_queue_manager *dqm, 770 struct qcm_process_device *qpd) 771 { 772 struct device_process_node *n; 773 struct kfd_process_device *pdd; 774 uint64_t pd_base; 775 int retval; 776 777 n = kzalloc(sizeof(*n), GFP_KERNEL); 778 if (!n) 779 return -ENOMEM; 780 781 n->qpd = qpd; 782 783 
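	/*
	 * Note: register_process() runs when the process creates its first
	 * queue on this device. The page directory base cached in the QPD
	 * below is what allocate_vmid() later programs into the VMID's page
	 * table base register.
	 */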
pdd = qpd_to_pdd(qpd); 784 /* Retrieve PD base */ 785 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm); 786 787 dqm_lock(dqm); 788 list_add(&n->list, &dqm->queues); 789 790 /* Update PD Base in QPD */ 791 qpd->page_table_base = pd_base; 792 pr_debug("Updated PD address to 0x%llx\n", pd_base); 793 794 retval = dqm->asic_ops.update_qpd(dqm, qpd); 795 796 dqm->processes_count++; 797 798 dqm_unlock(dqm); 799 800 /* Outside the DQM lock because under the DQM lock we can't do 801 * reclaim or take other locks that others hold while reclaiming. 802 */ 803 kfd_inc_compute_active(dqm->dev); 804 805 return retval; 806 } 807 808 static int unregister_process(struct device_queue_manager *dqm, 809 struct qcm_process_device *qpd) 810 { 811 int retval; 812 struct device_process_node *cur, *next; 813 814 pr_debug("qpd->queues_list is %s\n", 815 list_empty(&qpd->queues_list) ? "empty" : "not empty"); 816 817 retval = 0; 818 dqm_lock(dqm); 819 820 list_for_each_entry_safe(cur, next, &dqm->queues, list) { 821 if (qpd == cur->qpd) { 822 list_del(&cur->list); 823 kfree(cur); 824 dqm->processes_count--; 825 goto out; 826 } 827 } 828 /* qpd not found in dqm list */ 829 retval = 1; 830 out: 831 dqm_unlock(dqm); 832 833 /* Outside the DQM lock because under the DQM lock we can't do 834 * reclaim or take other locks that others hold while reclaiming. 835 */ 836 if (!retval) 837 kfd_dec_compute_active(dqm->dev); 838 839 return retval; 840 } 841 842 static int 843 set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid, 844 unsigned int vmid) 845 { 846 return dqm->dev->kfd2kgd->set_pasid_vmid_mapping( 847 dqm->dev->kgd, pasid, vmid); 848 } 849 850 static void init_interrupts(struct device_queue_manager *dqm) 851 { 852 unsigned int i; 853 854 for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) 855 if (is_pipe_enabled(dqm, 0, i)) 856 dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i); 857 } 858 859 static int initialize_nocpsch(struct device_queue_manager *dqm) 860 { 861 int pipe, queue; 862 863 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 864 865 dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), 866 sizeof(unsigned int), GFP_KERNEL); 867 if (!dqm->allocated_queues) 868 return -ENOMEM; 869 870 mutex_init(&dqm->lock_hidden); 871 INIT_LIST_HEAD(&dqm->queues); 872 dqm->queue_count = dqm->next_pipe_to_allocate = 0; 873 dqm->sdma_queue_count = 0; 874 dqm->xgmi_sdma_queue_count = 0; 875 876 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 877 int pipe_offset = pipe * get_queues_per_pipe(dqm); 878 879 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) 880 if (test_bit(pipe_offset + queue, 881 dqm->dev->shared_resources.queue_bitmap)) 882 dqm->allocated_queues[pipe] |= 1 << queue; 883 } 884 885 dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1; 886 dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1; 887 dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1; 888 889 return 0; 890 } 891 892 static void uninitialize(struct device_queue_manager *dqm) 893 { 894 int i; 895 896 WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0); 897 898 kfree(dqm->allocated_queues); 899 for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) 900 kfree(dqm->mqd_mgrs[i]); 901 mutex_destroy(&dqm->lock_hidden); 902 kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem); 903 } 904 905 static int start_nocpsch(struct device_queue_manager *dqm) 906 { 907 init_interrupts(dqm); 908 return pm_init(&dqm->packets, dqm); 909 } 910 911 static int stop_nocpsch(struct device_queue_manager 
*dqm) 912 { 913 pm_uninit(&dqm->packets); 914 return 0; 915 } 916 917 static int allocate_sdma_queue(struct device_queue_manager *dqm, 918 struct queue *q) 919 { 920 int bit; 921 922 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 923 if (dqm->sdma_bitmap == 0) 924 return -ENOMEM; 925 bit = __ffs64(dqm->sdma_bitmap); 926 dqm->sdma_bitmap &= ~(1ULL << bit); 927 q->sdma_id = bit; 928 q->properties.sdma_engine_id = q->sdma_id % 929 get_num_sdma_engines(dqm); 930 q->properties.sdma_queue_id = q->sdma_id / 931 get_num_sdma_engines(dqm); 932 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 933 if (dqm->xgmi_sdma_bitmap == 0) 934 return -ENOMEM; 935 bit = __ffs64(dqm->xgmi_sdma_bitmap); 936 dqm->xgmi_sdma_bitmap &= ~(1ULL << bit); 937 q->sdma_id = bit; 938 /* sdma_engine_id is sdma id including 939 * both PCIe-optimized SDMAs and XGMI- 940 * optimized SDMAs. The calculation below 941 * assumes the first N engines are always 942 * PCIe-optimized ones 943 */ 944 q->properties.sdma_engine_id = get_num_sdma_engines(dqm) + 945 q->sdma_id % get_num_xgmi_sdma_engines(dqm); 946 q->properties.sdma_queue_id = q->sdma_id / 947 get_num_xgmi_sdma_engines(dqm); 948 } 949 950 pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); 951 pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); 952 953 return 0; 954 } 955 956 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 957 struct queue *q) 958 { 959 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 960 if (q->sdma_id >= get_num_sdma_queues(dqm)) 961 return; 962 dqm->sdma_bitmap |= (1ULL << q->sdma_id); 963 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 964 if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm)) 965 return; 966 dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id); 967 } 968 } 969 970 static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm, 971 struct queue *q, 972 struct qcm_process_device *qpd) 973 { 974 struct mqd_manager *mqd_mgr; 975 int retval; 976 977 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]; 978 979 retval = allocate_sdma_queue(dqm, q); 980 if (retval) 981 return retval; 982 983 retval = allocate_doorbell(qpd, q); 984 if (retval) 985 goto out_deallocate_sdma_queue; 986 987 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 988 retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, 989 &q->gart_mqd_addr, &q->properties); 990 if (retval) 991 goto out_deallocate_doorbell; 992 993 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 0, 0, &q->properties, 994 NULL); 995 if (retval) 996 goto out_uninit_mqd; 997 998 return 0; 999 1000 out_uninit_mqd: 1001 mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 1002 out_deallocate_doorbell: 1003 deallocate_doorbell(qpd, q); 1004 out_deallocate_sdma_queue: 1005 deallocate_sdma_queue(dqm, q); 1006 1007 return retval; 1008 } 1009 1010 /* 1011 * Device Queue Manager implementation for cp scheduler 1012 */ 1013 1014 static int set_sched_resources(struct device_queue_manager *dqm) 1015 { 1016 int i, mec; 1017 struct scheduling_resources res; 1018 1019 res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap; 1020 1021 res.queue_mask = 0; 1022 for (i = 0; i < KGD_MAX_QUEUES; ++i) { 1023 mec = (i / dqm->dev->shared_resources.num_queue_per_pipe) 1024 / dqm->dev->shared_resources.num_pipe_per_mec; 1025 1026 if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap)) 1027 continue; 1028 1029 /* only acquire queues from the first MEC */ 1030 if (mec > 0) 1031 continue; 1032 1033 /* This situation may be hit in the future if a new HW 1034 * generation 
exposes more than 64 queues. If so, the 1035 * definition of res.queue_mask needs updating 1036 */ 1037 if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) { 1038 pr_err("Invalid queue enabled by amdgpu: %d\n", i); 1039 break; 1040 } 1041 1042 res.queue_mask |= (1ull << i); 1043 } 1044 res.gws_mask = res.oac_mask = res.gds_heap_base = 1045 res.gds_heap_size = 0; 1046 1047 pr_debug("Scheduling resources:\n" 1048 "vmid mask: 0x%8X\n" 1049 "queue mask: 0x%8llX\n", 1050 res.vmid_mask, res.queue_mask); 1051 1052 return pm_send_set_resources(&dqm->packets, &res); 1053 } 1054 1055 static int initialize_cpsch(struct device_queue_manager *dqm) 1056 { 1057 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1058 1059 mutex_init(&dqm->lock_hidden); 1060 INIT_LIST_HEAD(&dqm->queues); 1061 dqm->queue_count = dqm->processes_count = 0; 1062 dqm->sdma_queue_count = 0; 1063 dqm->xgmi_sdma_queue_count = 0; 1064 dqm->active_runlist = false; 1065 dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1; 1066 dqm->xgmi_sdma_bitmap = (1ULL << get_num_xgmi_sdma_queues(dqm)) - 1; 1067 1068 INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception); 1069 1070 return 0; 1071 } 1072 1073 static int start_cpsch(struct device_queue_manager *dqm) 1074 { 1075 int retval; 1076 1077 retval = 0; 1078 1079 retval = pm_init(&dqm->packets, dqm); 1080 if (retval) 1081 goto fail_packet_manager_init; 1082 1083 retval = set_sched_resources(dqm); 1084 if (retval) 1085 goto fail_set_sched_resources; 1086 1087 pr_debug("Allocating fence memory\n"); 1088 1089 /* allocate fence memory on the gart */ 1090 retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr), 1091 &dqm->fence_mem); 1092 1093 if (retval) 1094 goto fail_allocate_vidmem; 1095 1096 dqm->fence_addr = dqm->fence_mem->cpu_ptr; 1097 dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr; 1098 1099 init_interrupts(dqm); 1100 1101 dqm_lock(dqm); 1102 /* clear hang status when driver try to start the hw scheduler */ 1103 dqm->is_hws_hang = false; 1104 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1105 dqm_unlock(dqm); 1106 1107 return 0; 1108 fail_allocate_vidmem: 1109 fail_set_sched_resources: 1110 pm_uninit(&dqm->packets); 1111 fail_packet_manager_init: 1112 return retval; 1113 } 1114 1115 static int stop_cpsch(struct device_queue_manager *dqm) 1116 { 1117 dqm_lock(dqm); 1118 unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); 1119 dqm_unlock(dqm); 1120 1121 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); 1122 pm_uninit(&dqm->packets); 1123 1124 return 0; 1125 } 1126 1127 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, 1128 struct kernel_queue *kq, 1129 struct qcm_process_device *qpd) 1130 { 1131 dqm_lock(dqm); 1132 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1133 pr_warn("Can't create new kernel queue because %d queues were already created\n", 1134 dqm->total_queue_count); 1135 dqm_unlock(dqm); 1136 return -EPERM; 1137 } 1138 1139 /* 1140 * Unconditionally increment this counter, regardless of the queue's 1141 * type or whether the queue is active. 
1142 */ 1143 dqm->total_queue_count++; 1144 pr_debug("Total of %d queues are accountable so far\n", 1145 dqm->total_queue_count); 1146 1147 list_add(&kq->list, &qpd->priv_queue_list); 1148 dqm->queue_count++; 1149 qpd->is_debug = true; 1150 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1151 dqm_unlock(dqm); 1152 1153 return 0; 1154 } 1155 1156 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, 1157 struct kernel_queue *kq, 1158 struct qcm_process_device *qpd) 1159 { 1160 dqm_lock(dqm); 1161 list_del(&kq->list); 1162 dqm->queue_count--; 1163 qpd->is_debug = false; 1164 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); 1165 /* 1166 * Unconditionally decrement this counter, regardless of the queue's 1167 * type. 1168 */ 1169 dqm->total_queue_count--; 1170 pr_debug("Total of %d queues are accountable so far\n", 1171 dqm->total_queue_count); 1172 dqm_unlock(dqm); 1173 } 1174 1175 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, 1176 struct qcm_process_device *qpd) 1177 { 1178 int retval; 1179 struct mqd_manager *mqd_mgr; 1180 1181 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1182 pr_warn("Can't create new usermode queue because %d queues were already created\n", 1183 dqm->total_queue_count); 1184 retval = -EPERM; 1185 goto out; 1186 } 1187 1188 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1189 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1190 retval = allocate_sdma_queue(dqm, q); 1191 if (retval) 1192 goto out; 1193 } 1194 1195 retval = allocate_doorbell(qpd, q); 1196 if (retval) 1197 goto out_deallocate_sdma_queue; 1198 1199 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1200 q->properties.type)]; 1201 /* 1202 * Eviction state logic: we only mark active queues as evicted 1203 * to avoid the overhead of restoring inactive queues later 1204 */ 1205 if (qpd->evicted) 1206 q->properties.is_evicted = (q->properties.queue_size > 0 && 1207 q->properties.queue_percent > 0 && 1208 q->properties.queue_address != 0); 1209 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 1210 q->properties.tba_addr = qpd->tba_addr; 1211 q->properties.tma_addr = qpd->tma_addr; 1212 retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj, 1213 &q->gart_mqd_addr, &q->properties); 1214 if (retval) 1215 goto out_deallocate_doorbell; 1216 1217 dqm_lock(dqm); 1218 1219 list_add(&q->list, &qpd->queues_list); 1220 qpd->queue_count++; 1221 if (q->properties.is_active) { 1222 dqm->queue_count++; 1223 retval = execute_queues_cpsch(dqm, 1224 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1225 } 1226 1227 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 1228 dqm->sdma_queue_count++; 1229 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 1230 dqm->xgmi_sdma_queue_count++; 1231 /* 1232 * Unconditionally increment this counter, regardless of the queue's 1233 * type or whether the queue is active. 
1234 */ 1235 dqm->total_queue_count++; 1236 1237 pr_debug("Total of %d queues are accountable so far\n", 1238 dqm->total_queue_count); 1239 1240 dqm_unlock(dqm); 1241 return retval; 1242 1243 out_deallocate_doorbell: 1244 deallocate_doorbell(qpd, q); 1245 out_deallocate_sdma_queue: 1246 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1247 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 1248 deallocate_sdma_queue(dqm, q); 1249 out: 1250 return retval; 1251 } 1252 1253 int amdkfd_fence_wait_timeout(unsigned int *fence_addr, 1254 unsigned int fence_value, 1255 unsigned int timeout_ms) 1256 { 1257 unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies; 1258 1259 while (*fence_addr != fence_value) { 1260 if (time_after(jiffies, end_jiffies)) { 1261 pr_err("qcm fence wait loop timeout expired\n"); 1262 /* In HWS case, this is used to halt the driver thread 1263 * in order not to mess up CP states before doing 1264 * scandumps for FW debugging. 1265 */ 1266 while (halt_if_hws_hang) 1267 schedule(); 1268 1269 return -ETIME; 1270 } 1271 schedule(); 1272 } 1273 1274 return 0; 1275 } 1276 1277 static int unmap_sdma_queues(struct device_queue_manager *dqm) 1278 { 1279 int i, retval = 0; 1280 1281 for (i = 0; i < dqm->dev->device_info->num_sdma_engines + 1282 dqm->dev->device_info->num_xgmi_sdma_engines; i++) { 1283 retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA, 1284 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false, i); 1285 if (retval) 1286 return retval; 1287 } 1288 return retval; 1289 } 1290 1291 /* dqm->lock mutex has to be locked before calling this function */ 1292 static int map_queues_cpsch(struct device_queue_manager *dqm) 1293 { 1294 int retval; 1295 1296 if (dqm->queue_count <= 0 || dqm->processes_count <= 0) 1297 return 0; 1298 1299 if (dqm->active_runlist) 1300 return 0; 1301 1302 retval = pm_send_runlist(&dqm->packets, &dqm->queues); 1303 if (retval) { 1304 pr_err("failed to execute runlist\n"); 1305 return retval; 1306 } 1307 dqm->active_runlist = true; 1308 1309 return retval; 1310 } 1311 1312 /* dqm->lock mutex has to be locked before calling this function */ 1313 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 1314 enum kfd_unmap_queues_filter filter, 1315 uint32_t filter_param) 1316 { 1317 int retval = 0; 1318 1319 if (dqm->is_hws_hang) 1320 return -EIO; 1321 if (!dqm->active_runlist) 1322 return retval; 1323 1324 pr_debug("Before destroying queues, sdma queue count is : %u, xgmi sdma queue count is : %u\n", 1325 dqm->sdma_queue_count, dqm->xgmi_sdma_queue_count); 1326 1327 if (dqm->sdma_queue_count > 0 || dqm->xgmi_sdma_queue_count) 1328 unmap_sdma_queues(dqm); 1329 1330 retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE, 1331 filter, filter_param, false, 0); 1332 if (retval) 1333 return retval; 1334 1335 *dqm->fence_addr = KFD_FENCE_INIT; 1336 pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr, 1337 KFD_FENCE_COMPLETED); 1338 /* should be timed out */ 1339 retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, 1340 QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS); 1341 if (retval) 1342 return retval; 1343 1344 pm_release_ib(&dqm->packets); 1345 dqm->active_runlist = false; 1346 1347 return retval; 1348 } 1349 1350 /* dqm->lock mutex has to be locked before calling this function */ 1351 static int execute_queues_cpsch(struct device_queue_manager *dqm, 1352 enum kfd_unmap_queues_filter filter, 1353 uint32_t filter_param) 1354 { 1355 int retval; 1356 1357 if (dqm->is_hws_hang) 1358 return -EIO; 1359 retval = 
unmap_queues_cpsch(dqm, filter, filter_param); 1360 if (retval) { 1361 pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); 1362 dqm->is_hws_hang = true; 1363 schedule_work(&dqm->hw_exception_work); 1364 return retval; 1365 } 1366 1367 return map_queues_cpsch(dqm); 1368 } 1369 1370 static int destroy_queue_cpsch(struct device_queue_manager *dqm, 1371 struct qcm_process_device *qpd, 1372 struct queue *q) 1373 { 1374 int retval; 1375 struct mqd_manager *mqd_mgr; 1376 1377 retval = 0; 1378 1379 /* remove queue from list to prevent rescheduling after preemption */ 1380 dqm_lock(dqm); 1381 1382 if (qpd->is_debug) { 1383 /* 1384 * error, currently we do not allow to destroy a queue 1385 * of a currently debugged process 1386 */ 1387 retval = -EBUSY; 1388 goto failed_try_destroy_debugged_queue; 1389 1390 } 1391 1392 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1393 q->properties.type)]; 1394 1395 deallocate_doorbell(qpd, q); 1396 1397 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1398 dqm->sdma_queue_count--; 1399 deallocate_sdma_queue(dqm, q); 1400 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1401 dqm->xgmi_sdma_queue_count--; 1402 deallocate_sdma_queue(dqm, q); 1403 } 1404 1405 list_del(&q->list); 1406 qpd->queue_count--; 1407 if (q->properties.is_active) { 1408 dqm->queue_count--; 1409 retval = execute_queues_cpsch(dqm, 1410 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); 1411 if (retval == -ETIME) 1412 qpd->reset_wavefronts = true; 1413 } 1414 1415 /* 1416 * Unconditionally decrement this counter, regardless of the queue's 1417 * type 1418 */ 1419 dqm->total_queue_count--; 1420 pr_debug("Total of %d queues are accountable so far\n", 1421 dqm->total_queue_count); 1422 1423 dqm_unlock(dqm); 1424 1425 /* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */ 1426 mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 1427 1428 return retval; 1429 1430 failed_try_destroy_debugged_queue: 1431 1432 dqm_unlock(dqm); 1433 return retval; 1434 } 1435 1436 /* 1437 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to 1438 * stay in user mode. 1439 */ 1440 #define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL 1441 /* APE1 limit is inclusive and 64K aligned. */ 1442 #define APE1_LIMIT_ALIGNMENT 0xFFFF 1443 1444 static bool set_cache_memory_policy(struct device_queue_manager *dqm, 1445 struct qcm_process_device *qpd, 1446 enum cache_policy default_policy, 1447 enum cache_policy alternate_policy, 1448 void __user *alternate_aperture_base, 1449 uint64_t alternate_aperture_size) 1450 { 1451 bool retval = true; 1452 1453 if (!dqm->asic_ops.set_cache_memory_policy) 1454 return retval; 1455 1456 dqm_lock(dqm); 1457 1458 if (alternate_aperture_size == 0) { 1459 /* base > limit disables APE1 */ 1460 qpd->sh_mem_ape1_base = 1; 1461 qpd->sh_mem_ape1_limit = 0; 1462 } else { 1463 /* 1464 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]}, 1465 * SH_MEM_APE1_BASE[31:0], 0x0000 } 1466 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]}, 1467 * SH_MEM_APE1_LIMIT[31:0], 0xFFFF } 1468 * Verify that the base and size parameters can be 1469 * represented in this format and convert them. 1470 * Additionally restrict APE1 to user-mode addresses. 
1471 */ 1472 1473 uint64_t base = (uintptr_t)alternate_aperture_base; 1474 uint64_t limit = base + alternate_aperture_size - 1; 1475 1476 if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 || 1477 (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) { 1478 retval = false; 1479 goto out; 1480 } 1481 1482 qpd->sh_mem_ape1_base = base >> 16; 1483 qpd->sh_mem_ape1_limit = limit >> 16; 1484 } 1485 1486 retval = dqm->asic_ops.set_cache_memory_policy( 1487 dqm, 1488 qpd, 1489 default_policy, 1490 alternate_policy, 1491 alternate_aperture_base, 1492 alternate_aperture_size); 1493 1494 if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) 1495 program_sh_mem_settings(dqm, qpd); 1496 1497 pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", 1498 qpd->sh_mem_config, qpd->sh_mem_ape1_base, 1499 qpd->sh_mem_ape1_limit); 1500 1501 out: 1502 dqm_unlock(dqm); 1503 return retval; 1504 } 1505 1506 static int set_trap_handler(struct device_queue_manager *dqm, 1507 struct qcm_process_device *qpd, 1508 uint64_t tba_addr, 1509 uint64_t tma_addr) 1510 { 1511 uint64_t *tma; 1512 1513 if (dqm->dev->cwsr_enabled) { 1514 /* Jump from CWSR trap handler to user trap */ 1515 tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET); 1516 tma[0] = tba_addr; 1517 tma[1] = tma_addr; 1518 } else { 1519 qpd->tba_addr = tba_addr; 1520 qpd->tma_addr = tma_addr; 1521 } 1522 1523 return 0; 1524 } 1525 1526 static int process_termination_nocpsch(struct device_queue_manager *dqm, 1527 struct qcm_process_device *qpd) 1528 { 1529 struct queue *q, *next; 1530 struct device_process_node *cur, *next_dpn; 1531 int retval = 0; 1532 bool found = false; 1533 1534 dqm_lock(dqm); 1535 1536 /* Clear all user mode queues */ 1537 list_for_each_entry_safe(q, next, &qpd->queues_list, list) { 1538 int ret; 1539 1540 ret = destroy_queue_nocpsch_locked(dqm, qpd, q); 1541 if (ret) 1542 retval = ret; 1543 } 1544 1545 /* Unregister process */ 1546 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 1547 if (qpd == cur->qpd) { 1548 list_del(&cur->list); 1549 kfree(cur); 1550 dqm->processes_count--; 1551 found = true; 1552 break; 1553 } 1554 } 1555 1556 dqm_unlock(dqm); 1557 1558 /* Outside the DQM lock because under the DQM lock we can't do 1559 * reclaim or take other locks that others hold while reclaiming. 
1560 */ 1561 if (found) 1562 kfd_dec_compute_active(dqm->dev); 1563 1564 return retval; 1565 } 1566 1567 static int get_wave_state(struct device_queue_manager *dqm, 1568 struct queue *q, 1569 void __user *ctl_stack, 1570 u32 *ctl_stack_used_size, 1571 u32 *save_area_used_size) 1572 { 1573 struct mqd_manager *mqd_mgr; 1574 int r; 1575 1576 dqm_lock(dqm); 1577 1578 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || 1579 q->properties.is_active || !q->device->cwsr_enabled) { 1580 r = -EINVAL; 1581 goto dqm_unlock; 1582 } 1583 1584 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE]; 1585 1586 if (!mqd_mgr->get_wave_state) { 1587 r = -EINVAL; 1588 goto dqm_unlock; 1589 } 1590 1591 r = mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack, 1592 ctl_stack_used_size, save_area_used_size); 1593 1594 dqm_unlock: 1595 dqm_unlock(dqm); 1596 return r; 1597 } 1598 1599 static int process_termination_cpsch(struct device_queue_manager *dqm, 1600 struct qcm_process_device *qpd) 1601 { 1602 int retval; 1603 struct queue *q, *next; 1604 struct kernel_queue *kq, *kq_next; 1605 struct mqd_manager *mqd_mgr; 1606 struct device_process_node *cur, *next_dpn; 1607 enum kfd_unmap_queues_filter filter = 1608 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; 1609 bool found = false; 1610 1611 retval = 0; 1612 1613 dqm_lock(dqm); 1614 1615 /* Clean all kernel queues */ 1616 list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) { 1617 list_del(&kq->list); 1618 dqm->queue_count--; 1619 qpd->is_debug = false; 1620 dqm->total_queue_count--; 1621 filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES; 1622 } 1623 1624 /* Clear all user mode queues */ 1625 list_for_each_entry(q, &qpd->queues_list, list) { 1626 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1627 dqm->sdma_queue_count--; 1628 deallocate_sdma_queue(dqm, q); 1629 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1630 dqm->xgmi_sdma_queue_count--; 1631 deallocate_sdma_queue(dqm, q); 1632 } 1633 1634 if (q->properties.is_active) 1635 dqm->queue_count--; 1636 1637 dqm->total_queue_count--; 1638 } 1639 1640 /* Unregister process */ 1641 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 1642 if (qpd == cur->qpd) { 1643 list_del(&cur->list); 1644 kfree(cur); 1645 dqm->processes_count--; 1646 found = true; 1647 break; 1648 } 1649 } 1650 1651 retval = execute_queues_cpsch(dqm, filter, 0); 1652 if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) { 1653 pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); 1654 dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process); 1655 qpd->reset_wavefronts = false; 1656 } 1657 1658 dqm_unlock(dqm); 1659 1660 /* Outside the DQM lock because under the DQM lock we can't do 1661 * reclaim or take other locks that others hold while reclaiming. 1662 */ 1663 if (found) 1664 kfd_dec_compute_active(dqm->dev); 1665 1666 /* Lastly, free mqd resources. 1667 * Do uninit_mqd() after dqm_unlock to avoid circular locking. 
	 */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		list_del(&q->list);
		qpd->queue_count--;
		mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
	}

	return retval;
}

static int init_mqd_managers(struct device_queue_manager *dqm)
{
	int i, j;
	struct mqd_manager *mqd_mgr;

	for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
		mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
		if (!mqd_mgr) {
			pr_err("mqd manager [%d] initialization failed\n", i);
			goto out_free;
		}
		dqm->mqd_mgrs[i] = mqd_mgr;
	}

	return 0;

out_free:
	for (j = 0; j < i; j++) {
		kfree(dqm->mqd_mgrs[j]);
		dqm->mqd_mgrs[j] = NULL;
	}

	return -ENOMEM;
}

/* Allocate one hiq mqd (HWS) and all SDMA mqds in a contiguous chunk */
static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
{
	int retval;
	struct kfd_dev *dev = dqm->dev;
	struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
		dev->device_info->num_sdma_engines *
		dev->device_info->num_sdma_queues_per_engine +
		dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;

	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size,
		&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
		(void *)&(mem_obj->cpu_ptr), true);

	return retval;
}

struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	switch (dev->device_info->asic_family) {
	/* HWS is not available on Hawaii. */
	case CHIP_HAWAII:
	/* HWS depends on CWSR for timely dequeue. CWSR is not
	 * available on Tonga.
	 *
	 * FIXME: This argument also applies to Kaveri.
1740 */ 1741 case CHIP_TONGA: 1742 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS; 1743 break; 1744 default: 1745 dqm->sched_policy = sched_policy; 1746 break; 1747 } 1748 1749 dqm->dev = dev; 1750 switch (dqm->sched_policy) { 1751 case KFD_SCHED_POLICY_HWS: 1752 case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: 1753 /* initialize dqm for cp scheduling */ 1754 dqm->ops.create_queue = create_queue_cpsch; 1755 dqm->ops.initialize = initialize_cpsch; 1756 dqm->ops.start = start_cpsch; 1757 dqm->ops.stop = stop_cpsch; 1758 dqm->ops.destroy_queue = destroy_queue_cpsch; 1759 dqm->ops.update_queue = update_queue; 1760 dqm->ops.register_process = register_process; 1761 dqm->ops.unregister_process = unregister_process; 1762 dqm->ops.uninitialize = uninitialize; 1763 dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; 1764 dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; 1765 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 1766 dqm->ops.set_trap_handler = set_trap_handler; 1767 dqm->ops.process_termination = process_termination_cpsch; 1768 dqm->ops.evict_process_queues = evict_process_queues_cpsch; 1769 dqm->ops.restore_process_queues = restore_process_queues_cpsch; 1770 dqm->ops.get_wave_state = get_wave_state; 1771 break; 1772 case KFD_SCHED_POLICY_NO_HWS: 1773 /* initialize dqm for no cp scheduling */ 1774 dqm->ops.start = start_nocpsch; 1775 dqm->ops.stop = stop_nocpsch; 1776 dqm->ops.create_queue = create_queue_nocpsch; 1777 dqm->ops.destroy_queue = destroy_queue_nocpsch; 1778 dqm->ops.update_queue = update_queue; 1779 dqm->ops.register_process = register_process; 1780 dqm->ops.unregister_process = unregister_process; 1781 dqm->ops.initialize = initialize_nocpsch; 1782 dqm->ops.uninitialize = uninitialize; 1783 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 1784 dqm->ops.set_trap_handler = set_trap_handler; 1785 dqm->ops.process_termination = process_termination_nocpsch; 1786 dqm->ops.evict_process_queues = evict_process_queues_nocpsch; 1787 dqm->ops.restore_process_queues = 1788 restore_process_queues_nocpsch; 1789 dqm->ops.get_wave_state = get_wave_state; 1790 break; 1791 default: 1792 pr_err("Invalid scheduling policy %d\n", dqm->sched_policy); 1793 goto out_free; 1794 } 1795 1796 switch (dev->device_info->asic_family) { 1797 case CHIP_CARRIZO: 1798 device_queue_manager_init_vi(&dqm->asic_ops); 1799 break; 1800 1801 case CHIP_KAVERI: 1802 device_queue_manager_init_cik(&dqm->asic_ops); 1803 break; 1804 1805 case CHIP_HAWAII: 1806 device_queue_manager_init_cik_hawaii(&dqm->asic_ops); 1807 break; 1808 1809 case CHIP_TONGA: 1810 case CHIP_FIJI: 1811 case CHIP_POLARIS10: 1812 case CHIP_POLARIS11: 1813 case CHIP_POLARIS12: 1814 case CHIP_VEGAM: 1815 device_queue_manager_init_vi_tonga(&dqm->asic_ops); 1816 break; 1817 1818 case CHIP_VEGA10: 1819 case CHIP_VEGA12: 1820 case CHIP_VEGA20: 1821 case CHIP_RAVEN: 1822 device_queue_manager_init_v9(&dqm->asic_ops); 1823 break; 1824 default: 1825 WARN(1, "Unexpected ASIC family %u", 1826 dev->device_info->asic_family); 1827 goto out_free; 1828 } 1829 1830 if (init_mqd_managers(dqm)) 1831 goto out_free; 1832 1833 if (allocate_hiq_sdma_mqd(dqm)) { 1834 pr_err("Failed to allocate hiq sdma mqd trunk buffer\n"); 1835 goto out_free; 1836 } 1837 1838 if (!dqm->ops.initialize(dqm)) 1839 return dqm; 1840 1841 out_free: 1842 kfree(dqm); 1843 return NULL; 1844 } 1845 1846 void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, struct kfd_mem_obj *mqd) 1847 { 1848 WARN(!mqd, "No hiq sdma mqd trunk to free"); 1849 1850 
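	/*
	 * The HIQ MQD and all SDMA MQDs were carved out of a single GTT
	 * allocation in allocate_hiq_sdma_mqd(), so freeing that one
	 * allocation below releases them all.
	 */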
amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem); 1851 } 1852 1853 void device_queue_manager_uninit(struct device_queue_manager *dqm) 1854 { 1855 dqm->ops.uninitialize(dqm); 1856 deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd); 1857 kfree(dqm); 1858 } 1859 1860 int kfd_process_vm_fault(struct device_queue_manager *dqm, 1861 unsigned int pasid) 1862 { 1863 struct kfd_process_device *pdd; 1864 struct kfd_process *p = kfd_lookup_process_by_pasid(pasid); 1865 int ret = 0; 1866 1867 if (!p) 1868 return -EINVAL; 1869 pdd = kfd_get_process_device_data(dqm->dev, p); 1870 if (pdd) 1871 ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); 1872 kfd_unref_process(p); 1873 1874 return ret; 1875 } 1876 1877 static void kfd_process_hw_exception(struct work_struct *work) 1878 { 1879 struct device_queue_manager *dqm = container_of(work, 1880 struct device_queue_manager, hw_exception_work); 1881 amdgpu_amdkfd_gpu_reset(dqm->dev->kgd); 1882 } 1883 1884 #if defined(CONFIG_DEBUG_FS) 1885 1886 static void seq_reg_dump(struct seq_file *m, 1887 uint32_t (*dump)[2], uint32_t n_regs) 1888 { 1889 uint32_t i, count; 1890 1891 for (i = 0, count = 0; i < n_regs; i++) { 1892 if (count == 0 || 1893 dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) { 1894 seq_printf(m, "%s %08x: %08x", 1895 i ? "\n" : "", 1896 dump[i][0], dump[i][1]); 1897 count = 7; 1898 } else { 1899 seq_printf(m, " %08x", dump[i][1]); 1900 count--; 1901 } 1902 } 1903 1904 seq_puts(m, "\n"); 1905 } 1906 1907 int dqm_debugfs_hqds(struct seq_file *m, void *data) 1908 { 1909 struct device_queue_manager *dqm = data; 1910 uint32_t (*dump)[2], n_regs; 1911 int pipe, queue; 1912 int r = 0; 1913 1914 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd, 1915 KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs); 1916 if (!r) { 1917 seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n", 1918 KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1, 1919 KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm), 1920 KFD_CIK_HIQ_QUEUE); 1921 seq_reg_dump(m, dump, n_regs); 1922 1923 kfree(dump); 1924 } 1925 1926 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 1927 int pipe_offset = pipe * get_queues_per_pipe(dqm); 1928 1929 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { 1930 if (!test_bit(pipe_offset + queue, 1931 dqm->dev->shared_resources.queue_bitmap)) 1932 continue; 1933 1934 r = dqm->dev->kfd2kgd->hqd_dump( 1935 dqm->dev->kgd, pipe, queue, &dump, &n_regs); 1936 if (r) 1937 break; 1938 1939 seq_printf(m, " CP Pipe %d, Queue %d\n", 1940 pipe, queue); 1941 seq_reg_dump(m, dump, n_regs); 1942 1943 kfree(dump); 1944 } 1945 } 1946 1947 for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) { 1948 for (queue = 0; 1949 queue < dqm->dev->device_info->num_sdma_queues_per_engine; 1950 queue++) { 1951 r = dqm->dev->kfd2kgd->hqd_sdma_dump( 1952 dqm->dev->kgd, pipe, queue, &dump, &n_regs); 1953 if (r) 1954 break; 1955 1956 seq_printf(m, " SDMA Engine %d, RLC %d\n", 1957 pipe, queue); 1958 seq_reg_dump(m, dump, n_regs); 1959 1960 kfree(dump); 1961 } 1962 } 1963 1964 return r; 1965 } 1966 1967 int dqm_debugfs_execute_queues(struct device_queue_manager *dqm) 1968 { 1969 int r = 0; 1970 1971 dqm_lock(dqm); 1972 dqm->active_runlist = true; 1973 r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); 1974 dqm_unlock(dqm); 1975 1976 return r; 1977 } 1978 1979 #endif 1980