// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/ratelimit.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_reset.h"
#include "amdgpu_sdma.h"
#include "mes_v11_api_def.h"
#include "kfd_debug.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
				  u32 pasid, unsigned int vmid);

static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param,
				uint32_t grace_period);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
			      enum kfd_unmap_queues_filter filter,
			      uint32_t filter_param,
			      uint32_t grace_period,
			      bool reset);

static int map_queues_cpsch(struct device_queue_manager *dqm);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				  struct queue *q);

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				  struct queue *q);
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
static int allocate_sdma_queue(struct device_queue_manager *dqm,
			       struct queue *q, const uint32_t *restore_sdma_id);

static int reset_queues_on_hws_hang(struct device_queue_manager *dqm, bool is_sdma);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = (mec * dqm->dev->kfd->shared_resources.num_pipe_per_mec
			   + pipe) * dqm->dev->kfd->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i < dqm->dev->kfd->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			     dqm->dev->kfd->shared_resources.cp_queue_bitmap))
			return true;
	return false;
}

unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
{
	return bitmap_weight(dqm->dev->kfd->shared_resources.cp_queue_bitmap,
			     AMDGPU_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->kfd->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->kfd->shared_resources.num_pipe_per_mec;
}

static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
{
	return kfd_get_num_sdma_engines(dqm->dev) +
		kfd_get_num_xgmi_sdma_engines(dqm->dev);
}

unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
	return kfd_get_num_sdma_engines(dqm->dev) *
		dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
}

unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
{
	return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
		dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
}

static void init_sdma_bitmaps(struct device_queue_manager *dqm)
{
	bitmap_zero(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES);
	bitmap_set(dqm->sdma_bitmap, 0, get_num_sdma_queues(dqm));

	bitmap_zero(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES);
	bitmap_set(dqm->xgmi_sdma_bitmap, 0, get_num_xgmi_sdma_queues(dqm));

	/* Mask out the reserved queues */
	bitmap_andnot(dqm->sdma_bitmap, dqm->sdma_bitmap,
		      dqm->dev->kfd->device_info.reserved_sdma_queues_bitmap,
		      KFD_MAX_SDMA_QUEUES);
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
			     struct qcm_process_device *qpd)
{
	uint32_t xcc_mask = dqm->dev->xcc_mask;
	int xcc_id;

	for_each_inst(xcc_id, xcc_mask)
		dqm->dev->kfd2kgd->program_sh_mem_settings(
			dqm->dev->adev, qpd->vmid, qpd->sh_mem_config,
			qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit,
			qpd->sh_mem_bases, xcc_id);
}
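
/*
 * Descriptive note (added): kfd_hws_hang() is the common fallout path when
 * the HW scheduler (HWS/MES) stops responding. It marks all queues on this
 * device as reset via the per-process has_reset_queue flag so user mode can
 * be notified, then requests a GPU reset.
 */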
static void kfd_hws_hang(struct device_queue_manager *dqm)
{
	struct device_process_node *cur;
	struct qcm_process_device *qpd;
	struct queue *q;

	/* Mark all device queues as reset. */
	list_for_each_entry(cur, &dqm->queues, list) {
		qpd = cur->qpd;
		list_for_each_entry(q, &qpd->queues_list, list) {
			struct kfd_process_device *pdd = qpd_to_pdd(qpd);

			pdd->has_reset_queue = true;
		}
	}

	/*
	 * Issue a GPU reset if HWS is unresponsive
	 */
	amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
}

static int convert_to_mes_queue_type(int queue_type)
{
	int mes_queue_type;

	switch (queue_type) {
	case KFD_QUEUE_TYPE_COMPUTE:
		mes_queue_type = MES_QUEUE_TYPE_COMPUTE;
		break;
	case KFD_QUEUE_TYPE_SDMA:
		mes_queue_type = MES_QUEUE_TYPE_SDMA;
		break;
	default:
		WARN(1, "Invalid queue type %d", queue_type);
		mes_queue_type = -EINVAL;
		break;
	}

	return mes_queue_type;
}

static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
			 struct qcm_process_device *qpd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
	struct mes_add_queue_input queue_input;
	int r, queue_type;
	uint64_t wptr_addr_off;

	if (!dqm->sched_running || dqm->sched_halt)
		return 0;
	if (!down_read_trylock(&adev->reset_domain->sem))
		return -EIO;

	memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
	queue_input.process_id = pdd->pasid;
	queue_input.page_table_base_addr = qpd->page_table_base;
	queue_input.process_va_start = 0;
	queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
	/* MES unit for quantum is 100ns */
	queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM;  /* Equivalent to 10ms. */
	queue_input.process_context_addr = pdd->proc_ctx_gpu_addr;
	queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */
	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
	queue_input.inprocess_gang_priority = q->properties.priority;
	queue_input.gang_global_priority_level =
		AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
	queue_input.doorbell_offset = q->properties.doorbell_off;
	queue_input.mqd_addr = q->gart_mqd_addr;
	queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;

	wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1);
	queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->properties.wptr_bo) + wptr_addr_off;

	queue_input.is_kfd_process = 1;
	queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL);
	queue_input.queue_size = q->properties.queue_size >> 2;

	queue_input.paging = false;
	queue_input.tba_addr = qpd->tba_addr;
	queue_input.tma_addr = qpd->tma_addr;
	queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
	queue_input.skip_process_ctx_clear =
		qpd->pqm->process->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED &&
		(qpd->pqm->process->debug_trap_enabled ||
		 kfd_dbg_has_ttmps_always_setup(q->device));

	queue_type = convert_to_mes_queue_type(q->properties.type);
	if (queue_type < 0) {
		dev_err(adev->dev, "Queue type not supported with MES, queue:%d\n",
			q->properties.type);
		up_read(&adev->reset_domain->sem);
		return -EINVAL;
	}
	queue_input.queue_type = (uint32_t)queue_type;

	queue_input.exclusively_scheduled = q->properties.is_gws;

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
	amdgpu_mes_unlock(&adev->mes);
	up_read(&adev->reset_domain->sem);
	if (r) {
		dev_err(adev->dev, "failed to add hardware queue to MES, doorbell=0x%x\n",
			q->properties.doorbell_off);
		dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
		kfd_hws_hang(dqm);
	}

	return r;
}

static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
			    struct qcm_process_device *qpd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
	int r;
	struct mes_remove_queue_input queue_input;

	if (!dqm->sched_running || dqm->sched_halt)
		return 0;
	if (!down_read_trylock(&adev->reset_domain->sem))
		return -EIO;

	memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
	queue_input.doorbell_offset = q->properties.doorbell_off;
	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
	amdgpu_mes_unlock(&adev->mes);
	up_read(&adev->reset_domain->sem);

	if (r) {
		dev_err(adev->dev, "failed to remove hardware queue from MES, doorbell=0x%x\n",
			q->properties.doorbell_off);
		dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
		kfd_hws_hang(dqm);
	}

	return r;
}

static int remove_all_kfd_queues_mes(struct device_queue_manager *dqm)
{
	struct device_process_node *cur;
	struct device *dev = dqm->dev->adev->dev;
	struct qcm_process_device *qpd;
	struct queue *q;
	int retval = 0;

	list_for_each_entry(cur, &dqm->queues, list) {
		qpd = cur->qpd;
		list_for_each_entry(q, &qpd->queues_list, list) {
			if (q->properties.is_active) {
				retval = remove_queue_mes(dqm, q, qpd);
				if (retval) {
					dev_err(dev, "%s: Failed to remove queue %d for dev %d",
						__func__,
						q->properties.queue_id,
						dqm->dev->id);
					return retval;
				}
			}
		}
	}

	return retval;
}

static int add_all_kfd_queues_mes(struct device_queue_manager *dqm)
{
	struct device_process_node *cur;
	struct device *dev = dqm->dev->adev->dev;
	struct qcm_process_device *qpd;
	struct queue *q;
	int retval = 0;

	list_for_each_entry(cur, &dqm->queues, list) {
		qpd = cur->qpd;
		list_for_each_entry(q, &qpd->queues_list, list) {
			if (!q->properties.is_active)
				continue;
			retval = add_queue_mes(dqm, q, qpd);
			if (retval) {
				dev_err(dev, "%s: Failed to add queue %d for dev %d",
					__func__,
					q->properties.queue_id,
					dqm->dev->id);
				return retval;
			}
		}
	}

	return retval;
}

static int suspend_all_queues_mes(struct device_queue_manager *dqm)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
	int r = 0;

	if (!down_read_trylock(&adev->reset_domain->sem))
		return -EIO;

	r = amdgpu_mes_suspend(adev);
	up_read(&adev->reset_domain->sem);

	if (r) {
		dev_err(adev->dev, "failed to suspend gangs from MES\n");
		dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
		kfd_hws_hang(dqm);
	}

	return r;
}

static int resume_all_queues_mes(struct device_queue_manager *dqm)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
	int r = 0;

	if (!down_read_trylock(&adev->reset_domain->sem))
		return -EIO;

	r = amdgpu_mes_resume(adev);
	up_read(&adev->reset_domain->sem);

	if (r) {
		dev_err(adev->dev, "failed to resume gangs from MES\n");
		dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
		kfd_hws_hang(dqm);
	}

	return r;
}
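
/*
 * Descriptive note (added): queue accounting helpers. active_queue_count
 * decides whether a runlist needs to be (re)built, active_cp_queue_count
 * tracks compute/DIQ queues only, and gws_queue_count follows queues that
 * currently hold the GWS resource.
 */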
static void increment_queue_count(struct device_queue_manager *dqm,
				  struct qcm_process_device *qpd,
				  struct queue *q)
{
	dqm->active_queue_count++;
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
		dqm->active_cp_queue_count++;

	if (q->properties.is_gws) {
		dqm->gws_queue_count++;
		qpd->mapped_gws_queue = true;
	}
}

static void decrement_queue_count(struct device_queue_manager *dqm,
				  struct qcm_process_device *qpd,
				  struct queue *q)
{
	dqm->active_queue_count--;
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
		dqm->active_cp_queue_count--;

	if (q->properties.is_gws) {
		dqm->gws_queue_count--;
		qpd->mapped_gws_queue = false;
	}
}

/*
 * Allocate a doorbell ID to this queue.
 * If doorbell_id is passed in, make sure requested ID is valid then allocate it.
 */
static int allocate_doorbell(struct qcm_process_device *qpd,
			     struct queue *q,
			     uint32_t const *restore_id)
{
	struct kfd_node *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev)) {
		/* On pre-SOC15 chips we need to use the queue ID to
		 * preserve the user mode ABI.
		 */

		if (restore_id && *restore_id != q->properties.queue_id)
			return -EINVAL;

		q->doorbell_id = q->properties.queue_id;
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		   q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		/* For SDMA queues on SOC15 with 8-byte doorbell, use static
		 * doorbell assignments based on the engine and queue id.
		 * The doorbell index distance between RLC (2*i) and (2*i+1)
		 * for an SDMA engine is 512.
		 */

		uint32_t *idx_offset = dev->kfd->shared_resources.sdma_doorbell_idx;

		/*
		 * q->properties.sdma_engine_id corresponds to the virtual
		 * sdma engine number. However, for doorbell allocation,
		 * we need the physical sdma engine id in order to get the
		 * correct doorbell offset.
		 */
		uint32_t valid_id = idx_offset[qpd->dqm->dev->node_id *
					       get_num_all_sdma_engines(qpd->dqm) +
					       q->properties.sdma_engine_id]
				    + (q->properties.sdma_queue_id & 1)
				    * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
				    + (q->properties.sdma_queue_id >> 1);

		if (restore_id && *restore_id != valid_id)
			return -EINVAL;
		q->doorbell_id = valid_id;
	} else {
		/* For CP queues on SOC15 */
		if (restore_id) {
			/* make sure that ID is free */
			if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap))
				return -EINVAL;

			q->doorbell_id = *restore_id;
		} else {
			/* or reserve a free doorbell ID */
			unsigned int found;

			found = find_first_zero_bit(qpd->doorbell_bitmap,
						    KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
			if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
				pr_debug("No doorbells available");
				return -EBUSY;
			}
			set_bit(found, qpd->doorbell_bitmap);
			q->doorbell_id = found;
		}
	}

	q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev,
								  qpd->proc_doorbells,
								  q->doorbell_id,
								  dev->kfd->device_info.doorbell_size);
	return 0;
}

static void deallocate_doorbell(struct qcm_process_device *qpd,
				struct queue *q)
{
	unsigned int old;
	struct kfd_node *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev) ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		return;

	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
	WARN_ON(!old);
}

static void program_trap_handler_settings(struct device_queue_manager *dqm,
					  struct qcm_process_device *qpd)
{
	uint32_t xcc_mask = dqm->dev->xcc_mask;
	int xcc_id;

	if (dqm->dev->kfd2kgd->program_trap_handler_settings)
		for_each_inst(xcc_id, xcc_mask)
			dqm->dev->kfd2kgd->program_trap_handler_settings(
				dqm->dev->adev, qpd->vmid, qpd->tba_addr,
				qpd->tma_addr, xcc_id);
}

static int allocate_vmid(struct device_queue_manager *dqm,
			 struct qcm_process_device *qpd,
			 struct queue *q)
{
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
	struct device *dev = dqm->dev->adev->dev;
	int allocated_vmid = -1, i;

	for (i = dqm->dev->vm_info.first_vmid_kfd;
	     i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
		if (!dqm->vmid_pasid[i]) {
			allocated_vmid = i;
			break;
		}
	}

	if (allocated_vmid < 0) {
		dev_err(dev, "no more vmid to allocate\n");
		return -ENOSPC;
	}

	pr_debug("vmid allocated: %d\n", allocated_vmid);

	dqm->vmid_pasid[allocated_vmid] = pdd->pasid;

	set_pasid_vmid_mapping(dqm, pdd->pasid, allocated_vmid);

	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	program_sh_mem_settings(dqm, qpd);

	if (KFD_IS_SOC15(dqm->dev) && dqm->dev->kfd->cwsr_enabled)
		program_trap_handler_settings(dqm, qpd);

	/* qpd->page_table_base is set earlier when register_process()
	 * is called, i.e. when the first queue is created.
	 */
	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev,
							  qpd->vmid,
							  qpd->page_table_base);
	/* invalidate the VM context after pasid and vmid mapping is set up */
	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);

	if (dqm->dev->kfd2kgd->set_scratch_backing_va)
		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,
			qpd->sh_hidden_private_base, qpd->vmid);

	return 0;
}

static int flush_texture_cache_nocpsch(struct kfd_node *kdev,
				       struct qcm_process_device *qpd)
{
	const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf;
	int ret;

	if (!qpd->ib_kaddr)
		return -ENOMEM;

	ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
	if (ret)
		return ret;

	return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid,
				       qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
				       pmf->release_mem_size / sizeof(uint32_t));
}

static void deallocate_vmid(struct device_queue_manager *dqm,
			    struct qcm_process_device *qpd,
			    struct queue *q)
{
	struct device *dev = dqm->dev->adev->dev;

	/* On GFX v7, CP doesn't flush TC at dequeue */
	if (q->device->adev->asic_type == CHIP_HAWAII)
		if (flush_texture_cache_nocpsch(q->device, qpd))
			dev_err(dev, "Failed to flush TC\n");

	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
	dqm->vmid_pasid[qpd->vmid] = 0;

	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				const struct kfd_criu_queue_priv_data *qd,
				const void *restore_mqd, const void *restore_ctl_stack)
{
	struct mqd_manager *mqd_mgr;
	int retval;

	dqm_lock(dqm);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
			dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	q->properties.vmid = qpd->vmid;
	/*
	 * Eviction state logic: mark all queues as evicted, even ones
	 * not currently active. Restoring inactive queues later only
	 * updates the is_evicted flag but is a no-op otherwise.
	 */
	q->properties.is_evicted = !!qpd->evicted;

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		retval = allocate_hqd(dqm, q);
		if (retval)
			goto deallocate_vmid;
		pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			 q->pipe, q->queue);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		   q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
		if (retval)
			goto deallocate_vmid;
		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	}

	retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
	if (retval)
		goto out_deallocate_hqd;

	/* Temporarily release dqm lock to avoid a circular lock dependency */
	dqm_unlock(dqm);
	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
	dqm_lock(dqm);

	if (!q->mqd_mem_obj) {
		retval = -ENOMEM;
		goto out_deallocate_doorbell;
	}

	if (qd)
		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
				     &q->properties, restore_mqd, restore_ctl_stack,
				     qd->ctl_stack_size);
	else
		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
				  &q->gart_mqd_addr, &q->properties);

	if (q->properties.is_active) {
		if (!dqm->sched_running) {
			WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
			goto add_queue_to_list;
		}

		if (WARN(q->process->mm != current->mm,
			 "should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
						   q->queue, &q->properties, current->mm);
		if (retval)
			goto out_free_mqd;
	}

add_queue_to_list:
	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active)
		increment_queue_count(dqm, qpd, q);

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
		 dqm->total_queue_count);
	goto out_unlock;

out_free_mqd:
	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_hqd:
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		deallocate_hqd(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		deallocate_sdma_queue(dqm, q);
deallocate_vmid:
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
out_unlock:
	dqm_unlock(dqm);
	return retval;
}
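
/*
 * Descriptive note (added): pick a free HQD slot for a non-HWS compute
 * queue. Pipes are scanned round-robin starting at next_pipe_to_allocate so
 * queues spread horizontally across all enabled pipes of MEC0.
 */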
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
	     i < get_pipes_per_mec(dqm);
	     pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = ffs(dqm->allocated_queues[pipe]) - 1;
			dqm->allocated_queues[pipe] &= ~(1 << bit);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				  struct queue *q)
{
	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}

#define SQ_IND_CMD_CMD_KILL		0x00000003
#define SQ_IND_CMD_MODE_BROADCAST	0x00000001
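
/*
 * Descriptive note (added): kill all wavefronts of a process. Scan the
 * ATC VMID-PASID mapping registers to find the VMID the process currently
 * owns, then broadcast an SQ KILL command to every shader engine on each
 * XCC instance.
 */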
static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process *p)
{
	int status = 0;
	unsigned int vmid;
	uint16_t queried_pasid;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;
	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
	uint32_t xcc_mask = dev->xcc_mask;
	int xcc_id;

	reg_sq_cmd.u32All = 0;
	reg_gfx_index.u32All = 0;

	pr_debug("Killing all process wavefronts\n");

	if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
		dev_err(dev->adev->dev, "no vmid pasid mapping supported\n");
		return -EOPNOTSUPP;
	}

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd)
		return -EFAULT;

	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
	 * ATC_VMID15_PASID_MAPPING
	 * to check which VMID the current process is mapped to.
	 */

	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
				(dev->adev, vmid, &queried_pasid);

		if (status && queried_pasid == pdd->pasid) {
			pr_debug("Killing wave fronts of vmid %d and process pid %d\n",
				 vmid, p->lead_thread->pid);
			break;
		}
	}

	if (vmid > last_vmid_to_scan) {
		dev_err(dev->adev->dev, "Didn't find vmid for process pid %d\n",
			p->lead_thread->pid);
		return -EFAULT;
	}

	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
	reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
	reg_sq_cmd.bits.vm_id = vmid;

	for_each_inst(xcc_id, xcc_mask)
		dev->kfd2kgd->wave_control_execute(
			dev->adev, reg_gfx_index.u32All,
			reg_sq_cmd.u32All, xcc_id);

	return 0;
}

/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid unsynchronized access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd,
					struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		deallocate_hqd(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		deallocate_sdma_queue(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		deallocate_sdma_queue(dqm, q);
	else {
		pr_debug("q->properties.type %d is invalid\n",
			 q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	deallocate_doorbell(qpd, q);

	if (!dqm->sched_running) {
		WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
		return 0;
	}

	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				      KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				      KFD_UNMAP_LATENCY_MS,
				      q->pipe, q->queue);
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
				dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
			 */
			dbgdev_wave_reset_wavefronts(dqm->dev,
						     qpd->pqm->process);
			qpd->reset_wavefronts = false;
		}

		deallocate_vmid(dqm, qpd, q);
	}
	qpd->queue_count--;
	if (q->properties.is_active)
		decrement_queue_count(dqm, qpd, q);

	return retval;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				 struct qcm_process_device *qpd,
				 struct queue *q)
{
	int retval;
	uint64_t sdma_val = 0;
	struct device *dev = dqm->dev->adev->dev;
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
	struct mqd_manager *mqd_mgr =
		dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];

	/* Get the SDMA queue stats */
	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
						 &sdma_val);
		if (retval)
			dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n",
				q->properties.queue_id);
	}

	dqm_lock(dqm);
	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
	if (!retval)
		pdd->sdma_past_activity_counter += sdma_val;
	dqm_unlock(dqm);

	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	return retval;
}
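
/*
 * Descriptive note (added): update_queue() rewrites a queue's MQD with new
 * properties. The queue is first unmapped (HWS/MES) or its MQD destroyed
 * (non-HWS) so the update cannot race with the hardware, then it is
 * remapped or reloaded and the active/GWS queue counters are adjusted to
 * match the new state.
 */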
static int update_queue(struct device_queue_manager *dqm, struct queue *q,
			struct mqd_update_info *minfo)
{
	int retval = 0;
	struct device *dev = dqm->dev->adev->dev;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	bool prev_active = false;

	dqm_lock(dqm);
	pdd = kfd_get_process_device_data(q->device, q->process);
	if (!pdd) {
		retval = -ENODEV;
		goto out_unlock;
	}
	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	/* Save previous activity state for counters */
	prev_active = q->properties.is_active;

	/* Make sure the queue is unmapped before updating the MQD */
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		if (!dqm->dev->kfd->shared_resources.enable_mes)
			retval = unmap_queues_cpsch(dqm,
					KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
		else if (prev_active)
			retval = remove_queue_mes(dqm, q, &pdd->qpd);

		/* queue was reset, so it is no longer accessible */
		if (pdd->has_reset_queue) {
			retval = -EACCES;
			goto out_unlock;
		}

		if (retval) {
			dev_err(dev, "unmap queue failed\n");
			goto out_unlock;
		}
	} else if (prev_active &&
		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {

		if (!dqm->sched_running) {
			WARN_ONCE(1, "Update non-HWS queue while stopped\n");
			goto out_unlock;
		}

		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				(dqm->dev->kfd->cwsr_enabled ?
				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
				 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval) {
			dev_err(dev, "destroy mqd failed\n");
			goto out_unlock;
		}
	}

	mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo);

	/*
	 * check active state vs. the previous state and modify
	 * counter accordingly. map_queues_cpsch uses the
	 * dqm->active_queue_count to determine whether a new runlist must be
	 * uploaded.
	 */
	if (q->properties.is_active && !prev_active) {
		increment_queue_count(dqm, &pdd->qpd, q);
	} else if (!q->properties.is_active && prev_active) {
		decrement_queue_count(dqm, &pdd->qpd, q);
	} else if (q->gws && !q->properties.is_gws) {
		if (q->properties.is_active) {
			dqm->gws_queue_count++;
			pdd->qpd.mapped_gws_queue = true;
		}
		q->properties.is_gws = true;
	} else if (!q->gws && q->properties.is_gws) {
		if (q->properties.is_active) {
			dqm->gws_queue_count--;
			pdd->qpd.mapped_gws_queue = false;
		}
		q->properties.is_gws = false;
	}

	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		if (!dqm->dev->kfd->shared_resources.enable_mes)
			retval = map_queues_cpsch(dqm);
		else if (q->properties.is_active)
			retval = add_queue_mes(dqm, q, &pdd->qpd);
	} else if (q->properties.is_active &&
		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		if (WARN(q->process->mm != current->mm,
			 "should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
						   q->pipe, q->queue,
						   &q->properties, current->mm);
	}

out_unlock:
	dqm_unlock(dqm);
	return retval;
}

/* suspend_single_queue does not lock the dqm like
 * evict_process_queues_cpsch or evict_process_queues_nocpsch. You should
 * lock the dqm before calling, and unlock after calling.
 *
 * The reason we don't lock the dqm is because this function may be
 * called on multiple queues in a loop, so rather than locking/unlocking
 * multiple times, we will just keep the dqm locked for all of the calls.
 */
static int suspend_single_queue(struct device_queue_manager *dqm,
				struct kfd_process_device *pdd,
				struct queue *q)
{
	bool is_new;

	if (q->properties.is_suspended)
		return 0;

	pr_debug("Suspending process pid %d queue [%i]\n",
		 pdd->process->lead_thread->pid,
		 q->properties.queue_id);

	is_new = q->properties.exception_status & KFD_EC_MASK(EC_QUEUE_NEW);

	if (is_new || q->properties.is_being_destroyed) {
		pr_debug("Suspend: skip %s queue id %i\n",
			 is_new ? "new" : "destroyed",
			 q->properties.queue_id);
		return -EBUSY;
	}

	q->properties.is_suspended = true;
	if (q->properties.is_active) {
		if (dqm->dev->kfd->shared_resources.enable_mes) {
			int r = remove_queue_mes(dqm, q, &pdd->qpd);

			if (r)
				return r;
		}

		decrement_queue_count(dqm, &pdd->qpd, q);
		q->properties.is_active = false;
	}

	return 0;
}

/* resume_single_queue does not lock the dqm like the functions
 * restore_process_queues_cpsch or restore_process_queues_nocpsch. You should
 * lock the dqm before calling, and unlock after calling.
 *
 * The reason we don't lock the dqm is because this function may be
 * called on multiple queues in a loop, so rather than locking/unlocking
 * multiple times, we will just keep the dqm locked for all of the calls.
 */
static int resume_single_queue(struct device_queue_manager *dqm,
			       struct qcm_process_device *qpd,
			       struct queue *q)
{
	struct kfd_process_device *pdd;

	if (!q->properties.is_suspended)
		return 0;

	pdd = qpd_to_pdd(qpd);

	pr_debug("Restoring from suspend process pid %d queue [%i]\n",
		 pdd->process->lead_thread->pid,
		 q->properties.queue_id);

	q->properties.is_suspended = false;

	if (QUEUE_IS_ACTIVE(q->properties)) {
		if (dqm->dev->kfd->shared_resources.enable_mes) {
			int r = add_queue_mes(dqm, q, &pdd->qpd);

			if (r)
				return r;
		}

		q->properties.is_active = true;
		increment_queue_count(dqm, qpd, q);
	}

	return 0;
}

static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	int retval, ret = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_debug_ratelimited("Evicting process pid %d queues\n",
			     pdd->process->lead_thread->pid);

	pdd->last_evict_timestamp = get_jiffies_64();
	/* Mark all queues as evicted. Deactivate all active queues on
	 * the qpd.
	 */
	list_for_each_entry(q, &qpd->queues_list, list) {
		q->properties.is_evicted = true;
		if (!q->properties.is_active)
			continue;

		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		q->properties.is_active = false;
		decrement_queue_count(dqm, qpd, q);

		if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
			continue;

		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				(dqm->dev->kfd->cwsr_enabled ?
				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
				 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval && !ret)
			/* Return the first error, but keep going to
			 * maintain a consistent eviction state
			 */
			ret = retval;
	}

out:
	dqm_unlock(dqm);
	return ret;
}

static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
				      struct qcm_process_device *qpd)
{
	struct queue *q;
	struct device *dev = dqm->dev->adev->dev;
	struct kfd_process_device *pdd;
	int retval = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);

	/* The debugger creates processes that temporarily have not acquired
	 * all VMs for all devices and have no VMs themselves.
	 * Skip queue eviction on process eviction.
	 */
	if (!pdd->drm_priv)
		goto out;

	pr_debug_ratelimited("Evicting process pid %d queues\n",
			     pdd->process->lead_thread->pid);

	/* Mark all queues as evicted. Deactivate all active queues on
	 * the qpd.
	 */
	list_for_each_entry(q, &qpd->queues_list, list) {
		q->properties.is_evicted = true;
		if (!q->properties.is_active)
			continue;

		q->properties.is_active = false;
		decrement_queue_count(dqm, qpd, q);

		if (dqm->dev->kfd->shared_resources.enable_mes) {
			retval = remove_queue_mes(dqm, q, qpd);
			if (retval) {
				dev_err(dev, "Failed to evict queue %d\n",
					q->properties.queue_id);
				goto out;
			}
		}
	}
	pdd->last_evict_timestamp = get_jiffies_64();
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		retval = execute_queues_cpsch(dqm,
					      qpd->is_debug ?
					      KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
					      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
					      USE_DEFAULT_GRACE_PERIOD);

out:
	dqm_unlock(dqm);
	return retval;
}
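
/*
 * Descriptive note (added): counterpart to evict_process_queues_nocpsch().
 * Once the eviction reference count drops back to zero, clear the eviction
 * flag on every queue and reload the MQDs of queues that become active
 * again, using a safe reference to the process mm.
 */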
static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
					  struct qcm_process_device *qpd)
{
	struct mm_struct *mm = NULL;
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	uint64_t eviction_duration;
	int retval, ret = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_debug_ratelimited("Restoring process pid %d queues\n",
			     pdd->process->lead_thread->pid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	if (!list_empty(&qpd->queues_list)) {
		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
				dqm->dev->adev,
				qpd->vmid,
				qpd->page_table_base);
		kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
	}

	/* Take a safe reference to the mm_struct, which may otherwise
	 * disappear even while the kfd_process is still referenced.
	 */
	mm = get_task_mm(pdd->process->lead_thread);
	if (!mm) {
		ret = -EFAULT;
		goto out;
	}

	/* Remove the eviction flags. Activate queues that are not
	 * inactive for other reasons.
	 */
	list_for_each_entry(q, &qpd->queues_list, list) {
		q->properties.is_evicted = false;
		if (!QUEUE_IS_ACTIVE(q->properties))
			continue;

		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		q->properties.is_active = true;
		increment_queue_count(dqm, qpd, q);

		if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
			continue;

		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
					   q->queue, &q->properties, mm);
		if (retval && !ret)
			/* Return the first error, but keep going to
			 * maintain a consistent eviction state
			 */
			ret = retval;
	}
	qpd->evicted = 0;
	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
out:
	if (mm)
		mmput(mm);
	dqm_unlock(dqm);
	return ret;
}

static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct device *dev = dqm->dev->adev->dev;
	struct kfd_process_device *pdd;
	uint64_t eviction_duration;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	/* The debugger creates processes that temporarily have not acquired
	 * all VMs for all devices and have no VMs themselves.
	 * Skip queue restore on process restore.
	 */
	if (!pdd->drm_priv)
		goto vm_not_acquired;

	pr_debug_ratelimited("Restoring process pid %d queues\n",
			     pdd->process->lead_thread->pid);

	/* Update PD Base in QPD */
	qpd->page_table_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);
	pr_debug("Updated PD address to 0x%llx\n", qpd->page_table_base);

	/* activate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		q->properties.is_evicted = false;
		if (!QUEUE_IS_ACTIVE(q->properties))
			continue;

		q->properties.is_active = true;
		increment_queue_count(dqm, &pdd->qpd, q);

		if (dqm->dev->kfd->shared_resources.enable_mes) {
			retval = add_queue_mes(dqm, q, qpd);
			if (retval) {
				dev_err(dev, "Failed to restore queue %d\n",
					q->properties.queue_id);
				goto out;
			}
		}
	}
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		retval = execute_queues_cpsch(dqm,
					      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
vm_not_acquired:
	qpd->evicted = 0;
out:
	dqm_unlock(dqm);
	return retval;
}

static int register_process(struct device_queue_manager *dqm,
			    struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	int retval;

	n = kzalloc(sizeof(*n), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);

	dqm_lock(dqm);
	list_add(&n->list, &dqm->queues);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	retval = dqm->asic_ops.update_qpd(dqm, qpd);

	dqm->processes_count++;

	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	kfd_inc_compute_active(dqm->dev);

	return retval;
}

static int unregister_process(struct device_queue_manager *dqm,
			      struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	pr_debug("qpd->queues_list is %s\n",
		 list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	dqm_lock(dqm);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	if (!retval)
		kfd_dec_compute_active(dqm->dev);

	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
		       unsigned int vmid)
{
	uint32_t xcc_mask = dqm->dev->xcc_mask;
	int xcc_id, ret;

	for_each_inst(xcc_id, xcc_mask) {
		ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
			dqm->dev->adev, pasid, vmid, xcc_id);
		if (ret)
			break;
	}

	return ret;
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	uint32_t xcc_mask = dqm->dev->xcc_mask;
	unsigned int i, xcc_id;

	for_each_inst(xcc_id, xcc_mask) {
		for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) {
			if (is_pipe_enabled(dqm, 0, i)) {
				dqm->dev->kfd2kgd->init_interrupts(
					dqm->dev->adev, i, xcc_id);
			}
		}
	}
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int pipe, queue;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues)
		return -ENOMEM;

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->active_cp_queue_count = 0;
	dqm->gws_queue_count = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
			if (test_bit(pipe_offset + queue,
				     dqm->dev->kfd->shared_resources.cp_queue_bitmap))
				dqm->allocated_queues[pipe] |= 1 << queue;
	}

	memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));

	init_sdma_bitmaps(dqm);

	return 0;
}

static void uninitialize(struct device_queue_manager *dqm)
{
	int i;

	WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqd_mgrs[i]);
	mutex_destroy(&dqm->lock_hidden);
}
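
/*
 * Descriptive note (added): non-HWS start/stop. The packet manager is only
 * needed on Hawaii here, where it provides the release_mem packet used by
 * flush_texture_cache_nocpsch() when a queue is destroyed.
 */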
static int start_nocpsch(struct device_queue_manager *dqm)
{
	int r = 0;

	pr_info("SW scheduler is used");
	init_interrupts(dqm);

	if (dqm->dev->adev->asic_type == CHIP_HAWAII)
		r = pm_init(&dqm->packet_mgr, dqm);
	if (!r)
		dqm->sched_running = true;

	return r;
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	dqm_lock(dqm);
	if (!dqm->sched_running) {
		dqm_unlock(dqm);
		return 0;
	}

	if (dqm->dev->adev->asic_type == CHIP_HAWAII)
		pm_uninit(&dqm->packet_mgr);
	dqm->sched_running = false;
	dqm_unlock(dqm);

	return 0;
}

static int allocate_sdma_queue(struct device_queue_manager *dqm,
			       struct queue *q, const uint32_t *restore_sdma_id)
{
	struct device *dev = dqm->dev->adev->dev;
	int bit;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		if (bitmap_empty(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
			dev_err(dev, "No more SDMA queue to allocate\n");
			return -ENOMEM;
		}

		if (restore_sdma_id) {
			/* Re-use existing sdma_id */
			if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) {
				dev_err(dev, "SDMA queue already in use\n");
				return -EBUSY;
			}
			clear_bit(*restore_sdma_id, dqm->sdma_bitmap);
			q->sdma_id = *restore_sdma_id;
		} else {
			/* Find first available sdma_id */
			bit = find_first_bit(dqm->sdma_bitmap,
					     get_num_sdma_queues(dqm));
			clear_bit(bit, dqm->sdma_bitmap);
			q->sdma_id = bit;
		}

		q->properties.sdma_engine_id =
			q->sdma_id % kfd_get_num_sdma_engines(dqm->dev);
		q->properties.sdma_queue_id = q->sdma_id /
				kfd_get_num_sdma_engines(dqm->dev);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		if (bitmap_empty(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
			dev_err(dev, "No more XGMI SDMA queue to allocate\n");
			return -ENOMEM;
		}
		if (restore_sdma_id) {
			/* Re-use existing sdma_id */
			if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) {
				dev_err(dev, "SDMA queue already in use\n");
				return -EBUSY;
			}
			clear_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap);
			q->sdma_id = *restore_sdma_id;
		} else {
			bit = find_first_bit(dqm->xgmi_sdma_bitmap,
					     get_num_xgmi_sdma_queues(dqm));
			clear_bit(bit, dqm->xgmi_sdma_bitmap);
			q->sdma_id = bit;
		}
		/* sdma_engine_id is sdma id including
		 * both PCIe-optimized SDMAs and XGMI-
		 * optimized SDMAs. The calculation below
		 * assumes the first N engines are always
		 * PCIe-optimized ones
		 */
		q->properties.sdma_engine_id =
			kfd_get_num_sdma_engines(dqm->dev) +
			q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
		q->properties.sdma_queue_id = q->sdma_id /
				kfd_get_num_xgmi_sdma_engines(dqm->dev);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) {
		int i, num_queues, num_engines, eng_offset = 0, start_engine;
		bool free_bit_found = false, is_xgmi = false;

		if (q->properties.sdma_engine_id < kfd_get_num_sdma_engines(dqm->dev)) {
			num_queues = get_num_sdma_queues(dqm);
			num_engines = kfd_get_num_sdma_engines(dqm->dev);
			q->properties.type = KFD_QUEUE_TYPE_SDMA;
		} else {
			num_queues = get_num_xgmi_sdma_queues(dqm);
			num_engines = kfd_get_num_xgmi_sdma_engines(dqm->dev);
			eng_offset = kfd_get_num_sdma_engines(dqm->dev);
			q->properties.type = KFD_QUEUE_TYPE_SDMA_XGMI;
			is_xgmi = true;
		}

		/* Scan available bit based on target engine ID. */
		start_engine = q->properties.sdma_engine_id - eng_offset;
		for (i = start_engine; i < num_queues; i += num_engines) {

			if (!test_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap))
				continue;

			clear_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap);
			q->sdma_id = i;
			q->properties.sdma_queue_id = q->sdma_id / num_engines;
			free_bit_found = true;
			break;
		}

		if (!free_bit_found) {
			dev_err(dev, "No more SDMA queue to allocate for target ID %i\n",
				q->properties.sdma_engine_id);
			return -ENOMEM;
		}
	}

	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				  struct queue *q)
{
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		if (q->sdma_id >= get_num_sdma_queues(dqm))
			return;
		set_bit(q->sdma_id, dqm->sdma_bitmap);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
			return;
		set_bit(q->sdma_id, dqm->xgmi_sdma_bitmap);
	}
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;
	struct device *dev = dqm->dev->adev->dev;

	res.vmid_mask = dqm->dev->compute_vmid_bitmap;

	res.queue_mask = 0;
	for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe)
			/ dqm->dev->kfd->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			dev_err(dev, "Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= 1ull
			<< amdgpu_queue_mask_bit_to_set_resource_bit(
				dqm->dev->adev, i);
	}
	res.gws_mask = ~0ull;
	res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
		 "vmid mask: 0x%8X\n"
		 "queue mask: 0x%8llX\n",
		 res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packet_mgr, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->active_queue_count = dqm->processes_count = 0;
	dqm->active_cp_queue_count = 0;
	dqm->gws_queue_count = 0;
	dqm->active_runlist = false;
	dqm->trap_debug_vmid = 0;

	init_sdma_bitmaps(dqm);

	if (dqm->dev->kfd2kgd->get_iq_wait_times)
		dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
						     &dqm->wait_times,
						     ffs(dqm->dev->xcc_mask) - 1);
	return 0;
}

/* halt_cpsch:
 * Unmap queues so the scheduler doesn't continue remaining jobs in the queue.
 * Then set dqm->sched_halt so queues don't map to runlist until unhalt_cpsch
 * is called.
 */
1760 */ 1761 static int halt_cpsch(struct device_queue_manager *dqm) 1762 { 1763 int ret = 0; 1764 1765 dqm_lock(dqm); 1766 if (!dqm->sched_running) { 1767 dqm_unlock(dqm); 1768 return 0; 1769 } 1770 1771 WARN_ONCE(dqm->sched_halt, "Scheduling is already on halt\n"); 1772 1773 if (!dqm->is_hws_hang) { 1774 if (!dqm->dev->kfd->shared_resources.enable_mes) 1775 ret = unmap_queues_cpsch(dqm, 1776 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 1777 USE_DEFAULT_GRACE_PERIOD, false); 1778 else 1779 ret = remove_all_kfd_queues_mes(dqm); 1780 } 1781 dqm->sched_halt = true; 1782 dqm_unlock(dqm); 1783 1784 return ret; 1785 } 1786 1787 /* unhalt_cpsch 1788 * Unset dqm->sched_halt and map queues back to runlist 1789 */ 1790 static int unhalt_cpsch(struct device_queue_manager *dqm) 1791 { 1792 int ret = 0; 1793 1794 dqm_lock(dqm); 1795 if (!dqm->sched_running || !dqm->sched_halt) { 1796 WARN_ONCE(!dqm->sched_halt, "Scheduling is not on halt.\n"); 1797 dqm_unlock(dqm); 1798 return 0; 1799 } 1800 dqm->sched_halt = false; 1801 if (!dqm->dev->kfd->shared_resources.enable_mes) 1802 ret = execute_queues_cpsch(dqm, 1803 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 1804 0, USE_DEFAULT_GRACE_PERIOD); 1805 else 1806 ret = add_all_kfd_queues_mes(dqm); 1807 1808 dqm_unlock(dqm); 1809 1810 return ret; 1811 } 1812 1813 static int start_cpsch(struct device_queue_manager *dqm) 1814 { 1815 struct device *dev = dqm->dev->adev->dev; 1816 int retval, num_hw_queue_slots; 1817 1818 retval = 0; 1819 1820 dqm_lock(dqm); 1821 1822 if (!dqm->dev->kfd->shared_resources.enable_mes) { 1823 retval = pm_init(&dqm->packet_mgr, dqm); 1824 if (retval) 1825 goto fail_packet_manager_init; 1826 1827 retval = set_sched_resources(dqm); 1828 if (retval) 1829 goto fail_set_sched_resources; 1830 } 1831 pr_debug("Allocating fence memory\n"); 1832 1833 /* allocate fence memory on the gart */ 1834 retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr), 1835 &dqm->fence_mem); 1836 1837 if (retval) 1838 goto fail_allocate_vidmem; 1839 1840 dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr; 1841 dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr; 1842 1843 init_interrupts(dqm); 1844 1845 /* clear hang status when driver try to start the hw scheduler */ 1846 dqm->sched_running = true; 1847 1848 if (!dqm->dev->kfd->shared_resources.enable_mes) 1849 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); 1850 1851 /* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */ 1852 if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu && 1853 (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))) { 1854 uint32_t reg_offset = 0; 1855 uint32_t grace_period = 1; 1856 1857 retval = pm_update_grace_period(&dqm->packet_mgr, 1858 grace_period); 1859 if (retval) 1860 dev_err(dev, "Setting grace timeout failed\n"); 1861 else if (dqm->dev->kfd2kgd->build_grace_period_packet_info) 1862 /* Update dqm->wait_times maintained in software */ 1863 dqm->dev->kfd2kgd->build_grace_period_packet_info( 1864 dqm->dev->adev, dqm->wait_times, 1865 grace_period, ®_offset, 1866 &dqm->wait_times); 1867 } 1868 1869 /* setup per-queue reset detection buffer */ 1870 num_hw_queue_slots = dqm->dev->kfd->shared_resources.num_queue_per_pipe * 1871 dqm->dev->kfd->shared_resources.num_pipe_per_mec * 1872 NUM_XCC(dqm->dev->xcc_mask); 1873 1874 dqm->detect_hang_info_size = num_hw_queue_slots * sizeof(struct dqm_detect_hang_info); 1875 dqm->detect_hang_info = kzalloc(dqm->detect_hang_info_size, GFP_KERNEL); 1876 1877 if (!dqm->detect_hang_info) { 1878 retval 
= -ENOMEM; 1879 goto fail_detect_hang_buffer; 1880 } 1881 1882 dqm_unlock(dqm); 1883 1884 return 0; 1885 fail_detect_hang_buffer: 1886 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); 1887 fail_allocate_vidmem: 1888 fail_set_sched_resources: 1889 if (!dqm->dev->kfd->shared_resources.enable_mes) 1890 pm_uninit(&dqm->packet_mgr); 1891 fail_packet_manager_init: 1892 dqm_unlock(dqm); 1893 return retval; 1894 } 1895 1896 static int stop_cpsch(struct device_queue_manager *dqm) 1897 { 1898 dqm_lock(dqm); 1899 if (!dqm->sched_running) { 1900 dqm_unlock(dqm); 1901 return 0; 1902 } 1903 1904 if (!dqm->dev->kfd->shared_resources.enable_mes) 1905 unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false); 1906 else 1907 remove_all_kfd_queues_mes(dqm); 1908 1909 dqm->sched_running = false; 1910 1911 if (!dqm->dev->kfd->shared_resources.enable_mes) 1912 pm_release_ib(&dqm->packet_mgr); 1913 1914 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); 1915 if (!dqm->dev->kfd->shared_resources.enable_mes) 1916 pm_uninit(&dqm->packet_mgr); 1917 kfree(dqm->detect_hang_info); 1918 dqm->detect_hang_info = NULL; 1919 dqm_unlock(dqm); 1920 1921 return 0; 1922 } 1923 1924 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, 1925 struct kernel_queue *kq, 1926 struct qcm_process_device *qpd) 1927 { 1928 dqm_lock(dqm); 1929 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1930 pr_warn("Can't create new kernel queue because %d queues were already created\n", 1931 dqm->total_queue_count); 1932 dqm_unlock(dqm); 1933 return -EPERM; 1934 } 1935 1936 /* 1937 * Unconditionally increment this counter, regardless of the queue's 1938 * type or whether the queue is active. 1939 */ 1940 dqm->total_queue_count++; 1941 pr_debug("Total of %d queues are accountable so far\n", 1942 dqm->total_queue_count); 1943 1944 list_add(&kq->list, &qpd->priv_queue_list); 1945 increment_queue_count(dqm, qpd, kq->queue); 1946 qpd->is_debug = true; 1947 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 1948 USE_DEFAULT_GRACE_PERIOD); 1949 dqm_unlock(dqm); 1950 1951 return 0; 1952 } 1953 1954 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, 1955 struct kernel_queue *kq, 1956 struct qcm_process_device *qpd) 1957 { 1958 dqm_lock(dqm); 1959 list_del(&kq->list); 1960 decrement_queue_count(dqm, qpd, kq->queue); 1961 qpd->is_debug = false; 1962 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 1963 USE_DEFAULT_GRACE_PERIOD); 1964 /* 1965 * Unconditionally decrement this counter, regardless of the queue's 1966 * type. 1967 */ 1968 dqm->total_queue_count--; 1969 pr_debug("Total of %d queues are accountable so far\n", 1970 dqm->total_queue_count); 1971 dqm_unlock(dqm); 1972 } 1973 1974 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, 1975 struct qcm_process_device *qpd, 1976 const struct kfd_criu_queue_priv_data *qd, 1977 const void *restore_mqd, const void *restore_ctl_stack) 1978 { 1979 int retval; 1980 struct mqd_manager *mqd_mgr; 1981 1982 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1983 pr_warn("Can't create new usermode queue because %d queues were already created\n", 1984 dqm->total_queue_count); 1985 retval = -EPERM; 1986 goto out; 1987 } 1988 1989 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1990 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI || 1991 q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { 1992 dqm_lock(dqm); 1993 retval = allocate_sdma_queue(dqm, q, qd ? 
&qd->sdma_id : NULL); 1994 dqm_unlock(dqm); 1995 if (retval) 1996 goto out; 1997 } 1998 1999 retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL); 2000 if (retval) 2001 goto out_deallocate_sdma_queue; 2002 2003 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2004 q->properties.type)]; 2005 2006 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 2007 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 2008 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 2009 q->properties.tba_addr = qpd->tba_addr; 2010 q->properties.tma_addr = qpd->tma_addr; 2011 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties); 2012 if (!q->mqd_mem_obj) { 2013 retval = -ENOMEM; 2014 goto out_deallocate_doorbell; 2015 } 2016 2017 dqm_lock(dqm); 2018 /* 2019 * Eviction state logic: mark all queues as evicted, even ones 2020 * not currently active. Restoring inactive queues later only 2021 * updates the is_evicted flag but is a no-op otherwise. 2022 */ 2023 q->properties.is_evicted = !!qpd->evicted; 2024 q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled && 2025 kfd_dbg_has_cwsr_workaround(q->device); 2026 2027 if (qd) 2028 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, 2029 &q->properties, restore_mqd, restore_ctl_stack, 2030 qd->ctl_stack_size); 2031 else 2032 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 2033 &q->gart_mqd_addr, &q->properties); 2034 2035 list_add(&q->list, &qpd->queues_list); 2036 qpd->queue_count++; 2037 2038 if (q->properties.is_active) { 2039 increment_queue_count(dqm, qpd, q); 2040 2041 if (!dqm->dev->kfd->shared_resources.enable_mes) 2042 retval = execute_queues_cpsch(dqm, 2043 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); 2044 else 2045 retval = add_queue_mes(dqm, q, qpd); 2046 if (retval) 2047 goto cleanup_queue; 2048 } 2049 2050 /* 2051 * Unconditionally increment this counter, regardless of the queue's 2052 * type or whether the queue is active. 2053 */ 2054 dqm->total_queue_count++; 2055 2056 pr_debug("Total of %d queues are accountable so far\n", 2057 dqm->total_queue_count); 2058 2059 dqm_unlock(dqm); 2060 return retval; 2061 2062 cleanup_queue: 2063 qpd->queue_count--; 2064 list_del(&q->list); 2065 if (q->properties.is_active) 2066 decrement_queue_count(dqm, qpd, q); 2067 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2068 dqm_unlock(dqm); 2069 out_deallocate_doorbell: 2070 deallocate_doorbell(qpd, q); 2071 out_deallocate_sdma_queue: 2072 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 2073 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 2074 dqm_lock(dqm); 2075 deallocate_sdma_queue(dqm, q); 2076 dqm_unlock(dqm); 2077 } 2078 out: 2079 return retval; 2080 } 2081 2082 int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm, 2083 uint64_t fence_value, 2084 unsigned int timeout_ms) 2085 { 2086 unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies; 2087 struct device *dev = dqm->dev->adev->dev; 2088 uint64_t *fence_addr = dqm->fence_addr; 2089 2090 while (*fence_addr != fence_value) { 2091 /* Fatal err detected, this response won't come */ 2092 if (amdgpu_amdkfd_is_fed(dqm->dev->adev)) 2093 return -EIO; 2094 2095 if (time_after(jiffies, end_jiffies)) { 2096 dev_err(dev, "qcm fence wait loop timeout expired\n"); 2097 /* In HWS case, this is used to halt the driver thread 2098 * in order not to mess up CP states before doing 2099 * scandumps for FW debugging. 
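 * halt_if_hws_hang is a debug module parameter; while it stays set, the loop
 * below keeps yielding via schedule() instead of returning, so the CP state is
 * left untouched for inspection.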
2100 */ 2101 while (halt_if_hws_hang) 2102 schedule(); 2103 2104 return -ETIME; 2105 } 2106 schedule(); 2107 } 2108 2109 return 0; 2110 } 2111 2112 /* dqm->lock mutex has to be locked before calling this function */ 2113 static int map_queues_cpsch(struct device_queue_manager *dqm) 2114 { 2115 struct device *dev = dqm->dev->adev->dev; 2116 int retval; 2117 2118 if (!dqm->sched_running || dqm->sched_halt) 2119 return 0; 2120 if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0) 2121 return 0; 2122 if (dqm->active_runlist) 2123 return 0; 2124 2125 retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues); 2126 pr_debug("%s sent runlist\n", __func__); 2127 if (retval) { 2128 dev_err(dev, "failed to execute runlist\n"); 2129 return retval; 2130 } 2131 dqm->active_runlist = true; 2132 2133 return retval; 2134 } 2135 2136 static void set_queue_as_reset(struct device_queue_manager *dqm, struct queue *q, 2137 struct qcm_process_device *qpd) 2138 { 2139 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 2140 2141 dev_err(dqm->dev->adev->dev, "queue id 0x%0x at pasid %d is reset\n", 2142 q->properties.queue_id, pdd->process->lead_thread->pid); 2143 2144 pdd->has_reset_queue = true; 2145 if (q->properties.is_active) { 2146 q->properties.is_active = false; 2147 decrement_queue_count(dqm, qpd, q); 2148 } 2149 } 2150 2151 static int detect_queue_hang(struct device_queue_manager *dqm) 2152 { 2153 int i; 2154 2155 /* detect should be used only in dqm locked queue reset */ 2156 if (WARN_ON(dqm->detect_hang_count > 0)) 2157 return 0; 2158 2159 memset(dqm->detect_hang_info, 0, dqm->detect_hang_info_size); 2160 2161 for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) { 2162 uint32_t mec, pipe, queue; 2163 int xcc_id; 2164 2165 mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe) 2166 / dqm->dev->kfd->shared_resources.num_pipe_per_mec; 2167 2168 if (mec || !test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 2169 continue; 2170 2171 amdgpu_queue_mask_bit_to_mec_queue(dqm->dev->adev, i, &mec, &pipe, &queue); 2172 2173 for_each_inst(xcc_id, dqm->dev->xcc_mask) { 2174 uint64_t queue_addr = dqm->dev->kfd2kgd->hqd_get_pq_addr( 2175 dqm->dev->adev, pipe, queue, xcc_id); 2176 struct dqm_detect_hang_info hang_info; 2177 2178 if (!queue_addr) 2179 continue; 2180 2181 hang_info.pipe_id = pipe; 2182 hang_info.queue_id = queue; 2183 hang_info.xcc_id = xcc_id; 2184 hang_info.queue_address = queue_addr; 2185 2186 dqm->detect_hang_info[dqm->detect_hang_count] = hang_info; 2187 dqm->detect_hang_count++; 2188 } 2189 } 2190 2191 return dqm->detect_hang_count; 2192 } 2193 2194 static struct queue *find_queue_by_address(struct device_queue_manager *dqm, uint64_t queue_address) 2195 { 2196 struct device_process_node *cur; 2197 struct qcm_process_device *qpd; 2198 struct queue *q; 2199 2200 list_for_each_entry(cur, &dqm->queues, list) { 2201 qpd = cur->qpd; 2202 list_for_each_entry(q, &qpd->queues_list, list) { 2203 if (queue_address == q->properties.queue_address) 2204 return q; 2205 } 2206 } 2207 2208 return NULL; 2209 } 2210 2211 static int reset_hung_queues(struct device_queue_manager *dqm) 2212 { 2213 int r = 0, reset_count = 0, i; 2214 2215 if (!dqm->detect_hang_info || dqm->is_hws_hang) 2216 return -EIO; 2217 2218 /* assume dqm locked. 
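 * detect_queue_hang() below records the pipe/queue/XCC and ring address of
 * every HQD that still has a queue address programmed; each entry is then
 * matched back to a user-mode queue and reset individually.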
*/ 2219 if (!detect_queue_hang(dqm)) 2220 return -ENOTRECOVERABLE; 2221 2222 for (i = 0; i < dqm->detect_hang_count; i++) { 2223 struct dqm_detect_hang_info hang_info = dqm->detect_hang_info[i]; 2224 struct queue *q = find_queue_by_address(dqm, hang_info.queue_address); 2225 struct kfd_process_device *pdd; 2226 uint64_t queue_addr = 0; 2227 2228 if (!q) { 2229 r = -ENOTRECOVERABLE; 2230 goto reset_fail; 2231 } 2232 2233 pdd = kfd_get_process_device_data(dqm->dev, q->process); 2234 if (!pdd) { 2235 r = -ENOTRECOVERABLE; 2236 goto reset_fail; 2237 } 2238 2239 queue_addr = dqm->dev->kfd2kgd->hqd_reset(dqm->dev->adev, 2240 hang_info.pipe_id, hang_info.queue_id, hang_info.xcc_id, 2241 KFD_UNMAP_LATENCY_MS); 2242 2243 /* either reset failed or we reset an unexpected queue. */ 2244 if (queue_addr != q->properties.queue_address) { 2245 r = -ENOTRECOVERABLE; 2246 goto reset_fail; 2247 } 2248 2249 set_queue_as_reset(dqm, q, &pdd->qpd); 2250 reset_count++; 2251 } 2252 2253 if (reset_count == dqm->detect_hang_count) 2254 kfd_signal_reset_event(dqm->dev); 2255 else 2256 r = -ENOTRECOVERABLE; 2257 2258 reset_fail: 2259 dqm->detect_hang_count = 0; 2260 2261 return r; 2262 } 2263 2264 static bool sdma_has_hang(struct device_queue_manager *dqm) 2265 { 2266 int engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm); 2267 int engine_end = engine_start + get_num_all_sdma_engines(dqm); 2268 int num_queues_per_eng = dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 2269 int i, j; 2270 2271 for (i = engine_start; i < engine_end; i++) { 2272 for (j = 0; j < num_queues_per_eng; j++) { 2273 if (!dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j)) 2274 continue; 2275 2276 return true; 2277 } 2278 } 2279 2280 return false; 2281 } 2282 2283 static bool set_sdma_queue_as_reset(struct device_queue_manager *dqm, 2284 uint32_t doorbell_off) 2285 { 2286 struct device_process_node *cur; 2287 struct qcm_process_device *qpd; 2288 struct queue *q; 2289 2290 list_for_each_entry(cur, &dqm->queues, list) { 2291 qpd = cur->qpd; 2292 list_for_each_entry(q, &qpd->queues_list, list) { 2293 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA || 2294 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) && 2295 q->properties.doorbell_off == doorbell_off) { 2296 set_queue_as_reset(dqm, q, qpd); 2297 return true; 2298 } 2299 } 2300 } 2301 2302 return false; 2303 } 2304 2305 static int reset_hung_queues_sdma(struct device_queue_manager *dqm) 2306 { 2307 int engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm); 2308 int engine_end = engine_start + get_num_all_sdma_engines(dqm); 2309 int num_queues_per_eng = dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 2310 int r = 0, i, j; 2311 2312 if (dqm->is_hws_hang) 2313 return -EIO; 2314 2315 /* Scan for hung HW queues and reset engine. */ 2316 dqm->detect_hang_count = 0; 2317 for (i = engine_start; i < engine_end; i++) { 2318 for (j = 0; j < num_queues_per_eng; j++) { 2319 uint32_t doorbell_off = 2320 dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j); 2321 2322 if (!doorbell_off) 2323 continue; 2324 2325 /* Reset engine and check. 
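 * After a successful engine reset the doorbell readback is expected to be 0;
 * if the doorbell is still armed, or the owning user queue cannot be found,
 * the hang is treated as unrecoverable.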
*/ 2326 if (amdgpu_sdma_reset_engine(dqm->dev->adev, i, false) || 2327 dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j) || 2328 !set_sdma_queue_as_reset(dqm, doorbell_off)) { 2329 r = -ENOTRECOVERABLE; 2330 goto reset_fail; 2331 } 2332 2333 /* Should only expect one queue active per engine */ 2334 dqm->detect_hang_count++; 2335 break; 2336 } 2337 } 2338 2339 /* Signal process reset */ 2340 if (dqm->detect_hang_count) 2341 kfd_signal_reset_event(dqm->dev); 2342 else 2343 r = -ENOTRECOVERABLE; 2344 2345 reset_fail: 2346 dqm->detect_hang_count = 0; 2347 2348 return r; 2349 } 2350 2351 static int reset_queues_on_hws_hang(struct device_queue_manager *dqm, bool is_sdma) 2352 { 2353 while (halt_if_hws_hang) 2354 schedule(); 2355 2356 if (!amdgpu_gpu_recovery) 2357 return -ENOTRECOVERABLE; 2358 2359 return is_sdma ? reset_hung_queues_sdma(dqm) : reset_hung_queues(dqm); 2360 } 2361 2362 /* dqm->lock mutex has to be locked before calling this function */ 2363 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 2364 enum kfd_unmap_queues_filter filter, 2365 uint32_t filter_param, 2366 uint32_t grace_period, 2367 bool reset) 2368 { 2369 struct device *dev = dqm->dev->adev->dev; 2370 struct mqd_manager *mqd_mgr; 2371 int retval; 2372 2373 if (!dqm->sched_running) 2374 return 0; 2375 if (!dqm->active_runlist) 2376 return 0; 2377 if (!down_read_trylock(&dqm->dev->adev->reset_domain->sem)) 2378 return -EIO; 2379 2380 if (grace_period != USE_DEFAULT_GRACE_PERIOD) { 2381 retval = pm_update_grace_period(&dqm->packet_mgr, grace_period); 2382 if (retval) 2383 goto out; 2384 } 2385 2386 retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset); 2387 if (retval) 2388 goto out; 2389 2390 *dqm->fence_addr = KFD_FENCE_INIT; 2391 mb(); 2392 pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr, 2393 KFD_FENCE_COMPLETED); 2394 /* should be timed out */ 2395 retval = amdkfd_fence_wait_timeout(dqm, KFD_FENCE_COMPLETED, 2396 queue_preemption_timeout_ms); 2397 if (retval) { 2398 dev_err(dev, "The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); 2399 kfd_hws_hang(dqm); 2400 goto out; 2401 } 2402 2403 /* In the current MEC firmware implementation, if a compute queue 2404 * does not respond to the preemption request in time, HIQ will 2405 * abandon the unmap request without returning any timeout error 2406 * to the driver. Instead, MEC firmware will log the doorbell of the 2407 * unresponsive compute queue to HIQ.MQD.queue_doorbell_id fields.
2408 * To make sure the queue unmap was successful, driver need to 2409 * check those fields 2410 */ 2411 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; 2412 if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd) && 2413 reset_queues_on_hws_hang(dqm, false)) 2414 goto reset_fail; 2415 2416 /* Check for SDMA hang and attempt SDMA reset */ 2417 if (sdma_has_hang(dqm) && reset_queues_on_hws_hang(dqm, true)) 2418 goto reset_fail; 2419 2420 /* We need to reset the grace period value for this device */ 2421 if (grace_period != USE_DEFAULT_GRACE_PERIOD) { 2422 if (pm_update_grace_period(&dqm->packet_mgr, 2423 USE_DEFAULT_GRACE_PERIOD)) 2424 dev_err(dev, "Failed to reset grace period\n"); 2425 } 2426 2427 pm_release_ib(&dqm->packet_mgr); 2428 dqm->active_runlist = false; 2429 out: 2430 up_read(&dqm->dev->adev->reset_domain->sem); 2431 return retval; 2432 2433 reset_fail: 2434 dqm->is_hws_hang = true; 2435 kfd_hws_hang(dqm); 2436 up_read(&dqm->dev->adev->reset_domain->sem); 2437 return -ETIME; 2438 } 2439 2440 /* only for compute queue */ 2441 static int reset_queues_cpsch(struct device_queue_manager *dqm, uint16_t pasid) 2442 { 2443 int retval; 2444 2445 dqm_lock(dqm); 2446 2447 retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID, 2448 pasid, USE_DEFAULT_GRACE_PERIOD, true); 2449 2450 dqm_unlock(dqm); 2451 return retval; 2452 } 2453 2454 /* dqm->lock mutex has to be locked before calling this function */ 2455 static int execute_queues_cpsch(struct device_queue_manager *dqm, 2456 enum kfd_unmap_queues_filter filter, 2457 uint32_t filter_param, 2458 uint32_t grace_period) 2459 { 2460 int retval; 2461 2462 if (!down_read_trylock(&dqm->dev->adev->reset_domain->sem)) 2463 return -EIO; 2464 retval = unmap_queues_cpsch(dqm, filter, filter_param, grace_period, false); 2465 if (!retval) 2466 retval = map_queues_cpsch(dqm); 2467 up_read(&dqm->dev->adev->reset_domain->sem); 2468 return retval; 2469 } 2470 2471 static int wait_on_destroy_queue(struct device_queue_manager *dqm, 2472 struct queue *q) 2473 { 2474 struct kfd_process_device *pdd = kfd_get_process_device_data(q->device, 2475 q->process); 2476 int ret = 0; 2477 2478 if (WARN_ON(!pdd)) 2479 return ret; 2480 2481 if (pdd->qpd.is_debug) 2482 return ret; 2483 2484 q->properties.is_being_destroyed = true; 2485 2486 if (pdd->process->debug_trap_enabled && q->properties.is_suspended) { 2487 dqm_unlock(dqm); 2488 mutex_unlock(&q->process->mutex); 2489 ret = wait_event_interruptible(dqm->destroy_wait, 2490 !q->properties.is_suspended); 2491 2492 mutex_lock(&q->process->mutex); 2493 dqm_lock(dqm); 2494 } 2495 2496 return ret; 2497 } 2498 2499 static int destroy_queue_cpsch(struct device_queue_manager *dqm, 2500 struct qcm_process_device *qpd, 2501 struct queue *q) 2502 { 2503 int retval; 2504 struct mqd_manager *mqd_mgr; 2505 uint64_t sdma_val = 0; 2506 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 2507 struct device *dev = dqm->dev->adev->dev; 2508 2509 /* Get the SDMA queue stats */ 2510 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 2511 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 2512 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 2513 &sdma_val); 2514 if (retval) 2515 dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n", 2516 q->properties.queue_id); 2517 } 2518 2519 /* remove queue from list to prevent rescheduling after preemption */ 2520 dqm_lock(dqm); 2521 2522 retval = wait_on_destroy_queue(dqm, q); 2523 2524 if (retval) { 2525 dqm_unlock(dqm); 2526 
return retval; 2527 } 2528 2529 if (qpd->is_debug) { 2530 /* 2531 * error, currently we do not allow to destroy a queue 2532 * of a currently debugged process 2533 */ 2534 retval = -EBUSY; 2535 goto failed_try_destroy_debugged_queue; 2536 2537 } 2538 2539 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2540 q->properties.type)]; 2541 2542 deallocate_doorbell(qpd, q); 2543 2544 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 2545 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 2546 deallocate_sdma_queue(dqm, q); 2547 pdd->sdma_past_activity_counter += sdma_val; 2548 } 2549 2550 if (q->properties.is_active) { 2551 decrement_queue_count(dqm, qpd, q); 2552 q->properties.is_active = false; 2553 if (!dqm->dev->kfd->shared_resources.enable_mes) { 2554 retval = execute_queues_cpsch(dqm, 2555 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 2556 USE_DEFAULT_GRACE_PERIOD); 2557 if (retval == -ETIME) 2558 qpd->reset_wavefronts = true; 2559 } else { 2560 retval = remove_queue_mes(dqm, q, qpd); 2561 } 2562 } 2563 list_del(&q->list); 2564 qpd->queue_count--; 2565 2566 /* 2567 * Unconditionally decrement this counter, regardless of the queue's 2568 * type 2569 */ 2570 dqm->total_queue_count--; 2571 pr_debug("Total of %d queues are accountable so far\n", 2572 dqm->total_queue_count); 2573 2574 dqm_unlock(dqm); 2575 2576 /* 2577 * Do free_mqd and raise delete event after dqm_unlock(dqm) to avoid 2578 * circular locking 2579 */ 2580 kfd_dbg_ev_raise(KFD_EC_MASK(EC_DEVICE_QUEUE_DELETE), 2581 qpd->pqm->process, q->device, 2582 -1, false, NULL, 0); 2583 2584 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2585 2586 return retval; 2587 2588 failed_try_destroy_debugged_queue: 2589 2590 dqm_unlock(dqm); 2591 return retval; 2592 } 2593 2594 static bool set_cache_memory_policy(struct device_queue_manager *dqm, 2595 struct qcm_process_device *qpd, 2596 enum cache_policy default_policy, 2597 enum cache_policy alternate_policy, 2598 void __user *alternate_aperture_base, 2599 uint64_t alternate_aperture_size, 2600 u32 misc_process_properties) 2601 { 2602 bool retval = true; 2603 2604 if (!dqm->asic_ops.set_cache_memory_policy) 2605 return retval; 2606 2607 dqm_lock(dqm); 2608 2609 retval = dqm->asic_ops.set_cache_memory_policy( 2610 dqm, 2611 qpd, 2612 default_policy, 2613 alternate_policy, 2614 alternate_aperture_base, 2615 alternate_aperture_size, 2616 misc_process_properties); 2617 2618 if (retval) 2619 goto out; 2620 2621 if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) 2622 program_sh_mem_settings(dqm, qpd); 2623 2624 pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", 2625 qpd->sh_mem_config, qpd->sh_mem_ape1_base, 2626 qpd->sh_mem_ape1_limit); 2627 2628 out: 2629 dqm_unlock(dqm); 2630 return retval; 2631 } 2632 2633 static int process_termination_nocpsch(struct device_queue_manager *dqm, 2634 struct qcm_process_device *qpd) 2635 { 2636 struct queue *q; 2637 struct device_process_node *cur, *next_dpn; 2638 int retval = 0; 2639 bool found = false; 2640 2641 dqm_lock(dqm); 2642 2643 /* Clear all user mode queues */ 2644 while (!list_empty(&qpd->queues_list)) { 2645 struct mqd_manager *mqd_mgr; 2646 int ret; 2647 2648 q = list_first_entry(&qpd->queues_list, struct queue, list); 2649 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2650 q->properties.type)]; 2651 ret = destroy_queue_nocpsch_locked(dqm, qpd, q); 2652 if (ret) 2653 retval = ret; 2654 dqm_unlock(dqm); 2655 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2656 dqm_lock(dqm); 2657 } 2658 2659 /* 
Unregister process */ 2660 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 2661 if (qpd == cur->qpd) { 2662 list_del(&cur->list); 2663 kfree(cur); 2664 dqm->processes_count--; 2665 found = true; 2666 break; 2667 } 2668 } 2669 2670 dqm_unlock(dqm); 2671 2672 /* Outside the DQM lock because under the DQM lock we can't do 2673 * reclaim or take other locks that others hold while reclaiming. 2674 */ 2675 if (found) 2676 kfd_dec_compute_active(dqm->dev); 2677 2678 return retval; 2679 } 2680 2681 static int get_wave_state(struct device_queue_manager *dqm, 2682 struct queue *q, 2683 void __user *ctl_stack, 2684 u32 *ctl_stack_used_size, 2685 u32 *save_area_used_size) 2686 { 2687 struct mqd_manager *mqd_mgr; 2688 2689 dqm_lock(dqm); 2690 2691 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; 2692 2693 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || 2694 q->properties.is_active || !q->device->kfd->cwsr_enabled || 2695 !mqd_mgr->get_wave_state) { 2696 dqm_unlock(dqm); 2697 return -EINVAL; 2698 } 2699 2700 dqm_unlock(dqm); 2701 2702 /* 2703 * get_wave_state is outside the dqm lock to prevent circular locking 2704 * and the queue should be protected against destruction by the process 2705 * lock. 2706 */ 2707 return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, &q->properties, 2708 ctl_stack, ctl_stack_used_size, save_area_used_size); 2709 } 2710 2711 static void get_queue_checkpoint_info(struct device_queue_manager *dqm, 2712 const struct queue *q, 2713 u32 *mqd_size, 2714 u32 *ctl_stack_size) 2715 { 2716 struct mqd_manager *mqd_mgr; 2717 enum KFD_MQD_TYPE mqd_type = 2718 get_mqd_type_from_queue_type(q->properties.type); 2719 2720 dqm_lock(dqm); 2721 mqd_mgr = dqm->mqd_mgrs[mqd_type]; 2722 *mqd_size = mqd_mgr->mqd_size; 2723 *ctl_stack_size = 0; 2724 2725 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info) 2726 mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size); 2727 2728 dqm_unlock(dqm); 2729 } 2730 2731 static int checkpoint_mqd(struct device_queue_manager *dqm, 2732 const struct queue *q, 2733 void *mqd, 2734 void *ctl_stack) 2735 { 2736 struct mqd_manager *mqd_mgr; 2737 int r = 0; 2738 enum KFD_MQD_TYPE mqd_type = 2739 get_mqd_type_from_queue_type(q->properties.type); 2740 2741 dqm_lock(dqm); 2742 2743 if (q->properties.is_active || !q->device->kfd->cwsr_enabled) { 2744 r = -EINVAL; 2745 goto dqm_unlock; 2746 } 2747 2748 mqd_mgr = dqm->mqd_mgrs[mqd_type]; 2749 if (!mqd_mgr->checkpoint_mqd) { 2750 r = -EOPNOTSUPP; 2751 goto dqm_unlock; 2752 } 2753 2754 mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack); 2755 2756 dqm_unlock: 2757 dqm_unlock(dqm); 2758 return r; 2759 } 2760 2761 static int process_termination_cpsch(struct device_queue_manager *dqm, 2762 struct qcm_process_device *qpd) 2763 { 2764 int retval; 2765 struct queue *q; 2766 struct device *dev = dqm->dev->adev->dev; 2767 struct kernel_queue *kq, *kq_next; 2768 struct mqd_manager *mqd_mgr; 2769 struct device_process_node *cur, *next_dpn; 2770 enum kfd_unmap_queues_filter filter = 2771 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; 2772 bool found = false; 2773 2774 retval = 0; 2775 2776 dqm_lock(dqm); 2777 2778 /* Clean all kernel queues */ 2779 list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) { 2780 list_del(&kq->list); 2781 decrement_queue_count(dqm, qpd, kq->queue); 2782 qpd->is_debug = false; 2783 dqm->total_queue_count--; 2784 filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES; 2785 } 2786 2787 /* Clear all user mode queues */ 2788 list_for_each_entry(q, &qpd->queues_list, list) 
{ 2789 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 2790 deallocate_sdma_queue(dqm, q); 2791 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 2792 deallocate_sdma_queue(dqm, q); 2793 2794 if (q->properties.is_active) { 2795 decrement_queue_count(dqm, qpd, q); 2796 2797 if (dqm->dev->kfd->shared_resources.enable_mes) { 2798 retval = remove_queue_mes(dqm, q, qpd); 2799 if (retval) 2800 dev_err(dev, "Failed to remove queue %d\n", 2801 q->properties.queue_id); 2802 } 2803 } 2804 2805 dqm->total_queue_count--; 2806 } 2807 2808 /* Unregister process */ 2809 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 2810 if (qpd == cur->qpd) { 2811 list_del(&cur->list); 2812 kfree(cur); 2813 dqm->processes_count--; 2814 found = true; 2815 break; 2816 } 2817 } 2818 2819 if (!dqm->dev->kfd->shared_resources.enable_mes) 2820 retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD); 2821 2822 if ((retval || qpd->reset_wavefronts) && 2823 down_read_trylock(&dqm->dev->adev->reset_domain->sem)) { 2824 pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); 2825 dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process); 2826 qpd->reset_wavefronts = false; 2827 up_read(&dqm->dev->adev->reset_domain->sem); 2828 } 2829 2830 /* Lastly, free mqd resources. 2831 * Do free_mqd() after dqm_unlock to avoid circular locking. 2832 */ 2833 while (!list_empty(&qpd->queues_list)) { 2834 q = list_first_entry(&qpd->queues_list, struct queue, list); 2835 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2836 q->properties.type)]; 2837 list_del(&q->list); 2838 qpd->queue_count--; 2839 dqm_unlock(dqm); 2840 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2841 dqm_lock(dqm); 2842 } 2843 dqm_unlock(dqm); 2844 2845 /* Outside the DQM lock because under the DQM lock we can't do 2846 * reclaim or take other locks that others hold while reclaiming. 
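 * kfd_dec_compute_active() is therefore called only after dqm_unlock(); it may
 * end up switching the compute power profile.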
2847 */ 2848 if (found) 2849 kfd_dec_compute_active(dqm->dev); 2850 2851 return retval; 2852 } 2853 2854 static int init_mqd_managers(struct device_queue_manager *dqm) 2855 { 2856 int i, j; 2857 struct device *dev = dqm->dev->adev->dev; 2858 struct mqd_manager *mqd_mgr; 2859 2860 for (i = 0; i < KFD_MQD_TYPE_MAX; i++) { 2861 mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev); 2862 if (!mqd_mgr) { 2863 dev_err(dev, "mqd manager [%d] initialization failed\n", i); 2864 goto out_free; 2865 } 2866 dqm->mqd_mgrs[i] = mqd_mgr; 2867 } 2868 2869 return 0; 2870 2871 out_free: 2872 for (j = 0; j < i; j++) { 2873 kfree(dqm->mqd_mgrs[j]); 2874 dqm->mqd_mgrs[j] = NULL; 2875 } 2876 2877 return -ENOMEM; 2878 } 2879 2880 /* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/ 2881 static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) 2882 { 2883 int retval; 2884 struct kfd_node *dev = dqm->dev; 2885 struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd; 2886 uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * 2887 get_num_all_sdma_engines(dqm) * 2888 dev->kfd->device_info.num_sdma_queues_per_engine + 2889 (dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size * 2890 NUM_XCC(dqm->dev->xcc_mask)); 2891 2892 retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size, 2893 &(mem_obj->gtt_mem), &(mem_obj->gpu_addr), 2894 (void *)&(mem_obj->cpu_ptr), false); 2895 2896 return retval; 2897 } 2898 2899 struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev) 2900 { 2901 struct device_queue_manager *dqm; 2902 2903 pr_debug("Loading device queue manager\n"); 2904 2905 dqm = kzalloc(sizeof(*dqm), GFP_KERNEL); 2906 if (!dqm) 2907 return NULL; 2908 2909 switch (dev->adev->asic_type) { 2910 /* HWS is not available on Hawaii. */ 2911 case CHIP_HAWAII: 2912 /* HWS depends on CWSR for timely dequeue. CWSR is not 2913 * available on Tonga. 2914 * 2915 * FIXME: This argument also applies to Kaveri. 
2916 */ 2917 case CHIP_TONGA: 2918 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS; 2919 break; 2920 default: 2921 dqm->sched_policy = sched_policy; 2922 break; 2923 } 2924 2925 dqm->dev = dev; 2926 switch (dqm->sched_policy) { 2927 case KFD_SCHED_POLICY_HWS: 2928 case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: 2929 /* initialize dqm for cp scheduling */ 2930 dqm->ops.create_queue = create_queue_cpsch; 2931 dqm->ops.initialize = initialize_cpsch; 2932 dqm->ops.start = start_cpsch; 2933 dqm->ops.stop = stop_cpsch; 2934 dqm->ops.halt = halt_cpsch; 2935 dqm->ops.unhalt = unhalt_cpsch; 2936 dqm->ops.destroy_queue = destroy_queue_cpsch; 2937 dqm->ops.update_queue = update_queue; 2938 dqm->ops.register_process = register_process; 2939 dqm->ops.unregister_process = unregister_process; 2940 dqm->ops.uninitialize = uninitialize; 2941 dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; 2942 dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; 2943 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 2944 dqm->ops.process_termination = process_termination_cpsch; 2945 dqm->ops.evict_process_queues = evict_process_queues_cpsch; 2946 dqm->ops.restore_process_queues = restore_process_queues_cpsch; 2947 dqm->ops.get_wave_state = get_wave_state; 2948 dqm->ops.reset_queues = reset_queues_cpsch; 2949 dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info; 2950 dqm->ops.checkpoint_mqd = checkpoint_mqd; 2951 break; 2952 case KFD_SCHED_POLICY_NO_HWS: 2953 /* initialize dqm for no cp scheduling */ 2954 dqm->ops.start = start_nocpsch; 2955 dqm->ops.stop = stop_nocpsch; 2956 dqm->ops.create_queue = create_queue_nocpsch; 2957 dqm->ops.destroy_queue = destroy_queue_nocpsch; 2958 dqm->ops.update_queue = update_queue; 2959 dqm->ops.register_process = register_process; 2960 dqm->ops.unregister_process = unregister_process; 2961 dqm->ops.initialize = initialize_nocpsch; 2962 dqm->ops.uninitialize = uninitialize; 2963 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 2964 dqm->ops.process_termination = process_termination_nocpsch; 2965 dqm->ops.evict_process_queues = evict_process_queues_nocpsch; 2966 dqm->ops.restore_process_queues = 2967 restore_process_queues_nocpsch; 2968 dqm->ops.get_wave_state = get_wave_state; 2969 dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info; 2970 dqm->ops.checkpoint_mqd = checkpoint_mqd; 2971 break; 2972 default: 2973 dev_err(dev->adev->dev, "Invalid scheduling policy %d\n", dqm->sched_policy); 2974 goto out_free; 2975 } 2976 2977 switch (dev->adev->asic_type) { 2978 case CHIP_KAVERI: 2979 case CHIP_HAWAII: 2980 device_queue_manager_init_cik(&dqm->asic_ops); 2981 break; 2982 2983 case CHIP_CARRIZO: 2984 case CHIP_TONGA: 2985 case CHIP_FIJI: 2986 case CHIP_POLARIS10: 2987 case CHIP_POLARIS11: 2988 case CHIP_POLARIS12: 2989 case CHIP_VEGAM: 2990 device_queue_manager_init_vi(&dqm->asic_ops); 2991 break; 2992 2993 default: 2994 if (KFD_GC_VERSION(dev) >= IP_VERSION(12, 0, 0)) 2995 device_queue_manager_init_v12(&dqm->asic_ops); 2996 else if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0)) 2997 device_queue_manager_init_v11(&dqm->asic_ops); 2998 else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1)) 2999 device_queue_manager_init_v10(&dqm->asic_ops); 3000 else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1)) 3001 device_queue_manager_init_v9(&dqm->asic_ops); 3002 else { 3003 WARN(1, "Unexpected ASIC family %u", 3004 dev->adev->asic_type); 3005 goto out_free; 3006 } 3007 } 3008 3009 if (init_mqd_managers(dqm)) 3010 goto out_free; 3011 3012 if 
(!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) { 3013 dev_err(dev->adev->dev, "Failed to allocate hiq sdma mqd trunk buffer\n"); 3014 goto out_free; 3015 } 3016 3017 if (!dqm->ops.initialize(dqm)) { 3018 init_waitqueue_head(&dqm->destroy_wait); 3019 return dqm; 3020 } 3021 3022 out_free: 3023 kfree(dqm); 3024 return NULL; 3025 } 3026 3027 static void deallocate_hiq_sdma_mqd(struct kfd_node *dev, 3028 struct kfd_mem_obj *mqd) 3029 { 3030 WARN(!mqd, "No hiq sdma mqd trunk to free"); 3031 3032 amdgpu_amdkfd_free_gtt_mem(dev->adev, &mqd->gtt_mem); 3033 } 3034 3035 void device_queue_manager_uninit(struct device_queue_manager *dqm) 3036 { 3037 dqm->ops.stop(dqm); 3038 dqm->ops.uninitialize(dqm); 3039 if (!dqm->dev->kfd->shared_resources.enable_mes) 3040 deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd); 3041 kfree(dqm); 3042 } 3043 3044 int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id) 3045 { 3046 struct kfd_process_device *pdd = NULL; 3047 struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, &pdd); 3048 struct device_queue_manager *dqm = knode->dqm; 3049 struct device *dev = dqm->dev->adev->dev; 3050 struct qcm_process_device *qpd; 3051 struct queue *q = NULL; 3052 int ret = 0; 3053 3054 if (!pdd) 3055 return -EINVAL; 3056 3057 dqm_lock(dqm); 3058 3059 if (pdd) { 3060 qpd = &pdd->qpd; 3061 3062 list_for_each_entry(q, &qpd->queues_list, list) { 3063 if (q->doorbell_id == doorbell_id && q->properties.is_active) { 3064 ret = suspend_all_queues_mes(dqm); 3065 if (ret) { 3066 dev_err(dev, "Suspending all queues failed"); 3067 goto out; 3068 } 3069 3070 q->properties.is_evicted = true; 3071 q->properties.is_active = false; 3072 decrement_queue_count(dqm, qpd, q); 3073 3074 ret = remove_queue_mes(dqm, q, qpd); 3075 if (ret) { 3076 dev_err(dev, "Removing bad queue failed"); 3077 goto out; 3078 } 3079 3080 ret = resume_all_queues_mes(dqm); 3081 if (ret) 3082 dev_err(dev, "Resuming all queues failed"); 3083 3084 break; 3085 } 3086 } 3087 } 3088 3089 out: 3090 dqm_unlock(dqm); 3091 kfd_unref_process(p); 3092 return ret; 3093 } 3094 3095 static int kfd_dqm_evict_pasid_mes(struct device_queue_manager *dqm, 3096 struct qcm_process_device *qpd) 3097 { 3098 struct device *dev = dqm->dev->adev->dev; 3099 int ret = 0; 3100 3101 /* Check if process is already evicted */ 3102 dqm_lock(dqm); 3103 if (qpd->evicted) { 3104 /* Increment the evicted count to make sure the 3105 * process stays evicted before it is terminated.
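 * The matching decrement happens when the process queues are restored, so
 * nested evict/restore calls stay balanced.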
3106 */ 3107 qpd->evicted++; 3108 dqm_unlock(dqm); 3109 goto out; 3110 } 3111 dqm_unlock(dqm); 3112 3113 ret = suspend_all_queues_mes(dqm); 3114 if (ret) { 3115 dev_err(dev, "Suspending all queues failed"); 3116 goto out; 3117 } 3118 3119 ret = dqm->ops.evict_process_queues(dqm, qpd); 3120 if (ret) { 3121 dev_err(dev, "Evicting process queues failed"); 3122 goto out; 3123 } 3124 3125 ret = resume_all_queues_mes(dqm); 3126 if (ret) 3127 dev_err(dev, "Resuming all queues failed"); 3128 3129 out: 3130 return ret; 3131 } 3132 3133 int kfd_evict_process_device(struct kfd_process_device *pdd) 3134 { 3135 struct device_queue_manager *dqm; 3136 struct kfd_process *p; 3137 int ret = 0; 3138 3139 p = pdd->process; 3140 dqm = pdd->dev->dqm; 3141 3142 WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); 3143 3144 if (dqm->dev->kfd->shared_resources.enable_mes) 3145 ret = kfd_dqm_evict_pasid_mes(dqm, &pdd->qpd); 3146 else 3147 ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); 3148 3149 return ret; 3150 } 3151 3152 int reserve_debug_trap_vmid(struct device_queue_manager *dqm, 3153 struct qcm_process_device *qpd) 3154 { 3155 int r; 3156 struct device *dev = dqm->dev->adev->dev; 3157 int updated_vmid_mask; 3158 3159 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3160 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3161 return -EINVAL; 3162 } 3163 3164 dqm_lock(dqm); 3165 3166 if (dqm->trap_debug_vmid != 0) { 3167 dev_err(dev, "Trap debug id already reserved\n"); 3168 r = -EBUSY; 3169 goto out_unlock; 3170 } 3171 3172 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 3173 USE_DEFAULT_GRACE_PERIOD, false); 3174 if (r) 3175 goto out_unlock; 3176 3177 updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap; 3178 updated_vmid_mask &= ~(1 << dqm->dev->vm_info.last_vmid_kfd); 3179 3180 dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask; 3181 dqm->trap_debug_vmid = dqm->dev->vm_info.last_vmid_kfd; 3182 r = set_sched_resources(dqm); 3183 if (r) 3184 goto out_unlock; 3185 3186 r = map_queues_cpsch(dqm); 3187 if (r) 3188 goto out_unlock; 3189 3190 pr_debug("Reserved VMID for trap debug: %i\n", dqm->trap_debug_vmid); 3191 3192 out_unlock: 3193 dqm_unlock(dqm); 3194 return r; 3195 } 3196 3197 /* 3198 * Releases vmid for the trap debugger 3199 */ 3200 int release_debug_trap_vmid(struct device_queue_manager *dqm, 3201 struct qcm_process_device *qpd) 3202 { 3203 struct device *dev = dqm->dev->adev->dev; 3204 int r; 3205 int updated_vmid_mask; 3206 uint32_t trap_debug_vmid; 3207 3208 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3209 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3210 return -EINVAL; 3211 } 3212 3213 dqm_lock(dqm); 3214 trap_debug_vmid = dqm->trap_debug_vmid; 3215 if (dqm->trap_debug_vmid == 0) { 3216 dev_err(dev, "Trap debug id is not reserved\n"); 3217 r = -EINVAL; 3218 goto out_unlock; 3219 } 3220 3221 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 3222 USE_DEFAULT_GRACE_PERIOD, false); 3223 if (r) 3224 goto out_unlock; 3225 3226 updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap; 3227 updated_vmid_mask |= (1 << dqm->dev->vm_info.last_vmid_kfd); 3228 3229 dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask; 3230 dqm->trap_debug_vmid = 0; 3231 r = set_sched_resources(dqm); 3232 if (r) 3233 goto out_unlock; 3234 3235 r = map_queues_cpsch(dqm); 3236 if (r) 3237 goto out_unlock; 3238 3239 pr_debug("Released VMID for trap 
debug: %i\n", trap_debug_vmid); 3240 3241 out_unlock: 3242 dqm_unlock(dqm); 3243 return r; 3244 } 3245 3246 #define QUEUE_NOT_FOUND -1 3247 /* invalidate queue operation in array */ 3248 static void q_array_invalidate(uint32_t num_queues, uint32_t *queue_ids) 3249 { 3250 int i; 3251 3252 for (i = 0; i < num_queues; i++) 3253 queue_ids[i] |= KFD_DBG_QUEUE_INVALID_MASK; 3254 } 3255 3256 /* find queue index in array */ 3257 static int q_array_get_index(unsigned int queue_id, 3258 uint32_t num_queues, 3259 uint32_t *queue_ids) 3260 { 3261 int i; 3262 3263 for (i = 0; i < num_queues; i++) 3264 if (queue_id == (queue_ids[i] & ~KFD_DBG_QUEUE_INVALID_MASK)) 3265 return i; 3266 3267 return QUEUE_NOT_FOUND; 3268 } 3269 3270 struct copy_context_work_handler_workarea { 3271 struct work_struct copy_context_work; 3272 struct kfd_process *p; 3273 }; 3274 3275 static void copy_context_work_handler(struct work_struct *work) 3276 { 3277 struct copy_context_work_handler_workarea *workarea; 3278 struct mqd_manager *mqd_mgr; 3279 struct queue *q; 3280 struct mm_struct *mm; 3281 struct kfd_process *p; 3282 uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size; 3283 int i; 3284 3285 workarea = container_of(work, 3286 struct copy_context_work_handler_workarea, 3287 copy_context_work); 3288 3289 p = workarea->p; 3290 mm = get_task_mm(p->lead_thread); 3291 3292 if (!mm) 3293 return; 3294 3295 kthread_use_mm(mm); 3296 for (i = 0; i < p->n_pdds; i++) { 3297 struct kfd_process_device *pdd = p->pdds[i]; 3298 struct device_queue_manager *dqm = pdd->dev->dqm; 3299 struct qcm_process_device *qpd = &pdd->qpd; 3300 3301 list_for_each_entry(q, &qpd->queues_list, list) { 3302 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE) 3303 continue; 3304 3305 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; 3306 3307 /* We ignore the return value from get_wave_state 3308 * because 3309 * i) right now, it always returns 0, and 3310 * ii) if we hit an error, we would continue to the 3311 * next queue anyway. 3312 */ 3313 mqd_mgr->get_wave_state(mqd_mgr, 3314 q->mqd, 3315 &q->properties, 3316 (void __user *) q->properties.ctx_save_restore_area_address, 3317 &tmp_ctl_stack_used_size, 3318 &tmp_save_area_used_size); 3319 } 3320 } 3321 kthread_unuse_mm(mm); 3322 mmput(mm); 3323 } 3324 3325 static uint32_t *get_queue_ids(uint32_t num_queues, uint32_t *usr_queue_id_array) 3326 { 3327 size_t array_size = num_queues * sizeof(uint32_t); 3328 3329 if (!usr_queue_id_array) 3330 return NULL; 3331 3332 return memdup_user(usr_queue_id_array, array_size); 3333 } 3334 3335 int resume_queues(struct kfd_process *p, 3336 uint32_t num_queues, 3337 uint32_t *usr_queue_id_array) 3338 { 3339 uint32_t *queue_ids = NULL; 3340 int total_resumed = 0; 3341 int i; 3342 3343 if (usr_queue_id_array) { 3344 queue_ids = get_queue_ids(num_queues, usr_queue_id_array); 3345 3346 if (IS_ERR(queue_ids)) 3347 return PTR_ERR(queue_ids); 3348 3349 /* mask all queues as invalid. 
unmask per successful request */ 3350 q_array_invalidate(num_queues, queue_ids); 3351 } 3352 3353 for (i = 0; i < p->n_pdds; i++) { 3354 struct kfd_process_device *pdd = p->pdds[i]; 3355 struct device_queue_manager *dqm = pdd->dev->dqm; 3356 struct device *dev = dqm->dev->adev->dev; 3357 struct qcm_process_device *qpd = &pdd->qpd; 3358 struct queue *q; 3359 int r, per_device_resumed = 0; 3360 3361 dqm_lock(dqm); 3362 3363 /* unmask queues that resume or already resumed as valid */ 3364 list_for_each_entry(q, &qpd->queues_list, list) { 3365 int q_idx = QUEUE_NOT_FOUND; 3366 3367 if (queue_ids) 3368 q_idx = q_array_get_index( 3369 q->properties.queue_id, 3370 num_queues, 3371 queue_ids); 3372 3373 if (!queue_ids || q_idx != QUEUE_NOT_FOUND) { 3374 int err = resume_single_queue(dqm, &pdd->qpd, q); 3375 3376 if (queue_ids) { 3377 if (!err) { 3378 queue_ids[q_idx] &= 3379 ~KFD_DBG_QUEUE_INVALID_MASK; 3380 } else { 3381 queue_ids[q_idx] |= 3382 KFD_DBG_QUEUE_ERROR_MASK; 3383 break; 3384 } 3385 } 3386 3387 if (dqm->dev->kfd->shared_resources.enable_mes) { 3388 wake_up_all(&dqm->destroy_wait); 3389 if (!err) 3390 total_resumed++; 3391 } else { 3392 per_device_resumed++; 3393 } 3394 } 3395 } 3396 3397 if (!per_device_resumed) { 3398 dqm_unlock(dqm); 3399 continue; 3400 } 3401 3402 r = execute_queues_cpsch(dqm, 3403 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 3404 0, 3405 USE_DEFAULT_GRACE_PERIOD); 3406 if (r) { 3407 dev_err(dev, "Failed to resume process queues\n"); 3408 if (queue_ids) { 3409 list_for_each_entry(q, &qpd->queues_list, list) { 3410 int q_idx = q_array_get_index( 3411 q->properties.queue_id, 3412 num_queues, 3413 queue_ids); 3414 3415 /* mask queue as error on resume fail */ 3416 if (q_idx != QUEUE_NOT_FOUND) 3417 queue_ids[q_idx] |= 3418 KFD_DBG_QUEUE_ERROR_MASK; 3419 } 3420 } 3421 } else { 3422 wake_up_all(&dqm->destroy_wait); 3423 total_resumed += per_device_resumed; 3424 } 3425 3426 dqm_unlock(dqm); 3427 } 3428 3429 if (queue_ids) { 3430 if (copy_to_user((void __user *)usr_queue_id_array, queue_ids, 3431 num_queues * sizeof(uint32_t))) 3432 pr_err("copy_to_user failed on queue resume\n"); 3433 3434 kfree(queue_ids); 3435 } 3436 3437 return total_resumed; 3438 } 3439 3440 int suspend_queues(struct kfd_process *p, 3441 uint32_t num_queues, 3442 uint32_t grace_period, 3443 uint64_t exception_clear_mask, 3444 uint32_t *usr_queue_id_array) 3445 { 3446 uint32_t *queue_ids = get_queue_ids(num_queues, usr_queue_id_array); 3447 int total_suspended = 0; 3448 int i; 3449 3450 if (IS_ERR(queue_ids)) 3451 return PTR_ERR(queue_ids); 3452 3453 /* mask all queues as invalid. 
unmask on successful request */ 3454 q_array_invalidate(num_queues, queue_ids); 3455 3456 for (i = 0; i < p->n_pdds; i++) { 3457 struct kfd_process_device *pdd = p->pdds[i]; 3458 struct device_queue_manager *dqm = pdd->dev->dqm; 3459 struct device *dev = dqm->dev->adev->dev; 3460 struct qcm_process_device *qpd = &pdd->qpd; 3461 struct queue *q; 3462 int r, per_device_suspended = 0; 3463 3464 mutex_lock(&p->event_mutex); 3465 dqm_lock(dqm); 3466 3467 /* unmask queues that suspend or already suspended */ 3468 list_for_each_entry(q, &qpd->queues_list, list) { 3469 int q_idx = q_array_get_index(q->properties.queue_id, 3470 num_queues, 3471 queue_ids); 3472 3473 if (q_idx != QUEUE_NOT_FOUND) { 3474 int err = suspend_single_queue(dqm, pdd, q); 3475 bool is_mes = dqm->dev->kfd->shared_resources.enable_mes; 3476 3477 if (!err) { 3478 queue_ids[q_idx] &= ~KFD_DBG_QUEUE_INVALID_MASK; 3479 if (exception_clear_mask && is_mes) 3480 q->properties.exception_status &= 3481 ~exception_clear_mask; 3482 3483 if (is_mes) 3484 total_suspended++; 3485 else 3486 per_device_suspended++; 3487 } else if (err != -EBUSY) { 3488 r = err; 3489 queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK; 3490 break; 3491 } 3492 } 3493 } 3494 3495 if (!per_device_suspended) { 3496 dqm_unlock(dqm); 3497 mutex_unlock(&p->event_mutex); 3498 if (total_suspended) 3499 amdgpu_amdkfd_debug_mem_fence(dqm->dev->adev); 3500 continue; 3501 } 3502 3503 r = execute_queues_cpsch(dqm, 3504 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 3505 grace_period); 3506 3507 if (r) 3508 dev_err(dev, "Failed to suspend process queues.\n"); 3509 else 3510 total_suspended += per_device_suspended; 3511 3512 list_for_each_entry(q, &qpd->queues_list, list) { 3513 int q_idx = q_array_get_index(q->properties.queue_id, 3514 num_queues, queue_ids); 3515 3516 if (q_idx == QUEUE_NOT_FOUND) 3517 continue; 3518 3519 /* mask queue as error on suspend fail */ 3520 if (r) 3521 queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK; 3522 else if (exception_clear_mask) 3523 q->properties.exception_status &= 3524 ~exception_clear_mask; 3525 } 3526 3527 dqm_unlock(dqm); 3528 mutex_unlock(&p->event_mutex); 3529 amdgpu_device_flush_hdp(dqm->dev->adev, NULL); 3530 } 3531 3532 if (total_suspended) { 3533 struct copy_context_work_handler_workarea copy_context_worker; 3534 3535 INIT_WORK_ONSTACK( 3536 &copy_context_worker.copy_context_work, 3537 copy_context_work_handler); 3538 3539 copy_context_worker.p = p; 3540 3541 schedule_work(&copy_context_worker.copy_context_work); 3542 3543 3544 flush_work(&copy_context_worker.copy_context_work); 3545 destroy_work_on_stack(&copy_context_worker.copy_context_work); 3546 } 3547 3548 if (copy_to_user((void __user *)usr_queue_id_array, queue_ids, 3549 num_queues * sizeof(uint32_t))) 3550 pr_err("copy_to_user failed on queue suspend\n"); 3551 3552 kfree(queue_ids); 3553 3554 return total_suspended; 3555 } 3556 3557 static uint32_t set_queue_type_for_user(struct queue_properties *q_props) 3558 { 3559 switch (q_props->type) { 3560 case KFD_QUEUE_TYPE_COMPUTE: 3561 return q_props->format == KFD_QUEUE_FORMAT_PM4 3562 ?
KFD_IOC_QUEUE_TYPE_COMPUTE 3563 : KFD_IOC_QUEUE_TYPE_COMPUTE_AQL; 3564 case KFD_QUEUE_TYPE_SDMA: 3565 return KFD_IOC_QUEUE_TYPE_SDMA; 3566 case KFD_QUEUE_TYPE_SDMA_XGMI: 3567 return KFD_IOC_QUEUE_TYPE_SDMA_XGMI; 3568 default: 3569 WARN_ONCE(true, "queue type not recognized!"); 3570 return 0xffffffff; 3571 }; 3572 } 3573 3574 void set_queue_snapshot_entry(struct queue *q, 3575 uint64_t exception_clear_mask, 3576 struct kfd_queue_snapshot_entry *qss_entry) 3577 { 3578 qss_entry->ring_base_address = q->properties.queue_address; 3579 qss_entry->write_pointer_address = (uint64_t)q->properties.write_ptr; 3580 qss_entry->read_pointer_address = (uint64_t)q->properties.read_ptr; 3581 qss_entry->ctx_save_restore_address = 3582 q->properties.ctx_save_restore_area_address; 3583 qss_entry->ctx_save_restore_area_size = 3584 q->properties.ctx_save_restore_area_size; 3585 qss_entry->exception_status = q->properties.exception_status; 3586 qss_entry->queue_id = q->properties.queue_id; 3587 qss_entry->gpu_id = q->device->id; 3588 qss_entry->ring_size = (uint32_t)q->properties.queue_size; 3589 qss_entry->queue_type = set_queue_type_for_user(&q->properties); 3590 q->properties.exception_status &= ~exception_clear_mask; 3591 } 3592 3593 int debug_lock_and_unmap(struct device_queue_manager *dqm) 3594 { 3595 struct device *dev = dqm->dev->adev->dev; 3596 int r; 3597 3598 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3599 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3600 return -EINVAL; 3601 } 3602 3603 if (!kfd_dbg_is_per_vmid_supported(dqm->dev)) 3604 return 0; 3605 3606 dqm_lock(dqm); 3607 3608 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 0, false); 3609 if (r) 3610 dqm_unlock(dqm); 3611 3612 return r; 3613 } 3614 3615 int debug_map_and_unlock(struct device_queue_manager *dqm) 3616 { 3617 struct device *dev = dqm->dev->adev->dev; 3618 int r; 3619 3620 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3621 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3622 return -EINVAL; 3623 } 3624 3625 if (!kfd_dbg_is_per_vmid_supported(dqm->dev)) 3626 return 0; 3627 3628 r = map_queues_cpsch(dqm); 3629 3630 dqm_unlock(dqm); 3631 3632 return r; 3633 } 3634 3635 int debug_refresh_runlist(struct device_queue_manager *dqm) 3636 { 3637 int r = debug_lock_and_unmap(dqm); 3638 3639 if (r) 3640 return r; 3641 3642 return debug_map_and_unlock(dqm); 3643 } 3644 3645 bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm, 3646 struct qcm_process_device *qpd, 3647 int doorbell_off, u32 *queue_format) 3648 { 3649 struct queue *q; 3650 bool r = false; 3651 3652 if (!queue_format) 3653 return r; 3654 3655 dqm_lock(dqm); 3656 3657 list_for_each_entry(q, &qpd->queues_list, list) { 3658 if (q->properties.doorbell_off == doorbell_off) { 3659 *queue_format = q->properties.format; 3660 r = true; 3661 goto out; 3662 } 3663 } 3664 3665 out: 3666 dqm_unlock(dqm); 3667 return r; 3668 } 3669 #if defined(CONFIG_DEBUG_FS) 3670 3671 static void seq_reg_dump(struct seq_file *m, 3672 uint32_t (*dump)[2], uint32_t n_regs) 3673 { 3674 uint32_t i, count; 3675 3676 for (i = 0, count = 0; i < n_regs; i++) { 3677 if (count == 0 || 3678 dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) { 3679 seq_printf(m, "%s %08x: %08x", 3680 i ? 
"\n" : "", 3681 dump[i][0], dump[i][1]); 3682 count = 7; 3683 } else { 3684 seq_printf(m, " %08x", dump[i][1]); 3685 count--; 3686 } 3687 } 3688 3689 seq_puts(m, "\n"); 3690 } 3691 3692 int dqm_debugfs_hqds(struct seq_file *m, void *data) 3693 { 3694 struct device_queue_manager *dqm = data; 3695 uint32_t xcc_mask = dqm->dev->xcc_mask; 3696 uint32_t (*dump)[2], n_regs; 3697 int pipe, queue; 3698 int r = 0, xcc_id; 3699 uint32_t sdma_engine_start; 3700 3701 if (!dqm->sched_running) { 3702 seq_puts(m, " Device is stopped\n"); 3703 return 0; 3704 } 3705 3706 for_each_inst(xcc_id, xcc_mask) { 3707 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 3708 KFD_CIK_HIQ_PIPE, 3709 KFD_CIK_HIQ_QUEUE, &dump, 3710 &n_regs, xcc_id); 3711 if (!r) { 3712 seq_printf( 3713 m, 3714 " Inst %d, HIQ on MEC %d Pipe %d Queue %d\n", 3715 xcc_id, 3716 KFD_CIK_HIQ_PIPE / get_pipes_per_mec(dqm) + 1, 3717 KFD_CIK_HIQ_PIPE % get_pipes_per_mec(dqm), 3718 KFD_CIK_HIQ_QUEUE); 3719 seq_reg_dump(m, dump, n_regs); 3720 3721 kfree(dump); 3722 } 3723 3724 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 3725 int pipe_offset = pipe * get_queues_per_pipe(dqm); 3726 3727 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { 3728 if (!test_bit(pipe_offset + queue, 3729 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 3730 continue; 3731 3732 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 3733 pipe, queue, 3734 &dump, &n_regs, 3735 xcc_id); 3736 if (r) 3737 break; 3738 3739 seq_printf(m, 3740 " Inst %d, CP Pipe %d, Queue %d\n", 3741 xcc_id, pipe, queue); 3742 seq_reg_dump(m, dump, n_regs); 3743 3744 kfree(dump); 3745 } 3746 } 3747 } 3748 3749 sdma_engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm); 3750 for (pipe = sdma_engine_start; 3751 pipe < (sdma_engine_start + get_num_all_sdma_engines(dqm)); 3752 pipe++) { 3753 for (queue = 0; 3754 queue < dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 3755 queue++) { 3756 r = dqm->dev->kfd2kgd->hqd_sdma_dump( 3757 dqm->dev->adev, pipe, queue, &dump, &n_regs); 3758 if (r) 3759 break; 3760 3761 seq_printf(m, " SDMA Engine %d, RLC %d\n", 3762 pipe, queue); 3763 seq_reg_dump(m, dump, n_regs); 3764 3765 kfree(dump); 3766 } 3767 } 3768 3769 return r; 3770 } 3771 3772 int dqm_debugfs_hang_hws(struct device_queue_manager *dqm) 3773 { 3774 int r = 0; 3775 3776 dqm_lock(dqm); 3777 r = pm_debugfs_hang_hws(&dqm->packet_mgr); 3778 if (r) { 3779 dqm_unlock(dqm); 3780 return r; 3781 } 3782 dqm->active_runlist = true; 3783 r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 3784 0, USE_DEFAULT_GRACE_PERIOD); 3785 dqm_unlock(dqm); 3786 3787 return r; 3788 } 3789 3790 #endif 3791