1 // SPDX-License-Identifier: GPL-2.0 OR MIT 2 /* 3 * Copyright 2014-2022 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 */ 24 25 #include <linux/ratelimit.h> 26 #include <linux/printk.h> 27 #include <linux/slab.h> 28 #include <linux/list.h> 29 #include <linux/types.h> 30 #include <linux/bitops.h> 31 #include <linux/sched.h> 32 #include "kfd_priv.h" 33 #include "kfd_device_queue_manager.h" 34 #include "kfd_mqd_manager.h" 35 #include "cik_regs.h" 36 #include "kfd_kernel_queue.h" 37 #include "amdgpu_amdkfd.h" 38 #include "amdgpu_reset.h" 39 #include "amdgpu_sdma.h" 40 #include "mes_v11_api_def.h" 41 #include "kfd_debug.h" 42 43 /* Size of the per-pipe EOP queue */ 44 #define CIK_HPD_EOP_BYTES_LOG2 11 45 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2) 46 /* See unmap_queues_cpsch() */ 47 #define USE_DEFAULT_GRACE_PERIOD 0xffffffff 48 49 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, 50 u32 pasid, unsigned int vmid); 51 52 static int execute_queues_cpsch(struct device_queue_manager *dqm, 53 enum kfd_unmap_queues_filter filter, 54 uint32_t filter_param, 55 uint32_t grace_period); 56 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 57 enum kfd_unmap_queues_filter filter, 58 uint32_t filter_param, 59 uint32_t grace_period, 60 bool reset); 61 62 static int map_queues_cpsch(struct device_queue_manager *dqm); 63 64 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 65 struct queue *q); 66 67 static inline void deallocate_hqd(struct device_queue_manager *dqm, 68 struct queue *q); 69 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q); 70 static int allocate_sdma_queue(struct device_queue_manager *dqm, 71 struct queue *q, const uint32_t *restore_sdma_id); 72 73 static int reset_queues_on_hws_hang(struct device_queue_manager *dqm, bool is_sdma); 74 75 static inline 76 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) 77 { 78 if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI) 79 return KFD_MQD_TYPE_SDMA; 80 return KFD_MQD_TYPE_CP; 81 } 82 83 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) 84 { 85 int i; 86 int pipe_offset = (mec * dqm->dev->kfd->shared_resources.num_pipe_per_mec 87 + pipe) * dqm->dev->kfd->shared_resources.num_queue_per_pipe; 88 89 /* queue is available for KFD usage if bit is 1 */ 90 for (i = 0; i < dqm->dev->kfd->shared_resources.num_queue_per_pipe; 
++i) 91 if (test_bit(pipe_offset + i, 92 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 93 return true; 94 return false; 95 } 96 97 unsigned int get_cp_queues_num(struct device_queue_manager *dqm) 98 { 99 return bitmap_weight(dqm->dev->kfd->shared_resources.cp_queue_bitmap, 100 AMDGPU_MAX_QUEUES); 101 } 102 103 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm) 104 { 105 return dqm->dev->kfd->shared_resources.num_queue_per_pipe; 106 } 107 108 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm) 109 { 110 return dqm->dev->kfd->shared_resources.num_pipe_per_mec; 111 } 112 113 static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm) 114 { 115 return kfd_get_num_sdma_engines(dqm->dev) + 116 kfd_get_num_xgmi_sdma_engines(dqm->dev); 117 } 118 119 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) 120 { 121 return kfd_get_num_sdma_engines(dqm->dev) * 122 dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 123 } 124 125 unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) 126 { 127 return kfd_get_num_xgmi_sdma_engines(dqm->dev) * 128 dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 129 } 130 131 static void init_sdma_bitmaps(struct device_queue_manager *dqm) 132 { 133 bitmap_zero(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES); 134 bitmap_set(dqm->sdma_bitmap, 0, get_num_sdma_queues(dqm)); 135 136 bitmap_zero(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES); 137 bitmap_set(dqm->xgmi_sdma_bitmap, 0, get_num_xgmi_sdma_queues(dqm)); 138 139 /* Mask out the reserved queues */ 140 bitmap_clear(dqm->sdma_bitmap, 0, kfd_get_num_sdma_engines(dqm->dev) * 141 dqm->dev->kfd->device_info.num_reserved_sdma_queues_per_engine); 142 bitmap_clear(dqm->xgmi_sdma_bitmap, 0, kfd_get_num_xgmi_sdma_engines(dqm->dev) * 143 dqm->dev->kfd->device_info.num_reserved_sdma_queues_per_engine); 144 } 145 146 void program_sh_mem_settings(struct device_queue_manager *dqm, 147 struct qcm_process_device *qpd) 148 { 149 uint32_t xcc_mask = dqm->dev->xcc_mask; 150 int xcc_id; 151 152 for_each_inst(xcc_id, xcc_mask) 153 dqm->dev->kfd2kgd->program_sh_mem_settings( 154 dqm->dev->adev, qpd->vmid, qpd->sh_mem_config, 155 qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit, 156 qpd->sh_mem_bases, xcc_id); 157 } 158 159 static void kfd_hws_hang(struct device_queue_manager *dqm) 160 { 161 struct device_process_node *cur; 162 struct qcm_process_device *qpd; 163 struct queue *q; 164 165 /* Mark all device queues as reset. 
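Setting pdd->has_reset_queue on every process device records that the queues are lost before the full GPU reset below is issued.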
*/ 166 list_for_each_entry(cur, &dqm->queues, list) { 167 qpd = cur->qpd; 168 list_for_each_entry(q, &qpd->queues_list, list) { 169 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 170 171 pdd->has_reset_queue = true; 172 } 173 } 174 175 /* 176 * Issue a GPU reset if HWS is unresponsive 177 */ 178 amdgpu_amdkfd_gpu_reset(dqm->dev->adev); 179 } 180 181 static int convert_to_mes_queue_type(int queue_type) 182 { 183 int mes_queue_type; 184 185 switch (queue_type) { 186 case KFD_QUEUE_TYPE_COMPUTE: 187 mes_queue_type = MES_QUEUE_TYPE_COMPUTE; 188 break; 189 case KFD_QUEUE_TYPE_SDMA: 190 mes_queue_type = MES_QUEUE_TYPE_SDMA; 191 break; 192 default: 193 WARN(1, "Invalid queue type %d", queue_type); 194 mes_queue_type = -EINVAL; 195 break; 196 } 197 198 return mes_queue_type; 199 } 200 201 static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, 202 struct qcm_process_device *qpd) 203 { 204 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 205 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 206 struct mes_add_queue_input queue_input; 207 int r, queue_type; 208 uint64_t wptr_addr_off; 209 210 if (!dqm->sched_running || dqm->sched_halt) 211 return 0; 212 if (!down_read_trylock(&adev->reset_domain->sem)) 213 return -EIO; 214 215 memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); 216 queue_input.process_id = pdd->pasid; 217 queue_input.page_table_base_addr = qpd->page_table_base; 218 queue_input.process_va_start = 0; 219 queue_input.process_va_end = adev->vm_manager.max_pfn - 1; 220 /* MES unit for quantum is 100ns */ 221 queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM; /* Equivalent to 10ms. */ 222 queue_input.process_context_addr = pdd->proc_ctx_gpu_addr; 223 queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */ 224 queue_input.gang_context_addr = q->gang_ctx_gpu_addr; 225 queue_input.inprocess_gang_priority = q->properties.priority; 226 queue_input.gang_global_priority_level = 227 AMDGPU_MES_PRIORITY_LEVEL_NORMAL; 228 queue_input.doorbell_offset = q->properties.doorbell_off; 229 queue_input.mqd_addr = q->gart_mqd_addr; 230 queue_input.wptr_addr = (uint64_t)q->properties.write_ptr; 231 232 wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1); 233 queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->properties.wptr_bo) + wptr_addr_off; 234 235 queue_input.is_kfd_process = 1; 236 queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL); 237 queue_input.queue_size = q->properties.queue_size >> 2; 238 239 queue_input.paging = false; 240 queue_input.tba_addr = qpd->tba_addr; 241 queue_input.tma_addr = qpd->tma_addr; 242 queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device); 243 queue_input.skip_process_ctx_clear = 244 qpd->pqm->process->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED && 245 (qpd->pqm->process->debug_trap_enabled || 246 kfd_dbg_has_ttmps_always_setup(q->device)); 247 248 queue_type = convert_to_mes_queue_type(q->properties.type); 249 if (queue_type < 0) { 250 dev_err(adev->dev, "Queue type not supported with MES, queue:%d\n", 251 q->properties.type); 252 up_read(&adev->reset_domain->sem); 253 return -EINVAL; 254 } 255 queue_input.queue_type = (uint32_t)queue_type; 256 257 queue_input.exclusively_scheduled = q->properties.is_gws; 258 queue_input.sh_mem_config_data = qpd->sh_mem_config; 259 queue_input.vm_cntx_cntl = qpd->vm_cntx_cntl; 260 queue_input.xcc_id = ffs(dqm->dev->xcc_mask) - 1; 261 262 amdgpu_mes_lock(&adev->mes); 263 r = 
adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); 264 amdgpu_mes_unlock(&adev->mes); 265 up_read(&adev->reset_domain->sem); 266 if (r) { 267 dev_err(adev->dev, "failed to add hardware queue to MES, doorbell=0x%x\n", 268 q->properties.doorbell_off); 269 dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); 270 kfd_hws_hang(dqm); 271 } 272 273 return r; 274 } 275 276 static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q, 277 struct qcm_process_device *qpd) 278 { 279 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 280 int r; 281 struct mes_remove_queue_input queue_input; 282 283 if (!dqm->sched_running || dqm->sched_halt) 284 return 0; 285 if (!down_read_trylock(&adev->reset_domain->sem)) 286 return -EIO; 287 288 memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input)); 289 queue_input.doorbell_offset = q->properties.doorbell_off; 290 queue_input.gang_context_addr = q->gang_ctx_gpu_addr; 291 queue_input.xcc_id = ffs(dqm->dev->xcc_mask) - 1; 292 293 amdgpu_mes_lock(&adev->mes); 294 r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); 295 amdgpu_mes_unlock(&adev->mes); 296 up_read(&adev->reset_domain->sem); 297 298 if (r) { 299 dev_err(adev->dev, "failed to remove hardware queue from MES, doorbell=0x%x\n", 300 q->properties.doorbell_off); 301 dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); 302 kfd_hws_hang(dqm); 303 } 304 305 return r; 306 } 307 308 static int remove_all_kfd_queues_mes(struct device_queue_manager *dqm) 309 { 310 struct device_process_node *cur; 311 struct device *dev = dqm->dev->adev->dev; 312 struct qcm_process_device *qpd; 313 struct queue *q; 314 int retval = 0; 315 316 list_for_each_entry(cur, &dqm->queues, list) { 317 qpd = cur->qpd; 318 list_for_each_entry(q, &qpd->queues_list, list) { 319 if (q->properties.is_active) { 320 retval = remove_queue_mes(dqm, q, qpd); 321 if (retval) { 322 dev_err(dev, "%s: Failed to remove queue %d for dev %d", 323 __func__, 324 q->properties.queue_id, 325 dqm->dev->id); 326 return retval; 327 } 328 } 329 } 330 } 331 332 return retval; 333 } 334 335 static int add_all_kfd_queues_mes(struct device_queue_manager *dqm) 336 { 337 struct device_process_node *cur; 338 struct device *dev = dqm->dev->adev->dev; 339 struct qcm_process_device *qpd; 340 struct queue *q; 341 int retval = 0; 342 343 list_for_each_entry(cur, &dqm->queues, list) { 344 qpd = cur->qpd; 345 list_for_each_entry(q, &qpd->queues_list, list) { 346 if (!q->properties.is_active) 347 continue; 348 retval = add_queue_mes(dqm, q, qpd); 349 if (retval) { 350 dev_err(dev, "%s: Failed to add queue %d for dev %d", 351 __func__, 352 q->properties.queue_id, 353 dqm->dev->id); 354 return retval; 355 } 356 } 357 } 358 359 return retval; 360 } 361 362 static int suspend_all_queues_mes(struct device_queue_manager *dqm) 363 { 364 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 365 int r = 0; 366 367 if (!down_read_trylock(&adev->reset_domain->sem)) 368 return -EIO; 369 370 r = amdgpu_mes_suspend(adev); 371 up_read(&adev->reset_domain->sem); 372 373 if (r) { 374 dev_err(adev->dev, "failed to suspend gangs from MES\n"); 375 dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); 376 kfd_hws_hang(dqm); 377 } 378 379 return r; 380 } 381 382 static int resume_all_queues_mes(struct device_queue_manager *dqm) 383 { 384 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 385 int r = 0; 386 387 if 
(!down_read_trylock(&adev->reset_domain->sem)) 388 return -EIO; 389 390 r = amdgpu_mes_resume(adev); 391 up_read(&adev->reset_domain->sem); 392 393 if (r) { 394 dev_err(adev->dev, "failed to resume gangs from MES\n"); 395 dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); 396 kfd_hws_hang(dqm); 397 } 398 399 return r; 400 } 401 402 static void increment_queue_count(struct device_queue_manager *dqm, 403 struct qcm_process_device *qpd, 404 struct queue *q) 405 { 406 dqm->active_queue_count++; 407 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 408 dqm->active_cp_queue_count++; 409 410 if (q->properties.is_gws) { 411 dqm->gws_queue_count++; 412 qpd->mapped_gws_queue = true; 413 } 414 } 415 416 static void decrement_queue_count(struct device_queue_manager *dqm, 417 struct qcm_process_device *qpd, 418 struct queue *q) 419 { 420 dqm->active_queue_count--; 421 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 422 dqm->active_cp_queue_count--; 423 424 if (q->properties.is_gws) { 425 dqm->gws_queue_count--; 426 qpd->mapped_gws_queue = false; 427 } 428 } 429 430 /* 431 * Allocate a doorbell ID to this queue. 432 * If doorbell_id is passed in, make sure requested ID is valid then allocate it. 433 */ 434 static int allocate_doorbell(struct qcm_process_device *qpd, 435 struct queue *q, 436 uint32_t const *restore_id) 437 { 438 struct kfd_node *dev = qpd->dqm->dev; 439 440 if (!KFD_IS_SOC15(dev)) { 441 /* On pre-SOC15 chips we need to use the queue ID to 442 * preserve the user mode ABI. 443 */ 444 445 if (restore_id && *restore_id != q->properties.queue_id) 446 return -EINVAL; 447 448 q->doorbell_id = q->properties.queue_id; 449 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 450 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 451 /* For SDMA queues on SOC15 with 8-byte doorbell, use static 452 * doorbell assignments based on the engine and queue id. 453 * The doorbell index distance between RLC (2*i) and (2*i+1) 454 * for an SDMA engine is 512. 455 */ 456 457 uint32_t *idx_offset = dev->kfd->shared_resources.sdma_doorbell_idx; 458 459 /* 460 * q->properties.sdma_engine_id corresponds to the virtual 461 * sdma engine number. However, for doorbell allocation, 462 * we need the physical sdma engine id in order to get the 463 * correct doorbell offset.
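 * As an illustration (made-up numbers, not from real hardware): with
 * sdma_queue_id == 5, (5 & 1) selects the mirrored doorbell half
 * (+KFD_QUEUE_DOORBELL_MIRROR_OFFSET) and 5 >> 1 == 2 picks the slot
 * within that half, on top of the per-engine base from sdma_doorbell_idx.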
464 */ 465 uint32_t valid_id = idx_offset[qpd->dqm->dev->node_id * 466 get_num_all_sdma_engines(qpd->dqm) + 467 q->properties.sdma_engine_id] 468 + (q->properties.sdma_queue_id & 1) 469 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET 470 + (q->properties.sdma_queue_id >> 1); 471 472 if (restore_id && *restore_id != valid_id) 473 return -EINVAL; 474 q->doorbell_id = valid_id; 475 } else { 476 /* For CP queues on SOC15 */ 477 if (restore_id) { 478 /* make sure that ID is free */ 479 if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap)) 480 return -EINVAL; 481 482 q->doorbell_id = *restore_id; 483 } else { 484 /* or reserve a free doorbell ID */ 485 unsigned int found; 486 487 found = find_first_zero_bit(qpd->doorbell_bitmap, 488 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); 489 if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { 490 pr_debug("No doorbells available"); 491 return -EBUSY; 492 } 493 set_bit(found, qpd->doorbell_bitmap); 494 q->doorbell_id = found; 495 } 496 } 497 498 q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev, 499 qpd->proc_doorbells, 500 q->doorbell_id, 501 dev->kfd->device_info.doorbell_size); 502 return 0; 503 } 504 505 static void deallocate_doorbell(struct qcm_process_device *qpd, 506 struct queue *q) 507 { 508 unsigned int old; 509 struct kfd_node *dev = qpd->dqm->dev; 510 511 if (!KFD_IS_SOC15(dev) || 512 q->properties.type == KFD_QUEUE_TYPE_SDMA || 513 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 514 return; 515 516 old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); 517 WARN_ON(!old); 518 } 519 520 static void program_trap_handler_settings(struct device_queue_manager *dqm, 521 struct qcm_process_device *qpd) 522 { 523 uint32_t xcc_mask = dqm->dev->xcc_mask; 524 int xcc_id; 525 526 if (dqm->dev->kfd2kgd->program_trap_handler_settings) 527 for_each_inst(xcc_id, xcc_mask) 528 dqm->dev->kfd2kgd->program_trap_handler_settings( 529 dqm->dev->adev, qpd->vmid, qpd->tba_addr, 530 qpd->tma_addr, xcc_id); 531 } 532 533 static int allocate_vmid(struct device_queue_manager *dqm, 534 struct qcm_process_device *qpd, 535 struct queue *q) 536 { 537 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 538 struct device *dev = dqm->dev->adev->dev; 539 int allocated_vmid = -1, i; 540 541 for (i = dqm->dev->vm_info.first_vmid_kfd; 542 i <= dqm->dev->vm_info.last_vmid_kfd; i++) { 543 if (!dqm->vmid_pasid[i]) { 544 allocated_vmid = i; 545 break; 546 } 547 } 548 549 if (allocated_vmid < 0) { 550 dev_err(dev, "no more vmid to allocate\n"); 551 return -ENOSPC; 552 } 553 554 pr_debug("vmid allocated: %d\n", allocated_vmid); 555 556 dqm->vmid_pasid[allocated_vmid] = pdd->pasid; 557 558 set_pasid_vmid_mapping(dqm, pdd->pasid, allocated_vmid); 559 560 qpd->vmid = allocated_vmid; 561 q->properties.vmid = allocated_vmid; 562 563 program_sh_mem_settings(dqm, qpd); 564 565 if (KFD_IS_SOC15(dqm->dev) && dqm->dev->kfd->cwsr_enabled) 566 program_trap_handler_settings(dqm, qpd); 567 568 /* qpd->page_table_base is set earlier when register_process() 569 * is called, i.e. when the first queue is created. 
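 * Here it only needs to be programmed into the newly acquired VMID;
 * the TLB flush just below then drops any stale translations for it.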
570 */ 571 dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev, 572 qpd->vmid, 573 qpd->page_table_base); 574 /* invalidate the VM context after pasid and vmid mapping is set up */ 575 kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); 576 577 if (dqm->dev->kfd2kgd->set_scratch_backing_va) 578 dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev, 579 qpd->sh_hidden_private_base, qpd->vmid); 580 581 return 0; 582 } 583 584 static int flush_texture_cache_nocpsch(struct kfd_node *kdev, 585 struct qcm_process_device *qpd) 586 { 587 const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf; 588 int ret; 589 590 if (!qpd->ib_kaddr) 591 return -ENOMEM; 592 593 ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); 594 if (ret) 595 return ret; 596 597 return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid, 598 qpd->ib_base, (uint32_t *)qpd->ib_kaddr, 599 pmf->release_mem_size / sizeof(uint32_t)); 600 } 601 602 static void deallocate_vmid(struct device_queue_manager *dqm, 603 struct qcm_process_device *qpd, 604 struct queue *q) 605 { 606 struct device *dev = dqm->dev->adev->dev; 607 608 /* On GFX v7, CP doesn't flush TC at dequeue */ 609 if (q->device->adev->asic_type == CHIP_HAWAII) 610 if (flush_texture_cache_nocpsch(q->device, qpd)) 611 dev_err(dev, "Failed to flush TC\n"); 612 613 kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY); 614 615 /* Release the vmid mapping */ 616 set_pasid_vmid_mapping(dqm, 0, qpd->vmid); 617 dqm->vmid_pasid[qpd->vmid] = 0; 618 619 qpd->vmid = 0; 620 q->properties.vmid = 0; 621 } 622 623 static int create_queue_nocpsch(struct device_queue_manager *dqm, 624 struct queue *q, 625 struct qcm_process_device *qpd, 626 const struct kfd_criu_queue_priv_data *qd, 627 const void *restore_mqd, const void *restore_ctl_stack) 628 { 629 struct mqd_manager *mqd_mgr; 630 int retval; 631 632 dqm_lock(dqm); 633 634 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 635 pr_warn("Can't create new usermode queue because %d queues were already created\n", 636 dqm->total_queue_count); 637 retval = -EPERM; 638 goto out_unlock; 639 } 640 641 if (list_empty(&qpd->queues_list)) { 642 retval = allocate_vmid(dqm, qpd, q); 643 if (retval) 644 goto out_unlock; 645 } 646 q->properties.vmid = qpd->vmid; 647 /* 648 * Eviction state logic: mark all queues as evicted, even ones 649 * not currently active. Restoring inactive queues later only 650 * updates the is_evicted flag but is a no-op otherwise. 651 */ 652 q->properties.is_evicted = !!qpd->evicted; 653 654 q->properties.tba_addr = qpd->tba_addr; 655 q->properties.tma_addr = qpd->tma_addr; 656 657 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 658 q->properties.type)]; 659 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { 660 retval = allocate_hqd(dqm, q); 661 if (retval) 662 goto deallocate_vmid; 663 pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", 664 q->pipe, q->queue); 665 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 666 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 667 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); 668 if (retval) 669 goto deallocate_vmid; 670 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 671 } 672 673 retval = allocate_doorbell(qpd, q, qd ? 
&qd->doorbell_id : NULL); 674 if (retval) 675 goto out_deallocate_hqd; 676 677 /* Temporarily release dqm lock to avoid a circular lock dependency */ 678 dqm_unlock(dqm); 679 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr, &q->properties); 680 dqm_lock(dqm); 681 682 if (!q->mqd_mem_obj) { 683 retval = -ENOMEM; 684 goto out_deallocate_doorbell; 685 } 686 687 if (qd) 688 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, 689 &q->properties, restore_mqd, restore_ctl_stack, 690 qd->ctl_stack_size); 691 else 692 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 693 &q->gart_mqd_addr, &q->properties); 694 695 if (q->properties.is_active) { 696 if (!dqm->sched_running) { 697 WARN_ONCE(1, "Load non-HWS mqd while stopped\n"); 698 goto add_queue_to_list; 699 } 700 701 if (WARN(q->process->mm != current->mm, 702 "should only run in user thread")) 703 retval = -EFAULT; 704 else 705 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 706 q->queue, &q->properties, current->mm); 707 if (retval) 708 goto out_free_mqd; 709 } 710 711 add_queue_to_list: 712 list_add(&q->list, &qpd->queues_list); 713 qpd->queue_count++; 714 if (q->properties.is_active) 715 increment_queue_count(dqm, qpd, q); 716 717 /* 718 * Unconditionally increment this counter, regardless of the queue's 719 * type or whether the queue is active. 720 */ 721 dqm->total_queue_count++; 722 pr_debug("Total of %d queues are accountable so far\n", 723 dqm->total_queue_count); 724 goto out_unlock; 725 726 out_free_mqd: 727 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 728 out_deallocate_doorbell: 729 deallocate_doorbell(qpd, q); 730 out_deallocate_hqd: 731 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 732 deallocate_hqd(dqm, q); 733 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 734 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 735 deallocate_sdma_queue(dqm, q); 736 deallocate_vmid: 737 if (list_empty(&qpd->queues_list)) 738 deallocate_vmid(dqm, qpd, q); 739 out_unlock: 740 dqm_unlock(dqm); 741 return retval; 742 } 743 744 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) 745 { 746 bool set; 747 int pipe, bit, i; 748 749 set = false; 750 751 for (pipe = dqm->next_pipe_to_allocate, i = 0; 752 i < get_pipes_per_mec(dqm); 753 pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) { 754 755 if (!is_pipe_enabled(dqm, 0, pipe)) 756 continue; 757 758 if (dqm->allocated_queues[pipe] != 0) { 759 bit = ffs(dqm->allocated_queues[pipe]) - 1; 760 dqm->allocated_queues[pipe] &= ~(1 << bit); 761 q->pipe = pipe; 762 q->queue = bit; 763 set = true; 764 break; 765 } 766 } 767 768 if (!set) 769 return -EBUSY; 770 771 pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue); 772 /* horizontal hqd allocation */ 773 dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm); 774 775 return 0; 776 } 777 778 static inline void deallocate_hqd(struct device_queue_manager *dqm, 779 struct queue *q) 780 { 781 dqm->allocated_queues[q->pipe] |= (1 << q->queue); 782 } 783 784 #define SQ_IND_CMD_CMD_KILL 0x00000003 785 #define SQ_IND_CMD_MODE_BROADCAST 0x00000001 786 787 static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process *p) 788 { 789 int status = 0; 790 unsigned int vmid; 791 uint16_t queried_pasid; 792 union SQ_CMD_BITS reg_sq_cmd; 793 union GRBM_GFX_INDEX_BITS reg_gfx_index; 794 struct kfd_process_device *pdd; 795 int first_vmid_to_scan = dev->vm_info.first_vmid_kfd; 796 int last_vmid_to_scan = dev->vm_info.last_vmid_kfd; 797 uint32_t xcc_mask = dev->xcc_mask; 798 int 
xcc_id; 799 800 reg_sq_cmd.u32All = 0; 801 reg_gfx_index.u32All = 0; 802 803 pr_debug("Killing all process wavefronts\n"); 804 805 if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) { 806 dev_err(dev->adev->dev, "no vmid pasid mapping supported\n"); 807 return -EOPNOTSUPP; 808 } 809 810 /* Look up the VMID for this process safely via its PDD */ 811 pdd = kfd_get_process_device_data(dev, p); 812 if (!pdd) 813 return -EFAULT; 814 815 /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. 816 * ATC_VMID15_PASID_MAPPING 817 * to check which VMID the current process is mapped to. 818 */ 819 820 for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { 821 status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info 822 (dev->adev, vmid, &queried_pasid); 823 824 if (status && queried_pasid == pdd->pasid) { 825 pr_debug("Killing wave fronts of vmid %d and process pid %d\n", 826 vmid, p->lead_thread->pid); 827 break; 828 } 829 } 830 831 if (vmid > last_vmid_to_scan) { 832 dev_err(dev->adev->dev, "Didn't find vmid for process pid %d\n", 833 p->lead_thread->pid); 834 return -EFAULT; 835 } 836 837 reg_gfx_index.bits.sh_broadcast_writes = 1; 838 reg_gfx_index.bits.se_broadcast_writes = 1; 839 reg_gfx_index.bits.instance_broadcast_writes = 1; 840 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; 841 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL; 842 reg_sq_cmd.bits.vm_id = vmid; 843 844 for_each_inst(xcc_id, xcc_mask) 845 dev->kfd2kgd->wave_control_execute( 846 dev->adev, reg_gfx_index.u32All, 847 reg_sq_cmd.u32All, xcc_id); 848 849 return 0; 850 } 851 852 /* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked 853 * to avoid unsynchronized access 854 */ 855 static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, 856 struct qcm_process_device *qpd, 857 struct queue *q) 858 { 859 int retval; 860 struct mqd_manager *mqd_mgr; 861 862 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)]; 863 864 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 865 deallocate_hqd(dqm, q); 866 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 867 deallocate_sdma_queue(dqm, q); 868 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 869 deallocate_sdma_queue(dqm, q); 870 else { 871 pr_debug("q->properties.type %d is invalid\n", 872 q->properties.type); 873 return -EINVAL; 874 } 875 dqm->total_queue_count--; 876 877 deallocate_doorbell(qpd, q); 878 879 if (!dqm->sched_running) { 880 WARN_ONCE(1, "Destroy non-HWS queue while stopped\n"); 881 return 0; 882 } 883 884 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 885 KFD_PREEMPT_TYPE_WAVEFRONT_RESET, 886 KFD_UNMAP_LATENCY_MS, 887 q->pipe, q->queue); 888 if (retval == -ETIME) 889 qpd->reset_wavefronts = true; 890 891 list_del(&q->list); 892 if (list_empty(&qpd->queues_list)) { 893 if (qpd->reset_wavefronts) { 894 pr_warn("Resetting wave fronts (nocpsch) on dev %p\n", 895 dqm->dev); 896 /* dbgdev_wave_reset_wavefronts has to be called before 897 * deallocate_vmid(), i.e. when vmid is still in use.
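 * (The SQ kill command it issues is addressed by VMID, so once the
 * VMID is released there is no way to target the stuck wavefronts.)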
898 */ 899 dbgdev_wave_reset_wavefronts(dqm->dev, 900 qpd->pqm->process); 901 qpd->reset_wavefronts = false; 902 } 903 904 deallocate_vmid(dqm, qpd, q); 905 } 906 qpd->queue_count--; 907 if (q->properties.is_active) 908 decrement_queue_count(dqm, qpd, q); 909 910 return retval; 911 } 912 913 static int destroy_queue_nocpsch(struct device_queue_manager *dqm, 914 struct qcm_process_device *qpd, 915 struct queue *q) 916 { 917 int retval; 918 uint64_t sdma_val = 0; 919 struct device *dev = dqm->dev->adev->dev; 920 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 921 struct mqd_manager *mqd_mgr = 922 dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)]; 923 924 /* Get the SDMA queue stats */ 925 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 926 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 927 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 928 &sdma_val); 929 if (retval) 930 dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n", 931 q->properties.queue_id); 932 } 933 934 dqm_lock(dqm); 935 retval = destroy_queue_nocpsch_locked(dqm, qpd, q); 936 if (!retval) 937 pdd->sdma_past_activity_counter += sdma_val; 938 dqm_unlock(dqm); 939 940 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 941 942 return retval; 943 } 944 945 static int update_queue(struct device_queue_manager *dqm, struct queue *q, 946 struct mqd_update_info *minfo) 947 { 948 int retval = 0; 949 struct device *dev = dqm->dev->adev->dev; 950 struct mqd_manager *mqd_mgr; 951 struct kfd_process_device *pdd; 952 bool prev_active = false; 953 954 dqm_lock(dqm); 955 pdd = kfd_get_process_device_data(q->device, q->process); 956 if (!pdd) { 957 retval = -ENODEV; 958 goto out_unlock; 959 } 960 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 961 q->properties.type)]; 962 963 /* Save previous activity state for counters */ 964 prev_active = q->properties.is_active; 965 966 /* Make sure the queue is unmapped before updating the MQD */ 967 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { 968 if (!dqm->dev->kfd->shared_resources.enable_mes) 969 retval = unmap_queues_cpsch(dqm, 970 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false); 971 else if (prev_active) 972 retval = remove_queue_mes(dqm, q, &pdd->qpd); 973 974 /* queue was reset, so it is inaccessible */ 975 if (pdd->has_reset_queue) { 976 retval = -EACCES; 977 goto out_unlock; 978 } 979 980 if (retval) { 981 dev_err(dev, "unmap queue failed\n"); 982 goto out_unlock; 983 } 984 } else if (prev_active && 985 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 986 q->properties.type == KFD_QUEUE_TYPE_SDMA || 987 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 988 989 if (!dqm->sched_running) { 990 WARN_ONCE(1, "Update non-HWS queue while stopped\n"); 991 goto out_unlock; 992 } 993 994 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 995 (dqm->dev->kfd->cwsr_enabled ? 996 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 997 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 998 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 999 if (retval) { 1000 dev_err(dev, "destroy mqd failed\n"); 1001 goto out_unlock; 1002 } 1003 } 1004 1005 mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo); 1006 1007 /* 1008 * check active state vs. the previous state and modify 1009 * counter accordingly. map_queues_cpsch uses the 1010 * dqm->active_queue_count to determine whether a new runlist must be 1011 * uploaded.
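 * Four transitions are handled below: the queue became active, it became
 * inactive, GWS was attached to an existing queue, or GWS was detached.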
1012 */ 1013 if (q->properties.is_active && !prev_active) { 1014 increment_queue_count(dqm, &pdd->qpd, q); 1015 } else if (!q->properties.is_active && prev_active) { 1016 decrement_queue_count(dqm, &pdd->qpd, q); 1017 } else if (q->gws && !q->properties.is_gws) { 1018 if (q->properties.is_active) { 1019 dqm->gws_queue_count++; 1020 pdd->qpd.mapped_gws_queue = true; 1021 } 1022 q->properties.is_gws = true; 1023 } else if (!q->gws && q->properties.is_gws) { 1024 if (q->properties.is_active) { 1025 dqm->gws_queue_count--; 1026 pdd->qpd.mapped_gws_queue = false; 1027 } 1028 q->properties.is_gws = false; 1029 } 1030 1031 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { 1032 if (!dqm->dev->kfd->shared_resources.enable_mes) 1033 retval = map_queues_cpsch(dqm); 1034 else if (q->properties.is_active) 1035 retval = add_queue_mes(dqm, q, &pdd->qpd); 1036 } else if (q->properties.is_active && 1037 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 1038 q->properties.type == KFD_QUEUE_TYPE_SDMA || 1039 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 1040 if (WARN(q->process->mm != current->mm, 1041 "should only run in user thread")) 1042 retval = -EFAULT; 1043 else 1044 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 1045 q->pipe, q->queue, 1046 &q->properties, current->mm); 1047 } 1048 1049 out_unlock: 1050 dqm_unlock(dqm); 1051 return retval; 1052 } 1053 1054 /* suspend_single_queue does not lock the dqm like the 1055 * evict_process_queues_cpsch or evict_process_queues_nocpsch. You should 1056 * lock the dqm before calling, and unlock after calling. 1057 * 1058 * The reason we don't lock the dqm is because this function may be 1059 * called on multiple queues in a loop, so rather than locking/unlocking 1060 * multiple times, we will just keep the dqm locked for all of the calls. 1061 */ 1062 static int suspend_single_queue(struct device_queue_manager *dqm, 1063 struct kfd_process_device *pdd, 1064 struct queue *q) 1065 { 1066 bool is_new; 1067 1068 if (q->properties.is_suspended) 1069 return 0; 1070 1071 pr_debug("Suspending process pid %d queue [%i]\n", 1072 pdd->process->lead_thread->pid, 1073 q->properties.queue_id); 1074 1075 is_new = q->properties.exception_status & KFD_EC_MASK(EC_QUEUE_NEW); 1076 1077 if (is_new || q->properties.is_being_destroyed) { 1078 pr_debug("Suspend: skip %s queue id %i\n", 1079 is_new ? "new" : "destroyed", 1080 q->properties.queue_id); 1081 return -EBUSY; 1082 } 1083 1084 q->properties.is_suspended = true; 1085 if (q->properties.is_active) { 1086 if (dqm->dev->kfd->shared_resources.enable_mes) { 1087 int r = remove_queue_mes(dqm, q, &pdd->qpd); 1088 1089 if (r) 1090 return r; 1091 } 1092 1093 decrement_queue_count(dqm, &pdd->qpd, q); 1094 q->properties.is_active = false; 1095 } 1096 1097 return 0; 1098 } 1099 1100 /* resume_single_queue does not lock the dqm like the functions 1101 * restore_process_queues_cpsch or restore_process_queues_nocpsch. You should 1102 * lock the dqm before calling, and unlock after calling. 1103 * 1104 * The reason we don't lock the dqm is because this function may be 1105 * called on multiple queues in a loop, so rather than locking/unlocking 1106 * multiple times, we will just keep the dqm locked for all of the calls. 
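 * A rough sketch of the expected calling pattern (caller side, not code
 * from this file):
 *   dqm_lock(dqm);
 *   list_for_each_entry(q, &qpd->queues_list, list)
 *           resume_single_queue(dqm, qpd, q);
 *   dqm_unlock(dqm);
 * suspend_single_queue() follows the same locking convention.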
1107 */ 1108 static int resume_single_queue(struct device_queue_manager *dqm, 1109 struct qcm_process_device *qpd, 1110 struct queue *q) 1111 { 1112 struct kfd_process_device *pdd; 1113 1114 if (!q->properties.is_suspended) 1115 return 0; 1116 1117 pdd = qpd_to_pdd(qpd); 1118 1119 pr_debug("Restoring from suspend process pid %d queue [%i]\n", 1120 pdd->process->lead_thread->pid, 1121 q->properties.queue_id); 1122 1123 q->properties.is_suspended = false; 1124 1125 if (QUEUE_IS_ACTIVE(q->properties)) { 1126 if (dqm->dev->kfd->shared_resources.enable_mes) { 1127 int r = add_queue_mes(dqm, q, &pdd->qpd); 1128 1129 if (r) 1130 return r; 1131 } 1132 1133 q->properties.is_active = true; 1134 increment_queue_count(dqm, qpd, q); 1135 } 1136 1137 return 0; 1138 } 1139 1140 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, 1141 struct qcm_process_device *qpd) 1142 { 1143 struct queue *q; 1144 struct mqd_manager *mqd_mgr; 1145 struct kfd_process_device *pdd; 1146 int retval, ret = 0; 1147 1148 dqm_lock(dqm); 1149 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 1150 goto out; 1151 1152 pdd = qpd_to_pdd(qpd); 1153 pr_debug_ratelimited("Evicting process pid %d queues\n", 1154 pdd->process->lead_thread->pid); 1155 1156 pdd->last_evict_timestamp = get_jiffies_64(); 1157 /* Mark all queues as evicted. Deactivate all active queues on 1158 * the qpd. 1159 */ 1160 list_for_each_entry(q, &qpd->queues_list, list) { 1161 q->properties.is_evicted = true; 1162 if (!q->properties.is_active) 1163 continue; 1164 1165 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1166 q->properties.type)]; 1167 q->properties.is_active = false; 1168 decrement_queue_count(dqm, qpd, q); 1169 1170 if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) 1171 continue; 1172 1173 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 1174 (dqm->dev->kfd->cwsr_enabled ? 1175 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 1176 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 1177 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 1178 if (retval && !ret) 1179 /* Return the first error, but keep going to 1180 * maintain a consistent eviction state 1181 */ 1182 ret = retval; 1183 } 1184 1185 out: 1186 dqm_unlock(dqm); 1187 return ret; 1188 } 1189 1190 static int evict_process_queues_cpsch(struct device_queue_manager *dqm, 1191 struct qcm_process_device *qpd) 1192 { 1193 struct queue *q; 1194 struct device *dev = dqm->dev->adev->dev; 1195 struct kfd_process_device *pdd; 1196 int retval = 0; 1197 1198 dqm_lock(dqm); 1199 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 1200 goto out; 1201 1202 pdd = qpd_to_pdd(qpd); 1203 1204 /* The debugger creates processes that temporarily have not acquired 1205 * all VMs for all devices and has no VMs itself. 1206 * Skip queue eviction on process eviction. 1207 */ 1208 if (!pdd->drm_priv) 1209 goto out; 1210 1211 pr_debug_ratelimited("Evicting process pid %d queues\n", 1212 pdd->process->lead_thread->pid); 1213 1214 if (dqm->dev->kfd->shared_resources.enable_mes) 1215 pdd->last_evict_timestamp = get_jiffies_64(); 1216 1217 /* Mark all queues as evicted. Deactivate all active queues on 1218 * the qpd. 
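 * With MES enabled each active queue is removed from the scheduler right
 * here in the loop; without MES the queues are only flagged now and one
 * unmap is issued via execute_queues_cpsch() after the loop.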
1219 */ 1220 list_for_each_entry(q, &qpd->queues_list, list) { 1221 q->properties.is_evicted = true; 1222 if (!q->properties.is_active) 1223 continue; 1224 1225 q->properties.is_active = false; 1226 decrement_queue_count(dqm, qpd, q); 1227 1228 if (dqm->dev->kfd->shared_resources.enable_mes) { 1229 retval = remove_queue_mes(dqm, q, qpd); 1230 if (retval) { 1231 dev_err(dev, "Failed to evict queue %d\n", 1232 q->properties.queue_id); 1233 goto out; 1234 } 1235 } 1236 } 1237 1238 if (!dqm->dev->kfd->shared_resources.enable_mes) { 1239 pdd->last_evict_timestamp = get_jiffies_64(); 1240 retval = execute_queues_cpsch(dqm, 1241 qpd->is_debug ? 1242 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : 1243 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 1244 USE_DEFAULT_GRACE_PERIOD); 1245 } 1246 1247 out: 1248 dqm_unlock(dqm); 1249 return retval; 1250 } 1251 1252 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, 1253 struct qcm_process_device *qpd) 1254 { 1255 struct mm_struct *mm = NULL; 1256 struct queue *q; 1257 struct mqd_manager *mqd_mgr; 1258 struct kfd_process_device *pdd; 1259 uint64_t pd_base; 1260 uint64_t eviction_duration; 1261 int retval, ret = 0; 1262 1263 pdd = qpd_to_pdd(qpd); 1264 /* Retrieve PD base */ 1265 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1266 1267 dqm_lock(dqm); 1268 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 1269 goto out; 1270 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 1271 qpd->evicted--; 1272 goto out; 1273 } 1274 1275 pr_debug_ratelimited("Restoring process pid %d queues\n", 1276 pdd->process->lead_thread->pid); 1277 1278 /* Update PD Base in QPD */ 1279 qpd->page_table_base = pd_base; 1280 pr_debug("Updated PD address to 0x%llx\n", pd_base); 1281 1282 if (!list_empty(&qpd->queues_list)) { 1283 dqm->dev->kfd2kgd->set_vm_context_page_table_base( 1284 dqm->dev->adev, 1285 qpd->vmid, 1286 qpd->page_table_base); 1287 kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); 1288 } 1289 1290 /* Take a safe reference to the mm_struct, which may otherwise 1291 * disappear even while the kfd_process is still referenced. 1292 */ 1293 mm = get_task_mm(pdd->process->lead_thread); 1294 if (!mm) { 1295 ret = -EFAULT; 1296 goto out; 1297 } 1298 1299 /* Remove the eviction flags. Activate queues that are not 1300 * inactive for other reasons. 
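 * (QUEUE_IS_ACTIVE() filters out queues that are still inactive for
 * reasons other than eviction, e.g. ones user space has disabled.)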
1301 */ 1302 list_for_each_entry(q, &qpd->queues_list, list) { 1303 q->properties.is_evicted = false; 1304 if (!QUEUE_IS_ACTIVE(q->properties)) 1305 continue; 1306 1307 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1308 q->properties.type)]; 1309 q->properties.is_active = true; 1310 increment_queue_count(dqm, qpd, q); 1311 1312 if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) 1313 continue; 1314 1315 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 1316 q->queue, &q->properties, mm); 1317 if (retval && !ret) 1318 /* Return the first error, but keep going to 1319 * maintain a consistent eviction state 1320 */ 1321 ret = retval; 1322 } 1323 qpd->evicted = 0; 1324 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 1325 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 1326 out: 1327 if (mm) 1328 mmput(mm); 1329 dqm_unlock(dqm); 1330 return ret; 1331 } 1332 1333 static int restore_process_queues_cpsch(struct device_queue_manager *dqm, 1334 struct qcm_process_device *qpd) 1335 { 1336 struct queue *q; 1337 struct device *dev = dqm->dev->adev->dev; 1338 struct kfd_process_device *pdd; 1339 uint64_t eviction_duration; 1340 int retval = 0; 1341 1342 pdd = qpd_to_pdd(qpd); 1343 1344 dqm_lock(dqm); 1345 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 1346 goto out; 1347 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 1348 qpd->evicted--; 1349 goto out; 1350 } 1351 1352 /* The debugger creates processes that temporarily have not acquired 1353 * all VMs for all devices and has no VMs itself. 1354 * Skip queue restore on process restore. 1355 */ 1356 if (!pdd->drm_priv) 1357 goto vm_not_acquired; 1358 1359 pr_debug_ratelimited("Restoring process pid %d queues\n", 1360 pdd->process->lead_thread->pid); 1361 1362 /* Update PD Base in QPD */ 1363 qpd->page_table_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1364 pr_debug("Updated PD address to 0x%llx\n", qpd->page_table_base); 1365 1366 /* activate all active queues on the qpd */ 1367 list_for_each_entry(q, &qpd->queues_list, list) { 1368 q->properties.is_evicted = false; 1369 if (!QUEUE_IS_ACTIVE(q->properties)) 1370 continue; 1371 1372 q->properties.is_active = true; 1373 increment_queue_count(dqm, &pdd->qpd, q); 1374 1375 if (dqm->dev->kfd->shared_resources.enable_mes) { 1376 retval = add_queue_mes(dqm, q, qpd); 1377 if (retval) { 1378 dev_err(dev, "Failed to restore queue %d\n", 1379 q->properties.queue_id); 1380 goto out; 1381 } 1382 } 1383 } 1384 if (!dqm->dev->kfd->shared_resources.enable_mes) 1385 retval = execute_queues_cpsch(dqm, 1386 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); 1387 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 1388 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 1389 vm_not_acquired: 1390 qpd->evicted = 0; 1391 out: 1392 dqm_unlock(dqm); 1393 return retval; 1394 } 1395 1396 static int register_process(struct device_queue_manager *dqm, 1397 struct qcm_process_device *qpd) 1398 { 1399 struct device_process_node *n; 1400 struct kfd_process_device *pdd; 1401 uint64_t pd_base; 1402 int retval; 1403 1404 n = kzalloc(sizeof(*n), GFP_KERNEL); 1405 if (!n) 1406 return -ENOMEM; 1407 1408 n->qpd = qpd; 1409 1410 pdd = qpd_to_pdd(qpd); 1411 /* Retrieve PD base */ 1412 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1413 1414 dqm_lock(dqm); 1415 list_add(&n->list, &dqm->queues); 1416 1417 /* Update PD Base in QPD */ 1418 qpd->page_table_base = pd_base; 
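/* Only cached in the qpd here; allocate_vmid() and add_queue_mes() read it back when programming the hardware. */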
1419 pr_debug("Updated PD address to 0x%llx\n", pd_base); 1420 1421 retval = dqm->asic_ops.update_qpd(dqm, qpd); 1422 1423 dqm->processes_count++; 1424 1425 dqm_unlock(dqm); 1426 1427 /* Outside the DQM lock because under the DQM lock we can't do 1428 * reclaim or take other locks that others hold while reclaiming. 1429 */ 1430 kfd_inc_compute_active(dqm->dev); 1431 1432 return retval; 1433 } 1434 1435 static int unregister_process(struct device_queue_manager *dqm, 1436 struct qcm_process_device *qpd) 1437 { 1438 int retval = 0; 1439 struct device_process_node *cur, *next; 1440 1441 pr_debug("qpd->queues_list is %s\n", 1442 list_empty(&qpd->queues_list) ? "empty" : "not empty"); 1443 1444 dqm_lock(dqm); 1445 1446 list_for_each_entry_safe(cur, next, &dqm->queues, list) { 1447 if (qpd == cur->qpd) { 1448 list_del(&cur->list); 1449 kfree(cur); 1450 dqm->processes_count--; 1451 goto out; 1452 } 1453 } 1454 /* qpd not found in dqm list */ 1455 retval = 1; 1456 out: 1457 dqm_unlock(dqm); 1458 1459 /* Outside the DQM lock because under the DQM lock we can't do 1460 * reclaim or take other locks that others hold while reclaiming. 1461 */ 1462 if (!retval) 1463 kfd_dec_compute_active(dqm->dev); 1464 1465 return retval; 1466 } 1467 1468 static int 1469 set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, 1470 unsigned int vmid) 1471 { 1472 uint32_t xcc_mask = dqm->dev->xcc_mask; 1473 int xcc_id, ret = 0; 1474 1475 for_each_inst(xcc_id, xcc_mask) { 1476 ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping( 1477 dqm->dev->adev, pasid, vmid, xcc_id); 1478 if (ret) 1479 break; 1480 } 1481 1482 return ret; 1483 } 1484 1485 static void init_interrupts(struct device_queue_manager *dqm) 1486 { 1487 uint32_t xcc_mask = dqm->dev->xcc_mask; 1488 unsigned int i, xcc_id; 1489 1490 for_each_inst(xcc_id, xcc_mask) { 1491 for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) { 1492 if (is_pipe_enabled(dqm, 0, i)) { 1493 dqm->dev->kfd2kgd->init_interrupts( 1494 dqm->dev->adev, i, xcc_id); 1495 } 1496 } 1497 } 1498 } 1499 1500 static int initialize_nocpsch(struct device_queue_manager *dqm) 1501 { 1502 int pipe, queue; 1503 1504 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1505 1506 dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), 1507 sizeof(unsigned int), GFP_KERNEL); 1508 if (!dqm->allocated_queues) 1509 return -ENOMEM; 1510 1511 mutex_init(&dqm->lock_hidden); 1512 INIT_LIST_HEAD(&dqm->queues); 1513 dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; 1514 dqm->active_cp_queue_count = 0; 1515 dqm->gws_queue_count = 0; 1516 1517 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 1518 int pipe_offset = pipe * get_queues_per_pipe(dqm); 1519 1520 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) 1521 if (test_bit(pipe_offset + queue, 1522 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 1523 dqm->allocated_queues[pipe] |= 1 << queue; 1524 } 1525 1526 memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid)); 1527 1528 init_sdma_bitmaps(dqm); 1529 1530 return 0; 1531 } 1532 1533 static void uninitialize(struct device_queue_manager *dqm) 1534 { 1535 int i; 1536 1537 WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0); 1538 1539 kfree(dqm->allocated_queues); 1540 for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) 1541 kfree(dqm->mqd_mgrs[i]); 1542 mutex_destroy(&dqm->lock_hidden); 1543 } 1544 1545 static int start_nocpsch(struct device_queue_manager *dqm) 1546 { 1547 int r = 0; 1548 1549 pr_info("SW scheduler is used"); 1550 init_interrupts(dqm); 1551 1552 if 
(dqm->dev->adev->asic_type == CHIP_HAWAII) 1553 r = pm_init(&dqm->packet_mgr, dqm); 1554 if (!r) 1555 dqm->sched_running = true; 1556 1557 return r; 1558 } 1559 1560 static int stop_nocpsch(struct device_queue_manager *dqm) 1561 { 1562 dqm_lock(dqm); 1563 if (!dqm->sched_running) { 1564 dqm_unlock(dqm); 1565 return 0; 1566 } 1567 1568 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1569 pm_uninit(&dqm->packet_mgr); 1570 dqm->sched_running = false; 1571 dqm_unlock(dqm); 1572 1573 return 0; 1574 } 1575 1576 static int allocate_sdma_queue(struct device_queue_manager *dqm, 1577 struct queue *q, const uint32_t *restore_sdma_id) 1578 { 1579 struct device *dev = dqm->dev->adev->dev; 1580 int bit; 1581 1582 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1583 if (bitmap_empty(dqm->sdma_bitmap, get_num_sdma_queues(dqm))) { 1584 dev_warn(dev, "No more SDMA queue to allocate (%d total queues)\n", 1585 get_num_sdma_queues(dqm)); 1586 return -ENOMEM; 1587 } 1588 1589 if (restore_sdma_id) { 1590 /* Re-use existing sdma_id */ 1591 if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) { 1592 dev_err(dev, "SDMA queue already in use\n"); 1593 return -EBUSY; 1594 } 1595 clear_bit(*restore_sdma_id, dqm->sdma_bitmap); 1596 q->sdma_id = *restore_sdma_id; 1597 } else { 1598 /* Find first available sdma_id */ 1599 bit = find_first_bit(dqm->sdma_bitmap, 1600 get_num_sdma_queues(dqm)); 1601 clear_bit(bit, dqm->sdma_bitmap); 1602 q->sdma_id = bit; 1603 } 1604 1605 q->properties.sdma_engine_id = 1606 q->sdma_id % kfd_get_num_sdma_engines(dqm->dev); 1607 q->properties.sdma_queue_id = q->sdma_id / 1608 kfd_get_num_sdma_engines(dqm->dev); 1609 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1610 if (bitmap_empty(dqm->xgmi_sdma_bitmap, get_num_xgmi_sdma_queues(dqm))) { 1611 dev_warn(dev, "No more XGMI SDMA queue to allocate (%d total queues)\n", 1612 get_num_xgmi_sdma_queues(dqm)); 1613 return -ENOMEM; 1614 } 1615 if (restore_sdma_id) { 1616 /* Re-use existing sdma_id */ 1617 if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) { 1618 dev_err(dev, "SDMA queue already in use\n"); 1619 return -EBUSY; 1620 } 1621 clear_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap); 1622 q->sdma_id = *restore_sdma_id; 1623 } else { 1624 bit = find_first_bit(dqm->xgmi_sdma_bitmap, 1625 get_num_xgmi_sdma_queues(dqm)); 1626 clear_bit(bit, dqm->xgmi_sdma_bitmap); 1627 q->sdma_id = bit; 1628 } 1629 /* sdma_engine_id is sdma id including 1630 * both PCIe-optimized SDMAs and XGMI- 1631 * optimized SDMAs. 
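 * (Illustrative example, not taken from the code: with 2 XGMI engines,
 * XGMI sdma_id 5 maps to engine <number of PCIe engines> + 1, queue 2.)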
The calculation below 1632 * assumes the first N engines are always 1633 * PCIe-optimized ones 1634 */ 1635 q->properties.sdma_engine_id = 1636 kfd_get_num_sdma_engines(dqm->dev) + 1637 q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev); 1638 q->properties.sdma_queue_id = q->sdma_id / 1639 kfd_get_num_xgmi_sdma_engines(dqm->dev); 1640 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { 1641 int i, num_queues, num_engines, eng_offset = 0, start_engine; 1642 bool free_bit_found = false, is_xgmi = false; 1643 1644 if (q->properties.sdma_engine_id < kfd_get_num_sdma_engines(dqm->dev)) { 1645 num_queues = get_num_sdma_queues(dqm); 1646 num_engines = kfd_get_num_sdma_engines(dqm->dev); 1647 q->properties.type = KFD_QUEUE_TYPE_SDMA; 1648 } else { 1649 num_queues = get_num_xgmi_sdma_queues(dqm); 1650 num_engines = kfd_get_num_xgmi_sdma_engines(dqm->dev); 1651 eng_offset = kfd_get_num_sdma_engines(dqm->dev); 1652 q->properties.type = KFD_QUEUE_TYPE_SDMA_XGMI; 1653 is_xgmi = true; 1654 } 1655 1656 /* Scan available bit based on target engine ID. */ 1657 start_engine = q->properties.sdma_engine_id - eng_offset; 1658 for (i = start_engine; i < num_queues; i += num_engines) { 1659 1660 if (!test_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap)) 1661 continue; 1662 1663 clear_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap); 1664 q->sdma_id = i; 1665 q->properties.sdma_queue_id = q->sdma_id / num_engines; 1666 free_bit_found = true; 1667 break; 1668 } 1669 1670 if (!free_bit_found) { 1671 dev_warn(dev, "No more SDMA queue to allocate for target ID %i (%d total queues)\n", 1672 q->properties.sdma_engine_id, num_queues); 1673 return -ENOMEM; 1674 } 1675 } 1676 1677 pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); 1678 pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); 1679 1680 return 0; 1681 } 1682 1683 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 1684 struct queue *q) 1685 { 1686 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1687 if (q->sdma_id >= get_num_sdma_queues(dqm)) 1688 return; 1689 set_bit(q->sdma_id, dqm->sdma_bitmap); 1690 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1691 if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm)) 1692 return; 1693 set_bit(q->sdma_id, dqm->xgmi_sdma_bitmap); 1694 } 1695 } 1696 1697 /* 1698 * Device Queue Manager implementation for cp scheduler 1699 */ 1700 1701 static int set_sched_resources(struct device_queue_manager *dqm) 1702 { 1703 int i, mec; 1704 struct scheduling_resources res; 1705 struct device *dev = dqm->dev->adev->dev; 1706 1707 res.vmid_mask = dqm->dev->compute_vmid_bitmap; 1708 1709 res.queue_mask = 0; 1710 for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) { 1711 mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe) 1712 / dqm->dev->kfd->shared_resources.num_pipe_per_mec; 1713 1714 if (!test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 1715 continue; 1716 1717 /* only acquire queues from the first MEC */ 1718 if (mec > 0) 1719 continue; 1720 1721 /* This situation may be hit in the future if a new HW 1722 * generation exposes more than 64 queues. 
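 * (res.queue_mask has only 64 bits, one per HW queue slot, hence the
 * WARN below.)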
If so, the 1723 * definition of res.queue_mask needs updating 1724 */ 1725 if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) { 1726 dev_err(dev, "Invalid queue enabled by amdgpu: %d\n", i); 1727 break; 1728 } 1729 1730 res.queue_mask |= 1ull 1731 << amdgpu_queue_mask_bit_to_set_resource_bit( 1732 dqm->dev->adev, i); 1733 } 1734 res.gws_mask = ~0ull; 1735 res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0; 1736 1737 pr_debug("Scheduling resources:\n" 1738 "vmid mask: 0x%8X\n" 1739 "queue mask: 0x%8llX\n", 1740 res.vmid_mask, res.queue_mask); 1741 1742 return pm_send_set_resources(&dqm->packet_mgr, &res); 1743 } 1744 1745 static int initialize_cpsch(struct device_queue_manager *dqm) 1746 { 1747 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1748 1749 mutex_init(&dqm->lock_hidden); 1750 INIT_LIST_HEAD(&dqm->queues); 1751 dqm->active_queue_count = dqm->processes_count = 0; 1752 dqm->active_cp_queue_count = 0; 1753 dqm->gws_queue_count = 0; 1754 dqm->active_runlist = false; 1755 dqm->trap_debug_vmid = 0; 1756 1757 init_sdma_bitmaps(dqm); 1758 1759 update_dqm_wait_times(dqm); 1760 return 0; 1761 } 1762 1763 /* halt_cpsch: 1764 * Unmap queues so the scheduler doesn't continue the remaining jobs in the queue. 1765 * Then set dqm->sched_halt so queues don't map to runlist until unhalt_cpsch 1766 * is called. 1767 */ 1768 static int halt_cpsch(struct device_queue_manager *dqm) 1769 { 1770 int ret = 0; 1771 1772 dqm_lock(dqm); 1773 if (!dqm->sched_running) { 1774 dqm_unlock(dqm); 1775 return 0; 1776 } 1777 1778 WARN_ONCE(dqm->sched_halt, "Scheduling is already on halt\n"); 1779 1780 if (!dqm->is_hws_hang) { 1781 if (!dqm->dev->kfd->shared_resources.enable_mes) 1782 ret = unmap_queues_cpsch(dqm, 1783 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 1784 USE_DEFAULT_GRACE_PERIOD, false); 1785 else 1786 ret = remove_all_kfd_queues_mes(dqm); 1787 } 1788 dqm->sched_halt = true; 1789 dqm_unlock(dqm); 1790 1791 return ret; 1792 } 1793 1794 /* unhalt_cpsch 1795 * Unset dqm->sched_halt and map queues back to runlist 1796 */ 1797 static int unhalt_cpsch(struct device_queue_manager *dqm) 1798 { 1799 int ret = 0; 1800 1801 dqm_lock(dqm); 1802 if (!dqm->sched_running || !dqm->sched_halt) { 1803 WARN_ONCE(!dqm->sched_halt, "Scheduling is not on halt.\n"); 1804 dqm_unlock(dqm); 1805 return 0; 1806 } 1807 dqm->sched_halt = false; 1808 if (!dqm->dev->kfd->shared_resources.enable_mes) 1809 ret = execute_queues_cpsch(dqm, 1810 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 1811 0, USE_DEFAULT_GRACE_PERIOD); 1812 else 1813 ret = add_all_kfd_queues_mes(dqm); 1814 1815 dqm_unlock(dqm); 1816 1817 return ret; 1818 } 1819 1820 static int start_cpsch(struct device_queue_manager *dqm) 1821 { 1822 struct device *dev = dqm->dev->adev->dev; 1823 int retval, num_hw_queue_slots; 1824 1825 dqm_lock(dqm); 1826 1827 if (!dqm->dev->kfd->shared_resources.enable_mes) { 1828 retval = pm_init(&dqm->packet_mgr, dqm); 1829 if (retval) 1830 goto fail_packet_manager_init; 1831 1832 retval = set_sched_resources(dqm); 1833 if (retval) 1834 goto fail_set_sched_resources; 1835 } 1836 pr_debug("Allocating fence memory\n"); 1837 1838 /* allocate fence memory on the gart */ 1839 retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr), 1840 &dqm->fence_mem); 1841 1842 if (retval) 1843 goto fail_allocate_vidmem; 1844 1845 dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr; 1846 dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr; 1847 1848 init_interrupts(dqm); 1849 1850 /* clear hang status when the driver tries to start the hw scheduler */ 1851
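/* From here on map_queues_cpsch()/execute_queues_cpsch() are allowed to build and submit a runlist. */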
dqm->sched_running = true; 1852 1853 if (!dqm->dev->kfd->shared_resources.enable_mes) { 1854 if (pm_config_dequeue_wait_counts(&dqm->packet_mgr, 1855 KFD_DEQUEUE_WAIT_INIT, 0 /* unused */)) 1856 dev_err(dev, "Setting optimized dequeue wait failed. Using default values\n"); 1857 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); 1858 } 1859 1860 /* setup per-queue reset detection buffer */ 1861 num_hw_queue_slots = dqm->dev->kfd->shared_resources.num_queue_per_pipe * 1862 dqm->dev->kfd->shared_resources.num_pipe_per_mec * 1863 NUM_XCC(dqm->dev->xcc_mask); 1864 1865 dqm->detect_hang_info_size = num_hw_queue_slots * sizeof(struct dqm_detect_hang_info); 1866 dqm->detect_hang_info = kzalloc(dqm->detect_hang_info_size, GFP_KERNEL); 1867 1868 if (!dqm->detect_hang_info) { 1869 retval = -ENOMEM; 1870 goto fail_detect_hang_buffer; 1871 } 1872 1873 dqm_unlock(dqm); 1874 1875 return 0; 1876 fail_detect_hang_buffer: 1877 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); 1878 fail_allocate_vidmem: 1879 fail_set_sched_resources: 1880 if (!dqm->dev->kfd->shared_resources.enable_mes) 1881 pm_uninit(&dqm->packet_mgr); 1882 fail_packet_manager_init: 1883 dqm_unlock(dqm); 1884 return retval; 1885 } 1886 1887 static int stop_cpsch(struct device_queue_manager *dqm) 1888 { 1889 int ret = 0; 1890 1891 dqm_lock(dqm); 1892 if (!dqm->sched_running) { 1893 dqm_unlock(dqm); 1894 return 0; 1895 } 1896 1897 if (!dqm->dev->kfd->shared_resources.enable_mes) 1898 ret = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 1899 0, USE_DEFAULT_GRACE_PERIOD, false); 1900 else 1901 ret = remove_all_kfd_queues_mes(dqm); 1902 1903 dqm->sched_running = false; 1904 1905 if (!dqm->dev->kfd->shared_resources.enable_mes) 1906 pm_release_ib(&dqm->packet_mgr); 1907 1908 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); 1909 if (!dqm->dev->kfd->shared_resources.enable_mes) 1910 pm_uninit(&dqm->packet_mgr); 1911 kfree(dqm->detect_hang_info); 1912 dqm->detect_hang_info = NULL; 1913 dqm_unlock(dqm); 1914 1915 return ret; 1916 } 1917 1918 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, 1919 struct kernel_queue *kq, 1920 struct qcm_process_device *qpd) 1921 { 1922 dqm_lock(dqm); 1923 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1924 pr_warn("Can't create new kernel queue because %d queues were already created\n", 1925 dqm->total_queue_count); 1926 dqm_unlock(dqm); 1927 return -EPERM; 1928 } 1929 1930 /* 1931 * Unconditionally increment this counter, regardless of the queue's 1932 * type or whether the queue is active. 1933 */ 1934 dqm->total_queue_count++; 1935 pr_debug("Total of %d queues are accountable so far\n", 1936 dqm->total_queue_count); 1937 1938 list_add(&kq->list, &qpd->priv_queue_list); 1939 increment_queue_count(dqm, qpd, kq->queue); 1940 qpd->is_debug = true; 1941 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 1942 USE_DEFAULT_GRACE_PERIOD); 1943 dqm_unlock(dqm); 1944 1945 return 0; 1946 } 1947 1948 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, 1949 struct kernel_queue *kq, 1950 struct qcm_process_device *qpd) 1951 { 1952 dqm_lock(dqm); 1953 list_del(&kq->list); 1954 decrement_queue_count(dqm, qpd, kq->queue); 1955 qpd->is_debug = false; 1956 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 1957 USE_DEFAULT_GRACE_PERIOD); 1958 /* 1959 * Unconditionally decrement this counter, regardless of the queue's 1960 * type. 
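 * This mirrors the unconditional increment in create_kernel_queue_cpsch().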
1961 */ 1962 dqm->total_queue_count--; 1963 pr_debug("Total of %d queues are accountable so far\n", 1964 dqm->total_queue_count); 1965 dqm_unlock(dqm); 1966 } 1967 1968 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, 1969 struct qcm_process_device *qpd, 1970 const struct kfd_criu_queue_priv_data *qd, 1971 const void *restore_mqd, const void *restore_ctl_stack) 1972 { 1973 int retval; 1974 struct mqd_manager *mqd_mgr; 1975 1976 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1977 pr_warn("Can't create new usermode queue because %d queues were already created\n", 1978 dqm->total_queue_count); 1979 retval = -EPERM; 1980 goto out; 1981 } 1982 1983 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1984 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI || 1985 q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { 1986 dqm_lock(dqm); 1987 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); 1988 dqm_unlock(dqm); 1989 if (retval) 1990 goto out; 1991 } 1992 1993 retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL); 1994 if (retval) 1995 goto out_deallocate_sdma_queue; 1996 1997 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1998 q->properties.type)]; 1999 2000 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 2001 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 2002 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 2003 q->properties.tba_addr = qpd->tba_addr; 2004 q->properties.tma_addr = qpd->tma_addr; 2005 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr, &q->properties); 2006 if (!q->mqd_mem_obj) { 2007 retval = -ENOMEM; 2008 goto out_deallocate_doorbell; 2009 } 2010 2011 dqm_lock(dqm); 2012 /* 2013 * Eviction state logic: mark all queues as evicted, even ones 2014 * not currently active. Restoring inactive queues later only 2015 * updates the is_evicted flag but is a no-op otherwise. 2016 */ 2017 q->properties.is_evicted = !!qpd->evicted; 2018 q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled && 2019 kfd_dbg_has_cwsr_workaround(q->device); 2020 2021 if (qd) 2022 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, 2023 &q->properties, restore_mqd, restore_ctl_stack, 2024 qd->ctl_stack_size); 2025 else 2026 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 2027 &q->gart_mqd_addr, &q->properties); 2028 2029 list_add(&q->list, &qpd->queues_list); 2030 qpd->queue_count++; 2031 2032 if (q->properties.is_active) { 2033 increment_queue_count(dqm, qpd, q); 2034 2035 if (!dqm->dev->kfd->shared_resources.enable_mes) 2036 retval = execute_queues_cpsch(dqm, 2037 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); 2038 else 2039 retval = add_queue_mes(dqm, q, qpd); 2040 if (retval) 2041 goto cleanup_queue; 2042 } 2043 2044 /* 2045 * Unconditionally increment this counter, regardless of the queue's 2046 * type or whether the queue is active. 
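* The matching decrement happens in destroy_queue_cpsch(), or in
* process_termination_cpsch() when the whole process is torn down.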
2047 */ 2048 dqm->total_queue_count++; 2049 2050 pr_debug("Total of %d queues are accountable so far\n", 2051 dqm->total_queue_count); 2052 2053 dqm_unlock(dqm); 2054 return retval; 2055 2056 cleanup_queue: 2057 qpd->queue_count--; 2058 list_del(&q->list); 2059 if (q->properties.is_active) 2060 decrement_queue_count(dqm, qpd, q); 2061 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2062 dqm_unlock(dqm); 2063 out_deallocate_doorbell: 2064 deallocate_doorbell(qpd, q); 2065 out_deallocate_sdma_queue: 2066 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 2067 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 2068 dqm_lock(dqm); 2069 deallocate_sdma_queue(dqm, q); 2070 dqm_unlock(dqm); 2071 } 2072 out: 2073 return retval; 2074 } 2075 2076 int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm, 2077 uint64_t fence_value, 2078 unsigned int timeout_ms) 2079 { 2080 unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies; 2081 struct device *dev = dqm->dev->adev->dev; 2082 uint64_t *fence_addr = dqm->fence_addr; 2083 2084 while (*fence_addr != fence_value) { 2085 /* Fatal err detected, this response won't come */ 2086 if (amdgpu_amdkfd_is_fed(dqm->dev->adev) || 2087 amdgpu_in_reset(dqm->dev->adev)) 2088 return -EIO; 2089 2090 if (time_after(jiffies, end_jiffies)) { 2091 dev_err(dev, "qcm fence wait loop timeout expired\n"); 2092 /* In HWS case, this is used to halt the driver thread 2093 * in order not to mess up CP states before doing 2094 * scandumps for FW debugging. 2095 */ 2096 while (halt_if_hws_hang) 2097 schedule(); 2098 2099 return -ETIME; 2100 } 2101 schedule(); 2102 } 2103 2104 return 0; 2105 } 2106 2107 /* dqm->lock mutex has to be locked before calling this function */ 2108 static int map_queues_cpsch(struct device_queue_manager *dqm) 2109 { 2110 struct device *dev = dqm->dev->adev->dev; 2111 int retval; 2112 2113 if (!dqm->sched_running || dqm->sched_halt) 2114 return 0; 2115 if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0) 2116 return 0; 2117 if (dqm->active_runlist) 2118 return 0; 2119 2120 retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues); 2121 pr_debug("%s sent runlist\n", __func__); 2122 if (retval) { 2123 dev_err(dev, "failed to execute runlist\n"); 2124 return retval; 2125 } 2126 dqm->active_runlist = true; 2127 2128 return retval; 2129 } 2130 2131 static void set_queue_as_reset(struct device_queue_manager *dqm, struct queue *q, 2132 struct qcm_process_device *qpd) 2133 { 2134 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 2135 2136 dev_err(dqm->dev->adev->dev, "queue id 0x%0x at pasid %d is reset\n", 2137 q->properties.queue_id, pdd->process->lead_thread->pid); 2138 2139 pdd->has_reset_queue = true; 2140 if (q->properties.is_active) { 2141 q->properties.is_active = false; 2142 decrement_queue_count(dqm, qpd, q); 2143 } 2144 } 2145 2146 static int detect_queue_hang(struct device_queue_manager *dqm) 2147 { 2148 int i; 2149 2150 /* detect should be used only in dqm locked queue reset */ 2151 if (WARN_ON(dqm->detect_hang_count > 0)) 2152 return 0; 2153 2154 memset(dqm->detect_hang_info, 0, dqm->detect_hang_info_size); 2155 2156 for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) { 2157 uint32_t mec, pipe, queue; 2158 int xcc_id; 2159 2160 mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe) 2161 / dqm->dev->kfd->shared_resources.num_pipe_per_mec; 2162 2163 if (mec || !test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 2164 continue; 2165 2166 amdgpu_queue_mask_bit_to_mec_queue(dqm->dev->adev, i, &mec, &pipe, &queue); 2167 2168 
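/*
 * For each XCC instance, read back the HQD's ring buffer base address.
 * A non-zero address means a queue is still resident in this HW slot;
 * record it as a hang candidate so that reset_hung_queues() can match
 * it to a struct queue by address and reset that HQD individually.
 */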
for_each_inst(xcc_id, dqm->dev->xcc_mask) { 2169 uint64_t queue_addr = dqm->dev->kfd2kgd->hqd_get_pq_addr( 2170 dqm->dev->adev, pipe, queue, xcc_id); 2171 struct dqm_detect_hang_info hang_info; 2172 2173 if (!queue_addr) 2174 continue; 2175 2176 hang_info.pipe_id = pipe; 2177 hang_info.queue_id = queue; 2178 hang_info.xcc_id = xcc_id; 2179 hang_info.queue_address = queue_addr; 2180 2181 dqm->detect_hang_info[dqm->detect_hang_count] = hang_info; 2182 dqm->detect_hang_count++; 2183 } 2184 } 2185 2186 return dqm->detect_hang_count; 2187 } 2188 2189 static struct queue *find_queue_by_address(struct device_queue_manager *dqm, uint64_t queue_address) 2190 { 2191 struct device_process_node *cur; 2192 struct qcm_process_device *qpd; 2193 struct queue *q; 2194 2195 list_for_each_entry(cur, &dqm->queues, list) { 2196 qpd = cur->qpd; 2197 list_for_each_entry(q, &qpd->queues_list, list) { 2198 if (queue_address == q->properties.queue_address) 2199 return q; 2200 } 2201 } 2202 2203 return NULL; 2204 } 2205 2206 static int reset_hung_queues(struct device_queue_manager *dqm) 2207 { 2208 int r = 0, reset_count = 0, i; 2209 2210 if (!dqm->detect_hang_info || dqm->is_hws_hang) 2211 return -EIO; 2212 2213 /* assume dqm locked. */ 2214 if (!detect_queue_hang(dqm)) 2215 return -ENOTRECOVERABLE; 2216 2217 for (i = 0; i < dqm->detect_hang_count; i++) { 2218 struct dqm_detect_hang_info hang_info = dqm->detect_hang_info[i]; 2219 struct queue *q = find_queue_by_address(dqm, hang_info.queue_address); 2220 struct kfd_process_device *pdd; 2221 uint64_t queue_addr = 0; 2222 2223 if (!q) { 2224 r = -ENOTRECOVERABLE; 2225 goto reset_fail; 2226 } 2227 2228 pdd = kfd_get_process_device_data(dqm->dev, q->process); 2229 if (!pdd) { 2230 r = -ENOTRECOVERABLE; 2231 goto reset_fail; 2232 } 2233 2234 queue_addr = dqm->dev->kfd2kgd->hqd_reset(dqm->dev->adev, 2235 hang_info.pipe_id, hang_info.queue_id, hang_info.xcc_id, 2236 KFD_UNMAP_LATENCY_MS); 2237 2238 /* either reset failed or we reset an unexpected queue. 
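* hqd_reset() is expected to return the ring base of the queue it actually
* reset; a mismatch with the queue looked up above leaves the HW slot in an
* unknown state, so treat the hang as unrecoverable instead of guessing.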
*/ 2239 if (queue_addr != q->properties.queue_address) { 2240 r = -ENOTRECOVERABLE; 2241 goto reset_fail; 2242 } 2243 2244 set_queue_as_reset(dqm, q, &pdd->qpd); 2245 reset_count++; 2246 } 2247 2248 if (reset_count == dqm->detect_hang_count) 2249 kfd_signal_reset_event(dqm->dev); 2250 else 2251 r = -ENOTRECOVERABLE; 2252 2253 reset_fail: 2254 dqm->detect_hang_count = 0; 2255 2256 return r; 2257 } 2258 2259 static bool sdma_has_hang(struct device_queue_manager *dqm) 2260 { 2261 int engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm); 2262 int engine_end = engine_start + get_num_all_sdma_engines(dqm); 2263 int num_queues_per_eng = dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 2264 int i, j; 2265 2266 for (i = engine_start; i < engine_end; i++) { 2267 for (j = 0; j < num_queues_per_eng; j++) { 2268 if (!dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j)) 2269 continue; 2270 2271 return true; 2272 } 2273 } 2274 2275 return false; 2276 } 2277 2278 static bool set_sdma_queue_as_reset(struct device_queue_manager *dqm, 2279 uint32_t doorbell_off) 2280 { 2281 struct device_process_node *cur; 2282 struct qcm_process_device *qpd; 2283 struct queue *q; 2284 2285 list_for_each_entry(cur, &dqm->queues, list) { 2286 qpd = cur->qpd; 2287 list_for_each_entry(q, &qpd->queues_list, list) { 2288 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA || 2289 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) && 2290 q->properties.doorbell_off == doorbell_off) { 2291 set_queue_as_reset(dqm, q, qpd); 2292 return true; 2293 } 2294 } 2295 } 2296 2297 return false; 2298 } 2299 2300 static int reset_hung_queues_sdma(struct device_queue_manager *dqm) 2301 { 2302 int engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm); 2303 int engine_end = engine_start + get_num_all_sdma_engines(dqm); 2304 int num_queues_per_eng = dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 2305 int r = 0, i, j; 2306 2307 if (dqm->is_hws_hang) 2308 return -EIO; 2309 2310 /* Scan for hung HW queues and reset engine. */ 2311 dqm->detect_hang_count = 0; 2312 for (i = engine_start; i < engine_end; i++) { 2313 for (j = 0; j < num_queues_per_eng; j++) { 2314 uint32_t doorbell_off = 2315 dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j); 2316 2317 if (!doorbell_off) 2318 continue; 2319 2320 /* Reset engine and check. */ 2321 if (amdgpu_sdma_reset_engine(dqm->dev->adev, i, false) || 2322 dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j) || 2323 !set_sdma_queue_as_reset(dqm, doorbell_off)) { 2324 r = -ENOTRECOVERABLE; 2325 goto reset_fail; 2326 } 2327 2328 /* Should only expect one queue active per engine */ 2329 dqm->detect_hang_count++; 2330 break; 2331 } 2332 } 2333 2334 /* Signal process reset */ 2335 if (dqm->detect_hang_count) 2336 kfd_signal_reset_event(dqm->dev); 2337 else 2338 r = -ENOTRECOVERABLE; 2339 2340 reset_fail: 2341 dqm->detect_hang_count = 0; 2342 2343 return r; 2344 } 2345 2346 static int reset_queues_on_hws_hang(struct device_queue_manager *dqm, bool is_sdma) 2347 { 2348 struct amdgpu_device *adev = dqm->dev->adev; 2349 2350 while (halt_if_hws_hang) 2351 schedule(); 2352 2353 if (adev->debug_disable_gpu_ring_reset) { 2354 dev_info_once(adev->dev, 2355 "%s queue hung, but ring reset disabled", 2356 is_sdma ? "sdma" : "compute"); 2357 2358 return -EPERM; 2359 } 2360 if (!amdgpu_gpu_recovery) 2361 return -ENOTRECOVERABLE; 2362 2363 return is_sdma ? 
reset_hung_queues_sdma(dqm) : reset_hung_queues(dqm);
2364 }
2365
2366 /* dqm->lock mutex has to be locked before calling this function
2367 *
2368 * @grace_period: If USE_DEFAULT_GRACE_PERIOD then default wait time
2369 * for context switch latency. Lower values are used by the debugger
2370 * since context switches are triggered at high frequency.
2371 * This is configured by setting CP_IQ_WAIT_TIME2.SCH_WAVE
2372 *
2373 */
2374 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
2375 enum kfd_unmap_queues_filter filter,
2376 uint32_t filter_param,
2377 uint32_t grace_period,
2378 bool reset)
2379 {
2380 struct device *dev = dqm->dev->adev->dev;
2381 struct mqd_manager *mqd_mgr;
2382 int retval;
2383
2384 if (!dqm->sched_running)
2385 return 0;
2386 if (!dqm->active_runlist)
2387 return 0;
2388 if (!down_read_trylock(&dqm->dev->adev->reset_domain->sem))
2389 return -EIO;
2390
2391 if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
2392 retval = pm_config_dequeue_wait_counts(&dqm->packet_mgr,
2393 KFD_DEQUEUE_WAIT_SET_SCH_WAVE, grace_period);
2394 if (retval)
2395 goto out;
2396 }
2397
2398 retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
2399 if (retval)
2400 goto out;
2401
2402 *dqm->fence_addr = KFD_FENCE_INIT;
2403 mb();
2404 pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
2405 KFD_FENCE_COMPLETED);
2406 /* this wait is bounded by queue_preemption_timeout_ms */
2407 retval = amdkfd_fence_wait_timeout(dqm, KFD_FENCE_COMPLETED,
2408 queue_preemption_timeout_ms);
2409 if (retval) {
2410 dev_err(dev, "The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
2411 kfd_hws_hang(dqm);
2412 goto out;
2413 }
2414
2415 /* In the current MEC firmware implementation, if a compute queue
2416 * doesn't respond to the preemption request in time, HIQ will
2417 * abandon the unmap request without returning any timeout error
2418 * to the driver. Instead, MEC firmware will log the doorbell of the
2419 * unresponsive compute queue to HIQ.MQD.queue_doorbell_id fields.
2420 * To make sure the queue unmap was successful, driver need to 2421 * check those fields 2422 */ 2423 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; 2424 if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd) && 2425 reset_queues_on_hws_hang(dqm, false)) 2426 goto reset_fail; 2427 2428 /* Check for SDMA hang and attempt SDMA reset */ 2429 if (sdma_has_hang(dqm) && reset_queues_on_hws_hang(dqm, true)) 2430 goto reset_fail; 2431 2432 /* We need to reset the grace period value for this device */ 2433 if (grace_period != USE_DEFAULT_GRACE_PERIOD) { 2434 if (pm_config_dequeue_wait_counts(&dqm->packet_mgr, 2435 KFD_DEQUEUE_WAIT_RESET, 0 /* unused */)) 2436 dev_err(dev, "Failed to reset grace period\n"); 2437 } 2438 2439 pm_release_ib(&dqm->packet_mgr); 2440 dqm->active_runlist = false; 2441 out: 2442 up_read(&dqm->dev->adev->reset_domain->sem); 2443 return retval; 2444 2445 reset_fail: 2446 dqm->is_hws_hang = true; 2447 kfd_hws_hang(dqm); 2448 up_read(&dqm->dev->adev->reset_domain->sem); 2449 return -ETIME; 2450 } 2451 2452 /* only for compute queue */ 2453 static int reset_queues_cpsch(struct device_queue_manager *dqm, uint16_t pasid) 2454 { 2455 int retval; 2456 2457 dqm_lock(dqm); 2458 2459 retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID, 2460 pasid, USE_DEFAULT_GRACE_PERIOD, true); 2461 2462 dqm_unlock(dqm); 2463 return retval; 2464 } 2465 2466 /* dqm->lock mutex has to be locked before calling this function */ 2467 static int execute_queues_cpsch(struct device_queue_manager *dqm, 2468 enum kfd_unmap_queues_filter filter, 2469 uint32_t filter_param, 2470 uint32_t grace_period) 2471 { 2472 int retval; 2473 2474 if (!down_read_trylock(&dqm->dev->adev->reset_domain->sem)) 2475 return -EIO; 2476 retval = unmap_queues_cpsch(dqm, filter, filter_param, grace_period, false); 2477 if (!retval) 2478 retval = map_queues_cpsch(dqm); 2479 up_read(&dqm->dev->adev->reset_domain->sem); 2480 return retval; 2481 } 2482 2483 static int wait_on_destroy_queue(struct device_queue_manager *dqm, 2484 struct queue *q) 2485 { 2486 struct kfd_process_device *pdd = kfd_get_process_device_data(q->device, 2487 q->process); 2488 int ret = 0; 2489 2490 if (WARN_ON(!pdd)) 2491 return ret; 2492 2493 if (pdd->qpd.is_debug) 2494 return ret; 2495 2496 q->properties.is_being_destroyed = true; 2497 2498 if (pdd->process->debug_trap_enabled && q->properties.is_suspended) { 2499 dqm_unlock(dqm); 2500 mutex_unlock(&q->process->mutex); 2501 ret = wait_event_interruptible(dqm->destroy_wait, 2502 !q->properties.is_suspended); 2503 2504 mutex_lock(&q->process->mutex); 2505 dqm_lock(dqm); 2506 } 2507 2508 return ret; 2509 } 2510 2511 static int destroy_queue_cpsch(struct device_queue_manager *dqm, 2512 struct qcm_process_device *qpd, 2513 struct queue *q) 2514 { 2515 int retval; 2516 struct mqd_manager *mqd_mgr; 2517 uint64_t sdma_val = 0; 2518 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 2519 struct device *dev = dqm->dev->adev->dev; 2520 2521 /* Get the SDMA queue stats */ 2522 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 2523 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 2524 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 2525 &sdma_val); 2526 if (retval) 2527 dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n", 2528 q->properties.queue_id); 2529 } 2530 2531 /* remove queue from list to prevent rescheduling after preemption */ 2532 dqm_lock(dqm); 2533 2534 retval = wait_on_destroy_queue(dqm, q); 2535 2536 if (retval) { 2537 
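/*
 * The interruptible wait in wait_on_destroy_queue() was aborted;
 * return the error without destroying the queue.
 */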
dqm_unlock(dqm); 2538 return retval; 2539 } 2540 2541 if (qpd->is_debug) { 2542 /* 2543 * error, currently we do not allow to destroy a queue 2544 * of a currently debugged process 2545 */ 2546 retval = -EBUSY; 2547 goto failed_try_destroy_debugged_queue; 2548 2549 } 2550 2551 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2552 q->properties.type)]; 2553 2554 deallocate_doorbell(qpd, q); 2555 2556 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 2557 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 2558 deallocate_sdma_queue(dqm, q); 2559 pdd->sdma_past_activity_counter += sdma_val; 2560 } 2561 2562 if (q->properties.is_active) { 2563 decrement_queue_count(dqm, qpd, q); 2564 q->properties.is_active = false; 2565 if (!dqm->dev->kfd->shared_resources.enable_mes) { 2566 retval = execute_queues_cpsch(dqm, 2567 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 2568 USE_DEFAULT_GRACE_PERIOD); 2569 if (retval == -ETIME) 2570 qpd->reset_wavefronts = true; 2571 } else { 2572 retval = remove_queue_mes(dqm, q, qpd); 2573 } 2574 } 2575 list_del(&q->list); 2576 qpd->queue_count--; 2577 2578 /* 2579 * Unconditionally decrement this counter, regardless of the queue's 2580 * type 2581 */ 2582 dqm->total_queue_count--; 2583 pr_debug("Total of %d queues are accountable so far\n", 2584 dqm->total_queue_count); 2585 2586 dqm_unlock(dqm); 2587 2588 /* 2589 * Do free_mqd and raise delete event after dqm_unlock(dqm) to avoid 2590 * circular locking 2591 */ 2592 kfd_dbg_ev_raise(KFD_EC_MASK(EC_DEVICE_QUEUE_DELETE), 2593 qpd->pqm->process, q->device, 2594 -1, false, NULL, 0); 2595 2596 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2597 2598 return retval; 2599 2600 failed_try_destroy_debugged_queue: 2601 2602 dqm_unlock(dqm); 2603 return retval; 2604 } 2605 2606 static bool set_cache_memory_policy(struct device_queue_manager *dqm, 2607 struct qcm_process_device *qpd, 2608 enum cache_policy default_policy, 2609 enum cache_policy alternate_policy, 2610 void __user *alternate_aperture_base, 2611 uint64_t alternate_aperture_size, 2612 u32 misc_process_properties) 2613 { 2614 bool retval = true; 2615 2616 if (!dqm->asic_ops.set_cache_memory_policy) 2617 return retval; 2618 2619 dqm_lock(dqm); 2620 2621 retval = dqm->asic_ops.set_cache_memory_policy( 2622 dqm, 2623 qpd, 2624 default_policy, 2625 alternate_policy, 2626 alternate_aperture_base, 2627 alternate_aperture_size, 2628 misc_process_properties); 2629 2630 if (retval) 2631 goto out; 2632 2633 if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) 2634 program_sh_mem_settings(dqm, qpd); 2635 2636 pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", 2637 qpd->sh_mem_config, qpd->sh_mem_ape1_base, 2638 qpd->sh_mem_ape1_limit); 2639 2640 out: 2641 dqm_unlock(dqm); 2642 return retval; 2643 } 2644 2645 static int process_termination_nocpsch(struct device_queue_manager *dqm, 2646 struct qcm_process_device *qpd) 2647 { 2648 struct queue *q; 2649 struct device_process_node *cur, *next_dpn; 2650 int retval = 0; 2651 bool found = false; 2652 2653 dqm_lock(dqm); 2654 2655 /* Clear all user mode queues */ 2656 while (!list_empty(&qpd->queues_list)) { 2657 struct mqd_manager *mqd_mgr; 2658 int ret; 2659 2660 q = list_first_entry(&qpd->queues_list, struct queue, list); 2661 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2662 q->properties.type)]; 2663 ret = destroy_queue_nocpsch_locked(dqm, qpd, q); 2664 if (ret) 2665 retval = ret; 2666 dqm_unlock(dqm); 2667 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2668 dqm_lock(dqm); 
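/* free_mqd() above ran with the DQM lock dropped to avoid circular
 * locking; see the matching note in process_termination_cpsch().
 */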
2669 } 2670 2671 /* Unregister process */ 2672 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 2673 if (qpd == cur->qpd) { 2674 list_del(&cur->list); 2675 kfree(cur); 2676 dqm->processes_count--; 2677 found = true; 2678 break; 2679 } 2680 } 2681 2682 dqm_unlock(dqm); 2683 2684 /* Outside the DQM lock because under the DQM lock we can't do 2685 * reclaim or take other locks that others hold while reclaiming. 2686 */ 2687 if (found) 2688 kfd_dec_compute_active(dqm->dev); 2689 2690 return retval; 2691 } 2692 2693 static int get_wave_state(struct device_queue_manager *dqm, 2694 struct queue *q, 2695 void __user *ctl_stack, 2696 u32 *ctl_stack_used_size, 2697 u32 *save_area_used_size) 2698 { 2699 struct mqd_manager *mqd_mgr; 2700 2701 dqm_lock(dqm); 2702 2703 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; 2704 2705 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || 2706 q->properties.is_active || !q->device->kfd->cwsr_enabled || 2707 !mqd_mgr->get_wave_state) { 2708 dqm_unlock(dqm); 2709 return -EINVAL; 2710 } 2711 2712 dqm_unlock(dqm); 2713 2714 /* 2715 * get_wave_state is outside the dqm lock to prevent circular locking 2716 * and the queue should be protected against destruction by the process 2717 * lock. 2718 */ 2719 return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, &q->properties, 2720 ctl_stack, ctl_stack_used_size, save_area_used_size); 2721 } 2722 2723 static void get_queue_checkpoint_info(struct device_queue_manager *dqm, 2724 const struct queue *q, 2725 u32 *mqd_size, 2726 u32 *ctl_stack_size) 2727 { 2728 struct mqd_manager *mqd_mgr; 2729 enum KFD_MQD_TYPE mqd_type = 2730 get_mqd_type_from_queue_type(q->properties.type); 2731 2732 dqm_lock(dqm); 2733 mqd_mgr = dqm->mqd_mgrs[mqd_type]; 2734 *mqd_size = mqd_mgr->mqd_size * NUM_XCC(mqd_mgr->dev->xcc_mask); 2735 *ctl_stack_size = 0; 2736 2737 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info) 2738 mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size); 2739 2740 dqm_unlock(dqm); 2741 } 2742 2743 static int checkpoint_mqd(struct device_queue_manager *dqm, 2744 const struct queue *q, 2745 void *mqd, 2746 void *ctl_stack) 2747 { 2748 struct mqd_manager *mqd_mgr; 2749 int r = 0; 2750 enum KFD_MQD_TYPE mqd_type = 2751 get_mqd_type_from_queue_type(q->properties.type); 2752 2753 dqm_lock(dqm); 2754 2755 if (q->properties.is_active || !q->device->kfd->cwsr_enabled) { 2756 r = -EINVAL; 2757 goto dqm_unlock; 2758 } 2759 2760 mqd_mgr = dqm->mqd_mgrs[mqd_type]; 2761 if (!mqd_mgr->checkpoint_mqd) { 2762 r = -EOPNOTSUPP; 2763 goto dqm_unlock; 2764 } 2765 2766 mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack); 2767 2768 dqm_unlock: 2769 dqm_unlock(dqm); 2770 return r; 2771 } 2772 2773 static int process_termination_cpsch(struct device_queue_manager *dqm, 2774 struct qcm_process_device *qpd) 2775 { 2776 int retval = 0; 2777 struct queue *q; 2778 struct device *dev = dqm->dev->adev->dev; 2779 struct kernel_queue *kq, *kq_next; 2780 struct mqd_manager *mqd_mgr; 2781 struct device_process_node *cur, *next_dpn; 2782 enum kfd_unmap_queues_filter filter = 2783 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; 2784 bool found = false; 2785 2786 dqm_lock(dqm); 2787 2788 /* Clean all kernel queues */ 2789 list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) { 2790 list_del(&kq->list); 2791 decrement_queue_count(dqm, qpd, kq->queue); 2792 qpd->is_debug = false; 2793 dqm->total_queue_count--; 2794 filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES; 2795 } 2796 2797 /* Clear all user mode queues */ 2798 
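/*
 * First pass: release SDMA engine slots, drop active queue counts and,
 * when MES is enabled, remove the queues from the HW scheduler. The MQD
 * memory itself is freed in a second loop further down, outside the DQM
 * lock, once the queues are no longer mapped.
 */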
list_for_each_entry(q, &qpd->queues_list, list) { 2799 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 2800 deallocate_sdma_queue(dqm, q); 2801 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 2802 deallocate_sdma_queue(dqm, q); 2803 2804 if (q->properties.is_active) { 2805 decrement_queue_count(dqm, qpd, q); 2806 2807 if (dqm->dev->kfd->shared_resources.enable_mes) { 2808 retval = remove_queue_mes(dqm, q, qpd); 2809 if (retval) 2810 dev_err(dev, "Failed to remove queue %d\n", 2811 q->properties.queue_id); 2812 } 2813 } 2814 2815 dqm->total_queue_count--; 2816 } 2817 2818 /* Unregister process */ 2819 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 2820 if (qpd == cur->qpd) { 2821 list_del(&cur->list); 2822 kfree(cur); 2823 dqm->processes_count--; 2824 found = true; 2825 break; 2826 } 2827 } 2828 2829 if (!dqm->dev->kfd->shared_resources.enable_mes) 2830 retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD); 2831 2832 if ((retval || qpd->reset_wavefronts) && 2833 down_read_trylock(&dqm->dev->adev->reset_domain->sem)) { 2834 pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); 2835 dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process); 2836 qpd->reset_wavefronts = false; 2837 up_read(&dqm->dev->adev->reset_domain->sem); 2838 } 2839 2840 /* Lastly, free mqd resources. 2841 * Do free_mqd() after dqm_unlock to avoid circular locking. 2842 */ 2843 while (!list_empty(&qpd->queues_list)) { 2844 q = list_first_entry(&qpd->queues_list, struct queue, list); 2845 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2846 q->properties.type)]; 2847 list_del(&q->list); 2848 qpd->queue_count--; 2849 dqm_unlock(dqm); 2850 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2851 dqm_lock(dqm); 2852 } 2853 dqm_unlock(dqm); 2854 2855 /* Outside the DQM lock because under the DQM lock we can't do 2856 * reclaim or take other locks that others hold while reclaiming. 
2857 */ 2858 if (found) 2859 kfd_dec_compute_active(dqm->dev); 2860 2861 return retval; 2862 } 2863 2864 static int init_mqd_managers(struct device_queue_manager *dqm) 2865 { 2866 int i, j; 2867 struct device *dev = dqm->dev->adev->dev; 2868 struct mqd_manager *mqd_mgr; 2869 2870 for (i = 0; i < KFD_MQD_TYPE_MAX; i++) { 2871 mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev); 2872 if (!mqd_mgr) { 2873 dev_err(dev, "mqd manager [%d] initialization failed\n", i); 2874 goto out_free; 2875 } 2876 dqm->mqd_mgrs[i] = mqd_mgr; 2877 } 2878 2879 return 0; 2880 2881 out_free: 2882 for (j = 0; j < i; j++) { 2883 kfree(dqm->mqd_mgrs[j]); 2884 dqm->mqd_mgrs[j] = NULL; 2885 } 2886 2887 return -ENOMEM; 2888 } 2889 2890 /* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/ 2891 static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) 2892 { 2893 int retval; 2894 struct kfd_node *dev = dqm->dev; 2895 struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd; 2896 uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * 2897 get_num_all_sdma_engines(dqm) * 2898 dev->kfd->device_info.num_sdma_queues_per_engine + 2899 (dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size * 2900 NUM_XCC(dqm->dev->xcc_mask)); 2901 2902 retval = amdgpu_amdkfd_alloc_kernel_mem(dev->adev, size, 2903 AMDGPU_GEM_DOMAIN_GTT, 2904 &(mem_obj->mem), &(mem_obj->gpu_addr), 2905 (void *)&(mem_obj->cpu_ptr), false); 2906 2907 return retval; 2908 } 2909 2910 static void deallocate_hiq_sdma_mqd(struct kfd_node *dev, 2911 struct kfd_mem_obj *mqd) 2912 { 2913 WARN(!mqd, "No hiq sdma mqd trunk to free"); 2914 2915 amdgpu_amdkfd_free_kernel_mem(dev->adev, &mqd->mem); 2916 } 2917 2918 struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev) 2919 { 2920 struct device_queue_manager *dqm; 2921 2922 pr_debug("Loading device queue manager\n"); 2923 2924 dqm = kzalloc(sizeof(*dqm), GFP_KERNEL); 2925 if (!dqm) 2926 return NULL; 2927 2928 switch (dev->adev->asic_type) { 2929 /* HWS is not available on Hawaii. */ 2930 case CHIP_HAWAII: 2931 /* HWS depends on CWSR for timely dequeue. CWSR is not 2932 * available on Tonga. 2933 * 2934 * FIXME: This argument also applies to Kaveri. 
2935 */ 2936 case CHIP_TONGA: 2937 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS; 2938 break; 2939 default: 2940 dqm->sched_policy = sched_policy; 2941 break; 2942 } 2943 2944 dqm->dev = dev; 2945 switch (dqm->sched_policy) { 2946 case KFD_SCHED_POLICY_HWS: 2947 case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: 2948 /* initialize dqm for cp scheduling */ 2949 dqm->ops.create_queue = create_queue_cpsch; 2950 dqm->ops.initialize = initialize_cpsch; 2951 dqm->ops.start = start_cpsch; 2952 dqm->ops.stop = stop_cpsch; 2953 dqm->ops.halt = halt_cpsch; 2954 dqm->ops.unhalt = unhalt_cpsch; 2955 dqm->ops.destroy_queue = destroy_queue_cpsch; 2956 dqm->ops.update_queue = update_queue; 2957 dqm->ops.register_process = register_process; 2958 dqm->ops.unregister_process = unregister_process; 2959 dqm->ops.uninitialize = uninitialize; 2960 dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; 2961 dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; 2962 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 2963 dqm->ops.process_termination = process_termination_cpsch; 2964 dqm->ops.evict_process_queues = evict_process_queues_cpsch; 2965 dqm->ops.restore_process_queues = restore_process_queues_cpsch; 2966 dqm->ops.get_wave_state = get_wave_state; 2967 dqm->ops.reset_queues = reset_queues_cpsch; 2968 dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info; 2969 dqm->ops.checkpoint_mqd = checkpoint_mqd; 2970 break; 2971 case KFD_SCHED_POLICY_NO_HWS: 2972 /* initialize dqm for no cp scheduling */ 2973 dqm->ops.start = start_nocpsch; 2974 dqm->ops.stop = stop_nocpsch; 2975 dqm->ops.create_queue = create_queue_nocpsch; 2976 dqm->ops.destroy_queue = destroy_queue_nocpsch; 2977 dqm->ops.update_queue = update_queue; 2978 dqm->ops.register_process = register_process; 2979 dqm->ops.unregister_process = unregister_process; 2980 dqm->ops.initialize = initialize_nocpsch; 2981 dqm->ops.uninitialize = uninitialize; 2982 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 2983 dqm->ops.process_termination = process_termination_nocpsch; 2984 dqm->ops.evict_process_queues = evict_process_queues_nocpsch; 2985 dqm->ops.restore_process_queues = 2986 restore_process_queues_nocpsch; 2987 dqm->ops.get_wave_state = get_wave_state; 2988 dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info; 2989 dqm->ops.checkpoint_mqd = checkpoint_mqd; 2990 break; 2991 default: 2992 dev_err(dev->adev->dev, "Invalid scheduling policy %d\n", dqm->sched_policy); 2993 goto out_free; 2994 } 2995 2996 switch (dev->adev->asic_type) { 2997 case CHIP_KAVERI: 2998 case CHIP_HAWAII: 2999 device_queue_manager_init_cik(&dqm->asic_ops); 3000 break; 3001 3002 case CHIP_CARRIZO: 3003 case CHIP_TONGA: 3004 case CHIP_FIJI: 3005 case CHIP_POLARIS10: 3006 case CHIP_POLARIS11: 3007 case CHIP_POLARIS12: 3008 case CHIP_VEGAM: 3009 device_queue_manager_init_vi(&dqm->asic_ops); 3010 break; 3011 3012 default: 3013 if (KFD_GC_VERSION(dev) >= IP_VERSION(12, 1, 0)) 3014 device_queue_manager_init_v12_1(&dqm->asic_ops); 3015 else if (KFD_GC_VERSION(dev) >= IP_VERSION(12, 0, 0)) 3016 device_queue_manager_init_v12(&dqm->asic_ops); 3017 else if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0)) 3018 device_queue_manager_init_v11(&dqm->asic_ops); 3019 else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1)) 3020 device_queue_manager_init_v10(&dqm->asic_ops); 3021 else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1)) 3022 device_queue_manager_init_v9(&dqm->asic_ops); 3023 else { 3024 WARN(1, "Unexpected ASIC family %u", 3025 dev->adev->asic_type); 3026 
goto out_free; 3027 } 3028 } 3029 3030 if (init_mqd_managers(dqm)) 3031 goto out_free; 3032 3033 if (!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) { 3034 dev_err(dev->adev->dev, "Failed to allocate hiq sdma mqd trunk buffer\n"); 3035 goto out_free; 3036 } 3037 3038 if (!dqm->ops.initialize(dqm)) { 3039 init_waitqueue_head(&dqm->destroy_wait); 3040 return dqm; 3041 } 3042 3043 if (!dev->kfd->shared_resources.enable_mes) 3044 deallocate_hiq_sdma_mqd(dev, &dqm->hiq_sdma_mqd); 3045 3046 out_free: 3047 kfree(dqm); 3048 return NULL; 3049 } 3050 3051 void device_queue_manager_uninit(struct device_queue_manager *dqm) 3052 { 3053 dqm->ops.stop(dqm); 3054 dqm->ops.uninitialize(dqm); 3055 if (!dqm->dev->kfd->shared_resources.enable_mes) 3056 deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd); 3057 kfree(dqm); 3058 } 3059 3060 int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id) 3061 { 3062 struct kfd_process_device *pdd = NULL; 3063 struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, &pdd); 3064 struct device_queue_manager *dqm = knode->dqm; 3065 struct device *dev = dqm->dev->adev->dev; 3066 struct qcm_process_device *qpd; 3067 struct queue *q = NULL; 3068 int ret = 0; 3069 3070 if (!pdd) 3071 return -EINVAL; 3072 3073 dqm_lock(dqm); 3074 3075 if (pdd) { 3076 qpd = &pdd->qpd; 3077 3078 list_for_each_entry(q, &qpd->queues_list, list) { 3079 if (q->doorbell_id == doorbell_id && q->properties.is_active) { 3080 ret = suspend_all_queues_mes(dqm); 3081 if (ret) { 3082 dev_err(dev, "Suspending all queues failed"); 3083 goto out; 3084 } 3085 3086 q->properties.is_evicted = true; 3087 q->properties.is_active = false; 3088 decrement_queue_count(dqm, qpd, q); 3089 3090 ret = remove_queue_mes(dqm, q, qpd); 3091 if (ret) { 3092 dev_err(dev, "Removing bad queue failed"); 3093 goto out; 3094 } 3095 3096 ret = resume_all_queues_mes(dqm); 3097 if (ret) 3098 dev_err(dev, "Resuming all queues failed"); 3099 3100 break; 3101 } 3102 } 3103 } 3104 3105 out: 3106 dqm_unlock(dqm); 3107 kfd_unref_process(p); 3108 return ret; 3109 } 3110 3111 int kfd_evict_process_device(struct kfd_process_device *pdd) 3112 { 3113 struct device_queue_manager *dqm; 3114 struct kfd_process *p; 3115 3116 p = pdd->process; 3117 dqm = pdd->dev->dqm; 3118 3119 WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); 3120 3121 return dqm->ops.evict_process_queues(dqm, &pdd->qpd); 3122 } 3123 3124 int reserve_debug_trap_vmid(struct device_queue_manager *dqm, 3125 struct qcm_process_device *qpd) 3126 { 3127 int r; 3128 struct device *dev = dqm->dev->adev->dev; 3129 int updated_vmid_mask; 3130 3131 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3132 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3133 return -EINVAL; 3134 } 3135 3136 dqm_lock(dqm); 3137 3138 if (dqm->trap_debug_vmid != 0) { 3139 dev_err(dev, "Trap debug id already reserved\n"); 3140 r = -EBUSY; 3141 goto out_unlock; 3142 } 3143 3144 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 3145 USE_DEFAULT_GRACE_PERIOD, false); 3146 if (r) 3147 goto out_unlock; 3148 3149 updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap; 3150 updated_vmid_mask &= ~(1 << dqm->dev->vm_info.last_vmid_kfd); 3151 3152 dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask; 3153 dqm->trap_debug_vmid = dqm->dev->vm_info.last_vmid_kfd; 3154 r = set_sched_resources(dqm); 3155 if (r) 3156 goto out_unlock; 3157 3158 r = map_queues_cpsch(dqm); 3159 if (r) 3160 goto 
out_unlock; 3161 3162 pr_debug("Reserved VMID for trap debug: %i\n", dqm->trap_debug_vmid); 3163 3164 out_unlock: 3165 dqm_unlock(dqm); 3166 return r; 3167 } 3168 3169 /* 3170 * Releases vmid for the trap debugger 3171 */ 3172 int release_debug_trap_vmid(struct device_queue_manager *dqm, 3173 struct qcm_process_device *qpd) 3174 { 3175 struct device *dev = dqm->dev->adev->dev; 3176 int r; 3177 int updated_vmid_mask; 3178 uint32_t trap_debug_vmid; 3179 3180 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3181 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3182 return -EINVAL; 3183 } 3184 3185 dqm_lock(dqm); 3186 trap_debug_vmid = dqm->trap_debug_vmid; 3187 if (dqm->trap_debug_vmid == 0) { 3188 dev_err(dev, "Trap debug id is not reserved\n"); 3189 r = -EINVAL; 3190 goto out_unlock; 3191 } 3192 3193 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 3194 USE_DEFAULT_GRACE_PERIOD, false); 3195 if (r) 3196 goto out_unlock; 3197 3198 updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap; 3199 updated_vmid_mask |= (1 << dqm->dev->vm_info.last_vmid_kfd); 3200 3201 dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask; 3202 dqm->trap_debug_vmid = 0; 3203 r = set_sched_resources(dqm); 3204 if (r) 3205 goto out_unlock; 3206 3207 r = map_queues_cpsch(dqm); 3208 if (r) 3209 goto out_unlock; 3210 3211 pr_debug("Released VMID for trap debug: %i\n", trap_debug_vmid); 3212 3213 out_unlock: 3214 dqm_unlock(dqm); 3215 return r; 3216 } 3217 3218 #define QUEUE_NOT_FOUND -1 3219 /* invalidate queue operation in array */ 3220 static void q_array_invalidate(uint32_t num_queues, uint32_t *queue_ids) 3221 { 3222 int i; 3223 3224 for (i = 0; i < num_queues; i++) 3225 queue_ids[i] |= KFD_DBG_QUEUE_INVALID_MASK; 3226 } 3227 3228 /* find queue index in array */ 3229 static int q_array_get_index(unsigned int queue_id, 3230 uint32_t num_queues, 3231 uint32_t *queue_ids) 3232 { 3233 int i; 3234 3235 for (i = 0; i < num_queues; i++) 3236 if (queue_id == (queue_ids[i] & ~KFD_DBG_QUEUE_INVALID_MASK)) 3237 return i; 3238 3239 return QUEUE_NOT_FOUND; 3240 } 3241 3242 struct copy_context_work_handler_workarea { 3243 struct work_struct copy_context_work; 3244 struct kfd_process *p; 3245 }; 3246 3247 static void copy_context_work_handler(struct work_struct *work) 3248 { 3249 struct copy_context_work_handler_workarea *workarea; 3250 struct mqd_manager *mqd_mgr; 3251 struct queue *q; 3252 struct mm_struct *mm; 3253 struct kfd_process *p; 3254 uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size; 3255 int i; 3256 3257 workarea = container_of(work, 3258 struct copy_context_work_handler_workarea, 3259 copy_context_work); 3260 3261 p = workarea->p; 3262 mm = get_task_mm(p->lead_thread); 3263 3264 if (!mm) 3265 return; 3266 3267 kthread_use_mm(mm); 3268 for (i = 0; i < p->n_pdds; i++) { 3269 struct kfd_process_device *pdd = p->pdds[i]; 3270 struct device_queue_manager *dqm = pdd->dev->dqm; 3271 struct qcm_process_device *qpd = &pdd->qpd; 3272 3273 list_for_each_entry(q, &qpd->queues_list, list) { 3274 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE) 3275 continue; 3276 3277 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; 3278 3279 /* We ignore the return value from get_wave_state 3280 * because 3281 * i) right now, it always returns 0, and 3282 * ii) if we hit an error, we would continue to the 3283 * next queue anyway. 
3284 */ 3285 mqd_mgr->get_wave_state(mqd_mgr, 3286 q->mqd, 3287 &q->properties, 3288 (void __user *) q->properties.ctx_save_restore_area_address, 3289 &tmp_ctl_stack_used_size, 3290 &tmp_save_area_used_size); 3291 } 3292 } 3293 kthread_unuse_mm(mm); 3294 mmput(mm); 3295 } 3296 3297 static uint32_t *get_queue_ids(uint32_t num_queues, uint32_t *usr_queue_id_array) 3298 { 3299 size_t array_size = num_queues * sizeof(uint32_t); 3300 3301 if (!usr_queue_id_array) 3302 return NULL; 3303 3304 return memdup_user(usr_queue_id_array, array_size); 3305 } 3306 3307 int resume_queues(struct kfd_process *p, 3308 uint32_t num_queues, 3309 uint32_t *usr_queue_id_array) 3310 { 3311 uint32_t *queue_ids = NULL; 3312 int total_resumed = 0; 3313 int i; 3314 3315 if (usr_queue_id_array) { 3316 queue_ids = get_queue_ids(num_queues, usr_queue_id_array); 3317 3318 if (IS_ERR(queue_ids)) 3319 return PTR_ERR(queue_ids); 3320 3321 /* mask all queues as invalid. unmask per successful request */ 3322 q_array_invalidate(num_queues, queue_ids); 3323 } 3324 3325 for (i = 0; i < p->n_pdds; i++) { 3326 struct kfd_process_device *pdd = p->pdds[i]; 3327 struct device_queue_manager *dqm = pdd->dev->dqm; 3328 struct device *dev = dqm->dev->adev->dev; 3329 struct qcm_process_device *qpd = &pdd->qpd; 3330 struct queue *q; 3331 int r, per_device_resumed = 0; 3332 3333 dqm_lock(dqm); 3334 3335 /* unmask queues that resume or already resumed as valid */ 3336 list_for_each_entry(q, &qpd->queues_list, list) { 3337 int q_idx = QUEUE_NOT_FOUND; 3338 3339 if (queue_ids) 3340 q_idx = q_array_get_index( 3341 q->properties.queue_id, 3342 num_queues, 3343 queue_ids); 3344 3345 if (!queue_ids || q_idx != QUEUE_NOT_FOUND) { 3346 int err = resume_single_queue(dqm, &pdd->qpd, q); 3347 3348 if (queue_ids) { 3349 if (!err) { 3350 queue_ids[q_idx] &= 3351 ~KFD_DBG_QUEUE_INVALID_MASK; 3352 } else { 3353 queue_ids[q_idx] |= 3354 KFD_DBG_QUEUE_ERROR_MASK; 3355 break; 3356 } 3357 } 3358 3359 if (dqm->dev->kfd->shared_resources.enable_mes) { 3360 wake_up_all(&dqm->destroy_wait); 3361 if (!err) 3362 total_resumed++; 3363 } else { 3364 per_device_resumed++; 3365 } 3366 } 3367 } 3368 3369 if (!per_device_resumed) { 3370 dqm_unlock(dqm); 3371 continue; 3372 } 3373 3374 r = execute_queues_cpsch(dqm, 3375 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 3376 0, 3377 USE_DEFAULT_GRACE_PERIOD); 3378 if (r) { 3379 dev_err(dev, "Failed to resume process queues\n"); 3380 if (queue_ids) { 3381 list_for_each_entry(q, &qpd->queues_list, list) { 3382 int q_idx = q_array_get_index( 3383 q->properties.queue_id, 3384 num_queues, 3385 queue_ids); 3386 3387 /* mask queue as error on resume fail */ 3388 if (q_idx != QUEUE_NOT_FOUND) 3389 queue_ids[q_idx] |= 3390 KFD_DBG_QUEUE_ERROR_MASK; 3391 } 3392 } 3393 } else { 3394 wake_up_all(&dqm->destroy_wait); 3395 total_resumed += per_device_resumed; 3396 } 3397 3398 dqm_unlock(dqm); 3399 } 3400 3401 if (queue_ids) { 3402 if (copy_to_user((void __user *)usr_queue_id_array, queue_ids, 3403 num_queues * sizeof(uint32_t))) 3404 pr_err("copy_to_user failed on queue resume\n"); 3405 3406 kfree(queue_ids); 3407 } 3408 3409 return total_resumed; 3410 } 3411 3412 int suspend_queues(struct kfd_process *p, 3413 uint32_t num_queues, 3414 uint32_t grace_period, 3415 uint64_t exception_clear_mask, 3416 uint32_t *usr_queue_id_array) 3417 { 3418 uint32_t *queue_ids = get_queue_ids(num_queues, usr_queue_id_array); 3419 int total_suspended = 0; 3420 int i; 3421 3422 if (IS_ERR(queue_ids)) 3423 return PTR_ERR(queue_ids); 3424 3425 /* mask all queues as 
invalid. umask on successful request */ 3426 q_array_invalidate(num_queues, queue_ids); 3427 3428 for (i = 0; i < p->n_pdds; i++) { 3429 struct kfd_process_device *pdd = p->pdds[i]; 3430 struct device_queue_manager *dqm = pdd->dev->dqm; 3431 struct device *dev = dqm->dev->adev->dev; 3432 struct qcm_process_device *qpd = &pdd->qpd; 3433 struct queue *q; 3434 int r, per_device_suspended = 0; 3435 3436 mutex_lock(&p->event_mutex); 3437 dqm_lock(dqm); 3438 3439 /* unmask queues that suspend or already suspended */ 3440 list_for_each_entry(q, &qpd->queues_list, list) { 3441 int q_idx = q_array_get_index(q->properties.queue_id, 3442 num_queues, 3443 queue_ids); 3444 3445 if (q_idx != QUEUE_NOT_FOUND) { 3446 int err = suspend_single_queue(dqm, pdd, q); 3447 bool is_mes = dqm->dev->kfd->shared_resources.enable_mes; 3448 3449 if (!err) { 3450 queue_ids[q_idx] &= ~KFD_DBG_QUEUE_INVALID_MASK; 3451 if (exception_clear_mask && is_mes) 3452 q->properties.exception_status &= 3453 ~exception_clear_mask; 3454 3455 if (is_mes) 3456 total_suspended++; 3457 else 3458 per_device_suspended++; 3459 } else if (err != -EBUSY) { 3460 queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK; 3461 break; 3462 } 3463 } 3464 } 3465 3466 if (!per_device_suspended) { 3467 dqm_unlock(dqm); 3468 mutex_unlock(&p->event_mutex); 3469 if (total_suspended) 3470 amdgpu_amdkfd_debug_mem_fence(dqm->dev->adev); 3471 continue; 3472 } 3473 3474 r = execute_queues_cpsch(dqm, 3475 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 3476 grace_period); 3477 3478 if (r) 3479 dev_err(dev, "Failed to suspend process queues.\n"); 3480 else 3481 total_suspended += per_device_suspended; 3482 3483 list_for_each_entry(q, &qpd->queues_list, list) { 3484 int q_idx = q_array_get_index(q->properties.queue_id, 3485 num_queues, queue_ids); 3486 3487 if (q_idx == QUEUE_NOT_FOUND) 3488 continue; 3489 3490 /* mask queue as error on suspend fail */ 3491 if (r) 3492 queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK; 3493 else if (exception_clear_mask) 3494 q->properties.exception_status &= 3495 ~exception_clear_mask; 3496 } 3497 3498 dqm_unlock(dqm); 3499 mutex_unlock(&p->event_mutex); 3500 amdgpu_device_flush_hdp(dqm->dev->adev, NULL); 3501 } 3502 3503 if (total_suspended) { 3504 struct copy_context_work_handler_workarea copy_context_worker; 3505 3506 INIT_WORK_ONSTACK( 3507 ©_context_worker.copy_context_work, 3508 copy_context_work_handler); 3509 3510 copy_context_worker.p = p; 3511 3512 schedule_work(©_context_worker.copy_context_work); 3513 3514 3515 flush_work(©_context_worker.copy_context_work); 3516 destroy_work_on_stack(©_context_worker.copy_context_work); 3517 } 3518 3519 if (copy_to_user((void __user *)usr_queue_id_array, queue_ids, 3520 num_queues * sizeof(uint32_t))) 3521 pr_err("copy_to_user failed on queue suspend\n"); 3522 3523 kfree(queue_ids); 3524 3525 return total_suspended; 3526 } 3527 3528 static uint32_t set_queue_type_for_user(struct queue_properties *q_props) 3529 { 3530 switch (q_props->type) { 3531 case KFD_QUEUE_TYPE_COMPUTE: 3532 return q_props->format == KFD_QUEUE_FORMAT_PM4 3533 ? 
KFD_IOC_QUEUE_TYPE_COMPUTE 3534 : KFD_IOC_QUEUE_TYPE_COMPUTE_AQL; 3535 case KFD_QUEUE_TYPE_SDMA: 3536 return KFD_IOC_QUEUE_TYPE_SDMA; 3537 case KFD_QUEUE_TYPE_SDMA_XGMI: 3538 return KFD_IOC_QUEUE_TYPE_SDMA_XGMI; 3539 default: 3540 WARN_ONCE(true, "queue type not recognized!"); 3541 return 0xffffffff; 3542 }; 3543 } 3544 3545 void set_queue_snapshot_entry(struct queue *q, 3546 uint64_t exception_clear_mask, 3547 struct kfd_queue_snapshot_entry *qss_entry) 3548 { 3549 qss_entry->ring_base_address = q->properties.queue_address; 3550 qss_entry->write_pointer_address = (uint64_t)q->properties.write_ptr; 3551 qss_entry->read_pointer_address = (uint64_t)q->properties.read_ptr; 3552 qss_entry->ctx_save_restore_address = 3553 q->properties.ctx_save_restore_area_address; 3554 qss_entry->ctx_save_restore_area_size = 3555 q->properties.ctx_save_restore_area_size; 3556 qss_entry->exception_status = q->properties.exception_status; 3557 qss_entry->queue_id = q->properties.queue_id; 3558 qss_entry->gpu_id = q->device->id; 3559 qss_entry->ring_size = (uint32_t)q->properties.queue_size; 3560 qss_entry->queue_type = set_queue_type_for_user(&q->properties); 3561 q->properties.exception_status &= ~exception_clear_mask; 3562 } 3563 3564 int debug_lock_and_unmap(struct device_queue_manager *dqm) 3565 { 3566 struct device *dev = dqm->dev->adev->dev; 3567 int r; 3568 3569 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3570 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3571 return -EINVAL; 3572 } 3573 3574 if (!kfd_dbg_is_per_vmid_supported(dqm->dev)) 3575 return 0; 3576 3577 dqm_lock(dqm); 3578 3579 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 0, false); 3580 if (r) 3581 dqm_unlock(dqm); 3582 3583 return r; 3584 } 3585 3586 int debug_map_and_unlock(struct device_queue_manager *dqm) 3587 { 3588 struct device *dev = dqm->dev->adev->dev; 3589 int r; 3590 3591 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3592 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3593 return -EINVAL; 3594 } 3595 3596 if (!kfd_dbg_is_per_vmid_supported(dqm->dev)) 3597 return 0; 3598 3599 r = map_queues_cpsch(dqm); 3600 3601 dqm_unlock(dqm); 3602 3603 return r; 3604 } 3605 3606 int debug_refresh_runlist(struct device_queue_manager *dqm) 3607 { 3608 int r = debug_lock_and_unmap(dqm); 3609 3610 if (r) 3611 return r; 3612 3613 return debug_map_and_unlock(dqm); 3614 } 3615 3616 bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm, 3617 struct qcm_process_device *qpd, 3618 int doorbell_off, u32 *queue_format) 3619 { 3620 struct queue *q; 3621 bool r = false; 3622 3623 if (!queue_format) 3624 return r; 3625 3626 dqm_lock(dqm); 3627 3628 list_for_each_entry(q, &qpd->queues_list, list) { 3629 if (q->properties.doorbell_off == doorbell_off) { 3630 *queue_format = q->properties.format; 3631 r = true; 3632 goto out; 3633 } 3634 } 3635 3636 out: 3637 dqm_unlock(dqm); 3638 return r; 3639 } 3640 #if defined(CONFIG_DEBUG_FS) 3641 3642 static void seq_reg_dump(struct seq_file *m, 3643 uint32_t (*dump)[2], uint32_t n_regs) 3644 { 3645 uint32_t i, count; 3646 3647 for (i = 0, count = 0; i < n_regs; i++) { 3648 if (count == 0 || 3649 dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) { 3650 seq_printf(m, "%s %08x: %08x", 3651 i ? 
"\n" : "", 3652 dump[i][0], dump[i][1]); 3653 count = 7; 3654 } else { 3655 seq_printf(m, " %08x", dump[i][1]); 3656 count--; 3657 } 3658 } 3659 3660 seq_puts(m, "\n"); 3661 } 3662 3663 int dqm_debugfs_hqds(struct seq_file *m, void *data) 3664 { 3665 struct device_queue_manager *dqm = data; 3666 uint32_t xcc_mask = dqm->dev->xcc_mask; 3667 uint32_t (*dump)[2], n_regs; 3668 int pipe, queue; 3669 int r = 0, xcc_id; 3670 uint32_t sdma_engine_start; 3671 3672 if (!dqm->sched_running) { 3673 seq_puts(m, " Device is stopped\n"); 3674 return 0; 3675 } 3676 3677 for_each_inst(xcc_id, xcc_mask) { 3678 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 3679 KFD_CIK_HIQ_PIPE, 3680 KFD_CIK_HIQ_QUEUE, &dump, 3681 &n_regs, xcc_id); 3682 if (!r) { 3683 seq_printf( 3684 m, 3685 " Inst %d, HIQ on MEC %d Pipe %d Queue %d\n", 3686 xcc_id, 3687 KFD_CIK_HIQ_PIPE / get_pipes_per_mec(dqm) + 1, 3688 KFD_CIK_HIQ_PIPE % get_pipes_per_mec(dqm), 3689 KFD_CIK_HIQ_QUEUE); 3690 seq_reg_dump(m, dump, n_regs); 3691 3692 kfree(dump); 3693 } 3694 3695 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 3696 int pipe_offset = pipe * get_queues_per_pipe(dqm); 3697 3698 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { 3699 if (!test_bit(pipe_offset + queue, 3700 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 3701 continue; 3702 3703 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 3704 pipe, queue, 3705 &dump, &n_regs, 3706 xcc_id); 3707 if (r) 3708 break; 3709 3710 seq_printf(m, 3711 " Inst %d, CP Pipe %d, Queue %d\n", 3712 xcc_id, pipe, queue); 3713 seq_reg_dump(m, dump, n_regs); 3714 3715 kfree(dump); 3716 } 3717 } 3718 } 3719 3720 sdma_engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm); 3721 for (pipe = sdma_engine_start; 3722 pipe < (sdma_engine_start + get_num_all_sdma_engines(dqm)); 3723 pipe++) { 3724 for (queue = 0; 3725 queue < dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 3726 queue++) { 3727 r = dqm->dev->kfd2kgd->hqd_sdma_dump( 3728 dqm->dev->adev, pipe, queue, &dump, &n_regs); 3729 if (r) 3730 break; 3731 3732 seq_printf(m, " SDMA Engine %d, RLC %d\n", 3733 pipe, queue); 3734 seq_reg_dump(m, dump, n_regs); 3735 3736 kfree(dump); 3737 } 3738 } 3739 3740 return r; 3741 } 3742 3743 int dqm_debugfs_hang_hws(struct device_queue_manager *dqm) 3744 { 3745 int r = 0; 3746 3747 dqm_lock(dqm); 3748 r = pm_debugfs_hang_hws(&dqm->packet_mgr); 3749 if (r) { 3750 dqm_unlock(dqm); 3751 return r; 3752 } 3753 dqm->active_runlist = true; 3754 r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 3755 0, USE_DEFAULT_GRACE_PERIOD); 3756 dqm_unlock(dqm); 3757 3758 return r; 3759 } 3760 3761 #endif 3762