1 // SPDX-License-Identifier: GPL-2.0 OR MIT 2 /* 3 * Copyright 2014-2022 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 */ 24 25 #include <linux/ratelimit.h> 26 #include <linux/printk.h> 27 #include <linux/slab.h> 28 #include <linux/list.h> 29 #include <linux/types.h> 30 #include <linux/bitops.h> 31 #include <linux/sched.h> 32 #include "kfd_priv.h" 33 #include "kfd_device_queue_manager.h" 34 #include "kfd_mqd_manager.h" 35 #include "cik_regs.h" 36 #include "kfd_kernel_queue.h" 37 #include "amdgpu_amdkfd.h" 38 #include "amdgpu_reset.h" 39 #include "amdgpu_sdma.h" 40 #include "mes_v11_api_def.h" 41 #include "kfd_debug.h" 42 43 /* Size of the per-pipe EOP queue */ 44 #define CIK_HPD_EOP_BYTES_LOG2 11 45 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2) 46 /* See unmap_queues_cpsch() */ 47 #define USE_DEFAULT_GRACE_PERIOD 0xffffffff 48 49 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, 50 u32 pasid, unsigned int vmid); 51 52 static int execute_queues_cpsch(struct device_queue_manager *dqm, 53 enum kfd_unmap_queues_filter filter, 54 uint32_t filter_param, 55 uint32_t grace_period); 56 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 57 enum kfd_unmap_queues_filter filter, 58 uint32_t filter_param, 59 uint32_t grace_period, 60 bool reset); 61 62 static int map_queues_cpsch(struct device_queue_manager *dqm); 63 64 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 65 struct queue *q); 66 67 static inline void deallocate_hqd(struct device_queue_manager *dqm, 68 struct queue *q); 69 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q); 70 static int allocate_sdma_queue(struct device_queue_manager *dqm, 71 struct queue *q, const uint32_t *restore_sdma_id); 72 73 static int reset_queues_on_hws_hang(struct device_queue_manager *dqm, bool is_sdma); 74 75 static inline 76 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) 77 { 78 if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI) 79 return KFD_MQD_TYPE_SDMA; 80 return KFD_MQD_TYPE_CP; 81 } 82 83 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) 84 { 85 int i; 86 int pipe_offset = (mec * dqm->dev->kfd->shared_resources.num_pipe_per_mec 87 + pipe) * dqm->dev->kfd->shared_resources.num_queue_per_pipe; 88 89 /* queue is available for KFD usage if bit is 1 */ 90 for (i = 0; i < dqm->dev->kfd->shared_resources.num_queue_per_pipe; ++i) 91 if (test_bit(pipe_offset + i, 92 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 93 return true; 94 return false; 95 } 96 97 unsigned int get_cp_queues_num(struct device_queue_manager *dqm) 98 { 99 return bitmap_weight(dqm->dev->kfd->shared_resources.cp_queue_bitmap, 100 AMDGPU_MAX_QUEUES); 101 } 102 103 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm) 104 { 105 return dqm->dev->kfd->shared_resources.num_queue_per_pipe; 106 } 107 108 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm) 109 { 110 return dqm->dev->kfd->shared_resources.num_pipe_per_mec; 111 } 112 113 static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm) 114 { 115 return kfd_get_num_sdma_engines(dqm->dev) + 116 kfd_get_num_xgmi_sdma_engines(dqm->dev); 117 } 118 119 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) 120 { 121 return kfd_get_num_sdma_engines(dqm->dev) * 122 dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 123 } 124 125 unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) 126 { 127 return kfd_get_num_xgmi_sdma_engines(dqm->dev) * 128 dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 129 } 130 131 static void init_sdma_bitmaps(struct device_queue_manager *dqm) 132 { 133 bitmap_zero(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES); 134 bitmap_set(dqm->sdma_bitmap, 0, get_num_sdma_queues(dqm)); 135 136 bitmap_zero(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES); 137 bitmap_set(dqm->xgmi_sdma_bitmap, 0, get_num_xgmi_sdma_queues(dqm)); 138 139 /* Mask out the reserved queues */ 140 bitmap_clear(dqm->sdma_bitmap, 0, kfd_get_num_sdma_engines(dqm->dev) * 141 dqm->dev->kfd->device_info.num_reserved_sdma_queues_per_engine); 142 bitmap_clear(dqm->xgmi_sdma_bitmap, 0, kfd_get_num_xgmi_sdma_engines(dqm->dev) * 143 dqm->dev->kfd->device_info.num_reserved_sdma_queues_per_engine); 144 } 145 146 void program_sh_mem_settings(struct device_queue_manager *dqm, 147 struct qcm_process_device *qpd) 148 { 149 uint32_t xcc_mask = dqm->dev->xcc_mask; 150 int xcc_id; 151 152 for_each_inst(xcc_id, xcc_mask) 153 dqm->dev->kfd2kgd->program_sh_mem_settings( 154 dqm->dev->adev, qpd->vmid, qpd->sh_mem_config, 155 qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit, 156 qpd->sh_mem_bases, xcc_id); 157 } 158 159 static void kfd_hws_hang(struct device_queue_manager *dqm) 160 { 161 struct device_process_node *cur; 162 struct qcm_process_device *qpd; 163 struct queue *q; 164 165 /* Mark all device queues as reset. */ 166 list_for_each_entry(cur, &dqm->queues, list) { 167 qpd = cur->qpd; 168 list_for_each_entry(q, &qpd->queues_list, list) { 169 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 170 171 pdd->has_reset_queue = true; 172 } 173 } 174 175 /* 176 * Issue a GPU reset if HWS is unresponsive 177 */ 178 amdgpu_amdkfd_gpu_reset(dqm->dev->adev); 179 } 180 181 static int convert_to_mes_queue_type(int queue_type) 182 { 183 int mes_queue_type; 184 185 switch (queue_type) { 186 case KFD_QUEUE_TYPE_COMPUTE: 187 mes_queue_type = MES_QUEUE_TYPE_COMPUTE; 188 break; 189 case KFD_QUEUE_TYPE_SDMA: 190 mes_queue_type = MES_QUEUE_TYPE_SDMA; 191 break; 192 default: 193 WARN(1, "Invalid queue type %d", queue_type); 194 mes_queue_type = -EINVAL; 195 break; 196 } 197 198 return mes_queue_type; 199 } 200 201 static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, 202 struct qcm_process_device *qpd) 203 { 204 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 205 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 206 struct mes_add_queue_input queue_input; 207 int r, queue_type; 208 uint64_t wptr_addr_off; 209 210 if (!dqm->sched_running || dqm->sched_halt) 211 return 0; 212 if (!down_read_trylock(&adev->reset_domain->sem)) 213 return -EIO; 214 215 memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); 216 queue_input.process_id = pdd->pasid; 217 queue_input.page_table_base_addr = qpd->page_table_base; 218 queue_input.process_va_start = 0; 219 queue_input.process_va_end = adev->vm_manager.max_pfn - 1; 220 /* MES unit for quantum is 100ns */ 221 queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM; /* Equivalent to 10ms. */ 222 queue_input.process_context_addr = pdd->proc_ctx_gpu_addr; 223 queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */ 224 queue_input.gang_context_addr = q->gang_ctx_gpu_addr; 225 queue_input.inprocess_gang_priority = q->properties.priority; 226 queue_input.gang_global_priority_level = 227 AMDGPU_MES_PRIORITY_LEVEL_NORMAL; 228 queue_input.doorbell_offset = q->properties.doorbell_off; 229 queue_input.mqd_addr = q->gart_mqd_addr; 230 queue_input.wptr_addr = (uint64_t)q->properties.write_ptr; 231 232 wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1); 233 queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->properties.wptr_bo) + wptr_addr_off; 234 235 queue_input.is_kfd_process = 1; 236 queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL); 237 queue_input.queue_size = q->properties.queue_size >> 2; 238 239 queue_input.paging = false; 240 queue_input.tba_addr = qpd->tba_addr; 241 queue_input.tma_addr = qpd->tma_addr; 242 queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device); 243 queue_input.skip_process_ctx_clear = 244 qpd->pqm->process->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED && 245 (qpd->pqm->process->debug_trap_enabled || 246 kfd_dbg_has_ttmps_always_setup(q->device)); 247 248 queue_type = convert_to_mes_queue_type(q->properties.type); 249 if (queue_type < 0) { 250 dev_err(adev->dev, "Queue type not supported with MES, queue:%d\n", 251 q->properties.type); 252 up_read(&adev->reset_domain->sem); 253 return -EINVAL; 254 } 255 queue_input.queue_type = (uint32_t)queue_type; 256 257 queue_input.exclusively_scheduled = q->properties.is_gws; 258 queue_input.sh_mem_config_data = qpd->sh_mem_config; 259 queue_input.vm_cntx_cntl = qpd->vm_cntx_cntl; 260 queue_input.xcc_id = ffs(dqm->dev->xcc_mask) - 1; 261 262 amdgpu_mes_lock(&adev->mes); 263 r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); 264 amdgpu_mes_unlock(&adev->mes); 265 up_read(&adev->reset_domain->sem); 266 if (r) { 267 dev_err(adev->dev, "failed to add hardware queue to MES, doorbell=0x%x\n", 268 q->properties.doorbell_off); 269 dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); 270 kfd_hws_hang(dqm); 271 } 272 273 return r; 274 } 275 276 static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q, 277 struct qcm_process_device *qpd) 278 { 279 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 280 int r; 281 struct mes_remove_queue_input queue_input; 282 283 if (!dqm->sched_running || dqm->sched_halt) 284 return 0; 285 if (!down_read_trylock(&adev->reset_domain->sem)) 286 return -EIO; 287 288 memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input)); 289 queue_input.doorbell_offset = q->properties.doorbell_off; 290 queue_input.gang_context_addr = q->gang_ctx_gpu_addr; 291 queue_input.xcc_id = ffs(dqm->dev->xcc_mask) - 1; 292 293 amdgpu_mes_lock(&adev->mes); 294 r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); 295 amdgpu_mes_unlock(&adev->mes); 296 up_read(&adev->reset_domain->sem); 297 298 if (r) { 299 dev_err(adev->dev, "failed to remove hardware queue from MES, doorbell=0x%x\n", 300 q->properties.doorbell_off); 301 dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); 302 kfd_hws_hang(dqm); 303 } 304 305 return r; 306 } 307 308 static int remove_all_kfd_queues_mes(struct device_queue_manager *dqm) 309 { 310 struct device_process_node *cur; 311 struct device *dev = dqm->dev->adev->dev; 312 struct qcm_process_device *qpd; 313 struct queue *q; 314 int retval = 0; 315 316 list_for_each_entry(cur, &dqm->queues, list) { 317 qpd = cur->qpd; 318 list_for_each_entry(q, &qpd->queues_list, list) { 319 if (q->properties.is_active) { 320 retval = remove_queue_mes(dqm, q, qpd); 321 if (retval) { 322 dev_err(dev, "%s: Failed to remove queue %d for dev %d", 323 __func__, 324 q->properties.queue_id, 325 dqm->dev->id); 326 return retval; 327 } 328 } 329 } 330 } 331 332 return retval; 333 } 334 335 static int add_all_kfd_queues_mes(struct device_queue_manager *dqm) 336 { 337 struct device_process_node *cur; 338 struct device *dev = dqm->dev->adev->dev; 339 struct qcm_process_device *qpd; 340 struct queue *q; 341 int retval = 0; 342 343 list_for_each_entry(cur, &dqm->queues, list) { 344 qpd = cur->qpd; 345 list_for_each_entry(q, &qpd->queues_list, list) { 346 if (!q->properties.is_active) 347 continue; 348 retval = add_queue_mes(dqm, q, qpd); 349 if (retval) { 350 dev_err(dev, "%s: Failed to add queue %d for dev %d", 351 __func__, 352 q->properties.queue_id, 353 dqm->dev->id); 354 return retval; 355 } 356 } 357 } 358 359 return retval; 360 } 361 362 static int suspend_all_queues_mes(struct device_queue_manager *dqm) 363 { 364 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 365 int r = 0; 366 367 if (!down_read_trylock(&adev->reset_domain->sem)) 368 return -EIO; 369 370 r = amdgpu_mes_suspend(adev); 371 up_read(&adev->reset_domain->sem); 372 373 if (r) { 374 dev_err(adev->dev, "failed to suspend gangs from MES\n"); 375 dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); 376 kfd_hws_hang(dqm); 377 } 378 379 return r; 380 } 381 382 static int resume_all_queues_mes(struct device_queue_manager *dqm) 383 { 384 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 385 int r = 0; 386 387 if (!down_read_trylock(&adev->reset_domain->sem)) 388 return -EIO; 389 390 r = amdgpu_mes_resume(adev); 391 up_read(&adev->reset_domain->sem); 392 393 if (r) { 394 dev_err(adev->dev, "failed to resume gangs from MES\n"); 395 dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); 396 kfd_hws_hang(dqm); 397 } 398 399 return r; 400 } 401 402 static void increment_queue_count(struct device_queue_manager *dqm, 403 struct qcm_process_device *qpd, 404 struct queue *q) 405 { 406 dqm->active_queue_count++; 407 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 408 dqm->active_cp_queue_count++; 409 410 if (q->properties.is_gws) { 411 dqm->gws_queue_count++; 412 qpd->mapped_gws_queue = true; 413 } 414 } 415 416 static void decrement_queue_count(struct device_queue_manager *dqm, 417 struct qcm_process_device *qpd, 418 struct queue *q) 419 { 420 dqm->active_queue_count--; 421 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 422 dqm->active_cp_queue_count--; 423 424 if (q->properties.is_gws) { 425 dqm->gws_queue_count--; 426 qpd->mapped_gws_queue = false; 427 } 428 } 429 430 /* 431 * Allocate a doorbell ID to this queue. 432 * If doorbell_id is passed in, make sure requested ID is valid then allocate it. 433 */ 434 static int allocate_doorbell(struct qcm_process_device *qpd, 435 struct queue *q, 436 uint32_t const *restore_id) 437 { 438 struct kfd_node *dev = qpd->dqm->dev; 439 440 if (!KFD_IS_SOC15(dev)) { 441 /* On pre-SOC15 chips we need to use the queue ID to 442 * preserve the user mode ABI. 443 */ 444 445 if (restore_id && *restore_id != q->properties.queue_id) 446 return -EINVAL; 447 448 q->doorbell_id = q->properties.queue_id; 449 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 450 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 451 /* For SDMA queues on SOC15 with 8-byte doorbell, use static 452 * doorbell assignments based on the engine and queue id. 453 * The doobell index distance between RLC (2*i) and (2*i+1) 454 * for a SDMA engine is 512. 455 */ 456 457 uint32_t *idx_offset = dev->kfd->shared_resources.sdma_doorbell_idx; 458 459 /* 460 * q->properties.sdma_engine_id corresponds to the virtual 461 * sdma engine number. However, for doorbell allocation, 462 * we need the physical sdma engine id in order to get the 463 * correct doorbell offset. 464 */ 465 uint32_t valid_id = idx_offset[qpd->dqm->dev->node_id * 466 get_num_all_sdma_engines(qpd->dqm) + 467 q->properties.sdma_engine_id] 468 + (q->properties.sdma_queue_id & 1) 469 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET 470 + (q->properties.sdma_queue_id >> 1); 471 472 if (restore_id && *restore_id != valid_id) 473 return -EINVAL; 474 q->doorbell_id = valid_id; 475 } else { 476 /* For CP queues on SOC15 */ 477 if (restore_id) { 478 if (*restore_id >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) 479 return -EINVAL; 480 481 /* make sure that ID is free */ 482 if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap)) 483 return -EINVAL; 484 485 q->doorbell_id = *restore_id; 486 } else { 487 /* or reserve a free doorbell ID */ 488 unsigned int found; 489 490 found = find_first_zero_bit(qpd->doorbell_bitmap, 491 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); 492 if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { 493 pr_debug("No doorbells available"); 494 return -EBUSY; 495 } 496 set_bit(found, qpd->doorbell_bitmap); 497 q->doorbell_id = found; 498 } 499 } 500 501 q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev, 502 qpd->proc_doorbells, 503 q->doorbell_id, 504 dev->kfd->device_info.doorbell_size); 505 return 0; 506 } 507 508 static void deallocate_doorbell(struct qcm_process_device *qpd, 509 struct queue *q) 510 { 511 unsigned int old; 512 struct kfd_node *dev = qpd->dqm->dev; 513 514 if (!KFD_IS_SOC15(dev) || 515 q->properties.type == KFD_QUEUE_TYPE_SDMA || 516 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 517 return; 518 519 old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); 520 WARN_ON(!old); 521 } 522 523 static void program_trap_handler_settings(struct device_queue_manager *dqm, 524 struct qcm_process_device *qpd) 525 { 526 uint32_t xcc_mask = dqm->dev->xcc_mask; 527 int xcc_id; 528 529 if (dqm->dev->kfd2kgd->program_trap_handler_settings) 530 for_each_inst(xcc_id, xcc_mask) 531 dqm->dev->kfd2kgd->program_trap_handler_settings( 532 dqm->dev->adev, qpd->vmid, qpd->tba_addr, 533 qpd->tma_addr, xcc_id); 534 } 535 536 static int allocate_vmid(struct device_queue_manager *dqm, 537 struct qcm_process_device *qpd, 538 struct queue *q) 539 { 540 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 541 struct device *dev = dqm->dev->adev->dev; 542 int allocated_vmid = -1, i; 543 544 for (i = dqm->dev->vm_info.first_vmid_kfd; 545 i <= dqm->dev->vm_info.last_vmid_kfd; i++) { 546 if (!dqm->vmid_pasid[i]) { 547 allocated_vmid = i; 548 break; 549 } 550 } 551 552 if (allocated_vmid < 0) { 553 dev_err(dev, "no more vmid to allocate\n"); 554 return -ENOSPC; 555 } 556 557 pr_debug("vmid allocated: %d\n", allocated_vmid); 558 559 dqm->vmid_pasid[allocated_vmid] = pdd->pasid; 560 561 set_pasid_vmid_mapping(dqm, pdd->pasid, allocated_vmid); 562 563 qpd->vmid = allocated_vmid; 564 q->properties.vmid = allocated_vmid; 565 566 program_sh_mem_settings(dqm, qpd); 567 568 if (KFD_IS_SOC15(dqm->dev) && dqm->dev->kfd->cwsr_enabled) 569 program_trap_handler_settings(dqm, qpd); 570 571 /* qpd->page_table_base is set earlier when register_process() 572 * is called, i.e. when the first queue is created. 573 */ 574 dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev, 575 qpd->vmid, 576 qpd->page_table_base); 577 /* invalidate the VM context after pasid and vmid mapping is set up */ 578 kfd_flush_tlb(qpd_to_pdd(qpd)); 579 580 if (dqm->dev->kfd2kgd->set_scratch_backing_va) 581 dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev, 582 qpd->sh_hidden_private_base, qpd->vmid); 583 584 return 0; 585 } 586 587 static int flush_texture_cache_nocpsch(struct kfd_node *kdev, 588 struct qcm_process_device *qpd) 589 { 590 const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf; 591 int ret; 592 593 if (!qpd->ib_kaddr) 594 return -ENOMEM; 595 596 ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); 597 if (ret) 598 return ret; 599 600 return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid, 601 qpd->ib_base, (uint32_t *)qpd->ib_kaddr, 602 pmf->release_mem_size / sizeof(uint32_t)); 603 } 604 605 static void deallocate_vmid(struct device_queue_manager *dqm, 606 struct qcm_process_device *qpd, 607 struct queue *q) 608 { 609 struct device *dev = dqm->dev->adev->dev; 610 611 /* On GFX v7, CP doesn't flush TC at dequeue */ 612 if (q->device->adev->asic_type == CHIP_HAWAII) 613 if (flush_texture_cache_nocpsch(q->device, qpd)) 614 dev_err(dev, "Failed to flush TC\n"); 615 616 kfd_flush_tlb(qpd_to_pdd(qpd)); 617 618 /* Release the vmid mapping */ 619 set_pasid_vmid_mapping(dqm, 0, qpd->vmid); 620 dqm->vmid_pasid[qpd->vmid] = 0; 621 622 qpd->vmid = 0; 623 q->properties.vmid = 0; 624 } 625 626 static int create_queue_nocpsch(struct device_queue_manager *dqm, 627 struct queue *q, 628 struct qcm_process_device *qpd, 629 const struct kfd_criu_queue_priv_data *qd, 630 const void *restore_mqd, const void *restore_ctl_stack) 631 { 632 struct mqd_manager *mqd_mgr; 633 int retval; 634 635 dqm_lock(dqm); 636 637 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 638 pr_warn("Can't create new usermode queue because %d queues were already created\n", 639 dqm->total_queue_count); 640 retval = -EPERM; 641 goto out_unlock; 642 } 643 644 if (list_empty(&qpd->queues_list)) { 645 retval = allocate_vmid(dqm, qpd, q); 646 if (retval) 647 goto out_unlock; 648 } 649 q->properties.vmid = qpd->vmid; 650 /* 651 * Eviction state logic: mark all queues as evicted, even ones 652 * not currently active. Restoring inactive queues later only 653 * updates the is_evicted flag but is a no-op otherwise. 654 */ 655 q->properties.is_evicted = !!qpd->evicted; 656 657 q->properties.tba_addr = qpd->tba_addr; 658 q->properties.tma_addr = qpd->tma_addr; 659 660 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 661 q->properties.type)]; 662 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { 663 retval = allocate_hqd(dqm, q); 664 if (retval) 665 goto deallocate_vmid; 666 pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", 667 q->pipe, q->queue); 668 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 669 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 670 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); 671 if (retval) 672 goto deallocate_vmid; 673 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 674 } 675 676 retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL); 677 if (retval) 678 goto out_deallocate_hqd; 679 680 /* Temporarily release dqm lock to avoid a circular lock dependency */ 681 dqm_unlock(dqm); 682 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr, &q->properties); 683 dqm_lock(dqm); 684 685 if (!q->mqd_mem_obj) { 686 retval = -ENOMEM; 687 goto out_deallocate_doorbell; 688 } 689 690 if (qd) 691 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, 692 &q->properties, restore_mqd, restore_ctl_stack, 693 qd->ctl_stack_size); 694 else 695 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 696 &q->gart_mqd_addr, &q->properties); 697 698 if (q->properties.is_active) { 699 if (!dqm->sched_running) { 700 WARN_ONCE(1, "Load non-HWS mqd while stopped\n"); 701 goto add_queue_to_list; 702 } 703 704 if (WARN(q->process->mm != current->mm, 705 "should only run in user thread")) 706 retval = -EFAULT; 707 else 708 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 709 q->queue, &q->properties, current->mm); 710 if (retval) 711 goto out_free_mqd; 712 } 713 714 add_queue_to_list: 715 list_add(&q->list, &qpd->queues_list); 716 qpd->queue_count++; 717 if (q->properties.is_active) 718 increment_queue_count(dqm, qpd, q); 719 720 /* 721 * Unconditionally increment this counter, regardless of the queue's 722 * type or whether the queue is active. 723 */ 724 dqm->total_queue_count++; 725 pr_debug("Total of %d queues are accountable so far\n", 726 dqm->total_queue_count); 727 goto out_unlock; 728 729 out_free_mqd: 730 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 731 out_deallocate_doorbell: 732 deallocate_doorbell(qpd, q); 733 out_deallocate_hqd: 734 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 735 deallocate_hqd(dqm, q); 736 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 737 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 738 deallocate_sdma_queue(dqm, q); 739 deallocate_vmid: 740 if (list_empty(&qpd->queues_list)) 741 deallocate_vmid(dqm, qpd, q); 742 out_unlock: 743 dqm_unlock(dqm); 744 return retval; 745 } 746 747 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) 748 { 749 bool set; 750 int pipe, bit, i; 751 752 set = false; 753 754 for (pipe = dqm->next_pipe_to_allocate, i = 0; 755 i < get_pipes_per_mec(dqm); 756 pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) { 757 758 if (!is_pipe_enabled(dqm, 0, pipe)) 759 continue; 760 761 if (dqm->allocated_queues[pipe] != 0) { 762 bit = ffs(dqm->allocated_queues[pipe]) - 1; 763 dqm->allocated_queues[pipe] &= ~(1 << bit); 764 q->pipe = pipe; 765 q->queue = bit; 766 set = true; 767 break; 768 } 769 } 770 771 if (!set) 772 return -EBUSY; 773 774 pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue); 775 /* horizontal hqd allocation */ 776 dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm); 777 778 return 0; 779 } 780 781 static inline void deallocate_hqd(struct device_queue_manager *dqm, 782 struct queue *q) 783 { 784 dqm->allocated_queues[q->pipe] |= (1 << q->queue); 785 } 786 787 #define SQ_IND_CMD_CMD_KILL 0x00000003 788 #define SQ_IND_CMD_MODE_BROADCAST 0x00000001 789 790 static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process *p) 791 { 792 int status = 0; 793 unsigned int vmid; 794 uint16_t queried_pasid; 795 union SQ_CMD_BITS reg_sq_cmd; 796 union GRBM_GFX_INDEX_BITS reg_gfx_index; 797 struct kfd_process_device *pdd; 798 int first_vmid_to_scan = dev->vm_info.first_vmid_kfd; 799 int last_vmid_to_scan = dev->vm_info.last_vmid_kfd; 800 uint32_t xcc_mask = dev->xcc_mask; 801 int xcc_id; 802 803 reg_sq_cmd.u32All = 0; 804 reg_gfx_index.u32All = 0; 805 806 pr_debug("Killing all process wavefronts\n"); 807 808 if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) { 809 dev_err(dev->adev->dev, "no vmid pasid mapping supported\n"); 810 return -EOPNOTSUPP; 811 } 812 813 /* taking the VMID for that process on the safe way using PDD */ 814 pdd = kfd_get_process_device_data(dev, p); 815 if (!pdd) 816 return -EFAULT; 817 818 /* Scan all registers in the range ATC_VMID8_PASID_MAPPING .. 819 * ATC_VMID15_PASID_MAPPING 820 * to check which VMID the current process is mapped to. 821 */ 822 823 for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) { 824 status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info 825 (dev->adev, vmid, &queried_pasid); 826 827 if (status && queried_pasid == pdd->pasid) { 828 pr_debug("Killing wave fronts of vmid %d and process pid %d\n", 829 vmid, p->lead_thread->pid); 830 break; 831 } 832 } 833 834 if (vmid > last_vmid_to_scan) { 835 dev_err(dev->adev->dev, "Didn't find vmid for process pid %d\n", 836 p->lead_thread->pid); 837 return -EFAULT; 838 } 839 840 reg_gfx_index.bits.sh_broadcast_writes = 1; 841 reg_gfx_index.bits.se_broadcast_writes = 1; 842 reg_gfx_index.bits.instance_broadcast_writes = 1; 843 reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST; 844 reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL; 845 reg_sq_cmd.bits.vm_id = vmid; 846 847 for_each_inst(xcc_id, xcc_mask) 848 dev->kfd2kgd->wave_control_execute( 849 dev->adev, reg_gfx_index.u32All, 850 reg_sq_cmd.u32All, xcc_id); 851 852 return 0; 853 } 854 855 /* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked 856 * to avoid asynchronized access 857 */ 858 static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm, 859 struct qcm_process_device *qpd, 860 struct queue *q) 861 { 862 int retval; 863 struct mqd_manager *mqd_mgr; 864 865 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)]; 866 867 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 868 deallocate_hqd(dqm, q); 869 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 870 deallocate_sdma_queue(dqm, q); 871 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 872 deallocate_sdma_queue(dqm, q); 873 else { 874 pr_debug("q->properties.type %d is invalid\n", 875 q->properties.type); 876 return -EINVAL; 877 } 878 dqm->total_queue_count--; 879 880 deallocate_doorbell(qpd, q); 881 882 if (!dqm->sched_running) { 883 WARN_ONCE(1, "Destroy non-HWS queue while stopped\n"); 884 return 0; 885 } 886 887 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 888 KFD_PREEMPT_TYPE_WAVEFRONT_RESET, 889 KFD_UNMAP_LATENCY_MS, 890 q->pipe, q->queue); 891 if (retval == -ETIME) 892 qpd->reset_wavefronts = true; 893 894 list_del(&q->list); 895 if (list_empty(&qpd->queues_list)) { 896 if (qpd->reset_wavefronts) { 897 pr_warn("Resetting wave fronts (nocpsch) on dev %p\n", 898 dqm->dev); 899 /* dbgdev_wave_reset_wavefronts has to be called before 900 * deallocate_vmid(), i.e. when vmid is still in use. 901 */ 902 dbgdev_wave_reset_wavefronts(dqm->dev, 903 qpd->pqm->process); 904 qpd->reset_wavefronts = false; 905 } 906 907 deallocate_vmid(dqm, qpd, q); 908 } 909 qpd->queue_count--; 910 if (q->properties.is_active) 911 decrement_queue_count(dqm, qpd, q); 912 913 return retval; 914 } 915 916 static int destroy_queue_nocpsch(struct device_queue_manager *dqm, 917 struct qcm_process_device *qpd, 918 struct queue *q) 919 { 920 int retval; 921 uint64_t sdma_val = 0; 922 struct device *dev = dqm->dev->adev->dev; 923 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 924 struct mqd_manager *mqd_mgr = 925 dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)]; 926 927 /* Get the SDMA queue stats */ 928 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 929 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 930 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 931 &sdma_val); 932 if (retval) 933 dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n", 934 q->properties.queue_id); 935 } 936 937 dqm_lock(dqm); 938 retval = destroy_queue_nocpsch_locked(dqm, qpd, q); 939 if (!retval) 940 pdd->sdma_past_activity_counter += sdma_val; 941 dqm_unlock(dqm); 942 943 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 944 945 return retval; 946 } 947 948 static int update_queue(struct device_queue_manager *dqm, struct queue *q, 949 struct mqd_update_info *minfo) 950 { 951 int retval = 0; 952 struct device *dev = dqm->dev->adev->dev; 953 struct mqd_manager *mqd_mgr; 954 struct kfd_process_device *pdd; 955 bool prev_active = false; 956 957 dqm_lock(dqm); 958 pdd = kfd_get_process_device_data(q->device, q->process); 959 if (!pdd) { 960 retval = -ENODEV; 961 goto out_unlock; 962 } 963 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 964 q->properties.type)]; 965 966 /* Save previous activity state for counters */ 967 prev_active = q->properties.is_active; 968 969 /* Make sure the queue is unmapped before updating the MQD */ 970 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { 971 if (!dqm->dev->kfd->shared_resources.enable_mes) 972 retval = unmap_queues_cpsch(dqm, 973 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false); 974 else if (prev_active) 975 retval = remove_queue_mes(dqm, q, &pdd->qpd); 976 977 /* queue is reset so inaccessable */ 978 if (pdd->has_reset_queue) { 979 retval = -EACCES; 980 goto out_unlock; 981 } 982 983 if (retval) { 984 dev_err(dev, "unmap queue failed\n"); 985 goto out_unlock; 986 } 987 } else if (prev_active && 988 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 989 q->properties.type == KFD_QUEUE_TYPE_SDMA || 990 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 991 992 if (!dqm->sched_running) { 993 WARN_ONCE(1, "Update non-HWS queue while stopped\n"); 994 goto out_unlock; 995 } 996 997 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 998 (dqm->dev->kfd->cwsr_enabled ? 999 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 1000 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 1001 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 1002 if (retval) { 1003 dev_err(dev, "destroy mqd failed\n"); 1004 goto out_unlock; 1005 } 1006 } 1007 1008 mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo); 1009 1010 /* 1011 * check active state vs. the previous state and modify 1012 * counter accordingly. map_queues_cpsch uses the 1013 * dqm->active_queue_count to determine whether a new runlist must be 1014 * uploaded. 1015 */ 1016 if (q->properties.is_active && !prev_active) { 1017 increment_queue_count(dqm, &pdd->qpd, q); 1018 } else if (!q->properties.is_active && prev_active) { 1019 decrement_queue_count(dqm, &pdd->qpd, q); 1020 } else if (q->gws && !q->properties.is_gws) { 1021 if (q->properties.is_active) { 1022 dqm->gws_queue_count++; 1023 pdd->qpd.mapped_gws_queue = true; 1024 } 1025 q->properties.is_gws = true; 1026 } else if (!q->gws && q->properties.is_gws) { 1027 if (q->properties.is_active) { 1028 dqm->gws_queue_count--; 1029 pdd->qpd.mapped_gws_queue = false; 1030 } 1031 q->properties.is_gws = false; 1032 } 1033 1034 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { 1035 if (!dqm->dev->kfd->shared_resources.enable_mes) 1036 retval = map_queues_cpsch(dqm); 1037 else if (q->properties.is_active) 1038 retval = add_queue_mes(dqm, q, &pdd->qpd); 1039 } else if (q->properties.is_active && 1040 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 1041 q->properties.type == KFD_QUEUE_TYPE_SDMA || 1042 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 1043 if (WARN(q->process->mm != current->mm, 1044 "should only run in user thread")) 1045 retval = -EFAULT; 1046 else 1047 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 1048 q->pipe, q->queue, 1049 &q->properties, current->mm); 1050 } 1051 1052 out_unlock: 1053 dqm_unlock(dqm); 1054 return retval; 1055 } 1056 1057 /* suspend_single_queue does not lock the dqm like the 1058 * evict_process_queues_cpsch or evict_process_queues_nocpsch. You should 1059 * lock the dqm before calling, and unlock after calling. 1060 * 1061 * The reason we don't lock the dqm is because this function may be 1062 * called on multiple queues in a loop, so rather than locking/unlocking 1063 * multiple times, we will just keep the dqm locked for all of the calls. 1064 */ 1065 static int suspend_single_queue(struct device_queue_manager *dqm, 1066 struct kfd_process_device *pdd, 1067 struct queue *q) 1068 { 1069 bool is_new; 1070 1071 if (q->properties.is_suspended) 1072 return 0; 1073 1074 pr_debug("Suspending process pid %d queue [%i]\n", 1075 pdd->process->lead_thread->pid, 1076 q->properties.queue_id); 1077 1078 is_new = q->properties.exception_status & KFD_EC_MASK(EC_QUEUE_NEW); 1079 1080 if (is_new || q->properties.is_being_destroyed) { 1081 pr_debug("Suspend: skip %s queue id %i\n", 1082 is_new ? "new" : "destroyed", 1083 q->properties.queue_id); 1084 return -EBUSY; 1085 } 1086 1087 q->properties.is_suspended = true; 1088 if (q->properties.is_active) { 1089 if (dqm->dev->kfd->shared_resources.enable_mes) { 1090 int r = remove_queue_mes(dqm, q, &pdd->qpd); 1091 1092 if (r) 1093 return r; 1094 } 1095 1096 decrement_queue_count(dqm, &pdd->qpd, q); 1097 q->properties.is_active = false; 1098 } 1099 1100 return 0; 1101 } 1102 1103 /* resume_single_queue does not lock the dqm like the functions 1104 * restore_process_queues_cpsch or restore_process_queues_nocpsch. You should 1105 * lock the dqm before calling, and unlock after calling. 1106 * 1107 * The reason we don't lock the dqm is because this function may be 1108 * called on multiple queues in a loop, so rather than locking/unlocking 1109 * multiple times, we will just keep the dqm locked for all of the calls. 1110 */ 1111 static int resume_single_queue(struct device_queue_manager *dqm, 1112 struct qcm_process_device *qpd, 1113 struct queue *q) 1114 { 1115 struct kfd_process_device *pdd; 1116 1117 if (!q->properties.is_suspended) 1118 return 0; 1119 1120 pdd = qpd_to_pdd(qpd); 1121 1122 pr_debug("Restoring from suspend process pid %d queue [%i]\n", 1123 pdd->process->lead_thread->pid, 1124 q->properties.queue_id); 1125 1126 q->properties.is_suspended = false; 1127 1128 if (QUEUE_IS_ACTIVE(q->properties)) { 1129 if (dqm->dev->kfd->shared_resources.enable_mes) { 1130 int r = add_queue_mes(dqm, q, &pdd->qpd); 1131 1132 if (r) 1133 return r; 1134 } 1135 1136 q->properties.is_active = true; 1137 increment_queue_count(dqm, qpd, q); 1138 } 1139 1140 return 0; 1141 } 1142 1143 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, 1144 struct qcm_process_device *qpd) 1145 { 1146 struct queue *q; 1147 struct mqd_manager *mqd_mgr; 1148 struct kfd_process_device *pdd; 1149 int retval, ret = 0; 1150 1151 dqm_lock(dqm); 1152 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 1153 goto out; 1154 1155 pdd = qpd_to_pdd(qpd); 1156 pr_debug_ratelimited("Evicting process pid %d queues\n", 1157 pdd->process->lead_thread->pid); 1158 1159 pdd->last_evict_timestamp = get_jiffies_64(); 1160 /* Mark all queues as evicted. Deactivate all active queues on 1161 * the qpd. 1162 */ 1163 list_for_each_entry(q, &qpd->queues_list, list) { 1164 q->properties.is_evicted = true; 1165 if (!q->properties.is_active) 1166 continue; 1167 1168 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1169 q->properties.type)]; 1170 q->properties.is_active = false; 1171 decrement_queue_count(dqm, qpd, q); 1172 1173 if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) 1174 continue; 1175 1176 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 1177 (dqm->dev->kfd->cwsr_enabled ? 1178 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 1179 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 1180 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 1181 if (retval && !ret) 1182 /* Return the first error, but keep going to 1183 * maintain a consistent eviction state 1184 */ 1185 ret = retval; 1186 } 1187 1188 out: 1189 dqm_unlock(dqm); 1190 return ret; 1191 } 1192 1193 static int evict_process_queues_cpsch(struct device_queue_manager *dqm, 1194 struct qcm_process_device *qpd) 1195 { 1196 struct queue *q; 1197 struct device *dev = dqm->dev->adev->dev; 1198 struct kfd_process_device *pdd; 1199 int retval = 0; 1200 1201 dqm_lock(dqm); 1202 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 1203 goto out; 1204 1205 pdd = qpd_to_pdd(qpd); 1206 1207 /* The debugger creates processes that temporarily have not acquired 1208 * all VMs for all devices and has no VMs itself. 1209 * Skip queue eviction on process eviction. 1210 */ 1211 if (!pdd->drm_priv) 1212 goto out; 1213 1214 pr_debug_ratelimited("Evicting process pid %d queues\n", 1215 pdd->process->lead_thread->pid); 1216 1217 if (dqm->dev->kfd->shared_resources.enable_mes) 1218 pdd->last_evict_timestamp = get_jiffies_64(); 1219 1220 /* Mark all queues as evicted. Deactivate all active queues on 1221 * the qpd. 1222 */ 1223 list_for_each_entry(q, &qpd->queues_list, list) { 1224 q->properties.is_evicted = true; 1225 if (!q->properties.is_active) 1226 continue; 1227 1228 q->properties.is_active = false; 1229 decrement_queue_count(dqm, qpd, q); 1230 1231 if (dqm->dev->kfd->shared_resources.enable_mes) { 1232 retval = remove_queue_mes(dqm, q, qpd); 1233 if (retval) { 1234 dev_err(dev, "Failed to evict queue %d\n", 1235 q->properties.queue_id); 1236 goto out; 1237 } 1238 } 1239 } 1240 1241 if (!dqm->dev->kfd->shared_resources.enable_mes) { 1242 pdd->last_evict_timestamp = get_jiffies_64(); 1243 retval = execute_queues_cpsch(dqm, 1244 qpd->is_debug ? 1245 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : 1246 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 1247 USE_DEFAULT_GRACE_PERIOD); 1248 } 1249 1250 out: 1251 dqm_unlock(dqm); 1252 return retval; 1253 } 1254 1255 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, 1256 struct qcm_process_device *qpd) 1257 { 1258 struct mm_struct *mm = NULL; 1259 struct queue *q; 1260 struct mqd_manager *mqd_mgr; 1261 struct kfd_process_device *pdd; 1262 uint64_t pd_base; 1263 uint64_t eviction_duration; 1264 int retval, ret = 0; 1265 1266 pdd = qpd_to_pdd(qpd); 1267 /* Retrieve PD base */ 1268 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1269 1270 dqm_lock(dqm); 1271 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 1272 goto out; 1273 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 1274 qpd->evicted--; 1275 goto out; 1276 } 1277 1278 pr_debug_ratelimited("Restoring process pid %d queues\n", 1279 pdd->process->lead_thread->pid); 1280 1281 /* Update PD Base in QPD */ 1282 qpd->page_table_base = pd_base; 1283 pr_debug("Updated PD address to 0x%llx\n", pd_base); 1284 1285 if (!list_empty(&qpd->queues_list)) { 1286 dqm->dev->kfd2kgd->set_vm_context_page_table_base( 1287 dqm->dev->adev, 1288 qpd->vmid, 1289 qpd->page_table_base); 1290 kfd_flush_tlb(pdd); 1291 } 1292 1293 /* Take a safe reference to the mm_struct, which may otherwise 1294 * disappear even while the kfd_process is still referenced. 1295 */ 1296 mm = get_task_mm(pdd->process->lead_thread); 1297 if (!mm) { 1298 ret = -EFAULT; 1299 goto out; 1300 } 1301 1302 /* Remove the eviction flags. Activate queues that are not 1303 * inactive for other reasons. 1304 */ 1305 list_for_each_entry(q, &qpd->queues_list, list) { 1306 q->properties.is_evicted = false; 1307 if (!QUEUE_IS_ACTIVE(q->properties)) 1308 continue; 1309 1310 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1311 q->properties.type)]; 1312 q->properties.is_active = true; 1313 increment_queue_count(dqm, qpd, q); 1314 1315 if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) 1316 continue; 1317 1318 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 1319 q->queue, &q->properties, mm); 1320 if (retval && !ret) 1321 /* Return the first error, but keep going to 1322 * maintain a consistent eviction state 1323 */ 1324 ret = retval; 1325 } 1326 qpd->evicted = 0; 1327 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 1328 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 1329 out: 1330 if (mm) 1331 mmput(mm); 1332 dqm_unlock(dqm); 1333 return ret; 1334 } 1335 1336 static int restore_process_queues_cpsch(struct device_queue_manager *dqm, 1337 struct qcm_process_device *qpd) 1338 { 1339 struct queue *q; 1340 struct device *dev = dqm->dev->adev->dev; 1341 struct kfd_process_device *pdd; 1342 uint64_t eviction_duration; 1343 int retval = 0; 1344 1345 pdd = qpd_to_pdd(qpd); 1346 1347 dqm_lock(dqm); 1348 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 1349 goto out; 1350 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 1351 qpd->evicted--; 1352 goto out; 1353 } 1354 1355 /* The debugger creates processes that temporarily have not acquired 1356 * all VMs for all devices and has no VMs itself. 1357 * Skip queue restore on process restore. 1358 */ 1359 if (!pdd->drm_priv) 1360 goto vm_not_acquired; 1361 1362 pr_debug_ratelimited("Restoring process pid %d queues\n", 1363 pdd->process->lead_thread->pid); 1364 1365 /* Update PD Base in QPD */ 1366 qpd->page_table_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1367 pr_debug("Updated PD address to 0x%llx\n", qpd->page_table_base); 1368 1369 /* activate all active queues on the qpd */ 1370 list_for_each_entry(q, &qpd->queues_list, list) { 1371 q->properties.is_evicted = false; 1372 if (!QUEUE_IS_ACTIVE(q->properties)) 1373 continue; 1374 1375 q->properties.is_active = true; 1376 increment_queue_count(dqm, &pdd->qpd, q); 1377 1378 if (dqm->dev->kfd->shared_resources.enable_mes) { 1379 retval = add_queue_mes(dqm, q, qpd); 1380 if (retval) { 1381 dev_err(dev, "Failed to restore queue %d\n", 1382 q->properties.queue_id); 1383 goto out; 1384 } 1385 } 1386 } 1387 if (!dqm->dev->kfd->shared_resources.enable_mes) 1388 retval = execute_queues_cpsch(dqm, 1389 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); 1390 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 1391 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 1392 vm_not_acquired: 1393 qpd->evicted = 0; 1394 out: 1395 dqm_unlock(dqm); 1396 return retval; 1397 } 1398 1399 static int register_process(struct device_queue_manager *dqm, 1400 struct qcm_process_device *qpd) 1401 { 1402 struct device_process_node *n; 1403 struct kfd_process_device *pdd; 1404 uint64_t pd_base; 1405 int retval; 1406 1407 n = kzalloc_obj(*n); 1408 if (!n) 1409 return -ENOMEM; 1410 1411 n->qpd = qpd; 1412 1413 pdd = qpd_to_pdd(qpd); 1414 /* Retrieve PD base */ 1415 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1416 1417 dqm_lock(dqm); 1418 list_add(&n->list, &dqm->queues); 1419 1420 /* Update PD Base in QPD */ 1421 qpd->page_table_base = pd_base; 1422 pr_debug("Updated PD address to 0x%llx\n", pd_base); 1423 1424 retval = dqm->asic_ops.update_qpd(dqm, qpd); 1425 1426 dqm->processes_count++; 1427 1428 dqm_unlock(dqm); 1429 1430 /* Outside the DQM lock because under the DQM lock we can't do 1431 * reclaim or take other locks that others hold while reclaiming. 1432 */ 1433 kfd_inc_compute_active(dqm->dev); 1434 1435 return retval; 1436 } 1437 1438 static int unregister_process(struct device_queue_manager *dqm, 1439 struct qcm_process_device *qpd) 1440 { 1441 int retval = 0; 1442 struct device_process_node *cur, *next; 1443 1444 pr_debug("qpd->queues_list is %s\n", 1445 list_empty(&qpd->queues_list) ? "empty" : "not empty"); 1446 1447 dqm_lock(dqm); 1448 1449 list_for_each_entry_safe(cur, next, &dqm->queues, list) { 1450 if (qpd == cur->qpd) { 1451 list_del(&cur->list); 1452 kfree(cur); 1453 dqm->processes_count--; 1454 goto out; 1455 } 1456 } 1457 /* qpd not found in dqm list */ 1458 retval = 1; 1459 out: 1460 dqm_unlock(dqm); 1461 1462 /* Outside the DQM lock because under the DQM lock we can't do 1463 * reclaim or take other locks that others hold while reclaiming. 1464 */ 1465 if (!retval) 1466 kfd_dec_compute_active(dqm->dev); 1467 1468 return retval; 1469 } 1470 1471 static int 1472 set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, 1473 unsigned int vmid) 1474 { 1475 uint32_t xcc_mask = dqm->dev->xcc_mask; 1476 int xcc_id, ret = 0; 1477 1478 for_each_inst(xcc_id, xcc_mask) { 1479 ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping( 1480 dqm->dev->adev, pasid, vmid, xcc_id); 1481 if (ret) 1482 break; 1483 } 1484 1485 return ret; 1486 } 1487 1488 static void init_interrupts(struct device_queue_manager *dqm) 1489 { 1490 uint32_t xcc_mask = dqm->dev->xcc_mask; 1491 unsigned int i, xcc_id; 1492 1493 for_each_inst(xcc_id, xcc_mask) { 1494 for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) { 1495 if (is_pipe_enabled(dqm, 0, i)) { 1496 dqm->dev->kfd2kgd->init_interrupts( 1497 dqm->dev->adev, i, xcc_id); 1498 } 1499 } 1500 } 1501 } 1502 1503 static int initialize_nocpsch(struct device_queue_manager *dqm) 1504 { 1505 int pipe, queue; 1506 1507 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1508 1509 dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), 1510 sizeof(unsigned int), GFP_KERNEL); 1511 if (!dqm->allocated_queues) 1512 return -ENOMEM; 1513 1514 mutex_init(&dqm->lock_hidden); 1515 INIT_LIST_HEAD(&dqm->queues); 1516 dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; 1517 dqm->active_cp_queue_count = 0; 1518 dqm->gws_queue_count = 0; 1519 1520 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 1521 int pipe_offset = pipe * get_queues_per_pipe(dqm); 1522 1523 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) 1524 if (test_bit(pipe_offset + queue, 1525 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 1526 dqm->allocated_queues[pipe] |= 1 << queue; 1527 } 1528 1529 memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid)); 1530 1531 init_sdma_bitmaps(dqm); 1532 1533 return 0; 1534 } 1535 1536 static void uninitialize(struct device_queue_manager *dqm) 1537 { 1538 int i; 1539 1540 WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0); 1541 1542 kfree(dqm->allocated_queues); 1543 for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) 1544 kfree(dqm->mqd_mgrs[i]); 1545 mutex_destroy(&dqm->lock_hidden); 1546 } 1547 1548 static int start_nocpsch(struct device_queue_manager *dqm) 1549 { 1550 int r = 0; 1551 1552 pr_info("SW scheduler is used"); 1553 init_interrupts(dqm); 1554 1555 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1556 r = pm_init(&dqm->packet_mgr, dqm); 1557 if (!r) 1558 dqm->sched_running = true; 1559 1560 return r; 1561 } 1562 1563 static int stop_nocpsch(struct device_queue_manager *dqm) 1564 { 1565 dqm_lock(dqm); 1566 if (!dqm->sched_running) { 1567 dqm_unlock(dqm); 1568 return 0; 1569 } 1570 1571 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1572 pm_uninit(&dqm->packet_mgr); 1573 dqm->sched_running = false; 1574 dqm_unlock(dqm); 1575 1576 return 0; 1577 } 1578 1579 static int allocate_sdma_queue(struct device_queue_manager *dqm, 1580 struct queue *q, const uint32_t *restore_sdma_id) 1581 { 1582 struct device *dev = dqm->dev->adev->dev; 1583 int bit; 1584 1585 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1586 if (bitmap_empty(dqm->sdma_bitmap, get_num_sdma_queues(dqm))) { 1587 dev_warn(dev, "No more SDMA queue to allocate (%d total queues)\n", 1588 get_num_sdma_queues(dqm)); 1589 return -ENOMEM; 1590 } 1591 1592 if (restore_sdma_id) { 1593 if (*restore_sdma_id >= get_num_sdma_queues(dqm)) 1594 return -EINVAL; 1595 1596 /* Re-use existing sdma_id */ 1597 if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) { 1598 dev_err(dev, "SDMA queue already in use\n"); 1599 return -EBUSY; 1600 } 1601 clear_bit(*restore_sdma_id, dqm->sdma_bitmap); 1602 q->sdma_id = *restore_sdma_id; 1603 } else { 1604 /* Find first available sdma_id */ 1605 bit = find_first_bit(dqm->sdma_bitmap, 1606 get_num_sdma_queues(dqm)); 1607 clear_bit(bit, dqm->sdma_bitmap); 1608 q->sdma_id = bit; 1609 } 1610 1611 q->properties.sdma_engine_id = 1612 q->sdma_id % kfd_get_num_sdma_engines(dqm->dev); 1613 q->properties.sdma_queue_id = q->sdma_id / 1614 kfd_get_num_sdma_engines(dqm->dev); 1615 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1616 if (bitmap_empty(dqm->xgmi_sdma_bitmap, get_num_xgmi_sdma_queues(dqm))) { 1617 dev_warn(dev, "No more XGMI SDMA queue to allocate (%d total queues)\n", 1618 get_num_xgmi_sdma_queues(dqm)); 1619 return -ENOMEM; 1620 } 1621 if (restore_sdma_id) { 1622 if (*restore_sdma_id >= get_num_xgmi_sdma_queues(dqm)) 1623 return -EINVAL; 1624 1625 /* Re-use existing sdma_id */ 1626 if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) { 1627 dev_err(dev, "SDMA queue already in use\n"); 1628 return -EBUSY; 1629 } 1630 clear_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap); 1631 q->sdma_id = *restore_sdma_id; 1632 } else { 1633 bit = find_first_bit(dqm->xgmi_sdma_bitmap, 1634 get_num_xgmi_sdma_queues(dqm)); 1635 clear_bit(bit, dqm->xgmi_sdma_bitmap); 1636 q->sdma_id = bit; 1637 } 1638 /* sdma_engine_id is sdma id including 1639 * both PCIe-optimized SDMAs and XGMI- 1640 * optimized SDMAs. The calculation below 1641 * assumes the first N engines are always 1642 * PCIe-optimized ones 1643 */ 1644 q->properties.sdma_engine_id = 1645 kfd_get_num_sdma_engines(dqm->dev) + 1646 q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev); 1647 q->properties.sdma_queue_id = q->sdma_id / 1648 kfd_get_num_xgmi_sdma_engines(dqm->dev); 1649 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { 1650 int i, num_queues, num_engines, eng_offset = 0, start_engine; 1651 bool free_bit_found = false, is_xgmi = false; 1652 1653 if (q->properties.sdma_engine_id < kfd_get_num_sdma_engines(dqm->dev)) { 1654 num_queues = get_num_sdma_queues(dqm); 1655 num_engines = kfd_get_num_sdma_engines(dqm->dev); 1656 q->properties.type = KFD_QUEUE_TYPE_SDMA; 1657 } else { 1658 num_queues = get_num_xgmi_sdma_queues(dqm); 1659 num_engines = kfd_get_num_xgmi_sdma_engines(dqm->dev); 1660 eng_offset = kfd_get_num_sdma_engines(dqm->dev); 1661 q->properties.type = KFD_QUEUE_TYPE_SDMA_XGMI; 1662 is_xgmi = true; 1663 } 1664 1665 /* Scan available bit based on target engine ID. */ 1666 start_engine = q->properties.sdma_engine_id - eng_offset; 1667 for (i = start_engine; i < num_queues; i += num_engines) { 1668 1669 if (!test_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap)) 1670 continue; 1671 1672 clear_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap); 1673 q->sdma_id = i; 1674 q->properties.sdma_queue_id = q->sdma_id / num_engines; 1675 free_bit_found = true; 1676 break; 1677 } 1678 1679 if (!free_bit_found) { 1680 dev_warn(dev, "No more SDMA queue to allocate for target ID %i (%d total queues)\n", 1681 q->properties.sdma_engine_id, num_queues); 1682 return -ENOMEM; 1683 } 1684 } 1685 1686 pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); 1687 pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); 1688 1689 return 0; 1690 } 1691 1692 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 1693 struct queue *q) 1694 { 1695 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1696 if (q->sdma_id >= get_num_sdma_queues(dqm)) 1697 return; 1698 set_bit(q->sdma_id, dqm->sdma_bitmap); 1699 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1700 if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm)) 1701 return; 1702 set_bit(q->sdma_id, dqm->xgmi_sdma_bitmap); 1703 } 1704 } 1705 1706 /* 1707 * Device Queue Manager implementation for cp scheduler 1708 */ 1709 1710 static int set_sched_resources(struct device_queue_manager *dqm) 1711 { 1712 int i, mec; 1713 struct scheduling_resources res; 1714 struct device *dev = dqm->dev->adev->dev; 1715 1716 res.vmid_mask = dqm->dev->compute_vmid_bitmap; 1717 1718 res.queue_mask = 0; 1719 for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) { 1720 mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe) 1721 / dqm->dev->kfd->shared_resources.num_pipe_per_mec; 1722 1723 if (!test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 1724 continue; 1725 1726 /* only acquire queues from the first MEC */ 1727 if (mec > 0) 1728 continue; 1729 1730 /* This situation may be hit in the future if a new HW 1731 * generation exposes more than 64 queues. If so, the 1732 * definition of res.queue_mask needs updating 1733 */ 1734 if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) { 1735 dev_err(dev, "Invalid queue enabled by amdgpu: %d\n", i); 1736 break; 1737 } 1738 1739 res.queue_mask |= 1ull 1740 << amdgpu_queue_mask_bit_to_set_resource_bit( 1741 dqm->dev->adev, i); 1742 } 1743 res.gws_mask = ~0ull; 1744 res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0; 1745 1746 pr_debug("Scheduling resources:\n" 1747 "vmid mask: 0x%8X\n" 1748 "queue mask: 0x%8llX\n", 1749 res.vmid_mask, res.queue_mask); 1750 1751 return pm_send_set_resources(&dqm->packet_mgr, &res); 1752 } 1753 1754 static int initialize_cpsch(struct device_queue_manager *dqm) 1755 { 1756 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1757 1758 mutex_init(&dqm->lock_hidden); 1759 INIT_LIST_HEAD(&dqm->queues); 1760 dqm->active_queue_count = dqm->processes_count = 0; 1761 dqm->active_cp_queue_count = 0; 1762 dqm->gws_queue_count = 0; 1763 dqm->active_runlist = false; 1764 dqm->trap_debug_vmid = 0; 1765 1766 init_sdma_bitmaps(dqm); 1767 1768 update_dqm_wait_times(dqm); 1769 return 0; 1770 } 1771 1772 /* halt_cpsch: 1773 * Unmap queues so the schedule doesn't continue remaining jobs in the queue. 1774 * Then set dqm->sched_halt so queues don't map to runlist until unhalt_cpsch 1775 * is called. 1776 */ 1777 static int halt_cpsch(struct device_queue_manager *dqm) 1778 { 1779 int ret = 0; 1780 1781 dqm_lock(dqm); 1782 if (!dqm->sched_running) { 1783 dqm_unlock(dqm); 1784 return 0; 1785 } 1786 1787 WARN_ONCE(dqm->sched_halt, "Scheduling is already on halt\n"); 1788 1789 if (!dqm->is_hws_hang) { 1790 if (!dqm->dev->kfd->shared_resources.enable_mes) 1791 ret = unmap_queues_cpsch(dqm, 1792 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 1793 USE_DEFAULT_GRACE_PERIOD, false); 1794 else 1795 ret = remove_all_kfd_queues_mes(dqm); 1796 } 1797 dqm->sched_halt = true; 1798 dqm_unlock(dqm); 1799 1800 return ret; 1801 } 1802 1803 /* unhalt_cpsch 1804 * Unset dqm->sched_halt and map queues back to runlist 1805 */ 1806 static int unhalt_cpsch(struct device_queue_manager *dqm) 1807 { 1808 int ret = 0; 1809 1810 dqm_lock(dqm); 1811 if (!dqm->sched_running || !dqm->sched_halt) { 1812 WARN_ONCE(!dqm->sched_halt, "Scheduling is not on halt.\n"); 1813 dqm_unlock(dqm); 1814 return 0; 1815 } 1816 dqm->sched_halt = false; 1817 if (!dqm->dev->kfd->shared_resources.enable_mes) 1818 ret = execute_queues_cpsch(dqm, 1819 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 1820 0, USE_DEFAULT_GRACE_PERIOD); 1821 else 1822 ret = add_all_kfd_queues_mes(dqm); 1823 1824 dqm_unlock(dqm); 1825 1826 return ret; 1827 } 1828 1829 static int start_cpsch(struct device_queue_manager *dqm) 1830 { 1831 struct device *dev = dqm->dev->adev->dev; 1832 int retval, num_hw_queue_slots; 1833 1834 dqm_lock(dqm); 1835 1836 if (!dqm->dev->kfd->shared_resources.enable_mes) { 1837 retval = pm_init(&dqm->packet_mgr, dqm); 1838 if (retval) 1839 goto fail_packet_manager_init; 1840 1841 retval = set_sched_resources(dqm); 1842 if (retval) 1843 goto fail_set_sched_resources; 1844 } 1845 pr_debug("Allocating fence memory\n"); 1846 1847 /* allocate fence memory on the gart */ 1848 retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr), 1849 &dqm->fence_mem); 1850 1851 if (retval) 1852 goto fail_allocate_vidmem; 1853 1854 dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr; 1855 dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr; 1856 1857 init_interrupts(dqm); 1858 1859 /* clear hang status when driver try to start the hw scheduler */ 1860 dqm->sched_running = true; 1861 1862 if (!dqm->dev->kfd->shared_resources.enable_mes) { 1863 if (pm_config_dequeue_wait_counts(&dqm->packet_mgr, 1864 KFD_DEQUEUE_WAIT_INIT, 0 /* unused */)) 1865 dev_err(dev, "Setting optimized dequeue wait failed. Using default values\n"); 1866 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); 1867 } 1868 1869 /* setup per-queue reset detection buffer */ 1870 num_hw_queue_slots = dqm->dev->kfd->shared_resources.num_queue_per_pipe * 1871 dqm->dev->kfd->shared_resources.num_pipe_per_mec * 1872 NUM_XCC(dqm->dev->xcc_mask); 1873 1874 dqm->detect_hang_info_size = num_hw_queue_slots * sizeof(struct dqm_detect_hang_info); 1875 dqm->detect_hang_info = kzalloc(dqm->detect_hang_info_size, GFP_KERNEL); 1876 1877 if (!dqm->detect_hang_info) { 1878 retval = -ENOMEM; 1879 goto fail_detect_hang_buffer; 1880 } 1881 1882 dqm_unlock(dqm); 1883 1884 return 0; 1885 fail_detect_hang_buffer: 1886 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); 1887 fail_allocate_vidmem: 1888 fail_set_sched_resources: 1889 if (!dqm->dev->kfd->shared_resources.enable_mes) 1890 pm_uninit(&dqm->packet_mgr); 1891 fail_packet_manager_init: 1892 dqm_unlock(dqm); 1893 return retval; 1894 } 1895 1896 static int stop_cpsch(struct device_queue_manager *dqm) 1897 { 1898 int ret = 0; 1899 1900 dqm_lock(dqm); 1901 if (!dqm->sched_running) { 1902 dqm_unlock(dqm); 1903 return 0; 1904 } 1905 1906 if (!dqm->dev->kfd->shared_resources.enable_mes) 1907 ret = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 1908 0, USE_DEFAULT_GRACE_PERIOD, false); 1909 else 1910 ret = remove_all_kfd_queues_mes(dqm); 1911 1912 dqm->sched_running = false; 1913 1914 if (!dqm->dev->kfd->shared_resources.enable_mes) 1915 pm_release_ib(&dqm->packet_mgr); 1916 1917 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); 1918 if (!dqm->dev->kfd->shared_resources.enable_mes) 1919 pm_uninit(&dqm->packet_mgr); 1920 kfree(dqm->detect_hang_info); 1921 dqm->detect_hang_info = NULL; 1922 dqm_unlock(dqm); 1923 1924 return ret; 1925 } 1926 1927 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, 1928 struct kernel_queue *kq, 1929 struct qcm_process_device *qpd) 1930 { 1931 dqm_lock(dqm); 1932 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1933 pr_warn("Can't create new kernel queue because %d queues were already created\n", 1934 dqm->total_queue_count); 1935 dqm_unlock(dqm); 1936 return -EPERM; 1937 } 1938 1939 /* 1940 * Unconditionally increment this counter, regardless of the queue's 1941 * type or whether the queue is active. 1942 */ 1943 dqm->total_queue_count++; 1944 pr_debug("Total of %d queues are accountable so far\n", 1945 dqm->total_queue_count); 1946 1947 list_add(&kq->list, &qpd->priv_queue_list); 1948 increment_queue_count(dqm, qpd, kq->queue); 1949 qpd->is_debug = true; 1950 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 1951 USE_DEFAULT_GRACE_PERIOD); 1952 dqm_unlock(dqm); 1953 1954 return 0; 1955 } 1956 1957 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, 1958 struct kernel_queue *kq, 1959 struct qcm_process_device *qpd) 1960 { 1961 dqm_lock(dqm); 1962 list_del(&kq->list); 1963 decrement_queue_count(dqm, qpd, kq->queue); 1964 qpd->is_debug = false; 1965 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 1966 USE_DEFAULT_GRACE_PERIOD); 1967 /* 1968 * Unconditionally decrement this counter, regardless of the queue's 1969 * type. 1970 */ 1971 dqm->total_queue_count--; 1972 pr_debug("Total of %d queues are accountable so far\n", 1973 dqm->total_queue_count); 1974 dqm_unlock(dqm); 1975 } 1976 1977 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, 1978 struct qcm_process_device *qpd, 1979 const struct kfd_criu_queue_priv_data *qd, 1980 const void *restore_mqd, const void *restore_ctl_stack) 1981 { 1982 int retval; 1983 struct mqd_manager *mqd_mgr; 1984 1985 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1986 pr_warn("Can't create new usermode queue because %d queues were already created\n", 1987 dqm->total_queue_count); 1988 retval = -EPERM; 1989 goto out; 1990 } 1991 1992 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1993 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI || 1994 q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { 1995 dqm_lock(dqm); 1996 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); 1997 dqm_unlock(dqm); 1998 if (retval) 1999 goto out; 2000 } 2001 2002 retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL); 2003 if (retval) 2004 goto out_deallocate_sdma_queue; 2005 2006 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2007 q->properties.type)]; 2008 2009 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 2010 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 2011 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 2012 q->properties.tba_addr = qpd->tba_addr; 2013 q->properties.tma_addr = qpd->tma_addr; 2014 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr, &q->properties); 2015 if (!q->mqd_mem_obj) { 2016 retval = -ENOMEM; 2017 goto out_deallocate_doorbell; 2018 } 2019 2020 dqm_lock(dqm); 2021 /* 2022 * Eviction state logic: mark all queues as evicted, even ones 2023 * not currently active. Restoring inactive queues later only 2024 * updates the is_evicted flag but is a no-op otherwise. 2025 */ 2026 q->properties.is_evicted = !!qpd->evicted; 2027 q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled && 2028 kfd_dbg_has_cwsr_workaround(q->device); 2029 2030 if (qd) 2031 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, 2032 &q->properties, restore_mqd, restore_ctl_stack, 2033 qd->ctl_stack_size); 2034 else 2035 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 2036 &q->gart_mqd_addr, &q->properties); 2037 2038 list_add(&q->list, &qpd->queues_list); 2039 qpd->queue_count++; 2040 2041 if (q->properties.is_active) { 2042 increment_queue_count(dqm, qpd, q); 2043 2044 if (!dqm->dev->kfd->shared_resources.enable_mes) 2045 retval = execute_queues_cpsch(dqm, 2046 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); 2047 else 2048 retval = add_queue_mes(dqm, q, qpd); 2049 if (retval) 2050 goto cleanup_queue; 2051 } 2052 2053 /* 2054 * Unconditionally increment this counter, regardless of the queue's 2055 * type or whether the queue is active. 2056 */ 2057 dqm->total_queue_count++; 2058 2059 pr_debug("Total of %d queues are accountable so far\n", 2060 dqm->total_queue_count); 2061 2062 dqm_unlock(dqm); 2063 return retval; 2064 2065 cleanup_queue: 2066 qpd->queue_count--; 2067 list_del(&q->list); 2068 if (q->properties.is_active) 2069 decrement_queue_count(dqm, qpd, q); 2070 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2071 dqm_unlock(dqm); 2072 out_deallocate_doorbell: 2073 deallocate_doorbell(qpd, q); 2074 out_deallocate_sdma_queue: 2075 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 2076 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 2077 dqm_lock(dqm); 2078 deallocate_sdma_queue(dqm, q); 2079 dqm_unlock(dqm); 2080 } 2081 out: 2082 return retval; 2083 } 2084 2085 int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm, 2086 uint64_t fence_value, 2087 unsigned int timeout_ms) 2088 { 2089 unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies; 2090 struct device *dev = dqm->dev->adev->dev; 2091 uint64_t *fence_addr = dqm->fence_addr; 2092 2093 while (*fence_addr != fence_value) { 2094 /* Fatal err detected, this response won't come */ 2095 if (amdgpu_amdkfd_is_fed(dqm->dev->adev) || 2096 amdgpu_in_reset(dqm->dev->adev)) 2097 return -EIO; 2098 2099 if (time_after(jiffies, end_jiffies)) { 2100 dev_err(dev, "qcm fence wait loop timeout expired\n"); 2101 /* In HWS case, this is used to halt the driver thread 2102 * in order not to mess up CP states before doing 2103 * scandumps for FW debugging. 2104 */ 2105 while (halt_if_hws_hang) 2106 schedule(); 2107 2108 return -ETIME; 2109 } 2110 schedule(); 2111 } 2112 2113 return 0; 2114 } 2115 2116 /* dqm->lock mutex has to be locked before calling this function */ 2117 static int map_queues_cpsch(struct device_queue_manager *dqm) 2118 { 2119 struct device *dev = dqm->dev->adev->dev; 2120 int retval; 2121 2122 if (!dqm->sched_running || dqm->sched_halt) 2123 return 0; 2124 if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0) 2125 return 0; 2126 if (dqm->active_runlist) 2127 return 0; 2128 2129 retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues); 2130 pr_debug("%s sent runlist\n", __func__); 2131 if (retval) { 2132 dev_err(dev, "failed to execute runlist\n"); 2133 return retval; 2134 } 2135 dqm->active_runlist = true; 2136 2137 return retval; 2138 } 2139 2140 static void set_queue_as_reset(struct device_queue_manager *dqm, struct queue *q, 2141 struct qcm_process_device *qpd) 2142 { 2143 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 2144 2145 dev_err(dqm->dev->adev->dev, "queue id 0x%0x at pasid %d is reset\n", 2146 q->properties.queue_id, pdd->process->lead_thread->pid); 2147 2148 pdd->has_reset_queue = true; 2149 if (q->properties.is_active) { 2150 q->properties.is_active = false; 2151 decrement_queue_count(dqm, qpd, q); 2152 } 2153 } 2154 2155 static int detect_queue_hang(struct device_queue_manager *dqm) 2156 { 2157 int i; 2158 2159 /* detect should be used only in dqm locked queue reset */ 2160 if (WARN_ON(dqm->detect_hang_count > 0)) 2161 return 0; 2162 2163 memset(dqm->detect_hang_info, 0, dqm->detect_hang_info_size); 2164 2165 for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) { 2166 uint32_t mec, pipe, queue; 2167 int xcc_id; 2168 2169 mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe) 2170 / dqm->dev->kfd->shared_resources.num_pipe_per_mec; 2171 2172 if (mec || !test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 2173 continue; 2174 2175 amdgpu_queue_mask_bit_to_mec_queue(dqm->dev->adev, i, &mec, &pipe, &queue); 2176 2177 for_each_inst(xcc_id, dqm->dev->xcc_mask) { 2178 uint64_t queue_addr = dqm->dev->kfd2kgd->hqd_get_pq_addr( 2179 dqm->dev->adev, pipe, queue, xcc_id); 2180 struct dqm_detect_hang_info hang_info; 2181 2182 if (!queue_addr) 2183 continue; 2184 2185 hang_info.pipe_id = pipe; 2186 hang_info.queue_id = queue; 2187 hang_info.xcc_id = xcc_id; 2188 hang_info.queue_address = queue_addr; 2189 2190 dqm->detect_hang_info[dqm->detect_hang_count] = hang_info; 2191 dqm->detect_hang_count++; 2192 } 2193 } 2194 2195 return dqm->detect_hang_count; 2196 } 2197 2198 static struct queue *find_queue_by_address(struct device_queue_manager *dqm, uint64_t queue_address) 2199 { 2200 struct device_process_node *cur; 2201 struct qcm_process_device *qpd; 2202 struct queue *q; 2203 2204 list_for_each_entry(cur, &dqm->queues, list) { 2205 qpd = cur->qpd; 2206 list_for_each_entry(q, &qpd->queues_list, list) { 2207 if (queue_address == q->properties.queue_address) 2208 return q; 2209 } 2210 } 2211 2212 return NULL; 2213 } 2214 2215 static int reset_hung_queues(struct device_queue_manager *dqm) 2216 { 2217 int r = 0, reset_count = 0, i; 2218 2219 if (!dqm->detect_hang_info || dqm->is_hws_hang) 2220 return -EIO; 2221 2222 /* assume dqm locked. */ 2223 if (!detect_queue_hang(dqm)) 2224 return -ENOTRECOVERABLE; 2225 2226 for (i = 0; i < dqm->detect_hang_count; i++) { 2227 struct dqm_detect_hang_info hang_info = dqm->detect_hang_info[i]; 2228 struct queue *q = find_queue_by_address(dqm, hang_info.queue_address); 2229 struct kfd_process_device *pdd; 2230 uint64_t queue_addr = 0; 2231 2232 if (!q) { 2233 r = -ENOTRECOVERABLE; 2234 goto reset_fail; 2235 } 2236 2237 pdd = kfd_get_process_device_data(dqm->dev, q->process); 2238 if (!pdd) { 2239 r = -ENOTRECOVERABLE; 2240 goto reset_fail; 2241 } 2242 2243 queue_addr = dqm->dev->kfd2kgd->hqd_reset(dqm->dev->adev, 2244 hang_info.pipe_id, hang_info.queue_id, hang_info.xcc_id, 2245 KFD_UNMAP_LATENCY_MS); 2246 2247 /* either reset failed or we reset an unexpected queue. */ 2248 if (queue_addr != q->properties.queue_address) { 2249 r = -ENOTRECOVERABLE; 2250 goto reset_fail; 2251 } 2252 2253 set_queue_as_reset(dqm, q, &pdd->qpd); 2254 reset_count++; 2255 } 2256 2257 if (reset_count == dqm->detect_hang_count) 2258 kfd_signal_reset_event(dqm->dev); 2259 else 2260 r = -ENOTRECOVERABLE; 2261 2262 reset_fail: 2263 dqm->detect_hang_count = 0; 2264 2265 return r; 2266 } 2267 2268 static bool sdma_has_hang(struct device_queue_manager *dqm) 2269 { 2270 int engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm); 2271 int engine_end = engine_start + get_num_all_sdma_engines(dqm); 2272 int num_queues_per_eng = dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 2273 int i, j; 2274 2275 for (i = engine_start; i < engine_end; i++) { 2276 for (j = 0; j < num_queues_per_eng; j++) { 2277 if (!dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j)) 2278 continue; 2279 2280 return true; 2281 } 2282 } 2283 2284 return false; 2285 } 2286 2287 static bool set_sdma_queue_as_reset(struct device_queue_manager *dqm, 2288 uint32_t doorbell_off) 2289 { 2290 struct device_process_node *cur; 2291 struct qcm_process_device *qpd; 2292 struct queue *q; 2293 2294 list_for_each_entry(cur, &dqm->queues, list) { 2295 qpd = cur->qpd; 2296 list_for_each_entry(q, &qpd->queues_list, list) { 2297 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA || 2298 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) && 2299 q->properties.doorbell_off == doorbell_off) { 2300 set_queue_as_reset(dqm, q, qpd); 2301 return true; 2302 } 2303 } 2304 } 2305 2306 return false; 2307 } 2308 2309 static int reset_hung_queues_sdma(struct device_queue_manager *dqm) 2310 { 2311 int engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm); 2312 int engine_end = engine_start + get_num_all_sdma_engines(dqm); 2313 int num_queues_per_eng = dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 2314 int r = 0, i, j; 2315 2316 if (dqm->is_hws_hang) 2317 return -EIO; 2318 2319 /* Scan for hung HW queues and reset engine. */ 2320 dqm->detect_hang_count = 0; 2321 for (i = engine_start; i < engine_end; i++) { 2322 for (j = 0; j < num_queues_per_eng; j++) { 2323 uint32_t doorbell_off = 2324 dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j); 2325 2326 if (!doorbell_off) 2327 continue; 2328 2329 /* Reset engine and check. */ 2330 if (amdgpu_sdma_reset_engine(dqm->dev->adev, i, false) || 2331 dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j) || 2332 !set_sdma_queue_as_reset(dqm, doorbell_off)) { 2333 r = -ENOTRECOVERABLE; 2334 goto reset_fail; 2335 } 2336 2337 /* Should only expect one queue active per engine */ 2338 dqm->detect_hang_count++; 2339 break; 2340 } 2341 } 2342 2343 /* Signal process reset */ 2344 if (dqm->detect_hang_count) 2345 kfd_signal_reset_event(dqm->dev); 2346 else 2347 r = -ENOTRECOVERABLE; 2348 2349 reset_fail: 2350 dqm->detect_hang_count = 0; 2351 2352 return r; 2353 } 2354 2355 static int reset_queues_on_hws_hang(struct device_queue_manager *dqm, bool is_sdma) 2356 { 2357 struct amdgpu_device *adev = dqm->dev->adev; 2358 2359 while (halt_if_hws_hang) 2360 schedule(); 2361 2362 if (adev->debug_disable_gpu_ring_reset) { 2363 dev_info_once(adev->dev, 2364 "%s queue hung, but ring reset disabled", 2365 is_sdma ? "sdma" : "compute"); 2366 2367 return -EPERM; 2368 } 2369 if (!amdgpu_gpu_recovery) 2370 return -ENOTRECOVERABLE; 2371 2372 return is_sdma ? reset_hung_queues_sdma(dqm) : reset_hung_queues(dqm); 2373 } 2374 2375 /* dqm->lock mutex has to be locked before calling this function 2376 * 2377 * @grace_period: If USE_DEFAULT_GRACE_PERIOD then default wait time 2378 * for context switch latency. Lower values are used by debugger 2379 * since context switching are triggered at high frequency. 2380 * This is configured by setting CP_IQ_WAIT_TIME2.SCH_WAVE 2381 * 2382 */ 2383 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 2384 enum kfd_unmap_queues_filter filter, 2385 uint32_t filter_param, 2386 uint32_t grace_period, 2387 bool reset) 2388 { 2389 struct device *dev = dqm->dev->adev->dev; 2390 struct mqd_manager *mqd_mgr; 2391 int retval; 2392 2393 if (!dqm->sched_running) 2394 return 0; 2395 if (!dqm->active_runlist) 2396 return 0; 2397 if (!down_read_trylock(&dqm->dev->adev->reset_domain->sem)) 2398 return -EIO; 2399 2400 if (grace_period != USE_DEFAULT_GRACE_PERIOD) { 2401 retval = pm_config_dequeue_wait_counts(&dqm->packet_mgr, 2402 KFD_DEQUEUE_WAIT_SET_SCH_WAVE, grace_period); 2403 if (retval) 2404 goto out; 2405 } 2406 2407 retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset); 2408 if (retval) 2409 goto out; 2410 2411 *dqm->fence_addr = KFD_FENCE_INIT; 2412 mb(); 2413 pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr, 2414 KFD_FENCE_COMPLETED); 2415 /* should be timed out */ 2416 retval = amdkfd_fence_wait_timeout(dqm, KFD_FENCE_COMPLETED, 2417 queue_preemption_timeout_ms); 2418 if (retval) { 2419 dev_err(dev, "The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); 2420 kfd_hws_hang(dqm); 2421 goto out; 2422 } 2423 2424 /* In the current MEC firmware implementation, if compute queue 2425 * doesn't response to the preemption request in time, HIQ will 2426 * abandon the unmap request without returning any timeout error 2427 * to driver. Instead, MEC firmware will log the doorbell of the 2428 * unresponding compute queue to HIQ.MQD.queue_doorbell_id fields. 2429 * To make sure the queue unmap was successful, driver need to 2430 * check those fields 2431 */ 2432 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; 2433 if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd) && 2434 reset_queues_on_hws_hang(dqm, false)) 2435 goto reset_fail; 2436 2437 /* Check for SDMA hang and attempt SDMA reset */ 2438 if (sdma_has_hang(dqm) && reset_queues_on_hws_hang(dqm, true)) 2439 goto reset_fail; 2440 2441 /* We need to reset the grace period value for this device */ 2442 if (grace_period != USE_DEFAULT_GRACE_PERIOD) { 2443 if (pm_config_dequeue_wait_counts(&dqm->packet_mgr, 2444 KFD_DEQUEUE_WAIT_RESET, 0 /* unused */)) 2445 dev_err(dev, "Failed to reset grace period\n"); 2446 } 2447 2448 pm_release_ib(&dqm->packet_mgr); 2449 dqm->active_runlist = false; 2450 out: 2451 up_read(&dqm->dev->adev->reset_domain->sem); 2452 return retval; 2453 2454 reset_fail: 2455 dqm->is_hws_hang = true; 2456 kfd_hws_hang(dqm); 2457 up_read(&dqm->dev->adev->reset_domain->sem); 2458 return -ETIME; 2459 } 2460 2461 /* only for compute queue */ 2462 static int reset_queues_cpsch(struct device_queue_manager *dqm, uint16_t pasid) 2463 { 2464 int retval; 2465 2466 dqm_lock(dqm); 2467 2468 retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID, 2469 pasid, USE_DEFAULT_GRACE_PERIOD, true); 2470 2471 dqm_unlock(dqm); 2472 return retval; 2473 } 2474 2475 /* dqm->lock mutex has to be locked before calling this function */ 2476 static int execute_queues_cpsch(struct device_queue_manager *dqm, 2477 enum kfd_unmap_queues_filter filter, 2478 uint32_t filter_param, 2479 uint32_t grace_period) 2480 { 2481 int retval; 2482 2483 if (!down_read_trylock(&dqm->dev->adev->reset_domain->sem)) 2484 return -EIO; 2485 retval = unmap_queues_cpsch(dqm, filter, filter_param, grace_period, false); 2486 if (!retval) 2487 retval = map_queues_cpsch(dqm); 2488 up_read(&dqm->dev->adev->reset_domain->sem); 2489 return retval; 2490 } 2491 2492 static int wait_on_destroy_queue(struct device_queue_manager *dqm, 2493 struct queue *q) 2494 { 2495 struct kfd_process_device *pdd = kfd_get_process_device_data(q->device, 2496 q->process); 2497 int ret = 0; 2498 2499 if (WARN_ON(!pdd)) 2500 return ret; 2501 2502 if (pdd->qpd.is_debug) 2503 return ret; 2504 2505 q->properties.is_being_destroyed = true; 2506 2507 if (pdd->process->debug_trap_enabled && q->properties.is_suspended) { 2508 dqm_unlock(dqm); 2509 mutex_unlock(&q->process->mutex); 2510 ret = wait_event_interruptible(dqm->destroy_wait, 2511 !q->properties.is_suspended); 2512 2513 mutex_lock(&q->process->mutex); 2514 dqm_lock(dqm); 2515 } 2516 2517 return ret; 2518 } 2519 2520 static int destroy_queue_cpsch(struct device_queue_manager *dqm, 2521 struct qcm_process_device *qpd, 2522 struct queue *q) 2523 { 2524 int retval; 2525 struct mqd_manager *mqd_mgr; 2526 uint64_t sdma_val = 0; 2527 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 2528 struct device *dev = dqm->dev->adev->dev; 2529 2530 /* Get the SDMA queue stats */ 2531 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 2532 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 2533 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 2534 &sdma_val); 2535 if (retval) 2536 dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n", 2537 q->properties.queue_id); 2538 } 2539 2540 /* remove queue from list to prevent rescheduling after preemption */ 2541 dqm_lock(dqm); 2542 2543 retval = wait_on_destroy_queue(dqm, q); 2544 2545 if (retval) { 2546 dqm_unlock(dqm); 2547 return retval; 2548 } 2549 2550 if (qpd->is_debug) { 2551 /* 2552 * error, currently we do not allow to destroy a queue 2553 * of a currently debugged process 2554 */ 2555 retval = -EBUSY; 2556 goto failed_try_destroy_debugged_queue; 2557 2558 } 2559 2560 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2561 q->properties.type)]; 2562 2563 deallocate_doorbell(qpd, q); 2564 2565 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 2566 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 2567 deallocate_sdma_queue(dqm, q); 2568 pdd->sdma_past_activity_counter += sdma_val; 2569 } 2570 2571 if (q->properties.is_active) { 2572 decrement_queue_count(dqm, qpd, q); 2573 q->properties.is_active = false; 2574 if (!dqm->dev->kfd->shared_resources.enable_mes) { 2575 retval = execute_queues_cpsch(dqm, 2576 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 2577 USE_DEFAULT_GRACE_PERIOD); 2578 if (retval == -ETIME) 2579 qpd->reset_wavefronts = true; 2580 } else { 2581 retval = remove_queue_mes(dqm, q, qpd); 2582 } 2583 } 2584 list_del(&q->list); 2585 qpd->queue_count--; 2586 2587 /* 2588 * Unconditionally decrement this counter, regardless of the queue's 2589 * type 2590 */ 2591 dqm->total_queue_count--; 2592 pr_debug("Total of %d queues are accountable so far\n", 2593 dqm->total_queue_count); 2594 2595 dqm_unlock(dqm); 2596 2597 /* 2598 * Do free_mqd and raise delete event after dqm_unlock(dqm) to avoid 2599 * circular locking 2600 */ 2601 kfd_dbg_ev_raise(KFD_EC_MASK(EC_DEVICE_QUEUE_DELETE), 2602 qpd->pqm->process, q->device, 2603 -1, false, NULL, 0); 2604 2605 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2606 2607 return retval; 2608 2609 failed_try_destroy_debugged_queue: 2610 2611 dqm_unlock(dqm); 2612 return retval; 2613 } 2614 2615 static bool set_cache_memory_policy(struct device_queue_manager *dqm, 2616 struct qcm_process_device *qpd, 2617 enum cache_policy default_policy, 2618 enum cache_policy alternate_policy, 2619 void __user *alternate_aperture_base, 2620 uint64_t alternate_aperture_size, 2621 u32 misc_process_properties) 2622 { 2623 bool retval = true; 2624 2625 if (!dqm->asic_ops.set_cache_memory_policy) 2626 return retval; 2627 2628 dqm_lock(dqm); 2629 2630 retval = dqm->asic_ops.set_cache_memory_policy( 2631 dqm, 2632 qpd, 2633 default_policy, 2634 alternate_policy, 2635 alternate_aperture_base, 2636 alternate_aperture_size, 2637 misc_process_properties); 2638 2639 if (retval) 2640 goto out; 2641 2642 if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) 2643 program_sh_mem_settings(dqm, qpd); 2644 2645 pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", 2646 qpd->sh_mem_config, qpd->sh_mem_ape1_base, 2647 qpd->sh_mem_ape1_limit); 2648 2649 out: 2650 dqm_unlock(dqm); 2651 return retval; 2652 } 2653 2654 static int process_termination_nocpsch(struct device_queue_manager *dqm, 2655 struct qcm_process_device *qpd) 2656 { 2657 struct queue *q; 2658 struct device_process_node *cur, *next_dpn; 2659 int retval = 0; 2660 bool found = false; 2661 2662 dqm_lock(dqm); 2663 2664 /* Clear all user mode queues */ 2665 while (!list_empty(&qpd->queues_list)) { 2666 struct mqd_manager *mqd_mgr; 2667 int ret; 2668 2669 q = list_first_entry(&qpd->queues_list, struct queue, list); 2670 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2671 q->properties.type)]; 2672 ret = destroy_queue_nocpsch_locked(dqm, qpd, q); 2673 if (ret) 2674 retval = ret; 2675 dqm_unlock(dqm); 2676 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2677 dqm_lock(dqm); 2678 } 2679 2680 /* Unregister process */ 2681 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 2682 if (qpd == cur->qpd) { 2683 list_del(&cur->list); 2684 kfree(cur); 2685 dqm->processes_count--; 2686 found = true; 2687 break; 2688 } 2689 } 2690 2691 dqm_unlock(dqm); 2692 2693 /* Outside the DQM lock because under the DQM lock we can't do 2694 * reclaim or take other locks that others hold while reclaiming. 2695 */ 2696 if (found) 2697 kfd_dec_compute_active(dqm->dev); 2698 2699 return retval; 2700 } 2701 2702 static int get_wave_state(struct device_queue_manager *dqm, 2703 struct queue *q, 2704 void __user *ctl_stack, 2705 u32 *ctl_stack_used_size, 2706 u32 *save_area_used_size) 2707 { 2708 struct mqd_manager *mqd_mgr; 2709 2710 dqm_lock(dqm); 2711 2712 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; 2713 2714 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || 2715 q->properties.is_active || !q->device->kfd->cwsr_enabled || 2716 !mqd_mgr->get_wave_state) { 2717 dqm_unlock(dqm); 2718 return -EINVAL; 2719 } 2720 2721 dqm_unlock(dqm); 2722 2723 /* 2724 * get_wave_state is outside the dqm lock to prevent circular locking 2725 * and the queue should be protected against destruction by the process 2726 * lock. 2727 */ 2728 return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, &q->properties, 2729 ctl_stack, ctl_stack_used_size, save_area_used_size); 2730 } 2731 2732 static int get_queue_checkpoint_info(struct device_queue_manager *dqm, 2733 const struct queue *q, 2734 u32 *mqd_size, 2735 u32 *ctl_stack_size) 2736 { 2737 struct mqd_manager *mqd_mgr; 2738 enum KFD_MQD_TYPE mqd_type = 2739 get_mqd_type_from_queue_type(q->properties.type); 2740 int ret = 0; 2741 2742 dqm_lock(dqm); 2743 mqd_mgr = dqm->mqd_mgrs[mqd_type]; 2744 *mqd_size = mqd_mgr->mqd_size * NUM_XCC(mqd_mgr->dev->xcc_mask); 2745 *ctl_stack_size = 0; 2746 2747 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info) 2748 ret = mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size); 2749 2750 dqm_unlock(dqm); 2751 2752 return ret; 2753 } 2754 2755 static int checkpoint_mqd(struct device_queue_manager *dqm, 2756 const struct queue *q, 2757 void *mqd, 2758 void *ctl_stack) 2759 { 2760 struct mqd_manager *mqd_mgr; 2761 int r = 0; 2762 enum KFD_MQD_TYPE mqd_type = 2763 get_mqd_type_from_queue_type(q->properties.type); 2764 2765 dqm_lock(dqm); 2766 2767 if (q->properties.is_active || !q->device->kfd->cwsr_enabled) { 2768 r = -EINVAL; 2769 goto dqm_unlock; 2770 } 2771 2772 mqd_mgr = dqm->mqd_mgrs[mqd_type]; 2773 if (!mqd_mgr->checkpoint_mqd) { 2774 r = -EOPNOTSUPP; 2775 goto dqm_unlock; 2776 } 2777 2778 mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack); 2779 2780 dqm_unlock: 2781 dqm_unlock(dqm); 2782 return r; 2783 } 2784 2785 static int process_termination_cpsch(struct device_queue_manager *dqm, 2786 struct qcm_process_device *qpd) 2787 { 2788 int retval = 0; 2789 struct queue *q; 2790 struct device *dev = dqm->dev->adev->dev; 2791 struct kernel_queue *kq, *kq_next; 2792 struct mqd_manager *mqd_mgr; 2793 struct device_process_node *cur, *next_dpn; 2794 enum kfd_unmap_queues_filter filter = 2795 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; 2796 bool found = false; 2797 2798 dqm_lock(dqm); 2799 2800 /* Clean all kernel queues */ 2801 list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) { 2802 list_del(&kq->list); 2803 decrement_queue_count(dqm, qpd, kq->queue); 2804 qpd->is_debug = false; 2805 dqm->total_queue_count--; 2806 filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES; 2807 } 2808 2809 /* Clear all user mode queues */ 2810 list_for_each_entry(q, &qpd->queues_list, list) { 2811 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 2812 deallocate_sdma_queue(dqm, q); 2813 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 2814 deallocate_sdma_queue(dqm, q); 2815 2816 if (q->properties.is_active) { 2817 decrement_queue_count(dqm, qpd, q); 2818 2819 if (dqm->dev->kfd->shared_resources.enable_mes) { 2820 retval = remove_queue_mes(dqm, q, qpd); 2821 if (retval) 2822 dev_err(dev, "Failed to remove queue %d\n", 2823 q->properties.queue_id); 2824 } 2825 } 2826 2827 dqm->total_queue_count--; 2828 } 2829 2830 /* Unregister process */ 2831 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 2832 if (qpd == cur->qpd) { 2833 list_del(&cur->list); 2834 kfree(cur); 2835 dqm->processes_count--; 2836 found = true; 2837 break; 2838 } 2839 } 2840 2841 if (!dqm->dev->kfd->shared_resources.enable_mes) 2842 retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD); 2843 2844 if ((retval || qpd->reset_wavefronts) && 2845 down_read_trylock(&dqm->dev->adev->reset_domain->sem)) { 2846 pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); 2847 dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process); 2848 qpd->reset_wavefronts = false; 2849 up_read(&dqm->dev->adev->reset_domain->sem); 2850 } 2851 2852 /* Lastly, free mqd resources. 2853 * Do free_mqd() after dqm_unlock to avoid circular locking. 2854 */ 2855 while (!list_empty(&qpd->queues_list)) { 2856 q = list_first_entry(&qpd->queues_list, struct queue, list); 2857 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2858 q->properties.type)]; 2859 list_del(&q->list); 2860 qpd->queue_count--; 2861 dqm_unlock(dqm); 2862 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2863 dqm_lock(dqm); 2864 } 2865 dqm_unlock(dqm); 2866 2867 /* Outside the DQM lock because under the DQM lock we can't do 2868 * reclaim or take other locks that others hold while reclaiming. 2869 */ 2870 if (found) 2871 kfd_dec_compute_active(dqm->dev); 2872 2873 return retval; 2874 } 2875 2876 static int init_mqd_managers(struct device_queue_manager *dqm) 2877 { 2878 int i, j; 2879 struct device *dev = dqm->dev->adev->dev; 2880 struct mqd_manager *mqd_mgr; 2881 2882 for (i = 0; i < KFD_MQD_TYPE_MAX; i++) { 2883 mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev); 2884 if (!mqd_mgr) { 2885 dev_err(dev, "mqd manager [%d] initialization failed\n", i); 2886 goto out_free; 2887 } 2888 dqm->mqd_mgrs[i] = mqd_mgr; 2889 } 2890 2891 return 0; 2892 2893 out_free: 2894 for (j = 0; j < i; j++) { 2895 kfree(dqm->mqd_mgrs[j]); 2896 dqm->mqd_mgrs[j] = NULL; 2897 } 2898 2899 return -ENOMEM; 2900 } 2901 2902 /* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/ 2903 static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) 2904 { 2905 int retval; 2906 struct kfd_node *dev = dqm->dev; 2907 struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd; 2908 uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * 2909 get_num_all_sdma_engines(dqm) * 2910 dev->kfd->device_info.num_sdma_queues_per_engine + 2911 (dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size * 2912 NUM_XCC(dqm->dev->xcc_mask)); 2913 2914 retval = amdgpu_amdkfd_alloc_kernel_mem(dev->adev, size, 2915 AMDGPU_GEM_DOMAIN_GTT, 2916 &(mem_obj->mem), &(mem_obj->gpu_addr), 2917 (void *)&(mem_obj->cpu_ptr), false); 2918 2919 return retval; 2920 } 2921 2922 static void deallocate_hiq_sdma_mqd(struct kfd_node *dev, 2923 struct kfd_mem_obj *mqd) 2924 { 2925 WARN(!mqd, "No hiq sdma mqd trunk to free"); 2926 2927 amdgpu_amdkfd_free_kernel_mem(dev->adev, &mqd->mem); 2928 } 2929 2930 struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev) 2931 { 2932 struct device_queue_manager *dqm; 2933 2934 pr_debug("Loading device queue manager\n"); 2935 2936 dqm = kzalloc_obj(*dqm); 2937 if (!dqm) 2938 return NULL; 2939 2940 switch (dev->adev->asic_type) { 2941 /* HWS is not available on Hawaii. */ 2942 case CHIP_HAWAII: 2943 /* HWS depends on CWSR for timely dequeue. CWSR is not 2944 * available on Tonga. 2945 * 2946 * FIXME: This argument also applies to Kaveri. 2947 */ 2948 case CHIP_TONGA: 2949 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS; 2950 break; 2951 default: 2952 dqm->sched_policy = sched_policy; 2953 break; 2954 } 2955 2956 dqm->dev = dev; 2957 switch (dqm->sched_policy) { 2958 case KFD_SCHED_POLICY_HWS: 2959 case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: 2960 /* initialize dqm for cp scheduling */ 2961 dqm->ops.create_queue = create_queue_cpsch; 2962 dqm->ops.initialize = initialize_cpsch; 2963 dqm->ops.start = start_cpsch; 2964 dqm->ops.stop = stop_cpsch; 2965 dqm->ops.halt = halt_cpsch; 2966 dqm->ops.unhalt = unhalt_cpsch; 2967 dqm->ops.destroy_queue = destroy_queue_cpsch; 2968 dqm->ops.update_queue = update_queue; 2969 dqm->ops.register_process = register_process; 2970 dqm->ops.unregister_process = unregister_process; 2971 dqm->ops.uninitialize = uninitialize; 2972 dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; 2973 dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; 2974 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 2975 dqm->ops.process_termination = process_termination_cpsch; 2976 dqm->ops.evict_process_queues = evict_process_queues_cpsch; 2977 dqm->ops.restore_process_queues = restore_process_queues_cpsch; 2978 dqm->ops.get_wave_state = get_wave_state; 2979 dqm->ops.reset_queues = reset_queues_cpsch; 2980 dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info; 2981 dqm->ops.checkpoint_mqd = checkpoint_mqd; 2982 break; 2983 case KFD_SCHED_POLICY_NO_HWS: 2984 /* initialize dqm for no cp scheduling */ 2985 dqm->ops.start = start_nocpsch; 2986 dqm->ops.stop = stop_nocpsch; 2987 dqm->ops.create_queue = create_queue_nocpsch; 2988 dqm->ops.destroy_queue = destroy_queue_nocpsch; 2989 dqm->ops.update_queue = update_queue; 2990 dqm->ops.register_process = register_process; 2991 dqm->ops.unregister_process = unregister_process; 2992 dqm->ops.initialize = initialize_nocpsch; 2993 dqm->ops.uninitialize = uninitialize; 2994 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 2995 dqm->ops.process_termination = process_termination_nocpsch; 2996 dqm->ops.evict_process_queues = evict_process_queues_nocpsch; 2997 dqm->ops.restore_process_queues = 2998 restore_process_queues_nocpsch; 2999 dqm->ops.get_wave_state = get_wave_state; 3000 dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info; 3001 dqm->ops.checkpoint_mqd = checkpoint_mqd; 3002 break; 3003 default: 3004 dev_err(dev->adev->dev, "Invalid scheduling policy %d\n", dqm->sched_policy); 3005 goto out_free; 3006 } 3007 3008 switch (dev->adev->asic_type) { 3009 case CHIP_KAVERI: 3010 case CHIP_HAWAII: 3011 device_queue_manager_init_cik(&dqm->asic_ops); 3012 break; 3013 3014 case CHIP_CARRIZO: 3015 case CHIP_TONGA: 3016 case CHIP_FIJI: 3017 case CHIP_POLARIS10: 3018 case CHIP_POLARIS11: 3019 case CHIP_POLARIS12: 3020 case CHIP_VEGAM: 3021 device_queue_manager_init_vi(&dqm->asic_ops); 3022 break; 3023 3024 default: 3025 if (KFD_GC_VERSION(dev) >= IP_VERSION(12, 1, 0)) 3026 device_queue_manager_init_v12_1(&dqm->asic_ops); 3027 else if (KFD_GC_VERSION(dev) >= IP_VERSION(12, 0, 0)) 3028 device_queue_manager_init_v12(&dqm->asic_ops); 3029 else if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0)) 3030 device_queue_manager_init_v11(&dqm->asic_ops); 3031 else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1)) 3032 device_queue_manager_init_v10(&dqm->asic_ops); 3033 else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1)) 3034 device_queue_manager_init_v9(&dqm->asic_ops); 3035 else { 3036 WARN(1, "Unexpected ASIC family %u", 3037 dev->adev->asic_type); 3038 goto out_free; 3039 } 3040 } 3041 3042 if (init_mqd_managers(dqm)) 3043 goto out_free; 3044 3045 if (!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) { 3046 dev_err(dev->adev->dev, "Failed to allocate hiq sdma mqd trunk buffer\n"); 3047 goto out_free; 3048 } 3049 3050 if (!dqm->ops.initialize(dqm)) { 3051 init_waitqueue_head(&dqm->destroy_wait); 3052 return dqm; 3053 } 3054 3055 if (!dev->kfd->shared_resources.enable_mes) 3056 deallocate_hiq_sdma_mqd(dev, &dqm->hiq_sdma_mqd); 3057 3058 out_free: 3059 kfree(dqm); 3060 return NULL; 3061 } 3062 3063 void device_queue_manager_uninit(struct device_queue_manager *dqm) 3064 { 3065 dqm->ops.stop(dqm); 3066 dqm->ops.uninitialize(dqm); 3067 if (!dqm->dev->kfd->shared_resources.enable_mes) 3068 deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd); 3069 kfree(dqm); 3070 } 3071 3072 int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id) 3073 { 3074 struct kfd_process_device *pdd = NULL; 3075 struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, &pdd); 3076 struct device_queue_manager *dqm = knode->dqm; 3077 struct device *dev = dqm->dev->adev->dev; 3078 struct qcm_process_device *qpd; 3079 struct queue *q = NULL; 3080 int ret = 0; 3081 3082 if (!pdd) 3083 return -EINVAL; 3084 3085 dqm_lock(dqm); 3086 3087 if (pdd) { 3088 qpd = &pdd->qpd; 3089 3090 list_for_each_entry(q, &qpd->queues_list, list) { 3091 if (q->doorbell_id == doorbell_id && q->properties.is_active) { 3092 ret = suspend_all_queues_mes(dqm); 3093 if (ret) { 3094 dev_err(dev, "Suspending all queues failed"); 3095 goto out; 3096 } 3097 3098 q->properties.is_evicted = true; 3099 q->properties.is_active = false; 3100 decrement_queue_count(dqm, qpd, q); 3101 3102 ret = remove_queue_mes(dqm, q, qpd); 3103 if (ret) { 3104 dev_err(dev, "Removing bad queue failed"); 3105 goto out; 3106 } 3107 3108 ret = resume_all_queues_mes(dqm); 3109 if (ret) 3110 dev_err(dev, "Resuming all queues failed"); 3111 3112 break; 3113 } 3114 } 3115 } 3116 3117 out: 3118 dqm_unlock(dqm); 3119 kfd_unref_process(p); 3120 return ret; 3121 } 3122 3123 int kfd_evict_process_device(struct kfd_process_device *pdd) 3124 { 3125 struct device_queue_manager *dqm; 3126 struct kfd_process *p; 3127 3128 p = pdd->process; 3129 dqm = pdd->dev->dqm; 3130 3131 WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); 3132 3133 return dqm->ops.evict_process_queues(dqm, &pdd->qpd); 3134 } 3135 3136 int reserve_debug_trap_vmid(struct device_queue_manager *dqm, 3137 struct qcm_process_device *qpd) 3138 { 3139 int r; 3140 struct device *dev = dqm->dev->adev->dev; 3141 int updated_vmid_mask; 3142 3143 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3144 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3145 return -EINVAL; 3146 } 3147 3148 dqm_lock(dqm); 3149 3150 if (dqm->trap_debug_vmid != 0) { 3151 dev_err(dev, "Trap debug id already reserved\n"); 3152 r = -EBUSY; 3153 goto out_unlock; 3154 } 3155 3156 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 3157 USE_DEFAULT_GRACE_PERIOD, false); 3158 if (r) 3159 goto out_unlock; 3160 3161 updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap; 3162 updated_vmid_mask &= ~(1 << dqm->dev->vm_info.last_vmid_kfd); 3163 3164 dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask; 3165 dqm->trap_debug_vmid = dqm->dev->vm_info.last_vmid_kfd; 3166 r = set_sched_resources(dqm); 3167 if (r) 3168 goto out_unlock; 3169 3170 r = map_queues_cpsch(dqm); 3171 if (r) 3172 goto out_unlock; 3173 3174 pr_debug("Reserved VMID for trap debug: %i\n", dqm->trap_debug_vmid); 3175 3176 out_unlock: 3177 dqm_unlock(dqm); 3178 return r; 3179 } 3180 3181 /* 3182 * Releases vmid for the trap debugger 3183 */ 3184 int release_debug_trap_vmid(struct device_queue_manager *dqm, 3185 struct qcm_process_device *qpd) 3186 { 3187 struct device *dev = dqm->dev->adev->dev; 3188 int r; 3189 int updated_vmid_mask; 3190 uint32_t trap_debug_vmid; 3191 3192 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3193 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3194 return -EINVAL; 3195 } 3196 3197 dqm_lock(dqm); 3198 trap_debug_vmid = dqm->trap_debug_vmid; 3199 if (dqm->trap_debug_vmid == 0) { 3200 dev_err(dev, "Trap debug id is not reserved\n"); 3201 r = -EINVAL; 3202 goto out_unlock; 3203 } 3204 3205 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 3206 USE_DEFAULT_GRACE_PERIOD, false); 3207 if (r) 3208 goto out_unlock; 3209 3210 updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap; 3211 updated_vmid_mask |= (1 << dqm->dev->vm_info.last_vmid_kfd); 3212 3213 dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask; 3214 dqm->trap_debug_vmid = 0; 3215 r = set_sched_resources(dqm); 3216 if (r) 3217 goto out_unlock; 3218 3219 r = map_queues_cpsch(dqm); 3220 if (r) 3221 goto out_unlock; 3222 3223 pr_debug("Released VMID for trap debug: %i\n", trap_debug_vmid); 3224 3225 out_unlock: 3226 dqm_unlock(dqm); 3227 return r; 3228 } 3229 3230 #define QUEUE_NOT_FOUND -1 3231 /* invalidate queue operation in array */ 3232 static void q_array_invalidate(uint32_t num_queues, uint32_t *queue_ids) 3233 { 3234 int i; 3235 3236 for (i = 0; i < num_queues; i++) 3237 queue_ids[i] |= KFD_DBG_QUEUE_INVALID_MASK; 3238 } 3239 3240 /* find queue index in array */ 3241 static int q_array_get_index(unsigned int queue_id, 3242 uint32_t num_queues, 3243 uint32_t *queue_ids) 3244 { 3245 int i; 3246 3247 for (i = 0; i < num_queues; i++) 3248 if (queue_id == (queue_ids[i] & ~KFD_DBG_QUEUE_INVALID_MASK)) 3249 return i; 3250 3251 return QUEUE_NOT_FOUND; 3252 } 3253 3254 struct copy_context_work_handler_workarea { 3255 struct work_struct copy_context_work; 3256 struct kfd_process *p; 3257 }; 3258 3259 static void copy_context_work_handler(struct work_struct *work) 3260 { 3261 struct copy_context_work_handler_workarea *workarea; 3262 struct mqd_manager *mqd_mgr; 3263 struct queue *q; 3264 struct mm_struct *mm; 3265 struct kfd_process *p; 3266 uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size; 3267 int i; 3268 3269 workarea = container_of(work, 3270 struct copy_context_work_handler_workarea, 3271 copy_context_work); 3272 3273 p = workarea->p; 3274 mm = get_task_mm(p->lead_thread); 3275 3276 if (!mm) 3277 return; 3278 3279 kthread_use_mm(mm); 3280 for (i = 0; i < p->n_pdds; i++) { 3281 struct kfd_process_device *pdd = p->pdds[i]; 3282 struct device_queue_manager *dqm = pdd->dev->dqm; 3283 struct qcm_process_device *qpd = &pdd->qpd; 3284 3285 list_for_each_entry(q, &qpd->queues_list, list) { 3286 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE) 3287 continue; 3288 3289 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; 3290 3291 /* We ignore the return value from get_wave_state 3292 * because 3293 * i) right now, it always returns 0, and 3294 * ii) if we hit an error, we would continue to the 3295 * next queue anyway. 3296 */ 3297 mqd_mgr->get_wave_state(mqd_mgr, 3298 q->mqd, 3299 &q->properties, 3300 (void __user *) q->properties.ctx_save_restore_area_address, 3301 &tmp_ctl_stack_used_size, 3302 &tmp_save_area_used_size); 3303 } 3304 } 3305 kthread_unuse_mm(mm); 3306 mmput(mm); 3307 } 3308 3309 static uint32_t *get_queue_ids(uint32_t num_queues, uint32_t *usr_queue_id_array) 3310 { 3311 if (!usr_queue_id_array) 3312 return NULL; 3313 3314 if (num_queues > KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) 3315 return ERR_PTR(-EINVAL); 3316 3317 return memdup_user(usr_queue_id_array, 3318 array_size(num_queues, sizeof(uint32_t))); 3319 } 3320 3321 int resume_queues(struct kfd_process *p, 3322 uint32_t num_queues, 3323 uint32_t *usr_queue_id_array) 3324 { 3325 uint32_t *queue_ids = NULL; 3326 int total_resumed = 0; 3327 int i; 3328 3329 if (usr_queue_id_array) { 3330 queue_ids = get_queue_ids(num_queues, usr_queue_id_array); 3331 3332 if (IS_ERR(queue_ids)) 3333 return PTR_ERR(queue_ids); 3334 3335 /* mask all queues as invalid. unmask per successful request */ 3336 q_array_invalidate(num_queues, queue_ids); 3337 } 3338 3339 for (i = 0; i < p->n_pdds; i++) { 3340 struct kfd_process_device *pdd = p->pdds[i]; 3341 struct device_queue_manager *dqm = pdd->dev->dqm; 3342 struct device *dev = dqm->dev->adev->dev; 3343 struct qcm_process_device *qpd = &pdd->qpd; 3344 struct queue *q; 3345 int r, per_device_resumed = 0; 3346 3347 dqm_lock(dqm); 3348 3349 /* unmask queues that resume or already resumed as valid */ 3350 list_for_each_entry(q, &qpd->queues_list, list) { 3351 int q_idx = QUEUE_NOT_FOUND; 3352 3353 if (queue_ids) 3354 q_idx = q_array_get_index( 3355 q->properties.queue_id, 3356 num_queues, 3357 queue_ids); 3358 3359 if (!queue_ids || q_idx != QUEUE_NOT_FOUND) { 3360 int err = resume_single_queue(dqm, &pdd->qpd, q); 3361 3362 if (queue_ids) { 3363 if (!err) { 3364 queue_ids[q_idx] &= 3365 ~KFD_DBG_QUEUE_INVALID_MASK; 3366 } else { 3367 queue_ids[q_idx] |= 3368 KFD_DBG_QUEUE_ERROR_MASK; 3369 break; 3370 } 3371 } 3372 3373 if (dqm->dev->kfd->shared_resources.enable_mes) { 3374 wake_up_all(&dqm->destroy_wait); 3375 if (!err) 3376 total_resumed++; 3377 } else { 3378 per_device_resumed++; 3379 } 3380 } 3381 } 3382 3383 if (!per_device_resumed) { 3384 dqm_unlock(dqm); 3385 continue; 3386 } 3387 3388 r = execute_queues_cpsch(dqm, 3389 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 3390 0, 3391 USE_DEFAULT_GRACE_PERIOD); 3392 if (r) { 3393 dev_err(dev, "Failed to resume process queues\n"); 3394 if (queue_ids) { 3395 list_for_each_entry(q, &qpd->queues_list, list) { 3396 int q_idx = q_array_get_index( 3397 q->properties.queue_id, 3398 num_queues, 3399 queue_ids); 3400 3401 /* mask queue as error on resume fail */ 3402 if (q_idx != QUEUE_NOT_FOUND) 3403 queue_ids[q_idx] |= 3404 KFD_DBG_QUEUE_ERROR_MASK; 3405 } 3406 } 3407 } else { 3408 wake_up_all(&dqm->destroy_wait); 3409 total_resumed += per_device_resumed; 3410 } 3411 3412 dqm_unlock(dqm); 3413 } 3414 3415 if (queue_ids) { 3416 if (copy_to_user((void __user *)usr_queue_id_array, queue_ids, 3417 num_queues * sizeof(uint32_t))) 3418 pr_err("copy_to_user failed on queue resume\n"); 3419 3420 kfree(queue_ids); 3421 } 3422 3423 return total_resumed; 3424 } 3425 3426 int suspend_queues(struct kfd_process *p, 3427 uint32_t num_queues, 3428 uint32_t grace_period, 3429 uint64_t exception_clear_mask, 3430 uint32_t *usr_queue_id_array) 3431 { 3432 uint32_t *queue_ids = get_queue_ids(num_queues, usr_queue_id_array); 3433 int total_suspended = 0; 3434 int i; 3435 3436 if (IS_ERR(queue_ids)) 3437 return PTR_ERR(queue_ids); 3438 3439 /* mask all queues as invalid. umask on successful request */ 3440 q_array_invalidate(num_queues, queue_ids); 3441 3442 for (i = 0; i < p->n_pdds; i++) { 3443 struct kfd_process_device *pdd = p->pdds[i]; 3444 struct device_queue_manager *dqm = pdd->dev->dqm; 3445 struct device *dev = dqm->dev->adev->dev; 3446 struct qcm_process_device *qpd = &pdd->qpd; 3447 struct queue *q; 3448 int r, per_device_suspended = 0; 3449 3450 mutex_lock(&p->event_mutex); 3451 dqm_lock(dqm); 3452 3453 /* unmask queues that suspend or already suspended */ 3454 list_for_each_entry(q, &qpd->queues_list, list) { 3455 int q_idx = q_array_get_index(q->properties.queue_id, 3456 num_queues, 3457 queue_ids); 3458 3459 if (q_idx != QUEUE_NOT_FOUND) { 3460 int err = suspend_single_queue(dqm, pdd, q); 3461 bool is_mes = dqm->dev->kfd->shared_resources.enable_mes; 3462 3463 if (!err) { 3464 queue_ids[q_idx] &= ~KFD_DBG_QUEUE_INVALID_MASK; 3465 if (exception_clear_mask && is_mes) 3466 q->properties.exception_status &= 3467 ~exception_clear_mask; 3468 3469 if (is_mes) 3470 total_suspended++; 3471 else 3472 per_device_suspended++; 3473 } else if (err != -EBUSY) { 3474 queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK; 3475 break; 3476 } 3477 } 3478 } 3479 3480 if (!per_device_suspended) { 3481 dqm_unlock(dqm); 3482 mutex_unlock(&p->event_mutex); 3483 if (total_suspended) 3484 amdgpu_amdkfd_debug_mem_fence(dqm->dev->adev); 3485 continue; 3486 } 3487 3488 r = execute_queues_cpsch(dqm, 3489 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 3490 grace_period); 3491 3492 if (r) 3493 dev_err(dev, "Failed to suspend process queues.\n"); 3494 else 3495 total_suspended += per_device_suspended; 3496 3497 list_for_each_entry(q, &qpd->queues_list, list) { 3498 int q_idx = q_array_get_index(q->properties.queue_id, 3499 num_queues, queue_ids); 3500 3501 if (q_idx == QUEUE_NOT_FOUND) 3502 continue; 3503 3504 /* mask queue as error on suspend fail */ 3505 if (r) 3506 queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK; 3507 else if (exception_clear_mask) 3508 q->properties.exception_status &= 3509 ~exception_clear_mask; 3510 } 3511 3512 dqm_unlock(dqm); 3513 mutex_unlock(&p->event_mutex); 3514 amdgpu_device_flush_hdp(dqm->dev->adev, NULL); 3515 } 3516 3517 if (total_suspended) { 3518 struct copy_context_work_handler_workarea copy_context_worker; 3519 3520 INIT_WORK_ONSTACK( 3521 ©_context_worker.copy_context_work, 3522 copy_context_work_handler); 3523 3524 copy_context_worker.p = p; 3525 3526 schedule_work(©_context_worker.copy_context_work); 3527 3528 3529 flush_work(©_context_worker.copy_context_work); 3530 destroy_work_on_stack(©_context_worker.copy_context_work); 3531 } 3532 3533 if (copy_to_user((void __user *)usr_queue_id_array, queue_ids, 3534 num_queues * sizeof(uint32_t))) 3535 pr_err("copy_to_user failed on queue suspend\n"); 3536 3537 kfree(queue_ids); 3538 3539 return total_suspended; 3540 } 3541 3542 static uint32_t set_queue_type_for_user(struct queue_properties *q_props) 3543 { 3544 switch (q_props->type) { 3545 case KFD_QUEUE_TYPE_COMPUTE: 3546 return q_props->format == KFD_QUEUE_FORMAT_PM4 3547 ? KFD_IOC_QUEUE_TYPE_COMPUTE 3548 : KFD_IOC_QUEUE_TYPE_COMPUTE_AQL; 3549 case KFD_QUEUE_TYPE_SDMA: 3550 return KFD_IOC_QUEUE_TYPE_SDMA; 3551 case KFD_QUEUE_TYPE_SDMA_XGMI: 3552 return KFD_IOC_QUEUE_TYPE_SDMA_XGMI; 3553 default: 3554 WARN_ONCE(true, "queue type not recognized!"); 3555 return 0xffffffff; 3556 }; 3557 } 3558 3559 void set_queue_snapshot_entry(struct queue *q, 3560 uint64_t exception_clear_mask, 3561 struct kfd_queue_snapshot_entry *qss_entry) 3562 { 3563 qss_entry->ring_base_address = q->properties.queue_address; 3564 qss_entry->write_pointer_address = (uint64_t)q->properties.write_ptr; 3565 qss_entry->read_pointer_address = (uint64_t)q->properties.read_ptr; 3566 qss_entry->ctx_save_restore_address = 3567 q->properties.ctx_save_restore_area_address; 3568 qss_entry->ctx_save_restore_area_size = 3569 q->properties.ctx_save_restore_area_size; 3570 qss_entry->exception_status = q->properties.exception_status; 3571 qss_entry->queue_id = q->properties.queue_id; 3572 qss_entry->gpu_id = q->device->id; 3573 qss_entry->ring_size = (uint32_t)q->properties.queue_size; 3574 qss_entry->queue_type = set_queue_type_for_user(&q->properties); 3575 q->properties.exception_status &= ~exception_clear_mask; 3576 } 3577 3578 int debug_lock_and_unmap(struct device_queue_manager *dqm) 3579 { 3580 struct device *dev = dqm->dev->adev->dev; 3581 int r; 3582 3583 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3584 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3585 return -EINVAL; 3586 } 3587 3588 if (!kfd_dbg_is_per_vmid_supported(dqm->dev)) 3589 return 0; 3590 3591 dqm_lock(dqm); 3592 3593 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 0, false); 3594 if (r) 3595 dqm_unlock(dqm); 3596 3597 return r; 3598 } 3599 3600 int debug_map_and_unlock(struct device_queue_manager *dqm) 3601 { 3602 struct device *dev = dqm->dev->adev->dev; 3603 int r; 3604 3605 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3606 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3607 return -EINVAL; 3608 } 3609 3610 if (!kfd_dbg_is_per_vmid_supported(dqm->dev)) 3611 return 0; 3612 3613 r = map_queues_cpsch(dqm); 3614 3615 dqm_unlock(dqm); 3616 3617 return r; 3618 } 3619 3620 int debug_refresh_runlist(struct device_queue_manager *dqm) 3621 { 3622 int r = debug_lock_and_unmap(dqm); 3623 3624 if (r) 3625 return r; 3626 3627 return debug_map_and_unlock(dqm); 3628 } 3629 3630 bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm, 3631 struct qcm_process_device *qpd, 3632 int doorbell_off, u32 *queue_format) 3633 { 3634 struct queue *q; 3635 bool r = false; 3636 3637 if (!queue_format) 3638 return r; 3639 3640 dqm_lock(dqm); 3641 3642 list_for_each_entry(q, &qpd->queues_list, list) { 3643 if (q->properties.doorbell_off == doorbell_off) { 3644 *queue_format = q->properties.format; 3645 r = true; 3646 goto out; 3647 } 3648 } 3649 3650 out: 3651 dqm_unlock(dqm); 3652 return r; 3653 } 3654 #if defined(CONFIG_DEBUG_FS) 3655 3656 static void seq_reg_dump(struct seq_file *m, 3657 uint32_t (*dump)[2], uint32_t n_regs) 3658 { 3659 uint32_t i, count; 3660 3661 for (i = 0, count = 0; i < n_regs; i++) { 3662 if (count == 0 || 3663 dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) { 3664 seq_printf(m, "%s %08x: %08x", 3665 i ? "\n" : "", 3666 dump[i][0], dump[i][1]); 3667 count = 7; 3668 } else { 3669 seq_printf(m, " %08x", dump[i][1]); 3670 count--; 3671 } 3672 } 3673 3674 seq_puts(m, "\n"); 3675 } 3676 3677 int dqm_debugfs_hqds(struct seq_file *m, void *data) 3678 { 3679 struct device_queue_manager *dqm = data; 3680 uint32_t xcc_mask = dqm->dev->xcc_mask; 3681 uint32_t (*dump)[2], n_regs; 3682 int pipe, queue; 3683 int r = 0, xcc_id; 3684 uint32_t sdma_engine_start; 3685 3686 if (!dqm->sched_running) { 3687 seq_puts(m, " Device is stopped\n"); 3688 return 0; 3689 } 3690 3691 for_each_inst(xcc_id, xcc_mask) { 3692 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 3693 KFD_CIK_HIQ_PIPE, 3694 KFD_CIK_HIQ_QUEUE, &dump, 3695 &n_regs, xcc_id); 3696 if (!r) { 3697 seq_printf( 3698 m, 3699 " Inst %d, HIQ on MEC %d Pipe %d Queue %d\n", 3700 xcc_id, 3701 KFD_CIK_HIQ_PIPE / get_pipes_per_mec(dqm) + 1, 3702 KFD_CIK_HIQ_PIPE % get_pipes_per_mec(dqm), 3703 KFD_CIK_HIQ_QUEUE); 3704 seq_reg_dump(m, dump, n_regs); 3705 3706 kfree(dump); 3707 } 3708 3709 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 3710 int pipe_offset = pipe * get_queues_per_pipe(dqm); 3711 3712 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { 3713 if (!test_bit(pipe_offset + queue, 3714 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 3715 continue; 3716 3717 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 3718 pipe, queue, 3719 &dump, &n_regs, 3720 xcc_id); 3721 if (r) 3722 break; 3723 3724 seq_printf(m, 3725 " Inst %d, CP Pipe %d, Queue %d\n", 3726 xcc_id, pipe, queue); 3727 seq_reg_dump(m, dump, n_regs); 3728 3729 kfree(dump); 3730 } 3731 } 3732 } 3733 3734 sdma_engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm); 3735 for (pipe = sdma_engine_start; 3736 pipe < (sdma_engine_start + get_num_all_sdma_engines(dqm)); 3737 pipe++) { 3738 for (queue = 0; 3739 queue < dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 3740 queue++) { 3741 r = dqm->dev->kfd2kgd->hqd_sdma_dump( 3742 dqm->dev->adev, pipe, queue, &dump, &n_regs); 3743 if (r) 3744 break; 3745 3746 seq_printf(m, " SDMA Engine %d, RLC %d\n", 3747 pipe, queue); 3748 seq_reg_dump(m, dump, n_regs); 3749 3750 kfree(dump); 3751 } 3752 } 3753 3754 return r; 3755 } 3756 3757 int dqm_debugfs_hang_hws(struct device_queue_manager *dqm) 3758 { 3759 int r = 0; 3760 3761 dqm_lock(dqm); 3762 r = pm_debugfs_hang_hws(&dqm->packet_mgr); 3763 if (r) { 3764 dqm_unlock(dqm); 3765 return r; 3766 } 3767 dqm->active_runlist = true; 3768 r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 3769 0, USE_DEFAULT_GRACE_PERIOD); 3770 dqm_unlock(dqm); 3771 3772 return r; 3773 } 3774 3775 #endif 3776