1 // SPDX-License-Identifier: GPL-2.0 OR MIT 2 /* 3 * Copyright 2014-2022 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 
22 * 23 */ 24 25 #include <linux/ratelimit.h> 26 #include <linux/printk.h> 27 #include <linux/slab.h> 28 #include <linux/list.h> 29 #include <linux/types.h> 30 #include <linux/bitops.h> 31 #include <linux/sched.h> 32 #include "kfd_priv.h" 33 #include "kfd_device_queue_manager.h" 34 #include "kfd_mqd_manager.h" 35 #include "cik_regs.h" 36 #include "kfd_kernel_queue.h" 37 #include "amdgpu_amdkfd.h" 38 #include "amdgpu_reset.h" 39 #include "amdgpu_sdma.h" 40 #include "mes_v11_api_def.h" 41 #include "kfd_debug.h" 42 43 /* Size of the per-pipe EOP queue */ 44 #define CIK_HPD_EOP_BYTES_LOG2 11 45 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2) 46 /* See unmap_queues_cpsch() */ 47 #define USE_DEFAULT_GRACE_PERIOD 0xffffffff 48 49 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, 50 u32 pasid, unsigned int vmid); 51 52 static int execute_queues_cpsch(struct device_queue_manager *dqm, 53 enum kfd_unmap_queues_filter filter, 54 uint32_t filter_param, 55 uint32_t grace_period); 56 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 57 enum kfd_unmap_queues_filter filter, 58 uint32_t filter_param, 59 uint32_t grace_period, 60 bool reset); 61 62 static int map_queues_cpsch(struct device_queue_manager *dqm); 63 64 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 65 struct queue *q); 66 67 static inline void deallocate_hqd(struct device_queue_manager *dqm, 68 struct queue *q); 69 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q); 70 static int allocate_sdma_queue(struct device_queue_manager *dqm, 71 struct queue *q, const uint32_t *restore_sdma_id); 72 73 static int reset_queues_on_hws_hang(struct device_queue_manager *dqm, bool is_sdma); 74 75 static inline 76 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) 77 { 78 if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI) 79 return KFD_MQD_TYPE_SDMA; 80 return KFD_MQD_TYPE_CP; 81 } 82 83 static bool 
is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) 84 { 85 int i; 86 int pipe_offset = (mec * dqm->dev->kfd->shared_resources.num_pipe_per_mec 87 + pipe) * dqm->dev->kfd->shared_resources.num_queue_per_pipe; 88 89 /* queue is available for KFD usage if bit is 1 */ 90 for (i = 0; i < dqm->dev->kfd->shared_resources.num_queue_per_pipe; ++i) 91 if (test_bit(pipe_offset + i, 92 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 93 return true; 94 return false; 95 } 96 97 unsigned int get_cp_queues_num(struct device_queue_manager *dqm) 98 { 99 return bitmap_weight(dqm->dev->kfd->shared_resources.cp_queue_bitmap, 100 AMDGPU_MAX_QUEUES); 101 } 102 103 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm) 104 { 105 return dqm->dev->kfd->shared_resources.num_queue_per_pipe; 106 } 107 108 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm) 109 { 110 return dqm->dev->kfd->shared_resources.num_pipe_per_mec; 111 } 112 113 static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm) 114 { 115 return kfd_get_num_sdma_engines(dqm->dev) + 116 kfd_get_num_xgmi_sdma_engines(dqm->dev); 117 } 118 119 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) 120 { 121 return kfd_get_num_sdma_engines(dqm->dev) * 122 dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 123 } 124 125 unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) 126 { 127 return kfd_get_num_xgmi_sdma_engines(dqm->dev) * 128 dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 129 } 130 131 static void init_sdma_bitmaps(struct device_queue_manager *dqm) 132 { 133 bitmap_zero(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES); 134 bitmap_set(dqm->sdma_bitmap, 0, get_num_sdma_queues(dqm)); 135 136 bitmap_zero(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES); 137 bitmap_set(dqm->xgmi_sdma_bitmap, 0, get_num_xgmi_sdma_queues(dqm)); 138 139 /* Mask out the reserved queues */ 140 bitmap_clear(dqm->sdma_bitmap, 0, kfd_get_num_sdma_engines(dqm->dev) 
* 141 dqm->dev->kfd->device_info.num_reserved_sdma_queues_per_engine); 142 bitmap_clear(dqm->xgmi_sdma_bitmap, 0, kfd_get_num_xgmi_sdma_engines(dqm->dev) * 143 dqm->dev->kfd->device_info.num_reserved_sdma_queues_per_engine); 144 } 145 146 void program_sh_mem_settings(struct device_queue_manager *dqm, 147 struct qcm_process_device *qpd) 148 { 149 uint32_t xcc_mask = dqm->dev->xcc_mask; 150 int xcc_id; 151 152 for_each_inst(xcc_id, xcc_mask) 153 dqm->dev->kfd2kgd->program_sh_mem_settings( 154 dqm->dev->adev, qpd->vmid, qpd->sh_mem_config, 155 qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit, 156 qpd->sh_mem_bases, xcc_id); 157 } 158 159 static void kfd_hws_hang(struct device_queue_manager *dqm) 160 { 161 struct device_process_node *cur; 162 struct qcm_process_device *qpd; 163 struct queue *q; 164 165 /* Mark all device queues as reset. */ 166 list_for_each_entry(cur, &dqm->queues, list) { 167 qpd = cur->qpd; 168 list_for_each_entry(q, &qpd->queues_list, list) { 169 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 170 171 pdd->has_reset_queue = true; 172 } 173 } 174 175 /* 176 * Issue a GPU reset if HWS is unresponsive 177 */ 178 amdgpu_amdkfd_gpu_reset(dqm->dev->adev); 179 } 180 181 static int convert_to_mes_queue_type(int queue_type) 182 { 183 int mes_queue_type; 184 185 switch (queue_type) { 186 case KFD_QUEUE_TYPE_COMPUTE: 187 mes_queue_type = MES_QUEUE_TYPE_COMPUTE; 188 break; 189 case KFD_QUEUE_TYPE_SDMA: 190 mes_queue_type = MES_QUEUE_TYPE_SDMA; 191 break; 192 default: 193 WARN(1, "Invalid queue type %d", queue_type); 194 mes_queue_type = -EINVAL; 195 break; 196 } 197 198 return mes_queue_type; 199 } 200 201 static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, 202 struct qcm_process_device *qpd) 203 { 204 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 205 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 206 struct mes_add_queue_input queue_input; 207 int r, queue_type; 208 uint64_t wptr_addr_off; 209 210 if 
(!dqm->sched_running || dqm->sched_halt) 211 return 0; 212 if (!down_read_trylock(&adev->reset_domain->sem)) 213 return -EIO; 214 215 memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); 216 queue_input.process_id = pdd->pasid; 217 queue_input.page_table_base_addr = qpd->page_table_base; 218 queue_input.process_va_start = 0; 219 queue_input.process_va_end = adev->vm_manager.max_pfn - 1; 220 /* MES unit for quantum is 100ns */ 221 queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM; /* Equivalent to 10ms. */ 222 queue_input.process_context_addr = pdd->proc_ctx_gpu_addr; 223 queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */ 224 queue_input.gang_context_addr = q->gang_ctx_gpu_addr; 225 queue_input.inprocess_gang_priority = q->properties.priority; 226 queue_input.gang_global_priority_level = 227 AMDGPU_MES_PRIORITY_LEVEL_NORMAL; 228 queue_input.doorbell_offset = q->properties.doorbell_off; 229 queue_input.mqd_addr = q->gart_mqd_addr; 230 queue_input.wptr_addr = (uint64_t)q->properties.write_ptr; 231 232 wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1); 233 queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->properties.wptr_bo) + wptr_addr_off; 234 235 queue_input.is_kfd_process = 1; 236 queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL); 237 queue_input.queue_size = q->properties.queue_size >> 2; 238 239 queue_input.paging = false; 240 queue_input.tba_addr = qpd->tba_addr; 241 queue_input.tma_addr = qpd->tma_addr; 242 queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device); 243 queue_input.skip_process_ctx_clear = 244 qpd->pqm->process->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED && 245 (qpd->pqm->process->debug_trap_enabled || 246 kfd_dbg_has_ttmps_always_setup(q->device)); 247 248 queue_type = convert_to_mes_queue_type(q->properties.type); 249 if (queue_type < 0) { 250 dev_err(adev->dev, "Queue type not supported with MES, queue:%d\n", 251 q->properties.type); 252 
up_read(&adev->reset_domain->sem); 253 return -EINVAL; 254 } 255 queue_input.queue_type = (uint32_t)queue_type; 256 257 queue_input.exclusively_scheduled = q->properties.is_gws; 258 queue_input.sh_mem_config_data = qpd->sh_mem_config; 259 queue_input.vm_cntx_cntl = qpd->vm_cntx_cntl; 260 queue_input.xcc_id = ffs(dqm->dev->xcc_mask) - 1; 261 262 amdgpu_mes_lock(&adev->mes); 263 r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); 264 amdgpu_mes_unlock(&adev->mes); 265 up_read(&adev->reset_domain->sem); 266 if (r) { 267 dev_err(adev->dev, "failed to add hardware queue to MES, doorbell=0x%x\n", 268 q->properties.doorbell_off); 269 dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); 270 kfd_hws_hang(dqm); 271 } 272 273 return r; 274 } 275 276 static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q, 277 struct qcm_process_device *qpd) 278 { 279 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 280 int r; 281 struct mes_remove_queue_input queue_input; 282 283 if (!dqm->sched_running || dqm->sched_halt) 284 return 0; 285 if (!down_read_trylock(&adev->reset_domain->sem)) 286 return -EIO; 287 288 memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input)); 289 queue_input.doorbell_offset = q->properties.doorbell_off; 290 queue_input.gang_context_addr = q->gang_ctx_gpu_addr; 291 queue_input.xcc_id = ffs(dqm->dev->xcc_mask) - 1; 292 293 amdgpu_mes_lock(&adev->mes); 294 r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); 295 amdgpu_mes_unlock(&adev->mes); 296 up_read(&adev->reset_domain->sem); 297 298 if (r) { 299 dev_err(adev->dev, "failed to remove hardware queue from MES, doorbell=0x%x\n", 300 q->properties.doorbell_off); 301 dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); 302 kfd_hws_hang(dqm); 303 } 304 305 return r; 306 } 307 308 static int remove_all_kfd_queues_mes(struct device_queue_manager *dqm) 309 { 310 struct device_process_node *cur; 311 
struct device *dev = dqm->dev->adev->dev; 312 struct qcm_process_device *qpd; 313 struct queue *q; 314 int retval = 0; 315 316 list_for_each_entry(cur, &dqm->queues, list) { 317 qpd = cur->qpd; 318 list_for_each_entry(q, &qpd->queues_list, list) { 319 if (q->properties.is_active) { 320 retval = remove_queue_mes(dqm, q, qpd); 321 if (retval) { 322 dev_err(dev, "%s: Failed to remove queue %d for dev %d", 323 __func__, 324 q->properties.queue_id, 325 dqm->dev->id); 326 return retval; 327 } 328 } 329 } 330 } 331 332 return retval; 333 } 334 335 static int add_all_kfd_queues_mes(struct device_queue_manager *dqm) 336 { 337 struct device_process_node *cur; 338 struct device *dev = dqm->dev->adev->dev; 339 struct qcm_process_device *qpd; 340 struct queue *q; 341 int retval = 0; 342 343 list_for_each_entry(cur, &dqm->queues, list) { 344 qpd = cur->qpd; 345 list_for_each_entry(q, &qpd->queues_list, list) { 346 if (!q->properties.is_active) 347 continue; 348 retval = add_queue_mes(dqm, q, qpd); 349 if (retval) { 350 dev_err(dev, "%s: Failed to add queue %d for dev %d", 351 __func__, 352 q->properties.queue_id, 353 dqm->dev->id); 354 return retval; 355 } 356 } 357 } 358 359 return retval; 360 } 361 362 static int suspend_all_queues_mes(struct device_queue_manager *dqm) 363 { 364 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 365 int r = 0; 366 367 if (!down_read_trylock(&adev->reset_domain->sem)) 368 return -EIO; 369 370 r = amdgpu_mes_suspend(adev); 371 up_read(&adev->reset_domain->sem); 372 373 if (r) { 374 dev_err(adev->dev, "failed to suspend gangs from MES\n"); 375 dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); 376 kfd_hws_hang(dqm); 377 } 378 379 return r; 380 } 381 382 static int resume_all_queues_mes(struct device_queue_manager *dqm) 383 { 384 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 385 int r = 0; 386 387 if (!down_read_trylock(&adev->reset_domain->sem)) 388 return -EIO; 389 390 r = 
amdgpu_mes_resume(adev); 391 up_read(&adev->reset_domain->sem); 392 393 if (r) { 394 dev_err(adev->dev, "failed to resume gangs from MES\n"); 395 dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); 396 kfd_hws_hang(dqm); 397 } 398 399 return r; 400 } 401 402 static void increment_queue_count(struct device_queue_manager *dqm, 403 struct qcm_process_device *qpd, 404 struct queue *q) 405 { 406 dqm->active_queue_count++; 407 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 408 dqm->active_cp_queue_count++; 409 410 if (q->properties.is_gws) { 411 dqm->gws_queue_count++; 412 qpd->mapped_gws_queue = true; 413 } 414 } 415 416 static void decrement_queue_count(struct device_queue_manager *dqm, 417 struct qcm_process_device *qpd, 418 struct queue *q) 419 { 420 dqm->active_queue_count--; 421 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 422 dqm->active_cp_queue_count--; 423 424 if (q->properties.is_gws) { 425 dqm->gws_queue_count--; 426 qpd->mapped_gws_queue = false; 427 } 428 } 429 430 /* 431 * Allocate a doorbell ID to this queue. 432 * If doorbell_id is passed in, make sure requested ID is valid then allocate it. 433 */ 434 static int allocate_doorbell(struct qcm_process_device *qpd, 435 struct queue *q, 436 uint32_t const *restore_id) 437 { 438 struct kfd_node *dev = qpd->dqm->dev; 439 440 if (!KFD_IS_SOC15(dev)) { 441 /* On pre-SOC15 chips we need to use the queue ID to 442 * preserve the user mode ABI. 443 */ 444 445 if (restore_id && *restore_id != q->properties.queue_id) 446 return -EINVAL; 447 448 q->doorbell_id = q->properties.queue_id; 449 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 450 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 451 /* For SDMA queues on SOC15 with 8-byte doorbell, use static 452 * doorbell assignments based on the engine and queue id. 453 * The doobell index distance between RLC (2*i) and (2*i+1) 454 * for a SDMA engine is 512. 
455 */ 456 457 uint32_t *idx_offset = dev->kfd->shared_resources.sdma_doorbell_idx; 458 459 /* 460 * q->properties.sdma_engine_id corresponds to the virtual 461 * sdma engine number. However, for doorbell allocation, 462 * we need the physical sdma engine id in order to get the 463 * correct doorbell offset. 464 */ 465 uint32_t valid_id = idx_offset[qpd->dqm->dev->node_id * 466 get_num_all_sdma_engines(qpd->dqm) + 467 q->properties.sdma_engine_id] 468 + (q->properties.sdma_queue_id & 1) 469 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET 470 + (q->properties.sdma_queue_id >> 1); 471 472 if (restore_id && *restore_id != valid_id) 473 return -EINVAL; 474 q->doorbell_id = valid_id; 475 } else { 476 /* For CP queues on SOC15 */ 477 if (restore_id) { 478 if (*restore_id >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) 479 return -EINVAL; 480 481 /* make sure that ID is free */ 482 if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap)) 483 return -EINVAL; 484 485 q->doorbell_id = *restore_id; 486 } else { 487 /* or reserve a free doorbell ID */ 488 unsigned int found; 489 490 found = find_first_zero_bit(qpd->doorbell_bitmap, 491 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); 492 if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { 493 pr_debug("No doorbells available"); 494 return -EBUSY; 495 } 496 set_bit(found, qpd->doorbell_bitmap); 497 q->doorbell_id = found; 498 } 499 } 500 501 q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev, 502 qpd->proc_doorbells, 503 q->doorbell_id, 504 dev->kfd->device_info.doorbell_size); 505 return 0; 506 } 507 508 static void deallocate_doorbell(struct qcm_process_device *qpd, 509 struct queue *q) 510 { 511 unsigned int old; 512 struct kfd_node *dev = qpd->dqm->dev; 513 514 if (!KFD_IS_SOC15(dev) || 515 q->properties.type == KFD_QUEUE_TYPE_SDMA || 516 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 517 return; 518 519 old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); 520 WARN_ON(!old); 521 } 522 523 static void 
program_trap_handler_settings(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd)
{
	/*
	 * Push the trap handler addresses (tba/tma) of @qpd to every XCC
	 * instance, provided the kfd2kgd backend implements the hook.
	 */
	uint32_t xcc_mask = dqm->dev->xcc_mask;
	int xcc_id;

	if (dqm->dev->kfd2kgd->program_trap_handler_settings)
		for_each_inst(xcc_id, xcc_mask)
			dqm->dev->kfd2kgd->program_trap_handler_settings(
				dqm->dev->adev, qpd->vmid, qpd->tba_addr,
				qpd->tma_addr, xcc_id);
}

/*
 * Pick the first VMID in [first_vmid_kfd, last_vmid_kfd] whose vmid_pasid
 * slot is zero, bind it to the process PASID and program the per-VMID
 * state (SH_MEM, optional trap handler, page table base, optional scratch
 * backing VA).
 *
 * Returns 0 on success or -ENOSPC when every VMID in the range is taken.
 */
static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
	struct device *dev = dqm->dev->adev->dev;
	int allocated_vmid = -1, i;

	for (i = dqm->dev->vm_info.first_vmid_kfd;
			i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
		if (!dqm->vmid_pasid[i]) {
			allocated_vmid = i;
			break;
		}
	}

	if (allocated_vmid < 0) {
		dev_err(dev, "no more vmid to allocate\n");
		return -ENOSPC;
	}

	pr_debug("vmid allocated: %d\n", allocated_vmid);

	dqm->vmid_pasid[allocated_vmid] = pdd->pasid;

	/* NOTE(review): the return value of the mapping call is not checked */
	set_pasid_vmid_mapping(dqm, pdd->pasid, allocated_vmid);

	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	program_sh_mem_settings(dqm, qpd);

	if (KFD_IS_SOC15(dqm->dev) && dqm->dev->kfd->cwsr_enabled)
		program_trap_handler_settings(dqm, qpd);

	/* qpd->page_table_base is set earlier when register_process()
	 * is called, i.e. when the first queue is created.
	 */
	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev,
			qpd->vmid,
			qpd->page_table_base);
	/* invalidate the VM context after pasid and vmid mapping is set up */
	kfd_flush_tlb(qpd_to_pdd(qpd));

	if (dqm->dev->kfd2kgd->set_scratch_backing_va)
		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,
				qpd->sh_hidden_private_base, qpd->vmid);

	return 0;
}

/*
 * Build a release_mem packet in the process IB buffer and submit it on
 * KGD_ENGINE_MEC1 for the process VMID.
 *
 * Returns -ENOMEM when the process has no IB mapping (ib_kaddr is NULL),
 * the packet builder's error, or the result of the IB submission.
 */
static int flush_texture_cache_nocpsch(struct kfd_node *kdev,
				struct qcm_process_device *qpd)
{
	const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf;
	int ret;

	if (!qpd->ib_kaddr)
		return -ENOMEM;

	ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
	if (ret)
		return ret;

	return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid,
				qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
				pmf->release_mem_size / sizeof(uint32_t));
}

/*
 * Undo allocate_vmid(): flush caches/TLB, clear the PASID<->VMID mapping
 * and zero the VMID recorded in @qpd and @q.
 */
static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	struct device *dev = dqm->dev->adev->dev;

	/* On GFX v7, CP doesn't flush TC at dequeue */
	if (q->device->adev->asic_type == CHIP_HAWAII)
		if (flush_texture_cache_nocpsch(q->device, qpd))
			dev_err(dev, "Failed to flush TC\n");

	kfd_flush_tlb(qpd_to_pdd(qpd));

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
	dqm->vmid_pasid[qpd->vmid] = 0;

	qpd->vmid = 0;
	q->properties.vmid = 0;
}

/*
 * Create a queue when queues are managed directly by the driver
 * (the "nocpsch" path).
 *
 * Takes the dqm lock, allocates a VMID for the first queue of the process,
 * an HQD slot or SDMA queue, a doorbell and the MQD, then loads the MQD if
 * the queue is active. @qd, @restore_mqd and @restore_ctl_stack are only
 * used when non-NULL @qd is passed, in which case the MQD is restored
 * instead of initialised.
 *
 * Returns 0 on success; -EPERM when the per-device queue limit is reached,
 * -ENOMEM when the MQD cannot be allocated, -EFAULT when an active queue is
 * loaded from a thread whose mm is not the process mm, or the error of the
 * failing allocation/load step. Every failure unwinds what was allocated.
 */
static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				const struct kfd_criu_queue_priv_data *qd,
				const void *restore_mqd, const void *restore_ctl_stack)
{
	struct mqd_manager *mqd_mgr;
	int retval;

	dqm_lock(dqm);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	/* The first queue of a process brings the VMID with it */
	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	q->properties.vmid = qpd->vmid;
	/*
	 * Eviction state logic: mark all queues as evicted, even ones
	 * not currently active. Restoring inactive queues later only
	 * updates the is_evicted flag but is a no-op otherwise.
	 */
	q->properties.is_evicted = !!qpd->evicted;

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		retval = allocate_hqd(dqm, q);
		if (retval)
			goto deallocate_vmid;
		pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			q->pipe, q->queue);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
		if (retval)
			goto deallocate_vmid;
		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	}

	retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
	if (retval)
		goto out_deallocate_hqd;

	/* Temporarily release dqm lock to avoid a circular lock dependency */
	dqm_unlock(dqm);
	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr, &q->properties);
	dqm_lock(dqm);

	if (!q->mqd_mem_obj) {
		retval = -ENOMEM;
		goto out_deallocate_doorbell;
	}

	if (qd)
		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
				     &q->properties, restore_mqd, restore_ctl_stack,
				     qd->ctl_stack_size);
	else
		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
					&q->gart_mqd_addr, &q->properties);

	if (q->properties.is_active) {
		/* Scheduler stopped: keep the queue in the list, skip the load */
		if (!dqm->sched_running) {
			WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
			goto add_queue_to_list;
		}

		if (WARN(q->process->mm != current->mm,
					"should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
					q->queue, &q->properties, current->mm);
		if (retval)
			goto out_free_mqd;
	}

add_queue_to_list:
	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active)
		increment_queue_count(dqm, qpd, q);

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	goto out_unlock;

	/* Error unwinding, in reverse order of acquisition */
out_free_mqd:
	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_hqd:
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		deallocate_hqd(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		deallocate_sdma_queue(dqm, q);
deallocate_vmid:
	/* Only drop the VMID if this would have been the first queue */
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
out_unlock:
	dqm_unlock(dqm);
	return retval;
}

/*
 * Reserve a hardware queue slot for @q. Pipes of MEC 0 are tried in
 * round-robin order starting at next_pipe_to_allocate; in each enabled pipe
 * the lowest set bit of allocated_queues[] (a free slot) is taken.
 *
 * Returns 0 with q->pipe / q->queue filled in, or -EBUSY if no slot is free.
 */
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = ffs(dqm->allocated_queues[pipe]) - 1;
			dqm->allocated_queues[pipe] &= ~(1 << bit);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

/* Give the slot taken by allocate_hqd() back by setting its bit again. */
static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}

/* SQ_CMD field values used by dbgdev_wave_reset_wavefronts() */
#define SQ_IND_CMD_CMD_KILL		0x00000003
#define SQ_IND_CMD_MODE_BROADCAST	0x00000001

/*
 * Kill all wavefronts of process @p on @dev: look up the VMID currently
 * mapped to the process PASID, then broadcast an SQ kill command for that
 * VMID on every XCC instance.
 *
 * Returns 0 on success, -EOPNOTSUPP when the backend cannot query the
 * VMID/PASID mapping, or -EFAULT when the process has no device data or no
 * VMID is mapped to it.
 */
static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process *p)
{
	int status = 0;
	unsigned int vmid;
	uint16_t queried_pasid;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;
	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
	uint32_t xcc_mask = dev->xcc_mask;
	int xcc_id;

	reg_sq_cmd.u32All = 0;
	reg_gfx_index.u32All = 0;

	pr_debug("Killing all process wavefronts\n");

	if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
		dev_err(dev->adev->dev, "no vmid pasid mapping supported\n");
		return -EOPNOTSUPP;
	}

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd)
		return -EFAULT;

	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
	 * ATC_VMID15_PASID_MAPPING
	 * to check which VMID the current process is mapped to.
	 */

	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
				(dev->adev, vmid, &queried_pasid);

		/* queried_pasid is only looked at when status is non-zero */
		if (status && queried_pasid == pdd->pasid) {
			pr_debug("Killing wave fronts of vmid %d and process pid %d\n",
				 vmid, p->lead_thread->pid);
			break;
		}
	}

	/* Loop ran to completion: no VMID carries this PASID */
	if (vmid > last_vmid_to_scan) {
		dev_err(dev->adev->dev, "Didn't find vmid for process pid %d\n",
			p->lead_thread->pid);
		return -EFAULT;
	}

	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
	reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
	reg_sq_cmd.bits.vm_id = vmid;

	for_each_inst(xcc_id, xcc_mask)
		dev->kfd2kgd->wave_control_execute(
			dev->adev, reg_gfx_index.u32All,
			reg_sq_cmd.u32All, xcc_id);

	return 0;
}

/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid unsynchronized access
 *
 * Releases the HQD slot or SDMA queue and the doorbell, destroys the MQD
 * with a wavefront-reset preemption and unlinks the queue. When the last
 * queue of the process goes away the VMID is released too, after an
 * optional wavefront reset.
 *
 * Returns -EINVAL for an unexpected queue type, 0 when the scheduler is
 * stopped (the MQD is then not destroyed and the queue stays linked), or
 * the result of destroy_mqd().
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		deallocate_hqd(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		deallocate_sdma_queue(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		deallocate_sdma_queue(dqm, q);
	else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	deallocate_doorbell(qpd, q);

	if (!dqm->sched_running) {
		WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
		return 0;
	}

	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				KFD_UNMAP_LATENCY_MS,
				q->pipe, q->queue);
	/* Preemption timed out: remember to kill the waves before the VMID goes */
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
					dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
			 */
			dbgdev_wave_reset_wavefronts(dqm->dev,
					qpd->pqm->process);
			qpd->reset_wavefronts = false;
		}

		deallocate_vmid(dqm, qpd, q);
	}
	qpd->queue_count--;
	if (q->properties.is_active)
		decrement_queue_count(dqm, qpd, q);

	return retval;
}

/*
 * Locked wrapper around destroy_queue_nocpsch_locked(). For SDMA queues the
 * activity counter is read from user memory before taking the dqm lock and
 * added to the process total if the destroy succeeded. The MQD memory is
 * freed after the lock is dropped, whatever the destroy result was.
 */
static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	uint64_t sdma_val = 0;
	struct device *dev = dqm->dev->adev->dev;
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
	struct mqd_manager *mqd_mgr =
		dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];

	/* Get the SDMA queue stats */
	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
							&sdma_val);
		/* A failed read is only logged; sdma_val then stays 0 */
		if (retval)
			dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n",
				q->properties.queue_id);
	}

	dqm_lock(dqm);
	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
	if (!retval)
		pdd->sdma_past_activity_counter += sdma_val;
	dqm_unlock(dqm);

	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	return retval;
}

/*
 * Apply changed queue properties to the MQD of @q.
 *
 * Under the dqm lock the queue is first taken off the hardware (runlist
 * unmap, MES removal or direct destroy_mqd, depending on the scheduling
 * policy), the MQD is updated, the active/GWS counters are adjusted to the
 * new state, and the queue is mapped again if it is active.
 *
 * Returns 0 on success, -ENODEV when the process has no device data,
 * -EACCES when the process' queues were reset, -EFAULT when an active queue
 * is loaded from a thread whose mm is not the process mm, or the error of
 * the failing unmap/destroy/map step.
 */
static int update_queue(struct device_queue_manager *dqm, struct queue *q,
			struct mqd_update_info *minfo)
{
	int retval = 0;
	struct device *dev = dqm->dev->adev->dev;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	bool prev_active = false;

	dqm_lock(dqm);
	pdd = kfd_get_process_device_data(q->device, q->process);
	if (!pdd) {
		retval = -ENODEV;
		goto out_unlock;
	}
	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	/* Save previous activity state for counters */
	prev_active = q->properties.is_active;

	/* Make sure the queue is unmapped before updating the MQD */
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		if (!dqm->dev->kfd->shared_resources.enable_mes)
			retval = unmap_queues_cpsch(dqm,
						    KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
		else if (prev_active)
			retval = remove_queue_mes(dqm, q, &pdd->qpd);

		/* queue is reset so inaccessible */
		if (pdd->has_reset_queue) {
			retval = -EACCES;
			goto out_unlock;
		}

		if (retval) {
			dev_err(dev, "unmap queue failed\n");
			goto out_unlock;
		}
	} else if (prev_active &&
		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {

		/* Scheduler stopped: bail out with retval still 0 */
		if (!dqm->sched_running) {
			WARN_ONCE(1, "Update non-HWS queue while stopped\n");
			goto out_unlock;
		}

		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				(dqm->dev->kfd->cwsr_enabled ?
				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
				 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval) {
			dev_err(dev, "destroy mqd failed\n");
			goto out_unlock;
		}
	}

	mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo);

	/*
	 * check active state vs. the previous state and modify
	 * counter accordingly. map_queues_cpsch uses the
	 * dqm->active_queue_count to determine whether a new runlist must be
	 * uploaded.
	 */
	if (q->properties.is_active && !prev_active) {
		increment_queue_count(dqm, &pdd->qpd, q);
	} else if (!q->properties.is_active && prev_active) {
		decrement_queue_count(dqm, &pdd->qpd, q);
	} else if (q->gws && !q->properties.is_gws) {
		/* Activity unchanged, GWS newly attached */
		if (q->properties.is_active) {
			dqm->gws_queue_count++;
			pdd->qpd.mapped_gws_queue = true;
		}
		q->properties.is_gws = true;
	} else if (!q->gws && q->properties.is_gws) {
		/* Activity unchanged, GWS detached */
		if (q->properties.is_active) {
			dqm->gws_queue_count--;
			pdd->qpd.mapped_gws_queue = false;
		}
		q->properties.is_gws = false;
	}

	/* Put the queue back on the hardware using the matching mechanism */
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		if (!dqm->dev->kfd->shared_resources.enable_mes)
			retval = map_queues_cpsch(dqm);
		else if (q->properties.is_active)
			retval = add_queue_mes(dqm, q, &pdd->qpd);
	} else if (q->properties.is_active &&
		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		if (WARN(q->process->mm != current->mm,
			 "should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
						   q->pipe, q->queue,
						   &q->properties, current->mm);
	}

out_unlock:
	dqm_unlock(dqm);
	return retval;
}

/* suspend_single_queue does not lock the dqm like the
 * evict_process_queues_cpsch or evict_process_queues_nocpsch. You should
 * lock the dqm before calling, and unlock after calling.
 *
 * The reason we don't lock the dqm is because this function may be
 * called on multiple queues in a loop, so rather than locking/unlocking
 * multiple times, we will just keep the dqm locked for all of the calls.
1064 */ 1065 static int suspend_single_queue(struct device_queue_manager *dqm, 1066 struct kfd_process_device *pdd, 1067 struct queue *q) 1068 { 1069 bool is_new; 1070 1071 if (q->properties.is_suspended) 1072 return 0; 1073 1074 pr_debug("Suspending process pid %d queue [%i]\n", 1075 pdd->process->lead_thread->pid, 1076 q->properties.queue_id); 1077 1078 is_new = q->properties.exception_status & KFD_EC_MASK(EC_QUEUE_NEW); 1079 1080 if (is_new || q->properties.is_being_destroyed) { 1081 pr_debug("Suspend: skip %s queue id %i\n", 1082 is_new ? "new" : "destroyed", 1083 q->properties.queue_id); 1084 return -EBUSY; 1085 } 1086 1087 q->properties.is_suspended = true; 1088 if (q->properties.is_active) { 1089 if (dqm->dev->kfd->shared_resources.enable_mes) { 1090 int r = remove_queue_mes(dqm, q, &pdd->qpd); 1091 1092 if (r) 1093 return r; 1094 } 1095 1096 decrement_queue_count(dqm, &pdd->qpd, q); 1097 q->properties.is_active = false; 1098 } 1099 1100 return 0; 1101 } 1102 1103 /* resume_single_queue does not lock the dqm like the functions 1104 * restore_process_queues_cpsch or restore_process_queues_nocpsch. You should 1105 * lock the dqm before calling, and unlock after calling. 1106 * 1107 * The reason we don't lock the dqm is because this function may be 1108 * called on multiple queues in a loop, so rather than locking/unlocking 1109 * multiple times, we will just keep the dqm locked for all of the calls. 
1110 */ 1111 static int resume_single_queue(struct device_queue_manager *dqm, 1112 struct qcm_process_device *qpd, 1113 struct queue *q) 1114 { 1115 struct kfd_process_device *pdd; 1116 1117 if (!q->properties.is_suspended) 1118 return 0; 1119 1120 pdd = qpd_to_pdd(qpd); 1121 1122 pr_debug("Restoring from suspend process pid %d queue [%i]\n", 1123 pdd->process->lead_thread->pid, 1124 q->properties.queue_id); 1125 1126 q->properties.is_suspended = false; 1127 1128 if (QUEUE_IS_ACTIVE(q->properties)) { 1129 if (dqm->dev->kfd->shared_resources.enable_mes) { 1130 int r = add_queue_mes(dqm, q, &pdd->qpd); 1131 1132 if (r) 1133 return r; 1134 } 1135 1136 q->properties.is_active = true; 1137 increment_queue_count(dqm, qpd, q); 1138 } 1139 1140 return 0; 1141 } 1142 1143 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, 1144 struct qcm_process_device *qpd) 1145 { 1146 struct queue *q; 1147 struct mqd_manager *mqd_mgr; 1148 struct kfd_process_device *pdd; 1149 int retval, ret = 0; 1150 1151 dqm_lock(dqm); 1152 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 1153 goto out; 1154 1155 pdd = qpd_to_pdd(qpd); 1156 pr_debug_ratelimited("Evicting process pid %d queues\n", 1157 pdd->process->lead_thread->pid); 1158 1159 pdd->last_evict_timestamp = get_jiffies_64(); 1160 /* Mark all queues as evicted. Deactivate all active queues on 1161 * the qpd. 1162 */ 1163 list_for_each_entry(q, &qpd->queues_list, list) { 1164 q->properties.is_evicted = true; 1165 if (!q->properties.is_active) 1166 continue; 1167 1168 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1169 q->properties.type)]; 1170 q->properties.is_active = false; 1171 decrement_queue_count(dqm, qpd, q); 1172 1173 if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) 1174 continue; 1175 1176 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 1177 (dqm->dev->kfd->cwsr_enabled ? 
1178 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 1179 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 1180 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 1181 if (retval && !ret) 1182 /* Return the first error, but keep going to 1183 * maintain a consistent eviction state 1184 */ 1185 ret = retval; 1186 } 1187 1188 out: 1189 dqm_unlock(dqm); 1190 return ret; 1191 } 1192 1193 static int evict_process_queues_cpsch(struct device_queue_manager *dqm, 1194 struct qcm_process_device *qpd) 1195 { 1196 struct queue *q; 1197 struct device *dev = dqm->dev->adev->dev; 1198 struct kfd_process_device *pdd; 1199 int retval = 0; 1200 1201 dqm_lock(dqm); 1202 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 1203 goto out; 1204 1205 pdd = qpd_to_pdd(qpd); 1206 1207 /* The debugger creates processes that temporarily have not acquired 1208 * all VMs for all devices and has no VMs itself. 1209 * Skip queue eviction on process eviction. 1210 */ 1211 if (!pdd->drm_priv) 1212 goto out; 1213 1214 pr_debug_ratelimited("Evicting process pid %d queues\n", 1215 pdd->process->lead_thread->pid); 1216 1217 if (dqm->dev->kfd->shared_resources.enable_mes) 1218 pdd->last_evict_timestamp = get_jiffies_64(); 1219 1220 /* Mark all queues as evicted. Deactivate all active queues on 1221 * the qpd. 1222 */ 1223 list_for_each_entry(q, &qpd->queues_list, list) { 1224 q->properties.is_evicted = true; 1225 if (!q->properties.is_active) 1226 continue; 1227 1228 q->properties.is_active = false; 1229 decrement_queue_count(dqm, qpd, q); 1230 1231 if (dqm->dev->kfd->shared_resources.enable_mes) { 1232 retval = remove_queue_mes(dqm, q, qpd); 1233 if (retval) { 1234 dev_err(dev, "Failed to evict queue %d\n", 1235 q->properties.queue_id); 1236 goto out; 1237 } 1238 } 1239 } 1240 1241 if (!dqm->dev->kfd->shared_resources.enable_mes) { 1242 pdd->last_evict_timestamp = get_jiffies_64(); 1243 retval = execute_queues_cpsch(dqm, 1244 qpd->is_debug ? 
1245 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : 1246 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 1247 USE_DEFAULT_GRACE_PERIOD); 1248 } 1249 1250 out: 1251 dqm_unlock(dqm); 1252 return retval; 1253 } 1254 1255 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, 1256 struct qcm_process_device *qpd) 1257 { 1258 struct mm_struct *mm = NULL; 1259 struct queue *q; 1260 struct mqd_manager *mqd_mgr; 1261 struct kfd_process_device *pdd; 1262 uint64_t pd_base; 1263 uint64_t eviction_duration; 1264 int retval, ret = 0; 1265 1266 pdd = qpd_to_pdd(qpd); 1267 /* Retrieve PD base */ 1268 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1269 1270 dqm_lock(dqm); 1271 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 1272 goto out; 1273 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 1274 qpd->evicted--; 1275 goto out; 1276 } 1277 1278 pr_debug_ratelimited("Restoring process pid %d queues\n", 1279 pdd->process->lead_thread->pid); 1280 1281 /* Update PD Base in QPD */ 1282 qpd->page_table_base = pd_base; 1283 pr_debug("Updated PD address to 0x%llx\n", pd_base); 1284 1285 if (!list_empty(&qpd->queues_list)) { 1286 dqm->dev->kfd2kgd->set_vm_context_page_table_base( 1287 dqm->dev->adev, 1288 qpd->vmid, 1289 qpd->page_table_base); 1290 kfd_flush_tlb(pdd); 1291 } 1292 1293 /* Take a safe reference to the mm_struct, which may otherwise 1294 * disappear even while the kfd_process is still referenced. 1295 */ 1296 mm = get_task_mm(pdd->process->lead_thread); 1297 if (!mm) { 1298 ret = -EFAULT; 1299 goto out; 1300 } 1301 1302 /* Remove the eviction flags. Activate queues that are not 1303 * inactive for other reasons. 
1304 */ 1305 list_for_each_entry(q, &qpd->queues_list, list) { 1306 q->properties.is_evicted = false; 1307 if (!QUEUE_IS_ACTIVE(q->properties)) 1308 continue; 1309 1310 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1311 q->properties.type)]; 1312 q->properties.is_active = true; 1313 increment_queue_count(dqm, qpd, q); 1314 1315 if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) 1316 continue; 1317 1318 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 1319 q->queue, &q->properties, mm); 1320 if (retval && !ret) 1321 /* Return the first error, but keep going to 1322 * maintain a consistent eviction state 1323 */ 1324 ret = retval; 1325 } 1326 qpd->evicted = 0; 1327 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 1328 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 1329 out: 1330 if (mm) 1331 mmput(mm); 1332 dqm_unlock(dqm); 1333 return ret; 1334 } 1335 1336 static int restore_process_queues_cpsch(struct device_queue_manager *dqm, 1337 struct qcm_process_device *qpd) 1338 { 1339 struct queue *q; 1340 struct device *dev = dqm->dev->adev->dev; 1341 struct kfd_process_device *pdd; 1342 uint64_t eviction_duration; 1343 int retval = 0; 1344 1345 pdd = qpd_to_pdd(qpd); 1346 1347 dqm_lock(dqm); 1348 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 1349 goto out; 1350 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 1351 qpd->evicted--; 1352 goto out; 1353 } 1354 1355 /* The debugger creates processes that temporarily have not acquired 1356 * all VMs for all devices and has no VMs itself. 1357 * Skip queue restore on process restore. 
1358 */ 1359 if (!pdd->drm_priv) 1360 goto vm_not_acquired; 1361 1362 pr_debug_ratelimited("Restoring process pid %d queues\n", 1363 pdd->process->lead_thread->pid); 1364 1365 /* Update PD Base in QPD */ 1366 qpd->page_table_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1367 pr_debug("Updated PD address to 0x%llx\n", qpd->page_table_base); 1368 1369 /* activate all active queues on the qpd */ 1370 list_for_each_entry(q, &qpd->queues_list, list) { 1371 q->properties.is_evicted = false; 1372 if (!QUEUE_IS_ACTIVE(q->properties)) 1373 continue; 1374 1375 q->properties.is_active = true; 1376 increment_queue_count(dqm, &pdd->qpd, q); 1377 1378 if (dqm->dev->kfd->shared_resources.enable_mes) { 1379 retval = add_queue_mes(dqm, q, qpd); 1380 if (retval) { 1381 dev_err(dev, "Failed to restore queue %d\n", 1382 q->properties.queue_id); 1383 goto out; 1384 } 1385 } 1386 } 1387 if (!dqm->dev->kfd->shared_resources.enable_mes) 1388 retval = execute_queues_cpsch(dqm, 1389 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); 1390 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 1391 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 1392 vm_not_acquired: 1393 qpd->evicted = 0; 1394 out: 1395 dqm_unlock(dqm); 1396 return retval; 1397 } 1398 1399 static int register_process(struct device_queue_manager *dqm, 1400 struct qcm_process_device *qpd) 1401 { 1402 struct device_process_node *n; 1403 struct kfd_process_device *pdd; 1404 uint64_t pd_base; 1405 int retval; 1406 1407 n = kzalloc_obj(*n); 1408 if (!n) 1409 return -ENOMEM; 1410 1411 n->qpd = qpd; 1412 1413 pdd = qpd_to_pdd(qpd); 1414 /* Retrieve PD base */ 1415 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1416 1417 dqm_lock(dqm); 1418 list_add(&n->list, &dqm->queues); 1419 1420 /* Update PD Base in QPD */ 1421 qpd->page_table_base = pd_base; 1422 pr_debug("Updated PD address to 0x%llx\n", pd_base); 1423 1424 retval = dqm->asic_ops.update_qpd(dqm, 
qpd); 1425 1426 dqm->processes_count++; 1427 1428 dqm_unlock(dqm); 1429 1430 /* Outside the DQM lock because under the DQM lock we can't do 1431 * reclaim or take other locks that others hold while reclaiming. 1432 */ 1433 kfd_inc_compute_active(dqm->dev); 1434 1435 return retval; 1436 } 1437 1438 static int unregister_process(struct device_queue_manager *dqm, 1439 struct qcm_process_device *qpd) 1440 { 1441 int retval = 0; 1442 struct device_process_node *cur, *next; 1443 1444 pr_debug("qpd->queues_list is %s\n", 1445 list_empty(&qpd->queues_list) ? "empty" : "not empty"); 1446 1447 dqm_lock(dqm); 1448 1449 list_for_each_entry_safe(cur, next, &dqm->queues, list) { 1450 if (qpd == cur->qpd) { 1451 list_del(&cur->list); 1452 kfree(cur); 1453 dqm->processes_count--; 1454 goto out; 1455 } 1456 } 1457 /* qpd not found in dqm list */ 1458 retval = 1; 1459 out: 1460 dqm_unlock(dqm); 1461 1462 /* Outside the DQM lock because under the DQM lock we can't do 1463 * reclaim or take other locks that others hold while reclaiming. 
1464 */ 1465 if (!retval) 1466 kfd_dec_compute_active(dqm->dev); 1467 1468 return retval; 1469 } 1470 1471 static int 1472 set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, 1473 unsigned int vmid) 1474 { 1475 uint32_t xcc_mask = dqm->dev->xcc_mask; 1476 int xcc_id, ret = 0; 1477 1478 for_each_inst(xcc_id, xcc_mask) { 1479 ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping( 1480 dqm->dev->adev, pasid, vmid, xcc_id); 1481 if (ret) 1482 break; 1483 } 1484 1485 return ret; 1486 } 1487 1488 static void init_interrupts(struct device_queue_manager *dqm) 1489 { 1490 uint32_t xcc_mask = dqm->dev->xcc_mask; 1491 unsigned int i, xcc_id; 1492 1493 for_each_inst(xcc_id, xcc_mask) { 1494 for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) { 1495 if (is_pipe_enabled(dqm, 0, i)) { 1496 dqm->dev->kfd2kgd->init_interrupts( 1497 dqm->dev->adev, i, xcc_id); 1498 } 1499 } 1500 } 1501 } 1502 1503 static int initialize_nocpsch(struct device_queue_manager *dqm) 1504 { 1505 int pipe, queue; 1506 1507 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1508 1509 dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), 1510 sizeof(unsigned int), GFP_KERNEL); 1511 if (!dqm->allocated_queues) 1512 return -ENOMEM; 1513 1514 mutex_init(&dqm->lock_hidden); 1515 INIT_LIST_HEAD(&dqm->queues); 1516 dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; 1517 dqm->active_cp_queue_count = 0; 1518 dqm->gws_queue_count = 0; 1519 1520 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 1521 int pipe_offset = pipe * get_queues_per_pipe(dqm); 1522 1523 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) 1524 if (test_bit(pipe_offset + queue, 1525 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 1526 dqm->allocated_queues[pipe] |= 1 << queue; 1527 } 1528 1529 memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid)); 1530 1531 init_sdma_bitmaps(dqm); 1532 1533 return 0; 1534 } 1535 1536 static void uninitialize(struct device_queue_manager *dqm) 1537 { 1538 int i; 1539 1540 
WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0); 1541 1542 kfree(dqm->allocated_queues); 1543 for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) 1544 kfree(dqm->mqd_mgrs[i]); 1545 mutex_destroy(&dqm->lock_hidden); 1546 } 1547 1548 static int start_nocpsch(struct device_queue_manager *dqm) 1549 { 1550 int r = 0; 1551 1552 pr_info("SW scheduler is used"); 1553 init_interrupts(dqm); 1554 1555 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1556 r = pm_init(&dqm->packet_mgr, dqm); 1557 if (!r) 1558 dqm->sched_running = true; 1559 1560 return r; 1561 } 1562 1563 static int stop_nocpsch(struct device_queue_manager *dqm) 1564 { 1565 dqm_lock(dqm); 1566 if (!dqm->sched_running) { 1567 dqm_unlock(dqm); 1568 return 0; 1569 } 1570 1571 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1572 pm_uninit(&dqm->packet_mgr); 1573 dqm->sched_running = false; 1574 dqm_unlock(dqm); 1575 1576 return 0; 1577 } 1578 1579 static int allocate_sdma_queue(struct device_queue_manager *dqm, 1580 struct queue *q, const uint32_t *restore_sdma_id) 1581 { 1582 struct device *dev = dqm->dev->adev->dev; 1583 int bit; 1584 1585 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1586 if (bitmap_empty(dqm->sdma_bitmap, get_num_sdma_queues(dqm))) { 1587 dev_warn(dev, "No more SDMA queue to allocate (%d total queues)\n", 1588 get_num_sdma_queues(dqm)); 1589 return -ENOMEM; 1590 } 1591 1592 if (restore_sdma_id) { 1593 if (*restore_sdma_id >= get_num_sdma_queues(dqm)) 1594 return -EINVAL; 1595 1596 /* Re-use existing sdma_id */ 1597 if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) { 1598 dev_err(dev, "SDMA queue already in use\n"); 1599 return -EBUSY; 1600 } 1601 clear_bit(*restore_sdma_id, dqm->sdma_bitmap); 1602 q->sdma_id = *restore_sdma_id; 1603 } else { 1604 /* Find first available sdma_id */ 1605 bit = find_first_bit(dqm->sdma_bitmap, 1606 get_num_sdma_queues(dqm)); 1607 clear_bit(bit, dqm->sdma_bitmap); 1608 q->sdma_id = bit; 1609 } 1610 1611 q->properties.sdma_engine_id = 1612 q->sdma_id % 
kfd_get_num_sdma_engines(dqm->dev); 1613 q->properties.sdma_queue_id = q->sdma_id / 1614 kfd_get_num_sdma_engines(dqm->dev); 1615 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1616 if (bitmap_empty(dqm->xgmi_sdma_bitmap, get_num_xgmi_sdma_queues(dqm))) { 1617 dev_warn(dev, "No more XGMI SDMA queue to allocate (%d total queues)\n", 1618 get_num_xgmi_sdma_queues(dqm)); 1619 return -ENOMEM; 1620 } 1621 if (restore_sdma_id) { 1622 if (*restore_sdma_id >= get_num_xgmi_sdma_queues(dqm)) 1623 return -EINVAL; 1624 1625 /* Re-use existing sdma_id */ 1626 if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) { 1627 dev_err(dev, "SDMA queue already in use\n"); 1628 return -EBUSY; 1629 } 1630 clear_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap); 1631 q->sdma_id = *restore_sdma_id; 1632 } else { 1633 bit = find_first_bit(dqm->xgmi_sdma_bitmap, 1634 get_num_xgmi_sdma_queues(dqm)); 1635 clear_bit(bit, dqm->xgmi_sdma_bitmap); 1636 q->sdma_id = bit; 1637 } 1638 /* sdma_engine_id is sdma id including 1639 * both PCIe-optimized SDMAs and XGMI- 1640 * optimized SDMAs. 
The calculation below 1641 * assumes the first N engines are always 1642 * PCIe-optimized ones 1643 */ 1644 q->properties.sdma_engine_id = 1645 kfd_get_num_sdma_engines(dqm->dev) + 1646 q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev); 1647 q->properties.sdma_queue_id = q->sdma_id / 1648 kfd_get_num_xgmi_sdma_engines(dqm->dev); 1649 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { 1650 int i, num_queues, num_engines, eng_offset = 0, start_engine; 1651 bool free_bit_found = false, is_xgmi = false; 1652 1653 if (q->properties.sdma_engine_id < kfd_get_num_sdma_engines(dqm->dev)) { 1654 num_queues = get_num_sdma_queues(dqm); 1655 num_engines = kfd_get_num_sdma_engines(dqm->dev); 1656 q->properties.type = KFD_QUEUE_TYPE_SDMA; 1657 } else { 1658 num_queues = get_num_xgmi_sdma_queues(dqm); 1659 num_engines = kfd_get_num_xgmi_sdma_engines(dqm->dev); 1660 eng_offset = kfd_get_num_sdma_engines(dqm->dev); 1661 q->properties.type = KFD_QUEUE_TYPE_SDMA_XGMI; 1662 is_xgmi = true; 1663 } 1664 1665 /* Scan available bit based on target engine ID. */ 1666 start_engine = q->properties.sdma_engine_id - eng_offset; 1667 for (i = start_engine; i < num_queues; i += num_engines) { 1668 1669 if (!test_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap)) 1670 continue; 1671 1672 clear_bit(i, is_xgmi ? 
dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap); 1673 q->sdma_id = i; 1674 q->properties.sdma_queue_id = q->sdma_id / num_engines; 1675 free_bit_found = true; 1676 break; 1677 } 1678 1679 if (!free_bit_found) { 1680 dev_warn(dev, "No more SDMA queue to allocate for target ID %i (%d total queues)\n", 1681 q->properties.sdma_engine_id, num_queues); 1682 return -ENOMEM; 1683 } 1684 } 1685 1686 pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); 1687 pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); 1688 1689 return 0; 1690 } 1691 1692 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 1693 struct queue *q) 1694 { 1695 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1696 if (q->sdma_id >= get_num_sdma_queues(dqm)) 1697 return; 1698 set_bit(q->sdma_id, dqm->sdma_bitmap); 1699 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1700 if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm)) 1701 return; 1702 set_bit(q->sdma_id, dqm->xgmi_sdma_bitmap); 1703 } 1704 } 1705 1706 /* 1707 * Device Queue Manager implementation for cp scheduler 1708 */ 1709 1710 static int set_sched_resources(struct device_queue_manager *dqm) 1711 { 1712 int i, mec; 1713 struct scheduling_resources res; 1714 struct device *dev = dqm->dev->adev->dev; 1715 1716 res.vmid_mask = dqm->dev->compute_vmid_bitmap; 1717 1718 res.queue_mask = 0; 1719 for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) { 1720 mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe) 1721 / dqm->dev->kfd->shared_resources.num_pipe_per_mec; 1722 1723 if (!test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 1724 continue; 1725 1726 /* only acquire queues from the first MEC */ 1727 if (mec > 0) 1728 continue; 1729 1730 /* This situation may be hit in the future if a new HW 1731 * generation exposes more than 64 queues. 
If so, the 1732 * definition of res.queue_mask needs updating 1733 */ 1734 if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) { 1735 dev_err(dev, "Invalid queue enabled by amdgpu: %d\n", i); 1736 break; 1737 } 1738 1739 res.queue_mask |= 1ull 1740 << amdgpu_queue_mask_bit_to_set_resource_bit( 1741 dqm->dev->adev, i); 1742 } 1743 res.gws_mask = ~0ull; 1744 res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0; 1745 1746 pr_debug("Scheduling resources:\n" 1747 "vmid mask: 0x%8X\n" 1748 "queue mask: 0x%8llX\n", 1749 res.vmid_mask, res.queue_mask); 1750 1751 return pm_send_set_resources(&dqm->packet_mgr, &res); 1752 } 1753 1754 static int initialize_cpsch(struct device_queue_manager *dqm) 1755 { 1756 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1757 1758 mutex_init(&dqm->lock_hidden); 1759 INIT_LIST_HEAD(&dqm->queues); 1760 dqm->active_queue_count = dqm->processes_count = 0; 1761 dqm->active_cp_queue_count = 0; 1762 dqm->gws_queue_count = 0; 1763 dqm->active_runlist = false; 1764 dqm->trap_debug_vmid = 0; 1765 1766 init_sdma_bitmaps(dqm); 1767 1768 update_dqm_wait_times(dqm); 1769 return 0; 1770 } 1771 1772 /* halt_cpsch: 1773 * Unmap queues so the schedule doesn't continue remaining jobs in the queue. 1774 * Then set dqm->sched_halt so queues don't map to runlist until unhalt_cpsch 1775 * is called. 
1776 */ 1777 static int halt_cpsch(struct device_queue_manager *dqm) 1778 { 1779 int ret = 0; 1780 1781 dqm_lock(dqm); 1782 if (!dqm->sched_running) { 1783 dqm_unlock(dqm); 1784 return 0; 1785 } 1786 1787 WARN_ONCE(dqm->sched_halt, "Scheduling is already on halt\n"); 1788 1789 if (!dqm->is_hws_hang) { 1790 if (!dqm->dev->kfd->shared_resources.enable_mes) 1791 ret = unmap_queues_cpsch(dqm, 1792 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 1793 USE_DEFAULT_GRACE_PERIOD, false); 1794 else 1795 ret = remove_all_kfd_queues_mes(dqm); 1796 } 1797 dqm->sched_halt = true; 1798 dqm_unlock(dqm); 1799 1800 return ret; 1801 } 1802 1803 /* unhalt_cpsch 1804 * Unset dqm->sched_halt and map queues back to runlist 1805 */ 1806 static int unhalt_cpsch(struct device_queue_manager *dqm) 1807 { 1808 int ret = 0; 1809 1810 dqm_lock(dqm); 1811 if (!dqm->sched_running || !dqm->sched_halt) { 1812 WARN_ONCE(!dqm->sched_halt, "Scheduling is not on halt.\n"); 1813 dqm_unlock(dqm); 1814 return 0; 1815 } 1816 dqm->sched_halt = false; 1817 if (!dqm->dev->kfd->shared_resources.enable_mes) 1818 ret = execute_queues_cpsch(dqm, 1819 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 1820 0, USE_DEFAULT_GRACE_PERIOD); 1821 else 1822 ret = add_all_kfd_queues_mes(dqm); 1823 1824 dqm_unlock(dqm); 1825 1826 return ret; 1827 } 1828 1829 static int start_cpsch(struct device_queue_manager *dqm) 1830 { 1831 struct device *dev = dqm->dev->adev->dev; 1832 int retval, num_hw_queue_slots; 1833 1834 dqm_lock(dqm); 1835 1836 if (!dqm->dev->kfd->shared_resources.enable_mes) { 1837 retval = pm_init(&dqm->packet_mgr, dqm); 1838 if (retval) 1839 goto fail_packet_manager_init; 1840 1841 retval = set_sched_resources(dqm); 1842 if (retval) 1843 goto fail_set_sched_resources; 1844 } 1845 pr_debug("Allocating fence memory\n"); 1846 1847 /* allocate fence memory on the gart */ 1848 retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr), 1849 &dqm->fence_mem); 1850 1851 if (retval) 1852 goto fail_allocate_vidmem; 1853 1854 
dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr; 1855 dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr; 1856 1857 init_interrupts(dqm); 1858 1859 /* clear hang status when driver try to start the hw scheduler */ 1860 dqm->sched_running = true; 1861 1862 if (!dqm->dev->kfd->shared_resources.enable_mes) { 1863 if (pm_config_dequeue_wait_counts(&dqm->packet_mgr, 1864 KFD_DEQUEUE_WAIT_INIT, 0 /* unused */)) 1865 dev_err(dev, "Setting optimized dequeue wait failed. Using default values\n"); 1866 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); 1867 } 1868 1869 /* setup per-queue reset detection buffer */ 1870 num_hw_queue_slots = dqm->dev->kfd->shared_resources.num_queue_per_pipe * 1871 dqm->dev->kfd->shared_resources.num_pipe_per_mec * 1872 NUM_XCC(dqm->dev->xcc_mask); 1873 1874 dqm->detect_hang_info_size = num_hw_queue_slots * sizeof(struct dqm_detect_hang_info); 1875 dqm->detect_hang_info = kzalloc(dqm->detect_hang_info_size, GFP_KERNEL); 1876 1877 if (!dqm->detect_hang_info) { 1878 retval = -ENOMEM; 1879 goto fail_detect_hang_buffer; 1880 } 1881 1882 dqm_unlock(dqm); 1883 1884 return 0; 1885 fail_detect_hang_buffer: 1886 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); 1887 fail_allocate_vidmem: 1888 fail_set_sched_resources: 1889 if (!dqm->dev->kfd->shared_resources.enable_mes) 1890 pm_uninit(&dqm->packet_mgr); 1891 fail_packet_manager_init: 1892 dqm_unlock(dqm); 1893 return retval; 1894 } 1895 1896 static int stop_cpsch(struct device_queue_manager *dqm) 1897 { 1898 int ret = 0; 1899 1900 dqm_lock(dqm); 1901 if (!dqm->sched_running) { 1902 dqm_unlock(dqm); 1903 return 0; 1904 } 1905 1906 if (!dqm->dev->kfd->shared_resources.enable_mes) 1907 ret = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 1908 0, USE_DEFAULT_GRACE_PERIOD, false); 1909 else 1910 ret = remove_all_kfd_queues_mes(dqm); 1911 1912 dqm->sched_running = false; 1913 1914 if (!dqm->dev->kfd->shared_resources.enable_mes) 1915 
pm_release_ib(&dqm->packet_mgr); 1916 1917 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); 1918 if (!dqm->dev->kfd->shared_resources.enable_mes) 1919 pm_uninit(&dqm->packet_mgr); 1920 kfree(dqm->detect_hang_info); 1921 dqm->detect_hang_info = NULL; 1922 dqm_unlock(dqm); 1923 1924 return ret; 1925 } 1926 1927 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, 1928 struct kernel_queue *kq, 1929 struct qcm_process_device *qpd) 1930 { 1931 dqm_lock(dqm); 1932 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1933 pr_warn("Can't create new kernel queue because %d queues were already created\n", 1934 dqm->total_queue_count); 1935 dqm_unlock(dqm); 1936 return -EPERM; 1937 } 1938 1939 /* 1940 * Unconditionally increment this counter, regardless of the queue's 1941 * type or whether the queue is active. 1942 */ 1943 dqm->total_queue_count++; 1944 pr_debug("Total of %d queues are accountable so far\n", 1945 dqm->total_queue_count); 1946 1947 list_add(&kq->list, &qpd->priv_queue_list); 1948 increment_queue_count(dqm, qpd, kq->queue); 1949 qpd->is_debug = true; 1950 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 1951 USE_DEFAULT_GRACE_PERIOD); 1952 dqm_unlock(dqm); 1953 1954 return 0; 1955 } 1956 1957 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, 1958 struct kernel_queue *kq, 1959 struct qcm_process_device *qpd) 1960 { 1961 dqm_lock(dqm); 1962 list_del(&kq->list); 1963 decrement_queue_count(dqm, qpd, kq->queue); 1964 qpd->is_debug = false; 1965 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 1966 USE_DEFAULT_GRACE_PERIOD); 1967 /* 1968 * Unconditionally decrement this counter, regardless of the queue's 1969 * type. 
1970 */ 1971 dqm->total_queue_count--; 1972 pr_debug("Total of %d queues are accountable so far\n", 1973 dqm->total_queue_count); 1974 dqm_unlock(dqm); 1975 } 1976 1977 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, 1978 struct qcm_process_device *qpd, 1979 const struct kfd_criu_queue_priv_data *qd, 1980 const void *restore_mqd, const void *restore_ctl_stack) 1981 { 1982 int retval; 1983 struct mqd_manager *mqd_mgr; 1984 1985 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1986 pr_warn("Can't create new usermode queue because %d queues were already created\n", 1987 dqm->total_queue_count); 1988 retval = -EPERM; 1989 goto out; 1990 } 1991 1992 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 1993 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI || 1994 q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { 1995 dqm_lock(dqm); 1996 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); 1997 dqm_unlock(dqm); 1998 if (retval) 1999 goto out; 2000 } 2001 2002 retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL); 2003 if (retval) 2004 goto out_deallocate_sdma_queue; 2005 2006 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2007 q->properties.type)]; 2008 2009 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 2010 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 2011 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 2012 q->properties.tba_addr = qpd->tba_addr; 2013 q->properties.tma_addr = qpd->tma_addr; 2014 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr, &q->properties); 2015 if (!q->mqd_mem_obj) { 2016 retval = -ENOMEM; 2017 goto out_deallocate_doorbell; 2018 } 2019 2020 dqm_lock(dqm); 2021 /* 2022 * Eviction state logic: mark all queues as evicted, even ones 2023 * not currently active. Restoring inactive queues later only 2024 * updates the is_evicted flag but is a no-op otherwise. 
2025 */ 2026 q->properties.is_evicted = !!qpd->evicted; 2027 q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled && 2028 kfd_dbg_has_cwsr_workaround(q->device); 2029 2030 if (qd) 2031 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, 2032 &q->properties, restore_mqd, restore_ctl_stack, 2033 qd->ctl_stack_size); 2034 else 2035 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 2036 &q->gart_mqd_addr, &q->properties); 2037 2038 list_add(&q->list, &qpd->queues_list); 2039 qpd->queue_count++; 2040 2041 if (q->properties.is_active) { 2042 increment_queue_count(dqm, qpd, q); 2043 2044 if (!dqm->dev->kfd->shared_resources.enable_mes) 2045 retval = execute_queues_cpsch(dqm, 2046 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); 2047 else 2048 retval = add_queue_mes(dqm, q, qpd); 2049 if (retval) 2050 goto cleanup_queue; 2051 } 2052 2053 /* 2054 * Unconditionally increment this counter, regardless of the queue's 2055 * type or whether the queue is active. 
2056 */ 2057 dqm->total_queue_count++; 2058 2059 pr_debug("Total of %d queues are accountable so far\n", 2060 dqm->total_queue_count); 2061 2062 dqm_unlock(dqm); 2063 return retval; 2064 2065 cleanup_queue: 2066 qpd->queue_count--; 2067 list_del(&q->list); 2068 if (q->properties.is_active) 2069 decrement_queue_count(dqm, qpd, q); 2070 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2071 dqm_unlock(dqm); 2072 out_deallocate_doorbell: 2073 deallocate_doorbell(qpd, q); 2074 out_deallocate_sdma_queue: 2075 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 2076 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 2077 dqm_lock(dqm); 2078 deallocate_sdma_queue(dqm, q); 2079 dqm_unlock(dqm); 2080 } 2081 out: 2082 return retval; 2083 } 2084 2085 int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm, 2086 uint64_t fence_value, 2087 unsigned int timeout_ms) 2088 { 2089 unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies; 2090 struct device *dev = dqm->dev->adev->dev; 2091 uint64_t *fence_addr = dqm->fence_addr; 2092 2093 while (*fence_addr != fence_value) { 2094 /* Fatal err detected, this response won't come */ 2095 if (amdgpu_amdkfd_is_fed(dqm->dev->adev) || 2096 amdgpu_in_reset(dqm->dev->adev)) 2097 return -EIO; 2098 2099 if (time_after(jiffies, end_jiffies)) { 2100 dev_err(dev, "qcm fence wait loop timeout expired\n"); 2101 /* In HWS case, this is used to halt the driver thread 2102 * in order not to mess up CP states before doing 2103 * scandumps for FW debugging. 
2104 */ 2105 while (halt_if_hws_hang) 2106 schedule(); 2107 2108 return -ETIME; 2109 } 2110 schedule(); 2111 } 2112 2113 return 0; 2114 } 2115 2116 /* dqm->lock mutex has to be locked before calling this function */ 2117 static int map_queues_cpsch(struct device_queue_manager *dqm) 2118 { 2119 struct device *dev = dqm->dev->adev->dev; 2120 int retval; 2121 2122 if (!dqm->sched_running || dqm->sched_halt) 2123 return 0; 2124 if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0) 2125 return 0; 2126 if (dqm->active_runlist) 2127 return 0; 2128 2129 retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues); 2130 pr_debug("%s sent runlist\n", __func__); 2131 if (retval) { 2132 dev_err(dev, "failed to execute runlist\n"); 2133 return retval; 2134 } 2135 dqm->active_runlist = true; 2136 2137 return retval; 2138 } 2139 2140 static void set_queue_as_reset(struct device_queue_manager *dqm, struct queue *q, 2141 struct qcm_process_device *qpd) 2142 { 2143 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 2144 2145 dev_err(dqm->dev->adev->dev, "queue id 0x%0x at pasid %d is reset\n", 2146 q->properties.queue_id, pdd->process->lead_thread->pid); 2147 2148 pdd->has_reset_queue = true; 2149 if (q->properties.is_active) { 2150 q->properties.is_active = false; 2151 decrement_queue_count(dqm, qpd, q); 2152 } 2153 } 2154 2155 static int detect_queue_hang(struct device_queue_manager *dqm) 2156 { 2157 int i; 2158 2159 /* detect should be used only in dqm locked queue reset */ 2160 if (WARN_ON(dqm->detect_hang_count > 0)) 2161 return 0; 2162 2163 memset(dqm->detect_hang_info, 0, dqm->detect_hang_info_size); 2164 2165 for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) { 2166 uint32_t mec, pipe, queue; 2167 int xcc_id; 2168 2169 mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe) 2170 / dqm->dev->kfd->shared_resources.num_pipe_per_mec; 2171 2172 if (mec || !test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 2173 continue; 2174 2175 
amdgpu_queue_mask_bit_to_mec_queue(dqm->dev->adev, i, &mec, &pipe, &queue); 2176 2177 for_each_inst(xcc_id, dqm->dev->xcc_mask) { 2178 uint64_t queue_addr = dqm->dev->kfd2kgd->hqd_get_pq_addr( 2179 dqm->dev->adev, pipe, queue, xcc_id); 2180 struct dqm_detect_hang_info hang_info; 2181 2182 if (!queue_addr) 2183 continue; 2184 2185 hang_info.pipe_id = pipe; 2186 hang_info.queue_id = queue; 2187 hang_info.xcc_id = xcc_id; 2188 hang_info.queue_address = queue_addr; 2189 2190 dqm->detect_hang_info[dqm->detect_hang_count] = hang_info; 2191 dqm->detect_hang_count++; 2192 } 2193 } 2194 2195 return dqm->detect_hang_count; 2196 } 2197 2198 static struct queue *find_queue_by_address(struct device_queue_manager *dqm, uint64_t queue_address) 2199 { 2200 struct device_process_node *cur; 2201 struct qcm_process_device *qpd; 2202 struct queue *q; 2203 2204 list_for_each_entry(cur, &dqm->queues, list) { 2205 qpd = cur->qpd; 2206 list_for_each_entry(q, &qpd->queues_list, list) { 2207 if (queue_address == q->properties.queue_address) 2208 return q; 2209 } 2210 } 2211 2212 return NULL; 2213 } 2214 2215 static int reset_hung_queues(struct device_queue_manager *dqm) 2216 { 2217 int r = 0, reset_count = 0, i; 2218 2219 if (!dqm->detect_hang_info || dqm->is_hws_hang) 2220 return -EIO; 2221 2222 /* assume dqm locked. 
*/ 2223 if (!detect_queue_hang(dqm)) 2224 return -ENOTRECOVERABLE; 2225 2226 for (i = 0; i < dqm->detect_hang_count; i++) { 2227 struct dqm_detect_hang_info hang_info = dqm->detect_hang_info[i]; 2228 struct queue *q = find_queue_by_address(dqm, hang_info.queue_address); 2229 struct kfd_process_device *pdd; 2230 uint64_t queue_addr = 0; 2231 2232 if (!q) { 2233 r = -ENOTRECOVERABLE; 2234 goto reset_fail; 2235 } 2236 2237 pdd = kfd_get_process_device_data(dqm->dev, q->process); 2238 if (!pdd) { 2239 r = -ENOTRECOVERABLE; 2240 goto reset_fail; 2241 } 2242 2243 queue_addr = dqm->dev->kfd2kgd->hqd_reset(dqm->dev->adev, 2244 hang_info.pipe_id, hang_info.queue_id, hang_info.xcc_id, 2245 KFD_UNMAP_LATENCY_MS); 2246 2247 /* either reset failed or we reset an unexpected queue. */ 2248 if (queue_addr != q->properties.queue_address) { 2249 r = -ENOTRECOVERABLE; 2250 goto reset_fail; 2251 } 2252 2253 set_queue_as_reset(dqm, q, &pdd->qpd); 2254 reset_count++; 2255 } 2256 2257 if (reset_count == dqm->detect_hang_count) 2258 kfd_signal_reset_event(dqm->dev); 2259 else 2260 r = -ENOTRECOVERABLE; 2261 2262 reset_fail: 2263 dqm->detect_hang_count = 0; 2264 2265 return r; 2266 } 2267 2268 static bool sdma_has_hang(struct device_queue_manager *dqm) 2269 { 2270 int engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm); 2271 int engine_end = engine_start + get_num_all_sdma_engines(dqm); 2272 int num_queues_per_eng = dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 2273 int i, j; 2274 2275 for (i = engine_start; i < engine_end; i++) { 2276 for (j = 0; j < num_queues_per_eng; j++) { 2277 if (!dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j)) 2278 continue; 2279 2280 return true; 2281 } 2282 } 2283 2284 return false; 2285 } 2286 2287 static bool set_sdma_queue_as_reset(struct device_queue_manager *dqm, 2288 uint32_t doorbell_off) 2289 { 2290 struct device_process_node *cur; 2291 struct qcm_process_device *qpd; 2292 struct queue *q; 2293 2294 
/*
 * Reset every SDMA engine of this node that still reports a queue doorbell
 * and mark the owning user queue as reset.
 *
 * The caller is expected to hold dqm->lock (NOTE(review): inferred from the
 * call chain through unmap_queues_cpsch(); confirm for any new caller).
 *
 * Return:
 *   0                 at least one engine was reset and the reset event was
 *                     signalled,
 *   -EIO              dqm->is_hws_hang is already set,
 *   -ENOTRECOVERABLE  no doorbell was found, an engine reset failed, the
 *                     doorbell was still reported after the reset, or no
 *                     SDMA queue matched the doorbell offset.
 *
 * dqm->detect_hang_count is used as a scratch counter and is always left at
 * zero on return.
 */
static int reset_hung_queues_sdma(struct device_queue_manager *dqm)
{
	int engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm);
	int engine_end = engine_start + get_num_all_sdma_engines(dqm);
	int num_queues_per_eng = dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
	int r = 0, i, j;

	if (dqm->is_hws_hang)
		return -EIO;

	/* Scan for hung HW queues and reset engine. */
	dqm->detect_hang_count = 0;
	for (i = engine_start; i < engine_end; i++) {
		for (j = 0; j < num_queues_per_eng; j++) {
			/* Capture the offset before the reset; it identifies the queue below. */
			uint32_t doorbell_off =
				dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j);

			if (!doorbell_off)
				continue;

			/*
			 * Reset engine and check. The three steps are order
			 * dependent: reset the engine, re-read the slot (a
			 * doorbell that is still reported counts as failure),
			 * then flag the matching software queue as reset.
			 */
			if (amdgpu_sdma_reset_engine(dqm->dev->adev, i, false) ||
			    dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j) ||
			    !set_sdma_queue_as_reset(dqm, doorbell_off)) {
				r = -ENOTRECOVERABLE;
				goto reset_fail;
			}

			/* Should only expect one queue active per engine */
			dqm->detect_hang_count++;
			break;
		}
	}

	/* Signal process reset */
	if (dqm->detect_hang_count)
		kfd_signal_reset_event(dqm->dev);
	else
		r = -ENOTRECOVERABLE;

reset_fail:
	/* Reached on success as well: the counter is always cleared. */
	dqm->detect_hang_count = 0;

	return r;
}
2380 * This is configured by setting CP_IQ_WAIT_TIME2.SCH_WAVE 2381 * 2382 */ 2383 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 2384 enum kfd_unmap_queues_filter filter, 2385 uint32_t filter_param, 2386 uint32_t grace_period, 2387 bool reset) 2388 { 2389 struct device *dev = dqm->dev->adev->dev; 2390 struct mqd_manager *mqd_mgr; 2391 int retval; 2392 2393 if (!dqm->sched_running) 2394 return 0; 2395 if (!dqm->active_runlist) 2396 return 0; 2397 if (!down_read_trylock(&dqm->dev->adev->reset_domain->sem)) 2398 return -EIO; 2399 2400 if (grace_period != USE_DEFAULT_GRACE_PERIOD) { 2401 retval = pm_config_dequeue_wait_counts(&dqm->packet_mgr, 2402 KFD_DEQUEUE_WAIT_SET_SCH_WAVE, grace_period); 2403 if (retval) 2404 goto out; 2405 } 2406 2407 retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset); 2408 if (retval) 2409 goto out; 2410 2411 *dqm->fence_addr = KFD_FENCE_INIT; 2412 mb(); 2413 pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr, 2414 KFD_FENCE_COMPLETED); 2415 /* should be timed out */ 2416 retval = amdkfd_fence_wait_timeout(dqm, KFD_FENCE_COMPLETED, 2417 queue_preemption_timeout_ms); 2418 if (retval) { 2419 dev_err(dev, "The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); 2420 kfd_hws_hang(dqm); 2421 goto out; 2422 } 2423 2424 /* In the current MEC firmware implementation, if compute queue 2425 * doesn't response to the preemption request in time, HIQ will 2426 * abandon the unmap request without returning any timeout error 2427 * to driver. Instead, MEC firmware will log the doorbell of the 2428 * unresponding compute queue to HIQ.MQD.queue_doorbell_id fields. 
2429 * To make sure the queue unmap was successful, driver need to 2430 * check those fields 2431 */ 2432 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; 2433 if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd) && 2434 reset_queues_on_hws_hang(dqm, false)) 2435 goto reset_fail; 2436 2437 /* Check for SDMA hang and attempt SDMA reset */ 2438 if (sdma_has_hang(dqm) && reset_queues_on_hws_hang(dqm, true)) 2439 goto reset_fail; 2440 2441 /* We need to reset the grace period value for this device */ 2442 if (grace_period != USE_DEFAULT_GRACE_PERIOD) { 2443 if (pm_config_dequeue_wait_counts(&dqm->packet_mgr, 2444 KFD_DEQUEUE_WAIT_RESET, 0 /* unused */)) 2445 dev_err(dev, "Failed to reset grace period\n"); 2446 } 2447 2448 pm_release_ib(&dqm->packet_mgr); 2449 dqm->active_runlist = false; 2450 out: 2451 up_read(&dqm->dev->adev->reset_domain->sem); 2452 return retval; 2453 2454 reset_fail: 2455 dqm->is_hws_hang = true; 2456 kfd_hws_hang(dqm); 2457 up_read(&dqm->dev->adev->reset_domain->sem); 2458 return -ETIME; 2459 } 2460 2461 /* only for compute queue */ 2462 static int reset_queues_cpsch(struct device_queue_manager *dqm, uint16_t pasid) 2463 { 2464 int retval; 2465 2466 dqm_lock(dqm); 2467 2468 retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID, 2469 pasid, USE_DEFAULT_GRACE_PERIOD, true); 2470 2471 dqm_unlock(dqm); 2472 return retval; 2473 } 2474 2475 /* dqm->lock mutex has to be locked before calling this function */ 2476 static int execute_queues_cpsch(struct device_queue_manager *dqm, 2477 enum kfd_unmap_queues_filter filter, 2478 uint32_t filter_param, 2479 uint32_t grace_period) 2480 { 2481 int retval; 2482 2483 if (!down_read_trylock(&dqm->dev->adev->reset_domain->sem)) 2484 return -EIO; 2485 retval = unmap_queues_cpsch(dqm, filter, filter_param, grace_period, false); 2486 if (!retval) 2487 retval = map_queues_cpsch(dqm); 2488 up_read(&dqm->dev->adev->reset_domain->sem); 2489 return retval; 2490 } 2491 2492 static int 
/*
 * Destroy a user queue.
 *
 * Takes and releases dqm->lock itself. wait_on_destroy_queue() may
 * temporarily drop both dqm->lock and the process mutex while it waits for
 * a suspended queue to be resumed.
 *
 * Return:
 *   0        the queue was removed,
 *   -EBUSY   the process is being debugged (qpd->is_debug); the queue is
 *            left in place,
 *   other    error from wait_on_destroy_queue() (queue left in place), or
 *            error from execute_queues_cpsch()/remove_queue_mes(). In the
 *            latter case the queue is still unlinked, the counters are
 *            updated and the MQD is freed before the error is returned.
 */
static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;
	uint64_t sdma_val = 0;
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
	struct device *dev = dqm->dev->adev->dev;

	/*
	 * Get the SDMA queue stats. This is done before dqm->lock is taken.
	 * A read failure is only logged; sdma_val then stays 0.
	 */
	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
							&sdma_val);
		if (retval)
			dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n",
				q->properties.queue_id);
	}

	/* remove queue from list to prevent rescheduling after preemption */
	dqm_lock(dqm);

	retval = wait_on_destroy_queue(dqm, q);

	if (retval) {
		dqm_unlock(dqm);
		return retval;
	}

	/*
	 * wait_on_destroy_queue() returns 0 without doing anything for a
	 * debugged process, so that case is rejected here.
	 */
	if (qpd->is_debug) {
		/*
		 * error, currently we do not allow to destroy a queue
		 * of a currently debugged process
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;

	}

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	deallocate_doorbell(qpd, q);

	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		deallocate_sdma_queue(dqm, q);
		/* Fold the counter value read above into the per-device total. */
		pdd->sdma_past_activity_counter += sdma_val;
	}

	if (q->properties.is_active) {
		decrement_queue_count(dqm, qpd, q);
		q->properties.is_active = false;
		if (!dqm->dev->kfd->shared_resources.enable_mes) {
			retval = execute_queues_cpsch(dqm,
						      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
						      USE_DEFAULT_GRACE_PERIOD);
			/* Flag the process so its wavefronts get reset later. */
			if (retval == -ETIME)
				qpd->reset_wavefronts = true;
		} else {
			retval = remove_queue_mes(dqm, q, qpd);
		}
	}
	/* Unlinking and accounting happen even if the unmap/remove failed. */
	list_del(&q->list);
	qpd->queue_count--;

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	dqm_unlock(dqm);

	/*
	 * Do free_mqd and raise delete event after dqm_unlock(dqm) to avoid
	 * circular locking
	 */
	kfd_dbg_ev_raise(KFD_EC_MASK(EC_DEVICE_QUEUE_DELETE),
				qpd->pqm->process, q->device,
				-1, false, NULL, 0);

	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	return retval;

failed_try_destroy_debugged_queue:
	/* Undo the marker so a later destroy attempt is not rejected. */
	q->properties.is_being_destroyed = false;
	dqm_unlock(dqm);
	return retval;
}
dqm->asic_ops.set_cache_memory_policy( 2637 dqm, 2638 qpd, 2639 default_policy, 2640 alternate_policy, 2641 alternate_aperture_base, 2642 alternate_aperture_size, 2643 misc_process_properties); 2644 2645 if (retval) 2646 goto out; 2647 2648 if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) 2649 program_sh_mem_settings(dqm, qpd); 2650 2651 pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", 2652 qpd->sh_mem_config, qpd->sh_mem_ape1_base, 2653 qpd->sh_mem_ape1_limit); 2654 2655 out: 2656 dqm_unlock(dqm); 2657 return retval; 2658 } 2659 2660 static int process_termination_nocpsch(struct device_queue_manager *dqm, 2661 struct qcm_process_device *qpd) 2662 { 2663 struct queue *q; 2664 struct device_process_node *cur, *next_dpn; 2665 int retval = 0; 2666 bool found = false; 2667 2668 dqm_lock(dqm); 2669 2670 /* Clear all user mode queues */ 2671 while (!list_empty(&qpd->queues_list)) { 2672 struct mqd_manager *mqd_mgr; 2673 int ret; 2674 2675 q = list_first_entry(&qpd->queues_list, struct queue, list); 2676 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2677 q->properties.type)]; 2678 ret = destroy_queue_nocpsch_locked(dqm, qpd, q); 2679 if (ret) 2680 retval = ret; 2681 dqm_unlock(dqm); 2682 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2683 dqm_lock(dqm); 2684 } 2685 2686 /* Unregister process */ 2687 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 2688 if (qpd == cur->qpd) { 2689 list_del(&cur->list); 2690 kfree(cur); 2691 dqm->processes_count--; 2692 found = true; 2693 break; 2694 } 2695 } 2696 2697 dqm_unlock(dqm); 2698 2699 /* Outside the DQM lock because under the DQM lock we can't do 2700 * reclaim or take other locks that others hold while reclaiming. 
2701 */ 2702 if (found) 2703 kfd_dec_compute_active(dqm->dev); 2704 2705 return retval; 2706 } 2707 2708 static int get_wave_state(struct device_queue_manager *dqm, 2709 struct queue *q, 2710 void __user *ctl_stack, 2711 u32 *ctl_stack_used_size, 2712 u32 *save_area_used_size) 2713 { 2714 struct mqd_manager *mqd_mgr; 2715 2716 dqm_lock(dqm); 2717 2718 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; 2719 2720 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || 2721 q->properties.is_active || !q->device->kfd->cwsr_enabled || 2722 !mqd_mgr->get_wave_state) { 2723 dqm_unlock(dqm); 2724 return -EINVAL; 2725 } 2726 2727 dqm_unlock(dqm); 2728 2729 /* 2730 * get_wave_state is outside the dqm lock to prevent circular locking 2731 * and the queue should be protected against destruction by the process 2732 * lock. 2733 */ 2734 return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, &q->properties, 2735 ctl_stack, ctl_stack_used_size, save_area_used_size); 2736 } 2737 2738 static int get_queue_checkpoint_info(struct device_queue_manager *dqm, 2739 const struct queue *q, 2740 u32 *mqd_size, 2741 u32 *ctl_stack_size) 2742 { 2743 struct mqd_manager *mqd_mgr; 2744 enum KFD_MQD_TYPE mqd_type = 2745 get_mqd_type_from_queue_type(q->properties.type); 2746 int ret = 0; 2747 2748 dqm_lock(dqm); 2749 mqd_mgr = dqm->mqd_mgrs[mqd_type]; 2750 *mqd_size = mqd_mgr->mqd_size * NUM_XCC(mqd_mgr->dev->xcc_mask); 2751 *ctl_stack_size = 0; 2752 2753 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info) 2754 ret = mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size); 2755 2756 dqm_unlock(dqm); 2757 2758 return ret; 2759 } 2760 2761 static int checkpoint_mqd(struct device_queue_manager *dqm, 2762 const struct queue *q, 2763 void *mqd, 2764 void *ctl_stack) 2765 { 2766 struct mqd_manager *mqd_mgr; 2767 int r = 0; 2768 enum KFD_MQD_TYPE mqd_type = 2769 get_mqd_type_from_queue_type(q->properties.type); 2770 2771 dqm_lock(dqm); 2772 2773 if (q->properties.is_active || 
!q->device->kfd->cwsr_enabled) { 2774 r = -EINVAL; 2775 goto dqm_unlock; 2776 } 2777 2778 mqd_mgr = dqm->mqd_mgrs[mqd_type]; 2779 if (!mqd_mgr->checkpoint_mqd) { 2780 r = -EOPNOTSUPP; 2781 goto dqm_unlock; 2782 } 2783 2784 mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack); 2785 2786 dqm_unlock: 2787 dqm_unlock(dqm); 2788 return r; 2789 } 2790 2791 static int process_termination_cpsch(struct device_queue_manager *dqm, 2792 struct qcm_process_device *qpd) 2793 { 2794 int retval = 0; 2795 struct queue *q; 2796 struct device *dev = dqm->dev->adev->dev; 2797 struct kernel_queue *kq, *kq_next; 2798 struct mqd_manager *mqd_mgr; 2799 struct device_process_node *cur, *next_dpn; 2800 enum kfd_unmap_queues_filter filter = 2801 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; 2802 bool found = false; 2803 2804 dqm_lock(dqm); 2805 2806 /* Clean all kernel queues */ 2807 list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) { 2808 list_del(&kq->list); 2809 decrement_queue_count(dqm, qpd, kq->queue); 2810 qpd->is_debug = false; 2811 dqm->total_queue_count--; 2812 filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES; 2813 } 2814 2815 /* Clear all user mode queues */ 2816 list_for_each_entry(q, &qpd->queues_list, list) { 2817 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 2818 deallocate_sdma_queue(dqm, q); 2819 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 2820 deallocate_sdma_queue(dqm, q); 2821 2822 if (q->properties.is_active) { 2823 decrement_queue_count(dqm, qpd, q); 2824 2825 if (dqm->dev->kfd->shared_resources.enable_mes) { 2826 retval = remove_queue_mes(dqm, q, qpd); 2827 if (retval) 2828 dev_err(dev, "Failed to remove queue %d\n", 2829 q->properties.queue_id); 2830 } 2831 } 2832 2833 dqm->total_queue_count--; 2834 } 2835 2836 /* Unregister process */ 2837 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 2838 if (qpd == cur->qpd) { 2839 list_del(&cur->list); 2840 kfree(cur); 2841 dqm->processes_count--; 2842 found = true; 2843 break; 2844 } 2845 } 
2846 2847 if (!dqm->dev->kfd->shared_resources.enable_mes) 2848 retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD); 2849 2850 if ((retval || qpd->reset_wavefronts) && 2851 down_read_trylock(&dqm->dev->adev->reset_domain->sem)) { 2852 pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); 2853 dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process); 2854 qpd->reset_wavefronts = false; 2855 up_read(&dqm->dev->adev->reset_domain->sem); 2856 } 2857 2858 /* Lastly, free mqd resources. 2859 * Do free_mqd() after dqm_unlock to avoid circular locking. 2860 */ 2861 while (!list_empty(&qpd->queues_list)) { 2862 q = list_first_entry(&qpd->queues_list, struct queue, list); 2863 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2864 q->properties.type)]; 2865 list_del(&q->list); 2866 qpd->queue_count--; 2867 dqm_unlock(dqm); 2868 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2869 dqm_lock(dqm); 2870 } 2871 dqm_unlock(dqm); 2872 2873 /* Outside the DQM lock because under the DQM lock we can't do 2874 * reclaim or take other locks that others hold while reclaiming. 
2875 */ 2876 if (found) 2877 kfd_dec_compute_active(dqm->dev); 2878 2879 return retval; 2880 } 2881 2882 static int init_mqd_managers(struct device_queue_manager *dqm) 2883 { 2884 int i, j; 2885 struct device *dev = dqm->dev->adev->dev; 2886 struct mqd_manager *mqd_mgr; 2887 2888 for (i = 0; i < KFD_MQD_TYPE_MAX; i++) { 2889 mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev); 2890 if (!mqd_mgr) { 2891 dev_err(dev, "mqd manager [%d] initialization failed\n", i); 2892 goto out_free; 2893 } 2894 dqm->mqd_mgrs[i] = mqd_mgr; 2895 } 2896 2897 return 0; 2898 2899 out_free: 2900 for (j = 0; j < i; j++) { 2901 kfree(dqm->mqd_mgrs[j]); 2902 dqm->mqd_mgrs[j] = NULL; 2903 } 2904 2905 return -ENOMEM; 2906 } 2907 2908 /* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/ 2909 static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) 2910 { 2911 int retval; 2912 struct kfd_node *dev = dqm->dev; 2913 struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd; 2914 uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * 2915 get_num_all_sdma_engines(dqm) * 2916 dev->kfd->device_info.num_sdma_queues_per_engine + 2917 (dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size * 2918 NUM_XCC(dqm->dev->xcc_mask)); 2919 2920 retval = amdgpu_amdkfd_alloc_kernel_mem(dev->adev, size, 2921 AMDGPU_GEM_DOMAIN_GTT, 2922 &(mem_obj->mem), &(mem_obj->gpu_addr), 2923 (void *)&(mem_obj->cpu_ptr), false); 2924 2925 return retval; 2926 } 2927 2928 static void deallocate_hiq_sdma_mqd(struct kfd_node *dev, 2929 struct kfd_mem_obj *mqd) 2930 { 2931 WARN(!mqd, "No hiq sdma mqd trunk to free"); 2932 2933 amdgpu_amdkfd_free_kernel_mem(dev->adev, &mqd->mem); 2934 } 2935 2936 struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev) 2937 { 2938 struct device_queue_manager *dqm; 2939 2940 pr_debug("Loading device queue manager\n"); 2941 2942 dqm = kzalloc_obj(*dqm); 2943 if (!dqm) 2944 return NULL; 2945 2946 switch (dev->adev->asic_type) { 2947 /* HWS is not available on Hawaii. 
*/ 2948 case CHIP_HAWAII: 2949 /* HWS depends on CWSR for timely dequeue. CWSR is not 2950 * available on Tonga. 2951 * 2952 * FIXME: This argument also applies to Kaveri. 2953 */ 2954 case CHIP_TONGA: 2955 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS; 2956 break; 2957 default: 2958 dqm->sched_policy = sched_policy; 2959 break; 2960 } 2961 2962 dqm->dev = dev; 2963 switch (dqm->sched_policy) { 2964 case KFD_SCHED_POLICY_HWS: 2965 case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: 2966 /* initialize dqm for cp scheduling */ 2967 dqm->ops.create_queue = create_queue_cpsch; 2968 dqm->ops.initialize = initialize_cpsch; 2969 dqm->ops.start = start_cpsch; 2970 dqm->ops.stop = stop_cpsch; 2971 dqm->ops.halt = halt_cpsch; 2972 dqm->ops.unhalt = unhalt_cpsch; 2973 dqm->ops.destroy_queue = destroy_queue_cpsch; 2974 dqm->ops.update_queue = update_queue; 2975 dqm->ops.register_process = register_process; 2976 dqm->ops.unregister_process = unregister_process; 2977 dqm->ops.uninitialize = uninitialize; 2978 dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; 2979 dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; 2980 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 2981 dqm->ops.process_termination = process_termination_cpsch; 2982 dqm->ops.evict_process_queues = evict_process_queues_cpsch; 2983 dqm->ops.restore_process_queues = restore_process_queues_cpsch; 2984 dqm->ops.get_wave_state = get_wave_state; 2985 dqm->ops.reset_queues = reset_queues_cpsch; 2986 dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info; 2987 dqm->ops.checkpoint_mqd = checkpoint_mqd; 2988 break; 2989 case KFD_SCHED_POLICY_NO_HWS: 2990 /* initialize dqm for no cp scheduling */ 2991 dqm->ops.start = start_nocpsch; 2992 dqm->ops.stop = stop_nocpsch; 2993 dqm->ops.create_queue = create_queue_nocpsch; 2994 dqm->ops.destroy_queue = destroy_queue_nocpsch; 2995 dqm->ops.update_queue = update_queue; 2996 dqm->ops.register_process = register_process; 2997 dqm->ops.unregister_process = 
unregister_process; 2998 dqm->ops.initialize = initialize_nocpsch; 2999 dqm->ops.uninitialize = uninitialize; 3000 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 3001 dqm->ops.process_termination = process_termination_nocpsch; 3002 dqm->ops.evict_process_queues = evict_process_queues_nocpsch; 3003 dqm->ops.restore_process_queues = 3004 restore_process_queues_nocpsch; 3005 dqm->ops.get_wave_state = get_wave_state; 3006 dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info; 3007 dqm->ops.checkpoint_mqd = checkpoint_mqd; 3008 break; 3009 default: 3010 dev_err(dev->adev->dev, "Invalid scheduling policy %d\n", dqm->sched_policy); 3011 goto out_free; 3012 } 3013 3014 switch (dev->adev->asic_type) { 3015 case CHIP_KAVERI: 3016 case CHIP_HAWAII: 3017 device_queue_manager_init_cik(&dqm->asic_ops); 3018 break; 3019 3020 case CHIP_CARRIZO: 3021 case CHIP_TONGA: 3022 case CHIP_FIJI: 3023 case CHIP_POLARIS10: 3024 case CHIP_POLARIS11: 3025 case CHIP_POLARIS12: 3026 case CHIP_VEGAM: 3027 device_queue_manager_init_vi(&dqm->asic_ops); 3028 break; 3029 3030 default: 3031 if (KFD_GC_VERSION(dev) >= IP_VERSION(12, 1, 0)) 3032 device_queue_manager_init_v12_1(&dqm->asic_ops); 3033 else if (KFD_GC_VERSION(dev) >= IP_VERSION(12, 0, 0)) 3034 device_queue_manager_init_v12(&dqm->asic_ops); 3035 else if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0)) 3036 device_queue_manager_init_v11(&dqm->asic_ops); 3037 else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1)) 3038 device_queue_manager_init_v10(&dqm->asic_ops); 3039 else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1)) 3040 device_queue_manager_init_v9(&dqm->asic_ops); 3041 else { 3042 WARN(1, "Unexpected ASIC family %u", 3043 dev->adev->asic_type); 3044 goto out_free; 3045 } 3046 } 3047 3048 if (init_mqd_managers(dqm)) 3049 goto out_free; 3050 3051 if (!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) { 3052 dev_err(dev->adev->dev, "Failed to allocate hiq sdma mqd trunk buffer\n"); 3053 goto out_free; 
3054 } 3055 3056 if (!dqm->ops.initialize(dqm)) { 3057 init_waitqueue_head(&dqm->destroy_wait); 3058 return dqm; 3059 } 3060 3061 if (!dev->kfd->shared_resources.enable_mes) 3062 deallocate_hiq_sdma_mqd(dev, &dqm->hiq_sdma_mqd); 3063 3064 out_free: 3065 kfree(dqm); 3066 return NULL; 3067 } 3068 3069 void device_queue_manager_uninit(struct device_queue_manager *dqm) 3070 { 3071 dqm->ops.stop(dqm); 3072 dqm->ops.uninitialize(dqm); 3073 if (!dqm->dev->kfd->shared_resources.enable_mes) 3074 deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd); 3075 kfree(dqm); 3076 } 3077 3078 int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id) 3079 { 3080 struct kfd_process_device *pdd = NULL; 3081 struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, &pdd); 3082 struct device_queue_manager *dqm = knode->dqm; 3083 struct device *dev = dqm->dev->adev->dev; 3084 struct qcm_process_device *qpd; 3085 struct queue *q = NULL; 3086 int ret = 0; 3087 3088 if (!pdd) 3089 return -EINVAL; 3090 3091 dqm_lock(dqm); 3092 3093 if (pdd) { 3094 qpd = &pdd->qpd; 3095 3096 list_for_each_entry(q, &qpd->queues_list, list) { 3097 if (q->doorbell_id == doorbell_id && q->properties.is_active) { 3098 ret = suspend_all_queues_mes(dqm); 3099 if (ret) { 3100 dev_err(dev, "Suspending all queues failed"); 3101 goto out; 3102 } 3103 3104 q->properties.is_evicted = true; 3105 q->properties.is_active = false; 3106 decrement_queue_count(dqm, qpd, q); 3107 3108 ret = remove_queue_mes(dqm, q, qpd); 3109 if (ret) { 3110 dev_err(dev, "Removing bad queue failed"); 3111 goto out; 3112 } 3113 3114 ret = resume_all_queues_mes(dqm); 3115 if (ret) 3116 dev_err(dev, "Resuming all queues failed"); 3117 3118 break; 3119 } 3120 } 3121 } 3122 3123 out: 3124 dqm_unlock(dqm); 3125 kfd_unref_process(p); 3126 return ret; 3127 } 3128 3129 int kfd_evict_process_device(struct kfd_process_device *pdd) 3130 { 3131 struct device_queue_manager *dqm; 3132 struct kfd_process *p; 3133 3134 p = pdd->process; 
3135 dqm = pdd->dev->dqm; 3136 3137 WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); 3138 3139 return dqm->ops.evict_process_queues(dqm, &pdd->qpd); 3140 } 3141 3142 int reserve_debug_trap_vmid(struct device_queue_manager *dqm, 3143 struct qcm_process_device *qpd) 3144 { 3145 int r; 3146 struct device *dev = dqm->dev->adev->dev; 3147 int updated_vmid_mask; 3148 3149 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3150 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3151 return -EINVAL; 3152 } 3153 3154 dqm_lock(dqm); 3155 3156 if (dqm->trap_debug_vmid != 0) { 3157 dev_err(dev, "Trap debug id already reserved\n"); 3158 r = -EBUSY; 3159 goto out_unlock; 3160 } 3161 3162 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 3163 USE_DEFAULT_GRACE_PERIOD, false); 3164 if (r) 3165 goto out_unlock; 3166 3167 updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap; 3168 updated_vmid_mask &= ~(1 << dqm->dev->vm_info.last_vmid_kfd); 3169 3170 dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask; 3171 dqm->trap_debug_vmid = dqm->dev->vm_info.last_vmid_kfd; 3172 r = set_sched_resources(dqm); 3173 if (r) 3174 goto out_unlock; 3175 3176 r = map_queues_cpsch(dqm); 3177 if (r) 3178 goto out_unlock; 3179 3180 pr_debug("Reserved VMID for trap debug: %i\n", dqm->trap_debug_vmid); 3181 3182 out_unlock: 3183 dqm_unlock(dqm); 3184 return r; 3185 } 3186 3187 /* 3188 * Releases vmid for the trap debugger 3189 */ 3190 int release_debug_trap_vmid(struct device_queue_manager *dqm, 3191 struct qcm_process_device *qpd) 3192 { 3193 struct device *dev = dqm->dev->adev->dev; 3194 int r; 3195 int updated_vmid_mask; 3196 uint32_t trap_debug_vmid; 3197 3198 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3199 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3200 return -EINVAL; 3201 } 3202 3203 dqm_lock(dqm); 3204 trap_debug_vmid = dqm->trap_debug_vmid; 3205 if (dqm->trap_debug_vmid == 
0) { 3206 dev_err(dev, "Trap debug id is not reserved\n"); 3207 r = -EINVAL; 3208 goto out_unlock; 3209 } 3210 3211 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 3212 USE_DEFAULT_GRACE_PERIOD, false); 3213 if (r) 3214 goto out_unlock; 3215 3216 updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap; 3217 updated_vmid_mask |= (1 << dqm->dev->vm_info.last_vmid_kfd); 3218 3219 dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask; 3220 dqm->trap_debug_vmid = 0; 3221 r = set_sched_resources(dqm); 3222 if (r) 3223 goto out_unlock; 3224 3225 r = map_queues_cpsch(dqm); 3226 if (r) 3227 goto out_unlock; 3228 3229 pr_debug("Released VMID for trap debug: %i\n", trap_debug_vmid); 3230 3231 out_unlock: 3232 dqm_unlock(dqm); 3233 return r; 3234 } 3235 3236 #define QUEUE_NOT_FOUND -1 3237 /* invalidate queue operation in array */ 3238 static void q_array_invalidate(uint32_t num_queues, uint32_t *queue_ids) 3239 { 3240 int i; 3241 3242 for (i = 0; i < num_queues; i++) 3243 queue_ids[i] |= KFD_DBG_QUEUE_INVALID_MASK; 3244 } 3245 3246 /* find queue index in array */ 3247 static int q_array_get_index(unsigned int queue_id, 3248 uint32_t num_queues, 3249 uint32_t *queue_ids) 3250 { 3251 int i; 3252 3253 for (i = 0; i < num_queues; i++) 3254 if (queue_id == (queue_ids[i] & ~KFD_DBG_QUEUE_INVALID_MASK)) 3255 return i; 3256 3257 return QUEUE_NOT_FOUND; 3258 } 3259 3260 struct copy_context_work_handler_workarea { 3261 struct work_struct copy_context_work; 3262 struct kfd_process *p; 3263 }; 3264 3265 static void copy_context_work_handler(struct work_struct *work) 3266 { 3267 struct copy_context_work_handler_workarea *workarea; 3268 struct mqd_manager *mqd_mgr; 3269 struct queue *q; 3270 struct mm_struct *mm; 3271 struct kfd_process *p; 3272 uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size; 3273 int i; 3274 3275 workarea = container_of(work, 3276 struct copy_context_work_handler_workarea, 3277 copy_context_work); 3278 3279 p 
= workarea->p; 3280 mm = get_task_mm(p->lead_thread); 3281 3282 if (!mm) 3283 return; 3284 3285 kthread_use_mm(mm); 3286 for (i = 0; i < p->n_pdds; i++) { 3287 struct kfd_process_device *pdd = p->pdds[i]; 3288 struct device_queue_manager *dqm = pdd->dev->dqm; 3289 struct qcm_process_device *qpd = &pdd->qpd; 3290 3291 list_for_each_entry(q, &qpd->queues_list, list) { 3292 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE) 3293 continue; 3294 3295 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; 3296 3297 /* We ignore the return value from get_wave_state 3298 * because 3299 * i) right now, it always returns 0, and 3300 * ii) if we hit an error, we would continue to the 3301 * next queue anyway. 3302 */ 3303 mqd_mgr->get_wave_state(mqd_mgr, 3304 q->mqd, 3305 &q->properties, 3306 (void __user *) q->properties.ctx_save_restore_area_address, 3307 &tmp_ctl_stack_used_size, 3308 &tmp_save_area_used_size); 3309 } 3310 } 3311 kthread_unuse_mm(mm); 3312 mmput(mm); 3313 } 3314 3315 static uint32_t *get_queue_ids(uint32_t num_queues, uint32_t *usr_queue_id_array) 3316 { 3317 if (!usr_queue_id_array) 3318 return num_queues ? ERR_PTR(-EINVAL) : NULL; 3319 3320 if (num_queues > KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) 3321 return ERR_PTR(-EINVAL); 3322 3323 return memdup_user(usr_queue_id_array, 3324 array_size(num_queues, sizeof(uint32_t))); 3325 } 3326 3327 int resume_queues(struct kfd_process *p, 3328 uint32_t num_queues, 3329 uint32_t *usr_queue_id_array) 3330 { 3331 uint32_t *queue_ids = NULL; 3332 int total_resumed = 0; 3333 int i; 3334 3335 if (usr_queue_id_array) { 3336 queue_ids = get_queue_ids(num_queues, usr_queue_id_array); 3337 3338 if (IS_ERR(queue_ids)) 3339 return PTR_ERR(queue_ids); 3340 3341 /* mask all queues as invalid. 
unmask per successful request */ 3342 q_array_invalidate(num_queues, queue_ids); 3343 } 3344 3345 for (i = 0; i < p->n_pdds; i++) { 3346 struct kfd_process_device *pdd = p->pdds[i]; 3347 struct device_queue_manager *dqm = pdd->dev->dqm; 3348 struct device *dev = dqm->dev->adev->dev; 3349 struct qcm_process_device *qpd = &pdd->qpd; 3350 struct queue *q; 3351 int r, per_device_resumed = 0; 3352 3353 dqm_lock(dqm); 3354 3355 /* unmask queues that resume or already resumed as valid */ 3356 list_for_each_entry(q, &qpd->queues_list, list) { 3357 int q_idx = QUEUE_NOT_FOUND; 3358 3359 if (queue_ids) 3360 q_idx = q_array_get_index( 3361 q->properties.queue_id, 3362 num_queues, 3363 queue_ids); 3364 3365 if (!queue_ids || q_idx != QUEUE_NOT_FOUND) { 3366 int err = resume_single_queue(dqm, &pdd->qpd, q); 3367 3368 if (queue_ids) { 3369 if (!err) { 3370 queue_ids[q_idx] &= 3371 ~KFD_DBG_QUEUE_INVALID_MASK; 3372 } else { 3373 queue_ids[q_idx] |= 3374 KFD_DBG_QUEUE_ERROR_MASK; 3375 break; 3376 } 3377 } 3378 3379 if (dqm->dev->kfd->shared_resources.enable_mes) { 3380 wake_up_all(&dqm->destroy_wait); 3381 if (!err) 3382 total_resumed++; 3383 } else { 3384 per_device_resumed++; 3385 } 3386 } 3387 } 3388 3389 if (!per_device_resumed) { 3390 dqm_unlock(dqm); 3391 continue; 3392 } 3393 3394 r = execute_queues_cpsch(dqm, 3395 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 3396 0, 3397 USE_DEFAULT_GRACE_PERIOD); 3398 if (r) { 3399 dev_err(dev, "Failed to resume process queues\n"); 3400 if (queue_ids) { 3401 list_for_each_entry(q, &qpd->queues_list, list) { 3402 int q_idx = q_array_get_index( 3403 q->properties.queue_id, 3404 num_queues, 3405 queue_ids); 3406 3407 /* mask queue as error on resume fail */ 3408 if (q_idx != QUEUE_NOT_FOUND) 3409 queue_ids[q_idx] |= 3410 KFD_DBG_QUEUE_ERROR_MASK; 3411 } 3412 } 3413 } else { 3414 wake_up_all(&dqm->destroy_wait); 3415 total_resumed += per_device_resumed; 3416 } 3417 3418 dqm_unlock(dqm); 3419 } 3420 3421 if (queue_ids) { 3422 if (copy_to_user((void 
__user *)usr_queue_id_array, queue_ids, 3423 num_queues * sizeof(uint32_t))) 3424 pr_err("copy_to_user failed on queue resume\n"); 3425 3426 kfree(queue_ids); 3427 } 3428 3429 return total_resumed; 3430 } 3431 3432 int suspend_queues(struct kfd_process *p, 3433 uint32_t num_queues, 3434 uint32_t grace_period, 3435 uint64_t exception_clear_mask, 3436 uint32_t *usr_queue_id_array) 3437 { 3438 uint32_t *queue_ids = get_queue_ids(num_queues, usr_queue_id_array); 3439 int total_suspended = 0; 3440 int i; 3441 3442 if (IS_ERR(queue_ids)) 3443 return PTR_ERR(queue_ids); 3444 3445 /* mask all queues as invalid. umask on successful request */ 3446 q_array_invalidate(num_queues, queue_ids); 3447 3448 for (i = 0; i < p->n_pdds; i++) { 3449 struct kfd_process_device *pdd = p->pdds[i]; 3450 struct device_queue_manager *dqm = pdd->dev->dqm; 3451 struct device *dev = dqm->dev->adev->dev; 3452 struct qcm_process_device *qpd = &pdd->qpd; 3453 struct queue *q; 3454 int r, per_device_suspended = 0; 3455 3456 mutex_lock(&p->event_mutex); 3457 dqm_lock(dqm); 3458 3459 /* unmask queues that suspend or already suspended */ 3460 list_for_each_entry(q, &qpd->queues_list, list) { 3461 int q_idx = q_array_get_index(q->properties.queue_id, 3462 num_queues, 3463 queue_ids); 3464 3465 if (q_idx != QUEUE_NOT_FOUND) { 3466 int err = suspend_single_queue(dqm, pdd, q); 3467 bool is_mes = dqm->dev->kfd->shared_resources.enable_mes; 3468 3469 if (!err) { 3470 queue_ids[q_idx] &= ~KFD_DBG_QUEUE_INVALID_MASK; 3471 if (exception_clear_mask && is_mes) 3472 q->properties.exception_status &= 3473 ~exception_clear_mask; 3474 3475 if (is_mes) 3476 total_suspended++; 3477 else 3478 per_device_suspended++; 3479 } else if (err != -EBUSY) { 3480 queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK; 3481 break; 3482 } 3483 } 3484 } 3485 3486 if (!per_device_suspended) { 3487 dqm_unlock(dqm); 3488 mutex_unlock(&p->event_mutex); 3489 if (total_suspended) 3490 amdgpu_amdkfd_debug_mem_fence(dqm->dev->adev); 3491 continue; 
3492 } 3493 3494 r = execute_queues_cpsch(dqm, 3495 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 3496 grace_period); 3497 3498 if (r) 3499 dev_err(dev, "Failed to suspend process queues.\n"); 3500 else 3501 total_suspended += per_device_suspended; 3502 3503 list_for_each_entry(q, &qpd->queues_list, list) { 3504 int q_idx = q_array_get_index(q->properties.queue_id, 3505 num_queues, queue_ids); 3506 3507 if (q_idx == QUEUE_NOT_FOUND) 3508 continue; 3509 3510 /* mask queue as error on suspend fail */ 3511 if (r) 3512 queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK; 3513 else if (exception_clear_mask) 3514 q->properties.exception_status &= 3515 ~exception_clear_mask; 3516 } 3517 3518 dqm_unlock(dqm); 3519 mutex_unlock(&p->event_mutex); 3520 amdgpu_device_flush_hdp(dqm->dev->adev, NULL); 3521 } 3522 3523 if (total_suspended) { 3524 struct copy_context_work_handler_workarea copy_context_worker; 3525 3526 INIT_WORK_ONSTACK( 3527 ©_context_worker.copy_context_work, 3528 copy_context_work_handler); 3529 3530 copy_context_worker.p = p; 3531 3532 schedule_work(©_context_worker.copy_context_work); 3533 3534 3535 flush_work(©_context_worker.copy_context_work); 3536 destroy_work_on_stack(©_context_worker.copy_context_work); 3537 } 3538 3539 if (copy_to_user((void __user *)usr_queue_id_array, queue_ids, 3540 num_queues * sizeof(uint32_t))) 3541 pr_err("copy_to_user failed on queue suspend\n"); 3542 3543 kfree(queue_ids); 3544 3545 return total_suspended; 3546 } 3547 3548 static uint32_t set_queue_type_for_user(struct queue_properties *q_props) 3549 { 3550 switch (q_props->type) { 3551 case KFD_QUEUE_TYPE_COMPUTE: 3552 return q_props->format == KFD_QUEUE_FORMAT_PM4 3553 ? 
KFD_IOC_QUEUE_TYPE_COMPUTE 3554 : KFD_IOC_QUEUE_TYPE_COMPUTE_AQL; 3555 case KFD_QUEUE_TYPE_SDMA: 3556 return KFD_IOC_QUEUE_TYPE_SDMA; 3557 case KFD_QUEUE_TYPE_SDMA_XGMI: 3558 return KFD_IOC_QUEUE_TYPE_SDMA_XGMI; 3559 default: 3560 WARN_ONCE(true, "queue type not recognized!"); 3561 return 0xffffffff; 3562 }; 3563 } 3564 3565 void set_queue_snapshot_entry(struct queue *q, 3566 uint64_t exception_clear_mask, 3567 struct kfd_queue_snapshot_entry *qss_entry) 3568 { 3569 qss_entry->ring_base_address = q->properties.queue_address; 3570 qss_entry->write_pointer_address = (uint64_t)q->properties.write_ptr; 3571 qss_entry->read_pointer_address = (uint64_t)q->properties.read_ptr; 3572 qss_entry->ctx_save_restore_address = 3573 q->properties.ctx_save_restore_area_address; 3574 qss_entry->ctx_save_restore_area_size = 3575 q->properties.ctx_save_restore_area_size; 3576 qss_entry->exception_status = q->properties.exception_status; 3577 qss_entry->queue_id = q->properties.queue_id; 3578 qss_entry->gpu_id = q->device->id; 3579 qss_entry->ring_size = (uint32_t)q->properties.queue_size; 3580 qss_entry->queue_type = set_queue_type_for_user(&q->properties); 3581 q->properties.exception_status &= ~exception_clear_mask; 3582 } 3583 3584 int debug_lock_and_unmap(struct device_queue_manager *dqm) 3585 { 3586 struct device *dev = dqm->dev->adev->dev; 3587 int r; 3588 3589 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3590 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3591 return -EINVAL; 3592 } 3593 3594 if (!kfd_dbg_is_per_vmid_supported(dqm->dev)) 3595 return 0; 3596 3597 dqm_lock(dqm); 3598 3599 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 0, false); 3600 if (r) 3601 dqm_unlock(dqm); 3602 3603 return r; 3604 } 3605 3606 int debug_map_and_unlock(struct device_queue_manager *dqm) 3607 { 3608 struct device *dev = dqm->dev->adev->dev; 3609 int r; 3610 3611 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3612 dev_err(dev, "Unsupported 
on sched_policy: %i\n", dqm->sched_policy); 3613 return -EINVAL; 3614 } 3615 3616 if (!kfd_dbg_is_per_vmid_supported(dqm->dev)) 3617 return 0; 3618 3619 r = map_queues_cpsch(dqm); 3620 3621 dqm_unlock(dqm); 3622 3623 return r; 3624 } 3625 3626 int debug_refresh_runlist(struct device_queue_manager *dqm) 3627 { 3628 int r = debug_lock_and_unmap(dqm); 3629 3630 if (r) 3631 return r; 3632 3633 return debug_map_and_unlock(dqm); 3634 } 3635 3636 bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm, 3637 struct qcm_process_device *qpd, 3638 int doorbell_off, u32 *queue_format) 3639 { 3640 struct queue *q; 3641 bool r = false; 3642 3643 if (!queue_format) 3644 return r; 3645 3646 dqm_lock(dqm); 3647 3648 list_for_each_entry(q, &qpd->queues_list, list) { 3649 if (q->properties.doorbell_off == doorbell_off) { 3650 *queue_format = q->properties.format; 3651 r = true; 3652 goto out; 3653 } 3654 } 3655 3656 out: 3657 dqm_unlock(dqm); 3658 return r; 3659 } 3660 #if defined(CONFIG_DEBUG_FS) 3661 3662 static void seq_reg_dump(struct seq_file *m, 3663 uint32_t (*dump)[2], uint32_t n_regs) 3664 { 3665 uint32_t i, count; 3666 3667 for (i = 0, count = 0; i < n_regs; i++) { 3668 if (count == 0 || 3669 dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) { 3670 seq_printf(m, "%s %08x: %08x", 3671 i ? 
"\n" : "", 3672 dump[i][0], dump[i][1]); 3673 count = 7; 3674 } else { 3675 seq_printf(m, " %08x", dump[i][1]); 3676 count--; 3677 } 3678 } 3679 3680 seq_puts(m, "\n"); 3681 } 3682 3683 int dqm_debugfs_hqds(struct seq_file *m, void *data) 3684 { 3685 struct device_queue_manager *dqm = data; 3686 uint32_t xcc_mask = dqm->dev->xcc_mask; 3687 uint32_t (*dump)[2], n_regs; 3688 int pipe, queue; 3689 int r = 0, xcc_id; 3690 uint32_t sdma_engine_start; 3691 3692 if (!dqm->sched_running) { 3693 seq_puts(m, " Device is stopped\n"); 3694 return 0; 3695 } 3696 3697 for_each_inst(xcc_id, xcc_mask) { 3698 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 3699 KFD_CIK_HIQ_PIPE, 3700 KFD_CIK_HIQ_QUEUE, &dump, 3701 &n_regs, xcc_id); 3702 if (!r) { 3703 seq_printf( 3704 m, 3705 " Inst %d, HIQ on MEC %d Pipe %d Queue %d\n", 3706 xcc_id, 3707 KFD_CIK_HIQ_PIPE / get_pipes_per_mec(dqm) + 1, 3708 KFD_CIK_HIQ_PIPE % get_pipes_per_mec(dqm), 3709 KFD_CIK_HIQ_QUEUE); 3710 seq_reg_dump(m, dump, n_regs); 3711 3712 kfree(dump); 3713 } 3714 3715 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 3716 int pipe_offset = pipe * get_queues_per_pipe(dqm); 3717 3718 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { 3719 if (!test_bit(pipe_offset + queue, 3720 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 3721 continue; 3722 3723 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 3724 pipe, queue, 3725 &dump, &n_regs, 3726 xcc_id); 3727 if (r) 3728 break; 3729 3730 seq_printf(m, 3731 " Inst %d, CP Pipe %d, Queue %d\n", 3732 xcc_id, pipe, queue); 3733 seq_reg_dump(m, dump, n_regs); 3734 3735 kfree(dump); 3736 } 3737 } 3738 } 3739 3740 sdma_engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm); 3741 for (pipe = sdma_engine_start; 3742 pipe < (sdma_engine_start + get_num_all_sdma_engines(dqm)); 3743 pipe++) { 3744 for (queue = 0; 3745 queue < dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 3746 queue++) { 3747 r = dqm->dev->kfd2kgd->hqd_sdma_dump( 3748 dqm->dev->adev, 
pipe, queue, &dump, &n_regs); 3749 if (r) 3750 break; 3751 3752 seq_printf(m, " SDMA Engine %d, RLC %d\n", 3753 pipe, queue); 3754 seq_reg_dump(m, dump, n_regs); 3755 3756 kfree(dump); 3757 } 3758 } 3759 3760 return r; 3761 } 3762 3763 int dqm_debugfs_hang_hws(struct device_queue_manager *dqm) 3764 { 3765 int r = 0; 3766 3767 dqm_lock(dqm); 3768 r = pm_debugfs_hang_hws(&dqm->packet_mgr); 3769 if (r) { 3770 dqm_unlock(dqm); 3771 return r; 3772 } 3773 dqm->active_runlist = true; 3774 r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 3775 0, USE_DEFAULT_GRACE_PERIOD); 3776 dqm_unlock(dqm); 3777 3778 return r; 3779 } 3780 3781 #endif 3782