// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/ratelimit.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_reset.h"
#include "mes_v11_api_def.h"
#include "kfd_debug.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
				  u32 pasid, unsigned int vmid);

static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param,
				uint32_t grace_period);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
			      enum kfd_unmap_queues_filter filter,
			      uint32_t filter_param,
			      uint32_t grace_period,
			      bool reset);

static int map_queues_cpsch(struct device_queue_manager *dqm);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				  struct queue *q);

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				  struct queue *q);
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
static int allocate_sdma_queue(struct device_queue_manager *dqm,
			       struct queue *q, const uint32_t *restore_sdma_id);
static void kfd_process_hw_exception(struct work_struct *work);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = (mec * dqm->dev->kfd->shared_resources.num_pipe_per_mec
			   + pipe) * dqm->dev->kfd->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i < dqm->dev->kfd->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			     dqm->dev->kfd->shared_resources.cp_queue_bitmap))
			return true;
	return false;
}

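/* Descriptive comment (editorial addition): get_cp_queues_num() reports the
 * number of MEC compute (CP) queue slots usable by KFD, i.e. the number of
 * bits set in the shared cp_queue_bitmap.
 */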
unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
{
	return bitmap_weight(dqm->dev->kfd->shared_resources.cp_queue_bitmap,
			     AMDGPU_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->kfd->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->kfd->shared_resources.num_pipe_per_mec;
}

static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
{
	return kfd_get_num_sdma_engines(dqm->dev) +
		kfd_get_num_xgmi_sdma_engines(dqm->dev);
}

unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
	return kfd_get_num_sdma_engines(dqm->dev) *
		dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
}

unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
{
	return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
		dqm->dev->kfd->device_info.num_sdma_queues_per_engine;
}

static void init_sdma_bitmaps(struct device_queue_manager *dqm)
{
	bitmap_zero(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES);
	bitmap_set(dqm->sdma_bitmap, 0, get_num_sdma_queues(dqm));

	bitmap_zero(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES);
	bitmap_set(dqm->xgmi_sdma_bitmap, 0, get_num_xgmi_sdma_queues(dqm));

	/* Mask out the reserved queues */
	bitmap_andnot(dqm->sdma_bitmap, dqm->sdma_bitmap,
		      dqm->dev->kfd->device_info.reserved_sdma_queues_bitmap,
		      KFD_MAX_SDMA_QUEUES);
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
			     struct qcm_process_device *qpd)
{
	uint32_t xcc_mask = dqm->dev->xcc_mask;
	int xcc_id;

	for_each_inst(xcc_id, xcc_mask)
		dqm->dev->kfd2kgd->program_sh_mem_settings(
			dqm->dev->adev, qpd->vmid, qpd->sh_mem_config,
			qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit,
			qpd->sh_mem_bases, xcc_id);
}

static void kfd_hws_hang(struct device_queue_manager *dqm)
{
	struct device_process_node *cur;
	struct qcm_process_device *qpd;
	struct queue *q;

	/* Mark all device queues as reset. */
	list_for_each_entry(cur, &dqm->queues, list) {
		qpd = cur->qpd;
		list_for_each_entry(q, &qpd->queues_list, list) {
			struct kfd_process_device *pdd = qpd_to_pdd(qpd);

			pdd->has_reset_queue = true;
		}
	}

	/*
	 * Issue a GPU reset if HWS is unresponsive
	 */
	schedule_work(&dqm->hw_exception_work);
}

static int convert_to_mes_queue_type(int queue_type)
{
	int mes_queue_type;

	switch (queue_type) {
	case KFD_QUEUE_TYPE_COMPUTE:
		mes_queue_type = MES_QUEUE_TYPE_COMPUTE;
		break;
	case KFD_QUEUE_TYPE_SDMA:
		mes_queue_type = MES_QUEUE_TYPE_SDMA;
		break;
	default:
		WARN(1, "Invalid queue type %d", queue_type);
		mes_queue_type = -EINVAL;
		break;
	}

	return mes_queue_type;
}

static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
			 struct qcm_process_device *qpd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
	struct mes_add_queue_input queue_input;
	int r, queue_type;
	uint64_t wptr_addr_off;

	if (!dqm->sched_running || dqm->sched_halt)
		return 0;
	if (!down_read_trylock(&adev->reset_domain->sem))
		return -EIO;

	if (!pdd->proc_ctx_cpu_ptr) {
		r = amdgpu_amdkfd_alloc_gtt_mem(adev,
						AMDGPU_MES_PROC_CTX_SIZE,
						&pdd->proc_ctx_bo,
						&pdd->proc_ctx_gpu_addr,
						&pdd->proc_ctx_cpu_ptr,
						false);
		if (r) {
			dev_err(adev->dev,
				"failed to allocate process context bo\n");
			return r;
		}
		memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
	}

	memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
	queue_input.process_id = pdd->pasid;
	queue_input.page_table_base_addr = qpd->page_table_base;
	queue_input.process_va_start = 0;
	queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
	/* MES unit for quantum is 100ns */
	queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM; /* Equivalent to 10ms. */
	queue_input.process_context_addr = pdd->proc_ctx_gpu_addr;
	queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */
	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
	queue_input.inprocess_gang_priority = q->properties.priority;
	queue_input.gang_global_priority_level =
		AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
	queue_input.doorbell_offset = q->properties.doorbell_off;
	queue_input.mqd_addr = q->gart_mqd_addr;
	queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;

	wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1);
	queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->properties.wptr_bo) + wptr_addr_off;

	queue_input.is_kfd_process = 1;
	queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL);
	queue_input.queue_size = q->properties.queue_size >> 2;

	queue_input.paging = false;
	queue_input.tba_addr = qpd->tba_addr;
	queue_input.tma_addr = qpd->tma_addr;
	queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device);
	queue_input.skip_process_ctx_clear =
		qpd->pqm->process->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED &&
		(qpd->pqm->process->debug_trap_enabled ||
		 kfd_dbg_has_ttmps_always_setup(q->device));

	queue_type = convert_to_mes_queue_type(q->properties.type);
	if (queue_type < 0) {
		dev_err(adev->dev, "Queue type not supported with MES, queue:%d\n",
			q->properties.type);
		up_read(&adev->reset_domain->sem);
		return -EINVAL;
	}
	queue_input.queue_type = (uint32_t)queue_type;

	queue_input.exclusively_scheduled = q->properties.is_gws;

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
	amdgpu_mes_unlock(&adev->mes);
	up_read(&adev->reset_domain->sem);
	if (r) {
		dev_err(adev->dev, "failed to add hardware queue to MES, doorbell=0x%x\n",
			q->properties.doorbell_off);
		dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
		kfd_hws_hang(dqm);
	}

	return r;
}

static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
			    struct qcm_process_device *qpd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
	int r;
	struct mes_remove_queue_input queue_input;

	if (!dqm->sched_running || dqm->sched_halt)
		return 0;
	if (!down_read_trylock(&adev->reset_domain->sem))
		return -EIO;

	memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
	queue_input.doorbell_offset = q->properties.doorbell_off;
	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
	amdgpu_mes_unlock(&adev->mes);
	up_read(&adev->reset_domain->sem);

	if (r) {
		dev_err(adev->dev, "failed to remove hardware queue from MES, doorbell=0x%x\n",
			q->properties.doorbell_off);
		dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
		kfd_hws_hang(dqm);
	}

	return r;
}

static int remove_all_kfd_queues_mes(struct device_queue_manager *dqm)
{
	struct device_process_node *cur;
	struct device *dev = dqm->dev->adev->dev;
	struct qcm_process_device *qpd;
	struct queue *q;
	int retval = 0;

	list_for_each_entry(cur, &dqm->queues, list) {
		qpd = cur->qpd;
		list_for_each_entry(q, &qpd->queues_list, list) {
			if (q->properties.is_active) {
				retval = remove_queue_mes(dqm, q, qpd);
				if (retval) {
					dev_err(dev, "%s: Failed to remove queue %d for dev %d",
						__func__,
						q->properties.queue_id,
						dqm->dev->id);
					return retval;
				}
			}
		}
	}

	return retval;
}

static int add_all_kfd_queues_mes(struct device_queue_manager *dqm)
{
	struct device_process_node *cur;
	struct device *dev = dqm->dev->adev->dev;
	struct qcm_process_device *qpd;
	struct queue *q;
	int retval = 0;

	list_for_each_entry(cur, &dqm->queues, list) {
		qpd = cur->qpd;
		list_for_each_entry(q, &qpd->queues_list, list) {
			if (!q->properties.is_active)
				continue;
			retval = add_queue_mes(dqm, q, qpd);
			if (retval) {
				dev_err(dev, "%s: Failed to add queue %d for dev %d",
					__func__,
					q->properties.queue_id,
					dqm->dev->id);
				return retval;
			}
		}
	}

	return retval;
}

static int suspend_all_queues_mes(struct device_queue_manager *dqm)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
	int r = 0;

	if (!down_read_trylock(&adev->reset_domain->sem))
		return -EIO;

	r = amdgpu_mes_suspend(adev);
	up_read(&adev->reset_domain->sem);

	if (r) {
		dev_err(adev->dev, "failed to suspend gangs from MES\n");
		dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
		kfd_hws_hang(dqm);
	}

	return r;
}

static int resume_all_queues_mes(struct device_queue_manager *dqm)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
	int r = 0;

	if (!down_read_trylock(&adev->reset_domain->sem))
		return -EIO;

	r = amdgpu_mes_resume(adev);
	up_read(&adev->reset_domain->sem);

	if (r) {
		dev_err(adev->dev, "failed to resume gangs from MES\n");
		dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n");
		kfd_hws_hang(dqm);
	}

	return r;
}

static void increment_queue_count(struct device_queue_manager *dqm,
				  struct qcm_process_device *qpd,
				  struct queue *q)
{
	dqm->active_queue_count++;
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
		dqm->active_cp_queue_count++;

	if (q->properties.is_gws) {
		dqm->gws_queue_count++;
		qpd->mapped_gws_queue = true;
	}
}

static void decrement_queue_count(struct device_queue_manager *dqm,
				  struct qcm_process_device *qpd,
				  struct queue *q)
{
	dqm->active_queue_count--;
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
		dqm->active_cp_queue_count--;

	if (q->properties.is_gws) {
		dqm->gws_queue_count--;
		qpd->mapped_gws_queue = false;
	}
}

/*
 * Allocate a doorbell ID to this queue.
 * If doorbell_id is passed in, make sure requested ID is valid then allocate it.
 */
static int allocate_doorbell(struct qcm_process_device *qpd,
			     struct queue *q,
			     uint32_t const *restore_id)
{
	struct kfd_node *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev)) {
		/* On pre-SOC15 chips we need to use the queue ID to
		 * preserve the user mode ABI.
		 */

		if (restore_id && *restore_id != q->properties.queue_id)
			return -EINVAL;

		q->doorbell_id = q->properties.queue_id;
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		   q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		/* For SDMA queues on SOC15 with 8-byte doorbell, use static
		 * doorbell assignments based on the engine and queue id.
		 * The doorbell index distance between RLC (2*i) and (2*i+1)
		 * for an SDMA engine is 512.
		 */

		uint32_t *idx_offset = dev->kfd->shared_resources.sdma_doorbell_idx;

		/*
		 * q->properties.sdma_engine_id corresponds to the virtual
		 * sdma engine number. However, for doorbell allocation,
		 * we need the physical sdma engine id in order to get the
		 * correct doorbell offset.
		 */
		uint32_t valid_id = idx_offset[qpd->dqm->dev->node_id *
					       get_num_all_sdma_engines(qpd->dqm) +
					       q->properties.sdma_engine_id]
				    + (q->properties.sdma_queue_id & 1)
				    * KFD_QUEUE_DOORBELL_MIRROR_OFFSET
				    + (q->properties.sdma_queue_id >> 1);

		if (restore_id && *restore_id != valid_id)
			return -EINVAL;
		q->doorbell_id = valid_id;
	} else {
		/* For CP queues on SOC15 */
		if (restore_id) {
			/* make sure that ID is free */
			if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap))
				return -EINVAL;

			q->doorbell_id = *restore_id;
		} else {
			/* or reserve a free doorbell ID */
			unsigned int found;

			found = find_first_zero_bit(qpd->doorbell_bitmap,
						    KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
			if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
				pr_debug("No doorbells available");
				return -EBUSY;
			}
			set_bit(found, qpd->doorbell_bitmap);
			q->doorbell_id = found;
		}
	}

	q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev,
								  qpd->proc_doorbells,
								  q->doorbell_id,
								  dev->kfd->device_info.doorbell_size);
	return 0;
}

static void deallocate_doorbell(struct qcm_process_device *qpd,
				struct queue *q)
{
	unsigned int old;
	struct kfd_node *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev) ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		return;

	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
	WARN_ON(!old);
}

static void program_trap_handler_settings(struct device_queue_manager *dqm,
					  struct qcm_process_device *qpd)
{
	uint32_t xcc_mask = dqm->dev->xcc_mask;
	int xcc_id;

	if (dqm->dev->kfd2kgd->program_trap_handler_settings)
		for_each_inst(xcc_id, xcc_mask)
			dqm->dev->kfd2kgd->program_trap_handler_settings(
				dqm->dev->adev, qpd->vmid, qpd->tba_addr,
				qpd->tma_addr, xcc_id);
}

static int allocate_vmid(struct device_queue_manager *dqm,
			 struct qcm_process_device *qpd,
			 struct queue *q)
{
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
	struct device *dev = dqm->dev->adev->dev;
	int allocated_vmid = -1, i;

	for (i = dqm->dev->vm_info.first_vmid_kfd;
	     i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
		if (!dqm->vmid_pasid[i]) {
			allocated_vmid = i;
			break;
		}
	}

	if (allocated_vmid < 0) {
		dev_err(dev, "no more vmid to allocate\n");
		return -ENOSPC;
	}

	pr_debug("vmid allocated: %d\n", allocated_vmid);

	dqm->vmid_pasid[allocated_vmid] = pdd->pasid;

	set_pasid_vmid_mapping(dqm, pdd->pasid, allocated_vmid);

	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	program_sh_mem_settings(dqm, qpd);

	if (KFD_IS_SOC15(dqm->dev) && dqm->dev->kfd->cwsr_enabled)
		program_trap_handler_settings(dqm, qpd);

	/* qpd->page_table_base is set earlier when register_process()
	 * is called, i.e. when the first queue is created.
	 */
	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev,
							  qpd->vmid,
							  qpd->page_table_base);
	/* invalidate the VM context after pasid and vmid mapping is set up */
	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);

	if (dqm->dev->kfd2kgd->set_scratch_backing_va)
		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,
			qpd->sh_hidden_private_base, qpd->vmid);

	return 0;
}

static int flush_texture_cache_nocpsch(struct kfd_node *kdev,
				       struct qcm_process_device *qpd)
{
	const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf;
	int ret;

	if (!qpd->ib_kaddr)
		return -ENOMEM;

	ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
	if (ret)
		return ret;

	return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid,
				       qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
				       pmf->release_mem_size / sizeof(uint32_t));
}

static void deallocate_vmid(struct device_queue_manager *dqm,
			    struct qcm_process_device *qpd,
			    struct queue *q)
{
	struct device *dev = dqm->dev->adev->dev;

	/* On GFX v7, CP doesn't flush TC at dequeue */
	if (q->device->adev->asic_type == CHIP_HAWAII)
		if (flush_texture_cache_nocpsch(q->device, qpd))
			dev_err(dev, "Failed to flush TC\n");

	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
	dqm->vmid_pasid[qpd->vmid] = 0;

	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				const struct kfd_criu_queue_priv_data *qd,
				const void *restore_mqd, const void *restore_ctl_stack)
{
	struct mqd_manager *mqd_mgr;
	int retval;

	dqm_lock(dqm);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
			dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	q->properties.vmid = qpd->vmid;
	/*
	 * Eviction state logic: mark all queues as evicted, even ones
	 * not currently active. Restoring inactive queues later only
	 * updates the is_evicted flag but is a no-op otherwise.
	 */
	q->properties.is_evicted = !!qpd->evicted;

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		retval = allocate_hqd(dqm, q);
		if (retval)
			goto deallocate_vmid;
		pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			 q->pipe, q->queue);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		   q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
		if (retval)
			goto deallocate_vmid;
		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	}

	retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
	if (retval)
		goto out_deallocate_hqd;

	/* Temporarily release dqm lock to avoid a circular lock dependency */
	dqm_unlock(dqm);
	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
	dqm_lock(dqm);

	if (!q->mqd_mem_obj) {
		retval = -ENOMEM;
		goto out_deallocate_doorbell;
	}

	if (qd)
		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
				     &q->properties, restore_mqd, restore_ctl_stack,
				     qd->ctl_stack_size);
	else
		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
				  &q->gart_mqd_addr, &q->properties);

	if (q->properties.is_active) {
		if (!dqm->sched_running) {
			WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
			goto add_queue_to_list;
		}

		if (WARN(q->process->mm != current->mm,
			 "should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
						   q->queue, &q->properties, current->mm);
		if (retval)
			goto out_free_mqd;
	}

add_queue_to_list:
	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active)
		increment_queue_count(dqm, qpd, q);

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
		 dqm->total_queue_count);
	goto out_unlock;

out_free_mqd:
	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_hqd:
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		deallocate_hqd(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		deallocate_sdma_queue(dqm, q);
deallocate_vmid:
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
out_unlock:
	dqm_unlock(dqm);
	return retval;
}

static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
	     i < get_pipes_per_mec(dqm);
	     pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = ffs(dqm->allocated_queues[pipe]) - 1;
			dqm->allocated_queues[pipe] &= ~(1 << bit);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				  struct queue *q)
{
	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}

#define SQ_IND_CMD_CMD_KILL		0x00000003
#define SQ_IND_CMD_MODE_BROADCAST	0x00000001

static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process *p)
{
	int status = 0;
	unsigned int vmid;
	uint16_t queried_pasid;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;
	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
	uint32_t xcc_mask = dev->xcc_mask;
	int xcc_id;

	reg_sq_cmd.u32All = 0;
	reg_gfx_index.u32All = 0;

	pr_debug("Killing all process wavefronts\n");

	if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
		dev_err(dev->adev->dev, "no vmid pasid mapping supported\n");
		return -EOPNOTSUPP;
	}

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd)
		return -EFAULT;

	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
	 * ATC_VMID15_PASID_MAPPING
	 * to check which VMID the current process is mapped to.
	 */

	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
				(dev->adev, vmid, &queried_pasid);

		if (status && queried_pasid == pdd->pasid) {
			pr_debug("Killing wave fronts of vmid %d and process pid %d\n",
				 vmid, p->lead_thread->pid);
			break;
		}
	}

	if (vmid > last_vmid_to_scan) {
		dev_err(dev->adev->dev, "Didn't find vmid for process pid %d\n",
			p->lead_thread->pid);
		return -EFAULT;
	}

	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
	reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
	reg_sq_cmd.bits.vm_id = vmid;

	for_each_inst(xcc_id, xcc_mask)
		dev->kfd2kgd->wave_control_execute(
			dev->adev, reg_gfx_index.u32All,
			reg_sq_cmd.u32All, xcc_id);

	return 0;
}

/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid unsynchronized access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd,
					struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		deallocate_hqd(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		deallocate_sdma_queue(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		deallocate_sdma_queue(dqm, q);
	else {
		pr_debug("q->properties.type %d is invalid\n",
			 q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	deallocate_doorbell(qpd, q);

	if (!dqm->sched_running) {
		WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
		return 0;
	}

	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				      KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				      KFD_UNMAP_LATENCY_MS,
				      q->pipe, q->queue);
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
				dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
907 */ 908 dbgdev_wave_reset_wavefronts(dqm->dev, 909 qpd->pqm->process); 910 qpd->reset_wavefronts = false; 911 } 912 913 deallocate_vmid(dqm, qpd, q); 914 } 915 qpd->queue_count--; 916 if (q->properties.is_active) 917 decrement_queue_count(dqm, qpd, q); 918 919 return retval; 920 } 921 922 static int destroy_queue_nocpsch(struct device_queue_manager *dqm, 923 struct qcm_process_device *qpd, 924 struct queue *q) 925 { 926 int retval; 927 uint64_t sdma_val = 0; 928 struct device *dev = dqm->dev->adev->dev; 929 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 930 struct mqd_manager *mqd_mgr = 931 dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)]; 932 933 /* Get the SDMA queue stats */ 934 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 935 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 936 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 937 &sdma_val); 938 if (retval) 939 dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n", 940 q->properties.queue_id); 941 } 942 943 dqm_lock(dqm); 944 retval = destroy_queue_nocpsch_locked(dqm, qpd, q); 945 if (!retval) 946 pdd->sdma_past_activity_counter += sdma_val; 947 dqm_unlock(dqm); 948 949 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 950 951 return retval; 952 } 953 954 static int update_queue(struct device_queue_manager *dqm, struct queue *q, 955 struct mqd_update_info *minfo) 956 { 957 int retval = 0; 958 struct device *dev = dqm->dev->adev->dev; 959 struct mqd_manager *mqd_mgr; 960 struct kfd_process_device *pdd; 961 bool prev_active = false; 962 963 dqm_lock(dqm); 964 pdd = kfd_get_process_device_data(q->device, q->process); 965 if (!pdd) { 966 retval = -ENODEV; 967 goto out_unlock; 968 } 969 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 970 q->properties.type)]; 971 972 /* Save previous activity state for counters */ 973 prev_active = q->properties.is_active; 974 975 /* Make sure the queue is unmapped before updating the MQD */ 976 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { 977 if (!dqm->dev->kfd->shared_resources.enable_mes) 978 retval = unmap_queues_cpsch(dqm, 979 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false); 980 else if (prev_active) 981 retval = remove_queue_mes(dqm, q, &pdd->qpd); 982 983 /* queue is reset so inaccessable */ 984 if (pdd->has_reset_queue) { 985 retval = -EACCES; 986 goto out_unlock; 987 } 988 989 if (retval) { 990 dev_err(dev, "unmap queue failed\n"); 991 goto out_unlock; 992 } 993 } else if (prev_active && 994 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 995 q->properties.type == KFD_QUEUE_TYPE_SDMA || 996 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 997 998 if (!dqm->sched_running) { 999 WARN_ONCE(1, "Update non-HWS queue while stopped\n"); 1000 goto out_unlock; 1001 } 1002 1003 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 1004 (dqm->dev->kfd->cwsr_enabled ? 1005 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 1006 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 1007 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 1008 if (retval) { 1009 dev_err(dev, "destroy mqd failed\n"); 1010 goto out_unlock; 1011 } 1012 } 1013 1014 mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo); 1015 1016 /* 1017 * check active state vs. the previous state and modify 1018 * counter accordingly. map_queues_cpsch uses the 1019 * dqm->active_queue_count to determine whether a new runlist must be 1020 * uploaded. 
1021 */ 1022 if (q->properties.is_active && !prev_active) { 1023 increment_queue_count(dqm, &pdd->qpd, q); 1024 } else if (!q->properties.is_active && prev_active) { 1025 decrement_queue_count(dqm, &pdd->qpd, q); 1026 } else if (q->gws && !q->properties.is_gws) { 1027 if (q->properties.is_active) { 1028 dqm->gws_queue_count++; 1029 pdd->qpd.mapped_gws_queue = true; 1030 } 1031 q->properties.is_gws = true; 1032 } else if (!q->gws && q->properties.is_gws) { 1033 if (q->properties.is_active) { 1034 dqm->gws_queue_count--; 1035 pdd->qpd.mapped_gws_queue = false; 1036 } 1037 q->properties.is_gws = false; 1038 } 1039 1040 if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) { 1041 if (!dqm->dev->kfd->shared_resources.enable_mes) 1042 retval = map_queues_cpsch(dqm); 1043 else if (q->properties.is_active) 1044 retval = add_queue_mes(dqm, q, &pdd->qpd); 1045 } else if (q->properties.is_active && 1046 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE || 1047 q->properties.type == KFD_QUEUE_TYPE_SDMA || 1048 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 1049 if (WARN(q->process->mm != current->mm, 1050 "should only run in user thread")) 1051 retval = -EFAULT; 1052 else 1053 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 1054 q->pipe, q->queue, 1055 &q->properties, current->mm); 1056 } 1057 1058 out_unlock: 1059 dqm_unlock(dqm); 1060 return retval; 1061 } 1062 1063 /* suspend_single_queue does not lock the dqm like the 1064 * evict_process_queues_cpsch or evict_process_queues_nocpsch. You should 1065 * lock the dqm before calling, and unlock after calling. 1066 * 1067 * The reason we don't lock the dqm is because this function may be 1068 * called on multiple queues in a loop, so rather than locking/unlocking 1069 * multiple times, we will just keep the dqm locked for all of the calls. 1070 */ 1071 static int suspend_single_queue(struct device_queue_manager *dqm, 1072 struct kfd_process_device *pdd, 1073 struct queue *q) 1074 { 1075 bool is_new; 1076 1077 if (q->properties.is_suspended) 1078 return 0; 1079 1080 pr_debug("Suspending process pid %d queue [%i]\n", 1081 pdd->process->lead_thread->pid, 1082 q->properties.queue_id); 1083 1084 is_new = q->properties.exception_status & KFD_EC_MASK(EC_QUEUE_NEW); 1085 1086 if (is_new || q->properties.is_being_destroyed) { 1087 pr_debug("Suspend: skip %s queue id %i\n", 1088 is_new ? "new" : "destroyed", 1089 q->properties.queue_id); 1090 return -EBUSY; 1091 } 1092 1093 q->properties.is_suspended = true; 1094 if (q->properties.is_active) { 1095 if (dqm->dev->kfd->shared_resources.enable_mes) { 1096 int r = remove_queue_mes(dqm, q, &pdd->qpd); 1097 1098 if (r) 1099 return r; 1100 } 1101 1102 decrement_queue_count(dqm, &pdd->qpd, q); 1103 q->properties.is_active = false; 1104 } 1105 1106 return 0; 1107 } 1108 1109 /* resume_single_queue does not lock the dqm like the functions 1110 * restore_process_queues_cpsch or restore_process_queues_nocpsch. You should 1111 * lock the dqm before calling, and unlock after calling. 1112 * 1113 * The reason we don't lock the dqm is because this function may be 1114 * called on multiple queues in a loop, so rather than locking/unlocking 1115 * multiple times, we will just keep the dqm locked for all of the calls. 
1116 */ 1117 static int resume_single_queue(struct device_queue_manager *dqm, 1118 struct qcm_process_device *qpd, 1119 struct queue *q) 1120 { 1121 struct kfd_process_device *pdd; 1122 1123 if (!q->properties.is_suspended) 1124 return 0; 1125 1126 pdd = qpd_to_pdd(qpd); 1127 1128 pr_debug("Restoring from suspend process pid %d queue [%i]\n", 1129 pdd->process->lead_thread->pid, 1130 q->properties.queue_id); 1131 1132 q->properties.is_suspended = false; 1133 1134 if (QUEUE_IS_ACTIVE(q->properties)) { 1135 if (dqm->dev->kfd->shared_resources.enable_mes) { 1136 int r = add_queue_mes(dqm, q, &pdd->qpd); 1137 1138 if (r) 1139 return r; 1140 } 1141 1142 q->properties.is_active = true; 1143 increment_queue_count(dqm, qpd, q); 1144 } 1145 1146 return 0; 1147 } 1148 1149 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, 1150 struct qcm_process_device *qpd) 1151 { 1152 struct queue *q; 1153 struct mqd_manager *mqd_mgr; 1154 struct kfd_process_device *pdd; 1155 int retval, ret = 0; 1156 1157 dqm_lock(dqm); 1158 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 1159 goto out; 1160 1161 pdd = qpd_to_pdd(qpd); 1162 pr_debug_ratelimited("Evicting process pid %d queues\n", 1163 pdd->process->lead_thread->pid); 1164 1165 pdd->last_evict_timestamp = get_jiffies_64(); 1166 /* Mark all queues as evicted. Deactivate all active queues on 1167 * the qpd. 1168 */ 1169 list_for_each_entry(q, &qpd->queues_list, list) { 1170 q->properties.is_evicted = true; 1171 if (!q->properties.is_active) 1172 continue; 1173 1174 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1175 q->properties.type)]; 1176 q->properties.is_active = false; 1177 decrement_queue_count(dqm, qpd, q); 1178 1179 if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) 1180 continue; 1181 1182 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 1183 (dqm->dev->kfd->cwsr_enabled ? 1184 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 1185 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 1186 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 1187 if (retval && !ret) 1188 /* Return the first error, but keep going to 1189 * maintain a consistent eviction state 1190 */ 1191 ret = retval; 1192 } 1193 1194 out: 1195 dqm_unlock(dqm); 1196 return ret; 1197 } 1198 1199 static int evict_process_queues_cpsch(struct device_queue_manager *dqm, 1200 struct qcm_process_device *qpd) 1201 { 1202 struct queue *q; 1203 struct device *dev = dqm->dev->adev->dev; 1204 struct kfd_process_device *pdd; 1205 int retval = 0; 1206 1207 dqm_lock(dqm); 1208 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 1209 goto out; 1210 1211 pdd = qpd_to_pdd(qpd); 1212 1213 /* The debugger creates processes that temporarily have not acquired 1214 * all VMs for all devices and has no VMs itself. 1215 * Skip queue eviction on process eviction. 1216 */ 1217 if (!pdd->drm_priv) 1218 goto out; 1219 1220 pr_debug_ratelimited("Evicting process pid %d queues\n", 1221 pdd->process->lead_thread->pid); 1222 1223 /* Mark all queues as evicted. Deactivate all active queues on 1224 * the qpd. 
1225 */ 1226 list_for_each_entry(q, &qpd->queues_list, list) { 1227 q->properties.is_evicted = true; 1228 if (!q->properties.is_active) 1229 continue; 1230 1231 q->properties.is_active = false; 1232 decrement_queue_count(dqm, qpd, q); 1233 1234 if (dqm->dev->kfd->shared_resources.enable_mes) { 1235 retval = remove_queue_mes(dqm, q, qpd); 1236 if (retval) { 1237 dev_err(dev, "Failed to evict queue %d\n", 1238 q->properties.queue_id); 1239 goto out; 1240 } 1241 } 1242 } 1243 pdd->last_evict_timestamp = get_jiffies_64(); 1244 if (!dqm->dev->kfd->shared_resources.enable_mes) 1245 retval = execute_queues_cpsch(dqm, 1246 qpd->is_debug ? 1247 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : 1248 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 1249 USE_DEFAULT_GRACE_PERIOD); 1250 1251 out: 1252 dqm_unlock(dqm); 1253 return retval; 1254 } 1255 1256 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, 1257 struct qcm_process_device *qpd) 1258 { 1259 struct mm_struct *mm = NULL; 1260 struct queue *q; 1261 struct mqd_manager *mqd_mgr; 1262 struct kfd_process_device *pdd; 1263 uint64_t pd_base; 1264 uint64_t eviction_duration; 1265 int retval, ret = 0; 1266 1267 pdd = qpd_to_pdd(qpd); 1268 /* Retrieve PD base */ 1269 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1270 1271 dqm_lock(dqm); 1272 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 1273 goto out; 1274 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 1275 qpd->evicted--; 1276 goto out; 1277 } 1278 1279 pr_debug_ratelimited("Restoring process pid %d queues\n", 1280 pdd->process->lead_thread->pid); 1281 1282 /* Update PD Base in QPD */ 1283 qpd->page_table_base = pd_base; 1284 pr_debug("Updated PD address to 0x%llx\n", pd_base); 1285 1286 if (!list_empty(&qpd->queues_list)) { 1287 dqm->dev->kfd2kgd->set_vm_context_page_table_base( 1288 dqm->dev->adev, 1289 qpd->vmid, 1290 qpd->page_table_base); 1291 kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY); 1292 } 1293 1294 /* Take a safe reference to the mm_struct, which may otherwise 1295 * disappear even while the kfd_process is still referenced. 1296 */ 1297 mm = get_task_mm(pdd->process->lead_thread); 1298 if (!mm) { 1299 ret = -EFAULT; 1300 goto out; 1301 } 1302 1303 /* Remove the eviction flags. Activate queues that are not 1304 * inactive for other reasons. 
1305 */ 1306 list_for_each_entry(q, &qpd->queues_list, list) { 1307 q->properties.is_evicted = false; 1308 if (!QUEUE_IS_ACTIVE(q->properties)) 1309 continue; 1310 1311 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1312 q->properties.type)]; 1313 q->properties.is_active = true; 1314 increment_queue_count(dqm, qpd, q); 1315 1316 if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) 1317 continue; 1318 1319 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 1320 q->queue, &q->properties, mm); 1321 if (retval && !ret) 1322 /* Return the first error, but keep going to 1323 * maintain a consistent eviction state 1324 */ 1325 ret = retval; 1326 } 1327 qpd->evicted = 0; 1328 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 1329 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 1330 out: 1331 if (mm) 1332 mmput(mm); 1333 dqm_unlock(dqm); 1334 return ret; 1335 } 1336 1337 static int restore_process_queues_cpsch(struct device_queue_manager *dqm, 1338 struct qcm_process_device *qpd) 1339 { 1340 struct queue *q; 1341 struct device *dev = dqm->dev->adev->dev; 1342 struct kfd_process_device *pdd; 1343 uint64_t eviction_duration; 1344 int retval = 0; 1345 1346 pdd = qpd_to_pdd(qpd); 1347 1348 dqm_lock(dqm); 1349 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 1350 goto out; 1351 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 1352 qpd->evicted--; 1353 goto out; 1354 } 1355 1356 /* The debugger creates processes that temporarily have not acquired 1357 * all VMs for all devices and has no VMs itself. 1358 * Skip queue restore on process restore. 1359 */ 1360 if (!pdd->drm_priv) 1361 goto vm_not_acquired; 1362 1363 pr_debug_ratelimited("Restoring process pid %d queues\n", 1364 pdd->process->lead_thread->pid); 1365 1366 /* Update PD Base in QPD */ 1367 qpd->page_table_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1368 pr_debug("Updated PD address to 0x%llx\n", qpd->page_table_base); 1369 1370 /* activate all active queues on the qpd */ 1371 list_for_each_entry(q, &qpd->queues_list, list) { 1372 q->properties.is_evicted = false; 1373 if (!QUEUE_IS_ACTIVE(q->properties)) 1374 continue; 1375 1376 q->properties.is_active = true; 1377 increment_queue_count(dqm, &pdd->qpd, q); 1378 1379 if (dqm->dev->kfd->shared_resources.enable_mes) { 1380 retval = add_queue_mes(dqm, q, qpd); 1381 if (retval) { 1382 dev_err(dev, "Failed to restore queue %d\n", 1383 q->properties.queue_id); 1384 goto out; 1385 } 1386 } 1387 } 1388 if (!dqm->dev->kfd->shared_resources.enable_mes) 1389 retval = execute_queues_cpsch(dqm, 1390 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); 1391 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 1392 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 1393 vm_not_acquired: 1394 qpd->evicted = 0; 1395 out: 1396 dqm_unlock(dqm); 1397 return retval; 1398 } 1399 1400 static int register_process(struct device_queue_manager *dqm, 1401 struct qcm_process_device *qpd) 1402 { 1403 struct device_process_node *n; 1404 struct kfd_process_device *pdd; 1405 uint64_t pd_base; 1406 int retval; 1407 1408 n = kzalloc(sizeof(*n), GFP_KERNEL); 1409 if (!n) 1410 return -ENOMEM; 1411 1412 n->qpd = qpd; 1413 1414 pdd = qpd_to_pdd(qpd); 1415 /* Retrieve PD base */ 1416 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1417 1418 dqm_lock(dqm); 1419 list_add(&n->list, &dqm->queues); 1420 1421 /* Update PD Base in QPD */ 1422 qpd->page_table_base = pd_base; 
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	retval = dqm->asic_ops.update_qpd(dqm, qpd);

	dqm->processes_count++;

	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	kfd_inc_compute_active(dqm->dev);

	return retval;
}

static int unregister_process(struct device_queue_manager *dqm,
			      struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	pr_debug("qpd->queues_list is %s\n",
		 list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	dqm_lock(dqm);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	if (!retval)
		kfd_dec_compute_active(dqm->dev);

	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
		       unsigned int vmid)
{
	uint32_t xcc_mask = dqm->dev->xcc_mask;
	int xcc_id, ret;

	for_each_inst(xcc_id, xcc_mask) {
		ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
			dqm->dev->adev, pasid, vmid, xcc_id);
		if (ret)
			break;
	}

	return ret;
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	uint32_t xcc_mask = dqm->dev->xcc_mask;
	unsigned int i, xcc_id;

	for_each_inst(xcc_id, xcc_mask) {
		for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) {
			if (is_pipe_enabled(dqm, 0, i)) {
				dqm->dev->kfd2kgd->init_interrupts(
					dqm->dev->adev, i, xcc_id);
			}
		}
	}
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int pipe, queue;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues)
		return -ENOMEM;

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->active_cp_queue_count = 0;
	dqm->gws_queue_count = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
			if (test_bit(pipe_offset + queue,
				     dqm->dev->kfd->shared_resources.cp_queue_bitmap))
				dqm->allocated_queues[pipe] |= 1 << queue;
	}

	memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));

	init_sdma_bitmaps(dqm);

	return 0;
}

static void uninitialize(struct device_queue_manager *dqm)
{
	int i;

	WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqd_mgrs[i]);
	mutex_destroy(&dqm->lock_hidden);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	int r = 0;

	pr_info("SW scheduler is used");
	init_interrupts(dqm);

	if (dqm->dev->adev->asic_type == CHIP_HAWAII)
		r = pm_init(&dqm->packet_mgr, dqm);
	if (!r)
		dqm->sched_running = true;

	return r;
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	dqm_lock(dqm);
	if (!dqm->sched_running) {
		dqm_unlock(dqm);
		return 0;
	}

	if (dqm->dev->adev->asic_type == CHIP_HAWAII)
		pm_uninit(&dqm->packet_mgr);
	dqm->sched_running = false;
	dqm_unlock(dqm);

	return 0;
}

static int allocate_sdma_queue(struct device_queue_manager *dqm,
			       struct queue *q, const uint32_t *restore_sdma_id)
{
	struct device *dev = dqm->dev->adev->dev;
	int bit;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		if (bitmap_empty(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
			dev_err(dev, "No more SDMA queue to allocate\n");
			return -ENOMEM;
		}

		if (restore_sdma_id) {
			/* Re-use existing sdma_id */
			if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) {
				dev_err(dev, "SDMA queue already in use\n");
				return -EBUSY;
			}
			clear_bit(*restore_sdma_id, dqm->sdma_bitmap);
			q->sdma_id = *restore_sdma_id;
		} else {
			/* Find first available sdma_id */
			bit = find_first_bit(dqm->sdma_bitmap,
					     get_num_sdma_queues(dqm));
			clear_bit(bit, dqm->sdma_bitmap);
			q->sdma_id = bit;
		}

		q->properties.sdma_engine_id =
			q->sdma_id % kfd_get_num_sdma_engines(dqm->dev);
		q->properties.sdma_queue_id = q->sdma_id /
				kfd_get_num_sdma_engines(dqm->dev);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		if (bitmap_empty(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES)) {
			dev_err(dev, "No more XGMI SDMA queue to allocate\n");
			return -ENOMEM;
		}
		if (restore_sdma_id) {
			/* Re-use existing sdma_id */
			if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) {
				dev_err(dev, "SDMA queue already in use\n");
				return -EBUSY;
			}
			clear_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap);
			q->sdma_id = *restore_sdma_id;
		} else {
			bit = find_first_bit(dqm->xgmi_sdma_bitmap,
					     get_num_xgmi_sdma_queues(dqm));
			clear_bit(bit, dqm->xgmi_sdma_bitmap);
			q->sdma_id = bit;
		}
		/* sdma_engine_id is sdma id including
		 * both PCIe-optimized SDMAs and XGMI-
		 * optimized SDMAs. The calculation below
		 * assumes the first N engines are always
		 * PCIe-optimized ones
		 */
		q->properties.sdma_engine_id =
			kfd_get_num_sdma_engines(dqm->dev) +
			q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
		q->properties.sdma_queue_id = q->sdma_id /
				kfd_get_num_xgmi_sdma_engines(dqm->dev);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) {
		int i, num_queues, num_engines, eng_offset = 0, start_engine;
		bool free_bit_found = false, is_xgmi = false;

		if (q->properties.sdma_engine_id < kfd_get_num_sdma_engines(dqm->dev)) {
			num_queues = get_num_sdma_queues(dqm);
			num_engines = kfd_get_num_sdma_engines(dqm->dev);
			q->properties.type = KFD_QUEUE_TYPE_SDMA;
		} else {
			num_queues = get_num_xgmi_sdma_queues(dqm);
			num_engines = kfd_get_num_xgmi_sdma_engines(dqm->dev);
			eng_offset = kfd_get_num_sdma_engines(dqm->dev);
			q->properties.type = KFD_QUEUE_TYPE_SDMA_XGMI;
			is_xgmi = true;
		}

		/* Scan available bit based on target engine ID. */
		start_engine = q->properties.sdma_engine_id - eng_offset;
		for (i = start_engine; i < num_queues; i += num_engines) {

			if (!test_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap))
				continue;

			clear_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap);
			q->sdma_id = i;
			q->properties.sdma_queue_id = q->sdma_id / num_engines;
			free_bit_found = true;
			break;
		}

		if (!free_bit_found) {
			dev_err(dev, "No more SDMA queue to allocate for target ID %i\n",
				q->properties.sdma_engine_id);
			return -ENOMEM;
		}
	}

	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				  struct queue *q)
{
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		if (q->sdma_id >= get_num_sdma_queues(dqm))
			return;
		set_bit(q->sdma_id, dqm->sdma_bitmap);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
			return;
		set_bit(q->sdma_id, dqm->xgmi_sdma_bitmap);
	}
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;
	struct device *dev = dqm->dev->adev->dev;

	res.vmid_mask = dqm->dev->compute_vmid_bitmap;

	res.queue_mask = 0;
	for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe)
			/ dqm->dev->kfd->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			dev_err(dev, "Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= 1ull
			<< amdgpu_queue_mask_bit_to_set_resource_bit(
				dqm->dev->adev, i);
	}
	res.gws_mask = ~0ull;
	res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
		 "vmid mask: 0x%8X\n"
		 "queue mask: 0x%8llX\n",
		 res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packet_mgr, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->active_queue_count = dqm->processes_count = 0;
	dqm->active_cp_queue_count = 0;
	dqm->gws_queue_count = 0;
	dqm->active_runlist = false;
	INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);
	dqm->trap_debug_vmid = 0;

	init_sdma_bitmaps(dqm);

	if (dqm->dev->kfd2kgd->get_iq_wait_times)
		dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
						     &dqm->wait_times,
						     ffs(dqm->dev->xcc_mask) - 1);
	return 0;
}

/* halt_cpsch:
 * Unmap queues so the scheduler doesn't continue remaining jobs in the queue.
 * Then set dqm->sched_halt so queues don't map to runlist until unhalt_cpsch
 * is called.
 */
static int halt_cpsch(struct device_queue_manager *dqm)
{
	int ret = 0;

	dqm_lock(dqm);
	if (!dqm->sched_running) {
		dqm_unlock(dqm);
		return 0;
	}

	WARN_ONCE(dqm->sched_halt, "Scheduling is already on halt\n");

	if (!dqm->is_hws_hang) {
		if (!dqm->dev->kfd->shared_resources.enable_mes)
			ret = unmap_queues_cpsch(dqm,
						 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
						 USE_DEFAULT_GRACE_PERIOD, false);
		else
			ret = remove_all_kfd_queues_mes(dqm);
	}
	dqm->sched_halt = true;
	dqm_unlock(dqm);

	return ret;
}

/* unhalt_cpsch
 * Unset dqm->sched_halt and map queues back to runlist
 */
static int unhalt_cpsch(struct device_queue_manager *dqm)
{
	int ret = 0;

	dqm_lock(dqm);
	if (!dqm->sched_running || !dqm->sched_halt) {
		WARN_ONCE(!dqm->sched_halt, "Scheduling is not on halt.\n");
		dqm_unlock(dqm);
		return 0;
	}
	dqm->sched_halt = false;
	if (!dqm->dev->kfd->shared_resources.enable_mes)
		ret = execute_queues_cpsch(dqm,
					   KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
					   0, USE_DEFAULT_GRACE_PERIOD);
	else
		ret = add_all_kfd_queues_mes(dqm);

	dqm_unlock(dqm);

	return ret;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	struct device *dev = dqm->dev->adev->dev;
	int retval, num_hw_queue_slots;

	retval = 0;

	dqm_lock(dqm);

	if (!dqm->dev->kfd->shared_resources.enable_mes) {
		retval = pm_init(&dqm->packet_mgr, dqm);
		if (retval)
			goto fail_packet_manager_init;

		retval = set_sched_resources(dqm);
		if (retval)
			goto fail_set_sched_resources;
	}
	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
				     &dqm->fence_mem);

	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	/* clear hang status when the driver tries to start the hw scheduler */
	dqm->sched_running = true;

	if (!dqm->dev->kfd->shared_resources.enable_mes)
		execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);

	/* Set CWSR grace period to 1x1000 cycle for GFX9.4.3 APU */
	if (amdgpu_emu_mode == 0 && dqm->dev->adev->gmc.is_app_apu &&
	    (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3))) {
		uint32_t reg_offset = 0;
		uint32_t grace_period = 1;

		retval = pm_update_grace_period(&dqm->packet_mgr,
						grace_period);
		if (retval)
			dev_err(dev, "Setting grace timeout failed\n");
		else if (dqm->dev->kfd2kgd->build_grace_period_packet_info)
			/* Update dqm->wait_times maintained in software */
			dqm->dev->kfd2kgd->build_grace_period_packet_info(
				dqm->dev->adev, dqm->wait_times,
				grace_period, &reg_offset,
				&dqm->wait_times);
	}

	/* setup per-queue reset detection buffer */
	num_hw_queue_slots = dqm->dev->kfd->shared_resources.num_queue_per_pipe *
			     dqm->dev->kfd->shared_resources.num_pipe_per_mec *
			     NUM_XCC(dqm->dev->xcc_mask);

	dqm->detect_hang_info_size = num_hw_queue_slots * sizeof(struct dqm_detect_hang_info);
	dqm->detect_hang_info = kzalloc(dqm->detect_hang_info_size, GFP_KERNEL);
kzalloc(dqm->detect_hang_info_size, GFP_KERNEL); 1890 1891 if (!dqm->detect_hang_info) { 1892 retval = -ENOMEM; 1893 goto fail_detect_hang_buffer; 1894 } 1895 1896 dqm_unlock(dqm); 1897 1898 return 0; 1899 fail_detect_hang_buffer: 1900 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); 1901 fail_allocate_vidmem: 1902 fail_set_sched_resources: 1903 if (!dqm->dev->kfd->shared_resources.enable_mes) 1904 pm_uninit(&dqm->packet_mgr); 1905 fail_packet_manager_init: 1906 dqm_unlock(dqm); 1907 return retval; 1908 } 1909 1910 static int stop_cpsch(struct device_queue_manager *dqm) 1911 { 1912 dqm_lock(dqm); 1913 if (!dqm->sched_running) { 1914 dqm_unlock(dqm); 1915 return 0; 1916 } 1917 1918 if (!dqm->dev->kfd->shared_resources.enable_mes) 1919 unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false); 1920 else 1921 remove_all_kfd_queues_mes(dqm); 1922 1923 dqm->sched_running = false; 1924 1925 if (!dqm->dev->kfd->shared_resources.enable_mes) 1926 pm_release_ib(&dqm->packet_mgr); 1927 1928 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); 1929 if (!dqm->dev->kfd->shared_resources.enable_mes) 1930 pm_uninit(&dqm->packet_mgr); 1931 kfree(dqm->detect_hang_info); 1932 dqm->detect_hang_info = NULL; 1933 dqm_unlock(dqm); 1934 1935 return 0; 1936 } 1937 1938 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, 1939 struct kernel_queue *kq, 1940 struct qcm_process_device *qpd) 1941 { 1942 dqm_lock(dqm); 1943 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1944 pr_warn("Can't create new kernel queue because %d queues were already created\n", 1945 dqm->total_queue_count); 1946 dqm_unlock(dqm); 1947 return -EPERM; 1948 } 1949 1950 /* 1951 * Unconditionally increment this counter, regardless of the queue's 1952 * type or whether the queue is active. 1953 */ 1954 dqm->total_queue_count++; 1955 pr_debug("Total of %d queues are accountable so far\n", 1956 dqm->total_queue_count); 1957 1958 list_add(&kq->list, &qpd->priv_queue_list); 1959 increment_queue_count(dqm, qpd, kq->queue); 1960 qpd->is_debug = true; 1961 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 1962 USE_DEFAULT_GRACE_PERIOD); 1963 dqm_unlock(dqm); 1964 1965 return 0; 1966 } 1967 1968 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, 1969 struct kernel_queue *kq, 1970 struct qcm_process_device *qpd) 1971 { 1972 dqm_lock(dqm); 1973 list_del(&kq->list); 1974 decrement_queue_count(dqm, qpd, kq->queue); 1975 qpd->is_debug = false; 1976 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 1977 USE_DEFAULT_GRACE_PERIOD); 1978 /* 1979 * Unconditionally decrement this counter, regardless of the queue's 1980 * type. 
1981 */ 1982 dqm->total_queue_count--; 1983 pr_debug("Total of %d queues are accountable so far\n", 1984 dqm->total_queue_count); 1985 dqm_unlock(dqm); 1986 } 1987 1988 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, 1989 struct qcm_process_device *qpd, 1990 const struct kfd_criu_queue_priv_data *qd, 1991 const void *restore_mqd, const void *restore_ctl_stack) 1992 { 1993 int retval; 1994 struct mqd_manager *mqd_mgr; 1995 1996 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 1997 pr_warn("Can't create new usermode queue because %d queues were already created\n", 1998 dqm->total_queue_count); 1999 retval = -EPERM; 2000 goto out; 2001 } 2002 2003 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 2004 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI || 2005 q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { 2006 dqm_lock(dqm); 2007 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); 2008 dqm_unlock(dqm); 2009 if (retval) 2010 goto out; 2011 } 2012 2013 retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL); 2014 if (retval) 2015 goto out_deallocate_sdma_queue; 2016 2017 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2018 q->properties.type)]; 2019 2020 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 2021 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 2022 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 2023 q->properties.tba_addr = qpd->tba_addr; 2024 q->properties.tma_addr = qpd->tma_addr; 2025 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties); 2026 if (!q->mqd_mem_obj) { 2027 retval = -ENOMEM; 2028 goto out_deallocate_doorbell; 2029 } 2030 2031 dqm_lock(dqm); 2032 /* 2033 * Eviction state logic: mark all queues as evicted, even ones 2034 * not currently active. Restoring inactive queues later only 2035 * updates the is_evicted flag but is a no-op otherwise. 2036 */ 2037 q->properties.is_evicted = !!qpd->evicted; 2038 q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled && 2039 kfd_dbg_has_cwsr_workaround(q->device); 2040 2041 if (qd) 2042 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, 2043 &q->properties, restore_mqd, restore_ctl_stack, 2044 qd->ctl_stack_size); 2045 else 2046 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 2047 &q->gart_mqd_addr, &q->properties); 2048 2049 list_add(&q->list, &qpd->queues_list); 2050 qpd->queue_count++; 2051 2052 if (q->properties.is_active) { 2053 increment_queue_count(dqm, qpd, q); 2054 2055 if (!dqm->dev->kfd->shared_resources.enable_mes) 2056 retval = execute_queues_cpsch(dqm, 2057 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); 2058 else 2059 retval = add_queue_mes(dqm, q, qpd); 2060 if (retval) 2061 goto cleanup_queue; 2062 } 2063 2064 /* 2065 * Unconditionally increment this counter, regardless of the queue's 2066 * type or whether the queue is active. 
2067 */ 2068 dqm->total_queue_count++; 2069 2070 pr_debug("Total of %d queues are accountable so far\n", 2071 dqm->total_queue_count); 2072 2073 dqm_unlock(dqm); 2074 return retval; 2075 2076 cleanup_queue: 2077 qpd->queue_count--; 2078 list_del(&q->list); 2079 if (q->properties.is_active) 2080 decrement_queue_count(dqm, qpd, q); 2081 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2082 dqm_unlock(dqm); 2083 out_deallocate_doorbell: 2084 deallocate_doorbell(qpd, q); 2085 out_deallocate_sdma_queue: 2086 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 2087 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 2088 dqm_lock(dqm); 2089 deallocate_sdma_queue(dqm, q); 2090 dqm_unlock(dqm); 2091 } 2092 out: 2093 return retval; 2094 } 2095 2096 int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm, 2097 uint64_t fence_value, 2098 unsigned int timeout_ms) 2099 { 2100 unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies; 2101 struct device *dev = dqm->dev->adev->dev; 2102 uint64_t *fence_addr = dqm->fence_addr; 2103 2104 while (*fence_addr != fence_value) { 2105 /* Fatal err detected, this response won't come */ 2106 if (amdgpu_amdkfd_is_fed(dqm->dev->adev)) 2107 return -EIO; 2108 2109 if (time_after(jiffies, end_jiffies)) { 2110 dev_err(dev, "qcm fence wait loop timeout expired\n"); 2111 /* In HWS case, this is used to halt the driver thread 2112 * in order not to mess up CP states before doing 2113 * scandumps for FW debugging. 2114 */ 2115 while (halt_if_hws_hang) 2116 schedule(); 2117 2118 return -ETIME; 2119 } 2120 schedule(); 2121 } 2122 2123 return 0; 2124 } 2125 2126 /* dqm->lock mutex has to be locked before calling this function */ 2127 static int map_queues_cpsch(struct device_queue_manager *dqm) 2128 { 2129 struct device *dev = dqm->dev->adev->dev; 2130 int retval; 2131 2132 if (!dqm->sched_running || dqm->sched_halt) 2133 return 0; 2134 if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0) 2135 return 0; 2136 if (dqm->active_runlist) 2137 return 0; 2138 2139 retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues); 2140 pr_debug("%s sent runlist\n", __func__); 2141 if (retval) { 2142 dev_err(dev, "failed to execute runlist\n"); 2143 return retval; 2144 } 2145 dqm->active_runlist = true; 2146 2147 return retval; 2148 } 2149 2150 static void set_queue_as_reset(struct device_queue_manager *dqm, struct queue *q, 2151 struct qcm_process_device *qpd) 2152 { 2153 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 2154 2155 dev_err(dqm->dev->adev->dev, "queue id 0x%0x at pasid %d is reset\n", 2156 q->properties.queue_id, pdd->process->lead_thread->pid); 2157 2158 pdd->has_reset_queue = true; 2159 if (q->properties.is_active) { 2160 q->properties.is_active = false; 2161 decrement_queue_count(dqm, qpd, q); 2162 } 2163 } 2164 2165 static int detect_queue_hang(struct device_queue_manager *dqm) 2166 { 2167 int i; 2168 2169 /* detect should be used only in dqm locked queue reset */ 2170 if (WARN_ON(dqm->detect_hang_count > 0)) 2171 return 0; 2172 2173 memset(dqm->detect_hang_info, 0, dqm->detect_hang_info_size); 2174 2175 for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) { 2176 uint32_t mec, pipe, queue; 2177 int xcc_id; 2178 2179 mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe) 2180 / dqm->dev->kfd->shared_resources.num_pipe_per_mec; 2181 2182 if (mec || !test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 2183 continue; 2184 2185 amdgpu_queue_mask_bit_to_mec_queue(dqm->dev->adev, i, &mec, &pipe, &queue); 2186 2187 for_each_inst(xcc_id, 
dqm->dev->xcc_mask) { 2188 uint64_t queue_addr = dqm->dev->kfd2kgd->hqd_get_pq_addr( 2189 dqm->dev->adev, pipe, queue, xcc_id); 2190 struct dqm_detect_hang_info hang_info; 2191 2192 if (!queue_addr) 2193 continue; 2194 2195 hang_info.pipe_id = pipe; 2196 hang_info.queue_id = queue; 2197 hang_info.xcc_id = xcc_id; 2198 hang_info.queue_address = queue_addr; 2199 2200 dqm->detect_hang_info[dqm->detect_hang_count] = hang_info; 2201 dqm->detect_hang_count++; 2202 } 2203 } 2204 2205 return dqm->detect_hang_count; 2206 } 2207 2208 static struct queue *find_queue_by_address(struct device_queue_manager *dqm, uint64_t queue_address) 2209 { 2210 struct device_process_node *cur; 2211 struct qcm_process_device *qpd; 2212 struct queue *q; 2213 2214 list_for_each_entry(cur, &dqm->queues, list) { 2215 qpd = cur->qpd; 2216 list_for_each_entry(q, &qpd->queues_list, list) { 2217 if (queue_address == q->properties.queue_address) 2218 return q; 2219 } 2220 } 2221 2222 return NULL; 2223 } 2224 2225 /* only for compute queue */ 2226 static int reset_queues_on_hws_hang(struct device_queue_manager *dqm) 2227 { 2228 int r = 0, reset_count = 0, i; 2229 2230 if (!dqm->detect_hang_info || dqm->is_hws_hang) 2231 return -EIO; 2232 2233 /* assume dqm locked. */ 2234 if (!detect_queue_hang(dqm)) 2235 return -ENOTRECOVERABLE; 2236 2237 for (i = 0; i < dqm->detect_hang_count; i++) { 2238 struct dqm_detect_hang_info hang_info = dqm->detect_hang_info[i]; 2239 struct queue *q = find_queue_by_address(dqm, hang_info.queue_address); 2240 struct kfd_process_device *pdd; 2241 uint64_t queue_addr = 0; 2242 2243 if (!q) { 2244 r = -ENOTRECOVERABLE; 2245 goto reset_fail; 2246 } 2247 2248 pdd = kfd_get_process_device_data(dqm->dev, q->process); 2249 if (!pdd) { 2250 r = -ENOTRECOVERABLE; 2251 goto reset_fail; 2252 } 2253 2254 queue_addr = dqm->dev->kfd2kgd->hqd_reset(dqm->dev->adev, 2255 hang_info.pipe_id, hang_info.queue_id, hang_info.xcc_id, 2256 KFD_UNMAP_LATENCY_MS); 2257 2258 /* either reset failed or we reset an unexpected queue. 
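hqd_reset() is expected to return the ring buffer address of the queue it actually reset, so it must match the address of the queue found above.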
*/
2259 if (queue_addr != q->properties.queue_address) {
2260 r = -ENOTRECOVERABLE;
2261 goto reset_fail;
2262 }
2263
2264 set_queue_as_reset(dqm, q, &pdd->qpd);
2265 reset_count++;
2266 }
2267
2268 if (reset_count == dqm->detect_hang_count)
2269 kfd_signal_reset_event(dqm->dev);
2270 else
2271 r = -ENOTRECOVERABLE;
2272
2273 reset_fail:
2274 dqm->detect_hang_count = 0;
2275
2276 return r;
2277 }
2278
2279 /* dqm->lock mutex has to be locked before calling this function */
2280 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
2281 enum kfd_unmap_queues_filter filter,
2282 uint32_t filter_param,
2283 uint32_t grace_period,
2284 bool reset)
2285 {
2286 struct device *dev = dqm->dev->adev->dev;
2287 struct mqd_manager *mqd_mgr;
2288 int retval;
2289
2290 if (!dqm->sched_running)
2291 return 0;
2292 if (!dqm->active_runlist)
2293 return 0;
2294 if (!down_read_trylock(&dqm->dev->adev->reset_domain->sem))
2295 return -EIO;
2296
2297 if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
2298 retval = pm_update_grace_period(&dqm->packet_mgr, grace_period);
2299 if (retval)
2300 goto out;
2301 }
2302
2303 retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
2304 if (retval)
2305 goto out;
2306
2307 *dqm->fence_addr = KFD_FENCE_INIT;
2308 mb();
2309 pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
2310 KFD_FENCE_COMPLETED);
2311 /* this fence wait is bounded by queue_preemption_timeout_ms */
2312 retval = amdkfd_fence_wait_timeout(dqm, KFD_FENCE_COMPLETED,
2313 queue_preemption_timeout_ms);
2314 if (retval) {
2315 dev_err(dev, "The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
2316 kfd_hws_hang(dqm);
2317 goto out;
2318 }
2319
2320 /* In the current MEC firmware implementation, if a compute queue
2321 * doesn't respond to the preemption request in time, HIQ will
2322 * abandon the unmap request without returning any timeout error
2323 * to the driver. Instead, MEC firmware will log the doorbell of the
2324 * unresponsive compute queue to HIQ.MQD.queue_doorbell_id fields.
2325 * To make sure the queue unmap was successful, the driver needs to
2326 * check those fields.
2327 */
2328 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
2329 if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd)) {
2330 while (halt_if_hws_hang)
2331 schedule();
2332 if (reset_queues_on_hws_hang(dqm)) {
2333 dqm->is_hws_hang = true;
2334 kfd_hws_hang(dqm);
2335 retval = -ETIME;
2336 goto out;
2337 }
2338 }
2339
2340 /* We need to reset the grace period value for this device */
2341 if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
2342 if (pm_update_grace_period(&dqm->packet_mgr,
2343 USE_DEFAULT_GRACE_PERIOD))
2344 dev_err(dev, "Failed to reset grace period\n");
2345 }
2346
2347 pm_release_ib(&dqm->packet_mgr);
2348 dqm->active_runlist = false;
2349
2350 out:
2351 up_read(&dqm->dev->adev->reset_domain->sem);
2352 return retval;
2353 }
2354
2355 /* only for compute queue */
2356 static int reset_queues_cpsch(struct device_queue_manager *dqm, uint16_t pasid)
2357 {
2358 int retval;
2359
2360 dqm_lock(dqm);
2361
2362 retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
2363 pasid, USE_DEFAULT_GRACE_PERIOD, true);
2364
2365 dqm_unlock(dqm);
2366 return retval;
2367 }
2368
2369 /* dqm->lock mutex has to be locked before calling this function */
2370 static int execute_queues_cpsch(struct device_queue_manager *dqm,
2371 enum kfd_unmap_queues_filter filter,
2372 uint32_t filter_param,
2373 uint32_t grace_period)
2374 {
2375 int retval;
2376
2377 if (!down_read_trylock(&dqm->dev->adev->reset_domain->sem))
2378 return -EIO;
2379 retval = unmap_queues_cpsch(dqm, filter, filter_param, grace_period, false);
2380 if (!retval)
2381 retval = map_queues_cpsch(dqm);
2382 up_read(&dqm->dev->adev->reset_domain->sem);
2383 return retval;
2384 }
2385
2386 static int wait_on_destroy_queue(struct device_queue_manager *dqm,
2387 struct queue *q)
2388 {
2389 struct kfd_process_device *pdd = kfd_get_process_device_data(q->device,
2390 q->process);
2391 int ret = 0;
2392
2393 if (WARN_ON(!pdd))
2394 return ret;
2395
2396 if (pdd->qpd.is_debug)
2397 return ret;
2398
2399 q->properties.is_being_destroyed = true;
2400
2401 if (pdd->process->debug_trap_enabled && q->properties.is_suspended) {
2402 dqm_unlock(dqm);
2403 mutex_unlock(&q->process->mutex);
2404 ret = wait_event_interruptible(dqm->destroy_wait,
2405 !q->properties.is_suspended);
2406
2407 mutex_lock(&q->process->mutex);
2408 dqm_lock(dqm);
2409 }
2410
2411 return ret;
2412 }
2413
2414 static int destroy_queue_cpsch(struct device_queue_manager *dqm,
2415 struct qcm_process_device *qpd,
2416 struct queue *q)
2417 {
2418 int retval;
2419 struct mqd_manager *mqd_mgr;
2420 uint64_t sdma_val = 0;
2421 struct kfd_process_device *pdd = qpd_to_pdd(qpd);
2422 struct device *dev = dqm->dev->adev->dev;
2423
2424 /* Get the SDMA queue stats */
2425 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
2426 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
2427 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
2428 &sdma_val);
2429 if (retval)
2430 dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n",
2431 q->properties.queue_id);
2432 }
2433
2434 /* remove queue from list to prevent rescheduling after preemption */
2435 dqm_lock(dqm);
2436
2437 retval = wait_on_destroy_queue(dqm, q);
2438
2439 if (retval) {
2440 dqm_unlock(dqm);
2441 return retval;
2442 }
2443
2444 if (qpd->is_debug) {
2445 /*
2446 * Error: we currently do not allow destroying a queue
2447 * of a process that is being debugged
2448
*/ 2449 retval = -EBUSY; 2450 goto failed_try_destroy_debugged_queue; 2451 2452 } 2453 2454 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2455 q->properties.type)]; 2456 2457 deallocate_doorbell(qpd, q); 2458 2459 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 2460 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 2461 deallocate_sdma_queue(dqm, q); 2462 pdd->sdma_past_activity_counter += sdma_val; 2463 } 2464 2465 if (q->properties.is_active) { 2466 decrement_queue_count(dqm, qpd, q); 2467 q->properties.is_active = false; 2468 if (!dqm->dev->kfd->shared_resources.enable_mes) { 2469 retval = execute_queues_cpsch(dqm, 2470 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 2471 USE_DEFAULT_GRACE_PERIOD); 2472 if (retval == -ETIME) 2473 qpd->reset_wavefronts = true; 2474 } else { 2475 retval = remove_queue_mes(dqm, q, qpd); 2476 } 2477 } 2478 list_del(&q->list); 2479 qpd->queue_count--; 2480 2481 /* 2482 * Unconditionally decrement this counter, regardless of the queue's 2483 * type 2484 */ 2485 dqm->total_queue_count--; 2486 pr_debug("Total of %d queues are accountable so far\n", 2487 dqm->total_queue_count); 2488 2489 dqm_unlock(dqm); 2490 2491 /* 2492 * Do free_mqd and raise delete event after dqm_unlock(dqm) to avoid 2493 * circular locking 2494 */ 2495 kfd_dbg_ev_raise(KFD_EC_MASK(EC_DEVICE_QUEUE_DELETE), 2496 qpd->pqm->process, q->device, 2497 -1, false, NULL, 0); 2498 2499 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2500 2501 return retval; 2502 2503 failed_try_destroy_debugged_queue: 2504 2505 dqm_unlock(dqm); 2506 return retval; 2507 } 2508 2509 /* 2510 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to 2511 * stay in user mode. 2512 */ 2513 #define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL 2514 /* APE1 limit is inclusive and 64K aligned. */ 2515 #define APE1_LIMIT_ALIGNMENT 0xFFFF 2516 2517 static bool set_cache_memory_policy(struct device_queue_manager *dqm, 2518 struct qcm_process_device *qpd, 2519 enum cache_policy default_policy, 2520 enum cache_policy alternate_policy, 2521 void __user *alternate_aperture_base, 2522 uint64_t alternate_aperture_size) 2523 { 2524 bool retval = true; 2525 2526 if (!dqm->asic_ops.set_cache_memory_policy) 2527 return retval; 2528 2529 dqm_lock(dqm); 2530 2531 if (alternate_aperture_size == 0) { 2532 /* base > limit disables APE1 */ 2533 qpd->sh_mem_ape1_base = 1; 2534 qpd->sh_mem_ape1_limit = 0; 2535 } else { 2536 /* 2537 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]}, 2538 * SH_MEM_APE1_BASE[31:0], 0x0000 } 2539 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]}, 2540 * SH_MEM_APE1_LIMIT[31:0], 0xFFFF } 2541 * Verify that the base and size parameters can be 2542 * represented in this format and convert them. 2543 * Additionally restrict APE1 to user-mode addresses. 
2544 */ 2545 2546 uint64_t base = (uintptr_t)alternate_aperture_base; 2547 uint64_t limit = base + alternate_aperture_size - 1; 2548 2549 if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 || 2550 (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) { 2551 retval = false; 2552 goto out; 2553 } 2554 2555 qpd->sh_mem_ape1_base = base >> 16; 2556 qpd->sh_mem_ape1_limit = limit >> 16; 2557 } 2558 2559 retval = dqm->asic_ops.set_cache_memory_policy( 2560 dqm, 2561 qpd, 2562 default_policy, 2563 alternate_policy, 2564 alternate_aperture_base, 2565 alternate_aperture_size); 2566 2567 if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) 2568 program_sh_mem_settings(dqm, qpd); 2569 2570 pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", 2571 qpd->sh_mem_config, qpd->sh_mem_ape1_base, 2572 qpd->sh_mem_ape1_limit); 2573 2574 out: 2575 dqm_unlock(dqm); 2576 return retval; 2577 } 2578 2579 static int process_termination_nocpsch(struct device_queue_manager *dqm, 2580 struct qcm_process_device *qpd) 2581 { 2582 struct queue *q; 2583 struct device_process_node *cur, *next_dpn; 2584 int retval = 0; 2585 bool found = false; 2586 2587 dqm_lock(dqm); 2588 2589 /* Clear all user mode queues */ 2590 while (!list_empty(&qpd->queues_list)) { 2591 struct mqd_manager *mqd_mgr; 2592 int ret; 2593 2594 q = list_first_entry(&qpd->queues_list, struct queue, list); 2595 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2596 q->properties.type)]; 2597 ret = destroy_queue_nocpsch_locked(dqm, qpd, q); 2598 if (ret) 2599 retval = ret; 2600 dqm_unlock(dqm); 2601 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2602 dqm_lock(dqm); 2603 } 2604 2605 /* Unregister process */ 2606 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 2607 if (qpd == cur->qpd) { 2608 list_del(&cur->list); 2609 kfree(cur); 2610 dqm->processes_count--; 2611 found = true; 2612 break; 2613 } 2614 } 2615 2616 dqm_unlock(dqm); 2617 2618 /* Outside the DQM lock because under the DQM lock we can't do 2619 * reclaim or take other locks that others hold while reclaiming. 2620 */ 2621 if (found) 2622 kfd_dec_compute_active(dqm->dev); 2623 2624 return retval; 2625 } 2626 2627 static int get_wave_state(struct device_queue_manager *dqm, 2628 struct queue *q, 2629 void __user *ctl_stack, 2630 u32 *ctl_stack_used_size, 2631 u32 *save_area_used_size) 2632 { 2633 struct mqd_manager *mqd_mgr; 2634 2635 dqm_lock(dqm); 2636 2637 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; 2638 2639 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || 2640 q->properties.is_active || !q->device->kfd->cwsr_enabled || 2641 !mqd_mgr->get_wave_state) { 2642 dqm_unlock(dqm); 2643 return -EINVAL; 2644 } 2645 2646 dqm_unlock(dqm); 2647 2648 /* 2649 * get_wave_state is outside the dqm lock to prevent circular locking 2650 * and the queue should be protected against destruction by the process 2651 * lock. 
2652 */ 2653 return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, &q->properties, 2654 ctl_stack, ctl_stack_used_size, save_area_used_size); 2655 } 2656 2657 static void get_queue_checkpoint_info(struct device_queue_manager *dqm, 2658 const struct queue *q, 2659 u32 *mqd_size, 2660 u32 *ctl_stack_size) 2661 { 2662 struct mqd_manager *mqd_mgr; 2663 enum KFD_MQD_TYPE mqd_type = 2664 get_mqd_type_from_queue_type(q->properties.type); 2665 2666 dqm_lock(dqm); 2667 mqd_mgr = dqm->mqd_mgrs[mqd_type]; 2668 *mqd_size = mqd_mgr->mqd_size; 2669 *ctl_stack_size = 0; 2670 2671 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info) 2672 mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size); 2673 2674 dqm_unlock(dqm); 2675 } 2676 2677 static int checkpoint_mqd(struct device_queue_manager *dqm, 2678 const struct queue *q, 2679 void *mqd, 2680 void *ctl_stack) 2681 { 2682 struct mqd_manager *mqd_mgr; 2683 int r = 0; 2684 enum KFD_MQD_TYPE mqd_type = 2685 get_mqd_type_from_queue_type(q->properties.type); 2686 2687 dqm_lock(dqm); 2688 2689 if (q->properties.is_active || !q->device->kfd->cwsr_enabled) { 2690 r = -EINVAL; 2691 goto dqm_unlock; 2692 } 2693 2694 mqd_mgr = dqm->mqd_mgrs[mqd_type]; 2695 if (!mqd_mgr->checkpoint_mqd) { 2696 r = -EOPNOTSUPP; 2697 goto dqm_unlock; 2698 } 2699 2700 mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack); 2701 2702 dqm_unlock: 2703 dqm_unlock(dqm); 2704 return r; 2705 } 2706 2707 static int process_termination_cpsch(struct device_queue_manager *dqm, 2708 struct qcm_process_device *qpd) 2709 { 2710 int retval; 2711 struct queue *q; 2712 struct device *dev = dqm->dev->adev->dev; 2713 struct kernel_queue *kq, *kq_next; 2714 struct mqd_manager *mqd_mgr; 2715 struct device_process_node *cur, *next_dpn; 2716 enum kfd_unmap_queues_filter filter = 2717 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; 2718 bool found = false; 2719 2720 retval = 0; 2721 2722 dqm_lock(dqm); 2723 2724 /* Clean all kernel queues */ 2725 list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) { 2726 list_del(&kq->list); 2727 decrement_queue_count(dqm, qpd, kq->queue); 2728 qpd->is_debug = false; 2729 dqm->total_queue_count--; 2730 filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES; 2731 } 2732 2733 /* Clear all user mode queues */ 2734 list_for_each_entry(q, &qpd->queues_list, list) { 2735 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 2736 deallocate_sdma_queue(dqm, q); 2737 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 2738 deallocate_sdma_queue(dqm, q); 2739 2740 if (q->properties.is_active) { 2741 decrement_queue_count(dqm, qpd, q); 2742 2743 if (dqm->dev->kfd->shared_resources.enable_mes) { 2744 retval = remove_queue_mes(dqm, q, qpd); 2745 if (retval) 2746 dev_err(dev, "Failed to remove queue %d\n", 2747 q->properties.queue_id); 2748 } 2749 } 2750 2751 dqm->total_queue_count--; 2752 } 2753 2754 /* Unregister process */ 2755 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 2756 if (qpd == cur->qpd) { 2757 list_del(&cur->list); 2758 kfree(cur); 2759 dqm->processes_count--; 2760 found = true; 2761 break; 2762 } 2763 } 2764 2765 if (!dqm->dev->kfd->shared_resources.enable_mes) 2766 retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD); 2767 2768 if ((retval || qpd->reset_wavefronts) && 2769 down_read_trylock(&dqm->dev->adev->reset_domain->sem)) { 2770 pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); 2771 dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process); 2772 qpd->reset_wavefronts = false; 2773 
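/* drop the reset_domain read lock taken by the down_read_trylock() above */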
up_read(&dqm->dev->adev->reset_domain->sem);
2774 }
2775
2776 /* Lastly, free mqd resources.
2777 * Do free_mqd() after dqm_unlock to avoid circular locking.
2778 */
2779 while (!list_empty(&qpd->queues_list)) {
2780 q = list_first_entry(&qpd->queues_list, struct queue, list);
2781 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
2782 q->properties.type)];
2783 list_del(&q->list);
2784 qpd->queue_count--;
2785 dqm_unlock(dqm);
2786 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
2787 dqm_lock(dqm);
2788 }
2789 dqm_unlock(dqm);
2790
2791 /* Outside the DQM lock because under the DQM lock we can't do
2792 * reclaim or take other locks that others hold while reclaiming.
2793 */
2794 if (found)
2795 kfd_dec_compute_active(dqm->dev);
2796
2797 return retval;
2798 }
2799
2800 static int init_mqd_managers(struct device_queue_manager *dqm)
2801 {
2802 int i, j;
2803 struct device *dev = dqm->dev->adev->dev;
2804 struct mqd_manager *mqd_mgr;
2805
2806 for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
2807 mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
2808 if (!mqd_mgr) {
2809 dev_err(dev, "mqd manager [%d] initialization failed\n", i);
2810 goto out_free;
2811 }
2812 dqm->mqd_mgrs[i] = mqd_mgr;
2813 }
2814
2815 return 0;
2816
2817 out_free:
2818 for (j = 0; j < i; j++) {
2819 kfree(dqm->mqd_mgrs[j]);
2820 dqm->mqd_mgrs[j] = NULL;
2821 }
2822
2823 return -ENOMEM;
2824 }
2825
2826 /* Allocate one hiq mqd (HWS) and all SDMA mqd in a contiguous chunk */
2827 static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
2828 {
2829 int retval;
2830 struct kfd_node *dev = dqm->dev;
2831 struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
2832 uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
2833 get_num_all_sdma_engines(dqm) *
2834 dev->kfd->device_info.num_sdma_queues_per_engine +
2835 (dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size *
2836 NUM_XCC(dqm->dev->xcc_mask));
2837
2838 retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
2839 &(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
2840 (void *)&(mem_obj->cpu_ptr), false);
2841
2842 return retval;
2843 }
2844
2845 struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev)
2846 {
2847 struct device_queue_manager *dqm;
2848
2849 pr_debug("Loading device queue manager\n");
2850
2851 dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
2852 if (!dqm)
2853 return NULL;
2854
2855 switch (dev->adev->asic_type) {
2856 /* HWS is not available on Hawaii. */
2857 case CHIP_HAWAII:
2858 /* HWS depends on CWSR for timely dequeue. CWSR is not
2859 * available on Tonga.
2860 *
2861 * FIXME: This argument also applies to Kaveri.
2862 */ 2863 case CHIP_TONGA: 2864 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS; 2865 break; 2866 default: 2867 dqm->sched_policy = sched_policy; 2868 break; 2869 } 2870 2871 dqm->dev = dev; 2872 switch (dqm->sched_policy) { 2873 case KFD_SCHED_POLICY_HWS: 2874 case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: 2875 /* initialize dqm for cp scheduling */ 2876 dqm->ops.create_queue = create_queue_cpsch; 2877 dqm->ops.initialize = initialize_cpsch; 2878 dqm->ops.start = start_cpsch; 2879 dqm->ops.stop = stop_cpsch; 2880 dqm->ops.halt = halt_cpsch; 2881 dqm->ops.unhalt = unhalt_cpsch; 2882 dqm->ops.destroy_queue = destroy_queue_cpsch; 2883 dqm->ops.update_queue = update_queue; 2884 dqm->ops.register_process = register_process; 2885 dqm->ops.unregister_process = unregister_process; 2886 dqm->ops.uninitialize = uninitialize; 2887 dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; 2888 dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; 2889 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 2890 dqm->ops.process_termination = process_termination_cpsch; 2891 dqm->ops.evict_process_queues = evict_process_queues_cpsch; 2892 dqm->ops.restore_process_queues = restore_process_queues_cpsch; 2893 dqm->ops.get_wave_state = get_wave_state; 2894 dqm->ops.reset_queues = reset_queues_cpsch; 2895 dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info; 2896 dqm->ops.checkpoint_mqd = checkpoint_mqd; 2897 break; 2898 case KFD_SCHED_POLICY_NO_HWS: 2899 /* initialize dqm for no cp scheduling */ 2900 dqm->ops.start = start_nocpsch; 2901 dqm->ops.stop = stop_nocpsch; 2902 dqm->ops.create_queue = create_queue_nocpsch; 2903 dqm->ops.destroy_queue = destroy_queue_nocpsch; 2904 dqm->ops.update_queue = update_queue; 2905 dqm->ops.register_process = register_process; 2906 dqm->ops.unregister_process = unregister_process; 2907 dqm->ops.initialize = initialize_nocpsch; 2908 dqm->ops.uninitialize = uninitialize; 2909 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 2910 dqm->ops.process_termination = process_termination_nocpsch; 2911 dqm->ops.evict_process_queues = evict_process_queues_nocpsch; 2912 dqm->ops.restore_process_queues = 2913 restore_process_queues_nocpsch; 2914 dqm->ops.get_wave_state = get_wave_state; 2915 dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info; 2916 dqm->ops.checkpoint_mqd = checkpoint_mqd; 2917 break; 2918 default: 2919 dev_err(dev->adev->dev, "Invalid scheduling policy %d\n", dqm->sched_policy); 2920 goto out_free; 2921 } 2922 2923 switch (dev->adev->asic_type) { 2924 case CHIP_KAVERI: 2925 case CHIP_HAWAII: 2926 device_queue_manager_init_cik(&dqm->asic_ops); 2927 break; 2928 2929 case CHIP_CARRIZO: 2930 case CHIP_TONGA: 2931 case CHIP_FIJI: 2932 case CHIP_POLARIS10: 2933 case CHIP_POLARIS11: 2934 case CHIP_POLARIS12: 2935 case CHIP_VEGAM: 2936 device_queue_manager_init_vi(&dqm->asic_ops); 2937 break; 2938 2939 default: 2940 if (KFD_GC_VERSION(dev) >= IP_VERSION(12, 0, 0)) 2941 device_queue_manager_init_v12(&dqm->asic_ops); 2942 else if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0)) 2943 device_queue_manager_init_v11(&dqm->asic_ops); 2944 else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1)) 2945 device_queue_manager_init_v10(&dqm->asic_ops); 2946 else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1)) 2947 device_queue_manager_init_v9(&dqm->asic_ops); 2948 else { 2949 WARN(1, "Unexpected ASIC family %u", 2950 dev->adev->asic_type); 2951 goto out_free; 2952 } 2953 } 2954 2955 if (init_mqd_managers(dqm)) 2956 goto out_free; 2957 2958 if 
(!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) {
2959 dev_err(dev->adev->dev, "Failed to allocate hiq sdma mqd trunk buffer\n");
2960 goto out_free;
2961 }
2962
2963 if (!dqm->ops.initialize(dqm)) {
2964 init_waitqueue_head(&dqm->destroy_wait);
2965 return dqm;
2966 }
2967
2968 out_free:
2969 kfree(dqm);
2970 return NULL;
2971 }
2972
2973 static void deallocate_hiq_sdma_mqd(struct kfd_node *dev,
2974 struct kfd_mem_obj *mqd)
2975 {
2976 WARN(!mqd, "No hiq sdma mqd trunk to free");
2977
2978 amdgpu_amdkfd_free_gtt_mem(dev->adev, &mqd->gtt_mem);
2979 }
2980
2981 void device_queue_manager_uninit(struct device_queue_manager *dqm)
2982 {
2983 dqm->ops.stop(dqm);
2984 dqm->ops.uninitialize(dqm);
2985 if (!dqm->dev->kfd->shared_resources.enable_mes)
2986 deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
2987 kfree(dqm);
2988 }
2989
2990 int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id)
2991 {
2992 struct kfd_process_device *pdd = NULL;
2993 struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, &pdd);
2994 struct device_queue_manager *dqm = knode->dqm;
2995 struct device *dev = dqm->dev->adev->dev;
2996 struct qcm_process_device *qpd;
2997 struct queue *q = NULL;
2998 int ret = 0;
2999
3000 if (!pdd)
3001 return -EINVAL;
3002
3003 dqm_lock(dqm);
3004
3005 if (pdd) {
3006 qpd = &pdd->qpd;
3007
3008 list_for_each_entry(q, &qpd->queues_list, list) {
3009 if (q->doorbell_id == doorbell_id && q->properties.is_active) {
3010 ret = suspend_all_queues_mes(dqm);
3011 if (ret) {
3012 dev_err(dev, "Suspending all queues failed");
3013 goto out;
3014 }
3015
3016 q->properties.is_evicted = true;
3017 q->properties.is_active = false;
3018 decrement_queue_count(dqm, qpd, q);
3019
3020 ret = remove_queue_mes(dqm, q, qpd);
3021 if (ret) {
3022 dev_err(dev, "Removing bad queue failed");
3023 goto out;
3024 }
3025
3026 ret = resume_all_queues_mes(dqm);
3027 if (ret)
3028 dev_err(dev, "Resuming all queues failed");
3029
3030 break;
3031 }
3032 }
3033 }
3034
3035 out:
3036 dqm_unlock(dqm);
3037 kfd_unref_process(p);
3038 return ret;
3039 }
3040
3041 static int kfd_dqm_evict_pasid_mes(struct device_queue_manager *dqm,
3042 struct qcm_process_device *qpd)
3043 {
3044 struct device *dev = dqm->dev->adev->dev;
3045 int ret = 0;
3046
3047 /* Check if process is already evicted */
3048 dqm_lock(dqm);
3049 if (qpd->evicted) {
3050 /* Increment the evicted count to make sure the
3051 * process stays evicted before it is terminated.
3052 */ 3053 qpd->evicted++; 3054 dqm_unlock(dqm); 3055 goto out; 3056 } 3057 dqm_unlock(dqm); 3058 3059 ret = suspend_all_queues_mes(dqm); 3060 if (ret) { 3061 dev_err(dev, "Suspending all queues failed"); 3062 goto out; 3063 } 3064 3065 ret = dqm->ops.evict_process_queues(dqm, qpd); 3066 if (ret) { 3067 dev_err(dev, "Evicting process queues failed"); 3068 goto out; 3069 } 3070 3071 ret = resume_all_queues_mes(dqm); 3072 if (ret) 3073 dev_err(dev, "Resuming all queues failed"); 3074 3075 out: 3076 return ret; 3077 } 3078 3079 int kfd_evict_process_device(struct kfd_process_device *pdd) 3080 { 3081 struct device_queue_manager *dqm; 3082 struct kfd_process *p; 3083 int ret = 0; 3084 3085 p = pdd->process; 3086 dqm = pdd->dev->dqm; 3087 3088 WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); 3089 3090 if (dqm->dev->kfd->shared_resources.enable_mes) 3091 ret = kfd_dqm_evict_pasid_mes(dqm, &pdd->qpd); 3092 else 3093 ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd); 3094 3095 return ret; 3096 } 3097 3098 static void kfd_process_hw_exception(struct work_struct *work) 3099 { 3100 struct device_queue_manager *dqm = container_of(work, 3101 struct device_queue_manager, hw_exception_work); 3102 amdgpu_amdkfd_gpu_reset(dqm->dev->adev); 3103 } 3104 3105 int reserve_debug_trap_vmid(struct device_queue_manager *dqm, 3106 struct qcm_process_device *qpd) 3107 { 3108 int r; 3109 struct device *dev = dqm->dev->adev->dev; 3110 int updated_vmid_mask; 3111 3112 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3113 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3114 return -EINVAL; 3115 } 3116 3117 dqm_lock(dqm); 3118 3119 if (dqm->trap_debug_vmid != 0) { 3120 dev_err(dev, "Trap debug id already reserved\n"); 3121 r = -EBUSY; 3122 goto out_unlock; 3123 } 3124 3125 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 3126 USE_DEFAULT_GRACE_PERIOD, false); 3127 if (r) 3128 goto out_unlock; 3129 3130 updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap; 3131 updated_vmid_mask &= ~(1 << dqm->dev->vm_info.last_vmid_kfd); 3132 3133 dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask; 3134 dqm->trap_debug_vmid = dqm->dev->vm_info.last_vmid_kfd; 3135 r = set_sched_resources(dqm); 3136 if (r) 3137 goto out_unlock; 3138 3139 r = map_queues_cpsch(dqm); 3140 if (r) 3141 goto out_unlock; 3142 3143 pr_debug("Reserved VMID for trap debug: %i\n", dqm->trap_debug_vmid); 3144 3145 out_unlock: 3146 dqm_unlock(dqm); 3147 return r; 3148 } 3149 3150 /* 3151 * Releases vmid for the trap debugger 3152 */ 3153 int release_debug_trap_vmid(struct device_queue_manager *dqm, 3154 struct qcm_process_device *qpd) 3155 { 3156 struct device *dev = dqm->dev->adev->dev; 3157 int r; 3158 int updated_vmid_mask; 3159 uint32_t trap_debug_vmid; 3160 3161 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3162 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3163 return -EINVAL; 3164 } 3165 3166 dqm_lock(dqm); 3167 trap_debug_vmid = dqm->trap_debug_vmid; 3168 if (dqm->trap_debug_vmid == 0) { 3169 dev_err(dev, "Trap debug id is not reserved\n"); 3170 r = -EINVAL; 3171 goto out_unlock; 3172 } 3173 3174 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 3175 USE_DEFAULT_GRACE_PERIOD, false); 3176 if (r) 3177 goto out_unlock; 3178 3179 updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap; 3180 updated_vmid_mask |= (1 << dqm->dev->vm_info.last_vmid_kfd); 3181 3182 
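/* return the reserved VMID to the compute VMID bitmap and rebuild the HWS scheduler resources */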
dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask; 3183 dqm->trap_debug_vmid = 0; 3184 r = set_sched_resources(dqm); 3185 if (r) 3186 goto out_unlock; 3187 3188 r = map_queues_cpsch(dqm); 3189 if (r) 3190 goto out_unlock; 3191 3192 pr_debug("Released VMID for trap debug: %i\n", trap_debug_vmid); 3193 3194 out_unlock: 3195 dqm_unlock(dqm); 3196 return r; 3197 } 3198 3199 #define QUEUE_NOT_FOUND -1 3200 /* invalidate queue operation in array */ 3201 static void q_array_invalidate(uint32_t num_queues, uint32_t *queue_ids) 3202 { 3203 int i; 3204 3205 for (i = 0; i < num_queues; i++) 3206 queue_ids[i] |= KFD_DBG_QUEUE_INVALID_MASK; 3207 } 3208 3209 /* find queue index in array */ 3210 static int q_array_get_index(unsigned int queue_id, 3211 uint32_t num_queues, 3212 uint32_t *queue_ids) 3213 { 3214 int i; 3215 3216 for (i = 0; i < num_queues; i++) 3217 if (queue_id == (queue_ids[i] & ~KFD_DBG_QUEUE_INVALID_MASK)) 3218 return i; 3219 3220 return QUEUE_NOT_FOUND; 3221 } 3222 3223 struct copy_context_work_handler_workarea { 3224 struct work_struct copy_context_work; 3225 struct kfd_process *p; 3226 }; 3227 3228 static void copy_context_work_handler(struct work_struct *work) 3229 { 3230 struct copy_context_work_handler_workarea *workarea; 3231 struct mqd_manager *mqd_mgr; 3232 struct queue *q; 3233 struct mm_struct *mm; 3234 struct kfd_process *p; 3235 uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size; 3236 int i; 3237 3238 workarea = container_of(work, 3239 struct copy_context_work_handler_workarea, 3240 copy_context_work); 3241 3242 p = workarea->p; 3243 mm = get_task_mm(p->lead_thread); 3244 3245 if (!mm) 3246 return; 3247 3248 kthread_use_mm(mm); 3249 for (i = 0; i < p->n_pdds; i++) { 3250 struct kfd_process_device *pdd = p->pdds[i]; 3251 struct device_queue_manager *dqm = pdd->dev->dqm; 3252 struct qcm_process_device *qpd = &pdd->qpd; 3253 3254 list_for_each_entry(q, &qpd->queues_list, list) { 3255 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE) 3256 continue; 3257 3258 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; 3259 3260 /* We ignore the return value from get_wave_state 3261 * because 3262 * i) right now, it always returns 0, and 3263 * ii) if we hit an error, we would continue to the 3264 * next queue anyway. 3265 */ 3266 mqd_mgr->get_wave_state(mqd_mgr, 3267 q->mqd, 3268 &q->properties, 3269 (void __user *) q->properties.ctx_save_restore_area_address, 3270 &tmp_ctl_stack_used_size, 3271 &tmp_save_area_used_size); 3272 } 3273 } 3274 kthread_unuse_mm(mm); 3275 mmput(mm); 3276 } 3277 3278 static uint32_t *get_queue_ids(uint32_t num_queues, uint32_t *usr_queue_id_array) 3279 { 3280 size_t array_size = num_queues * sizeof(uint32_t); 3281 3282 if (!usr_queue_id_array) 3283 return NULL; 3284 3285 return memdup_user(usr_queue_id_array, array_size); 3286 } 3287 3288 int resume_queues(struct kfd_process *p, 3289 uint32_t num_queues, 3290 uint32_t *usr_queue_id_array) 3291 { 3292 uint32_t *queue_ids = NULL; 3293 int total_resumed = 0; 3294 int i; 3295 3296 if (usr_queue_id_array) { 3297 queue_ids = get_queue_ids(num_queues, usr_queue_id_array); 3298 3299 if (IS_ERR(queue_ids)) 3300 return PTR_ERR(queue_ids); 3301 3302 /* mask all queues as invalid. 
unmask per successful request */ 3303 q_array_invalidate(num_queues, queue_ids); 3304 } 3305 3306 for (i = 0; i < p->n_pdds; i++) { 3307 struct kfd_process_device *pdd = p->pdds[i]; 3308 struct device_queue_manager *dqm = pdd->dev->dqm; 3309 struct device *dev = dqm->dev->adev->dev; 3310 struct qcm_process_device *qpd = &pdd->qpd; 3311 struct queue *q; 3312 int r, per_device_resumed = 0; 3313 3314 dqm_lock(dqm); 3315 3316 /* unmask queues that resume or already resumed as valid */ 3317 list_for_each_entry(q, &qpd->queues_list, list) { 3318 int q_idx = QUEUE_NOT_FOUND; 3319 3320 if (queue_ids) 3321 q_idx = q_array_get_index( 3322 q->properties.queue_id, 3323 num_queues, 3324 queue_ids); 3325 3326 if (!queue_ids || q_idx != QUEUE_NOT_FOUND) { 3327 int err = resume_single_queue(dqm, &pdd->qpd, q); 3328 3329 if (queue_ids) { 3330 if (!err) { 3331 queue_ids[q_idx] &= 3332 ~KFD_DBG_QUEUE_INVALID_MASK; 3333 } else { 3334 queue_ids[q_idx] |= 3335 KFD_DBG_QUEUE_ERROR_MASK; 3336 break; 3337 } 3338 } 3339 3340 if (dqm->dev->kfd->shared_resources.enable_mes) { 3341 wake_up_all(&dqm->destroy_wait); 3342 if (!err) 3343 total_resumed++; 3344 } else { 3345 per_device_resumed++; 3346 } 3347 } 3348 } 3349 3350 if (!per_device_resumed) { 3351 dqm_unlock(dqm); 3352 continue; 3353 } 3354 3355 r = execute_queues_cpsch(dqm, 3356 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 3357 0, 3358 USE_DEFAULT_GRACE_PERIOD); 3359 if (r) { 3360 dev_err(dev, "Failed to resume process queues\n"); 3361 if (queue_ids) { 3362 list_for_each_entry(q, &qpd->queues_list, list) { 3363 int q_idx = q_array_get_index( 3364 q->properties.queue_id, 3365 num_queues, 3366 queue_ids); 3367 3368 /* mask queue as error on resume fail */ 3369 if (q_idx != QUEUE_NOT_FOUND) 3370 queue_ids[q_idx] |= 3371 KFD_DBG_QUEUE_ERROR_MASK; 3372 } 3373 } 3374 } else { 3375 wake_up_all(&dqm->destroy_wait); 3376 total_resumed += per_device_resumed; 3377 } 3378 3379 dqm_unlock(dqm); 3380 } 3381 3382 if (queue_ids) { 3383 if (copy_to_user((void __user *)usr_queue_id_array, queue_ids, 3384 num_queues * sizeof(uint32_t))) 3385 pr_err("copy_to_user failed on queue resume\n"); 3386 3387 kfree(queue_ids); 3388 } 3389 3390 return total_resumed; 3391 } 3392 3393 int suspend_queues(struct kfd_process *p, 3394 uint32_t num_queues, 3395 uint32_t grace_period, 3396 uint64_t exception_clear_mask, 3397 uint32_t *usr_queue_id_array) 3398 { 3399 uint32_t *queue_ids = get_queue_ids(num_queues, usr_queue_id_array); 3400 int total_suspended = 0; 3401 int i; 3402 3403 if (IS_ERR(queue_ids)) 3404 return PTR_ERR(queue_ids); 3405 3406 /* mask all queues as invalid. 
unmask on successful request */
3407 q_array_invalidate(num_queues, queue_ids);
3408
3409 for (i = 0; i < p->n_pdds; i++) {
3410 struct kfd_process_device *pdd = p->pdds[i];
3411 struct device_queue_manager *dqm = pdd->dev->dqm;
3412 struct device *dev = dqm->dev->adev->dev;
3413 struct qcm_process_device *qpd = &pdd->qpd;
3414 struct queue *q;
3415 int r, per_device_suspended = 0;
3416
3417 mutex_lock(&p->event_mutex);
3418 dqm_lock(dqm);
3419
3420 /* unmask queues that suspend or already suspended */
3421 list_for_each_entry(q, &qpd->queues_list, list) {
3422 int q_idx = q_array_get_index(q->properties.queue_id,
3423 num_queues,
3424 queue_ids);
3425
3426 if (q_idx != QUEUE_NOT_FOUND) {
3427 int err = suspend_single_queue(dqm, pdd, q);
3428 bool is_mes = dqm->dev->kfd->shared_resources.enable_mes;
3429
3430 if (!err) {
3431 queue_ids[q_idx] &= ~KFD_DBG_QUEUE_INVALID_MASK;
3432 if (exception_clear_mask && is_mes)
3433 q->properties.exception_status &=
3434 ~exception_clear_mask;
3435
3436 if (is_mes)
3437 total_suspended++;
3438 else
3439 per_device_suspended++;
3440 } else if (err != -EBUSY) {
3441 r = err;
3442 queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK;
3443 break;
3444 }
3445 }
3446 }
3447
3448 if (!per_device_suspended) {
3449 dqm_unlock(dqm);
3450 mutex_unlock(&p->event_mutex);
3451 if (total_suspended)
3452 amdgpu_amdkfd_debug_mem_fence(dqm->dev->adev);
3453 continue;
3454 }
3455
3456 r = execute_queues_cpsch(dqm,
3457 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
3458 grace_period);
3459
3460 if (r)
3461 dev_err(dev, "Failed to suspend process queues.\n");
3462 else
3463 total_suspended += per_device_suspended;
3464
3465 list_for_each_entry(q, &qpd->queues_list, list) {
3466 int q_idx = q_array_get_index(q->properties.queue_id,
3467 num_queues, queue_ids);
3468
3469 if (q_idx == QUEUE_NOT_FOUND)
3470 continue;
3471
3472 /* mask queue as error on suspend fail */
3473 if (r)
3474 queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK;
3475 else if (exception_clear_mask)
3476 q->properties.exception_status &=
3477 ~exception_clear_mask;
3478 }
3479
3480 dqm_unlock(dqm);
3481 mutex_unlock(&p->event_mutex);
3482 amdgpu_device_flush_hdp(dqm->dev->adev, NULL);
3483 }
3484
3485 if (total_suspended) {
3486 struct copy_context_work_handler_workarea copy_context_worker;
3487
3488 INIT_WORK_ONSTACK(
3489 &copy_context_worker.copy_context_work,
3490 copy_context_work_handler);
3491
3492 copy_context_worker.p = p;
3493
3494 schedule_work(&copy_context_worker.copy_context_work);
3495
3496
3497 flush_work(&copy_context_worker.copy_context_work);
3498 destroy_work_on_stack(&copy_context_worker.copy_context_work);
3499 }
3500
3501 if (copy_to_user((void __user *)usr_queue_id_array, queue_ids,
3502 num_queues * sizeof(uint32_t)))
3503 pr_err("copy_to_user failed on queue suspend\n");
3504
3505 kfree(queue_ids);
3506
3507 return total_suspended;
3508 }
3509
3510 static uint32_t set_queue_type_for_user(struct queue_properties *q_props)
3511 {
3512 switch (q_props->type) {
3513 case KFD_QUEUE_TYPE_COMPUTE:
3514 return q_props->format == KFD_QUEUE_FORMAT_PM4
3515 ?
KFD_IOC_QUEUE_TYPE_COMPUTE 3516 : KFD_IOC_QUEUE_TYPE_COMPUTE_AQL; 3517 case KFD_QUEUE_TYPE_SDMA: 3518 return KFD_IOC_QUEUE_TYPE_SDMA; 3519 case KFD_QUEUE_TYPE_SDMA_XGMI: 3520 return KFD_IOC_QUEUE_TYPE_SDMA_XGMI; 3521 default: 3522 WARN_ONCE(true, "queue type not recognized!"); 3523 return 0xffffffff; 3524 }; 3525 } 3526 3527 void set_queue_snapshot_entry(struct queue *q, 3528 uint64_t exception_clear_mask, 3529 struct kfd_queue_snapshot_entry *qss_entry) 3530 { 3531 qss_entry->ring_base_address = q->properties.queue_address; 3532 qss_entry->write_pointer_address = (uint64_t)q->properties.write_ptr; 3533 qss_entry->read_pointer_address = (uint64_t)q->properties.read_ptr; 3534 qss_entry->ctx_save_restore_address = 3535 q->properties.ctx_save_restore_area_address; 3536 qss_entry->ctx_save_restore_area_size = 3537 q->properties.ctx_save_restore_area_size; 3538 qss_entry->exception_status = q->properties.exception_status; 3539 qss_entry->queue_id = q->properties.queue_id; 3540 qss_entry->gpu_id = q->device->id; 3541 qss_entry->ring_size = (uint32_t)q->properties.queue_size; 3542 qss_entry->queue_type = set_queue_type_for_user(&q->properties); 3543 q->properties.exception_status &= ~exception_clear_mask; 3544 } 3545 3546 int debug_lock_and_unmap(struct device_queue_manager *dqm) 3547 { 3548 struct device *dev = dqm->dev->adev->dev; 3549 int r; 3550 3551 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3552 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3553 return -EINVAL; 3554 } 3555 3556 if (!kfd_dbg_is_per_vmid_supported(dqm->dev)) 3557 return 0; 3558 3559 dqm_lock(dqm); 3560 3561 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 0, false); 3562 if (r) 3563 dqm_unlock(dqm); 3564 3565 return r; 3566 } 3567 3568 int debug_map_and_unlock(struct device_queue_manager *dqm) 3569 { 3570 struct device *dev = dqm->dev->adev->dev; 3571 int r; 3572 3573 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3574 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3575 return -EINVAL; 3576 } 3577 3578 if (!kfd_dbg_is_per_vmid_supported(dqm->dev)) 3579 return 0; 3580 3581 r = map_queues_cpsch(dqm); 3582 3583 dqm_unlock(dqm); 3584 3585 return r; 3586 } 3587 3588 int debug_refresh_runlist(struct device_queue_manager *dqm) 3589 { 3590 int r = debug_lock_and_unmap(dqm); 3591 3592 if (r) 3593 return r; 3594 3595 return debug_map_and_unlock(dqm); 3596 } 3597 3598 bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm, 3599 struct qcm_process_device *qpd, 3600 int doorbell_off, u32 *queue_format) 3601 { 3602 struct queue *q; 3603 bool r = false; 3604 3605 if (!queue_format) 3606 return r; 3607 3608 dqm_lock(dqm); 3609 3610 list_for_each_entry(q, &qpd->queues_list, list) { 3611 if (q->properties.doorbell_off == doorbell_off) { 3612 *queue_format = q->properties.format; 3613 r = true; 3614 goto out; 3615 } 3616 } 3617 3618 out: 3619 dqm_unlock(dqm); 3620 return r; 3621 } 3622 #if defined(CONFIG_DEBUG_FS) 3623 3624 static void seq_reg_dump(struct seq_file *m, 3625 uint32_t (*dump)[2], uint32_t n_regs) 3626 { 3627 uint32_t i, count; 3628 3629 for (i = 0, count = 0; i < n_regs; i++) { 3630 if (count == 0 || 3631 dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) { 3632 seq_printf(m, "%s %08x: %08x", 3633 i ? 
"\n" : "", 3634 dump[i][0], dump[i][1]); 3635 count = 7; 3636 } else { 3637 seq_printf(m, " %08x", dump[i][1]); 3638 count--; 3639 } 3640 } 3641 3642 seq_puts(m, "\n"); 3643 } 3644 3645 int dqm_debugfs_hqds(struct seq_file *m, void *data) 3646 { 3647 struct device_queue_manager *dqm = data; 3648 uint32_t xcc_mask = dqm->dev->xcc_mask; 3649 uint32_t (*dump)[2], n_regs; 3650 int pipe, queue; 3651 int r = 0, xcc_id; 3652 uint32_t sdma_engine_start; 3653 3654 if (!dqm->sched_running) { 3655 seq_puts(m, " Device is stopped\n"); 3656 return 0; 3657 } 3658 3659 for_each_inst(xcc_id, xcc_mask) { 3660 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 3661 KFD_CIK_HIQ_PIPE, 3662 KFD_CIK_HIQ_QUEUE, &dump, 3663 &n_regs, xcc_id); 3664 if (!r) { 3665 seq_printf( 3666 m, 3667 " Inst %d, HIQ on MEC %d Pipe %d Queue %d\n", 3668 xcc_id, 3669 KFD_CIK_HIQ_PIPE / get_pipes_per_mec(dqm) + 1, 3670 KFD_CIK_HIQ_PIPE % get_pipes_per_mec(dqm), 3671 KFD_CIK_HIQ_QUEUE); 3672 seq_reg_dump(m, dump, n_regs); 3673 3674 kfree(dump); 3675 } 3676 3677 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 3678 int pipe_offset = pipe * get_queues_per_pipe(dqm); 3679 3680 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { 3681 if (!test_bit(pipe_offset + queue, 3682 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 3683 continue; 3684 3685 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 3686 pipe, queue, 3687 &dump, &n_regs, 3688 xcc_id); 3689 if (r) 3690 break; 3691 3692 seq_printf(m, 3693 " Inst %d, CP Pipe %d, Queue %d\n", 3694 xcc_id, pipe, queue); 3695 seq_reg_dump(m, dump, n_regs); 3696 3697 kfree(dump); 3698 } 3699 } 3700 } 3701 3702 sdma_engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm); 3703 for (pipe = sdma_engine_start; 3704 pipe < (sdma_engine_start + get_num_all_sdma_engines(dqm)); 3705 pipe++) { 3706 for (queue = 0; 3707 queue < dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 3708 queue++) { 3709 r = dqm->dev->kfd2kgd->hqd_sdma_dump( 3710 dqm->dev->adev, pipe, queue, &dump, &n_regs); 3711 if (r) 3712 break; 3713 3714 seq_printf(m, " SDMA Engine %d, RLC %d\n", 3715 pipe, queue); 3716 seq_reg_dump(m, dump, n_regs); 3717 3718 kfree(dump); 3719 } 3720 } 3721 3722 return r; 3723 } 3724 3725 int dqm_debugfs_hang_hws(struct device_queue_manager *dqm) 3726 { 3727 int r = 0; 3728 3729 dqm_lock(dqm); 3730 r = pm_debugfs_hang_hws(&dqm->packet_mgr); 3731 if (r) { 3732 dqm_unlock(dqm); 3733 return r; 3734 } 3735 dqm->active_runlist = true; 3736 r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 3737 0, USE_DEFAULT_GRACE_PERIOD); 3738 dqm_unlock(dqm); 3739 3740 return r; 3741 } 3742 3743 #endif 3744