1 // SPDX-License-Identifier: GPL-2.0 OR MIT 2 /* 3 * Copyright 2014-2022 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 
22 * 23 */ 24 25 #include <linux/ratelimit.h> 26 #include <linux/printk.h> 27 #include <linux/slab.h> 28 #include <linux/list.h> 29 #include <linux/types.h> 30 #include <linux/bitops.h> 31 #include <linux/sched.h> 32 #include "kfd_priv.h" 33 #include "kfd_device_queue_manager.h" 34 #include "kfd_mqd_manager.h" 35 #include "cik_regs.h" 36 #include "kfd_kernel_queue.h" 37 #include "amdgpu_amdkfd.h" 38 #include "amdgpu_reset.h" 39 #include "amdgpu_sdma.h" 40 #include "mes_v11_api_def.h" 41 #include "kfd_debug.h" 42 43 /* Size of the per-pipe EOP queue */ 44 #define CIK_HPD_EOP_BYTES_LOG2 11 45 #define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2) 46 /* See unmap_queues_cpsch() */ 47 #define USE_DEFAULT_GRACE_PERIOD 0xffffffff 48 49 static int set_pasid_vmid_mapping(struct device_queue_manager *dqm, 50 u32 pasid, unsigned int vmid); 51 52 static int execute_queues_cpsch(struct device_queue_manager *dqm, 53 enum kfd_unmap_queues_filter filter, 54 uint32_t filter_param, 55 uint32_t grace_period); 56 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 57 enum kfd_unmap_queues_filter filter, 58 uint32_t filter_param, 59 uint32_t grace_period, 60 bool reset); 61 62 static int map_queues_cpsch(struct device_queue_manager *dqm); 63 64 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 65 struct queue *q); 66 67 static inline void deallocate_hqd(struct device_queue_manager *dqm, 68 struct queue *q); 69 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q); 70 static int allocate_sdma_queue(struct device_queue_manager *dqm, 71 struct queue *q, const uint32_t *restore_sdma_id); 72 73 static int reset_queues_on_hws_hang(struct device_queue_manager *dqm, bool is_sdma); 74 static int resume_all_queues_mes(struct device_queue_manager *dqm); 75 static int suspend_all_queues_mes(struct device_queue_manager *dqm); 76 static struct queue *find_queue_by_doorbell_offset(struct device_queue_manager *dqm, 77 u32 doorbell_offset); 78 
static void set_queue_as_reset(struct device_queue_manager *dqm, struct queue *q, 79 struct qcm_process_device *qpd); 80 81 static inline 82 enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type) 83 { 84 if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI) 85 return KFD_MQD_TYPE_SDMA; 86 return KFD_MQD_TYPE_CP; 87 } 88 89 static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe) 90 { 91 int i; 92 int pipe_offset = (mec * dqm->dev->kfd->shared_resources.num_pipe_per_mec 93 + pipe) * dqm->dev->kfd->shared_resources.num_queue_per_pipe; 94 95 /* queue is available for KFD usage if bit is 1 */ 96 for (i = 0; i < dqm->dev->kfd->shared_resources.num_queue_per_pipe; ++i) 97 if (test_bit(pipe_offset + i, 98 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 99 return true; 100 return false; 101 } 102 103 unsigned int get_cp_queues_num(struct device_queue_manager *dqm) 104 { 105 return bitmap_weight(dqm->dev->kfd->shared_resources.cp_queue_bitmap, 106 AMDGPU_MAX_QUEUES); 107 } 108 109 unsigned int get_queues_per_pipe(struct device_queue_manager *dqm) 110 { 111 return dqm->dev->kfd->shared_resources.num_queue_per_pipe; 112 } 113 114 unsigned int get_pipes_per_mec(struct device_queue_manager *dqm) 115 { 116 return dqm->dev->kfd->shared_resources.num_pipe_per_mec; 117 } 118 119 static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm) 120 { 121 return kfd_get_num_sdma_engines(dqm->dev) + 122 kfd_get_num_xgmi_sdma_engines(dqm->dev); 123 } 124 125 unsigned int get_num_sdma_queues(struct device_queue_manager *dqm) 126 { 127 return kfd_get_num_sdma_engines(dqm->dev) * 128 dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 129 } 130 131 unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm) 132 { 133 return kfd_get_num_xgmi_sdma_engines(dqm->dev) * 134 dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 135 } 136 137 static void init_sdma_bitmaps(struct device_queue_manager *dqm) 138 
{ 139 bitmap_zero(dqm->sdma_bitmap, KFD_MAX_SDMA_QUEUES); 140 bitmap_set(dqm->sdma_bitmap, 0, get_num_sdma_queues(dqm)); 141 142 bitmap_zero(dqm->xgmi_sdma_bitmap, KFD_MAX_SDMA_QUEUES); 143 bitmap_set(dqm->xgmi_sdma_bitmap, 0, get_num_xgmi_sdma_queues(dqm)); 144 145 /* Mask out the reserved queues */ 146 bitmap_clear(dqm->sdma_bitmap, 0, kfd_get_num_sdma_engines(dqm->dev) * 147 dqm->dev->kfd->device_info.num_reserved_sdma_queues_per_engine); 148 bitmap_clear(dqm->xgmi_sdma_bitmap, 0, kfd_get_num_xgmi_sdma_engines(dqm->dev) * 149 dqm->dev->kfd->device_info.num_reserved_sdma_queues_per_engine); 150 } 151 152 void program_sh_mem_settings(struct device_queue_manager *dqm, 153 struct qcm_process_device *qpd) 154 { 155 uint32_t xcc_mask = dqm->dev->xcc_mask; 156 int xcc_id; 157 158 for_each_inst(xcc_id, xcc_mask) 159 dqm->dev->kfd2kgd->program_sh_mem_settings( 160 dqm->dev->adev, qpd->vmid, qpd->sh_mem_config, 161 qpd->sh_mem_ape1_base, qpd->sh_mem_ape1_limit, 162 qpd->sh_mem_bases, xcc_id); 163 } 164 165 static void kfd_hws_hang(struct device_queue_manager *dqm) 166 { 167 struct device_process_node *cur; 168 struct qcm_process_device *qpd; 169 struct queue *q; 170 171 /* Mark all device queues as reset. 
*/ 172 list_for_each_entry(cur, &dqm->queues, list) { 173 qpd = cur->qpd; 174 list_for_each_entry(q, &qpd->queues_list, list) { 175 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 176 177 pdd->has_reset_queue = true; 178 } 179 } 180 181 /* 182 * Issue a GPU reset if HWS is unresponsive 183 */ 184 amdgpu_amdkfd_gpu_reset(dqm->dev->adev); 185 } 186 187 static int convert_to_mes_queue_type(int queue_type) 188 { 189 int mes_queue_type; 190 191 switch (queue_type) { 192 case KFD_QUEUE_TYPE_COMPUTE: 193 mes_queue_type = MES_QUEUE_TYPE_COMPUTE; 194 break; 195 case KFD_QUEUE_TYPE_SDMA: 196 mes_queue_type = MES_QUEUE_TYPE_SDMA; 197 break; 198 default: 199 WARN(1, "Invalid queue type %d", queue_type); 200 mes_queue_type = -EINVAL; 201 break; 202 } 203 204 return mes_queue_type; 205 } 206 207 static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q, 208 struct qcm_process_device *qpd) 209 { 210 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 211 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 212 struct mes_add_queue_input queue_input; 213 int r, queue_type; 214 uint64_t wptr_addr_off; 215 216 if (!dqm->sched_running || dqm->sched_halt) 217 return 0; 218 if (!down_read_trylock(&adev->reset_domain->sem)) 219 return -EIO; 220 221 memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input)); 222 queue_input.process_id = pdd->pasid; 223 queue_input.page_table_base_addr = qpd->page_table_base; 224 queue_input.process_va_start = 0; 225 queue_input.process_va_end = adev->vm_manager.max_pfn - 1; 226 /* MES unit for quantum is 100ns */ 227 queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM; /* Equivalent to 10ms. 
*/ 228 queue_input.process_context_addr = pdd->proc_ctx_gpu_addr; 229 queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */ 230 queue_input.gang_context_addr = q->gang_ctx_gpu_addr; 231 queue_input.inprocess_gang_priority = q->properties.priority; 232 queue_input.gang_global_priority_level = 233 AMDGPU_MES_PRIORITY_LEVEL_NORMAL; 234 queue_input.doorbell_offset = q->properties.doorbell_off; 235 queue_input.mqd_addr = q->gart_mqd_addr; 236 queue_input.wptr_addr = (uint64_t)q->properties.write_ptr; 237 238 wptr_addr_off = (uint64_t)q->properties.write_ptr & (PAGE_SIZE - 1); 239 queue_input.wptr_mc_addr = amdgpu_bo_gpu_offset(q->properties.wptr_bo) + wptr_addr_off; 240 241 queue_input.is_kfd_process = 1; 242 queue_input.is_aql_queue = (q->properties.format == KFD_QUEUE_FORMAT_AQL); 243 queue_input.queue_size = q->properties.queue_size >> 2; 244 245 queue_input.paging = false; 246 queue_input.tba_addr = qpd->tba_addr; 247 queue_input.tma_addr = qpd->tma_addr; 248 queue_input.trap_en = !kfd_dbg_has_cwsr_workaround(q->device); 249 queue_input.skip_process_ctx_clear = 250 qpd->pqm->process->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED && 251 (qpd->pqm->process->debug_trap_enabled || 252 kfd_dbg_has_ttmps_always_setup(q->device)); 253 254 queue_type = convert_to_mes_queue_type(q->properties.type); 255 if (queue_type < 0) { 256 dev_err(adev->dev, "Queue type not supported with MES, queue:%d\n", 257 q->properties.type); 258 up_read(&adev->reset_domain->sem); 259 return -EINVAL; 260 } 261 queue_input.queue_type = (uint32_t)queue_type; 262 263 queue_input.exclusively_scheduled = q->properties.is_gws; 264 queue_input.sh_mem_config_data = qpd->sh_mem_config; 265 queue_input.vm_cntx_cntl = qpd->vm_cntx_cntl; 266 queue_input.xcc_id = ffs(dqm->dev->xcc_mask) - 1; 267 268 amdgpu_mes_lock(&adev->mes); 269 r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input); 270 amdgpu_mes_unlock(&adev->mes); 271 up_read(&adev->reset_domain->sem); 272 if (r) { 
273 dev_err(adev->dev, "failed to add hardware queue to MES, doorbell=0x%x\n", 274 q->properties.doorbell_off); 275 dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); 276 kfd_hws_hang(dqm); 277 } 278 279 return r; 280 } 281 282 static int remove_queue_mes_on_reset_option(struct device_queue_manager *dqm, struct queue *q, 283 struct qcm_process_device *qpd, 284 bool is_for_reset, 285 bool flush_mes_queue) 286 { 287 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 288 int r; 289 struct mes_remove_queue_input queue_input; 290 291 /* queue was already removed during reset */ 292 if (q->properties.is_reset) 293 return 0; 294 295 if (!dqm->sched_running || dqm->sched_halt) 296 return 0; 297 if (!down_read_trylock(&adev->reset_domain->sem)) 298 return -EIO; 299 300 memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input)); 301 queue_input.doorbell_offset = q->properties.doorbell_off; 302 queue_input.gang_context_addr = q->gang_ctx_gpu_addr; 303 queue_input.remove_queue_after_reset = flush_mes_queue; 304 queue_input.xcc_id = ffs(dqm->dev->xcc_mask) - 1; 305 306 amdgpu_mes_lock(&adev->mes); 307 r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input); 308 amdgpu_mes_unlock(&adev->mes); 309 up_read(&adev->reset_domain->sem); 310 311 if (is_for_reset) 312 return r; 313 314 if (r) { 315 if (!suspend_all_queues_mes(dqm)) 316 return resume_all_queues_mes(dqm); 317 318 dev_err(adev->dev, "failed to remove hardware queue from MES, doorbell=0x%x\n", 319 q->properties.doorbell_off); 320 dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); 321 kfd_hws_hang(dqm); 322 } 323 324 return r; 325 } 326 327 static void set_perfcount(struct device_queue_manager *dqm, int enable) 328 { 329 struct device_process_node *cur; 330 struct qcm_process_device *qpd; 331 struct queue *q; 332 struct mqd_update_info minfo = { 0 }; 333 334 if (!dqm) 335 return; 336 337 minfo.update_flag = (enable == 1 ? 
UPDATE_FLAG_PERFCOUNT_ENABLE : 338 UPDATE_FLAG_PERFCOUNT_DISABLE); 339 dqm_lock(dqm); 340 list_for_each_entry(cur, &dqm->queues, list) { 341 qpd = cur->qpd; 342 list_for_each_entry(q, &qpd->queues_list, list) { 343 pqm_update_mqd(qpd->pqm, q->properties.queue_id, 344 &minfo); 345 } 346 } 347 dqm_unlock(dqm); 348 } 349 350 static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q, 351 struct qcm_process_device *qpd) 352 { 353 return remove_queue_mes_on_reset_option(dqm, q, qpd, false, false); 354 } 355 356 static int remove_all_kfd_queues_mes(struct device_queue_manager *dqm) 357 { 358 struct device_process_node *cur; 359 struct device *dev = dqm->dev->adev->dev; 360 struct qcm_process_device *qpd; 361 struct queue *q; 362 int retval = 0; 363 364 list_for_each_entry(cur, &dqm->queues, list) { 365 qpd = cur->qpd; 366 list_for_each_entry(q, &qpd->queues_list, list) { 367 if (q->properties.is_active) { 368 retval = remove_queue_mes(dqm, q, qpd); 369 if (retval) { 370 dev_err(dev, "%s: Failed to remove queue %d for dev %d", 371 __func__, 372 q->properties.queue_id, 373 dqm->dev->id); 374 return retval; 375 } 376 } 377 } 378 } 379 380 return retval; 381 } 382 383 static int add_all_kfd_queues_mes(struct device_queue_manager *dqm) 384 { 385 struct device_process_node *cur; 386 struct device *dev = dqm->dev->adev->dev; 387 struct qcm_process_device *qpd; 388 struct queue *q; 389 int retval = 0; 390 391 list_for_each_entry(cur, &dqm->queues, list) { 392 qpd = cur->qpd; 393 list_for_each_entry(q, &qpd->queues_list, list) { 394 if (!q->properties.is_active) 395 continue; 396 retval = add_queue_mes(dqm, q, qpd); 397 if (retval) { 398 dev_err(dev, "%s: Failed to add queue %d for dev %d", 399 __func__, 400 q->properties.queue_id, 401 dqm->dev->id); 402 return retval; 403 } 404 } 405 } 406 407 return retval; 408 } 409 410 static int reset_queues_mes(struct device_queue_manager *dqm) 411 { 412 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 
413 int hqd_info_size = adev->mes.hung_queue_hqd_info_offset; 414 int num_hung = 0, r = 0, i, pipe, queue, queue_type; 415 u32 *hung_array = dqm->hung_db_array; 416 struct amdgpu_mes_hung_queue_hqd_info *hqd_info = dqm->hqd_info; 417 struct kfd_process_device *pdd; 418 struct queue *q; 419 420 if (!amdgpu_mes_queue_reset_by_mes_supported(adev)) { 421 r = -ENOTRECOVERABLE; 422 goto fail; 423 } 424 425 /* reset should be used only in dqm locked queue reset */ 426 if (WARN_ON(dqm->detect_hang_count > 0)) 427 return 0; 428 429 if (!amdgpu_gpu_recovery) { 430 r = -ENOTRECOVERABLE; 431 goto fail; 432 } 433 434 if (!hung_array || !hqd_info) { 435 r = -ENOMEM; 436 goto fail; 437 } 438 439 memset(hqd_info, 0, hqd_info_size * sizeof(struct amdgpu_mes_hung_queue_hqd_info)); 440 441 /* 442 * AMDGPU_RING_TYPE_COMPUTE parameter does not matter if called 443 * post suspend_all as reset & detect will return all hung queue types. 444 * 445 * Passed parameter is for targeting queues not scheduled by MES add_queue. 
446 */ 447 r = amdgpu_mes_detect_and_reset_hung_queues(adev, AMDGPU_RING_TYPE_COMPUTE, 448 false, &num_hung, hung_array, ffs(dqm->dev->xcc_mask) - 1); 449 450 if (!num_hung || r) { 451 r = -ENOTRECOVERABLE; 452 goto fail; 453 } 454 455 /* MES resets queue/pipe and cleans up internally */ 456 for (i = 0; i < num_hung; i++) { 457 hqd_info[i].bit0_31 = hung_array[i + hqd_info_size]; 458 pipe = hqd_info[i].pipe_index; 459 queue = hqd_info[i].queue_index; 460 queue_type = hqd_info[i].queue_type; 461 462 if (queue_type != MES_QUEUE_TYPE_COMPUTE && 463 queue_type != MES_QUEUE_TYPE_SDMA) { 464 pr_warn("Unsupported hung queue reset type: %d\n", queue_type); 465 hung_array[i] = AMDGPU_MES_INVALID_DB_OFFSET; 466 continue; 467 } 468 469 q = find_queue_by_doorbell_offset(dqm, hung_array[i]); 470 if (!q) { 471 r = -ENOTRECOVERABLE; 472 goto fail; 473 } 474 475 pdd = kfd_get_process_device_data(q->device, q->process); 476 if (!pdd) { 477 r = -ENODEV; 478 goto fail; 479 } 480 481 pr_warn("Hang detected doorbell %x pipe %d queue %d type %d\n", 482 hung_array[i], pipe, queue, queue_type); 483 /* Proceed remove_queue with reset=true */ 484 remove_queue_mes_on_reset_option(dqm, q, &pdd->qpd, true, false); 485 set_queue_as_reset(dqm, q, &pdd->qpd); 486 } 487 488 dqm->detect_hang_count = num_hung; 489 kfd_signal_reset_event(dqm->dev); 490 491 fail: 492 dqm->detect_hang_count = 0; 493 return r; 494 } 495 496 static int suspend_all_queues_mes(struct device_queue_manager *dqm) 497 { 498 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 499 int r = 0; 500 501 if (!down_read_trylock(&adev->reset_domain->sem)) 502 return -EIO; 503 504 r = amdgpu_mes_suspend(adev, ffs(dqm->dev->xcc_mask) - 1); 505 up_read(&adev->reset_domain->sem); 506 507 if (r) { 508 if (!reset_queues_mes(dqm)) 509 return 0; 510 511 dev_err(adev->dev, "failed to suspend gangs from MES\n"); 512 dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); 513 kfd_hws_hang(dqm); 514 } 515 
516 return r; 517 } 518 519 static int resume_all_queues_mes(struct device_queue_manager *dqm) 520 { 521 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 522 int r = 0; 523 524 if (!down_read_trylock(&adev->reset_domain->sem)) 525 return -EIO; 526 527 r = amdgpu_mes_resume(adev, ffs(dqm->dev->xcc_mask) - 1); 528 up_read(&adev->reset_domain->sem); 529 530 if (r) { 531 dev_err(adev->dev, "failed to resume gangs from MES\n"); 532 dev_err(adev->dev, "MES might be in unrecoverable state, issue a GPU reset\n"); 533 kfd_hws_hang(dqm); 534 } 535 536 return r; 537 } 538 539 static void increment_queue_count(struct device_queue_manager *dqm, 540 struct qcm_process_device *qpd, 541 struct queue *q) 542 { 543 dqm->active_queue_count++; 544 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 545 dqm->active_cp_queue_count++; 546 547 if (q->properties.is_gws) { 548 dqm->gws_queue_count++; 549 qpd->mapped_gws_queue = true; 550 } 551 } 552 553 static void decrement_queue_count(struct device_queue_manager *dqm, 554 struct qcm_process_device *qpd, 555 struct queue *q) 556 { 557 dqm->active_queue_count--; 558 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 559 dqm->active_cp_queue_count--; 560 561 if (q->properties.is_gws) { 562 dqm->gws_queue_count--; 563 qpd->mapped_gws_queue = false; 564 } 565 } 566 567 /* 568 * Allocate a doorbell ID to this queue. 569 * If doorbell_id is passed in, make sure requested ID is valid then allocate it. 570 */ 571 static int allocate_doorbell(struct qcm_process_device *qpd, 572 struct queue *q, 573 uint32_t const *restore_id) 574 { 575 struct kfd_node *dev = qpd->dqm->dev; 576 577 if (!KFD_IS_SOC15(dev)) { 578 /* On pre-SOC15 chips we need to use the queue ID to 579 * preserve the user mode ABI. 
580 */ 581 582 if (restore_id && *restore_id != q->properties.queue_id) 583 return -EINVAL; 584 585 q->doorbell_id = q->properties.queue_id; 586 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 587 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 588 /* For SDMA queues on SOC15 with 8-byte doorbell, use static 589 * doorbell assignments based on the engine and queue id. 590 * The doobell index distance between RLC (2*i) and (2*i+1) 591 * for a SDMA engine is 512. 592 */ 593 594 uint32_t *idx_offset = dev->kfd->shared_resources.sdma_doorbell_idx; 595 596 /* 597 * q->properties.sdma_engine_id corresponds to the virtual 598 * sdma engine number. However, for doorbell allocation, 599 * we need the physical sdma engine id in order to get the 600 * correct doorbell offset. 601 */ 602 uint32_t valid_id = idx_offset[qpd->dqm->dev->node_id * 603 get_num_all_sdma_engines(qpd->dqm) + 604 q->properties.sdma_engine_id] 605 + (q->properties.sdma_queue_id & 1) 606 * KFD_QUEUE_DOORBELL_MIRROR_OFFSET 607 + (q->properties.sdma_queue_id >> 1); 608 609 if (restore_id && *restore_id != valid_id) 610 return -EINVAL; 611 q->doorbell_id = valid_id; 612 } else { 613 /* For CP queues on SOC15 */ 614 if (restore_id) { 615 /* make sure that ID is free */ 616 if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap)) 617 return -EINVAL; 618 619 q->doorbell_id = *restore_id; 620 } else { 621 /* or reserve a free doorbell ID */ 622 unsigned int found; 623 624 found = find_first_zero_bit(qpd->doorbell_bitmap, 625 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); 626 if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) { 627 pr_debug("No doorbells available"); 628 return -EBUSY; 629 } 630 set_bit(found, qpd->doorbell_bitmap); 631 q->doorbell_id = found; 632 } 633 } 634 635 q->properties.doorbell_off = amdgpu_doorbell_index_on_bar(dev->adev, 636 qpd->proc_doorbells, 637 q->doorbell_id, 638 dev->kfd->device_info.doorbell_size); 639 return 0; 640 } 641 642 static void deallocate_doorbell(struct 
qcm_process_device *qpd, 643 struct queue *q) 644 { 645 unsigned int old; 646 struct kfd_node *dev = qpd->dqm->dev; 647 648 if (!KFD_IS_SOC15(dev) || 649 q->properties.type == KFD_QUEUE_TYPE_SDMA || 650 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 651 return; 652 653 old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap); 654 WARN_ON(!old); 655 } 656 657 static void program_trap_handler_settings(struct device_queue_manager *dqm, 658 struct qcm_process_device *qpd) 659 { 660 uint32_t xcc_mask = dqm->dev->xcc_mask; 661 int xcc_id; 662 663 if (dqm->dev->kfd2kgd->program_trap_handler_settings) 664 for_each_inst(xcc_id, xcc_mask) 665 dqm->dev->kfd2kgd->program_trap_handler_settings( 666 dqm->dev->adev, qpd->vmid, qpd->tba_addr, 667 qpd->tma_addr, xcc_id); 668 } 669 670 static int allocate_vmid(struct device_queue_manager *dqm, 671 struct qcm_process_device *qpd, 672 struct queue *q) 673 { 674 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 675 struct device *dev = dqm->dev->adev->dev; 676 int allocated_vmid = -1, i; 677 678 for (i = dqm->dev->vm_info.first_vmid_kfd; 679 i <= dqm->dev->vm_info.last_vmid_kfd; i++) { 680 if (!dqm->vmid_pasid[i]) { 681 allocated_vmid = i; 682 break; 683 } 684 } 685 686 if (allocated_vmid < 0) { 687 dev_err(dev, "no more vmid to allocate\n"); 688 return -ENOSPC; 689 } 690 691 pr_debug("vmid allocated: %d\n", allocated_vmid); 692 693 dqm->vmid_pasid[allocated_vmid] = pdd->pasid; 694 695 set_pasid_vmid_mapping(dqm, pdd->pasid, allocated_vmid); 696 697 qpd->vmid = allocated_vmid; 698 q->properties.vmid = allocated_vmid; 699 700 program_sh_mem_settings(dqm, qpd); 701 702 if (KFD_IS_SOC15(dqm->dev) && dqm->dev->kfd->cwsr_enabled) 703 program_trap_handler_settings(dqm, qpd); 704 705 /* qpd->page_table_base is set earlier when register_process() 706 * is called, i.e. when the first queue is created. 
707 */ 708 dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev, 709 qpd->vmid, 710 qpd->page_table_base); 711 /* invalidate the VM context after pasid and vmid mapping is set up */ 712 kfd_flush_tlb(qpd_to_pdd(qpd)); 713 714 if (dqm->dev->kfd2kgd->set_scratch_backing_va) 715 dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev, 716 qpd->sh_hidden_private_base, qpd->vmid); 717 718 return 0; 719 } 720 721 static int flush_texture_cache_nocpsch(struct kfd_node *kdev, 722 struct qcm_process_device *qpd) 723 { 724 const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf; 725 int ret; 726 727 if (!qpd->ib_kaddr) 728 return -ENOMEM; 729 730 ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr); 731 if (ret) 732 return ret; 733 734 return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid, 735 qpd->ib_base, (uint32_t *)qpd->ib_kaddr, 736 pmf->release_mem_size / sizeof(uint32_t)); 737 } 738 739 static void deallocate_vmid(struct device_queue_manager *dqm, 740 struct qcm_process_device *qpd, 741 struct queue *q) 742 { 743 struct device *dev = dqm->dev->adev->dev; 744 745 /* On GFX v7, CP doesn't flush TC at dequeue */ 746 if (q->device->adev->asic_type == CHIP_HAWAII) 747 if (flush_texture_cache_nocpsch(q->device, qpd)) 748 dev_err(dev, "Failed to flush TC\n"); 749 750 kfd_flush_tlb(qpd_to_pdd(qpd)); 751 752 /* Release the vmid mapping */ 753 set_pasid_vmid_mapping(dqm, 0, qpd->vmid); 754 dqm->vmid_pasid[qpd->vmid] = 0; 755 756 qpd->vmid = 0; 757 q->properties.vmid = 0; 758 } 759 760 static int create_queue_nocpsch(struct device_queue_manager *dqm, 761 struct queue *q, 762 struct qcm_process_device *qpd, 763 const struct kfd_criu_queue_priv_data *qd, 764 const void *restore_mqd, const void *restore_ctl_stack) 765 { 766 struct mqd_manager *mqd_mgr; 767 int retval; 768 769 dqm_lock(dqm); 770 771 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 772 pr_warn("Can't create new usermode queue because %d queues were 
already created\n", 773 dqm->total_queue_count); 774 retval = -EPERM; 775 goto out_unlock; 776 } 777 778 if (list_empty(&qpd->queues_list)) { 779 retval = allocate_vmid(dqm, qpd, q); 780 if (retval) 781 goto out_unlock; 782 } 783 q->properties.vmid = qpd->vmid; 784 /* 785 * Eviction state logic: mark all queues as evicted, even ones 786 * not currently active. Restoring inactive queues later only 787 * updates the is_evicted flag but is a no-op otherwise. 788 */ 789 q->properties.is_evicted = !!qpd->evicted; 790 791 q->properties.tba_addr = qpd->tba_addr; 792 q->properties.tma_addr = qpd->tma_addr; 793 794 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 795 q->properties.type)]; 796 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) { 797 retval = allocate_hqd(dqm, q); 798 if (retval) 799 goto deallocate_vmid; 800 pr_debug("Loading mqd to hqd on pipe %d, queue %d\n", 801 q->pipe, q->queue); 802 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 803 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 804 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); 805 if (retval) 806 goto deallocate_vmid; 807 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 808 } 809 810 retval = allocate_doorbell(qpd, q, qd ? 
&qd->doorbell_id : NULL); 811 if (retval) 812 goto out_deallocate_hqd; 813 814 /* Temporarily release dqm lock to avoid a circular lock dependency */ 815 dqm_unlock(dqm); 816 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr, &q->properties); 817 dqm_lock(dqm); 818 819 if (!q->mqd_mem_obj) { 820 retval = -ENOMEM; 821 goto out_deallocate_doorbell; 822 } 823 824 if (qd) 825 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, 826 &q->properties, restore_mqd, restore_ctl_stack, 827 qd->ctl_stack_size); 828 else 829 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 830 &q->gart_mqd_addr, &q->properties); 831 832 if (q->properties.is_active) { 833 if (!dqm->sched_running) { 834 WARN_ONCE(1, "Load non-HWS mqd while stopped\n"); 835 goto add_queue_to_list; 836 } 837 838 if (WARN(q->process->mm != current->mm, 839 "should only run in user thread")) 840 retval = -EFAULT; 841 else 842 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 843 q->queue, &q->properties, current->mm); 844 if (retval) 845 goto out_free_mqd; 846 } 847 848 add_queue_to_list: 849 list_add(&q->list, &qpd->queues_list); 850 qpd->queue_count++; 851 if (q->properties.is_active) 852 increment_queue_count(dqm, qpd, q); 853 854 /* 855 * Unconditionally increment this counter, regardless of the queue's 856 * type or whether the queue is active. 
857 */ 858 dqm->total_queue_count++; 859 pr_debug("Total of %d queues are accountable so far\n", 860 dqm->total_queue_count); 861 goto out_unlock; 862 863 out_free_mqd: 864 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 865 out_deallocate_doorbell: 866 deallocate_doorbell(qpd, q); 867 out_deallocate_hqd: 868 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) 869 deallocate_hqd(dqm, q); 870 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 871 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 872 deallocate_sdma_queue(dqm, q); 873 deallocate_vmid: 874 if (list_empty(&qpd->queues_list)) 875 deallocate_vmid(dqm, qpd, q); 876 out_unlock: 877 dqm_unlock(dqm); 878 return retval; 879 } 880 881 static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q) 882 { 883 bool set; 884 int pipe, bit, i; 885 886 set = false; 887 888 for (pipe = dqm->next_pipe_to_allocate, i = 0; 889 i < get_pipes_per_mec(dqm); 890 pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) { 891 892 if (!is_pipe_enabled(dqm, 0, pipe)) 893 continue; 894 895 if (dqm->allocated_queues[pipe] != 0) { 896 bit = ffs(dqm->allocated_queues[pipe]) - 1; 897 dqm->allocated_queues[pipe] &= ~(1 << bit); 898 q->pipe = pipe; 899 q->queue = bit; 900 set = true; 901 break; 902 } 903 } 904 905 if (!set) 906 return -EBUSY; 907 908 pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue); 909 /* horizontal hqd allocation */ 910 dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm); 911 912 return 0; 913 } 914 915 static inline void deallocate_hqd(struct device_queue_manager *dqm, 916 struct queue *q) 917 { 918 dqm->allocated_queues[q->pipe] |= (1 << q->queue); 919 } 920 921 #define SQ_IND_CMD_CMD_KILL 0x00000003 922 #define SQ_IND_CMD_MODE_BROADCAST 0x00000001 923 924 static int dbgdev_wave_reset_wavefronts(struct kfd_node *dev, struct kfd_process *p) 925 { 926 int status = 0; 927 unsigned int vmid; 928 uint16_t queried_pasid; 929 union SQ_CMD_BITS reg_sq_cmd; 930 union GRBM_GFX_INDEX_BITS 
reg_gfx_index;
	struct kfd_process_device *pdd;
	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;
	uint32_t xcc_mask = dev->xcc_mask;
	int xcc_id;

	reg_sq_cmd.u32All = 0;
	reg_gfx_index.u32All = 0;

	pr_debug("Killing all process wavefronts\n");

	if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
		dev_err(dev->adev->dev, "no vmid pasid mapping supported\n");
		return -EOPNOTSUPP;
	}

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd)
		return -EFAULT;

	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
	 * ATC_VMID15_PASID_MAPPING
	 * to check which VMID the current process is mapped to.
	 */

	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
				(dev->adev, vmid, &queried_pasid);

		if (status && queried_pasid == pdd->pasid) {
			pr_debug("Killing wave fronts of vmid %d and process pid %d\n",
				 vmid, p->lead_thread->pid);
			break;
		}
	}

	if (vmid > last_vmid_to_scan) {
		dev_err(dev->adev->dev, "Didn't find vmid for process pid %d\n",
			p->lead_thread->pid);
		return -EFAULT;
	}

	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
	reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
	reg_sq_cmd.bits.vm_id = vmid;

	for_each_inst(xcc_id, xcc_mask)
		dev->kfd2kgd->wave_control_execute(
			dev->adev, reg_gfx_index.u32All,
			reg_sq_cmd.u32All, xcc_id);

	return 0;
}

/* Tear down a queue on the non-HWS (nocpsch) path.
 *
 * Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid unsynchronized access.
 *
 * The queue's HQD slot (compute) or SDMA id (SDMA / SDMA_XGMI) is released
 * first; any other queue type is rejected with -EINVAL before any counter is
 * touched. total_queue_count is then decremented and the doorbell released.
 *
 * If the scheduler is not running the function warns once and returns 0 at
 * that point, i.e. without destroying the MQD, unlinking the queue or
 * adjusting qpd->queue_count.
 *
 * Otherwise returns the result of mqd_mgr->destroy_mqd(); the list and
 * counter updates below are performed regardless of that result.
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		deallocate_hqd(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		deallocate_sdma_queue(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		deallocate_sdma_queue(dqm, q);
	else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	deallocate_doorbell(qpd, q);

	if (!dqm->sched_running) {
		WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
		return 0;
	}

	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				KFD_UNMAP_LATENCY_MS,
				q->pipe, q->queue);
	/* A preemption timeout is remembered so the wavefronts get reset
	 * once the last queue of this process is gone (see below).
	 */
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
					dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
			 */
			dbgdev_wave_reset_wavefronts(dqm->dev,
					qpd->pqm->process);
			qpd->reset_wavefronts = false;
		}

		deallocate_vmid(dqm, qpd, q);
	}
	qpd->queue_count--;
	if (q->properties.is_active)
		decrement_queue_count(dqm, qpd, q);

	return retval;
}

/* Locking wrapper around destroy_queue_nocpsch_locked().
 *
 * For SDMA and SDMA_XGMI queues the queue counter is read through
 * q->properties.read_ptr before the DQM lock is taken; a read failure is only
 * logged and leaves sdma_val at 0. The value is folded into
 * pdd->sdma_past_activity_counter only when the locked destroy returns 0.
 * The MQD is freed after the lock is dropped, whatever the destroy result.
 *
 * Returns the result of destroy_queue_nocpsch_locked().
 */
static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				 struct qcm_process_device *qpd,
				 struct queue *q)
{
	int retval;
	uint64_t sdma_val = 0;
	struct device *dev = dqm->dev->adev->dev;
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
	struct mqd_manager *mqd_mgr =
		dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];

	/* Get the SDMA queue stats */
	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
							&sdma_val);
		if (retval)
			dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n",
				q->properties.queue_id);
	}

	dqm_lock(dqm);
	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
	if (!retval)
		pdd->sdma_past_activity_counter += sdma_val;
	dqm_unlock(dqm);

	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	return retval;
}

/* Apply updated queue properties to a queue's MQD.
 *
 * Runs entirely under the DQM lock, in three phases:
 *  1. take the queue off the hardware (unmap the runlist, remove it from MES,
 *     or destroy the HQD on the non-HWS path when it was active);
 *  2. call mqd_mgr->update_mqd() and reconcile the active/GWS counters with
 *     the queue's new state;
 *  3. put the queue back (map the runlist, add it to MES, or load the MQD).
 *
 * Returns 0 on success, -ENODEV if no process-device data exists for the
 * queue, -EACCES if pdd->has_reset_queue is set after the unmap step, -EFAULT
 * if the non-HWS reload is attempted from a thread whose mm differs from the
 * queue's process, or the error of the failing unmap/destroy/map/load step.
 * On the non-HWS path, a stopped scheduler makes the function warn once and
 * return 0 without updating the MQD.
 */
static int update_queue(struct device_queue_manager *dqm, struct queue *q,
			struct mqd_update_info *minfo)
{
	int retval = 0;
	struct device *dev = dqm->dev->adev->dev;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	bool prev_active = false;

	dqm_lock(dqm);
	pdd = kfd_get_process_device_data(q->device, q->process);
	if (!pdd) {
		retval = -ENODEV;
		goto out_unlock;
	}
	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	/* Save previous activity state for counters */
	prev_active = q->properties.is_active;

	/* Make sure the queue is unmapped before updating the MQD */
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		if (!dqm->dev->kfd->shared_resources.enable_mes)
			retval = unmap_queues_cpsch(dqm,
						    KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
		else if (prev_active)
			retval = remove_queue_mes(dqm, q, &pdd->qpd);

		/* The queue has been reset, so it is inaccessible; this
		 * check takes precedence over any unmap error above.
		 */
		if (pdd->has_reset_queue) {
			retval = -EACCES;
			goto out_unlock;
		}

		if (retval) {
			dev_err(dev, "unmap queue failed\n");
			goto out_unlock;
		}
	} else if (prev_active &&
		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {

		if (!dqm->sched_running) {
			WARN_ONCE(1, "Update non-HWS queue while stopped\n");
			goto out_unlock;
		}

		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				(dqm->dev->kfd->cwsr_enabled ?
				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
				 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval) {
			dev_err(dev, "destroy mqd failed\n");
			goto out_unlock;
		}
	}

	mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo);

	/*
	 * check active state vs. the previous state and modify
	 * counter accordingly. map_queues_cpsch uses the
	 * dqm->active_queue_count to determine whether a new runlist must be
	 * uploaded.
	 */
	if (q->properties.is_active && !prev_active) {
		increment_queue_count(dqm, &pdd->qpd, q);
	} else if (!q->properties.is_active && prev_active) {
		decrement_queue_count(dqm, &pdd->qpd, q);
	} else if (q->gws && !q->properties.is_gws) {
		/* Activity unchanged, GWS newly attached to the queue */
		if (q->properties.is_active) {
			dqm->gws_queue_count++;
			pdd->qpd.mapped_gws_queue = true;
		}
		q->properties.is_gws = true;
	} else if (!q->gws && q->properties.is_gws) {
		/* Activity unchanged, GWS detached from the queue */
		if (q->properties.is_active) {
			dqm->gws_queue_count--;
			pdd->qpd.mapped_gws_queue = false;
		}
		q->properties.is_gws = false;
	}

	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		if (!dqm->dev->kfd->shared_resources.enable_mes)
			retval = map_queues_cpsch(dqm);
		else if (q->properties.is_active)
			retval = add_queue_mes(dqm, q, &pdd->qpd);
	} else if (q->properties.is_active &&
		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		/* load_mqd is handed current->mm, so it must be the mm of
		 * the process that owns the queue.
		 */
		if (WARN(q->process->mm != current->mm,
			 "should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
						   q->pipe, q->queue,
						   &q->properties, current->mm);
	}

out_unlock:
	dqm_unlock(dqm);
	return retval;
}

/* suspend_single_queue does not lock the dqm like the
 * evict_process_queues_cpsch or evict_process_queues_nocpsch. You should
 * lock the dqm before calling, and unlock after calling.
 *
 * The reason we don't lock the dqm is because this function may be
 * called on multiple queues in a loop, so rather than locking/unlocking
 * multiple times, we will just keep the dqm locked for all of the calls.
1198 */ 1199 static int suspend_single_queue(struct device_queue_manager *dqm, 1200 struct kfd_process_device *pdd, 1201 struct queue *q) 1202 { 1203 bool is_new; 1204 1205 if (q->properties.is_suspended) 1206 return 0; 1207 1208 pr_debug("Suspending process pid %d queue [%i]\n", 1209 pdd->process->lead_thread->pid, 1210 q->properties.queue_id); 1211 1212 is_new = q->properties.exception_status & KFD_EC_MASK(EC_QUEUE_NEW); 1213 1214 if (is_new || q->properties.is_being_destroyed) { 1215 pr_debug("Suspend: skip %s queue id %i\n", 1216 is_new ? "new" : "destroyed", 1217 q->properties.queue_id); 1218 return -EBUSY; 1219 } 1220 1221 q->properties.is_suspended = true; 1222 if (q->properties.is_active) { 1223 if (dqm->dev->kfd->shared_resources.enable_mes) { 1224 int r = remove_queue_mes(dqm, q, &pdd->qpd); 1225 1226 if (r) 1227 return r; 1228 } 1229 1230 decrement_queue_count(dqm, &pdd->qpd, q); 1231 q->properties.is_active = false; 1232 } 1233 1234 return 0; 1235 } 1236 1237 /* resume_single_queue does not lock the dqm like the functions 1238 * restore_process_queues_cpsch or restore_process_queues_nocpsch. You should 1239 * lock the dqm before calling, and unlock after calling. 1240 * 1241 * The reason we don't lock the dqm is because this function may be 1242 * called on multiple queues in a loop, so rather than locking/unlocking 1243 * multiple times, we will just keep the dqm locked for all of the calls. 
1244 */ 1245 static int resume_single_queue(struct device_queue_manager *dqm, 1246 struct qcm_process_device *qpd, 1247 struct queue *q) 1248 { 1249 struct kfd_process_device *pdd; 1250 1251 if (!q->properties.is_suspended) 1252 return 0; 1253 1254 pdd = qpd_to_pdd(qpd); 1255 1256 pr_debug("Restoring from suspend process pid %d queue [%i]\n", 1257 pdd->process->lead_thread->pid, 1258 q->properties.queue_id); 1259 1260 q->properties.is_suspended = false; 1261 1262 if (QUEUE_IS_ACTIVE(q->properties)) { 1263 if (dqm->dev->kfd->shared_resources.enable_mes) { 1264 int r = add_queue_mes(dqm, q, &pdd->qpd); 1265 1266 if (r) 1267 return r; 1268 } 1269 1270 q->properties.is_active = true; 1271 increment_queue_count(dqm, qpd, q); 1272 } 1273 1274 return 0; 1275 } 1276 1277 static int evict_process_queues_nocpsch(struct device_queue_manager *dqm, 1278 struct qcm_process_device *qpd) 1279 { 1280 struct queue *q; 1281 struct mqd_manager *mqd_mgr; 1282 struct kfd_process_device *pdd; 1283 int retval, ret = 0; 1284 1285 dqm_lock(dqm); 1286 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 1287 goto out; 1288 1289 pdd = qpd_to_pdd(qpd); 1290 pr_debug_ratelimited("Evicting process pid %d queues\n", 1291 pdd->process->lead_thread->pid); 1292 1293 pdd->last_evict_timestamp = get_jiffies_64(); 1294 /* Mark all queues as evicted. Deactivate all active queues on 1295 * the qpd. 1296 */ 1297 list_for_each_entry(q, &qpd->queues_list, list) { 1298 q->properties.is_evicted = true; 1299 if (!q->properties.is_active) 1300 continue; 1301 1302 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1303 q->properties.type)]; 1304 q->properties.is_active = false; 1305 decrement_queue_count(dqm, qpd, q); 1306 1307 if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n")) 1308 continue; 1309 1310 retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd, 1311 (dqm->dev->kfd->cwsr_enabled ? 
1312 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE : 1313 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN), 1314 KFD_UNMAP_LATENCY_MS, q->pipe, q->queue); 1315 if (retval && !ret) 1316 /* Return the first error, but keep going to 1317 * maintain a consistent eviction state 1318 */ 1319 ret = retval; 1320 } 1321 1322 out: 1323 dqm_unlock(dqm); 1324 return ret; 1325 } 1326 1327 static int evict_process_queues_cpsch(struct device_queue_manager *dqm, 1328 struct qcm_process_device *qpd) 1329 { 1330 struct queue *q; 1331 struct device *dev = dqm->dev->adev->dev; 1332 struct kfd_process_device *pdd; 1333 int retval = 0; 1334 1335 dqm_lock(dqm); 1336 if (qpd->evicted++ > 0) /* already evicted, do nothing */ 1337 goto out; 1338 1339 pdd = qpd_to_pdd(qpd); 1340 1341 /* The debugger creates processes that temporarily have not acquired 1342 * all VMs for all devices and has no VMs itself. 1343 * Skip queue eviction on process eviction. 1344 */ 1345 if (!pdd->drm_priv) 1346 goto out; 1347 1348 pr_debug_ratelimited("Evicting process pid %d queues\n", 1349 pdd->process->lead_thread->pid); 1350 1351 if (dqm->dev->kfd->shared_resources.enable_mes) 1352 pdd->last_evict_timestamp = get_jiffies_64(); 1353 1354 /* Mark all queues as evicted. Deactivate all active queues on 1355 * the qpd. 1356 */ 1357 list_for_each_entry(q, &qpd->queues_list, list) { 1358 q->properties.is_evicted = true; 1359 if (!q->properties.is_active) 1360 continue; 1361 1362 q->properties.is_active = false; 1363 decrement_queue_count(dqm, qpd, q); 1364 1365 if (dqm->dev->kfd->shared_resources.enable_mes) { 1366 retval = remove_queue_mes(dqm, q, qpd); 1367 if (retval) { 1368 dev_err(dev, "Failed to evict queue %d\n", 1369 q->properties.queue_id); 1370 goto out; 1371 } 1372 } 1373 } 1374 1375 if (!dqm->dev->kfd->shared_resources.enable_mes) { 1376 pdd->last_evict_timestamp = get_jiffies_64(); 1377 retval = execute_queues_cpsch(dqm, 1378 qpd->is_debug ? 
1379 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES : 1380 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 1381 USE_DEFAULT_GRACE_PERIOD); 1382 } 1383 1384 out: 1385 dqm_unlock(dqm); 1386 return retval; 1387 } 1388 1389 static int restore_process_queues_nocpsch(struct device_queue_manager *dqm, 1390 struct qcm_process_device *qpd) 1391 { 1392 struct mm_struct *mm = NULL; 1393 struct queue *q; 1394 struct mqd_manager *mqd_mgr; 1395 struct kfd_process_device *pdd; 1396 uint64_t pd_base; 1397 uint64_t eviction_duration; 1398 int retval, ret = 0; 1399 1400 pdd = qpd_to_pdd(qpd); 1401 /* Retrieve PD base */ 1402 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1403 1404 dqm_lock(dqm); 1405 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 1406 goto out; 1407 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 1408 qpd->evicted--; 1409 goto out; 1410 } 1411 1412 pr_debug_ratelimited("Restoring process pid %d queues\n", 1413 pdd->process->lead_thread->pid); 1414 1415 /* Update PD Base in QPD */ 1416 qpd->page_table_base = pd_base; 1417 pr_debug("Updated PD address to 0x%llx\n", pd_base); 1418 1419 if (!list_empty(&qpd->queues_list)) { 1420 dqm->dev->kfd2kgd->set_vm_context_page_table_base( 1421 dqm->dev->adev, 1422 qpd->vmid, 1423 qpd->page_table_base); 1424 kfd_flush_tlb(pdd); 1425 } 1426 1427 /* Take a safe reference to the mm_struct, which may otherwise 1428 * disappear even while the kfd_process is still referenced. 1429 */ 1430 mm = get_task_mm(pdd->process->lead_thread); 1431 if (!mm) { 1432 ret = -EFAULT; 1433 goto out; 1434 } 1435 1436 /* Remove the eviction flags. Activate queues that are not 1437 * inactive for other reasons. 
1438 */ 1439 list_for_each_entry(q, &qpd->queues_list, list) { 1440 q->properties.is_evicted = false; 1441 if (!QUEUE_IS_ACTIVE(q->properties)) 1442 continue; 1443 1444 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 1445 q->properties.type)]; 1446 q->properties.is_active = true; 1447 increment_queue_count(dqm, qpd, q); 1448 1449 if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n")) 1450 continue; 1451 1452 retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, 1453 q->queue, &q->properties, mm); 1454 if (retval && !ret) 1455 /* Return the first error, but keep going to 1456 * maintain a consistent eviction state 1457 */ 1458 ret = retval; 1459 } 1460 qpd->evicted = 0; 1461 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 1462 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 1463 out: 1464 if (mm) 1465 mmput(mm); 1466 dqm_unlock(dqm); 1467 return ret; 1468 } 1469 1470 static int restore_process_queues_cpsch(struct device_queue_manager *dqm, 1471 struct qcm_process_device *qpd) 1472 { 1473 struct queue *q; 1474 struct device *dev = dqm->dev->adev->dev; 1475 struct kfd_process_device *pdd; 1476 uint64_t eviction_duration; 1477 int retval = 0; 1478 1479 pdd = qpd_to_pdd(qpd); 1480 1481 dqm_lock(dqm); 1482 if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */ 1483 goto out; 1484 if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */ 1485 qpd->evicted--; 1486 goto out; 1487 } 1488 1489 /* The debugger creates processes that temporarily have not acquired 1490 * all VMs for all devices and has no VMs itself. 1491 * Skip queue restore on process restore. 
1492 */ 1493 if (!pdd->drm_priv) 1494 goto vm_not_acquired; 1495 1496 pr_debug_ratelimited("Restoring process pid %d queues\n", 1497 pdd->process->lead_thread->pid); 1498 1499 /* Update PD Base in QPD */ 1500 qpd->page_table_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1501 pr_debug("Updated PD address to 0x%llx\n", qpd->page_table_base); 1502 1503 /* activate all active queues on the qpd */ 1504 list_for_each_entry(q, &qpd->queues_list, list) { 1505 q->properties.is_evicted = false; 1506 if (!QUEUE_IS_ACTIVE(q->properties)) 1507 continue; 1508 1509 q->properties.is_active = true; 1510 increment_queue_count(dqm, &pdd->qpd, q); 1511 1512 if (dqm->dev->kfd->shared_resources.enable_mes) { 1513 retval = add_queue_mes(dqm, q, qpd); 1514 if (retval) { 1515 dev_err(dev, "Failed to restore queue %d\n", 1516 q->properties.queue_id); 1517 goto out; 1518 } 1519 } 1520 } 1521 if (!dqm->dev->kfd->shared_resources.enable_mes) 1522 retval = execute_queues_cpsch(dqm, 1523 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); 1524 eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp; 1525 atomic64_add(eviction_duration, &pdd->evict_duration_counter); 1526 vm_not_acquired: 1527 qpd->evicted = 0; 1528 out: 1529 dqm_unlock(dqm); 1530 return retval; 1531 } 1532 1533 static int register_process(struct device_queue_manager *dqm, 1534 struct qcm_process_device *qpd) 1535 { 1536 struct device_process_node *n; 1537 struct kfd_process_device *pdd; 1538 uint64_t pd_base; 1539 int retval; 1540 1541 n = kzalloc_obj(*n); 1542 if (!n) 1543 return -ENOMEM; 1544 1545 n->qpd = qpd; 1546 1547 pdd = qpd_to_pdd(qpd); 1548 /* Retrieve PD base */ 1549 pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv); 1550 1551 dqm_lock(dqm); 1552 list_add(&n->list, &dqm->queues); 1553 1554 /* Update PD Base in QPD */ 1555 qpd->page_table_base = pd_base; 1556 pr_debug("Updated PD address to 0x%llx\n", pd_base); 1557 1558 retval = dqm->asic_ops.update_qpd(dqm, 
qpd); 1559 1560 dqm->processes_count++; 1561 1562 dqm_unlock(dqm); 1563 1564 /* Outside the DQM lock because under the DQM lock we can't do 1565 * reclaim or take other locks that others hold while reclaiming. 1566 */ 1567 kfd_inc_compute_active(dqm->dev); 1568 1569 return retval; 1570 } 1571 1572 static int unregister_process(struct device_queue_manager *dqm, 1573 struct qcm_process_device *qpd) 1574 { 1575 int retval = 0; 1576 struct device_process_node *cur, *next; 1577 1578 pr_debug("qpd->queues_list is %s\n", 1579 list_empty(&qpd->queues_list) ? "empty" : "not empty"); 1580 1581 dqm_lock(dqm); 1582 1583 list_for_each_entry_safe(cur, next, &dqm->queues, list) { 1584 if (qpd == cur->qpd) { 1585 list_del(&cur->list); 1586 kfree(cur); 1587 dqm->processes_count--; 1588 goto out; 1589 } 1590 } 1591 /* qpd not found in dqm list */ 1592 retval = 1; 1593 out: 1594 dqm_unlock(dqm); 1595 1596 /* Outside the DQM lock because under the DQM lock we can't do 1597 * reclaim or take other locks that others hold while reclaiming. 
1598 */ 1599 if (!retval) 1600 kfd_dec_compute_active(dqm->dev); 1601 1602 return retval; 1603 } 1604 1605 static int 1606 set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid, 1607 unsigned int vmid) 1608 { 1609 uint32_t xcc_mask = dqm->dev->xcc_mask; 1610 int xcc_id, ret = 0; 1611 1612 for_each_inst(xcc_id, xcc_mask) { 1613 ret = dqm->dev->kfd2kgd->set_pasid_vmid_mapping( 1614 dqm->dev->adev, pasid, vmid, xcc_id); 1615 if (ret) 1616 break; 1617 } 1618 1619 return ret; 1620 } 1621 1622 static void init_interrupts(struct device_queue_manager *dqm) 1623 { 1624 uint32_t xcc_mask = dqm->dev->xcc_mask; 1625 unsigned int i, xcc_id; 1626 1627 for_each_inst(xcc_id, xcc_mask) { 1628 for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++) { 1629 if (is_pipe_enabled(dqm, 0, i)) { 1630 dqm->dev->kfd2kgd->init_interrupts( 1631 dqm->dev->adev, i, xcc_id); 1632 } 1633 } 1634 } 1635 } 1636 1637 static int initialize_nocpsch(struct device_queue_manager *dqm) 1638 { 1639 int pipe, queue; 1640 1641 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1642 1643 dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm), 1644 sizeof(unsigned int), GFP_KERNEL); 1645 if (!dqm->allocated_queues) 1646 return -ENOMEM; 1647 1648 mutex_init(&dqm->lock_hidden); 1649 INIT_LIST_HEAD(&dqm->queues); 1650 dqm->active_queue_count = dqm->next_pipe_to_allocate = 0; 1651 dqm->active_cp_queue_count = 0; 1652 dqm->gws_queue_count = 0; 1653 1654 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 1655 int pipe_offset = pipe * get_queues_per_pipe(dqm); 1656 1657 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) 1658 if (test_bit(pipe_offset + queue, 1659 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 1660 dqm->allocated_queues[pipe] |= 1 << queue; 1661 } 1662 1663 memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid)); 1664 1665 init_sdma_bitmaps(dqm); 1666 1667 return 0; 1668 } 1669 1670 static void uninitialize(struct device_queue_manager *dqm) 1671 { 1672 int i; 1673 1674 
WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0); 1675 1676 kfree(dqm->allocated_queues); 1677 for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) 1678 kfree(dqm->mqd_mgrs[i]); 1679 mutex_destroy(&dqm->lock_hidden); 1680 } 1681 1682 static int start_nocpsch(struct device_queue_manager *dqm) 1683 { 1684 int r = 0; 1685 1686 pr_info("SW scheduler is used"); 1687 init_interrupts(dqm); 1688 1689 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1690 r = pm_init(&dqm->packet_mgr, dqm); 1691 if (!r) 1692 dqm->sched_running = true; 1693 1694 return r; 1695 } 1696 1697 static int stop_nocpsch(struct device_queue_manager *dqm) 1698 { 1699 dqm_lock(dqm); 1700 if (!dqm->sched_running) { 1701 dqm_unlock(dqm); 1702 return 0; 1703 } 1704 1705 if (dqm->dev->adev->asic_type == CHIP_HAWAII) 1706 pm_uninit(&dqm->packet_mgr); 1707 dqm->sched_running = false; 1708 dqm_unlock(dqm); 1709 1710 return 0; 1711 } 1712 1713 static int allocate_sdma_queue(struct device_queue_manager *dqm, 1714 struct queue *q, const uint32_t *restore_sdma_id) 1715 { 1716 struct device *dev = dqm->dev->adev->dev; 1717 int bit; 1718 1719 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1720 if (bitmap_empty(dqm->sdma_bitmap, get_num_sdma_queues(dqm))) { 1721 dev_warn(dev, "No more SDMA queue to allocate (%d total queues)\n", 1722 get_num_sdma_queues(dqm)); 1723 return -ENOMEM; 1724 } 1725 1726 if (restore_sdma_id) { 1727 /* Re-use existing sdma_id */ 1728 if (!test_bit(*restore_sdma_id, dqm->sdma_bitmap)) { 1729 dev_err(dev, "SDMA queue already in use\n"); 1730 return -EBUSY; 1731 } 1732 clear_bit(*restore_sdma_id, dqm->sdma_bitmap); 1733 q->sdma_id = *restore_sdma_id; 1734 } else { 1735 /* Find first available sdma_id */ 1736 bit = find_first_bit(dqm->sdma_bitmap, 1737 get_num_sdma_queues(dqm)); 1738 clear_bit(bit, dqm->sdma_bitmap); 1739 q->sdma_id = bit; 1740 } 1741 1742 q->properties.sdma_engine_id = 1743 q->sdma_id % kfd_get_num_sdma_engines(dqm->dev); 1744 q->properties.sdma_queue_id = q->sdma_id / 1745 
kfd_get_num_sdma_engines(dqm->dev); 1746 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1747 if (bitmap_empty(dqm->xgmi_sdma_bitmap, get_num_xgmi_sdma_queues(dqm))) { 1748 dev_warn(dev, "No more XGMI SDMA queue to allocate (%d total queues)\n", 1749 get_num_xgmi_sdma_queues(dqm)); 1750 return -ENOMEM; 1751 } 1752 if (restore_sdma_id) { 1753 /* Re-use existing sdma_id */ 1754 if (!test_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap)) { 1755 dev_err(dev, "SDMA queue already in use\n"); 1756 return -EBUSY; 1757 } 1758 clear_bit(*restore_sdma_id, dqm->xgmi_sdma_bitmap); 1759 q->sdma_id = *restore_sdma_id; 1760 } else { 1761 bit = find_first_bit(dqm->xgmi_sdma_bitmap, 1762 get_num_xgmi_sdma_queues(dqm)); 1763 clear_bit(bit, dqm->xgmi_sdma_bitmap); 1764 q->sdma_id = bit; 1765 } 1766 /* sdma_engine_id is sdma id including 1767 * both PCIe-optimized SDMAs and XGMI- 1768 * optimized SDMAs. The calculation below 1769 * assumes the first N engines are always 1770 * PCIe-optimized ones 1771 */ 1772 q->properties.sdma_engine_id = 1773 kfd_get_num_sdma_engines(dqm->dev) + 1774 q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev); 1775 q->properties.sdma_queue_id = q->sdma_id / 1776 kfd_get_num_xgmi_sdma_engines(dqm->dev); 1777 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { 1778 int i, num_queues, num_engines, eng_offset = 0, start_engine; 1779 bool free_bit_found = false, is_xgmi = false; 1780 1781 if (q->properties.sdma_engine_id < kfd_get_num_sdma_engines(dqm->dev)) { 1782 num_queues = get_num_sdma_queues(dqm); 1783 num_engines = kfd_get_num_sdma_engines(dqm->dev); 1784 q->properties.type = KFD_QUEUE_TYPE_SDMA; 1785 } else { 1786 num_queues = get_num_xgmi_sdma_queues(dqm); 1787 num_engines = kfd_get_num_xgmi_sdma_engines(dqm->dev); 1788 eng_offset = kfd_get_num_sdma_engines(dqm->dev); 1789 q->properties.type = KFD_QUEUE_TYPE_SDMA_XGMI; 1790 is_xgmi = true; 1791 } 1792 1793 /* Scan available bit based on target engine ID. 
*/ 1794 start_engine = q->properties.sdma_engine_id - eng_offset; 1795 for (i = start_engine; i < num_queues; i += num_engines) { 1796 1797 if (!test_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap)) 1798 continue; 1799 1800 clear_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap); 1801 q->sdma_id = i; 1802 q->properties.sdma_queue_id = q->sdma_id / num_engines; 1803 free_bit_found = true; 1804 break; 1805 } 1806 1807 if (!free_bit_found) { 1808 dev_warn(dev, "No more SDMA queue to allocate for target ID %i (%d total queues)\n", 1809 q->properties.sdma_engine_id, num_queues); 1810 return -ENOMEM; 1811 } 1812 } 1813 1814 pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); 1815 pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id); 1816 1817 return 0; 1818 } 1819 1820 static void deallocate_sdma_queue(struct device_queue_manager *dqm, 1821 struct queue *q) 1822 { 1823 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) { 1824 if (q->sdma_id >= get_num_sdma_queues(dqm)) 1825 return; 1826 set_bit(q->sdma_id, dqm->sdma_bitmap); 1827 } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 1828 if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm)) 1829 return; 1830 set_bit(q->sdma_id, dqm->xgmi_sdma_bitmap); 1831 } 1832 } 1833 1834 /* 1835 * Device Queue Manager implementation for cp scheduler 1836 */ 1837 1838 static int set_sched_resources(struct device_queue_manager *dqm) 1839 { 1840 int i, mec; 1841 struct scheduling_resources res; 1842 struct device *dev = dqm->dev->adev->dev; 1843 1844 res.vmid_mask = dqm->dev->compute_vmid_bitmap; 1845 1846 res.queue_mask = 0; 1847 for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) { 1848 mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe) 1849 / dqm->dev->kfd->shared_resources.num_pipe_per_mec; 1850 1851 if (!test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 1852 continue; 1853 1854 /* only acquire queues from the first MEC */ 1855 if (mec > 0) 1856 continue; 1857 1858 /* This situation 
may be hit in the future if a new HW 1859 * generation exposes more than 64 queues. If so, the 1860 * definition of res.queue_mask needs updating 1861 */ 1862 if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) { 1863 dev_err(dev, "Invalid queue enabled by amdgpu: %d\n", i); 1864 break; 1865 } 1866 1867 res.queue_mask |= 1ull 1868 << amdgpu_queue_mask_bit_to_set_resource_bit( 1869 dqm->dev->adev, i); 1870 } 1871 res.gws_mask = ~0ull; 1872 res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0; 1873 1874 pr_debug("Scheduling resources:\n" 1875 "vmid mask: 0x%8X\n" 1876 "queue mask: 0x%8llX\n", 1877 res.vmid_mask, res.queue_mask); 1878 1879 return pm_send_set_resources(&dqm->packet_mgr, &res); 1880 } 1881 1882 static int initialize_cpsch(struct device_queue_manager *dqm) 1883 { 1884 pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm)); 1885 1886 mutex_init(&dqm->lock_hidden); 1887 INIT_LIST_HEAD(&dqm->queues); 1888 dqm->active_queue_count = dqm->processes_count = 0; 1889 dqm->active_cp_queue_count = 0; 1890 dqm->gws_queue_count = 0; 1891 dqm->active_runlist = false; 1892 dqm->trap_debug_vmid = 0; 1893 1894 init_sdma_bitmaps(dqm); 1895 1896 update_dqm_wait_times(dqm); 1897 return 0; 1898 } 1899 1900 /* halt_cpsch: 1901 * Unmap queues so the schedule doesn't continue remaining jobs in the queue. 1902 * Then set dqm->sched_halt so queues don't map to runlist until unhalt_cpsch 1903 * is called. 
1904 */ 1905 static int halt_cpsch(struct device_queue_manager *dqm) 1906 { 1907 int ret = 0; 1908 1909 dqm_lock(dqm); 1910 if (!dqm->sched_running) { 1911 dqm_unlock(dqm); 1912 return 0; 1913 } 1914 1915 WARN_ONCE(dqm->sched_halt, "Scheduling is already on halt\n"); 1916 1917 if (!dqm->is_hws_hang) { 1918 if (!dqm->dev->kfd->shared_resources.enable_mes) 1919 ret = unmap_queues_cpsch(dqm, 1920 KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 1921 USE_DEFAULT_GRACE_PERIOD, false); 1922 else 1923 ret = remove_all_kfd_queues_mes(dqm); 1924 } 1925 dqm->sched_halt = true; 1926 dqm_unlock(dqm); 1927 1928 return ret; 1929 } 1930 1931 /* unhalt_cpsch 1932 * Unset dqm->sched_halt and map queues back to runlist 1933 */ 1934 static int unhalt_cpsch(struct device_queue_manager *dqm) 1935 { 1936 int ret = 0; 1937 struct amdgpu_device *adev = dqm->dev->adev; 1938 1939 dqm_lock(dqm); 1940 if (!dqm->sched_running || !dqm->sched_halt) { 1941 dev_dbg(adev->dev, "Scheduling is not on halt.\n"); 1942 dqm_unlock(dqm); 1943 return 0; 1944 } 1945 dqm->sched_halt = false; 1946 if (!dqm->dev->kfd->shared_resources.enable_mes) 1947 ret = execute_queues_cpsch(dqm, 1948 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 1949 0, USE_DEFAULT_GRACE_PERIOD); 1950 else 1951 ret = add_all_kfd_queues_mes(dqm); 1952 1953 dqm_unlock(dqm); 1954 1955 return ret; 1956 } 1957 1958 static int start_cpsch(struct device_queue_manager *dqm) 1959 { 1960 struct device *dev = dqm->dev->adev->dev; 1961 int retval, num_hw_queue_slots; 1962 struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev; 1963 int hung_array_size = amdgpu_mes_get_hung_queue_db_array_size(adev); 1964 int hqd_info_size = adev->mes.hung_queue_hqd_info_offset; 1965 1966 dqm_lock(dqm); 1967 1968 if (!dqm->dev->kfd->shared_resources.enable_mes) { 1969 retval = pm_init(&dqm->packet_mgr, dqm); 1970 if (retval) 1971 goto fail_packet_manager_init; 1972 1973 retval = set_sched_resources(dqm); 1974 if (retval) 1975 goto fail_set_sched_resources; 1976 } 1977 
pr_debug("Allocating fence memory\n"); 1978 1979 /* allocate fence memory on the gart */ 1980 retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr), 1981 &dqm->fence_mem); 1982 1983 if (retval) 1984 goto fail_allocate_vidmem; 1985 1986 dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr; 1987 dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr; 1988 1989 init_interrupts(dqm); 1990 1991 /* clear hang status when driver try to start the hw scheduler */ 1992 dqm->sched_running = true; 1993 1994 if (!dqm->dev->kfd->shared_resources.enable_mes) { 1995 if (pm_config_dequeue_wait_counts(&dqm->packet_mgr, 1996 KFD_DEQUEUE_WAIT_INIT, 0 /* unused */)) 1997 dev_err(dev, "Setting optimized dequeue wait failed. Using default values\n"); 1998 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); 1999 } 2000 2001 /* setup per-queue reset detection buffer */ 2002 num_hw_queue_slots = dqm->dev->kfd->shared_resources.num_queue_per_pipe * 2003 dqm->dev->kfd->shared_resources.num_pipe_per_mec * 2004 NUM_XCC(dqm->dev->xcc_mask); 2005 2006 dqm->detect_hang_info_size = num_hw_queue_slots * sizeof(struct dqm_detect_hang_info); 2007 dqm->detect_hang_info = kzalloc(dqm->detect_hang_info_size, GFP_KERNEL); 2008 2009 if (!dqm->detect_hang_info) { 2010 retval = -ENOMEM; 2011 goto fail_detect_hang_buffer; 2012 } 2013 2014 dqm->hung_db_array = kzalloc(hung_array_size * sizeof(u32), GFP_KERNEL); 2015 dqm->hqd_info = kzalloc( 2016 hqd_info_size * sizeof(struct amdgpu_mes_hung_queue_hqd_info), 2017 GFP_KERNEL); 2018 2019 dqm_unlock(dqm); 2020 2021 return 0; 2022 fail_detect_hang_buffer: 2023 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); 2024 fail_allocate_vidmem: 2025 fail_set_sched_resources: 2026 if (!dqm->dev->kfd->shared_resources.enable_mes) 2027 pm_uninit(&dqm->packet_mgr); 2028 fail_packet_manager_init: 2029 dqm_unlock(dqm); 2030 return retval; 2031 } 2032 2033 static int stop_cpsch(struct device_queue_manager *dqm) 2034 { 2035 int ret = 0; 2036 
2037 dqm_lock(dqm); 2038 if (!dqm->sched_running) { 2039 dqm_unlock(dqm); 2040 return 0; 2041 } 2042 2043 if (!dqm->dev->kfd->shared_resources.enable_mes) 2044 ret = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 2045 0, USE_DEFAULT_GRACE_PERIOD, false); 2046 else 2047 ret = remove_all_kfd_queues_mes(dqm); 2048 2049 dqm->sched_running = false; 2050 2051 if (!dqm->dev->kfd->shared_resources.enable_mes) 2052 pm_release_ib(&dqm->packet_mgr); 2053 2054 kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); 2055 if (!dqm->dev->kfd->shared_resources.enable_mes) 2056 pm_uninit(&dqm->packet_mgr); 2057 kfree(dqm->detect_hang_info); 2058 dqm->detect_hang_info = NULL; 2059 kfree(dqm->hung_db_array); 2060 kfree(dqm->hqd_info); 2061 2062 dqm_unlock(dqm); 2063 2064 return ret; 2065 } 2066 2067 static int create_kernel_queue_cpsch(struct device_queue_manager *dqm, 2068 struct kernel_queue *kq, 2069 struct qcm_process_device *qpd) 2070 { 2071 dqm_lock(dqm); 2072 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 2073 pr_warn("Can't create new kernel queue because %d queues were already created\n", 2074 dqm->total_queue_count); 2075 dqm_unlock(dqm); 2076 return -EPERM; 2077 } 2078 2079 /* 2080 * Unconditionally increment this counter, regardless of the queue's 2081 * type or whether the queue is active. 
2082 */ 2083 dqm->total_queue_count++; 2084 pr_debug("Total of %d queues are accountable so far\n", 2085 dqm->total_queue_count); 2086 2087 list_add(&kq->list, &qpd->priv_queue_list); 2088 increment_queue_count(dqm, qpd, kq->queue); 2089 qpd->is_debug = true; 2090 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 2091 USE_DEFAULT_GRACE_PERIOD); 2092 dqm_unlock(dqm); 2093 2094 return 0; 2095 } 2096 2097 static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm, 2098 struct kernel_queue *kq, 2099 struct qcm_process_device *qpd) 2100 { 2101 dqm_lock(dqm); 2102 list_del(&kq->list); 2103 decrement_queue_count(dqm, qpd, kq->queue); 2104 qpd->is_debug = false; 2105 execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 2106 USE_DEFAULT_GRACE_PERIOD); 2107 /* 2108 * Unconditionally decrement this counter, regardless of the queue's 2109 * type. 2110 */ 2111 dqm->total_queue_count--; 2112 pr_debug("Total of %d queues are accountable so far\n", 2113 dqm->total_queue_count); 2114 dqm_unlock(dqm); 2115 } 2116 2117 static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, 2118 struct qcm_process_device *qpd, 2119 const struct kfd_criu_queue_priv_data *qd, 2120 const void *restore_mqd, const void *restore_ctl_stack) 2121 { 2122 int retval; 2123 struct mqd_manager *mqd_mgr; 2124 2125 if (dqm->total_queue_count >= max_num_of_queues_per_device) { 2126 pr_warn("Can't create new usermode queue because %d queues were already created\n", 2127 dqm->total_queue_count); 2128 retval = -EPERM; 2129 goto out; 2130 } 2131 2132 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 2133 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI || 2134 q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { 2135 dqm_lock(dqm); 2136 retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); 2137 dqm_unlock(dqm); 2138 if (retval) 2139 goto out; 2140 } 2141 2142 retval = allocate_doorbell(qpd, q, qd ? 
&qd->doorbell_id : NULL); 2143 if (retval) 2144 goto out_deallocate_sdma_queue; 2145 2146 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2147 q->properties.type)]; 2148 2149 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 2150 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 2151 dqm->asic_ops.init_sdma_vm(dqm, q, qpd); 2152 q->properties.tba_addr = qpd->tba_addr; 2153 q->properties.tma_addr = qpd->tma_addr; 2154 q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr, &q->properties); 2155 if (!q->mqd_mem_obj) { 2156 retval = -ENOMEM; 2157 goto out_deallocate_doorbell; 2158 } 2159 2160 dqm_lock(dqm); 2161 /* 2162 * Eviction state logic: mark all queues as evicted, even ones 2163 * not currently active. Restoring inactive queues later only 2164 * updates the is_evicted flag but is a no-op otherwise. 2165 */ 2166 q->properties.is_evicted = !!qpd->evicted; 2167 q->properties.is_dbg_wa = qpd->pqm->process->debug_trap_enabled && 2168 kfd_dbg_has_cwsr_workaround(q->device); 2169 2170 if (qd) 2171 mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr, 2172 &q->properties, restore_mqd, restore_ctl_stack, 2173 qd->ctl_stack_size); 2174 else 2175 mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, 2176 &q->gart_mqd_addr, &q->properties); 2177 2178 list_add(&q->list, &qpd->queues_list); 2179 qpd->queue_count++; 2180 2181 if (q->properties.is_active) { 2182 increment_queue_count(dqm, qpd, q); 2183 2184 if (!dqm->dev->kfd->shared_resources.enable_mes) 2185 retval = execute_queues_cpsch(dqm, 2186 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD); 2187 else 2188 retval = add_queue_mes(dqm, q, qpd); 2189 if (retval) 2190 goto cleanup_queue; 2191 } 2192 2193 /* 2194 * Unconditionally increment this counter, regardless of the queue's 2195 * type or whether the queue is active. 
2196 */ 2197 dqm->total_queue_count++; 2198 2199 pr_debug("Total of %d queues are accountable so far\n", 2200 dqm->total_queue_count); 2201 2202 dqm_unlock(dqm); 2203 return retval; 2204 2205 cleanup_queue: 2206 qpd->queue_count--; 2207 list_del(&q->list); 2208 if (q->properties.is_active) 2209 decrement_queue_count(dqm, qpd, q); 2210 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2211 dqm_unlock(dqm); 2212 out_deallocate_doorbell: 2213 deallocate_doorbell(qpd, q); 2214 out_deallocate_sdma_queue: 2215 if (q->properties.type == KFD_QUEUE_TYPE_SDMA || 2216 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { 2217 dqm_lock(dqm); 2218 deallocate_sdma_queue(dqm, q); 2219 dqm_unlock(dqm); 2220 } 2221 out: 2222 return retval; 2223 } 2224 2225 int amdkfd_fence_wait_timeout(struct device_queue_manager *dqm, 2226 uint64_t fence_value, 2227 unsigned int timeout_ms) 2228 { 2229 unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies; 2230 struct device *dev = dqm->dev->adev->dev; 2231 uint64_t *fence_addr = dqm->fence_addr; 2232 2233 while (*fence_addr != fence_value) { 2234 /* Fatal err detected, this response won't come */ 2235 if (amdgpu_amdkfd_is_fed(dqm->dev->adev) || 2236 amdgpu_in_reset(dqm->dev->adev)) 2237 return -EIO; 2238 2239 if (time_after(jiffies, end_jiffies)) { 2240 dev_err(dev, "qcm fence wait loop timeout expired\n"); 2241 /* In HWS case, this is used to halt the driver thread 2242 * in order not to mess up CP states before doing 2243 * scandumps for FW debugging. 
2244 */ 2245 while (halt_if_hws_hang) 2246 schedule(); 2247 2248 return -ETIME; 2249 } 2250 schedule(); 2251 } 2252 2253 return 0; 2254 } 2255 2256 /* dqm->lock mutex has to be locked before calling this function */ 2257 static int map_queues_cpsch(struct device_queue_manager *dqm) 2258 { 2259 struct device *dev = dqm->dev->adev->dev; 2260 int retval; 2261 2262 if (!dqm->sched_running || dqm->sched_halt) 2263 return 0; 2264 if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0) 2265 return 0; 2266 if (dqm->active_runlist) 2267 return 0; 2268 2269 retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues); 2270 pr_debug("%s sent runlist\n", __func__); 2271 if (retval) { 2272 dev_err(dev, "failed to execute runlist\n"); 2273 return retval; 2274 } 2275 dqm->active_runlist = true; 2276 2277 return retval; 2278 } 2279 2280 static void set_queue_as_reset(struct device_queue_manager *dqm, struct queue *q, 2281 struct qcm_process_device *qpd) 2282 { 2283 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 2284 2285 dev_err(dqm->dev->adev->dev, "queue id 0x%0x at pasid %d is reset\n", 2286 q->properties.queue_id, pdd->process->lead_thread->pid); 2287 2288 pdd->has_reset_queue = true; 2289 q->properties.is_reset = true; 2290 if (q->properties.is_active) { 2291 q->properties.is_active = false; 2292 decrement_queue_count(dqm, qpd, q); 2293 } 2294 } 2295 2296 static int detect_queue_hang(struct device_queue_manager *dqm) 2297 { 2298 int i; 2299 2300 /* detect should be used only in dqm locked queue reset */ 2301 if (WARN_ON(dqm->detect_hang_count > 0)) 2302 return 0; 2303 2304 memset(dqm->detect_hang_info, 0, dqm->detect_hang_info_size); 2305 2306 for (i = 0; i < AMDGPU_MAX_QUEUES; ++i) { 2307 uint32_t mec, pipe, queue; 2308 int xcc_id; 2309 2310 mec = (i / dqm->dev->kfd->shared_resources.num_queue_per_pipe) 2311 / dqm->dev->kfd->shared_resources.num_pipe_per_mec; 2312 2313 if (mec || !test_bit(i, dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 2314 continue; 2315 2316 
amdgpu_queue_mask_bit_to_mec_queue(dqm->dev->adev, i, &mec, &pipe, &queue); 2317 2318 for_each_inst(xcc_id, dqm->dev->xcc_mask) { 2319 uint64_t queue_addr = dqm->dev->kfd2kgd->hqd_get_pq_addr( 2320 dqm->dev->adev, pipe, queue, xcc_id); 2321 struct dqm_detect_hang_info hang_info; 2322 2323 if (!queue_addr) 2324 continue; 2325 2326 hang_info.pipe_id = pipe; 2327 hang_info.queue_id = queue; 2328 hang_info.xcc_id = xcc_id; 2329 hang_info.queue_address = queue_addr; 2330 2331 dqm->detect_hang_info[dqm->detect_hang_count] = hang_info; 2332 dqm->detect_hang_count++; 2333 } 2334 } 2335 2336 return dqm->detect_hang_count; 2337 } 2338 2339 static struct queue *find_queue_by_address(struct device_queue_manager *dqm, uint64_t queue_address) 2340 { 2341 struct device_process_node *cur; 2342 struct qcm_process_device *qpd; 2343 struct queue *q; 2344 2345 list_for_each_entry(cur, &dqm->queues, list) { 2346 qpd = cur->qpd; 2347 list_for_each_entry(q, &qpd->queues_list, list) { 2348 if (queue_address == q->properties.queue_address) 2349 return q; 2350 } 2351 } 2352 2353 return NULL; 2354 } 2355 2356 static struct queue *find_queue_by_doorbell_offset(struct device_queue_manager *dqm, u32 doorbell_offset) 2357 { 2358 struct device_process_node *cur; 2359 struct qcm_process_device *qpd; 2360 struct queue *q; 2361 2362 list_for_each_entry(cur, &dqm->queues, list) { 2363 qpd = cur->qpd; 2364 list_for_each_entry(q, &qpd->queues_list, list) { 2365 if (doorbell_offset == q->properties.doorbell_off) 2366 return q; 2367 } 2368 } 2369 2370 return NULL; 2371 } 2372 2373 static int reset_hung_queues(struct device_queue_manager *dqm) 2374 { 2375 int r = 0, reset_count = 0, i; 2376 2377 if (!dqm->detect_hang_info || dqm->is_hws_hang) 2378 return -EIO; 2379 2380 /* assume dqm locked. 
*/ 2381 if (!detect_queue_hang(dqm)) 2382 return -ENOTRECOVERABLE; 2383 2384 for (i = 0; i < dqm->detect_hang_count; i++) { 2385 struct dqm_detect_hang_info hang_info = dqm->detect_hang_info[i]; 2386 struct queue *q = find_queue_by_address(dqm, hang_info.queue_address); 2387 struct kfd_process_device *pdd; 2388 uint64_t queue_addr = 0; 2389 2390 if (!q) { 2391 r = -ENOTRECOVERABLE; 2392 goto reset_fail; 2393 } 2394 2395 pdd = kfd_get_process_device_data(dqm->dev, q->process); 2396 if (!pdd) { 2397 r = -ENOTRECOVERABLE; 2398 goto reset_fail; 2399 } 2400 2401 queue_addr = dqm->dev->kfd2kgd->hqd_reset(dqm->dev->adev, 2402 hang_info.pipe_id, hang_info.queue_id, hang_info.xcc_id, 2403 KFD_UNMAP_LATENCY_MS); 2404 2405 /* either reset failed or we reset an unexpected queue. */ 2406 if (queue_addr != q->properties.queue_address) { 2407 r = -ENOTRECOVERABLE; 2408 goto reset_fail; 2409 } 2410 2411 set_queue_as_reset(dqm, q, &pdd->qpd); 2412 reset_count++; 2413 } 2414 2415 if (reset_count == dqm->detect_hang_count) 2416 kfd_signal_reset_event(dqm->dev); 2417 else 2418 r = -ENOTRECOVERABLE; 2419 2420 reset_fail: 2421 dqm->detect_hang_count = 0; 2422 2423 return r; 2424 } 2425 2426 static bool sdma_has_hang(struct device_queue_manager *dqm) 2427 { 2428 int engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm); 2429 int engine_end = engine_start + get_num_all_sdma_engines(dqm); 2430 int num_queues_per_eng = dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 2431 int i, j; 2432 2433 for (i = engine_start; i < engine_end; i++) { 2434 for (j = 0; j < num_queues_per_eng; j++) { 2435 if (!dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j)) 2436 continue; 2437 2438 return true; 2439 } 2440 } 2441 2442 return false; 2443 } 2444 2445 static bool set_sdma_queue_as_reset(struct device_queue_manager *dqm, 2446 uint32_t doorbell_off) 2447 { 2448 struct device_process_node *cur; 2449 struct qcm_process_device *qpd; 2450 struct queue *q; 2451 2452 
list_for_each_entry(cur, &dqm->queues, list) { 2453 qpd = cur->qpd; 2454 list_for_each_entry(q, &qpd->queues_list, list) { 2455 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA || 2456 q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) && 2457 q->properties.doorbell_off == doorbell_off) { 2458 set_queue_as_reset(dqm, q, qpd); 2459 return true; 2460 } 2461 } 2462 } 2463 2464 return false; 2465 } 2466 2467 static int reset_hung_queues_sdma(struct device_queue_manager *dqm) 2468 { 2469 int engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm); 2470 int engine_end = engine_start + get_num_all_sdma_engines(dqm); 2471 int num_queues_per_eng = dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 2472 int r = 0, i, j; 2473 2474 if (dqm->is_hws_hang) 2475 return -EIO; 2476 2477 /* Scan for hung HW queues and reset engine. */ 2478 dqm->detect_hang_count = 0; 2479 for (i = engine_start; i < engine_end; i++) { 2480 for (j = 0; j < num_queues_per_eng; j++) { 2481 uint32_t doorbell_off = 2482 dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j); 2483 2484 if (!doorbell_off) 2485 continue; 2486 2487 /* Reset engine and check. 
*/ 2488 if (amdgpu_sdma_reset_engine(dqm->dev->adev, i, false) || 2489 dqm->dev->kfd2kgd->hqd_sdma_get_doorbell(dqm->dev->adev, i, j) || 2490 !set_sdma_queue_as_reset(dqm, doorbell_off)) { 2491 r = -ENOTRECOVERABLE; 2492 goto reset_fail; 2493 } 2494 2495 /* Should only expect one queue active per engine */ 2496 dqm->detect_hang_count++; 2497 break; 2498 } 2499 } 2500 2501 /* Signal process reset */ 2502 if (dqm->detect_hang_count) 2503 kfd_signal_reset_event(dqm->dev); 2504 else 2505 r = -ENOTRECOVERABLE; 2506 2507 reset_fail: 2508 dqm->detect_hang_count = 0; 2509 2510 return r; 2511 } 2512 2513 static int reset_queues_on_hws_hang(struct device_queue_manager *dqm, bool is_sdma) 2514 { 2515 struct amdgpu_device *adev = dqm->dev->adev; 2516 2517 while (halt_if_hws_hang) 2518 schedule(); 2519 2520 if (adev->debug_disable_gpu_ring_reset) { 2521 dev_info_once(adev->dev, 2522 "%s queue hung, but ring reset disabled", 2523 is_sdma ? "sdma" : "compute"); 2524 2525 return -EPERM; 2526 } 2527 if (!amdgpu_gpu_recovery) 2528 return -ENOTRECOVERABLE; 2529 2530 return is_sdma ? reset_hung_queues_sdma(dqm) : reset_hung_queues(dqm); 2531 } 2532 2533 /* dqm->lock mutex has to be locked before calling this function 2534 * 2535 * @grace_period: If USE_DEFAULT_GRACE_PERIOD then default wait time 2536 * for context switch latency. Lower values are used by debugger 2537 * since context switching are triggered at high frequency. 
2538 * This is configured by setting CP_IQ_WAIT_TIME2.SCH_WAVE 2539 * 2540 */ 2541 static int unmap_queues_cpsch(struct device_queue_manager *dqm, 2542 enum kfd_unmap_queues_filter filter, 2543 uint32_t filter_param, 2544 uint32_t grace_period, 2545 bool reset) 2546 { 2547 struct device *dev = dqm->dev->adev->dev; 2548 struct mqd_manager *mqd_mgr; 2549 int retval; 2550 2551 if (!dqm->sched_running) 2552 return 0; 2553 if (!dqm->active_runlist) 2554 return 0; 2555 if (!down_read_trylock(&dqm->dev->adev->reset_domain->sem)) 2556 return -EIO; 2557 2558 if (grace_period != USE_DEFAULT_GRACE_PERIOD) { 2559 retval = pm_config_dequeue_wait_counts(&dqm->packet_mgr, 2560 KFD_DEQUEUE_WAIT_SET_SCH_WAVE, grace_period); 2561 if (retval) 2562 goto out; 2563 } 2564 2565 retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset); 2566 if (retval) 2567 goto out; 2568 2569 *dqm->fence_addr = KFD_FENCE_INIT; 2570 mb(); 2571 pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr, 2572 KFD_FENCE_COMPLETED); 2573 /* should be timed out */ 2574 retval = amdkfd_fence_wait_timeout(dqm, KFD_FENCE_COMPLETED, 2575 queue_preemption_timeout_ms); 2576 if (retval) { 2577 dev_err(dev, "The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); 2578 kfd_hws_hang(dqm); 2579 goto out; 2580 } 2581 2582 /* In the current MEC firmware implementation, if compute queue 2583 * doesn't response to the preemption request in time, HIQ will 2584 * abandon the unmap request without returning any timeout error 2585 * to driver. Instead, MEC firmware will log the doorbell of the 2586 * unresponding compute queue to HIQ.MQD.queue_doorbell_id fields. 
2587 * To make sure the queue unmap was successful, driver need to 2588 * check those fields 2589 */ 2590 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]; 2591 if (mqd_mgr->check_preemption_failed(mqd_mgr, dqm->packet_mgr.priv_queue->queue->mqd) && 2592 reset_queues_on_hws_hang(dqm, false)) 2593 goto reset_fail; 2594 2595 /* Check for SDMA hang and attempt SDMA reset */ 2596 if (sdma_has_hang(dqm) && reset_queues_on_hws_hang(dqm, true)) 2597 goto reset_fail; 2598 2599 /* We need to reset the grace period value for this device */ 2600 if (grace_period != USE_DEFAULT_GRACE_PERIOD) { 2601 if (pm_config_dequeue_wait_counts(&dqm->packet_mgr, 2602 KFD_DEQUEUE_WAIT_RESET, 0 /* unused */)) 2603 dev_err(dev, "Failed to reset grace period\n"); 2604 } 2605 2606 pm_release_ib(&dqm->packet_mgr); 2607 dqm->active_runlist = false; 2608 out: 2609 up_read(&dqm->dev->adev->reset_domain->sem); 2610 return retval; 2611 2612 reset_fail: 2613 dqm->is_hws_hang = true; 2614 kfd_hws_hang(dqm); 2615 up_read(&dqm->dev->adev->reset_domain->sem); 2616 return -ETIME; 2617 } 2618 2619 /* only for compute queue */ 2620 static int reset_queues_cpsch(struct device_queue_manager *dqm, uint16_t pasid) 2621 { 2622 int retval; 2623 2624 dqm_lock(dqm); 2625 2626 retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID, 2627 pasid, USE_DEFAULT_GRACE_PERIOD, true); 2628 2629 dqm_unlock(dqm); 2630 return retval; 2631 } 2632 2633 /* dqm->lock mutex has to be locked before calling this function */ 2634 static int execute_queues_cpsch(struct device_queue_manager *dqm, 2635 enum kfd_unmap_queues_filter filter, 2636 uint32_t filter_param, 2637 uint32_t grace_period) 2638 { 2639 int retval; 2640 2641 if (!down_read_trylock(&dqm->dev->adev->reset_domain->sem)) 2642 return -EIO; 2643 retval = unmap_queues_cpsch(dqm, filter, filter_param, grace_period, false); 2644 if (!retval) 2645 retval = map_queues_cpsch(dqm); 2646 up_read(&dqm->dev->adev->reset_domain->sem); 2647 return retval; 2648 } 2649 2650 static int 
wait_on_destroy_queue(struct device_queue_manager *dqm, 2651 struct queue *q) 2652 { 2653 struct kfd_process_device *pdd = kfd_get_process_device_data(q->device, 2654 q->process); 2655 int ret = 0; 2656 2657 if (WARN_ON(!pdd)) 2658 return ret; 2659 2660 if (pdd->qpd.is_debug) 2661 return ret; 2662 2663 q->properties.is_being_destroyed = true; 2664 2665 if (pdd->process->debug_trap_enabled && q->properties.is_suspended) { 2666 dqm_unlock(dqm); 2667 mutex_unlock(&q->process->mutex); 2668 ret = wait_event_interruptible(dqm->destroy_wait, 2669 !q->properties.is_suspended); 2670 2671 mutex_lock(&q->process->mutex); 2672 dqm_lock(dqm); 2673 } 2674 2675 return ret; 2676 } 2677 2678 static int destroy_queue_cpsch(struct device_queue_manager *dqm, 2679 struct qcm_process_device *qpd, 2680 struct queue *q) 2681 { 2682 int retval; 2683 struct mqd_manager *mqd_mgr; 2684 uint64_t sdma_val = 0; 2685 struct kfd_process_device *pdd = qpd_to_pdd(qpd); 2686 struct device *dev = dqm->dev->adev->dev; 2687 2688 /* Get the SDMA queue stats */ 2689 if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) || 2690 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 2691 retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr, 2692 &sdma_val); 2693 if (retval) 2694 dev_err(dev, "Failed to read SDMA queue counter for queue: %d\n", 2695 q->properties.queue_id); 2696 } 2697 2698 /* remove queue from list to prevent rescheduling after preemption */ 2699 dqm_lock(dqm); 2700 2701 retval = wait_on_destroy_queue(dqm, q); 2702 2703 if (retval) { 2704 dqm_unlock(dqm); 2705 return retval; 2706 } 2707 2708 if (qpd->is_debug) { 2709 /* 2710 * error, currently we do not allow to destroy a queue 2711 * of a currently debugged process 2712 */ 2713 retval = -EBUSY; 2714 goto failed_try_destroy_debugged_queue; 2715 2716 } 2717 2718 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2719 q->properties.type)]; 2720 2721 deallocate_doorbell(qpd, q); 2722 2723 if ((q->properties.type == 
KFD_QUEUE_TYPE_SDMA) || 2724 (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) { 2725 deallocate_sdma_queue(dqm, q); 2726 pdd->sdma_past_activity_counter += sdma_val; 2727 } 2728 2729 if (q->properties.is_active) { 2730 decrement_queue_count(dqm, qpd, q); 2731 q->properties.is_active = false; 2732 if (!dqm->dev->kfd->shared_resources.enable_mes) { 2733 retval = execute_queues_cpsch(dqm, 2734 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 2735 USE_DEFAULT_GRACE_PERIOD); 2736 if (retval == -ETIME) 2737 qpd->reset_wavefronts = true; 2738 } else { 2739 retval = remove_queue_mes(dqm, q, qpd); 2740 } 2741 } 2742 list_del(&q->list); 2743 qpd->queue_count--; 2744 2745 /* 2746 * Unconditionally decrement this counter, regardless of the queue's 2747 * type 2748 */ 2749 dqm->total_queue_count--; 2750 pr_debug("Total of %d queues are accountable so far\n", 2751 dqm->total_queue_count); 2752 2753 dqm_unlock(dqm); 2754 2755 /* 2756 * Do free_mqd and raise delete event after dqm_unlock(dqm) to avoid 2757 * circular locking 2758 */ 2759 kfd_dbg_ev_raise(KFD_EC_MASK(EC_DEVICE_QUEUE_DELETE), 2760 qpd->pqm->process, q->device, 2761 -1, false, NULL, 0); 2762 2763 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2764 2765 return retval; 2766 2767 failed_try_destroy_debugged_queue: 2768 2769 dqm_unlock(dqm); 2770 return retval; 2771 } 2772 2773 static bool set_cache_memory_policy(struct device_queue_manager *dqm, 2774 struct qcm_process_device *qpd, 2775 enum cache_policy default_policy, 2776 enum cache_policy alternate_policy, 2777 void __user *alternate_aperture_base, 2778 uint64_t alternate_aperture_size, 2779 u32 misc_process_properties) 2780 { 2781 bool retval = true; 2782 2783 if (!dqm->asic_ops.set_cache_memory_policy) 2784 return retval; 2785 2786 dqm_lock(dqm); 2787 2788 retval = dqm->asic_ops.set_cache_memory_policy( 2789 dqm, 2790 qpd, 2791 default_policy, 2792 alternate_policy, 2793 alternate_aperture_base, 2794 alternate_aperture_size, 2795 misc_process_properties); 2796 
2797 if (retval) 2798 goto out; 2799 2800 if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0)) 2801 program_sh_mem_settings(dqm, qpd); 2802 2803 pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n", 2804 qpd->sh_mem_config, qpd->sh_mem_ape1_base, 2805 qpd->sh_mem_ape1_limit); 2806 2807 out: 2808 dqm_unlock(dqm); 2809 return retval; 2810 } 2811 2812 static int process_termination_nocpsch(struct device_queue_manager *dqm, 2813 struct qcm_process_device *qpd) 2814 { 2815 struct queue *q; 2816 struct device_process_node *cur, *next_dpn; 2817 int retval = 0; 2818 bool found = false; 2819 2820 dqm_lock(dqm); 2821 2822 /* Clear all user mode queues */ 2823 while (!list_empty(&qpd->queues_list)) { 2824 struct mqd_manager *mqd_mgr; 2825 int ret; 2826 2827 q = list_first_entry(&qpd->queues_list, struct queue, list); 2828 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 2829 q->properties.type)]; 2830 ret = destroy_queue_nocpsch_locked(dqm, qpd, q); 2831 if (ret) 2832 retval = ret; 2833 dqm_unlock(dqm); 2834 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 2835 dqm_lock(dqm); 2836 } 2837 2838 /* Unregister process */ 2839 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 2840 if (qpd == cur->qpd) { 2841 list_del(&cur->list); 2842 kfree(cur); 2843 dqm->processes_count--; 2844 found = true; 2845 break; 2846 } 2847 } 2848 2849 dqm_unlock(dqm); 2850 2851 /* Outside the DQM lock because under the DQM lock we can't do 2852 * reclaim or take other locks that others hold while reclaiming. 
2853 */ 2854 if (found) 2855 kfd_dec_compute_active(dqm->dev); 2856 2857 return retval; 2858 } 2859 2860 static int get_wave_state(struct device_queue_manager *dqm, 2861 struct queue *q, 2862 void __user *ctl_stack, 2863 u32 *ctl_stack_used_size, 2864 u32 *save_area_used_size) 2865 { 2866 struct mqd_manager *mqd_mgr; 2867 2868 dqm_lock(dqm); 2869 2870 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; 2871 2872 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE || 2873 q->properties.is_active || !q->device->kfd->cwsr_enabled || 2874 !mqd_mgr->get_wave_state) { 2875 dqm_unlock(dqm); 2876 return -EINVAL; 2877 } 2878 2879 dqm_unlock(dqm); 2880 2881 /* 2882 * get_wave_state is outside the dqm lock to prevent circular locking 2883 * and the queue should be protected against destruction by the process 2884 * lock. 2885 */ 2886 return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, &q->properties, 2887 ctl_stack, ctl_stack_used_size, save_area_used_size); 2888 } 2889 2890 static int get_queue_checkpoint_info(struct device_queue_manager *dqm, 2891 const struct queue *q, 2892 u32 *mqd_size, 2893 u32 *ctl_stack_size) 2894 { 2895 struct mqd_manager *mqd_mgr; 2896 enum KFD_MQD_TYPE mqd_type = 2897 get_mqd_type_from_queue_type(q->properties.type); 2898 int ret = 0; 2899 2900 dqm_lock(dqm); 2901 mqd_mgr = dqm->mqd_mgrs[mqd_type]; 2902 *mqd_size = mqd_mgr->mqd_size * NUM_XCC(mqd_mgr->dev->xcc_mask); 2903 *ctl_stack_size = 0; 2904 2905 if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info) 2906 ret = mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size); 2907 2908 dqm_unlock(dqm); 2909 2910 return ret; 2911 } 2912 2913 static int checkpoint_mqd(struct device_queue_manager *dqm, 2914 const struct queue *q, 2915 void *mqd, 2916 void *ctl_stack) 2917 { 2918 struct mqd_manager *mqd_mgr; 2919 int r = 0; 2920 enum KFD_MQD_TYPE mqd_type = 2921 get_mqd_type_from_queue_type(q->properties.type); 2922 2923 dqm_lock(dqm); 2924 2925 if (q->properties.is_active || 
!q->device->kfd->cwsr_enabled) { 2926 r = -EINVAL; 2927 goto dqm_unlock; 2928 } 2929 2930 mqd_mgr = dqm->mqd_mgrs[mqd_type]; 2931 if (!mqd_mgr->checkpoint_mqd) { 2932 r = -EOPNOTSUPP; 2933 goto dqm_unlock; 2934 } 2935 2936 mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack); 2937 2938 dqm_unlock: 2939 dqm_unlock(dqm); 2940 return r; 2941 } 2942 2943 static int process_termination_cpsch(struct device_queue_manager *dqm, 2944 struct qcm_process_device *qpd) 2945 { 2946 int retval = 0; 2947 struct queue *q; 2948 struct device *dev = dqm->dev->adev->dev; 2949 struct kernel_queue *kq, *kq_next; 2950 struct mqd_manager *mqd_mgr; 2951 struct device_process_node *cur, *next_dpn; 2952 enum kfd_unmap_queues_filter filter = 2953 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES; 2954 bool found = false; 2955 2956 dqm_lock(dqm); 2957 2958 /* Clean all kernel queues */ 2959 list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) { 2960 list_del(&kq->list); 2961 decrement_queue_count(dqm, qpd, kq->queue); 2962 qpd->is_debug = false; 2963 dqm->total_queue_count--; 2964 filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES; 2965 } 2966 2967 /* Clear all user mode queues */ 2968 list_for_each_entry(q, &qpd->queues_list, list) { 2969 if (q->properties.type == KFD_QUEUE_TYPE_SDMA) 2970 deallocate_sdma_queue(dqm, q); 2971 else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) 2972 deallocate_sdma_queue(dqm, q); 2973 2974 if (q->properties.is_active) { 2975 decrement_queue_count(dqm, qpd, q); 2976 2977 if (dqm->dev->kfd->shared_resources.enable_mes) { 2978 retval = remove_queue_mes(dqm, q, qpd); 2979 if (retval) 2980 dev_err(dev, "Failed to remove queue %d\n", 2981 q->properties.queue_id); 2982 } 2983 } 2984 2985 dqm->total_queue_count--; 2986 } 2987 2988 /* Unregister process */ 2989 list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) { 2990 if (qpd == cur->qpd) { 2991 list_del(&cur->list); 2992 kfree(cur); 2993 dqm->processes_count--; 2994 found = true; 2995 break; 2996 } 2997 } 
2998 2999 if (!dqm->dev->kfd->shared_resources.enable_mes) 3000 retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD); 3001 3002 if ((retval || qpd->reset_wavefronts) && 3003 down_read_trylock(&dqm->dev->adev->reset_domain->sem)) { 3004 pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev); 3005 dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process); 3006 qpd->reset_wavefronts = false; 3007 up_read(&dqm->dev->adev->reset_domain->sem); 3008 } 3009 3010 /* Lastly, free mqd resources. 3011 * Do free_mqd() after dqm_unlock to avoid circular locking. 3012 */ 3013 while (!list_empty(&qpd->queues_list)) { 3014 q = list_first_entry(&qpd->queues_list, struct queue, list); 3015 mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type( 3016 q->properties.type)]; 3017 list_del(&q->list); 3018 qpd->queue_count--; 3019 dqm_unlock(dqm); 3020 mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj); 3021 dqm_lock(dqm); 3022 } 3023 dqm_unlock(dqm); 3024 3025 /* Outside the DQM lock because under the DQM lock we can't do 3026 * reclaim or take other locks that others hold while reclaiming. 
3027 */ 3028 if (found) 3029 kfd_dec_compute_active(dqm->dev); 3030 3031 return retval; 3032 } 3033 3034 static int init_mqd_managers(struct device_queue_manager *dqm) 3035 { 3036 int i, j; 3037 struct device *dev = dqm->dev->adev->dev; 3038 struct mqd_manager *mqd_mgr; 3039 3040 for (i = 0; i < KFD_MQD_TYPE_MAX; i++) { 3041 mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev); 3042 if (!mqd_mgr) { 3043 dev_err(dev, "mqd manager [%d] initialization failed\n", i); 3044 goto out_free; 3045 } 3046 dqm->mqd_mgrs[i] = mqd_mgr; 3047 } 3048 3049 return 0; 3050 3051 out_free: 3052 for (j = 0; j < i; j++) { 3053 kfree(dqm->mqd_mgrs[j]); 3054 dqm->mqd_mgrs[j] = NULL; 3055 } 3056 3057 return -ENOMEM; 3058 } 3059 3060 /* Allocate one hiq mqd (HWS) and all SDMA mqd in a continuous trunk*/ 3061 static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm) 3062 { 3063 int retval; 3064 struct kfd_node *dev = dqm->dev; 3065 struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd; 3066 uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size * 3067 get_num_all_sdma_engines(dqm) * 3068 dev->kfd->device_info.num_sdma_queues_per_engine + 3069 (dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size * 3070 NUM_XCC(dqm->dev->xcc_mask)); 3071 3072 retval = amdgpu_amdkfd_alloc_kernel_mem(dev->adev, size, 3073 AMDGPU_GEM_DOMAIN_GTT, 3074 &(mem_obj->mem), &(mem_obj->gpu_addr), 3075 (void *)&(mem_obj->cpu_ptr), false); 3076 3077 return retval; 3078 } 3079 3080 static void deallocate_hiq_sdma_mqd(struct kfd_node *dev, 3081 struct kfd_mem_obj *mqd) 3082 { 3083 WARN(!mqd, "No hiq sdma mqd trunk to free"); 3084 3085 amdgpu_amdkfd_free_kernel_mem(dev->adev, &mqd->mem); 3086 } 3087 3088 struct device_queue_manager *device_queue_manager_init(struct kfd_node *dev) 3089 { 3090 struct device_queue_manager *dqm; 3091 3092 pr_debug("Loading device queue manager\n"); 3093 3094 dqm = kzalloc_obj(*dqm); 3095 if (!dqm) 3096 return NULL; 3097 3098 switch (dev->adev->asic_type) { 3099 /* HWS is not available on Hawaii. 
*/ 3100 case CHIP_HAWAII: 3101 /* HWS depends on CWSR for timely dequeue. CWSR is not 3102 * available on Tonga. 3103 * 3104 * FIXME: This argument also applies to Kaveri. 3105 */ 3106 case CHIP_TONGA: 3107 dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS; 3108 break; 3109 default: 3110 dqm->sched_policy = sched_policy; 3111 break; 3112 } 3113 3114 dqm->dev = dev; 3115 switch (dqm->sched_policy) { 3116 case KFD_SCHED_POLICY_HWS: 3117 case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: 3118 /* initialize dqm for cp scheduling */ 3119 dqm->ops.create_queue = create_queue_cpsch; 3120 dqm->ops.initialize = initialize_cpsch; 3121 dqm->ops.start = start_cpsch; 3122 dqm->ops.stop = stop_cpsch; 3123 dqm->ops.halt = halt_cpsch; 3124 dqm->ops.unhalt = unhalt_cpsch; 3125 dqm->ops.destroy_queue = destroy_queue_cpsch; 3126 dqm->ops.update_queue = update_queue; 3127 dqm->ops.register_process = register_process; 3128 dqm->ops.unregister_process = unregister_process; 3129 dqm->ops.uninitialize = uninitialize; 3130 dqm->ops.create_kernel_queue = create_kernel_queue_cpsch; 3131 dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch; 3132 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 3133 dqm->ops.process_termination = process_termination_cpsch; 3134 dqm->ops.evict_process_queues = evict_process_queues_cpsch; 3135 dqm->ops.restore_process_queues = restore_process_queues_cpsch; 3136 dqm->ops.get_wave_state = get_wave_state; 3137 dqm->ops.reset_queues = reset_queues_cpsch; 3138 dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info; 3139 dqm->ops.checkpoint_mqd = checkpoint_mqd; 3140 dqm->ops.set_perfcount = set_perfcount; 3141 break; 3142 case KFD_SCHED_POLICY_NO_HWS: 3143 /* initialize dqm for no cp scheduling */ 3144 dqm->ops.start = start_nocpsch; 3145 dqm->ops.stop = stop_nocpsch; 3146 dqm->ops.create_queue = create_queue_nocpsch; 3147 dqm->ops.destroy_queue = destroy_queue_nocpsch; 3148 dqm->ops.update_queue = update_queue; 3149 dqm->ops.register_process = 
register_process; 3150 dqm->ops.unregister_process = unregister_process; 3151 dqm->ops.initialize = initialize_nocpsch; 3152 dqm->ops.uninitialize = uninitialize; 3153 dqm->ops.set_cache_memory_policy = set_cache_memory_policy; 3154 dqm->ops.process_termination = process_termination_nocpsch; 3155 dqm->ops.evict_process_queues = evict_process_queues_nocpsch; 3156 dqm->ops.restore_process_queues = 3157 restore_process_queues_nocpsch; 3158 dqm->ops.get_wave_state = get_wave_state; 3159 dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info; 3160 dqm->ops.checkpoint_mqd = checkpoint_mqd; 3161 dqm->ops.set_perfcount = set_perfcount; 3162 break; 3163 default: 3164 dev_err(dev->adev->dev, "Invalid scheduling policy %d\n", dqm->sched_policy); 3165 goto out_free; 3166 } 3167 3168 switch (dev->adev->asic_type) { 3169 case CHIP_KAVERI: 3170 case CHIP_HAWAII: 3171 device_queue_manager_init_cik(&dqm->asic_ops); 3172 break; 3173 3174 case CHIP_CARRIZO: 3175 case CHIP_TONGA: 3176 case CHIP_FIJI: 3177 case CHIP_POLARIS10: 3178 case CHIP_POLARIS11: 3179 case CHIP_POLARIS12: 3180 case CHIP_VEGAM: 3181 device_queue_manager_init_vi(&dqm->asic_ops); 3182 break; 3183 3184 default: 3185 if (KFD_GC_VERSION(dev) >= IP_VERSION(12, 1, 0)) 3186 device_queue_manager_init_v12_1(&dqm->asic_ops); 3187 else if (KFD_GC_VERSION(dev) >= IP_VERSION(12, 0, 0)) 3188 device_queue_manager_init_v12(&dqm->asic_ops); 3189 else if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0)) 3190 device_queue_manager_init_v11(&dqm->asic_ops); 3191 else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1)) 3192 device_queue_manager_init_v10(&dqm->asic_ops); 3193 else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1)) 3194 device_queue_manager_init_v9(&dqm->asic_ops); 3195 else { 3196 WARN(1, "Unexpected ASIC family %u", 3197 dev->adev->asic_type); 3198 goto out_free; 3199 } 3200 } 3201 3202 if (init_mqd_managers(dqm)) 3203 goto out_free; 3204 3205 if (!dev->kfd->shared_resources.enable_mes && allocate_hiq_sdma_mqd(dqm)) { 
3206 dev_err(dev->adev->dev, "Failed to allocate hiq sdma mqd trunk buffer\n"); 3207 goto out_free; 3208 } 3209 3210 if (!dqm->ops.initialize(dqm)) { 3211 init_waitqueue_head(&dqm->destroy_wait); 3212 return dqm; 3213 } 3214 3215 if (!dev->kfd->shared_resources.enable_mes) 3216 deallocate_hiq_sdma_mqd(dev, &dqm->hiq_sdma_mqd); 3217 3218 out_free: 3219 kfree(dqm); 3220 return NULL; 3221 } 3222 3223 void device_queue_manager_uninit(struct device_queue_manager *dqm) 3224 { 3225 dqm->ops.stop(dqm); 3226 dqm->ops.uninitialize(dqm); 3227 if (!dqm->dev->kfd->shared_resources.enable_mes) 3228 deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd); 3229 kfree(dqm); 3230 } 3231 3232 int kfd_dqm_suspend_bad_queue_mes(struct kfd_node *knode, u32 pasid, u32 doorbell_id) 3233 { 3234 struct kfd_process_device *pdd = NULL; 3235 struct kfd_process *p = kfd_lookup_process_by_pasid(pasid, &pdd); 3236 struct device_queue_manager *dqm = knode->dqm; 3237 struct device *dev = dqm->dev->adev->dev; 3238 struct qcm_process_device *qpd; 3239 struct queue *q = NULL; 3240 int ret = 0; 3241 3242 if (!pdd) 3243 return -EINVAL; 3244 3245 dqm_lock(dqm); 3246 3247 if (pdd) { 3248 qpd = &pdd->qpd; 3249 3250 list_for_each_entry(q, &qpd->queues_list, list) { 3251 if (q->doorbell_id == doorbell_id && q->properties.is_active) { 3252 ret = suspend_all_queues_mes(dqm); 3253 if (ret) { 3254 dev_err(dev, "Suspending all queues failed"); 3255 goto out; 3256 } 3257 3258 q->properties.is_evicted = true; 3259 q->properties.is_active = false; 3260 decrement_queue_count(dqm, qpd, q); 3261 3262 ret = remove_queue_mes(dqm, q, qpd); 3263 if (ret) { 3264 dev_err(dev, "Removing bad queue failed"); 3265 goto out; 3266 } 3267 3268 ret = resume_all_queues_mes(dqm); 3269 if (ret) 3270 dev_err(dev, "Resuming all queues failed"); 3271 3272 break; 3273 } 3274 } 3275 } 3276 3277 out: 3278 dqm_unlock(dqm); 3279 kfd_unref_process(p); 3280 return ret; 3281 } 3282 3283 int kfd_evict_process_device(struct kfd_process_device *pdd) 
3284 { 3285 struct device_queue_manager *dqm; 3286 struct kfd_process *p; 3287 3288 p = pdd->process; 3289 dqm = pdd->dev->dqm; 3290 3291 WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); 3292 3293 return dqm->ops.evict_process_queues(dqm, &pdd->qpd); 3294 } 3295 3296 int reserve_debug_trap_vmid(struct device_queue_manager *dqm, 3297 struct qcm_process_device *qpd) 3298 { 3299 int r; 3300 struct device *dev = dqm->dev->adev->dev; 3301 int updated_vmid_mask; 3302 3303 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3304 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3305 return -EINVAL; 3306 } 3307 3308 dqm_lock(dqm); 3309 3310 if (dqm->trap_debug_vmid != 0) { 3311 dev_err(dev, "Trap debug id already reserved\n"); 3312 r = -EBUSY; 3313 goto out_unlock; 3314 } 3315 3316 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 3317 USE_DEFAULT_GRACE_PERIOD, false); 3318 if (r) 3319 goto out_unlock; 3320 3321 updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap; 3322 updated_vmid_mask &= ~(1 << dqm->dev->vm_info.last_vmid_kfd); 3323 3324 dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask; 3325 dqm->trap_debug_vmid = dqm->dev->vm_info.last_vmid_kfd; 3326 r = set_sched_resources(dqm); 3327 if (r) 3328 goto out_unlock; 3329 3330 r = map_queues_cpsch(dqm); 3331 if (r) 3332 goto out_unlock; 3333 3334 pr_debug("Reserved VMID for trap debug: %i\n", dqm->trap_debug_vmid); 3335 3336 out_unlock: 3337 dqm_unlock(dqm); 3338 return r; 3339 } 3340 3341 /* 3342 * Releases vmid for the trap debugger 3343 */ 3344 int release_debug_trap_vmid(struct device_queue_manager *dqm, 3345 struct qcm_process_device *qpd) 3346 { 3347 struct device *dev = dqm->dev->adev->dev; 3348 int r; 3349 int updated_vmid_mask; 3350 uint32_t trap_debug_vmid; 3351 3352 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3353 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3354 return -EINVAL; 3355 } 
3356 3357 dqm_lock(dqm); 3358 trap_debug_vmid = dqm->trap_debug_vmid; 3359 if (dqm->trap_debug_vmid == 0) { 3360 dev_err(dev, "Trap debug id is not reserved\n"); 3361 r = -EINVAL; 3362 goto out_unlock; 3363 } 3364 3365 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 3366 USE_DEFAULT_GRACE_PERIOD, false); 3367 if (r) 3368 goto out_unlock; 3369 3370 updated_vmid_mask = dqm->dev->kfd->shared_resources.compute_vmid_bitmap; 3371 updated_vmid_mask |= (1 << dqm->dev->vm_info.last_vmid_kfd); 3372 3373 dqm->dev->kfd->shared_resources.compute_vmid_bitmap = updated_vmid_mask; 3374 dqm->trap_debug_vmid = 0; 3375 r = set_sched_resources(dqm); 3376 if (r) 3377 goto out_unlock; 3378 3379 r = map_queues_cpsch(dqm); 3380 if (r) 3381 goto out_unlock; 3382 3383 pr_debug("Released VMID for trap debug: %i\n", trap_debug_vmid); 3384 3385 out_unlock: 3386 dqm_unlock(dqm); 3387 return r; 3388 } 3389 3390 #define QUEUE_NOT_FOUND -1 3391 /* invalidate queue operation in array */ 3392 static void q_array_invalidate(uint32_t num_queues, uint32_t *queue_ids) 3393 { 3394 int i; 3395 3396 for (i = 0; i < num_queues; i++) 3397 queue_ids[i] |= KFD_DBG_QUEUE_INVALID_MASK; 3398 } 3399 3400 /* find queue index in array */ 3401 static int q_array_get_index(unsigned int queue_id, 3402 uint32_t num_queues, 3403 uint32_t *queue_ids) 3404 { 3405 int i; 3406 3407 for (i = 0; i < num_queues; i++) 3408 if (queue_id == (queue_ids[i] & ~KFD_DBG_QUEUE_INVALID_MASK)) 3409 return i; 3410 3411 return QUEUE_NOT_FOUND; 3412 } 3413 3414 struct copy_context_work_handler_workarea { 3415 struct work_struct copy_context_work; 3416 struct kfd_process *p; 3417 }; 3418 3419 static void copy_context_work_handler(struct work_struct *work) 3420 { 3421 struct copy_context_work_handler_workarea *workarea; 3422 struct mqd_manager *mqd_mgr; 3423 struct queue *q; 3424 struct mm_struct *mm; 3425 struct kfd_process *p; 3426 uint32_t tmp_ctl_stack_used_size, tmp_save_area_used_size; 3427 int i; 3428 3429 workarea = 
container_of(work, 3430 struct copy_context_work_handler_workarea, 3431 copy_context_work); 3432 3433 p = workarea->p; 3434 mm = get_task_mm(p->lead_thread); 3435 3436 if (!mm) 3437 return; 3438 3439 kthread_use_mm(mm); 3440 for (i = 0; i < p->n_pdds; i++) { 3441 struct kfd_process_device *pdd = p->pdds[i]; 3442 struct device_queue_manager *dqm = pdd->dev->dqm; 3443 struct qcm_process_device *qpd = &pdd->qpd; 3444 3445 list_for_each_entry(q, &qpd->queues_list, list) { 3446 if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE) 3447 continue; 3448 3449 mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP]; 3450 3451 /* We ignore the return value from get_wave_state 3452 * because 3453 * i) right now, it always returns 0, and 3454 * ii) if we hit an error, we would continue to the 3455 * next queue anyway. 3456 */ 3457 mqd_mgr->get_wave_state(mqd_mgr, 3458 q->mqd, 3459 &q->properties, 3460 (void __user *) q->properties.ctx_save_restore_area_address, 3461 &tmp_ctl_stack_used_size, 3462 &tmp_save_area_used_size); 3463 } 3464 } 3465 kthread_unuse_mm(mm); 3466 mmput(mm); 3467 } 3468 3469 static uint32_t *get_queue_ids(uint32_t num_queues, uint32_t *usr_queue_id_array) 3470 { 3471 size_t array_size = num_queues * sizeof(uint32_t); 3472 3473 if (!usr_queue_id_array) 3474 return NULL; 3475 3476 return memdup_user(usr_queue_id_array, array_size); 3477 } 3478 3479 int resume_queues(struct kfd_process *p, 3480 uint32_t num_queues, 3481 uint32_t *usr_queue_id_array) 3482 { 3483 uint32_t *queue_ids = NULL; 3484 int total_resumed = 0; 3485 int i; 3486 3487 if (usr_queue_id_array) { 3488 queue_ids = get_queue_ids(num_queues, usr_queue_id_array); 3489 3490 if (IS_ERR(queue_ids)) 3491 return PTR_ERR(queue_ids); 3492 3493 /* mask all queues as invalid. 
unmask per successful request */ 3494 q_array_invalidate(num_queues, queue_ids); 3495 } 3496 3497 for (i = 0; i < p->n_pdds; i++) { 3498 struct kfd_process_device *pdd = p->pdds[i]; 3499 struct device_queue_manager *dqm = pdd->dev->dqm; 3500 struct device *dev = dqm->dev->adev->dev; 3501 struct qcm_process_device *qpd = &pdd->qpd; 3502 struct queue *q; 3503 int r, per_device_resumed = 0; 3504 3505 dqm_lock(dqm); 3506 3507 /* unmask queues that resume or already resumed as valid */ 3508 list_for_each_entry(q, &qpd->queues_list, list) { 3509 int q_idx = QUEUE_NOT_FOUND; 3510 3511 if (queue_ids) 3512 q_idx = q_array_get_index( 3513 q->properties.queue_id, 3514 num_queues, 3515 queue_ids); 3516 3517 if (!queue_ids || q_idx != QUEUE_NOT_FOUND) { 3518 int err = resume_single_queue(dqm, &pdd->qpd, q); 3519 3520 if (queue_ids) { 3521 if (!err) { 3522 queue_ids[q_idx] &= 3523 ~KFD_DBG_QUEUE_INVALID_MASK; 3524 } else { 3525 queue_ids[q_idx] |= 3526 KFD_DBG_QUEUE_ERROR_MASK; 3527 break; 3528 } 3529 } 3530 3531 if (dqm->dev->kfd->shared_resources.enable_mes) { 3532 wake_up_all(&dqm->destroy_wait); 3533 if (!err) 3534 total_resumed++; 3535 } else { 3536 per_device_resumed++; 3537 } 3538 } 3539 } 3540 3541 if (!per_device_resumed) { 3542 dqm_unlock(dqm); 3543 continue; 3544 } 3545 3546 r = execute_queues_cpsch(dqm, 3547 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 3548 0, 3549 USE_DEFAULT_GRACE_PERIOD); 3550 if (r) { 3551 dev_err(dev, "Failed to resume process queues\n"); 3552 if (queue_ids) { 3553 list_for_each_entry(q, &qpd->queues_list, list) { 3554 int q_idx = q_array_get_index( 3555 q->properties.queue_id, 3556 num_queues, 3557 queue_ids); 3558 3559 /* mask queue as error on resume fail */ 3560 if (q_idx != QUEUE_NOT_FOUND) 3561 queue_ids[q_idx] |= 3562 KFD_DBG_QUEUE_ERROR_MASK; 3563 } 3564 } 3565 } else { 3566 wake_up_all(&dqm->destroy_wait); 3567 total_resumed += per_device_resumed; 3568 } 3569 3570 dqm_unlock(dqm); 3571 } 3572 3573 if (queue_ids) { 3574 if (copy_to_user((void 
__user *)usr_queue_id_array, queue_ids, 3575 num_queues * sizeof(uint32_t))) 3576 pr_err("copy_to_user failed on queue resume\n"); 3577 3578 kfree(queue_ids); 3579 } 3580 3581 return total_resumed; 3582 } 3583 3584 int suspend_queues(struct kfd_process *p, 3585 uint32_t num_queues, 3586 uint32_t grace_period, 3587 uint64_t exception_clear_mask, 3588 uint32_t *usr_queue_id_array) 3589 { 3590 uint32_t *queue_ids = get_queue_ids(num_queues, usr_queue_id_array); 3591 int total_suspended = 0; 3592 int i; 3593 3594 if (IS_ERR(queue_ids)) 3595 return PTR_ERR(queue_ids); 3596 3597 /* mask all queues as invalid. umask on successful request */ 3598 q_array_invalidate(num_queues, queue_ids); 3599 3600 for (i = 0; i < p->n_pdds; i++) { 3601 struct kfd_process_device *pdd = p->pdds[i]; 3602 struct device_queue_manager *dqm = pdd->dev->dqm; 3603 struct device *dev = dqm->dev->adev->dev; 3604 struct qcm_process_device *qpd = &pdd->qpd; 3605 struct queue *q; 3606 int r, per_device_suspended = 0; 3607 3608 mutex_lock(&p->event_mutex); 3609 dqm_lock(dqm); 3610 3611 /* unmask queues that suspend or already suspended */ 3612 list_for_each_entry(q, &qpd->queues_list, list) { 3613 int q_idx = q_array_get_index(q->properties.queue_id, 3614 num_queues, 3615 queue_ids); 3616 3617 if (q_idx != QUEUE_NOT_FOUND) { 3618 int err = suspend_single_queue(dqm, pdd, q); 3619 bool is_mes = dqm->dev->kfd->shared_resources.enable_mes; 3620 3621 if (!err) { 3622 queue_ids[q_idx] &= ~KFD_DBG_QUEUE_INVALID_MASK; 3623 if (exception_clear_mask && is_mes) 3624 q->properties.exception_status &= 3625 ~exception_clear_mask; 3626 3627 if (is_mes) 3628 total_suspended++; 3629 else 3630 per_device_suspended++; 3631 } else if (err != -EBUSY) { 3632 queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK; 3633 break; 3634 } 3635 } 3636 } 3637 3638 if (!per_device_suspended) { 3639 dqm_unlock(dqm); 3640 mutex_unlock(&p->event_mutex); 3641 if (total_suspended) 3642 amdgpu_amdkfd_debug_mem_fence(dqm->dev->adev); 3643 continue; 
3644 } 3645 3646 r = execute_queues_cpsch(dqm, 3647 KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, 3648 grace_period); 3649 3650 if (r) 3651 dev_err(dev, "Failed to suspend process queues.\n"); 3652 else 3653 total_suspended += per_device_suspended; 3654 3655 list_for_each_entry(q, &qpd->queues_list, list) { 3656 int q_idx = q_array_get_index(q->properties.queue_id, 3657 num_queues, queue_ids); 3658 3659 if (q_idx == QUEUE_NOT_FOUND) 3660 continue; 3661 3662 /* mask queue as error on suspend fail */ 3663 if (r) 3664 queue_ids[q_idx] |= KFD_DBG_QUEUE_ERROR_MASK; 3665 else if (exception_clear_mask) 3666 q->properties.exception_status &= 3667 ~exception_clear_mask; 3668 } 3669 3670 dqm_unlock(dqm); 3671 mutex_unlock(&p->event_mutex); 3672 amdgpu_device_flush_hdp(dqm->dev->adev, NULL); 3673 } 3674 3675 if (total_suspended) { 3676 struct copy_context_work_handler_workarea copy_context_worker; 3677 3678 INIT_WORK_ONSTACK( 3679 ©_context_worker.copy_context_work, 3680 copy_context_work_handler); 3681 3682 copy_context_worker.p = p; 3683 3684 schedule_work(©_context_worker.copy_context_work); 3685 3686 3687 flush_work(©_context_worker.copy_context_work); 3688 destroy_work_on_stack(©_context_worker.copy_context_work); 3689 } 3690 3691 if (copy_to_user((void __user *)usr_queue_id_array, queue_ids, 3692 num_queues * sizeof(uint32_t))) 3693 pr_err("copy_to_user failed on queue suspend\n"); 3694 3695 kfree(queue_ids); 3696 3697 return total_suspended; 3698 } 3699 3700 static uint32_t set_queue_type_for_user(struct queue_properties *q_props) 3701 { 3702 switch (q_props->type) { 3703 case KFD_QUEUE_TYPE_COMPUTE: 3704 return q_props->format == KFD_QUEUE_FORMAT_PM4 3705 ? 
KFD_IOC_QUEUE_TYPE_COMPUTE 3706 : KFD_IOC_QUEUE_TYPE_COMPUTE_AQL; 3707 case KFD_QUEUE_TYPE_SDMA: 3708 return KFD_IOC_QUEUE_TYPE_SDMA; 3709 case KFD_QUEUE_TYPE_SDMA_XGMI: 3710 return KFD_IOC_QUEUE_TYPE_SDMA_XGMI; 3711 default: 3712 WARN_ONCE(true, "queue type not recognized!"); 3713 return 0xffffffff; 3714 }; 3715 } 3716 3717 void set_queue_snapshot_entry(struct queue *q, 3718 uint64_t exception_clear_mask, 3719 struct kfd_queue_snapshot_entry *qss_entry) 3720 { 3721 qss_entry->ring_base_address = q->properties.queue_address; 3722 qss_entry->write_pointer_address = (uint64_t)q->properties.write_ptr; 3723 qss_entry->read_pointer_address = (uint64_t)q->properties.read_ptr; 3724 qss_entry->ctx_save_restore_address = 3725 q->properties.ctx_save_restore_area_address; 3726 qss_entry->ctx_save_restore_area_size = 3727 q->properties.ctx_save_restore_area_size; 3728 qss_entry->exception_status = q->properties.exception_status; 3729 qss_entry->queue_id = q->properties.queue_id; 3730 qss_entry->gpu_id = q->device->id; 3731 qss_entry->ring_size = (uint32_t)q->properties.queue_size; 3732 qss_entry->queue_type = set_queue_type_for_user(&q->properties); 3733 q->properties.exception_status &= ~exception_clear_mask; 3734 } 3735 3736 int debug_lock_and_unmap(struct device_queue_manager *dqm) 3737 { 3738 struct device *dev = dqm->dev->adev->dev; 3739 int r; 3740 3741 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3742 dev_err(dev, "Unsupported on sched_policy: %i\n", dqm->sched_policy); 3743 return -EINVAL; 3744 } 3745 3746 if (!kfd_dbg_is_per_vmid_supported(dqm->dev)) 3747 return 0; 3748 3749 dqm_lock(dqm); 3750 3751 r = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, 0, false); 3752 if (r) 3753 dqm_unlock(dqm); 3754 3755 return r; 3756 } 3757 3758 int debug_map_and_unlock(struct device_queue_manager *dqm) 3759 { 3760 struct device *dev = dqm->dev->adev->dev; 3761 int r; 3762 3763 if (dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) { 3764 dev_err(dev, "Unsupported 
on sched_policy: %i\n", dqm->sched_policy); 3765 return -EINVAL; 3766 } 3767 3768 if (!kfd_dbg_is_per_vmid_supported(dqm->dev)) 3769 return 0; 3770 3771 r = map_queues_cpsch(dqm); 3772 3773 dqm_unlock(dqm); 3774 3775 return r; 3776 } 3777 3778 int debug_refresh_runlist(struct device_queue_manager *dqm) 3779 { 3780 int r = debug_lock_and_unmap(dqm); 3781 3782 if (r) 3783 return r; 3784 3785 return debug_map_and_unlock(dqm); 3786 } 3787 3788 bool kfd_dqm_is_queue_in_process(struct device_queue_manager *dqm, 3789 struct qcm_process_device *qpd, 3790 int doorbell_off, u32 *queue_format) 3791 { 3792 struct queue *q; 3793 bool r = false; 3794 3795 if (!queue_format) 3796 return r; 3797 3798 dqm_lock(dqm); 3799 3800 list_for_each_entry(q, &qpd->queues_list, list) { 3801 if (q->properties.doorbell_off == doorbell_off) { 3802 *queue_format = q->properties.format; 3803 r = true; 3804 goto out; 3805 } 3806 } 3807 3808 out: 3809 dqm_unlock(dqm); 3810 return r; 3811 } 3812 #if defined(CONFIG_DEBUG_FS) 3813 3814 static void seq_reg_dump(struct seq_file *m, 3815 uint32_t (*dump)[2], uint32_t n_regs) 3816 { 3817 uint32_t i, count; 3818 3819 for (i = 0, count = 0; i < n_regs; i++) { 3820 if (count == 0 || 3821 dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) { 3822 seq_printf(m, "%s %08x: %08x", 3823 i ? 
"\n" : "", 3824 dump[i][0], dump[i][1]); 3825 count = 7; 3826 } else { 3827 seq_printf(m, " %08x", dump[i][1]); 3828 count--; 3829 } 3830 } 3831 3832 seq_puts(m, "\n"); 3833 } 3834 3835 int dqm_debugfs_hqds(struct seq_file *m, void *data) 3836 { 3837 struct device_queue_manager *dqm = data; 3838 uint32_t xcc_mask = dqm->dev->xcc_mask; 3839 uint32_t (*dump)[2], n_regs; 3840 int pipe, queue; 3841 int r = 0, xcc_id; 3842 uint32_t sdma_engine_start; 3843 3844 if (!dqm->sched_running) { 3845 seq_puts(m, " Device is stopped\n"); 3846 return 0; 3847 } 3848 3849 for_each_inst(xcc_id, xcc_mask) { 3850 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 3851 KFD_CIK_HIQ_PIPE, 3852 KFD_CIK_HIQ_QUEUE, &dump, 3853 &n_regs, xcc_id); 3854 if (!r) { 3855 seq_printf( 3856 m, 3857 " Inst %d, HIQ on MEC %d Pipe %d Queue %d\n", 3858 xcc_id, 3859 KFD_CIK_HIQ_PIPE / get_pipes_per_mec(dqm) + 1, 3860 KFD_CIK_HIQ_PIPE % get_pipes_per_mec(dqm), 3861 KFD_CIK_HIQ_QUEUE); 3862 seq_reg_dump(m, dump, n_regs); 3863 3864 kfree(dump); 3865 } 3866 3867 for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) { 3868 int pipe_offset = pipe * get_queues_per_pipe(dqm); 3869 3870 for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) { 3871 if (!test_bit(pipe_offset + queue, 3872 dqm->dev->kfd->shared_resources.cp_queue_bitmap)) 3873 continue; 3874 3875 r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev, 3876 pipe, queue, 3877 &dump, &n_regs, 3878 xcc_id); 3879 if (r) 3880 break; 3881 3882 seq_printf(m, 3883 " Inst %d, CP Pipe %d, Queue %d\n", 3884 xcc_id, pipe, queue); 3885 seq_reg_dump(m, dump, n_regs); 3886 3887 kfree(dump); 3888 } 3889 } 3890 } 3891 3892 sdma_engine_start = dqm->dev->node_id * get_num_all_sdma_engines(dqm); 3893 for (pipe = sdma_engine_start; 3894 pipe < (sdma_engine_start + get_num_all_sdma_engines(dqm)); 3895 pipe++) { 3896 for (queue = 0; 3897 queue < dqm->dev->kfd->device_info.num_sdma_queues_per_engine; 3898 queue++) { 3899 r = dqm->dev->kfd2kgd->hqd_sdma_dump( 3900 dqm->dev->adev, 
pipe, queue, &dump, &n_regs); 3901 if (r) 3902 break; 3903 3904 seq_printf(m, " SDMA Engine %d, RLC %d\n", 3905 pipe, queue); 3906 seq_reg_dump(m, dump, n_regs); 3907 3908 kfree(dump); 3909 } 3910 } 3911 3912 return r; 3913 } 3914 3915 int dqm_debugfs_hang_hws(struct device_queue_manager *dqm) 3916 { 3917 int r = 0; 3918 3919 dqm_lock(dqm); 3920 r = pm_debugfs_hang_hws(&dqm->packet_mgr); 3921 if (r) { 3922 dqm_unlock(dqm); 3923 return r; 3924 } 3925 dqm->active_runlist = true; 3926 r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 3927 0, USE_DEFAULT_GRACE_PERIOD); 3928 dqm_unlock(dqm); 3929 3930 return r; 3931 } 3932 3933 #endif 3934