/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/ratelimit.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
				  unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
			      enum kfd_unmap_queues_filter filter,
			      uint32_t filter_param);

static int map_queues_cpsch(struct device_queue_manager *dqm);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
				     struct queue *q,
				     struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				  unsigned int sdma_queue_id);

static void kfd_process_hw_exception(struct work_struct *work);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
		+ pipe * dqm->dev->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			     dqm->dev->shared_resources.queue_bitmap))
			return true;
	return false;
}

unsigned int get_queues_num(struct device_queue_manager *dqm)
{
	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
			     KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

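/* Note: the pipe/queue and SDMA topology reported by these helpers comes
 * from the shared_resources info that amdgpu passes to KFD at device init.
 */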
unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_sdma_engines;
}

unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_sdma_engines
			* KFD_SDMA_QUEUES_PER_ENGINE;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
			     struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
{
	struct kfd_dev *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
		/* On pre-SOC15 chips we need to use the queue ID to
		 * preserve the user mode ABI.
		 */
		q->doorbell_id = q->properties.queue_id;
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		/* For SDMA queues on SOC15, use static doorbell
		 * assignments based on the engine and queue.
		 */
		q->doorbell_id = dev->shared_resources.sdma_doorbell
			[q->properties.sdma_engine_id]
			[q->properties.sdma_queue_id];
	} else {
		/* For CP queues on SOC15 reserve a free doorbell ID */
		unsigned int found;

		found = find_first_zero_bit(qpd->doorbell_bitmap,
					    KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
		if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
			pr_debug("No doorbells available");
			return -EBUSY;
		}
		set_bit(found, qpd->doorbell_bitmap);
		q->doorbell_id = found;
	}

	q->properties.doorbell_off =
		kfd_doorbell_id_to_offset(dev, q->process,
					  q->doorbell_id);

	return 0;
}

static void deallocate_doorbell(struct qcm_process_device *qpd,
				struct queue *q)
{
	unsigned int old;
	struct kfd_dev *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA)
		return;

	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
	WARN_ON(!old);
}

static int allocate_vmid(struct device_queue_manager *dqm,
			 struct qcm_process_device *qpd,
			 struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = ffs(dqm->vmid_bitmap) - 1;
	dqm->vmid_bitmap &= ~(1 << bit);

	allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
	pr_debug("vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	/* qpd->page_table_base is set earlier when register_process()
	 * is called, i.e. when the first queue is created.
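	 * The value programmed below is the per-process GPU page directory
	 * address that register_process() obtained from amdgpu via
	 * get_process_page_dir().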
	 */
	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
			qpd->vmid,
			qpd->page_table_base);
	/* invalidate the VM context after pasid and vmid mapping is set up */
	kfd_flush_tlb(qpd_to_pdd(qpd));

	return 0;
}

static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
				       struct qcm_process_device *qpd)
{
	const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf;
	int ret;

	if (!qpd->ib_kaddr)
		return -ENOMEM;

	ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
	if (ret)
		return ret;

	return kdev->kfd2kgd->submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
				qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
				pmf->release_mem_size / sizeof(uint32_t));
}

static void deallocate_vmid(struct device_queue_manager *dqm,
			    struct qcm_process_device *qpd,
			    struct queue *q)
{
	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;

	/* On GFX v7, CP doesn't flush TC at dequeue */
	if (q->device->device_info->asic_family == CHIP_HAWAII)
		if (flush_texture_cache_nocpsch(q->device, qpd))
			pr_err("Failed to flush TC\n");

	kfd_flush_tlb(qpd_to_pdd(qpd));

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	dqm->vmid_bitmap |= (1 << bit);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd)
{
	int retval;

	print_queue(q);

	dqm_lock(dqm);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
			dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	q->properties.vmid = qpd->vmid;
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (qpd->evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);
	else
		retval = -EINVAL;

	if (retval) {
		if (list_empty(&qpd->queues_list))
			deallocate_vmid(dqm, qpd, q);
		goto out_unlock;
	}

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active)
		dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
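	 * total_queue_count is what gets checked against
	 * max_num_of_queues_per_device whenever a new queue is created.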
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
		 dqm->total_queue_count);

out_unlock:
	dqm_unlock(dqm);
	return retval;
}

static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
	     i < get_pipes_per_mec(dqm);
	     pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = ffs(dqm->allocated_queues[pipe]) - 1;
			dqm->allocated_queues[pipe] &= ~(1 << bit);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				  struct queue *q)
{
	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd_mgr;
	int retval;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (!mqd_mgr)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval)
		return retval;

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_hqd;

	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
				   &q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_doorbell;

	pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
		 q->pipe, q->queue);

	dqm->dev->kfd2kgd->set_scratch_backing_va(
			dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);

	if (!q->properties.is_active)
		return 0;

	if (WARN(q->process->mm != current->mm,
		 "should only run in user thread"))
		retval = -EFAULT;
	else
		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
					   &q->properties, current->mm);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_hqd:
	deallocate_hqd(dqm, q);

	return retval;
}

/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid unsynchronized access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd,
					struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd_mgr)
		return -ENOMEM;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type %d is invalid\n",
			 q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	deallocate_doorbell(qpd, q);

	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				      KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				      KFD_UNMAP_LATENCY_MS,
				      q->pipe, q->queue);
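	/* A timed-out destroy_mqd means the queue could not be preempted
	 * cleanly; remember that so the wavefronts are reset below before
	 * the VMID is released.
	 */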
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
				dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
			 */
			dbgdev_wave_reset_wavefronts(dqm->dev,
						     qpd->pqm->process);
			qpd->reset_wavefronts = false;
		}

		deallocate_vmid(dqm, qpd, q);
	}
	qpd->queue_count--;
	if (q->properties.is_active)
		dqm->queue_count--;

	return retval;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				 struct qcm_process_device *qpd,
				 struct queue *q)
{
	int retval;

	dqm_lock(dqm);
	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
	dqm_unlock(dqm);

	return retval;
}

static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	bool prev_active = false;

	dqm_lock(dqm);
	pdd = kfd_get_process_device_data(q->device, q->process);
	if (!pdd) {
		retval = -ENODEV;
		goto out_unlock;
	}
	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd_mgr) {
		retval = -ENOMEM;
		goto out_unlock;
	}
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (pdd->qpd.evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	/* Save previous activity state for counters */
	prev_active = q->properties.is_active;

	/* Make sure the queue is unmapped before updating the MQD */
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		retval = unmap_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval) {
			pr_err("unmap queue failed\n");
			goto out_unlock;
		}
	} else if (prev_active &&
		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval) {
			pr_err("destroy mqd failed\n");
			goto out_unlock;
		}
	}

	retval = mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties);

	/*
	 * check active state vs. the previous state and modify
	 * counter accordingly. map_queues_cpsch uses the
	 * dqm->queue_count to determine whether a new runlist must be
	 * uploaded.
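	 * For example, if the last active queue is deactivated here,
	 * queue_count drops to zero and map_queues_cpsch() will skip
	 * sending a new runlist.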
	 */
	if (q->properties.is_active && !prev_active)
		dqm->queue_count++;
	else if (!q->properties.is_active && prev_active)
		dqm->queue_count--;

	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = map_queues_cpsch(dqm);
	else if (q->properties.is_active &&
		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
		if (WARN(q->process->mm != current->mm,
			 "should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
						   q->pipe, q->queue,
						   &q->properties, current->mm);
	}

out_unlock:
	dqm_unlock(dqm);
	return retval;
}

static struct mqd_manager *get_mqd_manager(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd_mgr;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	pr_debug("mqd type %d\n", type);

	mqd_mgr = dqm->mqd_mgrs[type];
	if (!mqd_mgr) {
		mqd_mgr = mqd_manager_init(type, dqm->dev);
		if (!mqd_mgr)
			pr_err("mqd manager is NULL");
		dqm->mqd_mgrs[type] = mqd_mgr;
	}

	return mqd_mgr;
}

static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	int retval = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* Deactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd_mgr) { /* should not be here */
			pr_err("Cannot evict queue, mqd mgr is NULL\n");
			retval = -ENOMEM;
			goto out;
		}
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval)
			goto out;
		dqm->queue_count--;
	}

out:
	dqm_unlock(dqm);
	return retval;
}

static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
				      struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	int retval = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* Deactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		dqm->queue_count--;
	}
	retval = execute_queues_cpsch(dqm,
				      qpd->is_debug ?
				      KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
				      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);

out:
	dqm_unlock(dqm);
	return retval;
}

static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
					  struct qcm_process_device *qpd)
{
	struct mm_struct *mm = NULL;
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	uint32_t pd_base;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_info_ratelimited("Restoring PASID %u queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%08x\n", pd_base);

	if (!list_empty(&qpd->queues_list)) {
		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
				dqm->dev->kgd,
				qpd->vmid,
				qpd->page_table_base);
		kfd_flush_tlb(pdd);
	}

	/* Take a safe reference to the mm_struct, which may otherwise
	 * disappear even while the kfd_process is still referenced.
	 */
	mm = get_task_mm(pdd->process->lead_thread);
	if (!mm) {
		retval = -EFAULT;
		goto out;
	}

	/* Re-activate all evicted queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_evicted)
			continue;
		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd_mgr) { /* should not be here */
			pr_err("Cannot restore queue, mqd mgr is NULL\n");
			retval = -ENOMEM;
			goto out;
		}
		q->properties.is_evicted = false;
		q->properties.is_active = true;
		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
					   q->queue, &q->properties, mm);
		if (retval)
			goto out;
		dqm->queue_count++;
	}
	qpd->evicted = 0;
out:
	if (mm)
		mmput(mm);
	dqm_unlock(dqm);
	return retval;
}

static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	uint32_t pd_base;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_info_ratelimited("Restoring PASID %u queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%08x\n", pd_base);

	/* Re-activate all evicted queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_evicted)
			continue;
		q->properties.is_evicted = false;
		q->properties.is_active = true;
		dqm->queue_count++;
	}
	retval = execute_queues_cpsch(dqm,
				      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	if (!retval)
		qpd->evicted = 0;
out:
	dqm_unlock(dqm);
	return retval;
}

static int register_process(struct device_queue_manager *dqm,
			    struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	struct kfd_process_device *pdd;
	uint32_t pd_base;
	int retval;

	n = kzalloc(sizeof(*n), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);

	dqm_lock(dqm);
	list_add(&n->list, &dqm->queues);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;

	retval = dqm->asic_ops.update_qpd(dqm, qpd);

	if (dqm->processes_count++ == 0)
		dqm->dev->kfd2kgd->set_compute_idle(dqm->dev->kgd, false);

	dqm_unlock(dqm);

	return retval;
}

static int unregister_process(struct device_queue_manager *dqm,
			      struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	pr_debug("qpd->queues_list is %s\n",
		 list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	dqm_lock(dqm);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			if (--dqm->processes_count == 0)
				dqm->dev->kfd2kgd->set_compute_idle(
					dqm->dev->kgd, true);
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	dqm_unlock(dqm);
	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
		       unsigned int vmid)
{
	uint32_t pasid_mapping;

	pasid_mapping = (pasid == 0) ? 0 :
		(uint32_t)pasid |
		ATC_VMID_PASID_MAPPING_VALID;

	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid_mapping,
						vmid);
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
		if (is_pipe_enabled(dqm, 0, i))
			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int pipe, queue;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues)
		return -ENOMEM;

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
			if (test_bit(pipe_offset + queue,
				     dqm->dev->shared_resources.queue_bitmap))
				dqm->allocated_queues[pipe] |= 1 << queue;
	}

	dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
	dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;

	return 0;
}

static void uninitialize(struct device_queue_manager *dqm)
{
	int i;

	WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqd_mgrs[i]);
	mutex_destroy(&dqm->lock_hidden);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	init_interrupts(dqm);
	return pm_init(&dqm->packets, dqm);
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	pm_uninit(&dqm->packets);
	return 0;
}

static int allocate_sdma_queue(struct device_queue_manager *dqm,
			       unsigned int *sdma_queue_id)
{
	int bit;

	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = ffs(dqm->sdma_bitmap) - 1;
	dqm->sdma_bitmap &= ~(1 << bit);
	*sdma_queue_id = bit;

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				  unsigned int sdma_queue_id)
{
	if (sdma_queue_id >= get_num_sdma_queues(dqm))
		return;
	dqm->sdma_bitmap |= (1 << sdma_queue_id);
}

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
				     struct queue *q,
				     struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd_mgr;
	int retval;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
	if (!mqd_mgr)
		return -ENOMEM;

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval)
		return retval;

	q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm);
	q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm);

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_sdma_queue;

	pr_debug("SDMA id is: %d\n", q->sdma_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
				   &q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_doorbell;

	retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 0, 0, &q->properties,
				   NULL);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
	deallocate_sdma_queue(dqm, q->sdma_id);

	return retval;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;

	res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
			/ dqm->dev->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= (1ull << i);
	}
	res.gws_mask = res.oac_mask = res.gds_heap_base =
		res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
		 "vmid mask: 0x%8X\n"
		 "queue mask: 0x%8llX\n",
		 res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	dqm->sdma_bitmap = (1 << get_num_sdma_queues(dqm)) - 1;

	INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);

	return 0;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	retval = 0;

	retval = pm_init(&dqm->packets, dqm);
	if (retval)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval)
		goto fail_set_sched_resources;

	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
				     &dqm->fence_mem);

	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	dqm_lock(dqm);
	/* clear hang status when the driver tries to start the hw scheduler */
	dqm->is_hws_hang = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	dqm_unlock(dqm);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	dqm_lock(dqm);
	unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	dqm_unlock(dqm);

	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
				     struct kernel_queue *kq,
				     struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
			dqm->total_queue_count);
		dqm_unlock(dqm);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
		 dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	dqm_unlock(dqm);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
				       struct kernel_queue *kq,
				       struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
		 dqm->total_queue_count);
	dqm_unlock(dqm);
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			      struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	retval = 0;

	dqm_lock(dqm);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
			dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		retval = allocate_sdma_queue(dqm, &q->sdma_id);
		if (retval)
			goto out_unlock;
		q->properties.sdma_queue_id =
			q->sdma_id / get_num_sdma_engines(dqm);
		q->properties.sdma_engine_id =
			q->sdma_id % get_num_sdma_engines(dqm);
	}

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_sdma_queue;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));

	if (!mqd_mgr) {
		retval = -ENOMEM;
		goto out_deallocate_doorbell;
	}
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (qpd->evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;
	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
				   &q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_doorbell;

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
		 dqm->total_queue_count);

	dqm_unlock(dqm);
	return retval;

out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		deallocate_sdma_queue(dqm, q->sdma_id);
out_unlock:
	dqm_unlock(dqm);

	return retval;
}

int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
			      unsigned int fence_value,
			      unsigned int timeout_ms)
{
	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, end_jiffies)) {
			pr_err("qcm fence wait loop timeout expired\n");
			/* In HWS case, this is used to halt the driver thread
			 * in order not to mess up CP states before doing
			 * scandumps for FW debugging.
			 */
			while (halt_if_hws_hang)
				schedule();

			return -ETIME;
		}
		schedule();
	}

	return 0;
}

static int unmap_sdma_queues(struct device_queue_manager *dqm,
			     unsigned int sdma_engine)
{
	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
			sdma_engine);
}

/* dqm->lock mutex has to be locked before calling this function */
static int map_queues_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
		return 0;

	if (dqm->active_runlist)
		return 0;

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval) {
		pr_err("failed to execute runlist\n");
		return retval;
	}
	dqm->active_runlist = true;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
			      enum kfd_unmap_queues_filter filter,
			      uint32_t filter_param)
{
	int retval = 0;

	if (dqm->is_hws_hang)
		return -EIO;
	if (!dqm->active_runlist)
		return retval;

	pr_debug("Before destroying queues, sdma queue count is : %u\n",
		 dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		unmap_sdma_queues(dqm, 0);
		unmap_sdma_queues(dqm, 1);
	}

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
				     filter, filter_param, false, 0);
	if (retval)
		return retval;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
			     KFD_FENCE_COMPLETED);
	/* Wait for the unmap fence to be signaled, bounded by a timeout */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
					   QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	if (retval)
		return retval;

	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval;

	if (dqm->is_hws_hang)
		return -EIO;
	retval = unmap_queues_cpsch(dqm, filter, filter_param);
	if (retval) {
		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
		dqm->is_hws_hang = true;
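		/* Preemption failed, so the HWS is considered hung; the
		 * hw_exception worker scheduled below asks amdgpu to recover
		 * the GPU.
		 */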
		schedule_work(&dqm->hw_exception_work);
		return retval;
	}

	return map_queues_cpsch(dqm);
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
			       struct qcm_process_device *qpd,
			       struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;
	bool preempt_all_queues;

	preempt_all_queues = false;

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	dqm_lock(dqm);

	if (qpd->is_debug) {
		/*
		 * error, we currently do not allow destroying a queue
		 * of a process that is being debugged
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;

	}

	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd_mgr) {
		retval = -ENOMEM;
		goto failed;
	}

	deallocate_doorbell(qpd, q);

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	}

	list_del(&q->list);
	qpd->queue_count--;
	if (q->properties.is_active) {
		dqm->queue_count--;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval == -ETIME)
			qpd->reset_wavefronts = true;
	}

	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
		 dqm->total_queue_count);

	dqm_unlock(dqm);

	return retval;

failed:
failed_try_destroy_debugged_queue:

	dqm_unlock(dqm);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				    struct qcm_process_device *qpd,
				    enum cache_policy default_policy,
				    enum cache_policy alternate_policy,
				    void __user *alternate_aperture_base,
				    uint64_t alternate_aperture_size)
{
	bool retval = true;

	if (!dqm->asic_ops.set_cache_memory_policy)
		return retval;

	dqm_lock(dqm);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
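		 *
		 * For example (illustrative values only): base 0x100000000
		 * with size 0x10000 gives an inclusive limit of 0x10000ffff,
		 * which passes the checks below and is stored as
		 * sh_mem_ape1_base = sh_mem_ape1_limit = 0x10000 after the
		 * >> 16 conversion.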
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
			retval = false;
			goto out;
		}

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->asic_ops.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		 qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		 qpd->sh_mem_ape1_limit);

out:
	dqm_unlock(dqm);
	return retval;
}

static int set_trap_handler(struct device_queue_manager *dqm,
			    struct qcm_process_device *qpd,
			    uint64_t tba_addr,
			    uint64_t tma_addr)
{
	uint64_t *tma;

	if (dqm->dev->cwsr_enabled) {
		/* Jump from CWSR trap handler to user trap */
		tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
		tma[0] = tba_addr;
		tma[1] = tma_addr;
	} else {
		qpd->tba_addr = tba_addr;
		qpd->tma_addr = tma_addr;
	}

	return 0;
}

static int process_termination_nocpsch(struct device_queue_manager *dqm,
				       struct qcm_process_device *qpd)
{
	struct queue *q, *next;
	struct device_process_node *cur, *next_dpn;
	int retval = 0;

	dqm_lock(dqm);

	/* Clear all user mode queues */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		int ret;

		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
		if (ret)
			retval = ret;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	dqm_unlock(dqm);
	return retval;
}

static int process_termination_cpsch(struct device_queue_manager *dqm,
				     struct qcm_process_device *qpd)
{
	int retval;
	struct queue *q, *next;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd_mgr;
	struct device_process_node *cur, *next_dpn;
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;

	retval = 0;

	dqm_lock(dqm);

	/* Clean all kernel queues */
	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
		list_del(&kq->list);
		dqm->queue_count--;
		qpd->is_debug = false;
		dqm->total_queue_count--;
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
			dqm->sdma_queue_count--;
			deallocate_sdma_queue(dqm, q->sdma_id);
		}

		if (q->properties.is_active)
			dqm->queue_count--;

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	retval = execute_queues_cpsch(dqm, filter, 0);
	if ((!dqm->is_hws_hang) &&
	    (retval || qpd->reset_wavefronts)) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
	}

	/* lastly, free mqd resources */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd_mgr) {
			retval = -ENOMEM;
			goto out;
		}
		list_del(&q->list);
		qpd->queue_count--;
		mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
	}

out:
	dqm_unlock(dqm);
	return retval;
}

struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	switch (dev->device_info->asic_family) {
	/* HWS is not available on Hawaii. */
	case CHIP_HAWAII:
	/* HWS depends on CWSR for timely dequeue. CWSR is not
	 * available on Tonga.
	 *
	 * FIXME: This argument also applies to Kaveri.
	 */
	case CHIP_TONGA:
		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
		break;
	default:
		dqm->sched_policy = sched_policy;
		break;
	}

	dqm->dev = dev;
	switch (dqm->sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.set_trap_handler = set_trap_handler;
		dqm->ops.process_termination = process_termination_cpsch;
		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.set_trap_handler = set_trap_handler;
		dqm->ops.process_termination = process_termination_nocpsch;
		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
		dqm->ops.restore_process_queues =
			restore_process_queues_nocpsch;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
		goto out_free;
	}

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->asic_ops);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->asic_ops);
		break;

	case CHIP_HAWAII:
		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
		break;

	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
		break;

	case CHIP_VEGA10:
	case CHIP_RAVEN:
		device_queue_manager_init_v9(&dqm->asic_ops);
		break;
	default:
		WARN(1, "Unexpected ASIC family %u",
		     dev->device_info->asic_family);
		goto out_free;
	}

	if (!dqm->ops.initialize(dqm))
		return dqm;

out_free:
	kfree(dqm);
	return NULL;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	dqm->ops.uninitialize(dqm);
	kfree(dqm);
}

int kfd_process_vm_fault(struct device_queue_manager *dqm,
			 unsigned int pasid)
{
	struct kfd_process_device *pdd;
	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
	int ret = 0;

	if (!p)
		return -EINVAL;
	pdd = kfd_get_process_device_data(dqm->dev, p);
	if (pdd)
		ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
	kfd_unref_process(p);

	return ret;
}

static void kfd_process_hw_exception(struct work_struct *work)
{
	struct device_queue_manager *dqm = container_of(work,
			struct device_queue_manager, hw_exception_work);
	dqm->dev->kfd2kgd->gpu_recover(dqm->dev->kgd);
}

#if defined(CONFIG_DEBUG_FS)

static void seq_reg_dump(struct seq_file *m,
			 uint32_t (*dump)[2], uint32_t n_regs)
{
	uint32_t i, count;

	for (i = 0, count = 0; i < n_regs; i++) {
		if (count == 0 ||
		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
			seq_printf(m, "%s %08x: %08x",
				   i ? "\n" : "",
				   dump[i][0], dump[i][1]);
			count = 7;
		} else {
			seq_printf(m, " %08x", dump[i][1]);
			count--;
		}
	}

	seq_puts(m, "\n");
}

int dqm_debugfs_hqds(struct seq_file *m, void *data)
{
	struct device_queue_manager *dqm = data;
	uint32_t (*dump)[2], n_regs;
	int pipe, queue;
	int r = 0;

	r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
			KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs);
	if (!r) {
		seq_printf(m, " HIQ on MEC %d Pipe %d Queue %d\n",
			   KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
			   KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
			   KFD_CIK_HIQ_QUEUE);
		seq_reg_dump(m, dump, n_regs);

		kfree(dump);
	}

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
			if (!test_bit(pipe_offset + queue,
				      dqm->dev->shared_resources.queue_bitmap))
				continue;

			r = dqm->dev->kfd2kgd->hqd_dump(
				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, " CP Pipe %d, Queue %d\n",
				   pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) {
		for (queue = 0; queue < KFD_SDMA_QUEUES_PER_ENGINE; queue++) {
			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, " SDMA Engine %d, RLC %d\n",
				   pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	return r;
}

int dqm_debugfs_execute_queues(struct device_queue_manager *dqm)
{
	int r = 0;

	dqm_lock(dqm);
	dqm->active_runlist = true;
	r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	dqm_unlock(dqm);

	return r;
}

#endif