/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
					unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
static int destroy_queues_cpsch(struct device_queue_manager *dqm,
				bool preempt_static_queues, bool lock);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

unsigned int get_first_pipe(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return dqm->dev->shared_resources.first_compute_pipe;
}

unsigned int get_pipes_num(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return dqm->dev->shared_resources.compute_pipe_count;
}

static inline unsigned int get_pipes_num_cpsch(void)
{
	return PIPE_PER_ME_CP_SCHEDULING;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, CIK_VMID_NUM);
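	/* mark this VMID as allocated: a set bit in vmid_bitmap means free */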
	clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);

	/* Kaveri kfd vmids start from vmid 8 */
	allocated_vmid = bit + KFD_VMID_START_OFFSET;
	pr_debug("kfd: vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	return 0;
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - KFD_VMID_START_OFFSET;

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	set_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				int *allocated_vmid)
{
	int retval;

	BUG_ON(!dqm || !q || !qpd || !allocated_vmid);

	pr_debug("kfd: In func %s\n", __func__);
	print_queue(q);

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval != 0) {
			mutex_unlock(&dqm->lock);
			return retval;
		}
	}
	*allocated_vmid = qpd->vmid;
	q->properties.vmid = qpd->vmid;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);

	if (retval != 0) {
		if (list_empty(&qpd->queues_list)) {
			deallocate_vmid(dqm, qpd, q);
			*allocated_vmid = 0;
		}
		mutex_unlock(&dqm->lock);
		return retval;
	}

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active)
		dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);
	return 0;
}

static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_num(dqm);
			pipe = ((pipe + 1) % get_pipes_num(dqm)), ++i) {
		if (dqm->allocated_queues[pipe] != 0) {
			bit = find_first_bit(
				(unsigned long *)&dqm->allocated_queues[pipe],
				QUEUES_PER_PIPE);

			clear_bit(bit,
				(unsigned long *)&dqm->allocated_queues[pipe]);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n",
				__func__, q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_num(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !qpd);

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (mqd == NULL)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval != 0)
		return retval;

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0) {
		deallocate_hqd(dqm, q);
		return retval;
	}

	pr_debug("kfd: loading mqd to hqd on pipe (%d) queue (%d)\n",
			q->pipe,
			q->queue);

	retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
			q->queue, (uint32_t __user *) q->properties.write_ptr);
	if (retval != 0) {
		deallocate_hqd(dqm, q);
		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
		return retval;
	}

	return 0;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !q->mqd || !qpd);

	retval = 0;

	pr_debug("kfd: In Func %s\n", __func__);

	mutex_lock(&dqm->lock);

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
		if (mqd == NULL) {
			retval = -ENOMEM;
			goto out;
		}
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
		if (mqd == NULL) {
			retval = -ENOMEM;
			goto out;
		}
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type is invalid (%d)\n",
				q->properties.type);
		retval = -EINVAL;
		goto out;
	}

	retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
				q->pipe, q->queue);

	if (retval != 0)
		goto out;

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
	if (q->properties.is_active)
		dqm->queue_count--;

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool prev_active = false;

	BUG_ON(!dqm || !q || !q->mqd);

	mutex_lock(&dqm->lock);
	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

	if (q->properties.is_active)
		prev_active = true;

	/*
	 * check active state vs. the previous state
	 * and modify counter accordingly
	 */
	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
	if ((q->properties.is_active) && (!prev_active))
		dqm->queue_count++;
	else if ((!q->properties.is_active) && (prev_active))
		dqm->queue_count--;

	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = execute_queues_cpsch(dqm, false);

	mutex_unlock(&dqm->lock);
	return retval;
}

static struct mqd_manager *get_mqd_manager_nocpsch(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd;

	BUG_ON(!dqm || type >= KFD_MQD_TYPE_MAX);

	pr_debug("kfd: In func %s mqd type %d\n", __func__, type);

	mqd = dqm->mqds[type];
	if (!mqd) {
		mqd = mqd_manager_init(type, dqm->dev);
		if (mqd == NULL)
			pr_err("kfd: mqd manager is NULL\n");
		dqm->mqds[type] = mqd;
	}

	return mqd;
}

static int register_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	int retval;

	BUG_ON(!dqm || !qpd);

	pr_debug("kfd: In func %s\n", __func__);

	n = kzalloc(sizeof(struct device_process_node), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	mutex_lock(&dqm->lock);
	list_add(&n->list, &dqm->queues);

	retval = dqm->ops_asic_specific.register_process(dqm, qpd);

	dqm->processes_count++;

	mutex_unlock(&dqm->lock);

	return retval;
}

static int unregister_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	BUG_ON(!dqm || !qpd);

	pr_debug("In func %s\n", __func__);

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	mutex_lock(&dqm->lock);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	uint32_t pasid_mapping;

	pasid_mapping = (pasid == 0) ? 0 :
		(uint32_t)pasid |
		ATC_VMID_PASID_MAPPING_VALID;

	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid_mapping,
						vmid);
}

int init_pipelines(struct device_queue_manager *dqm,
			unsigned int pipes_num, unsigned int first_pipe)
{
	void *hpdptr;
	struct mqd_manager *mqd;
	unsigned int i, err, inx;
	uint64_t pipe_hpd_addr;

	BUG_ON(!dqm || !dqm->dev);

	pr_debug("kfd: In func %s\n", __func__);

	/*
	 * Allocate memory for the HPDs. This is hardware-owned per-pipe data.
	 * The driver never accesses this memory after zeroing it.
	 * It doesn't even have to be saved/restored on suspend/resume
	 * because it contains no data when there are no active queues.
	 */

	err = kfd_gtt_sa_allocate(dqm->dev, CIK_HPD_EOP_BYTES * pipes_num,
					&dqm->pipeline_mem);

	if (err) {
		pr_err("kfd: error allocate vidmem num pipes: %d\n",
			pipes_num);
		return -ENOMEM;
	}

	hpdptr = dqm->pipeline_mem->cpu_ptr;
	dqm->pipelines_addr = dqm->pipeline_mem->gpu_addr;

	memset(hpdptr, 0, CIK_HPD_EOP_BYTES * pipes_num);

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (mqd == NULL) {
		kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
		return -ENOMEM;
	}

	for (i = 0; i < pipes_num; i++) {
		inx = i + first_pipe;
		/*
		 * HPD buffer on GTT is allocated by amdkfd, no need to waste
		 * space in GTT for pipelines we don't initialize
		 */
		pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_EOP_BYTES;
		pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr);
		/* = log2(bytes/4)-1 */
		dqm->dev->kfd2kgd->init_pipeline(dqm->dev->kgd, inx,
				CIK_HPD_EOP_BYTES_LOG2 - 3, pipe_hpd_addr);
	}

	return 0;
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	BUG_ON(dqm == NULL);

	for (i = 0 ; i < get_pipes_num(dqm) ; i++)
		dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd,
				i + get_first_pipe(dqm));
}

static int init_scheduler(struct device_queue_manager *dqm)
{
	int retval;

	BUG_ON(!dqm);

	pr_debug("kfd: In %s\n", __func__);

	retval = init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
	return retval;
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int i;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s num of pipes: %d\n",
			__func__, get_pipes_num(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;
	dqm->allocated_queues = kcalloc(get_pipes_num(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues) {
		mutex_destroy(&dqm->lock);
		return -ENOMEM;
	}

	for (i = 0; i < get_pipes_num(dqm); i++)
		dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1;

	dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

	init_scheduler(dqm);
	return 0;
}

static void uninitialize_nocpsch(struct device_queue_manager *dqm)
{
	int i;

	BUG_ON(!dqm);

	BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqds[i]);
	mutex_destroy(&dqm->lock);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	init_interrupts(dqm);
	return 0;
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}

static int allocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int *sdma_queue_id)
{
	int bit;

	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap,
				CIK_SDMA_QUEUES);

	clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap);
	*sdma_queue_id = bit;

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id)
{
	if (sdma_queue_id >= CIK_SDMA_QUEUES)
		return;
	set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap);
}

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd;
	int retval;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval != 0)
		return retval;

	q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
	q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM;

	pr_debug("kfd: sdma id is:     %d\n", q->sdma_id);
	pr_debug("     sdma queue id:  %d\n", q->properties.sdma_queue_id);
	pr_debug("     sdma engine id: %d\n", q->properties.sdma_engine_id);

	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0) {
		deallocate_sdma_queue(dqm, q->sdma_id);
		return retval;
	}

	retval = mqd->load_mqd(mqd, q->mqd, 0,
				0, NULL);
	if (retval != 0) {
		deallocate_sdma_queue(dqm, q->sdma_id);
		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
		return retval;
	}

	return 0;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

static int set_sched_resources(struct device_queue_manager *dqm)
{
	struct scheduling_resources res;
	unsigned int queue_num, queue_mask;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s\n", __func__);

	queue_num = get_pipes_num_cpsch() * QUEUES_PER_PIPE;
	queue_mask = (1 << queue_num) - 1;
	res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
	res.vmid_mask <<= KFD_VMID_START_OFFSET;
	res.queue_mask = queue_mask << (get_first_pipe(dqm) * QUEUES_PER_PIPE);
	res.gws_mask = res.oac_mask = res.gds_heap_base =
			res.gds_heap_size = 0;

	pr_debug("kfd: scheduling resources:\n"
			"      vmid mask: 0x%8X\n"
			"      queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s num of pipes: %d\n",
			__func__, get_pipes_num_cpsch());

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	retval = dqm->ops_asic_specific.initialize(dqm);
	if (retval != 0)
		goto fail_init_pipelines;

	return 0;

fail_init_pipelines:
	mutex_destroy(&dqm->lock);
	return retval;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	int retval;
	BUG_ON(!dqm);

	retval = 0;

	retval = pm_init(&dqm->packets, dqm);
	if (retval != 0)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval != 0)
		goto fail_set_sched_resources;

	pr_debug("kfd: allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval != 0)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	list_for_each_entry(node, &dqm->queues, list)
		if (node->qpd->pqm->process && dqm->dev)
			kfd_bind_process_to_device(dqm->dev,
						node->qpd->pqm->process);

	execute_queues_cpsch(dqm, true);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	struct kfd_process_device *pdd;

	BUG_ON(!dqm);

	destroy_queues_cpsch(dqm, true, true);

	list_for_each_entry(node, &dqm->queues, list) {
		pdd = qpd_to_pdd(node->qpd);
		pdd->bound = false;
	}
	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	BUG_ON(!dqm || !kq || !qpd);

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, false);
	mutex_unlock(&dqm->lock);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	BUG_ON(!dqm || !kq);

	pr_debug("kfd: In %s\n", __func__);

	mutex_lock(&dqm->lock);
	/* here we actually preempt the DIQ */
	destroy_queues_cpsch(dqm, true, false);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, false);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	mutex_unlock(&dqm->lock);
}

static void select_sdma_engine_id(struct queue *q)
{
	static int sdma_id;

	q->sdma_id = sdma_id;
	sdma_id = (sdma_id + 1) % 2;
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd, int *allocate_vmid)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !qpd);

	retval = 0;

	if (allocate_vmid)
		*allocate_vmid = 0;

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		select_sdma_engine_id(q);

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));

	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0)
		goto out;

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm, false);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned long timeout)
{
	BUG_ON(!fence_addr);
	timeout += jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, timeout)) {
			pr_err("kfd: qcm fence wait loop timeout expired\n");
			return -ETIME;
		}
		schedule();
	}

	return 0;
}

static int destroy_sdma_queues(struct device_queue_manager *dqm,
				unsigned int sdma_engine)
{
	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false,
			sdma_engine);
}

static int destroy_queues_cpsch(struct device_queue_manager *dqm,
				bool preempt_static_queues, bool lock)
{
	int retval;
	enum kfd_preempt_type_filter preempt_type;
	struct kfd_process_device *pdd;

	BUG_ON(!dqm);

	retval = 0;

	if (lock)
		mutex_lock(&dqm->lock);
	if (!dqm->active_runlist)
		goto out;

	pr_debug("kfd: Before destroying queues, sdma queue count is : %u\n",
		dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		destroy_sdma_queues(dqm, 0);
		destroy_sdma_queues(dqm, 1);
	}

	preempt_type = preempt_static_queues ?
			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES :
			KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES;

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			preempt_type, 0, false, 0);
	if (retval != 0)
		goto out;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* should be timed out */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	if (retval != 0) {
		pdd = kfd_get_process_device_data(dqm->dev,
				kfd_get_process(current));
		pdd->reset_wavefronts = true;
		goto out;
	}
	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
{
	int retval;

	BUG_ON(!dqm);

	if (lock)
		mutex_lock(&dqm->lock);

	retval = destroy_queues_cpsch(dqm, false, false);
	if (retval != 0) {
		pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
		goto out;
	}

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0) {
		retval = 0;
		goto out;
	}

	if (dqm->active_runlist) {
		retval = 0;
		goto out;
	}

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval != 0) {
		pr_err("kfd: failed to execute runlist\n");
		goto out;
	}
	dqm->active_runlist = true;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool preempt_all_queues;

	BUG_ON(!dqm || !qpd || !q);

	preempt_all_queues = false;

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	mutex_lock(&dqm->lock);

	if (qpd->is_debug) {
		/*
		 * error: we currently do not allow destroying a queue
		 * of a process that is being debugged
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;

	}

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto failed;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count--;

	list_del(&q->list);
	if (q->properties.is_active)
		dqm->queue_count--;

	execute_queues_cpsch(dqm, false);

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);

	return 0;

failed:
failed_try_destroy_debugged_queue:

	mutex_unlock(&dqm->lock);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval;

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base)
			goto out;

		if ((base & APE1_FIXED_BITS_MASK) != 0)
			goto out;

		if ((limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT)
			goto out;

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->ops_asic_specific.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("kfd: sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

	mutex_unlock(&dqm->lock);
	return retval;

out:
	mutex_unlock(&dqm->lock);
	return false;
}

struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	BUG_ON(!dev);

	pr_debug("kfd: loading device queue manager\n");

	dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL);
	if (!dqm)
		return NULL;

	dqm->dev = dev;
	switch (sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->ops.register_process = register_process_nocpsch;
		dqm->ops.unregister_process = unregister_process_nocpsch;
		dqm->ops.uninitialize = uninitialize_nocpsch;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->ops.register_process = register_process_nocpsch;
		dqm->ops.unregister_process = unregister_process_nocpsch;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize_nocpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		break;
	default:
		BUG();
		break;
	}

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->ops_asic_specific);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->ops_asic_specific);
		break;
	}

	if (dqm->ops.initialize(dqm) != 0) {
		kfree(dqm);
		return NULL;
	}

	return dqm;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm);

	dqm->ops.uninitialize(dqm);
	kfree(dqm);
}