/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bitops.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "../../radeon/cik_reg.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static bool is_mem_initialized;

static int init_memory(struct device_queue_manager *dqm);
static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
					unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);
static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock);

static inline unsigned int get_pipes_num(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return dqm->dev->shared_resources.compute_pipe_count;
}

static inline unsigned int get_first_pipe(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm);
	return dqm->dev->shared_resources.first_compute_pipe;
}

static inline unsigned int get_pipes_num_cpsch(void)
{
	return PIPE_PER_ME_CP_SCHEDULING;
}

static inline unsigned int
get_sh_mem_bases_nybble_64(struct kfd_process_device *pdd)
{
	uint32_t nybble;

	nybble = (pdd->lds_base >> 60) & 0x0E;

	return nybble;
}

static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
{
	unsigned int shared_base;

	shared_base = (pdd->lds_base >> 16) & 0xFF;

	return shared_base;
}
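
/*
 * Illustrative example for the two helpers above (the aperture addresses
 * are hypothetical): if a 64-bit process has its LDS aperture at
 * 0x8000000000000000, (lds_base >> 60) & 0x0E yields the even top nybble
 * 0x8 that is later fed to compute_sh_mem_bases_64bit().  For a 32-bit
 * process with lds_base at 0x00840000, (lds_base >> 16) & 0xFF yields 0x84,
 * which is programmed as the shared base of SH_MEM_BASES.
 */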

static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble);
static void init_process_memory(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd)
{
	struct kfd_process_device *pdd;
	unsigned int temp;

	BUG_ON(!dqm || !qpd);

	pdd = qpd_to_pdd(qpd);

	/* check if sh_mem_config register already configured */
	if (qpd->sh_mem_config == 0) {
		qpd->sh_mem_config =
			ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED) |
			DEFAULT_MTYPE(MTYPE_NONCACHED) |
			APE1_MTYPE(MTYPE_NONCACHED);
		qpd->sh_mem_ape1_limit = 0;
		qpd->sh_mem_ape1_base = 0;
	}

	if (qpd->pqm->process->is_32bit_user_mode) {
		temp = get_sh_mem_bases_32(pdd);
		qpd->sh_mem_bases = SHARED_BASE(temp);
		qpd->sh_mem_config |= PTR32;
	} else {
		temp = get_sh_mem_bases_nybble_64(pdd);
		qpd->sh_mem_bases = compute_sh_mem_bases_64bit(temp);
	}

	pr_debug("kfd: is32bit process: %d sh_mem_bases nybble: 0x%X and register 0x%X\n",
		qpd->pqm->process->is_32bit_user_mode, temp, qpd->sh_mem_bases);
}

static void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return kfd2kgd->program_sh_mem_settings(dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, CIK_VMID_NUM);
	clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);

	/* Kaveri KFD VMIDs start from VMID 8 */
	allocated_vmid = bit + KFD_VMID_START_OFFSET;
	pr_debug("kfd: vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	return 0;
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - KFD_VMID_START_OFFSET;

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	set_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				int *allocated_vmid)
{
	int retval;

	BUG_ON(!dqm || !q || !qpd || !allocated_vmid);

	pr_debug("kfd: In func %s\n", __func__);
	print_queue(q);

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval != 0) {
			mutex_unlock(&dqm->lock);
			return retval;
		}
	}
	*allocated_vmid = qpd->vmid;
	q->properties.vmid = qpd->vmid;

	retval = create_compute_queue_nocpsch(dqm, q, qpd);

	if (retval != 0) {
		if (list_empty(&qpd->queues_list)) {
			deallocate_vmid(dqm, qpd, q);
			*allocated_vmid = 0;
		}
		mutex_unlock(&dqm->lock);
		return retval;
	}

	list_add(&q->list, &qpd->queues_list);
	dqm->queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);
	return 0;
}
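
/*
 * allocate_hqd() below hands out a hardware queue descriptor slot from the
 * per-pipe bitmaps in dqm->allocated_queues[].  Pipes are scanned
 * round-robin starting at dqm->next_pipe_to_allocate so queues are spread
 * across pipes ("horizontal" allocation); the first set bit in a pipe's
 * bitmap is the free queue slot that gets claimed.
 */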

static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	/*
	 * Bound the scan to one full sweep of the pipes; otherwise a device
	 * with no free queue slots would spin here forever instead of
	 * returning -EBUSY.
	 */
	for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_num(dqm);
			pipe = (pipe + 1) % get_pipes_num(dqm), ++i) {
		if (dqm->allocated_queues[pipe] != 0) {
			bit = find_first_bit(
				(unsigned long *)&dqm->allocated_queues[pipe],
				QUEUES_PER_PIPE);

			clear_bit(bit,
				(unsigned long *)&dqm->allocated_queues[pipe]);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (set == false)
		return -EBUSY;

	pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n",
				__func__, q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_num(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !qpd);

	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
	if (mqd == NULL)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval != 0)
		return retval;

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0) {
		deallocate_hqd(dqm, q);
		return retval;
	}

	pr_debug("kfd: loading mqd to hqd on pipe (%d) queue (%d)\n",
			q->pipe,
			q->queue);

	retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
			q->queue, (uint32_t __user *) q->properties.write_ptr);
	if (retval != 0) {
		deallocate_hqd(dqm, q);
		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
		return retval;
	}

	return 0;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !q->mqd || !qpd);

	retval = 0;

	pr_debug("kfd: In Func %s\n", __func__);

	mutex_lock(&dqm->lock);
	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
	if (mqd == NULL) {
		retval = -ENOMEM;
		goto out;
	}

	retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
				q->pipe, q->queue);

	if (retval != 0)
		goto out;

	deallocate_hqd(dqm, q);

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
	dqm->queue_count--;

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}
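
/*
 * update_queue() re-writes a queue's MQD after its properties change.  The
 * active/inactive transition is tracked against the previous state so that
 * dqm->queue_count stays accurate, and when a hardware scheduler is in use
 * (sched_policy != KFD_SCHED_POLICY_NO_HWS) the runlist is re-submitted so
 * the CP sees the updated queue.
 */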

static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool prev_active = false;

	BUG_ON(!dqm || !q || !q->mqd);

	mutex_lock(&dqm->lock);
	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

	if (q->properties.is_active == true)
		prev_active = true;

	/*
	 * check active state vs. the previous state
	 * and modify counter accordingly
	 */
	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
	if ((q->properties.is_active == true) && (prev_active == false))
		dqm->queue_count++;
	else if ((q->properties.is_active == false) && (prev_active == true))
		dqm->queue_count--;

	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = execute_queues_cpsch(dqm, false);

	mutex_unlock(&dqm->lock);
	return retval;
}

static struct mqd_manager *get_mqd_manager_nocpsch(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd;

	BUG_ON(!dqm || type >= KFD_MQD_TYPE_MAX);

	pr_debug("kfd: In func %s mqd type %d\n", __func__, type);

	mqd = dqm->mqds[type];
	if (!mqd) {
		mqd = mqd_manager_init(type, dqm->dev);
		if (mqd == NULL)
			pr_err("kfd: mqd manager is NULL");
		dqm->mqds[type] = mqd;
	}

	return mqd;
}

static int register_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;

	BUG_ON(!dqm || !qpd);

	pr_debug("kfd: In func %s\n", __func__);

	n = kzalloc(sizeof(struct device_process_node), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	mutex_lock(&dqm->lock);
	list_add(&n->list, &dqm->queues);

	init_process_memory(dqm, qpd);
	dqm->processes_count++;

	mutex_unlock(&dqm->lock);

	return 0;
}

static int unregister_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	BUG_ON(!dqm || !qpd);

	BUG_ON(!list_empty(&qpd->queues_list));

	pr_debug("kfd: In func %s\n", __func__);

	retval = 0;
	mutex_lock(&dqm->lock);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	uint32_t pasid_mapping;

	pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
						ATC_VMID_PASID_MAPPING_VALID;
	return kfd2kgd->set_pasid_vmid_mapping(dqm->dev->kgd, pasid_mapping,
						vmid);
}
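
/*
 * Worked example for compute_sh_mem_bases_64bit() below (the value is
 * illustrative): for top_address_nybble == 0x8, the argument passed to the
 * PRIVATE_BASE()/SHARED_BASE() macros is 0x8 << 12 = 0x8000, i.e. the same
 * nybble is programmed for both the private (LDS/scratch) and shared
 * (GPUVM) apertures; the exact register field placement is left to those
 * macros.
 */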

static uint32_t compute_sh_mem_bases_64bit(unsigned int top_address_nybble)
{
	/* In 64-bit mode, we can only control the top 3 bits of the LDS,
	 * scratch and GPUVM apertures.
	 * The hardware fills in the remaining 59 bits according to the
	 * following pattern:
	 * LDS:		X0000000'00000000 - X0000001'00000000 (4GB)
	 * Scratch:	X0000001'00000000 - X0000002'00000000 (4GB)
	 * GPUVM:	Y0010000'00000000 - Y0020000'00000000 (1TB)
	 *
	 * (where X/Y is the configurable nybble with the low-bit 0)
	 *
	 * LDS and scratch will have the same top nybble programmed in the
	 * top 3 bits of SH_MEM_BASES.PRIVATE_BASE.
	 * GPUVM can have a different top nybble programmed in the
	 * top 3 bits of SH_MEM_BASES.SHARED_BASE.
	 * We don't bother to support different top nybbles
	 * for LDS/Scratch and GPUVM.
	 */

	BUG_ON((top_address_nybble & 1) || top_address_nybble > 0xE ||
		top_address_nybble == 0);

	return PRIVATE_BASE(top_address_nybble << 12) |
			SHARED_BASE(top_address_nybble << 12);
}

static int init_memory(struct device_queue_manager *dqm)
{
	int i, retval;

	for (i = 8; i < 16; i++)
		set_pasid_vmid_mapping(dqm, 0, i);

	retval = kfd2kgd->init_memory(dqm->dev->kgd);
	if (retval == 0)
		is_mem_initialized = true;
	return retval;
}
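
/*
 * The EOP-queue size passed to init_pipeline() below is encoded as
 * log2(size in dwords) - 1.  With CIK_HPD_EOP_BYTES = 1 << 11 = 2048 bytes
 * per pipe, that is log2(2048 / 4) - 1 = 9 - 1 = 8, which is exactly the
 * CIK_HPD_EOP_BYTES_LOG2 - 3 used in the call.
 */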

static int init_pipelines(struct device_queue_manager *dqm,
			unsigned int pipes_num, unsigned int first_pipe)
{
	void *hpdptr;
	struct mqd_manager *mqd;
	unsigned int i, err, inx;
	uint64_t pipe_hpd_addr;

	BUG_ON(!dqm || !dqm->dev);

	pr_debug("kfd: In func %s\n", __func__);

	/*
	 * Allocate memory for the HPDs. This is hardware-owned per-pipe data.
	 * The driver never accesses this memory after zeroing it.
	 * It doesn't even have to be saved/restored on suspend/resume
	 * because it contains no data when there are no active queues.
	 */

	err = kfd2kgd->allocate_mem(dqm->dev->kgd,
				CIK_HPD_EOP_BYTES * pipes_num,
				PAGE_SIZE,
				KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
				(struct kgd_mem **) &dqm->pipeline_mem);

	if (err) {
		pr_err("kfd: error allocate vidmem num pipes: %d\n",
			pipes_num);
		return -ENOMEM;
	}

	hpdptr = dqm->pipeline_mem->cpu_ptr;
	dqm->pipelines_addr = dqm->pipeline_mem->gpu_addr;

	memset(hpdptr, 0, CIK_HPD_EOP_BYTES * pipes_num);

	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_COMPUTE);
	if (mqd == NULL) {
		kfd2kgd->free_mem(dqm->dev->kgd,
				(struct kgd_mem *) dqm->pipeline_mem);
		return -ENOMEM;
	}

	for (i = 0; i < pipes_num; i++) {
		inx = i + first_pipe;
		/*
		 * HPD buffer on GTT is allocated by amdkfd, no need to waste
		 * space in GTT for pipelines we don't initialize
		 */
		pipe_hpd_addr = dqm->pipelines_addr + i * CIK_HPD_EOP_BYTES;
		pr_debug("kfd: pipeline address %llX\n", pipe_hpd_addr);
		/* = log2(bytes/4)-1 */
		kfd2kgd->init_pipeline(dqm->dev->kgd, inx,
				CIK_HPD_EOP_BYTES_LOG2 - 3, pipe_hpd_addr);
	}

	return 0;
}

static int init_scheduler(struct device_queue_manager *dqm)
{
	int retval;

	BUG_ON(!dqm);

	pr_debug("kfd: In %s\n", __func__);

	retval = init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
	if (retval != 0)
		return retval;

	retval = init_memory(dqm);

	return retval;
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int i;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s num of pipes: %d\n",
			__func__, get_pipes_num(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->allocated_queues = kcalloc(get_pipes_num(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues) {
		mutex_destroy(&dqm->lock);
		return -ENOMEM;
	}

	for (i = 0; i < get_pipes_num(dqm); i++)
		dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1;

	dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;

	init_scheduler(dqm);
	return 0;
}

static void uninitialize_nocpsch(struct device_queue_manager *dqm)
{
	int i;

	BUG_ON(!dqm);

	BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0; i < KFD_MQD_TYPE_MAX; i++)
		kfree(dqm->mqds[i]);
	mutex_destroy(&dqm->lock);
	kfd2kgd->free_mem(dqm->dev->kgd,
			(struct kgd_mem *) dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */
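
/*
 * set_sched_resources() below tells the packet manager which VMIDs and HQD
 * slots the CP scheduler may use.  As an illustration only (the actual
 * values of PIPE_PER_ME_CP_SCHEDULING, QUEUES_PER_PIPE, VMID_PER_DEVICE and
 * KFD_VMID_START_OFFSET come from the KFD headers): with 8 VMIDs starting
 * at VMID 8, vmid_mask = ((1 << 8) - 1) << 8 = 0xFF00, and with 3
 * scheduling pipes of 8 queues each the queue mask covers 24 consecutive
 * HQD slots, shifted past the pipes that precede first_compute_pipe.
 */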

static int set_sched_resources(struct device_queue_manager *dqm)
{
	struct scheduling_resources res;
	unsigned int queue_num, queue_mask;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s\n", __func__);

	queue_num = get_pipes_num_cpsch() * QUEUES_PER_PIPE;
	queue_mask = (1 << queue_num) - 1;
	res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
	res.vmid_mask <<= KFD_VMID_START_OFFSET;
	res.queue_mask = queue_mask << (get_first_pipe(dqm) * QUEUES_PER_PIPE);
	res.gws_mask = res.oac_mask = res.gds_heap_base =
			res.gds_heap_size = 0;

	pr_debug("kfd: scheduling resources:\n"
			"	vmid mask: 0x%8X\n"
			"	queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	BUG_ON(!dqm);

	pr_debug("kfd: In func %s num of pipes: %d\n",
			__func__, get_pipes_num_cpsch());

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->active_runlist = false;
	retval = init_pipelines(dqm, get_pipes_num(dqm), 0);
	if (retval != 0)
		goto fail_init_pipelines;

	return 0;

fail_init_pipelines:
	mutex_destroy(&dqm->lock);
	return retval;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	int retval;

	BUG_ON(!dqm);

	retval = 0;

	retval = pm_init(&dqm->packets, dqm);
	if (retval != 0)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval != 0)
		goto fail_set_sched_resources;

	pr_debug("kfd: allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd2kgd->allocate_mem(dqm->dev->kgd,
					sizeof(*dqm->fence_addr),
					32,
					KFD_MEMPOOL_SYSTEM_WRITECOMBINE,
					(struct kgd_mem **) &dqm->fence_mem);

	if (retval != 0)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	list_for_each_entry(node, &dqm->queues, list)
		if (node->qpd->pqm->process && dqm->dev)
			kfd_bind_process_to_device(dqm->dev,
						node->qpd->pqm->process);

	execute_queues_cpsch(dqm, true);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	struct kfd_process_device *pdd;

	BUG_ON(!dqm);

	destroy_queues_cpsch(dqm, true);

	list_for_each_entry(node, &dqm->queues, list) {
		pdd = qpd_to_pdd(node->qpd);
		pdd->bound = false;
	}
	kfd2kgd->free_mem(dqm->dev->kgd,
			(struct kgd_mem *) dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	BUG_ON(!dqm || !kq || !qpd);

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, false);
	mutex_unlock(&dqm->lock);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	BUG_ON(!dqm || !kq);

	pr_debug("kfd: In %s\n", __func__);

	mutex_lock(&dqm->lock);
	destroy_queues_cpsch(dqm, false);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, false);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	mutex_unlock(&dqm->lock);
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd, int *allocate_vmid)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !qpd);

	retval = 0;

	if (allocate_vmid)
		*allocate_vmid = 0;

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("amdkfd: Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out;
	}

	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
	if (mqd == NULL) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval != 0)
		goto out;

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm, false);
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned long timeout)
{
	BUG_ON(!fence_addr);
	timeout += jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, timeout)) {
			pr_err("kfd: qcm fence wait loop timeout expired\n");
			return -ETIME;
		}
		cpu_relax();
	}

	return 0;
}
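
/*
 * With the CP scheduler, queues are not mapped to HQDs directly; instead a
 * runlist describing the active queues is handed to the CP.
 * destroy_queues_cpsch() below preempts the current runlist, waiting on a
 * fence written by the CP and bounded by QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
 * and execute_queues_cpsch() rebuilds and resubmits it, so any change to
 * the queue set is applied as a preempt-then-rerun cycle.
 */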

static int destroy_queues_cpsch(struct device_queue_manager *dqm, bool lock)
{
	int retval;

	BUG_ON(!dqm);

	retval = 0;

	if (lock)
		mutex_lock(&dqm->lock);
	if (dqm->active_runlist == false)
		goto out;
	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 0, false, 0);
	if (retval != 0)
		goto out;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* wait for the CP to signal the fence; the wait is bounded by the
	 * preemption timeout */
	fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
{
	int retval;

	BUG_ON(!dqm);

	if (lock)
		mutex_lock(&dqm->lock);

	retval = destroy_queues_cpsch(dqm, false);
	if (retval != 0) {
		pr_err("kfd: the cp might be in an unrecoverable state due to an unsuccessful queues preemption");
		goto out;
	}

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0) {
		retval = 0;
		goto out;
	}

	if (dqm->active_runlist) {
		retval = 0;
		goto out;
	}

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval != 0) {
		pr_err("kfd: failed to execute runlist");
		goto out;
	}
	dqm->active_runlist = true;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !qpd || !q);

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	mutex_lock(&dqm->lock);

	mqd = dqm->get_mqd_manager(dqm, KFD_MQD_TYPE_CIK_CP);
	if (!mqd) {
		retval = -ENOMEM;
		goto failed;
	}

	list_del(&q->list);
	dqm->queue_count--;

	execute_queues_cpsch(dqm, false);

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);

	return 0;

failed:
	mutex_unlock(&dqm->lock);
	return retval;
}
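
/*
 * Worked example for the APE1 encoding below (addresses are hypothetical):
 * a 64 KB alternate aperture at base 0x100000000 gives
 * limit = 0x100000000 + 0x10000 - 1 = 0x10000FFFF.  Both values pass the
 * APE1_FIXED_BITS_MASK checks (the base is 64 KB aligned and below the
 * user-mode boundary, the limit ends in 0xFFFF), and the registers are
 * programmed with base >> 16 = 0x10000 and limit >> 16 = 0x10000.
 */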

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				enum cache_policy default_policy,
				enum cache_policy alternate_policy,
				void __user *alternate_aperture_base,
				uint64_t alternate_aperture_size)
{
	uint32_t default_mtype;
	uint32_t ape1_mtype;

	pr_debug("kfd: In func %s\n", __func__);

	mutex_lock(&dqm->lock);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base)
			goto out;

		if ((base & APE1_FIXED_BITS_MASK) != 0)
			goto out;

		if ((limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT)
			goto out;

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	default_mtype = (default_policy == cache_policy_coherent) ?
			MTYPE_NONCACHED :
			MTYPE_CACHED;

	ape1_mtype = (alternate_policy == cache_policy_coherent) ?
			MTYPE_NONCACHED :
			MTYPE_CACHED;

	qpd->sh_mem_config = (qpd->sh_mem_config & PTR32)
			| ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED)
			| DEFAULT_MTYPE(default_mtype)
			| APE1_MTYPE(ape1_mtype);

	if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("kfd: sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

	mutex_unlock(&dqm->lock);
	return true;

out:
	mutex_unlock(&dqm->lock);
	return false;
}
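
/*
 * device_queue_manager_init() below fills in the DQM function table
 * according to the scheduling policy: the two HWS policies use the
 * CP-scheduler (cpsch) entry points, which go through the packet manager
 * and runlists, while KFD_SCHED_POLICY_NO_HWS programs HQDs directly via
 * the nocpsch entry points.  The MQD-manager, process registration and
 * cache-policy callbacks are shared between both modes.
 */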

struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	BUG_ON(!dev);

	dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL);
	if (!dqm)
		return NULL;

	dqm->dev = dev;
	switch (sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->create_queue = create_queue_cpsch;
		dqm->initialize = initialize_cpsch;
		dqm->start = start_cpsch;
		dqm->stop = stop_cpsch;
		dqm->destroy_queue = destroy_queue_cpsch;
		dqm->update_queue = update_queue;
		dqm->get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->register_process = register_process_nocpsch;
		dqm->unregister_process = unregister_process_nocpsch;
		dqm->uninitialize = uninitialize_nocpsch;
		dqm->create_kernel_queue = create_kernel_queue_cpsch;
		dqm->destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->set_cache_memory_policy = set_cache_memory_policy;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->start = start_nocpsch;
		dqm->stop = stop_nocpsch;
		dqm->create_queue = create_queue_nocpsch;
		dqm->destroy_queue = destroy_queue_nocpsch;
		dqm->update_queue = update_queue;
		dqm->get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->register_process = register_process_nocpsch;
		dqm->unregister_process = unregister_process_nocpsch;
		dqm->initialize = initialize_nocpsch;
		dqm->uninitialize = uninitialize_nocpsch;
		dqm->set_cache_memory_policy = set_cache_memory_policy;
		break;
	default:
		BUG();
		break;
	}

	if (dqm->initialize(dqm) != 0) {
		kfree(dqm);
		return NULL;
	}

	return dqm;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm);

	dqm->uninitialize(dqm);
	kfree(dqm);
}