// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/mutex.h>
#include "kfd_device_queue_manager.h"
#include "kfd_kernel_queue.h"
#include "kfd_priv.h"

#define OVER_SUBSCRIPTION_PROCESS_COUNT (1 << 0)
#define OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT (1 << 1)
#define OVER_SUBSCRIPTION_GWS_QUEUE_COUNT (1 << 2)
#define OVER_SUBSCRIPTION_XNACK_CONFLICT (1 << 3)

static inline void inc_wptr(unsigned int *wptr, unsigned int increment_bytes,
				unsigned int buffer_size_bytes)
{
	unsigned int temp = *wptr + increment_bytes / sizeof(uint32_t);

	WARN((temp * sizeof(uint32_t)) > buffer_size_bytes,
	     "Runlist IB overflow");
	*wptr = temp;
}
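
/* pm_calc_rlib_size: Calculate the runlist IB allocation size in bytes for
 * the processes and queues currently tracked by the device queue manager.
 * Over-subscription reasons are returned in *over_subscription as a bitmask
 * of the OVER_SUBSCRIPTION_* flags above; when any flag is set, extra space
 * is reserved for a chained runlist packet.
 */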
static void pm_calc_rlib_size(struct packet_manager *pm,
				unsigned int *rlib_size,
				int *over_subscription,
				int xnack_conflict)
{
	unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
	unsigned int map_queue_size;
	unsigned int max_proc_per_quantum = 1;
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;

	process_count = pm->dqm->processes_count;
	queue_count = pm->dqm->active_queue_count;
	compute_queue_count = pm->dqm->active_cp_queue_count;
	gws_queue_count = pm->dqm->gws_queue_count;

	/* check if there is over subscription
	 * Note: the arbitration between the number of VMIDs and
	 * hws_max_conc_proc has been done in
	 * kgd2kfd_device_init().
	 */
	*over_subscription = 0;

	if (node->max_proc_per_quantum > 1)
		max_proc_per_quantum = node->max_proc_per_quantum;

	if (process_count > max_proc_per_quantum)
		*over_subscription |= OVER_SUBSCRIPTION_PROCESS_COUNT;
	if (compute_queue_count > get_cp_queues_num(pm->dqm))
		*over_subscription |= OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT;
	if (gws_queue_count > 1)
		*over_subscription |= OVER_SUBSCRIPTION_GWS_QUEUE_COUNT;
	if (xnack_conflict && (node->adev->gmc.xnack_flags & AMDGPU_GMC_XNACK_FLAG_CHAIN))
		*over_subscription |= OVER_SUBSCRIPTION_XNACK_CONFLICT;

	if (*over_subscription)
		dev_dbg(dev, "Over subscribed runlist\n");

	map_queue_size = pm->pmf->map_queues_size;
	/* calculate run list ib allocation size */
	*rlib_size = process_count * pm->pmf->map_process_size +
		     queue_count * map_queue_size;

	/*
	 * Increase the allocation size in case we need a chained run list
	 * when over subscription
	 */
	if (*over_subscription)
		*rlib_size += pm->pmf->runlist_size;

	dev_dbg(dev, "runlist ib size %d\n", *rlib_size);
}

static int pm_allocate_runlist_ib(struct packet_manager *pm,
				unsigned int **rl_buffer,
				uint64_t *rl_gpu_buffer,
				unsigned int *rl_buffer_size,
				int *is_over_subscription,
				int xnack_conflict)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	int retval;

	if (WARN_ON(pm->allocated))
		return -EINVAL;

	pm_calc_rlib_size(pm, rl_buffer_size, is_over_subscription,
			  xnack_conflict);

	mutex_lock(&pm->lock);

	retval = kfd_gtt_sa_allocate(node, *rl_buffer_size, &pm->ib_buffer_obj);

	if (retval) {
		dev_err(dev, "Failed to allocate runlist IB\n");
		goto out;
	}

	*(void **)rl_buffer = pm->ib_buffer_obj->cpu_ptr;
	*rl_gpu_buffer = pm->ib_buffer_obj->gpu_addr;

	memset(*rl_buffer, 0, *rl_buffer_size);
	pm->allocated = true;

out:
	mutex_unlock(&pm->lock);
	return retval;
}
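
/* pm_create_runlist_ib: Allocate and fill the runlist IB. Emits one map
 * process packet per process, followed by map queues packets for its active
 * kernel and user queues. Processes are grouped by XNACK mode; if both modes
 * are present, the queue list is walked a second time for the other mode.
 * On over-subscription, a chained runlist packet is appended at the end.
 */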
static int pm_create_runlist_ib(struct packet_manager *pm,
				struct list_head *queues,
				uint64_t *rl_gpu_addr,
				size_t *rl_size_bytes)
{
	unsigned int alloc_size_bytes;
	unsigned int *rl_buffer, rl_wptr, i;
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	int retval, processes_mapped;
	struct device_process_node *cur;
	struct qcm_process_device *qpd;
	struct queue *q;
	struct kernel_queue *kq;
	int is_over_subscription;
	int xnack_enabled = -1;
	bool xnack_conflict = 0;

	rl_wptr = retval = processes_mapped = 0;

	/* Check if processes set different xnack modes */
	list_for_each_entry(cur, queues, list) {
		qpd = cur->qpd;
		if (xnack_enabled < 0)
			/* First process */
			xnack_enabled = qpd->pqm->process->xnack_enabled;
		else if (qpd->pqm->process->xnack_enabled != xnack_enabled) {
			/* Found a process with a different xnack mode */
			xnack_conflict = 1;
			break;
		}
	}

	retval = pm_allocate_runlist_ib(pm, &rl_buffer, rl_gpu_addr,
					&alloc_size_bytes, &is_over_subscription,
					xnack_conflict);
	if (retval)
		return retval;

	*rl_size_bytes = alloc_size_bytes;
	pm->ib_size_bytes = alloc_size_bytes;

	dev_dbg(dev, "Building runlist ib process count: %d queues count %d\n",
		pm->dqm->processes_count, pm->dqm->active_queue_count);

build_runlist_ib:
	/* build the run list ib packet */
	list_for_each_entry(cur, queues, list) {
		qpd = cur->qpd;
		/* group processes with the same xnack mode together */
		if (qpd->pqm->process->xnack_enabled != xnack_enabled)
			continue;
		/* build map process packet */
		if (processes_mapped >= pm->dqm->processes_count) {
			dev_dbg(dev, "Not enough space left in runlist IB\n");
			pm_release_ib(pm);
			return -ENOMEM;
		}

		retval = pm->pmf->map_process(pm, &rl_buffer[rl_wptr], qpd);
		if (retval)
			return retval;

		processes_mapped++;
		inc_wptr(&rl_wptr, pm->pmf->map_process_size,
			 alloc_size_bytes);

		list_for_each_entry(kq, &qpd->priv_queue_list, list) {
			if (!kq->queue->properties.is_active)
				continue;

			dev_dbg(dev,
				"static_queue, mapping kernel q %d, is debug status %d\n",
				kq->queue->queue, qpd->is_debug);

			retval = pm->pmf->map_queues(pm,
						     &rl_buffer[rl_wptr],
						     kq->queue,
						     qpd->is_debug);
			if (retval)
				return retval;

			inc_wptr(&rl_wptr,
				 pm->pmf->map_queues_size,
				 alloc_size_bytes);
		}

		list_for_each_entry(q, &qpd->queues_list, list) {
			if (!q->properties.is_active)
				continue;

			dev_dbg(dev,
				"static_queue, mapping user queue %d, is debug status %d\n",
				q->queue, qpd->is_debug);

			retval = pm->pmf->map_queues(pm,
						     &rl_buffer[rl_wptr],
						     q,
						     qpd->is_debug);

			if (retval)
				return retval;

			inc_wptr(&rl_wptr,
				 pm->pmf->map_queues_size,
				 alloc_size_bytes);
		}
	}
	if (xnack_conflict) {
		/* pick up processes with the other xnack mode */
		xnack_enabled = !xnack_enabled;
		xnack_conflict = 0;
		goto build_runlist_ib;
	}

	dev_dbg(dev, "Finished map process and queues to runlist\n");

	if (is_over_subscription) {
		if (!pm->is_over_subscription)
			dev_warn(dev, "Runlist is getting oversubscribed due to%s%s%s%s. Expect reduced ROCm performance.\n",
				 is_over_subscription & OVER_SUBSCRIPTION_PROCESS_COUNT ?
				 " too many processes" : "",
				 is_over_subscription & OVER_SUBSCRIPTION_COMPUTE_QUEUE_COUNT ?
				 " too many queues" : "",
				 is_over_subscription & OVER_SUBSCRIPTION_GWS_QUEUE_COUNT ?
				 " multiple processes using cooperative launch" : "",
				 is_over_subscription & OVER_SUBSCRIPTION_XNACK_CONFLICT ?
				 " xnack on/off processes mixed on gfx9" : "");

		retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
					  *rl_gpu_addr,
					  alloc_size_bytes / sizeof(uint32_t),
					  true);
	}
	pm->is_over_subscription = !!is_over_subscription;

	for (i = 0; i < alloc_size_bytes / sizeof(uint32_t); i++)
		pr_debug("0x%2X ", rl_buffer[i]);
	pr_debug("\n");

	return retval;
}
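
/* pm_init: Select the packet writer functions for the ASIC family and
 * create the HIQ kernel queue used to submit packets to the HWS.
 */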
int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm)
{
	switch (dqm->dev->adev->asic_type) {
	case CHIP_KAVERI:
	case CHIP_HAWAII:
		/* PM4 packet structures on CIK are the same as on VI */
	case CHIP_CARRIZO:
	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		pm->pmf = &kfd_vi_pm_funcs;
		break;
	default:
		if (KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 2) ||
		    KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 3) ||
		    KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 4, 4) ||
		    KFD_GC_VERSION(dqm->dev) == IP_VERSION(9, 5, 0))
			pm->pmf = &kfd_aldebaran_pm_funcs;
		else if (KFD_GC_VERSION(dqm->dev) >= IP_VERSION(9, 0, 1))
			pm->pmf = &kfd_v9_pm_funcs;
		else {
			WARN(1, "Unexpected ASIC family %u",
			     dqm->dev->adev->asic_type);
			return -EINVAL;
		}
	}

	pm->dqm = dqm;
	mutex_init(&pm->lock);
	pm->priv_queue = kernel_queue_init(dqm->dev, KFD_QUEUE_TYPE_HIQ);
	if (!pm->priv_queue) {
		mutex_destroy(&pm->lock);
		return -ENOMEM;
	}
	pm->allocated = false;

	return 0;
}

void pm_uninit(struct packet_manager *pm)
{
	mutex_destroy(&pm->lock);
	kernel_queue_uninit(pm->priv_queue);
	pm->priv_queue = NULL;
}

int pm_send_set_resources(struct packet_manager *pm,
			struct scheduling_resources *res)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	uint32_t *buffer, size;
	int retval = 0;

	size = pm->pmf->set_resources_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
				 size / sizeof(uint32_t),
				 (unsigned int **)&buffer);
	if (!buffer) {
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	retval = pm->pmf->set_resources(pm, buffer, res);
	if (!retval)
		retval = kq_submit_packet(pm->priv_queue);
	else
		kq_rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);

	return retval;
}
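
/* pm_send_runlist: Build a runlist IB for the given DQM queue list and
 * submit a runlist packet pointing at it through the HIQ.
 */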
int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
{
	uint64_t rl_gpu_ib_addr;
	uint32_t *rl_buffer;
	size_t rl_ib_size, packet_size_dwords;
	int retval;

	retval = pm_create_runlist_ib(pm, dqm_queues, &rl_gpu_ib_addr,
				      &rl_ib_size);
	if (retval)
		goto fail_create_runlist_ib;

	pr_debug("runlist IB address: 0x%llX\n", rl_gpu_ib_addr);

	packet_size_dwords = pm->pmf->runlist_size / sizeof(uint32_t);
	mutex_lock(&pm->lock);

	retval = kq_acquire_packet_buffer(pm->priv_queue,
					  packet_size_dwords, &rl_buffer);
	if (retval)
		goto fail_acquire_packet_buffer;

	retval = pm->pmf->runlist(pm, rl_buffer, rl_gpu_ib_addr,
				  rl_ib_size / sizeof(uint32_t), false);
	if (retval)
		goto fail_create_runlist;

	retval = kq_submit_packet(pm->priv_queue);

	mutex_unlock(&pm->lock);

	return retval;

fail_create_runlist:
	kq_rollback_packet(pm->priv_queue);
fail_acquire_packet_buffer:
	mutex_unlock(&pm->lock);
fail_create_runlist_ib:
	pm_release_ib(pm);
	return retval;
}

int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
			uint64_t fence_value)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	uint32_t *buffer, size;
	int retval = 0;

	if (WARN_ON(!fence_address))
		return -EFAULT;

	size = pm->pmf->query_status_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
				 size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	retval = pm->pmf->query_status(pm, buffer, fence_address, fence_value);
	if (!retval)
		retval = kq_submit_packet(pm->priv_queue);
	else
		kq_rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);
	return retval;
}

/* pm_config_dequeue_wait_counts: Configure dequeue timer Wait Counts
 * by writing to CP_IQ_WAIT_TIME2 registers.
 *
 * @cmd: See enum kfd_config_dequeue_wait_counts_cmd definition
 * @value: Depends on the cmd. This parameter is unused for
 *         KFD_DEQUEUE_WAIT_INIT and KFD_DEQUEUE_WAIT_RESET. For
 *         KFD_DEQUEUE_WAIT_SET_SCH_WAVE it holds the value to be set
 *
 */
int pm_config_dequeue_wait_counts(struct packet_manager *pm,
		enum kfd_config_dequeue_wait_counts_cmd cmd,
		uint32_t value)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	int retval = 0;
	uint32_t *buffer, size;

	if (!pm->pmf->config_dequeue_wait_counts ||
	    !pm->pmf->config_dequeue_wait_counts_size)
		return 0;

	if (cmd == KFD_DEQUEUE_WAIT_INIT && (KFD_GC_VERSION(pm->dqm->dev) < IP_VERSION(9, 4, 1) ||
	    KFD_GC_VERSION(pm->dqm->dev) >= IP_VERSION(10, 0, 0)))
		return 0;

	size = pm->pmf->config_dequeue_wait_counts_size;

	mutex_lock(&pm->lock);

	if (size) {
		kq_acquire_packet_buffer(pm->priv_queue,
					 size / sizeof(uint32_t),
					 (unsigned int **)&buffer);

		if (!buffer) {
			dev_err(dev,
				"Failed to allocate buffer on kernel queue\n");
			retval = -ENOMEM;
			goto out;
		}

		retval = pm->pmf->config_dequeue_wait_counts(pm, buffer,
							     cmd, value);
		if (!retval) {
			retval = kq_submit_packet(pm->priv_queue);

			/* If default value is modified, cache that in dqm->wait_times */
			if (!retval && cmd == KFD_DEQUEUE_WAIT_INIT)
				update_dqm_wait_times(pm->dqm);
		} else {
			kq_rollback_packet(pm->priv_queue);
		}
	}
out:
	mutex_unlock(&pm->lock);
	return retval;
}

int pm_send_unmap_queue(struct packet_manager *pm,
			enum kfd_unmap_queues_filter filter,
			uint32_t filter_param, bool reset)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	uint32_t *buffer, size;
	int retval = 0;

	size = pm->pmf->unmap_queues_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
				 size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
		retval = -ENOMEM;
		goto out;
	}

	retval = pm->pmf->unmap_queues(pm, buffer, filter, filter_param, reset);
	if (!retval)
		retval = kq_submit_packet(pm->priv_queue);
	else
		kq_rollback_packet(pm->priv_queue);

out:
	mutex_unlock(&pm->lock);
	return retval;
}
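
/* pm_release_ib: Free the runlist IB, if one is currently allocated. */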
void pm_release_ib(struct packet_manager *pm)
{
	mutex_lock(&pm->lock);
	if (pm->allocated) {
		kfd_gtt_sa_free(pm->dqm->dev, pm->ib_buffer_obj);
		pm->allocated = false;
	}
	mutex_unlock(&pm->lock);
}

#if defined(CONFIG_DEBUG_FS)

int pm_debugfs_runlist(struct seq_file *m, void *data)
{
	struct packet_manager *pm = data;

	mutex_lock(&pm->lock);

	if (!pm->allocated) {
		seq_puts(m, " No active runlist\n");
		goto out;
	}

	seq_hex_dump(m, " ", DUMP_PREFIX_OFFSET, 32, 4,
		     pm->ib_buffer_obj->cpu_ptr, pm->ib_size_bytes, false);

out:
	mutex_unlock(&pm->lock);
	return 0;
}

int pm_debugfs_hang_hws(struct packet_manager *pm)
{
	struct kfd_node *node = pm->dqm->dev;
	struct device *dev = node->adev->dev;
	uint32_t *buffer, size;
	int r = 0;

	if (!pm->priv_queue)
		return -EAGAIN;

	size = pm->pmf->query_status_size;
	mutex_lock(&pm->lock);
	kq_acquire_packet_buffer(pm->priv_queue,
				 size / sizeof(uint32_t), (unsigned int **)&buffer);
	if (!buffer) {
		dev_err(dev, "Failed to allocate buffer on kernel queue\n");
		r = -ENOMEM;
		goto out;
	}
	memset(buffer, 0x55, size);
	kq_submit_packet(pm->priv_queue);

	dev_info(dev, "Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
		 buffer[0], buffer[1], buffer[2], buffer[3], buffer[4],
		 buffer[5], buffer[6]);
out:
	mutex_unlock(&pm->lock);
	return r;
}


#endif