1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Copyright 2016-2021 HabanaLabs, Ltd. 5 * All Rights Reserved. 6 */ 7 8 #include <uapi/drm/habanalabs_accel.h> 9 #include "habanalabs.h" 10 11 #include <linux/uaccess.h> 12 #include <linux/slab.h> 13 14 #define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \ 15 HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \ 16 HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND | \ 17 HL_CS_FLAGS_ENGINES_COMMAND | HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES) 18 19 20 #define MAX_TS_ITER_NUM 100 21 22 /** 23 * enum hl_cs_wait_status - cs wait status 24 * @CS_WAIT_STATUS_BUSY: cs was not completed yet 25 * @CS_WAIT_STATUS_COMPLETED: cs completed 26 * @CS_WAIT_STATUS_GONE: cs completed but fence is already gone 27 */ 28 enum hl_cs_wait_status { 29 CS_WAIT_STATUS_BUSY, 30 CS_WAIT_STATUS_COMPLETED, 31 CS_WAIT_STATUS_GONE 32 }; 33 34 /* 35 * Data used while handling wait/timestamp nodes. 36 * The purpose of this struct is to store the needed data for both operations 37 * in one variable instead of passing large number of arguments to functions. 38 */ 39 struct wait_interrupt_data { 40 struct hl_user_interrupt *interrupt; 41 struct hl_mmap_mem_buf *buf; 42 struct hl_mem_mgr *mmg; 43 struct hl_cb *cq_cb; 44 u64 ts_handle; 45 u64 ts_offset; 46 u64 cq_handle; 47 u64 cq_offset; 48 u64 target_value; 49 u64 intr_timeout_us; 50 }; 51 52 static void job_wq_completion(struct work_struct *work); 53 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq, 54 enum hl_cs_wait_status *status, s64 *timestamp); 55 static void cs_do_release(struct kref *ref); 56 57 static void hl_push_cs_outcome(struct hl_device *hdev, 58 struct hl_cs_outcome_store *outcome_store, 59 u64 seq, ktime_t ts, int error) 60 { 61 struct hl_cs_outcome *node; 62 unsigned long flags; 63 64 /* 65 * CS outcome store supports the following operations: 66 * push outcome - store a recent CS outcome in the store 67 * pop outcome - retrieve a SPECIFIC (by seq) CS outcome from the store 68 * It uses 2 lists: used list and free list. 69 * It has a pre-allocated amount of nodes, each node stores 70 * a single CS outcome. 71 * Initially, all the nodes are in the free list. 72 * On push outcome, a node (any) is taken from the free list, its 73 * information is filled in, and the node is moved to the used list. 74 * It is possible, that there are no nodes left in the free list. 75 * In this case, we will lose some information about old outcomes. We 76 * will pop the OLDEST node from the used list, and make it free. 77 * On pop, the node is searched for in the used list (using a search 78 * index). 79 * If found, the node is then removed from the used list, and moved 80 * back to the free list. The outcome data that the node contained is 81 * returned back to the user. 
82 */ 83 84 spin_lock_irqsave(&outcome_store->db_lock, flags); 85 86 if (list_empty(&outcome_store->free_list)) { 87 node = list_last_entry(&outcome_store->used_list, 88 struct hl_cs_outcome, list_link); 89 hash_del(&node->map_link); 90 dev_dbg(hdev->dev, "CS %llu outcome was lost\n", node->seq); 91 } else { 92 node = list_last_entry(&outcome_store->free_list, 93 struct hl_cs_outcome, list_link); 94 } 95 96 list_del_init(&node->list_link); 97 98 node->seq = seq; 99 node->ts = ts; 100 node->error = error; 101 102 list_add(&node->list_link, &outcome_store->used_list); 103 hash_add(outcome_store->outcome_map, &node->map_link, node->seq); 104 105 spin_unlock_irqrestore(&outcome_store->db_lock, flags); 106 } 107 108 static bool hl_pop_cs_outcome(struct hl_cs_outcome_store *outcome_store, 109 u64 seq, ktime_t *ts, int *error) 110 { 111 struct hl_cs_outcome *node; 112 unsigned long flags; 113 114 spin_lock_irqsave(&outcome_store->db_lock, flags); 115 116 hash_for_each_possible(outcome_store->outcome_map, node, map_link, seq) 117 if (node->seq == seq) { 118 *ts = node->ts; 119 *error = node->error; 120 121 hash_del(&node->map_link); 122 list_del_init(&node->list_link); 123 list_add(&node->list_link, &outcome_store->free_list); 124 125 spin_unlock_irqrestore(&outcome_store->db_lock, flags); 126 127 return true; 128 } 129 130 spin_unlock_irqrestore(&outcome_store->db_lock, flags); 131 132 return false; 133 } 134 135 static void hl_sob_reset(struct kref *ref) 136 { 137 struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob, 138 kref); 139 struct hl_device *hdev = hw_sob->hdev; 140 141 dev_dbg(hdev->dev, "reset sob id %u\n", hw_sob->sob_id); 142 143 hdev->asic_funcs->reset_sob(hdev, hw_sob); 144 145 hw_sob->need_reset = false; 146 } 147 148 void hl_sob_reset_error(struct kref *ref) 149 { 150 struct hl_hw_sob *hw_sob = container_of(ref, struct hl_hw_sob, 151 kref); 152 struct hl_device *hdev = hw_sob->hdev; 153 154 dev_crit(hdev->dev, 155 "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n", 156 hw_sob->q_idx, hw_sob->sob_id); 157 } 158 159 void hw_sob_put(struct hl_hw_sob *hw_sob) 160 { 161 if (hw_sob) 162 kref_put(&hw_sob->kref, hl_sob_reset); 163 } 164 165 static void hw_sob_put_err(struct hl_hw_sob *hw_sob) 166 { 167 if (hw_sob) 168 kref_put(&hw_sob->kref, hl_sob_reset_error); 169 } 170 171 void hw_sob_get(struct hl_hw_sob *hw_sob) 172 { 173 if (hw_sob) 174 kref_get(&hw_sob->kref); 175 } 176 177 /** 178 * hl_gen_sob_mask() - Generates a sob mask to be used in a monitor arm packet 179 * @sob_base: sob base id 180 * @sob_mask: sob user mask, each bit represents a sob offset from sob base 181 * @mask: generated mask 182 * 183 * Return: 0 if given parameters are valid 184 */ 185 int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask) 186 { 187 int i; 188 189 if (sob_mask == 0) 190 return -EINVAL; 191 192 if (sob_mask == 0x1) { 193 *mask = ~(1 << (sob_base & 0x7)); 194 } else { 195 /* find msb in order to verify sob range is valid */ 196 for (i = BITS_PER_BYTE - 1 ; i >= 0 ; i--) 197 if (BIT(i) & sob_mask) 198 break; 199 200 if (i > (HL_MAX_SOBS_PER_MONITOR - (sob_base & 0x7) - 1)) 201 return -EINVAL; 202 203 *mask = ~sob_mask; 204 } 205 206 return 0; 207 } 208 209 static void hl_fence_release(struct kref *kref) 210 { 211 struct hl_fence *fence = 212 container_of(kref, struct hl_fence, refcount); 213 struct hl_cs_compl *hl_cs_cmpl = 214 container_of(fence, struct hl_cs_compl, base_fence); 215 216 kfree(hl_cs_cmpl); 217 } 218 219 void hl_fence_put(struct hl_fence *fence) 220 { 221 if 
(IS_ERR_OR_NULL(fence)) 222 return; 223 kref_put(&fence->refcount, hl_fence_release); 224 } 225 226 void hl_fences_put(struct hl_fence **fence, int len) 227 { 228 int i; 229 230 for (i = 0; i < len; i++, fence++) 231 hl_fence_put(*fence); 232 } 233 234 void hl_fence_get(struct hl_fence *fence) 235 { 236 if (fence) 237 kref_get(&fence->refcount); 238 } 239 240 static void hl_fence_init(struct hl_fence *fence, u64 sequence) 241 { 242 kref_init(&fence->refcount); 243 fence->cs_sequence = sequence; 244 fence->error = 0; 245 fence->timestamp = ktime_set(0, 0); 246 fence->mcs_handling_done = false; 247 init_completion(&fence->completion); 248 } 249 250 void cs_get(struct hl_cs *cs) 251 { 252 kref_get(&cs->refcount); 253 } 254 255 static int cs_get_unless_zero(struct hl_cs *cs) 256 { 257 return kref_get_unless_zero(&cs->refcount); 258 } 259 260 static void cs_put(struct hl_cs *cs) 261 { 262 kref_put(&cs->refcount, cs_do_release); 263 } 264 265 static void cs_job_do_release(struct kref *ref) 266 { 267 struct hl_cs_job *job = container_of(ref, struct hl_cs_job, refcount); 268 269 kfree(job); 270 } 271 272 static void hl_cs_job_put(struct hl_cs_job *job) 273 { 274 kref_put(&job->refcount, cs_job_do_release); 275 } 276 277 bool cs_needs_completion(struct hl_cs *cs) 278 { 279 /* In case this is a staged CS, only the last CS in sequence should 280 * get a completion, any non staged CS will always get a completion 281 */ 282 if (cs->staged_cs && !cs->staged_last) 283 return false; 284 285 return true; 286 } 287 288 bool cs_needs_timeout(struct hl_cs *cs) 289 { 290 /* In case this is a staged CS, only the first CS in sequence should 291 * get a timeout, any non staged CS will always get a timeout 292 */ 293 if (cs->staged_cs && !cs->staged_first) 294 return false; 295 296 return true; 297 } 298 299 static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job) 300 { 301 /* Patched CB is created for external queues jobs */ 302 return (job->queue_type == QUEUE_TYPE_EXT); 303 } 304 305 /* 306 * cs_parser - parse the user command submission 307 * 308 * @hpriv : pointer to the private data of the fd 309 * @job : pointer to the job that holds the command submission info 310 * 311 * The function parses the command submission of the user. 
It calls the 312 * ASIC specific parser, which returns a list of memory blocks to send 313 * to the device as different command buffers 314 * 315 */ 316 static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job) 317 { 318 struct hl_device *hdev = hpriv->hdev; 319 struct hl_cs_parser parser; 320 int rc; 321 322 parser.ctx_id = job->cs->ctx->asid; 323 parser.cs_sequence = job->cs->sequence; 324 parser.job_id = job->id; 325 326 parser.hw_queue_id = job->hw_queue_id; 327 parser.job_userptr_list = &job->userptr_list; 328 parser.patched_cb = NULL; 329 parser.user_cb = job->user_cb; 330 parser.user_cb_size = job->user_cb_size; 331 parser.queue_type = job->queue_type; 332 parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb; 333 job->patched_cb = NULL; 334 parser.completion = cs_needs_completion(job->cs); 335 336 rc = hdev->asic_funcs->cs_parser(hdev, &parser); 337 338 if (is_cb_patched(hdev, job)) { 339 if (!rc) { 340 job->patched_cb = parser.patched_cb; 341 job->job_cb_size = parser.patched_cb_size; 342 job->contains_dma_pkt = parser.contains_dma_pkt; 343 atomic_inc(&job->patched_cb->cs_cnt); 344 } 345 346 /* 347 * Whether the parsing worked or not, we don't need the 348 * original CB anymore because it was already parsed and 349 * won't be accessed again for this CS 350 */ 351 atomic_dec(&job->user_cb->cs_cnt); 352 hl_cb_put(job->user_cb); 353 job->user_cb = NULL; 354 } else if (!rc) { 355 job->job_cb_size = job->user_cb_size; 356 } 357 358 return rc; 359 } 360 361 static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job) 362 { 363 struct hl_cs *cs = job->cs; 364 365 if (is_cb_patched(hdev, job)) { 366 hl_userptr_delete_list(hdev, &job->userptr_list); 367 368 /* 369 * We might arrive here from rollback and patched CB wasn't 370 * created, so we need to check it's not NULL 371 */ 372 if (job->patched_cb) { 373 atomic_dec(&job->patched_cb->cs_cnt); 374 hl_cb_put(job->patched_cb); 375 } 376 } 377 378 /* For H/W queue jobs, if a user CB was allocated by driver, 379 * the user CB isn't released in cs_parser() and thus should be 380 * released here. This is also true for INT queues jobs which were 381 * allocated by driver. 382 */ 383 if (job->is_kernel_allocated_cb && 384 (job->queue_type == QUEUE_TYPE_HW || job->queue_type == QUEUE_TYPE_INT)) { 385 atomic_dec(&job->user_cb->cs_cnt); 386 hl_cb_put(job->user_cb); 387 } 388 389 /* 390 * This is the only place where there can be multiple threads 391 * modifying the list at the same time 392 */ 393 spin_lock(&cs->job_lock); 394 list_del(&job->cs_node); 395 spin_unlock(&cs->job_lock); 396 397 hl_debugfs_remove_job(hdev, job); 398 399 /* We decrement reference only for a CS that gets completion 400 * because the reference was incremented only for this kind of CS 401 * right before it was scheduled. 402 * 403 * In staged submission, only the last CS marked as 'staged_last' 404 * gets completion, hence its release function will be called from here. 405 * As for all the rest CS's in the staged submission which do not get 406 * completion, their CS reference will be decremented by the 407 * 'staged_last' CS during the CS release flow. 408 * All relevant PQ CI counters will be incremented during the CS release 409 * flow by calling 'hl_hw_queue_update_ci'. 
410 */ 411 if (cs_needs_completion(cs) && 412 (job->queue_type == QUEUE_TYPE_EXT || job->queue_type == QUEUE_TYPE_HW)) { 413 414 /* In CS based completions, the timestamp is already available, 415 * so no need to extract it from job 416 */ 417 if (hdev->asic_prop.completion_mode == HL_COMPLETION_MODE_JOB) 418 cs->completion_timestamp = job->timestamp; 419 420 cs_put(cs); 421 } 422 423 hl_cs_job_put(job); 424 } 425 426 /* 427 * hl_staged_cs_find_first - locate the first CS in this staged submission 428 * 429 * @hdev: pointer to device structure 430 * @cs_seq: staged submission sequence number 431 * 432 * @note: This function must be called under 'hdev->cs_mirror_lock' 433 * 434 * Find and return a CS pointer with the given sequence 435 */ 436 struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq) 437 { 438 struct hl_cs *cs; 439 440 list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node) 441 if (cs->staged_cs && cs->staged_first && 442 cs->sequence == cs_seq) 443 return cs; 444 445 return NULL; 446 } 447 448 /* 449 * is_staged_cs_last_exists - returns true if the last CS in sequence exists 450 * 451 * @hdev: pointer to device structure 452 * @cs: staged submission member 453 * 454 */ 455 bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs) 456 { 457 struct hl_cs *last_entry; 458 459 last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs, 460 staged_cs_node); 461 462 if (last_entry->staged_last) 463 return true; 464 465 return false; 466 } 467 468 /* 469 * staged_cs_get - get CS reference if this CS is a part of a staged CS 470 * 471 * @hdev: pointer to device structure 472 * @cs: current CS 473 * @cs_seq: staged submission sequence number 474 * 475 * Increment CS reference for every CS in this staged submission except for 476 * the CS which get completion. 477 */ 478 static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs) 479 { 480 /* Only the last CS in this staged submission will get a completion. 481 * We must increment the reference for all other CS's in this 482 * staged submission. 483 * Once we get a completion we will release the whole staged submission. 484 */ 485 if (!cs->staged_last) 486 cs_get(cs); 487 } 488 489 /* 490 * staged_cs_put - put a CS in case it is part of staged submission 491 * 492 * @hdev: pointer to device structure 493 * @cs: CS to put 494 * 495 * This function decrements a CS reference (for a non completion CS) 496 */ 497 static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs) 498 { 499 /* We release all CS's in a staged submission except the last 500 * CS which we have never incremented its reference. 501 */ 502 if (!cs_needs_completion(cs)) 503 cs_put(cs); 504 } 505 506 static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs) 507 { 508 struct hl_cs *next = NULL, *iter, *first_cs; 509 510 if (!cs_needs_timeout(cs)) 511 return; 512 513 spin_lock(&hdev->cs_mirror_lock); 514 515 /* We need to handle tdr only once for the complete staged submission. 516 * Hence, we choose the CS that reaches this function first which is 517 * the CS marked as 'staged_last'. 518 * In case single staged cs was submitted which has both first and last 519 * indications, then "cs_find_first" below will return NULL, since we 520 * removed the cs node from the list before getting here, 521 * in such cases just continue with the cs to cancel it's TDR work. 
522 */ 523 if (cs->staged_cs && cs->staged_last) { 524 first_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence); 525 if (first_cs) 526 cs = first_cs; 527 } 528 529 spin_unlock(&hdev->cs_mirror_lock); 530 531 /* Don't cancel TDR in case this CS was timedout because we might be 532 * running from the TDR context 533 */ 534 if (cs->timedout || hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT) 535 return; 536 537 if (cs->tdr_active) 538 cancel_delayed_work_sync(&cs->work_tdr); 539 540 spin_lock(&hdev->cs_mirror_lock); 541 542 /* queue TDR for next CS */ 543 list_for_each_entry(iter, &hdev->cs_mirror_list, mirror_node) 544 if (cs_needs_timeout(iter)) { 545 next = iter; 546 break; 547 } 548 549 if (next && !next->tdr_active) { 550 next->tdr_active = true; 551 schedule_delayed_work(&next->work_tdr, next->timeout_jiffies); 552 } 553 554 spin_unlock(&hdev->cs_mirror_lock); 555 } 556 557 /* 558 * force_complete_multi_cs - complete all contexts that wait on multi-CS 559 * 560 * @hdev: pointer to habanalabs device structure 561 */ 562 static void force_complete_multi_cs(struct hl_device *hdev) 563 { 564 int i; 565 566 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { 567 struct multi_cs_completion *mcs_compl; 568 569 mcs_compl = &hdev->multi_cs_completion[i]; 570 571 spin_lock(&mcs_compl->lock); 572 573 if (!mcs_compl->used) { 574 spin_unlock(&mcs_compl->lock); 575 continue; 576 } 577 578 /* when calling force complete no context should be waiting on 579 * multi-cS. 580 * We are calling the function as a protection for such case 581 * to free any pending context and print error message 582 */ 583 dev_err(hdev->dev, 584 "multi-CS completion context %d still waiting when calling force completion\n", 585 i); 586 complete_all(&mcs_compl->completion); 587 spin_unlock(&mcs_compl->lock); 588 } 589 } 590 591 /* 592 * complete_multi_cs - complete all waiting entities on multi-CS 593 * 594 * @hdev: pointer to habanalabs device structure 595 * @cs: CS structure 596 * The function signals a waiting entity that has an overlapping stream masters 597 * with the completed CS. 598 * For example: 599 * - a completed CS worked on stream master QID 4, multi CS completion 600 * is actively waiting on stream master QIDs 3, 5. don't send signal as no 601 * common stream master QID 602 * - a completed CS worked on stream master QID 4, multi CS completion 603 * is actively waiting on stream master QIDs 3, 4. send signal as stream 604 * master QID 4 is common 605 */ 606 static void complete_multi_cs(struct hl_device *hdev, struct hl_cs *cs) 607 { 608 struct hl_fence *fence = cs->fence; 609 int i; 610 611 /* in case of multi CS check for completion only for the first CS */ 612 if (cs->staged_cs && !cs->staged_first) 613 return; 614 615 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { 616 struct multi_cs_completion *mcs_compl; 617 618 mcs_compl = &hdev->multi_cs_completion[i]; 619 if (!mcs_compl->used) 620 continue; 621 622 spin_lock(&mcs_compl->lock); 623 624 /* 625 * complete if: 626 * 1. still waiting for completion 627 * 2. 
the completed CS has at least one overlapping stream 628 * master with the stream masters in the completion 629 */ 630 if (mcs_compl->used && 631 (fence->stream_master_qid_map & 632 mcs_compl->stream_master_qid_map)) { 633 /* extract the timestamp only of first completed CS */ 634 if (!mcs_compl->timestamp) 635 mcs_compl->timestamp = ktime_to_ns(fence->timestamp); 636 637 complete_all(&mcs_compl->completion); 638 639 /* 640 * Setting mcs_handling_done inside the lock ensures 641 * at least one fence have mcs_handling_done set to 642 * true before wait for mcs finish. This ensures at 643 * least one CS will be set as completed when polling 644 * mcs fences. 645 */ 646 fence->mcs_handling_done = true; 647 } 648 649 spin_unlock(&mcs_compl->lock); 650 } 651 /* In case CS completed without mcs completion initialized */ 652 fence->mcs_handling_done = true; 653 } 654 655 static inline void cs_release_sob_reset_handler(struct hl_device *hdev, 656 struct hl_cs *cs, 657 struct hl_cs_compl *hl_cs_cmpl) 658 { 659 /* Skip this handler if the cs wasn't submitted, to avoid putting 660 * the hw_sob twice, since this case already handled at this point, 661 * also skip if the hw_sob pointer wasn't set. 662 */ 663 if (!hl_cs_cmpl->hw_sob || !cs->submitted) 664 return; 665 666 spin_lock(&hl_cs_cmpl->lock); 667 668 /* 669 * we get refcount upon reservation of signals or signal/wait cs for the 670 * hw_sob object, and need to put it when the first staged cs 671 * (which contains the encaps signals) or cs signal/wait is completed. 672 */ 673 if ((hl_cs_cmpl->type == CS_TYPE_SIGNAL) || 674 (hl_cs_cmpl->type == CS_TYPE_WAIT) || 675 (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) || 676 (!!hl_cs_cmpl->encaps_signals)) { 677 dev_dbg(hdev->dev, 678 "CS 0x%llx type %d finished, sob_id: %d, sob_val: %u\n", 679 hl_cs_cmpl->cs_seq, 680 hl_cs_cmpl->type, 681 hl_cs_cmpl->hw_sob->sob_id, 682 hl_cs_cmpl->sob_val); 683 684 hw_sob_put(hl_cs_cmpl->hw_sob); 685 686 if (hl_cs_cmpl->type == CS_TYPE_COLLECTIVE_WAIT) 687 hdev->asic_funcs->reset_sob_group(hdev, 688 hl_cs_cmpl->sob_group); 689 } 690 691 spin_unlock(&hl_cs_cmpl->lock); 692 } 693 694 static void cs_do_release(struct kref *ref) 695 { 696 struct hl_cs *cs = container_of(ref, struct hl_cs, refcount); 697 struct hl_device *hdev = cs->ctx->hdev; 698 struct hl_cs_job *job, *tmp; 699 struct hl_cs_compl *hl_cs_cmpl = 700 container_of(cs->fence, struct hl_cs_compl, base_fence); 701 702 cs->completed = true; 703 704 /* 705 * Although if we reached here it means that all external jobs have 706 * finished, because each one of them took refcnt to CS, we still 707 * need to go over the internal jobs and complete them. Otherwise, we 708 * will have leaked memory and what's worse, the CS object (and 709 * potentially the CTX object) could be released, while the JOB 710 * still holds a pointer to them (but no reference). 711 */ 712 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) 713 hl_complete_job(hdev, job); 714 715 if (!cs->submitted) { 716 /* 717 * In case the wait for signal CS was submitted, the fence put 718 * occurs in init_signal_wait_cs() or collective_wait_init_cs() 719 * right before hanging on the PQ. 
720 */ 721 if (cs->type == CS_TYPE_WAIT || 722 cs->type == CS_TYPE_COLLECTIVE_WAIT) 723 hl_fence_put(cs->signal_fence); 724 725 goto out; 726 } 727 728 /* Need to update CI for all queue jobs that does not get completion */ 729 hl_hw_queue_update_ci(cs); 730 731 /* remove CS from CS mirror list */ 732 spin_lock(&hdev->cs_mirror_lock); 733 list_del_init(&cs->mirror_node); 734 spin_unlock(&hdev->cs_mirror_lock); 735 736 cs_handle_tdr(hdev, cs); 737 738 if (cs->staged_cs) { 739 /* the completion CS decrements reference for the entire 740 * staged submission 741 */ 742 if (cs->staged_last) { 743 struct hl_cs *staged_cs, *tmp_cs; 744 745 list_for_each_entry_safe(staged_cs, tmp_cs, 746 &cs->staged_cs_node, staged_cs_node) 747 staged_cs_put(hdev, staged_cs); 748 } 749 750 /* A staged CS will be a member in the list only after it 751 * was submitted. We used 'cs_mirror_lock' when inserting 752 * it to list so we will use it again when removing it 753 */ 754 if (cs->submitted) { 755 spin_lock(&hdev->cs_mirror_lock); 756 list_del(&cs->staged_cs_node); 757 spin_unlock(&hdev->cs_mirror_lock); 758 } 759 760 /* decrement refcount to handle when first staged cs 761 * with encaps signals is completed. 762 */ 763 if (hl_cs_cmpl->encaps_signals) 764 kref_put(&hl_cs_cmpl->encaps_sig_hdl->refcount, 765 hl_encaps_release_handle_and_put_ctx); 766 } 767 768 if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) && cs->encaps_signals) 769 kref_put(&cs->encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx); 770 771 out: 772 /* Must be called before hl_ctx_put because inside we use ctx to get 773 * the device 774 */ 775 hl_debugfs_remove_cs(cs); 776 777 hdev->shadow_cs_queue[cs->sequence & (hdev->asic_prop.max_pending_cs - 1)] = NULL; 778 779 /* We need to mark an error for not submitted because in that case 780 * the hl fence release flow is different. 
Mainly, we don't need 781 * to handle hw_sob for signal/wait 782 */ 783 if (cs->timedout) 784 cs->fence->error = -ETIMEDOUT; 785 else if (cs->aborted) 786 cs->fence->error = -EIO; 787 else if (!cs->submitted) 788 cs->fence->error = -EBUSY; 789 790 if (unlikely(cs->skip_reset_on_timeout)) { 791 dev_err(hdev->dev, 792 "Command submission %llu completed after %llu (s)\n", 793 cs->sequence, 794 div_u64(jiffies - cs->submission_time_jiffies, HZ)); 795 } 796 797 if (cs->timestamp) { 798 cs->fence->timestamp = cs->completion_timestamp; 799 hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence, 800 cs->fence->timestamp, cs->fence->error); 801 } 802 803 hl_ctx_put(cs->ctx); 804 805 complete_all(&cs->fence->completion); 806 complete_multi_cs(hdev, cs); 807 808 cs_release_sob_reset_handler(hdev, cs, hl_cs_cmpl); 809 810 hl_fence_put(cs->fence); 811 812 kfree(cs->jobs_in_queue_cnt); 813 kfree(cs); 814 } 815 816 static void cs_timedout(struct work_struct *work) 817 { 818 struct hl_cs *cs = container_of(work, struct hl_cs, work_tdr.work); 819 bool skip_reset_on_timeout, device_reset = false; 820 struct hl_device *hdev; 821 u64 event_mask = 0x0; 822 uint timeout_sec; 823 int rc; 824 825 skip_reset_on_timeout = cs->skip_reset_on_timeout; 826 827 rc = cs_get_unless_zero(cs); 828 if (!rc) 829 return; 830 831 if ((!cs->submitted) || (cs->completed)) { 832 cs_put(cs); 833 return; 834 } 835 836 hdev = cs->ctx->hdev; 837 838 if (likely(!skip_reset_on_timeout)) { 839 if (hdev->reset_on_lockup) 840 device_reset = true; 841 else 842 hdev->reset_info.needs_reset = true; 843 844 /* Mark the CS is timed out so we won't try to cancel its TDR */ 845 cs->timedout = true; 846 } 847 848 /* Save only the first CS timeout parameters */ 849 rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0); 850 if (rc) { 851 hdev->captured_err_info.cs_timeout.timestamp = ktime_get(); 852 hdev->captured_err_info.cs_timeout.seq = cs->sequence; 853 event_mask |= HL_NOTIFIER_EVENT_CS_TIMEOUT; 854 } 855 856 timeout_sec = jiffies_to_msecs(hdev->timeout_jiffies) / 1000; 857 858 switch (cs->type) { 859 case CS_TYPE_SIGNAL: 860 dev_err(hdev->dev, 861 "Signal command submission %llu has not finished in %u seconds!\n", 862 cs->sequence, timeout_sec); 863 break; 864 865 case CS_TYPE_WAIT: 866 dev_err(hdev->dev, 867 "Wait command submission %llu has not finished in %u seconds!\n", 868 cs->sequence, timeout_sec); 869 break; 870 871 case CS_TYPE_COLLECTIVE_WAIT: 872 dev_err(hdev->dev, 873 "Collective Wait command submission %llu has not finished in %u seconds!\n", 874 cs->sequence, timeout_sec); 875 break; 876 877 default: 878 dev_err(hdev->dev, 879 "Command submission %llu has not finished in %u seconds!\n", 880 cs->sequence, timeout_sec); 881 break; 882 } 883 884 rc = hl_state_dump(hdev); 885 if (rc) 886 dev_err(hdev->dev, "Error during system state dump %d\n", rc); 887 888 cs_put(cs); 889 890 if (device_reset) { 891 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 892 hl_device_cond_reset(hdev, HL_DRV_RESET_TDR, event_mask); 893 } else if (event_mask) { 894 hl_notifier_event_send_all(hdev, event_mask); 895 } 896 } 897 898 static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx, 899 enum hl_cs_type cs_type, u64 user_sequence, 900 struct hl_cs **cs_new, u32 flags, u32 timeout) 901 { 902 struct hl_cs_counters_atomic *cntr; 903 struct hl_fence *other = NULL; 904 struct hl_cs_compl *cs_cmpl; 905 struct hl_cs *cs; 906 int rc; 907 908 cntr = &hdev->aggregated_cs_counters; 909 910 cs = kzalloc(sizeof(*cs), GFP_ATOMIC); 
911 if (!cs) 912 cs = kzalloc(sizeof(*cs), GFP_KERNEL); 913 914 if (!cs) { 915 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 916 atomic64_inc(&cntr->out_of_mem_drop_cnt); 917 return -ENOMEM; 918 } 919 920 /* increment refcnt for context */ 921 hl_ctx_get(ctx); 922 923 cs->ctx = ctx; 924 cs->submitted = false; 925 cs->completed = false; 926 cs->type = cs_type; 927 cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP); 928 cs->encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS); 929 cs->timeout_jiffies = timeout; 930 cs->skip_reset_on_timeout = 931 hdev->reset_info.skip_reset_on_timeout || 932 !!(flags & HL_CS_FLAGS_SKIP_RESET_ON_TIMEOUT); 933 cs->submission_time_jiffies = jiffies; 934 INIT_LIST_HEAD(&cs->job_list); 935 INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout); 936 kref_init(&cs->refcount); 937 spin_lock_init(&cs->job_lock); 938 939 cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_ATOMIC); 940 if (!cs_cmpl) 941 cs_cmpl = kzalloc(sizeof(*cs_cmpl), GFP_KERNEL); 942 943 if (!cs_cmpl) { 944 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 945 atomic64_inc(&cntr->out_of_mem_drop_cnt); 946 rc = -ENOMEM; 947 goto free_cs; 948 } 949 950 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, 951 sizeof(*cs->jobs_in_queue_cnt), GFP_ATOMIC); 952 if (!cs->jobs_in_queue_cnt) 953 cs->jobs_in_queue_cnt = kcalloc(hdev->asic_prop.max_queues, 954 sizeof(*cs->jobs_in_queue_cnt), GFP_KERNEL); 955 956 if (!cs->jobs_in_queue_cnt) { 957 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 958 atomic64_inc(&cntr->out_of_mem_drop_cnt); 959 rc = -ENOMEM; 960 goto free_cs_cmpl; 961 } 962 963 cs_cmpl->hdev = hdev; 964 cs_cmpl->type = cs->type; 965 spin_lock_init(&cs_cmpl->lock); 966 cs->fence = &cs_cmpl->base_fence; 967 968 spin_lock(&ctx->cs_lock); 969 970 cs_cmpl->cs_seq = ctx->cs_sequence; 971 other = ctx->cs_pending[cs_cmpl->cs_seq & 972 (hdev->asic_prop.max_pending_cs - 1)]; 973 974 if (other && !completion_done(&other->completion)) { 975 /* If the following statement is true, it means we have reached 976 * a point in which only part of the staged submission was 977 * submitted and we don't have enough room in the 'cs_pending' 978 * array for the rest of the submission. 979 * This causes a deadlock because this CS will never be 980 * completed as it depends on future CS's for completion. 
981 */ 982 if (other->cs_sequence == user_sequence) 983 dev_crit_ratelimited(hdev->dev, 984 "Staged CS %llu deadlock due to lack of resources", 985 user_sequence); 986 987 dev_dbg_ratelimited(hdev->dev, 988 "Rejecting CS because of too many in-flights CS\n"); 989 atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt); 990 atomic64_inc(&cntr->max_cs_in_flight_drop_cnt); 991 rc = -EAGAIN; 992 goto free_fence; 993 } 994 995 /* init hl_fence */ 996 hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq); 997 998 cs->sequence = cs_cmpl->cs_seq; 999 1000 ctx->cs_pending[cs_cmpl->cs_seq & 1001 (hdev->asic_prop.max_pending_cs - 1)] = 1002 &cs_cmpl->base_fence; 1003 ctx->cs_sequence++; 1004 1005 hl_fence_get(&cs_cmpl->base_fence); 1006 1007 hl_fence_put(other); 1008 1009 spin_unlock(&ctx->cs_lock); 1010 1011 *cs_new = cs; 1012 1013 return 0; 1014 1015 free_fence: 1016 spin_unlock(&ctx->cs_lock); 1017 kfree(cs->jobs_in_queue_cnt); 1018 free_cs_cmpl: 1019 kfree(cs_cmpl); 1020 free_cs: 1021 kfree(cs); 1022 hl_ctx_put(ctx); 1023 return rc; 1024 } 1025 1026 static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs) 1027 { 1028 struct hl_cs_job *job, *tmp; 1029 1030 staged_cs_put(hdev, cs); 1031 1032 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) 1033 hl_complete_job(hdev, job); 1034 } 1035 1036 /* 1037 * release_reserved_encaps_signals() - release reserved encapsulated signals. 1038 * @hdev: pointer to habanalabs device structure 1039 * 1040 * Release reserved encapsulated signals which weren't un-reserved, or for which a CS with 1041 * encapsulated signals wasn't submitted and thus weren't released as part of CS roll-back. 1042 * For these signals need also to put the refcount of the H/W SOB which was taken at the 1043 * reservation. 1044 */ 1045 static void release_reserved_encaps_signals(struct hl_device *hdev) 1046 { 1047 struct hl_ctx *ctx = hl_get_compute_ctx(hdev); 1048 struct hl_cs_encaps_sig_handle *handle; 1049 struct hl_encaps_signals_mgr *mgr; 1050 u32 id; 1051 1052 if (!ctx) 1053 return; 1054 1055 mgr = &ctx->sig_mgr; 1056 1057 idr_for_each_entry(&mgr->handles, handle, id) 1058 if (handle->cs_seq == ULLONG_MAX) 1059 kref_put(&handle->refcount, hl_encaps_release_handle_and_put_sob_ctx); 1060 1061 hl_ctx_put(ctx); 1062 } 1063 1064 void hl_cs_rollback_all(struct hl_device *hdev, bool skip_wq_flush) 1065 { 1066 int i; 1067 struct hl_cs *cs, *tmp; 1068 1069 if (!skip_wq_flush) { 1070 flush_workqueue(hdev->ts_free_obj_wq); 1071 1072 /* flush all completions before iterating over the CS mirror list in 1073 * order to avoid a race with the release functions 1074 */ 1075 for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++) 1076 flush_workqueue(hdev->cq_wq[i]); 1077 1078 flush_workqueue(hdev->cs_cmplt_wq); 1079 } 1080 1081 /* Make sure we don't have leftovers in the CS mirror list */ 1082 list_for_each_entry_safe(cs, tmp, &hdev->cs_mirror_list, mirror_node) { 1083 cs_get(cs); 1084 cs->aborted = true; 1085 dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n", 1086 cs->ctx->asid, cs->sequence); 1087 cs_rollback(hdev, cs); 1088 cs_put(cs); 1089 } 1090 1091 force_complete_multi_cs(hdev); 1092 1093 release_reserved_encaps_signals(hdev); 1094 } 1095 1096 static void 1097 wake_pending_user_interrupt_threads(struct hl_user_interrupt *interrupt) 1098 { 1099 struct hl_user_pending_interrupt *pend, *temp; 1100 unsigned long flags; 1101 1102 spin_lock_irqsave(&interrupt->wait_list_lock, flags); 1103 list_for_each_entry_safe(pend, temp, &interrupt->wait_list_head, list_node) { 1104 
pend->fence.error = -EIO; 1105 complete_all(&pend->fence.completion); 1106 } 1107 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); 1108 1109 spin_lock_irqsave(&interrupt->ts_list_lock, flags); 1110 list_for_each_entry_safe(pend, temp, &interrupt->ts_list_head, list_node) { 1111 list_del(&pend->list_node); 1112 hl_mmap_mem_buf_put(pend->ts_reg_info.buf); 1113 hl_cb_put(pend->ts_reg_info.cq_cb); 1114 } 1115 spin_unlock_irqrestore(&interrupt->ts_list_lock, flags); 1116 } 1117 1118 void hl_release_pending_user_interrupts(struct hl_device *hdev) 1119 { 1120 struct asic_fixed_properties *prop = &hdev->asic_prop; 1121 struct hl_user_interrupt *interrupt; 1122 int i; 1123 1124 if (!prop->user_interrupt_count) 1125 return; 1126 1127 /* We iterate through the user interrupt requests and waking up all 1128 * user threads waiting for interrupt completion. We iterate the 1129 * list under a lock, this is why all user threads, once awake, 1130 * will wait on the same lock and will release the waiting object upon 1131 * unlock. 1132 */ 1133 1134 for (i = 0 ; i < prop->user_interrupt_count ; i++) { 1135 interrupt = &hdev->user_interrupt[i]; 1136 wake_pending_user_interrupt_threads(interrupt); 1137 } 1138 1139 interrupt = &hdev->common_user_cq_interrupt; 1140 wake_pending_user_interrupt_threads(interrupt); 1141 1142 interrupt = &hdev->common_decoder_interrupt; 1143 wake_pending_user_interrupt_threads(interrupt); 1144 } 1145 1146 static void force_complete_cs(struct hl_device *hdev) 1147 { 1148 struct hl_cs *cs; 1149 1150 spin_lock(&hdev->cs_mirror_lock); 1151 1152 list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node) { 1153 cs->fence->error = -EIO; 1154 complete_all(&cs->fence->completion); 1155 } 1156 1157 spin_unlock(&hdev->cs_mirror_lock); 1158 } 1159 1160 void hl_abort_waiting_for_cs_completions(struct hl_device *hdev) 1161 { 1162 force_complete_cs(hdev); 1163 force_complete_multi_cs(hdev); 1164 } 1165 1166 static void job_wq_completion(struct work_struct *work) 1167 { 1168 struct hl_cs_job *job = container_of(work, struct hl_cs_job, 1169 finish_work); 1170 struct hl_cs *cs = job->cs; 1171 struct hl_device *hdev = cs->ctx->hdev; 1172 1173 /* job is no longer needed */ 1174 hl_complete_job(hdev, job); 1175 } 1176 1177 static void cs_completion(struct work_struct *work) 1178 { 1179 struct hl_cs *cs = container_of(work, struct hl_cs, finish_work); 1180 struct hl_device *hdev = cs->ctx->hdev; 1181 struct hl_cs_job *job, *tmp; 1182 1183 list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node) 1184 hl_complete_job(hdev, job); 1185 } 1186 1187 u32 hl_get_active_cs_num(struct hl_device *hdev) 1188 { 1189 u32 active_cs_num = 0; 1190 struct hl_cs *cs; 1191 1192 spin_lock(&hdev->cs_mirror_lock); 1193 1194 list_for_each_entry(cs, &hdev->cs_mirror_list, mirror_node) 1195 if (!cs->completed) 1196 active_cs_num++; 1197 1198 spin_unlock(&hdev->cs_mirror_lock); 1199 1200 return active_cs_num; 1201 } 1202 1203 static int validate_queue_index(struct hl_device *hdev, 1204 struct hl_cs_chunk *chunk, 1205 enum hl_queue_type *queue_type, 1206 bool *is_kernel_allocated_cb) 1207 { 1208 struct asic_fixed_properties *asic = &hdev->asic_prop; 1209 struct hw_queue_properties *hw_queue_prop; 1210 1211 /* This must be checked here to prevent out-of-bounds access to 1212 * hw_queues_props array 1213 */ 1214 if (chunk->queue_index >= asic->max_queues) { 1215 dev_err(hdev->dev, "Queue index %d is invalid\n", 1216 chunk->queue_index); 1217 return -EINVAL; 1218 } 1219 1220 hw_queue_prop = 
&asic->hw_queues_props[chunk->queue_index]; 1221 1222 if (hw_queue_prop->type == QUEUE_TYPE_NA) { 1223 dev_err(hdev->dev, "Queue index %d is not applicable\n", 1224 chunk->queue_index); 1225 return -EINVAL; 1226 } 1227 1228 if (hw_queue_prop->binned) { 1229 dev_err(hdev->dev, "Queue index %d is binned out\n", 1230 chunk->queue_index); 1231 return -EINVAL; 1232 } 1233 1234 if (hw_queue_prop->driver_only) { 1235 dev_err(hdev->dev, 1236 "Queue index %d is restricted for the kernel driver\n", 1237 chunk->queue_index); 1238 return -EINVAL; 1239 } 1240 1241 /* When hw queue type isn't QUEUE_TYPE_HW, 1242 * USER_ALLOC_CB flag shall be referred as "don't care". 1243 */ 1244 if (hw_queue_prop->type == QUEUE_TYPE_HW) { 1245 if (chunk->cs_chunk_flags & HL_CS_CHUNK_FLAGS_USER_ALLOC_CB) { 1246 if (!(hw_queue_prop->cb_alloc_flags & CB_ALLOC_USER)) { 1247 dev_err(hdev->dev, 1248 "Queue index %d doesn't support user CB\n", 1249 chunk->queue_index); 1250 return -EINVAL; 1251 } 1252 1253 *is_kernel_allocated_cb = false; 1254 } else { 1255 if (!(hw_queue_prop->cb_alloc_flags & 1256 CB_ALLOC_KERNEL)) { 1257 dev_err(hdev->dev, 1258 "Queue index %d doesn't support kernel CB\n", 1259 chunk->queue_index); 1260 return -EINVAL; 1261 } 1262 1263 *is_kernel_allocated_cb = true; 1264 } 1265 } else { 1266 *is_kernel_allocated_cb = !!(hw_queue_prop->cb_alloc_flags 1267 & CB_ALLOC_KERNEL); 1268 } 1269 1270 *queue_type = hw_queue_prop->type; 1271 return 0; 1272 } 1273 1274 static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev, 1275 struct hl_mem_mgr *mmg, 1276 struct hl_cs_chunk *chunk) 1277 { 1278 struct hl_cb *cb; 1279 1280 cb = hl_cb_get(mmg, chunk->cb_handle); 1281 if (!cb) { 1282 dev_err(hdev->dev, "CB handle 0x%llx invalid\n", chunk->cb_handle); 1283 return NULL; 1284 } 1285 1286 if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) { 1287 dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size); 1288 goto release_cb; 1289 } 1290 1291 atomic_inc(&cb->cs_cnt); 1292 1293 return cb; 1294 1295 release_cb: 1296 hl_cb_put(cb); 1297 return NULL; 1298 } 1299 1300 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev, 1301 enum hl_queue_type queue_type, bool is_kernel_allocated_cb) 1302 { 1303 struct hl_cs_job *job; 1304 1305 job = kzalloc(sizeof(*job), GFP_ATOMIC); 1306 if (!job) 1307 job = kzalloc(sizeof(*job), GFP_KERNEL); 1308 1309 if (!job) 1310 return NULL; 1311 1312 kref_init(&job->refcount); 1313 job->queue_type = queue_type; 1314 job->is_kernel_allocated_cb = is_kernel_allocated_cb; 1315 1316 if (is_cb_patched(hdev, job)) 1317 INIT_LIST_HEAD(&job->userptr_list); 1318 1319 if (job->queue_type == QUEUE_TYPE_EXT) 1320 INIT_WORK(&job->finish_work, job_wq_completion); 1321 1322 return job; 1323 } 1324 1325 static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags) 1326 { 1327 if (cs_type_flags & HL_CS_FLAGS_SIGNAL) 1328 return CS_TYPE_SIGNAL; 1329 else if (cs_type_flags & HL_CS_FLAGS_WAIT) 1330 return CS_TYPE_WAIT; 1331 else if (cs_type_flags & HL_CS_FLAGS_COLLECTIVE_WAIT) 1332 return CS_TYPE_COLLECTIVE_WAIT; 1333 else if (cs_type_flags & HL_CS_FLAGS_RESERVE_SIGNALS_ONLY) 1334 return CS_RESERVE_SIGNALS; 1335 else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY) 1336 return CS_UNRESERVE_SIGNALS; 1337 else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND) 1338 return CS_TYPE_ENGINE_CORE; 1339 else if (cs_type_flags & HL_CS_FLAGS_ENGINES_COMMAND) 1340 return CS_TYPE_ENGINES; 1341 else if (cs_type_flags & HL_CS_FLAGS_FLUSH_PCI_HBW_WRITES) 1342 return CS_TYPE_FLUSH_PCI_HBW_WRITES; 1343 else 
1344 return CS_TYPE_DEFAULT; 1345 } 1346 1347 static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args) 1348 { 1349 struct hl_device *hdev = hpriv->hdev; 1350 struct hl_ctx *ctx = hpriv->ctx; 1351 u32 cs_type_flags, num_chunks; 1352 enum hl_device_status status; 1353 enum hl_cs_type cs_type; 1354 bool is_sync_stream; 1355 int i; 1356 1357 for (i = 0 ; i < sizeof(args->in.pad) ; i++) 1358 if (args->in.pad[i]) { 1359 dev_dbg(hdev->dev, "Padding bytes must be 0\n"); 1360 return -EINVAL; 1361 } 1362 1363 if (!hl_device_operational(hdev, &status)) { 1364 return -EBUSY; 1365 } 1366 1367 if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) && 1368 !hdev->supports_staged_submission) { 1369 dev_err(hdev->dev, "staged submission not supported"); 1370 return -EPERM; 1371 } 1372 1373 cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK; 1374 1375 if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) { 1376 dev_err(hdev->dev, 1377 "CS type flags are mutually exclusive, context %d\n", 1378 ctx->asid); 1379 return -EINVAL; 1380 } 1381 1382 cs_type = hl_cs_get_cs_type(cs_type_flags); 1383 num_chunks = args->in.num_chunks_execute; 1384 1385 is_sync_stream = (cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT || 1386 cs_type == CS_TYPE_COLLECTIVE_WAIT); 1387 1388 if (unlikely(is_sync_stream && !hdev->supports_sync_stream)) { 1389 dev_err(hdev->dev, "Sync stream CS is not supported\n"); 1390 return -EINVAL; 1391 } 1392 1393 if (cs_type == CS_TYPE_DEFAULT) { 1394 if (!num_chunks) { 1395 dev_err(hdev->dev, "Got execute CS with 0 chunks, context %d\n", ctx->asid); 1396 return -EINVAL; 1397 } 1398 } else if (is_sync_stream && num_chunks != 1) { 1399 dev_err(hdev->dev, 1400 "Sync stream CS mandates one chunk only, context %d\n", 1401 ctx->asid); 1402 return -EINVAL; 1403 } 1404 1405 return 0; 1406 } 1407 1408 static int hl_cs_copy_chunk_array(struct hl_device *hdev, 1409 struct hl_cs_chunk **cs_chunk_array, 1410 void __user *chunks, u32 num_chunks, 1411 struct hl_ctx *ctx) 1412 { 1413 u32 size_to_copy; 1414 1415 if (num_chunks > HL_MAX_JOBS_PER_CS) { 1416 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 1417 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); 1418 dev_err(hdev->dev, 1419 "Number of chunks can NOT be larger than %d\n", 1420 HL_MAX_JOBS_PER_CS); 1421 return -EINVAL; 1422 } 1423 1424 *cs_chunk_array = kmalloc_array(num_chunks, sizeof(**cs_chunk_array), 1425 GFP_ATOMIC); 1426 if (!*cs_chunk_array) 1427 *cs_chunk_array = kmalloc_array(num_chunks, 1428 sizeof(**cs_chunk_array), GFP_KERNEL); 1429 if (!*cs_chunk_array) { 1430 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1431 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); 1432 return -ENOMEM; 1433 } 1434 1435 size_to_copy = num_chunks * sizeof(struct hl_cs_chunk); 1436 if (copy_from_user(*cs_chunk_array, chunks, size_to_copy)) { 1437 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 1438 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); 1439 dev_err(hdev->dev, "Failed to copy cs chunk array from user\n"); 1440 kfree(*cs_chunk_array); 1441 return -EFAULT; 1442 } 1443 1444 return 0; 1445 } 1446 1447 static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs, 1448 u64 sequence, u32 flags, 1449 u32 encaps_signal_handle) 1450 { 1451 if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION)) 1452 return 0; 1453 1454 cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST); 1455 cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST); 1456 1457 
if (cs->staged_first) { 1458 /* Staged CS sequence is the first CS sequence */ 1459 INIT_LIST_HEAD(&cs->staged_cs_node); 1460 cs->staged_sequence = cs->sequence; 1461 1462 if (cs->encaps_signals) 1463 cs->encaps_sig_hdl_id = encaps_signal_handle; 1464 } else { 1465 /* User sequence will be validated in 'hl_hw_queue_schedule_cs' 1466 * under the cs_mirror_lock 1467 */ 1468 cs->staged_sequence = sequence; 1469 } 1470 1471 /* Increment CS reference if needed */ 1472 staged_cs_get(hdev, cs); 1473 1474 cs->staged_cs = true; 1475 1476 return 0; 1477 } 1478 1479 static u32 get_stream_master_qid_mask(struct hl_device *hdev, u32 qid) 1480 { 1481 int i; 1482 1483 for (i = 0; i < hdev->stream_master_qid_arr_size; i++) 1484 if (qid == hdev->stream_master_qid_arr[i]) 1485 return BIT(i); 1486 1487 return 0; 1488 } 1489 1490 static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks, 1491 u32 num_chunks, u64 *cs_seq, u32 flags, 1492 u32 encaps_signals_handle, u32 timeout, 1493 u16 *signal_initial_sob_count) 1494 { 1495 bool staged_mid, int_queues_only = true, using_hw_queues = false; 1496 struct hl_device *hdev = hpriv->hdev; 1497 struct hl_cs_chunk *cs_chunk_array; 1498 struct hl_cs_counters_atomic *cntr; 1499 struct hl_ctx *ctx = hpriv->ctx; 1500 struct hl_cs_job *job; 1501 struct hl_cs *cs; 1502 struct hl_cb *cb; 1503 u64 user_sequence; 1504 u8 stream_master_qid_map = 0; 1505 int rc, i; 1506 1507 cntr = &hdev->aggregated_cs_counters; 1508 user_sequence = *cs_seq; 1509 *cs_seq = ULLONG_MAX; 1510 1511 rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks, 1512 hpriv->ctx); 1513 if (rc) 1514 goto out; 1515 1516 if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) && 1517 !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST)) 1518 staged_mid = true; 1519 else 1520 staged_mid = false; 1521 1522 rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, 1523 staged_mid ? 
user_sequence : ULLONG_MAX, &cs, flags, 1524 timeout); 1525 if (rc) 1526 goto free_cs_chunk_array; 1527 1528 *cs_seq = cs->sequence; 1529 1530 hl_debugfs_add_cs(cs); 1531 1532 rc = cs_staged_submission(hdev, cs, user_sequence, flags, 1533 encaps_signals_handle); 1534 if (rc) 1535 goto free_cs_object; 1536 1537 /* If this is a staged submission we must return the staged sequence 1538 * rather than the internal CS sequence 1539 */ 1540 if (cs->staged_cs) 1541 *cs_seq = cs->staged_sequence; 1542 1543 /* Validate ALL the CS chunks before submitting the CS */ 1544 for (i = 0 ; i < num_chunks ; i++) { 1545 struct hl_cs_chunk *chunk = &cs_chunk_array[i]; 1546 enum hl_queue_type queue_type; 1547 bool is_kernel_allocated_cb; 1548 1549 rc = validate_queue_index(hdev, chunk, &queue_type, 1550 &is_kernel_allocated_cb); 1551 if (rc) { 1552 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 1553 atomic64_inc(&cntr->validation_drop_cnt); 1554 goto free_cs_object; 1555 } 1556 1557 if (is_kernel_allocated_cb) { 1558 cb = get_cb_from_cs_chunk(hdev, &hpriv->mem_mgr, chunk); 1559 if (!cb) { 1560 atomic64_inc( 1561 &ctx->cs_counters.validation_drop_cnt); 1562 atomic64_inc(&cntr->validation_drop_cnt); 1563 rc = -EINVAL; 1564 goto free_cs_object; 1565 } 1566 } else { 1567 cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle; 1568 } 1569 1570 if (queue_type == QUEUE_TYPE_EXT || 1571 queue_type == QUEUE_TYPE_HW) { 1572 int_queues_only = false; 1573 1574 /* 1575 * store which stream are being used for external/HW 1576 * queues of this CS 1577 */ 1578 if (hdev->supports_wait_for_multi_cs) 1579 stream_master_qid_map |= 1580 get_stream_master_qid_mask(hdev, 1581 chunk->queue_index); 1582 } 1583 1584 if (queue_type == QUEUE_TYPE_HW) 1585 using_hw_queues = true; 1586 1587 job = hl_cs_allocate_job(hdev, queue_type, 1588 is_kernel_allocated_cb); 1589 if (!job) { 1590 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1591 atomic64_inc(&cntr->out_of_mem_drop_cnt); 1592 dev_err(hdev->dev, "Failed to allocate a new job\n"); 1593 rc = -ENOMEM; 1594 if (is_kernel_allocated_cb) 1595 goto release_cb; 1596 1597 goto free_cs_object; 1598 } 1599 1600 job->id = i + 1; 1601 job->cs = cs; 1602 job->user_cb = cb; 1603 job->user_cb_size = chunk->cb_size; 1604 job->hw_queue_id = chunk->queue_index; 1605 1606 cs->jobs_in_queue_cnt[job->hw_queue_id]++; 1607 cs->jobs_cnt++; 1608 1609 list_add_tail(&job->cs_node, &cs->job_list); 1610 1611 /* 1612 * Increment CS reference. 
When CS reference is 0, CS is 1613 * done and can be signaled to user and free all its resources 1614 * Only increment for JOB on external or H/W queues, because 1615 * only for those JOBs we get completion 1616 */ 1617 if (cs_needs_completion(cs) && 1618 (job->queue_type == QUEUE_TYPE_EXT || 1619 job->queue_type == QUEUE_TYPE_HW)) 1620 cs_get(cs); 1621 1622 hl_debugfs_add_job(hdev, job); 1623 1624 rc = cs_parser(hpriv, job); 1625 if (rc) { 1626 atomic64_inc(&ctx->cs_counters.parsing_drop_cnt); 1627 atomic64_inc(&cntr->parsing_drop_cnt); 1628 dev_err(hdev->dev, 1629 "Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n", 1630 cs->ctx->asid, cs->sequence, job->id, rc); 1631 goto free_cs_object; 1632 } 1633 } 1634 1635 /* We allow a CS with any queue type combination as long as it does 1636 * not get a completion 1637 */ 1638 if (int_queues_only && cs_needs_completion(cs)) { 1639 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 1640 atomic64_inc(&cntr->validation_drop_cnt); 1641 dev_err(hdev->dev, 1642 "Reject CS %d.%llu since it contains only internal queues jobs and needs completion\n", 1643 cs->ctx->asid, cs->sequence); 1644 rc = -EINVAL; 1645 goto free_cs_object; 1646 } 1647 1648 if (using_hw_queues) 1649 INIT_WORK(&cs->finish_work, cs_completion); 1650 1651 /* 1652 * store the (external/HW queues) streams used by the CS in the 1653 * fence object for multi-CS completion 1654 */ 1655 if (hdev->supports_wait_for_multi_cs) 1656 cs->fence->stream_master_qid_map = stream_master_qid_map; 1657 1658 rc = hl_hw_queue_schedule_cs(cs); 1659 if (rc) { 1660 if (rc != -EAGAIN) 1661 dev_err(hdev->dev, 1662 "Failed to submit CS %d.%llu to H/W queues, error %d\n", 1663 cs->ctx->asid, cs->sequence, rc); 1664 goto free_cs_object; 1665 } 1666 1667 *signal_initial_sob_count = cs->initial_sob_count; 1668 1669 rc = HL_CS_STATUS_SUCCESS; 1670 goto put_cs; 1671 1672 release_cb: 1673 atomic_dec(&cb->cs_cnt); 1674 hl_cb_put(cb); 1675 free_cs_object: 1676 cs_rollback(hdev, cs); 1677 *cs_seq = ULLONG_MAX; 1678 /* The path below is both for good and erroneous exits */ 1679 put_cs: 1680 /* We finished with the CS in this function, so put the ref */ 1681 cs_put(cs); 1682 free_cs_chunk_array: 1683 kfree(cs_chunk_array); 1684 out: 1685 return rc; 1686 } 1687 1688 static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args, 1689 u64 *cs_seq) 1690 { 1691 struct hl_device *hdev = hpriv->hdev; 1692 struct hl_ctx *ctx = hpriv->ctx; 1693 bool need_soft_reset = false; 1694 int rc = 0, do_ctx_switch = 0; 1695 void __user *chunks; 1696 u32 num_chunks, tmp; 1697 u16 sob_count; 1698 int ret; 1699 1700 if (hdev->supports_ctx_switch) 1701 do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); 1702 1703 if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) { 1704 mutex_lock(&hpriv->restore_phase_mutex); 1705 1706 if (do_ctx_switch) { 1707 rc = hdev->asic_funcs->context_switch(hdev, ctx->asid); 1708 if (rc) { 1709 dev_err_ratelimited(hdev->dev, 1710 "Failed to switch to context %d, rejecting CS! %d\n", 1711 ctx->asid, rc); 1712 /* 1713 * If we timedout, or if the device is not IDLE 1714 * while we want to do context-switch (-EBUSY), 1715 * we need to soft-reset because QMAN is 1716 * probably stuck. 
However, we can't call to 1717 * reset here directly because of deadlock, so 1718 * need to do it at the very end of this 1719 * function 1720 */ 1721 if ((rc == -ETIMEDOUT) || (rc == -EBUSY)) 1722 need_soft_reset = true; 1723 mutex_unlock(&hpriv->restore_phase_mutex); 1724 goto out; 1725 } 1726 } 1727 1728 hdev->asic_funcs->restore_phase_topology(hdev); 1729 1730 chunks = (void __user *) (uintptr_t) args->in.chunks_restore; 1731 num_chunks = args->in.num_chunks_restore; 1732 1733 if (!num_chunks) { 1734 dev_dbg(hdev->dev, 1735 "Need to run restore phase but restore CS is empty\n"); 1736 rc = 0; 1737 } else { 1738 rc = cs_ioctl_default(hpriv, chunks, num_chunks, 1739 cs_seq, 0, 0, hdev->timeout_jiffies, &sob_count); 1740 } 1741 1742 mutex_unlock(&hpriv->restore_phase_mutex); 1743 1744 if (rc) { 1745 dev_err(hdev->dev, 1746 "Failed to submit restore CS for context %d (%d)\n", 1747 ctx->asid, rc); 1748 goto out; 1749 } 1750 1751 /* Need to wait for restore completion before execution phase */ 1752 if (num_chunks) { 1753 enum hl_cs_wait_status status; 1754 1755 ret = _hl_cs_wait_ioctl(hdev, ctx, 1756 jiffies_to_usecs(hdev->timeout_jiffies), 1757 *cs_seq, &status, NULL); 1758 if (ret) { 1759 dev_err(hdev->dev, 1760 "Restore CS for context %d failed to complete %d\n", 1761 ctx->asid, ret); 1762 rc = -ENOEXEC; 1763 goto out; 1764 } 1765 } 1766 1767 if (hdev->supports_ctx_switch) 1768 ctx->thread_ctx_switch_wait_token = 1; 1769 1770 } else if (hdev->supports_ctx_switch && !ctx->thread_ctx_switch_wait_token) { 1771 rc = hl_poll_timeout_memory(hdev, 1772 &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1), 1773 100, jiffies_to_usecs(hdev->timeout_jiffies), false); 1774 1775 if (rc == -ETIMEDOUT) { 1776 dev_err(hdev->dev, 1777 "context switch phase timeout (%d)\n", tmp); 1778 goto out; 1779 } 1780 } 1781 1782 out: 1783 if ((rc == -ETIMEDOUT || rc == -EBUSY) && (need_soft_reset)) 1784 hl_device_reset(hdev, 0); 1785 1786 return rc; 1787 } 1788 1789 /* 1790 * hl_cs_signal_sob_wraparound_handler: handle SOB value wrapaound case. 1791 * if the SOB value reaches the max value move to the other SOB reserved 1792 * to the queue. 1793 * @hdev: pointer to device structure 1794 * @q_idx: stream queue index 1795 * @hw_sob: the H/W SOB used in this signal CS. 1796 * @count: signals count 1797 * @encaps_sig: tells whether it's reservation for encaps signals or not. 1798 * 1799 * Note that this function must be called while hw_queues_lock is taken. 1800 */ 1801 int hl_cs_signal_sob_wraparound_handler(struct hl_device *hdev, u32 q_idx, 1802 struct hl_hw_sob **hw_sob, u32 count, bool encaps_sig) 1803 1804 { 1805 struct hl_sync_stream_properties *prop; 1806 struct hl_hw_sob *sob = *hw_sob, *other_sob; 1807 u8 other_sob_offset; 1808 1809 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; 1810 1811 hw_sob_get(sob); 1812 1813 /* check for wraparound */ 1814 if (prop->next_sob_val + count >= HL_MAX_SOB_VAL) { 1815 /* 1816 * Decrement as we reached the max value. 1817 * The release function won't be called here as we've 1818 * just incremented the refcount right before calling this 1819 * function. 
1820 */ 1821 hw_sob_put_err(sob); 1822 1823 /* 1824 * check the other sob value, if it still in use then fail 1825 * otherwise make the switch 1826 */ 1827 other_sob_offset = (prop->curr_sob_offset + 1) % HL_RSVD_SOBS; 1828 other_sob = &prop->hw_sob[other_sob_offset]; 1829 1830 if (kref_read(&other_sob->kref) != 1) { 1831 dev_err(hdev->dev, "error: Cannot switch SOBs q_idx: %d\n", 1832 q_idx); 1833 return -EINVAL; 1834 } 1835 1836 /* 1837 * next_sob_val always points to the next available signal 1838 * in the sob, so in encaps signals it will be the next one 1839 * after reserving the required amount. 1840 */ 1841 if (encaps_sig) 1842 prop->next_sob_val = count + 1; 1843 else 1844 prop->next_sob_val = count; 1845 1846 /* only two SOBs are currently in use */ 1847 prop->curr_sob_offset = other_sob_offset; 1848 *hw_sob = other_sob; 1849 1850 /* 1851 * check if other_sob needs reset, then do it before using it 1852 * for the reservation or the next signal cs. 1853 * we do it here, and for both encaps and regular signal cs 1854 * cases in order to avoid possible races of two kref_put 1855 * of the sob which can occur at the same time if we move the 1856 * sob reset(kref_put) to cs_do_release function. 1857 * in addition, if we have combination of cs signal and 1858 * encaps, and at the point we need to reset the sob there was 1859 * no more reservations and only signal cs keep coming, 1860 * in such case we need signal_cs to put the refcount and 1861 * reset the sob. 1862 */ 1863 if (other_sob->need_reset) 1864 hw_sob_put(other_sob); 1865 1866 if (encaps_sig) { 1867 /* set reset indication for the sob */ 1868 sob->need_reset = true; 1869 hw_sob_get(other_sob); 1870 } 1871 1872 dev_dbg(hdev->dev, "switched to SOB %d, q_idx: %d\n", 1873 prop->curr_sob_offset, q_idx); 1874 } else { 1875 prop->next_sob_val += count; 1876 } 1877 1878 return 0; 1879 } 1880 1881 static int cs_ioctl_extract_signal_seq(struct hl_device *hdev, 1882 struct hl_cs_chunk *chunk, u64 *signal_seq, struct hl_ctx *ctx, 1883 bool encaps_signals) 1884 { 1885 u64 *signal_seq_arr = NULL; 1886 u32 size_to_copy, signal_seq_arr_len; 1887 int rc = 0; 1888 1889 if (encaps_signals) { 1890 *signal_seq = chunk->encaps_signal_seq; 1891 return 0; 1892 } 1893 1894 signal_seq_arr_len = chunk->num_signal_seq_arr; 1895 1896 /* currently only one signal seq is supported */ 1897 if (signal_seq_arr_len != 1) { 1898 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 1899 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); 1900 dev_err(hdev->dev, 1901 "Wait for signal CS supports only one signal CS seq\n"); 1902 return -EINVAL; 1903 } 1904 1905 signal_seq_arr = kmalloc_array(signal_seq_arr_len, 1906 sizeof(*signal_seq_arr), 1907 GFP_ATOMIC); 1908 if (!signal_seq_arr) 1909 signal_seq_arr = kmalloc_array(signal_seq_arr_len, 1910 sizeof(*signal_seq_arr), 1911 GFP_KERNEL); 1912 if (!signal_seq_arr) { 1913 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1914 atomic64_inc(&hdev->aggregated_cs_counters.out_of_mem_drop_cnt); 1915 return -ENOMEM; 1916 } 1917 1918 size_to_copy = signal_seq_arr_len * sizeof(*signal_seq_arr); 1919 if (copy_from_user(signal_seq_arr, 1920 u64_to_user_ptr(chunk->signal_seq_arr), 1921 size_to_copy)) { 1922 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 1923 atomic64_inc(&hdev->aggregated_cs_counters.validation_drop_cnt); 1924 dev_err(hdev->dev, 1925 "Failed to copy signal seq array from user\n"); 1926 rc = -EFAULT; 1927 goto out; 1928 } 1929 1930 /* currently it is guaranteed to have only one signal seq */ 
1931 *signal_seq = signal_seq_arr[0]; 1932 1933 out: 1934 kfree(signal_seq_arr); 1935 1936 return rc; 1937 } 1938 1939 static int cs_ioctl_signal_wait_create_jobs(struct hl_device *hdev, 1940 struct hl_ctx *ctx, struct hl_cs *cs, 1941 enum hl_queue_type q_type, u32 q_idx, u32 encaps_signal_offset) 1942 { 1943 struct hl_cs_counters_atomic *cntr; 1944 struct hl_cs_job *job; 1945 struct hl_cb *cb; 1946 u32 cb_size; 1947 1948 cntr = &hdev->aggregated_cs_counters; 1949 1950 job = hl_cs_allocate_job(hdev, q_type, true); 1951 if (!job) { 1952 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1953 atomic64_inc(&cntr->out_of_mem_drop_cnt); 1954 dev_err(hdev->dev, "Failed to allocate a new job\n"); 1955 return -ENOMEM; 1956 } 1957 1958 if (cs->type == CS_TYPE_WAIT) 1959 cb_size = hdev->asic_funcs->get_wait_cb_size(hdev); 1960 else 1961 cb_size = hdev->asic_funcs->get_signal_cb_size(hdev); 1962 1963 cb = hl_cb_kernel_create(hdev, cb_size, q_type == QUEUE_TYPE_HW); 1964 if (!cb) { 1965 atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt); 1966 atomic64_inc(&cntr->out_of_mem_drop_cnt); 1967 kfree(job); 1968 return -EFAULT; 1969 } 1970 1971 job->id = 0; 1972 job->cs = cs; 1973 job->user_cb = cb; 1974 atomic_inc(&job->user_cb->cs_cnt); 1975 job->user_cb_size = cb_size; 1976 job->hw_queue_id = q_idx; 1977 1978 if ((cs->type == CS_TYPE_WAIT || cs->type == CS_TYPE_COLLECTIVE_WAIT) 1979 && cs->encaps_signals) 1980 job->encaps_sig_wait_offset = encaps_signal_offset; 1981 /* 1982 * No need in parsing, user CB is the patched CB. 1983 * We call hl_cb_destroy() out of two reasons - we don't need the CB in 1984 * the CB idr anymore and to decrement its refcount as it was 1985 * incremented inside hl_cb_kernel_create(). 1986 */ 1987 job->patched_cb = job->user_cb; 1988 job->job_cb_size = job->user_cb_size; 1989 hl_cb_destroy(&hdev->kernel_mem_mgr, cb->buf->handle); 1990 1991 /* increment refcount as for external queues we get completion */ 1992 cs_get(cs); 1993 1994 cs->jobs_in_queue_cnt[job->hw_queue_id]++; 1995 cs->jobs_cnt++; 1996 1997 list_add_tail(&job->cs_node, &cs->job_list); 1998 1999 hl_debugfs_add_job(hdev, job); 2000 2001 return 0; 2002 } 2003 2004 static int cs_ioctl_reserve_signals(struct hl_fpriv *hpriv, 2005 u32 q_idx, u32 count, 2006 u32 *handle_id, u32 *sob_addr, 2007 u32 *signals_count) 2008 { 2009 struct hw_queue_properties *hw_queue_prop; 2010 struct hl_sync_stream_properties *prop; 2011 struct hl_device *hdev = hpriv->hdev; 2012 struct hl_cs_encaps_sig_handle *handle; 2013 struct hl_encaps_signals_mgr *mgr; 2014 struct hl_hw_sob *hw_sob; 2015 int hdl_id; 2016 int rc = 0; 2017 2018 if (count >= HL_MAX_SOB_VAL) { 2019 dev_err(hdev->dev, "signals count(%u) exceeds the max SOB value\n", 2020 count); 2021 rc = -EINVAL; 2022 goto out; 2023 } 2024 2025 if (q_idx >= hdev->asic_prop.max_queues) { 2026 dev_err(hdev->dev, "Queue index %d is invalid\n", 2027 q_idx); 2028 rc = -EINVAL; 2029 goto out; 2030 } 2031 2032 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; 2033 2034 if (!hw_queue_prop->supports_sync_stream) { 2035 dev_err(hdev->dev, 2036 "Queue index %d does not support sync stream operations\n", 2037 q_idx); 2038 rc = -EINVAL; 2039 goto out; 2040 } 2041 2042 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; 2043 2044 handle = kzalloc(sizeof(*handle), GFP_KERNEL); 2045 if (!handle) { 2046 rc = -ENOMEM; 2047 goto out; 2048 } 2049 2050 handle->count = count; 2051 2052 hl_ctx_get(hpriv->ctx); 2053 handle->ctx = hpriv->ctx; 2054 mgr = &hpriv->ctx->sig_mgr; 2055 2056 spin_lock(&mgr->lock); 
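/*
 * The handle ID is allocated while holding mgr->lock, hence GFP_ATOMIC below.
 * idr_alloc() with end == 0 places no upper bound on the ID, so handle IDs
 * simply grow upwards starting from 1.
 */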
2057 hdl_id = idr_alloc(&mgr->handles, handle, 1, 0, GFP_ATOMIC); 2058 spin_unlock(&mgr->lock); 2059 2060 if (hdl_id < 0) { 2061 dev_err(hdev->dev, "Failed to allocate IDR for a new signal reservation\n"); 2062 rc = -EINVAL; 2063 goto put_ctx; 2064 } 2065 2066 handle->id = hdl_id; 2067 handle->q_idx = q_idx; 2068 handle->hdev = hdev; 2069 kref_init(&handle->refcount); 2070 2071 hdev->asic_funcs->hw_queues_lock(hdev); 2072 2073 hw_sob = &prop->hw_sob[prop->curr_sob_offset]; 2074 2075 /* 2076 * Increment the SOB value by count by user request 2077 * to reserve those signals 2078 * check if the signals amount to reserve is not exceeding the max sob 2079 * value, if yes then switch sob. 2080 */ 2081 rc = hl_cs_signal_sob_wraparound_handler(hdev, q_idx, &hw_sob, count, 2082 true); 2083 if (rc) { 2084 dev_err(hdev->dev, "Failed to switch SOB\n"); 2085 hdev->asic_funcs->hw_queues_unlock(hdev); 2086 rc = -EINVAL; 2087 goto remove_idr; 2088 } 2089 /* set the hw_sob to the handle after calling the sob wraparound handler 2090 * since sob could have changed. 2091 */ 2092 handle->hw_sob = hw_sob; 2093 2094 /* store the current sob value for unreserve validity check, and 2095 * signal offset support 2096 */ 2097 handle->pre_sob_val = prop->next_sob_val - handle->count; 2098 2099 handle->cs_seq = ULLONG_MAX; 2100 2101 *signals_count = prop->next_sob_val; 2102 hdev->asic_funcs->hw_queues_unlock(hdev); 2103 2104 *sob_addr = handle->hw_sob->sob_addr; 2105 *handle_id = hdl_id; 2106 2107 dev_dbg(hdev->dev, 2108 "Signals reserved, sob_id: %d, sob addr: 0x%x, last sob_val: %u, q_idx: %d, hdl_id: %d\n", 2109 hw_sob->sob_id, handle->hw_sob->sob_addr, 2110 prop->next_sob_val - 1, q_idx, hdl_id); 2111 goto out; 2112 2113 remove_idr: 2114 spin_lock(&mgr->lock); 2115 idr_remove(&mgr->handles, hdl_id); 2116 spin_unlock(&mgr->lock); 2117 2118 put_ctx: 2119 hl_ctx_put(handle->ctx); 2120 kfree(handle); 2121 2122 out: 2123 return rc; 2124 } 2125 2126 static int cs_ioctl_unreserve_signals(struct hl_fpriv *hpriv, u32 handle_id) 2127 { 2128 struct hl_cs_encaps_sig_handle *encaps_sig_hdl; 2129 struct hl_sync_stream_properties *prop; 2130 struct hl_device *hdev = hpriv->hdev; 2131 struct hl_encaps_signals_mgr *mgr; 2132 struct hl_hw_sob *hw_sob; 2133 u32 q_idx, sob_addr; 2134 int rc = 0; 2135 2136 mgr = &hpriv->ctx->sig_mgr; 2137 2138 spin_lock(&mgr->lock); 2139 encaps_sig_hdl = idr_find(&mgr->handles, handle_id); 2140 if (encaps_sig_hdl) { 2141 dev_dbg(hdev->dev, "unreserve signals, handle: %u, SOB:0x%x, count: %u\n", 2142 handle_id, encaps_sig_hdl->hw_sob->sob_addr, 2143 encaps_sig_hdl->count); 2144 2145 hdev->asic_funcs->hw_queues_lock(hdev); 2146 2147 q_idx = encaps_sig_hdl->q_idx; 2148 prop = &hdev->kernel_queues[q_idx].sync_stream_prop; 2149 hw_sob = &prop->hw_sob[prop->curr_sob_offset]; 2150 sob_addr = hdev->asic_funcs->get_sob_addr(hdev, hw_sob->sob_id); 2151 2152 /* Check if sob_val got out of sync due to other 2153 * signal submission requests which were handled 2154 * between the reserve-unreserve calls or SOB switch 2155 * upon reaching SOB max value. 
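 *
 * Illustrative example (arbitrary numbers): reserving count == 32 while
 * next_sob_val was 8 stores pre_sob_val == 8 and advances next_sob_val to 40.
 * Unreserve is allowed only while pre_sob_val + count (40) still equals
 * next_sob_val and the handle still points at the current SOB; any signal CS
 * submitted in between breaks that equality and the unreserve request is
 * rejected with -EINVAL.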
2156 */ 2157 if (encaps_sig_hdl->pre_sob_val + encaps_sig_hdl->count 2158 != prop->next_sob_val || 2159 sob_addr != encaps_sig_hdl->hw_sob->sob_addr) { 2160 dev_err(hdev->dev, "Cannot unreserve signals, SOB val ran out of sync, expected: %u, actual val: %u\n", 2161 encaps_sig_hdl->pre_sob_val, 2162 (prop->next_sob_val - encaps_sig_hdl->count)); 2163 2164 hdev->asic_funcs->hw_queues_unlock(hdev); 2165 rc = -EINVAL; 2166 goto out_unlock; 2167 } 2168 2169 /* 2170 * Decrement the SOB value by count by user request 2171 * to unreserve those signals 2172 */ 2173 prop->next_sob_val -= encaps_sig_hdl->count; 2174 2175 hdev->asic_funcs->hw_queues_unlock(hdev); 2176 2177 hw_sob_put(hw_sob); 2178 2179 /* Release the id and free allocated memory of the handle */ 2180 idr_remove(&mgr->handles, handle_id); 2181 2182 /* unlock before calling ctx_put, where we might sleep */ 2183 spin_unlock(&mgr->lock); 2184 hl_ctx_put(encaps_sig_hdl->ctx); 2185 kfree(encaps_sig_hdl); 2186 goto out; 2187 } else { 2188 rc = -EINVAL; 2189 dev_err(hdev->dev, "failed to unreserve signals, cannot find handler\n"); 2190 } 2191 2192 out_unlock: 2193 spin_unlock(&mgr->lock); 2194 2195 out: 2196 return rc; 2197 } 2198 2199 static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type, 2200 void __user *chunks, u32 num_chunks, 2201 u64 *cs_seq, u32 flags, u32 timeout, 2202 u32 *signal_sob_addr_offset, u16 *signal_initial_sob_count) 2203 { 2204 struct hl_cs_encaps_sig_handle *encaps_sig_hdl = NULL; 2205 bool handle_found = false, is_wait_cs = false, 2206 wait_cs_submitted = false, 2207 cs_encaps_signals = false; 2208 struct hl_cs_chunk *cs_chunk_array, *chunk; 2209 bool staged_cs_with_encaps_signals = false; 2210 struct hw_queue_properties *hw_queue_prop; 2211 struct hl_device *hdev = hpriv->hdev; 2212 struct hl_cs_compl *sig_waitcs_cmpl; 2213 u32 q_idx, collective_engine_id = 0; 2214 struct hl_cs_counters_atomic *cntr; 2215 struct hl_fence *sig_fence = NULL; 2216 struct hl_ctx *ctx = hpriv->ctx; 2217 enum hl_queue_type q_type; 2218 struct hl_cs *cs; 2219 u64 signal_seq; 2220 int rc; 2221 2222 cntr = &hdev->aggregated_cs_counters; 2223 *cs_seq = ULLONG_MAX; 2224 2225 rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks, 2226 ctx); 2227 if (rc) 2228 goto out; 2229 2230 /* currently it is guaranteed to have only one chunk */ 2231 chunk = &cs_chunk_array[0]; 2232 2233 if (chunk->queue_index >= hdev->asic_prop.max_queues) { 2234 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 2235 atomic64_inc(&cntr->validation_drop_cnt); 2236 dev_err(hdev->dev, "Queue index %d is invalid\n", 2237 chunk->queue_index); 2238 rc = -EINVAL; 2239 goto free_cs_chunk_array; 2240 } 2241 2242 q_idx = chunk->queue_index; 2243 hw_queue_prop = &hdev->asic_prop.hw_queues_props[q_idx]; 2244 q_type = hw_queue_prop->type; 2245 2246 if (!hw_queue_prop->supports_sync_stream) { 2247 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 2248 atomic64_inc(&cntr->validation_drop_cnt); 2249 dev_err(hdev->dev, 2250 "Queue index %d does not support sync stream operations\n", 2251 q_idx); 2252 rc = -EINVAL; 2253 goto free_cs_chunk_array; 2254 } 2255 2256 if (cs_type == CS_TYPE_COLLECTIVE_WAIT) { 2257 if (!(hw_queue_prop->collective_mode == HL_COLLECTIVE_MASTER)) { 2258 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 2259 atomic64_inc(&cntr->validation_drop_cnt); 2260 dev_err(hdev->dev, 2261 "Queue index %d is invalid\n", q_idx); 2262 rc = -EINVAL; 2263 goto free_cs_chunk_array; 2264 } 2265 2266 if (!hdev->nic_ports_mask) { 2267 
atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 2268 atomic64_inc(&cntr->validation_drop_cnt); 2269 dev_err(hdev->dev, 2270 "Collective operations not supported when NIC ports are disabled"); 2271 rc = -EINVAL; 2272 goto free_cs_chunk_array; 2273 } 2274 2275 collective_engine_id = chunk->collective_engine_id; 2276 } 2277 2278 is_wait_cs = !!(cs_type == CS_TYPE_WAIT || 2279 cs_type == CS_TYPE_COLLECTIVE_WAIT); 2280 2281 cs_encaps_signals = !!(flags & HL_CS_FLAGS_ENCAP_SIGNALS); 2282 2283 if (is_wait_cs) { 2284 rc = cs_ioctl_extract_signal_seq(hdev, chunk, &signal_seq, 2285 ctx, cs_encaps_signals); 2286 if (rc) 2287 goto free_cs_chunk_array; 2288 2289 if (cs_encaps_signals) { 2290 /* check if cs sequence has encapsulated 2291 * signals handle 2292 */ 2293 struct idr *idp; 2294 u32 id; 2295 2296 spin_lock(&ctx->sig_mgr.lock); 2297 idp = &ctx->sig_mgr.handles; 2298 idr_for_each_entry(idp, encaps_sig_hdl, id) { 2299 if (encaps_sig_hdl->cs_seq == signal_seq) { 2300 /* get refcount to protect removing this handle from idr, 2301 * needed when multiple wait cs are used with offset 2302 * to wait on reserved encaps signals. 2303 * Since kref_put of this handle is executed outside the 2304 * current lock, it is possible that the handle refcount 2305 * is 0 but it yet to be removed from the list. In this 2306 * case need to consider the handle as not valid. 2307 */ 2308 if (kref_get_unless_zero(&encaps_sig_hdl->refcount)) 2309 handle_found = true; 2310 break; 2311 } 2312 } 2313 spin_unlock(&ctx->sig_mgr.lock); 2314 2315 if (!handle_found) { 2316 /* treat as signal CS already finished */ 2317 dev_dbg(hdev->dev, "Cannot find encapsulated signals handle for seq 0x%llx\n", 2318 signal_seq); 2319 rc = 0; 2320 goto free_cs_chunk_array; 2321 } 2322 2323 /* validate also the signal offset value */ 2324 if (chunk->encaps_signal_offset > 2325 encaps_sig_hdl->count) { 2326 dev_err(hdev->dev, "offset(%u) value exceed max reserved signals count(%u)!\n", 2327 chunk->encaps_signal_offset, 2328 encaps_sig_hdl->count); 2329 rc = -EINVAL; 2330 goto free_cs_chunk_array; 2331 } 2332 } 2333 2334 sig_fence = hl_ctx_get_fence(ctx, signal_seq); 2335 if (IS_ERR(sig_fence)) { 2336 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 2337 atomic64_inc(&cntr->validation_drop_cnt); 2338 dev_err(hdev->dev, 2339 "Failed to get signal CS with seq 0x%llx\n", 2340 signal_seq); 2341 rc = PTR_ERR(sig_fence); 2342 goto free_cs_chunk_array; 2343 } 2344 2345 if (!sig_fence) { 2346 /* signal CS already finished */ 2347 rc = 0; 2348 goto free_cs_chunk_array; 2349 } 2350 2351 sig_waitcs_cmpl = 2352 container_of(sig_fence, struct hl_cs_compl, base_fence); 2353 2354 staged_cs_with_encaps_signals = !! 
2355 (sig_waitcs_cmpl->type == CS_TYPE_DEFAULT && 2356 (flags & HL_CS_FLAGS_ENCAP_SIGNALS)); 2357 2358 if (sig_waitcs_cmpl->type != CS_TYPE_SIGNAL && 2359 !staged_cs_with_encaps_signals) { 2360 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 2361 atomic64_inc(&cntr->validation_drop_cnt); 2362 dev_err(hdev->dev, 2363 "CS seq 0x%llx is not of a signal/encaps-signal CS\n", 2364 signal_seq); 2365 hl_fence_put(sig_fence); 2366 rc = -EINVAL; 2367 goto free_cs_chunk_array; 2368 } 2369 2370 if (completion_done(&sig_fence->completion)) { 2371 /* signal CS already finished */ 2372 hl_fence_put(sig_fence); 2373 rc = 0; 2374 goto free_cs_chunk_array; 2375 } 2376 } 2377 2378 rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs, flags, timeout); 2379 if (rc) { 2380 if (is_wait_cs) 2381 hl_fence_put(sig_fence); 2382 2383 goto free_cs_chunk_array; 2384 } 2385 2386 /* 2387 * Save the signal CS fence for later initialization right before 2388 * hanging the wait CS on the queue. 2389 * for encaps signals case, we save the cs sequence and handle pointer 2390 * for later initialization. 2391 */ 2392 if (is_wait_cs) { 2393 cs->signal_fence = sig_fence; 2394 /* store the handle pointer, so we don't have to 2395 * look for it again, later on the flow 2396 * when we need to set SOB info in hw_queue. 2397 */ 2398 if (cs->encaps_signals) 2399 cs->encaps_sig_hdl = encaps_sig_hdl; 2400 } 2401 2402 hl_debugfs_add_cs(cs); 2403 2404 *cs_seq = cs->sequence; 2405 2406 if (cs_type == CS_TYPE_WAIT || cs_type == CS_TYPE_SIGNAL) 2407 rc = cs_ioctl_signal_wait_create_jobs(hdev, ctx, cs, q_type, 2408 q_idx, chunk->encaps_signal_offset); 2409 else if (cs_type == CS_TYPE_COLLECTIVE_WAIT) 2410 rc = hdev->asic_funcs->collective_wait_create_jobs(hdev, ctx, 2411 cs, q_idx, collective_engine_id, 2412 chunk->encaps_signal_offset); 2413 else { 2414 atomic64_inc(&ctx->cs_counters.validation_drop_cnt); 2415 atomic64_inc(&cntr->validation_drop_cnt); 2416 rc = -EINVAL; 2417 } 2418 2419 if (rc) 2420 goto free_cs_object; 2421 2422 if (q_type == QUEUE_TYPE_HW) 2423 INIT_WORK(&cs->finish_work, cs_completion); 2424 2425 rc = hl_hw_queue_schedule_cs(cs); 2426 if (rc) { 2427 /* In case wait cs failed here, it means the signal cs 2428 * already completed. we want to free all it's related objects 2429 * but we don't want to fail the ioctl. 
2430 */ 2431 if (is_wait_cs) 2432 rc = 0; 2433 else if (rc != -EAGAIN) 2434 dev_err(hdev->dev, 2435 "Failed to submit CS %d.%llu to H/W queues, error %d\n", 2436 ctx->asid, cs->sequence, rc); 2437 goto free_cs_object; 2438 } 2439 2440 *signal_sob_addr_offset = cs->sob_addr_offset; 2441 *signal_initial_sob_count = cs->initial_sob_count; 2442 2443 rc = HL_CS_STATUS_SUCCESS; 2444 if (is_wait_cs) 2445 wait_cs_submitted = true; 2446 goto put_cs; 2447 2448 free_cs_object: 2449 cs_rollback(hdev, cs); 2450 *cs_seq = ULLONG_MAX; 2451 /* The path below is both for good and erroneous exits */ 2452 put_cs: 2453 /* We finished with the CS in this function, so put the ref */ 2454 cs_put(cs); 2455 free_cs_chunk_array: 2456 if (!wait_cs_submitted && cs_encaps_signals && handle_found && is_wait_cs) 2457 kref_put(&encaps_sig_hdl->refcount, hl_encaps_release_handle_and_put_ctx); 2458 kfree(cs_chunk_array); 2459 out: 2460 return rc; 2461 } 2462 2463 static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores, 2464 u32 num_engine_cores, u32 core_command) 2465 { 2466 struct hl_device *hdev = hpriv->hdev; 2467 void __user *engine_cores_arr; 2468 u32 *cores; 2469 int rc; 2470 2471 if (!hdev->asic_prop.supports_engine_modes) 2472 return -EPERM; 2473 2474 if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) { 2475 dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores); 2476 return -EINVAL; 2477 } 2478 2479 if (core_command != HL_ENGINE_CORE_RUN && core_command != HL_ENGINE_CORE_HALT) { 2480 dev_err(hdev->dev, "Engine core command is invalid\n"); 2481 return -EINVAL; 2482 } 2483 2484 engine_cores_arr = (void __user *) (uintptr_t) engine_cores; 2485 cores = kmalloc_array(num_engine_cores, sizeof(u32), GFP_KERNEL); 2486 if (!cores) 2487 return -ENOMEM; 2488 2489 if (copy_from_user(cores, engine_cores_arr, num_engine_cores * sizeof(u32))) { 2490 dev_err(hdev->dev, "Failed to copy core-ids array from user\n"); 2491 kfree(cores); 2492 return -EFAULT; 2493 } 2494 2495 rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command); 2496 kfree(cores); 2497 2498 return rc; 2499 } 2500 2501 static int cs_ioctl_engines(struct hl_fpriv *hpriv, u64 engines_arr_user_addr, 2502 u32 num_engines, enum hl_engine_command command) 2503 { 2504 struct hl_device *hdev = hpriv->hdev; 2505 u32 *engines, max_num_of_engines; 2506 void __user *engines_arr; 2507 int rc; 2508 2509 if (!hdev->asic_prop.supports_engine_modes) 2510 return -EPERM; 2511 2512 if (command >= HL_ENGINE_COMMAND_MAX) { 2513 dev_err(hdev->dev, "Engine command is invalid\n"); 2514 return -EINVAL; 2515 } 2516 2517 max_num_of_engines = hdev->asic_prop.max_num_of_engines; 2518 if (command == HL_ENGINE_CORE_RUN || command == HL_ENGINE_CORE_HALT) 2519 max_num_of_engines = hdev->asic_prop.num_engine_cores; 2520 2521 if (!num_engines || num_engines > max_num_of_engines) { 2522 dev_err(hdev->dev, "Number of engines %d is invalid\n", num_engines); 2523 return -EINVAL; 2524 } 2525 2526 engines_arr = (void __user *) (uintptr_t) engines_arr_user_addr; 2527 engines = kmalloc_array(num_engines, sizeof(u32), GFP_KERNEL); 2528 if (!engines) 2529 return -ENOMEM; 2530 2531 if (copy_from_user(engines, engines_arr, num_engines * sizeof(u32))) { 2532 dev_err(hdev->dev, "Failed to copy engine-ids array from user\n"); 2533 kfree(engines); 2534 return -EFAULT; 2535 } 2536 2537 rc = hdev->asic_funcs->set_engines(hdev, engines, num_engines, command); 2538 kfree(engines); 2539 2540 return rc; 2541 } 2542 2543 static 
int cs_ioctl_flush_pci_hbw_writes(struct hl_fpriv *hpriv) 2544 { 2545 struct hl_device *hdev = hpriv->hdev; 2546 struct asic_fixed_properties *prop = &hdev->asic_prop; 2547 2548 if (!prop->hbw_flush_reg) { 2549 dev_dbg(hdev->dev, "HBW flush is not supported\n"); 2550 return -EOPNOTSUPP; 2551 } 2552 2553 RREG32(prop->hbw_flush_reg); 2554 2555 return 0; 2556 } 2557 2558 int hl_cs_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv) 2559 { 2560 struct hl_fpriv *hpriv = file_priv->driver_priv; 2561 union hl_cs_args *args = data; 2562 enum hl_cs_type cs_type = 0; 2563 u64 cs_seq = ULONG_MAX; 2564 void __user *chunks; 2565 u32 num_chunks, flags, timeout, 2566 signals_count = 0, sob_addr = 0, handle_id = 0; 2567 u16 sob_initial_count = 0; 2568 int rc; 2569 2570 rc = hl_cs_sanity_checks(hpriv, args); 2571 if (rc) 2572 goto out; 2573 2574 rc = hl_cs_ctx_switch(hpriv, args, &cs_seq); 2575 if (rc) 2576 goto out; 2577 2578 cs_type = hl_cs_get_cs_type(args->in.cs_flags & 2579 ~HL_CS_FLAGS_FORCE_RESTORE); 2580 chunks = (void __user *) (uintptr_t) args->in.chunks_execute; 2581 num_chunks = args->in.num_chunks_execute; 2582 flags = args->in.cs_flags; 2583 2584 /* In case this is a staged CS, user should supply the CS sequence */ 2585 if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) && 2586 !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST)) 2587 cs_seq = args->in.seq; 2588 2589 timeout = flags & HL_CS_FLAGS_CUSTOM_TIMEOUT 2590 ? msecs_to_jiffies(args->in.timeout * 1000) 2591 : hpriv->hdev->timeout_jiffies; 2592 2593 switch (cs_type) { 2594 case CS_TYPE_SIGNAL: 2595 case CS_TYPE_WAIT: 2596 case CS_TYPE_COLLECTIVE_WAIT: 2597 rc = cs_ioctl_signal_wait(hpriv, cs_type, chunks, num_chunks, 2598 &cs_seq, args->in.cs_flags, timeout, 2599 &sob_addr, &sob_initial_count); 2600 break; 2601 case CS_RESERVE_SIGNALS: 2602 rc = cs_ioctl_reserve_signals(hpriv, 2603 args->in.encaps_signals_q_idx, 2604 args->in.encaps_signals_count, 2605 &handle_id, &sob_addr, &signals_count); 2606 break; 2607 case CS_UNRESERVE_SIGNALS: 2608 rc = cs_ioctl_unreserve_signals(hpriv, 2609 args->in.encaps_sig_handle_id); 2610 break; 2611 case CS_TYPE_ENGINE_CORE: 2612 rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores, 2613 args->in.num_engine_cores, args->in.core_command); 2614 break; 2615 case CS_TYPE_ENGINES: 2616 rc = cs_ioctl_engines(hpriv, args->in.engines, 2617 args->in.num_engines, args->in.engine_command); 2618 break; 2619 case CS_TYPE_FLUSH_PCI_HBW_WRITES: 2620 rc = cs_ioctl_flush_pci_hbw_writes(hpriv); 2621 break; 2622 default: 2623 rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq, 2624 args->in.cs_flags, 2625 args->in.encaps_sig_handle_id, 2626 timeout, &sob_initial_count); 2627 break; 2628 } 2629 out: 2630 if (rc != -EAGAIN) { 2631 memset(args, 0, sizeof(*args)); 2632 2633 switch (cs_type) { 2634 case CS_RESERVE_SIGNALS: 2635 args->out.handle_id = handle_id; 2636 args->out.sob_base_addr_offset = sob_addr; 2637 args->out.count = signals_count; 2638 break; 2639 case CS_TYPE_SIGNAL: 2640 args->out.sob_base_addr_offset = sob_addr; 2641 args->out.sob_count_before_submission = sob_initial_count; 2642 args->out.seq = cs_seq; 2643 break; 2644 case CS_TYPE_DEFAULT: 2645 args->out.sob_count_before_submission = sob_initial_count; 2646 args->out.seq = cs_seq; 2647 break; 2648 default: 2649 args->out.seq = cs_seq; 2650 break; 2651 } 2652 2653 args->out.status = rc; 2654 } 2655 2656 return rc; 2657 } 2658 2659 static int hl_wait_for_fence(struct hl_ctx *ctx, u64 seq, struct hl_fence *fence, 2660 enum hl_cs_wait_status 
*status, u64 timeout_us, s64 *timestamp) 2661 { 2662 struct hl_device *hdev = ctx->hdev; 2663 ktime_t timestamp_kt; 2664 long completion_rc; 2665 int rc = 0, error; 2666 2667 if (IS_ERR(fence)) { 2668 rc = PTR_ERR(fence); 2669 if (rc == -EINVAL) 2670 dev_notice_ratelimited(hdev->dev, 2671 "Can't wait on CS %llu because current CS is at seq %llu\n", 2672 seq, ctx->cs_sequence); 2673 return rc; 2674 } 2675 2676 if (!fence) { 2677 if (!hl_pop_cs_outcome(&ctx->outcome_store, seq, ×tamp_kt, &error)) { 2678 dev_dbg(hdev->dev, 2679 "Can't wait on seq %llu because current CS is at seq %llu (Fence is gone)\n", 2680 seq, ctx->cs_sequence); 2681 *status = CS_WAIT_STATUS_GONE; 2682 return 0; 2683 } 2684 2685 completion_rc = 1; 2686 goto report_results; 2687 } 2688 2689 if (!timeout_us) { 2690 completion_rc = completion_done(&fence->completion); 2691 } else { 2692 unsigned long timeout; 2693 2694 timeout = (timeout_us == MAX_SCHEDULE_TIMEOUT) ? 2695 timeout_us : usecs_to_jiffies(timeout_us); 2696 completion_rc = 2697 wait_for_completion_interruptible_timeout( 2698 &fence->completion, timeout); 2699 } 2700 2701 error = fence->error; 2702 timestamp_kt = fence->timestamp; 2703 2704 report_results: 2705 if (completion_rc > 0) { 2706 *status = CS_WAIT_STATUS_COMPLETED; 2707 if (timestamp) 2708 *timestamp = ktime_to_ns(timestamp_kt); 2709 } else { 2710 *status = CS_WAIT_STATUS_BUSY; 2711 } 2712 2713 if (completion_rc == -ERESTARTSYS) 2714 rc = completion_rc; 2715 else if (error == -ETIMEDOUT || error == -EIO) 2716 rc = error; 2717 2718 return rc; 2719 } 2720 2721 /* 2722 * hl_cs_poll_fences - iterate CS fences to check for CS completion 2723 * 2724 * @mcs_data: multi-CS internal data 2725 * @mcs_compl: multi-CS completion structure 2726 * 2727 * @return 0 on success, otherwise non 0 error code 2728 * 2729 * The function iterates on all CS sequence in the list and set bit in 2730 * completion_bitmap for each completed CS. 2731 * While iterating, the function sets the stream map of each fence in the fence 2732 * array in the completion QID stream map to be used by CSs to perform 2733 * completion to the multi-CS context. 2734 * This function shall be called after taking context ref 2735 */ 2736 static int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_completion *mcs_compl) 2737 { 2738 struct hl_fence **fence_ptr = mcs_data->fence_arr; 2739 struct hl_device *hdev = mcs_data->ctx->hdev; 2740 int i, rc, arr_len = mcs_data->arr_len; 2741 u64 *seq_arr = mcs_data->seq_arr; 2742 ktime_t max_ktime, first_cs_time; 2743 enum hl_cs_wait_status status; 2744 2745 memset(fence_ptr, 0, arr_len * sizeof(struct hl_fence *)); 2746 2747 /* get all fences under the same lock */ 2748 rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len); 2749 if (rc) 2750 return rc; 2751 2752 /* 2753 * re-initialize the completion here to handle 2 possible cases: 2754 * 1. CS will complete the multi-CS prior clearing the completion. in which 2755 * case the fence iteration is guaranteed to catch the CS completion. 2756 * 2. the completion will occur after re-init of the completion. 2757 * in which case we will wake up immediately in wait_for_completion. 
2758 */ 2759 reinit_completion(&mcs_compl->completion); 2760 2761 /* 2762 * set to maximum time to verify timestamp is valid: if at the end 2763 * this value is maintained- no timestamp was updated 2764 */ 2765 max_ktime = ktime_set(KTIME_SEC_MAX, 0); 2766 first_cs_time = max_ktime; 2767 2768 for (i = 0; i < arr_len; i++, fence_ptr++) { 2769 struct hl_fence *fence = *fence_ptr; 2770 2771 /* 2772 * In order to prevent case where we wait until timeout even though a CS associated 2773 * with the multi-CS actually completed we do things in the below order: 2774 * 1. for each fence set it's QID map in the multi-CS completion QID map. This way 2775 * any CS can, potentially, complete the multi CS for the specific QID (note 2776 * that once completion is initialized, calling complete* and then wait on the 2777 * completion will cause it to return at once) 2778 * 2. only after allowing multi-CS completion for the specific QID we check whether 2779 * the specific CS already completed (and thus the wait for completion part will 2780 * be skipped). if the CS not completed it is guaranteed that completing CS will 2781 * wake up the completion. 2782 */ 2783 if (fence) 2784 mcs_compl->stream_master_qid_map |= fence->stream_master_qid_map; 2785 2786 /* 2787 * function won't sleep as it is called with timeout 0 (i.e. 2788 * poll the fence) 2789 */ 2790 rc = hl_wait_for_fence(mcs_data->ctx, seq_arr[i], fence, &status, 0, NULL); 2791 if (rc) { 2792 dev_err(hdev->dev, 2793 "wait_for_fence error :%d for CS seq %llu\n", 2794 rc, seq_arr[i]); 2795 break; 2796 } 2797 2798 switch (status) { 2799 case CS_WAIT_STATUS_BUSY: 2800 /* CS did not finished, QID to wait on already stored */ 2801 break; 2802 case CS_WAIT_STATUS_COMPLETED: 2803 /* 2804 * Using mcs_handling_done to avoid possibility of mcs_data 2805 * returns to user indicating CS completed before it finished 2806 * all of its mcs handling, to avoid race the next time the 2807 * user waits for mcs. 2808 * note: when reaching this case fence is definitely not NULL 2809 * but NULL check was added to overcome static analysis 2810 */ 2811 if (fence && !fence->mcs_handling_done) { 2812 /* 2813 * in case multi CS is completed but MCS handling not done 2814 * we "complete" the multi CS to prevent it from waiting 2815 * until time-out and the "multi-CS handling done" will have 2816 * another chance at the next iteration 2817 */ 2818 complete_all(&mcs_compl->completion); 2819 break; 2820 } 2821 2822 mcs_data->completion_bitmap |= BIT(i); 2823 /* 2824 * For all completed CSs we take the earliest timestamp. 2825 * For this we have to validate that the timestamp is 2826 * earliest of all timestamps so far. 2827 */ 2828 if (fence && mcs_data->update_ts && 2829 (ktime_compare(fence->timestamp, first_cs_time) < 0)) 2830 first_cs_time = fence->timestamp; 2831 break; 2832 case CS_WAIT_STATUS_GONE: 2833 mcs_data->update_ts = false; 2834 mcs_data->gone_cs = true; 2835 /* 2836 * It is possible to get an old sequence numbers from user 2837 * which related to already completed CSs and their fences 2838 * already gone. In this case, CS set as completed but 2839 * no need to consider its QID for mcs completion. 
2840 */ 2841 mcs_data->completion_bitmap |= BIT(i); 2842 break; 2843 default: 2844 dev_err(hdev->dev, "Invalid fence status\n"); 2845 rc = -EINVAL; 2846 break; 2847 } 2848 2849 } 2850 2851 hl_fences_put(mcs_data->fence_arr, arr_len); 2852 2853 if (mcs_data->update_ts && 2854 (ktime_compare(first_cs_time, max_ktime) != 0)) 2855 mcs_data->timestamp = ktime_to_ns(first_cs_time); 2856 2857 return rc; 2858 } 2859 2860 static int _hl_cs_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, u64 timeout_us, u64 seq, 2861 enum hl_cs_wait_status *status, s64 *timestamp) 2862 { 2863 struct hl_fence *fence; 2864 int rc = 0; 2865 2866 if (timestamp) 2867 *timestamp = 0; 2868 2869 hl_ctx_get(ctx); 2870 2871 fence = hl_ctx_get_fence(ctx, seq); 2872 2873 rc = hl_wait_for_fence(ctx, seq, fence, status, timeout_us, timestamp); 2874 hl_fence_put(fence); 2875 hl_ctx_put(ctx); 2876 2877 return rc; 2878 } 2879 2880 static inline unsigned long hl_usecs64_to_jiffies(const u64 usecs) 2881 { 2882 if (usecs <= U32_MAX) 2883 return usecs_to_jiffies(usecs); 2884 2885 /* 2886 * If the value in nanoseconds is larger than 64 bit, use the largest 2887 * 64 bit value. 2888 */ 2889 if (usecs >= ((u64)(U64_MAX / NSEC_PER_USEC))) 2890 return nsecs_to_jiffies(U64_MAX); 2891 2892 return nsecs_to_jiffies(usecs * NSEC_PER_USEC); 2893 } 2894 2895 /* 2896 * hl_wait_multi_cs_completion_init - init completion structure 2897 * 2898 * @hdev: pointer to habanalabs device structure 2899 * @stream_master_bitmap: stream master QIDs map, set bit indicates stream 2900 * master QID to wait on 2901 * 2902 * @return valid completion struct pointer on success, otherwise error pointer 2903 * 2904 * up to MULTI_CS_MAX_USER_CTX calls can be done concurrently to the driver. 2905 * the function gets the first available completion (by marking it "used") 2906 * and initialize its values. 2907 */ 2908 static struct multi_cs_completion *hl_wait_multi_cs_completion_init(struct hl_device *hdev) 2909 { 2910 struct multi_cs_completion *mcs_compl; 2911 int i; 2912 2913 /* find free multi_cs completion structure */ 2914 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { 2915 mcs_compl = &hdev->multi_cs_completion[i]; 2916 spin_lock(&mcs_compl->lock); 2917 if (!mcs_compl->used) { 2918 mcs_compl->used = 1; 2919 mcs_compl->timestamp = 0; 2920 /* 2921 * init QID map to 0 to avoid completion by CSs. 
the actual QID map 2922 * to multi-CS CSs will be set incrementally at a later stage 2923 */ 2924 mcs_compl->stream_master_qid_map = 0; 2925 spin_unlock(&mcs_compl->lock); 2926 break; 2927 } 2928 spin_unlock(&mcs_compl->lock); 2929 } 2930 2931 if (i == MULTI_CS_MAX_USER_CTX) { 2932 dev_err(hdev->dev, "no available multi-CS completion structure\n"); 2933 return ERR_PTR(-ENOMEM); 2934 } 2935 return mcs_compl; 2936 } 2937 2938 /* 2939 * hl_wait_multi_cs_completion_fini - return completion structure and set as 2940 * unused 2941 * 2942 * @mcs_compl: pointer to the completion structure 2943 */ 2944 static void hl_wait_multi_cs_completion_fini( 2945 struct multi_cs_completion *mcs_compl) 2946 { 2947 /* 2948 * free completion structure, do it under lock to be in-sync with the 2949 * thread that signals completion 2950 */ 2951 spin_lock(&mcs_compl->lock); 2952 mcs_compl->used = 0; 2953 spin_unlock(&mcs_compl->lock); 2954 } 2955 2956 /* 2957 * hl_wait_multi_cs_completion - wait for first CS to complete 2958 * 2959 * @mcs_data: multi-CS internal data 2960 * 2961 * @return 0 on success, otherwise non 0 error code 2962 */ 2963 static int hl_wait_multi_cs_completion(struct multi_cs_data *mcs_data, 2964 struct multi_cs_completion *mcs_compl) 2965 { 2966 long completion_rc; 2967 2968 completion_rc = wait_for_completion_interruptible_timeout(&mcs_compl->completion, 2969 mcs_data->timeout_jiffies); 2970 2971 /* update timestamp */ 2972 if (completion_rc > 0) 2973 mcs_data->timestamp = mcs_compl->timestamp; 2974 2975 if (completion_rc == -ERESTARTSYS) 2976 return completion_rc; 2977 2978 mcs_data->wait_status = completion_rc; 2979 2980 return 0; 2981 } 2982 2983 /* 2984 * hl_multi_cs_completion_init - init array of multi-CS completion structures 2985 * 2986 * @hdev: pointer to habanalabs device structure 2987 */ 2988 void hl_multi_cs_completion_init(struct hl_device *hdev) 2989 { 2990 struct multi_cs_completion *mcs_cmpl; 2991 int i; 2992 2993 for (i = 0; i < MULTI_CS_MAX_USER_CTX; i++) { 2994 mcs_cmpl = &hdev->multi_cs_completion[i]; 2995 mcs_cmpl->used = 0; 2996 spin_lock_init(&mcs_cmpl->lock); 2997 init_completion(&mcs_cmpl->completion); 2998 } 2999 } 3000 3001 /* 3002 * hl_multi_cs_wait_ioctl - implementation of the multi-CS wait ioctl 3003 * 3004 * @hpriv: pointer to the private data of the fd 3005 * @data: pointer to multi-CS wait ioctl in/out args 3006 * 3007 */ 3008 static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) 3009 { 3010 struct multi_cs_completion *mcs_compl; 3011 struct hl_device *hdev = hpriv->hdev; 3012 struct multi_cs_data mcs_data = {}; 3013 union hl_wait_cs_args *args = data; 3014 struct hl_ctx *ctx = hpriv->ctx; 3015 struct hl_fence **fence_arr; 3016 void __user *seq_arr; 3017 u32 size_to_copy; 3018 u64 *cs_seq_arr; 3019 u8 seq_arr_len; 3020 int rc, i; 3021 3022 for (i = 0 ; i < sizeof(args->in.pad) ; i++) 3023 if (args->in.pad[i]) { 3024 dev_dbg(hdev->dev, "Padding bytes must be 0\n"); 3025 return -EINVAL; 3026 } 3027 3028 if (!hdev->supports_wait_for_multi_cs) { 3029 dev_err(hdev->dev, "Wait for multi CS is not supported\n"); 3030 return -EPERM; 3031 } 3032 3033 seq_arr_len = args->in.seq_arr_len; 3034 3035 if (seq_arr_len > HL_WAIT_MULTI_CS_LIST_MAX_LEN) { 3036 dev_err(hdev->dev, "Can wait only up to %d CSs, input sequence is of length %u\n", 3037 HL_WAIT_MULTI_CS_LIST_MAX_LEN, seq_arr_len); 3038 return -EINVAL; 3039 } 3040 3041 /* allocate memory for sequence array */ 3042 cs_seq_arr = 3043 kmalloc_array(seq_arr_len, sizeof(*cs_seq_arr), GFP_KERNEL); 3044 if 
(!cs_seq_arr) 3045 return -ENOMEM; 3046 3047 /* copy CS sequence array from user */ 3048 seq_arr = (void __user *) (uintptr_t) args->in.seq; 3049 size_to_copy = seq_arr_len * sizeof(*cs_seq_arr); 3050 if (copy_from_user(cs_seq_arr, seq_arr, size_to_copy)) { 3051 dev_err(hdev->dev, "Failed to copy multi-cs sequence array from user\n"); 3052 rc = -EFAULT; 3053 goto free_seq_arr; 3054 } 3055 3056 /* allocate array for the fences */ 3057 fence_arr = kmalloc_array(seq_arr_len, sizeof(struct hl_fence *), GFP_KERNEL); 3058 if (!fence_arr) { 3059 rc = -ENOMEM; 3060 goto free_seq_arr; 3061 } 3062 3063 /* initialize the multi-CS internal data */ 3064 mcs_data.ctx = ctx; 3065 mcs_data.seq_arr = cs_seq_arr; 3066 mcs_data.fence_arr = fence_arr; 3067 mcs_data.arr_len = seq_arr_len; 3068 3069 hl_ctx_get(ctx); 3070 3071 /* wait (with timeout) for the first CS to be completed */ 3072 mcs_data.timeout_jiffies = hl_usecs64_to_jiffies(args->in.timeout_us); 3073 mcs_compl = hl_wait_multi_cs_completion_init(hdev); 3074 if (IS_ERR(mcs_compl)) { 3075 rc = PTR_ERR(mcs_compl); 3076 goto put_ctx; 3077 } 3078 3079 /* poll all CS fences, extract timestamp */ 3080 mcs_data.update_ts = true; 3081 rc = hl_cs_poll_fences(&mcs_data, mcs_compl); 3082 /* 3083 * skip wait for CS completion when one of the below is true: 3084 * - an error on the poll function 3085 * - one or more CS in the list completed 3086 * - the user called ioctl with timeout 0 3087 */ 3088 if (rc || mcs_data.completion_bitmap || !args->in.timeout_us) 3089 goto completion_fini; 3090 3091 while (true) { 3092 rc = hl_wait_multi_cs_completion(&mcs_data, mcs_compl); 3093 if (rc || (mcs_data.wait_status == 0)) 3094 break; 3095 3096 /* 3097 * poll fences once again to update the CS map. 3098 * no timestamp should be updated this time. 3099 */ 3100 mcs_data.update_ts = false; 3101 rc = hl_cs_poll_fences(&mcs_data, mcs_compl); 3102 3103 if (rc || mcs_data.completion_bitmap) 3104 break; 3105 3106 /* 3107 * if hl_wait_multi_cs_completion returned before timeout (i.e. 
3108 * it got a completion) it either got completed by CS in the multi CS list 3109 * (in which case the indication will be non empty completion_bitmap) or it 3110 * got completed by CS submitted to one of the shared stream master but 3111 * not in the multi CS list (in which case we should wait again but modify 3112 * the timeout and set timestamp as zero to let a CS related to the current 3113 * multi-CS set a new, relevant, timestamp) 3114 */ 3115 mcs_data.timeout_jiffies = mcs_data.wait_status; 3116 mcs_compl->timestamp = 0; 3117 } 3118 3119 completion_fini: 3120 hl_wait_multi_cs_completion_fini(mcs_compl); 3121 3122 put_ctx: 3123 hl_ctx_put(ctx); 3124 kfree(fence_arr); 3125 3126 free_seq_arr: 3127 kfree(cs_seq_arr); 3128 3129 if (rc == -ERESTARTSYS) { 3130 dev_err_ratelimited(hdev->dev, 3131 "user process got signal while waiting for Multi-CS\n"); 3132 rc = -EINTR; 3133 } 3134 3135 if (rc) 3136 return rc; 3137 3138 /* update output args */ 3139 memset(args, 0, sizeof(*args)); 3140 3141 if (mcs_data.completion_bitmap) { 3142 args->out.status = HL_WAIT_CS_STATUS_COMPLETED; 3143 args->out.cs_completion_map = mcs_data.completion_bitmap; 3144 3145 /* if timestamp not 0- it's valid */ 3146 if (mcs_data.timestamp) { 3147 args->out.timestamp_nsec = mcs_data.timestamp; 3148 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; 3149 } 3150 3151 /* update if some CS was gone */ 3152 if (!mcs_data.timestamp) 3153 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE; 3154 } else { 3155 args->out.status = HL_WAIT_CS_STATUS_BUSY; 3156 } 3157 3158 return 0; 3159 } 3160 3161 static int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data) 3162 { 3163 struct hl_device *hdev = hpriv->hdev; 3164 union hl_wait_cs_args *args = data; 3165 enum hl_cs_wait_status status; 3166 u64 seq = args->in.seq; 3167 s64 timestamp; 3168 int rc; 3169 3170 rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq, &status, ×tamp); 3171 3172 if (rc == -ERESTARTSYS) { 3173 dev_err_ratelimited(hdev->dev, 3174 "user process got signal while waiting for CS handle %llu\n", 3175 seq); 3176 return -EINTR; 3177 } 3178 3179 memset(args, 0, sizeof(*args)); 3180 3181 if (rc) { 3182 if (rc == -ETIMEDOUT) { 3183 dev_err_ratelimited(hdev->dev, 3184 "CS %llu has timed-out while user process is waiting for it\n", 3185 seq); 3186 args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT; 3187 } else if (rc == -EIO) { 3188 dev_err_ratelimited(hdev->dev, 3189 "CS %llu has been aborted while user process is waiting for it\n", 3190 seq); 3191 args->out.status = HL_WAIT_CS_STATUS_ABORTED; 3192 } 3193 return rc; 3194 } 3195 3196 if (timestamp) { 3197 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; 3198 args->out.timestamp_nsec = timestamp; 3199 } 3200 3201 switch (status) { 3202 case CS_WAIT_STATUS_GONE: 3203 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_GONE; 3204 fallthrough; 3205 case CS_WAIT_STATUS_COMPLETED: 3206 args->out.status = HL_WAIT_CS_STATUS_COMPLETED; 3207 break; 3208 case CS_WAIT_STATUS_BUSY: 3209 default: 3210 args->out.status = HL_WAIT_CS_STATUS_BUSY; 3211 break; 3212 } 3213 3214 return 0; 3215 } 3216 3217 static inline void set_record_cq_info(struct hl_user_pending_interrupt *record, 3218 struct hl_cb *cq_cb, u32 cq_offset, u32 target_value) 3219 { 3220 record->ts_reg_info.cq_cb = cq_cb; 3221 record->cq_kernel_addr = (u64 *) cq_cb->kernel_address + cq_offset; 3222 record->cq_target_value = target_value; 3223 } 3224 3225 static int validate_and_get_ts_record(struct device *dev, 3226 struct hl_ts_buff *ts_buff, u64 ts_offset, 3227 
struct hl_user_pending_interrupt **req_event_record) 3228 { 3229 struct hl_user_pending_interrupt *ts_cb_last; 3230 3231 *req_event_record = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + 3232 ts_offset; 3233 ts_cb_last = (struct hl_user_pending_interrupt *)ts_buff->kernel_buff_address + 3234 (ts_buff->kernel_buff_size / sizeof(struct hl_user_pending_interrupt)); 3235 3236 /* Validate ts_offset not exceeding last max */ 3237 if (*req_event_record >= ts_cb_last) { 3238 dev_err(dev, "Ts offset(%llu) exceeds max CB offset(0x%llx)\n", 3239 ts_offset, (u64)(uintptr_t)ts_cb_last); 3240 return -EINVAL; 3241 } 3242 3243 return 0; 3244 } 3245 3246 static void unregister_timestamp_node(struct hl_device *hdev, 3247 struct hl_user_pending_interrupt *record, bool need_lock) 3248 { 3249 struct hl_user_interrupt *interrupt = record->ts_reg_info.interrupt; 3250 bool ts_rec_found = false; 3251 unsigned long flags; 3252 3253 if (need_lock) 3254 spin_lock_irqsave(&interrupt->ts_list_lock, flags); 3255 3256 if (record->ts_reg_info.in_use) { 3257 record->ts_reg_info.in_use = false; 3258 list_del(&record->list_node); 3259 ts_rec_found = true; 3260 } 3261 3262 if (need_lock) 3263 spin_unlock_irqrestore(&interrupt->ts_list_lock, flags); 3264 3265 /* Put refcounts that were taken when we registered the event */ 3266 if (ts_rec_found) { 3267 hl_mmap_mem_buf_put(record->ts_reg_info.buf); 3268 hl_cb_put(record->ts_reg_info.cq_cb); 3269 } 3270 } 3271 3272 static int ts_get_and_handle_kernel_record(struct hl_device *hdev, struct hl_ctx *ctx, 3273 struct wait_interrupt_data *data, unsigned long *flags, 3274 struct hl_user_pending_interrupt **pend) 3275 { 3276 struct hl_user_pending_interrupt *req_offset_record; 3277 struct hl_ts_buff *ts_buff = data->buf->private; 3278 bool need_lock = false; 3279 int rc; 3280 3281 rc = validate_and_get_ts_record(data->buf->mmg->dev, ts_buff, data->ts_offset, 3282 &req_offset_record); 3283 if (rc) 3284 return rc; 3285 3286 /* In case the node already registered, need to unregister first then re-use */ 3287 if (req_offset_record->ts_reg_info.in_use) { 3288 dev_dbg(data->buf->mmg->dev, 3289 "Requested record %p is in use on irq: %u ts addr: %p, unregister first then put on irq: %u\n", 3290 req_offset_record, 3291 req_offset_record->ts_reg_info.interrupt->interrupt_id, 3292 req_offset_record->ts_reg_info.timestamp_kernel_addr, 3293 data->interrupt->interrupt_id); 3294 /* 3295 * Since interrupt here can be different than the one the node currently registered 3296 * on, and we don't want to lock two lists while we're doing unregister, so 3297 * unlock the new interrupt wait list here and acquire the lock again after you done 3298 */ 3299 if (data->interrupt->interrupt_id != 3300 req_offset_record->ts_reg_info.interrupt->interrupt_id) { 3301 3302 need_lock = true; 3303 spin_unlock_irqrestore(&data->interrupt->ts_list_lock, *flags); 3304 } 3305 3306 unregister_timestamp_node(hdev, req_offset_record, need_lock); 3307 3308 if (need_lock) 3309 spin_lock_irqsave(&data->interrupt->ts_list_lock, *flags); 3310 } 3311 3312 /* Fill up the new registration node info and add it to the list */ 3313 req_offset_record->ts_reg_info.in_use = true; 3314 req_offset_record->ts_reg_info.buf = data->buf; 3315 req_offset_record->ts_reg_info.timestamp_kernel_addr = 3316 (u64 *) ts_buff->user_buff_address + data->ts_offset; 3317 req_offset_record->ts_reg_info.interrupt = data->interrupt; 3318 set_record_cq_info(req_offset_record, data->cq_cb, data->cq_offset, 3319 data->target_value); 3320 3321 
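/*
 * The record is handed back fully populated and marked in_use, but not yet
 * linked to any list: the caller either adds it to the interrupt's ts list
 * under the same ts_list_lock, or, if the target value was already reached,
 * drops the lock and releases the record right away.
 */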
*pend = req_offset_record; 3322 3323 return rc; 3324 } 3325 3326 static int _hl_interrupt_ts_reg_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, 3327 struct wait_interrupt_data *data, 3328 u32 *status, u64 *timestamp) 3329 { 3330 struct hl_user_pending_interrupt *pend; 3331 unsigned long flags; 3332 int rc = 0; 3333 3334 hl_ctx_get(ctx); 3335 3336 data->cq_cb = hl_cb_get(data->mmg, data->cq_handle); 3337 if (!data->cq_cb) { 3338 rc = -EINVAL; 3339 goto put_ctx; 3340 } 3341 3342 /* Validate the cq offset */ 3343 if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >= 3344 ((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) { 3345 rc = -EINVAL; 3346 goto put_cq_cb; 3347 } 3348 3349 dev_dbg(hdev->dev, "Timestamp registration: interrupt id: %u, handle: 0x%llx, ts offset: %llu, cq_offset: %llu\n", 3350 data->interrupt->interrupt_id, data->ts_handle, 3351 data->ts_offset, data->cq_offset); 3352 3353 data->buf = hl_mmap_mem_buf_get(data->mmg, data->ts_handle); 3354 if (!data->buf) { 3355 rc = -EINVAL; 3356 goto put_cq_cb; 3357 } 3358 3359 spin_lock_irqsave(&data->interrupt->ts_list_lock, flags); 3360 3361 /* get ts buffer record */ 3362 rc = ts_get_and_handle_kernel_record(hdev, ctx, data, &flags, &pend); 3363 if (rc) { 3364 spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags); 3365 goto put_ts_buff; 3366 } 3367 3368 /* We check for completion value as interrupt could have been received 3369 * before we add the timestamp node to the ts list. 3370 */ 3371 if (*pend->cq_kernel_addr >= data->target_value) { 3372 spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags); 3373 3374 dev_dbg(hdev->dev, "Target value already reached release ts record: pend: %p, offset: %llu, interrupt: %u\n", 3375 pend, data->ts_offset, data->interrupt->interrupt_id); 3376 3377 pend->ts_reg_info.in_use = 0; 3378 *status = HL_WAIT_CS_STATUS_COMPLETED; 3379 *pend->ts_reg_info.timestamp_kernel_addr = ktime_get_ns(); 3380 3381 goto put_ts_buff; 3382 } 3383 3384 list_add_tail(&pend->list_node, &data->interrupt->ts_list_head); 3385 spin_unlock_irqrestore(&data->interrupt->ts_list_lock, flags); 3386 3387 rc = *status = HL_WAIT_CS_STATUS_COMPLETED; 3388 3389 hl_ctx_put(ctx); 3390 3391 return rc; 3392 3393 put_ts_buff: 3394 hl_mmap_mem_buf_put(data->buf); 3395 put_cq_cb: 3396 hl_cb_put(data->cq_cb); 3397 put_ctx: 3398 hl_ctx_put(ctx); 3399 3400 return rc; 3401 } 3402 3403 static int _hl_interrupt_wait_ioctl(struct hl_device *hdev, struct hl_ctx *ctx, 3404 struct wait_interrupt_data *data, 3405 u32 *status, u64 *timestamp) 3406 { 3407 struct hl_user_pending_interrupt *pend; 3408 unsigned long timeout, flags; 3409 long completion_rc; 3410 int rc = 0; 3411 3412 timeout = hl_usecs64_to_jiffies(data->intr_timeout_us); 3413 3414 hl_ctx_get(ctx); 3415 3416 data->cq_cb = hl_cb_get(data->mmg, data->cq_handle); 3417 if (!data->cq_cb) { 3418 rc = -EINVAL; 3419 goto put_ctx; 3420 } 3421 3422 /* Validate the cq offset */ 3423 if (((u64 *) data->cq_cb->kernel_address + data->cq_offset) >= 3424 ((u64 *) data->cq_cb->kernel_address + (data->cq_cb->size / sizeof(u64)))) { 3425 rc = -EINVAL; 3426 goto put_cq_cb; 3427 } 3428 3429 pend = kzalloc(sizeof(*pend), GFP_KERNEL); 3430 if (!pend) { 3431 rc = -ENOMEM; 3432 goto put_cq_cb; 3433 } 3434 3435 hl_fence_init(&pend->fence, ULONG_MAX); 3436 pend->cq_kernel_addr = (u64 *) data->cq_cb->kernel_address + data->cq_offset; 3437 pend->cq_target_value = data->target_value; 3438 spin_lock_irqsave(&data->interrupt->wait_list_lock, flags); 3439 3440 3441 /* We check 
for completion value as interrupt could have been received 3442 * before we add the wait node to the wait list. 3443 */ 3444 if (*pend->cq_kernel_addr >= data->target_value || (!data->intr_timeout_us)) { 3445 spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags); 3446 3447 if (*pend->cq_kernel_addr >= data->target_value) 3448 *status = HL_WAIT_CS_STATUS_COMPLETED; 3449 else 3450 *status = HL_WAIT_CS_STATUS_BUSY; 3451 3452 pend->fence.timestamp = ktime_get(); 3453 goto set_timestamp; 3454 } 3455 3456 /* Add pending user interrupt to relevant list for the interrupt 3457 * handler to monitor. 3458 * Note that we cannot have sorted list by target value, 3459 * in order to shorten the list pass loop, since 3460 * same list could have nodes for different cq counter handle. 3461 */ 3462 list_add_tail(&pend->list_node, &data->interrupt->wait_list_head); 3463 spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags); 3464 3465 /* Wait for interrupt handler to signal completion */ 3466 completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, 3467 timeout); 3468 if (completion_rc > 0) { 3469 if (pend->fence.error == -EIO) { 3470 dev_err_ratelimited(hdev->dev, 3471 "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n", 3472 pend->fence.error); 3473 rc = -EIO; 3474 *status = HL_WAIT_CS_STATUS_ABORTED; 3475 } else { 3476 *status = HL_WAIT_CS_STATUS_COMPLETED; 3477 } 3478 } else { 3479 if (completion_rc == -ERESTARTSYS) { 3480 dev_err_ratelimited(hdev->dev, 3481 "user process got signal while waiting for interrupt ID %d\n", 3482 data->interrupt->interrupt_id); 3483 rc = -EINTR; 3484 *status = HL_WAIT_CS_STATUS_ABORTED; 3485 } else { 3486 /* The wait has timed-out. We don't know anything beyond that 3487 * because the workload was not submitted through the driver. 3488 * Therefore, from driver's perspective, the workload is still 3489 * executing. 3490 */ 3491 rc = 0; 3492 *status = HL_WAIT_CS_STATUS_BUSY; 3493 } 3494 } 3495 3496 /* 3497 * We keep removing the node from list here, and not at the irq handler 3498 * for completion timeout case. and if it's a registration 3499 * for ts record, the node will be deleted in the irq handler after 3500 * we reach the target value. 
3501 */ 3502 spin_lock_irqsave(&data->interrupt->wait_list_lock, flags); 3503 list_del(&pend->list_node); 3504 spin_unlock_irqrestore(&data->interrupt->wait_list_lock, flags); 3505 3506 set_timestamp: 3507 *timestamp = ktime_to_ns(pend->fence.timestamp); 3508 kfree(pend); 3509 hl_cb_put(data->cq_cb); 3510 hl_ctx_put(ctx); 3511 3512 return rc; 3513 3514 put_cq_cb: 3515 hl_cb_put(data->cq_cb); 3516 put_ctx: 3517 hl_ctx_put(ctx); 3518 3519 return rc; 3520 } 3521 3522 static int _hl_interrupt_wait_ioctl_user_addr(struct hl_device *hdev, struct hl_ctx *ctx, 3523 u64 timeout_us, u64 user_address, 3524 u64 target_value, struct hl_user_interrupt *interrupt, 3525 u32 *status, 3526 u64 *timestamp) 3527 { 3528 struct hl_user_pending_interrupt *pend; 3529 unsigned long timeout, flags; 3530 u64 completion_value; 3531 long completion_rc; 3532 int rc = 0; 3533 3534 timeout = hl_usecs64_to_jiffies(timeout_us); 3535 3536 hl_ctx_get(ctx); 3537 3538 pend = kzalloc(sizeof(*pend), GFP_KERNEL); 3539 if (!pend) { 3540 hl_ctx_put(ctx); 3541 return -ENOMEM; 3542 } 3543 3544 hl_fence_init(&pend->fence, ULONG_MAX); 3545 3546 /* Add pending user interrupt to relevant list for the interrupt 3547 * handler to monitor 3548 */ 3549 spin_lock_irqsave(&interrupt->wait_list_lock, flags); 3550 list_add_tail(&pend->list_node, &interrupt->wait_list_head); 3551 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); 3552 3553 /* We check for completion value as interrupt could have been received 3554 * before we added the node to the wait list 3555 */ 3556 if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) { 3557 dev_err(hdev->dev, "Failed to copy completion value from user\n"); 3558 rc = -EFAULT; 3559 goto remove_pending_user_interrupt; 3560 } 3561 3562 if (completion_value >= target_value) { 3563 *status = HL_WAIT_CS_STATUS_COMPLETED; 3564 /* There was no interrupt, we assume the completion is now. */ 3565 pend->fence.timestamp = ktime_get(); 3566 } else { 3567 *status = HL_WAIT_CS_STATUS_BUSY; 3568 } 3569 3570 if (!timeout_us || (*status == HL_WAIT_CS_STATUS_COMPLETED)) 3571 goto remove_pending_user_interrupt; 3572 3573 wait_again: 3574 /* Wait for interrupt handler to signal completion */ 3575 completion_rc = wait_for_completion_interruptible_timeout(&pend->fence.completion, 3576 timeout); 3577 3578 /* If timeout did not expire we need to perform the comparison. 
3579 * If comparison fails, keep waiting until timeout expires 3580 */ 3581 if (completion_rc > 0) { 3582 spin_lock_irqsave(&interrupt->wait_list_lock, flags); 3583 /* reinit_completion must be called before we check for user 3584 * completion value, otherwise, if interrupt is received after 3585 * the comparison and before the next wait_for_completion, 3586 * we will reach timeout and fail 3587 */ 3588 reinit_completion(&pend->fence.completion); 3589 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); 3590 3591 if (copy_from_user(&completion_value, u64_to_user_ptr(user_address), 8)) { 3592 dev_err(hdev->dev, "Failed to copy completion value from user\n"); 3593 rc = -EFAULT; 3594 3595 goto remove_pending_user_interrupt; 3596 } 3597 3598 if (completion_value >= target_value) { 3599 *status = HL_WAIT_CS_STATUS_COMPLETED; 3600 } else if (pend->fence.error) { 3601 dev_err_ratelimited(hdev->dev, 3602 "interrupt based wait ioctl aborted(error:%d) due to a reset cycle initiated\n", 3603 pend->fence.error); 3604 /* set the command completion status as ABORTED */ 3605 *status = HL_WAIT_CS_STATUS_ABORTED; 3606 } else { 3607 timeout = completion_rc; 3608 goto wait_again; 3609 } 3610 } else if (completion_rc == -ERESTARTSYS) { 3611 dev_err_ratelimited(hdev->dev, 3612 "user process got signal while waiting for interrupt ID %d\n", 3613 interrupt->interrupt_id); 3614 rc = -EINTR; 3615 } else { 3616 /* The wait has timed-out. We don't know anything beyond that 3617 * because the workload wasn't submitted through the driver. 3618 * Therefore, from driver's perspective, the workload is still 3619 * executing. 3620 */ 3621 rc = 0; 3622 *status = HL_WAIT_CS_STATUS_BUSY; 3623 } 3624 3625 remove_pending_user_interrupt: 3626 spin_lock_irqsave(&interrupt->wait_list_lock, flags); 3627 list_del(&pend->list_node); 3628 spin_unlock_irqrestore(&interrupt->wait_list_lock, flags); 3629 3630 *timestamp = ktime_to_ns(pend->fence.timestamp); 3631 3632 kfree(pend); 3633 hl_ctx_put(ctx); 3634 3635 return rc; 3636 } 3637 3638 static int hl_interrupt_wait_ioctl(struct hl_fpriv *hpriv, void *data) 3639 { 3640 u16 interrupt_id, first_interrupt, last_interrupt; 3641 struct hl_device *hdev = hpriv->hdev; 3642 struct asic_fixed_properties *prop; 3643 struct hl_user_interrupt *interrupt; 3644 union hl_wait_cs_args *args = data; 3645 u32 status = HL_WAIT_CS_STATUS_BUSY; 3646 u64 timestamp = 0; 3647 int rc, int_idx; 3648 3649 prop = &hdev->asic_prop; 3650 3651 if (!(prop->user_interrupt_count + prop->user_dec_intr_count)) { 3652 dev_err(hdev->dev, "no user interrupts allowed"); 3653 return -EPERM; 3654 } 3655 3656 interrupt_id = FIELD_GET(HL_WAIT_CS_FLAGS_INTERRUPT_MASK, args->in.flags); 3657 3658 first_interrupt = prop->first_available_user_interrupt; 3659 last_interrupt = prop->first_available_user_interrupt + prop->user_interrupt_count - 1; 3660 3661 if (interrupt_id < prop->user_dec_intr_count) { 3662 3663 /* Check if the requested core is enabled */ 3664 if (!(prop->decoder_enabled_mask & BIT(interrupt_id))) { 3665 dev_err(hdev->dev, "interrupt on a disabled core(%u) not allowed", 3666 interrupt_id); 3667 return -EINVAL; 3668 } 3669 3670 interrupt = &hdev->user_interrupt[interrupt_id]; 3671 3672 } else if (interrupt_id >= first_interrupt && interrupt_id <= last_interrupt) { 3673 3674 int_idx = interrupt_id - first_interrupt + prop->user_dec_intr_count; 3675 interrupt = &hdev->user_interrupt[int_idx]; 3676 3677 } else if (interrupt_id == HL_COMMON_USER_CQ_INTERRUPT_ID) { 3678 interrupt = &hdev->common_user_cq_interrupt; 
3679 } else if (interrupt_id == HL_COMMON_DEC_INTERRUPT_ID) { 3680 interrupt = &hdev->common_decoder_interrupt; 3681 } else { 3682 dev_err(hdev->dev, "invalid user interrupt %u", interrupt_id); 3683 return -EINVAL; 3684 } 3685 3686 if (args->in.flags & HL_WAIT_CS_FLAGS_INTERRUPT_KERNEL_CQ) { 3687 struct wait_interrupt_data wait_intr_data = {0}; 3688 3689 wait_intr_data.interrupt = interrupt; 3690 wait_intr_data.mmg = &hpriv->mem_mgr; 3691 wait_intr_data.cq_handle = args->in.cq_counters_handle; 3692 wait_intr_data.cq_offset = args->in.cq_counters_offset; 3693 wait_intr_data.ts_handle = args->in.timestamp_handle; 3694 wait_intr_data.ts_offset = args->in.timestamp_offset; 3695 wait_intr_data.target_value = args->in.target; 3696 wait_intr_data.intr_timeout_us = args->in.interrupt_timeout_us; 3697 3698 if (args->in.flags & HL_WAIT_CS_FLAGS_REGISTER_INTERRUPT) { 3699 /* 3700 * Allow only one registration at a time. this is needed in order to prevent 3701 * issues while handling the flow of re-use of the same offset. 3702 * Since the registration flow is protected only by the interrupt lock, 3703 * re-use flow might request to move ts node to another interrupt list, 3704 * and in such case we're not protected. 3705 */ 3706 mutex_lock(&hpriv->ctx->ts_reg_lock); 3707 3708 rc = _hl_interrupt_ts_reg_ioctl(hdev, hpriv->ctx, &wait_intr_data, 3709 &status, ×tamp); 3710 3711 mutex_unlock(&hpriv->ctx->ts_reg_lock); 3712 } else 3713 rc = _hl_interrupt_wait_ioctl(hdev, hpriv->ctx, &wait_intr_data, 3714 &status, ×tamp); 3715 } else { 3716 rc = _hl_interrupt_wait_ioctl_user_addr(hdev, hpriv->ctx, 3717 args->in.interrupt_timeout_us, args->in.addr, 3718 args->in.target, interrupt, &status, 3719 ×tamp); 3720 } 3721 3722 if (rc) 3723 return rc; 3724 3725 memset(args, 0, sizeof(*args)); 3726 args->out.status = status; 3727 3728 if (timestamp) { 3729 args->out.timestamp_nsec = timestamp; 3730 args->out.flags |= HL_WAIT_CS_STATUS_FLAG_TIMESTAMP_VLD; 3731 } 3732 3733 return 0; 3734 } 3735 3736 int hl_wait_ioctl(struct drm_device *ddev, void *data, struct drm_file *file_priv) 3737 { 3738 struct hl_fpriv *hpriv = file_priv->driver_priv; 3739 struct hl_device *hdev = hpriv->hdev; 3740 union hl_wait_cs_args *args = data; 3741 u32 flags = args->in.flags; 3742 int rc; 3743 3744 /* If the device is not operational, or if an error has happened and user should release the 3745 * device, there is no point in waiting for any command submission or user interrupt. 3746 */ 3747 if (!hl_device_operational(hpriv->hdev, NULL) || hdev->reset_info.watchdog_active) 3748 return -EBUSY; 3749 3750 if (flags & HL_WAIT_CS_FLAGS_INTERRUPT) 3751 rc = hl_interrupt_wait_ioctl(hpriv, data); 3752 else if (flags & HL_WAIT_CS_FLAGS_MULTI_CS) 3753 rc = hl_multi_cs_wait_ioctl(hpriv, data); 3754 else 3755 rc = hl_cs_wait_ioctl(hpriv, data); 3756 3757 return rc; 3758 } 3759
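/*
 * Illustrative (non-normative) userspace sketch of the encapsulated-signals
 * reservation flow served by cs_ioctl_reserve_signals() and
 * cs_ioctl_unreserve_signals() above. It is not driver code; the in/out field
 * names match the ones consumed in hl_cs_ioctl(), while the DRM_IOCTL_HL_CS
 * request macro and the exact uapi layout are assumed to come from
 * <uapi/drm/habanalabs_accel.h> and may differ between kernel versions.
 *
 *	union hl_cs_args cs = {};
 *
 *	// reserve 32 signals on a sync-stream capable queue
 *	cs.in.cs_flags = HL_CS_FLAGS_RESERVE_SIGNALS_ONLY;
 *	cs.in.encaps_signals_q_idx = q_idx;
 *	cs.in.encaps_signals_count = 32;
 *	if (ioctl(fd, DRM_IOCTL_HL_CS, &cs))
 *		return -1;
 *
 *	handle_id = cs.out.handle_id;
 *	sob_base_addr_offset = cs.out.sob_base_addr_offset;
 *	signals_count = cs.out.count;
 *
 *	// ... submit work that signals the reserved SOB range, or wait on it
 *	// using a wait CS with HL_CS_FLAGS_ENCAP_SIGNALS ...
 *
 *	// release a reservation that was not consumed
 *	memset(&cs, 0, sizeof(cs));
 *	cs.in.cs_flags = HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY;
 *	cs.in.encaps_sig_handle_id = handle_id;
 *	ioctl(fd, DRM_IOCTL_HL_CS, &cs);
 */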