/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef KFD_PRIV_H_INCLUDED
#define KFD_PRIV_H_INCLUDED

#include <linux/hashtable.h>
#include <linux/mmu_notifier.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/workqueue.h>
#include <linux/spinlock.h>
#include <linux/kfd_ioctl.h>
#include <linux/idr.h>
#include <linux/kfifo.h>
#include <linux/seq_file.h>
#include <linux/kref.h>
#include <kgd_kfd_interface.h>

#include "amd_shared.h"

#define KFD_MAX_RING_ENTRY_SIZE 8

#define KFD_SYSFS_FILE_MODE 0444

/* GPU ID hash width in bits */
#define KFD_GPU_ID_HASH_WIDTH 16

/* Use upper bits of mmap offset to store KFD driver specific information.
 * BITS[63:62] - Encode MMAP type
 * BITS[61:46] - Encode gpu_id. Identifies the GPU to which the offset belongs
 * BITS[45:0]  - MMAP offset value
 *
 * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
 * defines are w.r.t. PAGE_SIZE
 */
#define KFD_MMAP_TYPE_SHIFT	(62 - PAGE_SHIFT)
#define KFD_MMAP_TYPE_MASK	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_DOORBELL	(0x3ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_EVENTS	(0x2ULL << KFD_MMAP_TYPE_SHIFT)
#define KFD_MMAP_TYPE_RESERVED_MEM	(0x1ULL << KFD_MMAP_TYPE_SHIFT)

#define KFD_MMAP_GPU_ID_SHIFT (46 - PAGE_SHIFT)
#define KFD_MMAP_GPU_ID_MASK (((1ULL << KFD_GPU_ID_HASH_WIDTH) - 1) \
				<< KFD_MMAP_GPU_ID_SHIFT)
#define KFD_MMAP_GPU_ID(gpu_id) ((((uint64_t)gpu_id) << KFD_MMAP_GPU_ID_SHIFT)\
				& KFD_MMAP_GPU_ID_MASK)
#define KFD_MMAP_GPU_ID_GET(offset) ((offset & KFD_MMAP_GPU_ID_MASK) \
				>> KFD_MMAP_GPU_ID_SHIFT)

#define KFD_MMAP_OFFSET_VALUE_MASK	(0x3FFFFFFFFFFFULL >> PAGE_SHIFT)
#define KFD_MMAP_OFFSET_VALUE_GET(offset) (offset & KFD_MMAP_OFFSET_VALUE_MASK)
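
/*
 * Illustrative sketch (not part of the driver interface): composing and
 * decoding a doorbell mmap offset with the macros above. The helper names
 * are hypothetical; the driver open-codes these operations. The encoded
 * value is in units of pages, matching vm_pgoff, and PAGE_SHIFT is assumed
 * to be visible here, as the macros themselves already require.
 */
static inline u64 kfd_example_make_doorbell_offset(u32 gpu_id)
{
	/* Type bits first, then the GPU ID; BITS[45:0] stay free */
	return KFD_MMAP_TYPE_DOORBELL | KFD_MMAP_GPU_ID(gpu_id);
}

static inline u32 kfd_example_offset_to_gpu_id(u64 vm_pgoff)
{
	/* Inverse of KFD_MMAP_GPU_ID(): mask the field, shift it back down */
	return (u32)KFD_MMAP_GPU_ID_GET(vm_pgoff);
}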

/*
 * When working with the cp scheduler, the HIQ must be assigned to a fixed
 * hqd slot, either manually or by the amdgpu driver. Below are the fixed
 * HIQ hqd slot definitions for Kaveri. On Kaveri only the first ME's queues
 * participate in cp scheduling; with that in mind, the HIQ slot is placed
 * in the second ME.
 */
#define KFD_CIK_HIQ_PIPE 4
#define KFD_CIK_HIQ_QUEUE 0

/* Macro for allocating structures */
#define kfd_alloc_struct(ptr_to_struct)	\
	((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL))
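
/*
 * Usage sketch for kfd_alloc_struct() (illustrative only; kept in a comment
 * because this header does not itself pull in <linux/slab.h> for kzalloc()).
 * The macro derives the allocation size from the pointee type, so the struct
 * name is never repeated at the call site:
 *
 *	struct kfd_dev *kfd;
 *
 *	kfd = kfd_alloc_struct(kfd);
 *	if (!kfd)
 *		return NULL;
 */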

#define KFD_MAX_NUM_OF_PROCESSES 512
#define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024

/*
 * Size of the per-process TBA+TMA buffer: 2 pages
 *
 * The first page is the TBA used for the CWSR ISA code. The second
 * page is used as TMA for daisy chaining a user-mode trap handler.
 */
#define KFD_CWSR_TBA_TMA_SIZE (PAGE_SIZE * 2)
#define KFD_CWSR_TMA_OFFSET PAGE_SIZE

/*
 * Kernel module parameter to specify the maximum number of supported queues
 * per device
 */
extern int max_num_of_queues_per_device;

#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT 4096
#define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE		\
	(KFD_MAX_NUM_OF_PROCESSES *			\
			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS)

#define KFD_KERNEL_QUEUE_SIZE 2048

/* Kernel module parameter to specify the scheduling policy */
extern int sched_policy;

/*
 * Kernel module parameter to specify the maximum number of processes per
 * HW scheduler
 */
extern int hws_max_conc_proc;

extern int cwsr_enable;

/*
 * Kernel module parameter to specify whether to send SIGTERM to an HSA
 * process on an unhandled exception
 */
extern int send_sigterm;

/*
 * Kernel module parameter to simulate a large-BAR machine on machines that
 * do not have large BAR enabled.
 */
extern int debug_largebar;

/*
 * Ignore the CRAT table during KFD initialization; can be used to work
 * around broken CRAT tables on some AMD systems
 */
extern int ignore_crat;

/*
 * Set sh_mem_config.retry_disable on Vega10
 */
extern int noretry;

/*
 * Halt if HWS hang is detected
 */
extern int halt_if_hws_hang;

/**
 * enum kfd_sched_policy
 *
 * @KFD_SCHED_POLICY_HWS: H/W scheduling policy known as command processor
 * (cp) scheduling. In this mode the firmware schedules the user mode queues
 * as well as kernel queues such as the HIQ and DIQ.
 * The HIQ is a special queue that dispatches the configuration and the list
 * of currently running user mode queues to the cp.
 * The DIQ is a debugging queue that dispatches debugging commands to the
 * firmware.
 * In this mode the user mode queue oversubscription feature is enabled.
 *
 * @KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: The same as above but with the
 * oversubscription feature disabled.
 *
 * @KFD_SCHED_POLICY_NO_HWS: In the no H/W scheduling mode the driver sets
 * the command processor registers directly and configures the queues
 * "manually". This mode is used *ONLY* for debugging purposes.
 */
enum kfd_sched_policy {
	KFD_SCHED_POLICY_HWS = 0,
	KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION,
	KFD_SCHED_POLICY_NO_HWS
};

enum cache_policy {
	cache_policy_coherent,
	cache_policy_noncoherent
};

#define KFD_IS_SOC15(chip) ((chip) >= CHIP_VEGA10)

struct kfd_event_interrupt_class {
	bool (*interrupt_isr)(struct kfd_dev *dev,
			const uint32_t *ih_ring_entry, uint32_t *patched_ihre,
			bool *patched_flag);
	void (*interrupt_wq)(struct kfd_dev *dev,
			const uint32_t *ih_ring_entry);
};

struct kfd_device_info {
	enum amd_asic_type asic_family;
	const struct kfd_event_interrupt_class *event_interrupt_class;
	unsigned int max_pasid_bits;
	unsigned int max_no_of_hqd;
	unsigned int doorbell_size;
	size_t ih_ring_entry_size;
	uint8_t num_of_watch_points;
	uint16_t mqd_size_aligned;
	bool supports_cwsr;
	bool needs_iommu_device;
	bool needs_pci_atomics;
	unsigned int num_sdma_engines;
};

struct kfd_mem_obj {
	uint32_t range_start;
	uint32_t range_end;
	uint64_t gpu_addr;
	uint32_t *cpu_ptr;
	void *gtt_mem;
};

struct kfd_vmid_info {
	uint32_t first_vmid_kfd;
	uint32_t last_vmid_kfd;
	uint32_t vmid_num_kfd;
};

struct kfd_dev {
	struct kgd_dev *kgd;

	const struct kfd_device_info *device_info;
	struct pci_dev *pdev;

	unsigned int id;		/* topology stub index */

	phys_addr_t doorbell_base;	/* Start of actual doorbells used by
					 * KFD. It is aligned for mapping
					 * into user mode
					 */
	size_t doorbell_id_offset;	/* Doorbell offset (from KFD doorbell
					 * to HW doorbell, GFX reserved some
					 * at the start)
					 */
	u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells
					   * page used by kernel queue
					   */

	struct kgd2kfd_shared_resources shared_resources;
	struct kfd_vmid_info vm_info;

	const struct kfd2kgd_calls *kfd2kgd;
	struct mutex doorbell_mutex;
	DECLARE_BITMAP(doorbell_available_index,
			KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);

	void *gtt_mem;
	uint64_t gtt_start_gpu_addr;
	void *gtt_start_cpu_ptr;
	void *gtt_sa_bitmap;
	struct mutex gtt_sa_lock;
	unsigned int gtt_sa_chunk_size;
	unsigned int gtt_sa_num_of_chunks;

	/* Interrupts */
	struct kfifo ih_fifo;
	struct workqueue_struct *ih_wq;
	struct work_struct interrupt_work;
	spinlock_t interrupt_lock;

	/* QCM Device instance */
	struct device_queue_manager *dqm;

	bool init_complete;
	/*
	 * Interrupts of interest to KFD are copied
	 * from the HW ring into a SW ring.
	 */
	bool interrupts_active;

	/* Debug manager */
	struct kfd_dbgmgr *dbgmgr;

	/* Maximum process number mapped to HW scheduler */
	unsigned int max_proc_per_quantum;

	/* CWSR */
	bool cwsr_enabled;
	const void *cwsr_isa;
	unsigned int cwsr_isa_size;
};

/* KGD2KFD callbacks */
void kgd2kfd_exit(void);
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
			struct pci_dev *pdev, const struct kfd2kgd_calls *f2g);
bool kgd2kfd_device_init(struct kfd_dev *kfd,
			const struct kgd2kfd_shared_resources *gpu_resources);
void kgd2kfd_device_exit(struct kfd_dev *kfd);

enum kfd_mempool {
	KFD_MEMPOOL_SYSTEM_CACHEABLE = 1,
	KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2,
	KFD_MEMPOOL_FRAMEBUFFER = 3,
};

/* Character device interface */
int kfd_chardev_init(void);
void kfd_chardev_exit(void);
struct device *kfd_chardev(void);

/**
 * enum kfd_unmap_queues_filter
 *
 * @KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE: Preempts a single queue.
 *
 * @KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES: Preempts all queues in the
 * running queues list.
 *
 * @KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES: Preempts all dynamic (non-static)
 * queues.
 *
 * @KFD_UNMAP_QUEUES_FILTER_BY_PASID: Preempts queues that belong to a
 * specific process.
 */
enum kfd_unmap_queues_filter {
	KFD_UNMAP_QUEUES_FILTER_SINGLE_QUEUE,
	KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES,
	KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES,
	KFD_UNMAP_QUEUES_FILTER_BY_PASID
};

/**
 * enum kfd_queue_type
 *
 * @KFD_QUEUE_TYPE_COMPUTE: Regular user mode queue type.
 *
 * @KFD_QUEUE_TYPE_SDMA: SDMA user mode queue type.
 *
 * @KFD_QUEUE_TYPE_HIQ: HIQ queue type.
 *
 * @KFD_QUEUE_TYPE_DIQ: DIQ queue type.
 */
enum kfd_queue_type {
	KFD_QUEUE_TYPE_COMPUTE,
	KFD_QUEUE_TYPE_SDMA,
	KFD_QUEUE_TYPE_HIQ,
	KFD_QUEUE_TYPE_DIQ
};

enum kfd_queue_format {
	KFD_QUEUE_FORMAT_PM4,
	KFD_QUEUE_FORMAT_AQL
};

/**
 * struct queue_properties
 *
 * @type: The queue type.
 *
 * @queue_id: Queue identifier.
 *
 * @queue_address: Queue ring buffer address.
 *
 * @queue_size: Queue ring buffer size.
 *
 * @priority: Defines the queue priority relative to other queues in the
 * process.
 * This is just an indication and HW scheduling may override the priority as
 * necessary while keeping the relative prioritization.
 * The priority granularity is from 0 to f, where f is the highest priority.
 * Currently all queues are initialized with the highest priority.
 *
 * @queue_percent: This field is partially implemented; currently a zero in
 * this field indicates that the queue is not active.
 *
 * @read_ptr: User space address which points to the number of dwords the
 * cp has read from the ring buffer. This field is updated automatically by
 * the H/W.
 *
 * @write_ptr: Defines the number of dwords written to the ring buffer.
 *
 * @doorbell_ptr: Notifies the H/W of new packets written to the queue ring
 * buffer. This field should mirror @write_ptr, and the user should update it
 * after updating @write_ptr.
 *
 * @doorbell_off: The doorbell offset in the doorbell pci-bar.
 *
 * @is_interop: Defines if this is an interop queue. An interop queue can
 * access both graphics and compute resources.
 *
 * @is_evicted: Defines if the queue is evicted. Only active queues
 * are evicted, rendering them inactive.
 *
 * @is_active: Defines if the queue is active or not. @is_active and
 * @is_evicted are protected by the DQM lock.
 *
 * @vmid: If the scheduling mode is no cp scheduling, this field defines the
 * vmid of the queue.
 *
 * This structure represents the queue properties for each queue, whether it
 * is a user mode or kernel mode queue.
 */
struct queue_properties {
	enum kfd_queue_type type;
	enum kfd_queue_format format;
	unsigned int queue_id;
	uint64_t queue_address;
	uint64_t queue_size;
	uint32_t priority;
	uint32_t queue_percent;
	uint32_t *read_ptr;
	uint32_t *write_ptr;
	void __iomem *doorbell_ptr;
	uint32_t doorbell_off;
	bool is_interop;
	bool is_evicted;
	bool is_active;
	/* Not relevant for user mode queues in cp scheduling */
	unsigned int vmid;
	/* Relevant only for sdma queues */
	uint32_t sdma_engine_id;
	uint32_t sdma_queue_id;
	uint32_t sdma_vm_addr;
	/* Relevant only for VI */
	uint64_t eop_ring_buffer_address;
	uint32_t eop_ring_buffer_size;
	uint64_t ctx_save_restore_area_address;
	uint32_t ctx_save_restore_area_size;
	uint32_t ctl_stack_size;
	uint64_t tba_addr;
	uint64_t tma_addr;
	/* Relevant for CU */
	uint32_t cu_mask_count; /* Must be a multiple of 32 */
	uint32_t *cu_mask;
};
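
/*
 * Illustrative sketch: how a queue's activity is typically derived from the
 * properties above (cf. the device queue manager, which open-codes this
 * check). A queue only counts as active once it has a ring buffer, a
 * non-zero queue_percent, and has not been evicted. The helper name is
 * hypothetical.
 */
static inline bool kfd_example_queue_is_active(struct queue_properties *q)
{
	return q->queue_size > 0 && q->queue_address != 0 &&
	       q->queue_percent > 0 && !q->is_evicted;
}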

/**
 * struct queue
 *
 * @list: Queue linked list.
 *
 * @mqd: The queue MQD.
 *
 * @mqd_mem_obj: The MQD local gpu memory object.
 *
 * @gart_mqd_addr: The MQD gart mc address.
 *
 * @properties: The queue properties.
 *
 * @mec: Used only in no cp scheduling mode and identifies the micro engine
 * id that the queue should execute on.
 *
 * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe
 * id.
 *
 * @queue: Used only in no cp scheduling mode and identifies the queue's slot.
 *
 * @process: The kfd process that created this queue.
 *
 * @device: The kfd device that created this queue.
 *
 * This structure represents user mode compute queues.
 * It contains all the necessary data to handle such queues.
 */
struct queue {
	struct list_head list;
	void *mqd;
	struct kfd_mem_obj *mqd_mem_obj;
	uint64_t gart_mqd_addr;
	struct queue_properties properties;

	uint32_t mec;
	uint32_t pipe;
	uint32_t queue;

	unsigned int sdma_id;
	unsigned int doorbell_id;

	struct kfd_process *process;
	struct kfd_dev *device;
};

/*
 * Please read the kfd_mqd_manager.h description.
 */
enum KFD_MQD_TYPE {
	KFD_MQD_TYPE_COMPUTE = 0,	/* for no cp scheduling */
	KFD_MQD_TYPE_HIQ,		/* for hiq */
	KFD_MQD_TYPE_CP,		/* for cp queues and diq */
	KFD_MQD_TYPE_SDMA,		/* for sdma queues */
	KFD_MQD_TYPE_MAX
};

struct scheduling_resources {
	unsigned int vmid_mask;
	enum kfd_queue_type type;
	uint64_t queue_mask;
	uint64_t gws_mask;
	uint32_t oac_mask;
	uint32_t gds_heap_base;
	uint32_t gds_heap_size;
};

struct process_queue_manager {
	/* data */
	struct kfd_process *process;
	struct list_head queues;
	unsigned long *queue_slot_bitmap;
};

struct qcm_process_device {
	/* The Device Queue Manager that owns this data */
	struct device_queue_manager *dqm;
	struct process_queue_manager *pqm;
	/* Queues list */
	struct list_head queues_list;
	struct list_head priv_queue_list;

	unsigned int queue_count;
	unsigned int vmid;
	bool is_debug;
	unsigned int evicted; /* eviction counter, 0=active */

	/* This flag tells if we should reset all wavefronts on
	 * process termination
	 */
	bool reset_wavefronts;

	/*
	 * All the memory management data should be here too
	 */
	uint64_t gds_context_area;
	uint32_t sh_mem_config;
	uint32_t sh_mem_bases;
	uint32_t sh_mem_ape1_base;
	uint32_t sh_mem_ape1_limit;
	uint32_t page_table_base;
	uint32_t gds_size;
	uint32_t num_gws;
	uint32_t num_oac;
	uint32_t sh_hidden_private_base;

	/* CWSR memory */
	void *cwsr_kaddr;
	uint64_t cwsr_base;
	uint64_t tba_addr;
	uint64_t tma_addr;

	/* IB memory */
	uint64_t ib_base;
	void *ib_kaddr;

	/* doorbell resources per process per device */
	unsigned long *doorbell_bitmap;
};

/* KFD Memory Eviction */

/* Approx. wait time before attempting to restore evicted BOs */
#define PROCESS_RESTORE_TIME_MS 100
/* Approx. back off time if restore fails due to lack of memory */
#define PROCESS_BACK_OFF_TIME_MS 100
/* Approx. time before evicting the process again */
#define PROCESS_ACTIVE_TIME_MS 10

int kgd2kfd_quiesce_mm(struct mm_struct *mm);
int kgd2kfd_resume_mm(struct mm_struct *mm);
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
					       struct dma_fence *fence);

/* 8 byte handle containing GPU ID in the most significant 4 bytes and
 * idr_handle in the least significant 4 bytes
 */
#define MAKE_HANDLE(gpu_id, idr_handle) \
	(((uint64_t)(gpu_id) << 32) + idr_handle)
#define GET_GPU_ID(handle) (handle >> 32)
#define GET_IDR_HANDLE(handle) (handle & 0xFFFFFFFF)
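
/*
 * Illustrative sketch: round-tripping a 64-bit buffer handle. MAKE_HANDLE()
 * packs the gpu_id into the upper 32 bits and the per-process IDR handle
 * into the lower 32 bits, so both components can be recovered with the
 * GET_* macros above. The function name is hypothetical.
 */
static inline bool kfd_example_handle_roundtrip(uint32_t gpu_id,
						uint32_t idr_handle)
{
	uint64_t handle = MAKE_HANDLE(gpu_id, idr_handle);

	return GET_GPU_ID(handle) == gpu_id &&
	       GET_IDR_HANDLE(handle) == idr_handle;
}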

enum kfd_pdd_bound {
	PDD_UNBOUND = 0,
	PDD_BOUND,
	PDD_BOUND_SUSPENDED,
};

/* Data that is per-process-per device. */
struct kfd_process_device {
	/*
	 * List of all per-device data for a process.
	 * Starts from kfd_process.per_device_data.
	 */
	struct list_head per_device_list;

	/* The device that owns this data. */
	struct kfd_dev *dev;

	/* The process that owns this kfd_process_device. */
	struct kfd_process *process;

	/* per-process-per device QCM data structure */
	struct qcm_process_device qpd;

	/* Apertures */
	uint64_t lds_base;
	uint64_t lds_limit;
	uint64_t gpuvm_base;
	uint64_t gpuvm_limit;
	uint64_t scratch_base;
	uint64_t scratch_limit;

	/* VM context for GPUVM allocations */
	struct file *drm_file;
	void *vm;

	/* GPUVM allocations storage */
	struct idr alloc_idr;

	/* Flag used to tell whether the pdd has been dequeued from the dqm.
	 * This is used to prevent dev->dqm->ops.process_termination() from
	 * being called twice when it is already called from the IOMMU
	 * callback function.
	 */
	bool already_dequeued;

	/* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */
	enum kfd_pdd_bound bound;
};

#define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd)

/* Process data */
struct kfd_process {
	/*
	 * kfd_process structures are stored in an
	 * mm_struct* -> kfd_process* hash table
	 * (kfd_processes in kfd_process.c)
	 */
	struct hlist_node kfd_processes;

	/*
	 * Opaque pointer to mm_struct. We don't hold a reference to
	 * it so it should never be dereferenced from here. This is
	 * only used for looking up processes by their mm.
	 */
	void *mm;

	struct kref ref;
	struct work_struct release_work;

	struct mutex mutex;

	/*
	 * In any process, the thread that started main() is the lead
	 * thread and outlives the rest.
	 * It is here because amd_iommu_bind_pasid wants a task_struct.
	 * It can also be used for safely getting a reference to the
	 * mm_struct of the process.
	 */
	struct task_struct *lead_thread;

	/* We want to receive a notification when the mm_struct is destroyed */
	struct mmu_notifier mmu_notifier;

	/* Used for delayed freeing of the kfd_process structure */
	struct rcu_head rcu;

	unsigned int pasid;
	unsigned int doorbell_index;

	/*
	 * List of kfd_process_device structures,
	 * one for each device the process is using.
	 */
	struct list_head per_device_data;

	struct process_queue_manager pqm;

	/* Is the user space process 32 bit? */
	bool is_32bit_user_mode;

	/* Event-related data */
	struct mutex event_mutex;
	/* Event ID allocator and lookup */
	struct idr event_idr;
	/* Event page */
	struct kfd_signal_page *signal_page;
	size_t signal_mapped_size;
	size_t signal_event_count;
	bool signal_event_limit_reached;

	/* Information used for memory eviction */
	void *kgd_process_info;
	/* Eviction fence that is attached to all the BOs of this process.
	 * The fence will be triggered during eviction and a new one will be
	 * created during restore
	 */
	struct dma_fence *ef;

	/* Work items for evicting and restoring BOs */
	struct delayed_work eviction_work;
	struct delayed_work restore_work;
	/* seqno of the last scheduled eviction */
	unsigned int last_eviction_seqno;
	/* Approx. the last timestamp (in jiffies) when the process was
	 * restored after an eviction
	 */
	unsigned long last_restore_timestamp;
};

#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
extern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE);
extern struct srcu_struct kfd_processes_srcu;

/**
 * Ioctl function type.
 *
 * \param filep pointer to file structure.
 * \param p amdkfd process pointer.
 * \param data pointer to arg that was copied from user.
 */
typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p,
				void *data);

struct amdkfd_ioctl_desc {
	unsigned int cmd;
	int flags;
	amdkfd_ioctl_t *func;
	unsigned int cmd_drv;
	const char *name;
};
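
/*
 * Illustrative sketch (hypothetical handler, not part of the driver): a
 * no-op ioctl handler matching the amdkfd_ioctl_t signature. The chardev
 * dispatcher is expected to have copied the argument struct in from user
 * space already, so a real handler would simply cast @data to its ioctl
 * args type.
 */
static inline int kfd_example_ioctl_noop(struct file *filep,
					 struct kfd_process *p, void *data)
{
	return 0;
}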

int kfd_process_create_wq(void);
void kfd_process_destroy_wq(void);
struct kfd_process *kfd_create_process(struct file *filep);
struct kfd_process *kfd_get_process(const struct task_struct *);
struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
void kfd_unref_process(struct kfd_process *p);
int kfd_process_evict_queues(struct kfd_process *p);
int kfd_process_restore_queues(struct kfd_process *p);
void kfd_suspend_all_processes(void);
int kfd_resume_all_processes(void);

int kfd_process_device_init_vm(struct kfd_process_device *pdd,
			       struct file *drm_file);
struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
						struct kfd_process *p);
struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev,
						struct kfd_process *p);
struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
						struct kfd_process *p);

int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process,
			  struct vm_area_struct *vma);

/* KFD process API for creating and translating handles */
int kfd_process_device_create_obj_handle(struct kfd_process_device *pdd,
					void *mem);
void *kfd_process_device_translate_handle(struct kfd_process_device *p,
					int handle);
void kfd_process_device_remove_obj_handle(struct kfd_process_device *pdd,
					int handle);

/* Process device data iterator */
struct kfd_process_device *kfd_get_first_process_device_data(
						struct kfd_process *p);
struct kfd_process_device *kfd_get_next_process_device_data(
						struct kfd_process *p,
						struct kfd_process_device *pdd);
bool kfd_has_process_device_data(struct kfd_process *p);

/* PASIDs */
int kfd_pasid_init(void);
void kfd_pasid_exit(void);
bool kfd_set_pasid_limit(unsigned int new_limit);
unsigned int kfd_get_pasid_limit(void);
unsigned int kfd_pasid_alloc(void);
void kfd_pasid_free(unsigned int pasid);

/* Doorbells */
size_t kfd_doorbell_process_slice(struct kfd_dev *kfd);
int kfd_doorbell_init(struct kfd_dev *kfd);
void kfd_doorbell_fini(struct kfd_dev *kfd);
int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process,
		      struct vm_area_struct *vma);
void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd,
					unsigned int *doorbell_off);
void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr);
u32 read_kernel_doorbell(u32 __iomem *db);
void write_kernel_doorbell(void __iomem *db, u32 value);
void write_kernel_doorbell64(void __iomem *db, u64 value);
unsigned int kfd_doorbell_id_to_offset(struct kfd_dev *kfd,
					struct kfd_process *process,
					unsigned int doorbell_id);
phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev,
					struct kfd_process *process);
int kfd_alloc_process_doorbells(struct kfd_process *process);
void kfd_free_process_doorbells(struct kfd_process *process);
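
/*
 * Illustrative sketch of the kernel doorbell API above, loosely following
 * the way kernel queues use it: acquire a doorbell from the KFD doorbell
 * page, ring it with a new write pointer, then release it. The function
 * name is hypothetical and error handling is minimal.
 */
static inline void kfd_example_ring_kernel_doorbell(struct kfd_dev *kfd,
						    u32 wptr)
{
	unsigned int doorbell_off;
	void __iomem *db;

	db = kfd_get_kernel_doorbell(kfd, &doorbell_off);
	if (!db)
		return;

	write_kernel_doorbell(db, wptr);	/* notify the HW of wptr */
	kfd_release_kernel_doorbell(kfd, (u32 __iomem *)db);
}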

/* GTT Sub-Allocator */

int kfd_gtt_sa_allocate(struct kfd_dev *kfd, unsigned int size,
			struct kfd_mem_obj **mem_obj);

int kfd_gtt_sa_free(struct kfd_dev *kfd, struct kfd_mem_obj *mem_obj);

extern struct device *kfd_device;

/* Topology */
int kfd_topology_init(void);
void kfd_topology_shutdown(void);
int kfd_topology_add_device(struct kfd_dev *gpu);
int kfd_topology_remove_device(struct kfd_dev *gpu);
struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
						uint32_t proximity_domain);
struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);
struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
int kfd_numa_node_to_apic_id(int numa_node_id);

/* Interrupts */
int kfd_interrupt_init(struct kfd_dev *dev);
void kfd_interrupt_exit(struct kfd_dev *dev);
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
bool enqueue_ih_ring_entry(struct kfd_dev *kfd, const void *ih_ring_entry);
bool interrupt_is_wanted(struct kfd_dev *dev,
			 const uint32_t *ih_ring_entry,
			 uint32_t *patched_ihre, bool *flag);

/* Power Management */
void kgd2kfd_suspend(struct kfd_dev *kfd);
int kgd2kfd_resume(struct kfd_dev *kfd);

/* GPU reset */
int kgd2kfd_pre_reset(struct kfd_dev *kfd);
int kgd2kfd_post_reset(struct kfd_dev *kfd);

/* amdkfd Apertures */
int kfd_init_apertures(struct kfd_process *process);

/* Queue Context Management */
int init_queue(struct queue **q, const struct queue_properties *properties);
void uninit_queue(struct queue *q);
void print_queue_properties(struct queue_properties *q);
void print_queue(struct queue *q);

struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type,
					struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_cik(enum KFD_MQD_TYPE type,
					struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_cik_hawaii(enum KFD_MQD_TYPE type,
					struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_vi(enum KFD_MQD_TYPE type,
					struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_vi_tonga(enum KFD_MQD_TYPE type,
					struct kfd_dev *dev);
struct mqd_manager *mqd_manager_init_v9(enum KFD_MQD_TYPE type,
					struct kfd_dev *dev);
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev);
void device_queue_manager_uninit(struct device_queue_manager *dqm);
struct kernel_queue *kernel_queue_init(struct kfd_dev *dev,
					enum kfd_queue_type type);
void kernel_queue_uninit(struct kernel_queue *kq);
int kfd_process_vm_fault(struct device_queue_manager *dqm, unsigned int pasid);

/* Process Queue Manager */
struct process_queue_node {
	struct queue *q;
	struct kernel_queue *kq;
	struct list_head process_queue_list;
};

void kfd_process_dequeue_from_device(struct kfd_process_device *pdd);
void kfd_process_dequeue_from_all_devices(struct kfd_process *p);
int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p);
void pqm_uninit(struct process_queue_manager *pqm);
int pqm_create_queue(struct process_queue_manager *pqm,
			struct kfd_dev *dev,
			struct file *f,
			struct queue_properties *properties,
			unsigned int *qid);
int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid);
int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid,
			struct queue_properties *p);
int pqm_set_cu_mask(struct process_queue_manager *pqm, unsigned int qid,
			struct queue_properties *p);
struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
						unsigned int qid);

int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
			      unsigned int fence_value,
			      unsigned int timeout_ms);

/* Packet Manager */

#define KFD_FENCE_COMPLETED (100)
#define KFD_FENCE_INIT   (10)
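
/*
 * Illustrative sketch: how the fence values above pair with
 * amdkfd_fence_wait_timeout(). The caller initializes the fence location to
 * KFD_FENCE_INIT and submits a query_status packet (see
 * pm_send_query_status() below) that makes the firmware write
 * KFD_FENCE_COMPLETED; this hypothetical helper then polls for that value.
 */
static inline int kfd_example_wait_hws_fence(unsigned int *fence_addr)
{
	/* 500 ms is an arbitrary example timeout */
	return amdkfd_fence_wait_timeout(fence_addr, KFD_FENCE_COMPLETED,
					 500);
}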

struct packet_manager {
	struct device_queue_manager *dqm;
	struct kernel_queue *priv_queue;
	struct mutex lock;
	bool allocated;
	struct kfd_mem_obj *ib_buffer_obj;
	unsigned int ib_size_bytes;

	const struct packet_manager_funcs *pmf;
};

struct packet_manager_funcs {
	/* Support ASIC-specific packet formats for PM4 packets */
	int (*map_process)(struct packet_manager *pm, uint32_t *buffer,
			struct qcm_process_device *qpd);
	int (*runlist)(struct packet_manager *pm, uint32_t *buffer,
			uint64_t ib, size_t ib_size_in_dwords, bool chain);
	int (*set_resources)(struct packet_manager *pm, uint32_t *buffer,
			struct scheduling_resources *res);
	int (*map_queues)(struct packet_manager *pm, uint32_t *buffer,
			struct queue *q, bool is_static);
	int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
			enum kfd_queue_type type,
			enum kfd_unmap_queues_filter mode,
			uint32_t filter_param, bool reset,
			unsigned int sdma_engine);
	int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
			uint64_t fence_address, uint32_t fence_value);
	int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);

	/* Packet sizes */
	int map_process_size;
	int runlist_size;
	int set_resources_size;
	int map_queues_size;
	int unmap_queues_size;
	int query_status_size;
	int release_mem_size;
};

extern const struct packet_manager_funcs kfd_vi_pm_funcs;
extern const struct packet_manager_funcs kfd_v9_pm_funcs;
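
/*
 * Illustrative sketch: the packet manager dispatches ASIC-specific packet
 * builders through pmf, which is selected once at init time (cf. pm_init());
 * afterwards callers stay ASIC-agnostic. The helper below is hypothetical
 * and assumes a VI-vs-SOC15 split along the lines the driver uses.
 */
static inline const struct packet_manager_funcs *
kfd_example_select_pm_funcs(enum amd_asic_type asic)
{
	/* SOC15 parts (Vega10 and later) use the v9 packet formats */
	return KFD_IS_SOC15(asic) ? &kfd_v9_pm_funcs : &kfd_vi_pm_funcs;
}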

int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm);
void pm_uninit(struct packet_manager *pm);
int pm_send_set_resources(struct packet_manager *pm,
				struct scheduling_resources *res);
int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
				uint32_t fence_value);

int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
			enum kfd_unmap_queues_filter mode,
			uint32_t filter_param, bool reset,
			unsigned int sdma_engine);

void pm_release_ib(struct packet_manager *pm);

/* Following PM funcs can be shared among VI and AI */
unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);
int pm_set_resources_vi(struct packet_manager *pm, uint32_t *buffer,
			struct scheduling_resources *res);

uint64_t kfd_get_number_elems(struct kfd_dev *kfd);

/* Events */
extern const struct kfd_event_interrupt_class event_interrupt_class_cik;
extern const struct kfd_event_interrupt_class event_interrupt_class_v9;

extern const struct kfd_device_global_init_class device_global_init_class_cik;

void kfd_event_init_process(struct kfd_process *p);
void kfd_event_free_process(struct kfd_process *p);
int kfd_event_mmap(struct kfd_process *process, struct vm_area_struct *vma);
int kfd_wait_on_events(struct kfd_process *p,
		       uint32_t num_events, void __user *data,
		       bool all, uint32_t user_timeout_ms,
		       uint32_t *wait_result);
void kfd_signal_event_interrupt(unsigned int pasid, uint32_t partial_id,
				uint32_t valid_id_bits);
void kfd_signal_iommu_event(struct kfd_dev *dev,
		unsigned int pasid, unsigned long address,
		bool is_write_requested, bool is_execute_requested);
void kfd_signal_hw_exception_event(unsigned int pasid);
int kfd_set_event(struct kfd_process *p, uint32_t event_id);
int kfd_reset_event(struct kfd_process *p, uint32_t event_id);
int kfd_event_page_set(struct kfd_process *p, void *kernel_address,
		       uint64_t size);
int kfd_event_create(struct file *devkfd, struct kfd_process *p,
		     uint32_t event_type, bool auto_reset, uint32_t node_id,
		     uint32_t *event_id, uint32_t *event_trigger_data,
		     uint64_t *event_page_offset, uint32_t *event_slot_index);
int kfd_event_destroy(struct kfd_process *p, uint32_t event_id);

void kfd_signal_vm_fault_event(struct kfd_dev *dev, unsigned int pasid,
				struct kfd_vm_fault_info *info);

void kfd_signal_reset_event(struct kfd_dev *dev);

void kfd_flush_tlb(struct kfd_process_device *pdd);

int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p);

bool kfd_is_locked(void);

/* Debugfs */
#if defined(CONFIG_DEBUG_FS)

void kfd_debugfs_init(void);
void kfd_debugfs_fini(void);
int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data);
int pqm_debugfs_mqds(struct seq_file *m, void *data);
int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data);
int dqm_debugfs_hqds(struct seq_file *m, void *data);
int kfd_debugfs_rls_by_device(struct seq_file *m, void *data);
int pm_debugfs_runlist(struct seq_file *m, void *data);

int kfd_debugfs_hang_hws(struct kfd_dev *dev);
int pm_debugfs_hang_hws(struct packet_manager *pm);
int dqm_debugfs_execute_queues(struct device_queue_manager *dqm);

#else

static inline void kfd_debugfs_init(void) {}
static inline void kfd_debugfs_fini(void) {}

#endif

#endif