1 /* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 */ 22 23 #ifndef KFD_PRIV_H_INCLUDED 24 #define KFD_PRIV_H_INCLUDED 25 26 #include <linux/hashtable.h> 27 #include <linux/mmu_notifier.h> 28 #include <linux/mutex.h> 29 #include <linux/types.h> 30 #include <linux/atomic.h> 31 #include <linux/workqueue.h> 32 #include <linux/spinlock.h> 33 #include <linux/kfd_ioctl.h> 34 #include <kgd_kfd_interface.h> 35 36 #define KFD_SYSFS_FILE_MODE 0444 37 38 /* 39 * When working with cp scheduler we should assign the HIQ manually or via 40 * the radeon driver to a fixed hqd slot, here are the fixed HIQ hqd slot 41 * definitions for Kaveri. In Kaveri only the first ME queues participates 42 * in the cp scheduling taking that in mind we set the HIQ slot in the 43 * second ME. 44 */ 45 #define KFD_CIK_HIQ_PIPE 4 46 #define KFD_CIK_HIQ_QUEUE 0 47 48 /* GPU ID hash width in bits */ 49 #define KFD_GPU_ID_HASH_WIDTH 16 50 51 /* Macro for allocating structures */ 52 #define kfd_alloc_struct(ptr_to_struct) \ 53 ((typeof(ptr_to_struct)) kzalloc(sizeof(*ptr_to_struct), GFP_KERNEL)) 54 55 #define KFD_MAX_NUM_OF_PROCESSES 512 56 #define KFD_MAX_NUM_OF_QUEUES_PER_PROCESS 1024 57 58 /* 59 * Kernel module parameter to specify maximum number of supported queues per 60 * device 61 */ 62 extern int max_num_of_queues_per_device; 63 64 #define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT 4096 65 #define KFD_MAX_NUM_OF_QUEUES_PER_DEVICE \ 66 (KFD_MAX_NUM_OF_PROCESSES * \ 67 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) 68 69 #define KFD_KERNEL_QUEUE_SIZE 2048 70 71 /* Kernel module parameter to specify the scheduling policy */ 72 extern int sched_policy; 73 74 /** 75 * enum kfd_sched_policy 76 * 77 * @KFD_SCHED_POLICY_HWS: H/W scheduling policy known as command processor (cp) 78 * scheduling. In this scheduling mode we're using the firmware code to 79 * schedule the user mode queues and kernel queues such as HIQ and DIQ. 80 * the HIQ queue is used as a special queue that dispatches the configuration 81 * to the cp and the user mode queues list that are currently running. 82 * the DIQ queue is a debugging queue that dispatches debugging commands to the 83 * firmware. 84 * in this scheduling mode user mode queues over subscription feature is 85 * enabled. 86 * 87 * @KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION: The same as above but the over 88 * subscription feature disabled. 89 * 90 * @KFD_SCHED_POLICY_NO_HWS: no H/W scheduling policy is a mode which directly 91 * set the command processor registers and sets the queues "manually". This 92 * mode is used *ONLY* for debugging proposes. 93 * 94 */ 95 enum kfd_sched_policy { 96 KFD_SCHED_POLICY_HWS = 0, 97 KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION, 98 KFD_SCHED_POLICY_NO_HWS 99 }; 100 101 enum cache_policy { 102 cache_policy_coherent, 103 cache_policy_noncoherent 104 }; 105 106 struct kfd_device_info { 107 unsigned int max_pasid_bits; 108 size_t ih_ring_entry_size; 109 uint16_t mqd_size_aligned; 110 }; 111 112 struct kfd_dev { 113 struct kgd_dev *kgd; 114 115 const struct kfd_device_info *device_info; 116 struct pci_dev *pdev; 117 118 unsigned int id; /* topology stub index */ 119 120 phys_addr_t doorbell_base; /* Start of actual doorbells used by 121 * KFD. It is aligned for mapping 122 * into user mode 123 */ 124 size_t doorbell_id_offset; /* Doorbell offset (from KFD doorbell 125 * to HW doorbell, GFX reserved some 126 * at the start) 127 */ 128 size_t doorbell_process_limit; /* Number of processes we have doorbell 129 * space for. 130 */ 131 u32 __iomem *doorbell_kernel_ptr; /* This is a pointer for a doorbells 132 * page used by kernel queue 133 */ 134 135 struct kgd2kfd_shared_resources shared_resources; 136 137 /* QCM Device instance */ 138 struct device_queue_manager *dqm; 139 140 bool init_complete; 141 }; 142 143 /* KGD2KFD callbacks */ 144 void kgd2kfd_exit(void); 145 struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev); 146 bool kgd2kfd_device_init(struct kfd_dev *kfd, 147 const struct kgd2kfd_shared_resources *gpu_resources); 148 void kgd2kfd_device_exit(struct kfd_dev *kfd); 149 150 extern const struct kfd2kgd_calls *kfd2kgd; 151 152 struct kfd_mem_obj { 153 void *bo; 154 uint64_t gpu_addr; 155 uint32_t *cpu_ptr; 156 }; 157 158 enum kfd_mempool { 159 KFD_MEMPOOL_SYSTEM_CACHEABLE = 1, 160 KFD_MEMPOOL_SYSTEM_WRITECOMBINE = 2, 161 KFD_MEMPOOL_FRAMEBUFFER = 3, 162 }; 163 164 /* Character device interface */ 165 int kfd_chardev_init(void); 166 void kfd_chardev_exit(void); 167 struct device *kfd_chardev(void); 168 169 /** 170 * enum kfd_preempt_type_filter 171 * 172 * @KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE: Preempts single queue. 173 * 174 * @KFD_PRERMPT_TYPE_FILTER_ALL_QUEUES: Preempts all queues in the 175 * running queues list. 176 * 177 * @KFD_PRERMPT_TYPE_FILTER_BY_PASID: Preempts queues that belongs to 178 * specific process. 179 * 180 */ 181 enum kfd_preempt_type_filter { 182 KFD_PREEMPT_TYPE_FILTER_SINGLE_QUEUE, 183 KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES, 184 KFD_PREEMPT_TYPE_FILTER_BY_PASID 185 }; 186 187 enum kfd_preempt_type { 188 KFD_PREEMPT_TYPE_WAVEFRONT, 189 KFD_PREEMPT_TYPE_WAVEFRONT_RESET 190 }; 191 192 /** 193 * enum kfd_queue_type 194 * 195 * @KFD_QUEUE_TYPE_COMPUTE: Regular user mode queue type. 196 * 197 * @KFD_QUEUE_TYPE_SDMA: Sdma user mode queue type. 198 * 199 * @KFD_QUEUE_TYPE_HIQ: HIQ queue type. 200 * 201 * @KFD_QUEUE_TYPE_DIQ: DIQ queue type. 202 */ 203 enum kfd_queue_type { 204 KFD_QUEUE_TYPE_COMPUTE, 205 KFD_QUEUE_TYPE_SDMA, 206 KFD_QUEUE_TYPE_HIQ, 207 KFD_QUEUE_TYPE_DIQ 208 }; 209 210 enum kfd_queue_format { 211 KFD_QUEUE_FORMAT_PM4, 212 KFD_QUEUE_FORMAT_AQL 213 }; 214 215 /** 216 * struct queue_properties 217 * 218 * @type: The queue type. 219 * 220 * @queue_id: Queue identifier. 221 * 222 * @queue_address: Queue ring buffer address. 223 * 224 * @queue_size: Queue ring buffer size. 225 * 226 * @priority: Defines the queue priority relative to other queues in the 227 * process. 228 * This is just an indication and HW scheduling may override the priority as 229 * necessary while keeping the relative prioritization. 230 * the priority granularity is from 0 to f which f is the highest priority. 231 * currently all queues are initialized with the highest priority. 232 * 233 * @queue_percent: This field is partially implemented and currently a zero in 234 * this field defines that the queue is non active. 235 * 236 * @read_ptr: User space address which points to the number of dwords the 237 * cp read from the ring buffer. This field updates automatically by the H/W. 238 * 239 * @write_ptr: Defines the number of dwords written to the ring buffer. 240 * 241 * @doorbell_ptr: This field aim is to notify the H/W of new packet written to 242 * the queue ring buffer. This field should be similar to write_ptr and the user 243 * should update this field after he updated the write_ptr. 244 * 245 * @doorbell_off: The doorbell offset in the doorbell pci-bar. 246 * 247 * @is_interop: Defines if this is a interop queue. Interop queue means that the 248 * queue can access both graphics and compute resources. 249 * 250 * @is_active: Defines if the queue is active or not. 251 * 252 * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid 253 * of the queue. 254 * 255 * This structure represents the queue properties for each queue no matter if 256 * it's user mode or kernel mode queue. 257 * 258 */ 259 struct queue_properties { 260 enum kfd_queue_type type; 261 enum kfd_queue_format format; 262 unsigned int queue_id; 263 uint64_t queue_address; 264 uint64_t queue_size; 265 uint32_t priority; 266 uint32_t queue_percent; 267 uint32_t *read_ptr; 268 uint32_t *write_ptr; 269 uint32_t __iomem *doorbell_ptr; 270 uint32_t doorbell_off; 271 bool is_interop; 272 bool is_active; 273 /* Not relevant for user mode queues in cp scheduling */ 274 unsigned int vmid; 275 }; 276 277 /** 278 * struct queue 279 * 280 * @list: Queue linked list. 281 * 282 * @mqd: The queue MQD. 283 * 284 * @mqd_mem_obj: The MQD local gpu memory object. 285 * 286 * @gart_mqd_addr: The MQD gart mc address. 287 * 288 * @properties: The queue properties. 289 * 290 * @mec: Used only in no cp scheduling mode and identifies to micro engine id 291 * that the queue should be execute on. 292 * 293 * @pipe: Used only in no cp scheduling mode and identifies the queue's pipe id. 294 * 295 * @queue: Used only in no cp scheduliong mode and identifies the queue's slot. 296 * 297 * @process: The kfd process that created this queue. 298 * 299 * @device: The kfd device that created this queue. 300 * 301 * This structure represents user mode compute queues. 302 * It contains all the necessary data to handle such queues. 303 * 304 */ 305 306 struct queue { 307 struct list_head list; 308 void *mqd; 309 struct kfd_mem_obj *mqd_mem_obj; 310 uint64_t gart_mqd_addr; 311 struct queue_properties properties; 312 313 uint32_t mec; 314 uint32_t pipe; 315 uint32_t queue; 316 317 struct kfd_process *process; 318 struct kfd_dev *device; 319 }; 320 321 /* 322 * Please read the kfd_mqd_manager.h description. 323 */ 324 enum KFD_MQD_TYPE { 325 KFD_MQD_TYPE_CIK_COMPUTE = 0, /* for no cp scheduling */ 326 KFD_MQD_TYPE_CIK_HIQ, /* for hiq */ 327 KFD_MQD_TYPE_CIK_CP, /* for cp queues and diq */ 328 KFD_MQD_TYPE_CIK_SDMA, /* for sdma queues */ 329 KFD_MQD_TYPE_MAX 330 }; 331 332 struct scheduling_resources { 333 unsigned int vmid_mask; 334 enum kfd_queue_type type; 335 uint64_t queue_mask; 336 uint64_t gws_mask; 337 uint32_t oac_mask; 338 uint32_t gds_heap_base; 339 uint32_t gds_heap_size; 340 }; 341 342 struct process_queue_manager { 343 /* data */ 344 struct kfd_process *process; 345 unsigned int num_concurrent_processes; 346 struct list_head queues; 347 unsigned long *queue_slot_bitmap; 348 }; 349 350 struct qcm_process_device { 351 /* The Device Queue Manager that owns this data */ 352 struct device_queue_manager *dqm; 353 struct process_queue_manager *pqm; 354 /* Device Queue Manager lock */ 355 struct mutex *lock; 356 /* Queues list */ 357 struct list_head queues_list; 358 struct list_head priv_queue_list; 359 360 unsigned int queue_count; 361 unsigned int vmid; 362 bool is_debug; 363 /* 364 * All the memory management data should be here too 365 */ 366 uint64_t gds_context_area; 367 uint32_t sh_mem_config; 368 uint32_t sh_mem_bases; 369 uint32_t sh_mem_ape1_base; 370 uint32_t sh_mem_ape1_limit; 371 uint32_t page_table_base; 372 uint32_t gds_size; 373 uint32_t num_gws; 374 uint32_t num_oac; 375 }; 376 377 /* Data that is per-process-per device. */ 378 struct kfd_process_device { 379 /* 380 * List of all per-device data for a process. 381 * Starts from kfd_process.per_device_data. 382 */ 383 struct list_head per_device_list; 384 385 /* The device that owns this data. */ 386 struct kfd_dev *dev; 387 388 389 /* per-process-per device QCM data structure */ 390 struct qcm_process_device qpd; 391 392 /*Apertures*/ 393 uint64_t lds_base; 394 uint64_t lds_limit; 395 uint64_t gpuvm_base; 396 uint64_t gpuvm_limit; 397 uint64_t scratch_base; 398 uint64_t scratch_limit; 399 400 /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ 401 bool bound; 402 }; 403 404 #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) 405 406 /* Process data */ 407 struct kfd_process { 408 /* 409 * kfd_process are stored in an mm_struct*->kfd_process* 410 * hash table (kfd_processes in kfd_process.c) 411 */ 412 struct hlist_node kfd_processes; 413 414 struct mm_struct *mm; 415 416 struct mutex mutex; 417 418 /* 419 * In any process, the thread that started main() is the lead 420 * thread and outlives the rest. 421 * It is here because amd_iommu_bind_pasid wants a task_struct. 422 */ 423 struct task_struct *lead_thread; 424 425 /* We want to receive a notification when the mm_struct is destroyed */ 426 struct mmu_notifier mmu_notifier; 427 428 /* Use for delayed freeing of kfd_process structure */ 429 struct rcu_head rcu; 430 431 unsigned int pasid; 432 433 /* 434 * List of kfd_process_device structures, 435 * one for each device the process is using. 436 */ 437 struct list_head per_device_data; 438 439 struct process_queue_manager pqm; 440 441 /* The process's queues. */ 442 size_t queue_array_size; 443 444 /* Size is queue_array_size, up to MAX_PROCESS_QUEUES. */ 445 struct kfd_queue **queues; 446 447 unsigned long allocated_queue_bitmap[DIV_ROUND_UP(KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, BITS_PER_LONG)]; 448 449 /*Is the user space process 32 bit?*/ 450 bool is_32bit_user_mode; 451 }; 452 453 /** 454 * Ioctl function type. 455 * 456 * \param filep pointer to file structure. 457 * \param p amdkfd process pointer. 458 * \param data pointer to arg that was copied from user. 459 */ 460 typedef int amdkfd_ioctl_t(struct file *filep, struct kfd_process *p, 461 void *data); 462 463 struct amdkfd_ioctl_desc { 464 unsigned int cmd; 465 int flags; 466 amdkfd_ioctl_t *func; 467 unsigned int cmd_drv; 468 const char *name; 469 }; 470 471 void kfd_process_create_wq(void); 472 void kfd_process_destroy_wq(void); 473 struct kfd_process *kfd_create_process(const struct task_struct *); 474 struct kfd_process *kfd_get_process(const struct task_struct *); 475 476 struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, 477 struct kfd_process *p); 478 void kfd_unbind_process_from_device(struct kfd_dev *dev, unsigned int pasid); 479 struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, 480 struct kfd_process *p, 481 int create_pdd); 482 483 /* Process device data iterator */ 484 struct kfd_process_device *kfd_get_first_process_device_data(struct kfd_process *p); 485 struct kfd_process_device *kfd_get_next_process_device_data(struct kfd_process *p, 486 struct kfd_process_device *pdd); 487 bool kfd_has_process_device_data(struct kfd_process *p); 488 489 /* PASIDs */ 490 int kfd_pasid_init(void); 491 void kfd_pasid_exit(void); 492 bool kfd_set_pasid_limit(unsigned int new_limit); 493 unsigned int kfd_get_pasid_limit(void); 494 unsigned int kfd_pasid_alloc(void); 495 void kfd_pasid_free(unsigned int pasid); 496 497 /* Doorbells */ 498 void kfd_doorbell_init(struct kfd_dev *kfd); 499 int kfd_doorbell_mmap(struct kfd_process *process, struct vm_area_struct *vma); 500 u32 __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, 501 unsigned int *doorbell_off); 502 void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr); 503 u32 read_kernel_doorbell(u32 __iomem *db); 504 void write_kernel_doorbell(u32 __iomem *db, u32 value); 505 unsigned int kfd_queue_id_to_doorbell(struct kfd_dev *kfd, 506 struct kfd_process *process, 507 unsigned int queue_id); 508 509 extern struct device *kfd_device; 510 511 /* Topology */ 512 int kfd_topology_init(void); 513 void kfd_topology_shutdown(void); 514 int kfd_topology_add_device(struct kfd_dev *gpu); 515 int kfd_topology_remove_device(struct kfd_dev *gpu); 516 struct kfd_dev *kfd_device_by_id(uint32_t gpu_id); 517 struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev); 518 struct kfd_dev *kfd_topology_enum_kfd_devices(uint8_t idx); 519 520 /* Interrupts */ 521 void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry); 522 523 /* Power Management */ 524 void kgd2kfd_suspend(struct kfd_dev *kfd); 525 int kgd2kfd_resume(struct kfd_dev *kfd); 526 527 /* amdkfd Apertures */ 528 int kfd_init_apertures(struct kfd_process *process); 529 530 /* Queue Context Management */ 531 inline uint32_t lower_32(uint64_t x); 532 inline uint32_t upper_32(uint64_t x); 533 534 int init_queue(struct queue **q, struct queue_properties properties); 535 void uninit_queue(struct queue *q); 536 void print_queue_properties(struct queue_properties *q); 537 void print_queue(struct queue *q); 538 539 struct mqd_manager *mqd_manager_init(enum KFD_MQD_TYPE type, 540 struct kfd_dev *dev); 541 struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev); 542 void device_queue_manager_uninit(struct device_queue_manager *dqm); 543 struct kernel_queue *kernel_queue_init(struct kfd_dev *dev, 544 enum kfd_queue_type type); 545 void kernel_queue_uninit(struct kernel_queue *kq); 546 547 /* Process Queue Manager */ 548 struct process_queue_node { 549 struct queue *q; 550 struct kernel_queue *kq; 551 struct list_head process_queue_list; 552 }; 553 554 int pqm_init(struct process_queue_manager *pqm, struct kfd_process *p); 555 void pqm_uninit(struct process_queue_manager *pqm); 556 int pqm_create_queue(struct process_queue_manager *pqm, 557 struct kfd_dev *dev, 558 struct file *f, 559 struct queue_properties *properties, 560 unsigned int flags, 561 enum kfd_queue_type type, 562 unsigned int *qid); 563 int pqm_destroy_queue(struct process_queue_manager *pqm, unsigned int qid); 564 int pqm_update_queue(struct process_queue_manager *pqm, unsigned int qid, 565 struct queue_properties *p); 566 567 /* Packet Manager */ 568 569 #define KFD_HIQ_TIMEOUT (500) 570 571 #define KFD_FENCE_COMPLETED (100) 572 #define KFD_FENCE_INIT (10) 573 #define KFD_UNMAP_LATENCY (150) 574 575 struct packet_manager { 576 struct device_queue_manager *dqm; 577 struct kernel_queue *priv_queue; 578 struct mutex lock; 579 bool allocated; 580 struct kfd_mem_obj *ib_buffer_obj; 581 }; 582 583 int pm_init(struct packet_manager *pm, struct device_queue_manager *dqm); 584 void pm_uninit(struct packet_manager *pm); 585 int pm_send_set_resources(struct packet_manager *pm, 586 struct scheduling_resources *res); 587 int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues); 588 int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address, 589 uint32_t fence_value); 590 591 int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type, 592 enum kfd_preempt_type_filter mode, 593 uint32_t filter_param, bool reset, 594 unsigned int sdma_engine); 595 596 void pm_release_ib(struct packet_manager *pm); 597 598 uint64_t kfd_get_number_elems(struct kfd_dev *kfd); 599 phys_addr_t kfd_get_process_doorbells(struct kfd_dev *dev, 600 struct kfd_process *process); 601 602 #endif 603