/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#include <linux/device.h>
#include <linux/export.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/compat.h>
#include <uapi/linux/kfd_ioctl.h>
#include <linux/time.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/dma-buf.h>
#include <asm/processor.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_dbgmgr.h"
#include "amdgpu_amdkfd.h"

static long kfd_ioctl(struct file *, unsigned int, unsigned long);
static int kfd_open(struct inode *, struct file *);
static int kfd_mmap(struct file *, struct vm_area_struct *);

static const char kfd_dev_name[] = "kfd";

static const struct file_operations kfd_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = kfd_ioctl,
	.compat_ioctl = kfd_ioctl,
	.open = kfd_open,
	.mmap = kfd_mmap,
};

static int kfd_char_dev_major = -1;
static struct class *kfd_class;
struct device *kfd_device;

int kfd_chardev_init(void)
{
	int err = 0;

	kfd_char_dev_major = register_chrdev(0, kfd_dev_name, &kfd_fops);
	err = kfd_char_dev_major;
	if (err < 0)
		goto err_register_chrdev;

	kfd_class = class_create(THIS_MODULE, kfd_dev_name);
	err = PTR_ERR(kfd_class);
	if (IS_ERR(kfd_class))
		goto err_class_create;

	kfd_device = device_create(kfd_class, NULL,
				   MKDEV(kfd_char_dev_major, 0),
				   NULL, kfd_dev_name);
	err = PTR_ERR(kfd_device);
	if (IS_ERR(kfd_device))
		goto err_device_create;

	return 0;

err_device_create:
	class_destroy(kfd_class);
err_class_create:
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
err_register_chrdev:
	return err;
}

void kfd_chardev_exit(void)
{
	device_destroy(kfd_class, MKDEV(kfd_char_dev_major, 0));
	class_destroy(kfd_class);
	unregister_chrdev(kfd_char_dev_major, kfd_dev_name);
}

struct device *kfd_chardev(void)
{
	return kfd_device;
}


static int kfd_open(struct inode *inode, struct file *filep)
{
	struct kfd_process *process;
	bool is_32bit_user_mode;

	if (iminor(inode) != 0)
		return -ENODEV;

	is_32bit_user_mode = in_compat_syscall();

	if (is_32bit_user_mode) {
		dev_warn(kfd_device,
"Process %d (32-bit) failed to open /dev/kfd\n" 118 "32-bit processes are not supported by amdkfd\n", 119 current->pid); 120 return -EPERM; 121 } 122 123 process = kfd_create_process(filep); 124 if (IS_ERR(process)) 125 return PTR_ERR(process); 126 127 if (kfd_is_locked()) 128 return -EAGAIN; 129 130 dev_dbg(kfd_device, "process %d opened, compat mode (32 bit) - %d\n", 131 process->pasid, process->is_32bit_user_mode); 132 133 return 0; 134 } 135 136 static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p, 137 void *data) 138 { 139 struct kfd_ioctl_get_version_args *args = data; 140 141 args->major_version = KFD_IOCTL_MAJOR_VERSION; 142 args->minor_version = KFD_IOCTL_MINOR_VERSION; 143 144 return 0; 145 } 146 147 static int set_queue_properties_from_user(struct queue_properties *q_properties, 148 struct kfd_ioctl_create_queue_args *args) 149 { 150 if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) { 151 pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); 152 return -EINVAL; 153 } 154 155 if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) { 156 pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n"); 157 return -EINVAL; 158 } 159 160 if ((args->ring_base_address) && 161 (!access_ok(VERIFY_WRITE, 162 (const void __user *) args->ring_base_address, 163 sizeof(uint64_t)))) { 164 pr_err("Can't access ring base address\n"); 165 return -EFAULT; 166 } 167 168 if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) { 169 pr_err("Ring size must be a power of 2 or 0\n"); 170 return -EINVAL; 171 } 172 173 if (!access_ok(VERIFY_WRITE, 174 (const void __user *) args->read_pointer_address, 175 sizeof(uint32_t))) { 176 pr_err("Can't access read pointer\n"); 177 return -EFAULT; 178 } 179 180 if (!access_ok(VERIFY_WRITE, 181 (const void __user *) args->write_pointer_address, 182 sizeof(uint32_t))) { 183 pr_err("Can't access write pointer\n"); 184 return -EFAULT; 185 } 186 187 if (args->eop_buffer_address && 188 !access_ok(VERIFY_WRITE, 189 (const void __user *) args->eop_buffer_address, 190 sizeof(uint32_t))) { 191 pr_debug("Can't access eop buffer"); 192 return -EFAULT; 193 } 194 195 if (args->ctx_save_restore_address && 196 !access_ok(VERIFY_WRITE, 197 (const void __user *) args->ctx_save_restore_address, 198 sizeof(uint32_t))) { 199 pr_debug("Can't access ctx save restore buffer"); 200 return -EFAULT; 201 } 202 203 q_properties->is_interop = false; 204 q_properties->queue_percent = args->queue_percentage; 205 q_properties->priority = args->queue_priority; 206 q_properties->queue_address = args->ring_base_address; 207 q_properties->queue_size = args->ring_size; 208 q_properties->read_ptr = (uint32_t *) args->read_pointer_address; 209 q_properties->write_ptr = (uint32_t *) args->write_pointer_address; 210 q_properties->eop_ring_buffer_address = args->eop_buffer_address; 211 q_properties->eop_ring_buffer_size = args->eop_buffer_size; 212 q_properties->ctx_save_restore_area_address = 213 args->ctx_save_restore_address; 214 q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size; 215 q_properties->ctl_stack_size = args->ctl_stack_size; 216 if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE || 217 args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) 218 q_properties->type = KFD_QUEUE_TYPE_COMPUTE; 219 else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA) 220 q_properties->type = KFD_QUEUE_TYPE_SDMA; 221 else 222 return -ENOTSUPP; 223 224 if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) 225 q_properties->format = 
	else
		q_properties->format = KFD_QUEUE_FORMAT_PM4;

	pr_debug("Queue Percentage: %d, %d\n",
			q_properties->queue_percent, args->queue_percentage);

	pr_debug("Queue Priority: %d, %d\n",
			q_properties->priority, args->queue_priority);

	pr_debug("Queue Address: 0x%llX, 0x%llX\n",
			q_properties->queue_address, args->ring_base_address);

	pr_debug("Queue Size: 0x%llX, %u\n",
			q_properties->queue_size, args->ring_size);

	pr_debug("Queue r/w Pointers: %px, %px\n",
			q_properties->read_ptr,
			q_properties->write_ptr);

	pr_debug("Queue Format: %d\n", q_properties->format);

	pr_debug("Queue EOP: 0x%llX\n", q_properties->eop_ring_buffer_address);

	pr_debug("Queue CTX save area: 0x%llX\n",
			q_properties->ctx_save_restore_area_address);

	return 0;
}

static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_create_queue_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	unsigned int queue_id;
	struct kfd_process_device *pdd;
	struct queue_properties q_properties;

	memset(&q_properties, 0, sizeof(struct queue_properties));

	pr_debug("Creating queue ioctl\n");

	err = set_queue_properties_from_user(&q_properties, args);
	if (err)
		return err;

	pr_debug("Looking for gpu id 0x%x\n", args->gpu_id);
	dev = kfd_device_by_id(args->gpu_id);
	if (!dev) {
		pr_debug("Could not find gpu id 0x%x\n", args->gpu_id);
		return -EINVAL;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto err_bind_process;
	}

	pr_debug("Creating queue for PASID %d on gpu 0x%x\n",
			p->pasid,
			dev->id);

	err = pqm_create_queue(&p->pqm, dev, filep, &q_properties, &queue_id);
	if (err != 0)
		goto err_create_queue;

	args->queue_id = queue_id;


	/* Return gpu_id as doorbell offset for mmap usage */
	args->doorbell_offset = KFD_MMAP_TYPE_DOORBELL;
	args->doorbell_offset |= KFD_MMAP_GPU_ID(args->gpu_id);
	args->doorbell_offset <<= PAGE_SHIFT;
	if (KFD_IS_SOC15(dev->device_info->asic_family))
		/* On SOC15 ASICs, doorbell allocation must be
		 * per-device, and independent from the per-process
		 * queue_id. Return the doorbell offset within the
		 * doorbell aperture to user mode.
		 */
		args->doorbell_offset |= q_properties.doorbell_off;

	mutex_unlock(&p->mutex);

	pr_debug("Queue id %d was created successfully\n", args->queue_id);

	pr_debug("Ring buffer address == 0x%016llX\n",
			args->ring_base_address);

	pr_debug("Read ptr address == 0x%016llX\n",
			args->read_pointer_address);

	pr_debug("Write ptr address == 0x%016llX\n",
			args->write_pointer_address);

	return 0;

err_create_queue:
err_bind_process:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_destroy_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_destroy_queue_args *args = data;

	pr_debug("Destroying queue id %d for pasid %d\n",
			args->queue_id,
			p->pasid);

	mutex_lock(&p->mutex);

	retval = pqm_destroy_queue(&p->pqm, args->queue_id);

	mutex_unlock(&p->mutex);
	return retval;
}

static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	struct kfd_ioctl_update_queue_args *args = data;
	struct queue_properties properties;

	if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) {
		pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n");
		return -EINVAL;
	}

	if (args->queue_priority > KFD_MAX_QUEUE_PRIORITY) {
		pr_err("Queue priority must be between 0 to KFD_MAX_QUEUE_PRIORITY\n");
		return -EINVAL;
	}

	if ((args->ring_base_address) &&
		(!access_ok(VERIFY_WRITE,
			(const void __user *) args->ring_base_address,
			sizeof(uint64_t)))) {
		pr_err("Can't access ring base address\n");
		return -EFAULT;
	}

	if (!is_power_of_2(args->ring_size) && (args->ring_size != 0)) {
		pr_err("Ring size must be a power of 2 or 0\n");
		return -EINVAL;
	}

	properties.queue_address = args->ring_base_address;
	properties.queue_size = args->ring_size;
	properties.queue_percent = args->queue_percentage;
	properties.priority = args->queue_priority;

	pr_debug("Updating queue id %d for pasid %d\n",
			args->queue_id, p->pasid);

	mutex_lock(&p->mutex);

	retval = pqm_update_queue(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	return retval;
}

static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p,
					void *data)
{
	int retval;
	const int max_num_cus = 1024;
	struct kfd_ioctl_set_cu_mask_args *args = data;
	struct queue_properties properties;
	uint32_t __user *cu_mask_ptr = (uint32_t __user *)args->cu_mask_ptr;
	size_t cu_mask_size = sizeof(uint32_t) * (args->num_cu_mask / 32);

	if ((args->num_cu_mask % 32) != 0) {
		pr_debug("num_cu_mask 0x%x must be a multiple of 32",
				args->num_cu_mask);
		return -EINVAL;
	}

	properties.cu_mask_count = args->num_cu_mask;
	if (properties.cu_mask_count == 0) {
		pr_debug("CU mask cannot be 0");
		return -EINVAL;
	}

	/* To prevent an unreasonably large CU mask size, set an arbitrary
	 * limit of max_num_cus bits. We can then just drop any CU mask bits
	 * past max_num_cus bits and just use the first max_num_cus bits.
	 */
	if (properties.cu_mask_count > max_num_cus) {
		pr_debug("CU mask cannot be greater than 1024 bits");
		properties.cu_mask_count = max_num_cus;
		cu_mask_size = sizeof(uint32_t) * (max_num_cus/32);
	}

	properties.cu_mask = kzalloc(cu_mask_size, GFP_KERNEL);
	if (!properties.cu_mask)
		return -ENOMEM;

	retval = copy_from_user(properties.cu_mask, cu_mask_ptr, cu_mask_size);
	if (retval) {
		pr_debug("Could not copy CU mask from userspace");
		kfree(properties.cu_mask);
		return -EFAULT;
	}

	mutex_lock(&p->mutex);

	retval = pqm_set_cu_mask(&p->pqm, args->queue_id, &properties);

	mutex_unlock(&p->mutex);

	if (retval)
		kfree(properties.cu_mask);

	return retval;
}

static int kfd_ioctl_get_queue_wave_state(struct file *filep,
					  struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_queue_wave_state_args *args = data;
	int r;

	mutex_lock(&p->mutex);

	r = pqm_get_wave_state(&p->pqm, args->queue_id,
			       (void __user *)args->ctl_stack_address,
			       &args->ctl_stack_used_size,
			       &args->save_area_used_size);

	mutex_unlock(&p->mutex);

	return r;
}

static int kfd_ioctl_set_memory_policy(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_memory_policy_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;
	enum cache_policy default_policy, alternate_policy;

	if (args->default_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->default_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	if (args->alternate_policy != KFD_IOC_CACHE_POLICY_COHERENT
	    && args->alternate_policy != KFD_IOC_CACHE_POLICY_NONCOHERENT) {
		return -EINVAL;
	}

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	default_policy = (args->default_policy == KFD_IOC_CACHE_POLICY_COHERENT)
			 ? cache_policy_coherent : cache_policy_noncoherent;

	alternate_policy =
		(args->alternate_policy == KFD_IOC_CACHE_POLICY_COHERENT)
		   ? cache_policy_coherent : cache_policy_noncoherent;

	if (!dev->dqm->ops.set_cache_memory_policy(dev->dqm,
				&pdd->qpd,
				default_policy,
				alternate_policy,
				(void __user *)args->alternate_aperture_base,
				args->alternate_aperture_size))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_set_trap_handler(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_trap_handler_args *args = data;
	struct kfd_dev *dev;
	int err = 0;
	struct kfd_process_device *pdd;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev == NULL)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = -ESRCH;
		goto out;
	}

	if (dev->dqm->ops.set_trap_handler(dev->dqm,
					&pdd->qpd,
					args->tba_addr,
					args->tma_addr))
		err = -EINVAL;

out:
	mutex_unlock(&p->mutex);

	return err;
}

static int kfd_ioctl_dbg_register(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_register_args *args = data;
	struct kfd_dev *dev;
	struct kfd_dbgmgr *dbgmgr_ptr;
	struct kfd_process_device *pdd;
	bool create_ok;
	long status = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_register not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(&p->mutex);
	mutex_lock(kfd_get_dbgmgr_mutex());

	/*
	 * Make sure that we have a pdd, in case this is the first queue
	 * created for this process.
	 */
	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		status = PTR_ERR(pdd);
		goto out;
	}

	if (!dev->dbgmgr) {
		/* In case of a legal call, we have no dbgmgr yet */
		create_ok = kfd_dbgmgr_create(&dbgmgr_ptr, dev);
		if (create_ok) {
			status = kfd_dbgmgr_register(dbgmgr_ptr, p);
			if (status != 0)
				kfd_dbgmgr_destroy(dbgmgr_ptr);
			else
				dev->dbgmgr = dbgmgr_ptr;
		}
	} else {
		pr_debug("debugger already registered\n");
		status = -EINVAL;
	}

out:
	mutex_unlock(kfd_get_dbgmgr_mutex());
	mutex_unlock(&p->mutex);

	return status;
}

static int kfd_ioctl_dbg_unregister(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_unregister_args *args = data;
	struct kfd_dev *dev;
	long status;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev || !dev->dbgmgr)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_unregister not supported on CZ\n");
		return -EINVAL;
	}

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_unregister(dev->dbgmgr, p);
	if (!status) {
		kfd_dbgmgr_destroy(dev->dbgmgr);
		dev->dbgmgr = NULL;
	}

	mutex_unlock(kfd_get_dbgmgr_mutex());

	return status;
}

/*
 * Parse and generate a variable-size data structure for address watch.
 * The total size of the buffer and the number of watch points are limited
 * in order to prevent kernel abuse. (This has no bearing on the much smaller
 * HW limitation, which is enforced by the dbgdev module.)
 * Please also note that the watch addresses themselves are not "copied from
 * user", since they are programmed into the HW as values provided by user
 * mode.
 *
 */
static int kfd_ioctl_dbg_address_watch(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_address_watch_args *args = data;
	struct kfd_dev *dev;
	struct dbg_address_watch_info aw_info;
	unsigned char *args_buff;
	long status;
	void __user *cmd_from_user;
	uint64_t watch_mask_value = 0;
	unsigned int args_idx = 0;

	memset((void *) &aw_info, 0, sizeof(struct dbg_address_watch_info));

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_address_watch not supported on CZ\n");
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	/* Validate arguments */

	if ((args->buf_size_in_bytes > MAX_ALLOWED_AW_BUFF_SIZE) ||
		(args->buf_size_in_bytes <= sizeof(*args) + sizeof(int) * 2) ||
		(cmd_from_user == NULL))
		return -EINVAL;

	/* this is the actual buffer to work with */
	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	aw_info.process = p;

	aw_info.num_watch_points = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(aw_info.num_watch_points);

	aw_info.watch_mode = (enum HSA_DBG_WATCH_MODE *) &args_buff[args_idx];
	args_idx += sizeof(enum HSA_DBG_WATCH_MODE) * aw_info.num_watch_points;

	/*
	 * set watch address base pointer to point on the array base
	 * within args_buff
	 */
	aw_info.watch_address = (uint64_t *) &args_buff[args_idx];

	/* skip over the addresses buffer */
	args_idx += sizeof(aw_info.watch_address) * aw_info.num_watch_points;

	if (args_idx >= args->buf_size_in_bytes - sizeof(*args)) {
		status = -EINVAL;
		goto out;
	}

	watch_mask_value = (uint64_t) args_buff[args_idx];

	if (watch_mask_value > 0) {
		/*
		 * There is an array of masks.
		 * set watch mask base pointer to point on the array base
		 * within args_buff
		 */
		aw_info.watch_mask = (uint64_t *) &args_buff[args_idx];

		/* skip over the masks buffer */
		args_idx += sizeof(aw_info.watch_mask) *
				aw_info.num_watch_points;
	} else {
		/* just the NULL mask, set to NULL and skip over it */
		aw_info.watch_mask = NULL;
		args_idx += sizeof(aw_info.watch_mask);
	}

	if (args_idx >= args->buf_size_in_bytes - sizeof(args)) {
		status = -EINVAL;
		goto out;
	}

	/* Currently HSA Event is not supported for DBG */
	aw_info.watch_event = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	status = kfd_dbgmgr_address_watch(dev->dbgmgr, &aw_info);

	mutex_unlock(kfd_get_dbgmgr_mutex());

out:
	kfree(args_buff);

	return status;
}

/* Parse and generate fixed size data structure for wave control */
static int kfd_ioctl_dbg_wave_control(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_dbg_wave_control_args *args = data;
	struct kfd_dev *dev;
	struct dbg_wave_control_info wac_info;
	unsigned char *args_buff;
	uint32_t computed_buff_size;
	long status;
	void __user *cmd_from_user;
	unsigned int args_idx = 0;

	memset((void *) &wac_info, 0, sizeof(struct dbg_wave_control_info));

	/* we use compact form, independent of the packing attribute value */
	computed_buff_size = sizeof(*args) +
				sizeof(wac_info.mode) +
				sizeof(wac_info.operand) +
				sizeof(wac_info.dbgWave_msg.DbgWaveMsg) +
				sizeof(wac_info.dbgWave_msg.MemoryVA) +
				sizeof(wac_info.trapId);

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if (dev->device_info->asic_family == CHIP_CARRIZO) {
		pr_debug("kfd_ioctl_dbg_wave_control not supported on CZ\n");
		return -EINVAL;
	}

	/* input size must match the computed "compact" size */
	if (args->buf_size_in_bytes != computed_buff_size) {
		pr_debug("size mismatch, computed : actual %u : %u\n",
				args->buf_size_in_bytes, computed_buff_size);
		return -EINVAL;
	}

	cmd_from_user = (void __user *) args->content_ptr;

	if (cmd_from_user == NULL)
		return -EINVAL;

	/* copy the entire buffer from user */

	args_buff = memdup_user(cmd_from_user,
				args->buf_size_in_bytes - sizeof(*args));
	if (IS_ERR(args_buff))
		return PTR_ERR(args_buff);

	/* move ptr to the start of the "pay-load" area */
	wac_info.process = p;

	wac_info.operand = *((enum HSA_DBG_WAVEOP *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.operand);

	wac_info.mode = *((enum HSA_DBG_WAVEMODE *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.mode);

	wac_info.trapId = *((uint32_t *)(&args_buff[args_idx]));
	args_idx += sizeof(wac_info.trapId);

	wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value =
					*((uint32_t *)(&args_buff[args_idx]));
	wac_info.dbgWave_msg.MemoryVA = NULL;

	mutex_lock(kfd_get_dbgmgr_mutex());

	pr_debug("Calling dbg manager process %p, operand %u, mode %u, trapId %u, message %u\n",
			wac_info.process, wac_info.operand,
			wac_info.mode, wac_info.trapId,
			wac_info.dbgWave_msg.DbgWaveMsg.WaveMsgInfoGen2.Value);

	status = kfd_dbgmgr_wave_control(dev->dbgmgr, &wac_info);

	pr_debug("Returned status of dbg manager is %ld\n", status);

	mutex_unlock(kfd_get_dbgmgr_mutex());

	kfree(args_buff);

	return status;
}
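
/*
 * Illustrative sketch only (an editorial assumption, not an ABI definition
 * taken from this file): the "compact" wave-control payload that user mode
 * packs immediately after struct kfd_ioctl_dbg_wave_control_args, in the
 * order parsed above (operand, mode, trapId, DbgWaveMsg value, with room
 * reserved for MemoryVA), can be pictured roughly as:
 *
 *	struct dbg_wave_control_payload {	// hypothetical name
 *		uint32_t operand;		// enum HSA_DBG_WAVEOP
 *		uint32_t mode;			// enum HSA_DBG_WAVEMODE
 *		uint32_t trap_id;
 *		uint32_t wave_msg_value;	// WaveMsgInfoGen2.Value
 *		uint64_t memory_va;		// kernel forces MemoryVA to NULL
 *	};
 *
 * Only the byte offsets implied by computed_buff_size matter to the kernel;
 * the struct and field names here are purely illustrative.
 */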

static int kfd_ioctl_get_clock_counters(struct file *filep,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_clock_counters_args *args = data;
	struct kfd_dev *dev;

	dev = kfd_device_by_id(args->gpu_id);
	if (dev)
		/* Reading GPU clock counter from KGD */
		args->gpu_clock_counter = amdgpu_amdkfd_get_gpu_clock_counter(dev->kgd);
	else
		/* Node without GPU resource */
		args->gpu_clock_counter = 0;

	/* No access to rdtsc. Using raw monotonic time */
	args->cpu_clock_counter = ktime_get_raw_ns();
	args->system_clock_counter = ktime_get_boot_ns();

	/* Since the counter is in nano-seconds we use 1GHz frequency */
	args->system_clock_freq = 1000000000;

	return 0;
}


static int kfd_ioctl_get_process_apertures(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_args *args = data;
	struct kfd_process_device_apertures *pAperture;
	struct kfd_process_device *pdd;

	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);

	args->num_of_nodes = 0;

	mutex_lock(&p->mutex);

	/* if the process-device list isn't empty */
	if (kfd_has_process_device_data(p)) {
		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			pAperture =
				&args->process_apertures[args->num_of_nodes];
			pAperture->gpu_id = pdd->dev->id;
			pAperture->lds_base = pdd->lds_base;
			pAperture->lds_limit = pdd->lds_limit;
			pAperture->gpuvm_base = pdd->gpuvm_base;
			pAperture->gpuvm_limit = pdd->gpuvm_limit;
			pAperture->scratch_base = pdd->scratch_base;
			pAperture->scratch_limit = pdd->scratch_limit;

			dev_dbg(kfd_device,
				"node id %u\n", args->num_of_nodes);
			dev_dbg(kfd_device,
				"gpu id %u\n", pdd->dev->id);
			dev_dbg(kfd_device,
				"lds_base %llX\n", pdd->lds_base);
			dev_dbg(kfd_device,
				"lds_limit %llX\n", pdd->lds_limit);
			dev_dbg(kfd_device,
				"gpuvm_base %llX\n", pdd->gpuvm_base);
			dev_dbg(kfd_device,
				"gpuvm_limit %llX\n", pdd->gpuvm_limit);
			dev_dbg(kfd_device,
				"scratch_base %llX\n", pdd->scratch_base);
			dev_dbg(kfd_device,
				"scratch_limit %llX\n", pdd->scratch_limit);

			args->num_of_nodes++;

			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd && (args->num_of_nodes < NUM_OF_SUPPORTED_GPUS));
	}

	mutex_unlock(&p->mutex);

	return 0;
}

static int kfd_ioctl_get_process_apertures_new(struct file *filp,
				struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_process_apertures_new_args *args = data;
	struct kfd_process_device_apertures *pa;
	struct kfd_process_device *pdd;
	uint32_t nodes = 0;
	int ret;

	dev_dbg(kfd_device, "get apertures for PASID %d", p->pasid);

	if (args->num_of_nodes == 0) {
		/* Return number of nodes, so that user space can allocate
		 * sufficient memory
		 */
		mutex_lock(&p->mutex);

		if (!kfd_has_process_device_data(p))
			goto out_unlock;

		/* Run over all pdd of the process */
		pdd = kfd_get_first_process_device_data(p);
		do {
			args->num_of_nodes++;
			pdd = kfd_get_next_process_device_data(p, pdd);
		} while (pdd);

		goto out_unlock;
	}

	/* Fill in process-aperture information for all available
	 * nodes, but not more than args->num_of_nodes as that is
	 * the amount of memory allocated by user
	 */
	pa = kzalloc((sizeof(struct kfd_process_device_apertures) *
			args->num_of_nodes), GFP_KERNEL);
	if (!pa)
		return -ENOMEM;

	mutex_lock(&p->mutex);

	if (!kfd_has_process_device_data(p)) {
		args->num_of_nodes = 0;
		kfree(pa);
		goto out_unlock;
	}

	/* Run over all pdd of the process */
	pdd = kfd_get_first_process_device_data(p);
	do {
		pa[nodes].gpu_id = pdd->dev->id;
		pa[nodes].lds_base = pdd->lds_base;
		pa[nodes].lds_limit = pdd->lds_limit;
		pa[nodes].gpuvm_base = pdd->gpuvm_base;
		pa[nodes].gpuvm_limit = pdd->gpuvm_limit;
		pa[nodes].scratch_base = pdd->scratch_base;
		pa[nodes].scratch_limit = pdd->scratch_limit;

		dev_dbg(kfd_device,
			"gpu id %u\n", pdd->dev->id);
		dev_dbg(kfd_device,
			"lds_base %llX\n", pdd->lds_base);
		dev_dbg(kfd_device,
			"lds_limit %llX\n", pdd->lds_limit);
		dev_dbg(kfd_device,
			"gpuvm_base %llX\n", pdd->gpuvm_base);
		dev_dbg(kfd_device,
			"gpuvm_limit %llX\n", pdd->gpuvm_limit);
		dev_dbg(kfd_device,
			"scratch_base %llX\n", pdd->scratch_base);
		dev_dbg(kfd_device,
			"scratch_limit %llX\n", pdd->scratch_limit);
		nodes++;

		pdd = kfd_get_next_process_device_data(p, pdd);
	} while (pdd && (nodes < args->num_of_nodes));
	mutex_unlock(&p->mutex);

	args->num_of_nodes = nodes;
	ret = copy_to_user(
			(void __user *)args->kfd_process_device_apertures_ptr,
			pa,
			(nodes * sizeof(struct kfd_process_device_apertures)));
	kfree(pa);
	return ret ? -EFAULT : 0;

out_unlock:
	mutex_unlock(&p->mutex);
	return 0;
}

static int kfd_ioctl_create_event(struct file *filp, struct kfd_process *p,
					void *data)
{
	struct kfd_ioctl_create_event_args *args = data;
	int err;

	/* For dGPUs the event page is allocated in user mode. The
	 * handle is passed to KFD with the first call to this IOCTL
	 * through the event_page_offset field.
	 */
	if (args->event_page_offset) {
		struct kfd_dev *kfd;
		struct kfd_process_device *pdd;
		void *mem, *kern_addr;
		uint64_t size;

		if (p->signal_page) {
			pr_err("Event page is already set\n");
			return -EINVAL;
		}

		kfd = kfd_device_by_id(GET_GPU_ID(args->event_page_offset));
		if (!kfd) {
			pr_err("Getting device by id failed in %s\n", __func__);
			return -EINVAL;
		}

		mutex_lock(&p->mutex);
		pdd = kfd_bind_process_to_device(kfd, p);
		if (IS_ERR(pdd)) {
			err = PTR_ERR(pdd);
			goto out_unlock;
		}

		mem = kfd_process_device_translate_handle(pdd,
				GET_IDR_HANDLE(args->event_page_offset));
		if (!mem) {
			pr_err("Can't find BO, offset is 0x%llx\n",
					args->event_page_offset);
			err = -EINVAL;
			goto out_unlock;
		}
		mutex_unlock(&p->mutex);

		err = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(kfd->kgd,
						mem, &kern_addr, &size);
		if (err) {
			pr_err("Failed to map event page to kernel\n");
			return err;
		}

		err = kfd_event_page_set(p, kern_addr, size);
		if (err) {
			pr_err("Failed to set event page\n");
			return err;
		}
	}

	err = kfd_event_create(filp, p, args->event_type,
				args->auto_reset != 0, args->node_id,
				&args->event_id, &args->event_trigger_data,
				&args->event_page_offset,
				&args->event_slot_index);

	return err;

out_unlock:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_destroy_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_destroy_event_args *args = data;

	return kfd_event_destroy(p, args->event_id);
}

static int kfd_ioctl_set_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_set_event_args *args = data;

	return kfd_set_event(p, args->event_id);
}

static int kfd_ioctl_reset_event(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_reset_event_args *args = data;

	return kfd_reset_event(p, args->event_id);
}

static int kfd_ioctl_wait_events(struct file *filp, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_wait_events_args *args = data;
	int err;

	err = kfd_wait_on_events(p, args->num_events,
			(void __user *)args->events_ptr,
			(args->wait_for_all != 0),
			args->timeout, &args->wait_result);

	return err;
}

static int kfd_ioctl_set_scratch_backing_va(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_set_scratch_backing_va_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	long err;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_fail;
	}

	pdd->qpd.sh_hidden_private_base = args->va_addr;

	mutex_unlock(&p->mutex);

	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS &&
	    pdd->qpd.vmid != 0)
		dev->kfd2kgd->set_scratch_backing_va(
			dev->kgd, args->va_addr, pdd->qpd.vmid);

	return 0;

bind_process_to_device_fail:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_get_tile_config(struct file *filep,
		struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_tile_config_args *args = data;
	struct kfd_dev *dev;
	struct tile_config config;
	int err = 0;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	dev->kfd2kgd->get_tile_config(dev->kgd, &config);

	args->gb_addr_config = config.gb_addr_config;
	args->num_banks = config.num_banks;
	args->num_ranks = config.num_ranks;

	if (args->num_tile_configs > config.num_tile_configs)
		args->num_tile_configs = config.num_tile_configs;
	err = copy_to_user((void __user *)args->tile_config_ptr,
			config.tile_config_ptr,
			args->num_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_tile_configs = 0;
		return -EFAULT;
	}

	if (args->num_macro_tile_configs > config.num_macro_tile_configs)
		args->num_macro_tile_configs =
				config.num_macro_tile_configs;
	err = copy_to_user((void __user *)args->macro_tile_config_ptr,
			config.macro_tile_config_ptr,
			args->num_macro_tile_configs * sizeof(uint32_t));
	if (err) {
		args->num_macro_tile_configs = 0;
		return -EFAULT;
	}

	return 0;
}

static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
				void *data)
{
	struct kfd_ioctl_acquire_vm_args *args = data;
	struct kfd_process_device *pdd;
	struct kfd_dev *dev;
	struct file *drm_file;
	int ret;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	drm_file = fget(args->drm_fd);
	if (!drm_file)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		ret = -EINVAL;
		goto err_unlock;
	}

	if (pdd->drm_file) {
		ret = pdd->drm_file == drm_file ? 0 : -EBUSY;
		goto err_unlock;
	}

	ret = kfd_process_device_init_vm(pdd, drm_file);
	if (ret)
		goto err_unlock;
	/* On success, the PDD keeps the drm_file reference */
	mutex_unlock(&p->mutex);

	return 0;

err_unlock:
	mutex_unlock(&p->mutex);
	fput(drm_file);
	return ret;
}

bool kfd_dev_is_large_bar(struct kfd_dev *dev)
{
	struct kfd_local_mem_info mem_info;

	if (debug_largebar) {
		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
		return true;
	}

	if (dev->device_info->needs_iommu_device)
		return false;

	amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
	if (mem_info.local_mem_size_private == 0 &&
			mem_info.local_mem_size_public > 0)
		return true;
	return false;
}

static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_alloc_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	struct kfd_dev *dev;
	int idr_handle;
	long err;
	uint64_t offset = args->mmap_offset;
	uint32_t flags = args->flags;

	if (args->size == 0)
		return -EINVAL;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	if ((flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) &&
		(flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) &&
		!kfd_dev_is_large_bar(dev)) {
		pr_err("Alloc host visible vram on small bar is not allowed\n");
		return -EINVAL;
	}

	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
		if (args->size != kfd_doorbell_process_slice(dev))
			return -EINVAL;
		offset = kfd_get_process_doorbells(dev, p);
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto err_unlock;
	}

	err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
		dev->kgd, args->va_addr, args->size,
		pdd->vm, (struct kgd_mem **) &mem, &offset,
		flags);

	if (err)
		goto err_unlock;

	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
	if (idr_handle < 0) {
		err = -EFAULT;
		goto err_free;
	}

	mutex_unlock(&p->mutex);

	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
	args->mmap_offset = offset;

	return 0;

err_free:
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
err_unlock:
	mutex_unlock(&p->mutex);
	return err;
}

static int kfd_ioctl_free_memory_of_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_free_memory_of_gpu_args *args = data;
	struct kfd_process_device *pdd;
	void *mem;
	struct kfd_dev *dev;
	int ret;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		pr_err("Process device data doesn't exist\n");
		ret = -EINVAL;
		goto err_unlock;
	}

	mem = kfd_process_device_translate_handle(
		pdd, GET_IDR_HANDLE(args->handle));
	if (!mem) {
		ret = -EINVAL;
		goto err_unlock;
	}

	ret = amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd,
						(struct kgd_mem *)mem);

	/* If freeing the buffer failed, leave the handle in place for
	 * clean-up during process tear-down.
	 */
	if (!ret)
		kfd_process_device_remove_obj_handle(
			pdd, GET_IDR_HANDLE(args->handle));

err_unlock:
	mutex_unlock(&p->mutex);
	return ret;
}

static int kfd_ioctl_map_memory_to_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_map_memory_to_gpu_args *args = data;
	struct kfd_process_device *pdd, *peer_pdd;
	void *mem;
	struct kfd_dev *dev, *peer;
	long err = 0;
	int i;
	uint32_t *devices_arr = NULL;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	if (!args->n_devices) {
		pr_debug("Device IDs array empty\n");
		return -EINVAL;
	}
	if (args->n_success > args->n_devices) {
		pr_debug("n_success exceeds n_devices\n");
		return -EINVAL;
	}

	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
				    GFP_KERNEL);
	if (!devices_arr)
		return -ENOMEM;

	err = copy_from_user(devices_arr,
			     (void __user *)args->device_ids_array_ptr,
			     args->n_devices * sizeof(*devices_arr));
	if (err != 0) {
		err = -EFAULT;
		goto copy_from_user_failed;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		err = PTR_ERR(pdd);
		goto bind_process_to_device_failed;
	}

	mem = kfd_process_device_translate_handle(pdd,
			GET_IDR_HANDLE(args->handle));
	if (!mem) {
		err = -ENOMEM;
		goto get_mem_obj_from_handle_failed;
	}

	for (i = args->n_success; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (!peer) {
			pr_debug("Getting device by id failed for 0x%x\n",
				 devices_arr[i]);
			err = -EINVAL;
			goto get_mem_obj_from_handle_failed;
		}

		peer_pdd = kfd_bind_process_to_device(peer, p);
		if (IS_ERR(peer_pdd)) {
			err = PTR_ERR(peer_pdd);
			goto get_mem_obj_from_handle_failed;
		}
		err = amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
		if (err) {
			pr_err("Failed to map to gpu %d/%d\n",
			       i, args->n_devices);
			goto map_memory_to_gpu_failed;
		}
		args->n_success = i+1;
	}

	mutex_unlock(&p->mutex);

	err = amdgpu_amdkfd_gpuvm_sync_memory(dev->kgd, (struct kgd_mem *) mem, true);
	if (err) {
		pr_debug("Sync memory failed, wait interrupted by user signal\n");
		goto sync_memory_failed;
	}

	/* Flush TLBs after waiting for the page table updates to complete */
	for (i = 0; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (WARN_ON_ONCE(!peer))
			continue;
		peer_pdd = kfd_get_process_device_data(peer, p);
		if (WARN_ON_ONCE(!peer_pdd))
			continue;
		kfd_flush_tlb(peer_pdd);
	}

	kfree(devices_arr);

	return err;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
map_memory_to_gpu_failed:
	mutex_unlock(&p->mutex);
copy_from_user_failed:
sync_memory_failed:
	kfree(devices_arr);

	return err;
}

static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
					struct kfd_process *p, void *data)
{
	struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
	struct kfd_process_device *pdd, *peer_pdd;
	void *mem;
	struct kfd_dev *dev, *peer;
	long err = 0;
	uint32_t *devices_arr = NULL, i;

	dev = kfd_device_by_id(GET_GPU_ID(args->handle));
	if (!dev)
		return -EINVAL;

	if (!args->n_devices) {
		pr_debug("Device IDs array empty\n");
		return -EINVAL;
	}
	if (args->n_success > args->n_devices) {
		pr_debug("n_success exceeds n_devices\n");
		return -EINVAL;
	}

	devices_arr = kmalloc_array(args->n_devices, sizeof(*devices_arr),
				    GFP_KERNEL);
	if (!devices_arr)
		return -ENOMEM;

	err = copy_from_user(devices_arr,
			     (void __user *)args->device_ids_array_ptr,
			     args->n_devices * sizeof(*devices_arr));
	if (err != 0) {
		err = -EFAULT;
		goto copy_from_user_failed;
	}

	mutex_lock(&p->mutex);

	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd) {
		err = -EINVAL;
		goto bind_process_to_device_failed;
	}

	mem = kfd_process_device_translate_handle(pdd,
			GET_IDR_HANDLE(args->handle));
	if (!mem) {
		err = -ENOMEM;
		goto get_mem_obj_from_handle_failed;
	}

	for (i = args->n_success; i < args->n_devices; i++) {
		peer = kfd_device_by_id(devices_arr[i]);
		if (!peer) {
			err = -EINVAL;
			goto get_mem_obj_from_handle_failed;
		}

		peer_pdd = kfd_get_process_device_data(peer, p);
		if (!peer_pdd) {
			err = -ENODEV;
			goto get_mem_obj_from_handle_failed;
		}
		err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
			peer->kgd, (struct kgd_mem *)mem, peer_pdd->vm);
		if (err) {
			pr_err("Failed to unmap from gpu %d/%d\n",
			       i, args->n_devices);
			goto unmap_memory_from_gpu_failed;
		}
		args->n_success = i+1;
	}
	kfree(devices_arr);

	mutex_unlock(&p->mutex);

	return 0;

bind_process_to_device_failed:
get_mem_obj_from_handle_failed:
unmap_memory_from_gpu_failed:
	mutex_unlock(&p->mutex);
copy_from_user_failed:
	kfree(devices_arr);
	return err;
}

static int kfd_ioctl_get_dmabuf_info(struct file *filep,
		struct kfd_process *p, void *data)
{
	struct kfd_ioctl_get_dmabuf_info_args *args = data;
	struct kfd_dev *dev = NULL;
	struct kgd_dev *dma_buf_kgd;
	void *metadata_buffer = NULL;
	uint32_t flags;
	unsigned int i;
	int r;

	/* Find a KFD GPU device that supports the get_dmabuf_info query */
	for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
		if (dev)
			break;
	if (!dev)
		return -EINVAL;

	if (args->metadata_ptr) {
		metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
		if (!metadata_buffer)
			return -ENOMEM;
	}

	/* Get dmabuf info from KGD */
	r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd,
					  &dma_buf_kgd, &args->size,
					  metadata_buffer, args->metadata_size,
					  &args->metadata_size, &flags);
	if (r)
		goto exit;

	/* Reverse-lookup gpu_id from kgd pointer */
	dev = kfd_device_by_kgd(dma_buf_kgd);
	if (!dev) {
		r = -EINVAL;
		goto exit;
	}
	args->gpu_id = dev->id;
	args->flags = flags;

	/* Copy metadata buffer to user mode */
	if (metadata_buffer) {
		r = copy_to_user((void __user *)args->metadata_ptr,
				 metadata_buffer, args->metadata_size);
		if (r != 0)
			r = -EFAULT;
	}

exit:
	kfree(metadata_buffer);

	return r;
}

static int kfd_ioctl_import_dmabuf(struct file *filep,
				   struct kfd_process *p, void *data)
{
	struct kfd_ioctl_import_dmabuf_args *args = data;
	struct kfd_process_device *pdd;
	struct dma_buf *dmabuf;
	struct kfd_dev *dev;
	int idr_handle;
	uint64_t size;
	void *mem;
	int r;

	dev = kfd_device_by_id(args->gpu_id);
	if (!dev)
		return -EINVAL;

	/* dma_buf_get() returns an ERR_PTR, never NULL, on failure */
	dmabuf = dma_buf_get(args->dmabuf_fd);
	if (IS_ERR(dmabuf))
		return PTR_ERR(dmabuf);

	mutex_lock(&p->mutex);

	pdd = kfd_bind_process_to_device(dev, p);
	if (IS_ERR(pdd)) {
		r = PTR_ERR(pdd);
		goto err_unlock;
	}

	r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
					      args->va_addr, pdd->vm,
					      (struct kgd_mem **)&mem, &size,
					      NULL);
	if (r)
		goto err_unlock;

	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
	if (idr_handle < 0) {
		r = -EFAULT;
		goto err_free;
	}

	mutex_unlock(&p->mutex);

	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);

	return 0;

err_free:
	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
err_unlock:
	mutex_unlock(&p->mutex);
	return r;
}

#define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
			    .cmd_drv = 0, .name = #ioctl}

/** Ioctl table */
static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_VERSION,
			kfd_ioctl_get_version, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_QUEUE,
			kfd_ioctl_create_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_QUEUE,
			kfd_ioctl_destroy_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_MEMORY_POLICY,
			kfd_ioctl_set_memory_policy, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_CLOCK_COUNTERS,
			kfd_ioctl_get_clock_counters, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES,
			kfd_ioctl_get_process_apertures, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UPDATE_QUEUE,
			kfd_ioctl_update_queue, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_CREATE_EVENT,
			kfd_ioctl_create_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DESTROY_EVENT,
			kfd_ioctl_destroy_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_EVENT,
			kfd_ioctl_set_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_RESET_EVENT,
			kfd_ioctl_reset_event, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_WAIT_EVENTS,
			kfd_ioctl_wait_events, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_REGISTER,
			kfd_ioctl_dbg_register, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_UNREGISTER,
			kfd_ioctl_dbg_unregister, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_ADDRESS_WATCH,
			kfd_ioctl_dbg_address_watch, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_WAVE_CONTROL,
			kfd_ioctl_dbg_wave_control, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_SCRATCH_BACKING_VA,
			kfd_ioctl_set_scratch_backing_va, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_TILE_CONFIG,
			kfd_ioctl_get_tile_config, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_TRAP_HANDLER,
			kfd_ioctl_set_trap_handler, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_PROCESS_APERTURES_NEW,
			kfd_ioctl_get_process_apertures_new, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ACQUIRE_VM,
			kfd_ioctl_acquire_vm, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_MEMORY_OF_GPU,
			kfd_ioctl_alloc_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_FREE_MEMORY_OF_GPU,
			kfd_ioctl_free_memory_of_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_MAP_MEMORY_TO_GPU,
			kfd_ioctl_map_memory_to_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_UNMAP_MEMORY_FROM_GPU,
			kfd_ioctl_unmap_memory_from_gpu, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_SET_CU_MASK,
			kfd_ioctl_set_cu_mask, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
			kfd_ioctl_get_queue_wave_state, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
			kfd_ioctl_get_dmabuf_info, 0),

	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
			kfd_ioctl_import_dmabuf, 0),

};

#define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)

static long kfd_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
{
	struct kfd_process *process;
	amdkfd_ioctl_t *func;
	const struct amdkfd_ioctl_desc *ioctl = NULL;
	unsigned int nr = _IOC_NR(cmd);
	char stack_kdata[128];
	char *kdata = NULL;
	unsigned int usize, asize;
	int retcode = -EINVAL;

	if (nr >= AMDKFD_CORE_IOCTL_COUNT)
		goto err_i1;

	if ((nr >= AMDKFD_COMMAND_START) && (nr < AMDKFD_COMMAND_END)) {
		u32 amdkfd_size;

		ioctl = &amdkfd_ioctls[nr];

		amdkfd_size = _IOC_SIZE(ioctl->cmd);
		usize = asize = _IOC_SIZE(cmd);
		if (amdkfd_size > asize)
			asize = amdkfd_size;

		cmd = ioctl->cmd;
	} else
		goto err_i1;

	dev_dbg(kfd_device, "ioctl cmd 0x%x (#%d), arg 0x%lx\n", cmd, nr, arg);

	process = kfd_get_process(current);
	if (IS_ERR(process)) {
		dev_dbg(kfd_device, "no process\n");
		goto err_i1;
	}

	/* Do not trust userspace, use our own definition */
	func = ioctl->func;

	if (unlikely(!func)) {
		dev_dbg(kfd_device, "no function\n");
		retcode = -EINVAL;
		goto err_i1;
	}

	if (cmd & (IOC_IN | IOC_OUT)) {
		if (asize <= sizeof(stack_kdata)) {
			kdata = stack_kdata;
		} else {
			kdata = kmalloc(asize, GFP_KERNEL);
			if (!kdata) {
				retcode = -ENOMEM;
				goto err_i1;
			}
		}
		if (asize > usize)
			memset(kdata + usize, 0, asize - usize);
	}

	if (cmd & IOC_IN) {
		if (copy_from_user(kdata, (void __user *)arg, usize) != 0) {
			retcode = -EFAULT;
			goto err_i1;
		}
	} else if (cmd & IOC_OUT) {
		memset(kdata, 0, usize);
	}

	retcode = func(filep, process, kdata);

	if (cmd & IOC_OUT)
		if (copy_to_user((void __user *)arg, kdata, usize) != 0)
			retcode = -EFAULT;

err_i1:
	if (!ioctl)
		dev_dbg(kfd_device, "invalid ioctl: pid=%d, cmd=0x%02x, nr=0x%02x\n",
			task_pid_nr(current), cmd, nr);

	if (kdata != stack_kdata)
		kfree(kdata);

	if (retcode)
		dev_dbg(kfd_device, "ret = %d\n", retcode);

	return retcode;
}

static int kfd_mmap(struct file *filp, struct vm_area_struct *vma)
{
	struct kfd_process *process;
	struct kfd_dev *dev = NULL;
	unsigned long vm_pgoff;
	unsigned int gpu_id;

	process = kfd_get_process(current);
	if (IS_ERR(process))
		return PTR_ERR(process);

	vm_pgoff = vma->vm_pgoff;
	vma->vm_pgoff = KFD_MMAP_OFFSET_VALUE_GET(vm_pgoff);
	gpu_id = KFD_MMAP_GPU_ID_GET(vm_pgoff);
	if (gpu_id)
		dev = kfd_device_by_id(gpu_id);

	switch (vm_pgoff & KFD_MMAP_TYPE_MASK) {
	case KFD_MMAP_TYPE_DOORBELL:
		if (!dev)
			return -ENODEV;
		return kfd_doorbell_mmap(dev, process, vma);

	case KFD_MMAP_TYPE_EVENTS:
		return kfd_event_mmap(process, vma);

	case KFD_MMAP_TYPE_RESERVED_MEM:
		if (!dev)
			return -ENODEV;
		return kfd_reserved_mem_mmap(dev, process, vma);
	}

	return -EFAULT;
}
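
/*
 * Illustrative user-space sketch (an editorial assumption, not part of the
 * driver): the character device registered above is typically consumed
 * through the uapi definitions in <linux/kfd_ioctl.h>, roughly as follows.
 * File descriptor and variable names are hypothetical.
 *
 *	int fd = open("/dev/kfd", O_RDWR | O_CLOEXEC);
 *	struct kfd_ioctl_get_version_args ver = {0};
 *
 *	if (fd >= 0 && ioctl(fd, AMDKFD_IOC_GET_VERSION, &ver) == 0)
 *		printf("KFD ioctl interface %u.%u\n",
 *		       ver.major_version, ver.minor_version);
 *
 * Doorbell pages are later mapped with mmap() on the same fd, using the
 * doorbell_offset returned by AMDKFD_IOC_CREATE_QUEUE; kfd_mmap() above
 * decodes that offset via KFD_MMAP_TYPE_* and KFD_MMAP_GPU_ID_GET().
 */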