1 // SPDX-License-Identifier: GPL-2.0 OR MIT 2 /* 3 * Copyright 2014-2022 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 */ 23 #include "kfd_priv.h" 24 #include <linux/mm.h> 25 #include <linux/mman.h> 26 #include <linux/slab.h> 27 #include <linux/io.h> 28 #include <linux/idr.h> 29 30 /* 31 * This extension supports a kernel level doorbells management for the 32 * kernel queues using the first doorbell page reserved for the kernel. 33 */ 34 35 /* 36 * Each device exposes a doorbell aperture, a PCI MMIO aperture that 37 * receives 32-bit writes that are passed to queues as wptr values. 38 * The doorbells are intended to be written by applications as part 39 * of queueing work on user-mode queues. 40 * We assign doorbells to applications in PAGE_SIZE-sized and aligned chunks. 41 * We map the doorbell address space into user-mode when a process creates 42 * its first queue on each device. 43 * Although the mapping is done by KFD, it is equivalent to an mmap of 44 * the /dev/kfd with the particular device encoded in the mmap offset. 45 * There will be other uses for mmap of /dev/kfd, so only a range of 46 * offsets (KFD_MMAP_DOORBELL_START-END) is used for doorbells. 47 */ 48 49 /* # of doorbell bytes allocated for each process. */ 50 size_t kfd_doorbell_process_slice(struct kfd_dev *kfd) 51 { 52 if (!kfd->shared_resources.enable_mes) 53 return roundup(kfd->device_info.doorbell_size * 54 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS, 55 PAGE_SIZE); 56 else 57 return amdgpu_mes_doorbell_process_slice( 58 (struct amdgpu_device *)kfd->adev); 59 } 60 61 /* Doorbell calculations for device init. */ 62 int kfd_doorbell_init(struct kfd_dev *kfd) 63 { 64 size_t doorbell_start_offset; 65 size_t doorbell_aperture_size; 66 size_t doorbell_process_limit; 67 68 /* 69 * With MES enabled, just set the doorbell base as it is needed 70 * to calculate doorbell physical address. 71 */ 72 if (kfd->shared_resources.enable_mes) { 73 kfd->doorbell_base = 74 kfd->shared_resources.doorbell_physical_address; 75 return 0; 76 } 77 78 /* 79 * We start with calculations in bytes because the input data might 80 * only be byte-aligned. 81 * Only after we have done the rounding can we assume any alignment. 82 */ 83 84 doorbell_start_offset = 85 roundup(kfd->shared_resources.doorbell_start_offset, 86 kfd_doorbell_process_slice(kfd)); 87 88 doorbell_aperture_size = 89 rounddown(kfd->shared_resources.doorbell_aperture_size, 90 kfd_doorbell_process_slice(kfd)); 91 92 if (doorbell_aperture_size > doorbell_start_offset) 93 doorbell_process_limit = 94 (doorbell_aperture_size - doorbell_start_offset) / 95 kfd_doorbell_process_slice(kfd); 96 else 97 return -ENOSPC; 98 99 if (!kfd->max_doorbell_slices || 100 doorbell_process_limit < kfd->max_doorbell_slices) 101 kfd->max_doorbell_slices = doorbell_process_limit; 102 103 kfd->doorbell_base = kfd->shared_resources.doorbell_physical_address + 104 doorbell_start_offset; 105 106 kfd->doorbell_base_dw_offset = doorbell_start_offset / sizeof(u32); 107 108 kfd->doorbell_kernel_ptr = ioremap(kfd->doorbell_base, 109 kfd_doorbell_process_slice(kfd)); 110 111 if (!kfd->doorbell_kernel_ptr) 112 return -ENOMEM; 113 114 pr_debug("Doorbell initialization:\n"); 115 pr_debug("doorbell base == 0x%08lX\n", 116 (uintptr_t)kfd->doorbell_base); 117 118 pr_debug("doorbell_base_dw_offset == 0x%08lX\n", 119 kfd->doorbell_base_dw_offset); 120 121 pr_debug("doorbell_process_limit == 0x%08lX\n", 122 doorbell_process_limit); 123 124 pr_debug("doorbell_kernel_offset == 0x%08lX\n", 125 (uintptr_t)kfd->doorbell_base); 126 127 pr_debug("doorbell aperture size == 0x%08lX\n", 128 kfd->shared_resources.doorbell_aperture_size); 129 130 pr_debug("doorbell kernel address == %p\n", kfd->doorbell_kernel_ptr); 131 132 return 0; 133 } 134 135 void kfd_doorbell_fini(struct kfd_dev *kfd) 136 { 137 if (kfd->doorbell_kernel_ptr) 138 iounmap(kfd->doorbell_kernel_ptr); 139 } 140 141 int kfd_doorbell_mmap(struct kfd_dev *dev, struct kfd_process *process, 142 struct vm_area_struct *vma) 143 { 144 phys_addr_t address; 145 struct kfd_process_device *pdd; 146 147 /* 148 * For simplicitly we only allow mapping of the entire doorbell 149 * allocation of a single device & process. 150 */ 151 if (vma->vm_end - vma->vm_start != kfd_doorbell_process_slice(dev)) 152 return -EINVAL; 153 154 pdd = kfd_get_process_device_data(dev, process); 155 if (!pdd) 156 return -EINVAL; 157 158 /* Calculate physical address of doorbell */ 159 address = kfd_get_process_doorbells(pdd); 160 vma->vm_flags |= VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_NORESERVE | 161 VM_DONTDUMP | VM_PFNMAP; 162 163 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 164 165 pr_debug("Mapping doorbell page\n" 166 " target user address == 0x%08llX\n" 167 " physical address == 0x%08llX\n" 168 " vm_flags == 0x%04lX\n" 169 " size == 0x%04lX\n", 170 (unsigned long long) vma->vm_start, address, vma->vm_flags, 171 kfd_doorbell_process_slice(dev)); 172 173 174 return io_remap_pfn_range(vma, 175 vma->vm_start, 176 address >> PAGE_SHIFT, 177 kfd_doorbell_process_slice(dev), 178 vma->vm_page_prot); 179 } 180 181 182 /* get kernel iomem pointer for a doorbell */ 183 void __iomem *kfd_get_kernel_doorbell(struct kfd_dev *kfd, 184 unsigned int *doorbell_off) 185 { 186 u32 inx; 187 188 mutex_lock(&kfd->doorbell_mutex); 189 inx = find_first_zero_bit(kfd->doorbell_available_index, 190 KFD_MAX_NUM_OF_QUEUES_PER_PROCESS); 191 192 __set_bit(inx, kfd->doorbell_available_index); 193 mutex_unlock(&kfd->doorbell_mutex); 194 195 if (inx >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) 196 return NULL; 197 198 inx *= kfd->device_info.doorbell_size / sizeof(u32); 199 200 /* 201 * Calculating the kernel doorbell offset using the first 202 * doorbell page. 203 */ 204 *doorbell_off = kfd->doorbell_base_dw_offset + inx; 205 206 pr_debug("Get kernel queue doorbell\n" 207 " doorbell offset == 0x%08X\n" 208 " doorbell index == 0x%x\n", 209 *doorbell_off, inx); 210 211 return kfd->doorbell_kernel_ptr + inx; 212 } 213 214 void kfd_release_kernel_doorbell(struct kfd_dev *kfd, u32 __iomem *db_addr) 215 { 216 unsigned int inx; 217 218 inx = (unsigned int)(db_addr - kfd->doorbell_kernel_ptr) 219 * sizeof(u32) / kfd->device_info.doorbell_size; 220 221 mutex_lock(&kfd->doorbell_mutex); 222 __clear_bit(inx, kfd->doorbell_available_index); 223 mutex_unlock(&kfd->doorbell_mutex); 224 } 225 226 void write_kernel_doorbell(void __iomem *db, u32 value) 227 { 228 if (db) { 229 writel(value, db); 230 pr_debug("Writing %d to doorbell address %p\n", value, db); 231 } 232 } 233 234 void write_kernel_doorbell64(void __iomem *db, u64 value) 235 { 236 if (db) { 237 WARN(((unsigned long)db & 7) != 0, 238 "Unaligned 64-bit doorbell"); 239 writeq(value, (u64 __iomem *)db); 240 pr_debug("writing %llu to doorbell address %p\n", value, db); 241 } 242 } 243 244 unsigned int kfd_get_doorbell_dw_offset_in_bar(struct kfd_dev *kfd, 245 struct kfd_process_device *pdd, 246 unsigned int doorbell_id) 247 { 248 /* 249 * doorbell_base_dw_offset accounts for doorbells taken by KGD. 250 * index * kfd_doorbell_process_slice/sizeof(u32) adjusts to 251 * the process's doorbells. The offset returned is in dword 252 * units regardless of the ASIC-dependent doorbell size. 253 */ 254 if (!kfd->shared_resources.enable_mes) 255 return kfd->doorbell_base_dw_offset + 256 pdd->doorbell_index 257 * kfd_doorbell_process_slice(kfd) / sizeof(u32) + 258 doorbell_id * 259 kfd->device_info.doorbell_size / sizeof(u32); 260 else 261 return amdgpu_mes_get_doorbell_dw_offset_in_bar( 262 (struct amdgpu_device *)kfd->adev, 263 pdd->doorbell_index, doorbell_id); 264 } 265 266 uint64_t kfd_get_number_elems(struct kfd_dev *kfd) 267 { 268 uint64_t num_of_elems = (kfd->shared_resources.doorbell_aperture_size - 269 kfd->shared_resources.doorbell_start_offset) / 270 kfd_doorbell_process_slice(kfd) + 1; 271 272 return num_of_elems; 273 274 } 275 276 phys_addr_t kfd_get_process_doorbells(struct kfd_process_device *pdd) 277 { 278 return pdd->dev->doorbell_base + 279 pdd->doorbell_index * kfd_doorbell_process_slice(pdd->dev); 280 } 281 282 int kfd_alloc_process_doorbells(struct kfd_dev *kfd, unsigned int *doorbell_index) 283 { 284 int r = 0; 285 286 if (!kfd->shared_resources.enable_mes) 287 r = ida_simple_get(&kfd->doorbell_ida, 1, 288 kfd->max_doorbell_slices, GFP_KERNEL); 289 else 290 r = amdgpu_mes_alloc_process_doorbells( 291 (struct amdgpu_device *)kfd->adev, 292 doorbell_index); 293 294 if (r > 0) 295 *doorbell_index = r; 296 297 return r; 298 } 299 300 void kfd_free_process_doorbells(struct kfd_dev *kfd, unsigned int doorbell_index) 301 { 302 if (doorbell_index) { 303 if (!kfd->shared_resources.enable_mes) 304 ida_simple_remove(&kfd->doorbell_ida, doorbell_index); 305 else 306 amdgpu_mes_free_process_doorbells( 307 (struct amdgpu_device *)kfd->adev, 308 doorbell_index); 309 } 310 } 311