// SPDX-License-Identifier: MIT
/*
 * Copyright © 2024 Intel Corporation
 */

#include <linux/scatterlist.h>
#include <linux/mmu_notifier.h>
#include <linux/dma-mapping.h>
#include <linux/memremap.h>
#include <linux/swap.h>
#include <linux/hmm.h>
#include <linux/mm.h>
#include "xe_hmm.h"
#include "xe_vm.h"
#include "xe_bo.h"

static u64 xe_npages_in_range(unsigned long start, unsigned long end)
{
	return (end - start) >> PAGE_SHIFT;
}

static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st,
		       struct hmm_range *range, struct rw_semaphore *notifier_sem)
{
	unsigned long i, npages, hmm_pfn;
	unsigned long num_chunks = 0;
	int ret;

	/* HMM docs say this is needed. */
	ret = down_read_interruptible(notifier_sem);
	if (ret)
		return ret;

	if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) {
		up_read(notifier_sem);
		return -EAGAIN;
	}

	npages = xe_npages_in_range(range->start, range->end);
	for (i = 0; i < npages;) {
		unsigned long len;

		hmm_pfn = range->hmm_pfns[i];
		xe_assert(xe, hmm_pfn & HMM_PFN_VALID);

		len = 1UL << hmm_pfn_to_map_order(hmm_pfn);

		/* If order > 0 the page may extend beyond range->start */
		len -= (hmm_pfn & ~HMM_PFN_FLAGS) & (len - 1);
		i += len;
		num_chunks++;
	}
	up_read(notifier_sem);

	return sg_alloc_table(st, num_chunks, GFP_KERNEL);
}
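
/*
 * Illustrative sketch, not compiled into the driver: a worked example of the
 * chunk-collapsing arithmetic in xe_alloc_sg() above. If a pfn is backed by
 * an order-9 (2 MiB) mapping and range->start begins 3 small pages into that
 * huge page, the first chunk covers the remaining 512 - 3 = 509 pages, so the
 * loop advances i by 509 and accounts a single sg chunk. The helper name
 * xe_example_chunk_len() is hypothetical and exists only for illustration.
 */
#if 0
static unsigned long xe_example_chunk_len(unsigned long hmm_pfn)
{
	/* Number of small pages covered by this pfn's mapping order. */
	unsigned long len = 1UL << hmm_pfn_to_map_order(hmm_pfn);

	/* Trim the part of the higher-order page that precedes this pfn. */
	len -= (hmm_pfn & ~HMM_PFN_FLAGS) & (len - 1);

	return len;
}
#endif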

/**
 * xe_build_sg() - build a scatter gather table for all the physical pages/pfn
 * in a hmm_range. dma-map the pages if necessary. The dma-address is saved in
 * the sg table and will later be used to program the GPU page table.
 * @xe: the xe device which will access the dma-address in the sg table
 * @range: the hmm range that we build the sg table from. range->hmm_pfns[]
 * has the pfn numbers of the pages that back this hmm address range.
 * @st: pointer to the sg table.
 * @notifier_sem: The xe notifier lock.
 * @write: whether we write to this range. This decides the dma map direction
 * for system pages. If write we map it bi-directional; otherwise
 * DMA_TO_DEVICE
 *
 * All the contiguous pfns will be collapsed into one entry in
 * the scatter gather table. This is for the purpose of efficiently
 * programming the GPU page table.
 *
 * The dma_address in the sg table will later be used by the GPU to
 * access memory. So if the memory is system memory, we need to
 * do a dma-mapping so it can be accessed by the GPU/DMA.
 *
 * FIXME: This function currently only supports pages in system
 * memory. If the memory is GPU local memory (of the GPU which
 * is going to access the memory), we need the gpu dpa (device physical
 * address), and there is no need for dma-mapping. This is TBD.
 *
 * FIXME: dma-mapping for a peer gpu device to access a remote gpu's
 * memory. Add this when p2p is supported.
 *
 * This function allocates the storage of the sg table. It is the
 * caller's responsibility to free it by calling sg_free_table().
 *
 * Return: 0 if successful; -ENOMEM if it fails to allocate memory
 */
static int xe_build_sg(struct xe_device *xe, struct hmm_range *range,
		       struct sg_table *st,
		       struct rw_semaphore *notifier_sem,
		       bool write)
{
	unsigned long npages = xe_npages_in_range(range->start, range->end);
	struct device *dev = xe->drm.dev;
	struct scatterlist *sgl;
	struct page *page;
	unsigned long i, j;

	lockdep_assert_held(notifier_sem);

	i = 0;
	for_each_sg(st->sgl, sgl, st->nents, j) {
		unsigned long hmm_pfn, size;

		hmm_pfn = range->hmm_pfns[i];
		page = hmm_pfn_to_page(hmm_pfn);
		xe_assert(xe, !is_device_private_page(page));

		size = 1UL << hmm_pfn_to_map_order(hmm_pfn);
		size -= page_to_pfn(page) & (size - 1);
		i += size;

		if (unlikely(j == st->nents - 1)) {
			xe_assert(xe, i >= npages);
			if (i > npages)
				size -= (i - npages);

			sg_mark_end(sgl);
		} else {
			xe_assert(xe, i < npages);
		}

		sg_set_page(sgl, page, size << PAGE_SHIFT, 0);
	}

	return dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE,
			       DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING);
}

static void xe_hmm_userptr_set_mapped(struct xe_userptr_vma *uvma)
{
	struct xe_userptr *userptr = &uvma->userptr;
	struct xe_vm *vm = xe_vma_vm(&uvma->vma);

	lockdep_assert_held_write(&vm->lock);
	lockdep_assert_held(&vm->userptr.notifier_lock);

	mutex_lock(&userptr->unmap_mutex);
	xe_assert(vm->xe, !userptr->mapped);
	userptr->mapped = true;
	mutex_unlock(&userptr->unmap_mutex);
}

void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma)
{
	struct xe_userptr *userptr = &uvma->userptr;
	struct xe_vma *vma = &uvma->vma;
	bool write = !xe_vma_read_only(vma);
	struct xe_vm *vm = xe_vma_vm(vma);
	struct xe_device *xe = vm->xe;

	if (!lockdep_is_held_type(&vm->userptr.notifier_lock, 0) &&
	    !lockdep_is_held_type(&vm->lock, 0) &&
	    !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
		/* Don't unmap in exec critical section. */
		xe_vm_assert_held(vm);
		/* Don't unmap while mapping the sg. */
		lockdep_assert_held(&vm->lock);
	}

	mutex_lock(&userptr->unmap_mutex);
	if (userptr->sg && userptr->mapped)
		dma_unmap_sgtable(xe->drm.dev, userptr->sg,
				  write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0);
	userptr->mapped = false;
	mutex_unlock(&userptr->unmap_mutex);
}

/**
 * xe_hmm_userptr_free_sg() - Free the scatter gather table of userptr
 * @uvma: the userptr vma which holds the scatter gather table
 *
 * With xe_hmm_userptr_populate_range(), we allocate the storage of
 * the userptr sg table. This is a helper function to free this
 * sg table, and dma unmap the addresses in the table.
 */
void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma)
{
	struct xe_userptr *userptr = &uvma->userptr;

	xe_assert(xe_vma_vm(&uvma->vma)->xe, userptr->sg);
	xe_hmm_userptr_unmap(uvma);
	sg_free_table(userptr->sg);
	userptr->sg = NULL;
}
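
/*
 * Illustrative sketch, not compiled into the driver: the expected lifetime of
 * the sg table. After a successful xe_hmm_userptr_populate_range(),
 * userptr->sg points at the embedded userptr->sgt; teardown paths are assumed
 * to call xe_hmm_userptr_free_sg(), which dma-unmaps and frees it. The
 * function name xe_example_teardown() is hypothetical.
 */
#if 0
static void xe_example_teardown(struct xe_userptr_vma *uvma)
{
	/* Only free if a previous populate left a live sg table behind. */
	if (uvma->userptr.sg)
		xe_hmm_userptr_free_sg(uvma);
}
#endif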

/**
 * xe_hmm_userptr_populate_range() - Populate physical pages of a virtual
 * address range
 *
 * @uvma: userptr vma which has information of the range to populate.
 * @is_mm_mmap_locked: True if mmap_read_lock is already acquired by caller.
 *
 * This function populates the physical pages of a virtual
 * address range. The populated physical pages are saved in
 * the userptr's sg table. It is similar to get_user_pages() but calls
 * hmm_range_fault().
 *
 * This function also reads the mmu notifier sequence number (through
 * mmu_interval_read_begin()), for the purpose of later
 * comparison (through mmu_interval_read_retry()).
 *
 * This must be called with mmap read or write lock held.
 *
 * This function allocates the storage of the userptr sg table.
 * It is the caller's responsibility to free it by calling sg_free_table().
 *
 * Return: 0 for success; negative error code on failure
 */
int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma,
				  bool is_mm_mmap_locked)
{
	unsigned long timeout =
		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
	unsigned long *pfns;
	struct xe_userptr *userptr;
	struct xe_vma *vma = &uvma->vma;
	u64 userptr_start = xe_vma_userptr(vma);
	u64 userptr_end = userptr_start + xe_vma_size(vma);
	struct xe_vm *vm = xe_vma_vm(vma);
	struct hmm_range hmm_range = {
		.pfn_flags_mask = 0, /* ignore pfns */
		.default_flags = HMM_PFN_REQ_FAULT,
		.start = userptr_start,
		.end = userptr_end,
		.notifier = &uvma->userptr.notifier,
		.dev_private_owner = vm->xe,
	};
	bool write = !xe_vma_read_only(vma);
	unsigned long notifier_seq;
	u64 npages;
	int ret;

	userptr = &uvma->userptr;

	if (is_mm_mmap_locked)
		mmap_assert_locked(userptr->notifier.mm);

	if (vma->gpuva.flags & XE_VMA_DESTROYED)
		return 0;

	notifier_seq = mmu_interval_read_begin(&userptr->notifier);
	if (notifier_seq == userptr->notifier_seq)
		return 0;

	if (userptr->sg)
		xe_hmm_userptr_free_sg(uvma);

	npages = xe_npages_in_range(userptr_start, userptr_end);
	pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
	if (unlikely(!pfns))
		return -ENOMEM;

	if (write)
		hmm_range.default_flags |= HMM_PFN_REQ_WRITE;

	if (!mmget_not_zero(userptr->notifier.mm)) {
		ret = -EFAULT;
		goto free_pfns;
	}

	hmm_range.hmm_pfns = pfns;

	while (true) {
		hmm_range.notifier_seq = mmu_interval_read_begin(&userptr->notifier);

		if (!is_mm_mmap_locked)
			mmap_read_lock(userptr->notifier.mm);

		ret = hmm_range_fault(&hmm_range);

		if (!is_mm_mmap_locked)
			mmap_read_unlock(userptr->notifier.mm);

		if (ret == -EBUSY) {
			if (time_after(jiffies, timeout))
				break;

			continue;
		}
		break;
	}

	mmput(userptr->notifier.mm);

	if (ret)
		goto free_pfns;

	ret = xe_alloc_sg(vm->xe, &userptr->sgt, &hmm_range, &vm->userptr.notifier_lock);
	if (ret)
		goto free_pfns;

	ret = down_read_interruptible(&vm->userptr.notifier_lock);
	if (ret)
		goto free_st;

	if (mmu_interval_read_retry(hmm_range.notifier, hmm_range.notifier_seq)) {
		ret = -EAGAIN;
		goto out_unlock;
	}

	ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt,
			  &vm->userptr.notifier_lock, write);
	if (ret)
		goto out_unlock;

	userptr->sg = &userptr->sgt;
	xe_hmm_userptr_set_mapped(uvma);
	userptr->notifier_seq = hmm_range.notifier_seq;
	up_read(&vm->userptr.notifier_lock);
	kvfree(pfns);
	return 0;

out_unlock:
	up_read(&vm->userptr.notifier_lock);
free_st:
	sg_free_table(&userptr->sgt);
free_pfns:
	kvfree(pfns);
	return ret;
}
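
/*
 * Illustrative sketch, not compiled into the driver: a typical revalidation
 * loop around xe_hmm_userptr_populate_range(). -EAGAIN means the mmu notifier
 * invalidated the range between hmm_range_fault() and taking the notifier
 * lock, so the caller retries. The wrapper xe_example_pin_pages() is
 * hypothetical; the real retry policy lives in the callers of this function.
 */
#if 0
static int xe_example_pin_pages(struct xe_userptr_vma *uvma)
{
	int err;

	do {
		/* Caller does not hold mmap_lock, so pass false. */
		err = xe_hmm_userptr_populate_range(uvma, false);
	} while (err == -EAGAIN);

	return err;
}
#endif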