// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  IOMMU helpers in MMU context.
 *
 *  Copyright (C) 2015 IBM Corp. <aik@ozlabs.ru>
 */

#include <linux/sched/signal.h>
#include <linux/slab.h>
#include <linux/rculist.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/migrate.h>
#include <linux/hugetlb.h>
#include <linux/swap.h>
#include <linux/sizes.h>
#include <linux/mm.h>
#include <asm/mmu_context.h>
#include <asm/pte-walk.h>
#include <linux/mm_inline.h>

static DEFINE_MUTEX(mem_list_mutex);

#define MM_IOMMU_TABLE_GROUP_PAGE_DIRTY	0x1
#define MM_IOMMU_TABLE_GROUP_PAGE_MASK	~(SZ_4K - 1)

struct mm_iommu_table_group_mem_t {
	struct list_head next;
	struct rcu_head rcu;
	unsigned long used;
	atomic64_t mapped;
	unsigned int pageshift;
	u64 ua;			/* userspace address */
	u64 entries;		/* number of entries in hpas/hpages[] */
	/*
	 * In mm_iommu_do_alloc() we temporarily use this to store
	 * struct page addresses.
	 *
	 * We need to convert ua to hpa in real mode. Make it
	 * simpler by storing the physical address.
	 */
	union {
		struct page **hpages;	/* vmalloc'ed */
		phys_addr_t *hpas;
	};
#define MM_IOMMU_TABLE_INVALID_HPA	((uint64_t)-1)
	u64 dev_hpa;		/* Device memory base address */
};

bool mm_iommu_preregistered(struct mm_struct *mm)
{
	return !list_empty(&mm->context.iommu_group_mem_list);
}
EXPORT_SYMBOL_GPL(mm_iommu_preregistered);

static long mm_iommu_do_alloc(struct mm_struct *mm, unsigned long ua,
			      unsigned long entries, unsigned long dev_hpa,
			      struct mm_iommu_table_group_mem_t **pmem)
{
	struct mm_iommu_table_group_mem_t *mem, *mem2;
	long i, ret, locked_entries = 0, pinned = 0;
	unsigned int pageshift;
	unsigned long entry, chunk;

	if (dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
		ret = account_locked_vm(mm, entries, true);
		if (ret)
			return ret;

		locked_entries = entries;
	}

	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
	if (!mem) {
		ret = -ENOMEM;
		goto unlock_exit;
	}

	if (dev_hpa != MM_IOMMU_TABLE_INVALID_HPA) {
		mem->pageshift = __ffs(dev_hpa | (entries << PAGE_SHIFT));
		mem->dev_hpa = dev_hpa;
		goto good_exit;
	}
	mem->dev_hpa = MM_IOMMU_TABLE_INVALID_HPA;

	/*
	 * As a starting point for the maximum page size calculation,
	 * we use the natural alignment of @ua and @entries to allow
	 * IOMMU pages smaller than huge pages but still bigger than
	 * PAGE_SIZE.
	 */
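	/*
	 * For example (illustrative values, not from this file): with
	 * ua = 0x1000000 (16MB aligned) and a 16MB region, i.e.
	 * entries << PAGE_SHIFT == 0x1000000, __ffs(ua | 0x1000000) is 24,
	 * so IOMMU pages of up to 16MB may be used; a ua that is only
	 * 64K aligned (e.g. 0x7010000) caps this at 16, i.e. 64K pages.
	 */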
	mem->pageshift = __ffs(ua | (entries << PAGE_SHIFT));
	mem->hpas = vzalloc(array_size(entries, sizeof(mem->hpas[0])));
	if (!mem->hpas) {
		kfree(mem);
		ret = -ENOMEM;
		goto unlock_exit;
	}

	mmap_read_lock(mm);
	chunk = (1UL << (PAGE_SHIFT + MAX_ORDER - 1)) /
			sizeof(struct vm_area_struct *);
	chunk = min(chunk, entries);
	for (entry = 0; entry < entries; entry += chunk) {
		unsigned long n = min(entries - entry, chunk);

		ret = pin_user_pages(ua + (entry << PAGE_SHIFT), n,
				FOLL_WRITE | FOLL_LONGTERM,
				mem->hpages + entry, NULL);
		if (ret == n) {
			pinned += n;
			continue;
		}
		if (ret > 0)
			pinned += ret;
		break;
	}
	mmap_read_unlock(mm);
	if (pinned != entries) {
		if (!ret)
			ret = -EFAULT;
		goto free_exit;
	}

good_exit:
	atomic64_set(&mem->mapped, 1);
	mem->used = 1;
	mem->ua = ua;
	mem->entries = entries;

	mutex_lock(&mem_list_mutex);

	list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next,
				lockdep_is_held(&mem_list_mutex)) {
		/* Overlap? */
		if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) &&
				(ua < (mem2->ua +
				       (mem2->entries << PAGE_SHIFT)))) {
			ret = -EINVAL;
			mutex_unlock(&mem_list_mutex);
			goto free_exit;
		}
	}

	if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) {
		/*
		 * Allow using IOMMU pages larger than 64k, but only if we
		 * are backed by hugetlb. Skip device memory as it is not
		 * backed with page structs.
		 */
		pageshift = PAGE_SHIFT;
		for (i = 0; i < entries; ++i) {
			struct page *page = mem->hpages[i];

			if ((mem->pageshift > PAGE_SHIFT) && PageHuge(page))
				pageshift = page_shift(compound_head(page));
			mem->pageshift = min(mem->pageshift, pageshift);
			/*
			 * We don't need the struct page reference any more;
			 * switch to the physical address.
			 */
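			/*
			 * Note: hpages and hpas alias the same vmalloc'ed
			 * buffer (see the union in
			 * mm_iommu_table_group_mem_t), so the conversion
			 * below happens in place.
			 */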
			mem->hpas[i] = page_to_pfn(page) << PAGE_SHIFT;
		}
	}

	list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list);

	mutex_unlock(&mem_list_mutex);

	*pmem = mem;

	return 0;

free_exit:
	/* free the references taken */
	unpin_user_pages(mem->hpages, pinned);

	vfree(mem->hpas);
	kfree(mem);

unlock_exit:
	account_locked_vm(mm, locked_entries, false);

	return ret;
}

long mm_iommu_new(struct mm_struct *mm, unsigned long ua, unsigned long entries,
		struct mm_iommu_table_group_mem_t **pmem)
{
	return mm_iommu_do_alloc(mm, ua, entries, MM_IOMMU_TABLE_INVALID_HPA,
			pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_new);

long mm_iommu_newdev(struct mm_struct *mm, unsigned long ua,
		unsigned long entries, unsigned long dev_hpa,
		struct mm_iommu_table_group_mem_t **pmem)
{
	return mm_iommu_do_alloc(mm, ua, entries, dev_hpa, pmem);
}
EXPORT_SYMBOL_GPL(mm_iommu_newdev);

static void mm_iommu_unpin(struct mm_iommu_table_group_mem_t *mem)
{
	long i;
	struct page *page = NULL;

	if (!mem->hpas)
		return;

	for (i = 0; i < mem->entries; ++i) {
		if (!mem->hpas[i])
			continue;

		page = pfn_to_page(mem->hpas[i] >> PAGE_SHIFT);
		if (!page)
			continue;

		if (mem->hpas[i] & MM_IOMMU_TABLE_GROUP_PAGE_DIRTY)
			SetPageDirty(page);

		unpin_user_page(page);

		mem->hpas[i] = 0;
	}
}

static void mm_iommu_do_free(struct mm_iommu_table_group_mem_t *mem)
{
	mm_iommu_unpin(mem);
	vfree(mem->hpas);
	kfree(mem);
}

static void mm_iommu_free(struct rcu_head *head)
{
	struct mm_iommu_table_group_mem_t *mem = container_of(head,
			struct mm_iommu_table_group_mem_t, rcu);

	mm_iommu_do_free(mem);
}

static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem)
{
	list_del_rcu(&mem->next);
	call_rcu(&mem->rcu, mm_iommu_free);
}

long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem)
{
	long ret = 0;
	unsigned long unlock_entries = 0;

	mutex_lock(&mem_list_mutex);

	if (mem->used == 0) {
		ret = -ENOENT;
		goto unlock_exit;
	}

	--mem->used;
	/* There are still users, exit */
	if (mem->used)
		goto unlock_exit;

	/* Are there still mappings? */
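	/*
	 * @mapped is biased: it starts at 1 and mm_iommu_mapped_inc()
	 * refuses to increment it once it has dropped to 0, so the
	 * cmpxchg from 1 to 0 below succeeds only when no mappings are
	 * currently active and blocks new ones from being created.
	 */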
	if (atomic64_cmpxchg(&mem->mapped, 1, 0) != 1) {
		++mem->used;
		ret = -EBUSY;
		goto unlock_exit;
	}

	if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
		unlock_entries = mem->entries;

	/* @mapped became 0 so now mappings are disabled, release the region */
	mm_iommu_release(mem);

unlock_exit:
	mutex_unlock(&mem_list_mutex);

	account_locked_vm(mm, unlock_entries, false);

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_put);

struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
		unsigned long ua, unsigned long size)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	rcu_read_lock();
	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if ((mem->ua <= ua) &&
				(ua + size <= mem->ua +
				 (mem->entries << PAGE_SHIFT))) {
			ret = mem;
			break;
		}
	}
	rcu_read_unlock();

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_lookup);

struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
		unsigned long ua, unsigned long size)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	list_for_each_entry_lockless(mem, &mm->context.iommu_group_mem_list,
			next) {
		if ((mem->ua <= ua) &&
				(ua + size <= mem->ua +
				 (mem->entries << PAGE_SHIFT))) {
			ret = mem;
			break;
		}
	}

	return ret;
}

struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm,
		unsigned long ua, unsigned long entries)
{
	struct mm_iommu_table_group_mem_t *mem, *ret = NULL;

	mutex_lock(&mem_list_mutex);

	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next,
			lockdep_is_held(&mem_list_mutex)) {
		if ((mem->ua == ua) && (mem->entries == entries)) {
			ret = mem;
			++mem->used;
			break;
		}
	}

	mutex_unlock(&mem_list_mutex);

	return ret;
}
EXPORT_SYMBOL_GPL(mm_iommu_get);

long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
	u64 *va;

	if (entry >= mem->entries)
		return -EFAULT;

	if (pageshift > mem->pageshift)
		return -EFAULT;

	if (!mem->hpas) {
		*hpa = mem->dev_hpa + (ua - mem->ua);
		return 0;
	}

	va = &mem->hpas[entry];
	*hpa = (*va & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);

	return 0;
}
EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);

long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
		unsigned long ua, unsigned int pageshift, unsigned long *hpa)
{
	const long entry = (ua - mem->ua) >> PAGE_SHIFT;
	unsigned long *pa;

	if (entry >= mem->entries)
		return -EFAULT;

	if (pageshift > mem->pageshift)
		return -EFAULT;

	if (!mem->hpas) {
		*hpa = mem->dev_hpa + (ua - mem->ua);
		return 0;
	}

	pa = (void *) vmalloc_to_phys(&mem->hpas[entry]);
	if (!pa)
		return -EFAULT;

	*hpa = (*pa & MM_IOMMU_TABLE_GROUP_PAGE_MASK) | (ua & ~PAGE_MASK);

	return 0;
}

extern void mm_iommu_ua_mark_dirty_rm(struct mm_struct *mm, unsigned long ua)
{
	struct mm_iommu_table_group_mem_t *mem;
	long entry;
	void *va;
	unsigned long *pa;

	mem = mm_iommu_lookup_rm(mm, ua, PAGE_SIZE);
	if (!mem)
		return;

	if (mem->dev_hpa != MM_IOMMU_TABLE_INVALID_HPA)
		return;

	entry = (ua - mem->ua) >> PAGE_SHIFT;
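	/*
	 * This runs in real mode (_rm), where the vmalloc'ed hpas[] cannot
	 * be dereferenced through its virtual address, so translate the
	 * entry's address to a physical one and set the dirty bit via that.
	 */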
	va = &mem->hpas[entry];

	pa = (void *) vmalloc_to_phys(va);
	if (!pa)
		return;

	*pa |= MM_IOMMU_TABLE_GROUP_PAGE_DIRTY;
}

bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa,
		unsigned int pageshift, unsigned long *size)
{
	struct mm_iommu_table_group_mem_t *mem;
	unsigned long end;

	rcu_read_lock();
	list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) {
		if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA)
			continue;

		end = mem->dev_hpa + (mem->entries << PAGE_SHIFT);
		if ((mem->dev_hpa <= hpa) && (hpa < end)) {
			/*
			 * Since the IOMMU page size might be bigger than
			 * PAGE_SIZE, the amount of preregistered memory
			 * starting from @hpa might be smaller than 1<<pageshift
			 * and the caller needs to distinguish this situation.
			 */
			*size = min(1UL << pageshift, end - hpa);
			rcu_read_unlock();
			return true;
		}
	}
	rcu_read_unlock();

	return false;
}
EXPORT_SYMBOL_GPL(mm_iommu_is_devmem);

long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
{
	if (atomic64_inc_not_zero(&mem->mapped))
		return 0;

	/* Last mm_iommu_put() has been called, no more mappings allowed */
	return -ENXIO;
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_inc);

void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem)
{
	atomic64_add_unless(&mem->mapped, -1, 1);
}
EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec);

void mm_iommu_init(struct mm_struct *mm)
{
	INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list);
}
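/*
 * Illustrative call flow (a sketch, not taken from this file): a caller
 * such as a VFIO/KVM TCE backend would typically do
 *
 *	mm_iommu_new(mm, ua, entries, &mem);            // pin + preregister
 *	...
 *	mm_iommu_mapped_inc(mem);                       // before mapping
 *	mm_iommu_ua_to_hpa(mem, ua, pageshift, &hpa);   // translate ua
 *	...
 *	mm_iommu_mapped_dec(mem);                       // after unmapping
 *	...
 *	mm_iommu_put(mm, mem);                          // unpin on unregister
 */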