1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright © 2006-2009, Intel Corporation. 4 * 5 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> 6 */ 7 8 #include <linux/iova.h> 9 #include <linux/module.h> 10 #include <linux/slab.h> 11 #include <linux/smp.h> 12 #include <linux/bitops.h> 13 #include <linux/cpu.h> 14 15 /* The anchor node sits above the top of the usable address space */ 16 #define IOVA_ANCHOR ~0UL 17 18 static bool iova_rcache_insert(struct iova_domain *iovad, 19 unsigned long pfn, 20 unsigned long size); 21 static unsigned long iova_rcache_get(struct iova_domain *iovad, 22 unsigned long size, 23 unsigned long limit_pfn); 24 static void init_iova_rcaches(struct iova_domain *iovad); 25 static void free_iova_rcaches(struct iova_domain *iovad); 26 static void fq_destroy_all_entries(struct iova_domain *iovad); 27 static void fq_flush_timeout(struct timer_list *t); 28 29 void 30 init_iova_domain(struct iova_domain *iovad, unsigned long granule, 31 unsigned long start_pfn) 32 { 33 /* 34 * IOVA granularity will normally be equal to the smallest 35 * supported IOMMU page size; both *must* be capable of 36 * representing individual CPU pages exactly. 37 */ 38 BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule)); 39 40 spin_lock_init(&iovad->iova_rbtree_lock); 41 iovad->rbroot = RB_ROOT; 42 iovad->cached_node = &iovad->anchor.node; 43 iovad->cached32_node = &iovad->anchor.node; 44 iovad->granule = granule; 45 iovad->start_pfn = start_pfn; 46 iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad)); 47 iovad->max32_alloc_size = iovad->dma_32bit_pfn; 48 iovad->flush_cb = NULL; 49 iovad->fq = NULL; 50 iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR; 51 rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node); 52 rb_insert_color(&iovad->anchor.node, &iovad->rbroot); 53 init_iova_rcaches(iovad); 54 } 55 EXPORT_SYMBOL_GPL(init_iova_domain); 56 57 bool has_iova_flush_queue(struct iova_domain *iovad) 58 { 59 return !!iovad->fq; 60 } 61 62 static void free_iova_flush_queue(struct iova_domain *iovad) 63 { 64 if (!has_iova_flush_queue(iovad)) 65 return; 66 67 if (timer_pending(&iovad->fq_timer)) 68 del_timer(&iovad->fq_timer); 69 70 fq_destroy_all_entries(iovad); 71 72 free_percpu(iovad->fq); 73 74 iovad->fq = NULL; 75 iovad->flush_cb = NULL; 76 iovad->entry_dtor = NULL; 77 } 78 79 int init_iova_flush_queue(struct iova_domain *iovad, 80 iova_flush_cb flush_cb, iova_entry_dtor entry_dtor) 81 { 82 struct iova_fq __percpu *queue; 83 int cpu; 84 85 atomic64_set(&iovad->fq_flush_start_cnt, 0); 86 atomic64_set(&iovad->fq_flush_finish_cnt, 0); 87 88 queue = alloc_percpu(struct iova_fq); 89 if (!queue) 90 return -ENOMEM; 91 92 iovad->flush_cb = flush_cb; 93 iovad->entry_dtor = entry_dtor; 94 95 for_each_possible_cpu(cpu) { 96 struct iova_fq *fq; 97 98 fq = per_cpu_ptr(queue, cpu); 99 fq->head = 0; 100 fq->tail = 0; 101 102 spin_lock_init(&fq->lock); 103 } 104 105 smp_wmb(); 106 107 iovad->fq = queue; 108 109 timer_setup(&iovad->fq_timer, fq_flush_timeout, 0); 110 atomic_set(&iovad->fq_timer_on, 0); 111 112 return 0; 113 } 114 EXPORT_SYMBOL_GPL(init_iova_flush_queue); 115 116 static struct rb_node * 117 __get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn) 118 { 119 if (limit_pfn <= iovad->dma_32bit_pfn) 120 return iovad->cached32_node; 121 122 return iovad->cached_node; 123 } 124 125 static void 126 __cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new) 127 { 128 if (new->pfn_hi < iovad->dma_32bit_pfn) 129 iovad->cached32_node = &new->node; 130 else 131 iovad->cached_node = &new->node; 132 } 133 134 static void 135 __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) 136 { 137 struct iova *cached_iova; 138 139 cached_iova = rb_entry(iovad->cached32_node, struct iova, node); 140 if (free == cached_iova || 141 (free->pfn_hi < iovad->dma_32bit_pfn && 142 free->pfn_lo >= cached_iova->pfn_lo)) { 143 iovad->cached32_node = rb_next(&free->node); 144 iovad->max32_alloc_size = iovad->dma_32bit_pfn; 145 } 146 147 cached_iova = rb_entry(iovad->cached_node, struct iova, node); 148 if (free->pfn_lo >= cached_iova->pfn_lo) 149 iovad->cached_node = rb_next(&free->node); 150 } 151 152 /* Insert the iova into domain rbtree by holding writer lock */ 153 static void 154 iova_insert_rbtree(struct rb_root *root, struct iova *iova, 155 struct rb_node *start) 156 { 157 struct rb_node **new, *parent = NULL; 158 159 new = (start) ? &start : &(root->rb_node); 160 /* Figure out where to put new node */ 161 while (*new) { 162 struct iova *this = rb_entry(*new, struct iova, node); 163 164 parent = *new; 165 166 if (iova->pfn_lo < this->pfn_lo) 167 new = &((*new)->rb_left); 168 else if (iova->pfn_lo > this->pfn_lo) 169 new = &((*new)->rb_right); 170 else { 171 WARN_ON(1); /* this should not happen */ 172 return; 173 } 174 } 175 /* Add new node and rebalance tree. */ 176 rb_link_node(&iova->node, parent, new); 177 rb_insert_color(&iova->node, root); 178 } 179 180 static int __alloc_and_insert_iova_range(struct iova_domain *iovad, 181 unsigned long size, unsigned long limit_pfn, 182 struct iova *new, bool size_aligned) 183 { 184 struct rb_node *curr, *prev; 185 struct iova *curr_iova; 186 unsigned long flags; 187 unsigned long new_pfn; 188 unsigned long align_mask = ~0UL; 189 190 if (size_aligned) 191 align_mask <<= fls_long(size - 1); 192 193 /* Walk the tree backwards */ 194 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); 195 if (limit_pfn <= iovad->dma_32bit_pfn && 196 size >= iovad->max32_alloc_size) 197 goto iova32_full; 198 199 curr = __get_cached_rbnode(iovad, limit_pfn); 200 curr_iova = rb_entry(curr, struct iova, node); 201 do { 202 limit_pfn = min(limit_pfn, curr_iova->pfn_lo); 203 new_pfn = (limit_pfn - size) & align_mask; 204 prev = curr; 205 curr = rb_prev(curr); 206 curr_iova = rb_entry(curr, struct iova, node); 207 } while (curr && new_pfn <= curr_iova->pfn_hi); 208 209 if (limit_pfn < size || new_pfn < iovad->start_pfn) { 210 iovad->max32_alloc_size = size; 211 goto iova32_full; 212 } 213 214 /* pfn_lo will point to size aligned address if size_aligned is set */ 215 new->pfn_lo = new_pfn; 216 new->pfn_hi = new->pfn_lo + size - 1; 217 218 /* If we have 'prev', it's a valid place to start the insertion. */ 219 iova_insert_rbtree(&iovad->rbroot, new, prev); 220 __cached_rbnode_insert_update(iovad, new); 221 222 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 223 return 0; 224 225 iova32_full: 226 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 227 return -ENOMEM; 228 } 229 230 static struct kmem_cache *iova_cache; 231 static unsigned int iova_cache_users; 232 static DEFINE_MUTEX(iova_cache_mutex); 233 234 struct iova *alloc_iova_mem(void) 235 { 236 return kmem_cache_alloc(iova_cache, GFP_ATOMIC); 237 } 238 EXPORT_SYMBOL(alloc_iova_mem); 239 240 void free_iova_mem(struct iova *iova) 241 { 242 if (iova->pfn_lo != IOVA_ANCHOR) 243 kmem_cache_free(iova_cache, iova); 244 } 245 EXPORT_SYMBOL(free_iova_mem); 246 247 int iova_cache_get(void) 248 { 249 mutex_lock(&iova_cache_mutex); 250 if (!iova_cache_users) { 251 iova_cache = kmem_cache_create( 252 "iommu_iova", sizeof(struct iova), 0, 253 SLAB_HWCACHE_ALIGN, NULL); 254 if (!iova_cache) { 255 mutex_unlock(&iova_cache_mutex); 256 printk(KERN_ERR "Couldn't create iova cache\n"); 257 return -ENOMEM; 258 } 259 } 260 261 iova_cache_users++; 262 mutex_unlock(&iova_cache_mutex); 263 264 return 0; 265 } 266 EXPORT_SYMBOL_GPL(iova_cache_get); 267 268 void iova_cache_put(void) 269 { 270 mutex_lock(&iova_cache_mutex); 271 if (WARN_ON(!iova_cache_users)) { 272 mutex_unlock(&iova_cache_mutex); 273 return; 274 } 275 iova_cache_users--; 276 if (!iova_cache_users) 277 kmem_cache_destroy(iova_cache); 278 mutex_unlock(&iova_cache_mutex); 279 } 280 EXPORT_SYMBOL_GPL(iova_cache_put); 281 282 /** 283 * alloc_iova - allocates an iova 284 * @iovad: - iova domain in question 285 * @size: - size of page frames to allocate 286 * @limit_pfn: - max limit address 287 * @size_aligned: - set if size_aligned address range is required 288 * This function allocates an iova in the range iovad->start_pfn to limit_pfn, 289 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned 290 * flag is set then the allocated address iova->pfn_lo will be naturally 291 * aligned on roundup_power_of_two(size). 292 */ 293 struct iova * 294 alloc_iova(struct iova_domain *iovad, unsigned long size, 295 unsigned long limit_pfn, 296 bool size_aligned) 297 { 298 struct iova *new_iova; 299 int ret; 300 301 new_iova = alloc_iova_mem(); 302 if (!new_iova) 303 return NULL; 304 305 ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1, 306 new_iova, size_aligned); 307 308 if (ret) { 309 free_iova_mem(new_iova); 310 return NULL; 311 } 312 313 return new_iova; 314 } 315 EXPORT_SYMBOL_GPL(alloc_iova); 316 317 static struct iova * 318 private_find_iova(struct iova_domain *iovad, unsigned long pfn) 319 { 320 struct rb_node *node = iovad->rbroot.rb_node; 321 322 assert_spin_locked(&iovad->iova_rbtree_lock); 323 324 while (node) { 325 struct iova *iova = rb_entry(node, struct iova, node); 326 327 if (pfn < iova->pfn_lo) 328 node = node->rb_left; 329 else if (pfn > iova->pfn_hi) 330 node = node->rb_right; 331 else 332 return iova; /* pfn falls within iova's range */ 333 } 334 335 return NULL; 336 } 337 338 static void private_free_iova(struct iova_domain *iovad, struct iova *iova) 339 { 340 assert_spin_locked(&iovad->iova_rbtree_lock); 341 __cached_rbnode_delete_update(iovad, iova); 342 rb_erase(&iova->node, &iovad->rbroot); 343 free_iova_mem(iova); 344 } 345 346 /** 347 * find_iova - finds an iova for a given pfn 348 * @iovad: - iova domain in question. 349 * @pfn: - page frame number 350 * This function finds and returns an iova belonging to the 351 * given doamin which matches the given pfn. 352 */ 353 struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) 354 { 355 unsigned long flags; 356 struct iova *iova; 357 358 /* Take the lock so that no other thread is manipulating the rbtree */ 359 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); 360 iova = private_find_iova(iovad, pfn); 361 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 362 return iova; 363 } 364 EXPORT_SYMBOL_GPL(find_iova); 365 366 /** 367 * __free_iova - frees the given iova 368 * @iovad: iova domain in question. 369 * @iova: iova in question. 370 * Frees the given iova belonging to the giving domain 371 */ 372 void 373 __free_iova(struct iova_domain *iovad, struct iova *iova) 374 { 375 unsigned long flags; 376 377 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); 378 private_free_iova(iovad, iova); 379 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 380 } 381 EXPORT_SYMBOL_GPL(__free_iova); 382 383 /** 384 * free_iova - finds and frees the iova for a given pfn 385 * @iovad: - iova domain in question. 386 * @pfn: - pfn that is allocated previously 387 * This functions finds an iova for a given pfn and then 388 * frees the iova from that domain. 389 */ 390 void 391 free_iova(struct iova_domain *iovad, unsigned long pfn) 392 { 393 struct iova *iova = find_iova(iovad, pfn); 394 395 if (iova) 396 __free_iova(iovad, iova); 397 398 } 399 EXPORT_SYMBOL_GPL(free_iova); 400 401 /** 402 * alloc_iova_fast - allocates an iova from rcache 403 * @iovad: - iova domain in question 404 * @size: - size of page frames to allocate 405 * @limit_pfn: - max limit address 406 * @flush_rcache: - set to flush rcache on regular allocation failure 407 * This function tries to satisfy an iova allocation from the rcache, 408 * and falls back to regular allocation on failure. If regular allocation 409 * fails too and the flush_rcache flag is set then the rcache will be flushed. 410 */ 411 unsigned long 412 alloc_iova_fast(struct iova_domain *iovad, unsigned long size, 413 unsigned long limit_pfn, bool flush_rcache) 414 { 415 unsigned long iova_pfn; 416 struct iova *new_iova; 417 418 iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1); 419 if (iova_pfn) 420 return iova_pfn; 421 422 retry: 423 new_iova = alloc_iova(iovad, size, limit_pfn, true); 424 if (!new_iova) { 425 unsigned int cpu; 426 427 if (!flush_rcache) 428 return 0; 429 430 /* Try replenishing IOVAs by flushing rcache. */ 431 flush_rcache = false; 432 for_each_online_cpu(cpu) 433 free_cpu_cached_iovas(cpu, iovad); 434 goto retry; 435 } 436 437 return new_iova->pfn_lo; 438 } 439 EXPORT_SYMBOL_GPL(alloc_iova_fast); 440 441 /** 442 * free_iova_fast - free iova pfn range into rcache 443 * @iovad: - iova domain in question. 444 * @pfn: - pfn that is allocated previously 445 * @size: - # of pages in range 446 * This functions frees an iova range by trying to put it into the rcache, 447 * falling back to regular iova deallocation via free_iova() if this fails. 448 */ 449 void 450 free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size) 451 { 452 if (iova_rcache_insert(iovad, pfn, size)) 453 return; 454 455 free_iova(iovad, pfn); 456 } 457 EXPORT_SYMBOL_GPL(free_iova_fast); 458 459 #define fq_ring_for_each(i, fq) \ 460 for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE) 461 462 static inline bool fq_full(struct iova_fq *fq) 463 { 464 assert_spin_locked(&fq->lock); 465 return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head); 466 } 467 468 static inline unsigned fq_ring_add(struct iova_fq *fq) 469 { 470 unsigned idx = fq->tail; 471 472 assert_spin_locked(&fq->lock); 473 474 fq->tail = (idx + 1) % IOVA_FQ_SIZE; 475 476 return idx; 477 } 478 479 static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq) 480 { 481 u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt); 482 unsigned idx; 483 484 assert_spin_locked(&fq->lock); 485 486 fq_ring_for_each(idx, fq) { 487 488 if (fq->entries[idx].counter >= counter) 489 break; 490 491 if (iovad->entry_dtor) 492 iovad->entry_dtor(fq->entries[idx].data); 493 494 free_iova_fast(iovad, 495 fq->entries[idx].iova_pfn, 496 fq->entries[idx].pages); 497 498 fq->head = (fq->head + 1) % IOVA_FQ_SIZE; 499 } 500 } 501 502 static void iova_domain_flush(struct iova_domain *iovad) 503 { 504 atomic64_inc(&iovad->fq_flush_start_cnt); 505 iovad->flush_cb(iovad); 506 atomic64_inc(&iovad->fq_flush_finish_cnt); 507 } 508 509 static void fq_destroy_all_entries(struct iova_domain *iovad) 510 { 511 int cpu; 512 513 /* 514 * This code runs when the iova_domain is being detroyed, so don't 515 * bother to free iovas, just call the entry_dtor on all remaining 516 * entries. 517 */ 518 if (!iovad->entry_dtor) 519 return; 520 521 for_each_possible_cpu(cpu) { 522 struct iova_fq *fq = per_cpu_ptr(iovad->fq, cpu); 523 int idx; 524 525 fq_ring_for_each(idx, fq) 526 iovad->entry_dtor(fq->entries[idx].data); 527 } 528 } 529 530 static void fq_flush_timeout(struct timer_list *t) 531 { 532 struct iova_domain *iovad = from_timer(iovad, t, fq_timer); 533 int cpu; 534 535 atomic_set(&iovad->fq_timer_on, 0); 536 iova_domain_flush(iovad); 537 538 for_each_possible_cpu(cpu) { 539 unsigned long flags; 540 struct iova_fq *fq; 541 542 fq = per_cpu_ptr(iovad->fq, cpu); 543 spin_lock_irqsave(&fq->lock, flags); 544 fq_ring_free(iovad, fq); 545 spin_unlock_irqrestore(&fq->lock, flags); 546 } 547 } 548 549 void queue_iova(struct iova_domain *iovad, 550 unsigned long pfn, unsigned long pages, 551 unsigned long data) 552 { 553 struct iova_fq *fq = raw_cpu_ptr(iovad->fq); 554 unsigned long flags; 555 unsigned idx; 556 557 spin_lock_irqsave(&fq->lock, flags); 558 559 /* 560 * First remove all entries from the flush queue that have already been 561 * flushed out on another CPU. This makes the fq_full() check below less 562 * likely to be true. 563 */ 564 fq_ring_free(iovad, fq); 565 566 if (fq_full(fq)) { 567 iova_domain_flush(iovad); 568 fq_ring_free(iovad, fq); 569 } 570 571 idx = fq_ring_add(fq); 572 573 fq->entries[idx].iova_pfn = pfn; 574 fq->entries[idx].pages = pages; 575 fq->entries[idx].data = data; 576 fq->entries[idx].counter = atomic64_read(&iovad->fq_flush_start_cnt); 577 578 spin_unlock_irqrestore(&fq->lock, flags); 579 580 if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0) 581 mod_timer(&iovad->fq_timer, 582 jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); 583 } 584 EXPORT_SYMBOL_GPL(queue_iova); 585 586 /** 587 * put_iova_domain - destroys the iova doamin 588 * @iovad: - iova domain in question. 589 * All the iova's in that domain are destroyed. 590 */ 591 void put_iova_domain(struct iova_domain *iovad) 592 { 593 struct iova *iova, *tmp; 594 595 free_iova_flush_queue(iovad); 596 free_iova_rcaches(iovad); 597 rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node) 598 free_iova_mem(iova); 599 } 600 EXPORT_SYMBOL_GPL(put_iova_domain); 601 602 static int 603 __is_range_overlap(struct rb_node *node, 604 unsigned long pfn_lo, unsigned long pfn_hi) 605 { 606 struct iova *iova = rb_entry(node, struct iova, node); 607 608 if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo)) 609 return 1; 610 return 0; 611 } 612 613 static inline struct iova * 614 alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi) 615 { 616 struct iova *iova; 617 618 iova = alloc_iova_mem(); 619 if (iova) { 620 iova->pfn_lo = pfn_lo; 621 iova->pfn_hi = pfn_hi; 622 } 623 624 return iova; 625 } 626 627 static struct iova * 628 __insert_new_range(struct iova_domain *iovad, 629 unsigned long pfn_lo, unsigned long pfn_hi) 630 { 631 struct iova *iova; 632 633 iova = alloc_and_init_iova(pfn_lo, pfn_hi); 634 if (iova) 635 iova_insert_rbtree(&iovad->rbroot, iova, NULL); 636 637 return iova; 638 } 639 640 static void 641 __adjust_overlap_range(struct iova *iova, 642 unsigned long *pfn_lo, unsigned long *pfn_hi) 643 { 644 if (*pfn_lo < iova->pfn_lo) 645 iova->pfn_lo = *pfn_lo; 646 if (*pfn_hi > iova->pfn_hi) 647 *pfn_lo = iova->pfn_hi + 1; 648 } 649 650 /** 651 * reserve_iova - reserves an iova in the given range 652 * @iovad: - iova domain pointer 653 * @pfn_lo: - lower page frame address 654 * @pfn_hi:- higher pfn adderss 655 * This function allocates reserves the address range from pfn_lo to pfn_hi so 656 * that this address is not dished out as part of alloc_iova. 657 */ 658 struct iova * 659 reserve_iova(struct iova_domain *iovad, 660 unsigned long pfn_lo, unsigned long pfn_hi) 661 { 662 struct rb_node *node; 663 unsigned long flags; 664 struct iova *iova; 665 unsigned int overlap = 0; 666 667 /* Don't allow nonsensical pfns */ 668 if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad)))) 669 return NULL; 670 671 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); 672 for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) { 673 if (__is_range_overlap(node, pfn_lo, pfn_hi)) { 674 iova = rb_entry(node, struct iova, node); 675 __adjust_overlap_range(iova, &pfn_lo, &pfn_hi); 676 if ((pfn_lo >= iova->pfn_lo) && 677 (pfn_hi <= iova->pfn_hi)) 678 goto finish; 679 overlap = 1; 680 681 } else if (overlap) 682 break; 683 } 684 685 /* We are here either because this is the first reserver node 686 * or need to insert remaining non overlap addr range 687 */ 688 iova = __insert_new_range(iovad, pfn_lo, pfn_hi); 689 finish: 690 691 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 692 return iova; 693 } 694 EXPORT_SYMBOL_GPL(reserve_iova); 695 696 /** 697 * copy_reserved_iova - copies the reserved between domains 698 * @from: - source doamin from where to copy 699 * @to: - destination domin where to copy 700 * This function copies reserved iova's from one doamin to 701 * other. 702 */ 703 void 704 copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) 705 { 706 unsigned long flags; 707 struct rb_node *node; 708 709 spin_lock_irqsave(&from->iova_rbtree_lock, flags); 710 for (node = rb_first(&from->rbroot); node; node = rb_next(node)) { 711 struct iova *iova = rb_entry(node, struct iova, node); 712 struct iova *new_iova; 713 714 if (iova->pfn_lo == IOVA_ANCHOR) 715 continue; 716 717 new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi); 718 if (!new_iova) 719 printk(KERN_ERR "Reserve iova range %lx@%lx failed\n", 720 iova->pfn_lo, iova->pfn_lo); 721 } 722 spin_unlock_irqrestore(&from->iova_rbtree_lock, flags); 723 } 724 EXPORT_SYMBOL_GPL(copy_reserved_iova); 725 726 struct iova * 727 split_and_remove_iova(struct iova_domain *iovad, struct iova *iova, 728 unsigned long pfn_lo, unsigned long pfn_hi) 729 { 730 unsigned long flags; 731 struct iova *prev = NULL, *next = NULL; 732 733 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); 734 if (iova->pfn_lo < pfn_lo) { 735 prev = alloc_and_init_iova(iova->pfn_lo, pfn_lo - 1); 736 if (prev == NULL) 737 goto error; 738 } 739 if (iova->pfn_hi > pfn_hi) { 740 next = alloc_and_init_iova(pfn_hi + 1, iova->pfn_hi); 741 if (next == NULL) 742 goto error; 743 } 744 745 __cached_rbnode_delete_update(iovad, iova); 746 rb_erase(&iova->node, &iovad->rbroot); 747 748 if (prev) { 749 iova_insert_rbtree(&iovad->rbroot, prev, NULL); 750 iova->pfn_lo = pfn_lo; 751 } 752 if (next) { 753 iova_insert_rbtree(&iovad->rbroot, next, NULL); 754 iova->pfn_hi = pfn_hi; 755 } 756 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 757 758 return iova; 759 760 error: 761 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 762 if (prev) 763 free_iova_mem(prev); 764 return NULL; 765 } 766 767 /* 768 * Magazine caches for IOVA ranges. For an introduction to magazines, 769 * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab 770 * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams. 771 * For simplicity, we use a static magazine size and don't implement the 772 * dynamic size tuning described in the paper. 773 */ 774 775 #define IOVA_MAG_SIZE 128 776 777 struct iova_magazine { 778 unsigned long size; 779 unsigned long pfns[IOVA_MAG_SIZE]; 780 }; 781 782 struct iova_cpu_rcache { 783 spinlock_t lock; 784 struct iova_magazine *loaded; 785 struct iova_magazine *prev; 786 }; 787 788 static struct iova_magazine *iova_magazine_alloc(gfp_t flags) 789 { 790 return kzalloc(sizeof(struct iova_magazine), flags); 791 } 792 793 static void iova_magazine_free(struct iova_magazine *mag) 794 { 795 kfree(mag); 796 } 797 798 static void 799 iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad) 800 { 801 unsigned long flags; 802 int i; 803 804 if (!mag) 805 return; 806 807 spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); 808 809 for (i = 0 ; i < mag->size; ++i) { 810 struct iova *iova = private_find_iova(iovad, mag->pfns[i]); 811 812 BUG_ON(!iova); 813 private_free_iova(iovad, iova); 814 } 815 816 spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); 817 818 mag->size = 0; 819 } 820 821 static bool iova_magazine_full(struct iova_magazine *mag) 822 { 823 return (mag && mag->size == IOVA_MAG_SIZE); 824 } 825 826 static bool iova_magazine_empty(struct iova_magazine *mag) 827 { 828 return (!mag || mag->size == 0); 829 } 830 831 static unsigned long iova_magazine_pop(struct iova_magazine *mag, 832 unsigned long limit_pfn) 833 { 834 int i; 835 unsigned long pfn; 836 837 BUG_ON(iova_magazine_empty(mag)); 838 839 /* Only fall back to the rbtree if we have no suitable pfns at all */ 840 for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--) 841 if (i == 0) 842 return 0; 843 844 /* Swap it to pop it */ 845 pfn = mag->pfns[i]; 846 mag->pfns[i] = mag->pfns[--mag->size]; 847 848 return pfn; 849 } 850 851 static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn) 852 { 853 BUG_ON(iova_magazine_full(mag)); 854 855 mag->pfns[mag->size++] = pfn; 856 } 857 858 static void init_iova_rcaches(struct iova_domain *iovad) 859 { 860 struct iova_cpu_rcache *cpu_rcache; 861 struct iova_rcache *rcache; 862 unsigned int cpu; 863 int i; 864 865 for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { 866 rcache = &iovad->rcaches[i]; 867 spin_lock_init(&rcache->lock); 868 rcache->depot_size = 0; 869 rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size()); 870 if (WARN_ON(!rcache->cpu_rcaches)) 871 continue; 872 for_each_possible_cpu(cpu) { 873 cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); 874 spin_lock_init(&cpu_rcache->lock); 875 cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL); 876 cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL); 877 } 878 } 879 } 880 881 /* 882 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and 883 * return true on success. Can fail if rcache is full and we can't free 884 * space, and free_iova() (our only caller) will then return the IOVA 885 * range to the rbtree instead. 886 */ 887 static bool __iova_rcache_insert(struct iova_domain *iovad, 888 struct iova_rcache *rcache, 889 unsigned long iova_pfn) 890 { 891 struct iova_magazine *mag_to_free = NULL; 892 struct iova_cpu_rcache *cpu_rcache; 893 bool can_insert = false; 894 unsigned long flags; 895 896 cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); 897 spin_lock_irqsave(&cpu_rcache->lock, flags); 898 899 if (!iova_magazine_full(cpu_rcache->loaded)) { 900 can_insert = true; 901 } else if (!iova_magazine_full(cpu_rcache->prev)) { 902 swap(cpu_rcache->prev, cpu_rcache->loaded); 903 can_insert = true; 904 } else { 905 struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC); 906 907 if (new_mag) { 908 spin_lock(&rcache->lock); 909 if (rcache->depot_size < MAX_GLOBAL_MAGS) { 910 rcache->depot[rcache->depot_size++] = 911 cpu_rcache->loaded; 912 } else { 913 mag_to_free = cpu_rcache->loaded; 914 } 915 spin_unlock(&rcache->lock); 916 917 cpu_rcache->loaded = new_mag; 918 can_insert = true; 919 } 920 } 921 922 if (can_insert) 923 iova_magazine_push(cpu_rcache->loaded, iova_pfn); 924 925 spin_unlock_irqrestore(&cpu_rcache->lock, flags); 926 927 if (mag_to_free) { 928 iova_magazine_free_pfns(mag_to_free, iovad); 929 iova_magazine_free(mag_to_free); 930 } 931 932 return can_insert; 933 } 934 935 static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn, 936 unsigned long size) 937 { 938 unsigned int log_size = order_base_2(size); 939 940 if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE) 941 return false; 942 943 return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn); 944 } 945 946 /* 947 * Caller wants to allocate a new IOVA range from 'rcache'. If we can 948 * satisfy the request, return a matching non-NULL range and remove 949 * it from the 'rcache'. 950 */ 951 static unsigned long __iova_rcache_get(struct iova_rcache *rcache, 952 unsigned long limit_pfn) 953 { 954 struct iova_cpu_rcache *cpu_rcache; 955 unsigned long iova_pfn = 0; 956 bool has_pfn = false; 957 unsigned long flags; 958 959 cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches); 960 spin_lock_irqsave(&cpu_rcache->lock, flags); 961 962 if (!iova_magazine_empty(cpu_rcache->loaded)) { 963 has_pfn = true; 964 } else if (!iova_magazine_empty(cpu_rcache->prev)) { 965 swap(cpu_rcache->prev, cpu_rcache->loaded); 966 has_pfn = true; 967 } else { 968 spin_lock(&rcache->lock); 969 if (rcache->depot_size > 0) { 970 iova_magazine_free(cpu_rcache->loaded); 971 cpu_rcache->loaded = rcache->depot[--rcache->depot_size]; 972 has_pfn = true; 973 } 974 spin_unlock(&rcache->lock); 975 } 976 977 if (has_pfn) 978 iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn); 979 980 spin_unlock_irqrestore(&cpu_rcache->lock, flags); 981 982 return iova_pfn; 983 } 984 985 /* 986 * Try to satisfy IOVA allocation range from rcache. Fail if requested 987 * size is too big or the DMA limit we are given isn't satisfied by the 988 * top element in the magazine. 989 */ 990 static unsigned long iova_rcache_get(struct iova_domain *iovad, 991 unsigned long size, 992 unsigned long limit_pfn) 993 { 994 unsigned int log_size = order_base_2(size); 995 996 if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE) 997 return 0; 998 999 return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size); 1000 } 1001 1002 /* 1003 * free rcache data structures. 1004 */ 1005 static void free_iova_rcaches(struct iova_domain *iovad) 1006 { 1007 struct iova_rcache *rcache; 1008 struct iova_cpu_rcache *cpu_rcache; 1009 unsigned int cpu; 1010 int i, j; 1011 1012 for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { 1013 rcache = &iovad->rcaches[i]; 1014 for_each_possible_cpu(cpu) { 1015 cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); 1016 iova_magazine_free(cpu_rcache->loaded); 1017 iova_magazine_free(cpu_rcache->prev); 1018 } 1019 free_percpu(rcache->cpu_rcaches); 1020 for (j = 0; j < rcache->depot_size; ++j) 1021 iova_magazine_free(rcache->depot[j]); 1022 } 1023 } 1024 1025 /* 1026 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged) 1027 */ 1028 void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad) 1029 { 1030 struct iova_cpu_rcache *cpu_rcache; 1031 struct iova_rcache *rcache; 1032 unsigned long flags; 1033 int i; 1034 1035 for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { 1036 rcache = &iovad->rcaches[i]; 1037 cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); 1038 spin_lock_irqsave(&cpu_rcache->lock, flags); 1039 iova_magazine_free_pfns(cpu_rcache->loaded, iovad); 1040 iova_magazine_free_pfns(cpu_rcache->prev, iovad); 1041 spin_unlock_irqrestore(&cpu_rcache->lock, flags); 1042 } 1043 } 1044 1045 MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>"); 1046 MODULE_LICENSE("GPL"); 1047