/*
 * Copyright © 2006-2009, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 */

#include <linux/iova.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/bitops.h>
#include <linux/cpu.h>

/* The anchor node sits above the top of the usable address space */
#define IOVA_ANCHOR	~0UL

static bool iova_rcache_insert(struct iova_domain *iovad,
			       unsigned long pfn,
			       unsigned long size);
static unsigned long iova_rcache_get(struct iova_domain *iovad,
				     unsigned long size,
				     unsigned long limit_pfn);
static void init_iova_rcaches(struct iova_domain *iovad);
static void free_iova_rcaches(struct iova_domain *iovad);
static void fq_destroy_all_entries(struct iova_domain *iovad);
static void fq_flush_timeout(struct timer_list *t);

void
init_iova_domain(struct iova_domain *iovad, unsigned long granule,
	unsigned long start_pfn)
{
	/*
	 * IOVA granularity will normally be equal to the smallest
	 * supported IOMMU page size; both *must* be capable of
	 * representing individual CPU pages exactly.
	 */
	BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));

	spin_lock_init(&iovad->iova_rbtree_lock);
	iovad->rbroot = RB_ROOT;
	iovad->cached_node = &iovad->anchor.node;
	iovad->cached32_node = &iovad->anchor.node;
	iovad->granule = granule;
	iovad->start_pfn = start_pfn;
	iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
	iovad->flush_cb = NULL;
	iovad->fq = NULL;
	iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
	rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
	rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
	init_iova_rcaches(iovad);
}
EXPORT_SYMBOL_GPL(init_iova_domain);
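
/*
 * Illustrative usage sketch, not code from this file: a typical IOMMU
 * driver initializes a domain with a granule equal to its smallest
 * supported page size and a start pfn derived from the bottom of the
 * usable aperture.  "pgsize" and "aperture_start" below are placeholder
 * names, not symbols defined anywhere in this file:
 *
 *	unsigned long order = __ffs(pgsize);
 *
 *	init_iova_domain(iovad, 1UL << order, aperture_start >> order);
 */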

static void free_iova_flush_queue(struct iova_domain *iovad)
{
	if (!iovad->fq)
		return;

	if (timer_pending(&iovad->fq_timer))
		del_timer(&iovad->fq_timer);

	fq_destroy_all_entries(iovad);

	free_percpu(iovad->fq);

	iovad->fq = NULL;
	iovad->flush_cb = NULL;
	iovad->entry_dtor = NULL;
}

int init_iova_flush_queue(struct iova_domain *iovad,
			  iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
{
	int cpu;

	atomic64_set(&iovad->fq_flush_start_cnt, 0);
	atomic64_set(&iovad->fq_flush_finish_cnt, 0);

	iovad->fq = alloc_percpu(struct iova_fq);
	if (!iovad->fq)
		return -ENOMEM;

	iovad->flush_cb = flush_cb;
	iovad->entry_dtor = entry_dtor;

	for_each_possible_cpu(cpu) {
		struct iova_fq *fq;

		fq = per_cpu_ptr(iovad->fq, cpu);
		fq->head = 0;
		fq->tail = 0;

		spin_lock_init(&fq->lock);
	}

	timer_setup(&iovad->fq_timer, fq_flush_timeout, 0);
	atomic_set(&iovad->fq_timer_on, 0);

	return 0;
}
EXPORT_SYMBOL_GPL(init_iova_flush_queue);

static struct rb_node *
__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
{
	if (limit_pfn <= iovad->dma_32bit_pfn)
		return iovad->cached32_node;

	return iovad->cached_node;
}

static void
__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
{
	if (new->pfn_hi < iovad->dma_32bit_pfn)
		iovad->cached32_node = &new->node;
	else
		iovad->cached_node = &new->node;
}

static void
__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
{
	struct iova *cached_iova;

	cached_iova = rb_entry(iovad->cached32_node, struct iova, node);
	if (free->pfn_hi < iovad->dma_32bit_pfn &&
	    free->pfn_lo >= cached_iova->pfn_lo)
		iovad->cached32_node = rb_next(&free->node);

	cached_iova = rb_entry(iovad->cached_node, struct iova, node);
	if (free->pfn_lo >= cached_iova->pfn_lo)
		iovad->cached_node = rb_next(&free->node);
}

/* Insert the iova into domain rbtree by holding writer lock */
static void
iova_insert_rbtree(struct rb_root *root, struct iova *iova,
		   struct rb_node *start)
{
	struct rb_node **new, *parent = NULL;

	new = (start) ? &start : &(root->rb_node);
	/* Figure out where to put new node */
	while (*new) {
		struct iova *this = rb_entry(*new, struct iova, node);

		parent = *new;

		if (iova->pfn_lo < this->pfn_lo)
			new = &((*new)->rb_left);
		else if (iova->pfn_lo > this->pfn_lo)
			new = &((*new)->rb_right);
		else {
			WARN_ON(1); /* this should not happen */
			return;
		}
	}
	/* Add new node and rebalance tree. */
	rb_link_node(&iova->node, parent, new);
	rb_insert_color(&iova->node, root);
}
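
/*
 * The allocator below walks the tree backwards from limit_pfn and, when
 * size_aligned is set, rounds each candidate down with an alignment mask.
 * A worked illustration (the numbers are only an example): for size = 6,
 * fls_long(size - 1) is 3, so align_mask becomes ~0UL << 3 and the
 * candidate (limit_pfn - size) is rounded down to a multiple of 8, i.e.
 * to roundup_pow_of_two(6).
 */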

static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
		unsigned long size, unsigned long limit_pfn,
		struct iova *new, bool size_aligned)
{
	struct rb_node *curr, *prev;
	struct iova *curr_iova;
	unsigned long flags;
	unsigned long new_pfn;
	unsigned long align_mask = ~0UL;

	if (size_aligned)
		align_mask <<= fls_long(size - 1);

	/* Walk the tree backwards */
	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	curr = __get_cached_rbnode(iovad, limit_pfn);
	curr_iova = rb_entry(curr, struct iova, node);
	do {
		limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
		new_pfn = (limit_pfn - size) & align_mask;
		prev = curr;
		curr = rb_prev(curr);
		curr_iova = rb_entry(curr, struct iova, node);
	} while (curr && new_pfn <= curr_iova->pfn_hi);

	if (limit_pfn < size || new_pfn < iovad->start_pfn) {
		spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
		return -ENOMEM;
	}

	/* pfn_lo will point to size aligned address if size_aligned is set */
	new->pfn_lo = new_pfn;
	new->pfn_hi = new->pfn_lo + size - 1;

	/* If we have 'prev', it's a valid place to start the insertion. */
	iova_insert_rbtree(&iovad->rbroot, new, prev);
	__cached_rbnode_insert_update(iovad, new);

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);

	return 0;
}

static struct kmem_cache *iova_cache;
static unsigned int iova_cache_users;
static DEFINE_MUTEX(iova_cache_mutex);

struct iova *alloc_iova_mem(void)
{
	return kmem_cache_alloc(iova_cache, GFP_ATOMIC);
}
EXPORT_SYMBOL(alloc_iova_mem);

void free_iova_mem(struct iova *iova)
{
	if (iova->pfn_lo != IOVA_ANCHOR)
		kmem_cache_free(iova_cache, iova);
}
EXPORT_SYMBOL(free_iova_mem);

int iova_cache_get(void)
{
	mutex_lock(&iova_cache_mutex);
	if (!iova_cache_users) {
		iova_cache = kmem_cache_create(
			"iommu_iova", sizeof(struct iova), 0,
			SLAB_HWCACHE_ALIGN, NULL);
		if (!iova_cache) {
			mutex_unlock(&iova_cache_mutex);
			printk(KERN_ERR "Couldn't create iova cache\n");
			return -ENOMEM;
		}
	}

	iova_cache_users++;
	mutex_unlock(&iova_cache_mutex);

	return 0;
}
EXPORT_SYMBOL_GPL(iova_cache_get);

void iova_cache_put(void)
{
	mutex_lock(&iova_cache_mutex);
	if (WARN_ON(!iova_cache_users)) {
		mutex_unlock(&iova_cache_mutex);
		return;
	}
	iova_cache_users--;
	if (!iova_cache_users)
		kmem_cache_destroy(iova_cache);
	mutex_unlock(&iova_cache_mutex);
}
EXPORT_SYMBOL_GPL(iova_cache_put);

/**
 * alloc_iova - allocates an iova
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * @size_aligned: - set if size_aligned address range is required
 * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
 * flag is set then the allocated address iova->pfn_lo will be naturally
 * aligned on roundup_power_of_two(size).
 */
struct iova *
alloc_iova(struct iova_domain *iovad, unsigned long size,
	unsigned long limit_pfn,
	bool size_aligned)
{
	struct iova *new_iova;
	int ret;

	new_iova = alloc_iova_mem();
	if (!new_iova)
		return NULL;

	ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
			new_iova, size_aligned);

	if (ret) {
		free_iova_mem(new_iova);
		return NULL;
	}

	return new_iova;
}
EXPORT_SYMBOL_GPL(alloc_iova);
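
/*
 * Illustrative usage sketch (assumed caller context; "npages" and
 * "dma_limit_pfn" are placeholder names): allocate a size-aligned range
 * below a device's DMA limit and release it once the mapping is torn down:
 *
 *	struct iova *iova;
 *
 *	iova = alloc_iova(iovad, npages, dma_limit_pfn, true);
 *	if (!iova)
 *		return -ENOMEM;
 *	... map npages starting at (iova->pfn_lo << iova_shift(iovad)) ...
 *	__free_iova(iovad, iova);
 */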

static struct iova *
private_find_iova(struct iova_domain *iovad, unsigned long pfn)
{
	struct rb_node *node = iovad->rbroot.rb_node;

	assert_spin_locked(&iovad->iova_rbtree_lock);

	while (node) {
		struct iova *iova = rb_entry(node, struct iova, node);

		if (pfn < iova->pfn_lo)
			node = node->rb_left;
		else if (pfn > iova->pfn_hi)
			node = node->rb_right;
		else
			return iova;	/* pfn falls within iova's range */
	}

	return NULL;
}

static void private_free_iova(struct iova_domain *iovad, struct iova *iova)
{
	assert_spin_locked(&iovad->iova_rbtree_lock);
	__cached_rbnode_delete_update(iovad, iova);
	rb_erase(&iova->node, &iovad->rbroot);
	free_iova_mem(iova);
}

/**
 * find_iova - finds an iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - page frame number
 * This function finds and returns an iova belonging to the
 * given domain which matches the given pfn.
 */
struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
{
	unsigned long flags;
	struct iova *iova;

	/* Take the lock so that no other thread is manipulating the rbtree */
	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	iova = private_find_iova(iovad, pfn);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return iova;
}
EXPORT_SYMBOL_GPL(find_iova);

/**
 * __free_iova - frees the given iova
 * @iovad: iova domain in question.
 * @iova: iova in question.
 * Frees the given iova belonging to the given domain
 */
void
__free_iova(struct iova_domain *iovad, struct iova *iova)
{
	unsigned long flags;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	private_free_iova(iovad, iova);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
}
EXPORT_SYMBOL_GPL(__free_iova);

/**
 * free_iova - finds and frees the iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - pfn that is allocated previously
 * This function finds an iova for a given pfn and then
 * frees the iova from that domain.
 */
void
free_iova(struct iova_domain *iovad, unsigned long pfn)
{
	struct iova *iova = find_iova(iovad, pfn);

	if (iova)
		__free_iova(iovad, iova);
}
EXPORT_SYMBOL_GPL(free_iova);

/**
 * alloc_iova_fast - allocates an iova from rcache
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * @flush_rcache: - set to flush rcache on regular allocation failure
 * This function tries to satisfy an iova allocation from the rcache,
 * and falls back to regular allocation on failure. If regular allocation
 * fails too and the flush_rcache flag is set then the rcache will be flushed.
 */
unsigned long
alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
		unsigned long limit_pfn, bool flush_rcache)
{
	unsigned long iova_pfn;
	struct iova *new_iova;

	iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
	if (iova_pfn)
		return iova_pfn;

retry:
	new_iova = alloc_iova(iovad, size, limit_pfn, true);
	if (!new_iova) {
		unsigned int cpu;

		if (!flush_rcache)
			return 0;

		/* Try replenishing IOVAs by flushing rcache. */
		flush_rcache = false;
		for_each_online_cpu(cpu)
			free_cpu_cached_iovas(cpu, iovad);
		goto retry;
	}

	return new_iova->pfn_lo;
}
EXPORT_SYMBOL_GPL(alloc_iova_fast);
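
/*
 * Illustrative usage sketch (assumed caller, placeholder names): the fast
 * path deals in plain pfns rather than struct iova and pairs with
 * free_iova_fast() below, so that freed ranges can be recycled through
 * the rcache:
 *
 *	unsigned long pfn;
 *
 *	pfn = alloc_iova_fast(iovad, npages, dma_limit_pfn, true);
 *	if (!pfn)
 *		return -ENOMEM;
 *	...
 *	free_iova_fast(iovad, pfn, npages);
 */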

/**
 * free_iova_fast - free iova pfn range into rcache
 * @iovad: - iova domain in question.
 * @pfn: - pfn that is allocated previously
 * @size: - # of pages in range
 * This function frees an iova range by trying to put it into the rcache,
 * falling back to regular iova deallocation via free_iova() if this fails.
 */
void
free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
{
	if (iova_rcache_insert(iovad, pfn, size))
		return;

	free_iova(iovad, pfn);
}
EXPORT_SYMBOL_GPL(free_iova_fast);

#define fq_ring_for_each(i, fq) \
	for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)

static inline bool fq_full(struct iova_fq *fq)
{
	assert_spin_locked(&fq->lock);
	return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
}

static inline unsigned fq_ring_add(struct iova_fq *fq)
{
	unsigned idx = fq->tail;

	assert_spin_locked(&fq->lock);

	fq->tail = (idx + 1) % IOVA_FQ_SIZE;

	return idx;
}

static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq)
{
	u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt);
	unsigned idx;

	assert_spin_locked(&fq->lock);

	fq_ring_for_each(idx, fq) {

		if (fq->entries[idx].counter >= counter)
			break;

		if (iovad->entry_dtor)
			iovad->entry_dtor(fq->entries[idx].data);

		free_iova_fast(iovad,
			       fq->entries[idx].iova_pfn,
			       fq->entries[idx].pages);

		fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
	}
}

static void iova_domain_flush(struct iova_domain *iovad)
{
	atomic64_inc(&iovad->fq_flush_start_cnt);
	iovad->flush_cb(iovad);
	atomic64_inc(&iovad->fq_flush_finish_cnt);
}

static void fq_destroy_all_entries(struct iova_domain *iovad)
{
	int cpu;

	/*
	 * This code runs when the iova_domain is being destroyed, so don't
	 * bother to free iovas, just call the entry_dtor on all remaining
	 * entries.
	 */
	if (!iovad->entry_dtor)
		return;

	for_each_possible_cpu(cpu) {
		struct iova_fq *fq = per_cpu_ptr(iovad->fq, cpu);
		int idx;

		fq_ring_for_each(idx, fq)
			iovad->entry_dtor(fq->entries[idx].data);
	}
}

static void fq_flush_timeout(struct timer_list *t)
{
	struct iova_domain *iovad = from_timer(iovad, t, fq_timer);
	int cpu;

	atomic_set(&iovad->fq_timer_on, 0);
	iova_domain_flush(iovad);

	for_each_possible_cpu(cpu) {
		unsigned long flags;
		struct iova_fq *fq;

		fq = per_cpu_ptr(iovad->fq, cpu);
		spin_lock_irqsave(&fq->lock, flags);
		fq_ring_free(iovad, fq);
		spin_unlock_irqrestore(&fq->lock, flags);
	}
}

void queue_iova(struct iova_domain *iovad,
		unsigned long pfn, unsigned long pages,
		unsigned long data)
{
	struct iova_fq *fq = raw_cpu_ptr(iovad->fq);
	unsigned long flags;
	unsigned idx;

	spin_lock_irqsave(&fq->lock, flags);

	/*
	 * First remove all entries from the flush queue that have already been
	 * flushed out on another CPU. This makes the fq_full() check below less
	 * likely to be true.
	 */
	fq_ring_free(iovad, fq);

	if (fq_full(fq)) {
		iova_domain_flush(iovad);
		fq_ring_free(iovad, fq);
	}

	idx = fq_ring_add(fq);

	fq->entries[idx].iova_pfn = pfn;
	fq->entries[idx].pages = pages;
	fq->entries[idx].data = data;
	fq->entries[idx].counter = atomic64_read(&iovad->fq_flush_start_cnt);

	spin_unlock_irqrestore(&fq->lock, flags);

	if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0)
		mod_timer(&iovad->fq_timer,
			  jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
}
EXPORT_SYMBOL_GPL(queue_iova);
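
/*
 * Illustrative sketch of the deferred-free path (assumed caller with a
 * registered flush callback; "domain", "addr" and "size" are placeholder
 * names): instead of freeing an IOVA synchronously after unmapping, the
 * range is queued and only handed back to the allocator after the next
 * domain-wide flush, triggered by a full per-CPU queue or by fq_timer:
 *
 *	iommu_unmap(domain, addr, size);
 *	queue_iova(iovad, iova_pfn(iovad, addr),
 *		   size >> iova_shift(iovad), 0);
 */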

/**
 * put_iova_domain - destroys the iova domain
 * @iovad: - iova domain in question.
 * All the iovas in that domain are destroyed.
 */
void put_iova_domain(struct iova_domain *iovad)
{
	struct iova *iova, *tmp;

	free_iova_flush_queue(iovad);
	free_iova_rcaches(iovad);
	rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
		free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(put_iova_domain);

static int
__is_range_overlap(struct rb_node *node,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova = rb_entry(node, struct iova, node);

	if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
		return 1;
	return 0;
}

static inline struct iova *
alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova;

	iova = alloc_iova_mem();
	if (iova) {
		iova->pfn_lo = pfn_lo;
		iova->pfn_hi = pfn_hi;
	}

	return iova;
}

static struct iova *
__insert_new_range(struct iova_domain *iovad,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova;

	iova = alloc_and_init_iova(pfn_lo, pfn_hi);
	if (iova)
		iova_insert_rbtree(&iovad->rbroot, iova, NULL);

	return iova;
}

static void
__adjust_overlap_range(struct iova *iova,
	unsigned long *pfn_lo, unsigned long *pfn_hi)
{
	if (*pfn_lo < iova->pfn_lo)
		iova->pfn_lo = *pfn_lo;
	if (*pfn_hi > iova->pfn_hi)
		*pfn_lo = iova->pfn_hi + 1;
}

/**
 * reserve_iova - reserves an iova in the given range
 * @iovad: - iova domain pointer
 * @pfn_lo: - lower page frame address
 * @pfn_hi: - higher page frame address
 * This function reserves the address range from pfn_lo to pfn_hi so
 * that this range is not handed out as part of alloc_iova().
 */
struct iova *
reserve_iova(struct iova_domain *iovad,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct rb_node *node;
	unsigned long flags;
	struct iova *iova;
	unsigned int overlap = 0;

	/* Don't allow nonsensical pfns */
	if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
		return NULL;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
		if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
			iova = rb_entry(node, struct iova, node);
			__adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
			if ((pfn_lo >= iova->pfn_lo) &&
			    (pfn_hi <= iova->pfn_hi))
				goto finish;
			overlap = 1;

		} else if (overlap)
			break;
	}

	/*
	 * We are here either because this is the first reserved range
	 * or because we need to insert the remaining non-overlapping
	 * address range.
	 */
	iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
finish:

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return iova;
}
EXPORT_SYMBOL_GPL(reserve_iova);
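
/*
 * Illustrative usage sketch (placeholder names "window_start" and
 * "window_size"): reservations are typically used to punch holes for
 * regions the allocator must never hand out, such as a PCI window or an
 * MSI doorbell range:
 *
 *	reserve_iova(iovad, iova_pfn(iovad, window_start),
 *		     iova_pfn(iovad, window_start + window_size - 1));
 */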

/**
 * copy_reserved_iova - copies the reserved ranges between domains
 * @from: - source domain from where to copy
 * @to: - destination domain where to copy
 * This function copies reserved iovas from one domain to
 * another.
 */
void
copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
{
	unsigned long flags;
	struct rb_node *node;

	spin_lock_irqsave(&from->iova_rbtree_lock, flags);
	for (node = rb_first(&from->rbroot); node; node = rb_next(node)) {
		struct iova *iova = rb_entry(node, struct iova, node);
		struct iova *new_iova;

		if (iova->pfn_lo == IOVA_ANCHOR)
			continue;

		new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
		if (!new_iova)
			printk(KERN_ERR "Reserve iova range %lx-%lx failed\n",
				iova->pfn_lo, iova->pfn_hi);
	}
	spin_unlock_irqrestore(&from->iova_rbtree_lock, flags);
}
EXPORT_SYMBOL_GPL(copy_reserved_iova);

struct iova *
split_and_remove_iova(struct iova_domain *iovad, struct iova *iova,
		unsigned long pfn_lo, unsigned long pfn_hi)
{
	unsigned long flags;
	struct iova *prev = NULL, *next = NULL;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	if (iova->pfn_lo < pfn_lo) {
		prev = alloc_and_init_iova(iova->pfn_lo, pfn_lo - 1);
		if (prev == NULL)
			goto error;
	}
	if (iova->pfn_hi > pfn_hi) {
		next = alloc_and_init_iova(pfn_hi + 1, iova->pfn_hi);
		if (next == NULL)
			goto error;
	}

	__cached_rbnode_delete_update(iovad, iova);
	rb_erase(&iova->node, &iovad->rbroot);

	if (prev) {
		iova_insert_rbtree(&iovad->rbroot, prev, NULL);
		iova->pfn_lo = pfn_lo;
	}
	if (next) {
		iova_insert_rbtree(&iovad->rbroot, next, NULL);
		iova->pfn_hi = pfn_hi;
	}
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);

	return iova;

error:
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	if (prev)
		free_iova_mem(prev);
	return NULL;
}

/*
 * Magazine caches for IOVA ranges. For an introduction to magazines,
 * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
 * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
 * For simplicity, we use a static magazine size and don't implement the
 * dynamic size tuning described in the paper.
 */
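
/*
 * Rough capacity arithmetic for the scheme below (an illustration, not
 * extra state): each CPU keeps a "loaded" and a "prev" magazine per size
 * class, so up to 2 * IOVA_MAG_SIZE pfns can be cached per CPU and size
 * class, plus MAX_GLOBAL_MAGS full magazines parked in the shared depot.
 * The rbtree and its lock are only touched when all of these are empty
 * (on allocation) or full (on free).
 */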

#define IOVA_MAG_SIZE 128

struct iova_magazine {
	unsigned long size;
	unsigned long pfns[IOVA_MAG_SIZE];
};

struct iova_cpu_rcache {
	spinlock_t lock;
	struct iova_magazine *loaded;
	struct iova_magazine *prev;
};

static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
{
	return kzalloc(sizeof(struct iova_magazine), flags);
}

static void iova_magazine_free(struct iova_magazine *mag)
{
	kfree(mag);
}

static void
iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
{
	unsigned long flags;
	int i;

	if (!mag)
		return;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);

	for (i = 0; i < mag->size; ++i) {
		struct iova *iova = private_find_iova(iovad, mag->pfns[i]);

		BUG_ON(!iova);
		private_free_iova(iovad, iova);
	}

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);

	mag->size = 0;
}

static bool iova_magazine_full(struct iova_magazine *mag)
{
	return (mag && mag->size == IOVA_MAG_SIZE);
}

static bool iova_magazine_empty(struct iova_magazine *mag)
{
	return (!mag || mag->size == 0);
}

static unsigned long iova_magazine_pop(struct iova_magazine *mag,
				       unsigned long limit_pfn)
{
	int i;
	unsigned long pfn;

	BUG_ON(iova_magazine_empty(mag));

	/* Only fall back to the rbtree if we have no suitable pfns at all */
	for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
		if (i == 0)
			return 0;

	/* Swap it to pop it */
	pfn = mag->pfns[i];
	mag->pfns[i] = mag->pfns[--mag->size];

	return pfn;
}

static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
{
	BUG_ON(iova_magazine_full(mag));

	mag->pfns[mag->size++] = pfn;
}

static void init_iova_rcaches(struct iova_domain *iovad)
{
	struct iova_cpu_rcache *cpu_rcache;
	struct iova_rcache *rcache;
	unsigned int cpu;
	int i;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		spin_lock_init(&rcache->lock);
		rcache->depot_size = 0;
		rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache),
						     cache_line_size());
		if (WARN_ON(!rcache->cpu_rcaches))
			continue;
		for_each_possible_cpu(cpu) {
			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
			spin_lock_init(&cpu_rcache->lock);
			cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
			cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
		}
	}
}

/*
 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
 * return true on success.  Can fail if rcache is full and we can't free
 * space, and free_iova() (our only caller) will then return the IOVA
 * range to the rbtree instead.
 */
static bool __iova_rcache_insert(struct iova_domain *iovad,
				 struct iova_rcache *rcache,
				 unsigned long iova_pfn)
{
	struct iova_magazine *mag_to_free = NULL;
	struct iova_cpu_rcache *cpu_rcache;
	bool can_insert = false;
	unsigned long flags;

	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
	spin_lock_irqsave(&cpu_rcache->lock, flags);

	if (!iova_magazine_full(cpu_rcache->loaded)) {
		can_insert = true;
	} else if (!iova_magazine_full(cpu_rcache->prev)) {
		swap(cpu_rcache->prev, cpu_rcache->loaded);
		can_insert = true;
	} else {
		struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);

		if (new_mag) {
			spin_lock(&rcache->lock);
			if (rcache->depot_size < MAX_GLOBAL_MAGS) {
				rcache->depot[rcache->depot_size++] =
						cpu_rcache->loaded;
			} else {
				mag_to_free = cpu_rcache->loaded;
			}
			spin_unlock(&rcache->lock);

			cpu_rcache->loaded = new_mag;
			can_insert = true;
		}
	}

	if (can_insert)
		iova_magazine_push(cpu_rcache->loaded, iova_pfn);

	spin_unlock_irqrestore(&cpu_rcache->lock, flags);

	if (mag_to_free) {
		iova_magazine_free_pfns(mag_to_free, iovad);
		iova_magazine_free(mag_to_free);
	}

	return can_insert;
}

static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
			       unsigned long size)
{
	unsigned int log_size = order_base_2(size);

	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
		return false;

	return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
}

/*
 * Caller wants to allocate a new IOVA range from 'rcache'.  If we can
 * satisfy the request, return a matching non-NULL range and remove
 * it from the 'rcache'.
 */
static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
				       unsigned long limit_pfn)
{
	struct iova_cpu_rcache *cpu_rcache;
	unsigned long iova_pfn = 0;
	bool has_pfn = false;
	unsigned long flags;

	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
	spin_lock_irqsave(&cpu_rcache->lock, flags);

	if (!iova_magazine_empty(cpu_rcache->loaded)) {
		has_pfn = true;
	} else if (!iova_magazine_empty(cpu_rcache->prev)) {
		swap(cpu_rcache->prev, cpu_rcache->loaded);
		has_pfn = true;
	} else {
		spin_lock(&rcache->lock);
		if (rcache->depot_size > 0) {
			iova_magazine_free(cpu_rcache->loaded);
			cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
			has_pfn = true;
		}
		spin_unlock(&rcache->lock);
	}

	if (has_pfn)
		iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);

	spin_unlock_irqrestore(&cpu_rcache->lock, flags);

	return iova_pfn;
}

/*
 * Try to satisfy IOVA allocation range from rcache.  Fail if requested
 * size is too big or the DMA limit we are given isn't satisfied by the
 * top element in the magazine.
 */
static unsigned long iova_rcache_get(struct iova_domain *iovad,
				     unsigned long size,
				     unsigned long limit_pfn)
{
	unsigned int log_size = order_base_2(size);

	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
		return 0;

	return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
}
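
/*
 * Worked illustration of the size-class mapping above (example numbers,
 * nothing here computes them): a request for 6 pages has
 * order_base_2(6) == 3 and is therefore served from and returned to
 * rcaches[3], like every size in (4, 8].  Requests larger than
 * 2^(IOVA_RANGE_CACHE_MAX_SIZE - 1) pages bypass the rcache and always
 * go through the rbtree.
 */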

/*
 * free rcache data structures.
 */
static void free_iova_rcaches(struct iova_domain *iovad)
{
	struct iova_rcache *rcache;
	struct iova_cpu_rcache *cpu_rcache;
	unsigned int cpu;
	int i, j;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		for_each_possible_cpu(cpu) {
			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
			iova_magazine_free(cpu_rcache->loaded);
			iova_magazine_free(cpu_rcache->prev);
		}
		free_percpu(rcache->cpu_rcaches);
		for (j = 0; j < rcache->depot_size; ++j)
			iova_magazine_free(rcache->depot[j]);
	}
}

/*
 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
 */
void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
{
	struct iova_cpu_rcache *cpu_rcache;
	struct iova_rcache *rcache;
	unsigned long flags;
	int i;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
		spin_lock_irqsave(&cpu_rcache->lock, flags);
		iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
		iova_magazine_free_pfns(cpu_rcache->prev, iovad);
		spin_unlock_irqrestore(&cpu_rcache->lock, flags);
	}
}

MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
MODULE_LICENSE("GPL");