/*
 * Copyright © 2006-2009, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 */

#include <linux/iova.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/bitops.h>
#include <linux/cpu.h>

/* The anchor node sits above the top of the usable address space */
#define IOVA_ANCHOR	~0UL

static bool iova_rcache_insert(struct iova_domain *iovad,
			       unsigned long pfn,
			       unsigned long size);
static unsigned long iova_rcache_get(struct iova_domain *iovad,
				     unsigned long size,
				     unsigned long limit_pfn);
static void init_iova_rcaches(struct iova_domain *iovad);
static void free_iova_rcaches(struct iova_domain *iovad);
static void fq_destroy_all_entries(struct iova_domain *iovad);
static void fq_flush_timeout(struct timer_list *t);
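
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * a typical user of this allocator sets a domain up and tears it down as
 * below. The granule and start_pfn values are assumptions for the example;
 * real callers derive them from the IOMMU's supported page sizes and any
 * reserved regions.
 *
 *	struct iova_domain iovad;
 *
 *	iova_cache_get();			// take a reference on the iova slab
 *	init_iova_domain(&iovad, SZ_4K, 1);	// 4K granule, skip pfn 0
 *
 *	// ... allocate and free IOVAs (see alloc_iova() and friends below) ...
 *
 *	put_iova_domain(&iovad);		// frees every remaining iova
 *	iova_cache_put();			// drop the slab reference
 */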

void
init_iova_domain(struct iova_domain *iovad, unsigned long granule,
	unsigned long start_pfn)
{
	/*
	 * IOVA granularity will normally be equal to the smallest
	 * supported IOMMU page size; both *must* be capable of
	 * representing individual CPU pages exactly.
	 */
	BUG_ON((granule > PAGE_SIZE) || !is_power_of_2(granule));

	spin_lock_init(&iovad->iova_rbtree_lock);
	iovad->rbroot = RB_ROOT;
	iovad->cached_node = &iovad->anchor.node;
	iovad->cached32_node = &iovad->anchor.node;
	iovad->granule = granule;
	iovad->start_pfn = start_pfn;
	iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
	iovad->max32_alloc_size = iovad->dma_32bit_pfn;
	iovad->flush_cb = NULL;
	iovad->fq = NULL;
	iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
	rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node);
	rb_insert_color(&iovad->anchor.node, &iovad->rbroot);
	init_iova_rcaches(iovad);
}
EXPORT_SYMBOL_GPL(init_iova_domain);

static void free_iova_flush_queue(struct iova_domain *iovad)
{
	if (!iovad->fq)
		return;

	if (timer_pending(&iovad->fq_timer))
		del_timer(&iovad->fq_timer);

	fq_destroy_all_entries(iovad);

	free_percpu(iovad->fq);

	iovad->fq = NULL;
	iovad->flush_cb = NULL;
	iovad->entry_dtor = NULL;
}

int init_iova_flush_queue(struct iova_domain *iovad,
			  iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
{
	int cpu;

	atomic64_set(&iovad->fq_flush_start_cnt, 0);
	atomic64_set(&iovad->fq_flush_finish_cnt, 0);

	iovad->fq = alloc_percpu(struct iova_fq);
	if (!iovad->fq)
		return -ENOMEM;

	iovad->flush_cb = flush_cb;
	iovad->entry_dtor = entry_dtor;

	for_each_possible_cpu(cpu) {
		struct iova_fq *fq;

		fq = per_cpu_ptr(iovad->fq, cpu);
		fq->head = 0;
		fq->tail = 0;

		spin_lock_init(&fq->lock);
	}

	timer_setup(&iovad->fq_timer, fq_flush_timeout, 0);
	atomic_set(&iovad->fq_timer_on, 0);

	return 0;
}
EXPORT_SYMBOL_GPL(init_iova_flush_queue);

static struct rb_node *
__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn)
{
	if (limit_pfn <= iovad->dma_32bit_pfn)
		return iovad->cached32_node;

	return iovad->cached_node;
}

static void
__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new)
{
	if (new->pfn_hi < iovad->dma_32bit_pfn)
		iovad->cached32_node = &new->node;
	else
		iovad->cached_node = &new->node;
}

static void
__cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free)
{
	struct iova *cached_iova;

	cached_iova = rb_entry(iovad->cached32_node, struct iova, node);
	if (free->pfn_hi < iovad->dma_32bit_pfn &&
	    free->pfn_lo >= cached_iova->pfn_lo) {
		iovad->cached32_node = rb_next(&free->node);
		iovad->max32_alloc_size = iovad->dma_32bit_pfn;
	}

	cached_iova = rb_entry(iovad->cached_node, struct iova, node);
	if (free->pfn_lo >= cached_iova->pfn_lo)
		iovad->cached_node = rb_next(&free->node);
}

/* Insert the iova into domain rbtree by holding writer lock */
static void
iova_insert_rbtree(struct rb_root *root, struct iova *iova,
		   struct rb_node *start)
{
	struct rb_node **new, *parent = NULL;

	new = (start) ? &start : &(root->rb_node);
	/* Figure out where to put new node */
	while (*new) {
		struct iova *this = rb_entry(*new, struct iova, node);

		parent = *new;

		if (iova->pfn_lo < this->pfn_lo)
			new = &((*new)->rb_left);
		else if (iova->pfn_lo > this->pfn_lo)
			new = &((*new)->rb_right);
		else {
			WARN_ON(1);	/* this should not happen */
			return;
		}
	}
	/* Add new node and rebalance tree. */
	rb_link_node(&iova->node, parent, new);
	rb_insert_color(&iova->node, root);
}

static int __alloc_and_insert_iova_range(struct iova_domain *iovad,
		unsigned long size, unsigned long limit_pfn,
		struct iova *new, bool size_aligned)
{
	struct rb_node *curr, *prev;
	struct iova *curr_iova;
	unsigned long flags;
	unsigned long new_pfn;
	unsigned long align_mask = ~0UL;

	if (size_aligned)
		align_mask <<= fls_long(size - 1);

	/* Walk the tree backwards */
	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	if (limit_pfn <= iovad->dma_32bit_pfn &&
			size >= iovad->max32_alloc_size)
		goto iova32_full;

	curr = __get_cached_rbnode(iovad, limit_pfn);
	curr_iova = rb_entry(curr, struct iova, node);
	do {
		limit_pfn = min(limit_pfn, curr_iova->pfn_lo);
		new_pfn = (limit_pfn - size) & align_mask;
		prev = curr;
		curr = rb_prev(curr);
		curr_iova = rb_entry(curr, struct iova, node);
	} while (curr && new_pfn <= curr_iova->pfn_hi);

	if (limit_pfn < size || new_pfn < iovad->start_pfn) {
		iovad->max32_alloc_size = size;
		goto iova32_full;
	}

	/* pfn_lo will point to size aligned address if size_aligned is set */
	new->pfn_lo = new_pfn;
	new->pfn_hi = new->pfn_lo + size - 1;

	/* If we have 'prev', it's a valid place to start the insertion. */
	iova_insert_rbtree(&iovad->rbroot, new, prev);
	__cached_rbnode_insert_update(iovad, new);

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return 0;

iova32_full:
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return -ENOMEM;
}

static struct kmem_cache *iova_cache;
static unsigned int iova_cache_users;
static DEFINE_MUTEX(iova_cache_mutex);

struct iova *alloc_iova_mem(void)
{
	return kmem_cache_alloc(iova_cache, GFP_ATOMIC);
}
EXPORT_SYMBOL(alloc_iova_mem);

void free_iova_mem(struct iova *iova)
{
	if (iova->pfn_lo != IOVA_ANCHOR)
		kmem_cache_free(iova_cache, iova);
}
EXPORT_SYMBOL(free_iova_mem);

int iova_cache_get(void)
{
	mutex_lock(&iova_cache_mutex);
	if (!iova_cache_users) {
		iova_cache = kmem_cache_create(
			"iommu_iova", sizeof(struct iova), 0,
			SLAB_HWCACHE_ALIGN, NULL);
		if (!iova_cache) {
			mutex_unlock(&iova_cache_mutex);
			printk(KERN_ERR "Couldn't create iova cache\n");
			return -ENOMEM;
		}
	}

	iova_cache_users++;
	mutex_unlock(&iova_cache_mutex);

	return 0;
}
EXPORT_SYMBOL_GPL(iova_cache_get);

void iova_cache_put(void)
{
	mutex_lock(&iova_cache_mutex);
	if (WARN_ON(!iova_cache_users)) {
		mutex_unlock(&iova_cache_mutex);
		return;
	}
	iova_cache_users--;
	if (!iova_cache_users)
		kmem_cache_destroy(iova_cache);
	mutex_unlock(&iova_cache_mutex);
}
EXPORT_SYMBOL_GPL(iova_cache_put);

/**
 * alloc_iova - allocates an iova
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * @size_aligned: - set if size_aligned address range is required
 * This function allocates an iova in the range iovad->start_pfn to limit_pfn,
 * searching top-down from limit_pfn to iovad->start_pfn. If the size_aligned
 * flag is set then the allocated address iova->pfn_lo will be naturally
 * aligned on roundup_power_of_two(size).
 */
struct iova *
alloc_iova(struct iova_domain *iovad, unsigned long size,
	unsigned long limit_pfn,
	bool size_aligned)
{
	struct iova *new_iova;
	int ret;

	new_iova = alloc_iova_mem();
	if (!new_iova)
		return NULL;

	ret = __alloc_and_insert_iova_range(iovad, size, limit_pfn + 1,
			new_iova, size_aligned);

	if (ret) {
		free_iova_mem(new_iova);
		return NULL;
	}

	return new_iova;
}
EXPORT_SYMBOL_GPL(alloc_iova);

static struct iova *
private_find_iova(struct iova_domain *iovad, unsigned long pfn)
{
	struct rb_node *node = iovad->rbroot.rb_node;

	assert_spin_locked(&iovad->iova_rbtree_lock);

	while (node) {
		struct iova *iova = rb_entry(node, struct iova, node);

		if (pfn < iova->pfn_lo)
			node = node->rb_left;
		else if (pfn > iova->pfn_hi)
			node = node->rb_right;
		else
			return iova;	/* pfn falls within iova's range */
	}

	return NULL;
}

static void private_free_iova(struct iova_domain *iovad, struct iova *iova)
{
	assert_spin_locked(&iovad->iova_rbtree_lock);
	__cached_rbnode_delete_update(iovad, iova);
	rb_erase(&iova->node, &iovad->rbroot);
	free_iova_mem(iova);
}

/**
 * find_iova - finds an iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - page frame number
 * This function finds and returns an iova belonging to the
 * given domain which matches the given pfn.
 */
struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn)
{
	unsigned long flags;
	struct iova *iova;

	/* Take the lock so that no other thread is manipulating the rbtree */
	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	iova = private_find_iova(iovad, pfn);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return iova;
}
EXPORT_SYMBOL_GPL(find_iova);

/**
 * __free_iova - frees the given iova
 * @iovad: iova domain in question.
 * @iova: iova in question.
 * Frees the given iova belonging to the given domain
 */
void
__free_iova(struct iova_domain *iovad, struct iova *iova)
{
	unsigned long flags;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	private_free_iova(iovad, iova);
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
}
EXPORT_SYMBOL_GPL(__free_iova);

/**
 * free_iova - finds and frees the iova for a given pfn
 * @iovad: - iova domain in question.
 * @pfn: - pfn that is allocated previously
 * This function finds an iova for a given pfn and then
 * frees the iova from that domain.
 */
void
free_iova(struct iova_domain *iovad, unsigned long pfn)
{
	struct iova *iova = find_iova(iovad, pfn);

	if (iova)
		__free_iova(iovad, iova);

}
EXPORT_SYMBOL_GPL(free_iova);
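
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * the slow-path API above is used directly when rcache/flush-queue batching
 * is not wanted. The names iovad, nrpages and limit_pfn are assumed to be
 * provided by the caller; they are not defined here.
 *
 *	struct iova *iova;
 *
 *	iova = alloc_iova(&iovad, nrpages, limit_pfn, true);
 *	if (!iova)
 *		return -ENOMEM;
 *	// map [iova->pfn_lo, iova->pfn_hi] in the IOMMU page tables ...
 *
 *	// later, free either by pointer or by pfn (the two are alternatives):
 *	__free_iova(&iovad, iova);
 *	// or: free_iova(&iovad, pfn);	// pfn is looked up via find_iova()
 */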

/**
 * alloc_iova_fast - allocates an iova from rcache
 * @iovad: - iova domain in question
 * @size: - size of page frames to allocate
 * @limit_pfn: - max limit address
 * @flush_rcache: - set to flush rcache on regular allocation failure
 * This function tries to satisfy an iova allocation from the rcache,
 * and falls back to regular allocation on failure. If regular allocation
 * fails too and the flush_rcache flag is set then the rcache will be flushed.
 */
unsigned long
alloc_iova_fast(struct iova_domain *iovad, unsigned long size,
		unsigned long limit_pfn, bool flush_rcache)
{
	unsigned long iova_pfn;
	struct iova *new_iova;

	iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1);
	if (iova_pfn)
		return iova_pfn;

retry:
	new_iova = alloc_iova(iovad, size, limit_pfn, true);
	if (!new_iova) {
		unsigned int cpu;

		if (!flush_rcache)
			return 0;

		/* Try replenishing IOVAs by flushing rcache. */
		flush_rcache = false;
		for_each_online_cpu(cpu)
			free_cpu_cached_iovas(cpu, iovad);
		goto retry;
	}

	return new_iova->pfn_lo;
}
EXPORT_SYMBOL_GPL(alloc_iova_fast);

/**
 * free_iova_fast - free iova pfn range into rcache
 * @iovad: - iova domain in question.
 * @pfn: - pfn that is allocated previously
 * @size: - # of pages in range
 * This function frees an iova range by trying to put it into the rcache,
 * falling back to regular iova deallocation via free_iova() if this fails.
 */
void
free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size)
{
	if (iova_rcache_insert(iovad, pfn, size))
		return;

	free_iova(iovad, pfn);
}
EXPORT_SYMBOL_GPL(free_iova_fast);
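
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * callers that want the cached fast path simply pair the two functions
 * above. Sizes are in IOVA pages (granule units); iovad, nrpages and
 * limit_pfn are assumed to be caller-provided, with limit_pfn typically
 * derived from the device's DMA mask shifted down by iova_shift(&iovad).
 *
 *	unsigned long pfn;
 *
 *	pfn = alloc_iova_fast(&iovad, nrpages, limit_pfn, true);
 *	if (!pfn)
 *		return -ENOMEM;
 *	// ... map, do DMA, unmap ...
 *	free_iova_fast(&iovad, pfn, nrpages);
 */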

#define fq_ring_for_each(i, fq) \
	for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)

static inline bool fq_full(struct iova_fq *fq)
{
	assert_spin_locked(&fq->lock);
	return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
}

static inline unsigned fq_ring_add(struct iova_fq *fq)
{
	unsigned idx = fq->tail;

	assert_spin_locked(&fq->lock);

	fq->tail = (idx + 1) % IOVA_FQ_SIZE;

	return idx;
}

static void fq_ring_free(struct iova_domain *iovad, struct iova_fq *fq)
{
	u64 counter = atomic64_read(&iovad->fq_flush_finish_cnt);
	unsigned idx;

	assert_spin_locked(&fq->lock);

	fq_ring_for_each(idx, fq) {

		if (fq->entries[idx].counter >= counter)
			break;

		if (iovad->entry_dtor)
			iovad->entry_dtor(fq->entries[idx].data);

		free_iova_fast(iovad,
			       fq->entries[idx].iova_pfn,
			       fq->entries[idx].pages);

		fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
	}
}

static void iova_domain_flush(struct iova_domain *iovad)
{
	atomic64_inc(&iovad->fq_flush_start_cnt);
	iovad->flush_cb(iovad);
	atomic64_inc(&iovad->fq_flush_finish_cnt);
}

static void fq_destroy_all_entries(struct iova_domain *iovad)
{
	int cpu;

	/*
	 * This code runs when the iova_domain is being destroyed, so don't
	 * bother to free iovas, just call the entry_dtor on all remaining
	 * entries.
	 */
	if (!iovad->entry_dtor)
		return;

	for_each_possible_cpu(cpu) {
		struct iova_fq *fq = per_cpu_ptr(iovad->fq, cpu);
		int idx;

		fq_ring_for_each(idx, fq)
			iovad->entry_dtor(fq->entries[idx].data);
	}
}

static void fq_flush_timeout(struct timer_list *t)
{
	struct iova_domain *iovad = from_timer(iovad, t, fq_timer);
	int cpu;

	atomic_set(&iovad->fq_timer_on, 0);
	iova_domain_flush(iovad);

	for_each_possible_cpu(cpu) {
		unsigned long flags;
		struct iova_fq *fq;

		fq = per_cpu_ptr(iovad->fq, cpu);
		spin_lock_irqsave(&fq->lock, flags);
		fq_ring_free(iovad, fq);
		spin_unlock_irqrestore(&fq->lock, flags);
	}
}

void queue_iova(struct iova_domain *iovad,
		unsigned long pfn, unsigned long pages,
		unsigned long data)
{
	struct iova_fq *fq = raw_cpu_ptr(iovad->fq);
	unsigned long flags;
	unsigned idx;

	spin_lock_irqsave(&fq->lock, flags);

	/*
	 * First remove all entries from the flush queue that have already been
	 * flushed out on another CPU. This makes the fq_full() check below less
	 * likely to be true.
	 */
	fq_ring_free(iovad, fq);

	if (fq_full(fq)) {
		iova_domain_flush(iovad);
		fq_ring_free(iovad, fq);
	}

	idx = fq_ring_add(fq);

	fq->entries[idx].iova_pfn = pfn;
	fq->entries[idx].pages = pages;
	fq->entries[idx].data = data;
	fq->entries[idx].counter = atomic64_read(&iovad->fq_flush_start_cnt);

	spin_unlock_irqrestore(&fq->lock, flags);

	if (atomic_cmpxchg(&iovad->fq_timer_on, 0, 1) == 0)
		mod_timer(&iovad->fq_timer,
			  jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
}
EXPORT_SYMBOL_GPL(queue_iova);
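
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * deferred freeing via the flush queue. The callback names my_flush_all and
 * my_entry_dtor, and the surrounding driver state, are assumptions for the
 * example only.
 *
 *	static void my_flush_all(struct iova_domain *iovad)
 *	{
 *		// invalidate the IOTLB for the whole domain here
 *	}
 *
 *	static void my_entry_dtor(unsigned long data)
 *	{
 *		// release whatever 'data' refers to (e.g. a freelist)
 *	}
 *
 *	if (init_iova_flush_queue(&iovad, my_flush_all, my_entry_dtor))
 *		// allocation failed; fall back to synchronous freeing
 *
 *	pfn = alloc_iova_fast(&iovad, nrpages, limit_pfn, true);
 *	// ... later, instead of free_iova_fast(): unmap, then defer the IOVA
 *	// release until after the next IOTLB flush
 *	queue_iova(&iovad, pfn, nrpages, data);
 */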

/**
 * put_iova_domain - destroys the iova domain
 * @iovad: - iova domain in question.
 * All the iovas in that domain are destroyed.
 */
void put_iova_domain(struct iova_domain *iovad)
{
	struct iova *iova, *tmp;

	free_iova_flush_queue(iovad);
	free_iova_rcaches(iovad);
	rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node)
		free_iova_mem(iova);
}
EXPORT_SYMBOL_GPL(put_iova_domain);

static int
__is_range_overlap(struct rb_node *node,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova = rb_entry(node, struct iova, node);

	if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo))
		return 1;
	return 0;
}

static inline struct iova *
alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova;

	iova = alloc_iova_mem();
	if (iova) {
		iova->pfn_lo = pfn_lo;
		iova->pfn_hi = pfn_hi;
	}

	return iova;
}

static struct iova *
__insert_new_range(struct iova_domain *iovad,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct iova *iova;

	iova = alloc_and_init_iova(pfn_lo, pfn_hi);
	if (iova)
		iova_insert_rbtree(&iovad->rbroot, iova, NULL);

	return iova;
}

static void
__adjust_overlap_range(struct iova *iova,
	unsigned long *pfn_lo, unsigned long *pfn_hi)
{
	if (*pfn_lo < iova->pfn_lo)
		iova->pfn_lo = *pfn_lo;
	if (*pfn_hi > iova->pfn_hi)
		*pfn_lo = iova->pfn_hi + 1;
}

/**
 * reserve_iova - reserves an iova in the given range
 * @iovad: - iova domain pointer
 * @pfn_lo: - lower page frame address
 * @pfn_hi: - higher pfn address
 * This function reserves the address range from pfn_lo to pfn_hi so
 * that this range is not dished out as part of alloc_iova.
 */
struct iova *
reserve_iova(struct iova_domain *iovad,
	unsigned long pfn_lo, unsigned long pfn_hi)
{
	struct rb_node *node;
	unsigned long flags;
	struct iova *iova;
	unsigned int overlap = 0;

	/* Don't allow nonsensical pfns */
	if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad))))
		return NULL;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) {
		if (__is_range_overlap(node, pfn_lo, pfn_hi)) {
			iova = rb_entry(node, struct iova, node);
			__adjust_overlap_range(iova, &pfn_lo, &pfn_hi);
			if ((pfn_lo >= iova->pfn_lo) &&
				(pfn_hi <= iova->pfn_hi))
				goto finish;
			overlap = 1;

		} else if (overlap)
				break;
	}

	/*
	 * We are here either because this is the first reserved node
	 * or we need to insert the remaining non-overlapping address range.
	 */
	iova = __insert_new_range(iovad, pfn_lo, pfn_hi);
finish:

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	return iova;
}
EXPORT_SYMBOL_GPL(reserve_iova);
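
/*
 * Illustrative sketch (editorial addition, not part of the original file):
 * reserving a hole so the allocator never hands it out, e.g. a hardware MSI
 * window. The window bounds (msi_base, msi_size) are assumptions for the
 * example; real callers derive them from firmware or device data.
 *
 *	unsigned long lo = msi_base >> iova_shift(&iovad);
 *	unsigned long hi = (msi_base + msi_size - 1) >> iova_shift(&iovad);
 *
 *	if (!reserve_iova(&iovad, lo, hi))
 *		return -ENOMEM;		// range could not be reserved
 */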

/**
 * copy_reserved_iova - copies the reserved iovas between domains
 * @from: - source domain from where to copy
 * @to: - destination domain where to copy
 * This function copies reserved iovas from one domain to
 * the other.
 */
void
copy_reserved_iova(struct iova_domain *from, struct iova_domain *to)
{
	unsigned long flags;
	struct rb_node *node;

	spin_lock_irqsave(&from->iova_rbtree_lock, flags);
	for (node = rb_first(&from->rbroot); node; node = rb_next(node)) {
		struct iova *iova = rb_entry(node, struct iova, node);
		struct iova *new_iova;

		if (iova->pfn_lo == IOVA_ANCHOR)
			continue;

		new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi);
		if (!new_iova)
			printk(KERN_ERR "Reserve iova range %lx-%lx failed\n",
				iova->pfn_lo, iova->pfn_hi);
	}
	spin_unlock_irqrestore(&from->iova_rbtree_lock, flags);
}
EXPORT_SYMBOL_GPL(copy_reserved_iova);

struct iova *
split_and_remove_iova(struct iova_domain *iovad, struct iova *iova,
		unsigned long pfn_lo, unsigned long pfn_hi)
{
	unsigned long flags;
	struct iova *prev = NULL, *next = NULL;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
	if (iova->pfn_lo < pfn_lo) {
		prev = alloc_and_init_iova(iova->pfn_lo, pfn_lo - 1);
		if (prev == NULL)
			goto error;
	}
	if (iova->pfn_hi > pfn_hi) {
		next = alloc_and_init_iova(pfn_hi + 1, iova->pfn_hi);
		if (next == NULL)
			goto error;
	}

	__cached_rbnode_delete_update(iovad, iova);
	rb_erase(&iova->node, &iovad->rbroot);

	if (prev) {
		iova_insert_rbtree(&iovad->rbroot, prev, NULL);
		iova->pfn_lo = pfn_lo;
	}
	if (next) {
		iova_insert_rbtree(&iovad->rbroot, next, NULL);
		iova->pfn_hi = pfn_hi;
	}
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);

	return iova;

error:
	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
	if (prev)
		free_iova_mem(prev);
	return NULL;
}

/*
 * Magazine caches for IOVA ranges.  For an introduction to magazines,
 * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab
 * Allocator to Many CPUs and Arbitrary Resources" by Bonwick and Adams.
 * For simplicity, we use a static magazine size and don't implement the
 * dynamic size tuning described in the paper.
 */

#define IOVA_MAG_SIZE 128

struct iova_magazine {
	unsigned long size;
	unsigned long pfns[IOVA_MAG_SIZE];
};

struct iova_cpu_rcache {
	spinlock_t lock;
	struct iova_magazine *loaded;
	struct iova_magazine *prev;
};

static struct iova_magazine *iova_magazine_alloc(gfp_t flags)
{
	return kzalloc(sizeof(struct iova_magazine), flags);
}

static void iova_magazine_free(struct iova_magazine *mag)
{
	kfree(mag);
}

static void
iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad)
{
	unsigned long flags;
	int i;

	if (!mag)
		return;

	spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);

	for (i = 0 ; i < mag->size; ++i) {
		struct iova *iova = private_find_iova(iovad, mag->pfns[i]);

		BUG_ON(!iova);
		private_free_iova(iovad, iova);
	}

	spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);

	mag->size = 0;
}

static bool iova_magazine_full(struct iova_magazine *mag)
{
	return (mag && mag->size == IOVA_MAG_SIZE);
}

static bool iova_magazine_empty(struct iova_magazine *mag)
{
	return (!mag || mag->size == 0);
}

static unsigned long iova_magazine_pop(struct iova_magazine *mag,
				       unsigned long limit_pfn)
{
	int i;
	unsigned long pfn;

	BUG_ON(iova_magazine_empty(mag));

	/* Only fall back to the rbtree if we have no suitable pfns at all */
	for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--)
		if (i == 0)
			return 0;

	/* Swap it to pop it */
	pfn = mag->pfns[i];
	mag->pfns[i] = mag->pfns[--mag->size];

	return pfn;
}

static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn)
{
	BUG_ON(iova_magazine_full(mag));

	mag->pfns[mag->size++] = pfn;
}

static void init_iova_rcaches(struct iova_domain *iovad)
{
	struct iova_cpu_rcache *cpu_rcache;
	struct iova_rcache *rcache;
	unsigned int cpu;
	int i;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		spin_lock_init(&rcache->lock);
		rcache->depot_size = 0;
		rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size());
		if (WARN_ON(!rcache->cpu_rcaches))
			continue;
		for_each_possible_cpu(cpu) {
			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
			spin_lock_init(&cpu_rcache->lock);
			cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL);
			cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL);
		}
	}
}

/*
 * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and
 * return true on success.  Can fail if rcache is full and we can't free
 * space, and the caller will then return the IOVA range to the rbtree
 * via free_iova() instead.
 */
static bool __iova_rcache_insert(struct iova_domain *iovad,
				 struct iova_rcache *rcache,
				 unsigned long iova_pfn)
{
	struct iova_magazine *mag_to_free = NULL;
	struct iova_cpu_rcache *cpu_rcache;
	bool can_insert = false;
	unsigned long flags;

	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
	spin_lock_irqsave(&cpu_rcache->lock, flags);

	if (!iova_magazine_full(cpu_rcache->loaded)) {
		can_insert = true;
	} else if (!iova_magazine_full(cpu_rcache->prev)) {
		swap(cpu_rcache->prev, cpu_rcache->loaded);
		can_insert = true;
	} else {
		struct iova_magazine *new_mag = iova_magazine_alloc(GFP_ATOMIC);

		if (new_mag) {
			spin_lock(&rcache->lock);
			if (rcache->depot_size < MAX_GLOBAL_MAGS) {
				rcache->depot[rcache->depot_size++] =
						cpu_rcache->loaded;
			} else {
				mag_to_free = cpu_rcache->loaded;
			}
			spin_unlock(&rcache->lock);

			cpu_rcache->loaded = new_mag;
			can_insert = true;
		}
	}

	if (can_insert)
		iova_magazine_push(cpu_rcache->loaded, iova_pfn);

	spin_unlock_irqrestore(&cpu_rcache->lock, flags);

	if (mag_to_free) {
		iova_magazine_free_pfns(mag_to_free, iovad);
		iova_magazine_free(mag_to_free);
	}

	return can_insert;
}

static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn,
			       unsigned long size)
{
	unsigned int log_size = order_base_2(size);

	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
		return false;

	return __iova_rcache_insert(iovad, &iovad->rcaches[log_size], pfn);
}

/*
 * Caller wants to allocate a new IOVA range from 'rcache'.  If we can
 * satisfy the request, return a matching non-NULL range and remove
 * it from the 'rcache'.
 */
static unsigned long __iova_rcache_get(struct iova_rcache *rcache,
				       unsigned long limit_pfn)
{
	struct iova_cpu_rcache *cpu_rcache;
	unsigned long iova_pfn = 0;
	bool has_pfn = false;
	unsigned long flags;

	cpu_rcache = raw_cpu_ptr(rcache->cpu_rcaches);
	spin_lock_irqsave(&cpu_rcache->lock, flags);

	if (!iova_magazine_empty(cpu_rcache->loaded)) {
		has_pfn = true;
	} else if (!iova_magazine_empty(cpu_rcache->prev)) {
		swap(cpu_rcache->prev, cpu_rcache->loaded);
		has_pfn = true;
	} else {
		spin_lock(&rcache->lock);
		if (rcache->depot_size > 0) {
			iova_magazine_free(cpu_rcache->loaded);
			cpu_rcache->loaded = rcache->depot[--rcache->depot_size];
			has_pfn = true;
		}
		spin_unlock(&rcache->lock);
	}

	if (has_pfn)
		iova_pfn = iova_magazine_pop(cpu_rcache->loaded, limit_pfn);

	spin_unlock_irqrestore(&cpu_rcache->lock, flags);

	return iova_pfn;
}

/*
 * Try to satisfy IOVA allocation range from rcache.  Fail if requested
 * size is too big or the DMA limit we are given isn't satisfied by the
 * top element in the magazine.
 */
static unsigned long iova_rcache_get(struct iova_domain *iovad,
				     unsigned long size,
				     unsigned long limit_pfn)
{
	unsigned int log_size = order_base_2(size);

	if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE)
		return 0;

	return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size);
}
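
/*
 * Worked example (editorial note, not part of the original file): both
 * rcache entry points above bucket requests by order_base_2(size). A 6-page
 * request has order_base_2(6) == 3, so its pfn is cached in and served from
 * rcaches[3]; any request larger than 1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)
 * pages bypasses the rcache and goes straight to the rbtree. On the get
 * path the limit is lowered by 'size' so that the popped base pfn still
 * leaves the whole range below the caller's limit.
 */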

/*
 * free rcache data structures.
 */
static void free_iova_rcaches(struct iova_domain *iovad)
{
	struct iova_rcache *rcache;
	struct iova_cpu_rcache *cpu_rcache;
	unsigned int cpu;
	int i, j;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		for_each_possible_cpu(cpu) {
			cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
			iova_magazine_free(cpu_rcache->loaded);
			iova_magazine_free(cpu_rcache->prev);
		}
		free_percpu(rcache->cpu_rcaches);
		for (j = 0; j < rcache->depot_size; ++j)
			iova_magazine_free(rcache->depot[j]);
	}
}

/*
 * free all the IOVA ranges cached by a cpu (used when cpu is unplugged)
 */
void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad)
{
	struct iova_cpu_rcache *cpu_rcache;
	struct iova_rcache *rcache;
	unsigned long flags;
	int i;

	for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) {
		rcache = &iovad->rcaches[i];
		cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu);
		spin_lock_irqsave(&cpu_rcache->lock, flags);
		iova_magazine_free_pfns(cpu_rcache->loaded, iovad);
		iova_magazine_free_pfns(cpu_rcache->prev, iovad);
		spin_unlock_irqrestore(&cpu_rcache->lock, flags);
	}
}

MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>");
MODULE_LICENSE("GPL");