xref: /linux/drivers/infiniband/hw/mthca/mthca_mr.c (revision d67b569f5f620c0fb95d5212642746b7ba9d29e4)
1 /*
2  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  *
32  * $Id: mthca_mr.c 1349 2004-12-16 21:09:43Z roland $
33  */
34 
35 #include <linux/slab.h>
36 #include <linux/init.h>
37 #include <linux/errno.h>
38 
39 #include "mthca_dev.h"
40 #include "mthca_cmd.h"
41 #include "mthca_memfree.h"
42 
43 struct mthca_mtt {
44 	struct mthca_buddy *buddy;
45 	int                 order;
46 	u32                 first_seg;
47 };
48 
49 /*
50  * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
51  */
52 struct mthca_mpt_entry {
53 	u32 flags;
54 	u32 page_size;
55 	u32 key;
56 	u32 pd;
57 	u64 start;
58 	u64 length;
59 	u32 lkey;
60 	u32 window_count;
61 	u32 window_count_limit;
62 	u64 mtt_seg;
63 	u32 mtt_sz;		/* Arbel only */
64 	u32 reserved[2];
65 } __attribute__((packed));
66 
/*
 * Bits OR'ed into the flags word of an MPT entry.  In this file
 * MTHCA_MPT_FLAG_PHYSICAL is set only for regions created without
 * an MTT.
 */
#define MTHCA_MPT_FLAG_SW_OWNS       (0xfUL << 28)
#define MTHCA_MPT_FLAG_MIO           (1 << 17)
#define MTHCA_MPT_FLAG_BIND_ENABLE   (1 << 15)
#define MTHCA_MPT_FLAG_PHYSICAL      (1 <<  9)
#define MTHCA_MPT_FLAG_REGION        (1 <<  8)

/* OR'ed into every page address written as an MTT entry. */
#define MTHCA_MTT_FLAG_PRESENT       1

/*
 * Values written to the first byte of an MPT entry by the FMR
 * map/unmap paths.
 */
#define MTHCA_MPT_STATUS_SW 0xF0
#define MTHCA_MPT_STATUS_HW 0x00
77 
78 /*
79  * Buddy allocator for MTT segments (currently not very efficient
80  * since it doesn't keep a free list and just searches linearly
81  * through the bitmaps)
82  */
83 
84 static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order)
85 {
86 	int o;
87 	int m;
88 	u32 seg;
89 
90 	spin_lock(&buddy->lock);
91 
92 	for (o = order; o <= buddy->max_order; ++o) {
93 		m = 1 << (buddy->max_order - o);
94 		seg = find_first_bit(buddy->bits[o], m);
95 		if (seg < m)
96 			goto found;
97 	}
98 
99 	spin_unlock(&buddy->lock);
100 	return -1;
101 
102  found:
103 	clear_bit(seg, buddy->bits[o]);
104 
105 	while (o > order) {
106 		--o;
107 		seg <<= 1;
108 		set_bit(seg ^ 1, buddy->bits[o]);
109 	}
110 
111 	spin_unlock(&buddy->lock);
112 
113 	seg <<= order;
114 
115 	return seg;
116 }
117 
118 static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order)
119 {
120 	seg >>= order;
121 
122 	spin_lock(&buddy->lock);
123 
124 	while (test_bit(seg ^ 1, buddy->bits[order])) {
125 		clear_bit(seg ^ 1, buddy->bits[order]);
126 		seg >>= 1;
127 		++order;
128 	}
129 
130 	set_bit(seg, buddy->bits[order]);
131 
132 	spin_unlock(&buddy->lock);
133 }
134 
135 static int __devinit mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
136 {
137 	int i, s;
138 
139 	buddy->max_order = max_order;
140 	spin_lock_init(&buddy->lock);
141 
142 	buddy->bits = kmalloc((buddy->max_order + 1) * sizeof (long *),
143 			      GFP_KERNEL);
144 	if (!buddy->bits)
145 		goto err_out;
146 
147 	memset(buddy->bits, 0, (buddy->max_order + 1) * sizeof (long *));
148 
149 	for (i = 0; i <= buddy->max_order; ++i) {
150 		s = BITS_TO_LONGS(1 << (buddy->max_order - i));
151 		buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL);
152 		if (!buddy->bits[i])
153 			goto err_out_free;
154 		bitmap_zero(buddy->bits[i],
155 			    1 << (buddy->max_order - i));
156 	}
157 
158 	set_bit(0, buddy->bits[buddy->max_order]);
159 
160 	return 0;
161 
162 err_out_free:
163 	for (i = 0; i <= buddy->max_order; ++i)
164 		kfree(buddy->bits[i]);
165 
166 	kfree(buddy->bits);
167 
168 err_out:
169 	return -ENOMEM;
170 }
171 
172 static void __devexit mthca_buddy_cleanup(struct mthca_buddy *buddy)
173 {
174 	int i;
175 
176 	for (i = 0; i <= buddy->max_order; ++i)
177 		kfree(buddy->bits[i]);
178 
179 	kfree(buddy->bits);
180 }
181 
182 static u32 mthca_alloc_mtt_range(struct mthca_dev *dev, int order,
183 				 struct mthca_buddy *buddy)
184 {
185 	u32 seg = mthca_buddy_alloc(buddy, order);
186 
187 	if (seg == -1)
188 		return -1;
189 
190 	if (mthca_is_memfree(dev))
191 		if (mthca_table_get_range(dev, dev->mr_table.mtt_table, seg,
192 					  seg + (1 << order) - 1)) {
193 			mthca_buddy_free(buddy, seg, order);
194 			seg = -1;
195 		}
196 
197 	return seg;
198 }
199 
200 static struct mthca_mtt *__mthca_alloc_mtt(struct mthca_dev *dev, int size,
201 					   struct mthca_buddy *buddy)
202 {
203 	struct mthca_mtt *mtt;
204 	int i;
205 
206 	if (size <= 0)
207 		return ERR_PTR(-EINVAL);
208 
209 	mtt = kmalloc(sizeof *mtt, GFP_KERNEL);
210 	if (!mtt)
211 		return ERR_PTR(-ENOMEM);
212 
213 	mtt->buddy = buddy;
214 	mtt->order = 0;
215 	for (i = MTHCA_MTT_SEG_SIZE / 8; i < size; i <<= 1)
216 		++mtt->order;
217 
218 	mtt->first_seg = mthca_alloc_mtt_range(dev, mtt->order, buddy);
219 	if (mtt->first_seg == -1) {
220 		kfree(mtt);
221 		return ERR_PTR(-ENOMEM);
222 	}
223 
224 	return mtt;
225 }
226 
227 struct mthca_mtt *mthca_alloc_mtt(struct mthca_dev *dev, int size)
228 {
229 	return __mthca_alloc_mtt(dev, size, &dev->mr_table.mtt_buddy);
230 }
231 
232 void mthca_free_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt)
233 {
234 	if (!mtt)
235 		return;
236 
237 	mthca_buddy_free(mtt->buddy, mtt->first_seg, mtt->order);
238 
239 	mthca_table_put_range(dev, dev->mr_table.mtt_table,
240 			      mtt->first_seg,
241 			      mtt->first_seg + (1 << mtt->order) - 1);
242 
243 	kfree(mtt);
244 }
245 
246 int mthca_write_mtt(struct mthca_dev *dev, struct mthca_mtt *mtt,
247 		    int start_index, u64 *buffer_list, int list_len)
248 {
249 	struct mthca_mailbox *mailbox;
250 	u64 *mtt_entry;
251 	int err = 0;
252 	u8 status;
253 	int i;
254 
255 	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
256 	if (IS_ERR(mailbox))
257 		return PTR_ERR(mailbox);
258 	mtt_entry = mailbox->buf;
259 
260 	while (list_len > 0) {
261 		mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
262 					   mtt->first_seg * MTHCA_MTT_SEG_SIZE +
263 					   start_index * 8);
264 		mtt_entry[1] = 0;
265 		for (i = 0; i < list_len && i < MTHCA_MAILBOX_SIZE / 8 - 2; ++i)
266 			mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] |
267 						       MTHCA_MTT_FLAG_PRESENT);
268 
269 		/*
270 		 * If we have an odd number of entries to write, add
271 		 * one more dummy entry for firmware efficiency.
272 		 */
273 		if (i & 1)
274 			mtt_entry[i + 2] = 0;
275 
276 		err = mthca_WRITE_MTT(dev, mailbox, (i + 1) & ~1, &status);
277 		if (err) {
278 			mthca_warn(dev, "WRITE_MTT failed (%d)\n", err);
279 			goto out;
280 		}
281 		if (status) {
282 			mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n",
283 				   status);
284 			err = -EINVAL;
285 			goto out;
286 		}
287 
288 		list_len    -= i;
289 		start_index += i;
290 		buffer_list += i;
291 	}
292 
293 out:
294 	mthca_free_mailbox(dev, mailbox);
295 	return err;
296 }
297 
/* On Tavor the key is the hardware MPT index itself. */
static inline u32 tavor_hw_index_to_key(u32 ind)
{
	u32 key = ind;

	return key;
}
302 
/* On Tavor the hardware MPT index is the key itself. */
static inline u32 tavor_key_to_hw_index(u32 key)
{
	u32 ind = key;

	return ind;
}
307 
/*
 * On Arbel the key is the hardware index rotated left by 8 bits: the
 * top byte of the index becomes the low byte of the key.
 */
static inline u32 arbel_hw_index_to_key(u32 ind)
{
	u32 top_byte = ind >> 24;
	u32 low_bits = ind << 8;

	return low_bits | top_byte;
}
312 
/*
 * Inverse of arbel_hw_index_to_key(): rotate the key right by 8 bits
 * so that its low byte becomes the top byte of the hardware index.
 */
static inline u32 arbel_key_to_hw_index(u32 key)
{
	u32 top_byte = key << 24;
	u32 low_bits = key >> 8;

	return top_byte | low_bits;
}
317 
318 static inline u32 hw_index_to_key(struct mthca_dev *dev, u32 ind)
319 {
320 	if (mthca_is_memfree(dev))
321 		return arbel_hw_index_to_key(ind);
322 	else
323 		return tavor_hw_index_to_key(ind);
324 }
325 
326 static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
327 {
328 	if (mthca_is_memfree(dev))
329 		return arbel_key_to_hw_index(key);
330 	else
331 		return tavor_key_to_hw_index(key);
332 }
333 
334 int mthca_mr_alloc(struct mthca_dev *dev, u32 pd, int buffer_size_shift,
335 		   u64 iova, u64 total_size, u32 access, struct mthca_mr *mr)
336 {
337 	struct mthca_mailbox *mailbox;
338 	struct mthca_mpt_entry *mpt_entry;
339 	u32 key;
340 	int i;
341 	int err;
342 	u8 status;
343 
344 	might_sleep();
345 
346 	WARN_ON(buffer_size_shift >= 32);
347 
348 	key = mthca_alloc(&dev->mr_table.mpt_alloc);
349 	if (key == -1)
350 		return -ENOMEM;
351 	mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
352 
353 	if (mthca_is_memfree(dev)) {
354 		err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
355 		if (err)
356 			goto err_out_mpt_free;
357 	}
358 
359 	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
360 	if (IS_ERR(mailbox)) {
361 		err = PTR_ERR(mailbox);
362 		goto err_out_table;
363 	}
364 	mpt_entry = mailbox->buf;
365 
366 	mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
367 				       MTHCA_MPT_FLAG_MIO         |
368 				       MTHCA_MPT_FLAG_REGION      |
369 				       access);
370 	if (!mr->mtt)
371 		mpt_entry->flags |= cpu_to_be32(MTHCA_MPT_FLAG_PHYSICAL);
372 
373 	mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12);
374 	mpt_entry->key       = cpu_to_be32(key);
375 	mpt_entry->pd        = cpu_to_be32(pd);
376 	mpt_entry->start     = cpu_to_be64(iova);
377 	mpt_entry->length    = cpu_to_be64(total_size);
378 
379 	memset(&mpt_entry->lkey, 0,
380 	       sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
381 
382 	if (mr->mtt)
383 		mpt_entry->mtt_seg =
384 			cpu_to_be64(dev->mr_table.mtt_base +
385 				    mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE);
386 
387 	if (0) {
388 		mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
389 		for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
390 			if (i % 4 == 0)
391 				printk("[%02x] ", i * 4);
392 			printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i]));
393 			if ((i + 1) % 4 == 0)
394 				printk("\n");
395 		}
396 	}
397 
398 	err = mthca_SW2HW_MPT(dev, mailbox,
399 			      key & (dev->limits.num_mpts - 1),
400 			      &status);
401 	if (err) {
402 		mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
403 		goto err_out_mailbox;
404 	} else if (status) {
405 		mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
406 			   status);
407 		err = -EINVAL;
408 		goto err_out_mailbox;
409 	}
410 
411 	mthca_free_mailbox(dev, mailbox);
412 	return err;
413 
414 err_out_mailbox:
415 	mthca_free_mailbox(dev, mailbox);
416 
417 err_out_table:
418 	mthca_table_put(dev, dev->mr_table.mpt_table, key);
419 
420 err_out_mpt_free:
421 	mthca_free(&dev->mr_table.mpt_alloc, key);
422 	return err;
423 }
424 
425 int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
426 			   u32 access, struct mthca_mr *mr)
427 {
428 	mr->mtt = NULL;
429 	return mthca_mr_alloc(dev, pd, 12, 0, ~0ULL, access, mr);
430 }
431 
432 int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
433 			u64 *buffer_list, int buffer_size_shift,
434 			int list_len, u64 iova, u64 total_size,
435 			u32 access, struct mthca_mr *mr)
436 {
437 	int err;
438 
439 	mr->mtt = mthca_alloc_mtt(dev, list_len);
440 	if (IS_ERR(mr->mtt))
441 		return PTR_ERR(mr->mtt);
442 
443 	err = mthca_write_mtt(dev, mr->mtt, 0, buffer_list, list_len);
444 	if (err) {
445 		mthca_free_mtt(dev, mr->mtt);
446 		return err;
447 	}
448 
449 	err = mthca_mr_alloc(dev, pd, buffer_size_shift, iova,
450 			     total_size, access, mr);
451 	if (err)
452 		mthca_free_mtt(dev, mr->mtt);
453 
454 	return err;
455 }
456 
457 /* Free mr or fmr */
458 static void mthca_free_region(struct mthca_dev *dev, u32 lkey)
459 {
460 	mthca_table_put(dev, dev->mr_table.mpt_table,
461 			arbel_key_to_hw_index(lkey));
462 
463 	mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey));
464 }
465 
466 void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
467 {
468 	int err;
469 	u8 status;
470 
471 	might_sleep();
472 
473 	err = mthca_HW2SW_MPT(dev, NULL,
474 			      key_to_hw_index(dev, mr->ibmr.lkey) &
475 			      (dev->limits.num_mpts - 1),
476 			      &status);
477 	if (err)
478 		mthca_warn(dev, "HW2SW_MPT failed (%d)\n", err);
479 	else if (status)
480 		mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n",
481 			   status);
482 
483 	mthca_free_region(dev, mr->ibmr.lkey);
484 	mthca_free_mtt(dev, mr->mtt);
485 }
486 
487 int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
488 		    u32 access, struct mthca_fmr *mr)
489 {
490 	struct mthca_mpt_entry *mpt_entry;
491 	struct mthca_mailbox *mailbox;
492 	u64 mtt_seg;
493 	u32 key, idx;
494 	u8 status;
495 	int list_len = mr->attr.max_pages;
496 	int err = -ENOMEM;
497 	int i;
498 
499 	might_sleep();
500 
501 	if (mr->attr.page_size < 12 || mr->attr.page_size >= 32)
502 		return -EINVAL;
503 
504 	/* For Arbel, all MTTs must fit in the same page. */
505 	if (mthca_is_memfree(dev) &&
506 	    mr->attr.max_pages * sizeof *mr->mem.arbel.mtts > PAGE_SIZE)
507 		return -EINVAL;
508 
509 	mr->maps = 0;
510 
511 	key = mthca_alloc(&dev->mr_table.mpt_alloc);
512 	if (key == -1)
513 		return -ENOMEM;
514 
515 	idx = key & (dev->limits.num_mpts - 1);
516 	mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);
517 
518 	if (mthca_is_memfree(dev)) {
519 		err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
520 		if (err)
521 			goto err_out_mpt_free;
522 
523 		mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key);
524 		BUG_ON(!mr->mem.arbel.mpt);
525 	} else
526 		mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
527 		       	sizeof *(mr->mem.tavor.mpt) * idx;
528 
529 	mr->mtt = __mthca_alloc_mtt(dev, list_len, dev->mr_table.fmr_mtt_buddy);
530 	if (IS_ERR(mr->mtt))
531 		goto err_out_table;
532 
533 	mtt_seg = mr->mtt->first_seg * MTHCA_MTT_SEG_SIZE;
534 
535 	if (mthca_is_memfree(dev)) {
536 		mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
537 						      mr->mtt->first_seg);
538 		BUG_ON(!mr->mem.arbel.mtts);
539 	} else
540 		mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;
541 
542 	mailbox = mthca_alloc_mailbox(dev, GFP_KERNEL);
543 	if (IS_ERR(mailbox))
544 		goto err_out_free_mtt;
545 
546 	mpt_entry = mailbox->buf;
547 
548 	mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
549 				       MTHCA_MPT_FLAG_MIO         |
550 				       MTHCA_MPT_FLAG_REGION      |
551 				       access);
552 
553 	mpt_entry->page_size = cpu_to_be32(mr->attr.page_size - 12);
554 	mpt_entry->key       = cpu_to_be32(key);
555 	mpt_entry->pd        = cpu_to_be32(pd);
556 	memset(&mpt_entry->start, 0,
557 	       sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, start));
558 	mpt_entry->mtt_seg   = cpu_to_be64(dev->mr_table.mtt_base + mtt_seg);
559 
560 	if (0) {
561 		mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
562 		for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
563 			if (i % 4 == 0)
564 				printk("[%02x] ", i * 4);
565 			printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i]));
566 			if ((i + 1) % 4 == 0)
567 				printk("\n");
568 		}
569 	}
570 
571 	err = mthca_SW2HW_MPT(dev, mailbox,
572 			      key & (dev->limits.num_mpts - 1),
573 			      &status);
574 	if (err) {
575 		mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
576 		goto err_out_mailbox_free;
577 	}
578 	if (status) {
579 		mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
580 			   status);
581 		err = -EINVAL;
582 		goto err_out_mailbox_free;
583 	}
584 
585 	mthca_free_mailbox(dev, mailbox);
586 	return 0;
587 
588 err_out_mailbox_free:
589 	mthca_free_mailbox(dev, mailbox);
590 
591 err_out_free_mtt:
592 	mthca_free_mtt(dev, mr->mtt);
593 
594 err_out_table:
595 	mthca_table_put(dev, dev->mr_table.mpt_table, key);
596 
597 err_out_mpt_free:
598 	mthca_free(&dev->mr_table.mpt_alloc, mr->ibmr.lkey);
599 	return err;
600 }
601 
602 int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr)
603 {
604 	if (fmr->maps)
605 		return -EBUSY;
606 
607 	mthca_free_region(dev, fmr->ibmr.lkey);
608 	mthca_free_mtt(dev, fmr->mtt);
609 
610 	return 0;
611 }
612 
613 static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list,
614 				  int list_len, u64 iova)
615 {
616 	int i, page_mask;
617 
618 	if (list_len > fmr->attr.max_pages)
619 		return -EINVAL;
620 
621 	page_mask = (1 << fmr->attr.page_size) - 1;
622 
623 	/* We are getting page lists, so va must be page aligned. */
624 	if (iova & page_mask)
625 		return -EINVAL;
626 
627 	/* Trust the user not to pass misaligned data in page_list */
628 	if (0)
629 		for (i = 0; i < list_len; ++i) {
630 			if (page_list[i] & ~page_mask)
631 				return -EINVAL;
632 		}
633 
634 	if (fmr->maps >= fmr->attr.max_maps)
635 		return -EINVAL;
636 
637 	return 0;
638 }
639 
640 
641 int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
642 			     int list_len, u64 iova)
643 {
644 	struct mthca_fmr *fmr = to_mfmr(ibfmr);
645 	struct mthca_dev *dev = to_mdev(ibfmr->device);
646 	struct mthca_mpt_entry mpt_entry;
647 	u32 key;
648 	int i, err;
649 
650 	err = mthca_check_fmr(fmr, page_list, list_len, iova);
651 	if (err)
652 		return err;
653 
654 	++fmr->maps;
655 
656 	key = tavor_key_to_hw_index(fmr->ibmr.lkey);
657 	key += dev->limits.num_mpts;
658 	fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);
659 
660 	writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
661 
662 	for (i = 0; i < list_len; ++i) {
663 		__be64 mtt_entry = cpu_to_be64(page_list[i] |
664 					       MTHCA_MTT_FLAG_PRESENT);
665 		mthca_write64_raw(mtt_entry, fmr->mem.tavor.mtts + i);
666 	}
667 
668 	mpt_entry.lkey   = cpu_to_be32(key);
669 	mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size));
670 	mpt_entry.start  = cpu_to_be64(iova);
671 
672 	writel(mpt_entry.lkey, &fmr->mem.tavor.mpt->key);
673 	memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start,
674 		    offsetof(struct mthca_mpt_entry, window_count) -
675 		    offsetof(struct mthca_mpt_entry, start));
676 
677 	writeb(MTHCA_MPT_STATUS_HW, fmr->mem.tavor.mpt);
678 
679 	return 0;
680 }
681 
682 int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
683 			     int list_len, u64 iova)
684 {
685 	struct mthca_fmr *fmr = to_mfmr(ibfmr);
686 	struct mthca_dev *dev = to_mdev(ibfmr->device);
687 	u32 key;
688 	int i, err;
689 
690 	err = mthca_check_fmr(fmr, page_list, list_len, iova);
691 	if (err)
692 		return err;
693 
694 	++fmr->maps;
695 
696 	key = arbel_key_to_hw_index(fmr->ibmr.lkey);
697 	key += dev->limits.num_mpts;
698 	fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
699 
700 	*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
701 
702 	wmb();
703 
704 	for (i = 0; i < list_len; ++i)
705 		fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] |
706 						     MTHCA_MTT_FLAG_PRESENT);
707 
708 	fmr->mem.arbel.mpt->key    = cpu_to_be32(key);
709 	fmr->mem.arbel.mpt->lkey   = cpu_to_be32(key);
710 	fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size));
711 	fmr->mem.arbel.mpt->start  = cpu_to_be64(iova);
712 
713 	wmb();
714 
715 	*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_HW;
716 
717 	wmb();
718 
719 	return 0;
720 }
721 
722 void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
723 {
724 	u32 key;
725 
726 	if (!fmr->maps)
727 		return;
728 
729 	key = tavor_key_to_hw_index(fmr->ibmr.lkey);
730 	key &= dev->limits.num_mpts - 1;
731 	fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);
732 
733 	fmr->maps = 0;
734 
735 	writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
736 }
737 
738 void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
739 {
740 	u32 key;
741 
742 	if (!fmr->maps)
743 		return;
744 
745 	key = arbel_key_to_hw_index(fmr->ibmr.lkey);
746 	key &= dev->limits.num_mpts - 1;
747 	fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);
748 
749 	fmr->maps = 0;
750 
751 	*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
752 }
753 
754 int __devinit mthca_init_mr_table(struct mthca_dev *dev)
755 {
756 	int err, i;
757 
758 	err = mthca_alloc_init(&dev->mr_table.mpt_alloc,
759 			       dev->limits.num_mpts,
760 			       ~0, dev->limits.reserved_mrws);
761 	if (err)
762 		return err;
763 
764 	if (!mthca_is_memfree(dev) &&
765 	    (dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN))
766 		dev->limits.fmr_reserved_mtts = 0;
767 	else
768 		dev->mthca_flags |= MTHCA_FLAG_FMR;
769 
770 	err = mthca_buddy_init(&dev->mr_table.mtt_buddy,
771 			       fls(dev->limits.num_mtt_segs - 1));
772 
773 	if (err)
774 		goto err_mtt_buddy;
775 
776 	dev->mr_table.tavor_fmr.mpt_base = NULL;
777 	dev->mr_table.tavor_fmr.mtt_base = NULL;
778 
779 	if (dev->limits.fmr_reserved_mtts) {
780 		i = fls(dev->limits.fmr_reserved_mtts - 1);
781 
782 		if (i >= 31) {
783 			mthca_warn(dev, "Unable to reserve 2^31 FMR MTTs.\n");
784 			err = -EINVAL;
785 			goto err_fmr_mpt;
786 		}
787 
788 		dev->mr_table.tavor_fmr.mpt_base =
789 		       	ioremap(dev->mr_table.mpt_base,
790 				(1 << i) * sizeof (struct mthca_mpt_entry));
791 
792 		if (!dev->mr_table.tavor_fmr.mpt_base) {
793 			mthca_warn(dev, "MPT ioremap for FMR failed.\n");
794 			err = -ENOMEM;
795 			goto err_fmr_mpt;
796 		}
797 
798 		dev->mr_table.tavor_fmr.mtt_base =
799 			ioremap(dev->mr_table.mtt_base,
800 				(1 << i) * MTHCA_MTT_SEG_SIZE);
801 		if (!dev->mr_table.tavor_fmr.mtt_base) {
802 			mthca_warn(dev, "MTT ioremap for FMR failed.\n");
803 			err = -ENOMEM;
804 			goto err_fmr_mtt;
805 		}
806 
807 		err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, i);
808 		if (err)
809 			goto err_fmr_mtt_buddy;
810 
811 		/* Prevent regular MRs from using FMR keys */
812 		err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, i);
813 		if (err)
814 			goto err_reserve_fmr;
815 
816 		dev->mr_table.fmr_mtt_buddy =
817 		       	&dev->mr_table.tavor_fmr.mtt_buddy;
818 	} else
819 		dev->mr_table.fmr_mtt_buddy = &dev->mr_table.mtt_buddy;
820 
821 	/* FMR table is always the first, take reserved MTTs out of there */
822 	if (dev->limits.reserved_mtts) {
823 		i = fls(dev->limits.reserved_mtts - 1);
824 
825 		if (mthca_alloc_mtt_range(dev, i,
826 					  dev->mr_table.fmr_mtt_buddy) == -1) {
827 			mthca_warn(dev, "MTT table of order %d is too small.\n",
828 				  dev->mr_table.fmr_mtt_buddy->max_order);
829 			err = -ENOMEM;
830 			goto err_reserve_mtts;
831 		}
832 	}
833 
834 	return 0;
835 
836 err_reserve_mtts:
837 err_reserve_fmr:
838 	if (dev->limits.fmr_reserved_mtts)
839 		mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);
840 
841 err_fmr_mtt_buddy:
842 	if (dev->mr_table.tavor_fmr.mtt_base)
843 		iounmap(dev->mr_table.tavor_fmr.mtt_base);
844 
845 err_fmr_mtt:
846 	if (dev->mr_table.tavor_fmr.mpt_base)
847 		iounmap(dev->mr_table.tavor_fmr.mpt_base);
848 
849 err_fmr_mpt:
850 	mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);
851 
852 err_mtt_buddy:
853 	mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
854 
855 	return err;
856 }
857 
858 void __devexit mthca_cleanup_mr_table(struct mthca_dev *dev)
859 {
860 	/* XXX check if any MRs are still allocated? */
861 	if (dev->limits.fmr_reserved_mtts)
862 		mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);
863 
864 	mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);
865 
866 	if (dev->mr_table.tavor_fmr.mtt_base)
867 		iounmap(dev->mr_table.tavor_fmr.mtt_base);
868 	if (dev->mr_table.tavor_fmr.mpt_base)
869 		iounmap(dev->mr_table.tavor_fmr.mpt_base);
870 
871 	mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
872 }
873