/*
 * Copyright (c) 2004 Topspin Communications.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * $Id: mthca_mr.c 1349 2004-12-16 21:09:43Z roland $
 */

#include <linux/slab.h>
#include <linux/init.h>
#include <linux/errno.h>

#include "mthca_dev.h"
#include "mthca_cmd.h"
#include "mthca_memfree.h"

/*
 * Must be packed because mtt_seg is 64 bits but only aligned to 32 bits.
 */
struct mthca_mpt_entry {
	u32 flags;
	u32 page_size;
	u32 key;
	u32 pd;
	u64 start;
	u64 length;
	u32 lkey;
	u32 window_count;
	u32 window_count_limit;
	u64 mtt_seg;
	u32 mtt_sz;		/* Arbel only */
	u32 reserved[2];
} __attribute__((packed));

#define MTHCA_MPT_FLAG_SW_OWNS       (0xfUL << 28)
#define MTHCA_MPT_FLAG_MIO           (1 << 17)
#define MTHCA_MPT_FLAG_BIND_ENABLE   (1 << 15)
#define MTHCA_MPT_FLAG_PHYSICAL      (1 <<  9)
#define MTHCA_MPT_FLAG_REGION        (1 <<  8)

#define MTHCA_MTT_FLAG_PRESENT       1

#define MTHCA_MPT_STATUS_SW 0xF0
#define MTHCA_MPT_STATUS_HW 0x00
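
/*
 * The status byte at the start of an MPT entry says who owns it:
 * software must put the entry in the SW state before modifying it and
 * hand it back to the HCA (via SW2HW_MPT, or a direct status write in
 * the FMR paths below) before the hardware will use it again.
 */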

/*
 * Buddy allocator for MTT segments (currently not very efficient
 * since it doesn't keep a free list and just searches linearly
 * through the bitmaps)
 */

static u32 mthca_buddy_alloc(struct mthca_buddy *buddy, int order)
{
	int o;
	int m;
	u32 seg;

	spin_lock(&buddy->lock);

	for (o = order; o <= buddy->max_order; ++o) {
		m = 1 << (buddy->max_order - o);
		seg = find_first_bit(buddy->bits[o], m);
		if (seg < m)
			goto found;
	}

	spin_unlock(&buddy->lock);
	return -1;

 found:
	clear_bit(seg, buddy->bits[o]);

	while (o > order) {
		--o;
		seg <<= 1;
		set_bit(seg ^ 1, buddy->bits[o]);
	}

	spin_unlock(&buddy->lock);

	seg <<= order;

	return seg;
}

static void mthca_buddy_free(struct mthca_buddy *buddy, u32 seg, int order)
{
	seg >>= order;

	spin_lock(&buddy->lock);

	while (test_bit(seg ^ 1, buddy->bits[order])) {
		clear_bit(seg ^ 1, buddy->bits[order]);
		seg >>= 1;
		++order;
	}

	set_bit(seg, buddy->bits[order]);

	spin_unlock(&buddy->lock);
}
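
/*
 * Illustrative sketch (not built): how the buddy calls compose.  An
 * order-n allocation returns the index of the first of 2^n contiguous
 * MTT segments, and freeing with the same order merges the block back
 * up with its buddies.  The buddy argument here is hypothetical.
 */
#if 0
static void mthca_buddy_example(struct mthca_buddy *buddy)
{
	u32 seg = mthca_buddy_alloc(buddy, 2);	/* 4 contiguous segments */

	if (seg != (u32) -1)			/* (u32) -1 means no free block */
		mthca_buddy_free(buddy, seg, 2);
}
#endif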

static int __devinit mthca_buddy_init(struct mthca_buddy *buddy, int max_order)
{
	int i, s;

	buddy->max_order = max_order;
	spin_lock_init(&buddy->lock);

	buddy->bits = kmalloc((buddy->max_order + 1) * sizeof (long *),
			      GFP_KERNEL);
	if (!buddy->bits)
		goto err_out;

	memset(buddy->bits, 0, (buddy->max_order + 1) * sizeof (long *));

	for (i = 0; i <= buddy->max_order; ++i) {
		s = BITS_TO_LONGS(1 << (buddy->max_order - i));
		buddy->bits[i] = kmalloc(s * sizeof (long), GFP_KERNEL);
		if (!buddy->bits[i])
			goto err_out_free;
		bitmap_zero(buddy->bits[i],
			    1 << (buddy->max_order - i));
	}

	set_bit(0, buddy->bits[buddy->max_order]);

	return 0;

err_out_free:
	for (i = 0; i <= buddy->max_order; ++i)
		kfree(buddy->bits[i]);

	kfree(buddy->bits);

err_out:
	return -ENOMEM;
}

static void __devexit mthca_buddy_cleanup(struct mthca_buddy *buddy)
{
	int i;

	for (i = 0; i <= buddy->max_order; ++i)
		kfree(buddy->bits[i]);

	kfree(buddy->bits);
}

static u32 mthca_alloc_mtt(struct mthca_dev *dev, int order,
			   struct mthca_buddy *buddy)
{
	u32 seg = mthca_buddy_alloc(buddy, order);

	if (seg == -1)
		return -1;

	if (mthca_is_memfree(dev))
		if (mthca_table_get_range(dev, dev->mr_table.mtt_table, seg,
					  seg + (1 << order) - 1)) {
			mthca_buddy_free(buddy, seg, order);
			seg = -1;
		}

	return seg;
}

static void mthca_free_mtt(struct mthca_dev *dev, u32 seg, int order,
			   struct mthca_buddy *buddy)
{
	mthca_buddy_free(buddy, seg, order);

	if (mthca_is_memfree(dev))
		mthca_table_put_range(dev, dev->mr_table.mtt_table, seg,
				      seg + (1 << order) - 1);
}

static inline u32 tavor_hw_index_to_key(u32 ind)
{
	return ind;
}

static inline u32 tavor_key_to_hw_index(u32 key)
{
	return key;
}

static inline u32 arbel_hw_index_to_key(u32 ind)
{
	return (ind >> 24) | (ind << 8);
}

static inline u32 arbel_key_to_hw_index(u32 key)
{
	return (key << 24) | (key >> 8);
}
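
/*
 * Illustrative sketch (not built): the Arbel helpers are inverse byte
 * rotations, so a key always round-trips through its hw index.  The
 * example value is arbitrary.
 */
#if 0
static void mthca_key_example(void)
{
	/* 0x12345678 -> hw index 0x78123456 -> key 0x12345678 again */
	BUG_ON(arbel_hw_index_to_key(arbel_key_to_hw_index(0x12345678)) !=
	       0x12345678);
}
#endif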

static inline u32 hw_index_to_key(struct mthca_dev *dev, u32 ind)
{
	if (mthca_is_memfree(dev))
		return arbel_hw_index_to_key(ind);
	else
		return tavor_hw_index_to_key(ind);
}

static inline u32 key_to_hw_index(struct mthca_dev *dev, u32 key)
{
	if (mthca_is_memfree(dev))
		return arbel_key_to_hw_index(key);
	else
		return tavor_key_to_hw_index(key);
}

int mthca_mr_alloc_notrans(struct mthca_dev *dev, u32 pd,
			   u32 access, struct mthca_mr *mr)
{
	void *mailbox = NULL;
	struct mthca_mpt_entry *mpt_entry;
	u32 key;
	int err;
	u8 status;

	might_sleep();

	mr->order = -1;
	key = mthca_alloc(&dev->mr_table.mpt_alloc);
	if (key == -1)
		return -ENOMEM;
	mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);

	if (mthca_is_memfree(dev)) {
		err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
		if (err)
			goto err_out_mpt_free;
	}

	mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA,
			  GFP_KERNEL);
	if (!mailbox) {
		err = -ENOMEM;
		goto err_out_table;
	}
	mpt_entry = MAILBOX_ALIGN(mailbox);

	mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
				       MTHCA_MPT_FLAG_MIO         |
				       MTHCA_MPT_FLAG_PHYSICAL    |
				       MTHCA_MPT_FLAG_REGION      |
				       access);
	mpt_entry->page_size = 0;
	mpt_entry->key       = cpu_to_be32(key);
	mpt_entry->pd        = cpu_to_be32(pd);
	mpt_entry->start     = 0;
	mpt_entry->length    = ~0ULL;

	memset(&mpt_entry->lkey, 0,
	       sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));

	err = mthca_SW2HW_MPT(dev, mpt_entry,
			      key & (dev->limits.num_mpts - 1),
			      &status);
	if (err) {
		mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
		goto err_out_table;
	} else if (status) {
		mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
			   status);
		err = -EINVAL;
		goto err_out_table;
	}

	kfree(mailbox);
	return err;

err_out_table:
	if (mthca_is_memfree(dev))
		mthca_table_put(dev, dev->mr_table.mpt_table, key);

err_out_mpt_free:
	mthca_free(&dev->mr_table.mpt_alloc, key);
	kfree(mailbox);
	return err;
}

int mthca_mr_alloc_phys(struct mthca_dev *dev, u32 pd,
			u64 *buffer_list, int buffer_size_shift,
			int list_len, u64 iova, u64 total_size,
			u32 access, struct mthca_mr *mr)
{
	void *mailbox;
	u64 *mtt_entry;
	struct mthca_mpt_entry *mpt_entry;
	u32 key;
	int err = -ENOMEM;
	u8 status;
	int i;

	might_sleep();
	WARN_ON(buffer_size_shift >= 32);

	key = mthca_alloc(&dev->mr_table.mpt_alloc);
	if (key == -1)
		return -ENOMEM;
	mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);

	if (mthca_is_memfree(dev)) {
		err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
		if (err)
			goto err_out_mpt_free;
	}

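	/*
	 * Find the smallest order such that a block of 2^order MTT
	 * segments (MTHCA_MTT_SEG_SIZE / 8 entries each) can hold
	 * list_len translation entries.
	 */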
	for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0;
	     i < list_len;
	     i <<= 1, ++mr->order)
		; /* nothing */

	mr->first_seg = mthca_alloc_mtt(dev, mr->order,
					&dev->mr_table.mtt_buddy);
	if (mr->first_seg == -1)
		goto err_out_table;

	/*
	 * If list_len is odd, we add one more dummy entry for
	 * firmware efficiency.
	 */
	mailbox = kmalloc(max(sizeof *mpt_entry,
			      (size_t) 8 * (list_len + (list_len & 1) + 2)) +
			  MTHCA_CMD_MAILBOX_EXTRA,
			  GFP_KERNEL);
	if (!mailbox)
		goto err_out_free_mtt;

	mtt_entry = MAILBOX_ALIGN(mailbox);

	mtt_entry[0] = cpu_to_be64(dev->mr_table.mtt_base +
				   mr->first_seg * MTHCA_MTT_SEG_SIZE);
	mtt_entry[1] = 0;
	for (i = 0; i < list_len; ++i)
		mtt_entry[i + 2] = cpu_to_be64(buffer_list[i] |
					       MTHCA_MTT_FLAG_PRESENT);
	if (list_len & 1) {
		mtt_entry[i + 2] = 0;
		++list_len;
	}

	if (0) {
		mthca_dbg(dev, "Dumping MTT entries:\n");
		for (i = 0; i < list_len + 2; ++i)
			printk(KERN_ERR "[%2d] %016llx\n",
			       i, (unsigned long long) be64_to_cpu(mtt_entry[i]));
	}

	err = mthca_WRITE_MTT(dev, mtt_entry, list_len, &status);
	if (err) {
		mthca_warn(dev, "WRITE_MTT failed (%d)\n", err);
		goto err_out_mailbox_free;
	}
	if (status) {
		mthca_warn(dev, "WRITE_MTT returned status 0x%02x\n",
			   status);
		err = -EINVAL;
		goto err_out_mailbox_free;
	}

	mpt_entry = MAILBOX_ALIGN(mailbox);

	mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
				       MTHCA_MPT_FLAG_MIO         |
				       MTHCA_MPT_FLAG_REGION      |
				       access);

	mpt_entry->page_size = cpu_to_be32(buffer_size_shift - 12);
	mpt_entry->key       = cpu_to_be32(key);
	mpt_entry->pd        = cpu_to_be32(pd);
	mpt_entry->start     = cpu_to_be64(iova);
	mpt_entry->length    = cpu_to_be64(total_size);
	memset(&mpt_entry->lkey, 0,
	       sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, lkey));
	mpt_entry->mtt_seg   = cpu_to_be64(dev->mr_table.mtt_base +
					   mr->first_seg * MTHCA_MTT_SEG_SIZE);

	if (0) {
		mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
		for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
			if (i % 4 == 0)
				printk("[%02x] ", i * 4);
			printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i]));
			if ((i + 1) % 4 == 0)
				printk("\n");
		}
	}

	err = mthca_SW2HW_MPT(dev, mpt_entry,
			      key & (dev->limits.num_mpts - 1),
			      &status);
	if (err)
		mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
	else if (status) {
		mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
			   status);
		err = -EINVAL;
	}

	kfree(mailbox);
	return err;

err_out_mailbox_free:
	kfree(mailbox);

err_out_free_mtt:
	mthca_free_mtt(dev, mr->first_seg, mr->order, &dev->mr_table.mtt_buddy);

err_out_table:
	if (mthca_is_memfree(dev))
		mthca_table_put(dev, dev->mr_table.mpt_table, key);

err_out_mpt_free:
	mthca_free(&dev->mr_table.mpt_alloc, key);
	return err;
}

/* Free mr or fmr */
static void mthca_free_region(struct mthca_dev *dev, u32 lkey, int order,
			      u32 first_seg, struct mthca_buddy *buddy)
{
	if (order >= 0)
		mthca_free_mtt(dev, first_seg, order, buddy);

	if (mthca_is_memfree(dev))
		mthca_table_put(dev, dev->mr_table.mpt_table,
				arbel_key_to_hw_index(lkey));

	mthca_free(&dev->mr_table.mpt_alloc, key_to_hw_index(dev, lkey));
}

void mthca_free_mr(struct mthca_dev *dev, struct mthca_mr *mr)
{
	int err;
	u8 status;

	might_sleep();

	err = mthca_HW2SW_MPT(dev, NULL,
			      key_to_hw_index(dev, mr->ibmr.lkey) &
			      (dev->limits.num_mpts - 1),
			      &status);
	if (err)
		mthca_warn(dev, "HW2SW_MPT failed (%d)\n", err);
	else if (status)
		mthca_warn(dev, "HW2SW_MPT returned status 0x%02x\n",
			   status);

	mthca_free_region(dev, mr->ibmr.lkey, mr->order, mr->first_seg,
			  &dev->mr_table.mtt_buddy);
}

int mthca_fmr_alloc(struct mthca_dev *dev, u32 pd,
		    u32 access, struct mthca_fmr *mr)
{
	struct mthca_mpt_entry *mpt_entry;
	void *mailbox;
	u64 mtt_seg;
	u32 key, idx;
	u8 status;
	int list_len = mr->attr.max_pages;
	int err = -ENOMEM;
	int i;

	might_sleep();

	if (mr->attr.page_size < 12 || mr->attr.page_size >= 32)
		return -EINVAL;

	/* For Arbel, all MTTs must fit in the same page. */
	if (mthca_is_memfree(dev) &&
	    mr->attr.max_pages * sizeof *mr->mem.arbel.mtts > PAGE_SIZE)
		return -EINVAL;

	mr->maps = 0;

	key = mthca_alloc(&dev->mr_table.mpt_alloc);
	if (key == -1)
		return -ENOMEM;

	idx = key & (dev->limits.num_mpts - 1);
	mr->ibmr.rkey = mr->ibmr.lkey = hw_index_to_key(dev, key);

	if (mthca_is_memfree(dev)) {
		err = mthca_table_get(dev, dev->mr_table.mpt_table, key);
		if (err)
			goto err_out_mpt_free;

		mr->mem.arbel.mpt = mthca_table_find(dev->mr_table.mpt_table, key);
		BUG_ON(!mr->mem.arbel.mpt);
	} else
		mr->mem.tavor.mpt = dev->mr_table.tavor_fmr.mpt_base +
			sizeof *(mr->mem.tavor.mpt) * idx;

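	/* Same order computation as in mthca_mr_alloc_phys() above. */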
	for (i = MTHCA_MTT_SEG_SIZE / 8, mr->order = 0;
	     i < list_len;
	     i <<= 1, ++mr->order)
		; /* nothing */

	mr->first_seg = mthca_alloc_mtt(dev, mr->order,
					dev->mr_table.fmr_mtt_buddy);
	if (mr->first_seg == -1)
		goto err_out_table;

	mtt_seg = mr->first_seg * MTHCA_MTT_SEG_SIZE;

	if (mthca_is_memfree(dev)) {
		mr->mem.arbel.mtts = mthca_table_find(dev->mr_table.mtt_table,
						      mr->first_seg);
		BUG_ON(!mr->mem.arbel.mtts);
	} else
		mr->mem.tavor.mtts = dev->mr_table.tavor_fmr.mtt_base + mtt_seg;

	mailbox = kmalloc(sizeof *mpt_entry + MTHCA_CMD_MAILBOX_EXTRA,
			  GFP_KERNEL);
	if (!mailbox)
		goto err_out_free_mtt;

	mpt_entry = MAILBOX_ALIGN(mailbox);

	mpt_entry->flags = cpu_to_be32(MTHCA_MPT_FLAG_SW_OWNS     |
				       MTHCA_MPT_FLAG_MIO         |
				       MTHCA_MPT_FLAG_REGION      |
				       access);

	mpt_entry->page_size = cpu_to_be32(mr->attr.page_size - 12);
	mpt_entry->key       = cpu_to_be32(key);
	mpt_entry->pd        = cpu_to_be32(pd);
	memset(&mpt_entry->start, 0,
	       sizeof *mpt_entry - offsetof(struct mthca_mpt_entry, start));
	mpt_entry->mtt_seg   = cpu_to_be64(dev->mr_table.mtt_base + mtt_seg);

	if (0) {
		mthca_dbg(dev, "Dumping MPT entry %08x:\n", mr->ibmr.lkey);
		for (i = 0; i < sizeof (struct mthca_mpt_entry) / 4; ++i) {
			if (i % 4 == 0)
				printk("[%02x] ", i * 4);
			printk(" %08x", be32_to_cpu(((u32 *) mpt_entry)[i]));
			if ((i + 1) % 4 == 0)
				printk("\n");
		}
	}

	err = mthca_SW2HW_MPT(dev, mpt_entry,
			      key & (dev->limits.num_mpts - 1),
			      &status);
	if (err) {
		mthca_warn(dev, "SW2HW_MPT failed (%d)\n", err);
		goto err_out_mailbox_free;
	}
	if (status) {
		mthca_warn(dev, "SW2HW_MPT returned status 0x%02x\n",
			   status);
		err = -EINVAL;
		goto err_out_mailbox_free;
	}

	kfree(mailbox);
	return 0;

err_out_mailbox_free:
	kfree(mailbox);

err_out_free_mtt:
	mthca_free_mtt(dev, mr->first_seg, mr->order,
		       dev->mr_table.fmr_mtt_buddy);

err_out_table:
	if (mthca_is_memfree(dev))
		mthca_table_put(dev, dev->mr_table.mpt_table, key);

err_out_mpt_free:
	mthca_free(&dev->mr_table.mpt_alloc, key);
	return err;
}

int mthca_free_fmr(struct mthca_dev *dev, struct mthca_fmr *fmr)
{
	if (fmr->maps)
		return -EBUSY;

	mthca_free_region(dev, fmr->ibmr.lkey, fmr->order, fmr->first_seg,
			  dev->mr_table.fmr_mtt_buddy);
	return 0;
}

static inline int mthca_check_fmr(struct mthca_fmr *fmr, u64 *page_list,
				  int list_len, u64 iova)
{
	int i, page_mask;

	if (list_len > fmr->attr.max_pages)
		return -EINVAL;

	page_mask = (1 << fmr->attr.page_size) - 1;

	/* We are getting page lists, so va must be page aligned. */
	if (iova & page_mask)
		return -EINVAL;

	/* Trust the user not to pass misaligned data in page_list */
	if (0)
		for (i = 0; i < list_len; ++i) {
			if (page_list[i] & page_mask)
				return -EINVAL;
		}

	if (fmr->maps >= fmr->attr.max_maps)
		return -EINVAL;

	return 0;
}

int mthca_tavor_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
			     int list_len, u64 iova)
{
	struct mthca_fmr *fmr = to_mfmr(ibfmr);
	struct mthca_dev *dev = to_mdev(ibfmr->device);
	struct mthca_mpt_entry mpt_entry;
	u32 key;
	int i, err;

	err = mthca_check_fmr(fmr, page_list, list_len, iova);
	if (err)
		return err;

	++fmr->maps;

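	/*
	 * Advance the key by num_mpts: this leaves the MPT index (the
	 * low bits) unchanged but changes the key itself, so stale
	 * keys from earlier mappings no longer validate.
	 */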
	key = tavor_key_to_hw_index(fmr->ibmr.lkey);
	key += dev->limits.num_mpts;
	fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);

	writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);

	for (i = 0; i < list_len; ++i) {
		__be64 mtt_entry = cpu_to_be64(page_list[i] |
					       MTHCA_MTT_FLAG_PRESENT);
		mthca_write64_raw(mtt_entry, fmr->mem.tavor.mtts + i);
	}

	mpt_entry.lkey   = cpu_to_be32(key);
	mpt_entry.length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size));
	mpt_entry.start  = cpu_to_be64(iova);

	writel(mpt_entry.lkey, &fmr->mem.tavor.mpt->key);
	memcpy_toio(&fmr->mem.tavor.mpt->start, &mpt_entry.start,
		    offsetof(struct mthca_mpt_entry, window_count) -
		    offsetof(struct mthca_mpt_entry, start));

	writeb(MTHCA_MPT_STATUS_HW, fmr->mem.tavor.mpt);

	return 0;
}

int mthca_arbel_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
			     int list_len, u64 iova)
{
	struct mthca_fmr *fmr = to_mfmr(ibfmr);
	struct mthca_dev *dev = to_mdev(ibfmr->device);
	u32 key;
	int i, err;

	err = mthca_check_fmr(fmr, page_list, list_len, iova);
	if (err)
		return err;

	++fmr->maps;

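	/* Same key-advance trick as in the Tavor path above. */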
	key = arbel_key_to_hw_index(fmr->ibmr.lkey);
	key += dev->limits.num_mpts;
	fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);

	*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;

	wmb();

	for (i = 0; i < list_len; ++i)
		fmr->mem.arbel.mtts[i] = cpu_to_be64(page_list[i] |
						     MTHCA_MTT_FLAG_PRESENT);

	fmr->mem.arbel.mpt->key    = cpu_to_be32(key);
	fmr->mem.arbel.mpt->lkey   = cpu_to_be32(key);
	fmr->mem.arbel.mpt->length = cpu_to_be64(list_len * (1ull << fmr->attr.page_size));
	fmr->mem.arbel.mpt->start  = cpu_to_be64(iova);

	wmb();

	*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_HW;

	wmb();

	return 0;
}

void mthca_tavor_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
{
	u32 key;

	if (!fmr->maps)
		return;

	key = tavor_key_to_hw_index(fmr->ibmr.lkey);
	key &= dev->limits.num_mpts - 1;
	fmr->ibmr.lkey = fmr->ibmr.rkey = tavor_hw_index_to_key(key);

	fmr->maps = 0;

	writeb(MTHCA_MPT_STATUS_SW, fmr->mem.tavor.mpt);
}

void mthca_arbel_fmr_unmap(struct mthca_dev *dev, struct mthca_fmr *fmr)
{
	u32 key;

	if (!fmr->maps)
		return;

	key = arbel_key_to_hw_index(fmr->ibmr.lkey);
	key &= dev->limits.num_mpts - 1;
	fmr->ibmr.lkey = fmr->ibmr.rkey = arbel_hw_index_to_key(key);

	fmr->maps = 0;

	*(u8 *) fmr->mem.arbel.mpt = MTHCA_MPT_STATUS_SW;
}
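
/*
 * Illustrative sketch (not built): the FMR lifecycle the helpers
 * above implement.  The dev, fmr, and page array here are
 * hypothetical; note that mthca_free_fmr() returns -EBUSY while a
 * mapping is still outstanding.
 */
#if 0
static void mthca_fmr_example(struct mthca_dev *dev, struct mthca_fmr *fmr,
			      u64 *pages, int npages, u64 iova)
{
	if (!mthca_tavor_map_phys_fmr(&fmr->ibmr, pages, npages, iova)) {
		/* ... post work requests using fmr->ibmr.lkey ... */
		mthca_tavor_fmr_unmap(dev, fmr);
	}
}
#endif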

int __devinit mthca_init_mr_table(struct mthca_dev *dev)
{
	int err, i;

	err = mthca_alloc_init(&dev->mr_table.mpt_alloc,
			       dev->limits.num_mpts,
			       ~0, dev->limits.reserved_mrws);
	if (err)
		return err;

	if (!mthca_is_memfree(dev) &&
	    (dev->mthca_flags & MTHCA_FLAG_DDR_HIDDEN))
		dev->limits.fmr_reserved_mtts = 0;
	else
		dev->mthca_flags |= MTHCA_FLAG_FMR;

	err = mthca_buddy_init(&dev->mr_table.mtt_buddy,
			       fls(dev->limits.num_mtt_segs - 1));

	if (err)
		goto err_mtt_buddy;

	dev->mr_table.tavor_fmr.mpt_base = NULL;
	dev->mr_table.tavor_fmr.mtt_base = NULL;

	if (dev->limits.fmr_reserved_mtts) {
		i = fls(dev->limits.fmr_reserved_mtts - 1);

		if (i >= 31) {
			mthca_warn(dev, "Unable to reserve 2^31 FMR MTTs.\n");
			err = -EINVAL;
			goto err_fmr_mpt;
		}

		dev->mr_table.tavor_fmr.mpt_base =
			ioremap(dev->mr_table.mpt_base,
				(1 << i) * sizeof (struct mthca_mpt_entry));

		if (!dev->mr_table.tavor_fmr.mpt_base) {
			mthca_warn(dev, "MPT ioremap for FMR failed.\n");
			err = -ENOMEM;
			goto err_fmr_mpt;
		}

		dev->mr_table.tavor_fmr.mtt_base =
			ioremap(dev->mr_table.mtt_base,
				(1 << i) * MTHCA_MTT_SEG_SIZE);
		if (!dev->mr_table.tavor_fmr.mtt_base) {
			mthca_warn(dev, "MTT ioremap for FMR failed.\n");
			err = -ENOMEM;
			goto err_fmr_mtt;
		}

		err = mthca_buddy_init(&dev->mr_table.tavor_fmr.mtt_buddy, i);
		if (err)
			goto err_fmr_mtt_buddy;

		/* Prevent regular MRs from using the MTT segments reserved for FMRs */
		err = mthca_buddy_alloc(&dev->mr_table.mtt_buddy, i);
		if (err)
			goto err_reserve_fmr;

		dev->mr_table.fmr_mtt_buddy =
			&dev->mr_table.tavor_fmr.mtt_buddy;
	} else
		dev->mr_table.fmr_mtt_buddy = &dev->mr_table.mtt_buddy;

	/* The FMR region always comes first in the MTT table, so take the reserved MTTs out of the FMR buddy */
	if (dev->limits.reserved_mtts) {
		i = fls(dev->limits.reserved_mtts - 1);

		if (mthca_alloc_mtt(dev, i, dev->mr_table.fmr_mtt_buddy) == -1) {
			mthca_warn(dev, "MTT table of order %d is too small.\n",
				  dev->mr_table.fmr_mtt_buddy->max_order);
			err = -ENOMEM;
			goto err_reserve_mtts;
		}
	}

	return 0;

err_reserve_mtts:
err_reserve_fmr:
	if (dev->limits.fmr_reserved_mtts)
		mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);

err_fmr_mtt_buddy:
	if (dev->mr_table.tavor_fmr.mtt_base)
		iounmap(dev->mr_table.tavor_fmr.mtt_base);

err_fmr_mtt:
	if (dev->mr_table.tavor_fmr.mpt_base)
		iounmap(dev->mr_table.tavor_fmr.mpt_base);

err_fmr_mpt:
	mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);

err_mtt_buddy:
	mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);

	return err;
}

void __devexit mthca_cleanup_mr_table(struct mthca_dev *dev)
{
	/* XXX check if any MRs are still allocated? */
	if (dev->limits.fmr_reserved_mtts)
		mthca_buddy_cleanup(&dev->mr_table.tavor_fmr.mtt_buddy);

	mthca_buddy_cleanup(&dev->mr_table.mtt_buddy);

	if (dev->mr_table.tavor_fmr.mtt_base)
		iounmap(dev->mr_table.tavor_fmr.mtt_base);
	if (dev->mr_table.tavor_fmr.mpt_base)
		iounmap(dev->mr_table.tavor_fmr.mpt_base);

	mthca_alloc_cleanup(&dev->mr_table.mpt_alloc);
}