xref: /linux/drivers/infiniband/hw/mthca/mthca_provider.c (revision 20d0021394c1b070bf04b22c5bc8fdb437edd4c5)
1 /*
2  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4  * Copyright (c) 2005 Cisco Systems. All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  *
34  * $Id: mthca_provider.c 1397 2004-12-28 05:09:00Z roland $
35  */
36 
37 #include <ib_smi.h>
38 #include <linux/mm.h>
39 
40 #include "mthca_dev.h"
41 #include "mthca_cmd.h"
42 #include "mthca_user.h"
43 #include "mthca_memfree.h"
44 
45 static int mthca_query_device(struct ib_device *ibdev,
46 			      struct ib_device_attr *props)
47 {
48 	struct ib_smp *in_mad  = NULL;
49 	struct ib_smp *out_mad = NULL;
50 	int err = -ENOMEM;
51 	struct mthca_dev* mdev = to_mdev(ibdev);
52 
53 	u8 status;
54 
55 	in_mad  = kmalloc(sizeof *in_mad, GFP_KERNEL);
56 	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
57 	if (!in_mad || !out_mad)
58 		goto out;
59 
60 	memset(props, 0, sizeof *props);
61 
62 	props->fw_ver              = mdev->fw_ver;
63 
64 	memset(in_mad, 0, sizeof *in_mad);
65 	in_mad->base_version       = 1;
66 	in_mad->mgmt_class     	   = IB_MGMT_CLASS_SUBN_LID_ROUTED;
67 	in_mad->class_version  	   = 1;
68 	in_mad->method         	   = IB_MGMT_METHOD_GET;
69 	in_mad->attr_id   	   = IB_SMP_ATTR_NODE_INFO;
70 
71 	err = mthca_MAD_IFC(mdev, 1, 1,
72 			    1, NULL, NULL, in_mad, out_mad,
73 			    &status);
74 	if (err)
75 		goto out;
76 	if (status) {
77 		err = -EINVAL;
78 		goto out;
79 	}
80 
81 	props->device_cap_flags    = mdev->device_cap_flags;
82 	props->vendor_id           = be32_to_cpup((u32 *) (out_mad->data + 36)) &
83 		0xffffff;
84 	props->vendor_part_id      = be16_to_cpup((u16 *) (out_mad->data + 30));
85 	props->hw_ver              = be16_to_cpup((u16 *) (out_mad->data + 32));
86 	memcpy(&props->sys_image_guid, out_mad->data +  4, 8);
87 	memcpy(&props->node_guid,      out_mad->data + 12, 8);
88 
89 	props->max_mr_size         = ~0ull;
90 	props->max_qp              = mdev->limits.num_qps - mdev->limits.reserved_qps;
91 	props->max_qp_wr           = 0xffff;
92 	props->max_sge             = mdev->limits.max_sg;
93 	props->max_cq              = mdev->limits.num_cqs - mdev->limits.reserved_cqs;
94 	props->max_cqe             = 0xffff;
95 	props->max_mr              = mdev->limits.num_mpts - mdev->limits.reserved_mrws;
96 	props->max_pd              = mdev->limits.num_pds - mdev->limits.reserved_pds;
97 	props->max_qp_rd_atom      = 1 << mdev->qp_table.rdb_shift;
98 	props->max_qp_init_rd_atom = 1 << mdev->qp_table.rdb_shift;
99 	props->local_ca_ack_delay  = mdev->limits.local_ca_ack_delay;
100 
101 	err = 0;
102  out:
103 	kfree(in_mad);
104 	kfree(out_mad);
105 	return err;
106 }
107 
108 static int mthca_query_port(struct ib_device *ibdev,
109 			    u8 port, struct ib_port_attr *props)
110 {
111 	struct ib_smp *in_mad  = NULL;
112 	struct ib_smp *out_mad = NULL;
113 	int err = -ENOMEM;
114 	u8 status;
115 
116 	in_mad  = kmalloc(sizeof *in_mad, GFP_KERNEL);
117 	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
118 	if (!in_mad || !out_mad)
119 		goto out;
120 
121 	memset(in_mad, 0, sizeof *in_mad);
122 	in_mad->base_version       = 1;
123 	in_mad->mgmt_class     	   = IB_MGMT_CLASS_SUBN_LID_ROUTED;
124 	in_mad->class_version  	   = 1;
125 	in_mad->method         	   = IB_MGMT_METHOD_GET;
126 	in_mad->attr_id   	   = IB_SMP_ATTR_PORT_INFO;
127 	in_mad->attr_mod           = cpu_to_be32(port);
128 
129 	err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
130 			    port, NULL, NULL, in_mad, out_mad,
131 			    &status);
132 	if (err)
133 		goto out;
134 	if (status) {
135 		err = -EINVAL;
136 		goto out;
137 	}
138 
139 	props->lid               = be16_to_cpup((u16 *) (out_mad->data + 16));
140 	props->lmc               = out_mad->data[34] & 0x7;
141 	props->sm_lid            = be16_to_cpup((u16 *) (out_mad->data + 18));
142 	props->sm_sl             = out_mad->data[36] & 0xf;
143 	props->state             = out_mad->data[32] & 0xf;
144 	props->phys_state        = out_mad->data[33] >> 4;
145 	props->port_cap_flags    = be32_to_cpup((u32 *) (out_mad->data + 20));
146 	props->gid_tbl_len       = to_mdev(ibdev)->limits.gid_table_len;
147 	props->pkey_tbl_len      = to_mdev(ibdev)->limits.pkey_table_len;
148 	props->qkey_viol_cntr    = be16_to_cpup((u16 *) (out_mad->data + 48));
149 	props->active_width      = out_mad->data[31] & 0xf;
150 	props->active_speed      = out_mad->data[35] >> 4;
151 
152  out:
153 	kfree(in_mad);
154 	kfree(out_mad);
155 	return err;
156 }
157 
158 static int mthca_modify_port(struct ib_device *ibdev,
159 			     u8 port, int port_modify_mask,
160 			     struct ib_port_modify *props)
161 {
162 	struct mthca_set_ib_param set_ib;
163 	struct ib_port_attr attr;
164 	int err;
165 	u8 status;
166 
167 	if (down_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
168 		return -ERESTARTSYS;
169 
170 	err = mthca_query_port(ibdev, port, &attr);
171 	if (err)
172 		goto out;
173 
174 	set_ib.set_si_guid     = 0;
175 	set_ib.reset_qkey_viol = !!(port_modify_mask & IB_PORT_RESET_QKEY_CNTR);
176 
177 	set_ib.cap_mask = (attr.port_cap_flags | props->set_port_cap_mask) &
178 		~props->clr_port_cap_mask;
179 
180 	err = mthca_SET_IB(to_mdev(ibdev), &set_ib, port, &status);
181 	if (err)
182 		goto out;
183 	if (status) {
184 		err = -EINVAL;
185 		goto out;
186 	}
187 
188 out:
189 	up(&to_mdev(ibdev)->cap_mask_mutex);
190 	return err;
191 }
192 
193 static int mthca_query_pkey(struct ib_device *ibdev,
194 			    u8 port, u16 index, u16 *pkey)
195 {
196 	struct ib_smp *in_mad  = NULL;
197 	struct ib_smp *out_mad = NULL;
198 	int err = -ENOMEM;
199 	u8 status;
200 
201 	in_mad  = kmalloc(sizeof *in_mad, GFP_KERNEL);
202 	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
203 	if (!in_mad || !out_mad)
204 		goto out;
205 
206 	memset(in_mad, 0, sizeof *in_mad);
207 	in_mad->base_version       = 1;
208 	in_mad->mgmt_class     	   = IB_MGMT_CLASS_SUBN_LID_ROUTED;
209 	in_mad->class_version  	   = 1;
210 	in_mad->method         	   = IB_MGMT_METHOD_GET;
211 	in_mad->attr_id   	   = IB_SMP_ATTR_PKEY_TABLE;
212 	in_mad->attr_mod           = cpu_to_be32(index / 32);
213 
214 	err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
215 			    port, NULL, NULL, in_mad, out_mad,
216 			    &status);
217 	if (err)
218 		goto out;
219 	if (status) {
220 		err = -EINVAL;
221 		goto out;
222 	}
223 
224 	*pkey = be16_to_cpu(((u16 *) out_mad->data)[index % 32]);
225 
226  out:
227 	kfree(in_mad);
228 	kfree(out_mad);
229 	return err;
230 }
231 
232 static int mthca_query_gid(struct ib_device *ibdev, u8 port,
233 			   int index, union ib_gid *gid)
234 {
235 	struct ib_smp *in_mad  = NULL;
236 	struct ib_smp *out_mad = NULL;
237 	int err = -ENOMEM;
238 	u8 status;
239 
240 	in_mad  = kmalloc(sizeof *in_mad, GFP_KERNEL);
241 	out_mad = kmalloc(sizeof *out_mad, GFP_KERNEL);
242 	if (!in_mad || !out_mad)
243 		goto out;
244 
245 	memset(in_mad, 0, sizeof *in_mad);
246 	in_mad->base_version       = 1;
247 	in_mad->mgmt_class     	   = IB_MGMT_CLASS_SUBN_LID_ROUTED;
248 	in_mad->class_version  	   = 1;
249 	in_mad->method         	   = IB_MGMT_METHOD_GET;
250 	in_mad->attr_id   	   = IB_SMP_ATTR_PORT_INFO;
251 	in_mad->attr_mod           = cpu_to_be32(port);
252 
253 	err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
254 			    port, NULL, NULL, in_mad, out_mad,
255 			    &status);
256 	if (err)
257 		goto out;
258 	if (status) {
259 		err = -EINVAL;
260 		goto out;
261 	}
262 
263 	memcpy(gid->raw, out_mad->data + 8, 8);
264 
265 	memset(in_mad, 0, sizeof *in_mad);
266 	in_mad->base_version       = 1;
267 	in_mad->mgmt_class     	   = IB_MGMT_CLASS_SUBN_LID_ROUTED;
268 	in_mad->class_version  	   = 1;
269 	in_mad->method         	   = IB_MGMT_METHOD_GET;
270 	in_mad->attr_id   	   = IB_SMP_ATTR_GUID_INFO;
271 	in_mad->attr_mod           = cpu_to_be32(index / 8);
272 
273 	err = mthca_MAD_IFC(to_mdev(ibdev), 1, 1,
274 			    port, NULL, NULL, in_mad, out_mad,
275 			    &status);
276 	if (err)
277 		goto out;
278 	if (status) {
279 		err = -EINVAL;
280 		goto out;
281 	}
282 
283 	memcpy(gid->raw + 8, out_mad->data + (index % 8) * 16, 8);
284 
285  out:
286 	kfree(in_mad);
287 	kfree(out_mad);
288 	return err;
289 }
290 
291 static struct ib_ucontext *mthca_alloc_ucontext(struct ib_device *ibdev,
292 						struct ib_udata *udata)
293 {
294 	struct mthca_alloc_ucontext_resp uresp;
295 	struct mthca_ucontext           *context;
296 	int                              err;
297 
298 	memset(&uresp, 0, sizeof uresp);
299 
300 	uresp.qp_tab_size = to_mdev(ibdev)->limits.num_qps;
301 	if (mthca_is_memfree(to_mdev(ibdev)))
302 		uresp.uarc_size = to_mdev(ibdev)->uar_table.uarc_size;
303 	else
304 		uresp.uarc_size = 0;
305 
306 	context = kmalloc(sizeof *context, GFP_KERNEL);
307 	if (!context)
308 		return ERR_PTR(-ENOMEM);
309 
310 	err = mthca_uar_alloc(to_mdev(ibdev), &context->uar);
311 	if (err) {
312 		kfree(context);
313 		return ERR_PTR(err);
314 	}
315 
316 	context->db_tab = mthca_init_user_db_tab(to_mdev(ibdev));
317 	if (IS_ERR(context->db_tab)) {
318 		err = PTR_ERR(context->db_tab);
319 		mthca_uar_free(to_mdev(ibdev), &context->uar);
320 		kfree(context);
321 		return ERR_PTR(err);
322 	}
323 
324 	if (ib_copy_to_udata(udata, &uresp, sizeof uresp)) {
325 		mthca_cleanup_user_db_tab(to_mdev(ibdev), &context->uar, context->db_tab);
326 		mthca_uar_free(to_mdev(ibdev), &context->uar);
327 		kfree(context);
328 		return ERR_PTR(-EFAULT);
329 	}
330 
331 	return &context->ibucontext;
332 }
333 
334 static int mthca_dealloc_ucontext(struct ib_ucontext *context)
335 {
336 	mthca_cleanup_user_db_tab(to_mdev(context->device), &to_mucontext(context)->uar,
337 				  to_mucontext(context)->db_tab);
338 	mthca_uar_free(to_mdev(context->device), &to_mucontext(context)->uar);
339 	kfree(to_mucontext(context));
340 
341 	return 0;
342 }
343 
344 static int mthca_mmap_uar(struct ib_ucontext *context,
345 			  struct vm_area_struct *vma)
346 {
347 	if (vma->vm_end - vma->vm_start != PAGE_SIZE)
348 		return -EINVAL;
349 
350 	vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
351 
352 	if (remap_pfn_range(vma, vma->vm_start,
353 			    to_mucontext(context)->uar.pfn,
354 			    PAGE_SIZE, vma->vm_page_prot))
355 		return -EAGAIN;
356 
357 	return 0;
358 }
359 
360 static struct ib_pd *mthca_alloc_pd(struct ib_device *ibdev,
361 				    struct ib_ucontext *context,
362 				    struct ib_udata *udata)
363 {
364 	struct mthca_pd *pd;
365 	int err;
366 
367 	pd = kmalloc(sizeof *pd, GFP_KERNEL);
368 	if (!pd)
369 		return ERR_PTR(-ENOMEM);
370 
371 	err = mthca_pd_alloc(to_mdev(ibdev), !context, pd);
372 	if (err) {
373 		kfree(pd);
374 		return ERR_PTR(err);
375 	}
376 
377 	if (context) {
378 		if (ib_copy_to_udata(udata, &pd->pd_num, sizeof (__u32))) {
379 			mthca_pd_free(to_mdev(ibdev), pd);
380 			kfree(pd);
381 			return ERR_PTR(-EFAULT);
382 		}
383 	}
384 
385 	return &pd->ibpd;
386 }
387 
388 static int mthca_dealloc_pd(struct ib_pd *pd)
389 {
390 	mthca_pd_free(to_mdev(pd->device), to_mpd(pd));
391 	kfree(pd);
392 
393 	return 0;
394 }
395 
396 static struct ib_ah *mthca_ah_create(struct ib_pd *pd,
397 				     struct ib_ah_attr *ah_attr)
398 {
399 	int err;
400 	struct mthca_ah *ah;
401 
402 	ah = kmalloc(sizeof *ah, GFP_ATOMIC);
403 	if (!ah)
404 		return ERR_PTR(-ENOMEM);
405 
406 	err = mthca_create_ah(to_mdev(pd->device), to_mpd(pd), ah_attr, ah);
407 	if (err) {
408 		kfree(ah);
409 		return ERR_PTR(err);
410 	}
411 
412 	return &ah->ibah;
413 }
414 
415 static int mthca_ah_destroy(struct ib_ah *ah)
416 {
417 	mthca_destroy_ah(to_mdev(ah->device), to_mah(ah));
418 	kfree(ah);
419 
420 	return 0;
421 }
422 
423 static struct ib_qp *mthca_create_qp(struct ib_pd *pd,
424 				     struct ib_qp_init_attr *init_attr,
425 				     struct ib_udata *udata)
426 {
427 	struct mthca_create_qp ucmd;
428 	struct mthca_qp *qp;
429 	int err;
430 
431 	switch (init_attr->qp_type) {
432 	case IB_QPT_RC:
433 	case IB_QPT_UC:
434 	case IB_QPT_UD:
435 	{
436 		struct mthca_ucontext *context;
437 
438 		qp = kmalloc(sizeof *qp, GFP_KERNEL);
439 		if (!qp)
440 			return ERR_PTR(-ENOMEM);
441 
442 		if (pd->uobject) {
443 			context = to_mucontext(pd->uobject->context);
444 
445 			if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
446 				return ERR_PTR(-EFAULT);
447 
448 			err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
449 						context->db_tab,
450 						ucmd.sq_db_index, ucmd.sq_db_page);
451 			if (err) {
452 				kfree(qp);
453 				return ERR_PTR(err);
454 			}
455 
456 			err = mthca_map_user_db(to_mdev(pd->device), &context->uar,
457 						context->db_tab,
458 						ucmd.rq_db_index, ucmd.rq_db_page);
459 			if (err) {
460 				mthca_unmap_user_db(to_mdev(pd->device),
461 						    &context->uar,
462 						    context->db_tab,
463 						    ucmd.sq_db_index);
464 				kfree(qp);
465 				return ERR_PTR(err);
466 			}
467 
468 			qp->mr.ibmr.lkey = ucmd.lkey;
469 			qp->sq.db_index  = ucmd.sq_db_index;
470 			qp->rq.db_index  = ucmd.rq_db_index;
471 		}
472 
473 		err = mthca_alloc_qp(to_mdev(pd->device), to_mpd(pd),
474 				     to_mcq(init_attr->send_cq),
475 				     to_mcq(init_attr->recv_cq),
476 				     init_attr->qp_type, init_attr->sq_sig_type,
477 				     &init_attr->cap, qp);
478 
479 		if (err && pd->uobject) {
480 			context = to_mucontext(pd->uobject->context);
481 
482 			mthca_unmap_user_db(to_mdev(pd->device),
483 					    &context->uar,
484 					    context->db_tab,
485 					    ucmd.sq_db_index);
486 			mthca_unmap_user_db(to_mdev(pd->device),
487 					    &context->uar,
488 					    context->db_tab,
489 					    ucmd.rq_db_index);
490 		}
491 
492 		qp->ibqp.qp_num = qp->qpn;
493 		break;
494 	}
495 	case IB_QPT_SMI:
496 	case IB_QPT_GSI:
497 	{
498 		/* Don't allow userspace to create special QPs */
499 		if (pd->uobject)
500 			return ERR_PTR(-EINVAL);
501 
502 		qp = kmalloc(sizeof (struct mthca_sqp), GFP_KERNEL);
503 		if (!qp)
504 			return ERR_PTR(-ENOMEM);
505 
506 		qp->ibqp.qp_num = init_attr->qp_type == IB_QPT_SMI ? 0 : 1;
507 
508 		err = mthca_alloc_sqp(to_mdev(pd->device), to_mpd(pd),
509 				      to_mcq(init_attr->send_cq),
510 				      to_mcq(init_attr->recv_cq),
511 				      init_attr->sq_sig_type, &init_attr->cap,
512 				      qp->ibqp.qp_num, init_attr->port_num,
513 				      to_msqp(qp));
514 		break;
515 	}
516 	default:
517 		/* Don't support raw QPs */
518 		return ERR_PTR(-ENOSYS);
519 	}
520 
521 	if (err) {
522 		kfree(qp);
523 		return ERR_PTR(err);
524 	}
525 
526 	init_attr->cap.max_inline_data = 0;
527 	init_attr->cap.max_send_wr     = qp->sq.max;
528 	init_attr->cap.max_recv_wr     = qp->rq.max;
529 	init_attr->cap.max_send_sge    = qp->sq.max_gs;
530 	init_attr->cap.max_recv_sge    = qp->rq.max_gs;
531 
532 	return &qp->ibqp;
533 }
534 
535 static int mthca_destroy_qp(struct ib_qp *qp)
536 {
537 	if (qp->uobject) {
538 		mthca_unmap_user_db(to_mdev(qp->device),
539 				    &to_mucontext(qp->uobject->context)->uar,
540 				    to_mucontext(qp->uobject->context)->db_tab,
541 				    to_mqp(qp)->sq.db_index);
542 		mthca_unmap_user_db(to_mdev(qp->device),
543 				    &to_mucontext(qp->uobject->context)->uar,
544 				    to_mucontext(qp->uobject->context)->db_tab,
545 				    to_mqp(qp)->rq.db_index);
546 	}
547 	mthca_free_qp(to_mdev(qp->device), to_mqp(qp));
548 	kfree(qp);
549 	return 0;
550 }
551 
552 static struct ib_cq *mthca_create_cq(struct ib_device *ibdev, int entries,
553 				     struct ib_ucontext *context,
554 				     struct ib_udata *udata)
555 {
556 	struct mthca_create_cq ucmd;
557 	struct mthca_cq *cq;
558 	int nent;
559 	int err;
560 
561 	if (context) {
562 		if (ib_copy_from_udata(&ucmd, udata, sizeof ucmd))
563 			return ERR_PTR(-EFAULT);
564 
565 		err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
566 					to_mucontext(context)->db_tab,
567 					ucmd.set_db_index, ucmd.set_db_page);
568 		if (err)
569 			return ERR_PTR(err);
570 
571 		err = mthca_map_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
572 					to_mucontext(context)->db_tab,
573 					ucmd.arm_db_index, ucmd.arm_db_page);
574 		if (err)
575 			goto err_unmap_set;
576 	}
577 
578 	cq = kmalloc(sizeof *cq, GFP_KERNEL);
579 	if (!cq) {
580 		err = -ENOMEM;
581 		goto err_unmap_arm;
582 	}
583 
584 	if (context) {
585 		cq->mr.ibmr.lkey    = ucmd.lkey;
586 		cq->set_ci_db_index = ucmd.set_db_index;
587 		cq->arm_db_index    = ucmd.arm_db_index;
588 	}
589 
590 	for (nent = 1; nent <= entries; nent <<= 1)
591 		; /* nothing */
592 
593 	err = mthca_init_cq(to_mdev(ibdev), nent,
594 			    context ? to_mucontext(context) : NULL,
595 			    context ? ucmd.pdn : to_mdev(ibdev)->driver_pd.pd_num,
596 			    cq);
597 	if (err)
598 		goto err_free;
599 
600 	if (context && ib_copy_to_udata(udata, &cq->cqn, sizeof (__u32))) {
601 		mthca_free_cq(to_mdev(ibdev), cq);
602 		goto err_free;
603 	}
604 
605 	return &cq->ibcq;
606 
607 err_free:
608 	kfree(cq);
609 
610 err_unmap_arm:
611 	if (context)
612 		mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
613 				    to_mucontext(context)->db_tab, ucmd.arm_db_index);
614 
615 err_unmap_set:
616 	if (context)
617 		mthca_unmap_user_db(to_mdev(ibdev), &to_mucontext(context)->uar,
618 				    to_mucontext(context)->db_tab, ucmd.set_db_index);
619 
620 	return ERR_PTR(err);
621 }
622 
623 static int mthca_destroy_cq(struct ib_cq *cq)
624 {
625 	if (cq->uobject) {
626 		mthca_unmap_user_db(to_mdev(cq->device),
627 				    &to_mucontext(cq->uobject->context)->uar,
628 				    to_mucontext(cq->uobject->context)->db_tab,
629 				    to_mcq(cq)->arm_db_index);
630 		mthca_unmap_user_db(to_mdev(cq->device),
631 				    &to_mucontext(cq->uobject->context)->uar,
632 				    to_mucontext(cq->uobject->context)->db_tab,
633 				    to_mcq(cq)->set_ci_db_index);
634 	}
635 	mthca_free_cq(to_mdev(cq->device), to_mcq(cq));
636 	kfree(cq);
637 
638 	return 0;
639 }
640 
641 static inline u32 convert_access(int acc)
642 {
643 	return (acc & IB_ACCESS_REMOTE_ATOMIC ? MTHCA_MPT_FLAG_ATOMIC       : 0) |
644 	       (acc & IB_ACCESS_REMOTE_WRITE  ? MTHCA_MPT_FLAG_REMOTE_WRITE : 0) |
645 	       (acc & IB_ACCESS_REMOTE_READ   ? MTHCA_MPT_FLAG_REMOTE_READ  : 0) |
646 	       (acc & IB_ACCESS_LOCAL_WRITE   ? MTHCA_MPT_FLAG_LOCAL_WRITE  : 0) |
647 	       MTHCA_MPT_FLAG_LOCAL_READ;
648 }
649 
650 static struct ib_mr *mthca_get_dma_mr(struct ib_pd *pd, int acc)
651 {
652 	struct mthca_mr *mr;
653 	int err;
654 
655 	mr = kmalloc(sizeof *mr, GFP_KERNEL);
656 	if (!mr)
657 		return ERR_PTR(-ENOMEM);
658 
659 	err = mthca_mr_alloc_notrans(to_mdev(pd->device),
660 				     to_mpd(pd)->pd_num,
661 				     convert_access(acc), mr);
662 
663 	if (err) {
664 		kfree(mr);
665 		return ERR_PTR(err);
666 	}
667 
668 	return &mr->ibmr;
669 }
670 
671 static struct ib_mr *mthca_reg_phys_mr(struct ib_pd       *pd,
672 				       struct ib_phys_buf *buffer_list,
673 				       int                 num_phys_buf,
674 				       int                 acc,
675 				       u64                *iova_start)
676 {
677 	struct mthca_mr *mr;
678 	u64 *page_list;
679 	u64 total_size;
680 	u64 mask;
681 	int shift;
682 	int npages;
683 	int err;
684 	int i, j, n;
685 
686 	/* First check that we have enough alignment */
687 	if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK))
688 		return ERR_PTR(-EINVAL);
689 
690 	if (num_phys_buf > 1 &&
691 	    ((buffer_list[0].addr + buffer_list[0].size) & ~PAGE_MASK))
692 		return ERR_PTR(-EINVAL);
693 
694 	mask = 0;
695 	total_size = 0;
696 	for (i = 0; i < num_phys_buf; ++i) {
697 		if (i != 0 && buffer_list[i].addr & ~PAGE_MASK)
698 			return ERR_PTR(-EINVAL);
699 		if (i != 0 && i != num_phys_buf - 1 &&
700 		    (buffer_list[i].size & ~PAGE_MASK))
701 			return ERR_PTR(-EINVAL);
702 
703 		total_size += buffer_list[i].size;
704 		if (i > 0)
705 			mask |= buffer_list[i].addr;
706 	}
707 
708 	/* Find largest page shift we can use to cover buffers */
709 	for (shift = PAGE_SHIFT; shift < 31; ++shift)
710 		if (num_phys_buf > 1) {
711 			if ((1ULL << shift) & mask)
712 				break;
713 		} else {
714 			if (1ULL << shift >=
715 			    buffer_list[0].size +
716 			    (buffer_list[0].addr & ((1ULL << shift) - 1)))
717 				break;
718 		}
719 
720 	buffer_list[0].size += buffer_list[0].addr & ((1ULL << shift) - 1);
721 	buffer_list[0].addr &= ~0ull << shift;
722 
723 	mr = kmalloc(sizeof *mr, GFP_KERNEL);
724 	if (!mr)
725 		return ERR_PTR(-ENOMEM);
726 
727 	npages = 0;
728 	for (i = 0; i < num_phys_buf; ++i)
729 		npages += (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
730 
731 	if (!npages)
732 		return &mr->ibmr;
733 
734 	page_list = kmalloc(npages * sizeof *page_list, GFP_KERNEL);
735 	if (!page_list) {
736 		kfree(mr);
737 		return ERR_PTR(-ENOMEM);
738 	}
739 
740 	n = 0;
741 	for (i = 0; i < num_phys_buf; ++i)
742 		for (j = 0;
743 		     j < (buffer_list[i].size + (1ULL << shift) - 1) >> shift;
744 		     ++j)
745 			page_list[n++] = buffer_list[i].addr + ((u64) j << shift);
746 
747 	mthca_dbg(to_mdev(pd->device), "Registering memory at %llx (iova %llx) "
748 		  "in PD %x; shift %d, npages %d.\n",
749 		  (unsigned long long) buffer_list[0].addr,
750 		  (unsigned long long) *iova_start,
751 		  to_mpd(pd)->pd_num,
752 		  shift, npages);
753 
754 	err = mthca_mr_alloc_phys(to_mdev(pd->device),
755 				  to_mpd(pd)->pd_num,
756 				  page_list, shift, npages,
757 				  *iova_start, total_size,
758 				  convert_access(acc), mr);
759 
760 	if (err) {
761 		kfree(page_list);
762 		kfree(mr);
763 		return ERR_PTR(err);
764 	}
765 
766 	kfree(page_list);
767 	return &mr->ibmr;
768 }
769 
770 static struct ib_mr *mthca_reg_user_mr(struct ib_pd *pd, struct ib_umem *region,
771 				       int acc, struct ib_udata *udata)
772 {
773 	struct mthca_dev *dev = to_mdev(pd->device);
774 	struct ib_umem_chunk *chunk;
775 	struct mthca_mr *mr;
776 	u64 *pages;
777 	int shift, n, len;
778 	int i, j, k;
779 	int err = 0;
780 
781 	shift = ffs(region->page_size) - 1;
782 
783 	mr = kmalloc(sizeof *mr, GFP_KERNEL);
784 	if (!mr)
785 		return ERR_PTR(-ENOMEM);
786 
787 	n = 0;
788 	list_for_each_entry(chunk, &region->chunk_list, list)
789 		n += chunk->nents;
790 
791 	mr->mtt = mthca_alloc_mtt(dev, n);
792 	if (IS_ERR(mr->mtt)) {
793 		err = PTR_ERR(mr->mtt);
794 		goto err;
795 	}
796 
797 	pages = (u64 *) __get_free_page(GFP_KERNEL);
798 	if (!pages) {
799 		err = -ENOMEM;
800 		goto err_mtt;
801 	}
802 
803 	i = n = 0;
804 
805 	list_for_each_entry(chunk, &region->chunk_list, list)
806 		for (j = 0; j < chunk->nmap; ++j) {
807 			len = sg_dma_len(&chunk->page_list[j]) >> shift;
808 			for (k = 0; k < len; ++k) {
809 				pages[i++] = sg_dma_address(&chunk->page_list[j]) +
810 					region->page_size * k;
811 				/*
812 				 * Be friendly to WRITE_MTT command
813 				 * and leave two empty slots for the
814 				 * index and reserved fields of the
815 				 * mailbox.
816 				 */
817 				if (i == PAGE_SIZE / sizeof (u64) - 2) {
818 					err = mthca_write_mtt(dev, mr->mtt,
819 							      n, pages, i);
820 					if (err)
821 						goto mtt_done;
822 					n += i;
823 					i = 0;
824 				}
825 			}
826 		}
827 
828 	if (i)
829 		err = mthca_write_mtt(dev, mr->mtt, n, pages, i);
830 mtt_done:
831 	free_page((unsigned long) pages);
832 	if (err)
833 		goto err_mtt;
834 
835 	err = mthca_mr_alloc(dev, to_mpd(pd)->pd_num, shift, region->virt_base,
836 			     region->length, convert_access(acc), mr);
837 
838 	if (err)
839 		goto err_mtt;
840 
841 	return &mr->ibmr;
842 
843 err_mtt:
844 	mthca_free_mtt(dev, mr->mtt);
845 
846 err:
847 	kfree(mr);
848 	return ERR_PTR(err);
849 }
850 
851 static int mthca_dereg_mr(struct ib_mr *mr)
852 {
853 	struct mthca_mr *mmr = to_mmr(mr);
854 	mthca_free_mr(to_mdev(mr->device), mmr);
855 	kfree(mmr);
856 	return 0;
857 }
858 
859 static struct ib_fmr *mthca_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
860 				      struct ib_fmr_attr *fmr_attr)
861 {
862 	struct mthca_fmr *fmr;
863 	int err;
864 
865 	fmr = kmalloc(sizeof *fmr, GFP_KERNEL);
866 	if (!fmr)
867 		return ERR_PTR(-ENOMEM);
868 
869 	memcpy(&fmr->attr, fmr_attr, sizeof *fmr_attr);
870 	err = mthca_fmr_alloc(to_mdev(pd->device), to_mpd(pd)->pd_num,
871 			     convert_access(mr_access_flags), fmr);
872 
873 	if (err) {
874 		kfree(fmr);
875 		return ERR_PTR(err);
876 	}
877 
878 	return &fmr->ibmr;
879 }
880 
881 static int mthca_dealloc_fmr(struct ib_fmr *fmr)
882 {
883 	struct mthca_fmr *mfmr = to_mfmr(fmr);
884 	int err;
885 
886 	err = mthca_free_fmr(to_mdev(fmr->device), mfmr);
887 	if (err)
888 		return err;
889 
890 	kfree(mfmr);
891 	return 0;
892 }
893 
894 static int mthca_unmap_fmr(struct list_head *fmr_list)
895 {
896 	struct ib_fmr *fmr;
897 	int err;
898 	u8 status;
899 	struct mthca_dev *mdev = NULL;
900 
901 	list_for_each_entry(fmr, fmr_list, list) {
902 		if (mdev && to_mdev(fmr->device) != mdev)
903 			return -EINVAL;
904 		mdev = to_mdev(fmr->device);
905 	}
906 
907 	if (!mdev)
908 		return 0;
909 
910 	if (mthca_is_memfree(mdev)) {
911 		list_for_each_entry(fmr, fmr_list, list)
912 			mthca_arbel_fmr_unmap(mdev, to_mfmr(fmr));
913 
914 		wmb();
915 	} else
916 		list_for_each_entry(fmr, fmr_list, list)
917 			mthca_tavor_fmr_unmap(mdev, to_mfmr(fmr));
918 
919 	err = mthca_SYNC_TPT(mdev, &status);
920 	if (err)
921 		return err;
922 	if (status)
923 		return -EINVAL;
924 	return 0;
925 }
926 
927 static ssize_t show_rev(struct class_device *cdev, char *buf)
928 {
929 	struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
930 	return sprintf(buf, "%x\n", dev->rev_id);
931 }
932 
933 static ssize_t show_fw_ver(struct class_device *cdev, char *buf)
934 {
935 	struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
936 	return sprintf(buf, "%x.%x.%x\n", (int) (dev->fw_ver >> 32),
937 		       (int) (dev->fw_ver >> 16) & 0xffff,
938 		       (int) dev->fw_ver & 0xffff);
939 }
940 
941 static ssize_t show_hca(struct class_device *cdev, char *buf)
942 {
943 	struct mthca_dev *dev = container_of(cdev, struct mthca_dev, ib_dev.class_dev);
944 	switch (dev->pdev->device) {
945 	case PCI_DEVICE_ID_MELLANOX_TAVOR:
946 		return sprintf(buf, "MT23108\n");
947 	case PCI_DEVICE_ID_MELLANOX_ARBEL_COMPAT:
948 		return sprintf(buf, "MT25208 (MT23108 compat mode)\n");
949 	case PCI_DEVICE_ID_MELLANOX_ARBEL:
950 		return sprintf(buf, "MT25208\n");
951 	case PCI_DEVICE_ID_MELLANOX_SINAI:
952 	case PCI_DEVICE_ID_MELLANOX_SINAI_OLD:
953 		return sprintf(buf, "MT25204\n");
954 	default:
955 		return sprintf(buf, "unknown\n");
956 	}
957 }
958 
959 static CLASS_DEVICE_ATTR(hw_rev,   S_IRUGO, show_rev,    NULL);
960 static CLASS_DEVICE_ATTR(fw_ver,   S_IRUGO, show_fw_ver, NULL);
961 static CLASS_DEVICE_ATTR(hca_type, S_IRUGO, show_hca,    NULL);
962 
963 static struct class_device_attribute *mthca_class_attributes[] = {
964 	&class_device_attr_hw_rev,
965 	&class_device_attr_fw_ver,
966 	&class_device_attr_hca_type
967 };
968 
969 int mthca_register_device(struct mthca_dev *dev)
970 {
971 	int ret;
972 	int i;
973 
974 	strlcpy(dev->ib_dev.name, "mthca%d", IB_DEVICE_NAME_MAX);
975 	dev->ib_dev.owner                = THIS_MODULE;
976 
977 	dev->ib_dev.node_type            = IB_NODE_CA;
978 	dev->ib_dev.phys_port_cnt        = dev->limits.num_ports;
979 	dev->ib_dev.dma_device           = &dev->pdev->dev;
980 	dev->ib_dev.class_dev.dev        = &dev->pdev->dev;
981 	dev->ib_dev.query_device         = mthca_query_device;
982 	dev->ib_dev.query_port           = mthca_query_port;
983 	dev->ib_dev.modify_port          = mthca_modify_port;
984 	dev->ib_dev.query_pkey           = mthca_query_pkey;
985 	dev->ib_dev.query_gid            = mthca_query_gid;
986 	dev->ib_dev.alloc_ucontext       = mthca_alloc_ucontext;
987 	dev->ib_dev.dealloc_ucontext     = mthca_dealloc_ucontext;
988 	dev->ib_dev.mmap                 = mthca_mmap_uar;
989 	dev->ib_dev.alloc_pd             = mthca_alloc_pd;
990 	dev->ib_dev.dealloc_pd           = mthca_dealloc_pd;
991 	dev->ib_dev.create_ah            = mthca_ah_create;
992 	dev->ib_dev.destroy_ah           = mthca_ah_destroy;
993 	dev->ib_dev.create_qp            = mthca_create_qp;
994 	dev->ib_dev.modify_qp            = mthca_modify_qp;
995 	dev->ib_dev.destroy_qp           = mthca_destroy_qp;
996 	dev->ib_dev.create_cq            = mthca_create_cq;
997 	dev->ib_dev.destroy_cq           = mthca_destroy_cq;
998 	dev->ib_dev.poll_cq              = mthca_poll_cq;
999 	dev->ib_dev.get_dma_mr           = mthca_get_dma_mr;
1000 	dev->ib_dev.reg_phys_mr          = mthca_reg_phys_mr;
1001 	dev->ib_dev.reg_user_mr          = mthca_reg_user_mr;
1002 	dev->ib_dev.dereg_mr             = mthca_dereg_mr;
1003 
1004 	if (dev->mthca_flags & MTHCA_FLAG_FMR) {
1005 		dev->ib_dev.alloc_fmr            = mthca_alloc_fmr;
1006 		dev->ib_dev.unmap_fmr            = mthca_unmap_fmr;
1007 		dev->ib_dev.dealloc_fmr          = mthca_dealloc_fmr;
1008 		if (mthca_is_memfree(dev))
1009 			dev->ib_dev.map_phys_fmr = mthca_arbel_map_phys_fmr;
1010 		else
1011 			dev->ib_dev.map_phys_fmr = mthca_tavor_map_phys_fmr;
1012 	}
1013 
1014 	dev->ib_dev.attach_mcast         = mthca_multicast_attach;
1015 	dev->ib_dev.detach_mcast         = mthca_multicast_detach;
1016 	dev->ib_dev.process_mad          = mthca_process_mad;
1017 
1018 	if (mthca_is_memfree(dev)) {
1019 		dev->ib_dev.req_notify_cq = mthca_arbel_arm_cq;
1020 		dev->ib_dev.post_send     = mthca_arbel_post_send;
1021 		dev->ib_dev.post_recv     = mthca_arbel_post_receive;
1022 	} else {
1023 		dev->ib_dev.req_notify_cq = mthca_tavor_arm_cq;
1024 		dev->ib_dev.post_send     = mthca_tavor_post_send;
1025 		dev->ib_dev.post_recv     = mthca_tavor_post_receive;
1026 	}
1027 
1028 	init_MUTEX(&dev->cap_mask_mutex);
1029 
1030 	ret = ib_register_device(&dev->ib_dev);
1031 	if (ret)
1032 		return ret;
1033 
1034 	for (i = 0; i < ARRAY_SIZE(mthca_class_attributes); ++i) {
1035 		ret = class_device_create_file(&dev->ib_dev.class_dev,
1036 					       mthca_class_attributes[i]);
1037 		if (ret) {
1038 			ib_unregister_device(&dev->ib_dev);
1039 			return ret;
1040 		}
1041 	}
1042 
1043 	return 0;
1044 }
1045 
1046 void mthca_unregister_device(struct mthca_dev *dev)
1047 {
1048 	ib_unregister_device(&dev->ib_dev);
1049 }
1050