xref: /freebsd/sys/ofed/drivers/infiniband/core/ib_verbs.c (revision 685dc743dc3b5645e34836464128e1c0558b404b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3  *
4  * Copyright (c) 2004 Mellanox Technologies Ltd.  All rights reserved.
5  * Copyright (c) 2004 Infinicon Corporation.  All rights reserved.
6  * Copyright (c) 2004 Intel Corporation.  All rights reserved.
7  * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
8  * Copyright (c) 2004 Voltaire Corporation.  All rights reserved.
9  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
10  * Copyright (c) 2005, 2006 Cisco Systems.  All rights reserved.
11  *
12  * This software is available to you under a choice of one of two
13  * licenses.  You may choose to be licensed under the terms of the GNU
14  * General Public License (GPL) Version 2, available from the file
15  * COPYING in the main directory of this source tree, or the
16  * OpenIB.org BSD license below:
17  *
18  *     Redistribution and use in source and binary forms, with or
19  *     without modification, are permitted provided that the following
20  *     conditions are met:
21  *
22  *      - Redistributions of source code must retain the above
23  *        copyright notice, this list of conditions and the following
24  *        disclaimer.
25  *
26  *      - Redistributions in binary form must reproduce the above
27  *        copyright notice, this list of conditions and the following
28  *        disclaimer in the documentation and/or other materials
29  *        provided with the distribution.
30  *
31  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
32  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
33  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
34  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
35  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
36  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
37  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
38  * SOFTWARE.
39  */
40 
41 #include <sys/cdefs.h>
42 #include <linux/errno.h>
43 #include <linux/err.h>
44 #include <linux/string.h>
45 #include <linux/slab.h>
46 #include <linux/in.h>
47 #include <linux/in6.h>
48 #include <linux/wait.h>
49 
50 #include <rdma/ib_verbs.h>
51 #include <rdma/ib_cache.h>
52 #include <rdma/ib_addr.h>
53 
54 #include <netinet/ip.h>
55 #include <netinet/ip6.h>
56 
57 #include <machine/in_cksum.h>
58 
59 #include "core_priv.h"
60 
61 static const char * const ib_events[] = {
62 	[IB_EVENT_CQ_ERR]		= "CQ error",
63 	[IB_EVENT_QP_FATAL]		= "QP fatal error",
64 	[IB_EVENT_QP_REQ_ERR]		= "QP request error",
65 	[IB_EVENT_QP_ACCESS_ERR]	= "QP access error",
66 	[IB_EVENT_COMM_EST]		= "communication established",
67 	[IB_EVENT_SQ_DRAINED]		= "send queue drained",
68 	[IB_EVENT_PATH_MIG]		= "path migration successful",
69 	[IB_EVENT_PATH_MIG_ERR]		= "path migration error",
70 	[IB_EVENT_DEVICE_FATAL]		= "device fatal error",
71 	[IB_EVENT_PORT_ACTIVE]		= "port active",
72 	[IB_EVENT_PORT_ERR]		= "port error",
73 	[IB_EVENT_LID_CHANGE]		= "LID change",
74 	[IB_EVENT_PKEY_CHANGE]		= "P_key change",
75 	[IB_EVENT_SM_CHANGE]		= "SM change",
76 	[IB_EVENT_SRQ_ERR]		= "SRQ error",
77 	[IB_EVENT_SRQ_LIMIT_REACHED]	= "SRQ limit reached",
78 	[IB_EVENT_QP_LAST_WQE_REACHED]	= "last WQE reached",
79 	[IB_EVENT_CLIENT_REREGISTER]	= "client reregister",
80 	[IB_EVENT_GID_CHANGE]		= "GID changed",
81 };
82 
ib_event_msg(enum ib_event_type event)83 const char *__attribute_const__ ib_event_msg(enum ib_event_type event)
84 {
85 	size_t index = event;
86 
87 	return (index < ARRAY_SIZE(ib_events) && ib_events[index]) ?
88 			ib_events[index] : "unrecognized event";
89 }
90 EXPORT_SYMBOL(ib_event_msg);
91 
92 static const char * const wc_statuses[] = {
93 	[IB_WC_SUCCESS]			= "success",
94 	[IB_WC_LOC_LEN_ERR]		= "local length error",
95 	[IB_WC_LOC_QP_OP_ERR]		= "local QP operation error",
96 	[IB_WC_LOC_EEC_OP_ERR]		= "local EE context operation error",
97 	[IB_WC_LOC_PROT_ERR]		= "local protection error",
98 	[IB_WC_WR_FLUSH_ERR]		= "WR flushed",
99 	[IB_WC_MW_BIND_ERR]		= "memory management operation error",
100 	[IB_WC_BAD_RESP_ERR]		= "bad response error",
101 	[IB_WC_LOC_ACCESS_ERR]		= "local access error",
102 	[IB_WC_REM_INV_REQ_ERR]		= "invalid request error",
103 	[IB_WC_REM_ACCESS_ERR]		= "remote access error",
104 	[IB_WC_REM_OP_ERR]		= "remote operation error",
105 	[IB_WC_RETRY_EXC_ERR]		= "transport retry counter exceeded",
106 	[IB_WC_RNR_RETRY_EXC_ERR]	= "RNR retry counter exceeded",
107 	[IB_WC_LOC_RDD_VIOL_ERR]	= "local RDD violation error",
108 	[IB_WC_REM_INV_RD_REQ_ERR]	= "remote invalid RD request",
109 	[IB_WC_REM_ABORT_ERR]		= "operation aborted",
110 	[IB_WC_INV_EECN_ERR]		= "invalid EE context number",
111 	[IB_WC_INV_EEC_STATE_ERR]	= "invalid EE context state",
112 	[IB_WC_FATAL_ERR]		= "fatal error",
113 	[IB_WC_RESP_TIMEOUT_ERR]	= "response timeout error",
114 	[IB_WC_GENERAL_ERR]		= "general error",
115 };
116 
ib_wc_status_msg(enum ib_wc_status status)117 const char *__attribute_const__ ib_wc_status_msg(enum ib_wc_status status)
118 {
119 	size_t index = status;
120 
121 	return (index < ARRAY_SIZE(wc_statuses) && wc_statuses[index]) ?
122 			wc_statuses[index] : "unrecognized status";
123 }
124 EXPORT_SYMBOL(ib_wc_status_msg);
125 
ib_rate_to_mult(enum ib_rate rate)126 __attribute_const__ int ib_rate_to_mult(enum ib_rate rate)
127 {
128 	switch (rate) {
129 	case IB_RATE_2_5_GBPS: return   1;
130 	case IB_RATE_5_GBPS:   return   2;
131 	case IB_RATE_10_GBPS:  return   4;
132 	case IB_RATE_20_GBPS:  return   8;
133 	case IB_RATE_30_GBPS:  return  12;
134 	case IB_RATE_40_GBPS:  return  16;
135 	case IB_RATE_60_GBPS:  return  24;
136 	case IB_RATE_80_GBPS:  return  32;
137 	case IB_RATE_120_GBPS: return  48;
138 	case IB_RATE_14_GBPS:  return   6;
139 	case IB_RATE_56_GBPS:  return  22;
140 	case IB_RATE_112_GBPS: return  45;
141 	case IB_RATE_168_GBPS: return  67;
142 	case IB_RATE_25_GBPS:  return  10;
143 	case IB_RATE_100_GBPS: return  40;
144 	case IB_RATE_200_GBPS: return  80;
145 	case IB_RATE_300_GBPS: return 120;
146 	case IB_RATE_28_GBPS:  return  11;
147 	case IB_RATE_50_GBPS:  return  20;
148 	case IB_RATE_400_GBPS: return 160;
149 	case IB_RATE_600_GBPS: return 240;
150 	default:	       return  -1;
151 	}
152 }
153 EXPORT_SYMBOL(ib_rate_to_mult);
154 
mult_to_ib_rate(int mult)155 __attribute_const__ enum ib_rate mult_to_ib_rate(int mult)
156 {
157 	switch (mult) {
158 	case 1:   return IB_RATE_2_5_GBPS;
159 	case 2:   return IB_RATE_5_GBPS;
160 	case 4:   return IB_RATE_10_GBPS;
161 	case 8:   return IB_RATE_20_GBPS;
162 	case 12:  return IB_RATE_30_GBPS;
163 	case 16:  return IB_RATE_40_GBPS;
164 	case 24:  return IB_RATE_60_GBPS;
165 	case 32:  return IB_RATE_80_GBPS;
166 	case 48:  return IB_RATE_120_GBPS;
167 	case 6:   return IB_RATE_14_GBPS;
168 	case 22:  return IB_RATE_56_GBPS;
169 	case 45:  return IB_RATE_112_GBPS;
170 	case 67:  return IB_RATE_168_GBPS;
171 	case 10:  return IB_RATE_25_GBPS;
172 	case 40:  return IB_RATE_100_GBPS;
173 	case 80:  return IB_RATE_200_GBPS;
174 	case 120: return IB_RATE_300_GBPS;
175 	case 11:  return IB_RATE_28_GBPS;
176 	case 20:  return IB_RATE_50_GBPS;
177 	case 160: return IB_RATE_400_GBPS;
178 	case 240: return IB_RATE_600_GBPS;
179 	default:  return IB_RATE_PORT_CURRENT;
180 	}
181 }
182 EXPORT_SYMBOL(mult_to_ib_rate);
183 
ib_rate_to_mbps(enum ib_rate rate)184 __attribute_const__ int ib_rate_to_mbps(enum ib_rate rate)
185 {
186 	switch (rate) {
187 	case IB_RATE_2_5_GBPS: return 2500;
188 	case IB_RATE_5_GBPS:   return 5000;
189 	case IB_RATE_10_GBPS:  return 10000;
190 	case IB_RATE_20_GBPS:  return 20000;
191 	case IB_RATE_30_GBPS:  return 30000;
192 	case IB_RATE_40_GBPS:  return 40000;
193 	case IB_RATE_60_GBPS:  return 60000;
194 	case IB_RATE_80_GBPS:  return 80000;
195 	case IB_RATE_120_GBPS: return 120000;
196 	case IB_RATE_14_GBPS:  return 14062;
197 	case IB_RATE_56_GBPS:  return 56250;
198 	case IB_RATE_112_GBPS: return 112500;
199 	case IB_RATE_168_GBPS: return 168750;
200 	case IB_RATE_25_GBPS:  return 25781;
201 	case IB_RATE_100_GBPS: return 103125;
202 	case IB_RATE_200_GBPS: return 206250;
203 	case IB_RATE_300_GBPS: return 309375;
204 	case IB_RATE_28_GBPS:  return 28125;
205 	case IB_RATE_50_GBPS:  return 53125;
206 	case IB_RATE_400_GBPS: return 425000;
207 	case IB_RATE_600_GBPS: return 637500;
208 	default:	       return -1;
209 	}
210 }
211 EXPORT_SYMBOL(ib_rate_to_mbps);
212 
213 __attribute_const__ enum rdma_transport_type
rdma_node_get_transport(enum rdma_node_type node_type)214 rdma_node_get_transport(enum rdma_node_type node_type)
215 {
216 	switch (node_type) {
217 	case RDMA_NODE_IB_CA:
218 	case RDMA_NODE_IB_SWITCH:
219 	case RDMA_NODE_IB_ROUTER:
220 		return RDMA_TRANSPORT_IB;
221 	case RDMA_NODE_RNIC:
222 		return RDMA_TRANSPORT_IWARP;
223 	case RDMA_NODE_USNIC:
224 		return RDMA_TRANSPORT_USNIC;
225 	case RDMA_NODE_USNIC_UDP:
226 		return RDMA_TRANSPORT_USNIC_UDP;
227 	default:
228 		BUG();
229 		return 0;
230 	}
231 }
232 EXPORT_SYMBOL(rdma_node_get_transport);
233 
/**
 * rdma_port_get_link_layer - Report the link layer of a device port.
 * @device: Device to query.
 * @port_num: Port number passed through to the driver callback.
 *
 * If the driver supplies a get_link_layer callback its answer is returned
 * as is.  Otherwise the link layer is derived from the transport of the
 * device's node type: IB transport means InfiniBand, iWARP and both usNIC
 * transports mean Ethernet, anything else is unspecified.
 */
enum rdma_link_layer rdma_port_get_link_layer(struct ib_device *device, u8 port_num)
{
	enum rdma_transport_type transport;

	if (device->get_link_layer)
		return device->get_link_layer(device, port_num);

	transport = rdma_node_get_transport(device->node_type);

	if (transport == RDMA_TRANSPORT_IB)
		return IB_LINK_LAYER_INFINIBAND;

	if (transport == RDMA_TRANSPORT_IWARP ||
	    transport == RDMA_TRANSPORT_USNIC ||
	    transport == RDMA_TRANSPORT_USNIC_UDP)
		return IB_LINK_LAYER_ETHERNET;

	return IB_LINK_LAYER_UNSPECIFIED;
}
EXPORT_SYMBOL(rdma_port_get_link_layer);
251 
252 /* Protection domains */
253 
254 /**
255  * ib_alloc_pd - Allocates an unused protection domain.
256  * @device: The device on which to allocate the protection domain.
257  *
258  * A protection domain object provides an association between QPs, shared
259  * receive queues, address handles, memory regions, and memory windows.
260  *
261  * Every PD has a local_dma_lkey which can be used as the lkey value for local
262  * memory operations.
263  */
__ib_alloc_pd(struct ib_device * device,unsigned int flags,const char * caller)264 struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags,
265 		const char *caller)
266 {
267 	struct ib_pd *pd;
268 	int mr_access_flags = 0;
269 	int ret;
270 
271 	pd = rdma_zalloc_drv_obj(device, ib_pd);
272 	if (!pd)
273 		return ERR_PTR(-ENOMEM);
274 
275 	pd->device = device;
276 	pd->uobject = NULL;
277 	pd->__internal_mr = NULL;
278 	atomic_set(&pd->usecnt, 0);
279 	pd->flags = flags;
280 
281 	ret = device->alloc_pd(pd, NULL);
282 	if (ret) {
283 		kfree(pd);
284 		return ERR_PTR(ret);
285 	}
286 
287 	if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)
288 		pd->local_dma_lkey = device->local_dma_lkey;
289 	else
290 		mr_access_flags |= IB_ACCESS_LOCAL_WRITE;
291 
292 	if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
293 		pr_warn("%s: enabling unsafe global rkey\n", caller);
294 		mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE;
295 	}
296 
297 	if (mr_access_flags) {
298 		struct ib_mr *mr;
299 
300 		mr = pd->device->get_dma_mr(pd, mr_access_flags);
301 		if (IS_ERR(mr)) {
302 			ib_dealloc_pd(pd);
303 			return ERR_CAST(mr);
304 		}
305 
306 		mr->device	= pd->device;
307 		mr->pd		= pd;
308 		mr->type        = IB_MR_TYPE_DMA;
309 		mr->uobject	= NULL;
310 		mr->need_inval	= false;
311 
312 		pd->__internal_mr = mr;
313 
314 		if (!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY))
315 			pd->local_dma_lkey = pd->__internal_mr->lkey;
316 
317 		if (flags & IB_PD_UNSAFE_GLOBAL_RKEY)
318 			pd->unsafe_global_rkey = pd->__internal_mr->rkey;
319 	}
320 
321 	return pd;
322 }
323 EXPORT_SYMBOL(__ib_alloc_pd);
324 
325 /**
326  * ib_dealloc_pd_user - Deallocates a protection domain.
327  * @pd: The protection domain to deallocate.
328  * @udata: Valid user data or NULL for kernel object
329  *
330  * It is an error to call this function while any resources in the pd still
331  * exist.  The caller is responsible to synchronously destroy them and
332  * guarantee no new allocations will happen.
333  */
ib_dealloc_pd_user(struct ib_pd * pd,struct ib_udata * udata)334 void ib_dealloc_pd_user(struct ib_pd *pd, struct ib_udata *udata)
335 {
336 	int ret;
337 
338 	if (pd->__internal_mr) {
339 		ret = pd->device->dereg_mr(pd->__internal_mr, NULL);
340 		WARN_ON(ret);
341 		pd->__internal_mr = NULL;
342 	}
343 
344 	/* uverbs manipulates usecnt with proper locking, while the kabi
345 	   requires the caller to guarantee we can't race here. */
346 	WARN_ON(atomic_read(&pd->usecnt));
347 
348 	pd->device->dealloc_pd(pd, udata);
349 	kfree(pd);
350 }
351 EXPORT_SYMBOL(ib_dealloc_pd_user);
352 
353 /* Address handles */
354 
/*
 * Common address handle constructor.
 *
 * Allocates the driver object (sleeping only if RDMA_CREATE_AH_SLEEPABLE is
 * set in @flags), asks the driver to initialize it and, on success, takes a
 * reference on @pd.  Returns the handle or an ERR_PTR(): -EOPNOTSUPP when the
 * driver has no create_ah callback, -ENOMEM on allocation failure, or the
 * driver's own error code.
 */
static struct ib_ah *_ib_create_ah(struct ib_pd *pd,
				     struct ib_ah_attr *ah_attr,
				     u32 flags,
				     struct ib_udata *udata)
{
	struct ib_device *device = pd->device;
	struct ib_ah *new_ah;
	int err;

	might_sleep_if(flags & RDMA_CREATE_AH_SLEEPABLE);

	if (device->create_ah == NULL)
		return ERR_PTR(-EOPNOTSUPP);

	new_ah = rdma_zalloc_drv_obj_gfp(
		device, ib_ah,
		(flags & RDMA_CREATE_AH_SLEEPABLE) ? GFP_KERNEL : GFP_ATOMIC);
	if (new_ah == NULL)
		return ERR_PTR(-ENOMEM);

	new_ah->device = device;
	new_ah->pd = pd;

	err = device->create_ah(new_ah, ah_attr, flags, udata);
	if (err != 0) {
		kfree(new_ah);
		return ERR_PTR(err);
	}

	/* The handle now pins its protection domain. */
	atomic_inc(&pd->usecnt);
	return new_ah;
}
387 
388 /**
389  * rdma_create_ah - Creates an address handle for the
390  * given address vector.
391  * @pd: The protection domain associated with the address handle.
392  * @ah_attr: The attributes of the address vector.
393  * @flags: Create address handle flags (see enum rdma_create_ah_flags).
394  *
395  * It returns 0 on success and returns appropriate error code on error.
396  * The address handle is used to reference a local or global destination
397  * in all UD QP post sends.
398  */
ib_create_ah(struct ib_pd * pd,struct ib_ah_attr * ah_attr,u32 flags)399 struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr,
400 			   u32 flags)
401 {
402 	struct ib_ah *ah;
403 
404 	ah = _ib_create_ah(pd, ah_attr, flags, NULL);
405 
406 	return ah;
407 }
408 EXPORT_SYMBOL(ib_create_ah);
409 
410 /**
411  * ib_create_user_ah - Creates an address handle for the
412  * given address vector.
413  * It resolves destination mac address for ah attribute of RoCE type.
414  * @pd: The protection domain associated with the address handle.
415  * @ah_attr: The attributes of the address vector.
416  * @udata: pointer to user's input output buffer information need by
417  *         provider driver.
418  *
419  * It returns a valid address handle pointer on success and
420  * returns appropriate error code on error.
421  * The address handle is used to reference a local or global destination
422  * in all UD QP post sends.
423  */
ib_create_user_ah(struct ib_pd * pd,struct ib_ah_attr * ah_attr,struct ib_udata * udata)424 struct ib_ah *ib_create_user_ah(struct ib_pd *pd,
425 				struct ib_ah_attr *ah_attr,
426 				struct ib_udata *udata)
427 {
428 	int err;
429 
430 	if (rdma_protocol_roce(pd->device, ah_attr->port_num)) {
431 		err = ib_resolve_eth_dmac(pd->device, ah_attr);
432 		if (err)
433 			return ERR_PTR(err);
434 	}
435 
436 	return _ib_create_ah(pd, ah_attr, RDMA_CREATE_AH_SLEEPABLE, udata);
437 }
438 EXPORT_SYMBOL(ib_create_user_ah);
439 
ib_get_header_version(const union rdma_network_hdr * hdr)440 static int ib_get_header_version(const union rdma_network_hdr *hdr)
441 {
442 	const struct ip *ip4h = (const struct ip *)&hdr->roce4grh;
443 	struct ip ip4h_checked;
444 	const struct ip6_hdr *ip6h = (const struct ip6_hdr *)&hdr->ibgrh;
445 
446 	/* If it's IPv6, the version must be 6, otherwise, the first
447 	 * 20 bytes (before the IPv4 header) are garbled.
448 	 */
449 	if ((ip6h->ip6_vfc & IPV6_VERSION_MASK) != IPV6_VERSION)
450 		return (ip4h->ip_v == 4) ? 4 : 0;
451 	/* version may be 6 or 4 because the first 20 bytes could be garbled */
452 
453 	/* RoCE v2 requires no options, thus header length
454 	 * must be 5 words
455 	 */
456 	if (ip4h->ip_hl != 5)
457 		return 6;
458 
459 	/* Verify checksum.
460 	 * We can't write on scattered buffers so we need to copy to
461 	 * temp buffer.
462 	 */
463 	memcpy(&ip4h_checked, ip4h, sizeof(ip4h_checked));
464 	ip4h_checked.ip_sum = 0;
465 #if defined(INET) || defined(INET6)
466 	ip4h_checked.ip_sum = in_cksum_hdr(&ip4h_checked);
467 #endif
468 	/* if IPv4 header checksum is OK, believe it */
469 	if (ip4h->ip_sum == ip4h_checked.ip_sum)
470 		return 4;
471 	return 6;
472 }
473 
/*
 * Derive the network type of a received packet from its GRH.
 *
 * IB ports always yield RDMA_NETWORK_IB.  Otherwise an IPv4 header yields
 * RDMA_NETWORK_IPV4, a header whose next header is UDP yields
 * RDMA_NETWORK_IPV6, and everything else is RoCE v1.
 */
static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device,
						     u8 port_num,
						     const struct ib_grh *grh)
{
	if (rdma_protocol_ib(device, port_num))
		return RDMA_NETWORK_IB;

	if (ib_get_header_version((const union rdma_network_hdr *)grh) == 4)
		return RDMA_NETWORK_IPV4;

	return (grh->next_hdr == IPPROTO_UDP) ?
	    RDMA_NETWORK_IPV6 : RDMA_NETWORK_ROCE_V1;
}
493 
494 struct find_gid_index_context {
495 	u16 vlan_id;
496 	enum ib_gid_type gid_type;
497 };
498 
499 
500 /*
501  * This function will return true only if a inspected GID index
502  * matches the request based on the GID type and VLAN configuration
503  */
find_gid_index(const union ib_gid * gid,const struct ib_gid_attr * gid_attr,void * context)504 static bool find_gid_index(const union ib_gid *gid,
505 			   const struct ib_gid_attr *gid_attr,
506 			   void *context)
507 {
508 	u16 vlan_diff;
509 	struct find_gid_index_context *ctx =
510 		(struct find_gid_index_context *)context;
511 
512 	if (ctx->gid_type != gid_attr->gid_type)
513 		return false;
514 
515 	/*
516 	 * The following will verify:
517 	 * 1. VLAN ID matching for VLAN tagged requests.
518 	 * 2. prio-tagged/untagged to prio-tagged/untagged matching.
519 	 *
520 	 * This XOR is valid, since 0x0 < vlan_id < 0x0FFF.
521 	 */
522 	vlan_diff = rdma_vlan_dev_vlan_id(gid_attr->ndev) ^ ctx->vlan_id;
523 
524 	return (vlan_diff == 0x0000 || vlan_diff == 0xFFFF);
525 }
526 
/*
 * Look up the GID table index of @sgid on @port_num, accepting only entries
 * that find_gid_index() approves for the given VLAN ID and GID type.  The
 * index is stored in @gid_index; the return value is that of
 * ib_find_gid_by_filter().
 */
static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num,
				   u16 vlan_id, const union ib_gid *sgid,
				   enum ib_gid_type gid_type,
				   u16 *gid_index)
{
	struct find_gid_index_context criteria;

	criteria.vlan_id = vlan_id;
	criteria.gid_type = gid_type;

	return ib_find_gid_by_filter(device, sgid, port_num, find_gid_index,
				     &criteria, gid_index);
}
538 
get_gids_from_rdma_hdr(const union rdma_network_hdr * hdr,enum rdma_network_type net_type,union ib_gid * sgid,union ib_gid * dgid)539 static int get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr,
540 				  enum rdma_network_type net_type,
541 				  union ib_gid *sgid, union ib_gid *dgid)
542 {
543 	struct sockaddr_in  src_in;
544 	struct sockaddr_in  dst_in;
545 	__be32 src_saddr, dst_saddr;
546 
547 	if (!sgid || !dgid)
548 		return -EINVAL;
549 
550 	if (net_type == RDMA_NETWORK_IPV4) {
551 		memcpy(&src_in.sin_addr.s_addr,
552 		       &hdr->roce4grh.ip_src, 4);
553 		memcpy(&dst_in.sin_addr.s_addr,
554 		       &hdr->roce4grh.ip_dst, 4);
555 		src_saddr = src_in.sin_addr.s_addr;
556 		dst_saddr = dst_in.sin_addr.s_addr;
557 		ipv6_addr_set_v4mapped(src_saddr,
558 				       (struct in6_addr *)sgid);
559 		ipv6_addr_set_v4mapped(dst_saddr,
560 				       (struct in6_addr *)dgid);
561 		return 0;
562 	} else if (net_type == RDMA_NETWORK_IPV6 ||
563 		   net_type == RDMA_NETWORK_IB) {
564 		*dgid = hdr->ibgrh.dgid;
565 		*sgid = hdr->ibgrh.sgid;
566 		return 0;
567 	} else {
568 		return -EINVAL;
569 	}
570 }
571 
/**
 * ib_init_ah_from_wc - Initializes address handle attributes from a
 *   work completion.
 * @device: Device on which the received message arrived.
 * @port_num: Port on which the received message arrived.
 * @wc: Work completion associated with the received message.
 * @grh: References the received global route header.
 * @ah_attr: Returned attributes that can be used when creating an address
 *   handle for replying to the message.
 *
 * @ah_attr is zeroed first.  On RoCE ports a GRH is mandatory (-EPROTOTYPE
 * otherwise) and the destination MAC and hop limit are resolved through the
 * network device backing the matching GID entry.  Returns 0 on success or a
 * negative error code.
 */
int ib_init_ah_from_wc(struct ib_device *device, u8 port_num,
		       const struct ib_wc *wc, const struct ib_grh *grh,
		       struct ib_ah_attr *ah_attr)
{
	enum rdma_network_type net_type = RDMA_NETWORK_IB;
	enum ib_gid_type gid_type = IB_GID_TYPE_IB;
	union ib_gid sgid;
	union ib_gid dgid;
	u16 gid_index = 0;
	int hoplimit = 0xff;
	u32 vtf;
	int err;

	memset(ah_attr, 0, sizeof(*ah_attr));

	/* Ethernet address handles: figure out which header we received. */
	if (rdma_cap_eth_ah(device, port_num)) {
		if (wc->wc_flags & IB_WC_WITH_NETWORK_HDR_TYPE)
			net_type = wc->network_hdr_type;
		else
			net_type = ib_get_net_type_by_grh(device, port_num, grh);
		gid_type = ib_network_to_gid_type(net_type);
	}

	err = get_gids_from_rdma_hdr((const union rdma_network_hdr *)grh,
				     net_type, &sgid, &dgid);
	if (err)
		return err;

	if (rdma_protocol_roce(device, port_num)) {
		struct ib_gid_attr dgid_attr;
		u16 vlan_id = 0xffff;

		if (wc->wc_flags & IB_WC_WITH_VLAN)
			vlan_id = wc->vlan_id;

		if (!(wc->wc_flags & IB_WC_GRH))
			return -EPROTOTYPE;

		/* The packet's destination GID is our source GID. */
		err = get_sgid_index_from_eth(device, port_num, vlan_id,
					      &dgid, gid_type, &gid_index);
		if (err)
			return err;

		err = ib_get_cached_gid(device, port_num, gid_index, &dgid,
					&dgid_attr);
		if (err)
			return err;

		if (dgid_attr.ndev == NULL)
			return -ENODEV;

		err = rdma_addr_find_l2_eth_by_grh(&dgid, &sgid, ah_attr->dmac,
		    dgid_attr.ndev, &hoplimit);

		/* Drop the netdev reference before looking at the result. */
		dev_put(dgid_attr.ndev);
		if (err)
			return err;
	}

	ah_attr->dlid = wc->slid;
	ah_attr->sl = wc->sl;
	ah_attr->src_path_bits = wc->dlid_path_bits;
	ah_attr->port_num = port_num;

	/* Without a GRH there is nothing more to fill in. */
	if (!(wc->wc_flags & IB_WC_GRH))
		return 0;

	ah_attr->ah_flags = IB_AH_GRH;
	ah_attr->grh.dgid = sgid;

	if (!rdma_cap_eth_ah(device, port_num) &&
	    dgid.global.interface_id != cpu_to_be64(IB_SA_WELL_KNOWN_GUID)) {
		err = ib_find_cached_gid_by_port(device, &dgid,
						 IB_GID_TYPE_IB,
						 port_num, NULL,
						 &gid_index);
		if (err)
			return err;
	}

	ah_attr->grh.sgid_index = (u8) gid_index;

	/* Split version/tclass/flow: flow label is the low 20 bits. */
	vtf = be32_to_cpu(grh->version_tclass_flow);
	ah_attr->grh.flow_label = vtf & 0xFFFFF;
	ah_attr->grh.hop_limit = hoplimit;
	ah_attr->grh.traffic_class = (vtf >> 20) & 0xFF;

	return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_wc);
655 
/**
 * ib_create_ah_from_wc - Creates an address handle associated with the
 *   sender of the specified work completion.
 * @pd: The protection domain associated with the address handle.
 * @wc: Work completion information associated with a received message.
 * @grh: References the received global route header.
 * @port_num: The outbound port number to associate with the address.
 *
 * The attributes are derived with ib_init_ah_from_wc() and the handle is
 * created with RDMA_CREATE_AH_SLEEPABLE.  Returns the handle or an
 * ERR_PTR() encoded error.
 */
struct ib_ah *ib_create_ah_from_wc(struct ib_pd *pd, const struct ib_wc *wc,
				   const struct ib_grh *grh, u8 port_num)
{
	struct ib_ah_attr reply_attr;
	int err;

	err = ib_init_ah_from_wc(pd->device, port_num, wc, grh, &reply_attr);
	if (err != 0)
		return ERR_PTR(err);

	return ib_create_ah(pd, &reply_attr, RDMA_CREATE_AH_SLEEPABLE);
}
EXPORT_SYMBOL(ib_create_ah_from_wc);
669 
ib_modify_ah(struct ib_ah * ah,struct ib_ah_attr * ah_attr)670 int ib_modify_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
671 {
672 	return ah->device->modify_ah ?
673 		ah->device->modify_ah(ah, ah_attr) :
674 		-ENOSYS;
675 }
676 EXPORT_SYMBOL(ib_modify_ah);
677 
ib_query_ah(struct ib_ah * ah,struct ib_ah_attr * ah_attr)678 int ib_query_ah(struct ib_ah *ah, struct ib_ah_attr *ah_attr)
679 {
680 	return ah->device->query_ah ?
681 		ah->device->query_ah(ah, ah_attr) :
682 		-ENOSYS;
683 }
684 EXPORT_SYMBOL(ib_query_ah);
685 
/**
 * ib_destroy_ah_user - Destroys an address handle.
 * @ah: The address handle to destroy.
 * @flags: Destroy address handle flags (see enum rdma_destroy_ah_flags).
 * @udata: Valid user data or NULL for kernel objects; not used here.
 *
 * Hands @ah to the driver, drops the reference held on its protection
 * domain and frees the handle.  Always returns 0.
 */
int ib_destroy_ah_user(struct ib_ah *ah, u32 flags, struct ib_udata *udata)
{
	struct ib_pd *owner;

	might_sleep_if(flags & RDMA_DESTROY_AH_SLEEPABLE);

	/* Remember the PD before the driver tears the handle down. */
	owner = ah->pd;

	ah->device->destroy_ah(ah, flags);
	atomic_dec(&owner->usecnt);
	kfree(ah);

	return 0;
}
EXPORT_SYMBOL(ib_destroy_ah_user);
700 
701 /* Shared receive queues */
702 
ib_create_srq(struct ib_pd * pd,struct ib_srq_init_attr * srq_init_attr)703 struct ib_srq *ib_create_srq(struct ib_pd *pd,
704 			     struct ib_srq_init_attr *srq_init_attr)
705 {
706 	struct ib_srq *srq;
707 	int ret;
708 
709 	if (!pd->device->create_srq)
710 		return ERR_PTR(-EOPNOTSUPP);
711 
712 	srq = rdma_zalloc_drv_obj(pd->device, ib_srq);
713 	if (!srq)
714 		return ERR_PTR(-ENOMEM);
715 
716 	srq->device = pd->device;
717 	srq->pd = pd;
718 	srq->event_handler = srq_init_attr->event_handler;
719 	srq->srq_context = srq_init_attr->srq_context;
720 	srq->srq_type = srq_init_attr->srq_type;
721 
722 	if (ib_srq_has_cq(srq->srq_type)) {
723 		srq->ext.cq = srq_init_attr->ext.cq;
724 		atomic_inc(&srq->ext.cq->usecnt);
725 	}
726 	if (srq->srq_type == IB_SRQT_XRC) {
727 		srq->ext.xrc.xrcd = srq_init_attr->ext.xrc.xrcd;
728 		atomic_inc(&srq->ext.xrc.xrcd->usecnt);
729 	}
730 	atomic_inc(&pd->usecnt);
731 
732 	ret = pd->device->create_srq(srq, srq_init_attr, NULL);
733 	if (ret) {
734 		atomic_dec(&srq->pd->usecnt);
735 		if (srq->srq_type == IB_SRQT_XRC)
736 			atomic_dec(&srq->ext.xrc.xrcd->usecnt);
737 		if (ib_srq_has_cq(srq->srq_type))
738 			atomic_dec(&srq->ext.cq->usecnt);
739 		kfree(srq);
740 		return ERR_PTR(ret);
741 	}
742 
743 	return srq;
744 }
745 EXPORT_SYMBOL(ib_create_srq);
746 
ib_modify_srq(struct ib_srq * srq,struct ib_srq_attr * srq_attr,enum ib_srq_attr_mask srq_attr_mask)747 int ib_modify_srq(struct ib_srq *srq,
748 		  struct ib_srq_attr *srq_attr,
749 		  enum ib_srq_attr_mask srq_attr_mask)
750 {
751 	return srq->device->modify_srq ?
752 		srq->device->modify_srq(srq, srq_attr, srq_attr_mask, NULL) :
753 		-ENOSYS;
754 }
755 EXPORT_SYMBOL(ib_modify_srq);
756 
ib_query_srq(struct ib_srq * srq,struct ib_srq_attr * srq_attr)757 int ib_query_srq(struct ib_srq *srq,
758 		 struct ib_srq_attr *srq_attr)
759 {
760 	return srq->device->query_srq ?
761 		srq->device->query_srq(srq, srq_attr) : -ENOSYS;
762 }
763 EXPORT_SYMBOL(ib_query_srq);
764 
ib_destroy_srq_user(struct ib_srq * srq,struct ib_udata * udata)765 int ib_destroy_srq_user(struct ib_srq *srq, struct ib_udata *udata)
766 {
767 	if (atomic_read(&srq->usecnt))
768 		return -EBUSY;
769 
770 	srq->device->destroy_srq(srq, udata);
771 
772 	atomic_dec(&srq->pd->usecnt);
773 	if (srq->srq_type == IB_SRQT_XRC)
774 		atomic_dec(&srq->ext.xrc.xrcd->usecnt);
775 	if (ib_srq_has_cq(srq->srq_type))
776 		atomic_dec(&srq->ext.cq->usecnt);
777 	kfree(srq);
778 
779 	return 0;
780 }
781 EXPORT_SYMBOL(ib_destroy_srq_user);
782 
783 /* Queue pairs */
784 
__ib_shared_qp_event_handler(struct ib_event * event,void * context)785 static void __ib_shared_qp_event_handler(struct ib_event *event, void *context)
786 {
787 	struct ib_qp *qp = context;
788 	unsigned long flags;
789 
790 	spin_lock_irqsave(&qp->device->event_handler_lock, flags);
791 	list_for_each_entry(event->element.qp, &qp->open_list, open_list)
792 		if (event->element.qp->event_handler)
793 			event->element.qp->event_handler(event, event->element.qp->qp_context);
794 	spin_unlock_irqrestore(&qp->device->event_handler_lock, flags);
795 }
796 
__ib_insert_xrcd_qp(struct ib_xrcd * xrcd,struct ib_qp * qp)797 static void __ib_insert_xrcd_qp(struct ib_xrcd *xrcd, struct ib_qp *qp)
798 {
799 	mutex_lock(&xrcd->tgt_qp_mutex);
800 	list_add(&qp->xrcd_list, &xrcd->tgt_qp_list);
801 	mutex_unlock(&xrcd->tgt_qp_mutex);
802 }
803 
__ib_open_qp(struct ib_qp * real_qp,void (* event_handler)(struct ib_event *,void *),void * qp_context)804 static struct ib_qp *__ib_open_qp(struct ib_qp *real_qp,
805 				  void (*event_handler)(struct ib_event *, void *),
806 				  void *qp_context)
807 {
808 	struct ib_qp *qp;
809 	unsigned long flags;
810 
811 	qp = kzalloc(sizeof *qp, GFP_KERNEL);
812 	if (!qp)
813 		return ERR_PTR(-ENOMEM);
814 
815 	qp->real_qp = real_qp;
816 	atomic_inc(&real_qp->usecnt);
817 	qp->device = real_qp->device;
818 	qp->event_handler = event_handler;
819 	qp->qp_context = qp_context;
820 	qp->qp_num = real_qp->qp_num;
821 	qp->qp_type = real_qp->qp_type;
822 
823 	spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
824 	list_add(&qp->open_list, &real_qp->open_list);
825 	spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
826 
827 	return qp;
828 }
829 
ib_open_qp(struct ib_xrcd * xrcd,struct ib_qp_open_attr * qp_open_attr)830 struct ib_qp *ib_open_qp(struct ib_xrcd *xrcd,
831 			 struct ib_qp_open_attr *qp_open_attr)
832 {
833 	struct ib_qp *qp, *real_qp;
834 
835 	if (qp_open_attr->qp_type != IB_QPT_XRC_TGT)
836 		return ERR_PTR(-EINVAL);
837 
838 	qp = ERR_PTR(-EINVAL);
839 	mutex_lock(&xrcd->tgt_qp_mutex);
840 	list_for_each_entry(real_qp, &xrcd->tgt_qp_list, xrcd_list) {
841 		if (real_qp->qp_num == qp_open_attr->qp_num) {
842 			qp = __ib_open_qp(real_qp, qp_open_attr->event_handler,
843 					  qp_open_attr->qp_context);
844 			break;
845 		}
846 	}
847 	mutex_unlock(&xrcd->tgt_qp_mutex);
848 	return qp;
849 }
850 EXPORT_SYMBOL(ib_open_qp);
851 
ib_create_xrc_qp(struct ib_qp * qp,struct ib_qp_init_attr * qp_init_attr)852 static struct ib_qp *ib_create_xrc_qp(struct ib_qp *qp,
853 		struct ib_qp_init_attr *qp_init_attr)
854 {
855 	struct ib_qp *real_qp = qp;
856 
857 	qp->event_handler = __ib_shared_qp_event_handler;
858 	qp->qp_context = qp;
859 	qp->pd = NULL;
860 	qp->send_cq = qp->recv_cq = NULL;
861 	qp->srq = NULL;
862 	qp->xrcd = qp_init_attr->xrcd;
863 	atomic_inc(&qp_init_attr->xrcd->usecnt);
864 	INIT_LIST_HEAD(&qp->open_list);
865 
866 	qp = __ib_open_qp(real_qp, qp_init_attr->event_handler,
867 			  qp_init_attr->qp_context);
868 	if (!IS_ERR(qp))
869 		__ib_insert_xrcd_qp(qp_init_attr->xrcd, real_qp);
870 	else
871 		real_qp->device->destroy_qp(real_qp, NULL);
872 	return qp;
873 }
874 
ib_create_qp(struct ib_pd * pd,struct ib_qp_init_attr * qp_init_attr)875 struct ib_qp *ib_create_qp(struct ib_pd *pd,
876 			   struct ib_qp_init_attr *qp_init_attr)
877 {
878 	struct ib_device *device = pd ? pd->device : qp_init_attr->xrcd->device;
879 	struct ib_qp *qp;
880 
881 	if (qp_init_attr->rwq_ind_tbl &&
882 	    (qp_init_attr->recv_cq ||
883 	    qp_init_attr->srq || qp_init_attr->cap.max_recv_wr ||
884 	    qp_init_attr->cap.max_recv_sge))
885 		return ERR_PTR(-EINVAL);
886 
887 	qp = _ib_create_qp(device, pd, qp_init_attr, NULL, NULL);
888 	if (IS_ERR(qp))
889 		return qp;
890 
891 	qp->device     = device;
892 	qp->real_qp    = qp;
893 	qp->uobject    = NULL;
894 	qp->qp_type    = qp_init_attr->qp_type;
895 	qp->rwq_ind_tbl = qp_init_attr->rwq_ind_tbl;
896 
897 	atomic_set(&qp->usecnt, 0);
898 	spin_lock_init(&qp->mr_lock);
899 
900 	if (qp_init_attr->qp_type == IB_QPT_XRC_TGT)
901 		return ib_create_xrc_qp(qp, qp_init_attr);
902 
903 	qp->event_handler = qp_init_attr->event_handler;
904 	qp->qp_context = qp_init_attr->qp_context;
905 	if (qp_init_attr->qp_type == IB_QPT_XRC_INI) {
906 		qp->recv_cq = NULL;
907 		qp->srq = NULL;
908 	} else {
909 		qp->recv_cq = qp_init_attr->recv_cq;
910 		if (qp_init_attr->recv_cq)
911 			atomic_inc(&qp_init_attr->recv_cq->usecnt);
912 		qp->srq = qp_init_attr->srq;
913 		if (qp->srq)
914 			atomic_inc(&qp_init_attr->srq->usecnt);
915 	}
916 
917 	qp->pd	    = pd;
918 	qp->send_cq = qp_init_attr->send_cq;
919 	qp->xrcd    = NULL;
920 
921 	atomic_inc(&pd->usecnt);
922 	if (qp_init_attr->send_cq)
923 		atomic_inc(&qp_init_attr->send_cq->usecnt);
924 	if (qp_init_attr->rwq_ind_tbl)
925 		atomic_inc(&qp->rwq_ind_tbl->usecnt);
926 
927 	/*
928 	 * Note: all hw drivers guarantee that max_send_sge is lower than
929 	 * the device RDMA WRITE SGE limit but not all hw drivers ensure that
930 	 * max_send_sge <= max_sge_rd.
931 	 */
932 	qp->max_write_sge = qp_init_attr->cap.max_send_sge;
933 	qp->max_read_sge = min_t(u32, qp_init_attr->cap.max_send_sge,
934 				 device->attrs.max_sge_rd);
935 
936 	return qp;
937 }
938 EXPORT_SYMBOL(ib_create_qp);
939 
/*
 * qp_state_table[cur][next] describes the QP state transition cur -> next:
 *  - valid:           non-zero when the transition is permitted at all;
 *  - req_param[type]: attribute-mask bits that must be present for a QP
 *                     of the given type;
 *  - opt_param[type]: attribute-mask bits that may additionally be present.
 * Entries that are not spelled out are zero-initialized (invalid transition
 * or empty mask).  The table is consumed by ib_modify_qp_is_ok().
 */
940 static const struct {
941 	int			valid;
942 	enum ib_qp_attr_mask	req_param[IB_QPT_MAX];
943 	enum ib_qp_attr_mask	opt_param[IB_QPT_MAX];
944 } qp_state_table[IB_QPS_ERR + 1][IB_QPS_ERR + 1] = {
945 	[IB_QPS_RESET] = {
946 		[IB_QPS_RESET] = { .valid = 1 },
947 		[IB_QPS_INIT]  = {
948 			.valid = 1,
949 			.req_param = {
950 				[IB_QPT_UD]  = (IB_QP_PKEY_INDEX		|
951 						IB_QP_PORT			|
952 						IB_QP_QKEY),
953 				[IB_QPT_RAW_PACKET] = IB_QP_PORT,
954 				[IB_QPT_UC]  = (IB_QP_PKEY_INDEX		|
955 						IB_QP_PORT			|
956 						IB_QP_ACCESS_FLAGS),
957 				[IB_QPT_RC]  = (IB_QP_PKEY_INDEX		|
958 						IB_QP_PORT			|
959 						IB_QP_ACCESS_FLAGS),
960 				[IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX		|
961 						IB_QP_PORT			|
962 						IB_QP_ACCESS_FLAGS),
963 				[IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX		|
964 						IB_QP_PORT			|
965 						IB_QP_ACCESS_FLAGS),
966 				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|
967 						IB_QP_QKEY),
968 				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|
969 						IB_QP_QKEY),
970 			}
971 		},
972 	},
973 	[IB_QPS_INIT]  = {
974 		[IB_QPS_RESET] = { .valid = 1 },
975 		[IB_QPS_ERR] =   { .valid = 1 },
976 		[IB_QPS_INIT]  = {
977 			.valid = 1,
978 			.opt_param = {
979 				[IB_QPT_UD]  = (IB_QP_PKEY_INDEX		|
980 						IB_QP_PORT			|
981 						IB_QP_QKEY),
982 				[IB_QPT_UC]  = (IB_QP_PKEY_INDEX		|
983 						IB_QP_PORT			|
984 						IB_QP_ACCESS_FLAGS),
985 				[IB_QPT_RC]  = (IB_QP_PKEY_INDEX		|
986 						IB_QP_PORT			|
987 						IB_QP_ACCESS_FLAGS),
988 				[IB_QPT_XRC_INI] = (IB_QP_PKEY_INDEX		|
989 						IB_QP_PORT			|
990 						IB_QP_ACCESS_FLAGS),
991 				[IB_QPT_XRC_TGT] = (IB_QP_PKEY_INDEX		|
992 						IB_QP_PORT			|
993 						IB_QP_ACCESS_FLAGS),
994 				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|
995 						IB_QP_QKEY),
996 				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|
997 						IB_QP_QKEY),
998 			}
999 		},
1000 		[IB_QPS_RTR]   = {
1001 			.valid = 1,
1002 			.req_param = {
1003 				[IB_QPT_UC]  = (IB_QP_AV			|
1004 						IB_QP_PATH_MTU			|
1005 						IB_QP_DEST_QPN			|
1006 						IB_QP_RQ_PSN),
1007 				[IB_QPT_RC]  = (IB_QP_AV			|
1008 						IB_QP_PATH_MTU			|
1009 						IB_QP_DEST_QPN			|
1010 						IB_QP_RQ_PSN			|
1011 						IB_QP_MAX_DEST_RD_ATOMIC	|
1012 						IB_QP_MIN_RNR_TIMER),
1013 				[IB_QPT_XRC_INI] = (IB_QP_AV			|
1014 						IB_QP_PATH_MTU			|
1015 						IB_QP_DEST_QPN			|
1016 						IB_QP_RQ_PSN),
1017 				[IB_QPT_XRC_TGT] = (IB_QP_AV			|
1018 						IB_QP_PATH_MTU			|
1019 						IB_QP_DEST_QPN			|
1020 						IB_QP_RQ_PSN			|
1021 						IB_QP_MAX_DEST_RD_ATOMIC	|
1022 						IB_QP_MIN_RNR_TIMER),
1023 			},
1024 			.opt_param = {
1025 				 [IB_QPT_UD]  = (IB_QP_PKEY_INDEX		|
1026 						 IB_QP_QKEY),
1027 				 [IB_QPT_UC]  = (IB_QP_ALT_PATH			|
1028 						 IB_QP_ACCESS_FLAGS		|
1029 						 IB_QP_PKEY_INDEX),
1030 				 [IB_QPT_RC]  = (IB_QP_ALT_PATH			|
1031 						 IB_QP_ACCESS_FLAGS		|
1032 						 IB_QP_PKEY_INDEX),
1033 				 [IB_QPT_XRC_INI] = (IB_QP_ALT_PATH		|
1034 						 IB_QP_ACCESS_FLAGS		|
1035 						 IB_QP_PKEY_INDEX),
1036 				 [IB_QPT_XRC_TGT] = (IB_QP_ALT_PATH		|
1037 						 IB_QP_ACCESS_FLAGS		|
1038 						 IB_QP_PKEY_INDEX),
1039 				 [IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|
1040 						 IB_QP_QKEY),
1041 				 [IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|
1042 						 IB_QP_QKEY),
1043 			 },
1044 		},
1045 	},
1046 	[IB_QPS_RTR]   = {
1047 		[IB_QPS_RESET] = { .valid = 1 },
1048 		[IB_QPS_ERR] =   { .valid = 1 },
1049 		[IB_QPS_RTS]   = {
1050 			.valid = 1,
1051 			.req_param = {
1052 				[IB_QPT_UD]  = IB_QP_SQ_PSN,
1053 				[IB_QPT_UC]  = IB_QP_SQ_PSN,
1054 				[IB_QPT_RC]  = (IB_QP_TIMEOUT			|
1055 						IB_QP_RETRY_CNT			|
1056 						IB_QP_RNR_RETRY			|
1057 						IB_QP_SQ_PSN			|
1058 						IB_QP_MAX_QP_RD_ATOMIC),
1059 				[IB_QPT_XRC_INI] = (IB_QP_TIMEOUT		|
1060 						IB_QP_RETRY_CNT			|
1061 						IB_QP_RNR_RETRY			|
1062 						IB_QP_SQ_PSN			|
1063 						IB_QP_MAX_QP_RD_ATOMIC),
1064 				[IB_QPT_XRC_TGT] = (IB_QP_TIMEOUT		|
1065 						IB_QP_SQ_PSN),
1066 				[IB_QPT_SMI] = IB_QP_SQ_PSN,
1067 				[IB_QPT_GSI] = IB_QP_SQ_PSN,
1068 			},
1069 			.opt_param = {
1070 				 [IB_QPT_UD]  = (IB_QP_CUR_STATE		|
1071 						 IB_QP_QKEY),
1072 				 [IB_QPT_UC]  = (IB_QP_CUR_STATE		|
1073 						 IB_QP_ALT_PATH			|
1074 						 IB_QP_ACCESS_FLAGS		|
1075 						 IB_QP_PATH_MIG_STATE),
1076 				 [IB_QPT_RC]  = (IB_QP_CUR_STATE		|
1077 						 IB_QP_ALT_PATH			|
1078 						 IB_QP_ACCESS_FLAGS		|
1079 						 IB_QP_MIN_RNR_TIMER		|
1080 						 IB_QP_PATH_MIG_STATE),
1081 				 [IB_QPT_XRC_INI] = (IB_QP_CUR_STATE		|
1082 						 IB_QP_ALT_PATH			|
1083 						 IB_QP_ACCESS_FLAGS		|
1084 						 IB_QP_PATH_MIG_STATE),
1085 				 [IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE		|
1086 						 IB_QP_ALT_PATH			|
1087 						 IB_QP_ACCESS_FLAGS		|
1088 						 IB_QP_MIN_RNR_TIMER		|
1089 						 IB_QP_PATH_MIG_STATE),
1090 				 [IB_QPT_SMI] = (IB_QP_CUR_STATE		|
1091 						 IB_QP_QKEY),
1092 				 [IB_QPT_GSI] = (IB_QP_CUR_STATE		|
1093 						 IB_QP_QKEY),
1094 				 [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT,
1095 			 }
1096 		}
1097 	},
1098 	[IB_QPS_RTS]   = {
1099 		[IB_QPS_RESET] = { .valid = 1 },
1100 		[IB_QPS_ERR] =   { .valid = 1 },
1101 		[IB_QPS_RTS]   = {
1102 			.valid = 1,
1103 			.opt_param = {
1104 				[IB_QPT_UD]  = (IB_QP_CUR_STATE			|
1105 						IB_QP_QKEY),
1106 				[IB_QPT_UC]  = (IB_QP_CUR_STATE			|
1107 						IB_QP_ACCESS_FLAGS		|
1108 						IB_QP_ALT_PATH			|
1109 						IB_QP_PATH_MIG_STATE),
1110 				[IB_QPT_RC]  = (IB_QP_CUR_STATE			|
1111 						IB_QP_ACCESS_FLAGS		|
1112 						IB_QP_ALT_PATH			|
1113 						IB_QP_PATH_MIG_STATE		|
1114 						IB_QP_MIN_RNR_TIMER),
1115 				[IB_QPT_XRC_INI] = (IB_QP_CUR_STATE		|
1116 						IB_QP_ACCESS_FLAGS		|
1117 						IB_QP_ALT_PATH			|
1118 						IB_QP_PATH_MIG_STATE),
1119 				[IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE		|
1120 						IB_QP_ACCESS_FLAGS		|
1121 						IB_QP_ALT_PATH			|
1122 						IB_QP_PATH_MIG_STATE		|
1123 						IB_QP_MIN_RNR_TIMER),
1124 				[IB_QPT_SMI] = (IB_QP_CUR_STATE			|
1125 						IB_QP_QKEY),
1126 				[IB_QPT_GSI] = (IB_QP_CUR_STATE			|
1127 						IB_QP_QKEY),
1128 				[IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT,
1129 			}
1130 		},
1131 		[IB_QPS_SQD]   = {
1132 			.valid = 1,
1133 			.opt_param = {
1134 				[IB_QPT_UD]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
1135 				[IB_QPT_UC]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
1136 				[IB_QPT_RC]  = IB_QP_EN_SQD_ASYNC_NOTIFY,
1137 				[IB_QPT_XRC_INI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
1138 				[IB_QPT_XRC_TGT] = IB_QP_EN_SQD_ASYNC_NOTIFY, /* ??? */
1139 				[IB_QPT_SMI] = IB_QP_EN_SQD_ASYNC_NOTIFY,
1140 				[IB_QPT_GSI] = IB_QP_EN_SQD_ASYNC_NOTIFY
1141 			}
1142 		},
1143 	},
1144 	[IB_QPS_SQD]   = {
1145 		[IB_QPS_RESET] = { .valid = 1 },
1146 		[IB_QPS_ERR] =   { .valid = 1 },
1147 		[IB_QPS_RTS]   = {
1148 			.valid = 1,
1149 			.opt_param = {
1150 				[IB_QPT_UD]  = (IB_QP_CUR_STATE			|
1151 						IB_QP_QKEY),
1152 				[IB_QPT_UC]  = (IB_QP_CUR_STATE			|
1153 						IB_QP_ALT_PATH			|
1154 						IB_QP_ACCESS_FLAGS		|
1155 						IB_QP_PATH_MIG_STATE),
1156 				[IB_QPT_RC]  = (IB_QP_CUR_STATE			|
1157 						IB_QP_ALT_PATH			|
1158 						IB_QP_ACCESS_FLAGS		|
1159 						IB_QP_MIN_RNR_TIMER		|
1160 						IB_QP_PATH_MIG_STATE),
1161 				[IB_QPT_XRC_INI] = (IB_QP_CUR_STATE		|
1162 						IB_QP_ALT_PATH			|
1163 						IB_QP_ACCESS_FLAGS		|
1164 						IB_QP_PATH_MIG_STATE),
1165 				[IB_QPT_XRC_TGT] = (IB_QP_CUR_STATE		|
1166 						IB_QP_ALT_PATH			|
1167 						IB_QP_ACCESS_FLAGS		|
1168 						IB_QP_MIN_RNR_TIMER		|
1169 						IB_QP_PATH_MIG_STATE),
1170 				[IB_QPT_SMI] = (IB_QP_CUR_STATE			|
1171 						IB_QP_QKEY),
1172 				[IB_QPT_GSI] = (IB_QP_CUR_STATE			|
1173 						IB_QP_QKEY),
1174 			}
1175 		},
1176 		[IB_QPS_SQD]   = {
1177 			.valid = 1,
1178 			.opt_param = {
1179 				[IB_QPT_UD]  = (IB_QP_PKEY_INDEX		|
1180 						IB_QP_QKEY),
1181 				[IB_QPT_UC]  = (IB_QP_AV			|
1182 						IB_QP_ALT_PATH			|
1183 						IB_QP_ACCESS_FLAGS		|
1184 						IB_QP_PKEY_INDEX		|
1185 						IB_QP_PATH_MIG_STATE),
1186 				[IB_QPT_RC]  = (IB_QP_PORT			|
1187 						IB_QP_AV			|
1188 						IB_QP_TIMEOUT			|
1189 						IB_QP_RETRY_CNT			|
1190 						IB_QP_RNR_RETRY			|
1191 						IB_QP_MAX_QP_RD_ATOMIC		|
1192 						IB_QP_MAX_DEST_RD_ATOMIC	|
1193 						IB_QP_ALT_PATH			|
1194 						IB_QP_ACCESS_FLAGS		|
1195 						IB_QP_PKEY_INDEX		|
1196 						IB_QP_MIN_RNR_TIMER		|
1197 						IB_QP_PATH_MIG_STATE),
1198 				[IB_QPT_XRC_INI] = (IB_QP_PORT			|
1199 						IB_QP_AV			|
1200 						IB_QP_TIMEOUT			|
1201 						IB_QP_RETRY_CNT			|
1202 						IB_QP_RNR_RETRY			|
1203 						IB_QP_MAX_QP_RD_ATOMIC		|
1204 						IB_QP_ALT_PATH			|
1205 						IB_QP_ACCESS_FLAGS		|
1206 						IB_QP_PKEY_INDEX		|
1207 						IB_QP_PATH_MIG_STATE),
1208 				[IB_QPT_XRC_TGT] = (IB_QP_PORT			|
1209 						IB_QP_AV			|
1210 						IB_QP_TIMEOUT			|
1211 						IB_QP_MAX_DEST_RD_ATOMIC	|
1212 						IB_QP_ALT_PATH			|
1213 						IB_QP_ACCESS_FLAGS		|
1214 						IB_QP_PKEY_INDEX		|
1215 						IB_QP_MIN_RNR_TIMER		|
1216 						IB_QP_PATH_MIG_STATE),
1217 				[IB_QPT_SMI] = (IB_QP_PKEY_INDEX		|
1218 						IB_QP_QKEY),
1219 				[IB_QPT_GSI] = (IB_QP_PKEY_INDEX		|
1220 						IB_QP_QKEY),
1221 			}
1222 		}
1223 	},
1224 	[IB_QPS_SQE]   = {
1225 		[IB_QPS_RESET] = { .valid = 1 },
1226 		[IB_QPS_ERR] =   { .valid = 1 },
1227 		[IB_QPS_RTS]   = {
1228 			.valid = 1,
1229 			.opt_param = {
1230 				[IB_QPT_UD]  = (IB_QP_CUR_STATE			|
1231 						IB_QP_QKEY),
1232 				[IB_QPT_UC]  = (IB_QP_CUR_STATE			|
1233 						IB_QP_ACCESS_FLAGS),
1234 				[IB_QPT_SMI] = (IB_QP_CUR_STATE			|
1235 						IB_QP_QKEY),
1236 				[IB_QPT_GSI] = (IB_QP_CUR_STATE			|
1237 						IB_QP_QKEY),
1238 			}
1239 		}
1240 	},
1241 	[IB_QPS_ERR] = {
1242 		[IB_QPS_RESET] = { .valid = 1 },
1243 		[IB_QPS_ERR] =   { .valid = 1 }
1244 	}
1245 };
1246 
ib_modify_qp_is_ok(enum ib_qp_state cur_state,enum ib_qp_state next_state,enum ib_qp_type type,enum ib_qp_attr_mask mask)1247 bool ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state,
1248 			enum ib_qp_type type, enum ib_qp_attr_mask mask)
1249 {
1250 	enum ib_qp_attr_mask req_param, opt_param;
1251 
1252 	if (mask & IB_QP_CUR_STATE  &&
1253 	    cur_state != IB_QPS_RTR && cur_state != IB_QPS_RTS &&
1254 	    cur_state != IB_QPS_SQD && cur_state != IB_QPS_SQE)
1255 		return false;
1256 
1257 	if (!qp_state_table[cur_state][next_state].valid)
1258 		return false;
1259 
1260 	req_param = qp_state_table[cur_state][next_state].req_param[type];
1261 	opt_param = qp_state_table[cur_state][next_state].opt_param[type];
1262 
1263 	if ((mask & req_param) != req_param)
1264 		return false;
1265 
1266 	if (mask & ~(req_param | opt_param | IB_QP_STATE))
1267 		return false;
1268 
1269 	return true;
1270 }
1271 EXPORT_SYMBOL(ib_modify_qp_is_ok);
1272 
ib_resolve_eth_dmac(struct ib_device * device,struct ib_ah_attr * ah_attr)1273 int ib_resolve_eth_dmac(struct ib_device *device,
1274 			struct ib_ah_attr *ah_attr)
1275 {
1276 	struct ib_gid_attr sgid_attr;
1277 	union ib_gid sgid;
1278 	int hop_limit;
1279 	int ret;
1280 
1281 	if (ah_attr->port_num < rdma_start_port(device) ||
1282 	    ah_attr->port_num > rdma_end_port(device))
1283 		return -EINVAL;
1284 
1285 	if (!rdma_cap_eth_ah(device, ah_attr->port_num))
1286 		return 0;
1287 
1288 	if (rdma_is_multicast_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) {
1289 		if (ipv6_addr_v4mapped((struct in6_addr *)ah_attr->grh.dgid.raw)) {
1290 			__be32 addr = 0;
1291 
1292 			memcpy(&addr, ah_attr->grh.dgid.raw + 12, 4);
1293 			ip_eth_mc_map(addr, (char *)ah_attr->dmac);
1294 		} else {
1295 			ipv6_eth_mc_map((struct in6_addr *)ah_attr->grh.dgid.raw,
1296 					(char *)ah_attr->dmac);
1297 		}
1298 		return 0;
1299 	}
1300 
1301 	ret = ib_query_gid(device,
1302 			   ah_attr->port_num,
1303 			   ah_attr->grh.sgid_index,
1304 			   &sgid, &sgid_attr);
1305 	if (ret != 0)
1306 		return (ret);
1307 	if (!sgid_attr.ndev)
1308 		return -ENXIO;
1309 
1310 	ret = rdma_addr_find_l2_eth_by_grh(&sgid,
1311 					   &ah_attr->grh.dgid,
1312 					   ah_attr->dmac,
1313 					   sgid_attr.ndev, &hop_limit);
1314 	dev_put(sgid_attr.ndev);
1315 
1316 	ah_attr->grh.hop_limit = hop_limit;
1317 	return ret;
1318 }
1319 EXPORT_SYMBOL(ib_resolve_eth_dmac);
1320 
is_qp_type_connected(const struct ib_qp * qp)1321 static bool is_qp_type_connected(const struct ib_qp *qp)
1322 {
1323 	return (qp->qp_type == IB_QPT_UC ||
1324 		qp->qp_type == IB_QPT_RC ||
1325 		qp->qp_type == IB_QPT_XRC_INI ||
1326 		qp->qp_type == IB_QPT_XRC_TGT);
1327 }
1328 
1329 /**
1330  * IB core internal function to perform QP attributes modification.
1331  */
_ib_modify_qp(struct ib_qp * qp,struct ib_qp_attr * attr,int attr_mask,struct ib_udata * udata)1332 static int _ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *attr,
1333 			 int attr_mask, struct ib_udata *udata)
1334 {
1335 	u8 port = attr_mask & IB_QP_PORT ? attr->port_num : qp->port;
1336 	int ret;
1337 
1338 	if (port < rdma_start_port(qp->device) ||
1339 	    port > rdma_end_port(qp->device))
1340 		return -EINVAL;
1341 
1342 	if (attr_mask & IB_QP_ALT_PATH) {
1343 		/*
1344 		 * Today the core code can only handle alternate paths and APM
1345 		 * for IB. Ban them in roce mode.
1346 		 */
1347 		if (!(rdma_protocol_ib(qp->device,
1348 		      attr->alt_ah_attr.port_num) &&
1349 		      rdma_protocol_ib(qp->device, port))) {
1350 			ret = EINVAL;
1351 			goto out;
1352 		}
1353 	}
1354 
1355 	/*
1356 	 * If the user provided the qp_attr then we have to resolve it. Kernel
1357 	 * users have to provide already resolved rdma_ah_attr's
1358 	 */
1359 	if (udata && (attr_mask & IB_QP_AV) &&
1360 	    rdma_protocol_roce(qp->device, port) &&
1361 	    is_qp_type_connected(qp)) {
1362 		ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr);
1363 		if (ret)
1364 			goto out;
1365 	}
1366 
1367 	if (rdma_ib_or_roce(qp->device, port)) {
1368 		if (attr_mask & IB_QP_RQ_PSN && attr->rq_psn & ~0xffffff) {
1369 			dev_warn(&qp->device->dev,
1370 				 "%s rq_psn overflow, masking to 24 bits\n",
1371 				 __func__);
1372 			attr->rq_psn &= 0xffffff;
1373 		}
1374 
1375 		if (attr_mask & IB_QP_SQ_PSN && attr->sq_psn & ~0xffffff) {
1376 			dev_warn(&qp->device->dev,
1377 				 " %s sq_psn overflow, masking to 24 bits\n",
1378 				 __func__);
1379 			attr->sq_psn &= 0xffffff;
1380 		}
1381 	}
1382 
1383 	ret = qp->device->modify_qp(qp, attr, attr_mask, udata);
1384 	if (ret)
1385 		goto out;
1386 
1387 	if (attr_mask & IB_QP_PORT)
1388 		qp->port = attr->port_num;
1389 out:
1390 	return ret;
1391 }
1392 
1393 /**
1394  * ib_modify_qp_with_udata - Modifies the attributes for the specified QP.
1395  * @ib_qp: The QP to modify.
1396  * @attr: On input, specifies the QP attributes to modify.  On output,
1397  *   the current values of selected QP attributes are returned.
1398  * @attr_mask: A bit-mask used to specify which attributes of the QP
1399  *   are being modified.
1400  * @udata: pointer to user's input output buffer information
1401  *   are being modified.
1402  * It returns 0 on success and returns appropriate error code on error.
1403  */
ib_modify_qp_with_udata(struct ib_qp * ib_qp,struct ib_qp_attr * attr,int attr_mask,struct ib_udata * udata)1404 int ib_modify_qp_with_udata(struct ib_qp *ib_qp, struct ib_qp_attr *attr,
1405 			    int attr_mask, struct ib_udata *udata)
1406 {
1407 	return _ib_modify_qp(ib_qp->real_qp, attr, attr_mask, udata);
1408 }
1409 EXPORT_SYMBOL(ib_modify_qp_with_udata);
1410 
ib_modify_qp(struct ib_qp * qp,struct ib_qp_attr * qp_attr,int qp_attr_mask)1411 int ib_modify_qp(struct ib_qp *qp,
1412 		 struct ib_qp_attr *qp_attr,
1413 		 int qp_attr_mask)
1414 {
1415 	if (qp_attr_mask & IB_QP_AV) {
1416 		int ret;
1417 
1418 		ret = ib_resolve_eth_dmac(qp->device, &qp_attr->ah_attr);
1419 		if (ret)
1420 			return ret;
1421 	}
1422 
1423 	return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL);
1424 }
1425 EXPORT_SYMBOL(ib_modify_qp);
1426 
ib_query_qp(struct ib_qp * qp,struct ib_qp_attr * qp_attr,int qp_attr_mask,struct ib_qp_init_attr * qp_init_attr)1427 int ib_query_qp(struct ib_qp *qp,
1428 		struct ib_qp_attr *qp_attr,
1429 		int qp_attr_mask,
1430 		struct ib_qp_init_attr *qp_init_attr)
1431 {
1432 	return qp->device->query_qp ?
1433 		qp->device->query_qp(qp->real_qp, qp_attr, qp_attr_mask, qp_init_attr) :
1434 		-ENOSYS;
1435 }
1436 EXPORT_SYMBOL(ib_query_qp);
1437 
ib_close_qp(struct ib_qp * qp)1438 int ib_close_qp(struct ib_qp *qp)
1439 {
1440 	struct ib_qp *real_qp;
1441 	unsigned long flags;
1442 
1443 	real_qp = qp->real_qp;
1444 	if (real_qp == qp)
1445 		return -EINVAL;
1446 
1447 	spin_lock_irqsave(&real_qp->device->event_handler_lock, flags);
1448 	list_del(&qp->open_list);
1449 	spin_unlock_irqrestore(&real_qp->device->event_handler_lock, flags);
1450 
1451 	atomic_dec(&real_qp->usecnt);
1452 	kfree(qp);
1453 
1454 	return 0;
1455 }
1456 EXPORT_SYMBOL(ib_close_qp);
1457 
__ib_destroy_shared_qp(struct ib_qp * qp)1458 static int __ib_destroy_shared_qp(struct ib_qp *qp)
1459 {
1460 	struct ib_xrcd *xrcd;
1461 	struct ib_qp *real_qp;
1462 	int ret;
1463 
1464 	real_qp = qp->real_qp;
1465 	xrcd = real_qp->xrcd;
1466 
1467 	mutex_lock(&xrcd->tgt_qp_mutex);
1468 	ib_close_qp(qp);
1469 	if (atomic_read(&real_qp->usecnt) == 0)
1470 		list_del(&real_qp->xrcd_list);
1471 	else
1472 		real_qp = NULL;
1473 	mutex_unlock(&xrcd->tgt_qp_mutex);
1474 
1475 	if (real_qp) {
1476 		ret = ib_destroy_qp(real_qp);
1477 		if (!ret)
1478 			atomic_dec(&xrcd->usecnt);
1479 		else
1480 			__ib_insert_xrcd_qp(xrcd, real_qp);
1481 	}
1482 
1483 	return 0;
1484 }
1485 
ib_destroy_qp_user(struct ib_qp * qp,struct ib_udata * udata)1486 int ib_destroy_qp_user(struct ib_qp *qp, struct ib_udata *udata)
1487 {
1488 	struct ib_pd *pd;
1489 	struct ib_cq *scq, *rcq;
1490 	struct ib_srq *srq;
1491 	struct ib_rwq_ind_table *ind_tbl;
1492 	int ret;
1493 
1494 	if (atomic_read(&qp->usecnt))
1495 		return -EBUSY;
1496 
1497 	if (qp->real_qp != qp)
1498 		return __ib_destroy_shared_qp(qp);
1499 
1500 	pd   = qp->pd;
1501 	scq  = qp->send_cq;
1502 	rcq  = qp->recv_cq;
1503 	srq  = qp->srq;
1504 	ind_tbl = qp->rwq_ind_tbl;
1505 
1506 	ret = qp->device->destroy_qp(qp, udata);
1507 	if (!ret) {
1508 		if (pd)
1509 			atomic_dec(&pd->usecnt);
1510 		if (scq)
1511 			atomic_dec(&scq->usecnt);
1512 		if (rcq)
1513 			atomic_dec(&rcq->usecnt);
1514 		if (srq)
1515 			atomic_dec(&srq->usecnt);
1516 		if (ind_tbl)
1517 			atomic_dec(&ind_tbl->usecnt);
1518 	}
1519 
1520 	return ret;
1521 }
1522 EXPORT_SYMBOL(ib_destroy_qp_user);
1523 
1524 /* Completion queues */
1525 
/**
 * __ib_create_cq - Creates a kernel CQ on the specified device.
 * @device: The device on which to create the CQ.
 * @comp_handler: Stored in the CQ as its completion handler.
 * @event_handler: Stored in the CQ as its event handler.
 * @cq_context: Stored in the CQ as its context pointer.
 * @cq_attr: Passed to the driver's create_cq method.
 * @caller: Not used by this function.
 *
 * Returns the new CQ, ERR_PTR(-ENOMEM) when the object cannot be
 * allocated, or ERR_PTR() of the driver's error.
 */
struct ib_cq *__ib_create_cq(struct ib_device *device,
			     ib_comp_handler comp_handler,
			     void (*event_handler)(struct ib_event *, void *),
			     void *cq_context,
			     const struct ib_cq_init_attr *cq_attr,
			     const char *caller)
{
	struct ib_cq *cq;
	int err;

	cq = rdma_zalloc_drv_obj(device, ib_cq);
	if (cq == NULL)
		return ERR_PTR(-ENOMEM);

	atomic_set(&cq->usecnt, 0);
	cq->device = device;
	cq->uobject = NULL;
	cq->comp_handler = comp_handler;
	cq->event_handler = event_handler;
	cq->cq_context = cq_context;

	err = device->create_cq(cq, cq_attr, NULL);
	if (err == 0)
		return cq;

	/* The driver rejected the CQ: release the object allocated above. */
	kfree(cq);
	return ERR_PTR(err);
}
1555 EXPORT_SYMBOL(__ib_create_cq);
1556 
/**
 * ib_modify_cq - Forward a CQ modification to the device driver.
 * @cq: The CQ to modify.
 * @cq_count: Passed through to the driver's modify_cq method.
 * @cq_period: Passed through to the driver's modify_cq method.
 *
 * Returns the driver's result, or -ENOSYS when the device provides no
 * modify_cq method.
 */
int ib_modify_cq(struct ib_cq *cq, u16 cq_count, u16 cq_period)
{
	if (!cq->device->modify_cq)
		return -ENOSYS;

	return cq->device->modify_cq(cq, cq_count, cq_period);
}
1562 EXPORT_SYMBOL(ib_modify_cq);
1563 
ib_destroy_cq_user(struct ib_cq * cq,struct ib_udata * udata)1564 int ib_destroy_cq_user(struct ib_cq *cq, struct ib_udata *udata)
1565 {
1566 	if (atomic_read(&cq->usecnt))
1567 		return -EBUSY;
1568 
1569 	cq->device->destroy_cq(cq, udata);
1570 	kfree(cq);
1571 	return 0;
1572 }
1573 EXPORT_SYMBOL(ib_destroy_cq_user);
1574 
ib_resize_cq(struct ib_cq * cq,int cqe)1575 int ib_resize_cq(struct ib_cq *cq, int cqe)
1576 {
1577 	return cq->device->resize_cq ?
1578 		cq->device->resize_cq(cq, cqe, NULL) : -ENOSYS;
1579 }
1580 EXPORT_SYMBOL(ib_resize_cq);
1581 
1582 /* Memory regions */
1583 
ib_dereg_mr_user(struct ib_mr * mr,struct ib_udata * udata)1584 int ib_dereg_mr_user(struct ib_mr *mr, struct ib_udata *udata)
1585 {
1586 	struct ib_pd *pd = mr->pd;
1587 	struct ib_dm *dm = mr->dm;
1588 	struct ib_sig_attrs *sig_attrs = mr->sig_attrs;
1589 	int ret;
1590 
1591 	ret = mr->device->dereg_mr(mr, udata);
1592 	if (!ret) {
1593 		atomic_dec(&pd->usecnt);
1594 		if (dm)
1595 			atomic_dec(&dm->usecnt);
1596 		kfree(sig_attrs);
1597 	}
1598 
1599 	return ret;
1600 }
1601 EXPORT_SYMBOL(ib_dereg_mr_user);
1602 
1603 /**
1604  * ib_alloc_mr_user() - Allocates a memory region
1605  * @pd:            protection domain associated with the region
1606  * @mr_type:       memory region type
1607  * @max_num_sg:    maximum sg entries available for registration.
1608  * @udata:	   user data or null for kernel objects
1609  *
1610  * Notes:
1611  * Memory registeration page/sg lists must not exceed max_num_sg.
1612  * For mr_type IB_MR_TYPE_MEM_REG, the total length cannot exceed
1613  * max_num_sg * used_page_size.
1614  *
1615  */
ib_alloc_mr_user(struct ib_pd * pd,enum ib_mr_type mr_type,u32 max_num_sg,struct ib_udata * udata)1616 struct ib_mr *ib_alloc_mr_user(struct ib_pd *pd, enum ib_mr_type mr_type,
1617 			       u32 max_num_sg, struct ib_udata *udata)
1618 {
1619 	struct ib_mr *mr;
1620 
1621 	if (!pd->device->alloc_mr) {
1622 		mr = ERR_PTR(-EOPNOTSUPP);
1623 		goto out;
1624 	}
1625 
1626 	if (mr_type == IB_MR_TYPE_INTEGRITY) {
1627 		WARN_ON_ONCE(1);
1628 		mr = ERR_PTR(-EINVAL);
1629 		goto out;
1630 	}
1631 
1632 	mr = pd->device->alloc_mr(pd, mr_type, max_num_sg, udata);
1633 	if (!IS_ERR(mr)) {
1634 		mr->device  = pd->device;
1635 		mr->pd      = pd;
1636 		mr->dm      = NULL;
1637 		mr->uobject = NULL;
1638 		atomic_inc(&pd->usecnt);
1639 		mr->need_inval = false;
1640 		mr->type = mr_type;
1641 		mr->sig_attrs = NULL;
1642 	}
1643 
1644 out:
1645 	return mr;
1646 }
1647 EXPORT_SYMBOL(ib_alloc_mr_user);
1648 
1649 /* "Fast" memory regions */
1650 
ib_alloc_fmr(struct ib_pd * pd,int mr_access_flags,struct ib_fmr_attr * fmr_attr)1651 struct ib_fmr *ib_alloc_fmr(struct ib_pd *pd,
1652 			    int mr_access_flags,
1653 			    struct ib_fmr_attr *fmr_attr)
1654 {
1655 	struct ib_fmr *fmr;
1656 
1657 	if (!pd->device->alloc_fmr)
1658 		return ERR_PTR(-ENOSYS);
1659 
1660 	fmr = pd->device->alloc_fmr(pd, mr_access_flags, fmr_attr);
1661 	if (!IS_ERR(fmr)) {
1662 		fmr->device = pd->device;
1663 		fmr->pd     = pd;
1664 		atomic_inc(&pd->usecnt);
1665 	}
1666 
1667 	return fmr;
1668 }
1669 EXPORT_SYMBOL(ib_alloc_fmr);
1670 
ib_unmap_fmr(struct list_head * fmr_list)1671 int ib_unmap_fmr(struct list_head *fmr_list)
1672 {
1673 	struct ib_fmr *fmr;
1674 
1675 	if (list_empty(fmr_list))
1676 		return 0;
1677 
1678 	fmr = list_entry(fmr_list->next, struct ib_fmr, list);
1679 	return fmr->device->unmap_fmr(fmr_list);
1680 }
1681 EXPORT_SYMBOL(ib_unmap_fmr);
1682 
ib_dealloc_fmr(struct ib_fmr * fmr)1683 int ib_dealloc_fmr(struct ib_fmr *fmr)
1684 {
1685 	struct ib_pd *pd;
1686 	int ret;
1687 
1688 	pd = fmr->pd;
1689 	ret = fmr->device->dealloc_fmr(fmr);
1690 	if (!ret)
1691 		atomic_dec(&pd->usecnt);
1692 
1693 	return ret;
1694 }
1695 EXPORT_SYMBOL(ib_dealloc_fmr);
1696 
1697 /* Multicast groups */
1698 
is_valid_mcast_lid(struct ib_qp * qp,u16 lid)1699 static bool is_valid_mcast_lid(struct ib_qp *qp, u16 lid)
1700 {
1701 	struct ib_qp_init_attr init_attr = {};
1702 	struct ib_qp_attr attr = {};
1703 	int num_eth_ports = 0;
1704 	int port;
1705 
1706 	/* If QP state >= init, it is assigned to a port and we can check this
1707 	 * port only.
1708 	 */
1709 	if (!ib_query_qp(qp, &attr, IB_QP_STATE | IB_QP_PORT, &init_attr)) {
1710 		if (attr.qp_state >= IB_QPS_INIT) {
1711 			if (rdma_port_get_link_layer(qp->device, attr.port_num) !=
1712 			    IB_LINK_LAYER_INFINIBAND)
1713 				return true;
1714 			goto lid_check;
1715 		}
1716 	}
1717 
1718 	/* Can't get a quick answer, iterate over all ports */
1719 	for (port = 0; port < qp->device->phys_port_cnt; port++)
1720 		if (rdma_port_get_link_layer(qp->device, port) !=
1721 		    IB_LINK_LAYER_INFINIBAND)
1722 			num_eth_ports++;
1723 
1724 	/* If we have at lease one Ethernet port, RoCE annex declares that
1725 	 * multicast LID should be ignored. We can't tell at this step if the
1726 	 * QP belongs to an IB or Ethernet port.
1727 	 */
1728 	if (num_eth_ports)
1729 		return true;
1730 
1731 	/* If all the ports are IB, we can check according to IB spec. */
1732 lid_check:
1733 	return !(lid < be16_to_cpu(IB_MULTICAST_LID_BASE) ||
1734 		 lid == be16_to_cpu(IB_LID_PERMISSIVE));
1735 }
1736 
/**
 * ib_attach_mcast - Attaches the specified QP to a multicast group.
 * @qp: QP to attach; must be of type IB_QPT_UD.
 * @gid: Multicast group GID.
 * @lid: Multicast group LID, validated by is_valid_mcast_lid().
 *
 * Returns -ENOSYS when the device has no attach_mcast method, -EINVAL when
 * an argument check fails, otherwise the driver's result.  On success the
 * QP's use count is raised.
 */
int ib_attach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
	int err;

	if (qp->device->attach_mcast == NULL)
		return -ENOSYS;

	if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw))
		return -EINVAL;
	if (qp->qp_type != IB_QPT_UD)
		return -EINVAL;
	if (!is_valid_mcast_lid(qp, lid))
		return -EINVAL;

	err = qp->device->attach_mcast(qp, gid, lid);
	if (err == 0)
		atomic_inc(&qp->usecnt);

	return err;
}
1753 EXPORT_SYMBOL(ib_attach_mcast);
1754 
/**
 * ib_detach_mcast - Detaches the specified QP from a multicast group.
 * @qp: QP to detach; must be of type IB_QPT_UD.
 * @gid: Multicast group GID.
 * @lid: Multicast group LID, validated by is_valid_mcast_lid().
 *
 * Returns -ENOSYS when the device has no detach_mcast method, -EINVAL when
 * an argument check fails, otherwise the driver's result.  On success the
 * QP's use count is dropped.
 */
int ib_detach_mcast(struct ib_qp *qp, union ib_gid *gid, u16 lid)
{
	int err;

	if (qp->device->detach_mcast == NULL)
		return -ENOSYS;

	if (!rdma_is_multicast_addr((struct in6_addr *)gid->raw))
		return -EINVAL;
	if (qp->qp_type != IB_QPT_UD)
		return -EINVAL;
	if (!is_valid_mcast_lid(qp, lid))
		return -EINVAL;

	err = qp->device->detach_mcast(qp, gid, lid);
	if (err == 0)
		atomic_dec(&qp->usecnt);

	return err;
}
1771 EXPORT_SYMBOL(ib_detach_mcast);
1772 
__ib_alloc_xrcd(struct ib_device * device,const char * caller)1773 struct ib_xrcd *__ib_alloc_xrcd(struct ib_device *device, const char *caller)
1774 {
1775 	struct ib_xrcd *xrcd;
1776 
1777 	if (!device->alloc_xrcd)
1778 		return ERR_PTR(-EOPNOTSUPP);
1779 
1780 	xrcd = device->alloc_xrcd(device, NULL);
1781 	if (!IS_ERR(xrcd)) {
1782 		xrcd->device = device;
1783 		xrcd->inode = NULL;
1784 		atomic_set(&xrcd->usecnt, 0);
1785 		mutex_init(&xrcd->tgt_qp_mutex);
1786 		INIT_LIST_HEAD(&xrcd->tgt_qp_list);
1787 	}
1788 
1789 	return xrcd;
1790 }
1791 EXPORT_SYMBOL(__ib_alloc_xrcd);
1792 
ib_dealloc_xrcd(struct ib_xrcd * xrcd,struct ib_udata * udata)1793 int ib_dealloc_xrcd(struct ib_xrcd *xrcd, struct ib_udata *udata)
1794 {
1795 	struct ib_qp *qp;
1796 	int ret;
1797 
1798 	if (atomic_read(&xrcd->usecnt))
1799 		return -EBUSY;
1800 
1801 	while (!list_empty(&xrcd->tgt_qp_list)) {
1802 		qp = list_entry(xrcd->tgt_qp_list.next, struct ib_qp, xrcd_list);
1803 		ret = ib_destroy_qp(qp);
1804 		if (ret)
1805 			return ret;
1806 	}
1807 	mutex_destroy(&xrcd->tgt_qp_mutex);
1808 
1809 	return xrcd->device->dealloc_xrcd(xrcd, udata);
1810 }
1811 EXPORT_SYMBOL(ib_dealloc_xrcd);
1812 
1813 /**
1814  * ib_create_wq - Creates a WQ associated with the specified protection
1815  * domain.
1816  * @pd: The protection domain associated with the WQ.
1817  * @wq_init_attr: A list of initial attributes required to create the
1818  * WQ. If WQ creation succeeds, then the attributes are updated to
1819  * the actual capabilities of the created WQ.
1820  *
1821  * wq_init_attr->max_wr and wq_init_attr->max_sge determine
1822  * the requested size of the WQ, and set to the actual values allocated
1823  * on return.
1824  * If ib_create_wq() succeeds, then max_wr and max_sge will always be
1825  * at least as large as the requested values.
1826  */
ib_create_wq(struct ib_pd * pd,struct ib_wq_init_attr * wq_attr)1827 struct ib_wq *ib_create_wq(struct ib_pd *pd,
1828 			   struct ib_wq_init_attr *wq_attr)
1829 {
1830 	struct ib_wq *wq;
1831 
1832 	if (!pd->device->create_wq)
1833 		return ERR_PTR(-ENOSYS);
1834 
1835 	wq = pd->device->create_wq(pd, wq_attr, NULL);
1836 	if (!IS_ERR(wq)) {
1837 		wq->event_handler = wq_attr->event_handler;
1838 		wq->wq_context = wq_attr->wq_context;
1839 		wq->wq_type = wq_attr->wq_type;
1840 		wq->cq = wq_attr->cq;
1841 		wq->device = pd->device;
1842 		wq->pd = pd;
1843 		wq->uobject = NULL;
1844 		atomic_inc(&pd->usecnt);
1845 		atomic_inc(&wq_attr->cq->usecnt);
1846 		atomic_set(&wq->usecnt, 0);
1847 	}
1848 	return wq;
1849 }
1850 EXPORT_SYMBOL(ib_create_wq);
1851 
1852 /**
1853  * ib_destroy_wq - Destroys the specified user WQ.
1854  * @wq: The WQ to destroy.
1855  * @udata: Valid user data
1856  */
ib_destroy_wq(struct ib_wq * wq,struct ib_udata * udata)1857 int ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
1858 {
1859 	struct ib_cq *cq = wq->cq;
1860 	struct ib_pd *pd = wq->pd;
1861 
1862 	if (atomic_read(&wq->usecnt))
1863 		return -EBUSY;
1864 
1865 	wq->device->destroy_wq(wq, udata);
1866 	atomic_dec(&pd->usecnt);
1867 	atomic_dec(&cq->usecnt);
1868 
1869 	return 0;
1870 }
1871 EXPORT_SYMBOL(ib_destroy_wq);
1872 
1873 /**
1874  * ib_modify_wq - Modifies the specified WQ.
1875  * @wq: The WQ to modify.
1876  * @wq_attr: On input, specifies the WQ attributes to modify.
1877  * @wq_attr_mask: A bit-mask used to specify which attributes of the WQ
1878  *   are being modified.
1879  * On output, the current values of selected WQ attributes are returned.
1880  */
ib_modify_wq(struct ib_wq * wq,struct ib_wq_attr * wq_attr,u32 wq_attr_mask)1881 int ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
1882 		 u32 wq_attr_mask)
1883 {
1884 	int err;
1885 
1886 	if (!wq->device->modify_wq)
1887 		return -ENOSYS;
1888 
1889 	err = wq->device->modify_wq(wq, wq_attr, wq_attr_mask, NULL);
1890 	return err;
1891 }
1892 EXPORT_SYMBOL(ib_modify_wq);
1893 
1894 /*
1895  * ib_create_rwq_ind_table - Creates a RQ Indirection Table.
1896  * @device: The device on which to create the rwq indirection table.
1897  * @ib_rwq_ind_table_init_attr: A list of initial attributes required to
1898  * create the Indirection Table.
1899  *
1900  * Note: The life time of ib_rwq_ind_table_init_attr->ind_tbl is not less
1901  *	than the created ib_rwq_ind_table object and the caller is responsible
1902  *	for its memory allocation/free.
1903  */
ib_create_rwq_ind_table(struct ib_device * device,struct ib_rwq_ind_table_init_attr * init_attr)1904 struct ib_rwq_ind_table *ib_create_rwq_ind_table(struct ib_device *device,
1905 						 struct ib_rwq_ind_table_init_attr *init_attr)
1906 {
1907 	struct ib_rwq_ind_table *rwq_ind_table;
1908 	int i;
1909 	u32 table_size;
1910 
1911 	if (!device->create_rwq_ind_table)
1912 		return ERR_PTR(-ENOSYS);
1913 
1914 	table_size = (1 << init_attr->log_ind_tbl_size);
1915 	rwq_ind_table = device->create_rwq_ind_table(device,
1916 				init_attr, NULL);
1917 	if (IS_ERR(rwq_ind_table))
1918 		return rwq_ind_table;
1919 
1920 	rwq_ind_table->ind_tbl = init_attr->ind_tbl;
1921 	rwq_ind_table->log_ind_tbl_size = init_attr->log_ind_tbl_size;
1922 	rwq_ind_table->device = device;
1923 	rwq_ind_table->uobject = NULL;
1924 	atomic_set(&rwq_ind_table->usecnt, 0);
1925 
1926 	for (i = 0; i < table_size; i++)
1927 		atomic_inc(&rwq_ind_table->ind_tbl[i]->usecnt);
1928 
1929 	return rwq_ind_table;
1930 }
1931 EXPORT_SYMBOL(ib_create_rwq_ind_table);
1932 
1933 /*
1934  * ib_destroy_rwq_ind_table - Destroys the specified Indirection Table.
1935  * @wq_ind_table: The Indirection Table to destroy.
1936 */
ib_destroy_rwq_ind_table(struct ib_rwq_ind_table * rwq_ind_table)1937 int ib_destroy_rwq_ind_table(struct ib_rwq_ind_table *rwq_ind_table)
1938 {
1939 	int err, i;
1940 	u32 table_size = (1 << rwq_ind_table->log_ind_tbl_size);
1941 	struct ib_wq **ind_tbl = rwq_ind_table->ind_tbl;
1942 
1943 	if (atomic_read(&rwq_ind_table->usecnt))
1944 		return -EBUSY;
1945 
1946 	err = rwq_ind_table->device->destroy_rwq_ind_table(rwq_ind_table);
1947 	if (!err) {
1948 		for (i = 0; i < table_size; i++)
1949 			atomic_dec(&ind_tbl[i]->usecnt);
1950 	}
1951 
1952 	return err;
1953 }
1954 EXPORT_SYMBOL(ib_destroy_rwq_ind_table);
1955 
/*
 * Query the status of a memory region through the device's
 * check_mr_status method.  Returns -ENOSYS when the device does not
 * provide that method, otherwise the method's return value.
 */
int ib_check_mr_status(struct ib_mr *mr, u32 check_mask,
		       struct ib_mr_status *mr_status)
{
	if (!mr->device->check_mr_status)
		return -ENOSYS;

	return mr->device->check_mr_status(mr, check_mask, mr_status);
}
EXPORT_SYMBOL(ib_check_mr_status);
1963 
/*
 * Pass a VF link-state change to the device's set_vf_link_state method.
 * Returns -ENOSYS when the device lacks that method.
 */
int ib_set_vf_link_state(struct ib_device *device, int vf, u8 port,
			 int state)
{
	int ret = -ENOSYS;

	if (device->set_vf_link_state)
		ret = device->set_vf_link_state(device, vf, port, state);

	return ret;
}
EXPORT_SYMBOL(ib_set_vf_link_state);
1973 
/*
 * Fetch a VF's configuration into @info via the device's get_vf_config
 * method.  Returns -ENOSYS when the device lacks that method.
 */
int ib_get_vf_config(struct ib_device *device, int vf, u8 port,
		     struct ifla_vf_info *info)
{
	int ret = -ENOSYS;

	if (device->get_vf_config)
		ret = device->get_vf_config(device, vf, port, info);

	return ret;
}
EXPORT_SYMBOL(ib_get_vf_config);
1983 
/*
 * Fetch a VF's statistics into @stats via the device's get_vf_stats
 * method.  Returns -ENOSYS when the device lacks that method.
 */
int ib_get_vf_stats(struct ib_device *device, int vf, u8 port,
		    struct ifla_vf_stats *stats)
{
	int ret = -ENOSYS;

	if (device->get_vf_stats)
		ret = device->get_vf_stats(device, vf, port, stats);

	return ret;
}
EXPORT_SYMBOL(ib_get_vf_stats);
1993 
/*
 * Pass a VF GUID assignment to the device's set_vf_guid method.
 * Returns -ENOSYS when the device lacks that method.
 */
int ib_set_vf_guid(struct ib_device *device, int vf, u8 port, u64 guid,
		   int type)
{
	int ret = -ENOSYS;

	if (device->set_vf_guid)
		ret = device->set_vf_guid(device, vf, port, guid, type);

	return ret;
}
EXPORT_SYMBOL(ib_set_vf_guid);
2003 
2004 /**
2005  * ib_map_mr_sg() - Map the largest prefix of a dma mapped SG list
2006  *     and set it the memory region.
2007  * @mr:            memory region
2008  * @sg:            dma mapped scatterlist
2009  * @sg_nents:      number of entries in sg
2010  * @sg_offset:     offset in bytes into sg
2011  * @page_size:     page vector desired page size
2012  *
2013  * Constraints:
2014  * - The first sg element is allowed to have an offset.
2015  * - Each sg element must either be aligned to page_size or virtually
2016  *   contiguous to the previous element. In case an sg element has a
2017  *   non-contiguous offset, the mapping prefix will not include it.
2018  * - The last sg element is allowed to have length less than page_size.
2019  * - If sg_nents total byte length exceeds the mr max_num_sge * page_size
2020  *   then only max_num_sg entries will be mapped.
2021  * - If the MR was allocated with type IB_MR_TYPE_SG_GAPS, none of these
2022  *   constraints holds and the page_size argument is ignored.
2023  *
2024  * Returns the number of sg elements that were mapped to the memory region.
2025  *
2026  * After this completes successfully, the  memory region
2027  * is ready for registration.
2028  */
ib_map_mr_sg(struct ib_mr * mr,struct scatterlist * sg,int sg_nents,unsigned int * sg_offset,unsigned int page_size)2029 int ib_map_mr_sg(struct ib_mr *mr, struct scatterlist *sg, int sg_nents,
2030 		 unsigned int *sg_offset, unsigned int page_size)
2031 {
2032 	if (unlikely(!mr->device->map_mr_sg))
2033 		return -ENOSYS;
2034 
2035 	mr->page_size = page_size;
2036 
2037 	return mr->device->map_mr_sg(mr, sg, sg_nents, sg_offset);
2038 }
2039 EXPORT_SYMBOL(ib_map_mr_sg);
2040 
2041 /**
2042  * ib_sg_to_pages() - Convert the largest prefix of a sg list
2043  *     to a page vector
2044  * @mr:            memory region
2045  * @sgl:           dma mapped scatterlist
2046  * @sg_nents:      number of entries in sg
2047  * @sg_offset_p:   IN:  start offset in bytes into sg
2048  *                 OUT: offset in bytes for element n of the sg of the first
2049  *                      byte that has not been processed where n is the return
2050  *                      value of this function.
2051  * @set_page:      driver page assignment function pointer
2052  *
2053  * Core service helper for drivers to convert the largest
2054  * prefix of given sg list to a page vector. The sg list
2055  * prefix converted is the prefix that meet the requirements
2056  * of ib_map_mr_sg.
2057  *
2058  * Returns the number of sg elements that were assigned to
2059  * a page vector.
2060  */
ib_sg_to_pages(struct ib_mr * mr,struct scatterlist * sgl,int sg_nents,unsigned int * sg_offset_p,int (* set_page)(struct ib_mr *,u64))2061 int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents,
2062 		unsigned int *sg_offset_p, int (*set_page)(struct ib_mr *, u64))
2063 {
2064 	struct scatterlist *sg;
2065 	u64 last_end_dma_addr = 0;
2066 	unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
2067 	unsigned int last_page_off = 0;
2068 	u64 page_mask = ~((u64)mr->page_size - 1);
2069 	int i, ret;
2070 
2071 	if (unlikely(sg_nents <= 0 || sg_offset > sg_dma_len(&sgl[0])))
2072 		return -EINVAL;
2073 
2074 	mr->iova = sg_dma_address(&sgl[0]) + sg_offset;
2075 	mr->length = 0;
2076 
2077 	for_each_sg(sgl, sg, sg_nents, i) {
2078 		u64 dma_addr = sg_dma_address(sg) + sg_offset;
2079 		u64 prev_addr = dma_addr;
2080 		unsigned int dma_len = sg_dma_len(sg) - sg_offset;
2081 		u64 end_dma_addr = dma_addr + dma_len;
2082 		u64 page_addr = dma_addr & page_mask;
2083 
2084 		/*
2085 		 * For the second and later elements, check whether either the
2086 		 * end of element i-1 or the start of element i is not aligned
2087 		 * on a page boundary.
2088 		 */
2089 		if (i && (last_page_off != 0 || page_addr != dma_addr)) {
2090 			/* Stop mapping if there is a gap. */
2091 			if (last_end_dma_addr != dma_addr)
2092 				break;
2093 
2094 			/*
2095 			 * Coalesce this element with the last. If it is small
2096 			 * enough just update mr->length. Otherwise start
2097 			 * mapping from the next page.
2098 			 */
2099 			goto next_page;
2100 		}
2101 
2102 		do {
2103 			ret = set_page(mr, page_addr);
2104 			if (unlikely(ret < 0)) {
2105 				sg_offset = prev_addr - sg_dma_address(sg);
2106 				mr->length += prev_addr - dma_addr;
2107 				if (sg_offset_p)
2108 					*sg_offset_p = sg_offset;
2109 				return i || sg_offset ? i : ret;
2110 			}
2111 			prev_addr = page_addr;
2112 next_page:
2113 			page_addr += mr->page_size;
2114 		} while (page_addr < end_dma_addr);
2115 
2116 		mr->length += dma_len;
2117 		last_end_dma_addr = end_dma_addr;
2118 		last_page_off = end_dma_addr & ~page_mask;
2119 
2120 		sg_offset = 0;
2121 	}
2122 
2123 	if (sg_offset_p)
2124 		*sg_offset_p = 0;
2125 	return i;
2126 }
2127 EXPORT_SYMBOL(ib_sg_to_pages);
2128 
2129 struct ib_drain_cqe {
2130 	struct ib_cqe cqe;
2131 	struct completion done;
2132 };
2133 
ib_drain_qp_done(struct ib_cq * cq,struct ib_wc * wc)2134 static void ib_drain_qp_done(struct ib_cq *cq, struct ib_wc *wc)
2135 {
2136 	struct ib_drain_cqe *cqe = container_of(wc->wr_cqe, struct ib_drain_cqe,
2137 						cqe);
2138 
2139 	complete(&cqe->done);
2140 }
2141 
2142 /*
2143  * Post a WR and block until its completion is reaped for the SQ.
2144  */
__ib_drain_sq(struct ib_qp * qp)2145 static void __ib_drain_sq(struct ib_qp *qp)
2146 {
2147 	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
2148 	struct ib_drain_cqe sdrain;
2149 	const struct ib_send_wr *bad_swr;
2150 	struct ib_rdma_wr swr = {
2151 		.wr = {
2152 			.opcode	= IB_WR_RDMA_WRITE,
2153 			.wr_cqe	= &sdrain.cqe,
2154 		},
2155 	};
2156 	int ret;
2157 
2158 	if (qp->send_cq->poll_ctx == IB_POLL_DIRECT) {
2159 		WARN_ONCE(qp->send_cq->poll_ctx == IB_POLL_DIRECT,
2160 			  "IB_POLL_DIRECT poll_ctx not supported for drain\n");
2161 		return;
2162 	}
2163 
2164 	sdrain.cqe.done = ib_drain_qp_done;
2165 	init_completion(&sdrain.done);
2166 
2167 	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
2168 	if (ret) {
2169 		WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
2170 		return;
2171 	}
2172 
2173 	ret = ib_post_send(qp, &swr.wr, &bad_swr);
2174 	if (ret) {
2175 		WARN_ONCE(ret, "failed to drain send queue: %d\n", ret);
2176 		return;
2177 	}
2178 
2179 	wait_for_completion(&sdrain.done);
2180 }
2181 
2182 /*
2183  * Post a WR and block until its completion is reaped for the RQ.
2184  */
__ib_drain_rq(struct ib_qp * qp)2185 static void __ib_drain_rq(struct ib_qp *qp)
2186 {
2187 	struct ib_qp_attr attr = { .qp_state = IB_QPS_ERR };
2188 	struct ib_drain_cqe rdrain;
2189 	struct ib_recv_wr rwr = {};
2190 	const struct ib_recv_wr *bad_rwr;
2191 	int ret;
2192 
2193 	if (qp->recv_cq->poll_ctx == IB_POLL_DIRECT) {
2194 		WARN_ONCE(qp->recv_cq->poll_ctx == IB_POLL_DIRECT,
2195 			  "IB_POLL_DIRECT poll_ctx not supported for drain\n");
2196 		return;
2197 	}
2198 
2199 	rwr.wr_cqe = &rdrain.cqe;
2200 	rdrain.cqe.done = ib_drain_qp_done;
2201 	init_completion(&rdrain.done);
2202 
2203 	ret = ib_modify_qp(qp, &attr, IB_QP_STATE);
2204 	if (ret) {
2205 		WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
2206 		return;
2207 	}
2208 
2209 	ret = ib_post_recv(qp, &rwr, &bad_rwr);
2210 	if (ret) {
2211 		WARN_ONCE(ret, "failed to drain recv queue: %d\n", ret);
2212 		return;
2213 	}
2214 
2215 	wait_for_completion(&rdrain.done);
2216 }
2217 
2218 /**
2219  * ib_drain_sq() - Block until all SQ CQEs have been consumed by the
2220  *		   application.
2221  * @qp:            queue pair to drain
2222  *
2223  * If the device has a provider-specific drain function, then
2224  * call that.  Otherwise call the generic drain function
2225  * __ib_drain_sq().
2226  *
2227  * The caller must:
2228  *
2229  * ensure there is room in the CQ and SQ for the drain work request and
2230  * completion.
2231  *
2232  * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
2233  * IB_POLL_DIRECT.
2234  *
2235  * ensure that there are no other contexts that are posting WRs concurrently.
2236  * Otherwise the drain is not guaranteed.
2237  */
ib_drain_sq(struct ib_qp * qp)2238 void ib_drain_sq(struct ib_qp *qp)
2239 {
2240 	if (qp->device->drain_sq)
2241 		qp->device->drain_sq(qp);
2242 	else
2243 		__ib_drain_sq(qp);
2244 }
2245 EXPORT_SYMBOL(ib_drain_sq);
2246 
2247 /**
2248  * ib_drain_rq() - Block until all RQ CQEs have been consumed by the
2249  *		   application.
2250  * @qp:            queue pair to drain
2251  *
2252  * If the device has a provider-specific drain function, then
2253  * call that.  Otherwise call the generic drain function
2254  * __ib_drain_rq().
2255  *
2256  * The caller must:
2257  *
2258  * ensure there is room in the CQ and RQ for the drain work request and
2259  * completion.
2260  *
2261  * allocate the CQ using ib_alloc_cq() and the CQ poll context cannot be
2262  * IB_POLL_DIRECT.
2263  *
2264  * ensure that there are no other contexts that are posting WRs concurrently.
2265  * Otherwise the drain is not guaranteed.
2266  */
ib_drain_rq(struct ib_qp * qp)2267 void ib_drain_rq(struct ib_qp *qp)
2268 {
2269 	if (qp->device->drain_rq)
2270 		qp->device->drain_rq(qp);
2271 	else
2272 		__ib_drain_rq(qp);
2273 }
2274 EXPORT_SYMBOL(ib_drain_rq);
2275 
2276 /**
2277  * ib_drain_qp() - Block until all CQEs have been consumed by the
2278  *		   application on both the RQ and SQ.
2279  * @qp:            queue pair to drain
2280  *
2281  * The caller must:
2282  *
2283  * ensure there is room in the CQ(s), SQ, and RQ for drain work requests
2284  * and completions.
2285  *
2286  * allocate the CQs using ib_alloc_cq() and the CQ poll context cannot be
2287  * IB_POLL_DIRECT.
2288  *
2289  * ensure that there are no other contexts that are posting WRs concurrently.
2290  * Otherwise the drain is not guaranteed.
2291  */
ib_drain_qp(struct ib_qp * qp)2292 void ib_drain_qp(struct ib_qp *qp)
2293 {
2294 	ib_drain_sq(qp);
2295 	if (!qp->srq)
2296 		ib_drain_rq(qp);
2297 }
2298 EXPORT_SYMBOL(ib_drain_qp);
2299