xref: /linux/drivers/infiniband/core/mad.c (revision d09560435cb712c9ec1e62b8a43a79b0af69fe77)
1 /*
2  * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
3  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
4  * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
5  * Copyright (c) 2009 HNR Consulting. All rights reserved.
6  * Copyright (c) 2014,2018 Intel Corporation.  All rights reserved.
7  *
8  * This software is available to you under a choice of one of two
9  * licenses.  You may choose to be licensed under the terms of the GNU
10  * General Public License (GPL) Version 2, available from the file
11  * COPYING in the main directory of this source tree, or the
12  * OpenIB.org BSD license below:
13  *
14  *     Redistribution and use in source and binary forms, with or
15  *     without modification, are permitted provided that the following
16  *     conditions are met:
17  *
18  *      - Redistributions of source code must retain the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer.
21  *
22  *      - Redistributions in binary form must reproduce the above
23  *        copyright notice, this list of conditions and the following
24  *        disclaimer in the documentation and/or other materials
25  *        provided with the distribution.
26  *
27  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
28  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
29  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
30  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
31  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
32  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
33  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34  * SOFTWARE.
35  *
36  */
37 
38 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
39 
40 #include <linux/dma-mapping.h>
41 #include <linux/slab.h>
42 #include <linux/module.h>
43 #include <linux/security.h>
44 #include <linux/xarray.h>
45 #include <rdma/ib_cache.h>
46 
47 #include "mad_priv.h"
48 #include "core_priv.h"
49 #include "mad_rmpp.h"
50 #include "smi.h"
51 #include "opa_smi.h"
52 #include "agent.h"
53 
54 #define CREATE_TRACE_POINTS
55 #include <trace/events/ib_mad.h>
56 
57 #ifdef CONFIG_TRACEPOINTS
58 static void create_mad_addr_info(struct ib_mad_send_wr_private *mad_send_wr,
59 			  struct ib_mad_qp_info *qp_info,
60 			  struct trace_event_raw_ib_mad_send_template *entry)
61 {
62 	u16 pkey;
63 	struct ib_device *dev = qp_info->port_priv->device;
64 	u32 pnum = qp_info->port_priv->port_num;
65 	struct ib_ud_wr *wr = &mad_send_wr->send_wr;
66 	struct rdma_ah_attr attr = {};
67 
68 	rdma_query_ah(wr->ah, &attr);
69 
70 	/* These are common */
71 	entry->sl = attr.sl;
72 	ib_query_pkey(dev, pnum, wr->pkey_index, &pkey);
73 	entry->pkey = pkey;
74 	entry->rqpn = wr->remote_qpn;
75 	entry->rqkey = wr->remote_qkey;
76 	entry->dlid = rdma_ah_get_dlid(&attr);
77 }
78 #endif
79 
/* Send and receive queue depths, in work requests, with built-in defaults */
static int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
static int mad_recvq_size = IB_MAD_QP_RECV_SIZE;

/* Both depths are exposed as module parameters with mode 0444 */
module_param_named(send_queue_size, mad_sendq_size, int, 0444);
MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests");
module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");

/*
 * Registered MAD agents.  ib_register_mad_agent() stores each agent here
 * with xa_alloc_cyclic(); the allocated index becomes the agent's hi_tid
 * and ib_mad_client_next is the cursor for the cyclic allocation.
 */
static DEFINE_XARRAY_ALLOC1(ib_mad_clients);
static u32 ib_mad_client_next;
/* Per-port MAD state, searched by __ib_get_mad_port() */
static struct list_head ib_mad_port_list;

/* Port list lock */
static DEFINE_SPINLOCK(ib_mad_port_list_lock);
94 
95 /* Forward declarations */
96 static int method_in_use(struct ib_mad_mgmt_method_table **method,
97 			 struct ib_mad_reg_req *mad_reg_req);
98 static void remove_mad_reg_req(struct ib_mad_agent_private *priv);
99 static struct ib_mad_agent_private *find_mad_agent(
100 					struct ib_mad_port_private *port_priv,
101 					const struct ib_mad_hdr *mad);
102 static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
103 				    struct ib_mad_private *mad);
104 static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv);
105 static void timeout_sends(struct work_struct *work);
106 static void local_completions(struct work_struct *work);
107 static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
108 			      struct ib_mad_agent_private *agent_priv,
109 			      u8 mgmt_class);
110 static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
111 			   struct ib_mad_agent_private *agent_priv);
112 static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
113 			      struct ib_wc *wc);
114 static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc);
115 
116 /*
117  * Returns a ib_mad_port_private structure or NULL for a device/port
118  * Assumes ib_mad_port_list_lock is being held
119  */
120 static inline struct ib_mad_port_private *
121 __ib_get_mad_port(struct ib_device *device, u32 port_num)
122 {
123 	struct ib_mad_port_private *entry;
124 
125 	list_for_each_entry(entry, &ib_mad_port_list, port_list) {
126 		if (entry->device == device && entry->port_num == port_num)
127 			return entry;
128 	}
129 	return NULL;
130 }
131 
132 /*
133  * Wrapper function to return a ib_mad_port_private structure or NULL
134  * for a device/port
135  */
136 static inline struct ib_mad_port_private *
137 ib_get_mad_port(struct ib_device *device, u32 port_num)
138 {
139 	struct ib_mad_port_private *entry;
140 	unsigned long flags;
141 
142 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
143 	entry = __ib_get_mad_port(device, port_num);
144 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
145 
146 	return entry;
147 }
148 
149 static inline u8 convert_mgmt_class(u8 mgmt_class)
150 {
151 	/* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */
152 	return mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ?
153 		0 : mgmt_class;
154 }
155 
156 static int get_spl_qp_index(enum ib_qp_type qp_type)
157 {
158 	switch (qp_type) {
159 	case IB_QPT_SMI:
160 		return 0;
161 	case IB_QPT_GSI:
162 		return 1;
163 	default:
164 		return -1;
165 	}
166 }
167 
168 static int vendor_class_index(u8 mgmt_class)
169 {
170 	return mgmt_class - IB_MGMT_CLASS_VENDOR_RANGE2_START;
171 }
172 
173 static int is_vendor_class(u8 mgmt_class)
174 {
175 	if ((mgmt_class < IB_MGMT_CLASS_VENDOR_RANGE2_START) ||
176 	    (mgmt_class > IB_MGMT_CLASS_VENDOR_RANGE2_END))
177 		return 0;
178 	return 1;
179 }
180 
/* Return 1 if any of the three OUI bytes is non-zero, 0 if all are zero */
static int is_vendor_oui(char *oui)
{
	int i;

	for (i = 0; i < 3; i++) {
		if (oui[i])
			return 1;
	}

	return 0;
}
187 
188 static int is_vendor_method_in_use(
189 		struct ib_mad_mgmt_vendor_class *vendor_class,
190 		struct ib_mad_reg_req *mad_reg_req)
191 {
192 	struct ib_mad_mgmt_method_table *method;
193 	int i;
194 
195 	for (i = 0; i < MAX_MGMT_OUI; i++) {
196 		if (!memcmp(vendor_class->oui[i], mad_reg_req->oui, 3)) {
197 			method = vendor_class->method_table[i];
198 			if (method) {
199 				if (method_in_use(&method, mad_reg_req))
200 					return 1;
201 				else
202 					break;
203 			}
204 		}
205 	}
206 	return 0;
207 }
208 
209 int ib_response_mad(const struct ib_mad_hdr *hdr)
210 {
211 	return ((hdr->method & IB_MGMT_METHOD_RESP) ||
212 		(hdr->method == IB_MGMT_METHOD_TRAP_REPRESS) ||
213 		((hdr->mgmt_class == IB_MGMT_CLASS_BM) &&
214 		 (hdr->attr_mod & IB_BM_ATTR_MOD_RESP)));
215 }
216 EXPORT_SYMBOL(ib_response_mad);
217 
/*
 * ib_register_mad_agent - Register to send/receive MADs
 * @device: device to register on
 * @port_num: port on @device
 * @qp_type: IB_QPT_SMI or IB_QPT_GSI; any other type is rejected
 * @mad_reg_req: optional description of the class/methods to receive;
 *	when supplied, @recv_handler is mandatory
 * @rmpp_version: 0 or IB_MGMT_RMPP_VERSION; must be 0 for a class for
 *	which ib_is_mad_class_rmpp() is false
 * @send_handler: send completion callback; mandatory when @mad_reg_req
 *	is NULL
 * @recv_handler: receive callback
 * @context: opaque pointer stored in the returned agent
 * @registration_flags: stored in the agent's flags; IB_MAD_USER_RMPP is
 *	rejected when @mad_reg_req is NULL
 *
 * Context: Process context.
 *
 * Return: the new agent on success, otherwise an ERR_PTR():
 * -EPROTONOSUPPORT if the port lacks the capability for @qp_type or the
 * QP does not exist, -EINVAL for invalid parameters or a registration that
 * overlaps an existing one, -ENODEV if the port has no MAD state, -ENOMEM
 * on allocation failure, or the error returned by the security setup, the
 * client ID allocation or the registration-table update.
 */
struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
					   u32 port_num,
					   enum ib_qp_type qp_type,
					   struct ib_mad_reg_req *mad_reg_req,
					   u8 rmpp_version,
					   ib_mad_send_handler send_handler,
					   ib_mad_recv_handler recv_handler,
					   void *context,
					   u32 registration_flags)
{
	struct ib_mad_port_private *port_priv;
	/* Default error for every path that does not set ret explicitly */
	struct ib_mad_agent *ret = ERR_PTR(-EINVAL);
	struct ib_mad_agent_private *mad_agent_priv;
	struct ib_mad_reg_req *reg_req = NULL;
	struct ib_mad_mgmt_class_table *class;
	struct ib_mad_mgmt_vendor_class_table *vendor;
	struct ib_mad_mgmt_vendor_class *vendor_class;
	struct ib_mad_mgmt_method_table *method;
	int ret2, qpn;
	u8 mgmt_class, vclass;

	if ((qp_type == IB_QPT_SMI && !rdma_cap_ib_smi(device, port_num)) ||
	    (qp_type == IB_QPT_GSI && !rdma_cap_ib_cm(device, port_num)))
		return ERR_PTR(-EPROTONOSUPPORT);

	/* Validate parameters */
	qpn = get_spl_qp_index(qp_type);
	if (qpn == -1) {
		dev_dbg_ratelimited(&device->dev, "%s: invalid QP Type %d\n",
				    __func__, qp_type);
		goto error1;
	}

	if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION) {
		dev_dbg_ratelimited(&device->dev,
				    "%s: invalid RMPP Version %u\n",
				    __func__, rmpp_version);
		goto error1;
	}

	/* Validate MAD registration request if supplied */
	if (mad_reg_req) {
		if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION) {
			dev_dbg_ratelimited(&device->dev,
					    "%s: invalid Class Version %u\n",
					    __func__,
					    mad_reg_req->mgmt_class_version);
			goto error1;
		}
		if (!recv_handler) {
			dev_dbg_ratelimited(&device->dev,
					    "%s: no recv_handler\n", __func__);
			goto error1;
		}
		if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
			/*
			 * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only
			 * one in this range currently allowed
			 */
			if (mad_reg_req->mgmt_class !=
			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
				dev_dbg_ratelimited(&device->dev,
					"%s: Invalid Mgmt Class 0x%x\n",
					__func__, mad_reg_req->mgmt_class);
				goto error1;
			}
		} else if (mad_reg_req->mgmt_class == 0) {
			/*
			 * Class 0 is reserved in IBA and is used for
			 * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
			 */
			dev_dbg_ratelimited(&device->dev,
					    "%s: Invalid Mgmt Class 0\n",
					    __func__);
			goto error1;
		} else if (is_vendor_class(mad_reg_req->mgmt_class)) {
			/*
			 * If class is in "new" vendor range,
			 * ensure supplied OUI is not zero
			 */
			if (!is_vendor_oui(mad_reg_req->oui)) {
				dev_dbg_ratelimited(&device->dev,
					"%s: No OUI specified for class 0x%x\n",
					__func__,
					mad_reg_req->mgmt_class);
				goto error1;
			}
		}
		/* Make sure class supplied is consistent with RMPP */
		if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
			if (rmpp_version) {
				dev_dbg_ratelimited(&device->dev,
					"%s: RMPP version for non-RMPP class 0x%x\n",
					__func__, mad_reg_req->mgmt_class);
				goto error1;
			}
		}

		/* Make sure class supplied is consistent with QP type */
		if (qp_type == IB_QPT_SMI) {
			/* The SMI QP only accepts the two subnet management classes */
			if ((mad_reg_req->mgmt_class !=
					IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
			    (mad_reg_req->mgmt_class !=
					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
				dev_dbg_ratelimited(&device->dev,
					"%s: Invalid SM QP type: class 0x%x\n",
					__func__, mad_reg_req->mgmt_class);
				goto error1;
			}
		} else {
			/* ... and the GSI QP accepts everything except those two */
			if ((mad_reg_req->mgmt_class ==
					IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
			    (mad_reg_req->mgmt_class ==
					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
				dev_dbg_ratelimited(&device->dev,
					"%s: Invalid GS QP type: class 0x%x\n",
					__func__, mad_reg_req->mgmt_class);
				goto error1;
			}
		}
	} else {
		/* No registration request supplied */
		if (!send_handler)
			goto error1;
		if (registration_flags & IB_MAD_USER_RMPP)
			goto error1;
	}

	/* Validate device and port */
	port_priv = ib_get_mad_port(device, port_num);
	if (!port_priv) {
		dev_dbg_ratelimited(&device->dev, "%s: Invalid port %u\n",
				    __func__, port_num);
		ret = ERR_PTR(-ENODEV);
		goto error1;
	}

	/* Verify the QP requested is supported. For example, Ethernet devices
	 * will not have QP0.
	 */
	if (!port_priv->qp_info[qpn].qp) {
		dev_dbg_ratelimited(&device->dev, "%s: QP %d not supported\n",
				    __func__, qpn);
		ret = ERR_PTR(-EPROTONOSUPPORT);
		goto error1;
	}

	/* Allocate structures */
	mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL);
	if (!mad_agent_priv) {
		ret = ERR_PTR(-ENOMEM);
		goto error1;
	}

	if (mad_reg_req) {
		/* Keep a private copy; the caller's request is not retained */
		reg_req = kmemdup(mad_reg_req, sizeof *reg_req, GFP_KERNEL);
		if (!reg_req) {
			ret = ERR_PTR(-ENOMEM);
			goto error3;
		}
	}

	/* Now, fill in the various structures */
	mad_agent_priv->qp_info = &port_priv->qp_info[qpn];
	mad_agent_priv->reg_req = reg_req;
	mad_agent_priv->agent.rmpp_version = rmpp_version;
	mad_agent_priv->agent.device = device;
	mad_agent_priv->agent.recv_handler = recv_handler;
	mad_agent_priv->agent.send_handler = send_handler;
	mad_agent_priv->agent.context = context;
	mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
	mad_agent_priv->agent.port_num = port_num;
	mad_agent_priv->agent.flags = registration_flags;
	spin_lock_init(&mad_agent_priv->lock);
	INIT_LIST_HEAD(&mad_agent_priv->send_list);
	INIT_LIST_HEAD(&mad_agent_priv->wait_list);
	INIT_LIST_HEAD(&mad_agent_priv->done_list);
	INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
	INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends);
	INIT_LIST_HEAD(&mad_agent_priv->local_list);
	INIT_WORK(&mad_agent_priv->local_work, local_completions);
	/* Initial reference; dropped by unregister_mad_agent() */
	refcount_set(&mad_agent_priv->refcount, 1);
	init_completion(&mad_agent_priv->comp);

	ret2 = ib_mad_agent_security_setup(&mad_agent_priv->agent, qp_type);
	if (ret2) {
		ret = ERR_PTR(ret2);
		goto error4;
	}

	/*
	 * The mlx4 driver uses the top byte to distinguish which virtual
	 * function generated the MAD, so we must avoid using it.
	 */
	ret2 = xa_alloc_cyclic(&ib_mad_clients, &mad_agent_priv->agent.hi_tid,
			mad_agent_priv, XA_LIMIT(0, (1 << 24) - 1),
			&ib_mad_client_next, GFP_KERNEL);
	if (ret2 < 0) {
		ret = ERR_PTR(ret2);
		goto error5;
	}

	/*
	 * Make sure MAD registration (if supplied)
	 * is non overlapping with any existing ones
	 */
	spin_lock_irq(&port_priv->reg_lock);
	if (mad_reg_req) {
		mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class);
		if (!is_vendor_class(mgmt_class)) {
			class = port_priv->version[mad_reg_req->
						   mgmt_class_version].class;
			if (class) {
				method = class->method_table[mgmt_class];
				if (method) {
					/* Overlap: ret still holds -EINVAL */
					if (method_in_use(&method,
							   mad_reg_req))
						goto error6;
				}
			}
			ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
						  mgmt_class);
		} else {
			/* "New" vendor class range */
			vendor = port_priv->version[mad_reg_req->
						    mgmt_class_version].vendor;
			if (vendor) {
				vclass = vendor_class_index(mgmt_class);
				vendor_class = vendor->vendor_class[vclass];
				if (vendor_class) {
					/* Overlap: ret still holds -EINVAL */
					if (is_vendor_method_in_use(
							vendor_class,
							mad_reg_req))
						goto error6;
				}
			}
			ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
		}
		if (ret2) {
			ret = ERR_PTR(ret2);
			goto error6;
		}
	}
	spin_unlock_irq(&port_priv->reg_lock);

	trace_ib_mad_create_agent(mad_agent_priv);
	return &mad_agent_priv->agent;
	/* Unwind in reverse order of setup; each label falls through */
error6:
	spin_unlock_irq(&port_priv->reg_lock);
	xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
error5:
	ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
error4:
	kfree(reg_req);
error3:
	kfree(mad_agent_priv);
error1:
	return ret;
}
EXPORT_SYMBOL(ib_register_mad_agent);
483 
484 static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
485 {
486 	if (refcount_dec_and_test(&mad_agent_priv->refcount))
487 		complete(&mad_agent_priv->comp);
488 }
489 
490 static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
491 {
492 	struct ib_mad_port_private *port_priv;
493 
494 	/* Note that we could still be handling received MADs */
495 	trace_ib_mad_unregister_agent(mad_agent_priv);
496 
497 	/*
498 	 * Canceling all sends results in dropping received response
499 	 * MADs, preventing us from queuing additional work
500 	 */
501 	cancel_mads(mad_agent_priv);
502 	port_priv = mad_agent_priv->qp_info->port_priv;
503 	cancel_delayed_work(&mad_agent_priv->timed_work);
504 
505 	spin_lock_irq(&port_priv->reg_lock);
506 	remove_mad_reg_req(mad_agent_priv);
507 	spin_unlock_irq(&port_priv->reg_lock);
508 	xa_erase(&ib_mad_clients, mad_agent_priv->agent.hi_tid);
509 
510 	flush_workqueue(port_priv->wq);
511 
512 	deref_mad_agent(mad_agent_priv);
513 	wait_for_completion(&mad_agent_priv->comp);
514 	ib_cancel_rmpp_recvs(mad_agent_priv);
515 
516 	ib_mad_agent_security_cleanup(&mad_agent_priv->agent);
517 
518 	kfree(mad_agent_priv->reg_req);
519 	kfree_rcu(mad_agent_priv, rcu);
520 }
521 
522 /*
523  * ib_unregister_mad_agent - Unregisters a client from using MAD services
524  *
525  * Context: Process context.
526  */
527 void ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
528 {
529 	struct ib_mad_agent_private *mad_agent_priv;
530 
531 	mad_agent_priv = container_of(mad_agent,
532 				      struct ib_mad_agent_private,
533 				      agent);
534 	unregister_mad_agent(mad_agent_priv);
535 }
536 EXPORT_SYMBOL(ib_unregister_mad_agent);
537 
538 static void dequeue_mad(struct ib_mad_list_head *mad_list)
539 {
540 	struct ib_mad_queue *mad_queue;
541 	unsigned long flags;
542 
543 	mad_queue = mad_list->mad_queue;
544 	spin_lock_irqsave(&mad_queue->lock, flags);
545 	list_del(&mad_list->list);
546 	mad_queue->count--;
547 	spin_unlock_irqrestore(&mad_queue->lock, flags);
548 }
549 
550 static void build_smp_wc(struct ib_qp *qp, struct ib_cqe *cqe, u16 slid,
551 		u16 pkey_index, u32 port_num, struct ib_wc *wc)
552 {
553 	memset(wc, 0, sizeof *wc);
554 	wc->wr_cqe = cqe;
555 	wc->status = IB_WC_SUCCESS;
556 	wc->opcode = IB_WC_RECV;
557 	wc->pkey_index = pkey_index;
558 	wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh);
559 	wc->src_qp = IB_QP0;
560 	wc->qp = qp;
561 	wc->slid = slid;
562 	wc->sl = 0;
563 	wc->dlid_path_bits = 0;
564 	wc->port_num = port_num;
565 }
566 
567 static size_t mad_priv_size(const struct ib_mad_private *mp)
568 {
569 	return sizeof(struct ib_mad_private) + mp->mad_size;
570 }
571 
572 static struct ib_mad_private *alloc_mad_private(size_t mad_size, gfp_t flags)
573 {
574 	size_t size = sizeof(struct ib_mad_private) + mad_size;
575 	struct ib_mad_private *ret = kzalloc(size, flags);
576 
577 	if (ret)
578 		ret->mad_size = mad_size;
579 
580 	return ret;
581 }
582 
583 static size_t port_mad_size(const struct ib_mad_port_private *port_priv)
584 {
585 	return rdma_max_mad_size(port_priv->device, port_priv->port_num);
586 }
587 
588 static size_t mad_priv_dma_size(const struct ib_mad_private *mp)
589 {
590 	return sizeof(struct ib_grh) + mp->mad_size;
591 }
592 
/*
 * handle_outgoing_dr_smp - decide whether an outgoing SMP is sent on the
 * wire or completed locally
 * @mad_agent_priv: agent issuing the send
 * @mad_send_wr: send work request whose buffer holds the SMP
 *
 * When the SMP is handled locally, the device's process_mad() callback is
 * invoked and a local completion is queued on the agent's local_list for
 * the local_work work item.
 *
 * Return 0 if SMP is to be sent
 * Return 1 if SMP was consumed locally (whether or not solicited)
 * Return < 0 if error
 */
static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
				  struct ib_mad_send_wr_private *mad_send_wr)
{
	int ret = 0;
	struct ib_smp *smp = mad_send_wr->send_buf.mad;
	/* Same buffer viewed as an OPA SMP; only used on the OPA path */
	struct opa_smp *opa_smp = (struct opa_smp *)smp;
	unsigned long flags;
	struct ib_mad_local_private *local;
	struct ib_mad_private *mad_priv;
	struct ib_mad_port_private *port_priv;
	struct ib_mad_agent_private *recv_mad_agent = NULL;
	struct ib_device *device = mad_agent_priv->agent.device;
	u32 port_num;
	struct ib_wc mad_wc;
	struct ib_ud_wr *send_wr = &mad_send_wr->send_wr;
	size_t mad_size = port_mad_size(mad_agent_priv->qp_info->port_priv);
	u16 out_mad_pkey_index = 0;
	u16 drslid;
	bool opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device,
				    mad_agent_priv->qp_info->port_priv->port_num);

	/*
	 * On a switch, a directed route SMP uses the port named in the send
	 * work request; in every other case the agent's own port is used.
	 */
	if (rdma_cap_ib_switch(device) &&
	    smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
		port_num = send_wr->port_num;
	else
		port_num = mad_agent_priv->agent.port_num;

	/*
	 * Directed route handling starts if the initial LID routed part of
	 * a request or the ending LID routed part of a response is empty.
	 * If we are at the start of the LID routed part, don't update the
	 * hop_ptr or hop_cnt.  See section 14.2.2, Vol 1 IB spec.
	 */
	if (opa && smp->class_version == OPA_SM_CLASS_VERSION) {
		u32 opa_drslid;

		trace_ib_mad_handle_out_opa_smi(opa_smp);

		if ((opa_get_smp_direction(opa_smp)
		     ? opa_smp->route.dr.dr_dlid : opa_smp->route.dr.dr_slid) ==
		     OPA_LID_PERMISSIVE &&
		     opa_smi_handle_dr_smp_send(opa_smp,
						rdma_cap_ib_switch(device),
						port_num) == IB_SMI_DISCARD) {
			ret = -EINVAL;
			dev_err(&device->dev, "OPA Invalid directed route\n");
			goto out;
		}
		/*
		 * The source LID is carried in a u16 below, so any
		 * non-permissive value with bits set in the upper 16 is
		 * rejected.
		 */
		opa_drslid = be32_to_cpu(opa_smp->route.dr.dr_slid);
		if (opa_drslid != be32_to_cpu(OPA_LID_PERMISSIVE) &&
		    opa_drslid & 0xffff0000) {
			ret = -EINVAL;
			dev_err(&device->dev, "OPA Invalid dr_slid 0x%x\n",
			       opa_drslid);
			goto out;
		}
		drslid = (u16)(opa_drslid & 0x0000ffff);

		/* Check to post send on QP or process locally */
		/* ret is still 0 here, i.e. the caller posts the send */
		if (opa_smi_check_local_smp(opa_smp, device) == IB_SMI_DISCARD &&
		    opa_smi_check_local_returning_smp(opa_smp, device) == IB_SMI_DISCARD)
			goto out;
	} else {
		trace_ib_mad_handle_out_ib_smi(smp);

		if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) ==
		     IB_LID_PERMISSIVE &&
		     smi_handle_dr_smp_send(smp, rdma_cap_ib_switch(device), port_num) ==
		     IB_SMI_DISCARD) {
			ret = -EINVAL;
			dev_err(&device->dev, "Invalid directed route\n");
			goto out;
		}
		drslid = be16_to_cpu(smp->dr_slid);

		/* Check to post send on QP or process locally */
		/* ret is still 0 here, i.e. the caller posts the send */
		if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD &&
		    smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD)
			goto out;
	}

	/* From here on the SMP is processed locally */
	local = kmalloc(sizeof *local, GFP_ATOMIC);
	if (!local) {
		ret = -ENOMEM;
		goto out;
	}
	local->mad_priv = NULL;
	local->recv_mad_agent = NULL;
	/* Buffer that receives the output MAD from process_mad() */
	mad_priv = alloc_mad_private(mad_size, GFP_ATOMIC);
	if (!mad_priv) {
		ret = -ENOMEM;
		kfree(local);
		goto out;
	}

	/* Synthesise the receive completion handed to process_mad() */
	build_smp_wc(mad_agent_priv->agent.qp,
		     send_wr->wr.wr_cqe, drslid,
		     send_wr->pkey_index,
		     send_wr->port_num, &mad_wc);

	/* OPA MADs report their actual length rather than a fixed size */
	if (opa && smp->base_version == OPA_MGMT_BASE_VERSION) {
		mad_wc.byte_len = mad_send_wr->send_buf.hdr_len
					+ mad_send_wr->send_buf.data_len
					+ sizeof(struct ib_grh);
	}

	/* No GRH for DR SMP */
	ret = device->ops.process_mad(device, 0, port_num, &mad_wc, NULL,
				      (const struct ib_mad *)smp,
				      (struct ib_mad *)mad_priv->mad, &mad_size,
				      &out_mad_pkey_index);
	switch (ret) {
	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
		/*
		 * The reply is delivered to the sending agent only if it is
		 * a response MAD and the agent has a receive handler;
		 * otherwise the reply buffer is dropped.
		 */
		if (ib_response_mad((const struct ib_mad_hdr *)mad_priv->mad) &&
		    mad_agent_priv->agent.recv_handler) {
			local->mad_priv = mad_priv;
			local->recv_mad_agent = mad_agent_priv;
			/*
			 * Reference MAD agent until receive
			 * side of local completion handled
			 */
			refcount_inc(&mad_agent_priv->refcount);
		} else
			kfree(mad_priv);
		break;
	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED:
		/* Nothing to deliver; only the send completion is queued */
		kfree(mad_priv);
		break;
	case IB_MAD_RESULT_SUCCESS:
		/* Treat like an incoming receive MAD */
		port_priv = ib_get_mad_port(mad_agent_priv->agent.device,
					    mad_agent_priv->agent.port_num);
		if (port_priv) {
			/* Deliver a copy of the outgoing SMP itself */
			memcpy(mad_priv->mad, smp, mad_priv->mad_size);
			/*
			 * NOTE(review): no refcount_inc() here, unlike the
			 * REPLY case - presumably find_mad_agent() returns
			 * an already referenced agent; confirm.
			 */
			recv_mad_agent = find_mad_agent(port_priv,
						        (const struct ib_mad_hdr *)mad_priv->mad);
		}
		if (!port_priv || !recv_mad_agent) {
			/*
			 * No receiving agent so drop packet and
			 * generate send completion.
			 */
			kfree(mad_priv);
			break;
		}
		local->mad_priv = mad_priv;
		local->recv_mad_agent = recv_mad_agent;
		break;
	default:
		/* Any other result is an error: free both and fail */
		kfree(mad_priv);
		kfree(local);
		ret = -EINVAL;
		goto out;
	}

	local->mad_send_wr = mad_send_wr;
	if (opa) {
		/* Record the P_Key index and size written back by process_mad() */
		local->mad_send_wr->send_wr.pkey_index = out_mad_pkey_index;
		local->return_wc_byte_len = mad_size;
	}
	/* Reference MAD agent until send side of local completion handled */
	refcount_inc(&mad_agent_priv->refcount);
	/* Queue local completion to local list */
	spin_lock_irqsave(&mad_agent_priv->lock, flags);
	list_add_tail(&local->completion_list, &mad_agent_priv->local_list);
	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
	queue_work(mad_agent_priv->qp_info->port_priv->wq,
		   &mad_agent_priv->local_work);
	ret = 1;
out:
	return ret;
}
770 
/*
 * Compute the padding needed so that @data_len fills a whole number of
 * segments of (mad_size - hdr_len) bytes.
 *
 * Returns 0 when @data_len is already a multiple of the segment size.
 * When @data_len or the segment size is zero, the segment size itself is
 * returned.
 */
static int get_pad_size(int hdr_len, int data_len, size_t mad_size)
{
	int seg_size = mad_size - hdr_len;
	int tail;

	if (!data_len || !seg_size)
		return seg_size;

	/* Bytes occupying the last, partially filled segment */
	tail = data_len % seg_size;
	if (!tail)
		return 0;

	return seg_size - tail;
}
782 
783 static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr)
784 {
785 	struct ib_rmpp_segment *s, *t;
786 
787 	list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) {
788 		list_del(&s->list);
789 		kfree(s);
790 	}
791 }
792 
793 static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
794 				size_t mad_size, gfp_t gfp_mask)
795 {
796 	struct ib_mad_send_buf *send_buf = &send_wr->send_buf;
797 	struct ib_rmpp_mad *rmpp_mad = send_buf->mad;
798 	struct ib_rmpp_segment *seg = NULL;
799 	int left, seg_size, pad;
800 
801 	send_buf->seg_size = mad_size - send_buf->hdr_len;
802 	send_buf->seg_rmpp_size = mad_size - IB_MGMT_RMPP_HDR;
803 	seg_size = send_buf->seg_size;
804 	pad = send_wr->pad;
805 
806 	/* Allocate data segments. */
807 	for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
808 		seg = kmalloc(sizeof(*seg) + seg_size, gfp_mask);
809 		if (!seg) {
810 			free_send_rmpp_list(send_wr);
811 			return -ENOMEM;
812 		}
813 		seg->num = ++send_buf->seg_count;
814 		list_add_tail(&seg->list, &send_wr->rmpp_list);
815 	}
816 
817 	/* Zero any padding */
818 	if (pad)
819 		memset(seg->data + seg_size - pad, 0, pad);
820 
821 	rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv->
822 					  agent.rmpp_version;
823 	rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
824 	ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
825 
826 	send_wr->cur_seg = container_of(send_wr->rmpp_list.next,
827 					struct ib_rmpp_segment, list);
828 	send_wr->last_ack_seg = send_wr->cur_seg;
829 	return 0;
830 }
831 
832 int ib_mad_kernel_rmpp_agent(const struct ib_mad_agent *agent)
833 {
834 	return agent->rmpp_version && !(agent->flags & IB_MAD_USER_RMPP);
835 }
836 EXPORT_SYMBOL(ib_mad_kernel_rmpp_agent);
837 
838 struct ib_mad_send_buf *ib_create_send_mad(struct ib_mad_agent *mad_agent,
839 					   u32 remote_qpn, u16 pkey_index,
840 					   int rmpp_active, int hdr_len,
841 					   int data_len, gfp_t gfp_mask,
842 					   u8 base_version)
843 {
844 	struct ib_mad_agent_private *mad_agent_priv;
845 	struct ib_mad_send_wr_private *mad_send_wr;
846 	int pad, message_size, ret, size;
847 	void *buf;
848 	size_t mad_size;
849 	bool opa;
850 
851 	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
852 				      agent);
853 
854 	opa = rdma_cap_opa_mad(mad_agent->device, mad_agent->port_num);
855 
856 	if (opa && base_version == OPA_MGMT_BASE_VERSION)
857 		mad_size = sizeof(struct opa_mad);
858 	else
859 		mad_size = sizeof(struct ib_mad);
860 
861 	pad = get_pad_size(hdr_len, data_len, mad_size);
862 	message_size = hdr_len + data_len + pad;
863 
864 	if (ib_mad_kernel_rmpp_agent(mad_agent)) {
865 		if (!rmpp_active && message_size > mad_size)
866 			return ERR_PTR(-EINVAL);
867 	} else
868 		if (rmpp_active || message_size > mad_size)
869 			return ERR_PTR(-EINVAL);
870 
871 	size = rmpp_active ? hdr_len : mad_size;
872 	buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
873 	if (!buf)
874 		return ERR_PTR(-ENOMEM);
875 
876 	mad_send_wr = buf + size;
877 	INIT_LIST_HEAD(&mad_send_wr->rmpp_list);
878 	mad_send_wr->send_buf.mad = buf;
879 	mad_send_wr->send_buf.hdr_len = hdr_len;
880 	mad_send_wr->send_buf.data_len = data_len;
881 	mad_send_wr->pad = pad;
882 
883 	mad_send_wr->mad_agent_priv = mad_agent_priv;
884 	mad_send_wr->sg_list[0].length = hdr_len;
885 	mad_send_wr->sg_list[0].lkey = mad_agent->qp->pd->local_dma_lkey;
886 
887 	/* OPA MADs don't have to be the full 2048 bytes */
888 	if (opa && base_version == OPA_MGMT_BASE_VERSION &&
889 	    data_len < mad_size - hdr_len)
890 		mad_send_wr->sg_list[1].length = data_len;
891 	else
892 		mad_send_wr->sg_list[1].length = mad_size - hdr_len;
893 
894 	mad_send_wr->sg_list[1].lkey = mad_agent->qp->pd->local_dma_lkey;
895 
896 	mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
897 
898 	mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
899 	mad_send_wr->send_wr.wr.sg_list = mad_send_wr->sg_list;
900 	mad_send_wr->send_wr.wr.num_sge = 2;
901 	mad_send_wr->send_wr.wr.opcode = IB_WR_SEND;
902 	mad_send_wr->send_wr.wr.send_flags = IB_SEND_SIGNALED;
903 	mad_send_wr->send_wr.remote_qpn = remote_qpn;
904 	mad_send_wr->send_wr.remote_qkey = IB_QP_SET_QKEY;
905 	mad_send_wr->send_wr.pkey_index = pkey_index;
906 
907 	if (rmpp_active) {
908 		ret = alloc_send_rmpp_list(mad_send_wr, mad_size, gfp_mask);
909 		if (ret) {
910 			kfree(buf);
911 			return ERR_PTR(ret);
912 		}
913 	}
914 
915 	mad_send_wr->send_buf.mad_agent = mad_agent;
916 	refcount_inc(&mad_agent_priv->refcount);
917 	return &mad_send_wr->send_buf;
918 }
919 EXPORT_SYMBOL(ib_create_send_mad);
920 
921 int ib_get_mad_data_offset(u8 mgmt_class)
922 {
923 	if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
924 		return IB_MGMT_SA_HDR;
925 	else if ((mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
926 		 (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
927 		 (mgmt_class == IB_MGMT_CLASS_BIS))
928 		return IB_MGMT_DEVICE_HDR;
929 	else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
930 		 (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
931 		return IB_MGMT_VENDOR_HDR;
932 	else
933 		return IB_MGMT_MAD_HDR;
934 }
935 EXPORT_SYMBOL(ib_get_mad_data_offset);
936 
937 int ib_is_mad_class_rmpp(u8 mgmt_class)
938 {
939 	if ((mgmt_class == IB_MGMT_CLASS_SUBN_ADM) ||
940 	    (mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
941 	    (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
942 	    (mgmt_class == IB_MGMT_CLASS_BIS) ||
943 	    ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
944 	     (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)))
945 		return 1;
946 	return 0;
947 }
948 EXPORT_SYMBOL(ib_is_mad_class_rmpp);
949 
950 void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num)
951 {
952 	struct ib_mad_send_wr_private *mad_send_wr;
953 	struct list_head *list;
954 
955 	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
956 				   send_buf);
957 	list = &mad_send_wr->cur_seg->list;
958 
959 	if (mad_send_wr->cur_seg->num < seg_num) {
960 		list_for_each_entry(mad_send_wr->cur_seg, list, list)
961 			if (mad_send_wr->cur_seg->num == seg_num)
962 				break;
963 	} else if (mad_send_wr->cur_seg->num > seg_num) {
964 		list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list)
965 			if (mad_send_wr->cur_seg->num == seg_num)
966 				break;
967 	}
968 	return mad_send_wr->cur_seg->data;
969 }
970 EXPORT_SYMBOL(ib_get_rmpp_segment);
971 
972 static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr)
973 {
974 	if (mad_send_wr->send_buf.seg_count)
975 		return ib_get_rmpp_segment(&mad_send_wr->send_buf,
976 					   mad_send_wr->seg_num);
977 	else
978 		return mad_send_wr->send_buf.mad +
979 		       mad_send_wr->send_buf.hdr_len;
980 }
981 
982 void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
983 {
984 	struct ib_mad_agent_private *mad_agent_priv;
985 	struct ib_mad_send_wr_private *mad_send_wr;
986 
987 	mad_agent_priv = container_of(send_buf->mad_agent,
988 				      struct ib_mad_agent_private, agent);
989 	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
990 				   send_buf);
991 
992 	free_send_rmpp_list(mad_send_wr);
993 	kfree(send_buf->mad);
994 	deref_mad_agent(mad_agent_priv);
995 }
996 EXPORT_SYMBOL(ib_free_send_mad);
997 
998 int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
999 {
1000 	struct ib_mad_qp_info *qp_info;
1001 	struct list_head *list;
1002 	struct ib_mad_agent *mad_agent;
1003 	struct ib_sge *sge;
1004 	unsigned long flags;
1005 	int ret;
1006 
1007 	/* Set WR ID to find mad_send_wr upon completion */
1008 	qp_info = mad_send_wr->mad_agent_priv->qp_info;
1009 	mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
1010 	mad_send_wr->mad_list.cqe.done = ib_mad_send_done;
1011 	mad_send_wr->send_wr.wr.wr_cqe = &mad_send_wr->mad_list.cqe;
1012 
1013 	mad_agent = mad_send_wr->send_buf.mad_agent;
1014 	sge = mad_send_wr->sg_list;
1015 	sge[0].addr = ib_dma_map_single(mad_agent->device,
1016 					mad_send_wr->send_buf.mad,
1017 					sge[0].length,
1018 					DMA_TO_DEVICE);
1019 	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[0].addr)))
1020 		return -ENOMEM;
1021 
1022 	mad_send_wr->header_mapping = sge[0].addr;
1023 
1024 	sge[1].addr = ib_dma_map_single(mad_agent->device,
1025 					ib_get_payload(mad_send_wr),
1026 					sge[1].length,
1027 					DMA_TO_DEVICE);
1028 	if (unlikely(ib_dma_mapping_error(mad_agent->device, sge[1].addr))) {
1029 		ib_dma_unmap_single(mad_agent->device,
1030 				    mad_send_wr->header_mapping,
1031 				    sge[0].length, DMA_TO_DEVICE);
1032 		return -ENOMEM;
1033 	}
1034 	mad_send_wr->payload_mapping = sge[1].addr;
1035 
1036 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
1037 	if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
1038 		trace_ib_mad_ib_send_mad(mad_send_wr, qp_info);
1039 		ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr.wr,
1040 				   NULL);
1041 		list = &qp_info->send_queue.list;
1042 	} else {
1043 		ret = 0;
1044 		list = &qp_info->overflow_list;
1045 	}
1046 
1047 	if (!ret) {
1048 		qp_info->send_queue.count++;
1049 		list_add_tail(&mad_send_wr->mad_list.list, list);
1050 	}
1051 	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
1052 	if (ret) {
1053 		ib_dma_unmap_single(mad_agent->device,
1054 				    mad_send_wr->header_mapping,
1055 				    sge[0].length, DMA_TO_DEVICE);
1056 		ib_dma_unmap_single(mad_agent->device,
1057 				    mad_send_wr->payload_mapping,
1058 				    sge[1].length, DMA_TO_DEVICE);
1059 	}
1060 	return ret;
1061 }
1062 
/*
 * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
 *  with the registered client
 * @send_buf:     first send buffer of a chain linked through ->next
 * @bad_send_buf: optional; on failure receives the buffer that could not
 *                be posted
 *
 * Each buffer in the chain is validated and posted in turn.  Processing
 * stops at the first buffer that fails; buffers before it stay posted.
 *
 * Returns 0 when the whole chain was processed, otherwise the non-zero
 * result of the step that failed.
 */
int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
		     struct ib_mad_send_buf **bad_send_buf)
{
	struct ib_mad_agent_private *mad_agent_priv;
	struct ib_mad_send_buf *next_send_buf;
	struct ib_mad_send_wr_private *mad_send_wr;
	unsigned long flags;
	int ret = -EINVAL;

	/* Walk list of send WRs and post each on send list */
	for (; send_buf; send_buf = next_send_buf) {
		mad_send_wr = container_of(send_buf,
					   struct ib_mad_send_wr_private,
					   send_buf);
		mad_agent_priv = mad_send_wr->mad_agent_priv;

		ret = ib_mad_enforce_security(mad_agent_priv,
					      mad_send_wr->send_wr.pkey_index);
		if (ret)
			goto error;

		/*
		 * A send handler is always required; a receive handler is
		 * required as well when a timeout is requested.
		 */
		if (!send_buf->mad_agent->send_handler ||
		    (send_buf->timeout_ms &&
		     !send_buf->mad_agent->recv_handler)) {
			ret = -EINVAL;
			goto error;
		}

		/*
		 * An agent registered with an RMPP version may only send
		 * MADs of the classes accepted by ib_is_mad_class_rmpp().
		 */
		if (!ib_is_mad_class_rmpp(((struct ib_mad_hdr *) send_buf->mad)->mgmt_class)) {
			if (mad_agent_priv->agent.rmpp_version) {
				ret = -EINVAL;
				goto error;
			}
		}

		/*
		 * Save pointer to next work request to post in case the
		 * current one completes, and the user modifies the work
		 * request associated with the completion
		 */
		next_send_buf = send_buf->next;
		mad_send_wr->send_wr.ah = send_buf->ah;

		if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class ==
		    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
			ret = handle_outgoing_dr_smp(mad_agent_priv,
						     mad_send_wr);
			if (ret < 0)		/* error */
				goto error;
			else if (ret == 1)	/* locally consumed */
				continue;
		}

		mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
		/* Timeout will be updated after send completes */
		mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
		mad_send_wr->max_retries = send_buf->retries;
		mad_send_wr->retries_left = send_buf->retries;
		/* The retry budget now lives in mad_send_wr; clear the caller's copy */
		send_buf->retries = 0;
		/* Reference for work request to QP + response */
		mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
		mad_send_wr->status = IB_WC_SUCCESS;

		/* Reference MAD agent until send completes */
		refcount_inc(&mad_agent_priv->refcount);
		spin_lock_irqsave(&mad_agent_priv->lock, flags);
		list_add_tail(&mad_send_wr->agent_list,
			      &mad_agent_priv->send_list);
		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);

		/*
		 * With kernel RMPP, ib_send_rmpp_mad() goes first; the plain
		 * send path only runs when it neither failed nor consumed
		 * the request.
		 */
		if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
			ret = ib_send_rmpp_mad(mad_send_wr);
			if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED)
				ret = ib_send_mad(mad_send_wr);
		} else
			ret = ib_send_mad(mad_send_wr);
		if (ret < 0) {
			/* Fail send request */
			spin_lock_irqsave(&mad_agent_priv->lock, flags);
			list_del(&mad_send_wr->agent_list);
			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
			/* Drop the reference taken just above for this send */
			deref_mad_agent(mad_agent_priv);
			goto error;
		}
	}
	return 0;
error:
	/* Tell the caller which buffer of the chain was rejected */
	if (bad_send_buf)
		*bad_send_buf = send_buf;
	return ret;
}
1158 EXPORT_SYMBOL(ib_post_send_mad);
1159 
1160 /*
1161  * ib_free_recv_mad - Returns data buffers used to receive
1162  *  a MAD to the access layer
1163  */
1164 void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc)
1165 {
1166 	struct ib_mad_recv_buf *mad_recv_buf, *temp_recv_buf;
1167 	struct ib_mad_private_header *mad_priv_hdr;
1168 	struct ib_mad_private *priv;
1169 	struct list_head free_list;
1170 
1171 	INIT_LIST_HEAD(&free_list);
1172 	list_splice_init(&mad_recv_wc->rmpp_list, &free_list);
1173 
1174 	list_for_each_entry_safe(mad_recv_buf, temp_recv_buf,
1175 					&free_list, list) {
1176 		mad_recv_wc = container_of(mad_recv_buf, struct ib_mad_recv_wc,
1177 					   recv_buf);
1178 		mad_priv_hdr = container_of(mad_recv_wc,
1179 					    struct ib_mad_private_header,
1180 					    recv_wc);
1181 		priv = container_of(mad_priv_hdr, struct ib_mad_private,
1182 				    header);
1183 		kfree(priv);
1184 	}
1185 }
1186 EXPORT_SYMBOL(ib_free_recv_mad);
1187 
1188 static int method_in_use(struct ib_mad_mgmt_method_table **method,
1189 			 struct ib_mad_reg_req *mad_reg_req)
1190 {
1191 	int i;
1192 
1193 	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) {
1194 		if ((*method)->agent[i]) {
1195 			pr_err("Method %d already in use\n", i);
1196 			return -EINVAL;
1197 		}
1198 	}
1199 	return 0;
1200 }
1201 
1202 static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
1203 {
1204 	/* Allocate management method table */
1205 	*method = kzalloc(sizeof **method, GFP_ATOMIC);
1206 	return (*method) ? 0 : (-ENOMEM);
1207 }
1208 
1209 /*
1210  * Check to see if there are any methods still in use
1211  */
1212 static int check_method_table(struct ib_mad_mgmt_method_table *method)
1213 {
1214 	int i;
1215 
1216 	for (i = 0; i < IB_MGMT_MAX_METHODS; i++)
1217 		if (method->agent[i])
1218 			return 1;
1219 	return 0;
1220 }
1221 
1222 /*
1223  * Check to see if there are any method tables for this class still in use
1224  */
1225 static int check_class_table(struct ib_mad_mgmt_class_table *class)
1226 {
1227 	int i;
1228 
1229 	for (i = 0; i < MAX_MGMT_CLASS; i++)
1230 		if (class->method_table[i])
1231 			return 1;
1232 	return 0;
1233 }
1234 
1235 static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class)
1236 {
1237 	int i;
1238 
1239 	for (i = 0; i < MAX_MGMT_OUI; i++)
1240 		if (vendor_class->method_table[i])
1241 			return 1;
1242 	return 0;
1243 }
1244 
1245 static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class,
1246 			   const char *oui)
1247 {
1248 	int i;
1249 
1250 	for (i = 0; i < MAX_MGMT_OUI; i++)
1251 		/* Is there matching OUI for this vendor class ? */
1252 		if (!memcmp(vendor_class->oui[i], oui, 3))
1253 			return i;
1254 
1255 	return -1;
1256 }
1257 
1258 static int check_vendor_table(struct ib_mad_mgmt_vendor_class_table *vendor)
1259 {
1260 	int i;
1261 
1262 	for (i = 0; i < MAX_MGMT_VENDOR_RANGE2; i++)
1263 		if (vendor->vendor_class[i])
1264 			return 1;
1265 
1266 	return 0;
1267 }
1268 
1269 static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method,
1270 				     struct ib_mad_agent_private *agent)
1271 {
1272 	int i;
1273 
1274 	/* Remove any methods for this mad agent */
1275 	for (i = 0; i < IB_MGMT_MAX_METHODS; i++)
1276 		if (method->agent[i] == agent)
1277 			method->agent[i] = NULL;
1278 }
1279 
1280 static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
1281 			      struct ib_mad_agent_private *agent_priv,
1282 			      u8 mgmt_class)
1283 {
1284 	struct ib_mad_port_private *port_priv;
1285 	struct ib_mad_mgmt_class_table **class;
1286 	struct ib_mad_mgmt_method_table **method;
1287 	int i, ret;
1288 
1289 	port_priv = agent_priv->qp_info->port_priv;
1290 	class = &port_priv->version[mad_reg_req->mgmt_class_version].class;
1291 	if (!*class) {
1292 		/* Allocate management class table for "new" class version */
1293 		*class = kzalloc(sizeof **class, GFP_ATOMIC);
1294 		if (!*class) {
1295 			ret = -ENOMEM;
1296 			goto error1;
1297 		}
1298 
1299 		/* Allocate method table for this management class */
1300 		method = &(*class)->method_table[mgmt_class];
1301 		if ((ret = allocate_method_table(method)))
1302 			goto error2;
1303 	} else {
1304 		method = &(*class)->method_table[mgmt_class];
1305 		if (!*method) {
1306 			/* Allocate method table for this management class */
1307 			if ((ret = allocate_method_table(method)))
1308 				goto error1;
1309 		}
1310 	}
1311 
1312 	/* Now, make sure methods are not already in use */
1313 	if (method_in_use(method, mad_reg_req))
1314 		goto error3;
1315 
1316 	/* Finally, add in methods being registered */
1317 	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
1318 		(*method)->agent[i] = agent_priv;
1319 
1320 	return 0;
1321 
1322 error3:
1323 	/* Remove any methods for this mad agent */
1324 	remove_methods_mad_agent(*method, agent_priv);
1325 	/* Now, check to see if there are any methods in use */
1326 	if (!check_method_table(*method)) {
1327 		/* If not, release management method table */
1328 		kfree(*method);
1329 		*method = NULL;
1330 	}
1331 	ret = -EINVAL;
1332 	goto error1;
1333 error2:
1334 	kfree(*class);
1335 	*class = NULL;
1336 error1:
1337 	return ret;
1338 }
1339 
/*
 * Register the methods of a registration request for a vendor class that
 * is routed by OUI.
 *
 * The vendor table for the class version and the vendor class entry are
 * allocated on demand.  The request's OUI is then matched against the
 * existing OUI slots; when none matches, the first unused slot is taken.
 *
 * Returns 0 on success, -EINVAL when a requested method already has an
 * agent, and otherwise the value left in ret: its initial -ENOMEM or the
 * error from allocate_method_table().  On the error3/error2 paths, tables
 * that this call itself allocated are unhooked and freed again.
 */
static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
			   struct ib_mad_agent_private *agent_priv)
{
	struct ib_mad_port_private *port_priv;
	struct ib_mad_mgmt_vendor_class_table **vendor_table;
	/* vendor / vendor_class stay NULL unless allocated by this call */
	struct ib_mad_mgmt_vendor_class_table *vendor = NULL;
	struct ib_mad_mgmt_vendor_class *vendor_class = NULL;
	struct ib_mad_mgmt_method_table **method;
	int i, ret = -ENOMEM;
	u8 vclass;

	/* "New" vendor (with OUI) class */
	vclass = vendor_class_index(mad_reg_req->mgmt_class);
	port_priv = agent_priv->qp_info->port_priv;
	vendor_table = &port_priv->version[
				mad_reg_req->mgmt_class_version].vendor;
	if (!*vendor_table) {
		/* Allocate mgmt vendor class table for "new" class version */
		vendor = kzalloc(sizeof *vendor, GFP_ATOMIC);
		if (!vendor)
			goto error1;

		*vendor_table = vendor;
	}
	if (!(*vendor_table)->vendor_class[vclass]) {
		/* Allocate table for this management vendor class */
		vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC);
		if (!vendor_class)
			goto error2;

		(*vendor_table)->vendor_class[vclass] = vendor_class;
	}
	for (i = 0; i < MAX_MGMT_OUI; i++) {
		/* Is there matching OUI for this vendor class ? */
		if (!memcmp((*vendor_table)->vendor_class[vclass]->oui[i],
			    mad_reg_req->oui, 3)) {
			method = &(*vendor_table)->vendor_class[
						vclass]->method_table[i];
			/*
			 * A matching slot without a method table is an error;
			 * ret still holds its initial -ENOMEM here.
			 */
			if (!*method)
				goto error3;
			goto check_in_use;
		}
	}
	for (i = 0; i < MAX_MGMT_OUI; i++) {
		/* OUI slot available ? */
		if (!is_vendor_oui((*vendor_table)->vendor_class[
				vclass]->oui[i])) {
			method = &(*vendor_table)->vendor_class[
				vclass]->method_table[i];
			/* Allocate method table for this OUI */
			if (!*method) {
				ret = allocate_method_table(method);
				if (ret)
					goto error3;
			}
			/* Claim the slot for the requested OUI */
			memcpy((*vendor_table)->vendor_class[vclass]->oui[i],
			       mad_reg_req->oui, 3);
			goto check_in_use;
		}
	}
	/* No matching and no free slot; ret is still the initial -ENOMEM */
	dev_err(&agent_priv->agent.device->dev, "All OUI slots in use\n");
	goto error3;

check_in_use:
	/* Now, make sure methods are not already in use */
	if (method_in_use(method, mad_reg_req))
		goto error4;

	/* Finally, add in methods being registered */
	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
		(*method)->agent[i] = agent_priv;

	return 0;

error4:
	/* Remove any methods for this mad agent */
	remove_methods_mad_agent(*method, agent_priv);
	/* Now, check to see if there are any methods in use */
	if (!check_method_table(*method)) {
		/* If not, release management method table */
		kfree(*method);
		*method = NULL;
	}
	ret = -EINVAL;
error3:
	/* Undo the vendor class entry only if this call created it */
	if (vendor_class) {
		(*vendor_table)->vendor_class[vclass] = NULL;
		kfree(vendor_class);
	}
error2:
	/* Undo the vendor table only if this call created it */
	if (vendor) {
		*vendor_table = NULL;
		kfree(vendor);
	}
error1:
	return ret;
}
1437 
1438 static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv)
1439 {
1440 	struct ib_mad_port_private *port_priv;
1441 	struct ib_mad_mgmt_class_table *class;
1442 	struct ib_mad_mgmt_method_table *method;
1443 	struct ib_mad_mgmt_vendor_class_table *vendor;
1444 	struct ib_mad_mgmt_vendor_class *vendor_class;
1445 	int index;
1446 	u8 mgmt_class;
1447 
1448 	/*
1449 	 * Was MAD registration request supplied
1450 	 * with original registration ?
1451 	 */
1452 	if (!agent_priv->reg_req)
1453 		goto out;
1454 
1455 	port_priv = agent_priv->qp_info->port_priv;
1456 	mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class);
1457 	class = port_priv->version[
1458 			agent_priv->reg_req->mgmt_class_version].class;
1459 	if (!class)
1460 		goto vendor_check;
1461 
1462 	method = class->method_table[mgmt_class];
1463 	if (method) {
1464 		/* Remove any methods for this mad agent */
1465 		remove_methods_mad_agent(method, agent_priv);
1466 		/* Now, check to see if there are any methods still in use */
1467 		if (!check_method_table(method)) {
1468 			/* If not, release management method table */
1469 			kfree(method);
1470 			class->method_table[mgmt_class] = NULL;
1471 			/* Any management classes left ? */
1472 			if (!check_class_table(class)) {
1473 				/* If not, release management class table */
1474 				kfree(class);
1475 				port_priv->version[
1476 					agent_priv->reg_req->
1477 					mgmt_class_version].class = NULL;
1478 			}
1479 		}
1480 	}
1481 
1482 vendor_check:
1483 	if (!is_vendor_class(mgmt_class))
1484 		goto out;
1485 
1486 	/* normalize mgmt_class to vendor range 2 */
1487 	mgmt_class = vendor_class_index(agent_priv->reg_req->mgmt_class);
1488 	vendor = port_priv->version[
1489 			agent_priv->reg_req->mgmt_class_version].vendor;
1490 
1491 	if (!vendor)
1492 		goto out;
1493 
1494 	vendor_class = vendor->vendor_class[mgmt_class];
1495 	if (vendor_class) {
1496 		index = find_vendor_oui(vendor_class, agent_priv->reg_req->oui);
1497 		if (index < 0)
1498 			goto out;
1499 		method = vendor_class->method_table[index];
1500 		if (method) {
1501 			/* Remove any methods for this mad agent */
1502 			remove_methods_mad_agent(method, agent_priv);
1503 			/*
1504 			 * Now, check to see if there are
1505 			 * any methods still in use
1506 			 */
1507 			if (!check_method_table(method)) {
1508 				/* If not, release management method table */
1509 				kfree(method);
1510 				vendor_class->method_table[index] = NULL;
1511 				memset(vendor_class->oui[index], 0, 3);
1512 				/* Any OUIs left ? */
1513 				if (!check_vendor_class(vendor_class)) {
1514 					/* If not, release vendor class table */
1515 					kfree(vendor_class);
1516 					vendor->vendor_class[mgmt_class] = NULL;
1517 					/* Any other vendor classes left ? */
1518 					if (!check_vendor_table(vendor)) {
1519 						kfree(vendor);
1520 						port_priv->version[
1521 							agent_priv->reg_req->
1522 							mgmt_class_version].
1523 							vendor = NULL;
1524 					}
1525 				}
1526 			}
1527 		}
1528 	}
1529 
1530 out:
1531 	return;
1532 }
1533 
/*
 * find_mad_agent - Find the agent a received MAD should be delivered to
 * @port_priv: port the MAD arrived on
 * @mad_hdr:   header of the received MAD
 *
 * Response MADs are routed by the high 32 bits of their transaction ID,
 * which are looked up in ib_mad_clients.  All other MADs are routed
 * through the port's registration tables by class version, class and
 * method, and for vendor classes additionally by OUI.
 *
 * Returns the agent with a reference taken on its refcount, or NULL when
 * no agent matches or the matching agent has no receive handler.
 */
static struct ib_mad_agent_private *
find_mad_agent(struct ib_mad_port_private *port_priv,
	       const struct ib_mad_hdr *mad_hdr)
{
	struct ib_mad_agent_private *mad_agent = NULL;
	unsigned long flags;

	if (ib_response_mad(mad_hdr)) {
		u32 hi_tid;

		/*
		 * Routing is based on high 32 bits of transaction ID
		 * of MAD.
		 */
		hi_tid = be64_to_cpu(mad_hdr->tid) >> 32;
		rcu_read_lock();
		mad_agent = xa_load(&ib_mad_clients, hi_tid);
		/* Ignore an agent whose refcount has already dropped to zero */
		if (mad_agent && !refcount_inc_not_zero(&mad_agent->refcount))
			mad_agent = NULL;
		rcu_read_unlock();
	} else {
		struct ib_mad_mgmt_class_table *class;
		struct ib_mad_mgmt_method_table *method;
		struct ib_mad_mgmt_vendor_class_table *vendor;
		struct ib_mad_mgmt_vendor_class *vendor_class;
		const struct ib_vendor_mad *vendor_mad;
		int index;

		/* The registration tables are walked under reg_lock */
		spin_lock_irqsave(&port_priv->reg_lock, flags);
		/*
		 * Routing is based on version, class, and method
		 * For "newer" vendor MADs, also based on OUI
		 */
		/* class_version indexes port_priv->version[] below */
		if (mad_hdr->class_version >= MAX_MGMT_VERSION)
			goto out;
		if (!is_vendor_class(mad_hdr->mgmt_class)) {
			class = port_priv->version[
					mad_hdr->class_version].class;
			if (!class)
				goto out;
			/* Reject classes that would index past method_table[] */
			if (convert_mgmt_class(mad_hdr->mgmt_class) >=
			    ARRAY_SIZE(class->method_table))
				goto out;
			method = class->method_table[convert_mgmt_class(
							mad_hdr->mgmt_class)];
			/* The method is looked up with the response bit masked off */
			if (method)
				mad_agent = method->agent[mad_hdr->method &
							  ~IB_MGMT_METHOD_RESP];
		} else {
			vendor = port_priv->version[
					mad_hdr->class_version].vendor;
			if (!vendor)
				goto out;
			vendor_class = vendor->vendor_class[vendor_class_index(
						mad_hdr->mgmt_class)];
			if (!vendor_class)
				goto out;
			/* Find matching OUI */
			vendor_mad = (const struct ib_vendor_mad *)mad_hdr;
			index = find_vendor_oui(vendor_class, vendor_mad->oui);
			if (index == -1)
				goto out;
			method = vendor_class->method_table[index];
			if (method) {
				mad_agent = method->agent[mad_hdr->method &
							  ~IB_MGMT_METHOD_RESP];
			}
		}
		/* Take the reference while reg_lock is still held */
		if (mad_agent)
			refcount_inc(&mad_agent->refcount);
out:
		spin_unlock_irqrestore(&port_priv->reg_lock, flags);
	}

	/* An agent without a receive handler cannot take the MAD */
	if (mad_agent && !mad_agent->agent.recv_handler) {
		dev_notice(&port_priv->device->dev,
			   "No receive handler for client %p on port %u\n",
			   &mad_agent->agent, port_priv->port_num);
		deref_mad_agent(mad_agent);
		mad_agent = NULL;
	}

	return mad_agent;
}
1618 
1619 static int validate_mad(const struct ib_mad_hdr *mad_hdr,
1620 			const struct ib_mad_qp_info *qp_info,
1621 			bool opa)
1622 {
1623 	int valid = 0;
1624 	u32 qp_num = qp_info->qp->qp_num;
1625 
1626 	/* Make sure MAD base version is understood */
1627 	if (mad_hdr->base_version != IB_MGMT_BASE_VERSION &&
1628 	    (!opa || mad_hdr->base_version != OPA_MGMT_BASE_VERSION)) {
1629 		pr_err("MAD received with unsupported base version %u %s\n",
1630 		       mad_hdr->base_version, opa ? "(opa)" : "");
1631 		goto out;
1632 	}
1633 
1634 	/* Filter SMI packets sent to other than QP0 */
1635 	if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
1636 	    (mad_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
1637 		if (qp_num == 0)
1638 			valid = 1;
1639 	} else {
1640 		/* CM attributes other than ClassPortInfo only use Send method */
1641 		if ((mad_hdr->mgmt_class == IB_MGMT_CLASS_CM) &&
1642 		    (mad_hdr->attr_id != IB_MGMT_CLASSPORTINFO_ATTR_ID) &&
1643 		    (mad_hdr->method != IB_MGMT_METHOD_SEND))
1644 			goto out;
1645 		/* Filter GSI packets sent to QP0 */
1646 		if (qp_num != 0)
1647 			valid = 1;
1648 	}
1649 
1650 out:
1651 	return valid;
1652 }
1653 
1654 static int is_rmpp_data_mad(const struct ib_mad_agent_private *mad_agent_priv,
1655 			    const struct ib_mad_hdr *mad_hdr)
1656 {
1657 	struct ib_rmpp_mad *rmpp_mad;
1658 
1659 	rmpp_mad = (struct ib_rmpp_mad *)mad_hdr;
1660 	return !mad_agent_priv->agent.rmpp_version ||
1661 		!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent) ||
1662 		!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
1663 				    IB_MGMT_RMPP_FLAG_ACTIVE) ||
1664 		(rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
1665 }
1666 
1667 static inline int rcv_has_same_class(const struct ib_mad_send_wr_private *wr,
1668 				     const struct ib_mad_recv_wc *rwc)
1669 {
1670 	return ((struct ib_mad_hdr *)(wr->send_buf.mad))->mgmt_class ==
1671 		rwc->recv_buf.mad->mad_hdr.mgmt_class;
1672 }
1673 
1674 static inline int
1675 rcv_has_same_gid(const struct ib_mad_agent_private *mad_agent_priv,
1676 		 const struct ib_mad_send_wr_private *wr,
1677 		 const struct ib_mad_recv_wc *rwc)
1678 {
1679 	struct rdma_ah_attr attr;
1680 	u8 send_resp, rcv_resp;
1681 	union ib_gid sgid;
1682 	struct ib_device *device = mad_agent_priv->agent.device;
1683 	u32 port_num = mad_agent_priv->agent.port_num;
1684 	u8 lmc;
1685 	bool has_grh;
1686 
1687 	send_resp = ib_response_mad((struct ib_mad_hdr *)wr->send_buf.mad);
1688 	rcv_resp = ib_response_mad(&rwc->recv_buf.mad->mad_hdr);
1689 
1690 	if (send_resp == rcv_resp)
1691 		/* both requests, or both responses. GIDs different */
1692 		return 0;
1693 
1694 	if (rdma_query_ah(wr->send_buf.ah, &attr))
1695 		/* Assume not equal, to avoid false positives. */
1696 		return 0;
1697 
1698 	has_grh = !!(rdma_ah_get_ah_flags(&attr) & IB_AH_GRH);
1699 	if (has_grh != !!(rwc->wc->wc_flags & IB_WC_GRH))
1700 		/* one has GID, other does not.  Assume different */
1701 		return 0;
1702 
1703 	if (!send_resp && rcv_resp) {
1704 		/* is request/response. */
1705 		if (!has_grh) {
1706 			if (ib_get_cached_lmc(device, port_num, &lmc))
1707 				return 0;
1708 			return (!lmc || !((rdma_ah_get_path_bits(&attr) ^
1709 					   rwc->wc->dlid_path_bits) &
1710 					  ((1 << lmc) - 1)));
1711 		} else {
1712 			const struct ib_global_route *grh =
1713 					rdma_ah_read_grh(&attr);
1714 
1715 			if (rdma_query_gid(device, port_num,
1716 					   grh->sgid_index, &sgid))
1717 				return 0;
1718 			return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
1719 				       16);
1720 		}
1721 	}
1722 
1723 	if (!has_grh)
1724 		return rdma_ah_get_dlid(&attr) == rwc->wc->slid;
1725 	else
1726 		return !memcmp(rdma_ah_read_grh(&attr)->dgid.raw,
1727 			       rwc->recv_buf.grh->sgid.raw,
1728 			       16);
1729 }
1730 
1731 static inline int is_direct(u8 class)
1732 {
1733 	return (class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE);
1734 }
1735 
1736 struct ib_mad_send_wr_private*
1737 ib_find_send_mad(const struct ib_mad_agent_private *mad_agent_priv,
1738 		 const struct ib_mad_recv_wc *wc)
1739 {
1740 	struct ib_mad_send_wr_private *wr;
1741 	const struct ib_mad_hdr *mad_hdr;
1742 
1743 	mad_hdr = &wc->recv_buf.mad->mad_hdr;
1744 
1745 	list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) {
1746 		if ((wr->tid == mad_hdr->tid) &&
1747 		    rcv_has_same_class(wr, wc) &&
1748 		    /*
1749 		     * Don't check GID for direct routed MADs.
1750 		     * These might have permissive LIDs.
1751 		     */
1752 		    (is_direct(mad_hdr->mgmt_class) ||
1753 		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
1754 			return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
1755 	}
1756 
1757 	/*
1758 	 * It's possible to receive the response before we've
1759 	 * been notified that the send has completed
1760 	 */
1761 	list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) {
1762 		if (is_rmpp_data_mad(mad_agent_priv, wr->send_buf.mad) &&
1763 		    wr->tid == mad_hdr->tid &&
1764 		    wr->timeout &&
1765 		    rcv_has_same_class(wr, wc) &&
1766 		    /*
1767 		     * Don't check GID for direct routed MADs.
1768 		     * These might have permissive LIDs.
1769 		     */
1770 		    (is_direct(mad_hdr->mgmt_class) ||
1771 		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
1772 			/* Verify request has not been canceled */
1773 			return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
1774 	}
1775 	return NULL;
1776 }
1777 
1778 void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr)
1779 {
1780 	mad_send_wr->timeout = 0;
1781 	if (mad_send_wr->refcount == 1)
1782 		list_move_tail(&mad_send_wr->agent_list,
1783 			      &mad_send_wr->mad_agent_priv->done_list);
1784 }
1785 
1786 static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
1787 				 struct ib_mad_recv_wc *mad_recv_wc)
1788 {
1789 	struct ib_mad_send_wr_private *mad_send_wr;
1790 	struct ib_mad_send_wc mad_send_wc;
1791 	unsigned long flags;
1792 	int ret;
1793 
1794 	INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
1795 	ret = ib_mad_enforce_security(mad_agent_priv,
1796 				      mad_recv_wc->wc->pkey_index);
1797 	if (ret) {
1798 		ib_free_recv_mad(mad_recv_wc);
1799 		deref_mad_agent(mad_agent_priv);
1800 		return;
1801 	}
1802 
1803 	list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
1804 	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
1805 		mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
1806 						      mad_recv_wc);
1807 		if (!mad_recv_wc) {
1808 			deref_mad_agent(mad_agent_priv);
1809 			return;
1810 		}
1811 	}
1812 
1813 	/* Complete corresponding request */
1814 	if (ib_response_mad(&mad_recv_wc->recv_buf.mad->mad_hdr)) {
1815 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
1816 		mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
1817 		if (!mad_send_wr) {
1818 			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1819 			if (!ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)
1820 			   && ib_is_mad_class_rmpp(mad_recv_wc->recv_buf.mad->mad_hdr.mgmt_class)
1821 			   && (ib_get_rmpp_flags(&((struct ib_rmpp_mad *)mad_recv_wc->recv_buf.mad)->rmpp_hdr)
1822 					& IB_MGMT_RMPP_FLAG_ACTIVE)) {
1823 				/* user rmpp is in effect
1824 				 * and this is an active RMPP MAD
1825 				 */
1826 				mad_agent_priv->agent.recv_handler(
1827 						&mad_agent_priv->agent, NULL,
1828 						mad_recv_wc);
1829 				deref_mad_agent(mad_agent_priv);
1830 			} else {
1831 				/* not user rmpp, revert to normal behavior and
1832 				 * drop the mad
1833 				 */
1834 				ib_free_recv_mad(mad_recv_wc);
1835 				deref_mad_agent(mad_agent_priv);
1836 				return;
1837 			}
1838 		} else {
1839 			ib_mark_mad_done(mad_send_wr);
1840 			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1841 
1842 			/* Defined behavior is to complete response before request */
1843 			mad_agent_priv->agent.recv_handler(
1844 					&mad_agent_priv->agent,
1845 					&mad_send_wr->send_buf,
1846 					mad_recv_wc);
1847 			deref_mad_agent(mad_agent_priv);
1848 
1849 			mad_send_wc.status = IB_WC_SUCCESS;
1850 			mad_send_wc.vendor_err = 0;
1851 			mad_send_wc.send_buf = &mad_send_wr->send_buf;
1852 			ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
1853 		}
1854 	} else {
1855 		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent, NULL,
1856 						   mad_recv_wc);
1857 		deref_mad_agent(mad_agent_priv);
1858 	}
1859 }
1860 
1861 static enum smi_action handle_ib_smi(const struct ib_mad_port_private *port_priv,
1862 				     const struct ib_mad_qp_info *qp_info,
1863 				     const struct ib_wc *wc,
1864 				     u32 port_num,
1865 				     struct ib_mad_private *recv,
1866 				     struct ib_mad_private *response)
1867 {
1868 	enum smi_forward_action retsmi;
1869 	struct ib_smp *smp = (struct ib_smp *)recv->mad;
1870 
1871 	trace_ib_mad_handle_ib_smi(smp);
1872 
1873 	if (smi_handle_dr_smp_recv(smp,
1874 				   rdma_cap_ib_switch(port_priv->device),
1875 				   port_num,
1876 				   port_priv->device->phys_port_cnt) ==
1877 				   IB_SMI_DISCARD)
1878 		return IB_SMI_DISCARD;
1879 
1880 	retsmi = smi_check_forward_dr_smp(smp);
1881 	if (retsmi == IB_SMI_LOCAL)
1882 		return IB_SMI_HANDLE;
1883 
1884 	if (retsmi == IB_SMI_SEND) { /* don't forward */
1885 		if (smi_handle_dr_smp_send(smp,
1886 					   rdma_cap_ib_switch(port_priv->device),
1887 					   port_num) == IB_SMI_DISCARD)
1888 			return IB_SMI_DISCARD;
1889 
1890 		if (smi_check_local_smp(smp, port_priv->device) == IB_SMI_DISCARD)
1891 			return IB_SMI_DISCARD;
1892 	} else if (rdma_cap_ib_switch(port_priv->device)) {
1893 		/* forward case for switches */
1894 		memcpy(response, recv, mad_priv_size(response));
1895 		response->header.recv_wc.wc = &response->header.wc;
1896 		response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad;
1897 		response->header.recv_wc.recv_buf.grh = &response->grh;
1898 
1899 		agent_send_response((const struct ib_mad_hdr *)response->mad,
1900 				    &response->grh, wc,
1901 				    port_priv->device,
1902 				    smi_get_fwd_port(smp),
1903 				    qp_info->qp->qp_num,
1904 				    response->mad_size,
1905 				    false);
1906 
1907 		return IB_SMI_DISCARD;
1908 	}
1909 	return IB_SMI_HANDLE;
1910 }
1911 
1912 static bool generate_unmatched_resp(const struct ib_mad_private *recv,
1913 				    struct ib_mad_private *response,
1914 				    size_t *resp_len, bool opa)
1915 {
1916 	const struct ib_mad_hdr *recv_hdr = (const struct ib_mad_hdr *)recv->mad;
1917 	struct ib_mad_hdr *resp_hdr = (struct ib_mad_hdr *)response->mad;
1918 
1919 	if (recv_hdr->method == IB_MGMT_METHOD_GET ||
1920 	    recv_hdr->method == IB_MGMT_METHOD_SET) {
1921 		memcpy(response, recv, mad_priv_size(response));
1922 		response->header.recv_wc.wc = &response->header.wc;
1923 		response->header.recv_wc.recv_buf.mad = (struct ib_mad *)response->mad;
1924 		response->header.recv_wc.recv_buf.grh = &response->grh;
1925 		resp_hdr->method = IB_MGMT_METHOD_GET_RESP;
1926 		resp_hdr->status = cpu_to_be16(IB_MGMT_MAD_STATUS_UNSUPPORTED_METHOD_ATTRIB);
1927 		if (recv_hdr->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
1928 			resp_hdr->status |= IB_SMP_DIRECTION;
1929 
1930 		if (opa && recv_hdr->base_version == OPA_MGMT_BASE_VERSION) {
1931 			if (recv_hdr->mgmt_class ==
1932 			    IB_MGMT_CLASS_SUBN_LID_ROUTED ||
1933 			    recv_hdr->mgmt_class ==
1934 			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
1935 				*resp_len = opa_get_smp_header_size(
1936 							(struct opa_smp *)recv->mad);
1937 			else
1938 				*resp_len = sizeof(struct ib_mad_hdr);
1939 		}
1940 
1941 		return true;
1942 	} else {
1943 		return false;
1944 	}
1945 }
1946 
1947 static enum smi_action
1948 handle_opa_smi(struct ib_mad_port_private *port_priv,
1949 	       struct ib_mad_qp_info *qp_info,
1950 	       struct ib_wc *wc,
1951 	       u32 port_num,
1952 	       struct ib_mad_private *recv,
1953 	       struct ib_mad_private *response)
1954 {
1955 	enum smi_forward_action retsmi;
1956 	struct opa_smp *smp = (struct opa_smp *)recv->mad;
1957 
1958 	trace_ib_mad_handle_opa_smi(smp);
1959 
1960 	if (opa_smi_handle_dr_smp_recv(smp,
1961 				   rdma_cap_ib_switch(port_priv->device),
1962 				   port_num,
1963 				   port_priv->device->phys_port_cnt) ==
1964 				   IB_SMI_DISCARD)
1965 		return IB_SMI_DISCARD;
1966 
1967 	retsmi = opa_smi_check_forward_dr_smp(smp);
1968 	if (retsmi == IB_SMI_LOCAL)
1969 		return IB_SMI_HANDLE;
1970 
1971 	if (retsmi == IB_SMI_SEND) { /* don't forward */
1972 		if (opa_smi_handle_dr_smp_send(smp,
1973 					   rdma_cap_ib_switch(port_priv->device),
1974 					   port_num) == IB_SMI_DISCARD)
1975 			return IB_SMI_DISCARD;
1976 
1977 		if (opa_smi_check_local_smp(smp, port_priv->device) ==
1978 		    IB_SMI_DISCARD)
1979 			return IB_SMI_DISCARD;
1980 
1981 	} else if (rdma_cap_ib_switch(port_priv->device)) {
1982 		/* forward case for switches */
1983 		memcpy(response, recv, mad_priv_size(response));
1984 		response->header.recv_wc.wc = &response->header.wc;
1985 		response->header.recv_wc.recv_buf.opa_mad =
1986 				(struct opa_mad *)response->mad;
1987 		response->header.recv_wc.recv_buf.grh = &response->grh;
1988 
1989 		agent_send_response((const struct ib_mad_hdr *)response->mad,
1990 				    &response->grh, wc,
1991 				    port_priv->device,
1992 				    opa_smi_get_fwd_port(smp),
1993 				    qp_info->qp->qp_num,
1994 				    recv->header.wc.byte_len,
1995 				    true);
1996 
1997 		return IB_SMI_DISCARD;
1998 	}
1999 
2000 	return IB_SMI_HANDLE;
2001 }
2002 
2003 static enum smi_action
2004 handle_smi(struct ib_mad_port_private *port_priv,
2005 	   struct ib_mad_qp_info *qp_info,
2006 	   struct ib_wc *wc,
2007 	   u32 port_num,
2008 	   struct ib_mad_private *recv,
2009 	   struct ib_mad_private *response,
2010 	   bool opa)
2011 {
2012 	struct ib_mad_hdr *mad_hdr = (struct ib_mad_hdr *)recv->mad;
2013 
2014 	if (opa && mad_hdr->base_version == OPA_MGMT_BASE_VERSION &&
2015 	    mad_hdr->class_version == OPA_SM_CLASS_VERSION)
2016 		return handle_opa_smi(port_priv, qp_info, wc, port_num, recv,
2017 				      response);
2018 
2019 	return handle_ib_smi(port_priv, qp_info, wc, port_num, recv, response);
2020 }
2021 
2022 static void ib_mad_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2023 {
2024 	struct ib_mad_port_private *port_priv = cq->cq_context;
2025 	struct ib_mad_list_head *mad_list =
2026 		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
2027 	struct ib_mad_qp_info *qp_info;
2028 	struct ib_mad_private_header *mad_priv_hdr;
2029 	struct ib_mad_private *recv, *response = NULL;
2030 	struct ib_mad_agent_private *mad_agent;
2031 	u32 port_num;
2032 	int ret = IB_MAD_RESULT_SUCCESS;
2033 	size_t mad_size;
2034 	u16 resp_mad_pkey_index = 0;
2035 	bool opa;
2036 
2037 	if (list_empty_careful(&port_priv->port_list))
2038 		return;
2039 
2040 	if (wc->status != IB_WC_SUCCESS) {
2041 		/*
2042 		 * Receive errors indicate that the QP has entered the error
2043 		 * state - error handling/shutdown code will cleanup
2044 		 */
2045 		return;
2046 	}
2047 
2048 	qp_info = mad_list->mad_queue->qp_info;
2049 	dequeue_mad(mad_list);
2050 
2051 	opa = rdma_cap_opa_mad(qp_info->port_priv->device,
2052 			       qp_info->port_priv->port_num);
2053 
2054 	mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header,
2055 				    mad_list);
2056 	recv = container_of(mad_priv_hdr, struct ib_mad_private, header);
2057 	ib_dma_unmap_single(port_priv->device,
2058 			    recv->header.mapping,
2059 			    mad_priv_dma_size(recv),
2060 			    DMA_FROM_DEVICE);
2061 
2062 	/* Setup MAD receive work completion from "normal" work completion */
2063 	recv->header.wc = *wc;
2064 	recv->header.recv_wc.wc = &recv->header.wc;
2065 
2066 	if (opa && ((struct ib_mad_hdr *)(recv->mad))->base_version == OPA_MGMT_BASE_VERSION) {
2067 		recv->header.recv_wc.mad_len = wc->byte_len - sizeof(struct ib_grh);
2068 		recv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad);
2069 	} else {
2070 		recv->header.recv_wc.mad_len = sizeof(struct ib_mad);
2071 		recv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad);
2072 	}
2073 
2074 	recv->header.recv_wc.recv_buf.mad = (struct ib_mad *)recv->mad;
2075 	recv->header.recv_wc.recv_buf.grh = &recv->grh;
2076 
2077 	/* Validate MAD */
2078 	if (!validate_mad((const struct ib_mad_hdr *)recv->mad, qp_info, opa))
2079 		goto out;
2080 
2081 	trace_ib_mad_recv_done_handler(qp_info, wc,
2082 				       (struct ib_mad_hdr *)recv->mad);
2083 
2084 	mad_size = recv->mad_size;
2085 	response = alloc_mad_private(mad_size, GFP_KERNEL);
2086 	if (!response)
2087 		goto out;
2088 
2089 	if (rdma_cap_ib_switch(port_priv->device))
2090 		port_num = wc->port_num;
2091 	else
2092 		port_num = port_priv->port_num;
2093 
2094 	if (((struct ib_mad_hdr *)recv->mad)->mgmt_class ==
2095 	    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
2096 		if (handle_smi(port_priv, qp_info, wc, port_num, recv,
2097 			       response, opa)
2098 		    == IB_SMI_DISCARD)
2099 			goto out;
2100 	}
2101 
2102 	/* Give driver "right of first refusal" on incoming MAD */
2103 	if (port_priv->device->ops.process_mad) {
2104 		ret = port_priv->device->ops.process_mad(
2105 			port_priv->device, 0, port_priv->port_num, wc,
2106 			&recv->grh, (const struct ib_mad *)recv->mad,
2107 			(struct ib_mad *)response->mad, &mad_size,
2108 			&resp_mad_pkey_index);
2109 
2110 		if (opa)
2111 			wc->pkey_index = resp_mad_pkey_index;
2112 
2113 		if (ret & IB_MAD_RESULT_SUCCESS) {
2114 			if (ret & IB_MAD_RESULT_CONSUMED)
2115 				goto out;
2116 			if (ret & IB_MAD_RESULT_REPLY) {
2117 				agent_send_response((const struct ib_mad_hdr *)response->mad,
2118 						    &recv->grh, wc,
2119 						    port_priv->device,
2120 						    port_num,
2121 						    qp_info->qp->qp_num,
2122 						    mad_size, opa);
2123 				goto out;
2124 			}
2125 		}
2126 	}
2127 
2128 	mad_agent = find_mad_agent(port_priv, (const struct ib_mad_hdr *)recv->mad);
2129 	if (mad_agent) {
2130 		trace_ib_mad_recv_done_agent(mad_agent);
2131 		ib_mad_complete_recv(mad_agent, &recv->header.recv_wc);
2132 		/*
2133 		 * recv is freed up in error cases in ib_mad_complete_recv
2134 		 * or via recv_handler in ib_mad_complete_recv()
2135 		 */
2136 		recv = NULL;
2137 	} else if ((ret & IB_MAD_RESULT_SUCCESS) &&
2138 		   generate_unmatched_resp(recv, response, &mad_size, opa)) {
2139 		agent_send_response((const struct ib_mad_hdr *)response->mad, &recv->grh, wc,
2140 				    port_priv->device, port_num,
2141 				    qp_info->qp->qp_num, mad_size, opa);
2142 	}
2143 
2144 out:
2145 	/* Post another receive request for this QP */
2146 	if (response) {
2147 		ib_mad_post_receive_mads(qp_info, response);
2148 		kfree(recv);
2149 	} else
2150 		ib_mad_post_receive_mads(qp_info, recv);
2151 }
2152 
2153 static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
2154 {
2155 	struct ib_mad_send_wr_private *mad_send_wr;
2156 	unsigned long delay;
2157 
2158 	if (list_empty(&mad_agent_priv->wait_list)) {
2159 		cancel_delayed_work(&mad_agent_priv->timed_work);
2160 	} else {
2161 		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
2162 					 struct ib_mad_send_wr_private,
2163 					 agent_list);
2164 
2165 		if (time_after(mad_agent_priv->timeout,
2166 			       mad_send_wr->timeout)) {
2167 			mad_agent_priv->timeout = mad_send_wr->timeout;
2168 			delay = mad_send_wr->timeout - jiffies;
2169 			if ((long)delay <= 0)
2170 				delay = 1;
2171 			mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
2172 					 &mad_agent_priv->timed_work, delay);
2173 		}
2174 	}
2175 }
2176 
2177 static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
2178 {
2179 	struct ib_mad_agent_private *mad_agent_priv;
2180 	struct ib_mad_send_wr_private *temp_mad_send_wr;
2181 	struct list_head *list_item;
2182 	unsigned long delay;
2183 
2184 	mad_agent_priv = mad_send_wr->mad_agent_priv;
2185 	list_del(&mad_send_wr->agent_list);
2186 
2187 	delay = mad_send_wr->timeout;
2188 	mad_send_wr->timeout += jiffies;
2189 
2190 	if (delay) {
2191 		list_for_each_prev(list_item, &mad_agent_priv->wait_list) {
2192 			temp_mad_send_wr = list_entry(list_item,
2193 						struct ib_mad_send_wr_private,
2194 						agent_list);
2195 			if (time_after(mad_send_wr->timeout,
2196 				       temp_mad_send_wr->timeout))
2197 				break;
2198 		}
2199 	} else {
2200 		list_item = &mad_agent_priv->wait_list;
2201 	}
2202 
2203 	list_add(&mad_send_wr->agent_list, list_item);
2204 
2205 	/* Reschedule a work item if we have a shorter timeout */
2206 	if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list)
2207 		mod_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
2208 				 &mad_agent_priv->timed_work, delay);
2209 }
2210 
2211 void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
2212 			  unsigned long timeout_ms)
2213 {
2214 	mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
2215 	wait_for_response(mad_send_wr);
2216 }
2217 
2218 /*
2219  * Process a send work completion
2220  */
2221 void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
2222 			     struct ib_mad_send_wc *mad_send_wc)
2223 {
2224 	struct ib_mad_agent_private	*mad_agent_priv;
2225 	unsigned long			flags;
2226 	int				ret;
2227 
2228 	mad_agent_priv = mad_send_wr->mad_agent_priv;
2229 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2230 	if (ib_mad_kernel_rmpp_agent(&mad_agent_priv->agent)) {
2231 		ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc);
2232 		if (ret == IB_RMPP_RESULT_CONSUMED)
2233 			goto done;
2234 	} else
2235 		ret = IB_RMPP_RESULT_UNHANDLED;
2236 
2237 	if (mad_send_wc->status != IB_WC_SUCCESS &&
2238 	    mad_send_wr->status == IB_WC_SUCCESS) {
2239 		mad_send_wr->status = mad_send_wc->status;
2240 		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2241 	}
2242 
2243 	if (--mad_send_wr->refcount > 0) {
2244 		if (mad_send_wr->refcount == 1 && mad_send_wr->timeout &&
2245 		    mad_send_wr->status == IB_WC_SUCCESS) {
2246 			wait_for_response(mad_send_wr);
2247 		}
2248 		goto done;
2249 	}
2250 
2251 	/* Remove send from MAD agent and notify client of completion */
2252 	list_del(&mad_send_wr->agent_list);
2253 	adjust_timeout(mad_agent_priv);
2254 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2255 
2256 	if (mad_send_wr->status != IB_WC_SUCCESS)
2257 		mad_send_wc->status = mad_send_wr->status;
2258 	if (ret == IB_RMPP_RESULT_INTERNAL)
2259 		ib_rmpp_send_handler(mad_send_wc);
2260 	else
2261 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2262 						   mad_send_wc);
2263 
2264 	/* Release reference on agent taken when sending */
2265 	deref_mad_agent(mad_agent_priv);
2266 	return;
2267 done:
2268 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2269 }
2270 
2271 static void ib_mad_send_done(struct ib_cq *cq, struct ib_wc *wc)
2272 {
2273 	struct ib_mad_port_private *port_priv = cq->cq_context;
2274 	struct ib_mad_list_head *mad_list =
2275 		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
2276 	struct ib_mad_send_wr_private	*mad_send_wr, *queued_send_wr;
2277 	struct ib_mad_qp_info		*qp_info;
2278 	struct ib_mad_queue		*send_queue;
2279 	struct ib_mad_send_wc		mad_send_wc;
2280 	unsigned long flags;
2281 	int ret;
2282 
2283 	if (list_empty_careful(&port_priv->port_list))
2284 		return;
2285 
2286 	if (wc->status != IB_WC_SUCCESS) {
2287 		if (!ib_mad_send_error(port_priv, wc))
2288 			return;
2289 	}
2290 
2291 	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
2292 				   mad_list);
2293 	send_queue = mad_list->mad_queue;
2294 	qp_info = send_queue->qp_info;
2295 
2296 	trace_ib_mad_send_done_agent(mad_send_wr->mad_agent_priv);
2297 	trace_ib_mad_send_done_handler(mad_send_wr, wc);
2298 
2299 retry:
2300 	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
2301 			    mad_send_wr->header_mapping,
2302 			    mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
2303 	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
2304 			    mad_send_wr->payload_mapping,
2305 			    mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
2306 	queued_send_wr = NULL;
2307 	spin_lock_irqsave(&send_queue->lock, flags);
2308 	list_del(&mad_list->list);
2309 
2310 	/* Move queued send to the send queue */
2311 	if (send_queue->count-- > send_queue->max_active) {
2312 		mad_list = container_of(qp_info->overflow_list.next,
2313 					struct ib_mad_list_head, list);
2314 		queued_send_wr = container_of(mad_list,
2315 					struct ib_mad_send_wr_private,
2316 					mad_list);
2317 		list_move_tail(&mad_list->list, &send_queue->list);
2318 	}
2319 	spin_unlock_irqrestore(&send_queue->lock, flags);
2320 
2321 	mad_send_wc.send_buf = &mad_send_wr->send_buf;
2322 	mad_send_wc.status = wc->status;
2323 	mad_send_wc.vendor_err = wc->vendor_err;
2324 	ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
2325 
2326 	if (queued_send_wr) {
2327 		trace_ib_mad_send_done_resend(queued_send_wr, qp_info);
2328 		ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr.wr,
2329 				   NULL);
2330 		if (ret) {
2331 			dev_err(&port_priv->device->dev,
2332 				"ib_post_send failed: %d\n", ret);
2333 			mad_send_wr = queued_send_wr;
2334 			wc->status = IB_WC_LOC_QP_OP_ERR;
2335 			goto retry;
2336 		}
2337 	}
2338 }
2339 
2340 static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
2341 {
2342 	struct ib_mad_send_wr_private *mad_send_wr;
2343 	struct ib_mad_list_head *mad_list;
2344 	unsigned long flags;
2345 
2346 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
2347 	list_for_each_entry(mad_list, &qp_info->send_queue.list, list) {
2348 		mad_send_wr = container_of(mad_list,
2349 					   struct ib_mad_send_wr_private,
2350 					   mad_list);
2351 		mad_send_wr->retry = 1;
2352 	}
2353 	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
2354 }
2355 
2356 static bool ib_mad_send_error(struct ib_mad_port_private *port_priv,
2357 		struct ib_wc *wc)
2358 {
2359 	struct ib_mad_list_head *mad_list =
2360 		container_of(wc->wr_cqe, struct ib_mad_list_head, cqe);
2361 	struct ib_mad_qp_info *qp_info = mad_list->mad_queue->qp_info;
2362 	struct ib_mad_send_wr_private *mad_send_wr;
2363 	int ret;
2364 
2365 	/*
2366 	 * Send errors will transition the QP to SQE - move
2367 	 * QP to RTS and repost flushed work requests
2368 	 */
2369 	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
2370 				   mad_list);
2371 	if (wc->status == IB_WC_WR_FLUSH_ERR) {
2372 		if (mad_send_wr->retry) {
2373 			/* Repost send */
2374 			mad_send_wr->retry = 0;
2375 			trace_ib_mad_error_handler(mad_send_wr, qp_info);
2376 			ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr.wr,
2377 					   NULL);
2378 			if (!ret)
2379 				return false;
2380 		}
2381 	} else {
2382 		struct ib_qp_attr *attr;
2383 
2384 		/* Transition QP to RTS and fail offending send */
2385 		attr = kmalloc(sizeof *attr, GFP_KERNEL);
2386 		if (attr) {
2387 			attr->qp_state = IB_QPS_RTS;
2388 			attr->cur_qp_state = IB_QPS_SQE;
2389 			ret = ib_modify_qp(qp_info->qp, attr,
2390 					   IB_QP_STATE | IB_QP_CUR_STATE);
2391 			kfree(attr);
2392 			if (ret)
2393 				dev_err(&port_priv->device->dev,
2394 					"%s - ib_modify_qp to RTS: %d\n",
2395 					__func__, ret);
2396 			else
2397 				mark_sends_for_retry(qp_info);
2398 		}
2399 	}
2400 
2401 	return true;
2402 }
2403 
2404 static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
2405 {
2406 	unsigned long flags;
2407 	struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr;
2408 	struct ib_mad_send_wc mad_send_wc;
2409 	struct list_head cancel_list;
2410 
2411 	INIT_LIST_HEAD(&cancel_list);
2412 
2413 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2414 	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
2415 				 &mad_agent_priv->send_list, agent_list) {
2416 		if (mad_send_wr->status == IB_WC_SUCCESS) {
2417 			mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
2418 			mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2419 		}
2420 	}
2421 
2422 	/* Empty wait list to prevent receives from finding a request */
2423 	list_splice_init(&mad_agent_priv->wait_list, &cancel_list);
2424 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2425 
2426 	/* Report all cancelled requests */
2427 	mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
2428 	mad_send_wc.vendor_err = 0;
2429 
2430 	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
2431 				 &cancel_list, agent_list) {
2432 		mad_send_wc.send_buf = &mad_send_wr->send_buf;
2433 		list_del(&mad_send_wr->agent_list);
2434 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2435 						   &mad_send_wc);
2436 		deref_mad_agent(mad_agent_priv);
2437 	}
2438 }
2439 
2440 static struct ib_mad_send_wr_private*
2441 find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
2442 	     struct ib_mad_send_buf *send_buf)
2443 {
2444 	struct ib_mad_send_wr_private *mad_send_wr;
2445 
2446 	list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
2447 			    agent_list) {
2448 		if (&mad_send_wr->send_buf == send_buf)
2449 			return mad_send_wr;
2450 	}
2451 
2452 	list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
2453 			    agent_list) {
2454 		if (is_rmpp_data_mad(mad_agent_priv,
2455 				     mad_send_wr->send_buf.mad) &&
2456 		    &mad_send_wr->send_buf == send_buf)
2457 			return mad_send_wr;
2458 	}
2459 	return NULL;
2460 }
2461 
2462 int ib_modify_mad(struct ib_mad_send_buf *send_buf, u32 timeout_ms)
2463 {
2464 	struct ib_mad_agent_private *mad_agent_priv;
2465 	struct ib_mad_send_wr_private *mad_send_wr;
2466 	unsigned long flags;
2467 	int active;
2468 
2469 	if (!send_buf)
2470 		return -EINVAL;
2471 
2472 	mad_agent_priv = container_of(send_buf->mad_agent,
2473 				      struct ib_mad_agent_private, agent);
2474 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2475 	mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
2476 	if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
2477 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2478 		return -EINVAL;
2479 	}
2480 
2481 	active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1);
2482 	if (!timeout_ms) {
2483 		mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
2484 		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2485 	}
2486 
2487 	mad_send_wr->send_buf.timeout_ms = timeout_ms;
2488 	if (active)
2489 		mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
2490 	else
2491 		ib_reset_mad_timeout(mad_send_wr, timeout_ms);
2492 
2493 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2494 	return 0;
2495 }
2496 EXPORT_SYMBOL(ib_modify_mad);
2497 
2498 static void local_completions(struct work_struct *work)
2499 {
2500 	struct ib_mad_agent_private *mad_agent_priv;
2501 	struct ib_mad_local_private *local;
2502 	struct ib_mad_agent_private *recv_mad_agent;
2503 	unsigned long flags;
2504 	int free_mad;
2505 	struct ib_wc wc;
2506 	struct ib_mad_send_wc mad_send_wc;
2507 	bool opa;
2508 
2509 	mad_agent_priv =
2510 		container_of(work, struct ib_mad_agent_private, local_work);
2511 
2512 	opa = rdma_cap_opa_mad(mad_agent_priv->qp_info->port_priv->device,
2513 			       mad_agent_priv->qp_info->port_priv->port_num);
2514 
2515 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2516 	while (!list_empty(&mad_agent_priv->local_list)) {
2517 		local = list_entry(mad_agent_priv->local_list.next,
2518 				   struct ib_mad_local_private,
2519 				   completion_list);
2520 		list_del(&local->completion_list);
2521 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2522 		free_mad = 0;
2523 		if (local->mad_priv) {
2524 			u8 base_version;
2525 			recv_mad_agent = local->recv_mad_agent;
2526 			if (!recv_mad_agent) {
2527 				dev_err(&mad_agent_priv->agent.device->dev,
2528 					"No receive MAD agent for local completion\n");
2529 				free_mad = 1;
2530 				goto local_send_completion;
2531 			}
2532 
2533 			/*
2534 			 * Defined behavior is to complete response
2535 			 * before request
2536 			 */
2537 			build_smp_wc(recv_mad_agent->agent.qp,
2538 				     local->mad_send_wr->send_wr.wr.wr_cqe,
2539 				     be16_to_cpu(IB_LID_PERMISSIVE),
2540 				     local->mad_send_wr->send_wr.pkey_index,
2541 				     recv_mad_agent->agent.port_num, &wc);
2542 
2543 			local->mad_priv->header.recv_wc.wc = &wc;
2544 
2545 			base_version = ((struct ib_mad_hdr *)(local->mad_priv->mad))->base_version;
2546 			if (opa && base_version == OPA_MGMT_BASE_VERSION) {
2547 				local->mad_priv->header.recv_wc.mad_len = local->return_wc_byte_len;
2548 				local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct opa_mad);
2549 			} else {
2550 				local->mad_priv->header.recv_wc.mad_len = sizeof(struct ib_mad);
2551 				local->mad_priv->header.recv_wc.mad_seg_size = sizeof(struct ib_mad);
2552 			}
2553 
2554 			INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list);
2555 			list_add(&local->mad_priv->header.recv_wc.recv_buf.list,
2556 				 &local->mad_priv->header.recv_wc.rmpp_list);
2557 			local->mad_priv->header.recv_wc.recv_buf.grh = NULL;
2558 			local->mad_priv->header.recv_wc.recv_buf.mad =
2559 						(struct ib_mad *)local->mad_priv->mad;
2560 			recv_mad_agent->agent.recv_handler(
2561 						&recv_mad_agent->agent,
2562 						&local->mad_send_wr->send_buf,
2563 						&local->mad_priv->header.recv_wc);
2564 			spin_lock_irqsave(&recv_mad_agent->lock, flags);
2565 			deref_mad_agent(recv_mad_agent);
2566 			spin_unlock_irqrestore(&recv_mad_agent->lock, flags);
2567 		}
2568 
2569 local_send_completion:
2570 		/* Complete send */
2571 		mad_send_wc.status = IB_WC_SUCCESS;
2572 		mad_send_wc.vendor_err = 0;
2573 		mad_send_wc.send_buf = &local->mad_send_wr->send_buf;
2574 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2575 						   &mad_send_wc);
2576 
2577 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
2578 		deref_mad_agent(mad_agent_priv);
2579 		if (free_mad)
2580 			kfree(local->mad_priv);
2581 		kfree(local);
2582 	}
2583 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2584 }
2585 
2586 static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
2587 {
2588 	int ret;
2589 
2590 	if (!mad_send_wr->retries_left)
2591 		return -ETIMEDOUT;
2592 
2593 	mad_send_wr->retries_left--;
2594 	mad_send_wr->send_buf.retries++;
2595 
2596 	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
2597 
2598 	if (ib_mad_kernel_rmpp_agent(&mad_send_wr->mad_agent_priv->agent)) {
2599 		ret = ib_retry_rmpp(mad_send_wr);
2600 		switch (ret) {
2601 		case IB_RMPP_RESULT_UNHANDLED:
2602 			ret = ib_send_mad(mad_send_wr);
2603 			break;
2604 		case IB_RMPP_RESULT_CONSUMED:
2605 			ret = 0;
2606 			break;
2607 		default:
2608 			ret = -ECOMM;
2609 			break;
2610 		}
2611 	} else
2612 		ret = ib_send_mad(mad_send_wr);
2613 
2614 	if (!ret) {
2615 		mad_send_wr->refcount++;
2616 		list_add_tail(&mad_send_wr->agent_list,
2617 			      &mad_send_wr->mad_agent_priv->send_list);
2618 	}
2619 	return ret;
2620 }
2621 
2622 static void timeout_sends(struct work_struct *work)
2623 {
2624 	struct ib_mad_agent_private *mad_agent_priv;
2625 	struct ib_mad_send_wr_private *mad_send_wr;
2626 	struct ib_mad_send_wc mad_send_wc;
2627 	unsigned long flags, delay;
2628 
2629 	mad_agent_priv = container_of(work, struct ib_mad_agent_private,
2630 				      timed_work.work);
2631 	mad_send_wc.vendor_err = 0;
2632 
2633 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2634 	while (!list_empty(&mad_agent_priv->wait_list)) {
2635 		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
2636 					 struct ib_mad_send_wr_private,
2637 					 agent_list);
2638 
2639 		if (time_after(mad_send_wr->timeout, jiffies)) {
2640 			delay = mad_send_wr->timeout - jiffies;
2641 			if ((long)delay <= 0)
2642 				delay = 1;
2643 			queue_delayed_work(mad_agent_priv->qp_info->
2644 					   port_priv->wq,
2645 					   &mad_agent_priv->timed_work, delay);
2646 			break;
2647 		}
2648 
2649 		list_del(&mad_send_wr->agent_list);
2650 		if (mad_send_wr->status == IB_WC_SUCCESS &&
2651 		    !retry_send(mad_send_wr))
2652 			continue;
2653 
2654 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2655 
2656 		if (mad_send_wr->status == IB_WC_SUCCESS)
2657 			mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
2658 		else
2659 			mad_send_wc.status = mad_send_wr->status;
2660 		mad_send_wc.send_buf = &mad_send_wr->send_buf;
2661 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2662 						   &mad_send_wc);
2663 
2664 		deref_mad_agent(mad_agent_priv);
2665 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
2666 	}
2667 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2668 }
2669 
2670 /*
2671  * Allocate receive MADs and post receive WRs for them
2672  */
2673 static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
2674 				    struct ib_mad_private *mad)
2675 {
2676 	unsigned long flags;
2677 	int post, ret;
2678 	struct ib_mad_private *mad_priv;
2679 	struct ib_sge sg_list;
2680 	struct ib_recv_wr recv_wr;
2681 	struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
2682 
2683 	/* Initialize common scatter list fields */
2684 	sg_list.lkey = qp_info->port_priv->pd->local_dma_lkey;
2685 
2686 	/* Initialize common receive WR fields */
2687 	recv_wr.next = NULL;
2688 	recv_wr.sg_list = &sg_list;
2689 	recv_wr.num_sge = 1;
2690 
2691 	do {
2692 		/* Allocate and map receive buffer */
2693 		if (mad) {
2694 			mad_priv = mad;
2695 			mad = NULL;
2696 		} else {
2697 			mad_priv = alloc_mad_private(port_mad_size(qp_info->port_priv),
2698 						     GFP_ATOMIC);
2699 			if (!mad_priv) {
2700 				ret = -ENOMEM;
2701 				break;
2702 			}
2703 		}
2704 		sg_list.length = mad_priv_dma_size(mad_priv);
2705 		sg_list.addr = ib_dma_map_single(qp_info->port_priv->device,
2706 						 &mad_priv->grh,
2707 						 mad_priv_dma_size(mad_priv),
2708 						 DMA_FROM_DEVICE);
2709 		if (unlikely(ib_dma_mapping_error(qp_info->port_priv->device,
2710 						  sg_list.addr))) {
2711 			kfree(mad_priv);
2712 			ret = -ENOMEM;
2713 			break;
2714 		}
2715 		mad_priv->header.mapping = sg_list.addr;
2716 		mad_priv->header.mad_list.mad_queue = recv_queue;
2717 		mad_priv->header.mad_list.cqe.done = ib_mad_recv_done;
2718 		recv_wr.wr_cqe = &mad_priv->header.mad_list.cqe;
2719 
2720 		/* Post receive WR */
2721 		spin_lock_irqsave(&recv_queue->lock, flags);
2722 		post = (++recv_queue->count < recv_queue->max_active);
2723 		list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list);
2724 		spin_unlock_irqrestore(&recv_queue->lock, flags);
2725 		ret = ib_post_recv(qp_info->qp, &recv_wr, NULL);
2726 		if (ret) {
2727 			spin_lock_irqsave(&recv_queue->lock, flags);
2728 			list_del(&mad_priv->header.mad_list.list);
2729 			recv_queue->count--;
2730 			spin_unlock_irqrestore(&recv_queue->lock, flags);
2731 			ib_dma_unmap_single(qp_info->port_priv->device,
2732 					    mad_priv->header.mapping,
2733 					    mad_priv_dma_size(mad_priv),
2734 					    DMA_FROM_DEVICE);
2735 			kfree(mad_priv);
2736 			dev_err(&qp_info->port_priv->device->dev,
2737 				"ib_post_recv failed: %d\n", ret);
2738 			break;
2739 		}
2740 	} while (post);
2741 
2742 	return ret;
2743 }
2744 
2745 /*
2746  * Return all the posted receive MADs
2747  */
2748 static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info)
2749 {
2750 	struct ib_mad_private_header *mad_priv_hdr;
2751 	struct ib_mad_private *recv;
2752 	struct ib_mad_list_head *mad_list;
2753 
2754 	if (!qp_info->qp)
2755 		return;
2756 
2757 	while (!list_empty(&qp_info->recv_queue.list)) {
2758 
2759 		mad_list = list_entry(qp_info->recv_queue.list.next,
2760 				      struct ib_mad_list_head, list);
2761 		mad_priv_hdr = container_of(mad_list,
2762 					    struct ib_mad_private_header,
2763 					    mad_list);
2764 		recv = container_of(mad_priv_hdr, struct ib_mad_private,
2765 				    header);
2766 
2767 		/* Remove from posted receive MAD list */
2768 		list_del(&mad_list->list);
2769 
2770 		ib_dma_unmap_single(qp_info->port_priv->device,
2771 				    recv->header.mapping,
2772 				    mad_priv_dma_size(recv),
2773 				    DMA_FROM_DEVICE);
2774 		kfree(recv);
2775 	}
2776 
2777 	qp_info->recv_queue.count = 0;
2778 }
2779 
2780 /*
2781  * Start the port
2782  */
2783 static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
2784 {
2785 	int ret, i;
2786 	struct ib_qp_attr *attr;
2787 	struct ib_qp *qp;
2788 	u16 pkey_index;
2789 
2790 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
2791 	if (!attr)
2792 		return -ENOMEM;
2793 
2794 	ret = ib_find_pkey(port_priv->device, port_priv->port_num,
2795 			   IB_DEFAULT_PKEY_FULL, &pkey_index);
2796 	if (ret)
2797 		pkey_index = 0;
2798 
2799 	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
2800 		qp = port_priv->qp_info[i].qp;
2801 		if (!qp)
2802 			continue;
2803 
2804 		/*
2805 		 * PKey index for QP1 is irrelevant but
2806 		 * one is needed for the Reset to Init transition
2807 		 */
2808 		attr->qp_state = IB_QPS_INIT;
2809 		attr->pkey_index = pkey_index;
2810 		attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY;
2811 		ret = ib_modify_qp(qp, attr, IB_QP_STATE |
2812 					     IB_QP_PKEY_INDEX | IB_QP_QKEY);
2813 		if (ret) {
2814 			dev_err(&port_priv->device->dev,
2815 				"Couldn't change QP%d state to INIT: %d\n",
2816 				i, ret);
2817 			goto out;
2818 		}
2819 
2820 		attr->qp_state = IB_QPS_RTR;
2821 		ret = ib_modify_qp(qp, attr, IB_QP_STATE);
2822 		if (ret) {
2823 			dev_err(&port_priv->device->dev,
2824 				"Couldn't change QP%d state to RTR: %d\n",
2825 				i, ret);
2826 			goto out;
2827 		}
2828 
2829 		attr->qp_state = IB_QPS_RTS;
2830 		attr->sq_psn = IB_MAD_SEND_Q_PSN;
2831 		ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN);
2832 		if (ret) {
2833 			dev_err(&port_priv->device->dev,
2834 				"Couldn't change QP%d state to RTS: %d\n",
2835 				i, ret);
2836 			goto out;
2837 		}
2838 	}
2839 
2840 	ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
2841 	if (ret) {
2842 		dev_err(&port_priv->device->dev,
2843 			"Failed to request completion notification: %d\n",
2844 			ret);
2845 		goto out;
2846 	}
2847 
2848 	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
2849 		if (!port_priv->qp_info[i].qp)
2850 			continue;
2851 
2852 		ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL);
2853 		if (ret) {
2854 			dev_err(&port_priv->device->dev,
2855 				"Couldn't post receive WRs\n");
2856 			goto out;
2857 		}
2858 	}
2859 out:
2860 	kfree(attr);
2861 	return ret;
2862 }
2863 
2864 static void qp_event_handler(struct ib_event *event, void *qp_context)
2865 {
2866 	struct ib_mad_qp_info	*qp_info = qp_context;
2867 
2868 	/* It's worse than that! He's dead, Jim! */
2869 	dev_err(&qp_info->port_priv->device->dev,
2870 		"Fatal error (%d) on MAD QP (%u)\n",
2871 		event->event, qp_info->qp->qp_num);
2872 }
2873 
2874 static void init_mad_queue(struct ib_mad_qp_info *qp_info,
2875 			   struct ib_mad_queue *mad_queue)
2876 {
2877 	mad_queue->qp_info = qp_info;
2878 	mad_queue->count = 0;
2879 	spin_lock_init(&mad_queue->lock);
2880 	INIT_LIST_HEAD(&mad_queue->list);
2881 }
2882 
2883 static void init_mad_qp(struct ib_mad_port_private *port_priv,
2884 			struct ib_mad_qp_info *qp_info)
2885 {
2886 	qp_info->port_priv = port_priv;
2887 	init_mad_queue(qp_info, &qp_info->send_queue);
2888 	init_mad_queue(qp_info, &qp_info->recv_queue);
2889 	INIT_LIST_HEAD(&qp_info->overflow_list);
2890 }
2891 
2892 static int create_mad_qp(struct ib_mad_qp_info *qp_info,
2893 			 enum ib_qp_type qp_type)
2894 {
2895 	struct ib_qp_init_attr	qp_init_attr;
2896 	int ret;
2897 
2898 	memset(&qp_init_attr, 0, sizeof qp_init_attr);
2899 	qp_init_attr.send_cq = qp_info->port_priv->cq;
2900 	qp_init_attr.recv_cq = qp_info->port_priv->cq;
2901 	qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
2902 	qp_init_attr.cap.max_send_wr = mad_sendq_size;
2903 	qp_init_attr.cap.max_recv_wr = mad_recvq_size;
2904 	qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
2905 	qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
2906 	qp_init_attr.qp_type = qp_type;
2907 	qp_init_attr.port_num = qp_info->port_priv->port_num;
2908 	qp_init_attr.qp_context = qp_info;
2909 	qp_init_attr.event_handler = qp_event_handler;
2910 	qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr);
2911 	if (IS_ERR(qp_info->qp)) {
2912 		dev_err(&qp_info->port_priv->device->dev,
2913 			"Couldn't create ib_mad QP%d\n",
2914 			get_spl_qp_index(qp_type));
2915 		ret = PTR_ERR(qp_info->qp);
2916 		goto error;
2917 	}
2918 	/* Use minimum queue sizes unless the CQ is resized */
2919 	qp_info->send_queue.max_active = mad_sendq_size;
2920 	qp_info->recv_queue.max_active = mad_recvq_size;
2921 	return 0;
2922 
2923 error:
2924 	return ret;
2925 }
2926 
2927 static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
2928 {
2929 	if (!qp_info->qp)
2930 		return;
2931 
2932 	ib_destroy_qp(qp_info->qp);
2933 }
2934 
2935 /*
2936  * Open the port
2937  * Create the QP, PD, MR, and CQ if needed
2938  */
2939 static int ib_mad_port_open(struct ib_device *device,
2940 			    u32 port_num)
2941 {
2942 	int ret, cq_size;
2943 	struct ib_mad_port_private *port_priv;
2944 	unsigned long flags;
2945 	char name[sizeof "ib_mad123"];
2946 	int has_smi;
2947 
2948 	if (WARN_ON(rdma_max_mad_size(device, port_num) < IB_MGMT_MAD_SIZE))
2949 		return -EFAULT;
2950 
2951 	if (WARN_ON(rdma_cap_opa_mad(device, port_num) &&
2952 		    rdma_max_mad_size(device, port_num) < OPA_MGMT_MAD_SIZE))
2953 		return -EFAULT;
2954 
2955 	/* Create new device info */
2956 	port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
2957 	if (!port_priv)
2958 		return -ENOMEM;
2959 
2960 	port_priv->device = device;
2961 	port_priv->port_num = port_num;
2962 	spin_lock_init(&port_priv->reg_lock);
2963 	init_mad_qp(port_priv, &port_priv->qp_info[0]);
2964 	init_mad_qp(port_priv, &port_priv->qp_info[1]);
2965 
2966 	cq_size = mad_sendq_size + mad_recvq_size;
2967 	has_smi = rdma_cap_ib_smi(device, port_num);
2968 	if (has_smi)
2969 		cq_size *= 2;
2970 
2971 	port_priv->pd = ib_alloc_pd(device, 0);
2972 	if (IS_ERR(port_priv->pd)) {
2973 		dev_err(&device->dev, "Couldn't create ib_mad PD\n");
2974 		ret = PTR_ERR(port_priv->pd);
2975 		goto error3;
2976 	}
2977 
2978 	port_priv->cq = ib_alloc_cq(port_priv->device, port_priv, cq_size, 0,
2979 			IB_POLL_UNBOUND_WORKQUEUE);
2980 	if (IS_ERR(port_priv->cq)) {
2981 		dev_err(&device->dev, "Couldn't create ib_mad CQ\n");
2982 		ret = PTR_ERR(port_priv->cq);
2983 		goto error4;
2984 	}
2985 
2986 	if (has_smi) {
2987 		ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI);
2988 		if (ret)
2989 			goto error6;
2990 	}
2991 	ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI);
2992 	if (ret)
2993 		goto error7;
2994 
2995 	snprintf(name, sizeof(name), "ib_mad%u", port_num);
2996 	port_priv->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
2997 	if (!port_priv->wq) {
2998 		ret = -ENOMEM;
2999 		goto error8;
3000 	}
3001 
3002 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
3003 	list_add_tail(&port_priv->port_list, &ib_mad_port_list);
3004 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3005 
3006 	ret = ib_mad_port_start(port_priv);
3007 	if (ret) {
3008 		dev_err(&device->dev, "Couldn't start port\n");
3009 		goto error9;
3010 	}
3011 
3012 	return 0;
3013 
3014 error9:
3015 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
3016 	list_del_init(&port_priv->port_list);
3017 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3018 
3019 	destroy_workqueue(port_priv->wq);
3020 error8:
3021 	destroy_mad_qp(&port_priv->qp_info[1]);
3022 error7:
3023 	destroy_mad_qp(&port_priv->qp_info[0]);
3024 error6:
3025 	ib_free_cq(port_priv->cq);
3026 	cleanup_recv_queue(&port_priv->qp_info[1]);
3027 	cleanup_recv_queue(&port_priv->qp_info[0]);
3028 error4:
3029 	ib_dealloc_pd(port_priv->pd);
3030 error3:
3031 	kfree(port_priv);
3032 
3033 	return ret;
3034 }
3035 
3036 /*
3037  * Close the port
3038  * If there are no classes using the port, free the port
3039  * resources (CQ, MR, PD, QP) and remove the port's info structure
3040  */
3041 static int ib_mad_port_close(struct ib_device *device, u32 port_num)
3042 {
3043 	struct ib_mad_port_private *port_priv;
3044 	unsigned long flags;
3045 
3046 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
3047 	port_priv = __ib_get_mad_port(device, port_num);
3048 	if (port_priv == NULL) {
3049 		spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3050 		dev_err(&device->dev, "Port %u not found\n", port_num);
3051 		return -ENODEV;
3052 	}
3053 	list_del_init(&port_priv->port_list);
3054 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
3055 
3056 	destroy_workqueue(port_priv->wq);
3057 	destroy_mad_qp(&port_priv->qp_info[1]);
3058 	destroy_mad_qp(&port_priv->qp_info[0]);
3059 	ib_free_cq(port_priv->cq);
3060 	ib_dealloc_pd(port_priv->pd);
3061 	cleanup_recv_queue(&port_priv->qp_info[1]);
3062 	cleanup_recv_queue(&port_priv->qp_info[0]);
3063 	/* XXX: Handle deallocation of MAD registration tables */
3064 
3065 	kfree(port_priv);
3066 
3067 	return 0;
3068 }
3069 
3070 static int ib_mad_init_device(struct ib_device *device)
3071 {
3072 	int start, i;
3073 	unsigned int count = 0;
3074 	int ret;
3075 
3076 	start = rdma_start_port(device);
3077 
3078 	for (i = start; i <= rdma_end_port(device); i++) {
3079 		if (!rdma_cap_ib_mad(device, i))
3080 			continue;
3081 
3082 		ret = ib_mad_port_open(device, i);
3083 		if (ret) {
3084 			dev_err(&device->dev, "Couldn't open port %d\n", i);
3085 			goto error;
3086 		}
3087 		ret = ib_agent_port_open(device, i);
3088 		if (ret) {
3089 			dev_err(&device->dev,
3090 				"Couldn't open port %d for agents\n", i);
3091 			goto error_agent;
3092 		}
3093 		count++;
3094 	}
3095 	if (!count)
3096 		return -EOPNOTSUPP;
3097 
3098 	return 0;
3099 
3100 error_agent:
3101 	if (ib_mad_port_close(device, i))
3102 		dev_err(&device->dev, "Couldn't close port %d\n", i);
3103 
3104 error:
3105 	while (--i >= start) {
3106 		if (!rdma_cap_ib_mad(device, i))
3107 			continue;
3108 
3109 		if (ib_agent_port_close(device, i))
3110 			dev_err(&device->dev,
3111 				"Couldn't close port %d for agents\n", i);
3112 		if (ib_mad_port_close(device, i))
3113 			dev_err(&device->dev, "Couldn't close port %d\n", i);
3114 	}
3115 	return ret;
3116 }
3117 
3118 static void ib_mad_remove_device(struct ib_device *device, void *client_data)
3119 {
3120 	unsigned int i;
3121 
3122 	rdma_for_each_port (device, i) {
3123 		if (!rdma_cap_ib_mad(device, i))
3124 			continue;
3125 
3126 		if (ib_agent_port_close(device, i))
3127 			dev_err(&device->dev,
3128 				"Couldn't close port %u for agents\n", i);
3129 		if (ib_mad_port_close(device, i))
3130 			dev_err(&device->dev, "Couldn't close port %u\n", i);
3131 	}
3132 }
3133 
3134 static struct ib_client mad_client = {
3135 	.name   = "mad",
3136 	.add = ib_mad_init_device,
3137 	.remove = ib_mad_remove_device
3138 };
3139 
3140 int ib_mad_init(void)
3141 {
3142 	mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE);
3143 	mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE);
3144 
3145 	mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE);
3146 	mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE);
3147 
3148 	INIT_LIST_HEAD(&ib_mad_port_list);
3149 
3150 	if (ib_register_client(&mad_client)) {
3151 		pr_err("Couldn't register ib_mad client\n");
3152 		return -EINVAL;
3153 	}
3154 
3155 	return 0;
3156 }
3157 
3158 void ib_mad_cleanup(void)
3159 {
3160 	ib_unregister_client(&mad_client);
3161 }
3162