xref: /linux/drivers/infiniband/core/mad.c (revision c145211d1f9e2ef19e7b4c2b943f68366daa97af)
1 /*
2  * Copyright (c) 2004-2007 Voltaire, Inc. All rights reserved.
3  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
4  * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
5  * Copyright (c) 2009 HNR Consulting. All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  *
35  */
36 #include <linux/dma-mapping.h>
37 #include <linux/slab.h>
38 #include <rdma/ib_cache.h>
39 
40 #include "mad_priv.h"
41 #include "mad_rmpp.h"
42 #include "smi.h"
43 #include "agent.h"
44 
45 MODULE_LICENSE("Dual BSD/GPL");
46 MODULE_DESCRIPTION("kernel IB MAD API");
47 MODULE_AUTHOR("Hal Rosenstock");
48 MODULE_AUTHOR("Sean Hefty");
49 
50 int mad_sendq_size = IB_MAD_QP_SEND_SIZE;
51 int mad_recvq_size = IB_MAD_QP_RECV_SIZE;
52 
53 module_param_named(send_queue_size, mad_sendq_size, int, 0444);
54 MODULE_PARM_DESC(send_queue_size, "Size of send queue in number of work requests");
55 module_param_named(recv_queue_size, mad_recvq_size, int, 0444);
56 MODULE_PARM_DESC(recv_queue_size, "Size of receive queue in number of work requests");
57 
58 static struct kmem_cache *ib_mad_cache;
59 
60 static struct list_head ib_mad_port_list;
61 static u32 ib_mad_client_id = 0;
62 
63 /* Port list lock */
64 static DEFINE_SPINLOCK(ib_mad_port_list_lock);
65 
66 /* Forward declarations */
67 static int method_in_use(struct ib_mad_mgmt_method_table **method,
68 			 struct ib_mad_reg_req *mad_reg_req);
69 static void remove_mad_reg_req(struct ib_mad_agent_private *priv);
70 static struct ib_mad_agent_private *find_mad_agent(
71 					struct ib_mad_port_private *port_priv,
72 					struct ib_mad *mad);
73 static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
74 				    struct ib_mad_private *mad);
75 static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv);
76 static void timeout_sends(struct work_struct *work);
77 static void local_completions(struct work_struct *work);
78 static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
79 			      struct ib_mad_agent_private *agent_priv,
80 			      u8 mgmt_class);
81 static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
82 			   struct ib_mad_agent_private *agent_priv);
83 
84 /*
85  * Returns an ib_mad_port_private structure or NULL for a device/port
86  * Assumes ib_mad_port_list_lock is held
87  */
88 static inline struct ib_mad_port_private *
89 __ib_get_mad_port(struct ib_device *device, int port_num)
90 {
91 	struct ib_mad_port_private *entry;
92 
93 	list_for_each_entry(entry, &ib_mad_port_list, port_list) {
94 		if (entry->device == device && entry->port_num == port_num)
95 			return entry;
96 	}
97 	return NULL;
98 }
99 
100 /*
101  * Wrapper function to return an ib_mad_port_private structure or NULL
102  * for a device/port
103  */
104 static inline struct ib_mad_port_private *
105 ib_get_mad_port(struct ib_device *device, int port_num)
106 {
107 	struct ib_mad_port_private *entry;
108 	unsigned long flags;
109 
110 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
111 	entry = __ib_get_mad_port(device, port_num);
112 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
113 
114 	return entry;
115 }
116 
117 static inline u8 convert_mgmt_class(u8 mgmt_class)
118 {
119 	/* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */
120 	return mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ?
121 		0 : mgmt_class;
122 }
123 
124 static int get_spl_qp_index(enum ib_qp_type qp_type)
125 {
126 	switch (qp_type)
127 	{
128 	case IB_QPT_SMI:
129 		return 0;
130 	case IB_QPT_GSI:
131 		return 1;
132 	default:
133 		return -1;
134 	}
135 }
136 
137 static int vendor_class_index(u8 mgmt_class)
138 {
139 	return mgmt_class - IB_MGMT_CLASS_VENDOR_RANGE2_START;
140 }
141 
142 static int is_vendor_class(u8 mgmt_class)
143 {
144 	if ((mgmt_class < IB_MGMT_CLASS_VENDOR_RANGE2_START) ||
145 	    (mgmt_class > IB_MGMT_CLASS_VENDOR_RANGE2_END))
146 		return 0;
147 	return 1;
148 }
149 
150 static int is_vendor_oui(char *oui)
151 {
152 	if (oui[0] || oui[1] || oui[2])
153 		return 1;
154 	return 0;
155 }
156 
157 static int is_vendor_method_in_use(
158 		struct ib_mad_mgmt_vendor_class *vendor_class,
159 		struct ib_mad_reg_req *mad_reg_req)
160 {
161 	struct ib_mad_mgmt_method_table *method;
162 	int i;
163 
164 	for (i = 0; i < MAX_MGMT_OUI; i++) {
165 		if (!memcmp(vendor_class->oui[i], mad_reg_req->oui, 3)) {
166 			method = vendor_class->method_table[i];
167 			if (method) {
168 				if (method_in_use(&method, mad_reg_req))
169 					return 1;
170 				else
171 					break;
172 			}
173 		}
174 	}
175 	return 0;
176 }
177 
178 int ib_response_mad(struct ib_mad *mad)
179 {
180 	return ((mad->mad_hdr.method & IB_MGMT_METHOD_RESP) ||
181 		(mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) ||
182 		((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_BM) &&
183 		 (mad->mad_hdr.attr_mod & IB_BM_ATTR_MOD_RESP)));
184 }
185 EXPORT_SYMBOL(ib_response_mad);
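
/*
 * For example, a GetResp MAD (method 0x81) carries the
 * IB_MGMT_METHOD_RESP bit (0x80) and is treated as a response, while a
 * Get (0x01) or Set (0x02) is not; TrapRepress and BM attribute-modifier
 * responses are special-cased above.
 */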
186 
187 /*
188  * ib_register_mad_agent - Register to send/receive MADs
189  */
190 struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
191 					   u8 port_num,
192 					   enum ib_qp_type qp_type,
193 					   struct ib_mad_reg_req *mad_reg_req,
194 					   u8 rmpp_version,
195 					   ib_mad_send_handler send_handler,
196 					   ib_mad_recv_handler recv_handler,
197 					   void *context)
198 {
199 	struct ib_mad_port_private *port_priv;
200 	struct ib_mad_agent *ret = ERR_PTR(-EINVAL);
201 	struct ib_mad_agent_private *mad_agent_priv;
202 	struct ib_mad_reg_req *reg_req = NULL;
203 	struct ib_mad_mgmt_class_table *class;
204 	struct ib_mad_mgmt_vendor_class_table *vendor;
205 	struct ib_mad_mgmt_vendor_class *vendor_class;
206 	struct ib_mad_mgmt_method_table *method;
207 	int ret2, qpn;
208 	unsigned long flags;
209 	u8 mgmt_class, vclass;
210 
211 	/* Validate parameters */
212 	qpn = get_spl_qp_index(qp_type);
213 	if (qpn == -1)
214 		goto error1;
215 
216 	if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION)
217 		goto error1;
218 
219 	/* Validate MAD registration request if supplied */
220 	if (mad_reg_req) {
221 		if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION)
222 			goto error1;
223 		if (!recv_handler)
224 			goto error1;
225 		if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
226 			/*
227 			 * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only
228 			 * one in this range currently allowed
229 			 */
230 			if (mad_reg_req->mgmt_class !=
231 			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
232 				goto error1;
233 		} else if (mad_reg_req->mgmt_class == 0) {
234 			/*
235 			 * Class 0 is reserved in IBA and is used for
236 			 * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
237 			 */
238 			goto error1;
239 		} else if (is_vendor_class(mad_reg_req->mgmt_class)) {
240 			/*
241 			 * If class is in "new" vendor range,
242 			 * ensure supplied OUI is not zero
243 			 */
244 			if (!is_vendor_oui(mad_reg_req->oui))
245 				goto error1;
246 		}
247 		/* Make sure class supplied is consistent with RMPP */
248 		if (!ib_is_mad_class_rmpp(mad_reg_req->mgmt_class)) {
249 			if (rmpp_version)
250 				goto error1;
251 		}
252 		/* Make sure class supplied is consistent with QP type */
253 		if (qp_type == IB_QPT_SMI) {
254 			if ((mad_reg_req->mgmt_class !=
255 					IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
256 			    (mad_reg_req->mgmt_class !=
257 					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE))
258 				goto error1;
259 		} else {
260 			if ((mad_reg_req->mgmt_class ==
261 					IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
262 			    (mad_reg_req->mgmt_class ==
263 					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE))
264 				goto error1;
265 		}
266 	} else {
267 		/* No registration request supplied */
268 		if (!send_handler)
269 			goto error1;
270 	}
271 
272 	/* Validate device and port */
273 	port_priv = ib_get_mad_port(device, port_num);
274 	if (!port_priv) {
275 		ret = ERR_PTR(-ENODEV);
276 		goto error1;
277 	}
278 
279 	/* Allocate structures */
280 	mad_agent_priv = kzalloc(sizeof *mad_agent_priv, GFP_KERNEL);
281 	if (!mad_agent_priv) {
282 		ret = ERR_PTR(-ENOMEM);
283 		goto error1;
284 	}
285 
286 	mad_agent_priv->agent.mr = ib_get_dma_mr(port_priv->qp_info[qpn].qp->pd,
287 						 IB_ACCESS_LOCAL_WRITE);
288 	if (IS_ERR(mad_agent_priv->agent.mr)) {
289 		ret = ERR_PTR(-ENOMEM);
290 		goto error2;
291 	}
292 
293 	if (mad_reg_req) {
294 		reg_req = kmalloc(sizeof *reg_req, GFP_KERNEL);
295 		if (!reg_req) {
296 			ret = ERR_PTR(-ENOMEM);
297 			goto error3;
298 		}
299 		/* Make a copy of the MAD registration request */
300 		memcpy(reg_req, mad_reg_req, sizeof *reg_req);
301 	}
302 
303 	/* Now, fill in the various structures */
304 	mad_agent_priv->qp_info = &port_priv->qp_info[qpn];
305 	mad_agent_priv->reg_req = reg_req;
306 	mad_agent_priv->agent.rmpp_version = rmpp_version;
307 	mad_agent_priv->agent.device = device;
308 	mad_agent_priv->agent.recv_handler = recv_handler;
309 	mad_agent_priv->agent.send_handler = send_handler;
310 	mad_agent_priv->agent.context = context;
311 	mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
312 	mad_agent_priv->agent.port_num = port_num;
313 	spin_lock_init(&mad_agent_priv->lock);
314 	INIT_LIST_HEAD(&mad_agent_priv->send_list);
315 	INIT_LIST_HEAD(&mad_agent_priv->wait_list);
316 	INIT_LIST_HEAD(&mad_agent_priv->done_list);
317 	INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
318 	INIT_DELAYED_WORK(&mad_agent_priv->timed_work, timeout_sends);
319 	INIT_LIST_HEAD(&mad_agent_priv->local_list);
320 	INIT_WORK(&mad_agent_priv->local_work, local_completions);
321 	atomic_set(&mad_agent_priv->refcount, 1);
322 	init_completion(&mad_agent_priv->comp);
323 
324 	spin_lock_irqsave(&port_priv->reg_lock, flags);
325 	mad_agent_priv->agent.hi_tid = ++ib_mad_client_id;
326 
327 	/*
328 	 * Make sure the MAD registration (if supplied)
329 	 * does not overlap with any existing ones
330 	 */
331 	if (mad_reg_req) {
332 		mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class);
333 		if (!is_vendor_class(mgmt_class)) {
334 			class = port_priv->version[mad_reg_req->
335 						   mgmt_class_version].class;
336 			if (class) {
337 				method = class->method_table[mgmt_class];
338 				if (method) {
339 					if (method_in_use(&method,
340 							   mad_reg_req))
341 						goto error4;
342 				}
343 			}
344 			ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
345 						  mgmt_class);
346 		} else {
347 			/* "New" vendor class range */
348 			vendor = port_priv->version[mad_reg_req->
349 						    mgmt_class_version].vendor;
350 			if (vendor) {
351 				vclass = vendor_class_index(mgmt_class);
352 				vendor_class = vendor->vendor_class[vclass];
353 				if (vendor_class) {
354 					if (is_vendor_method_in_use(
355 							vendor_class,
356 							mad_reg_req))
357 						goto error4;
358 				}
359 			}
360 			ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
361 		}
362 		if (ret2) {
363 			ret = ERR_PTR(ret2);
364 			goto error4;
365 		}
366 	}
367 
368 	/* Add mad agent into port's agent list */
369 	list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list);
370 	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
371 
372 	return &mad_agent_priv->agent;
373 
374 error4:
375 	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
376 	kfree(reg_req);
377 error3:
378 	ib_dereg_mr(mad_agent_priv->agent.mr);
379 error2:
380 	kfree(mad_agent_priv);
381 error1:
382 	return ret;
383 }
384 EXPORT_SYMBOL(ib_register_mad_agent);
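
/*
 * Illustrative usage (not part of this file): a client that only needs to
 * send MADs on the GSI QP can register without a struct ib_mad_reg_req,
 * as the validation above allows.  The handler names and the "my_ctx"
 * pointer are hypothetical placeholders.
 *
 *	struct ib_mad_agent *agent;
 *
 *	agent = ib_register_mad_agent(device, port_num, IB_QPT_GSI, NULL,
 *				      0, my_send_handler, my_recv_handler,
 *				      my_ctx);
 *	if (IS_ERR(agent))
 *		return PTR_ERR(agent);
 *	...
 *	ib_unregister_mad_agent(agent);
 */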
385 
386 static inline int is_snooping_sends(int mad_snoop_flags)
387 {
388 	return (mad_snoop_flags &
389 		(/*IB_MAD_SNOOP_POSTED_SENDS |
390 		 IB_MAD_SNOOP_RMPP_SENDS |*/
391 		 IB_MAD_SNOOP_SEND_COMPLETIONS /*|
392 		 IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS*/));
393 }
394 
395 static inline int is_snooping_recvs(int mad_snoop_flags)
396 {
397 	return (mad_snoop_flags &
398 		(IB_MAD_SNOOP_RECVS /*|
399 		 IB_MAD_SNOOP_RMPP_RECVS*/));
400 }
401 
402 static int register_snoop_agent(struct ib_mad_qp_info *qp_info,
403 				struct ib_mad_snoop_private *mad_snoop_priv)
404 {
405 	struct ib_mad_snoop_private **new_snoop_table;
406 	unsigned long flags;
407 	int i;
408 
409 	spin_lock_irqsave(&qp_info->snoop_lock, flags);
410 	/* Check for empty slot in array. */
411 	for (i = 0; i < qp_info->snoop_table_size; i++)
412 		if (!qp_info->snoop_table[i])
413 			break;
414 
415 	if (i == qp_info->snoop_table_size) {
416 		/* Grow table. */
417 		new_snoop_table = krealloc(qp_info->snoop_table,
418 					   sizeof mad_snoop_priv *
419 					   (qp_info->snoop_table_size + 1),
420 					   GFP_ATOMIC);
421 		if (!new_snoop_table) {
422 			i = -ENOMEM;
423 			goto out;
424 		}
425 
426 		qp_info->snoop_table = new_snoop_table;
427 		qp_info->snoop_table_size++;
428 	}
429 	qp_info->snoop_table[i] = mad_snoop_priv;
430 	atomic_inc(&qp_info->snoop_count);
431 out:
432 	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
433 	return i;
434 }
435 
436 struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
437 					   u8 port_num,
438 					   enum ib_qp_type qp_type,
439 					   int mad_snoop_flags,
440 					   ib_mad_snoop_handler snoop_handler,
441 					   ib_mad_recv_handler recv_handler,
442 					   void *context)
443 {
444 	struct ib_mad_port_private *port_priv;
445 	struct ib_mad_agent *ret;
446 	struct ib_mad_snoop_private *mad_snoop_priv;
447 	int qpn;
448 
449 	/* Validate parameters */
450 	if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) ||
451 	    (is_snooping_recvs(mad_snoop_flags) && !recv_handler)) {
452 		ret = ERR_PTR(-EINVAL);
453 		goto error1;
454 	}
455 	qpn = get_spl_qp_index(qp_type);
456 	if (qpn == -1) {
457 		ret = ERR_PTR(-EINVAL);
458 		goto error1;
459 	}
460 	port_priv = ib_get_mad_port(device, port_num);
461 	if (!port_priv) {
462 		ret = ERR_PTR(-ENODEV);
463 		goto error1;
464 	}
465 	/* Allocate structures */
466 	mad_snoop_priv = kzalloc(sizeof *mad_snoop_priv, GFP_KERNEL);
467 	if (!mad_snoop_priv) {
468 		ret = ERR_PTR(-ENOMEM);
469 		goto error1;
470 	}
471 
472 	/* Now, fill in the various structures */
473 	mad_snoop_priv->qp_info = &port_priv->qp_info[qpn];
474 	mad_snoop_priv->agent.device = device;
475 	mad_snoop_priv->agent.recv_handler = recv_handler;
476 	mad_snoop_priv->agent.snoop_handler = snoop_handler;
477 	mad_snoop_priv->agent.context = context;
478 	mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp;
479 	mad_snoop_priv->agent.port_num = port_num;
480 	mad_snoop_priv->mad_snoop_flags = mad_snoop_flags;
481 	init_completion(&mad_snoop_priv->comp);
482 	mad_snoop_priv->snoop_index = register_snoop_agent(
483 						&port_priv->qp_info[qpn],
484 						mad_snoop_priv);
485 	if (mad_snoop_priv->snoop_index < 0) {
486 		ret = ERR_PTR(mad_snoop_priv->snoop_index);
487 		goto error2;
488 	}
489 
490 	atomic_set(&mad_snoop_priv->refcount, 1);
491 	return &mad_snoop_priv->agent;
492 
493 error2:
494 	kfree(mad_snoop_priv);
495 error1:
496 	return ret;
497 }
498 EXPORT_SYMBOL(ib_register_mad_snoop);
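
/*
 * Illustrative usage (not part of this file): a snoop-only agent that
 * observes received GSI MADs needs a recv_handler but, per the checks
 * above, may omit the snoop_handler when no send-side flags are set.  The
 * handler name and "my_ctx" are hypothetical placeholders.
 *
 *	struct ib_mad_agent *snoop;
 *
 *	snoop = ib_register_mad_snoop(device, port_num, IB_QPT_GSI,
 *				      IB_MAD_SNOOP_RECVS, NULL,
 *				      my_snoop_recv_handler, my_ctx);
 *	if (IS_ERR(snoop))
 *		return PTR_ERR(snoop);
 */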
499 
500 static inline void deref_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
501 {
502 	if (atomic_dec_and_test(&mad_agent_priv->refcount))
503 		complete(&mad_agent_priv->comp);
504 }
505 
506 static inline void deref_snoop_agent(struct ib_mad_snoop_private *mad_snoop_priv)
507 {
508 	if (atomic_dec_and_test(&mad_snoop_priv->refcount))
509 		complete(&mad_snoop_priv->comp);
510 }
511 
512 static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
513 {
514 	struct ib_mad_port_private *port_priv;
515 	unsigned long flags;
516 
517 	/* Note that we could still be handling received MADs */
518 
519 	/*
520 	 * Canceling all sends results in dropping received response
521 	 * MADs, preventing us from queuing additional work
522 	 */
523 	cancel_mads(mad_agent_priv);
524 	port_priv = mad_agent_priv->qp_info->port_priv;
525 	cancel_delayed_work(&mad_agent_priv->timed_work);
526 
527 	spin_lock_irqsave(&port_priv->reg_lock, flags);
528 	remove_mad_reg_req(mad_agent_priv);
529 	list_del(&mad_agent_priv->agent_list);
530 	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
531 
532 	flush_workqueue(port_priv->wq);
533 	ib_cancel_rmpp_recvs(mad_agent_priv);
534 
535 	deref_mad_agent(mad_agent_priv);
536 	wait_for_completion(&mad_agent_priv->comp);
537 
538 	kfree(mad_agent_priv->reg_req);
539 	ib_dereg_mr(mad_agent_priv->agent.mr);
540 	kfree(mad_agent_priv);
541 }
542 
543 static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
544 {
545 	struct ib_mad_qp_info *qp_info;
546 	unsigned long flags;
547 
548 	qp_info = mad_snoop_priv->qp_info;
549 	spin_lock_irqsave(&qp_info->snoop_lock, flags);
550 	qp_info->snoop_table[mad_snoop_priv->snoop_index] = NULL;
551 	atomic_dec(&qp_info->snoop_count);
552 	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
553 
554 	deref_snoop_agent(mad_snoop_priv);
555 	wait_for_completion(&mad_snoop_priv->comp);
556 
557 	kfree(mad_snoop_priv);
558 }
559 
560 /*
561  * ib_unregister_mad_agent - Unregisters a client from using MAD services
562  */
563 int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
564 {
565 	struct ib_mad_agent_private *mad_agent_priv;
566 	struct ib_mad_snoop_private *mad_snoop_priv;
567 
568 	/* If the TID is zero, the agent can only snoop. */
569 	if (mad_agent->hi_tid) {
570 		mad_agent_priv = container_of(mad_agent,
571 					      struct ib_mad_agent_private,
572 					      agent);
573 		unregister_mad_agent(mad_agent_priv);
574 	} else {
575 		mad_snoop_priv = container_of(mad_agent,
576 					      struct ib_mad_snoop_private,
577 					      agent);
578 		unregister_mad_snoop(mad_snoop_priv);
579 	}
580 	return 0;
581 }
582 EXPORT_SYMBOL(ib_unregister_mad_agent);
583 
584 static void dequeue_mad(struct ib_mad_list_head *mad_list)
585 {
586 	struct ib_mad_queue *mad_queue;
587 	unsigned long flags;
588 
589 	BUG_ON(!mad_list->mad_queue);
590 	mad_queue = mad_list->mad_queue;
591 	spin_lock_irqsave(&mad_queue->lock, flags);
592 	list_del(&mad_list->list);
593 	mad_queue->count--;
594 	spin_unlock_irqrestore(&mad_queue->lock, flags);
595 }
596 
597 static void snoop_send(struct ib_mad_qp_info *qp_info,
598 		       struct ib_mad_send_buf *send_buf,
599 		       struct ib_mad_send_wc *mad_send_wc,
600 		       int mad_snoop_flags)
601 {
602 	struct ib_mad_snoop_private *mad_snoop_priv;
603 	unsigned long flags;
604 	int i;
605 
606 	spin_lock_irqsave(&qp_info->snoop_lock, flags);
607 	for (i = 0; i < qp_info->snoop_table_size; i++) {
608 		mad_snoop_priv = qp_info->snoop_table[i];
609 		if (!mad_snoop_priv ||
610 		    !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
611 			continue;
612 
613 		atomic_inc(&mad_snoop_priv->refcount);
614 		spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
615 		mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent,
616 						    send_buf, mad_send_wc);
617 		deref_snoop_agent(mad_snoop_priv);
618 		spin_lock_irqsave(&qp_info->snoop_lock, flags);
619 	}
620 	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
621 }
622 
623 static void snoop_recv(struct ib_mad_qp_info *qp_info,
624 		       struct ib_mad_recv_wc *mad_recv_wc,
625 		       int mad_snoop_flags)
626 {
627 	struct ib_mad_snoop_private *mad_snoop_priv;
628 	unsigned long flags;
629 	int i;
630 
631 	spin_lock_irqsave(&qp_info->snoop_lock, flags);
632 	for (i = 0; i < qp_info->snoop_table_size; i++) {
633 		mad_snoop_priv = qp_info->snoop_table[i];
634 		if (!mad_snoop_priv ||
635 		    !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
636 			continue;
637 
638 		atomic_inc(&mad_snoop_priv->refcount);
639 		spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
640 		mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent,
641 						   mad_recv_wc);
642 		deref_snoop_agent(mad_snoop_priv);
643 		spin_lock_irqsave(&qp_info->snoop_lock, flags);
644 	}
645 	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
646 }
647 
648 static void build_smp_wc(struct ib_qp *qp,
649 			 u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
650 			 struct ib_wc *wc)
651 {
652 	memset(wc, 0, sizeof *wc);
653 	wc->wr_id = wr_id;
654 	wc->status = IB_WC_SUCCESS;
655 	wc->opcode = IB_WC_RECV;
656 	wc->pkey_index = pkey_index;
657 	wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh);
658 	wc->src_qp = IB_QP0;
659 	wc->qp = qp;
660 	wc->slid = slid;
661 	wc->sl = 0;
662 	wc->dlid_path_bits = 0;
663 	wc->port_num = port_num;
664 }
665 
666 /*
667  * Return 0 if SMP is to be sent
668  * Return 1 if SMP was consumed locally (whether or not solicited)
669  * Return < 0 on error
670  */
671 static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
672 				  struct ib_mad_send_wr_private *mad_send_wr)
673 {
674 	int ret = 0;
675 	struct ib_smp *smp = mad_send_wr->send_buf.mad;
676 	unsigned long flags;
677 	struct ib_mad_local_private *local;
678 	struct ib_mad_private *mad_priv;
679 	struct ib_mad_port_private *port_priv;
680 	struct ib_mad_agent_private *recv_mad_agent = NULL;
681 	struct ib_device *device = mad_agent_priv->agent.device;
682 	u8 port_num;
683 	struct ib_wc mad_wc;
684 	struct ib_send_wr *send_wr = &mad_send_wr->send_wr;
685 
686 	if (device->node_type == RDMA_NODE_IB_SWITCH &&
687 	    smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
688 		port_num = send_wr->wr.ud.port_num;
689 	else
690 		port_num = mad_agent_priv->agent.port_num;
691 
692 	/*
693 	 * Directed route handling starts if the initial LID routed part of
694 	 * a request or the ending LID routed part of a response is empty.
695 	 * If we are at the start of the LID routed part, don't update the
696 	 * hop_ptr or hop_cnt.  See section 14.2.2, Vol 1 IB spec.
697 	 */
698 	if ((ib_get_smp_direction(smp) ? smp->dr_dlid : smp->dr_slid) ==
699 	     IB_LID_PERMISSIVE &&
700 	     smi_handle_dr_smp_send(smp, device->node_type, port_num) ==
701 	     IB_SMI_DISCARD) {
702 		ret = -EINVAL;
703 		printk(KERN_ERR PFX "Invalid directed route\n");
704 		goto out;
705 	}
706 
707 	/* Check whether to post the send on the QP or to process it locally */
708 	if (smi_check_local_smp(smp, device) == IB_SMI_DISCARD &&
709 	    smi_check_local_returning_smp(smp, device) == IB_SMI_DISCARD)
710 		goto out;
711 
712 	local = kmalloc(sizeof *local, GFP_ATOMIC);
713 	if (!local) {
714 		ret = -ENOMEM;
715 		printk(KERN_ERR PFX "No memory for ib_mad_local_private\n");
716 		goto out;
717 	}
718 	local->mad_priv = NULL;
719 	local->recv_mad_agent = NULL;
720 	mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_ATOMIC);
721 	if (!mad_priv) {
722 		ret = -ENOMEM;
723 		printk(KERN_ERR PFX "No memory for local response MAD\n");
724 		kfree(local);
725 		goto out;
726 	}
727 
728 	build_smp_wc(mad_agent_priv->agent.qp,
729 		     send_wr->wr_id, be16_to_cpu(smp->dr_slid),
730 		     send_wr->wr.ud.pkey_index,
731 		     send_wr->wr.ud.port_num, &mad_wc);
732 
733 	/* No GRH for DR SMP */
734 	ret = device->process_mad(device, 0, port_num, &mad_wc, NULL,
735 				  (struct ib_mad *)smp,
736 				  (struct ib_mad *)&mad_priv->mad);
737 	switch (ret)
738 	{
739 	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
740 		if (ib_response_mad(&mad_priv->mad.mad) &&
741 		    mad_agent_priv->agent.recv_handler) {
742 			local->mad_priv = mad_priv;
743 			local->recv_mad_agent = mad_agent_priv;
744 			/*
745 			 * Reference MAD agent until receive
746 			 * side of local completion handled
747 			 */
748 			atomic_inc(&mad_agent_priv->refcount);
749 		} else
750 			kmem_cache_free(ib_mad_cache, mad_priv);
751 		break;
752 	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED:
753 		kmem_cache_free(ib_mad_cache, mad_priv);
754 		break;
755 	case IB_MAD_RESULT_SUCCESS:
756 		/* Treat like an incoming receive MAD */
757 		port_priv = ib_get_mad_port(mad_agent_priv->agent.device,
758 					    mad_agent_priv->agent.port_num);
759 		if (port_priv) {
760 			memcpy(&mad_priv->mad.mad, smp, sizeof(struct ib_mad));
761 			recv_mad_agent = find_mad_agent(port_priv,
762 						        &mad_priv->mad.mad);
763 		}
764 		if (!port_priv || !recv_mad_agent) {
765 			/*
766 			 * No receiving agent so drop packet and
767 			 * generate send completion.
768 			 */
769 			kmem_cache_free(ib_mad_cache, mad_priv);
770 			break;
771 		}
772 		local->mad_priv = mad_priv;
773 		local->recv_mad_agent = recv_mad_agent;
774 		break;
775 	default:
776 		kmem_cache_free(ib_mad_cache, mad_priv);
777 		kfree(local);
778 		ret = -EINVAL;
779 		goto out;
780 	}
781 
782 	local->mad_send_wr = mad_send_wr;
783 	/* Reference MAD agent until send side of local completion handled */
784 	atomic_inc(&mad_agent_priv->refcount);
785 	/* Queue local completion to local list */
786 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
787 	list_add_tail(&local->completion_list, &mad_agent_priv->local_list);
788 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
789 	queue_work(mad_agent_priv->qp_info->port_priv->wq,
790 		   &mad_agent_priv->local_work);
791 	ret = 1;
792 out:
793 	return ret;
794 }
795 
796 static int get_pad_size(int hdr_len, int data_len)
797 {
798 	int seg_size, pad;
799 
800 	seg_size = sizeof(struct ib_mad) - hdr_len;
801 	if (data_len && seg_size) {
802 		pad = seg_size - data_len % seg_size;
803 		return pad == seg_size ? 0 : pad;
804 	} else
805 		return seg_size;
806 }
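
/*
 * Worked example (illustrative): for an SA-class MAD, hdr_len is
 * IB_MGMT_SA_HDR (56 bytes at the time of writing), so seg_size is
 * 256 - 56 = 200.  A data_len of 150 then needs 50 bytes of padding to
 * fill out the last RMPP segment, while a data_len of 400 is an exact
 * multiple of seg_size and needs none.
 */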
807 
808 static void free_send_rmpp_list(struct ib_mad_send_wr_private *mad_send_wr)
809 {
810 	struct ib_rmpp_segment *s, *t;
811 
812 	list_for_each_entry_safe(s, t, &mad_send_wr->rmpp_list, list) {
813 		list_del(&s->list);
814 		kfree(s);
815 	}
816 }
817 
818 static int alloc_send_rmpp_list(struct ib_mad_send_wr_private *send_wr,
819 				gfp_t gfp_mask)
820 {
821 	struct ib_mad_send_buf *send_buf = &send_wr->send_buf;
822 	struct ib_rmpp_mad *rmpp_mad = send_buf->mad;
823 	struct ib_rmpp_segment *seg = NULL;
824 	int left, seg_size, pad;
825 
826 	send_buf->seg_size = sizeof (struct ib_mad) - send_buf->hdr_len;
827 	seg_size = send_buf->seg_size;
828 	pad = send_wr->pad;
829 
830 	/* Allocate data segments. */
831 	for (left = send_buf->data_len + pad; left > 0; left -= seg_size) {
832 		seg = kmalloc(sizeof (*seg) + seg_size, gfp_mask);
833 		if (!seg) {
834 			printk(KERN_ERR "alloc_send_rmpp_segs: RMPP mem "
835 			       "alloc failed for len %zd, gfp %#x\n",
836 			       sizeof (*seg) + seg_size, gfp_mask);
837 			free_send_rmpp_list(send_wr);
838 			return -ENOMEM;
839 		}
840 		seg->num = ++send_buf->seg_count;
841 		list_add_tail(&seg->list, &send_wr->rmpp_list);
842 	}
843 
844 	/* Zero any padding */
845 	if (pad)
846 		memset(seg->data + seg_size - pad, 0, pad);
847 
848 	rmpp_mad->rmpp_hdr.rmpp_version = send_wr->mad_agent_priv->
849 					  agent.rmpp_version;
850 	rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
851 	ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr, IB_MGMT_RMPP_FLAG_ACTIVE);
852 
853 	send_wr->cur_seg = container_of(send_wr->rmpp_list.next,
854 					struct ib_rmpp_segment, list);
855 	send_wr->last_ack_seg = send_wr->cur_seg;
856 	return 0;
857 }
858 
859 struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
860 					    u32 remote_qpn, u16 pkey_index,
861 					    int rmpp_active,
862 					    int hdr_len, int data_len,
863 					    gfp_t gfp_mask)
864 {
865 	struct ib_mad_agent_private *mad_agent_priv;
866 	struct ib_mad_send_wr_private *mad_send_wr;
867 	int pad, message_size, ret, size;
868 	void *buf;
869 
870 	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
871 				      agent);
872 	pad = get_pad_size(hdr_len, data_len);
873 	message_size = hdr_len + data_len + pad;
874 
875 	if ((!mad_agent->rmpp_version &&
876 	     (rmpp_active || message_size > sizeof(struct ib_mad))) ||
877 	    (!rmpp_active && message_size > sizeof(struct ib_mad)))
878 		return ERR_PTR(-EINVAL);
879 
880 	size = rmpp_active ? hdr_len : sizeof(struct ib_mad);
881 	buf = kzalloc(sizeof *mad_send_wr + size, gfp_mask);
882 	if (!buf)
883 		return ERR_PTR(-ENOMEM);
884 
885 	mad_send_wr = buf + size;
886 	INIT_LIST_HEAD(&mad_send_wr->rmpp_list);
887 	mad_send_wr->send_buf.mad = buf;
888 	mad_send_wr->send_buf.hdr_len = hdr_len;
889 	mad_send_wr->send_buf.data_len = data_len;
890 	mad_send_wr->pad = pad;
891 
892 	mad_send_wr->mad_agent_priv = mad_agent_priv;
893 	mad_send_wr->sg_list[0].length = hdr_len;
894 	mad_send_wr->sg_list[0].lkey = mad_agent->mr->lkey;
895 	mad_send_wr->sg_list[1].length = sizeof(struct ib_mad) - hdr_len;
896 	mad_send_wr->sg_list[1].lkey = mad_agent->mr->lkey;
897 
898 	mad_send_wr->send_wr.wr_id = (unsigned long) mad_send_wr;
899 	mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
900 	mad_send_wr->send_wr.num_sge = 2;
901 	mad_send_wr->send_wr.opcode = IB_WR_SEND;
902 	mad_send_wr->send_wr.send_flags = IB_SEND_SIGNALED;
903 	mad_send_wr->send_wr.wr.ud.remote_qpn = remote_qpn;
904 	mad_send_wr->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
905 	mad_send_wr->send_wr.wr.ud.pkey_index = pkey_index;
906 
907 	if (rmpp_active) {
908 		ret = alloc_send_rmpp_list(mad_send_wr, gfp_mask);
909 		if (ret) {
910 			kfree(buf);
911 			return ERR_PTR(ret);
912 		}
913 	}
914 
915 	mad_send_wr->send_buf.mad_agent = mad_agent;
916 	atomic_inc(&mad_agent_priv->refcount);
917 	return &mad_send_wr->send_buf;
918 }
919 EXPORT_SYMBOL(ib_create_send_mad);
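
/*
 * Illustrative usage (not part of this file): allocating, posting and
 * releasing a non-RMPP send buffer on the GSI QP.  The address handle
 * "ah" and the timeout/retry values are hypothetical and chosen by the
 * caller.
 *
 *	struct ib_mad_send_buf *msg;
 *	int ret;
 *
 *	msg = ib_create_send_mad(agent, 1, 0, 0, IB_MGMT_MAD_HDR,
 *				 IB_MGMT_MAD_DATA, GFP_KERNEL);
 *	if (IS_ERR(msg))
 *		return PTR_ERR(msg);
 *	msg->ah = ah;
 *	msg->timeout_ms = 100;
 *	msg->retries = 3;
 *	... fill in msg->mad ...
 *	ret = ib_post_send_mad(msg, NULL);
 *	if (ret)
 *		ib_free_send_mad(msg);
 */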
920 
921 int ib_get_mad_data_offset(u8 mgmt_class)
922 {
923 	if (mgmt_class == IB_MGMT_CLASS_SUBN_ADM)
924 		return IB_MGMT_SA_HDR;
925 	else if ((mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
926 		 (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
927 		 (mgmt_class == IB_MGMT_CLASS_BIS))
928 		return IB_MGMT_DEVICE_HDR;
929 	else if ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
930 		 (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END))
931 		return IB_MGMT_VENDOR_HDR;
932 	else
933 		return IB_MGMT_MAD_HDR;
934 }
935 EXPORT_SYMBOL(ib_get_mad_data_offset);
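
/*
 * For reference, the offsets returned above are (as defined in
 * <rdma/ib_mad.h> at the time of writing) IB_MGMT_SA_HDR = 56,
 * IB_MGMT_DEVICE_HDR = 64, IB_MGMT_VENDOR_HDR = 40 and
 * IB_MGMT_MAD_HDR = 24 bytes, i.e. everything up to and including the
 * class-specific header, with the class data starting at that offset.
 */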
936 
937 int ib_is_mad_class_rmpp(u8 mgmt_class)
938 {
939 	if ((mgmt_class == IB_MGMT_CLASS_SUBN_ADM) ||
940 	    (mgmt_class == IB_MGMT_CLASS_DEVICE_MGMT) ||
941 	    (mgmt_class == IB_MGMT_CLASS_DEVICE_ADM) ||
942 	    (mgmt_class == IB_MGMT_CLASS_BIS) ||
943 	    ((mgmt_class >= IB_MGMT_CLASS_VENDOR_RANGE2_START) &&
944 	     (mgmt_class <= IB_MGMT_CLASS_VENDOR_RANGE2_END)))
945 		return 1;
946 	return 0;
947 }
948 EXPORT_SYMBOL(ib_is_mad_class_rmpp);
949 
950 void *ib_get_rmpp_segment(struct ib_mad_send_buf *send_buf, int seg_num)
951 {
952 	struct ib_mad_send_wr_private *mad_send_wr;
953 	struct list_head *list;
954 
955 	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
956 				   send_buf);
957 	list = &mad_send_wr->cur_seg->list;
958 
959 	if (mad_send_wr->cur_seg->num < seg_num) {
960 		list_for_each_entry(mad_send_wr->cur_seg, list, list)
961 			if (mad_send_wr->cur_seg->num == seg_num)
962 				break;
963 	} else if (mad_send_wr->cur_seg->num > seg_num) {
964 		list_for_each_entry_reverse(mad_send_wr->cur_seg, list, list)
965 			if (mad_send_wr->cur_seg->num == seg_num)
966 				break;
967 	}
968 	return mad_send_wr->cur_seg->data;
969 }
970 EXPORT_SYMBOL(ib_get_rmpp_segment);
971 
972 static inline void *ib_get_payload(struct ib_mad_send_wr_private *mad_send_wr)
973 {
974 	if (mad_send_wr->send_buf.seg_count)
975 		return ib_get_rmpp_segment(&mad_send_wr->send_buf,
976 					   mad_send_wr->seg_num);
977 	else
978 		return mad_send_wr->send_buf.mad +
979 		       mad_send_wr->send_buf.hdr_len;
980 }
981 
982 void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
983 {
984 	struct ib_mad_agent_private *mad_agent_priv;
985 	struct ib_mad_send_wr_private *mad_send_wr;
986 
987 	mad_agent_priv = container_of(send_buf->mad_agent,
988 				      struct ib_mad_agent_private, agent);
989 	mad_send_wr = container_of(send_buf, struct ib_mad_send_wr_private,
990 				   send_buf);
991 
992 	free_send_rmpp_list(mad_send_wr);
993 	kfree(send_buf->mad);
994 	deref_mad_agent(mad_agent_priv);
995 }
996 EXPORT_SYMBOL(ib_free_send_mad);
997 
998 int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
999 {
1000 	struct ib_mad_qp_info *qp_info;
1001 	struct list_head *list;
1002 	struct ib_send_wr *bad_send_wr;
1003 	struct ib_mad_agent *mad_agent;
1004 	struct ib_sge *sge;
1005 	unsigned long flags;
1006 	int ret;
1007 
1008 	/* Set WR ID to find mad_send_wr upon completion */
1009 	qp_info = mad_send_wr->mad_agent_priv->qp_info;
1010 	mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
1011 	mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
1012 
1013 	mad_agent = mad_send_wr->send_buf.mad_agent;
1014 	sge = mad_send_wr->sg_list;
1015 	sge[0].addr = ib_dma_map_single(mad_agent->device,
1016 					mad_send_wr->send_buf.mad,
1017 					sge[0].length,
1018 					DMA_TO_DEVICE);
1019 	mad_send_wr->header_mapping = sge[0].addr;
1020 
1021 	sge[1].addr = ib_dma_map_single(mad_agent->device,
1022 					ib_get_payload(mad_send_wr),
1023 					sge[1].length,
1024 					DMA_TO_DEVICE);
1025 	mad_send_wr->payload_mapping = sge[1].addr;
1026 
1027 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
1028 	if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
1029 		ret = ib_post_send(mad_agent->qp, &mad_send_wr->send_wr,
1030 				   &bad_send_wr);
1031 		list = &qp_info->send_queue.list;
1032 	} else {
1033 		ret = 0;
1034 		list = &qp_info->overflow_list;
1035 	}
1036 
1037 	if (!ret) {
1038 		qp_info->send_queue.count++;
1039 		list_add_tail(&mad_send_wr->mad_list.list, list);
1040 	}
1041 	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
1042 	if (ret) {
1043 		ib_dma_unmap_single(mad_agent->device,
1044 				    mad_send_wr->header_mapping,
1045 				    sge[0].length, DMA_TO_DEVICE);
1046 		ib_dma_unmap_single(mad_agent->device,
1047 				    mad_send_wr->payload_mapping,
1048 				    sge[1].length, DMA_TO_DEVICE);
1049 	}
1050 	return ret;
1051 }
1052 
1053 /*
1054  * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
1055  *  with the registered client
1056  */
1057 int ib_post_send_mad(struct ib_mad_send_buf *send_buf,
1058 		     struct ib_mad_send_buf **bad_send_buf)
1059 {
1060 	struct ib_mad_agent_private *mad_agent_priv;
1061 	struct ib_mad_send_buf *next_send_buf;
1062 	struct ib_mad_send_wr_private *mad_send_wr;
1063 	unsigned long flags;
1064 	int ret = -EINVAL;
1065 
1066 	/* Walk list of send WRs and post each on send list */
1067 	for (; send_buf; send_buf = next_send_buf) {
1068 
1069 		mad_send_wr = container_of(send_buf,
1070 					   struct ib_mad_send_wr_private,
1071 					   send_buf);
1072 		mad_agent_priv = mad_send_wr->mad_agent_priv;
1073 
1074 		if (!send_buf->mad_agent->send_handler ||
1075 		    (send_buf->timeout_ms &&
1076 		     !send_buf->mad_agent->recv_handler)) {
1077 			ret = -EINVAL;
1078 			goto error;
1079 		}
1080 
1081 		if (!ib_is_mad_class_rmpp(((struct ib_mad_hdr *) send_buf->mad)->mgmt_class)) {
1082 			if (mad_agent_priv->agent.rmpp_version) {
1083 				ret = -EINVAL;
1084 				goto error;
1085 			}
1086 		}
1087 
1088 		/*
1089 		 * Save pointer to next work request to post in case the
1090 		 * current one completes, and the user modifies the work
1091 		 * request associated with the completion
1092 		 */
1093 		next_send_buf = send_buf->next;
1094 		mad_send_wr->send_wr.wr.ud.ah = send_buf->ah;
1095 
1096 		if (((struct ib_mad_hdr *) send_buf->mad)->mgmt_class ==
1097 		    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
1098 			ret = handle_outgoing_dr_smp(mad_agent_priv,
1099 						     mad_send_wr);
1100 			if (ret < 0)		/* error */
1101 				goto error;
1102 			else if (ret == 1)	/* locally consumed */
1103 				continue;
1104 		}
1105 
1106 		mad_send_wr->tid = ((struct ib_mad_hdr *) send_buf->mad)->tid;
1107 		/* Timeout will be updated after send completes */
1108 		mad_send_wr->timeout = msecs_to_jiffies(send_buf->timeout_ms);
1109 		mad_send_wr->max_retries = send_buf->retries;
1110 		mad_send_wr->retries_left = send_buf->retries;
1111 		send_buf->retries = 0;
1112 		/* Reference for work request to QP + response */
1113 		mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
1114 		mad_send_wr->status = IB_WC_SUCCESS;
1115 
1116 		/* Reference MAD agent until send completes */
1117 		atomic_inc(&mad_agent_priv->refcount);
1118 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
1119 		list_add_tail(&mad_send_wr->agent_list,
1120 			      &mad_agent_priv->send_list);
1121 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1122 
1123 		if (mad_agent_priv->agent.rmpp_version) {
1124 			ret = ib_send_rmpp_mad(mad_send_wr);
1125 			if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED)
1126 				ret = ib_send_mad(mad_send_wr);
1127 		} else
1128 			ret = ib_send_mad(mad_send_wr);
1129 		if (ret < 0) {
1130 			/* Fail send request */
1131 			spin_lock_irqsave(&mad_agent_priv->lock, flags);
1132 			list_del(&mad_send_wr->agent_list);
1133 			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1134 			atomic_dec(&mad_agent_priv->refcount);
1135 			goto error;
1136 		}
1137 	}
1138 	return 0;
1139 error:
1140 	if (bad_send_buf)
1141 		*bad_send_buf = send_buf;
1142 	return ret;
1143 }
1144 EXPORT_SYMBOL(ib_post_send_mad);
1145 
1146 /*
1147  * ib_free_recv_mad - Returns data buffers used to receive
1148  *  a MAD to the access layer
1149  */
1150 void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc)
1151 {
1152 	struct ib_mad_recv_buf *mad_recv_buf, *temp_recv_buf;
1153 	struct ib_mad_private_header *mad_priv_hdr;
1154 	struct ib_mad_private *priv;
1155 	struct list_head free_list;
1156 
1157 	INIT_LIST_HEAD(&free_list);
1158 	list_splice_init(&mad_recv_wc->rmpp_list, &free_list);
1159 
1160 	list_for_each_entry_safe(mad_recv_buf, temp_recv_buf,
1161 					&free_list, list) {
1162 		mad_recv_wc = container_of(mad_recv_buf, struct ib_mad_recv_wc,
1163 					   recv_buf);
1164 		mad_priv_hdr = container_of(mad_recv_wc,
1165 					    struct ib_mad_private_header,
1166 					    recv_wc);
1167 		priv = container_of(mad_priv_hdr, struct ib_mad_private,
1168 				    header);
1169 		kmem_cache_free(ib_mad_cache, priv);
1170 	}
1171 }
1172 EXPORT_SYMBOL(ib_free_recv_mad);
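
/*
 * Illustrative usage (not part of this file): a recv_handler normally
 * hands the buffers back once it is done with them.  The handler name is
 * a hypothetical placeholder.
 *
 *	static void my_recv_handler(struct ib_mad_agent *agent,
 *				    struct ib_mad_recv_wc *mad_recv_wc)
 *	{
 *		... examine mad_recv_wc->recv_buf.mad ...
 *		ib_free_recv_mad(mad_recv_wc);
 *	}
 */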
1173 
1174 struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp,
1175 					u8 rmpp_version,
1176 					ib_mad_send_handler send_handler,
1177 					ib_mad_recv_handler recv_handler,
1178 					void *context)
1179 {
1180 	return ERR_PTR(-EINVAL);	/* XXX: for now */
1181 }
1182 EXPORT_SYMBOL(ib_redirect_mad_qp);
1183 
1184 int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
1185 		      struct ib_wc *wc)
1186 {
1187 	printk(KERN_ERR PFX "ib_process_mad_wc() not implemented yet\n");
1188 	return 0;
1189 }
1190 EXPORT_SYMBOL(ib_process_mad_wc);
1191 
1192 static int method_in_use(struct ib_mad_mgmt_method_table **method,
1193 			 struct ib_mad_reg_req *mad_reg_req)
1194 {
1195 	int i;
1196 
1197 	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS) {
1198 		if ((*method)->agent[i]) {
1199 			printk(KERN_ERR PFX "Method %d already in use\n", i);
1200 			return -EINVAL;
1201 		}
1202 	}
1203 	return 0;
1204 }
1205 
1206 static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
1207 {
1208 	/* Allocate management method table */
1209 	*method = kzalloc(sizeof **method, GFP_ATOMIC);
1210 	if (!*method) {
1211 		printk(KERN_ERR PFX "No memory for "
1212 		       "ib_mad_mgmt_method_table\n");
1213 		return -ENOMEM;
1214 	}
1215 
1216 	return 0;
1217 }
1218 
1219 /*
1220  * Check to see if there are any methods still in use
1221  */
1222 static int check_method_table(struct ib_mad_mgmt_method_table *method)
1223 {
1224 	int i;
1225 
1226 	for (i = 0; i < IB_MGMT_MAX_METHODS; i++)
1227 		if (method->agent[i])
1228 			return 1;
1229 	return 0;
1230 }
1231 
1232 /*
1233  * Check to see if there are any method tables for this class still in use
1234  */
1235 static int check_class_table(struct ib_mad_mgmt_class_table *class)
1236 {
1237 	int i;
1238 
1239 	for (i = 0; i < MAX_MGMT_CLASS; i++)
1240 		if (class->method_table[i])
1241 			return 1;
1242 	return 0;
1243 }
1244 
1245 static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class)
1246 {
1247 	int i;
1248 
1249 	for (i = 0; i < MAX_MGMT_OUI; i++)
1250 		if (vendor_class->method_table[i])
1251 			return 1;
1252 	return 0;
1253 }
1254 
1255 static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class,
1256 			   char *oui)
1257 {
1258 	int i;
1259 
1260 	for (i = 0; i < MAX_MGMT_OUI; i++)
1261 		/* Is there a matching OUI for this vendor class? */
1262 		if (!memcmp(vendor_class->oui[i], oui, 3))
1263 			return i;
1264 
1265 	return -1;
1266 }
1267 
1268 static int check_vendor_table(struct ib_mad_mgmt_vendor_class_table *vendor)
1269 {
1270 	int i;
1271 
1272 	for (i = 0; i < MAX_MGMT_VENDOR_RANGE2; i++)
1273 		if (vendor->vendor_class[i])
1274 			return 1;
1275 
1276 	return 0;
1277 }
1278 
1279 static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method,
1280 				     struct ib_mad_agent_private *agent)
1281 {
1282 	int i;
1283 
1284 	/* Remove any methods for this mad agent */
1285 	for (i = 0; i < IB_MGMT_MAX_METHODS; i++) {
1286 		if (method->agent[i] == agent) {
1287 			method->agent[i] = NULL;
1288 		}
1289 	}
1290 }
1291 
1292 static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
1293 			      struct ib_mad_agent_private *agent_priv,
1294 			      u8 mgmt_class)
1295 {
1296 	struct ib_mad_port_private *port_priv;
1297 	struct ib_mad_mgmt_class_table **class;
1298 	struct ib_mad_mgmt_method_table **method;
1299 	int i, ret;
1300 
1301 	port_priv = agent_priv->qp_info->port_priv;
1302 	class = &port_priv->version[mad_reg_req->mgmt_class_version].class;
1303 	if (!*class) {
1304 		/* Allocate management class table for "new" class version */
1305 		*class = kzalloc(sizeof **class, GFP_ATOMIC);
1306 		if (!*class) {
1307 			printk(KERN_ERR PFX "No memory for "
1308 			       "ib_mad_mgmt_class_table\n");
1309 			ret = -ENOMEM;
1310 			goto error1;
1311 		}
1312 
1313 		/* Allocate method table for this management class */
1314 		method = &(*class)->method_table[mgmt_class];
1315 		if ((ret = allocate_method_table(method)))
1316 			goto error2;
1317 	} else {
1318 		method = &(*class)->method_table[mgmt_class];
1319 		if (!*method) {
1320 			/* Allocate method table for this management class */
1321 			if ((ret = allocate_method_table(method)))
1322 				goto error1;
1323 		}
1324 	}
1325 
1326 	/* Now, make sure methods are not already in use */
1327 	if (method_in_use(method, mad_reg_req))
1328 		goto error3;
1329 
1330 	/* Finally, add in methods being registered */
1331 	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
1332 		(*method)->agent[i] = agent_priv;
1333 
1334 	return 0;
1335 
1336 error3:
1337 	/* Remove any methods for this mad agent */
1338 	remove_methods_mad_agent(*method, agent_priv);
1339 	/* Now, check to see if there are any methods in use */
1340 	if (!check_method_table(*method)) {
1341 		/* If not, release management method table */
1342 		kfree(*method);
1343 		*method = NULL;
1344 	}
1345 	ret = -EINVAL;
1346 	goto error1;
1347 error2:
1348 	kfree(*class);
1349 	*class = NULL;
1350 error1:
1351 	return ret;
1352 }
1353 
1354 static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
1355 			   struct ib_mad_agent_private *agent_priv)
1356 {
1357 	struct ib_mad_port_private *port_priv;
1358 	struct ib_mad_mgmt_vendor_class_table **vendor_table;
1359 	struct ib_mad_mgmt_vendor_class_table *vendor = NULL;
1360 	struct ib_mad_mgmt_vendor_class *vendor_class = NULL;
1361 	struct ib_mad_mgmt_method_table **method;
1362 	int i, ret = -ENOMEM;
1363 	u8 vclass;
1364 
1365 	/* "New" vendor (with OUI) class */
1366 	vclass = vendor_class_index(mad_reg_req->mgmt_class);
1367 	port_priv = agent_priv->qp_info->port_priv;
1368 	vendor_table = &port_priv->version[
1369 				mad_reg_req->mgmt_class_version].vendor;
1370 	if (!*vendor_table) {
1371 		/* Allocate mgmt vendor class table for "new" class version */
1372 		vendor = kzalloc(sizeof *vendor, GFP_ATOMIC);
1373 		if (!vendor) {
1374 			printk(KERN_ERR PFX "No memory for "
1375 			       "ib_mad_mgmt_vendor_class_table\n");
1376 			goto error1;
1377 		}
1378 
1379 		*vendor_table = vendor;
1380 	}
1381 	if (!(*vendor_table)->vendor_class[vclass]) {
1382 		/* Allocate table for this management vendor class */
1383 		vendor_class = kzalloc(sizeof *vendor_class, GFP_ATOMIC);
1384 		if (!vendor_class) {
1385 			printk(KERN_ERR PFX "No memory for "
1386 			       "ib_mad_mgmt_vendor_class\n");
1387 			goto error2;
1388 		}
1389 
1390 		(*vendor_table)->vendor_class[vclass] = vendor_class;
1391 	}
1392 	for (i = 0; i < MAX_MGMT_OUI; i++) {
1393 		/* Is there a matching OUI for this vendor class? */
1394 		if (!memcmp((*vendor_table)->vendor_class[vclass]->oui[i],
1395 			    mad_reg_req->oui, 3)) {
1396 			method = &(*vendor_table)->vendor_class[
1397 						vclass]->method_table[i];
1398 			BUG_ON(!*method);
1399 			goto check_in_use;
1400 		}
1401 	}
1402 	for (i = 0; i < MAX_MGMT_OUI; i++) {
1403 		/* Is an OUI slot available? */
1404 		if (!is_vendor_oui((*vendor_table)->vendor_class[
1405 				vclass]->oui[i])) {
1406 			method = &(*vendor_table)->vendor_class[
1407 				vclass]->method_table[i];
1408 			BUG_ON(*method);
1409 			/* Allocate method table for this OUI */
1410 			if ((ret = allocate_method_table(method)))
1411 				goto error3;
1412 			memcpy((*vendor_table)->vendor_class[vclass]->oui[i],
1413 			       mad_reg_req->oui, 3);
1414 			goto check_in_use;
1415 		}
1416 	}
1417 	printk(KERN_ERR PFX "All OUI slots in use\n");
1418 	goto error3;
1419 
1420 check_in_use:
1421 	/* Now, make sure methods are not already in use */
1422 	if (method_in_use(method, mad_reg_req))
1423 		goto error4;
1424 
1425 	/* Finally, add in methods being registered */
1426 	for_each_set_bit(i, mad_reg_req->method_mask, IB_MGMT_MAX_METHODS)
1427 		(*method)->agent[i] = agent_priv;
1428 
1429 	return 0;
1430 
1431 error4:
1432 	/* Remove any methods for this mad agent */
1433 	remove_methods_mad_agent(*method, agent_priv);
1434 	/* Now, check to see if there are any methods in use */
1435 	if (!check_method_table(*method)) {
1436 		/* If not, release management method table */
1437 		kfree(*method);
1438 		*method = NULL;
1439 	}
1440 	ret = -EINVAL;
1441 error3:
1442 	if (vendor_class) {
1443 		(*vendor_table)->vendor_class[vclass] = NULL;
1444 		kfree(vendor_class);
1445 	}
1446 error2:
1447 	if (vendor) {
1448 		*vendor_table = NULL;
1449 		kfree(vendor);
1450 	}
1451 error1:
1452 	return ret;
1453 }
1454 
1455 static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv)
1456 {
1457 	struct ib_mad_port_private *port_priv;
1458 	struct ib_mad_mgmt_class_table *class;
1459 	struct ib_mad_mgmt_method_table *method;
1460 	struct ib_mad_mgmt_vendor_class_table *vendor;
1461 	struct ib_mad_mgmt_vendor_class *vendor_class;
1462 	int index;
1463 	u8 mgmt_class;
1464 
1465 	/*
1466 	 * Was a MAD registration request supplied
1467 	 * with the original registration?
1468 	 */
1469 	if (!agent_priv->reg_req) {
1470 		goto out;
1471 	}
1472 
1473 	port_priv = agent_priv->qp_info->port_priv;
1474 	mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class);
1475 	class = port_priv->version[
1476 			agent_priv->reg_req->mgmt_class_version].class;
1477 	if (!class)
1478 		goto vendor_check;
1479 
1480 	method = class->method_table[mgmt_class];
1481 	if (method) {
1482 		/* Remove any methods for this mad agent */
1483 		remove_methods_mad_agent(method, agent_priv);
1484 		/* Now, check to see if there are any methods still in use */
1485 		if (!check_method_table(method)) {
1486 			/* If not, release management method table */
1487 			kfree(method);
1488 			class->method_table[mgmt_class] = NULL;
1489 			/* Any management classes left? */
1490 			if (!check_class_table(class)) {
1491 				/* If not, release management class table */
1492 				kfree(class);
1493 				port_priv->version[
1494 					agent_priv->reg_req->
1495 					mgmt_class_version].class = NULL;
1496 			}
1497 		}
1498 	}
1499 
1500 vendor_check:
1501 	if (!is_vendor_class(mgmt_class))
1502 		goto out;
1503 
1504 	/* normalize mgmt_class to vendor range 2 */
1505 	mgmt_class = vendor_class_index(agent_priv->reg_req->mgmt_class);
1506 	vendor = port_priv->version[
1507 			agent_priv->reg_req->mgmt_class_version].vendor;
1508 
1509 	if (!vendor)
1510 		goto out;
1511 
1512 	vendor_class = vendor->vendor_class[mgmt_class];
1513 	if (vendor_class) {
1514 		index = find_vendor_oui(vendor_class, agent_priv->reg_req->oui);
1515 		if (index < 0)
1516 			goto out;
1517 		method = vendor_class->method_table[index];
1518 		if (method) {
1519 			/* Remove any methods for this mad agent */
1520 			remove_methods_mad_agent(method, agent_priv);
1521 			/*
1522 			 * Now, check to see if there are
1523 			 * any methods still in use
1524 			 */
1525 			if (!check_method_table(method)) {
1526 				/* If not, release management method table */
1527 				kfree(method);
1528 				vendor_class->method_table[index] = NULL;
1529 				memset(vendor_class->oui[index], 0, 3);
1530 				/* Any OUIs left? */
1531 				if (!check_vendor_class(vendor_class)) {
1532 					/* If not, release vendor class table */
1533 					kfree(vendor_class);
1534 					vendor->vendor_class[mgmt_class] = NULL;
1535 					/* Any other vendor classes left? */
1536 					if (!check_vendor_table(vendor)) {
1537 						kfree(vendor);
1538 						port_priv->version[
1539 							agent_priv->reg_req->
1540 							mgmt_class_version].
1541 							vendor = NULL;
1542 					}
1543 				}
1544 			}
1545 		}
1546 	}
1547 
1548 out:
1549 	return;
1550 }
1551 
1552 static struct ib_mad_agent_private *
1553 find_mad_agent(struct ib_mad_port_private *port_priv,
1554 	       struct ib_mad *mad)
1555 {
1556 	struct ib_mad_agent_private *mad_agent = NULL;
1557 	unsigned long flags;
1558 
1559 	spin_lock_irqsave(&port_priv->reg_lock, flags);
1560 	if (ib_response_mad(mad)) {
1561 		u32 hi_tid;
1562 		struct ib_mad_agent_private *entry;
1563 
1564 		/*
1565 		 * Routing is based on high 32 bits of transaction ID
1566 		 * of MAD.
1567 		 */
1568 		hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32;
1569 		list_for_each_entry(entry, &port_priv->agent_list, agent_list) {
1570 			if (entry->agent.hi_tid == hi_tid) {
1571 				mad_agent = entry;
1572 				break;
1573 			}
1574 		}
1575 	} else {
1576 		struct ib_mad_mgmt_class_table *class;
1577 		struct ib_mad_mgmt_method_table *method;
1578 		struct ib_mad_mgmt_vendor_class_table *vendor;
1579 		struct ib_mad_mgmt_vendor_class *vendor_class;
1580 		struct ib_vendor_mad *vendor_mad;
1581 		int index;
1582 
1583 		/*
1584 		 * Routing is based on version, class, and method.
1585 		 * For "newer" vendor MADs, routing is also based on the OUI.
1586 		 */
1587 		if (mad->mad_hdr.class_version >= MAX_MGMT_VERSION)
1588 			goto out;
1589 		if (!is_vendor_class(mad->mad_hdr.mgmt_class)) {
1590 			class = port_priv->version[
1591 					mad->mad_hdr.class_version].class;
1592 			if (!class)
1593 				goto out;
1594 			method = class->method_table[convert_mgmt_class(
1595 							mad->mad_hdr.mgmt_class)];
1596 			if (method)
1597 				mad_agent = method->agent[mad->mad_hdr.method &
1598 							  ~IB_MGMT_METHOD_RESP];
1599 		} else {
1600 			vendor = port_priv->version[
1601 					mad->mad_hdr.class_version].vendor;
1602 			if (!vendor)
1603 				goto out;
1604 			vendor_class = vendor->vendor_class[vendor_class_index(
1605 						mad->mad_hdr.mgmt_class)];
1606 			if (!vendor_class)
1607 				goto out;
1608 			/* Find matching OUI */
1609 			vendor_mad = (struct ib_vendor_mad *)mad;
1610 			index = find_vendor_oui(vendor_class, vendor_mad->oui);
1611 			if (index == -1)
1612 				goto out;
1613 			method = vendor_class->method_table[index];
1614 			if (method) {
1615 				mad_agent = method->agent[mad->mad_hdr.method &
1616 							  ~IB_MGMT_METHOD_RESP];
1617 			}
1618 		}
1619 	}
1620 
1621 	if (mad_agent) {
1622 		if (mad_agent->agent.recv_handler)
1623 			atomic_inc(&mad_agent->refcount);
1624 		else {
1625 			printk(KERN_NOTICE PFX "No receive handler for client "
1626 			       "%p on port %d\n",
1627 			       &mad_agent->agent, port_priv->port_num);
1628 			mad_agent = NULL;
1629 		}
1630 	}
1631 out:
1632 	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
1633 
1634 	return mad_agent;
1635 }
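
/*
 * Note (illustrative): the hi_tid matched above is the value assigned in
 * ib_register_mad_agent(), so a client typically builds its transaction
 * IDs as something like
 *
 *	mad_hdr->tid = cpu_to_be64(((u64) agent->hi_tid) << 32 | my_tid);
 *
 * where "my_tid" is a client-chosen low 32-bit identifier; the response
 * is then routed back to the registering agent.
 */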
1636 
1637 static int validate_mad(struct ib_mad *mad, u32 qp_num)
1638 {
1639 	int valid = 0;
1640 
1641 	/* Make sure MAD base version is understood */
1642 	if (mad->mad_hdr.base_version != IB_MGMT_BASE_VERSION) {
1643 		printk(KERN_ERR PFX "MAD received with unsupported base "
1644 		       "version %d\n", mad->mad_hdr.base_version);
1645 		goto out;
1646 	}
1647 
1648 	/* Filter SMI packets sent to other than QP0 */
1649 	if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
1650 	    (mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
1651 		if (qp_num == 0)
1652 			valid = 1;
1653 	} else {
1654 		/* Filter GSI packets sent to QP0 */
1655 		if (qp_num != 0)
1656 			valid = 1;
1657 	}
1658 
1659 out:
1660 	return valid;
1661 }
1662 
1663 static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv,
1664 		       struct ib_mad_hdr *mad_hdr)
1665 {
1666 	struct ib_rmpp_mad *rmpp_mad;
1667 
1668 	rmpp_mad = (struct ib_rmpp_mad *)mad_hdr;
1669 	return !mad_agent_priv->agent.rmpp_version ||
1670 		!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
1671 				    IB_MGMT_RMPP_FLAG_ACTIVE) ||
1672 		(rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
1673 }
1674 
1675 static inline int rcv_has_same_class(struct ib_mad_send_wr_private *wr,
1676 				     struct ib_mad_recv_wc *rwc)
1677 {
1678 	return ((struct ib_mad *)(wr->send_buf.mad))->mad_hdr.mgmt_class ==
1679 		rwc->recv_buf.mad->mad_hdr.mgmt_class;
1680 }
1681 
1682 static inline int rcv_has_same_gid(struct ib_mad_agent_private *mad_agent_priv,
1683 				   struct ib_mad_send_wr_private *wr,
1684 				   struct ib_mad_recv_wc *rwc )
1685 {
1686 	struct ib_ah_attr attr;
1687 	u8 send_resp, rcv_resp;
1688 	union ib_gid sgid;
1689 	struct ib_device *device = mad_agent_priv->agent.device;
1690 	u8 port_num = mad_agent_priv->agent.port_num;
1691 	u8 lmc;
1692 
1693 	send_resp = ib_response_mad((struct ib_mad *)wr->send_buf.mad);
1694 	rcv_resp = ib_response_mad(rwc->recv_buf.mad);
1695 
1696 	if (send_resp == rcv_resp)
1697 		/* both requests, or both responses. GIDs different */
1698 		return 0;
1699 
1700 	if (ib_query_ah(wr->send_buf.ah, &attr))
1701 		/* Assume not equal, to avoid false positives. */
1702 		return 0;
1703 
1704 	if (!!(attr.ah_flags & IB_AH_GRH) !=
1705 	    !!(rwc->wc->wc_flags & IB_WC_GRH))
1706 		/* one has GID, other does not.  Assume different */
1707 		return 0;
1708 
1709 	if (!send_resp && rcv_resp) {
1710 		/* is request/response. */
1711 		if (!(attr.ah_flags & IB_AH_GRH)) {
1712 			if (ib_get_cached_lmc(device, port_num, &lmc))
1713 				return 0;
1714 			return (!lmc || !((attr.src_path_bits ^
1715 					   rwc->wc->dlid_path_bits) &
1716 					  ((1 << lmc) - 1)));
1717 		} else {
1718 			if (ib_get_cached_gid(device, port_num,
1719 					      attr.grh.sgid_index, &sgid))
1720 				return 0;
1721 			return !memcmp(sgid.raw, rwc->recv_buf.grh->dgid.raw,
1722 				       16);
1723 		}
1724 	}
1725 
1726 	if (!(attr.ah_flags & IB_AH_GRH))
1727 		return attr.dlid == rwc->wc->slid;
1728 	else
1729 		return !memcmp(attr.grh.dgid.raw, rwc->recv_buf.grh->sgid.raw,
1730 			       16);
1731 }
1732 
1733 static inline int is_direct(u8 class)
1734 {
1735 	return (class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE);
1736 }
1737 
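/*
 * Locate the send work request that a received response corresponds to,
 * matching on TID, management class and (for LID-routed MADs) the sender's
 * address.  Returns NULL if nothing matches or the request was canceled.
 */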
1738 struct ib_mad_send_wr_private*
1739 ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv,
1740 		 struct ib_mad_recv_wc *wc)
1741 {
1742 	struct ib_mad_send_wr_private *wr;
1743 	struct ib_mad *mad;
1744 
1745 	mad = (struct ib_mad *)wc->recv_buf.mad;
1746 
1747 	list_for_each_entry(wr, &mad_agent_priv->wait_list, agent_list) {
1748 		if ((wr->tid == mad->mad_hdr.tid) &&
1749 		    rcv_has_same_class(wr, wc) &&
1750 		    /*
1751 		     * Don't check GID for direct routed MADs.
1752 		     * These might have permissive LIDs.
1753 		     */
1754 		    (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) ||
1755 		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
1756 			return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
1757 	}
1758 
1759 	/*
1760 	 * It's possible to receive the response before we've
1761 	 * been notified that the send has completed
1762 	 */
1763 	list_for_each_entry(wr, &mad_agent_priv->send_list, agent_list) {
1764 		if (is_data_mad(mad_agent_priv, wr->send_buf.mad) &&
1765 		    wr->tid == mad->mad_hdr.tid &&
1766 		    wr->timeout &&
1767 		    rcv_has_same_class(wr, wc) &&
1768 		    /*
1769 		     * Don't check GID for direct routed MADs.
1770 		     * These might have permissive LIDs.
1771 		     */
1772 		    (is_direct(wc->recv_buf.mad->mad_hdr.mgmt_class) ||
1773 		     rcv_has_same_gid(mad_agent_priv, wr, wc)))
1774 			/* Verify request has not been canceled */
1775 			return (wr->status == IB_WC_SUCCESS) ? wr : NULL;
1776 	}
1777 	return NULL;
1778 }
1779 
1780 void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr)
1781 {
1782 	mad_send_wr->timeout = 0;
1783 	if (mad_send_wr->refcount == 1)
1784 		list_move_tail(&mad_send_wr->agent_list,
1785 			      &mad_send_wr->mad_agent_priv->done_list);
1786 }
1787 
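/*
 * Deliver a received MAD to a client agent.  RMPP MADs are passed through
 * reassembly first; if the MAD is a response, the matching request is
 * marked done and the response is delivered before the request's send
 * completion is reported.
 */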
1788 static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
1789 				 struct ib_mad_recv_wc *mad_recv_wc)
1790 {
1791 	struct ib_mad_send_wr_private *mad_send_wr;
1792 	struct ib_mad_send_wc mad_send_wc;
1793 	unsigned long flags;
1794 
1795 	INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
1796 	list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
1797 	if (mad_agent_priv->agent.rmpp_version) {
1798 		mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
1799 						      mad_recv_wc);
1800 		if (!mad_recv_wc) {
1801 			deref_mad_agent(mad_agent_priv);
1802 			return;
1803 		}
1804 	}
1805 
1806 	/* Complete corresponding request */
1807 	if (ib_response_mad(mad_recv_wc->recv_buf.mad)) {
1808 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
1809 		mad_send_wr = ib_find_send_mad(mad_agent_priv, mad_recv_wc);
1810 		if (!mad_send_wr) {
1811 			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1812 			ib_free_recv_mad(mad_recv_wc);
1813 			deref_mad_agent(mad_agent_priv);
1814 			return;
1815 		}
1816 		ib_mark_mad_done(mad_send_wr);
1817 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1818 
1819 		/* Defined behavior is to complete response before request */
1820 		mad_recv_wc->wc->wr_id = (unsigned long) &mad_send_wr->send_buf;
1821 		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
1822 						   mad_recv_wc);
1823 		atomic_dec(&mad_agent_priv->refcount);
1824 
1825 		mad_send_wc.status = IB_WC_SUCCESS;
1826 		mad_send_wc.vendor_err = 0;
1827 		mad_send_wc.send_buf = &mad_send_wr->send_buf;
1828 		ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
1829 	} else {
1830 		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
1831 						   mad_recv_wc);
1832 		deref_mad_agent(mad_agent_priv);
1833 	}
1834 }
1835 
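/*
 * Receive completion handler: unmap the buffer, validate the MAD, apply
 * directed-route SMP handling, let the driver's process_mad() consume or
 * answer it, and finally hand it to the matching MAD agent.  A receive
 * buffer is reposted to the QP before returning.
 */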
1836 static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
1837 				     struct ib_wc *wc)
1838 {
1839 	struct ib_mad_qp_info *qp_info;
1840 	struct ib_mad_private_header *mad_priv_hdr;
1841 	struct ib_mad_private *recv, *response = NULL;
1842 	struct ib_mad_list_head *mad_list;
1843 	struct ib_mad_agent_private *mad_agent;
1844 	int port_num;
1845 
1846 	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
1847 	qp_info = mad_list->mad_queue->qp_info;
1848 	dequeue_mad(mad_list);
1849 
1850 	mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header,
1851 				    mad_list);
1852 	recv = container_of(mad_priv_hdr, struct ib_mad_private, header);
1853 	ib_dma_unmap_single(port_priv->device,
1854 			    recv->header.mapping,
1855 			    sizeof(struct ib_mad_private) -
1856 			      sizeof(struct ib_mad_private_header),
1857 			    DMA_FROM_DEVICE);
1858 
1859 	/* Setup MAD receive work completion from "normal" work completion */
1860 	recv->header.wc = *wc;
1861 	recv->header.recv_wc.wc = &recv->header.wc;
1862 	recv->header.recv_wc.mad_len = sizeof(struct ib_mad);
1863 	recv->header.recv_wc.recv_buf.mad = &recv->mad.mad;
1864 	recv->header.recv_wc.recv_buf.grh = &recv->grh;
1865 
1866 	if (atomic_read(&qp_info->snoop_count))
1867 		snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS);
1868 
1869 	/* Validate MAD */
1870 	if (!validate_mad(&recv->mad.mad, qp_info->qp->qp_num))
1871 		goto out;
1872 
1873 	response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
1874 	if (!response) {
1875 		printk(KERN_ERR PFX "ib_mad_recv_done_handler no memory "
1876 		       "for response buffer\n");
1877 		goto out;
1878 	}
1879 
1880 	if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH)
1881 		port_num = wc->port_num;
1882 	else
1883 		port_num = port_priv->port_num;
1884 
1885 	if (recv->mad.mad.mad_hdr.mgmt_class ==
1886 	    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
1887 		enum smi_forward_action retsmi;
1888 
1889 		if (smi_handle_dr_smp_recv(&recv->mad.smp,
1890 					   port_priv->device->node_type,
1891 					   port_num,
1892 					   port_priv->device->phys_port_cnt) ==
1893 					   IB_SMI_DISCARD)
1894 			goto out;
1895 
1896 		retsmi = smi_check_forward_dr_smp(&recv->mad.smp);
1897 		if (retsmi == IB_SMI_LOCAL)
1898 			goto local;
1899 
1900 		if (retsmi == IB_SMI_SEND) { /* don't forward */
1901 			if (smi_handle_dr_smp_send(&recv->mad.smp,
1902 						   port_priv->device->node_type,
1903 						   port_num) == IB_SMI_DISCARD)
1904 				goto out;
1905 
1906 			if (smi_check_local_smp(&recv->mad.smp, port_priv->device) == IB_SMI_DISCARD)
1907 				goto out;
1908 		} else if (port_priv->device->node_type == RDMA_NODE_IB_SWITCH) {
1909 			/* forward case for switches */
1910 			memcpy(response, recv, sizeof(*response));
1911 			response->header.recv_wc.wc = &response->header.wc;
1912 			response->header.recv_wc.recv_buf.mad = &response->mad.mad;
1913 			response->header.recv_wc.recv_buf.grh = &response->grh;
1914 
1915 			agent_send_response(&response->mad.mad,
1916 					    &response->grh, wc,
1917 					    port_priv->device,
1918 					    smi_get_fwd_port(&recv->mad.smp),
1919 					    qp_info->qp->qp_num);
1920 
1921 			goto out;
1922 		}
1923 	}
1924 
1925 local:
1926 	/* Give driver "right of first refusal" on incoming MAD */
1927 	if (port_priv->device->process_mad) {
1928 		int ret;
1929 
1930 		ret = port_priv->device->process_mad(port_priv->device, 0,
1931 						     port_priv->port_num,
1932 						     wc, &recv->grh,
1933 						     &recv->mad.mad,
1934 						     &response->mad.mad);
1935 		if (ret & IB_MAD_RESULT_SUCCESS) {
1936 			if (ret & IB_MAD_RESULT_CONSUMED)
1937 				goto out;
1938 			if (ret & IB_MAD_RESULT_REPLY) {
1939 				agent_send_response(&response->mad.mad,
1940 						    &recv->grh, wc,
1941 						    port_priv->device,
1942 						    port_num,
1943 						    qp_info->qp->qp_num);
1944 				goto out;
1945 			}
1946 		}
1947 	}
1948 
1949 	mad_agent = find_mad_agent(port_priv, &recv->mad.mad);
1950 	if (mad_agent) {
1951 		ib_mad_complete_recv(mad_agent, &recv->header.recv_wc);
1952 		/*
1953 		 * recv is now owned by ib_mad_complete_recv(): it is freed
1954 		 * there on error, or handed to the client's recv_handler
1955 		 */
1956 		recv = NULL;
1957 	}
1958 
1959 out:
1960 	/* Post another receive request for this QP */
1961 	if (response) {
1962 		ib_mad_post_receive_mads(qp_info, response);
1963 		if (recv)
1964 			kmem_cache_free(ib_mad_cache, recv);
1965 	} else
1966 		ib_mad_post_receive_mads(qp_info, recv);
1967 }
1968 
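/*
 * After the wait list has changed, cancel or reschedule the response
 * timeout work based on the request now at the head of the list.
 */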
1969 static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
1970 {
1971 	struct ib_mad_send_wr_private *mad_send_wr;
1972 	unsigned long delay;
1973 
1974 	if (list_empty(&mad_agent_priv->wait_list)) {
1975 		__cancel_delayed_work(&mad_agent_priv->timed_work);
1976 	} else {
1977 		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
1978 					 struct ib_mad_send_wr_private,
1979 					 agent_list);
1980 
1981 		if (time_after(mad_agent_priv->timeout,
1982 			       mad_send_wr->timeout)) {
1983 			mad_agent_priv->timeout = mad_send_wr->timeout;
1984 			__cancel_delayed_work(&mad_agent_priv->timed_work);
1985 			delay = mad_send_wr->timeout - jiffies;
1986 			if ((long)delay <= 0)
1987 				delay = 1;
1988 			queue_delayed_work(mad_agent_priv->qp_info->
1989 					   port_priv->wq,
1990 					   &mad_agent_priv->timed_work, delay);
1991 		}
1992 	}
1993 }
1994 
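/*
 * Move a sent request onto the wait list, ordered by absolute timeout, and
 * reschedule the timeout work if this request now expires first.
 */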
1995 static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
1996 {
1997 	struct ib_mad_agent_private *mad_agent_priv;
1998 	struct ib_mad_send_wr_private *temp_mad_send_wr;
1999 	struct list_head *list_item;
2000 	unsigned long delay;
2001 
2002 	mad_agent_priv = mad_send_wr->mad_agent_priv;
2003 	list_del(&mad_send_wr->agent_list);
2004 
2005 	delay = mad_send_wr->timeout;
2006 	mad_send_wr->timeout += jiffies;
2007 
2008 	if (delay) {
2009 		list_for_each_prev(list_item, &mad_agent_priv->wait_list) {
2010 			temp_mad_send_wr = list_entry(list_item,
2011 						struct ib_mad_send_wr_private,
2012 						agent_list);
2013 			if (time_after(mad_send_wr->timeout,
2014 				       temp_mad_send_wr->timeout))
2015 				break;
2016 		}
2017 	}
2018 	else
2019 		list_item = &mad_agent_priv->wait_list;
2020 	list_add(&mad_send_wr->agent_list, list_item);
2021 
2022 	/* Reschedule a work item if we have a shorter timeout */
2023 	if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) {
2024 		__cancel_delayed_work(&mad_agent_priv->timed_work);
2025 		queue_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
2026 				   &mad_agent_priv->timed_work, delay);
2027 	}
2028 }
2029 
2030 void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
2031 			  int timeout_ms)
2032 {
2033 	mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
2034 	wait_for_response(mad_send_wr);
2035 }
2036 
2037 /*
2038  * Process a send work completion
2039  */
2040 void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
2041 			     struct ib_mad_send_wc *mad_send_wc)
2042 {
2043 	struct ib_mad_agent_private	*mad_agent_priv;
2044 	unsigned long			flags;
2045 	int				ret;
2046 
2047 	mad_agent_priv = mad_send_wr->mad_agent_priv;
2048 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2049 	if (mad_agent_priv->agent.rmpp_version) {
2050 		ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc);
2051 		if (ret == IB_RMPP_RESULT_CONSUMED)
2052 			goto done;
2053 	} else
2054 		ret = IB_RMPP_RESULT_UNHANDLED;
2055 
2056 	if (mad_send_wc->status != IB_WC_SUCCESS &&
2057 	    mad_send_wr->status == IB_WC_SUCCESS) {
2058 		mad_send_wr->status = mad_send_wc->status;
2059 		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2060 	}
2061 
2062 	if (--mad_send_wr->refcount > 0) {
2063 		if (mad_send_wr->refcount == 1 && mad_send_wr->timeout &&
2064 		    mad_send_wr->status == IB_WC_SUCCESS) {
2065 			wait_for_response(mad_send_wr);
2066 		}
2067 		goto done;
2068 	}
2069 
2070 	/* Remove send from MAD agent and notify client of completion */
2071 	list_del(&mad_send_wr->agent_list);
2072 	adjust_timeout(mad_agent_priv);
2073 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2074 
2075 	if (mad_send_wr->status != IB_WC_SUCCESS)
2076 		mad_send_wc->status = mad_send_wr->status;
2077 	if (ret == IB_RMPP_RESULT_INTERNAL)
2078 		ib_rmpp_send_handler(mad_send_wc);
2079 	else
2080 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2081 						   mad_send_wc);
2082 
2083 	/* Release reference on agent taken when sending */
2084 	deref_mad_agent(mad_agent_priv);
2085 	return;
2086 done:
2087 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2088 }
2089 
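/*
 * Send completion handler: unmap the send buffers, complete the send, and
 * post the next work request from the overflow list if the send queue had
 * been full.
 */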
2090 static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
2091 				     struct ib_wc *wc)
2092 {
2093 	struct ib_mad_send_wr_private	*mad_send_wr, *queued_send_wr;
2094 	struct ib_mad_list_head		*mad_list;
2095 	struct ib_mad_qp_info		*qp_info;
2096 	struct ib_mad_queue		*send_queue;
2097 	struct ib_send_wr		*bad_send_wr;
2098 	struct ib_mad_send_wc		mad_send_wc;
2099 	unsigned long flags;
2100 	int ret;
2101 
2102 	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
2103 	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
2104 				   mad_list);
2105 	send_queue = mad_list->mad_queue;
2106 	qp_info = send_queue->qp_info;
2107 
2108 retry:
2109 	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
2110 			    mad_send_wr->header_mapping,
2111 			    mad_send_wr->sg_list[0].length, DMA_TO_DEVICE);
2112 	ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent->device,
2113 			    mad_send_wr->payload_mapping,
2114 			    mad_send_wr->sg_list[1].length, DMA_TO_DEVICE);
2115 	queued_send_wr = NULL;
2116 	spin_lock_irqsave(&send_queue->lock, flags);
2117 	list_del(&mad_list->list);
2118 
2119 	/* Move queued send to the send queue */
2120 	if (send_queue->count-- > send_queue->max_active) {
2121 		mad_list = container_of(qp_info->overflow_list.next,
2122 					struct ib_mad_list_head, list);
2123 		queued_send_wr = container_of(mad_list,
2124 					struct ib_mad_send_wr_private,
2125 					mad_list);
2126 		list_move_tail(&mad_list->list, &send_queue->list);
2127 	}
2128 	spin_unlock_irqrestore(&send_queue->lock, flags);
2129 
2130 	mad_send_wc.send_buf = &mad_send_wr->send_buf;
2131 	mad_send_wc.status = wc->status;
2132 	mad_send_wc.vendor_err = wc->vendor_err;
2133 	if (atomic_read(&qp_info->snoop_count))
2134 		snoop_send(qp_info, &mad_send_wr->send_buf, &mad_send_wc,
2135 			   IB_MAD_SNOOP_SEND_COMPLETIONS);
2136 	ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
2137 
2138 	if (queued_send_wr) {
2139 		ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
2140 				   &bad_send_wr);
2141 		if (ret) {
2142 			printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret);
2143 			mad_send_wr = queued_send_wr;
2144 			wc->status = IB_WC_LOC_QP_OP_ERR;
2145 			goto retry;
2146 		}
2147 	}
2148 }
2149 
2150 static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
2151 {
2152 	struct ib_mad_send_wr_private *mad_send_wr;
2153 	struct ib_mad_list_head *mad_list;
2154 	unsigned long flags;
2155 
2156 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
2157 	list_for_each_entry(mad_list, &qp_info->send_queue.list, list) {
2158 		mad_send_wr = container_of(mad_list,
2159 					   struct ib_mad_send_wr_private,
2160 					   mad_list);
2161 		mad_send_wr->retry = 1;
2162 	}
2163 	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
2164 }
2165 
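/*
 * Handle a completion error.  Receive errors are left to QP error-state
 * cleanup; flushed sends are reposted when marked for retry, and other send
 * errors move the QP from SQE back to RTS so the remaining sends can be
 * retried.
 */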
2166 static void mad_error_handler(struct ib_mad_port_private *port_priv,
2167 			      struct ib_wc *wc)
2168 {
2169 	struct ib_mad_list_head *mad_list;
2170 	struct ib_mad_qp_info *qp_info;
2171 	struct ib_mad_send_wr_private *mad_send_wr;
2172 	int ret;
2173 
2174 	/* Determine if failure was a send or receive */
2175 	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
2176 	qp_info = mad_list->mad_queue->qp_info;
2177 	if (mad_list->mad_queue == &qp_info->recv_queue)
2178 		/*
2179 		 * Receive errors indicate that the QP has entered the error
2180 		 * state - error handling/shutdown code will cleanup
2181 		 */
2182 		return;
2183 
2184 	/*
2185 	 * Send errors will transition the QP to SQE - move
2186 	 * QP to RTS and repost flushed work requests
2187 	 */
2188 	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
2189 				   mad_list);
2190 	if (wc->status == IB_WC_WR_FLUSH_ERR) {
2191 		if (mad_send_wr->retry) {
2192 			/* Repost send */
2193 			struct ib_send_wr *bad_send_wr;
2194 
2195 			mad_send_wr->retry = 0;
2196 			ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr,
2197 					&bad_send_wr);
2198 			if (ret)
2199 				ib_mad_send_done_handler(port_priv, wc);
2200 		} else
2201 			ib_mad_send_done_handler(port_priv, wc);
2202 	} else {
2203 		struct ib_qp_attr *attr;
2204 
2205 		/* Transition QP to RTS and fail offending send */
2206 		attr = kmalloc(sizeof *attr, GFP_KERNEL);
2207 		if (attr) {
2208 			attr->qp_state = IB_QPS_RTS;
2209 			attr->cur_qp_state = IB_QPS_SQE;
2210 			ret = ib_modify_qp(qp_info->qp, attr,
2211 					   IB_QP_STATE | IB_QP_CUR_STATE);
2212 			kfree(attr);
2213 			if (ret)
2214 				printk(KERN_ERR PFX "mad_error_handler - "
2215 				       "ib_modify_qp to RTS : %d\n", ret);
2216 			else
2217 				mark_sends_for_retry(qp_info);
2218 		}
2219 		ib_mad_send_done_handler(port_priv, wc);
2220 	}
2221 }
2222 
2223 /*
2224  * IB MAD completion callback
2225  */
2226 static void ib_mad_completion_handler(struct work_struct *work)
2227 {
2228 	struct ib_mad_port_private *port_priv;
2229 	struct ib_wc wc;
2230 
2231 	port_priv = container_of(work, struct ib_mad_port_private, work);
2232 	ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
2233 
2234 	while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
2235 		if (wc.status == IB_WC_SUCCESS) {
2236 			switch (wc.opcode) {
2237 			case IB_WC_SEND:
2238 				ib_mad_send_done_handler(port_priv, &wc);
2239 				break;
2240 			case IB_WC_RECV:
2241 				ib_mad_recv_done_handler(port_priv, &wc);
2242 				break;
2243 			default:
2244 				BUG_ON(1);
2245 				break;
2246 			}
2247 		} else
2248 			mad_error_handler(port_priv, &wc);
2249 	}
2250 }
2251 
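/*
 * Cancel every outstanding send for an agent: mark active sends as flushed,
 * drain the wait list, and report each canceled request to the client with
 * IB_WC_WR_FLUSH_ERR.
 */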
2252 static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
2253 {
2254 	unsigned long flags;
2255 	struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr;
2256 	struct ib_mad_send_wc mad_send_wc;
2257 	struct list_head cancel_list;
2258 
2259 	INIT_LIST_HEAD(&cancel_list);
2260 
2261 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2262 	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
2263 				 &mad_agent_priv->send_list, agent_list) {
2264 		if (mad_send_wr->status == IB_WC_SUCCESS) {
2265 			mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
2266 			mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2267 		}
2268 	}
2269 
2270 	/* Empty wait list to prevent receives from finding a request */
2271 	list_splice_init(&mad_agent_priv->wait_list, &cancel_list);
2272 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2273 
2274 	/* Report all cancelled requests */
2275 	mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
2276 	mad_send_wc.vendor_err = 0;
2277 
2278 	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
2279 				 &cancel_list, agent_list) {
2280 		mad_send_wc.send_buf = &mad_send_wr->send_buf;
2281 		list_del(&mad_send_wr->agent_list);
2282 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2283 						   &mad_send_wc);
2284 		atomic_dec(&mad_agent_priv->refcount);
2285 	}
2286 }
2287 
2288 static struct ib_mad_send_wr_private*
2289 find_send_wr(struct ib_mad_agent_private *mad_agent_priv,
2290 	     struct ib_mad_send_buf *send_buf)
2291 {
2292 	struct ib_mad_send_wr_private *mad_send_wr;
2293 
2294 	list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
2295 			    agent_list) {
2296 		if (&mad_send_wr->send_buf == send_buf)
2297 			return mad_send_wr;
2298 	}
2299 
2300 	list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
2301 			    agent_list) {
2302 		if (is_data_mad(mad_agent_priv, mad_send_wr->send_buf.mad) &&
2303 		    &mad_send_wr->send_buf == send_buf)
2304 			return mad_send_wr;
2305 	}
2306 	return NULL;
2307 }
2308 
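/*
 * Adjust the response timeout of an outstanding send; a timeout_ms of zero
 * cancels the request.  Illustrative usage (not taken from this file): a
 * client could call ib_modify_mad(agent, send_buf, 0) from its send handler
 * to abandon a request it no longer needs.
 */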
2309 int ib_modify_mad(struct ib_mad_agent *mad_agent,
2310 		  struct ib_mad_send_buf *send_buf, u32 timeout_ms)
2311 {
2312 	struct ib_mad_agent_private *mad_agent_priv;
2313 	struct ib_mad_send_wr_private *mad_send_wr;
2314 	unsigned long flags;
2315 	int active;
2316 
2317 	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
2318 				      agent);
2319 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2320 	mad_send_wr = find_send_wr(mad_agent_priv, send_buf);
2321 	if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
2322 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2323 		return -EINVAL;
2324 	}
2325 
2326 	active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1);
2327 	if (!timeout_ms) {
2328 		mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
2329 		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2330 	}
2331 
2332 	mad_send_wr->send_buf.timeout_ms = timeout_ms;
2333 	if (active)
2334 		mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
2335 	else
2336 		ib_reset_mad_timeout(mad_send_wr, timeout_ms);
2337 
2338 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2339 	return 0;
2340 }
2341 EXPORT_SYMBOL(ib_modify_mad);
2342 
2343 void ib_cancel_mad(struct ib_mad_agent *mad_agent,
2344 		   struct ib_mad_send_buf *send_buf)
2345 {
2346 	ib_modify_mad(mad_agent, send_buf, 0);
2347 }
2348 EXPORT_SYMBOL(ib_cancel_mad);
2349 
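/*
 * Work handler for MADs that were processed locally instead of being posted
 * to the hardware: deliver the receive side to the destination agent (when
 * there is one), then report the send completion to the originating agent.
 */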
2350 static void local_completions(struct work_struct *work)
2351 {
2352 	struct ib_mad_agent_private *mad_agent_priv;
2353 	struct ib_mad_local_private *local;
2354 	struct ib_mad_agent_private *recv_mad_agent;
2355 	unsigned long flags;
2356 	int free_mad;
2357 	struct ib_wc wc;
2358 	struct ib_mad_send_wc mad_send_wc;
2359 
2360 	mad_agent_priv =
2361 		container_of(work, struct ib_mad_agent_private, local_work);
2362 
2363 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2364 	while (!list_empty(&mad_agent_priv->local_list)) {
2365 		local = list_entry(mad_agent_priv->local_list.next,
2366 				   struct ib_mad_local_private,
2367 				   completion_list);
2368 		list_del(&local->completion_list);
2369 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2370 		free_mad = 0;
2371 		if (local->mad_priv) {
2372 			recv_mad_agent = local->recv_mad_agent;
2373 			if (!recv_mad_agent) {
2374 				printk(KERN_ERR PFX "No receive MAD agent for local completion\n");
2375 				free_mad = 1;
2376 				goto local_send_completion;
2377 			}
2378 
2379 			/*
2380 			 * Defined behavior is to complete response
2381 			 * before request
2382 			 */
2383 			build_smp_wc(recv_mad_agent->agent.qp,
2384 				     (unsigned long) local->mad_send_wr,
2385 				     be16_to_cpu(IB_LID_PERMISSIVE),
2386 				     0, recv_mad_agent->agent.port_num, &wc);
2387 
2388 			local->mad_priv->header.recv_wc.wc = &wc;
2389 			local->mad_priv->header.recv_wc.mad_len =
2390 						sizeof(struct ib_mad);
2391 			INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list);
2392 			list_add(&local->mad_priv->header.recv_wc.recv_buf.list,
2393 				 &local->mad_priv->header.recv_wc.rmpp_list);
2394 			local->mad_priv->header.recv_wc.recv_buf.grh = NULL;
2395 			local->mad_priv->header.recv_wc.recv_buf.mad =
2396 						&local->mad_priv->mad.mad;
2397 			if (atomic_read(&recv_mad_agent->qp_info->snoop_count))
2398 				snoop_recv(recv_mad_agent->qp_info,
2399 					  &local->mad_priv->header.recv_wc,
2400 					   IB_MAD_SNOOP_RECVS);
2401 			recv_mad_agent->agent.recv_handler(
2402 						&recv_mad_agent->agent,
2403 						&local->mad_priv->header.recv_wc);
2404 			spin_lock_irqsave(&recv_mad_agent->lock, flags);
2405 			atomic_dec(&recv_mad_agent->refcount);
2406 			spin_unlock_irqrestore(&recv_mad_agent->lock, flags);
2407 		}
2408 
2409 local_send_completion:
2410 		/* Complete send */
2411 		mad_send_wc.status = IB_WC_SUCCESS;
2412 		mad_send_wc.vendor_err = 0;
2413 		mad_send_wc.send_buf = &local->mad_send_wr->send_buf;
2414 		if (atomic_read(&mad_agent_priv->qp_info->snoop_count))
2415 			snoop_send(mad_agent_priv->qp_info,
2416 				   &local->mad_send_wr->send_buf,
2417 				   &mad_send_wc, IB_MAD_SNOOP_SEND_COMPLETIONS);
2418 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2419 						   &mad_send_wc);
2420 
2421 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
2422 		atomic_dec(&mad_agent_priv->refcount);
2423 		if (free_mad)
2424 			kmem_cache_free(ib_mad_cache, local->mad_priv);
2425 		kfree(local);
2426 	}
2427 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2428 }
2429 
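/*
 * Attempt to resend a request whose response timed out.  Returns 0 if a
 * retry was queued, -ETIMEDOUT when no retries remain, or another negative
 * errno on failure.
 */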
2430 static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
2431 {
2432 	int ret;
2433 
2434 	if (!mad_send_wr->retries_left)
2435 		return -ETIMEDOUT;
2436 
2437 	mad_send_wr->retries_left--;
2438 	mad_send_wr->send_buf.retries++;
2439 
2440 	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_buf.timeout_ms);
2441 
2442 	if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
2443 		ret = ib_retry_rmpp(mad_send_wr);
2444 		switch (ret) {
2445 		case IB_RMPP_RESULT_UNHANDLED:
2446 			ret = ib_send_mad(mad_send_wr);
2447 			break;
2448 		case IB_RMPP_RESULT_CONSUMED:
2449 			ret = 0;
2450 			break;
2451 		default:
2452 			ret = -ECOMM;
2453 			break;
2454 		}
2455 	} else
2456 		ret = ib_send_mad(mad_send_wr);
2457 
2458 	if (!ret) {
2459 		mad_send_wr->refcount++;
2460 		list_add_tail(&mad_send_wr->agent_list,
2461 			      &mad_send_wr->mad_agent_priv->send_list);
2462 	}
2463 	return ret;
2464 }
2465 
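/*
 * Delayed work handler for response timeouts: retry expired requests and
 * report IB_WC_RESP_TIMEOUT_ERR to the client when no retries remain,
 * rescheduling itself for the next pending timeout.
 */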
2466 static void timeout_sends(struct work_struct *work)
2467 {
2468 	struct ib_mad_agent_private *mad_agent_priv;
2469 	struct ib_mad_send_wr_private *mad_send_wr;
2470 	struct ib_mad_send_wc mad_send_wc;
2471 	unsigned long flags, delay;
2472 
2473 	mad_agent_priv = container_of(work, struct ib_mad_agent_private,
2474 				      timed_work.work);
2475 	mad_send_wc.vendor_err = 0;
2476 
2477 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2478 	while (!list_empty(&mad_agent_priv->wait_list)) {
2479 		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
2480 					 struct ib_mad_send_wr_private,
2481 					 agent_list);
2482 
2483 		if (time_after(mad_send_wr->timeout, jiffies)) {
2484 			delay = mad_send_wr->timeout - jiffies;
2485 			if ((long)delay <= 0)
2486 				delay = 1;
2487 			queue_delayed_work(mad_agent_priv->qp_info->
2488 					   port_priv->wq,
2489 					   &mad_agent_priv->timed_work, delay);
2490 			break;
2491 		}
2492 
2493 		list_del(&mad_send_wr->agent_list);
2494 		if (mad_send_wr->status == IB_WC_SUCCESS &&
2495 		    !retry_send(mad_send_wr))
2496 			continue;
2497 
2498 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2499 
2500 		if (mad_send_wr->status == IB_WC_SUCCESS)
2501 			mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
2502 		else
2503 			mad_send_wc.status = mad_send_wr->status;
2504 		mad_send_wc.send_buf = &mad_send_wr->send_buf;
2505 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2506 						   &mad_send_wc);
2507 
2508 		atomic_dec(&mad_agent_priv->refcount);
2509 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
2510 	}
2511 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2512 }
2513 
2514 static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg)
2515 {
2516 	struct ib_mad_port_private *port_priv = cq->cq_context;
2517 	unsigned long flags;
2518 
2519 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
2520 	if (!list_empty(&port_priv->port_list))
2521 		queue_work(port_priv->wq, &port_priv->work);
2522 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
2523 }
2524 
2525 /*
2526  * Allocate receive MADs and post receive WRs for them
2527  */
2528 static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
2529 				    struct ib_mad_private *mad)
2530 {
2531 	unsigned long flags;
2532 	int post, ret;
2533 	struct ib_mad_private *mad_priv;
2534 	struct ib_sge sg_list;
2535 	struct ib_recv_wr recv_wr, *bad_recv_wr;
2536 	struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
2537 
2538 	/* Initialize common scatter list fields */
2539 	sg_list.length = sizeof *mad_priv - sizeof mad_priv->header;
2540 	sg_list.lkey = (*qp_info->port_priv->mr).lkey;
2541 
2542 	/* Initialize common receive WR fields */
2543 	recv_wr.next = NULL;
2544 	recv_wr.sg_list = &sg_list;
2545 	recv_wr.num_sge = 1;
2546 
2547 	do {
2548 		/* Allocate and map receive buffer */
2549 		if (mad) {
2550 			mad_priv = mad;
2551 			mad = NULL;
2552 		} else {
2553 			mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
2554 			if (!mad_priv) {
2555 				printk(KERN_ERR PFX "No memory for receive buffer\n");
2556 				ret = -ENOMEM;
2557 				break;
2558 			}
2559 		}
2560 		sg_list.addr = ib_dma_map_single(qp_info->port_priv->device,
2561 						 &mad_priv->grh,
2562 						 sizeof *mad_priv -
2563 						   sizeof mad_priv->header,
2564 						 DMA_FROM_DEVICE);
2565 		mad_priv->header.mapping = sg_list.addr;
2566 		recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
2567 		mad_priv->header.mad_list.mad_queue = recv_queue;
2568 
2569 		/* Post receive WR */
2570 		spin_lock_irqsave(&recv_queue->lock, flags);
2571 		post = (++recv_queue->count < recv_queue->max_active);
2572 		list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list);
2573 		spin_unlock_irqrestore(&recv_queue->lock, flags);
2574 		ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr);
2575 		if (ret) {
2576 			spin_lock_irqsave(&recv_queue->lock, flags);
2577 			list_del(&mad_priv->header.mad_list.list);
2578 			recv_queue->count--;
2579 			spin_unlock_irqrestore(&recv_queue->lock, flags);
2580 			ib_dma_unmap_single(qp_info->port_priv->device,
2581 					    mad_priv->header.mapping,
2582 					    sizeof *mad_priv -
2583 					      sizeof mad_priv->header,
2584 					    DMA_FROM_DEVICE);
2585 			kmem_cache_free(ib_mad_cache, mad_priv);
2586 			printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret);
2587 			break;
2588 		}
2589 	} while (post);
2590 
2591 	return ret;
2592 }
2593 
2594 /*
2595  * Return all the posted receive MADs
2596  */
2597 static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info)
2598 {
2599 	struct ib_mad_private_header *mad_priv_hdr;
2600 	struct ib_mad_private *recv;
2601 	struct ib_mad_list_head *mad_list;
2602 
2603 	while (!list_empty(&qp_info->recv_queue.list)) {
2604 
2605 		mad_list = list_entry(qp_info->recv_queue.list.next,
2606 				      struct ib_mad_list_head, list);
2607 		mad_priv_hdr = container_of(mad_list,
2608 					    struct ib_mad_private_header,
2609 					    mad_list);
2610 		recv = container_of(mad_priv_hdr, struct ib_mad_private,
2611 				    header);
2612 
2613 		/* Remove from posted receive MAD list */
2614 		list_del(&mad_list->list);
2615 
2616 		ib_dma_unmap_single(qp_info->port_priv->device,
2617 				    recv->header.mapping,
2618 				    sizeof(struct ib_mad_private) -
2619 				      sizeof(struct ib_mad_private_header),
2620 				    DMA_FROM_DEVICE);
2621 		kmem_cache_free(ib_mad_cache, recv);
2622 	}
2623 
2624 	qp_info->recv_queue.count = 0;
2625 }
2626 
2627 /*
2628  * Start the port
2629  */
2630 static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
2631 {
2632 	int ret, i;
2633 	struct ib_qp_attr *attr;
2634 	struct ib_qp *qp;
2635 
2636 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
2637 	if (!attr) {
2638 		printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n");
2639 		return -ENOMEM;
2640 	}
2641 
2642 	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
2643 		qp = port_priv->qp_info[i].qp;
2644 		/*
2645 		 * PKey index for QP1 is irrelevant but
2646 		 * one is needed for the Reset to Init transition
2647 		 */
2648 		attr->qp_state = IB_QPS_INIT;
2649 		attr->pkey_index = 0;
2650 		attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY;
2651 		ret = ib_modify_qp(qp, attr, IB_QP_STATE |
2652 					     IB_QP_PKEY_INDEX | IB_QP_QKEY);
2653 		if (ret) {
2654 			printk(KERN_ERR PFX "Couldn't change QP%d state to "
2655 			       "INIT: %d\n", i, ret);
2656 			goto out;
2657 		}
2658 
2659 		attr->qp_state = IB_QPS_RTR;
2660 		ret = ib_modify_qp(qp, attr, IB_QP_STATE);
2661 		if (ret) {
2662 			printk(KERN_ERR PFX "Couldn't change QP%d state to "
2663 			       "RTR: %d\n", i, ret);
2664 			goto out;
2665 		}
2666 
2667 		attr->qp_state = IB_QPS_RTS;
2668 		attr->sq_psn = IB_MAD_SEND_Q_PSN;
2669 		ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN);
2670 		if (ret) {
2671 			printk(KERN_ERR PFX "Couldn't change QP%d state to "
2672 			       "RTS: %d\n", i, ret);
2673 			goto out;
2674 		}
2675 	}
2676 
2677 	ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
2678 	if (ret) {
2679 		printk(KERN_ERR PFX "Failed to request completion "
2680 		       "notification: %d\n", ret);
2681 		goto out;
2682 	}
2683 
2684 	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
2685 		ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL);
2686 		if (ret) {
2687 			printk(KERN_ERR PFX "Couldn't post receive WRs\n");
2688 			goto out;
2689 		}
2690 	}
2691 out:
2692 	kfree(attr);
2693 	return ret;
2694 }
2695 
2696 static void qp_event_handler(struct ib_event *event, void *qp_context)
2697 {
2698 	struct ib_mad_qp_info	*qp_info = qp_context;
2699 
2700 	/* It's worse than that! He's dead, Jim! */
2701 	printk(KERN_ERR PFX "Fatal error (%d) on MAD QP (%d)\n",
2702 		event->event, qp_info->qp->qp_num);
2703 }
2704 
2705 static void init_mad_queue(struct ib_mad_qp_info *qp_info,
2706 			   struct ib_mad_queue *mad_queue)
2707 {
2708 	mad_queue->qp_info = qp_info;
2709 	mad_queue->count = 0;
2710 	spin_lock_init(&mad_queue->lock);
2711 	INIT_LIST_HEAD(&mad_queue->list);
2712 }
2713 
2714 static void init_mad_qp(struct ib_mad_port_private *port_priv,
2715 			struct ib_mad_qp_info *qp_info)
2716 {
2717 	qp_info->port_priv = port_priv;
2718 	init_mad_queue(qp_info, &qp_info->send_queue);
2719 	init_mad_queue(qp_info, &qp_info->recv_queue);
2720 	INIT_LIST_HEAD(&qp_info->overflow_list);
2721 	spin_lock_init(&qp_info->snoop_lock);
2722 	qp_info->snoop_table = NULL;
2723 	qp_info->snoop_table_size = 0;
2724 	atomic_set(&qp_info->snoop_count, 0);
2725 }
2726 
2727 static int create_mad_qp(struct ib_mad_qp_info *qp_info,
2728 			 enum ib_qp_type qp_type)
2729 {
2730 	struct ib_qp_init_attr	qp_init_attr;
2731 	int ret;
2732 
2733 	memset(&qp_init_attr, 0, sizeof qp_init_attr);
2734 	qp_init_attr.send_cq = qp_info->port_priv->cq;
2735 	qp_init_attr.recv_cq = qp_info->port_priv->cq;
2736 	qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
2737 	qp_init_attr.cap.max_send_wr = mad_sendq_size;
2738 	qp_init_attr.cap.max_recv_wr = mad_recvq_size;
2739 	qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
2740 	qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
2741 	qp_init_attr.qp_type = qp_type;
2742 	qp_init_attr.port_num = qp_info->port_priv->port_num;
2743 	qp_init_attr.qp_context = qp_info;
2744 	qp_init_attr.event_handler = qp_event_handler;
2745 	qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr);
2746 	if (IS_ERR(qp_info->qp)) {
2747 		printk(KERN_ERR PFX "Couldn't create ib_mad QP%d\n",
2748 		       get_spl_qp_index(qp_type));
2749 		ret = PTR_ERR(qp_info->qp);
2750 		goto error;
2751 	}
2752 	/* Use minimum queue sizes unless the CQ is resized */
2753 	qp_info->send_queue.max_active = mad_sendq_size;
2754 	qp_info->recv_queue.max_active = mad_recvq_size;
2755 	return 0;
2756 
2757 error:
2758 	return ret;
2759 }
2760 
2761 static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
2762 {
2763 	ib_destroy_qp(qp_info->qp);
2764 	kfree(qp_info->snoop_table);
2765 }
2766 
2767 /*
2768  * Open the port
2769  * Create the CQ, PD, MR, QPs, and workqueue used by the port
2770  */
2771 static int ib_mad_port_open(struct ib_device *device,
2772 			    int port_num)
2773 {
2774 	int ret, cq_size;
2775 	struct ib_mad_port_private *port_priv;
2776 	unsigned long flags;
2777 	char name[sizeof "ib_mad123"];
2778 
2779 	/* Create new device info */
2780 	port_priv = kzalloc(sizeof *port_priv, GFP_KERNEL);
2781 	if (!port_priv) {
2782 		printk(KERN_ERR PFX "No memory for ib_mad_port_private\n");
2783 		return -ENOMEM;
2784 	}
2785 
2786 	port_priv->device = device;
2787 	port_priv->port_num = port_num;
2788 	spin_lock_init(&port_priv->reg_lock);
2789 	INIT_LIST_HEAD(&port_priv->agent_list);
2790 	init_mad_qp(port_priv, &port_priv->qp_info[0]);
2791 	init_mad_qp(port_priv, &port_priv->qp_info[1]);
2792 
2793 	cq_size = (mad_sendq_size + mad_recvq_size) * 2;
2794 	port_priv->cq = ib_create_cq(port_priv->device,
2795 				     ib_mad_thread_completion_handler,
2796 				     NULL, port_priv, cq_size, 0);
2797 	if (IS_ERR(port_priv->cq)) {
2798 		printk(KERN_ERR PFX "Couldn't create ib_mad CQ\n");
2799 		ret = PTR_ERR(port_priv->cq);
2800 		goto error3;
2801 	}
2802 
2803 	port_priv->pd = ib_alloc_pd(device);
2804 	if (IS_ERR(port_priv->pd)) {
2805 		printk(KERN_ERR PFX "Couldn't create ib_mad PD\n");
2806 		ret = PTR_ERR(port_priv->pd);
2807 		goto error4;
2808 	}
2809 
2810 	port_priv->mr = ib_get_dma_mr(port_priv->pd, IB_ACCESS_LOCAL_WRITE);
2811 	if (IS_ERR(port_priv->mr)) {
2812 		printk(KERN_ERR PFX "Couldn't get ib_mad DMA MR\n");
2813 		ret = PTR_ERR(port_priv->mr);
2814 		goto error5;
2815 	}
2816 
2817 	ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI);
2818 	if (ret)
2819 		goto error6;
2820 	ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI);
2821 	if (ret)
2822 		goto error7;
2823 
2824 	snprintf(name, sizeof name, "ib_mad%d", port_num);
2825 	port_priv->wq = create_singlethread_workqueue(name);
2826 	if (!port_priv->wq) {
2827 		ret = -ENOMEM;
2828 		goto error8;
2829 	}
2830 	INIT_WORK(&port_priv->work, ib_mad_completion_handler);
2831 
2832 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
2833 	list_add_tail(&port_priv->port_list, &ib_mad_port_list);
2834 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
2835 
2836 	ret = ib_mad_port_start(port_priv);
2837 	if (ret) {
2838 		printk(KERN_ERR PFX "Couldn't start port\n");
2839 		goto error9;
2840 	}
2841 
2842 	return 0;
2843 
2844 error9:
2845 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
2846 	list_del_init(&port_priv->port_list);
2847 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
2848 
2849 	destroy_workqueue(port_priv->wq);
2850 error8:
2851 	destroy_mad_qp(&port_priv->qp_info[1]);
2852 error7:
2853 	destroy_mad_qp(&port_priv->qp_info[0]);
2854 error6:
2855 	ib_dereg_mr(port_priv->mr);
2856 error5:
2857 	ib_dealloc_pd(port_priv->pd);
2858 error4:
2859 	ib_destroy_cq(port_priv->cq);
2860 	cleanup_recv_queue(&port_priv->qp_info[1]);
2861 	cleanup_recv_queue(&port_priv->qp_info[0]);
2862 error3:
2863 	kfree(port_priv);
2864 
2865 	return ret;
2866 }
2867 
2868 /*
2869  * Close the port
2870  * If there are no classes using the port, free the port
2871  * resources (CQ, MR, PD, QP) and remove the port's info structure
2872  */
2873 static int ib_mad_port_close(struct ib_device *device, int port_num)
2874 {
2875 	struct ib_mad_port_private *port_priv;
2876 	unsigned long flags;
2877 
2878 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
2879 	port_priv = __ib_get_mad_port(device, port_num);
2880 	if (port_priv == NULL) {
2881 		spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
2882 		printk(KERN_ERR PFX "Port %d not found\n", port_num);
2883 		return -ENODEV;
2884 	}
2885 	list_del_init(&port_priv->port_list);
2886 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
2887 
2888 	destroy_workqueue(port_priv->wq);
2889 	destroy_mad_qp(&port_priv->qp_info[1]);
2890 	destroy_mad_qp(&port_priv->qp_info[0]);
2891 	ib_dereg_mr(port_priv->mr);
2892 	ib_dealloc_pd(port_priv->pd);
2893 	ib_destroy_cq(port_priv->cq);
2894 	cleanup_recv_queue(&port_priv->qp_info[1]);
2895 	cleanup_recv_queue(&port_priv->qp_info[0]);
2896 	/* XXX: Handle deallocation of MAD registration tables */
2897 
2898 	kfree(port_priv);
2899 
2900 	return 0;
2901 }
2902 
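/*
 * ib_client add callback: bring up MAD and agent services on every port of
 * a newly registered IB device (a single port 0 for switches), undoing the
 * work if any port fails to open.
 */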
2903 static void ib_mad_init_device(struct ib_device *device)
2904 {
2905 	int start, end, i;
2906 
2907 	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
2908 		return;
2909 
2910 	if (device->node_type == RDMA_NODE_IB_SWITCH) {
2911 		start = 0;
2912 		end   = 0;
2913 	} else {
2914 		start = 1;
2915 		end   = device->phys_port_cnt;
2916 	}
2917 
2918 	for (i = start; i <= end; i++) {
2919 		if (ib_mad_port_open(device, i)) {
2920 			printk(KERN_ERR PFX "Couldn't open %s port %d\n",
2921 			       device->name, i);
2922 			goto error;
2923 		}
2924 		if (ib_agent_port_open(device, i)) {
2925 			printk(KERN_ERR PFX "Couldn't open %s port %d "
2926 			       "for agents\n",
2927 			       device->name, i);
2928 			goto error_agent;
2929 		}
2930 	}
2931 	return;
2932 
2933 error_agent:
2934 	if (ib_mad_port_close(device, i))
2935 		printk(KERN_ERR PFX "Couldn't close %s port %d\n",
2936 		       device->name, i);
2937 
2938 error:
2939 	i--;
2940 
2941 	while (i >= start) {
2942 		if (ib_agent_port_close(device, i))
2943 			printk(KERN_ERR PFX "Couldn't close %s port %d "
2944 			       "for agents\n",
2945 			       device->name, i);
2946 		if (ib_mad_port_close(device, i))
2947 			printk(KERN_ERR PFX "Couldn't close %s port %d\n",
2948 			       device->name, i);
2949 		i--;
2950 	}
2951 }
2952 
2953 static void ib_mad_remove_device(struct ib_device *device)
2954 {
2955 	int i, num_ports, cur_port;
2956 
2957 	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
2958 		return;
2959 
2960 	if (device->node_type == RDMA_NODE_IB_SWITCH) {
2961 		num_ports = 1;
2962 		cur_port = 0;
2963 	} else {
2964 		num_ports = device->phys_port_cnt;
2965 		cur_port = 1;
2966 	}
2967 	for (i = 0; i < num_ports; i++, cur_port++) {
2968 		if (ib_agent_port_close(device, cur_port))
2969 			printk(KERN_ERR PFX "Couldn't close %s port %d "
2970 			       "for agents\n",
2971 			       device->name, cur_port);
2972 		if (ib_mad_port_close(device, cur_port))
2973 			printk(KERN_ERR PFX "Couldn't close %s port %d\n",
2974 			       device->name, cur_port);
2975 	}
2976 }
2977 
2978 static struct ib_client mad_client = {
2979 	.name   = "mad",
2980 	.add = ib_mad_init_device,
2981 	.remove = ib_mad_remove_device
2982 };
2983 
2984 static int __init ib_mad_init_module(void)
2985 {
2986 	int ret;
2987 
2988 	mad_recvq_size = min(mad_recvq_size, IB_MAD_QP_MAX_SIZE);
2989 	mad_recvq_size = max(mad_recvq_size, IB_MAD_QP_MIN_SIZE);
2990 
2991 	mad_sendq_size = min(mad_sendq_size, IB_MAD_QP_MAX_SIZE);
2992 	mad_sendq_size = max(mad_sendq_size, IB_MAD_QP_MIN_SIZE);
2993 
2994 	ib_mad_cache = kmem_cache_create("ib_mad",
2995 					 sizeof(struct ib_mad_private),
2996 					 0,
2997 					 SLAB_HWCACHE_ALIGN,
2998 					 NULL);
2999 	if (!ib_mad_cache) {
3000 		printk(KERN_ERR PFX "Couldn't create ib_mad cache\n");
3001 		ret = -ENOMEM;
3002 		goto error1;
3003 	}
3004 
3005 	INIT_LIST_HEAD(&ib_mad_port_list);
3006 
3007 	if (ib_register_client(&mad_client)) {
3008 		printk(KERN_ERR PFX "Couldn't register ib_mad client\n");
3009 		ret = -EINVAL;
3010 		goto error2;
3011 	}
3012 
3013 	return 0;
3014 
3015 error2:
3016 	kmem_cache_destroy(ib_mad_cache);
3017 error1:
3018 	return ret;
3019 }
3020 
3021 static void __exit ib_mad_cleanup_module(void)
3022 {
3023 	ib_unregister_client(&mad_client);
3024 	kmem_cache_destroy(ib_mad_cache);
3025 }
3026 
3027 module_init(ib_mad_init_module);
3028 module_exit(ib_mad_cleanup_module);
3029