xref: /linux/drivers/infiniband/core/mad.c (revision 54a8a2220c936a47840c9a3d74910c5a56fae2ed)
1 /*
2  * Copyright (c) 2004, 2005 Voltaire, Inc. All rights reserved.
3  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
4  * Copyright (c) 2005 Mellanox Technologies Ltd.  All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  *
34  * $Id: mad.c 2817 2005-07-07 11:29:26Z halr $
35  */
36 #include <linux/dma-mapping.h>
37 
38 #include "mad_priv.h"
39 #include "mad_rmpp.h"
40 #include "smi.h"
41 #include "agent.h"
42 
43 MODULE_LICENSE("Dual BSD/GPL");
44 MODULE_DESCRIPTION("kernel IB MAD API");
45 MODULE_AUTHOR("Hal Rosenstock");
46 MODULE_AUTHOR("Sean Hefty");
47 
48 
49 kmem_cache_t *ib_mad_cache;
50 
51 static struct list_head ib_mad_port_list;
52 static u32 ib_mad_client_id = 0;
53 
54 /* Port list lock */
55 static spinlock_t ib_mad_port_list_lock;
56 
57 
58 /* Forward declarations */
59 static int method_in_use(struct ib_mad_mgmt_method_table **method,
60 			 struct ib_mad_reg_req *mad_reg_req);
61 static void remove_mad_reg_req(struct ib_mad_agent_private *priv);
62 static struct ib_mad_agent_private *find_mad_agent(
63 					struct ib_mad_port_private *port_priv,
64 					struct ib_mad *mad);
65 static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
66 				    struct ib_mad_private *mad);
67 static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv);
68 static void timeout_sends(void *data);
69 static void local_completions(void *data);
70 static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
71 			      struct ib_mad_agent_private *agent_priv,
72 			      u8 mgmt_class);
73 static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
74 			   struct ib_mad_agent_private *agent_priv);
75 
76 /*
77  * Returns an ib_mad_port_private structure or NULL for a device/port
78  * Assumes ib_mad_port_list_lock is held
79  */
80 static inline struct ib_mad_port_private *
81 __ib_get_mad_port(struct ib_device *device, int port_num)
82 {
83 	struct ib_mad_port_private *entry;
84 
85 	list_for_each_entry(entry, &ib_mad_port_list, port_list) {
86 		if (entry->device == device && entry->port_num == port_num)
87 			return entry;
88 	}
89 	return NULL;
90 }
91 
92 /*
93  * Wrapper function to return an ib_mad_port_private structure or NULL
94  * for a device/port
95  */
96 static inline struct ib_mad_port_private *
97 ib_get_mad_port(struct ib_device *device, int port_num)
98 {
99 	struct ib_mad_port_private *entry;
100 	unsigned long flags;
101 
102 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
103 	entry = __ib_get_mad_port(device, port_num);
104 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
105 
106 	return entry;
107 }
108 
109 static inline u8 convert_mgmt_class(u8 mgmt_class)
110 {
111 	/* Alias IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE to 0 */
112 	return mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE ?
113 		0 : mgmt_class;
114 }
115 
116 static int get_spl_qp_index(enum ib_qp_type qp_type)
117 {
118 	switch (qp_type)
119 	{
120 	case IB_QPT_SMI:
121 		return 0;
122 	case IB_QPT_GSI:
123 		return 1;
124 	default:
125 		return -1;
126 	}
127 }
128 
129 static int vendor_class_index(u8 mgmt_class)
130 {
131 	return mgmt_class - IB_MGMT_CLASS_VENDOR_RANGE2_START;
132 }
133 
134 static int is_vendor_class(u8 mgmt_class)
135 {
136 	if ((mgmt_class < IB_MGMT_CLASS_VENDOR_RANGE2_START) ||
137 	    (mgmt_class > IB_MGMT_CLASS_VENDOR_RANGE2_END))
138 		return 0;
139 	return 1;
140 }
141 
142 static int is_vendor_oui(char *oui)
143 {
144 	if (oui[0] || oui[1] || oui[2])
145 		return 1;
146 	return 0;
147 }
148 
149 static int is_vendor_method_in_use(
150 		struct ib_mad_mgmt_vendor_class *vendor_class,
151 		struct ib_mad_reg_req *mad_reg_req)
152 {
153 	struct ib_mad_mgmt_method_table *method;
154 	int i;
155 
156 	for (i = 0; i < MAX_MGMT_OUI; i++) {
157 		if (!memcmp(vendor_class->oui[i], mad_reg_req->oui, 3)) {
158 			method = vendor_class->method_table[i];
159 			if (method) {
160 				if (method_in_use(&method, mad_reg_req))
161 					return 1;
162 				else
163 					break;
164 			}
165 		}
166 	}
167 	return 0;
168 }
169 
170 /*
171  * ib_register_mad_agent - Register to send/receive MADs
172  */
173 struct ib_mad_agent *ib_register_mad_agent(struct ib_device *device,
174 					   u8 port_num,
175 					   enum ib_qp_type qp_type,
176 					   struct ib_mad_reg_req *mad_reg_req,
177 					   u8 rmpp_version,
178 					   ib_mad_send_handler send_handler,
179 					   ib_mad_recv_handler recv_handler,
180 					   void *context)
181 {
182 	struct ib_mad_port_private *port_priv;
183 	struct ib_mad_agent *ret = ERR_PTR(-EINVAL);
184 	struct ib_mad_agent_private *mad_agent_priv;
185 	struct ib_mad_reg_req *reg_req = NULL;
186 	struct ib_mad_mgmt_class_table *class;
187 	struct ib_mad_mgmt_vendor_class_table *vendor;
188 	struct ib_mad_mgmt_vendor_class *vendor_class;
189 	struct ib_mad_mgmt_method_table *method;
190 	int ret2, qpn;
191 	unsigned long flags;
192 	u8 mgmt_class, vclass;
193 
194 	/* Validate parameters */
195 	qpn = get_spl_qp_index(qp_type);
196 	if (qpn == -1)
197 		goto error1;
198 
199 	if (rmpp_version && rmpp_version != IB_MGMT_RMPP_VERSION)
200 		goto error1;
201 
202 	/* Validate MAD registration request if supplied */
203 	if (mad_reg_req) {
204 		if (mad_reg_req->mgmt_class_version >= MAX_MGMT_VERSION)
205 			goto error1;
206 		if (!recv_handler)
207 			goto error1;
208 		if (mad_reg_req->mgmt_class >= MAX_MGMT_CLASS) {
209 			/*
210 			 * IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE is the only
211 			 * one in this range currently allowed
212 			 */
213 			if (mad_reg_req->mgmt_class !=
214 			    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
215 				goto error1;
216 		} else if (mad_reg_req->mgmt_class == 0) {
217 			/*
218 			 * Class 0 is reserved in IBA and is used for
219 			 * aliasing of IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE
220 			 */
221 			goto error1;
222 		} else if (is_vendor_class(mad_reg_req->mgmt_class)) {
223 			/*
224 			 * If class is in "new" vendor range,
225 			 * ensure supplied OUI is not zero
226 			 */
227 			if (!is_vendor_oui(mad_reg_req->oui))
228 				goto error1;
229 		}
230 		/* Make sure class supplied is consistent with QP type */
231 		if (qp_type == IB_QPT_SMI) {
232 			if ((mad_reg_req->mgmt_class !=
233 					IB_MGMT_CLASS_SUBN_LID_ROUTED) &&
234 			    (mad_reg_req->mgmt_class !=
235 					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE))
236 				goto error1;
237 		} else {
238 			if ((mad_reg_req->mgmt_class ==
239 					IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
240 			    (mad_reg_req->mgmt_class ==
241 					IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE))
242 				goto error1;
243 		}
244 	} else {
245 		/* No registration request supplied */
246 		if (!send_handler)
247 			goto error1;
248 	}
249 
250 	/* Validate device and port */
251 	port_priv = ib_get_mad_port(device, port_num);
252 	if (!port_priv) {
253 		ret = ERR_PTR(-ENODEV);
254 		goto error1;
255 	}
256 
257 	/* Allocate structures */
258 	mad_agent_priv = kmalloc(sizeof *mad_agent_priv, GFP_KERNEL);
259 	if (!mad_agent_priv) {
260 		ret = ERR_PTR(-ENOMEM);
261 		goto error1;
262 	}
263 	memset(mad_agent_priv, 0, sizeof *mad_agent_priv);
264 
265 	mad_agent_priv->agent.mr = ib_get_dma_mr(port_priv->qp_info[qpn].qp->pd,
266 						 IB_ACCESS_LOCAL_WRITE);
267 	if (IS_ERR(mad_agent_priv->agent.mr)) {
268 		ret = ERR_PTR(-ENOMEM);
269 		goto error2;
270 	}
271 
272 	if (mad_reg_req) {
273 		reg_req = kmalloc(sizeof *reg_req, GFP_KERNEL);
274 		if (!reg_req) {
275 			ret = ERR_PTR(-ENOMEM);
276 			goto error3;
277 		}
278 		/* Make a copy of the MAD registration request */
279 		memcpy(reg_req, mad_reg_req, sizeof *reg_req);
280 	}
281 
282 	/* Now, fill in the various structures */
283 	mad_agent_priv->qp_info = &port_priv->qp_info[qpn];
284 	mad_agent_priv->reg_req = reg_req;
285 	mad_agent_priv->agent.rmpp_version = rmpp_version;
286 	mad_agent_priv->agent.device = device;
287 	mad_agent_priv->agent.recv_handler = recv_handler;
288 	mad_agent_priv->agent.send_handler = send_handler;
289 	mad_agent_priv->agent.context = context;
290 	mad_agent_priv->agent.qp = port_priv->qp_info[qpn].qp;
291 	mad_agent_priv->agent.port_num = port_num;
292 
293 	spin_lock_irqsave(&port_priv->reg_lock, flags);
294 	mad_agent_priv->agent.hi_tid = ++ib_mad_client_id;
295 
296 	/*
297 	 * Make sure the MAD registration (if supplied)
298 	 * does not overlap with any existing ones
299 	 */
300 	if (mad_reg_req) {
301 		mgmt_class = convert_mgmt_class(mad_reg_req->mgmt_class);
302 		if (!is_vendor_class(mgmt_class)) {
303 			class = port_priv->version[mad_reg_req->
304 						   mgmt_class_version].class;
305 			if (class) {
306 				method = class->method_table[mgmt_class];
307 				if (method) {
308 					if (method_in_use(&method,
309 							   mad_reg_req))
310 						goto error4;
311 				}
312 			}
313 			ret2 = add_nonoui_reg_req(mad_reg_req, mad_agent_priv,
314 						  mgmt_class);
315 		} else {
316 			/* "New" vendor class range */
317 			vendor = port_priv->version[mad_reg_req->
318 						    mgmt_class_version].vendor;
319 			if (vendor) {
320 				vclass = vendor_class_index(mgmt_class);
321 				vendor_class = vendor->vendor_class[vclass];
322 				if (vendor_class) {
323 					if (is_vendor_method_in_use(
324 							vendor_class,
325 							mad_reg_req))
326 						goto error4;
327 				}
328 			}
329 			ret2 = add_oui_reg_req(mad_reg_req, mad_agent_priv);
330 		}
331 		if (ret2) {
332 			ret = ERR_PTR(ret2);
333 			goto error4;
334 		}
335 	}
336 
337 	/* Add mad agent into port's agent list */
338 	list_add_tail(&mad_agent_priv->agent_list, &port_priv->agent_list);
339 	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
340 
341 	spin_lock_init(&mad_agent_priv->lock);
342 	INIT_LIST_HEAD(&mad_agent_priv->send_list);
343 	INIT_LIST_HEAD(&mad_agent_priv->wait_list);
344 	INIT_LIST_HEAD(&mad_agent_priv->done_list);
345 	INIT_LIST_HEAD(&mad_agent_priv->rmpp_list);
346 	INIT_WORK(&mad_agent_priv->timed_work, timeout_sends, mad_agent_priv);
347 	INIT_LIST_HEAD(&mad_agent_priv->local_list);
348 	INIT_WORK(&mad_agent_priv->local_work, local_completions,
349 		   mad_agent_priv);
350 	atomic_set(&mad_agent_priv->refcount, 1);
351 	init_waitqueue_head(&mad_agent_priv->wait);
352 
353 	return &mad_agent_priv->agent;
354 
355 error4:
356 	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
357 	kfree(reg_req);
358 error3:
359 	ib_dereg_mr(mad_agent_priv->agent.mr);
360 error2:
361 	kfree(mad_agent_priv);
362 error1:
363 	return ret;
364 }
365 EXPORT_SYMBOL(ib_register_mad_agent);
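
/*
 * Illustrative sketch, not part of this file: how a client might register
 * a GSI agent for a class in the "new" vendor range.  The OUI bytes, the
 * handler names and the context pointer are hypothetical; the field names
 * follow struct ib_mad_reg_req as used above.
 *
 *	struct ib_mad_reg_req reg_req;
 *	struct ib_mad_agent *agent;
 *
 *	memset(&reg_req, 0, sizeof reg_req);
 *	reg_req.mgmt_class = IB_MGMT_CLASS_VENDOR_RANGE2_START;
 *	reg_req.mgmt_class_version = 1;
 *	reg_req.oui[0] = 0x00;
 *	reg_req.oui[1] = 0x14;
 *	reg_req.oui[2] = 0x05;
 *	set_bit(IB_MGMT_METHOD_GET, reg_req.method_mask);
 *
 *	agent = ib_register_mad_agent(device, port_num, IB_QPT_GSI, &reg_req,
 *				      0, my_send_handler, my_recv_handler,
 *				      my_context);
 *	if (IS_ERR(agent))
 *		return PTR_ERR(agent);
 */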
366 
367 static inline int is_snooping_sends(int mad_snoop_flags)
368 {
369 	return (mad_snoop_flags &
370 		(/*IB_MAD_SNOOP_POSTED_SENDS |
371 		 IB_MAD_SNOOP_RMPP_SENDS |*/
372 		 IB_MAD_SNOOP_SEND_COMPLETIONS /*|
373 		 IB_MAD_SNOOP_RMPP_SEND_COMPLETIONS*/));
374 }
375 
376 static inline int is_snooping_recvs(int mad_snoop_flags)
377 {
378 	return (mad_snoop_flags &
379 		(IB_MAD_SNOOP_RECVS /*|
380 		 IB_MAD_SNOOP_RMPP_RECVS*/));
381 }
382 
383 static int register_snoop_agent(struct ib_mad_qp_info *qp_info,
384 				struct ib_mad_snoop_private *mad_snoop_priv)
385 {
386 	struct ib_mad_snoop_private **new_snoop_table;
387 	unsigned long flags;
388 	int i;
389 
390 	spin_lock_irqsave(&qp_info->snoop_lock, flags);
391 	/* Check for empty slot in array. */
392 	for (i = 0; i < qp_info->snoop_table_size; i++)
393 		if (!qp_info->snoop_table[i])
394 			break;
395 
396 	if (i == qp_info->snoop_table_size) {
397 		/* Grow table. */
398 		new_snoop_table = kmalloc(sizeof mad_snoop_priv *
399 					  (qp_info->snoop_table_size + 1),
400 					  GFP_ATOMIC);
401 		if (!new_snoop_table) {
402 			i = -ENOMEM;
403 			goto out;
404 		}
405 		if (qp_info->snoop_table) {
406 			memcpy(new_snoop_table, qp_info->snoop_table,
407 			       sizeof mad_snoop_priv *
408 			       qp_info->snoop_table_size);
409 			kfree(qp_info->snoop_table);
410 		}
411 		qp_info->snoop_table = new_snoop_table;
412 		qp_info->snoop_table_size++;
413 	}
414 	qp_info->snoop_table[i] = mad_snoop_priv;
415 	atomic_inc(&qp_info->snoop_count);
416 out:
417 	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
418 	return i;
419 }
420 
421 struct ib_mad_agent *ib_register_mad_snoop(struct ib_device *device,
422 					   u8 port_num,
423 					   enum ib_qp_type qp_type,
424 					   int mad_snoop_flags,
425 					   ib_mad_snoop_handler snoop_handler,
426 					   ib_mad_recv_handler recv_handler,
427 					   void *context)
428 {
429 	struct ib_mad_port_private *port_priv;
430 	struct ib_mad_agent *ret;
431 	struct ib_mad_snoop_private *mad_snoop_priv;
432 	int qpn;
433 
434 	/* Validate parameters */
435 	if ((is_snooping_sends(mad_snoop_flags) && !snoop_handler) ||
436 	    (is_snooping_recvs(mad_snoop_flags) && !recv_handler)) {
437 		ret = ERR_PTR(-EINVAL);
438 		goto error1;
439 	}
440 	qpn = get_spl_qp_index(qp_type);
441 	if (qpn == -1) {
442 		ret = ERR_PTR(-EINVAL);
443 		goto error1;
444 	}
445 	port_priv = ib_get_mad_port(device, port_num);
446 	if (!port_priv) {
447 		ret = ERR_PTR(-ENODEV);
448 		goto error1;
449 	}
450 	/* Allocate structures */
451 	mad_snoop_priv = kmalloc(sizeof *mad_snoop_priv, GFP_KERNEL);
452 	if (!mad_snoop_priv) {
453 		ret = ERR_PTR(-ENOMEM);
454 		goto error1;
455 	}
456 
457 	/* Now, fill in the various structures */
458 	memset(mad_snoop_priv, 0, sizeof *mad_snoop_priv);
459 	mad_snoop_priv->qp_info = &port_priv->qp_info[qpn];
460 	mad_snoop_priv->agent.device = device;
461 	mad_snoop_priv->agent.recv_handler = recv_handler;
462 	mad_snoop_priv->agent.snoop_handler = snoop_handler;
463 	mad_snoop_priv->agent.context = context;
464 	mad_snoop_priv->agent.qp = port_priv->qp_info[qpn].qp;
465 	mad_snoop_priv->agent.port_num = port_num;
466 	mad_snoop_priv->mad_snoop_flags = mad_snoop_flags;
467 	init_waitqueue_head(&mad_snoop_priv->wait);
468 	mad_snoop_priv->snoop_index = register_snoop_agent(
469 						&port_priv->qp_info[qpn],
470 						mad_snoop_priv);
471 	if (mad_snoop_priv->snoop_index < 0) {
472 		ret = ERR_PTR(mad_snoop_priv->snoop_index);
473 		goto error2;
474 	}
475 
476 	atomic_set(&mad_snoop_priv->refcount, 1);
477 	return &mad_snoop_priv->agent;
478 
479 error2:
480 	kfree(mad_snoop_priv);
481 error1:
482 	return ret;
483 }
484 EXPORT_SYMBOL(ib_register_mad_snoop);
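
/*
 * Illustrative sketch, not part of this file: snoop received GSI MADs on a
 * port.  The handler and context names are hypothetical; with only
 * IB_MAD_SNOOP_RECVS set, a snoop handler is not required.
 *
 *	struct ib_mad_agent *snoop;
 *
 *	snoop = ib_register_mad_snoop(device, port_num, IB_QPT_GSI,
 *				      IB_MAD_SNOOP_RECVS, NULL,
 *				      my_snoop_recv_handler, my_context);
 *	if (IS_ERR(snoop))
 *		return PTR_ERR(snoop);
 */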
485 
486 static void unregister_mad_agent(struct ib_mad_agent_private *mad_agent_priv)
487 {
488 	struct ib_mad_port_private *port_priv;
489 	unsigned long flags;
490 
491 	/* Note that we could still be handling received MADs */
492 
493 	/*
494 	 * Canceling all sends results in dropping received response
495 	 * MADs, preventing us from queuing additional work
496 	 */
497 	cancel_mads(mad_agent_priv);
498 	port_priv = mad_agent_priv->qp_info->port_priv;
499 	cancel_delayed_work(&mad_agent_priv->timed_work);
500 
501 	spin_lock_irqsave(&port_priv->reg_lock, flags);
502 	remove_mad_reg_req(mad_agent_priv);
503 	list_del(&mad_agent_priv->agent_list);
504 	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
505 
506 	flush_workqueue(port_priv->wq);
507 	ib_cancel_rmpp_recvs(mad_agent_priv);
508 
509 	atomic_dec(&mad_agent_priv->refcount);
510 	wait_event(mad_agent_priv->wait,
511 		   !atomic_read(&mad_agent_priv->refcount));
512 
513 	if (mad_agent_priv->reg_req)
514 		kfree(mad_agent_priv->reg_req);
515 	ib_dereg_mr(mad_agent_priv->agent.mr);
516 	kfree(mad_agent_priv);
517 }
518 
519 static void unregister_mad_snoop(struct ib_mad_snoop_private *mad_snoop_priv)
520 {
521 	struct ib_mad_qp_info *qp_info;
522 	unsigned long flags;
523 
524 	qp_info = mad_snoop_priv->qp_info;
525 	spin_lock_irqsave(&qp_info->snoop_lock, flags);
526 	qp_info->snoop_table[mad_snoop_priv->snoop_index] = NULL;
527 	atomic_dec(&qp_info->snoop_count);
528 	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
529 
530 	atomic_dec(&mad_snoop_priv->refcount);
531 	wait_event(mad_snoop_priv->wait,
532 		   !atomic_read(&mad_snoop_priv->refcount));
533 
534 	kfree(mad_snoop_priv);
535 }
536 
537 /*
538  * ib_unregister_mad_agent - Unregisters a client from using MAD services
539  */
540 int ib_unregister_mad_agent(struct ib_mad_agent *mad_agent)
541 {
542 	struct ib_mad_agent_private *mad_agent_priv;
543 	struct ib_mad_snoop_private *mad_snoop_priv;
544 
545 	/* If the TID is zero, the agent can only snoop. */
546 	if (mad_agent->hi_tid) {
547 		mad_agent_priv = container_of(mad_agent,
548 					      struct ib_mad_agent_private,
549 					      agent);
550 		unregister_mad_agent(mad_agent_priv);
551 	} else {
552 		mad_snoop_priv = container_of(mad_agent,
553 					      struct ib_mad_snoop_private,
554 					      agent);
555 		unregister_mad_snoop(mad_snoop_priv);
556 	}
557 	return 0;
558 }
559 EXPORT_SYMBOL(ib_unregister_mad_agent);
560 
561 static inline int response_mad(struct ib_mad *mad)
562 {
563 	/* Trap represses are responses even though the response bit is not set */
564 	return ((mad->mad_hdr.method == IB_MGMT_METHOD_TRAP_REPRESS) ||
565 		(mad->mad_hdr.method & IB_MGMT_METHOD_RESP));
566 }
567 
568 static void dequeue_mad(struct ib_mad_list_head *mad_list)
569 {
570 	struct ib_mad_queue *mad_queue;
571 	unsigned long flags;
572 
573 	BUG_ON(!mad_list->mad_queue);
574 	mad_queue = mad_list->mad_queue;
575 	spin_lock_irqsave(&mad_queue->lock, flags);
576 	list_del(&mad_list->list);
577 	mad_queue->count--;
578 	spin_unlock_irqrestore(&mad_queue->lock, flags);
579 }
580 
581 static void snoop_send(struct ib_mad_qp_info *qp_info,
582 		       struct ib_send_wr *send_wr,
583 		       struct ib_mad_send_wc *mad_send_wc,
584 		       int mad_snoop_flags)
585 {
586 	struct ib_mad_snoop_private *mad_snoop_priv;
587 	unsigned long flags;
588 	int i;
589 
590 	spin_lock_irqsave(&qp_info->snoop_lock, flags);
591 	for (i = 0; i < qp_info->snoop_table_size; i++) {
592 		mad_snoop_priv = qp_info->snoop_table[i];
593 		if (!mad_snoop_priv ||
594 		    !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
595 			continue;
596 
597 		atomic_inc(&mad_snoop_priv->refcount);
598 		spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
599 		mad_snoop_priv->agent.snoop_handler(&mad_snoop_priv->agent,
600 						    send_wr, mad_send_wc);
601 		if (atomic_dec_and_test(&mad_snoop_priv->refcount))
602 			wake_up(&mad_snoop_priv->wait);
603 		spin_lock_irqsave(&qp_info->snoop_lock, flags);
604 	}
605 	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
606 }
607 
608 static void snoop_recv(struct ib_mad_qp_info *qp_info,
609 		       struct ib_mad_recv_wc *mad_recv_wc,
610 		       int mad_snoop_flags)
611 {
612 	struct ib_mad_snoop_private *mad_snoop_priv;
613 	unsigned long flags;
614 	int i;
615 
616 	spin_lock_irqsave(&qp_info->snoop_lock, flags);
617 	for (i = 0; i < qp_info->snoop_table_size; i++) {
618 		mad_snoop_priv = qp_info->snoop_table[i];
619 		if (!mad_snoop_priv ||
620 		    !(mad_snoop_priv->mad_snoop_flags & mad_snoop_flags))
621 			continue;
622 
623 		atomic_inc(&mad_snoop_priv->refcount);
624 		spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
625 		mad_snoop_priv->agent.recv_handler(&mad_snoop_priv->agent,
626 						   mad_recv_wc);
627 		if (atomic_dec_and_test(&mad_snoop_priv->refcount))
628 			wake_up(&mad_snoop_priv->wait);
629 		spin_lock_irqsave(&qp_info->snoop_lock, flags);
630 	}
631 	spin_unlock_irqrestore(&qp_info->snoop_lock, flags);
632 }
633 
634 static void build_smp_wc(u64 wr_id, u16 slid, u16 pkey_index, u8 port_num,
635 			 struct ib_wc *wc)
636 {
637 	memset(wc, 0, sizeof *wc);
638 	wc->wr_id = wr_id;
639 	wc->status = IB_WC_SUCCESS;
640 	wc->opcode = IB_WC_RECV;
641 	wc->pkey_index = pkey_index;
642 	wc->byte_len = sizeof(struct ib_mad) + sizeof(struct ib_grh);
643 	wc->src_qp = IB_QP0;
644 	wc->qp_num = IB_QP0;
645 	wc->slid = slid;
646 	wc->sl = 0;
647 	wc->dlid_path_bits = 0;
648 	wc->port_num = port_num;
649 }
650 
651 /*
652  * Return 0 if SMP is to be sent
653  * Return 1 if SMP was consumed locally (whether or not solicited)
654  * Return < 0 if error
655  */
656 static int handle_outgoing_dr_smp(struct ib_mad_agent_private *mad_agent_priv,
657 				  struct ib_smp *smp,
658 				  struct ib_send_wr *send_wr)
659 {
660 	int ret;
661 	unsigned long flags;
662 	struct ib_mad_local_private *local;
663 	struct ib_mad_private *mad_priv;
664 	struct ib_mad_port_private *port_priv;
665 	struct ib_mad_agent_private *recv_mad_agent = NULL;
666 	struct ib_device *device = mad_agent_priv->agent.device;
667 	u8 port_num = mad_agent_priv->agent.port_num;
668 	struct ib_wc mad_wc;
669 
670 	if (!smi_handle_dr_smp_send(smp, device->node_type, port_num)) {
671 		ret = -EINVAL;
672 		printk(KERN_ERR PFX "Invalid directed route\n");
673 		goto out;
674 	}
675 	/* Check to post send on QP or process locally */
676 	ret = smi_check_local_dr_smp(smp, device, port_num);
677 	if (!ret || !device->process_mad)
678 		goto out;
679 
680 	local = kmalloc(sizeof *local, GFP_ATOMIC);
681 	if (!local) {
682 		ret = -ENOMEM;
683 		printk(KERN_ERR PFX "No memory for ib_mad_local_private\n");
684 		goto out;
685 	}
686 	local->mad_priv = NULL;
687 	local->recv_mad_agent = NULL;
688 	mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_ATOMIC);
689 	if (!mad_priv) {
690 		ret = -ENOMEM;
691 		printk(KERN_ERR PFX "No memory for local response MAD\n");
692 		kfree(local);
693 		goto out;
694 	}
695 
696 	build_smp_wc(send_wr->wr_id, be16_to_cpu(smp->dr_slid),
697 		     send_wr->wr.ud.pkey_index,
698 		     send_wr->wr.ud.port_num, &mad_wc);
699 
700 	/* No GRH for DR SMP */
701 	ret = device->process_mad(device, 0, port_num, &mad_wc, NULL,
702 				  (struct ib_mad *)smp,
703 				  (struct ib_mad *)&mad_priv->mad);
704 	switch (ret)
705 	{
706 	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_REPLY:
707 		if (response_mad(&mad_priv->mad.mad) &&
708 		    mad_agent_priv->agent.recv_handler) {
709 			local->mad_priv = mad_priv;
710 			local->recv_mad_agent = mad_agent_priv;
711 			/*
712 			 * Reference MAD agent until receive
713 			 * side of local completion handled
714 			 */
715 			atomic_inc(&mad_agent_priv->refcount);
716 		} else
717 			kmem_cache_free(ib_mad_cache, mad_priv);
718 		break;
719 	case IB_MAD_RESULT_SUCCESS | IB_MAD_RESULT_CONSUMED:
720 		kmem_cache_free(ib_mad_cache, mad_priv);
721 		break;
722 	case IB_MAD_RESULT_SUCCESS:
723 		/* Treat like an incoming receive MAD */
724 		port_priv = ib_get_mad_port(mad_agent_priv->agent.device,
725 					    mad_agent_priv->agent.port_num);
726 		if (port_priv) {
727 			mad_priv->mad.mad.mad_hdr.tid =
728 				((struct ib_mad *)smp)->mad_hdr.tid;
729 			recv_mad_agent = find_mad_agent(port_priv,
730 						        &mad_priv->mad.mad);
731 		}
732 		if (!port_priv || !recv_mad_agent) {
733 			kmem_cache_free(ib_mad_cache, mad_priv);
734 			kfree(local);
735 			ret = 0;
736 			goto out;
737 		}
738 		local->mad_priv = mad_priv;
739 		local->recv_mad_agent = recv_mad_agent;
740 		break;
741 	default:
742 		kmem_cache_free(ib_mad_cache, mad_priv);
743 		kfree(local);
744 		ret = -EINVAL;
745 		goto out;
746 	}
747 
748 	local->send_wr = *send_wr;
749 	local->send_wr.sg_list = local->sg_list;
750 	memcpy(local->sg_list, send_wr->sg_list,
751 	       sizeof *send_wr->sg_list * send_wr->num_sge);
752 	local->send_wr.next = NULL;
753 	local->tid = send_wr->wr.ud.mad_hdr->tid;
754 	local->wr_id = send_wr->wr_id;
755 	/* Reference MAD agent until send side of local completion handled */
756 	atomic_inc(&mad_agent_priv->refcount);
757 	/* Queue local completion to local list */
758 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
759 	list_add_tail(&local->completion_list, &mad_agent_priv->local_list);
760 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
761 	queue_work(mad_agent_priv->qp_info->port_priv->wq,
762 		   &mad_agent_priv->local_work);
763 	ret = 1;
764 out:
765 	return ret;
766 }
767 
768 static int get_buf_length(int hdr_len, int data_len)
769 {
770 	int seg_size, pad;
771 
772 	seg_size = sizeof(struct ib_mad) - hdr_len;
773 	if (data_len && seg_size) {
774 		pad = seg_size - data_len % seg_size;
775 		if (pad == seg_size)
776 			pad = 0;
777 	} else
778 		pad = seg_size;
779 	return hdr_len + data_len + pad;
780 }
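
/*
 * Worked example for the padding above (numbers are illustrative only):
 * with hdr_len = 36 (MAD header plus RMPP header) the data segment size is
 * seg_size = 256 - 36 = 220.  For data_len = 500, pad = 220 - (500 % 220)
 * = 160, so the buffer is 36 + 500 + 160 = 696 bytes, i.e. the header plus
 * a whole number of 220-byte segments.
 */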
781 
782 struct ib_mad_send_buf * ib_create_send_mad(struct ib_mad_agent *mad_agent,
783 					    u32 remote_qpn, u16 pkey_index,
784 					    struct ib_ah *ah, int rmpp_active,
785 					    int hdr_len, int data_len,
786 					    gfp_t gfp_mask)
787 {
788 	struct ib_mad_agent_private *mad_agent_priv;
789 	struct ib_mad_send_buf *send_buf;
790 	int buf_size;
791 	void *buf;
792 
793 	mad_agent_priv = container_of(mad_agent,
794 				      struct ib_mad_agent_private, agent);
795 	buf_size = get_buf_length(hdr_len, data_len);
796 
797 	if ((!mad_agent->rmpp_version &&
798 	     (rmpp_active || buf_size > sizeof(struct ib_mad))) ||
799 	    (!rmpp_active && buf_size > sizeof(struct ib_mad)))
800 		return ERR_PTR(-EINVAL);
801 
802 	buf = kmalloc(sizeof *send_buf + buf_size, gfp_mask);
803 	if (!buf)
804 		return ERR_PTR(-ENOMEM);
805 	memset(buf, 0, sizeof *send_buf + buf_size);
806 
807 	send_buf = buf + buf_size;
808 	send_buf->mad = buf;
809 
810 	send_buf->sge.addr = dma_map_single(mad_agent->device->dma_device,
811 					    buf, buf_size, DMA_TO_DEVICE);
812 	pci_unmap_addr_set(send_buf, mapping, send_buf->sge.addr);
813 	send_buf->sge.length = buf_size;
814 	send_buf->sge.lkey = mad_agent->mr->lkey;
815 
816 	send_buf->send_wr.wr_id = (unsigned long) send_buf;
817 	send_buf->send_wr.sg_list = &send_buf->sge;
818 	send_buf->send_wr.num_sge = 1;
819 	send_buf->send_wr.opcode = IB_WR_SEND;
820 	send_buf->send_wr.send_flags = IB_SEND_SIGNALED;
821 	send_buf->send_wr.wr.ud.ah = ah;
822 	send_buf->send_wr.wr.ud.mad_hdr = &send_buf->mad->mad_hdr;
823 	send_buf->send_wr.wr.ud.remote_qpn = remote_qpn;
824 	send_buf->send_wr.wr.ud.remote_qkey = IB_QP_SET_QKEY;
825 	send_buf->send_wr.wr.ud.pkey_index = pkey_index;
826 
827 	if (rmpp_active) {
828 		struct ib_rmpp_mad *rmpp_mad;
829 		rmpp_mad = (struct ib_rmpp_mad *)send_buf->mad;
830 		rmpp_mad->rmpp_hdr.paylen_newwin = cpu_to_be32(hdr_len -
831 			offsetof(struct ib_rmpp_mad, data) + data_len);
832 		rmpp_mad->rmpp_hdr.rmpp_version = mad_agent->rmpp_version;
833 		rmpp_mad->rmpp_hdr.rmpp_type = IB_MGMT_RMPP_TYPE_DATA;
834 		ib_set_rmpp_flags(&rmpp_mad->rmpp_hdr,
835 				  IB_MGMT_RMPP_FLAG_ACTIVE);
836 	}
837 
838 	send_buf->mad_agent = mad_agent;
839 	atomic_inc(&mad_agent_priv->refcount);
840 	return send_buf;
841 }
842 EXPORT_SYMBOL(ib_create_send_mad);
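
/*
 * Illustrative sketch, not part of this file: allocate a plain (non-RMPP)
 * send buffer and fill in the MAD header.  The header length of 24 bytes
 * (just the common MAD header) and the method are hypothetical choices.
 *
 *	struct ib_mad_send_buf *msg;
 *
 *	msg = ib_create_send_mad(agent, remote_qpn, pkey_index, ah, 0,
 *				 24, sizeof(struct ib_mad) - 24, GFP_KERNEL);
 *	if (IS_ERR(msg))
 *		return PTR_ERR(msg);
 *	msg->mad->mad_hdr.method = IB_MGMT_METHOD_GET;
 */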
843 
844 void ib_free_send_mad(struct ib_mad_send_buf *send_buf)
845 {
846 	struct ib_mad_agent_private *mad_agent_priv;
847 
848 	mad_agent_priv = container_of(send_buf->mad_agent,
849 				      struct ib_mad_agent_private, agent);
850 
851 	dma_unmap_single(send_buf->mad_agent->device->dma_device,
852 			 pci_unmap_addr(send_buf, mapping),
853 			 send_buf->sge.length, DMA_TO_DEVICE);
854 	kfree(send_buf->mad);
855 
856 	if (atomic_dec_and_test(&mad_agent_priv->refcount))
857 		wake_up(&mad_agent_priv->wait);
858 }
859 EXPORT_SYMBOL(ib_free_send_mad);
860 
861 int ib_send_mad(struct ib_mad_send_wr_private *mad_send_wr)
862 {
863 	struct ib_mad_qp_info *qp_info;
864 	struct ib_send_wr *bad_send_wr;
865 	struct list_head *list;
866 	unsigned long flags;
867 	int ret;
868 
869 	/* Set WR ID to find mad_send_wr upon completion */
870 	qp_info = mad_send_wr->mad_agent_priv->qp_info;
871 	mad_send_wr->send_wr.wr_id = (unsigned long)&mad_send_wr->mad_list;
872 	mad_send_wr->mad_list.mad_queue = &qp_info->send_queue;
873 
874 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
875 	if (qp_info->send_queue.count < qp_info->send_queue.max_active) {
876 		ret = ib_post_send(mad_send_wr->mad_agent_priv->agent.qp,
877 				   &mad_send_wr->send_wr, &bad_send_wr);
878 		list = &qp_info->send_queue.list;
879 	} else {
880 		ret = 0;
881 		list = &qp_info->overflow_list;
882 	}
883 
884 	if (!ret) {
885 		qp_info->send_queue.count++;
886 		list_add_tail(&mad_send_wr->mad_list.list, list);
887 	}
888 	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
889 	return ret;
890 }
891 
892 /*
893  * ib_post_send_mad - Posts MAD(s) to the send queue of the QP associated
894  *  with the registered client
895  */
896 int ib_post_send_mad(struct ib_mad_agent *mad_agent,
897 		     struct ib_send_wr *send_wr,
898 		     struct ib_send_wr **bad_send_wr)
899 {
900 	int ret = -EINVAL;
901 	struct ib_mad_agent_private *mad_agent_priv;
902 
903 	/* Validate supplied parameters */
904 	if (!bad_send_wr)
905 		goto error1;
906 
907 	if (!mad_agent || !send_wr)
908 		goto error2;
909 
910 	if (!mad_agent->send_handler)
911 		goto error2;
912 
913 	mad_agent_priv = container_of(mad_agent,
914 				      struct ib_mad_agent_private,
915 				      agent);
916 
917 	/* Walk list of send WRs and post each on send list */
918 	while (send_wr) {
919 		unsigned long			flags;
920 		struct ib_send_wr		*next_send_wr;
921 		struct ib_mad_send_wr_private	*mad_send_wr;
922 		struct ib_smp			*smp;
923 
924 		/* Validate more parameters */
925 		if (send_wr->num_sge > IB_MAD_SEND_REQ_MAX_SG)
926 			goto error2;
927 
928 		if (send_wr->wr.ud.timeout_ms && !mad_agent->recv_handler)
929 			goto error2;
930 
931 		if (!send_wr->wr.ud.mad_hdr) {
932 			printk(KERN_ERR PFX "MAD header must be supplied "
933 			       "in WR %p\n", send_wr);
934 			goto error2;
935 		}
936 
937 		/*
938 		 * Save pointer to next work request to post in case the
939 		 * current one completes, and the user modifies the work
940 		 * request associated with the completion
941 		 */
942 		next_send_wr = (struct ib_send_wr *)send_wr->next;
943 
944 		smp = (struct ib_smp *)send_wr->wr.ud.mad_hdr;
945 		if (smp->mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
946 			ret = handle_outgoing_dr_smp(mad_agent_priv, smp,
947 						     send_wr);
948 			if (ret < 0)		/* error */
949 				goto error2;
950 			else if (ret == 1)	/* locally consumed */
951 				goto next;
952 		}
953 
954 		/* Allocate MAD send WR tracking structure */
955 		mad_send_wr = kmalloc(sizeof *mad_send_wr, GFP_ATOMIC);
956 		if (!mad_send_wr) {
957 			printk(KERN_ERR PFX "No memory for "
958 			       "ib_mad_send_wr_private\n");
959 			ret = -ENOMEM;
960 			goto error2;
961 		}
962 		memset(mad_send_wr, 0, sizeof *mad_send_wr);
963 
964 		mad_send_wr->send_wr = *send_wr;
965 		mad_send_wr->send_wr.sg_list = mad_send_wr->sg_list;
966 		memcpy(mad_send_wr->sg_list, send_wr->sg_list,
967 		       sizeof *send_wr->sg_list * send_wr->num_sge);
968 		mad_send_wr->wr_id = send_wr->wr_id;
969 		mad_send_wr->tid = send_wr->wr.ud.mad_hdr->tid;
970 		mad_send_wr->mad_agent_priv = mad_agent_priv;
971 		/* Timeout will be updated after send completes */
972 		mad_send_wr->timeout = msecs_to_jiffies(send_wr->wr.
973 							ud.timeout_ms);
974 		mad_send_wr->retries = mad_send_wr->send_wr.wr.ud.retries;
975 		/* One reference for each work request to QP + response */
976 		mad_send_wr->refcount = 1 + (mad_send_wr->timeout > 0);
977 		mad_send_wr->status = IB_WC_SUCCESS;
978 
979 		/* Reference MAD agent until send completes */
980 		atomic_inc(&mad_agent_priv->refcount);
981 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
982 		list_add_tail(&mad_send_wr->agent_list,
983 			      &mad_agent_priv->send_list);
984 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
985 
986 		if (mad_agent_priv->agent.rmpp_version) {
987 			ret = ib_send_rmpp_mad(mad_send_wr);
988 			if (ret >= 0 && ret != IB_RMPP_RESULT_CONSUMED)
989 				ret = ib_send_mad(mad_send_wr);
990 		} else
991 			ret = ib_send_mad(mad_send_wr);
992 		if (ret < 0) {
993 			/* Fail send request */
994 			spin_lock_irqsave(&mad_agent_priv->lock, flags);
995 			list_del(&mad_send_wr->agent_list);
996 			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
997 			atomic_dec(&mad_agent_priv->refcount);
998 			goto error2;
999 		}
1000 next:
1001 		send_wr = next_send_wr;
1002 	}
1003 	return 0;
1004 
1005 error2:
1006 	*bad_send_wr = send_wr;
1007 error1:
1008 	return ret;
1009 }
1010 EXPORT_SYMBOL(ib_post_send_mad);
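
/*
 * Illustrative sketch, not part of this file: post the buffer created by
 * ib_create_send_mad() above.  The timeout value is hypothetical; on
 * failure the caller still owns the buffer and must free it.
 *
 *	struct ib_send_wr *bad_wr;
 *	int ret;
 *
 *	msg->send_wr.wr.ud.timeout_ms = 1000;
 *	ret = ib_post_send_mad(agent, &msg->send_wr, &bad_wr);
 *	if (ret)
 *		ib_free_send_mad(msg);
 */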
1011 
1012 /*
1013  * ib_free_recv_mad - Returns data buffers used to receive
1014  *  a MAD to the access layer
1015  */
1016 void ib_free_recv_mad(struct ib_mad_recv_wc *mad_recv_wc)
1017 {
1018 	struct ib_mad_recv_buf *mad_recv_buf, *temp_recv_buf;
1019 	struct ib_mad_private_header *mad_priv_hdr;
1020 	struct ib_mad_private *priv;
1021 	struct list_head free_list;
1022 
1023 	INIT_LIST_HEAD(&free_list);
1024 	list_splice_init(&mad_recv_wc->rmpp_list, &free_list);
1025 
1026 	list_for_each_entry_safe(mad_recv_buf, temp_recv_buf,
1027 					&free_list, list) {
1028 		mad_recv_wc = container_of(mad_recv_buf, struct ib_mad_recv_wc,
1029 					   recv_buf);
1030 		mad_priv_hdr = container_of(mad_recv_wc,
1031 					    struct ib_mad_private_header,
1032 					    recv_wc);
1033 		priv = container_of(mad_priv_hdr, struct ib_mad_private,
1034 				    header);
1035 		kmem_cache_free(ib_mad_cache, priv);
1036 	}
1037 }
1038 EXPORT_SYMBOL(ib_free_recv_mad);
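
/*
 * Illustrative sketch, not part of this file: a client receive handler is
 * expected to return the receive buffers once it is done with them.  The
 * handler name is hypothetical.
 *
 *	static void my_recv_handler(struct ib_mad_agent *mad_agent,
 *				    struct ib_mad_recv_wc *mad_recv_wc)
 *	{
 *		(examine mad_recv_wc->recv_buf.mad here)
 *		ib_free_recv_mad(mad_recv_wc);
 *	}
 */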
1039 
1040 struct ib_mad_agent *ib_redirect_mad_qp(struct ib_qp *qp,
1041 					u8 rmpp_version,
1042 					ib_mad_send_handler send_handler,
1043 					ib_mad_recv_handler recv_handler,
1044 					void *context)
1045 {
1046 	return ERR_PTR(-EINVAL);	/* XXX: for now */
1047 }
1048 EXPORT_SYMBOL(ib_redirect_mad_qp);
1049 
1050 int ib_process_mad_wc(struct ib_mad_agent *mad_agent,
1051 		      struct ib_wc *wc)
1052 {
1053 	printk(KERN_ERR PFX "ib_process_mad_wc() not implemented yet\n");
1054 	return 0;
1055 }
1056 EXPORT_SYMBOL(ib_process_mad_wc);
1057 
1058 static int method_in_use(struct ib_mad_mgmt_method_table **method,
1059 			 struct ib_mad_reg_req *mad_reg_req)
1060 {
1061 	int i;
1062 
1063 	for (i = find_first_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS);
1064 	     i < IB_MGMT_MAX_METHODS;
1065 	     i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
1066 			       1+i)) {
1067 		if ((*method)->agent[i]) {
1068 			printk(KERN_ERR PFX "Method %d already in use\n", i);
1069 			return -EINVAL;
1070 		}
1071 	}
1072 	return 0;
1073 }
1074 
1075 static int allocate_method_table(struct ib_mad_mgmt_method_table **method)
1076 {
1077 	/* Allocate management method table */
1078 	*method = kmalloc(sizeof **method, GFP_ATOMIC);
1079 	if (!*method) {
1080 		printk(KERN_ERR PFX "No memory for "
1081 		       "ib_mad_mgmt_method_table\n");
1082 		return -ENOMEM;
1083 	}
1084 	/* Clear management method table */
1085 	memset(*method, 0, sizeof **method);
1086 
1087 	return 0;
1088 }
1089 
1090 /*
1091  * Check to see if there are any methods still in use
1092  */
1093 static int check_method_table(struct ib_mad_mgmt_method_table *method)
1094 {
1095 	int i;
1096 
1097 	for (i = 0; i < IB_MGMT_MAX_METHODS; i++)
1098 		if (method->agent[i])
1099 			return 1;
1100 	return 0;
1101 }
1102 
1103 /*
1104  * Check to see if there are any method tables for this class still in use
1105  */
1106 static int check_class_table(struct ib_mad_mgmt_class_table *class)
1107 {
1108 	int i;
1109 
1110 	for (i = 0; i < MAX_MGMT_CLASS; i++)
1111 		if (class->method_table[i])
1112 			return 1;
1113 	return 0;
1114 }
1115 
1116 static int check_vendor_class(struct ib_mad_mgmt_vendor_class *vendor_class)
1117 {
1118 	int i;
1119 
1120 	for (i = 0; i < MAX_MGMT_OUI; i++)
1121 		if (vendor_class->method_table[i])
1122 			return 1;
1123 	return 0;
1124 }
1125 
1126 static int find_vendor_oui(struct ib_mad_mgmt_vendor_class *vendor_class,
1127 			   char *oui)
1128 {
1129 	int i;
1130 
1131 	for (i = 0; i < MAX_MGMT_OUI; i++)
1132 		/* Is there matching OUI for this vendor class ? */
1133 		if (!memcmp(vendor_class->oui[i], oui, 3))
1134 			return i;
1135 
1136 	return -1;
1137 }
1138 
1139 static int check_vendor_table(struct ib_mad_mgmt_vendor_class_table *vendor)
1140 {
1141 	int i;
1142 
1143 	for (i = 0; i < MAX_MGMT_VENDOR_RANGE2; i++)
1144 		if (vendor->vendor_class[i])
1145 			return 1;
1146 
1147 	return 0;
1148 }
1149 
1150 static void remove_methods_mad_agent(struct ib_mad_mgmt_method_table *method,
1151 				     struct ib_mad_agent_private *agent)
1152 {
1153 	int i;
1154 
1155 	/* Remove any methods for this mad agent */
1156 	for (i = 0; i < IB_MGMT_MAX_METHODS; i++) {
1157 		if (method->agent[i] == agent) {
1158 			method->agent[i] = NULL;
1159 		}
1160 	}
1161 }
1162 
1163 static int add_nonoui_reg_req(struct ib_mad_reg_req *mad_reg_req,
1164 			      struct ib_mad_agent_private *agent_priv,
1165 			      u8 mgmt_class)
1166 {
1167 	struct ib_mad_port_private *port_priv;
1168 	struct ib_mad_mgmt_class_table **class;
1169 	struct ib_mad_mgmt_method_table **method;
1170 	int i, ret;
1171 
1172 	port_priv = agent_priv->qp_info->port_priv;
1173 	class = &port_priv->version[mad_reg_req->mgmt_class_version].class;
1174 	if (!*class) {
1175 		/* Allocate management class table for "new" class version */
1176 		*class = kmalloc(sizeof **class, GFP_ATOMIC);
1177 		if (!*class) {
1178 			printk(KERN_ERR PFX "No memory for "
1179 			       "ib_mad_mgmt_class_table\n");
1180 			ret = -ENOMEM;
1181 			goto error1;
1182 		}
1183 		/* Clear management class table */
1184 		memset(*class, 0, sizeof(**class));
1185 		/* Allocate method table for this management class */
1186 		method = &(*class)->method_table[mgmt_class];
1187 		if ((ret = allocate_method_table(method)))
1188 			goto error2;
1189 	} else {
1190 		method = &(*class)->method_table[mgmt_class];
1191 		if (!*method) {
1192 			/* Allocate method table for this management class */
1193 			if ((ret = allocate_method_table(method)))
1194 				goto error1;
1195 		}
1196 	}
1197 
1198 	/* Now, make sure methods are not already in use */
1199 	if (method_in_use(method, mad_reg_req))
1200 		goto error3;
1201 
1202 	/* Finally, add in methods being registered */
1203 	for (i = find_first_bit(mad_reg_req->method_mask,
1204 				IB_MGMT_MAX_METHODS);
1205 	     i < IB_MGMT_MAX_METHODS;
1206 	     i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
1207 			       1+i)) {
1208 		(*method)->agent[i] = agent_priv;
1209 	}
1210 	return 0;
1211 
1212 error3:
1213 	/* Remove any methods for this mad agent */
1214 	remove_methods_mad_agent(*method, agent_priv);
1215 	/* Now, check to see if there are any methods in use */
1216 	if (!check_method_table(*method)) {
1217 		/* If not, release management method table */
1218 		kfree(*method);
1219 		*method = NULL;
1220 	}
1221 	ret = -EINVAL;
1222 	goto error1;
1223 error2:
1224 	kfree(*class);
1225 	*class = NULL;
1226 error1:
1227 	return ret;
1228 }
1229 
1230 static int add_oui_reg_req(struct ib_mad_reg_req *mad_reg_req,
1231 			   struct ib_mad_agent_private *agent_priv)
1232 {
1233 	struct ib_mad_port_private *port_priv;
1234 	struct ib_mad_mgmt_vendor_class_table **vendor_table;
1235 	struct ib_mad_mgmt_vendor_class_table *vendor = NULL;
1236 	struct ib_mad_mgmt_vendor_class *vendor_class = NULL;
1237 	struct ib_mad_mgmt_method_table **method;
1238 	int i, ret = -ENOMEM;
1239 	u8 vclass;
1240 
1241 	/* "New" vendor (with OUI) class */
1242 	vclass = vendor_class_index(mad_reg_req->mgmt_class);
1243 	port_priv = agent_priv->qp_info->port_priv;
1244 	vendor_table = &port_priv->version[
1245 				mad_reg_req->mgmt_class_version].vendor;
1246 	if (!*vendor_table) {
1247 		/* Allocate mgmt vendor class table for "new" class version */
1248 		vendor = kmalloc(sizeof *vendor, GFP_ATOMIC);
1249 		if (!vendor) {
1250 			printk(KERN_ERR PFX "No memory for "
1251 			       "ib_mad_mgmt_vendor_class_table\n");
1252 			goto error1;
1253 		}
1254 		/* Clear management vendor class table */
1255 		memset(vendor, 0, sizeof(*vendor));
1256 		*vendor_table = vendor;
1257 	}
1258 	if (!(*vendor_table)->vendor_class[vclass]) {
1259 		/* Allocate table for this management vendor class */
1260 		vendor_class = kmalloc(sizeof *vendor_class, GFP_ATOMIC);
1261 		if (!vendor_class) {
1262 			printk(KERN_ERR PFX "No memory for "
1263 			       "ib_mad_mgmt_vendor_class\n");
1264 			goto error2;
1265 		}
1266 		memset(vendor_class, 0, sizeof(*vendor_class));
1267 		(*vendor_table)->vendor_class[vclass] = vendor_class;
1268 	}
1269 	for (i = 0; i < MAX_MGMT_OUI; i++) {
1270 		/* Is there matching OUI for this vendor class ? */
1271 		if (!memcmp((*vendor_table)->vendor_class[vclass]->oui[i],
1272 			    mad_reg_req->oui, 3)) {
1273 			method = &(*vendor_table)->vendor_class[
1274 						vclass]->method_table[i];
1275 			BUG_ON(!*method);
1276 			goto check_in_use;
1277 		}
1278 	}
1279 	for (i = 0; i < MAX_MGMT_OUI; i++) {
1280 		/* OUI slot available ? */
1281 		if (!is_vendor_oui((*vendor_table)->vendor_class[
1282 				vclass]->oui[i])) {
1283 			method = &(*vendor_table)->vendor_class[
1284 				vclass]->method_table[i];
1285 			BUG_ON(*method);
1286 			/* Allocate method table for this OUI */
1287 			if ((ret = allocate_method_table(method)))
1288 				goto error3;
1289 			memcpy((*vendor_table)->vendor_class[vclass]->oui[i],
1290 			       mad_reg_req->oui, 3);
1291 			goto check_in_use;
1292 		}
1293 	}
1294 	printk(KERN_ERR PFX "All OUI slots in use\n");
1295 	goto error3;
1296 
1297 check_in_use:
1298 	/* Now, make sure methods are not already in use */
1299 	if (method_in_use(method, mad_reg_req))
1300 		goto error4;
1301 
1302 	/* Finally, add in methods being registered */
1303 	for (i = find_first_bit(mad_reg_req->method_mask,
1304 				IB_MGMT_MAX_METHODS);
1305 	     i < IB_MGMT_MAX_METHODS;
1306 	     i = find_next_bit(mad_reg_req->method_mask, IB_MGMT_MAX_METHODS,
1307 			       1+i)) {
1308 		(*method)->agent[i] = agent_priv;
1309 	}
1310 	return 0;
1311 
1312 error4:
1313 	/* Remove any methods for this mad agent */
1314 	remove_methods_mad_agent(*method, agent_priv);
1315 	/* Now, check to see if there are any methods in use */
1316 	if (!check_method_table(*method)) {
1317 		/* If not, release management method table */
1318 		kfree(*method);
1319 		*method = NULL;
1320 	}
1321 	ret = -EINVAL;
1322 error3:
1323 	if (vendor_class) {
1324 		(*vendor_table)->vendor_class[vclass] = NULL;
1325 		kfree(vendor_class);
1326 	}
1327 error2:
1328 	if (vendor) {
1329 		*vendor_table = NULL;
1330 		kfree(vendor);
1331 	}
1332 error1:
1333 	return ret;
1334 }
1335 
1336 static void remove_mad_reg_req(struct ib_mad_agent_private *agent_priv)
1337 {
1338 	struct ib_mad_port_private *port_priv;
1339 	struct ib_mad_mgmt_class_table *class;
1340 	struct ib_mad_mgmt_method_table *method;
1341 	struct ib_mad_mgmt_vendor_class_table *vendor;
1342 	struct ib_mad_mgmt_vendor_class *vendor_class;
1343 	int index;
1344 	u8 mgmt_class;
1345 
1346 	/*
1347 	 * Was MAD registration request supplied
1348 	 * with original registration ?
1349 	 */
1350 	if (!agent_priv->reg_req) {
1351 		goto out;
1352 	}
1353 
1354 	port_priv = agent_priv->qp_info->port_priv;
1355 	mgmt_class = convert_mgmt_class(agent_priv->reg_req->mgmt_class);
1356 	class = port_priv->version[
1357 			agent_priv->reg_req->mgmt_class_version].class;
1358 	if (!class)
1359 		goto vendor_check;
1360 
1361 	method = class->method_table[mgmt_class];
1362 	if (method) {
1363 		/* Remove any methods for this mad agent */
1364 		remove_methods_mad_agent(method, agent_priv);
1365 		/* Now, check to see if there are any methods still in use */
1366 		if (!check_method_table(method)) {
1367 			/* If not, release management method table */
1368 			kfree(method);
1369 			class->method_table[mgmt_class] = NULL;
1370 			/* Any management classes left ? */
1371 			if (!check_class_table(class)) {
1372 				/* If not, release management class table */
1373 				kfree(class);
1374 				port_priv->version[
1375 					agent_priv->reg_req->
1376 					mgmt_class_version].class = NULL;
1377 			}
1378 		}
1379 	}
1380 
1381 vendor_check:
1382 	if (!is_vendor_class(mgmt_class))
1383 		goto out;
1384 
1385 	/* normalize mgmt_class to vendor range 2 */
1386 	mgmt_class = vendor_class_index(agent_priv->reg_req->mgmt_class);
1387 	vendor = port_priv->version[
1388 			agent_priv->reg_req->mgmt_class_version].vendor;
1389 
1390 	if (!vendor)
1391 		goto out;
1392 
1393 	vendor_class = vendor->vendor_class[mgmt_class];
1394 	if (vendor_class) {
1395 		index = find_vendor_oui(vendor_class, agent_priv->reg_req->oui);
1396 		if (index < 0)
1397 			goto out;
1398 		method = vendor_class->method_table[index];
1399 		if (method) {
1400 			/* Remove any methods for this mad agent */
1401 			remove_methods_mad_agent(method, agent_priv);
1402 			/*
1403 			 * Now, check to see if there are
1404 			 * any methods still in use
1405 			 */
1406 			if (!check_method_table(method)) {
1407 				/* If not, release management method table */
1408 				kfree(method);
1409 				vendor_class->method_table[index] = NULL;
1410 				memset(vendor_class->oui[index], 0, 3);
1411 				/* Any OUIs left ? */
1412 				if (!check_vendor_class(vendor_class)) {
1413 					/* If not, release vendor class table */
1414 					kfree(vendor_class);
1415 					vendor->vendor_class[mgmt_class] = NULL;
1416 					/* Any other vendor classes left ? */
1417 					if (!check_vendor_table(vendor)) {
1418 						kfree(vendor);
1419 						port_priv->version[
1420 							agent_priv->reg_req->
1421 							mgmt_class_version].
1422 							vendor = NULL;
1423 					}
1424 				}
1425 			}
1426 		}
1427 	}
1428 
1429 out:
1430 	return;
1431 }
1432 
1433 static struct ib_mad_agent_private *
1434 find_mad_agent(struct ib_mad_port_private *port_priv,
1435 	       struct ib_mad *mad)
1436 {
1437 	struct ib_mad_agent_private *mad_agent = NULL;
1438 	unsigned long flags;
1439 
1440 	spin_lock_irqsave(&port_priv->reg_lock, flags);
1441 	if (response_mad(mad)) {
1442 		u32 hi_tid;
1443 		struct ib_mad_agent_private *entry;
1444 
1445 		/*
1446 		 * Routing is based on high 32 bits of transaction ID
1447 		 * of MAD.
1448 		 */
1449 		hi_tid = be64_to_cpu(mad->mad_hdr.tid) >> 32;
1450 		list_for_each_entry(entry, &port_priv->agent_list,
1451 				    agent_list) {
1452 			if (entry->agent.hi_tid == hi_tid) {
1453 				mad_agent = entry;
1454 				break;
1455 			}
1456 		}
1457 	} else {
1458 		struct ib_mad_mgmt_class_table *class;
1459 		struct ib_mad_mgmt_method_table *method;
1460 		struct ib_mad_mgmt_vendor_class_table *vendor;
1461 		struct ib_mad_mgmt_vendor_class *vendor_class;
1462 		struct ib_vendor_mad *vendor_mad;
1463 		int index;
1464 
1465 		/*
1466 		 * Routing is based on version, class, and method
1467 		 * For "newer" vendor MADs, also based on OUI
1468 		 */
1469 		if (mad->mad_hdr.class_version >= MAX_MGMT_VERSION)
1470 			goto out;
1471 		if (!is_vendor_class(mad->mad_hdr.mgmt_class)) {
1472 			class = port_priv->version[
1473 					mad->mad_hdr.class_version].class;
1474 			if (!class)
1475 				goto out;
1476 			method = class->method_table[convert_mgmt_class(
1477 							mad->mad_hdr.mgmt_class)];
1478 			if (method)
1479 				mad_agent = method->agent[mad->mad_hdr.method &
1480 							  ~IB_MGMT_METHOD_RESP];
1481 		} else {
1482 			vendor = port_priv->version[
1483 					mad->mad_hdr.class_version].vendor;
1484 			if (!vendor)
1485 				goto out;
1486 			vendor_class = vendor->vendor_class[vendor_class_index(
1487 						mad->mad_hdr.mgmt_class)];
1488 			if (!vendor_class)
1489 				goto out;
1490 			/* Find matching OUI */
1491 			vendor_mad = (struct ib_vendor_mad *)mad;
1492 			index = find_vendor_oui(vendor_class, vendor_mad->oui);
1493 			if (index == -1)
1494 				goto out;
1495 			method = vendor_class->method_table[index];
1496 			if (method) {
1497 				mad_agent = method->agent[mad->mad_hdr.method &
1498 							  ~IB_MGMT_METHOD_RESP];
1499 			}
1500 		}
1501 	}
1502 
1503 	if (mad_agent) {
1504 		if (mad_agent->agent.recv_handler)
1505 			atomic_inc(&mad_agent->refcount);
1506 		else {
1507 			printk(KERN_NOTICE PFX "No receive handler for client "
1508 			       "%p on port %d\n",
1509 			       &mad_agent->agent, port_priv->port_num);
1510 			mad_agent = NULL;
1511 		}
1512 	}
1513 out:
1514 	spin_unlock_irqrestore(&port_priv->reg_lock, flags);
1515 
1516 	return mad_agent;
1517 }
1518 
1519 static int validate_mad(struct ib_mad *mad, u32 qp_num)
1520 {
1521 	int valid = 0;
1522 
1523 	/* Make sure MAD base version is understood */
1524 	if (mad->mad_hdr.base_version != IB_MGMT_BASE_VERSION) {
1525 		printk(KERN_ERR PFX "MAD received with unsupported base "
1526 		       "version %d\n", mad->mad_hdr.base_version);
1527 		goto out;
1528 	}
1529 
1530 	/* Filter SMI packets sent to other than QP0 */
1531 	if ((mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_LID_ROUTED) ||
1532 	    (mad->mad_hdr.mgmt_class == IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)) {
1533 		if (qp_num == 0)
1534 			valid = 1;
1535 	} else {
1536 		/* Filter GSI packets sent to QP0 */
1537 		if (qp_num != 0)
1538 			valid = 1;
1539 	}
1540 
1541 out:
1542 	return valid;
1543 }
1544 
1545 static int is_data_mad(struct ib_mad_agent_private *mad_agent_priv,
1546 		       struct ib_mad_hdr *mad_hdr)
1547 {
1548 	struct ib_rmpp_mad *rmpp_mad;
1549 
1550 	rmpp_mad = (struct ib_rmpp_mad *)mad_hdr;
1551 	return !mad_agent_priv->agent.rmpp_version ||
1552 		!(ib_get_rmpp_flags(&rmpp_mad->rmpp_hdr) &
1553 				    IB_MGMT_RMPP_FLAG_ACTIVE) ||
1554 		(rmpp_mad->rmpp_hdr.rmpp_type == IB_MGMT_RMPP_TYPE_DATA);
1555 }
1556 
1557 struct ib_mad_send_wr_private*
1558 ib_find_send_mad(struct ib_mad_agent_private *mad_agent_priv, __be64 tid)
1559 {
1560 	struct ib_mad_send_wr_private *mad_send_wr;
1561 
1562 	list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
1563 			    agent_list) {
1564 		if (mad_send_wr->tid == tid)
1565 			return mad_send_wr;
1566 	}
1567 
1568 	/*
1569 	 * It's possible to receive the response before we've
1570 	 * been notified that the send has completed
1571 	 */
1572 	list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
1573 			    agent_list) {
1574 		if (is_data_mad(mad_agent_priv,
1575 				mad_send_wr->send_wr.wr.ud.mad_hdr) &&
1576 		    mad_send_wr->tid == tid && mad_send_wr->timeout) {
1577 			/* Verify request has not been canceled */
1578 			return (mad_send_wr->status == IB_WC_SUCCESS) ?
1579 				mad_send_wr : NULL;
1580 		}
1581 	}
1582 	return NULL;
1583 }
1584 
1585 void ib_mark_mad_done(struct ib_mad_send_wr_private *mad_send_wr)
1586 {
1587 	mad_send_wr->timeout = 0;
1588 	if (mad_send_wr->refcount == 1) {
1589 		list_del(&mad_send_wr->agent_list);
1590 		list_add_tail(&mad_send_wr->agent_list,
1591 			      &mad_send_wr->mad_agent_priv->done_list);
1592 	}
1593 }
1594 
1595 static void ib_mad_complete_recv(struct ib_mad_agent_private *mad_agent_priv,
1596 				 struct ib_mad_recv_wc *mad_recv_wc)
1597 {
1598 	struct ib_mad_send_wr_private *mad_send_wr;
1599 	struct ib_mad_send_wc mad_send_wc;
1600 	unsigned long flags;
1601 	__be64 tid;
1602 
1603 	INIT_LIST_HEAD(&mad_recv_wc->rmpp_list);
1604 	list_add(&mad_recv_wc->recv_buf.list, &mad_recv_wc->rmpp_list);
1605 	if (mad_agent_priv->agent.rmpp_version) {
1606 		mad_recv_wc = ib_process_rmpp_recv_wc(mad_agent_priv,
1607 						      mad_recv_wc);
1608 		if (!mad_recv_wc) {
1609 			if (atomic_dec_and_test(&mad_agent_priv->refcount))
1610 				wake_up(&mad_agent_priv->wait);
1611 			return;
1612 		}
1613 	}
1614 
1615 	/* Complete corresponding request */
1616 	if (response_mad(mad_recv_wc->recv_buf.mad)) {
1617 		tid = mad_recv_wc->recv_buf.mad->mad_hdr.tid;
1618 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
1619 		mad_send_wr = ib_find_send_mad(mad_agent_priv, tid);
1620 		if (!mad_send_wr) {
1621 			spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1622 			ib_free_recv_mad(mad_recv_wc);
1623 			if (atomic_dec_and_test(&mad_agent_priv->refcount))
1624 				wake_up(&mad_agent_priv->wait);
1625 			return;
1626 		}
1627 		ib_mark_mad_done(mad_send_wr);
1628 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1629 
1630 		/* Defined behavior is to complete response before request */
1631 		mad_recv_wc->wc->wr_id = mad_send_wr->wr_id;
1632 		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
1633 						   mad_recv_wc);
1634 		atomic_dec(&mad_agent_priv->refcount);
1635 
1636 		mad_send_wc.status = IB_WC_SUCCESS;
1637 		mad_send_wc.vendor_err = 0;
1638 		mad_send_wc.wr_id = mad_send_wr->wr_id;
1639 		ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc);
1640 	} else {
1641 		mad_agent_priv->agent.recv_handler(&mad_agent_priv->agent,
1642 						   mad_recv_wc);
1643 		if (atomic_dec_and_test(&mad_agent_priv->refcount))
1644 			wake_up(&mad_agent_priv->wait);
1645 	}
1646 }
1647 
1648 static void ib_mad_recv_done_handler(struct ib_mad_port_private *port_priv,
1649 				     struct ib_wc *wc)
1650 {
1651 	struct ib_mad_qp_info *qp_info;
1652 	struct ib_mad_private_header *mad_priv_hdr;
1653 	struct ib_mad_private *recv, *response;
1654 	struct ib_mad_list_head *mad_list;
1655 	struct ib_mad_agent_private *mad_agent;
1656 
1657 	response = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
1658 	if (!response)
1659 		printk(KERN_ERR PFX "ib_mad_recv_done_handler no memory "
1660 		       "for response buffer\n");
1661 
1662 	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
1663 	qp_info = mad_list->mad_queue->qp_info;
1664 	dequeue_mad(mad_list);
1665 
1666 	mad_priv_hdr = container_of(mad_list, struct ib_mad_private_header,
1667 				    mad_list);
1668 	recv = container_of(mad_priv_hdr, struct ib_mad_private, header);
1669 	dma_unmap_single(port_priv->device->dma_device,
1670 			 pci_unmap_addr(&recv->header, mapping),
1671 			 sizeof(struct ib_mad_private) -
1672 			 sizeof(struct ib_mad_private_header),
1673 			 DMA_FROM_DEVICE);
1674 
1675 	/* Setup MAD receive work completion from "normal" work completion */
1676 	recv->header.wc = *wc;
1677 	recv->header.recv_wc.wc = &recv->header.wc;
1678 	recv->header.recv_wc.mad_len = sizeof(struct ib_mad);
1679 	recv->header.recv_wc.recv_buf.mad = &recv->mad.mad;
1680 	recv->header.recv_wc.recv_buf.grh = &recv->grh;
1681 
1682 	if (atomic_read(&qp_info->snoop_count))
1683 		snoop_recv(qp_info, &recv->header.recv_wc, IB_MAD_SNOOP_RECVS);
1684 
1685 	/* Validate MAD */
1686 	if (!validate_mad(&recv->mad.mad, qp_info->qp->qp_num))
1687 		goto out;
1688 
1689 	if (recv->mad.mad.mad_hdr.mgmt_class ==
1690 	    IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE) {
1691 		if (!smi_handle_dr_smp_recv(&recv->mad.smp,
1692 					    port_priv->device->node_type,
1693 					    port_priv->port_num,
1694 					    port_priv->device->phys_port_cnt))
1695 			goto out;
1696 		if (!smi_check_forward_dr_smp(&recv->mad.smp))
1697 			goto local;
1698 		if (!smi_handle_dr_smp_send(&recv->mad.smp,
1699 					    port_priv->device->node_type,
1700 					    port_priv->port_num))
1701 			goto out;
1702 		if (!smi_check_local_dr_smp(&recv->mad.smp,
1703 					    port_priv->device,
1704 					    port_priv->port_num))
1705 			goto out;
1706 	}
1707 
1708 local:
1709 	/* Give driver "right of first refusal" on incoming MAD */
1710 	if (port_priv->device->process_mad) {
1711 		int ret;
1712 
1713 		if (!response) {
1714 			printk(KERN_ERR PFX "No memory for response MAD\n");
1715 			/*
1716 			 * Is it better to assume that
1717 			 * it wouldn't be processed ?
1718 			 */
1719 			goto out;
1720 		}
1721 
1722 		ret = port_priv->device->process_mad(port_priv->device, 0,
1723 						     port_priv->port_num,
1724 						     wc, &recv->grh,
1725 						     &recv->mad.mad,
1726 						     &response->mad.mad);
1727 		if (ret & IB_MAD_RESULT_SUCCESS) {
1728 			if (ret & IB_MAD_RESULT_CONSUMED)
1729 				goto out;
1730 			if (ret & IB_MAD_RESULT_REPLY) {
1731 				/* Send response */
1732 				if (!agent_send(response, &recv->grh, wc,
1733 						port_priv->device,
1734 						port_priv->port_num))
1735 					response = NULL;
1736 				goto out;
1737 			}
1738 		}
1739 	}
1740 
1741 	mad_agent = find_mad_agent(port_priv, &recv->mad.mad);
1742 	if (mad_agent) {
1743 		ib_mad_complete_recv(mad_agent, &recv->header.recv_wc);
1744 		/*
1745 		 * recv is freed in error cases by ib_mad_complete_recv()
1746 		 * or passed to the client via its recv_handler
1747 		 */
1748 		recv = NULL;
1749 	}
1750 
1751 out:
1752 	/* Post another receive request for this QP */
1753 	if (response) {
1754 		ib_mad_post_receive_mads(qp_info, response);
1755 		if (recv)
1756 			kmem_cache_free(ib_mad_cache, recv);
1757 	} else
1758 		ib_mad_post_receive_mads(qp_info, recv);
1759 }
1760 
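/*
 * Adjust the agent's delayed timeout work to match the earliest request
 * on its wait list, or cancel the work if the list is empty
 */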
1761 static void adjust_timeout(struct ib_mad_agent_private *mad_agent_priv)
1762 {
1763 	struct ib_mad_send_wr_private *mad_send_wr;
1764 	unsigned long delay;
1765 
1766 	if (list_empty(&mad_agent_priv->wait_list)) {
1767 		cancel_delayed_work(&mad_agent_priv->timed_work);
1768 	} else {
1769 		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
1770 					 struct ib_mad_send_wr_private,
1771 					 agent_list);
1772 
1773 		if (time_after(mad_agent_priv->timeout,
1774 			       mad_send_wr->timeout)) {
1775 			mad_agent_priv->timeout = mad_send_wr->timeout;
1776 			cancel_delayed_work(&mad_agent_priv->timed_work);
1777 			delay = mad_send_wr->timeout - jiffies;
1778 			if ((long)delay <= 0)
1779 				delay = 1;
1780 			queue_delayed_work(mad_agent_priv->qp_info->
1781 					   port_priv->wq,
1782 					   &mad_agent_priv->timed_work, delay);
1783 		}
1784 	}
1785 }
1786 
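/*
 * Move a send that is awaiting a response onto the agent's wait list,
 * kept sorted by absolute timeout, and reschedule the timeout work if
 * this request now expires first
 */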
1787 static void wait_for_response(struct ib_mad_send_wr_private *mad_send_wr)
1788 {
1789 	struct ib_mad_agent_private *mad_agent_priv;
1790 	struct ib_mad_send_wr_private *temp_mad_send_wr;
1791 	struct list_head *list_item;
1792 	unsigned long delay;
1793 
1794 	mad_agent_priv = mad_send_wr->mad_agent_priv;
1795 	list_del(&mad_send_wr->agent_list);
1796 
1797 	delay = mad_send_wr->timeout;
1798 	mad_send_wr->timeout += jiffies;
1799 
1800 	if (delay) {
1801 		list_for_each_prev(list_item, &mad_agent_priv->wait_list) {
1802 			temp_mad_send_wr = list_entry(list_item,
1803 						struct ib_mad_send_wr_private,
1804 						agent_list);
1805 			if (time_after(mad_send_wr->timeout,
1806 				       temp_mad_send_wr->timeout))
1807 				break;
1808 		}
1809 	}
1810 	else
1811 		list_item = &mad_agent_priv->wait_list;
1812 	list_add(&mad_send_wr->agent_list, list_item);
1813 
1814 	/* Reschedule a work item if we have a shorter timeout */
1815 	if (mad_agent_priv->wait_list.next == &mad_send_wr->agent_list) {
1816 		cancel_delayed_work(&mad_agent_priv->timed_work);
1817 		queue_delayed_work(mad_agent_priv->qp_info->port_priv->wq,
1818 				   &mad_agent_priv->timed_work, delay);
1819 	}
1820 }
1821 
1822 void ib_reset_mad_timeout(struct ib_mad_send_wr_private *mad_send_wr,
1823 			  int timeout_ms)
1824 {
1825 	mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
1826 	wait_for_response(mad_send_wr);
1827 }
1828 
1829 /*
1830  * Process a send work completion
1831  */
1832 void ib_mad_complete_send_wr(struct ib_mad_send_wr_private *mad_send_wr,
1833 			     struct ib_mad_send_wc *mad_send_wc)
1834 {
1835 	struct ib_mad_agent_private	*mad_agent_priv;
1836 	unsigned long			flags;
1837 	int				ret;
1838 
1839 	mad_agent_priv = mad_send_wr->mad_agent_priv;
1840 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
1841 	if (mad_agent_priv->agent.rmpp_version) {
1842 		ret = ib_process_rmpp_send_wc(mad_send_wr, mad_send_wc);
1843 		if (ret == IB_RMPP_RESULT_CONSUMED)
1844 			goto done;
1845 	} else
1846 		ret = IB_RMPP_RESULT_UNHANDLED;
1847 
1848 	if (mad_send_wc->status != IB_WC_SUCCESS &&
1849 	    mad_send_wr->status == IB_WC_SUCCESS) {
1850 		mad_send_wr->status = mad_send_wc->status;
1851 		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
1852 	}
1853 
1854 	if (--mad_send_wr->refcount > 0) {
1855 		if (mad_send_wr->refcount == 1 && mad_send_wr->timeout &&
1856 		    mad_send_wr->status == IB_WC_SUCCESS) {
1857 			wait_for_response(mad_send_wr);
1858 		}
1859 		goto done;
1860 	}
1861 
1862 	/* Remove send from MAD agent and notify client of completion */
1863 	list_del(&mad_send_wr->agent_list);
1864 	adjust_timeout(mad_agent_priv);
1865 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1866 
1867 	if (mad_send_wr->status != IB_WC_SUCCESS)
1868 		mad_send_wc->status = mad_send_wr->status;
1869 	if (ret != IB_RMPP_RESULT_INTERNAL)
1870 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
1871 						   mad_send_wc);
1872 
1873 	/* Release reference on agent taken when sending */
1874 	if (atomic_dec_and_test(&mad_agent_priv->refcount))
1875 		wake_up(&mad_agent_priv->wait);
1876 
1877 	kfree(mad_send_wr);
1878 	return;
1879 done:
1880 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
1881 }
1882 
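/*
 * Process a send completion from the CQ: complete the send and, if a
 * request was held on the overflow list, post it to the send queue
 */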
1883 static void ib_mad_send_done_handler(struct ib_mad_port_private *port_priv,
1884 				     struct ib_wc *wc)
1885 {
1886 	struct ib_mad_send_wr_private	*mad_send_wr, *queued_send_wr;
1887 	struct ib_mad_list_head		*mad_list;
1888 	struct ib_mad_qp_info		*qp_info;
1889 	struct ib_mad_queue		*send_queue;
1890 	struct ib_send_wr		*bad_send_wr;
1891 	unsigned long flags;
1892 	int ret;
1893 
1894 	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
1895 	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
1896 				   mad_list);
1897 	send_queue = mad_list->mad_queue;
1898 	qp_info = send_queue->qp_info;
1899 
1900 retry:
1901 	queued_send_wr = NULL;
1902 	spin_lock_irqsave(&send_queue->lock, flags);
1903 	list_del(&mad_list->list);
1904 
1905 	/* Move queued send to the send queue */
1906 	if (send_queue->count-- > send_queue->max_active) {
1907 		mad_list = container_of(qp_info->overflow_list.next,
1908 					struct ib_mad_list_head, list);
1909 		queued_send_wr = container_of(mad_list,
1910 					struct ib_mad_send_wr_private,
1911 					mad_list);
1912 		list_del(&mad_list->list);
1913 		list_add_tail(&mad_list->list, &send_queue->list);
1914 	}
1915 	spin_unlock_irqrestore(&send_queue->lock, flags);
1916 
1917 	/* Restore client wr_id in WC and complete send */
1918 	wc->wr_id = mad_send_wr->wr_id;
1919 	if (atomic_read(&qp_info->snoop_count))
1920 		snoop_send(qp_info, &mad_send_wr->send_wr,
1921 			   (struct ib_mad_send_wc *)wc,
1922 			   IB_MAD_SNOOP_SEND_COMPLETIONS);
1923 	ib_mad_complete_send_wr(mad_send_wr, (struct ib_mad_send_wc *)wc);
1924 
1925 	if (queued_send_wr) {
1926 		ret = ib_post_send(qp_info->qp, &queued_send_wr->send_wr,
1927 				&bad_send_wr);
1928 		if (ret) {
1929 			printk(KERN_ERR PFX "ib_post_send failed: %d\n", ret);
1930 			mad_send_wr = queued_send_wr;
1931 			wc->status = IB_WC_LOC_QP_OP_ERR;
1932 			goto retry;
1933 		}
1934 	}
1935 }
1936 
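/*
 * Flag all sends currently on the send queue to be reposted if they
 * are flushed
 */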
1937 static void mark_sends_for_retry(struct ib_mad_qp_info *qp_info)
1938 {
1939 	struct ib_mad_send_wr_private *mad_send_wr;
1940 	struct ib_mad_list_head *mad_list;
1941 	unsigned long flags;
1942 
1943 	spin_lock_irqsave(&qp_info->send_queue.lock, flags);
1944 	list_for_each_entry(mad_list, &qp_info->send_queue.list, list) {
1945 		mad_send_wr = container_of(mad_list,
1946 					   struct ib_mad_send_wr_private,
1947 					   mad_list);
1948 		mad_send_wr->retry = 1;
1949 	}
1950 	spin_unlock_irqrestore(&qp_info->send_queue.lock, flags);
1951 }
1952 
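/*
 * Process a work completion that finished in error
 */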
1953 static void mad_error_handler(struct ib_mad_port_private *port_priv,
1954 			      struct ib_wc *wc)
1955 {
1956 	struct ib_mad_list_head *mad_list;
1957 	struct ib_mad_qp_info *qp_info;
1958 	struct ib_mad_send_wr_private *mad_send_wr;
1959 	int ret;
1960 
1961 	/* Determine if failure was a send or receive */
1962 	mad_list = (struct ib_mad_list_head *)(unsigned long)wc->wr_id;
1963 	qp_info = mad_list->mad_queue->qp_info;
1964 	if (mad_list->mad_queue == &qp_info->recv_queue)
1965 		/*
1966 		 * Receive errors indicate that the QP has entered the error
1967 		 * state - error handling/shutdown code will clean up
1968 		 */
1969 		return;
1970 
1971 	/*
1972 	 * Send errors will transition the QP to SQE - move
1973 	 * QP to RTS and repost flushed work requests
1974 	 */
1975 	mad_send_wr = container_of(mad_list, struct ib_mad_send_wr_private,
1976 				   mad_list);
1977 	if (wc->status == IB_WC_WR_FLUSH_ERR) {
1978 		if (mad_send_wr->retry) {
1979 			/* Repost send */
1980 			struct ib_send_wr *bad_send_wr;
1981 
1982 			mad_send_wr->retry = 0;
1983 			ret = ib_post_send(qp_info->qp, &mad_send_wr->send_wr,
1984 					&bad_send_wr);
1985 			if (ret)
1986 				ib_mad_send_done_handler(port_priv, wc);
1987 		} else
1988 			ib_mad_send_done_handler(port_priv, wc);
1989 	} else {
1990 		struct ib_qp_attr *attr;
1991 
1992 		/* Transition QP to RTS and fail offending send */
1993 		attr = kmalloc(sizeof *attr, GFP_KERNEL);
1994 		if (attr) {
1995 			attr->qp_state = IB_QPS_RTS;
1996 			attr->cur_qp_state = IB_QPS_SQE;
1997 			ret = ib_modify_qp(qp_info->qp, attr,
1998 					   IB_QP_STATE | IB_QP_CUR_STATE);
1999 			kfree(attr);
2000 			if (ret)
2001 				printk(KERN_ERR PFX "mad_error_handler - "
2002 				       "ib_modify_qp to RTS : %d\n", ret);
2003 			else
2004 				mark_sends_for_retry(qp_info);
2005 		}
2006 		ib_mad_send_done_handler(port_priv, wc);
2007 	}
2008 }
2009 
2010 /*
2011  * IB MAD completion callback
2012  */
2013 static void ib_mad_completion_handler(void *data)
2014 {
2015 	struct ib_mad_port_private *port_priv;
2016 	struct ib_wc wc;
2017 
2018 	port_priv = (struct ib_mad_port_private *)data;
2019 	ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
2020 
2021 	while (ib_poll_cq(port_priv->cq, 1, &wc) == 1) {
2022 		if (wc.status == IB_WC_SUCCESS) {
2023 			switch (wc.opcode) {
2024 			case IB_WC_SEND:
2025 				ib_mad_send_done_handler(port_priv, &wc);
2026 				break;
2027 			case IB_WC_RECV:
2028 				ib_mad_recv_done_handler(port_priv, &wc);
2029 				break;
2030 			default:
2031 				BUG();
2032 				break;
2033 			}
2034 		} else
2035 			mad_error_handler(port_priv, &wc);
2036 	}
2037 }
2038 
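/*
 * Flush all outstanding sends for a MAD agent: mark active sends as
 * flushed and complete everything on the wait and local lists
 */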
2039 static void cancel_mads(struct ib_mad_agent_private *mad_agent_priv)
2040 {
2041 	unsigned long flags;
2042 	struct ib_mad_send_wr_private *mad_send_wr, *temp_mad_send_wr;
2043 	struct ib_mad_send_wc mad_send_wc;
2044 	struct list_head cancel_list;
2045 
2046 	INIT_LIST_HEAD(&cancel_list);
2047 
2048 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2049 	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
2050 				 &mad_agent_priv->send_list, agent_list) {
2051 		if (mad_send_wr->status == IB_WC_SUCCESS) {
2052 			mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
2053 			mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2054 		}
2055 	}
2056 
2057 	/* Empty wait list to prevent receives from finding a request */
2058 	list_splice_init(&mad_agent_priv->wait_list, &cancel_list);
2059 	/* Empty local completion list as well */
2060 	list_splice_init(&mad_agent_priv->local_list, &cancel_list);
2061 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2062 
2063 	/* Report all cancelled requests */
2064 	mad_send_wc.status = IB_WC_WR_FLUSH_ERR;
2065 	mad_send_wc.vendor_err = 0;
2066 
2067 	list_for_each_entry_safe(mad_send_wr, temp_mad_send_wr,
2068 				 &cancel_list, agent_list) {
2069 		mad_send_wc.wr_id = mad_send_wr->wr_id;
2070 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2071 						   &mad_send_wc);
2072 
2073 		list_del(&mad_send_wr->agent_list);
2074 		kfree(mad_send_wr);
2075 		atomic_dec(&mad_agent_priv->refcount);
2076 	}
2077 }
2078 
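/*
 * Find a send request on the agent's wait or send list by work request ID
 */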
2079 static struct ib_mad_send_wr_private*
2080 find_send_by_wr_id(struct ib_mad_agent_private *mad_agent_priv, u64 wr_id)
2081 {
2082 	struct ib_mad_send_wr_private *mad_send_wr;
2083 
2084 	list_for_each_entry(mad_send_wr, &mad_agent_priv->wait_list,
2085 			    agent_list) {
2086 		if (mad_send_wr->wr_id == wr_id)
2087 			return mad_send_wr;
2088 	}
2089 
2090 	list_for_each_entry(mad_send_wr, &mad_agent_priv->send_list,
2091 			    agent_list) {
2092 		if (is_data_mad(mad_agent_priv,
2093 				mad_send_wr->send_wr.wr.ud.mad_hdr) &&
2094 		    mad_send_wr->wr_id == wr_id)
2095 			return mad_send_wr;
2096 	}
2097 	return NULL;
2098 }
2099 
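/*
 * Modify the response timeout of a previously sent MAD; a timeout of
 * zero marks the request to be flushed back to the client
 */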
2100 int ib_modify_mad(struct ib_mad_agent *mad_agent, u64 wr_id, u32 timeout_ms)
2101 {
2102 	struct ib_mad_agent_private *mad_agent_priv;
2103 	struct ib_mad_send_wr_private *mad_send_wr;
2104 	unsigned long flags;
2105 	int active;
2106 
2107 	mad_agent_priv = container_of(mad_agent, struct ib_mad_agent_private,
2108 				      agent);
2109 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2110 	mad_send_wr = find_send_by_wr_id(mad_agent_priv, wr_id);
2111 	if (!mad_send_wr || mad_send_wr->status != IB_WC_SUCCESS) {
2112 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2113 		return -EINVAL;
2114 	}
2115 
2116 	active = (!mad_send_wr->timeout || mad_send_wr->refcount > 1);
2117 	if (!timeout_ms) {
2118 		mad_send_wr->status = IB_WC_WR_FLUSH_ERR;
2119 		mad_send_wr->refcount -= (mad_send_wr->timeout > 0);
2120 	}
2121 
2122 	mad_send_wr->send_wr.wr.ud.timeout_ms = timeout_ms;
2123 	if (active)
2124 		mad_send_wr->timeout = msecs_to_jiffies(timeout_ms);
2125 	else
2126 		ib_reset_mad_timeout(mad_send_wr, timeout_ms);
2127 
2128 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2129 	return 0;
2130 }
2131 EXPORT_SYMBOL(ib_modify_mad);
2132 
2133 void ib_cancel_mad(struct ib_mad_agent *mad_agent, u64 wr_id)
2134 {
2135 	ib_modify_mad(mad_agent, wr_id, 0);
2136 }
2137 EXPORT_SYMBOL(ib_cancel_mad);
2138 
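/*
 * Complete sends that were processed locally: deliver any locally
 * generated response to the receiving agent, then report the send
 * completion to the sender
 */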
2139 static void local_completions(void *data)
2140 {
2141 	struct ib_mad_agent_private *mad_agent_priv;
2142 	struct ib_mad_local_private *local;
2143 	struct ib_mad_agent_private *recv_mad_agent;
2144 	unsigned long flags;
2145 	int recv = 0;
2146 	struct ib_wc wc;
2147 	struct ib_mad_send_wc mad_send_wc;
2148 
2149 	mad_agent_priv = (struct ib_mad_agent_private *)data;
2150 
2151 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2152 	while (!list_empty(&mad_agent_priv->local_list)) {
2153 		local = list_entry(mad_agent_priv->local_list.next,
2154 				   struct ib_mad_local_private,
2155 				   completion_list);
2156 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2157 		if (local->mad_priv) {
2158 			recv_mad_agent = local->recv_mad_agent;
2159 			if (!recv_mad_agent) {
2160 				printk(KERN_ERR PFX "No receive MAD agent for local completion\n");
2161 				goto local_send_completion;
2162 			}
2163 
2164 			recv = 1;
2165 			/*
2166 			 * Defined behavior is to complete response
2167 			 * before request
2168 			 */
2169 			build_smp_wc(local->wr_id,
2170 				     be16_to_cpu(IB_LID_PERMISSIVE),
2171 				     0 /* pkey index */,
2172 				     recv_mad_agent->agent.port_num, &wc);
2173 
2174 			local->mad_priv->header.recv_wc.wc = &wc;
2175 			local->mad_priv->header.recv_wc.mad_len =
2176 						sizeof(struct ib_mad);
2177 			INIT_LIST_HEAD(&local->mad_priv->header.recv_wc.rmpp_list);
2178 			list_add(&local->mad_priv->header.recv_wc.recv_buf.list,
2179 				 &local->mad_priv->header.recv_wc.rmpp_list);
2180 			local->mad_priv->header.recv_wc.recv_buf.grh = NULL;
2181 			local->mad_priv->header.recv_wc.recv_buf.mad =
2182 						&local->mad_priv->mad.mad;
2183 			if (atomic_read(&recv_mad_agent->qp_info->snoop_count))
2184 				snoop_recv(recv_mad_agent->qp_info,
2185 					  &local->mad_priv->header.recv_wc,
2186 					   IB_MAD_SNOOP_RECVS);
2187 			recv_mad_agent->agent.recv_handler(
2188 						&recv_mad_agent->agent,
2189 						&local->mad_priv->header.recv_wc);
2190 			spin_lock_irqsave(&recv_mad_agent->lock, flags);
2191 			atomic_dec(&recv_mad_agent->refcount);
2192 			spin_unlock_irqrestore(&recv_mad_agent->lock, flags);
2193 		}
2194 
2195 local_send_completion:
2196 		/* Complete send */
2197 		mad_send_wc.status = IB_WC_SUCCESS;
2198 		mad_send_wc.vendor_err = 0;
2199 		mad_send_wc.wr_id = local->wr_id;
2200 		if (atomic_read(&mad_agent_priv->qp_info->snoop_count))
2201 			snoop_send(mad_agent_priv->qp_info, &local->send_wr,
2202 				  &mad_send_wc,
2203 				   IB_MAD_SNOOP_SEND_COMPLETIONS);
2204 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2205 						   &mad_send_wc);
2206 
2207 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
2208 		list_del(&local->completion_list);
2209 		atomic_dec(&mad_agent_priv->refcount);
2210 		if (!recv && local->mad_priv)
2211 			kmem_cache_free(ib_mad_cache, local->mad_priv);
2212 		kfree(local);
2213 	}
2214 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2215 }
2216 
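/*
 * Resend a request whose response timed out; returns -ETIMEDOUT once
 * the retry count is exhausted
 */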
2217 static int retry_send(struct ib_mad_send_wr_private *mad_send_wr)
2218 {
2219 	int ret;
2220 
2221 	if (!mad_send_wr->retries--)
2222 		return -ETIMEDOUT;
2223 
2224 	mad_send_wr->timeout = msecs_to_jiffies(mad_send_wr->send_wr.
2225 						wr.ud.timeout_ms);
2226 
2227 	if (mad_send_wr->mad_agent_priv->agent.rmpp_version) {
2228 		ret = ib_retry_rmpp(mad_send_wr);
2229 		switch (ret) {
2230 		case IB_RMPP_RESULT_UNHANDLED:
2231 			ret = ib_send_mad(mad_send_wr);
2232 			break;
2233 		case IB_RMPP_RESULT_CONSUMED:
2234 			ret = 0;
2235 			break;
2236 		default:
2237 			ret = -ECOMM;
2238 			break;
2239 		}
2240 	} else
2241 		ret = ib_send_mad(mad_send_wr);
2242 
2243 	if (!ret) {
2244 		mad_send_wr->refcount++;
2245 		list_add_tail(&mad_send_wr->agent_list,
2246 			      &mad_send_wr->mad_agent_priv->send_list);
2247 	}
2248 	return ret;
2249 }
2250 
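/*
 * Timeout work handler: retry expired requests or complete them to the
 * client with IB_WC_RESP_TIMEOUT_ERR, and rearm the work for the next
 * pending timeout
 */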
2251 static void timeout_sends(void *data)
2252 {
2253 	struct ib_mad_agent_private *mad_agent_priv;
2254 	struct ib_mad_send_wr_private *mad_send_wr;
2255 	struct ib_mad_send_wc mad_send_wc;
2256 	unsigned long flags, delay;
2257 
2258 	mad_agent_priv = (struct ib_mad_agent_private *)data;
2259 	mad_send_wc.vendor_err = 0;
2260 
2261 	spin_lock_irqsave(&mad_agent_priv->lock, flags);
2262 	while (!list_empty(&mad_agent_priv->wait_list)) {
2263 		mad_send_wr = list_entry(mad_agent_priv->wait_list.next,
2264 					 struct ib_mad_send_wr_private,
2265 					 agent_list);
2266 
2267 		if (time_after(mad_send_wr->timeout, jiffies)) {
2268 			delay = mad_send_wr->timeout - jiffies;
2269 			if ((long)delay <= 0)
2270 				delay = 1;
2271 			queue_delayed_work(mad_agent_priv->qp_info->
2272 					   port_priv->wq,
2273 					   &mad_agent_priv->timed_work, delay);
2274 			break;
2275 		}
2276 
2277 		list_del(&mad_send_wr->agent_list);
2278 		if (mad_send_wr->status == IB_WC_SUCCESS &&
2279 		    !retry_send(mad_send_wr))
2280 			continue;
2281 
2282 		spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2283 
2284 		if (mad_send_wr->status == IB_WC_SUCCESS)
2285 			mad_send_wc.status = IB_WC_RESP_TIMEOUT_ERR;
2286 		else
2287 			mad_send_wc.status = mad_send_wr->status;
2288 		mad_send_wc.wr_id = mad_send_wr->wr_id;
2289 		mad_agent_priv->agent.send_handler(&mad_agent_priv->agent,
2290 						   &mad_send_wc);
2291 
2292 		kfree(mad_send_wr);
2293 		atomic_dec(&mad_agent_priv->refcount);
2294 		spin_lock_irqsave(&mad_agent_priv->lock, flags);
2295 	}
2296 	spin_unlock_irqrestore(&mad_agent_priv->lock, flags);
2297 }
2298 
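/*
 * CQ completion callback: defer MAD completion processing to the
 * port's workqueue
 */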
2299 static void ib_mad_thread_completion_handler(struct ib_cq *cq, void *arg)
2300 {
2301 	struct ib_mad_port_private *port_priv = cq->cq_context;
2302 
2303 	queue_work(port_priv->wq, &port_priv->work);
2304 }
2305 
2306 /*
2307  * Allocate receive MADs and post receive WRs for them
2308  */
2309 static int ib_mad_post_receive_mads(struct ib_mad_qp_info *qp_info,
2310 				    struct ib_mad_private *mad)
2311 {
2312 	unsigned long flags;
2313 	int post, ret;
2314 	struct ib_mad_private *mad_priv;
2315 	struct ib_sge sg_list;
2316 	struct ib_recv_wr recv_wr, *bad_recv_wr;
2317 	struct ib_mad_queue *recv_queue = &qp_info->recv_queue;
2318 
2319 	/* Initialize common scatter list fields */
2320 	sg_list.length = sizeof *mad_priv - sizeof mad_priv->header;
2321 	sg_list.lkey = (*qp_info->port_priv->mr).lkey;
2322 
2323 	/* Initialize common receive WR fields */
2324 	recv_wr.next = NULL;
2325 	recv_wr.sg_list = &sg_list;
2326 	recv_wr.num_sge = 1;
2327 
2328 	do {
2329 		/* Allocate and map receive buffer */
2330 		if (mad) {
2331 			mad_priv = mad;
2332 			mad = NULL;
2333 		} else {
2334 			mad_priv = kmem_cache_alloc(ib_mad_cache, GFP_KERNEL);
2335 			if (!mad_priv) {
2336 				printk(KERN_ERR PFX "No memory for receive buffer\n");
2337 				ret = -ENOMEM;
2338 				break;
2339 			}
2340 		}
2341 		sg_list.addr = dma_map_single(qp_info->port_priv->
2342 						device->dma_device,
2343 					&mad_priv->grh,
2344 					sizeof *mad_priv -
2345 						sizeof mad_priv->header,
2346 					DMA_FROM_DEVICE);
2347 		pci_unmap_addr_set(&mad_priv->header, mapping, sg_list.addr);
2348 		recv_wr.wr_id = (unsigned long)&mad_priv->header.mad_list;
2349 		mad_priv->header.mad_list.mad_queue = recv_queue;
2350 
2351 		/* Post receive WR */
2352 		spin_lock_irqsave(&recv_queue->lock, flags);
2353 		post = (++recv_queue->count < recv_queue->max_active);
2354 		list_add_tail(&mad_priv->header.mad_list.list, &recv_queue->list);
2355 		spin_unlock_irqrestore(&recv_queue->lock, flags);
2356 		ret = ib_post_recv(qp_info->qp, &recv_wr, &bad_recv_wr);
2357 		if (ret) {
2358 			spin_lock_irqsave(&recv_queue->lock, flags);
2359 			list_del(&mad_priv->header.mad_list.list);
2360 			recv_queue->count--;
2361 			spin_unlock_irqrestore(&recv_queue->lock, flags);
2362 			dma_unmap_single(qp_info->port_priv->device->dma_device,
2363 					 pci_unmap_addr(&mad_priv->header,
2364 							mapping),
2365 					 sizeof *mad_priv -
2366 					   sizeof mad_priv->header,
2367 					 DMA_FROM_DEVICE);
2368 			kmem_cache_free(ib_mad_cache, mad_priv);
2369 			printk(KERN_ERR PFX "ib_post_recv failed: %d\n", ret);
2370 			break;
2371 		}
2372 	} while (post);
2373 
2374 	return ret;
2375 }
2376 
2377 /*
2378  * Return all the posted receive MADs
2379  */
2380 static void cleanup_recv_queue(struct ib_mad_qp_info *qp_info)
2381 {
2382 	struct ib_mad_private_header *mad_priv_hdr;
2383 	struct ib_mad_private *recv;
2384 	struct ib_mad_list_head *mad_list;
2385 
2386 	while (!list_empty(&qp_info->recv_queue.list)) {
2387 
2388 		mad_list = list_entry(qp_info->recv_queue.list.next,
2389 				      struct ib_mad_list_head, list);
2390 		mad_priv_hdr = container_of(mad_list,
2391 					    struct ib_mad_private_header,
2392 					    mad_list);
2393 		recv = container_of(mad_priv_hdr, struct ib_mad_private,
2394 				    header);
2395 
2396 		/* Remove from posted receive MAD list */
2397 		list_del(&mad_list->list);
2398 
2399 		dma_unmap_single(qp_info->port_priv->device->dma_device,
2400 				 pci_unmap_addr(&recv->header, mapping),
2401 				 sizeof(struct ib_mad_private) -
2402 				 sizeof(struct ib_mad_private_header),
2403 				 DMA_FROM_DEVICE);
2404 		kmem_cache_free(ib_mad_cache, recv);
2405 	}
2406 
2407 	qp_info->recv_queue.count = 0;
2408 }
2409 
2410 /*
2411  * Start the port
2412  */
2413 static int ib_mad_port_start(struct ib_mad_port_private *port_priv)
2414 {
2415 	int ret, i;
2416 	struct ib_qp_attr *attr;
2417 	struct ib_qp *qp;
2418 
2419 	attr = kmalloc(sizeof *attr, GFP_KERNEL);
2420 	if (!attr) {
2421 		printk(KERN_ERR PFX "Couldn't kmalloc ib_qp_attr\n");
2422 		return -ENOMEM;
2423 	}
2424 
2425 	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
2426 		qp = port_priv->qp_info[i].qp;
2427 		/*
2428 		 * PKey index for QP1 is irrelevant but
2429 		 * one is needed for the Reset to Init transition
2430 		 */
2431 		attr->qp_state = IB_QPS_INIT;
2432 		attr->pkey_index = 0;
2433 		attr->qkey = (qp->qp_num == 0) ? 0 : IB_QP1_QKEY;
2434 		ret = ib_modify_qp(qp, attr, IB_QP_STATE |
2435 					     IB_QP_PKEY_INDEX | IB_QP_QKEY);
2436 		if (ret) {
2437 			printk(KERN_ERR PFX "Couldn't change QP%d state to "
2438 			       "INIT: %d\n", i, ret);
2439 			goto out;
2440 		}
2441 
2442 		attr->qp_state = IB_QPS_RTR;
2443 		ret = ib_modify_qp(qp, attr, IB_QP_STATE);
2444 		if (ret) {
2445 			printk(KERN_ERR PFX "Couldn't change QP%d state to "
2446 			       "RTR: %d\n", i, ret);
2447 			goto out;
2448 		}
2449 
2450 		attr->qp_state = IB_QPS_RTS;
2451 		attr->sq_psn = IB_MAD_SEND_Q_PSN;
2452 		ret = ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_SQ_PSN);
2453 		if (ret) {
2454 			printk(KERN_ERR PFX "Couldn't change QP%d state to "
2455 			       "RTS: %d\n", i, ret);
2456 			goto out;
2457 		}
2458 	}
2459 
2460 	ret = ib_req_notify_cq(port_priv->cq, IB_CQ_NEXT_COMP);
2461 	if (ret) {
2462 		printk(KERN_ERR PFX "Failed to request completion "
2463 		       "notification: %d\n", ret);
2464 		goto out;
2465 	}
2466 
2467 	for (i = 0; i < IB_MAD_QPS_CORE; i++) {
2468 		ret = ib_mad_post_receive_mads(&port_priv->qp_info[i], NULL);
2469 		if (ret) {
2470 			printk(KERN_ERR PFX "Couldn't post receive WRs\n");
2471 			goto out;
2472 		}
2473 	}
2474 out:
2475 	kfree(attr);
2476 	return ret;
2477 }
2478 
2479 static void qp_event_handler(struct ib_event *event, void *qp_context)
2480 {
2481 	struct ib_mad_qp_info	*qp_info = qp_context;
2482 
2483 	/* It's worse than that! He's dead, Jim! */
2484 	printk(KERN_ERR PFX "Fatal error (%d) on MAD QP (%d)\n",
2485 		event->event, qp_info->qp->qp_num);
2486 }
2487 
2488 static void init_mad_queue(struct ib_mad_qp_info *qp_info,
2489 			   struct ib_mad_queue *mad_queue)
2490 {
2491 	mad_queue->qp_info = qp_info;
2492 	mad_queue->count = 0;
2493 	spin_lock_init(&mad_queue->lock);
2494 	INIT_LIST_HEAD(&mad_queue->list);
2495 }
2496 
2497 static void init_mad_qp(struct ib_mad_port_private *port_priv,
2498 			struct ib_mad_qp_info *qp_info)
2499 {
2500 	qp_info->port_priv = port_priv;
2501 	init_mad_queue(qp_info, &qp_info->send_queue);
2502 	init_mad_queue(qp_info, &qp_info->recv_queue);
2503 	INIT_LIST_HEAD(&qp_info->overflow_list);
2504 	spin_lock_init(&qp_info->snoop_lock);
2505 	qp_info->snoop_table = NULL;
2506 	qp_info->snoop_table_size = 0;
2507 	atomic_set(&qp_info->snoop_count, 0);
2508 }
2509 
2510 static int create_mad_qp(struct ib_mad_qp_info *qp_info,
2511 			 enum ib_qp_type qp_type)
2512 {
2513 	struct ib_qp_init_attr	qp_init_attr;
2514 	int ret;
2515 
2516 	memset(&qp_init_attr, 0, sizeof qp_init_attr);
2517 	qp_init_attr.send_cq = qp_info->port_priv->cq;
2518 	qp_init_attr.recv_cq = qp_info->port_priv->cq;
2519 	qp_init_attr.sq_sig_type = IB_SIGNAL_ALL_WR;
2520 	qp_init_attr.cap.max_send_wr = IB_MAD_QP_SEND_SIZE;
2521 	qp_init_attr.cap.max_recv_wr = IB_MAD_QP_RECV_SIZE;
2522 	qp_init_attr.cap.max_send_sge = IB_MAD_SEND_REQ_MAX_SG;
2523 	qp_init_attr.cap.max_recv_sge = IB_MAD_RECV_REQ_MAX_SG;
2524 	qp_init_attr.qp_type = qp_type;
2525 	qp_init_attr.port_num = qp_info->port_priv->port_num;
2526 	qp_init_attr.qp_context = qp_info;
2527 	qp_init_attr.event_handler = qp_event_handler;
2528 	qp_info->qp = ib_create_qp(qp_info->port_priv->pd, &qp_init_attr);
2529 	if (IS_ERR(qp_info->qp)) {
2530 		printk(KERN_ERR PFX "Couldn't create ib_mad QP%d\n",
2531 		       get_spl_qp_index(qp_type));
2532 		ret = PTR_ERR(qp_info->qp);
2533 		goto error;
2534 	}
2535 	/* Use minimum queue sizes unless the CQ is resized */
2536 	qp_info->send_queue.max_active = IB_MAD_QP_SEND_SIZE;
2537 	qp_info->recv_queue.max_active = IB_MAD_QP_RECV_SIZE;
2538 	return 0;
2539 
2540 error:
2541 	return ret;
2542 }
2543 
2544 static void destroy_mad_qp(struct ib_mad_qp_info *qp_info)
2545 {
2546 	ib_destroy_qp(qp_info->qp);
2547 	if (qp_info->snoop_table)
2548 		kfree(qp_info->snoop_table);
2549 }
2550 
2551 /*
2552  * Open the port
2553  * Create the QP, PD, MR, and CQ if needed
2554  */
2555 static int ib_mad_port_open(struct ib_device *device,
2556 			    int port_num)
2557 {
2558 	int ret, cq_size;
2559 	struct ib_mad_port_private *port_priv;
2560 	unsigned long flags;
2561 	char name[sizeof "ib_mad123"];
2562 
2563 	/* Create new device info */
2564 	port_priv = kmalloc(sizeof *port_priv, GFP_KERNEL);
2565 	if (!port_priv) {
2566 		printk(KERN_ERR PFX "No memory for ib_mad_port_private\n");
2567 		return -ENOMEM;
2568 	}
2569 	memset(port_priv, 0, sizeof *port_priv);
2570 	port_priv->device = device;
2571 	port_priv->port_num = port_num;
2572 	spin_lock_init(&port_priv->reg_lock);
2573 	INIT_LIST_HEAD(&port_priv->agent_list);
2574 	init_mad_qp(port_priv, &port_priv->qp_info[0]);
2575 	init_mad_qp(port_priv, &port_priv->qp_info[1]);
2576 
2577 	cq_size = (IB_MAD_QP_SEND_SIZE + IB_MAD_QP_RECV_SIZE) * 2;
2578 	port_priv->cq = ib_create_cq(port_priv->device,
2579 				     ib_mad_thread_completion_handler,
2580 				     NULL, port_priv, cq_size);
2581 	if (IS_ERR(port_priv->cq)) {
2582 		printk(KERN_ERR PFX "Couldn't create ib_mad CQ\n");
2583 		ret = PTR_ERR(port_priv->cq);
2584 		goto error3;
2585 	}
2586 
2587 	port_priv->pd = ib_alloc_pd(device);
2588 	if (IS_ERR(port_priv->pd)) {
2589 		printk(KERN_ERR PFX "Couldn't create ib_mad PD\n");
2590 		ret = PTR_ERR(port_priv->pd);
2591 		goto error4;
2592 	}
2593 
2594 	port_priv->mr = ib_get_dma_mr(port_priv->pd, IB_ACCESS_LOCAL_WRITE);
2595 	if (IS_ERR(port_priv->mr)) {
2596 		printk(KERN_ERR PFX "Couldn't get ib_mad DMA MR\n");
2597 		ret = PTR_ERR(port_priv->mr);
2598 		goto error5;
2599 	}
2600 
2601 	ret = create_mad_qp(&port_priv->qp_info[0], IB_QPT_SMI);
2602 	if (ret)
2603 		goto error6;
2604 	ret = create_mad_qp(&port_priv->qp_info[1], IB_QPT_GSI);
2605 	if (ret)
2606 		goto error7;
2607 
2608 	snprintf(name, sizeof name, "ib_mad%d", port_num);
2609 	port_priv->wq = create_singlethread_workqueue(name);
2610 	if (!port_priv->wq) {
2611 		ret = -ENOMEM;
2612 		goto error8;
2613 	}
2614 	INIT_WORK(&port_priv->work, ib_mad_completion_handler, port_priv);
2615 
2616 	ret = ib_mad_port_start(port_priv);
2617 	if (ret) {
2618 		printk(KERN_ERR PFX "Couldn't start port\n");
2619 		goto error9;
2620 	}
2621 
2622 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
2623 	list_add_tail(&port_priv->port_list, &ib_mad_port_list);
2624 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
2625 	return 0;
2626 
2627 error9:
2628 	destroy_workqueue(port_priv->wq);
2629 error8:
2630 	destroy_mad_qp(&port_priv->qp_info[1]);
2631 error7:
2632 	destroy_mad_qp(&port_priv->qp_info[0]);
2633 error6:
2634 	ib_dereg_mr(port_priv->mr);
2635 error5:
2636 	ib_dealloc_pd(port_priv->pd);
2637 error4:
2638 	ib_destroy_cq(port_priv->cq);
2639 	cleanup_recv_queue(&port_priv->qp_info[1]);
2640 	cleanup_recv_queue(&port_priv->qp_info[0]);
2641 error3:
2642 	kfree(port_priv);
2643 
2644 	return ret;
2645 }
2646 
2647 /*
2648  * Close the port
2649  * If there are no classes using the port, free the port
2650  * resources (CQ, MR, PD, QP) and remove the port's info structure
2651  */
2652 static int ib_mad_port_close(struct ib_device *device, int port_num)
2653 {
2654 	struct ib_mad_port_private *port_priv;
2655 	unsigned long flags;
2656 
2657 	spin_lock_irqsave(&ib_mad_port_list_lock, flags);
2658 	port_priv = __ib_get_mad_port(device, port_num);
2659 	if (port_priv == NULL) {
2660 		spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
2661 		printk(KERN_ERR PFX "Port %d not found\n", port_num);
2662 		return -ENODEV;
2663 	}
2664 	list_del(&port_priv->port_list);
2665 	spin_unlock_irqrestore(&ib_mad_port_list_lock, flags);
2666 
2667 	/* Stop processing completions. */
2668 	flush_workqueue(port_priv->wq);
2669 	destroy_workqueue(port_priv->wq);
2670 	destroy_mad_qp(&port_priv->qp_info[1]);
2671 	destroy_mad_qp(&port_priv->qp_info[0]);
2672 	ib_dereg_mr(port_priv->mr);
2673 	ib_dealloc_pd(port_priv->pd);
2674 	ib_destroy_cq(port_priv->cq);
2675 	cleanup_recv_queue(&port_priv->qp_info[1]);
2676 	cleanup_recv_queue(&port_priv->qp_info[0]);
2677 	/* XXX: Handle deallocation of MAD registration tables */
2678 
2679 	kfree(port_priv);
2680 
2681 	return 0;
2682 }
2683 
2684 static void ib_mad_init_device(struct ib_device *device)
2685 {
2686 	int num_ports, cur_port, i;
2687 
2688 	if (device->node_type == IB_NODE_SWITCH) {
2689 		num_ports = 1;
2690 		cur_port = 0;
2691 	} else {
2692 		num_ports = device->phys_port_cnt;
2693 		cur_port = 1;
2694 	}
2695 	for (i = 0; i < num_ports; i++, cur_port++) {
2696 		if (ib_mad_port_open(device, cur_port)) {
2697 			printk(KERN_ERR PFX "Couldn't open %s port %d\n",
2698 			       device->name, cur_port);
2699 			goto error_device_open;
2700 		}
2701 		if (ib_agent_port_open(device, cur_port)) {
2702 			printk(KERN_ERR PFX "Couldn't open %s port %d "
2703 			       "for agents\n",
2704 			       device->name, cur_port);
2705 			goto error_device_open;
2706 		}
2707 	}
2708 	return;
2709 
2710 error_device_open:
2711 	while (i > 0) {
2712 		cur_port--;
2713 		if (ib_agent_port_close(device, cur_port))
2714 			printk(KERN_ERR PFX "Couldn't close %s port %d "
2715 			       "for agents\n",
2716 			       device->name, cur_port);
2717 		if (ib_mad_port_close(device, cur_port))
2718 			printk(KERN_ERR PFX "Couldn't close %s port %d\n",
2719 			       device->name, cur_port);
2720 		i--;
2721 	}
2722 }
2723 
2724 static void ib_mad_remove_device(struct ib_device *device)
2725 {
2726 	int i, num_ports, cur_port;
2727 
2728 	if (device->node_type == IB_NODE_SWITCH) {
2729 		num_ports = 1;
2730 		cur_port = 0;
2731 	} else {
2732 		num_ports = device->phys_port_cnt;
2733 		cur_port = 1;
2734 	}
2735 	for (i = 0; i < num_ports; i++, cur_port++) {
2736 		if (ib_agent_port_close(device, cur_port))
2737 			printk(KERN_ERR PFX "Couldn't close %s port %d "
2738 			       "for agents\n",
2739 			       device->name, cur_port);
2740 		if (ib_mad_port_close(device, cur_port))
2741 			printk(KERN_ERR PFX "Couldn't close %s port %d\n",
2742 			       device->name, cur_port);
2743 	}
2744 }
2745 
2746 static struct ib_client mad_client = {
2747 	.name   = "mad",
2748 	.add = ib_mad_init_device,
2749 	.remove = ib_mad_remove_device
2750 };
2751 
2752 static int __init ib_mad_init_module(void)
2753 {
2754 	int ret;
2755 
2756 	spin_lock_init(&ib_mad_port_list_lock);
2757 	spin_lock_init(&ib_agent_port_list_lock);
2758 
2759 	ib_mad_cache = kmem_cache_create("ib_mad",
2760 					 sizeof(struct ib_mad_private),
2761 					 0,
2762 					 SLAB_HWCACHE_ALIGN,
2763 					 NULL,
2764 					 NULL);
2765 	if (!ib_mad_cache) {
2766 		printk(KERN_ERR PFX "Couldn't create ib_mad cache\n");
2767 		ret = -ENOMEM;
2768 		goto error1;
2769 	}
2770 
2771 	INIT_LIST_HEAD(&ib_mad_port_list);
2772 
2773 	if (ib_register_client(&mad_client)) {
2774 		printk(KERN_ERR PFX "Couldn't register ib_mad client\n");
2775 		ret = -EINVAL;
2776 		goto error2;
2777 	}
2778 
2779 	return 0;
2780 
2781 error2:
2782 	kmem_cache_destroy(ib_mad_cache);
2783 error1:
2784 	return ret;
2785 }
2786 
2787 static void __exit ib_mad_cleanup_module(void)
2788 {
2789 	ib_unregister_client(&mad_client);
2790 
2791 	if (kmem_cache_destroy(ib_mad_cache)) {
2792 		printk(KERN_DEBUG PFX "Failed to destroy ib_mad cache\n");
2793 	}
2794 }
2795 
2796 module_init(ib_mad_init_module);
2797 module_exit(ib_mad_cleanup_module);
2798 
2799