xref: /linux/drivers/infiniband/core/sa_query.c (revision a7edd0e676d51145ae634a2acf7a447e319200fa)
1 /*
2  * Copyright (c) 2004 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Voltaire, Inc.  All rights reserved.
4  * Copyright (c) 2006 Intel Corporation.  All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  *
34  * $Id: sa_query.c 2811 2005-07-06 18:11:43Z halr $
35  */
36 
37 #include <linux/module.h>
38 #include <linux/init.h>
39 #include <linux/err.h>
40 #include <linux/random.h>
41 #include <linux/spinlock.h>
42 #include <linux/slab.h>
43 #include <linux/pci.h>
44 #include <linux/dma-mapping.h>
45 #include <linux/kref.h>
46 #include <linux/idr.h>
47 #include <linux/workqueue.h>
48 
49 #include <rdma/ib_pack.h>
50 #include <rdma/ib_cache.h>
51 #include "sa.h"
52 
53 MODULE_AUTHOR("Roland Dreier");
54 MODULE_DESCRIPTION("InfiniBand subnet administration query support");
55 MODULE_LICENSE("Dual BSD/GPL");

struct ib_sa_sm_ah {
	struct ib_ah        *ah;
	struct kref          ref;
};

struct ib_sa_port {
	struct ib_mad_agent *agent;
	struct ib_sa_sm_ah  *sm_ah;
	struct work_struct   update_task;
	spinlock_t           ah_lock;
	u8                   port_num;
};

struct ib_sa_device {
	int                     start_port, end_port;
	struct ib_event_handler event_handler;
	struct ib_sa_port port[0];
};

struct ib_sa_query {
	void (*callback)(struct ib_sa_query *, int, struct ib_sa_mad *);
	void (*release)(struct ib_sa_query *);
	struct ib_sa_client    *client;
	struct ib_sa_port      *port;
	struct ib_mad_send_buf *mad_buf;
	struct ib_sa_sm_ah     *sm_ah;
	int			id;
};

struct ib_sa_service_query {
	void (*callback)(int, struct ib_sa_service_rec *, void *);
	void *context;
	struct ib_sa_query sa_query;
};

struct ib_sa_path_query {
	void (*callback)(int, struct ib_sa_path_rec *, void *);
	void *context;
	struct ib_sa_query sa_query;
};

struct ib_sa_mcmember_query {
	void (*callback)(int, struct ib_sa_mcmember_rec *, void *);
	void *context;
	struct ib_sa_query sa_query;
};

static void ib_sa_add_one(struct ib_device *device);
static void ib_sa_remove_one(struct ib_device *device);

static struct ib_client sa_client = {
	.name   = "sa",
	.add    = ib_sa_add_one,
	.remove = ib_sa_remove_one
};

static spinlock_t idr_lock;
static DEFINE_IDR(query_idr);

static spinlock_t tid_lock;
static u32 tid;

#define PATH_REC_FIELD(field) \
	.struct_offset_bytes = offsetof(struct ib_sa_path_rec, field),		\
	.struct_size_bytes   = sizeof ((struct ib_sa_path_rec *) 0)->field,	\
	.field_name          = "sa_path_rec:" #field

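/*
 * The tables below describe the on-the-wire layout of each SA record
 * (per the InfiniBand Architecture Specification) so that ib_pack()
 * and ib_unpack() can convert between the host-endian structures in
 * <rdma/ib_sa.h> and the big-endian MAD data.  RESERVED entries mark
 * wire fields that have no corresponding structure member.
 */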
static const struct ib_field path_rec_table[] = {
	{ RESERVED,
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ RESERVED,
	  .offset_words = 1,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ PATH_REC_FIELD(dgid),
	  .offset_words = 2,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ PATH_REC_FIELD(sgid),
	  .offset_words = 6,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ PATH_REC_FIELD(dlid),
	  .offset_words = 10,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ PATH_REC_FIELD(slid),
	  .offset_words = 10,
	  .offset_bits  = 16,
	  .size_bits    = 16 },
	{ PATH_REC_FIELD(raw_traffic),
	  .offset_words = 11,
	  .offset_bits  = 0,
	  .size_bits    = 1 },
	{ RESERVED,
	  .offset_words = 11,
	  .offset_bits  = 1,
	  .size_bits    = 3 },
	{ PATH_REC_FIELD(flow_label),
	  .offset_words = 11,
	  .offset_bits  = 4,
	  .size_bits    = 20 },
	{ PATH_REC_FIELD(hop_limit),
	  .offset_words = 11,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ PATH_REC_FIELD(traffic_class),
	  .offset_words = 12,
	  .offset_bits  = 0,
	  .size_bits    = 8 },
	{ PATH_REC_FIELD(reversible),
	  .offset_words = 12,
	  .offset_bits  = 8,
	  .size_bits    = 1 },
	{ PATH_REC_FIELD(numb_path),
	  .offset_words = 12,
	  .offset_bits  = 9,
	  .size_bits    = 7 },
	{ PATH_REC_FIELD(pkey),
	  .offset_words = 12,
	  .offset_bits  = 16,
	  .size_bits    = 16 },
	{ RESERVED,
	  .offset_words = 13,
	  .offset_bits  = 0,
	  .size_bits    = 12 },
	{ PATH_REC_FIELD(sl),
	  .offset_words = 13,
	  .offset_bits  = 12,
	  .size_bits    = 4 },
	{ PATH_REC_FIELD(mtu_selector),
	  .offset_words = 13,
	  .offset_bits  = 16,
	  .size_bits    = 2 },
	{ PATH_REC_FIELD(mtu),
	  .offset_words = 13,
	  .offset_bits  = 18,
	  .size_bits    = 6 },
	{ PATH_REC_FIELD(rate_selector),
	  .offset_words = 13,
	  .offset_bits  = 24,
	  .size_bits    = 2 },
	{ PATH_REC_FIELD(rate),
	  .offset_words = 13,
	  .offset_bits  = 26,
	  .size_bits    = 6 },
	{ PATH_REC_FIELD(packet_life_time_selector),
	  .offset_words = 14,
	  .offset_bits  = 0,
	  .size_bits    = 2 },
	{ PATH_REC_FIELD(packet_life_time),
	  .offset_words = 14,
	  .offset_bits  = 2,
	  .size_bits    = 6 },
	{ PATH_REC_FIELD(preference),
	  .offset_words = 14,
	  .offset_bits  = 8,
	  .size_bits    = 8 },
	{ RESERVED,
	  .offset_words = 14,
	  .offset_bits  = 16,
	  .size_bits    = 48 },
};

#define MCMEMBER_REC_FIELD(field) \
	.struct_offset_bytes = offsetof(struct ib_sa_mcmember_rec, field),	\
	.struct_size_bytes   = sizeof ((struct ib_sa_mcmember_rec *) 0)->field,	\
	.field_name          = "sa_mcmember_rec:" #field

static const struct ib_field mcmember_rec_table[] = {
	{ MCMEMBER_REC_FIELD(mgid),
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ MCMEMBER_REC_FIELD(port_gid),
	  .offset_words = 4,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ MCMEMBER_REC_FIELD(qkey),
	  .offset_words = 8,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ MCMEMBER_REC_FIELD(mlid),
	  .offset_words = 9,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ MCMEMBER_REC_FIELD(mtu_selector),
	  .offset_words = 9,
	  .offset_bits  = 16,
	  .size_bits    = 2 },
	{ MCMEMBER_REC_FIELD(mtu),
	  .offset_words = 9,
	  .offset_bits  = 18,
	  .size_bits    = 6 },
	{ MCMEMBER_REC_FIELD(traffic_class),
	  .offset_words = 9,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ MCMEMBER_REC_FIELD(pkey),
	  .offset_words = 10,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ MCMEMBER_REC_FIELD(rate_selector),
	  .offset_words = 10,
	  .offset_bits  = 16,
	  .size_bits    = 2 },
	{ MCMEMBER_REC_FIELD(rate),
	  .offset_words = 10,
	  .offset_bits  = 18,
	  .size_bits    = 6 },
	{ MCMEMBER_REC_FIELD(packet_life_time_selector),
	  .offset_words = 10,
	  .offset_bits  = 24,
	  .size_bits    = 2 },
	{ MCMEMBER_REC_FIELD(packet_life_time),
	  .offset_words = 10,
	  .offset_bits  = 26,
	  .size_bits    = 6 },
	{ MCMEMBER_REC_FIELD(sl),
	  .offset_words = 11,
	  .offset_bits  = 0,
	  .size_bits    = 4 },
	{ MCMEMBER_REC_FIELD(flow_label),
	  .offset_words = 11,
	  .offset_bits  = 4,
	  .size_bits    = 20 },
	{ MCMEMBER_REC_FIELD(hop_limit),
	  .offset_words = 11,
	  .offset_bits  = 24,
	  .size_bits    = 8 },
	{ MCMEMBER_REC_FIELD(scope),
	  .offset_words = 12,
	  .offset_bits  = 0,
	  .size_bits    = 4 },
	{ MCMEMBER_REC_FIELD(join_state),
	  .offset_words = 12,
	  .offset_bits  = 4,
	  .size_bits    = 4 },
	{ MCMEMBER_REC_FIELD(proxy_join),
	  .offset_words = 12,
	  .offset_bits  = 8,
	  .size_bits    = 1 },
	{ RESERVED,
	  .offset_words = 12,
	  .offset_bits  = 9,
	  .size_bits    = 23 },
};

#define SERVICE_REC_FIELD(field) \
	.struct_offset_bytes = offsetof(struct ib_sa_service_rec, field),	\
	.struct_size_bytes   = sizeof ((struct ib_sa_service_rec *) 0)->field,	\
	.field_name          = "sa_service_rec:" #field

static const struct ib_field service_rec_table[] = {
	{ SERVICE_REC_FIELD(id),
	  .offset_words = 0,
	  .offset_bits  = 0,
	  .size_bits    = 64 },
	{ SERVICE_REC_FIELD(gid),
	  .offset_words = 2,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ SERVICE_REC_FIELD(pkey),
	  .offset_words = 6,
	  .offset_bits  = 0,
	  .size_bits    = 16 },
	{ SERVICE_REC_FIELD(lease),
	  .offset_words = 7,
	  .offset_bits  = 0,
	  .size_bits    = 32 },
	{ SERVICE_REC_FIELD(key),
	  .offset_words = 8,
	  .offset_bits  = 0,
	  .size_bits    = 128 },
	{ SERVICE_REC_FIELD(name),
	  .offset_words = 12,
	  .offset_bits  = 0,
	  .size_bits    = 64*8 },
	{ SERVICE_REC_FIELD(data8),
	  .offset_words = 28,
	  .offset_bits  = 0,
	  .size_bits    = 16*8 },
	{ SERVICE_REC_FIELD(data16),
	  .offset_words = 32,
	  .offset_bits  = 0,
	  .size_bits    = 8*16 },
	{ SERVICE_REC_FIELD(data32),
	  .offset_words = 36,
	  .offset_bits  = 0,
	  .size_bits    = 4*32 },
	{ SERVICE_REC_FIELD(data64),
	  .offset_words = 40,
	  .offset_bits  = 0,
	  .size_bits    = 2*64 },
};

static void free_sm_ah(struct kref *kref)
{
	struct ib_sa_sm_ah *sm_ah = container_of(kref, struct ib_sa_sm_ah, ref);

	ib_destroy_ah(sm_ah->ah);
	kfree(sm_ah);
}

static void update_sm_ah(struct work_struct *work)
{
	struct ib_sa_port *port =
		container_of(work, struct ib_sa_port, update_task);
	struct ib_sa_sm_ah *new_ah, *old_ah;
	struct ib_port_attr port_attr;
	struct ib_ah_attr   ah_attr;

	if (ib_query_port(port->agent->device, port->port_num, &port_attr)) {
		printk(KERN_WARNING "Couldn't query port\n");
		return;
	}

	new_ah = kmalloc(sizeof *new_ah, GFP_KERNEL);
	if (!new_ah) {
		printk(KERN_WARNING "Couldn't allocate new SM AH\n");
		return;
	}

	kref_init(&new_ah->ref);

	memset(&ah_attr, 0, sizeof ah_attr);
	ah_attr.dlid     = port_attr.sm_lid;
	ah_attr.sl       = port_attr.sm_sl;
	ah_attr.port_num = port->port_num;

	new_ah->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
	if (IS_ERR(new_ah->ah)) {
		printk(KERN_WARNING "Couldn't create new SM AH\n");
		kfree(new_ah);
		return;
	}

	spin_lock_irq(&port->ah_lock);
	old_ah = port->sm_ah;
	port->sm_ah = new_ah;
	spin_unlock_irq(&port->ah_lock);

	if (old_ah)
		kref_put(&old_ah->ref, free_sm_ah);
}

static void ib_sa_event(struct ib_event_handler *handler, struct ib_event *event)
{
	if (event->event == IB_EVENT_PORT_ERR    ||
	    event->event == IB_EVENT_PORT_ACTIVE ||
	    event->event == IB_EVENT_LID_CHANGE  ||
	    event->event == IB_EVENT_PKEY_CHANGE ||
	    event->event == IB_EVENT_SM_CHANGE   ||
	    event->event == IB_EVENT_CLIENT_REREGISTER) {
		struct ib_sa_device *sa_dev;
		sa_dev = container_of(handler, typeof(*sa_dev), event_handler);

		schedule_work(&sa_dev->port[event->element.port_num -
					    sa_dev->start_port].update_task);
	}
}

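/*
 * SA client lifetime: ib_sa_register_client() starts the reference
 * count at one.  Each query issued on behalf of the client takes an
 * extra reference (ib_sa_client_get()) that is dropped when the query
 * finishes, and ib_sa_unregister_client() drops the initial reference
 * and then sleeps on the completion, which fires once the last
 * reference is gone (see the helpers in sa.h).
 */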
void ib_sa_register_client(struct ib_sa_client *client)
{
	atomic_set(&client->users, 1);
	init_completion(&client->comp);
}
EXPORT_SYMBOL(ib_sa_register_client);

void ib_sa_unregister_client(struct ib_sa_client *client)
{
	ib_sa_client_put(client);
	wait_for_completion(&client->comp);
}
EXPORT_SYMBOL(ib_sa_unregister_client);

/**
 * ib_sa_cancel_query - try to cancel an SA query
 * @id:ID of query to cancel
 * @query:query pointer to cancel
 *
 * Try to cancel an SA query.  If the id and query don't match up or
 * the query has already completed, nothing is done.  Otherwise the
 * query is canceled and will complete with a status of -EINTR.
 */
void ib_sa_cancel_query(int id, struct ib_sa_query *query)
{
	unsigned long flags;
	struct ib_mad_agent *agent;
	struct ib_mad_send_buf *mad_buf;

	spin_lock_irqsave(&idr_lock, flags);
	if (idr_find(&query_idr, id) != query) {
		spin_unlock_irqrestore(&idr_lock, flags);
		return;
	}
	agent = query->port->agent;
	mad_buf = query->mad_buf;
	spin_unlock_irqrestore(&idr_lock, flags);

	ib_cancel_mad(agent, mad_buf);
}
EXPORT_SYMBOL(ib_sa_cancel_query);
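
/*
 * Illustrative sketch (hypothetical caller, not part of this file):
 * cancelling a query started with ib_sa_path_rec_get().  The query's
 * callback still runs, with a status of -EINTR:
 *
 *	id = ib_sa_path_rec_get(..., &query);
 *	if (id >= 0 && need_to_abort)
 *		ib_sa_cancel_query(id, query);
 */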

int ib_init_ah_from_path(struct ib_device *device, u8 port_num,
			 struct ib_sa_path_rec *rec, struct ib_ah_attr *ah_attr)
{
	int ret;
	u16 gid_index;

	memset(ah_attr, 0, sizeof *ah_attr);
	ah_attr->dlid = be16_to_cpu(rec->dlid);
	ah_attr->sl = rec->sl;
	ah_attr->src_path_bits = be16_to_cpu(rec->slid) & 0x7f;
	ah_attr->port_num = port_num;
	ah_attr->static_rate = rec->rate;

	if (rec->hop_limit > 1) {
		ah_attr->ah_flags = IB_AH_GRH;
		ah_attr->grh.dgid = rec->dgid;

		ret = ib_find_cached_gid(device, &rec->sgid, &port_num,
					 &gid_index);
		if (ret)
			return ret;

		ah_attr->grh.sgid_index    = gid_index;
		ah_attr->grh.flow_label    = be32_to_cpu(rec->flow_label);
		ah_attr->grh.hop_limit     = rec->hop_limit;
		ah_attr->grh.traffic_class = rec->traffic_class;
	}
	return 0;
}
EXPORT_SYMBOL(ib_init_ah_from_path);
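
/*
 * Illustrative sketch (hypothetical caller, not part of this file):
 * turning a completed path record lookup into an address handle,
 * where "pd" and "resp" are assumed to come from the caller:
 *
 *	struct ib_ah_attr ah_attr;
 *	struct ib_ah *ah;
 *
 *	if (!ib_init_ah_from_path(device, port_num, resp, &ah_attr))
 *		ah = ib_create_ah(pd, &ah_attr);
 */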

static void init_mad(struct ib_sa_mad *mad, struct ib_mad_agent *agent)
{
	unsigned long flags;

	memset(mad, 0, sizeof *mad);

	mad->mad_hdr.base_version  = IB_MGMT_BASE_VERSION;
	mad->mad_hdr.mgmt_class    = IB_MGMT_CLASS_SUBN_ADM;
	mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;

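	/*
	 * Transaction IDs carry the MAD agent's unique ID (hi_tid) in
	 * the upper 32 bits and a locally incremented counter in the
	 * lower 32; the counter is seeded with random bytes at module
	 * load, so TIDs are unlikely to repeat across module reloads.
	 */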
	spin_lock_irqsave(&tid_lock, flags);
	mad->mad_hdr.tid           =
		cpu_to_be64(((u64) agent->hi_tid) << 32 | tid++);
	spin_unlock_irqrestore(&tid_lock, flags);
}

static int send_mad(struct ib_sa_query *query, int timeout_ms, gfp_t gfp_mask)
{
	unsigned long flags;
	int ret, id;

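	/*
	 * Standard two-step idr allocation: preallocate outside the
	 * lock, then take the lock and grab an ID.  idr_get_new()
	 * returns -EAGAIN if a concurrent allocation consumed the
	 * preallocated memory, in which case we simply preload again.
	 */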
retry:
	if (!idr_pre_get(&query_idr, gfp_mask))
		return -ENOMEM;
	spin_lock_irqsave(&idr_lock, flags);
	ret = idr_get_new(&query_idr, query, &id);
	spin_unlock_irqrestore(&idr_lock, flags);
	if (ret == -EAGAIN)
		goto retry;
	if (ret)
		return ret;

	query->mad_buf->timeout_ms  = timeout_ms;
	query->mad_buf->context[0] = query;
	query->id = id;

	spin_lock_irqsave(&query->port->ah_lock, flags);
	kref_get(&query->port->sm_ah->ref);
	query->sm_ah = query->port->sm_ah;
	spin_unlock_irqrestore(&query->port->ah_lock, flags);

	query->mad_buf->ah = query->sm_ah->ah;

	ret = ib_post_send_mad(query->mad_buf, NULL);
	if (ret) {
		spin_lock_irqsave(&idr_lock, flags);
		idr_remove(&query_idr, id);
		spin_unlock_irqrestore(&idr_lock, flags);

		kref_put(&query->sm_ah->ref, free_sm_ah);
	}

	/*
	 * It's not safe to dereference query any more, because the
	 * send may already have completed and freed the query in
	 * another context.
	 */
	return ret ? ret : id;
}

static void ib_sa_path_rec_callback(struct ib_sa_query *sa_query,
				    int status,
				    struct ib_sa_mad *mad)
{
	struct ib_sa_path_query *query =
		container_of(sa_query, struct ib_sa_path_query, sa_query);

	if (mad) {
		struct ib_sa_path_rec rec;

		ib_unpack(path_rec_table, ARRAY_SIZE(path_rec_table),
			  mad->data, &rec);
		query->callback(status, &rec, query->context);
	} else
		query->callback(status, NULL, query->context);
}

static void ib_sa_path_rec_release(struct ib_sa_query *sa_query)
{
	kfree(container_of(sa_query, struct ib_sa_path_query, sa_query));
}

/**
 * ib_sa_path_rec_get - Start a Path get query
 * @client:SA client
 * @device:device to send query on
 * @port_num: port number to send query on
 * @rec:Path Record to send in query
 * @comp_mask:component mask to send in query
 * @timeout_ms:time to wait for response
 * @gfp_mask:GFP mask to use for internal allocations
 * @callback:function called when query completes, times out or is
 * canceled
 * @context:opaque user context passed to callback
 * @sa_query:query context, used to cancel query
 *
 * Send a Path Record Get query to the SA to look up a path.  The
 * callback function will be called when the query completes (or
 * fails); status is 0 for a successful response, -EINTR if the query
 * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
 * occurred sending the query.  The resp parameter of the callback is
 * only valid if status is 0.
 *
 * If the return value of ib_sa_path_rec_get() is negative, it is an
 * error code.  Otherwise it is a query ID that can be used to cancel
 * the query.
 */
int ib_sa_path_rec_get(struct ib_sa_client *client,
		       struct ib_device *device, u8 port_num,
		       struct ib_sa_path_rec *rec,
		       ib_sa_comp_mask comp_mask,
		       int timeout_ms, gfp_t gfp_mask,
		       void (*callback)(int status,
					struct ib_sa_path_rec *resp,
					void *context),
		       void *context,
		       struct ib_sa_query **sa_query)
{
	struct ib_sa_path_query *query;
	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
	struct ib_sa_port   *port;
	struct ib_mad_agent *agent;
	struct ib_sa_mad *mad;
	int ret;

	if (!sa_dev)
		return -ENODEV;

	port  = &sa_dev->port[port_num - sa_dev->start_port];
	agent = port->agent;

	query = kmalloc(sizeof *query, gfp_mask);
	if (!query)
		return -ENOMEM;

	query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
						     0, IB_MGMT_SA_HDR,
						     IB_MGMT_SA_DATA, gfp_mask);
	/* ib_create_send_mad() returns an ERR_PTR on failure, not NULL */
	if (IS_ERR(query->sa_query.mad_buf)) {
		ret = PTR_ERR(query->sa_query.mad_buf);
		goto err1;
	}

	ib_sa_client_get(client);
	query->sa_query.client = client;
	query->callback        = callback;
	query->context         = context;

	mad = query->sa_query.mad_buf->mad;
	init_mad(mad, agent);

	query->sa_query.callback = callback ? ib_sa_path_rec_callback : NULL;
	query->sa_query.release  = ib_sa_path_rec_release;
	query->sa_query.port     = port;
	mad->mad_hdr.method	 = IB_MGMT_METHOD_GET;
	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_PATH_REC);
	mad->sa_hdr.comp_mask	 = comp_mask;

	ib_pack(path_rec_table, ARRAY_SIZE(path_rec_table), rec, mad->data);

	*sa_query = &query->sa_query;

	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
	if (ret < 0)
		goto err2;

	return ret;

err2:
	*sa_query = NULL;
	ib_sa_client_put(query->sa_query.client);
	ib_free_send_mad(query->sa_query.mad_buf);

err1:
	kfree(query);
	return ret;
}
EXPORT_SYMBOL(ib_sa_path_rec_get);
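
/*
 * Illustrative sketch (hypothetical names, not part of this file): a
 * caller that has done ib_sa_register_client(&my_client) might look
 * up a path between two GIDs roughly like this:
 *
 *	static void my_callback(int status, struct ib_sa_path_rec *resp,
 *				void *context)
 *	{
 *		if (!status)
 *			use_path(resp);		(resp is only valid for
 *						 the callback's duration)
 *	}
 *
 *	struct ib_sa_path_rec rec = {
 *		.sgid      = local_gid,
 *		.dgid      = remote_gid,
 *		.numb_path = 1,
 *	};
 *	struct ib_sa_query *query;
 *	int id;
 *
 *	id = ib_sa_path_rec_get(&my_client, device, port_num, &rec,
 *				IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_DGID |
 *				IB_SA_PATH_REC_NUMB_PATH,
 *				1000, GFP_KERNEL, my_callback, my_context,
 *				&query);
 */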

static void ib_sa_service_rec_callback(struct ib_sa_query *sa_query,
				       int status,
				       struct ib_sa_mad *mad)
{
	struct ib_sa_service_query *query =
		container_of(sa_query, struct ib_sa_service_query, sa_query);

	if (mad) {
		struct ib_sa_service_rec rec;

		ib_unpack(service_rec_table, ARRAY_SIZE(service_rec_table),
			  mad->data, &rec);
		query->callback(status, &rec, query->context);
	} else
		query->callback(status, NULL, query->context);
}

static void ib_sa_service_rec_release(struct ib_sa_query *sa_query)
{
	kfree(container_of(sa_query, struct ib_sa_service_query, sa_query));
}

/**
 * ib_sa_service_rec_query - Start Service Record operation
 * @client:SA client
 * @device:device to send request on
 * @port_num: port number to send request on
 * @method:SA method - should be get, set, or delete
 * @rec:Service Record to send in request
 * @comp_mask:component mask to send in request
 * @timeout_ms:time to wait for response
 * @gfp_mask:GFP mask to use for internal allocations
 * @callback:function called when request completes, times out or is
 * canceled
 * @context:opaque user context passed to callback
 * @sa_query:request context, used to cancel request
 *
 * Send a Service Record set/get/delete to the SA to register,
 * unregister or query a service record.
 * The callback function will be called when the request completes (or
 * fails); status is 0 for a successful response, -EINTR if the query
 * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
 * occurred sending the query.  The resp parameter of the callback is
 * only valid if status is 0.
 *
 * If the return value of ib_sa_service_rec_query() is negative, it is an
 * error code.  Otherwise it is a request ID that can be used to cancel
 * the query.
 */
int ib_sa_service_rec_query(struct ib_sa_client *client,
			    struct ib_device *device, u8 port_num, u8 method,
			    struct ib_sa_service_rec *rec,
			    ib_sa_comp_mask comp_mask,
			    int timeout_ms, gfp_t gfp_mask,
			    void (*callback)(int status,
					     struct ib_sa_service_rec *resp,
					     void *context),
			    void *context,
			    struct ib_sa_query **sa_query)
{
	struct ib_sa_service_query *query;
	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
	struct ib_sa_port   *port;
	struct ib_mad_agent *agent;
	struct ib_sa_mad *mad;
	int ret;

	if (!sa_dev)
		return -ENODEV;

	port  = &sa_dev->port[port_num - sa_dev->start_port];
	agent = port->agent;

	if (method != IB_MGMT_METHOD_GET &&
	    method != IB_MGMT_METHOD_SET &&
	    method != IB_SA_METHOD_DELETE)
		return -EINVAL;

	query = kmalloc(sizeof *query, gfp_mask);
	if (!query)
		return -ENOMEM;

	query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
						     0, IB_MGMT_SA_HDR,
						     IB_MGMT_SA_DATA, gfp_mask);
	/* ib_create_send_mad() returns an ERR_PTR on failure, not NULL */
	if (IS_ERR(query->sa_query.mad_buf)) {
		ret = PTR_ERR(query->sa_query.mad_buf);
		goto err1;
	}

	ib_sa_client_get(client);
	query->sa_query.client = client;
	query->callback        = callback;
	query->context         = context;

	mad = query->sa_query.mad_buf->mad;
	init_mad(mad, agent);

	query->sa_query.callback = callback ? ib_sa_service_rec_callback : NULL;
	query->sa_query.release  = ib_sa_service_rec_release;
	query->sa_query.port     = port;
	mad->mad_hdr.method	 = method;
	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_SERVICE_REC);
	mad->sa_hdr.comp_mask	 = comp_mask;

	ib_pack(service_rec_table, ARRAY_SIZE(service_rec_table),
		rec, mad->data);

	*sa_query = &query->sa_query;

	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
	if (ret < 0)
		goto err2;

	return ret;

err2:
	*sa_query = NULL;
	ib_sa_client_put(query->sa_query.client);
	ib_free_send_mad(query->sa_query.mad_buf);

err1:
	kfree(query);
	return ret;
}
EXPORT_SYMBOL(ib_sa_service_rec_query);
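
/*
 * Illustrative sketch (hypothetical names, not part of this file):
 * registering a service record uses IB_MGMT_METHOD_SET, e.g.
 *
 *	id = ib_sa_service_rec_query(&my_client, device, port_num,
 *				     IB_MGMT_METHOD_SET, &rec, comp_mask,
 *				     1000, GFP_KERNEL, my_callback,
 *				     my_context, &query);
 *
 * with IB_MGMT_METHOD_GET to query and IB_SA_METHOD_DELETE to
 * unregister, as checked above.
 */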

static void ib_sa_mcmember_rec_callback(struct ib_sa_query *sa_query,
					int status,
					struct ib_sa_mad *mad)
{
	struct ib_sa_mcmember_query *query =
		container_of(sa_query, struct ib_sa_mcmember_query, sa_query);

	if (mad) {
		struct ib_sa_mcmember_rec rec;

		ib_unpack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
			  mad->data, &rec);
		query->callback(status, &rec, query->context);
	} else
		query->callback(status, NULL, query->context);
}

static void ib_sa_mcmember_rec_release(struct ib_sa_query *sa_query)
{
	kfree(container_of(sa_query, struct ib_sa_mcmember_query, sa_query));
}

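/*
 * ib_sa_mcmember_rec_query - Start a MCMemberRecord get/set/delete.
 * Semantics mirror ib_sa_service_rec_query() above: the callback runs
 * when the request completes, fails, or is canceled, and a
 * non-negative return value is a request ID usable with
 * ib_sa_cancel_query().  Deliberately not exported; it is declared in
 * sa.h for use elsewhere within the ib_sa module.
 */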
int ib_sa_mcmember_rec_query(struct ib_sa_client *client,
			     struct ib_device *device, u8 port_num,
			     u8 method,
			     struct ib_sa_mcmember_rec *rec,
			     ib_sa_comp_mask comp_mask,
			     int timeout_ms, gfp_t gfp_mask,
			     void (*callback)(int status,
					      struct ib_sa_mcmember_rec *resp,
					      void *context),
			     void *context,
			     struct ib_sa_query **sa_query)
{
	struct ib_sa_mcmember_query *query;
	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
	struct ib_sa_port   *port;
	struct ib_mad_agent *agent;
	struct ib_sa_mad *mad;
	int ret;

	if (!sa_dev)
		return -ENODEV;

	port  = &sa_dev->port[port_num - sa_dev->start_port];
	agent = port->agent;

	query = kmalloc(sizeof *query, gfp_mask);
	if (!query)
		return -ENOMEM;

	query->sa_query.mad_buf = ib_create_send_mad(agent, 1, 0,
						     0, IB_MGMT_SA_HDR,
						     IB_MGMT_SA_DATA, gfp_mask);
	/* ib_create_send_mad() returns an ERR_PTR on failure, not NULL */
	if (IS_ERR(query->sa_query.mad_buf)) {
		ret = PTR_ERR(query->sa_query.mad_buf);
		goto err1;
	}

	ib_sa_client_get(client);
	query->sa_query.client = client;
	query->callback        = callback;
	query->context         = context;

	mad = query->sa_query.mad_buf->mad;
	init_mad(mad, agent);

	query->sa_query.callback = callback ? ib_sa_mcmember_rec_callback : NULL;
	query->sa_query.release  = ib_sa_mcmember_rec_release;
	query->sa_query.port     = port;
	mad->mad_hdr.method	 = method;
	mad->mad_hdr.attr_id	 = cpu_to_be16(IB_SA_ATTR_MC_MEMBER_REC);
	mad->sa_hdr.comp_mask	 = comp_mask;

	ib_pack(mcmember_rec_table, ARRAY_SIZE(mcmember_rec_table),
		rec, mad->data);

	*sa_query = &query->sa_query;

	ret = send_mad(&query->sa_query, timeout_ms, gfp_mask);
	if (ret < 0)
		goto err2;

	return ret;

err2:
	*sa_query = NULL;
	ib_sa_client_put(query->sa_query.client);
	ib_free_send_mad(query->sa_query.mad_buf);

err1:
	kfree(query);
	return ret;
}

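/*
 * MAD agent callbacks: recv_handler() hands the received SA response
 * to the query's callback, while send_handler() reports timeouts,
 * flushes, and send errors.  send_handler() runs exactly once per
 * query whatever the outcome, so all teardown (idr removal, dropping
 * the AH and client references, releasing the query) lives there.
 */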
static void send_handler(struct ib_mad_agent *agent,
			 struct ib_mad_send_wc *mad_send_wc)
{
	struct ib_sa_query *query = mad_send_wc->send_buf->context[0];
	unsigned long flags;

	if (query->callback)
		switch (mad_send_wc->status) {
		case IB_WC_SUCCESS:
			/* No callback -- already got recv */
			break;
		case IB_WC_RESP_TIMEOUT_ERR:
			query->callback(query, -ETIMEDOUT, NULL);
			break;
		case IB_WC_WR_FLUSH_ERR:
			query->callback(query, -EINTR, NULL);
			break;
		default:
			query->callback(query, -EIO, NULL);
			break;
		}

	spin_lock_irqsave(&idr_lock, flags);
	idr_remove(&query_idr, query->id);
	spin_unlock_irqrestore(&idr_lock, flags);

	ib_free_send_mad(mad_send_wc->send_buf);
	kref_put(&query->sm_ah->ref, free_sm_ah);
	ib_sa_client_put(query->client);
	query->release(query);
}

static void recv_handler(struct ib_mad_agent *mad_agent,
			 struct ib_mad_recv_wc *mad_recv_wc)
{
	struct ib_sa_query *query;
	struct ib_mad_send_buf *mad_buf;

	mad_buf = (void *) (unsigned long) mad_recv_wc->wc->wr_id;
	query = mad_buf->context[0];

	if (query->callback) {
		if (mad_recv_wc->wc->status == IB_WC_SUCCESS)
			query->callback(query,
					mad_recv_wc->recv_buf.mad->mad_hdr.status ?
					-EINVAL : 0,
					(struct ib_sa_mad *) mad_recv_wc->recv_buf.mad);
		else
			query->callback(query, -EIO, NULL);
	}

	ib_free_recv_mad(mad_recv_wc);
}

static void ib_sa_add_one(struct ib_device *device)
{
	struct ib_sa_device *sa_dev;
	int s, e, i;

	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
		return;

	if (device->node_type == RDMA_NODE_IB_SWITCH)
		s = e = 0;
	else {
		s = 1;
		e = device->phys_port_cnt;
	}

	sa_dev = kmalloc(sizeof *sa_dev +
			 (e - s + 1) * sizeof (struct ib_sa_port),
			 GFP_KERNEL);
	if (!sa_dev)
		return;

	sa_dev->start_port = s;
	sa_dev->end_port   = e;

	for (i = 0; i <= e - s; ++i) {
		sa_dev->port[i].sm_ah    = NULL;
		sa_dev->port[i].port_num = i + s;
		spin_lock_init(&sa_dev->port[i].ah_lock);

		sa_dev->port[i].agent =
			ib_register_mad_agent(device, i + s, IB_QPT_GSI,
					      NULL, 0, send_handler,
					      recv_handler, sa_dev);
		if (IS_ERR(sa_dev->port[i].agent))
			goto err;

		INIT_WORK(&sa_dev->port[i].update_task, update_sm_ah);
	}

	ib_set_client_data(device, &sa_client, sa_dev);

	/*
	 * We register our event handler after everything is set up,
	 * and then update our cached info after the event handler is
	 * registered to avoid any problems if a port changes state
	 * during our initialization.
	 */

	INIT_IB_EVENT_HANDLER(&sa_dev->event_handler, device, ib_sa_event);
	if (ib_register_event_handler(&sa_dev->event_handler))
		goto err;

	for (i = 0; i <= e - s; ++i)
		update_sm_ah(&sa_dev->port[i].update_task);

	return;

err:
	while (--i >= 0)
		ib_unregister_mad_agent(sa_dev->port[i].agent);

	kfree(sa_dev);

	return;
}

static void ib_sa_remove_one(struct ib_device *device)
{
	struct ib_sa_device *sa_dev = ib_get_client_data(device, &sa_client);
	int i;

	if (!sa_dev)
		return;

	ib_unregister_event_handler(&sa_dev->event_handler);

	flush_scheduled_work();

	for (i = 0; i <= sa_dev->end_port - sa_dev->start_port; ++i) {
		ib_unregister_mad_agent(sa_dev->port[i].agent);
		/* sm_ah is never set if update_sm_ah() failed for this port */
		if (sa_dev->port[i].sm_ah)
			kref_put(&sa_dev->port[i].sm_ah->ref, free_sm_ah);
	}

	kfree(sa_dev);
}

static int __init ib_sa_init(void)
{
	int ret;

	spin_lock_init(&idr_lock);
	spin_lock_init(&tid_lock);

	get_random_bytes(&tid, sizeof tid);

	ret = ib_register_client(&sa_client);
	if (ret) {
		printk(KERN_ERR "Couldn't register ib_sa client\n");
		goto err1;
	}

	ret = mcast_init();
	if (ret) {
		printk(KERN_ERR "Couldn't initialize multicast handling\n");
		goto err2;
	}

	return 0;
err2:
	ib_unregister_client(&sa_client);
err1:
	return ret;
}

static void __exit ib_sa_cleanup(void)
{
	mcast_cleanup();
	ib_unregister_client(&sa_client);
	idr_destroy(&query_idr);
}

module_init(ib_sa_init);
module_exit(ib_sa_cleanup);